├── .Rbuildignore
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-plot_mbs_contexts.R
    │   ├── test-plot_main_dbs_contexts.R
    │   ├── test-plot_main_indel_contexts.R
    │   ├── test-pool_mut_matrix.R
    │   ├── test-cos_sim.R
    │   ├── test-mut_type.R
    │   ├── test-count_mbs_contexts.R
    │   ├── test-plot_dbs_contexts.R
    │   ├── test-get_dbs_context.R
    │   ├── test-count_indel_contexts.R
    │   ├── test-plot_correlation_bootstrap.R
    │   ├── test-plot_bootstrapped_contribution.R
    │   ├── test-plot_signature_strand_bias.R
    │   ├── test-plot_river.R
    │   ├── test-plot_indel_contexts.R
    │   ├── test-get_indel_context.R
    │   ├── test-signature_potential_damage_analysis.R
    │   ├── test-plot_long_profile.R
    │   ├── test-plot_96_profile.R
    │   ├── test-plot_compare_mbs.R
    │   ├── test-plot_192_profile.R
    │   ├── test-count_dbs_contexts.R
    │   ├── test-bin_mutation_density.R
    │   ├── test-plot_compare_profiles.R
    │   ├── test-strand_occurrences.R
    │   ├── test-plot_compare_dbs.R
    │   ├── test-cos_sim_matrix.R
    │   ├── test-plot_compare_indels.R
    │   ├── test-plot_original_vs_reconstructed.R
    │   ├── test-binomial_test.R
    │   ├── test-plot_profile_heatmap.R
    │   ├── test-plot_strand.R
    │   ├── test-plot_regional_similarity.R
    │   ├── test-plot_enrichment_depletion.R
    │   ├── test-fit_to_signatures_bootstrapped.R
    │   ├── test-convert_sigs_to_ref.R
    │   ├── test-context_potential_damage_analysis.R
    │   ├── test-plot_lesion_segregation.R
    │   ├── test-mut_type_occurrences.R
    │   ├── test-plot_contribution_heatmap.R
    │   ├── test-plot_strand_bias.R
    │   ├── test-type_context.R
    │   ├── test-lengthen_mut_matrix.R
    │   ├── test-plot_contribution.R
    │   ├── test-mut_matrix.R
    │   ├── test-fit_to_signatures.R
    │   ├── test-mutations_from_vcf.R
    │   ├── test-get_mut_type.R
    │   ├── test-plot_cosine_heatmap.R
    │   ├── test-enrichment_depletion_test.R
    │   ├── test-fit_to_signatures_strict.R
    │   ├── test-plot_spectrum.R
    │   ├── test-plot_spectrum_region.R
    │   ├── test-strand_bias_test.R
    │   ├── test-split_muts_region.R
    │   ├── test-genomic_distribution.R
    │   ├── test-plot_rainfall.R
    │   └── test-mut_matrix_stranded.R
├── inst
    ├── states
    │   ├── blood_grl.rds
    │   ├── blosum62.rds
    │   ├── dbs_refit.rds
    │   ├── distr_data.rds
    │   ├── snv_refit.rds
    │   ├── CTCF_g_data.rds
    │   ├── indel_refit.rds
    │   ├── mut_mat_data.rds
    │   ├── nmf_res_data.rds
    │   ├── repli_strand.rds
    │   ├── blood_grl_dbs.rds
    │   ├── blood_grl_indel.rds
    │   ├── blood_grl_mbs.rds
    │   ├── mut_mat_repli.rds
    │   ├── mut_mat_s_data.rds
    │   ├── promoter_g_data.rds
    │   ├── regional_sims.rds
    │   ├── regulatory_data.rds
    │   ├── blood_dbs_counts.rds
    │   ├── blood_mbs_counts.rds
    │   ├── grl_split_region.rds
    │   ├── strict_snv_refit.rds
    │   ├── blood_indel_counts.rds
    │   ├── context_mismatches.rds
    │   ├── lesion_segregation.rds
    │   ├── mut_mat_longregions.rds
    │   ├── mut_mat_splitregions.rds
    │   ├── nmf_res_strand_data.rds
    │   ├── blood_grl_dbs_context.rds
    │   ├── bootstrapped_snv_refit.rds
    │   ├── mut_mat_data_extended.rds
    │   ├── strict_best_snv_refit.rds
    │   ├── blood_grl_indel_context.rds
    │   ├── promoter_flanking_g_data.rds
    │   ├── read_vcfs_as_granges_output.rds
    │   ├── blood_indels_counts_split_region.rds
    │   └── blood_indels_longmatrix_split_region.rds
    ├── scripts
    │   ├── create_example_mbs.R
    │   ├── create_example_dbs.R
    │   ├── create_example_indels.R
    │   └── create_example_regulatory_regions.R
    ├── CITATION
    └── extdata
    │   └── empty.vcf
├── .gitignore
├── man
    ├── show-region_cossim-method.Rd
    ├── MutationalPatterns-defunct.Rd
    ├── cos_sim.Rd
    ├── mut_96_occurrences.Rd
    ├── mut_type.Rd
    ├── mutations_from_vcf.Rd
    ├── binomial_test.Rd
    ├── get_sim_tb.Rd
    ├── pool_mut_mat.Rd
    ├── mut_192_occurrences.Rd
    ├── mut_type_occurrences.Rd
    ├── get_dbs_context.Rd
    ├── count_mbs_contexts.Rd
    ├── convert_sigs_to_ref.Rd
    ├── cluster_signatures.Rd
    ├── type_context.Rd
    ├── plot_river.Rd
    ├── count_dbs_contexts.Rd
    ├── mut_context.Rd
    ├── plot_mbs_contexts.Rd
    ├── cos_sim_matrix.Rd
    ├── plot_correlation_bootstrap.Rd
    ├── plot_signature_strand_bias.Rd
    ├── count_indel_contexts.Rd
    ├── mut_matrix.Rd
    ├── strand_occurrences.Rd
    ├── plot_main_dbs_contexts.Rd
    ├── region_cossim-class.Rd
    ├── merge_signatures.Rd
    ├── plot_96_profile.Rd
    ├── plot_strand.Rd
    ├── plot_main_indel_contexts.Rd
    ├── plot_dbs_contexts.Rd
    ├── get_mut_type.Rd
    ├── fit_to_signatures.Rd
    ├── plot_192_profile.Rd
    ├── plot_strand_bias.Rd
    ├── get_indel_context.Rd
    ├── plot_bootstrapped_contribution.Rd
    ├── plot_profile_region.Rd
    ├── strand_bias_test.Rd
    ├── enrichment_depletion_test.Rd
    ├── lengthen_mut_matrix.Rd
    ├── plot_profile_heatmap.Rd
    ├── plot_enrichment_depletion.Rd
    ├── plot_original_vs_reconstructed.Rd
    ├── bin_mutation_density.Rd
    ├── split_muts_region.Rd
    ├── plot_compare_mbs.Rd
    ├── plot_indel_contexts.Rd
    ├── extract_signatures.Rd
    ├── plot_compare_dbs.Rd
    ├── plot_rainfall.Rd
    └── rename_nmf_signatures.Rd
├── R
    ├── cos_sim.R
    ├── mutations_from_vcf.R
    ├── mut_type.R
    ├── get_ref_alt.R
    ├── pool_mut_matrix.R
    ├── binomial_test.R
    ├── get_sig_start.R
    ├── cluster_signatures.R
    ├── mut_context.R
    ├── intersect_with_region.R
    ├── mut_matrix.R
    ├── type_context.R
    ├── cos_sim_matrix.R
    ├── convert_sigs_to_ref.R
    ├── mut_192_occurrences.R
    └── plot_mbs_contexts.R
├── LICENSE
├── README.md
└── NAMESPACE


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^data-raw$
2 | ^.*\.Rproj$
3 | ^\.Rproj\.user$
4 | paper/*
5 | images
6 | ^vignettes/.*\.html$


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(MutationalPatterns)
3 | # Run the package's testthat test files against MutationalPatterns
4 | test_check("MutationalPatterns")
5 | 


--------------------------------------------------------------------------------
/inst/states/blood_grl.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_grl.rds


--------------------------------------------------------------------------------
/inst/states/blosum62.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blosum62.rds


--------------------------------------------------------------------------------
/inst/states/dbs_refit.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/dbs_refit.rds


--------------------------------------------------------------------------------
/inst/states/distr_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/distr_data.rds


--------------------------------------------------------------------------------
/inst/states/snv_refit.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/snv_refit.rds


--------------------------------------------------------------------------------
/inst/states/CTCF_g_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/CTCF_g_data.rds


--------------------------------------------------------------------------------
/inst/states/indel_refit.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/indel_refit.rds


--------------------------------------------------------------------------------
/inst/states/mut_mat_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/mut_mat_data.rds


--------------------------------------------------------------------------------
/inst/states/nmf_res_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/nmf_res_data.rds


--------------------------------------------------------------------------------
/inst/states/repli_strand.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/repli_strand.rds


--------------------------------------------------------------------------------
/inst/states/blood_grl_dbs.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_grl_dbs.rds


--------------------------------------------------------------------------------
/inst/states/blood_grl_indel.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_grl_indel.rds


--------------------------------------------------------------------------------
/inst/states/blood_grl_mbs.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_grl_mbs.rds


--------------------------------------------------------------------------------
/inst/states/mut_mat_repli.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/mut_mat_repli.rds


--------------------------------------------------------------------------------
/inst/states/mut_mat_s_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/mut_mat_s_data.rds


--------------------------------------------------------------------------------
/inst/states/promoter_g_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/promoter_g_data.rds


--------------------------------------------------------------------------------
/inst/states/regional_sims.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/regional_sims.rds


--------------------------------------------------------------------------------
/inst/states/regulatory_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/regulatory_data.rds


--------------------------------------------------------------------------------
/inst/states/blood_dbs_counts.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_dbs_counts.rds


--------------------------------------------------------------------------------
/inst/states/blood_mbs_counts.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_mbs_counts.rds


--------------------------------------------------------------------------------
/inst/states/grl_split_region.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/grl_split_region.rds


--------------------------------------------------------------------------------
/inst/states/strict_snv_refit.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/strict_snv_refit.rds


--------------------------------------------------------------------------------
/inst/states/blood_indel_counts.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_indel_counts.rds


--------------------------------------------------------------------------------
/inst/states/context_mismatches.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/context_mismatches.rds


--------------------------------------------------------------------------------
/inst/states/lesion_segregation.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/lesion_segregation.rds


--------------------------------------------------------------------------------
/inst/states/mut_mat_longregions.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/mut_mat_longregions.rds


--------------------------------------------------------------------------------
/inst/states/mut_mat_splitregions.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/mut_mat_splitregions.rds


--------------------------------------------------------------------------------
/inst/states/nmf_res_strand_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/nmf_res_strand_data.rds


--------------------------------------------------------------------------------
/inst/states/blood_grl_dbs_context.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_grl_dbs_context.rds


--------------------------------------------------------------------------------
/inst/states/bootstrapped_snv_refit.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/bootstrapped_snv_refit.rds


--------------------------------------------------------------------------------
/inst/states/mut_mat_data_extended.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/mut_mat_data_extended.rds


--------------------------------------------------------------------------------
/inst/states/strict_best_snv_refit.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/strict_best_snv_refit.rds


--------------------------------------------------------------------------------
/inst/states/blood_grl_indel_context.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_grl_indel_context.rds


--------------------------------------------------------------------------------
/inst/states/promoter_flanking_g_data.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/promoter_flanking_g_data.rds


--------------------------------------------------------------------------------
/inst/states/read_vcfs_as_granges_output.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/read_vcfs_as_granges_output.rds


--------------------------------------------------------------------------------
/inst/states/blood_indels_counts_split_region.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_indels_counts_split_region.rds


--------------------------------------------------------------------------------
/inst/states/blood_indels_longmatrix_split_region.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ToolsVanBox/MutationalPatterns/HEAD/inst/states/blood_indels_longmatrix_split_region.rds


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Rapp.history
 5 | .DS_Store
 6 | .RDataTmp
 7 | MutationalPatterns.Rproj
 8 | vignettes/.build.timestamp
 9 | *.pdf
10 | *.tex
11 | *.synctex.gz
12 | Introduction_to_MutationalPatterns.toc
13 | Introduction_to_MutationalPatterns.log
14 | Introduction_to_MutationalPatterns.bbl
15 | vignettes/NMF_*
16 | vignettes/*.html


--------------------------------------------------------------------------------
/tests/testthat/test-plot_mbs_contexts.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_mbs_contexts")
 2 | 
 3 | ## Read the example mbs counts stored with the package
 4 | counts_file <- system.file("states/blood_mbs_counts.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | mbs_counts <- readRDS(counts_file)
 8 | ## Draw the mbs context plot with default settings
 9 | mbs_plot <- plot_mbs_contexts(mbs_counts)
10 | 
11 | test_that("Output has correct class", {
12 |   expect_true(inherits(mbs_plot, "gg"))
13 | })
14 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_main_dbs_contexts.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_main_dbs_contexts")
 2 | 
 3 | ## Read the example dbs counts stored with the package
 4 | counts_file <- system.file("states/blood_dbs_counts.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | dbs_counts <- readRDS(counts_file)
 8 | ## Draw the main dbs context plot with default settings
 9 | main_dbs_plot <- plot_main_dbs_contexts(dbs_counts)
10 | 
11 | test_that("Output has correct class", {
12 |   expect_true(inherits(main_dbs_plot, "gg"))
13 | })
14 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_main_indel_contexts.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_main_indel_contexts")
 2 | 
 3 | ## Read the example indel counts stored with the package
 4 | counts_file <- system.file("states/blood_indel_counts.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | indel_counts <- readRDS(counts_file)
 8 | 
 9 | ## Draw the main indel context plot with default settings
10 | main_indel_plot <- plot_main_indel_contexts(indel_counts)
11 | 
12 | test_that("Output has correct class", {
13 |   expect_true(inherits(main_indel_plot, "gg"))
14 | })
15 | 


--------------------------------------------------------------------------------
/man/show-region_cossim-method.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/S4_class.R
 3 | \name{show,region_cossim-method}
 4 | \alias{show,region_cossim-method}
 5 | \title{An S4 method to show an instance of the region_cossim class.}
 6 | \usage{
 7 | \S4method{show}{region_cossim}(object)
 8 | }
 9 | \arguments{
10 | \item{object}{A region_cossim object.}
11 | }
12 | \description{
13 | An S4 method to show an instance of the region_cossim class.
14 | }
15 | 


--------------------------------------------------------------------------------
/inst/scripts/create_example_mbs.R:
--------------------------------------------------------------------------------
 1 | library(tidyverse)
 2 | library(VariantAnnotation)
 3 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
 4 | library(ref_genome, character.only = TRUE)
 5 | # NOTE(review): nothing above attaches get_mut_type()/count_mbs_contexts(); presumably MutationalPatterns is loaded beforehand - confirm
 6 | # Read the stored blood_grl object (path is relative to the working directory, presumably the package root)
 7 | grl <- readRDS("inst/states/blood_grl.rds")
 8 | 
 9 | # Select the "mbs" mutations with get_mut_type() and save them as example data
10 | grl_mbs <- get_mut_type(grl, "mbs")
11 | saveRDS(grl_mbs, "inst/states/blood_grl_mbs.rds")
12 | 
13 | # Count the mbs contexts and save the counts as example data
14 | mbs_counts <- count_mbs_contexts(grl_mbs)
15 | saveRDS(mbs_counts, "inst/states/blood_mbs_counts.rds")
16 | 


--------------------------------------------------------------------------------
/man/MutationalPatterns-defunct.Rd:
--------------------------------------------------------------------------------
 1 | \name{MutationalPatterns-defunct}
 2 | \alias{mutation_context}
 3 | \alias{mutation_types}
 4 | \alias{strand_from_vcf}
 5 | \alias{explained_by_signatures}
 6 | \title{Defunct functions in package \sQuote{MutationalPatterns}}
 7 | \description{These functions are defunct and no longer available.}
 8 | 
 9 | \details{
10 |   Defunct functions are: 
11 |   \code{mutation_context}, 
12 |   \code{mutation_types}, 
13 |   \code{strand_from_vcf}, 
14 |   \code{explained_by_signatures}
15 | }


--------------------------------------------------------------------------------
/tests/testthat/test-pool_mut_matrix.R:
--------------------------------------------------------------------------------
 1 | context("test-pool_mut_matrix")
 2 | 
 3 | # Read the example mutation matrix stored with the package
 4 | mut_mat_file <- system.file("states/mut_mat_data.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | mut_mat <- readRDS(mut_mat_file)
 8 | # Grouping labels: three entries per tissue
 9 | tissue_groups <- rep(c("colon", "intestine", "liver"), each = 3)
10 | pooled <- pool_mut_mat(mut_mat, tissue_groups)
11 | 
12 | test_that("Output has correct class", {
13 |   expect_true(inherits(pooled, "matrix"))
14 | })
15 | 
16 | test_that("Output has correct dimensions", {
17 |   expect_equal(dim(pooled), c(96, 3))
18 | })
19 | 


--------------------------------------------------------------------------------
/tests/testthat/test-cos_sim.R:
--------------------------------------------------------------------------------
 1 | context("test-cos_sim")
 2 | 
 3 | # Cosine similarity of two example vectors of equal length
 4 | vec_a <- c(1.1, 2.1, 0.2, 0.1, 2.9)
 5 | vec_b <- c(0.9, 1.9, 0.5, 0.4, 3.1)
 6 | similarity <- cos_sim(vec_a, vec_b)
 7 | 
 8 | 
 9 | test_that("Output has correct class and data type", {
10 |   expect_true(inherits(similarity, "numeric"))
11 |   expect_equal(typeof(similarity), "double")
12 | })
13 | 
14 | test_that("Output has expected size", {
15 |   expect_equal(length(similarity), 1)
16 | })
17 | 
18 | test_that("Output is equal to expected", {
19 |   expect_equal(similarity, 0.9895599)
20 | })
21 | 


--------------------------------------------------------------------------------
/tests/testthat/test-mut_type.R:
--------------------------------------------------------------------------------
 1 | context("test-mut_type")
 2 | 
 3 | # Locate the stored read_vcfs_as_granges output
 4 | vcfs_file <- system.file("states/read_vcfs_as_granges_output.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | # Get the mutation types of the first element only
 8 | mut_types <- mut_type(readRDS(vcfs_file)[[1]])
 9 | 
10 | # Unit tests
11 | test_that("Output has correct class", {
12 |   expect_true(inherits(mut_types, "character"))
13 | })
14 | 
15 | test_that("The 6 base mutation types are returned", {
16 |   expected_types <- c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G")
17 |   expect_equal(sort(unique(mut_types)), expected_types)
18 | })
19 | 


--------------------------------------------------------------------------------
/tests/testthat/test-count_mbs_contexts.R:
--------------------------------------------------------------------------------
 1 | context("test-count_mbs_contexts")
 2 | 
 3 | ## Helper: read an rds file stored inside the installed package
 4 | read_state <- function(rel_path) {
 5 |   readRDS(system.file(rel_path, package = "MutationalPatterns"))
 6 | }
 7 | 
 8 | ## Count mbs contexts of the stored GRangesList and load the reference
 9 | mbs_counts <- count_mbs_contexts(read_state("states/blood_grl_mbs.rds"))
10 | reference_counts <- read_state("states/blood_mbs_counts.rds")
11 | 
12 | 
13 | test_that("Output has correct class", {
14 |   expect_true(inherits(mbs_counts, "matrix"))
15 | })
16 | 
17 | test_that("Output is identical to expected", {
18 |   expect_identical(mbs_counts, reference_counts)
19 | })
20 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_dbs_contexts.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_dbs_contexts")
 2 | 
 3 | ## Read the example dbs counts stored with the package
 4 | dbs_counts <- readRDS(
 5 |   system.file("states/blood_dbs_counts.rds", package = "MutationalPatterns")
 6 | )
 7 | 
 8 | ## Plot with defaults, with same_y = TRUE and with condensed = TRUE
 9 | dbs_plots <- list(
10 |   plot_dbs_contexts(dbs_counts),
11 |   plot_dbs_contexts(dbs_counts, same_y = TRUE),
12 |   plot_dbs_contexts(dbs_counts, condensed = TRUE)
13 | )
14 | 
15 | test_that("Output has correct class", {
16 |   for (dbs_plot in dbs_plots) expect_true(inherits(dbs_plot, "gg"))
17 | })
18 | 


--------------------------------------------------------------------------------
/tests/testthat/test-get_dbs_context.R:
--------------------------------------------------------------------------------
 1 | context("test-get_dbs_context")
 2 | 
 3 | ## Helper: read an rds file stored inside the installed package
 4 | read_state <- function(rel_path) {
 5 |   readRDS(system.file(rel_path, package = "MutationalPatterns"))
 6 | }
 7 | 
 8 | ## Stored GRangesList with DBS, used as input
 9 | dbs_grl <- read_state("states/blood_grl_dbs.rds")
10 | ## Set context dbs
11 | dbs_context_grl <- get_dbs_context(dbs_grl)
12 | 
13 | ## Stored result that the computed contexts are compared against
14 | reference_grl <- read_state("states/blood_grl_dbs_context.rds")
15 | 
16 | test_that("Output has correct class", {
17 |   accepted_classes <- c("GRanges", "CompressedGRangesList")
18 |   expect_true(inherits(dbs_context_grl, accepted_classes))
19 | })
20 | 
21 | test_that("Output is equal to expected", {
22 |   expect_equal(dbs_context_grl, reference_grl)
23 | })
24 | 


--------------------------------------------------------------------------------
/tests/testthat/test-count_indel_contexts.R:
--------------------------------------------------------------------------------
 1 | context("test-count_indel_contexts")
 2 | 
 3 | ## Helper: read an rds file stored inside the installed package
 4 | read_state <- function(rel_path) {
 5 |   readRDS(system.file(rel_path, package = "MutationalPatterns"))
 6 | }
 7 | 
 8 | ## Stored GRangesList with indel contexts and the stored reference counts
 9 | grl_indel_context <- read_state("states/blood_grl_indel_context.rds")
10 | reference_counts <- read_state("states/blood_indel_counts.rds")
11 | indel_counts <- count_indel_contexts(grl_indel_context)
12 | 
13 | test_that("Output has correct class", {
14 |   expect_true(inherits(indel_counts, "matrix"))
15 | })
16 | 
17 | test_that("Output is identical to expected", {
18 |   expect_identical(indel_counts, reference_counts)
19 | })
20 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_correlation_bootstrap.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_correlation_bootstrap")
 2 | 
 3 | # Read the stored bootstrapped snv refit
 4 | boots_file <- system.file("states/bootstrapped_snv_refit.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | contri_boots <- readRDS(boots_file)
 8 | 
 9 | # Default call; the test below expects a list whose first element is a plot
10 | plots_default <- plot_correlation_bootstrap(contri_boots)
11 | 
12 | # per_sample = FALSE; the test below expects a single plot
13 | plot_combined <- plot_correlation_bootstrap(contri_boots, per_sample = FALSE)
14 | 
15 | test_that("Output has correct class", {
16 |   expect_true(inherits(plots_default, "list"))
17 |   expect_true(inherits(plots_default[[1]], "gg"))
18 |   expect_true(inherits(plot_combined, "gg"))
19 | })
20 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_bootstrapped_contribution.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_bootstrapped_contribution")
 2 | 
 3 | ## Read the stored bootstrapped snv refit
 4 | boots_file <- system.file("states/bootstrapped_snv_refit.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | contri_boots <- readRDS(boots_file)
 8 | 
 9 | ## Each call variant is plotted and checked in turn
10 | test_that("Output has correct class", {
11 |   p_default <- plot_bootstrapped_contribution(contri_boots)
12 |   expect_true(inherits(p_default, "gg"))
13 | 
14 |   p_rel <- plot_bootstrapped_contribution(contri_boots, mode = "relative")
15 |   expect_true(inherits(p_rel, "gg"))
16 | 
17 |   p_bar <- plot_bootstrapped_contribution(contri_boots, plot_type = "barplot")
18 |   expect_true(inherits(p_bar, "gg"))
19 | })
20 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_signature_strand_bias.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_signature_strand_bias")
 2 | 
 3 | # Load the stored nmf results; only their `signatures` element is used here.
 4 | # (The former load of states/mut_mat_s_data.rds was dropped: its result was
 5 | # never used by this test.)
 6 | nmf_res_strand <- readRDS(system.file("states/nmf_res_strand_data.rds",
 7 |   package = "MutationalPatterns"
 8 | ))
 9 | 
10 | ## Provide column names for the plot.
11 | colnames(nmf_res_strand$signatures) <- c("Signature A", "Signature B")
12 | 
13 | output <- plot_signature_strand_bias(nmf_res_strand$signatures)
14 | 
15 | # Perform tests
16 | test_that("Output has correct class", {
17 |   expect_true(inherits(output, "gg"))
18 | })
19 | 

--------------------------------------------------------------------------------
/R/cos_sim.R:
--------------------------------------------------------------------------------
 1 | #' Cosine similarity function
 2 | #'
 3 | #' Calculate the cosine similarity between two vectors of the same length.
 4 | #' The cosine similarity is a value between 0 (distinct) and 1 (identical)
 5 | #' and indicates how much two vectors are alike.
 6 | #'
 7 | #' @param x Vector 1 of length n
 8 | #' @param y Vector 2 of length n
 9 | #' @return Cosine similarity value; a value between 0 and 1
10 | #'
11 | #' @examples
12 | #' x <- c(1.1, 2.1, 0.2, 0.1, 2.9)
13 | #' y <- c(0.9, 1.9, 0.5, 0.4, 3.1)
14 | #' cos_sim(x, y)
15 | #' @export
16 | 
17 | cos_sim <- function(x, y) {
18 |   dot_xy <- x %*% y
19 |   norm_product <- sqrt(x %*% x) * sqrt(y %*% y)
20 |   # `%*%` yields a 1x1 matrix, so flatten the ratio to a plain numeric
21 |   as.numeric(dot_xy / norm_product)
22 | }
23 | 


--------------------------------------------------------------------------------
/man/cos_sim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cos_sim.R
 3 | \name{cos_sim}
 4 | \alias{cos_sim}
 5 | \title{Cosine similarity function}
 6 | \usage{
 7 | cos_sim(x, y)
 8 | }
 9 | \arguments{
10 | \item{x}{Vector 1 of length n}
11 | 
12 | \item{y}{Vector 2 of length n}
13 | }
14 | \value{
15 | Cosine similarity value; a value between 0 and 1
16 | }
17 | \description{
18 | Calculate the cosine similarity between two vectors of the same length.
19 | The cosine similarity is a value between 0 (distinct) and 1 (identical)
20 | and indicates how much two vectors are alike.
21 | }
22 | \examples{
23 | x <- c(1.1, 2.1, 0.2, 0.1, 2.9)
24 | y <- c(0.9, 1.9, 0.5, 0.4, 3.1)
25 | cos_sim(x, y)
26 | }
27 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_river.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_river")
 2 | 
 3 | # Locate and load the example mutation matrices shipped with the package.
 4 | mut_mat_path <- system.file("states/mut_mat_data.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | mut_mat <- readRDS(mut_mat_path)
 8 | 
 9 | mut_mat_extended_path <- system.file("states/mut_mat_data_extended.rds",
10 |   package = "MutationalPatterns"
11 | )
12 | mut_mat_extended <- readRDS(mut_mat_extended_path)
13 | 
14 | # Riverplot of the regular profile.
15 | output <- plot_river(mut_mat)
16 | 
17 | # Condensed riverplot of the extended profile.
18 | output_extended <- plot_river(mut_mat_extended, condensed = TRUE)
19 | 
20 | test_that("Output has correct class", {
21 |   for (river_plot in list(output, output_extended)) {
22 |     expect_true(inherits(river_plot, "gg"))
23 |   }
24 | })
25 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_indel_contexts.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_indel_contexts")
 2 | 
 3 | # Load the example indel count matrix shipped with the package.
 4 | indel_counts_path <- system.file("states/blood_indel_counts.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | indel_counts <- readRDS(indel_counts_path)
 8 | 
 9 | # Plot the contexts with the default settings and with each option enabled.
10 | output <- plot_indel_contexts(indel_counts)
11 | output_same_y <- plot_indel_contexts(indel_counts, same_y = TRUE)
12 | output_extra_labels <- plot_indel_contexts(indel_counts, extra_labels = TRUE)
13 | output_condensed <- plot_indel_contexts(indel_counts, condensed = TRUE)
14 | 
15 | test_that("Output has correct class", {
16 |   plots <- list(output, output_same_y, output_extra_labels, output_condensed)
17 |   for (plot_obj in plots) {
18 |     expect_true(inherits(plot_obj, "gg"))
19 |   }
20 | })
21 | 


--------------------------------------------------------------------------------
/man/mut_96_occurrences.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mut_96_occurrences.R
 3 | \name{mut_96_occurrences}
 4 | \alias{mut_96_occurrences}
 5 | \title{Count 96 trinucleotide mutation occurrences}
 6 | \usage{
 7 | mut_96_occurrences(type_context, gr_sizes)
 8 | }
 9 | \arguments{
10 | \item{type_context}{result from type_context function}
11 | 
12 | \item{gr_sizes}{A vector indicating the number of variants per GRanges}
13 | }
14 | \value{
15 | Mutation matrix with 96 trinucleotide mutation occurrences
16 | }
17 | \description{
18 | This function is called by mut_matrix. It calculates the 96 trinucleotide
19 | context for all variants and then splits these per GRanges (samples).
20 | It then calculates how often each 96 trinucleotide context occurs.
21 | }
22 | 


--------------------------------------------------------------------------------
/tests/testthat/test-get_indel_context.R:
--------------------------------------------------------------------------------
 1 | context("test-get_indel_context")
 2 | 
 3 | # GRangesList object that only contains indels.
 4 | indel_grl_path <- system.file("states/blood_grl_indel.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | indel_grl <- readRDS(indel_grl_path)
 8 | 
 9 | # Attach the matching reference genome.
10 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
11 | library(ref_genome, character.only = TRUE)
12 | 
13 | # Determine the indel contexts.
14 | output <- get_indel_context(indel_grl, ref_genome)
15 | 
16 | # Stored result to compare against.
17 | expected_path <- system.file("states/blood_grl_indel_context.rds",
18 |   package = "MutationalPatterns"
19 | )
20 | expected <- readRDS(expected_path)
21 | 
22 | test_that("Output has correct class", {
23 |   accepted_classes <- c("GRanges", "CompressedGRangesList")
24 |   expect_true(inherits(output, accepted_classes))
25 | })
26 | 
27 | test_that("Output is equal to expected", {
28 |   expect_equal(output, expected)
29 | })
30 | 


--------------------------------------------------------------------------------
/tests/testthat/test-signature_potential_damage_analysis.R:
--------------------------------------------------------------------------------
 1 | context("test-signature_potential_damage_analysis")
 2 | 
 3 | # Signatures to analyse.
 4 | signatures <- get_known_signatures()
 5 | 
 6 | # Use the first six row names of the example mutation matrix as contexts.
 7 | mut_mat_path <- system.file("states/mut_mat_data.rds",
 8 |   package = "MutationalPatterns"
 9 | )
10 | mut_mat <- readRDS(mut_mat_path)
11 | contexts <- rownames(mut_mat)[seq_len(6)]
12 | 
13 | # Stored context mismatches.
14 | context_mismatches_path <- system.file("states/context_mismatches.rds",
15 |   package = "MutationalPatterns"
16 | )
17 | context_mismatches <- readRDS(context_mismatches_path)
18 | 
19 | # Run the analysis.
20 | output <- signature_potential_damage_analysis(
21 |   signatures, contexts, context_mismatches
22 | )
23 | 
24 | test_that("Output has correct class", {
25 |   expect_true(inherits(output, "tbl_df"))
26 | })
27 | 
28 | test_that("Output has correct size", {
29 |   expect_equal(dim(output), c(240, 7))
30 | })
31 | 


--------------------------------------------------------------------------------
/inst/scripts/create_example_dbs.R:
--------------------------------------------------------------------------------
 1 | library(tidyverse)
 2 | library(VariantAnnotation)
 3 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
 4 | library(ref_genome, character.only = TRUE)
 5 | 
 6 | # Load the stored GRangesList (relative path; presumably run from package root)
 7 | grl <- readRDS("inst/states/blood_grl.rds")
 8 | 
 9 | # Select the "dbs" mutation type and store the result
10 | grl_dbs <- get_mut_type(grl, "dbs")
11 | saveRDS(grl_dbs, "inst/states/blood_grl_dbs.rds")
12 | 
13 | # Set the dbs context on the selected variants and store the result
14 | grl_dbs_context <- get_dbs_context(grl_dbs)
15 | saveRDS(grl_dbs_context, "inst/states/blood_grl_dbs_context.rds")
16 | 
17 | # Count the dbs contexts and store the counts
18 | dbs_counts <- count_dbs_contexts(grl_dbs_context)
19 | saveRDS(dbs_counts, "inst/states/blood_dbs_counts.rds")
20 | 
21 | # Refit the counts to the known "dbs" signatures and store the fit
22 | signatures <- get_known_signatures("dbs")
23 | 
24 | 
25 | fit_res <- fit_to_signatures(dbs_counts, signatures)
26 | saveRDS(fit_res, "inst/states/dbs_refit.rds")
27 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_long_profile.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_profile_region")
 2 | 
 3 | # Load the long mutation matrix.
 4 | input_path <- system.file("states/mut_mat_longregions.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | input <- readRDS(input_path)
 8 | 
 9 | # Plot the profile with the default settings and with each alternative option.
10 | output <- plot_profile_region(input)
11 | output_relative_sample_feature <- plot_profile_region(input,
12 |   mode = "relative_sample_feature"
13 | )
14 | output_absolute <- plot_profile_region(input, mode = "absolute")
15 | output_condensed <- plot_profile_region(input, condensed = TRUE)
16 | 
17 | test_that("Output has correct class", {
18 |   plots <- list(
19 |     output, output_relative_sample_feature, output_absolute, output_condensed
20 |   )
21 |   for (plot_obj in plots) {
22 |     expect_true(inherits(plot_obj, "gg"))
23 |   }
24 | })
25 | 


--------------------------------------------------------------------------------
/man/mut_type.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mut_type.R
 3 | \name{mut_type}
 4 | \alias{mut_type}
 5 | \title{Retrieve base substitution types from a VCF object}
 6 | \usage{
 7 | mut_type(vcf)
 8 | }
 9 | \arguments{
10 | \item{vcf}{A CollapsedVCF object}
11 | }
12 | \value{
13 | Character vector with base substitution types
14 | }
15 | \description{
16 | A function to extract the base substitutions from a vcf and translate to
17 | the 6 common base substitution types.
18 | }
19 | \examples{
20 | ## See the 'read_vcfs_as_granges()' example for how we obtained the
21 | ## following data:
22 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
23 |   package = "MutationalPatterns"
24 | ))
25 | 
26 | mut_type(vcfs[[1]])
27 | }
28 | \seealso{
29 | \code{\link{read_vcfs_as_granges}}
30 | }
31 | 


--------------------------------------------------------------------------------
/man/mutations_from_vcf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mutations_from_vcf.R
 3 | \name{mutations_from_vcf}
 4 | \alias{mutations_from_vcf}
 5 | \title{Retrieve base substitutions from vcf}
 6 | \usage{
 7 | mutations_from_vcf(vcf)
 8 | }
 9 | \arguments{
10 | \item{vcf}{A CollapsedVCF object}
11 | }
12 | \value{
13 | Character vector with base substitutions
14 | }
15 | \description{
16 | A function to extract base substitutions of each position in vcf
17 | }
18 | \examples{
19 | ## See the 'read_vcfs_as_granges()' example for how we obtained the
20 | ## following data:
21 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
22 |   package = "MutationalPatterns"
23 | ))
24 | 
25 | muts <- mutations_from_vcf(vcfs[[1]])
26 | }
27 | \seealso{
28 | \code{\link{read_vcfs_as_granges}}
29 | }
30 | 


--------------------------------------------------------------------------------
/man/binomial_test.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/binomial_test.R
 3 | \name{binomial_test}
 4 | \alias{binomial_test}
 5 | \title{Binomial test for enrichment or depletion testing}
 6 | \usage{
 7 | binomial_test(p, n, x, p_cutoffs = 0.05)
 8 | }
 9 | \arguments{
10 | \item{p}{Probability of success}
11 | 
12 | \item{n}{Number of trials}
13 | 
14 | \item{x}{Observed number of successes}
15 | 
16 | \item{p_cutoffs}{Significance cutoff for the p value. Default: 0.05}
17 | }
18 | \value{
19 | A data.frame with direction of effect (enrichment/depletion),
20 | P-value and significance asterisks
21 | }
22 | \description{
23 | This function performs lower-tail binomial test for depletion and
24 | upper-tail test for enrichment
25 | }
26 | \examples{
27 | binomial_test(0.5, 1200, 543)
28 | binomial_test(0.2, 800, 150)
29 | }
30 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_96_profile.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_96_profile")
 2 | 
 3 | # Load the example mutation matrix.
 4 | mut_mat_path <- system.file("states/mut_mat_data.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | mut_mat <- readRDS(mut_mat_path)
 8 | 
 9 | # 96-profile of samples 1, 4 and 7, in regular and condensed form.
10 | output <- plot_96_profile(mut_mat[, c(1, 4, 7)])
11 | output_condensed <- plot_96_profile(mut_mat[, c(1, 4, 7)], condensed = TRUE)
12 | 
13 | # Load the stored NMF result and plot its signatures.
14 | nmf_res_path <- system.file("states/nmf_res_data.rds",
15 |   package = "MutationalPatterns"
16 | )
17 | nmf_res <- readRDS(nmf_res_path)
18 | output_signatures <- plot_96_profile(nmf_res$signatures)
19 | 
20 | # Every call should return a ggplot object.
21 | test_that("Output has correct class", {
22 |   for (plot_obj in list(output, output_condensed, output_signatures)) {
23 |     expect_true(inherits(plot_obj, "gg"))
24 |   }
25 | })
26 | 


--------------------------------------------------------------------------------
/man/get_sim_tb.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/S4_class.R
 3 | \name{get_sim_tb}
 4 | \alias{get_sim_tb}
 5 | \alias{get_sim_tb,region_cossim-method}
 6 | \title{An S4 generic to get the sim_tb from a region_cossim object.}
 7 | \usage{
 8 | get_sim_tb(x)
 9 | 
10 | \S4method{get_sim_tb}{region_cossim}(x)
11 | }
12 | \arguments{
13 | \item{x}{A region_cossim object}
14 | 
15 | \item{region_cossim}{A region_cossim object}
16 | }
17 | \value{
18 | A tibble containing the calculated similarities of the windows.
19 | 
20 | A tibble containing the calculated similarities of the windows.
21 | }
22 | \description{
23 | An S4 generic to get the sim_tb from a region_cossim object.
24 | 
25 | An S4 method for the get_sim_tb generic
26 | }
27 | \section{Methods (by class)}{
28 | \itemize{
29 | \item \code{region_cossim}: Get the sim_tb from a region_cossim object.
30 | }}
31 | 
32 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_compare_mbs.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_compare_mbs")
 2 | 
 3 | # Load the example mbs count matrix.
 4 | mbs_counts_path <- system.file("states/blood_mbs_counts.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | mbs_counts <- readRDS(mbs_counts_path)
 8 | 
 9 | # Compare the first two columns with the default settings.
10 | output <- plot_compare_mbs(mbs_counts[, 1], mbs_counts[, 2])
11 | 
12 | # Same comparison with custom profile names.
13 | output_name <- plot_compare_mbs(
14 |   mbs_counts[, 1],
15 |   mbs_counts[, 2],
16 |   profile_names = c("Original", "Reconstructed")
17 | )
18 | 
19 | # Same comparison with custom y-axis limits.
20 | output_yaxis <- plot_compare_mbs(
21 |   mbs_counts[, 1],
22 |   mbs_counts[, 2],
23 |   profile_ymax = 0.9,
24 |   diff_ylim = c(-0.8, 0.8)
25 | )
26 | 
27 | # Every call should return a ggplot object.
28 | test_that("Output has correct class", {
29 |   for (plot_obj in list(output, output_name, output_yaxis)) {
30 |     expect_true(inherits(plot_obj, "gg"))
31 |   }
32 | })
33 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_192_profile.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_192_profile")
 2 | 
 3 | # Load the example stranded mutation matrix.
 4 | mut_mat_s_path <- system.file("states/mut_mat_s_data.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | mut_mat_s <- readRDS(mut_mat_s_path)
 8 | 
 9 | # 192-profile of samples 1, 4 and 7, in regular and condensed form.
10 | output <- plot_192_profile(mut_mat_s[, c(1, 4, 7)])
11 | output_condensed <- plot_192_profile(mut_mat_s[, c(1, 4, 7)], condensed = TRUE)
12 | 
13 | # Load the stored stranded NMF result and plot its signatures.
14 | nmf_res_strand_path <- system.file("states/nmf_res_strand_data.rds",
15 |   package = "MutationalPatterns"
16 | )
17 | nmf_res_strand <- readRDS(nmf_res_strand_path)
18 | output_signatures <- plot_192_profile(nmf_res_strand$signatures)
19 | 
20 | # Every call should return a ggplot object.
21 | test_that("Output has correct class", {
22 |   for (plot_obj in list(output, output_condensed, output_signatures)) {
23 |     expect_true(inherits(plot_obj, "gg"))
24 |   }
25 | })
26 | 


--------------------------------------------------------------------------------
/R/mutations_from_vcf.R:
--------------------------------------------------------------------------------
 1 | #' Retrieve base substitutions from vcf
 2 | #'
 3 | #' A function to extract base substitutions of each position in vcf
 4 | #' @param vcf A CollapsedVCF object
 5 | #' @return Character vector with base substitutions
 6 | #' @import GenomicRanges
 7 | #'
 8 | #' @examples
 9 | #' ## See the 'read_vcfs_as_granges()' example for how we obtained the
10 | #' ## following data:
11 | #' vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
12 | #'   package = "MutationalPatterns"
13 | #' ))
14 | #'
15 | #' muts <- mutations_from_vcf(vcfs[[1]])
16 | #' @seealso
17 | #' \code{\link{read_vcfs_as_granges}}
18 | #'
19 | #' @export
20 | 
21 | mutations_from_vcf <- function(vcf) {
22 |   # Run the package's indel check on the input before anything else.
23 |   .check_no_indels(vcf)
24 | 
25 |   # Reference and alternative alleles as plain character vectors.
26 |   ref_alleles <- as.character(.get_ref(vcf))
27 |   alt_alleles <- as.character(unlist(.get_alt(vcf)))
28 | 
29 |   # Join both element-wise into "REF>ALT" strings.
30 |   paste(ref_alleles, alt_alleles, sep = ">")
31 | }
32 | 


--------------------------------------------------------------------------------
/man/pool_mut_mat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pool_mut_matrix.R
 3 | \name{pool_mut_mat}
 4 | \alias{pool_mut_mat}
 5 | \title{Pool multiple samples from a mutation matrix together}
 6 | \usage{
 7 | pool_mut_mat(mut_matrix, grouping)
 8 | }
 9 | \arguments{
10 | \item{mut_matrix}{Mutation count matrix (dimensions: x mutation types
11 | X n samples)}
12 | 
13 | \item{grouping}{Grouping variable}
14 | }
15 | \value{
16 | Mutation count matrix (dimensions: x mutation types
17 | X n groups)
18 | }
19 | \description{
20 | The mutation counts of columns (samples) are added up according to the grouping variable.
21 | }
22 | \examples{
23 | ## See the 'mut_matrix()' example for how we obtained the mutation matrix:
24 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
25 |   package = "MutationalPatterns"
26 | ))
27 | grouping <- c(rep("colon", 3), rep("intestine", 3), rep("liver", 3))
28 | pool_mut_mat(mut_mat, grouping)
29 | }
30 | 


--------------------------------------------------------------------------------
/tests/testthat/test-count_dbs_contexts.R:
--------------------------------------------------------------------------------
 1 | context("test-count_dbs_contexts")
 2 | 
 3 | # GRangesList object with dbs contexts.
 4 | grl_dbs_context_path <- system.file("states/blood_grl_dbs_context.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | grl_dbs_context <- readRDS(grl_dbs_context_path)
 8 | 
 9 | # Count the contexts and load the stored counts to compare against.
10 | output <- count_dbs_contexts(grl_dbs_context)
11 | expected_path <- system.file("states/blood_dbs_counts.rds",
12 |   package = "MutationalPatterns"
13 | )
14 | expected <- readRDS(expected_path)
15 | 
16 | test_that("Output has correct class", {
17 |   expect_true(inherits(output, "matrix"))
18 | })
19 | 
20 | test_that("Output is identical to expected", {
21 |   expect_identical(output, expected)
22 | })
23 | 
24 | # GRangesList for which the dbs context has not been set.
25 | grl_dbs_context_bad_path <- system.file("states/blood_grl_dbs.rds",
26 |   package = "MutationalPatterns"
27 | )
28 | grl_dbs_context_bad <- readRDS(grl_dbs_context_bad_path)
29 | 
30 | test_that("An error occurs when the context has not been set.", {
31 |   expect_error(
32 |     count_dbs_contexts(grl_dbs_context_bad),
33 |     "There are some REF or ALT bases, that do not belong"
34 |   )
35 | })
36 | 


--------------------------------------------------------------------------------
/R/mut_type.R:
--------------------------------------------------------------------------------
 1 | #' Retrieve base substitution types from a VCF object
 2 | #'
 3 | #' A function to extract the base substitutions from a vcf and translate to
 4 | #' the 6 common base substitution types.
 5 | #'
 6 | #' @param vcf A CollapsedVCF object
 7 | #' @return Character vector with base substitution types
 8 | #'
 9 | #' @examples
10 | #' ## See the 'read_vcfs_as_granges()' example for how we obtained the
11 | #' ## following data:
12 | #' vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
13 | #'   package = "MutationalPatterns"
14 | #' ))
15 | #'
16 | #' mut_type(vcfs[[1]])
17 | #' @seealso
18 | #' \code{\link{read_vcfs_as_granges}}
19 | #'
20 | #' @export
21 | 
22 | mut_type <- function(vcf) {
23 |   # Replacement table: names are the patterns to look for, values are what
24 |   # they are rewritten to. The order is the order in which they are applied.
25 |   type_map <- c(
26 |     "G>T" = "C>A", "G>C" = "C>G", "G>A" = "C>T",
27 |     "A>T" = "T>A", "A>G" = "T>C", "A>C" = "T>G"
28 |   )
29 | 
30 |   types <- unlist(mutations_from_vcf(vcf))
31 |   for (pattern in names(type_map)) {
32 |     types <- gsub(pattern, type_map[[pattern]], types)
33 |   }
34 |   types
35 | }
36 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | 
3 | Copyright (c) 2016 Cuppen Research
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6 | 
7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8 | 
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/tests/testthat/test-bin_mutation_density.R:
--------------------------------------------------------------------------------
 1 | context("test-bin_mutation_density")
 2 | 
 3 | # Load the example GRangesList.
 4 | grl_path <- system.file("states/read_vcfs_as_granges_output.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | grl <- readRDS(grl_path)
 8 | 
 9 | # Attach the matching reference genome.
10 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
11 | library(ref_genome, character.only = TRUE)
12 | 
13 | # Bin by density into three bins.
14 | output <- bin_mutation_density(grl, ref_genome, nrbins = 3)
15 | 
16 | # Bin by density using manually supplied cutoffs.
17 | output_man <- bin_mutation_density(grl, ref_genome,
18 |   man_dens_cutoffs = c(0, 2e-08, 1)
19 | )
20 | 
21 | test_that("Output has correct class", {
22 |   for (binned in list(output, output_man)) {
23 |     expect_true(inherits(binned, "CompressedGRangesList"))
24 |   }
25 | })
26 | 
27 | test_that("Output has correct dimensions", {
28 |   expect_equal(length(output), 3)
29 |   expect_equal(length(output_man), 2)
30 | 
31 |   bin_sizes <- as.vector(S4Vectors::elementNROWS(output))
32 |   expect_equal(bin_sizes, c(30, 11, 2))
33 | 
34 |   bin_sizes_man <- as.vector(S4Vectors::elementNROWS(output_man))
35 |   expect_equal(bin_sizes_man, c(25, 4))
36 | })
37 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_compare_profiles.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_compare_profiles")
 2 | 
 3 | # Load the example mutation matrix.
 4 | mut_mat_path <- system.file("states/mut_mat_data.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | mut_mat <- readRDS(mut_mat_path)
 8 | 
 9 | # Load the stored NMF result.
10 | nmf_res_path <- system.file("states/nmf_res_data.rds",
11 |   package = "MutationalPatterns"
12 | )
13 | nmf_res <- readRDS(nmf_res_path)
14 | 
15 | # Compare the first original column with its reconstruction.
16 | output <- plot_compare_profiles(
17 |   mut_mat[, 1],
18 |   nmf_res$reconstructed[, 1],
19 |   profile_names = c("Original", "Reconstructed")
20 | )
21 | 
22 | # Same comparison as a condensed plot.
23 | output_condensed <- plot_compare_profiles(
24 |   mut_mat[, 1],
25 |   nmf_res$reconstructed[, 1],
26 |   profile_names = c("Original", "Reconstructed"),
27 |   condensed = TRUE
28 | )
29 | 
30 | # Same comparison with custom y-axis limits.
31 | output_yaxis <- plot_compare_profiles(
32 |   mut_mat[, 1],
33 |   nmf_res$reconstructed[, 1],
34 |   profile_ymax = 0.3,
35 |   diff_ylim = c(-0.03, 0.03)
36 | )
37 | 
38 | # Every call should return a ggplot object.
39 | test_that("Output has correct class", {
40 |   for (plot_obj in list(output, output_condensed, output_yaxis)) {
41 |     expect_true(inherits(plot_obj, "gg"))
42 |   }
43 | })
44 | 


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | # Citation entry for the package. Uses bibentry() and c() on person objects,
 2 | # the current replacements for the old-style citEntry() and personList().
 3 | bibentry(
 4 |     bibtype = "Article",
 5 |     title = "MutationalPatterns: The one stop shop for the analysis of mutational processes",
 6 |     author = c(
 7 |         as.person("Freek Manders"),
 8 |         as.person("Arianne M. Brandsma"),
 9 |         as.person("Jurrian de Kanter"),
10 |         as.person("Mark Verheul"),
11 |         as.person("Rurika Oka"),
12 |         as.person("Markus J. van Roosmalen"),
13 |         as.person("Bastiaan van der Roest"),
14 |         as.person("Arne van Hoeck"),
15 |         as.person("Edwin Cuppen"),
16 |         as.person("Ruben van Boxtel")
17 |     ),
18 |     year = 2022,
19 |     journal = "BMC Genomics",
20 |     doi = "10.1186/s12864-022-08357-3",
21 |     # Plain-text form of the citation shown alongside the BibTeX entry.
22 |     textVersion =
23 |         paste("Freek Manders, Arianne M. Brandsma, Jurrian de Kanter, Mark Verheul, Rurika Oka, Markus J. van Roosmalen, Bastiaan van der Roest, Arne van Hoeck, Edwin Cuppen, Ruben van Boxtel (2022):",
24 |               "MutationalPatterns: The one stop shop for the analysis of mutational processes",
25 |               "BMC Genomics", "doi:10.1186/s12864-022-08357-3")
26 | )


--------------------------------------------------------------------------------
/tests/testthat/test-strand_occurrences.R:
--------------------------------------------------------------------------------
 1 | context("test-strand_occurrences")
 2 | 
 3 | # Load the example stranded mutation matrix.
 4 | mut_mat_s_path <- system.file("states/mut_mat_s_data.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | mut_mat_s <- readRDS(mut_mat_s_path)
 8 | 
 9 | # Attach a reference genome.
10 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
11 | library(ref_genome, character.only = TRUE)
12 | 
13 | # Tissue label per sample: three consecutive samples for each tissue.
14 | tissue <- rep(c("colon", "intestine", "liver"), each = 3)
15 | 
16 | output <- strand_occurrences(mut_mat_s, by = tissue)
17 | 
18 | # Same calculation on the replication strand matrix.
19 | mut_mat_repli_path <- system.file("states/mut_mat_repli.rds",
20 |   package = "MutationalPatterns"
21 | )
22 | mut_mat_repli <- readRDS(mut_mat_repli_path)
23 | output_repli <- strand_occurrences(mut_mat_repli, by = tissue)
24 | 
25 | test_that("Output has correct class", {
26 |   for (occurrences in list(output, output_repli)) {
27 |     expect_true(inherits(occurrences, "tbl_df"))
28 |   }
29 | })
30 | 
31 | test_that("Output has correct size", {
32 |   for (occurrences in list(output, output_repli)) {
33 |     expect_equal(dim(occurrences), c(36, 5))
34 |   }
35 | })
36 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_compare_dbs.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_compare_dbs")
 2 | 
 3 | # Load the example dbs count matrix.
 4 | dbs_counts_path <- system.file("states/blood_dbs_counts.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | dbs_counts <- readRDS(dbs_counts_path)
 8 | 
 9 | # Load the stored dbs signature refit.
10 | fit_res_path <- system.file("states/dbs_refit.rds",
11 |   package = "MutationalPatterns"
12 | )
13 | fit_res <- readRDS(fit_res_path)
14 | 
15 | # Comparison with the default settings.
16 | output <- plot_compare_dbs(dbs_counts[, 1], fit_res$reconstructed[, 1])
17 | 
18 | # Comparison with custom profile names.
19 | output_name <- plot_compare_dbs(
20 |   dbs_counts[, 1],
21 |   fit_res$reconstructed[, 2],
22 |   profile_names = c("Original", "Reconstructed")
23 | )
24 | 
25 | # Comparison with custom y limits, set separately for the profiles
26 | # (profile_ymax) and for the difference (diff_ylim).
27 | output_yaxis <- plot_compare_dbs(
28 |   dbs_counts[, 1],
29 |   fit_res$reconstructed[, 2],
30 |   profile_ymax = 0.3,
31 |   diff_ylim = c(-0.03, 0.03)
32 | )
33 | 
34 | # Every call should return a ggplot object.
35 | test_that("Output has correct class", {
36 |   for (plot_obj in list(output, output_name, output_yaxis)) {
37 |     expect_true(inherits(plot_obj, "gg"))
38 |   }
39 | })
40 | 


--------------------------------------------------------------------------------
/man/mut_192_occurrences.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mut_192_occurrences.R
 3 | \name{mut_192_occurrences}
 4 | \alias{mut_192_occurrences}
 5 | \title{Count 192 trinucleotide mutation occurrences}
 6 | \usage{
 7 | mut_192_occurrences(type_context, strand, gr_sizes)
 8 | }
 9 | \arguments{
10 | \item{type_context}{result from type_context function}
11 | 
12 | \item{strand}{factor with strand information for each
13 | position, for example "U" for untranscribed, "T" for transcribed strand,
14 | and "-" for unknown}
15 | 
16 | \item{gr_sizes}{A vector indicating the number of variants per GRanges}
17 | }
18 | \value{
19 | Mutation matrix with 192 mutation occurrences and 96 trinucleotides
20 | for two strands
21 | }
22 | \description{
23 | This function is called by mut_matrix_stranded.
24 | The 192 trinucleotide context is the 96 trinucleotide context combined
25 | with the strands. This function calculates the 192 trinucleotide context
26 | for all variants and then splits these per GRanges (samples).
27 | It then calculates how often each 192 trinucleotide context occurs.
28 | }
29 | 


--------------------------------------------------------------------------------
/tests/testthat/test-cos_sim_matrix.R:
--------------------------------------------------------------------------------
 1 | context("test-cos_sim_matrix")
 2 | 
 3 | # Read signatures
 4 | signatures <- get_known_signatures()
 5 | 
 6 | # Read mut_matrix
 7 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
 8 |   package = "MutationalPatterns"
 9 | ))
10 | 
11 | # Calculate the cosine similarity between each COSMIC signature and each
12 | # 96 mutational profile
13 | output <- cos_sim_matrix(mut_mat, signatures)
14 | 
15 | # Perform tests.
16 | test_that("Output has correct class and data type", {
17 |   expect_true(inherits(output, c("matrix")))
18 |   expect_equal(typeof(output), "double")
19 | })
20 | 
21 | test_that("Output has expected size", {
22 |   expect_equal(dim(output), c(9, 60))
23 | })
24 | 
25 | # Data frame copy of the matrix whose first column is converted to character;
26 | # it serves as the non-numeric input below.
27 | mut_mat_df <- as.data.frame(mut_mat)
28 | mut_mat_chr <- mut_mat_df
29 | mut_mat_chr[, 1] <- as.character(mut_mat_chr[, 1, drop = TRUE])
30 | test_that("Non-numeric inputs give an error", {
31 |   expect_error(cos_sim_matrix(mut_mat_chr, signatures))
32 | })
33 | 
34 | # A tibble input has to give the same result as the matrix input.
35 | test_that("Tibble inputs are converted into data.frames.", {
36 |   output2 <- cos_sim_matrix(tibble::as_tibble(mut_mat_df), signatures)
37 |   expect_equal(output, output2)
38 | })


--------------------------------------------------------------------------------
/tests/testthat/test-plot_compare_indels.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_compare_indels")
 2 | 
 3 | # Load the example indel count matrix.
 4 | indel_counts_path <- system.file("states/blood_indel_counts.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | indel_counts <- readRDS(indel_counts_path)
 8 | 
 9 | # Load the stored indel signature refit.
10 | fit_res_path <- system.file("states/indel_refit.rds",
11 |   package = "MutationalPatterns"
12 | )
13 | fit_res <- readRDS(fit_res_path)
14 | 
15 | # Comparison with the default settings.
16 | output <- plot_compare_indels(indel_counts[, 1], fit_res$reconstructed[, 1])
17 | 
18 | # Comparison with custom profile names.
19 | output_name <- plot_compare_indels(
20 |   indel_counts[, 1],
21 |   fit_res$reconstructed[, 2],
22 |   profile_names = c("Original", "Reconstructed")
23 | )
24 | 
25 | # Comparison with custom y limits, set separately for the profiles
26 | # (profile_ymax) and for the difference (diff_ylim).
27 | output_yaxis <- plot_compare_indels(
28 |   indel_counts[, 1],
29 |   fit_res$reconstructed[, 2],
30 |   profile_ymax = 0.3,
31 |   diff_ylim = c(-0.03, 0.03)
32 | )
33 | 
34 | # Every call should return a ggplot object.
35 | test_that("Output has correct class", {
36 |   for (plot_obj in list(output, output_name, output_yaxis)) {
37 |     expect_true(inherits(plot_obj, "gg"))
38 |   }
39 | })
40 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_original_vs_reconstructed.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_original_vs_reconstructed")
 2 | 
 3 | # Load the example mutation matrix.
 4 | mut_mat_path <- system.file("states/mut_mat_data.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | mut_mat <- readRDS(mut_mat_path)
 8 | 
 9 | # Load the stored NMF result.
10 | nmf_res_path <- system.file("states/nmf_res_data.rds",
11 |   package = "MutationalPatterns"
12 | )
13 | nmf_res <- readRDS(nmf_res_path)
14 | 
15 | # Load the stored signature refit.
16 | fit_res_path <- system.file("states/snv_refit.rds",
17 |   package = "MutationalPatterns"
18 | )
19 | fit_res <- readRDS(fit_res_path)
20 | 
21 | # Compare the original matrix with the NMF and the refit reconstructions,
22 | # the latter also with a custom intercept and custom y limits.
23 | output <- plot_original_vs_reconstructed(mut_mat, nmf_res$reconstructed)
24 | output_fit <- plot_original_vs_reconstructed(mut_mat, fit_res$reconstructed)
25 | output_intercept <- plot_original_vs_reconstructed(
26 |   mut_mat, fit_res$reconstructed,
27 |   y_intercept = 0.90
28 | )
29 | output_lims <- plot_original_vs_reconstructed(
30 |   mut_mat, fit_res$reconstructed,
31 |   ylims = c(0, 1)
32 | )
33 | 
34 | # Every call should return a ggplot object.
35 | test_that("Output has correct class", {
36 |   plots <- list(output, output_fit, output_intercept, output_lims)
37 |   for (plot_obj in plots) {
38 |     expect_true(inherits(plot_obj, "gg"))
39 |   }
40 | })
41 | 


--------------------------------------------------------------------------------
/man/mut_type_occurrences.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mut_type_occurrences.R
 3 | \name{mut_type_occurrences}
 4 | \alias{mut_type_occurrences}
 5 | \title{Count the occurrences of each base substitution type}
 6 | \usage{
 7 | mut_type_occurrences(vcf_list, ref_genome)
 8 | }
 9 | \arguments{
10 | \item{vcf_list}{GRangesList or GRanges object.}
11 | 
12 | \item{ref_genome}{BSgenome reference genome object}
13 | }
14 | \value{
15 | data.frame with counts of each base substitution type for
16 | each sample in vcf_list
17 | }
18 | \description{
19 | Count the occurrences of each base substitution type
20 | }
21 | \examples{
22 | ## See the 'read_vcfs_as_granges()' example for how we obtained the
23 | ## following data:
24 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
25 |   package = "MutationalPatterns"
26 | ))
27 | 
28 | ## Load a reference genome.
29 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
30 | library(ref_genome, character.only = TRUE)
31 | 
32 | ## Get the type occurrences for all VCF objects.
33 | type_occurrences <- mut_type_occurrences(vcfs, ref_genome)
34 | }
35 | \seealso{
36 | \code{\link{read_vcfs_as_granges}}
37 | }
38 | 


--------------------------------------------------------------------------------
/tests/testthat/test-binomial_test.R:
--------------------------------------------------------------------------------
 1 | context("test-binomial_test")
 2 | 
 3 | output_signi <- binomial_test(0.5, 1200, 543)
 4 | output_notsigni <- binomial_test(0.2, 800, 170)
 5 | output_strictcutoff <- binomial_test(0.5, 1200, 543, p_cutoffs = 0.00001)
 6 | 
 7 | 
 8 | test_that("Output has correct class", {
 9 |   expect_true(inherits(output_signi, c("data.frame")))
10 |   expect_true(inherits(output_notsigni, c("data.frame")))
11 |   expect_true(inherits(output_strictcutoff, c("data.frame")))
12 | })
13 | 
14 | test_that("Output has correct size", {
15 |   expect_equal(dim(output_signi), c(1, 3))
16 |   expect_equal(dim(output_notsigni), c(1, 3))
17 |   expect_equal(dim(output_strictcutoff), c(1, 3))
18 | })
19 | 
20 | test_that("Output has correct significance level", {
21 |   expect_equal(round(output_signi$pval, 5), 0.0011)
22 |   expect_equal(round(output_notsigni$pval, 5), 0.39961)
23 |   expect_equal(round(output_strictcutoff$pval, 5), 0.0011)
24 | })
25 | 
26 | test_that("enrichment/depletion correctly determined", {
27 |   expect_equal(output_signi$effect, factor("depletion"))
28 |   expect_equal(output_notsigni$effect, factor("enrichment"))
29 |   expect_equal(output_strictcutoff$effect, factor("depletion"))
30 | })
31 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_profile_heatmap.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_profile_heatmap")
 2 | 
 3 | 
 4 | # Get input data
 5 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
 6 |   package = "MutationalPatterns"
 7 | ))
 8 | 
 9 | mut_mat_extended <- readRDS(system.file("states/mut_mat_data_extended.rds",
10 |   package = "MutationalPatterns"
11 | ))
12 | 
13 | 
14 | ## Create heatmap of profile
15 | output_basic <- plot_profile_heatmap(mut_mat, max = 0.1)
16 | 
17 | ## Create heatmap of extended profile
18 | output <- plot_profile_heatmap(mut_mat_extended)
19 | 
20 | ## Or plot heatmap per tissue
21 | tissue <- c(
22 |   "colon", "colon", "colon",
23 |   "intestine", "intestine", "intestine",
24 |   "liver", "liver", "liver"
25 | )
26 | 
27 | output_tissue <- plot_profile_heatmap(mut_mat_extended, by = tissue)
28 | 
29 | ## Or plot the heatmap per sample.
30 | output_sample <- plot_profile_heatmap(mut_mat_extended,
31 |   by = colnames(mut_mat_extended),
32 |   max = 0.05
33 | )
34 | 
35 | 
36 | test_that("Output has correct class", {
37 |   expect_true(inherits(output_basic, c("gg")))
38 |   expect_true(inherits(output, c("gg")))
39 |   expect_true(inherits(output_tissue, c("gg")))
40 |   expect_true(inherits(output_sample, c("gg")))
41 | })
42 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_strand.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_strand")
 2 | 
 3 | # Load stranded mutation matrix data
 4 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds",
 5 |   package = "MutationalPatterns"
 6 | ))
 7 | 
 8 | ## Load a reference genome.
 9 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
10 | library(ref_genome, character.only = TRUE)
11 | 
12 | tissue <- c(
13 |   "colon", "colon", "colon",
14 |   "intestine", "intestine", "intestine",
15 |   "liver", "liver", "liver"
16 | )
17 | 
18 | # Calculate strand counts
19 | strand_counts <- strand_occurrences(mut_mat_s, by = tissue)
20 | 
21 | # Plot the strand in relative mode.
22 | output <- plot_strand(strand_counts)
23 | 
24 | # Plot in absolute mode.
25 | output_absolute <- plot_strand(strand_counts, mode = "absolute")
26 | 
27 | # Repeat for replication bias.
28 | mut_mat_repli <- readRDS(system.file("states/mut_mat_repli.rds",
29 |   package = "MutationalPatterns"
30 | ))
31 | strand_counts <- strand_occurrences(mut_mat_repli, by = tissue)
32 | output_repli <- plot_strand(strand_counts)
33 | 
34 | 
35 | test_that("Output has correct class", {
36 |   expect_true(inherits(output, c("gg")))
37 |   expect_true(inherits(output_absolute, c("gg")))
38 |   expect_true(inherits(output_repli, c("gg")))
39 | })
40 | 


--------------------------------------------------------------------------------
/man/get_dbs_context.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_dbs_context.R
 3 | \name{get_dbs_context}
 4 | \alias{get_dbs_context}
 5 | \title{Get DBS context}
 6 | \usage{
 7 | get_dbs_context(vcf_list)
 8 | }
 9 | \arguments{
10 | \item{vcf_list}{GRanges/GRangesList}
11 | }
12 | \value{
13 | A version of the GRanges/GRangesList object, with modified REF and ALT columns.
14 | }
15 | \description{
16 | Get the DBS COSMIC context on a GRanges/GRangesList object.
17 | It applies the get_dbs_context_gr function to each gr in the input,
18 | which works by changing the REF and ALT columns of the GRanges into the COSMIC types.
19 | }
20 | \examples{
21 | ## Get GRangesList with DBS.
22 | ## See 'get_mut_type' or 'read_vcfs_as_granges' for more info on how to do this.
23 | dbs_grl <- readRDS(system.file("states/blood_grl_dbs.rds",
24 |   package = "MutationalPatterns"
25 | ))
26 | 
27 | ## Set context DBS
28 | get_dbs_context(dbs_grl)
29 | }
30 | \seealso{
31 | \code{\link{get_mut_type}}, \code{\link{read_vcfs_as_granges}}
32 | 
33 | Other DBS: 
34 | \code{\link{count_dbs_contexts}()},
35 | \code{\link{plot_compare_dbs}()},
36 | \code{\link{plot_dbs_contexts}()},
37 | \code{\link{plot_main_dbs_contexts}()}
38 | }
39 | \concept{DBS}
40 | 


--------------------------------------------------------------------------------
/man/count_mbs_contexts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/count_mbs_contexts.R
 3 | \name{count_mbs_contexts}
 4 | \alias{count_mbs_contexts}
 5 | \title{Count MBS variants grouped by length.}
 6 | \usage{
 7 | count_mbs_contexts(vcf_list)
 8 | }
 9 | \arguments{
10 | \item{vcf_list}{GRanges or GRangesList object containing mbs variants.}
11 | }
12 | \value{
13 | A tibble containing the number of MBS per MBS length per gr.
14 | }
15 | \description{
16 | Count MBS variants grouped by length.
17 | }
18 | \details{
19 | Counts the number of mbs grouped by length from a GRanges or GRangesList object containing mbs variants.
20 | This is used, since a COSMIC context has to our knowledge not yet been defined.
21 | This function applies the count_mbs_contexts_gr function to each gr in its input.
22 | It then combines the results in a single tibble and returns this.
23 | }
24 | \examples{
25 | ## Get a GRangesList or GRanges object with mbs variants.
26 | mbs_grl <- readRDS(system.file("states/blood_grl_mbs.rds",
27 |   package = "MutationalPatterns"
28 | ))
29 | 
30 | # Count the MBSs
31 | count_mbs_contexts(mbs_grl)
32 | }
33 | \seealso{
34 | Other MBS: 
35 | \code{\link{plot_compare_mbs}()},
36 | \code{\link{plot_mbs_contexts}()}
37 | }
38 | \concept{MBS}
39 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_regional_similarity.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_regional_similarity")
 2 | 
 3 | 
 4 | 
 5 | # Load the regional similarity object
 6 | regional_sims <- readRDS(system.file("states/regional_sims.rds",
 7 |   package = "MutationalPatterns"
 8 | ))
 9 | 
10 | # Plot the regional similarity
11 | output <- plot_regional_similarity(regional_sims)
12 | 
13 | # Plot outlier samples with a different color.
14 | output_outlier <- plot_regional_similarity(regional_sims, max_cossim = 0.5)
15 | 
16 | # Plot samples per chromosome. This returns a list of plots.
17 | output_l <- plot_regional_similarity(regional_sims, per_chrom = TRUE)
18 | 
19 | # Plot samples with a rug
20 | output_rug <- plot_regional_similarity(regional_sims, plot_rug = TRUE)
21 | 
22 | # Use custom x-axis breaks
23 | output_xbreaks <- plot_regional_similarity(regional_sims, x_axis_breaks = c(30, 66, 300))
24 | 
25 | # Run tests
26 | test_that("Output has correct class", {
27 |   expect_true(inherits(output, c("gg")))
28 |   expect_true(inherits(output_outlier, c("gg")))
29 |   expect_true(inherits(output_l, c("list")))
30 |   expect_true(inherits(output_l[[1]], c("gg")))
31 |   expect_true(inherits(output_rug, c("gg")))
32 |   expect_true(inherits(output_xbreaks, c("gg")))
33 | })
34 | 
35 | test_that("Output per chromosome has correct length", {
36 |   expect_length(output_l, 3)
37 | })


--------------------------------------------------------------------------------
/R/get_ref_alt.R:
--------------------------------------------------------------------------------
 1 | #' Get REF column from GRanges
 2 | #'
 3 | #' Retrieves the REF column from a GRanges object.
 4 | #' This can be spelled as REF, ref or Ref (checked in that order).
 5 | #' An error is raised when none of these columns is present.
 6 | #'
 7 | #' @param gr GRanges object
 8 | #'
 9 | #' @return DNAStringSet
10 | #' @noRd
11 | #'
12 | .get_ref <- function(gr) {
13 |   gr_cols <- colnames(S4Vectors::mcols(gr))
14 |   if ("REF" %in% gr_cols) {
15 |     ref <- gr$REF
16 |   } else if ("ref" %in% gr_cols) {
17 |     ref <- gr$ref
18 |   } else if ("Ref" %in% gr_cols) {
19 |     ref <- gr$Ref
20 |   } else {
21 |     stop("Some of your data is missing a REF column.", call. = FALSE)
22 |   }
23 |   return(ref)
24 | }
25 | 
26 | #' Get ALT column from GRanges
27 | #'
28 | #' Retrieves the ALT column from a GRanges object.
29 | #' This can be spelled as ALT, alt or Alt (checked in that order).
30 | #' An error is raised when none of these columns is present.
31 | #'
32 | #' @param gr GRanges object
33 | #'
34 | #' @return DNAStringSetList
35 | #' @noRd
36 | #'
37 | .get_alt <- function(gr) {
38 |   gr_cols <- colnames(S4Vectors::mcols(gr))
39 |   if ("ALT" %in% gr_cols) {
40 |     alt <- gr$ALT
41 |   } else if ("alt" %in% gr_cols) {
42 |     alt <- gr$alt
43 |   } else if ("Alt" %in% gr_cols) {
44 |     alt <- gr$Alt
45 |   } else {
46 |     stop("Some of your data is missing a ALT column.", call. = FALSE)
47 |   }
48 |   return(alt)
49 | }
50 | 


--------------------------------------------------------------------------------
/R/pool_mut_matrix.R:
--------------------------------------------------------------------------------
 1 | #' Pool multiple samples from a mutation matrix together
 2 | #'
 3 | #' Samples (columns) that share a value of the grouping variable are summed.
 4 | #'
 5 | #' @param mut_matrix Mutation count matrix (dimensions: x mutation types
 6 | #' X n samples)
 7 | #' @param grouping Grouping variable, with one value per sample
 8 | #'
 9 | #' @return Mutation count matrix (dimensions: x mutation types
10 | #' X n groups). The columns are named after the levels of the grouping.
11 | #' @export
12 | #' @importFrom magrittr %>%
13 | #'
14 | #' @examples
15 | #' ## See the 'mut_matrix()' example for how we obtained the mutation matrix:
16 | #' mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
17 | #'   package = "MutationalPatterns"
18 | #' ))
19 | #' grouping <- c(rep("colon", 3), rep("intestine", 3), rep("liver", 3))
20 | #' pool_mut_mat(mut_mat, grouping)
21 | pool_mut_mat <- function(mut_matrix, grouping) {
22 |   grouping <- factor(grouping)
23 | 
24 |   # Put the samples in the rows and label each one with its group.
25 |   per_sample <- tibble::as_tibble(t(mut_matrix))
26 |   per_sample <- dplyr::mutate(per_sample, factor = grouping)
27 | 
28 |   # Add up the counts of all samples belonging to the same group.
29 |   per_group <- dplyr::group_by(per_sample, factor)
30 |   per_group <- dplyr::summarise_all(per_group, sum)
31 |   per_group <- dplyr::select(per_group, -factor)
32 | 
33 |   # Back to mutation types in the rows, with one column per group.
34 |   mut_mat_group <- t(per_group)
35 |   colnames(mut_mat_group) <- levels(grouping)
36 |   return(mut_mat_group)
37 | }
38 | 


--------------------------------------------------------------------------------
/man/convert_sigs_to_ref.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/convert_sigs_to_ref.R
 3 | \name{convert_sigs_to_ref}
 4 | \alias{convert_sigs_to_ref}
 5 | \title{Convert tissue specific signature exposures to reference}
 6 | \usage{
 7 | convert_sigs_to_ref(fit_res)
 8 | }
 9 | \arguments{
10 | \item{fit_res}{Named list with signature contributions and reconstructed
11 | mutation matrix}
12 | }
13 | \value{
14 | The input fit_res, but with converted signature contributions.
15 | }
16 | \description{
17 | This function converts tissue specific signature contributions into
18 | reference signature contributions. This works on SNV signatures from SIGNAL.
19 | It uses a conversion matrix to do the conversion.
20 | The output can include possible artifact signatures.
21 | }
22 | \examples{
23 | 
24 | ## See the 'mut_matrix()' example for how we obtained the mutation matrix:
25 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
26 |   package = "MutationalPatterns"
27 | ))
28 | 
29 | ## Get tissue specific signatures
30 | signatures <- get_known_signatures(source = "SIGNAL", sig_type = "tissue", tissue_type = "Skin")
31 | 
32 | ## Fit tissue specific signatures
33 | fit_res <- fit_to_signatures(mut_mat, signatures)
34 | 
35 | ## Convert the tissue specific signatures exposures to reference
36 | fit_res <- convert_sigs_to_ref(fit_res)
37 | }
38 | 


--------------------------------------------------------------------------------
/R/binomial_test.R:
--------------------------------------------------------------------------------
 1 | #' Binomial test for enrichment or depletion testing
 2 | #'
 3 | #' Compares the observed number of successes with the expected number
 4 | #' (p * n). A lower-tail binomial test is used when fewer successes than
 5 | #' expected are observed (depletion) and an upper-tail test otherwise
 6 | #' (enrichment). The resulting p value is then made two sided.
 7 | #'
 8 | #' @param p Probability of success
 9 | #' @param n Number of trials
10 | #' @param x Observed number of successes
11 | #' @param p_cutoffs Significance cutoff for the p value. Default: 0.05
12 | #' @return A data.frame with direction of effect (enrichment/depletion),
13 | #' P-value and significance asterisks
14 | #'
15 | #' @examples
16 | #' binomial_test(0.5, 1200, 543)
17 | #' binomial_test(0.2, 800, 150)
18 | #' @export
19 | 
20 | binomial_test <- function(p, n, x, p_cutoffs = 0.05) {
21 |   # Fewer successes than expected (p * n) means depletion;
22 |   # anything else is treated as enrichment.
23 |   is_depleted <- x < p * n
24 | 
25 |   if (is_depleted) {
26 |     # Lower tail: P(X <= x).
27 |     effect <- "depletion"
28 |     pval <- stats::pbinom(x, n, p, lower.tail = TRUE)
29 |   } else {
30 |     # Upper tail: P(X >= x), which is why x - 1 is passed.
31 |     effect <- "enrichment"
32 |     pval <- stats::pbinom(x - 1, n, p, lower.tail = FALSE)
33 |   }
34 | 
35 |   # Make the test two sided.
36 |   pval <- 2 * min(pval, 1 - pval)
37 | 
38 |   # Add significance asterisks
39 |   significant <- .get_sig_star(pval, p_cutoffs)
40 | 
41 |   # The pval and significant columns are named after these variables.
42 |   res <- data.frame("effect" = factor(effect), pval, significant)
43 |   return(res)
44 | }
45 | 


--------------------------------------------------------------------------------
/man/cluster_signatures.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cluster_signatures.R
 3 | \name{cluster_signatures}
 4 | \alias{cluster_signatures}
 5 | \title{Signature clustering function}
 6 | \usage{
 7 | cluster_signatures(signatures, method = "complete")
 8 | }
 9 | \arguments{
10 | \item{signatures}{Matrix with 96 trinucleotides (rows) and any number of
11 | signatures (columns)}
12 | 
13 | \item{method}{The agglomeration method to be used for hierarchical
14 | clustering. This should be one of "ward.D", "ward.D2", "single", "complete",
15 | "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or
16 | "centroid" (= UPGMC). Default = "complete".}
17 | }
18 | \value{
19 | hclust object
20 | }
21 | \description{
22 | Hierarchical clustering of signatures based on cosine similarity
23 | }
24 | \examples{
25 | ## Get signatures
26 | signatures <- get_known_signatures()
27 | 
28 | ## See the 'mut_matrix()' example for how we obtained the mutation matrix:
29 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
30 |   package = "MutationalPatterns"
31 | ))
32 | 
33 | 
34 | ## Hierarchically cluster the cancer signatures based on cosine similarity
35 | hclust_signatures <- cluster_signatures(signatures)
36 | 
37 | ## Plot dendrogram
38 | plot(hclust_signatures)
39 | }
40 | \seealso{
41 | \code{\link{plot_contribution_heatmap}}
42 | }
43 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_enrichment_depletion.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_enrichment_depletion")
 2 | 
 3 | # Read distribution data
 4 | distr <- readRDS(system.file("states/distr_data.rds",
 5 |   package = "MutationalPatterns"
 6 | ))
 7 | # Set tissue
 8 | tissue <- c(
 9 |   "colon", "colon", "colon",
10 |   "intestine", "intestine", "intestine",
11 |   "liver", "liver", "liver"
12 | )
13 | 
14 | ## Perform the enrichment/depletion test.
15 | distr_test <- enrichment_depletion_test(distr, by = tissue)
16 | distr_test2 <- enrichment_depletion_test(distr)
17 | 
18 | ## Plot the enrichment/depletion
19 | output <- plot_enrichment_depletion(distr_test)
20 | output_persample <- plot_enrichment_depletion(distr_test2)
21 | 
22 | ## Test with p instead of fdr
23 | output_pval <- plot_enrichment_depletion(distr_test, sig_type = "p")
24 | 
25 | ## Use multiple (max 3) significance cutoffs.
26 | distr_multistars <- enrichment_depletion_test(distr,
27 |   by = tissue,
28 |   p_cutoffs = c(0.05, 0.01, 0.005),
29 |   fdr_cutoffs = c(0.1, 0.05, 0.01)
30 | )
31 | output_multistars <- plot_enrichment_depletion(distr_multistars)
32 | 
33 | 
34 | # Perform tests
35 | test_that("Output has correct class", {
36 |   expect_true(inherits(output, c("gg")))
37 |   expect_true(inherits(output_persample, c("gg")))
38 |   expect_true(inherits(output_pval, c("gg")))
39 |   expect_true(inherits(output_multistars, c("gg")))
40 | })
41 | 


--------------------------------------------------------------------------------
/tests/testthat/test-fit_to_signatures_bootstrapped.R:
--------------------------------------------------------------------------------
 1 | context("test-fit_to_signatures_bootstrapped")
 2 | 
 3 | # Get mut_mat
 4 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
 5 |   package = "MutationalPatterns"
 6 | ))
 7 | 
 8 | # Get signatures
 9 | signatures <- get_known_signatures()
10 | 
11 | test_that("Output has correct class", {
12 |   output <- fit_to_signatures_bootstrapped(mut_mat, signatures, n_boots = 2, max_delta = 0.05)
13 |   expect_true(inherits(output, "matrix"))
14 | 
15 |   output_ori <- fit_to_signatures_bootstrapped(mut_mat, signatures, n_boots = 2, max_delta = 0.05, method = "regular")
16 |   expect_true(inherits(output_ori, "matrix"))
17 | 
18 |   output_ori_10 <- fit_to_signatures_bootstrapped(mut_mat, signatures, n_boots = 2, max_delta = 0.05, method = "regular_10+")
19 |   expect_true(inherits(output_ori_10, "matrix"))
20 |   
21 |   output_best <- fit_to_signatures_bootstrapped(mut_mat, signatures[,1:5], n_boots = 2, max_delta = 0.05, method = "strict_best_subset")
22 |   expect_true(inherits(output_best, "matrix"))
23 | })
24 | 
25 | expected <- readRDS(system.file("states/bootstrapped_snv_refit.rds",
26 |   package = "MutationalPatterns"
27 | ))
28 | 
29 | test_that("Output is equal to expected", {
30 |   set.seed(42)
31 |   output <- fit_to_signatures_bootstrapped(mut_mat, signatures, n_boots = 2, max_delta = 0.05)
32 |   expect_equal(output, expected)
33 | })
34 | 


--------------------------------------------------------------------------------
/man/type_context.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/type_context.R
 3 | \name{type_context}
 4 | \alias{type_context}
 5 | \title{Retrieve context of base substitution types}
 6 | \usage{
 7 | type_context(vcf, ref_genome, extension = 1)
 8 | }
 9 | \arguments{
10 | \item{vcf}{A CollapsedVCF object}
11 | 
12 | \item{ref_genome}{Reference genome}
13 | 
14 | \item{extension}{The number of bases, that's extracted upstream and
15 | downstream of the base substitutions. (Default: 1).}
16 | }
17 | \value{
18 | Mutation types and context character vectors in a named list
19 | }
20 | \description{
21 | A function to extract the bases upstream (5') and downstream (3') of the base
22 | substitution types.
23 | }
24 | \examples{
25 | ## See the 'read_vcfs_as_granges()' example for how we obtained the
26 | ## following data:
27 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
28 |   package = "MutationalPatterns"
29 | ))
30 | 
31 | ## Load the corresponding reference genome.
32 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
33 | library(ref_genome, character.only = TRUE)
34 | 
35 | ## Get type context
36 | type_context <- type_context(vcfs[[1]], ref_genome)
37 | 
38 | ## Get larger type context
39 | type_context_larger <- type_context(vcfs[[1]], ref_genome, extension = 2)
40 | }
41 | \seealso{
42 | \code{\link{read_vcfs_as_granges}},
43 | \code{\link{mut_context}}
44 | }
45 | 


--------------------------------------------------------------------------------
/man/plot_river.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_river.R
 3 | \name{plot_river}
 4 | \alias{plot_river}
 5 | \title{Plot a riverplot}
 6 | \usage{
 7 | plot_river(mut_matrix, condensed = FALSE)
 8 | }
 9 | \arguments{
10 | \item{mut_matrix}{Matrix containing mutation counts.}
11 | 
12 | \item{condensed}{More condensed plotting format. Default = FALSE.}
13 | }
14 | \value{
15 | A ggplot object
16 | }
17 | \description{
18 | Function to plot a SNV mutation matrix as a riverplot.
19 | This is especially useful when looking at a wide
20 | mutational context
21 | }
22 | \examples{
23 | 
24 | ## See the 'mut_matrix()' examples for how we obtained the
25 | ## mutation matrix information:
26 | ## Get regular matrix
27 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
28 |   package = "MutationalPatterns"
29 | ))
30 | 
31 | ## Create riverplot of profile
32 | plot_river(mut_mat[,c(1,4)])
33 | 
34 | ## Get extended matrix
35 | mut_mat_extended <- readRDS(system.file("states/mut_mat_data_extended.rds",
36 |   package = "MutationalPatterns"
37 | ))
38 | 
39 | ## Create riverplot of extended profile
40 | plot_river(mut_mat_extended[,c(1,4)])
41 | 
42 | ## Create condensed version of riverplot
43 | plot_river(mut_mat_extended[,c(1,4)], condensed = TRUE)
44 | 
45 | }
46 | \seealso{
47 | \code{\link{mut_matrix}},
48 | \code{\link{plot_96_profile}},
49 | \code{\link{plot_profile_heatmap}}
50 | }
51 | 


--------------------------------------------------------------------------------
/R/get_sig_start.R:
--------------------------------------------------------------------------------
 1 | #' Determine the number of significance stars
 2 | #'
 3 | #' One star is given for each significance cutoff that the statistical value
 4 | #' is below. At most three cutoffs, in decreasing order, are supported.
 5 | #'
 6 | #' @param val Statistical value. Either a p value or fdr.
 7 | #' @param cutoffs Significance cutoffs for the statistical value.
 8 | #'
 9 | #' @return A vector of significance stars and empty strings (not significant).
10 | #' @noRd
11 | #'
12 | .get_sig_star <- function(val, cutoffs) {
13 |   # Get name of cutoffs argument, so the error messages can refer to it.
14 |   cutoffs_name <- deparse(substitute(cutoffs))
15 | 
16 |   # Validate cutoff argument
17 |   if (length(cutoffs) > 3) {
18 |     stop(paste0("The length of the ", cutoffs_name, " argument can't be higher than 3."),
19 |       call. = FALSE
20 |     )
21 |   }
22 | 
23 |   # all.equal() returns a character description instead of FALSE when its
24 |   # arguments differ, so its result has to be wrapped in isTRUE().
25 |   if (!isTRUE(all.equal(cutoffs, sort(cutoffs, decreasing = TRUE)))) {
26 |     stop(paste0("The ", cutoffs_name, " argument should be in decreasing order."),
27 |       call. = FALSE
28 |     )
29 |   }
30 | 
31 |   # Add -Infs to cutoffs if the length is lower than 3.
32 |   # Since a val cant be lower than -Inf, these cutoffs will never be reached.
33 |   cutoffs <- c(cutoffs, rep(-Inf, 3 - length(cutoffs)))
34 | 
35 |   # Determine significance level
36 |   stars <- dplyr::case_when(
37 |     val < cutoffs[3] ~ "***",
38 |     val < cutoffs[2] ~ "**",
39 |     val < cutoffs[1] ~ "*",
40 |     TRUE ~ ""
41 |   )
42 |   return(stars)
43 | }
44 | 


--------------------------------------------------------------------------------
/man/count_dbs_contexts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/count_dbs_contexts.R
 3 | \name{count_dbs_contexts}
 4 | \alias{count_dbs_contexts}
 5 | \title{Count DBS contexts}
 6 | \usage{
 7 | count_dbs_contexts(vcf_list)
 8 | }
 9 | \arguments{
10 | \item{vcf_list}{GRanges or GRangesList object containing DBS mutations in which the context was added with get_dbs_context.}
11 | }
12 | \value{
13 | A tibble containing the number of DBS per COSMIC context per gr.
14 | }
15 | \description{
16 | Count DBS contexts
17 | }
18 | \details{
19 | Counts the number of DBS per COSMIC context from a GRanges or GRangesList object containing DBS variants.
20 | This function applies the count_dbs_contexts_gr function to each gr in its input.
21 | It then combines the results in a single tibble and returns this.
22 | }
23 | \examples{
24 | ## Get a GRangesList or GRanges object with DBS contexts.
25 | ## See 'get_dbs_context' for more info on how to do this.
26 | grl_dbs_context <- readRDS(system.file("states/blood_grl_dbs_context.rds",
27 |   package = "MutationalPatterns"
28 | ))
29 | 
30 | # Count the DBS contexts
31 | count_dbs_contexts(grl_dbs_context)
32 | }
33 | \seealso{
34 | \code{\link{get_dbs_context}}
35 | 
36 | Other DBS: 
37 | \code{\link{get_dbs_context}()},
38 | \code{\link{plot_compare_dbs}()},
39 | \code{\link{plot_dbs_contexts}()},
40 | \code{\link{plot_main_dbs_contexts}()}
41 | }
42 | \concept{DBS}
43 | 


--------------------------------------------------------------------------------
/man/mut_context.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mut_context.R
 3 | \name{mut_context}
 4 | \alias{mut_context}
 5 | \title{Retrieve context of base substitutions}
 6 | \usage{
 7 | mut_context(vcf, ref_genome, extension = 1)
 8 | }
 9 | \arguments{
10 | \item{vcf}{A GRanges object}
11 | 
12 | \item{ref_genome}{Reference genome}
13 | 
14 | \item{extension}{The number of bases, that's extracted upstream and
15 | downstream of the base substitutions. (Default: 1).}
16 | }
17 | \value{
18 | Character vector with the context of the base substitutions
19 | }
20 | \description{
21 | A function to extract the bases upstream (5') and downstream (3') of the base
22 | substitutions from the reference genome. The user can choose how many bases
23 | are extracted.
24 | }
25 | \examples{
26 | ## See the 'read_vcfs_as_granges()' example for how we obtained the
27 | ## following data:
28 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
29 |   package = "MutationalPatterns"
30 | ))
31 | 
32 | ## Load the corresponding reference genome.
33 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
34 | library(ref_genome, character.only = TRUE)
35 | 
36 | ## Get the standard context
37 | mut_context <- mut_context(vcfs[[1]], ref_genome)
38 | 
39 | ## Get larger context
40 | mut_context_larger <- mut_context(vcfs[[1]], ref_genome, extension = 2)
41 | }
42 | \seealso{
43 | \code{\link{read_vcfs_as_granges}}
44 | }
45 | 


--------------------------------------------------------------------------------
/man/plot_mbs_contexts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_mbs_contexts.R
 3 | \name{plot_mbs_contexts}
 4 | \alias{plot_mbs_contexts}
 5 | \title{Plot the MBS contexts}
 6 | \usage{
 7 | plot_mbs_contexts(counts, same_y = TRUE)
 8 | }
 9 | \arguments{
10 | \item{counts}{A tibble containing the number of MBS per MBS length.}
11 | 
12 | \item{same_y}{A boolean describing whether the same y axis should be used for all samples.}
13 | }
14 | \value{
15 | A ggplot figure.
16 | }
17 | \description{
18 | Plot the MBS contexts
19 | }
20 | \details{
21 | Plots the number of MBS per MBS length per sample.
22 | It takes a tibble with counts as its input. This tibble can be generated by count_mbs_contexts.
23 | Each sample is plotted in a separate facet.
24 | The same y axis can be used for all samples or a separate y axis can be used.
25 | }
26 | \examples{
27 | ## Get the MBS counts
28 | ## See 'count_mbs_contexts()' for more info on how to do this.
29 | mbs_counts <- readRDS(system.file("states/blood_mbs_counts.rds",
30 |   package = "MutationalPatterns"
31 | ))
32 | 
33 | ## Plot contexts
34 | plot_mbs_contexts(mbs_counts)
35 | 
36 | ## Use a different y axis for all samples.
37 | plot_mbs_contexts(mbs_counts, same_y = FALSE)
38 | }
39 | \seealso{
40 | \code{\link{count_mbs_contexts}}
41 | 
42 | Other MBS: 
43 | \code{\link{count_mbs_contexts}()},
44 | \code{\link{plot_compare_mbs}()}
45 | }
46 | \concept{MBS}
47 | 


--------------------------------------------------------------------------------
/tests/testthat/test-convert_sigs_to_ref.R:
--------------------------------------------------------------------------------
 1 | context("test-convert_sigs_to_ref")
 2 | 
 3 | # Fixture: mutation count matrix shipped with the package.
 4 | mut_mat_path <- system.file("states/mut_mat_data.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | counts <- readRDS(mut_mat_path)
 8 | 
 9 | # Skin-specific signatures from the SIGNAL source.
10 | skin_sigs <- get_known_signatures(
11 |   source = "SIGNAL",
12 |   sig_type = "tissue",
13 |   tissue_type = "Skin"
14 | )
15 | 
16 | # Refit with the tissue specific signatures, then convert the resulting
17 | # exposures to reference signatures.
18 | tissue_fit <- fit_to_signatures(counts, skin_sigs)
19 | converted <- convert_sigs_to_ref(tissue_fit)
20 | 
21 | test_that("Output has correct class", {
22 |   expect_true(inherits(converted, "list"))
23 |   expect_true(inherits(converted$contribution, "matrix"))
24 |   expect_true(inherits(converted$reconstructed, "matrix"))
25 | })
26 | 
27 | test_that("Output has correct dimensions", {
28 |   expect_equal(dim(converted$contribution), c(38, 9))
29 | })
30 | 
31 | test_that("Nr. mutations hasn't changed", {
32 |   totals_after <- colSums(converted$contribution)
33 |   totals_before <- colSums(tissue_fit$contribution)
34 |   expect_equal(totals_after, totals_before)
35 | })
36 | 
37 | # A fit whose first signature has been renamed must be rejected.
38 | renamed_fit <- tissue_fit
39 | rownames(renamed_fit$contribution)[1] <- "fakename"
40 | test_that("An error is thrown when the sig names don't match", {
41 |   expect_error(
42 |     convert_sigs_to_ref(renamed_fit),
43 |     "The signature names of the fit_res don't match that of"
44 |   )
45 | })
46 | 


--------------------------------------------------------------------------------
/tests/testthat/test-context_potential_damage_analysis.R:
--------------------------------------------------------------------------------
 1 | context("test-context_potential_damage_analysis")
 2 | 
 3 | 
 4 | # Use the first six row names of the packaged mutation matrix as contexts.
 5 | mut_mat_path <- system.file("states/mut_mat_data.rds",
 6 |   package = "MutationalPatterns"
 7 | )
 8 | contexts <- rownames(readRDS(mut_mat_path))[1:6]
 9 | 
10 | # Reference genome, loaded and passed on by package name.
11 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
12 | library(ref_genome, character.only = TRUE)
13 | 
14 | # Transcription database.
15 | library("TxDb.Hsapiens.UCSC.hg19.knownGene")
16 | txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
17 | 
18 | # Gene id to analyse (TP53).
19 | gene_ids <- c(7157)
20 | 
21 | # Run once with the default settings and once with verbose = TRUE.
22 | result <- context_potential_damage_analysis(
23 |   contexts, txdb, ref_genome, gene_ids
24 | )
25 | result_verbose <- context_potential_damage_analysis(
26 |   contexts, txdb, ref_genome, gene_ids,
27 |   verbose = TRUE
28 | )
29 | 
30 | test_that("Output has correct class", {
31 |   expect_true(inherits(result, "tbl_df"))
32 |   expect_true(inherits(result_verbose, "tbl_df"))
33 | })
34 | 
35 | test_that("Output has correct size", {
36 |   expect_equal(dim(result), c(24, 5))
37 |   expect_equal(dim(result_verbose), c(24, 5))
38 | })
39 | 
40 | # Reference result stored with the package.
41 | expected_path <- system.file("states/context_mismatches.rds",
42 |   package = "MutationalPatterns"
43 | )
44 | expected <- readRDS(expected_path)
45 | 
46 | test_that("Output is equal to expected", {
47 |   expect_equal(result, expected)
48 | })
49 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_lesion_segregation.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_lesion_segregation")
 2 | 
 3 | 
 4 | # Load GRangesList
 5 | grl <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
 6 |   package = "MutationalPatterns"
 7 | ))
 8 | 
 9 | # Only use two samples to reduce runtime
10 | grl <- grl[1:2]
11 | 
12 | # Select a single sample for the single-sample calls
13 | gr <- grl[[1]]
14 | 
15 | # Call the function with several argument combinations
16 | output <- plot_lesion_segregation(grl)
17 | output_singlesample <- plot_lesion_segregation(gr, sample_name = "Colon1")
18 | output_noname <- plot_lesion_segregation(gr)
19 | output_l <- plot_lesion_segregation(gr, per_chrom = TRUE, sample_name = "Colon1")
20 | output_chr_filter <- plot_lesion_segregation(grl, chromosomes = c("chr2", "chr3"))
21 | output_chr_filter2 <- plot_lesion_segregation(grl, chromosomes = c("2", "3"))
22 | output_subsample <- plot_lesion_segregation(grl, subsample = 0.1)
23 | 
24 | test_that("Output has correct class", {
25 |   expect_true(inherits(output, c("gg")))
26 |   expect_true(inherits(output_singlesample, c("gg")))
27 |   expect_true(inherits(output_noname, c("gg")))
28 |   expect_true(inherits(output_l, c("list")))
29 |   expect_true(inherits(output_l[[1]], c("gg")))
30 |   expect_true(inherits(output_chr_filter, c("gg")))
31 |   expect_true(inherits(output_chr_filter2, c("gg")))
32 |   expect_true(inherits(output_subsample, c("gg")))
33 | })
34 | 
35 | # With per_chrom = TRUE a list of figures is returned.
36 | test_that("Output per chromosome has correct length", {
37 |   expect_equal(length(output_l), 22)
38 | })
38 | 


--------------------------------------------------------------------------------
/tests/testthat/test-mut_type_occurrences.R:
--------------------------------------------------------------------------------
 1 | context("mut_type_occurrences")
 2 | 
 3 | # Read vcfs
 4 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
 5 |   package = "MutationalPatterns"
 6 | ))
 7 | 
 8 | ## Load a reference genome.
 9 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
10 | library(ref_genome, character.only = TRUE)
11 | 
12 | # Get the type occurrences for all VCF objects.
13 | output <- mut_type_occurrences(vcfs, ref_genome)
14 | 
15 | # Get the type occurrences for a single sample
16 | output_single_sample <- mut_type_occurrences(vcfs[[1]], ref_genome)
17 | 
18 | # Get the type occurrences for a sample with only two mutations
19 | output_fewmuts <- mut_type_occurrences(vcfs[[1]][1:2], ref_genome)
20 | 
21 | test_that("Output has correct class", {
22 |   expect_true(inherits(output, "data.frame"))
23 |   expect_true(inherits(output_single_sample, "data.frame"))
24 |   expect_true(inherits(output_fewmuts, "data.frame"))
25 | })
26 | 
27 | test_that("Output has correct dimensions", {
28 |   expect_equal(dim(output), c(9, 8))
29 |   expect_equal(dim(output_single_sample), c(1, 8))
30 |   expect_equal(dim(output_fewmuts), c(1, 8))
31 | })
32 | 
33 | test_that("Transforms correctly", {
34 |   expect_equal(
35 |     output_single_sample,
36 |     structure(list(
37 |       `C>A` = 28L, `C>G` = 5L, `C>T` = 109L, `T>A` = 12L,
38 |       `T>C` = 30L, `T>G` = 12L, `C>T at CpG` = 59L, `C>T other` = 50L
39 |     ),
40 |     row.names = "My_sample", class = "data.frame"
41 |     )
42 |   )
43 | })
44 | 


--------------------------------------------------------------------------------
/man/cos_sim_matrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cos_sim_matrix.R
 3 | \name{cos_sim_matrix}
 4 | \alias{cos_sim_matrix}
 5 | \title{Compute all pairwise cosine similarities between mutational profiles/signatures}
 6 | \usage{
 7 | cos_sim_matrix(mut_matrix1, mut_matrix2)
 8 | }
 9 | \arguments{
10 | \item{mut_matrix1}{mutation count matrix (dimensions: a mutation features X n samples)}
11 | 
12 | \item{mut_matrix2}{mutation count matrix (dimensions: a mutation features X m samples)}
13 | }
14 | \value{
15 | Matrix with pairwise cosine similarities (dimensions: n mutational profiles X m mutational profiles)
16 | }
17 | \description{
18 | Computes all pairwise cosine similarities between the mutational profiles provided in the two mutation count matrices.
19 | The cosine similarity is a value between 0 (distinct) and 1 (identical) and indicates how much two vectors are alike.
20 | }
21 | \examples{
22 | ## Get signatures
23 | signatures <- get_known_signatures()
24 | 
25 | ## See the 'mut_matrix()' example for how we obtained the mutation matrix:
26 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
27 |   package = "MutationalPatterns"
28 | ))
29 | 
30 | 
31 | ## Calculate the cosine similarity between each COSMIC signature and each 96 mutational profile
32 | cos_sim_matrix(mut_mat, signatures)
33 | }
34 | \seealso{
35 | \code{\link{mut_matrix}},
36 | \code{\link{fit_to_signatures}},
37 | \code{\link{plot_cosine_heatmap}}
38 | }
39 | 


--------------------------------------------------------------------------------
/man/plot_correlation_bootstrap.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_correlation_bootstrap.R
 3 | \name{plot_correlation_bootstrap}
 4 | \alias{plot_correlation_bootstrap}
 5 | \title{Plots the correlation between bootstrapped signature contributions}
 6 | \usage{
 7 | plot_correlation_bootstrap(contri_boots, per_sample = TRUE)
 8 | }
 9 | \arguments{
10 | \item{contri_boots}{A dataframe with bootstrapped signature contributions.}
11 | 
12 | \item{per_sample}{Whether or not a plot should be made per sample. Default: TRUE.}
13 | }
14 | \value{
15 | A list of ggplot2 objects if run per sample.
16 | Else it returns a single ggplot2 object.
17 | }
18 | \description{
19 | This function plots the pearson correlation between signatures.
20 | This can be done per sample or for all samples together.
21 | It returns a list of the created figures.
22 | }
23 | \examples{
24 | 
25 | ## Get a dataframe with bootstrapped signature contributions.
26 | ## See 'fit_to_signatures_bootstrapped()' for how to do this.
27 | contri_boots <- readRDS(system.file("states/bootstrapped_snv_refit.rds",
28 |   package = "MutationalPatterns"
29 | ))
30 | 
31 | ## Plot the correlations between signatures per sample
32 | fig_l <- plot_correlation_bootstrap(contri_boots)
33 | 
34 | ## Look at the figure of the first sample.
35 | fig_l[[1]]
36 | 
37 | ## You can also look at the correlation for all samples combined
38 | plot_correlation_bootstrap(contri_boots, per_sample = FALSE)
39 | }
40 | 


--------------------------------------------------------------------------------
/man/plot_signature_strand_bias.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_signature_strand_bias.R
 3 | \name{plot_signature_strand_bias}
 4 | \alias{plot_signature_strand_bias}
 5 | \title{Plot signature strand bias}
 6 | \usage{
 7 | plot_signature_strand_bias(signatures_strand_bias)
 8 | }
 9 | \arguments{
10 | \item{signatures_strand_bias}{Signature matrix with 192 features}
11 | }
12 | \value{
13 | Barplot
14 | }
15 | \description{
16 | Plot strand bias per mutation type for each signature.
17 | }
18 | \examples{
19 | ## See the 'mut_matrix()' example for how we obtained the following
20 | ## mutation matrix.
21 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds",
22 |   package = "MutationalPatterns"
23 | ))
24 | 
25 | ## Extracting signatures can be computationally intensive, so
26 | ## we use pre-computed data generated with the following command:
27 | # nmf_res_strand <- extract_signatures(mut_mat_s, rank = 2)
28 | 
29 | nmf_res_strand <- readRDS(system.file("states/nmf_res_strand_data.rds",
30 |   package = "MutationalPatterns"
31 | ))
32 | 
33 | ## Provide column names for the plot.
34 | colnames(nmf_res_strand$signatures) <- c("Signature A", "Signature B")
35 | 
36 | ## Create figure
37 | plot_signature_strand_bias(nmf_res_strand$signatures)
38 | 
39 | ## You can also plot the bias of samples
40 | plot_signature_strand_bias(mut_mat_s[, c(1, 2)])
41 | }
42 | \seealso{
43 | \code{\link{extract_signatures}},
44 | \code{\link{mut_matrix}}
45 | }
46 | 


--------------------------------------------------------------------------------
/man/count_indel_contexts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/count_indel_contexts.R
 3 | \name{count_indel_contexts}
 4 | \alias{count_indel_contexts}
 5 | \title{Count indel contexts}
 6 | \usage{
 7 | count_indel_contexts(vcf_list)
 8 | }
 9 | \arguments{
10 | \item{vcf_list}{GRanges or GRangesList object containing indel mutations in which the context was added with get_indel_context.}
11 | }
12 | \value{
13 | A tibble containing the number of indels per COSMIC context per gr.
14 | }
15 | \description{
16 | Count indel contexts
17 | }
18 | \details{
19 | Counts the number of indels per COSMIC context from a GRanges or GRangesList object containing indel mutations.
20 | This function applies the count_indel_contexts_gr function to each gr in its input.
21 | It then combines the results in a single tibble and returns this.
22 | }
23 | \examples{
24 | ## Get a GRangesList or GRanges object with indel contexts.
25 | ## See 'get_indel_context()' for more info on how to do this.
26 | grl_indel_context <- readRDS(system.file("states/blood_grl_indel_context.rds",
27 |   package = "MutationalPatterns"
28 | ))
29 | 
30 | # Count the indel contexts
31 | count_indel_contexts(grl_indel_context)
32 | }
33 | \seealso{
34 | \code{\link{get_indel_context}}
35 | 
36 | Other Indels: 
37 | \code{\link{get_indel_context}()},
38 | \code{\link{plot_compare_indels}()},
39 | \code{\link{plot_indel_contexts}()},
40 | \code{\link{plot_main_indel_contexts}()}
41 | }
42 | \concept{Indels}
43 | 


--------------------------------------------------------------------------------
/man/mut_matrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mut_matrix.R
 3 | \name{mut_matrix}
 4 | \alias{mut_matrix}
 5 | \title{Make mutation count matrix of 96 trinucleotides}
 6 | \usage{
 7 | mut_matrix(vcf_list, ref_genome, extension = 1)
 8 | }
 9 | \arguments{
10 | \item{vcf_list}{GRangesList or GRanges object.}
11 | 
12 | \item{ref_genome}{BSgenome reference genome object}
13 | 
14 | \item{extension}{The number of bases, that's extracted upstream and
15 | downstream of the base substitutions. (Default: 1).}
16 | }
17 | \value{
18 | 96 mutation count matrix
19 | }
20 | \description{
21 | Make 96 trinucleotide mutation count matrix
22 | }
23 | \examples{
24 | ## See the 'read_vcfs_as_granges()' example for how we obtained the
25 | ## following data:
26 | grl <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
27 |   package = "MutationalPatterns"
28 | ))
29 | 
30 | ## Load the corresponding reference genome.
31 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
32 | library(ref_genome, character.only = TRUE)
33 | 
34 | ## Construct a mutation matrix from the loaded VCFs in comparison to the
35 | ## ref_genome.
36 | mut_mat <- mut_matrix(vcf_list = grl, ref_genome = ref_genome)
37 | 
38 | ## Construct a mutation matrix with a larger context.
39 | ## This is most useful when you have many mutations per sample.
40 | mut_mat_extended <- mut_matrix(vcf_list = grl, ref_genome = ref_genome, extension = 2)
41 | }
42 | \seealso{
43 | \code{\link{read_vcfs_as_granges}}
44 | }
45 | 


--------------------------------------------------------------------------------
/R/cluster_signatures.R:
--------------------------------------------------------------------------------
 1 | #' Cluster signatures hierarchically
 2 | #'
 3 | #' Performs hierarchical clustering of signatures, using one minus their
 4 | #' pairwise cosine similarity as the distance.
 5 | #'
 6 | #' @param signatures Matrix with 96 trinucleotides (rows) and any number of
 7 | #' signatures (columns)
 8 | #' @param method The agglomeration method passed on to \code{hclust}. This
 9 | #' should be one of "ward.D", "ward.D2", "single", "complete",
10 | #' "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or
11 | #' "centroid" (= UPGMC). Default = "complete".
12 | #' @return hclust object
13 | #'
14 | #' @examples
15 | #' ## Get signatures
16 | #' signatures <- get_known_signatures()
17 | #'
18 | #' ## See the 'mut_matrix()' example for how we obtained the mutation matrix:
19 | #' mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
20 | #'   package = "MutationalPatterns"
21 | #' ))
22 | #'
23 | #'
24 | #' ## Hierarchically cluster the cancer signatures based on cosine similarity
25 | #' hclust_signatures <- cluster_signatures(signatures)
26 | #'
27 | #' ## Plot dendrogram
28 | #' plot(hclust_signatures)
29 | #' @seealso
30 | #' \code{\link{plot_contribution_heatmap}}
31 | #'
32 | #' @export
33 | 
34 | cluster_signatures <- function(signatures, method = "complete") {
35 |   # Pairwise cosine similarity of every signature against every other one,
36 |   # turned into a distance object (1 - similarity).
37 |   cos_dist <- as.dist(1 - cos_sim_matrix(signatures, signatures))
38 |   # Hierarchical clustering with the requested agglomeration method.
39 |   hclust(cos_dist, method = method)
40 | }
41 | 


--------------------------------------------------------------------------------
/man/strand_occurrences.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/strand_occurrences.R
 3 | \name{strand_occurrences}
 4 | \alias{strand_occurrences}
 5 | \title{Count occurrences per base substitution type and strand}
 6 | \usage{
 7 | strand_occurrences(mut_mat_s, by = NA)
 8 | }
 9 | \arguments{
10 | \item{mut_mat_s}{192 feature mutation count matrix, result from
11 | 'mut_matrix_stranded()'}
12 | 
13 | \item{by}{Character vector with grouping info, optional}
14 | }
15 | \value{
16 | A data.frame with the total number of mutations and relative
17 | contribution within group per base substitution type and strand
18 | }
19 | \description{
20 | For each base substitution type and strand the total number
21 | of mutations and the relative contribution within a group is returned.
22 | }
23 | \examples{
24 | ## See the 'mut_matrix_stranded()' example for how we obtained the
25 | ## following mutation matrix.
26 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds",
27 |   package = "MutationalPatterns"
28 | ))
29 | 
30 | ## Load a reference genome.
31 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
32 | library(ref_genome, character.only = TRUE)
33 | 
34 | tissue <- c(
35 |   "colon", "colon", "colon",
36 |   "intestine", "intestine", "intestine",
37 |   "liver", "liver", "liver"
38 | )
39 | 
40 | strand_counts <- strand_occurrences(mut_mat_s, by = tissue)
41 | }
42 | \seealso{
43 | \code{\link{mut_matrix_stranded}},
44 | \code{\link{plot_strand}},
45 | \code{\link{plot_strand_bias}}
46 | }
47 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_contribution_heatmap.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_contribution_heatmap")
 2 | 
 3 | 
 4 | # Contribution matrix of the packaged nmf results, with named signatures.
 5 | nmf_path <- system.file("states/nmf_res_data.rds",
 6 |   package = "MutationalPatterns"
 7 | )
 8 | nmf_contribution <- readRDS(nmf_path)$contribution
 9 | rownames(nmf_contribution) <- c("Signature A", "Signature B")
10 | 
11 | # Heatmap with both samples and signatures clustered.
12 | heatmap_clustered <- plot_contribution_heatmap(
13 |   nmf_contribution,
14 |   cluster_samples = TRUE,
15 |   cluster_sigs = TRUE
16 | )
17 | 
18 | # Heatmap with an explicitly supplied signature and sample order.
19 | heatmap_ordered <- plot_contribution_heatmap(
20 |   nmf_contribution,
21 |   cluster_samples = FALSE,
22 |   sig_order = c("Signature B", "Signature A"),
23 |   sample_order = c(
24 |     "colon1", "colon2", "colon3", "intestine1", "intestine2",
25 |     "intestine3", "liver3", "liver2", "liver1"
26 |   )
27 | )
28 | 
29 | # Heatmap with text values.
30 | heatmap_values <- plot_contribution_heatmap(nmf_contribution, plot_values = TRUE)
31 | 
32 | # Heatmap of the signature refitting results.
33 | refit_path <- system.file("states/strict_snv_refit.rds",
34 |   package = "MutationalPatterns"
35 | )
36 | refit_contribution <- readRDS(refit_path)$contribution
37 | heatmap_refit <- plot_contribution_heatmap(
38 |   refit_contribution,
39 |   cluster_samples = TRUE,
40 |   cluster_sigs = TRUE
41 | )
42 | 
43 | 
44 | test_that("Output has correct class", {
45 |   expect_true(inherits(heatmap_clustered, "gg"))
46 |   expect_true(inherits(heatmap_ordered, "gg"))
47 |   expect_true(inherits(heatmap_values, "gg"))
48 |   expect_true(inherits(heatmap_refit, "gg"))
49 | })
50 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_strand_bias.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_strand_bias")
 2 | 
 3 | # Grouping of the samples by tissue.
 4 | tissue <- rep(c("colon", "intestine", "liver"), each = 3)
 5 | 
 6 | # Stranded mutation matrix: count per strand, test for bias, plot.
 7 | stranded_path <- system.file("states/mut_mat_s_data.rds",
 8 |   package = "MutationalPatterns"
 9 | )
10 | counts_stranded <- strand_occurrences(readRDS(stranded_path), by = tissue)
11 | bias_stranded <- strand_bias_test(counts_stranded)
12 | output <- plot_strand_bias(bias_stranded)
13 | 
14 | # The same steps for the replication bias matrix.
15 | repli_path <- system.file("states/mut_mat_repli.rds",
16 |   package = "MutationalPatterns"
17 | )
18 | counts_repli <- strand_occurrences(readRDS(repli_path), by = tissue)
19 | bias_repli <- strand_bias_test(counts_repli)
20 | output_repli <- plot_strand_bias(bias_repli)
21 | 
22 | # Replication bias again, using p instead of fdr.
23 | output_pval <- plot_strand_bias(bias_repli, sig_type = "p")
24 | 
25 | # Replication bias with multiple (max 3) significance cutoffs.
26 | bias_multistars <- strand_bias_test(
27 |   counts_repli,
28 |   p_cutoffs = c(0.05, 0.01, 0.005),
29 |   fdr_cutoffs = c(0.1, 0.05, 0.01)
30 | )
31 | output_multistars <- plot_strand_bias(bias_multistars)
32 | 
33 | 
34 | test_that("Output has correct class", {
35 |   expect_true(inherits(output, "gg"))
36 |   expect_true(inherits(output_repli, "gg"))
37 |   expect_true(inherits(output_pval, "gg"))
38 |   expect_true(inherits(output_multistars, "gg"))
39 | })
40 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MutationalPatterns
 2 | 
 3 | The MutationalPatterns R package provides a comprehensive set of flexible
 4 | functions for easy finding and plotting of mutational patterns in mutation
 5 | catalogues. Single nucleotide variants (SNVs), insertions and deletions
 6 | (Indels), double base substitutions (DBSs) and larger multi base substitutions
 7 | (MBSs) are supported.
 8 | 
 9 | ## Citation
10 | 
11 | Please give credit and cite the MutationalPatterns R Package when you use it for
12 | your data analysis. A paper for the newest version of the package is published in 
13 | [BMC Genomics](https://doi.org/10.1186/s12864-022-08357-3).
14 | The original MutationalPatterns paper is published in Genome Medicine
15 | [Blokzijl et al. 2018](https://doi.org/10.1186/s13073-018-0539-0).
16 | 
17 | 
18 | ## Dev version
19 | 
20 | To use the current development version of the package, you should first clone it
21 | from GitHub. Next, you can load it using:
22 | ```r
23 | devtools::load_all(DIR)
24 | ```
25 | 
26 | A raw version of the new vignette can be found
27 | [here](../master/vignettes/Introduction_to_MutationalPatterns.Rmd)
28 | 
29 | ## Bioconductor version
30 | 
31 | Please use [Bioconductor](http://bioconductor.org/packages/MutationalPatterns/)
32 | to install the released version of this package.  In [Introduction to MutationalPatterns](https://bioconductor.org/packages/release/bioc/vignettes/MutationalPatterns/inst/doc/Introduction_to_MutationalPatterns.html) you can find
33 | comprehensive examples and explanations for the functions this package
34 | provides.
35 | 


--------------------------------------------------------------------------------
/tests/testthat/test-type_context.R:
--------------------------------------------------------------------------------
 1 | context("test-type_context")
 2 | 
 3 | # Packaged example vcfs; only the first sample is used as input.
 4 | vcf_path <- system.file("states/read_vcfs_as_granges_output.rds",
 5 |   package = "MutationalPatterns"
 6 | )
 7 | first_sample <- readRDS(vcf_path)[[1]]
 8 | 
 9 | ## Load the corresponding reference genome.
10 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
11 | library(ref_genome, character.only = TRUE)
12 | 
13 | # Type and context with the default extension and with extension = 2.
14 | tc_default <- type_context(first_sample, ref_genome)
15 | tc_extended <- type_context(first_sample, ref_genome, extension = 2)
16 | 
17 | 
18 | # Unit tests
19 | test_that("Output has correct class", {
20 |   expect_true(inherits(tc_default, "list"))
21 |   expect_true(inherits(tc_default$types, "character"))
22 |   expect_true(inherits(tc_default$context, "character"))
23 |   expect_true(inherits(tc_extended, "list"))
24 |   expect_true(inherits(tc_extended$types, "character"))
25 |   expect_true(inherits(tc_extended$context, "character"))
26 | })
27 | 
28 | test_that("Output size is correct", {
29 |   n_muts <- length(first_sample)
30 |   expect_equal(length(tc_default$types), n_muts)
31 |   expect_equal(length(tc_default$context), n_muts)
32 |   expect_equal(length(tc_extended$types), n_muts)
33 |   expect_equal(length(tc_extended$context), n_muts)
34 | })
35 | 
36 | test_that("GRanges with 0 muts as input gives list with two empty vectors", {
37 |   no_muts <- first_sample[0]
38 |   expect_warning({
39 |     tc_empty <- type_context(no_muts, ref_genome)
40 |   })
41 |   expect_true(inherits(tc_empty, "list"))
42 |   expect_equal(length(tc_empty$types), 0)
43 |   expect_equal(length(tc_empty$context), 0)
44 | })
45 | 


--------------------------------------------------------------------------------
/tests/testthat/test-lengthen_mut_matrix.R:
--------------------------------------------------------------------------------
 1 | context("test-lengthen_mut_matrix")
 2 | 
 3 | # Inputs: a mutation matrix and indel counts, both split by region.
 4 | input <- readRDS(system.file("states/mut_mat_splitregions.rds",
 5 |   package = "MutationalPatterns"
 6 | ))
 7 | input_indel <- readRDS(system.file("states/blood_indels_counts_split_region.rds",
 8 |   package = "MutationalPatterns"
 9 | ))
10 | 
11 | # Lengthen both inputs.
12 | output <- lengthen_mut_matrix(input)
13 | output_indel <- lengthen_mut_matrix(input_indel)
14 | 
15 | 
16 | test_that("Output has correct class", {
17 |   expect_true(inherits(output, "matrix"))
18 |   expect_true(inherits(output_indel, "matrix"))
19 | })
20 | 
21 | # Number of distinct regions in a matrix: the unique column-name parts that
22 | # remain after removing everything up to and including the last dot.
23 | count_regions <- function(mat) {
24 |   region_names <- stringr::str_remove(colnames(mat), ".*\\.")
25 |   length(unique(region_names))
26 | }
27 | nr_regions <- count_regions(input)
28 | nr_regions_indel <- count_regions(input_indel)
29 | 
30 | test_that("Output has correct size", {
31 |   expect_equal(
32 |     dim(output),
33 |     c(nrow(input) * nr_regions, ncol(input) / nr_regions)
34 |   )
35 |   expect_equal(
36 |     dim(output_indel),
37 |     c(nrow(input_indel) * nr_regions_indel, ncol(input_indel) / nr_regions_indel)
38 |   )
39 | })
40 | 
41 | # Reference results stored with the package.
42 | expected <- readRDS(system.file("states/mut_mat_longregions.rds",
43 |   package = "MutationalPatterns"
44 | ))
45 | expected_indel <- readRDS(system.file("states/blood_indels_longmatrix_split_region.rds",
46 |   package = "MutationalPatterns"
47 | ))
48 | 
49 | test_that("Output transforms correctly", {
50 |   expect_equal(output, expected)
51 |   expect_equal(output_indel, expected_indel)
52 | })
53 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_contribution.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_contribution")
 2 | 
 3 | 
 4 | # Load nmf data
 5 | nmf_res <- readRDS(system.file("states/nmf_res_data.rds",
 6 |   package = "MutationalPatterns"
 7 | ))
 8 | 
 9 | ## Plot the relative contribution
10 | output <- plot_contribution(nmf_res$contribution)
11 | 
12 | ## Plot the absolute contribution.
13 | ## When plotting absolute NMF results, the signatures need to be included.
14 | output_absolute <- plot_contribution(nmf_res$contribution,
15 |   nmf_res$signatures,
16 |   mode = "absolute"
17 | )
18 | 
19 | 
20 | ## Only plot a subset of samples
21 | output_subset <- plot_contribution(nmf_res$contribution,
22 |   nmf_res$signatures,
23 |   mode = "absolute",
24 |   index = c(1, 2)
25 | )
26 | ## Flip the coordinates
27 | output_flipcoord <- plot_contribution(nmf_res$contribution,
28 |   nmf_res$signatures,
29 |   mode = "absolute",
30 |   coord_flip = TRUE
31 | )
32 | 
33 | # Use signature refitting results
34 | fit_res <- readRDS(system.file("states/snv_refit.rds",
35 |   package = "MutationalPatterns"
36 | ))
37 | 
38 | ## Refitting results in relative mode (no signatures supplied)
39 | output_sigfit <- plot_contribution(fit_res$contribution)
40 | 
41 | ## Refitting results in absolute mode
42 | output_sigfit_absolute <- plot_contribution(fit_res$contribution,
43 |   mode = "absolute"
44 | )
45 | test_that("Output has correct class", {
46 |   expect_true(inherits(output, "gg"))
47 |   expect_true(inherits(output_absolute, "gg"))
48 |   expect_true(inherits(output_subset, "gg"))
49 |   expect_true(inherits(output_flipcoord, "gg"))
50 |   expect_true(inherits(output_sigfit, "gg"))
51 |   expect_true(inherits(output_sigfit_absolute, "gg"))
52 | })
53 | 


--------------------------------------------------------------------------------
/man/plot_main_dbs_contexts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_main_dbs_contexts.R
 3 | \name{plot_main_dbs_contexts}
 4 | \alias{plot_main_dbs_contexts}
 5 | \title{Plot the main DBS contexts}
 6 | \usage{
 7 | plot_main_dbs_contexts(counts, same_y = FALSE)
 8 | }
 9 | \arguments{
10 | \item{counts}{A tibble containing the number of DBS per COSMIC context.}
11 | 
12 | \item{same_y}{A boolean describing whether the same y axis should be used for all samples.}
13 | }
14 | \value{
15 | A ggplot figure.
16 | }
17 | \description{
18 | Plot the main DBS contexts
19 | }
20 | \details{
21 | Plots the number of DBS per main COSMIC context per sample.
22 | The contexts are only divided by REF and not by ALT.
23 | It takes a tibble with counts as its input. This tibble can be generated by count_dbs_contexts
24 | Each sample is plotted in a separate facet.
25 | The same y axis can be used for all samples or a separate y axis can be used.
26 | }
27 | \examples{
28 | ## Get The DBS counts
29 | ## See 'count_dbs_contexts()' for more info on how to do this.
30 | dbs_counts <- readRDS(system.file("states/blood_dbs_counts.rds",
31 |   package = "MutationalPatterns"
32 | ))
33 | 
34 | ## Plot contexts
35 | plot_main_dbs_contexts(dbs_counts)
36 | 
37 | ## Use the same y axis for all samples.
38 | plot_main_dbs_contexts(dbs_counts, same_y = TRUE)
39 | }
40 | \seealso{
41 | \code{\link{count_dbs_contexts}}, \code{\link{plot_dbs_contexts}}
42 | 
43 | Other DBS: 
44 | \code{\link{count_dbs_contexts}()},
45 | \code{\link{get_dbs_context}()},
46 | \code{\link{plot_compare_dbs}()},
47 | \code{\link{plot_dbs_contexts}()}
48 | }
49 | \concept{DBS}
50 | 


--------------------------------------------------------------------------------
/man/region_cossim-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/S4_class.R
 3 | \docType{class}
 4 | \name{region_cossim-class}
 5 | \alias{region_cossim-class}
 6 | \title{An S4 class to store the results of a regional mutation pattern similarity
 7 | analysis}
 8 | \description{
 9 | An S4 class to store the results of a regional mutation pattern similarity
10 | analysis
11 | }
12 | \section{Slots}{
13 | 
14 | \describe{
15 | \item{\code{sim_tb}}{A tibble containing the calculated similarities of the windows.}
16 | 
17 | \item{\code{pos_tb}}{A tibble containing the mutation positions.}
18 | 
19 | \item{\code{chr_lengths}}{Vector containing the chromosome lengths.}
20 | 
21 | \item{\code{window_size}}{The number of mutations in a window.}
22 | 
23 | \item{\code{max_window_size_gen}}{The maximum size of a window before it is removed.}
24 | 
25 | \item{\code{ref_genome}}{BSgenome reference genome object}
26 | 
27 | \item{\code{muts_per_chr}}{Vector containing the number of mutations per chromosome.}
28 | 
29 | \item{\code{mean_window_size}}{The mean length of the genome covered by the windows.}
30 | 
31 | \item{\code{stepsize}}{The number of mutations that a window slides in each step.}
32 | 
33 | \item{\code{extension}}{The number of bases that are extracted upstream and
34 | downstream of the base substitutions to create the mutation matrices.}
35 | 
36 | \item{\code{chromosomes}}{Vector of chromosome/contig names of the reference genome
37 | to be plotted.}
38 | 
39 | \item{\code{exclude_self_mut_mat}}{Boolean describing whether the mutations in a
40 | window should be subtracted from the global mutation matrix.}
41 | }}
42 | 
43 | 


--------------------------------------------------------------------------------
/R/mut_context.R:
--------------------------------------------------------------------------------
 1 | #' Retrieve context of base substitutions
 2 | #'
 3 | #' A function to extract the bases upstream (5') and downstream (3') of the
 4 | #' base substitutions from the reference genome. The user can choose how many
 5 | #' bases are extracted.
 6 | #'
 7 | #' @param vcf A GRanges object
 8 | #' @param ref_genome Reference genome (passed to BSgenome::getBSgenome())
 9 | #' @param extension The number of bases that are extracted upstream and
10 | #' downstream of the base substitutions. (Default: 1).
11 | #' @return Character vector with the context of the base substitutions
12 | #'
13 | #' @examples
14 | #' ## See the 'read_vcfs_as_granges()' example for how we obtained the
15 | #' ## following data:
16 | #' vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
17 | #'   package = "MutationalPatterns"
18 | #' ))
19 | #'
20 | #' ## Load the corresponding reference genome.
21 | #' ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
22 | #' library(ref_genome, character.only = TRUE)
23 | #'
24 | #' ## Get the standard context
25 | #' mut_context <- mut_context(vcfs[[1]], ref_genome)
26 | #'
27 | #' ## Get larger context
28 | #' mut_context_larger <- mut_context(vcfs[[1]], ref_genome, extension = 2)
29 | #' @seealso
30 | #' \code{\link{read_vcfs_as_granges}}
31 | #'
32 | #' @export
33 | 
34 | mut_context <- function(vcf, ref_genome, extension = 1) {
35 |   # Check that the seqnames of the gr and ref_genome match
36 |   .check_chroms(vcf, ref_genome)
37 | 
38 |   # Get the sequence of each variant, widened by `extension` bases on both sides.
39 |   vcf_context <- as.character(Biostrings::getSeq(
40 |     BSgenome::getBSgenome(ref_genome),
41 |     seqnames(vcf),
42 |     start(vcf) - extension,
43 |     end(vcf) + extension
44 |   ))
45 |   return(vcf_context)
46 | }
47 | 


--------------------------------------------------------------------------------
/man/merge_signatures.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/merge_signatures.R
 3 | \name{merge_signatures}
 4 | \alias{merge_signatures}
 5 | \title{Merge signatures based on cosine similarity}
 6 | \usage{
 7 | merge_signatures(
 8 |   signatures,
 9 |   cos_sim_cutoff = 0.8,
10 |   merge_char = ";",
11 |   verbose = TRUE
12 | )
13 | }
14 | \arguments{
15 | \item{signatures}{Signature matrix (dimensions: x mutation types
16 | X n signatures)}
17 | 
18 | \item{cos_sim_cutoff}{Cutoff for cosine similarity. Signatures are merged when their
19 | cosine similarity is higher than the limit. Default: 0.8}
20 | 
21 | \item{merge_char}{Character used to merge the signature names. This character shouldn't
22 | be in the signature names beforehand. Default: ";"}
23 | 
24 | \item{verbose}{Verbosity. If TRUE it shows which signatures got merged. Default: TRUE}
25 | }
26 | \value{
27 | Signature matrix (dimensions: x mutation types
28 | X n signatures)
29 | }
30 | \description{
31 | This function merges signatures based on their cosine similarity.
32 | It iteratively merges the two signatures with the highest cosine similarity.
33 | Merging is stopped when the maximum cosine similarity is lower than the limit.
34 | }
35 | \examples{
36 | 
37 | ## Get signatures
38 | signatures <- get_known_signatures()
39 | 
40 | ## Merge signatures
41 | merge_signatures(signatures)
42 | 
43 | 
44 | ## Merge signatures using a stricter cutoff
45 | merge_signatures(signatures, cos_sim_cutoff = 0.9)
46 | 
47 | ## Merge signatures using a different merging character
48 | merge_signatures(signatures, merge_char = "_")
49 | 
50 | ## Merge signatures silently
51 | merge_signatures(signatures, verbose = FALSE)
52 | }
53 | 


--------------------------------------------------------------------------------
/man/plot_96_profile.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_96_profile.R
 3 | \name{plot_96_profile}
 4 | \alias{plot_96_profile}
 5 | \title{Plot 96 trinucleotide profile}
 6 | \usage{
 7 | plot_96_profile(mut_matrix, colors = NA, ymax = 0.2, condensed = FALSE)
 8 | }
 9 | \arguments{
10 | \item{mut_matrix}{96 trinucleotide profile matrix}
11 | 
12 | \item{colors}{Optional 6 value color vector.}
13 | 
14 | \item{ymax}{Y axis maximum value, default = 0.2}
15 | 
16 | \item{condensed}{More condensed plotting format. Default = FALSE.}
17 | }
18 | \value{
19 | 96 trinucleotide profile plot
20 | }
21 | \description{
22 | Plot relative contribution of 96 trinucleotides
23 | }
24 | \examples{
25 | ## See the 'mut_matrix()' example for how we obtained the
26 | ## mutation matrix information:
27 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
28 |   package = "MutationalPatterns"
29 | ))
30 | 
31 | ## Plot the 96-profile of three samples
32 | plot_96_profile(mut_mat[, c(1, 4, 7)])
33 | 
34 | ## Plot a condensed profile
35 | plot_96_profile(mut_mat[, c(1, 4, 7)], condensed = TRUE)
36 | 
37 | ## It's also possible to plot signatures, for example signatures
38 | ## generated with NMF
39 | ## See 'extract_signatures()' on how we obtained these signatures.
40 | nmf_res <- readRDS(system.file("states/nmf_res_data.rds",
41 |   package = "MutationalPatterns"
42 | ))
43 | 
44 | ## Optionally, provide signature names
45 | colnames(nmf_res$signatures) <- c("Signature A", "Signature B")
46 | 
47 | ## Generate the plot
48 | plot_96_profile(nmf_res$signatures)
49 | }
50 | \seealso{
51 | \code{\link{mut_matrix}},
52 | \code{\link{plot_profile_heatmap}},
53 | \code{\link{plot_river}}
54 | }
55 | 


--------------------------------------------------------------------------------
/man/plot_strand.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_strand.R
 3 | \name{plot_strand}
 4 | \alias{plot_strand}
 5 | \title{Plot strand per base substitution type}
 6 | \usage{
 7 | plot_strand(strand_bias_df, mode = c("relative", "absolute"), colors = NA)
 8 | }
 9 | \arguments{
10 | \item{strand_bias_df}{data.frame, result from strand_bias function}
11 | 
12 | \item{mode}{Either "absolute" for absolute number of mutations, or
13 | "relative" for relative contribution, default = "relative"}
14 | 
15 | \item{colors}{Optional color vector for plotting with 6 values}
16 | }
17 | \value{
18 | Barplot
19 | }
20 | \description{
21 | For each base substitution type and transcriptional strand the total number
22 | of mutations and the relative contribution within a group is returned.
23 | }
24 | \examples{
25 | ## See the 'mut_matrix_stranded()' example for how we obtained the
26 | ## following mutation matrix.
27 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds",
28 |   package = "MutationalPatterns"
29 | ))
30 | 
31 | ## Load a reference genome.
32 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
33 | library(ref_genome, character.only = TRUE)
34 | 
35 | tissue <- c(
36 |   "colon", "colon", "colon",
37 |   "intestine", "intestine", "intestine",
38 |   "liver", "liver", "liver"
39 | )
40 | 
41 | strand_counts <- strand_occurrences(mut_mat_s, by = tissue)
42 | 
43 | ## Plot the strand in relative mode.
44 | strand_plot <- plot_strand(strand_counts)
45 | 
46 | ## Or absolute mode.
47 | strand_plot <- plot_strand(strand_counts, mode = "absolute")
48 | }
49 | \seealso{
50 | \code{\link{mut_matrix_stranded}},
51 | \code{\link{strand_occurrences}},
52 | \code{\link{plot_strand_bias}}
53 | }
54 | 


--------------------------------------------------------------------------------
/man/plot_main_indel_contexts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_main_indel_contexts.R
 3 | \name{plot_main_indel_contexts}
 4 | \alias{plot_main_indel_contexts}
 5 | \title{Plot the main indel contexts}
 6 | \usage{
 7 | plot_main_indel_contexts(counts, same_y = FALSE)
 8 | }
 9 | \arguments{
10 | \item{counts}{A tibble containing the number of indels per COSMIC context.}
11 | 
12 | \item{same_y}{A boolean describing whether the same y axis should be used for all samples.}
13 | }
14 | \value{
15 | A ggplot figure.
16 | }
17 | \description{
18 | Plot the main indel contexts
19 | }
20 | \details{
21 | Plots the number of indels per main COSMIC context per sample.
22 | The contexts are not subdivided into the number of repeats/microhomology length.
23 | It takes a tibble with counts as its input. This tibble can be generated by count_indel_contexts
24 | Each sample is plotted in a separate facet.
25 | The same y axis can be used for all samples or a separate y axis can be used.
26 | }
27 | \examples{
28 | ## Get The indel counts
29 | ## See 'count_indel_contexts()' for more info on how to do this.
30 | indel_counts <- readRDS(system.file("states/blood_indel_counts.rds",
31 |   package = "MutationalPatterns"
32 | ))
33 | 
34 | ## Plot contexts
35 | plot_main_indel_contexts(indel_counts)
36 | 
37 | ## Use the same y axis for all samples.
38 | plot_main_indel_contexts(indel_counts, same_y = TRUE)
39 | }
40 | \seealso{
41 | \code{\link{count_indel_contexts}}, \code{\link{plot_indel_contexts}}
42 | 
43 | Other Indels: 
44 | \code{\link{count_indel_contexts}()},
45 | \code{\link{get_indel_context}()},
46 | \code{\link{plot_compare_indels}()},
47 | \code{\link{plot_indel_contexts}()}
48 | }
49 | \concept{Indels}
50 | 


--------------------------------------------------------------------------------
/man/plot_dbs_contexts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_dbs_contexts.R
 3 | \name{plot_dbs_contexts}
 4 | \alias{plot_dbs_contexts}
 5 | \title{Plot the DBS contexts}
 6 | \usage{
 7 | plot_dbs_contexts(counts, same_y = FALSE, condensed = FALSE)
 8 | }
 9 | \arguments{
10 | \item{counts}{A tibble containing the number of DBS per COSMIC context.}
11 | 
12 | \item{same_y}{A boolean describing whether the same y axis should be used for all samples.}
13 | 
14 | \item{condensed}{More condensed plotting format. Default = FALSE.}
15 | }
16 | \value{
17 | A ggplot figure.
18 | }
19 | \description{
20 | Plot the DBS contexts
21 | }
22 | \details{
23 | Plots the number of DBS COSMIC context per sample.
24 | It takes a tibble with counts as its input. This tibble can be generated by count_dbs_contexts
25 | Each sample is plotted in a separate facet.
26 | The same y axis can be used for all samples or a separate y axis can be used.
27 | }
28 | \examples{
29 | ## Get The DBS counts
30 | ## See 'count_dbs_contexts()' for more info on how to do this.
31 | dbs_counts <- readRDS(system.file("states/blood_dbs_counts.rds",
32 |   package = "MutationalPatterns"
33 | ))
34 | 
35 | ## Plot contexts
36 | plot_dbs_contexts(dbs_counts)
37 | 
38 | ## Use the same y axis for all samples.
39 | plot_dbs_contexts(dbs_counts, same_y = TRUE)
40 | 
41 | ## Create a more condensed plot
42 | plot_dbs_contexts(dbs_counts, condensed = TRUE)
43 | }
44 | \seealso{
45 | \code{\link{count_dbs_contexts}}, \code{\link{plot_main_dbs_contexts}}
46 | 
47 | Other DBS: 
48 | \code{\link{count_dbs_contexts}()},
49 | \code{\link{get_dbs_context}()},
50 | \code{\link{plot_compare_dbs}()},
51 | \code{\link{plot_main_dbs_contexts}()}
52 | }
53 | \concept{DBS}
54 | 


--------------------------------------------------------------------------------
/man/get_mut_type.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_mut_type.R
 3 | \name{get_mut_type}
 4 | \alias{get_mut_type}
 5 | \title{Get variants with mut_type from GRanges}
 6 | \usage{
 7 | get_mut_type(
 8 |   vcf_list,
 9 |   type = c("snv", "indel", "dbs", "mbs"),
10 |   predefined_dbs_mbs = FALSE
11 | )
12 | }
13 | \arguments{
14 | \item{vcf_list}{GRanges/GRangesList}
15 | 
16 | \item{type}{The type of variant that will be returned.}
17 | 
18 | \item{predefined_dbs_mbs}{Boolean. Whether dbs and mbs variants have been
19 | predefined in your vcf. This function by default assumes that dbs and mbs
20 | variants are present in the vcf as snvs, which are positioned next to each
21 | other. If your dbs/mbs variants are called separately you should set this
22 | argument to TRUE. (default = FALSE)}
23 | }
24 | \value{
25 | GRanges/GRangesList of the desired mutation type.
26 | }
27 | \description{
28 | Get the variants of a certain mutation type from a GRanges or GRangesList object.
29 | All other variants will be filtered out.
30 | By default, it is assumed that DBS/MBSs are called as separate SNVs.
31 | They are merged into single variants.
32 | The type of variant can be chosen with type.
33 | }
34 | \examples{
35 | ## Get a GRanges list object.
36 | ## See 'read_vcfs_as_granges' for more info on how to do this.
37 | grl <- readRDS(system.file("states/blood_grl.rds",
38 |   package = "MutationalPatterns"
39 | ))
40 | 
41 | ## Here we only use two samples to reduce runtime
42 | grl <- grl[1:2]
43 | 
44 | ## Get a specific mutation type.
45 | snv_grl <- get_mut_type(grl, "snv")
46 | indel_grl <- get_mut_type(grl, "indel")
47 | dbs_grl <- get_mut_type(grl, "dbs")
48 | mbs_grl <- get_mut_type(grl, "mbs")
49 | }
50 | \seealso{
51 | \code{\link{read_vcfs_as_granges}}
52 | }
53 | 


--------------------------------------------------------------------------------
/tests/testthat/test-mut_matrix.R:
--------------------------------------------------------------------------------
 1 | context("test-mut_matrix")
 2 | 
 3 | # To test mut_matrix, we need to load the reference genome first.
 4 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
 5 | library(ref_genome, character.only = TRUE)
 6 | 
 7 | # We re-use the data that is shipped with the package.
 8 | input <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
 9 |   package = "MutationalPatterns"
10 | ))
11 | 
12 | # Expected output of the default call (compared in "transforms correctly")
13 | expected <- readRDS(system.file("states/mut_mat_data.rds",
14 |   package = "MutationalPatterns"
15 | ))
16 | 
17 | # Run function with the default extension and with a larger one (extension = 2)
18 | output <- mut_matrix(input, ref_genome)
19 | output_longer <- mut_matrix(vcf_list = input, ref_genome = ref_genome, extension = 2)
20 | 
21 | 
22 | # Perform tests on class, dimensions, variant totals and accepted input types
23 | 
24 | test_that("Output has correct class", {
25 |   expect_true(inherits(output, "matrix"))
26 |   expect_true(inherits(output_longer, "matrix"))
27 | })
28 | 
29 | test_that("Output has correct dimensions", {
30 |   expect_equal(dim(output), c(96, 9))
31 |   expect_equal(dim(output_longer), c(1536, 9))
32 | })
33 | 
34 | test_that("Number of variants in output is correct", {
35 |   expect_equal(colSums(output), elementNROWS(input))
36 |   expect_equal(colSums(output_longer), elementNROWS(input))
37 | })
38 | 
39 | test_that("transforms correctly", {
40 |   expect_equal(output, expected)
41 | })
42 | 
43 | test_that("a list is also acceptable input", {
44 |   output_list <- mut_matrix(as.list(input), ref_genome)
45 | 
46 |   expect_equal(output_list, output)
47 |   expect_equal(output_list, expected)
48 | })
49 | 
50 | test_that("A single GR can also be used as input", {
51 |   output_singlesample <- mut_matrix(input[[1]], ref_genome)
52 |   expect_true(inherits(output_singlesample, "matrix"))
53 |   expect_equal(dim(output_singlesample), c(96, 1))
54 | })
55 | 


--------------------------------------------------------------------------------
/tests/testthat/test-fit_to_signatures.R:
--------------------------------------------------------------------------------
 1 | context("test-fit_to_signatures")
 2 | 
 3 | # Get the mutation matrix that is shipped with the package
 4 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
 5 |   package = "MutationalPatterns"
 6 | ))
 7 | 
 8 | # Get signatures (default signature set of get_known_signatures)
 9 | signatures <- get_known_signatures()
10 | 
11 | # Fit the mutation matrix to the signatures
12 | output <- fit_to_signatures(mut_mat, signatures)
13 | 
14 | # Get the expected refit result
15 | expected <- readRDS(system.file("states/snv_refit.rds",
16 |   package = "MutationalPatterns"
17 | ))
18 | 
19 | # Run tests
20 | test_that("Output has correct class", {
21 |   expect_true(inherits(output, "list"))
22 |   expect_true(inherits(output$contribution, "matrix"))
23 |   expect_true(inherits(output$reconstructed, "matrix"))
24 | })
25 | 
26 | test_that("Output is equal to expected", {
27 |   expect_equal(output, expected)
28 | })
29 | 
30 | # Get indel counts
31 | indel_counts <- readRDS(system.file("states/blood_indel_counts.rds", package = "MutationalPatterns"))
32 | 
33 | # Get indel signatures (overwrites the signatures used above)
34 | signatures <- get_known_signatures("indel")
35 | 
36 | # Get the expected indel refit result (overwrites the expectation used above)
37 | expected <- readRDS(system.file("states/indel_refit.rds",
38 |   package = "MutationalPatterns"
39 | ))
40 | 
41 | # Run tests
42 | test_that("Refitting indels gives expected output.", {
43 |   output <- fit_to_signatures(indel_counts, signatures)
44 |   expect_equal(output, expected)
45 | })
46 | 
47 | # Get dbs counts; the dbs signatures and expected refit are loaded below
48 | dbs_counts <- readRDS(system.file("states/blood_dbs_counts.rds", package = "MutationalPatterns"))
49 | 
50 | signatures <- get_known_signatures("dbs")
51 | 
52 | 
53 | expected <- readRDS(system.file("states/dbs_refit.rds",
54 |   package = "MutationalPatterns"
55 | ))
56 | 
57 | test_that("Refitting dbss gives expected output.", {
58 |   output <- fit_to_signatures(dbs_counts, signatures)
59 |   expect_equal(output, expected)
60 | })
61 | 


--------------------------------------------------------------------------------
/man/fit_to_signatures.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fit_to_signatures.R
 3 | \name{fit_to_signatures}
 4 | \alias{fit_to_signatures}
 5 | \title{Find optimal nonnegative linear combination of mutation signatures to
 6 | reconstruct the mutation matrix.}
 7 | \usage{
 8 | fit_to_signatures(mut_matrix, signatures)
 9 | }
10 | \arguments{
11 | \item{mut_matrix}{mutation count matrix (dimensions: x mutation types
12 | X n samples)}
13 | 
14 | \item{signatures}{Signature matrix (dimensions: x mutation types
15 | X n signatures)}
16 | }
17 | \value{
18 | Named list with signature contributions and reconstructed
19 | mutation matrix
20 | }
21 | \description{
22 | Find the linear combination of mutation signatures that most closely
23 | reconstructs the mutation matrix by solving the nonnegative least-squares
24 | constraints problem.
25 | }
26 | \examples{
27 | 
28 | ## See the 'mut_matrix()' example for how we obtained the mutation matrix:
29 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
30 |   package = "MutationalPatterns"
31 | ))
32 | 
33 | ## Get signatures
34 | signatures <- get_known_signatures()
35 | 
36 | ## Perform the fitting
37 | fit_res <- fit_to_signatures(mut_mat, signatures)
38 | 
39 | ## This will also work for indels and dbs.
40 | ## An example is given for indels
41 | 
42 | ## Get The indel counts
43 | ## See 'count_indel_contexts()' for more info on how to do this.
44 | indel_counts <- readRDS(system.file("states/blood_indel_counts.rds",
45 |   package = "MutationalPatterns"
46 | ))
47 | 
48 | ## Get signatures
49 | signatures <- get_known_signatures("indel")
50 | 
51 | fit_to_signatures(indel_counts, signatures)
52 | }
53 | \seealso{
54 | \code{\link{mut_matrix}}, \code{\link{fit_to_signatures_strict}}, \code{\link{fit_to_signatures_bootstrapped}}
55 | }
56 | 


--------------------------------------------------------------------------------
/man/plot_192_profile.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_192_profile.R
 3 | \name{plot_192_profile}
 4 | \alias{plot_192_profile}
 5 | \title{Plot 192 trinucleotide profile}
 6 | \usage{
 7 | plot_192_profile(mut_matrix, colors = NA, ymax = 0.2, condensed = FALSE)
 8 | }
 9 | \arguments{
10 | \item{mut_matrix}{192 trinucleotide profile matrix}
11 | 
12 | \item{colors}{6 value color vector}
13 | 
14 | \item{ymax}{Y axis maximum value, default = 0.2}
15 | 
16 | \item{condensed}{More condensed plotting format. Default = FALSE.}
17 | }
18 | \value{
19 | 192 trinucleotide profile plot
20 | }
21 | \description{
22 | Plot relative contribution of 192 trinucleotides
23 | }
24 | \examples{
25 | ## See the 'mut_matrix_stranded()' example for how we obtained the
26 | ## mutation matrix with transcriptional strand information:
27 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds",
28 |   package = "MutationalPatterns"
29 | ))
30 | 
31 | ## Plot profile for some of the samples
32 | plot_192_profile(mut_mat_s[, c(1, 4, 7)])
33 | 
34 | ## You can create a more condensed version of the plot
35 | plot_192_profile(mut_mat_s[, c(1, 4, 7)], condensed = TRUE)
36 | 
37 | ## It's also possible to plot signatures, for example signatures
38 | ## generated with NMF
39 | ## See 'extract_signatures()' on how we obtained these signatures.
40 | nmf_res_strand <- readRDS(system.file("states/nmf_res_strand_data.rds",
41 |   package = "MutationalPatterns"
42 | ))
43 | 
44 | ## Optionally, provide signature names
45 | colnames(nmf_res_strand$signatures) <- c("Signature A", "Signature B")
46 | 
47 | ## Generate the plot
48 | plot_192_profile(nmf_res_strand$signatures)
49 | }
50 | \seealso{
51 | \code{\link{mut_matrix_stranded}},
52 | \code{\link{extract_signatures}},
53 | \code{\link{plot_96_profile}}
54 | }
55 | 


--------------------------------------------------------------------------------
/man/plot_strand_bias.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_strand_bias.R
 3 | \name{plot_strand_bias}
 4 | \alias{plot_strand_bias}
 5 | \title{Plot strand bias per base substitution type per group}
 6 | \usage{
 7 | plot_strand_bias(strand_bias, colors = NA, sig_type = c("fdr", "p"))
 8 | }
 9 | \arguments{
10 | \item{strand_bias}{data.frame, result from strand_bias function}
11 | 
12 | \item{colors}{Optional color vector with 6 values for plotting}
13 | 
14 | \item{sig_type}{The type of significance to be used. Possible values:
15 | * 'fdr' for the false discovery rate,
16 | a type of multiple testing correction;
17 | * 'p' for regular p values.}
18 | }
19 | \value{
20 | Barplot
21 | }
22 | \description{
23 | Plot strand bias per base substitution type per group
24 | }
25 | \examples{
26 | ## See the 'mut_matrix_stranded()' example for how we obtained the
27 | ## following mutation matrix.
28 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds",
29 |   package = "MutationalPatterns"
30 | ))
31 | 
32 | 
33 | tissue <- c(
34 |   "colon", "colon", "colon",
35 |   "intestine", "intestine", "intestine",
36 |   "liver", "liver", "liver"
37 | )
38 | 
39 | ## Perform the strand bias test.
40 | strand_counts <- strand_occurrences(mut_mat_s, by = tissue)
41 | strand_bias <- strand_bias_test(strand_counts)
42 | 
43 | ## Plot the strand bias.
44 | plot_strand_bias(strand_bias)
45 | 
46 | ## Use multiple (max 3) significance cutoffs.
47 | ## This will vary the number of significance stars.
48 | strand_bias_multistars <- strand_bias_test(strand_counts,
49 |   p_cutoffs = c(0.05, 0.01, 0.005),
50 |   fdr_cutoffs = c(0.1, 0.05, 0.01)
51 | )
52 | plot_strand_bias(strand_bias_multistars)
53 | }
54 | \seealso{
55 | \code{\link{mut_matrix_stranded}},
56 | \code{\link{strand_occurrences}},
57 | \code{\link{strand_bias_test}},
58 | \code{\link{plot_strand}}
59 | }
60 | 


--------------------------------------------------------------------------------
/tests/testthat/test-mutations_from_vcf.R:
--------------------------------------------------------------------------------
 1 | context("test-mutations_from_vcf")
 2 | 
 3 | # Read vcfs and use the first sample
 4 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
 5 |   package = "MutationalPatterns"
 6 | ))
 7 | vcf <- vcfs[[1]]
 8 | 
 9 | # Run function
10 | output <- mutations_from_vcf(vcf)
11 | 
12 | # Check it works on empty input
13 | output_empty <- mutations_from_vcf(vcf[0])
14 | 
15 | # Check it works when the ref and alt column names are lowercase
16 | vcf_lowercase <- vcf
17 | colnames(mcols(vcf_lowercase)) <- c("paramRangeID", "ref", "alt", "QUAL", "FILTER")
18 | output_lowercase <- mutations_from_vcf(vcf_lowercase)
19 | 
20 | # Input without a ref or alt column, used to check that an error is given
21 | vcf_noref <- vcf
22 | colnames(mcols(vcf_noref)) <- c("paramRangeID", "a", "ALT", "QUAL", "FILTER")
23 | vcf_noalt <- vcf
24 | colnames(mcols(vcf_noalt)) <- c("paramRangeID", "REF", "a", "QUAL", "FILTER")
25 | 
26 | # Unit tests
27 | test_that("Output has correct class", {
28 |   expect_true(inherits(output, c("character")))
29 |   expect_true(inherits(output_empty, c("character")))
30 | })
31 | 
32 | test_that("The 12 substitution types are returned", {
33 |   types <- sort(unique(output))
34 |   expect_equal(types, c(
35 |     "A>C", "A>G", "A>T", "C>A", "C>G", "C>T",
36 |     "G>A", "G>C", "G>T", "T>A", "T>C", "T>G"
37 |   ))
38 | })
39 | 
40 | test_that("GRanges with 0 muts as input gives empty output", {
41 |   expect_equal(length(output_empty), 0)
42 | })
43 | 
44 | test_that("Input with lowercase doesn't change result", {
45 |   expect_equal(output, output_lowercase)
46 | })
47 | 
48 | test_that("GR with no REF or ALT gives an error.", {
49 |   expect_error(
50 |     {
51 |       output_noref <- mutations_from_vcf(vcf_noref)
52 |     },
53 |     "missing a REF column"
54 |   )
55 |   expect_error(
56 |     {
57 |       output_noalt <- mutations_from_vcf(vcf_noalt)
58 |     },
59 |     "missing a ALT column"
60 |   )
61 | })
62 | 


--------------------------------------------------------------------------------
/man/get_indel_context.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_indel_context.R
 3 | \name{get_indel_context}
 4 | \alias{get_indel_context}
 5 | \title{Get indel contexts}
 6 | \usage{
 7 | get_indel_context(vcf_list, ref_genome)
 8 | }
 9 | \arguments{
10 | \item{vcf_list}{GRanges or GRangesList object containing Indel mutations.
11 | The mutations should be called similarly to HaplotypeCaller.}
12 | 
13 | \item{ref_genome}{BSgenome reference genome object}
14 | }
15 | \value{
16 | A modified version of the input grl. In each gr two columns have been added.
17 | "muttype" showing the main indel type and "muttype_sub" which shows the subtype.
18 | The subtype is either the number of repeats or the microhomology length.
19 | }
20 | \description{
21 | Get indel contexts
22 | }
23 | \details{
24 | Determines the COSMIC context from a GRanges or GRangesList object containing Indel mutations.
25 | It applies the get_indel_context_gr function to each gr in the input.
26 | It searches for repeat units both to the left and right of the indel.
27 | }
28 | \examples{
29 | 
30 | ## Get a GRangesList or GRanges object with only indels.
31 | ## See 'read_vcfs_as_granges' or 'get_mut_type' for more info on how to do this.
32 | indel_grl <- readRDS(system.file("states/blood_grl_indel.rds",
33 |   package = "MutationalPatterns"
34 | ))
35 | 
36 | ## Load the corresponding reference genome.
37 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
38 | library(ref_genome, character.only = TRUE)
39 | 
40 | ## Get the indel contexts
41 | get_indel_context(indel_grl, ref_genome)
42 | }
43 | \seealso{
44 | \code{\link{read_vcfs_as_granges}}, \code{\link{get_mut_type}}
45 | 
46 | Other Indels: 
47 | \code{\link{count_indel_contexts}()},
48 | \code{\link{plot_compare_indels}()},
49 | \code{\link{plot_indel_contexts}()},
50 | \code{\link{plot_main_indel_contexts}()}
51 | }
52 | \concept{Indels}
53 | 


--------------------------------------------------------------------------------
/tests/testthat/test-get_mut_type.R:
--------------------------------------------------------------------------------
 1 | context("test-get_mut_type")
 2 | 
 3 | # Get a grl with variants.
 4 | grl <- readRDS(system.file("states/blood_grl.rds",
 5 |   package = "MutationalPatterns"
 6 | ))
 7 | 
 8 | # Only use two samples to reduce runtime
 9 | grl <- grl[1:2]
10 | 
11 | ## Get specific mutation types from the grl and from single/empty/subset grs.
12 | snv_grl <- get_mut_type(grl, "snv")
13 | indel_grl <- get_mut_type(grl, "indel")
14 | dbs_grl <- get_mut_type(grl, "dbs")
15 | mbs_grl <- get_mut_type(grl, "mbs")
16 | gr_singlesample <- get_mut_type(grl[[1]], type = "dbs")
17 | empty_gr <- get_mut_type(grl[[1]][0], type = "dbs")
18 | gr_nodbs <- get_mut_type(grl[[1]][1:20], type = "dbs")
19 | 
20 | # Replace the names of each gr in indel_grl with sequential indices.
21 | remove_names_gr <- function(gr) {
22 |   names(gr) <- seq_along(gr)
23 |   return(gr)
24 | }
25 | indel_grl <- purrr::map(as.list(indel_grl), remove_names_gr) %>%
26 |   GRangesList()
27 | 
28 | expected_indel_grl <- readRDS(system.file("states/blood_grl_indel.rds",
29 |   package = "MutationalPatterns"
30 | ))[1:2]
31 | 
32 | 
33 | test_that("Output has correct class", {
34 |   expect_true(inherits(snv_grl, c("GRanges", "CompressedGRangesList")))
35 |   expect_true(inherits(indel_grl, c("GRanges", "CompressedGRangesList")))
36 |   expect_true(inherits(dbs_grl, c("GRanges", "CompressedGRangesList")))
37 |   expect_true(inherits(mbs_grl, c("GRanges", "CompressedGRangesList")))
38 |   expect_true(inherits(gr_singlesample, c("GRanges")))
39 |   expect_true(inherits(empty_gr, c("GRanges")))
40 |   expect_true(inherits(gr_nodbs, c("GRanges")))
41 | })
42 | 
43 | test_that("Output is equal to expected", {
44 |   expect_equal(indel_grl, expected_indel_grl)
45 | })
46 | 
47 | test_that("Empty gr is returned when a mut type is not present", {
48 |   expect_equal(length(gr_nodbs), 0) # dbs taken from the first 20 variants
49 | })
50 | 
51 | test_that("Empty gr as input results in an empty output gr", {
52 |   expect_equal(length(empty_gr), 0) # dbs taken from a zero-length gr
53 | })
54 | 


--------------------------------------------------------------------------------
/man/plot_bootstrapped_contribution.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_bootstrapped_contribution.R
 3 | \name{plot_bootstrapped_contribution}
 4 | \alias{plot_bootstrapped_contribution}
 5 | \title{Plot the bootstrapped signature contributions}
 6 | \usage{
 7 | plot_bootstrapped_contribution(
 8 |   contri_boots,
 9 |   mode = c("absolute", "relative"),
10 |   plot_type = c("jitter", "barplot", "dotplot")
11 | )
12 | }
13 | \arguments{
14 | \item{contri_boots}{matrix showing signature contributions across bootstrap iterations.}
15 | 
16 | \item{mode}{Either "absolute" for absolute number of mutations, or
17 | "relative" for relative contribution, default = "absolute"}
18 | 
19 | \item{plot_type}{Either "jitter" for a jitter plot, "barplot" for a barplot, or "dotplot" for a dotplot}
20 | }
21 | \value{
22 | A ggplot2 graph
23 | }
24 | \description{
25 | Plot the signature contributions retrieved with 'fit_to_signatures_bootstrapped'.
26 | The function can plot both the absolute or the relative signature contribution.
27 | The graph can be plotted as a jitter plot, a barplot or a dotplot.
28 | }
29 | \examples{
30 | ## Get the bootstrapped signature contributions
31 | ## See 'fit_to_signatures_bootstrapped()' for more info on how to do this.
32 | contri_boots <- readRDS(system.file("states/bootstrapped_snv_refit.rds",
33 |   package = "MutationalPatterns"
34 | ))
35 | 
36 | ## Plot bootstrapped contribution
37 | plot_bootstrapped_contribution(contri_boots)
38 | 
39 | ## Plot bootstrapped contribution with relative contributions
40 | plot_bootstrapped_contribution(contri_boots, mode = "relative")
41 | 
42 | ## Plot bootstrapped contribution with a barplot
43 | plot_bootstrapped_contribution(contri_boots, plot_type = "barplot")
44 | 
45 | ## Plot bootstrapped contribution with a dotplot
46 | plot_bootstrapped_contribution(contri_boots, plot_type = "dotplot", mode = "absolute")
47 | }
48 | 


--------------------------------------------------------------------------------
/man/plot_profile_region.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_profile_region.R
 3 | \name{plot_profile_region}
 4 | \alias{plot_profile_region}
 5 | \title{Plot 96 trinucleotide profile per subgroup}
 6 | \usage{
 7 | plot_profile_region(
 8 |   mut_matrix,
 9 |   mode = c("relative_sample", "relative_sample_feature", "absolute"),
10 |   colors = NULL,
11 |   ymax = 0.2,
12 |   condensed = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{mut_matrix}{Mutation matrix}
17 | 
18 | \item{mode}{'relative_sample', 'relative_sample_feature' or 'absolute'
19 | When 'relative_sample', the number of variants will be shown
20 | divided by the total number of variants in that sample.
21 | When 'relative_sample_feature', the number of variants will be shown
22 | divided by the total number of variants in that sample and genomic region.}
23 | 
24 | \item{colors}{6 value color vector}
25 | 
26 | \item{ymax}{Y axis maximum value, default = 0.2}
27 | 
28 | \item{condensed}{More condensed plotting format. Default = FALSE.}
29 | }
30 | \value{
31 | 96 trinucleotide profile plot per region
32 | }
33 | \description{
34 | Plot relative contribution of 96 trinucleotides per subgroup.
35 | This can be genomic regions but could also be other subsets.
36 | The function uses a matrix generated by 'lengthen_mut_matrix()'
37 | as its input.
38 | }
39 | \examples{
40 | ## See the 'lengthen_mut_matrix()' example for how we obtained the
41 | ## mutation matrix information:
42 | mut_mat_long <- readRDS(system.file("states/mut_mat_longregions.rds",
43 |   package = "MutationalPatterns"
44 | ))
45 | 
46 | ## Plot the 96-profile of three samples
47 | plot_profile_region(mut_mat_long[, c(1, 4, 7)])
48 | }
49 | \seealso{
50 | \code{\link{mut_matrix}}
51 | 
52 | Other genomic_regions: 
53 | \code{\link{bin_mutation_density}()},
54 | \code{\link{lengthen_mut_matrix}()},
55 | \code{\link{plot_spectrum_region}()},
56 | \code{\link{split_muts_region}()}
57 | }
58 | \concept{genomic_regions}
59 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_cosine_heatmap.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_cosine_heatmap")
 2 | 
 3 | # Get mut_mat
 4 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
 5 |   package = "MutationalPatterns"
 6 | ))
 7 | 
 8 | # Get signatures
 9 | signatures <- get_known_signatures()
10 | 
11 | 
12 | # Calculate the cosine similarity between each signature and each 96 mutational profile
13 | cos_matrix <- cos_sim_matrix(mut_mat, signatures)
14 | 
15 | # Plot the cosine similarity between each signature and each sample with hierarchical
16 | # clustering of samples and signatures.
17 | output <- plot_cosine_heatmap(cos_matrix, cluster_rows = TRUE, cluster_cols = TRUE, method = "complete")
18 | 
19 | # In the above example, clustering is performed on the similarities of the samples with
20 | # the signatures. It's also possible to cluster the signatures and samples on their (96) profile.
21 | hclust_cosmic <- cluster_signatures(signatures, method = "average")
22 | cosmic_order <- colnames(signatures)[hclust_cosmic$order]
23 | hclust_samples <- cluster_signatures(mut_mat, method = "average")
24 | sample_order <- colnames(mut_mat)[hclust_samples$order]
25 | # Plot the cosine heatmap using this given signature order.
26 | output_supplied_order <- plot_cosine_heatmap(cos_matrix,
27 |   cluster_rows = FALSE, cluster_cols = FALSE, row_order = sample_order,
28 |   col_order = cosmic_order, method = "complete"
29 | )
30 | 
31 | # You can also plot the similarity of samples with each other
32 | cos_matrix <- cos_sim_matrix(mut_mat, mut_mat)
33 | output_inner <- plot_cosine_heatmap(cos_matrix, cluster_rows = TRUE, cluster_cols = TRUE, method = "complete")
34 | 
35 | # You can also include text
36 | output_text <- plot_cosine_heatmap(cos_matrix, cluster_rows = TRUE, cluster_cols = TRUE, plot_values = TRUE)
37 | 
38 | test_that("Output has correct class", {
39 |   expect_true(inherits(output, "gg"))
40 |   expect_true(inherits(output_supplied_order, "gg"))
41 |   expect_true(inherits(output_inner, "gg"))
42 |   expect_true(inherits(output_text, "gg"))
43 | })
44 | 


--------------------------------------------------------------------------------
/man/strand_bias_test.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/strand_bias_test.R
 3 | \name{strand_bias_test}
 4 | \alias{strand_bias_test}
 5 | \title{Significance test for strand asymmetry}
 6 | \usage{
 7 | strand_bias_test(strand_occurrences, p_cutoffs = 0.05, fdr_cutoffs = 0.1)
 8 | }
 9 | \arguments{
10 | \item{strand_occurrences}{Dataframe with mutation count per strand, result
11 | from 'strand_occurrences()'}
12 | 
13 | \item{p_cutoffs}{Significance cutoff for the p value. Default: 0.05}
14 | 
15 | \item{fdr_cutoffs}{Significance cutoff for the fdr. Default: 0.1}
16 | }
17 | \value{
18 | Dataframe with poisson test P value for the ratio between the
19 | two strands per group per base substitution type.
20 | }
21 | \description{
22 | This function performs a two sided Poisson test for the ratio between mutations on
23 | each strand. Multiple testing correction is also performed.
24 | }
25 | \examples{
26 | ## See the 'mut_matrix_stranded()' example for how we obtained the
27 | ## following mutation matrix.
28 | mut_mat_s <- readRDS(system.file("states/mut_mat_s_data.rds",
29 |   package = "MutationalPatterns"
30 | ))
31 | 
32 | tissue <- c(
33 |   "colon", "colon", "colon",
34 |   "intestine", "intestine", "intestine",
35 |   "liver", "liver", "liver"
36 | )
37 | 
38 | ## Perform the strand bias test.
39 | strand_counts <- strand_occurrences(mut_mat_s, by = tissue)
40 | strand_bias <- strand_bias_test(strand_counts)
41 | 
42 | ## Use different significance cutoffs for the pvalue and fdr
43 | strand_bias_strict <- strand_bias_test(strand_counts,
44 |   p_cutoffs = 0.01, fdr_cutoffs = 0.05
45 | )
46 | 
47 | ## Use multiple (max 3) significance cutoffs.
48 | ## This will vary the number of significance stars.
49 | strand_bias_multistars <- strand_bias_test(strand_counts,
50 |   p_cutoffs = c(0.05, 0.01, 0.005),
51 |   fdr_cutoffs = c(0.1, 0.05, 0.01)
52 | )
53 | }
54 | \seealso{
55 | \code{\link{mut_matrix_stranded}},
56 | \code{\link{strand_occurrences}},
57 | \code{\link{plot_strand_bias}}
58 | }
59 | 


--------------------------------------------------------------------------------
/R/intersect_with_region.R:
--------------------------------------------------------------------------------
 1 | #' Find overlap between mutations and a genomic region
 2 | #'
 3 | #' Find the number of mutations that reside in genomic region and take
 4 | #' surveyed area of genome into account.
 5 | #'
 6 | #' @param vcf CollapsedVCF object with mutations
 7 | #' @param surveyed GRanges object with regions of the genome that were surveyed
 8 | #' @param region GRanges object with genomic region(s)
 9 | #' @noRd
10 | #' @return A one-row data.frame with the number of mutations, the surveyed
11 | #' length, the mutation probability per base, the length of the surveyed
12 | #' part of the region, and the expected and observed number of mutations.
13 | .intersect_with_region <- function(vcf, surveyed, region) {
14 |   # Number of mutations in vcf file
15 |   n_muts <- length(vcf)
16 | 
17 |   # Number of base pairs that were surveyed (numeric, to avoid integer overflow)
18 |   surveyed_length <- sum(as.numeric(BiocGenerics::width(surveyed)))
19 | 
20 |   # Check if chromosome names are the same in the objects. seqlevelsStyle()
21 |   # can return several styles, so test for a shared style instead of `!=`.
22 |   surveyed_style <- GenomeInfoDb::seqlevelsStyle(surveyed)
23 |   if (!any(GenomeInfoDb::seqlevelsStyle(vcf) %in% surveyed_style)) {
24 |     stop(paste(
25 |       "The chromosome names (seqlevels) of the VCF and the",
26 |       "surveyed GRanges object do not match."
27 |     ))
28 |   }
29 |   if (!any(GenomeInfoDb::seqlevelsStyle(region) %in% surveyed_style)) {
30 |     stop(paste(
31 |       "The chromosome names (seqlevels) of the surveyed and",
32 |       "the region GRanges object do not match."
33 |     ))
34 |   }
35 | 
36 |   # Intersect genomic region and surveyed region
37 |   surveyed_region <- GenomicRanges::intersect(surveyed, region, ignore.strand = TRUE)
38 |   surveyed_region_length <- sum(as.numeric(BiocGenerics::width(surveyed_region)))
39 | 
40 |   # Count the overlaps between mutations and the surveyed genomic region
41 |   overlap <- GenomicRanges::findOverlaps(vcf, surveyed_region)
42 |   observed <- length(overlap)
43 |   # Expected count: mutations per surveyed base times the surveyed region size
44 |   prob <- n_muts / surveyed_length
45 |   expected <- prob * surveyed_region_length
46 | 
47 |   res <- data.frame(
48 |     n_muts,
49 |     surveyed_length,
50 |     prob, surveyed_region_length,
51 |     expected,
52 |     observed
53 |   )
54 |   return(res)
55 | }
56 | 


--------------------------------------------------------------------------------
/tests/testthat/test-enrichment_depletion_test.R:
--------------------------------------------------------------------------------
 1 | context("test-enrichment_depletion_test")
 2 | 
 3 | # Read distribution data
 4 | distr <- readRDS(system.file("states/distr_data.rds",
 5 |   package = "MutationalPatterns"
 6 | ))
 7 | # Set tissue
 8 | tissue <- c(rep("colon", 3), rep("intestine", 3), rep("liver", 3))
 9 | 
10 | ## Perform the enrichment/depletion test by tissue type.
11 | output <- enrichment_depletion_test(distr, by = tissue)
12 | 
13 | ## Or without specifying the 'by' parameter.
14 | output_pooled <- enrichment_depletion_test(distr)
15 | 
16 | ## Use different cutoffs for p and fdr
17 | output_strictcutoff <- enrichment_depletion_test(distr,
18 |   by = tissue,
19 |   p_cutoffs = 0.000001, fdr_cutoffs = 0.000005
20 | )
21 | 
22 | # Use multiple cutoffs for p and fdr
23 | output_multistars <- enrichment_depletion_test(distr,
24 |   by = tissue,
25 |   p_cutoffs = c(0.05, 0.01, 0.00000005),
26 |   fdr_cutoffs = c(0.1, 0.05, 0.00000001)
27 | )
28 | test_that("Output has correct class", {
29 |   expect_true(inherits(output, c("data.frame")))
30 |   expect_true(inherits(output_pooled, c("data.frame")))
31 |   expect_true(inherits(output_strictcutoff, c("data.frame")))
32 |   expect_true(inherits(output_multistars, c("data.frame")))
33 | })
34 | 
35 | test_that("Output has correct size", {
36 |   expect_equal(dim(output), c(15, 13))
37 |   expect_equal(dim(output_pooled), c(5, 13))
38 |   expect_equal(dim(output_strictcutoff), c(15, 13))
39 |   expect_equal(dim(output_multistars), c(15, 13))
40 | })
41 | 
42 | test_that("Number significant is correct", {
43 |   expect_equal(sum(output$significant == "*"), 15)
44 |   expect_equal(sum(output$significant_fdr == "*"), 15)
45 |   expect_equal(sum(output_pooled$significant == "*"), 5)
46 |   expect_equal(sum(output_pooled$significant_fdr == "*"), 5)
47 |   expect_equal(sum(output_strictcutoff$significant == "*"), 9)
48 |   expect_equal(sum(output_strictcutoff$significant_fdr == "*"), 9)
49 |   expect_equal(sum(output_multistars$significant == "***"), 8)
50 |   expect_equal(sum(output_multistars$significant_fdr == "**"), 9)
51 | })
52 | 


--------------------------------------------------------------------------------
/man/enrichment_depletion_test.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/enrichment_depletion_test.R
 3 | \name{enrichment_depletion_test}
 4 | \alias{enrichment_depletion_test}
 5 | \title{Test for enrichment or depletion of mutations in genomic regions}
 6 | \usage{
 7 | enrichment_depletion_test(x, by = NA, p_cutoffs = 0.05, fdr_cutoffs = 0.1)
 8 | }
 9 | \arguments{
10 | \item{x}{data.frame result from genomic_distribution()}
11 | 
12 | \item{by}{Optional grouping variable, e.g. tissue type}
13 | 
14 | \item{p_cutoffs}{Significance cutoff for the p value. Default: 0.05}
15 | 
16 | \item{fdr_cutoffs}{Significance cutoff for the fdr. Default: 0.1}
17 | }
18 | \value{
19 | data.frame with the observed and expected number of mutations per
20 | genomic region per group (by) or sample
21 | }
22 | \description{
23 | This function aggregates mutations per group (optional) and performs an
24 | enrichment depletion test.
25 | }
26 | \examples{
27 | ## See the 'genomic_distribution()' example for how we obtained the
28 | ## following data:
29 | distr <- readRDS(system.file("states/distr_data.rds",
30 |   package = "MutationalPatterns"
31 | ))
32 | 
33 | tissue <- c(rep("colon", 3), rep("intestine", 3), rep("liver", 3))
34 | 
35 | ## Perform the enrichment/depletion test by tissue type.
36 | distr_test <- enrichment_depletion_test(distr, by = tissue)
37 | 
38 | ## Or without specifying the 'by' parameter, to pool all samples.
39 | distr_single_sample <- enrichment_depletion_test(distr)
40 | 
41 | ## Use different significance cutoffs for the pvalue and fdr
42 | distr_strict <- enrichment_depletion_test(distr,
43 |   by = tissue,
44 |   p_cutoffs = 0.01, fdr_cutoffs = 0.05
45 | )
46 | 
47 | ## Use multiple (max 3) significance cutoffs.
48 | ## This will vary the number of significance stars.
49 | distr_multistars <- enrichment_depletion_test(distr,
50 |   by = tissue,
51 |   p_cutoffs = c(0.05, 0.01, 0.005),
52 |   fdr_cutoffs = c(0.1, 0.05, 0.01)
53 | )
54 | }
55 | \seealso{
56 | \code{\link{genomic_distribution}},
57 | \code{\link{plot_enrichment_depletion}}
58 | }
59 | 


--------------------------------------------------------------------------------
/man/lengthen_mut_matrix.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/lengthen_mut_matrix.R
 3 | \name{lengthen_mut_matrix}
 4 | \alias{lengthen_mut_matrix}
 5 | \title{Lengthen mutation matrix}
 6 | \usage{
 7 | lengthen_mut_matrix(mut_matrix)
 8 | }
 9 | \arguments{
10 | \item{mut_matrix}{Mutation matrix}
11 | }
12 | \value{
13 | mut_matrix
14 | }
15 | \description{
16 | A mutation_matrix calculated on a GRangesList or GR object modified by 'split_muts_region()',
17 | will contain a column per combination of sample and genomic region. In essence different regions
18 | are treated as different samples. This function will transform the matrix, so that these regions
19 | are instead treated as different mutation types. For example, instead of 'C[C>T]G', you might have
20 | the feature 'C[C>T]G Promoter'. The number of rows in the matrix will thus be
21 | multiplied by the number of regions.
22 | After using 'split_muts_region()', use 'mut_matrix()' to get a mut_matrix that can be used
23 | for this function.
24 | The result can be plotted with plot_profile_region, but could also be used for NMF, refitting etc.
25 | }
26 | \examples{
27 | 
28 | ## See the 'split_muts_region()' and 'mut_matrix()' examples for how we obtained the
29 | ## mutation matrix information:
30 | mut_mat_split_region <- readRDS(system.file("states/mut_mat_data.rds",
31 |   package = "MutationalPatterns"
32 | ))
33 | 
34 | long_mut_mat <- lengthen_mut_matrix(mut_mat_split_region)
35 | 
36 | 
37 | ## This also works on indels:
38 | ## See the 'split_muts_region()' and 'count_indel_contexts()' examples for how we
39 | ## obtained the indel counts:
40 | indel_counts_split <- readRDS(system.file("states/blood_indels_counts_split_region.rds",
41 |   package = "MutationalPatterns"
42 | ))
43 | 
44 | 
45 | ## Lengthen the matrix
46 | lengthen_mut_matrix(indel_counts_split)
47 | }
48 | \seealso{
49 | Other genomic_regions: 
50 | \code{\link{bin_mutation_density}()},
51 | \code{\link{plot_profile_region}()},
52 | \code{\link{plot_spectrum_region}()},
53 | \code{\link{split_muts_region}()}
54 | }
55 | \concept{genomic_regions}
56 | 


--------------------------------------------------------------------------------
/man/plot_profile_heatmap.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_profile_heatmap.R
 3 | \name{plot_profile_heatmap}
 4 | \alias{plot_profile_heatmap}
 5 | \title{Plot a mutation matrix as a heatmap}
 6 | \usage{
 7 | plot_profile_heatmap(mut_matrix, by = NA, max = 0.02, condensed = FALSE)
 8 | }
 9 | \arguments{
10 | \item{mut_matrix}{Matrix containing mutation counts.}
11 | 
12 | \item{by}{Optional grouping variable}
13 | 
14 | \item{max}{Maximum value used for plotting the relative contributions.
15 | Contributions that are higher will have the maximum colour. (Default: 0.02)}
16 | 
17 | \item{condensed}{More condensed plotting format. Default = FALSE.}
18 | }
19 | \value{
20 | A ggplot object
21 | }
22 | \description{
23 | Function to plot a SNV mutation matrix as a heatmap.
24 | This is especially useful when looking at a wide mutational context.
25 | }
26 | \examples{
27 | 
28 | ## See the 'mut_matrix()' examples for how we obtained the
29 | ## mutation matrix information:
30 | ## Get regular matrix
31 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
32 |   package = "MutationalPatterns"
33 | ))
34 | 
35 | ## Create heatmap of profile
36 | plot_profile_heatmap(mut_mat, max = 0.1)
37 | 
38 | ## Get extended matrix
39 | mut_mat_extended <- readRDS(system.file("states/mut_mat_data_extended.rds",
40 |   package = "MutationalPatterns"
41 | ))
42 | 
43 | ## Create heatmap of extended profile
44 | plot_profile_heatmap(mut_mat_extended)
45 | 
46 | ## Or plot heatmap per tissue
47 | tissue <- c(
48 |   "colon", "colon", "colon",
49 |   "intestine", "intestine", "intestine",
50 |   "liver", "liver", "liver"
51 | )
52 | 
53 | plot_profile_heatmap(mut_mat_extended, by = tissue)
54 | 
55 | ## Or plot the heatmap per sample.
56 | plot_profile_heatmap(mut_mat_extended,
57 |   by = colnames(mut_mat_extended),
58 |   max = 0.05
59 | )
60 | 
61 | 
62 | ## Create a condensed heatmap of extended profile
63 | plot_profile_heatmap(mut_mat_extended, condensed = TRUE)
64 | }
65 | \seealso{
66 | \code{\link{mut_matrix}},
67 | \code{\link{plot_96_profile}},
68 | \code{\link{plot_river}}
69 | }
70 | 


--------------------------------------------------------------------------------
/inst/scripts/create_example_indels.R:
--------------------------------------------------------------------------------
 1 | library(tidyverse)
 2 | library(VariantAnnotation)
 3 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
 4 | library(ref_genome, character.only = TRUE)
 5 | 
 6 | # Get grl
 7 | grl <- readRDS("inst/states/blood_grl.rds")
 8 | 
 9 | # Get indels
10 | grl_indel <- get_mut_type(grl, "indel")
11 | 
12 | # Replace the names of a gr by a running index, because they are often very long.
13 | remove_names_gr <- function(gr) {
14 |   index_names <- seq_along(gr)
15 |   stats::setNames(gr, index_names)
16 | }
17 | grl_indel <- purrr::map(as.list(grl_indel), remove_names_gr) %>%
18 |   GRangesList()
19 | 
20 | saveRDS(grl_indel, "inst/states/blood_grl_indel.rds")
21 | 
22 | # Get context
23 | grl_indel_context <- get_indel_context(grl_indel, ref_genome)
24 | saveRDS(grl_indel_context, "inst/states/blood_grl_indel_context.rds")
25 | 
26 | # Count contexts
27 | indel_counts <- count_indel_contexts(grl_indel_context)
28 | saveRDS(indel_counts, "inst/states/blood_indel_counts.rds")
29 | 
30 | 
31 | # Refit to signatures
32 | signatures <- get_known_signatures("indel")
33 | 
34 | fit_res <- fit_to_signatures(indel_counts, signatures)
35 | saveRDS(fit_res, "inst/states/indel_refit.rds")
36 | 
37 | 
38 | 
39 | 
40 | # Split per region
41 | CTCF_g <- readRDS(system.file("states/CTCF_g_data.rds",
42 |   package = "MutationalPatterns"
43 | ))
44 | promoter_g <- readRDS(system.file("states/promoter_g_data.rds",
45 |   package = "MutationalPatterns"
46 | ))
47 | flanking_g <- readRDS(system.file("states/promoter_flanking_g_data.rds",
48 |   package = "MutationalPatterns"
49 | ))
50 | 
51 | # Combine the regions into a single GRangesList
52 | regions <- GRangesList(promoter_g, flanking_g, CTCF_g)
53 | names(regions) <- c("Promoter", "Promoter flanking", "CTCF")
54 | 
55 | seqlevelsStyle(regions) <- "UCSC"
56 | grl_indel_split <- split_muts_region(grl_indel_context, regions)
57 | indel_counts_split <- count_indel_contexts(grl_indel_split)
58 | saveRDS(indel_counts_split, "inst/states/blood_indels_counts_split_region.rds")
59 | indel_matrix_long <- lengthen_mut_matrix(indel_counts_split)
60 | saveRDS(indel_matrix_long, "inst/states/blood_indels_longmatrix_split_region.rds")
61 | 


--------------------------------------------------------------------------------
/man/plot_enrichment_depletion.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_enrichment_depletion.R
 3 | \name{plot_enrichment_depletion}
 4 | \alias{plot_enrichment_depletion}
 5 | \title{Plot enrichment/depletion of mutations in genomic regions}
 6 | \usage{
 7 | plot_enrichment_depletion(df, sig_type = c("fdr", "p"))
 8 | }
 9 | \arguments{
10 | \item{df}{Dataframe result from enrichment_depletion_test()}
11 | 
12 | \item{sig_type}{The type of significance to be used. Possible values:
13 | * 'fdr' False discovery rate.
14 | A type of multiple testing correction.;
15 | * 'p' for regular p values.}
16 | }
17 | \value{
18 | Plot with two parts. 1: Barplot with no. mutations expected and
19 | observed per region. 2: Effect size of enrichment/depletion
20 | (log2ratio) with results significance test.
21 | }
22 | \description{
23 | Plot enrichment/depletion of mutations in genomic regions
24 | }
25 | \examples{
26 | ## See the 'genomic_distribution()' example for how we obtained the
27 | ## following data:
28 | distr <- readRDS(system.file("states/distr_data.rds",
29 |   package = "MutationalPatterns"
30 | ))
31 | 
32 | tissue <- c(
33 |   "colon", "colon", "colon",
34 |   "intestine", "intestine", "intestine",
35 |   "liver", "liver", "liver"
36 | )
37 | 
38 | ## Perform the enrichment/depletion test.
39 | distr_test <- enrichment_depletion_test(distr, by = tissue)
40 | 
41 | ## Plot the enrichment/depletion
42 | plot_enrichment_depletion(distr_test)
43 | 
44 | ## Perform and plot the enrichment depletion test for all samples pooled
45 | distr_test2 <- enrichment_depletion_test(distr)
46 | plot_enrichment_depletion(distr_test2)
47 | 
48 | ## Plot with p values instead of fdr
49 | plot_enrichment_depletion(distr_test, sig_type = "p")
50 | 
51 | ## Use multiple (max 3) significance cutoffs.
52 | ## This will vary the number of significance stars.
53 | distr_multistars <- enrichment_depletion_test(distr,
54 |   by = tissue,
55 |   p_cutoffs = c(0.05, 0.01, 0.005),
56 |   fdr_cutoffs = c(0.1, 0.05, 0.01)
57 | )
58 | plot_enrichment_depletion(distr_multistars)
59 | }
60 | \seealso{
61 | \code{\link{enrichment_depletion_test}},
62 | \code{\link{genomic_distribution}}
63 | }
64 | 


--------------------------------------------------------------------------------
/tests/testthat/test-fit_to_signatures_strict.R:
--------------------------------------------------------------------------------
 1 | context("test-fit_to_signatures_strict")
 2 | 
 3 | # Get mut_mat
 4 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
 5 |   package = "MutationalPatterns"
 6 | ))
 7 | 
 8 | # Get signatures
 9 | signatures <- get_known_signatures()
10 | 
11 | output <- fit_to_signatures_strict(mut_mat, signatures, max_delta = 0.05)
12 | output_best <- fit_to_signatures_strict(mut_mat, signatures[, 1:5], max_delta = 0.004, method = "best_subset")
13 | output_single_sig <- fit_to_signatures_strict(mut_mat, signatures[, 1, drop = FALSE], max_delta = 0.05)
14 | 
15 | expected <- readRDS(system.file("states/strict_snv_refit.rds",
16 |   package = "MutationalPatterns"
17 | ))
18 | expected_best <- readRDS(system.file("states/strict_best_snv_refit.rds",
19 |                                 package = "MutationalPatterns"
20 | ))
21 | 
22 | test_that("Output has correct class", {
23 |   expect_true(inherits(output, "list"))
24 |   expect_true(inherits(output$fit_res, "list"))
25 |   expect_true(inherits(output$fit_res$contribution, "matrix"))
26 |   expect_true(inherits(output$fit_res$reconstructed, "matrix"))
27 |   expect_true(inherits(output$sim_decay_fig, "list"))
28 |   expect_true(inherits(output$sim_decay_fig[[1]], "gg"))
29 |   expect_true(inherits(output_best, "list"))
30 |   expect_true(inherits(output_best$fit_res, "list"))
31 |   expect_true(inherits(output_best$fit_res$contribution, "matrix"))
32 |   expect_true(inherits(output_best$fit_res$reconstructed, "matrix"))
33 |   expect_true(inherits(output_best$sim_decay_fig, "list"))
34 |   expect_true(inherits(output_best$sim_decay_fig[[1]], "gg"))
35 |   expect_true(inherits(output_single_sig, "list"))
36 |   expect_true(inherits(output_single_sig$fit_res, "list"))
37 |   expect_true(inherits(output_single_sig$fit_res$contribution, "matrix"))
38 |   expect_true(inherits(output_single_sig$fit_res$reconstructed, "matrix"))
39 |   expect_true(inherits(output_single_sig$sim_decay_fig, "list"))
40 |   expect_true(inherits(output_single_sig$sim_decay_fig[[1]], "gg"))
41 | })
42 | 
43 | test_that("Output is equal to expected", {
44 |   expect_equal(output$fit_res, expected)
45 |   expect_equal(output_best$fit_res, expected_best)
46 | })
47 | 


--------------------------------------------------------------------------------
/R/mut_matrix.R:
--------------------------------------------------------------------------------
 1 | #' Make mutation count matrix of 96 trinucleotides
 2 | #'
 3 | #' @description Count how often each base substitution type occurs in each
 4 | #' sequence context (96 trinucleotides with the default extension).
 5 | #' @param vcf_list GRangesList or GRanges object.
 6 | #' @param ref_genome BSgenome reference genome object
 7 | #' @param extension The number of bases, that's extracted upstream and
 8 | #' downstream of the base substitutions. (Default: 1).
 9 | #' @return 96 mutation count matrix
10 | #'
11 | #' @examples
12 | #' ## See the 'read_vcfs_as_granges()' example for how we obtained the
13 | #' ## following data:
14 | #' grl <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
15 | #'   package = "MutationalPatterns"
16 | #' ))
17 | #'
18 | #' ## Load the corresponding reference genome.
19 | #' ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
20 | #' library(ref_genome, character.only = TRUE)
21 | #'
22 | #' ## Construct a mutation matrix from the loaded VCFs in comparison to the
23 | #' ## ref_genome.
24 | #' mut_mat <- mut_matrix(vcf_list = grl, ref_genome = ref_genome)
25 | #'
26 | #' ## Construct a mutation matrix with a larger context.
27 | #' ## This is most useful when you have many mutations per sample.
28 | #' mut_mat_extended <- mut_matrix(vcf_list = grl, ref_genome = ref_genome, extension = 2)
29 | #' @seealso
30 | #' \code{\link{read_vcfs_as_granges}}
31 | #' @export
32 | mut_matrix <- function(vcf_list, ref_genome, extension = 1) {
33 |   # A plain list of GRanges objects is first wrapped in a GRangesList
34 |   if (inherits(vcf_list, "list")) {
35 |     vcf_list <- GenomicRanges::GRangesList(vcf_list)
36 |   }
37 | 
38 |   # Pool all mutations in one GRanges and record how many each sample has
39 |   if (inherits(vcf_list, "GRanges")) {
40 |     # A single GRanges is treated as one sample with a placeholder name
41 |     gr <- vcf_list
42 |     gr_sizes <- c(My_sample = length(gr))
43 |   } else if (inherits(vcf_list, "CompressedGRangesList")) {
44 |     gr <- BiocGenerics::unlist(vcf_list)
45 |     gr_sizes <- S4Vectors::elementNROWS(vcf_list)
46 |   } else {
47 |     # Neither a GRanges nor a GRangesList: defer to .not_gr_or_grl()
48 |     .not_gr_or_grl(vcf_list)
49 |   }
50 | 
51 |   # Determine the type and context of every mutation
52 |   muts_context <- type_context(gr, ref_genome, extension)
53 | 
54 |   # Count the type/context combinations, using the sample sizes, to build the matrix
55 |   mut_96_occurrences(muts_context, gr_sizes)
56 | }
57 | 


--------------------------------------------------------------------------------
/man/plot_original_vs_reconstructed.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_original_vs_reconstructed.R
 3 | \name{plot_original_vs_reconstructed}
 4 | \alias{plot_original_vs_reconstructed}
 5 | \title{Plot the similarity between a mutation matrix and its reconstructed profile}
 6 | \usage{
 7 | plot_original_vs_reconstructed(
 8 |   mut_matrix,
 9 |   reconstructed,
10 |   y_intercept = 0.95,
11 |   ylims = c(0.6, 1)
12 | )
13 | }
14 | \arguments{
15 | \item{mut_matrix}{mutation count matrix (dimensions: x mutation types
16 | X n samples)}
17 | 
18 | \item{reconstructed}{A reconstructed mutation count matrix}
19 | 
20 | \item{y_intercept}{The y intercept of the plotted horizontal line. Default: 0.95.}
21 | 
22 | \item{ylims}{The limits of the y axis. Default: c(0.6, 1)}
23 | }
24 | \value{
25 | A ggplot figure
26 | }
27 | \description{
28 | When a reconstructed profile has a cosine similarity of more than 0.95 with
29 | original, the reconstructed profile is considered very good.
30 | }
31 | \examples{
32 | 
33 | ## See the 'mut_matrix()' example for how we obtained the mutation matrix:
34 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
35 |   package = "MutationalPatterns"
36 | ))
37 | 
38 | ## Extracting signatures can be computationally intensive, so
39 | ## we use pre-computed data generated with the following command:
40 | # nmf_res <- extract_signatures(mut_mat, rank = 2)
41 | 
42 | nmf_res <- readRDS(system.file("states/nmf_res_data.rds",
43 |   package = "MutationalPatterns"
44 | ))
45 | 
46 | ## Create figure
47 | plot_original_vs_reconstructed(mut_mat, nmf_res$reconstructed)
48 | 
49 | ## You can also use the results of signature refitting.
50 | ## Here we load some data as an example
51 | fit_res <- readRDS(system.file("states/snv_refit.rds",
52 |   package = "MutationalPatterns"
53 | ))
54 | plot_original_vs_reconstructed(mut_mat, fit_res$reconstructed)
55 | 
56 | ## You can also change the height of the horizontal line
57 | plot_original_vs_reconstructed(mut_mat, fit_res$reconstructed, y_intercept = 0.90)
58 | 
59 | ## It's also possible to change the limits of the y axis
60 | plot_original_vs_reconstructed(mut_mat, fit_res$reconstructed, ylims = c(0, 1))
61 | }
62 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_spectrum.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_spectrum")
 2 | 
 3 | # Load the example variants.
 4 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
 5 |   package = "MutationalPatterns"
 6 | ))
 7 | 
 8 | # Load a reference genome.
 9 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
10 | library(ref_genome, character.only = TRUE)
11 | 
12 | # Get the type occurrences for all VCF objects.
13 | type_occurrences <- mut_type_occurrences(vcfs, ref_genome)
14 | 
15 | # Tissue label of each sample, used to plot the spectrum per tissue.
16 | tissue <- rep(c("colon", "intestine", "liver"), each = 3)
17 | 
18 | # Custom colors that are passed to one of the plots.
19 | my_colors <- c(
20 |   "pink", "orange", "blue", "lightblue",
21 |   "green", "red", "purple"
22 | )
23 | 
24 | # Create every plot variant once, so they can be checked in a single loop.
25 | plots <- list(
26 |   # Point mutation spectrum over all samples
27 |   default = plot_spectrum(type_occurrences),
28 |   # CT distinction
29 |   ct = plot_spectrum(type_occurrences, CT = TRUE),
30 |   # Include individual sample points
31 |   indv = plot_spectrum(type_occurrences, CT = TRUE, indv_points = TRUE),
32 |   # Other types of error bars
33 |   stdev = plot_spectrum(type_occurrences, error_bars = "stdev"),
34 |   sem = plot_spectrum(type_occurrences, error_bars = "SEM"),
35 |   # Spectrum per tissue
36 |   tissue = plot_spectrum(type_occurrences, by = tissue, CT = TRUE),
37 |   # Spectrum per sample. Error bars are set to 'none', because they can't be plotted.
38 |   sample = plot_spectrum(type_occurrences, by = names(vcfs), CT = TRUE, error_bars = "none"),
39 |   # Custom colors, with a legend
40 |   color = plot_spectrum(type_occurrences,
41 |     CT = TRUE,
42 |     legend = TRUE,
43 |     colors = my_colors
44 |   )
45 | )
46 | 
47 | test_that("Output has correct class", {
48 |   for (fig in plots) {
49 |     expect_true(inherits(fig, "gg"))
50 |   }
51 | })
52 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_spectrum_region.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_spectrum_region")
 2 | 
 3 | # Load the example data.
 4 | grl <- readRDS(system.file("states/grl_split_region.rds",
 5 |   package = "MutationalPatterns"
 6 | ))
 7 | 
 8 | # Load a reference genome.
 9 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
10 | library(ref_genome, character.only = TRUE)
11 | 
12 | # Get the type occurrences for all elements of 'grl'.
13 | type_occurrences <- mut_type_occurrences(grl, ref_genome)
14 | 
15 | # Grouping labels: the tissue of each sample and a unique name per sample
16 | # ("colon1", "colon2", ..., "liver3").
17 | tissue <- rep(c("colon", "intestine", "liver"), each = 3)
18 | sample_names <- paste0(tissue, 1:3)
19 | 
20 | # Create every plot variant once, so they can be checked in a single loop.
21 | plots <- list(
22 |   # Point mutation spectrum over all samples
23 |   default = plot_spectrum_region(type_occurrences),
24 |   # Point mutation spectrum, relative only to the samples
25 |   relative_sample = plot_spectrum_region(type_occurrences, mode = "relative_sample"),
26 |   # Absolute point mutation spectrum over all samples
27 |   absolute = plot_spectrum_region(type_occurrences, mode = "absolute"),
28 |   # Different types of error bars
29 |   stdev = plot_spectrum_region(type_occurrences, error_bars = "stdev"),
30 |   sem = plot_spectrum_region(type_occurrences, error_bars = "SEM"),
31 |   # Including the sample points themselves
32 |   indv = plot_spectrum_region(type_occurrences, indv_points = TRUE),
33 |   # Per tissue
34 |   tissue = plot_spectrum_region(type_occurrences, by = tissue),
35 |   # Each sample separately
36 |   sample = plot_spectrum_region(type_occurrences, by = sample_names, error_bars = "none")
37 | )
38 | 
39 | # Test different outputs
40 | test_that("Output has correct class", {
41 |   for (fig in plots) {
42 |     expect_true(inherits(fig, "gg"))
43 |   }
44 | })
45 | 

--------------------------------------------------------------------------------
/tests/testthat/test-strand_bias_test.R:
--------------------------------------------------------------------------------
 1 | context("test-strand_bias_test")
 2 | 
 3 | # Set tissue names
 4 | tissue <- rep(c("colon", "intestine", "liver"), each = 3)
 5 | 
 6 | # Load a mutation matrix from the package's example data and
 7 | # count the strand occurrences per tissue.
 8 | load_strand_counts <- function(path) {
 9 |   mut_mat <- readRDS(system.file(path, package = "MutationalPatterns"))
10 |   strand_occurrences(mut_mat, by = tissue)
11 | }
12 | 
13 | # Strand counts for the stranded mutation matrix, and for replication bias.
14 | strand_counts <- load_strand_counts("states/mut_mat_s_data.rds")
15 | strand_counts_repli <- load_strand_counts("states/mut_mat_repli.rds")
16 | 
17 | # Perform the strand bias test with the different settings under test.
18 | results <- list(
19 |   default = strand_bias_test(strand_counts),
20 |   repli = strand_bias_test(strand_counts_repli),
21 |   # Different cutoffs for p and fdr
22 |   lenient = strand_bias_test(strand_counts, p_cutoffs = 0.1, fdr_cutoffs = 0.4),
23 |   # Multiple cutoffs for p and fdr
24 |   multistars = strand_bias_test(strand_counts,
25 |     p_cutoffs = c(0.5, 0.1, 0.05),
26 |     fdr_cutoffs = c(0.5, 0.35, 0.1)
27 |   )
28 | )
29 | 
30 | # Count how often a significance label occurs in a column of an output.
31 | count_label <- function(res, column, label) {
32 |   sum(res[[column]] == label)
33 | }
34 | 
35 | # Tests
36 | test_that("Output has correct class", {
37 |   for (res in results) {
38 |     expect_true(inherits(res, "tbl_df"))
39 |   }
40 | })
41 | 
42 | test_that("Output has correct size", {
43 |   for (res in results) {
44 |     expect_equal(dim(res), c(18, 10))
45 |   }
46 | })
47 | 
48 | test_that("Number significant is correct", {
49 |   expect_equal(count_label(results[["default"]], "significant", "*"), 1)
50 |   expect_equal(count_label(results[["default"]], "significant_fdr", "*"), 0)
51 |   expect_equal(count_label(results[["repli"]], "significant", "*"), 0)
52 |   expect_equal(count_label(results[["repli"]], "significant_fdr", "*"), 0)
53 |   expect_equal(count_label(results[["lenient"]], "significant", "*"), 3)
54 |   expect_equal(count_label(results[["lenient"]], "significant_fdr", "*"), 3)
55 |   expect_equal(count_label(results[["multistars"]], "significant", "***"), 1)
56 |   expect_equal(count_label(results[["multistars"]], "significant_fdr", "**"), 3)
57 | })
58 | 


--------------------------------------------------------------------------------
/man/bin_mutation_density.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/bin_mutation_density.R
 3 | \name{bin_mutation_density}
 4 | \alias{bin_mutation_density}
 5 | \title{Bin the genome based on mutation density}
 6 | \usage{
 7 | bin_mutation_density(vcf_list, ref_genome, nrbins = 3, man_dens_cutoffs = NA)
 8 | }
 9 | \arguments{
10 | \item{vcf_list}{GRangesList or GRanges object.}
11 | 
12 | \item{ref_genome}{BSgenome reference genome object}
13 | 
14 | \item{nrbins}{The number of bins in which to separate the genome}
15 | 
16 | \item{man_dens_cutoffs}{Manual density cutoffs to use.}
17 | }
18 | \value{
19 | GRangesList
20 | }
21 | \description{
22 | This function splits the genome based on the mutation density.
23 | The density is calculated per chromosome. The density is split
24 | into bins. The difference in density between subsequent bins is the same
25 | for all bins. In other words, the difference in density between bins 1 and
26 | 2 is the same as between bins 2 and 3.
27 | The function returns a GRangesList. Each GRanges in the list contains the
28 | regions associated with that bin. This can be used with the
29 | 'split_muts_region()' function.
30 | }
31 | \examples{
32 | 
33 | ### See the 'read_vcfs_as_granges()' example for how we obtained the
34 | ## following data:
35 | grl <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
36 |   package = "MutationalPatterns"
37 | ))
38 | 
39 | ## Load the corresponding reference genome.
40 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
41 | library(ref_genome, character.only = TRUE)
42 | 
43 | ## Determine region density
44 | dens_grl <- bin_mutation_density(grl, ref_genome, nrbins = 3)
45 | names(dens_grl) <- c("Low", "Medium", "High")
46 | 
47 | 
48 | ## You can also use manual cutoffs. This feature is meant for more
49 | ## advanced users. It can be useful if you want to find highly mutated regions, with
50 | ## a consistent cutoff between analyses.
51 | dens_grl_man <- bin_mutation_density(grl, ref_genome, man_dens_cutoffs = c(0, 2e-08, 1))
52 | }
53 | \seealso{
54 | Other genomic_regions: 
55 | \code{\link{lengthen_mut_matrix}()},
56 | \code{\link{plot_profile_region}()},
57 | \code{\link{plot_spectrum_region}()},
58 | \code{\link{split_muts_region}()}
59 | }
60 | \concept{genomic_regions}
61 | 


--------------------------------------------------------------------------------
/tests/testthat/test-split_muts_region.R:
--------------------------------------------------------------------------------
 1 | context("test-split_muts_region")
 2 | 
 3 | # Read one of the example objects that are stored in the package.
 4 | read_state <- function(path) {
 5 |   readRDS(system.file(path, package = "MutationalPatterns"))
 6 | }
 7 | 
 8 | # Read in the genomic regions and combine them into a single GRangesList
 9 | regions <- GRangesList(
10 |   read_state("states/promoter_g_data.rds"),
11 |   read_state("states/promoter_flanking_g_data.rds"),
12 |   read_state("states/CTCF_g_data.rds")
13 | )
14 | names(regions) <- c("Promoter", "Promoter flanking", "CTCF")
15 | seqlevelsStyle(regions) <- "UCSC"
16 | 
17 | # Read in some variants. Only two samples are used to reduce runtime.
18 | grl <- read_state("states/read_vcfs_as_granges_output.rds")[1:2]
19 | 
20 | # Run the function with the different kinds of input under test.
21 | outputs <- list(
22 |   default = split_muts_region(grl, regions),
23 |   single_gr = split_muts_region(grl[[1]], regions),
24 |   single_region = split_muts_region(grl, regions[[1]]),
25 |   noother = split_muts_region(grl, regions, include_other = FALSE)
26 | )
27 | 
28 | test_that("Output has correct class", {
29 |   for (out in outputs) {
30 |     expect_true(inherits(out, "CompressedGRangesList"))
31 |   }
32 | })
33 | 
34 | # Expected number of elements: nr. samples * nr. regions.
35 | # The +1 is for the variants in 'other'.
36 | expected_length <- function(grl, regions) {
37 |   length(grl) * (length(regions) + 1)
38 | }
39 | 
40 | test_that("Output GRangesList has correct length", {
41 |   expect_equal(length(outputs[["default"]]), expected_length(grl, regions))
42 |   expect_equal(length(outputs[["single_gr"]]), expected_length(grl[1], regions))
43 |   expect_equal(length(outputs[["single_region"]]), expected_length(grl, regions[1]))
44 |   expect_equal(length(outputs[["noother"]]), length(regions) * length(grl))
45 | })
46 | 
47 | expected <- read_state("states/grl_split_region.rds")[1:8]
48 | test_that("Output transforms correctly", {
49 |   expect_equal(outputs[["default"]], expected)
50 | })
51 | 


--------------------------------------------------------------------------------
/tests/testthat/test-genomic_distribution.R:
--------------------------------------------------------------------------------
 1 | context("test-genomic_distribution")
 2 | 
 3 | # Read one of the example objects that are stored in the package.
 4 | read_state <- function(path) {
 5 |   readRDS(system.file(path, package = "MutationalPatterns"))
 6 | }
 7 | 
 8 | # Read vcfs
 9 | vcfs <- read_state("states/read_vcfs_as_granges_output.rds")
10 | 
11 | # Load the genomic regions, combine them and set the seqlevelsStyle
12 | regions <- GRangesList(
13 |   read_state("states/promoter_g_data.rds"),
14 |   read_state("states/promoter_flanking_g_data.rds"),
15 |   read_state("states/CTCF_g_data.rds")
16 | )
17 | names(regions) <- c("Promoter", "Promoter flanking", "CTCF")
18 | seqlevelsStyle(regions) <- "UCSC"
19 | 
20 | # Get the callable regions
21 | library(rtracklayer)
22 | surveyed <- rtracklayer::import(
23 |   system.file("extdata/callableloci-sample.bed", package = "MutationalPatterns")
24 | )
25 | seqlevelsStyle(surveyed) <- "UCSC"
26 | 
27 | # Use the same callable loci for all samples.
28 | surveyed_list <- rep(list(surveyed), 9)
29 | 
30 | # A copy of the regions without names, used to trigger the error about names.
31 | regions_noname <- regions
32 | names(regions_noname) <- NULL
33 | 
34 | ## Calculate the number of observed and expected number of mutations in
35 | ## each genomic regions for each sample.
36 | output <- genomic_distribution(vcfs, surveyed_list, regions)
37 | 
38 | test_that("Output has correct class", {
39 |   expect_true(inherits(output, "data.frame"))
40 | })
41 | 
42 | test_that("Output has correct size", {
43 |   expect_equal(dim(output), c(27, 8))
44 | })
45 | 
46 | # The vcf_list and the surveyed list have to be the same size
47 | test_that("An error is given when input sizes don't match", {
48 |   expect_error(
49 |     genomic_distribution(vcfs, surveyed_list[1:8], regions),
50 |     "vcf_list and surveyed_list must have the same length"
51 |   )
52 | })
53 | 
54 | # The names of the regions list have to be set
55 | test_that("An error is given when regions_list names are not set", {
56 |   expect_error(
57 |     genomic_distribution(vcfs, surveyed_list, regions_noname),
58 |     "Please set the names of region_list using"
59 |   )
60 | })
61 | 


--------------------------------------------------------------------------------
/inst/scripts/create_example_regulatory_regions.R:
--------------------------------------------------------------------------------
 1 | # Create the example regulatory regions that are stored in 'inst/states'.
 2 | # Use biomaRt to obtain data.
 3 | #
 4 | # NOTE(review): GRanges(), IRanges(), reduce() and seqlevels() are not
 5 | # attached by this script itself; presumably GenomicRanges is loaded
 6 | # beforehand. Confirm before running.
 7 | # NOTE(review): sample.int() is used without a seed, so every run selects
 8 | # a different random subset of regions.
 9 | 
10 | library(biomaRt)
11 | regulatory <- useEnsembl(
12 |   biomart = "regulation",
13 |   dataset = "hsapiens_regulatory_feature",
14 |   GRCh = 37
15 | )
16 | saveRDS(regulatory, "inst/states/regulatory_data.rds")
17 | 
18 | # Download the regulatory features of one type and convert them to a
19 | # reduced and sorted GRanges object.
20 | #
21 | # feature_type: value for the "regulatory_feature_type_name" filter.
22 | # mart: the mart to query.
23 | get_regulatory_regions <- function(feature_type, mart) {
24 |   features <- getBM(
25 |     attributes = c(
26 |       "chromosome_name",
27 |       "chromosome_start",
28 |       "chromosome_end",
29 |       "feature_type_name"
30 |     ),
31 |     filters = "regulatory_feature_type_name",
32 |     values = feature_type,
33 |     mart = mart
34 |   )
35 |   regions <- reduce(GRanges(
36 |     features$chromosome_name,
37 |     IRanges(
38 |       features$chromosome_start,
39 |       features$chromosome_end
40 |     )
41 |   ))
42 |   seqlevels(regions) <- c(1:22, "X", "Y")
43 |   sort(regions)
44 | }
45 | 
46 | # Randomly select 'n' of the regions.
47 | sample_regions <- function(regions, n) {
48 |   regions[sample.int(length(regions), n)]
49 | }
50 | 
51 | # CTCF binding sites. Only a random subset of 50000 regions is stored.
52 | CTCF_g <- get_regulatory_regions("CTCF Binding Site", regulatory)
53 | CTCF_g <- sample_regions(CTCF_g, 50000)
54 | saveRDS(CTCF_g, "inst/states/CTCF_g_data.rds")
55 | 
56 | # Promoter regions. All regions are stored.
57 | promoter_g <- get_regulatory_regions("Promoter", regulatory)
58 | saveRDS(promoter_g, "inst/states/promoter_g_data.rds")
59 | 
60 | # Promoter flanking regions. Only a random subset of 50000 regions is stored.
61 | flanking_g <- get_regulatory_regions("Promoter Flanking Region", regulatory)
62 | flanking_g <- sample_regions(flanking_g, 50000)
63 | saveRDS(flanking_g, "inst/states/promoter_flanking_g_data.rds")
64 | 


--------------------------------------------------------------------------------
/tests/testthat/test-plot_rainfall.R:
--------------------------------------------------------------------------------
 1 | context("test-plot_rainfall")
 2 | 
 3 | # Read one of the example objects that are stored in the package.
 4 | read_state <- function(path) {
 5 |   readRDS(system.file(path, package = "MutationalPatterns"))
 6 | }
 7 | 
 8 | # Read data
 9 | vcfs <- read_state("states/read_vcfs_as_granges_output.rds")
10 | grl_indel_context <- read_state("states/blood_grl_indel_context.rds")
11 | grl_dbs_context <- read_state("states/blood_grl_dbs_context.rds")
12 | mbs_grl <- read_state("states/blood_grl_mbs.rds")
13 | 
14 | # Specify chromosomes of interest.
15 | chromosomes <- paste0("chr", 1:22)
16 | 
17 | # The first element of 'vcfs' and its name are used for several plots.
18 | first_gr <- vcfs[[1]]
19 | first_name <- names(vcfs[1])
20 | 
21 | # Create every plot variant once, so they can be checked in a single loop.
22 | plots <- list(
23 |   # Rainfall plot for all chromosomes
24 |   all_chroms = plot_rainfall(first_gr, title = first_name, chromosomes = chromosomes),
25 |   # A single chromosome (chromosome 1)
26 |   single_chrom = plot_rainfall(first_gr, title = first_name, chromosomes = chromosomes[1]),
27 |   # A subset of the variants
28 |   subset = plot_rainfall(first_gr[1:10], title = first_name, chromosomes = chromosomes),
29 |   # Indels
30 |   indel = plot_rainfall(grl_indel_context[[1]],
31 |     title = names(grl_indel_context[1]),
32 |     chromosomes = chromosomes, type = "indel"
33 |   ),
34 |   # dbs
35 |   dbs = plot_rainfall(grl_dbs_context[[1]],
36 |     title = names(grl_dbs_context[1]),
37 |     chromosomes = chromosomes, type = "dbs"
38 |   ),
39 |   # mbs
40 |   mbs = plot_rainfall(mbs_grl[[1]],
41 |     title = names(mbs_grl[1]),
42 |     chromosomes = chromosomes, type = "mbs"
43 |   ),
44 |   # An empty gr
45 |   empty = plot_rainfall(first_gr[0], title = first_name, chromosomes = chromosomes)
46 | )
47 | 
48 | test_that("Output has correct class", {
49 |   for (fig in plots) {
50 |     expect_true(inherits(fig, "gg"))
51 |   }
52 | })
53 | 
54 | test_that("Subsetted output contains the correct subset of colours", {
55 |   first_layer <- ggplot_build(plots[["subset"]])$data[[1]]
56 |   expect_equal(unique(first_layer[["colour"]]), c("#ADCC54", "#DE1C14", "#2EBAED", "#D4D2D2"))
57 | })
58 | 


--------------------------------------------------------------------------------
/R/type_context.R:
--------------------------------------------------------------------------------
 1 | #' Retrieve context of base substitution types
 2 | #'
 3 | #' A function to extract the bases upstream and downstream of the base
 4 | #' substitutions, together with the base substitution types.
 5 | #' For variants whose 6 base substitution type differs from the mutation
 6 | #' as found in the input, the context is reverse complemented.
 7 | #'
 8 | #' @param vcf The variants of a single sample. The example below uses one
 9 | #' GRanges object from the output of 'read_vcfs_as_granges()'.
10 | #' @param ref_genome Reference genome
11 | #' @param extension The number of bases, that's extracted upstream and
12 | #' downstream of the base substitutions. (Default: 1).
13 | #' @return A named list with the elements 'types' (the mutation types) and
14 | #' 'context' (the sequence contexts). For an empty input a warning is given
15 | #' and both elements are NULL.
16 | #'
17 | #'
18 | #' @examples
19 | #' ## See the 'read_vcfs_as_granges()' example for how we obtained the
20 | #' ## following data:
21 | #' vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
22 | #'   package = "MutationalPatterns"
23 | #' ))
24 | #'
25 | #' ## Load the corresponding reference genome.
26 | #' ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
27 | #' library(ref_genome, character.only = TRUE)
28 | #'
29 | #' ## Get type context
30 | #' type_context <- type_context(vcfs[[1]], ref_genome)
31 | #'
32 | #' ## Get larger type context
33 | #' type_context_larger <- type_context(vcfs[[1]], ref_genome, extension = 2)
34 | #' @seealso
35 | #' \code{\link{read_vcfs_as_granges}},
36 | #' \code{\link{mut_context}}
37 | #'
38 | #' @export
39 | 
40 | type_context <- function(vcf, ref_genome, extension = 1) {
41 |   # Without variants there is nothing to look up: warn and return a list
42 |   # in which both elements are NULL.
43 |   if (length(vcf) == 0) {
44 |     warning("Detected empty GRanges object.
45 |                 Returning an empty list for this sample.", call. = FALSE)
46 |     return(list("types" = NULL, "context" = NULL))
47 |   }
48 | 
49 |   # Sequence context of every variant
50 |   context <- mut_context(vcf, ref_genome, extension)
51 | 
52 |   # The mutations as found in the input, and their 6 base mutation types
53 |   observed <- mutations_from_vcf(vcf)
54 |   types <- mut_type(vcf)
55 | 
56 |   # Where the mutation and its 6 base type differ, the context has to be
57 |   # replaced by its reverse complement.
58 |   flipped <- which(observed != types)
59 |   context[flipped] <- IRanges::reverse(chartr("ATGC", "TACG", context[flipped]))
60 | 
61 |   # Return as named list
62 |   list("types" = types, "context" = context)
63 | }
64 | 


--------------------------------------------------------------------------------
/man/split_muts_region.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/split_muts_region.R
 3 | \name{split_muts_region}
 4 | \alias{split_muts_region}
 5 | \title{Split GRangesList or GRanges based on a list of regions.}
 6 | \usage{
 7 | split_muts_region(vcf_list, ranges_grl, include_other = TRUE)
 8 | }
 9 | \arguments{
10 | \item{vcf_list}{GRangesList or GRanges object}
11 | 
12 | \item{ranges_grl}{GRangesList or GRanges object containing regions of interest}
13 | 
14 | \item{include_other}{Boolean. Whether or not to include an "Other" region
15 | containing mutations that aren't in any other region.}
16 | }
17 | \value{
18 | GRangesList
19 | }
20 | \description{
21 | A GRangesList or GRanges object containing variants is split based on a list of regions.
22 | This list can be either a GRangesList or a GRanges object.
23 | The result is a GRangesList where each element contains the variants of one sample from one region.
24 | Variants that are not in any of the provided regions are put in a list of 'other'.
25 | }
26 | \examples{
27 | 
28 | ## Read in some existing genomic regions.
29 | ## See the 'genomic_distribution()' example for how we obtained the
30 | ## following data:
31 | CTCF_g <- readRDS(system.file("states/CTCF_g_data.rds",
32 |   package = "MutationalPatterns"
33 | ))
34 | promoter_g <- readRDS(system.file("states/promoter_g_data.rds",
35 |   package = "MutationalPatterns"
36 | ))
37 | flanking_g <- readRDS(system.file("states/promoter_flanking_g_data.rds",
38 |   package = "MutationalPatterns"
39 | ))
40 | 
41 | ## Combine the regions into a single GRangesList
42 | regions <- GRangesList(promoter_g, flanking_g, CTCF_g)
43 | 
44 | names(regions) <- c("Promoter", "Promoter flanking", "CTCF")
45 | 
46 | ## Read in some variants.
47 | ## See the 'read_vcfs_as_granges()' example for how we obtained the
48 | ## following data:
49 | grl <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
50 |   package = "MutationalPatterns"
51 | ))
52 | 
53 | ## Split muts based on the supplied regions
54 | split_muts_region(grl, regions)
55 | 
56 | ## Don't include muts outside of the supplied regions
57 | split_muts_region(grl, regions, include_other = FALSE)
58 | }
59 | \seealso{
60 | Other genomic_regions: 
61 | \code{\link{bin_mutation_density}()},
62 | \code{\link{lengthen_mut_matrix}()},
63 | \code{\link{plot_profile_region}()},
64 | \code{\link{plot_spectrum_region}()}
65 | }
66 | \concept{genomic_regions}
67 | 


--------------------------------------------------------------------------------
/man/plot_compare_mbs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_compare_mbs.R
 3 | \name{plot_compare_mbs}
 4 | \alias{plot_compare_mbs}
 5 | \title{Compare two mbs mutation profiles}
 6 | \usage{
 7 | plot_compare_mbs(
 8 |   profile1,
 9 |   profile2,
10 |   profile_names = c("profile 1", "profile 2"),
11 |   profile_ymax = 1,
12 |   diff_ylim = c(-0.5, 0.5)
13 | )
14 | }
15 | \arguments{
16 | \item{profile1}{First mutation profile}
17 | 
18 | \item{profile2}{Second mutation profile}
19 | 
20 | \item{profile_names}{Character vector with names of the mutation profiles
21 | used for plotting, default = c("profile 1", "profile 2")}
22 | 
23 | \item{profile_ymax}{Maximum value of y-axis (relative contribution) for
24 | profile plotting. This can only be used to increase the y axis.
25 | If bars fall outside this limit, the maximum value is
26 | automatically increased. default = 1.}
27 | 
28 | \item{diff_ylim}{Y-axis limits for profile difference plot,
29 | default = c(-0.5, 0.5)}
30 | }
31 | \value{
32 | A ggplot2 object
33 | }
34 | \description{
35 | Plots two mbs mutation profiles and their difference, reports the residual
36 | sum of squares (RSS).
37 | }
38 | \examples{
39 | 
40 | ## Get the mbs counts
41 | ## See 'count_mbs_contexts()' for more info on how to do this.
42 | mbs_counts <- readRDS(system.file("states/blood_mbs_counts.rds",
43 |   package = "MutationalPatterns"
44 | ))
45 | 
46 | 
47 | ## You could compare regular mutation profiles with each other.
48 | plot_compare_mbs(
49 |   mbs_counts[, 1],
50 |   mbs_counts[, 2]
51 | )
52 | 
53 | ## Or change the names of the profiles
54 | plot_compare_mbs(mbs_counts[, 1],
55 |   mbs_counts[, 2],
56 |   profile_names = c("Original", "Reconstructed")
57 | )
58 | 
59 | ## You can also change the y limits.
60 | ## This can be done separately for the profiles and the different facets.
61 | plot_compare_mbs(mbs_counts[, 1],
62 |   mbs_counts[, 2],
63 |   profile_ymax = 0.9,
64 |   diff_ylim = c(-0.8, 0.8)
65 | )
66 | 
67 | ## You could also compare a reconstructed profile.
68 | ## However, the example data does not contain enough MBS variants to use NMF.
69 | ## Existing signatures have also not yet been defined.
70 | }
71 | \seealso{
72 | \code{\link{plot_compare_profiles}},
73 | \code{\link{plot_compare_dbs}},
74 | \code{\link{plot_compare_indels}}
75 | 
76 | Other MBS: 
77 | \code{\link{count_mbs_contexts}()},
78 | \code{\link{plot_mbs_contexts}()}
79 | }
80 | \concept{MBS}
81 | 


--------------------------------------------------------------------------------
/R/cos_sim_matrix.R:
--------------------------------------------------------------------------------
 1 | #' Compute all pairwise cosine similarities between mutational profiles/signatures
 2 | #'
 3 | #' Computes all pairwise cosine similarities between the mutational profiles provided in the two mutation count matrices.
 4 | #' The cosine similarity is a value between 0 (distinct) and 1 (identical) and indicates how much two vectors are alike.
 5 | #'
 6 | #' @param mut_matrix1 mutation count matrix (dimensions: a mutation features X n samples).
 7 | #' All columns have to be numeric.
 8 | #' @param mut_matrix2 mutation count matrix (dimensions: a mutation features X m samples).
 9 | #' All columns have to be numeric.
10 | #' @return Matrix with pairwise cosine similarities (dimensions: n mutational profiles X m mutational profiles).
11 | #' The row names are the column names of mut_matrix1 and the column names are
12 | #' the column names of mut_matrix2.
13 | #'
14 | #' @examples
15 | #' ## Get signatures
16 | #' signatures <- get_known_signatures()
17 | #'
18 | #' ## See the 'mut_matrix()' example for how we obtained the mutation matrix:
19 | #' mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
20 | #'   package = "MutationalPatterns"
21 | #' ))
22 | #'
23 | #'
24 | #' ## Calculate the cosine similarity between each COSMIC signature and each 96 mutational profile
25 | #' cos_sim_matrix(mut_mat, signatures)
26 | #' @seealso
27 | #' \code{\link{mut_matrix}},
28 | #' \code{\link{fit_to_signatures}},
29 | #' \code{\link{plot_cosine_heatmap}}
30 | #'
31 | #' @export
32 | 
33 | cos_sim_matrix <- function(mut_matrix1, mut_matrix2) {
34 | 
35 |   # Check that both inputs are numeric.
36 |   if (!all(apply(mut_matrix1, 2, is.numeric))) {
37 |     stop("The first input contains non-numeric columns, while all columns should be numeric.")
38 |   }
39 |   if (!all(apply(mut_matrix2, 2, is.numeric))) {
40 |     stop("The second input contains non-numeric columns, while all columns should be numeric.")
41 |   }
42 | 
43 |   # Determine number of samples
44 |   n_samples1 <- ncol(mut_matrix1)
45 |   n_samples2 <- ncol(mut_matrix2)
46 |   res_matrix <- matrix(nrow = n_samples1, ncol = n_samples2)
47 | 
48 |   # Compare each column of the first input with every column of the
49 |   # second input. vapply() fills a vector of known length and type, so the
50 |   # row of similarities is not grown one element at a time and a
51 |   # similarity that is not a single number is reported as an error.
52 |   for (s in seq_len(n_samples1)) {
53 |     signal1 <- mut_matrix1[, s, drop = TRUE]
54 |     res_matrix[s, ] <- vapply(
55 |       seq_len(n_samples2),
56 |       function(i) cos_sim(signal1, mut_matrix2[, i, drop = TRUE]),
57 |       numeric(1)
58 |     )
59 |   }
60 |   rownames(res_matrix) <- colnames(mut_matrix1)
61 |   colnames(res_matrix) <- colnames(mut_matrix2)
62 | 
63 |   return(res_matrix)
64 | }
65 | 


--------------------------------------------------------------------------------
/R/convert_sigs_to_ref.R:
--------------------------------------------------------------------------------
 1 | #' Convert tissue specific signature exposures to reference
 2 | #'
 3 | #' This function converts tissue specific signature contributions into
 4 | #' reference signature contributions. This works on SNV signatures from SIGNAL.
 5 | #' It uses a conversion matrix to do the conversion.
 6 | #' The output can include possible artifact signatures.
 7 | #'
 8 | #' @param fit_res Named list with signature contributions and reconstructed
 9 | #' mutation matrix
10 | #'
11 | #' @return The input fit_res, but with converted signature contributions.
12 | #' @export
13 | #'
14 | #' @importFrom magrittr %>%
15 | #'
16 | #' @examples
17 | #'
18 | #' ## See the 'mut_matrix()' example for how we obtained the mutation matrix:
19 | #' mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
20 | #'   package = "MutationalPatterns"
21 | #' ))
22 | #'
23 | #' ## Get tissue specific signatures
24 | #' signatures <- get_known_signatures(source = "SIGNAL", sig_type = "tissue", tissue_type = "Skin")
25 | #'
26 | #' ## Fit tissue specific signatures
27 | #' fit_res <- fit_to_signatures(mut_mat, signatures)
28 | #'
29 | #' ## Convert the tissue specific signatures exposures to reference
30 | #' fit_res <- convert_sigs_to_ref(fit_res)
31 | convert_sigs_to_ref <- function(fit_res) {
32 | 
33 |   # Get contribution. Its row names are the tissue specific signature names.
34 |   contri <- fit_res$contribution
35 | 
36 |   # Determine conversion matrix filename
37 |   fname_matrix <- file.path("extdata", "signatures", "SIGNAL_conversion_matrix.txt")
38 |   fname_matrix <- system.file(fname_matrix, package = "MutationalPatterns")
39 | 
40 |   # Read conversion matrix. The file is tab separated and uses decimal commas.
41 |   conv_m <- read.table(fname_matrix,
42 |     sep = "\t",
43 |     header = TRUE,
44 |     stringsAsFactors = FALSE,
45 |     dec = ",",
46 |     check.names = FALSE
47 |   ) %>%
48 |     tibble::column_to_rownames("Tissue_sig") %>%
49 |     as.matrix()
50 | 
51 |   # Check that the tissue specific signature names are all in the conversion matrix.
52 |   if (!all(rownames(contri) %in% rownames(conv_m))) {
53 |     stop(paste0(
54 |       "The signature names of the fit_res don't match that of ",
55 |       "the conversion matrix.\n You have to use tissue specific SNV ",
56 |       "signatures from SIGNAL."
57 |     ), call. = FALSE)
58 |   }
59 | 
60 |   # Select the used signatures by name, so that the conversion matrix rows
61 |   # line up with the contribution rows (an %in% filter keeps the file order).
62 |   conv_m <- conv_m[rownames(contri), , drop = FALSE]
63 | 
64 |   # Convert signatures to reference.
65 |   fit_res$contribution <- t(conv_m) %*% contri
66 | 
67 |   return(fit_res)
68 | }
69 | 


--------------------------------------------------------------------------------
/man/plot_indel_contexts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_indel_contexts.R
 3 | \name{plot_indel_contexts}
 4 | \alias{plot_indel_contexts}
 5 | \title{Plot the indel contexts}
 6 | \usage{
 7 | plot_indel_contexts(
 8 |   counts,
 9 |   same_y = FALSE,
10 |   extra_labels = FALSE,
11 |   condensed = FALSE
12 | )
13 | }
14 | \arguments{
15 | \item{counts}{A tibble containing the number of indels per COSMIC context.}
16 | 
17 | \item{same_y}{A boolean describing whether the same y axis should be used for all samples.}
18 | 
19 | \item{extra_labels}{A boolean describing whether extra labels should be added.
20 | These can clarify the plot, but will shift when different plot widths are used.
21 | We recommend saving a plot with a width of 12, when using this argument.}
22 | 
23 | \item{condensed}{More condensed plotting format. Default = FALSE.}
24 | }
25 | \value{
26 | A ggplot figure.
27 | }
28 | \description{
29 | Plot the indel contexts
30 | }
31 | \details{
32 | Plots the number of indels per COSMIC context per sample.
33 | It takes a tibble with counts as its input. This tibble can be generated by 'count_indel_contexts()'.
34 | Each sample is plotted in a separate facet.
35 | The same y axis can be used for all samples or a separate y axis can be used.
36 | The facets at the top show the indel types. First the C and T deletions
37 | Then the C and T insertions. Next are the multi base deletions and insertions.
38 | Finally the deletions with microhomology (mh) are shown.
39 | The x-axis at the bottom shows the number of repeat units.
40 | For mh deletions the microhomology length is shown.
41 | }
42 | \examples{
43 | ## Get The indel counts
44 | ## See 'count_indel_contexts()' for more info on how to do this.
45 | indel_counts <- readRDS(system.file("states/blood_indel_counts.rds",
46 |   package = "MutationalPatterns"
47 | ))
48 | 
49 | ## Plot contexts
50 | plot_indel_contexts(indel_counts)
51 | 
52 | ## Use the same y axis for all samples.
53 | plot_indel_contexts(indel_counts, same_y = TRUE)
54 | 
55 | ## Add extra labels to make plot clearer
56 | plot_indel_contexts(indel_counts, extra_labels = TRUE)
57 | 
58 | ## Create a more condensed plot
59 | plot_indel_contexts(indel_counts, condensed = TRUE)
60 | }
61 | \seealso{
62 | \code{\link{count_indel_contexts}}, \code{\link{plot_main_indel_contexts}}
63 | 
64 | Other Indels: 
65 | \code{\link{count_indel_contexts}()},
66 | \code{\link{get_indel_context}()},
67 | \code{\link{plot_compare_indels}()},
68 | \code{\link{plot_main_indel_contexts}()}
69 | }
70 | \concept{Indels}
71 | 


--------------------------------------------------------------------------------
/R/mut_192_occurrences.R:
--------------------------------------------------------------------------------
 1 | #' Count 192 trinucleotide mutation occurrences
 2 | #'
 3 | #'  @details
 4 | #'  This function is called by mut_matrix_stranded.
 5 | #'  The 192 trinucleotide context is the 96 trinucleotide context combined with the strands.
 6 | #'  This function calculates the 192 trinucleotide context for all variants
 7 | #'  and then splits these per GRanges (samples). It then calculates how often each 192 trinucleotide context occurs.
 8 | #'
 9 | #' @param type_context result from type_context function
10 | #' @param strand factor with strand information for each
11 | #' position, for example "U" for untranscribed, "T" for transcribed strand,
12 | #' and "-" for unknown
13 | #' @param gr_sizes A vector indicating the number of variants per GRanges
14 | #'
15 | #' @importFrom magrittr %>%
16 | #'
17 | #' @return Mutation matrix with 192 mutation occurrences and 96 trinucleotides
18 | #' for two strands
19 | 
20 | mut_192_occurrences <- function(type_context, strand, gr_sizes) {
21 |   # Only the first two levels of the strand factor are used below.
22 |   strand_values <- levels(strand)
23 | 
24 |   # Label each variant with its sample, keeping the sample order of gr_sizes.
25 |   sample_labels <- factor(
26 |     rep(names(gr_sizes), gr_sizes),
27 |     levels = names(gr_sizes)
28 |   )
29 | 
30 |   # Helper: count the 96 contexts of the variants with one strand value.
31 |   # Returns the result of mut_96_occurrences() for that subset.
32 |   count_strand <- function(strand_value) {
33 |     keep <- which(strand == strand_value)
34 | 
35 |     # Subset each element of type_context to the selected variants.
36 |     context_subset <- purrr::map(type_context, function(x) x[keep])
37 | 
38 |     # Recount the number of variants per sample within the subset.
39 |     per_sample <- table(sample_labels[keep])
40 |     sizes_subset <- as.vector(per_sample)
41 |     names(sizes_subset) <- names(per_sample)
42 | 
43 |     mut_96_occurrences(context_subset, sizes_subset)
44 |   }
45 | 
46 |   # Make a 96-trinucleotide count matrix per strand value.
47 |   mut_mat_1 <- count_strand(strand_values[1])
48 |   mut_mat_2 <- count_strand(strand_values[2])
49 | 
50 |   # Combine the matrices and suffix the row names with the strand value.
51 |   mut_mat <- rbind(mut_mat_1, mut_mat_2)
52 |   rownames(mut_mat) <- c(
53 |     paste(rownames(mut_mat_1), strand_values[1], sep = "-"),
54 |     paste(rownames(mut_mat_2), strand_values[2], sep = "-")
55 |   )
56 | 
57 |   # Reorder for backwards compatibility. The two halves are interleaved:
58 |   # row i of the first half is directly followed by row i of the second half.
59 |   n_total <- nrow(mut_mat)
60 |   first_half <- seq(1, n_total / 2)
61 |   second_half <- seq(n_total / 2 + 1, n_total)
62 | 
63 |   # rbind() puts the pairs in columns; as.vector() reads them column by column.
64 |   reorder_i <- as.vector(rbind(first_half, second_half))
65 |   mut_mat <- mut_mat[reorder_i, , drop = FALSE]
66 | 
67 |   return(mut_mat)
68 | }
69 | 


--------------------------------------------------------------------------------
/man/extract_signatures.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/extract_signatures.R
 3 | \name{extract_signatures}
 4 | \alias{extract_signatures}
 5 | \title{Extract mutational signatures from 96 mutation matrix using NMF}
 6 | \usage{
 7 | extract_signatures(
 8 |   mut_matrix,
 9 |   rank,
10 |   nrun = 200,
11 |   nmf_type = c("regular", "variational_bayes"),
12 |   single_core = FALSE,
13 |   fudge = NULL,
14 |   seed = 123456
15 | )
16 | }
17 | \arguments{
18 | \item{mut_matrix}{96 mutation count matrix}
19 | 
20 | \item{rank}{Number of signatures to extract}
21 | 
22 | \item{nrun}{Number of iterations, default = 200.
23 | A lower number will be faster, but result in less accurate results.}
24 | 
25 | \item{nmf_type}{Type of NMF to be used.
26 |              Possible values:
27 |              * 'regular'
28 |              * 'variational_bayes'
29 | The 'regular' method comes from the NMF package.
30 | The 'variational_bayes' method comes from the ccfindR package.
31 | This method uses bayesian inference, which makes it easier to determine the
32 | mathematically optimal number of signatures.}
33 | 
34 | \item{single_core}{Boolean. If TRUE, it forces the NMF algorithm to
35 | use only a single core. This can sometimes prevent issues.
36 | Doesn't apply to variational-bayes NMF}
37 | 
38 | \item{fudge}{Small positive number that is used for the variational_bayes NMF.
39 | Setting this to a small value like 0.0001 can prevent errors from occurring,
40 | when extracting many signatures at once. In general, we recommend extracting
41 | fewer signatures when errors occur, but this parameter can be used when that
42 | is not an option.
43 | Default = NULL.}
44 | 
45 | \item{seed}{Random seed used for the regular NMF, default = 123456}
46 | }
47 | \value{
48 | Named list of mutation matrix, signatures and signature contribution
49 | }
50 | \description{
51 | Decomposes trinucleotide count matrix into signatures and contribution of
52 | those signatures to the spectra of the samples/vcf files.
53 | }
54 | \examples{
55 | ## See the 'mut_matrix()' example for how we obtained the mutation matrix:
56 | mut_mat <- readRDS(system.file("states/mut_mat_data.rds",
57 |   package = "MutationalPatterns"
58 | ))
59 | 
60 | ## This function is computationally intensive.
61 | # nmf_res <- extract_signatures(mut_mat, rank = 2)
62 | 
63 | ## It's also possible to use a variational Bayes method.
64 | ## It requires the ccfindR package to work.
65 | # nmf_res <- extract_signatures(mut_mat, rank = 2, nmf_type = "variational_bayes")
66 | }
67 | \seealso{
68 | \code{\link{mut_matrix}}
69 | }
70 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(bin_mutation_density)
 4 | export(binomial_test)
 5 | export(calculate_lesion_segregation)
 6 | export(cluster_signatures)
 7 | export(context_potential_damage_analysis)
 8 | export(convert_sigs_to_ref)
 9 | export(cos_sim)
10 | export(cos_sim_matrix)
11 | export(count_dbs_contexts)
12 | export(count_indel_contexts)
13 | export(count_mbs_contexts)
14 | export(determine_regional_similarity)
15 | export(enrichment_depletion_test)
16 | export(extract_signatures)
17 | export(fit_to_signatures)
18 | export(fit_to_signatures_bootstrapped)
19 | export(fit_to_signatures_strict)
20 | export(genomic_distribution)
21 | export(get_dbs_context)
22 | export(get_indel_context)
23 | export(get_known_signatures)
24 | export(get_mut_type)
25 | export(get_sim_tb)
26 | export(lengthen_mut_matrix)
27 | export(merge_signatures)
28 | export(mut_context)
29 | export(mut_matrix)
30 | export(mut_matrix_stranded)
31 | export(mut_strand)
32 | export(mut_type)
33 | export(mut_type_occurrences)
34 | export(mutations_from_vcf)
35 | export(plot_192_profile)
36 | export(plot_96_profile)
37 | export(plot_bootstrapped_contribution)
38 | export(plot_compare_dbs)
39 | export(plot_compare_indels)
40 | export(plot_compare_mbs)
41 | export(plot_compare_profiles)
42 | export(plot_contribution)
43 | export(plot_contribution_heatmap)
44 | export(plot_correlation_bootstrap)
45 | export(plot_cosine_heatmap)
46 | export(plot_dbs_contexts)
47 | export(plot_enrichment_depletion)
48 | export(plot_indel_contexts)
49 | export(plot_lesion_segregation)
50 | export(plot_main_dbs_contexts)
51 | export(plot_main_indel_contexts)
52 | export(plot_mbs_contexts)
53 | export(plot_original_vs_reconstructed)
54 | export(plot_profile_heatmap)
55 | export(plot_profile_region)
56 | export(plot_rainfall)
57 | export(plot_regional_similarity)
58 | export(plot_river)
59 | export(plot_signature_strand_bias)
60 | export(plot_spectrum)
61 | export(plot_spectrum_region)
62 | export(plot_strand)
63 | export(plot_strand_bias)
64 | export(pool_mut_mat)
65 | export(read_vcfs_as_granges)
66 | export(rename_nmf_signatures)
67 | export(signature_potential_damage_analysis)
68 | export(split_muts_region)
69 | export(strand_bias_test)
70 | export(strand_occurrences)
71 | export(type_context)
72 | exportClasses(region_cossim)
73 | exportMethods(get_sim_tb)
74 | exportMethods(show)
75 | import(GenomicRanges)
76 | import(NMF)
77 | import(ggalluvial)
78 | import(ggplot2)
79 | importFrom(magrittr,"%>%")
80 | importFrom(methods,setClass)
81 | importFrom(methods,setGeneric)
82 | importFrom(methods,setMethod)
83 | importFrom(pracma,lsqnonneg)
84 | 


--------------------------------------------------------------------------------
/inst/extdata/empty.vcf:
--------------------------------------------------------------------------------
 1 | ##fileformat=VCFv4.1
 2 | ##SnpEffCmd="SnpEff  GRCh37.74 AC_vcf.filtered_variants.vcf -hgvs -lof -no-downstream -no-upstream -no-intergenic "
 3 | ##SnpEffVersion="4.1h (build 2015-08-03), by Pablo Cingolani"
 4 | ##SnpSiftCmd="SnpSift annotate -tabix -name GoNLv5_ -info AF,AN,AC /hpc/cog_bioinf/common_dbs/GoNL/gonl_release5/site_freqs/gonl.snps_indels.r5.sorted.vcf.gz AC_snpEff_snpSift_Cosmicv76.vcf.filtered_variants.vcf"
 5 | ##SnpSiftVersion="SnpSift 4.1h (build 2015-08-03), by Pablo Cingolani"
 6 | ##fileDate=20200528
 7 | ##reference=file:///hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fasta
 8 | ##source=SelectVariants
 9 | ##FILTER=All filters passed
10 | ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
11 | ##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
12 | ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
13 | ##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
14 | ##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
15 | ##contig=<ID=1,length=249250621,assembly="hg19">
16 | ##contig=<ID=2,length=243199373,assembly="hg19">
17 | ##contig=<ID=3,length=198022430,assembly="hg19">
18 | ##contig=<ID=4,length=191154276,assembly="hg19">
19 | ##contig=<ID=5,length=180915260,assembly="hg19">
20 | ##contig=<ID=6,length=171115067,assembly="hg19">
21 | ##contig=<ID=7,length=159138663,assembly="hg19">
22 | ##contig=<ID=8,length=146364022,assembly="hg19">
23 | ##contig=<ID=9,length=141213431,assembly="hg19">
24 | ##contig=<ID=10,length=135534747,assembly="hg19">
25 | ##contig=<ID=11,length=135006516,assembly="hg19">
26 | ##contig=<ID=12,length=133851895,assembly="hg19">
27 | ##contig=<ID=13,length=115169878,assembly="hg19">
28 | ##contig=<ID=14,length=107349540,assembly="hg19">
29 | ##contig=<ID=15,length=102531392,assembly="hg19">
30 | ##contig=<ID=16,length=90354753,assembly="hg19">
31 | ##contig=<ID=17,length=81195210,assembly="hg19">
32 | ##contig=<ID=18,length=78077248,assembly="hg19">
33 | ##contig=<ID=19,length=59128983,assembly="hg19">
34 | ##contig=<ID=20,length=63025520,assembly="hg19">
35 | ##contig=<ID=21,length=48129895,assembly="hg19">
36 | ##contig=<ID=22,length=51304566,assembly="hg19">
37 | ##contig=<ID=X,length=155270560,assembly="hg19">
38 | ##contig=<ID=Y,length=59373566,assembly="hg19">
39 | ##contig=<ID=MT,length=16569,assembly="hg19">
40 | #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	ACGRABULK
41 | 


--------------------------------------------------------------------------------
/R/plot_mbs_contexts.R:
--------------------------------------------------------------------------------
 1 | #' Plot the MBS contexts
 2 | #'
 3 | #' @details
 4 | #' Plots the number of MBS per MBS length per sample.
 5 | #' It takes a tibble with counts as its input. This tibble can be generated by count_mbs_contexts
 6 | #' Each sample is plotted in a separate facet.
 7 | #' The same y axis can be used for all samples or a separate y axis can be used.
 8 | #'
 9 | #' @param counts A tibble containing the number of MBS per MBS length.
10 | #' @param same_y A boolean describing whether the same y axis should be used for all samples.
11 | #'
12 | #' @return A ggplot figure.
13 | #'
14 | #' @examples
15 | #' ## Get The mbs counts
16 | #' ## See 'count_mbs_contexts()' for more info on how to do this.
17 | #' mbs_counts <- readRDS(system.file("states/blood_mbs_counts.rds",
18 | #'   package = "MutationalPatterns"
19 | #' ))
20 | #'
21 | #' ## Plot contexts
22 | #' plot_mbs_contexts(mbs_counts)
23 | #'
24 | #' ## Use a different y axis for all samples.
25 | #' plot_mbs_contexts(mbs_counts, same_y = FALSE)
26 | #' @import ggplot2
27 | #' @importFrom magrittr %>%
28 | #' @family MBS
29 | #'
30 | #' @seealso \code{\link{count_mbs_contexts}}
31 | #'
32 | #' @export
33 | plot_mbs_contexts <- function(counts, same_y = TRUE) {
34 | 
35 |   # The names below are used as column names via non standard evaluation.
36 |   # Binding them to NULL keeps R CMD check from reporting undefined globals.
37 |   count <- size <- NULL
38 | 
39 |   # Reshape to long format: one row per size/sample combination.
40 |   # Both columns become factors that keep their order of appearance.
41 |   counts <- tibble::rownames_to_column(as.data.frame(counts), "size")
42 |   counts <- tidyr::pivot_longer(counts, -size, names_to = "sample", values_to = "count")
43 |   counts <- dplyr::mutate(
44 |     counts,
45 |     size = factor(size, levels = unique(size)),
46 |     sample = factor(sample, levels = unique(sample))
47 |   )
48 | 
49 |   # Total (rounded) number of mutations per sample.
50 |   sample_totals <- dplyr::summarise(
51 |     dplyr::group_by(counts, sample),
52 |     nr_muts = round(sum(count))
53 |   )
54 | 
55 |   # Share one y axis between the facets, or let each facet scale its own.
56 |   facet_scale <- if (same_y) "fixed" else "free_y"
57 | 
58 |   # Facet labels show the sample name followed by its number of mutations.
59 |   facet_labs_y <- stringr::str_c(
60 |     sample_totals$sample, " (n = ", sample_totals$nr_muts, ")"
61 |   )
62 |   names(facet_labs_y) <- sample_totals$sample
63 | 
64 |   # Create plot: one bar per MBS size, one facet row per sample.
65 |   fig <- ggplot(counts, aes(x = size, y = count, fill = size)) +
66 |     geom_bar(stat = "identity") +
67 |     facet_grid(sample ~ .,
68 |       scales = facet_scale,
69 |       labeller = labeller(sample = facet_labs_y)
70 |     ) +
71 |     scale_fill_manual(values = MBS_COLORS) +
72 |     labs(x = "MBS size", y = "Nr. of MBSs") +
73 |     guides(fill = "none") +
74 |     theme_bw() +
75 |     theme(
76 |       panel.grid.major.x = element_blank(),
77 |       panel.grid.minor.y = element_blank()
78 |     )
79 |   return(fig)
80 | }
81 | 


--------------------------------------------------------------------------------
/man/plot_compare_dbs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_compare_dbs.R
 3 | \name{plot_compare_dbs}
 4 | \alias{plot_compare_dbs}
 5 | \title{Compare two DBS mutation profiles}
 6 | \usage{
 7 | plot_compare_dbs(
 8 |   profile1,
 9 |   profile2,
10 |   profile_names = c("profile 1", "profile 2"),
11 |   profile_ymax = 0.2,
12 |   diff_ylim = c(-0.1, 0.1)
13 | )
14 | }
15 | \arguments{
16 | \item{profile1}{First mutation profile}
17 | 
18 | \item{profile2}{Second mutation profile}
19 | 
20 | \item{profile_names}{Character vector with names of the mutations profiles
21 | used for plotting, default = c("profile 1", "profile 2")}
22 | 
23 | \item{profile_ymax}{Maximum value of y-axis (relative contribution) for
24 | profile plotting. This can only be used to increase the y axis.
25 | If bars fall outside this limit, the maximum value is
26 | automatically increased. default = 0.2.}
27 | 
28 | \item{diff_ylim}{Y-axis limits for profile difference plot,
29 | default = c(-0.1, 0.1)}
30 | }
31 | \value{
32 | A ggplot2 object
33 | }
34 | \description{
35 | Plots two DBS mutation profiles and their difference, reports the residual
36 | sum of squares (RSS).
37 | }
38 | \examples{
39 | 
40 | ## Get the DBS counts
41 | ## See 'count_dbs_contexts()' for more info on how to do this.
42 | dbs_counts <- readRDS(system.file("states/blood_dbs_counts.rds",
43 |   package = "MutationalPatterns"
44 | ))
45 | 
46 | ## Get DBS refit info.
47 | ## See 'fit_to_signatures()' for more info on how to do this.
48 | fit_res <- readRDS(system.file("states/dbs_refit.rds",
49 |   package = "MutationalPatterns"
50 | ))
51 | 
52 | ## Compare the reconstructed profile of sample 1 with the original profile
53 | ## The same thing could be done with a reconstructed profile from NMF.
54 | plot_compare_dbs(dbs_counts[, 1], fit_res$reconstructed[, 1])
55 | 
56 | ## You could also compare regular mutation profiles with each other.
57 | plot_compare_dbs(
58 |   dbs_counts[, 1],
59 |   dbs_counts[, 2]
60 | )
61 | 
62 | ## Or change the names of the profiles
63 | plot_compare_dbs(dbs_counts[, 1],
64 |   dbs_counts[, 2],
65 |   profile_names = c("Original", "Reconstructed")
66 | )
67 | 
68 | ## You can also change the y limits.
69 | ## This can be done separately for the profiles and the different facets.
70 | plot_compare_dbs(dbs_counts[, 1],
71 |   dbs_counts[, 2],
72 |   profile_ymax = 0.3,
73 |   diff_ylim = c(-0.03, 0.03)
74 | )
75 | }
76 | \seealso{
77 | \code{\link{plot_compare_profiles}},
78 | \code{\link{plot_compare_indels}},
79 | \code{\link{plot_compare_mbs}}
80 | 
81 | Other DBS: 
82 | \code{\link{count_dbs_contexts}()},
83 | \code{\link{get_dbs_context}()},
84 | \code{\link{plot_dbs_contexts}()},
85 | \code{\link{plot_main_dbs_contexts}()}
86 | }
87 | \concept{DBS}
88 | 


--------------------------------------------------------------------------------
/man/plot_rainfall.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_rainfall.R
 3 | \name{plot_rainfall}
 4 | \alias{plot_rainfall}
 5 | \title{Plot genomic rainfall}
 6 | \usage{
 7 | plot_rainfall(
 8 |   vcf,
 9 |   chromosomes,
10 |   title = "",
11 |   colors = NA,
12 |   cex = 2.5,
13 |   cex_text = 3,
14 |   ylim = 1e+08,
15 |   type = c("snv", "indel", "dbs", "mbs")
16 | )
17 | }
18 | \arguments{
19 | \item{vcf}{GRanges object}
20 | 
21 | \item{chromosomes}{Vector of chromosome/contig names of the reference
22 | genome to be plotted}
23 | 
24 | \item{title}{Optional plot title}
25 | 
26 | \item{colors}{Vector of 6 colors used for plotting}
27 | 
28 | \item{cex}{Point size}
29 | 
30 | \item{cex_text}{Text size}
31 | 
32 | \item{ylim}{Maximum y value (genomic distance)}
33 | 
34 | \item{type}{The mutation type of the GRanges object that will be used.
35 | Possible values:
36 | * 'snv' (default)
37 | * 'indel'
38 | * 'dbs'
39 | * 'mbs'}
40 | }
41 | \value{
42 | Rainfall plot
43 | }
44 | \description{
45 | Rainfall plot visualizes the types of mutations and intermutation distance
46 | }
47 | \details{
48 | Rainfall plots can be used to visualize the distribution of mutations
49 | along the genome or a subset of chromosomes. The distance of a mutation
50 | with the mutation prior to it (the intermutation distance) is plotted on
51 | the y-axis on a log scale. The input GRanges are sorted before plotting.
52 | 
53 | The colour of the points indicates the base substitution type.
54 | Clusters of mutations with lower intermutation distance represent mutation
55 | hotspots.
56 | }
57 | \examples{
58 | ## See the 'read_vcfs_as_granges()' example for how we obtained the
59 | ## following data:
60 | vcfs <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
61 |   package = "MutationalPatterns"
62 | ))
63 | 
64 | # Specify chromosomes of interest.
65 | chromosomes <- names(genome(vcfs[[1]])[1:22])
66 | 
67 | ## Do a rainfall plot for all chromosomes:
68 | plot_rainfall(vcfs[[1]],
69 |   title = names(vcfs[1]),
70 |   chromosomes = chromosomes,
71 |   cex = 1
72 | )
73 | 
74 | ## Or for a single chromosome (chromosome 1):
75 | plot_rainfall(vcfs[[1]],
76 |   title = names(vcfs[1]),
77 |   chromosomes = chromosomes[1],
78 |   cex = 2
79 | )
80 | 
81 | ## You can also use other variant types
82 | 
83 | ## Get a GRangesList or GRanges object with indel contexts.
84 | ## See 'get_indel_context()' for more info on how to do this.
85 | grl_indel_context <- readRDS(system.file("states/blood_grl_indel_context.rds",
86 |   package = "MutationalPatterns"
87 | ))
88 | 
89 | plot_rainfall(grl_indel_context[[1]],
90 |   title = "Indel rainfall",
91 |   chromosomes,
92 |   type = "indel"
93 | )
94 | 
95 | }
96 | \seealso{
97 | \code{\link{read_vcfs_as_granges}}
98 | }
99 | 


--------------------------------------------------------------------------------
/man/rename_nmf_signatures.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rename_nmf_signatures.R
 3 | \name{rename_nmf_signatures}
 4 | \alias{rename_nmf_signatures}
 5 | \title{Rename NMF signatures based on previously defined signatures}
 6 | \usage{
 7 | rename_nmf_signatures(
 8 |   nmf_res,
 9 |   signatures,
10 |   cutoff = 0.85,
11 |   base_name = "SBS",
12 |   suffix = "-like"
13 | )
14 | }
15 | \arguments{
16 | \item{nmf_res}{Named list of mutation matrix, signatures and signature contribution}
17 | 
18 | \item{signatures}{A signature matrix}
19 | 
20 | \item{cutoff}{Cutoff at which signatures are considered similar. Default: 0.85}
21 | 
22 | \item{base_name}{The base part of a letter based signature name. Default: "SBS"}
23 | 
24 | \item{suffix}{String. The suffix added to the name of a renamed signature. Default: "-like"}
25 | }
26 | \value{
27 | A nmf_res with changed signature names
28 | }
29 | \description{
30 | This function renames signatures identified with NMF based on previously defined signatures.
31 | If a NMF signature has a cosine similarity with a previously defined signature,
32 | that is higher than the cutoff, then this NMF signature will get the name
33 | of the previously defined signature. If not the NMF signature will receive a letter based name.
34 | For example: SBSA.
35 | This only changes the names of signatures, not their actual values.
36 | This function can help with identifying whether signatures found with NMF are already known,
37 | which can be useful for interpretation.
38 | An extracted signature that is not similar to any previously defined signatures,
39 | is not proof of a "novel" signature. The extracted signature might be a
40 | combination of known signatures, that could not be split by NMF. This can happen
41 | when, for example, too few samples were used for the NMF.
42 | }
43 | \examples{
44 | 
45 | ## Extracting signatures can be computationally intensive, so
46 | ## we use pre-computed data generated with the following command:
47 | # nmf_res <- extract_signatures(mut_mat, rank = 2)
48 | 
49 | nmf_res <- readRDS(system.file("states/nmf_res_data.rds",
50 |   package = "MutationalPatterns"
51 | ))
52 | 
53 | ## Get signatures
54 | signatures <- get_known_signatures()
55 | 
56 | rename_nmf_signatures(nmf_res, signatures)
57 | 
58 | ## You can change or remove the suffix of the renamed signatures.
59 | rename_nmf_signatures(nmf_res, signatures, suffix = "")
60 | 
61 | ## You can change how similar the signatures have to be, before they are considered similar.
62 | rename_nmf_signatures(nmf_res, signatures, cutoff = 0.95)
63 | 
64 | ## You can also change the base_name of the signatures that end up with a letter name.
65 | rename_nmf_signatures(nmf_res, signatures, cutoff = 0.95, base_name = "Signature_")
66 | }
67 | 


--------------------------------------------------------------------------------
/tests/testthat/test-mut_matrix_stranded.R:
--------------------------------------------------------------------------------
 1 | context("test-mut_matrix_stranded")
 2 | 
 3 | 
 4 | # To test mut_matrix_stranded, we need to load the reference genome and the genes first.
 5 | ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
 6 | library(ref_genome, character.only = TRUE)
 7 | library("TxDb.Hsapiens.UCSC.hg19.knownGene")
 8 | 
 9 | # Load the gene ranges, the input GRanges and the expected stranded matrix.
10 | genes_hg19 <- genes(TxDb.Hsapiens.UCSC.hg19.knownGene)
11 | input <- readRDS(system.file("states/read_vcfs_as_granges_output.rds",
12 |   package = "MutationalPatterns"
13 | ))
14 | expected <- readRDS(system.file("states/mut_mat_s_data.rds",
15 |   package = "MutationalPatterns"
16 | ))
17 | 
18 | test_that("transforms correctly", {
19 |   output <- mut_matrix_stranded(input, ref_genome, ranges = genes_hg19)
20 |   expect_equal(output, expected)
21 | })
22 | 
23 | # Test that a list is an acceptable input
24 | test_that("a list is also acceptable input", {
25 |   output <- mut_matrix_stranded(input, ref_genome, ranges = genes_hg19)
26 |   output_list <- mut_matrix_stranded(as.list(input), ref_genome, ranges = genes_hg19)
27 | 
28 |   expect_equal(output_list, output)
29 |   expect_equal(output_list, expected)
30 | })
31 | 
32 | # A single sample can be used as input.
33 | test_that("A single GR can also be used as input", {
34 |   output_singlesample <- mut_matrix_stranded(input[[1]], ref_genome, ranges = genes_hg19)
35 |   expect_true(inherits(output_singlesample, "matrix"))
36 |   expect_equal(dim(output_singlesample), c(192, 1))
37 | })
38 | 
39 | # The seqlevels of the genes need to match those of the input.
40 | genes_badseqlevel <- genes_hg19
41 | seqlevels(genes_badseqlevel)[1] <- "chrtest"
42 | test_that("An error is given when the seqlevels of the ranges do not match", {
43 |   expect_error(
44 |     {
45 |       mut_matrix_stranded(input[[1]], ref_genome, ranges = genes_badseqlevel)
46 |     },
47 |     "Chromosome names \\(seqlevels\\) of vcf and genes Granges object do not match"
48 |   )
49 | })
50 | 
51 | 
52 | # Test replication mode
53 | repli_strand_granges <- readRDS(system.file("states/repli_strand.rds",
54 |   package = "MutationalPatterns"
55 | ))
56 | expected_repli <- readRDS(system.file("states/mut_mat_repli.rds",
57 |   package = "MutationalPatterns"
58 | ))
59 | 
60 | test_that("replication mode transforms correctly", {
61 |   mut_mat_repli <- mut_matrix_stranded(input, ref_genome, repli_strand_granges, mode = "replication")
62 |   expect_equal(mut_mat_repli, expected_repli)
63 | })
64 | 
65 | 
66 | # Test longer context (extension = 2 instead of the default)
67 | output_longer <- mut_matrix_stranded(input, ref_genome, ranges = genes_hg19, extension = 2)
68 | 
69 | test_that("Output has correct class", {
70 |   expect_true(inherits(output_longer, "matrix"))
71 | })
72 | 
73 | test_that("Output has correct dimensions", {
74 |   expect_equal(dim(output_longer), c(3072, 9))
75 | })
76 | 


--------------------------------------------------------------------------------