├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
├── RcppExports.R
├── cluster.R
├── complement.R
├── intersect.R
├── join_closest.R
└── subtract.R
├── README.md
├── _pkgdown.yml
├── cran-comments.md
├── docs
├── articles
│ ├── index.html
│ ├── intro.html
│ └── resources
│ │ ├── genome_cluster_docu.png
│ │ ├── genome_complement_docu.png
│ │ ├── genome_intersect_docu.png
│ │ ├── genome_join_closest_docu.png
│ │ ├── genome_join_docu.png
│ │ └── genome_subtract_docu.png
├── authors.html
├── docsearch.css
├── docsearch.js
├── index.html
├── link.svg
├── news
│ └── index.html
├── pkgdown.css
├── pkgdown.js
├── pkgdown.yml
└── reference
│ ├── cluster_interval.html
│ ├── genome_cluster.html
│ ├── genome_complement.html
│ ├── genome_intersect.html
│ ├── genome_join_closest.html
│ ├── genome_subtract.html
│ └── index.html
├── man
├── cluster_interval.Rd
├── genome_cluster.Rd
├── genome_complement.Rd
├── genome_intersect.Rd
├── genome_join_closest.Rd
└── genome_subtract.Rd
├── src
├── .gitignore
├── RcppExports.cpp
├── cluster_interval.cpp
└── tidygenomics_init.c
├── tests
├── testthat.R
└── testthat
│ ├── test_cluster.R
│ ├── test_complement.R
│ ├── test_intersect.R
│ ├── test_issue.R
│ ├── test_join_closest.R
│ └── test_subtract.R
├── tidygenomics.Rproj
└── vignettes
├── intro.Rmd
└── resources
├── genome_cluster_docu.png
├── genome_complement_docu.png
├── genome_intersect_docu.png
├── genome_join_closest_docu.png
├── genome_join_docu.png
└── genome_subtract_docu.png
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | cran-comments.md
4 | ^_pkgdown\.yml$
5 | ^docs$
6 | ^pkgdown$
7 | ^CRAN-RELEASE$
8 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | inst/doc
6 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: tidygenomics
2 | Type: Package
3 | Title: Tidy Verbs for Dealing with Genomic Data Frames
4 | Version: 0.1.2
5 | Authors@R: c(person("Constantin", "Ahlmann-Eltze", email = "artjom31415@googlemail.com", role = c("aut", "cre"),
6 | comment = c(ORCID = "0000-0002-3762-068X")),
7 | person("Stan Developers", role="cph",
8 | comment="Code from the Stan Math library is reused in 'cluster_interval.cpp'"),
9 | person("David", "Robinson", role="cph",
10 | comment="Code from the fuzzyjoin package is reused"))
11 | Description: Handle genomic data within data frames just as you would with 'GRanges'.
12 | This package provides methods to deal with genomic intervals the "tidy-way", which makes
13 | it simpler to integrate them into the general data munging process. The API is inspired by the
14 | popular 'bedtools' and the genome_join() method from the 'fuzzyjoin' package.
15 | URL: https://github.com/const-ae/tidygenomics
16 | License: GPL-3
17 | Encoding: UTF-8
18 | LazyData: true
19 | Imports:
20 | dplyr,
21 | rlang,
22 | purrr,
23 | tidyr,
24 | fuzzyjoin (>= 0.1.3),
25 | IRanges,
26 | Rcpp
27 | Suggests: testthat,
28 | knitr,
29 | rmarkdown
30 | RoxygenNote: 6.1.1
31 | LinkingTo:
32 | Rcpp
33 | VignetteBuilder: knitr
34 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(cluster_interval)
4 | export(genome_anti_join_closest)
5 | export(genome_cluster)
6 | export(genome_complement)
7 | export(genome_full_join_closest)
8 | export(genome_inner_join_closest)
9 | export(genome_intersect)
10 | export(genome_join_closest)
11 | export(genome_left_join_closest)
12 | export(genome_right_join_closest)
13 | export(genome_semi_join_closest)
14 | export(genome_subtract)
15 | importFrom(Rcpp,sourceCpp)
16 | importFrom(dplyr,"%>%")
17 | importFrom(dplyr,"n")
18 | importFrom(rlang,":=")
19 | importFrom(rlang,"sym")
20 | useDynLib(tidygenomics, .registration = TRUE)
21 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 |
2 | # v0.1.2
3 |
4 | * Remove remaining calls to deprecated tidyr functions to become compatible with tidyr v1.0.0.
5 | Thanks to @jennybc for the pull request (#6)
6 |
7 | # v0.1.1
8 |
9 | * Fix issue #5
10 | - the genome_cluster method assigned all chunks to cluster zero if their end was smaller
11 | than the end of the first entry
12 |
13 | * Port dplyr calls to new tidyeval API
14 | - This avoids plenty of deprecation warnings
15 |
16 | * Add pkgdown webpage: https://const-ae.github.io/tidygenomics/
17 |
18 | # Initial Release (v0.1.0)
19 |
20 | First acceptance on CRAN
21 |
--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 |
4 | sort_indices <- function(x) { # thin wrapper: forwards `x` to the compiled routine named below
5 | .Call(`_tidygenomics_sort_indices`, x)
6 | }
7 |
8 | #' Cluster intervals that are given as 2 equal-length numeric vectors.
9 | #' @param starts A numeric vector that defines the start of each interval.
10 | #' @param ends A numeric vector that defines the end of each interval.
11 | #' @param max_distance The maximum distance up to which intervals are still considered to be the same cluster. Default: 0.
12 | #' @return One cluster id per interval; ids appear to start at 0 (see the example) - confirm against 'cluster_interval.cpp'.
13 | #' @examples
14 | #' starts <- c(50, 100, 120)
15 | #' ends <- c(75, 130, 150)
16 | #' j <- cluster_interval(starts, ends)
17 | #' j == c(0,1,1)
18 | #' @export
19 | cluster_interval <- function(starts, ends, max_distance = 0L) {
20 | .Call(`_tidygenomics_cluster_interval`, starts, ends, max_distance)
21 | }
22 |
23 |
--------------------------------------------------------------------------------
/R/cluster.R:
--------------------------------------------------------------------------------
1 |
2 | #' @useDynLib tidygenomics, .registration = TRUE
3 | #' @importFrom Rcpp sourceCpp
4 | NULL
5 |
6 | .onUnload <- function (libpath) { # unload hook: releases the package's compiled library
7 | library.dynam.unload("tidygenomics", libpath)
8 | }
9 |
10 | #' Cluster the intervals of a data frame based on chromosome, start and end.
11 | #'
12 | #' @param x A dataframe.
13 | #' @param by A character vector with 3 entries which are the chromosome, start and end column.
14 | #'   For example: \code{by=c("chr", "start", "end")}
15 | #' @param max_distance The maximum distance up to which intervals are still considered to be
16 | #'   the same cluster. Default: 0.
17 | #' @param cluster_column_name A string that is used as the name of the new cluster column.
18 | #' @return The dataframe with an additional column holding the cluster id of each row.
19 | #' @examples
20 | #'
21 | #' library(dplyr)
22 | #'
23 | #' x1 <- data.frame(id = 1:4, bla=letters[1:4],
24 | #'                  chromosome = c("chr1", "chr1", "chr2", "chr1"),
25 | #'                  start = c(100, 120, 300, 260),
26 | #'                  end = c(150, 250, 350, 450))
27 | #' genome_cluster(x1, by=c("chromosome", "start", "end"))
28 | #' genome_cluster(x1, by=c("chromosome", "start", "end"), max_distance=10)
29 | #' @export
30 | genome_cluster <- function(x, by=NULL, max_distance=0, cluster_column_name="cluster_id"){
31 |   # Validate first; `||` short-circuits, so length() is only checked for a non-NULL `by`.
32 |   if (is.null(by) || length(by) != 3) {
33 |     stop("genome_cluster must join on exactly three columns")
34 |   }
35 |
36 |   # Remember the grouping of x so that it can be restored on the result.
37 |   x_groups <- dplyr::groups(x)
38 |   x <- dplyr::ungroup(x)
39 |   regroup <- function(d) {
40 |     if (is.null(x_groups)) {
41 |       return(d)
42 |     }
43 |     g <- purrr::map_chr(x_groups, as.character)
44 |     missing <- !(g %in% colnames(d))
45 |     g[missing] <- paste0(g[missing], ".x")
46 |     # group_by_() is deprecated; splice the column names in as symbols instead.
47 |     dplyr::group_by(d, !!!rlang::syms(g))
48 |   }
49 |
50 |   ret <- x %>%
51 |     dplyr::group_by(!!sym(by[1])) %>%
52 |     dplyr::mutate(!! cluster_column_name := cluster_interval(!!sym(by[2]), !!sym(by[3]), max_distance = max_distance)) %>%
53 |     dplyr::ungroup() %>%
54 |     dplyr::mutate(!! cluster_column_name := as.numeric(as.factor(paste0(!!sym(by[1]), "-", !!sym(cluster_column_name))))-1)
55 |
56 |   regroup(ret)
57 | }
58 |
--------------------------------------------------------------------------------
/R/complement.R:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | #' Calculates the complement of the intervals in a data frame.
6 | #'
7 | #' The optional \code{chromosome_size} data frame has 2 or 3 columns: the chromosome
8 | #' name followed by either the size (2 columns) or the start and the end index
9 | #' (3 columns) of the chromosome.
10 | #' @param x A data frame for which the complement is calculated
11 | #' @param chromosome_size A dataframe with at least 2 columns that contains
12 | #'   first the chromosome name and then the size of that chromosome. Can be NULL
13 | #'   in which case the largest value per chromosome from \code{x} is used.
14 | #' @param by A character vector with 3 entries which are the chromosome, start and end column.
15 | #'   For example: \code{by=c("chr", "start", "end")}
16 | #' @return A data frame with the chromosome, start and end of the regions that are not covered by \code{x}.
17 | #' @examples
18 | #'
19 | #' library(dplyr)
20 | #'
21 | #' x1 <- data.frame(id = 1:4, bla=letters[1:4],
22 | #'                  chromosome = c("chr1", "chr1", "chr2", "chr1"),
23 | #'                  start = c(100, 200, 300, 400),
24 | #'                  end = c(150, 250, 350, 450))
25 | #' genome_complement(x1, by=c("chromosome", "start", "end"))
26 | #' @export
27 | genome_complement <- function(x, chromosome_size=NULL, by=NULL){
28 |   # `||` short-circuits, so length() is only evaluated for a non-NULL `by`.
29 |   if (is.null(by) || length(by) != 3) {
30 |     stop("genome_complement must work on exactly three columns")
31 |   }
32 |
33 |   if(is.null(chromosome_size)){
34 |     # No sizes given: span each chromosome from 1 to the largest end seen in `x`.
35 |     chromosome_size <- x %>%
36 |       dplyr::group_by(!! sym(by[1])) %>%
37 |       dplyr::summarize(start = 1,
38 |                        end = max(!! sym(by[3])))
39 |   }else if(ncol(chromosome_size) == 2){
40 |     # (name, size) layout: insert a start column of 1 between name and size.
41 |     chromosome_size <- cbind(chromosome_size[, 1, drop=FALSE], data.frame(start=1), chromosome_size[, -1, drop=FALSE])
42 |   }
43 |
44 |   # Name the first three columns like `by` so that both inputs can be matched on them.
45 |   colnames(chromosome_size)[1:3] <- by
46 |   genome_subtract(chromosome_size, x, by=by)
47 | }
48 |
--------------------------------------------------------------------------------
/R/intersect.R:
--------------------------------------------------------------------------------
1 |
2 | #' @importFrom dplyr "%>%" "n"
3 | #' @importFrom rlang "sym" ":="
4 | NULL
5 |
6 |
7 | ## quiets R CMD check notes about the `..`-prefixed helper columns that are referenced unquoted in the dplyr pipelines
8 | if(getRversion() >= "2.15.1") utils::globalVariables(c("..start", "..end", "..id", "..distance"))
9 |
10 |
11 |
12 | #' Intersect data frames based on chromosome, start and end.
13 | #'
14 | #' @param x A dataframe.
15 | #' @param y A dataframe.
16 | #' @param by A character vector with 3 entries which are used to match the chromosome, start and end column.
17 | #'   For example: \code{by=c("Chromosome"="chr", "Start"="start", "End"="end")}
18 | #' @param mode One of "both" (columns of \code{x} and \code{y}), "left" (only columns of \code{x}),
19 | #'   "right" (only columns of \code{y}) or "anti" (rows of \code{x} that overlap nothing in \code{y}).
20 | #' @return The intersected dataframe of \code{x} and \code{y} with the new boundaries.
21 | #' @examples
22 | #'
23 | #' library(dplyr)
24 | #'
25 | #' x1 <- data.frame(id = 1:4, bla=letters[1:4],
26 | #'                  chromosome = c("chr1", "chr1", "chr2", "chr2"),
27 | #'                  start = c(100, 200, 300, 400),
28 | #'                  end = c(150, 250, 350, 450))
29 | #'
30 | #' x2 <- data.frame(id = 1:4, BLA=LETTERS[1:4],
31 | #'                  chromosome = c("chr1", "chr2", "chr2", "chr1"),
32 | #'                  start = c(140, 210, 400, 300),
33 | #'                  end = c(160, 240, 415, 320))
34 | #' j <- genome_intersect(x1, x2, by=c("chromosome", "start", "end"), mode="both")
35 | #' print(j)
36 | #'
37 | #'
38 | #' @export
39 | genome_intersect <- function(x, y, by=NULL, mode= "both"){
40 |   # Much of this code is copied from https://github.com/dgrtwo/fuzzyjoin
41 |
42 |   x_groups <- dplyr::groups(x)
43 |   x <- dplyr::ungroup(x)
44 |   regroup <- function(d) {
45 |     if (is.null(x_groups)) {
46 |       return(d)
47 |     }
48 |     g <- purrr::map_chr(x_groups, as.character)
49 |     missing <- !(g %in% colnames(d))
50 |     g[missing] <- paste0(g[missing], ".x")
51 |     # group_by_() is deprecated; splice the column names in as symbols instead.
52 |     dplyr::group_by(d, !!!rlang::syms(g))
53 |   }
54 |
55 |   mode <- match.arg(mode, c("both", "left", "right", "anti"))
56 |
57 |   by <- dplyr::common_by(by, x, y)
58 |   if (length(by$x) != 3) {
59 |     stop("genome_intersect must join on exactly three columns")
60 |   }
61 |
62 |   # Returns one row per overlapping (x, y) pair: both row indices plus the shared boundaries.
63 |   index_match_fun <- function(x,y){
64 |     # nest around the chromosome column
65 |     x$..index <- seq_len(nrow(x))
66 |     y$..index <- seq_len(nrow(y))
67 |     nested_x <- dplyr::group_by_at(x, 1) %>% tidyr::nest()
68 |     nested_y <- dplyr::group_by_at(y, 1) %>% tidyr::nest()
69 |     chrom_key <- c(colnames(nested_y)[1])
70 |     names(chrom_key) <- colnames(nested_x)[1]
71 |
72 |     joined <- dplyr::inner_join(nested_x, nested_y, by = chrom_key)
73 |
74 |     # find matching ranges in each
75 |     find_overlaps <- function(xd, yd) {
76 |       r1 <- IRanges::IRanges(xd[[1]], xd[[2]])
77 |       r2 <- IRanges::IRanges(yd[[1]], yd[[2]])
78 |       o <- as.data.frame(IRanges::findOverlaps(r1, r2))
79 |       intersection <- IRanges::pintersect(r1[o$queryHits], r2[o$subjectHits])
80 |       data.frame(x = xd$..index[o$queryHits], y = yd$..index[o$subjectHits],
81 |                  ..start=IRanges::start(intersection), ..end=IRanges::end(intersection))
82 |     }
83 |
84 |     ret <- purrr::map2_df(joined$data.x, joined$data.y, find_overlaps)
85 |     if (ncol(ret) == 0) {
86 |       # No shared chromosome: keep the expected columns so that every mode below still works.
87 |       ret <- data.frame(x = integer(0), y = integer(0), ..start = integer(0), ..end = integer(0))
88 |     }
89 |     ret
90 |   }
91 |
92 |   d1 <- x[, by$x, drop = FALSE]
93 |   d2 <- y[, by$y, drop = FALSE]
94 |   matches <- index_match_fun(d1, d2)
95 |
96 |   if (mode == "anti") {
97 |     if (nrow(matches) == 0) {
98 |       return(regroup(x))
99 |     }
100 |     return(regroup(x[-sort(unique(matches$x)), ]))
101 |   }
102 |   if (mode == "left") {
103 |     ret <- x %>%
104 |       dplyr::select(- dplyr::one_of(by$x[-1])) %>%
105 |       dplyr::mutate(..id=seq_len(n())) %>%
106 |       dplyr::inner_join(matches[, c("x", "..start", "..end")], by=c("..id"="x")) %>%
107 |       dplyr::rename(!! by$x[2] := `..start`, !! by$x[3] := `..end`) %>%
108 |       dplyr::select(- `..id`) %>%
109 |       regroup()
110 |     return(ret)
111 |   }
112 |   else if (mode == "right") {
113 |     ret <- y %>%
114 |       dplyr::select(- dplyr::one_of(by$y[-1])) %>%
115 |       dplyr::mutate(..id=seq_len(n())) %>%
116 |       dplyr::inner_join(matches[,c("y", "..start", "..end")], by=c("..id"="y")) %>%
117 |       dplyr::rename(!! by$y[2] := `..start`, !! by$y[3] := `..end`) %>%
118 |       dplyr::select(- `..id`) %>%
119 |       regroup()
120 |     return(ret)
121 |   }
122 |
123 |   matches <- dplyr::arrange(matches, x, y)
124 |   for (col in intersect(colnames(x), colnames(y))) {
125 |     if(! col %in% by$x){
126 |       x <- dplyr::rename(x, !! paste0(col, ".x") := !! sym(col))
127 |     }
128 |     if(! col %in% by$y){
129 |       y <- dplyr::rename(y, !! paste0(col, ".y") := !! sym(col))
130 |     }
131 |   }
132 |
133 |   ret <- dplyr::bind_cols(x[matches$x, , drop = FALSE] %>% dplyr::select(- dplyr::one_of(by$x[-1])),
134 |                           y[matches$y, , drop = FALSE] %>% dplyr::select(- dplyr::one_of(by$y)))
135 |   if (ncol(matches) > 2) {
136 |     extra_cols <- matches[, -(1:2), drop = FALSE]
137 |     ret <- dplyr::bind_cols(ret, extra_cols) %>%
138 |       dplyr::rename(!! by$x[2] := `..start`, !! by$x[3] := `..end`)
139 |   }
140 |   regroup(ret)
141 | }
142 |
--------------------------------------------------------------------------------
/R/join_closest.R:
--------------------------------------------------------------------------------
1 |
2 |
3 | #' Join intervals on chromosomes in data frames, to the closest partner
4 | #'
5 | #' @param x A dataframe.
6 | #' @param y A dataframe.
7 | #' @param by A character vector with 3 entries which are used to match the chromosome, start and end column.
8 | #'   For example: \code{by=c("Chromosome"="chr", "Start"="start", "End"="end")}
9 | #' @param mode One of "inner", "full", "left", "right", "semi" or "anti".
10 | #' @param distance_column_name A string that is used as the new column name with the distance.
11 | #'   If \code{NULL} no new column is added.
12 | #' @param max_distance Only entries whose distance is strictly smaller than this value are joined.
13 | #' @param select A string that is passed on to \code{IRanges::distanceToNearest}, can either be
14 | #'   "all", which means that if multiple intervals have the same distance all are reported, or
15 | #'   "arbitrary", which means that in that case an arbitrary one is chosen.
16 | #' @param ... Additional arguments passed on to \code{genome_join_closest}.
17 | #' @return The joined dataframe of \code{x} and \code{y}.
18 | #' @examples
19 | #'
20 | #' library(dplyr)
21 | #'
22 | #' x1 <- data.frame(id = 1:4, bla=letters[1:4],
23 | #'                  chromosome = c("chr1", "chr1", "chr2", "chr2"),
24 | #'                  start = c(100, 200, 300, 400),
25 | #'                  end = c(150, 250, 350, 450))
26 | #'
27 | #' x2 <- data.frame(id = 1:4, BLA=LETTERS[1:4],
28 | #'                  chromosome = c("chr1", "chr2", "chr2", "chr1"),
29 | #'                  start = c(140, 210, 400, 300),
30 | #'                  end = c(160, 240, 415, 320))
31 | #' j <- genome_join_closest(x1, x2, by=c("chromosome", "start", "end"), distance_column_name="distance", mode="left")
32 | #' print(j)
33 | #' @export
34 | genome_join_closest <- function(x, y, by=NULL, mode = "inner",
35 |                                 distance_column_name=NULL, max_distance=Inf, select="all"){
36 |   # Nearly all of this code is copied from https://github.com/dgrtwo/fuzzyjoin
37 |
38 |   if (!requireNamespace("IRanges", quietly = TRUE)) {
39 |     stop("genome_join_closest requires the IRanges package: ",
40 |          "https://bioconductor.org/packages/release/bioc/html/IRanges.html")
41 |   }
42 |
43 |   select <- match.arg(select, c("all", "arbitrary"))
44 |
45 |   by <- dplyr::common_by(by, x, y)
46 |   if (length(by$x) != 3) {
47 |     stop("genome_join_closest must join on exactly three columns")
48 |   }
49 |
50 |   # Matching function handed to fuzzy_join(): one row per (x, y) index pair.
51 |   f <- function(x, y) {
52 |     # nest around the chromosome column
53 |     x$..index <- seq_len(nrow(x))
54 |     y$..index <- seq_len(nrow(y))
55 |     nested_x <- dplyr::group_by_at(x, 1) %>% tidyr::nest()
56 |     nested_y <- dplyr::group_by_at(y, 1) %>% tidyr::nest()
57 |     chrom_key <- c(colnames(nested_y)[1])
58 |     names(chrom_key) <- colnames(nested_x)[1]
59 |
60 |     joined <- dplyr::inner_join(nested_x, nested_y, by = chrom_key)
61 |
62 |     # find the closest partner(s) on the same chromosome
63 |     find_closest <- function(xd, yd) {
64 |       r1 <- IRanges::IRanges(xd[[1]], xd[[2]])
65 |       r2 <- IRanges::IRanges(yd[[1]], yd[[2]])
66 |       o <- as.data.frame(IRanges::distanceToNearest(r1, r2, select=select))
67 |
68 |       data.frame(x = xd$..index[o$queryHits], y = yd$..index[o$subjectHits], ..distance=o$distance) %>%
69 |         dplyr::filter(`..distance` < max_distance)
70 |     }
71 |
72 |     ret <- purrr::map2_df(joined$data.x, joined$data.y, find_closest)
73 |     if (ncol(ret) == 0) {
74 |       # No shared chromosome: still hand back a well-formed (empty) table of index pairs.
75 |       ret <- data.frame(x = integer(0), y = integer(0), ..distance = integer(0))
76 |     }
77 |     if(! is.null(distance_column_name)){
78 |       ret[[distance_column_name]] <- ret$..distance
79 |     }
80 |     ret$..distance <- NULL
81 |     ret
82 |   }
83 |
84 |   fuzzyjoin::fuzzy_join(x, y, mode = mode, index_match_fun = f, multi_by = by)
85 | }
86 |
87 |
88 | #' @rdname genome_join_closest
89 | #' @export
90 | genome_inner_join_closest <- function(x, y, by = NULL, ...) {
91 |   genome_join_closest(x, y, by = by, mode = "inner", ...)  # delegate with the mode fixed to "inner"
92 | }
93 |
94 |
95 | #' @rdname genome_join_closest
96 | #' @export
97 | genome_left_join_closest <- function(x, y, by = NULL, ...) {
98 |   genome_join_closest(x, y, by = by, mode = "left", ...)  # delegate with the mode fixed to "left"
99 | }
100 |
101 |
102 | #' @rdname genome_join_closest
103 | #' @export
104 | genome_right_join_closest <- function(x, y, by = NULL, ...) {
105 |   genome_join_closest(x, y, by = by, mode = "right", ...)  # delegate with the mode fixed to "right"
106 | }
107 |
108 |
109 | #' @rdname genome_join_closest
110 | #' @export
111 | genome_full_join_closest <- function(x, y, by = NULL, ...) {
112 |   genome_join_closest(x, y, by = by, mode = "full", ...)  # delegate with the mode fixed to "full"
113 | }
114 |
115 |
116 | #' @rdname genome_join_closest
117 | #' @export
118 | genome_semi_join_closest <- function(x, y, by = NULL, ...) {
119 |   genome_join_closest(x, y, by = by, mode = "semi", ...)  # delegate with the mode fixed to "semi"
120 | }
121 |
122 |
123 | #' @rdname genome_join_closest
124 | #' @export
125 | genome_anti_join_closest <- function(x, y, by = NULL, ...) {
126 |   genome_join_closest(x, y, by = by, mode = "anti", ...)  # delegate with the mode fixed to "anti"
127 | }
128 |
129 |
130 |
--------------------------------------------------------------------------------
/R/subtract.R:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | #' Subtract one data frame from another based on chromosome, start and end.
5 | #'
6 | #' @param x A dataframe from which intervals are removed.
7 | #' @param y A dataframe with the intervals that are removed from \code{x}.
8 | #' @param by A character vector with 3 entries which are used to match the chromosome, start and end column.
9 | #'   For example: \code{by=c("Chromosome"="chr", "Start"="start", "End"="end")}
10 | #' @return The subtracted dataframe of \code{x} and \code{y} with the new boundaries.
11 | #'   Intervals of \code{x} on chromosomes that do not occur in \code{y} are returned unchanged.
12 | #' @examples
13 | #'
14 | #' library(dplyr)
15 | #'
16 | #' x1 <- data.frame(id = 1:4, bla=letters[1:4],
17 | #'                  chromosome = c("chr1", "chr1", "chr2", "chr1"),
18 | #'                  start = c(100, 200, 300, 400),
19 | #'                  end = c(150, 250, 350, 450))
20 | #'
21 | #' x2 <- data.frame(id = 1:4, BLA=LETTERS[1:4],
22 | #'                  chromosome = c("chr1", "chr2", "chr1", "chr1"),
23 | #'                  start = c(120, 210, 300, 400),
24 | #'                  end = c(125, 240, 320, 415))
25 | #'
26 | #' j <- genome_subtract(x1, x2, by=c("chromosome", "start", "end"))
27 | #' print(j)
28 | #'
29 | #' @export
30 | genome_subtract <- function(x, y, by=NULL){
31 |   # Much of this code is copied from https://github.com/dgrtwo/fuzzyjoin
32 |
33 |   x_groups <- dplyr::groups(x)
34 |   x <- dplyr::ungroup(x)
35 |   regroup <- function(d) {
36 |     if (is.null(x_groups)) {
37 |       return(d)
38 |     }
39 |     g <- purrr::map_chr(x_groups, as.character)
40 |     missing <- !(g %in% colnames(d))
41 |     g[missing] <- paste0(g[missing], ".x")
42 |     # group_by_() is deprecated; splice the column names in as symbols instead.
43 |     dplyr::group_by(d, !!!rlang::syms(g))
44 |   }
45 |
46 |   by <- dplyr::common_by(by, x, y)
47 |   if (length(by$x) != 3) {
48 |     stop("genome_subtract must join on exactly three columns")
49 |   }
50 |
51 |   # Returns one row per remaining piece: the index of its source row in x and its boundaries.
52 |   f <- function(x,y){
53 |     # nest around the chromosome column
54 |     x$..index <- seq_len(nrow(x))
55 |     y$..index <- seq_len(nrow(y))
56 |     nested_x <- dplyr::group_by_at(x, 1) %>% tidyr::nest()
57 |     nested_y <- dplyr::group_by_at(y, 1) %>% tidyr::nest()
58 |     chrom_key <- c(colnames(nested_y)[1])
59 |     names(chrom_key) <- colnames(nested_x)[1]
60 |     # Left join: chromosomes that only occur in x have nothing subtracted and must be kept.
61 |     joined <- dplyr::left_join(nested_x, nested_y, by = chrom_key)
62 |
63 |     find_subtractions <- function(xd, yd) {
64 |       r1 <- IRanges::IRanges(xd[[1]], xd[[2]])
65 |       # yd is NULL when y has no interval on this chromosome.
66 |       r2 <- if (is.null(yd)) IRanges::IRanges() else IRanges::IRanges(yd[[1]], yd[[2]])
67 |       subtraction <- IRanges::setdiff(r1, r2)
68 |       # Map every remaining piece back onto the x rows it overlaps and clip it to that row.
69 |       o <- as.data.frame(IRanges::findOverlaps(subtraction, r1))
70 |       data.frame(x = xd$..index[o$subjectHits],
71 |                  ..start=pmax(IRanges::start(subtraction)[o$queryHits], IRanges::start(r1)[o$subjectHits]),
72 |                  ..end=pmin(IRanges::end(subtraction)[o$queryHits], IRanges::end(r1)[o$subjectHits]))
73 |     }
74 |
75 |     ret <- purrr::map2_df(joined$data.x, joined$data.y, find_subtractions)
76 |     if (ncol(ret) == 0) {
77 |       # Nothing was processed (x has no rows): keep the columns the join below relies on.
78 |       ret <- data.frame(x = integer(0), ..start = integer(0), ..end = integer(0))
79 |     }
80 |     ret
81 |   }
82 |
83 |   d1 <- x[, by$x, drop = FALSE]
84 |   d2 <- y[, by$y, drop = FALSE]
85 |   matches <- f(d1, d2)
86 |
87 |   x %>%
88 |     dplyr::select(- dplyr::one_of(by$x[-1])) %>%
89 |     dplyr::mutate(..id=seq_len(n())) %>%
90 |     dplyr::inner_join(matches[, c("x", "..start", "..end")], by=c("..id"="x")) %>%
91 |     dplyr::rename(!! by$x[2] := `..start`, !! by$x[3] := `..end`) %>%
92 |     dplyr::select(- `..id`) %>%
93 |     regroup()
94 | }
95 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # tidygenomics
2 |
3 | [CRAN package page](https://cran.r-project.org/package=tidygenomics)
4 |
5 | Tidy Verbs for Dealing with Genomic Data Frames
6 |
7 | ## Description
8 |
9 | Handle genomic data within data frames just as you would with `GRanges`.
10 | This package provides methods to deal with genomic intervals the "tidy-way", which makes
11 | it simpler to integrate them into the general data munging process. The API is inspired by the
12 | popular bedtools and the genome_join() method from the fuzzyjoin package.
13 |
14 | ## Installation
15 |
16 | ```
17 | install.packages("tidygenomics")
18 | ```
19 |
20 | Or to get the latest development version
21 | ```
22 | devtools::install_github("const-ae/tidygenomics")
23 | ```
24 |
25 | ## Documentation
26 |
27 |
28 | #### genome_intersect
29 |
30 | Joins 2 data frames based on their genomic overlap. Unlike the `genome_join` function it updates the boundaries to reflect
31 | the overlap of the regions.
32 |
33 |
34 |
35 |
36 | ```{r}
37 | x1 <- data.frame(id = 1:4,
38 | chromosome = c("chr1", "chr1", "chr2", "chr2"),
39 | start = c(100, 200, 300, 400),
40 | end = c(150, 250, 350, 450))
41 |
42 | x2 <- data.frame(id = 1:4,
43 | chromosome = c("chr1", "chr2", "chr2", "chr1"),
44 | start = c(140, 210, 400, 300),
45 | end = c(160, 240, 415, 320))
46 |
47 | genome_intersect(x1, x2, by=c("chromosome", "start", "end"), mode="both")
48 | ```
49 |
50 | | id.x|chromosome | id.y| start| end|
51 | |----:|:----------|----:|-----:|---:|
52 | | 1|chr1 | 1| 140| 150|
53 | | 4|chr2 | 3| 400| 415|
54 |
55 | #### genome_subtract
56 |
57 | Subtracts one data frame from the other. This can be used to split the x data frame into smaller areas.
58 |
59 |
60 |
61 | ```{r}
62 | x1 <- data.frame(id = 1:4,
63 | chromosome = c("chr1", "chr1", "chr2", "chr1"),
64 | start = c(100, 200, 300, 400),
65 | end = c(150, 250, 350, 450))
66 |
67 | x2 <- data.frame(id = 1:4,
68 | chromosome = c("chr1", "chr2", "chr1", "chr1"),
69 | start = c(120, 210, 300, 400),
70 | end = c(125, 240, 320, 415))
71 |
72 | genome_subtract(x1, x2, by=c("chromosome", "start", "end"))
73 | ```
74 |
75 | | id|chromosome | start| end|
76 | |--:|:----------|-----:|---:|
77 | | 1|chr1 | 100| 119|
78 | | 1|chr1 | 126| 150|
79 | | 2|chr1 | 200| 250|
80 | | 3|chr2 | 300| 350|
81 | | 4|chr1 | 416| 450|
82 |
83 |
84 | #### genome_join_closest
85 |
86 | Joins 2 data frames based on their genomic location. If no exact overlap is found the next closest interval is used.
87 |
88 |
89 |
90 | ```{r}
91 | x1 <- data_frame(id = 1:4,
92 | chr = c("chr1", "chr1", "chr2", "chr3"),
93 | start = c(100, 200, 300, 400),
94 | end = c(150, 250, 350, 450))
95 |
96 | x2 <- data_frame(id = 1:4,
97 | chr = c("chr1", "chr1", "chr1", "chr2"),
98 | start = c(220, 210, 300, 400),
99 | end = c(225, 240, 320, 415))
100 | genome_join_closest(x1, x2, by=c("chr", "start", "end"), distance_column_name="distance", mode="left")
101 | ```
102 |
103 | | id.x|chr.x | start.x| end.x| id.y|chr.y | start.y| end.y| distance|
104 | |----:|:-----|-------:|-----:|----:|:-----|-------:|-----:|--------:|
105 | | 1|chr1 | 100| 150| 2|chr1 | 210| 240| 59|
106 | | 2|chr1 | 200| 250| 1|chr1 | 220| 225| 0|
107 | | 2|chr1 | 200| 250| 2|chr1 | 210| 240| 0|
108 | | 3|chr2 | 300| 350| 4|chr2 | 400| 415| 49|
109 | | 4|chr3 | 400| 450| NA|NA | NA| NA| NA|
110 |
111 | #### genome_cluster
112 |
113 | Adds a new column with a cluster id. Intervals that overlap or lie within `max_distance` of each other get the same cluster id.
114 |
115 |
116 |
117 | ```{r}
118 | x1 <- data.frame(id = 1:4, bla=letters[1:4],
119 | chromosome = c("chr1", "chr1", "chr2", "chr1"),
120 | start = c(100, 120, 300, 260),
121 | end = c(150, 250, 350, 450))
122 | genome_cluster(x1, by=c("chromosome", "start", "end"))
123 | ```
124 |
125 | | id|bla |chromosome | start| end| cluster_id|
126 | |--:|:---|:----------|-----:|---:|----------:|
127 | | 1|a |chr1 | 100| 150| 0|
128 | | 2|b |chr1 | 120| 250| 0|
129 | | 3|c |chr2 | 300| 350| 2|
130 | | 4|d |chr1 | 260| 450| 1|
131 |
132 | ```{r}
133 | genome_cluster(x1, by=c("chromosome", "start", "end"), max_distance=10)
134 | ```
135 |
136 | | id|bla |chromosome | start| end| cluster_id|
137 | |--:|:---|:----------|-----:|---:|----------:|
138 | | 1|a |chr1 | 100| 150| 0|
139 | | 2|b |chr1 | 120| 250| 0|
140 | | 3|c |chr2 | 300| 350| 1|
141 | | 4|d |chr1 | 260| 450| 0|
142 |
143 | #### genome_complement
144 |
145 | Calculates the complement of a genomic region.
146 |
147 |
148 |
149 | ```{r}
150 | x1 <- data.frame(id = 1:4,
151 | chromosome = c("chr1", "chr1", "chr2", "chr1"),
152 | start = c(100, 200, 300, 400),
153 | end = c(150, 250, 350, 450))
154 |
155 | genome_complement(x1, by=c("chromosome", "start", "end"))
156 | ```
157 |
158 | |chromosome | start| end|
159 | |:----------|-----:|---:|
160 | |chr1 | 1| 99|
161 | |chr1 | 151| 199|
162 | |chr1 | 251| 399|
163 | |chr2 | 1| 299|
164 |
165 |
166 | #### genome_join
167 |
168 | Classical join function based on the overlap of the interval. Implemented and maintained in the
169 | [fuzzyjoin](https://github.com/dgrtwo/fuzzyjoin) package and documented here only for completeness.
170 |
171 |
172 |
173 | ```{r}
174 | x1 <- data_frame(id = 1:4,
175 | chr = c("chr1", "chr1", "chr2", "chr3"),
176 | start = c(100, 200, 300, 400),
177 | end = c(150, 250, 350, 450))
178 |
179 | x2 <- data_frame(id = 1:4,
180 | chr = c("chr1", "chr1", "chr1", "chr2"),
181 | start = c(220, 210, 300, 400),
182 | end = c(225, 240, 320, 415))
183 | fuzzyjoin::genome_join(x1, x2, by=c("chr", "start", "end"), mode="inner")
184 | ```
185 |
186 | | id.x|chr.x | start.x| end.x| id.y|chr.y | start.y| end.y|
187 | |----:|:-----|-------:|-----:|----:|:-----|-------:|-----:|
188 | | 2|chr1 | 200| 250| 1|chr1 | 220| 225|
189 | | 2|chr1 | 200| 250| 2|chr1 | 210| 240|
190 |
191 | ```{r}
192 | fuzzyjoin::genome_join(x1, x2, by=c("chr", "start", "end"), mode="left")
193 | ```
194 |
195 | | id.x|chr.x | start.x| end.x| id.y|chr.y | start.y| end.y|
196 | |----:|:-----|-------:|-----:|----:|:-----|-------:|-----:|
197 | | 1|chr1 | 100| 150| NA|NA | NA| NA|
198 | | 2|chr1 | 200| 250| 1|chr1 | 220| 225|
199 | | 2|chr1 | 200| 250| 2|chr1 | 210| 240|
200 | | 3|chr2 | 300| 350| NA|NA | NA| NA|
201 | | 4|chr3 | 400| 450| NA|NA | NA| NA|
202 |
203 | ```{r}
204 | fuzzyjoin::genome_join(x1, x2, by=c("chr", "start", "end"), mode="anti")
205 | ```
206 |
207 | | id|chr | start| end|
208 | |--:|:----|-----:|---:|
209 | | 1|chr1 | 100| 150|
210 | | 3|chr2 | 300| 350|
211 | | 4|chr3 | 400| 450|
212 |
213 |
214 |
215 | ## Inspiration
216 |
217 | - [tidyverse](http://tidyverse.org/)
218 | - [fuzzyjoin](https://github.com/dgrtwo/fuzzyjoin)
219 | - [GenomicRanges](http://bioconductor.org/packages/release/bioc/html/GenomicRanges.html)
220 | - [bedtools](http://bedtools.readthedocs.io)
221 |
222 | If you have any additional questions or encounter issues, please raise them on the [GitHub page](https://github.com/const-ae/tidygenomics).
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | destination: docs
2 |
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | # Bug fix
2 |
3 | In this release I have fixed a bug and ported code to new dplyr API
4 |
5 | ## Test environments
6 | * macOS Mojave: R 3.6.1
7 | * R-Hub (Fedora R-devel, Ubuntu R-release)
8 | * winbuilder (R-devel, R-release)
9 |
10 | ## R CMD check results
11 | There were no ERRORs or WARNINGs.
12 |
13 | ## Downstream dependencies
14 |
15 | There is no downstream dependency yet
16 |
17 |
--------------------------------------------------------------------------------
/docs/articles/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Articles • tidygenomics
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
66 |
67 |
68 |
94 |
95 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
116 |
117 |
118 |
All vignettes
119 |
120 |
121 |
124 |
125 |
126 |
127 |
128 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
--------------------------------------------------------------------------------
/docs/articles/resources/genome_cluster_docu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/const-ae/tidygenomics/23737e99f7ff9893f485e2b6b48c1d15c13a5623/docs/articles/resources/genome_cluster_docu.png
--------------------------------------------------------------------------------
/docs/articles/resources/genome_complement_docu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/const-ae/tidygenomics/23737e99f7ff9893f485e2b6b48c1d15c13a5623/docs/articles/resources/genome_complement_docu.png
--------------------------------------------------------------------------------
/docs/articles/resources/genome_intersect_docu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/const-ae/tidygenomics/23737e99f7ff9893f485e2b6b48c1d15c13a5623/docs/articles/resources/genome_intersect_docu.png
--------------------------------------------------------------------------------
/docs/articles/resources/genome_join_closest_docu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/const-ae/tidygenomics/23737e99f7ff9893f485e2b6b48c1d15c13a5623/docs/articles/resources/genome_join_closest_docu.png
--------------------------------------------------------------------------------
/docs/articles/resources/genome_join_docu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/const-ae/tidygenomics/23737e99f7ff9893f485e2b6b48c1d15c13a5623/docs/articles/resources/genome_join_docu.png
--------------------------------------------------------------------------------
/docs/articles/resources/genome_subtract_docu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/const-ae/tidygenomics/23737e99f7ff9893f485e2b6b48c1d15c13a5623/docs/articles/resources/genome_subtract_docu.png
--------------------------------------------------------------------------------
/docs/authors.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Authors • tidygenomics
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
66 |
67 |
68 |
94 |
95 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
116 |
117 |
118 |
119 | Constantin Ahlmann-Eltze . Author, maintainer.
120 |
121 |
122 |
123 | Stan Developers . Copyright holder.
124 | Code from the Stan Math library is reused in 'cluster_interval.cpp'
125 |
126 |
127 | David Robinson . Copyright holder.
128 | Code from the fuzzyjoin package is reused
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
--------------------------------------------------------------------------------
/docs/docsearch.css:
--------------------------------------------------------------------------------
1 | /* Docsearch -------------------------------------------------------------- */
2 | /*
3 | Source: https://github.com/algolia/docsearch/
4 | License: MIT
5 | */
6 |
7 | .algolia-autocomplete {
8 | display: block;
9 | -webkit-box-flex: 1;
10 | -ms-flex: 1;
11 | flex: 1
12 | }
13 |
14 | .algolia-autocomplete .ds-dropdown-menu {
15 | width: 100%;
16 | min-width: none;
17 | max-width: none;
18 | padding: .75rem 0;
19 | background-color: #fff;
20 | background-clip: padding-box;
21 | border: 1px solid rgba(0, 0, 0, .1);
22 | box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175);
23 | }
24 |
25 | @media (min-width:768px) {
26 | .algolia-autocomplete .ds-dropdown-menu {
27 | width: 175%
28 | }
29 | }
30 |
31 | .algolia-autocomplete .ds-dropdown-menu::before {
32 | display: none
33 | }
34 |
35 | .algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] {
36 | padding: 0;
37 | background-color: rgb(255,255,255);
38 | border: 0;
39 | max-height: 80vh;
40 | }
41 |
42 | .algolia-autocomplete .ds-dropdown-menu .ds-suggestions {
43 | margin-top: 0
44 | }
45 |
46 | .algolia-autocomplete .algolia-docsearch-suggestion {
47 | padding: 0;
48 | overflow: visible
49 | }
50 |
51 | .algolia-autocomplete .algolia-docsearch-suggestion--category-header {
52 | padding: .125rem 1rem;
53 | margin-top: 0;
54 | font-size: 1.3em;
55 | font-weight: 500;
56 | color: #00008B;
57 | border-bottom: 0
58 | }
59 |
60 | .algolia-autocomplete .algolia-docsearch-suggestion--wrapper {
61 | float: none;
62 | padding-top: 0
63 | }
64 |
65 | .algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column {
66 | float: none;
67 | width: auto;
68 | padding: 0;
69 | text-align: left
70 | }
71 |
72 | .algolia-autocomplete .algolia-docsearch-suggestion--content {
73 | float: none;
74 | width: auto;
75 | padding: 0
76 | }
77 |
78 | .algolia-autocomplete .algolia-docsearch-suggestion--content::before {
79 | display: none
80 | }
81 |
82 | .algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header {
83 | padding-top: .75rem;
84 | margin-top: .75rem;
85 | border-top: 1px solid rgba(0, 0, 0, .1)
86 | }
87 |
88 | .algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column {
89 | display: block;
90 | padding: .1rem 1rem;
91 | margin-bottom: 0.1;
92 | font-size: 1.0em;
93 | font-weight: 400
94 | /* display: none */
95 | }
96 |
97 | .algolia-autocomplete .algolia-docsearch-suggestion--title {
98 | display: block;
99 | padding: .25rem 1rem;
100 | margin-bottom: 0;
101 | font-size: 0.9em;
102 | font-weight: 400
103 | }
104 |
105 | .algolia-autocomplete .algolia-docsearch-suggestion--text {
106 | padding: 0 1rem .5rem;
107 | margin-top: -.25rem;
108 | font-size: 0.8em;
109 | font-weight: 400;
110 | line-height: 1.25
111 | }
112 |
113 | .algolia-autocomplete .algolia-docsearch-footer {
114 | width: 110px;
115 | height: 20px;
116 | z-index: 3;
117 | margin-top: 10.66667px;
118 | float: right;
119 | font-size: 0;
120 | line-height: 0;
121 | }
122 |
123 | .algolia-autocomplete .algolia-docsearch-footer--logo {
124 | background-image: url("data:image/svg+xml;utf8, ");
125 | background-repeat: no-repeat;
126 | background-position: 50%;
127 | background-size: 100%;
128 | overflow: hidden;
129 | text-indent: -9000px;
130 | width: 100%;
131 | height: 100%;
132 | display: block;
133 | transform: translate(-8px);
134 | }
135 |
136 | .algolia-autocomplete .algolia-docsearch-suggestion--highlight {
137 | color: #FF8C00;
138 | background: rgba(232, 189, 54, 0.1)
139 | }
140 |
141 |
142 | .algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight {
143 | box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5)
144 | }
145 |
146 | .algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content {
147 | background-color: rgba(192, 192, 192, .15)
148 | }
149 |
--------------------------------------------------------------------------------
/docs/docsearch.js:
--------------------------------------------------------------------------------
1 | $(function() {
2 | // On DOM ready: register the search-focus shortcut and highlight the term passed in the `q` URL parameter.
3 | // register a handler to move the focus to the search bar
4 | // upon pressing shift + "/" (i.e. "?")
5 | $(document).on('keydown', function(e) {
6 | if (e.shiftKey && e.keyCode == 191) {
7 | e.preventDefault();
8 | $("#search-input").focus();
9 | }
10 | });
11 |
12 | $(document).ready(function() {
13 | // do keyword highlighting
14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */
15 | var mark = function() {
16 | // Read the search term from the `q` parameter of the current page URL.
17 | var referrer = document.URL ;
18 | var paramKey = "q" ;
19 |
20 | if (referrer.indexOf("?") !== -1) {
21 | var qs = referrer.substr(referrer.indexOf('?') + 1);
22 | var qs_noanchor = qs.split('#')[0];
23 | var qsa = qs_noanchor.split('&');
24 | var keyword = "";
25 | // Walk the `&`-separated pairs; a later `q` pair overwrites an earlier one.
26 | for (var i = 0; i < qsa.length; i++) {
27 | var currentParam = qsa[i].split('=');
28 | // Ignore anything that does not split into exactly a key and a value.
29 | if (currentParam.length !== 2) {
30 | continue;
31 | }
32 | // `+` is rewritten to `%20` before decoding so it reads back as a space.
33 | if (currentParam[0] == paramKey) {
34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20"));
35 | }
36 | }
37 | // Remove existing highlights, then mark the keyword inside `.contents` (unmark/mark: presumably the mark.js jQuery plugin -- confirm).
38 | if (keyword !== "") {
39 | $(".contents").unmark({
40 | done: function() {
41 | $(".contents").mark(keyword);
42 | }
43 | });
44 | }
45 | }
46 | };
47 |
48 | mark();
49 | });
50 | });
51 |
52 | /* Search term highlighting ------------------------------*/
53 | // Returns the de-duplicated list of matched words found in a search hit's `_highlightResult` (all hierarchy entries plus the optional content entry).
54 | function matchedWords(hit) {
55 | var words = [];
56 |
57 | var hierarchy = hit._highlightResult.hierarchy;
58 | // loop to fetch from lvl0, lvl1, etc.
59 | for (var idx in hierarchy) {
60 | words = words.concat(hierarchy[idx].matchedWords);
61 | }
62 | // `content` is only used when present on the hit.
63 | var content = hit._highlightResult.content;
64 | if (content) {
65 | words = words.concat(content.matchedWords);
66 | }
67 |
68 | // return unique words
69 | var words_uniq = [...new Set(words)]; // Set keeps first-seen order, so the result order follows the collection order above
70 | return words_uniq;
71 | }
72 |
73 | function updateHitURL(hit) {
74 |
75 | var words = matchedWords(hit);
76 | var url = "";
77 |
78 | if (hit.anchor) {
79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor;
80 | } else {
81 | url = hit.url + '?q=' + escape(words.join(" "));
82 | }
83 |
84 | return url;
85 | }
86 |
--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | Tidy Verbs for Dealing with Genomic Data Frames • tidygenomics
9 |
10 |
11 |
12 |
13 |
17 |
18 |
22 |
23 |
24 |
25 |
84 |
85 |
86 |
88 |
89 |
Tidy Verbs for Dealing with Genomic Data Frames
90 |
91 |
92 | Description
93 |
Handle genomic data within data frames just as you would with GRanges
. This package provides methods to deal with genomic intervals the “tidy way”, which makes them simpler to integrate into the general data munging process. The API is inspired by the popular bedtools and the genome_join() method from the fuzzyjoin package.
94 |
95 |
103 |
104 |
105 | Documentation
106 |
107 |
108 | genome_intersect
109 |
Joins 2 data frames based on their genomic overlap. Unlike the genome_join
function it updates the boundaries to reflect the overlap of the regions.
110 |
111 |
x1 <- data.frame(id = 1:4,
112 | chromosome = c("chr1", "chr1", "chr2", "chr2"),
113 | start = c(100, 200, 300, 400),
114 | end = c(150, 250, 350, 450))
115 |
116 | x2 <- data.frame(id = 1:4,
117 | chromosome = c("chr1", "chr2", "chr2", "chr1"),
118 | start = c(140, 210, 400, 300),
119 | end = c(160, 240, 415, 320))
120 |
121 | genome_intersect(x1, x2, by=c("chromosome", "start", "end"), mode="both")
122 |
123 |
130 |
131 |
132 | 1
133 | chr1
134 | 1
135 | 140
136 | 150
137 |
138 |
139 | 4
140 | chr2
141 | 3
142 | 400
143 | 415
144 |
145 |
146 |
147 |
148 |
149 |
150 | genome_subtract
151 |
Subtracts one data frame from the other. This can be used to split the x data frame into smaller areas.
152 |
153 |
x1 <- data.frame(id = 1:4,
154 | chromosome = c("chr1", "chr1", "chr2", "chr1"),
155 | start = c(100, 200, 300, 400),
156 | end = c(150, 250, 350, 450))
157 |
158 | x2 <- data.frame(id = 1:4,
159 | chromosome = c("chr1", "chr2", "chr1", "chr1"),
160 | start = c(120, 210, 300, 400),
161 | end = c(125, 240, 320, 415))
162 |
163 | genome_subtract(x1, x2, by=c("chromosome", "start", "end"))
164 |
165 |
171 |
172 |
173 | 1
174 | chr1
175 | 100
176 | 119
177 |
178 |
179 | 1
180 | chr1
181 | 126
182 | 150
183 |
184 |
185 | 2
186 | chr1
187 | 200
188 | 250
189 |
190 |
191 | 3
192 | chr2
193 | 300
194 | 350
195 |
196 |
197 | 4
198 | chr1
199 | 416
200 | 450
201 |
202 |
203 |
204 |
205 |
206 |
207 | genome_join_closest
208 |
Joins 2 data frames based on their genomic location. If no exact overlap is found the next closest interval is used.
209 |
210 |
x1 <- data_frame(id = 1:4,
211 | chr = c("chr1", "chr1", "chr2", "chr3"),
212 | start = c(100, 200, 300, 400),
213 | end = c(150, 250, 350, 450))
214 |
215 | x2 <- data_frame(id = 1:4,
216 | chr = c("chr1", "chr1", "chr1", "chr2"),
217 | start = c(220, 210, 300, 400),
218 | end = c(225, 240, 320, 415))
219 | genome_join_closest(x1, x2, by=c("chr", "start", "end"), distance_column_name="distance", mode="left")
220 |
221 |
232 |
233 |
234 | 1
235 | chr1
236 | 100
237 | 150
238 | 2
239 | chr1
240 | 210
241 | 240
242 | 59
243 |
244 |
245 | 2
246 | chr1
247 | 200
248 | 250
249 | 1
250 | chr1
251 | 220
252 | 225
253 | 0
254 |
255 |
256 | 2
257 | chr1
258 | 200
259 | 250
260 | 2
261 | chr1
262 | 210
263 | 240
264 | 0
265 |
266 |
267 | 3
268 | chr2
269 | 300
270 | 350
271 | 4
272 | chr2
273 | 400
274 | 415
275 | 49
276 |
277 |
278 | 4
279 | chr3
280 | 400
281 | 450
282 | NA
283 | NA
284 | NA
285 | NA
286 | NA
287 |
288 |
289 |
290 |
291 |
292 |
293 | genome_cluster
294 |
Adds a new column with the cluster ID; two intervals share a cluster if they overlap or are within max_distance
 of each other.
295 |
296 |
x1 <- data.frame(id = 1:4, bla=letters[1:4],
297 | chromosome = c("chr1", "chr1", "chr2", "chr1"),
298 | start = c(100, 120, 300, 260),
299 | end = c(150, 250, 350, 450))
300 | genome_cluster(x1, by=c("chromosome", "start", "end"))
301 |
302 |
310 |
311 |
312 | 1
313 | a
314 | chr1
315 | 100
316 | 150
317 | 0
318 |
319 |
320 | 2
321 | b
322 | chr1
323 | 120
324 | 250
325 | 0
326 |
327 |
328 | 3
329 | c
330 | chr2
331 | 300
332 | 350
333 | 2
334 |
335 |
336 | 4
337 | d
338 | chr1
339 | 260
340 | 450
341 | 1
342 |
343 |
344 |
345 |
genome_cluster(x1, by=c("chromosome", "start", "end"), max_distance=10)
346 |
347 |
355 |
356 |
357 | 1
358 | a
359 | chr1
360 | 100
361 | 150
362 | 0
363 |
364 |
365 | 2
366 | b
367 | chr1
368 | 120
369 | 250
370 | 0
371 |
372 |
373 | 3
374 | c
375 | chr2
376 | 300
377 | 350
378 | 1
379 |
380 |
381 | 4
382 | d
383 | chr1
384 | 260
385 | 450
386 | 0
387 |
388 |
389 |
390 |
391 |
392 |
393 | genome_complement
394 |
Calculates the complement of a genomic region.
395 |
396 |
x1 <- data.frame(id = 1:4,
397 | chromosome = c("chr1", "chr1", "chr2", "chr1"),
398 | start = c(100, 200, 300, 400),
399 | end = c(150, 250, 350, 450))
400 |
401 | genome_complement(x1, by=c("chromosome", "start", "end"))
402 |
403 |
408 |
409 |
410 | chr1
411 | 1
412 | 99
413 |
414 |
415 | chr1
416 | 151
417 | 199
418 |
419 |
420 | chr1
421 | 251
422 | 399
423 |
424 |
425 | chr2
426 | 1
427 | 299
428 |
429 |
430 |
431 |
432 |
433 |
434 | genome_join
435 |
Classical join function based on the overlap of the interval. Implemented and maintained in the fuzzyjoin package and documented here only for completeness.
436 |
437 |
x1 <- data_frame(id = 1:4,
438 | chr = c("chr1", "chr1", "chr2", "chr3"),
439 | start = c(100, 200, 300, 400),
440 | end = c(150, 250, 350, 450))
441 |
442 | x2 <- data_frame(id = 1:4,
443 | chr = c("chr1", "chr1", "chr1", "chr2"),
444 | start = c(220, 210, 300, 400),
445 | end = c(225, 240, 320, 415))
446 | fuzzyjoin::genome_join(x1, x2, by=c("chr", "start", "end"), mode="inner")
447 |
448 |
458 |
459 |
460 | 2
461 | chr1
462 | 200
463 | 250
464 | 1
465 | chr1
466 | 220
467 | 225
468 |
469 |
470 | 2
471 | chr1
472 | 200
473 | 250
474 | 2
475 | chr1
476 | 210
477 | 240
478 |
479 |
480 |
481 |
fuzzyjoin::genome_join(x1, x2, by=c("chr", "start", "end"), mode="left")
482 |
483 |
493 |
494 |
495 | 1
496 | chr1
497 | 100
498 | 150
499 | NA
500 | NA
501 | NA
502 | NA
503 |
504 |
505 | 2
506 | chr1
507 | 200
508 | 250
509 | 1
510 | chr1
511 | 220
512 | 225
513 |
514 |
515 | 2
516 | chr1
517 | 200
518 | 250
519 | 2
520 | chr1
521 | 210
522 | 240
523 |
524 |
525 | 3
526 | chr2
527 | 300
528 | 350
529 | NA
530 | NA
531 | NA
532 | NA
533 |
534 |
535 | 4
536 | chr3
537 | 400
538 | 450
539 | NA
540 | NA
541 | NA
542 | NA
543 |
544 |
545 |
546 |
fuzzyjoin::genome_join(x1, x2, by=c("chr", "start", "end"), mode="anti")
547 |
548 |
554 |
555 |
556 | 1
557 | chr1
558 | 100
559 | 150
560 |
561 |
562 | 3
563 | chr2
564 | 300
565 | 350
566 |
567 |
568 | 4
569 | chr3
570 | 400
571 | 450
572 |
573 |
574 |
575 |
576 |
577 |
578 |
579 | Inspiration
580 |
586 |
If you have any additional questions or encounter issues please raise them on the github page .
587 |
588 |
589 |
590 |
591 |
622 |
623 |
624 |
632 |
633 |
634 |
635 |
636 |
637 |
638 |
--------------------------------------------------------------------------------
/docs/link.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
8 |
12 |
13 |
--------------------------------------------------------------------------------
/docs/news/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Changelog • tidygenomics
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
66 |
67 |
68 |
94 |
95 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
117 |
118 |
119 |
120 |
127 |
128 |
129 |
130 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
--------------------------------------------------------------------------------
/docs/pkgdown.css:
--------------------------------------------------------------------------------
1 | /* Sticky footer */
2 |
3 | /**
4 | * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/
5 | * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css
6 | *
7 | * .Site -> body > .container
8 | * .Site-content -> body > .container .row
9 | * .footer -> footer
10 | *
11 | * Key idea seems to be to ensure that .container and __all its parents__
12 | * have height set to 100%
13 | *
14 | */
15 |
16 | html, body {
17 | height: 100%;
18 | }
19 |
20 | body > .container {
21 | display: flex;
22 | height: 100%;
23 | flex-direction: column;
24 |
25 | padding-top: 60px;
26 | }
27 |
28 | body > .container .row {
29 | flex: 1 0 auto;
30 | }
31 |
32 | footer {
33 | margin-top: 45px;
34 | padding: 35px 0 36px;
35 | border-top: 1px solid #e5e5e5;
36 | color: #666;
37 | display: flex;
38 | flex-shrink: 0;
39 | }
40 | footer p {
41 | margin-bottom: 0;
42 | }
43 | footer div {
44 | flex: 1;
45 | }
46 | footer .pkgdown {
47 | text-align: right;
48 | }
49 | footer p {
50 | margin-bottom: 0;
51 | }
52 |
53 | img.icon {
54 | float: right;
55 | }
56 |
57 | img {
58 | max-width: 100%;
59 | }
60 |
61 | /* Fix bug in bootstrap (only seen in firefox) */
62 | summary {
63 | display: list-item;
64 | }
65 |
66 | /* Typographic tweaking ---------------------------------*/
67 |
68 | .contents .page-header {
69 | margin-top: calc(-60px + 1em);
70 | }
71 |
72 | /* Section anchors ---------------------------------*/
73 |
74 | a.anchor {
75 | margin-left: -30px;
76 | display:inline-block;
77 | width: 30px;
78 | height: 30px;
79 | visibility: hidden;
80 |
81 | background-image: url(./link.svg);
82 | background-repeat: no-repeat;
83 | background-size: 20px 20px;
84 | background-position: center center;
85 | }
86 |
87 | .hasAnchor:hover a.anchor {
88 | visibility: visible;
89 | }
90 |
91 | @media (max-width: 767px) {
92 | .hasAnchor:hover a.anchor {
93 | visibility: hidden;
94 | }
95 | }
96 |
97 |
98 | /* Fixes for fixed navbar --------------------------*/
99 |
100 | .contents h1, .contents h2, .contents h3, .contents h4 {
101 | padding-top: 60px;
102 | margin-top: -40px;
103 | }
104 |
105 | /* Static header placement on mobile devices */
106 | @media (max-width: 767px) {
107 | .navbar-fixed-top {
108 | position: absolute;
109 | }
110 | .navbar {
111 | padding: 0;
112 | }
113 | }
114 |
115 |
116 | /* Sidebar --------------------------*/
117 |
118 | #sidebar {
119 | margin-top: 30px;
120 | }
121 | #sidebar h2 {
122 | font-size: 1.5em;
123 | margin-top: 1em;
124 | }
125 |
126 | #sidebar h2:first-child {
127 | margin-top: 0;
128 | }
129 |
130 | #sidebar .list-unstyled li {
131 | margin-bottom: 0.5em;
132 | }
133 |
134 | .orcid {
135 | height: 16px;
136 | vertical-align: middle;
137 | }
138 |
139 | /* Reference index & topics ----------------------------------------------- */
140 |
141 | .ref-index th {font-weight: normal;}
142 |
143 | .ref-index td {vertical-align: top;}
144 | .ref-index .icon {width: 40px;}
145 | .ref-index .alias {width: 40%;}
146 | .ref-index-icons .alias {width: calc(40% - 40px);}
147 | .ref-index .title {width: 60%;}
148 |
149 | .ref-arguments th {text-align: right; padding-right: 10px;}
150 | .ref-arguments th, .ref-arguments td {vertical-align: top;}
151 | .ref-arguments .name {width: 20%;}
152 | .ref-arguments .desc {width: 80%;}
153 |
154 | /* Nice scrolling for wide elements --------------------------------------- */
155 |
156 | table {
157 | display: block;
158 | overflow: auto;
159 | }
160 |
161 | /* Syntax highlighting ---------------------------------------------------- */
162 |
163 | pre {
164 | word-wrap: normal;
165 | word-break: normal;
166 | border: 1px solid #eee;
167 | }
168 |
169 | pre, code {
170 | background-color: #f8f8f8;
171 | color: #333;
172 | }
173 |
174 | pre code {
175 | overflow: auto;
176 | word-wrap: normal;
177 | white-space: pre;
178 | }
179 |
180 | pre .img {
181 | margin: 5px 0;
182 | }
183 |
184 | pre .img img {
185 | background-color: #fff;
186 | display: block;
187 | height: auto;
188 | }
189 |
190 | code a, pre a {
191 | color: #375f84;
192 | }
193 |
194 | a.sourceLine:hover {
195 | text-decoration: none;
196 | }
197 |
198 | .fl {color: #1514b5;}
199 | .fu {color: #000000;} /* function */
200 | .ch,.st {color: #036a07;} /* string */
201 | .kw {color: #264D66;} /* keyword */
202 | .co {color: #888888;} /* comment */
203 |
204 | .message { color: black; font-weight: bolder;}
205 | .error { color: orange; font-weight: bolder;}
206 | .warning { color: #6A0366; font-weight: bolder;}
207 |
208 | /* Clipboard --------------------------*/
209 |
210 | .hasCopyButton {
211 | position: relative;
212 | }
213 |
214 | .btn-copy-ex {
215 | position: absolute;
216 | right: 0;
217 | top: 0;
218 | visibility: hidden;
219 | }
220 |
221 | .hasCopyButton:hover button.btn-copy-ex {
222 | visibility: visible;
223 | }
224 |
225 | /* mark.js ----------------------------*/
226 |
227 | mark {
228 | background-color: rgba(255, 255, 51, 0.5);
229 | border-bottom: 2px solid rgba(255, 153, 51, 0.3);
230 | padding: 1px;
231 | }
232 |
233 | /* vertical spacing after htmlwidgets */
234 | .html-widget {
235 | margin-bottom: 10px;
236 | }
237 |
--------------------------------------------------------------------------------
/docs/pkgdown.js:
--------------------------------------------------------------------------------
1 | /* http://gregfranko.com/blog/jquery-best-practices/ */
2 | (function($) {
3 | $(function() {
4 | // On DOM ready: set up the sticky sidebar, scrollspy and tooltips, then mark the navbar entry that best matches the current page.
5 | $("#sidebar")
6 | .stick_in_parent({offset_top: 40})
7 | .on('sticky_kit:bottom', function(e) {
8 | $(this).parent().css('position', 'static');
9 | })
10 | .on('sticky_kit:unbottom', function(e) {
11 | $(this).parent().css('position', 'relative');
12 | });
13 |
14 | $('body').scrollspy({
15 | target: '#sidebar',
16 | offset: 60
17 | });
18 |
19 | $('[data-toggle="tooltip"]').tooltip();
20 | // Find the navbar link whose path pieces share the longest prefix with the current page; the first link reaching the maximum wins (strict `>`).
21 | var cur_path = paths(location.pathname);
22 | var links = $("#navbar ul li a");
23 | var max_length = -1;
24 | var pos = -1;
25 | for (var i = 0; i < links.length; i++) {
26 | if (links[i].getAttribute("href") === "#")
27 | continue;
28 | // Ignore external links
29 | if (links[i].host !== location.host)
30 | continue;
31 |
32 | var nav_path = paths(links[i].pathname);
33 |
34 | var length = prefix_length(nav_path, cur_path);
35 | if (length > max_length) {
36 | max_length = length;
37 | pos = i;
38 | }
39 | }
40 |
41 | // Add the "active" class to the link's parent element, and to the enclosing li.dropdown if the link sits in a dropdown
42 | if (pos >= 0) {
43 | var menu_anchor = $(links[pos]);
44 | menu_anchor.parent().addClass("active");
45 | menu_anchor.closest("li.dropdown").addClass("active");
46 | }
47 | });
48 | // Splits a URL pathname into its "/"-separated pieces, without the leading empty piece and without a trailing "index.html" or empty piece.
49 | function paths(pathname) {
50 | var pieces = pathname.split("/");
51 | pieces.shift(); // always starts with /
52 | // Dropping the last piece here makes "/a/" and "/a/index.html" both yield ["a"].
53 | var end = pieces[pieces.length - 1];
54 | if (end === "index.html" || end === "")
55 | pieces.pop();
56 | return(pieces);
57 | }
58 | // Number of leading elements that `needle` and `haystack` (arrays, e.g. as returned by paths()) have in common.
59 | // Returns -1 if not found (needle longer than haystack, or non-empty needle against an empty haystack)
60 | function prefix_length(needle, haystack) {
61 | if (needle.length > haystack.length)
62 | return(-1);
63 |
64 | // Special case for length-0 haystack, since for loop won't run
65 | if (haystack.length === 0) {
66 | return(needle.length === 0 ? 0 : -1);
67 | }
68 | // Return the index of the first difference; past the end of a shorter needle, needle[i] is undefined, so the loop stops at needle.length.
69 | for (var i = 0; i < haystack.length; i++) {
70 | if (needle[i] != haystack[i])
71 | return(i);
72 | }
73 | // No difference over the whole haystack: every element matched.
74 | return(haystack.length);
75 | }
76 |
77 | /* Clipboard --------------------------*/
78 | // Shows `msg` in the element's tooltip, then puts the previous `data-original-title` value back on the element.
79 | function changeTooltipMessage(element, msg) {
80 | var tooltipOriginalTitle=element.getAttribute('data-original-title');
81 | element.setAttribute('data-original-title', msg);
82 | $(element).tooltip('show');
83 | element.setAttribute('data-original-title', tooltipOriginalTitle);
84 | }
85 |
86 | if(ClipboardJS.isSupported()) {
87 | $(document).ready(function() {
88 | var copyButton = " ";
89 |
90 | $(".examples, div.sourceCode").addClass("hasCopyButton");
91 |
92 | // Insert copy buttons:
93 | $(copyButton).prependTo(".hasCopyButton");
94 |
95 | // Initialize tooltips:
96 | $('.btn-copy-ex').tooltip({container: 'body'});
97 |
98 | // Initialize clipboard:
99 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', {
100 | text: function(trigger) {
101 | return trigger.parentNode.textContent;
102 | }
103 | });
104 |
105 | clipboardBtnCopies.on('success', function(e) {
106 | changeTooltipMessage(e.trigger, 'Copied!');
107 | e.clearSelection();
108 | });
109 |
110 | clipboardBtnCopies.on('error', function() {
111 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy');
112 | });
113 | });
114 | }
115 | })(window.jQuery || window.$)
116 |
--------------------------------------------------------------------------------
/docs/pkgdown.yml:
--------------------------------------------------------------------------------
1 | pandoc: 2.3.1
2 | pkgdown: 1.3.0
3 | pkgdown_sha: ~
4 | articles:
5 | intro: intro.html
6 |
7 |
--------------------------------------------------------------------------------
/docs/reference/cluster_interval.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Cluster ranges which are implemented as 2 equal-length numeric vectors. — cluster_interval • tidygenomics
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
69 |
70 |
71 |
97 |
98 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
121 |
122 |
123 |
124 |
Cluster ranges which are implemented as 2 equal-length numeric vectors.
125 |
126 |
127 |
128 |
cluster_interval (starts , ends , max_distance = 0L )
129 |
130 |
Arguments
131 |
132 |
133 |
134 | starts
135 | A numeric vector that defines the starts of each interval
136 |
137 |
138 | ends
139 | A numeric vector that defines the ends of each interval
140 |
141 |
142 | max_distance
143 | The maximum distance up to which intervals are still considered to be
144 | the same cluster. Default: 0.
145 |
146 |
147 |
148 |
149 |
Examples
150 |
#> [1] TRUE TRUE TRUE
154 |
155 |
164 |
165 |
166 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
--------------------------------------------------------------------------------
/docs/reference/genome_cluster.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Cluster intervals in a data frame based on chromosome, start and end. — genome_cluster • tidygenomics
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
69 |
70 |
71 |
97 |
98 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
121 |
122 |
123 |
124 |
Cluster intervals in a data frame based on chromosome, start and end.
125 |
126 |
127 |
128 |
genome_cluster (x , by = NULL , max_distance = 0 ,
129 | cluster_column_name = "cluster_id" )
130 |
131 |
Arguments
132 |
133 |
134 |
135 | x
136 | A dataframe.
137 |
138 |
139 | by
140 | A character vector with 3 entries which are the chromosome, start and end column.
141 | For example: by=c("chr", "start", "end")
142 |
143 |
144 | max_distance
145 | The maximum distance up to which intervals are still considered to be
146 | the same cluster. Default: 0.
147 |
148 |
149 | cluster_column_name
150 | A string that is used as the new column name
151 |
152 |
153 |
154 |
Value
155 |
156 |
The dataframe with the additional column of the cluster
157 |
158 |
159 |
Examples
160 |
#>
162 | #> Attaching package: ‘dplyr’
#> The following object is masked from ‘package:testthat’:
163 | #>
164 | #> matches
#> The following objects are masked from ‘package:stats’:
165 | #>
166 | #> filter, lag
#> The following objects are masked from ‘package:base’:
167 | #>
168 | #> intersect, setdiff, setequal, union
#> # A tibble: 4 x 6
174 | #> id bla chromosome start end cluster_id
175 | #> <int> <fct> <fct> <dbl> <dbl> <dbl>
176 | #> 1 1 a chr1 100 150 0
177 | #> 2 2 b chr1 120 250 0
178 | #> 3 3 c chr2 300 350 2
179 | #> 4 4 d chr1 260 450 1
#> # A tibble: 4 x 6
180 | #> id bla chromosome start end cluster_id
181 | #> <int> <fct> <fct> <dbl> <dbl> <dbl>
182 | #> 1 1 a chr1 100 150 0
183 | #> 2 2 b chr1 120 250 0
184 | #> 3 3 c chr2 300 350 1
185 | #> 4 4 d chr1 260 450 0
186 |
187 |
198 |
199 |
200 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
--------------------------------------------------------------------------------
/docs/reference/genome_complement.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Calculates the complement of the intervals covered by a data frame.
10 | It can optionally take a chromosome_size
data frame
11 | with 2 or 3 columns: the first column holds the chromosome names; with 2 columns
12 | the second holds the chromosome size, with 3 columns the second and third hold
13 | the start and end index on the chromosome. — genome_complement • tidygenomics
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
42 |
43 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
81 |
82 |
83 |
109 |
110 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
137 |
138 |
139 |
140 |
Calculates the complement of the intervals covered by a data frame.
141 | It can optionally take a chromosome_size
data frame
142 | with 2 or 3 columns: the first column holds the chromosome names; with 2 columns
143 | the second holds the chromosome size, with 3 columns the second and third hold
144 | the start and end index on the chromosome.
145 |
146 |
147 |
148 |
genome_complement (x , chromosome_size = NULL , by = NULL )
149 |
150 |
Arguments
151 |
152 |
153 |
154 | x
155 | A data frame for which the complement is calculated
156 |
157 |
158 | chromosome_size
159 | A dataframe with at least 2 columns that contains
160 | first the chromosome name and then the size of that chromosome. Can be NULL
161 | in which case the largest value per chromosome from x
is used.
162 |
163 |
164 | by
165 | A character vector with 3 entries which are the chromosome, start and end column.
166 | For example: by=c("chr", "start", "end")
167 |
168 |
169 |
170 |
171 |
Examples
172 |
#> # A tibble: 4 x 3
181 | #> chromosome start end
182 | #> <fct> <int> <int>
183 | #> 1 chr1 1 99
184 | #> 2 chr1 151 199
185 | #> 3 chr1 251 399
186 | #> 4 chr2 1 299
187 |
188 |
197 |
198 |
199 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
--------------------------------------------------------------------------------
/docs/reference/genome_intersect.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Intersect data frames based on chromosome, start and end. — genome_intersect • tidygenomics
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
69 |
70 |
71 |
97 |
98 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
121 |
122 |
123 |
124 |
Intersect data frames based on chromosome, start and end.
125 |
126 |
127 |
128 |
genome_intersect (x , y , by = NULL , mode = "both" )
129 |
130 |
Arguments
131 |
132 |
133 |
134 | x
135 | A dataframe.
136 |
137 |
138 | y
139 | A dataframe.
140 |
141 |
142 | by
143 | A character vector with 3 entries which are used to match the chromosome, start and end column.
144 | For example: by=c("Chromosome"="chr", "Start"="start", "End"="end")
145 |
146 |
147 | mode
148 | One of "both", "left", "right" or "anti".
149 |
150 |
151 |
152 |
Value
153 |
154 |
The intersected dataframe of x
and y
with the new boundaries.
155 |
156 |
157 |
Examples
158 |
#> id.x bla chromosome id.y BLA start end
172 | #> 1 1 a chr1 1 A 140 150
173 | #> 2 4 d chr2 3 C 400 415
174 |
175 |
176 |
177 |
178 |
189 |
190 |
191 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
--------------------------------------------------------------------------------
/docs/reference/genome_join_closest.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Join intervals on chromosomes in data frames, to the closest partner — genome_join_closest • tidygenomics
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
69 |
70 |
71 |
97 |
98 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
121 |
122 |
123 |
124 |
Join intervals on chromosomes in data frames, to the closest partner
125 |
126 |
127 |
128 |
genome_join_closest (x , y , by = NULL , mode = "inner" ,
129 | distance_column_name = NULL , max_distance = Inf , select = "all" )
130 |
131 | genome_inner_join_closest (x , y , by = NULL , ... )
132 |
133 | genome_left_join_closest (x , y , by = NULL , ... )
134 |
135 | genome_right_join_closest (x , y , by = NULL , ... )
136 |
137 | genome_full_join_closest (x , y , by = NULL , ... )
138 |
139 | genome_semi_join_closest (x , y , by = NULL , ... )
140 |
141 | genome_anti_join_closest (x , y , by = NULL , ... )
142 |
143 |
Arguments
144 |
145 |
146 |
147 | x
148 | A dataframe.
149 |
150 |
151 | y
152 | A dataframe.
153 |
154 |
155 | by
156 | A character vector with 3 entries which are used to match the chromosome, start and end column.
157 | For example: by=c("Chromosome"="chr", "Start"="start", "End"="end")
158 |
159 |
160 | mode
161 | One of "inner", "full", "left", "right", "semi" or "anti".
162 |
163 |
164 | distance_column_name
165 | A string that is used as the new column name with the distance.
166 | If NULL
no new column is added.
167 |
168 |
169 | max_distance
170 | The maximum distance that is allowed to join 2 entries.
171 |
172 |
173 | select
174 | A string that is passed on to IRanges::distanceToNearest
, can either be
175 | all which means that in case that multiple intervals have the same distance all are reported, or
176 | arbitrary which means in that case one would be chosen at random.
177 |
178 |
179 | ...
180 | Additional arguments passed on to genome_join_closest.
181 |
182 |
183 |
184 |
Value
185 |
186 |
The joined dataframe of x
and y
.
187 |
188 |
189 |
Examples
190 |
#> id.x bla chromosome id.y BLA start end
204 | #> 1 1 a chr1 1 A 140 150
205 | #> 2 4 d chr2 3 C 400 415
206 |
207 |
218 |
219 |
220 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
--------------------------------------------------------------------------------
/docs/reference/genome_subtract.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Subtract one data frame from another based on chromosome, start and end. — genome_subtract • tidygenomics
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
69 |
70 |
71 |
97 |
98 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
121 |
122 |
123 |
124 |
Subtract one data frame from another based on chromosome, start and end.
125 |
126 |
127 |
128 |
genome_subtract (x , y , by = NULL )
129 |
130 |
Arguments
131 |
132 |
133 |
134 | x
135 | A dataframe.
136 |
137 |
138 | y
139 | A dataframe.
140 |
141 |
142 | by
143 | A character vector with 3 entries which are used to match the chromosome, start and end column.
144 | For example: by=c("Chromosome"="chr", "Start"="start", "End"="end")
145 |
146 |
147 |
148 |
Value
149 |
150 |
The subtracted dataframe of x
and y
with the new boundaries.
151 |
152 |
153 |
Examples
154 |
#> id bla chromosome start end
169 | #> 1 1 a chr1 100 119
170 | #> 2 1 a chr1 126 150
171 | #> 3 2 b chr1 200 250
172 | #> 4 3 c chr2 300 350
173 | #> 5 4 d chr1 416 450
174 |
175 |
176 |
177 |
188 |
189 |
190 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
--------------------------------------------------------------------------------
/docs/reference/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Function reference • tidygenomics
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
66 |
67 |
68 |
94 |
95 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
184 |
185 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
--------------------------------------------------------------------------------
/man/cluster_interval.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/RcppExports.R
3 | \name{cluster_interval}
4 | \alias{cluster_interval}
5 | \title{Cluster ranges which are implemented as 2 equal-length numeric vectors.}
6 | \usage{
7 | cluster_interval(starts, ends, max_distance = 0L)
8 | }
9 | \arguments{
10 | \item{starts}{A numeric vector that defines the starts of each interval}
11 |
12 | \item{ends}{A numeric vector that defines the ends of each interval}
13 |
14 | \item{max_distance}{The maximum distance up to which intervals are still considered to be
15 | the same cluster. Default: 0.}
16 | }
17 | \description{
18 | Cluster ranges which are implemented as 2 equal-length numeric vectors.
19 | }
20 | \examples{
21 | starts <- c(50, 100, 120)
22 | ends <- c(75, 130, 150)
23 | j <- cluster_interval(starts, ends)
24 | j == c(0,1,1)
25 | }
26 |
--------------------------------------------------------------------------------
/man/genome_cluster.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/cluster.R
3 | \name{genome_cluster}
4 | \alias{genome_cluster}
5 | \title{Cluster intervals in a data frame based on chromosome, start and end.}
6 | \usage{
7 | genome_cluster(x, by = NULL, max_distance = 0,
8 | cluster_column_name = "cluster_id")
9 | }
10 | \arguments{
11 | \item{x}{A dataframe.}
12 |
13 | \item{by}{A character vector with 3 entries which are the chromosome, start and end column.
14 | For example: \code{by=c("chr", "start", "end")}}
15 |
16 | \item{max_distance}{The maximum distance up to which intervals are still considered to be
17 | the same cluster. Default: 0.}
18 |
19 | \item{cluster_column_name}{A string that is used as the new column name}
20 | }
21 | \value{
22 | The dataframe with the additional column of the cluster
23 | }
24 | \description{
25 | Cluster intervals in a data frame based on chromosome, start and end.
26 | }
27 | \examples{
28 |
29 | library(dplyr)
30 |
31 | x1 <- data.frame(id = 1:4, bla=letters[1:4],
32 | chromosome = c("chr1", "chr1", "chr2", "chr1"),
33 | start = c(100, 120, 300, 260),
34 | end = c(150, 250, 350, 450))
35 | genome_cluster(x1, by=c("chromosome", "start", "end"))
36 | genome_cluster(x1, by=c("chromosome", "start", "end"), max_distance=10)
37 | }
38 |
--------------------------------------------------------------------------------
/man/genome_complement.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/complement.R
3 | \name{genome_complement}
4 | \alias{genome_complement}
5 | \title{Calculates the complement to the intervals covered by the intervals in
6 | a data frame. It can optionally take a \code{chromosome_size} data frame
7 | that contains 2 or 3 columns, the first the names of chromosome and in case
8 | there are 2 columns the size or first the start index and lastly the end index
9 | on the chromosome.}
10 | \usage{
11 | genome_complement(x, chromosome_size = NULL, by = NULL)
12 | }
13 | \arguments{
14 | \item{x}{A data frame for which the complement is calculated}
15 |
16 | \item{chromosome_size}{A dataframe with at least 2 columns that contains
17 | first the chromosome name and then the size of that chromosome. Can be NULL
18 | in which case the largest value per chromosome from \code{x} is used.}
19 |
20 | \item{by}{A character vector with 3 entries which are the chromosome, start and end column.
21 | For example: \code{by=c("chr", "start", "end")}}
22 | }
23 | \description{
24 | Calculates the complement to the intervals covered by the intervals in
25 | a data frame. It can optionally take a \code{chromosome_size} data frame
26 | that contains 2 or 3 columns, the first the names of chromosome and in case
27 | there are 2 columns the size or first the start index and lastly the end index
28 | on the chromosome.
29 | }
30 | \examples{
31 |
32 | library(dplyr)
33 |
34 | x1 <- data.frame(id = 1:4, bla=letters[1:4],
35 | chromosome = c("chr1", "chr1", "chr2", "chr1"),
36 | start = c(100, 200, 300, 400),
37 | end = c(150, 250, 350, 450))
38 |
39 | genome_complement(x1, by=c("chromosome", "start", "end"))
40 | }
41 |
--------------------------------------------------------------------------------
/man/genome_intersect.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/intersect.R
3 | \name{genome_intersect}
4 | \alias{genome_intersect}
5 | \title{Intersect data frames based on chromosome, start and end.}
6 | \usage{
7 | genome_intersect(x, y, by = NULL, mode = "both")
8 | }
9 | \arguments{
10 | \item{x}{A dataframe.}
11 |
12 | \item{y}{A dataframe.}
13 |
14 | \item{by}{A character vector with 3 entries which are used to match the chromosome, start and end column.
15 | For example: \code{by=c("Chromosome"="chr", "Start"="start", "End"="end")}}
16 |
17 | \item{mode}{One of "both", "left", "right" or "anti".}
18 | }
19 | \value{
20 | The intersected dataframe of \code{x} and \code{y} with the new boundaries.
21 | }
22 | \description{
23 | Intersect data frames based on chromosome, start and end.
24 | }
25 | \examples{
26 |
27 | library(dplyr)
28 |
29 | x1 <- data.frame(id = 1:4, bla=letters[1:4],
30 | chromosome = c("chr1", "chr1", "chr2", "chr2"),
31 | start = c(100, 200, 300, 400),
32 | end = c(150, 250, 350, 450))
33 |
34 | x2 <- data.frame(id = 1:4, BLA=LETTERS[1:4],
35 | chromosome = c("chr1", "chr2", "chr2", "chr1"),
36 | start = c(140, 210, 400, 300),
37 | end = c(160, 240, 415, 320))
38 | j <- genome_intersect(x1, x2, by=c("chromosome", "start", "end"), mode="both")
39 | print(j)
40 |
41 |
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/man/genome_join_closest.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/join_closest.R
3 | \name{genome_join_closest}
4 | \alias{genome_join_closest}
5 | \alias{genome_inner_join_closest}
6 | \alias{genome_left_join_closest}
7 | \alias{genome_right_join_closest}
8 | \alias{genome_full_join_closest}
9 | \alias{genome_semi_join_closest}
10 | \alias{genome_anti_join_closest}
11 | \title{Join intervals on chromosomes in data frames, to the closest partner}
12 | \usage{
13 | genome_join_closest(x, y, by = NULL, mode = "inner",
14 | distance_column_name = NULL, max_distance = Inf, select = "all")
15 |
16 | genome_inner_join_closest(x, y, by = NULL, ...)
17 |
18 | genome_left_join_closest(x, y, by = NULL, ...)
19 |
20 | genome_right_join_closest(x, y, by = NULL, ...)
21 |
22 | genome_full_join_closest(x, y, by = NULL, ...)
23 |
24 | genome_semi_join_closest(x, y, by = NULL, ...)
25 |
26 | genome_anti_join_closest(x, y, by = NULL, ...)
27 | }
28 | \arguments{
29 | \item{x}{A dataframe.}
30 |
31 | \item{y}{A dataframe.}
32 |
33 | \item{by}{A character vector with 3 entries which are used to match the chromosome, start and end column.
34 | For example: \code{by=c("Chromosome"="chr", "Start"="start", "End"="end")}}
35 |
36 | \item{mode}{One of "inner", "full", "left", "right", "semi" or "anti".}
37 |
38 | \item{distance_column_name}{A string that is used as the new column name with the distance.
39 | If \code{NULL} no new column is added.}
40 |
41 | \item{max_distance}{The maximum distance that is allowed to join 2 entries.}
42 |
43 | \item{select}{A string that is passed on to \code{IRanges::distanceToNearest}, can either be
44 | \code{"all"}, which means that if multiple intervals have the same distance all of them are reported, or
45 | \code{"arbitrary"}, which means that in that case one of them is chosen arbitrarily.}
46 |
47 | \item{...}{Additional arguments passed on to genome_join_closest.}
48 | }
49 | \value{
50 | The joined dataframe of \code{x} and \code{y}.
51 | }
52 | \description{
53 | Join intervals on chromosomes in data frames, to the closest partner
54 | }
55 | \examples{
56 |
library(dplyr)

x1 <- data.frame(id = 1:4, bla=letters[1:4],
                 chromosome = c("chr1", "chr1", "chr2", "chr2"),
                 start = c(100, 200, 300, 400),
                 end = c(150, 250, 350, 450))

x2 <- data.frame(id = 1:4, BLA=LETTERS[1:4],
                 chromosome = c("chr1", "chr2", "chr2", "chr1"),
                 start = c(140, 210, 400, 300),
                 end = c(160, 240, 415, 320))

# Join each interval of x1 to its closest partner in x2 and keep the
# distance between the two in an extra column.
j <- genome_join_closest(x1, x2, by=c("chromosome", "start", "end"),
                         distance_column_name="distance", mode="left")
print(j)
70 | }
71 |
--------------------------------------------------------------------------------
/man/genome_subtract.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/subtract.R
3 | \name{genome_subtract}
4 | \alias{genome_subtract}
5 | \title{Subtract one data frame from another based on chromosome, start and end.}
6 | \usage{
7 | genome_subtract(x, y, by = NULL)
8 | }
9 | \arguments{
10 | \item{x}{A dataframe.}
11 |
12 | \item{y}{A dataframe.}
13 |
14 | \item{by}{A character vector with 3 entries which are used to match the chromosome, start and end column.
15 | For example: \code{by=c("Chromosome"="chr", "Start"="start", "End"="end")}}
16 | }
17 | \value{
18 | The subtracted dataframe of \code{x} and \code{y} with the new boundaries.
19 | }
20 | \description{
21 | Subtract one data frame from another based on chromosome, start and end.
22 | }
23 | \examples{
24 |
25 | library(dplyr)
26 |
27 | x1 <- data.frame(id = 1:4, bla=letters[1:4],
28 | chromosome = c("chr1", "chr1", "chr2", "chr1"),
29 | start = c(100, 200, 300, 400),
30 | end = c(150, 250, 350, 450))
31 |
32 | x2 <- data.frame(id = 1:4, BLA=LETTERS[1:4],
33 | chromosome = c("chr1", "chr2", "chr1", "chr1"),
34 | start = c(120, 210, 300, 400),
35 | end = c(125, 240, 320, 415))
36 |
37 | j <- genome_subtract(x1, x2, by=c("chromosome", "start", "end"))
38 | print(j)
39 |
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.so
3 | *.dll
4 |
--------------------------------------------------------------------------------
/src/RcppExports.cpp:
--------------------------------------------------------------------------------
1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 |
4 | #include <Rcpp.h>
5 |
6 | using namespace Rcpp;
7 |
8 | // sort_indices
9 | IntegerVector sort_indices(NumericVector x);
10 | RcppExport SEXP _tidygenomics_sort_indices(SEXP xSEXP) {
11 | BEGIN_RCPP
12 | Rcpp::RObject rcpp_result_gen;
13 | Rcpp::RNGScope rcpp_rngScope_gen;
14 | Rcpp::traits::input_parameter< NumericVector >::type x(xSEXP);
15 | rcpp_result_gen = Rcpp::wrap(sort_indices(x));
16 | return rcpp_result_gen;
17 | END_RCPP
18 | }
19 | // cluster_interval
20 | IntegerVector cluster_interval(NumericVector starts, NumericVector ends, int max_distance);
21 | RcppExport SEXP _tidygenomics_cluster_interval(SEXP startsSEXP, SEXP endsSEXP, SEXP max_distanceSEXP) {
22 | BEGIN_RCPP
23 | Rcpp::RObject rcpp_result_gen;
24 | Rcpp::RNGScope rcpp_rngScope_gen;
25 | Rcpp::traits::input_parameter< NumericVector >::type starts(startsSEXP);
26 | Rcpp::traits::input_parameter< NumericVector >::type ends(endsSEXP);
27 | Rcpp::traits::input_parameter< int >::type max_distance(max_distanceSEXP);
28 | rcpp_result_gen = Rcpp::wrap(cluster_interval(starts, ends, max_distance));
29 | return rcpp_result_gen;
30 | END_RCPP
31 | }
32 |
--------------------------------------------------------------------------------
/src/cluster_interval.cpp:
--------------------------------------------------------------------------------
1 | #include <Rcpp.h>
2 |
3 |
4 | using namespace Rcpp;
5 | using namespace std;
6 |
7 |
8 | // The following code was copied from the stan math library
9 | // https://github.com/stan-dev/stan/blob/e118db2b78ed33c40f7b5c774f3ce5b85aa5dfdf/src/stan/math/matrix/sort_indices.hpp
10 |
11 | /*
12 | * Copyright (c) 2011--2015, Stan Developers and their Assignees
13 | All rights reserved.
14 |
15 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
16 |
17 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
18 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
19 | * Neither the name of Columbia University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22 | */
23 |
/**
 * Comparison functor that orders integer indices by the values they refer
 * to in a container, so the container itself never has to be reordered.
 *
 * @tparam ascending true to order by increasing value, false to order by
 *   decreasing value
 * @tparam C type of container; must provide operator[] and elements that
 *   support operator< and operator>
 */
template <bool ascending, typename C>
class index_comparator {
  // Held by reference: the container must outlive the comparator.
  const C& xs_;
public:
  /**
   * Construct an index comparator holding a reference
   * to the specified container.
   *
   * @param xs Container
   */
  index_comparator(const C& xs) : xs_(xs) { }

  /**
   * Return true if the value at the first index is sorted in
   * front of the value at the second index; this will depend
   * on the template parameter ascending.
   *
   * @param i Index of first value for comparison
   * @param j Index of second value for comparison
   */
  bool operator()(int i, int j) const {
    if (ascending)
      return xs_[i] < xs_[j];
    else
      return xs_[i] > xs_[j];
  }
};
51 |
52 |
53 | /**
54 | * Return an integer array of indices of the specified container
55 | * sorting the values in ascending or descending order based on
56 | * the value of the first template prameter.
57 | *
58 | * @tparam ascending true if sort is in ascending order
59 | * @tparam C type of container
60 | * @param xs Container to sort
61 | * @return sorted version of container
62 | */
63 | template
64 | std::vector sort_indices(const C& xs) {
65 | typename C::size_type size = xs.size();
66 | std::vector idxs;
67 | idxs.resize(size);
68 | for (typename C::size_type i = 0; i < size; ++i)
69 | idxs[i] = i;
70 | index_comparator comparator(xs);
71 | std::sort(idxs.begin(), idxs.end(), comparator);
72 | return idxs;
73 | }
74 |
75 |
76 | // [[Rcpp::export]]
77 | IntegerVector sort_indices(NumericVector x){
78 | return wrap(sort_indices(as >(x)));
79 | }
80 |
81 |
82 | //' Cluster ranges which are implemented as 2 equal-length numeric vectors.
83 | //' @param starts A numeric vector that defines the starts of each interval
84 | //' @param ends A numeric vector that defines the ends of each interval
85 | //' @param max_distance The maximum distance up to which intervals are still considered to be
86 | //' the same cluster. Default: 0.
87 | //' @examples
88 | //' starts <- c(50, 100, 120)
89 | //' ends <- c(75, 130, 150)
90 | //' j <- cluster_interval(starts, ends)
91 | //' j == c(0,1,1)
92 | //' @export
93 | // [[Rcpp::export]]
94 | IntegerVector cluster_interval(NumericVector starts, NumericVector ends, int max_distance=0) {
95 |
96 | // Require that starts and ends are the same length
97 |
98 | // The implementation is inspired by the bedtools implementation:
99 | // https://github.com/arq5x/bedtools2/blob/14fbbb8aed5c6a04685da2cee3f11b98d70304a7/src/clusterBed/clusterBed.cpp
100 | IntegerVector result(starts.size());
101 | int cluster_id = -1;
102 | int prev_end = std::numeric_limits::min();
103 | vector indices = sort_indices(as >(starts));
104 | for (int j = 0; j < indices.size(); j++) {
105 | int i = indices[j];
106 | Rcpp::checkUserInterrupt();
107 | if(starts[i] - prev_end > max_distance){
108 | cluster_id++;
109 | prev_end = ends[i];
110 | }else{
111 | if(ends[i] > prev_end){
112 | prev_end = ends[i];
113 | }
114 | }
115 | result[i] = cluster_id;
116 | }
117 |
118 | return result;
119 | }
120 |
121 |
122 |
123 |
--------------------------------------------------------------------------------
/src/tidygenomics_init.c:
--------------------------------------------------------------------------------
1 | #include <R.h>
2 | #include <Rinternals.h>
3 | #include <stdlib.h> // for NULL
4 | #include <R_ext/Rdynload.h>
5 |
6 | /* FIXME:
7 | Check these declarations against the C/Fortran source code.
8 | */
9 |
10 | /* .Call calls */
11 | extern SEXP _tidygenomics_cluster_interval(SEXP, SEXP, SEXP);
12 | extern SEXP _tidygenomics_sort_indices(SEXP);
13 |
14 | static const R_CallMethodDef CallEntries[] = {
15 | {"_tidygenomics_cluster_interval", (DL_FUNC) &_tidygenomics_cluster_interval, 3},
16 | {"_tidygenomics_sort_indices", (DL_FUNC) &_tidygenomics_sort_indices, 1},
17 | {NULL, NULL, 0}
18 | };
19 |
20 | void R_init_tidygenomics(DllInfo *dll)
21 | {
22 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
23 | R_useDynamicSymbols(dll, FALSE);
24 | }
25 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(tidygenomics)
3 |
4 | test_check("tidygenomics")
5 |
--------------------------------------------------------------------------------
/tests/testthat/test_cluster.R:
--------------------------------------------------------------------------------
1 |
context("genome_cluster")

suppressPackageStartupMessages(library(dplyr))

# Shared fixture: three intervals on chr1 (rows 1 and 2 overlap) and one
# interval on chr2.
x1 <- tibble(id = 1:4, bla=letters[1:4],
             chromosome = c("chr1", "chr1", "chr2", "chr1"),
             start = c(100, 120, 300, 260),
             end = c(150, 250, 350, 450))


test_that("genome_clustering assings that correct clusters", {
  j <- genome_cluster(x1, by=c("chromosome", "start", "end"), max_distance=5)

  # Row 4 starts 10 after row 2 ends, which exceeds max_distance, so it
  # gets its own cluster.
  expect_equal(j$cluster_id, c(0,0,2,1))
})


test_that("cluster_interval works", {
  starts <- c(50, 100, 120)
  ends <- c(75, 130, 150)
  j <- cluster_interval(starts, ends)
  expect_equal(j, c(0,1,1))
  # The gap between the first two intervals is exactly 25.
  expect_equal(cluster_interval(starts, ends, max_distance = 24), c(0,1,1))
  expect_equal(cluster_interval(starts, ends, max_distance = 25), c(0,0,0))

  # An interval fully contained in an earlier one must not shrink the cluster.
  starts <- c(50, 100, 120, 180, 350)
  ends <- c(75, 200, 150, 210, 400)
  expect_equal(cluster_interval(starts, ends), c(0,1,1,1,2))

  # Unsorted input: ids follow the start position, not the input order.
  starts <- c(500, 300, 150)
  ends <- c(510, 310, 160)
  expect_equal(cluster_interval(starts, ends), c(2,1,0))

  expect_equal(cluster_interval(numeric(0), numeric(0)), numeric(0))
})
39 |
--------------------------------------------------------------------------------
/tests/testthat/test_complement.R:
--------------------------------------------------------------------------------
1 |
context("genome_complement")

suppressPackageStartupMessages(library(dplyr))

# Shared fixture: three non-overlapping intervals on chr1 and one on chr2.
x1 <- tibble(id = 1:4, bla=letters[1:4],
             chromosome = c("chr1", "chr1", "chr2", "chr1"),
             start = c(100, 200, 300, 400),
             end = c(150, 250, 350, 450))

test_that("Calculating the complement of a sequence works", {
  j <- genome_complement(x1, by=c("chromosome", "start", "end"))
  # Without chromosome_size only the gaps before and between the intervals
  # are expected, starting at position 1 on each chromosome.
  expect_equal(j$chromosome, c("chr1", "chr1", "chr1", "chr2"))
  expect_equal(j$start, c(1,151, 251,1))
  expect_equal(j$end, c(99,199, 399, 299))
})
18 |
--------------------------------------------------------------------------------
/tests/testthat/test_intersect.R:
--------------------------------------------------------------------------------
1 |
2 | context("genome_intersect")
3 |
4 | suppressPackageStartupMessages(library(dplyr))
5 |
6 | x1 <- tibble(id = 1:4, bla=letters[1:4],
7 | chromosome = c("chr1", "chr1", "chr2", "chr2"),
8 | start = c(100, 200, 300, 400),
9 | end = c(150, 250, 350, 450))
10 |
11 | x2 <- tibble(id = 1:4, BLA=LETTERS[1:4],
12 | chromosome = c("chr1", "chr2", "chr2", "chr1"),
13 | start = c(140, 210, 400, 300),
14 | end = c(160, 240, 415, 320))
15 |
16 | test_that("Intersection (both) of 2 data frames works as expected", {
17 | j <- genome_intersect(x1, x2, by=c("chromosome", "start", "end"), mode="both")
18 | # print(j)
19 | expect_equal(colnames(j), c("id.x", "bla", "chromosome", "id.y", "BLA", "start", "end"))
20 | expect_equal(j$start, c(140, 400))
21 | expect_equal(j$end, c(150, 415))
22 | })
23 |
24 | test_that("Intersection of 2 data frames works for multi-overlap ranges", {
25 | x2 <- tibble(id = 1, BLA=LETTERS[1],
26 | chromosome = c("chr1"),
27 | start = c(140),
28 | end = c(220))
29 | j <- genome_intersect(x1, x2, by=c("chromosome", "start", "end"), mode="both")
30 | # print(j)
31 | expect_equal(colnames(j), c("id.x", "bla", "chromosome", "id.y", "BLA", "start", "end"))
32 | expect_equal(j$start, c(140, 200))
33 | expect_equal(j$end, c(150, 220))
34 | expect_equal(j$id.x, c(1,2))
35 | expect_equal(j$id.y, c(1,1))
36 |
37 | })
38 |
39 |
40 |
41 | test_that("Intersection of 2 data frames works for multi-overlap ranges the other way around", {
42 | x1 <- tibble(id = 1, bla=letters[1],
43 | chromosome = c("chr1"),
44 | start = c(100),
45 | end = c(420))
46 | j <- genome_intersect(x1, x2, by=c("chromosome", "start", "end"), mode="both")
47 | # print(j)
48 | expect_equal(colnames(j), c("id.x", "bla", "chromosome", "id.y", "BLA", "start", "end"))
49 | expect_equal(j$start, c(140, 300))
50 | expect_equal(j$end, c(160, 320))
51 | expect_equal(j$id.x, c(1,1))
52 | expect_equal(j$id.y, c(1,4))
53 |
54 | })
55 |
56 |
57 | test_that("Intersect and findOverlap always match", {
58 | r1 <- IRanges::IRanges(start=c(1,3,24), end=c(1,130,24))
59 | r2 <- IRanges::IRanges(start=c(1,20,100), end=c(10,30,110))
60 | o <- as.data.frame(IRanges::findOverlaps(r1, r2))
61 | intersection <- IRanges::pintersect(r1[o$queryHits], r2[o$subjectHits])
62 | expect_equal(length(o$queryHits), length(intersection))
63 | expect_true(all(IRanges::poverlaps(intersection, r1[o$queryHits])))
64 | })
65 |
66 |
67 |
--------------------------------------------------------------------------------
/tests/testthat/test_issue.R:
--------------------------------------------------------------------------------
1 |
2 | context("genome_issue")
3 |
4 |
5 | suppressPackageStartupMessages(library(dplyr))
6 |
7 |
8 | test_that("Latest issue", {
9 |
10 | })
11 |
--------------------------------------------------------------------------------
/tests/testthat/test_join_closest.R:
--------------------------------------------------------------------------------
1 |
context("genome_join_closest")

suppressPackageStartupMessages(library(dplyr))

# Shared fixtures: x1 has an interval on chr3, which has no counterpart in x2.
x1 <- tibble(id = 1:4, bla=letters[1:4],
             chromosome = c("chr1", "chr1", "chr2", "chr3"),
             start = c(100, 200, 300, 400),
             end = c(150, 250, 350, 450))

x2 <- tibble(id = 1:4, BLA=LETTERS[1:4],
             chromosome = c("chr1", "chr1", "chr1", "chr2"),
             start = c(220, 210, 300, 400),
             end = c(225, 240, 320, 415))

test_that("Joining with closest works as expected", {
  j <- genome_join_closest(x1, x2, by=c("chromosome", "start", "end"), distance_column_name="distance", mode="left")
  expect_equal(colnames(j), c("id.x", "bla", "chromosome.x", "start.x", "end.x",
                              "id.y", "BLA", "chromosome.y", "start.y", "end.y", "distance"))
  # Five rows from four inputs: the second x1 interval overlaps two x2
  # intervals (distance 0 for both), and the chr3 interval is kept with NA.
  expect_equal(j$start.y, c(210, 220, 210, 400, NA))
  expect_equal(j$distance, c(59, 0, 0, 49, NA))
})
24 |
25 |
26 |
--------------------------------------------------------------------------------
/tests/testthat/test_subtract.R:
--------------------------------------------------------------------------------
1 |
2 | context("genome_subtract")
3 |
4 |
5 | suppressPackageStartupMessages(library(dplyr))
6 |
7 | x1 <- tibble(id = 1:4, bla=letters[1:4],
8 | chromosome = c("chr1", "chr1", "chr2", "chr1"),
9 | start = c(100, 200, 300, 400),
10 | end = c(150, 250, 350, 450))
11 |
12 | x2 <- tibble(id = 1:4, BLA=LETTERS[1:4],
13 | chromosome = c("chr1", "chr2", "chr1", "chr1"),
14 | start = c(120, 210, 300, 400),
15 | end = c(125, 240, 320, 415))
16 |
17 | test_that("Subtraction of 2 data frames works as expected", {
18 | j <- genome_subtract(x1, x2, by=c("chromosome", "start", "end"))
19 | # print(j)
20 | expect_equal(colnames(j), c("id", "bla", "chromosome", "start", "end"))
21 | expect_equal(j$start, c(100, 126, 200, 300, 416))
22 | expect_equal(j$end, c(119, 150, 250, 350, 450))
23 | })
24 |
25 |
26 |
27 | test_that("Edge cases of subtraction of 2 data frames works as expected", {
28 | x1 <- tibble(id = 1:2, bla=letters[1:2],
29 | chromosome = c("chr1", "chr1"),
30 | start = c(100, 200),
31 | end = c(150, 250))
32 |
33 | x2 <- tibble(id = 1:4, BLA=LETTERS[1:4],  # 120-125 and 110-122 overlap each other; 190-320 covers x1's second interval entirely
34 | chromosome = c("chr1", "chr1", "chr1", "chr1"),
35 | start = c(120, 110, 190, 400),
36 | end = c(125, 122, 320, 415))
37 |
38 | j <- genome_subtract(x1, x2, by=c("chromosome", "start", "end"))
39 | # print(j)  # debugging aid; kept disabled so the test run stays quiet
40 | expect_equal(colnames(j), c("id", "bla", "chromosome", "start", "end"))
41 | expect_equal(j$start, c(100, 126))  # the fully covered interval 200-250 is expected to disappear
42 | expect_equal(j$end, c(109, 150))  # the two overlapping x2 intervals remove 110-125 as one gap
43 | })
44 |
45 |
46 | test_that("during subtraction the intervals are not unified", {
47 | x1 <- tibble(id = 1:3, bla=letters[1:3],  # the first two intervals (100-150 and 115-160) overlap each other
48 | chromosome = c("chr1", "chr1", "chr1"),
49 | start = c(100, 115, 200),
50 | end = c(150, 160, 250))
51 |
52 | x2 <- tibble(id = 1, BLA=LETTERS[1],
53 | chromosome = c("chr1"),
54 | start = c(110),
55 | end = c(130))
56 |
57 | j <- genome_subtract(x1, x2, by=c("chromosome", "start", "end"))
58 | # print(j)  # debugging aid; kept disabled so the test run stays quiet
59 | expect_equal(colnames(j), c("id", "bla", "chromosome", "start", "end"))
60 | expect_equal(j$start, c(100, 131, 131, 200))  # 131 appears twice: each overlapping x1 interval keeps its own remainder
61 | expect_equal(j$end, c(109, 150, 160, 250))  # remainders 131-150 and 131-160 are not merged into one interval
62 | })
63 |
--------------------------------------------------------------------------------
/tidygenomics.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 |
--------------------------------------------------------------------------------
/vignettes/intro.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Tidy Genomics"
3 | author: "Constantin Ahlmann-Eltze"
4 | date: "`r Sys.Date()`"
5 | output:
6 | rmarkdown::html_vignette:
7 | fig_caption: yes
8 | vignette: >
9 | %\VignetteIndexEntry{Tidy Genomics}
10 | %\VignetteEngine{knitr::rmarkdown}
11 | %\VignetteEncoding{UTF-8}
12 | ---
13 |
14 | The most dramatic impact on programming in R in recent years was the development of the [tidyverse](http://tidyverse.org/) by Hadley Wickham et al.
15 | which, combined with the ingenious `%>%` from magrittr, provides a uniform philosophy for handling data.
16 |
17 | The genomics community has an alternative set of approaches, for which [Bioconductor](http://bioconductor.org/) and the
18 | [GenomicRanges](http://bioconductor.org/packages/release/bioc/html/GenomicRanges.html) package provide the basis. The `GenomicRanges` and
19 | the underlying `IRanges` package provide a great set of methods for dealing with intervals as they are typically encountered in genomics.
20 |
21 | Unfortunately, it is not always easy to combine those two worlds: many common operations in `GenomicRanges` focus solely on the
22 | ranges and lose the additional metadata columns. On the other hand, the `tidyverse` does not provide a unified set of methods
23 | to do common set operations with intervals.
24 |
25 | That changed recently, when the [fuzzyjoin](https://github.com/dgrtwo/fuzzyjoin) package was extended with the `genome_join`
26 | function for combining genomic data stored in a `data.frame`. It demonstrated that genomic data can be handled appropriately
27 | with the _tidy_ philosophy.
28 |
29 | The `tidygenomics` package extends the limited set of methods provided by the `fuzzyjoin` package for dealing with genomic
30 | data. Its API is inspired by the very popular [bedtools](http://bedtools.readthedocs.io/en/latest/index.html):
31 |
32 |
33 | - `genome_intersect`
34 | - `genome_subtract`
35 | - `genome_join_closest`
36 | - `genome_cluster`
37 | - `genome_complement`
38 | - `genome_join` _Provided by the fuzzyjoin package_
39 |
40 | ```{r, message=FALSE, warning=FALSE, echo=FALSE}
41 | library(dplyr)
42 | library(tidygenomics)
43 | ```
44 |
45 |
46 | ## genome_intersect
47 |
48 | Joins 2 data frames based on their genomic overlap. Unlike the `genome_join` function it updates the boundaries to reflect
49 | the overlap of the regions.
50 |
51 |
52 |
53 |
54 | ```{r}
55 | x1 <- data.frame(id = 1:4,
56 | chromosome = c("chr1", "chr1", "chr2", "chr2"),
57 | start = c(100, 200, 300, 400),
58 | end = c(150, 250, 350, 450))
59 |
60 | x2 <- data.frame(id = 1:4,
61 | chromosome = c("chr1", "chr2", "chr2", "chr1"),
62 | start = c(140, 210, 400, 300),
63 | end = c(160, 240, 415, 320))
64 |
65 | genome_intersect(x1, x2, by=c("chromosome", "start", "end"), mode="both")
66 | ```
67 |
68 |
69 | ## genome_subtract
70 |
71 | Subtracts one data frame from the other. This can be used to split the x data frame into smaller areas.
72 |
73 |
74 |
75 | ```{r}
76 | x1 <- data.frame(id = 1:4,
77 | chromosome = c("chr1", "chr1", "chr2", "chr1"),
78 | start = c(100, 200, 300, 400),
79 | end = c(150, 250, 350, 450))
80 |
81 | x2 <- data.frame(id = 1:4,
82 | chromosome = c("chr1", "chr2", "chr1", "chr1"),
83 | start = c(120, 210, 300, 400),
84 | end = c(125, 240, 320, 415))
85 |
86 | genome_subtract(x1, x2, by=c("chromosome", "start", "end"))
87 | ```
88 |
89 |
90 |
91 |
92 | ## genome_join_closest
93 |
94 | Joins 2 data frames based on their genomic location. If no exact overlap is found, the next closest interval is used.
95 |
96 |
97 |
98 | ```{r}
99 | x1 <- tibble(id = 1:4,
100 | chr = c("chr1", "chr1", "chr2", "chr3"),
101 | start = c(100, 200, 300, 400),
102 | end = c(150, 250, 350, 450))
103 |
104 | x2 <- tibble(id = 1:4,
105 | chr = c("chr1", "chr1", "chr1", "chr2"),
106 | start = c(220, 210, 300, 400),
107 | end = c(225, 240, 320, 415))
108 | genome_join_closest(x1, x2, by=c("chr", "start", "end"), distance_column_name="distance", mode="left")
109 | ```
110 |
111 |
112 | ## genome_cluster
113 |
114 | Adds a new column identifying the cluster each interval belongs to. Two intervals are put in the same cluster if they overlap or are within `max_distance` of each other.
115 |
116 |
117 |
118 | ```{r}
119 | x1 <- data.frame(id = 1:4, bla=letters[1:4],
120 | chromosome = c("chr1", "chr1", "chr2", "chr1"),
121 | start = c(100, 120, 300, 260),
122 | end = c(150, 250, 350, 450))
123 | genome_cluster(x1, by=c("chromosome", "start", "end"))
124 | genome_cluster(x1, by=c("chromosome", "start", "end"), max_distance=10)
125 | ```
126 |
127 | ## genome_complement
128 |
129 | Calculates the complement of a genomic region.
130 |
131 |
132 |
133 | ```{r}
134 | x1 <- data.frame(id = 1:4,
135 | chromosome = c("chr1", "chr1", "chr2", "chr1"),
136 | start = c(100, 200, 300, 400),
137 | end = c(150, 250, 350, 450))
138 |
139 | genome_complement(x1, by=c("chromosome", "start", "end"))
140 | ```
141 |
142 |
143 |
144 | ## genome_join
145 |
146 | Classical join function based on the overlap of the intervals. Implemented and maintained in the
147 | [fuzzyjoin](https://github.com/dgrtwo/fuzzyjoin) package and documented here only for completeness.
148 |
149 |
150 |
151 | ```{r}
152 | x1 <- tibble(id = 1:4,
153 | chr = c("chr1", "chr1", "chr2", "chr3"),
154 | start = c(100, 200, 300, 400),
155 | end = c(150, 250, 350, 450))
156 |
157 | x2 <- tibble(id = 1:4,
158 | chr = c("chr1", "chr1", "chr1", "chr2"),
159 | start = c(220, 210, 300, 400),
160 | end = c(225, 240, 320, 415))
161 | fuzzyjoin::genome_join(x1, x2, by=c("chr", "start", "end"), mode="inner")
162 |
163 | fuzzyjoin::genome_join(x1, x2, by=c("chr", "start", "end"), mode="left")
164 |
165 | fuzzyjoin::genome_join(x1, x2, by=c("chr", "start", "end"), mode="anti")
166 | ```
167 |
168 |
169 |
170 |
--------------------------------------------------------------------------------
/vignettes/resources/genome_cluster_docu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/const-ae/tidygenomics/23737e99f7ff9893f485e2b6b48c1d15c13a5623/vignettes/resources/genome_cluster_docu.png
--------------------------------------------------------------------------------
/vignettes/resources/genome_complement_docu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/const-ae/tidygenomics/23737e99f7ff9893f485e2b6b48c1d15c13a5623/vignettes/resources/genome_complement_docu.png
--------------------------------------------------------------------------------
/vignettes/resources/genome_intersect_docu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/const-ae/tidygenomics/23737e99f7ff9893f485e2b6b48c1d15c13a5623/vignettes/resources/genome_intersect_docu.png
--------------------------------------------------------------------------------
/vignettes/resources/genome_join_closest_docu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/const-ae/tidygenomics/23737e99f7ff9893f485e2b6b48c1d15c13a5623/vignettes/resources/genome_join_closest_docu.png
--------------------------------------------------------------------------------
/vignettes/resources/genome_join_docu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/const-ae/tidygenomics/23737e99f7ff9893f485e2b6b48c1d15c13a5623/vignettes/resources/genome_join_docu.png
--------------------------------------------------------------------------------
/vignettes/resources/genome_subtract_docu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/const-ae/tidygenomics/23737e99f7ff9893f485e2b6b48c1d15c13a5623/vignettes/resources/genome_subtract_docu.png
--------------------------------------------------------------------------------