├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── FUN_distances.R ├── RcppExports.R ├── correlation_distances.R ├── distance_functions.r ├── farthest_point_sampling.R ├── is_metric.R ├── product_metric.R └── rdist-package.r ├── README.md ├── docs ├── authors.html ├── docsearch.css ├── index.html ├── jquery.sticky-kit.min.js ├── link.svg ├── news │ └── index.html ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml └── reference │ ├── farthest_point_sampling.html │ ├── index.html │ ├── is_metric.html │ ├── product_metric.html │ └── rdist.html ├── man ├── farthest_point_sampling.Rd ├── is_metric.Rd ├── product_metric.Rd └── rdist.Rd ├── src ├── .gitignore ├── Makevars ├── Makevars.win ├── RcppExports.cpp ├── canberra.cpp ├── canberra.h ├── dist.cpp ├── dist.h ├── euclidean.cpp ├── farthest_point_sampling.cpp ├── hamming.cpp ├── hamming.h ├── init.c ├── jaccard.cpp ├── jaccard.h ├── manhattan.cpp ├── manhattan.h ├── maximum.cpp ├── maximum.h ├── minkowski.cpp ├── minkowski.h └── triangle.cpp └── tests ├── testthat.R └── testthat ├── test-canberra.R ├── test-correlation.R ├── test-euclidean.R ├── test-hamming.R ├── test-jaccard.R ├── test-manhattan.R ├── test-maximum.R ├── test-minkowski.R └── test-user-defined.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | docs 2 | ^\.travis\.yml$ 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | language: R 4 | sudo: false 5 | cache: packages 6 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: rdist 2 | Title: Calculate Pairwise Distances 3 | Version: 0.0.6 4 | Authors@R: person("Nello", "Blaser", email = "nello.blaser@uib.no", role = c("aut", "cre")) 5 | Description: A common framework for calculating distance matrices. 6 | Depends: 7 | R (>= 3.2.2) 8 | License: GPL 9 | URL: https://github.com/blasern/rdist 10 | BugReports: https://github.com/blasern/rdist/issues 11 | Encoding: UTF-8 12 | LinkingTo: Rcpp, RcppArmadillo 13 | Imports: 14 | Rcpp, methods 15 | RoxygenNote: 7.2.3 16 | Suggests: testthat 17 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(cdist) 4 | export(farthest_point_sampling) 5 | export(is_distance_matrix) 6 | export(pdist) 7 | export(product_metric) 8 | export(rdist) 9 | export(triangle_inequality) 10 | importFrom(Rcpp,sourceCpp) 11 | importFrom(methods,is) 12 | importFrom(stats,as.dist) 13 | importFrom(stats,cor) 14 | useDynLib(rdist, .registration = TRUE) 15 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # rdist 0.0.6 2 | - Minor fix for CRAN 3 | 4 | # rdist 0.0.5 5 | - Minor fix for CRAN 6 | 7 | # rdist 0.0.4 8 | - Some bugfixes to make CRAN compatible 9 | 10 | # rdist 0.0.3 11 | - Added documentation pages 12 | - Added `product_metric` 13 | - Added `farthest_point_sampling` 14 | - Added `is_distance_matrix` and `triangle_inequality` checks 15 | 16 | # rdist 0.0.2 17 | - Fixed bug in jaccard distance 18 | - Fixed bug in euclidean distance 19 | - Added labels to rdist output 20 | 21 | # rdist 0.0.1 22 | An R package to calculate distances. This provide a common framework to calculate distances. 23 | There are three main functions: 24 | - `rdist` computes the pairwise distances between observations in one matrix and returns a `dist` object, 25 | - `pdist` computes the pairwise distances between observations in one matrix and returns a `matrix`, and 26 | - `cdist` computes the distances between observations in two matrices and returns a `matrix`. 27 | 28 | All functions have an argument `metric` that can be used to specify the distance function. Available metrics are `"euclidean"`, `"minkowski"`, `"manhattan"`, `"maximum"`, `"canberra"`, `"angular"`, `"correlation"`, `"absolute_correlation"`, `"hamming"`, and `"jaccard"`. 29 | All functions will return NA or NaN when one of the compared vectors contains NAs. 30 | 31 | ## Installation 32 | 33 | To install the latest released version from CRAN: 34 | 35 | install.packages("rdist") 36 | 37 | To install the latest development version from github: 38 | 39 | install.packages("devtools") 40 | devtools::install_github("blasern/rdist") 41 | 42 | ## Development 43 | 44 | If you find issues, please [let me know](https://github.com/blasern/rdist/issues). 45 | If you would like to contribute, please [create a pull request](https://github.com/blasern/rdist/compare). 46 | -------------------------------------------------------------------------------- /R/FUN_distances.R: -------------------------------------------------------------------------------- 1 | # distances with user-defined functions 2 | FUN_rdist <- function(X, metric = function(x, y) sqrt(sum((x-y)^2))){ 3 | as.dist(FUN_pdist(X, metric = metric)) 4 | } 5 | 6 | FUN_pdist <- function(X, metric = function(x, y) sqrt(sum((x-y)^2))){ 7 | res <- apply(X, 1, function(x){ 8 | apply(X, 1, function(y){ 9 | metric(x, y) 10 | }) 11 | }) 12 | # fixing dimensions 13 | dim(res) <- rep(nrow(X), 2) 14 | res 15 | } 16 | 17 | FUN_cdist <- function(X, Y, metric = function(x, y) sqrt(sum((x-y)^2))){ 18 | res <- apply(Y, 1, function(y){ 19 | apply(X, 1, function(x){ 20 | metric(x, y) 21 | }) 22 | }) 23 | # fixing dimensions 24 | dim(res) <- c(nrow(X), nrow(Y)) 25 | res 26 | } 27 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | canberra_rdist <- function(A) { 5 | .Call(`_rdist_canberra_rdist`, A) 6 | } 7 | 8 | canberra_pdist <- function(A) { 9 | .Call(`_rdist_canberra_pdist`, A) 10 | } 11 | 12 | canberra_cdist <- function(A, B) { 13 | .Call(`_rdist_canberra_cdist`, A, B) 14 | } 15 | 16 | rdist_cpp <- function(A, metric, p = 2.0) { 17 | .Call(`_rdist_rdist_cpp`, A, metric, p) 18 | } 19 | 20 | pdist_cpp <- function(A, metric, p = 2.0) { 21 | .Call(`_rdist_pdist_cpp`, A, metric, p) 22 | } 23 | 24 | cdist_cpp <- function(A, B, metric, p = 2.0) { 25 | .Call(`_rdist_cdist_cpp`, A, B, metric, p) 26 | } 27 | 28 | euclidean_rdist <- function(A) { 29 | .Call(`_rdist_euclidean_rdist`, A) 30 | } 31 | 32 | euclidean_pdist <- function(A) { 33 | .Call(`_rdist_euclidean_pdist`, A) 34 | } 35 | 36 | euclidean_cdist <- function(A, B) { 37 | .Call(`_rdist_euclidean_cdist`, A, B) 38 | } 39 | 40 | farthest_point_sampling_cpp <- function(mat, metric, k, initial_point_index = 0L, return_clusters = FALSE) { 41 | .Call(`_rdist_farthest_point_sampling_cpp`, mat, metric, k, initial_point_index, return_clusters) 42 | } 43 | 44 | hamming_rdist <- function(A) { 45 | .Call(`_rdist_hamming_rdist`, A) 46 | } 47 | 48 | hamming_pdist <- function(A) { 49 | .Call(`_rdist_hamming_pdist`, A) 50 | } 51 | 52 | hamming_cdist <- function(A, B) { 53 | .Call(`_rdist_hamming_cdist`, A, B) 54 | } 55 | 56 | jaccard_rdist <- function(A) { 57 | .Call(`_rdist_jaccard_rdist`, A) 58 | } 59 | 60 | jaccard_pdist <- function(A) { 61 | .Call(`_rdist_jaccard_pdist`, A) 62 | } 63 | 64 | jaccard_cdist <- function(A, B) { 65 | .Call(`_rdist_jaccard_cdist`, A, B) 66 | } 67 | 68 | manhattan_rdist <- function(A) { 69 | .Call(`_rdist_manhattan_rdist`, A) 70 | } 71 | 72 | manhattan_pdist <- function(A) { 73 | .Call(`_rdist_manhattan_pdist`, A) 74 | } 75 | 76 | manhattan_cdist <- function(A, B) { 77 | .Call(`_rdist_manhattan_cdist`, A, B) 78 | } 79 | 80 | maximum_rdist <- function(A) { 81 | .Call(`_rdist_maximum_rdist`, A) 82 | } 83 | 84 | maximum_pdist <- function(A) { 85 | .Call(`_rdist_maximum_pdist`, A) 86 | } 87 | 88 | maximum_cdist <- function(A, B) { 89 | .Call(`_rdist_maximum_cdist`, A, B) 90 | } 91 | 92 | minkowski_rdist <- function(A, p) { 93 | .Call(`_rdist_minkowski_rdist`, A, p) 94 | } 95 | 96 | minkowski_pdist <- function(A, p) { 97 | .Call(`_rdist_minkowski_pdist`, A, p) 98 | } 99 | 100 | minkowski_cdist <- function(A, B, p) { 101 | .Call(`_rdist_minkowski_cdist`, A, B, p) 102 | } 103 | 104 | cpp_triangle_inequality <- function(mat, tolerance = 0) { 105 | .Call(`_rdist_cpp_triangle_inequality`, mat, tolerance) 106 | } 107 | 108 | -------------------------------------------------------------------------------- /R/correlation_distances.R: -------------------------------------------------------------------------------- 1 | bound <- function(x, min = -1, max = 1){ 2 | x[x > max] <- max 3 | x[x < min] <- min 4 | x 5 | } 6 | 7 | # correlation distances 8 | correlation_rdist <- function(X){ 9 | stats::as.dist(sqrt((1 - bound(stats::cor(t(X))))/2)) 10 | } 11 | correlation_pdist <- function(X){ 12 | sqrt((1 - bound(stats::cor(t(X))))/2) 13 | } 14 | correlation_cdist <- function(X, Y){ 15 | sqrt((1 - bound(stats::cor(t(X), t(Y))))/2) 16 | } 17 | 18 | # angular distances 19 | angular_rdist <- function(X){ 20 | stats::as.dist(acos(bound(stats::cor(t(X))))) 21 | } 22 | angular_pdist <- function(X){ 23 | acos(bound(stats::cor(t(X)))) 24 | } 25 | angular_cdist <- function(X, Y){ 26 | acos(bound(stats::cor(t(X), t(Y)))) 27 | } 28 | 29 | # absolute correlation distances 30 | absolute_correlation_rdist <- function(X){ 31 | stats::as.dist(sqrt(1 - bound(stats::cor(t(X))) ^ 2)) 32 | } 33 | absolute_correlation_pdist <- function(X){ 34 | sqrt(1 - bound(stats::cor(t(X))) ^ 2) 35 | } 36 | absolute_correlation_cdist <- function(X, Y){ 37 | sqrt(1 - bound(stats::cor(t(X), t(Y))) ^ 2) 38 | } -------------------------------------------------------------------------------- /R/distance_functions.r: -------------------------------------------------------------------------------- 1 | available_metrics <- c("euclidean", "minkowski", "manhattan", 2 | "chebyshev", "maximum", "canberra", 3 | "angular", "correlation", "absolute_correlation", 4 | "hamming", "jaccard", "user") 5 | 6 | #' @rdname rdist 7 | #' @export 8 | rdist <- function(X, 9 | metric = "euclidean", 10 | p = 2L){ 11 | if (is.function(metric)) { 12 | FUN <- metric 13 | metric <- "user" 14 | } 15 | # make sure input is well-defined 16 | metric <- match.arg(metric, available_metrics) 17 | X <- as.matrix(X) 18 | # use metric 19 | ans <- switch(metric, 20 | "euclidean" = minkowski_rdist(X, p = 2L), 21 | "minkowski" = minkowski_rdist(X, p = p), 22 | "manhattan" = manhattan_rdist(X), 23 | "chebyshev" = maximum_rdist(X), 24 | "maximum" = maximum_rdist(X), 25 | "canberra" = canberra_rdist(X), 26 | "angular" = angular_rdist(X), 27 | "correlation" = correlation_rdist(X), 28 | "absolute_correlation" = absolute_correlation_rdist(X), 29 | "hamming" = hamming_rdist(X), 30 | "jaccard" = jaccard_rdist(X), 31 | "user" = FUN_rdist(X, metric = FUN)) 32 | # change attributes 33 | attributes(ans) <- NULL 34 | attr(ans, "Size") <- nrow(X) 35 | attr(ans, "call") <- match.call() 36 | attr(ans, "method") <- metric 37 | attr(ans, "Labels") <- dimnames(X)[[1L]] 38 | class(ans) <- "dist" 39 | return(ans) 40 | } 41 | 42 | #' @rdname rdist 43 | #' @export 44 | pdist <- function(X, 45 | metric = "euclidean", 46 | p = 2){ 47 | if (is.function(metric)) { 48 | FUN <- metric 49 | metric <- "user" 50 | } 51 | # make sure input is well-defined 52 | metric <- match.arg(metric, available_metrics) 53 | X <- as.matrix(X) 54 | # use metric 55 | switch(metric, 56 | "euclidean" = minkowski_pdist(X, p = 2L), 57 | "minkowski" = minkowski_pdist(X, p = p), 58 | "manhattan" = manhattan_pdist(X), 59 | "chebyshev" = maximum_pdist(X), 60 | "maximum" = maximum_pdist(X), 61 | "canberra" = canberra_pdist(X), 62 | "angular" = angular_pdist(X), 63 | "correlation" = correlation_pdist(X), 64 | "absolute_correlation" = absolute_correlation_pdist(X), 65 | "hamming" = hamming_pdist(X), 66 | "jaccard" = jaccard_pdist(X), 67 | "user" = FUN_pdist(X, metric = FUN)) 68 | } 69 | 70 | #' @rdname rdist 71 | #' @export 72 | cdist <- function(X, Y, 73 | metric = "euclidean", 74 | p = 2){ 75 | if (is.function(metric)) { 76 | FUN <- metric 77 | metric <- "user" 78 | } 79 | # make sure input is well-defined 80 | metric <- match.arg(metric, available_metrics) 81 | X <- as.matrix(X) 82 | Y <- as.matrix(Y) 83 | stopifnot(ncol(X) == ncol(Y)) 84 | # use metric 85 | switch(metric, 86 | "euclidean" = minkowski_cdist(X, Y, p = 2L), 87 | "minkowski" = minkowski_cdist(X, Y, p = p), 88 | "manhattan" = manhattan_cdist(X, Y), 89 | "chebyshev" = maximum_cdist(X, Y), 90 | "maximum" = maximum_cdist(X, Y), 91 | "canberra" = canberra_cdist(X, Y), 92 | "angular" = angular_cdist(X, Y), 93 | "correlation" = correlation_cdist(X, Y), 94 | "absolute_correlation" = absolute_correlation_cdist(X, Y), 95 | "hamming" = hamming_cdist(X, Y), 96 | "jaccard" = jaccard_cdist(X, Y), 97 | "user" = FUN_cdist(X, Y, metric = FUN)) 98 | } -------------------------------------------------------------------------------- /R/farthest_point_sampling.R: -------------------------------------------------------------------------------- 1 | #' Farthest point sampling 2 | #' 3 | #' Farthest point sampling returns a reordering of the metric 4 | #' space P = {p_1, ..., p_k}, such that each p_i is the farthest 5 | #' point from the first i-1 points. 6 | #' 7 | #' @param mat Original distance matrix 8 | #' @param metric Distance metric to use (either "precomputed" or a metric from \code{\link{rdist}}) 9 | #' @param k Number of points to sample 10 | #' @param initial_point_index Index of p_1 11 | #' @param return_clusters Should the indices of the closest farthest points be returned? 12 | #' 13 | #' @examples 14 | #' 15 | #' # generate data 16 | #' df <- matrix(runif(200), ncol = 2) 17 | #' dist_mat <- pdist(df) 18 | #' # farthest point sampling 19 | #' fps <- farthest_point_sampling(dist_mat) 20 | #' fps2 <- farthest_point_sampling(df, metric = "euclidean") 21 | #' all.equal(fps, fps2) 22 | #' # have a look at the fps distance matrix 23 | #' rdist(df[fps[1:5], ]) 24 | #' dist_mat[fps, fps][1:5, 1:5] 25 | #' @export 26 | farthest_point_sampling <- function(mat, 27 | metric = "precomputed", 28 | k = nrow(mat), 29 | initial_point_index = 1L, 30 | return_clusters = FALSE){ 31 | metric <- match.arg(metric, c("precomputed", available_metrics)) 32 | mat <- as.matrix(mat) 33 | initial_point_index <- as.integer(initial_point_index) 34 | k <- as.integer(k) 35 | # sanity check 36 | if (metric == "precomputed"){ 37 | stopifnot(nrow(mat) == ncol(mat)) 38 | } 39 | stopifnot(initial_point_index >= 0L) 40 | # farthest point sampling 41 | fps <- farthest_point_sampling_cpp(mat, metric, k, initial_point_index, return_clusters) 42 | if (return_clusters){ 43 | clusters <- as.integer(attr(fps, "clusters")) 44 | fps <- as.integer(fps) 45 | return(list(fps, clusters)) 46 | } 47 | else { 48 | fps <- as.integer(fps) 49 | return(fps) 50 | } 51 | } -------------------------------------------------------------------------------- /R/is_metric.R: -------------------------------------------------------------------------------- 1 | #' Metric and triangle inequality 2 | #' 3 | #' Does the distance matric come from a metric 4 | #' 5 | #' @param mat The matrix to evaluate 6 | #' @param tolerance Differences smaller than tolerance are not reported. 7 | #' @name is_metric 8 | #' @examples 9 | #' data <- matrix(rnorm(20), ncol = 2) 10 | #' dm <- pdist(data) 11 | #' is_distance_matrix(dm) 12 | #' triangle_inequality(dm) 13 | #' 14 | #' dm[1, 2] <- 1.1 * dm[1, 2] 15 | #' is_distance_matrix(dm) 16 | #' @export 17 | is_distance_matrix <- function(mat, tolerance = .Machine$double.eps ^ 0.5){ 18 | mat <- as.matrix(mat) 19 | msg <- character(0) 20 | # 21 | not_square <- nrow(mat) != ncol(mat) 22 | if (not_square){ 23 | msg <- c(msg, "Matrix is not square.") 24 | assymetric <- TRUE 25 | } 26 | else { 27 | assymetric <- !isTRUE(all.equal(mat, t(mat), tolerance = tolerance)) 28 | if (assymetric){ 29 | msg <- c(msg, "Matrix is not symmetric.") 30 | } 31 | } 32 | negative <- any(mat < 0 - tolerance) 33 | if (negative){ 34 | msg <- c(msg, "Matrix is not positive.") 35 | } 36 | identity <- !isTRUE(all.equal(diag(mat), rep(0, nrow(mat)))) 37 | if (identity){ 38 | msg <- c(msg, "Diagonal is not zero.") 39 | } 40 | if (!(not_square | assymetric | negative | identity)){ 41 | triangle <- triangle_inequality(mat, tolerance = tolerance) 42 | if (!triangle){ 43 | msg <- c(msg, "Matrix does not satisfy triangle inequality.") 44 | } 45 | } 46 | dm <- length(msg) == 0 47 | if (!dm){ 48 | cat(paste(msg, "\n", collapse = "")) 49 | } 50 | return(dm) 51 | } 52 | 53 | #' @rdname is_metric 54 | #' @export 55 | triangle_inequality <- function(mat, tolerance = .Machine$double.eps ^ 0.5){ 56 | mat <- as.matrix(mat) 57 | stopifnot(nrow(mat) == ncol(mat), all.equal(mat, t(mat), tolerance = tolerance)) 58 | cpp_triangle_inequality(mat, tolerance) 59 | } -------------------------------------------------------------------------------- /R/product_metric.R: -------------------------------------------------------------------------------- 1 | #' Product metric 2 | #' 3 | #' Returns the p-product metric of two metric spaces. 4 | #' Works for output of `rdist`, `pdist` or `cdist`. 5 | #' 6 | #' @param ... Distance matrices or dist objects 7 | #' @param p The power of the Minkowski distance 8 | #' 9 | #' @examples 10 | #' # generate data 11 | #' df <- matrix(runif(200), ncol = 2) 12 | #' # distance matrices 13 | #' dist_mat <- pdist(df) 14 | #' dist_1 <- pdist(df[, 1]) 15 | #' dist_2 <- pdist(df[, 2]) 16 | #' # product distance matrix 17 | #' dist_prod <- product_metric(dist_1, dist_2) 18 | #' # check equality 19 | #' all.equal(dist_mat, dist_prod) 20 | #' @importFrom methods is 21 | #' @export 22 | product_metric <- function(..., p = 2){ 23 | metrics <- list(...) 24 | # check for equal dimension 25 | stopifnot(length(unique(lapply(metrics, dim))) == 1, 26 | length(unique(lapply(metrics, length))) == 1) 27 | if (p < Inf){ 28 | p_metrics <- lapply(metrics, `^`, p) 29 | p_result <- Reduce(`+`, p_metrics) 30 | res <- p_result ^ (1/p) 31 | } 32 | else { 33 | res <- pmax(...) 34 | } 35 | if (methods::is(metrics[[1]], "dist")) { 36 | attr(res, "call") <- match.call() 37 | } 38 | return(res) 39 | } -------------------------------------------------------------------------------- /R/rdist-package.r: -------------------------------------------------------------------------------- 1 | #' rdist: an R package for distances 2 | #' 3 | #' \code{rdist} provide a common framework to calculate distances. There are three main functions: 4 | #' \itemize{ 5 | #' \item \code{rdist} computes the pairwise distances between observations in one matrix and returns a \code{dist} object, 6 | #' \item \code{pdist} computes the pairwise distances between observations in one matrix and returns a \code{matrix}, and 7 | #' \item \code{cdist} computes the distances between observations in two matrices and returns a \code{matrix}. 8 | #' } 9 | #' In particular the \code{cdist} function is often missing in other distance functions. All 10 | #' calculations involving \code{NA} values will consistently return \code{NA}. 11 | #' 12 | #' @details Available distance measures are (written for two vectors v and w): 13 | #' \itemize{ 14 | #' \item \code{"euclidean"}: \eqn{\sqrt{\sum_i(v_i - w_i)^2}}{sqrt(sum_i((v_i - w_i)^2))} 15 | #' \item \code{"minkowski"}: \eqn{(\sum_i|v_i - w_i|^p)^{1/p}}{(sum_i(|v_i - w_i|^p))^{1/p}} 16 | #' \item \code{"manhattan"}: \eqn{\sum_i(|v_i-w_i|)}{sum_i(|v_i-w_i|)} 17 | #' \item \code{"maximum"} or \code{"chebyshev"}: \eqn{\max_i(|v_i-w_i|)}{max_i(|v_i-w_i|)} 18 | #' \item \code{"canberra"}: \eqn{\sum_i(\frac{|v_i-w_i|}{|v_i|+|w_i|})}{sum_i(|v_i-w_i|/(|v_i|+|w_i|))} 19 | #' \item \code{"angular"}: \eqn{\cos^{-1}(cor(v, w))}{arccos(cor(v, w))} 20 | #' \item \code{"correlation"}: \eqn{\sqrt{\frac{1-cor(v, w)}{2}}}{sqrt((1-cor(v, w))/2)} 21 | #' \item \code{"absolute_correlation"}: \eqn{\sqrt{1-|cor(v, w)|^2}}{sqrt((1-|cor(v, w)|^2))} 22 | #' \item \code{"hamming"}: \eqn{(\sum_i v_i \neq w_i) / \sum_i 1}{sum_i(v_i != w_i)/sum_i(1)} 23 | #' \item \code{"jaccard"}: \eqn{(\sum_i v_i \neq w_i) / \sum_i 1_{v_i \neq 0 \cup w_i \neq 0}}{sum_i(v_i != w_i)/sum_i(v_i != 0 or w_i != 0)} 24 | #' \item Any function that defines a distance between two vectors. 25 | #' } 26 | #' @param X,Y A matrix 27 | #' @param metric The distance metric to use 28 | #' @param p The power of the Minkowski distance 29 | #' @name rdist 30 | #' @aliases rdist-package 31 | #' @docType package 32 | #' @useDynLib rdist, .registration = TRUE 33 | #' @importFrom Rcpp sourceCpp 34 | #' @importFrom stats as.dist cor 35 | NULL 36 | 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/blasern/rdist.svg?branch=master)](https://travis-ci.org/blasern/rdist) 2 | 3 | # rdist 4 | 5 | An R package to calculate distances. This provide a common framework to calculate distances. 6 | There are three main functions: 7 | - `rdist` computes the pairwise distances between observations in one matrix and returns a `dist` object, 8 | - `pdist` computes the pairwise distances between observations in one matrix and returns a `matrix`, and 9 | - `cdist` computes the distances between observations in two matrices and returns a `matrix`. 10 | 11 | All functions have an argument `metric` that can be used to specify the distance function. Available metrics are `"euclidean"`, `"minkowski"`, `"manhattan"`, `"maximum"`, `"canberra"`, `"angular"`, `"correlation"`, `"absolute_correlation"`, `"hamming"`, and `"jaccard"`. In addition the metric can be any function that takes two vectors as arguments and returns their distance. 12 | All predefined functions will return NA or NaN when one of the compared vectors contains NAs. 13 | 14 | ## Installation 15 | 16 | To install the latest released version from CRAN: 17 | 18 | install.packages("rdist") 19 | 20 | To install the latest development version from github: 21 | 22 | install.packages("devtools") 23 | devtools::install_github("blasern/rdist") 24 | 25 | ## Development 26 | 27 | If you find issues, please [let me know](https://github.com/blasern/rdist/issues). 28 | If you would like to contribute, please [create a pull request](https://github.com/blasern/rdist/compare). 29 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Authors • rdist 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 41 | 42 | 43 | 44 | 45 | 46 |
47 |
48 | 90 | 91 | 92 |
93 | 94 |
95 |
96 | 99 | 100 |
    101 |
  • 102 |

    Nello Blaser. Author, maintainer. 103 |

    104 |
  • 105 |
106 | 107 |
108 | 109 |
110 | 111 | 112 | 122 |
123 | 124 | 125 | 126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /docs/docsearch.css: -------------------------------------------------------------------------------- 1 | /* Docsearch -------------------------------------------------------------- */ 2 | /* 3 | Source: https://github.com/algolia/docsearch/ 4 | License: MIT 5 | */ 6 | 7 | .algolia-autocomplete { 8 | display: block; 9 | -webkit-box-flex: 1; 10 | -ms-flex: 1; 11 | flex: 1 12 | } 13 | 14 | .algolia-autocomplete .ds-dropdown-menu { 15 | width: 100%; 16 | min-width: none; 17 | max-width: none; 18 | padding: .75rem 0; 19 | background-color: #fff; 20 | background-clip: padding-box; 21 | border: 1px solid rgba(0, 0, 0, .1); 22 | box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175); 23 | } 24 | 25 | @media (min-width:768px) { 26 | .algolia-autocomplete .ds-dropdown-menu { 27 | width: 175% 28 | } 29 | } 30 | 31 | .algolia-autocomplete .ds-dropdown-menu::before { 32 | display: none 33 | } 34 | 35 | .algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] { 36 | padding: 0; 37 | background-color: rgb(255,255,255); 38 | border: 0; 39 | max-height: 80vh; 40 | } 41 | 42 | .algolia-autocomplete .ds-dropdown-menu .ds-suggestions { 43 | margin-top: 0 44 | } 45 | 46 | .algolia-autocomplete .algolia-docsearch-suggestion { 47 | padding: 0; 48 | overflow: visible 49 | } 50 | 51 | .algolia-autocomplete .algolia-docsearch-suggestion--category-header { 52 | padding: .125rem 1rem; 53 | margin-top: 0; 54 | font-size: 1.3em; 55 | font-weight: 500; 56 | color: #00008B; 57 | border-bottom: 0 58 | } 59 | 60 | .algolia-autocomplete .algolia-docsearch-suggestion--wrapper { 61 | float: none; 62 | padding-top: 0 63 | } 64 | 65 | .algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column { 66 | float: none; 67 | width: auto; 68 | padding: 0; 69 | text-align: left 70 | } 71 | 72 | .algolia-autocomplete .algolia-docsearch-suggestion--content { 73 | float: none; 74 | width: auto; 75 | padding: 0 76 | } 77 | 78 | .algolia-autocomplete .algolia-docsearch-suggestion--content::before { 79 | display: none 80 | } 81 | 82 | .algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header { 83 | padding-top: .75rem; 84 | margin-top: .75rem; 85 | border-top: 1px solid rgba(0, 0, 0, .1) 86 | } 87 | 88 | .algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column { 89 | display: block; 90 | padding: .1rem 1rem; 91 | margin-bottom: 0.1; 92 | font-size: 1.0em; 93 | font-weight: 400 94 | /* display: none */ 95 | } 96 | 97 | .algolia-autocomplete .algolia-docsearch-suggestion--title { 98 | display: block; 99 | padding: .25rem 1rem; 100 | margin-bottom: 0; 101 | font-size: 0.9em; 102 | font-weight: 400 103 | } 104 | 105 | .algolia-autocomplete .algolia-docsearch-suggestion--text { 106 | padding: 0 1rem .5rem; 107 | margin-top: -.25rem; 108 | font-size: 0.8em; 109 | font-weight: 400; 110 | line-height: 1.25 111 | } 112 | 113 | .algolia-autocomplete .algolia-docsearch-footer { 114 | float: none; 115 | width: auto; 116 | height: auto; 117 | padding: .75rem 1rem 0; 118 | font-size: .95rem; 119 | line-height: 1; 120 | color: #767676; 121 | background-color: rgb(255, 255, 255); 122 | border-top: 1px solid rgba(0, 0, 0, .1) 123 | } 124 | 125 | .algolia-autocomplete .algolia-docsearch-footer--logo { 126 | display: inline; 127 | overflow: visible; 128 | color: inherit; 129 | text-indent: 0; 130 | background: 0 0 131 | } 132 | 133 | .algolia-autocomplete .algolia-docsearch-suggestion--highlight { 134 | color: #FF8C00; 135 | background: rgba(232, 189, 54, 0.1) 136 | } 137 | 138 | 139 | .algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight { 140 | box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5) 141 | } 142 | 143 | .algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content { 144 | background-color: rgba(192, 192, 192, .15) 145 | } 146 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Calculate Pairwise Distances • rdist 9 | 10 | 11 | 12 | 13 | 14 | 15 | 19 | 20 | 21 |
22 |
64 | 65 | 66 | 67 |
68 |
69 |
70 | 72 |

An R package to calculate distances. This provide a common framework to calculate distances.
73 | There are three main functions:

74 |
    75 |
  • 76 | rdist computes the pairwise distances between observations in one matrix and returns a dist object,
  • 77 |
  • 78 | pdist computes the pairwise distances between observations in one matrix and returns a matrix, and
  • 79 |
  • 80 | cdist computes the distances between observations in two matrices and returns a matrix.
  • 81 |
82 |

All functions have an argument metric that can be used to specify the distance function. Available metrics are "euclidean", "minkowski", "manhattan", "maximum", "canberra", "angular", "correlation", "absolute_correlation", "hamming", and "jaccard". In addition the metric can be any function that takes two vectors as arguments and returns their distance. All predefined functions will return NA or NaN when one of the compared vectors contains NAs.

83 |
84 |

85 | Installation

86 |

To install the latest released version from CRAN:

87 | 88 |

To install the latest development version from github:

89 | 91 |
92 |
93 |

94 | Development

95 |

If you find issues, please let me know. If you would like to contribute, please create a pull request.

96 |
97 |
98 |
99 | 100 | 126 |
127 | 128 | 129 | 138 |
139 | 140 | 141 | 142 | 143 | 144 | -------------------------------------------------------------------------------- /docs/jquery.sticky-kit.min.js: -------------------------------------------------------------------------------- 1 | /* Sticky-kit v1.1.2 | WTFPL | Leaf Corcoran 2015 | */ 2 | /* 3 | Source: https://github.com/leafo/sticky-kit 4 | License: MIT 5 | */ 6 | (function(){var b,f;b=this.jQuery||window.jQuery;f=b(window);b.fn.stick_in_parent=function(d){var A,w,J,n,B,K,p,q,k,E,t;null==d&&(d={});t=d.sticky_class;B=d.inner_scrolling;E=d.recalc_every;k=d.parent;q=d.offset_top;p=d.spacer;w=d.bottoming;null==q&&(q=0);null==k&&(k=void 0);null==B&&(B=!0);null==t&&(t="is_stuck");A=b(document);null==w&&(w=!0);J=function(a,d,n,C,F,u,r,G){var v,H,m,D,I,c,g,x,y,z,h,l;if(!a.data("sticky_kit")){a.data("sticky_kit",!0);I=A.height();g=a.parent();null!=k&&(g=g.closest(k)); 7 | if(!g.length)throw"failed to find stick parent";v=m=!1;(h=null!=p?p&&a.closest(p):b("
"))&&h.css("position",a.css("position"));x=function(){var c,f,e;if(!G&&(I=A.height(),c=parseInt(g.css("border-top-width"),10),f=parseInt(g.css("padding-top"),10),d=parseInt(g.css("padding-bottom"),10),n=g.offset().top+c+f,C=g.height(),m&&(v=m=!1,null==p&&(a.insertAfter(h),h.detach()),a.css({position:"",top:"",width:"",bottom:""}).removeClass(t),e=!0),F=a.offset().top-(parseInt(a.css("margin-top"),10)||0)-q, 8 | u=a.outerHeight(!0),r=a.css("float"),h&&h.css({width:a.outerWidth(!0),height:u,display:a.css("display"),"vertical-align":a.css("vertical-align"),"float":r}),e))return l()};x();if(u!==C)return D=void 0,c=q,z=E,l=function(){var b,l,e,k;if(!G&&(e=!1,null!=z&&(--z,0>=z&&(z=E,x(),e=!0)),e||A.height()===I||x(),e=f.scrollTop(),null!=D&&(l=e-D),D=e,m?(w&&(k=e+u+c>C+n,v&&!k&&(v=!1,a.css({position:"fixed",bottom:"",top:c}).trigger("sticky_kit:unbottom"))),eb&&!v&&(c-=l,c=Math.max(b-u,c),c=Math.min(q,c),m&&a.css({top:c+"px"})))):e>F&&(m=!0,b={position:"fixed",top:c},b.width="border-box"===a.css("box-sizing")?a.outerWidth()+"px":a.width()+"px",a.css(b).addClass(t),null==p&&(a.after(h),"left"!==r&&"right"!==r||h.append(a)),a.trigger("sticky_kit:stick")),m&&w&&(null==k&&(k=e+u+c>C+n),!v&&k)))return v=!0,"static"===g.css("position")&&g.css({position:"relative"}), 10 | a.css({position:"absolute",bottom:d,top:"auto"}).trigger("sticky_kit:bottom")},y=function(){x();return l()},H=function(){G=!0;f.off("touchmove",l);f.off("scroll",l);f.off("resize",y);b(document.body).off("sticky_kit:recalc",y);a.off("sticky_kit:detach",H);a.removeData("sticky_kit");a.css({position:"",bottom:"",top:"",width:""});g.position("position","");if(m)return null==p&&("left"!==r&&"right"!==r||a.insertAfter(h),h.remove()),a.removeClass(t)},f.on("touchmove",l),f.on("scroll",l),f.on("resize", 11 | y),b(document.body).on("sticky_kit:recalc",y),a.on("sticky_kit:detach",H),setTimeout(l,0)}};n=0;for(K=this.length;n 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/news/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Changelog • rdist 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 41 | 42 | 43 | 44 | 45 | 46 |
47 |
48 | 90 | 91 | 92 |
93 | 94 |
95 |
96 | 100 | 101 |
102 |

103 | rdist 0.0.3 Unreleased 104 |

105 |
    106 |
  • Added documentation pages
  • 107 |
  • Added product_metric 108 |
  • 109 |
  • Added farthest_point_sampling 110 |
  • 111 |
  • Added is_distance_matrix and triangle_inequality checks
  • 112 |
113 |
114 |
115 |

116 | rdist 0.0.2 2017-05-12 117 |

118 |
    119 |
  • Fixed bug in jaccard distance
  • 120 |
  • Fixed bug in euclidean distance
  • 121 |
  • Added labels to rdist output
  • 122 |
123 |
124 |
125 |

126 | rdist 0.0.1 2017-03-05 127 |

128 |

An R package to calculate distances. This provide a common framework to calculate distances.
129 | There are three main functions:

130 |
    131 |
  • 132 | rdist computes the pairwise distances between observations in one matrix and returns a dist object,
  • 133 |
  • 134 | pdist computes the pairwise distances between observations in one matrix and returns a matrix, and
  • 135 |
  • 136 | cdist computes the distances between observations in two matrices and returns a matrix.
  • 137 |
138 |

All functions have an argument metric that can be used to specify the distance function. Available metrics are "euclidean", "minkowski", "manhattan", "maximum", "canberra", "angular", "correlation", "absolute_correlation", "hamming", and "jaccard". All functions will return NA or NaN when one of the compared vectors contains NAs.

139 |
140 |

141 | Installation

142 |

To install the latest released version from CRAN:

143 | 144 |

To install the latest development version from github:

145 | 147 |
148 |
149 |

150 | Development

151 |

If you find issues, please let me know. If you would like to contribute, please create a pull request.

152 |
153 |
154 |
155 | 156 | 166 | 167 |
168 | 169 |
170 | 173 | 174 |
175 |

Site built with pkgdown.

176 |
177 | 178 |
179 |
180 | 181 | 182 | 183 | 184 | 185 | 186 | -------------------------------------------------------------------------------- /docs/pkgdown.css: -------------------------------------------------------------------------------- 1 | /* Sticky footer */ 2 | 3 | /** 4 | * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ 5 | * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css 6 | * 7 | * .Site -> body > .container 8 | * .Site-content -> body > .container .row 9 | * .footer -> footer 10 | * 11 | * Key idea seems to be to ensure that .container and __all its parents__ 12 | * have height set to 100% 13 | * 14 | */ 15 | 16 | html, body { 17 | height: 100%; 18 | } 19 | 20 | body > .container { 21 | display: flex; 22 | height: 100%; 23 | flex-direction: column; 24 | 25 | padding-top: 60px; 26 | } 27 | 28 | body > .container .row { 29 | flex: 1 0 auto; 30 | } 31 | 32 | footer { 33 | margin-top: 45px; 34 | padding: 35px 0 36px; 35 | border-top: 1px solid #e5e5e5; 36 | color: #666; 37 | display: flex; 38 | flex-shrink: 0; 39 | } 40 | footer p { 41 | margin-bottom: 0; 42 | } 43 | footer div { 44 | flex: 1; 45 | } 46 | footer .pkgdown { 47 | text-align: right; 48 | } 49 | footer p { 50 | margin-bottom: 0; 51 | } 52 | 53 | img.icon { 54 | float: right; 55 | } 56 | 57 | img { 58 | max-width: 100%; 59 | } 60 | 61 | /* Typographic tweaking ---------------------------------*/ 62 | 63 | .contents h1.page-header { 64 | margin-top: calc(-60px + 1em); 65 | } 66 | 67 | /* Section anchors ---------------------------------*/ 68 | 69 | a.anchor { 70 | margin-left: -30px; 71 | display:inline-block; 72 | width: 30px; 73 | height: 30px; 74 | visibility: hidden; 75 | 76 | background-image: url(./link.svg); 77 | background-repeat: no-repeat; 78 | background-size: 20px 20px; 79 | background-position: center center; 80 | } 81 | 82 | .hasAnchor:hover a.anchor { 83 | visibility: visible; 84 | } 85 | 86 | @media (max-width: 767px) { 87 | .hasAnchor:hover a.anchor { 88 | visibility: hidden; 89 | } 90 | } 91 | 92 | 93 | /* Fixes for fixed navbar --------------------------*/ 94 | 95 | .contents h1, .contents h2, .contents h3, .contents h4 { 96 | padding-top: 60px; 97 | margin-top: -40px; 98 | } 99 | 100 | /* Static header placement on mobile devices */ 101 | @media (max-width: 767px) { 102 | .navbar-fixed-top { 103 | position: absolute; 104 | } 105 | .navbar { 106 | padding: 0; 107 | } 108 | } 109 | 110 | 111 | /* Sidebar --------------------------*/ 112 | 113 | #sidebar { 114 | margin-top: 30px; 115 | } 116 | #sidebar h2 { 117 | font-size: 1.5em; 118 | margin-top: 1em; 119 | } 120 | 121 | #sidebar h2:first-child { 122 | margin-top: 0; 123 | } 124 | 125 | #sidebar .list-unstyled li { 126 | margin-bottom: 0.5em; 127 | } 128 | 129 | .orcid { 130 | height: 16px; 131 | vertical-align: middle; 132 | } 133 | 134 | /* Reference index & topics ----------------------------------------------- */ 135 | 136 | .ref-index th {font-weight: normal;} 137 | 138 | .ref-index td {vertical-align: top;} 139 | .ref-index .alias {width: 40%;} 140 | .ref-index .title {width: 60%;} 141 | 142 | .ref-index .alias {width: 40%;} 143 | .ref-index .title {width: 60%;} 144 | 145 | .ref-arguments th {text-align: right; padding-right: 10px;} 146 | .ref-arguments th, .ref-arguments td {vertical-align: top;} 147 | .ref-arguments .name {width: 20%;} 148 | .ref-arguments .desc {width: 80%;} 149 | 150 | /* Nice scrolling for wide elements --------------------------------------- */ 151 | 152 | table { 153 | display: block; 154 | overflow: auto; 155 | } 156 | 157 | /* Syntax highlighting ---------------------------------------------------- */ 158 | 159 | pre { 160 | word-wrap: normal; 161 | word-break: normal; 162 | border: 1px solid #eee; 163 | } 164 | 165 | pre, code { 166 | background-color: #f8f8f8; 167 | color: #333; 168 | } 169 | 170 | pre code { 171 | overflow: auto; 172 | word-wrap: normal; 173 | white-space: pre; 174 | } 175 | 176 | pre .img { 177 | margin: 5px 0; 178 | } 179 | 180 | pre .img img { 181 | background-color: #fff; 182 | display: block; 183 | height: auto; 184 | } 185 | 186 | code a, pre a { 187 | color: #375f84; 188 | } 189 | 190 | a.sourceLine:hover { 191 | text-decoration: none; 192 | } 193 | 194 | .fl {color: #1514b5;} 195 | .fu {color: #000000;} /* function */ 196 | .ch,.st {color: #036a07;} /* string */ 197 | .kw {color: #264D66;} /* keyword */ 198 | .co {color: #888888;} /* comment */ 199 | 200 | .message { color: black; font-weight: bolder;} 201 | .error { color: orange; font-weight: bolder;} 202 | .warning { color: #6A0366; font-weight: bolder;} 203 | 204 | /* Clipboard --------------------------*/ 205 | 206 | .hasCopyButton { 207 | position: relative; 208 | } 209 | 210 | .btn-copy-ex { 211 | position: absolute; 212 | right: 0; 213 | top: 0; 214 | visibility: hidden; 215 | } 216 | 217 | .hasCopyButton:hover button.btn-copy-ex { 218 | visibility: visible; 219 | } 220 | 221 | /* mark.js ----------------------------*/ 222 | 223 | mark { 224 | background-color: rgba(255, 255, 51, 0.5); 225 | border-bottom: 2px solid rgba(255, 153, 51, 0.3); 226 | padding: 1px; 227 | } 228 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | $("#sidebar") 4 | .stick_in_parent({offset_top: 40}) 5 | .on('sticky_kit:bottom', function(e) { 6 | $(this).parent().css('position', 'static'); 7 | }) 8 | .on('sticky_kit:unbottom', function(e) { 9 | $(this).parent().css('position', 'relative'); 10 | }); 11 | 12 | $('body').scrollspy({ 13 | target: '#sidebar', 14 | offset: 60 15 | }); 16 | 17 | $('[data-toggle="tooltip"]').tooltip(); 18 | 19 | var cur_path = paths(location.pathname); 20 | $("#navbar ul li a").each(function(index, value) { 21 | if (value.text == "Home") 22 | return; 23 | if (value.getAttribute("href") === "#") 24 | return; 25 | 26 | var path = paths(value.pathname); 27 | if (is_prefix(cur_path, path)) { 28 | // Add class to parent
  • , and enclosing
  • if in dropdown 29 | var menu_anchor = $(value); 30 | menu_anchor.parent().addClass("active"); 31 | menu_anchor.closest("li.dropdown").addClass("active"); 32 | } 33 | }); 34 | }); 35 | 36 | $(document).ready(function() { 37 | // do keyword highlighting 38 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 39 | var mark = function() { 40 | 41 | var referrer = document.URL ; 42 | var paramKey = "q" ; 43 | 44 | if (referrer.indexOf("?") !== -1) { 45 | var qs = referrer.substr(referrer.indexOf('?') + 1); 46 | var qs_noanchor = qs.split('#')[0]; 47 | var qsa = qs_noanchor.split('&'); 48 | var keyword = ""; 49 | 50 | for (var i = 0; i < qsa.length; i++) { 51 | var currentParam = qsa[i].split('='); 52 | 53 | if (currentParam.length !== 2) { 54 | continue; 55 | } 56 | 57 | if (currentParam[0] == paramKey) { 58 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 59 | } 60 | } 61 | 62 | if (keyword !== "") { 63 | $(".contents").unmark({ 64 | done: function() { 65 | $(".contents").mark(keyword); 66 | } 67 | }); 68 | } 69 | } 70 | }; 71 | 72 | mark(); 73 | }); 74 | 75 | function paths(pathname) { 76 | var pieces = pathname.split("/"); 77 | pieces.shift(); // always starts with / 78 | 79 | var end = pieces[pieces.length - 1]; 80 | if (end === "index.html" || end === "") 81 | pieces.pop(); 82 | return(pieces); 83 | } 84 | 85 | function is_prefix(needle, haystack) { 86 | if (needle.length > haystack.lengh) 87 | return(false); 88 | 89 | // Special case for length-0 haystack, since for loop won't run 90 | if (haystack.length === 0) { 91 | return(needle.length === 0); 92 | } 93 | 94 | for (var i = 0; i < haystack.length; i++) { 95 | if (needle[i] != haystack[i]) 96 | return(false); 97 | } 98 | 99 | return(true); 100 | } 101 | 102 | /* Clipboard --------------------------*/ 103 | 104 | function changeTooltipMessage(element, msg) { 105 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 106 | element.setAttribute('data-original-title', msg); 107 | $(element).tooltip('show'); 108 | element.setAttribute('data-original-title', tooltipOriginalTitle); 109 | } 110 | 111 | if(Clipboard.isSupported()) { 112 | $(document).ready(function() { 113 | var copyButton = ""; 114 | 115 | $(".examples").addClass("hasCopyButton"); 116 | 117 | // Insert copy buttons: 118 | $(copyButton).prependTo(".hasCopyButton"); 119 | 120 | // Initialize tooltips: 121 | $('.btn-copy-ex').tooltip({container: 'body'}); 122 | 123 | // Initialize clipboard: 124 | var clipboardBtnCopies = new Clipboard('[data-clipboard-copy]', { 125 | text: function(trigger) { 126 | return trigger.parentNode.textContent; 127 | } 128 | }); 129 | 130 | clipboardBtnCopies.on('success', function(e) { 131 | changeTooltipMessage(e.trigger, 'Copied!'); 132 | e.clearSelection(); 133 | }); 134 | 135 | clipboardBtnCopies.on('error', function() { 136 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 137 | }); 138 | }); 139 | } 140 | 141 | /* Search term highlighting ------------------------------*/ 142 | 143 | function matchedWords(hit) { 144 | var words = []; 145 | 146 | var hierarchy = hit._highlightResult.hierarchy; 147 | // loop to fetch from lvl0, lvl1, etc. 148 | for (var idx in hierarchy) { 149 | words = words.concat(hierarchy[idx].matchedWords); 150 | } 151 | 152 | var content = hit._highlightResult.content; 153 | if (content) { 154 | words = words.concat(content.matchedWords); 155 | } 156 | 157 | // return unique words 158 | var words_uniq = [...new Set(words)]; 159 | return words_uniq; 160 | } 161 | 162 | function updateHitURL(hit) { 163 | 164 | var words = matchedWords(hit); 165 | var url = ""; 166 | 167 | if (hit.anchor) { 168 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 169 | } else { 170 | url = hit.url + '?q=' + escape(words.join(" ")); 171 | } 172 | 173 | return url; 174 | } 175 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.0.6 2 | pkgdown: 1.0.0 3 | pkgdown_sha: ~ 4 | articles: [] 5 | 6 | -------------------------------------------------------------------------------- /docs/reference/farthest_point_sampling.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Farthest point sampling — farthest_point_sampling • rdist 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 46 | 47 | 48 | 49 | 50 | 51 |
    52 |
    53 | 95 | 96 | 97 |
    98 | 99 |
    100 |
    101 | 106 | 107 |
    108 | 109 |

    Farthest point sampling returns a reordering of the metric 110 | space P = p_1, ..., p_k, such that each p_i is the farthest 111 | point from the first i-1 points.

    112 | 113 |
    114 | 115 |
    farthest_point_sampling(mat, metric = "precomputed", k = nrow(mat),
    116 |   initial_point_index = 1L, return_clusters = FALSE)
    117 | 118 |

    Arguments

    119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 |
    mat

    Original distance matrix

    metric

    Distance metric to use (either "precomputed" or a metric from rdist)

    k

    Number of points to sample

    initial_point_index

    Index of p_1

    return_clusters

    Should the indices of the closest farthest points be returned?

    142 | 143 | 144 |

    Examples

    145 |
    146 | # generate data 147 | df <- matrix(runif(200), ncol = 2) 148 | dist_mat <- pdist(df) 149 | # farthest point sampling 150 | fps <- farthest_point_sampling(dist_mat) 151 | fps2 <- farthest_point_sampling(df, metric = "euclidean") 152 | all.equal(fps, fps2)
    #> [1] TRUE
    # have a look at the fps distance matrix 153 | rdist(df[fps[1:5], ])
    #> 1 2 3 4 154 | #> 2 1.0924360 155 | #> 3 0.9280582 0.8499785 156 | #> 4 0.8029144 0.8977409 1.3558406 157 | #> 5 0.5409166 0.5515509 0.7071422 0.6487099
    dist_mat[fps, fps][1:5, 1:5]
    #> [,1] [,2] [,3] [,4] [,5] 158 | #> [1,] 0.0000000 1.0924360 0.9280582 0.8029144 0.5409166 159 | #> [2,] 1.0924360 0.0000000 0.8499785 0.8977409 0.5515509 160 | #> [3,] 0.9280582 0.8499785 0.0000000 1.3558406 0.7071422 161 | #> [4,] 0.8029144 0.8977409 1.3558406 0.0000000 0.6487099 162 | #> [5,] 0.5409166 0.5515509 0.7071422 0.6487099 0.0000000
    163 |
    164 | 173 |
    174 | 175 |
    176 | 179 | 180 |
    181 |

    Site built with pkgdown.

    182 |
    183 | 184 |
    185 |
    186 | 187 | 188 | 189 | 190 | 191 | 192 | -------------------------------------------------------------------------------- /docs/reference/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Function reference • rdist 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 41 | 42 | 43 | 44 | 45 | 46 |
    47 |
    48 | 90 | 91 | 92 |
    93 | 94 |
    95 |
    96 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 113 | 114 | 115 | 116 | 119 | 120 | 121 | 122 | 125 | 126 | 127 | 128 | 131 | 132 | 133 | 134 | 137 | 138 | 139 | 140 |
    110 |

    All functions

    111 |

    112 |
    117 |

    farthest_point_sampling()

    118 |

    Farthest point sampling

    123 |

    is_distance_matrix() triangle_inequality()

    124 |

    Metric and triangle inequality

    129 |

    product_metric()

    130 |

    Product metric

    135 |

    rdist() pdist() cdist()

    136 |

    rdist: an R package for distances

    141 |
    142 | 143 | 149 |
    150 | 151 |
    152 | 155 | 156 |
    157 |

    Site built with pkgdown.

    158 |
    159 | 160 |
    161 |
    162 | 163 | 164 | 165 | 166 | 167 | 168 | -------------------------------------------------------------------------------- /docs/reference/is_metric.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Metric and triangle inequality — is_metric • rdist 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 44 | 45 | 46 | 47 | 48 | 49 |
    50 |
    51 | 93 | 94 | 95 |
    96 | 97 |
    98 |
    99 | 104 | 105 |
    106 | 107 |

    Does the distance matric come from a metric

    108 | 109 |
    110 | 111 |
    is_distance_matrix(mat, tolerance = .Machine$double.eps^0.5)
    112 | 
    113 | triangle_inequality(mat, tolerance = .Machine$double.eps^0.5)
    114 | 115 |

    Arguments

    116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 |
    mat

    The matrix to evaluate

    tolerance

    Differences smaller than tolerance are not reported.

    127 | 128 | 129 |

    Examples

    130 |
    data <- matrix(rnorm(20), ncol = 2) 131 | dm <- pdist(data) 132 | is_distance_matrix(dm)
    #> [1] TRUE
    triangle_inequality(dm)
    #> [1] TRUE
    133 | dm[1, 2] <- 1.1 * dm[1, 2] 134 | is_distance_matrix(dm)
    #> Matrix is not symmetric.
    #> [1] FALSE
    135 |
    136 | 145 |
    146 | 147 |
    148 | 151 | 152 |
    153 |

    Site built with pkgdown.

    154 |
    155 | 156 |
    157 |
    158 | 159 | 160 | 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /docs/reference/product_metric.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Product metric — product_metric • rdist 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 45 | 46 | 47 | 48 | 49 | 50 |
    51 |
    52 | 94 | 95 | 96 |
    97 | 98 |
    99 |
    100 | 105 | 106 |
    107 | 108 |

    Returns the p-product metric of two metric spaces. 109 | Works for output of `rdist`, `pdist` or `cdist`.

    110 | 111 |
    112 | 113 |
    product_metric(..., p = 2)
    114 | 115 |

    Arguments

    116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 |
    ...

    Distance matrices or dist objects

    p

    The power of the Minkowski distance

    127 | 128 | 129 |

    Examples

    130 |
    # generate data 131 | df <- matrix(runif(200), ncol = 2) 132 | # distance matrices 133 | dist_mat <- pdist(df) 134 | dist_1 <- pdist(df[, 1]) 135 | dist_2 <- pdist(df[, 2]) 136 | # product distance matrix 137 | dist_prod <- product_metric(dist_1, dist_2) 138 | # check equality 139 | all.equal(dist_mat, dist_prod)
    #> [1] TRUE
    140 |
    141 | 150 |
    151 | 152 |
    153 | 156 | 157 |
    158 |

    Site built with pkgdown.

    159 |
    160 | 161 |
    162 |
    163 | 164 | 165 | 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /docs/reference/rdist.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | rdist: an R package for distances — rdist • rdist 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 49 | 50 | 51 | 52 | 53 | 54 |
    55 |
    56 | 98 | 99 | 100 |
    101 | 102 |
    103 |
    104 | 109 | 110 |
    111 | 112 |

    rdist provide a common framework to calculate distances. There are three main functions:

      113 |
    • rdist computes the pairwise distances between observations in one matrix and returns a dist object,

    • 114 |
    • pdist computes the pairwise distances between observations in one matrix and returns a matrix, and

    • 115 |
    • cdist computes the distances between observations in two matrices and returns a matrix.

    • 116 |

    In particular the cdist function is often missing in other distance functions. All 117 | calculations involving NA values will consistently return NA.

    118 | 119 |
    120 | 121 |
    rdist(X, metric = "euclidean", p = 2L)
    122 | 
    123 | pdist(X, metric = "euclidean", p = 2)
    124 | 
    125 | cdist(X, Y, metric = "euclidean", p = 2)
    126 | 127 |

    Arguments

    128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 |
    X, Y

    A matrix

    metric

    The distance metric to use

    p

    The power of the Minkowski distance

    143 | 144 |

    Details

    145 | 146 |

    Available distance measures are (written for two vectors v and w):

      147 |
    • "euclidean": \(\sqrt{\sum_i(v_i - w_i)^2}\)

    • 148 |
    • "minkowski": \((\sum_i|v_i - w_i|^p)^{1/p}\)

    • 149 |
    • "manhattan": \(\sum_i(|v_i-w_i|)\)

    • 150 |
    • "maximum" or "chebyshev": \(\max_i(|v_i-w_i|)\)

    • 151 |
    • "canberra": \(\sum_i(\frac{|v_i-w_i|}{|v_i|+|w_i|})\)

    • 152 |
    • "angular": \(\cos^{-1}(cor(v, w))\)

    • 153 |
    • "correlation": \(\sqrt{\frac{1-cor(v, w)}{2}}\)

    • 154 |
    • "absolute_correlation": \(\sqrt{1-|cor(v, w)|^2}\)

    • 155 |
    • "hamming": \((\sum_i v_i \neq w_i) / \sum_i 1\)

    • 156 |
    • "jaccard": \((\sum_i v_i \neq w_i) / \sum_i 1_{v_i \neq 0 \cup w_i \neq 0}\)

    • 157 |
    • Any function that defines a distance between two vectors.

    • 158 |
    159 | 160 | 161 |
    162 | 171 |
    172 | 173 |
    174 | 177 | 178 |
    179 |

    Site built with pkgdown.

    180 |
    181 | 182 |
    183 |
    184 | 185 | 186 | 187 | 188 | 189 | 190 | -------------------------------------------------------------------------------- /man/farthest_point_sampling.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/farthest_point_sampling.R 3 | \name{farthest_point_sampling} 4 | \alias{farthest_point_sampling} 5 | \title{Farthest point sampling} 6 | \usage{ 7 | farthest_point_sampling( 8 | mat, 9 | metric = "precomputed", 10 | k = nrow(mat), 11 | initial_point_index = 1L, 12 | return_clusters = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{mat}{Original distance matrix} 17 | 18 | \item{metric}{Distance metric to use (either "precomputed" or a metric from \code{\link{rdist}})} 19 | 20 | \item{k}{Number of points to sample} 21 | 22 | \item{initial_point_index}{Index of p_1} 23 | 24 | \item{return_clusters}{Should the indices of the closest farthest points be returned?} 25 | } 26 | \description{ 27 | Farthest point sampling returns a reordering of the metric 28 | space P = {p_1, ..., p_k}, such that each p_i is the farthest 29 | point from the first i-1 points. 30 | } 31 | \examples{ 32 | 33 | # generate data 34 | df <- matrix(runif(200), ncol = 2) 35 | dist_mat <- pdist(df) 36 | # farthest point sampling 37 | fps <- farthest_point_sampling(dist_mat) 38 | fps2 <- farthest_point_sampling(df, metric = "euclidean") 39 | all.equal(fps, fps2) 40 | # have a look at the fps distance matrix 41 | rdist(df[fps[1:5], ]) 42 | dist_mat[fps, fps][1:5, 1:5] 43 | } 44 | -------------------------------------------------------------------------------- /man/is_metric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/is_metric.R 3 | \name{is_metric} 4 | \alias{is_metric} 5 | \alias{is_distance_matrix} 6 | \alias{triangle_inequality} 7 | \title{Metric and triangle inequality} 8 | \usage{ 9 | is_distance_matrix(mat, tolerance = .Machine$double.eps^0.5) 10 | 11 | triangle_inequality(mat, tolerance = .Machine$double.eps^0.5) 12 | } 13 | \arguments{ 14 | \item{mat}{The matrix to evaluate} 15 | 16 | \item{tolerance}{Differences smaller than tolerance are not reported.} 17 | } 18 | \description{ 19 | Does the distance matric come from a metric 20 | } 21 | \examples{ 22 | data <- matrix(rnorm(20), ncol = 2) 23 | dm <- pdist(data) 24 | is_distance_matrix(dm) 25 | triangle_inequality(dm) 26 | 27 | dm[1, 2] <- 1.1 * dm[1, 2] 28 | is_distance_matrix(dm) 29 | } 30 | -------------------------------------------------------------------------------- /man/product_metric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/product_metric.R 3 | \name{product_metric} 4 | \alias{product_metric} 5 | \title{Product metric} 6 | \usage{ 7 | product_metric(..., p = 2) 8 | } 9 | \arguments{ 10 | \item{...}{Distance matrices or dist objects} 11 | 12 | \item{p}{The power of the Minkowski distance} 13 | } 14 | \description{ 15 | Returns the p-product metric of two metric spaces. 16 | Works for output of `rdist`, `pdist` or `cdist`. 17 | } 18 | \examples{ 19 | # generate data 20 | df <- matrix(runif(200), ncol = 2) 21 | # distance matrices 22 | dist_mat <- pdist(df) 23 | dist_1 <- pdist(df[, 1]) 24 | dist_2 <- pdist(df[, 2]) 25 | # product distance matrix 26 | dist_prod <- product_metric(dist_1, dist_2) 27 | # check equality 28 | all.equal(dist_mat, dist_prod) 29 | } 30 | -------------------------------------------------------------------------------- /man/rdist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distance_functions.r, R/rdist-package.r 3 | \docType{package} 4 | \name{rdist} 5 | \alias{rdist} 6 | \alias{pdist} 7 | \alias{cdist} 8 | \alias{rdist-package} 9 | \title{rdist: an R package for distances} 10 | \usage{ 11 | rdist(X, metric = "euclidean", p = 2L) 12 | 13 | pdist(X, metric = "euclidean", p = 2) 14 | 15 | cdist(X, Y, metric = "euclidean", p = 2) 16 | } 17 | \arguments{ 18 | \item{X, Y}{A matrix} 19 | 20 | \item{metric}{The distance metric to use} 21 | 22 | \item{p}{The power of the Minkowski distance} 23 | } 24 | \description{ 25 | \code{rdist} provide a common framework to calculate distances. There are three main functions: 26 | \itemize{ 27 | \item \code{rdist} computes the pairwise distances between observations in one matrix and returns a \code{dist} object, 28 | \item \code{pdist} computes the pairwise distances between observations in one matrix and returns a \code{matrix}, and 29 | \item \code{cdist} computes the distances between observations in two matrices and returns a \code{matrix}. 30 | } 31 | In particular the \code{cdist} function is often missing in other distance functions. All 32 | calculations involving \code{NA} values will consistently return \code{NA}. 33 | } 34 | \details{ 35 | Available distance measures are (written for two vectors v and w): 36 | \itemize{ 37 | \item \code{"euclidean"}: \eqn{\sqrt{\sum_i(v_i - w_i)^2}}{sqrt(sum_i((v_i - w_i)^2))} 38 | \item \code{"minkowski"}: \eqn{(\sum_i|v_i - w_i|^p)^{1/p}}{(sum_i(|v_i - w_i|^p))^{1/p}} 39 | \item \code{"manhattan"}: \eqn{\sum_i(|v_i-w_i|)}{sum_i(|v_i-w_i|)} 40 | \item \code{"maximum"} or \code{"chebyshev"}: \eqn{\max_i(|v_i-w_i|)}{max_i(|v_i-w_i|)} 41 | \item \code{"canberra"}: \eqn{\sum_i(\frac{|v_i-w_i|}{|v_i|+|w_i|})}{sum_i(|v_i-w_i|/(|v_i|+|w_i|))} 42 | \item \code{"angular"}: \eqn{\cos^{-1}(cor(v, w))}{arccos(cor(v, w))} 43 | \item \code{"correlation"}: \eqn{\sqrt{\frac{1-cor(v, w)}{2}}}{sqrt((1-cor(v, w))/2)} 44 | \item \code{"absolute_correlation"}: \eqn{\sqrt{1-|cor(v, w)|^2}}{sqrt((1-|cor(v, w)|^2))} 45 | \item \code{"hamming"}: \eqn{(\sum_i v_i \neq w_i) / \sum_i 1}{sum_i(v_i != w_i)/sum_i(1)} 46 | \item \code{"jaccard"}: \eqn{(\sum_i v_i \neq w_i) / \sum_i 1_{v_i \neq 0 \cup w_i \neq 0}}{sum_i(v_i != w_i)/sum_i(v_i != 0 or w_i != 0)} 47 | \item Any function that defines a distance between two vectors. 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | #include 6 | 7 | using namespace Rcpp; 8 | 9 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 10 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 11 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 12 | #endif 13 | 14 | // canberra_rdist 15 | NumericVector canberra_rdist(NumericMatrix A); 16 | RcppExport SEXP _rdist_canberra_rdist(SEXP ASEXP) { 17 | BEGIN_RCPP 18 | Rcpp::RObject rcpp_result_gen; 19 | Rcpp::RNGScope rcpp_rngScope_gen; 20 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 21 | rcpp_result_gen = Rcpp::wrap(canberra_rdist(A)); 22 | return rcpp_result_gen; 23 | END_RCPP 24 | } 25 | // canberra_pdist 26 | NumericMatrix canberra_pdist(NumericMatrix A); 27 | RcppExport SEXP _rdist_canberra_pdist(SEXP ASEXP) { 28 | BEGIN_RCPP 29 | Rcpp::RObject rcpp_result_gen; 30 | Rcpp::RNGScope rcpp_rngScope_gen; 31 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 32 | rcpp_result_gen = Rcpp::wrap(canberra_pdist(A)); 33 | return rcpp_result_gen; 34 | END_RCPP 35 | } 36 | // canberra_cdist 37 | NumericMatrix canberra_cdist(NumericMatrix A, NumericMatrix B); 38 | RcppExport SEXP _rdist_canberra_cdist(SEXP ASEXP, SEXP BSEXP) { 39 | BEGIN_RCPP 40 | Rcpp::RObject rcpp_result_gen; 41 | Rcpp::RNGScope rcpp_rngScope_gen; 42 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 43 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP); 44 | rcpp_result_gen = Rcpp::wrap(canberra_cdist(A, B)); 45 | return rcpp_result_gen; 46 | END_RCPP 47 | } 48 | // rdist_cpp 49 | NumericVector rdist_cpp(NumericMatrix A, String metric, double p); 50 | RcppExport SEXP _rdist_rdist_cpp(SEXP ASEXP, SEXP metricSEXP, SEXP pSEXP) { 51 | BEGIN_RCPP 52 | Rcpp::RObject rcpp_result_gen; 53 | Rcpp::RNGScope rcpp_rngScope_gen; 54 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 55 | Rcpp::traits::input_parameter< String >::type metric(metricSEXP); 56 | Rcpp::traits::input_parameter< double >::type p(pSEXP); 57 | rcpp_result_gen = Rcpp::wrap(rdist_cpp(A, metric, p)); 58 | return rcpp_result_gen; 59 | END_RCPP 60 | } 61 | // pdist_cpp 62 | NumericMatrix pdist_cpp(NumericMatrix A, String metric, double p); 63 | RcppExport SEXP _rdist_pdist_cpp(SEXP ASEXP, SEXP metricSEXP, SEXP pSEXP) { 64 | BEGIN_RCPP 65 | Rcpp::RObject rcpp_result_gen; 66 | Rcpp::RNGScope rcpp_rngScope_gen; 67 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 68 | Rcpp::traits::input_parameter< String >::type metric(metricSEXP); 69 | Rcpp::traits::input_parameter< double >::type p(pSEXP); 70 | rcpp_result_gen = Rcpp::wrap(pdist_cpp(A, metric, p)); 71 | return rcpp_result_gen; 72 | END_RCPP 73 | } 74 | // cdist_cpp 75 | NumericMatrix cdist_cpp(NumericMatrix A, NumericMatrix B, String metric, double p); 76 | RcppExport SEXP _rdist_cdist_cpp(SEXP ASEXP, SEXP BSEXP, SEXP metricSEXP, SEXP pSEXP) { 77 | BEGIN_RCPP 78 | Rcpp::RObject rcpp_result_gen; 79 | Rcpp::RNGScope rcpp_rngScope_gen; 80 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 81 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP); 82 | Rcpp::traits::input_parameter< String >::type metric(metricSEXP); 83 | Rcpp::traits::input_parameter< double >::type p(pSEXP); 84 | rcpp_result_gen = Rcpp::wrap(cdist_cpp(A, B, metric, p)); 85 | return rcpp_result_gen; 86 | END_RCPP 87 | } 88 | // euclidean_rdist 89 | NumericVector euclidean_rdist(NumericMatrix A); 90 | RcppExport SEXP _rdist_euclidean_rdist(SEXP ASEXP) { 91 | BEGIN_RCPP 92 | Rcpp::RObject rcpp_result_gen; 93 | Rcpp::RNGScope rcpp_rngScope_gen; 94 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 95 | rcpp_result_gen = Rcpp::wrap(euclidean_rdist(A)); 96 | return rcpp_result_gen; 97 | END_RCPP 98 | } 99 | // euclidean_pdist 100 | NumericMatrix euclidean_pdist(NumericMatrix A); 101 | RcppExport SEXP _rdist_euclidean_pdist(SEXP ASEXP) { 102 | BEGIN_RCPP 103 | Rcpp::RObject rcpp_result_gen; 104 | Rcpp::RNGScope rcpp_rngScope_gen; 105 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 106 | rcpp_result_gen = Rcpp::wrap(euclidean_pdist(A)); 107 | return rcpp_result_gen; 108 | END_RCPP 109 | } 110 | // euclidean_cdist 111 | NumericMatrix euclidean_cdist(NumericMatrix A, NumericMatrix B); 112 | RcppExport SEXP _rdist_euclidean_cdist(SEXP ASEXP, SEXP BSEXP) { 113 | BEGIN_RCPP 114 | Rcpp::RObject rcpp_result_gen; 115 | Rcpp::RNGScope rcpp_rngScope_gen; 116 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 117 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP); 118 | rcpp_result_gen = Rcpp::wrap(euclidean_cdist(A, B)); 119 | return rcpp_result_gen; 120 | END_RCPP 121 | } 122 | // farthest_point_sampling_cpp 123 | NumericMatrix farthest_point_sampling_cpp(NumericMatrix mat, String metric, int k, int initial_point_index, bool return_clusters); 124 | RcppExport SEXP _rdist_farthest_point_sampling_cpp(SEXP matSEXP, SEXP metricSEXP, SEXP kSEXP, SEXP initial_point_indexSEXP, SEXP return_clustersSEXP) { 125 | BEGIN_RCPP 126 | Rcpp::RObject rcpp_result_gen; 127 | Rcpp::RNGScope rcpp_rngScope_gen; 128 | Rcpp::traits::input_parameter< NumericMatrix >::type mat(matSEXP); 129 | Rcpp::traits::input_parameter< String >::type metric(metricSEXP); 130 | Rcpp::traits::input_parameter< int >::type k(kSEXP); 131 | Rcpp::traits::input_parameter< int >::type initial_point_index(initial_point_indexSEXP); 132 | Rcpp::traits::input_parameter< bool >::type return_clusters(return_clustersSEXP); 133 | rcpp_result_gen = Rcpp::wrap(farthest_point_sampling_cpp(mat, metric, k, initial_point_index, return_clusters)); 134 | return rcpp_result_gen; 135 | END_RCPP 136 | } 137 | // hamming_rdist 138 | NumericVector hamming_rdist(NumericMatrix A); 139 | RcppExport SEXP _rdist_hamming_rdist(SEXP ASEXP) { 140 | BEGIN_RCPP 141 | Rcpp::RObject rcpp_result_gen; 142 | Rcpp::RNGScope rcpp_rngScope_gen; 143 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 144 | rcpp_result_gen = Rcpp::wrap(hamming_rdist(A)); 145 | return rcpp_result_gen; 146 | END_RCPP 147 | } 148 | // hamming_pdist 149 | NumericMatrix hamming_pdist(NumericMatrix A); 150 | RcppExport SEXP _rdist_hamming_pdist(SEXP ASEXP) { 151 | BEGIN_RCPP 152 | Rcpp::RObject rcpp_result_gen; 153 | Rcpp::RNGScope rcpp_rngScope_gen; 154 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 155 | rcpp_result_gen = Rcpp::wrap(hamming_pdist(A)); 156 | return rcpp_result_gen; 157 | END_RCPP 158 | } 159 | // hamming_cdist 160 | NumericMatrix hamming_cdist(NumericMatrix A, NumericMatrix B); 161 | RcppExport SEXP _rdist_hamming_cdist(SEXP ASEXP, SEXP BSEXP) { 162 | BEGIN_RCPP 163 | Rcpp::RObject rcpp_result_gen; 164 | Rcpp::RNGScope rcpp_rngScope_gen; 165 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 166 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP); 167 | rcpp_result_gen = Rcpp::wrap(hamming_cdist(A, B)); 168 | return rcpp_result_gen; 169 | END_RCPP 170 | } 171 | // jaccard_rdist 172 | NumericVector jaccard_rdist(NumericMatrix A); 173 | RcppExport SEXP _rdist_jaccard_rdist(SEXP ASEXP) { 174 | BEGIN_RCPP 175 | Rcpp::RObject rcpp_result_gen; 176 | Rcpp::RNGScope rcpp_rngScope_gen; 177 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 178 | rcpp_result_gen = Rcpp::wrap(jaccard_rdist(A)); 179 | return rcpp_result_gen; 180 | END_RCPP 181 | } 182 | // jaccard_pdist 183 | NumericMatrix jaccard_pdist(NumericMatrix A); 184 | RcppExport SEXP _rdist_jaccard_pdist(SEXP ASEXP) { 185 | BEGIN_RCPP 186 | Rcpp::RObject rcpp_result_gen; 187 | Rcpp::RNGScope rcpp_rngScope_gen; 188 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 189 | rcpp_result_gen = Rcpp::wrap(jaccard_pdist(A)); 190 | return rcpp_result_gen; 191 | END_RCPP 192 | } 193 | // jaccard_cdist 194 | NumericMatrix jaccard_cdist(NumericMatrix A, NumericMatrix B); 195 | RcppExport SEXP _rdist_jaccard_cdist(SEXP ASEXP, SEXP BSEXP) { 196 | BEGIN_RCPP 197 | Rcpp::RObject rcpp_result_gen; 198 | Rcpp::RNGScope rcpp_rngScope_gen; 199 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 200 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP); 201 | rcpp_result_gen = Rcpp::wrap(jaccard_cdist(A, B)); 202 | return rcpp_result_gen; 203 | END_RCPP 204 | } 205 | // manhattan_rdist 206 | NumericVector manhattan_rdist(NumericMatrix A); 207 | RcppExport SEXP _rdist_manhattan_rdist(SEXP ASEXP) { 208 | BEGIN_RCPP 209 | Rcpp::RObject rcpp_result_gen; 210 | Rcpp::RNGScope rcpp_rngScope_gen; 211 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 212 | rcpp_result_gen = Rcpp::wrap(manhattan_rdist(A)); 213 | return rcpp_result_gen; 214 | END_RCPP 215 | } 216 | // manhattan_pdist 217 | NumericMatrix manhattan_pdist(NumericMatrix A); 218 | RcppExport SEXP _rdist_manhattan_pdist(SEXP ASEXP) { 219 | BEGIN_RCPP 220 | Rcpp::RObject rcpp_result_gen; 221 | Rcpp::RNGScope rcpp_rngScope_gen; 222 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 223 | rcpp_result_gen = Rcpp::wrap(manhattan_pdist(A)); 224 | return rcpp_result_gen; 225 | END_RCPP 226 | } 227 | // manhattan_cdist 228 | NumericMatrix manhattan_cdist(NumericMatrix A, NumericMatrix B); 229 | RcppExport SEXP _rdist_manhattan_cdist(SEXP ASEXP, SEXP BSEXP) { 230 | BEGIN_RCPP 231 | Rcpp::RObject rcpp_result_gen; 232 | Rcpp::RNGScope rcpp_rngScope_gen; 233 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 234 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP); 235 | rcpp_result_gen = Rcpp::wrap(manhattan_cdist(A, B)); 236 | return rcpp_result_gen; 237 | END_RCPP 238 | } 239 | // maximum_rdist 240 | NumericVector maximum_rdist(NumericMatrix A); 241 | RcppExport SEXP _rdist_maximum_rdist(SEXP ASEXP) { 242 | BEGIN_RCPP 243 | Rcpp::RObject rcpp_result_gen; 244 | Rcpp::RNGScope rcpp_rngScope_gen; 245 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 246 | rcpp_result_gen = Rcpp::wrap(maximum_rdist(A)); 247 | return rcpp_result_gen; 248 | END_RCPP 249 | } 250 | // maximum_pdist 251 | NumericMatrix maximum_pdist(NumericMatrix A); 252 | RcppExport SEXP _rdist_maximum_pdist(SEXP ASEXP) { 253 | BEGIN_RCPP 254 | Rcpp::RObject rcpp_result_gen; 255 | Rcpp::RNGScope rcpp_rngScope_gen; 256 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 257 | rcpp_result_gen = Rcpp::wrap(maximum_pdist(A)); 258 | return rcpp_result_gen; 259 | END_RCPP 260 | } 261 | // maximum_cdist 262 | NumericMatrix maximum_cdist(NumericMatrix A, NumericMatrix B); 263 | RcppExport SEXP _rdist_maximum_cdist(SEXP ASEXP, SEXP BSEXP) { 264 | BEGIN_RCPP 265 | Rcpp::RObject rcpp_result_gen; 266 | Rcpp::RNGScope rcpp_rngScope_gen; 267 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 268 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP); 269 | rcpp_result_gen = Rcpp::wrap(maximum_cdist(A, B)); 270 | return rcpp_result_gen; 271 | END_RCPP 272 | } 273 | // minkowski_rdist 274 | NumericVector minkowski_rdist(NumericMatrix A, double p); 275 | RcppExport SEXP _rdist_minkowski_rdist(SEXP ASEXP, SEXP pSEXP) { 276 | BEGIN_RCPP 277 | Rcpp::RObject rcpp_result_gen; 278 | Rcpp::RNGScope rcpp_rngScope_gen; 279 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 280 | Rcpp::traits::input_parameter< double >::type p(pSEXP); 281 | rcpp_result_gen = Rcpp::wrap(minkowski_rdist(A, p)); 282 | return rcpp_result_gen; 283 | END_RCPP 284 | } 285 | // minkowski_pdist 286 | NumericMatrix minkowski_pdist(NumericMatrix A, double p); 287 | RcppExport SEXP _rdist_minkowski_pdist(SEXP ASEXP, SEXP pSEXP) { 288 | BEGIN_RCPP 289 | Rcpp::RObject rcpp_result_gen; 290 | Rcpp::RNGScope rcpp_rngScope_gen; 291 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 292 | Rcpp::traits::input_parameter< double >::type p(pSEXP); 293 | rcpp_result_gen = Rcpp::wrap(minkowski_pdist(A, p)); 294 | return rcpp_result_gen; 295 | END_RCPP 296 | } 297 | // minkowski_cdist 298 | NumericMatrix minkowski_cdist(NumericMatrix A, NumericMatrix B, double p); 299 | RcppExport SEXP _rdist_minkowski_cdist(SEXP ASEXP, SEXP BSEXP, SEXP pSEXP) { 300 | BEGIN_RCPP 301 | Rcpp::RObject rcpp_result_gen; 302 | Rcpp::RNGScope rcpp_rngScope_gen; 303 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP); 304 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP); 305 | Rcpp::traits::input_parameter< double >::type p(pSEXP); 306 | rcpp_result_gen = Rcpp::wrap(minkowski_cdist(A, B, p)); 307 | return rcpp_result_gen; 308 | END_RCPP 309 | } 310 | // cpp_triangle_inequality 311 | bool cpp_triangle_inequality(Rcpp::NumericMatrix mat, double tolerance); 312 | RcppExport SEXP _rdist_cpp_triangle_inequality(SEXP matSEXP, SEXP toleranceSEXP) { 313 | BEGIN_RCPP 314 | Rcpp::RObject rcpp_result_gen; 315 | Rcpp::RNGScope rcpp_rngScope_gen; 316 | Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type mat(matSEXP); 317 | Rcpp::traits::input_parameter< double >::type tolerance(toleranceSEXP); 318 | rcpp_result_gen = Rcpp::wrap(cpp_triangle_inequality(mat, tolerance)); 319 | return rcpp_result_gen; 320 | END_RCPP 321 | } 322 | -------------------------------------------------------------------------------- /src/canberra.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | // [[Rcpp::depends(RcppArmadillo)]] 3 | 4 | using namespace Rcpp; 5 | 6 | // [[Rcpp::export]] 7 | NumericVector canberra_rdist(NumericMatrix A) { 8 | int n = A.nrow(), k = A.ncol(); 9 | 10 | arma::mat Ar = arma::mat(A.begin(), n, k, false); 11 | 12 | NumericVector C(n * (n-1) / 2); 13 | 14 | int l = 0; 15 | for (int i = 0; i < n; ++i){ 16 | arma::mat Arow = Ar.row(i); 17 | for (int j = i + 1; j < n; ++j){ 18 | C(l) = sum(abs(Arow - Ar.row(j))/(abs(Arow) + abs(Ar.row(j)))); 19 | l++; 20 | } 21 | } 22 | 23 | return wrap(C); 24 | } 25 | 26 | // [[Rcpp::export]] 27 | NumericMatrix canberra_pdist(NumericMatrix A) { 28 | int n = A.nrow(), k = A.ncol(); 29 | 30 | arma::mat Ar = arma::mat(A.begin(), n, k, false); 31 | 32 | arma::mat C(n, n); 33 | 34 | for (int i = 0; i < n; ++i){ 35 | arma::mat Arow = Ar.row(i); 36 | for (int j = 0; j < n; ++j){ 37 | C(i, j) = sum(abs(Arow - Ar.row(j))/(abs(Arow) + abs(Ar.row(j)))); 38 | } 39 | } 40 | 41 | return wrap(C); 42 | } 43 | 44 | // [[Rcpp::export]] 45 | NumericMatrix canberra_cdist(NumericMatrix A, NumericMatrix B) { 46 | int m = A.nrow(), n = B.nrow(), k = A.ncol(); 47 | 48 | arma::mat Ar = arma::mat(A.begin(), m, k, false); 49 | arma::mat Br = arma::mat(B.begin(), n, k, false); 50 | 51 | arma::mat C(m, n); 52 | 53 | for (int i = 0; i < m; ++i){ 54 | arma::mat Arow = Ar.row(i); 55 | for (int j = 0; j < n; ++j){ 56 | C(i, j) = sum(abs(Arow - Br.row(j))/(abs(Arow) + abs(Br.row(j)))); 57 | } 58 | } 59 | 60 | return wrap(C); 61 | } -------------------------------------------------------------------------------- /src/canberra.h: -------------------------------------------------------------------------------- 1 | #ifndef CANBERRA_ 2 | #define CANBERRA_ 3 | 4 | #include 5 | 6 | Rcpp::NumericVector canberra_rdist(Rcpp::NumericMatrix A); 7 | Rcpp::NumericMatrix canberra_pdist(Rcpp::NumericMatrix A); 8 | Rcpp::NumericMatrix canberra_cdist(Rcpp::NumericMatrix A, Rcpp::NumericMatrix B); 9 | 10 | #endif // CANBERRA_ -------------------------------------------------------------------------------- /src/dist.cpp: -------------------------------------------------------------------------------- 1 | #include "canberra.h" 2 | #include "minkowski.h" 3 | #include "manhattan.h" 4 | #include "maximum.h" 5 | #include "hamming.h" 6 | #include "jaccard.h" 7 | #include 8 | // [[Rcpp::depends(RcppArmadillo)]] 9 | 10 | using namespace Rcpp; 11 | 12 | // [[Rcpp::export]] 13 | NumericVector rdist_cpp(NumericMatrix A, String metric, double p=2.0) { 14 | NumericMatrix res; 15 | if (metric == "euclidean"){ 16 | return minkowski_rdist(A, 2.0); 17 | } 18 | if (metric == "minkowski"){ 19 | return minkowski_rdist(A, p); 20 | } 21 | if (metric == "manhattan"){ 22 | return manhattan_rdist(A); 23 | } 24 | if (metric == "chebyshev"){ 25 | return maximum_rdist(A); 26 | } 27 | if (metric == "maximum"){ 28 | return maximum_rdist(A); 29 | } 30 | if (metric == "canberra"){ 31 | return canberra_rdist(A); 32 | } 33 | if (metric == "angular"){ 34 | Rcpp::Environment package_env("package:rdist"); 35 | Rcpp::Function angular_rdist = package_env["angular_rdist"]; 36 | return angular_rdist(A); 37 | } 38 | if (metric == "correlation"){ 39 | Rcpp::Environment package_env("package:rdist"); 40 | Rcpp::Function correlation_rdist = package_env["correlation_rdist"]; 41 | return correlation_rdist(A); 42 | } 43 | if (metric == "absolute_correlation"){ 44 | Rcpp::Environment package_env("package:rdist"); 45 | Rcpp::Function absolute_correlation_rdist = package_env["absolute_correlation_rdist"]; 46 | return absolute_correlation_rdist(A); 47 | } 48 | if (metric == "hamming"){ 49 | return hamming_rdist(A); 50 | } 51 | if (metric == "jaccard"){ 52 | return jaccard_rdist(A); 53 | } 54 | Rcpp::stop("metric not recognized."); 55 | return NumericMatrix(0); 56 | } 57 | 58 | // [[Rcpp::export]] 59 | NumericMatrix pdist_cpp(NumericMatrix A, String metric, double p=2.0) { 60 | NumericMatrix res; 61 | if (metric == "euclidean"){ 62 | return minkowski_pdist(A, 2.0); 63 | } 64 | if (metric == "minkowski"){ 65 | return minkowski_pdist(A, p); 66 | } 67 | if (metric == "manhattan"){ 68 | return manhattan_pdist(A); 69 | } 70 | if (metric == "chebyshev"){ 71 | return maximum_pdist(A); 72 | } 73 | if (metric == "maximum"){ 74 | return maximum_pdist(A); 75 | } 76 | if (metric == "canberra"){ 77 | return canberra_pdist(A); 78 | } 79 | if (metric == "angular"){ 80 | Rcpp::Environment package_env("package:rdist"); 81 | Rcpp::Function angular_pdist = package_env["angular_pdist"]; 82 | return angular_pdist(A); 83 | } 84 | if (metric == "correlation"){ 85 | Rcpp::Environment package_env("package:rdist"); 86 | Rcpp::Function correlation_pdist = package_env["correlation_pdist"]; 87 | return correlation_pdist(A); 88 | } 89 | if (metric == "absolute_correlation"){ 90 | Rcpp::Environment package_env("package:rdist"); 91 | Rcpp::Function absolute_correlation_pdist = package_env["absolute_correlation_pdist"]; 92 | return absolute_correlation_pdist(A); 93 | } 94 | if (metric == "hamming"){ 95 | return hamming_pdist(A); 96 | } 97 | if (metric == "jaccard"){ 98 | return jaccard_pdist(A); 99 | } 100 | Rcpp::stop("metric not recognized."); 101 | return NumericMatrix(0); 102 | } 103 | 104 | // [[Rcpp::export]] 105 | NumericMatrix cdist_cpp(NumericMatrix A, NumericMatrix B, String metric, double p=2.0) { 106 | NumericMatrix res; 107 | if (metric == "euclidean"){ 108 | return minkowski_cdist(A, B, 2.0); 109 | } 110 | if (metric == "minkowski"){ 111 | return minkowski_cdist(A, B, p); 112 | } 113 | if (metric == "manhattan"){ 114 | return manhattan_cdist(A, B); 115 | } 116 | if (metric == "chebyshev"){ 117 | return maximum_cdist(A, B); 118 | } 119 | if (metric == "maximum"){ 120 | return maximum_cdist(A, B); 121 | } 122 | if (metric == "canberra"){ 123 | return canberra_cdist(A, B); 124 | } 125 | if (metric == "angular"){ 126 | Rcpp::Environment package_env("package:rdist"); 127 | Rcpp::Function angular_cdist = package_env["angular_cdist"]; 128 | return angular_cdist(A, B); 129 | } 130 | if (metric == "correlation"){ 131 | Rcpp::Environment package_env("package:rdist"); 132 | Rcpp::Function correlation_cdist = package_env["correlation_cdist"]; 133 | return correlation_cdist(A, B); 134 | } 135 | if (metric == "absolute_correlation"){ 136 | Rcpp::Environment package_env("package:rdist"); 137 | Rcpp::Function absolute_correlation_cdist = package_env["absolute_correlation_cdist"]; 138 | return absolute_correlation_cdist(A, B); 139 | } 140 | if (metric == "hamming"){ 141 | return hamming_cdist(A, B); 142 | } 143 | if (metric == "jaccard"){ 144 | return jaccard_cdist(A, B); 145 | } 146 | Rcpp::stop("metric not recognized."); 147 | return NumericMatrix(0); 148 | } 149 | 150 | -------------------------------------------------------------------------------- /src/dist.h: -------------------------------------------------------------------------------- 1 | #ifndef DIST_ 2 | #define DIST_ 3 | 4 | #include 5 | 6 | Rcpp::NumericVector rdist_cpp(Rcpp::NumericMatrix A, Rcpp::String metric, double p); 7 | Rcpp::NumericMatrix pdist_cpp(Rcpp::NumericMatrix A, Rcpp::String metric, double p); 8 | Rcpp::NumericMatrix cdist_cpp(Rcpp::NumericMatrix A, Rcpp::NumericMatrix B, Rcpp::String metric, double p); 9 | 10 | #endif // DIST_ 11 | -------------------------------------------------------------------------------- /src/euclidean.cpp: -------------------------------------------------------------------------------- 1 | // adjusted from https://www.r-bloggers.com/pairwise-distances-in-r/ 2 | #include 3 | // [[Rcpp::depends(RcppArmadillo)]] 4 | 5 | using namespace Rcpp; 6 | 7 | // [[Rcpp::export]] 8 | NumericVector euclidean_rdist(NumericMatrix A) { 9 | int n = A.nrow(), 10 | k = A.ncol(); 11 | arma::mat Ar = arma::mat(A.begin(), n, k, false); 12 | 13 | arma::colvec An = sum(square(Ar),1); 14 | 15 | arma::mat C = -2 * (Ar * Ar.t()); 16 | C.each_col() += An; 17 | C.each_row() += An.t(); 18 | 19 | arma::mat D(1, n * (n-1) / 2); 20 | int l = 0; 21 | for (int i = 0; i < n; ++i){ 22 | for (int j = i + 1; j < n; ++j){ 23 | D(l) = C(i, j); 24 | l++; 25 | } 26 | } 27 | 28 | return wrap(sqrt(D)); 29 | } 30 | 31 | // [[Rcpp::export]] 32 | NumericMatrix euclidean_pdist(NumericMatrix A) { 33 | int n = A.nrow(), 34 | k = A.ncol(); 35 | arma::mat Ar = arma::mat(A.begin(), n, k, false); 36 | 37 | arma::colvec An = sum(square(Ar), 1); 38 | 39 | arma::mat C = -2 * (Ar * Ar.t()); 40 | C.each_col() += An; 41 | C.each_row() += An.t(); 42 | 43 | return wrap(sqrt(C)); 44 | } 45 | 46 | // [[Rcpp::export]] 47 | NumericMatrix euclidean_cdist(NumericMatrix A, NumericMatrix B) { 48 | int m = A.nrow(), 49 | n = B.nrow(), 50 | k = A.ncol(); 51 | arma::mat Ar = arma::mat(A.begin(), m, k, false); 52 | arma::mat Br = arma::mat(B.begin(), n, k, false); 53 | 54 | arma::colvec An = sum(square(Ar), 1); 55 | arma::colvec Bn = sum(square(Br), 1); 56 | 57 | arma::mat C = -2 * (Ar * Br.t()); 58 | C.each_col() += An; 59 | C.each_row() += Bn.t(); 60 | 61 | return wrap(sqrt(C)); 62 | } -------------------------------------------------------------------------------- /src/farthest_point_sampling.cpp: -------------------------------------------------------------------------------- 1 | #include "dist.h" 2 | #include 3 | // [[Rcpp::depends(RcppArmadillo)]] 4 | 5 | using namespace Rcpp; 6 | 7 | 8 | arma::uvec remove_element(arma::uvec& x, int y){ 9 | arma::uvec q1 = arma::find(x == y); 10 | if (!q1.empty()){ 11 | x.shed_row(q1(0)); 12 | } 13 | return x; 14 | } 15 | 16 | // [[Rcpp::export]] 17 | NumericMatrix farthest_point_sampling_cpp(NumericMatrix mat, 18 | String metric, 19 | int k, 20 | int initial_point_index = 0, 21 | bool return_clusters = false) { 22 | /* 23 | Farthest Point Sampling 24 | 25 | Parameters 26 | ========== 27 | NumericMatrix mat 28 | Original data or distance matrix 29 | String metric 30 | Metric used 31 | int k 32 | Number of points to sample 33 | int initial_point_index (default: 0) 34 | Index of point to start farthest point sampling with 35 | 36 | Return value 37 | ============ 38 | The indices of the first k furthest points. 39 | */ 40 | // initialize 41 | int n = mat.nrow(); 42 | int m = mat.ncol(); 43 | arma::mat amat = arma::mat(mat.begin(), n, m, false); 44 | arma::uvec reordering(k, arma::fill::ones); 45 | reordering *= initial_point_index - 1; 46 | arma::uvec preordering = arma::linspace(0, n-1, n); 47 | arma::urowvec cluster_mat(n); 48 | cluster_mat.fill(0); 49 | 50 | // find ordered indices 51 | if (metric == "precomputed"){ 52 | arma::mat local_dist; 53 | for (int i = 1; i < k; ++i){ 54 | preordering = remove_element(preordering, reordering(i-1)); 55 | arma::uvec indices = reordering.rows(0, i-1); 56 | local_dist = amat.submat(indices, preordering); 57 | arma::mat local_min_dist = arma::min(local_dist, 0); 58 | reordering(i) = preordering(local_min_dist.index_max()); 59 | } 60 | if (return_clusters){ 61 | arma::mat complete_local_dist = amat.rows(reordering); 62 | cluster_mat = arma::index_min(complete_local_dist, 0); 63 | } 64 | } 65 | else { 66 | arma::mat local_min_dist; 67 | 68 | for (int i = 1; i <= k; ++i){ 69 | preordering = remove_element(preordering, reordering(i-1)); 70 | arma::uvec indices = reordering.rows(0, i-1); 71 | 72 | arma::mat m_indices = amat.row(reordering(i-1)); 73 | NumericMatrix mat_indices = wrap(m_indices); 74 | 75 | NumericMatrix n_dist = cdist_cpp(mat_indices, mat, metric, 2); 76 | 77 | arma::mat new_dist = arma::mat(n_dist.begin(), n_dist.ncol(), 1, false); 78 | if (i == 1){ 79 | local_min_dist = new_dist; 80 | } 81 | arma::mat local_dist = arma::join_rows(local_min_dist, new_dist); 82 | local_min_dist = arma::min(local_dist, 1); 83 | if (return_clusters){ 84 | arma::urowvec pre_cluster_mat = arma::find(arma::index_min(local_dist, 1)).t(); 85 | arma::urowvec new_clusters(pre_cluster_mat.n_elem); 86 | new_clusters.fill(i-1); 87 | cluster_mat.elem(pre_cluster_mat) = new_clusters; 88 | } 89 | arma::mat local_local_min_dist = local_min_dist.elem(preordering); 90 | if (i < k){ 91 | reordering(i) = preordering(local_local_min_dist.index_max()); 92 | } 93 | } 94 | } 95 | 96 | // return reordering 97 | NumericMatrix output = wrap(reordering + 1); 98 | if (return_clusters){ 99 | NumericVector clusters = wrap(cluster_mat + 1); 100 | output.attr("clusters") = clusters; 101 | } 102 | return output; 103 | } 104 | -------------------------------------------------------------------------------- /src/hamming.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | // [[Rcpp::depends(RcppArmadillo)]] 3 | 4 | using namespace Rcpp; 5 | 6 | // [[Rcpp::export]] 7 | NumericVector hamming_rdist(NumericMatrix A) { 8 | int n = A.nrow(), k = A.ncol(); 9 | 10 | arma::mat Ar = arma::mat(A.begin(), n, k, false); 11 | 12 | NumericVector C(n * (n-1) / 2); 13 | 14 | int l = 0; 15 | for (int i = 0; i < n; ++i){ 16 | arma::mat Arow = Ar.row(i); 17 | for (int j = i + 1; j < n; ++j){ 18 | C(l) = sum(Arow != Ar.row(j)); 19 | l++; 20 | } 21 | } 22 | 23 | return C / k; 24 | } 25 | 26 | // [[Rcpp::export]] 27 | NumericMatrix hamming_pdist(NumericMatrix A) { 28 | int n = A.nrow(), k = A.ncol(); 29 | 30 | arma::mat Ar = arma::mat(A.begin(), n, k, false); 31 | 32 | arma::mat C(n, n); 33 | 34 | for (int i = 0; i < n; ++i){ 35 | arma::mat Arow = Ar.row(i); 36 | for (int j = 0; j < n; ++j){ 37 | C(i, j) = sum(Arow != Ar.row(j)); 38 | } 39 | } 40 | 41 | return wrap(C / k); 42 | } 43 | 44 | // [[Rcpp::export]] 45 | NumericMatrix hamming_cdist(NumericMatrix A, NumericMatrix B) { 46 | int m = A.nrow(), n = B.nrow(), k = A.ncol(); 47 | 48 | arma::mat Ar = arma::mat(A.begin(), m, k, false); 49 | arma::mat Br = arma::mat(B.begin(), n, k, false); 50 | 51 | arma::mat C(m, n); 52 | 53 | for (int i = 0; i < m; ++i){ 54 | arma::mat Arow = Ar.row(i); 55 | for (int j = 0; j < n; ++j){ 56 | C(i, j) = sum(Arow != Br.row(j)); 57 | } 58 | } 59 | 60 | return wrap(C / k); 61 | } -------------------------------------------------------------------------------- /src/hamming.h: -------------------------------------------------------------------------------- 1 | #ifndef HAMMING_ 2 | #define HAMMING_ 3 | 4 | #include 5 | 6 | Rcpp::NumericVector hamming_rdist(Rcpp::NumericMatrix A); 7 | Rcpp::NumericMatrix hamming_pdist(Rcpp::NumericMatrix A); 8 | Rcpp::NumericMatrix hamming_cdist(Rcpp::NumericMatrix A, Rcpp::NumericMatrix B); 9 | 10 | #endif // HAMMING_ -------------------------------------------------------------------------------- /src/init.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // for NULL 4 | #include 5 | 6 | /* .Call calls */ 7 | extern SEXP _rdist_canberra_cdist(SEXP, SEXP); 8 | extern SEXP _rdist_canberra_pdist(SEXP); 9 | extern SEXP _rdist_canberra_rdist(SEXP); 10 | extern SEXP _rdist_euclidean_cdist(SEXP, SEXP); 11 | extern SEXP _rdist_euclidean_pdist(SEXP); 12 | extern SEXP _rdist_euclidean_rdist(SEXP); 13 | extern SEXP _rdist_farthest_point_sampling_cpp(SEXP, SEXP, SEXP, SEXP, SEXP); 14 | extern SEXP _rdist_hamming_cdist(SEXP, SEXP); 15 | extern SEXP _rdist_hamming_pdist(SEXP); 16 | extern SEXP _rdist_hamming_rdist(SEXP); 17 | extern SEXP _rdist_jaccard_cdist(SEXP, SEXP); 18 | extern SEXP _rdist_jaccard_pdist(SEXP); 19 | extern SEXP _rdist_jaccard_rdist(SEXP); 20 | extern SEXP _rdist_manhattan_cdist(SEXP, SEXP); 21 | extern SEXP _rdist_manhattan_pdist(SEXP); 22 | extern SEXP _rdist_manhattan_rdist(SEXP); 23 | extern SEXP _rdist_maximum_cdist(SEXP, SEXP); 24 | extern SEXP _rdist_maximum_pdist(SEXP); 25 | extern SEXP _rdist_maximum_rdist(SEXP); 26 | extern SEXP _rdist_minkowski_cdist(SEXP, SEXP, SEXP); 27 | extern SEXP _rdist_minkowski_pdist(SEXP, SEXP); 28 | extern SEXP _rdist_minkowski_rdist(SEXP, SEXP); 29 | extern SEXP _rdist_cpp_triangle_inequality(SEXP, SEXP); 30 | extern SEXP _rdist_rdist_cpp(SEXP, SEXP, SEXP); 31 | extern SEXP _rdist_pdist_cpp(SEXP, SEXP, SEXP); 32 | extern SEXP _rdist_cdist_cpp(SEXP, SEXP, SEXP, SEXP); 33 | 34 | static const R_CallMethodDef CallEntries[] = { 35 | {"_rdist_canberra_cdist", (DL_FUNC) &_rdist_canberra_cdist, 2}, 36 | {"_rdist_canberra_pdist", (DL_FUNC) &_rdist_canberra_pdist, 1}, 37 | {"_rdist_canberra_rdist", (DL_FUNC) &_rdist_canberra_rdist, 1}, 38 | {"_rdist_euclidean_cdist", (DL_FUNC) &_rdist_euclidean_cdist, 2}, 39 | {"_rdist_euclidean_pdist", (DL_FUNC) &_rdist_euclidean_pdist, 1}, 40 | {"_rdist_euclidean_rdist", (DL_FUNC) &_rdist_euclidean_rdist, 1}, 41 | {"_rdist_farthest_point_sampling_cpp", (DL_FUNC) &_rdist_farthest_point_sampling_cpp, 5}, 42 | {"_rdist_hamming_cdist", (DL_FUNC) &_rdist_hamming_cdist, 2}, 43 | {"_rdist_hamming_pdist", (DL_FUNC) &_rdist_hamming_pdist, 1}, 44 | {"_rdist_hamming_rdist", (DL_FUNC) &_rdist_hamming_rdist, 1}, 45 | {"_rdist_jaccard_cdist", (DL_FUNC) &_rdist_jaccard_cdist, 2}, 46 | {"_rdist_jaccard_pdist", (DL_FUNC) &_rdist_jaccard_pdist, 1}, 47 | {"_rdist_jaccard_rdist", (DL_FUNC) &_rdist_jaccard_rdist, 1}, 48 | {"_rdist_manhattan_cdist", (DL_FUNC) &_rdist_manhattan_cdist, 2}, 49 | {"_rdist_manhattan_pdist", (DL_FUNC) &_rdist_manhattan_pdist, 1}, 50 | {"_rdist_manhattan_rdist", (DL_FUNC) &_rdist_manhattan_rdist, 1}, 51 | {"_rdist_maximum_cdist", (DL_FUNC) &_rdist_maximum_cdist, 2}, 52 | {"_rdist_maximum_pdist", (DL_FUNC) &_rdist_maximum_pdist, 1}, 53 | {"_rdist_maximum_rdist", (DL_FUNC) &_rdist_maximum_rdist, 1}, 54 | {"_rdist_minkowski_cdist", (DL_FUNC) &_rdist_minkowski_cdist, 3}, 55 | {"_rdist_minkowski_pdist", (DL_FUNC) &_rdist_minkowski_pdist, 2}, 56 | {"_rdist_minkowski_rdist", (DL_FUNC) &_rdist_minkowski_rdist, 2}, 57 | {"_rdist_cpp_triangle_inequality", (DL_FUNC) &_rdist_cpp_triangle_inequality, 2}, 58 | {"_rdist_rdist_cpp", (DL_FUNC) &_rdist_rdist_cpp, 3}, 59 | {"_rdist_pdist_cpp", (DL_FUNC) &_rdist_pdist_cpp, 3}, 60 | {"_rdist_cdist_cpp", (DL_FUNC) &_rdist_cdist_cpp, 4}, 61 | {NULL, NULL, 0} 62 | }; 63 | 64 | void R_init_rdist(DllInfo *dll) 65 | { 66 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 67 | R_useDynamicSymbols(dll, FALSE); 68 | } 69 | -------------------------------------------------------------------------------- /src/jaccard.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | // [[Rcpp::depends(RcppArmadillo)]] 3 | 4 | using namespace Rcpp; 5 | 6 | // [[Rcpp::export]] 7 | NumericVector jaccard_rdist(NumericMatrix A) { 8 | int n = A.nrow(), k = A.ncol(); 9 | 10 | arma::mat Ar = arma::mat(A.begin(), n, k, false); 11 | 12 | NumericVector C(n * (n-1) / 2); 13 | 14 | int l = 0; 15 | for (int i = 0; i < n; ++i){ 16 | arma::rowvec Arow = Ar.row(i); 17 | for (int j = i + 1; j < n; ++j){ 18 | arma::urowvec zerovec = (Arow == 0); 19 | zerovec.elem( find(Ar.row(j) != 0) ).zeros(); 20 | if (all(Arow == Ar.row(j))) { 21 | C(l) = 0.0; 22 | } 23 | else{ 24 | C(l) = (double)sum(Arow != Ar.row(j)) / (double)sum(zerovec != 1); 25 | } 26 | 27 | l++; 28 | } 29 | } 30 | 31 | return wrap(C); 32 | } 33 | 34 | // [[Rcpp::export]] 35 | NumericMatrix jaccard_pdist(NumericMatrix A) { 36 | int n = A.nrow(), k = A.ncol(); 37 | 38 | arma::mat Ar = arma::mat(A.begin(), n, k, false); 39 | 40 | arma::mat C(n, n); 41 | 42 | for (int i = 0; i < n; ++i){ 43 | arma::mat Arow = Ar.row(i); 44 | for (int j = 0; j < n; ++j){ 45 | arma::urowvec zerovec = (Arow == 0); 46 | zerovec.elem( find(Ar.row(j) != 0) ).zeros(); 47 | if (all(Arow == Ar.row(j))) { 48 | C(i, j) = 0.0; 49 | } 50 | else{ 51 | C(i, j) = (double)sum(Arow != Ar.row(j)) / (double)sum(zerovec != 1); 52 | } 53 | } 54 | } 55 | 56 | return wrap(C); 57 | } 58 | 59 | // [[Rcpp::export]] 60 | NumericMatrix jaccard_cdist(NumericMatrix A, NumericMatrix B) { 61 | int m = A.nrow(), n = B.nrow(), k = A.ncol(); 62 | 63 | arma::mat Ar = arma::mat(A.begin(), m, k, false); 64 | arma::mat Br = arma::mat(B.begin(), n, k, false); 65 | 66 | arma::mat C(m, n); 67 | 68 | for (int i = 0; i < m; ++i){ 69 | arma::mat Arow = Ar.row(i); 70 | for (int j = 0; j < n; ++j){ 71 | arma::urowvec zerovec = (Arow == 0); 72 | zerovec.elem( find(Br.row(j) != 0) ).zeros(); 73 | if (all(Arow == Br.row(j))) { 74 | C(i, j) = 0.0; 75 | } 76 | else{ 77 | C(i, j) = (double)sum(Arow != Br.row(j)) / (double)sum(zerovec != 1); 78 | } 79 | } 80 | } 81 | 82 | return wrap(C); 83 | } -------------------------------------------------------------------------------- /src/jaccard.h: -------------------------------------------------------------------------------- 1 | #ifndef JACCARD_ 2 | #define JACCARD_ 3 | 4 | #include 5 | 6 | Rcpp::NumericVector jaccard_rdist(Rcpp::NumericMatrix A); 7 | Rcpp::NumericMatrix jaccard_pdist(Rcpp::NumericMatrix A); 8 | Rcpp::NumericMatrix jaccard_cdist(Rcpp::NumericMatrix A, Rcpp::NumericMatrix B); 9 | 10 | #endif // JACCARD_ -------------------------------------------------------------------------------- /src/manhattan.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | // [[Rcpp::depends(RcppArmadillo)]] 3 | 4 | using namespace Rcpp; 5 | 6 | // [[Rcpp::export]] 7 | NumericVector manhattan_rdist(NumericMatrix A) { 8 | int n = A.nrow(), k = A.ncol(); 9 | 10 | arma::mat Ar = arma::mat(A.begin(), n, k, false); 11 | 12 | NumericVector C(n * (n-1) / 2); 13 | 14 | int l = 0; 15 | for (int i = 0; i < n; ++i){ 16 | arma::mat Arow = Ar.row(i); 17 | for (int j = i + 1; j < n; ++j){ 18 | C(l) = sum(abs(Arow - Ar.row(j))); 19 | l++; 20 | } 21 | } 22 | 23 | return C; 24 | } 25 | 26 | // [[Rcpp::export]] 27 | NumericMatrix manhattan_pdist(NumericMatrix A) { 28 | int n = A.nrow(), k = A.ncol(); 29 | 30 | arma::mat Ar = arma::mat(A.begin(), n, k, false); 31 | 32 | arma::mat C(n, n); 33 | 34 | for (int i = 0; i < n; ++i){ 35 | arma::mat Arow = Ar.row(i); 36 | for (int j = 0; j < n; ++j){ 37 | C(i, j) = sum(abs(Arow - Ar.row(j))); 38 | } 39 | } 40 | 41 | return wrap(C); 42 | } 43 | 44 | // [[Rcpp::export]] 45 | NumericMatrix manhattan_cdist(NumericMatrix A, NumericMatrix B) { 46 | int m = A.nrow(), n = B.nrow(), k = A.ncol(); 47 | 48 | arma::mat Ar = arma::mat(A.begin(), m, k, false); 49 | arma::mat Br = arma::mat(B.begin(), n, k, false); 50 | 51 | arma::mat C(m, n); 52 | 53 | for (int i = 0; i < m; ++i){ 54 | arma::mat Arow = Ar.row(i); 55 | for (int j = 0; j < n; ++j){ 56 | C(i, j) = sum(abs(Arow - Br.row(j))); 57 | } 58 | } 59 | 60 | return wrap(C); 61 | } -------------------------------------------------------------------------------- /src/manhattan.h: -------------------------------------------------------------------------------- 1 | #ifndef MANHATTAN_ 2 | #define MANHATTAN_ 3 | 4 | #include 5 | 6 | Rcpp::NumericVector manhattan_rdist(Rcpp::NumericMatrix A); 7 | Rcpp::NumericMatrix manhattan_pdist(Rcpp::NumericMatrix A); 8 | Rcpp::NumericMatrix manhattan_cdist(Rcpp::NumericMatrix A, Rcpp::NumericMatrix B); 9 | 10 | #endif // MANHATTAN_ -------------------------------------------------------------------------------- /src/maximum.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | // [[Rcpp::depends(RcppArmadillo)]] 3 | 4 | using namespace Rcpp; 5 | 6 | // [[Rcpp::export]] 7 | NumericVector maximum_rdist(NumericMatrix A) { 8 | int n = A.nrow(), k = A.ncol(); 9 | 10 | arma::mat Ar = arma::mat(A.begin(), n, k, false); 11 | 12 | NumericVector C(n * (n-1) / 2); 13 | 14 | int l = 0; 15 | for (int i = 0; i < n; ++i){ 16 | arma::mat Arow = Ar.row(i); 17 | for (int j = i + 1; j < n; ++j){ 18 | C(l) = max(abs(Arow - Ar.row(j))); 19 | l++; 20 | } 21 | } 22 | 23 | return C; 24 | } 25 | 26 | // [[Rcpp::export]] 27 | NumericMatrix maximum_pdist(NumericMatrix A) { 28 | int n = A.nrow(), k = A.ncol(); 29 | 30 | arma::mat Ar = arma::mat(A.begin(), n, k, false); 31 | 32 | arma::mat C(n, n); 33 | 34 | for (int i = 0; i < n; ++i){ 35 | arma::mat Arow = Ar.row(i); 36 | for (int j = 0; j < n; ++j){ 37 | C(i, j) = max(abs(Arow - Ar.row(j))); 38 | } 39 | } 40 | 41 | return wrap(C); 42 | } 43 | 44 | // [[Rcpp::export]] 45 | NumericMatrix maximum_cdist(NumericMatrix A, NumericMatrix B) { 46 | int m = A.nrow(), n = B.nrow(), k = A.ncol(); 47 | 48 | arma::mat Ar = arma::mat(A.begin(), m, k, false); 49 | arma::mat Br = arma::mat(B.begin(), n, k, false); 50 | 51 | arma::mat C(m, n); 52 | 53 | for (int i = 0; i < m; ++i){ 54 | arma::mat Arow = Ar.row(i); 55 | for (int j = 0; j < n; ++j){ 56 | C(i, j) = max(abs(Arow - Br.row(j))); 57 | } 58 | } 59 | 60 | return wrap(C); 61 | } -------------------------------------------------------------------------------- /src/maximum.h: -------------------------------------------------------------------------------- 1 | #ifndef MAXIMUM_ 2 | #define MAXIMUM_ 3 | 4 | #include 5 | 6 | Rcpp::NumericVector maximum_rdist(Rcpp::NumericMatrix A); 7 | Rcpp::NumericMatrix maximum_pdist(Rcpp::NumericMatrix A); 8 | Rcpp::NumericMatrix maximum_cdist(Rcpp::NumericMatrix A, Rcpp::NumericMatrix B); 9 | 10 | #endif // MAXIMUM_ -------------------------------------------------------------------------------- /src/minkowski.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | // [[Rcpp::depends(RcppArmadillo)]] 3 | 4 | using namespace Rcpp; 5 | 6 | // [[Rcpp::export]] 7 | NumericVector minkowski_rdist(NumericMatrix A, double p) { 8 | int n = A.nrow(), k = A.ncol(); 9 | 10 | arma::mat Ar = arma::mat(A.begin(), n, k, false); 11 | 12 | arma::mat C(1, n * (n-1) / 2); 13 | 14 | int l = 0; 15 | for (int i = 0; i < n; ++i){ 16 | arma::mat Arow = Ar.row(i); 17 | for (int j = i + 1; j < n; ++j){ 18 | C(l) = sum(pow(abs(Arow - Ar.row(j)), p)); 19 | l++; 20 | } 21 | } 22 | 23 | return wrap(pow(C, 1/p)); 24 | } 25 | 26 | // [[Rcpp::export]] 27 | NumericMatrix minkowski_pdist(NumericMatrix A, double p) { 28 | int n = A.nrow(), k = A.ncol(); 29 | 30 | arma::mat Ar = arma::mat(A.begin(), n, k, false); 31 | 32 | arma::mat C(n, n); 33 | 34 | for (int i = 0; i < n; ++i){ 35 | arma::mat Arow = Ar.row(i); 36 | for (int j = 0; j < n; ++j){ 37 | C(i, j) = sum(pow(abs(Arow - Ar.row(j)), p)); 38 | } 39 | } 40 | 41 | return wrap(pow(C, 1/p)); 42 | } 43 | 44 | // [[Rcpp::export]] 45 | NumericMatrix minkowski_cdist(NumericMatrix A, NumericMatrix B, double p) { 46 | int m = A.nrow(), n = B.nrow(), k = A.ncol(); 47 | 48 | arma::mat Ar = arma::mat(A.begin(), m, k, false); 49 | arma::mat Br = arma::mat(B.begin(), n, k, false); 50 | 51 | arma::mat C(m, n); 52 | 53 | for (int i = 0; i < m; ++i){ 54 | arma::mat Arow = Ar.row(i); 55 | for (int j = 0; j < n; ++j){ 56 | C(i, j) = sum(pow(abs(Arow - Br.row(j)), p)); 57 | } 58 | } 59 | 60 | return wrap(pow(C, 1/p)); 61 | } 62 | -------------------------------------------------------------------------------- /src/minkowski.h: -------------------------------------------------------------------------------- 1 | #ifndef MINKOWSKI_ 2 | #define MINKOWSKI_ 3 | 4 | #include 5 | 6 | Rcpp::NumericVector minkowski_rdist(Rcpp::NumericMatrix A, double p); 7 | Rcpp::NumericMatrix minkowski_pdist(Rcpp::NumericMatrix A, double p); 8 | Rcpp::NumericMatrix minkowski_cdist(Rcpp::NumericMatrix A, Rcpp::NumericMatrix B, double p); 9 | 10 | #endif // MINKOWSKI_ 11 | -------------------------------------------------------------------------------- /src/triangle.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // [[Rcpp::export]] 4 | bool cpp_triangle_inequality(Rcpp::NumericMatrix mat, double tolerance = 0) { 5 | int n = mat.nrow(); 6 | 7 | for (int i = 0; i < n; ++i){ 8 | for (int j = 0; j < i; ++j){ 9 | for (int k = 0; k < n; ++k){ 10 | bool ieq = mat(i, j) <= mat(i, k) + mat(k, j) + tolerance; 11 | if (!ieq){ 12 | Rcpp::Rcout << "mat[" << i+1 << ", " << j+1 << "] > mat[" << i+1 << ", " << k+1 << "] + mat[" << k+1 << ", " << j+1 << "]" << std::endl; 13 | return false; 14 | } 15 | } 16 | } 17 | } 18 | 19 | return true; 20 | } -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(rdist) 3 | 4 | test_check("rdist") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-canberra.R: -------------------------------------------------------------------------------- 1 | context("canberra") 2 | 3 | test_that("canberra metric works as expected", { 4 | x <- matrix(sample(1:6, 200, replace = TRUE), nrow = 100) 5 | 6 | dist_dist <- dist(x, method = "canberra") 7 | dist_mat <- as.matrix(dist_dist) 8 | attr(dist_mat, "dimnames") <- NULL 9 | # check pdist and cdist 10 | expect_equivalent(dist_dist, rdist(x, metric = "canberra")) 11 | expect_equivalent(dist_mat, pdist(x, metric = "canberra")) 12 | expect_equivalent(dist_mat, cdist(x, x, metric = "canberra")) 13 | expect_equivalent(dist_mat[1:2, 3:100], 14 | cdist(x[1:2, ], x[3:100, ], metric = "canberra")) 15 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE], 16 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "canberra")) 17 | }) 18 | -------------------------------------------------------------------------------- /tests/testthat/test-correlation.R: -------------------------------------------------------------------------------- 1 | context("correlation") 2 | 3 | bound <- function(x, min = -1, max = 1){ 4 | x[x > max] <- max 5 | x[x < min] <- min 6 | x 7 | } 8 | 9 | test_that("correlation metric works as expected", { 10 | x <- matrix(runif(200), nrow = 100) 11 | 12 | # reference results 13 | dist_mat <- sqrt((1 - bound(cor(t(x))))/2) 14 | dist_dist <- as.dist(dist_mat) 15 | # check pdist and cdist 16 | expect_equal(dist_dist, rdist(x, metric = "correlation"), tolerance = 1e-7, check.attributes = FALSE) 17 | expect_equal(dist_mat, pdist(x, metric = "correlation"), tolerance = 1e-7) 18 | expect_equal(dist_mat, cdist(x, x, metric = "correlation"), tolerance = 1e-7) 19 | expect_equal(dist_mat[1:2, 3:100], cdist(x[1:2, ], x[3:100, ], metric = "correlation"), tolerance = 1e-7) 20 | expect_equal(dist_mat[1, 2:100, drop = FALSE], cdist(x[1, , drop = FALSE], x[2:100, ], metric = "correlation"), tolerance = 1e-7) 21 | }) 22 | 23 | test_that("angular metric works as expected", { 24 | x <- matrix(runif(200), nrow = 100) 25 | 26 | # reference results 27 | dist_mat <- acos(bound(cor(t(x)))) 28 | dist_dist <- as.dist(dist_mat) 29 | # check pdist and cdist 30 | expect_equal(dist_dist, rdist(x, metric = "angular"), tolerance = 1e-7, check.attributes = FALSE) 31 | expect_equal(dist_mat, pdist(x, metric = "angular"), tolerance = 1e-7) 32 | expect_equal(dist_mat, cdist(x, x, metric = "angular"), tolerance = 1e-7) 33 | expect_equal(dist_mat[1:2, 3:100], cdist(x[1:2, ], x[3:100, ], metric = "angular"), tolerance = 1e-7) 34 | expect_equal(dist_mat[1, 2:100, drop = FALSE], cdist(x[1, , drop = FALSE], x[2:100, ], metric = "angular"), tolerance = 1e-7) 35 | }) 36 | 37 | test_that("absolute correlation metric works as expected", { 38 | x <- matrix(runif(200), nrow = 100) 39 | 40 | # reference results 41 | dist_mat <- sqrt((1 - bound(cor(t(x)))^2)) 42 | dist_dist <- as.dist(dist_mat) 43 | # check pdist and cdist 44 | expect_equal(dist_dist, rdist(x, metric = "absolute_correlation"), tolerance = 1e-7, check.attributes = FALSE) 45 | expect_equal(dist_mat, pdist(x, metric = "absolute_correlation"), tolerance = 1e-7) 46 | expect_equal(dist_mat, cdist(x, x, metric = "absolute_correlation"), tolerance = 1e-7) 47 | expect_equal(dist_mat[1:2, 3:100], cdist(x[1:2, ], x[3:100, ], metric = "absolute_correlation"), tolerance = 1e-7) 48 | expect_equal(dist_mat[1, 2:100, drop = FALSE], cdist(x[1, , drop = FALSE], x[2:100, ], metric = "absolute_correlation"), tolerance = 1e-7) 49 | }) 50 | -------------------------------------------------------------------------------- /tests/testthat/test-euclidean.R: -------------------------------------------------------------------------------- 1 | context("euclidean") 2 | 3 | test_that("euclidean metric works as expected", { 4 | x <- matrix(runif(200), nrow = 100) 5 | 6 | # reference results 7 | dist_dist <- dist(x) 8 | dist_mat <- as.matrix(dist_dist) 9 | attr(dist_mat, "dimnames") <- NULL 10 | # check pdist and cdist 11 | expect_equivalent(dist_dist, rdist(x)) 12 | expect_equivalent(dist(rbind(x[1:5, ], x[1:5,])), rdist(rbind(x[1:5, ], x[1:5,]))) 13 | expect_equivalent(dist_mat, pdist(x)) 14 | expect_equivalent(dist_mat, cdist(x, x)) 15 | expect_equivalent(dist_mat[1:2, 3:100], cdist(x[1:2, ], x[3:100, ])) 16 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE], cdist(x[1, , drop = FALSE], x[2:100, ])) 17 | }) 18 | 19 | test_that("euclidean metric works when distance is 0", { 20 | a <- c(-1.18541558816627, -1.37865753930762, -0.413418072278032, -0.327468637225544) 21 | b <- rnorm(4) 22 | amat <- matrix(c(a, a), nrow = 2, byrow = TRUE) 23 | bmat <- matrix(c(b, b), nrow = 2, byrow = TRUE) 24 | 25 | expect_equivalent( 26 | dist(amat, method="euclidean"), 27 | rdist::rdist(amat, metric="euclidean") 28 | ) 29 | expect_equivalent( 30 | dist(amat, method = "euclidean"), 31 | rdist::rdist(amat, metric="minkowski") 32 | ) 33 | expect_equivalent( 34 | dist(bmat, method="euclidean"), 35 | rdist::rdist(bmat, metric="euclidean") 36 | ) 37 | expect_equivalent( 38 | dist(bmat, method = "euclidean"), 39 | rdist::rdist(bmat, metric="minkowski") 40 | ) 41 | }) 42 | -------------------------------------------------------------------------------- /tests/testthat/test-hamming.R: -------------------------------------------------------------------------------- 1 | context("hamming") 2 | 3 | test_that("hamming metric works as expected", { 4 | # generate data 5 | x <- matrix(sample(1:5, 200, replace = TRUE), nrow = 100) 6 | 7 | # reference result 8 | n <- nrow(x) 9 | m <- matrix(0, nrow=n, ncol=n) 10 | for(i in seq_len(n - 1)) 11 | for(j in seq(i, n)) 12 | m[j, i] <- m[i, j] <- sum(x[i,] != x[j,]) 13 | dist_mat <- m/ncol(x) 14 | dist_dist <- as.dist(dist_mat) 15 | 16 | # check pdist and cdist 17 | expect_equivalent(dist_dist, rdist(x, metric = "hamming")) 18 | expect_equivalent(dist_mat, pdist(x, metric = "hamming")) 19 | expect_equivalent(dist_mat, cdist(x, x, metric = "hamming")) 20 | expect_equivalent(dist_mat[1:2, 3:100], 21 | cdist(x[1:2, ], x[3:100, ], metric = "hamming")) 22 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE], 23 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "hamming")) 24 | }) 25 | -------------------------------------------------------------------------------- /tests/testthat/test-jaccard.R: -------------------------------------------------------------------------------- 1 | context("jaccard") 2 | 3 | test_that("jaccard metric works as expected", { 4 | # generate data 5 | x <- matrix(sample(0:5, 200, replace = TRUE), nrow = 100) 6 | 7 | # reference result 8 | n <- nrow(x) 9 | m <- matrix(0, nrow=n, ncol=n) 10 | for(i in seq_len(n - 1)) 11 | for(j in seq(i, n)){ 12 | if (all(x[i, ] == x[j, ])) { 13 | m[j, i] <- m[i, j] <- 0 14 | } 15 | else { 16 | m[j, i] <- m[i, j] <- sum(x[i,] != x[j,]) / sum(x[i, ] != 0 | x[j, ] != 0) 17 | } 18 | } 19 | dist_mat <- m 20 | dist_dist <- as.dist(dist_mat) 21 | 22 | # check pdist and cdist 23 | expect_equivalent(dist_dist, rdist(x, metric = "jaccard")) 24 | expect_equivalent(dist_mat, pdist(x, metric = "jaccard")) 25 | expect_equivalent(dist_mat, cdist(x, x, metric = "jaccard")) 26 | expect_equivalent(dist_mat[1:2, 3:100], 27 | cdist(x[1:2, ], x[3:100, ], metric = "jaccard")) 28 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE], 29 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "jaccard")) 30 | }) 31 | -------------------------------------------------------------------------------- /tests/testthat/test-manhattan.R: -------------------------------------------------------------------------------- 1 | context("manhattan") 2 | 3 | test_that("manhattan metric works as expected", { 4 | x <- matrix(rnorm(200), nrow = 100) 5 | 6 | dist_dist <- dist(x, method = "manhattan") 7 | dist_mat <- as.matrix(dist_dist) 8 | attr(dist_mat, "dimnames") <- NULL 9 | # check pdist and cdist 10 | expect_equivalent(dist_dist, rdist(x, metric = "manhattan")) 11 | expect_equivalent(dist_mat, pdist(x, metric = "manhattan")) 12 | expect_equivalent(dist_mat, cdist(x, x, metric = "manhattan")) 13 | expect_equivalent(dist_mat[1:2, 3:100], 14 | cdist(x[1:2, ], x[3:100, ], metric = "manhattan")) 15 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE], 16 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "manhattan")) 17 | }) 18 | -------------------------------------------------------------------------------- /tests/testthat/test-maximum.R: -------------------------------------------------------------------------------- 1 | context("maximum") 2 | 3 | test_that("maximum metric works as expected", { 4 | x <- matrix(rnorm(200), nrow = 100) 5 | 6 | dist_dist <- dist(x, method = "maximum") 7 | dist_mat <- as.matrix(dist_dist) 8 | attr(dist_mat, "dimnames") <- NULL 9 | # check pdist and cdist 10 | expect_equivalent(dist_dist, rdist(x, metric = "maximum")) 11 | expect_equivalent(dist_mat, pdist(x, metric = "maximum")) 12 | expect_equivalent(dist_mat, cdist(x, x, metric = "maximum")) 13 | expect_equivalent(dist_mat[1:2, 3:100], 14 | cdist(x[1:2, ], x[3:100, ], metric = "maximum")) 15 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE], 16 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "maximum")) 17 | }) 18 | -------------------------------------------------------------------------------- /tests/testthat/test-minkowski.R: -------------------------------------------------------------------------------- 1 | context("minkowski") 2 | 3 | test_that("minkowski metric works as expected", { 4 | x <- matrix(rnorm(200), nrow = 100) 5 | 6 | # p = 2 7 | p <- 2 8 | dist_dist <- dist(x, method = "minkowski", p = p) 9 | dist_mat <- as.matrix(dist_dist) 10 | attr(dist_mat, "dimnames") <- NULL 11 | # check pdist and cdist 12 | expect_equivalent(dist_dist, rdist(x, metric = "minkowski", p = p)) 13 | expect_equivalent(dist_mat, pdist(x, metric = "minkowski", p = p)) 14 | expect_equivalent(dist_mat, cdist(x, x, metric = "minkowski", p = p)) 15 | expect_equivalent(dist_mat[1:2, 3:100], 16 | cdist(x[1:2, ], x[3:100, ], metric = "minkowski", p = p)) 17 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE], 18 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "minkowski", p = p)) 19 | 20 | # p = 1: 21 | p <- 1 22 | dist_dist <- dist(x, method = "minkowski", p = p) 23 | dist_mat <- as.matrix(dist_dist) 24 | attr(dist_mat, "dimnames") <- NULL 25 | # check pdist and cdist 26 | expect_equivalent(dist_dist, rdist(x, metric = "minkowski", p = p)) 27 | expect_equivalent(dist_mat, pdist(x, metric = "minkowski", p = p)) 28 | expect_equivalent(dist_mat, cdist(x, x, metric = "minkowski", p = p)) 29 | expect_equivalent(dist_mat[1:2, 3:100], 30 | cdist(x[1:2, ], x[3:100, ], metric = "minkowski", p = p)) 31 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE], 32 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "minkowski", p = p)) 33 | 34 | # p = 100: 35 | p <- 100 36 | dist_dist <- dist(x, method = "minkowski", p = p) 37 | dist_mat <- as.matrix(dist_dist) 38 | attr(dist_mat, "dimnames") <- NULL 39 | # check pdist and cdist 40 | expect_equivalent(dist_dist, rdist(x, metric = "minkowski", p = p)) 41 | expect_equivalent(dist_mat, pdist(x, metric = "minkowski", p = p)) 42 | expect_equivalent(dist_mat, cdist(x, x, metric = "minkowski", p = p)) 43 | expect_equivalent(dist_mat[1:2, 3:100], 44 | cdist(x[1:2, ], x[3:100, ], metric = "minkowski", p = p)) 45 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE], 46 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "minkowski", p = p)) 47 | }) 48 | -------------------------------------------------------------------------------- /tests/testthat/test-user-defined.R: -------------------------------------------------------------------------------- 1 | context("minkowski") 2 | 3 | test_that("user-defined metric works as expected", { 4 | x <- matrix(rnorm(200), nrow = 100) 5 | 6 | # euclidean metric 7 | euclidean_function <- function(x, y){ 8 | sqrt(sum((x-y)^2)) 9 | } 10 | dist_dist <- dist(x, method = "euclidean") 11 | dist_mat <- as.matrix(dist_dist) 12 | attr(dist_mat, "dimnames") <- NULL 13 | # check pdist and cdist 14 | expect_equivalent(dist_dist, rdist(x, metric = euclidean_function)) 15 | expect_equivalent(dist_mat, pdist(x, metric = euclidean_function)) 16 | expect_equivalent(dist_mat, cdist(x, x, metric = euclidean_function)) 17 | expect_equivalent(dist_mat[1:2, 3:100], 18 | cdist(x[1:2, ], x[3:100, ], metric = euclidean_function)) 19 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE], 20 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = euclidean_function)) 21 | 22 | # manhattan metric 23 | manhattan_function <- function(x, y){ 24 | sum(abs(x-y)) 25 | } 26 | dist_dist <- dist(x, method = "manhattan") 27 | dist_mat <- as.matrix(dist_dist) 28 | attr(dist_mat, "dimnames") <- NULL 29 | # check pdist and cdist 30 | expect_equivalent(dist_dist, rdist(x, metric = manhattan_function)) 31 | expect_equivalent(dist_mat, pdist(x, metric = manhattan_function)) 32 | expect_equivalent(dist_mat, cdist(x, x, metric = manhattan_function)) 33 | expect_equivalent(dist_mat[1:2, 3:100], 34 | cdist(x[1:2, ], x[3:100, ], metric = manhattan_function)) 35 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE], 36 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = manhattan_function)) 37 | 38 | # maximum metric 39 | maximum_function <- function(x, y){ 40 | max(abs(x-y)) 41 | } 42 | dist_dist <- dist(x, method = "maximum") 43 | dist_mat <- as.matrix(dist_dist) 44 | attr(dist_mat, "dimnames") <- NULL 45 | # check pdist and cdist 46 | expect_equivalent(dist_dist, rdist(x, metric = maximum_function)) 47 | expect_equivalent(dist_mat, pdist(x, metric = maximum_function)) 48 | expect_equivalent(dist_mat, cdist(x, x, metric = maximum_function)) 49 | expect_equivalent(dist_mat[1:2, 3:100], 50 | cdist(x[1:2, ], x[3:100, ], metric = maximum_function)) 51 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE], 52 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = maximum_function)) 53 | }) 54 | --------------------------------------------------------------------------------