├── .Rbuildignore
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
├── FUN_distances.R
├── RcppExports.R
├── correlation_distances.R
├── distance_functions.r
├── farthest_point_sampling.R
├── is_metric.R
├── product_metric.R
└── rdist-package.r
├── README.md
├── docs
├── authors.html
├── docsearch.css
├── index.html
├── jquery.sticky-kit.min.js
├── link.svg
├── news
│ └── index.html
├── pkgdown.css
├── pkgdown.js
├── pkgdown.yml
└── reference
│ ├── farthest_point_sampling.html
│ ├── index.html
│ ├── is_metric.html
│ ├── product_metric.html
│ └── rdist.html
├── man
├── farthest_point_sampling.Rd
├── is_metric.Rd
├── product_metric.Rd
└── rdist.Rd
├── src
├── .gitignore
├── Makevars
├── Makevars.win
├── RcppExports.cpp
├── canberra.cpp
├── canberra.h
├── dist.cpp
├── dist.h
├── euclidean.cpp
├── farthest_point_sampling.cpp
├── hamming.cpp
├── hamming.h
├── init.c
├── jaccard.cpp
├── jaccard.h
├── manhattan.cpp
├── manhattan.h
├── maximum.cpp
├── maximum.h
├── minkowski.cpp
├── minkowski.h
└── triangle.cpp
└── tests
├── testthat.R
└── testthat
├── test-canberra.R
├── test-correlation.R
├── test-euclidean.R
├── test-hamming.R
├── test-jaccard.R
├── test-manhattan.R
├── test-maximum.R
├── test-minkowski.R
└── test-user-defined.R
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | docs
2 | ^\.travis\.yml$
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # History files
2 | .Rhistory
3 | .Rapp.history
4 |
5 | # Session Data files
6 | .RData
7 |
8 | # Example code in package build process
9 | *-Ex.R
10 |
11 | # Output files from R CMD build
12 | /*.tar.gz
13 |
14 | # Output files from R CMD check
15 | /*.Rcheck/
16 |
17 | # RStudio files
18 | .Rproj.user/
19 |
20 | # produced vignettes
21 | vignettes/*.html
22 | vignettes/*.pdf
23 |
24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
25 | .httr-oauth
26 |
27 | # knitr and R markdown default cache directories
28 | /*_cache/
29 | /cache/
30 |
31 | # Temporary files created by R markdown
32 | *.utf8.md
33 | *.knit.md
34 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
2 |
3 | language: R
4 | sudo: false
5 | cache: packages
6 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: rdist
2 | Title: Calculate Pairwise Distances
3 | Version: 0.0.6
4 | Authors@R: person("Nello", "Blaser", email = "nello.blaser@uib.no", role = c("aut", "cre"))
5 | Description: A common framework for calculating distance matrices.
6 | Depends:
7 | R (>= 3.2.2)
8 | License: GPL
9 | URL: https://github.com/blasern/rdist
10 | BugReports: https://github.com/blasern/rdist/issues
11 | Encoding: UTF-8
12 | LinkingTo: Rcpp, RcppArmadillo
13 | Imports:
14 | Rcpp, methods
15 | RoxygenNote: 7.2.3
16 | Suggests: testthat
17 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(cdist)
4 | export(farthest_point_sampling)
5 | export(is_distance_matrix)
6 | export(pdist)
7 | export(product_metric)
8 | export(rdist)
9 | export(triangle_inequality)
10 | importFrom(Rcpp,sourceCpp)
11 | importFrom(methods,is)
12 | importFrom(stats,as.dist)
13 | importFrom(stats,cor)
14 | useDynLib(rdist, .registration = TRUE)
15 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # rdist 0.0.6
2 | - Minor fix for CRAN
3 |
4 | # rdist 0.0.5
5 | - Minor fix for CRAN
6 |
7 | # rdist 0.0.4
8 | - Some bugfixes to make CRAN compatible
9 |
10 | # rdist 0.0.3
11 | - Added documentation pages
12 | - Added `product_metric`
13 | - Added `farthest_point_sampling`
14 | - Added `is_distance_matrix` and `triangle_inequality` checks
15 |
16 | # rdist 0.0.2
17 | - Fixed bug in jaccard distance
18 | - Fixed bug in euclidean distance
19 | - Added labels to rdist output
20 |
21 | # rdist 0.0.1
22 | An R package to calculate distances. This provide a common framework to calculate distances.
23 | There are three main functions:
24 | - `rdist` computes the pairwise distances between observations in one matrix and returns a `dist` object,
25 | - `pdist` computes the pairwise distances between observations in one matrix and returns a `matrix`, and
26 | - `cdist` computes the distances between observations in two matrices and returns a `matrix`.
27 |
28 | All functions have an argument `metric` that can be used to specify the distance function. Available metrics are `"euclidean"`, `"minkowski"`, `"manhattan"`, `"maximum"`, `"canberra"`, `"angular"`, `"correlation"`, `"absolute_correlation"`, `"hamming"`, and `"jaccard"`.
29 | All functions will return NA or NaN when one of the compared vectors contains NAs.
30 |
31 | ## Installation
32 |
33 | To install the latest released version from CRAN:
34 |
35 | install.packages("rdist")
36 |
37 | To install the latest development version from github:
38 |
39 | install.packages("devtools")
40 | devtools::install_github("blasern/rdist")
41 |
42 | ## Development
43 |
44 | If you find issues, please [let me know](https://github.com/blasern/rdist/issues).
45 | If you would like to contribute, please [create a pull request](https://github.com/blasern/rdist/compare).
46 |
--------------------------------------------------------------------------------
/R/FUN_distances.R:
--------------------------------------------------------------------------------
1 | # distances with user-defined functions
2 | FUN_rdist <- function(X, metric = function(x, y) sqrt(sum((x-y)^2))){
3 | as.dist(FUN_pdist(X, metric = metric))
4 | }
5 |
6 | FUN_pdist <- function(X, metric = function(x, y) sqrt(sum((x-y)^2))){
7 | res <- apply(X, 1, function(x){
8 | apply(X, 1, function(y){
9 | metric(x, y)
10 | })
11 | })
12 | # fixing dimensions
13 | dim(res) <- rep(nrow(X), 2)
14 | res
15 | }
16 |
17 | FUN_cdist <- function(X, Y, metric = function(x, y) sqrt(sum((x-y)^2))){
18 | res <- apply(Y, 1, function(y){
19 | apply(X, 1, function(x){
20 | metric(x, y)
21 | })
22 | })
23 | # fixing dimensions
24 | dim(res) <- c(nrow(X), nrow(Y))
25 | res
26 | }
27 |
--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 |
4 | canberra_rdist <- function(A) {
5 | .Call(`_rdist_canberra_rdist`, A)
6 | }
7 |
8 | canberra_pdist <- function(A) {
9 | .Call(`_rdist_canberra_pdist`, A)
10 | }
11 |
12 | canberra_cdist <- function(A, B) {
13 | .Call(`_rdist_canberra_cdist`, A, B)
14 | }
15 |
16 | rdist_cpp <- function(A, metric, p = 2.0) {
17 | .Call(`_rdist_rdist_cpp`, A, metric, p)
18 | }
19 |
20 | pdist_cpp <- function(A, metric, p = 2.0) {
21 | .Call(`_rdist_pdist_cpp`, A, metric, p)
22 | }
23 |
24 | cdist_cpp <- function(A, B, metric, p = 2.0) {
25 | .Call(`_rdist_cdist_cpp`, A, B, metric, p)
26 | }
27 |
28 | euclidean_rdist <- function(A) {
29 | .Call(`_rdist_euclidean_rdist`, A)
30 | }
31 |
32 | euclidean_pdist <- function(A) {
33 | .Call(`_rdist_euclidean_pdist`, A)
34 | }
35 |
36 | euclidean_cdist <- function(A, B) {
37 | .Call(`_rdist_euclidean_cdist`, A, B)
38 | }
39 |
40 | farthest_point_sampling_cpp <- function(mat, metric, k, initial_point_index = 0L, return_clusters = FALSE) {
41 | .Call(`_rdist_farthest_point_sampling_cpp`, mat, metric, k, initial_point_index, return_clusters)
42 | }
43 |
44 | hamming_rdist <- function(A) {
45 | .Call(`_rdist_hamming_rdist`, A)
46 | }
47 |
48 | hamming_pdist <- function(A) {
49 | .Call(`_rdist_hamming_pdist`, A)
50 | }
51 |
52 | hamming_cdist <- function(A, B) {
53 | .Call(`_rdist_hamming_cdist`, A, B)
54 | }
55 |
56 | jaccard_rdist <- function(A) {
57 | .Call(`_rdist_jaccard_rdist`, A)
58 | }
59 |
60 | jaccard_pdist <- function(A) {
61 | .Call(`_rdist_jaccard_pdist`, A)
62 | }
63 |
64 | jaccard_cdist <- function(A, B) {
65 | .Call(`_rdist_jaccard_cdist`, A, B)
66 | }
67 |
68 | manhattan_rdist <- function(A) {
69 | .Call(`_rdist_manhattan_rdist`, A)
70 | }
71 |
72 | manhattan_pdist <- function(A) {
73 | .Call(`_rdist_manhattan_pdist`, A)
74 | }
75 |
76 | manhattan_cdist <- function(A, B) {
77 | .Call(`_rdist_manhattan_cdist`, A, B)
78 | }
79 |
80 | maximum_rdist <- function(A) {
81 | .Call(`_rdist_maximum_rdist`, A)
82 | }
83 |
84 | maximum_pdist <- function(A) {
85 | .Call(`_rdist_maximum_pdist`, A)
86 | }
87 |
88 | maximum_cdist <- function(A, B) {
89 | .Call(`_rdist_maximum_cdist`, A, B)
90 | }
91 |
92 | minkowski_rdist <- function(A, p) {
93 | .Call(`_rdist_minkowski_rdist`, A, p)
94 | }
95 |
96 | minkowski_pdist <- function(A, p) {
97 | .Call(`_rdist_minkowski_pdist`, A, p)
98 | }
99 |
100 | minkowski_cdist <- function(A, B, p) {
101 | .Call(`_rdist_minkowski_cdist`, A, B, p)
102 | }
103 |
104 | cpp_triangle_inequality <- function(mat, tolerance = 0) {
105 | .Call(`_rdist_cpp_triangle_inequality`, mat, tolerance)
106 | }
107 |
108 |
--------------------------------------------------------------------------------
/R/correlation_distances.R:
--------------------------------------------------------------------------------
1 | bound <- function(x, min = -1, max = 1){
2 | x[x > max] <- max
3 | x[x < min] <- min
4 | x
5 | }
6 |
7 | # correlation distances
8 | correlation_rdist <- function(X){
9 | stats::as.dist(sqrt((1 - bound(stats::cor(t(X))))/2))
10 | }
11 | correlation_pdist <- function(X){
12 | sqrt((1 - bound(stats::cor(t(X))))/2)
13 | }
14 | correlation_cdist <- function(X, Y){
15 | sqrt((1 - bound(stats::cor(t(X), t(Y))))/2)
16 | }
17 |
18 | # angular distances
19 | angular_rdist <- function(X){
20 | stats::as.dist(acos(bound(stats::cor(t(X)))))
21 | }
22 | angular_pdist <- function(X){
23 | acos(bound(stats::cor(t(X))))
24 | }
25 | angular_cdist <- function(X, Y){
26 | acos(bound(stats::cor(t(X), t(Y))))
27 | }
28 |
29 | # absolute correlation distances
30 | absolute_correlation_rdist <- function(X){
31 | stats::as.dist(sqrt(1 - bound(stats::cor(t(X))) ^ 2))
32 | }
33 | absolute_correlation_pdist <- function(X){
34 | sqrt(1 - bound(stats::cor(t(X))) ^ 2)
35 | }
36 | absolute_correlation_cdist <- function(X, Y){
37 | sqrt(1 - bound(stats::cor(t(X), t(Y))) ^ 2)
38 | }
--------------------------------------------------------------------------------
/R/distance_functions.r:
--------------------------------------------------------------------------------
1 | available_metrics <- c("euclidean", "minkowski", "manhattan",
2 | "chebyshev", "maximum", "canberra",
3 | "angular", "correlation", "absolute_correlation",
4 | "hamming", "jaccard", "user")
5 |
6 | #' @rdname rdist
7 | #' @export
8 | rdist <- function(X,
9 | metric = "euclidean",
10 | p = 2L){
11 | if (is.function(metric)) {
12 | FUN <- metric
13 | metric <- "user"
14 | }
15 | # make sure input is well-defined
16 | metric <- match.arg(metric, available_metrics)
17 | X <- as.matrix(X)
18 | # use metric
19 | ans <- switch(metric,
20 | "euclidean" = minkowski_rdist(X, p = 2L),
21 | "minkowski" = minkowski_rdist(X, p = p),
22 | "manhattan" = manhattan_rdist(X),
23 | "chebyshev" = maximum_rdist(X),
24 | "maximum" = maximum_rdist(X),
25 | "canberra" = canberra_rdist(X),
26 | "angular" = angular_rdist(X),
27 | "correlation" = correlation_rdist(X),
28 | "absolute_correlation" = absolute_correlation_rdist(X),
29 | "hamming" = hamming_rdist(X),
30 | "jaccard" = jaccard_rdist(X),
31 | "user" = FUN_rdist(X, metric = FUN))
32 | # change attributes
33 | attributes(ans) <- NULL
34 | attr(ans, "Size") <- nrow(X)
35 | attr(ans, "call") <- match.call()
36 | attr(ans, "method") <- metric
37 | attr(ans, "Labels") <- dimnames(X)[[1L]]
38 | class(ans) <- "dist"
39 | return(ans)
40 | }
41 |
42 | #' @rdname rdist
43 | #' @export
44 | pdist <- function(X,
45 | metric = "euclidean",
46 | p = 2){
47 | if (is.function(metric)) {
48 | FUN <- metric
49 | metric <- "user"
50 | }
51 | # make sure input is well-defined
52 | metric <- match.arg(metric, available_metrics)
53 | X <- as.matrix(X)
54 | # use metric
55 | switch(metric,
56 | "euclidean" = minkowski_pdist(X, p = 2L),
57 | "minkowski" = minkowski_pdist(X, p = p),
58 | "manhattan" = manhattan_pdist(X),
59 | "chebyshev" = maximum_pdist(X),
60 | "maximum" = maximum_pdist(X),
61 | "canberra" = canberra_pdist(X),
62 | "angular" = angular_pdist(X),
63 | "correlation" = correlation_pdist(X),
64 | "absolute_correlation" = absolute_correlation_pdist(X),
65 | "hamming" = hamming_pdist(X),
66 | "jaccard" = jaccard_pdist(X),
67 | "user" = FUN_pdist(X, metric = FUN))
68 | }
69 |
70 | #' @rdname rdist
71 | #' @export
72 | cdist <- function(X, Y,
73 | metric = "euclidean",
74 | p = 2){
75 | if (is.function(metric)) {
76 | FUN <- metric
77 | metric <- "user"
78 | }
79 | # make sure input is well-defined
80 | metric <- match.arg(metric, available_metrics)
81 | X <- as.matrix(X)
82 | Y <- as.matrix(Y)
83 | stopifnot(ncol(X) == ncol(Y))
84 | # use metric
85 | switch(metric,
86 | "euclidean" = minkowski_cdist(X, Y, p = 2L),
87 | "minkowski" = minkowski_cdist(X, Y, p = p),
88 | "manhattan" = manhattan_cdist(X, Y),
89 | "chebyshev" = maximum_cdist(X, Y),
90 | "maximum" = maximum_cdist(X, Y),
91 | "canberra" = canberra_cdist(X, Y),
92 | "angular" = angular_cdist(X, Y),
93 | "correlation" = correlation_cdist(X, Y),
94 | "absolute_correlation" = absolute_correlation_cdist(X, Y),
95 | "hamming" = hamming_cdist(X, Y),
96 | "jaccard" = jaccard_cdist(X, Y),
97 | "user" = FUN_cdist(X, Y, metric = FUN))
98 | }
--------------------------------------------------------------------------------
/R/farthest_point_sampling.R:
--------------------------------------------------------------------------------
1 | #' Farthest point sampling
2 | #'
3 | #' Farthest point sampling returns a reordering of the metric
4 | #' space P = {p_1, ..., p_k}, such that each p_i is the farthest
5 | #' point from the first i-1 points.
6 | #'
7 | #' @param mat Original distance matrix
8 | #' @param metric Distance metric to use (either "precomputed" or a metric from \code{\link{rdist}})
9 | #' @param k Number of points to sample
10 | #' @param initial_point_index Index of p_1
11 | #' @param return_clusters Should the indices of the closest farthest points be returned?
12 | #'
13 | #' @examples
14 | #'
15 | #' # generate data
16 | #' df <- matrix(runif(200), ncol = 2)
17 | #' dist_mat <- pdist(df)
18 | #' # farthest point sampling
19 | #' fps <- farthest_point_sampling(dist_mat)
20 | #' fps2 <- farthest_point_sampling(df, metric = "euclidean")
21 | #' all.equal(fps, fps2)
22 | #' # have a look at the fps distance matrix
23 | #' rdist(df[fps[1:5], ])
24 | #' dist_mat[fps, fps][1:5, 1:5]
25 | #' @export
26 | farthest_point_sampling <- function(mat,
27 | metric = "precomputed",
28 | k = nrow(mat),
29 | initial_point_index = 1L,
30 | return_clusters = FALSE){
31 | metric <- match.arg(metric, c("precomputed", available_metrics))
32 | mat <- as.matrix(mat)
33 | initial_point_index <- as.integer(initial_point_index)
34 | k <- as.integer(k)
35 | # sanity check
36 | if (metric == "precomputed"){
37 | stopifnot(nrow(mat) == ncol(mat))
38 | }
39 | stopifnot(initial_point_index >= 0L)
40 | # farthest point sampling
41 | fps <- farthest_point_sampling_cpp(mat, metric, k, initial_point_index, return_clusters)
42 | if (return_clusters){
43 | clusters <- as.integer(attr(fps, "clusters"))
44 | fps <- as.integer(fps)
45 | return(list(fps, clusters))
46 | }
47 | else {
48 | fps <- as.integer(fps)
49 | return(fps)
50 | }
51 | }
--------------------------------------------------------------------------------
/R/is_metric.R:
--------------------------------------------------------------------------------
1 | #' Metric and triangle inequality
2 | #'
3 | #' Does the distance matric come from a metric
4 | #'
5 | #' @param mat The matrix to evaluate
6 | #' @param tolerance Differences smaller than tolerance are not reported.
7 | #' @name is_metric
8 | #' @examples
9 | #' data <- matrix(rnorm(20), ncol = 2)
10 | #' dm <- pdist(data)
11 | #' is_distance_matrix(dm)
12 | #' triangle_inequality(dm)
13 | #'
14 | #' dm[1, 2] <- 1.1 * dm[1, 2]
15 | #' is_distance_matrix(dm)
16 | #' @export
17 | is_distance_matrix <- function(mat, tolerance = .Machine$double.eps ^ 0.5){
18 | mat <- as.matrix(mat)
19 | msg <- character(0)
20 | #
21 | not_square <- nrow(mat) != ncol(mat)
22 | if (not_square){
23 | msg <- c(msg, "Matrix is not square.")
24 | assymetric <- TRUE
25 | }
26 | else {
27 | assymetric <- !isTRUE(all.equal(mat, t(mat), tolerance = tolerance))
28 | if (assymetric){
29 | msg <- c(msg, "Matrix is not symmetric.")
30 | }
31 | }
32 | negative <- any(mat < 0 - tolerance)
33 | if (negative){
34 | msg <- c(msg, "Matrix is not positive.")
35 | }
36 | identity <- !isTRUE(all.equal(diag(mat), rep(0, nrow(mat))))
37 | if (identity){
38 | msg <- c(msg, "Diagonal is not zero.")
39 | }
40 | if (!(not_square | assymetric | negative | identity)){
41 | triangle <- triangle_inequality(mat, tolerance = tolerance)
42 | if (!triangle){
43 | msg <- c(msg, "Matrix does not satisfy triangle inequality.")
44 | }
45 | }
46 | dm <- length(msg) == 0
47 | if (!dm){
48 | cat(paste(msg, "\n", collapse = ""))
49 | }
50 | return(dm)
51 | }
52 |
53 | #' @rdname is_metric
54 | #' @export
55 | triangle_inequality <- function(mat, tolerance = .Machine$double.eps ^ 0.5){
56 | mat <- as.matrix(mat)
57 | stopifnot(nrow(mat) == ncol(mat), all.equal(mat, t(mat), tolerance = tolerance))
58 | cpp_triangle_inequality(mat, tolerance)
59 | }
--------------------------------------------------------------------------------
/R/product_metric.R:
--------------------------------------------------------------------------------
1 | #' Product metric
2 | #'
3 | #' Returns the p-product metric of two metric spaces.
4 | #' Works for output of `rdist`, `pdist` or `cdist`.
5 | #'
6 | #' @param ... Distance matrices or dist objects
7 | #' @param p The power of the Minkowski distance
8 | #'
9 | #' @examples
10 | #' # generate data
11 | #' df <- matrix(runif(200), ncol = 2)
12 | #' # distance matrices
13 | #' dist_mat <- pdist(df)
14 | #' dist_1 <- pdist(df[, 1])
15 | #' dist_2 <- pdist(df[, 2])
16 | #' # product distance matrix
17 | #' dist_prod <- product_metric(dist_1, dist_2)
18 | #' # check equality
19 | #' all.equal(dist_mat, dist_prod)
20 | #' @importFrom methods is
21 | #' @export
22 | product_metric <- function(..., p = 2){
23 | metrics <- list(...)
24 | # check for equal dimension
25 | stopifnot(length(unique(lapply(metrics, dim))) == 1,
26 | length(unique(lapply(metrics, length))) == 1)
27 | if (p < Inf){
28 | p_metrics <- lapply(metrics, `^`, p)
29 | p_result <- Reduce(`+`, p_metrics)
30 | res <- p_result ^ (1/p)
31 | }
32 | else {
33 | res <- pmax(...)
34 | }
35 | if (methods::is(metrics[[1]], "dist")) {
36 | attr(res, "call") <- match.call()
37 | }
38 | return(res)
39 | }
--------------------------------------------------------------------------------
/R/rdist-package.r:
--------------------------------------------------------------------------------
1 | #' rdist: an R package for distances
2 | #'
3 | #' \code{rdist} provide a common framework to calculate distances. There are three main functions:
4 | #' \itemize{
5 | #' \item \code{rdist} computes the pairwise distances between observations in one matrix and returns a \code{dist} object,
6 | #' \item \code{pdist} computes the pairwise distances between observations in one matrix and returns a \code{matrix}, and
7 | #' \item \code{cdist} computes the distances between observations in two matrices and returns a \code{matrix}.
8 | #' }
9 | #' In particular the \code{cdist} function is often missing in other distance functions. All
10 | #' calculations involving \code{NA} values will consistently return \code{NA}.
11 | #'
12 | #' @details Available distance measures are (written for two vectors v and w):
13 | #' \itemize{
14 | #' \item \code{"euclidean"}: \eqn{\sqrt{\sum_i(v_i - w_i)^2}}{sqrt(sum_i((v_i - w_i)^2))}
15 | #' \item \code{"minkowski"}: \eqn{(\sum_i|v_i - w_i|^p)^{1/p}}{(sum_i(|v_i - w_i|^p))^{1/p}}
16 | #' \item \code{"manhattan"}: \eqn{\sum_i(|v_i-w_i|)}{sum_i(|v_i-w_i|)}
17 | #' \item \code{"maximum"} or \code{"chebyshev"}: \eqn{\max_i(|v_i-w_i|)}{max_i(|v_i-w_i|)}
18 | #' \item \code{"canberra"}: \eqn{\sum_i(\frac{|v_i-w_i|}{|v_i|+|w_i|})}{sum_i(|v_i-w_i|/(|v_i|+|w_i|))}
19 | #' \item \code{"angular"}: \eqn{\cos^{-1}(cor(v, w))}{arccos(cor(v, w))}
20 | #' \item \code{"correlation"}: \eqn{\sqrt{\frac{1-cor(v, w)}{2}}}{sqrt((1-cor(v, w))/2)}
21 | #' \item \code{"absolute_correlation"}: \eqn{\sqrt{1-|cor(v, w)|^2}}{sqrt((1-|cor(v, w)|^2))}
22 | #' \item \code{"hamming"}: \eqn{(\sum_i v_i \neq w_i) / \sum_i 1}{sum_i(v_i != w_i)/sum_i(1)}
23 | #' \item \code{"jaccard"}: \eqn{(\sum_i v_i \neq w_i) / \sum_i 1_{v_i \neq 0 \cup w_i \neq 0}}{sum_i(v_i != w_i)/sum_i(v_i != 0 or w_i != 0)}
24 | #' \item Any function that defines a distance between two vectors.
25 | #' }
26 | #' @param X,Y A matrix
27 | #' @param metric The distance metric to use
28 | #' @param p The power of the Minkowski distance
29 | #' @name rdist
30 | #' @aliases rdist-package
31 | #' @docType package
32 | #' @useDynLib rdist, .registration = TRUE
33 | #' @importFrom Rcpp sourceCpp
34 | #' @importFrom stats as.dist cor
35 | NULL
36 |
37 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://travis-ci.org/blasern/rdist)
2 |
3 | # rdist
4 |
5 | An R package to calculate distances. This provide a common framework to calculate distances.
6 | There are three main functions:
7 | - `rdist` computes the pairwise distances between observations in one matrix and returns a `dist` object,
8 | - `pdist` computes the pairwise distances between observations in one matrix and returns a `matrix`, and
9 | - `cdist` computes the distances between observations in two matrices and returns a `matrix`.
10 |
11 | All functions have an argument `metric` that can be used to specify the distance function. Available metrics are `"euclidean"`, `"minkowski"`, `"manhattan"`, `"maximum"`, `"canberra"`, `"angular"`, `"correlation"`, `"absolute_correlation"`, `"hamming"`, and `"jaccard"`. In addition the metric can be any function that takes two vectors as arguments and returns their distance.
12 | All predefined functions will return NA or NaN when one of the compared vectors contains NAs.
13 |
14 | ## Installation
15 |
16 | To install the latest released version from CRAN:
17 |
18 | install.packages("rdist")
19 |
20 | To install the latest development version from github:
21 |
22 | install.packages("devtools")
23 | devtools::install_github("blasern/rdist")
24 |
25 | ## Development
26 |
27 | If you find issues, please [let me know](https://github.com/blasern/rdist/issues).
28 | If you would like to contribute, please [create a pull request](https://github.com/blasern/rdist/compare).
29 |
--------------------------------------------------------------------------------
/docs/authors.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Authors • rdist
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
93 |
94 |
95 |
96 |
99 |
100 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
--------------------------------------------------------------------------------
/docs/docsearch.css:
--------------------------------------------------------------------------------
1 | /* Docsearch -------------------------------------------------------------- */
2 | /*
3 | Source: https://github.com/algolia/docsearch/
4 | License: MIT
5 | */
6 |
7 | .algolia-autocomplete {
8 | display: block;
9 | -webkit-box-flex: 1;
10 | -ms-flex: 1;
11 | flex: 1
12 | }
13 |
14 | .algolia-autocomplete .ds-dropdown-menu {
15 | width: 100%;
16 | min-width: none;
17 | max-width: none;
18 | padding: .75rem 0;
19 | background-color: #fff;
20 | background-clip: padding-box;
21 | border: 1px solid rgba(0, 0, 0, .1);
22 | box-shadow: 0 .5rem 1rem rgba(0, 0, 0, .175);
23 | }
24 |
25 | @media (min-width:768px) {
26 | .algolia-autocomplete .ds-dropdown-menu {
27 | width: 175%
28 | }
29 | }
30 |
31 | .algolia-autocomplete .ds-dropdown-menu::before {
32 | display: none
33 | }
34 |
35 | .algolia-autocomplete .ds-dropdown-menu [class^=ds-dataset-] {
36 | padding: 0;
37 | background-color: rgb(255,255,255);
38 | border: 0;
39 | max-height: 80vh;
40 | }
41 |
42 | .algolia-autocomplete .ds-dropdown-menu .ds-suggestions {
43 | margin-top: 0
44 | }
45 |
46 | .algolia-autocomplete .algolia-docsearch-suggestion {
47 | padding: 0;
48 | overflow: visible
49 | }
50 |
51 | .algolia-autocomplete .algolia-docsearch-suggestion--category-header {
52 | padding: .125rem 1rem;
53 | margin-top: 0;
54 | font-size: 1.3em;
55 | font-weight: 500;
56 | color: #00008B;
57 | border-bottom: 0
58 | }
59 |
60 | .algolia-autocomplete .algolia-docsearch-suggestion--wrapper {
61 | float: none;
62 | padding-top: 0
63 | }
64 |
65 | .algolia-autocomplete .algolia-docsearch-suggestion--subcategory-column {
66 | float: none;
67 | width: auto;
68 | padding: 0;
69 | text-align: left
70 | }
71 |
72 | .algolia-autocomplete .algolia-docsearch-suggestion--content {
73 | float: none;
74 | width: auto;
75 | padding: 0
76 | }
77 |
78 | .algolia-autocomplete .algolia-docsearch-suggestion--content::before {
79 | display: none
80 | }
81 |
82 | .algolia-autocomplete .ds-suggestion:not(:first-child) .algolia-docsearch-suggestion--category-header {
83 | padding-top: .75rem;
84 | margin-top: .75rem;
85 | border-top: 1px solid rgba(0, 0, 0, .1)
86 | }
87 |
88 | .algolia-autocomplete .ds-suggestion .algolia-docsearch-suggestion--subcategory-column {
89 | display: block;
90 | padding: .1rem 1rem;
91 | margin-bottom: 0.1;
92 | font-size: 1.0em;
93 | font-weight: 400
94 | /* display: none */
95 | }
96 |
97 | .algolia-autocomplete .algolia-docsearch-suggestion--title {
98 | display: block;
99 | padding: .25rem 1rem;
100 | margin-bottom: 0;
101 | font-size: 0.9em;
102 | font-weight: 400
103 | }
104 |
105 | .algolia-autocomplete .algolia-docsearch-suggestion--text {
106 | padding: 0 1rem .5rem;
107 | margin-top: -.25rem;
108 | font-size: 0.8em;
109 | font-weight: 400;
110 | line-height: 1.25
111 | }
112 |
113 | .algolia-autocomplete .algolia-docsearch-footer {
114 | float: none;
115 | width: auto;
116 | height: auto;
117 | padding: .75rem 1rem 0;
118 | font-size: .95rem;
119 | line-height: 1;
120 | color: #767676;
121 | background-color: rgb(255, 255, 255);
122 | border-top: 1px solid rgba(0, 0, 0, .1)
123 | }
124 |
125 | .algolia-autocomplete .algolia-docsearch-footer--logo {
126 | display: inline;
127 | overflow: visible;
128 | color: inherit;
129 | text-indent: 0;
130 | background: 0 0
131 | }
132 |
133 | .algolia-autocomplete .algolia-docsearch-suggestion--highlight {
134 | color: #FF8C00;
135 | background: rgba(232, 189, 54, 0.1)
136 | }
137 |
138 |
139 | .algolia-autocomplete .algolia-docsearch-suggestion--text .algolia-docsearch-suggestion--highlight {
140 | box-shadow: inset 0 -2px 0 0 rgba(105, 105, 105, .5)
141 | }
142 |
143 | .algolia-autocomplete .ds-suggestion.ds-cursor .algolia-docsearch-suggestion--content {
144 | background-color: rgba(192, 192, 192, .15)
145 | }
146 |
--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 | Calculate Pairwise Distances • rdist
9 |
10 |
11 |
12 |
13 |
14 |
15 |
19 |
20 |
21 |
22 |
68 |
69 |
70 |
72 |
An R package to calculate distances. This provide a common framework to calculate distances.
73 | There are three main functions:
74 |
75 |
76 | rdist
computes the pairwise distances between observations in one matrix and returns a dist
object,
77 |
78 | pdist
computes the pairwise distances between observations in one matrix and returns a matrix
, and
79 |
80 | cdist
computes the distances between observations in two matrices and returns a matrix
.
81 |
82 |
All functions have an argument metric
that can be used to specify the distance function. Available metrics are "euclidean"
, "minkowski"
, "manhattan"
, "maximum"
, "canberra"
, "angular"
, "correlation"
, "absolute_correlation"
, "hamming"
, and "jaccard"
. In addition the metric can be any function that takes two vectors as arguments and returns their distance. All predefined functions will return NA or NaN when one of the compared vectors contains NAs.
83 |
84 |
85 | Installation
86 |
To install the latest released version from CRAN:
87 |
88 |
To install the latest development version from github:
89 |
91 |
92 |
97 |
98 |
99 |
100 |
126 |
127 |
128 |
129 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
--------------------------------------------------------------------------------
/docs/jquery.sticky-kit.min.js:
--------------------------------------------------------------------------------
1 | /* Sticky-kit v1.1.2 | WTFPL | Leaf Corcoran 2015 | */
2 | /*
3 | Source: https://github.com/leafo/sticky-kit
4 | License: MIT
5 | */
6 | (function(){var b,f;b=this.jQuery||window.jQuery;f=b(window);b.fn.stick_in_parent=function(d){var A,w,J,n,B,K,p,q,k,E,t;null==d&&(d={});t=d.sticky_class;B=d.inner_scrolling;E=d.recalc_every;k=d.parent;q=d.offset_top;p=d.spacer;w=d.bottoming;null==q&&(q=0);null==k&&(k=void 0);null==B&&(B=!0);null==t&&(t="is_stuck");A=b(document);null==w&&(w=!0);J=function(a,d,n,C,F,u,r,G){var v,H,m,D,I,c,g,x,y,z,h,l;if(!a.data("sticky_kit")){a.data("sticky_kit",!0);I=A.height();g=a.parent();null!=k&&(g=g.closest(k));
7 | if(!g.length)throw"failed to find stick parent";v=m=!1;(h=null!=p?p&&a.closest(p):b("
"))&&h.css("position",a.css("position"));x=function(){var c,f,e;if(!G&&(I=A.height(),c=parseInt(g.css("border-top-width"),10),f=parseInt(g.css("padding-top"),10),d=parseInt(g.css("padding-bottom"),10),n=g.offset().top+c+f,C=g.height(),m&&(v=m=!1,null==p&&(a.insertAfter(h),h.detach()),a.css({position:"",top:"",width:"",bottom:""}).removeClass(t),e=!0),F=a.offset().top-(parseInt(a.css("margin-top"),10)||0)-q,
8 | u=a.outerHeight(!0),r=a.css("float"),h&&h.css({width:a.outerWidth(!0),height:u,display:a.css("display"),"vertical-align":a.css("vertical-align"),"float":r}),e))return l()};x();if(u!==C)return D=void 0,c=q,z=E,l=function(){var b,l,e,k;if(!G&&(e=!1,null!=z&&(--z,0>=z&&(z=E,x(),e=!0)),e||A.height()===I||x(),e=f.scrollTop(),null!=D&&(l=e-D),D=e,m?(w&&(k=e+u+c>C+n,v&&!k&&(v=!1,a.css({position:"fixed",bottom:"",top:c}).trigger("sticky_kit:unbottom"))),eb&&!v&&(c-=l,c=Math.max(b-u,c),c=Math.min(q,c),m&&a.css({top:c+"px"})))):e>F&&(m=!0,b={position:"fixed",top:c},b.width="border-box"===a.css("box-sizing")?a.outerWidth()+"px":a.width()+"px",a.css(b).addClass(t),null==p&&(a.after(h),"left"!==r&&"right"!==r||h.append(a)),a.trigger("sticky_kit:stick")),m&&w&&(null==k&&(k=e+u+c>C+n),!v&&k)))return v=!0,"static"===g.css("position")&&g.css({position:"relative"}),
10 | a.css({position:"absolute",bottom:d,top:"auto"}).trigger("sticky_kit:bottom")},y=function(){x();return l()},H=function(){G=!0;f.off("touchmove",l);f.off("scroll",l);f.off("resize",y);b(document.body).off("sticky_kit:recalc",y);a.off("sticky_kit:detach",H);a.removeData("sticky_kit");a.css({position:"",bottom:"",top:"",width:""});g.position("position","");if(m)return null==p&&("left"!==r&&"right"!==r||a.insertAfter(h),h.remove()),a.removeClass(t)},f.on("touchmove",l),f.on("scroll",l),f.on("resize",
11 | y),b(document.body).on("sticky_kit:recalc",y),a.on("sticky_kit:detach",H),setTimeout(l,0)}};n=0;for(K=this.length;n
2 |
3 |
5 |
8 |
12 |
13 |
--------------------------------------------------------------------------------
/docs/news/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Changelog • rdist
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
93 |
94 |
95 |
96 |
100 |
101 |
102 |
105 |
106 | Added documentation pages
107 | Added product_metric
108 |
109 | Added farthest_point_sampling
110 |
111 | Added is_distance_matrix
and triangle_inequality
checks
112 |
113 |
114 |
115 |
118 |
119 | Fixed bug in jaccard distance
120 | Fixed bug in euclidean distance
121 | Added labels to rdist output
122 |
123 |
124 |
125 |
128 |
An R package to calculate distances. This provide a common framework to calculate distances.
129 | There are three main functions:
130 |
131 |
132 | rdist
computes the pairwise distances between observations in one matrix and returns a dist
object,
133 |
134 | pdist
computes the pairwise distances between observations in one matrix and returns a matrix
, and
135 |
136 | cdist
computes the distances between observations in two matrices and returns a matrix
.
137 |
138 |
All functions have an argument metric
that can be used to specify the distance function. Available metrics are "euclidean"
, "minkowski"
, "manhattan"
, "maximum"
, "canberra"
, "angular"
, "correlation"
, "absolute_correlation"
, "hamming"
, and "jaccard"
. All functions will return NA or NaN when one of the compared vectors contains NAs.
139 |
140 |
141 | Installation
142 |
To install the latest released version from CRAN:
143 |
144 |
To install the latest development version from github:
145 |
147 |
148 |
153 |
154 |
155 |
156 |
166 |
167 |
168 |
169 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
--------------------------------------------------------------------------------
/docs/pkgdown.css:
--------------------------------------------------------------------------------
1 | /* Sticky footer */
2 |
3 | /**
4 | * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/
5 | * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css
6 | *
7 | * .Site -> body > .container
8 | * .Site-content -> body > .container .row
9 | * .footer -> footer
10 | *
11 | * Key idea seems to be to ensure that .container and __all its parents__
12 | * have height set to 100%
13 | *
14 | */
15 |
16 | html, body {
17 | height: 100%;
18 | }
19 |
20 | body > .container {
21 | display: flex;
22 | height: 100%;
23 | flex-direction: column;
24 |
25 | padding-top: 60px;
26 | }
27 |
28 | body > .container .row {
29 | flex: 1 0 auto;
30 | }
31 |
32 | footer {
33 | margin-top: 45px;
34 | padding: 35px 0 36px;
35 | border-top: 1px solid #e5e5e5;
36 | color: #666;
37 | display: flex;
38 | flex-shrink: 0;
39 | }
40 | footer p {
41 | margin-bottom: 0;
42 | }
43 | footer div {
44 | flex: 1;
45 | }
46 | footer .pkgdown {
47 | text-align: right;
48 | }
49 | footer p {
50 | margin-bottom: 0;
51 | }
52 |
53 | img.icon {
54 | float: right;
55 | }
56 |
57 | img {
58 | max-width: 100%;
59 | }
60 |
61 | /* Typographic tweaking ---------------------------------*/
62 |
63 | .contents h1.page-header {
64 | margin-top: calc(-60px + 1em);
65 | }
66 |
67 | /* Section anchors ---------------------------------*/
68 |
69 | a.anchor {
70 | margin-left: -30px;
71 | display:inline-block;
72 | width: 30px;
73 | height: 30px;
74 | visibility: hidden;
75 |
76 | background-image: url(./link.svg);
77 | background-repeat: no-repeat;
78 | background-size: 20px 20px;
79 | background-position: center center;
80 | }
81 |
82 | .hasAnchor:hover a.anchor {
83 | visibility: visible;
84 | }
85 |
86 | @media (max-width: 767px) {
87 | .hasAnchor:hover a.anchor {
88 | visibility: hidden;
89 | }
90 | }
91 |
92 |
93 | /* Fixes for fixed navbar --------------------------*/
94 |
95 | .contents h1, .contents h2, .contents h3, .contents h4 {
96 | padding-top: 60px;
97 | margin-top: -40px;
98 | }
99 |
100 | /* Static header placement on mobile devices */
101 | @media (max-width: 767px) {
102 | .navbar-fixed-top {
103 | position: absolute;
104 | }
105 | .navbar {
106 | padding: 0;
107 | }
108 | }
109 |
110 |
111 | /* Sidebar --------------------------*/
112 |
113 | #sidebar {
114 | margin-top: 30px;
115 | }
116 | #sidebar h2 {
117 | font-size: 1.5em;
118 | margin-top: 1em;
119 | }
120 |
121 | #sidebar h2:first-child {
122 | margin-top: 0;
123 | }
124 |
125 | #sidebar .list-unstyled li {
126 | margin-bottom: 0.5em;
127 | }
128 |
129 | .orcid {
130 | height: 16px;
131 | vertical-align: middle;
132 | }
133 |
134 | /* Reference index & topics ----------------------------------------------- */
135 |
136 | .ref-index th {font-weight: normal;}
137 |
138 | .ref-index td {vertical-align: top;}
139 | .ref-index .alias {width: 40%;}
140 | .ref-index .title {width: 60%;}
141 |
142 | .ref-index .alias {width: 40%;}
143 | .ref-index .title {width: 60%;}
144 |
145 | .ref-arguments th {text-align: right; padding-right: 10px;}
146 | .ref-arguments th, .ref-arguments td {vertical-align: top;}
147 | .ref-arguments .name {width: 20%;}
148 | .ref-arguments .desc {width: 80%;}
149 |
150 | /* Nice scrolling for wide elements --------------------------------------- */
151 |
152 | table {
153 | display: block;
154 | overflow: auto;
155 | }
156 |
157 | /* Syntax highlighting ---------------------------------------------------- */
158 |
159 | pre {
160 | word-wrap: normal;
161 | word-break: normal;
162 | border: 1px solid #eee;
163 | }
164 |
165 | pre, code {
166 | background-color: #f8f8f8;
167 | color: #333;
168 | }
169 |
170 | pre code {
171 | overflow: auto;
172 | word-wrap: normal;
173 | white-space: pre;
174 | }
175 |
176 | pre .img {
177 | margin: 5px 0;
178 | }
179 |
180 | pre .img img {
181 | background-color: #fff;
182 | display: block;
183 | height: auto;
184 | }
185 |
186 | code a, pre a {
187 | color: #375f84;
188 | }
189 |
190 | a.sourceLine:hover {
191 | text-decoration: none;
192 | }
193 |
194 | .fl {color: #1514b5;}
195 | .fu {color: #000000;} /* function */
196 | .ch,.st {color: #036a07;} /* string */
197 | .kw {color: #264D66;} /* keyword */
198 | .co {color: #888888;} /* comment */
199 |
200 | .message { color: black; font-weight: bolder;}
201 | .error { color: orange; font-weight: bolder;}
202 | .warning { color: #6A0366; font-weight: bolder;}
203 |
204 | /* Clipboard --------------------------*/
205 |
206 | .hasCopyButton {
207 | position: relative;
208 | }
209 |
210 | .btn-copy-ex {
211 | position: absolute;
212 | right: 0;
213 | top: 0;
214 | visibility: hidden;
215 | }
216 |
217 | .hasCopyButton:hover button.btn-copy-ex {
218 | visibility: visible;
219 | }
220 |
221 | /* mark.js ----------------------------*/
222 |
223 | mark {
224 | background-color: rgba(255, 255, 51, 0.5);
225 | border-bottom: 2px solid rgba(255, 153, 51, 0.3);
226 | padding: 1px;
227 | }
228 |
--------------------------------------------------------------------------------
/docs/pkgdown.js:
--------------------------------------------------------------------------------
1 | $(function() {
2 |
3 | $("#sidebar")
4 | .stick_in_parent({offset_top: 40})
5 | .on('sticky_kit:bottom', function(e) {
6 | $(this).parent().css('position', 'static');
7 | })
8 | .on('sticky_kit:unbottom', function(e) {
9 | $(this).parent().css('position', 'relative');
10 | });
11 |
12 | $('body').scrollspy({
13 | target: '#sidebar',
14 | offset: 60
15 | });
16 |
17 | $('[data-toggle="tooltip"]').tooltip();
18 |
19 | var cur_path = paths(location.pathname);
20 | $("#navbar ul li a").each(function(index, value) {
21 | if (value.text == "Home")
22 | return;
23 | if (value.getAttribute("href") === "#")
24 | return;
25 |
26 | var path = paths(value.pathname);
27 | if (is_prefix(cur_path, path)) {
28 | // Add class to parent , and enclosing if in dropdown
29 | var menu_anchor = $(value);
30 | menu_anchor.parent().addClass("active");
31 | menu_anchor.closest("li.dropdown").addClass("active");
32 | }
33 | });
34 | });
35 |
36 | $(document).ready(function() {
37 | // do keyword highlighting
38 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */
39 | var mark = function() {
40 |
41 | var referrer = document.URL ;
42 | var paramKey = "q" ;
43 |
44 | if (referrer.indexOf("?") !== -1) {
45 | var qs = referrer.substr(referrer.indexOf('?') + 1);
46 | var qs_noanchor = qs.split('#')[0];
47 | var qsa = qs_noanchor.split('&');
48 | var keyword = "";
49 |
50 | for (var i = 0; i < qsa.length; i++) {
51 | var currentParam = qsa[i].split('=');
52 |
53 | if (currentParam.length !== 2) {
54 | continue;
55 | }
56 |
57 | if (currentParam[0] == paramKey) {
58 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20"));
59 | }
60 | }
61 |
62 | if (keyword !== "") {
63 | $(".contents").unmark({
64 | done: function() {
65 | $(".contents").mark(keyword);
66 | }
67 | });
68 | }
69 | }
70 | };
71 |
72 | mark();
73 | });
74 |
75 | function paths(pathname) {
76 | var pieces = pathname.split("/");
77 | pieces.shift(); // always starts with /
78 |
79 | var end = pieces[pieces.length - 1];
80 | if (end === "index.html" || end === "")
81 | pieces.pop();
82 | return(pieces);
83 | }
84 |
85 | function is_prefix(needle, haystack) {
86 | if (needle.length > haystack.lengh)
87 | return(false);
88 |
89 | // Special case for length-0 haystack, since for loop won't run
90 | if (haystack.length === 0) {
91 | return(needle.length === 0);
92 | }
93 |
94 | for (var i = 0; i < haystack.length; i++) {
95 | if (needle[i] != haystack[i])
96 | return(false);
97 | }
98 |
99 | return(true);
100 | }
101 |
102 | /* Clipboard --------------------------*/
103 |
104 | function changeTooltipMessage(element, msg) {
105 | var tooltipOriginalTitle=element.getAttribute('data-original-title');
106 | element.setAttribute('data-original-title', msg);
107 | $(element).tooltip('show');
108 | element.setAttribute('data-original-title', tooltipOriginalTitle);
109 | }
110 |
111 | if(Clipboard.isSupported()) {
112 | $(document).ready(function() {
113 | var copyButton = " ";
114 |
115 | $(".examples").addClass("hasCopyButton");
116 |
117 | // Insert copy buttons:
118 | $(copyButton).prependTo(".hasCopyButton");
119 |
120 | // Initialize tooltips:
121 | $('.btn-copy-ex').tooltip({container: 'body'});
122 |
123 | // Initialize clipboard:
124 | var clipboardBtnCopies = new Clipboard('[data-clipboard-copy]', {
125 | text: function(trigger) {
126 | return trigger.parentNode.textContent;
127 | }
128 | });
129 |
130 | clipboardBtnCopies.on('success', function(e) {
131 | changeTooltipMessage(e.trigger, 'Copied!');
132 | e.clearSelection();
133 | });
134 |
135 | clipboardBtnCopies.on('error', function() {
136 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy');
137 | });
138 | });
139 | }
140 |
141 | /* Search term highlighting ------------------------------*/
142 |
143 | function matchedWords(hit) {
144 | var words = [];
145 |
146 | var hierarchy = hit._highlightResult.hierarchy;
147 | // loop to fetch from lvl0, lvl1, etc.
148 | for (var idx in hierarchy) {
149 | words = words.concat(hierarchy[idx].matchedWords);
150 | }
151 |
152 | var content = hit._highlightResult.content;
153 | if (content) {
154 | words = words.concat(content.matchedWords);
155 | }
156 |
157 | // return unique words
158 | var words_uniq = [...new Set(words)];
159 | return words_uniq;
160 | }
161 |
162 | function updateHitURL(hit) {
163 |
164 | var words = matchedWords(hit);
165 | var url = "";
166 |
167 | if (hit.anchor) {
168 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor;
169 | } else {
170 | url = hit.url + '?q=' + escape(words.join(" "));
171 | }
172 |
173 | return url;
174 | }
175 |
--------------------------------------------------------------------------------
/docs/pkgdown.yml:
--------------------------------------------------------------------------------
1 | pandoc: 2.0.6
2 | pkgdown: 1.0.0
3 | pkgdown_sha: ~
4 | articles: []
5 |
6 |
--------------------------------------------------------------------------------
/docs/reference/farthest_point_sampling.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Farthest point sampling — farthest_point_sampling • rdist
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
98 |
99 |
100 |
101 |
106 |
107 |
108 |
109 |
Farthest point sampling returns a reordering of the metric
110 | space P = p_1, ..., p_k, such that each p_i is the farthest
111 | point from the first i-1 points.
112 |
113 |
114 |
115 |
farthest_point_sampling (mat , metric = "precomputed" , k = nrow (mat ),
116 | initial_point_index = 1L , return_clusters = FALSE )
117 |
118 |
Arguments
119 |
120 |
121 |
122 | mat
123 | Original distance matrix
124 |
125 |
126 | metric
127 | Distance metric to use (either "precomputed" or a metric from rdist
)
128 |
129 |
130 | k
131 | Number of points to sample
132 |
133 |
134 | initial_point_index
135 | Index of p_1
136 |
137 |
138 | return_clusters
139 | Should the indices of the closest farthest points be returned?
140 |
141 |
142 |
143 |
144 |
Examples
145 |
#> [1] TRUE
# have a look at the fps distance matrix
153 | rdist (df [fps [1 :5 ], ])
#> 1 2 3 4
154 | #> 2 1.0924360
155 | #> 3 0.9280582 0.8499785
156 | #> 4 0.8029144 0.8977409 1.3558406
157 | #> 5 0.5409166 0.5515509 0.7071422 0.6487099
dist_mat [fps , fps ][1 :5 , 1 :5 ]
#> [,1] [,2] [,3] [,4] [,5]
158 | #> [1,] 0.0000000 1.0924360 0.9280582 0.8029144 0.5409166
159 | #> [2,] 1.0924360 0.0000000 0.8499785 0.8977409 0.5515509
160 | #> [3,] 0.9280582 0.8499785 0.0000000 1.3558406 0.7071422
161 | #> [4,] 0.8029144 0.8977409 1.3558406 0.0000000 0.6487099
162 | #> [5,] 0.5409166 0.5515509 0.7071422 0.6487099 0.0000000
163 |
164 |
173 |
174 |
175 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
--------------------------------------------------------------------------------
/docs/reference/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Function reference • rdist
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
93 |
94 |
150 |
151 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
--------------------------------------------------------------------------------
/docs/reference/is_metric.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Metric and triangle inequality — is_metric • rdist
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
96 |
97 |
98 |
99 |
104 |
105 |
106 |
107 |
Does the distance matric come from a metric
108 |
109 |
110 |
111 |
is_distance_matrix (mat , tolerance = .Machine $double.eps ^0.5 )
112 |
113 | triangle_inequality (mat , tolerance = .Machine $double.eps ^0.5 )
114 |
115 |
Arguments
116 |
117 |
118 |
119 | mat
120 | The matrix to evaluate
121 |
122 |
123 | tolerance
124 | Differences smaller than tolerance are not reported.
125 |
126 |
127 |
128 |
129 |
Examples
130 |
#> [1] TRUE
triangle_inequality (dm )
#> [1] TRUE
133 | dm [1 , 2 ] <- 1.1 * dm [1 , 2 ]
134 | is_distance_matrix (dm )
#> Matrix is not symmetric.
#> [1] FALSE
135 |
136 |
145 |
146 |
147 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
--------------------------------------------------------------------------------
/docs/reference/product_metric.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Product metric — product_metric • rdist
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
97 |
98 |
99 |
100 |
105 |
106 |
107 |
108 |
Returns the p-product metric of two metric spaces.
109 | Works for output of `rdist`, `pdist` or `cdist`.
110 |
111 |
112 |
113 |
product_metric (... , p = 2 )
114 |
115 |
Arguments
116 |
117 |
118 |
119 | ...
120 | Distance matrices or dist objects
121 |
122 |
123 | p
124 | The power of the Minkowski distance
125 |
126 |
127 |
128 |
129 |
Examples
130 |
#> [1] TRUE
140 |
141 |
150 |
151 |
152 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
--------------------------------------------------------------------------------
/docs/reference/rdist.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | rdist: an R package for distances — rdist • rdist
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
101 |
102 |
103 |
104 |
109 |
110 |
111 |
112 |
rdist
provide a common framework to calculate distances. There are three main functions:
113 | rdist
computes the pairwise distances between observations in one matrix and returns a dist
object,
114 | pdist
computes the pairwise distances between observations in one matrix and returns a matrix
, and
115 | cdist
computes the distances between observations in two matrices and returns a matrix
.
116 | In particular the cdist
function is often missing in other distance functions. All
117 | calculations involving NA
values will consistently return NA
.
118 |
119 |
120 |
121 |
rdist (X , metric = "euclidean" , p = 2L )
122 |
123 | pdist (X , metric = "euclidean" , p = 2 )
124 |
125 | cdist (X , Y , metric = "euclidean" , p = 2 )
126 |
127 |
Arguments
128 |
129 |
130 |
131 | X, Y
132 | A matrix
133 |
134 |
135 | metric
136 | The distance metric to use
137 |
138 |
139 | p
140 | The power of the Minkowski distance
141 |
142 |
143 |
144 |
Details
145 |
146 |
Available distance measures are (written for two vectors v and w):
147 | "euclidean"
: \(\sqrt{\sum_i(v_i - w_i)^2}\)
148 | "minkowski"
: \((\sum_i|v_i - w_i|^p)^{1/p}\)
149 | "manhattan"
: \(\sum_i(|v_i-w_i|)\)
150 | "maximum"
or "chebyshev"
: \(\max_i(|v_i-w_i|)\)
151 | "canberra"
: \(\sum_i(\frac{|v_i-w_i|}{|v_i|+|w_i|})\)
152 | "angular"
: \(\cos^{-1}(cor(v, w))\)
153 | "correlation"
: \(\sqrt{\frac{1-cor(v, w)}{2}}\)
154 | "absolute_correlation"
: \(\sqrt{1-|cor(v, w)|^2}\)
155 | "hamming"
: \((\sum_i v_i \neq w_i) / \sum_i 1\)
156 | "jaccard"
: \((\sum_i v_i \neq w_i) / \sum_i 1_{v_i \neq 0 \cup w_i \neq 0}\)
157 | Any function that defines a distance between two vectors.
158 |
159 |
160 |
161 |
162 |
171 |
172 |
173 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
--------------------------------------------------------------------------------
/man/farthest_point_sampling.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/farthest_point_sampling.R
3 | \name{farthest_point_sampling}
4 | \alias{farthest_point_sampling}
5 | \title{Farthest point sampling}
6 | \usage{
7 | farthest_point_sampling(
8 | mat,
9 | metric = "precomputed",
10 | k = nrow(mat),
11 | initial_point_index = 1L,
12 | return_clusters = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{mat}{Original distance matrix}
17 |
18 | \item{metric}{Distance metric to use (either "precomputed" or a metric from \code{\link{rdist}})}
19 |
20 | \item{k}{Number of points to sample}
21 |
22 | \item{initial_point_index}{Index of p_1}
23 |
24 | \item{return_clusters}{Should the indices of the closest farthest points be returned?}
25 | }
26 | \description{
27 | Farthest point sampling returns a reordering of the metric
28 | space P = {p_1, ..., p_k}, such that each p_i is the farthest
29 | point from the first i-1 points.
30 | }
31 | \examples{
32 |
33 | # generate data
34 | df <- matrix(runif(200), ncol = 2)
35 | dist_mat <- pdist(df)
36 | # farthest point sampling
37 | fps <- farthest_point_sampling(dist_mat)
38 | fps2 <- farthest_point_sampling(df, metric = "euclidean")
39 | all.equal(fps, fps2)
40 | # have a look at the fps distance matrix
41 | rdist(df[fps[1:5], ])
42 | dist_mat[fps, fps][1:5, 1:5]
43 | }
44 |
--------------------------------------------------------------------------------
/man/is_metric.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/is_metric.R
3 | \name{is_metric}
4 | \alias{is_metric}
5 | \alias{is_distance_matrix}
6 | \alias{triangle_inequality}
7 | \title{Metric and triangle inequality}
8 | \usage{
9 | is_distance_matrix(mat, tolerance = .Machine$double.eps^0.5)
10 |
11 | triangle_inequality(mat, tolerance = .Machine$double.eps^0.5)
12 | }
13 | \arguments{
14 | \item{mat}{The matrix to evaluate}
15 |
16 | \item{tolerance}{Differences smaller than tolerance are not reported.}
17 | }
18 | \description{
19 | Does the distance matric come from a metric
20 | }
21 | \examples{
22 | data <- matrix(rnorm(20), ncol = 2)
23 | dm <- pdist(data)
24 | is_distance_matrix(dm)
25 | triangle_inequality(dm)
26 |
27 | dm[1, 2] <- 1.1 * dm[1, 2]
28 | is_distance_matrix(dm)
29 | }
30 |
--------------------------------------------------------------------------------
/man/product_metric.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/product_metric.R
3 | \name{product_metric}
4 | \alias{product_metric}
5 | \title{Product metric}
6 | \usage{
7 | product_metric(..., p = 2)
8 | }
9 | \arguments{
10 | \item{...}{Distance matrices or dist objects}
11 |
12 | \item{p}{The power of the Minkowski distance}
13 | }
14 | \description{
15 | Returns the p-product metric of two metric spaces.
16 | Works for output of `rdist`, `pdist` or `cdist`.
17 | }
18 | \examples{
19 | # generate data
20 | df <- matrix(runif(200), ncol = 2)
21 | # distance matrices
22 | dist_mat <- pdist(df)
23 | dist_1 <- pdist(df[, 1])
24 | dist_2 <- pdist(df[, 2])
25 | # product distance matrix
26 | dist_prod <- product_metric(dist_1, dist_2)
27 | # check equality
28 | all.equal(dist_mat, dist_prod)
29 | }
30 |
--------------------------------------------------------------------------------
/man/rdist.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/distance_functions.r, R/rdist-package.r
3 | \docType{package}
4 | \name{rdist}
5 | \alias{rdist}
6 | \alias{pdist}
7 | \alias{cdist}
8 | \alias{rdist-package}
9 | \title{rdist: an R package for distances}
10 | \usage{
11 | rdist(X, metric = "euclidean", p = 2L)
12 |
13 | pdist(X, metric = "euclidean", p = 2)
14 |
15 | cdist(X, Y, metric = "euclidean", p = 2)
16 | }
17 | \arguments{
18 | \item{X, Y}{A matrix}
19 |
20 | \item{metric}{The distance metric to use}
21 |
22 | \item{p}{The power of the Minkowski distance}
23 | }
24 | \description{
25 | \code{rdist} provide a common framework to calculate distances. There are three main functions:
26 | \itemize{
27 | \item \code{rdist} computes the pairwise distances between observations in one matrix and returns a \code{dist} object,
28 | \item \code{pdist} computes the pairwise distances between observations in one matrix and returns a \code{matrix}, and
29 | \item \code{cdist} computes the distances between observations in two matrices and returns a \code{matrix}.
30 | }
31 | In particular the \code{cdist} function is often missing in other distance functions. All
32 | calculations involving \code{NA} values will consistently return \code{NA}.
33 | }
34 | \details{
35 | Available distance measures are (written for two vectors v and w):
36 | \itemize{
37 | \item \code{"euclidean"}: \eqn{\sqrt{\sum_i(v_i - w_i)^2}}{sqrt(sum_i((v_i - w_i)^2))}
38 | \item \code{"minkowski"}: \eqn{(\sum_i|v_i - w_i|^p)^{1/p}}{(sum_i(|v_i - w_i|^p))^{1/p}}
39 | \item \code{"manhattan"}: \eqn{\sum_i(|v_i-w_i|)}{sum_i(|v_i-w_i|)}
40 | \item \code{"maximum"} or \code{"chebyshev"}: \eqn{\max_i(|v_i-w_i|)}{max_i(|v_i-w_i|)}
41 | \item \code{"canberra"}: \eqn{\sum_i(\frac{|v_i-w_i|}{|v_i|+|w_i|})}{sum_i(|v_i-w_i|/(|v_i|+|w_i|))}
42 | \item \code{"angular"}: \eqn{\cos^{-1}(cor(v, w))}{arccos(cor(v, w))}
43 | \item \code{"correlation"}: \eqn{\sqrt{\frac{1-cor(v, w)}{2}}}{sqrt((1-cor(v, w))/2)}
44 | \item \code{"absolute_correlation"}: \eqn{\sqrt{1-|cor(v, w)|^2}}{sqrt((1-|cor(v, w)|^2))}
45 | \item \code{"hamming"}: \eqn{(\sum_i v_i \neq w_i) / \sum_i 1}{sum_i(v_i != w_i)/sum_i(1)}
46 | \item \code{"jaccard"}: \eqn{(\sum_i v_i \neq w_i) / \sum_i 1_{v_i \neq 0 \cup w_i \neq 0}}{sum_i(v_i != w_i)/sum_i(v_i != 0 or w_i != 0)}
47 | \item Any function that defines a distance between two vectors.
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.so
3 | *.dll
4 |
--------------------------------------------------------------------------------
/src/Makevars:
--------------------------------------------------------------------------------
1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
1 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)
2 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
--------------------------------------------------------------------------------
/src/RcppExports.cpp:
--------------------------------------------------------------------------------
1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 |
4 | #include
5 | #include
6 |
7 | using namespace Rcpp;
8 |
9 | #ifdef RCPP_USE_GLOBAL_ROSTREAM
10 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get();
11 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
12 | #endif
13 |
14 | // canberra_rdist
15 | NumericVector canberra_rdist(NumericMatrix A);
16 | RcppExport SEXP _rdist_canberra_rdist(SEXP ASEXP) {
17 | BEGIN_RCPP
18 | Rcpp::RObject rcpp_result_gen;
19 | Rcpp::RNGScope rcpp_rngScope_gen;
20 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
21 | rcpp_result_gen = Rcpp::wrap(canberra_rdist(A));
22 | return rcpp_result_gen;
23 | END_RCPP
24 | }
25 | // canberra_pdist
26 | NumericMatrix canberra_pdist(NumericMatrix A);
27 | RcppExport SEXP _rdist_canberra_pdist(SEXP ASEXP) {
28 | BEGIN_RCPP
29 | Rcpp::RObject rcpp_result_gen;
30 | Rcpp::RNGScope rcpp_rngScope_gen;
31 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
32 | rcpp_result_gen = Rcpp::wrap(canberra_pdist(A));
33 | return rcpp_result_gen;
34 | END_RCPP
35 | }
36 | // canberra_cdist
37 | NumericMatrix canberra_cdist(NumericMatrix A, NumericMatrix B);
38 | RcppExport SEXP _rdist_canberra_cdist(SEXP ASEXP, SEXP BSEXP) {
39 | BEGIN_RCPP
40 | Rcpp::RObject rcpp_result_gen;
41 | Rcpp::RNGScope rcpp_rngScope_gen;
42 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
43 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP);
44 | rcpp_result_gen = Rcpp::wrap(canberra_cdist(A, B));
45 | return rcpp_result_gen;
46 | END_RCPP
47 | }
48 | // rdist_cpp
49 | NumericVector rdist_cpp(NumericMatrix A, String metric, double p);
50 | RcppExport SEXP _rdist_rdist_cpp(SEXP ASEXP, SEXP metricSEXP, SEXP pSEXP) {
51 | BEGIN_RCPP
52 | Rcpp::RObject rcpp_result_gen;
53 | Rcpp::RNGScope rcpp_rngScope_gen;
54 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
55 | Rcpp::traits::input_parameter< String >::type metric(metricSEXP);
56 | Rcpp::traits::input_parameter< double >::type p(pSEXP);
57 | rcpp_result_gen = Rcpp::wrap(rdist_cpp(A, metric, p));
58 | return rcpp_result_gen;
59 | END_RCPP
60 | }
61 | // pdist_cpp
62 | NumericMatrix pdist_cpp(NumericMatrix A, String metric, double p);
63 | RcppExport SEXP _rdist_pdist_cpp(SEXP ASEXP, SEXP metricSEXP, SEXP pSEXP) {
64 | BEGIN_RCPP
65 | Rcpp::RObject rcpp_result_gen;
66 | Rcpp::RNGScope rcpp_rngScope_gen;
67 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
68 | Rcpp::traits::input_parameter< String >::type metric(metricSEXP);
69 | Rcpp::traits::input_parameter< double >::type p(pSEXP);
70 | rcpp_result_gen = Rcpp::wrap(pdist_cpp(A, metric, p));
71 | return rcpp_result_gen;
72 | END_RCPP
73 | }
74 | // cdist_cpp
75 | NumericMatrix cdist_cpp(NumericMatrix A, NumericMatrix B, String metric, double p);
76 | RcppExport SEXP _rdist_cdist_cpp(SEXP ASEXP, SEXP BSEXP, SEXP metricSEXP, SEXP pSEXP) {
77 | BEGIN_RCPP
78 | Rcpp::RObject rcpp_result_gen;
79 | Rcpp::RNGScope rcpp_rngScope_gen;
80 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
81 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP);
82 | Rcpp::traits::input_parameter< String >::type metric(metricSEXP);
83 | Rcpp::traits::input_parameter< double >::type p(pSEXP);
84 | rcpp_result_gen = Rcpp::wrap(cdist_cpp(A, B, metric, p));
85 | return rcpp_result_gen;
86 | END_RCPP
87 | }
88 | // euclidean_rdist
89 | NumericVector euclidean_rdist(NumericMatrix A);
90 | RcppExport SEXP _rdist_euclidean_rdist(SEXP ASEXP) {
91 | BEGIN_RCPP
92 | Rcpp::RObject rcpp_result_gen;
93 | Rcpp::RNGScope rcpp_rngScope_gen;
94 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
95 | rcpp_result_gen = Rcpp::wrap(euclidean_rdist(A));
96 | return rcpp_result_gen;
97 | END_RCPP
98 | }
99 | // euclidean_pdist
100 | NumericMatrix euclidean_pdist(NumericMatrix A);
101 | RcppExport SEXP _rdist_euclidean_pdist(SEXP ASEXP) {
102 | BEGIN_RCPP
103 | Rcpp::RObject rcpp_result_gen;
104 | Rcpp::RNGScope rcpp_rngScope_gen;
105 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
106 | rcpp_result_gen = Rcpp::wrap(euclidean_pdist(A));
107 | return rcpp_result_gen;
108 | END_RCPP
109 | }
110 | // euclidean_cdist
111 | NumericMatrix euclidean_cdist(NumericMatrix A, NumericMatrix B);
112 | RcppExport SEXP _rdist_euclidean_cdist(SEXP ASEXP, SEXP BSEXP) {
113 | BEGIN_RCPP
114 | Rcpp::RObject rcpp_result_gen;
115 | Rcpp::RNGScope rcpp_rngScope_gen;
116 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
117 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP);
118 | rcpp_result_gen = Rcpp::wrap(euclidean_cdist(A, B));
119 | return rcpp_result_gen;
120 | END_RCPP
121 | }
122 | // farthest_point_sampling_cpp
123 | NumericMatrix farthest_point_sampling_cpp(NumericMatrix mat, String metric, int k, int initial_point_index, bool return_clusters);
124 | RcppExport SEXP _rdist_farthest_point_sampling_cpp(SEXP matSEXP, SEXP metricSEXP, SEXP kSEXP, SEXP initial_point_indexSEXP, SEXP return_clustersSEXP) {
125 | BEGIN_RCPP
126 | Rcpp::RObject rcpp_result_gen;
127 | Rcpp::RNGScope rcpp_rngScope_gen;
128 | Rcpp::traits::input_parameter< NumericMatrix >::type mat(matSEXP);
129 | Rcpp::traits::input_parameter< String >::type metric(metricSEXP);
130 | Rcpp::traits::input_parameter< int >::type k(kSEXP);
131 | Rcpp::traits::input_parameter< int >::type initial_point_index(initial_point_indexSEXP);
132 | Rcpp::traits::input_parameter< bool >::type return_clusters(return_clustersSEXP);
133 | rcpp_result_gen = Rcpp::wrap(farthest_point_sampling_cpp(mat, metric, k, initial_point_index, return_clusters));
134 | return rcpp_result_gen;
135 | END_RCPP
136 | }
137 | // hamming_rdist
138 | NumericVector hamming_rdist(NumericMatrix A);
139 | RcppExport SEXP _rdist_hamming_rdist(SEXP ASEXP) {
140 | BEGIN_RCPP
141 | Rcpp::RObject rcpp_result_gen;
142 | Rcpp::RNGScope rcpp_rngScope_gen;
143 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
144 | rcpp_result_gen = Rcpp::wrap(hamming_rdist(A));
145 | return rcpp_result_gen;
146 | END_RCPP
147 | }
148 | // hamming_pdist
149 | NumericMatrix hamming_pdist(NumericMatrix A);
150 | RcppExport SEXP _rdist_hamming_pdist(SEXP ASEXP) {
151 | BEGIN_RCPP
152 | Rcpp::RObject rcpp_result_gen;
153 | Rcpp::RNGScope rcpp_rngScope_gen;
154 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
155 | rcpp_result_gen = Rcpp::wrap(hamming_pdist(A));
156 | return rcpp_result_gen;
157 | END_RCPP
158 | }
159 | // hamming_cdist
160 | NumericMatrix hamming_cdist(NumericMatrix A, NumericMatrix B);
161 | RcppExport SEXP _rdist_hamming_cdist(SEXP ASEXP, SEXP BSEXP) {
162 | BEGIN_RCPP
163 | Rcpp::RObject rcpp_result_gen;
164 | Rcpp::RNGScope rcpp_rngScope_gen;
165 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
166 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP);
167 | rcpp_result_gen = Rcpp::wrap(hamming_cdist(A, B));
168 | return rcpp_result_gen;
169 | END_RCPP
170 | }
171 | // jaccard_rdist
172 | NumericVector jaccard_rdist(NumericMatrix A);
173 | RcppExport SEXP _rdist_jaccard_rdist(SEXP ASEXP) {
174 | BEGIN_RCPP
175 | Rcpp::RObject rcpp_result_gen;
176 | Rcpp::RNGScope rcpp_rngScope_gen;
177 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
178 | rcpp_result_gen = Rcpp::wrap(jaccard_rdist(A));
179 | return rcpp_result_gen;
180 | END_RCPP
181 | }
182 | // jaccard_pdist
183 | NumericMatrix jaccard_pdist(NumericMatrix A);
184 | RcppExport SEXP _rdist_jaccard_pdist(SEXP ASEXP) {
185 | BEGIN_RCPP
186 | Rcpp::RObject rcpp_result_gen;
187 | Rcpp::RNGScope rcpp_rngScope_gen;
188 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
189 | rcpp_result_gen = Rcpp::wrap(jaccard_pdist(A));
190 | return rcpp_result_gen;
191 | END_RCPP
192 | }
193 | // jaccard_cdist
194 | NumericMatrix jaccard_cdist(NumericMatrix A, NumericMatrix B);
195 | RcppExport SEXP _rdist_jaccard_cdist(SEXP ASEXP, SEXP BSEXP) {
196 | BEGIN_RCPP
197 | Rcpp::RObject rcpp_result_gen;
198 | Rcpp::RNGScope rcpp_rngScope_gen;
199 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
200 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP);
201 | rcpp_result_gen = Rcpp::wrap(jaccard_cdist(A, B));
202 | return rcpp_result_gen;
203 | END_RCPP
204 | }
205 | // manhattan_rdist
206 | NumericVector manhattan_rdist(NumericMatrix A);
207 | RcppExport SEXP _rdist_manhattan_rdist(SEXP ASEXP) {
208 | BEGIN_RCPP
209 | Rcpp::RObject rcpp_result_gen;
210 | Rcpp::RNGScope rcpp_rngScope_gen;
211 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
212 | rcpp_result_gen = Rcpp::wrap(manhattan_rdist(A));
213 | return rcpp_result_gen;
214 | END_RCPP
215 | }
216 | // manhattan_pdist
217 | NumericMatrix manhattan_pdist(NumericMatrix A);
218 | RcppExport SEXP _rdist_manhattan_pdist(SEXP ASEXP) {
219 | BEGIN_RCPP
220 | Rcpp::RObject rcpp_result_gen;
221 | Rcpp::RNGScope rcpp_rngScope_gen;
222 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
223 | rcpp_result_gen = Rcpp::wrap(manhattan_pdist(A));
224 | return rcpp_result_gen;
225 | END_RCPP
226 | }
227 | // manhattan_cdist
228 | NumericMatrix manhattan_cdist(NumericMatrix A, NumericMatrix B);
229 | RcppExport SEXP _rdist_manhattan_cdist(SEXP ASEXP, SEXP BSEXP) {
230 | BEGIN_RCPP
231 | Rcpp::RObject rcpp_result_gen;
232 | Rcpp::RNGScope rcpp_rngScope_gen;
233 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
234 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP);
235 | rcpp_result_gen = Rcpp::wrap(manhattan_cdist(A, B));
236 | return rcpp_result_gen;
237 | END_RCPP
238 | }
239 | // maximum_rdist
240 | NumericVector maximum_rdist(NumericMatrix A);
241 | RcppExport SEXP _rdist_maximum_rdist(SEXP ASEXP) {
242 | BEGIN_RCPP
243 | Rcpp::RObject rcpp_result_gen;
244 | Rcpp::RNGScope rcpp_rngScope_gen;
245 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
246 | rcpp_result_gen = Rcpp::wrap(maximum_rdist(A));
247 | return rcpp_result_gen;
248 | END_RCPP
249 | }
250 | // maximum_pdist
251 | NumericMatrix maximum_pdist(NumericMatrix A);
252 | RcppExport SEXP _rdist_maximum_pdist(SEXP ASEXP) {
253 | BEGIN_RCPP
254 | Rcpp::RObject rcpp_result_gen;
255 | Rcpp::RNGScope rcpp_rngScope_gen;
256 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
257 | rcpp_result_gen = Rcpp::wrap(maximum_pdist(A));
258 | return rcpp_result_gen;
259 | END_RCPP
260 | }
261 | // maximum_cdist
262 | NumericMatrix maximum_cdist(NumericMatrix A, NumericMatrix B);
263 | RcppExport SEXP _rdist_maximum_cdist(SEXP ASEXP, SEXP BSEXP) {
264 | BEGIN_RCPP
265 | Rcpp::RObject rcpp_result_gen;
266 | Rcpp::RNGScope rcpp_rngScope_gen;
267 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
268 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP);
269 | rcpp_result_gen = Rcpp::wrap(maximum_cdist(A, B));
270 | return rcpp_result_gen;
271 | END_RCPP
272 | }
273 | // minkowski_rdist
274 | NumericVector minkowski_rdist(NumericMatrix A, double p);
275 | RcppExport SEXP _rdist_minkowski_rdist(SEXP ASEXP, SEXP pSEXP) {
276 | BEGIN_RCPP
277 | Rcpp::RObject rcpp_result_gen;
278 | Rcpp::RNGScope rcpp_rngScope_gen;
279 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
280 | Rcpp::traits::input_parameter< double >::type p(pSEXP);
281 | rcpp_result_gen = Rcpp::wrap(minkowski_rdist(A, p));
282 | return rcpp_result_gen;
283 | END_RCPP
284 | }
285 | // minkowski_pdist
286 | NumericMatrix minkowski_pdist(NumericMatrix A, double p);
287 | RcppExport SEXP _rdist_minkowski_pdist(SEXP ASEXP, SEXP pSEXP) {
288 | BEGIN_RCPP
289 | Rcpp::RObject rcpp_result_gen;
290 | Rcpp::RNGScope rcpp_rngScope_gen;
291 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
292 | Rcpp::traits::input_parameter< double >::type p(pSEXP);
293 | rcpp_result_gen = Rcpp::wrap(minkowski_pdist(A, p));
294 | return rcpp_result_gen;
295 | END_RCPP
296 | }
297 | // minkowski_cdist
298 | NumericMatrix minkowski_cdist(NumericMatrix A, NumericMatrix B, double p);
299 | RcppExport SEXP _rdist_minkowski_cdist(SEXP ASEXP, SEXP BSEXP, SEXP pSEXP) {
300 | BEGIN_RCPP
301 | Rcpp::RObject rcpp_result_gen;
302 | Rcpp::RNGScope rcpp_rngScope_gen;
303 | Rcpp::traits::input_parameter< NumericMatrix >::type A(ASEXP);
304 | Rcpp::traits::input_parameter< NumericMatrix >::type B(BSEXP);
305 | Rcpp::traits::input_parameter< double >::type p(pSEXP);
306 | rcpp_result_gen = Rcpp::wrap(minkowski_cdist(A, B, p));
307 | return rcpp_result_gen;
308 | END_RCPP
309 | }
310 | // cpp_triangle_inequality
311 | bool cpp_triangle_inequality(Rcpp::NumericMatrix mat, double tolerance);
312 | RcppExport SEXP _rdist_cpp_triangle_inequality(SEXP matSEXP, SEXP toleranceSEXP) {
313 | BEGIN_RCPP
314 | Rcpp::RObject rcpp_result_gen;
315 | Rcpp::RNGScope rcpp_rngScope_gen;
316 | Rcpp::traits::input_parameter< Rcpp::NumericMatrix >::type mat(matSEXP);
317 | Rcpp::traits::input_parameter< double >::type tolerance(toleranceSEXP);
318 | rcpp_result_gen = Rcpp::wrap(cpp_triangle_inequality(mat, tolerance));
319 | return rcpp_result_gen;
320 | END_RCPP
321 | }
322 |
--------------------------------------------------------------------------------
/src/canberra.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | // [[Rcpp::depends(RcppArmadillo)]]
3 |
4 | using namespace Rcpp;
5 |
6 | // [[Rcpp::export]]
7 | NumericVector canberra_rdist(NumericMatrix A) {
8 | int n = A.nrow(), k = A.ncol();
9 |
10 | arma::mat Ar = arma::mat(A.begin(), n, k, false);
11 |
12 | NumericVector C(n * (n-1) / 2);
13 |
14 | int l = 0;
15 | for (int i = 0; i < n; ++i){
16 | arma::mat Arow = Ar.row(i);
17 | for (int j = i + 1; j < n; ++j){
18 | C(l) = sum(abs(Arow - Ar.row(j))/(abs(Arow) + abs(Ar.row(j))));
19 | l++;
20 | }
21 | }
22 |
23 | return wrap(C);
24 | }
25 |
26 | // [[Rcpp::export]]
27 | NumericMatrix canberra_pdist(NumericMatrix A) {
28 | int n = A.nrow(), k = A.ncol();
29 |
30 | arma::mat Ar = arma::mat(A.begin(), n, k, false);
31 |
32 | arma::mat C(n, n);
33 |
34 | for (int i = 0; i < n; ++i){
35 | arma::mat Arow = Ar.row(i);
36 | for (int j = 0; j < n; ++j){
37 | C(i, j) = sum(abs(Arow - Ar.row(j))/(abs(Arow) + abs(Ar.row(j))));
38 | }
39 | }
40 |
41 | return wrap(C);
42 | }
43 |
44 | // [[Rcpp::export]]
45 | NumericMatrix canberra_cdist(NumericMatrix A, NumericMatrix B) {
46 | int m = A.nrow(), n = B.nrow(), k = A.ncol();
47 |
48 | arma::mat Ar = arma::mat(A.begin(), m, k, false);
49 | arma::mat Br = arma::mat(B.begin(), n, k, false);
50 |
51 | arma::mat C(m, n);
52 |
53 | for (int i = 0; i < m; ++i){
54 | arma::mat Arow = Ar.row(i);
55 | for (int j = 0; j < n; ++j){
56 | C(i, j) = sum(abs(Arow - Br.row(j))/(abs(Arow) + abs(Br.row(j))));
57 | }
58 | }
59 |
60 | return wrap(C);
61 | }
--------------------------------------------------------------------------------
/src/canberra.h:
--------------------------------------------------------------------------------
1 | #ifndef CANBERRA_
2 | #define CANBERRA_
3 |
4 | #include
5 |
6 | Rcpp::NumericVector canberra_rdist(Rcpp::NumericMatrix A);
7 | Rcpp::NumericMatrix canberra_pdist(Rcpp::NumericMatrix A);
8 | Rcpp::NumericMatrix canberra_cdist(Rcpp::NumericMatrix A, Rcpp::NumericMatrix B);
9 |
10 | #endif // CANBERRA_
--------------------------------------------------------------------------------
/src/dist.cpp:
--------------------------------------------------------------------------------
1 | #include "canberra.h"
2 | #include "minkowski.h"
3 | #include "manhattan.h"
4 | #include "maximum.h"
5 | #include "hamming.h"
6 | #include "jaccard.h"
7 | #include
8 | // [[Rcpp::depends(RcppArmadillo)]]
9 |
10 | using namespace Rcpp;
11 |
12 | // [[Rcpp::export]]
13 | NumericVector rdist_cpp(NumericMatrix A, String metric, double p=2.0) {
14 | NumericMatrix res;
15 | if (metric == "euclidean"){
16 | return minkowski_rdist(A, 2.0);
17 | }
18 | if (metric == "minkowski"){
19 | return minkowski_rdist(A, p);
20 | }
21 | if (metric == "manhattan"){
22 | return manhattan_rdist(A);
23 | }
24 | if (metric == "chebyshev"){
25 | return maximum_rdist(A);
26 | }
27 | if (metric == "maximum"){
28 | return maximum_rdist(A);
29 | }
30 | if (metric == "canberra"){
31 | return canberra_rdist(A);
32 | }
33 | if (metric == "angular"){
34 | Rcpp::Environment package_env("package:rdist");
35 | Rcpp::Function angular_rdist = package_env["angular_rdist"];
36 | return angular_rdist(A);
37 | }
38 | if (metric == "correlation"){
39 | Rcpp::Environment package_env("package:rdist");
40 | Rcpp::Function correlation_rdist = package_env["correlation_rdist"];
41 | return correlation_rdist(A);
42 | }
43 | if (metric == "absolute_correlation"){
44 | Rcpp::Environment package_env("package:rdist");
45 | Rcpp::Function absolute_correlation_rdist = package_env["absolute_correlation_rdist"];
46 | return absolute_correlation_rdist(A);
47 | }
48 | if (metric == "hamming"){
49 | return hamming_rdist(A);
50 | }
51 | if (metric == "jaccard"){
52 | return jaccard_rdist(A);
53 | }
54 | Rcpp::stop("metric not recognized.");
55 | return NumericMatrix(0);
56 | }
57 |
58 | // [[Rcpp::export]]
59 | NumericMatrix pdist_cpp(NumericMatrix A, String metric, double p=2.0) {
60 | NumericMatrix res;
61 | if (metric == "euclidean"){
62 | return minkowski_pdist(A, 2.0);
63 | }
64 | if (metric == "minkowski"){
65 | return minkowski_pdist(A, p);
66 | }
67 | if (metric == "manhattan"){
68 | return manhattan_pdist(A);
69 | }
70 | if (metric == "chebyshev"){
71 | return maximum_pdist(A);
72 | }
73 | if (metric == "maximum"){
74 | return maximum_pdist(A);
75 | }
76 | if (metric == "canberra"){
77 | return canberra_pdist(A);
78 | }
79 | if (metric == "angular"){
80 | Rcpp::Environment package_env("package:rdist");
81 | Rcpp::Function angular_pdist = package_env["angular_pdist"];
82 | return angular_pdist(A);
83 | }
84 | if (metric == "correlation"){
85 | Rcpp::Environment package_env("package:rdist");
86 | Rcpp::Function correlation_pdist = package_env["correlation_pdist"];
87 | return correlation_pdist(A);
88 | }
89 | if (metric == "absolute_correlation"){
90 | Rcpp::Environment package_env("package:rdist");
91 | Rcpp::Function absolute_correlation_pdist = package_env["absolute_correlation_pdist"];
92 | return absolute_correlation_pdist(A);
93 | }
94 | if (metric == "hamming"){
95 | return hamming_pdist(A);
96 | }
97 | if (metric == "jaccard"){
98 | return jaccard_pdist(A);
99 | }
100 | Rcpp::stop("metric not recognized.");
101 | return NumericMatrix(0);
102 | }
103 |
104 | // [[Rcpp::export]]
105 | NumericMatrix cdist_cpp(NumericMatrix A, NumericMatrix B, String metric, double p=2.0) {
106 | NumericMatrix res;
107 | if (metric == "euclidean"){
108 | return minkowski_cdist(A, B, 2.0);
109 | }
110 | if (metric == "minkowski"){
111 | return minkowski_cdist(A, B, p);
112 | }
113 | if (metric == "manhattan"){
114 | return manhattan_cdist(A, B);
115 | }
116 | if (metric == "chebyshev"){
117 | return maximum_cdist(A, B);
118 | }
119 | if (metric == "maximum"){
120 | return maximum_cdist(A, B);
121 | }
122 | if (metric == "canberra"){
123 | return canberra_cdist(A, B);
124 | }
125 | if (metric == "angular"){
126 | Rcpp::Environment package_env("package:rdist");
127 | Rcpp::Function angular_cdist = package_env["angular_cdist"];
128 | return angular_cdist(A, B);
129 | }
130 | if (metric == "correlation"){
131 | Rcpp::Environment package_env("package:rdist");
132 | Rcpp::Function correlation_cdist = package_env["correlation_cdist"];
133 | return correlation_cdist(A, B);
134 | }
135 | if (metric == "absolute_correlation"){
136 | Rcpp::Environment package_env("package:rdist");
137 | Rcpp::Function absolute_correlation_cdist = package_env["absolute_correlation_cdist"];
138 | return absolute_correlation_cdist(A, B);
139 | }
140 | if (metric == "hamming"){
141 | return hamming_cdist(A, B);
142 | }
143 | if (metric == "jaccard"){
144 | return jaccard_cdist(A, B);
145 | }
146 | Rcpp::stop("metric not recognized.");
147 | return NumericMatrix(0);
148 | }
149 |
150 |
--------------------------------------------------------------------------------
/src/dist.h:
--------------------------------------------------------------------------------
1 | #ifndef DIST_
2 | #define DIST_
3 |
4 | #include
5 |
6 | Rcpp::NumericVector rdist_cpp(Rcpp::NumericMatrix A, Rcpp::String metric, double p);
7 | Rcpp::NumericMatrix pdist_cpp(Rcpp::NumericMatrix A, Rcpp::String metric, double p);
8 | Rcpp::NumericMatrix cdist_cpp(Rcpp::NumericMatrix A, Rcpp::NumericMatrix B, Rcpp::String metric, double p);
9 |
10 | #endif // DIST_
11 |
--------------------------------------------------------------------------------
/src/euclidean.cpp:
--------------------------------------------------------------------------------
1 | // adjusted from https://www.r-bloggers.com/pairwise-distances-in-r/
2 | #include
3 | // [[Rcpp::depends(RcppArmadillo)]]
4 |
5 | using namespace Rcpp;
6 |
7 | // [[Rcpp::export]]
8 | NumericVector euclidean_rdist(NumericMatrix A) {
9 | int n = A.nrow(),
10 | k = A.ncol();
11 | arma::mat Ar = arma::mat(A.begin(), n, k, false);
12 |
13 | arma::colvec An = sum(square(Ar),1);
14 |
15 | arma::mat C = -2 * (Ar * Ar.t());
16 | C.each_col() += An;
17 | C.each_row() += An.t();
18 |
19 | arma::mat D(1, n * (n-1) / 2);
20 | int l = 0;
21 | for (int i = 0; i < n; ++i){
22 | for (int j = i + 1; j < n; ++j){
23 | D(l) = C(i, j);
24 | l++;
25 | }
26 | }
27 |
28 | return wrap(sqrt(D));
29 | }
30 |
31 | // [[Rcpp::export]]
32 | NumericMatrix euclidean_pdist(NumericMatrix A) {
33 | int n = A.nrow(),
34 | k = A.ncol();
35 | arma::mat Ar = arma::mat(A.begin(), n, k, false);
36 |
37 | arma::colvec An = sum(square(Ar), 1);
38 |
39 | arma::mat C = -2 * (Ar * Ar.t());
40 | C.each_col() += An;
41 | C.each_row() += An.t();
42 |
43 | return wrap(sqrt(C));
44 | }
45 |
46 | // [[Rcpp::export]]
47 | NumericMatrix euclidean_cdist(NumericMatrix A, NumericMatrix B) {
48 | int m = A.nrow(),
49 | n = B.nrow(),
50 | k = A.ncol();
51 | arma::mat Ar = arma::mat(A.begin(), m, k, false);
52 | arma::mat Br = arma::mat(B.begin(), n, k, false);
53 |
54 | arma::colvec An = sum(square(Ar), 1);
55 | arma::colvec Bn = sum(square(Br), 1);
56 |
57 | arma::mat C = -2 * (Ar * Br.t());
58 | C.each_col() += An;
59 | C.each_row() += Bn.t();
60 |
61 | return wrap(sqrt(C));
62 | }
--------------------------------------------------------------------------------
/src/farthest_point_sampling.cpp:
--------------------------------------------------------------------------------
1 | #include "dist.h"
2 | #include
3 | // [[Rcpp::depends(RcppArmadillo)]]
4 |
5 | using namespace Rcpp;
6 |
7 |
8 | arma::uvec remove_element(arma::uvec& x, int y){
9 | arma::uvec q1 = arma::find(x == y);
10 | if (!q1.empty()){
11 | x.shed_row(q1(0));
12 | }
13 | return x;
14 | }
15 |
16 | // [[Rcpp::export]]
17 | NumericMatrix farthest_point_sampling_cpp(NumericMatrix mat,
18 | String metric,
19 | int k,
20 | int initial_point_index = 0,
21 | bool return_clusters = false) {
22 | /*
23 | Farthest Point Sampling
24 |
25 | Parameters
26 | ==========
27 | NumericMatrix mat
28 | Original data or distance matrix
29 | String metric
30 | Metric used
31 | int k
32 | Number of points to sample
33 | int initial_point_index (default: 0)
34 | Index of point to start farthest point sampling with
35 |
36 | Return value
37 | ============
38 | The indices of the first k furthest points.
39 | */
40 | // initialize
41 | int n = mat.nrow();
42 | int m = mat.ncol();
43 | arma::mat amat = arma::mat(mat.begin(), n, m, false);
44 | arma::uvec reordering(k, arma::fill::ones);
45 | reordering *= initial_point_index - 1;
46 | arma::uvec preordering = arma::linspace(0, n-1, n);
47 | arma::urowvec cluster_mat(n);
48 | cluster_mat.fill(0);
49 |
50 | // find ordered indices
51 | if (metric == "precomputed"){
52 | arma::mat local_dist;
53 | for (int i = 1; i < k; ++i){
54 | preordering = remove_element(preordering, reordering(i-1));
55 | arma::uvec indices = reordering.rows(0, i-1);
56 | local_dist = amat.submat(indices, preordering);
57 | arma::mat local_min_dist = arma::min(local_dist, 0);
58 | reordering(i) = preordering(local_min_dist.index_max());
59 | }
60 | if (return_clusters){
61 | arma::mat complete_local_dist = amat.rows(reordering);
62 | cluster_mat = arma::index_min(complete_local_dist, 0);
63 | }
64 | }
65 | else {
66 | arma::mat local_min_dist;
67 |
68 | for (int i = 1; i <= k; ++i){
69 | preordering = remove_element(preordering, reordering(i-1));
70 | arma::uvec indices = reordering.rows(0, i-1);
71 |
72 | arma::mat m_indices = amat.row(reordering(i-1));
73 | NumericMatrix mat_indices = wrap(m_indices);
74 |
75 | NumericMatrix n_dist = cdist_cpp(mat_indices, mat, metric, 2);
76 |
77 | arma::mat new_dist = arma::mat(n_dist.begin(), n_dist.ncol(), 1, false);
78 | if (i == 1){
79 | local_min_dist = new_dist;
80 | }
81 | arma::mat local_dist = arma::join_rows(local_min_dist, new_dist);
82 | local_min_dist = arma::min(local_dist, 1);
83 | if (return_clusters){
84 | arma::urowvec pre_cluster_mat = arma::find(arma::index_min(local_dist, 1)).t();
85 | arma::urowvec new_clusters(pre_cluster_mat.n_elem);
86 | new_clusters.fill(i-1);
87 | cluster_mat.elem(pre_cluster_mat) = new_clusters;
88 | }
89 | arma::mat local_local_min_dist = local_min_dist.elem(preordering);
90 | if (i < k){
91 | reordering(i) = preordering(local_local_min_dist.index_max());
92 | }
93 | }
94 | }
95 |
96 | // return reordering
97 | NumericMatrix output = wrap(reordering + 1);
98 | if (return_clusters){
99 | NumericVector clusters = wrap(cluster_mat + 1);
100 | output.attr("clusters") = clusters;
101 | }
102 | return output;
103 | }
104 |
--------------------------------------------------------------------------------
/src/hamming.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | // [[Rcpp::depends(RcppArmadillo)]]
3 |
4 | using namespace Rcpp;
5 |
6 | // [[Rcpp::export]]
7 | NumericVector hamming_rdist(NumericMatrix A) {
8 | int n = A.nrow(), k = A.ncol();
9 |
10 | arma::mat Ar = arma::mat(A.begin(), n, k, false);
11 |
12 | NumericVector C(n * (n-1) / 2);
13 |
14 | int l = 0;
15 | for (int i = 0; i < n; ++i){
16 | arma::mat Arow = Ar.row(i);
17 | for (int j = i + 1; j < n; ++j){
18 | C(l) = sum(Arow != Ar.row(j));
19 | l++;
20 | }
21 | }
22 |
23 | return C / k;
24 | }
25 |
26 | // [[Rcpp::export]]
27 | NumericMatrix hamming_pdist(NumericMatrix A) {
28 | int n = A.nrow(), k = A.ncol();
29 |
30 | arma::mat Ar = arma::mat(A.begin(), n, k, false);
31 |
32 | arma::mat C(n, n);
33 |
34 | for (int i = 0; i < n; ++i){
35 | arma::mat Arow = Ar.row(i);
36 | for (int j = 0; j < n; ++j){
37 | C(i, j) = sum(Arow != Ar.row(j));
38 | }
39 | }
40 |
41 | return wrap(C / k);
42 | }
43 |
44 | // [[Rcpp::export]]
45 | NumericMatrix hamming_cdist(NumericMatrix A, NumericMatrix B) {
46 | int m = A.nrow(), n = B.nrow(), k = A.ncol();
47 |
48 | arma::mat Ar = arma::mat(A.begin(), m, k, false);
49 | arma::mat Br = arma::mat(B.begin(), n, k, false);
50 |
51 | arma::mat C(m, n);
52 |
53 | for (int i = 0; i < m; ++i){
54 | arma::mat Arow = Ar.row(i);
55 | for (int j = 0; j < n; ++j){
56 | C(i, j) = sum(Arow != Br.row(j));
57 | }
58 | }
59 |
60 | return wrap(C / k);
61 | }
--------------------------------------------------------------------------------
/src/hamming.h:
--------------------------------------------------------------------------------
1 | #ifndef HAMMING_
2 | #define HAMMING_
3 |
4 | #include
5 |
6 | Rcpp::NumericVector hamming_rdist(Rcpp::NumericMatrix A);
7 | Rcpp::NumericMatrix hamming_pdist(Rcpp::NumericMatrix A);
8 | Rcpp::NumericMatrix hamming_cdist(Rcpp::NumericMatrix A, Rcpp::NumericMatrix B);
9 |
10 | #endif // HAMMING_
--------------------------------------------------------------------------------
/src/init.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include // for NULL
4 | #include
5 |
6 | /* .Call calls */
7 | extern SEXP _rdist_canberra_cdist(SEXP, SEXP);
8 | extern SEXP _rdist_canberra_pdist(SEXP);
9 | extern SEXP _rdist_canberra_rdist(SEXP);
10 | extern SEXP _rdist_euclidean_cdist(SEXP, SEXP);
11 | extern SEXP _rdist_euclidean_pdist(SEXP);
12 | extern SEXP _rdist_euclidean_rdist(SEXP);
13 | extern SEXP _rdist_farthest_point_sampling_cpp(SEXP, SEXP, SEXP, SEXP, SEXP);
14 | extern SEXP _rdist_hamming_cdist(SEXP, SEXP);
15 | extern SEXP _rdist_hamming_pdist(SEXP);
16 | extern SEXP _rdist_hamming_rdist(SEXP);
17 | extern SEXP _rdist_jaccard_cdist(SEXP, SEXP);
18 | extern SEXP _rdist_jaccard_pdist(SEXP);
19 | extern SEXP _rdist_jaccard_rdist(SEXP);
20 | extern SEXP _rdist_manhattan_cdist(SEXP, SEXP);
21 | extern SEXP _rdist_manhattan_pdist(SEXP);
22 | extern SEXP _rdist_manhattan_rdist(SEXP);
23 | extern SEXP _rdist_maximum_cdist(SEXP, SEXP);
24 | extern SEXP _rdist_maximum_pdist(SEXP);
25 | extern SEXP _rdist_maximum_rdist(SEXP);
26 | extern SEXP _rdist_minkowski_cdist(SEXP, SEXP, SEXP);
27 | extern SEXP _rdist_minkowski_pdist(SEXP, SEXP);
28 | extern SEXP _rdist_minkowski_rdist(SEXP, SEXP);
29 | extern SEXP _rdist_cpp_triangle_inequality(SEXP, SEXP);
30 | extern SEXP _rdist_rdist_cpp(SEXP, SEXP, SEXP);
31 | extern SEXP _rdist_pdist_cpp(SEXP, SEXP, SEXP);
32 | extern SEXP _rdist_cdist_cpp(SEXP, SEXP, SEXP, SEXP);
33 |
34 | static const R_CallMethodDef CallEntries[] = {
35 | {"_rdist_canberra_cdist", (DL_FUNC) &_rdist_canberra_cdist, 2},
36 | {"_rdist_canberra_pdist", (DL_FUNC) &_rdist_canberra_pdist, 1},
37 | {"_rdist_canberra_rdist", (DL_FUNC) &_rdist_canberra_rdist, 1},
38 | {"_rdist_euclidean_cdist", (DL_FUNC) &_rdist_euclidean_cdist, 2},
39 | {"_rdist_euclidean_pdist", (DL_FUNC) &_rdist_euclidean_pdist, 1},
40 | {"_rdist_euclidean_rdist", (DL_FUNC) &_rdist_euclidean_rdist, 1},
41 | {"_rdist_farthest_point_sampling_cpp", (DL_FUNC) &_rdist_farthest_point_sampling_cpp, 5},
42 | {"_rdist_hamming_cdist", (DL_FUNC) &_rdist_hamming_cdist, 2},
43 | {"_rdist_hamming_pdist", (DL_FUNC) &_rdist_hamming_pdist, 1},
44 | {"_rdist_hamming_rdist", (DL_FUNC) &_rdist_hamming_rdist, 1},
45 | {"_rdist_jaccard_cdist", (DL_FUNC) &_rdist_jaccard_cdist, 2},
46 | {"_rdist_jaccard_pdist", (DL_FUNC) &_rdist_jaccard_pdist, 1},
47 | {"_rdist_jaccard_rdist", (DL_FUNC) &_rdist_jaccard_rdist, 1},
48 | {"_rdist_manhattan_cdist", (DL_FUNC) &_rdist_manhattan_cdist, 2},
49 | {"_rdist_manhattan_pdist", (DL_FUNC) &_rdist_manhattan_pdist, 1},
50 | {"_rdist_manhattan_rdist", (DL_FUNC) &_rdist_manhattan_rdist, 1},
51 | {"_rdist_maximum_cdist", (DL_FUNC) &_rdist_maximum_cdist, 2},
52 | {"_rdist_maximum_pdist", (DL_FUNC) &_rdist_maximum_pdist, 1},
53 | {"_rdist_maximum_rdist", (DL_FUNC) &_rdist_maximum_rdist, 1},
54 | {"_rdist_minkowski_cdist", (DL_FUNC) &_rdist_minkowski_cdist, 3},
55 | {"_rdist_minkowski_pdist", (DL_FUNC) &_rdist_minkowski_pdist, 2},
56 | {"_rdist_minkowski_rdist", (DL_FUNC) &_rdist_minkowski_rdist, 2},
57 | {"_rdist_cpp_triangle_inequality", (DL_FUNC) &_rdist_cpp_triangle_inequality, 2},
58 | {"_rdist_rdist_cpp", (DL_FUNC) &_rdist_rdist_cpp, 3},
59 | {"_rdist_pdist_cpp", (DL_FUNC) &_rdist_pdist_cpp, 3},
60 | {"_rdist_cdist_cpp", (DL_FUNC) &_rdist_cdist_cpp, 4},
61 | {NULL, NULL, 0}
62 | };
63 |
64 | void R_init_rdist(DllInfo *dll)
65 | {
66 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
67 | R_useDynamicSymbols(dll, FALSE);
68 | }
69 |
--------------------------------------------------------------------------------
/src/jaccard.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | // [[Rcpp::depends(RcppArmadillo)]]
3 |
4 | using namespace Rcpp;
5 |
6 | // [[Rcpp::export]]
7 | NumericVector jaccard_rdist(NumericMatrix A) {
8 | int n = A.nrow(), k = A.ncol();
9 |
10 | arma::mat Ar = arma::mat(A.begin(), n, k, false);
11 |
12 | NumericVector C(n * (n-1) / 2);
13 |
14 | int l = 0;
15 | for (int i = 0; i < n; ++i){
16 | arma::rowvec Arow = Ar.row(i);
17 | for (int j = i + 1; j < n; ++j){
18 | arma::urowvec zerovec = (Arow == 0);
19 | zerovec.elem( find(Ar.row(j) != 0) ).zeros();
20 | if (all(Arow == Ar.row(j))) {
21 | C(l) = 0.0;
22 | }
23 | else{
24 | C(l) = (double)sum(Arow != Ar.row(j)) / (double)sum(zerovec != 1);
25 | }
26 |
27 | l++;
28 | }
29 | }
30 |
31 | return wrap(C);
32 | }
33 |
34 | // [[Rcpp::export]]
35 | NumericMatrix jaccard_pdist(NumericMatrix A) {
36 | int n = A.nrow(), k = A.ncol();
37 |
38 | arma::mat Ar = arma::mat(A.begin(), n, k, false);
39 |
40 | arma::mat C(n, n);
41 |
42 | for (int i = 0; i < n; ++i){
43 | arma::mat Arow = Ar.row(i);
44 | for (int j = 0; j < n; ++j){
45 | arma::urowvec zerovec = (Arow == 0);
46 | zerovec.elem( find(Ar.row(j) != 0) ).zeros();
47 | if (all(Arow == Ar.row(j))) {
48 | C(i, j) = 0.0;
49 | }
50 | else{
51 | C(i, j) = (double)sum(Arow != Ar.row(j)) / (double)sum(zerovec != 1);
52 | }
53 | }
54 | }
55 |
56 | return wrap(C);
57 | }
58 |
59 | // [[Rcpp::export]]
60 | NumericMatrix jaccard_cdist(NumericMatrix A, NumericMatrix B) {
61 | int m = A.nrow(), n = B.nrow(), k = A.ncol();
62 |
63 | arma::mat Ar = arma::mat(A.begin(), m, k, false);
64 | arma::mat Br = arma::mat(B.begin(), n, k, false);
65 |
66 | arma::mat C(m, n);
67 |
68 | for (int i = 0; i < m; ++i){
69 | arma::mat Arow = Ar.row(i);
70 | for (int j = 0; j < n; ++j){
71 | arma::urowvec zerovec = (Arow == 0);
72 | zerovec.elem( find(Br.row(j) != 0) ).zeros();
73 | if (all(Arow == Br.row(j))) {
74 | C(i, j) = 0.0;
75 | }
76 | else{
77 | C(i, j) = (double)sum(Arow != Br.row(j)) / (double)sum(zerovec != 1);
78 | }
79 | }
80 | }
81 |
82 | return wrap(C);
83 | }
--------------------------------------------------------------------------------
/src/jaccard.h:
--------------------------------------------------------------------------------
1 | #ifndef JACCARD_
2 | #define JACCARD_
3 |
4 | #include
5 |
6 | Rcpp::NumericVector jaccard_rdist(Rcpp::NumericMatrix A);
7 | Rcpp::NumericMatrix jaccard_pdist(Rcpp::NumericMatrix A);
8 | Rcpp::NumericMatrix jaccard_cdist(Rcpp::NumericMatrix A, Rcpp::NumericMatrix B);
9 |
10 | #endif // JACCARD_
--------------------------------------------------------------------------------
/src/manhattan.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | // [[Rcpp::depends(RcppArmadillo)]]
3 |
4 | using namespace Rcpp;
5 |
6 | // [[Rcpp::export]]
7 | NumericVector manhattan_rdist(NumericMatrix A) {
8 | int n = A.nrow(), k = A.ncol();
9 |
10 | arma::mat Ar = arma::mat(A.begin(), n, k, false);
11 |
12 | NumericVector C(n * (n-1) / 2);
13 |
14 | int l = 0;
15 | for (int i = 0; i < n; ++i){
16 | arma::mat Arow = Ar.row(i);
17 | for (int j = i + 1; j < n; ++j){
18 | C(l) = sum(abs(Arow - Ar.row(j)));
19 | l++;
20 | }
21 | }
22 |
23 | return C;
24 | }
25 |
26 | // [[Rcpp::export]]
27 | NumericMatrix manhattan_pdist(NumericMatrix A) {
28 | int n = A.nrow(), k = A.ncol();
29 |
30 | arma::mat Ar = arma::mat(A.begin(), n, k, false);
31 |
32 | arma::mat C(n, n);
33 |
34 | for (int i = 0; i < n; ++i){
35 | arma::mat Arow = Ar.row(i);
36 | for (int j = 0; j < n; ++j){
37 | C(i, j) = sum(abs(Arow - Ar.row(j)));
38 | }
39 | }
40 |
41 | return wrap(C);
42 | }
43 |
44 | // [[Rcpp::export]]
45 | NumericMatrix manhattan_cdist(NumericMatrix A, NumericMatrix B) {
46 | int m = A.nrow(), n = B.nrow(), k = A.ncol();
47 |
48 | arma::mat Ar = arma::mat(A.begin(), m, k, false);
49 | arma::mat Br = arma::mat(B.begin(), n, k, false);
50 |
51 | arma::mat C(m, n);
52 |
53 | for (int i = 0; i < m; ++i){
54 | arma::mat Arow = Ar.row(i);
55 | for (int j = 0; j < n; ++j){
56 | C(i, j) = sum(abs(Arow - Br.row(j)));
57 | }
58 | }
59 |
60 | return wrap(C);
61 | }
--------------------------------------------------------------------------------
/src/manhattan.h:
--------------------------------------------------------------------------------
1 | #ifndef MANHATTAN_
2 | #define MANHATTAN_
3 |
4 | #include
5 |
6 | Rcpp::NumericVector manhattan_rdist(Rcpp::NumericMatrix A);
7 | Rcpp::NumericMatrix manhattan_pdist(Rcpp::NumericMatrix A);
8 | Rcpp::NumericMatrix manhattan_cdist(Rcpp::NumericMatrix A, Rcpp::NumericMatrix B);
9 |
10 | #endif // MANHATTAN_
--------------------------------------------------------------------------------
/src/maximum.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | // [[Rcpp::depends(RcppArmadillo)]]
3 |
4 | using namespace Rcpp;
5 |
6 | // [[Rcpp::export]]
7 | NumericVector maximum_rdist(NumericMatrix A) {
8 | int n = A.nrow(), k = A.ncol();
9 |
10 | arma::mat Ar = arma::mat(A.begin(), n, k, false);
11 |
12 | NumericVector C(n * (n-1) / 2);
13 |
14 | int l = 0;
15 | for (int i = 0; i < n; ++i){
16 | arma::mat Arow = Ar.row(i);
17 | for (int j = i + 1; j < n; ++j){
18 | C(l) = max(abs(Arow - Ar.row(j)));
19 | l++;
20 | }
21 | }
22 |
23 | return C;
24 | }
25 |
26 | // [[Rcpp::export]]
27 | NumericMatrix maximum_pdist(NumericMatrix A) {
28 | int n = A.nrow(), k = A.ncol();
29 |
30 | arma::mat Ar = arma::mat(A.begin(), n, k, false);
31 |
32 | arma::mat C(n, n);
33 |
34 | for (int i = 0; i < n; ++i){
35 | arma::mat Arow = Ar.row(i);
36 | for (int j = 0; j < n; ++j){
37 | C(i, j) = max(abs(Arow - Ar.row(j)));
38 | }
39 | }
40 |
41 | return wrap(C);
42 | }
43 |
44 | // [[Rcpp::export]]
45 | NumericMatrix maximum_cdist(NumericMatrix A, NumericMatrix B) {
46 | int m = A.nrow(), n = B.nrow(), k = A.ncol();
47 |
48 | arma::mat Ar = arma::mat(A.begin(), m, k, false);
49 | arma::mat Br = arma::mat(B.begin(), n, k, false);
50 |
51 | arma::mat C(m, n);
52 |
53 | for (int i = 0; i < m; ++i){
54 | arma::mat Arow = Ar.row(i);
55 | for (int j = 0; j < n; ++j){
56 | C(i, j) = max(abs(Arow - Br.row(j)));
57 | }
58 | }
59 |
60 | return wrap(C);
61 | }
--------------------------------------------------------------------------------
/src/maximum.h:
--------------------------------------------------------------------------------
1 | #ifndef MAXIMUM_
2 | #define MAXIMUM_
3 |
4 | #include
5 |
6 | Rcpp::NumericVector maximum_rdist(Rcpp::NumericMatrix A);
7 | Rcpp::NumericMatrix maximum_pdist(Rcpp::NumericMatrix A);
8 | Rcpp::NumericMatrix maximum_cdist(Rcpp::NumericMatrix A, Rcpp::NumericMatrix B);
9 |
10 | #endif // MAXIMUM_
--------------------------------------------------------------------------------
/src/minkowski.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | // [[Rcpp::depends(RcppArmadillo)]]
3 |
4 | using namespace Rcpp;
5 |
6 | // [[Rcpp::export]]
7 | NumericVector minkowski_rdist(NumericMatrix A, double p) {
8 | int n = A.nrow(), k = A.ncol();
9 |
10 | arma::mat Ar = arma::mat(A.begin(), n, k, false);
11 |
12 | arma::mat C(1, n * (n-1) / 2);
13 |
14 | int l = 0;
15 | for (int i = 0; i < n; ++i){
16 | arma::mat Arow = Ar.row(i);
17 | for (int j = i + 1; j < n; ++j){
18 | C(l) = sum(pow(abs(Arow - Ar.row(j)), p));
19 | l++;
20 | }
21 | }
22 |
23 | return wrap(pow(C, 1/p));
24 | }
25 |
26 | // [[Rcpp::export]]
27 | NumericMatrix minkowski_pdist(NumericMatrix A, double p) {
28 | int n = A.nrow(), k = A.ncol();
29 |
30 | arma::mat Ar = arma::mat(A.begin(), n, k, false);
31 |
32 | arma::mat C(n, n);
33 |
34 | for (int i = 0; i < n; ++i){
35 | arma::mat Arow = Ar.row(i);
36 | for (int j = 0; j < n; ++j){
37 | C(i, j) = sum(pow(abs(Arow - Ar.row(j)), p));
38 | }
39 | }
40 |
41 | return wrap(pow(C, 1/p));
42 | }
43 |
44 | // [[Rcpp::export]]
45 | NumericMatrix minkowski_cdist(NumericMatrix A, NumericMatrix B, double p) {
46 | int m = A.nrow(), n = B.nrow(), k = A.ncol();
47 |
48 | arma::mat Ar = arma::mat(A.begin(), m, k, false);
49 | arma::mat Br = arma::mat(B.begin(), n, k, false);
50 |
51 | arma::mat C(m, n);
52 |
53 | for (int i = 0; i < m; ++i){
54 | arma::mat Arow = Ar.row(i);
55 | for (int j = 0; j < n; ++j){
56 | C(i, j) = sum(pow(abs(Arow - Br.row(j)), p));
57 | }
58 | }
59 |
60 | return wrap(pow(C, 1/p));
61 | }
62 |
--------------------------------------------------------------------------------
/src/minkowski.h:
--------------------------------------------------------------------------------
1 | #ifndef MINKOWSKI_
2 | #define MINKOWSKI_
3 |
4 | #include
5 |
6 | Rcpp::NumericVector minkowski_rdist(Rcpp::NumericMatrix A, double p);
7 | Rcpp::NumericMatrix minkowski_pdist(Rcpp::NumericMatrix A, double p);
8 | Rcpp::NumericMatrix minkowski_cdist(Rcpp::NumericMatrix A, Rcpp::NumericMatrix B, double p);
9 |
10 | #endif // MINKOWSKI_
11 |
--------------------------------------------------------------------------------
/src/triangle.cpp:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | // [[Rcpp::export]]
4 | bool cpp_triangle_inequality(Rcpp::NumericMatrix mat, double tolerance = 0) {
5 | int n = mat.nrow();
6 |
7 | for (int i = 0; i < n; ++i){
8 | for (int j = 0; j < i; ++j){
9 | for (int k = 0; k < n; ++k){
10 | bool ieq = mat(i, j) <= mat(i, k) + mat(k, j) + tolerance;
11 | if (!ieq){
12 | Rcpp::Rcout << "mat[" << i+1 << ", " << j+1 << "] > mat[" << i+1 << ", " << k+1 << "] + mat[" << k+1 << ", " << j+1 << "]" << std::endl;
13 | return false;
14 | }
15 | }
16 | }
17 | }
18 |
19 | return true;
20 | }
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(rdist)
3 |
4 | test_check("rdist")
5 |
--------------------------------------------------------------------------------
/tests/testthat/test-canberra.R:
--------------------------------------------------------------------------------
1 | context("canberra")
2 |
3 | test_that("canberra metric works as expected", {
4 | x <- matrix(sample(1:6, 200, replace = TRUE), nrow = 100)
5 |
6 | dist_dist <- dist(x, method = "canberra")
7 | dist_mat <- as.matrix(dist_dist)
8 | attr(dist_mat, "dimnames") <- NULL
9 | # check pdist and cdist
10 | expect_equivalent(dist_dist, rdist(x, metric = "canberra"))
11 | expect_equivalent(dist_mat, pdist(x, metric = "canberra"))
12 | expect_equivalent(dist_mat, cdist(x, x, metric = "canberra"))
13 | expect_equivalent(dist_mat[1:2, 3:100],
14 | cdist(x[1:2, ], x[3:100, ], metric = "canberra"))
15 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE],
16 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "canberra"))
17 | })
18 |
--------------------------------------------------------------------------------
/tests/testthat/test-correlation.R:
--------------------------------------------------------------------------------
1 | context("correlation")
2 |
3 | bound <- function(x, min = -1, max = 1){
4 | x[x > max] <- max
5 | x[x < min] <- min
6 | x
7 | }
8 |
9 | test_that("correlation metric works as expected", {
10 | x <- matrix(runif(200), nrow = 100)
11 |
12 | # reference results
13 | dist_mat <- sqrt((1 - bound(cor(t(x))))/2)
14 | dist_dist <- as.dist(dist_mat)
15 | # check pdist and cdist
16 | expect_equal(dist_dist, rdist(x, metric = "correlation"), tolerance = 1e-7, check.attributes = FALSE)
17 | expect_equal(dist_mat, pdist(x, metric = "correlation"), tolerance = 1e-7)
18 | expect_equal(dist_mat, cdist(x, x, metric = "correlation"), tolerance = 1e-7)
19 | expect_equal(dist_mat[1:2, 3:100], cdist(x[1:2, ], x[3:100, ], metric = "correlation"), tolerance = 1e-7)
20 | expect_equal(dist_mat[1, 2:100, drop = FALSE], cdist(x[1, , drop = FALSE], x[2:100, ], metric = "correlation"), tolerance = 1e-7)
21 | })
22 |
23 | test_that("angular metric works as expected", {
24 | x <- matrix(runif(200), nrow = 100)
25 |
26 | # reference results
27 | dist_mat <- acos(bound(cor(t(x))))
28 | dist_dist <- as.dist(dist_mat)
29 | # check pdist and cdist
30 | expect_equal(dist_dist, rdist(x, metric = "angular"), tolerance = 1e-7, check.attributes = FALSE)
31 | expect_equal(dist_mat, pdist(x, metric = "angular"), tolerance = 1e-7)
32 | expect_equal(dist_mat, cdist(x, x, metric = "angular"), tolerance = 1e-7)
33 | expect_equal(dist_mat[1:2, 3:100], cdist(x[1:2, ], x[3:100, ], metric = "angular"), tolerance = 1e-7)
34 | expect_equal(dist_mat[1, 2:100, drop = FALSE], cdist(x[1, , drop = FALSE], x[2:100, ], metric = "angular"), tolerance = 1e-7)
35 | })
36 |
37 | test_that("absolute correlation metric works as expected", {
38 | x <- matrix(runif(200), nrow = 100)
39 |
40 | # reference results
41 | dist_mat <- sqrt((1 - bound(cor(t(x)))^2))
42 | dist_dist <- as.dist(dist_mat)
43 | # check pdist and cdist
44 | expect_equal(dist_dist, rdist(x, metric = "absolute_correlation"), tolerance = 1e-7, check.attributes = FALSE)
45 | expect_equal(dist_mat, pdist(x, metric = "absolute_correlation"), tolerance = 1e-7)
46 | expect_equal(dist_mat, cdist(x, x, metric = "absolute_correlation"), tolerance = 1e-7)
47 | expect_equal(dist_mat[1:2, 3:100], cdist(x[1:2, ], x[3:100, ], metric = "absolute_correlation"), tolerance = 1e-7)
48 | expect_equal(dist_mat[1, 2:100, drop = FALSE], cdist(x[1, , drop = FALSE], x[2:100, ], metric = "absolute_correlation"), tolerance = 1e-7)
49 | })
50 |
--------------------------------------------------------------------------------
/tests/testthat/test-euclidean.R:
--------------------------------------------------------------------------------
1 | context("euclidean")
2 |
3 | test_that("euclidean metric works as expected", {
4 | x <- matrix(runif(200), nrow = 100)
5 |
6 | # reference results
7 | dist_dist <- dist(x)
8 | dist_mat <- as.matrix(dist_dist)
9 | attr(dist_mat, "dimnames") <- NULL
10 | # check pdist and cdist
11 | expect_equivalent(dist_dist, rdist(x))
12 | expect_equivalent(dist(rbind(x[1:5, ], x[1:5,])), rdist(rbind(x[1:5, ], x[1:5,])))
13 | expect_equivalent(dist_mat, pdist(x))
14 | expect_equivalent(dist_mat, cdist(x, x))
15 | expect_equivalent(dist_mat[1:2, 3:100], cdist(x[1:2, ], x[3:100, ]))
16 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE], cdist(x[1, , drop = FALSE], x[2:100, ]))
17 | })
18 |
19 | test_that("euclidean metric works when distance is 0", {
20 | a <- c(-1.18541558816627, -1.37865753930762, -0.413418072278032, -0.327468637225544)
21 | b <- rnorm(4)
22 | amat <- matrix(c(a, a), nrow = 2, byrow = TRUE)
23 | bmat <- matrix(c(b, b), nrow = 2, byrow = TRUE)
24 |
25 | expect_equivalent(
26 | dist(amat, method="euclidean"),
27 | rdist::rdist(amat, metric="euclidean")
28 | )
29 | expect_equivalent(
30 | dist(amat, method = "euclidean"),
31 | rdist::rdist(amat, metric="minkowski")
32 | )
33 | expect_equivalent(
34 | dist(bmat, method="euclidean"),
35 | rdist::rdist(bmat, metric="euclidean")
36 | )
37 | expect_equivalent(
38 | dist(bmat, method = "euclidean"),
39 | rdist::rdist(bmat, metric="minkowski")
40 | )
41 | })
42 |
--------------------------------------------------------------------------------
/tests/testthat/test-hamming.R:
--------------------------------------------------------------------------------
1 | context("hamming")
2 |
3 | test_that("hamming metric works as expected", {
4 | # generate data
5 | x <- matrix(sample(1:5, 200, replace = TRUE), nrow = 100)
6 |
7 | # reference result
8 | n <- nrow(x)
9 | m <- matrix(0, nrow=n, ncol=n)
10 | for(i in seq_len(n - 1))
11 | for(j in seq(i, n))
12 | m[j, i] <- m[i, j] <- sum(x[i,] != x[j,])
13 | dist_mat <- m/ncol(x)
14 | dist_dist <- as.dist(dist_mat)
15 |
16 | # check pdist and cdist
17 | expect_equivalent(dist_dist, rdist(x, metric = "hamming"))
18 | expect_equivalent(dist_mat, pdist(x, metric = "hamming"))
19 | expect_equivalent(dist_mat, cdist(x, x, metric = "hamming"))
20 | expect_equivalent(dist_mat[1:2, 3:100],
21 | cdist(x[1:2, ], x[3:100, ], metric = "hamming"))
22 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE],
23 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "hamming"))
24 | })
25 |
--------------------------------------------------------------------------------
/tests/testthat/test-jaccard.R:
--------------------------------------------------------------------------------
1 | context("jaccard")
2 |
3 | test_that("jaccard metric works as expected", {
4 | # generate data
5 | x <- matrix(sample(0:5, 200, replace = TRUE), nrow = 100)
6 |
7 | # reference result
8 | n <- nrow(x)
9 | m <- matrix(0, nrow=n, ncol=n)
10 | for(i in seq_len(n - 1))
11 | for(j in seq(i, n)){
12 | if (all(x[i, ] == x[j, ])) {
13 | m[j, i] <- m[i, j] <- 0
14 | }
15 | else {
16 | m[j, i] <- m[i, j] <- sum(x[i,] != x[j,]) / sum(x[i, ] != 0 | x[j, ] != 0)
17 | }
18 | }
19 | dist_mat <- m
20 | dist_dist <- as.dist(dist_mat)
21 |
22 | # check pdist and cdist
23 | expect_equivalent(dist_dist, rdist(x, metric = "jaccard"))
24 | expect_equivalent(dist_mat, pdist(x, metric = "jaccard"))
25 | expect_equivalent(dist_mat, cdist(x, x, metric = "jaccard"))
26 | expect_equivalent(dist_mat[1:2, 3:100],
27 | cdist(x[1:2, ], x[3:100, ], metric = "jaccard"))
28 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE],
29 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "jaccard"))
30 | })
31 |
--------------------------------------------------------------------------------
/tests/testthat/test-manhattan.R:
--------------------------------------------------------------------------------
1 | context("manhattan")
2 |
3 | test_that("manhattan metric works as expected", {
4 | x <- matrix(rnorm(200), nrow = 100)
5 |
6 | dist_dist <- dist(x, method = "manhattan")
7 | dist_mat <- as.matrix(dist_dist)
8 | attr(dist_mat, "dimnames") <- NULL
9 | # check pdist and cdist
10 | expect_equivalent(dist_dist, rdist(x, metric = "manhattan"))
11 | expect_equivalent(dist_mat, pdist(x, metric = "manhattan"))
12 | expect_equivalent(dist_mat, cdist(x, x, metric = "manhattan"))
13 | expect_equivalent(dist_mat[1:2, 3:100],
14 | cdist(x[1:2, ], x[3:100, ], metric = "manhattan"))
15 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE],
16 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "manhattan"))
17 | })
18 |
--------------------------------------------------------------------------------
/tests/testthat/test-maximum.R:
--------------------------------------------------------------------------------
1 | context("maximum")
2 |
3 | test_that("maximum metric works as expected", {
4 | x <- matrix(rnorm(200), nrow = 100)
5 |
6 | dist_dist <- dist(x, method = "maximum")
7 | dist_mat <- as.matrix(dist_dist)
8 | attr(dist_mat, "dimnames") <- NULL
9 | # check pdist and cdist
10 | expect_equivalent(dist_dist, rdist(x, metric = "maximum"))
11 | expect_equivalent(dist_mat, pdist(x, metric = "maximum"))
12 | expect_equivalent(dist_mat, cdist(x, x, metric = "maximum"))
13 | expect_equivalent(dist_mat[1:2, 3:100],
14 | cdist(x[1:2, ], x[3:100, ], metric = "maximum"))
15 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE],
16 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "maximum"))
17 | })
18 |
--------------------------------------------------------------------------------
/tests/testthat/test-minkowski.R:
--------------------------------------------------------------------------------
1 | context("minkowski")
2 |
3 | test_that("minkowski metric works as expected", {
4 | x <- matrix(rnorm(200), nrow = 100)
5 |
6 | # p = 2
7 | p <- 2
8 | dist_dist <- dist(x, method = "minkowski", p = p)
9 | dist_mat <- as.matrix(dist_dist)
10 | attr(dist_mat, "dimnames") <- NULL
11 | # check pdist and cdist
12 | expect_equivalent(dist_dist, rdist(x, metric = "minkowski", p = p))
13 | expect_equivalent(dist_mat, pdist(x, metric = "minkowski", p = p))
14 | expect_equivalent(dist_mat, cdist(x, x, metric = "minkowski", p = p))
15 | expect_equivalent(dist_mat[1:2, 3:100],
16 | cdist(x[1:2, ], x[3:100, ], metric = "minkowski", p = p))
17 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE],
18 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "minkowski", p = p))
19 |
20 | # p = 1:
21 | p <- 1
22 | dist_dist <- dist(x, method = "minkowski", p = p)
23 | dist_mat <- as.matrix(dist_dist)
24 | attr(dist_mat, "dimnames") <- NULL
25 | # check pdist and cdist
26 | expect_equivalent(dist_dist, rdist(x, metric = "minkowski", p = p))
27 | expect_equivalent(dist_mat, pdist(x, metric = "minkowski", p = p))
28 | expect_equivalent(dist_mat, cdist(x, x, metric = "minkowski", p = p))
29 | expect_equivalent(dist_mat[1:2, 3:100],
30 | cdist(x[1:2, ], x[3:100, ], metric = "minkowski", p = p))
31 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE],
32 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "minkowski", p = p))
33 |
34 | # p = 100:
35 | p <- 100
36 | dist_dist <- dist(x, method = "minkowski", p = p)
37 | dist_mat <- as.matrix(dist_dist)
38 | attr(dist_mat, "dimnames") <- NULL
39 | # check pdist and cdist
40 | expect_equivalent(dist_dist, rdist(x, metric = "minkowski", p = p))
41 | expect_equivalent(dist_mat, pdist(x, metric = "minkowski", p = p))
42 | expect_equivalent(dist_mat, cdist(x, x, metric = "minkowski", p = p))
43 | expect_equivalent(dist_mat[1:2, 3:100],
44 | cdist(x[1:2, ], x[3:100, ], metric = "minkowski", p = p))
45 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE],
46 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = "minkowski", p = p))
47 | })
48 |
--------------------------------------------------------------------------------
/tests/testthat/test-user-defined.R:
--------------------------------------------------------------------------------
1 | context("minkowski")
2 |
3 | test_that("user-defined metric works as expected", {
4 | x <- matrix(rnorm(200), nrow = 100)
5 |
6 | # euclidean metric
7 | euclidean_function <- function(x, y){
8 | sqrt(sum((x-y)^2))
9 | }
10 | dist_dist <- dist(x, method = "euclidean")
11 | dist_mat <- as.matrix(dist_dist)
12 | attr(dist_mat, "dimnames") <- NULL
13 | # check pdist and cdist
14 | expect_equivalent(dist_dist, rdist(x, metric = euclidean_function))
15 | expect_equivalent(dist_mat, pdist(x, metric = euclidean_function))
16 | expect_equivalent(dist_mat, cdist(x, x, metric = euclidean_function))
17 | expect_equivalent(dist_mat[1:2, 3:100],
18 | cdist(x[1:2, ], x[3:100, ], metric = euclidean_function))
19 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE],
20 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = euclidean_function))
21 |
22 | # manhattan metric
23 | manhattan_function <- function(x, y){
24 | sum(abs(x-y))
25 | }
26 | dist_dist <- dist(x, method = "manhattan")
27 | dist_mat <- as.matrix(dist_dist)
28 | attr(dist_mat, "dimnames") <- NULL
29 | # check pdist and cdist
30 | expect_equivalent(dist_dist, rdist(x, metric = manhattan_function))
31 | expect_equivalent(dist_mat, pdist(x, metric = manhattan_function))
32 | expect_equivalent(dist_mat, cdist(x, x, metric = manhattan_function))
33 | expect_equivalent(dist_mat[1:2, 3:100],
34 | cdist(x[1:2, ], x[3:100, ], metric = manhattan_function))
35 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE],
36 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = manhattan_function))
37 |
38 | # maximum metric
39 | maximum_function <- function(x, y){
40 | max(abs(x-y))
41 | }
42 | dist_dist <- dist(x, method = "maximum")
43 | dist_mat <- as.matrix(dist_dist)
44 | attr(dist_mat, "dimnames") <- NULL
45 | # check pdist and cdist
46 | expect_equivalent(dist_dist, rdist(x, metric = maximum_function))
47 | expect_equivalent(dist_mat, pdist(x, metric = maximum_function))
48 | expect_equivalent(dist_mat, cdist(x, x, metric = maximum_function))
49 | expect_equivalent(dist_mat[1:2, 3:100],
50 | cdist(x[1:2, ], x[3:100, ], metric = maximum_function))
51 | expect_equivalent(dist_mat[1, 2:100, drop = FALSE],
52 | cdist(x[1, , drop = FALSE], x[2:100, , drop = FALSE], metric = maximum_function))
53 | })
54 |
--------------------------------------------------------------------------------