├── data
    └── EVnormal.rda
├── docs
    ├── pkgdown.yml
    ├── reference
    │   ├── pkgdown.yml
    │   ├── energy-deprecated.html
    │   └── energy-defunct.html
    ├── link.svg
    ├── docsearch.js
    ├── pkgdown.js
    ├── 404.html
    └── authors.html
├── R
    ├── pdcor.R
    ├── energy-defunct.R
    ├── indep-deprecated.R
    ├── Ecluster.R
    ├── centering.R
    ├── mutual-indep.R
    ├── dcovu.R
    ├── RcppExports.R
    ├── energy-deprecated.R
    ├── mvI.R
    ├── kgroups.R
    ├── dcorT.R
    ├── edist.R
    ├── pdcov-test.R
    ├── util.R
    ├── Epoisson.R
    ├── Eeqdist.R
    ├── dcov.R
    ├── Emvnorm.R
    ├── dcov2d.R
    └── disco.R
├── src
    ├── U-product.cpp
    ├── Rcpp-utilities.cpp
    ├── dcovU.cpp
    ├── projection.cpp
    ├── poissonM.cpp
    ├── centering.cpp
    ├── energy_init.c
    ├── partial-dcor.cpp
    ├── mvI.cpp
    ├── B-tree.cpp
    ├── kgroups.cpp
    ├── dcov.c
    ├── RcppExports.cpp
    └── utilities.c
├── README.md
├── man
    ├── energy-defunct.Rd
    ├── energy-deprecated.Rd
    ├── sortrank.Rd
    ├── eigen.Rd
    ├── U_product.Rd
    ├── energy-package.Rd
    ├── dmatrix.Rd
    ├── mutualIndep.Rd
    ├── pdcor.Rd
    ├── centering.Rd
    ├── dcovU_stats.Rd
    ├── dcovu.Rd
    ├── dcorT.Rd
    ├── normalGOF.Rd
    ├── mvnorm-test.Rd
    ├── dcov2d.Rd
    ├── kgroups.Rd
    ├── poisson.Rd
    ├── indep-deprecated.Rd
    ├── edist.Rd
    ├── mvI.test.Rd
    ├── dcov.Rd
    ├── disco.Rd
    ├── dcov.test.Rd
    ├── energy.hclust.Rd
    └── eqdist.etest.Rd
├── NAMESPACE
└── DESCRIPTION


/data/EVnormal.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mariarizzo/energy/HEAD/data/EVnormal.rda


--------------------------------------------------------------------------------
/docs/pkgdown.yml:
--------------------------------------------------------------------------------
1 | pandoc: '3.3'
2 | pkgdown: 2.1.0
3 | pkgdown_sha: ~
4 | articles: {}
5 | last_built: 2024-08-27T19:42Z
6 | 


--------------------------------------------------------------------------------
/docs/reference/pkgdown.yml:
--------------------------------------------------------------------------------
1 | pandoc: '3.3'
2 | pkgdown: 2.1.0
3 | pkgdown_sha: ~
4 | articles: {}
5 | last_built: 2024-08-25T21:46Z
6 | 


--------------------------------------------------------------------------------
/R/pdcor.R:
--------------------------------------------------------------------------------
 1 | ## pdcor.R
 2 | ##
 3 | ##
 4 | 
 5 | pdcor <- function(x, y, z) {
 6 |   x <- .arg2dist.matrix(x)
 7 |   y <- .arg2dist.matrix(y)
 8 |   z <- .arg2dist.matrix(z)
 9 |   partial_dcor(x, y, z)["pdcor"]
10 | }
11 | 
12 | pdcov <- function(x, y, z) {
13 |   x <- .arg2dist.matrix(x)
14 |   y <- .arg2dist.matrix(y)
15 |   z <- .arg2dist.matrix(z)
16 |   partial_dcov(x, y, z)
17 | }
18 | 
19 | 


--------------------------------------------------------------------------------
/R/energy-defunct.R:
--------------------------------------------------------------------------------
 1 | ## defunct functions from the energy package
 2 | 
 3 | dcor.ttest <- function(x, y, distance=FALSE) {
 4 |   .Defunct(new = "dcorT.test", package = "energy",
 5 |               msg = "dcort.ttest replaced by dcorT.test")
 6 | }
 7 | 
 8 | dcor.t <- function(x, y, distance=FALSE) {
 9 |   .Deprecated(new = "dcorT", package = "energy",
10 |               msg = "dcor.t replaced by dcorT")
11 | }
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/R/indep-deprecated.R:
--------------------------------------------------------------------------------
 1 | # deprecated independence test
 2 | 
 3 | indep.test<-
 4 | function(x, y, method = c("dcov","mvI"), index = 1, R) {
 5 |   # two energy tests for multivariate independence
 6 |   .Deprecated(new = "dcov.test", package = "energy",
 7 |               msg = "indep.test is deprecated, 
 8 |               replaced by dcov.test or mvI.test")
 9 |   
10 |   type <- match.arg(method)
11 |     if (type == "dcov")
12 |         return(dcov.test(x, y, index, R)) else
13 |     if (type == "mvI")
14 |         return(mvI.test(x, y, R))
15 | }
16 | 


--------------------------------------------------------------------------------
/src/U-product.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | /*
 5 |  Author: Maria L. Rizzo
 6 |  energy package
 7 |  github.com/mariarizzo/energy
 8 |  */
 9 | 
10 | 
11 | // [[Rcpp::export]]
12 | double U_product(NumericMatrix U, NumericMatrix V) {
13 |   // U and V are U-centered dissimilarity matrices of the two samples
14 |   int n = U.nrow();
15 |   int i, j;
16 |   double sums = 0.0;
17 | 
18 |   for (i = 0; i < n; i++)
19 |     for (j=0; j<i; j++)
20 |       sums += U(i, j) * V(i, j);
21 |   sums = 2.0 * sums / ((double) n * (n-3));
22 |   return (sums);
23 | }
24 | 
25 | 


--------------------------------------------------------------------------------
/R/Ecluster.R:
--------------------------------------------------------------------------------
 1 | 
 2 | energy.hclust <-
 3 | function(dst, alpha = 1) {
 4 |     if (!inherits(dst, "dist"))
 5 |       stop("The first argument must be a dist object.")
 6 |     d <- dst
 7 |     n <- attr(d, "Size")
 8 |     if (!isTRUE(all.equal(alpha, 1))) {
 9 |     	if (alpha > 2)
10 |     	    warning("Exponent alpha should be in (0,2]")
11 |       if (alpha < 0)
12 |         stop("Cannot use negative exponent on distance.")
13 |     	d <- d^alpha
14 |     }
15 |     ## heights of hclust are half of energy; otherwise equivalent
16 |     return(hclust(d, method = "ward.D"))
17 | }
18 | 
19 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # energy
 2 | energy package for R
 3 | 
 4 | The energy package for R implements several methods in multivariate analysis
 5 | and multivariate inference based on the energy distance, which characterizes
 6 | equality of distributions. 
 7 | 
 8 | Distance correlation (multivariate independence), disco (nonparametric extension
 9 | of ANOVA), and goodness-of-fit tests are examples of some of the methods included.
10 | 
11 | energy is named based on the analogy with potential energy in physics. See
12 | the references in the manual for more details. 
13 | 
14 | <!-- badges: start -->
15 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/energy)](https://cran.r-project.org/package=energy)
16 | <!-- badges: end -->
17 | 


--------------------------------------------------------------------------------
/man/energy-defunct.Rd:
--------------------------------------------------------------------------------
 1 | \name{dcor.ttest}
 2 | \alias{dcor.ttest}
 3 | \alias{dcor.t}
 4 | \title{ Distance Correlation t-test for High Dimensions}
 5 | \description{Defunct: use \code{dcorT.test} and \code{dcorT}.}
 6 | \usage{
 7 | dcor.t(x, y, distance = FALSE)
 8 | dcor.ttest(x, y, distance = FALSE)
 9 | }
10 | \arguments{
11 |   \item{x}{ data or distances of first sample}
12 |   \item{y}{ data or distances of second sample}
13 |   \item{distance}{ TRUE if x and y are distances, otherwise FALSE}
14 | }
15 | \details{
16 | See \code{\link{dcorT}}. 
17 | }
18 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
19 | Gabor J. Szekely
20 | }
21 | \keyword{ htest }
22 | \keyword{ multivariate }
23 | \concept{ energy statistics }
24 | 
25 | 


--------------------------------------------------------------------------------
/src/Rcpp-utilities.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | /*
 5 |  Author: Maria L. Rizzo
 6 |  energy package
 7 |  github.com/mariarizzo/energy
 8 |  */
 9 | 
10 | 
11 | // [[Rcpp::export]]
12 | NumericMatrix calc_dist(NumericMatrix x) {
13 |   int n = x.nrow(), d = x.ncol(), i, j, k;
14 |   double dsum, dk;
15 |   NumericMatrix Dx(n, n);
16 |   for (i = 0; i < n; i++) {
17 |     for (j = i; j < n; j++) {
18 |       if (i == j) {
19 |         Dx(i, i) = 0.0;
20 |       } else {
21 |         dsum = 0.0;
22 |         for (k = 0; k < d; k++) {
23 |           dk = x(i,k) - x(j,k);
24 |           dsum += dk * dk;
25 |         }
26 |         Dx(i, j) = sqrt(dsum);
27 |         Dx(j, i) = Dx(i, j);
28 |       }
29 |     }
30 |   }
31 |   return Dx;
32 | }
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/man/energy-deprecated.Rd:
--------------------------------------------------------------------------------
 1 | \name{energy-deprecated}
 2 | \alias{DCOR}
 3 | \title{ Deprecated Functions}
 4 | \description{ These deprecated functions have been replaced by revised functions and will be removed in future releases of the energy package.}
 5 | \usage{
 6 | DCOR(x, y, index=1.0)
 7 | }
 8 | \arguments{
 9 |   \item{x}{ data or distances of first sample}
10 |   \item{y}{ data or distances of second sample}
11 |   \item{index}{ exponent on Euclidean distance in (0, 2)}
12 | }
13 | \details{ 
14 | DCOR is an R version replaced by faster compiled code.
15 | }
16 | \keyword{ multivariate }
17 | \keyword{ nonparametric }
18 | \concept{ independence }
19 | \concept{ multivariate }
20 | \concept{ distance correlation }
21 | \concept{ distance covariance }
22 | \concept{ energy statistics }
23 | 
24 | 


--------------------------------------------------------------------------------
/R/centering.R:
--------------------------------------------------------------------------------
 1 | ## use the Rcpp exported function U_center or D_center
 2 | ## the utilities in this file are provided for reference and historical reasons
 3 | 
 4 | Dcenter <- function(x) {
 5 |   ## x is a dist object or data matrix
 6 |   if (!inherits(x, "dist")) x <- dist(x)
 7 |   d <- as.matrix(x)
 8 |   n <- nrow(d)
 9 |   m <- rowSums(d)
10 |   M <- sum(m) / n^2
11 |   m <- m / n
12 |   a <- sweep(d, 1, m)
13 |   b <- sweep(a, 2, m)
14 |   B <- b + M
15 | }
16 | 
17 | Ucenter <- function(x) {
18 |   ## x is a dist object or data matrix
19 |   if (!inherits(x, "dist")) x <- dist(x)
20 |   d <- as.matrix(x)
21 |   n <- nrow(d)
22 |   m <- rowSums(d)
23 |   M <- sum(m) / ((n-1)*(n-2))
24 |   m <- m / (n-2)
25 |   a <- sweep(d, 1, m)
26 |   b <- sweep(a, 2, m)
27 |   B <- b + M
28 |   diag(B) <- 0
29 |   B
30 | }
31 | 
32 | 


--------------------------------------------------------------------------------
/docs/link.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!-- Generator: Adobe Illustrator 19.2.1, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
 3 | <svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
 4 | 	 viewBox="0 0 20 20" style="enable-background:new 0 0 20 20;" xml:space="preserve">
 5 | <style type="text/css">
 6 | 	.st0{fill:#75AADB;}
 7 | </style>
 8 | <path class="st0" d="M4,11.3h1.3v1.3H4c-2,0-4-2.3-4-4.7s2.1-4.7,4-4.7h5.3c1.9,0,4,2.3,4,4.7c0,1.9-1.2,3.6-2.7,4.3v-1.5
 9 | 	C11.4,10.2,12,9.1,12,8c0-1.7-1.4-3.3-2.7-3.3H4C2.7,4.7,1.3,6.3,1.3,8S2.7,11.3,4,11.3z M16,7.3h-1.3v1.3H16c1.3,0,2.7,1.6,2.7,3.3
10 | 	s-1.4,3.3-2.7,3.3h-5.3C9.4,15.3,8,13.7,8,12c0-1.1,0.6-2.2,1.3-2.8V7.7C7.9,8.4,6.7,10.1,6.7,12c0,2.4,2.1,4.7,4,4.7H16
11 | 	c1.9,0,4-2.3,4-4.7S18,7.3,16,7.3z"/>
12 | </svg>
13 | 


--------------------------------------------------------------------------------
/R/mutual-indep.R:
--------------------------------------------------------------------------------
 1 | mutualIndep.test <-
 2 | function(x, R) {
 3 |   if (NCOL(x) < 2) {
 4 |     stop("Expecting two or more samples")
 5 |     }
 6 |   bootfn <- function(x, i) { 
 7 |     d <- ncol(x)
 8 |     dc <- numeric(d-1)
 9 |     for (k in 1:(d-1)) {
10 |       dc[k] <- energy::bcdcor(x[i,k], x[,(k+1):d])
11 |     }
12 |     return (dc)
13 |   }
14 |   
15 |   b <- boot::boot(x, bootfn, sim="permutation", R=R)
16 |   t0 <- sum(b$t0)
17 |   tp <- rowSums(b$t)
18 |   pval <- (1 + sum(tp > t0)) / (R + 1)
19 |   estimate <- round(b$t0, 3)
20 |   names(t0) <- "Sum(R*)"
21 |   names(estimate) <- paste0("R*", 1:length(b$t0))
22 |   method <- paste("Energy Test of Mutual Independence")
23 |   call <- match.call()
24 |   NOTE <- "statistic=sum(bcdcor); permutation test"
25 |   rval <- list(statistic = t0, p.value = pval, call = call,
26 |                data.name=paste(deparse(substitute(x))," dim ", paste(dim(x), collapse=",")),
27 |                estimate=estimate, method=method, note=NOTE)
28 |   class(rval) <- "power.htest"
29 |   return(rval)
30 | }
31 | 
32 | 


--------------------------------------------------------------------------------
/man/sortrank.Rd:
--------------------------------------------------------------------------------
 1 | \name{sortrank}
 2 | \alias{sortrank}
 3 | \title{ Sort, order and rank a vector }
 4 | \description{
 5 | A utility that returns a list with the components
 6 | equivalent to sort(x), order(x), rank(x, ties.method = "first").
 7 |  }
 8 | \usage{
 9 | sortrank(x)
10 | }
11 | \arguments{
12 |   \item{x}{ vector compatible with sort(x)}
13 | }
14 | \details{
15 | This utility exists to save a little time on large vectors when two or all three of the sort(), order(), rank() results are required. In case of ties, the ranks component matches \code{rank(x, ties.method = "first")}. 
16 | }
17 | \value{
18 | A list with components 
19 |   \item{x}{the sorted input vector x}
20 |   \item{ix}{the permutation = order(x) which rearranges x into ascending order}
21 |   \item{r}{the ranks of x}
22 | }
23 | \note{
24 | This function was benchmarked faster than the combined calls to \code{sort} and \code{rank}. 
25 | }
26 | \examples{
27 | sortrank(rnorm(5))
28 | }
29 | \references{
30 | See \code{\link{sort}}.
31 | }
32 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu}
33 | }
34 | 
35 | 


--------------------------------------------------------------------------------
/R/dcovu.R:
--------------------------------------------------------------------------------
 1 | ## dcovu.R
 2 | ## unbiased dcov^2 and bias-corrected dcor^2
 3 | ##
 4 | 
 5 | 
 6 | bcdcor <- function(x, y) {
 7 |   ## compute bias corrected distance correlation
 8 |   dcorU(x, y)
 9 | }
10 | 
11 | dcovU <-
12 |   function(x, y) {
13 |     ## unbiased dcov^2
14 |     if (!inherits(x, "dist")) x <- dist(x)
15 |     if (!inherits(y, "dist")) y <- dist(y)    
16 |     x <- as.matrix(x)
17 |     y <- as.matrix(y)
18 |     n <- nrow(x)
19 |     m <- nrow(y)
20 |     if (n != m) stop("sample sizes must agree")
21 |     if (! (all(is.finite(c(x, y)))))
22 |       stop("data contains missing or infinite values")
23 | 
24 |     estimates <- dcovU_stats(x, y) #RcppExports
25 |     return (estimates[1])
26 |   }
27 | 
28 | dcorU <-
29 | function(x, y) {
30 |   ## unbiased dcov^2
31 |   x <- .arg2dist.matrix(x)
32 |   y <- .arg2dist.matrix(y)
33 |   n <- nrow(x)
34 |   m <- nrow(y)
35 |   if (n != m) stop("sample sizes must agree")
36 |   if (! (all(is.finite(c(x, y)))))
37 |     stop("data contains missing or infinite values")
38 | 
39 |   estimates <- dcovU_stats(x, y) #RcppExports
40 |   return (estimates[2])
41 | }
42 | 


--------------------------------------------------------------------------------
/man/eigen.Rd:
--------------------------------------------------------------------------------
 1 | \name{EVnormal}
 2 | \docType{data}
 3 | \alias{EVnormal}
 4 | \alias{eigenvalues}
 5 | \title{Eigenvalues for the energy Test of Univariate Normality}
 6 | \description{
 7 |   Pre-computed eigenvalues corresponding to the asymptotic sampling
 8 |   distribution of the energy test statistic for univariate
 9 |   normality, under the null hypothesis. Four Cases are computed:
10 |     \enumerate{
11 |       \item Simple hypothesis, known parameters.
12 |       \item Estimated mean, known variance.
13 |       \item Known mean, estimated variance.
14 |       \item Composite hypothesis, estimated parameters.
15 |     }
16 | Case 4 eigenvalues are used in the test function \code{normal.test}
17 | when \code{method=="limit"}.
18 | }
19 | \usage{data(EVnormal)}
20 | \format{Numeric matrix with 125 rows and 5 columns;
21 |   column 1 is the index, and columns 2-5 are 
22 |   the eigenvalues of Cases 1-4.}
23 | \source{Computed}
24 | \references{
25 |   Szekely, G. J. and Rizzo, M. L. (2005) A New Test for
26 |   Multivariate Normality, \emph{Journal of Multivariate Analysis},
27 |   93/1, 58-80,
28 |   \doi{10.1016/j.jmva.2003.12.002}.
29 | }
30 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | useDynLib(energy, .registration=TRUE)
 2 | 
 3 | importFrom(Rcpp, evalCpp)
 4 | importFrom("stats", "as.dist", "dist", "dnorm", "hclust", "model.matrix",
 5 |            "pnorm", "ppois", "pt", "rnorm", "rpois", "sd", "var")
 6 | importFrom(boot, boot)
 7 | importFrom(gsl, hyperg_1F1)
 8 | 
 9 | export(
10 |   bcdcor,
11 |   calc_dist,
12 |   D_center,
13 |   Dcenter,
14 |   dcor,
15 |   dcor2d,
16 |   DCOR,
17 |   dcor.test,
18 |   dcorT,
19 |   dcorT.test,
20 |   dcov,
21 |   dcov2d,
22 |   dcov.test,
23 |   dcovU,
24 |   dcovU_stats,
25 |   disco,
26 |   disco.between,
27 |   edist,
28 |   energy.hclust,
29 |   eqdist.e,
30 |   eqdist.etest,
31 |   indep.test,
32 |   is.dmatrix,
33 |   kgroups,
34 |   ksample.e,
35 |   mutualIndep.test,
36 |   mvI,
37 |   mvI.test,
38 |   mvnorm.e,
39 |   mvnorm.etest,
40 |   mvnorm.test,
41 |   normal.e,
42 |   normal.test,
43 |   pdcor,
44 |   pdcor.test,
45 |   pdcov,
46 |   pdcov.test,
47 |   poisson.e,
48 |   poisson.etest,
49 |   poisson.m,
50 |   poisson.mtest,
51 |   poisson.tests,
52 |   sortrank,
53 |   U_center,
54 |   U_product,
55 |   Ucenter
56 | )
57 | 
58 | S3method(print, disco)
59 | S3method(print, kgroups)
60 | S3method(fitted, kgroups)
61 | 


--------------------------------------------------------------------------------
/src/dcovU.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | 
 5 | //  Author: Maria L. Rizzo
 6 | //  energy package
 7 | //  github.com/mariarizzo/energy
 8 | 
 9 | NumericMatrix U_center(NumericMatrix);
10 | 
11 | //[[Rcpp::export]]
12 | NumericVector dcovU_stats(NumericMatrix Dx, NumericMatrix Dy) {
13 |   // x and y must be square distance matrices
14 |   NumericMatrix A = U_center(Dx);
15 |   NumericMatrix B = U_center(Dy);
16 |   double ab = 0.0, aa = 0.0, bb = 0.0;
17 |   double V, dcorU = 0.0;
18 |   double eps = std::numeric_limits<double>::epsilon();  //machine epsilon
19 |   int n = Dx.nrow();
20 |   int n2 = n * (n - 3);
21 | 
22 |   for (int i=0; i<n; i++)
23 |     for (int j=0; j<i; j++) {
24 |       // U-centered is symmetric, with zero diagonal
25 |       ab += A(i, j) * B(i, j);
26 |       aa += A(i, j) * A(i, j);
27 |       bb += B(i, j) * B(i, j);
28 |     }
29 |   ab = 2.0 * ab / (double) n2;
30 |   aa = 2.0 * aa / (double) n2;
31 |   bb = 2.0 * bb / (double) n2;
32 |   V = aa * bb;
33 |   if (V > eps)
34 |     dcorU = ab / sqrt(V);
35 | 
36 |   return NumericVector::create(
37 |     _["dCovU"] = ab,
38 |     _["bcdcor"] = dcorU,
39 |     _["dVarXU"] = aa,
40 |     _["dVarYU"] = bb
41 |   );
42 | }


--------------------------------------------------------------------------------
/src/projection.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | /*
 5 |  Author: Maria L. Rizzo
 6 |  energy package
 7 |  github.com/mariarizzo/energy
 8 |  */
 9 | 
10 | 
11 | NumericMatrix U_center(NumericMatrix);
12 | double        U_product(NumericMatrix, NumericMatrix);
13 | 
14 | // [[Rcpp::export]]
15 | NumericMatrix projection(NumericMatrix Dx, NumericMatrix Dz) {
16 |   /*
17 |   returns the projection of A(x) distance matrix Dx onto the
18 |   orthogonal complement of C(z) distance matrix;
19 |   both Dx and Dz are n by n distance or dissimilarity matrices
20 |   the projection is an n by n matrix
21 |   */
22 |   int    n = Dx.nrow();
23 |   int    i, j;
24 |   NumericMatrix A(n, n), C(n, n), P(n, n);
25 |   double AC, CC, c1;
26 |   double eps = std::numeric_limits<double>::epsilon();  //machine epsilon
27 | 
28 |   A = U_center(Dx);        // U-centering to get A^U etc.
29 |   C = U_center(Dz);
30 |   AC = U_product(A, C);    // (A,C) = dcov^U
31 |   CC = U_product(C, C);
32 |   c1 = 0.0;
33 |   // if (C,C)==0 then C==0 so c1=(A,C)=0
34 |   if (fabs(CC) > eps)
35 |     c1 = AC / CC;
36 |   for (i=0; i<n; i++) {
37 |     for (j=0; j<n; j++) {
38 |       P(i, j) = A(i, j) - c1 * C(i, j);
39 |     }
40 |   }
41 |   return P;
42 | }
43 | 
44 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: energy
 2 | Title: E-Statistics: Multivariate Inference via the Energy of Data
 3 | Version: 1.7-12
 4 | Date: 2024-08-22
 5 | Authors@R: c(
 6 |     person("Maria", "Rizzo", , "mrizzo@bgsu.edu", c("aut", "cre")),
 7 |     person("Gabor", "Szekely", , , "aut"))
 8 | Description: E-statistics (energy) tests and statistics for multivariate and univariate inference,
 9 |              including distance correlation, one-sample, two-sample, and multi-sample tests for
10 |              comparing multivariate distributions, are implemented. Measuring and testing
11 |              multivariate independence based on distance correlation, partial distance correlation,
12 |              multivariate goodness-of-fit tests, k-groups and hierarchical clustering based on energy 
13 |              distance, testing for multivariate normality, distance components (disco) for non-parametric 
14 |              analysis of structured data, and other energy statistics/methods are implemented.
15 | Imports: Rcpp (>= 0.12.6), stats, boot, gsl
16 | LinkingTo: Rcpp
17 | Suggests: 
18 |     MASS,
19 |     CompQuadForm,
20 |     knitr,
21 |     rmarkdown
22 | Depends: R (>= 3.1)
23 | URL: https://github.com/mariarizzo/energy
24 | License: GPL (>= 2)
25 | LazyData: true
26 | NeedsCompilation: yes
27 | Repository: CRAN
28 | RoxygenNote: 7.2.3
29 | 


--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 3 | 
 4 | Btree_sum <- function(y, z) {
 5 |     .Call(`_energy_Btree_sum`, y, z)
 6 | }
 7 | 
 8 | calc_dist <- function(x) {
 9 |     .Call(`_energy_calc_dist`, x)
10 | }
11 | 
12 | U_product <- function(U, V) {
13 |     .Call(`_energy_U_product`, U, V)
14 | }
15 | 
16 | D_center <- function(Dx) {
17 |     .Call(`_energy_D_center`, Dx)
18 | }
19 | 
20 | U_center <- function(Dx) {
21 |     .Call(`_energy_U_center`, Dx)
22 | }
23 | 
24 | dcovU_stats <- function(Dx, Dy) {
25 |     .Call(`_energy_dcovU_stats`, Dx, Dy)
26 | }
27 | 
28 | kgroups_start <- function(x, k, clus, iter_max, distance) {
29 |     .Call(`_energy_kgroups_start`, x, k, clus, iter_max, distance)
30 | }
31 | 
32 | Istat <- function(Dx, Dy) {
33 |     .Call(`_energy_Istat`, Dx, Dy)
34 | }
35 | 
36 | Istats <- function(Dx, Dy, R) {
37 |     .Call(`_energy_Istats`, Dx, Dy, R)
38 | }
39 | 
40 | partial_dcor <- function(Dx, Dy, Dz) {
41 |     .Call(`_energy_partial_dcor`, Dx, Dy, Dz)
42 | }
43 | 
44 | partial_dcov <- function(Dx, Dy, Dz) {
45 |     .Call(`_energy_partial_dcov`, Dx, Dy, Dz)
46 | }
47 | 
48 | .poisMstat <- function(x) {
49 |     .Call(`_energy_poisMstat`, x)
50 | }
51 | 
52 | projection <- function(Dx, Dz) {
53 |     .Call(`_energy_projection`, Dx, Dz)
54 | }
55 | 
56 | 


--------------------------------------------------------------------------------
/R/energy-deprecated.R:
--------------------------------------------------------------------------------
 1 | ## deprecated functions in energy package
 2 | 
 3 | 
 4 | DCOR <-
 5 |   function(x, y, index=1.0) {
 6 |     # distance covariance and correlation statistics
 7 |     # alternate method, implemented in R without .C call
 8 |     # this method is usually slower than the C version
 9 |     
10 |     
11 |     .Deprecated(new = "dcor", package = "energy",
12 |                 msg = "DCOR is deprecated, replaced by dcor or dcov")
13 |     
14 |     if (!inherits(x, "dist")) x <- dist(x)
15 |     if (!inherits(y, "dist")) y <- dist(y)
16 |     x <- as.matrix(x)
17 |     y <- as.matrix(y)
18 |     n <- nrow(x)
19 |     m <- nrow(y)
20 |     if (n != m) stop("Sample sizes must agree")
21 |     if (! (all(is.finite(c(x, y)))))
22 |       stop("Data contains missing or infinite values")
23 |     if (index < 0 || index > 2) {
24 |       warning("index must be in [0,2), using default index=1")
25 |       index=1.0}
26 |     
27 |     stat <- 0
28 |     dims <- c(n, ncol(x), ncol(y))
29 |     
30 |     Akl <- function(x) {
31 |       d <- as.matrix(x)^index
32 |       m <- rowMeans(d)
33 |       M <- mean(d)
34 |       a <- sweep(d, 1, m)
35 |       b <- sweep(a, 2, m)
36 |       return(b + M)
37 |     }
38 |     
39 |     A <- Akl(x)
40 |     B <- Akl(y)
41 |     dCov <- sqrt(mean(A * B))
42 |     dVarX <- sqrt(mean(A * A))
43 |     dVarY <- sqrt(mean(B * B))
44 |     V <- sqrt(dVarX * dVarY)
45 |     if (V > 0)
46 |       dCor <- dCov / V else dCor <- 0
47 |     return(list(dCov=dCov, dCor=dCor, dVarX=dVarX, dVarY=dVarY))
48 |   }
49 | 
50 | 


--------------------------------------------------------------------------------
/man/U_product.Rd:
--------------------------------------------------------------------------------
 1 | \name{U_product}
 2 | \alias{U_product}
 3 | \title{ Inner product in the Hilbert space of U-centered
 4 | distance matrices}
 5 | \description{
 6 |  Stand-alone function to compute the inner product in the
 7 |  Hilbert space of U-centered distance matrices, as in the definition of
 8 |  partial distance covariance.
 9 | }
10 | \usage{
11 | U_product(U, V)
12 | }
13 | \arguments{
14 |   \item{U}{ U-centered distance matrix}
15 |   \item{V}{ U-centered distance matrix}
16 | }
17 | \details{
18 |  Note that \code{pdcor}, etc. functions include the centering and
19 |  projection operations, so that these stand alone versions are not
20 |  needed except in case one wants to check the internal computations.
21 | 
22 |  Exported from U_product.cpp.
23 | }
24 | \value{
25 | \code{U_product} returns the inner product, a scalar.
26 | }
27 | \references{
28 |   Szekely, G.J. and Rizzo, M.L. (2014),
29 |  Partial Distance Correlation with Methods for Dissimilarities,
30 |  \emph{Annals of Statistics}, Vol. 42, No. 6, pp. 2382-2412.}
31 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
32 | Gabor J. Szekely
33 | }
34 | \examples{
35 |  x <- iris[1:10, 1:4]
36 |  y <- iris[11:20, 1:4]
37 |  M1 <- as.matrix(dist(x))
38 |  M2 <- as.matrix(dist(y))
39 |  U <- U_center(M1)
40 |  V <- U_center(M2)
41 | 
42 |  U_product(U, V)
43 |  dcovU_stats(M1, M2)
44 |  }
45 | \keyword{ multivariate }
46 | \concept{ multivariate }
47 | \concept{ distance correlation }
48 | \concept{ distance covariance }
49 | \concept{ energy statistics }
50 | 


--------------------------------------------------------------------------------
/R/mvI.R:
--------------------------------------------------------------------------------
 1 | indep.test<-
 2 | function(x, y, method = c("dcov","mvI"), index = 1, R) {
 3 |     # two energy tests for multivariate independence
 4 |     type <- match.arg(method)
 5 |     if (type == "dcov")
 6 |         return(dcov.test(x, y, index, R)) else
 7 |     if (type == "mvI")
 8 |         return(mvI.test(x, y, R))
 9 | }
10 | 
11 | mvI <-
12 | function(x, y) {
13 |     # energy statistic for multivariate independence
14 |     # returns dependence coefficient I_n
15 |     n <- NROW(x)
16 |     m <- NROW(y)
17 |     Dx <- .arg2dist.matrix(x)
18 |     Dy <- .arg2dist.matrix(y)
19 |     return(Istat(Dx, Dy)) #Rcpp
20 | }
21 | 
22 | mvI.test<-
23 |   function(x, y, R) {
24 |     # an energy test for multivariate independence
25 |     # not based on dCov or dCor
26 |     n <- NROW(x)
27 |     m <- NROW(y)
28 |     if (n != m || n < 2) stop("Sample sizes must agree")
29 |     Dx <- .arg2dist.matrix(x)
30 |     Dy <- .arg2dist.matrix(y)
31 |     stats <- Istats(Dx, Dy, R)
32 |     stat <- n * stats[1]^2
33 |     est <- stats[1]
34 |     names(est) <- "I"
35 |     names(stat) <- "n I^2"
36 |     dataname <- paste("x (",n," by ",ncol(x), "), y(",n," by ", ncol(y), "), replicates ", R, sep="")
37 |     if (R > 0) {
38 |       p.value = (1 + sum(stats[-1] > est)) / (R+1)
39 |     } else {
40 |       p.value = NA
41 |     }
42 |     e <- list(
43 |       method = "mvI energy test of independence",
44 |       statistic = stat,
45 |       estimate = est,
46 |       replicates = stats[-1],
47 |       p.value = p.value,
48 |       data.name = dataname)
49 |     class(e) <- "htest"
50 |     e
51 |   }
52 | 


--------------------------------------------------------------------------------
/src/poissonM.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | /*
 5 |  Author: Maria L. Rizzo
 6 |  energy package
 7 |  github.com/mariarizzo/energy
 8 |  */
 9 | 
10 | 
11 | // [[Rcpp::export(.poisMstat)]]
12 | NumericVector poisMstat(IntegerVector x)
13 | {
14 |   /* computes the Poisson mean distance statistic */
15 |   int i, j, k, n=x.size();
16 |   double eps=1.0e-10;
17 |   double ad, cvm, d, lambda, m, q;
18 |   double Mcdf1, Mcdf0, Mpdf1, cdf1, cdf0;
19 |   NumericVector stats(2);
20 | 
21 |   lambda = mean(x);
22 |   q = R::qpois(1.0-eps, lambda, TRUE, FALSE) + 1;
23 | 
24 |   m = 0.0;
25 |   for (j=0; j<n; j++) m += abs(x(j) - 1);
26 |   m /= ((double) n);                   /* est of m_1 = E|1 - X| */
27 |   Mcdf0 = (m + 1.0 - lambda) / 2.0;    /* M-est of F(0) */
28 | 
29 |   cdf0 = exp(-lambda);                 /* MLE of F(0) */
30 |   d = Mcdf0 - cdf0;
31 |   cvm = d * d * cdf0;   /* von Mises type of distance */
32 |   ad = d * d * cdf0 / (cdf0 * (1-cdf0));  /* A-D weight */
33 |     
34 |   for (i=1; i<q; i++) {
35 |     m = 0;
36 |     k = i + 1;
37 |     for (j=0; j<n; j++) m += abs(x(j)-k);
38 |     m /= ((double) n);  /* est of m_{i+1} = E|i+1 - X| */
39 | 
40 |   /* compute M-estimate of f(i) and F(i) */
41 |   Mpdf1 = (m-(k-lambda)*(2.0*Mcdf0-1.0))/((double) 2.0*k);
42 |   if (Mpdf1 < 0.0) Mpdf1 = 0.0;
43 |   Mcdf1 = Mcdf0 + Mpdf1;
44 |   if (Mcdf1 > 1) Mcdf1 = 1.0;
45 | 
46 |   cdf1 = R::ppois(i, lambda, TRUE, FALSE); /* MLE of F(i) */
47 |   d = Mcdf1 - cdf1;
48 |   cvm += d * d * (cdf1 - cdf0);
49 |   ad += d * d * (cdf1 - cdf0) / (cdf1 * (1-cdf1));
50 |   
51 |   cdf0 = cdf1;
52 |   Mcdf0 = Mcdf1;
53 |   }
54 |   cvm *= n;
55 |   ad *= n;
56 |   stats(0) = cvm;
57 |   stats(1) = ad;
58 |   return stats;
59 | }
60 | 


--------------------------------------------------------------------------------
/man/energy-package.Rd:
--------------------------------------------------------------------------------
 1 | \name{energy-package}
 2 | \alias{energy-package}
 3 | \alias{energy}
 4 | \docType{package}
 5 | \title{
 6 |  E-statistics: Multivariate Inference via the Energy of Data
 7 | }
 8 | \description{
 9 | Description: E-statistics (energy) tests and statistics for multivariate and univariate inference,
10 |              including distance correlation, one-sample, two-sample, and multi-sample tests for
11 |              comparing multivariate distributions, are implemented. Measuring and testing
12 |              multivariate independence based on distance correlation, partial distance
13 |              correlation, multivariate goodness-of-fit tests, clustering based on energy distance,
14 |              testing for multivariate normality, distance components (disco) for non-parametric
15 |              analysis of structured data, and other energy statistics/methods are implemented.
16 | }
17 | \author{
18 | Maria L. Rizzo and Gabor J. Szekely
19 | }
20 | \references{
21 | G. J. Szekely and M. L. Rizzo (2013). Energy statistics:
22 | A class of statistics based on distances, \emph{Journal of
23 | Statistical Planning and Inference}.
24 | 
25 | M. L. Rizzo and G. J. Szekely (2016). Energy Distance,
26 | \emph{WIRES Computational Statistics}, Wiley, Volume 8 Issue 1, 27-38.
27 | Available online Dec., 2015, \doi{10.1002/wics.1375}.
28 | 
29 | G. J. Szekely and M. L. Rizzo (2017). The Energy of Data. 
30 | \emph{The Annual Review of Statistics and Its Application} 
31 | 4:447-79. 
32 | 
33 | G. J. Szekely and M. L. Rizzo (2023). \emph{The Energy of Data and Distance Correlation}. Chapman & Hall/CRC Monographs on Statistics and Applied Probability. ISBN 9781482242744. 
34 | \url{https://www.routledge.com/The-Energy-of-Data-and-Distance-Correlation/Szekely-Rizzo/p/book/9781482242744}.
35 | }
36 | \keyword{ package }
37 | \keyword{ multivariate }
38 | 


--------------------------------------------------------------------------------
/man/dmatrix.Rd:
--------------------------------------------------------------------------------
 1 | \name{Distance Matrix}
 2 | \alias{is.dmatrix}
 3 | \alias{calc_dist}
 4 | \title{ Distance Matrices }
 5 | \description{
 6 | Utilities for working with distance matrices. 
 7 | \code{is.dmatrix} is a utility that checks whether the argument is a distance or dissimilarity matrix; is it square symmetric, non-negative, with zero diagonal? \code{calc_dist} computes a distance matrix directly from a data matrix. 
 8 | }
 9 | \usage{
10 | is.dmatrix(x, tol = 100 * .Machine$double.eps)
11 | calc_dist(x)
12 | }
13 | \arguments{
14 |   \item{x}{ numeric matrix}
15 |   \item{tol}{ tolerance for checking required conditions}
16 | } 
17 | \details{
18 | Energy functions work with the distance matrices of samples. The \code{is.dmatrix} function is used internally when converting arguments to distance matrices. The default \code{tol} is the same as default tolerance of \code{isSymmetric}.
19 | 
20 | \code{calc_dist} is an exported Rcpp function that returns a Euclidean distance matrix from the input data matrix. 
21 | }
22 | \value{
23 | \code{is.dmatrix} returns TRUE if (within tolerance) \code{x} is a distance/dissimilarity matrix; otherwise FALSE. It will return FALSE if \code{x} is a class \code{dist} object.
24 | 
25 | \code{calc_dist} returns the Euclidean distance matrix for the data matrix \code{x}, which has observations in rows. 
26 | }
27 | \note{
28 | In practice, if \code{dist(x)} is not yet computed, \code{calc_dist(x)} will be faster than \code{as.matrix(dist(x))}. 
29 | 
30 | On working with non-Euclidean dissimilarities, see the references.
31 | }
32 | \examples{
33 | x <- matrix(rnorm(20), 10, 2)
34 | D <- calc_dist(x)
35 | is.dmatrix(D)
36 | is.dmatrix(cov(x))
37 | }
38 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu}
39 | }
40 | \references{
41 | Szekely, G.J. and Rizzo, M.L. (2014),
42 |  Partial Distance Correlation with Methods for Dissimilarities.
43 |  \emph{Annals of Statistics}, Vol. 42 No. 6, 2382-2412.
44 | }
45 | 


--------------------------------------------------------------------------------
/src/centering.cpp:
--------------------------------------------------------------------------------
 1 | // double centering utilities for the energy package
 2 | //
 3 | 
 4 | //  Author: Maria L. Rizzo
 5 | //  energy package
 6 | //  github.com/mariarizzo/energy
 7 | 
 8 | 
 9 | #include <Rcpp.h>
10 | using namespace Rcpp;
11 | 
12 | NumericMatrix D_center(NumericMatrix Dx);
13 | NumericMatrix U_center(NumericMatrix Dx);
14 | 
15 | // [[Rcpp::export]]
16 | NumericMatrix D_center(NumericMatrix Dx) {
17 |   /*
18 |   computes the double centered distance matrix for distance matrix Dx
19 |    for dCov, dCor, etc.
20 |   a_{ij} - a_{i.}/n - a_{.j}/n + a_{..}/n^2, all i, j
21 |   */
22 |   int j, k;
23 |   int n = Dx.nrow();
24 |   NumericVector akbar(n);
25 |   NumericMatrix A(n, n);
26 |   double abar = 0.0;
27 | 
28 |   for (k=0; k<n; k++) {
29 |     akbar(k) = 0.0;
30 |     for (j=0; j<n; j++) {
31 |       akbar(k) += Dx(k, j);
32 |     }
33 |     abar += akbar(k);
34 |     akbar(k) /= (double) (n);
35 |   }
36 |   abar /= (double) (n * n);
37 | 
38 |   for (k=0; k<n; k++)
39 |     for (j=k; j<n; j++) {
40 |       A(k, j) = Dx(k, j) - akbar(k) - akbar(j) + abar;
41 |       A(j, k) = A(k, j);
42 |     }
43 | 
44 |   return A;
45 | }
46 | 
47 | // [[Rcpp::export]]
48 | NumericMatrix U_center(NumericMatrix Dx) {
49 |   /*
50 |   computes the A_{kl}^U distances from the distance matrix (Dx_{kl}) for dCov^U
51 |   U-centering: if Dx = (a_{ij}) then compute U-centered A^U using
52 |   a_{ij} - a_{i.}/(n-2) - a_{.j}/(n-2) + a_{..}/((n-1)(n-2)), i \neq j
53 |   and zero diagonal
54 |   */
55 |   int j, k;
56 |   int n = Dx.nrow();
57 |   NumericVector akbar(n);
58 |   NumericMatrix A(n, n);
59 |   double abar = 0.0;
60 | 
61 |   for (k=0; k<n; k++) {
62 |     akbar(k) = 0.0;
63 |     for (j=0; j<n; j++) {
64 |       akbar(k) += Dx(k, j);
65 |     }
66 |     abar += akbar(k);
67 |     akbar(k) /= (double) (n-2);
68 |   }
69 |   abar /= (double) ((n-1)*(n-2));
70 | 
71 |   for (k=0; k<n; k++) {
72 |     for (j=k; j<n; j++) {
73 |       A(k, j) = Dx(k, j) - akbar(k) - akbar(j) + abar;
74 |       A(j, k) = A(k, j);
75 |     }
76 |   }
77 |   /* diagonal is zero */
78 |   for (k=0; k<n; k++)
79 |     A(k, k) = 0.0;
80 | 
81 |   return A;
82 | }
83 | 
84 | 


--------------------------------------------------------------------------------
/R/kgroups.R:
--------------------------------------------------------------------------------
 1 | 
 2 | kgroups <- function(x, k, iter.max = 10, nstart = 1, cluster = NULL) {
 3 |   distance <- inherits(x, "dist")
 4 |   x <- as.matrix(x)
 5 |   if (!is.numeric(x))
 6 |     stop("x must be numeric")
 7 |   n <- nrow(x)
 8 |   if (is.null(cluster)) {
 9 |     cluster <- sample(0:(k-1), size = n, replace = TRUE)
10 |   } else {
11 |     ## recode cluster as 0,1,...,k-1
12 |     cluster <- factor(cluster)
13 |     if(length(levels(cluster)) != k)
14 |       stop("cluster vector does not have k clusters")
15 |     cluster <- as.integer(cluster) - 1
16 |     if(length(cluster) != n)
17 |       stop("data and length of cluster vector must match")
18 |   }
19 |   value <- kgroups_start(x, k, cluster, iter.max, distance = distance)
20 | 
21 |   if (nstart > 1) {
22 |     objective <- rep(0, nstart)
23 |     objective[1] <- value$W
24 |     values <- vector("list", nstart)
25 |     values[[1]] <- value
26 |     for (j in 2:nstart) {
27 |       ## random initialization of cluster labels
28 |       cluster <- sample(0:(k-1), size = n, replace = TRUE)
29 |       values[[j]] <- kgroups_start(x, k, cluster, iter.max, distance = distance)
30 | 
31 |       objective[j] <- values[[j]]$W
32 |     }
33 |     best <- which.min(objective)
34 |     value <- values[[best]]
35 |   }
36 | 
37 |   obj  <- structure(list(
38 |     call = match.call(),
39 |     cluster = value$cluster + 1,
40 |     sizes = value$sizes,
41 |     within = value$within,
42 |     W = sum(value$within),
43 |     count = value$count,
44 |     iterations = value$it,
45 |     k = k),
46 |     class = "kgroups")
47 |   return (obj)
48 | }
49 | 
50 | 
51 | print.kgroups <- function(x, ...) {
52 |   cat("\n"); print(x$call)
53 |   cat("\nK-groups cluster analysis\n")
54 |   cat(x$k, " groups of size ", x$sizes, "\n")
55 |   cat("Within cluster distances:\n", x$within)
56 |   cat("\nIterations: ", x$iterations, "  Count: ", x$count, "\n")
57 | }
58 | 
59 | fitted.kgroups <- function(object, method = c("labels", "groups"), ...) {
60 |   method = match.arg(method)
61 |   if (method == "groups") {
62 |     k <- object$k
63 |     CList <- vector("list", k)
64 |     for (i in 1:k)
65 |       CList[[i]] <- which(object$cluster == i)
66 |     return (CList)
67 |   }
68 |   return (object$cluster)
69 | }
70 | 


--------------------------------------------------------------------------------
/man/mutualIndep.Rd:
--------------------------------------------------------------------------------
 1 | \name{mutual independence}
 2 | \alias{mutualIndep.test}
 3 | \title{ Energy Test of Mutual Independence}
 4 | \description{
 5 |  The test statistic is the sum of d-1 bias-corrected squared dcor statistics where the number of variables is d. Implementation is by permuation test. 
 6 | }
 7 | \usage{
 8 | mutualIndep.test(x, R)
 9 | }
10 | \arguments{
11 |   \item{x}{ data matrix or data frame}
12 |   \item{R}{ number of permutation replicates}
13 | }
14 | \details{
15 | A population coefficient for mutual independence of d random variables, \eqn{d \geq 2},  is
16 | \deqn{
17 |   \sum_{k=1}^{d-1} \mathcal R^2(X_k, [X_{k+1},\dots,X_d]).
18 | }
19 | which is non-negative and equals zero iff mutual independence holds. 
20 | For example, if d=4 the population coefficient is 
21 | \deqn{
22 | \mathcal R^2(X_1, [X_2,X_3,X_4]) +
23 | \mathcal R^2(X_2, [X_3,X_4]) +
24 | \mathcal R^2(X_3, X_4),
25 | }
26 | A permutation test is implemented based on the corresponding sample coefficient. 
27 | To test mutual independence of \deqn{X_1,\dots,X_d} the test statistic is the sum of the d-1 
28 | statistics (bias-corrected \eqn{dcor^2} statistics):
29 | \deqn{\sum_{k=1}^{d-1} \mathcal R_n^*(X_k, [X_{k+1},\dots,X_d])}. 
30 | }
31 | \value{
32 | \code{mutualIndep.test} returns an object of class \code{power.htest}.
33 | }
34 | \note{
35 | See Szekely and Rizzo (2014) for details on unbiased \eqn{dCov^2} and bias-corrected \eqn{dCor^2} (\code{bcdcor}) statistics.
36 | }
37 | \seealso{
38 | \code{\link{bcdcor}}, \code{\link{dcovU_stats}}
39 | }
40 | \references{
41 |  Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007),
42 |  Measuring and Testing Dependence by Correlation of Distances,
43 |  \emph{Annals of Statistics}, Vol. 35 No. 6, pp. 2769-2794.
44 |  \cr \doi{10.1214/009053607000000505}
45 |  
46 | Szekely, G.J. and Rizzo, M.L. (2014),
47 |  Partial Distance Correlation with Methods for Dissimilarities.
48 |  \emph{Annals of Statistics}, Vol. 42 No. 6, 2382-2412.
49 | }
50 | 
51 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
52 | Gabor J. Szekely
53 | }
54 | \examples{
55 | x <- matrix(rnorm(100), nrow=20, ncol=5)
56 | mutualIndep.test(x, 199)
57 | }
58 | \keyword{ multivariate }
59 | \concept{ independence }
60 | \concept{ distance correlation }
61 | \concept{ distance covariance }
62 | \concept{ energy statistics }
63 | 
64 | 


--------------------------------------------------------------------------------
/docs/docsearch.js:
--------------------------------------------------------------------------------
 1 | $(function() {
 2 | 
 3 |   // register a handler to move the focus to the search bar
 4 |   // upon pressing shift + "/" (i.e. "?")
 5 |   $(document).on('keydown', function(e) {
 6 |     if (e.shiftKey && e.keyCode == 191) {
 7 |       e.preventDefault();
 8 |       $("#search-input").focus();
 9 |     }
10 |   });
11 | 
12 |   $(document).ready(function() {
13 |     // do keyword highlighting
14 |     /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */
15 |     var mark = function() {
16 | 
17 |       var referrer = document.URL ;
18 |       var paramKey = "q" ;
19 | 
20 |       if (referrer.indexOf("?") !== -1) {
21 |         var qs = referrer.substr(referrer.indexOf('?') + 1);
22 |         var qs_noanchor = qs.split('#')[0];
23 |         var qsa = qs_noanchor.split('&');
24 |         var keyword = "";
25 | 
26 |         for (var i = 0; i < qsa.length; i++) {
27 |           var currentParam = qsa[i].split('=');
28 | 
29 |           if (currentParam.length !== 2) {
30 |             continue;
31 |           }
32 | 
33 |           if (currentParam[0] == paramKey) {
34 |             keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20"));
35 |           }
36 |         }
37 | 
38 |         if (keyword !== "") {
39 |           $(".contents").unmark({
40 |             done: function() {
41 |               $(".contents").mark(keyword);
42 |             }
43 |           });
44 |         }
45 |       }
46 |     };
47 | 
48 |     mark();
49 |   });
50 | });
51 | 
52 | /* Search term highlighting ------------------------------*/
53 | 
54 | function matchedWords(hit) {
55 |   var words = [];
56 | 
57 |   var hierarchy = hit._highlightResult.hierarchy;
58 |   // loop to fetch from lvl0, lvl1, etc.
59 |   for (var idx in hierarchy) {
60 |     words = words.concat(hierarchy[idx].matchedWords);
61 |   }
62 | 
63 |   var content = hit._highlightResult.content;
64 |   if (content) {
65 |     words = words.concat(content.matchedWords);
66 |   }
67 | 
68 |   // return unique words
69 |   var words_uniq = [...new Set(words)];
70 |   return words_uniq;
71 | }
72 | 
73 | function updateHitURL(hit) {
74 | 
75 |   var words = matchedWords(hit);
76 |   var url = "";
77 | 
78 |   if (hit.anchor) {
79 |     url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor;
80 |   } else {
81 |     url = hit.url + '?q=' + escape(words.join(" "));
82 |   }
83 | 
84 |   return url;
85 | }
86 | 


--------------------------------------------------------------------------------
/man/pdcor.Rd:
--------------------------------------------------------------------------------
 1 | \name{pdcor}
 2 | \alias{pdcor}
 3 | \alias{pdcov}
 4 | \alias{pdcor.test}
 5 | \alias{pdcov.test}
 6 | \title{
 7 | Partial distance correlation and covariance
 8 | }
 9 | \description{Partial distance correlation pdcor, pdcov, and tests.}
10 | \usage{
11 |   pdcov.test(x, y, z, R)
12 |   pdcor.test(x, y, z, R)
13 |   pdcor(x, y, z)
14 |   pdcov(x, y, z)
15 | }
16 | \arguments{
17 | \item{x}{ data or dist object of first sample}
18 | \item{y}{ data or dist object of second sample}
19 | \item{z}{ data or dist object of third sample}
20 | \item{R}{ replicates for permutation test}
21 | }
22 | \details{
23 | \code{pdcor(x, y, z)} and \code{pdcov(x, y, z)} compute the partial distance
24 | correlation and partial distance covariance, respectively,
25 | of x and y removing z.
26 | 
27 | A test for zero partial distance correlation (or zero partial distance covariance) is implemented in \code{pdcor.test}, and \code{pdcov.test}.
28 | 
29 | Argument types supported are numeric data matrix, data.frame, tibble, numeric vector, class "dist" object, or factor. For unordered factors a 0-1 distance matrix is computed. 
30 | }
31 | \value{
32 | Each test returns an object of class \code{htest}.
33 | }
34 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
35 | Gabor J. Szekely
36 | }
37 | \references{
38 | Szekely, G.J. and Rizzo, M.L. (2014),
39 |  Partial Distance Correlation with Methods for Dissimilarities.
40 |  \emph{Annals of Statistics}, Vol. 42 No. 6, 2382-2412.
41 | }
42 | \examples{
43 |   n = 30
44 |   R <- 199
45 | 
46 |   ## mutually independent standard normal vectors
47 |   x <- rnorm(n)
48 |   y <- rnorm(n)
49 |   z <- rnorm(n)
50 | 
51 |   pdcor(x, y, z)
52 |   pdcov(x, y, z)
53 |   set.seed(1)
54 |   pdcov.test(x, y, z, R=R)
55 |   set.seed(1)
56 |   pdcor.test(x, y, z, R=R)
57 | 
58 | \donttest{
59 |   if (require(MASS)) {
60 |     p = 4
61 |     mu <- rep(0, p)
62 |     Sigma <- diag(p)
63 |   
64 |     ## linear dependence
65 |     y <- mvrnorm(n, mu, Sigma) + x
66 |     print(pdcov.test(x, y, z, R=R))
67 |   
68 |     ## non-linear dependence
69 |     y <- mvrnorm(n, mu, Sigma) * x
70 |     print(pdcov.test(x, y, z, R=R))
71 |     }
72 |   }
73 | }
74 | \keyword{ htest }
75 | \keyword{ multivariate }
76 | \keyword{ nonparametric }
77 | \concept{ independence }
78 | \concept{ multivariate }
79 | \concept{ distance correlation }
80 | \concept{ distance covariance }
81 | \concept{ energy statistics }
82 | 


--------------------------------------------------------------------------------
/R/dcorT.R:
--------------------------------------------------------------------------------
 1 | ### dcorT.R
 2 | ### implementation of the distance correlation t-test
 3 | ### for high dimension
 4 | 
 5 | Astar <- function(d) {
 6 |   ## d is a distance matrix or distance object
 7 |   ## modified or corrected doubly centered distance matrices
 8 |   ## denoted A* (or B*) in JMVA t-test paper (2013)
 9 |   if (inherits(d, "dist"))
10 |     d <- as.matrix(d)
11 |   n <- nrow(d)
12 |   if (n != ncol(d)) stop("Argument d should be distance")
13 |   m <- rowMeans(d)
14 |   M <- mean(d)
15 |   a <- sweep(d, 1, m)
16 |   b <- sweep(a, 2, m)
17 |   A <- b + M  #same as plain A
18 |   #correction to get A^*
19 |   A <- A - d/n
20 |   diag(A) <- m - M
21 |   (n / (n-1)) * A
22 | }
23 | 
24 | BCDCOR <- function(x, y) {
25 |   ## compute bias corrected distance correlation
26 |   ## internal function not in NAMESPACE (external: use bcdcor) 
27 |   ## revised version from v. 1.7-7 
28 |   if (!inherits(x, "dist")) {
29 |     x <- as.matrix(dist(x))
30 |   } else {
31 |     x <- as.matrix(x)
32 |   }
33 |   if (!inherits(y, "dist")) {
34 |     y <- as.matrix(dist(y))
35 |   } else {
36 |     y <- as.matrix(y)
37 |   }
38 |   
39 |   n <- NROW(x)
40 |   AA <- Astar(x)
41 |   BB <- Astar(y)
42 |   XY <- sum(AA*BB) - (n/(n-2)) * sum(diag(AA*BB))
43 |   XX <- sum(AA*AA) - (n/(n-2)) * sum(diag(AA*AA))
44 |   YY <- sum(BB*BB) - (n/(n-2)) * sum(diag(BB*BB))
45 |   list(bcR=XY / sqrt(XX*YY), XY=XY/n^2, XX=XX/n^2, YY=YY/n^2, n=n)
46 | }
47 | 
48 | 
49 | 
50 | dcorT <- function(x, y) {
51 |   # computes the t statistic for corrected high-dim dCor
52 |   # should be approximately student T
53 |   # x and y are observed samples or distance objects
54 |   r <- BCDCOR(x, y)
55 |   Cn <- r$bcR
56 |   n <- r$n
57 |   M <- n*(n-3)/2
58 |   sqrt(M-1) * Cn / sqrt(1-Cn^2)
59 | }
60 | 
61 | dcorT.test <- function(x, y) {
62 |   # x and y are observed samples or distance objects
63 |   dname <- paste(deparse(substitute(x)),"and",
64 |                  deparse(substitute(y)))
65 |   stats <- BCDCOR(x, y)
66 |   bcR <- stats$bcR
67 |   n <- stats$n
68 |   M <- n * (n-3) / 2
69 |   df <- M - 1
70 |   names(df) <- "df"
71 |   tstat <-  sqrt(M-1) * bcR / sqrt(1-bcR^2)
72 |   names(tstat) <- "T"
73 |   estimate <- bcR
74 |   names(estimate) <- "Bias corrected dcor"
75 |   pval <- 1 - pt(tstat, df=df)
76 |   method <- "dcor t-test of independence for high dimension"
77 |   rval <- list(statistic = tstat, parameter = df, p.value = pval,
78 |                estimate=estimate, method=method, data.name=dname)
79 |   class(rval) <- "htest"
80 |   return(rval)
81 | }
82 | 
83 | 


--------------------------------------------------------------------------------
/man/centering.Rd:
--------------------------------------------------------------------------------
 1 | \name{centering distance matrices}
 2 | \alias{Ucenter}
 3 | \alias{Dcenter}
 4 | \alias{U_center}
 5 | \alias{D_center}
 6 | \title{ Double centering and U-centering }
 7 | \description{
 8 |  Stand-alone double centering and U-centering functions
 9 |  that are applied in unbiased distance covariance, bias
10 |  corrected distance correlation, and partial distance correlation.
11 |  }
12 | \usage{
13 | Dcenter(x)
14 | Ucenter(x)
15 | U_center(Dx)
16 | D_center(Dx)
17 | }
18 | \arguments{
19 |   \item{x}{ dist object or data matrix}
20 |   \item{Dx}{ distance or dissimilarity matrix}
21 | }
22 | \details{
23 |  In \code{Dcenter} and \code{Ucenter}, \code{x} must be
24 |  a \code{dist} object or a data matrix. Both functions return
25 |  a doubly centered distance matrix.
26 | 
27 |  Note that \code{pdcor}, etc. functions include the
28 |  centering operations (in C), so that these stand alone versions
29 |  of centering functions are not needed except in case one
30 |  wants to compute just a double-centered or U-centered matrix.
31 | 
32 |  \code{U_center} is the Rcpp export of the cpp function.
33 |  \code{D_center} is the Rcpp export of the cpp function.
34 | }
35 | \value{
36 | All functions return a square symmetric matrix.
37 | 
38 |  \code{Dcenter} returns a matrix
39 |  \deqn{A_{ij}=a_{ij} - \bar a_{i.} - \bar a_{.j} + \bar a_{..}}
40 |  as in classical multidimensional scaling. \code{Ucenter}
41 |  returns a matrix
42 |  \deqn{\tilde A_{ij}=a_{ij} - \frac{a_{i.}}{n-2}
43 |  - \frac{a_{.j}}{n-2} + \frac{a_{..}}{(n-1)(n-2)},\quad i \neq j,}
44 |  with zero diagonal,
45 |  and this is the double centering applied in \code{pdcov} and
46 |  \code{pdcor} as well as the unbiased dCov and bias corrected
47 |  dCor statistics.
48 | }
49 | \note{
50 | The c++ versions \code{D_center} and \code{U_center} should typically
51 | be faster. R versions are retained for historical reasons.
52 | }
53 | \references{
54 |   Szekely, G.J. and Rizzo, M.L. (2014),
55 |  Partial Distance Correlation with Methods for Dissimilarities,
56 |  \emph{Annals of Statistics}, Vol. 42, No. 6, pp. 2382-2412.
57 | }
58 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
59 | Gabor J. Szekely
60 | }
61 | \examples{
62 |  x <- iris[1:10, 1:4]
63 |  dx <- dist(x)
64 |  Dx <- as.matrix(dx)
65 |  M <- U_center(Dx)
66 | 
67 |  all.equal(M, U_center(M))     #idempotence
68 |  all.equal(M, D_center(M))     #invariance
69 | }
70 | \keyword{ multivariate }
71 | \concept{ multivariate }
72 | \concept{ distance correlation }
73 | \concept{ distance covariance }
74 | \concept{ energy statistics }
75 | 


--------------------------------------------------------------------------------
/R/edist.R:
--------------------------------------------------------------------------------
 1 | edist <-
 2 | function(x, sizes, distance = FALSE, ix = 1:sum(sizes), alpha = 1,
 3 |     method = c("cluster","discoB")) {
 4 |     #  computes the e-dissimilarity matrix between k samples or clusters
 5 |     #  x:          pooled sample or Euclidean distances
 6 |     #  sizes:      vector of sample (cluster) sizes
 7 |     #  distance:   TRUE if x is a distance matrix, otherwise FALSE
 8 |     #  ix:         a permutation of row indices of x
 9 |     #  alpha:      distance exponent
10 |     #  method:     cluster distances or disco statistics
11 |     #
12 |     k <- length(sizes)
13 |     if (k == 1) return (as.dist(0.0))
14 |     if (k < 1) return (NA)
15 |     e <- matrix(nrow=k, ncol=k)
16 |     n <- cumsum(sizes)
17 |     m <- 1 + c(0, n[1:(k-1)])
18 |     
19 |     if (is.vector(x)) x <- matrix(x, ncol=1)
20 |     if (inherits(x, "dist")) distance <- TRUE
21 |     if (distance)
22 |       dst <- as.matrix(x) else dst <- as.matrix(dist(x))
23 |     N <- NROW(dst)
24 |     if (NCOL(dst) != N)
25 |       stop("distance==TRUE but first argument is not distance")
26 |     
27 |     if (alpha != 1) {
28 |     	if (alpha <= 0 || alpha > 2)
29 |     	    warning("exponent alpha should be in (0,2]")
30 |     	dst <- dst^alpha
31 |     	}
32 | 
33 |     type <- match.arg(method)
34 | 	  if (type == "cluster") {
35 |       for (i in 1:(k - 1)) {
36 |         e[i, i] <- 0.0
37 |         for (j in (i + 1):k) {
38 |             n1 <- sizes[i]
39 |             n2 <- sizes[j]
40 |             ii <- ix[m[i]:n[i]]
41 |             jj <- ix[m[j]:n[j]]
42 |             w <- n1 * n2 / (n1 + n2)
43 |             m11 <- sum(dst[ii, ii]) / (n1 * n1)
44 |             m22 <- sum(dst[jj, jj]) / (n2 * n2)
45 |             m12 <- sum(dst[ii, jj]) / (n1 * n2)
46 |             e[i, j] <- e[j, i] <- w * ((m12 + m12) - (m11 + m22))
47 |             }
48 |         }
49 |     }
50 | 
51 | 
52 |     if (type == "discoB") {
53 |       #disco statistics for testing F=G
54 |       for (i in 1:(k - 1)) {
55 |         e[i, i] <- 0.0
56 |         for (j in (i + 1):k) {
57 |             n1 <- sizes[i]
58 |             n2 <- sizes[j]
59 |             ii <- ix[m[i]:n[i]]
60 |             jj <- ix[m[j]:n[j]]
61 |             J <- c(ii,jj)
62 |             d <- dst[J, J]
63 |       	    e[i, j] <- eqdist.e(d, sizes=c(n1, n2), distance=TRUE)
64 |       	    e[j, i] <- e[i, j] <- e[i, j] * (n1 + n2) 
65 |         }
66 |       }
67 |       e <- 0.5 * e / sum(sizes)  #discoB formula
68 |     }
69 |     
70 |     e <- as.dist(e) 
71 |     attr(e,"method") <- paste(method,": index= ", alpha)
72 |     e
73 | }
74 | 
75 | 


--------------------------------------------------------------------------------
/man/dcovU_stats.Rd:
--------------------------------------------------------------------------------
 1 | \name{dcovU_stats}
 2 | \alias{dcovU_stats}
 3 | \title{Unbiased distance covariance statistics}
 4 | \description{
 5 |  This function computes unbiased estimators of squared distance
 6 |  covariance, distance variance, and a bias-corrected estimator of
 7 |  (squared) distance correlation.
 8 | }
 9 | \usage{
10 | dcovU_stats(Dx, Dy)
11 | }
12 | \arguments{
13 |   \item{Dx}{ distance matrix of first sample}
14 |   \item{Dy}{ distance matrix of second sample}
15 | }
16 | \details{
17 |  The unbiased (squared) dcov is inner product definition of
18 |  dCov, in the Hilbert space of U-centered distance matrices.
19 | 
20 |  The sample sizes (number of rows) of the two samples must
21 |  agree, and samples must not contain missing values. The
22 |  arguments must be square symmetric matrices. 
23 | }
24 | \value{
25 | \code{dcovU_stats} returns a vector of the components of bias-corrected
26 | dcor: [dCovU, bcdcor, dVarXU, dVarYU].
27 | }
28 | \note{
29 | Unbiased distance covariance (SR2014) corresponds to the biased
30 | (original) \eqn{\mathrm{dCov^2}}{dCov^2}. Since \code{dcovU} is an
31 | unbiased statistic, it is signed and we do not take the square root.
32 | For the original distance covariance test of independence (SRB2007,
33 | SR2009), the distance covariance test statistic is the V-statistic
34 | \eqn{\mathrm{n\, dCov^2} = n \mathcal{V}_n^2}{n V_n^2} (not dCov).
35 | Similarly, \code{bcdcor} is bias-corrected, so we do not take the
36 | square root as with dCor.
37 | }
38 | \references{
39 |  Szekely, G.J. and Rizzo, M.L. (2014),
40 |  Partial Distance Correlation with Methods for Dissimilarities.
41 |  \emph{Annals of Statistics}, Vol. 42 No. 6, 2382-2412.
42 | 
43 |  Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007),
44 |  Measuring and Testing Dependence by Correlation of Distances,
45 |  \emph{Annals of Statistics}, Vol. 35 No. 6, pp. 2769-2794.
46 |  \cr \doi{10.1214/009053607000000505}
47 | 
48 |  Szekely, G.J. and Rizzo, M.L. (2009),
49 |  Brownian Distance Covariance,
50 |  \emph{Annals of Applied Statistics},
51 |  Vol. 3, No. 4, 1236-1265.
52 |  \cr \doi{10.1214/09-AOAS312}
53 |   }
54 |   \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
55 | Gabor J. Szekely
56 | }
57 | \examples{
58 |  x <- iris[1:50, 1:4]
59 |  y <- iris[51:100, 1:4]
60 |  Dx <- as.matrix(dist(x))
61 |  Dy <- as.matrix(dist(y))
62 |  dcovU_stats(Dx, Dy)
63 |  }
64 | \keyword{ multivariate }
65 | \keyword{ nonparametric }
66 | \concept{ independence }
67 | \concept{ multivariate }
68 | \concept{ distance correlation }
69 | \concept{ distance covariance }
70 | \concept{ energy statistics }
71 | 


--------------------------------------------------------------------------------
/R/pdcov-test.R:
--------------------------------------------------------------------------------
 1 | pdcov.test <- function(x, y, z, R) {
 2 |   if (missing(R)) R <- 0
 3 |   
 4 |   Dx <- .arg2dist.matrix(x)
 5 |   Dy <- .arg2dist.matrix(y)
 6 |   Dz <- .arg2dist.matrix(z)
 7 |   n <- nrow(Dx)
 8 |   Pxz <- projection(Dx, Dz)  #U-center and compute projections
 9 |   Pyz <- projection(Dy, Dz)
10 | 
11 |   #PxzU <- U_center(Pxz)  #not necessary, because of invariance
12 |   #PyzU <- U_center(Pyz)
13 | 
14 |   teststat <- n * U_product(Pxz, Pyz)
15 |   ## calc. pdcor
16 |   den <- sqrt(U_product(Pxz, Pxz) * U_product(Pyz, Pyz))
17 |   if (den > 0.0) {
18 |     estimate <- teststat / (n * den)
19 |   } else estimate <- 0.0
20 |   bootfn <- function(Pxz, i, Pyz) {
21 |     # generate the permutation replicates of dcovU(Pxz, Pyz)
22 |     # PxzU and PyzU are the U-centered matrices
23 |     U_product(Pxz[i, i], Pyz)  #RcppExports
24 |   }
25 | 
26 |   if (R > 0 && den > 0.0) {
27 |     reps <- replicate(R, expr= {
28 |       i <- sample(1:n)
29 |       bootfn(Pxz, i, Pyz=Pyz)
30 |     })
31 | 
32 |     replicates <- n * reps
33 |     pval <- (1 + sum(replicates > teststat)) / (1 + R)
34 |     #df <- n * (n-3) / 2 - 2
35 |   } else {
36 |     pval <- NA
37 |     replicates <- NA
38 |   }
39 |   
40 |   dataname <- paste("replicates ", R, sep="")
41 |   if (! R>0)
42 |     dataname <- "Specify R>0 replicates for a test"
43 |   condition <- (den > 0.0)
44 |   names(estimate) <- "pdcor"
45 |   names(teststat) <- "n V^*"
46 |   e <- list(
47 |     call = match.call(),
48 |     method = paste("pdcov test", sep = ""),
49 |     statistic = teststat,
50 |     estimate = estimate,
51 |     p.value = pval,
52 |     n = n,
53 |     replicates = replicates,
54 |     condition = condition,
55 |     data.name = dataname)
56 |   class(e) <- "htest"
57 |   return(e)
58 | }
59 | 
60 | 
61 | pdcor.test <- function(x, y, z, R) {
62 |   ## x, y, z must be dist. objects or data matrices (no dist matrix)
63 |   ## all required calc. done in pdcov.test
64 |   if (missing(R)) R <- 0
65 |   result <- pdcov.test(x, y, z, R=R)
66 | 
67 |   if (result$condition) {
68 |     ## if (A*A)(B*B) > 0
69 |     nRootV <- result$statistic / result$estimate
70 |     pdcor_reps <- result$replicates / nRootV
71 |   } else pdcor_reps <- NA
72 |   
73 |   e <- list(
74 |     call = match.call(),
75 |     method = paste("pdcor test", sep = ""),
76 |     statistic = result$estimate,
77 |     estimate = result$estimate,
78 |     p.value = result$p.value,
79 |     n = result$n,
80 |     replicates = pdcor_reps,
81 |     condition = result$condition,
82 |     data.name = result$data.name)
83 |   class(e) <- "htest"
84 |   return(e)
85 | }
86 | 
87 | 


--------------------------------------------------------------------------------
/src/energy_init.c:
--------------------------------------------------------------------------------
 1 | #include <R.h>
 2 | #include <Rinternals.h>
 3 | #include <stdlib.h> // for NULL
 4 | #include <R_ext/Rdynload.h>
 5 | 
 6 | /*
 7 |   Author: Maria L. Rizzo
 8 |   energy package
 9 |   github.com/mariarizzo/energy
10 | */
11 | 
12 | /* declarations to register native routines in this package */ 
13 | 
14 | /* .C calls */
15 | extern void dCOV(void *, void *, void *, void *);
16 | extern void dCOVtest(void *, void *, void *, void *, void *, void *, void *);
17 | extern void ksampleEtest(void *, void *, void *, void *, void *, void *, void *, void *, void *);
18 | extern void permute_check(void *, void *);
19 | 
20 | /* .Call calls */
21 | extern SEXP _energy_D_center(SEXP);
22 | extern SEXP _energy_dcovU_stats(SEXP, SEXP);
23 | extern SEXP _energy_partial_dcor(SEXP, SEXP, SEXP);
24 | extern SEXP _energy_partial_dcov(SEXP, SEXP, SEXP);
25 | extern SEXP _energy_poisMstat(SEXP);
26 | extern SEXP _energy_projection(SEXP, SEXP);
27 | extern SEXP _energy_U_center(SEXP);
28 | extern SEXP _energy_U_product(SEXP, SEXP);
29 | extern SEXP _energy_Btree_sum(SEXP, SEXP);
30 | extern SEXP _energy_kgroups_start(SEXP, SEXP, SEXP, SEXP, SEXP);
31 | extern SEXP _energy_calc_dist(SEXP);
32 | extern SEXP _energy_Istat(SEXP, SEXP);
33 | extern SEXP _energy_Istats(SEXP, SEXP, SEXP);
34 | 
35 | static const R_CMethodDef CEntries[] = {
36 |   {"dCOV",         (DL_FUNC) &dCOV,         4},
37 |   {"dCOVtest",     (DL_FUNC) &dCOVtest,     7},
38 |   {"ksampleEtest", (DL_FUNC) &ksampleEtest, 9},
39 |   {"permute_check",(DL_FUNC) &permute_check,2},
40 |   {NULL, NULL, 0}
41 | };
42 | 
43 | static const R_CallMethodDef CallEntries[] = {
44 |   {"_energy_D_center",       (DL_FUNC) &_energy_D_center,      1},
45 |   {"_energy_dcovU_stats",    (DL_FUNC) &_energy_dcovU_stats,   2},
46 |   {"_energy_Istat",          (DL_FUNC) &_energy_Istat,         2},
47 |   {"_energy_Istats",         (DL_FUNC) &_energy_Istats,        3},
48 |   {"_energy_partial_dcor",   (DL_FUNC) &_energy_partial_dcor,  3},
49 |   {"_energy_partial_dcov",   (DL_FUNC) &_energy_partial_dcov,  3},
50 |   {"_energy_poisMstat",      (DL_FUNC) &_energy_poisMstat,     1},
51 |   {"_energy_projection",     (DL_FUNC) &_energy_projection,    2},
52 |   {"_energy_U_center",       (DL_FUNC) &_energy_U_center,      1},
53 |   {"_energy_U_product",      (DL_FUNC) &_energy_U_product,     2},
54 |   {"_energy_Btree_sum",      (DL_FUNC) &_energy_Btree_sum,     2},
55 |   {"_energy_kgroups_start",  (DL_FUNC) &_energy_kgroups_start, 5},
56 |   {"_energy_calc_dist",      (DL_FUNC) &_energy_calc_dist,     1},
57 |   {NULL, NULL, 0}
58 | };
59 | 
60 | void R_init_energy(DllInfo *dll)
61 | {
62 |   R_registerRoutines(dll, CEntries, CallEntries, NULL, NULL);
63 |   R_useDynamicSymbols(dll, FALSE);
64 | }
65 | 


--------------------------------------------------------------------------------
/man/dcovu.Rd:
--------------------------------------------------------------------------------
 1 | \name{Unbiased distance covariance}
 2 | \alias{bcdcor}
 3 | \alias{dcovU}
 4 | \title{Unbiased dcov and bias-corrected dcor statistics}
 5 | \description{
 6 |  These functions compute unbiased estimators of squared distance
 7 |  covariance and a bias-corrected estimator of
 8 |  (squared) distance correlation.
 9 | }
10 | \usage{
11 | bcdcor(x, y)
12 | dcovU(x, y)
13 | }
14 | \arguments{
15 |   \item{x}{ data or dist object of first sample}
16 |   \item{y}{ data or dist object of second sample}
17 | }
18 | \details{
19 |  The unbiased (squared) dcov is inner product definition of
20 |  dCov, in the Hilbert space of U-centered distance matrices.
21 | 
22 |  The sample sizes (number of rows) of the two samples must
23 |  agree, and samples must not contain missing values. 
24 |  
25 | Argument types supported are 
26 | numeric data matrix, data.frame, or tibble, with observations in rows;
27 | numeric vector; ordered or unordered factors. In case of unordered factors
28 | a 0-1 distance matrix is computed.
29 | }
30 | \value{
31 | \code{dcovU} returns the unbiased estimator of squared dcov.
32 | \code{bcdcor} returns a bias-corrected estimator of squared dcor.
33 | }
34 | \note{
35 | Unbiased distance covariance (SR2014) corresponds to the biased
36 | (original) \eqn{\mathrm{dCov^2}}{dCov^2}. Since \code{dcovU} is an
37 | unbiased statistic, it is signed and we do not take the square root.
38 | For the original distance covariance test of independence (SRB2007,
39 | SR2009), the distance covariance test statistic is the V-statistic
40 | \eqn{\mathrm{n\, dCov^2} = n \mathcal{V}_n^2}{n V_n^2} (not dCov).
41 | Similarly, \code{bcdcor} is bias-corrected, so we do not take the
42 | square root as with dCor.
43 | }
44 | \references{
45 |  Szekely, G.J. and Rizzo, M.L. (2014),
46 |  Partial Distance Correlation with Methods for Dissimilarities.
47 |  \emph{Annals of Statistics}, Vol. 42 No. 6, 2382-2412.
48 | 
49 |  Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007),
50 |  Measuring and Testing Dependence by Correlation of Distances,
51 |  \emph{Annals of Statistics}, Vol. 35 No. 6, pp. 2769-2794.
52 |  \cr \doi{10.1214/009053607000000505}
53 | 
54 |  Szekely, G.J. and Rizzo, M.L. (2009),
55 |  Brownian Distance Covariance,
56 |  \emph{Annals of Applied Statistics},
57 |  Vol. 3, No. 4, 1236-1265.
58 |  \cr \doi{10.1214/09-AOAS312}
59 |   }
60 |   \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
61 | Gabor J. Szekely
62 | }
63 | \examples{
64 |  x <- iris[1:50, 1:4]
65 |  y <- iris[51:100, 1:4]
66 |  dcovU(x, y)
67 |  bcdcor(x, y)
68 | }
69 | \keyword{ multivariate }
70 | \keyword{ nonparametric }
71 | \concept{ independence }
72 | \concept{ multivariate }
73 | \concept{ distance correlation }
74 | \concept{ distance covariance }
75 | \concept{ energy statistics }
76 | 


--------------------------------------------------------------------------------
/R/util.R:
--------------------------------------------------------------------------------
 1 | ## util.R
 2 | ##
 3 | ## utilities for the energy package
 4 | ## Author: Maria Rizzo
 5 | ## github.com/mariarizzo/energy
 6 | ##
 7 | 
 8 | 
 9 | .arg2dist.matrix <- function(x) {
10 |   ## argument check and conversion for energy functions
11 |   ## that take optionally data or distance object arguments
12 |   ## check type of argument, return a distance matrix
13 |   ## supported argument types: matrix, vector, data.frame, tibble, factor, dist
14 |   
15 |   if (anyNA(x)) warning("missing values not supported")
16 |   
17 |   if (inherits(x, "dist")) {
18 |     Dx <- as.matrix(x)
19 |     return(Dx)
20 |   }
21 |   
22 |   if (is.factor(x)) {
23 |     z <- as.matrix(as.integer(x))
24 |     Dx <- calc_dist(z)
25 |     if (!is.ordered(x) && nlevels(x) > 2) {
26 |       # need a 0-1 matrix
27 |       Dx <- matrix(as.integer(Dx > 0), nrow=nrow(Dx))
28 |     }
29 |     return(Dx)
30 |   }
31 |   
32 | 
33 |   if (is.vector(x) || is.data.frame(x)) {
34 |     ## also for tibble
35 |     Dx <- calc_dist(as.matrix(x))
36 |   }
37 |     
38 |   if (is.matrix(x)) {
39 |     if (is.dmatrix(x)) {
40 |       Dx <- x
41 |     } else {
42 |       ## should be data matrix
43 |       Dx <- calc_dist(x)
44 |     }
45 |   }
46 |   return(Dx)
47 |   
48 |   ## if here, arg type is not supported
49 |   stop(paste("cannot compute distances for", class(x)))
50 |   return(NA)
51 | }
52 | 
53 | is.dmatrix <- function(x, tol = 100 * .Machine$double.eps) {
54 |   ## check if zero diagonal, symmetric, non-negative square matrix
55 |   ## i.e., distance matrix or dissimilarity matrix
56 |   value <- FALSE
57 |   if (is.matrix(x)) {
58 |     if (nrow(x) == ncol(x)) {
59 |       if (max(abs(diag(x)) < tol) && (max(abs(x - t(x)) < tol))) {
60 |         if (! any(x < 0.0)) value <-  TRUE
61 |       }
62 |     }
63 |   }
64 |   return (value)
65 | }
66 |   
67 | perm.matrix <- function(n, R) {
68 |   ## Generate the same matrix as boot.array with
69 |   ## sim="permutation" and default other arguments
70 |   ## with same seed we get boot.array(boot.out, indices=T)
71 |   
72 |   pfn <- function(x, n) x[sample.int(n)]
73 |   perms <- matrix(1:n, n, R)
74 |   perms <- t(apply(perms, 2, pfn, n=n))
75 | }
76 | 
77 | 
78 | permutation <- function(n) {
79 |   ## call the internal permute() function using permute_check()
80 |   J <- 1:n
81 |   a <- .C("permute_check",
82 |           J = as.integer(J),
83 |           n = as.integer(n),
84 |           PACKAGE = "energy")
85 |   return (a$J)
86 | }  
87 | 
88 | sortrank <- function(x) {
89 |   ## sort and rank data with one call to order()
90 |   ## faster than calling sort and rank separately
91 |   ## returns an object identical to:
92 |   ##   list(x=sort(x), ix=order(x), r=rank(x, ties.method = "first"))
93 |   o <- order(x)
94 |   n <- length(o)
95 |   N <- 1:n
96 |   N[o] <- N
97 |   return(list(x=x[o], ix=o, r=N))
98 | }
99 | 


--------------------------------------------------------------------------------
/man/dcorT.Rd:
--------------------------------------------------------------------------------
 1 | \name{dcorT}
 2 | \alias{dcorT.test}
 3 | \alias{dcorT}
 4 | \title{ Distance Correlation t-Test}
 5 | \description{
 6 |  Distance correlation t-test of multivariate independence for high dimension.}
 7 | \usage{
 8 | dcorT.test(x, y)
 9 | dcorT(x, y)
10 | }
11 | \arguments{
12 |   \item{x}{ data or distances of first sample}
13 |   \item{y}{ data or distances of second sample}
14 | }
15 | \details{
16 |  \code{dcorT.test} performs a nonparametric t-test of
17 |  multivariate independence in high dimension (dimension is close to
18 |  or larger than sample size). As dimension goes to infinity, the
19 |  asymptotic distribution of the test statistic is approximately Student t with \eqn{n(n-3)/2-1} degrees of freedom and for \eqn{n \geq 10} the statistic is approximately distributed as standard normal.
20 | 
21 |  The sample sizes (number of rows) of the two samples must
22 |  agree, and samples must not contain missing values.
23 | 
24 |  The t statistic (dcorT) is a transformation of a bias corrected 
25 |  version of distance correlation (see SR 2013 for details).
26 | 
27 | Large values (upper tail) of the dcorT statistic are significant.
28 | }
29 | \note{
30 | \code{dcor.t} and \code{dcor.ttest} are deprecated.
31 | }
32 | \value{
33 | \code{dcorT} returns the dcor t statistic, and
34 | \code{dcorT.test} returns a list with class \code{htest} containing
35 |    \item{     method}{ description of test}
36 |    \item{  statistic}{ observed value of the test statistic}
37 |    \item{  parameter}{ degrees of freedom}
38 |    \item{   estimate}{ (bias corrected) squared dCor(x,y)}
39 |    \item{    p.value}{ p-value of the t-test}
40 |    \item{  data.name}{ description of data}
41 | }
42 | \seealso{
43 |  \code{\link{bcdcor}} \code{\link{dcov.test}} \code{\link{dcor}} \code{\link{DCOR}}
44 | }
45 | 
46 | \references{
47 |  Szekely, G.J. and Rizzo, M.L. (2013). The distance correlation t-test of  independence in high dimension. \emph{Journal of Multivariate Analysis},  Volume 117, pp. 193-213. \cr
48 |  \doi{10.1016/j.jmva.2013.02.012}
49 | 
50 | Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007),
51 |  Measuring and Testing Dependence by Correlation of Distances,
52 |  \emph{Annals of Statistics}, Vol. 35 No. 6, pp. 2769-2794.
53 |  \cr \doi{10.1214/009053607000000505}
54 | 
55 |  Szekely, G.J. and Rizzo, M.L. (2009),
56 |  Brownian Distance Covariance,
57 |  \emph{Annals of Applied Statistics},
58 |  Vol. 3, No. 4, 1236-1265.
59 |  \cr \doi{10.1214/09-AOAS312}
60 | }
61 | \author{
62 | Maria L. Rizzo \email{mrizzo@bgsu.edu} and
63 | Gabor J. Szekely
64 | }
65 | \examples{
66 |  x <- matrix(rnorm(100), 10, 10)
67 |  y <- matrix(runif(100), 10, 10)
68 |  dcorT(x, y)
69 |  dcorT.test(x, y)
70 | }
71 | 
72 | 
73 | \keyword{ htest }
74 | \keyword{ multivariate }
75 | \keyword{ nonparametric }
76 | \concept{ independence }
77 | \concept{ multivariate }
78 | \concept{ distance correlation }
79 | \concept{ distance covariance }
80 | \concept{ energy statistics }
81 | 
82 | 


--------------------------------------------------------------------------------
/man/normalGOF.Rd:
--------------------------------------------------------------------------------
 1 | \name{normal.test}
 2 | \alias{normal.test}
 3 | \alias{normal.e}
 4 | \title{Energy Test of Univariate Normality}
 5 | \description{
 6 |  Performs the energy test of univariate normality
 7 |  for the composite hypothesis Case 4, estimated parameters.
 8 | }
 9 | \usage{
10 | normal.test(x, method=c("mc","limit"), R)
11 | normal.e(x)
12 | }
13 | \arguments{
14 |   \item{x}{ univariate data vector}
15 |   \item{method}{ method for p-value}
16 |   \item{R}{ number of replications if Monte Carlo method}
17 | }
18 | \details{
19 | If \code{method="mc"} this test function applies the parametric
20 | bootstrap method implemented in \code{\link{mvnorm.test}}. 
21 | 
22 | If \code{method="limit"}, the p-value of the test is computed from 
23 | the asymptotic distribution of the test statistic under the null 
24 | hypothesis. The asymptotic 
25 | distribution is a quadratic form of centered Gaussian random variables, 
26 | which has the form
27 | \deqn{\sum_{k=1}^\infty \lambda_k Z_k^2,}
28 | where \eqn{\lambda_k} are positive constants (eigenvalues) and
29 | \eqn{Z_k} are iid standard normal variables. Eigenvalues are
30 | pre-computed and stored internally. 
31 | A p-value is computed using Imhof's method as implemented in the
32 | \pkg{CompQuadForm} package. 
33 | 
34 | Note that the "limit" method is intended for moderately large
35 | samples because it applies the asymptotic distribution.  
36 | 
37 | The energy test of normality was proposed
38 |  and implemented by Szekely and Rizzo (2005). 
39 |  See \code{\link{mvnorm.test}}
40 |  for more details. 
41 | }
42 | 
43 | \value{
44 | \code{normal.e} returns the energy goodness-of-fit statistic for
45 | a univariate sample. 
46 | 
47 |  \code{normal.test} returns a list with class \code{htest} containing
48 |  \item{statistic}{observed value of the test statistic}
49 |  \item{p.value}{p-value of the test}
50 |  \item{estimate}{sample estimates: mean, sd}
51 |  \item{data.name}{description of data}
52 | }
53 | \seealso{
54 |   \code{\link{mvnorm.test}} and \code{\link{mvnorm.e}} for the
55 |   energy test of multivariate normality and the test statistic
56 |   for multivariate samples.
57 | }
58 | \references{
59 |  Szekely, G. J. and Rizzo, M. L. (2005) A New Test for
60 |  Multivariate Normality, \emph{Journal of Multivariate Analysis},
61 |  93/1, 58-80,
62 |  \doi{10.1016/j.jmva.2003.12.002}.
63 | 
64 | Mori, T. F., Szekely, G. J. and Rizzo, M. L. "On energy tests of normality." Journal of Statistical Planning and Inference 213 (2021): 1-15. 
65 | 
66 |  Rizzo, M. L. (2002). A New Rotation Invariant Goodness-of-Fit Test,
67 |  Ph.D. dissertation, Bowling Green State University.
68 | 
69 | J. P. Imhof (1961). Computing the Distribution of Quadratic Forms in 
70 | Normal Variables, \emph{Biometrika}, Volume 48, Issue 3/4,
71 | 419-426.
72 | }
73 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
74 | Gabor J. Szekely
75 | }
76 | \examples{
77 |   x <- iris[1:50, 1]
78 |   normal.e(x)
79 |   normal.test(x, R=199)
80 |   normal.test(x, method="limit")
81 | }
82 | \keyword{ htest }
83 | \concept{ goodness-of-fit}
84 | \concept{ normal distribution}
85 | \concept{ energy statistics }
86 | 


--------------------------------------------------------------------------------
/src/partial-dcor.cpp:
--------------------------------------------------------------------------------
  1 | #include <Rcpp.h>
  2 | using namespace Rcpp;
  3 | 
  4 | /*
  5 |  Author: Maria L. Rizzo
  6 |  energy package
  7 |  github.com/mariarizzo/energy
  8 |  */
  9 | 
 10 | 
 11 | NumericVector partial_dcor(NumericMatrix Dx, NumericMatrix Dy, NumericMatrix Dz);
 12 | double        partial_dcov(NumericMatrix Dx, NumericMatrix Dy, NumericMatrix Dz);
 13 | 
 14 | NumericMatrix U_center(NumericMatrix);
 15 | double        U_product(NumericMatrix U, NumericMatrix V);
 16 | NumericMatrix projection(NumericMatrix Dx, NumericMatrix Dz);
 17 | 
 18 | // [[Rcpp::export]]
 19 | NumericVector partial_dcor(NumericMatrix Dx, NumericMatrix Dy, NumericMatrix Dz) {
 20 |   /*  partial distance correlation, second formulation
 21 |   Dx, Dy, Dz are symmetric distance or dissimilarity matrices with zero diagonals
 22 |   partial_dcor  : vector length 4, partial_dcor[0] is pdcor
 23 |   partial_dcor returns vector [Rxyz, Rxy, Rxz, Ryz] starred versions
 24 |   */
 25 |   int    n = Dx.nrow();
 26 |   NumericMatrix A(n, n), B(n, n), C(n, n);
 27 |   double Rxy=0.0, Rxz=0.0, Ryz=0.0, Rxyz=0.0, den;
 28 |   double AB, AC, BC, AA, BB, CC, pDCOV;
 29 |   double eps = std::numeric_limits<double>::epsilon();  //machine epsilon
 30 | 
 31 |   A = U_center(Dx);           /* U-centering to get A^U etc. */
 32 |   B = U_center(Dy);
 33 |   C = U_center(Dz);
 34 | 
 35 |   AB = U_product(A, B);
 36 |   AC = U_product(A, C);
 37 |   BC = U_product(B, C);
 38 |   AA = U_product(A, A);
 39 |   BB = U_product(B, B);
 40 |   CC = U_product(C, C);
 41 |   pDCOV = U_product(projection(Dx, Dz), projection(Dy, Dz));
 42 | 
 43 |   den = sqrt(AA*BB);
 44 |   if (den > eps)
 45 |     Rxy = AB / den;
 46 |   den = sqrt(AA*CC);
 47 |   if (den > eps)
 48 |     Rxz = AC / den;
 49 |   den = sqrt(BB*CC);
 50 |   if (den > eps)
 51 |     Ryz = BC / den;
 52 |   den = sqrt(1 - Rxz*Rxz) * sqrt(1 - Ryz * Ryz);
 53 | 
 54 |   if (den > eps)
 55 |     Rxyz = (Rxy - Rxz * Ryz) / den;
 56 |   else {
 57 |     Rxyz = 0.0;
 58 |   }
 59 | 
 60 |   return NumericVector::create(
 61 |     _["pdcor"] = Rxyz,
 62 |     _["pdcov"] = pDCOV,
 63 |     _["Rxy"] = Rxy,
 64 |     _["Rxz"] = Rxz,
 65 |     _["Ryz"] = Ryz
 66 |   );
 67 | }
 68 | 
 69 | 
 70 | //[[Rcpp::export]]
 71 | double partial_dcov(NumericMatrix Dx, NumericMatrix Dy, NumericMatrix Dz) {
 72 |   /* pdcov following the definition via projections
 73 |   Dx, Dy, Dz are symmetric distance or dissimilarity matrices with zero diagonals
 74 |   returns pdcov sample coefficient
 75 |   */
 76 |   int    n = Dx.nrow();
 77 |   int    i, j;
 78 |   NumericMatrix A(n, n), B(n, n), C(n, n), Pxz(n, n), Pyz(n, n);
 79 |   double AC, BC, CC, c1, c2;
 80 |   double eps = std::numeric_limits<double>::epsilon();  //machine epsilon
 81 | 
 82 |   A = U_center(Dx);           /* U-centering to get A^U etc. */
 83 |   B = U_center(Dy);
 84 |   C = U_center(Dz);
 85 | 
 86 |   AC = U_product(A, C);
 87 |   BC = U_product(B, C);
 88 |   CC = U_product(C, C);
 89 | 
 90 |   c1 = c2 = 0.0;
 91 |   // if (C,C)==0 then C=0 and both (A,C)=0 and (B,C)=0
 92 |   if (fabs(CC) > eps) {
 93 |     c1 = AC / CC;
 94 |     c2 = BC / CC;
 95 |   }
 96 | 
 97 |   for (i=0; i<n; i++) {
 98 |     for (j=0; j<n; j++) {
 99 |       Pxz(i, j) = A(i, j) - c1 * C(i, j);
100 |       Pyz(i, j) = B(i, j) - c2 * C(i, j);
101 |     }
102 |   }
103 | 
104 |   return U_product(Pxz, Pyz);
105 | }
106 | 
107 | 


--------------------------------------------------------------------------------
/docs/pkgdown.js:
--------------------------------------------------------------------------------
  1 | /* http://gregfranko.com/blog/jquery-best-practices/ */
  2 | (function($) {
  3 |   $(function() {
  4 | 
  5 |     $('.navbar-fixed-top').headroom();
  6 | 
  7 |     $('body').css('padding-top', $('.navbar').height() + 10);
  8 |     $(window).resize(function(){
  9 |       $('body').css('padding-top', $('.navbar').height() + 10);
 10 |     });
 11 | 
 12 |     $('[data-toggle="tooltip"]').tooltip();
 13 | 
 14 |     var cur_path = paths(location.pathname);
 15 |     var links = $("#navbar ul li a");
 16 |     var max_length = -1;
 17 |     var pos = -1;
 18 |     for (var i = 0; i < links.length; i++) {
 19 |       if (links[i].getAttribute("href") === "#")
 20 |         continue;
 21 |       // Ignore external links
 22 |       if (links[i].host !== location.host)
 23 |         continue;
 24 | 
 25 |       var nav_path = paths(links[i].pathname);
 26 | 
 27 |       var length = prefix_length(nav_path, cur_path);
 28 |       if (length > max_length) {
 29 |         max_length = length;
 30 |         pos = i;
 31 |       }
 32 |     }
 33 | 
 34 |     // Add class to parent <li>, and enclosing <li> if in dropdown
 35 |     if (pos >= 0) {
 36 |       var menu_anchor = $(links[pos]);
 37 |       menu_anchor.parent().addClass("active");
 38 |       menu_anchor.closest("li.dropdown").addClass("active");
 39 |     }
 40 |   });
 41 | 
 42 |   function paths(pathname) {
 43 |     var pieces = pathname.split("/");
 44 |     pieces.shift(); // always starts with /
 45 | 
 46 |     var end = pieces[pieces.length - 1];
 47 |     if (end === "index.html" || end === "")
 48 |       pieces.pop();
 49 |     return(pieces);
 50 |   }
 51 | 
 52 |   // Returns -1 if not found
 53 |   function prefix_length(needle, haystack) {
 54 |     if (needle.length > haystack.length)
 55 |       return(-1);
 56 | 
 57 |     // Special case for length-0 haystack, since for loop won't run
 58 |     if (haystack.length === 0) {
 59 |       return(needle.length === 0 ? 0 : -1);
 60 |     }
 61 | 
 62 |     for (var i = 0; i < haystack.length; i++) {
 63 |       if (needle[i] != haystack[i])
 64 |         return(i);
 65 |     }
 66 | 
 67 |     return(haystack.length);
 68 |   }
 69 | 
 70 |   /* Clipboard --------------------------*/
 71 | 
 72 |   function changeTooltipMessage(element, msg) {
 73 |     var tooltipOriginalTitle=element.getAttribute('data-original-title');
 74 |     element.setAttribute('data-original-title', msg);
 75 |     $(element).tooltip('show');
 76 |     element.setAttribute('data-original-title', tooltipOriginalTitle);
 77 |   }
 78 | 
 79 |   if(ClipboardJS.isSupported()) {
 80 |     $(document).ready(function() {
 81 |       var copyButton = "<button type='button' class='btn btn-primary btn-copy-ex' type = 'submit' title='Copy to clipboard' aria-label='Copy to clipboard' data-toggle='tooltip' data-placement='left auto' data-trigger='hover' data-clipboard-copy><i class='fa fa-copy'></i></button>";
 82 | 
 83 |       $("div.sourceCode").addClass("hasCopyButton");
 84 | 
 85 |       // Insert copy buttons:
 86 |       $(copyButton).prependTo(".hasCopyButton");
 87 | 
 88 |       // Initialize tooltips:
 89 |       $('.btn-copy-ex').tooltip({container: 'body'});
 90 | 
 91 |       // Initialize clipboard:
 92 |       var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', {
 93 |         text: function(trigger) {
 94 |           return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, "");
 95 |         }
 96 |       });
 97 | 
 98 |       clipboardBtnCopies.on('success', function(e) {
 99 |         changeTooltipMessage(e.trigger, 'Copied!');
100 |         e.clearSelection();
101 |       });
102 | 
103 |       clipboardBtnCopies.on('error', function() {
104 |         changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy');
105 |       });
106 |     });
107 |   }
108 | })(window.jQuery || window.$)
109 | 


--------------------------------------------------------------------------------
/man/mvnorm-test.Rd:
--------------------------------------------------------------------------------
 1 | \name{mvnorm.test}
 2 | \alias{mvnorm.test}
 3 | \alias{mvnorm.etest}
 4 | \alias{mvnorm.e}
 5 | \title{E-statistic (Energy) Test of Multivariate Normality}
 6 | \description{
 7 |  Performs the E-statistic (energy) test of multivariate or univariate normality.
 8 | }
 9 | \usage{
10 | mvnorm.test(x, R)
11 | mvnorm.etest(x, R)
12 | mvnorm.e(x)
13 | }
14 | \arguments{
15 |   \item{x}{ data matrix of multivariate sample, or univariate data vector}
16 |   \item{R}{ number of bootstrap replicates }
17 | }
18 | \details{
19 |  If \code{x} is a matrix, each row is a multivariate observation. The
20 |  data will be standardized to zero mean and identity covariance matrix
21 |  using the sample mean vector and sample covariance matrix. If \code{x}
22 |  is a vector, \code{mvnorm.e} returns the univariate statistic 
23 |  \code{normal.e(x)}.
24 |  If the data contains missing values or the sample covariance matrix is
25 |  singular, \code{mvnorm.e} returns NA. 
26 | 
27 |  The \eqn{\mathcal{E}}{E}-test of multivariate normality was proposed
28 |  and implemented by Szekely and Rizzo (2005). The test statistic for
29 |  d-variate normality is given by
30 |  \deqn{\mathcal{E} = n (\frac{2}{n} \sum_{i=1}^n E\|y_i-Z\| -
31 |  E\|Z-Z'\| - \frac{1}{n^2} \sum_{i=1}^n \sum_{j=1}^n \|y_i-y_j\|),
32 |  }{E = n((2/n) sum[1:n] E||y_i-Z|| - E||Z-Z'|| - (1/n^2) sum[1:n,1:n]
33 |  ||y_i-y_j||),}
34 |  where \eqn{y_1,\ldots,y_n} is the standardized sample,
35 |  \eqn{Z, Z'} are iid standard d-variate normal, and
36 |  \eqn{\| \cdot \|}{|| ||} denotes Euclidean norm.
37 | 
38 |    The \eqn{\mathcal{E}}{E}-test of multivariate (univariate) normality
39 |   is implemented by parametric bootstrap with \code{R} replicates.
40 | }
41 | \value{
42 |  The value of the \eqn{\mathcal{E}}{E}-statistic for multivariate
43 |  normality is returned by \code{mvnorm.e}.
44 | 
45 |  \code{mvnorm.test} returns a list with class \code{htest} containing
46 |  \item{method}{description of test}
47 |  \item{statistic}{observed value of the test statistic}
48 |  \item{p.value}{approximate p-value of the test}
49 |  \item{data.name}{description of data}
50 |  
51 |  \code{mvnorm.etest} is replaced by \code{mvnorm.test}. 
52 | }
53 | \seealso{
54 | \code{\link{normal.test}} for the energy test of univariate
55 | normality and \code{\link{normal.e}} for the statistic.
56 | }
57 | \note{If the data is univariate, the test statistic is formally
58 | the same as the multivariate case, but a more efficient computational
59 | formula is applied in \code{\link{normal.e}}.
60 | 
61 | \code{\link{normal.test}} also provides an optional method for the
62 | test based on the asymptotic sampling distribution of the test
63 | statistic. 
64 | }
65 | \references{
66 |  Szekely, G. J. and Rizzo, M. L. (2005) A New Test for
67 |  Multivariate Normality, \emph{Journal of Multivariate Analysis},
68 |  93/1, 58-80,
69 |  \doi{10.1016/j.jmva.2003.12.002}.
70 |  
71 | Mori, T. F., Szekely, G. J. and Rizzo, M. L. "On energy tests of normality." Journal of Statistical Planning and Inference 213 (2021): 1-15. 
72 | 
73 |  Rizzo, M. L. (2002). A New Rotation Invariant Goodness-of-Fit Test, Ph.D. dissertation, Bowling Green State University.
74 | 
75 | Szekely, G. J. (1989) Potential and Kinetic Energy in Statistics,
76 | Lecture Notes, Budapest Institute of Technology (Technical University).
77 | }
78 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
79 | Gabor J. Szekely
80 | }
81 | \examples{
82 |  ## compute normality test statistic for iris Setosa data
83 |  data(iris)
84 |  mvnorm.e(iris[1:50, 1:4])
85 | 
86 |  ## test if the iris Setosa data has multivariate normal distribution
87 |  mvnorm.test(iris[1:50,1:4], R = 199)
88 | }
89 | \keyword{ multivariate }
90 | \keyword{ htest }
91 | \concept{ goodness-of-fit}
92 | \concept{ normal distribution}
93 | \concept{ energy statistics }
94 | 


--------------------------------------------------------------------------------
/src/mvI.cpp:
--------------------------------------------------------------------------------
  1 | #include <Rcpp.h>
  2 | using namespace Rcpp;
  3 | 
  4 | NumericMatrix Dxi(NumericMatrix Dx, IntegerVector ix);
  5 |   
  6 | // [[Rcpp::export]]
  7 | double Istat(NumericMatrix Dx, NumericMatrix Dy) {
  8 |   // compute independence coefficient I_n (the square root)
  9 |   // Dx and Dy are the Euclidean distance matrices
 10 |   
 11 |   int n = Dx.nrow();
 12 |   int i, j, k, m;
 13 |   double n2 = n*n, n3 = n*n2, n4 = n2*n2;
 14 |   double Cx, Cy, zd, zbar, z;
 15 |   IntegerVector ix(n), iy(n);
 16 |   NumericMatrix Dx2(n, n), Dy2(n, n);
 17 |   
 18 |   Cx = 0.0; Cy = 0.0; z = 0.0;
 19 |   for (i=0; i<n; i++) {
 20 |     for (j=0; j<i; j++) {
 21 |       Cx += 2*Dx(i, j);
 22 |       Cy += 2*Dy(i, j);
 23 |       Dx2(i, j) = Dx(i, j) * Dx(i, j);
 24 |       Dy2(i, j) = Dy(i, j) * Dy(i, j);
 25 |       Dx2(j, i) = Dx2(i, j);
 26 |       Dy2(j, i) = Dy2(i, j);
 27 |     }
 28 |   }
 29 |   Cx /= n2;
 30 |   Cy /= n2;
 31 |   
 32 |   for (i=0; i<n; i++) {
 33 |     for (j=0; j<n; j++) {
 34 |       for (k=0; k<n; k++) {
 35 |         for (m=0; m<n; m++) {
 36 |           z += sqrt(Dx2(k, i) + Dy2(m, j));
 37 |         }
 38 |       }
 39 |     }
 40 |   }
 41 |   z /= n4;
 42 |   
 43 |   zbar = 0.0; zd = 0.0;
 44 |   for (i=0; i<n; i++) {
 45 |     for (j=0; j<n; j++) {
 46 |       zd += sqrt(Dx2(i, j) + Dy2(i, j));
 47 |       for (k=0; k<n; k++) {
 48 |         zbar += sqrt(Dx2(k, i) + Dy2(k, j));
 49 |       }
 50 |     }
 51 |   }
 52 |   zbar /= n3;
 53 |   zd /= n2;
 54 |   return sqrt((2 * zbar - zd - z) / (Cx + Cy - z));
 55 | }
 56 | 
 57 | NumericMatrix Dxi(NumericMatrix Dx, IntegerVector ix) {
 58 |   int i, j, n=Dx.nrow();
 59 |   NumericMatrix D(n, n);
 60 |   for (i=0; i<n; i++) {
 61 |     for (j=0; j<n; j++) {
 62 |       D(i, j) = Dx(ix[i], ix[j]);      
 63 |     }
 64 |   }
 65 |   return D;
 66 | }
 67 | 
 68 | // [[Rcpp::export]]
 69 | NumericVector Istats(NumericMatrix Dx, NumericMatrix Dy, int R) {
 70 | // computes the observed I_n independence coefficient and
 71 | // R permutation replicates
 72 | // denominator is invariant to permutations of row indices
 73 | // returns vector c(I_n, I_n^*) 
 74 |   int n = Dx.nrow();
 75 |   int b, i, j, k, m;
 76 |   double n2 = n*n, n3 = n*n2, n4 = n2*n2;
 77 |   double Cx, Cy, zd, zbar, z;
 78 |   IntegerVector ix(n);
 79 |   NumericVector istats(R+1);
 80 |   NumericMatrix Dx2(n, n), Dy2(n, n), Dx2b(n, n);
 81 |   
 82 |   Cx = 0.0; Cy = 0.0; z = 0.0;
 83 |   for (i=0; i<n; i++) {
 84 |     for (j=0; j<i; j++) {
 85 |       Cx += 2*Dx(i, j);
 86 |       Cy += 2*Dy(i, j);
 87 |       Dx2(i, j) = Dx(i, j) * Dx(i, j);
 88 |       Dy2(i, j) = Dy(i, j) * Dy(i, j);
 89 |       Dx2(j, i) = Dx2(i, j);
 90 |       Dy2(j, i) = Dy2(i, j);
 91 |     }
 92 |   }
 93 |   Cx /= n2;
 94 |   Cy /= n2;
 95 |   
 96 |   for (i=0; i<n; i++) {
 97 |     for (j=0; j<n; j++) {
 98 |       for (k=0; k<n; k++) {
 99 |         for (m=0; m<n; m++) {
100 |           z += sqrt(Dx2(k, i) + Dy2(m, j));
101 |         }
102 |       }
103 |     }
104 |   }
105 |   z /= n4;
106 |   
107 |   zbar = 0.0; zd = 0.0;
108 |   for (i=0; i<n; i++) {
109 |     for (j=0; j<n; j++) {
110 |       zd += sqrt(Dx2(i, j) + Dy2(i, j));
111 |       for (k=0; k<n; k++) {
112 |         zbar += sqrt(Dx2(k, i) + Dy2(k, j));
113 |       }
114 |     }
115 |   }
116 |   zbar /= n3;
117 |   zd /= n2;
118 |   istats[0] = sqrt((2 * zbar - zd - z) / (Cx + Cy - z));
119 | 
120 |   for (b=1; b<=R; b++) {
121 |     ix = sample(n, n, false, R_NilValue, false);
122 |     Dx2b = Dxi(Dx2, ix);
123 |     zd = 0.0; zbar = 0.0;
124 |     for (i=0; i<n; i++) {
125 |       for (j=0; j<n; j++) {
126 |         zd += sqrt(Dx2b(i, j) + Dy2(i, j));
127 |         for (k=0; k<n; k++) {
128 |           zbar += sqrt(Dx2b(k, i) + Dy2(k, j));
129 |         }
130 |       }
131 |     }
132 |     zd /= n2;
133 |     zbar /= n3;
134 |     istats[b] = sqrt((2 * zbar - zd - z) / (Cx + Cy - z));
135 |   }
136 |   return istats;
137 | }
138 | 
139 | 


--------------------------------------------------------------------------------
/src/B-tree.cpp:
--------------------------------------------------------------------------------
  1 | #include <Rcpp.h>
  2 | using namespace Rcpp;
  3 | 
  4 | //  Author: Maria L. Rizzo
  5 | //  energy package
  6 | //  github.com/mariarizzo/energy
  7 |   
  8 | // compute partial sum using binary search algorithm like AVL
  9 | // pre-compute powers of two to save repeated calculations
 10 | 
 11 | 
 12 | IntegerVector containerNodes (int y, IntegerVector pwr2, IntegerVector psum);
 13 | NumericVector gamma1_direct(IntegerVector y, NumericVector z);
 14 | IntegerVector p2sum(IntegerVector pwr2);
 15 | IntegerVector powers2 (int L);
 16 | NumericVector rowsumsDist(NumericVector x, NumericVector sorted, IntegerVector ranks);
 17 | IntegerVector subNodes (int y, IntegerVector pwr2, IntegerVector psum);
 18 | 
 19 | 
 20 | 
 21 | // [[Rcpp::export]]
 22 | NumericVector Btree_sum (IntegerVector y, NumericVector z) {
 23 |   //  
 24 |   //  y is a permutation of the integers 1:n
 25 |   //  z is a numeric vector of length n
 26 |   //  compute gamma1(i) = sum(j<i; y_j<y_i) z(j)
 27 |   //  
 28 |   
 29 |   int n = y.length(), L = ceil(log2(n));
 30 |   int i, node, p;
 31 |   IntegerVector pwr2 = powers2(L);
 32 |   IntegerVector psum = p2sum(pwr2);
 33 |   IntegerVector nodes(L);
 34 |   NumericVector sums(2 * pwr2(L - 1));
 35 |   NumericVector gamma1(n);
 36 |   
 37 |   for (i = 1; i < n; i++) {
 38 |     // update container sums for y(i - 1)
 39 |     nodes = containerNodes(y(i - 1), pwr2, psum);
 40 |     for (p = 0; p < L; p++)
 41 |       sums(nodes(p)) += z(i - 1);
 42 |     
 43 |     // get nodes below y(i) and update gamma(i)
 44 |     nodes = subNodes(y(i) - 1, pwr2, psum);
 45 |     for (p = 0; p < L; p++) {
 46 |       node = nodes(p);
 47 |       if (node > 0)
 48 |         gamma1(i) += sums(node);
 49 |     }
 50 |   }
 51 |   return gamma1;
 52 | }
 53 | 
 54 | IntegerVector containerNodes (int y, IntegerVector pwr2, IntegerVector psum) {
 55 |   /*
 56 |   * get the indices of all nodes of binary tree whose closed
 57 |   * intervals contain integer y
 58 |   */
 59 |   int i, L = pwr2.length();
 60 |   IntegerVector nodes(L);
 61 | 
 62 |   nodes(0) = y;
 63 |   for (i = 0; i < L-1; i++) {
 64 |      nodes(i+1) = ceil((double) y / pwr2(i)) + psum(i);
 65 |   }
 66 |   return nodes;  
 67 | }
 68 | 
 69 | 
 70 | IntegerVector subNodes (int y, IntegerVector pwr2, IntegerVector psum) {
 71 |   /*
 72 |    * get indices of nodes whose intervals disjoint union is 1:y
 73 |    */
 74 |   int L = psum.length();
 75 |   int idx, k, level, p2;
 76 |   IntegerVector nodes(L);
 77 |   
 78 |   std::fill(nodes.begin(), nodes.end(), -1L); 
 79 |   
 80 |   k = y;
 81 |   for (level = L - 1; level > 0; level --) {
 82 |     p2 = pwr2(level - 1);
 83 |     if (k >= p2) {
 84 |       // at index of left node plus an offset
 85 |       idx = psum(level - 1) + (y / p2);
 86 |       nodes(L - level - 1) = idx;
 87 |       k -= p2;
 88 |     }
 89 |   }
 90 |   if (k > 0)
 91 |     nodes(L - 1) = y;
 92 |   return nodes;  
 93 | }
 94 | 
 95 | 
 96 | IntegerVector  powers2 (int L) {
 97 |   //  (2, 4, 8, ..., 2^L, 2^(L+1))
 98 |   int k;
 99 |   IntegerVector pwr2(L);
100 |   
101 |   pwr2(0) = 2;
102 |   for (k = 1; k < L; k++)
103 |     pwr2(k) = pwr2(k-1) * 2;
104 |   return pwr2;
105 | }
106 | 
107 | IntegerVector p2sum(IntegerVector pwr2) {
108 |   // computes the cumsum of 2^L, 2^(L-1), ..., 2^2, 2
109 |   int i, L = pwr2.length();
110 |   IntegerVector psum(L);
111 |   
112 |   std::fill(psum.begin(), psum.end(), pwr2(L-1));
113 |   for (i = 1; i < L; i++)
114 |     psum(i) = psum(i-1) + pwr2(L-i-1);
115 |   return psum;
116 | }
117 | 
118 | 
119 | NumericVector gamma1_direct(IntegerVector y, NumericVector z) {
120 |   // utility: direct computation of the sum gamm1
121 |   // for the purpose of testing and benchmarks
122 |   
123 |   int n = y.length();
124 |   int i, j;
125 |   NumericVector gamma1(n);
126 |   
127 |   for (i = 1; i < n; i++) {
128 |     for (j = 0; j < i; j++) {
129 |       if (y(j) < y(i)) {
130 |         gamma1(i) += z(j);
131 |       }
132 |     }
133 |   }
134 |   return gamma1;
135 | } 
136 | 


--------------------------------------------------------------------------------
/R/Epoisson.R:
--------------------------------------------------------------------------------
  1 | poisson.tests <-
  2 | function(x, R, test="all") {
  3 |   # parametric bootstrap tests of Poisson distribution
  4 |   # poisson.e is the energy GOF statistic
  5 |   # poisson.m is the mean distance statistic
  6 |   # (not related to the test stats::poisson.test)
  7 |   if (!is.integer(x) || any(x < 0)) {
  8 |     warning("sample must be non-negative integers")
  9 |     return(NULL)
 10 |   }
 11 |   test <- tolower(test)
 12 |   poisson.stats <- function(x) {
 13 |     c(poisson.m(x), poisson.e(x))
 14 |   }
 15 |   stat <- switch(test,
 16 |                  "m" = poisson.m,
 17 |                  "e" = poisson.e,
 18 |                  poisson.stats)
 19 |   
 20 |   method <- switch(test, 
 21 |                    m=c("M-CvM","M-AD"), 
 22 |                    e="Energy", 
 23 |                    c("M-CvM","M-AD","Energy"))
 24 |   method <- paste(method, " test", sep="")
 25 |   n <- length(x)
 26 |   lambda <- mean(x)
 27 |   if (missing(R) || is.null(R)) {
 28 |     R <- 0
 29 |     message("Specify R > 0 replicates for MC test")
 30 |   }
 31 | 
 32 |   bootobj <- boot::boot(x, statistic = stat, R = R, 
 33 |                    sim = "parametric",
 34 |                    ran.gen = function(x, y) {rpois(n, lambda)})
 35 |   
 36 |   N <- length(bootobj$t0)
 37 |   p <- rep(NA, times=N)
 38 |   if (R > 0) {
 39 |     for (i in 1:N) {
 40 |     p[i] <- 1 - mean(bootobj$t[,i] < bootobj$t0[i])
 41 |     }
 42 |   }
 43 |   
 44 |   # a data frame, not an htest object  
 45 |   # comparable to broom::tidy on an htest object
 46 |   RVAL <- data.frame(estimate=lambda, statistic=bootobj$t0,
 47 |                        p.value=p, method=method)
 48 |   return(RVAL)
 49 | }
 50 | 
 51 | poisson.mtest <-
 52 | function(x, R=NULL) {
 53 |   if (is.null(R)) R <- 0
 54 |   rval <- poisson.tests(x, R, test="M")
 55 |   DNAME <- paste(deparse1(substitute(x)), "replicates: ", R)
 56 |   stat <- rval$statistic[1]
 57 |   names(stat) <- "M-CvM"
 58 |     e <- list(
 59 |       method = paste("Poisson M-test", sep = ""),
 60 |       statistic = stat,
 61 |       p.value = rval$p.value[1],
 62 |       data.name = DNAME,
 63 |       estimate = rval$estimate[1])
 64 |     class(e) <- "htest"
 65 |     e
 66 | }
 67 | 
 68 | poisson.etest <- 
 69 | function(x, R=NULL) {
 70 |   if (is.null(R)) R <- 0
 71 |     rval <- poisson.tests(x, R, test="E")
 72 |     DNAME <- paste(deparse1(substitute(x)), "replicates: ", R)
 73 |     stat <- rval$statistic
 74 |     names(stat) <- "E"
 75 |     e <- list(
 76 |       method = paste("Poisson E-test", sep = ""),
 77 |       statistic = stat,
 78 |       p.value = rval$p.value,
 79 |       data.name = paste("replicates: ", R,  sep=""),
 80 |       estimate = rval$estimate)
 81 |     class(e) <- "htest"
 82 |     e
 83 | }
 84 | 
 85 | poisson.m <-
 86 | function(x) {
 87 |     # mean distance statistic for Poissonity
 88 |     if (any(!is.integer(x)) || any(x < 0)) {
 89 |       warning("sample must be non-negative integers")
 90 |       return(NULL)
 91 |     }
 92 |   stats <- .poisMstat(x)
 93 |   names(stats) <- c("M-CvM", "M-AD")
 94 |   return(stats)
 95 | }
 96 | 
 97 | poisson.e <-
 98 | function(x) {
 99 |   # energy GOF statistic for Poissonity
100 |   if (any(!is.integer(x)) || any(x < 0)) {
101 |     warning("sample must be non-negative integers")
102 |     return(NULL)
103 |   }
104 |   lambda <- mean(x)
105 |   n <- length(x)
106 |   
107 |   ## E|y-X| for X Poisson(lambda) (vectorized)
108 |   Px <- ppois(x, lambda)
109 |   Px1 <- ppois(x-1, lambda)
110 |   meanvec <- 2*x*Px - 2*lambda*Px1 + lambda - x
111 |   
112 |   ## second mean E|X-X'|
113 |   a <- 2 * lambda
114 |   EXX <- a * exp(-a) * (besselI(a, 0) + besselI(a, 1))
115 |   
116 |   ## third mean = sum_{i,j} |x_i - x_j| / n^2
117 |   K <- seq(1 - n, n - 1, 2)
118 |   y <- sort(x)
119 |   meanxx <- 2 * sum(K * y) / n^2
120 |   stat <- n * (2 * mean(meanvec) - EXX - meanxx)
121 |   names(stat) <- "E"
122 |   return(stat)
123 | }
124 | 
125 | 
126 | 


--------------------------------------------------------------------------------
/man/dcov2d.Rd:
--------------------------------------------------------------------------------
 1 | \name{dcov2d}
 2 | \alias{dcor2d}
 3 | \alias{dcov2d}
 4 | \title{Fast dCor and dCov for bivariate data only}
 5 | \description{
 6 | For bivariate data only, these are fast O(n log n) implementations of distance
 7 | correlation and distance covariance statistics. The U-statistic for dcov^2 is unbiased; 
 8 | the V-statistic is the original definition in SRB 2007. These algorithms do not
 9 | store the distance matrices, so they are suitable for large samples. 
10 | }
11 | \usage{
12 | dcor2d(x, y, type = c("V", "U"))
13 | dcov2d(x, y, type = c("V", "U"), all.stats = FALSE)
14 | }
15 | \arguments{
16 |   \item{x}{ numeric vector}
17 |   \item{y}{ numeric vector}
18 |   \item{type}{ "V" or "U", for V- or U-statistics}
19 |   \item{all.stats}{ logical}
20 | }
21 | \details{
22 | The unbiased (squared) dcov is documented in \code{dcovU}, for multivariate data in arbitrary, not necessarily equal dimensions. \code{dcov2d} and \code{dcor2d} provide a faster O(n log n) algorithm for bivariate (x, y) only (X and Y are real-valued random vectors). The O(n log n) algorithm was proposed by Huo and Szekely (2016). The algorithm is faster above a certain sample size n. It does not store the distance matrix so the sample size can be very large. 
23 | }
24 | \value{
25 | By default, \code{dcov2d} returns the V-statistic \eqn{V_n = dCov_n^2(x, y)}{V_n = dCov_n^2(x, y)}, and if type="U", it returns the U-statistic, unbiased for \eqn{dCov^2(X, Y)}{dCov^2(X,Y)}. The argument all.stats=TRUE is used internally when the function is called from \code{dcor2d}. 
26 | 
27 | By default, \code{dcor2d} returns \eqn{dCor_n^2(x, y)}{dCor_n^2(x, y)}, and if type="U", it returns a bias-corrected estimator of squared dcor equivalent to \code{bcdcor}.
28 | 
29 | These functions do not store the distance matrices so they are helpful when sample size is large and the data is bivariate. 
30 | }
31 | \note{
32 | The U-statistic \eqn{U_n}{U_n} can be negative in the lower tail so 
33 | the square root of the U-statistic is not applied.
34 | Similarly, \code{dcor2d(x, y, "U")} is bias-corrected and can be
35 | negative in the lower tail, so we do not take the
36 | square root. The original definitions of dCov and dCor 
37 | (SRB2007, SR2009) were based on V-statistics, which are non-negative,
38 | and defined using the square root of V-statistics.
39 | 
40 | It has been suggested that instead of taking the square root of the U-statistic, one could take the root of \eqn{|U_n|}{|U_n|} before applying the sign, but that introduces more bias than the original dCor, and should never be used. 
41 | }
42 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
43 | Gabor J. Szekely
44 | }
45 | \seealso{
46 |  \code{\link{dcov}} \code{\link{dcov.test}}  \code{\link{dcor}} \code{\link{dcor.test}} (multivariate statistics and permutation test) 
47 | }
48 | \references{
49 | Huo, X. and Szekely, G.J. (2016). Fast computing for 
50 | distance covariance. Technometrics, 58(4), 435-447.
51 | 
52 |  Szekely, G.J. and Rizzo, M.L. (2014),
53 |  Partial Distance Correlation with Methods for Dissimilarities.
54 |  \emph{Annals of Statistics}, Vol. 42 No. 6, 2382-2412.
55 | 
56 |  Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007),
57 |  Measuring and Testing Dependence by Correlation of Distances,
58 |  \emph{Annals of Statistics}, Vol. 35 No. 6, pp. 2769-2794.
59 |  \cr \doi{10.1214/009053607000000505}
60 | }
61 | \examples{
62 |   \donttest{
63 |     ## these are equivalent, but 2d is faster for n > 50
64 |     n <- 100
65 |     x <- rnorm(100)
66 |     y <- rnorm(100)
67 |     all.equal(dcov(x, y)^2, dcov2d(x, y), check.attributes = FALSE)
68 |     all.equal(bcdcor(x, y), dcor2d(x, y, "U"), check.attributes = FALSE)
69 | 
70 |     x <- rlnorm(400)
71 |     y <- rexp(400)
72 |     dcov.test(x, y, R=199)    #permutation test
73 |     dcor.test(x, y, R=199)
74 |     }  
75 | }
76 | \keyword{ htest }
77 | \keyword{ nonparametric }
78 | \concept{ independence }
79 | \concept{ distance correlation }
80 | \concept{ distance covariance }
81 | \concept{ energy statistics }
82 | 


--------------------------------------------------------------------------------
/man/kgroups.Rd:
--------------------------------------------------------------------------------
 1 | \name{kgroups}
 2 | \alias{kgroups}
 3 | 
 4 | \title{
 5 | K-Groups Clustering
 6 | }
 7 | \description{
 8 | Perform k-groups clustering by energy distance.
 9 | }
10 | \usage{
11 | kgroups(x, k, iter.max = 10, nstart = 1, cluster = NULL)
12 | }
13 | \arguments{
14 |   \item{x}{Data frame or data matrix or distance object}
15 |   \item{k}{number of clusters}
16 |   \item{iter.max}{maximum number of iterations}
17 |   \item{nstart}{number of restarts}
18 |   \item{cluster}{initial clustering vector}
19 | }
20 | 
21 | \details{
22 | K-groups is based on the multisample energy distance for comparing distributions.
23 | Based on the disco decomposition of total dispersion (a Gini type mean distance) the objective function should either maximize the total between cluster energy distance, or equivalently, minimize the total within cluster energy distance. It is more computationally efficient to minimize within distances, and that makes it possible to use a modified version of the Hartigan-Wong algorithm (1979) to implement K-groups clustering.
24 | 
25 | The within cluster Gini mean distance is
26 | \deqn{G(C_j) = \frac{1}{n_j^2} \sum_{i,m=1}^{n_j} |x_{i,j} - x_{m,j}|}
27 | and the K-groups within cluster distance is
28 | \deqn{W_j = \frac{n_j}{2}G(C_j) = \frac{1}{2 n_j} \sum_{i,m=1}^{n_j} |x_{i,j} - x_{m,j}.}
29 | If z is the data matrix for cluster \eqn{C_j}, then \eqn{W_j} could be computed as
30 | \code{sum(dist(z)) / nrow(z)}.
31 | 
32 | If cluster is not NULL, the clusters are initialized by this vector (can be a factor or integer vector). Otherwise clusters are initialized with random labels in k approximately equal size clusters.
33 | 
34 | If \code{x} is not a distance object (class(x) == "dist") then \code{x} is converted to a data matrix for analysis. 
35 | 
36 | Run up to \code{iter.max} complete passes through the data set until a local min is reached. If \code{nstart > 1}, on second and later starts, clusters are initialized at random, and the best result is returned. 
37 | }
38 | 
39 | \value{
40 | An object of class \code{kgroups} containing the components
41 | \item{call}{the function call}
42 | \item{cluster}{vector of cluster indices}
43 | \item{sizes}{cluster sizes}
44 | \item{within}{vector of Gini within cluster distances}
45 | \item{W}{sum of within cluster distances}
46 | \item{count}{number of moves}
47 | \item{iterations}{number of iterations}
48 | \item{k}{number of clusters}
49 | 
50 | \code{cluster} is a vector containing the group labels, 1 to k. \code{print.kgroups}
51 | prints some of the components of the kgroups object.
52 | 
53 | Expect that count is 0 if the algorithm converged to a local min (that is, 0 moves happened on the last iteration). If iterations equals iter.max and count is positive, then the algorithm did not converge to a local min. 
54 | }
55 | \author{
56 | Maria Rizzo and Songzi Li
57 | }
58 | 
59 | \references{
60 | Li, Songzi (2015).
61 | "K-groups: A Generalization of K-means by Energy Distance."
62 | Ph.D. thesis, Bowling Green State University.
63 | 
64 | Li, S. and Rizzo, M. L. (2017).
65 | "K-groups: A Generalization of K-means Clustering".
66 | ArXiv e-print 1711.04359. https://arxiv.org/abs/1711.04359
67 | 
68 | Szekely, G. J., and M. L. Rizzo. "Testing for equal distributions in high dimension." InterStat 5, no. 16.10 (2004).
69 | 
70 | Rizzo, M. L., and G. J. Szekely. "Disco analysis: A nonparametric extension of analysis of variance." The Annals of Applied Statistics (2010): 1034-1055.
71 | 
72 | Hartigan, J. A. and Wong, M. A. (1979). "Algorithm AS 136: A K-means clustering algorithm." Applied Statistics, 28, 100-108. doi: 10.2307/2346830.
73 | }
74 | 
75 | \examples{
76 |   x <- as.matrix(iris[ ,1:4])
77 |   set.seed(123)
78 |   kg <- kgroups(x, k = 3, iter.max = 5, nstart = 2)
79 |   kg
80 |   fitted(kg)
81 |   
82 |   \donttest{
83 |     d <- dist(x)
84 |     set.seed(123)
85 |     kg <- kgroups(d, k = 3, iter.max = 5, nstart = 2)
86 |     kg
87 |     
88 |     kg$cluster
89 |   
90 |     fitted(kg)
91 |     fitted(kg, method = "groups")
92 |     }
93 | }
94 | 
95 | \keyword{ cluster }
96 | \keyword{ multivariate }
97 | 


--------------------------------------------------------------------------------
/R/Eeqdist.R:
--------------------------------------------------------------------------------
  1 | eqdist.e <-
  2 | function(x, sizes, distance = FALSE, method = c("original","discoB","discoF"))
  3 | {
  4 |     ## multivariate E-statistic for testing equal distributions
  5 |     ##   x:          matrix of pooled sample or distance matrix
  6 |     ##   sizes:      vector of sample sizes
  7 |     ##   distance:   logical, TRUE if x is a distance matrix, otherwise false
  8 |     ##   method:     original (default) or disco between components, or disco F ratio
  9 | 
 10 |     method <-match.arg(method)
 11 |     if (method=="discoB") {
 12 |         g <- as.factor(rep(1:length(sizes), sizes))
 13 |         RVAL <- disco(x, factors=g, distance=distance, R=0, method=method)
 14 |         } else {
 15 | 			RVAL <- eqdist.etest(x, sizes, distance = distance, R=0, method=method)$statistic
 16 | 		}
 17 | 	RVAL
 18 | }
 19 | 
 20 | eqdist.etest <-
 21 | function(x, sizes, distance = FALSE, method = c("original","discoB","discoF"), R)
 22 | {
 23 |     ## multivariate E-test of the multisample hypothesis of equal distributions
 24 |     ##   x:          matrix of pooled sample or distance matrix
 25 |     ##   sizes:      vector of sample sizes
 26 |     ##   distance:   logical, TRUE if x is a distance matrix, otherwise false
 27 |     ##   method:     original (default) or disco components
 28 |     ##   R:          number of replicates
 29 |     ##
 30 | 
 31 |     method <-match.arg(method)
 32 | 
 33 |     if (method=="discoB" || method=="discoF") {
 34 |       g <- as.factor(rep(1:length(sizes), sizes))
 35 | 	  # for other index use disco() function directly
 36 |       return(disco(x, factors=g, distance=distance, index=1.0, R=R, method=method))
 37 |       }
 38 | 
 39 |     nsamples <- length(sizes)
 40 |     if (nsamples < 2) return (NA)
 41 |     if (min(sizes) < 1) return (NA)
 42 |     if (!is.null(attr(x, "Size"))) distance <- TRUE
 43 | 
 44 |     x <- as.matrix(x)
 45 |     if (NROW(x) != sum(sizes)) stop("nrow(x) should equal sum(sizes)")
 46 |     if (distance == FALSE && nrow(x) == ncol(x))
 47 |         warning("square data matrix with distance==FALSE")
 48 |     d <- NCOL(x)
 49 |     if (distance == TRUE) d <- 0
 50 |     str <- "Multivariate "
 51 |     if (d == 1) str <- "Univariate "
 52 |     if (d == 0) str <- ""
 53 | 
 54 |     e0 <- 0.0
 55 |     repl <- rep(0, R)
 56 |     pval <- 1.0
 57 |     b <- .C("ksampleEtest",
 58 |         x = as.double(t(x)),
 59 |         byrow = as.integer(1),
 60 |         nsamples = as.integer(nsamples),
 61 |         sizes = as.integer(sizes),
 62 |         dim = as.integer(d),
 63 |         R = as.integer(R),
 64 |         e0 = as.double(e0),
 65 |         e = as.double(repl),
 66 |         pval = as.double(pval),
 67 |         PACKAGE = "energy")
 68 | 
 69 |     names(b$e0) <- "E-statistic"
 70 |     sz <- paste(sizes, collapse = " ", sep = "")
 71 |     methodname <- paste(str, length(sizes),
 72 |                   "-sample E-test of equal distributions", sep = "")
 73 |     dataname <- paste("sample sizes ", sz, ", replicates ", R, sep="")
 74 |     e <- list(
 75 | 	    call = match.call(),
 76 |         method = methodname,
 77 |         statistic = b$e0,
 78 |         p.value = b$pval,
 79 |         data.name = dataname)
 80 | 
 81 |     class(e) <- "htest"
 82 |     e
 83 | }
 84 | 
 85 | ksample.e <-
 86 | function(x, sizes, distance = FALSE, method = c("original","discoB","discoF"),
 87 |          ix = 1:sum(sizes))
 88 | {
 89 |     ## computes k-sample E-statistics for equal distributions
 90 |     ## retained for backward compatibility or use with boot
 91 |     ## (this function simply passes arguments to eqdist.e)
 92 |     ##
 93 |     ##   x:          pooled sample or distance matrix
 94 |     ##   sizes:      vector of sample sizes
 95 |     ##   distance:   TRUE if x is a distance matrix, otherwise FALSE
 96 |     ##   method:     default (original) or disco between components or disco F ratio
 97 |     ##   ix:         a permutation of row indices of x
 98 |     ##
 99 |     x <- as.matrix(x)
100 |     method <- match.arg(method)
101 |     eqdist.e(x[ix,], sizes=sizes, distance=distance, method=method)
102 | }
103 | 
104 | 


--------------------------------------------------------------------------------
/src/kgroups.cpp:
--------------------------------------------------------------------------------
  1 | #include <Rcpp.h>
  2 | using namespace Rcpp;
  3 | 
  4 | /*
  5 |  Author: Maria L. Rizzo
  6 |  energy package
  7 |  github.com/mariarizzo/energy
  8 |  */
  9 | 
 10 | 
 11 | int kgroups_update(NumericMatrix x, int k, IntegerVector clus,
 12 |                    IntegerVector sizes, NumericVector within, bool distance);
 13 | List kgroups_start(NumericMatrix x, int k, IntegerVector clus,
 14 |                    int iter_max, bool distance);
 15 | 
 16 | int kgroups_update(NumericMatrix x, int k, IntegerVector clus,
 17 |                       IntegerVector sizes, NumericVector w, bool distance) {
 18 |   /*
 19 |    * k-groups one pass through sample moving one point at a time
 20 |    * x: data matrix or distance
 21 |    * k: number of clusters
 22 |    * clus: clustering vector clus(i)==j ==> x_i is in cluster j
 23 |    * sizes: cluster sizes
 24 |    * within: vector of within cluster dispersions
 25 |    * distance: true if x is distance matrix
 26 |    * update clus, sizes, and withins
 27 |    * return count = number of points moved
 28 |    */
 29 | 
 30 |   int n = x.nrow(), d = x.ncol();
 31 |   int i, j, I, J, ix, nI, nJ;
 32 |   NumericVector rowdst(k), e(k);
 33 |   int best, count = 0;
 34 |   double dsum, dif;
 35 | 
 36 |   for (ix = 0; ix < n; ix++) {
 37 |     I = clus(ix);
 38 |     nI = sizes(I);
 39 |     if (nI > 1) {
 40 |       // calculate the E-distances of this point to each cluster
 41 |       rowdst.fill(0.0);
 42 |       for (i = 0; i < n; i++) {
 43 |         J = clus(i);
 44 |         if (distance == true) {
 45 |           rowdst(J) += x(ix, i);
 46 |         } else {
 47 |           dsum = 0.0;
 48 |           for (j = 0; j < d; j++) {
 49 |             dif = x(ix, j) - x(i, j);
 50 |             dsum += dif * dif;
 51 |           }
 52 |           rowdst(J) += sqrt(dsum);
 53 |         }
 54 |       }
 55 | 
 56 |       for (J = 0; J < k; J++) {
 57 |         nJ = sizes(J);
 58 |         e(J) = (2.0 / (double) nJ) * (rowdst(J) - w(J));
 59 |       }
 60 | 
 61 |       best = Rcpp::which_min(e);
 62 |       if (best != I) {
 63 |         // move this point and update
 64 |         nI = sizes(I);
 65 |         nJ = sizes(best);
 66 |         w(best) = (((double) nJ) * w(best) + rowdst(best)) / ((double) (nJ + 1));
 67 |         w(I) = (((double) nI) * w(I) - rowdst(I)) / ((double) (nI - 1));
 68 |         clus(ix) = best;
 69 |         sizes(I) = nI - 1;
 70 |         sizes(best) = nJ + 1;
 71 |         count ++;  // number of moves
 72 |         }
 73 |       }
 74 |     }
 75 | 
 76 |   return count;
 77 | }
 78 | 
 79 | 
 80 | 
 81 | // [[Rcpp::export]]
 82 | List kgroups_start(NumericMatrix x, int k, IntegerVector clus,
 83 |                    int iter_max, bool distance) {
 84 |   // k-groups clustering with initial clustering vector clus
 85 |   // up to iter_max iterations of n possible moves each
 86 |   // distance: true if x is distance matrix
 87 |     NumericVector within(k, 0.0);
 88 |   IntegerVector sizes(k, 0);
 89 |   double dif, dsum;
 90 |   int I, J, h, i, j;
 91 |   int n = x.nrow(), d = x.ncol();
 92 | 
 93 |   for (i = 0; i < n; i++) {
 94 |     I = clus(i);
 95 |     sizes(I)++;
 96 |     for (j = 0; j < i; j++) {
 97 |       J = clus(j);
 98 |       if (I == J) {
 99 |         if (distance == true) {
100 |           within(I) += x(i, j);
101 |         } else {
102 |           dsum = 0.0;
103 |           for (h = 0; h < d; h++) {
104 |             dif = x(i, h) - x(j, h);
105 |             dsum += dif * dif;
106 |           }
107 |           within(I) += sqrt(dsum);
108 |         }
109 |       }
110 |     }
111 |   }
112 |   for (I = 0; I < k; I++)
113 |     within(I) /= ((double) sizes(I));
114 | 
115 |   int it = 1, count = 1;
116 |   count = kgroups_update(x, k, clus, sizes, within, distance);
117 | 
118 |   while (it < iter_max && count > 0) {
119 |     count = kgroups_update(x, k, clus, sizes, within, distance);
120 |     it++;
121 |   }
122 |   double W = Rcpp::sum(within);
123 | 
124 |   return List::create(
125 |         _["within"] = within,
126 |         _["W"] = W,
127 |         _["sizes"] = sizes,
128 |         _["cluster"] = clus,
129 |         _["iterations"] = it,
130 |         _["count"] = count);
131 | }
132 | 
133 | 
134 | 


--------------------------------------------------------------------------------
/man/poisson.Rd:
--------------------------------------------------------------------------------
 1 | \name{Poisson Tests}
 2 | \alias{poisson.tests}
 3 | \alias{poisson.e}
 4 | \alias{poisson.etest}
 5 | \alias{poisson.m}
 6 | \alias{poisson.mtest}
 7 | \title{ Goodness-of-Fit Tests for Poisson Distribution}
 8 | \description{
 9 | Performs the mean distance goodness-of-fit test and the energy goodness-of-fit test of Poisson distribution with unknown parameter.
10 | }
11 | \usage{
12 | poisson.e(x)
13 | poisson.m(x)
14 | poisson.etest(x, R)
15 | poisson.mtest(x, R)
16 | poisson.tests(x, R, test="all")
17 | }
18 | \arguments{
19 |   \item{x}{ vector of nonnegative integers, the sample data }
20 |   \item{R}{ number of bootstrap replicates }
21 |   \item{test}{ name of test(s) }
22 | }
23 | \details{
24 | Two distance-based tests of Poissonity are applied in \code{poisson.tests}, "M" and "E". The default is to 
25 | do all tests and return results in a data frame. 
26 | Valid choices for \code{test} are "M", "E", or "all" with 
27 | default "all". 
28 | 
29 | If "all" tests, all tests are performed by a single parametric bootstrap computing all test statistics on each sample. 
30 | 
31 | The "M" choice is two tests, one based on a Cramer-von Mises  distance and the other an Anderson-Darling distance. The "E" choice is the energy goodness-of-fit test. 
32 | 
33 | \code{R} must be a positive integer for a test. If \code{R} is missing or 0, a warning is printed but test statistics are computed (without testing). 
34 | 
35 | The mean distance test of Poissonity (M-test) is based on the result that the sequence
36 |  of expected values E|X-j|, j=0,1,2,... characterizes the distribution of
37 |  the random  variable X. As an application of this characterization one can
38 |  get an estimator \eqn{\hat F(j)} of the CDF. The test statistic
39 |  (see \code{\link{poisson.m}}) is a Cramer-von Mises type of distance, with
40 |  M-estimates replacing the usual EDF estimates of the CDF:
41 |   \deqn{M_n = n\sum_{j=0}^\infty (\hat F(j) - F(j\;; \hat \lambda))^2
42 |  f(j\;; \hat \lambda).}{M_n = n sum [j>=0] (\hat F(j) - F(j; \hat \lambda))^2
43 |  f(j; \hat \lambda).} 
44 |  
45 | In \code{poisson.tests}, an Anderson-Darling type of weight is also applied when \code{test="M"} or \code{test="all"}.
46 |  
47 | The tests are implemented by parametric bootstrap with
48 |  \code{R} replicates.
49 |  
50 | An energy goodness-of-fit test (E) is based on the test statistic
51 | \deqn{Q_n = n (\frac{2}{n} \sum_{i=1}^n E|x_i - X| - E|X-X'| - \frac{1}{n^2} \sum_{i,j=1}^n |x_i - x_j|,
52 | }{Q_n = n((2/n) sum[1:n] E|x_i-X| - E|X-X'| - (1/n^2) sum[1:n,1:n]
53 |  |x_i-x_j|),}
54 | where X and X' are iid with the hypothesized null distribution. For a test of H: X ~ Poisson(\eqn{\lambda}), we can express E|X-X'| in terms of Bessel functions, and E|x_i - X| in terms of the CDF of Poisson(\eqn{\lambda}).
55 | 
56 | If test=="all" or not specified, all tests are run with a single parametric bootstrap. \code{poisson.mtest} implements only the Poisson M-test with Cramer-von Mises type distance. \code{poisson.etest} implements only the Poisson energy test.
57 | }
58 | \value{
59 | The functions \code{poisson.m} and \code{poisson.e} return the test statistics. The function
60 | \code{poisson.mtest} or \code{poisson.etest} return an \code{htest} object containing
61 |  \item{method}{Description of test}
62 |  \item{statistic}{observed value of the test statistic}
63 |  \item{p.value}{approximate p-value of the test}
64 |  \item{data.name}{replicates R}
65 |  \item{estimate}{sample mean}
66 |  
67 | \code{poisson.tests} returns "M-CvM test", "M-AD test" and "Energy test" results in a data frame with columns 
68 |  \item{estimate}{sample mean}
69 |  \item{statistic}{observed value of the test statistic}
70 |  \item{p.value}{approximate p-value of the test}
71 |  \item{method}{Description of test}
72 | which can be coerced to a \code{tibble}.
73 | }
74 | \note{The running time of the M test is much faster than the E-test.}
75 | \references{
76 | Szekely, G. J. and Rizzo, M. L. (2004) Mean Distance Test of Poisson Distribution, \emph{Statistics and Probability Letters},
77 | 67/3, 241-247. \doi{10.1016/j.spl.2004.01.005}.
78 | 
79 | Szekely, G. J. and Rizzo, M. L. (2005) A New Test for
80 |  Multivariate Normality, \emph{Journal of Multivariate Analysis},
81 |  93/1, 58-80,
82 |  \doi{10.1016/j.jmva.2003.12.002}.
83 | }
84 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
85 | Gabor J. Szekely
86 | }
87 | \examples{
88 |  x <- rpois(50, 2)
89 |  poisson.m(x)
90 |  poisson.e(x)
91 |  \donttest{
92 |  poisson.etest(x, R=199)
93 |  poisson.mtest(x, R=199)
94 |  poisson.tests(x, R=199)
95 |  }
96 | }
97 | \keyword{ htest }
98 | \keyword{ energy }
99 | 


--------------------------------------------------------------------------------
/man/indep-deprecated.Rd:
--------------------------------------------------------------------------------
  1 | \name{indep.test}
  2 | \alias{indep.test}
  3 | \title{ Energy-tests of Independence}
  4 | \description{
  5 |  Computes a multivariate nonparametric test of independence.
  6 |  The default method implements the distance covariance test
  7 |  \code{\link{dcov.test}}.
  8 |  }
  9 | \usage{
 10 | indep.test(x, y, method = c("dcov","mvI"), index = 1, R)
 11 | }
 12 | \arguments{
 13 |   \item{x}{ matrix: first sample, observations in rows}
 14 |   \item{y}{ matrix: second sample, observations in rows}
 15 |   \item{method}{ a character string giving the name of the test}
 16 |   \item{index}{ exponent on Euclidean distances}
 17 |   \item{R}{ number of replicates}
 18 | }
 19 | \details{
 20 |  \code{indep.test} with the default \code{method = "dcov"} computes
 21 |  the distance
 22 |  covariance test of independence. \code{index} is an exponent on
 23 |  the Euclidean distances. Valid choices for \code{index} are in (0,2],
 24 |  with default value 1 (Euclidean distance). The arguments are passed
 25 |  to the \code{dcov.test} function. See the help topic \code{\link{dcov.test}} for
 26 |  the description and documentation and also see the references below.
 27 | 
 28 |  \code{indep.test} with \code{method = "mvI"}
 29 |  computes the coefficient \eqn{\mathcal I_n}{I_n} and performs a nonparametric
 30 |  \eqn{\mathcal E}{E}-test of independence. The arguments are passed to
 31 |  \code{mvI.test}. The
 32 |  \code{index} argument is ignored (\code{index = 1} is applied).
 33 |  See the help topic \code{\link{mvI.test}} and also
 34 |  see the reference (2006) below for details.
 35 | 
 36 |  The test decision is obtained via
 37 |  bootstrap, with \code{R} replicates.
 38 |  The sample sizes (number of rows) of the two samples must agree, and
 39 |  samples must not contain missing values.
 40 | 
 41 |  These energy tests of independence are based on related theoretical
 42 |  results, but different test statistics.
 43 |  The \code{dcov} method is faster than \code{mvI} method by
 44 |  approximately a factor of O(n).
 45 | }
 46 | \value{
 47 |  \code{indep.test} returns a list with class
 48 |  \code{htest} containing
 49 |  \item{     method}{description of test}
 50 |  \item{  statistic}{observed value of the
 51 |  test statistic \eqn{n \mathcal V_n^2}{n V_n^2}
 52 |  or \eqn{n \mathcal I_n^2}{n I_n^2}}
 53 |     \item{  estimate}{ \eqn{\mathcal V_n}{V_n} or \eqn{\mathcal I_n}{I_n}}
 54 |     \item{  estimates}{ a vector [dCov(x,y), dCor(x,y), dVar(x), dVar(y)]
 55 |     (method dcov)}
 56 |    \item{ replicates}{ replicates of the test statistic}
 57 |    \item{    p.value}{approximate p-value of the test}
 58 |  \item{  data.name}{description of data}
 59 |  }
 60 | \note{As of energy-1.1-0,
 61 | \code{indep.etest} is deprecated and replaced by \code{indep.test}, which
 62 | has methods for two different energy tests of independence.  \code{indep.test} applies
 63 | the distance covariance test (see \code{dcov.test}) by default (\code{method = "dcov"}).
 64 | The original \code{indep.etest} applied the independence coefficient
 65 | \eqn{\mathcal I_n}{I_n}, which is now obtained by \code{method = "mvI"}.
 66 |  }
 67 |  \seealso{
 68 |  \code{ \link{dcov.test} }
 69 |  \code{ \link{mvI.test} }
 70 |  \code{ \link{dcov} }
 71 |  \code{ \link{mvI} }
 72 |  }
 73 | \references{
 74 |   Szekely, G.J. and Rizzo, M.L. (2009),
 75 |   Brownian Distance Covariance,
 76 |  \emph{Annals of Applied Statistics}, Vol. 3 No. 4, pp.
 77 |  1236-1265. (Also see discussion and rejoinder.)
 78 |  \cr \doi{10.1214/09-AOAS312}
 79 | 
 80 |   Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007),
 81 |   Measuring and Testing Dependence by Correlation of Distances,
 82 |  \emph{Annals of Statistics}, Vol. 35 No. 6, pp. 2769-2794.
 83 |  \cr \doi{10.1214/009053607000000505}
 84 | 
 85 |  Bakirov, N.K., Rizzo, M.L., and Szekely, G.J. (2006), A Multivariate
 86 |  Nonparametric Test of Independence, \emph{Journal of Multivariate Analysis}
 87 |  93/1, 58-80, \cr
 88 |  \doi{10.1016/j.jmva.2005.10.005}
 89 |  }
 90 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
 91 | Gabor J. Szekely
 92 | }
 93 | \examples{
 94 | \donttest{
 95 |  ## independent multivariate data
 96 |  x <- matrix(rnorm(60), nrow=20, ncol=3)
 97 |  y <- matrix(rnorm(40), nrow=20, ncol=2)
 98 |  indep.test(x, y, method = "dcov", R = 99)
 99 |  indep.test(x, y, method = "mvI", R = 99)
100 | 
101 |  ## dependent multivariate data
102 |  if (require(MASS)) {
103 |    Sigma <- matrix(c(1, .1, 0, 0 , 1, 0, 0 ,.1, 1), 3, 3)
104 |    x <- mvrnorm(30, c(0, 0, 0), diag(3))
105 |    y <- mvrnorm(30, c(0, 0, 0), Sigma) * x
106 |    indep.test(x, y, R = 99)    #dcov method
107 |    indep.test(x, y, method = "mvI", R = 99)
108 |     }
109 |   }
110 | }
111 | 
112 | \keyword{ htest }
113 | \keyword{ multivariate }
114 | \keyword{ nonparametric }
115 | \concept{ independence }
116 | \concept{ energy statistics }
117 | 
118 | 


--------------------------------------------------------------------------------
/R/dcov.R:
--------------------------------------------------------------------------------
  1 | dcov.test <-
  2 | function(x, y, index=1.0, R=NULL) {
  3 |     ## check for valid number of replicates R
  4 |     method <- "Specify the number of replicates R (R > 0) for an independence test"
  5 |     if (! is.null(R)) {
  6 |       R <- floor(R)
  7 |       if (R < 1) R <- 0
  8 |       if (R > 0) 
  9 |         method <- "dCov independence test (permutation test)"
 10 |     } else {
 11 |       R <- 0
 12 |     }
 13 | 
 14 |     Dx <- .arg2dist.matrix(x)
 15 |     Dy <- .arg2dist.matrix(y)
 16 |     if (!isTRUE(all.equal(index, 1.0))) {
 17 |       Dx <- Dx^index
 18 |       Dy <- Dy^index
 19 |     }
 20 |     
 21 |     n <- nrow(Dx)
 22 |     m <- nrow(Dy)
 23 |     if (n != m) stop("Sample sizes must agree")
 24 | 
 25 |     stat <- dcorr <- reps <- 0
 26 |     dcov <- rep(0, 4)
 27 |     if (R > 0) reps <- rep(0, R)
 28 |     pval <- 1
 29 |     dims <- c(n, ncol(Dx), ncol(Dy), R)
 30 | 
 31 |     # dcov = [dCov,dCor,dVar(x),dVar(y)]
 32 |     a <- .C("dCOVtest",
 33 |             x = as.double(t(Dx)),
 34 |             y = as.double(t(Dy)),
 35 |             nrow = as.integer(nrow(Dx)),
 36 |             nreps = as.integer(R),
 37 |             reps = as.double(reps),
 38 |             DCOV = as.double(dcov),
 39 |             pval = as.double(pval),
 40 |             PACKAGE = "energy")
 41 |     # test statistic is n times the square of dCov statistic
 42 |     stat <- n * a$DCOV[1]^2
 43 |     dcorr <- a$DCOV
 44 |     V <- dcorr[[1]]
 45 |     names(stat) <- "nV^2"
 46 |     names(V) <- "dCov"
 47 |     dataname <- paste("index ", index, ", replicates ", R, sep="")
 48 |     pval <- ifelse (R < 1, NA, a$pval)
 49 |     e <- list(
 50 |         statistic = stat,
 51 |         method = method,
 52 |         estimate = V,
 53 |         estimates = dcorr,
 54 |         p.value = pval,
 55 |         replicates = n* a$reps^2,
 56 |         n = n,
 57 |         data.name = dataname)
 58 |     class(e) <- "htest"
 59 |     return(e)
 60 | }
 61 | 
 62 | dcor.test <-
 63 |   function(x, y, index=1.0, R) {
 64 |     # distance correlation test for multivariate independence
 65 |     # like dcov.test but using dcor as the test statistic
 66 |     if (missing(R)) R <- 0
 67 |     R <- ifelse(R > 0, floor(R), 0)
 68 |     RESULT <- dcov.test(x, y, index=index, R) 
 69 |     # this test statistic is n times the square of dCov statistic
 70 |     DCOVteststat <- RESULT$statistic
 71 |     DCOVreplicates <- RESULT$replicates
 72 |     
 73 |     # RESULT$estimates = [dCov,dCor,dVar(x),dVar(y)]
 74 |     # dVar are invariant under permutation of sample indices
 75 |     estimates = RESULT$estimates
 76 |     names(estimates) <- c("dCov", "dCor", "dVar(X)", "dVar(Y)")
 77 |     
 78 |     DCORteststat <- RESULT$estimates[2]
 79 |     dvarX <- RESULT$estimates[3]
 80 |     dvarY <- RESULT$estimates[4]
 81 |     n <- RESULT$n
 82 |     
 83 |     if (R > 0) {
 84 |       DCORreps <- sqrt(DCOVreplicates / n) / sqrt(dvarX * dvarY)
 85 |       p.value <- (1 + sum(DCORreps >= DCORteststat)) / (1 + R)
 86 |     } else {
 87 |       p.value <- NA
 88 |       DCORreps <- NA
 89 |     }
 90 |     
 91 |     names(DCORteststat) <- "dCor"
 92 |     dataname <- paste("index ", index, ", replicates ", R, sep="")
 93 |     method <- ifelse(R > 0, "dCor independence test (permutation test)", 
 94 |                      "Specify the number of replicates R>0 for an independence test")
 95 |     e <- list(
 96 |       method = method,
 97 |       statistic = DCORteststat,
 98 |       estimates = estimates,
 99 |       p.value = p.value,
100 |       replicates = DCORreps,
101 |       n = n,
102 |       data.name = dataname)
103 |     class(e) <- "htest"
104 |     return(e)
105 |   }
106 | 
107 | 
108 | .dcov <-
109 | function(x, y, index=1.0) {
110 |     # distance covariance statistic for independence
111 |     # dcov = [dCov,dCor,dVar(x),dVar(y)]   (vector)
112 |     # this function provides the fast method for computing dCov
113 |     # it is called by the dcov and dcor functions
114 |   
115 |   Dx <- .arg2dist.matrix(x)
116 |   Dy <- .arg2dist.matrix(y)
117 |   if (!isTRUE(all.equal(index, 1.0))) {
118 |     Dx <- Dx^index
119 |     Dy <- Dy^index
120 |   }
121 |   
122 |   n <- nrow(Dx)
123 |   m <- nrow(Dy)
124 |   if (n != m) stop("Sample sizes must agree")
125 |   dims <- c(n, ncol(Dx), ncol(Dy))
126 |     idx <- 1:dims[1]
127 |     DCOV <- numeric(4)
128 |     a <- .C("dCOV",
129 |             x = as.double(t(Dx)),
130 |             y = as.double(t(Dy)),
131 |             nrow = as.integer(n),
132 |             DCOV = as.double(DCOV),
133 |             PACKAGE = "energy")
134 |     return(a$DCOV)
135 | }
136 | 
137 | dcov <-
138 | function(x, y, index=1.0) {
139 |     # distance correlation statistic for independence
140 |     return(.dcov(x, y, index)[1])
141 | }
142 | 
143 | dcor <-
144 | function(x, y, index=1.0) {
145 |     # distance correlation statistic for independence
146 |     return(.dcov(x, y, index)[2])
147 | }
148 | 


--------------------------------------------------------------------------------
/R/Emvnorm.R:
--------------------------------------------------------------------------------
  1 | mvnorm.test <- function(x, R) {
  2 |   # parametric bootstrap E-test for multivariate normality
  3 |   if (missing(R)) {
  4 |     method = "Energy test of multivariate normality: (Specify R > 0 for MC test)"
  5 |     R <- 0
  6 |   } else {
  7 |     method = "Energy test of multivariate normality: estimated parameters"
  8 |   }
  9 |   
 10 |   if (is.vector(x) || NCOL(x)==1) {
 11 |     n <- NROW(x)
 12 |     d <- 1
 13 |     bootobj <- boot::boot(x, statistic = normal.e, R = R, sim = "parametric",
 14 |                           ran.gen = function(x, y) {
 15 |                             return(rnorm(n))
 16 |                           })
 17 |   } else {
 18 |     n <- nrow(x)
 19 |     d <- ncol(x)
 20 |     bootobj <- boot::boot(x, statistic = mvnorm.e, R = R, sim = "parametric",
 21 |                           ran.gen = function(x, y) {
 22 |                             return(matrix(rnorm(n * d), nrow = n, ncol = d))
 23 |                           })
 24 |   }
 25 |   if (R > 0)
 26 |     p <- 1 - mean(bootobj$t < bootobj$t0) else p <- NA
 27 |     
 28 |     names(bootobj$t0) <- "E-statistic"
 29 |     e <- list(statistic = bootobj$t0, p.value = p,
 30 |               method = method,
 31 |               data.name = paste("x, sample size ", n, ", dimension ", d, ", replicates ",
 32 |                                 R, sep = ""))
 33 |     class(e) <- "htest"
 34 |     e
 35 | }
 36 | 
 37 | mvnorm.etest <- function(x, R) {
 38 |   return(mvnorm.test(x, R))
 39 | }
 40 | 
 41 | mvnorm.e <- function(x) {
 42 |   # E-statistic for multivariate normality
 43 |   if (is.vector(x) || NCOL(x)==1)
 44 |     return(normal.e(x))
 45 |   n <- nrow(x)
 46 |   d <- ncol(x)
 47 |   if (n < 2) {
 48 |     warning("sample size must be at least 2")
 49 |     return(NA)
 50 |   }
 51 |   # subtract column means and compute S^(-1/2)
 52 |   z <- scale(x, scale = FALSE)
 53 |   ev <- eigen(var(x), symmetric = TRUE)
 54 |   P <- ev$vectors
 55 |   lambda <- ev$values
 56 |   D <- diag(d)
 57 |   diag(D) <- 1 / sqrt(lambda)
 58 |   y <- z %*% (P %*% D %*% t(P))
 59 |   if (any(!is.finite(y))) {
 60 |     warning("missing or non-finite y")
 61 |     return(NA)
 62 |   }
 63 |   
 64 |   if (requireNamespace("gsl", quietly=TRUE)) {
 65 |     const <- exp(lgamma((d+1)/2) - lgamma(d/2))
 66 |     mean2 <- 2*const
 67 |     ysq <- rowSums(y^2)
 68 |     mean1 <- sqrt(2) * const * 
 69 |       mean(gsl::hyperg_1F1(-1/2, d/2, -ysq/2))
 70 |     mean3 <- 2*sum(dist(y)) / n^2
 71 |     return(n * (2*mean1 - mean2 - mean3))  
 72 |   } else {
 73 |     warning("package gsl required but not found")
 74 |     return (NA)
 75 |   }
 76 | }
 77 | 
 78 | normal.e <- function(x) {
 79 |   ## Case 4: unknown parameters
 80 |   x <- as.vector(x)
 81 |   n <- length(x)
 82 |   s <- sd(x)
 83 |   if (!is.finite(s) || !(s > 0)) {
 84 |     warning("sd(x)>0 required")
 85 |     return(NA)
 86 |   }
 87 |   y <- (x - mean(x)) / sd(x)
 88 |   y <- sort(y)
 89 |   K <- seq(1 - n, n - 1, 2)
 90 |   return(2 * (sum(2 * y * pnorm(y) + 2 * dnorm(y)) -
 91 |                 n/sqrt(pi) - mean(K * y)))
 92 | }
 93 | 
 94 | normal.test <- function(x, method=c("mc", "limit"), R) {
 95 |   ## implements the test for for d=1 
 96 |   ## Case 4: composite hypothesis
 97 |   method <- match.arg(method)
 98 |   estimate <- c(mean(x), sd(x))
 99 |   names(estimate) <- c("mean", "sd")
100 |   
101 |   if (method == "mc") {
102 |     ## Monte Carlo approach
103 |     if (missing(R)) R <- 0
104 |     e <- energy::mvnorm.etest(x, R=R)
105 |     e$method <- "Energy test of normality"
106 |     e$method <- ifelse(R > 0,
107 |                        paste0(e$method,": estimated parameters"),
108 |                        paste0(e$method, "  (Specify R > 0 for MC test)"))
109 |     e$estimate <- estimate
110 |     return(e)
111 |   }
112 |   
113 |   ## implement test using asymptotic distribution for p-value
114 |   if (!is.numeric(x) || (!is.vector(x) && NCOL(x) > 1)) {
115 |     warning("x must be a numeric vector")
116 |     return (NA)
117 |   } else {
118 |     x <- as.vector(x, mode="numeric")
119 |   }
120 |   
121 |   n <- length(x)
122 |   t0 <- normal.e(x)
123 |   names(t0) <- "statistic"
124 |   
125 |   ## load pre-computed eigenvalues
126 |   ev <- energy::EVnormal[, "Case4"]
127 |   
128 |   if (requireNamespace("CompQuadForm", quietly=TRUE)) {
129 |     p <- CompQuadForm::imhof(t0, ev)$Qq
130 |   } else {
131 |     warning("limit distribution method requires CompQuadForm package for p-value")
132 |     p <- NA
133 |   }
134 |   estimate <- c(mean(x), sd(x))
135 |   names(estimate) <- c("mean", "sd")
136 |   e <- list(statistic = t0, p.value = p,
137 |             method = paste("Energy test of normality: limit distribution"),
138 |             estimate = estimate,
139 |             data.name = "Case 4: composite hypothesis, estimated parameters")
140 |   class(e) <- "htest"
141 |   e
142 | }
143 | 


--------------------------------------------------------------------------------
/man/edist.Rd:
--------------------------------------------------------------------------------
  1 | \name{edist}
  2 | \alias{edist}
  3 | \title{E-distance}
  4 | \description{
  5 |  Returns the E-distances (energy statistics) between clusters.
  6 | }
  7 | \usage{
  8 |  edist(x, sizes, distance = FALSE, ix = 1:sum(sizes), alpha = 1,
  9 |         method = c("cluster","discoB"))
 10 | }
 11 | \arguments{
 12 |   \item{x}{ data matrix of pooled sample or Euclidean distances}
 13 |   \item{sizes}{ vector of sample sizes}
 14 |   \item{distance}{ logical: if TRUE, x is a distance matrix}
 15 |   \item{ix}{ a permutation of the row indices of x }
 16 |   \item{alpha}{ distance exponent in (0,2]}
 17 |   \item{method}{ how to weight the statistics }
 18 | }
 19 | \details{
 20 |   A vector containing the pairwise two-sample multivariate
 21 |   \eqn{\mathcal{E}}{E}-statistics for comparing clusters or samples is returned.
 22 |   The e-distance between clusters is computed from the original pooled data,
 23 |   stacked in matrix \code{x} where each row is a multivariate observation, or
 24 |   from the distance matrix \code{x} of the original data, or distance object
 25 |   returned by \code{dist}. The first \code{sizes[1]} rows of the original data
 26 |   matrix are the first sample, the next \code{sizes[2]} rows are the second
 27 |   sample, etc. The permutation vector \code{ix} may be used to obtain
 28 |   e-distances corresponding to a clustering solution at a given level in
 29 |   the hierarchy.
 30 | 
 31 |   The default method \code{cluster} summarizes the e-distances between
 32 |   clusters in a table.
 33 |   The e-distance between two clusters \eqn{C_i, C_j}
 34 |   of size \eqn{n_i, n_j}
 35 |   proposed by Szekely and Rizzo (2005)
 36 |   is the e-distance \eqn{e(C_i,C_j)}, defined by
 37 |   \deqn{e(C_i,C_j)=\frac{n_i n_j}{n_i+n_j}[2M_{ij}-M_{ii}-M_{jj}],
 38 |   }{e(S_i, S_j) = (n_i n_j)/(n_i+n_j)[2M_(ij)-M_(ii)-M_(jj)],}
 39 |   where
 40 |   \deqn{M_{ij}=\frac{1}{n_i n_j}\sum_{p=1}^{n_i} \sum_{q=1}^{n_j}
 41 |      \|X_{ip}-X_{jq}\|^\alpha,}{
 42 |      M_{ij} = 1/(n_i n_j) sum[1:n_i, 1:n_j] ||X_(ip) - X_(jq)||^a,}
 43 |      \eqn{\|\cdot\|}{|| ||} denotes Euclidean norm, \eqn{\alpha=}{a=}
 44 |      \code{alpha}, and \eqn{X_{ip}}{
 45 |      X_(ip)} denotes the p-th observation in the i-th cluster.  The
 46 |      exponent \code{alpha} should be in the interval (0,2].
 47 | 
 48 |   The coefficient \eqn{\frac{n_i n_j}{n_i+n_j}}{(n_i n_j)(n_i+n_j)}
 49 |   is one-half of the harmonic mean of the sample sizes. The
 50 |   \code{discoB} method is related but with
 51 |   different ways of summarizing the pairwise differences between samples.
 52 |   The \code{disco} methods apply the coefficient
 53 |   \eqn{\frac{n_i n_j}{2N}}{(n_i n_j)/(2N)} where N is the total number
 54 |   of observations. This weights each (i,j) statistic by sample size
 55 |   relative to N. See the \code{disco} topic for more details.
 56 | }
 57 | \value{
 58 |  A object of class \code{dist} containing the lower triangle of the
 59 |  e-distance matrix of cluster distances corresponding to the permutation
 60 |  of indices \code{ix} is returned. The \code{method} attribute of the
 61 |  distance object is assigned a value of type, index.
 62 | }
 63 | \references{
 64 |  Szekely, G. J. and Rizzo, M. L. (2005) Hierarchical Clustering
 65 |  via Joint Between-Within Distances: Extending Ward's Minimum
 66 |  Variance Method, \emph{Journal of Classification} 22(2) 151-183.
 67 |  \cr \doi{10.1007/s00357-005-0012-9}
 68 | 
 69 | M. L. Rizzo and G. J. Szekely (2010).
 70 | DISCO Analysis: A Nonparametric Extension of
 71 | Analysis of Variance, Annals of Applied Statistics,
 72 | Vol. 4, No. 2, 1034-1055.
 73 |  \cr \doi{10.1214/09-AOAS245}
 74 | 
 75 |  Szekely, G. J. and Rizzo, M. L. (2004) Testing for Equal
 76 |  Distributions in High Dimension, InterStat, November (5).
 77 | 
 78 |  Szekely, G. J. (2000) Technical Report 03-05,
 79 |  \eqn{\mathcal{E}}{E}-statistics: Energy of
 80 |  Statistical Samples, Department of Mathematics and Statistics,
 81 |  Bowling Green State University.
 82 | }
 83 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
 84 | Gabor J. Szekely
 85 | }
 86 | \seealso{
 87 |  \code{\link{energy.hclust}}
 88 |  \code{\link{eqdist.etest}}
 89 |  \code{\link{ksample.e}}
 90 |  \code{\link{disco}}
 91 |  }
 92 | \examples{
 93 |      ## compute cluster e-distances for 3 samples of iris data
 94 |      data(iris)
 95 |      edist(iris[,1:4], c(50,50,50))
 96 |     
 97 |      ## pairwise disco statistics
 98 |      edist(iris[,1:4], c(50,50,50), method="discoB")  
 99 | 
100 |      ## compute e-distances from a distance object
101 |      data(iris)
102 |      edist(dist(iris[,1:4]), c(50, 50, 50), distance=TRUE, alpha = 1)
103 | 
104 |      ## compute e-distances from a distance matrix
105 |      data(iris)
106 |      d <- as.matrix(dist(iris[,1:4]))
107 |      edist(d, c(50, 50, 50), distance=TRUE, alpha = 1)
108 | 
109 |  }
110 | \keyword{ multivariate }
111 | \keyword{ cluster }
112 | \keyword{ nonparametric }
113 | \concept{ energy statistics }
114 | 


--------------------------------------------------------------------------------
/docs/404.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <!-- Generated by pkgdown: do not edit by hand --><html lang="en">
  3 | <head>
  4 | <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  5 | <meta charset="utf-8">
  6 | <meta http-equiv="X-UA-Compatible" content="IE=edge">
  7 | <meta name="viewport" content="width=device-width, initial-scale=1.0">
  8 | <title>Page not found (404) • energy</title>
  9 | <!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.7.1/jquery.min.js" integrity="sha512-v2CJ7UaYy4JwqLDIrZUI/4hqeoQieOmAZNXBeQyjo21dadnwR+8ZaIJVT8EE2iyI61OV8e6M8PP2/4hpQINQ/g==" crossorigin="anonymous" referrerpolicy="no-referrer"></script><!-- Bootstrap --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous">
 10 | <script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="bootstrap-toc.css">
 11 | <script src="bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous">
 12 | <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous">
 13 | <!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="pkgdown.css" rel="stylesheet">
 14 | <script src="pkgdown.js"></script><meta property="og:title" content="Page not found (404)">
 15 | <!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
 16 | <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
 17 | <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
 18 | <![endif]-->
 19 | </head>
 20 | <body data-spy="scroll" data-target="#toc">
 21 | 
 22 | 
 23 |     <div class="container template-title-body">
 24 |       <header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
 25 |   <div class="container">
 26 |     <div class="navbar-header">
 27 |       <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
 28 |         <span class="sr-only">Toggle navigation</span>
 29 |         <span class="icon-bar"></span>
 30 |         <span class="icon-bar"></span>
 31 |         <span class="icon-bar"></span>
 32 |       </button>
 33 |       <span class="navbar-brand">
 34 |         <a class="navbar-link" href="index.html">energy</a>
 35 |         <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">1.7-12</span>
 36 |       </span>
 37 |     </div>
 38 | 
 39 |     <div id="navbar" class="navbar-collapse collapse">
 40 |       <ul class="nav navbar-nav">
 41 | <li>
 42 |   <a href="reference/index.html">Reference</a>
 43 | </li>
 44 | <li>
 45 |   <a href="news/index.html">Changelog</a>
 46 | </li>
 47 |       </ul>
 48 | <ul class="nav navbar-nav navbar-right">
 49 | <li>
 50 |   <a href="https://github.com/mariarizzo/energy/" class="external-link">
 51 |     <span class="fab fa-github fa-lg"></span>
 52 | 
 53 |   </a>
 54 | </li>
 55 |       </ul>
 56 | </div>
 57 | <!--/.nav-collapse -->
 58 |   </div>
 59 | <!--/.container -->
 60 | </div>
 61 | <!--/.navbar -->
 62 | 
 63 | 
 64 | 
 65 |       </header><div class="row">
 66 |   <div class="contents col-md-9">
 67 |     <div class="page-header">
 68 |       <h1>Page not found (404)</h1>
 69 |     </div>
 70 | 
 71 | Content not found. Please use links in the navbar.
 72 | 
 73 |   </div>
 74 | 
 75 |   <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
 76 |     <nav id="toc" data-toggle="toc" class="sticky-top"><h2 data-toc-skip>Contents</h2>
 77 |     </nav>
 78 | </div>
 79 | 
 80 | </div>
 81 | 
 82 | 
 83 | 
 84 |       <footer><div class="copyright">
 85 |   <p></p>
 86 | <p>Developed by Maria Rizzo, Gabor Szekely.</p>
 87 | </div>
 88 | 
 89 | <div class="pkgdown">
 90 |   <p></p>
 91 | <p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.1.0.</p>
 92 | </div>
 93 | 
 94 |       </footer>
 95 | </div>
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 |   </body>
103 | </html>
104 | 


--------------------------------------------------------------------------------
/man/mvI.test.Rd:
--------------------------------------------------------------------------------
  1 | \name{mvI.test}
  2 | \alias{mvI.test}
  3 | \alias{mvI}
  4 | \title{ Independence Coefficient and Test}
  5 | \description{
  6 |  Computes a type of multivariate nonparametric E-statistic and test of independence
  7 |  based on independence coefficient \eqn{\mathcal I_n}{I_n}. This coefficient pre-dates and is different from distance covariance or distance correlation.}
  8 | \usage{
  9 |     mvI.test(x, y, R)
 10 |     mvI(x, y)
 11 | }
 12 | \arguments{
 13 |   \item{x}{ matrix: first sample, observations in rows}
 14 |   \item{y}{ matrix: second sample, observations in rows}
 15 |   \item{R}{ number of replicates}
 16 | }
 17 | \details{
 18 |  \code{mvI} computes the coefficient \eqn{\mathcal I_n}{I_n} and \code{mvI.test} performs a nonparametric test of independence. The test decision is obtained via permutation 
 19 |  bootstrap, with \code{R} replicates.
 20 |  The sample sizes (number of rows) of the two samples must agree, and
 21 |  samples must not contain missing values. 
 22 | 
 23 | Historically this is the first energy test of independence. The
 24 |  distance covariance test  \code{\link{dcov.test}}, distance correlation \code{\link{dcor}}, and related methods are more recent (2007, 2009).
 25 |  
 26 | The distance covariance test \code{\link{dcov.test}} and distance correlation test \code{\link{dcor.test}} are much faster and have  different properties than \code{mvI.test}. All are based on a population independence coefficient that characterizes independence and all of these tests are statistically consistent. However, dCor is scale invariant while \eqn{\mathcal I_n}{I_n} is not. In applications \code{\link{dcor.test}} or \code{\link{dcov.test}} are the recommended tests. 
 27 | 
 28 | Computing formula from Bakirov, Rizzo, and Szekely (2006), equation (2): 
 29 | 
 30 | Suppose the two samples are \eqn{X_1,\dots,X_n \in R^p} and \eqn{Y_1,\dots,Y_n \in R^q}. Define \eqn{Z_{kl} = (X_k, Y_l) \in R^{p+q}.} 
 31 | 
 32 | The independence coefficient \eqn{\mathcal I_n}{I_n} is defined
 33 | \deqn{
 34 | \mathcal I_n = \sqrt{\frac{2\bar z - z_d - z}{x + y - z}}, 
 35 | }
 36 | where
 37 | \deqn{z_d= \frac{1}{n^2} \sum_{k,l=1}^n |Z_{kk}-Z_{ll}|_{p+q},}
 38 | \deqn{z= \frac{1}{n^4} \sum_{k,l=1}^n \sum_{i,j=1}^n |Z_{kl}-Z_{ij}|_{p+q},}
 39 | \deqn{\bar z= \frac{1}{n^3} \sum_{k=1}^n \sum_{i,j=1}^n |Z_{kk}-Z_{ij}|_{p+q},}
 40 | \deqn{x= \frac{1}{n^2} \sum_{k,l=1}^n |X_{k}-X_{l}|_p,}
 41 | \deqn{y= \frac{1}{n^2} \sum_{k,l=1}^n |Y_{k}-Y_{l}|_q.}
 42 | 
 43 | Some properties:
 44 | \itemize{
 45 | \item 
 46 | \eqn{0 \leq \mathcal I_n \leq 1} (Theorem 1).
 47 | \item 
 48 | Large values of \eqn{n \mathcal I_n^2} (or \eqn{\mathcal I_n})  support the alternative hypothesis that the sampled random variables are dependent.
 49 | \item \eqn{\mathcal I_n} is invariant to shifts and orthogonal transformations of X and Y.  
 50 | \item \eqn{\sqrt{n} \, \mathcal I_n} determines a statistically consistent test of independence against all fixed dependent alternatives (Corollary 1).
 51 | \item The population independence coefficient \eqn{\mathcal I} is a normalized distance between the joint characteristic function and the product of the marginal characteristic functions. \eqn{\mathcal I_n} converges almost surely to \eqn{\mathcal I} as \eqn{n \to \infty}. X and Y are independent if and only if \eqn{\mathcal I(X, Y) = 0}.
 52 | See the 2006 reference below for more details. 
 53 | }}
 54 | \value{
 55 | \code{mvI} returns the statistic. \code{mvI.test} returns
 56 |  a list with class
 57 |  \code{htest} containing
 58 |  \item{     method}{ description of test}
 59 |  \item{  statistic}{ observed value of the test statistic \eqn{n\mathcal I_n^2}{n I_n^2}}
 60 |  \item{   estimate}{ \eqn{\mathcal I_n}{I_n}}
 61 |  \item{ replicates}{ permutation replicates}
 62 |  \item{    p.value}{ p-value of the test}
 63 |  \item{  data.name}{ description of data}
 64 | }
 65 | \references{
 66 |  Bakirov, N.K., Rizzo, M.L., and Szekely, G.J. (2006), A Multivariate
 67 |  Nonparametric Test of Independence, \emph{Journal of Multivariate Analysis} 93/1, 58-80.
 68 |  
 69 |  Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007),
 70 |  Measuring and Testing Dependence by Correlation of Distances,
 71 |  \emph{Annals of Statistics}, Vol. 35 No. 6, pp. 2769-2794.
 72 | 
 73 |  Szekely, G.J. and Rizzo, M.L. (2009),
 74 |  Brownian Distance Covariance,
 75 |  \emph{Annals of Applied Statistics},
 76 |  Vol. 3, No. 4, 1236-1265.
 77 |  }
 78 | \note{
 79 | On scale invariance: Distance correlation (\code{\link{dcor}}) has the property that if we change the scale of X from e.g., meters to kilometers, and the scale of Y from e.g. grams to ounces, the statistic and the test are not changed. \eqn{\mathcal I_n}{I_n} does not have this property; it is invariant only under a common rescaling of X and Y by the same constant. Thus, if the units of measurement change for either or both variables, dCor is invariant, but \eqn{\mathcal I_n}{I_n} and possibly the \code{mvI.test}  decision changes.   
 80 | }
 81 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
 82 | Gabor J. Szekely
 83 | }
 84 | \examples{
 85 | mvI(iris[1:25, 1], iris[1:25, 2])
 86 | \donttest{
 87 | mvI.test(iris[1:25, 1], iris[1:25, 2], R=99)
 88 | }
 89 | }
 90 | \seealso{
 91 |  \code{ \link{dcov.test} }
 92 |  \code{ \link{dcov} }
 93 |  \code{ \link{dcor.test} }
 94 |  \code{ \link{dcor} }
 95 |  \code{ \link{dcov2d} }
 96 |  \code{ \link{dcor2d} }
 97 |  }
 98 |  \keyword{ htest }
 99 |  \keyword{ multivariate }
100 |  \keyword{ nonparametric }
101 |  \concept{ independence }
102 |  \concept{ energy statistics }
103 | 
104 | 


--------------------------------------------------------------------------------
/man/dcov.Rd:
--------------------------------------------------------------------------------
  1 | \name{distance correlation}
  2 | \alias{dcor}
  3 | \alias{dcov}
  4 | \title{ Distance Correlation and Covariance Statistics}
  5 | \description{
  6 |  Computes distance covariance and distance correlation statistics,
  7 |  which are multivariate measures of dependence.
  8 |  }
  9 | \usage{
 10 | dcov(x, y, index = 1.0)
 11 | dcor(x, y, index = 1.0)
 12 | }
 13 | \arguments{
 14 |   \item{x}{ data or distances of first sample}
 15 |   \item{y}{ data or distances of second sample}
 16 |   \item{index}{ exponent on Euclidean distance, in (0,2]}
 17 | }
 18 | \details{
 19 |  \code{dcov} and \code{dcor} compute distance
 20 |  covariance and distance correlation statistics.
 21 | 
 22 |   The sample sizes (number of rows) of the two samples must
 23 |   agree, and samples must not contain missing values. 
 24 |   
 25 | The \code{index} is an optional exponent on Euclidean distance.
 26 | Valid exponents for energy are in (0, 2) excluding 2. 
 27 | 
 28 | Argument types supported are 
 29 | numeric data matrix, data.frame, or tibble, with observations in rows;
 30 | numeric vector; ordered or unordered factors. In case of unordered factors
 31 | a 0-1 distance matrix is computed.
 32 | 
 33 | Optionally pre-computed distances can be input as class "dist" objects or as distance matrices. 
 34 |  For data types of arguments, distance matrices are computed internally. 
 35 | 
 36 | Distance correlation is a new measure of dependence between random
 37 | vectors introduced by Szekely, Rizzo, and Bakirov (2007).
 38 | For all distributions with finite first moments, distance
 39 | correlation \eqn{\mathcal R}{R} generalizes the idea of correlation in two
 40 | fundamental ways:
 41 |  (1) \eqn{\mathcal R(X,Y)}{R(X,Y)} is defined for \eqn{X} and \eqn{Y} in arbitrary dimension.
 42 |  (2) \eqn{\mathcal R(X,Y)=0}{R(X,Y)=0} characterizes independence of \eqn{X} and
 43 |  \eqn{Y}.
 44 | 
 45 | Distance correlation satisfies \eqn{0 \le \mathcal R \le 1}{0 \le R \le 1}, and
 46 | \eqn{\mathcal R = 0}{R = 0} only if \eqn{X} and \eqn{Y} are independent. Distance
 47 | covariance \eqn{\mathcal V}{V} provides a new approach to the problem of
 48 | testing the joint independence of random vectors. The formal
 49 | definitions of the population coefficients \eqn{\mathcal V}{V} and
 50 | \eqn{\mathcal R}{R} are given in (SRB 2007). The definitions of the
 51 | empirical coefficients are as follows.
 52 | 
 53 | The empirical distance covariance \eqn{\mathcal{V}_n(\mathbf{X,Y})}{V_n(X,Y)}
 54 | with index 1 is
 55 | the nonnegative number defined by
 56 | \deqn{
 57 |  \mathcal{V}^2_n (\mathbf{X,Y}) = \frac{1}{n^2} \sum_{k,\,l=1}^n
 58 |  A_{kl}B_{kl}
 59 |  }{
 60 | V^2_n (X,Y) = (1/n^2) sum_{k,l=1:n}
 61 |  A_{kl}B_{kl}
 62 |  }
 63 |  where \eqn{A_{kl}} and \eqn{B_{kl}} are
 64 |  \deqn{
 65 | A_{kl} = a_{kl}-\bar a_{k.}- \bar a_{.l} + \bar a_{..}
 66 | }
 67 | \deqn{
 68 |  B_{kl} = b_{kl}-\bar b_{k.}- \bar b_{.l} + \bar b_{..}.
 69 |  }
 70 | Here
 71 | \deqn{
 72 | a_{kl} = \|X_k - X_l\|_p, \quad b_{kl} = \|Y_k - Y_l\|_q, \quad
 73 | k,l=1,\dots,n,
 74 | }{
 75 | a_{kl} = ||X_k - X_l||_p, b_{kl} = ||Y_k - Y_l||_q,
 76 | k,l=1,\dots,n,
 77 | }
 78 | and the subscript \code{.} denotes that the mean is computed for the
 79 | index that it replaces.  Similarly,
 80 | \eqn{\mathcal{V}_n(\mathbf{X})}{V_n(X)} is the nonnegative number defined by
 81 | \deqn{
 82 |  \mathcal{V}^2_n (\mathbf{X}) = \mathcal{V}^2_n (\mathbf{X,X}) =
 83 |  \frac{1}{n^2} \sum_{k,\,l=1}^n
 84 |  A_{kl}^2.
 85 |  }{
 86 | V^2_n (X) = V^2_n (X,X) =
 87 |  (1/n^2) sum_{k,l=1:n}
 88 |  A_{kl}^2.
 89 |  }
 90 | 
 91 | The empirical distance correlation \eqn{\mathcal{R}_n(\mathbf{X,Y})}{R(\mathbf{X,Y})} is
 92 | the square root of
 93 | \deqn{
 94 |   \mathcal{R}^2_n(\mathbf{X,Y})=
 95 |  \frac {\mathcal{V}^2_n(\mathbf{X,Y})}
 96 |  {\sqrt{ \mathcal{V}^2_n (\mathbf{X}) \mathcal{V}^2_n(\mathbf{Y})}}.
 97 | }{
 98 |   R^2_n(X,Y)=
 99 |  V^2_n(X,Y) / sqrt(V^2_n (X) V^2_n(Y)).
100 | }
101 | See \code{\link{dcov.test}} for a test of multivariate independence
102 | based on the distance covariance statistic. 
103 | }
104 | \value{
105 | \code{dcov} returns the sample distance covariance and
106 | \code{dcor} returns the sample distance correlation.
107 | }
108 | \note{
109 | Note that it is inefficient to compute dCor by:
110 | 
111 | square root of
112 | \code{dcov(x,y)/sqrt(dcov(x,x)*dcov(y,y))}
113 | 
114 | because the individual
115 | calls to \code{dcov} involve unnecessary repetition of calculations.
116 | }
117 | \seealso{
118 | \code{\link{dcov2d}} \code{\link{dcor2d}} 
119 | \code{\link{bcdcor}}  \code{\link{dcovU}}  \code{\link{pdcor}}
120 | \code{\link{dcov.test}} \code{\link{dcor.test}}  \code{\link{pdcor.test}}
121 | }
122 | \references{
123 |  Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007),
124 |  Measuring and Testing Dependence by Correlation of Distances,
125 |  \emph{Annals of Statistics}, Vol. 35 No. 6, pp. 2769-2794.
126 |  \cr \doi{10.1214/009053607000000505}
127 | 
128 |  Szekely, G.J. and Rizzo, M.L. (2009),
129 |  Brownian Distance Covariance,
130 |  \emph{Annals of Applied Statistics},
131 |  Vol. 3, No. 4, 1236-1265.
132 |  \cr \doi{10.1214/09-AOAS312}
133 | 
134 |  Szekely, G.J. and Rizzo, M.L. (2009),
135 |  Rejoinder: Brownian Distance Covariance,
136 |  \emph{Annals of Applied Statistics}, Vol. 3, No. 4, 1303-1308.
137 |   }
138 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
139 | Gabor J. Szekely
140 | }
141 | \examples{
142 |  x <- iris[1:50, 1:4]
143 |  y <- iris[51:100, 1:4]
144 |  dcov(x, y)
145 |  dcov(dist(x), dist(y))  #same thing
146 | }
147 | \keyword{ multivariate }
148 | \concept{ independence }
149 | \concept{ distance correlation }
150 | \concept{ distance covariance }
151 | \concept{ energy statistics }
152 | 
153 | 


--------------------------------------------------------------------------------
/man/disco.Rd:
--------------------------------------------------------------------------------
  1 | \name{disco}
  2 | \alias{disco}
  3 | \alias{disco.between}
  4 | \alias{print.disco}
  5 | \title{ distance components (DISCO)}
  6 | \description{
  7 |   E-statistics DIStance COmponents and tests, analogous to variance components
  8 |   and anova.
  9 | }
 10 | \usage{
 11 | disco(x, factors, distance, index=1.0, R, method=c("disco","discoB","discoF"))
 12 | disco.between(x, factors, distance, index=1.0, R)
 13 | }
 14 | \arguments{
 15 |   \item{x}{ data matrix or distance matrix or dist object}
 16 |   \item{factors}{ matrix or data frame of factor labels or integers (not design matrix)}
 17 |   \item{distance}{ logical, TRUE if x is distance matrix}
 18 |   \item{index}{ exponent on Euclidean distance in (0,2]}
 19 |   \item{R}{ number of replicates for a permutation test}
 20 |   \item{method}{ test statistic }
 21 | }
 22 | \details{
 23 |   \code{disco} calculates the distance components decomposition of
 24 |   total dispersion and if R > 0 tests for significance using the test statistic
 25 |   disco "F" ratio (default \code{method="disco"}),
 26 |   or using the between component statistic (\code{method="discoB"}),
 27 |   each implemented by permutation test.
 28 |   
 29 |   If \code{x} is a \code{dist} object, argument \code{distance} is
 30 |   ignored. If \code{x} is a distance matrix, set \code{distance=TRUE}. 
 31 | 
 32 |   In the current release \code{disco} computes the decomposition for one-way models
 33 |   only.
 34 | }
 35 | \value{
 36 |   When \code{method="discoF"}, \code{disco} returns a list similar to the
 37 |   return value from \code{anova.lm}, and the \code{print.disco} method is
 38 |   provided to format the output into a similar table. Details:
 39 | 
 40 | \code{disco} returns a class \code{disco} object, which is a list containing
 41 | \item{call}{call}
 42 | \item{method}{method}
 43 | \item{statistic}{vector of observed statistics}
 44 | \item{p.value}{vector of p-values}
 45 | \item{k}{number of factors}
 46 | \item{N}{number of observations}
 47 | \item{between}{between-sample distance components}
 48 | \item{withins}{one-way within-sample distance components}
 49 | \item{within}{within-sample distance component}
 50 | \item{total}{total dispersion}
 51 | \item{Df.trt}{degrees of freedom for treatments}
 52 | \item{Df.e}{degrees of freedom for error}
 53 | \item{index}{index (exponent on distance)}
 54 | \item{factor.names}{factor names}
 55 | \item{factor.levels}{factor levels}
 56 | \item{sample.sizes}{sample sizes}
 57 | \item{stats}{matrix containing decomposition}
 58 | 
 59 | When \code{method="discoB"}, \code{disco} passes the arguments to
 60 | \code{disco.between}, which returns a class \code{htest} object.
 61 | 
 62 | \code{disco.between} returns a class \code{htest} object, where the test
 63 | statistic is the between-sample statistic (proportional to the numerator of the F ratio
 64 | of the \code{disco} test.
 65 | }
 66 | \references{
 67 | M. L. Rizzo and G. J. Szekely (2010).
 68 | DISCO Analysis: A Nonparametric Extension of
 69 | Analysis of Variance, Annals of Applied Statistics,
 70 | Vol. 4, No. 2, 1034-1055.
 71 | \cr \doi{10.1214/09-AOAS245}
 72 | }
 73 | \note{
 74 | The current version does all calculations via matrix arithmetic and
 75 | boot function. Support for more general additive models
 76 | and a formula interface is under development.
 77 | 
 78 | \code{disco} methods have been added to the cluster distance summary
 79 | function \code{edist}, and energy tests for equality of distribution
 80 | (see \code{eqdist.etest}).
 81 | }
 82 | \seealso{
 83 |  \code{ \link{edist} }
 84 |  \code{ \link{eqdist.e} }
 85 |  \code{ \link{eqdist.etest} }
 86 |  \code{ \link{ksample.e} }
 87 |  }
 88 |  \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and Gabor J. Szekely
 89 |  }
 90 | \examples{
 91 |       ## warpbreaks one-way decompositions
 92 |       data(warpbreaks)
 93 |       attach(warpbreaks)
 94 |       disco(breaks, factors=wool, R=99)
 95 |       
 96 |       ## warpbreaks two-way wool+tension
 97 |       disco(breaks, factors=data.frame(wool, tension), R=0)
 98 | 
 99 |       ## warpbreaks two-way wool*tension
100 |       disco(breaks, factors=data.frame(wool, tension, wool:tension), R=0)
101 | 
102 |       ## When index=2 for univariate data, we get ANOVA decomposition
103 |       disco(breaks, factors=tension, index=2.0, R=99)
104 |       aov(breaks ~ tension)
105 | 
106 |       ## Multivariate response
107 |       ## Example on producing plastic film from Krzanowski (1998, p. 381)
108 |       tear <- c(6.5, 6.2, 5.8, 6.5, 6.5, 6.9, 7.2, 6.9, 6.1, 6.3,
109 |                 6.7, 6.6, 7.2, 7.1, 6.8, 7.1, 7.0, 7.2, 7.5, 7.6)
110 |       gloss <- c(9.5, 9.9, 9.6, 9.6, 9.2, 9.1, 10.0, 9.9, 9.5, 9.4,
111 |                  9.1, 9.3, 8.3, 8.4, 8.5, 9.2, 8.8, 9.7, 10.1, 9.2)
112 |       opacity <- c(4.4, 6.4, 3.0, 4.1, 0.8, 5.7, 2.0, 3.9, 1.9, 5.7,
113 |                    2.8, 4.1, 3.8, 1.6, 3.4, 8.4, 5.2, 6.9, 2.7, 1.9)
114 |       Y <- cbind(tear, gloss, opacity)
115 |       rate <- factor(gl(2,10), labels=c("Low", "High"))
116 | 
117 | 	    ## test for equal distributions by rate
118 |       disco(Y, factors=rate, R=99)
119 | 	    disco(Y, factors=rate, R=99, method="discoB")
120 | 
121 |       ## Just extract the decomposition table
122 |       disco(Y, factors=rate, R=0)$stats
123 | 
124 | 	    ## Compare eqdist.e methods for rate
125 | 	    ## disco between stat is half of original when sample sizes equal
126 | 	    eqdist.e(Y, sizes=c(10, 10), method="original")
127 | 	    eqdist.e(Y, sizes=c(10, 10), method="discoB")
128 | 
129 |       ## The between-sample distance component
130 |       disco.between(Y, factors=rate, R=0)
131 | }
132 | \keyword{ htest }
133 | \keyword{ multivariate }
134 | 
135 | 
136 | 


--------------------------------------------------------------------------------
/R/dcov2d.R:
--------------------------------------------------------------------------------
  1 | dcor2d<- function(x, y, type = c("V", "U")) {
  2 |   ## computes dcor^2 or bias-corrected dcor^2 by O(n log n) algorithm
  3 |   ## bivariate data only: (x,y) in R^2
  4 |   ## should be faster than direct calc. for big n
  5 |   type <- match.arg(type)
  6 |   ## argument checking in dcov2d
  7 |   stat <- dcov2d(x, y, type, all.stats=TRUE)
  8 |   dvarX <- stat[2]
  9 |   dvarY <- stat[3]
 10 |   R2 <- 0.0
 11 |   if (abs(dvarX*dvarY > 10*.Machine$double.eps))
 12 |     R2 <- stat[1] / sqrt(dvarX*dvarY)
 13 |   return (R2)
 14 | }
 15 | 
 16 | dcov2d<- function(x, y, type=c("V", "U"), all.stats=FALSE) {
 17 |   ## O(n log n) computation of dcovU or dcov^2 (V^2) for (x, y) in R^2 only
 18 |   type <- match.arg(type)
 19 |   if (!is.vector(x) || !is.vector(y)) {
 20 |     if (NCOL(x) > 1 || NCOL(y) > 1)
 21 |       stop("this method is only for univariate x and y")
 22 |   }
 23 |   x <- as.vector(x)
 24 |   y <- as.vector(y)
 25 |   n <- length(x)
 26 |   if (n != length(y))
 27 |     stop("sample sizes must agree")
 28 |   
 29 |   Sums <- .dcovSums2d(x, y, all.sums=all.stats)
 30 |   if (type =="V") {
 31 |     d1 <- n^2
 32 |     d2 <- n^3
 33 |     d3 <- n^4
 34 |   } else {
 35 |     d1 <- n * (n - 3)
 36 |     d2 <- d1 * (n - 2)
 37 |     d3 <- d2 * (n - 1)
 38 |   }
 39 |   dCov2d <- Sums$S1/d1 - 2*Sums$S2/d2 + Sums$S3/d3
 40 |   if (all.stats) {
 41 |     dvarX <- Sums$S1a/d1 - 2*Sums$S2a/d2 + Sums$S3a/d3
 42 |     dvarY <- Sums$S1b/d1 - 2*Sums$S2b/d2 + Sums$S3b/d3
 43 |   }
 44 |   rval <- ifelse(type=="V", c(V=dCov2d), c(U=dCov2d))
 45 |   if (all.stats)
 46 |     rval <- c(rval, dvarX=dvarX, dvarY=dvarY) 
 47 |   return (rval)
 48 | }
 49 | 
 50 | 
 51 | .dcovSums2d <- function(x, y, all.sums = FALSE) {
 52 |   ## compute the sums S1, S2, S3 of distances for dcov^2
 53 |   ## dCov^2 <- S1/d1 - 2 * S2/d2 + S3/d3  
 54 |   ## denominators differ for U-statistic, V-statisic
 55 |   ## if all.sums==TRUE, also return sums for dVar and kernel 
 56 |   if (is.matrix(x) || is.matrix(y)) {
 57 |     if (ncol(x) > 1 || ncol(y) > 1)
 58 |       stop("Found multivariate (x,y) in .dcovSums2d, expecting bivariate")
 59 |   }
 60 |   n <- length(x)
 61 |   SRx <- sortrank(x)
 62 |   SRy <- sortrank(y)
 63 |   ## compute the rowSums of the distance matrices
 64 |   a. <- .rowSumsDist1(x, SRx)
 65 |   b. <- .rowSumsDist1(y, SRy)
 66 |   S2 <- sum(a. * b.)
 67 |   a.. <- sum(a.)
 68 |   b.. <- sum(b.)
 69 |   S3 <- sum(a.) * sum(b.)
 70 |   
 71 |   ## also need order and rank for y[order(x)] in gamma1()
 72 |   x1 <- SRx$x
 73 |   y1 <- y[SRx$ix]
 74 |   SRy1 <- sortrank(y1)
 75 |   ones <- rep(1, n)
 76 |   g_1 <-  .gamma1(x1=x1, y1=y1, z1=ones,  SRx=SRx, SRy1=SRy1)
 77 |   g_x <-  .gamma1(x1=x1, y1=y1, z1=x1,    SRx=SRx, SRy1=SRy1)
 78 |   g_y <-  .gamma1(x1=x1, y1=y1, z1=y1,    SRx=SRx, SRy1=SRy1)
 79 |   g_xy <- .gamma1(x1=x1, y1=y1, z1=x1*y1, SRx=SRx, SRy1=SRy1)
 80 |   S1 <- sum(x * y * g_1 + g_xy - x * g_y - y * g_x)
 81 | 
 82 |   L <- list(S1=S1, S2=S2, S3=S3, 
 83 |             S1a=NA, S1b=NA, S2a=NA, S2b=NA, S3a=NA, S3b=NA,
 84 |             rowsumsA=NA, rowsumsB=NA, sumA=NA, sumB=NA)
 85 |   if (all.sums) {
 86 |     L$S1a <- 2 * n * (n-1) * var(x)
 87 |     L$S1b <- 2 * n * (n-1) * var(y)
 88 |     L$S2a <- sum(a.^2)
 89 |     L$S2b <- sum(b.^2)
 90 |     L$S3a <- a..^2
 91 |     L$S3b <- b..^2
 92 |     L$rowsumsA <- a.
 93 |     L$rowsumsB <- b.
 94 |     L$sumA <- a..
 95 |     L$sumB <- b..
 96 |   }
 97 |   return (L);
 98 | }
 99 | 
100 | .dvarU2 <- function(x, SRx = NULL) {
101 |   ## O(n log n) computation of dvarU for univariate x only
102 |   ## this is an internal function that will do a stand-alone dVar calc.
103 |   ## but it is not faster than dcovU2(x, x) unless we supply
104 |   ## the precomputed sort + rank results in SRx
105 |   n <- length(x)
106 |   ## compute the rowSums of the distance matrices
107 |   if (is.null(SRx))
108 |     SRx <- sortrank(x)
109 |   a. <- .rowSumsDist1(x, SRx)
110 |   S2 <- sum(a. * a.)
111 |   S3 <- sum(a.)^2
112 | 
113 |   ## also need order and rank for y[order(x)] in gamma1()
114 |   x1 <- SRx$x
115 |   x2 <- x1
116 |   SRx1 <- sortrank(x1)
117 |   ones <- rep(1, n)
118 |   g_1 <-  .gamma1(x1=x1, y1=x2, z1=ones,  SRx, SRx1)
119 |   g_x <-  .gamma1(x1=x1, y1=x2, z1=x1,    SRx, SRx1)
120 |   g_xx <- .gamma1(x1=x1, y1=x2, z1=x1*x2, SRx, SRx1)
121 |   S1 <- sum(x^2 * g_1 + g_xx - 2 * x * g_x)
122 |   d1 <- n * (n - 3)
123 |   d2 <- d1 * (n - 2)
124 |   d3 <- d2 * (n - 1)
125 |   dVar <- S1/d1 - 2 * S2/d2 + S3/d3
126 |   return(dVar)
127 | }
128 | 
129 | .gamma1 <- function(x1, y1, z1, SRx, SRy1) {
130 |   # computes the terms of the sum (ab) in dcovU
131 |   # original sample (x_i, y_i, z_i)
132 |   # triples (x1_i, y1_i, z1_i) are sorted by ix=order(x)
133 |   # SRx is the result of sortrank(x), original order
134 |   # SRy1 is the result of sortrank(y1), y1=y[order(x)]
135 |   # pre-compute SRx, SRy1 to avoid repeated sort and rank
136 |   #
137 |   n <- length(x1)
138 |   ix <- SRx$ix      #order(x)
139 |   rankx <- SRx$r    #ranks of original sample x
140 | 
141 |   ## ranks and order vector for this permutation of sample y1
142 |   iy1 <- SRy1$ix       #order(y1)
143 |   ranky1 <- SRy1$r     #rank(y1)
144 | 
145 |   ## the partial sums in the formula g_1
146 |   psumsy1 <- (cumsum(as.numeric(z1[iy1])) - z1[iy1])[ranky1]
147 |   psumsx1 <- cumsum(as.numeric(z1)) - z1
148 | 
149 |   gamma1 <- Btree_sum(y=ranky1, z=z1)   #y1 replaced by rank(y1)
150 |   g <- sum(z1) - z1 - 2 * psumsx1 - 2 * psumsy1 + 4 * gamma1
151 |   g <- g[rankx]
152 | }
153 | 
154 | .rowSumsDist1 <- function(x, Sx = NULL) {
155 |   ## for univariate samples, equivalent to rowSums(as.matrix(dist(x)))
156 |   ## but much faster
157 |   ## Sx is a sortrank object usually pre-computed here
158 |   ## x is the data vector, Sx$x is sort(x)
159 |   if (is.null(Sx))
160 |     Sx <- sortrank(x)
161 |   n <- length(x)
162 |   r <- Sx$r  #ranks
163 |   z <- Sx$x  #ordered sample x
164 |   psums1 <- (cumsum(as.numeric(z)) - z)[r]
165 |   (2*(r-1)-n)*x + sum(x) - 2*psums1
166 | }
167 | 


--------------------------------------------------------------------------------
/man/dcov.test.Rd:
--------------------------------------------------------------------------------
  1 | \name{dcov.test}
  2 | \alias{distance covariance}
  3 | \alias{dcov.test}
  4 | \alias{dcor.test}
  5 | \title{ Distance Covariance Test and Distance Correlation test}
  6 | \description{
  7 |  Distance covariance test and distance correlation test of multivariate independence.
  8 |  Distance covariance and distance correlation are
  9 |  multivariate measures of dependence.}
 10 | \usage{
 11 | dcov.test(x, y, index = 1.0, R = NULL)
 12 | dcor.test(x, y, index = 1.0, R)
 13 | }
 14 | \arguments{
 15 |   \item{x}{ data or distances of first sample}
 16 |   \item{y}{ data or distances of second sample}
 17 |   \item{R}{ number of replicates}
 18 |   \item{index}{ exponent on Euclidean distance, in (0,2]}
 19 | }
 20 | \details{
 21 |  \code{dcov.test} and \code{dcor.test} are nonparametric
 22 |  tests of multivariate independence. The test decision is
 23 |  obtained via permutation bootstrap, with \code{R} replicates.
 24 | 
 25 |  The sample sizes (number of rows) of the two samples must
 26 |  agree, and samples must not contain missing values. 
 27 |  
 28 |  The \code{index} is an optional exponent on Euclidean distance.
 29 | Valid exponents for energy are in (0, 2) excluding 2. 
 30 | 
 31 | Argument types supported are 
 32 | numeric data matrix, data.frame, or tibble, with observations in rows;
 33 | numeric vector; ordered or unordered factors. In case of unordered factors
 34 | a 0-1 distance matrix is computed.
 35 | 
 36 | Optionally pre-computed distances can be input as class "dist" objects or as distance matrices. 
 37 | For data types of arguments,
 38 | distance matrices are computed internally. 
 39 | 
 40 |  The \code{dcov} test statistic is
 41 |  \eqn{n \mathcal V_n^2}{nV_n^2} where
 42 |  \eqn{\mathcal V_n(x,y)}{V_n(x,y)} = dcov(x,y),
 43 |  which is based on interpoint Euclidean distances
 44 |  \eqn{\|x_{i}-x_{j}\|}{||x_{i}-x_{j}||}. The \code{index}
 45 |  is an optional exponent on Euclidean distance.
 46 | 
 47 | Similarly, the \code{dcor} test statistic is based on the normalized
 48 | coefficient, the distance correlation. (See the manual page for \code{dcor}.)
 49 | 
 50 | Distance correlation is a new measure of dependence between random
 51 | vectors introduced by Szekely, Rizzo, and Bakirov (2007).
 52 | For all distributions with finite first moments, distance
 53 | correlation \eqn{\mathcal R}{R} generalizes the idea of correlation in two
 54 | fundamental ways:
 55 | 
 56 |  (1) \eqn{\mathcal R(X,Y)}{R(X,Y)} is defined for \eqn{X} and \eqn{Y} in arbitrary dimension.
 57 | 
 58 |  (2) \eqn{\mathcal R(X,Y)=0}{R(X,Y)=0} characterizes independence of \eqn{X} and
 59 |  \eqn{Y}.
 60 | 
 61 | Characterization (2) also holds for powers of Euclidean distance \eqn{\|x_i-x_j\|^s}{|x_i-x_j|^s}, where \eqn{0<s<2}, but (2) does not hold when \eqn{s=2}.
 62 | 
 63 | Distance correlation satisfies \eqn{0 \le \mathcal R \le 1}{0 \le R \le 1}, and
 64 | \eqn{\mathcal R = 0}{R = 0} only if \eqn{X} and \eqn{Y} are independent. Distance
 65 | covariance \eqn{\mathcal V}{V} provides a new approach to the problem of
 66 | testing the joint independence of random vectors. The formal
 67 | definitions of the population coefficients \eqn{\mathcal V}{V} and
 68 | \eqn{\mathcal R}{R} are given in (SRB 2007). The definitions of the
 69 | empirical coefficients are given in the energy
 70 | \code{\link{dcov}} topic.
 71 | 
 72 | For all values of the index in (0,2), under independence
 73 | the asymptotic distribution of \eqn{n\mathcal V_n^2}{nV_n^2}
 74 | is a quadratic form of centered Gaussian random variables,
 75 | with coefficients that depend on the distributions of \eqn{X} and \eqn{Y}. For the general problem of testing independence when the distributions of \eqn{X} and \eqn{Y} are unknown, the test based on \eqn{n\mathcal V^2_n}{n V_n^2} can be implemented as a permutation test. See (SRB 2007) for
 76 | theoretical properties of the test, including statistical consistency.
 77 | }
 78 | \value{
 79 | \code{dcov.test} or \code{dcor.test} returns a list with class \code{htest} containing
 80 |    \item{     method}{ description of test}
 81 |    \item{  statistic}{ observed value of the test statistic}
 82 |    \item{   estimate}{ dCov(x,y) or dCor(x,y)}
 83 |    \item{  estimates}{ a vector: [dCov(x,y), dCor(x,y), dVar(x), dVar(y)]}
 84 |    \item{  condition}{ logical, permutation test applied}
 85 |    \item{ replicates}{ replicates of the test statistic}
 86 |    \item{    p.value}{ approximate p-value of the test}
 87 |    \item{          n}{ sample size}
 88 |    \item{  data.name}{ description of data}
 89 | }
 90 | \note{For the dcov test of independence,
 91 | the distance covariance test statistic is the V-statistic
 92 | \eqn{\mathrm{n\, dCov^2} = n \mathcal{V}_n^2}{n V_n^2} (not dCov). 
 93 | }
 94 | \seealso{
 95 |  \code{\link{dcov} } \code{\link{dcor} } 
 96 |  \code{\link{pdcov.test}} \code{\link{pdcor.test}} 
 97 |  \code{\link{dcor.ttest}}
 98 |  }
 99 | 
100 | \references{
101 |  Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007),
102 |  Measuring and Testing Dependence by Correlation of Distances,
103 |  \emph{Annals of Statistics}, Vol. 35 No. 6, pp. 2769-2794.
104 |  \cr \doi{10.1214/009053607000000505}
105 | 
106 |  Szekely, G.J. and Rizzo, M.L. (2009),
107 |  Brownian Distance Covariance,
108 |  \emph{Annals of Applied Statistics},
109 |  Vol. 3, No. 4, 1236-1265.
110 |  \cr \doi{10.1214/09-AOAS312}
111 | 
112 |  Szekely, G.J. and Rizzo, M.L. (2009),
113 |  Rejoinder: Brownian Distance Covariance,
114 |  \emph{Annals of Applied Statistics}, Vol. 3, No. 4, 1303-1308.
115 | }
116 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
117 | Gabor J. Szekely
118 | }
119 | \examples{
120 |  x <- iris[1:50, 1:4]
121 |  y <- iris[51:100, 1:4]
122 |  set.seed(1)
123 |  dcor.test(dist(x), dist(y), R=199)
124 |  set.seed(1)
125 |  dcov.test(x, y, R=199)
126 | }
127 | \keyword{ htest }
128 | \keyword{ multivariate }
129 | \keyword{ nonparametric }
130 | \concept{ independence }
131 | \concept{ multivariate }
132 | \concept{ distance correlation }
133 | \concept{ distance covariance }
134 | \concept{ energy statistics }
135 | 
136 | 


--------------------------------------------------------------------------------
/man/energy.hclust.Rd:
--------------------------------------------------------------------------------
  1 | \name{energy.hclust}
  2 | \alias{energy.hclust}
  3 | \title{ Hierarchical Clustering by Minimum (Energy) E-distance }
  4 | \description{
  5 |   Performs hierarchical clustering by minimum (energy) E-distance method.
  6 | }
  7 | \usage{
  8 |     energy.hclust(dst, alpha = 1)
  9 | }
 10 | \arguments{
 11 |   \item{dst}{\code{dist} object}
 12 |   \item{alpha}{distance exponent}
 13 | }
 14 | \details{
 15 |   Dissimilarities are \eqn{d(x,y) = \|x-y\|^\alpha}{||x-y||^a},
 16 |   where the exponent \eqn{\alpha}{a} is in the interval (0,2].
 17 |   This function performs agglomerative hierarchical clustering.
 18 |   Initially, each of the n singletons is a cluster. At each of n-1 steps, the
 19 |   procedure merges the pair of clusters with minimum e-distance.
 20 |   The e-distance between two clusters \eqn{C_i, C_j} of sizes \eqn{n_i, n_j} 
 21 |   is given by
 22 |     \deqn{e(C_i, C_j)=\frac{n_i n_j}{n_i+n_j}[2M_{ij}-M_{ii}-M_{jj}],
 23 |     }
 24 |     where
 25 |     \deqn{M_{ij}=\frac{1}{n_i n_j}\sum_{p=1}^{n_i} \sum_{q=1}^{n_j}
 26 |        \|X_{ip}-X_{jq}\|^\alpha,}{
 27 |        M_{ij} = 1/(n_i n_j) sum[1:n_i, 1:n_j] ||X_(ip) - X_(jq)||^a,}
 28 |        \eqn{\|\cdot\|}{|| ||} denotes Euclidean norm, and \eqn{X_{ip}}{
 29 |        X_(ip)} denotes the p-th observation in the i-th cluster.
 30 | 
 31 |   The return value is an object of class \code{hclust}, so \code{hclust}
 32 |   methods such as print or plot methods, \code{plclust}, and \code{cutree}
 33 |   are available. See the documentation for \code{hclust}.
 34 | 
 35 |   The e-distance measures both the heterogeneity between clusters and the
 36 |   homogeneity within clusters. \eqn{\mathcal E}{E}-clustering
 37 |   (\eqn{\alpha=1}{a=1}) is particularly effective in
 38 |   high dimension, and is more effective than some standard hierarchical
 39 |   methods when clusters have equal means (see example below).
 40 |   For other advantages see the references.
 41 |   
 42 |   \code{edist} computes the energy distances for the result (or any partition)
 43 |   and returns the cluster distances in a \code{dist} object. See the \code{edist}
 44 |   examples.
 45 | }
 46 | \value{
 47 |      An object of class \code{hclust} which describes the tree produced by
 48 |      the clustering process. The object is a list with components:
 49 | \item{merge:}{ an n-1 by 2 matrix, where row i of \code{merge} describes the
 50 |      merging of clusters at step i of the clustering. If an element j in the
 51 |      row is negative, then observation -j was merged at this
 52 |      stage. If j is positive then the merge was with the cluster
 53 |      formed at the (earlier) stage j of the algorithm.}
 54 | \item{height:}{the clustering height: a vector of n-1 non-decreasing
 55 |      real numbers (the e-distance between merging clusters)}
 56 | \item{order:}{ a vector giving a permutation of the indices of
 57 |      original observations suitable for plotting, in the sense that a
 58 |      cluster plot using this ordering and matrix \code{merge} will not have
 59 |      crossings of the branches.}
 60 | \item{labels:}{ labels for each of the objects being clustered.}
 61 | \item{call:}{ the call which produced the result.}
 62 | \item{method:}{ the cluster method that has been used (e-distance).}
 63 | \item{dist.method:}{ the distance that has been used to create \code{dst}.}
 64 | }
 65 | \note{
 66 | Currently \code{stats::hclust} implements Ward's method by \code{method="ward.D2"},
 67 | which applies the squared distances. That method was previously \code{"ward"}. 
 68 | Because both \code{hclust} and energy use the same type of Lance-Williams recursive formula to update cluster distances, now with the additional option \code{method="ward.D"} in \code{hclust}, the
 69 | energy distance method is easily implemented by \code{hclust}. (Some "Ward" algorithms do not use Lance-Williams, however). Energy clustering (with \code{alpha=1}) and "ward.D" now return the same result, except that the cluster heights of energy hierarchical clustering with \code{alpha=1} are two times the heights from \code{hclust}. In order to ensure compatibility with hclust methods, \code{energy.hclust} now passes arguments through to \code{hclust} after possibly applying the optional exponent to distance.
 70 | }
 71 | \references{
 72 |      Szekely, G. J. and Rizzo, M. L. (2005) Hierarchical Clustering
 73 |      via Joint Between-Within Distances: Extending Ward's Minimum
 74 |      Variance Method, \emph{Journal of Classification} 22(2) 151-183.
 75 |      \cr \doi{10.1007/s00357-005-0012-9}
 76 | 
 77 |      Szekely, G. J. and Rizzo, M. L. (2004) Testing for Equal
 78 |      Distributions in High Dimension, \emph{InterStat}, November (5).
 79 | 
 80 |  Szekely, G. J. (2000) Technical Report 03-05:
 81 |  \eqn{\mathcal{E}}{E}-statistics: Energy of
 82 |  Statistical Samples, Department of Mathematics and Statistics, Bowling
 83 |  Green State University.
 84 | }
 85 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
 86 |   Gabor J. Szekely
 87 |   }
 88 | \seealso{
 89 |   \code{\link{edist}} \code{\link{ksample.e}} \code{\link{eqdist.etest}} \code{hclust}}
 90 | \examples{
 91 |    \dontrun{
 92 |    library(cluster)
 93 |    data(animals)
 94 |    plot(energy.hclust(dist(animals)))
 95 | 
 96 |    data(USArrests)
 97 |    ecl <- energy.hclust(dist(USArrests))
 98 |    print(ecl)
 99 |    plot(ecl)
100 |    cutree(ecl, k=3)
101 |    cutree(ecl, h=150)
102 | 
103 |    ## compare performance of e-clustering, Ward's method, group average method
104 |    ## when sampled populations have equal means: n=200, d=5, two groups
105 |    z <- rbind(matrix(rnorm(1000), nrow=200), matrix(rnorm(1000, 0, 5), nrow=200))
106 |    g <- c(rep(1, 200), rep(2, 200))
107 |    d <- dist(z)
108 |    e <- energy.hclust(d)
109 |    a <- hclust(d, method="average")
110 |    w <- hclust(d^2, method="ward.D2")
111 |    list("E" = table(cutree(e, k=2) == g), "Ward" = table(cutree(w, k=2) == g),
112 |     "Avg" = table(cutree(a, k=2) == g))
113 |   }
114 |  }
115 | \keyword{ multivariate }
116 | \keyword{ cluster }
117 | \concept{ energy statistics }
118 | 


--------------------------------------------------------------------------------
/docs/authors.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>Authors and Citation • energy</title><!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.7.1/jquery.min.js" integrity="sha512-v2CJ7UaYy4JwqLDIrZUI/4hqeoQieOmAZNXBeQyjo21dadnwR+8ZaIJVT8EE2iyI61OV8e6M8PP2/4hpQINQ/g==" crossorigin="anonymous" referrerpolicy="no-referrer"></script><!-- Bootstrap --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous"><script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="bootstrap-toc.css"><script src="bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="pkgdown.css" rel="stylesheet"><script src="pkgdown.js"></script><meta property="og:title" content="Authors and Citation"><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
  3 | <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
  4 | <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
  5 | <![endif]--></head><body data-spy="scroll" data-target="#toc">
  6 | 
  7 | 
  8 |     <div class="container template-citation-authors">
  9 |       <header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
 10 |   <div class="container">
 11 |     <div class="navbar-header">
 12 |       <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
 13 |         <span class="sr-only">Toggle navigation</span>
 14 |         <span class="icon-bar"></span>
 15 |         <span class="icon-bar"></span>
 16 |         <span class="icon-bar"></span>
 17 |       </button>
 18 |       <span class="navbar-brand">
 19 |         <a class="navbar-link" href="index.html">energy</a>
 20 |         <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">1.7-12</span>
 21 |       </span>
 22 |     </div>
 23 | 
 24 |     <div id="navbar" class="navbar-collapse collapse">
 25 |       <ul class="nav navbar-nav"><li>
 26 |   <a href="reference/index.html">Reference</a>
 27 | </li>
 28 | <li>
 29 |   <a href="news/index.html">Changelog</a>
 30 | </li>
 31 |       </ul><ul class="nav navbar-nav navbar-right"><li>
 32 |   <a href="https://github.com/mariarizzo/energy/" class="external-link">
 33 |     <span class="fab fa-github fa-lg"></span>
 34 | 
 35 |   </a>
 36 | </li>
 37 |       </ul></div><!--/.nav-collapse -->
 38 |   </div><!--/.container -->
 39 | </div><!--/.navbar -->
 40 | 
 41 | 
 42 | 
 43 |       </header><div class="row">
 44 |   <div class="contents col-md-9">
 45 |     <div class="section level2 authors-section">
 46 |       <div class="page-header">
 47 |         <h1>Authors and Citation</h1>
 48 |       </div>
 49 | 
 50 | 
 51 |       <ul class="list-unstyled"><li>
 52 |           <p><strong>Maria Rizzo</strong>. Author, maintainer.
 53 |           </p>
 54 |         </li>
 55 |         <li>
 56 |           <p><strong>Gabor Szekely</strong>. Author.
 57 |           </p>
 58 |         </li>
 59 |       </ul></div>
 60 |     <div class="section level2 citation-section">
 61 |     <div>
 62 |       <h1 id="citation">Citation</h1>
 63 |       <small class="dont-index">Source: <a href="https://github.com/mariarizzo/energy/blob/HEAD/DESCRIPTION" class="external-link"><code>DESCRIPTION</code></a></small>
 64 |     </div>
 65 |     </div>
 66 | 
 67 | 
 68 |     <p>Rizzo M, Szekely G (2024).
 69 | <em>energy: E-Statistics: Multivariate Inference via the Energy of Data</em>.
 70 | R package version 1.7-12, <a href="https://CRAN.R-project.org/package=energy" class="external-link">https://CRAN.R-project.org/package=energy</a>.
 71 | </p>
 72 |     <pre>@Manual{,
 73 |   title = {energy: E-Statistics: Multivariate Inference via the Energy of Data},
 74 |   author = {Maria Rizzo and Gabor Szekely},
 75 |   year = {2024},
 76 |   note = {R package version 1.7-12},
 77 |   url = {https://CRAN.R-project.org/package=energy},
 78 | }</pre>
 79 | 
 80 |   </div>
 81 | 
 82 | </div>
 83 | 
 84 | 
 85 | 
 86 |       <footer><div class="copyright">
 87 |   <p></p><p>Developed by Maria Rizzo, Gabor Szekely.</p>
 88 | </div>
 89 | 
 90 | <div class="pkgdown">
 91 |   <p></p><p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.1.0.</p>
 92 | </div>
 93 | 
 94 |       </footer></div>
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 |   </body></html>
102 | 
103 | 


--------------------------------------------------------------------------------
/src/dcov.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |    dcov.c: distance correlation and covariance statistics
  3 |    and dCov test for multivariate independence
  4 | 
  5 |    Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007)
  6 |    "Measuring and testing dependence by correlation of distances"
  7 |    Annals of Statistics, Vol. 35 No. 6, pp. 2769-2794.
  8 | 
  9 |  Author: Maria L. Rizzo
 10 |  energy package
 11 |  github.com/mariarizzo/energy
 12 |  
 13 |  */
 14 | 
 15 | #include <R.h>
 16 | #include <Rmath.h>
 17 | 
 18 | void   dCOVtest(double *x, double *y, int *nrow, int *nreps,
 19 |                 double *reps, double *DCOV, double *pval);
 20 | void   dCOV(double *x, double *y, int *nrow, double *DCOV);
 21 | double Akl(double **akl, double **A, int n);
 22 | 
 23 | /* functions in utilities.c */
 24 | extern double **alloc_matrix(int r, int c);
 25 | extern int    **alloc_int_matrix(int r, int c);
 26 | extern void   free_matrix(double **matrix, int r, int c);
 27 | extern void   free_int_matrix(int **matrix, int r, int c);
 28 | extern void   permute(int *J, int n);
 29 | extern void   roworder(double *x, int *byrow, int r, int c);
 30 | extern void   Euclidean_distance(double *x, double **Dx, int n, int d);
 31 | extern void   index_distance(double **Dx, int n, double index);
 32 | extern void   vector2matrix(double *x, double **y, int N, int d, int isroworder);
 33 | 
 34 | 
 35 | void dCOVtest(double *x, double *y, int *nrow, int *nreps,
 36 |               double *reps, double *DCOV, double *pval) {
 37 |     /*  input vectors must expand to distance matrices 
 38 |         any exponent must be pre-computed in R
 39 |         computes dCov(x,y), dCor(x,y), dVar(x), dVar(y)
 40 |         V-statistic is n*dCov^2 where n*dCov^2 --> Q
 41 |         DCOV  : vector [dCov, dCor, dVar(x), dVar(y), mean(A), mean(B)]
 42 |      */
 43 |     int    i, j, k, r, J, K, M;
 44 |     int    n = nrow[0], R = nreps[0];
 45 |     int*   perm;
 46 |     double **Dx, **Dy, **A, **B;
 47 |     double dcov, V;
 48 |     double n2 = (double) n * n;
 49 |     Dx = alloc_matrix(n, n);
 50 |     Dy = alloc_matrix(n, n);
 51 |   	vector2matrix(x, Dx, n, n, 1);
 52 | 	  vector2matrix(y, Dy, n, n, 1);
 53 | 
 54 |     A = alloc_matrix(n, n);
 55 |     B = alloc_matrix(n, n);
 56 |     Akl(Dx, A, n);
 57 |     Akl(Dy, B, n);
 58 |     free_matrix(Dx, n, n);
 59 |     free_matrix(Dy, n, n);
 60 | 
 61 |     /* compute dCov(x,y), dVar(x), dVar(y) */
 62 |     for (k=0; k<4; k++)
 63 |         DCOV[k] = 0.0;
 64 |     for (k=0; k<n; k++)
 65 |         for (j=0; j<n; j++) {
 66 |             DCOV[0] += A[k][j]*B[k][j];
 67 |             DCOV[2] += A[k][j]*A[k][j];
 68 |             DCOV[3] += B[k][j]*B[k][j];
 69 |         }
 70 | 
 71 |     for (k=0; k<4; k++) {
 72 |         DCOV[k] /= n2;
 73 |         if (DCOV[k] > 0)
 74 |             DCOV[k] = sqrt(DCOV[k]);
 75 |             else DCOV[k] = 0.0;
 76 |     }
 77 |     /* compute dCor(x, y) */
 78 |     V = DCOV[2]*DCOV[3];
 79 |     if (V > DBL_EPSILON)
 80 |         DCOV[1] = DCOV[0] / sqrt(V);
 81 |         else DCOV[1] = 0.0;
 82 | 
 83 | 	if (R > 0) {
 84 | 		/* compute the replicates */
 85 | 		if (DCOV[1] > 0.0) {
 86 | 			perm = R_Calloc(n, int);
 87 | 			M = 0;
 88 | 			for (i=0; i<n; i++) perm[i] = i;
 89 |       GetRNGstate();
 90 |       for (r=0; r<R; r++) {
 91 | 			   permute(perm, n);
 92 | 			   dcov = 0.0;
 93 | 			   for (k=0; k<n; k++) {
 94 | 				   K = perm[k];
 95 | 				   for (j=0; j<n; j++) {
 96 | 					   J = perm[j];
 97 | 					   dcov += A[k][j]*B[K][J];
 98 | 				   }
 99 | 			   }
100 | 			   dcov /= n2;
101 | 			   dcov = sqrt(dcov);
102 | 			   reps[r] = dcov;
103 | 			   if (dcov >= DCOV[0]) M++;
104 | 			}
105 | 			*pval = (double) (M+1) / (double) (R+1);
106 |       PutRNGstate();
107 | 			R_Free(perm);
108 | 		} else {
109 | 		    *pval = 1.0;
110 | 			}
111 | 	}
112 | 
113 |     free_matrix(A, n, n);
114 |     free_matrix(B, n, n);
115 |     return;
116 | }
117 | 
118 | void dCOV(double *x, double *y, int *nrow, double *DCOV) {
119 |   /*  input vectors must expand to distance matrices 
120 |       any exponent must be pre-computed in R
121 |         computes dCov(x,y), dCor(x,y), dVar(x), dVar(y)
122 |         V-statistic is n*dCov^2 where n*dCov^2 --> Q
123 |         DCOV  : vector [dCov, dCor, dVar(x), dVar(y)]
124 |      */
125 | 
126 |     int    j, k, n = nrow[0];
127 |     double **Dx, **Dy, **A, **B;
128 |     double V, n2 = (double) n * n;
129 | 
130 |     Dx = alloc_matrix(n, n);
131 |     Dy = alloc_matrix(n, n);
132 | 		vector2matrix(x, Dx, n, n, 1);
133 | 		vector2matrix(y, Dy, n, n, 1);
134 | 
135 |     A = alloc_matrix(n, n);
136 |     B = alloc_matrix(n, n);
137 |     Akl(Dx, A, n);
138 |     Akl(Dy, B, n);
139 |     free_matrix(Dx, n, n);
140 |     free_matrix(Dy, n, n);
141 | 
142 |     n2 = ((double) n) * n;
143 | 
144 |     /* compute dCov(x,y), dVar(x), dVar(y) */
145 |     for (k=0; k<4; k++)
146 |         DCOV[k] = 0.0;
147 |     for (k=0; k<n; k++)
148 |         for (j=0; j<n; j++) {
149 |             DCOV[0] += A[k][j]*B[k][j];
150 |             DCOV[2] += A[k][j]*A[k][j];
151 |             DCOV[3] += B[k][j]*B[k][j];
152 |         }
153 | 
154 |     for (k=0; k<4; k++) {
155 |         DCOV[k] /= n2;
156 |         if (DCOV[k] > 0)
157 |             DCOV[k] = sqrt(DCOV[k]);
158 |             else DCOV[k] = 0.0;
159 |     }
160 |     /* compute dCor(x, y) */
161 |     V = DCOV[2]*DCOV[3];
162 |     if (V > DBL_EPSILON)
163 |         DCOV[1] = DCOV[0] / sqrt(V);
164 |         else DCOV[1] = 0.0;
165 | 
166 |     free_matrix(A, n, n);
167 |     free_matrix(B, n, n);
168 |     return;
169 | }
170 | 
171 | double Akl(double **akl, double **A, int n) {
172 |     /* -computes the A_{kl} or B_{kl} distances from the
173 |         distance matrix (a_{kl}) or (b_{kl}) for dCov, dCor, dVar
174 |         dCov = mean(Akl*Bkl), dVar(X) = mean(Akl^2), etc.
175 |     */
176 |     int j, k;
177 |     double *akbar;
178 |     double abar;
179 | 
180 |     akbar = R_Calloc(n, double);
181 |     abar = 0.0;
182 |     for (k=0; k<n; k++) {
183 |         akbar[k] = 0.0;
184 |         for (j=0; j<n; j++) {
185 |             akbar[k] += akl[k][j];
186 |         }
187 |         abar += akbar[k];
188 |         akbar[k] /= (double) n;
189 |     }
190 |     abar /= (double) (n*n);
191 | 
192 |     for (k=0; k<n; k++)
193 |         for (j=k; j<n; j++) {
194 |             A[k][j] = akl[k][j] - akbar[k] - akbar[j] + abar;
195 |             A[j][k] = A[k][j];
196 |         }
197 |     R_Free(akbar);
198 |     return(abar);
199 | }
200 | 


--------------------------------------------------------------------------------
/man/eqdist.etest.Rd:
--------------------------------------------------------------------------------
  1 | \name{eqdist.etest}
  2 | \alias{eqdist.etest}
  3 | \alias{eqdist.e}
  4 | \alias{ksample.e}
  5 | \title{Multisample E-statistic (Energy) Test of Equal Distributions}
  6 | \description{
  7 |  Performs the nonparametric multisample E-statistic (energy) test
  8 |  for equality of multivariate distributions.
  9 | }
 10 | \usage{
 11 | eqdist.etest(x, sizes, distance = FALSE,
 12 |     method=c("original","discoB","discoF"), R)
 13 | eqdist.e(x, sizes, distance = FALSE,
 14 |     method=c("original","discoB","discoF"))
 15 | ksample.e(x, sizes, distance = FALSE,
 16 |     method=c("original","discoB","discoF"), ix = 1:sum(sizes))
 17 | }
 18 | \arguments{
 19 |   \item{x}{ data matrix of pooled sample}
 20 |   \item{sizes}{ vector of sample sizes}
 21 |   \item{distance}{logical: if TRUE, first argument is a distance matrix}
 22 |   \item{method}{ use original (default) or distance components (discoB, discoF)}
 23 |   \item{R}{ number of bootstrap replicates }
 24 |   \item{ix}{ a permutation of the row indices of x }
 25 | }
 26 | \details{
 27 |   The k-sample multivariate \eqn{\mathcal{E}}{E}-test of equal distributions
 28 |   is performed. The statistic is computed from the original
 29 |   pooled samples, stacked in matrix \code{x} where each row
 30 |   is a multivariate observation, or the corresponding distance matrix. The
 31 |   first \code{sizes[1]} rows of \code{x} are the first sample, the next
 32 |   \code{sizes[2]} rows of \code{x} are the second sample, etc.
 33 | 
 34 |   The test is implemented by nonparametric bootstrap, an approximate
 35 |   permutation test with \code{R} replicates.
 36 | 
 37 |   The function \code{eqdist.e} returns the test statistic only; it simply
 38 |   passes the arguments through to \code{eqdist.etest} with \code{R = 0}.
 39 | 
 40 |     The k-sample multivariate \eqn{\mathcal{E}}{E}-statistic for testing equal distributions
 41 |     is returned. The statistic is computed from the original pooled samples, stacked in
 42 |     matrix \code{x} where each row is a multivariate observation, or from the distance
 43 |     matrix \code{x} of the original data. The
 44 |     first \code{sizes[1]} rows of \code{x} are the first sample, the next
 45 |     \code{sizes[2]} rows of \code{x} are the second sample, etc.
 46 | 
 47 |     The two-sample \eqn{\mathcal{E}}{E}-statistic proposed by
 48 |     Szekely and Rizzo (2004)
 49 |     is the e-distance \eqn{e(S_i,S_j)}, defined for two samples \eqn{S_i, S_j}
 50 |     of size \eqn{n_i, n_j} by
 51 |     \deqn{e(S_i,S_j)=\frac{n_i n_j}{n_i+n_j}[2M_{ij}-M_{ii}-M_{jj}],
 52 |     }{e(S_i, S_j) = (n_i n_j)(n_i+n_j)[2M_(ij)-M_(ii)-M_(jj)],}
 53 |     where
 54 |     \deqn{M_{ij}=\frac{1}{n_i n_j}\sum_{p=1}^{n_i} \sum_{q=1}^{n_j}
 55 |        \|X_{ip}-X_{jq}\|,}{
 56 |        M_{ij} = 1/(n_i n_j) sum[1:n_i, 1:n_j] ||X_(ip) - X_(jq)||,}
 57 |        \eqn{\|\cdot\|}{|| ||} denotes Euclidean norm, and \eqn{X_{ip}}{
 58 |        X_(ip)} denotes the p-th observation in the i-th sample.
 59 | 
 60 | 
 61 |     The original (default method) k-sample
 62 |     \eqn{\mathcal{E}}{E}-statistic is defined by summing the pairwise e-distances over
 63 |     all \eqn{k(k-1)/2} pairs
 64 |     of samples:
 65 |     \deqn{\mathcal{E}=\sum_{1 \leq i < j \leq k} e(S_i,S_j).
 66 |     }{\emph{E} = sum[i<j] e(S_i,S_j).}
 67 |     Large values of \eqn{\mathcal{E}}{\emph{E}} are significant.
 68 | 
 69 |     The \code{discoB} method computes the between-sample disco statistic.
 70 |     For a one-way analysis, it is related to the original statistic as follows.
 71 |     In the above equation, the weights \eqn{\frac{n_i n_j}{n_i+n_j}}{n_i n_j/(n_i+n_j)}
 72 |     are replaced with
 73 |     \deqn{\frac{n_i + n_j}{2N}\frac{n_i n_j}{n_i+n_j} =
 74 |     \frac{n_i n_j}{2N}}{(n_i + n_j)/(2N) n_i n_j/(n_i+n_j) = n_i n_j/(2N)}
 75 |     where N is the total number of observations: \eqn{N=n_1+...+n_k}{N=n_1+...+n_k}.
 76 | 
 77 | 	The \code{discoF} method is based on the disco F ratio, while the \code{discoB}
 78 | 	method is based on the between sample component.
 79 | 
 80 |   Also see \code{disco} and \code{disco.between} functions.
 81 | }
 82 | \value{
 83 |  A list with class \code{htest} containing
 84 |  \item{method}{description of test}
 85 |  \item{statistic}{observed value of the test statistic}
 86 |  \item{p.value}{approximate p-value of the test}
 87 |  \item{data.name}{description of data}
 88 | 
 89 |  \code{eqdist.e} returns test statistic only.
 90 | }
 91 | \note{
 92 | The pairwise e-distances between samples can be conveniently
 93 | computed by the \code{edist} function, which returns a \code{dist} object.
 94 | }
 95 | \references{
 96 |  Szekely, G. J. and Rizzo, M. L. (2004) Testing for Equal
 97 |  Distributions in High Dimension, \emph{InterStat}, November (5).
 98 | 
 99 |  M. L. Rizzo and G. J. Szekely (2010).
100 |  DISCO Analysis: A Nonparametric Extension of
101 |  Analysis of Variance, Annals of Applied Statistics,
102 |  Vol. 4, No. 2, 1034-1055.
103 | \cr \doi{10.1214/09-AOAS245}
104 | 
105 |  Szekely, G. J. (2000) Technical Report 03-05:
106 |  \eqn{\mathcal{E}}{E}-statistics: Energy of
107 |  Statistical Samples, Department of Mathematics and Statistics, Bowling
108 |  Green State University.
109 | }
110 | \author{ Maria L. Rizzo \email{mrizzo@bgsu.edu} and
111 | Gabor J. Szekely
112 | }
113 | \seealso{
114 |  \code{\link{ksample.e}},
115 |  \code{\link{edist}},
116 |  \code{\link{disco}},
117 |  \code{\link{disco.between}},
118 |  \code{\link{energy.hclust}}.
119 |  }
120 | \examples{
121 |  data(iris)
122 | 
123 |  ## test if the 3 varieties of iris data (d=4) have equal distributions
124 |  eqdist.etest(iris[,1:4], c(50,50,50), R = 199)
125 | 
126 |  ## example that uses method="disco"
127 |   x <- matrix(rnorm(100), nrow=20)
128 |   y <- matrix(rnorm(100), nrow=20)
129 |   X <- rbind(x, y)
130 |   d <- dist(X)
131 | 
132 |   # should match edist default statistic
133 |   set.seed(1234)
134 |   eqdist.etest(d, sizes=c(20, 20), distance=TRUE, R = 199)
135 | 
136 |   # comparison with edist
137 |   edist(d, sizes=c(20, 10), distance=TRUE)
138 | 
139 |   # for comparison
140 |   g <- as.factor(rep(1:2, c(20, 20)))
141 |   set.seed(1234)
142 |   disco(d, factors=g, distance=TRUE, R=199)
143 | 
144 |   # should match statistic in edist method="discoB", above
145 |   set.seed(1234)
146 |   disco.between(d, factors=g, distance=TRUE, R=199)
147 | }
148 | \keyword{ multivariate }
149 | \keyword{ htest }
150 | \keyword{ nonparametric }
151 | \concept{ energy statistics}
152 | 


--------------------------------------------------------------------------------
/docs/reference/energy-deprecated.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>Deprecated Functions — energy-deprecated • energy</title><!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.7.1/jquery.min.js" integrity="sha512-v2CJ7UaYy4JwqLDIrZUI/4hqeoQieOmAZNXBeQyjo21dadnwR+8ZaIJVT8EE2iyI61OV8e6M8PP2/4hpQINQ/g==" crossorigin="anonymous" referrerpolicy="no-referrer"></script><!-- Bootstrap --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous"><script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="../bootstrap-toc.css"><script src="../bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="../pkgdown.css" rel="stylesheet"><script src="../pkgdown.js"></script><meta property="og:title" content="Deprecated Functions — energy-deprecated"><meta property="og:description" content="These deprecated functions have been replaced by revised functions and will be removed in future releases of the energy package."><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
  3 | <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
  4 | <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
  5 | <![endif]--></head><body data-spy="scroll" data-target="#toc">
  6 | 
  7 | 
  8 |     <div class="container template-reference-topic">
  9 |       <header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
 10 |   <div class="container">
 11 |     <div class="navbar-header">
 12 |       <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
 13 |         <span class="sr-only">Toggle navigation</span>
 14 |         <span class="icon-bar"></span>
 15 |         <span class="icon-bar"></span>
 16 |         <span class="icon-bar"></span>
 17 |       </button>
 18 |       <span class="navbar-brand">
 19 |         <a class="navbar-link" href="../index.html">energy</a>
 20 |         <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">1.7-12</span>
 21 |       </span>
 22 |     </div>
 23 | 
 24 |     <div id="navbar" class="navbar-collapse collapse">
 25 |       <ul class="nav navbar-nav"><li>
 26 |   <a href="../reference/index.html">Reference</a>
 27 | </li>
 28 | <li>
 29 |   <a href="../news/index.html">Changelog</a>
 30 | </li>
 31 |       </ul><ul class="nav navbar-nav navbar-right"><li>
 32 |   <a href="https://github.com/mariarizzo/energy/" class="external-link">
 33 |     <span class="fab fa-github fa-lg"></span>
 34 | 
 35 |   </a>
 36 | </li>
 37 |       </ul></div><!--/.nav-collapse -->
 38 |   </div><!--/.container -->
 39 | </div><!--/.navbar -->
 40 | 
 41 | 
 42 | 
 43 |       </header><div class="row">
 44 |   <div class="col-md-9 contents">
 45 |     <div class="page-header">
 46 |     <h1>Deprecated Functions</h1>
 47 | 
 48 |     <div class="hidden name"><code>energy-deprecated.Rd</code></div>
 49 |     </div>
 50 | 
 51 |     <div class="ref-description">
 52 |     <p>These deprecated functions have been replaced by revised functions and will be removed in future releases of the energy package.</p>
 53 |     </div>
 54 | 
 55 |     <div id="ref-usage">
 56 |     <div class="sourceCode"><pre class="sourceCode r"><code><span><span class="fu">DCOR</span><span class="op">(</span><span class="va">x</span>, <span class="va">y</span>, index<span class="op">=</span><span class="fl">1.0</span><span class="op">)</span></span></code></pre></div>
 57 |     </div>
 58 | 
 59 |     <div id="arguments">
 60 |     <h2>Arguments</h2>
 61 |     <p></p>
 62 | <dl><dt id="arg-x">x<a class="anchor" aria-label="anchor" href="#arg-x"></a></dt>
 63 | <dd><p>data or distances of first sample</p></dd>
 64 | 
 65 |   <dt id="arg-y">y<a class="anchor" aria-label="anchor" href="#arg-y"></a></dt>
 66 | <dd><p>data or distances of second sample</p></dd>
 67 | 
 68 |   <dt id="arg-index">index<a class="anchor" aria-label="anchor" href="#arg-index"></a></dt>
 69 | <dd><p>exponent on Euclidean distance in (0, 2)</p></dd>
 70 | 
 71 | </dl></div>
 72 |     <div id="details">
 73 |     <h2>Details</h2>
 74 |     <p>DCOR is an R version replaced by faster compiled code.</p>
 75 |     </div>
 76 | 
 77 |   </div>
 78 |   <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
 79 |     <nav id="toc" data-toggle="toc" class="sticky-top"><h2 data-toc-skip>Contents</h2>
 80 |     </nav></div>
 81 | </div>
 82 | 
 83 | 
 84 |       <footer><div class="copyright">
 85 |   <p></p><p>Developed by Maria Rizzo, Gabor Szekely.</p>
 86 | </div>
 87 | 
 88 | <div class="pkgdown">
 89 |   <p></p><p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.1.0.</p>
 90 | </div>
 91 | 
 92 |       </footer></div>
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 |   </body></html>
100 | 
101 | 


--------------------------------------------------------------------------------
/docs/reference/energy-defunct.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>Distance Correlation t-test for High Dimensions — dcor.ttest • energy</title><!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.7.1/jquery.min.js" integrity="sha512-v2CJ7UaYy4JwqLDIrZUI/4hqeoQieOmAZNXBeQyjo21dadnwR+8ZaIJVT8EE2iyI61OV8e6M8PP2/4hpQINQ/g==" crossorigin="anonymous" referrerpolicy="no-referrer"></script><!-- Bootstrap --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous"><script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="../bootstrap-toc.css"><script src="../bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="../pkgdown.css" rel="stylesheet"><script src="../pkgdown.js"></script><meta property="og:title" content="Distance Correlation t-test for High Dimensions — dcor.ttest"><meta property="og:description" content="Defunct: use dcorT.test and dcorT."><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
  3 | <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
  4 | <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
  5 | <![endif]--></head><body data-spy="scroll" data-target="#toc">
  6 | 
  7 | 
  8 |     <div class="container template-reference-topic">
  9 |       <header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
 10 |   <div class="container">
 11 |     <div class="navbar-header">
 12 |       <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
 13 |         <span class="sr-only">Toggle navigation</span>
 14 |         <span class="icon-bar"></span>
 15 |         <span class="icon-bar"></span>
 16 |         <span class="icon-bar"></span>
 17 |       </button>
 18 |       <span class="navbar-brand">
 19 |         <a class="navbar-link" href="../index.html">energy</a>
 20 |         <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">1.7-12</span>
 21 |       </span>
 22 |     </div>
 23 | 
 24 |     <div id="navbar" class="navbar-collapse collapse">
 25 |       <ul class="nav navbar-nav"><li>
 26 |   <a href="../reference/index.html">Reference</a>
 27 | </li>
 28 | <li>
 29 |   <a href="../news/index.html">Changelog</a>
 30 | </li>
 31 |       </ul><ul class="nav navbar-nav navbar-right"><li>
 32 |   <a href="https://github.com/mariarizzo/energy/" class="external-link">
 33 |     <span class="fab fa-github fa-lg"></span>
 34 | 
 35 |   </a>
 36 | </li>
 37 |       </ul></div><!--/.nav-collapse -->
 38 |   </div><!--/.container -->
 39 | </div><!--/.navbar -->
 40 | 
 41 | 
 42 | 
 43 |       </header><div class="row">
 44 |   <div class="col-md-9 contents">
 45 |     <div class="page-header">
 46 |     <h1>Distance Correlation t-test for High Dimensions</h1>
 47 | 
 48 |     <div class="hidden name"><code>energy-defunct.Rd</code></div>
 49 |     </div>
 50 | 
 51 |     <div class="ref-description">
 52 |     <p>Defunct: use <code>dcorT.test</code> and <code>dcorT</code>.</p>
 53 |     </div>
 54 | 
 55 |     <div id="ref-usage">
 56 |     <div class="sourceCode"><pre class="sourceCode r"><code><span><span class="fu">dcor.t</span><span class="op">(</span><span class="va">x</span>, <span class="va">y</span>, distance <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span></span>
 57 | <span><span class="fu">dcor.ttest</span><span class="op">(</span><span class="va">x</span>, <span class="va">y</span>, distance <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span></span></code></pre></div>
 58 |     </div>
 59 | 
 60 |     <div id="arguments">
 61 |     <h2>Arguments</h2>
 62 |     <p></p>
 63 | <dl><dt id="arg-x">x<a class="anchor" aria-label="anchor" href="#arg-x"></a></dt>
 64 | <dd><p>data or distances of first sample</p></dd>
 65 | 
 66 |   <dt id="arg-y">y<a class="anchor" aria-label="anchor" href="#arg-y"></a></dt>
 67 | <dd><p>data or distances of second sample</p></dd>
 68 | 
 69 |   <dt id="arg-distance">distance<a class="anchor" aria-label="anchor" href="#arg-distance"></a></dt>
 70 | <dd><p>TRUE if x and y are distances, otherwise FALSE</p></dd>
 71 | 
 72 | </dl></div>
 73 |     <div id="details">
 74 |     <h2>Details</h2>
 75 |     <p>See <code><a href="dcorT.html">dcorT</a></code>.</p>
 76 |     </div>
 77 |     <div id="author">
 78 |     <h2>Author</h2>
 79 |     <p>Maria L. Rizzo <a href="mailto:mrizzo@bgsu.edu">mrizzo@bgsu.edu</a> and
 80 | Gabor J. Szekely</p>
 81 |     </div>
 82 | 
 83 |   </div>
 84 |   <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
 85 |     <nav id="toc" data-toggle="toc" class="sticky-top"><h2 data-toc-skip>Contents</h2>
 86 |     </nav></div>
 87 | </div>
 88 | 
 89 | 
 90 |       <footer><div class="copyright">
 91 |   <p></p><p>Developed by Maria Rizzo, Gabor Szekely.</p>
 92 | </div>
 93 | 
 94 | <div class="pkgdown">
 95 |   <p></p><p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.1.0.</p>
 96 | </div>
 97 | 
 98 |       </footer></div>
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 |   </body></html>
106 | 
107 | 


--------------------------------------------------------------------------------
/src/RcppExports.cpp:
--------------------------------------------------------------------------------
  1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
  2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
  3 | 
  4 | #include <Rcpp.h>
  5 | 
  6 | using namespace Rcpp;
  7 | 
  8 | #ifdef RCPP_USE_GLOBAL_ROSTREAM
  9 | Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
 10 | Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
 11 | #endif
 12 | 
 13 | // Btree_sum
 14 | NumericVector Btree_sum(IntegerVector y, NumericVector z);
 15 | RcppExport SEXP _energy_Btree_sum(SEXP ySEXP, SEXP zSEXP) {
 16 | BEGIN_RCPP
 17 |     Rcpp::RObject rcpp_result_gen;
 18 |     Rcpp::RNGScope rcpp_rngScope_gen;
 19 |     Rcpp::traits::input_parameter< IntegerVector >::type y(ySEXP);
 20 |     Rcpp::traits::input_parameter< NumericVector >::type z(zSEXP);
 21 |     rcpp_result_gen = Rcpp::wrap(Btree_sum(y, z));
 22 |     return rcpp_result_gen;
 23 | END_RCPP
 24 | }
 25 | // calc_dist
 26 | NumericMatrix calc_dist(NumericMatrix x);
 27 | RcppExport SEXP _energy_calc_dist(SEXP xSEXP) {
 28 | BEGIN_RCPP
 29 |     Rcpp::RObject rcpp_result_gen;
 30 |     Rcpp::RNGScope rcpp_rngScope_gen;
 31 |     Rcpp::traits::input_parameter< NumericMatrix >::type x(xSEXP);
 32 |     rcpp_result_gen = Rcpp::wrap(calc_dist(x));
 33 |     return rcpp_result_gen;
 34 | END_RCPP
 35 | }
 36 | // U_product
 37 | double U_product(NumericMatrix U, NumericMatrix V);
 38 | RcppExport SEXP _energy_U_product(SEXP USEXP, SEXP VSEXP) {
 39 | BEGIN_RCPP
 40 |     Rcpp::RObject rcpp_result_gen;
 41 |     Rcpp::RNGScope rcpp_rngScope_gen;
 42 |     Rcpp::traits::input_parameter< NumericMatrix >::type U(USEXP);
 43 |     Rcpp::traits::input_parameter< NumericMatrix >::type V(VSEXP);
 44 |     rcpp_result_gen = Rcpp::wrap(U_product(U, V));
 45 |     return rcpp_result_gen;
 46 | END_RCPP
 47 | }
 48 | // D_center
 49 | NumericMatrix D_center(NumericMatrix Dx);
 50 | RcppExport SEXP _energy_D_center(SEXP DxSEXP) {
 51 | BEGIN_RCPP
 52 |     Rcpp::RObject rcpp_result_gen;
 53 |     Rcpp::RNGScope rcpp_rngScope_gen;
 54 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dx(DxSEXP);
 55 |     rcpp_result_gen = Rcpp::wrap(D_center(Dx));
 56 |     return rcpp_result_gen;
 57 | END_RCPP
 58 | }
 59 | // U_center
 60 | NumericMatrix U_center(NumericMatrix Dx);
 61 | RcppExport SEXP _energy_U_center(SEXP DxSEXP) {
 62 | BEGIN_RCPP
 63 |     Rcpp::RObject rcpp_result_gen;
 64 |     Rcpp::RNGScope rcpp_rngScope_gen;
 65 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dx(DxSEXP);
 66 |     rcpp_result_gen = Rcpp::wrap(U_center(Dx));
 67 |     return rcpp_result_gen;
 68 | END_RCPP
 69 | }
 70 | // dcovU_stats
 71 | NumericVector dcovU_stats(NumericMatrix Dx, NumericMatrix Dy);
 72 | RcppExport SEXP _energy_dcovU_stats(SEXP DxSEXP, SEXP DySEXP) {
 73 | BEGIN_RCPP
 74 |     Rcpp::RObject rcpp_result_gen;
 75 |     Rcpp::RNGScope rcpp_rngScope_gen;
 76 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dx(DxSEXP);
 77 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dy(DySEXP);
 78 |     rcpp_result_gen = Rcpp::wrap(dcovU_stats(Dx, Dy));
 79 |     return rcpp_result_gen;
 80 | END_RCPP
 81 | }
 82 | // kgroups_start
 83 | List kgroups_start(NumericMatrix x, int k, IntegerVector clus, int iter_max, bool distance);
 84 | RcppExport SEXP _energy_kgroups_start(SEXP xSEXP, SEXP kSEXP, SEXP clusSEXP, SEXP iter_maxSEXP, SEXP distanceSEXP) {
 85 | BEGIN_RCPP
 86 |     Rcpp::RObject rcpp_result_gen;
 87 |     Rcpp::RNGScope rcpp_rngScope_gen;
 88 |     Rcpp::traits::input_parameter< NumericMatrix >::type x(xSEXP);
 89 |     Rcpp::traits::input_parameter< int >::type k(kSEXP);
 90 |     Rcpp::traits::input_parameter< IntegerVector >::type clus(clusSEXP);
 91 |     Rcpp::traits::input_parameter< int >::type iter_max(iter_maxSEXP);
 92 |     Rcpp::traits::input_parameter< bool >::type distance(distanceSEXP);
 93 |     rcpp_result_gen = Rcpp::wrap(kgroups_start(x, k, clus, iter_max, distance));
 94 |     return rcpp_result_gen;
 95 | END_RCPP
 96 | }
 97 | // Istat
 98 | double Istat(NumericMatrix Dx, NumericMatrix Dy);
 99 | RcppExport SEXP _energy_Istat(SEXP DxSEXP, SEXP DySEXP) {
100 | BEGIN_RCPP
101 |     Rcpp::RObject rcpp_result_gen;
102 |     Rcpp::RNGScope rcpp_rngScope_gen;
103 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dx(DxSEXP);
104 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dy(DySEXP);
105 |     rcpp_result_gen = Rcpp::wrap(Istat(Dx, Dy));
106 |     return rcpp_result_gen;
107 | END_RCPP
108 | }
109 | // Istats
110 | NumericVector Istats(NumericMatrix Dx, NumericMatrix Dy, int R);
111 | RcppExport SEXP _energy_Istats(SEXP DxSEXP, SEXP DySEXP, SEXP RSEXP) {
112 | BEGIN_RCPP
113 |     Rcpp::RObject rcpp_result_gen;
114 |     Rcpp::RNGScope rcpp_rngScope_gen;
115 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dx(DxSEXP);
116 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dy(DySEXP);
117 |     Rcpp::traits::input_parameter< int >::type R(RSEXP);
118 |     rcpp_result_gen = Rcpp::wrap(Istats(Dx, Dy, R));
119 |     return rcpp_result_gen;
120 | END_RCPP
121 | }
122 | // partial_dcor
123 | NumericVector partial_dcor(NumericMatrix Dx, NumericMatrix Dy, NumericMatrix Dz);
124 | RcppExport SEXP _energy_partial_dcor(SEXP DxSEXP, SEXP DySEXP, SEXP DzSEXP) {
125 | BEGIN_RCPP
126 |     Rcpp::RObject rcpp_result_gen;
127 |     Rcpp::RNGScope rcpp_rngScope_gen;
128 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dx(DxSEXP);
129 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dy(DySEXP);
130 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dz(DzSEXP);
131 |     rcpp_result_gen = Rcpp::wrap(partial_dcor(Dx, Dy, Dz));
132 |     return rcpp_result_gen;
133 | END_RCPP
134 | }
135 | // partial_dcov
136 | double partial_dcov(NumericMatrix Dx, NumericMatrix Dy, NumericMatrix Dz);
137 | RcppExport SEXP _energy_partial_dcov(SEXP DxSEXP, SEXP DySEXP, SEXP DzSEXP) {
138 | BEGIN_RCPP
139 |     Rcpp::RObject rcpp_result_gen;
140 |     Rcpp::RNGScope rcpp_rngScope_gen;
141 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dx(DxSEXP);
142 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dy(DySEXP);
143 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dz(DzSEXP);
144 |     rcpp_result_gen = Rcpp::wrap(partial_dcov(Dx, Dy, Dz));
145 |     return rcpp_result_gen;
146 | END_RCPP
147 | }
148 | // poisMstat
149 | NumericVector poisMstat(IntegerVector x);
150 | RcppExport SEXP _energy_poisMstat(SEXP xSEXP) {
151 | BEGIN_RCPP
152 |     Rcpp::RObject rcpp_result_gen;
153 |     Rcpp::RNGScope rcpp_rngScope_gen;
154 |     Rcpp::traits::input_parameter< IntegerVector >::type x(xSEXP);
155 |     rcpp_result_gen = Rcpp::wrap(poisMstat(x));
156 |     return rcpp_result_gen;
157 | END_RCPP
158 | }
159 | // projection
160 | NumericMatrix projection(NumericMatrix Dx, NumericMatrix Dz);
161 | RcppExport SEXP _energy_projection(SEXP DxSEXP, SEXP DzSEXP) {
162 | BEGIN_RCPP
163 |     Rcpp::RObject rcpp_result_gen;
164 |     Rcpp::RNGScope rcpp_rngScope_gen;
165 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dx(DxSEXP);
166 |     Rcpp::traits::input_parameter< NumericMatrix >::type Dz(DzSEXP);
167 |     rcpp_result_gen = Rcpp::wrap(projection(Dx, Dz));
168 |     return rcpp_result_gen;
169 | END_RCPP
170 | }
171 | 


--------------------------------------------------------------------------------
/R/disco.R:
--------------------------------------------------------------------------------
  1 | ### disco tests - implementation of DIStance COmponents methods in:
  2 | ###
  3 | ### Rizzo, M.L. and Szekely, G.J. (2010) "DISCO Analysis: A Nonparametric
  4 | ### Extension of Analysis of Variance, Annals of Applied Statistics
  5 | ###  Vol. 4, No. 2, 1034-1055.
  6 | ###
  7 | ### disco: computes the decomposition and test using F ratio
  8 | ### disco.between: statistic and test using between component
  9 | ### .disco1: internal computations for one factor
 10 | ### .disco1stat, .disco1Bstat: internal for boot function
 11 | ###
 12 | ###
 13 | 
 14 | 
 15 | 
 16 | disco <- function(x, factors, distance = FALSE, index = 1, R,
 17 |                   method = c("disco", "discoB", "discoF")) {
 18 |   ## x is response or Euclidean distance matrix or dist() object factors
 19 |   ## is a matrix or data frame of group labels distance=TRUE if x is
 20 |   ## distance, otherwise FALSE index is the exponent on distance, in (0,2]
 21 |   ## R is number of replicates for test method: use F ratio (default) or
 22 |   ## between component (discoB) disco method is currently alias for discoF
 23 |   
 24 |   
 25 |   method <- match.arg(method)
 26 |   factors <- data.frame(factors)
 27 |   if (inherits(x, "dist")) distance <- TRUE
 28 |   if (method == "discoB")
 29 |     return(disco.between(x, factors = factors, distance = distance,
 30 |                          index = index, R = R))
 31 |   nfactors <- NCOL(factors)
 32 |   if (distance || inherits(x, "dist"))
 33 |     dst <- as.matrix(x) else dst <- as.matrix(dist(x))
 34 |   N <- NROW(dst)
 35 |   if (NCOL(dst) != N)
 36 |     stop("distance==TRUE but first argument is not distance")
 37 |   if (!isTRUE(all.equal(index, 1)))
 38 |     dst <- dst^index
 39 |   
 40 |   stats <- matrix(0, nfactors, 6)
 41 |   colnames(stats) <- c("Trt", "Within", "df1", "df2", "Stat", "p-value")
 42 |   
 43 |   for (j in 1:nfactors) {
 44 |     trt <- factors[, j]
 45 |     stats[j, 1:4] <- .disco1(trt = trt, dst = dst)
 46 |     if (R > 0) {
 47 |       b <- boot::boot(data = dst, statistic = .disco1stat, sim = "permutation",
 48 |                       R = R, trt = trt)
 49 |       stats[j, 5] <- b$t0
 50 |       stats[j, 6] <- (sum(b$t > b$t0) + 1)/(R + 1)
 51 |     } else {
 52 |       stats[j, 5] <- .disco1stat(dst, i = 1:nrow(dst), trt = trt)
 53 |       stats[j, 6] <- NA
 54 |     }
 55 |   }
 56 |   
 57 |   methodname <- "DISCO (F ratio)"
 58 |   dataname <- deparse(substitute(x))
 59 |   total <- sum(stats[1, 1:2])
 60 |   within <- total - sum(stats[, 1])
 61 |   Df.trt <- stats[, 3]
 62 |   factor.names <- names(factors)
 63 |   factor.levels <- sapply(factors, nlevels)
 64 |   sizes <- sapply(factors, tabulate)
 65 |   e <- list(call = match.call(), method = methodname,
 66 |             statistic = stats[, 5],
 67 |             p.value = stats[, 6],
 68 |             k = nfactors,
 69 |             N = N,
 70 |             between = stats[, 1],
 71 |             withins = stats[, 2],
 72 |             within = within,
 73 |             total = total,
 74 |             Df.trt = Df.trt,
 75 |             Df.e = nrow(dst) - sum(Df.trt) - 1,
 76 |             index = index, factor.names = factor.names,
 77 |             factor.levels = factor.levels,
 78 |             sample.sizes = sizes, stats = stats)
 79 |   class(e) <- "disco"
 80 |   e
 81 | }
 82 | 
 83 | disco.between <- function(x, factors, distance = FALSE, index = 1, R) {
 84 |   ## disco test based on the between-sample component similar to disco
 85 |   ## except that 'disco' test is based on the F ratio disco.between test
 86 |   ## for one factor (balanced) is asymptotically equivalent to k-sample E
 87 |   ## test (test statistics are proportional in that case but not in
 88 |   ## general).  x is response or Euclidean distance matrix or dist()
 89 |   ## object factors is a matrix or data frame of group labels
 90 |   ## distance=TRUE if x is distance, otherwise FALSE index is the exponent
 91 |   ## on distance, in (0,2]
 92 |   
 93 |   factors <- data.frame(factors)
 94 |   nfactors <- NCOL(factors)
 95 |   if (nfactors > 1)
 96 |     stop("More than one factor is not implemented in disco.between")
 97 |   if (distance || inherits(x, "dist"))
 98 |     dst <- as.matrix(x) else dst <- as.matrix(dist(x))
 99 |   N <- NROW(dst)
100 |   if (NCOL(dst) != N)
101 |     stop("distance==TRUE but first argument is not distance")
102 |   if (!isTRUE(all.equal(index, 1)))
103 |     dst <- dst^index
104 |   
105 |   trt <- factors[, 1]
106 |   if (R > 0) {
107 |     b <- boot::boot(data = dst, statistic = .disco1Bstat, sim = "permutation",
108 |                     R = R, trt = trt)
109 |     between <- b$t0
110 |     reps <- b$t
111 |     pval <- (1+sum(reps > between)) / (R+1)
112 |   } else {
113 |     between <- .disco1Bstat(dst, i = 1:nrow(dst), trt = trt)
114 |     pval <- NA
115 |   }
116 |   if (R == 0)
117 |     return(between)
118 |   
119 |   methodname <- "DISCO (Between-sample)"
120 |   dataname <- deparse(substitute(x))
121 |   
122 |   names(between) <- "DISCO between statistic"
123 |   e <- list(call = match.call(), method = methodname, statistic = between,
124 |             p.value = pval, data.name = dataname)
125 |   
126 |   class(e) <- "htest"
127 |   e
128 | }
129 | 
130 | .disco1 <- function(trt, dst) {
131 |   ## dst is Euclidean distance matrix or power of it trt is the treatment,
132 |   ## a factor
133 |   
134 |   trt <- factor(trt)
135 |   k <- nlevels(trt)
136 |   n <- tabulate(trt)
137 |   N <- sum(n)
138 |   total <- sum(dst)/(2 * N)
139 |   y <- as.vector(dst[, 1])
140 |   M <- model.matrix(y ~ 0 + trt)
141 |   G <- t(M) %*% dst %*% M
142 |   withins <- diag(G)/(2 * n)
143 |   W <- sum(withins)
144 |   B <- total - W
145 |   c(B, W, k - 1, N - k)
146 | }
147 | 
148 | .disco1stat <- function(dst, i, trt) {
149 |   ## i is permuation vector supplied by bootstrap dst is Euclidean
150 |   ## distance matrix or power of it trt is the treatment, a factor returns
151 |   ## the disco 'F' ratio
152 |   idx <- 1:nrow(dst)
153 |   d <- .disco1(trt = trt[idx[i]], dst = dst)
154 |   statistic <- (d[1]/d[3])/(d[2]/d[4])
155 | }
156 | 
157 | .disco1Bstat <- function(dst, i, trt) {
158 |   ## i is permuation vector supplied by bootstrap dst is Euclidean
159 |   ## distance matrix or power of it trt is the treatment, a factor returns
160 |   ## the between-sample component (for one factor)
161 |   idx <- 1:nrow(dst)
162 |   .disco1(trt = trt[idx[i]], dst = dst)[1]
163 | }
164 | 
165 | print.disco <- function(x, ...) {
166 |   k <- x$k
167 |   md1 <- x$between/x$Df.trt
168 |   md2 <- x$within/x$Df.e
169 |   f0 <- x$statistic
170 |   print(x$call)
171 |   cat(sprintf("\nDistance Components: index %5.2f\n", x$index))
172 |   cat(sprintf("%-15s %4s %10s %10s %9s %9s\n", "Source", "Df", "Sum Dist",
173 |               "Mean Dist", "F-ratio", "p-value"))
174 |   fabb <- abbreviate(x$factor.names, minlength=12)
175 |   for (i in 1:k) {
176 |     fname <- fabb[i]
177 |     cat(sprintf("%-15s %4d %10.5f %10.5f %9.3f %9s\n", fname, x$Df.trt[i],
178 |                 x$between[i], md1[i], f0[i], format.pval(x$p.value[i])))
179 |   }
180 |   cat(sprintf("%-15s %4d %10.5f %10.5f\n", "Within", x$Df.e, x$within,
181 |               md2))
182 |   cat(sprintf("%-15s %4d %10.5f\n", "Total", x$N - 1, x$total))
183 | }
184 | 


--------------------------------------------------------------------------------
/src/utilities.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |    utilities.c: some utilities for the energy package
  3 | 
  4 |    Author: Maria L. Rizzo
  5 |    github.com/mariarizzo/energy
  6 | 
  7 |    alloc_matrix, alloc_int_matrix, free_matrix, free_int_matrix:
  8 |      use R_Calloc, R_Free instead of Calloc, Free for memory management
  9 | 
 10 |    permute            permutes the first n elements of an integer vector
 11 |    row_order          converts arg from column order to row order
 12 |    vector2matrix      copies double* arg into double** arg
 13 |    distance           computes Euclidean distance matrix from double**
 14 |    Euclidean_distance computes Euclidean distance matrix from double*
 15 |    index_distance     computes Euclidean distance matrix D then D^index
 16 |    sumdist            sums the distance matrix without creating the matrix
 17 | 
 18 |    Notes:
 19 |    1. index_distance (declaration and body of the function) revised in
 20 |       energy 1.3-0, 2/2011.
 21 | */
 22 | 
 23 | #include <R.h>
 24 | #include <Rmath.h>
 25 | 
 26 | double **alloc_matrix(int r, int c);
 27 | int    **alloc_int_matrix(int r, int c);
 28 | void   free_matrix(double **matrix, int r, int c);
 29 | void   free_int_matrix(int **matrix, int r, int c);
 30 | 
 31 | void   permute(int *J, int n);
 32 | void   permute_check(int *J, int *N);
 33 | void   roworder(double *x, int *byrow, int r, int c);
 34 | void   vector2matrix(double *x, double **y, int N, int d, int isroworder);
 35 | 
 36 | void   distance(double **bxy, double **D, int N, int d);
 37 | void   Euclidean_distance(double *x, double **Dx, int n, int d);
 38 | void   index_distance(double **Dx, int n, double index);
 39 | void   sumdist(double *x, int *byrow, int *nrow, int *ncol, double *lowersum);
 40 | 
 41 | 
 42 | 
 43 | double **alloc_matrix(int r, int c)
 44 | {
 45 |     /* allocate a matrix with r rows and c columns */
 46 |     int i;
 47 |     double **matrix;
 48 |     matrix = R_Calloc(r, double *);
 49 |     for (i = 0; i < r; i++)
 50 |     matrix[i] = R_Calloc(c, double);
 51 |     return matrix;
 52 | }
 53 | 
 54 | 
 55 | int **alloc_int_matrix(int r, int c)
 56 | {
 57 |     /* allocate an integer matrix with r rows and c columns */
 58 |     int i;
 59 |     int **matrix;
 60 |     matrix = R_Calloc(r, int *);
 61 |     for (i = 0; i < r; i++)
 62 |     matrix[i] = R_Calloc(c, int);
 63 |     return matrix;
 64 | }
 65 | 
 66 | void free_matrix(double **matrix, int r, int c)
 67 | {
 68 |     /* free a matrix with r rows and c columns */
 69 |     int i;
 70 |     for (i = 0; i < r; i++) R_Free(matrix[i]);
 71 |     R_Free(matrix);
 72 | }
 73 | 
 74 | void free_int_matrix(int **matrix, int r, int c)
 75 | {
 76 |     /* free an integer matrix with r rows and c columns */
 77 |     int i;
 78 |     for (i = 0; i < r; i++) R_Free(matrix[i]);
 79 |     R_Free(matrix);
 80 | }
 81 | 
 82 | void permute(int *J, int n)
 83 | {
 84 |     /*
 85 |        permute the first n integers of J
 86 |        if n is length(J), returns a permutation vector
 87 |        equal to rev(Rcpp::sample(n, n, false))
 88 |     */
 89 |     int i, j, j0, m=n;
 90 |     for (i=0; i<n-1; i++) {
 91 | 		j = floor(runif(0.0, (double) m));
 92 |         m--;
 93 |         j0 = J[j];
 94 |         J[j] = J[m];
 95 |         J[m] = j0;
 96 |     }
 97 | }
 98 | 
 99 | void permute_check(int *J, int *N)
100 | {
101 |     /*
102 | 	  the permute function can be called from R using this wrapper
103 |     for debugging purposes 
104 |     */
105 | 	int n = *N;
106 | 	permute(J, n);
107 | }
108 | 
109 | void vector2matrix(double *x, double **y, int N, int d, int isroworder) {
110 |     /* copy a d-variate sample into a matrix, N samples in rows */
111 |     int i, k;
112 |     if (isroworder == TRUE) {
113 |         for (k=0; k<d; k++)
114 |             for (i=0; i<N; i++)
115 |                 y[i][k] = (*(x+i*d+k));
116 |         }
117 |     else {
118 |         for (k=0; k<N; k++)
119 |             for (i=0; i<d; i++)
120 |                 y[i][k] = (*(x+k*N+i));
121 |         }
122 |     return;
123 | }
124 | 
125 | 
126 | void roworder(double *x, int *byrow, int r, int c) {
127 |     /*
128 |       utility to convert a vector from column order to row order
129 |       assume that x is r by c matrix as a vector in column order
130 |     */
131 |     int    i, j, k, n=r*c;
132 |     double *y;
133 |     if (*byrow == TRUE) return;
134 |     y = R_Calloc(n, double);
135 |     i = 0;
136 |     for (j=0; j<r; j++) {
137 |         for (k=0; k<n; k+=r) {
138 |             y[i] = x[k+j];
139 |             i++;
140 |         }
141 |     }
142 |     for (i=0; i<n; i++)
143 |         x[i] = y[i];
144 |     R_Free(y);
145 |     *byrow = TRUE;
146 |     return;
147 | }
148 | 
149 | 
150 | void distance(double **data, double **D, int N, int d) {
151 |     /*
152 |        compute the distance matrix of sample in N by d matrix data
153 |        equivalent R code is:  D <- as.matrix(dist(data))
154 |     */
155 |     int    i, j, k;
156 |     double dif;
157 |     for (i=0; i<N; i++) {
158 |         D[i][i] = 0.0;
159 |         for (j=i+1; j<N; j++) {
160 |             D[i][j] = 0.0;
161 |             for (k=0; k<d; k++) {
162 |                 dif = data[i][k] - data[j][k];
163 |                 D[i][j] += dif*dif;
164 |             }
165 |             D[i][j] = sqrt(D[i][j]);
166 |             D[j][i] = D[i][j];
167 |         }
168 |     }
169 |     return;
170 | }
171 | 
172 | 
173 | void Euclidean_distance(double *x, double **Dx, int n, int d)
174 | {
175 |     /*
176 |         interpret x as an n by d matrix, in row order (n vectors in R^d)
177 |         compute the Euclidean distance matrix Dx
178 |     */
179 |     int i, j, k, p, q;
180 |     double dsum, dif;
181 |     for (i=1; i<n; i++) {
182 |         Dx[i][i] = 0.0;
183 |         p = i*d;
184 |         for (j=0; j<i; j++) {
185 |             dsum = 0.0;
186 |             q = j*d;
187 |             for (k=0; k<d; k++) {
188 |                 dif = *(x+p+k) - *(x+q+k);
189 |                 dsum += dif*dif;
190 |             }
191 |             Dx[i][j] = Dx[j][i] = sqrt(dsum);
192 |         }
193 |     }
194 | }
195 | 
196 | 
197 | void index_distance(double **Dx, int n, double index)
198 | {
199 |     /*
200 |         Dx is an n by n Euclidean distance matrix
201 |         if index NEQ 1, compute D^index
202 |     */
203 |     int i, j;
204 | 
205 |     if (fabs(index - 1) > DBL_EPSILON) {
206 |         for (i=0; i<n; i++)
207 |             for (j=i+1; j<n; j++) {
208 |                 Dx[i][j] = R_pow(Dx[i][j], index);
209 |                 Dx[j][i] = Dx[i][j];
210 |             }
211 |     }
212 | }
213 | 
214 | 
215 | void sumdist(double *x, int *byrow, int *nrow, int *ncol, double *lowersum)
216 | {
217 |     /*
218 |        sum all pairwise distances between rows of x
219 |        equivalent to this in R:  h <- sum(dist(x))
220 |        x must be in row order: x=as.double(t(x))
221 |     */
222 | 
223 |     int i, j, k, p, q, n=(*nrow), d=(*ncol);
224 |     double sum, dsum, dif;
225 |     if (*byrow == FALSE)
226 |         roworder(x, byrow, n, d);
227 |     sum = 0.0;
228 |     for (i=1; i<n; i++) {
229 |         p = i*d;
230 |         for (j=0; j<i; j++) {
231 |             dsum = 0.0;
232 |             q = j*d;
233 |             for (k=0; k<d; k++) {
234 |                 dif = *(x+p+k) - *(x+q+k);
235 |                 dsum += dif*dif;
236 |             }
237 |             sum += sqrt(dsum);
238 |         }
239 |     }
240 |     (*lowersum) = sum;
241 | }
242 | 
243 | 


--------------------------------------------------------------------------------