├── tests ├── ex1.Rout.save ├── sim1.Rout.save ├── mechler-ex.Rout.save ├── mechler-ex.R ├── sim1.R ├── sim1.Rout.save-32b ├── ex1.R ├── sim1.Rout.save-64b ├── mechler-ex.Rout.save-32b ├── mechler-ex.Rout.save-64b ├── ex1.Rout.save-32b └── ex1.Rout.save-64b ├── inst ├── doc │ └── hist-D11.rda ├── extraData │ └── qDiptab.rds └── NEWS.Rd ├── .Rinstignore ├── ChangeLog ├── data ├── qDiptab.R ├── exHartigan.R └── statfaculty.R ├── stuff ├── statlib_217_v2005.f ├── minProb.rds ├── asymp-res.rda ├── hist-D11.rda ├── d10k_do.R ├── d16k_do.R ├── d20k_do.R ├── d24k_do.R ├── d32k_do.R ├── d36k_do.R ├── d40k_do.R ├── d44k_do.R ├── d52k_do.R ├── d60k_do.R ├── d6k_do.R ├── d72k_do.R ├── d8k_do.R ├── d12k_do.R ├── sim-minProb.R ├── dip_N_do.R ├── qdipTab-use.R ├── new-simul1e5.R ├── Stuetzle-stat593-S2003-olive.doc ├── new-simul.R ├── qdipTab-large-n-expand.R ├── sim-minProb.Rout ├── minProb-anal.R ├── jeremy-unimodality-olives.R ├── pre-bugfix-2003-11 │ ├── dip.S │ └── dipF-statlib_2005-08-04.f ├── new-sim-analysis.R ├── unimodality.Rnw ├── asymp-distrib.R ├── jeremy-unimodality.R └── Stuetzle-stat593-S2003-olive.dat ├── .gitignore ├── .Rbuildignore ├── NAMESPACE ├── man ├── exHartigan.Rd ├── statfaculty.Rd ├── plot.dip.Rd ├── qDiptab.Rd ├── dip.test.Rd └── dip.Rd ├── TODO.md ├── DESCRIPTION ├── README.md ├── R ├── dipTest.R └── dip.R ├── vignettes ├── diptest.bib ├── myVignette.sty └── diptest-issues.Rnw └── src └── dip.c /tests/ex1.Rout.save: -------------------------------------------------------------------------------- 1 | ex1.Rout.save-64b -------------------------------------------------------------------------------- /tests/sim1.Rout.save: -------------------------------------------------------------------------------- 1 | sim1.Rout.save-64b -------------------------------------------------------------------------------- /inst/doc/hist-D11.rda: -------------------------------------------------------------------------------- 1 | ../../stuff/hist-D11.rda -------------------------------------------------------------------------------- /.Rinstignore: -------------------------------------------------------------------------------- 1 | inst/doc/Makefile 2 | inst/doc/.*sty 3 | -------------------------------------------------------------------------------- /tests/mechler-ex.Rout.save: -------------------------------------------------------------------------------- 1 | mechler-ex.Rout.save-64b -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmaechler/diptest/HEAD/ChangeLog -------------------------------------------------------------------------------- /data/qDiptab.R: -------------------------------------------------------------------------------- 1 | qDiptab <- diptest:::rdRDS("extraData", "qDiptab.rds") 2 | -------------------------------------------------------------------------------- /stuff/statlib_217_v2005.f: -------------------------------------------------------------------------------- 1 | pre-bugfix-2003-11/dipF-statlib_2005-08-04.f -------------------------------------------------------------------------------- /stuff/minProb.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmaechler/diptest/HEAD/stuff/minProb.rds -------------------------------------------------------------------------------- /stuff/asymp-res.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmaechler/diptest/HEAD/stuff/asymp-res.rda -------------------------------------------------------------------------------- /stuff/hist-D11.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmaechler/diptest/HEAD/stuff/hist-D11.rda -------------------------------------------------------------------------------- /inst/extraData/qDiptab.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmaechler/diptest/HEAD/inst/extraData/qDiptab.rds -------------------------------------------------------------------------------- /stuff/d10k_do.R: -------------------------------------------------------------------------------- 1 | N <- 10000 2 | n.sim <- 1000001 3 | set.seed(14) 4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE) 5 | 6 | -------------------------------------------------------------------------------- /stuff/d16k_do.R: -------------------------------------------------------------------------------- 1 | N <- 16000 2 | n.sim <- 1000001 3 | set.seed(117) 4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE) 5 | 6 | -------------------------------------------------------------------------------- /stuff/d20k_do.R: -------------------------------------------------------------------------------- 1 | N <- 20000 2 | n.sim <- 1000001 3 | set.seed(12) 4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE) 5 | 6 | -------------------------------------------------------------------------------- /stuff/d24k_do.R: -------------------------------------------------------------------------------- 1 | N <- 24000 2 | n.sim <- 1000001 3 | set.seed(14) 4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE) 5 | 6 | -------------------------------------------------------------------------------- /stuff/d32k_do.R: -------------------------------------------------------------------------------- 1 | N <- 32000 2 | n.sim <- 1000001 3 | set.seed(14) 4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE) 5 | 6 | -------------------------------------------------------------------------------- /stuff/d36k_do.R: -------------------------------------------------------------------------------- 1 | N <- 36000 2 | n.sim <- 1000001 3 | set.seed(47) 4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE) 5 | 6 | -------------------------------------------------------------------------------- /stuff/d40k_do.R: -------------------------------------------------------------------------------- 1 | N <- 40000 2 | n.sim <- 1000001 3 | set.seed(67) 4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE) 5 | 6 | -------------------------------------------------------------------------------- /stuff/d44k_do.R: -------------------------------------------------------------------------------- 1 | N <- 44000 2 | n.sim <- 1000001 3 | set.seed(14) 4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE) 5 | 6 | -------------------------------------------------------------------------------- /stuff/d52k_do.R: -------------------------------------------------------------------------------- 1 | N <- 52000 2 | n.sim <- 1000001 3 | set.seed(109) 4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE) 5 | 6 | -------------------------------------------------------------------------------- /stuff/d60k_do.R: -------------------------------------------------------------------------------- 1 | N <- 60000 2 | n.sim <- 1000001 3 | set.seed(97) 4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE) 5 | 6 | -------------------------------------------------------------------------------- /stuff/d6k_do.R: -------------------------------------------------------------------------------- 1 | N <- 6000 2 | n.sim <- 1000001 3 | set.seed(64) 4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE) 5 | 6 | -------------------------------------------------------------------------------- /stuff/d72k_do.R: -------------------------------------------------------------------------------- 1 | N <- 72000 2 | n.sim <- 1000001 3 | set.seed(43) 4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE) 5 | 6 | -------------------------------------------------------------------------------- /stuff/d8k_do.R: -------------------------------------------------------------------------------- 1 | N <- 20000 2 | n.sim <- 1000001 3 | set.seed(12) 4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE) 5 | 6 | -------------------------------------------------------------------------------- /data/exHartigan.R: -------------------------------------------------------------------------------- 1 | message("'exHartigan' data is identical to 'statfaculty' and hence deprecated.", 2 | "\n Use the 'statfaculty' instead") 3 | source("statfaculty.R") 4 | exHartigan <- statfaculty 5 | -------------------------------------------------------------------------------- /data/statfaculty.R: -------------------------------------------------------------------------------- 1 | statfaculty <- 2 | c(30,33,35,36,37,37,39,39,39,39,39,40,40,40,40,41,42,43,43,43,44,44,45,45,46, 3 | 46,47,47,48,48,48,49,50,50,51,52,52,53,53,53,53,53,54,54,57,57,59,60,60,60, 4 | 61,61,61,61,62,62,62,62,63,66,70,72,72) 5 | 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Archive*.txt 2 | *M1mac* 3 | diptest*.out 4 | stuff/pre-bugfix-2003-11 5 | stuff/*.out 6 | stuff/*.tex 7 | stuff/*.Rout* 8 | stuff/dip[0-9]*.rda 9 | stuff/dipSim*.rda 10 | inst/doc/diptest-issues* 11 | inst/doc/*.bib 12 | inst/doc/*.sty 13 | TAGS 14 | *~ 15 | *,v* 16 | *-ss 17 | vignettes/cache/* 18 | .Rhistory 19 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ## -*-sh-*- using reg.exp (==> no initial or final '.*'), but case is ignored! 2 | 3 | stuff 4 | ^M1mac 5 | ^TODO 6 | ,v$ 7 | -ss$ 8 | tests/.*64b 9 | tests/.*32b 10 | vignettes/auto 11 | inst/doc/auto 12 | vignettes/.*\.(tex|toc|aux|log|bbl|blg|out)$ 13 | inst/doc/.*\.(tex|toc|aux|log|bbl|blg|out)$ 14 | inst/doc/jss. 15 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(diptest, .registration=TRUE)# <--> src/dip.c 2nd part 2 | 3 | importFrom("graphics", 4 | abline, axis, 5 | legend, lines, 6 | par, 7 | title) 8 | 9 | importFrom("stats", 10 | approx, complete.cases, ecdf, runif) 11 | 12 | export(dip) 13 | export(dip.test) 14 | ## export(aLine, getCM) 15 | 16 | S3method(print, dip) 17 | S3method(plot, dip) 18 | 19 | -------------------------------------------------------------------------------- /man/exHartigan.Rd: -------------------------------------------------------------------------------- 1 | \name{exHartigan} 2 | \alias{exHartigan} 3 | \title{Hartigan's Artificial n-modal Example Data Set} 4 | \description{ 5 | 63 (integer) numbers; unimodal or bimodal, that's the question. 6 | 7 | This is now \emph{deprecated}. 8 | Please use \code{\link{statfaculty}} instead! 9 | } 10 | \examples{ 11 | data(exHartigan) 12 | plot(dH <- density(exHartigan)) 13 | rug(exHartigan)# should jitter 14 | } 15 | \keyword{data} 16 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | - The vignette `inst/doc/diptest-issues.Rnw` 2 | has mentioned the "new" (May 2011) dip.test() function; 3 | Still have not finalized *analyzing* the simulations in ./stuff/ 4 | 5 | - Consider an analogue qnormDiptab which is constructed using 6 | `rnorm(.)` instead of `runif(.)` simulations. 7 | - This idea is old; Werner Stuetzle's student, Jeremy Tantrum, 8 | did things in this direction in 2003; see, `stuff/jeremy-unimodality.R` 9 | 10 | - Visualize the l.c.m. and g.c.m. and the modal interval ! 11 | g.c.m = greatest convex minorant =: G(x) 12 | l.c.m = least concave majorant =: H(x) 13 | -------------------------------------------------------------------------------- /stuff/d12k_do.R: -------------------------------------------------------------------------------- 1 | stopifnot(require("diptest")) 2 | setwd("/u/maechler/R/Pkgs/diptest/stuff") 3 | 4 | N <- 12000 5 | n.sim <- 1000001 6 | dd <- numeric(n.sim) 7 | .pt <- proc.time() 8 | 9 | set.seed(12) 10 | for(i in 1:n.sim) { 11 | dd[i] <- dip(runif(N)) 12 | if(i %% 100 == 0) { 13 | cat(".") 14 | if(i %% 10000 == 0) { 15 | cat("",i,"\n") 16 | .opt <- .pt ; .pt <- proc.time(); .pt - .opt 17 | } 18 | } 19 | } 20 | 21 | nam <- paste("dip", floor(N/1000),"k", sep='') 22 | assign(nam, dd) 23 | (outf <- paste(nam, "rda", sep=".")) 24 | save(list = c(nam, "N"), file = outf) 25 | -------------------------------------------------------------------------------- /tests/mechler-ex.R: -------------------------------------------------------------------------------- 1 | library(diptest) 2 | ## These are from 3 | ## the 217-readme.doc file that explains the bug fixed by 4 | ## Ferenc Mechler (fmechler@med.cornell.edu). [5/Sep/2002] 5 | ## 6 | ex1 <- c(0.0198, 0.0198, 0.1961, 0.2898, 0.3184, 0.3687, 7 | 0.4336, 0.4987, 0.5661, 0.6530, 0.7476, 0.8555) 8 | 9 | ex2 <- c(0.0198, 0.1961, 0.2898, 0.3184, 0.3687, 0.4336, 10 | 0.4987, 0.5661, 0.6530, 0.7476, 0.8555, 0.9912) 11 | 12 | ## Multiply them by 10000 here: 13 | 14 | (D1 <- dip(10000*ex1, full=TRUE, debug=2)) 15 | str(D1, digits = 10, vec.len = 12) 16 | 17 | (D2 <- dip(10000*ex2, full=TRUE, debug=2)) 18 | str(D2, digits = 10, vec.len = 12) 19 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: diptest 2 | Version: 0.77-2 3 | VersionNote: Last CRAN: 0.77-1 on 2024-03-31 4 | Date: 2025-08-19 5 | Authors@R: c( 6 | person("Martin", "Maechler", role = c("aut", "cre"), 7 | email = "maechler@stat.math.ethz.ch", 8 | comment = c("Ported to R and C from Fortran and S-plus; much enhanced", 9 | ORCID = "0000-0002-8685-9910")), 10 | person("Dario Ringach", role = "aut", comment = "Fortran and S-plus; at NYU.edu")) 11 | Title: Hartigan's Dip Test Statistic for Unimodality - Corrected 12 | Description: Compute Hartigan's dip test statistic for unimodality / 13 | multimodality and provide a test with simulation based p-values, where 14 | the original public code has been corrected. 15 | Imports: graphics, stats 16 | BuildResaveData: no 17 | License: GPL (>= 2) 18 | URL: https://github.com/mmaechler/diptest 19 | BugReports: https://github.com/mmaechler/diptest/issues 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # diptest 2 | 3 | ## R CRAN package `diptest`: Hartigan's diptest for unimodality (including p-values) 4 | 5 | Although this package `diptest` has been created a long time ago, with a 6 | first CRAN publication in 2003, see the official page of 7 | [CRAN package diptest](https://CRAN.R-project.org/package=diptest), the 8 | package *development* sources and history have only been made available on 9 | Nov.30, 2020. 10 | 11 | As with my R package [sfsmisc](https://github.com/mmaechler/sfsmisc), I was 12 | able to keep most of the editing and development _history_ of this package, 13 | using my [emacs](https://www.gnu.org/software/emacs/) backup files (`*.~`) 14 | together with some RCS (`*,v`) histories, using my `G2RCSn` shell script, 15 | and most importantly a `ruby` script to create a git repos including 16 | history, see here, for how it worked with `sfsmisc`: 17 | https://mmaechler.blogspot.com/2014/08/how-i-got-175-years-old-github.html 18 | 19 | -------------------------------------------------------------------------------- /stuff/sim-minProb.R: -------------------------------------------------------------------------------- 1 | 2 | ### If D_n = D(X_1, ... X_n) is the dip statistic, we have 3 | ### D_n >= 1/(2 n) 4 | ### pm(n) := Pr[ D == 1/(2 n) ] > 0 5 | ### == ------------------------ 6 | ## and hence the distribution of D_n(X), X ~ U[0,1] starts with a jump, 7 | ## from 0 to pm(n) at d = 1/(2n). 8 | 9 | ### Now estimate pm(n) via simulation. 10 | ### The simulation is analyzed in file ./minProb-anal.R 11 | ### ~~~~~~~~~~~~~~ 12 | setwd("~/R/Pkgs/diptest/stuff") 13 | 14 | library(diptest) 15 | 16 | Ns <- 500000 # number of samples (per n) 17 | isim <- 1:Ns 18 | nn <- c(4:18,20,25,30,35,40,50,60, 75, 100) 19 | 20 | nMin <- sapply(nn, function(n) 21 | sum(sapply(isim, 22 | function(i) abs(1 - 2*n*dip(runif(n))) < 1e-5))) 23 | names(nMin) <- paste(nn) 24 | attr(nMin, "Ns") <- Ns 25 | ## nMin / Ns == pm(n), i.e. pm(nn) 26 | saveRDS(nMin, file= "minProb.rds") 27 | 28 | proc.time() 29 | sessionInfo() 30 | -------------------------------------------------------------------------------- /stuff/dip_N_do.R: -------------------------------------------------------------------------------- 1 | ### This is sourced from a file that sets N , n.sim , .Random.seed ... 2 | ### 3 | stopifnot(exists("N"), is.numeric(N), length(N) == 1, N == round(N), 4 | exists("n.sim"), is.numeric(n.sim), length(n.sim) == 1, 5 | n.sim == round(n.sim)) 6 | if(exists(".Random.seed")) { 7 | cat("using \n") 8 | dump(".Random.seed", "") 9 | } else 10 | set.seed(12) 11 | stopifnot(require("diptest")) 12 | setwd("/u/maechler/R/Pkgs/diptest/stuff") 13 | 14 | dd <- numeric(n.sim) 15 | .pt <- proc.time() 16 | 17 | for(i in 1:n.sim) { 18 | dd[i] <- dip(runif(N)) 19 | if(i %% 100 == 0) { 20 | cat(".") 21 | if(i %% 10000 == 0) { 22 | cat("",i,"\n") 23 | .opt <- .pt ; .pt <- proc.time(); .pt - .opt 24 | } 25 | } 26 | } 27 | 28 | nam <- paste("dip", floor(N/1000),"k", sep='') 29 | assign(nam, dd) 30 | (outf <- paste(nam, "rda", sep=".")) 31 | save(list = c(nam, "N"), file = outf) 32 | 33 | sessionInfo() 34 | -------------------------------------------------------------------------------- /man/statfaculty.Rd: -------------------------------------------------------------------------------- 1 | \name{statfaculty} 2 | \alias{statfaculty} 3 | %- uncomment, once exHartigan is defunct: 4 | %- \alias{exHartigan} 5 | \title{Faculty Quality in Statistics Departments} 6 | \docType{data} 7 | \usage{ 8 | data(statfaculty) 9 | } 10 | \description{ 11 | Faculty quality in statistics departments was assessed as part of a 12 | larger study reported by Scully(1982). 13 | 14 | Accidentally, this is also provided as the \code{exHartigan} 15 | (\dQuote{\bold{ex}ample of \bold{Hartigan}s'}) data set. 16 | } 17 | \format{A numeric vector of 63 (integer) numbers, sorted increasingly, 18 | as reported by the reference. 19 | } 20 | \references{ 21 | J. A. Hartigan and P. M. Hartigan (1985) 22 | The Dip Test of Unimodality; 23 | \emph{Annals of Statistics} \bold{13}, 70--84. 24 | } 25 | \source{ 26 | M. G. Scully (1982) 27 | Evaluation of 596 programs in mathematics and physical sciences; 28 | \emph{Chronicle Higher Educ.} \bold{25} 5, 8--10. 29 | } 30 | \examples{ 31 | data(statfaculty) 32 | plot(dH <- density(statfaculty)) 33 | rug(jitter(statfaculty)) 34 | 35 | data(exHartigan) 36 | stopifnot(identical(exHartigan,statfaculty)) 37 | } 38 | \keyword{datasets} 39 | -------------------------------------------------------------------------------- /tests/sim1.R: -------------------------------------------------------------------------------- 1 | #### Very small scale simulation to make the point 2 | #### --> See ../stuff/ for much more 3 | library(diptest) 4 | 5 | P.p <- c(1, 5, 10, 25)/100 6 | (P.p <- c(P.p, 1/2, rev(1 - P.p))) 7 | 8 | N.sim <- 9999 9 | set.seed(94) 10 | .p0 <- proc.time() 11 | dU100 <- replicate(N.sim, dip(runif(100))) 12 | cat('Time elapsed: ', (p1 <- proc.time()) - .p0,'\n'); .p0 <- p1 13 | ## Lynne (2003: P IV, 1.6 GHz): ~7 s 14 | ## 2010 (AMD Phenom II X4 925): 1.3 s 15 | 16 | 100 * round(q100 <- quantile(dU100, p = P.p), 4) 17 | 18 | plot(density(sqrt(100) * dU100), lwd = 2, col=2, 19 | main = expression("Dip distribution" ~~ 20 | list(sqrt(n)* D[n], ~ n == 100))) 21 | abline(h=0, col="dark gray", lty=3) 22 | 23 | round(1e4 * quantile(dU100, p = seq(0,1, by = 0.01), names = FALSE)) 24 | 25 | ##--- an extreme unimodal case -- i.e. very small dip(): 26 | set.seed(60); x <- rexp(301,1)^3 27 | hist(x) 28 | (dt.x <- dip.test(x)) 29 | (dt2 <- dip.test(x, simulate = TRUE)) 30 | (dt3 <- dip.test(x, simulate = TRUE, B = 10000)) 31 | stopifnot(dt.x$p.value == 1,## <- gave NA earlier 32 | dt2$p.value == 1, 33 | dt3$p.value == 1) 34 | 35 | 36 | cat('Time elapsed: ', proc.time() - .p0,'\n') # "stats" 37 | -------------------------------------------------------------------------------- /stuff/qdipTab-use.R: -------------------------------------------------------------------------------- 1 | ####--- Use the simulated quantiles to compute P-values 2 | 3 | ### Now only working with qdipTab 4 | library(diptest) 5 | data(qDiptab) 6 | (dnq <- dimnames(qDiptab)) 7 | (P.p <- as.numeric(dnq$Pr))## 0 0.01 ..... 0.99999 1 8 | (n <- as.integer(dnq$n)) ## 4 5..10 15 ... 1000 2000 5000 9 | ## sqrt(n) * D has limit distribution (sometimes..) 10 | rq <- sqrt(n) * qDiptab 11 | 12 | filled.contour(n, P.p, log10(rq), 13 | plot.title= contour(n, P.p, log10(rq), add = TRUE)) 14 | 15 | logit <- qlogis 16 | contour(n, P.p, logit(rq)) 17 | 18 | ## correct P = {0,1} to {1/(2N), 1 - 1/(2N)} for N = 1000001 - 1 = 1e6 19 | N <- 1000000 20 | Pcp <- P.p 21 | Pcp[P.p == 0] <- 1/(2*N) 22 | Pcp[P.p == 1] <- 1 - 1/(2*N) 23 | 24 | str(dd <- cbind(expand.grid(n=n, p=Pcp), rq = c(rq))) 25 | 26 | coplot(rq ~ p | log10(n), data = dd) 27 | coplot(p ~ rq | log10(n), data = dd) 28 | 29 | ## NOTA BENE: qlogis(.) === logit(.) 30 | coplot(qlogis(p) ~ rq | log10(n), data = dd) 31 | 32 | library(mgcv) 33 | g1 <- gam(qlogis(p) ~ s(log(n),rq), data = dd) 34 | summary(g1) # 21.56 d.f. // Deviance explained 88.5% 35 | 36 | if(FALSE) ## Not quite --- 50 warnings --- need to use weights! etc 37 | g2 <- gam(p ~ s(log(n),rq), family = binomial, data = dd) 38 | -------------------------------------------------------------------------------- /stuff/new-simul1e5.R: -------------------------------------------------------------------------------- 1 | #### More extensive simulations than Hartigan (1985)'s table 1 2 | #### --------------------------------------------------- 3 | ##-> ./dip-simul.S 4 | ##-> ./sim1.R 5 | library(diptest) 6 | 7 | nn <- c(4:10, 15, 20, 30, 50, 100, 200, 500, 1000, 2000, 5000) 8 | Ns <- 100001 9 | ## ~~~~~~ number of "simulations" (i.e. samples for each n in nn) 10 | 11 | ## which percentages 12 | p.hi <- sort(1 - c(outer(c(1,2,5), 2:5, function(x,y) x*10^-y))) 13 | p.lo <- c(1,2,5)/100 14 | (P.p <- c(p.lo, (1:9)/10, p.hi)) 15 | 16 | ## Proof that "quantile()" is just order statistics for this `Ns' : 17 | iq.p <- 1 + P.p * (Ns - 1) 18 | U <- runif(Ns) 19 | stopifnot(identical(sort(U)[iq.p], quantile(U, P.p, names=FALSE))) 20 | 21 | P.dip <- matrix(nrow= length(nn), ncol = length(P.p), 22 | dimnames = list(as.character(nn), formatC(P.p,w=1,digits=7))) 23 | dip.n <- numeric(Ns) 24 | Tcpu <- 0 25 | set.seed(963) 26 | for(n in nn) { 27 | cat("n=",n,":") 28 | cpu <- system.time(for(i in 1:Ns) 29 | { 30 | if(i %% (Ns%/% 10) == 0) cat(i,"") 31 | else if(i %% (Ns%/% 100) == 0) cat(".") 32 | dip.n[i] <- dip(runif(n)) 33 | } 34 | )[1:3] 35 | P.dip[paste(n),] <- quantile(dip.n, p = P.p, names=FALSE) 36 | cat("\nn=",n,", cpu=", paste(formatC(cpu), collapse=", "),"\n\n") 37 | Tcpu <- Tcpu + cpu 38 | } 39 | 40 | save(nn, Ns, P.p, P.dip, file="dipSim_1e5.rda", compress=TRUE) 41 | 42 | cat("\nTotal CPU = ", paste(formatC(Tcpu), collapse=", "),"\n\n") 43 | 44 | ##--> File "dip-simul-sess" to the CPU times .. ! 45 | q('no') 46 | -------------------------------------------------------------------------------- /stuff/Stuetzle-stat593-S2003-olive.doc: -------------------------------------------------------------------------------- 1 | From Werner Stuetzle's 2 | "Stat 593 E, Cluster Analysis and Other Unsupervised Learning Methods, Spring 2003" 3 | 4 | --> http://www.stat.washington.edu/wxs/Stat593-s03/datasets.htm 5 | and http://www.stat.washington.edu/wxs/Stat593-s03/Datasets/olive.dat 6 | http://www.stat.washington.edu/wxs/Stat593-s03/Datasets/olive.doc 7 | both files, dated 31-Mar-2003 16:46 8 | 9 | ---- MM : I have constructed ./olive.tab which you can read into R with 10 | 11 | olives <- 12 | read.table("/u/maechler/R/Pkgs/diptest/stuff/Stuetzle-stat593-S2003-olive.tab", 13 | header=TRUE) 14 | 15 | ## together with code in ./jeremy-unimodality.R : 16 | all.equal(unname(olives.CF[,-2]), unname(olives[,-2])) ## TRUE 17 | ## but clearly, also the last three *Variable* names differ... [aargh] 18 | 19 | --------------- this is olive.doc ------------------------------------------- 20 | 21 | From @craft.clarkson.edu:hopkepk@craft.camp.clarkson.edu Tue Nov 17 10:23:51 1992 22 | Received: from omnigate.clarkson.edu by stat.rutgers.edu (5.59/SMI4.0/RU1.5/3.08) 23 | id AA12876; Tue, 17 Nov 92 10:23:30 EST 24 | Received: from craft.clarkson.edu by omnigate.clarkson.edu id aa26582; 25 | 17 Nov 92 10:23 EST 26 | Received: by craft.camp.clarkson.edu (AIX 3.1/UCB 5.61/4.03) 27 | id AA28251; Tue, 17 Nov 92 10:22:46 -0500 28 | Date: Tue, 17 Nov 92 10:22:46 -0500 29 | From: "Philip K. Hopke" 30 | Message-Id: <9211171522.AA28251@craft.camp.clarkson.edu> 31 | To: dcook@stat.rutgers.edu 32 | Subject: try again 33 | Status: R 34 | 35 | ID Region Class palmitic palmitoleic stearic oleic linoleic eicosanoic linolenic eicosenoic 36 | -------------------------------------------------------------------------------- /man/plot.dip.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.dip} 2 | \alias{plot.dip} 3 | \title{Plot a dip() Result, i.e., Class "dip" Object} 4 | \description{ 5 | Plot method for \code{"dip"} objects, i.e., the result of 6 | \code{\link{dip}(., full.result=TRUE)} or similar. 7 | 8 | Note: We may decide to enhance the plot in the future, possibly not 9 | entirely back-compatibly. 10 | } 11 | \usage{ 12 | \method{plot}{dip}(x, do.points = (n < 20), 13 | colG = "red3", colL = "blue3", colM = "forest green", 14 | col.points = par("col"), col.hor = col.points, 15 | doModal = TRUE, doLegend = TRUE, \dots) 16 | } 17 | \arguments{ 18 | \item{x}{an \R object of \code{\link{class}} \code{"dip"}, i.e., 19 | typically the result of \code{\link{dip}(., full.result= FF)} where 20 | \code{FF} is \code{TRUE} or a string such as \code{"all"}.} 21 | \item{do.points}{logical indicating if the ECDF plot should include 22 | points; passed to \code{\link{plot.ecdf}}.} 23 | \item{colG, colL, colM}{the colors to be used in the graphics for the 24 | \bold{G}reatest convex minorant, the 25 | \bold{L}east concave majorant, and the 26 | \bold{M}odal interval, respectively.} 27 | \item{col.points, col.hor}{the color of points or horizontal lines, 28 | respectively, simply passed to \code{\link{plot.ecdf}}.} 29 | \item{doModal}{logical indicating if the modal interval \eqn{[x_L, x_U]}{[xL, xU]} 30 | should be shown.} 31 | \item{doLegend}{logical indicating if a legend should be shown.} 32 | \item{\dots}{further optional arguments, passed to \code{\link{plot.ecdf}}.} 33 | } 34 | \author{Martin Maechler} 35 | \seealso{ 36 | \code{\link{dip}}, also for examples; \code{\link{plot.ecdf}}. 37 | } 38 | \keyword{hplot} 39 | -------------------------------------------------------------------------------- /stuff/new-simul.R: -------------------------------------------------------------------------------- 1 | #### More extensive simulations than Hartigan (1985)'s table 1 2 | #### --------------------------------------------------- 3 | ##-> ./dip-simul.S 4 | ##-> ./sim1.R 5 | library(diptest) 6 | pd <- package.description("diptest") 7 | cat("diptest package version ", pd['Version']," (",pd['Date'],")\n",sep="") 8 | 9 | nn <- c(4:10, 15, 20, 30, 50, 100, 200, 500, 1000, 2000, 5000) 10 | Ns <- 1000001 11 | ## ~~~~~~~ number of "simulations" (i.e. samples for each n in nn) 12 | 13 | ## which percentages 14 | p.hi <- sort(1 - c(outer(c(1,2,5), 2:5, function(x,y) x*10^-y))) 15 | p.lo <- c(1,2,5)/100 16 | (P.p <- c(0, p.lo, (1:9)/10, p.hi, 1))# 0 & 1: the extremes are "interesting" here 17 | 18 | ## "quantile()" is just order statistics X[k] (k := iq.p) for this `Ns' : 19 | (iq.p <- 1 + P.p * (Ns - 1)) 20 | U <- runif(Ns) 21 | stopifnot(identical(sort(U)[iq.p], ## , partial=iq.p, method="quick" 22 | quantile(U, P.p, names=FALSE))) 23 | 24 | P.dip <- matrix(nrow= length(nn), ncol = length(P.p), 25 | dimnames = list(as.character(nn), formatC(P.p,w=1,digits=7))) 26 | attr(P.dip, "N_1") <- Ns - 1 27 | dip.n <- numeric(Ns) 28 | Tcpu <- 0 29 | set.seed(963) 30 | for(n in nn) { 31 | cat("n=",n,":") 32 | cpu <- system.time(for(i in 1:Ns) 33 | { 34 | if(i %% (Ns%/% 10) == 0) cat(i,"") 35 | else if(i %% (Ns%/% 100) == 0) cat(".") 36 | dip.n[i] <- dip(runif(n)) 37 | } 38 | )[1:3] 39 | P.dip [paste(n),] <- quantile(dip.n, p = P.p, names=FALSE) 40 | cat("\nn=",n,", cpu=", paste(formatC(cpu), collapse=", "),"\n\n") 41 | Tcpu <- Tcpu + cpu 42 | } 43 | 44 | save(nn, Ns, P.p, P.dip, file="dipSim_1e6.rda", compress=TRUE) 45 | 46 | cat("\nTotal CPU = ", paste(formatC(Tcpu), collapse=", "),"\n\n") 47 | 48 | ##--> File "dip-simul-sess" to the CPU times .. ! 49 | q('no') 50 | -------------------------------------------------------------------------------- /stuff/qdipTab-large-n-expand.R: -------------------------------------------------------------------------------- 1 | Ns <- 1000001 2 | 3 | stopifnot(require("diptest") && is.character(data(qDiptab))) 4 | 5 | ## begin { from ./asymp-distrib.R } ------------------------------------------- 6 | ## These all have n.sim = 1000'001 samples of 7 | ## dip(runif(N)) for "large N" 8 | ## where produced by scripts such as ./d20k_do.R 9 | N.k.set <- c(8,12,16,20,24,32,36,40) 10 | ## or automatically 11 | patt <- "^dip(.*)k\\.rda" 12 | N.k.set <- sort(as.integer(sub(patt, "\\1", list.files(pattern = patt)))) 13 | dip.nm <- function(N.k, file=FALSE) 14 | paste("dip", N.k, if(file)"k.rda" else "k", sep='') 15 | for(N.k in N.k.set) 16 | load(dip.nm(print(N.k), file=TRUE)) 17 | 18 | d.dip <- function(N.k, scaleUp = TRUE) 19 | { 20 | ## Purpose: Simulation data for N = N.k * 1000, possibly sqrt(N) scaled 21 | ## ---------------------------------------------------------------------- 22 | ## Author: Martin Maechler, Date: 15 Apr 2009, 10:36 23 | N <- N.k * 1000 24 | nm <- dip.nm(N.k) 25 | if(scaleUp) sqrt(N) * get(nm) else get(nm) 26 | } 27 | ##----end{ from ./asymp-distrib.R } ------------------------------------------- 28 | 29 | ## 30 | dn <- dimnames(qDiptab) 31 | nn <- as.numeric(dn[[1]]) 32 | P.p <- as.numeric(dn[[2]]) 33 | names(N.k.set) <- 1000* N.k.set 34 | qNew <- sapply(N.k.set, function(.) 35 | quantile(d.dip(., scaleUp=FALSE), 36 | probs = P.p)) 37 | signif(qNew, 3) 38 | qDiptab.n <- rbind(qDiptab, 39 | t(qNew[, as.character(1000*c(10,20,40,72))])) 40 | ## the dimnames-names got lost in rbind(): 41 | names(dimnames(qDiptab.n)) <- names(dimnames(qDiptab)) 42 | 43 | attr(qDiptab.n, "N_1") <- as.integer(Ns - 1) 44 | ## --- Here comes the *NEW* one: 45 | attach("~/R/Pkgs/diptest/data/qDiptab.rda") 46 | 47 | qDiptab.prev <- qDiptab 48 | qDiptab <- qDiptab.n 49 | if(FALSE) # do not do this "accidentally" ! 50 | save(qDiptab, file="~/R/Pkgs/diptest/data/qDiptab.rda") 51 | -------------------------------------------------------------------------------- /man/qDiptab.Rd: -------------------------------------------------------------------------------- 1 | \name{qDiptab} 2 | \alias{qDiptab} 3 | \title{Table of Quantiles from a Large Simulation for Hartigan's Dip Test} 4 | \docType{data} 5 | \description{ 6 | Whereas Hartigan(1985) published a table of empirical percentage 7 | points of the dip statistic (see \code{\link{dip}}) based on N=9999 8 | samples of size \eqn{n} from \eqn{U[0,1]}, our table of empirical 9 | quantiles is currently based on N=1'000'001 samples for each \eqn{n}. 10 | } 11 | \note{ 12 | Taking N=1'000'001 ensures that all the \code{\link{quantile}(X, p)} 13 | used here are exactly order statistics \code{sort(X)[k]}. 14 | } 15 | \format{ 16 | A numeric matrix %may change: of dimension 17 * 26, 17 | where each row corresponds to sample size \eqn{n}, and each column to 18 | a probability (percentage) in \eqn{[0,1]}. The dimnames are named 19 | \code{n} and \code{Pr} and coercable to these values, see the 20 | examples. \code{attr(qDiptab, "N_1")} is \eqn{N - 1}, such that with 21 | \code{k <- as.numeric(dimnames(qDiptab)$Pr) * attr(qDiptab, "N_1")}, 22 | e.g., \code{qDiptab[n == 15,]} contains exactly the order statistics 23 | \eqn{D_{[k]}} (from the \eqn{N+1} simulated values of 24 | \code{\link{dip}(U)}, where \code{U <- runif(15)}. 25 | } 26 | \seealso{\code{\link{dip}}, also for the references; 27 | \code{\link{dip.test}()} which performs the hypothesis test, using 28 | \code{qDtiptab} (and its null hypothesis of a uniform distribution). 29 | } 30 | \author{Martin Maechler \email{maechler@stat.math.ethz.ch}, in its 31 | earliest form in August 1994. 32 | } 33 | \examples{ 34 | data(qDiptab) 35 | str(qDiptab) 36 | ## the sample sizes `n' : 37 | dnqd <- dimnames(qDiptab) 38 | (nn <- as.integer(dnqd $n)) 39 | ## the probabilities: 40 | P.p <- as.numeric(print(dnqd $ Pr)) 41 | 42 | ## This is as "Table 1" in Hartigan & Hartigan (1985) -- but more accurate 43 | ps <- c(1,5,10,50,90,95,99, 99.5, 99.9)/100 44 | tab1 <- qDiptab[nn <= 200, as.character(ps)] 45 | round(tab1, 4) 46 | } 47 | \keyword{datasets} 48 | -------------------------------------------------------------------------------- /R/dipTest.R: -------------------------------------------------------------------------------- 1 | ##' also called from ../data/qDiptab.R : 2 | if(getRversion() < "2.13.0") { 3 | rdRDS <- function(..., package = "diptest") 4 | .readRDS(system.file(..., package=package)) 5 | } else 6 | rdRDS <- function(..., package = "diptest") 7 | readRDS(system.file(..., package=package, mustWork=TRUE)) 8 | 9 | dip.test <- function(x, simulate.p.value = FALSE, B = 2000) 10 | { 11 | DNAME <- deparse(substitute(x)) 12 | x <- sort(x[complete.cases(x)]) 13 | stopifnot(is.numeric(x)) 14 | n <- length(x) # *is* integer 15 | D <- dip(x) 16 | method <- "Hartigans' dip test for unimodality / multimodality" 17 | if(n <= 3) { 18 | P <- 1 19 | } else if(simulate.p.value) { 20 | method <- paste(method, 21 | "with simulated p-value\n\t (based on", B, "replicates)") 22 | P <- mean(D <= replicate(B, dip(runif(n)))) 23 | } else { 24 | ## Long "codetools-compatible" way of data(qDiptab) : 25 | qDiptab <- rdRDS("extraData", "qDiptab.rds") 26 | dn <- dimnames(qDiptab) 27 | max.n <- max(nn <- as.integer(dn[["n"]])) 28 | P.s <- as.numeric(dn[["Pr"]]) 29 | 30 | if(n >= max.n) { ## extrapolate, or rather just use the last n as == "asymptotic" 31 | if(n > max.n) 32 | message("n = ",n," >= max_n{n in table} = ", max.n, 33 | " -- using that as asymptotic value.") 34 | n1 <- n0 <- max.n 35 | i2 <- i.n <- length(nn) 36 | f.n <- 0 37 | } else { 38 | n0 <- nn[i.n <- findInterval(n, nn)] 39 | n1 <- nn[(i2 <- i.n +1)] 40 | f.n <- (n - n0)/(n1 - n0)# in [0, 1] 41 | } 42 | ## Now "find" y-interval: 43 | y.0 <- sqrt(n0)* qDiptab[i.n, ] 44 | y.1 <- sqrt(n1)* qDiptab[i2 , ] 45 | sD <- sqrt(n) * D 46 | P <- 1 - approx(y.0 + f.n*(y.1 - y.0), P.s, rule = 2,# <- [min, max] 47 | xout = sD)[["y"]] 48 | } 49 | structure(list(statistic = c(D = D), p.value = P, nobs = n, 50 | alternative = "non-unimodal, i.e., at least bimodal", 51 | method = method, data.name = DNAME), class = "htest") 52 | } 53 | -------------------------------------------------------------------------------- /tests/sim1.Rout.save-32b: -------------------------------------------------------------------------------- 1 | 2 | R version 2.13.0 Patched (2011-05-19 r55964) 3 | Copyright (C) 2011 The R Foundation for Statistical Computing 4 | ISBN 3-900051-07-0 5 | Platform: i686-pc-linux-gnu (32-bit) 6 | 7 | R is free software and comes with ABSOLUTELY NO WARRANTY. 8 | You are welcome to redistribute it under certain conditions. 9 | Type 'license()' or 'licence()' for distribution details. 10 | 11 | R is a collaborative project with many contributors. 12 | Type 'contributors()' for more information and 13 | 'citation()' on how to cite R or R packages in publications. 14 | 15 | Type 'demo()' for some demos, 'help()' for on-line help, or 16 | 'help.start()' for an HTML browser interface to help. 17 | Type 'q()' to quit R. 18 | 19 | > #### Very small scale simulation to make the point 20 | > #### --> See ../stuff/ for much more 21 | > library(diptest) 22 | > 23 | > P.p <- c(1, 5, 10, 25)/100 24 | > (P.p <- c(P.p, 1/2, rev(1 - P.p))) 25 | [1] 0.01 0.05 0.10 0.25 0.50 0.75 0.90 0.95 0.99 26 | > 27 | > N.sim <- 9999 28 | > set.seed(94) 29 | > .p0 <- proc.time() 30 | > dU100 <- replicate(N.sim, dip(runif(100))) 31 | > cat('Time elapsed: ', (p1 <- proc.time()) - .p0,'\n'); .p0 <- p1 32 | Time elapsed: 1.74 0.008 1.761 0 0 33 | > ## Lynne (2003: P IV, 1.6 GHz): ~7 s 34 | > ## 2010 (AMD Phenom II X4 925): 1.3 s 35 | > 36 | > 100 * round(q100 <- quantile(dU100, p = P.p), 4) 37 | 1% 5% 10% 25% 50% 75% 90% 95% 99% 38 | 2.29 2.56 2.75 3.08 3.54 4.12 4.70 5.09 5.90 39 | > 40 | > plot(density(sqrt(100) * dU100), lwd = 2, col=2, 41 | + main = expression("Dip distribution" ~~ 42 | + list(sqrt(n)* D[n], ~ n == 100))) 43 | > abline(h=0, col="dark gray", lty=3) 44 | > 45 | > round(1e4 * quantile(dU100, p = seq(0,1, by = 0.01), names = FALSE)) 46 | [1] 191 229 239 246 252 256 261 265 268 272 275 277 280 282 285 287 289 292 47 | [19] 294 296 298 300 302 304 305 308 310 312 314 315 317 319 321 323 325 327 48 | [37] 329 331 332 334 336 338 340 341 343 345 347 349 351 352 354 356 358 360 49 | [55] 362 364 366 368 370 372 374 376 379 381 383 385 387 390 393 395 397 400 50 | [73] 403 406 409 412 415 418 421 424 427 431 434 438 442 446 450 455 460 464 51 | [91] 470 476 483 489 499 509 520 539 562 590 773 52 | > 53 | -------------------------------------------------------------------------------- /tests/ex1.R: -------------------------------------------------------------------------------- 1 | library(diptest) 2 | 3 | stopifnot(dip(c(1,1,2,2)) == 1/4)# the maximal value possible: two point dist 4 | 5 | ## very first small "unimodal" example --- the 1/(2*n) result: 6 | n <- length(u <- cumsum(0:3)) 7 | d <- dip(u, debug=TRUE)# shows the final if() {added by MM} is really needed 8 | stopifnot(d == dip(-u), d == 1/(2*n))# exact "=" for n = 4 ! 9 | ## Note that I believe this should *not* give 0 (as fmechler@.. did), 10 | ## but rather 1/(2n) because that's (1/n) / 2 and 11 | ## (1/n) is the correct distance between LCM and GCM 12 | 13 | ## Small example -- but MM sees difference (32-bit / 64-bit) *and* on M1mac: 14 | x <- c(0,2:3,5:6) 15 | ## IGNORE_RDIFF_BEGIN 16 | d1 <- dip(x, full=TRUE, debug=2) 17 | d2 <- dip(6-x, full=TRUE, debug=2) 18 | str(d2) # differences to M1mac (!) 19 | ## IGNORE_RDIFF_END 20 | str(d1) 21 | 22 | if(!dev.interactive(orNone=TRUE)) pdf("ex1.pdf") 23 | par(mfrow = 2:1, mar = .1+c(3,4,2,1), mgp=c(1.5,.6,0), oma = c(0,0,2.1,0)) 24 | # 25 | plot(d1) 26 | abline(v=-1:7, h = seq(0,1,by=0.2), lty="83", col = "gray") 27 | # 28 | plot(d2) 29 | abline(v=-1:7, h = seq(0,1,by=0.2), lty="83", col = "gray") 30 | # 31 | ## "title" only now 32 | mtext("dip() problem with 'mirror x'", side=3, line = 0.8, 33 | outer=TRUE, cex = 1.5, font = 2) 34 | 35 | 36 | ## Yong Lu example -- a bit smaller 37 | x2 <- c(1, rep(2, 9)) 38 | stopifnot(dip(x2) == dip(3 - x2)) 39 | str(dip(x2, full=TRUE)) 40 | cat('Time elapsed: ', (.pt <- proc.time()),'\n') # "stats" 41 | 42 | ## Real data examples : 43 | 44 | data(statfaculty) 45 | 46 | str(dip(statfaculty, full = "all", debug = 3), vec.len = 8) 47 | 48 | data(faithful) 49 | fE <- faithful$eruptions 50 | str(dip(fE, full = "all", debug = 3), 51 | vec.len= 8) 52 | 53 | data(precip) 54 | str(dip(precip, full = TRUE, debug = TRUE)) 55 | 56 | ## current qDiptab <--> n = 72'000 is "asymptotic" boundary 57 | set.seed(123); x <- rnorm(72000) 58 | dt72k <- dip.test(x) 59 | ## gave error in qDiptab[i2, ] : subscript out of bounds -- in diptest <= 0.77-0 60 | stopifnot(all.equal(list(statistic = c(D = 0.0005171098381181), p.value = 1, nobs = 72000L), 61 | dt72k[c("statistic", "p.value", "nobs")], tolerance = 1e-13)) 62 | 63 | 64 | cat('Time elapsed: ', proc.time() - .pt,'\n') # "stats" 65 | 66 | if(!interactive()) warnings() 67 | -------------------------------------------------------------------------------- /vignettes/diptest.bib: -------------------------------------------------------------------------------- 1 | @string{AmStat = "The American Statistician"} 2 | @string{AnnStat = "Annals of Statistics"} 3 | % @string{JASA = "Journal of the American Statistical Association"} 4 | @string{AnInStMa = "Annals of the Inst.\ of Stat.\ Math."} 5 | @string{JASA = "JASA"} 6 | @string{JAppTh = "Journal of Approximation Theory"} 7 | @string{JMAA = "Journal of Analysis and Applications"} 8 | @string{JRSS = "Journal of the Royal Statistical Society"} %% ALWAYS #~A or #~B! 9 | @string{JRSSA = JRSS # "~A, General"} 10 | @string{JRSSB = JRSS # "~B"} 11 | @string{JRSSC-AS = "Applied Statistics --- " # JRSS # "~C"} 12 | @string{NuMath = "Numerische Mathematik"} 13 | @string{SIAM = "Society for Industrial and Applied Mathematics"} 14 | @string{SSci = "Statistical Science"} 15 | @string{StMed = "Statist.\ in Med."}% Statistics in Medicine 16 | @string{ScandS = "Scandinavian Journal of Statistics"} 17 | @string{JSS = "Journal of Statistical Software"} 18 | @string{JSSC = "SIAM Journal on Scientific and Statistical Computing"} 19 | % @string{JSSC = "SIAM J. Sci.\ Statist.\ Comput."} 20 | @string{JCGS = "Journal of Computational and Graphical Statistics"} 21 | % @string{CSDA = "Computational Statistics \& Data Analysis"} 22 | @string{CSDA = "Computat.\ Statist.\ Data Anal."} 23 | 24 | @string{IEEE-ASSP = "IEEE Trans.\ Acoust., Speech, Signal Processing"} 25 | % @string{TOMS = "ACM Transactions on Mathematical Software"} 26 | % @string{TOMS = "{ACM} Transactions on Mathematical Software (TOMS)"} 27 | @string{TOMS = "ACM Trans.\ Math.\ Software"} 28 | 29 | @string{ETH = "Swiss Federal Institute of Technology (ETH)"} 30 | @string{UWstat = "Department of Statistics, University of Washington"} 31 | 32 | @string{Wiley = "Wiley"} 33 | @string{NY = "N.~Y."} 34 | 35 | 36 | @article{HarJH85, 37 | Author = {J. A. Hartigan and P. M. Hartigan}, 38 | Title = {The Dip Test of Unimodality}, 39 | Year = 1985, 40 | Journal = AnnStat, 41 | Volume = 13, 42 | Pages = {70--84}, 43 | Keywords = {Multimodality; Isotonic regression; Empirical distribution} 44 | } 45 | 46 | @article{HarP85, 47 | author = {P. M. Hartigan}, 48 | title = {Computation of the Dip Statistic to Test for Unimodality}, 49 | year = 1985, 50 | journal = {Applied Statistics}, 51 | pages = {320--325}, 52 | volume = 34 53 | } 54 | 55 | %% -- note that ~/bib/master.bib has quite a few more on "modality" testing 56 | -------------------------------------------------------------------------------- /stuff/sim-minProb.Rout: -------------------------------------------------------------------------------- 1 | 2 | R version 2.9.0 (2009-04-17) 3 | Copyright (C) 2009 The R Foundation for Statistical Computing 4 | ISBN 3-900051-07-0 5 | 6 | R is free software and comes with ABSOLUTELY NO WARRANTY. 7 | You are welcome to redistribute it under certain conditions. 8 | Type 'license()' or 'licence()' for distribution details. 9 | 10 | Natural language support but running in an English locale 11 | 12 | R is a collaborative project with many contributors. 13 | Type 'contributors()' for more information and 14 | 'citation()' on how to cite R or R packages in publications. 15 | 16 | Type 'demo()' for some demos, 'help()' for on-line help, or 17 | 'help.start()' for an HTML browser interface to help. 18 | Type 'q()' to quit R. 19 | 20 | > 21 | > ### If D_n = D(X_1, ... X_n) is the dip statistic, we have 22 | > ### D_n >= 1/(2 n) 23 | > ### pm(n) := Pr[ D == 1/(2 n) ] > 0 24 | > ### == ------------------------ 25 | > ## and hence the distribution of D_n(X), X ~ U[0,1] starts with a jump, 26 | > ## from 0 to pm(n) at d = 1/(2n). 27 | > 28 | > ### Now estimate pm(n) via simulation. 29 | > ### The simulation is analyzed in file ./minProb-anal.R 30 | > ### ~~~~~~~~~~~~~~ 31 | > setwd("~/R/Pkgs/diptest/stuff") 32 | > 33 | > library(diptest) 34 | > 35 | > Ns <- 500000 # number of samples (per n) 36 | > isim <- 1:Ns 37 | > nn <- c(4:18,20,25,30,35,40,50,60, 75, 100) 38 | > 39 | > nMin <- sapply(nn, function(n) 40 | + sum(sapply(isim, 41 | + function(i) abs(1 - 2*n*dip(runif(n))) < 1e-5))) 42 | > names(nMin) <- paste(nn) 43 | > attr(nMin, "Ns") <- Ns 44 | > ## nMin / Ns == pm(n), i.e. pm(nn) 45 | > save(nMin, file= "minProb.rda") 46 | > 47 | > proc.time() 48 | user system elapsed 49 | 2130.176 2.594 2134.998 50 | > sessionInfo() 51 | R version 2.9.0 (2009-04-17) 52 | x86_64-unknown-linux-gnu 53 | 54 | locale: 55 | LC_CTYPE=de_CH.UTF-8;LC_NUMERIC=C;LC_TIME=en_US.UTF-8;LC_COLLATE=de_CH.UTF-8;LC_MONETARY=C;LC_MESSAGES=de_CH.UTF-8;LC_PAPER=de_CH.UTF-8;LC_NAME=C;LC_ADDRESS=C;LC_TELEPHONE=C;LC_MEASUREMENT=de_CH.UTF-8;LC_IDENTIFICATION=C 56 | 57 | attached base packages: 58 | [1] graphics grDevices datasets stats utils methods base 59 | 60 | other attached packages: 61 | [1] diptest_0.25-2 sfsmisc_1.0-7 62 | > 63 | > proc.time() 64 | user system elapsed 65 | 2130.188 2.605 2135.174 66 | -------------------------------------------------------------------------------- /man/dip.test.Rd: -------------------------------------------------------------------------------- 1 | \name{dip.test} 2 | \title{Hartigans' Dip Test for Unimodality} 3 | \alias{dip.test} 4 | \concept{multimodality} 5 | \description{ 6 | Compute Hartigans' dip statistic \eqn{D_n}{Dn}, and 7 | its p-value for the test for unimodality, by interpolating 8 | tabulated quantiles of \eqn{\sqrt{n} D_n}{sqrt(n) * Dn}. 9 | 10 | For \eqn{X_i \sim F, i.i.d.}{X_i ~ F, i.i.d}, 11 | the null hypothesis is that \eqn{F} is a unimodal distribution. 12 | Consequently, the test alternative is non-unimodal, i.e., at least 13 | bimodal. Using the language of medical testing, you would call the 14 | test \dQuote{Test for \bold{Multi}modality}. 15 | } 16 | \usage{ 17 | dip.test(x, simulate.p.value = FALSE, B = 2000) 18 | } 19 | \arguments{ 20 | \item{x}{numeric vector; sample to be tested for unimodality.} 21 | \item{simulate.p.value}{a logical indicating whether to compute 22 | p-values by Monte Carlo simulation.} 23 | \item{B}{an integer specifying the number of replicates used in the 24 | Monte Carlo test.} 25 | } 26 | \details{ 27 | If \code{simulate.p.value} is \code{FALSE}, the p-value is computed 28 | via linear interpolation (of \eqn{\sqrt{n} D_n}{sqrt(n) * Dn}) in the 29 | \code{\link{qDiptab}} table. 30 | Otherwise the p-value is computed from a Monte Carlo simulation of a 31 | uniform distribution (\code{\link{runif}(n)}) with \code{B} 32 | replicates. 33 | } 34 | \value{ 35 | A list with class \code{"htest"} containing the following 36 | components: 37 | \item{statistic}{the dip statistic \eqn{D_n}{Dn}, i.e., 38 | \code{\link{dip}(x)}.} 39 | \item{p.value}{the p-value for the test, see details.} 40 | \item{method}{character string describing the test, and whether Monte 41 | Carlo simulation was used.} 42 | \item{data.name}{a character string giving the name(s) of the data.} 43 | } 44 | \seealso{ 45 | For goodness-of-fit testing, notably of continuous distributions, 46 | \code{\link{ks.test}}. 47 | } 48 | \references{ 49 | see those in \code{\link{dip}}. 50 | } 51 | \author{Martin Maechler} 52 | \note{ 53 | see also the package vignette, which describes the procedure in more details. 54 | } 55 | \examples{ 56 | ## a first non-trivial case 57 | (d.t <- dip.test(c(0,0, 1,1))) # "perfect bi-modal for n=4" --> p-value = 0 58 | stopifnot(d.t$p.value == 0) 59 | 60 | data(statfaculty) 61 | plot(density(statfaculty)); rug(statfaculty) 62 | (d.t <- dip.test(statfaculty)) 63 | 64 | x <- c(rnorm(50), rnorm(50) + 3) 65 | plot(density(x)); rug(x) 66 | ## border-line bi-modal ... BUT (most of the times) not significantly: 67 | dip.test(x) 68 | dip.test(x, simulate=TRUE, B=5000) 69 | 70 | ## really large n -- get a message 71 | dip.test(runif(4e5)) 72 | 73 | } 74 | \keyword{htest} 75 | \keyword{distribution} 76 | -------------------------------------------------------------------------------- /vignettes/myVignette.sty: -------------------------------------------------------------------------------- 1 | %% originates from ~/R/Pkgs/Matrix/inst/doc/myVignette.sty [April 2009] 2 | \RequirePackage{hyperref} 3 | \RequirePackage{url} 4 | \RequirePackage{amsmath} 5 | \RequirePackage{bm}%-> \bm (= bold math) 6 | \newcommand{\Slang}{\textsf{S} language} 7 | \newcommand{\RR}{\textsf{R}} 8 | \newcommand{\email}[1]{\href{mailto:#1}{\normalfont\texttt{#1}}} 9 | %- R programming markup 10 | \newcommand\code{\bgroup\@codex} 11 | \def\@codex#1{{\normalfont\ttfamily\hyphenchar\font=-1 #1}\egroup} 12 | \let\env=\code 13 | \let\command=\code 14 | \newcommand*{\Rfun}[1]{\code{#1()}\index{\RR~function #1}} 15 | \newcommand*{\class}[1]{\code{#1}\index{class #1}}% 16 | \newcommand*{\pkg}[1]{\code{#1}\index{\RR~package #1}} 17 | % 18 | \newcommand{\kbd}[1]{{\normalfont\texttt{#1}}} 19 | \newcommand{\key}[1]{{\normalfont\texttt{\uppercase{#1}}}} 20 | \newcommand\samp{`\bgroup\@noligs\@sampx} 21 | \def\@sampx#1{{\normalfont\texttt{#1}}\egroup'} 22 | \let\option=\samp 23 | \newcommand{\var}[1]{{\normalfont\textsl{#1}}} 24 | \newcommand{\file}[1]{{`\normalfont\textsf{#1}'}} 25 | \newcommand{\dfn}[1]{{\normalfont\textsl{#1}}} 26 | \newcommand{\acronym}[1]{{\normalfont\textsc{\lowercase{#1}}}} 27 | \newcommand{\strong}[1]{{\normalfont\fontseries{b}\selectfont #1}} 28 | \let\pkg=\strong 29 | % 30 | \RequirePackage{alltt} 31 | \newenvironment{example}{\begin{alltt}}{\end{alltt}} 32 | \newenvironment{smallexample}{\begin{alltt}\small}{\end{alltt}} 33 | \newenvironment{display}{\list{}{}\item\relax}{\endlist} 34 | \newenvironment{smallverbatim}{\small\verbatim}{\endverbatim} 35 | % This is already in ``Sweave'' : 36 | %% \RequirePackage{fancyvrb} 37 | %% \DefineVerbatimEnvironment{Sinput}{Verbatim}{fontsize=\small,fontshape=sl} 38 | %% \DefineVerbatimEnvironment{Soutput}{Verbatim}{fontsize=\small} 39 | %% \DefineVerbatimEnvironment{Scode}{Verbatim}{fontsize=\small,fontshape=sl} 40 | % 41 | \newcommand{\FIXME}[1]{\marginpar{ \dots FIXME \emph{#1} \dots}} 42 | \newcommand{\TODO}[1]{\par\noindent\textsc{Todo:} \textit{#1}\par} 43 | % 44 | \newcommand*{\myOp}[1]{{$\left\langle\ensuremath{#1}\right\rangle$}} 45 | \newcommand{\noFootnote}[1]{{\small (\textit{#1})}} 46 | % 47 | %% diptest stuff : 48 | % 49 | %% Probabily P[.], Expectation E[.] etc 50 | %% == subsection of our flexible-style "texab.sty" : 51 | \newcommand{\@pkl}{[} % Probability Klammer links 52 | \newcommand{\@pkr}{]} 53 | \newcommand{\@ekl}{[} % Erwartungswert Klammer links 54 | \newcommand{\@ekr}{]} % Erwartungswert Klammer rechts 55 | \DeclareMathOperator{\PRSymbol}{P} 56 | % Next line (\makeright): if #1 == \left then \right #2 else #1 #2 57 | \newcommand{\makeright}[2]{\ifx#1\left\right#2\else#1#2\fi} 58 | %% the real commands 59 | \newcommand{\PR}[2][\left] {\PRSymbol #1\@pkl #2 \makeright{#1}{\@pkr}} 60 | \newcommand{\ERW}[2][\left] {\ERWSymbol #1\@ekl #2 \makeright{#1}{\@ekr}} 61 | 62 | \newcommand{\isD}{\ {\stackrel{\mathcal{D}}{=}}\ \ } 63 | \newcommand{\iid}{\mbox{ i.i.d. }} 64 | -------------------------------------------------------------------------------- /stuff/minProb-anal.R: -------------------------------------------------------------------------------- 1 | #### Analysis of the result of ./sim-minProb.R 2 | #### ~~~~~~~~ ~~~~~~~~~~~~~~~ 3 | 4 | setwd("/u/maechler/R/Pkgs/diptest/stuff") 5 | (nMin <- readRDS(file= "minProb.rds")) 6 | Ns <- attr(nMin, "Ns") 7 | nn <- as.integer(names(nMin)) 8 | nMin[nMin > 0] ## now only goes up to n=13, even for Ns == 500'000 9 | 10 | ## Actually nMin ~ Bin(pm(n), Ns) --> estimate pm(n) parametrically: 11 | ## ===================== ====== 12 | glm.n <- glm(cbind(nMin,Ns) ~ log(nn), family = binomial) 13 | summary(glm.n) 14 | ## ^^ shows that logistic link & log(nn) was not good enough: 15 | 16 | glm.s <- glm(cbind(nMin,Ns) ~ sqrt(nn), family = binomial)# warning about prob in ~= {0, 1} 17 | summary(glm.s) 18 | glm.is <- glm(cbind(nMin,Ns) ~ I(nn^-0.5), family = binomial) # "best" 19 | summary(glm.is) 20 | glm.I <- glm(cbind(nMin,Ns) ~ I(nn^-1), family = binomial) 21 | summary(glm.I) 22 | 23 | matplot(nn, cbind(nMin 24 | ,Ns * predict(glm.s, type="response") # "2" 25 | ,Ns * predict(glm.n, type="response") # "3" 26 | ,Ns * predict(glm.is, type="response")# "4" (best!) 27 | ,Ns * predict(glm.I, type="response") # "5" 28 | ), type='b', 29 | log = "xy", 30 | ylab = "nMin & 4 different glm() predictions") 31 | 32 | ## actually with large simulation, "glm.is" are not bad 33 | plot(nMin ~ nn, type = 'o') 34 | 35 | ## or 36 | require(gplots) 37 | plotCI(nn, nMin, uiw = sqrt(nMin*(Ns-nMin)/Ns), type = 'o') 38 | 39 | ## in Log-Log scale -- + error bars 40 | plotCI(nn, nMin, uiw = 1.96*sqrt(nMin*(Ns-nMin)/Ns), type = 'o', log = 'xy') 41 | ## 2011-05-14: AAARGH the above now gives an error! 42 | ## whereas this still works: "was" 43 | plot(nMin ~ nn, type = 'o', log = 'xy') 44 | axis(2, at=c(2000,5000,10000)) 45 | axis(1, at=nn) 46 | 47 | ## Better confidence intervals (this takes time!): 48 | ci <- Ns*sapply(nMin, function(k) 49 | binom.test(k, Ns, conf.level = .99)$conf.int) 50 | 51 | plot(nMin ~ nn, type = 'o', log = 'xy', xlab = 'n', cex = 0.8) 52 | segments(nn, ci[1,], nn, ci[2,], col = "tomato") 53 | axis(2, at=c(2000,5000,10000)) 54 | axis(1, at=nn) 55 | title(paste("#{dip = 1/(2 n)} for Ns=",formatC(Ns,form="d")," samples")) 56 | mtext("exact binomial 99%-confidence intervals", col = "tomato") 57 | text(nn, nMin, formatC(signif(nMin/Ns, 2)), adj = -0.2, cex = 0.8) 58 | 59 | ## i.e., for small 'n' the probability is remarkably high, 60 | ## where as for n = 5000, 61 | ## it is only visible if you look very carefully : 62 | 63 | ##___________ No longer ____ after fixing the 2003-10 bug ______ 64 | 65 | setwd("pre-bugfix-2003-11") 66 | 67 | ### for n=5000 "5k": 68 | load("dip5k.rda")# d5k has 100'000 values dip(runif(5000)) 69 | 70 | ## main = substitute("density of " * P(D[n] == frac(1,2*n)) 71 | 72 | plot(density(d5k), 73 | main = substitute("density of " * { 74 | D[n] == "dip(runif(5000)), simulated " } * 75 | N == nnn *" times", list(nnn = length(d5k)))) 76 | ## or with `sfsmisc' package: 77 | tkdensity(d5k, do.rug=FALSE, from.f = -4) 78 | ## --> Title of a paper: The distribution of the dip is "bimodal" !! 79 | ## 80 | ## or "The Dip Distribution has a Dip" ! 81 | 82 | -------------------------------------------------------------------------------- /tests/sim1.Rout.save-64b: -------------------------------------------------------------------------------- 1 | 2 | R Under development (unstable) (2015-03-03 r67931) -- "Unsuffered Consequences" 3 | Copyright (C) 2015 The R Foundation for Statistical Computing 4 | Platform: x86_64-unknown-linux-gnu (64-bit) 5 | 6 | R is free software and comes with ABSOLUTELY NO WARRANTY. 7 | You are welcome to redistribute it under certain conditions. 8 | Type 'license()' or 'licence()' for distribution details. 9 | 10 | R is a collaborative project with many contributors. 11 | Type 'contributors()' for more information and 12 | 'citation()' on how to cite R or R packages in publications. 13 | 14 | Type 'demo()' for some demos, 'help()' for on-line help, or 15 | 'help.start()' for an HTML browser interface to help. 16 | Type 'q()' to quit R. 17 | 18 | > #### Very small scale simulation to make the point 19 | > #### --> See ../stuff/ for much more 20 | > library(diptest) 21 | > 22 | > P.p <- c(1, 5, 10, 25)/100 23 | > (P.p <- c(P.p, 1/2, rev(1 - P.p))) 24 | [1] 0.01 0.05 0.10 0.25 0.50 0.75 0.90 0.95 0.99 25 | > 26 | > N.sim <- 9999 27 | > set.seed(94) 28 | > .p0 <- proc.time() 29 | > dU100 <- replicate(N.sim, dip(runif(100))) 30 | > cat('Time elapsed: ', (p1 <- proc.time()) - .p0,'\n'); .p0 <- p1 31 | Time elapsed: 0.512 0.085 0.597 0 0 32 | > ## Lynne (2003: P IV, 1.6 GHz): ~7 s 33 | > ## 2010 (AMD Phenom II X4 925): 1.3 s 34 | > 35 | > 100 * round(q100 <- quantile(dU100, p = P.p), 4) 36 | 1% 5% 10% 25% 50% 75% 90% 95% 99% 37 | 2.29 2.56 2.75 3.08 3.54 4.12 4.70 5.09 5.90 38 | > 39 | > plot(density(sqrt(100) * dU100), lwd = 2, col=2, 40 | + main = expression("Dip distribution" ~~ 41 | + list(sqrt(n)* D[n], ~ n == 100))) 42 | > abline(h=0, col="dark gray", lty=3) 43 | > 44 | > round(1e4 * quantile(dU100, p = seq(0,1, by = 0.01), names = FALSE)) 45 | [1] 191 229 239 246 252 256 261 265 268 272 275 277 280 282 285 287 289 292 46 | [19] 294 296 298 300 302 304 305 308 310 312 314 315 317 319 321 323 325 327 47 | [37] 329 331 332 334 336 338 340 341 343 345 347 349 351 352 354 356 358 360 48 | [55] 362 364 366 368 370 372 374 376 379 381 383 385 387 390 393 395 397 400 49 | [73] 403 406 409 412 415 418 421 424 427 431 434 438 442 446 450 455 460 464 50 | [91] 470 476 483 489 499 509 520 539 562 590 773 51 | > 52 | > ##--- an extreme unimodal case -- i.e. very small dip(): 53 | > set.seed(60); x <- rexp(301,1)^3 54 | > hist(x) 55 | > (dt.x <- dip.test(x)) 56 | 57 | Hartigans' dip test for unimodality / multimodality 58 | 59 | data: x 60 | D = 0.0072617, p-value = 1 61 | alternative hypothesis: non-unimodal, i.e., at least bimodal 62 | 63 | > (dt2 <- dip.test(x, simulate = TRUE)) 64 | 65 | Hartigans' dip test for unimodality / multimodality with simulated 66 | p-value (based on 2000 replicates) 67 | 68 | data: x 69 | D = 0.0072617, p-value = 1 70 | alternative hypothesis: non-unimodal, i.e., at least bimodal 71 | 72 | > (dt3 <- dip.test(x, simulate = TRUE, B = 10000)) 73 | 74 | Hartigans' dip test for unimodality / multimodality with simulated 75 | p-value (based on 10000 replicates) 76 | 77 | data: x 78 | D = 0.0072617, p-value = 1 79 | alternative hypothesis: non-unimodal, i.e., at least bimodal 80 | 81 | > stopifnot(dt.x$p.value == 1,## <- gave NA earlier 82 | + dt2$p.value == 1, 83 | + dt3$p.value == 1) 84 | > 85 | > 86 | > cat('Time elapsed: ', proc.time() - .p0,'\n') # "stats" 87 | Time elapsed: 1.136 0.018 1.158 0 0 88 | > 89 | > proc.time() 90 | user system elapsed 91 | 1.765 0.129 1.935 92 | -------------------------------------------------------------------------------- /stuff/jeremy-unimodality-olives.R: -------------------------------------------------------------------------------- 1 | ##-*- mode: R; kept-new-versions: 21; kept-old-versions: 12; -*- 2 | 3 | #### Originally, from 4 | #### http://www.stat.washington.edu/wxs/Stat593-s03/Code/jeremy-unimodality.R 5 | #### 6 | ####----------------------------------------------------------------------------------- 7 | ## Diagnostic plots for clustering and the DIP test for unimodality 8 | ## Code written by Jeremy Tantrum, Winter 2003 9 | ##================================================================= 10 | 11 | #### --> see ./jeremy-unimodality.R 12 | 13 | ## An example of it working: Olive oil data - region 2 - are areas 5 and 6 14 | ## different. 15 | 16 | ## -- MM: Aargh, 'olives' *differs* from R package to R package 17 | data(olives, package="classifly") 18 | str(olives.CF <- olives) 19 | data(olives, package="TWIX") 20 | str(olives.TW <- olives) 21 | if(FALSE)## These are completely different: 22 | data(oliveoil, package="pls") 23 | 24 | 25 | ## First, the meaning of 'Area' and 'Region' is swapped between the two: 26 | with(olives.TW, table(Region, Area)) 27 | ## Area 28 | ## Region North Sardinia South 29 | ## Calabria 0 0 56 30 | ## Coast Sardinia 0 33 0 31 | ## East Liguria 50 0 0 32 | ## Inland Sardinia 0 65 0 33 | ## North Apulia 0 0 25 34 | ## Sicily 0 0 36 35 | ## South Apulia 0 0 206 36 | ## Umbria 51 0 0 37 | ## West Liguria 50 0 0 38 | 39 | with(olives.CF, table(Area, Region)) 40 | ## Region 41 | ## Area 1 2 3 42 | ## Calabria 56 0 0 43 | ## Coast-Sardinia 0 33 0 44 | ## East-Liguria 0 0 50 45 | ## Inland-Sardinia 0 65 0 46 | ## North-Apulia 25 0 0 47 | ## Sicily 36 0 0 48 | ## South-Apulia 206 0 0 49 | ## Umbria 0 0 51 50 | ## West-Liguria 0 0 50 51 | 52 | 53 | ## Not at all the same... aargh 54 | { op <- par(ask=TRUE) 55 | for(n in names(oo)) { plot(oo[,n], olives[,n], main=n) } 56 | par(op) } 57 | 58 | ## But looking at subsets 59 | subset(olives.TW, Region == "Coast Sardinia")[, 1:8] 60 | subset(olives.CF, Area == "Coast-Sardinia")[, -(1:2)] 61 | 62 | ### See ./Stuetzle-stat593-S2003-olive.doc : 63 | olives.WS <- 64 | read.table("/u/maechler/R/Pkgs/diptest/stuff/Stuetzle-stat593-S2003-olive.tab", 65 | header=TRUE) 66 | 67 | ## together with code in ./jeremy-unimodality.R : 68 | all.equal(unname(olives.CF[,-2]), unname(olives.WS[,-2])) ## TRUE 69 | ## but clearly, also the last three *Variable* names are confused... [aargh] 70 | 71 | ## How can we *match* the two sets? Look at the [,2] variables 72 | with(olives.CF, table(Area, Region)) 73 | with(olives.WS, table(Class, Region)) 74 | 75 | ## -> manually : 76 | cl2area <- c(5,1,7,6, 4,2, 3,9,8) 77 | 78 | ## but can do this programmatically: 79 | (areas <- levels(olives.CF[,2])) 80 | c2a <- unique(cbind(olives.CF[,2], # $ Area 81 | olives.WS[,2]))# $ Class 82 | (c2a <- c2a[order(c2a[,2]) , ]) 83 | c2a <- c2a[,1] 84 | stopifnot(all(c2a == cl2area), 85 | all(olives.CF[,2] == areas[c2a[olives.WS[,2]]])) 86 | 87 | ## Now to work with Jeremy's code, assuming he used Stuetzle's version of the data: 88 | olive.area <- olives.WS $ Class 89 | olive.region <- olives.WS $ Region 90 | i.rest <- !(names(olives.WS) %in% c("Class", "Region")) 91 | olive <- data.matrix(olives.WS[, i.rest]) 92 | 93 | ## 94 | x.labs <- olive.area[olive.region==2] 95 | library(MASS) 96 | g <- lda(olive[olive.region==2,], x.labs) 97 | g.proj <- unclass(g)$scaling 98 | ## 99 | x <- olive[olive.region==2,] %*% g.proj 100 | plot.ucdf(x) 101 | plot.silverman(x) 102 | str(x.dip <- calcdip(x, plot.it=FALSE)) #-> . $dip = 0.149 103 | dips <- rep(0,100) 104 | for(i in 1:100) { 105 | x.boot <- unisample(x.dip$unicurve,length(x)) 106 | dips[i] <- calcdip(x.boot, plot.it=FALSE)$dip 107 | } 108 | (p.value <- sum(dips>x.dip$dip)/100) # 0 ( < 0.01 ) 109 | # < 0.01 110 | ## 111 | -------------------------------------------------------------------------------- /inst/NEWS.Rd: -------------------------------------------------------------------------------- 1 | % Check from R: 2 | % news(db = tools:::.build_news_db_from_package_NEWS_Rd("~/R/Pkgs/diptest/inst/NEWS.Rd")) 3 | \name{NEWS} 4 | \title{News for \R Package \pkg{diptest}} 5 | \encoding{UTF-8} 6 | 7 | \section{CHANGES in diptest VERSION 0.77-1 (2024-03-31)}{ 8 | \subsection{BUG FIXES}{ 9 | \itemize{ 10 | \item \code{diptest(x72000, *)} for \code{length(x) == 72000} now 11 | works correctly (with a \code{warning} about being asymptotic), 12 | thanks to William Davis, UC Berkeley. 13 | } 14 | } 15 | } 16 | 17 | \section{CHANGES in diptest VERSION 0.77-0 (2023-11-27)}{ 18 | \subsection{BUG FIXES}{ 19 | \itemize{ 20 | \item C level format: s/\%ld/\%d/ 21 | } 22 | } 23 | } 24 | 25 | \section{CHANGES in diptest VERSION 0.76-0 (2021-03-23)}{ 26 | 27 | \subsection{NEW FEATURES}{ 28 | \itemize{ 29 | \item add \file{README.Rd} mostly for github readers 30 | } 31 | } 32 | \subsection{BUG FIXES}{ 33 | \itemize{ 34 | \item Added Imports (to "base" packages where they were not checked previously) to \file{NAMESPACE}. 35 | \item Using \file{NEWS.Rd} file more. 36 | } 37 | } 38 | } 39 | 40 | \section{CHANGES in diptest VERSION 0.75-7 (2015-06-07)}{% CRAN release 41 | 42 | \subsection{NEW FEATURES}{ 43 | \itemize{ 44 | \item Started this \file{NEWS.Rd} file, to eventually replace the \file{ChangeLog} 45 | } 46 | } 47 | \subsection{BUG FIXES}{ 48 | \itemize{ 49 | \item . 50 | } 51 | } 52 | } 53 | 54 | \section{CHANGES in diptest VERSION 0.75-6 (2014-11-25)}{ 55 | \subsection{NEW FEATURES}{ 56 | \itemize{ 57 | \item . 58 | } 59 | } 60 | \subsection{BUG FIXES}{ 61 | \itemize{ 62 | \item . 63 | } 64 | } 65 | } 66 | 67 | \section{CHANGES in diptest VERSION 0.75-5 (2013-07-23)}{ 68 | \subsection{NEW FEATURES}{ 69 | \itemize{ 70 | \item add \file{NEWS.Rd} (albeit mostly empty) 71 | } 72 | } 73 | \subsection{BUG FIXES}{ 74 | \itemize{ 75 | \item \code{rdRDS()} wrapper corrrectly tests for R 2.13.0 76 | } 77 | } 78 | } 79 | 80 | 81 | %% this is the *latest* entry in ../ChangeLog 82 | \section{CHANGES in diptest VERSION 0.75-4 (2012-08-13)}{ 83 | \subsection{NEW FEATURES}{ 84 | \itemize{ 85 | \item Enable package for pre-R-2.13.x via \code{rdRDS()} wrapper 86 | 87 | \item \code{dip.test()} now also returns an \code{alternative} component, 88 | e.g. for printing. 89 | } 90 | } 91 | } 92 | 93 | \section{CHANGES in diptest VERSION 0.75-3 (2012-04-18)}{ 94 | \subsection{NEW FEATURES}{ 95 | \itemize{ 96 | \item . 97 | } 98 | } 99 | \subsection{BUG FIXES}{ 100 | \itemize{ 101 | \item . 102 | } 103 | } 104 | } 105 | 106 | 107 | \section{CHANGES in diptest VERSION 0.75-1 (2011-08-10)}{ 108 | \subsection{NEW FEATURES}{ 109 | \itemize{ 110 | \item . 111 | } 112 | } 113 | \subsection{BUG FIXES}{ 114 | \itemize{ 115 | \item . 116 | } 117 | } 118 | } 119 | 120 | 121 | \section{CHANGES in diptest VERSION 0.25-3 (2010-08-11)}{ 122 | \subsection{NEW FEATURES}{ 123 | \itemize{ 124 | \item First version of the \dQuote{"diptest issues"} vignette. 125 | } 126 | } 127 | \subsection{BUG FIXES}{ 128 | \itemize{ 129 | \item . 130 | } 131 | } 132 | } 133 | 134 | 135 | \section{CHANGES in diptest VERSION 0.25-2 (2009-02-09)}{ 136 | \subsection{NEW FEATURES}{ 137 | \itemize{ 138 | \item . 139 | } 140 | } 141 | \subsection{BUG FIXES}{ 142 | \itemize{ 143 | \item . 144 | } 145 | } 146 | } 147 | 148 | 149 | \section{CHANGES in diptest VERSION 0.25-1 (2004-08-12)}{ 150 | \subsection{NEW FEATURES}{ 151 | \itemize{ 152 | \item . 153 | } 154 | } 155 | \subsection{BUG FIXES}{ 156 | \itemize{ 157 | \item . 158 | } 159 | } 160 | } 161 | 162 | 163 | \section{CHANGES in diptest VERSION 0.25-0 (2004-02-13)}{ 164 | \subsection{NEW FEATURES}{ 165 | \itemize{ 166 | \item More output in the \R object, allows \code{debug} information. 167 | } 168 | } 169 | \subsection{BUG FIXES}{ 170 | \itemize{ 171 | \item Previously, the \code{dip()} had not been \dQuote{symmetric} 172 | with respect to \dQuote{mirroring} such as a sign flip in the 173 | data. Thanks to Yong Lu, who was able to track the bug to a 174 | misplaced \code{(} in the original Fortran code, the bug has been 175 | fixed on Oct.10, 2003. 176 | } 177 | } 178 | } 179 | 180 | \section{CHANGES in diptest VERSION 0.9-1 (2003-07-15)}{ 181 | \subsection{NEW FEATURES}{ 182 | \itemize{ 183 | \item First(?) CRAN release 184 | } 185 | } 186 | \subsection{BUG FIXES}{ 187 | \itemize{ 188 | \item on 1994-07-30, added code to prevent an infinite loop in 189 | rare cases, but e.g., for \code{dip(1:n)}. 190 | } 191 | } 192 | } 193 | 194 | -------------------------------------------------------------------------------- /tests/mechler-ex.Rout.save-32b: -------------------------------------------------------------------------------- 1 | 2 | R version 2.13.1 Patched (2011-08-09 r56694) 3 | Copyright (C) 2011 The R Foundation for Statistical Computing 4 | ISBN 3-900051-07-0 5 | Platform: i686-pc-linux-gnu (32-bit) 6 | 7 | R is free software and comes with ABSOLUTELY NO WARRANTY. 8 | You are welcome to redistribute it under certain conditions. 9 | Type 'license()' or 'licence()' for distribution details. 10 | 11 | R is a collaborative project with many contributors. 12 | Type 'contributors()' for more information and 13 | 'citation()' on how to cite R or R packages in publications. 14 | 15 | Type 'demo()' for some demos, 'help()' for on-line help, or 16 | 'help.start()' for an HTML browser interface to help. 17 | Type 'q()' to quit R. 18 | 19 | > library(diptest) 20 | > ## These are from 21 | > ## the 217-readme.doc file that explains the bug fixed by 22 | > ## Ferenc Mechler (fmechler@med.cornell.edu). [5/Sep/2002] 23 | > ## 24 | > ex1 <- c(0.0198, 0.0198, 0.1961, 0.2898, 0.3184, 0.3687, 25 | + 0.4336, 0.4987, 0.5661, 0.6530, 0.7476, 0.8555) 26 | > 27 | > ex2 <- c(0.0198, 0.1961, 0.2898, 0.3184, 0.3687, 0.4336, 28 | + 0.4987, 0.5661, 0.6530, 0.7476, 0.8555, 0.9912) 29 | > 30 | > ## Multiply them by 10000 here: 31 | > 32 | > (D1 <- dip(10000*ex1, full=TRUE, debug=2)) 33 | dip() in C: n = 12; starting with 2N*dip = 1. 34 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 12]; l_lcm/gcm = ( 6, 3) 35 | while(gcm[ix] != lcm[iv]) : 36 | L(3,2) --> ix = 2, iv = 3 37 | G(2,3) --> ix = 1, iv = 3 38 | --> ix = 1, iv = 4 39 | --> ix = 1, iv = 5 40 | --> ix = 1, iv = 6 41 | calculating dip .. (dip_l, dip_u) = (2, 1) -> new larger dip 2 (j_best = 2) 42 | 'dip': LOOP-BEGIN: 2n*D= 2 [low,high] = [ 4, 9]; l_lcm/gcm = ( 6, 2) 43 | while(gcm[ix] != lcm[iv]) : 44 | L(2,2) --> ix = 1, iv = 3 45 | L(2,3) --> ix = 1, iv = 4 46 | --> ix = 1, iv = 5 47 | --> ix = 1, iv = 6 48 | 49 | Call: 50 | dip(x = 10000 * ex1, full.result = TRUE, debug = 2) 51 | 52 | n = 12. Dip statistic, D_n = 0.08333333 = 2/(2n) 53 | Modal interval [xL, xU] = [x[4], x[9]] = [2898, 5661] 54 | GCM and LCM have 2 and 6 nodes inside [xL, xU], respectively. 55 | > str(D1, digits = 10, vec.len = 12) 56 | List of 15 57 | $ call : language dip(x = 10000 * ex1, full.result = TRUE, debug = 2) 58 | $ x : num [1:12] 198 198 1961 2898 3184 3687 4336 4987 5661 6530 7476 8555 59 | $ n : int 12 60 | $ dip : num 0.08333333333 61 | $ lo.hi : int [1:2] 4 9 62 | $ ifault : int 0 63 | $ gcm : int [1:2] 9 4 64 | $ lcm : int [1:6] 4 5 6 7 8 9 65 | $ mn : int [1:12] 1 1 1 1 4 4 4 4 4 4 4 4 66 | $ mj : int [1:12] 2 9 6 5 6 7 8 9 10 11 12 12 67 | $ min.is.0 : logi FALSE 68 | $ debug : int 2 69 | $ xl : num 2898 70 | $ xu : num 5661 71 | $ full.result: logi TRUE 72 | - attr(*, "class")= chr "dip" 73 | > 74 | > (D2 <- dip(10000*ex2, full=TRUE, debug=2)) 75 | dip() in C: n = 12; starting with 2N*dip = 1. 76 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 12]; l_lcm/gcm = ( 6, 4) 77 | while(gcm[ix] != lcm[iv]) : 78 | G(3,2) --> ix = 2, iv = 2 79 | G(2,2) --> ix = 1, iv = 2 80 | --> ix = 1, iv = 3 81 | --> ix = 1, iv = 4 82 | --> ix = 1, iv = 5 83 | --> ix = 1, iv = 6 84 | calculating dip .. (dip_l, dip_u) = (1, 1) 85 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 3, 8]; l_lcm/gcm = ( 6, 2) 86 | while(gcm[ix] != lcm[iv]) : 87 | L(2,2) --> ix = 1, iv = 3 88 | L(2,3) --> ix = 1, iv = 4 89 | --> ix = 1, iv = 5 90 | --> ix = 1, iv = 6 91 | calculating dip .. (dip_l, dip_u) = (0, 1) 92 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 3, 5]; l_lcm/gcm = ( 3, 2) 93 | while(gcm[ix] != lcm[iv]) : 94 | L(2,2) --> ix = 1, iv = 3 95 | calculating dip .. (dip_l, dip_u) = (0, 1) 96 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 3, 4]; l_lcm/gcm = ( 2, 2) 97 | ** (l_lcm,l_gcm) = (2,2) ==> d := -0 98 | calculating dip .. (dip_l, dip_u) = (0, 0) 99 | No improvement in low = 3 nor high = 4 --> END 100 | 101 | Call: 102 | dip(x = 10000 * ex2, full.result = TRUE, debug = 2) 103 | 104 | n = 12. Dip statistic, D_n = 0.04166667 = 1/(2n) 105 | Modal interval [xL, xU] = [x[3], x[4]] = [2898, 3184] 106 | GCM and LCM have 2 and 2 nodes inside [xL, xU], respectively. 107 | > str(D2, digits = 10, vec.len = 12) 108 | List of 15 109 | $ call : language dip(x = 10000 * ex2, full.result = TRUE, debug = 2) 110 | $ x : num [1:12] 198 1961 2898 3184 3687 4336 4987 5661 6530 7476 8555 9912 111 | $ n : int 12 112 | $ dip : num 0.04166666667 113 | $ lo.hi : int [1:2] 3 4 114 | $ ifault : int 0 115 | $ gcm : int [1:2] 4 3 116 | $ lcm : int [1:2] 3 4 117 | $ mn : int [1:12] 1 1 2 3 3 3 3 3 3 3 3 3 118 | $ mj : int [1:12] 8 5 4 5 6 7 8 9 10 11 12 12 119 | $ min.is.0 : logi FALSE 120 | $ debug : int 2 121 | $ xl : num 2898 122 | $ xu : num 3184 123 | $ full.result: logi TRUE 124 | - attr(*, "class")= chr "dip" 125 | > 126 | -------------------------------------------------------------------------------- /tests/mechler-ex.Rout.save-64b: -------------------------------------------------------------------------------- 1 | 2 | R version 2.13.1 Patched (2011-08-09 r56694) 3 | Copyright (C) 2011 The R Foundation for Statistical Computing 4 | ISBN 3-900051-07-0 5 | Platform: x86_64-unknown-linux-gnu (64-bit) 6 | 7 | R is free software and comes with ABSOLUTELY NO WARRANTY. 8 | You are welcome to redistribute it under certain conditions. 9 | Type 'license()' or 'licence()' for distribution details. 10 | 11 | R is a collaborative project with many contributors. 12 | Type 'contributors()' for more information and 13 | 'citation()' on how to cite R or R packages in publications. 14 | 15 | Type 'demo()' for some demos, 'help()' for on-line help, or 16 | 'help.start()' for an HTML browser interface to help. 17 | Type 'q()' to quit R. 18 | 19 | > library(diptest) 20 | > ## These are from 21 | > ## the 217-readme.doc file that explains the bug fixed by 22 | > ## Ferenc Mechler (fmechler@med.cornell.edu). [5/Sep/2002] 23 | > ## 24 | > ex1 <- c(0.0198, 0.0198, 0.1961, 0.2898, 0.3184, 0.3687, 25 | + 0.4336, 0.4987, 0.5661, 0.6530, 0.7476, 0.8555) 26 | > 27 | > ex2 <- c(0.0198, 0.1961, 0.2898, 0.3184, 0.3687, 0.4336, 28 | + 0.4987, 0.5661, 0.6530, 0.7476, 0.8555, 0.9912) 29 | > 30 | > ## Multiply them by 10000 here: 31 | > 32 | > (D1 <- dip(10000*ex1, full=TRUE, debug=2)) 33 | dip() in C: n = 12; starting with 2N*dip = 1. 34 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 12]; l_lcm/gcm = ( 6, 3) 35 | while(gcm[ix] != lcm[iv]) : 36 | L(3,2) --> ix = 2, iv = 3 37 | G(2,3) --> ix = 1, iv = 3 38 | --> ix = 1, iv = 4 39 | --> ix = 1, iv = 5 40 | --> ix = 1, iv = 6 41 | calculating dip .. (dip_l, dip_u) = (2, 1) -> new larger dip 2 (j_best = 2) 42 | 'dip': LOOP-BEGIN: 2n*D= 2 [low,high] = [ 4, 9]; l_lcm/gcm = ( 6, 2) 43 | while(gcm[ix] != lcm[iv]) : 44 | L(2,2) --> ix = 1, iv = 3 45 | L(2,3) --> ix = 1, iv = 4 46 | --> ix = 1, iv = 5 47 | --> ix = 1, iv = 6 48 | 49 | Call: 50 | dip(x = 10000 * ex1, full.result = TRUE, debug = 2) 51 | 52 | n = 12. Dip statistic, D_n = 0.08333333 = 2/(2n) 53 | Modal interval [xL, xU] = [x[4], x[9]] = [2898, 5661] 54 | GCM and LCM have 2 and 6 nodes inside [xL, xU], respectively. 55 | > str(D1, digits = 10, vec.len = 12) 56 | List of 15 57 | $ call : language dip(x = 10000 * ex1, full.result = TRUE, debug = 2) 58 | $ x : num [1:12] 198 198 1961 2898 3184 3687 4336 4987 5661 6530 7476 8555 59 | $ n : int 12 60 | $ dip : num 0.08333333333 61 | $ lo.hi : int [1:2] 4 9 62 | $ ifault : int 0 63 | $ gcm : int [1:2] 9 4 64 | $ lcm : int [1:6] 4 5 6 7 8 9 65 | $ mn : int [1:12] 1 1 1 1 4 4 4 4 4 4 4 4 66 | $ mj : int [1:12] 2 9 6 5 6 7 8 9 10 11 12 12 67 | $ min.is.0 : logi FALSE 68 | $ debug : int 2 69 | $ xl : num 2898 70 | $ xu : num 5661 71 | $ full.result: logi TRUE 72 | - attr(*, "class")= chr "dip" 73 | > 74 | > (D2 <- dip(10000*ex2, full=TRUE, debug=2)) 75 | dip() in C: n = 12; starting with 2N*dip = 1. 76 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 12]; l_lcm/gcm = ( 6, 4) 77 | while(gcm[ix] != lcm[iv]) : 78 | G(3,2) --> ix = 2, iv = 2 79 | G(2,2) --> ix = 1, iv = 2 80 | --> ix = 1, iv = 3 81 | --> ix = 1, iv = 4 82 | --> ix = 1, iv = 5 83 | --> ix = 1, iv = 6 84 | calculating dip .. (dip_l, dip_u) = (1, 1) 85 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 3, 8]; l_lcm/gcm = ( 6, 2) 86 | while(gcm[ix] != lcm[iv]) : 87 | L(2,2) --> ix = 1, iv = 3 88 | L(2,3) --> ix = 1, iv = 4 89 | --> ix = 1, iv = 5 90 | --> ix = 1, iv = 6 91 | calculating dip .. (dip_l, dip_u) = (0, 1) 92 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 3, 5]; l_lcm/gcm = ( 3, 2) 93 | while(gcm[ix] != lcm[iv]) : 94 | L(2,2) --> ix = 1, iv = 3 95 | calculating dip .. (dip_l, dip_u) = (0, 1) 96 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 3, 4]; l_lcm/gcm = ( 2, 2) 97 | ** (l_lcm,l_gcm) = (2,2) ==> d := 1 98 | calculating dip .. (dip_l, dip_u) = (0, 0) 99 | No improvement in low = 3 nor high = 4 --> END 100 | 101 | Call: 102 | dip(x = 10000 * ex2, full.result = TRUE, debug = 2) 103 | 104 | n = 12. Dip statistic, D_n = 0.04166667 = 1/(2n) 105 | Modal interval [xL, xU] = [x[3], x[4]] = [2898, 3184] 106 | GCM and LCM have 2 and 2 nodes inside [xL, xU], respectively. 107 | > str(D2, digits = 10, vec.len = 12) 108 | List of 15 109 | $ call : language dip(x = 10000 * ex2, full.result = TRUE, debug = 2) 110 | $ x : num [1:12] 198 1961 2898 3184 3687 4336 4987 5661 6530 7476 8555 9912 111 | $ n : int 12 112 | $ dip : num 0.04166666667 113 | $ lo.hi : int [1:2] 3 4 114 | $ ifault : int 0 115 | $ gcm : int [1:2] 4 3 116 | $ lcm : int [1:2] 3 4 117 | $ mn : int [1:12] 1 1 2 3 3 3 3 3 3 3 3 3 118 | $ mj : int [1:12] 8 5 4 5 6 7 8 9 10 11 12 12 119 | $ min.is.0 : logi FALSE 120 | $ debug : int 2 121 | $ xl : num 2898 122 | $ xu : num 3184 123 | $ full.result: logi TRUE 124 | - attr(*, "class")= chr "dip" 125 | > 126 | -------------------------------------------------------------------------------- /R/dip.R: -------------------------------------------------------------------------------- 1 | ### S-interface to Hartigan's algorithm for "The dip test for unimodality" 2 | ### 3 | ### Beginning: Dario Ringach 4 | ### Rest: Martin Maechler 5 | 6 | dip <- function(x, full.result = FALSE, min.is.0 = FALSE, debug = FALSE) 7 | { 8 | allRes <- (!is.logical(rFull <- full.result)) 9 | if(allRes) { 10 | if(full.result %in% c("all")) 11 | rFull <- TRUE 12 | else stop(gettextf("'full.result' = \"%s\"' is not valid", full.result)) 13 | } 14 | if(rFull) cl <- match.call() 15 | 16 | if(is.unsorted(x)) 17 | x <- sort(x, method="quick") 18 | n <- as.integer(length(x)) 19 | r <- .C(diptst, 20 | x = as.double(x), 21 | n = n, 22 | dip = double(1), 23 | lo.hi = integer(4), 24 | ifault= integer(1), 25 | gcm = integer(n), 26 | lcm = integer(n), 27 | mn = integer(n), 28 | mj = integer(n), 29 | min.is.0 = as.logical(min.is.0), 30 | debug = as.integer(debug)# FALSE/TRUE or 2, 3, ... 31 | )[if(rFull) TRUE else "dip"] 32 | if(rFull) { 33 | l.GL <- r$lo.hi[3:4] 34 | length(r$gcm) <- l.GL[1] 35 | length(r$lcm) <- l.GL[2] 36 | length(r$lo.hi) <- 2L 37 | u <- x[r$lo.hi] 38 | structure(class = "dip", 39 | c(list(call = cl), r, 40 | list(xl = u[1], xu = u[2], full.result=full.result), 41 | if(allRes) getCM(r$mn, r$mj, n))) 42 | } 43 | else r[[1]] 44 | } 45 | 46 | getCM <- function(mn, mj, n = length(mn)) { 47 | stopifnot(length(mn) <= n, length(mj) <= n) # currently '=='... 48 | ## First recover "the full GCM / LCM" - by repeating what happened in C 49 | ## in the first "loop" : 50 | low <- 1L ; high <- n 51 | gcm <- lcm <- integer(n) # pre-allocate! {maybe smaller ?} 52 | 53 | ## Collect the change points for the GCM from HIGH to LOW. */ 54 | gcm[i <- 1L] <- high 55 | while(gcm[i] > low) 56 | gcm[(i <- i+1L)] <- mn[gcm[i]] 57 | length(gcm) <- i 58 | 59 | ## Collect the change points for the LCM from LOW to HIGH. */ 60 | lcm[i <- 1L] <- low 61 | while(lcm[i] < high) 62 | lcm[(i <- i+1L)] <- mj[lcm[i]] 63 | length(lcm) <- i 64 | list(GCM = gcm, LCM = lcm) 65 | } 66 | 67 | print.dip <- function(x, digits = getOption("digits"), ...) 68 | { 69 | stopifnot(is.integer(n <- x$n), is.numeric(D <- x$dip), 70 | length(lh <- x$lo.hi) == 2) 71 | cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), 72 | "\n\n", sep = "") 73 | xLU.c <- sapply(x$x[lh], formatC, digits=digits, width=1) 74 | cat("n = ", n,". Dip statistic, D_n = ", 75 | format(D, digits=digits)," = ", 76 | format(2*n* D, digits=digits),"/(2n)\n", 77 | sprintf(" Modal interval [xL, xU] = [x[%d], x[%d]] = [%s, %s]\n", 78 | lh[1], lh[2], xLU.c[1], xLU.c[2]), 79 | sprintf(" GCM and LCM have %d and %d nodes inside [xL, xU], respectively", 80 | ## 3 5 7 9 1 3 5 7 81 | length(x$gcm), length(x$lcm)), 82 | if(x$full.result == "all") 83 | sprintf(", and\n%17s %d and %d nodes in [x_1, x_n].\n", "", 84 | length(x$GCM), length(x$LCM)) else ".\n", 85 | sep="") 86 | invisible(x) 87 | } 88 | 89 | aLine <- function(r.dip, lType = c("gcm","lcm","GCM","LCM"), 90 | type = "b", col="red3", lwd=1.5, ...) 91 | { 92 | lType <- match.arg(lType) 93 | stopifnot(is.numeric(x <- r.dip$x), 94 | length(r.dip$n) == 1, r.dip$n == round(r.dip$n), 95 | is.integer(i <- r.dip[[lType]]) # 'gcm' or 'lcm' or component 96 | ) 97 | e <- if(lType %in% c("gcm","GCM")) .01*min(diff(unique(x))) else 0 98 | i <- i[i != 0] 99 | lines(x[i], ecdf(x)(x[i] - e), 100 | type=type, col=col, lwd=lwd, ...) 101 | } 102 | 103 | plot.dip <- function(x, do.points=(n < 20), ## <- plot.stepfun() 104 | colG="red3", colL="blue3", colM="forest green", 105 | col.points=par("col"), col.hor=col.points, ## <- plot.stepfun(): 106 | doModal=TRUE, doLegend=TRUE, ...) 107 | { 108 | stopifnot(is.integer(n <- x$n), is.numeric(D <- x$dip), 109 | length(lh <- x$lo.hi) == 2) 110 | Fn <- ecdf(x$x) 111 | ## and now manipulate the call such that it's plotted nicely 112 | cl <- x$call[1:2] 113 | cl[[1]] <- as.name("ecdf") ; names(cl)[2] <- "" 114 | attr(Fn, "call") <- cl 115 | chD <- formatC(D, digits=pmax(3, getOption("digits")-2)) 116 | tit <- bquote("Dip" ~~ {D[n] == D[.(n)]} == .(chD)) 117 | plot(Fn, do.points=do.points, col.points=col.points, col.hor=col.hor, 118 | verticals=TRUE, col.vert = "sky blue", lwd=2, ...) 119 | title(tit, adj = 0, line = 1.25) 120 | aLine(x, "gcm", col=colG) 121 | aLine(x, "lcm", col=colL) 122 | if(doCM.2 <- (x$full.result == "all")) { 123 | aLine(x, "GCM", col=colG, lty=5) 124 | aLine(x, "LCM", col=colL, lty=5) 125 | } 126 | if(doModal) { 127 | x12 <- x$x[lh] 128 | abline(v= x12, col = colM, lty = 2) 129 | op <- par(mgp = c(3, 1/16, 0))# should not need on.exit(par(op)) here .. 130 | axis(3, at=x12, labels = expression(x[L], x[U]), 131 | tick=FALSE, col.axis = colM) 132 | par(op) 133 | } 134 | if(doLegend) { 135 | txt <- c("greatest convex minorant GCM", 136 | ### make sure have *no* [TAB] in next string ! 137 | "least concave majorant LCM") 138 | t1 <- paste(txt," in [xL, xU]") 139 | legend("topleft", bty = "n", 140 | if(doCM.2) c(t1, txt) else t1, 141 | lwd=1.5, col = c(colG, colL), lty= if(doCM.2) c(1,1,5,5) else 1) 142 | } 143 | invisible() 144 | } 145 | -------------------------------------------------------------------------------- /stuff/pre-bugfix-2003-11/dip.S: -------------------------------------------------------------------------------- 1 | ### S-interface to Hartigan's algorithm for "The dip test for unimodality" 2 | ### 3 | ### Beginning: Dario Ringach 4 | ### Rest: Martin Maechler 5 | 6 | ###--- 1st page of this file should be source(.)able !! ----------- 7 | 8 | if(!interactive()) { 9 | options(echo = T) #-- when being run as batch [FAILS for source() !!] 10 | print(date()) 11 | } 12 | 13 | 14 | DIP.dir <- getenv("PWD") 15 | 16 | hartigan.data <- 17 | c(30,33,35,36,37,37,39,39,39,39,39,40,40,40,40,41,42,43,43,43,44,44,45,45,46, 18 | 46,47,47,48,48,48,49,50,50,51,52,52,53,53,53,53,53,54,54,57,57,59,60,60,60, 19 | 61,61,61,61,62,62,62,62,63,66,70,72,72) 20 | 21 | dip <- function(x, full.result = FALSE, debug = F) 22 | { 23 | ## Purpose: Compute the "Dip test for unimodality" (statistic) 24 | ## ------------------------------------------------------------------------- 25 | ## Arguments: x: the Data, full.result: return also 'xl', 'xu' (modal interv.) 26 | ## ------------------------------------------------------------------------- 27 | ## Author: Martin Maechler , Jul 94 28 | ## from 1st version of Dario Ringach 29 | 30 | if(!is.loaded(symbol.C("diptst"))) 31 | dyn.load(paste(DIP.dir,"dip.o", sep="/")) 32 | 33 | n <- length(x) 34 | 35 | ii <- if(full.result) 1:11 else "dip" 36 | simplify <- function(lis) if(length(lis)==1) as.single(lis[[1]]) else lis 37 | simplify(.C("diptst", 38 | x = as.single(sort(x)), 39 | n = as.integer(n), 40 | dip = single(1), 41 | xl = single(1), 42 | xu = single(1), 43 | ifault= integer(1), 44 | gcm = integer(n), 45 | lcm = integer(n), 46 | mn = integer(n), 47 | mj = integer(n), 48 | debug= as.integer(debug))[ii]) 49 | ##- if(z$ifault!=0) #-- something not ok, but this is IMPOSSIBLE here 50 | ##- stop(paste("Problem -- C 'message' : ifault = ", z$ifault)) 51 | } 52 | 53 | dip(hstart)## 0.03998769 54 | dip(ship) ## 0.02328392 55 | dip(iris) ## 0.02874999 56 | dip(hartigan.data) ## Should give 0.05952381 57 | str(dip.hh <- dip(hartigan.data, full = T)) 58 | 59 | ## NOTA BENE: dip(1:10) gives INFINITE LOOP (error in Hartigan's Algorithm!) 60 | 61 | dip.1.10_rep(0,100) 62 | unix.time(for(i in 1:100) dip.1.10[i] <- dip(jitter(1:10))) 63 | ##--> 2.3 CPU sec. on Sparstation 10 (ingrid) 64 | summary(dip.1.10) 65 | ##- Min. 1st Qu. Median Mean 3rd Qu. Max. 66 | ##- 0.05 0.05757 0.06016 0.05979 0.06237 0.06686 67 | ##- 0.05 0.058 0.06027 0.06011 0.06231 0.06696 68 | ##-- ~~~~ EXAKT (9 times out of 200) 69 | 70 | 71 | 72 | cdf <- function(x, X=x) 73 | { 74 | ## Purpose: Empirical distribution function F_{X}(x) 75 | ## ------------------------------------------------------------------------- 76 | ## Arguments: x: vector of ARGuments; X : data 77 | ## ------------------------------------------------------------------------- 78 | ## Author: Martin Maechler, Date: 28 Jul 94, 18:03 79 | apply(outer(X,x,"<="),2,sum) / length(X) 80 | } 81 | 82 | p.cdf <- function(X, type='b',...) 83 | { 84 | ## Purpose: PLOT empirical distribution function 85 | ## ------------------------------------------------------------------------- 86 | ## Arguments: 87 | ## ------------------------------------------------------------------------- 88 | ## Author: Martin Maechler, Date: 30 Jul 94, 11:41 89 | X _ sort(X) 90 | plot(X, cdf(X), type=type, ...) 91 | } 92 | 93 | ## NOTE: For plotting, Martin Maechler's plot.step(X) is MUCH better ! 94 | 95 | 96 | dip0 <- function(x, debug=F) 97 | { 98 | ## Load in ANY case ! if(!is.loaded(symbol.C("diptst"))) 99 | dyn.load(paste(DIP.dir,"dip0.o", sep="/")) 100 | n <- length(x) 101 | .C("diptst", 102 | as.single(sort(x)), 103 | as.integer(n), 104 | ans = single(1), 105 | as.single(0), 106 | as.single(0), 107 | as.integer(0), 108 | integer(n), 109 | integer(n), 110 | integer(n), 111 | integer(n), 112 | as.integer(debug)) $ans 113 | } 114 | dip0(hartigan.data) ##-> [1] 0.05952381 115 | 116 | ## Load my current "diptst" C-symbol ! 117 | dyn.load(paste(DIP.dir,"dip.o", sep="/")) 118 | 119 | 120 | dipF <- function(x) 121 | { 122 | if(!is.loaded(symbol.For("diptst"))) 123 | dyn.load(paste(DIP.dir,"dipF.o", sep="/")) 124 | n <- length(x) 125 | .Fortran("diptst", 126 | as.single(sort(x)), 127 | as.integer(n), 128 | ans = single(1), 129 | as.single(0), 130 | as.single(0), 131 | as.integer(0), 132 | integer(n), 133 | integer(n), 134 | integer(n), 135 | integer(n)) $ans 136 | } 137 | dipF(hartigan.data) ##-> [1] 0.05952381 138 | 139 | ### Test if dip.c is still okay, after all my hacking at the code 140 | ### (dipF.f which is used by dipF(..) is ORIGINAL !) 141 | 142 | for(i in 1:100){x_runif(50); if(dip(x) != dipF(x)) { xx<<-x; stop("DIFFERENT")}} 143 | for(i in 1:100) { 144 | x_c(rnorm(100), 5+ rnorm(20)) 145 | if(dip(x) != dipF(x)) { xx<<-x; stop("DIFFERENT") } } 146 | 147 | 148 | ##-- NOTE: for small 'n' (n=4,5, even 8, 10 (rarely!)) sometimes INFINITE loop!! 149 | 150 | ###===> FIXED in big.c !! 151 | 152 | x4.ok <- x4.bad <- NULL 153 | xx4_round(runif(4),3); cat(format(dipF(xx4))); x4.ok_cbind(x4.ok,xx4) 154 | ##-- C-c (kill) the above line if running long; then do 155 | x4.bad <- cbind(x4.bad, xx4, deparse.level=0) 156 | 157 | ##-- After a while (MANUALLY !) 158 | dim(x4.bad) # 4 10 159 | dim(x4.ok) # 4 34 160 | 161 | 162 | ##-- n=10 even can be bad: 1st example: 1:10 163 | ## 2nd example: 164 | n_10; set.seed(99);for(i in 1:3635) x _ runif(n) 165 | x10.bad _ round(sort(x),4) 166 | dipF(round(x10.bad,3)) #-- .05 (minimal value) 167 | plot.step(x10.bad); subtit(vcat(x10.bad,sep=", ")) 168 | 169 | u.dev.default() 170 | nb_ncol(x4.bad); mult.fig(nb); for(i in 1:nb) plot.step(x4.bad[,i],cad.lag=F,main="") 171 | u.dev.default() 172 | no_ncol(x4.ok); mult.fig(no); for(i in 1:no) plot.step(x4.ok[,i],cad.lag=F,main="") 173 | -------------------------------------------------------------------------------- /man/dip.Rd: -------------------------------------------------------------------------------- 1 | \name{dip} 2 | \alias{dip} 3 | \title{Compute Hartigans' Dip Test Statistic for Unimodality} 4 | \concept{multimodality} 5 | \description{ 6 | Computes Hartigans' dip test statistic for testing unimodality, 7 | and additionally the modal interval. 8 | } 9 | \usage{ 10 | dip(x, full.result = FALSE, min.is.0 = FALSE, debug = FALSE) 11 | } 12 | \arguments{ 13 | \item{x}{numeric; the data.} 14 | \item{full.result}{logical or string; \code{dip(., full.result=TRUE)} returns the full result 15 | list; if \code{"all"} it additionally uses the \code{mn} and 16 | \code{mj} components to compute the initial GCM and LCM, see below.} 17 | \item{min.is.0}{logical indicating if the \bold{min}imal value of the 18 | dip statistic \eqn{D_n}{Dn} can be zero or not. Arguably should be 19 | set to \code{TRUE} for internal consistency reasons, but is false by 20 | default both for continuity and backwards compatibility reasons, see 21 | the examples below.} 22 | % backcompatibility both with earlier 23 | % versions of the \pkg{diptest} package, and with Hartigan's original 24 | % implementation.} 25 | \item{debug}{logical; if true, some tracing information is printed 26 | (from the C routine).} 27 | } 28 | \value{ 29 | depending on \code{full.result} either a number, the dip statistic, or 30 | an object of class \code{"dip"} which is a \code{\link{list}} with components 31 | \item{x}{the sorted \code{\link{unname}()}d data.} 32 | \item{n}{\code{length(x)}.} 33 | \item{dip}{the dip statistic} 34 | \item{lo.hi}{indices into \code{x} for lower and higher end of modal interval} 35 | \item{xl, xu}{lower and upper end of modal interval} 36 | \item{gcm, lcm}{(last used) indices for \bold{g}reatest \bold{c}onvex 37 | \bold{m}inorant and the \bold{l}east \bold{c}oncave \bold{m}ajorant.} 38 | \item{mn, mj}{index vectors of length \code{n} for the GC minorant and 39 | the LC majorant respectively.} 40 | 41 | For \dQuote{full} results of class \code{"dip"}, there are 42 | \code{\link{print}} and \code{\link{plot}} methods, the latter with 43 | its own \link[=plot.dip]{manual page}. 44 | } 45 | \note{ 46 | For \eqn{n \le 3}{n <= 3} where \code{n <- length(x)}, the dip 47 | statistic \eqn{D_n}{Dn} is always the same minimum value, 48 | \eqn{1/(2n)}, i.e., there's no possible dip test. 49 | Note that up to May 2011, from Hartigan's original Fortran code, \code{Dn} 50 | was set to zero, when all \code{x} values were identical. However, 51 | this entailed discontinuous behavior, where for arbitrarily close 52 | data \eqn{\tilde x}{x~}, \eqn{D_n(\tilde x) = \frac 1{2n}}{Dn(x~) = 1/(2n)}. 53 | 54 | Yong Lu \email{lyongu+@cs.cmu.edu} found in Oct 2003 that the code 55 | was not giving symmetric results for mirrored data (and was giving 56 | results of almost 1, and then found the reason, a misplaced \samp{")"} 57 | in the original Fortran code. This bug has been corrected for diptest 58 | version 0.25-0 (Feb 13, 2004). 59 | 60 | Nick Cox (Durham Univ.) said (on March 20, 2008 on the Stata-list):\cr 61 | As it comes from a bimodal husband-wife collaboration, the name 62 | perhaps should be \emph{\dQuote{Hartigan-Hartigan dip test}}, but that 63 | does not seem to have caught on. Some of my less statistical 64 | colleagues would sniff out the hegemony of patriarchy there, although 65 | which Hartigan is being overlooked is not clear. 66 | 67 | Martin Maechler, as a Swiss, and politician, would say:\cr 68 | Let's find a compromise, and call it \emph{\dQuote{Hartigans' dip test}}, 69 | so we only have to adapt orthography (:-). 70 | } 71 | \references{ 72 | P. M. Hartigan (1985) 73 | Computation of the Dip Statistic to Test for Unimodality; 74 | \emph{Applied Statistics (JRSS C)} \bold{34}, 320--325.\cr 75 | Corresponding (buggy!) Fortran code of \sQuote{AS 217} available from Statlib, 76 | \url{https://lib.stat.cmu.edu/apstat/217} 77 | 78 | J. A. Hartigan and P. M. Hartigan (1985) 79 | The Dip Test of Unimodality; 80 | \emph{Annals of Statistics} \bold{13}, 70--84. 81 | } 82 | \author{Martin Maechler \email{maechler@stat.math.ethz.ch}, 1994, 83 | based on S (S-PLUS) and C code donated from Dario Ringach 84 | \email{dario@wotan.cns.nyu.edu} who had applied \command{f2c} on the 85 | original Fortran code available from Statlib. 86 | 87 | In Aug.1993, recreated and improved Hartigans' "P-value" table, which 88 | later became \code{\link{qDiptab}}. 89 | } 90 | \seealso{ 91 | \code{\link{dip.test}} to compute the dip \emph{and} perform the unimodality test, 92 | based on P-values, interpolated from \code{\link{qDiptab}}; 93 | \code{\link{isoreg}} for isotonic regression. 94 | } 95 | \examples{ 96 | data(statfaculty) 97 | plot(density(statfaculty)) 98 | rug(statfaculty, col="midnight blue"); abline(h=0, col="gray") 99 | dip(statfaculty) 100 | (dS <- dip(statfaculty, full = TRUE, debug = TRUE)) 101 | plot(dS) 102 | ## even more output -- + plot showing "global" GCM/LCM: 103 | (dS2 <- dip(statfaculty, full = "all", debug = 3)) 104 | plot(dS2) 105 | 106 | data(faithful) 107 | fE <- faithful$eruptions 108 | plot(density(fE)) 109 | rug(fE, col="midnight blue"); abline(h=0, col="gray") 110 | dip(fE, debug = 2) ## showing internal work 111 | (dE <- dip(fE, full = TRUE)) ## note the print method 112 | plot(dE, do.points=FALSE) 113 | 114 | data(precip) 115 | plot(density(precip)) 116 | rug(precip, col="midnight blue"); abline(h=0, col="gray") 117 | str(dip(precip, full = TRUE, debug = TRUE)) 118 | 119 | ##----------------- The 'min.is.0' option : --------------------- 120 | 121 | ##' dip(.) continuity and 'min.is.0' exploration: 122 | dd <- function(x, debug=FALSE) { 123 | x_ <- x ; x_[1] <- 0.9999999999 * x[1] 124 | rbind(dip(x , debug=debug), 125 | dip(x_, debug=debug), 126 | dip(x , min.is.0=TRUE, debug=debug), 127 | dip(x_, min.is.0=TRUE, debug=debug), deparse.level=2) 128 | } 129 | 130 | dd( rep(1, 8) ) # the 3rd one differs ==> min.is.0=TRUE is *dis*continuous 131 | dd( 1:7 ) # ditto 132 | 133 | dd( 1:7, debug=TRUE) 134 | ## border-line case .. 135 | dd( 1:2, debug=TRUE) 136 | 137 | ## Demonstrate that 'min.is.0 = TRUE' does not change the typical result: 138 | B.sim <- 1000 # or larger 139 | D5 <- {set.seed(1); replicate(B.sim, dip(runif(5)))} 140 | D5. <- {set.seed(1); replicate(B.sim, dip(runif(5), min.is.0=TRUE))} 141 | stopifnot(identical(D5, D5.), all.equal(min(D5), 1/(2*5))) 142 | hist(D5, 64); rug(D5) 143 | 144 | D8 <- {set.seed(7); replicate(B.sim, dip(runif(8)))} 145 | D8. <- {set.seed(7); replicate(B.sim, dip(runif(8), min.is.0=TRUE))} 146 | stopifnot(identical(D8, D8.)) 147 | } 148 | \keyword{htest} 149 | \keyword{distribution} 150 | -------------------------------------------------------------------------------- /stuff/pre-bugfix-2003-11/dipF-statlib_2005-08-04.f: -------------------------------------------------------------------------------- 1 | SUBROUTINE DIPTST1(X,N,DIP,XL,XU,IFAULT,GCM,LCM,MN,MJ,DDX,DDXSGN) 2 | C 3 | C ALGORITHM AS 217 APPL. STATIST. (1985) VOL.34, NO.3 4 | C 5 | C Does the dip calculation for an ordered vector X using the 6 | C greatest convex minorant and the least concave majorant, skipping 7 | C through the data using the change points of these distributions. 8 | C It returns the dip statistic 'DIP' and the modal interval 9 | C (XL, XU). 10 | C 11 | C MODIFICATIONS SEP 2 2002 BY F. MECHLER TO FIX PROBLEMS WITH 12 | C UNIMODAL (INCLUDING MONOTONIC) INPUT 13 | C 14 | REAL X(N) 15 | INTEGER MN(N), MJ(N), LCM(N), GCM(N), HIGH 16 | REAL ZERO, HALF, ONE 17 | C NEXT TWO LINES ARE ADDED 18 | REAL DDX(N) 19 | INTEGER DDXSGN(N), POSK, NEGK 20 | DATA ZERO/0.0/, HALF/0.5/, ONE/1.0/ 21 | C 22 | IFAULT = 1 23 | IF (N .LE. 0) RETURN 24 | IFAULT = 0 25 | C 26 | C Check if N = 1 27 | C 28 | IF (N .EQ. 1) GO TO 4 29 | C 30 | C Check that X is sorted 31 | C 32 | IFAULT = 2 33 | DO 3 K = 2, N 34 | IF (X(K) .LT. X(K-1)) RETURN 35 | 3 CONTINUE 36 | IFAULT = 0 37 | C 38 | C Check for all values of X identical, 39 | C and for 1 < N < 4. 40 | C 41 | IF (X(N) .GT. X(1) .AND. N .GE. 4) GO TO 505 42 | 4 XL = X(1) 43 | XU = X(N) 44 | DIP = ZERO 45 | RETURN 46 | C The code amendment below is intended to be inseted above the line marked "5" in the original FORTRAN code 47 | C The amendment checks the condition whether the input X is perfectly unimodal 48 | C Hartigan's original DIPTST algorithm did not check for this condition 49 | C and DIPTST runs into an infinite cycle for a unimodal input 50 | C The condition that the input is unimodal is equivalent to having 51 | C at most 1 sign change in the second derivative of the input p.d.f. 52 | C In MATLAB syntax, we check the flips in the function xsign=-sign(diff(1./diff(x)))=-sign(diff(diff(x))); 53 | C with DDXSGN=xsign in the fortran code below 54 | 505 NEGK=0 55 | POSK=0 56 | DO 104 K = 3,N 57 | DDX(K) = X(K)+X(K-2)-2*X(K-1) 58 | IF (DDX(K) .LT. 0) DDXSGN(K) = 1 59 | IF (DDX(K) .EQ. 0) DDXSGN(K) = 0 60 | IF (DDX(K) .GT. 0) DDXSGN(K) = -1 61 | IF (DDXSGN(K) .GT. 0) POSK = K 62 | IF ((DDXSGN(K) .LT. 0) .AND. (NEGK .EQ. 0)) NEGK = K 63 | 104 CONTINUE 64 | 65 | C The condition check below examines whether the greatest position with a positive second derivative 66 | C is smaller than the smallest position with a negative second derivative 67 | C The boolean check gets it right even if 68 | C the unimodal p.d.f. has its mode in the very first or last point of the input 69 | 70 | IF ((POSK .GT. NEGK) .AND. (NEGK .GT. 0)) GOTO 5 71 | XL=X(1) 72 | XU=X(N) 73 | DIP=0 74 | IFAULT=5 75 | RETURN 76 | C 77 | C LOW contains the index of the current estimate of the lower end 78 | C of the modal interval, HIGH contains the index for the upper end. 79 | C 80 | 5 FN = FLOAT(N) 81 | LOW = 1 82 | HIGH = N 83 | DIP = ONE / FN 84 | XL = X(LOW) 85 | XU = X(HIGH) 86 | C 87 | C Establish the indices over which combination is necessary for the 88 | C convex minorant fit. 89 | C 90 | MN(1) = 1 91 | DO 28 J = 2, N 92 | MN(J) = J - 1 93 | 25 MNJ = MN(J) 94 | MNMNJ = MN(MNJ) 95 | A = FLOAT(MNJ - MNMNJ) 96 | B = FLOAT(J - MNJ) 97 | IF (MNJ .EQ. 1 .OR. (X(J) - X(MNJ))*A .LT. (X(MNJ) - X(MNMNJ)) 98 | + *B) GO TO 28 99 | MN(J) = MNMNJ 100 | GO TO 25 101 | 28 CONTINUE 102 | C 103 | C Establish the indices over which combination is necessary for the 104 | C concave majorant fit. 105 | C 106 | MJ(N) = N 107 | NA = N - 1 108 | DO 34 JK = 1, NA 109 | K = N - JK 110 | MJ(K) = K + 1 111 | 32 MJK = MJ(K) 112 | MJMJK = MJ(MJK) 113 | A = FLOAT(MJK - MJMJK) 114 | B = FLOAT(K - MJK) 115 | IF (MJK .EQ. N .OR. (X(K) - X(MJK))*A .LT. (X(MJK) - X(MJMJK)) 116 | + *B) GO TO 34 117 | MJ(K) = MJMJK 118 | GO TO 32 119 | 34 CONTINUE 120 | C 121 | C Start the cycling. 122 | C Collect the change points for the GCM from HIGH to LOW. 123 | C 124 | 40 IC = 1 125 | GCM(1) = HIGH 126 | 42 IGCM1 = GCM(IC) 127 | IC = IC + 1 128 | GCM(IC) = MN(IGCM1) 129 | IF (GCM(IC) .GT. LOW) GO TO 42 130 | ICX = IC 131 | C 132 | C Collect the change points for the LCM from LOW to HIGH. 133 | C 134 | IC = 1 135 | LCM(1) = LOW 136 | 44 LCM1 = LCM(IC) 137 | IC = IC + 1 138 | LCM(IC) = MJ(LCM1) 139 | IF (LCM(IC) .LT. HIGH) GO TO 44 140 | ICV = IC 141 | C 142 | C ICX, IX, IG are counters for the convex minorant, 143 | C ICV, IV, IH are counters for the concave majorant. 144 | C 145 | IG = ICX 146 | IH = ICV 147 | C 148 | C Find the largest distance greater than 'DIP' between the GCM and 149 | C the LCM from LOW to HIGH. 150 | C 151 | IX = ICX - 1 152 | IV = 2 153 | D = ZERO 154 | IF (ICX .NE. 2 .OR. ICV .NE. 2) GO TO 50 155 | D = ONE / FN 156 | GO TO 65 157 | 50 IGCMX = GCM(IX) 158 | LCMIV = LCM(IV) 159 | IF (IGCMX .GT. LCMIV) GO TO 55 160 | C 161 | C If the next point of either the GCM or LCM is from the LCM, 162 | C calculate the distance here. 163 | C 164 | LCMIV1 = LCM(IV - 1) 165 | A = FLOAT(LCMIV - LCMIV1) 166 | B = FLOAT(IGCMX - LCMIV1 - 1) 167 | DX = (X(IGCMX) - X(LCMIV1))*A / (FN*(X(LCMIV) - X(LCMIV1))) 168 | + - B / FN 169 | IX = IX - 1 170 | IF (DX .LT. D) GO TO 60 171 | D = DX 172 | IG = IX + 1 173 | IH = IV 174 | GO TO 60 175 | C 176 | C If the next point of either the GCM or LCM is from the GCM, 177 | C calculate the distance here. 178 | C 179 | 55 LCMIV = LCM(IV) 180 | IGCM = GCM(IX) 181 | IGCM1 = GCM(IX + 1) 182 | A = FLOAT(LCMIV - IGCM1 + 1) 183 | B = FLOAT(IGCM - IGCM1) 184 | DX = A / FN - ((X(LCMIV) - X(IGCM1))*B) / (FN * (X(IGCM) 185 | + - X(IGCM1))) 186 | IV = IV + 1 187 | IF (DX .LT. D) GO TO 60 188 | D = DX 189 | IG = IX + 1 190 | IH = IV - 1 191 | 60 IF (IX .LT. 1) IX = 1 192 | IF (IV .GT. ICV) IV = ICV 193 | IF (GCM(IX) .NE. LCM(IV)) GO TO 50 194 | 65 IF (D .LT. DIP) GO TO 100 195 | C 196 | C Calculate the DIPs for the current LOW and HIGH. 197 | C 198 | C The DIP for the convex minorant. 199 | C 200 | DL = ZERO 201 | IF (IG .EQ. ICX) GO TO 80 202 | ICXA = ICX - 1 203 | DO 76 J = IG, ICXA 204 | TEMP = ONE / FN 205 | JB = GCM(J + 1) 206 | JE = GCM(J) 207 | IF (JE - JB .LE. 1) GO TO 74 208 | IF (X(JE) .EQ. X(JB)) GO TO 74 209 | A = FLOAT(JE - JB) 210 | CONST = A / (FN * (X(JE) - X(JB))) 211 | DO 72 JR = JB, JE 212 | B = FLOAT(JR - JB + 1) 213 | T = B / FN - (X(JR) - X(JB))*CONST 214 | IF (T .GT. TEMP) TEMP = T 215 | 72 CONTINUE 216 | 74 IF (DL .LT. TEMP) DL = TEMP 217 | 76 CONTINUE 218 | C 219 | C The DIP for the concave majorant. 220 | C 221 | 80 DU = ZERO 222 | IF (IH .EQ. ICV) GO TO 90 223 | ICVA = ICV - 1 224 | DO 88 K = IH, ICVA 225 | TEMP = ONE / FN 226 | KB = LCM(K) 227 | KE = LCM(K + 1) 228 | IF (KE - KB .LE. 1) GO TO 86 229 | IF (X(KE) .EQ. X(KB)) GO TO 86 230 | A = FLOAT(KE - KB) 231 | CONST = A / (FN * (X(KE) - X(KB))) 232 | DO 84 KR = KB, KE 233 | B = FLOAT(KR - KB - 1) 234 | T = (X(KR) - X(KB))*CONST - B / FN 235 | IF (T .GT. TEMP) TEMP = T 236 | 84 CONTINUE 237 | 86 IF (DU .LT. TEMP) DU = TEMP 238 | 88 CONTINUE 239 | C 240 | C Determine the current maximum. 241 | C 242 | 90 DIPNEW = DL 243 | IF (DU .GT. DL) DIPNEW = DU 244 | IF (DIP .LT. DIPNEW) DIP = DIPNEW 245 | LOW = GCM(IG) 246 | HIGH = LCM(IH) 247 | C 248 | C Recycle 249 | C 250 | GO TO 40 251 | C 252 | 100 DIP = HALF * DIP 253 | XL = X(LOW) 254 | XU = X(HIGH) 255 | C 256 | RETURN 257 | END 258 | 259 | -------------------------------------------------------------------------------- /stuff/new-sim-analysis.R: -------------------------------------------------------------------------------- 1 | ### From the large (Ns = 100001) simulation in 2 | ### ./new-simul.R 3 | ### ~~~~~~~~~~~~ 4 | 5 | Ns <- 1000001 6 | 7 | if(require("diptest") && is.character(data(qDiptab))) { 8 | dn <- dimnames(qDiptab) 9 | 10 | nn <- as.numeric(dn[[1]]) 11 | P.p <- as.numeric(dn[[2]]) 12 | 13 | } else { ## no longer needed, now we have 'qDiptab' 14 | setwd("/u/maechler/R/Pkgs/diptest/stuff") 15 | 16 | ## "wrong" dip stat: load("/u/maechler/R/Pkgs/diptest/stuff/dipSim_1e5.rda") 17 | load("dipSim_1e6.rda") 18 | 19 | stopifnot(identical(P.p, as.numeric(colnames(P.dip))), 20 | identical(nn, as.numeric(rownames(P.dip)))) 21 | 22 | names(dimnames(P.dip)) <- c("n","Pr") 23 | 24 | data(qDiptab, package="diptest") 25 | identical(P.dip, qDiptab) # ! 26 | 27 | attr(P.dip, "N_1") <- as.integer(Ns - 1) 28 | 29 | ## new data set! 30 | qDiptab <- P.dip 31 | 32 | } # end {else: no longer needed} 33 | 34 | 35 | Pp <- P.p [ P.p < 1]# not max() = 100% percentile 36 | qDip.Rn <- qDiptab[, P.p < 1]*sqrt(nn) 37 | nP <- length(Pp) 38 | 39 | ## Titles 40 | nqdip.tit <- 41 | expression(sqrt(n) %*% " percentile of "* list(dip(X), 42 | " "* X *" ~ " * U * group("[",list(0,1),"]"))* 43 | " vs. " * n) 44 | y.tit <- expression(sqrt(n) *" " %*% " " * qDip) 45 | (y.titL <- substitute(F * " [log scaled]", list(F = y.tit[[1]]))) 46 | 47 | mPerclegend <- function(x,y, pr) { 48 | nP <- length(pr) 49 | legend(x, y, legend= 50 | rev(paste(c(paste(c(1:9,0)),letters)[1:nP], 51 | paste(100*pr,"%",sep=""), sep=": ")), 52 | col = rev(rep(1:6,length=nP)), 53 | lty = rev(rep(1:5,length=nP)), bty='n') 54 | } 55 | 56 | if(FALSE)## for paper --- write a vignette !! --- 57 | sfsmisc::pdf.latex("dip_critical.pdf") 58 | ## for print out, or "sending along": 59 | 60 | pdf.do("dip_critical.pdf", paper = "a4") 61 | 62 | matplot(nn, qDip.Rn, type = "n", yaxt = "n", 63 | xlab = 'n [log scale]', ylab = y.tit, 64 | xlim = range(1.5,nn), log='x', main= nqdip.tit) 65 | abline(h = seq(0.1, 0.9, by = 0.05), col = "gray80", lty = 3) 66 | op <- par(las=2); for(i in c(2,4)) axis(i, at = seq(0.1,0.9, by=0.1)); par(op) 67 | mPerclegend(1.0, 0.95, Pp) 68 | matlines(nn, qDip.Rn[, "0.5"], lwd = 3, col = "dark gray") 69 | matlines(nn, qDip.Rn, type = 'o') 70 | mtext(paste(Ns, " simulated samples"), 3, line=0) 71 | mtext("© Martin Maechler, ETH Zurich", 1, line = 3.2, adj = 1) 72 | 73 | pdf.end() 74 | 75 | mtext(paste(getwd(), date(), sep="\n"), 4, cex=.8, adj=0) 76 | 77 | ## "research" : 78 | ## 1. prove that min(dip) = 1/(2 * n) 79 | ## {I'm sure this follows from the "string" equivalence; 80 | ## but that is not according to theory D(F) dip definition, where 81 | ## D(F) = 0 <==> F itself is unimodal 82 | ## Can we prove that dip(F_n) = 1/(2n) ===> F_n is unimodal ? 83 | ## 84 | ## 2. derive the function ff(n) := Prob[dip = 1/(2 * n)] 85 | 86 | 87 | ## log y: --->> more symmetric distributions, but still skewed to the right 88 | ## ----- (asymptotic) is of interest, but also: how to do interpolation 89 | nP <- sum(smP <- 0.01 < Pp & Pp <= .999)# only "relevant subset" 90 | matplot(nn, qDip.Rn[, smP], type='n', log='xy', xlim = range(1.5,nn), 91 | xlab = 'n [log scale]',ylab = y.titL, main = nqdip.tit) 92 | mPerclegend(1.0, .75, Pp[smP]) 93 | matlines(nn, qDip.Rn[, "0.5"], lwd = 3, col = "dark gray") 94 | matlines(nn, qDip.Rn[, smP], type = 'o') 95 | mtext(paste(Ns, " simulated samples"), side = 3, line = 0) 96 | 97 | 98 | 99 | ### only larger N to see if it became constant: 100 | nN <- sum(nL <- nn > 100) 101 | matplot(nn[nL], qDip.Rn[nL,], type='o', xlim = range(50,nn[nL]), 102 | log='x', xlab = 'n [log scale]', ylab = y.tit, main = nqdip.tit) 103 | mtext(paste(Ns, " simulated samples"), side = 3, line = 0) 104 | mPerclegend("topleft", NULL, Pp) 105 | matlines(nn[nL], qDip.Rn[nL, "0.5"], lwd = 4, 106 | col = adjustcolor("black", 0.4)) 107 | 108 | 109 | ##-- Asymptotic : see more in ./asymp-distrib.R 110 | ## ~~~~~~~~~~~~~~~ 111 | 112 | 113 | mult.fig(9, main = "dip(U[0,1]) distribution {simulated} -- for small n") 114 | for (cn in nn[1:9]) { 115 | plot(qDiptab[paste(cn),], P.p, 116 | xlab = "dip = d(x[1 .. n])", ylab = expression(P(D >= d)), 117 | type = 'o', cex = 0.6, main = paste("n = ",cn)) 118 | abline(h=0:1, col="gray") 119 | } 120 | ### 121 | 122 | 123 | ## Another thing: Draw "density" from quantiles only: 124 | ## Use derivative of cubic spline interpolation or so? 125 | 126 | 127 | P2dens <- function(x, probs, eps.p = 1e-7, xlim = NULL, f.lim = 0.5, 128 | method = c("interpSpline", "monoH.FC", "natural")) 129 | { 130 | ## Purpose: Density from probabilty/quantiles -- return a *function* 131 | ## ---------------------------------------------------------------------- 132 | ## Arguments: x: quantiles , x[1:n] 133 | ## probs: probabilities, i.e., Pr{X <= x[i]} == probs[i] 134 | ## eps.p: small value used in: 135 | ## xlim: if(probs[1] > eps.p and/or probs[n] < 1-eps.p), use 136 | ## x[0] = xlim[1] and/or x[n+1] = xlim[2] with 137 | ## probs[0] = 0 and/or probs[n+1] = 1 138 | ## Per default, xlim = range(x) + f.lim * c(-d,d), 139 | ## where d = diff(range(x)) = max(x) - min(x) 140 | ## ---------------------------------------------------------------------- 141 | ## Author: Martin Maechler, Date: 14 Jul 2003, 14:57 142 | if((n <- length(x)) != length(probs)) 143 | stop("'x' and 'probs' must have same length") 144 | if(any(0 > probs | probs > 1)) 145 | stop("'probs' must be in [0,1]") 146 | if(is.unsorted(probs)) { 147 | sp <- sort(probs, index.return=TRUE) 148 | x <- x[sp$ix] 149 | probs <- sp$x 150 | rm(sp) 151 | } 152 | 153 | if((L <- probs[1] > eps.p) | 154 | (R <- probs[n] < 1-eps.p)) { 155 | d <- diff(r <- range(x)) 156 | if(L) { x <- c(r[1] - f.lim*d, x); probs <- c(0, probs) } 157 | if(R) { x <- c(x, r[2] + f.lim*d); probs <- c(probs, 1) } 158 | rm(r,d,L,R) 159 | } 160 | 161 | method <- match.arg(method) 162 | switch(method, 163 | "interpSpline" = { 164 | library(splines) 165 | Fspl <- interpSpline(x, probs) 166 | rm(x,probs) 167 | function(x) predict(Fspl, x, deriv = 1) $y 168 | }, 169 | "monoH.FC" = { 170 | f <- splinefun(x, probs, method="mono") 171 | formals(f)[["deriv"]] <- 1 172 | f 173 | }, 174 | "natural" = { 175 | f <- splinefun(x, probs, method="natural") 176 | formals(f)[["deriv"]] <- 1 177 | f 178 | }, 179 | ## otherwise 180 | stop("invalid method ", method)) 181 | } 182 | 183 | d1 <- P2dens(-3:3, pr=pnorm(-3:3)) 184 | plot(d1, -7, 7, n = 501) 185 | points(-3:3, d1(-3:3)) 186 | ## quite fine : 187 | curve(dnorm, col = 2, add=TRUE, n = 501) 188 | 189 | ## Now with *monotone* Hermite interpolation --- this is *WORSE* !! 190 | d2 <- P2dens(-3:3, pr=pnorm(-3:3), method = "mono") 191 | plot(d2, -7, 7, n = 501, add=TRUE, col="midnightblue") 192 | points(-3:3, d2(-3:3)) 193 | 194 | ## and more experiments suggest the best (here!) solution being "natural" 195 | d2 <- P2dens(-3:3, pr=pnorm(-3:3), method = "natural") 196 | plot(d2, -7, 7, n = 501, add=TRUE, col="midnightblue") 197 | points(-3:3, d2(-3:3)) 198 | ## well, 199 | x <- seq(-7,7, len=1001) 200 | all.equal(d1(x), d2(x), tol = 1e-15)# TRUE 201 | 202 | str(get("Fspl", envir= environment(d1))) 203 | ##- List of 2 204 | ##- $ knots : num [1:9] -6 -3 -2 -1 0 1 2 3 6 205 | ##- $ coefficients: num [1:9, 1:4] 0.00000 0.00135 0.02275 0.15866 0.50000 ... 206 | ##- - attr(*, ........... 207 | 208 | x0 <- c(0, 2^(-3:6)) 209 | d2 <- P2dens(x0, pr=pgamma(x0, shape = 1.5)) 210 | plot(d2, 0, max(x0), n = 501) 211 | rug(x0) 212 | # not bad, too : 213 | curve(dgamma(x,shape=1.5), col = 2, add=TRUE, n = 501) 214 | 215 | 216 | ##-- But the thing I wanted fails because of point masses (left border): 217 | dDips <- apply(qDiptab, 1, function(qd) P2dens(qd, pr = P.p)) 218 | ## Error in interpSpline..(.) : values of x must be distinct << ! 219 | -------------------------------------------------------------------------------- /stuff/unimodality.Rnw: -------------------------------------------------------------------------------- 1 | % -*- mode: noweb; noweb-default-code-mode: R-mode; -*- 2 | \documentclass[a4paper,12pt]{article} 3 | \usepackage[utf8]{inputenc} 4 | \usepackage[UKenglish]{babel} 5 | \usepackage{graphicx,natbib,array,subfig}%,lineno} 6 | \usepackage[user,titleref]{zref}% para fazer referencia ao nome da seção (a o invés do número) 7 | %\usepackage[font=sf,textfont=md]{caption} %% atenção: incompatível com o pacote subfig! 8 | %\newcolumntype{M}{>{\small\sffamily}l} 9 | % \newcolumntype{N}{>{\small\sffamily}c} 10 | % \setlength\extrarowheight{2pt} 11 | 12 | \usepackage{ifthen} 13 | \newboolean{ONLYME} 14 | \setboolean{ONLYME}{false} % modificar \SweaveOpts abaixo 15 | %%%%%% 16 | %%%%%%%%%%%%%% 17 | %%% how to use Sweave: 18 | % Once the Sweave file (this file) has been written, it can be 19 | % processed in R by typing into a running R session 20 | % Sweave("filename.Snw") 21 | % where filename.Snw is the Sweave file. If R is run in a different directory, 22 | % one has to provide the full path name. This generates the file filename.tex 23 | % which can be processed as usual with LATEX. 24 | %%% 25 | 26 | \title{Checking for unimodality} 27 | \author{Og DeSouza \& Karsten Schönrogge} 28 | \date{\today} 29 | \usepackage{xspace} % para manter espaço em branco após os comandos 30 | % montados com newcommand{} 31 | \begin{document} 32 | %%%%%%%%%%%%%%%%%%%%%%%% opção global do ``echo'' 33 | \SweaveOpts{echo=T} 34 | %%%%%%%%%%%%%%%%%%%%%%% 35 | \setkeys{Gin}{width=0.5\linewidth} % largura dos gráficos no output final 36 | \maketitle 37 | 38 | <>= 39 | # largura da linha do R no output final 40 | options(width=66) 41 | 42 | # parâmetros dos gráficos do R 43 | 44 | options(SweaveHooks=list(fig=function() par(pty="s"))) 45 | 46 | @ 47 | \newcommand\x{\textsf{x }\xspace} % x em sans-serif 48 | \newcommand\y{\textsf{y }\xspace} % y em sans-serif 49 | %\linenumbers 50 | \section{Introduction} 51 | 52 | 53 | 54 | The R package \texttt{diptest} computes Hartigan's dip test statistic for testing unimodality. It derives from \citet{HartiganHartigan1985Dip}.%, which is itself a \textit{bimodal} husband-wife collaboration! 55 | The package was written by Martin Maechler (ETH Zürich), based on Fortran and S-plus from Dario Ringach, NYU.edu. It is currently mantainned by Martin Maechler. 56 | 57 | The dip value measures the departure of a sample from unimodality, measuring the maximum 58 | difference, over all sample points, between the empirical distribution function, 59 | and the unimodal distribution function that minimizes that maximum 60 | difference. The test gives the \textit{dip} value, which after conveniently transformed, inform a P-value for the hipothesis that the dataset is best described by an unimodal, as opposed to bimodal, distribution. P-values are calculated by comparing the dip statistic obtained with those for repeated samples of the same size from a uniform distribution. 61 | 62 | 63 | \section{The procedure in R} 64 | We start by calling the appropriate package\footnote{If you to not have the package installed, simply type \texttt{> install.packages()}, choose your closest mirror and then choose the \texttt{diptest} package from the list.}: 65 | <<>>= 66 | library(diptest) 67 | @ 68 | \subsection{Calling and inspecting the dataset} 69 | Now we call the dataset: 70 | <<>>= 71 | microdon <- c(10,10,10,15,15,15,15,20,20,20,25,25,25,30,30,30,30,30,35,35,35,35,35,35,35,35,39,39,39,39,39) 72 | @ 73 | 74 | \begin{figure}% 75 | \centering 76 | <>= 77 | hist(microdon) 78 | @ 79 | \caption{Histogram for Microdon's raw data} 80 | \label{fig:histogram} 81 | \end{figure} 82 | \begin{figure} 83 | \centering 84 | <>= 85 | plot(density(microdon)) 86 | @ 87 | \caption{Density function for Microdon's raw data.}% 88 | \label{fig:density}% 89 | \end{figure} 90 | 91 | \subsection{Calculating} 92 | 93 | To calculate the \textit{dip} value, simple type: 94 | <<>>= 95 | dvalue <- dip(microdon) 96 | dvalue 97 | @ 98 | \section{P-values}\label{sec:pvalues} 99 | 100 | The are two equivalent ways to calculate the P-value for this empirical \textit{dip} value. One way would be to inspect the table of quantiles which is given by \texttt{qDiptab}. This table has probability values in the columns and number of observations in the rows. A drawback is that this table only holds dip values for a limited number of observations, and you end having to use a p-value for the number of observations available in the table that is closest to your actual numbers. This should be no problem if your p-value is not poised in a decision boundary (e.g. p = 0.045). If this is the case, then you would be better off by using the second procedure, that involves simulating dip values for that particular number of observations. 101 | 102 | \subsection{Extracting p-values from \texttt{qDiptab}}\label{sec:qDiptab} 103 | 104 | In such table, you should locate your empirical value (dip = \Sexpr{round(dvalue,4)}), taking note of its row and column, for they are respectively the number of observations in your dataset (here, n = \Sexpr{length(microdon)}) and the respective Pr value. Then you take one minus the Pr value you find on top of the table, and that is the P value for the hipothesis that your dataset was drawn from a unimodal distribution. A P value of, say, 0.04 would mean that your dataset has only a small chance to belong to an unimodal distribution. 105 | 106 | 107 | So, your dip value is (d=\Sexpr{round(dvalue,4)}), and your dataset has \Sexpr{length(microdon)} observations. You will notice that the \texttt{qDiptab} has only a limited set of rows, meaning that you won't get a row for n = \Sexpr{length(microdon)} observations. You should then look for the closest one, which would the row for n = 30. There you will find the following dip values: 108 | 109 | <<>>= 110 | n30 <- qDiptab["30",] 111 | n30 112 | @ 113 | 114 | In this table, the dip values which are closest to your emprical dip value d = \Sexpr{round(dvalue,4)} are 115 | <>= 116 | lrgdvalue <- names(n30[n30>dvalue])[1] # finds the Pr value for dip > empirical dip 117 | lrgdvalue 118 | smldvalue <- names(n30[n30>= 131 | rdip <- replicate(1000,dip(runif(length(microdon)))) 132 | pvalue <- sum(rdip>dip(microdon))/1000 133 | pvalue 134 | @ 135 | 136 | The pvalue now changes to \Sexpr{round(pvalue,4)}, which is actually not too different from that we've calculated at section~\ref{sec:qDiptab} (recall that there we've only approximated the pvalue, because the actual number of observations was not available in \texttt{qDiptab} table). 137 | 138 | You migth want to take a look on this new set of dip values, and where does your empirical dip value is located. As you can see in Fig.~\ref{fig:simulate}, most of the simulated dip values are actually larger that your empirical dip value. That is, there is \Sexpr{pvalue*100}\% chance to find a dip value larger than yours in this simulation, therefore, we should consider this empirical value to belong to that uniform distribution, and hence from an unimodal data set. You should recall that that the dip value is actually a mesurement of the difference between the empirical distribution and its closest unimodal distribution. Therefore, we are only concerned about large dip values, since a small dip value actually means that the empirical distribution is very close a theoretical unimodal distribution. 139 | \begin{figure} 140 | <>= 141 | hist(rdip) 142 | abline(v=dip(microdon),lty=2) 143 | text(0.09,200,paste("pvalue",pvalue,sep=" = ")) 144 | @ 145 | \caption{A distribution dip values simulated out of a 1000 uniform distributions. The dashed line shows where your empirical dip value is located.} 146 | \label{fig:simulate} 147 | \end{figure} 148 | 149 | \section{Acknowledgements} 150 | We thank Martin Maechler for revising this document. 151 | 152 | \begin{thebibliography}{1} 153 | \providecommand{\natexlab}[1]{#1} 154 | \providecommand{\url}[1]{\texttt{#1}} 155 | \providecommand{\urlprefix}{URL } 156 | \expandafter\ifx\csname urlstyle\endcsname\relax 157 | \providecommand{\doi}[1]{doi:\discretionary{}{}{}#1}\else 158 | \providecommand{\doi}{doi:\discretionary{}{}{}\begingroup 159 | \urlstyle{rm}\Url}\fi 160 | \providecommand{\eprint}[2][]{\url{#2}} 161 | 162 | \bibitem[{Hartigan \& Hartigan(1985)}]{HartiganHartigan1985Dip} 163 | \textbf{Hartigan, J.A. \& Hartigan, P.M.} (1985) The dip test of unimodality. 164 | \newblock \emph{Annals of Statistics} 13, 70–84. 165 | 166 | \end{thebibliography} 167 | 168 | \end{document} 169 | -------------------------------------------------------------------------------- /stuff/asymp-distrib.R: -------------------------------------------------------------------------------- 1 | ####---- More "full simulations" of the asymptotic limit: sqrt(n) * D_n 2 | #### ================ 3 | 4 | setwd("/u/maechler/R/Pkgs/diptest/stuff") ## will load all the LARGE dipk.rda files 5 | 6 | ## These all have n.sim = 1000'001 samples of 7 | ## dip(runif(N)) for "large N" 8 | ## where produced by scripts such as ./d20k_do.R 9 | N.k.set <- c(8,12,16,20,24,32,36,40) 10 | ## or automatically 11 | patt <- "^dip(.*)k\\.rda" 12 | N.k.set <- sort(as.integer(sub(patt, "\\1", list.files(pattern = patt)))) 13 | ## for now: drop 5 14 | if(5 %in% N.k.set) { N.k.set <- N.k.set[N.k.set != 5] } 15 | N.k.set 16 | 17 | dip.nm <- function(N.k, file=FALSE) 18 | paste("dip", N.k, if(file)"k.rda" else "k", sep='') 19 | for(N.k in N.k.set) 20 | load(dip.nm(print(N.k), file=TRUE)) 21 | 22 | if(interactive()) # show more interesting table below 23 | invisible(lapply(dip.nm(N.k.set), function(nm) {cat(nm,": "); str(get(nm)) })) 24 | ## all are vectors of length 1'000'001 25 | 26 | d.dip <- function(N.k, scaleUp = TRUE) 27 | { 28 | ## Purpose: Simulation data for N = N.k * 1000, possibly sqrt(N) scaled 29 | ## ---------------------------------------------------------------------- 30 | ## Author: Martin Maechler, Date: 15 Apr 2009, 10:36 31 | N <- N.k * 1000 32 | nm <- dip.nm(N.k) 33 | if(scaleUp) sqrt(N) * get(nm) else get(nm) 34 | } 35 | Nk2char <- function(Nk) { 36 | dput(unname(Nk), 37 | textConnection(".val", "w", local=TRUE), 38 | control = "S_compatible")# no "10L" but "10" 39 | .val 40 | } 41 | mFormat <- function(n) sub("([0-9]{3})$", "'\\1", format(n)) 42 | 43 | if(!dev.interactive(orNone=TRUE)) pdf("asymp-distrib.pdf") 44 | 45 | names(N.k.set) <- paste(format(N.k.set), "'000", sep='') 46 | t(sums <- sapply(N.k.set, function(Nk) summary(d.dip(Nk)))) 47 | ## Min. 1st Qu. Median Mean 3rd Qu. Max. 48 | ## 6'000 0.1777 0.3274 0.3759 0.3876 0.4357 0.9858 49 | ## 8'000 0.1795 0.3278 0.3765 0.3881 0.4364 0.9772 50 | ## 10'000 0.1750 0.3280 0.3767 0.3884 0.4368 0.9522 51 | ## 12'000 0.1779 0.3283 0.3770 0.3887 0.4370 0.9421 52 | ## 16'000 0.1768 0.3285 0.3774 0.3890 0.4373 1.0080 53 | ## 20'000 0.1689 0.3288 0.3774 0.3891 0.4373 0.9699 54 | ## 24'000 0.1814 0.3288 0.3775 0.3893 0.4375 1.0570 55 | ## 32'000 0.1758 0.3290 0.3779 0.3896 0.4380 0.9757 56 | ## 36'000 0.1724 0.3295 0.3783 0.3899 0.4382 1.0500 57 | ## 40'000 0.1705 0.3294 0.3781 0.3899 0.4382 1.0270 58 | ## 44'000 0.1742 0.3295 0.3782 0.3898 0.4381 0.9693 59 | ## 52'000 0.1752 0.3295 0.3783 0.3900 0.4383 0.9463 60 | ## 60'000 0.1755 0.3295 0.3785 0.3901 0.4384 1.0060 61 | ## 72'000 0.1729 0.3296 0.3785 0.3902 0.4386 1.0100 62 | 63 | ## And the empirical densities of the sqrt(n)-blown up look 64 | ## "practically" identical: 65 | plot (density(d.dip(36)), main = "") 66 | abline(h=0, col="dark gray", lty=3) 67 | for(N.k in head(N.k.set,-1)) 68 | lines(density(d.dip(N.k))) 69 | title(expression("Dip distributions"~~~ sqrt(n) * D[n] ~~~ 70 | "superimposed for")) 71 | mtext(paste(" n = 1000 *", Nk2char(N.k.set)), line = .5) 72 | 73 | ## Could Log-normal fit ? 74 | plot (d <- density(log(d.dip(max(N.k.set)))), main="") 75 | abline(h=0, col="dark gray", lty=3) 76 | for(N.k in head(N.k.set,-1)) 77 | lines(density(log(d.dip(N.k)), bw = d$bw)) 78 | title(expression("log(Dip) distributions"~~~ log(sqrt(n) * D[n]) ~~~ 79 | "superimposed for")) 80 | mtext(paste(" n = 1000 *", Nk2char(N.k.set)), line = .5) 81 | 82 | 83 | ## Now only for larger N: 84 | (N.k.L <- tail(N.k.set, 4)) 85 | plot (d <- density(log(d.dip(max(N.k.L)))), main="") 86 | abline(h=0, col="dark gray", lty=3) 87 | for(N.k in head(N.k.L, -1)) 88 | lines(density(log(d.dip(N.k)), bw = d$bw)) 89 | title(expression("log(Dip) distributions"~~~ log(sqrt(n) * D[n]) ~~~ 90 | "superimposed for")) 91 | mtext(paste(" n = 1000 *", Nk2char(N.k.L)), line = .5) 92 | ## -- below, we *add* to this plot ! 93 | 94 | ## looks quite symmetric (but is not quite, see below!) 95 | 96 | x <- sort(sqrt(20000)* dip20k) # sorting, just for possible convenience 97 | lx <- log(x) 98 | 99 | if(FALSE) { ## Attention! Plotting 1 mio points on Xlib("cairo") is deadly! 100 | x11(type = "Xlib") 101 | qqnorm(lx) #-> already clear that (log)normal does NOT fit 102 | } 103 | library(MASS) 104 | fd <- fitdistr(x, "lognormal") 105 | fd 106 | ## meanlog sdlog 107 | ## -0.9656529434 0.2074114508 108 | ## ( 0.0002074113) ( 0.0001466620) 109 | logLik(fd) # 1119766 110 | 111 | fd. <- fitdistr(lx, "normal") 112 | fd. # exact same parameters as above 113 | logLik(fd.) # 154112.4 (df=2) --- very different to fd's 114 | 115 | dlnormFit <- function(x) do.call(dlnorm, c(list(x=x), coef(fd))) 116 | dnormFit <- function(x) do.call(dnorm, c(list(x=x), coef(fd.))) 117 | curve(dnormFit, add = TRUE, col = "tomato") 118 | mtext(expression(dnorm(paste(symbol("\341"),# == "left angle", see ?plotmath, 119 | "fit to ", log(sqrt(n) * D[n]), 120 | symbol("\361")# == right angle 121 | ))), col = "tomato", line = -1, adj = .95) 122 | ##--> log-normal clearly does *not* fit ! 123 | 124 | 125 | fdg <- fitdistr(x, "gamma") ## this takes a few seconds! 126 | fdg 127 | ## shape rate 128 | ## 23.09376812 59.34742081 129 | ## ( 0.03242641) ( 0.08424100) 130 | dgammaFit <- function(x) do.call(dgamma, c(list(x=x), coef(fdg))) 131 | 132 | fdw <- fitdistr(x, "weibull") ## this takes a few seconds! 133 | fdw <- fitdistr(x, "weibull", control=list(trace=2)) 134 | fdw 135 | ## shape scale 136 | ## 4.665147e+00 4.231184e-01 137 | ## (3.277802e-03) (9.629305e-05) 138 | dweibullFit <- function(x) do.call(dweibull, c(list(x=x), coef(fdw))) 139 | 140 | ## In original scale : 141 | plot (density(d.dip(36))) 142 | lines(density(d.dip(32)), col="gray") 143 | lines(density(d.dip(24)), col="gray30") 144 | 145 | curve(dlnormFit, add = TRUE, col = "tomato") 146 | ## does not fit 147 | curve(dgammaFit, add = TRUE, col = "blue3") 148 | ## is even worse than log-normal 149 | curve(dweibullFit, add = TRUE, col = "forest green") 150 | ## is much worse even 151 | 152 | 153 | ###---------> "back" to look at CDFs --------------------------------- 154 | 155 | Fn12k <- ecdf(d.dip(12)) 156 | Fn20k <- ecdf(d.dip(20)) 157 | Fn24k <- ecdf(d.dip(24)) 158 | Fn32k <- ecdf(d.dip(32)) 159 | Fn36k <- ecdf(d.dip(36)) 160 | 161 | 162 | plot(Fn20k, do.points = FALSE)# still slow 163 | lines(ecdf(sqrt(12000) * dip12k), col=2, do.points = FALSE) 164 | 165 | ks.test(sqrt(12000) * dip12k, 166 | sqrt(20000) * dip20k) 167 | ## Two-sample Kolmogorov-Smirnov test 168 | 169 | ## data: sqrt(12000) * dip12k and sqrt(20000) * dip20k 170 | ## D = 0.0029, p-value = 0.0004664 <<<--- *** 171 | ## alternative hypothesis: two-sided 172 | 173 | ## and comparing the 8000 with 12'000 case even 174 | ## has p-value = 4.98 e-5 175 | 176 | ## Now, with new "24'000": 177 | ks.test(sqrt(24000) * dip24k, 178 | sqrt(20000) * dip20k) ## Heureka ! 179 | ## 180 | ## Two-sample Kolmogorov-Smirnov test 181 | 182 | ## data: sqrt(24000) * dip24k and sqrt(20000) * dip20k 183 | ## D = 0.0014, p-value = 0.292 184 | ## alternative hypothesis: two-sided 185 | ## 186 | 187 | ##--- Compute P-values of all pairwise KS tests: -- takes several minutes!! 188 | m <- length(N.k.set) 189 | P.vals <- matrix(NA, m,m, dimnames = list(names(N.k.set),names(N.k.set))) 190 | diag(P.vals) <- 0 191 | for(i in 1:(m-1)) { 192 | cat("i = ", i) 193 | d.i <- d.dip(N.k.set[i]) 194 | for(j in (i+1):m) { 195 | cat(".") 196 | p <- ks.test(d.i, d.dip(N.k.set[j])) $ p.value 197 | P.vals[i,j] <- P.vals[j,i] <- p 198 | }; cat("\n") 199 | } 200 | 201 | Matrix::Matrix(round(100* P.vals[,7:14, drop=FALSE],1)) 202 | ## 24'000 32'000 36'000 40'000 44'000 52'000 60'000 72'000 203 | ## 6'000 . . . . . . . . 204 | ## 8'000 . . . . . . . . 205 | ## 10'000 . . . . . . . . 206 | ## 12'000 . . . . . . . . 207 | ## 16'000 3.0 . . . . . . . 208 | ## 20'000 29.2 . . . . . . . 209 | ## 24'000 . 2.6 . . . . . . 210 | ## 32'000 2.6 . 0.2 2.1 0.1 . . . 211 | ## 36'000 . 0.2 . 40.8 47.1 87.0 13.6 20.9 212 | ## 40'000 . 2.1 40.8 . 45.8 8.6 0.9 1.4 213 | ## 44'000 . 0.1 47.1 45.8 . 20.4 1.9 2.4 214 | ## 52'000 . . 87.0 8.6 20.4 . 34.2 57.5 215 | ## 60'000 . . 13.6 0.9 1.9 34.2 . 57.1 216 | ## 72'000 . . 20.9 1.4 2.4 57.5 57.1 . 217 | 218 | ## Hmm, .. why is 36'000 different from 32'000 ?? 219 | 220 | save(P.vals, fd, fd., fdg, fdw, 221 | file = "asymp-res.rda") 222 | 223 | (xl. <- extendrange(d.dip(20))) 224 | xi <- seq(xl.[1], xl.[2], length.out = 2000) 225 | 226 | plot(xi, Fn20k(xi) - Fn12k(xi), type = "l", col = 2) 227 | ## looks like the difference is largest where the density is large: 228 | den20k <- density(sqrt(20000)* dip20k, 229 | n = 2048, from = xl.[1], to = xl.[2], cut = 1) 230 | f.scale <- .99 * par("usr")[3] / max(den20k$y) 231 | with(den20k, lines(f.scale * y ~ x, col = "pink")) 232 | 233 | ## Graphical "KS test" 234 | plot(xi, Fn24k(xi) - Fn20k(xi), type = "l", col = 2) 235 | ## looks much less systematic ... 236 | f.scale <- .99 * par("usr")[3] / max(den20k$y) 237 | with(den20k, lines(f.scale * y ~ x, col = "pink")) 238 | 239 | ## 36k vs 32k which looks "bad" in KS test: 240 | plot(xi, Fn36k(xi) - Fn32k(xi), type = "l", col = 2) 241 | ## looks much less systematic ... 242 | f.scale <- .99 * par("usr")[3] / max(den20k$y) 243 | with(den20k, lines(f.scale * y ~ x, col = "pink")) 244 | 245 | ##--- Ok, look even closer for systematic: 246 | require(RColorBrewer) 247 | 248 | Nmax <- max(N.k.set)# too large now 249 | ## rather just 250 | Nmax <- 44 251 | Fn <- ecdf(d.dip(Nmax)) 252 | Fn.xi <- Fn(xi) 253 | ii <- seq_along(Ns <- N.k.set[N.k.set < Nmax]) 254 | opal <- palette(brewer.pal(length(Ns), "Set3"))# "Dark2" if nColor <= 8 255 | 256 | yrng <- range(Fn.xi - ecdf(d.dip(min(Ns)))(xi), 257 | Fn.xi - ecdf(d.dip(max(Ns)))(xi)) 258 | plot(range(xi), yrng, type = "n", 259 | ylab = "", xlab = "D_N =^= dip( runif(N) )", 260 | main = paste("F_n(D_{N=",Nmax,"'000}) - F_n(D_N) ; n=", 261 | mFormat(length(d.dip(Nmax))), sep='')) 262 | abline(h=0, col="gray") 263 | for(i in ii) 264 | lines(xi, Fn(xi) - ecdf(d.dip(Ns[i]))(xi), col = i+1) 265 | legend("right", legend=rev(paste("N = ", format(Ns), "'000", sep='')), 266 | col= rev(ii+1), lty=1, lwd=2, inset = .05) 267 | 268 | palette(opal) 269 | 270 | ## How does the upper tail look? 271 | ## log(1 - P) = log(P{X >= x}) 272 | -------------------------------------------------------------------------------- /src/dip.c: -------------------------------------------------------------------------------- 1 | /* ALGORITHM AS 217 APPL. STATIST. (1985) VOL.34, NO.3 2 | 3 | @article{HarP85, 4 | author = {P. M. Hartigan}, 5 | title = {Computation of the Dip Statistic to Test for Unimodality}, 6 | year = 1985, 7 | journal = {Applied Statistics}, 8 | pages = {320--325}, 9 | volume = 34 } 10 | @article{HarJH85, 11 | author = {J. A. Hartigan and P. M. Hartigan}, 12 | title = {The Dip Test of Unimodality}, 13 | year = 1985, 14 | journal = {Ann. of Statistics}, 15 | pages = {70--84}, 16 | volume = 13 } 17 | 18 | Does the dip calculation for an ordered vector X using the 19 | greatest convex minorant and the least concave majorant, skipping 20 | through the data using the change points of these distributions. 21 | 22 | It returns the dip statistic 'DIP' and the modal interval (XL, XU). 23 | === ====== 24 | 25 | dip.f -- translated by f2c (version of 22 July 1992 22:54:52). 26 | 27 | Pretty-Edited and extended (debug argument) 28 | by Martin Maechler 29 | ETH Seminar fuer Statistik 30 | 8092 Zurich SWITZERLAND 31 | 32 | --------------- 33 | 34 | Two Bug Fixes: 35 | ========= 36 | 37 | 1) July 30 1994 : For unimodal data, gave "infinite loop" (end of code) 38 | 2) Oct 31 2003 : Yong Lu : ")" typo in Fortran 39 | gave wrong result (larger dip than possible) in some cases 40 | */ 41 | 42 | #include 43 | 44 | // for the "registration part": 45 | #include 46 | 47 | #include 48 | 49 | 50 | /* Subroutine */ 51 | void diptst(const double x[], const int *n_, 52 | double *dip, int *lo_hi, int *ifault, 53 | int *gcm, int *lcm, int *mn, int *mj, 54 | const int *min_is_0, const int *debug) 55 | { 56 | #define low lo_hi[0] 57 | #define high lo_hi[1] 58 | #define l_gcm lo_hi[2] 59 | #define l_lcm lo_hi[3] 60 | 61 | const int n = *n_; 62 | int mnj, mnmnj, mjk, mjmjk, ig, ih, iv, ix, i, j, k; 63 | double dip_l, dip_u, dipnew; 64 | 65 | /* Parameter adjustments, so I can do "as with index 1" : x[1]..x[n] */ 66 | --mj; --mn; 67 | --lcm; --gcm; 68 | --x; 69 | 70 | /*-------- Function Body ------------------------------ */ 71 | 72 | *ifault = 1; if (n <= 0) return; 73 | *ifault = 0; 74 | 75 | /* Check that X is sorted --- if not, return with ifault = 2*/ 76 | 77 | *ifault = 2; for (k = 2; k <= n; ++k) if (x[k] < x[k - 1]) return; 78 | *ifault = 0; 79 | 80 | /* Check for all values of X identical, */ 81 | /* and for 1 <= n < 4. */ 82 | 83 | /* LOW contains the index of the current estimate of the lower end 84 | of the modal interval, HIGH contains the index for the upper end. 85 | */ 86 | low = 1; high = n; /*-- IDEA: *xl = x[low]; *xu = x[high]; --*/ 87 | 88 | /* M.Maechler -- speedup: it saves many divisions by n when we just work with 89 | * (2n * dip) everywhere but the very end! */ 90 | *dip = (*min_is_0) ? 0. : 1.; 91 | if (n < 2 || x[n] == x[1]) goto L_END; 92 | 93 | if(*debug) 94 | Rprintf("dip() in C: n = %d; starting with 2N*dip = %g.\n", 95 | n, *dip); 96 | 97 | /* Establish the indices mn[1..n] over which combination is necessary 98 | for the convex MINORANT (GCM) fit. 99 | */ 100 | mn[1] = 1; 101 | for (j = 2; j <= n; ++j) { 102 | mn[j] = j - 1; 103 | while(1) { 104 | mnj = mn[j]; 105 | mnmnj = mn[mnj]; 106 | if (mnj == 1 || 107 | ( x[j] - x[mnj]) * (mnj - mnmnj) < 108 | (x[mnj] - x[mnmnj]) * (j - mnj)) break; 109 | mn[j] = mnmnj; 110 | } 111 | } 112 | 113 | /* Establish the indices mj[1..n] over which combination is necessary 114 | for the concave MAJORANT (LCM) fit. 115 | */ 116 | mj[n] = n; 117 | for (k = n - 1; k >= 1; k--) { 118 | mj[k] = k + 1; 119 | while(1) { 120 | mjk = mj[k]; 121 | mjmjk = mj[mjk]; 122 | if (mjk == n || 123 | ( x[k] - x[mjk]) * (mjk - mjmjk) < 124 | (x[mjk] - x[mjmjk]) * (k - mjk)) break; 125 | mj[k] = mjmjk; 126 | } 127 | } 128 | 129 | /* ----------------------- Start the cycling. ------------------------------- */ 130 | LOOP_Start: 131 | 132 | /* Collect the change points for the GCM from HIGH to LOW. */ 133 | gcm[1] = high; 134 | for(i = 1; gcm[i] > low; i++) 135 | gcm[i+1] = mn[gcm[i]]; 136 | ig = l_gcm = i; // l_gcm == relevant_length(GCM) 137 | ix = ig - 1; // ix, ig are counters for the convex minorant. 138 | 139 | /* Collect the change points for the LCM from LOW to HIGH. */ 140 | lcm[1] = low; 141 | for(i = 1; lcm[i] < high; i++) 142 | lcm[i+1] = mj[lcm[i]]; 143 | ih = l_lcm = i; // l_lcm == relevant_length(LCM) 144 | iv = 2; // iv, ih are counters for the concave majorant. 145 | 146 | if(*debug) { 147 | Rprintf("'dip': LOOP-BEGIN: 2n*D= %-8.5g [low,high] = [%3d,%3d]", *dip, low,high); 148 | if(*debug >= 3) { 149 | Rprintf(" :\n gcm[1:%d] = ", l_gcm); 150 | for(i = 1; i <= l_gcm; i++) Rprintf("%d%s", gcm[i], (i < l_gcm)? ", " : "\n"); 151 | Rprintf(" lcm[1:%d] = ", l_lcm); 152 | for(i = 1; i <= l_lcm; i++) Rprintf("%d%s", lcm[i], (i < l_lcm)? ", " : "\n"); 153 | } else { // debug <= 2 : 154 | Rprintf("; l_lcm/gcm = (%2d,%2d)\n", l_lcm,l_gcm); 155 | } 156 | } 157 | 158 | /* Find the largest distance greater than 'DIP' between the GCM and 159 | * the LCM from LOW to HIGH. */ 160 | 161 | // FIXME: should provide LDOUBLE or something like it 162 | long double d = 0.;// <<-- see if this makes 32-bit/64-bit difference go.. 163 | if (l_gcm != 2 || l_lcm != 2) { 164 | if(*debug) Rprintf(" while(gcm[ix] != lcm[iv]) :%s", 165 | (*debug >= 2) ? "\n" : " "); 166 | do { /* gcm[ix] != lcm[iv] (after first loop) */ 167 | long double dx; 168 | int gcmix = gcm[ix], 169 | lcmiv = lcm[iv]; 170 | if (gcmix > lcmiv) { 171 | /* If the next point of either the GCM or LCM is from the LCM, 172 | * calculate the distance here. */ 173 | int gcmi1 = gcm[ix + 1]; 174 | dx = (lcmiv - gcmi1 + 1) - 175 | ((long double) x[lcmiv] - x[gcmi1]) * (gcmix - gcmi1)/(x[gcmix] - x[gcmi1]); 176 | ++iv; 177 | if (dx >= d) { 178 | d = dx; 179 | ig = ix + 1; 180 | ih = iv - 1; 181 | if(*debug >= 2) Rprintf(" L(%d,%d)", ig,ih); 182 | } 183 | } 184 | else { 185 | /* If the next point of either the GCM or LCM is from the GCM, 186 | * calculate the distance here. */ 187 | int lcmiv1 = lcm[iv - 1]; 188 | /* Fix by Yong Lu {symmetric to above!}; original Fortran: only ")" misplaced! :*/ 189 | dx = ((long double)x[gcmix] - x[lcmiv1]) * (lcmiv - lcmiv1) / 190 | (x[lcmiv] - x[lcmiv1])- (gcmix - lcmiv1 - 1); 191 | --ix; 192 | if (dx >= d) { 193 | d = dx; 194 | ig = ix + 1; 195 | ih = iv; 196 | if(*debug >= 2) Rprintf(" G(%d,%d)", ig,ih); 197 | } 198 | } 199 | if (ix < 1) ix = 1; 200 | if (iv > l_lcm) iv = l_lcm; 201 | if(*debug) { 202 | if(*debug >= 2) Rprintf(" --> ix = %d, iv = %d\n", ix,iv); 203 | else Rprintf("."); 204 | } 205 | } while (gcm[ix] != lcm[iv]); 206 | if(*debug && *debug < 2) Rprintf("\n"); 207 | } 208 | else { /* l_gcm or l_lcm == 2 */ 209 | d = (*min_is_0) ? 0. : 1.; 210 | if(*debug) 211 | Rprintf(" ** (l_lcm,l_gcm) = (%d,%d) ==> d := %g\n", l_lcm, l_gcm, (double)d); 212 | } 213 | 214 | if (d < *dip) goto L_END; 215 | 216 | /* Calculate the DIPs for the current LOW and HIGH. */ 217 | if(*debug) Rprintf(" calculating dip .."); 218 | 219 | int j_best, j_l = -1, j_u = -1; 220 | 221 | /* The DIP for the convex minorant. */ 222 | dip_l = 0.; 223 | for (j = ig; j < l_gcm; ++j) { 224 | double max_t = 1.; 225 | int j_ = -1, jb = gcm[j + 1], je = gcm[j]; 226 | if (je - jb > 1 && x[je] != x[jb]) { 227 | double C = (je - jb) / (x[je] - x[jb]); 228 | for (int jj = jb; jj <= je; ++jj) { 229 | double t = (jj - jb + 1) - (x[jj] - x[jb]) * C; 230 | if (max_t < t) { 231 | max_t = t; j_ = jj; 232 | } 233 | } 234 | } 235 | if (dip_l < max_t) { 236 | dip_l = max_t; j_l = j_; 237 | } 238 | } 239 | 240 | /* The DIP for the concave majorant. */ 241 | dip_u = 0.; 242 | for (j = ih; j < l_lcm; ++j) { 243 | double max_t = 1.; 244 | int j_ = -1, jb = lcm[j], je = lcm[j + 1]; 245 | if (je - jb > 1 && x[je] != x[jb]) { 246 | double C = (je - jb) / (x[je] - x[jb]); 247 | for (int jj = jb; jj <= je; ++jj) { 248 | double t = (x[jj] - x[jb]) * C - (jj - jb - 1); 249 | if (max_t < t) { 250 | max_t = t; j_ = jj; 251 | } 252 | } 253 | } 254 | if (dip_u < max_t) { 255 | dip_u = max_t; j_u = j_; 256 | } 257 | } 258 | 259 | if(*debug) Rprintf(" (dip_l, dip_u) = (%g, %g)", dip_l, dip_u); 260 | 261 | /* Determine the current maximum. */ 262 | if(dip_u > dip_l) { 263 | dipnew = dip_u; j_best = j_u; 264 | } else { 265 | dipnew = dip_l; j_best = j_l; 266 | } 267 | if (*dip < dipnew) { 268 | *dip = dipnew; 269 | if(*debug) 270 | Rprintf(" -> new larger dip %g (j_best = %d)\n", dipnew, j_best); 271 | } 272 | else if(*debug) Rprintf("\n"); 273 | 274 | /*--- The following if-clause is NECESSARY (may loop infinitely otherwise)! 275 | --- Martin Maechler, Statistics, ETH Zurich, July 30 1994 ---------- */ 276 | if (low == gcm[ig] && high == lcm[ih]) { 277 | if(*debug) 278 | Rprintf("No improvement in low = %d nor high = %d --> END\n", 279 | low, high); 280 | } else { 281 | low = gcm[ig]; 282 | high = lcm[ih]; goto LOOP_Start; /* Recycle */ 283 | } 284 | /*---------------------------------------------------------------------------*/ 285 | 286 | L_END: 287 | /* do this in the caller : 288 | * *xl = x[low]; *xu = x[high]; 289 | * rather return the (low, high) indices -- automagically via lo_hi[] */ 290 | *dip /= (2*n); 291 | return; 292 | } /* diptst */ 293 | #undef low 294 | #undef high 295 | 296 | //----------------- Registration <==> ../NAMESPACE 297 | #define CDEF(name) {#name, (DL_FUNC) &name, sizeof(name ## _t)/sizeof(name ## _t[0]), name ##_t} 298 | 299 | #define CALLDEF(name, n) {#name, (DL_FUNC) &name, n} 300 | 301 | 302 | // void diptst(double *x, int *n_, 303 | // double *dip, int *lo_hi, int *ifault, 304 | // int *gcm, int *lcm, int *mn, int *mj, 305 | // int *min_is_0, int *debug) 306 | static R_NativePrimitiveArgType diptst_t[] = { 307 | REALSXP, INTSXP, /* dip: */ REALSXP, INTSXP, INTSXP, 308 | /* gcm: */ INTSXP, INTSXP, INTSXP, INTSXP, 309 | /* min_is_0:*/ LGLSXP, INTSXP 310 | }; 311 | 312 | 313 | static const R_CMethodDef CEntries[] = { 314 | CDEF(diptst), 315 | {NULL, NULL, 0} 316 | }; 317 | 318 | /* static R_CallMethodDef CallEntries[] = { */ 319 | /* CALLDEF(sinc_c, 1), */ 320 | 321 | /* {NULL, NULL, 0} */ 322 | /* }; */ 323 | 324 | /** 325 | * register routines 326 | * @param dll pointer 327 | * @return none 328 | * @author Martin Maechler 329 | */ 330 | void 331 | #ifdef HAVE_VISIBILITY_ATTRIBUTE 332 | __attribute__ ((visibility ("default"))) 333 | #endif 334 | R_init_diptest(DllInfo *dll) 335 | { 336 | R_registerRoutines(dll, CEntries, NULL, NULL, NULL); 337 | /* R_registerRoutines(dll, CEntries, CallEntries, NULL, NULL); */ 338 | R_useDynamicSymbols(dll, FALSE); 339 | } 340 | -------------------------------------------------------------------------------- /tests/ex1.Rout.save-32b: -------------------------------------------------------------------------------- 1 | 2 | R version 2.15.1 Patched (2012-08-11 r60235) -- "Roasted Marshmallows" 3 | Copyright (C) 2012 The R Foundation for Statistical Computing 4 | ISBN 3-900051-07-0 5 | Platform: i686-pc-linux-gnu (32-bit) 6 | 7 | R is free software and comes with ABSOLUTELY NO WARRANTY. 8 | You are welcome to redistribute it under certain conditions. 9 | Type 'license()' or 'licence()' for distribution details. 10 | 11 | R is a collaborative project with many contributors. 12 | Type 'contributors()' for more information and 13 | 'citation()' on how to cite R or R packages in publications. 14 | 15 | Type 'demo()' for some demos, 'help()' for on-line help, or 16 | 'help.start()' for an HTML browser interface to help. 17 | Type 'q()' to quit R. 18 | 19 | > library(diptest) 20 | > 21 | > stopifnot(dip(c(1,1,2,2)) == 1/4)# the maximal value possible: two point dist 22 | > 23 | > ## very first small "unimodal" example --- the 1/(2*n) result: 24 | > n <- length(u <- cumsum(0:3)) 25 | > d <- dip(u, debug=TRUE)# shows the final if() {added by MM} is really needed 26 | dip() in C: n = 4; starting with 2N*dip = 1. 27 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 4]; l_lcm/gcm = ( 4, 2) 28 | while(gcm[ix] != lcm[iv]) : .. 29 | calculating dip .. (dip_l, dip_u) = (0, 1) 30 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 3]; l_lcm/gcm = ( 3, 2) 31 | while(gcm[ix] != lcm[iv]) : . 32 | calculating dip .. (dip_l, dip_u) = (0, 1) 33 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 2]; l_lcm/gcm = ( 2, 2) 34 | ** (l_lcm,l_gcm) = (2,2) ==> d := 1 35 | calculating dip .. (dip_l, dip_u) = (0, 0) 36 | No improvement in low = 1 nor high = 2 --> END 37 | > stopifnot(d == dip(-u), d == 1/(2*n))# exact "=" for n = 4 ! 38 | > ## Note that I believe this should *not* give 0 (as fmechler@.. did), 39 | > ## but rather 1/(2n) because that's (1/n) / 2 and 40 | > ## (1/n) is the correct distance between LCM and GCM 41 | > 42 | > ## Small example -- but MM sees difference (32-bit / 64-bit): 43 | > x <- c(0,2:3,5:6) 44 | > d1 <- dip(x, full=TRUE, debug=2) 45 | dip() in C: n = 5; starting with 2N*dip = 1. 46 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 5]; l_lcm/gcm = ( 2, 4) 47 | while(gcm[ix] != lcm[iv]) : 48 | G(3,2) --> ix = 2, iv = 2 49 | --> ix = 1, iv = 2 50 | calculating dip .. (dip_l, dip_u) = (1, 0) 51 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 2, 5]; l_lcm/gcm = ( 3, 3) 52 | while(gcm[ix] != lcm[iv]) : 53 | L(3,2) --> ix = 2, iv = 3 54 | G(2,3) --> ix = 1, iv = 3 55 | calculating dip .. (dip_l, dip_u) = (1.33333, 0) -> new larger dip 1.33333 (j_best = 3) 56 | 'dip': LOOP-BEGIN: 2n*D= 1.3333 [low,high] = [ 4, 5]; l_lcm/gcm = ( 2, 2) 57 | ** (l_lcm,l_gcm) = (2,2) ==> d := 1 58 | > d2 <- dip(6-x, full=TRUE, debug=2) 59 | dip() in C: n = 5; starting with 2N*dip = 1. 60 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 5]; l_lcm/gcm = ( 4, 2) 61 | while(gcm[ix] != lcm[iv]) : 62 | L(2,2) --> ix = 1, iv = 3 63 | L(2,3) --> ix = 1, iv = 4 64 | calculating dip .. (dip_l, dip_u) = (0, 1) 65 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 4]; l_lcm/gcm = ( 3, 3) 66 | while(gcm[ix] != lcm[iv]) : 67 | L(3,2) --> ix = 2, iv = 3 68 | G(2,3) --> ix = 1, iv = 3 69 | calculating dip .. (dip_l, dip_u) = (1.33333, 0) -> new larger dip 1.33333 (j_best = 2) 70 | 'dip': LOOP-BEGIN: 2n*D= 1.3333 [low,high] = [ 3, 4]; l_lcm/gcm = ( 2, 2) 71 | ** (l_lcm,l_gcm) = (2,2) ==> d := 1 72 | > str(d1) 73 | List of 15 74 | $ call : language dip(x = x, full.result = TRUE, debug = 2) 75 | $ x : num [1:5] 0 2 3 5 6 76 | $ n : int 5 77 | $ dip : num 0.133 78 | $ lo.hi : int [1:2] 4 5 79 | $ ifault : int 0 80 | $ gcm : int [1:2] 5 4 81 | $ lcm : int [1:2] 4 5 82 | $ mn : int [1:5] 1 1 2 2 4 83 | $ mj : int [1:5] 5 3 5 5 5 84 | $ min.is.0 : logi FALSE 85 | $ debug : int 2 86 | $ xl : num 5 87 | $ xu : num 6 88 | $ full.result: logi TRUE 89 | - attr(*, "class")= chr "dip" 90 | > str(d2) 91 | List of 15 92 | $ call : language dip(x = 6 - x, full.result = TRUE, debug = 2) 93 | $ x : num [1:5] 0 1 3 4 6 94 | $ n : int 5 95 | $ dip : num 0.133 96 | $ lo.hi : int [1:2] 3 4 97 | $ ifault : int 0 98 | $ gcm : int [1:2] 4 3 99 | $ lcm : int [1:2] 3 4 100 | $ mn : int [1:5] 1 1 1 3 1 101 | $ mj : int [1:5] 2 4 4 5 5 102 | $ min.is.0 : logi FALSE 103 | $ debug : int 2 104 | $ xl : num 3 105 | $ xu : num 4 106 | $ full.result: logi TRUE 107 | - attr(*, "class")= chr "dip" 108 | > 109 | > if(!dev.interactive(orNone=TRUE)) pdf("ex1.pdf") 110 | > par(mfrow = 2:1, mar = .1+c(3,4,2,1), mgp=c(1.5,.6,0), oma = c(0,0,2.1,0)) 111 | > # 112 | > plot(d1) 113 | > abline(v=-1:7, h = seq(0,1,by=0.2), lty="83", col = "gray") 114 | > # 115 | > plot(d2) 116 | > abline(v=-1:7, h = seq(0,1,by=0.2), lty="83", col = "gray") 117 | > # 118 | > ## "title" only now 119 | > mtext("dip() problem with 'mirror x'", side=3, line = 0.8, 120 | + outer=TRUE, cex = 1.5, font = 2) 121 | > 122 | > 123 | > ## Yong Lu example -- a bit smaller 124 | > x2 <- c(1, rep(2, 9)) 125 | > stopifnot(dip(x2) == dip(3 - x2)) 126 | > str(dip(x2, full=TRUE)) 127 | List of 15 128 | $ call : language dip(x = x2, full.result = TRUE) 129 | $ x : num [1:10] 1 2 2 2 2 2 2 2 2 2 130 | $ n : int 10 131 | $ dip : num 0.05 132 | $ lo.hi : int [1:2] 2 10 133 | $ ifault : int 0 134 | $ gcm : int [1:2] 10 2 135 | $ lcm : int [1:2] 2 10 136 | $ mn : int [1:10] 1 1 2 2 2 2 2 2 2 2 137 | $ mj : int [1:10] 10 10 10 10 10 10 10 10 10 10 138 | $ min.is.0 : logi FALSE 139 | $ debug : int 0 140 | $ xl : num 2 141 | $ xu : num 2 142 | $ full.result: logi TRUE 143 | - attr(*, "class")= chr "dip" 144 | > cat('Time elapsed: ', (.pt <- proc.time()),'\n') # "stats" 145 | Time elapsed: 0.41 0.042 0.504 0 0.008 146 | > 147 | > ## Real data examples : 148 | > 149 | > data(statfaculty) 150 | > 151 | > str(dip(statfaculty, full = "all", debug = 3), vec.len = 8) 152 | dip() in C: n = 63; starting with 2N*dip = 1. 153 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 63] : 154 | gcm[1:6] = 63, 62, 7, 3, 2, 1 155 | lcm[1:5] = 1, 44, 58, 59, 63 156 | while(gcm[ix] != lcm[iv]) : 157 | G(5,2) --> ix = 4, iv = 2 158 | G(4,2) --> ix = 3, iv = 2 159 | G(3,2) --> ix = 2, iv = 2 160 | L(3,2) --> ix = 2, iv = 3 161 | L(3,3) --> ix = 2, iv = 4 162 | --> ix = 2, iv = 5 163 | --> ix = 1, iv = 5 164 | calculating dip .. (dip_l, dip_u) = (2, 2.11111) -> new larger dip 2.11111 (j_best = 61) 165 | 'dip': LOOP-BEGIN: 2n*D= 2.1111 [low,high] = [ 7, 58] : 166 | gcm[1:5] = 58, 55, 51, 48, 7 167 | lcm[1:6] = 7, 11, 15, 42, 44, 58 168 | while(gcm[ix] != lcm[iv]) : 169 | L(5,2) --> ix = 4, iv = 3 170 | L(5,3) --> ix = 4, iv = 4 171 | L(5,4) --> ix = 4, iv = 5 172 | L(5,5) --> ix = 4, iv = 6 173 | --> ix = 3, iv = 6 174 | --> ix = 2, iv = 6 175 | --> ix = 1, iv = 6 176 | calculating dip .. (dip_l, dip_u) = (0, 7.5) -> new larger dip 7.5 (j_best = 48) 177 | 'dip': LOOP-BEGIN: 2n*D= 7.5 [low,high] = [ 7, 44] : 178 | gcm[1:4] = 44, 43, 38, 7 179 | lcm[1:5] = 7, 11, 15, 42, 44 180 | while(gcm[ix] != lcm[iv]) : 181 | L(4,2) --> ix = 3, iv = 3 182 | L(4,3) --> ix = 3, iv = 4 183 | --> ix = 2, iv = 4 184 | --> ix = 2, iv = 5 185 | --> ix = 1, iv = 5 186 | List of 17 187 | $ call : language dip(x = statfaculty, full.result = "all", debug = 3) 188 | $ x : num [1:63] 30 33 35 36 37 37 39 39 39 39 39 40 40 40 40 41 42 43 43 43 ... 189 | $ n : int 63 190 | $ dip : num 0.0595 191 | $ lo.hi : int [1:2] 7 44 192 | $ ifault : int 0 193 | $ gcm : int [1:4] 44 43 38 7 194 | $ lcm : int [1:5] 7 11 15 42 44 195 | $ mn : int [1:63] 1 1 2 3 3 5 3 7 7 7 7 7 12 12 12 7 7 7 18 18 ... 196 | $ mj : int [1:63] 44 44 15 15 6 15 11 11 11 11 15 15 15 15 42 42 20 20 20 42 ... 197 | $ min.is.0 : logi FALSE 198 | $ debug : int 3 199 | $ xl : num 39 200 | $ xu : num 54 201 | $ full.result: chr "all" 202 | $ GCM : int [1:6] 63 62 7 3 2 1 203 | $ LCM : int [1:5] 1 44 58 59 63 204 | - attr(*, "class")= chr "dip" 205 | > 206 | > data(faithful) 207 | > fE <- faithful$eruptions 208 | > str(dip(fE, full = "all", debug = 3), 209 | + vec.len= 8) 210 | dip() in C: n = 272; starting with 2N*dip = 1. 211 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1,272] : 212 | gcm[1:7] = 272, 135, 120, 119, 4, 2, 1 213 | lcm[1:10] = 1, 40, 58, 60, 66, 79, 91, 261, 268, 272 214 | while(gcm[ix] != lcm[iv]) : 215 | G(6,2) --> ix = 5, iv = 2 216 | G(5,2) --> ix = 4, iv = 2 217 | L(5,2) --> ix = 4, iv = 3 218 | L(5,3) --> ix = 4, iv = 4 219 | L(5,4) --> ix = 4, iv = 5 220 | L(5,5) --> ix = 4, iv = 6 221 | L(5,6) --> ix = 4, iv = 7 222 | L(5,7) --> ix = 4, iv = 8 223 | G(4,8) --> ix = 3, iv = 8 224 | G(3,8) --> ix = 2, iv = 8 225 | --> ix = 1, iv = 8 226 | --> ix = 1, iv = 9 227 | --> ix = 1, iv = 10 228 | calculating dip .. (dip_l, dip_u) = (50.2553, 3) -> new larger dip 50.2553 (j_best = 91) 229 | 'dip': LOOP-BEGIN: 2n*D= 50.255 [low,high] = [120,261] : 230 | gcm[1:7] = 261, 260, 252, 181, 146, 135, 120 231 | lcm[1:5] = 120, 124, 233, 246, 261 232 | while(gcm[ix] != lcm[iv]) : 233 | L(7,2) --> ix = 6, iv = 3 234 | G(6,3) --> ix = 5, iv = 3 235 | G(5,3) --> ix = 4, iv = 3 236 | G(4,3) --> ix = 3, iv = 3 237 | --> ix = 3, iv = 4 238 | --> ix = 3, iv = 5 239 | --> ix = 2, iv = 5 240 | --> ix = 1, iv = 5 241 | List of 17 242 | $ call : language dip(x = fE, full.result = "all", debug = 3) 243 | $ x : num [1:272] 1.6 1.67 1.7 1.73 1.75 1.75 1.75 1.75 1.75 1.75 ... 244 | $ n : int 272 245 | $ dip : num 0.0924 246 | $ lo.hi : int [1:2] 120 261 247 | $ ifault : int 0 248 | $ gcm : int [1:7] 261 260 252 181 146 135 120 249 | $ lcm : int [1:5] 120 124 233 246 261 250 | $ mn : int [1:272] 1 1 2 2 4 5 5 5 5 5 5 11 5 13 13 13 13 17 17 13 ... 251 | $ mj : int [1:272] 40 40 40 10 10 10 10 10 10 40 12 36 16 16 16 26 19 19 26 26 ... 252 | $ min.is.0 : logi FALSE 253 | $ debug : int 3 254 | $ xl : num 3.83 255 | $ xu : num 4.83 256 | $ full.result: chr "all" 257 | $ GCM : int [1:7] 272 135 120 119 4 2 1 258 | $ LCM : int [1:10] 1 40 58 60 66 79 91 261 268 272 259 | - attr(*, "class")= chr "dip" 260 | > 261 | > data(precip) 262 | > str(dip(precip, full = TRUE, debug = TRUE)) 263 | dip() in C: n = 70; starting with 2N*dip = 1. 264 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 70]; l_lcm/gcm = ( 6, 4) 265 | while(gcm[ix] != lcm[iv]) : ...... 266 | calculating dip .. (dip_l, dip_u) = (5, 2.5) -> new larger dip 5 (j_best = 13) 267 | 'dip': LOOP-BEGIN: 2n*D= 5 [low,high] = [ 19, 64]; l_lcm/gcm = ( 6, 6) 268 | while(gcm[ix] != lcm[iv]) : ........ 269 | calculating dip .. (dip_l, dip_u) = (3.875, 3.44828) 270 | 'dip': LOOP-BEGIN: 2n*D= 5 [low,high] = [ 31, 55]; l_lcm/gcm = ( 4, 3) 271 | while(gcm[ix] != lcm[iv]) : ... 272 | List of 15 273 | $ call : language dip(x = precip, full.result = TRUE, debug = TRUE) 274 | $ x : num [1:70] 7 7.2 7.8 7.8 11.5 13 14 14.6 15 15.2 ... 275 | $ n : int 70 276 | $ dip : num 0.0357 277 | $ lo.hi : int [1:2] 31 55 278 | $ ifault : int 0 279 | $ gcm : int [1:3] 55 49 31 280 | $ lcm : int [1:4] 31 32 35 55 281 | $ mn : int [1:70] 1 1 1 3 1 1 6 7 8 9 ... 282 | $ mj : int [1:70] 2 4 4 64 55 10 10 10 10 55 ... 283 | $ min.is.0 : logi FALSE 284 | $ debug : int 1 285 | $ xl : num 35.9 286 | $ xu : num 43.4 287 | $ full.result: logi TRUE 288 | - attr(*, "class")= chr "dip" 289 | > 290 | > cat('Time elapsed: ', proc.time() - .pt,'\n') # "stats" 291 | Time elapsed: 0.061 0.007 0.085 0 0 292 | > 293 | > if(!interactive()) warnings() 294 | NULL 295 | > 296 | > proc.time() 297 | user system elapsed 298 | 0.472 0.057 0.590 299 | -------------------------------------------------------------------------------- /stuff/jeremy-unimodality.R: -------------------------------------------------------------------------------- 1 | ##-*- mode: R; kept-new-versions: 21; kept-old-versions: 12; -*- 2 | 3 | #### From http://www.stat.washington.edu/wxs/Stat593-s03/Code/jeremy-unimodality.R 4 | #### 5 | #### After recommendation by Mark Difford, 6 | #### on R-help Sep 09, 2008, Subject "Re: Modality Test" 7 | #### https://stat.ethz.ch/pipermail/r-help/2008-September/173308.html 8 | 9 | ## TODO [MM] : compare this "seriously" with dip() from my package 'diptest' 10 | ## 11 | ####----------------------------------------------------------------------------------- 12 | ## Diagnostic plots for clustering and the DIP test for unimodality 13 | ## Code written by Jeremy Tantrum, Winter 2003 14 | ##================================================================= 15 | ## 16 | ## plot.ucdf(x) - plots the cdf of x and the closest unimodal cdf of x. 17 | ## 18 | ## plot.silverman(x) - plots the unimodal Gaussian smoother closest to the 19 | ## x and the closest bimodal Gaussian smoother. 20 | ## 21 | ## calcdip(x) - calculates the dip test statistic for x, using the mode found 22 | ## by the closest unimodal Gaussian smoother. 23 | ## 24 | ## unisample(cd.out,n) - generates a sample from the unimodal distribution 25 | ## returned by the output of calcdip. 26 | ## 27 | ## 28 | ## An example of it working: Olive oil data - region 2 - are areas 5 and 6 29 | ## different. 30 | ## ---> see new file ./jeremy-unimodality-olives.R 31 | ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 32 | 33 | ##================================================================= 34 | 35 | plot.ucdf <- function(x,plot.it = TRUE,COL1 = 1,COL2 = 2,LWD1 = 3,LWD2 = 2) 36 | { 37 | x.cdf <- (1:length(x))/length(x) 38 | plot(c(min(x),sort(x)),c(0,x.cdf),type = 'n',xlab = "data",ylab = "cdf") 39 | lines(c(par('usr')[1],sort(c(x,x)),par('usr')[2]),sort(c(0,0,x.cdf,x.cdf)), 40 | lwd = LWD1,col = COL1) 41 | h.0 <- critwidth(x,4) 42 | x.f0 <- density(x,width = h.0$low) 43 | x.mode <- x.f0$x[order(x.f0$y)[length(x.f0$x)]] 44 | x.mode2 <- x[order(abs(x-x.mode))[1]] 45 | if(x.mode2 < sort(x)[4]) 46 | x.mode2 <- sort(x)[4] 47 | if(x.mode2 > sort(x)[length(x)-4]) 48 | x.mode2 <- sort(x)[length(x)-4] 49 | x.ord <- sort(x) 50 | 51 | x.split <- x.ord < x.mode2 52 | x.split2 <- x.ord >= x.mode2 53 | hull1 <- chull(c(x.ord[x.split],x.mode2),c(0,x.cdf[x.split])) 54 | n.1 <- sum(x.split)+1 55 | hlist <- c(hull1,hull1,hull1) 56 | start <- (1:length(hlist))[hlist == n.1][2] 57 | if(hlist[start+1] == 1) hlist <- rev(hlist) 58 | start <- (1:length(hlist))[hlist == n.1][2] 59 | x.hull1 <- n.1 60 | i <- start 61 | if(findslope(x.ord,x.cdf,1,hlist[i]) > findslope(x.ord,x.cdf,1,hlist[i+1])) 62 | while(hlist[i] > 1) { 63 | i <- i + 1 64 | x.hull1 <- c(x.hull1, hlist[i]) 65 | } 66 | else 67 | while(hlist[i] > 1) { 68 | i <- i - 1 69 | x.hull1 <- c(x.hull1, hlist[i]) 70 | } 71 | x.hull1 <- sort(x.hull1) 72 | hull2 <- chull(x.ord[x.split2],x.cdf[x.split2]) 73 | n.2 <- sum(x.split2) 74 | hlist <- c(hull2,hull2,hull2) 75 | start <- (1:length(hlist))[hlist == n.2][2] 76 | if(hlist[start+1] == 1) hlist <- rev(hlist) 77 | start <- (1:length(hlist))[hlist == n.2][2] 78 | x.hull2 <- n.2 79 | i <- start 80 | if(findslope(x.ord[x.split2],x.cdf[x.split2],1,hlist[i]) < 81 | findslope(x.ord[x.split2],x.cdf[x.split2],1,hlist[i+1])) 82 | while(hlist[i] > 1) { 83 | i <- i +1 84 | x.hull2 <- c(x.hull2, hlist[i]) 85 | } 86 | else 87 | while(hlist[i] > 1) { 88 | i <- i - 1 89 | x.hull2 <- c(x.hull2, hlist[i]) 90 | } 91 | x.hull2 <- sort(x.hull2) 92 | lines(c(x.ord[x.split],x.mode2)[x.hull1],c(0,x.cdf[x.split])[x.hull1], 93 | col = COL2,lwd = LWD2) 94 | lines(x.ord[x.split2][x.hull2],x.cdf[x.split2][x.hull2],col = COL2,lwd = LWD2) 95 | 96 | hull.out <- list(hull1 = x.hull1, hull2 = x.hull2+n.1-1, 97 | x = x.ord[sort(unique(c(x.hull1,x.hull2+n.1-1)))], 98 | y = x.cdf[sort(unique(c(x.hull1,x.hull2+n.1-1)))]) 99 | delta <- rep(0,length(x)) 100 | for(i in 1:length(x)) 101 | delta[i] <- abs(x.cdf[i] - fofx(sort(x)[i],hull.out)) 102 | 103 | return(invisible(list(cdf = x.cdf,unicurve = hull.out, mode = x.mode2, 104 | dip = max(delta), delta = delta, 105 | dipwhere = order(delta)[length(delta)] ))) 106 | } 107 | 108 | ##----------------------------------------------------------------- 109 | 110 | plot.silverman <- function(x,COL1 = 1,COL2 = "grey",...) 111 | { 112 | h.0 <- critwidth(x,4,tol = 0.0001)$high 113 | h.1 <- critwidth2(x,h.0,tol = 0.001)$high 114 | f.c <- density(x,window = "g",width = h.0,n = 100) 115 | f.n <- density(x,width = h.1,n = 100) 116 | plot(c(f.c$x,f.n$x),c(f.c$y,f.n$y),type = 'n',xlab = "",ylab = "density",...) 117 | lines(f.n,lwd = 3,col = COL1) 118 | lines(f.c,lwd = 3,col = COL2) 119 | points(x,rep(0,length(x)),pch = '|') 120 | } 121 | 122 | ## BELOW HERE ARE functions used by plot.silverman: 123 | 124 | critwidth <- function(g.data,start,tol = 0.001,n.points = 200) 125 | { 126 | if(is.unimodal(density(g.data,window = "g",width = start,n = n.points))) { 127 | high <- start 128 | low <- start/2 129 | while(is.unimodal(density(g.data,window = "g",width = low,n = n.points))) 130 | low <- low/2 131 | } 132 | else { 133 | low <- start 134 | high <- start*2 135 | while(!is.unimodal(density(g.data,window = "g",width = high,n = n.points))) 136 | high <- high*2 137 | } 138 | ## is.unimodal(low)=F and is.unimodal(high)=T 139 | while(high-low > tol) { 140 | wdth <- 0.5 * (high+low) 141 | if(is.unimodal(density(g.data,window = "g",width = wdth,n = n.points))) 142 | high <- wdth 143 | else 144 | low <- wdth 145 | } 146 | return(list(low = low,high = high)) 147 | } 148 | 149 | ########################################################################### 150 | critwidth2 <- function(g.data,h.0,tol = 0.001,n.points = 200) 151 | { 152 | ## h.0 is the critical width for a is.unimodal 153 | start <- h.0 + 2 * tol 154 | if(is.bimodal(density(g.data,window = "g",width = start,n = n.points))) { 155 | high <- start 156 | low <- start/2 157 | while(is.bimodal(density(g.data,window = "g",width = low,n = n.points))) 158 | low <- low/2 159 | } 160 | else { 161 | low <- start 162 | high <- start*2 163 | while(!is.bimodal(density(g.data,window = "g",width = high,n = n.points))) 164 | high <- high*2 165 | } 166 | ## is.unimodal(low)=F and is.unimodal(high)=T 167 | while(high-low > tol) { 168 | wdth <- 0.5 * (high+low) 169 | if(is.bimodal(density(g.data,window = "g",width = wdth,n = n.points))) 170 | high <- wdth 171 | else 172 | low <- wdth 173 | } 174 | return(list(low = low,high = high)) 175 | } 176 | 177 | is.unimodal <- function(dens) 178 | { 179 | ## dens is a list of dens$x and dens$y 180 | cdf <- cumsum(dens$y) 181 | n <- length(cdf) 182 | cdf.diff1 <- cdf[-1] - cdf[-n] 183 | cdf.diff2 <- cdf.diff1[-1] - cdf.diff1[-(n-1)] 184 | return(!any(order(-sign(cdf.diff2)) - 1:(n-2) > 0)) 185 | } 186 | 187 | is.bimodal <- function(dens) 188 | { 189 | ## dens is a list of dens$x and dens$y 190 | cdf <- cumsum(dens$y) 191 | n <- length(cdf) 192 | cdf.diff1 <- cdf[-1] - cdf[-n] 193 | cdf.diff2 <- cdf.diff1[-1] - cdf.diff1[-(n-1)] 194 | return(sum(sign(cdf.diff2)[-1] - sign(cdf.diff2)[-(n-2)] < 0) <= 2) 195 | } 196 | 197 | ##----------------------------------------------------------------- 198 | 199 | calcdip <- function(x, plot.it = TRUE, calc.it = TRUE) 200 | { 201 | x <- sort(x) 202 | stopifnot((n <- length(x)) >= 4) 203 | h.0 <- critwidth(x,4) 204 | x.f0 <- density(x,width = h.0$low) 205 | x.mode <- x.f0$x[order(x.f0$y)[length(x.f0$x)]] 206 | x.mode2 <- x[order(abs(x-x.mode))[1]] 207 | if(x.mode2 < x[4]) 208 | x.mode2 <- x[4] 209 | if(x.mode2 > x[n-4]) 210 | x.mode2 <- x[n-4] 211 | x.cdf <- (1:n)/n 212 | hull.out <- findhulls(x,x.cdf,x.mode2,plot.it = plot.it,xlab = "",ylab = "CDF") 213 | delta <- rep(0,n) 214 | if(calc.it) 215 | for(i in 1:n) 216 | delta[i] <- abs(x.cdf[i] - fofx(x[i],hull.out)) 217 | return(list(dip = max(delta),unicurve = hull.out)) 218 | } 219 | 220 | ## and the other functions needed: 221 | unisample <- function(hull.out,size) 222 | { 223 | n <- length(hull.out$x) 224 | min.x <- hull.out$x[1] - hull.out$y[1]/findslope(hull.out$x,hull.out$y,1,2) 225 | probs <- hull.out$y[-1] - c(0,hull.out$y[-c(1,n)]) 226 | where <- sample(1:(n-1),size,replace = TRUE,prob = probs) 227 | out <- numeric(0) 228 | x <- c(min.x,hull.out$x[-1]) 229 | for(i in 2:n) { 230 | x.s <- sum(where == i-1) 231 | if(x.s > 0) 232 | out <- c(out,runif(x.s,x[i-1],x[i])) 233 | } 234 | return(out) 235 | } 236 | 237 | findhulls <- function(x.ord,x.cdf,x.mode,plot.it = TRUE,...) 238 | { 239 | x.split <- x.ord <= x.mode 240 | x.split2 <- x.ord >= x.mode 241 | hull1 <- chull(x.ord[x.split],x.cdf[x.split]) 242 | n.1 <- sum(x.split) 243 | hlist <- c(hull1,hull1,hull1) 244 | start <- (1:length(hlist))[hlist == n.1][2] 245 | if(hlist[start+1] == 1) hlist <- rev(hlist) 246 | start <- (1:length(hlist))[hlist == n.1][2] 247 | x.hull1 <- n.1 248 | i <- start 249 | if(findslope(x.ord,x.cdf,1,hlist[i]) > findslope(x.ord,x.cdf,1,hlist[i+1])) 250 | while(hlist[i] > 1) { 251 | i <- i + 1 252 | x.hull1 <- c(x.hull1, hlist[i]) 253 | } 254 | else 255 | while(hlist[i] > 1) { 256 | i <- i - 1 257 | x.hull1 <- c(x.hull1, hlist[i]) 258 | } 259 | x.hull1 <- sort(x.hull1) 260 | hull2 <- chull(x.ord[x.split2],x.cdf[x.split2]) 261 | n.2 <- sum(x.split2) 262 | hlist <- c(hull2,hull2,hull2) 263 | start <- (1:length(hlist))[hlist == n.2][2] 264 | if(hlist[start+1] == 1) hlist <- rev(hlist) 265 | start <- (1:length(hlist))[hlist == n.2][2] 266 | x.hull2 <- n.2 267 | i <- start 268 | if(findslope(x.ord[x.split2],x.cdf[x.split2],1,hlist[i]) < 269 | findslope(x.ord[x.split2],x.cdf[x.split2],1,hlist[i+1])) 270 | while(hlist[i] > 1) { 271 | i <- i +1 272 | x.hull2 <- c(x.hull2, hlist[i]) 273 | } 274 | else 275 | while(hlist[i] > 1) { 276 | i <- i - 1 277 | x.hull2 <- c(x.hull2, hlist[i]) 278 | } 279 | x.hull2 <- sort(x.hull2) 280 | if(plot.it) { 281 | plot(x.ord,x.cdf,...) 282 | lines(x.ord[x.split][x.hull1],x.cdf[x.split][x.hull1]) 283 | lines(x.ord[x.split2][x.hull2],x.cdf[x.split2][x.hull2]) 284 | } 285 | return(list(hull1 = x.hull1, hull2 = x.hull2+n.1 -1, 286 | x = x.ord[sort(unique(c(x.hull1,x.hull2+n.1-1)))], 287 | y = x.cdf[sort(unique(c(x.hull1,x.hull2+n.1-1)))])) 288 | } 289 | 290 | ########################################################################### 291 | findslope <- function(x,y,i,j) return((y[j] - y[i])/(x[j]-x[i])) 292 | 293 | ########################################################################### 294 | fofx <- function(x,hull.out) 295 | { 296 | n <- length(hull.out$x) 297 | if(x <= hull.out$x[1]) 298 | return(0) 299 | if(x >= hull.out$x[n]) 300 | return(1) 301 | where <- (1:n)[order(c(x,hull.out$x)) == 1] 302 | return( (x-hull.out$x[where-1])/(hull.out$x[where]-hull.out$x[where-1]) * 303 | (hull.out$y[where]-hull.out$y[where-1]) + hull.out$y[where-1]) 304 | } 305 | 306 | -------------------------------------------------------------------------------- /tests/ex1.Rout.save-64b: -------------------------------------------------------------------------------- 1 | 2 | R version 4.5.1 Patched (2025-08-11 r88580) -- "Great Square Root" 3 | Copyright (C) 2025 The R Foundation for Statistical Computing 4 | Platform: x86_64-pc-linux-gnu 5 | 6 | R is free software and comes with ABSOLUTELY NO WARRANTY. 7 | You are welcome to redistribute it under certain conditions. 8 | Type 'license()' or 'licence()' for distribution details. 9 | 10 | R is a collaborative project with many contributors. 11 | Type 'contributors()' for more information and 12 | 'citation()' on how to cite R or R packages in publications. 13 | 14 | Type 'demo()' for some demos, 'help()' for on-line help, or 15 | 'help.start()' for an HTML browser interface to help. 16 | Type 'q()' to quit R. 17 | 18 | > library(diptest) 19 | > 20 | > stopifnot(dip(c(1,1,2,2)) == 1/4)# the maximal value possible: two point dist 21 | > 22 | > ## very first small "unimodal" example --- the 1/(2*n) result: 23 | > n <- length(u <- cumsum(0:3)) 24 | > d <- dip(u, debug=TRUE)# shows the final if() {added by MM} is really needed 25 | dip() in C: n = 4; starting with 2N*dip = 1. 26 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 4]; l_lcm/gcm = ( 4, 2) 27 | while(gcm[ix] != lcm[iv]) : .. 28 | calculating dip .. (dip_l, dip_u) = (0, 1) 29 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 3]; l_lcm/gcm = ( 3, 2) 30 | while(gcm[ix] != lcm[iv]) : . 31 | calculating dip .. (dip_l, dip_u) = (0, 1) 32 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 2]; l_lcm/gcm = ( 2, 2) 33 | ** (l_lcm,l_gcm) = (2,2) ==> d := 1 34 | calculating dip .. (dip_l, dip_u) = (0, 0) 35 | No improvement in low = 1 nor high = 2 --> END 36 | > stopifnot(d == dip(-u), d == 1/(2*n))# exact "=" for n = 4 ! 37 | > ## Note that I believe this should *not* give 0 (as fmechler@.. did), 38 | > ## but rather 1/(2n) because that's (1/n) / 2 and 39 | > ## (1/n) is the correct distance between LCM and GCM 40 | > 41 | > ## Small example -- but MM sees difference (32-bit / 64-bit) *and* on M1mac: 42 | > x <- c(0,2:3,5:6) 43 | > ## IGNORE_RDIFF_BEGIN 44 | > d1 <- dip(x, full=TRUE, debug=2) 45 | dip() in C: n = 5; starting with 2N*dip = 1. 46 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 5]; l_lcm/gcm = ( 2, 4) 47 | while(gcm[ix] != lcm[iv]) : 48 | G(3,2) --> ix = 2, iv = 2 49 | --> ix = 1, iv = 2 50 | calculating dip .. (dip_l, dip_u) = (1, 0) 51 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 2, 5]; l_lcm/gcm = ( 3, 3) 52 | while(gcm[ix] != lcm[iv]) : 53 | L(3,2) --> ix = 2, iv = 3 54 | G(2,3) --> ix = 1, iv = 3 55 | calculating dip .. (dip_l, dip_u) = (1.33333, 0) -> new larger dip 1.33333 (j_best = 3) 56 | 'dip': LOOP-BEGIN: 2n*D= 1.3333 [low,high] = [ 4, 5]; l_lcm/gcm = ( 2, 2) 57 | ** (l_lcm,l_gcm) = (2,2) ==> d := 1 58 | > d2 <- dip(6-x, full=TRUE, debug=2) 59 | dip() in C: n = 5; starting with 2N*dip = 1. 60 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 5]; l_lcm/gcm = ( 4, 2) 61 | while(gcm[ix] != lcm[iv]) : 62 | L(2,2) --> ix = 1, iv = 3 63 | L(2,3) --> ix = 1, iv = 4 64 | calculating dip .. (dip_l, dip_u) = (0, 1) 65 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 4]; l_lcm/gcm = ( 3, 3) 66 | while(gcm[ix] != lcm[iv]) : 67 | L(3,2) --> ix = 2, iv = 3 68 | G(2,3) --> ix = 1, iv = 3 69 | calculating dip .. (dip_l, dip_u) = (1.33333, 0) -> new larger dip 1.33333 (j_best = 2) 70 | 'dip': LOOP-BEGIN: 2n*D= 1.3333 [low,high] = [ 3, 4]; l_lcm/gcm = ( 2, 2) 71 | ** (l_lcm,l_gcm) = (2,2) ==> d := 1 72 | > str(d2) # differences to M1mac (!) 73 | List of 15 74 | $ call : language dip(x = 6 - x, full.result = TRUE, debug = 2) 75 | $ x : num [1:5] 0 1 3 4 6 76 | $ n : int 5 77 | $ dip : num 0.133 78 | $ lo.hi : int [1:2] 3 4 79 | $ ifault : int 0 80 | $ gcm : int [1:2] 4 3 81 | $ lcm : int [1:2] 3 4 82 | $ mn : int [1:5] 1 1 1 3 1 83 | $ mj : int [1:5] 2 4 4 5 5 84 | $ min.is.0 : logi FALSE 85 | $ debug : int 2 86 | $ xl : num 3 87 | $ xu : num 4 88 | $ full.result: logi TRUE 89 | - attr(*, "class")= chr "dip" 90 | > ## IGNORE_RDIFF_END 91 | > str(d1) 92 | List of 15 93 | $ call : language dip(x = x, full.result = TRUE, debug = 2) 94 | $ x : num [1:5] 0 2 3 5 6 95 | $ n : int 5 96 | $ dip : num 0.133 97 | $ lo.hi : int [1:2] 4 5 98 | $ ifault : int 0 99 | $ gcm : int [1:2] 5 4 100 | $ lcm : int [1:2] 4 5 101 | $ mn : int [1:5] 1 1 2 2 4 102 | $ mj : int [1:5] 5 3 5 5 5 103 | $ min.is.0 : logi FALSE 104 | $ debug : int 2 105 | $ xl : num 5 106 | $ xu : num 6 107 | $ full.result: logi TRUE 108 | - attr(*, "class")= chr "dip" 109 | > 110 | > if(!dev.interactive(orNone=TRUE)) pdf("ex1.pdf") 111 | > par(mfrow = 2:1, mar = .1+c(3,4,2,1), mgp=c(1.5,.6,0), oma = c(0,0,2.1,0)) 112 | > # 113 | > plot(d1) 114 | > abline(v=-1:7, h = seq(0,1,by=0.2), lty="83", col = "gray") 115 | > # 116 | > plot(d2) 117 | > abline(v=-1:7, h = seq(0,1,by=0.2), lty="83", col = "gray") 118 | > # 119 | > ## "title" only now 120 | > mtext("dip() problem with 'mirror x'", side=3, line = 0.8, 121 | + outer=TRUE, cex = 1.5, font = 2) 122 | > 123 | > 124 | > ## Yong Lu example -- a bit smaller 125 | > x2 <- c(1, rep(2, 9)) 126 | > stopifnot(dip(x2) == dip(3 - x2)) 127 | > str(dip(x2, full=TRUE)) 128 | List of 15 129 | $ call : language dip(x = x2, full.result = TRUE) 130 | $ x : num [1:10] 1 2 2 2 2 2 2 2 2 2 131 | $ n : int 10 132 | $ dip : num 0.05 133 | $ lo.hi : int [1:2] 2 10 134 | $ ifault : int 0 135 | $ gcm : int [1:2] 10 2 136 | $ lcm : int [1:2] 2 10 137 | $ mn : int [1:10] 1 1 2 2 2 2 2 2 2 2 138 | $ mj : int [1:10] 10 10 10 10 10 10 10 10 10 10 139 | $ min.is.0 : logi FALSE 140 | $ debug : int 0 141 | $ xl : num 2 142 | $ xu : num 2 143 | $ full.result: logi TRUE 144 | - attr(*, "class")= chr "dip" 145 | > cat('Time elapsed: ', (.pt <- proc.time()),'\n') # "stats" 146 | Time elapsed: 0.222 0.029 0.249 0.002 0.006 147 | > 148 | > ## Real data examples : 149 | > 150 | > data(statfaculty) 151 | > 152 | > str(dip(statfaculty, full = "all", debug = 3), vec.len = 8) 153 | dip() in C: n = 63; starting with 2N*dip = 1. 154 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 63] : 155 | gcm[1:6] = 63, 62, 7, 3, 2, 1 156 | lcm[1:5] = 1, 44, 58, 59, 63 157 | while(gcm[ix] != lcm[iv]) : 158 | G(5,2) --> ix = 4, iv = 2 159 | G(4,2) --> ix = 3, iv = 2 160 | G(3,2) --> ix = 2, iv = 2 161 | L(3,2) --> ix = 2, iv = 3 162 | L(3,3) --> ix = 2, iv = 4 163 | --> ix = 2, iv = 5 164 | --> ix = 1, iv = 5 165 | calculating dip .. (dip_l, dip_u) = (2, 2.11111) -> new larger dip 2.11111 (j_best = 61) 166 | 'dip': LOOP-BEGIN: 2n*D= 2.1111 [low,high] = [ 7, 58] : 167 | gcm[1:5] = 58, 55, 51, 48, 7 168 | lcm[1:6] = 7, 11, 15, 42, 44, 58 169 | while(gcm[ix] != lcm[iv]) : 170 | L(5,2) --> ix = 4, iv = 3 171 | L(5,3) --> ix = 4, iv = 4 172 | L(5,4) --> ix = 4, iv = 5 173 | L(5,5) --> ix = 4, iv = 6 174 | --> ix = 3, iv = 6 175 | --> ix = 2, iv = 6 176 | --> ix = 1, iv = 6 177 | calculating dip .. (dip_l, dip_u) = (0, 7.5) -> new larger dip 7.5 (j_best = 48) 178 | 'dip': LOOP-BEGIN: 2n*D= 7.5 [low,high] = [ 7, 44] : 179 | gcm[1:4] = 44, 43, 38, 7 180 | lcm[1:5] = 7, 11, 15, 42, 44 181 | while(gcm[ix] != lcm[iv]) : 182 | L(4,2) --> ix = 3, iv = 3 183 | L(4,3) --> ix = 3, iv = 4 184 | --> ix = 2, iv = 4 185 | --> ix = 2, iv = 5 186 | --> ix = 1, iv = 5 187 | List of 17 188 | $ call : language dip(x = statfaculty, full.result = "all", debug = 3) 189 | $ x : num [1:63] 30 33 35 36 37 37 39 39 39 39 39 40 40 40 40 41 42 43 43 43 ... 190 | $ n : int 63 191 | $ dip : num 0.0595 192 | $ lo.hi : int [1:2] 7 44 193 | $ ifault : int 0 194 | $ gcm : int [1:4] 44 43 38 7 195 | $ lcm : int [1:5] 7 11 15 42 44 196 | $ mn : int [1:63] 1 1 2 3 3 5 3 7 7 7 7 7 12 12 12 7 7 7 18 18 ... 197 | $ mj : int [1:63] 44 44 15 15 6 15 11 11 11 11 15 15 15 15 42 42 20 20 20 42 ... 198 | $ min.is.0 : logi FALSE 199 | $ debug : int 3 200 | $ xl : num 39 201 | $ xu : num 54 202 | $ full.result: chr "all" 203 | $ GCM : int [1:6] 63 62 7 3 2 1 204 | $ LCM : int [1:5] 1 44 58 59 63 205 | - attr(*, "class")= chr "dip" 206 | > 207 | > data(faithful) 208 | > fE <- faithful$eruptions 209 | > str(dip(fE, full = "all", debug = 3), 210 | + vec.len= 8) 211 | dip() in C: n = 272; starting with 2N*dip = 1. 212 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1,272] : 213 | gcm[1:7] = 272, 135, 120, 119, 4, 2, 1 214 | lcm[1:10] = 1, 40, 58, 60, 66, 79, 91, 261, 268, 272 215 | while(gcm[ix] != lcm[iv]) : 216 | G(6,2) --> ix = 5, iv = 2 217 | G(5,2) --> ix = 4, iv = 2 218 | L(5,2) --> ix = 4, iv = 3 219 | L(5,3) --> ix = 4, iv = 4 220 | L(5,4) --> ix = 4, iv = 5 221 | L(5,5) --> ix = 4, iv = 6 222 | L(5,6) --> ix = 4, iv = 7 223 | L(5,7) --> ix = 4, iv = 8 224 | G(4,8) --> ix = 3, iv = 8 225 | G(3,8) --> ix = 2, iv = 8 226 | --> ix = 1, iv = 8 227 | --> ix = 1, iv = 9 228 | --> ix = 1, iv = 10 229 | calculating dip .. (dip_l, dip_u) = (50.2553, 3) -> new larger dip 50.2553 (j_best = 91) 230 | 'dip': LOOP-BEGIN: 2n*D= 50.255 [low,high] = [120,261] : 231 | gcm[1:7] = 261, 260, 252, 181, 146, 135, 120 232 | lcm[1:5] = 120, 124, 233, 246, 261 233 | while(gcm[ix] != lcm[iv]) : 234 | L(7,2) --> ix = 6, iv = 3 235 | G(6,3) --> ix = 5, iv = 3 236 | G(5,3) --> ix = 4, iv = 3 237 | G(4,3) --> ix = 3, iv = 3 238 | --> ix = 3, iv = 4 239 | --> ix = 3, iv = 5 240 | --> ix = 2, iv = 5 241 | --> ix = 1, iv = 5 242 | List of 17 243 | $ call : language dip(x = fE, full.result = "all", debug = 3) 244 | $ x : num [1:272] 1.6 1.67 1.7 1.73 1.75 1.75 1.75 1.75 1.75 1.75 ... 245 | $ n : int 272 246 | $ dip : num 0.0924 247 | $ lo.hi : int [1:2] 120 261 248 | $ ifault : int 0 249 | $ gcm : int [1:7] 261 260 252 181 146 135 120 250 | $ lcm : int [1:5] 120 124 233 246 261 251 | $ mn : int [1:272] 1 1 2 2 4 5 5 5 5 5 5 11 5 13 13 13 13 17 17 13 ... 252 | $ mj : int [1:272] 40 40 40 10 10 10 10 10 10 40 12 36 16 16 16 26 19 19 26 26 ... 253 | $ min.is.0 : logi FALSE 254 | $ debug : int 3 255 | $ xl : num 3.83 256 | $ xu : num 4.83 257 | $ full.result: chr "all" 258 | $ GCM : int [1:7] 272 135 120 119 4 2 1 259 | $ LCM : int [1:10] 1 40 58 60 66 79 91 261 268 272 260 | - attr(*, "class")= chr "dip" 261 | > 262 | > data(precip) 263 | > str(dip(precip, full = TRUE, debug = TRUE)) 264 | dip() in C: n = 70; starting with 2N*dip = 1. 265 | 'dip': LOOP-BEGIN: 2n*D= 1 [low,high] = [ 1, 70]; l_lcm/gcm = ( 6, 4) 266 | while(gcm[ix] != lcm[iv]) : ...... 267 | calculating dip .. (dip_l, dip_u) = (5, 2.5) -> new larger dip 5 (j_best = 13) 268 | 'dip': LOOP-BEGIN: 2n*D= 5 [low,high] = [ 19, 64]; l_lcm/gcm = ( 6, 6) 269 | while(gcm[ix] != lcm[iv]) : ........ 270 | calculating dip .. (dip_l, dip_u) = (3.875, 3.44828) 271 | 'dip': LOOP-BEGIN: 2n*D= 5 [low,high] = [ 31, 55]; l_lcm/gcm = ( 4, 3) 272 | while(gcm[ix] != lcm[iv]) : ... 273 | List of 15 274 | $ call : language dip(x = precip, full.result = TRUE, debug = TRUE) 275 | $ x : num [1:70] 7 7.2 7.8 7.8 11.5 13 14 14.6 15 15.2 ... 276 | $ n : int 70 277 | $ dip : num 0.0357 278 | $ lo.hi : int [1:2] 31 55 279 | $ ifault : int 0 280 | $ gcm : int [1:3] 55 49 31 281 | $ lcm : int [1:4] 31 32 35 55 282 | $ mn : int [1:70] 1 1 1 3 1 1 6 7 8 9 ... 283 | $ mj : int [1:70] 2 4 4 64 55 10 10 10 10 55 ... 284 | $ min.is.0 : logi FALSE 285 | $ debug : int 1 286 | $ xl : Named num 35.9 287 | ..- attr(*, "names")= chr "Dallas" 288 | $ xu : Named num 43.4 289 | ..- attr(*, "names")= chr "Hartford" 290 | $ full.result: logi TRUE 291 | - attr(*, "class")= chr "dip" 292 | > 293 | > ## current qDiptab <--> n = 72'000 is "asymptotic" boundary 294 | > set.seed(123); x <- rnorm(72000) 295 | > dt72k <- dip.test(x) 296 | > ## gave error in qDiptab[i2, ] : subscript out of bounds -- in diptest <= 0.77-0 297 | > stopifnot(all.equal(list(statistic = c(D = 0.0005171098381181), p.value = 1, nobs = 72000L), 298 | + dt72k[c("statistic", "p.value", "nobs")], tolerance = 1e-13)) 299 | > 300 | > 301 | > cat('Time elapsed: ', proc.time() - .pt,'\n') # "stats" 302 | Time elapsed: 0.092 0.015 0.108 0 0 303 | > 304 | > if(!interactive()) warnings() 305 | > 306 | > proc.time() 307 | user system elapsed 308 | 0.280 0.043 0.313 309 | -------------------------------------------------------------------------------- /vignettes/diptest-issues.Rnw: -------------------------------------------------------------------------------- 1 | %\documentclass[article]{jss} 2 | \documentclass[nojss,article]{jss} 3 | % ----- for the package-vignette, don't use JSS logo, etc 4 | % 5 | % \author{Martin Maechler\\ Seminar f\"ur Statistik \\ ETH Zurich, \ Switzerland 6 | % \\\email{maechler@stat.math.ethz.ch}} 7 | \author{Martin M\"achler \\ ETH Zurich} 8 | \title{Dip Test Distributions, P-values, and other Explorations} 9 | % \def\mythanks{a version of this paper, for \pkg{nacopula} 0.4\_4, has been published 10 | % in JSS, \url{http://www.jstatsoft.org/v39/i09}.} 11 | %% for pretty printing and a nice hypersummary also set: 12 | \Plainauthor{Martin M\"achler} %% comma-separated 13 | \Plaintitle{Dip Test Distributions, P-values, and other Explorations} 14 | % \Shorttitle{} 15 | %\date{April 2009 ({\tiny typeset on \tiny\today})} 16 | %%\VignetteIndexEntry{Dip Test Distributions, P-values, and other Explorations} 17 | %%\VignetteDepends{diptest} 18 | \SweaveOpts{engine=R,keep.source=TRUE,strip.white=true} 19 | % ^^^^^^^^^^^^^^^^ 20 | \SweaveOpts{eps=FALSE,pdf=TRUE,width=7,height=4} 21 | 22 | %% an abstract and keywords 23 | \Abstract{ 24 | 25 | ... % FIXME 26 | 27 | ... % FIXME 28 | } 29 | % 30 | \Keywords{MPFR, Abitrary Precision, Multiple Precision Floating-Point, R} 31 | %% at least one keyword must be supplied 32 | 33 | %% publication information 34 | %% NOTE: Typically, this can be left commented and will be filled out by the technical editor 35 | %% \Volume{13} 36 | %% \Issue{9} 37 | %% \Month{September} 38 | %% \Year{2004} 39 | %% \Submitdate{2004-09-29} 40 | %% \Acceptdate{2004-09-29} 41 | 42 | %% The address of (at least) one author should be given 43 | %% in the following format: 44 | \Address{ 45 | Martin M\"achler\\ 46 | Seminar f\"ur Statistik, HG G~14.2\\ 47 | ETH Zurich\\ 48 | 8092 Zurich, Switzerland\\ 49 | E-mail: \email{maechler@stat.math.ethz.ch}\\ 50 | URL: \url{https://people.math.ethz.ch/~maechler/} 51 | } 52 | %% It is also possible to add a telephone and fax number 53 | %% before the e-mail in the following format: 54 | %% Telephone: +43/1/31336-5053 55 | %% Fax: +43/1/31336-734 56 | 57 | %% for those who use Sweave please include the following line (with % symbols): 58 | %% MM: this is "substituted" by jss.cls: 59 | %% need no \usepackage{Sweave.sty} 60 | 61 | % \usepackage{myVignette} 62 | % \usepackage{fullpage}% save trees ;-) --- FIXME use {geometry} package 63 | % \usepackage[authoryear,round,longnamesfirst]{natbib} 64 | % \bibliographystyle{plainnat} 65 | % 66 | %% Marius' packages 67 | \usepackage[american]{babel}%for American English 68 | % \usepackage{microtype}%for character protrusion and font expansion (only with pdflatex) 69 | \usepackage{amsmath}%sophisticated mathematical formulas with amstex (includes \text{}) 70 | \usepackage{mathtools}%fix amsmath deficiencies 71 | \usepackage{amssymb}%sophisticated mathematical symbols with amstex (includes \mathbb{}) 72 | % \usepackage{amsthm}%theorem environments 73 | % \usepackage{bm}%for bold math symbols: \bm (= bold math) 74 | % %NON-STANDARD:\RequirePackage{bbm}%only for indicator functions 75 | % \usepackage{enumitem}%for automatic numbering of new enumerate environments 76 | % \usepackage[ 77 | % format=hang, 78 | % % NOT for JSS: labelsep=space, 79 | % justification=justified, 80 | % singlelinecheck=false%, 81 | % % NOT for JSS: labelfont=bf 82 | % ]{caption}%for captions 83 | % \usepackage{tikz}%sophisticated graphics package 84 | % \usepackage{tabularx}%for special table environment (tabularx-table) 85 | % \usepackage{booktabs}%for table layout 86 | 87 | % This is already in jss above -- but withOUT the fontsize=\small part !! 88 | \DefineVerbatimEnvironment{Sinput}{Verbatim}{fontsize=\small,fontshape=sl} 89 | \DefineVerbatimEnvironment{Soutput}{Verbatim}{fontsize=\small} 90 | \DefineVerbatimEnvironment{Scode}{Verbatim}{fontsize=\small,fontshape=sl} 91 | % but when submitting, do get rid of too much vertical space between R 92 | % input & output, i.e. between Sinput and Soutput: 93 | \fvset{listparameters={\setlength{\topsep}{0pt}}}% !! quite an effect! 94 | %% 95 | % 96 | \newcommand*{\R}{\proglang{R}}%{\textsf{R}} 97 | \newcommand*{\Arg}[1]{\texttt{\itshape $\langle$#1$\rangle$}} 98 | \newcommand*{\file}[1]{{`\normalfont\texttt{#1}'}} 99 | \newcommand*{\eps}{\varepsilon} 100 | % 101 | %% Probability P[.], Expectation E[.] etc 102 | \makeatletter 103 | %% == subsection of our flexible-style "texab.sty" : 104 | \newcommand{\@pkl}{[} % Probability Klammer links 105 | \newcommand{\@pkr}{]} 106 | \newcommand{\@ekl}{[} % Erwartungswert Klammer links 107 | \newcommand{\@ekr}{]} % Erwartungswert Klammer rechts 108 | \DeclareMathOperator{\PRSymbol}{P} 109 | % Next line (\makeright): if #1 == \left then \right #2 else #1 #2 110 | \newcommand{\makeright}[2]{\ifx#1\left\right#2\else#1#2\fi} 111 | %% the real commands 112 | \newcommand{\PR}[2][\left] {\PRSymbol #1\@pkl #2 \makeright{#1}{\@pkr}} 113 | \newcommand{\ERW}[2][\left] {\ERWSymbol #1\@ekl #2 \makeright{#1}{\@ekr}} 114 | \makeatother 115 | \newcommand{\isD}{\ {\stackrel{\mathcal{D}}{=}}\ \ } 116 | \newcommand*{\iid}{\mbox{ i.i.d. }} 117 | 118 | % 119 | \begin{document} 120 | \setkeys{Gin}{width=\textwidth} 121 | % Manuel has 122 | \setlength{\abovecaptionskip}{-5pt} 123 | % 124 | %% include your article here, just as usual 125 | %% Note that you should use the \pkg{}, \proglang{} and \code{} commands. 126 | % \section[About Java]{About \proglang{Java}} 127 | %% Note: If there is markup in \(sub)section, then it has to be escape as above. 128 | %% Note: These are explained in '?RweaveLatex' : 129 | \begin{footnotesize} 130 | <>= 131 | options(SweaveHooks= list(fig=function() par(mar=c(5.1, 4.1, 1.1, 2.1))), 132 | width = 75, 133 | digits = 7, # <-- here, keep R's default! 134 | prompt = "R> ", # <- "yuck!" - required by JSS 135 | continue=" ") 136 | set.seed(47) 137 | Sys.setenv(LANGUAGE = "en") 138 | if(.Platform$OS.type != "windows") 139 | Sys.setlocale("LC_MESSAGES","C") 140 | 141 | ## In order to save() and load() expensive results 142 | thisDir <- system.file('doc', package='diptest') 143 | ## not yet used: 144 | xtraDir <- if(Sys.getenv("USER") == "maechler") 145 | "~/R/Pkgs/diptest/stuff" else thisDir 146 | res1.file <- file.path(thisDir, "aggr_results.Rdata") 147 | 148 | <>= 149 | if(nzchar(Sys.getenv("R_MM_PKG_CHECKING"))) print( .libPaths() ) 150 | @ 151 | \end{footnotesize} 152 | % \maketitle 153 | % \begin{abstract} 154 | % \end{abstract} 155 | 156 | \section[Introduction]{Introduction}% \small~\footnote{\mythanks}} 157 | \label{sec:Intro} 158 | 159 | %% MM 160 | FIXME: Need notation 161 | 162 | $D_n :=$\texttt{dip( runif(n) )}; 163 | 164 | but more generally, 165 | \begin{equation} 166 | \label{eq:Dn.F} 167 | D_n(F) := D(X_1, X_2, \dots, X_n), \mbox{ \ \ \texttt{where} } X_i \iid, X_i \sim F. 168 | \end{equation} 169 | 170 | \citet{HarJH85} in their ``seminal'' paper on the dip statistic $D_n$ 171 | already proved that $ \sqrt{n} \; D_n$ converges in distribution, i.e., 172 | \begin{equation} 173 | \label{eq:D.infty} 174 | \lim_{n\to\infty}\sqrt{n} \; D_n \isD D_\infty. 175 | \end{equation} 176 | 177 | A considerable part of this paper is devoted to explore the distribution of $D_\infty$. 178 | 179 | \bigskip 180 | \section[History of the diptest package]{History of the \texttt{diptest} \textsf{R} package} 181 | 182 | \citet{HarP85} published an implementation in Fortran of a concrete algorithm, 183 | % ALGORITHM AS 217 APPL. STATIST. (1985) VOL.34, NO.3 184 | where the code was also made available on Statlib\footnote{Statlib is now a 185 | website, of course, \url{http://lib.stat.cmu.edu/}, but then was \emph{the} preferred way 186 | for distributing algorithms for statistical computing, available years 187 | before the existence of the WWW, and entailing e-mail and (anonymous) FTP} 188 | 189 | On July 28, 1994, Dario Ringach, then at NY University, asked on Snews (the 190 | mailing list for S and S-plus users) about distributions and was helped by 191 | me and then about \texttt{dyn.load} problems, again helped by 192 | me. Subsequently he provided me with S-plus code which interfaced to 193 | (a \texttt{f2c}ed version of) Hartigan's Fortran code, for computing the dip statistic. 194 | and ended the (then private) e-mail with 195 | \begin{quotation} 196 | I am not going to have time to set this up for submission to StatLib. 197 | If you want to do it, please go ahead. 198 | 199 | Regards, 200 | Dario 201 | \end{quotation} 202 | 203 | 204 | 205 | - several important bug fixes; 206 | last one Oct./Nov.~2003 207 | 208 | However, the Fortran code file \url{http://lib.stat.cmu.edu/apstat/217}, 209 | was last changed {Thu 04 Aug 2005 03:43:28 PM CEST}. 210 | 211 | We have some results of the dip.dist of \emph{before} the bug fix; 212 | notably the ``dip of the dip'' probabilities have changed considerably!! 213 | 214 | - see rcs log of ../../src/dip.c 215 | 216 | \section{21st Century Improvement of Hartigan$^2$'s Table} 217 | 218 | (( 219 | 220 | Use listing package (or so to more or less ``cut \& paste'' the nice code in 221 | \texttt{../../stuff/new-simul.Rout-1e6} 222 | 223 | )) 224 | 225 | \section{The Dip in the Dip's Distribution} 226 | \label{sec:dip_dip} 227 | We have found empirically that the dip distribution itself starts with a ``dip''. 228 | Specifically, the minimal possible value of $D_n$ is $\frac{1}{2n}$ 229 | \emph{and} the probability of reaching that value, 230 | \begin{equation} 231 | \label{eq:P.Dn_min} 232 | \PR{D_n = \frac{1}{2n}}, 233 | \end{equation} 234 | is large for small $n$. 235 | 236 | E.g., consider an approximation of the dip distribution for $n=5$, 237 | <>= 238 | require("diptest") # after installing it .. 239 | D5 <- replicate(10000, dip(runif(5))) 240 | hist(D5, breaks=128, main = "Histogram of replicate(10'000, dip(runif(5))))") 241 | @ 242 | 243 | which looks as if there was a bug in the software --- but that look is misleading! 244 | Note how the phenomenon is still visible for $n=8$, 245 | <>= 246 | D8 <- replicate(10000, dip(runif(8))) 247 | hist(D8, breaks=128, main = "Histogram of replicate(10'000, dip(runif(8))))") 248 | @ 249 | 250 | Note that there is another phenomenon, in addition to the point mass at $1/(2n)$, 251 | particularly visible, if we use \emph{many} replicates, 252 | <>= 253 | set.seed(11) 254 | n <- 11 255 | B.s11 <- 500000 256 | D11 <- replicate(B.s11, dip(runif(n))) 257 | <<2nd-small-sample-phenomen--n-eq-11, echo=false>>= 258 | if(file.exists(ff <- file.path(thisDir, "hist-D11.rda"))) { 259 | load(ff) 260 | } else { ## takes a few minutes 261 | <> 262 | hD11 <- hist(D11, breaks=1e-6+(63:298)/(2*11*64), plot=FALSE) 263 | save(hD11, n, B.s11, file= ff) 264 | } 265 | <<2nd-small-sample-phenomen--n-eq-11, echo=false, fig=true>>= 266 | B.str <- format(B.s11, sci=FALSE, big.mark="'") 267 | plot(hD11, main = "", 268 | ## main = sprintf("Histogram of replicate(%s, dip(runif(%d)))", B.str, n), 269 | border=NA, col="dark gray", 270 | xlab = substitute("Dip" ~~ D[.N.](U(group("[",list(0,1),"]"))), list(.N. = n))) 271 | title(xlab= substitute(B == .B.SIM. ~ "replicates", list(.B.SIM. = B.str)), 272 | adj = .88) 273 | lcol <- adjustcolor("orange4", 0.4) 274 | abline(v = (1:3)/(2*n), col=lcol, lty=3, lwd=2) 275 | axis(1, pos=0, at = (1:3)/(2*n), 276 | labels = expression(1/22, 2/22, 3/22), col=lcol, col.axis=lcol) 277 | @ 278 | 279 | FIXME:\\ 280 | use \file{../../stuff/sim-minProb.R} \\ 281 | and \file{../../stuff/minProb-anal.R} 282 | 283 | Further, it can be seen that the \emph{maximal} dip statistic 284 | is $\frac 1 4 = 0.25$ and this upper bound can be reached simply (for even 285 | $n$) using the the data $(0,0,\dots,0, \; 1, 1,\dots,1)$, a bi-point mass 286 | with equal mass at both points. 287 | 288 | \section{P-values for the Dip Test} 289 | \label{sec:Pvals} 290 | Note that it is not obvious how to compute $p$-values for ``the dip test'', 291 | as that means determining the distribution of the test statistic, i.e., 292 | $D_n$ under the null hypothesis, but a natural null, 293 | $H_o: F \in \{F \mathrm{cadlag} \mid f := \frac d{dx} F is unimodal \}$ 294 | is too large. Hartigans'(1985) argued for using the uniform $U[0,1]$ i.e., 295 | $F'(x) = f(x)= \mathbf{1}_{[0,1]}(x) = [0 \le x \le 1]$ (Iverson bracket) 296 | instead, even though they showed that it is not quite the ``least 297 | favorable'' one. 298 | Following Hartigans', we will define the $p$-value of an observed $d_n$ as 299 | \begin{equation} 300 | \label{eq:Pval} 301 | P_{d_n} := \PR{D_n \ge d_n} := \PR{\mathrm{dip}(U_1,\dots,U_n) \ge d_n}, \ \ 302 | \mathrm{where} \ U_i \sim U[0,1], \ \, \iid 303 | \end{equation} 304 | 305 | \subsection{Interpolating the Dip Table} 306 | \label{sec:interpol} 307 | Because of the asymptotic distribution, 308 | $ \lim_{n\to\infty}\sqrt{n} \; D_n \isD D_\infty$, 309 | it is makes sense to consider the ``$\sqrt{n} D_n$''--scale, 310 | even for finite $n$ values: 311 | <>= 312 | data(qDiptab) 313 | dnqd <- dimnames(qDiptab) 314 | (nn. <- as.integer(dnqd[["n"]])) 315 | matplot(nn., qDiptab*sqrt(nn.), type ="o", pch=1, cex = 0.4, 316 | log="x", xlab="n [log scaled]", 317 | ylab = expression(sqrt(n) %*% q[D[n]])) 318 | ## Note that 1/2n is the first possible value (with finite mass),, 319 | ## clearly visible for (very) small n: 320 | lines(nn., sqrt(nn.)/(2*nn.), col=adjustcolor("yellow2",0.5), lwd=3) 321 | 322 | P.p <- as.numeric(print(noquote(dnqd[["Pr"]]))) 323 | ## Now look at one well known data set: 324 | D <- dip(x <- faithful$waiting) 325 | n <- length(x) 326 | points(n, sqrt(n)*D, pch=13, cex=2, col= adjustcolor("blue2",.5), lwd=2) 327 | ## a simulated (approximate) $p$-value for D is 328 | mean(D <= replicate(10000, dip(runif(n)))) ## ~ 0.002 329 | @ 330 | 331 | but we can use our table to compute a deterministic (but still approximate, 332 | as the table is from simulation too) $p$-value: 333 | <>= 334 | ## We are in this interval: 335 | n0 <- nn.[i.n <- findInterval(n, nn.)] 336 | n1 <- nn.[i.n +1] ; c(n0, n1) 337 | f.n <- (n - n0)/(n1 - n0)# in [0, 1] 338 | ## Now "find" y-interval: 339 | y.0 <- sqrt(n0)* qDiptab[i.n ,] 340 | y.1 <- sqrt(n1)* qDiptab[i.n+1,] 341 | (Pval <- 1 - approx(y.0 + f.n*(y.1 - y.0), 342 | P.p, 343 | xout = sqrt(n) * D)[["y"]]) 344 | ## 0.018095 345 | @ 346 | 347 | Finally, in May 2011, after several years of people asking for it, 348 | I have implemented a \code{dip.test} function which makes use of a --- 349 | somewhat more sophisticated --- interpolation scheme like the one above, 350 | to compute a $p$-value. 351 | As \code{qDiptab} has been based on $10^6$ samples, the interpolation 352 | yields accurate $p$-values, unless in very extreme cases. 353 | Here is the small ($n=63$) example from Hartigan$^2$, 354 | <>= 355 | data(statfaculty) 356 | dip.test(statfaculty) 357 | @ 358 | where, from a $p$-value of 8.7\%, we'd conclude that there is not enough 359 | evidence against unimodality. 360 | 361 | \subsection{Asymptotic Dip Distribution} 362 | \label{sec:asymp} 363 | We have conducted extensive simulations in order to explore the limit 364 | distribution of $D_\infty$, i.e., the limit of $\sqrt{n} \; D_n$, (\ref{eq:D.infty}). 365 | 366 | Our current \R\ code is in \file{ ../../stuff/asymp-distrib.R } 367 | but the simulation results (7 Megabytes for each $n$) cannot be assumed to 368 | be part of the package, nor maybe even to be simply accessible via the internet. 369 | %% is bandwidth a problem ? probably no longer in the near future? 370 | 371 | 372 | %% Maybe 373 | \section{Less Conservative Dip Testing} 374 | 375 | 376 | \section{Session Info} 377 | 378 | <>= 379 | toLatex(sessionInfo()) 380 | @ 381 | 382 | \bibliography{diptest} 383 | 384 | \end{document} 385 | -------------------------------------------------------------------------------- /stuff/Stuetzle-stat593-S2003-olive.dat: -------------------------------------------------------------------------------- 1 | 1 1 1075 75 226 7823 672 36 60 29 2 | 1 1 1088 73 224 7709 781 31 61 29 3 | 1 1 911 54 246 8113 549 31 63 29 4 | 1 1 966 57 240 7952 619 50 78 35 5 | 1 1 1051 67 259 7771 672 50 80 46 6 | 1 1 911 49 268 7924 678 51 70 44 7 | 1 1 922 66 264 7990 618 49 56 29 8 | 1 1 1100 61 235 7728 734 39 64 35 9 | 1 1 1082 60 239 7745 709 46 83 33 10 | 1 1 1037 55 213 7944 633 26 52 30 11 | 1 1 1051 35 219 7978 605 21 65 24 12 | 1 1 1036 59 235 7868 661 30 62 44 13 | 1 1 1074 70 214 7728 747 50 79 33 14 | 1 1 875 52 243 8018 655 41 79 32 15 | 1 1 952 49 254 7795 780 50 75 41 16 | 1 1 1155 98 201 7606 816 32 60 29 17 | 1 1 943 94 183 7840 788 42 75 31 18 | 1 1 1278 69 205 7344 957 45 70 28 19 | 1 1 961 70 195 7958 742 46 75 30 20 | 1 1 952 77 258 7820 736 43 78 33 21 | 1 1 1074 67 236 7692 716 56 83 45 22 | 1 1 995 46 288 7806 679 56 86 40 23 | 1 1 1056 53 247 7703 700 54 89 51 24 | 1 1 1065 39 234 7876 703 42 74 26 25 | 1 1 1065 45 245 7779 696 47 82 38 26 | 1 2 1315 139 230 7299 832 42 60 32 27 | 1 2 1321 136 217 7174 950 43 63 30 28 | 1 2 1359 115 246 7234 874 45 63 18 29 | 1 2 1378 111 272 7127 940 46 64 23 30 | 1 2 1295 109 245 7253 903 43 62 38 31 | 1 2 1275 121 215 7285 892 40 68 41 32 | 1 2 1336 120 318 7083 915 50 70 38 33 | 1 2 1309 122 241 7257 870 46 72 35 34 | 1 2 1340 114 189 7337 820 48 72 21 35 | 1 2 1299 116 253 7309 823 40 69 27 36 | 1 2 1221 107 221 7441 798 54 70 28 37 | 1 2 1245 72 283 7395 829 44 67 28 38 | 1 2 1285 129 244 7323 819 57 65 36 39 | 1 2 1248 107 313 7299 840 46 66 33 40 | 1 2 1356 106 236 7209 866 48 75 36 41 | 1 2 1260 102 228 7354 870 49 64 28 42 | 1 2 1261 121 312 7238 877 47 65 25 43 | 1 2 1304 124 279 7160 928 48 61 37 44 | 1 2 1344 117 287 7129 897 51 65 41 45 | 1 2 1323 96 300 7351 757 47 54 26 46 | 1 2 1292 117 215 7351 839 48 61 32 47 | 1 2 1254 118 244 7394 786 46 71 24 48 | 1 2 1312 131 259 7167 939 41 69 20 49 | 1 2 1213 109 301 7261 925 47 65 31 50 | 1 2 1359 98 351 7262 780 41 56 16 51 | 1 2 1266 97 263 7435 743 45 69 29 52 | 1 2 1298 99 311 7311 787 45 67 23 53 | 1 2 1272 116 279 7258 872 43 72 27 54 | 1 2 1278 87 332 7379 771 44 53 24 55 | 1 2 1184 112 311 7391 819 48 57 28 56 | 1 2 1382 110 268 7241 828 39 60 30 57 | 1 2 1183 146 292 7580 618 38 51 23 58 | 1 2 1261 153 219 7355 818 52 70 26 59 | 1 2 1198 136 239 7639 633 27 55 19 60 | 1 2 1225 134 232 7658 616 36 49 26 61 | 1 2 1339 166 208 7190 923 40 69 25 62 | 1 2 1132 157 240 7641 638 45 60 31 63 | 1 2 1381 183 245 7385 609 47 70 25 64 | 1 2 1409 128 257 7257 759 43 57 16 65 | 1 2 1306 127 250 7254 869 47 68 24 66 | 1 2 1372 120 250 7355 702 44 68 28 67 | 1 2 1336 113 242 7293 855 38 60 18 68 | 1 2 1401 151 238 7164 857 45 72 36 69 | 1 2 1390 119 234 7236 823 40 62 41 70 | 1 2 1432 152 281 7029 949 39 55 25 71 | 1 2 1412 124 298 7182 790 45 68 28 72 | 1 2 1366 147 291 7197 783 51 70 34 73 | 1 2 1383 118 273 7282 738 45 68 29 74 | 1 2 1283 102 263 7400 763 54 65 28 75 | 1 2 1296 136 260 7380 780 48 51 18 76 | 1 2 1287 108 287 7343 826 44 44 23 77 | 1 2 1351 159 296 7229 810 36 60 22 78 | 1 2 1241 97 268 7499 709 52 69 36 79 | 1 2 1267 101 300 7230 898 74 65 34 80 | 1 2 1235 138 252 7322 861 54 66 36 81 | 1 2 1255 103 223 7395 848 47 56 30 82 | 1 3 1454 183 196 7057 1014 27 46 19 83 | 1 3 1347 194 197 7277 895 25 46 15 84 | 1 3 1364 204 225 6929 1084 21 50 14 85 | 1 3 1410 199 216 7130 955 21 48 19 86 | 1 3 1384 178 208 7105 999 29 67 26 87 | 1 3 1412 185 217 6842 1203 34 72 32 88 | 1 3 1410 232 280 6715 1233 32 60 24 89 | 1 3 1509 209 257 6647 1240 42 62 30 90 | 1 3 1317 197 256 7036 1067 40 60 22 91 | 1 3 1286 192 203 7132 1053 38 65 28 92 | 1 3 1273 191 202 6862 1303 43 70 28 93 | 1 3 1463 183 183 6747 1307 36 60 24 94 | 1 3 1399 187 191 6861 1233 38 60 17 95 | 1 3 1413 193 208 6875 1202 30 60 18 96 | 1 3 1369 206 203 6953 1168 35 50 16 97 | 1 3 1488 172 170 6920 1144 37 54 14 98 | 1 3 1323 160 205 6911 1298 24 50 17 99 | 1 3 1311 166 170 6902 1312 41 69 28 100 | 1 3 1286 163 183 7040 1230 29 57 12 101 | 1 3 1380 173 188 7038 1139 31 44 14 102 | 1 3 1394 164 223 7086 1042 24 43 23 103 | 1 3 1324 174 198 6863 1289 36 70 21 104 | 1 3 1290 157 192 7000 1263 26 51 19 105 | 1 3 1361 163 196 6888 1273 37 58 24 106 | 1 3 1387 182 242 6913 1101 44 68 30 107 | 1 3 1369 180 181 7000 1130 39 45 24 108 | 1 3 1303 165 175 7025 1243 31 41 16 109 | 1 3 1346 160 169 7072 1151 39 48 15 110 | 1 3 1369 171 184 6937 1246 30 48 15 111 | 1 3 1305 172 169 7004 1260 28 50 11 112 | 1 3 1351 179 186 6935 1243 36 50 19 113 | 1 3 1283 151 182 7000 1271 40 52 21 114 | 1 3 1449 175 198 6883 1162 40 70 22 115 | 1 3 1310 180 183 7054 1202 26 32 12 116 | 1 3 1360 163 176 6901 1280 28 65 27 117 | 1 3 1300 187 196 6920 1253 41 76 25 118 | 1 3 1368 171 218 7010 1057 41 54 26 119 | 1 3 1207 151 156 7159 1234 27 51 14 120 | 1 3 1348 154 183 6917 1277 48 56 16 121 | 1 3 1334 186 229 7261 827 34 56 20 122 | 1 3 1301 156 207 7003 1229 41 48 14 123 | 1 3 1226 181 213 6961 1230 47 74 26 124 | 1 3 1201 168 190 7100 1216 43 64 16 125 | 1 3 1297 153 177 7004 1260 35 60 16 126 | 1 3 1248 163 158 7103 1222 31 60 14 127 | 1 3 1335 159 197 6974 1220 36 60 17 128 | 1 3 1219 167 171 7087 1254 35 50 16 129 | 1 3 1318 179 177 7030 1194 35 42 25 130 | 1 3 1264 167 166 7130 1187 22 52 12 131 | 1 3 1201 175 201 7129 1193 36 49 15 132 | 1 3 1252 180 181 7055 1214 31 59 38 133 | 1 3 1273 182 209 6965 1191 43 74 23 134 | 1 3 1351 179 170 7034 1154 35 66 10 135 | 1 3 1336 155 212 7103 1086 33 55 20 136 | 1 3 1499 201 182 6803 1204 30 56 24 137 | 1 3 1425 198 193 7032 1041 31 52 17 138 | 1 3 1358 204 227 6962 1109 41 65 34 139 | 1 3 1346 181 257 7147 933 40 60 36 140 | 1 3 1392 186 256 6732 1278 53 64 29 141 | 1 3 1311 166 222 7006 1147 41 80 27 142 | 1 3 1314 171 229 6923 1198 47 76 42 143 | 1 3 1409 200 207 6842 1224 31 60 27 144 | 1 3 1342 174 221 6993 1147 36 64 23 145 | 1 3 1387 182 206 7100 1020 34 54 17 146 | 1 3 1413 202 205 6920 1165 36 46 13 147 | 1 3 1430 209 225 6800 1200 32 59 27 148 | 1 3 1336 185 223 6956 1155 56 73 16 149 | 1 3 1372 200 200 6916 1189 33 50 22 150 | 1 3 1330 157 228 7055 1108 42 55 25 151 | 1 3 1412 207 208 6822 1239 36 51 28 152 | 1 3 1321 209 217 6948 1178 42 62 23 153 | 1 3 1401 200 217 6980 1073 40 68 21 154 | 1 3 1401 214 217 6734 1293 44 69 27 155 | 1 3 1457 168 242 6724 1266 54 59 30 156 | 1 3 1451 199 221 6835 1177 37 51 29 157 | 1 3 1438 206 248 6806 1183 34 57 28 158 | 1 3 1462 204 237 6644 1309 42 54 28 159 | 1 3 1529 215 203 6602 1310 45 69 27 160 | 1 3 1510 189 245 6752 1188 36 52 28 161 | 1 3 1437 222 184 6803 1240 43 56 16 162 | 1 3 1327 129 247 7024 1157 38 56 22 163 | 1 3 1438 172 252 6630 1380 40 64 24 164 | 1 3 1447 176 189 6849 1180 42 64 26 165 | 1 3 1355 144 214 6972 1198 33 60 24 166 | 1 3 1369 156 241 6890 1209 42 63 30 167 | 1 3 1471 188 276 6697 1269 34 51 16 168 | 1 3 1456 179 240 6738 1267 41 65 14 169 | 1 3 1314 140 207 7020 1220 28 59 12 170 | 1 3 1408 176 192 6909 1195 45 50 25 171 | 1 3 1397 172 191 7107 1018 36 50 29 172 | 1 3 1413 191 186 6937 1180 31 46 13 173 | 1 3 1539 194 213 6764 1178 38 58 16 174 | 1 3 1304 159 234 7019 1174 38 53 19 175 | 1 3 1341 160 231 7033 1069 40 67 33 176 | 1 3 1508 208 249 6641 1311 25 43 20 177 | 1 3 1515 226 257 6595 1287 41 63 16 178 | 1 3 1262 165 235 7120 1113 32 51 21 179 | 1 3 1307 197 238 7003 1144 37 50 24 180 | 1 3 1294 159 253 7009 1190 30 52 13 181 | 1 3 1460 187 215 6843 1172 35 56 32 182 | 1 3 1476 187 203 6837 1197 36 48 22 183 | 1 3 1482 178 197 6814 1201 40 64 24 184 | 1 3 1388 176 185 7008 1111 48 53 31 185 | 1 3 1367 172 235 7066 1054 35 45 26 186 | 1 3 1272 207 205 7152 1098 37 52 22 187 | 1 3 1323 157 234 7132 1022 38 58 31 188 | 1 3 1206 218 242 7193 1002 37 54 25 189 | 1 3 1383 157 217 7018 1090 40 60 37 190 | 1 3 1521 190 238 6956 986 36 50 23 191 | 1 3 1350 168 227 6986 1165 29 58 17 192 | 1 3 1422 181 218 6813 1230 30 59 21 193 | 1 3 1298 166 224 6986 1162 34 65 31 194 | 1 3 1447 236 245 6607 1336 33 51 21 195 | 1 3 1347 197 211 6795 1300 32 59 34 196 | 1 3 1339 170 253 6989 1110 29 63 23 197 | 1 3 1388 183 216 6867 1208 28 61 21 198 | 1 3 1527 260 232 6488 1370 31 45 20 199 | 1 3 1495 237 236 6571 1318 32 58 26 200 | 1 3 1487 246 251 6504 1390 29 53 19 201 | 1 3 1399 180 232 6855 1190 32 66 22 202 | 1 3 1489 215 242 6777 1145 30 60 22 203 | 1 3 1339 166 226 6928 1198 30 60 23 204 | 1 3 1482 246 238 6444 1462 27 50 20 205 | 1 3 1434 172 255 6646 1354 27 59 25 206 | 1 3 1347 156 214 6850 1313 25 48 19 207 | 1 3 1340 158 233 6848 1272 32 63 25 208 | 1 3 1453 180 244 6752 1238 34 54 23 209 | 1 3 1306 149 226 7082 1097 33 61 24 210 | 1 3 1349 161 217 6997 1138 31 62 23 211 | 1 3 1254 151 205 7319 947 28 54 23 212 | 1 3 1168 144 220 7230 1109 31 52 28 213 | 1 3 1346 167 224 6959 1111 30 49 23 214 | 1 3 1390 184 212 6898 1189 29 44 19 215 | 1 3 1283 149 224 7077 1104 30 57 32 216 | 1 3 1214 137 232 7269 1005 32 55 23 217 | 1 3 1491 227 205 6941 988 33 68 34 218 | 1 3 1479 218 207 7039 887 36 65 36 219 | 1 3 1445 174 228 6875 1123 29 69 31 220 | 1 3 1439 183 218 6775 1226 32 66 29 221 | 1 3 1387 154 204 6991 1090 34 74 32 222 | 1 3 1426 169 192 7025 1043 31 64 27 223 | 1 3 1451 200 208 6980 1006 30 62 31 224 | 1 3 1493 204 188 6913 1044 32 61 35 225 | 1 3 1419 192 207 6996 1014 36 70 36 226 | 1 3 1342 177 199 7172 952 34 65 33 227 | 1 3 1349 152 236 7145 949 35 75 29 228 | 1 3 1440 196 208 6938 1070 32 61 26 229 | 1 3 1460 215 197 6918 1081 28 55 23 230 | 1 3 1249 133 205 7417 827 33 72 33 231 | 1 3 1348 159 238 7017 1081 31 67 25 232 | 1 3 1341 155 244 6958 1144 32 68 26 233 | 1 3 1398 149 204 7182 907 29 76 30 234 | 1 3 1454 200 199 6910 1090 30 62 25 235 | 1 3 1334 153 219 6928 1214 33 66 24 236 | 1 3 1438 204 189 7107 910 33 63 27 237 | 1 3 1303 138 212 7170 1016 34 69 25 238 | 1 3 1323 147 210 7108 1070 33 61 20 239 | 1 3 1417 169 207 6875 1184 34 57 27 240 | 1 3 1360 167 225 6883 1220 31 55 27 241 | 1 3 1420 179 214 6923 1121 33 56 27 242 | 1 3 1472 218 214 6724 1238 29 53 23 243 | 1 3 1368 174 205 7042 1066 31 57 26 244 | 1 3 1367 173 228 6948 1141 32 53 24 245 | 1 3 1403 173 209 6843 1210 33 63 33 246 | 1 3 1413 197 206 6737 1387 34 60 31 247 | 1 3 1201 138 207 7011 1269 37 64 35 248 | 1 3 1359 180 207 6895 1203 33 61 30 249 | 1 3 1518 198 225 6681 1243 29 57 24 250 | 1 3 1434 185 189 6771 1269 30 62 25 251 | 1 3 1367 162 179 6772 1368 33 64 27 252 | 1 3 1461 181 197 6783 1246 26 57 23 253 | 1 3 1368 161 198 7030 1095 33 59 31 254 | 1 3 1419 159 215 6862 1193 35 60 31 255 | 1 3 1514 162 298 6725 1119 45 93 30 256 | 1 3 1328 171 253 6987 1030 38 83 39 257 | 1 3 1469 160 337 6675 1127 44 94 36 258 | 1 4 1222 133 227 7425 824 36 69 35 259 | 1 4 1639 172 331 6510 1124 46 91 32 260 | 1 4 1345 133 272 6801 1194 48 83 37 261 | 1 4 1339 170 275 6838 1060 46 88 43 262 | 1 4 1194 135 263 7277 889 44 95 41 263 | 1 4 1112 68 375 7770 448 52 69 45 264 | 1 4 1222 70 329 7605 566 48 67 43 265 | 1 4 1136 72 341 7616 661 49 65 32 266 | 1 4 926 41 277 7815 784 45 65 25 267 | 1 4 1105 69 373 7714 532 51 68 37 268 | 1 4 1109 79 305 7576 763 45 64 36 269 | 1 4 1284 93 265 7235 893 43 77 46 270 | 1 4 1120 69 277 7416 946 42 59 36 271 | 1 4 916 52 281 7870 694 42 64 58 272 | 1 4 905 49 288 7747 812 49 71 56 273 | 1 4 1206 55 287 7329 935 44 74 42 274 | 1 4 1457 182 267 7020 863 41 84 37 275 | 1 4 1327 140 193 7328 823 36 87 35 276 | 1 4 1303 100 251 7045 1049 40 86 40 277 | 1 4 1444 175 259 6876 1027 34 78 32 278 | 1 4 1505 243 226 6962 858 30 72 27 279 | 1 4 1429 162 223 6917 1041 37 77 40 280 | 1 4 1491 162 211 6994 928 37 97 38 281 | 1 4 1393 128 211 7189 870 38 93 40 282 | 1 4 1404 134 210 7110 923 40 101 43 283 | 1 4 1222 130 214 7374 856 38 89 45 284 | 1 4 1153 74 316 7593 705 42 64 32 285 | 1 4 1169 76 307 7553 728 43 69 32 286 | 1 4 1369 104 237 7375 775 39 70 15 287 | 1 4 993 58 267 7743 773 41 62 44 288 | 1 4 980 53 254 7719 815 44 69 47 289 | 1 4 967 55 273 7692 833 45 63 47 290 | 1 4 1128 73 354 7527 728 44 76 38 291 | 1 4 1188 85 273 7445 814 44 73 42 292 | 1 4 1257 95 247 7405 812 43 70 35 293 | 1 4 1262 88 301 7471 704 43 71 31 294 | 1 3 1283 153 196 7107 1115 37 60 28 295 | 1 3 1263 155 199 7140 1148 31 42 18 296 | 1 3 1369 158 215 7160 958 38 69 32 297 | 1 3 1353 172 175 6965 1212 28 75 19 298 | 1 3 1187 139 185 7427 952 29 56 22 299 | 1 3 1732 231 156 6437 1313 45 62 23 300 | 1 3 1620 255 166 6628 1212 29 62 27 301 | 1 3 1543 172 193 6740 1157 52 87 34 302 | 1 3 1498 170 195 6804 1206 35 66 23 303 | 1 3 1399 169 171 7011 1100 36 72 16 304 | 1 3 1293 156 191 7101 1111 32 60 31 305 | 1 3 1420 175 152 7004 1149 27 50 20 306 | 1 3 1721 238 255 6300 1350 35 70 28 307 | 1 3 1742 221 156 6415 1315 43 82 23 308 | 1 3 1391 187 189 6975 1062 52 70 45 309 | 1 3 1517 206 249 6680 1205 33 80 27 310 | 1 3 1269 157 193 7140 1148 31 40 18 311 | 1 3 1577 204 208 6732 1183 20 52 20 312 | 1 3 1590 241 195 6705 1149 27 68 21 313 | 1 3 1621 280 197 6608 1179 28 58 27 314 | 1 3 1753 275 236 6367 1214 23 61 27 315 | 1 3 1679 260 177 6568 1191 30 59 33 316 | 1 3 1419 203 176 6973 1083 38 78 27 317 | 1 3 1693 236 174 6499 1204 51 102 37 318 | 1 3 1692 270 234 6499 1196 31 59 15 319 | 1 3 1638 252 215 6570 1199 39 53 29 320 | 1 3 1497 247 219 6621 1270 36 73 32 321 | 1 3 1442 222 194 6677 1314 36 72 38 322 | 1 3 1680 270 170 6440 1310 31 62 28 323 | 1 3 1463 164 185 6909 1154 49 58 17 324 | 2 5 1129 120 222 7272 1112 43 98 2 325 | 2 5 1042 135 210 7376 1116 35 90 3 326 | 2 5 1103 96 210 7380 1085 32 94 3 327 | 2 5 1118 97 221 7279 1154 35 94 2 328 | 2 5 1052 95 215 7388 1126 31 92 1 329 | 2 5 1116 102 231 7290 1168 26 66 1 330 | 2 5 1108 132 231 7319 1101 20 66 2 331 | 2 5 1129 108 212 7386 1074 28 62 3 332 | 2 5 1085 91 223 7384 1126 28 62 3 333 | 2 5 1104 103 233 7322 1147 27 61 2 334 | 2 5 1098 88 212 7338 1140 28 67 1 335 | 2 6 1135 98 251 7120 1314 20 61 2 336 | 2 6 1158 108 245 7065 1326 22 75 1 337 | 2 6 1133 110 241 7080 1342 21 68 3 338 | 2 6 1095 125 250 7120 1305 21 83 1 339 | 2 6 1201 87 238 6990 1383 25 75 3 340 | 2 6 1213 112 245 7007 1335 22 65 3 341 | 2 5 1108 92 231 7367 1110 29 62 3 342 | 2 5 1075 103 207 7413 1096 32 68 2 343 | 2 5 1059 96 228 7386 1128 25 72 2 344 | 2 5 1176 92 207 7347 1057 35 82 1 345 | 2 5 1159 98 213 7320 1108 38 64 1 346 | 2 5 1132 80 201 7398 1095 27 67 2 347 | 2 5 1107 75 220 7399 1096 29 90 1 348 | 2 5 1092 104 234 7355 1126 28 58 2 349 | 2 5 1119 81 219 7409 1057 33 81 2 350 | 2 5 1106 93 212 7381 1104 35 68 1 351 | 2 5 1047 101 238 7385 1120 28 89 1 352 | 2 5 1165 99 214 7331 1101 22 67 3 353 | 2 5 1158 84 201 7327 1123 29 77 2 354 | 2 5 1095 88 203 7415 1093 37 78 1 355 | 2 5 1176 75 205 7396 1107 33 74 2 356 | 2 5 1103 109 220 7335 1140 28 59 2 357 | 2 5 1112 92 209 7356 1125 32 73 2 358 | 2 5 1091 93 222 7377 1113 20 53 2 359 | 2 5 1080 98 219 7371 1125 33 78 1 360 | 2 5 1051 108 227 7403 1114 30 66 3 361 | 2 5 1096 84 211 7415 1091 30 71 2 362 | 2 5 1142 97 225 7341 1101 28 65 1 363 | 2 5 1047 96 236 7399 1107 32 80 3 364 | 2 5 1114 86 210 7359 1116 31 83 2 365 | 2 5 1140 93 241 7324 1098 23 74 1 366 | 2 5 1075 91 200 7410 1107 36 80 1 367 | 2 5 1092 106 219 7427 1125 33 77 1 368 | 2 5 1076 95 204 7408 1130 27 79 2 369 | 2 5 1178 89 201 7381 1099 34 87 2 370 | 2 5 1095 104 223 7367 1111 43 56 2 371 | 2 6 1166 97 272 6971 1390 20 83 3 372 | 2 6 1154 119 257 7130 1253 22 61 1 373 | 2 6 1177 111 241 6882 1470 22 95 2 374 | 2 6 1160 96 240 7043 1357 24 79 2 375 | 2 6 1122 104 241 7145 1313 15 58 1 376 | 2 6 1132 99 257 7065 1362 24 90 3 377 | 2 6 1096 100 260 7162 1282 25 74 2 378 | 2 6 1131 87 233 7144 1307 25 72 3 379 | 2 6 1184 105 258 7020 1340 26 66 2 380 | 2 6 1135 94 235 7123 1320 24 67 2 381 | 2 6 1084 96 240 7164 1330 28 57 1 382 | 2 6 1086 127 252 7159 1285 28 62 2 383 | 2 6 1140 95 258 7085 1347 23 71 3 384 | 2 6 1138 101 254 7103 1310 25 68 1 385 | 2 6 1159 110 261 7068 1297 27 77 2 386 | 2 5 1051 78 211 7421 1146 30 82 2 387 | 2 5 1048 79 213 7439 1130 28 61 2 388 | 2 5 1061 86 220 7421 1102 29 79 3 389 | 2 5 1105 88 210 7353 1142 28 72 1 390 | 2 5 1145 35 237 7208 1118 20 46 2 391 | 2 5 1049 96 219 7303 1168 22 47 2 392 | 2 5 1105 120 218 7302 1158 23 45 3 393 | 2 5 1030 84 214 7403 1177 21 70 1 394 | 2 5 1070 98 215 7280 1240 28 68 3 395 | 2 5 1103 81 208 7310 1177 30 90 3 396 | 2 5 1040 101 205 7368 1176 25 85 3 397 | 2 5 1100 95 210 7320 1113 22 72 3 398 | 2 5 1118 85 199 7415 1060 36 86 3 399 | 2 5 1065 98 230 7345 1163 24 74 1 400 | 2 5 1131 78 221 7358 1120 22 69 2 401 | 2 5 1080 120 218 7296 1145 35 105 2 402 | 2 5 1075 86 231 7403 1109 22 73 3 403 | 2 5 1040 103 228 7364 1173 25 66 2 404 | 2 5 1128 82 203 7320 1148 30 88 1 405 | 2 5 1060 111 231 7363 1149 20 65 1 406 | 2 5 1103 78 220 7365 1149 20 65 2 407 | 2 5 1110 91 201 7318 1185 24 74 2 408 | 2 5 1091 108 218 7383 1183 28 88 3 409 | 2 5 1094 96 220 7341 1127 26 96 2 410 | 2 6 1131 87 208 7170 1308 28 57 2 411 | 2 6 1175 108 214 7076 1307 33 85 2 412 | 2 6 1076 77 202 7243 1305 29 67 1 413 | 2 6 1120 90 240 7068 1383 23 75 1 414 | 2 6 1152 111 238 7080 1372 25 81 2 415 | 2 6 1141 95 250 7035 1388 22 68 2 416 | 2 6 1098 103 267 7135 1301 24 76 2 417 | 2 6 1126 100 236 7062 1380 26 69 1 418 | 2 6 1087 89 243 7200 1302 18 60 1 419 | 2 6 1115 96 236 7085 1372 20 75 2 420 | 2 6 1178 92 241 7006 1376 22 84 1 421 | 2 6 1162 106 242 7025 1368 25 71 2 422 | 3 9 1085 70 180 7955 605 20 50 1 423 | 3 9 1085 70 185 7955 600 25 55 1 424 | 3 9 1090 60 190 7950 600 28 47 2 425 | 3 9 1080 65 189 7960 602 35 20 1 426 | 3 9 1090 60 195 7955 600 28 42 2 427 | 3 9 1105 55 200 7900 600 37 55 2 428 | 3 9 1060 75 175 7975 610 20 55 2 429 | 3 9 1050 70 170 7977 605 28 65 1 430 | 3 9 1100 55 198 7905 600 35 50 3 431 | 3 9 1065 65 178 7965 605 22 65 2 432 | 3 9 1085 60 188 7955 602 30 50 2 433 | 3 9 1080 65 180 7960 605 25 55 1 434 | 3 9 1085 60 190 7955 602 30 53 1 435 | 3 9 1075 68 195 7960 602 20 40 3 436 | 3 9 1090 58 192 7950 600 35 40 3 437 | 3 9 1095 60 198 7945 600 38 34 2 438 | 3 9 1090 58 195 7950 600 30 42 2 439 | 3 9 1095 58 198 7950 602 35 32 1 440 | 3 9 1090 58 195 7940 600 35 42 2 441 | 3 9 1095 58 198 7945 600 35 34 1 442 | 3 9 1095 55 200 7940 600 35 45 3 443 | 3 9 1080 70 188 7965 608 28 36 3 444 | 3 9 1090 60 195 7950 600 32 38 2 445 | 3 9 1105 55 200 7900 595 39 56 1 446 | 3 9 1110 50 205 7900 595 40 52 1 447 | 3 9 1075 70 198 7978 608 28 33 2 448 | 3 9 1075 65 185 7980 608 35 42 3 449 | 3 9 1065 75 180 7975 610 25 50 3 450 | 3 9 1070 75 188 7980 602 22 45 2 451 | 3 9 1070 75 188 7980 602 22 45 1 452 | 3 9 1100 70 200 7910 610 39 44 1 453 | 3 9 1075 70 185 7960 610 22 58 2 454 | 3 9 1050 78 175 7990 610 18 59 3 455 | 3 9 1090 60 198 7945 600 32 35 2 456 | 3 9 1050 78 188 7990 608 28 23 3 457 | 3 9 1075 70 190 7975 605 28 27 3 458 | 3 9 1098 54 202 7945 595 42 32 2 459 | 3 9 1105 15 198 8005 575 52 20 2 460 | 3 9 1110 75 220 7915 510 55 65 2 461 | 3 9 1058 50 178 7988 626 40 55 3 462 | 3 9 1115 30 225 7955 600 55 15 2 463 | 3 9 1105 30 198 7995 570 52 20 3 464 | 3 9 1072 49 178 7980 615 48 48 2 465 | 3 9 1110 15 210 7990 570 50 20 2 466 | 3 9 1110 80 215 7910 525 50 60 1 467 | 3 9 1055 60 175 7985 620 45 50 1 468 | 3 9 1100 80 215 7930 535 45 60 2 469 | 3 9 1105 55 205 7965 600 25 20 2 470 | 3 9 1095 50 210 7948 600 25 35 2 471 | 3 9 1110 50 220 7950 600 52 10 2 472 | 3 9 1092 37 210 7955 600 40 40 3 473 | 3 7 1290 60 260 7550 670 70 100 2 474 | 3 7 1170 80 230 7690 720 40 70 1 475 | 3 7 1100 90 250 7680 760 30 80 2 476 | 3 7 1120 70 240 7720 730 40 80 2 477 | 3 7 1160 70 250 7650 750 30 90 1 478 | 3 7 1200 50 210 7770 690 20 50 3 479 | 3 7 1140 50 200 7990 580 10 20 1 480 | 3 7 1220 80 240 7610 760 30 60 2 481 | 3 7 1180 90 250 7520 800 50 100 2 482 | 3 7 1210 70 250 7560 780 40 90 2 483 | 3 7 1220 80 220 7540 770 60 100 2 484 | 3 7 1180 100 190 7520 820 50 100 1 485 | 3 7 1160 90 220 7580 790 40 90 1 486 | 3 7 1130 100 240 7620 780 30 90 1 487 | 3 7 1080 100 260 7710 750 20 70 2 488 | 3 7 1090 90 280 7730 720 50 100 1 489 | 3 7 1020 100 270 7770 710 40 90 1 490 | 3 7 1090 90 250 7680 760 60 80 1 491 | 3 7 1120 100 260 7720 680 30 80 2 492 | 3 7 1080 80 240 7830 670 30 70 2 493 | 3 7 1160 70 230 7860 640 10 20 1 494 | 3 7 1100 80 240 7820 670 20 70 2 495 | 3 7 1050 100 250 7930 630 10 30 3 496 | 3 7 1090 90 270 7780 690 30 50 3 497 | 3 7 1120 80 260 7750 680 30 80 3 498 | 3 7 1120 100 250 7680 730 40 70 2 499 | 3 7 1190 90 230 7670 710 30 80 2 500 | 3 7 1170 110 250 7620 740 20 90 1 501 | 3 7 1120 100 230 7720 730 20 70 1 502 | 3 7 1190 80 270 7690 720 10 40 2 503 | 3 7 1400 90 270 7420 800 0 20 2 504 | 3 7 1350 80 250 7520 760 10 30 1 505 | 3 7 1090 60 220 7890 670 10 60 2 506 | 3 7 1150 90 230 7790 650 30 60 1 507 | 3 7 1240 90 220 7820 590 10 30 1 508 | 3 7 1220 100 240 7890 530 0 10 2 509 | 3 7 1180 80 250 7870 580 10 30 2 510 | 3 7 1170 110 240 7730 630 30 90 1 511 | 3 7 1170 100 280 7710 640 20 70 3 512 | 3 7 1180 80 220 7790 680 10 40 1 513 | 3 7 1200 90 240 7820 590 10 50 2 514 | 3 7 1140 90 240 7880 570 20 60 3 515 | 3 7 1160 70 210 7870 580 30 80 3 516 | 3 7 1130 80 250 7780 650 40 60 3 517 | 3 7 1150 80 240 7800 630 30 70 2 518 | 3 7 1110 70 240 7820 670 20 70 3 519 | 3 7 1150 70 220 7850 620 20 40 2 520 | 3 7 1180 80 240 7760 670 20 50 2 521 | 3 7 1020 80 250 7920 680 10 30 3 522 | 3 7 610 80 230 8410 650 0 20 3 523 | 3 8 1190 150 290 7340 1020 0 10 2 524 | 3 8 1110 130 210 7550 1000 0 0 1 525 | 3 8 1020 100 220 7530 1030 0 0 3 526 | 3 8 1070 120 210 7600 990 0 10 3 527 | 3 8 1010 90 350 7480 1050 10 10 1 528 | 3 8 1060 140 240 7680 830 10 40 2 529 | 3 8 1060 140 270 7620 880 10 20 1 530 | 3 8 1030 100 230 7740 900 0 0 2 531 | 3 8 1120 130 250 7530 970 0 0 3 532 | 3 8 1030 110 220 7760 980 0 0 2 533 | 3 8 1070 100 230 7600 990 10 0 1 534 | 3 8 1140 180 220 7610 850 10 10 2 535 | 3 8 1090 180 230 7590 860 10 40 2 536 | 3 8 980 110 300 7720 910 10 0 3 537 | 3 8 980 90 330 7540 1040 0 0 2 538 | 3 8 960 90 200 7810 940 0 0 2 539 | 3 8 990 90 210 7780 930 0 0 2 540 | 3 8 1060 120 210 7600 1010 0 0 1 541 | 3 8 1240 150 250 7610 730 10 10 1 542 | 3 8 1060 90 310 7850 690 0 0 2 543 | 3 8 1020 100 290 7620 960 0 10 2 544 | 3 8 970 90 220 7700 1020 0 0 3 545 | 3 8 1180 130 220 7450 1010 0 10 2 546 | 3 8 1060 140 240 7690 850 10 10 1 547 | 3 8 990 100 250 7630 1030 0 0 3 548 | 3 8 1010 90 350 7630 940 10 0 3 549 | 3 8 1040 90 250 7780 820 10 10 1 550 | 3 8 1040 90 250 7810 810 10 10 2 551 | 3 8 1020 90 350 7620 920 10 0 3 552 | 3 8 1020 90 260 7620 1010 0 0 3 553 | 3 8 1010 90 350 7610 930 10 0 3 554 | 3 8 920 110 340 7720 910 0 0 3 555 | 3 8 1030 100 250 7710 900 0 10 2 556 | 3 8 960 90 300 7820 830 0 0 3 557 | 3 8 1030 110 210 7810 840 0 0 1 558 | 3 8 1010 100 240 7710 910 10 20 2 559 | 3 8 1020 90 240 7800 850 0 0 2 560 | 3 8 1120 90 300 7650 830 0 10 1 561 | 3 8 1090 90 290 7710 800 10 0 2 562 | 3 8 1100 120 280 7630 770 10 10 2 563 | 3 8 1090 80 240 7820 760 10 0 2 564 | 3 8 1150 90 250 7720 810 0 10 3 565 | 3 8 1110 90 230 7810 750 0 10 2 566 | 3 8 1010 110 210 7720 950 0 0 1 567 | 3 8 1070 100 220 7730 870 10 10 2 568 | 3 8 1280 110 290 7490 790 10 10 2 569 | 3 8 1060 100 270 7740 810 10 10 3 570 | 3 8 1010 90 210 7720 970 0 0 2 571 | 3 8 990 120 250 7750 870 10 10 2 572 | 3 8 960 80 240 7950 740 10 20 2 573 | --------------------------------------------------------------------------------