├── tests
    ├── ex1.Rout.save
    ├── sim1.Rout.save
    ├── mechler-ex.Rout.save
    ├── mechler-ex.R
    ├── sim1.R
    ├── sim1.Rout.save-32b
    ├── ex1.R
    ├── sim1.Rout.save-64b
    ├── mechler-ex.Rout.save-32b
    ├── mechler-ex.Rout.save-64b
    ├── ex1.Rout.save-32b
    └── ex1.Rout.save-64b
├── inst
    ├── doc
    │   └── hist-D11.rda
    ├── extraData
    │   └── qDiptab.rds
    └── NEWS.Rd
├── .Rinstignore
├── ChangeLog
├── data
    ├── qDiptab.R
    ├── exHartigan.R
    └── statfaculty.R
├── stuff
    ├── statlib_217_v2005.f
    ├── minProb.rds
    ├── asymp-res.rda
    ├── hist-D11.rda
    ├── d10k_do.R
    ├── d16k_do.R
    ├── d20k_do.R
    ├── d24k_do.R
    ├── d32k_do.R
    ├── d36k_do.R
    ├── d40k_do.R
    ├── d44k_do.R
    ├── d52k_do.R
    ├── d60k_do.R
    ├── d6k_do.R
    ├── d72k_do.R
    ├── d8k_do.R
    ├── d12k_do.R
    ├── sim-minProb.R
    ├── dip_N_do.R
    ├── qdipTab-use.R
    ├── new-simul1e5.R
    ├── Stuetzle-stat593-S2003-olive.doc
    ├── new-simul.R
    ├── qdipTab-large-n-expand.R
    ├── sim-minProb.Rout
    ├── minProb-anal.R
    ├── jeremy-unimodality-olives.R
    ├── pre-bugfix-2003-11
    │   ├── dip.S
    │   └── dipF-statlib_2005-08-04.f
    ├── new-sim-analysis.R
    ├── unimodality.Rnw
    ├── asymp-distrib.R
    ├── jeremy-unimodality.R
    └── Stuetzle-stat593-S2003-olive.dat
├── .gitignore
├── .Rbuildignore
├── NAMESPACE
├── man
    ├── exHartigan.Rd
    ├── statfaculty.Rd
    ├── plot.dip.Rd
    ├── qDiptab.Rd
    ├── dip.test.Rd
    └── dip.Rd
├── TODO.md
├── DESCRIPTION
├── README.md
├── R
    ├── dipTest.R
    └── dip.R
├── vignettes
    ├── diptest.bib
    ├── myVignette.sty
    └── diptest-issues.Rnw
└── src
    └── dip.c


/tests/ex1.Rout.save:
--------------------------------------------------------------------------------
1 | ex1.Rout.save-64b


--------------------------------------------------------------------------------
/tests/sim1.Rout.save:
--------------------------------------------------------------------------------
1 | sim1.Rout.save-64b


--------------------------------------------------------------------------------
/inst/doc/hist-D11.rda:
--------------------------------------------------------------------------------
1 | ../../stuff/hist-D11.rda


--------------------------------------------------------------------------------
/.Rinstignore:
--------------------------------------------------------------------------------
1 | inst/doc/Makefile
2 | inst/doc/.*sty
3 | 


--------------------------------------------------------------------------------
/tests/mechler-ex.Rout.save:
--------------------------------------------------------------------------------
1 | mechler-ex.Rout.save-64b


--------------------------------------------------------------------------------
/ChangeLog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmaechler/diptest/HEAD/ChangeLog


--------------------------------------------------------------------------------
/data/qDiptab.R:
--------------------------------------------------------------------------------
1 | qDiptab <- diptest:::rdRDS("extraData", "qDiptab.rds")
2 | 


--------------------------------------------------------------------------------
/stuff/statlib_217_v2005.f:
--------------------------------------------------------------------------------
1 | pre-bugfix-2003-11/dipF-statlib_2005-08-04.f


--------------------------------------------------------------------------------
/stuff/minProb.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmaechler/diptest/HEAD/stuff/minProb.rds


--------------------------------------------------------------------------------
/stuff/asymp-res.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmaechler/diptest/HEAD/stuff/asymp-res.rda


--------------------------------------------------------------------------------
/stuff/hist-D11.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmaechler/diptest/HEAD/stuff/hist-D11.rda


--------------------------------------------------------------------------------
/inst/extraData/qDiptab.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmaechler/diptest/HEAD/inst/extraData/qDiptab.rds


--------------------------------------------------------------------------------
/stuff/d10k_do.R:
--------------------------------------------------------------------------------
1 | N <- 10000
2 | n.sim <- 1000001
3 | set.seed(14)
4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE)
5 | 
6 | 


--------------------------------------------------------------------------------
/stuff/d16k_do.R:
--------------------------------------------------------------------------------
1 | N <- 16000
2 | n.sim <- 1000001
3 | set.seed(117)
4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE)
5 | 
6 | 


--------------------------------------------------------------------------------
/stuff/d20k_do.R:
--------------------------------------------------------------------------------
1 | N <- 20000
2 | n.sim <- 1000001
3 | set.seed(12)
4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE)
5 | 
6 | 


--------------------------------------------------------------------------------
/stuff/d24k_do.R:
--------------------------------------------------------------------------------
1 | N <- 24000
2 | n.sim <- 1000001
3 | set.seed(14)
4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE)
5 | 
6 | 


--------------------------------------------------------------------------------
/stuff/d32k_do.R:
--------------------------------------------------------------------------------
1 | N <- 32000
2 | n.sim <- 1000001
3 | set.seed(14)
4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE)
5 | 
6 | 


--------------------------------------------------------------------------------
/stuff/d36k_do.R:
--------------------------------------------------------------------------------
1 | N <- 36000
2 | n.sim <- 1000001
3 | set.seed(47)
4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE)
5 | 
6 | 


--------------------------------------------------------------------------------
/stuff/d40k_do.R:
--------------------------------------------------------------------------------
1 | N <- 40000
2 | n.sim <- 1000001
3 | set.seed(67)
4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE)
5 | 
6 | 


--------------------------------------------------------------------------------
/stuff/d44k_do.R:
--------------------------------------------------------------------------------
1 | N <- 44000
2 | n.sim <- 1000001
3 | set.seed(14)
4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE)
5 | 
6 | 


--------------------------------------------------------------------------------
/stuff/d52k_do.R:
--------------------------------------------------------------------------------
1 | N <- 52000
2 | n.sim <- 1000001
3 | set.seed(109)
4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE)
5 | 
6 | 


--------------------------------------------------------------------------------
/stuff/d60k_do.R:
--------------------------------------------------------------------------------
1 | N <- 60000
2 | n.sim <- 1000001
3 | set.seed(97)
4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE)
5 | 
6 | 


--------------------------------------------------------------------------------
/stuff/d6k_do.R:
--------------------------------------------------------------------------------
1 | N <- 6000
2 | n.sim <- 1000001
3 | set.seed(64)
4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE)
5 | 
6 | 


--------------------------------------------------------------------------------
/stuff/d72k_do.R:
--------------------------------------------------------------------------------
1 | N <- 72000
2 | n.sim <- 1000001
3 | set.seed(43)
4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE)
5 | 
6 | 


--------------------------------------------------------------------------------
/stuff/d8k_do.R:
--------------------------------------------------------------------------------
1 | N <- 20000
2 | n.sim <- 1000001
3 | set.seed(12)
4 | source("/u/maechler/R/Pkgs/diptest/stuff/dip_N_do.R", echo=TRUE)
5 | 
6 | 


--------------------------------------------------------------------------------
/data/exHartigan.R:
--------------------------------------------------------------------------------
1 | message("'exHartigan' data is identical to 'statfaculty' and hence deprecated.",
2 |         "\n Use the 'statfaculty' instead")
3 | source("statfaculty.R")
4 | exHartigan <- statfaculty
5 | 


--------------------------------------------------------------------------------
/data/statfaculty.R:
--------------------------------------------------------------------------------
1 | statfaculty <-
2 |   c(30,33,35,36,37,37,39,39,39,39,39,40,40,40,40,41,42,43,43,43,44,44,45,45,46,
3 |     46,47,47,48,48,48,49,50,50,51,52,52,53,53,53,53,53,54,54,57,57,59,60,60,60,
4 |     61,61,61,61,62,62,62,62,63,66,70,72,72)
5 | 
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | Archive*.txt
 2 | *M1mac*
 3 | diptest*.out
 4 | stuff/pre-bugfix-2003-11
 5 | stuff/*.out
 6 | stuff/*.tex
 7 | stuff/*.Rout*
 8 | stuff/dip[0-9]*.rda
 9 | stuff/dipSim*.rda
10 | inst/doc/diptest-issues*
11 | inst/doc/*.bib
12 | inst/doc/*.sty
13 | TAGS
14 | *~
15 | *,v*
16 | *-ss
17 | vignettes/cache/*
18 | .Rhistory
19 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ## -*-sh-*-  using reg.exp (==> no initial or final '.*'),  but case is ignored!
 2 | 
 3 | stuff
 4 | ^M1mac
 5 | ^TODO
 6 | ,v$
 7 | -ss$
 8 | tests/.*64b
 9 | tests/.*32b
10 | vignettes/auto
11 | inst/doc/auto
12 | vignettes/.*\.(tex|toc|aux|log|bbl|blg|out)$
13 | inst/doc/.*\.(tex|toc|aux|log|bbl|blg|out)$
14 | inst/doc/jss.
15 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | useDynLib(diptest, .registration=TRUE)# <--> src/dip.c 2nd part
 2 | 
 3 | importFrom("graphics",
 4 |            abline, axis,
 5 |            legend, lines,
 6 |            par,
 7 |            title)
 8 | 
 9 | importFrom("stats",
10 |            approx, complete.cases, ecdf, runif)
11 | 
12 | export(dip)
13 | export(dip.test)
14 | ## export(aLine, getCM)
15 | 
16 | S3method(print, dip)
17 | S3method(plot,  dip)
18 | 
19 | 


--------------------------------------------------------------------------------
/man/exHartigan.Rd:
--------------------------------------------------------------------------------
 1 | \name{exHartigan}
 2 | \alias{exHartigan}
 3 | \title{Hartigan's Artificial n-modal Example Data Set}
 4 | \description{
 5 |  63 (integer) numbers; unimodal or bimodal, that's the question.
 6 | 
 7 |  This is now \emph{deprecated}.
 8 |  Please use \code{\link{statfaculty}} instead!
 9 | }
10 | \examples{
11 | data(exHartigan)
12 | plot(dH <- density(exHartigan))
13 | rug(exHartigan)# should jitter
14 | }
15 | \keyword{data}
16 | 


--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
 1 | - The vignette `inst/doc/diptest-issues.Rnw`
 2 |   has mentioned the "new" (May 2011)  dip.test() function;
 3 |   Still have not finalized *analyzing* the simulations in ./stuff/
 4 | 
 5 | - Consider an analogue qnormDiptab  which is constructed using
 6 |    `rnorm(.)` instead of `runif(.)` simulations.
 7 |     - This idea is old; Werner Stuetzle's student, Jeremy Tantrum, 
 8 | 	  did things in this direction in 2003; see, `stuff/jeremy-unimodality.R`
 9 | 
10 | - Visualize the l.c.m. and g.c.m. and the modal interval !
11 | 	 g.c.m = greatest convex minorant =: G(x)
12 |      l.c.m = least   concave majorant =: H(x)
13 | 


--------------------------------------------------------------------------------
/stuff/d12k_do.R:
--------------------------------------------------------------------------------
 1 | stopifnot(require("diptest"))
 2 | setwd("/u/maechler/R/Pkgs/diptest/stuff")
 3 | 
 4 | N <- 12000
 5 | n.sim <- 1000001
 6 | dd <- numeric(n.sim)
 7 | .pt <- proc.time()
 8 | 
 9 | set.seed(12)
10 | for(i in 1:n.sim) {
11 |     dd[i] <- dip(runif(N))
12 |     if(i %% 100 == 0) {
13 |         cat(".")
14 |         if(i %% 10000 == 0) {
15 |             cat("",i,"\n")
16 |             .opt <- .pt ; .pt <- proc.time(); .pt - .opt
17 |         }
18 |     }
19 | }
20 | 
21 | nam <- paste("dip", floor(N/1000),"k", sep='')
22 | assign(nam, dd)
23 | (outf <- paste(nam, "rda", sep="."))
24 | save(list = c(nam, "N"), file = outf)
25 | 


--------------------------------------------------------------------------------
/tests/mechler-ex.R:
--------------------------------------------------------------------------------
 1 | library(diptest)
 2 | ## These are from
 3 | ## the 217-readme.doc file that explains the bug fixed by
 4 | ## Ferenc Mechler (fmechler@med.cornell.edu). [5/Sep/2002]
 5 | ##
 6 | ex1 <- c(0.0198, 0.0198, 0.1961, 0.2898, 0.3184, 0.3687,
 7 |          0.4336, 0.4987, 0.5661, 0.6530, 0.7476, 0.8555)
 8 | 
 9 | ex2 <- c(0.0198, 0.1961, 0.2898, 0.3184, 0.3687, 0.4336,
10 |          0.4987, 0.5661, 0.6530, 0.7476, 0.8555, 0.9912)
11 | 
12 | ## Multiply them by 10000 here:
13 | 
14 | (D1 <- dip(10000*ex1, full=TRUE, debug=2))
15 | str(D1, digits = 10, vec.len = 12)
16 | 
17 | (D2 <- dip(10000*ex2, full=TRUE, debug=2))
18 | str(D2, digits = 10, vec.len = 12)
19 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: diptest
 2 | Version: 0.77-2
 3 | VersionNote: Last CRAN: 0.77-1 on 2024-03-31
 4 | Date: 2025-08-19
 5 | Authors@R: c(
 6 |     person("Martin", "Maechler", role = c("aut", "cre"),
 7 | 	   email = "maechler@stat.math.ethz.ch",
 8 | 	   comment = c("Ported to R and C from Fortran and S-plus; much enhanced",
 9 | 		       ORCID = "0000-0002-8685-9910")),
10 |     person("Dario Ringach", role = "aut", comment = "Fortran and S-plus; at NYU.edu"))
11 | Title: Hartigan's Dip Test Statistic for Unimodality - Corrected
12 | Description: Compute Hartigan's dip test statistic for unimodality /
13 |  multimodality and provide a test with simulation based p-values,  where
14 |  the original public code has been corrected.
15 | Imports: graphics, stats
16 | BuildResaveData: no
17 | License: GPL (>= 2)
18 | URL: https://github.com/mmaechler/diptest
19 | BugReports: https://github.com/mmaechler/diptest/issues
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # diptest
 2 | 
 3 | ## R CRAN package `diptest`: Hartigan's diptest for unimodality (including p-values)
 4 | 
 5 | Although this package `diptest` has been created a long time ago, with a
 6 | first CRAN publication in 2003, see the official page of
 7 | [CRAN package diptest](https://CRAN.R-project.org/package=diptest), the
 8 | package *development* sources and history have only been made available on
 9 | Nov.30, 2020.
10 | 
11 | As with my R package [sfsmisc](https://github.com/mmaechler/sfsmisc), I was
12 | able to keep most of the editing and development _history_ of this package,
13 | using my [emacs](https://www.gnu.org/software/emacs/) backup files (`*.~`)
14 | together with some RCS (`*,v`) histories, using my `G2RCSn` shell script,
15 | and most importantly a `ruby` script to create a git repos including
16 | history, see here, for how it worked with `sfsmisc`:
17 | https://mmaechler.blogspot.com/2014/08/how-i-got-175-years-old-github.html
18 | 
19 | 


--------------------------------------------------------------------------------
/stuff/sim-minProb.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ### If  D_n = D(X_1, ... X_n) is the dip statistic, we have
 3 | ###     D_n >= 1/(2 n)
 4 | ###   pm(n) := Pr[ D == 1/(2 n) ]  > 0
 5 | ###   ==      ------------------------
 6 | ## and hence the distribution of D_n(X), X ~ U[0,1] starts with a jump,
 7 | ## from 0 to pm(n) at  d = 1/(2n).
 8 | 
 9 | ### Now estimate pm(n) via simulation.
10 | ### The simulation is analyzed in file ./minProb-anal.R
11 | ###                                      ~~~~~~~~~~~~~~
12 | setwd("~/R/Pkgs/diptest/stuff")
13 | 
14 | library(diptest)
15 | 
16 | Ns <- 500000 # number of samples (per n)
17 | isim <- 1:Ns
18 | nn <- c(4:18,20,25,30,35,40,50,60, 75, 100)
19 | 
20 | nMin <- sapply(nn, function(n)
21 |                sum(sapply(isim,
22 |                           function(i) abs(1 - 2*n*dip(runif(n))) < 1e-5)))
23 | names(nMin) <- paste(nn)
24 | attr(nMin, "Ns") <- Ns
25 | ## nMin / Ns == pm(n), i.e. pm(nn)
26 | saveRDS(nMin, file= "minProb.rds")
27 | 
28 | proc.time()
29 | sessionInfo()
30 | 


--------------------------------------------------------------------------------
/stuff/dip_N_do.R:
--------------------------------------------------------------------------------
 1 | ### This is sourced from a file that sets  N , n.sim  , .Random.seed  ...
 2 | ###
 3 | stopifnot(exists("N"),     is.numeric(N),     length(N) == 1, N == round(N),
 4 |           exists("n.sim"), is.numeric(n.sim), length(n.sim) == 1,
 5 |           n.sim == round(n.sim))
 6 | if(exists(".Random.seed")) {
 7 |     cat("using \n")
 8 |     dump(".Random.seed", "")
 9 | } else
10 |     set.seed(12)
11 | stopifnot(require("diptest"))
12 | setwd("/u/maechler/R/Pkgs/diptest/stuff")
13 | 
14 | dd <- numeric(n.sim)
15 | .pt <- proc.time()
16 | 
17 | for(i in 1:n.sim) {
18 |     dd[i] <- dip(runif(N))
19 |     if(i %% 100 == 0) {
20 |         cat(".")
21 |         if(i %% 10000 == 0) {
22 |             cat("",i,"\n")
23 |             .opt <- .pt ; .pt <- proc.time(); .pt - .opt
24 |         }
25 |     }
26 | }
27 | 
28 | nam <- paste("dip", floor(N/1000),"k", sep='')
29 | assign(nam, dd)
30 | (outf <- paste(nam, "rda", sep="."))
31 | save(list = c(nam, "N"), file = outf)
32 | 
33 | sessionInfo()
34 | 


--------------------------------------------------------------------------------
/man/statfaculty.Rd:
--------------------------------------------------------------------------------
 1 | \name{statfaculty}
 2 | \alias{statfaculty}
 3 | %- uncomment, once exHartigan is defunct:
 4 | %- \alias{exHartigan}
 5 | \title{Faculty Quality in Statistics Departments}
 6 | \docType{data}
 7 | \usage{
 8 | data(statfaculty)
 9 | }
10 | \description{
11 |  Faculty quality in statistics departments was assessed as part of a
12 |  larger study reported by Scully(1982).
13 | 
14 |  Accidentally, this is also provided as the \code{exHartigan}
15 |  (\dQuote{\bold{ex}ample of \bold{Hartigan}s'}) data set.
16 | }
17 | \format{A numeric vector of 63 (integer) numbers, sorted increasingly,
18 |   as reported by the reference.
19 | }
20 | \references{
21 |   J. A. Hartigan and P. M. Hartigan (1985)
22 |   The Dip Test of Unimodality;
23 |   \emph{Annals of Statistics} \bold{13}, 70--84.
24 | }
25 | \source{
26 |   M. G. Scully (1982)
27 |   Evaluation of 596 programs in mathematics and physical sciences;
28 |   \emph{Chronicle Higher Educ.} \bold{25} 5, 8--10.
29 | }
30 | \examples{
31 | data(statfaculty)
32 | plot(dH <- density(statfaculty))
33 | rug(jitter(statfaculty))
34 | 
35 | data(exHartigan)
36 | stopifnot(identical(exHartigan,statfaculty))
37 | }
38 | \keyword{datasets}
39 | 


--------------------------------------------------------------------------------
/tests/sim1.R:
--------------------------------------------------------------------------------
 1 | #### Very small scale simulation to make the point
 2 | #### --> See ../stuff/  for much more
 3 | library(diptest)
 4 | 
 5 | P.p <- c(1, 5, 10, 25)/100
 6 | (P.p <- c(P.p, 1/2, rev(1 - P.p)))
 7 | 
 8 | N.sim <- 9999
 9 | set.seed(94)
10 | .p0 <- proc.time()
11 | dU100 <- replicate(N.sim, dip(runif(100)))
12 | cat('Time elapsed: ', (p1 <- proc.time()) - .p0,'\n'); .p0 <- p1
13 | ## Lynne (2003: P IV, 1.6 GHz): ~7   s
14 | ## 2010 (AMD Phenom II X4 925):  1.3 s
15 | 
16 | 100 * round(q100 <- quantile(dU100, p = P.p), 4)
17 | 
18 | plot(density(sqrt(100) * dU100), lwd = 2, col=2,
19 |      main = expression("Dip  distribution" ~~
20 |          list(sqrt(n)* D[n], ~ n == 100)))
21 | abline(h=0, col="dark gray", lty=3)
22 | 
23 | round(1e4 * quantile(dU100, p = seq(0,1, by = 0.01), names = FALSE))
24 | 
25 | ##--- an extreme unimodal case -- i.e. very small dip():
26 | set.seed(60); x <- rexp(301,1)^3
27 | hist(x)
28 | (dt.x <- dip.test(x))
29 | (dt2 <- dip.test(x, simulate = TRUE))
30 | (dt3 <- dip.test(x, simulate = TRUE, B = 10000))
31 | stopifnot(dt.x$p.value == 1,## <- gave	NA  earlier
32 | 	  dt2$p.value == 1,
33 | 	  dt3$p.value == 1)
34 | 
35 | 
36 | cat('Time elapsed: ', proc.time() - .p0,'\n') # "stats"
37 | 


--------------------------------------------------------------------------------
/stuff/qdipTab-use.R:
--------------------------------------------------------------------------------
 1 | ####--- Use the simulated quantiles to compute P-values
 2 | 
 3 | ### Now only working with qdipTab
 4 | library(diptest)
 5 | data(qDiptab)
 6 | (dnq <- dimnames(qDiptab))
 7 | (P.p <- as.numeric(dnq$Pr))## 0  0.01 ..... 0.99999 1
 8 | (n <- as.integer(dnq$n))   ## 4 5..10 15 ... 1000 2000 5000
 9 | ## sqrt(n) * D  has limit distribution (sometimes..)
10 | rq <- sqrt(n) * qDiptab
11 | 
12 | filled.contour(n, P.p, log10(rq),
13 |                plot.title= contour(n, P.p, log10(rq), add = TRUE))
14 | 
15 | logit <- qlogis
16 | contour(n, P.p, logit(rq))
17 | 
18 | ## correct P = {0,1} to {1/(2N), 1 - 1/(2N)} for N = 1000001 - 1 = 1e6
19 | N <- 1000000
20 | Pcp <- P.p
21 | Pcp[P.p == 0] <-     1/(2*N)
22 | Pcp[P.p == 1] <- 1 - 1/(2*N)
23 | 
24 | str(dd <- cbind(expand.grid(n=n, p=Pcp), rq = c(rq)))
25 | 
26 | coplot(rq ~ p | log10(n), data = dd)
27 | coplot(p ~ rq | log10(n), data = dd)
28 | 
29 | ## NOTA BENE:  qlogis(.) === logit(.)
30 | coplot(qlogis(p) ~ rq | log10(n), data = dd)
31 | 
32 | library(mgcv)
33 | g1 <- gam(qlogis(p) ~ s(log(n),rq), data = dd)
34 | summary(g1) # 21.56 d.f. // Deviance explained 88.5%
35 | 
36 | if(FALSE) ## Not quite --- 50 warnings --- need to use  weights! etc
37 | g2 <- gam(p ~ s(log(n),rq), family = binomial, data = dd)
38 | 


--------------------------------------------------------------------------------
/stuff/new-simul1e5.R:
--------------------------------------------------------------------------------
 1 | #### More extensive simulations than  Hartigan (1985)'s  table 1
 2 | #### ---------------------------------------------------
 3 | ##-> ./dip-simul.S
 4 | ##-> ./sim1.R
 5 | library(diptest)
 6 | 
 7 | nn <- c(4:10, 15, 20, 30, 50, 100, 200, 500, 1000, 2000, 5000)
 8 | Ns <- 100001
 9 | ##    ~~~~~~ number of "simulations" (i.e. samples for each n in nn)
10 | 
11 | ## which percentages
12 | p.hi <- sort(1 - c(outer(c(1,2,5), 2:5, function(x,y) x*10^-y)))
13 | p.lo <- c(1,2,5)/100
14 | (P.p <- c(p.lo, (1:9)/10, p.hi))
15 | 
16 | ## Proof that "quantile()" is just order statistics for this `Ns' :
17 | iq.p <- 1 + P.p * (Ns - 1)
18 | U <- runif(Ns)
19 | stopifnot(identical(sort(U)[iq.p],  quantile(U, P.p, names=FALSE)))
20 | 
21 | P.dip <- matrix(nrow= length(nn), ncol = length(P.p),
22 | 		dimnames = list(as.character(nn), formatC(P.p,w=1,digits=7)))
23 | dip.n <- numeric(Ns)
24 | Tcpu <- 0
25 | set.seed(963)
26 | for(n in nn) {
27 |     cat("n=",n,":")
28 |     cpu <- system.time(for(i in 1:Ns)
29 |                    {
30 |                        if(i %% (Ns%/% 10) == 0) cat(i,"")
31 |                        else if(i %% (Ns%/% 100) == 0) cat(".")
32 |                        dip.n[i] <- dip(runif(n))
33 |                    }
34 |                        )[1:3]
35 |     P.dip[paste(n),] <- quantile(dip.n, p = P.p, names=FALSE)
36 |     cat("\nn=",n,", cpu=", paste(formatC(cpu), collapse=", "),"\n\n")
37 |     Tcpu <- Tcpu + cpu
38 | }
39 | 
40 | save(nn, Ns, P.p, P.dip, file="dipSim_1e5.rda", compress=TRUE)
41 | 
42 | cat("\nTotal CPU = ", paste(formatC(Tcpu), collapse=", "),"\n\n")
43 | 
44 | ##--> File "dip-simul-sess"	to the CPU times .. !
45 | q('no')
46 | 


--------------------------------------------------------------------------------
/stuff/Stuetzle-stat593-S2003-olive.doc:
--------------------------------------------------------------------------------
 1 | From Werner Stuetzle's
 2 |      "Stat 593 E, Cluster Analysis and Other Unsupervised Learning Methods, Spring 2003"
 3 | 
 4 | --> http://www.stat.washington.edu/wxs/Stat593-s03/datasets.htm
 5 | and http://www.stat.washington.edu/wxs/Stat593-s03/Datasets/olive.dat
 6 |     http://www.stat.washington.edu/wxs/Stat593-s03/Datasets/olive.doc
 7 | both files, dated  31-Mar-2003 16:46
 8 | 
 9 | ---- MM : I have constructed  ./olive.tab  which you can read into R with
10 | 
11 | olives <-
12 |    read.table("/u/maechler/R/Pkgs/diptest/stuff/Stuetzle-stat593-S2003-olive.tab",
13 |               header=TRUE)
14 | 
15 | ## together with code in ./jeremy-unimodality.R :
16 | all.equal(unname(olives.CF[,-2]), unname(olives[,-2])) ## TRUE
17 | ## but clearly, also the last three *Variable* names differ...  [aargh]
18 | 
19 | --------------- this is olive.doc -------------------------------------------
20 | 
21 | From @craft.clarkson.edu:hopkepk@craft.camp.clarkson.edu Tue Nov 17 10:23:51 1992
22 | Received: from omnigate.clarkson.edu by stat.rutgers.edu (5.59/SMI4.0/RU1.5/3.08)
23 | 	id AA12876; Tue, 17 Nov 92 10:23:30 EST
24 | Received: from craft.clarkson.edu by omnigate.clarkson.edu id aa26582;
25 |           17 Nov 92 10:23 EST
26 | Received: by craft.camp.clarkson.edu (AIX 3.1/UCB 5.61/4.03)
27 |           id AA28251; Tue, 17 Nov 92 10:22:46 -0500
28 | Date: Tue, 17 Nov 92 10:22:46 -0500
29 | From: "Philip K. Hopke" <hopkepk@craft.camp.clarkson.edu>
30 | Message-Id: <9211171522.AA28251@craft.camp.clarkson.edu>
31 | To: dcook@stat.rutgers.edu
32 | Subject: try again
33 | Status: R
34 | 
35 | ID Region Class palmitic palmitoleic stearic oleic linoleic eicosanoic linolenic eicosenoic
36 | 


--------------------------------------------------------------------------------
/man/plot.dip.Rd:
--------------------------------------------------------------------------------
 1 | \name{plot.dip}
 2 | \alias{plot.dip}
 3 | \title{Plot a dip() Result, i.e., Class "dip" Object}
 4 | \description{
 5 |   Plot method for \code{"dip"} objects, i.e., the result of
 6 |   \code{\link{dip}(., full.result=TRUE)} or similar.
 7 | 
 8 |   Note: We may decide to enhance the plot in the future, possibly not
 9 |   entirely back-compatibly.
10 | }
11 | \usage{
12 | \method{plot}{dip}(x, do.points = (n < 20),
13 |      colG = "red3", colL = "blue3", colM = "forest green",
14 |      col.points = par("col"), col.hor = col.points,
15 |      doModal = TRUE, doLegend = TRUE, \dots)
16 | }
17 | \arguments{
18 |   \item{x}{an \R object of \code{\link{class}} \code{"dip"}, i.e.,
19 |     typically the result of \code{\link{dip}(., full.result= FF)} where
20 |     \code{FF} is \code{TRUE} or a string such as \code{"all"}.}
21 |   \item{do.points}{logical indicating if the ECDF plot should include
22 |     points; passed to \code{\link{plot.ecdf}}.}
23 |   \item{colG, colL, colM}{the colors to be used in the graphics for the
24 |     \bold{G}reatest convex minorant, the
25 |     \bold{L}east concave majorant, and the
26 |     \bold{M}odal interval, respectively.}
27 |   \item{col.points, col.hor}{the color of points or horizontal lines,
28 |     respectively, simply passed to \code{\link{plot.ecdf}}.}
29 |   \item{doModal}{logical indicating if the modal interval \eqn{[x_L, x_U]}{[xL, xU]}
30 |     should be shown.}
31 |   \item{doLegend}{logical indicating if a legend should be shown.}
32 |   \item{\dots}{further optional arguments, passed to \code{\link{plot.ecdf}}.}
33 | }
34 | \author{Martin Maechler}
35 | \seealso{
36 |   \code{\link{dip}}, also for examples; \code{\link{plot.ecdf}}.
37 | }
38 | \keyword{hplot}
39 | 


--------------------------------------------------------------------------------
/stuff/new-simul.R:
--------------------------------------------------------------------------------
 1 | #### More extensive simulations than  Hartigan (1985)'s  table 1
 2 | #### ---------------------------------------------------
 3 | ##-> ./dip-simul.S
 4 | ##-> ./sim1.R
 5 | library(diptest)
 6 | pd <- package.description("diptest")
 7 | cat("diptest package version ", pd['Version']," (",pd['Date'],")\n",sep="")
 8 | 
 9 | nn <- c(4:10, 15, 20, 30, 50, 100, 200, 500, 1000, 2000, 5000)
10 | Ns <- 1000001
11 | ##    ~~~~~~~ number of "simulations" (i.e. samples for each n in nn)
12 | 
13 | ## which percentages
14 | p.hi <- sort(1 - c(outer(c(1,2,5), 2:5, function(x,y) x*10^-y)))
15 | p.lo <- c(1,2,5)/100
16 | (P.p <- c(0, p.lo, (1:9)/10, p.hi, 1))# 0 & 1: the extremes are "interesting" here
17 | 
18 | ## "quantile()" is just order statistics X[k] (k := iq.p)  for this `Ns' :
19 | (iq.p <- 1 + P.p * (Ns - 1))
20 | U <- runif(Ns)
21 | stopifnot(identical(sort(U)[iq.p], ## , partial=iq.p, method="quick"
22 |                     quantile(U, P.p, names=FALSE)))
23 | 
24 | P.dip <- matrix(nrow= length(nn), ncol = length(P.p),
25 | 		dimnames = list(as.character(nn), formatC(P.p,w=1,digits=7)))
26 | attr(P.dip, "N_1") <- Ns - 1
27 | dip.n <- numeric(Ns)
28 | Tcpu <- 0
29 | set.seed(963)
30 | for(n in nn) {
31 |     cat("n=",n,":")
32 |     cpu <- system.time(for(i in 1:Ns)
33 |                    {
34 |                        if(i %% (Ns%/% 10) == 0) cat(i,"")
35 |                        else if(i %% (Ns%/% 100) == 0) cat(".")
36 |                        dip.n[i] <- dip(runif(n))
37 |                    }
38 |                        )[1:3]
39 |     P.dip [paste(n),] <- quantile(dip.n, p = P.p, names=FALSE)
40 |     cat("\nn=",n,", cpu=", paste(formatC(cpu), collapse=", "),"\n\n")
41 |     Tcpu <- Tcpu + cpu
42 | }
43 | 
44 | save(nn, Ns, P.p, P.dip, file="dipSim_1e6.rda", compress=TRUE)
45 | 
46 | cat("\nTotal CPU = ", paste(formatC(Tcpu), collapse=", "),"\n\n")
47 | 
48 | ##--> File "dip-simul-sess"	to the CPU times .. !
49 | q('no')
50 | 


--------------------------------------------------------------------------------
/stuff/qdipTab-large-n-expand.R:
--------------------------------------------------------------------------------
 1 | Ns <- 1000001
 2 | 
 3 | stopifnot(require("diptest") && is.character(data(qDiptab)))
 4 | 
 5 | ## begin { from  ./asymp-distrib.R } -------------------------------------------
 6 | ## These all have  n.sim = 1000'001  samples of
 7 | ## dip(runif(N)) for "large N"
 8 | ## where produced by scripts such as  ./d20k_do.R
 9 | N.k.set <- c(8,12,16,20,24,32,36,40)
10 | ## or automatically
11 | patt <- "^dip(.*)k\\.rda"
12 | N.k.set <- sort(as.integer(sub(patt, "\\1", list.files(pattern = patt))))
13 | dip.nm <- function(N.k, file=FALSE)
14 |     paste("dip", N.k, if(file)"k.rda" else "k", sep='')
15 | for(N.k in N.k.set)
16 |     load(dip.nm(print(N.k), file=TRUE))
17 | 
18 | d.dip <- function(N.k, scaleUp = TRUE)
19 | {
20 |     ## Purpose: Simulation data for N = N.k * 1000, possibly sqrt(N) scaled
21 |     ## ----------------------------------------------------------------------
22 |     ## Author: Martin Maechler, Date: 15 Apr 2009, 10:36
23 |     N <- N.k * 1000
24 |     nm <- dip.nm(N.k)
25 |     if(scaleUp) sqrt(N) * get(nm) else get(nm)
26 | }
27 | ##----end{ from ./asymp-distrib.R } -------------------------------------------
28 | 
29 | ##
30 | dn <- dimnames(qDiptab)
31 | nn  <- as.numeric(dn[[1]])
32 | P.p <- as.numeric(dn[[2]])
33 | names(N.k.set) <- 1000* N.k.set
34 | qNew <- sapply(N.k.set, function(.)
35 |                quantile(d.dip(., scaleUp=FALSE),
36 |                         probs = P.p))
37 | signif(qNew, 3)
38 | qDiptab.n <- rbind(qDiptab,
39 |                    t(qNew[, as.character(1000*c(10,20,40,72))]))
40 | ## the dimnames-names got lost in rbind():
41 | names(dimnames(qDiptab.n)) <- names(dimnames(qDiptab))
42 | 
43 | attr(qDiptab.n, "N_1") <- as.integer(Ns - 1)
44 | ## --- Here comes the *NEW* one:
45 | attach("~/R/Pkgs/diptest/data/qDiptab.rda")
46 | 
47 | qDiptab.prev <- qDiptab
48 | qDiptab <- qDiptab.n
49 | if(FALSE) # do not do this "accidentally" !
50 | save(qDiptab, file="~/R/Pkgs/diptest/data/qDiptab.rda")
51 | 


--------------------------------------------------------------------------------
/man/qDiptab.Rd:
--------------------------------------------------------------------------------
 1 | \name{qDiptab}
 2 | \alias{qDiptab}
 3 | \title{Table of Quantiles from a Large Simulation for Hartigan's Dip Test}
 4 | \docType{data}
 5 | \description{
 6 |   Whereas Hartigan(1985) published a table of empirical percentage
 7 |   points of the dip statistic (see \code{\link{dip}}) based on N=9999
 8 |   samples of size \eqn{n} from \eqn{U[0,1]}, our table of empirical
 9 |   quantiles is currently based on N=1'000'001 samples for each \eqn{n}.
10 | }
11 | \note{
12 |   Taking N=1'000'001 ensures that all the \code{\link{quantile}(X, p)}
13 |   used here are exactly order statistics \code{sort(X)[k]}.
14 | }
15 | \format{
16 |   A numeric matrix %may change: of dimension 17 * 26,
17 |   where each row corresponds to sample size \eqn{n}, and each column to
18 |   a probability (percentage) in \eqn{[0,1]}.  The dimnames are named
19 |   \code{n} and \code{Pr} and coercable to these values, see the
20 |   examples.  \code{attr(qDiptab, "N_1")} is \eqn{N - 1}, such that with
21 |   \code{k <- as.numeric(dimnames(qDiptab)$Pr) * attr(qDiptab, "N_1")},
22 |   e.g., \code{qDiptab[n == 15,]} contains exactly the order statistics
23 |   \eqn{D_{[k]}} (from the \eqn{N+1} simulated values of
24 |   \code{\link{dip}(U)}, where \code{U <- runif(15)}.
25 | }
26 | \seealso{\code{\link{dip}}, also for the references;
27 |   \code{\link{dip.test}()} which performs the hypothesis test, using
28 |   \code{qDtiptab} (and its null hypothesis of a uniform distribution).
29 | }
30 | \author{Martin Maechler \email{maechler@stat.math.ethz.ch}, in its
31 |   earliest form in August 1994.
32 | }
33 | \examples{
34 | data(qDiptab)
35 | str(qDiptab)
36 | ## the sample sizes `n' :
37 | dnqd <- dimnames(qDiptab)
38 | (nn <- as.integer(dnqd $n))
39 | ## the probabilities:
40 | P.p <- as.numeric(print(dnqd $ Pr))
41 | 
42 | ## This is as "Table 1" in Hartigan & Hartigan (1985) -- but more accurate
43 | ps <- c(1,5,10,50,90,95,99, 99.5, 99.9)/100
44 | tab1 <- qDiptab[nn <= 200,  as.character(ps)]
45 | round(tab1, 4)
46 | }
47 | \keyword{datasets}
48 | 


--------------------------------------------------------------------------------
/R/dipTest.R:
--------------------------------------------------------------------------------
 1 | ##' also called from ../data/qDiptab.R :
 2 | if(getRversion() < "2.13.0") {
 3 |    rdRDS <- function(..., package = "diptest")
 4 |     .readRDS(system.file(..., package=package))
 5 | } else
 6 |    rdRDS <- function(..., package = "diptest")
 7 |     readRDS(system.file(..., package=package, mustWork=TRUE))
 8 | 
 9 | dip.test <- function(x, simulate.p.value = FALSE, B = 2000)
10 | {
11 |     DNAME <- deparse(substitute(x))
12 |     x <- sort(x[complete.cases(x)])
13 |     stopifnot(is.numeric(x))
14 |     n <- length(x) # *is* integer
15 |     D <- dip(x)
16 |     method <- "Hartigans' dip test for unimodality / multimodality"
17 |     if(n <= 3) {
18 | 	P <- 1
19 |     } else if(simulate.p.value) {
20 | 	method <- paste(method,
21 | 			"with simulated p-value\n\t (based on", B, "replicates)")
22 | 	P <- mean(D <= replicate(B, dip(runif(n))))
23 |     } else {
24 | 	## Long "codetools-compatible" way of	data(qDiptab) :
25 | 	qDiptab <- rdRDS("extraData", "qDiptab.rds")
26 | 	dn <- dimnames(qDiptab)
27 | 	max.n <- max(nn <- as.integer(dn[["n"]]))
28 | 	P.s <- as.numeric(dn[["Pr"]])
29 | 
30 |         if(n >= max.n) { ## extrapolate, or rather just use the last n as == "asymptotic"
31 |             if(n > max.n)
32 |                 message("n = ",n," >= max_n{n in table} = ", max.n,
33 |                     " -- using that as asymptotic value.")
34 |             n1 <- n0 <- max.n
35 |             i2 <- i.n <- length(nn)
36 |             f.n <- 0
37 |         } else {
38 |             n0 <- nn[i.n <- findInterval(n, nn)]
39 |             n1 <- nn[(i2 <- i.n +1)]
40 |             f.n <- (n - n0)/(n1 - n0)# in [0, 1]
41 |         }
42 |         ## Now "find" y-interval:
43 |         y.0 <- sqrt(n0)* qDiptab[i.n, ]
44 | 	y.1 <- sqrt(n1)* qDiptab[i2 , ]
45 |         sD  <- sqrt(n) * D
46 | 	P <- 1 - approx(y.0 + f.n*(y.1 - y.0), P.s, rule = 2,# <- [min, max]
47 | 			xout = sD)[["y"]]
48 |     }
49 |     structure(list(statistic = c(D = D), p.value = P, nobs = n,
50 | 		   alternative = "non-unimodal, i.e., at least bimodal",
51 | 		   method = method, data.name = DNAME), class = "htest")
52 | }
53 | 


--------------------------------------------------------------------------------
/tests/sim1.Rout.save-32b:
--------------------------------------------------------------------------------
 1 | 
 2 | R version 2.13.0 Patched (2011-05-19 r55964)
 3 | Copyright (C) 2011 The R Foundation for Statistical Computing
 4 | ISBN 3-900051-07-0
 5 | Platform: i686-pc-linux-gnu (32-bit)
 6 | 
 7 | R is free software and comes with ABSOLUTELY NO WARRANTY.
 8 | You are welcome to redistribute it under certain conditions.
 9 | Type 'license()' or 'licence()' for distribution details.
10 | 
11 | R is a collaborative project with many contributors.
12 | Type 'contributors()' for more information and
13 | 'citation()' on how to cite R or R packages in publications.
14 | 
15 | Type 'demo()' for some demos, 'help()' for on-line help, or
16 | 'help.start()' for an HTML browser interface to help.
17 | Type 'q()' to quit R.
18 | 
19 | > #### Very small scale simulation to make the point
20 | > #### --> See ../stuff/  for much more
21 | > library(diptest)
22 | > 
23 | > P.p <- c(1, 5, 10, 25)/100
24 | > (P.p <- c(P.p, 1/2, rev(1 - P.p)))
25 | [1] 0.01 0.05 0.10 0.25 0.50 0.75 0.90 0.95 0.99
26 | > 
27 | > N.sim <- 9999
28 | > set.seed(94)
29 | > .p0 <- proc.time()
30 | > dU100 <- replicate(N.sim, dip(runif(100)))
31 | > cat('Time elapsed: ', (p1 <- proc.time()) - .p0,'\n'); .p0 <- p1
32 | Time elapsed:  1.74 0.008 1.761 0 0 
33 | > ## Lynne (2003: P IV, 1.6 GHz): ~7   s
34 | > ## 2010 (AMD Phenom II X4 925):  1.3 s
35 | > 
36 | > 100 * round(q100 <- quantile(dU100, p = P.p), 4)
37 |   1%   5%  10%  25%  50%  75%  90%  95%  99% 
38 | 2.29 2.56 2.75 3.08 3.54 4.12 4.70 5.09 5.90 
39 | > 
40 | > plot(density(sqrt(100) * dU100), lwd = 2, col=2,
41 | +      main = expression("Dip  distribution" ~~
42 | +          list(sqrt(n)* D[n], ~ n == 100)))
43 | > abline(h=0, col="dark gray", lty=3)
44 | > 
45 | > round(1e4 * quantile(dU100, p = seq(0,1, by = 0.01), names = FALSE))
46 |   [1] 191 229 239 246 252 256 261 265 268 272 275 277 280 282 285 287 289 292
47 |  [19] 294 296 298 300 302 304 305 308 310 312 314 315 317 319 321 323 325 327
48 |  [37] 329 331 332 334 336 338 340 341 343 345 347 349 351 352 354 356 358 360
49 |  [55] 362 364 366 368 370 372 374 376 379 381 383 385 387 390 393 395 397 400
50 |  [73] 403 406 409 412 415 418 421 424 427 431 434 438 442 446 450 455 460 464
51 |  [91] 470 476 483 489 499 509 520 539 562 590 773
52 | > 
53 | 


--------------------------------------------------------------------------------
/tests/ex1.R:
--------------------------------------------------------------------------------
 1 | library(diptest)
 2 | 
 3 | stopifnot(dip(c(1,1,2,2)) == 1/4)# the maximal value possible: two point dist
 4 | 
 5 | ## very first small "unimodal" example --- the  1/(2*n) result:
 6 | n <- length(u <- cumsum(0:3))
 7 | d <- dip(u, debug=TRUE)# shows the final if() {added by MM} is really needed
 8 | stopifnot(d == dip(-u), d == 1/(2*n))# exact "=" for n = 4 !
 9 | ## Note that I believe this should *not* give 0 (as fmechler@.. did),
10 | ## but rather 1/(2n) because that's  (1/n) / 2  and
11 | ## (1/n) is the correct distance between LCM and GCM
12 | 
13 | ## Small example -- but MM sees difference (32-bit / 64-bit) *and* on M1mac:
14 | x <- c(0,2:3,5:6)
15 | ## IGNORE_RDIFF_BEGIN
16 | d1 <- dip(x,   full=TRUE, debug=2)
17 | d2 <- dip(6-x, full=TRUE, debug=2)
18 | str(d2) # differences to M1mac (!)
19 | ## IGNORE_RDIFF_END
20 | str(d1)
21 | 
22 | if(!dev.interactive(orNone=TRUE)) pdf("ex1.pdf")
23 | par(mfrow = 2:1, mar = .1+c(3,4,2,1), mgp=c(1.5,.6,0), oma = c(0,0,2.1,0))
24 | #
25 | plot(d1)
26 | abline(v=-1:7, h = seq(0,1,by=0.2), lty="83", col = "gray")
27 | #
28 | plot(d2)
29 | abline(v=-1:7, h = seq(0,1,by=0.2), lty="83", col = "gray")
30 | #
31 | ## "title" only now
32 | mtext("dip() problem with 'mirror x'", side=3, line = 0.8,
33 |       outer=TRUE, cex = 1.5, font = 2)
34 | 
35 | 
36 | ##  Yong Lu <lyongu+@cs.cmu.edu> example -- a bit smaller
37 | x2 <- c(1, rep(2, 9))
38 | stopifnot(dip(x2) == dip(3 - x2))
39 | str(dip(x2, full=TRUE))
40 | cat('Time elapsed: ', (.pt <- proc.time()),'\n') # "stats"
41 | 
42 | ## Real data examples :
43 | 
44 | data(statfaculty)
45 | 
46 | str(dip(statfaculty, full = "all", debug = 3), vec.len = 8)
47 | 
48 | data(faithful)
49 | fE <- faithful$eruptions
50 | str(dip(fE, full = "all", debug = 3),
51 |     vec.len= 8)
52 | 
53 | data(precip)
54 | str(dip(precip, full = TRUE, debug = TRUE))
55 | 
56 | ## current qDiptab <--> n = 72'000 is "asymptotic" boundary
57 | set.seed(123); x <- rnorm(72000)
58 | dt72k <- dip.test(x)
59 | ## gave error  in qDiptab[i2, ] : subscript out of bounds  -- in diptest <= 0.77-0
60 | stopifnot(all.equal(list(statistic = c(D = 0.0005171098381181), p.value = 1, nobs = 72000L),
61 |                     dt72k[c("statistic", "p.value", "nobs")], tolerance = 1e-13))
62 | 
63 | 
64 | cat('Time elapsed: ', proc.time() - .pt,'\n') # "stats"
65 | 
66 | if(!interactive()) warnings()
67 | 


--------------------------------------------------------------------------------
/vignettes/diptest.bib:
--------------------------------------------------------------------------------
 1 | @string{AmStat = "The American Statistician"}
 2 | @string{AnnStat = "Annals of Statistics"}
 3 | % @string{JASA = "Journal of the American Statistical Association"}
 4 | @string{AnInStMa = "Annals of the Inst.\ of Stat.\ Math."}
 5 | @string{JASA = "JASA"}
 6 | @string{JAppTh = "Journal of Approximation Theory"}
 7 | @string{JMAA = "Journal of Analysis and Applications"}
 8 | @string{JRSS = "Journal of the Royal Statistical Society"} %% ALWAYS #~A or #~B!
 9 | @string{JRSSA = JRSS # "~A, General"}
10 | @string{JRSSB = JRSS # "~B"}
11 | @string{JRSSC-AS = "Applied Statistics --- " # JRSS # "~C"}
12 | @string{NuMath = "Numerische Mathematik"}
13 | @string{SIAM = "Society for Industrial and Applied Mathematics"}
14 | @string{SSci = "Statistical Science"}
15 | @string{StMed = "Statist.\ in Med."}% Statistics in Medicine
16 | @string{ScandS = "Scandinavian Journal of Statistics"}
17 | @string{JSS = "Journal of Statistical Software"}
18 | @string{JSSC = "SIAM Journal on Scientific and Statistical Computing"}
19 | % @string{JSSC = "SIAM J. Sci.\ Statist.\ Comput."}
20 | @string{JCGS = "Journal of Computational and Graphical Statistics"}
21 | % @string{CSDA = "Computational Statistics \& Data Analysis"}
22 | @string{CSDA = "Computat.\ Statist.\ Data Anal."}
23 | 
24 | @string{IEEE-ASSP = "IEEE Trans.\ Acoust., Speech, Signal Processing"}
25 | % @string{TOMS = "ACM Transactions on Mathematical Software"}
26 | % @string{TOMS = "{ACM} Transactions on Mathematical Software (TOMS)"}
27 | @string{TOMS = "ACM Trans.\ Math.\ Software"}
28 | 
29 | @string{ETH = "Swiss Federal Institute of Technology (ETH)"}
30 | @string{UWstat = "Department of Statistics, University of Washington"}
31 | 
32 | @string{Wiley = "Wiley"}
33 | @string{NY = "N.~Y."}
34 | 
35 | 
36 | @article{HarJH85,
37 |     Author = {J. A. Hartigan and P. M. Hartigan},
38 |     Title = {The Dip Test of Unimodality},
39 |     Year = 1985,
40 |     Journal = AnnStat,
41 |     Volume = 13,
42 |     Pages = {70--84},
43 |     Keywords = {Multimodality; Isotonic regression; Empirical distribution}
44 | }
45 | 
46 | @article{HarP85,
47 |      author = {P. M. Hartigan},
48 |      title = {Computation of the Dip Statistic to Test for Unimodality},
49 |      year = 1985,
50 |      journal = {Applied Statistics},
51 |      pages = {320--325},
52 |      volume = 34
53 | }
54 | 
55 | %% -- note that ~/bib/master.bib  has quite a few more on "modality" testing
56 | 


--------------------------------------------------------------------------------
/stuff/sim-minProb.Rout:
--------------------------------------------------------------------------------
 1 | 
 2 | R version 2.9.0 (2009-04-17)
 3 | Copyright (C) 2009 The R Foundation for Statistical Computing
 4 | ISBN 3-900051-07-0
 5 | 
 6 | R is free software and comes with ABSOLUTELY NO WARRANTY.
 7 | You are welcome to redistribute it under certain conditions.
 8 | Type 'license()' or 'licence()' for distribution details.
 9 | 
10 |   Natural language support but running in an English locale
11 | 
12 | R is a collaborative project with many contributors.
13 | Type 'contributors()' for more information and
14 | 'citation()' on how to cite R or R packages in publications.
15 | 
16 | Type 'demo()' for some demos, 'help()' for on-line help, or
17 | 'help.start()' for an HTML browser interface to help.
18 | Type 'q()' to quit R.
19 | 
20 | > 
21 | > ### If  D_n = D(X_1, ... X_n) is the dip statistic, we have
22 | > ###     D_n >= 1/(2 n)
23 | > ###   pm(n) := Pr[ D == 1/(2 n) ]  > 0
24 | > ###   ==      ------------------------
25 | > ## and hence the distribution of D_n(X), X ~ U[0,1] starts with a jump,
26 | > ## from 0 to pm(n) at  d = 1/(2n).
27 | > 
28 | > ### Now estimate pm(n) via simulation.
29 | > ### The simulation is analyzed in file ./minProb-anal.R
30 | > ###                                      ~~~~~~~~~~~~~~
31 | > setwd("~/R/Pkgs/diptest/stuff")
32 | > 
33 | > library(diptest)
34 | > 
35 | > Ns <- 500000 # number of samples (per n)
36 | > isim <- 1:Ns
37 | > nn <- c(4:18,20,25,30,35,40,50,60, 75, 100)
38 | > 
39 | > nMin <- sapply(nn, function(n)
40 | +                sum(sapply(isim,
41 | +                           function(i) abs(1 - 2*n*dip(runif(n))) < 1e-5)))
42 | > names(nMin) <- paste(nn)
43 | > attr(nMin, "Ns") <- Ns
44 | > ## nMin / Ns == pm(n), i.e. pm(nn)
45 | > save(nMin, file= "minProb.rda")
46 | > 
47 | > proc.time()
48 |     user   system  elapsed 
49 | 2130.176    2.594 2134.998 
50 | > sessionInfo()
51 | R version 2.9.0 (2009-04-17) 
52 | x86_64-unknown-linux-gnu 
53 | 
54 | locale:
55 | LC_CTYPE=de_CH.UTF-8;LC_NUMERIC=C;LC_TIME=en_US.UTF-8;LC_COLLATE=de_CH.UTF-8;LC_MONETARY=C;LC_MESSAGES=de_CH.UTF-8;LC_PAPER=de_CH.UTF-8;LC_NAME=C;LC_ADDRESS=C;LC_TELEPHONE=C;LC_MEASUREMENT=de_CH.UTF-8;LC_IDENTIFICATION=C
56 | 
57 | attached base packages:
58 | [1] graphics  grDevices datasets  stats     utils     methods   base     
59 | 
60 | other attached packages:
61 | [1] diptest_0.25-2 sfsmisc_1.0-7 
62 | > 
63 | > proc.time()
64 |     user   system  elapsed 
65 | 2130.188    2.605 2135.174 
66 | 


--------------------------------------------------------------------------------
/man/dip.test.Rd:
--------------------------------------------------------------------------------
 1 | \name{dip.test}
 2 | \title{Hartigans' Dip Test for Unimodality}
 3 | \alias{dip.test}
 4 | \concept{multimodality}
 5 | \description{
 6 |   Compute Hartigans' dip statistic \eqn{D_n}{Dn}, and
 7 |   its p-value for the test for unimodality, by interpolating
 8 |   tabulated quantiles of \eqn{\sqrt{n} D_n}{sqrt(n) * Dn}.
 9 | 
10 |   For \eqn{X_i \sim F, i.i.d.}{X_i ~ F, i.i.d},
11 |   the null hypothesis is that \eqn{F} is a unimodal distribution.
12 |   Consequently, the test alternative is non-unimodal, i.e., at least
13 |   bimodal.  Using the language of medical testing, you would call the
14 |   test \dQuote{Test for \bold{Multi}modality}.
15 | }
16 | \usage{
17 | dip.test(x, simulate.p.value = FALSE, B = 2000)
18 | }
19 | \arguments{
20 |   \item{x}{numeric vector; sample to be tested for unimodality.}
21 |   \item{simulate.p.value}{a logical indicating whether to compute
22 |     p-values by Monte Carlo simulation.}
23 |   \item{B}{an integer specifying the number of replicates used in the
24 |     Monte Carlo test.}
25 | }
26 | \details{
27 |   If \code{simulate.p.value} is \code{FALSE}, the p-value is computed
28 |   via linear interpolation (of \eqn{\sqrt{n} D_n}{sqrt(n) * Dn}) in the
29 |   \code{\link{qDiptab}} table.
30 |   Otherwise the p-value is computed from a Monte Carlo simulation of a
31 |   uniform distribution (\code{\link{runif}(n)}) with \code{B}
32 |   replicates.
33 | }
34 | \value{
35 |   A list with class \code{"htest"} containing the following
36 |   components:
37 |   \item{statistic}{the dip statistic \eqn{D_n}{Dn}, i.e.,
38 |     \code{\link{dip}(x)}.}
39 |   \item{p.value}{the p-value for the test, see details.}
40 |   \item{method}{character string describing the test, and whether Monte
41 |     Carlo simulation was used.}
42 |   \item{data.name}{a character string giving the name(s) of the data.}
43 | }
44 | \seealso{
45 |   For goodness-of-fit testing, notably of continuous distributions,
46 |   \code{\link{ks.test}}.
47 | }
48 | \references{
49 |   see those in \code{\link{dip}}.
50 | }
51 | \author{Martin Maechler}
52 | \note{
53 |   see also the package vignette, which describes the procedure in more details.
54 | }
55 | \examples{
56 | ## a first non-trivial case
57 | (d.t <- dip.test(c(0,0, 1,1))) # "perfect bi-modal for n=4" --> p-value = 0
58 | stopifnot(d.t$p.value == 0)
59 | 
60 | data(statfaculty)
61 | plot(density(statfaculty)); rug(statfaculty)
62 | (d.t <- dip.test(statfaculty))
63 | 
64 | x <- c(rnorm(50), rnorm(50) + 3)
65 | plot(density(x)); rug(x)
66 | ## border-line bi-modal ...  BUT (most of the times) not significantly:
67 | dip.test(x)
68 | dip.test(x, simulate=TRUE, B=5000)
69 | 
70 | ## really large n -- get a message
71 | dip.test(runif(4e5))
72 | 
73 | }
74 | \keyword{htest}
75 | \keyword{distribution}
76 | 


--------------------------------------------------------------------------------
/vignettes/myVignette.sty:
--------------------------------------------------------------------------------
 1 | %% originates  from   ~/R/Pkgs/Matrix/inst/doc/myVignette.sty  [April 2009]
 2 | \RequirePackage{hyperref}
 3 | \RequirePackage{url}
 4 | \RequirePackage{amsmath}
 5 | \RequirePackage{bm}%-> \bm (= bold math)
 6 | \newcommand{\Slang}{\textsf{S} language}
 7 | \newcommand{\RR}{\textsf{R}}
 8 | \newcommand{\email}[1]{\href{mailto:#1}{\normalfont\texttt{#1}}}
 9 | %- R programming markup
10 | \newcommand\code{\bgroup\@codex}
11 | \def\@codex#1{{\normalfont\ttfamily\hyphenchar\font=-1 #1}\egroup}
12 | \let\env=\code
13 | \let\command=\code
14 | \newcommand*{\Rfun}[1]{\code{#1()}\index{\RR~function #1}}
15 | \newcommand*{\class}[1]{\code{#1}\index{class #1}}%
16 | \newcommand*{\pkg}[1]{\code{#1}\index{\RR~package #1}}
17 | %
18 | \newcommand{\kbd}[1]{{\normalfont\texttt{#1}}}
19 | \newcommand{\key}[1]{{\normalfont\texttt{\uppercase{#1}}}}
20 | \newcommand\samp{`\bgroup\@noligs\@sampx}
21 | \def\@sampx#1{{\normalfont\texttt{#1}}\egroup'}
22 | \let\option=\samp
23 | \newcommand{\var}[1]{{\normalfont\textsl{#1}}}
24 | \newcommand{\file}[1]{{`\normalfont\textsf{#1}'}}
25 | \newcommand{\dfn}[1]{{\normalfont\textsl{#1}}}
26 | \newcommand{\acronym}[1]{{\normalfont\textsc{\lowercase{#1}}}}
27 | \newcommand{\strong}[1]{{\normalfont\fontseries{b}\selectfont #1}}
28 | \let\pkg=\strong
29 | %
30 | \RequirePackage{alltt}
31 | \newenvironment{example}{\begin{alltt}}{\end{alltt}}
32 | \newenvironment{smallexample}{\begin{alltt}\small}{\end{alltt}}
33 | \newenvironment{display}{\list{}{}\item\relax}{\endlist}
34 | \newenvironment{smallverbatim}{\small\verbatim}{\endverbatim}
35 | % This is already in ``Sweave'' :
36 | %% \RequirePackage{fancyvrb}
37 | %% \DefineVerbatimEnvironment{Sinput}{Verbatim}{fontsize=\small,fontshape=sl}
38 | %% \DefineVerbatimEnvironment{Soutput}{Verbatim}{fontsize=\small}
39 | %% \DefineVerbatimEnvironment{Scode}{Verbatim}{fontsize=\small,fontshape=sl}
40 | %
41 | \newcommand{\FIXME}[1]{\marginpar{ \dots FIXME \emph{#1} \dots}}
42 | \newcommand{\TODO}[1]{\par\noindent\textsc{Todo:} \textit{#1}\par}
43 | %
44 | \newcommand*{\myOp}[1]{{$\left\langle\ensuremath{#1}\right\rangle$}}
45 | \newcommand{\noFootnote}[1]{{\small (\textit{#1})}}
46 | %
47 | %% diptest stuff :
48 | %
49 | %% Probabily P[.],  Expectation E[.]  etc
50 | %% ==  subsection of our flexible-style "texab.sty" :
51 | \newcommand{\@pkl}{[}     % Probability Klammer links
52 | \newcommand{\@pkr}{]}
53 | \newcommand{\@ekl}{[}     % Erwartungswert Klammer links
54 | \newcommand{\@ekr}{]}     % Erwartungswert Klammer rechts
55 | \DeclareMathOperator{\PRSymbol}{P}
56 | % Next line (\makeright): if #1 == \left then \right #2 else #1 #2
57 | \newcommand{\makeright}[2]{\ifx#1\left\right#2\else#1#2\fi}
58 | %% the real commands
59 | \newcommand{\PR}[2][\left]  {\PRSymbol     #1\@pkl #2 \makeright{#1}{\@pkr}}
60 | \newcommand{\ERW}[2][\left] {\ERWSymbol    #1\@ekl #2 \makeright{#1}{\@ekr}}
61 | 
62 | \newcommand{\isD}{\ {\stackrel{\mathcal{D}}{=}}\ \ }
63 | \newcommand{\iid}{\mbox{ i.i.d. }}
64 | 


--------------------------------------------------------------------------------
/stuff/minProb-anal.R:
--------------------------------------------------------------------------------
 1 | #### Analysis of the result of ./sim-minProb.R
 2 | #### ~~~~~~~~                  ~~~~~~~~~~~~~~~
 3 | 
 4 | setwd("/u/maechler/R/Pkgs/diptest/stuff")
 5 | (nMin <- readRDS(file= "minProb.rds"))
 6 | Ns <- attr(nMin, "Ns")
 7 | nn <- as.integer(names(nMin))
 8 | nMin[nMin > 0] ## now only goes up to n=13,  even for Ns == 500'000
 9 | 
10 | ## Actually  nMin ~ Bin(pm(n), Ns) --> estimate pm(n) parametrically:
11 | ##           =====================             ======
12 | glm.n <- glm(cbind(nMin,Ns) ~ log(nn), family = binomial)
13 | summary(glm.n)
14 | ## ^^ shows that logistic link  &  log(nn)  was not good enough:
15 | 
16 | glm.s <- glm(cbind(nMin,Ns) ~ sqrt(nn), family = binomial)# warning about prob in ~= {0, 1}
17 | summary(glm.s)
18 | glm.is <- glm(cbind(nMin,Ns) ~ I(nn^-0.5), family = binomial) #  "best"
19 | summary(glm.is)
20 | glm.I <- glm(cbind(nMin,Ns) ~ I(nn^-1), family = binomial)
21 | summary(glm.I)
22 | 
23 | matplot(nn, cbind(nMin
24 |                   ,Ns * predict(glm.s, type="response") # "2"
25 |                   ,Ns * predict(glm.n, type="response") # "3"
26 |                   ,Ns * predict(glm.is, type="response")# "4" (best!)
27 |                   ,Ns * predict(glm.I, type="response") # "5"
28 |                   ), type='b',
29 |         log = "xy",
30 |         ylab = "nMin  & 4 different glm() predictions")
31 | 
32 | ## actually with large simulation, "glm.is" are not bad
33 | plot(nMin ~ nn, type = 'o')
34 | 
35 | ## or
36 | require(gplots)
37 | plotCI(nn, nMin, uiw = sqrt(nMin*(Ns-nMin)/Ns), type = 'o')
38 | 
39 | ## in Log-Log scale  -- + error bars
40 | plotCI(nn, nMin, uiw = 1.96*sqrt(nMin*(Ns-nMin)/Ns), type = 'o', log = 'xy')
41 | ## 2011-05-14: AAARGH the above now gives an error!
42 | ## whereas this still works: "was"
43 | plot(nMin ~ nn, type = 'o', log = 'xy')
44 | axis(2, at=c(2000,5000,10000))
45 | axis(1, at=nn)
46 | 
47 | ## Better confidence intervals (this takes time!):
48 | ci <- Ns*sapply(nMin, function(k)
49 |                 binom.test(k, Ns, conf.level = .99)$conf.int)
50 | 
51 | plot(nMin ~ nn, type = 'o', log = 'xy', xlab = 'n', cex = 0.8)
52 | segments(nn, ci[1,], nn, ci[2,], col = "tomato")
53 | axis(2, at=c(2000,5000,10000))
54 | axis(1, at=nn)
55 | title(paste("#{dip = 1/(2 n)}  for Ns=",formatC(Ns,form="d"),"  samples"))
56 | mtext("exact binomial 99%-confidence intervals", col = "tomato")
57 | text(nn, nMin, formatC(signif(nMin/Ns, 2)), adj = -0.2, cex = 0.8)
58 | 
59 | ## i.e., for small 'n'  the probability is remarkably high,
60 | ## where as for n = 5000,
61 | ## it is only visible if you look very carefully :
62 | 
63 | ##___________ No longer ____ after fixing the 2003-10  bug ______
64 | 
65 | setwd("pre-bugfix-2003-11")
66 | 
67 | ### for  n=5000 "5k":
68 | load("dip5k.rda")# d5k has 100'000 values dip(runif(5000))
69 | 
70 | ##     main = substitute("density of  " * P(D[n] == frac(1,2*n))
71 | 
72 | plot(density(d5k),
73 |      main = substitute("density of  " * {
74 |          D[n] == "dip(runif(5000)),   simulated  " } *
75 |      N == nnn *" times", list(nnn = length(d5k))))
76 | ## or with `sfsmisc' package:
77 | tkdensity(d5k, do.rug=FALSE, from.f = -4)
78 | ## --> Title of a paper: The distribution of the dip is "bimodal"  !!
79 | ##
80 | ## or  "The Dip Distribution has a Dip" !
81 | 
82 | 


--------------------------------------------------------------------------------
/tests/sim1.Rout.save-64b:
--------------------------------------------------------------------------------
 1 | 
 2 | R Under development (unstable) (2015-03-03 r67931) -- "Unsuffered Consequences"
 3 | Copyright (C) 2015 The R Foundation for Statistical Computing
 4 | Platform: x86_64-unknown-linux-gnu (64-bit)
 5 | 
 6 | R is free software and comes with ABSOLUTELY NO WARRANTY.
 7 | You are welcome to redistribute it under certain conditions.
 8 | Type 'license()' or 'licence()' for distribution details.
 9 | 
10 | R is a collaborative project with many contributors.
11 | Type 'contributors()' for more information and
12 | 'citation()' on how to cite R or R packages in publications.
13 | 
14 | Type 'demo()' for some demos, 'help()' for on-line help, or
15 | 'help.start()' for an HTML browser interface to help.
16 | Type 'q()' to quit R.
17 | 
18 | > #### Very small scale simulation to make the point
19 | > #### --> See ../stuff/  for much more
20 | > library(diptest)
21 | > 
22 | > P.p <- c(1, 5, 10, 25)/100
23 | > (P.p <- c(P.p, 1/2, rev(1 - P.p)))
24 | [1] 0.01 0.05 0.10 0.25 0.50 0.75 0.90 0.95 0.99
25 | > 
26 | > N.sim <- 9999
27 | > set.seed(94)
28 | > .p0 <- proc.time()
29 | > dU100 <- replicate(N.sim, dip(runif(100)))
30 | > cat('Time elapsed: ', (p1 <- proc.time()) - .p0,'\n'); .p0 <- p1
31 | Time elapsed:  0.512 0.085 0.597 0 0 
32 | > ## Lynne (2003: P IV, 1.6 GHz): ~7   s
33 | > ## 2010 (AMD Phenom II X4 925):  1.3 s
34 | > 
35 | > 100 * round(q100 <- quantile(dU100, p = P.p), 4)
36 |   1%   5%  10%  25%  50%  75%  90%  95%  99% 
37 | 2.29 2.56 2.75 3.08 3.54 4.12 4.70 5.09 5.90 
38 | > 
39 | > plot(density(sqrt(100) * dU100), lwd = 2, col=2,
40 | +      main = expression("Dip  distribution" ~~
41 | +          list(sqrt(n)* D[n], ~ n == 100)))
42 | > abline(h=0, col="dark gray", lty=3)
43 | > 
44 | > round(1e4 * quantile(dU100, p = seq(0,1, by = 0.01), names = FALSE))
45 |   [1] 191 229 239 246 252 256 261 265 268 272 275 277 280 282 285 287 289 292
46 |  [19] 294 296 298 300 302 304 305 308 310 312 314 315 317 319 321 323 325 327
47 |  [37] 329 331 332 334 336 338 340 341 343 345 347 349 351 352 354 356 358 360
48 |  [55] 362 364 366 368 370 372 374 376 379 381 383 385 387 390 393 395 397 400
49 |  [73] 403 406 409 412 415 418 421 424 427 431 434 438 442 446 450 455 460 464
50 |  [91] 470 476 483 489 499 509 520 539 562 590 773
51 | > 
52 | > ##--- an extreme unimodal case -- i.e. very small dip():
53 | > set.seed(60); x <- rexp(301,1)^3
54 | > hist(x)
55 | > (dt.x <- dip.test(x))
56 | 
57 | 	Hartigans' dip test for unimodality / multimodality
58 | 
59 | data:  x
60 | D = 0.0072617, p-value = 1
61 | alternative hypothesis: non-unimodal, i.e., at least bimodal
62 | 
63 | > (dt2 <- dip.test(x, simulate = TRUE))
64 | 
65 | 	Hartigans' dip test for unimodality / multimodality with simulated
66 | 	p-value (based on 2000 replicates)
67 | 
68 | data:  x
69 | D = 0.0072617, p-value = 1
70 | alternative hypothesis: non-unimodal, i.e., at least bimodal
71 | 
72 | > (dt3 <- dip.test(x, simulate = TRUE, B = 10000))
73 | 
74 | 	Hartigans' dip test for unimodality / multimodality with simulated
75 | 	p-value (based on 10000 replicates)
76 | 
77 | data:  x
78 | D = 0.0072617, p-value = 1
79 | alternative hypothesis: non-unimodal, i.e., at least bimodal
80 | 
81 | > stopifnot(dt.x$p.value == 1,## <- gave	NA  earlier
82 | + 	  dt2$p.value == 1,
83 | + 	  dt3$p.value == 1)
84 | > 
85 | > 
86 | > cat('Time elapsed: ', proc.time() - .p0,'\n') # "stats"
87 | Time elapsed:  1.136 0.018 1.158 0 0 
88 | > 
89 | > proc.time()
90 |    user  system elapsed 
91 |   1.765   0.129   1.935 
92 | 


--------------------------------------------------------------------------------
/stuff/jeremy-unimodality-olives.R:
--------------------------------------------------------------------------------
  1 | ##-*- mode: R; kept-new-versions: 21; kept-old-versions: 12; -*-
  2 | 
  3 | #### Originally, from
  4 | ####   http://www.stat.washington.edu/wxs/Stat593-s03/Code/jeremy-unimodality.R
  5 | ####
  6 | ####-----------------------------------------------------------------------------------
  7 | ## Diagnostic plots for clustering and the DIP test for unimodality
  8 | ## Code written by Jeremy Tantrum, Winter 2003
  9 | ##=================================================================
 10 | 
 11 | #### --> see ./jeremy-unimodality.R
 12 | 
 13 | ## An example of it working: Olive oil data - region 2 - are areas 5 and 6
 14 | ## different.
 15 | 
 16 | ## -- MM:  Aargh, 'olives' *differs* from R package to R package
 17 | data(olives, package="classifly")
 18 | str(olives.CF <- olives)
 19 | data(olives, package="TWIX")
 20 | str(olives.TW <- olives)
 21 | if(FALSE)## These are completely different:
 22 |     data(oliveoil, package="pls")
 23 | 
 24 | 
 25 | ## First, the meaning of  'Area' and 'Region' is swapped between the two:
 26 | with(olives.TW, table(Region, Area))
 27 | ##                  Area
 28 | ## Region            North Sardinia South
 29 | ##   Calabria            0        0    56
 30 | ##   Coast Sardinia      0       33     0
 31 | ##   East Liguria       50        0     0
 32 | ##   Inland Sardinia     0       65     0
 33 | ##   North Apulia        0        0    25
 34 | ##   Sicily              0        0    36
 35 | ##   South Apulia        0        0   206
 36 | ##   Umbria             51        0     0
 37 | ##   West Liguria       50        0     0
 38 | 
 39 | with(olives.CF, table(Area, Region))
 40 | ##                  Region
 41 | ## Area                1   2   3
 42 | ##   Calabria         56   0   0
 43 | ##   Coast-Sardinia    0  33   0
 44 | ##   East-Liguria      0   0  50
 45 | ##   Inland-Sardinia   0  65   0
 46 | ##   North-Apulia     25   0   0
 47 | ##   Sicily           36   0   0
 48 | ##   South-Apulia    206   0   0
 49 | ##   Umbria            0   0  51
 50 | ##   West-Liguria      0   0  50
 51 | 
 52 | 
 53 | ## Not at all the same... aargh
 54 | { op <- par(ask=TRUE)
 55 |   for(n in names(oo)) { plot(oo[,n], olives[,n], main=n) }
 56 |   par(op) }
 57 | 
 58 | ## But looking at subsets
 59 | subset(olives.TW, Region == "Coast Sardinia")[, 1:8]
 60 | subset(olives.CF, Area   == "Coast-Sardinia")[, -(1:2)]
 61 | 
 62 | ### See ./Stuetzle-stat593-S2003-olive.doc :
 63 | olives.WS <-
 64 |    read.table("/u/maechler/R/Pkgs/diptest/stuff/Stuetzle-stat593-S2003-olive.tab",
 65 |               header=TRUE)
 66 | 
 67 | ## together with code in ./jeremy-unimodality.R :
 68 | all.equal(unname(olives.CF[,-2]), unname(olives.WS[,-2])) ## TRUE
 69 | ## but clearly, also the last three *Variable* names are confused...  [aargh]
 70 | 
 71 | ## How can we *match* the two sets?  Look at the [,2] variables
 72 | with(olives.CF, table(Area,  Region))
 73 | with(olives.WS, table(Class, Region))
 74 | 
 75 | ## -> manually :
 76 | cl2area <- c(5,1,7,6, 4,2, 3,9,8)
 77 | 
 78 | ## but can do this programmatically:
 79 | (areas <- levels(olives.CF[,2]))
 80 | c2a <- unique(cbind(olives.CF[,2], # $ Area
 81 |                     olives.WS[,2]))# $ Class
 82 | (c2a <- c2a[order(c2a[,2]) , ])
 83 | c2a <- c2a[,1]
 84 | stopifnot(all(c2a == cl2area),
 85 |           all(olives.CF[,2] == areas[c2a[olives.WS[,2]]]))
 86 | 
 87 | ## Now to work with Jeremy's code, assuming he used Stuetzle's version of the data:
 88 | olive.area   <- olives.WS $ Class
 89 | olive.region <- olives.WS $ Region
 90 | i.rest <- !(names(olives.WS) %in% c("Class", "Region"))
 91 | olive        <- data.matrix(olives.WS[, i.rest])
 92 | 
 93 | ##
 94 | x.labs <- olive.area[olive.region==2]
 95 | library(MASS)
 96 | g <- lda(olive[olive.region==2,], x.labs)
 97 | g.proj <- unclass(g)$scaling
 98 | ##
 99 | x <- olive[olive.region==2,] %*% g.proj
100 | plot.ucdf(x)
101 | plot.silverman(x)
102 | str(x.dip <- calcdip(x, plot.it=FALSE)) #-> . $dip = 0.149
103 | dips <- rep(0,100)
104 | for(i in 1:100) {
105 |   x.boot <- unisample(x.dip$unicurve,length(x))
106 |   dips[i] <- calcdip(x.boot, plot.it=FALSE)$dip
107 | }
108 | (p.value <- sum(dips>x.dip$dip)/100) # 0  ( < 0.01 )
109 | # < 0.01
110 | ##
111 | 


--------------------------------------------------------------------------------
/inst/NEWS.Rd:
--------------------------------------------------------------------------------
  1 | % Check from R:
  2 | %  news(db = tools:::.build_news_db_from_package_NEWS_Rd("~/R/Pkgs/diptest/inst/NEWS.Rd"))
  3 | \name{NEWS}
  4 | \title{News for \R Package \pkg{diptest}}
  5 | \encoding{UTF-8}
  6 | 
  7 | \section{CHANGES in diptest VERSION 0.77-1 (2024-03-31)}{
  8 |   \subsection{BUG FIXES}{
  9 |     \itemize{
 10 |       \item \code{diptest(x72000, *)} for \code{length(x) == 72000} now
 11 |       works correctly (with a \code{warning} about being asymptotic),
 12 |       thanks to William Davis, UC Berkeley.
 13 |     }
 14 |   }
 15 | }
 16 | 
 17 | \section{CHANGES in diptest VERSION 0.77-0 (2023-11-27)}{
 18 |   \subsection{BUG FIXES}{
 19 |     \itemize{
 20 |       \item C level format: s/\%ld/\%d/
 21 |     }
 22 |   }
 23 | }
 24 | 
 25 | \section{CHANGES in diptest VERSION 0.76-0 (2021-03-23)}{
 26 | 
 27 |   \subsection{NEW FEATURES}{
 28 |     \itemize{
 29 |       \item add \file{README.Rd} mostly for github readers
 30 |     }
 31 |   }
 32 |   \subsection{BUG FIXES}{
 33 |     \itemize{
 34 |       \item Added Imports (to "base" packages where they were not checked previously) to \file{NAMESPACE}.
 35 |       \item Using \file{NEWS.Rd} file more.
 36 |     }
 37 |   }
 38 | }
 39 | 
 40 | \section{CHANGES in diptest VERSION 0.75-7 (2015-06-07)}{% CRAN release
 41 | 
 42 |   \subsection{NEW FEATURES}{
 43 |     \itemize{
 44 |       \item Started this \file{NEWS.Rd} file, to eventually replace the \file{ChangeLog}
 45 |     }
 46 |   }
 47 |   \subsection{BUG FIXES}{
 48 |     \itemize{
 49 |       \item .
 50 |     }
 51 |   }
 52 | }
 53 | 
 54 | \section{CHANGES in diptest VERSION 0.75-6 (2014-11-25)}{
 55 |   \subsection{NEW FEATURES}{
 56 |     \itemize{
 57 |       \item .
 58 |     }
 59 |   }
 60 |   \subsection{BUG FIXES}{
 61 |     \itemize{
 62 |       \item .
 63 |     }
 64 |   }
 65 | }
 66 | 
 67 | \section{CHANGES in diptest VERSION 0.75-5 (2013-07-23)}{
 68 |   \subsection{NEW FEATURES}{
 69 |     \itemize{
 70 |       \item add \file{NEWS.Rd} (albeit mostly empty)
 71 |     }
 72 |   }
 73 |   \subsection{BUG FIXES}{
 74 |     \itemize{
 75 |       \item \code{rdRDS()} wrapper corrrectly tests for R 2.13.0
 76 |     }
 77 |   }
 78 | }
 79 | 
 80 | 
 81 | %% this is the *latest* entry  in  ../ChangeLog
 82 | \section{CHANGES in diptest VERSION 0.75-4 (2012-08-13)}{
 83 |   \subsection{NEW FEATURES}{
 84 |     \itemize{
 85 |       \item Enable package for pre-R-2.13.x via \code{rdRDS()} wrapper
 86 | 
 87 |       \item \code{dip.test()} now also returns an \code{alternative} component,
 88 |       e.g. for printing.
 89 |     }
 90 |   }
 91 | }
 92 | 
 93 | \section{CHANGES in diptest VERSION 0.75-3 (2012-04-18)}{
 94 |   \subsection{NEW FEATURES}{
 95 |     \itemize{
 96 |       \item .
 97 |     }
 98 |   }
 99 |   \subsection{BUG FIXES}{
100 |     \itemize{
101 |       \item .
102 |     }
103 |   }
104 | }
105 | 
106 | 
107 | \section{CHANGES in diptest VERSION 0.75-1 (2011-08-10)}{
108 |   \subsection{NEW FEATURES}{
109 |     \itemize{
110 |       \item .
111 |     }
112 |   }
113 |   \subsection{BUG FIXES}{
114 |     \itemize{
115 |       \item .
116 |     }
117 |   }
118 | }
119 | 
120 | 
121 | \section{CHANGES in diptest VERSION 0.25-3 (2010-08-11)}{
122 |   \subsection{NEW FEATURES}{
123 |     \itemize{
124 |       \item First version of the \dQuote{"diptest issues"} vignette.
125 |     }
126 |   }
127 |   \subsection{BUG FIXES}{
128 |     \itemize{
129 |       \item .
130 |     }
131 |   }
132 | }
133 | 
134 | 
135 | \section{CHANGES in diptest VERSION 0.25-2 (2009-02-09)}{
136 |   \subsection{NEW FEATURES}{
137 |     \itemize{
138 |       \item .
139 |     }
140 |   }
141 |   \subsection{BUG FIXES}{
142 |     \itemize{
143 |       \item .
144 |     }
145 |   }
146 | }
147 | 
148 | 
149 | \section{CHANGES in diptest VERSION 0.25-1 (2004-08-12)}{
150 |   \subsection{NEW FEATURES}{
151 |     \itemize{
152 |       \item .
153 |     }
154 |   }
155 |   \subsection{BUG FIXES}{
156 |     \itemize{
157 |       \item .
158 |     }
159 |   }
160 | }
161 | 
162 | 
163 | \section{CHANGES in diptest VERSION 0.25-0 (2004-02-13)}{
164 |   \subsection{NEW FEATURES}{
165 |     \itemize{
166 |       \item More output in the \R object, allows \code{debug} information.
167 |     }
168 |   }
169 |   \subsection{BUG FIXES}{
170 |     \itemize{
171 |       \item Previously, the \code{dip()} had not been \dQuote{symmetric}
172 |       with respect to \dQuote{mirroring} such as a sign flip in the
173 |       data.  Thanks to Yong Lu, who was able to track the bug to a
174 |       misplaced \code{(} in the original Fortran code, the bug has been
175 |       fixed on Oct.10, 2003.
176 |     }
177 |   }
178 | }
179 | 
180 | \section{CHANGES in diptest VERSION 0.9-1 (2003-07-15)}{
181 |   \subsection{NEW FEATURES}{
182 |     \itemize{
183 |       \item First(?) CRAN release
184 |     }
185 |   }
186 |   \subsection{BUG FIXES}{
187 |     \itemize{
188 |       \item on 1994-07-30, added code to prevent an infinite loop in
189 |       rare cases, but e.g., for \code{dip(1:n)}.
190 |     }
191 |   }
192 | }
193 | 
194 | 


--------------------------------------------------------------------------------
/tests/mechler-ex.Rout.save-32b:
--------------------------------------------------------------------------------
  1 | 
  2 | R version 2.13.1 Patched (2011-08-09 r56694)
  3 | Copyright (C) 2011 The R Foundation for Statistical Computing
  4 | ISBN 3-900051-07-0
  5 | Platform: i686-pc-linux-gnu (32-bit)
  6 | 
  7 | R is free software and comes with ABSOLUTELY NO WARRANTY.
  8 | You are welcome to redistribute it under certain conditions.
  9 | Type 'license()' or 'licence()' for distribution details.
 10 | 
 11 | R is a collaborative project with many contributors.
 12 | Type 'contributors()' for more information and
 13 | 'citation()' on how to cite R or R packages in publications.
 14 | 
 15 | Type 'demo()' for some demos, 'help()' for on-line help, or
 16 | 'help.start()' for an HTML browser interface to help.
 17 | Type 'q()' to quit R.
 18 | 
 19 | > library(diptest)
 20 | > ## These are from
 21 | > ## the 217-readme.doc file that explains the bug fixed by
 22 | > ## Ferenc Mechler (fmechler@med.cornell.edu). [5/Sep/2002]
 23 | > ##
 24 | > ex1 <- c(0.0198, 0.0198, 0.1961, 0.2898, 0.3184, 0.3687,
 25 | +          0.4336, 0.4987, 0.5661, 0.6530, 0.7476, 0.8555)
 26 | > 
 27 | > ex2 <- c(0.0198, 0.1961, 0.2898, 0.3184, 0.3687, 0.4336,
 28 | +          0.4987, 0.5661, 0.6530, 0.7476, 0.8555, 0.9912)
 29 | > 
 30 | > ## Multiply them by 10000 here:
 31 | > 
 32 | > (D1 <- dip(10000*ex1, full=TRUE, debug=2))
 33 | dip() in C: n = 12; starting with  2N*dip = 1.
 34 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1, 12]; l_lcm/gcm = ( 6, 3)
 35 |   while(gcm[ix] != lcm[iv]) :
 36 |  L(3,2) --> ix = 2, iv = 3
 37 |  G(2,3) --> ix = 1, iv = 3
 38 |  --> ix = 1, iv = 4
 39 |  --> ix = 1, iv = 5
 40 |  --> ix = 1, iv = 6
 41 |   calculating dip .. (dip_l, dip_u) = (2, 1) -> new larger dip 2 (j_best = 2)
 42 | 'dip': LOOP-BEGIN: 2n*D= 2         [low,high] = [  4,  9]; l_lcm/gcm = ( 6, 2)
 43 |   while(gcm[ix] != lcm[iv]) :
 44 |  L(2,2) --> ix = 1, iv = 3
 45 |  L(2,3) --> ix = 1, iv = 4
 46 |  --> ix = 1, iv = 5
 47 |  --> ix = 1, iv = 6
 48 | 
 49 | Call:
 50 | dip(x = 10000 * ex1, full.result = TRUE, debug = 2)
 51 | 
 52 | n = 12.  Dip statistic, D_n = 0.08333333 = 2/(2n)
 53 |  Modal interval [xL, xU] = [x[4], x[9]] = [2898, 5661]
 54 |  GCM and LCM have 2 and 6 nodes inside [xL, xU], respectively.
 55 | > str(D1, digits = 10, vec.len = 12)
 56 | List of 15
 57 |  $ call       : language dip(x = 10000 * ex1, full.result = TRUE, debug = 2)
 58 |  $ x          : num [1:12] 198 198 1961 2898 3184 3687 4336 4987 5661 6530 7476 8555
 59 |  $ n          : int 12
 60 |  $ dip        : num 0.08333333333
 61 |  $ lo.hi      : int [1:2] 4 9
 62 |  $ ifault     : int 0
 63 |  $ gcm        : int [1:2] 9 4
 64 |  $ lcm        : int [1:6] 4 5 6 7 8 9
 65 |  $ mn         : int [1:12] 1 1 1 1 4 4 4 4 4 4 4 4
 66 |  $ mj         : int [1:12] 2 9 6 5 6 7 8 9 10 11 12 12
 67 |  $ min.is.0   : logi FALSE
 68 |  $ debug      : int 2
 69 |  $ xl         : num 2898
 70 |  $ xu         : num 5661
 71 |  $ full.result: logi TRUE
 72 |  - attr(*, "class")= chr "dip"
 73 | > 
 74 | > (D2 <- dip(10000*ex2, full=TRUE, debug=2))
 75 | dip() in C: n = 12; starting with  2N*dip = 1.
 76 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1, 12]; l_lcm/gcm = ( 6, 4)
 77 |   while(gcm[ix] != lcm[iv]) :
 78 |  G(3,2) --> ix = 2, iv = 2
 79 |  G(2,2) --> ix = 1, iv = 2
 80 |  --> ix = 1, iv = 3
 81 |  --> ix = 1, iv = 4
 82 |  --> ix = 1, iv = 5
 83 |  --> ix = 1, iv = 6
 84 |   calculating dip .. (dip_l, dip_u) = (1, 1)
 85 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  3,  8]; l_lcm/gcm = ( 6, 2)
 86 |   while(gcm[ix] != lcm[iv]) :
 87 |  L(2,2) --> ix = 1, iv = 3
 88 |  L(2,3) --> ix = 1, iv = 4
 89 |  --> ix = 1, iv = 5
 90 |  --> ix = 1, iv = 6
 91 |   calculating dip .. (dip_l, dip_u) = (0, 1)
 92 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  3,  5]; l_lcm/gcm = ( 3, 2)
 93 |   while(gcm[ix] != lcm[iv]) :
 94 |  L(2,2) --> ix = 1, iv = 3
 95 |   calculating dip .. (dip_l, dip_u) = (0, 1)
 96 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  3,  4]; l_lcm/gcm = ( 2, 2)
 97 |   ** (l_lcm,l_gcm) = (2,2) ==> d := -0
 98 |   calculating dip .. (dip_l, dip_u) = (0, 0)
 99 | No improvement in  low = 3  nor  high = 4 --> END
100 | 
101 | Call:
102 | dip(x = 10000 * ex2, full.result = TRUE, debug = 2)
103 | 
104 | n = 12.  Dip statistic, D_n = 0.04166667 = 1/(2n)
105 |  Modal interval [xL, xU] = [x[3], x[4]] = [2898, 3184]
106 |  GCM and LCM have 2 and 2 nodes inside [xL, xU], respectively.
107 | > str(D2, digits = 10, vec.len = 12)
108 | List of 15
109 |  $ call       : language dip(x = 10000 * ex2, full.result = TRUE, debug = 2)
110 |  $ x          : num [1:12] 198 1961 2898 3184 3687 4336 4987 5661 6530 7476 8555 9912
111 |  $ n          : int 12
112 |  $ dip        : num 0.04166666667
113 |  $ lo.hi      : int [1:2] 3 4
114 |  $ ifault     : int 0
115 |  $ gcm        : int [1:2] 4 3
116 |  $ lcm        : int [1:2] 3 4
117 |  $ mn         : int [1:12] 1 1 2 3 3 3 3 3 3 3 3 3
118 |  $ mj         : int [1:12] 8 5 4 5 6 7 8 9 10 11 12 12
119 |  $ min.is.0   : logi FALSE
120 |  $ debug      : int 2
121 |  $ xl         : num 2898
122 |  $ xu         : num 3184
123 |  $ full.result: logi TRUE
124 |  - attr(*, "class")= chr "dip"
125 | > 
126 | 


--------------------------------------------------------------------------------
/tests/mechler-ex.Rout.save-64b:
--------------------------------------------------------------------------------
  1 | 
  2 | R version 2.13.1 Patched (2011-08-09 r56694)
  3 | Copyright (C) 2011 The R Foundation for Statistical Computing
  4 | ISBN 3-900051-07-0
  5 | Platform: x86_64-unknown-linux-gnu (64-bit)
  6 | 
  7 | R is free software and comes with ABSOLUTELY NO WARRANTY.
  8 | You are welcome to redistribute it under certain conditions.
  9 | Type 'license()' or 'licence()' for distribution details.
 10 | 
 11 | R is a collaborative project with many contributors.
 12 | Type 'contributors()' for more information and
 13 | 'citation()' on how to cite R or R packages in publications.
 14 | 
 15 | Type 'demo()' for some demos, 'help()' for on-line help, or
 16 | 'help.start()' for an HTML browser interface to help.
 17 | Type 'q()' to quit R.
 18 | 
 19 | > library(diptest)
 20 | > ## These are from
 21 | > ## the 217-readme.doc file that explains the bug fixed by
 22 | > ## Ferenc Mechler (fmechler@med.cornell.edu). [5/Sep/2002]
 23 | > ##
 24 | > ex1 <- c(0.0198, 0.0198, 0.1961, 0.2898, 0.3184, 0.3687,
 25 | +          0.4336, 0.4987, 0.5661, 0.6530, 0.7476, 0.8555)
 26 | > 
 27 | > ex2 <- c(0.0198, 0.1961, 0.2898, 0.3184, 0.3687, 0.4336,
 28 | +          0.4987, 0.5661, 0.6530, 0.7476, 0.8555, 0.9912)
 29 | > 
 30 | > ## Multiply them by 10000 here:
 31 | > 
 32 | > (D1 <- dip(10000*ex1, full=TRUE, debug=2))
 33 | dip() in C: n = 12; starting with  2N*dip = 1.
 34 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1, 12]; l_lcm/gcm = ( 6, 3)
 35 |   while(gcm[ix] != lcm[iv]) :
 36 |  L(3,2) --> ix = 2, iv = 3
 37 |  G(2,3) --> ix = 1, iv = 3
 38 |  --> ix = 1, iv = 4
 39 |  --> ix = 1, iv = 5
 40 |  --> ix = 1, iv = 6
 41 |   calculating dip .. (dip_l, dip_u) = (2, 1) -> new larger dip 2 (j_best = 2)
 42 | 'dip': LOOP-BEGIN: 2n*D= 2         [low,high] = [  4,  9]; l_lcm/gcm = ( 6, 2)
 43 |   while(gcm[ix] != lcm[iv]) :
 44 |  L(2,2) --> ix = 1, iv = 3
 45 |  L(2,3) --> ix = 1, iv = 4
 46 |  --> ix = 1, iv = 5
 47 |  --> ix = 1, iv = 6
 48 | 
 49 | Call:
 50 | dip(x = 10000 * ex1, full.result = TRUE, debug = 2)
 51 | 
 52 | n = 12.  Dip statistic, D_n = 0.08333333 = 2/(2n)
 53 |  Modal interval [xL, xU] = [x[4], x[9]] = [2898, 5661]
 54 |  GCM and LCM have 2 and 6 nodes inside [xL, xU], respectively.
 55 | > str(D1, digits = 10, vec.len = 12)
 56 | List of 15
 57 |  $ call       : language dip(x = 10000 * ex1, full.result = TRUE, debug = 2)
 58 |  $ x          : num [1:12] 198 198 1961 2898 3184 3687 4336 4987 5661 6530 7476 8555
 59 |  $ n          : int 12
 60 |  $ dip        : num 0.08333333333
 61 |  $ lo.hi      : int [1:2] 4 9
 62 |  $ ifault     : int 0
 63 |  $ gcm        : int [1:2] 9 4
 64 |  $ lcm        : int [1:6] 4 5 6 7 8 9
 65 |  $ mn         : int [1:12] 1 1 1 1 4 4 4 4 4 4 4 4
 66 |  $ mj         : int [1:12] 2 9 6 5 6 7 8 9 10 11 12 12
 67 |  $ min.is.0   : logi FALSE
 68 |  $ debug      : int 2
 69 |  $ xl         : num 2898
 70 |  $ xu         : num 5661
 71 |  $ full.result: logi TRUE
 72 |  - attr(*, "class")= chr "dip"
 73 | > 
 74 | > (D2 <- dip(10000*ex2, full=TRUE, debug=2))
 75 | dip() in C: n = 12; starting with  2N*dip = 1.
 76 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1, 12]; l_lcm/gcm = ( 6, 4)
 77 |   while(gcm[ix] != lcm[iv]) :
 78 |  G(3,2) --> ix = 2, iv = 2
 79 |  G(2,2) --> ix = 1, iv = 2
 80 |  --> ix = 1, iv = 3
 81 |  --> ix = 1, iv = 4
 82 |  --> ix = 1, iv = 5
 83 |  --> ix = 1, iv = 6
 84 |   calculating dip .. (dip_l, dip_u) = (1, 1)
 85 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  3,  8]; l_lcm/gcm = ( 6, 2)
 86 |   while(gcm[ix] != lcm[iv]) :
 87 |  L(2,2) --> ix = 1, iv = 3
 88 |  L(2,3) --> ix = 1, iv = 4
 89 |  --> ix = 1, iv = 5
 90 |  --> ix = 1, iv = 6
 91 |   calculating dip .. (dip_l, dip_u) = (0, 1)
 92 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  3,  5]; l_lcm/gcm = ( 3, 2)
 93 |   while(gcm[ix] != lcm[iv]) :
 94 |  L(2,2) --> ix = 1, iv = 3
 95 |   calculating dip .. (dip_l, dip_u) = (0, 1)
 96 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  3,  4]; l_lcm/gcm = ( 2, 2)
 97 |   ** (l_lcm,l_gcm) = (2,2) ==> d := 1
 98 |   calculating dip .. (dip_l, dip_u) = (0, 0)
 99 | No improvement in  low = 3  nor  high = 4 --> END
100 | 
101 | Call:
102 | dip(x = 10000 * ex2, full.result = TRUE, debug = 2)
103 | 
104 | n = 12.  Dip statistic, D_n = 0.04166667 = 1/(2n)
105 |  Modal interval [xL, xU] = [x[3], x[4]] = [2898, 3184]
106 |  GCM and LCM have 2 and 2 nodes inside [xL, xU], respectively.
107 | > str(D2, digits = 10, vec.len = 12)
108 | List of 15
109 |  $ call       : language dip(x = 10000 * ex2, full.result = TRUE, debug = 2)
110 |  $ x          : num [1:12] 198 1961 2898 3184 3687 4336 4987 5661 6530 7476 8555 9912
111 |  $ n          : int 12
112 |  $ dip        : num 0.04166666667
113 |  $ lo.hi      : int [1:2] 3 4
114 |  $ ifault     : int 0
115 |  $ gcm        : int [1:2] 4 3
116 |  $ lcm        : int [1:2] 3 4
117 |  $ mn         : int [1:12] 1 1 2 3 3 3 3 3 3 3 3 3
118 |  $ mj         : int [1:12] 8 5 4 5 6 7 8 9 10 11 12 12
119 |  $ min.is.0   : logi FALSE
120 |  $ debug      : int 2
121 |  $ xl         : num 2898
122 |  $ xu         : num 3184
123 |  $ full.result: logi TRUE
124 |  - attr(*, "class")= chr "dip"
125 | > 
126 | 


--------------------------------------------------------------------------------
/R/dip.R:
--------------------------------------------------------------------------------
  1 | ### S-interface to Hartigan's algorithm for "The dip test for unimodality"
  2 | ###
  3 | ### Beginning:	Dario Ringach <dario@wotan.cns.nyu.edu>
  4 | ### Rest:	Martin Maechler <maechler@stat.math.ethz.ch>
  5 | 
  6 | dip <- function(x, full.result = FALSE, min.is.0 = FALSE, debug = FALSE)
  7 | {
  8 |     allRes <- (!is.logical(rFull <- full.result))
  9 |     if(allRes) {
 10 | 	if(full.result %in% c("all"))
 11 | 	    rFull <- TRUE
 12 | 	else stop(gettextf("'full.result' = \"%s\"' is not valid", full.result))
 13 |     }
 14 |     if(rFull) cl <- match.call()
 15 | 
 16 |     if(is.unsorted(x))
 17 | 	x <- sort(x, method="quick")
 18 |     n <- as.integer(length(x))
 19 |     r <- .C(diptst,
 20 | 	    x	= as.double(x),
 21 | 	    n	= n,
 22 | 	    dip = double(1),
 23 | 	    lo.hi = integer(4),
 24 | 	    ifault= integer(1),
 25 | 	    gcm =   integer(n),
 26 | 	    lcm =   integer(n),
 27 | 	    mn	=   integer(n),
 28 | 	    mj	=   integer(n),
 29 | 	    min.is.0 = as.logical(min.is.0),
 30 | 	    debug = as.integer(debug)# FALSE/TRUE or 2, 3, ...
 31 |             )[if(rFull) TRUE else "dip"]
 32 |     if(rFull) {
 33 | 	l.GL <- r$lo.hi[3:4]
 34 | 	length(r$gcm) <- l.GL[1]
 35 | 	length(r$lcm) <- l.GL[2]
 36 | 	length(r$lo.hi) <- 2L
 37 | 	u <- x[r$lo.hi]
 38 | 	structure(class = "dip",
 39 | 		  c(list(call = cl), r,
 40 | 		    list(xl = u[1], xu = u[2], full.result=full.result),
 41 | 		    if(allRes) getCM(r$mn, r$mj, n)))
 42 |     }
 43 |     else r[[1]]
 44 | }
 45 | 
 46 | getCM <- function(mn, mj, n = length(mn)) {
 47 |     stopifnot(length(mn) <= n, length(mj) <= n) # currently '=='...
 48 |     ## First recover "the full GCM / LCM" - by repeating what happened in C
 49 |     ## in the first "loop" :
 50 |     low <- 1L ; high <- n
 51 |     gcm <- lcm <- integer(n) # pre-allocate!  {maybe smaller ?}
 52 | 
 53 |     ## Collect the change points for the GCM from HIGH to LOW. */
 54 |     gcm[i <- 1L] <- high
 55 |     while(gcm[i] > low)
 56 | 	gcm[(i <- i+1L)] <- mn[gcm[i]]
 57 |     length(gcm) <- i
 58 | 
 59 |     ## Collect the change points for the LCM from LOW to HIGH. */
 60 |     lcm[i <- 1L] <- low
 61 |     while(lcm[i] < high)
 62 | 	lcm[(i <- i+1L)] <- mj[lcm[i]]
 63 |     length(lcm) <- i
 64 |     list(GCM = gcm, LCM = lcm)
 65 | }
 66 | 
 67 | print.dip <- function(x, digits = getOption("digits"), ...)
 68 | {
 69 |     stopifnot(is.integer(n <- x$n), is.numeric(D <- x$dip),
 70 | 	      length(lh <- x$lo.hi) == 2)
 71 |     cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"),
 72 | 	"\n\n", sep = "")
 73 |     xLU.c <- sapply(x$x[lh], formatC, digits=digits, width=1)
 74 |     cat("n = ", n,".  Dip statistic, D_n = ",
 75 | 	format(D, digits=digits)," = ",
 76 | 	format(2*n* D, digits=digits),"/(2n)\n",
 77 | 	sprintf(" Modal interval [xL, xU] = [x[%d], x[%d]] = [%s, %s]\n",
 78 | 		lh[1], lh[2], xLU.c[1], xLU.c[2]),
 79 | 	sprintf(" GCM and LCM have %d and %d nodes inside [xL, xU], respectively",
 80 |                 ## 3 5 7 9 1 3 5 7
 81 | 		length(x$gcm), length(x$lcm)),
 82 | 	if(x$full.result == "all")
 83 | 	sprintf(", and\n%17s %d and %d nodes in   [x_1, x_n].\n", "",
 84 | 		length(x$GCM), length(x$LCM)) else ".\n",
 85 | 	sep="")
 86 |     invisible(x)
 87 | }
 88 | 
 89 | aLine <- function(r.dip, lType = c("gcm","lcm","GCM","LCM"),
 90 | 		  type = "b", col="red3", lwd=1.5, ...)
 91 | {
 92 |     lType <- match.arg(lType)
 93 |     stopifnot(is.numeric(x <- r.dip$x),
 94 |               length(r.dip$n) == 1, r.dip$n == round(r.dip$n),
 95 | 	      is.integer(i <- r.dip[[lType]]) # 'gcm' or 'lcm' or component
 96 | 	      )
 97 |     e <- if(lType %in% c("gcm","GCM")) .01*min(diff(unique(x))) else 0
 98 |     i <- i[i != 0]
 99 |     lines(x[i], ecdf(x)(x[i] - e),
100 | 	  type=type, col=col, lwd=lwd, ...)
101 | }
102 | 
103 | plot.dip <- function(x, do.points=(n < 20), ## <- plot.stepfun()
104 | 		     colG="red3", colL="blue3", colM="forest green",
105 | 		     col.points=par("col"), col.hor=col.points, ## <- plot.stepfun():
106 | 		     doModal=TRUE, doLegend=TRUE, ...)
107 | {
108 |     stopifnot(is.integer(n <- x$n), is.numeric(D <- x$dip),
109 | 	      length(lh <- x$lo.hi) == 2)
110 |     Fn <- ecdf(x$x)
111 |     ## and now manipulate the call such that it's plotted nicely
112 |     cl <- x$call[1:2]
113 |     cl[[1]] <- as.name("ecdf") ; names(cl)[2] <- ""
114 |     attr(Fn, "call") <- cl
115 |     chD <- formatC(D, digits=pmax(3, getOption("digits")-2))
116 |     tit <- bquote("Dip" ~~ {D[n] == D[.(n)]} == .(chD))
117 |     plot(Fn, do.points=do.points, col.points=col.points, col.hor=col.hor,
118 | 	 verticals=TRUE, col.vert = "sky blue", lwd=2, ...)
119 |     title(tit, adj = 0, line = 1.25)
120 |     aLine(x, "gcm", col=colG)
121 |     aLine(x, "lcm", col=colL)
122 |     if(doCM.2 <- (x$full.result == "all")) {
123 |         aLine(x, "GCM", col=colG, lty=5)
124 |         aLine(x, "LCM", col=colL, lty=5)
125 |     }
126 |     if(doModal) {
127 | 	x12 <- x$x[lh]
128 | 	abline(v= x12, col = colM, lty = 2)
129 | 	op <- par(mgp = c(3, 1/16, 0))# should not need on.exit(par(op)) here ..
130 | 	axis(3, at=x12, labels = expression(x[L], x[U]),
131 | 	     tick=FALSE, col.axis = colM)
132 | 	par(op)
133 |     }
134 |     if(doLegend) {
135 | 	txt <- c("greatest convex minorant GCM",
136 | ### make sure have *no* [TAB] in next string !
137 | 		 "least     concave majorant LCM")
138 | 	t1 <- paste(txt," in [xL, xU]")
139 | 	legend("topleft", bty = "n",
140 | 	       if(doCM.2) c(t1, txt) else t1,
141 | 	       lwd=1.5, col = c(colG, colL), lty= if(doCM.2) c(1,1,5,5) else 1)
142 |     }
143 |     invisible()
144 | }
145 | 


--------------------------------------------------------------------------------
/stuff/pre-bugfix-2003-11/dip.S:
--------------------------------------------------------------------------------
  1 | ### S-interface to Hartigan's algorithm for "The dip test for unimodality"
  2 | ###
  3 | ### Beginning:	Dario Ringach <dario@wotan.cns.nyu.edu>
  4 | ### Rest:	Martin Maechler <maechler@stat.math.ethz.ch>
  5 | 
  6 | ###--- 1st page of this file should be  source(.)able !! -----------
  7 | 
  8 | if(!interactive()) {
  9 |   options(echo = T) #-- when being run as batch [FAILS for source() !!]
 10 |   print(date())
 11 | }
 12 | 
 13 | 
 14 | DIP.dir <- getenv("PWD")
 15 | 
 16 | hartigan.data <-
 17 |   c(30,33,35,36,37,37,39,39,39,39,39,40,40,40,40,41,42,43,43,43,44,44,45,45,46,
 18 |     46,47,47,48,48,48,49,50,50,51,52,52,53,53,53,53,53,54,54,57,57,59,60,60,60,
 19 |     61,61,61,61,62,62,62,62,63,66,70,72,72)
 20 | 
 21 | dip <- function(x, full.result = FALSE, debug = F)
 22 | {
 23 |   ## Purpose: Compute the "Dip test for unimodality" (statistic)
 24 |   ## -------------------------------------------------------------------------
 25 |   ## Arguments: x: the Data, full.result: return also 'xl', 'xu' (modal interv.)
 26 |   ## -------------------------------------------------------------------------
 27 |   ## Author: Martin Maechler <maechler@stat.math.ethz.ch>, Jul 94
 28 |   ##      from 1st version of   Dario Ringach <dario@wotan.cns.nyu.edu>
 29 |   
 30 |   if(!is.loaded(symbol.C("diptst"))) 
 31 |     dyn.load(paste(DIP.dir,"dip.o", sep="/"))
 32 | 
 33 |   n <- length(x)
 34 | 
 35 |   ii <- if(full.result) 1:11 else  "dip"
 36 |   simplify <- function(lis) if(length(lis)==1) as.single(lis[[1]]) else lis
 37 |   simplify(.C("diptst",
 38 | 	      x   = as.single(sort(x)),
 39 | 	      n   = as.integer(n),
 40 | 	      dip = single(1),
 41 | 	      xl  = single(1),
 42 | 	      xu  = single(1),
 43 | 	      ifault= integer(1),
 44 | 	      gcm =   integer(n),
 45 | 	      lcm =   integer(n),
 46 | 	      mn  =   integer(n),
 47 | 	      mj  =   integer(n),
 48 | 	     debug= as.integer(debug))[ii])
 49 |   ##-   if(z$ifault!=0)  #-- something not ok, but this is IMPOSSIBLE here
 50 |   ##-     stop(paste("Problem -- C 'message' : ifault = ", z$ifault))
 51 | }
 52 | 
 53 | dip(hstart)## 0.03998769
 54 | dip(ship)  ## 0.02328392
 55 | dip(iris)  ## 0.02874999
 56 | dip(hartigan.data) ## Should give   0.05952381
 57 | str(dip.hh <- dip(hartigan.data, full = T))
 58 | 
 59 | ## NOTA BENE:  dip(1:10) gives INFINITE LOOP (error in Hartigan's Algorithm!)
 60 | 
 61 | dip.1.10_rep(0,100)
 62 | unix.time(for(i in 1:100) dip.1.10[i] <- dip(jitter(1:10)))
 63 | ##--> 2.3 CPU sec. on Sparstation 10 (ingrid)
 64 | summary(dip.1.10)
 65 | ##-  Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 66 | ##-  0.05 0.05757 0.06016 0.05979 0.06237 0.06686
 67 | ##-  0.05   0.058 0.06027 0.06011 0.06231 0.06696
 68 | ##-- ~~~~ EXAKT (9 times out of 200)
 69 | 
 70 | 
 71 | 
 72 | cdf <- function(x, X=x)
 73 | {
 74 |   ## Purpose: Empirical distribution function F_{X}(x)
 75 |   ## -------------------------------------------------------------------------
 76 |   ## Arguments: x: vector of ARGuments;  X : data
 77 |   ## -------------------------------------------------------------------------
 78 |   ## Author: Martin Maechler, Date: 28 Jul 94, 18:03
 79 |   apply(outer(X,x,"<="),2,sum) / length(X)
 80 | }
 81 | 
 82 | p.cdf <- function(X, type='b',...)
 83 | {
 84 |   ## Purpose:  PLOT empirical distribution function
 85 |   ## -------------------------------------------------------------------------
 86 |   ## Arguments:
 87 |   ## -------------------------------------------------------------------------
 88 |   ## Author: Martin Maechler, Date: 30 Jul 94, 11:41
 89 |   X _ sort(X)
 90 |   plot(X, cdf(X), type=type, ...)
 91 | }
 92 | 
 93 | ## NOTE: For plotting, Martin Maechler's   plot.step(X)  is MUCH better !
 94 | 
 95 | 
 96 | dip0 <- function(x, debug=F)
 97 | {
 98 |   ## Load in ANY case ! if(!is.loaded(symbol.C("diptst")))
 99 |   dyn.load(paste(DIP.dir,"dip0.o", sep="/"))
100 |   n <- length(x)
101 |   .C("diptst",
102 |      as.single(sort(x)),
103 |      as.integer(n),
104 |      ans = single(1),
105 |      as.single(0),
106 |      as.single(0),
107 |      as.integer(0),
108 |     integer(n),
109 |     integer(n),
110 |     integer(n),
111 |     integer(n),
112 |      as.integer(debug)) $ans
113 | }
114 | dip0(hartigan.data) ##-> [1] 0.05952381
115 | 
116 | ## Load my current "diptst" C-symbol !
117 | dyn.load(paste(DIP.dir,"dip.o", sep="/"))
118 | 
119 | 
120 | dipF <- function(x)
121 | { 
122 |   if(!is.loaded(symbol.For("diptst")))
123 |     dyn.load(paste(DIP.dir,"dipF.o", sep="/"))
124 |   n <- length(x)
125 |   .Fortran("diptst",
126 |      as.single(sort(x)),
127 |      as.integer(n),
128 |      ans = single(1),
129 |      as.single(0),
130 |      as.single(0),
131 |      as.integer(0),
132 |     integer(n),
133 |     integer(n),
134 |     integer(n),
135 |     integer(n)) $ans
136 | }
137 | dipF(hartigan.data) ##-> [1] 0.05952381
138 | 
139 | ### Test if  dip.c  is still okay, after all my hacking at the code
140 | ###                 (dipF.f  which is used by dipF(..) is ORIGINAL !)
141 | 
142 | for(i in 1:100){x_runif(50); if(dip(x) != dipF(x)) { xx<<-x; stop("DIFFERENT")}}
143 | for(i in 1:100) { 
144 |   x_c(rnorm(100), 5+ rnorm(20))
145 |   if(dip(x) != dipF(x)) { xx<<-x; stop("DIFFERENT") } }
146 | 
147 | 
148 | ##-- NOTE: for small 'n' (n=4,5, even 8, 10 (rarely!)) sometimes INFINITE loop!!
149 | 
150 | ###===> FIXED in big.c  !!
151 | 
152 | x4.ok <- x4.bad <- NULL
153 | xx4_round(runif(4),3); cat(format(dipF(xx4))); x4.ok_cbind(x4.ok,xx4)
154 | ##-- C-c (kill) the above line if running long;  then do
155 | x4.bad <- cbind(x4.bad, xx4, deparse.level=0)
156 | 
157 | ##-- After a while (MANUALLY !)
158 | dim(x4.bad) #  4 10
159 | dim(x4.ok)  #  4 34
160 | 
161 | 
162 | ##-- n=10 even can be bad: 1st example:  1:10
163 | ## 2nd example: 
164 | n_10; set.seed(99);for(i in 1:3635) x _ runif(n)
165 | x10.bad _ round(sort(x),4)
166 | dipF(round(x10.bad,3)) #-- .05 (minimal value)
167 | plot.step(x10.bad);  subtit(vcat(x10.bad,sep=",  "))
168 | 
169 | u.dev.default() 
170 | nb_ncol(x4.bad); mult.fig(nb); for(i in 1:nb) plot.step(x4.bad[,i],cad.lag=F,main="")
171 | u.dev.default() 
172 | no_ncol(x4.ok);  mult.fig(no); for(i in 1:no) plot.step(x4.ok[,i],cad.lag=F,main="")
173 | 


--------------------------------------------------------------------------------
/man/dip.Rd:
--------------------------------------------------------------------------------
  1 | \name{dip}
  2 | \alias{dip}
  3 | \title{Compute Hartigans' Dip Test Statistic for Unimodality}
  4 | \concept{multimodality}
  5 | \description{
  6 |   Computes Hartigans' dip test statistic for testing unimodality,
  7 |   and additionally the modal interval.
  8 | }
  9 | \usage{
 10 | dip(x, full.result = FALSE, min.is.0 = FALSE, debug = FALSE)
 11 | }
 12 | \arguments{
 13 |   \item{x}{numeric; the data.}
 14 |   \item{full.result}{logical or string; \code{dip(., full.result=TRUE)} returns the full result
 15 |     list; if \code{"all"} it additionally uses the \code{mn} and
 16 |     \code{mj} components to compute the initial GCM and LCM, see below.}
 17 |   \item{min.is.0}{logical indicating if the \bold{min}imal value of the
 18 |     dip statistic \eqn{D_n}{Dn} can be zero or not.  Arguably should be
 19 |     set to \code{TRUE} for internal consistency reasons, but is false by
 20 |     default both for continuity and backwards compatibility reasons, see
 21 |     the examples below.}
 22 |     % backcompatibility both with earlier
 23 |     % versions of the \pkg{diptest} package, and with Hartigan's original
 24 |     % implementation.}
 25 |   \item{debug}{logical; if true, some tracing information is printed
 26 |     (from the C routine).}
 27 | }
 28 | \value{
 29 |   depending on \code{full.result} either a number, the dip statistic, or
 30 |   an object of class \code{"dip"} which is a \code{\link{list}} with components
 31 |   \item{x}{the sorted \code{\link{unname}()}d data.}
 32 |   \item{n}{\code{length(x)}.}
 33 |   \item{dip}{the dip statistic}
 34 |   \item{lo.hi}{indices into \code{x} for lower and higher end of modal interval}
 35 |   \item{xl, xu}{lower and upper end of modal interval}
 36 |   \item{gcm, lcm}{(last used) indices for \bold{g}reatest \bold{c}onvex
 37 |     \bold{m}inorant and the \bold{l}east \bold{c}oncave \bold{m}ajorant.}
 38 |   \item{mn, mj}{index vectors of length \code{n} for the GC minorant and
 39 |     the LC majorant respectively.}
 40 | 
 41 |   For \dQuote{full} results of class \code{"dip"}, there are
 42 |   \code{\link{print}} and \code{\link{plot}} methods, the latter with
 43 |   its own \link[=plot.dip]{manual page}.
 44 | }
 45 | \note{
 46 |   For \eqn{n \le 3}{n <= 3} where \code{n <- length(x)}, the dip
 47 |   statistic \eqn{D_n}{Dn} is always the same minimum value,
 48 |   \eqn{1/(2n)}, i.e., there's no possible dip test.
 49 |   Note that up to May 2011, from Hartigan's original Fortran code, \code{Dn}
 50 |   was set to zero, when all \code{x} values were identical.  However,
 51 |   this entailed discontinuous behavior, where for arbitrarily close
 52 |   data \eqn{\tilde x}{x~}, \eqn{D_n(\tilde x) = \frac 1{2n}}{Dn(x~) = 1/(2n)}.
 53 | 
 54 |   Yong Lu \email{lyongu+@cs.cmu.edu} found in Oct 2003 that the code
 55 |   was not giving symmetric results for mirrored data (and was giving
 56 |   results of almost 1, and then found the reason, a misplaced \samp{")"}
 57 |   in the original Fortran code.  This bug has been corrected for diptest
 58 |   version 0.25-0 (Feb 13, 2004).
 59 | 
 60 |   Nick Cox (Durham Univ.) said (on March 20, 2008 on the Stata-list):\cr
 61 |   As it comes from a bimodal husband-wife collaboration, the name
 62 |   perhaps should be \emph{\dQuote{Hartigan-Hartigan dip test}}, but that
 63 |   does not seem to have caught on.  Some of my less statistical
 64 |   colleagues would sniff out the hegemony of patriarchy there, although
 65 |   which Hartigan is being overlooked is not clear.
 66 | 
 67 |   Martin Maechler, as a Swiss, and politician, would say:\cr
 68 |   Let's find a compromise, and call it \emph{\dQuote{Hartigans' dip test}},
 69 |   so we only have to adapt orthography (:-).
 70 | }
 71 | \references{
 72 |   P. M. Hartigan (1985)
 73 |   Computation of the Dip Statistic to Test for Unimodality;
 74 |   \emph{Applied Statistics (JRSS C)} \bold{34}, 320--325.\cr
 75 |   Corresponding (buggy!) Fortran code of \sQuote{AS 217} available from Statlib,
 76 |   \url{https://lib.stat.cmu.edu/apstat/217}
 77 | 
 78 |   J. A. Hartigan and P. M. Hartigan (1985)
 79 |   The Dip Test of Unimodality;
 80 |   \emph{Annals of Statistics} \bold{13}, 70--84.
 81 | }
 82 | \author{Martin Maechler \email{maechler@stat.math.ethz.ch}, 1994,
 83 |   based on S (S-PLUS) and C code donated from Dario Ringach
 84 |   \email{dario@wotan.cns.nyu.edu} who had applied \command{f2c} on the
 85 |   original Fortran code available from Statlib.
 86 | 
 87 |   In Aug.1993, recreated and improved Hartigans' "P-value" table, which
 88 |   later became \code{\link{qDiptab}}.
 89 | }
 90 | \seealso{
 91 |   \code{\link{dip.test}} to compute the dip \emph{and} perform the unimodality test,
 92 |   based on P-values, interpolated from \code{\link{qDiptab}};
 93 |   \code{\link{isoreg}} for isotonic regression.
 94 | }
 95 | \examples{
 96 | data(statfaculty)
 97 | plot(density(statfaculty))
 98 | rug(statfaculty, col="midnight blue"); abline(h=0, col="gray")
 99 | dip(statfaculty)
100 | (dS <- dip(statfaculty, full = TRUE, debug = TRUE))
101 | plot(dS)
102 | ## even more output -- + plot showing "global" GCM/LCM:
103 | (dS2 <- dip(statfaculty, full = "all", debug = 3))
104 | plot(dS2)
105 | 
106 | data(faithful)
107 | fE <- faithful$eruptions
108 | plot(density(fE))
109 | rug(fE, col="midnight blue"); abline(h=0, col="gray")
110 | dip(fE, debug = 2) ## showing internal work
111 | (dE <- dip(fE, full = TRUE)) ## note the print method
112 | plot(dE, do.points=FALSE)
113 | 
114 | data(precip)
115 | plot(density(precip))
116 | rug(precip, col="midnight blue"); abline(h=0, col="gray")
117 | str(dip(precip, full = TRUE, debug = TRUE))
118 | 
119 | ##-----------------  The  'min.is.0' option :  ---------------------
120 | 
121 | ##' dip(.) continuity and 'min.is.0' exploration:
122 | dd <- function(x, debug=FALSE) {
123 |    x_ <- x ; x_[1] <- 0.9999999999 * x[1]
124 |    rbind(dip(x , debug=debug),
125 |          dip(x_, debug=debug),
126 |          dip(x , min.is.0=TRUE, debug=debug),
127 |          dip(x_, min.is.0=TRUE, debug=debug), deparse.level=2)
128 | }
129 | 
130 | dd( rep(1, 8) ) # the 3rd one differs ==> min.is.0=TRUE is *dis*continuous
131 | dd( 1:7 )       # ditto
132 | 
133 | dd( 1:7, debug=TRUE)
134 | ## border-line case ..
135 | dd( 1:2, debug=TRUE)
136 | 
137 | ## Demonstrate that  'min.is.0 = TRUE'  does not change the typical result:
138 | B.sim <- 1000 # or larger
139 | D5  <- {set.seed(1); replicate(B.sim, dip(runif(5)))}
140 | D5. <- {set.seed(1); replicate(B.sim, dip(runif(5), min.is.0=TRUE))}
141 | stopifnot(identical(D5, D5.), all.equal(min(D5), 1/(2*5)))
142 | hist(D5, 64); rug(D5)
143 | 
144 | D8  <- {set.seed(7); replicate(B.sim, dip(runif(8)))}
145 | D8. <- {set.seed(7); replicate(B.sim, dip(runif(8), min.is.0=TRUE))}
146 | stopifnot(identical(D8, D8.))
147 | }
148 | \keyword{htest}
149 | \keyword{distribution}
150 | 


--------------------------------------------------------------------------------
/stuff/pre-bugfix-2003-11/dipF-statlib_2005-08-04.f:
--------------------------------------------------------------------------------
  1 |       SUBROUTINE DIPTST1(X,N,DIP,XL,XU,IFAULT,GCM,LCM,MN,MJ,DDX,DDXSGN)
  2 | C
  3 | C     ALGORITHM AS 217 APPL. STATIST. (1985) VOL.34, NO.3
  4 | C
  5 | C     Does the dip calculation for an ordered vector X using the
  6 | C     greatest convex minorant and the least concave majorant, skipping
  7 | C     through the data using the change points of these distributions.
  8 | C     It returns the dip statistic 'DIP' and the modal interval
  9 | C     (XL, XU).
 10 | C
 11 | C     MODIFICATIONS SEP 2 2002 BY F. MECHLER TO FIX PROBLEMS WITH
 12 | C	UNIMODAL (INCLUDING MONOTONIC) INPUT 
 13 | C
 14 |       REAL X(N)
 15 |       INTEGER MN(N), MJ(N), LCM(N), GCM(N), HIGH
 16 |       REAL ZERO, HALF, ONE
 17 | C     NEXT TWO LINES ARE ADDED
 18 |       REAL DDX(N)
 19 |       INTEGER DDXSGN(N), POSK, NEGK
 20 |       DATA ZERO/0.0/, HALF/0.5/, ONE/1.0/
 21 | C
 22 |       IFAULT = 1
 23 |       IF (N .LE. 0) RETURN
 24 |       IFAULT = 0
 25 | C
 26 | C     Check if N = 1
 27 | C
 28 |       IF (N .EQ. 1) GO TO 4
 29 | C
 30 | C     Check that X is sorted
 31 | C
 32 |       IFAULT = 2
 33 |       DO 3 K = 2, N
 34 |         IF (X(K) .LT. X(K-1)) RETURN
 35 |     3 CONTINUE
 36 |       IFAULT = 0
 37 | C
 38 | C     Check for all values of X identical,
 39 | C     and for 1 < N < 4.
 40 | C
 41 |       IF (X(N) .GT. X(1) .AND. N .GE. 4) GO TO 505
 42 |     4 XL = X(1)
 43 |       XU = X(N)
 44 |       DIP = ZERO
 45 |       RETURN
 46 | C The code amendment below is intended to be inseted above the line marked "5" in the original FORTRAN code
 47 | C The amendment checks the condition whether the input X is perfectly unimodal
 48 | C Hartigan's original DIPTST algorithm did not check for this condition
 49 | C and DIPTST runs into an infinite cycle for a unimodal input
 50 | C The condition that the input is unimodal is equivalent to having 
 51 | C at most 1 sign change in the second derivative of the input p.d.f.
 52 | C In MATLAB syntax, we check the flips in the function xsign=-sign(diff(1./diff(x)))=-sign(diff(diff(x)));
 53 | C with DDXSGN=xsign in the fortran code below
 54 | 505   NEGK=0
 55 |       POSK=0
 56 |       DO 104 K = 3,N
 57 |       DDX(K) = X(K)+X(K-2)-2*X(K-1)
 58 |       IF (DDX(K) .LT. 0) DDXSGN(K) = 1
 59 |       IF (DDX(K) .EQ. 0) DDXSGN(K) = 0
 60 |       IF (DDX(K) .GT. 0) DDXSGN(K) = -1
 61 |       IF (DDXSGN(K) .GT. 0) POSK = K
 62 |       IF ((DDXSGN(K) .LT. 0) .AND. (NEGK .EQ. 0)) NEGK = K      
 63 | 104   CONTINUE
 64 | 
 65 | C The condition check below examines whether the greatest position with a positive second derivative 
 66 | C is smaller than the smallest position with a negative second derivative
 67 | C The boolean check gets it right even if 
 68 | C the unimodal p.d.f. has its mode in the very first or last point of the input 
 69 | 
 70 |       IF ((POSK .GT. NEGK) .AND. (NEGK .GT. 0)) GOTO 5
 71 |       XL=X(1)
 72 |       XU=X(N)
 73 |       DIP=0
 74 |       IFAULT=5
 75 |       RETURN
 76 | C
 77 | C     LOW contains the index of the current estimate of the lower end
 78 | C     of the modal interval, HIGH contains the index for the upper end.
 79 | C
 80 | 5     FN = FLOAT(N)
 81 |       LOW = 1
 82 |       HIGH = N
 83 |       DIP = ONE / FN
 84 |       XL = X(LOW)
 85 |       XU = X(HIGH)
 86 | C
 87 | C     Establish the indices over which combination is necessary for the
 88 | C     convex minorant fit.
 89 | C
 90 |       MN(1) = 1
 91 |       DO 28 J = 2, N
 92 |         MN(J) = J - 1
 93 |    25   MNJ = MN(J)
 94 |         MNMNJ = MN(MNJ)
 95 |         A = FLOAT(MNJ - MNMNJ)
 96 |         B = FLOAT(J - MNJ)
 97 |         IF (MNJ .EQ. 1 .OR. (X(J) - X(MNJ))*A .LT. (X(MNJ) - X(MNMNJ))
 98 |      +          *B) GO TO 28
 99 |         MN(J) = MNMNJ
100 |         GO TO 25
101 |    28 CONTINUE
102 | C
103 | C     Establish the indices over which combination is necessary for the
104 | C     concave majorant fit.
105 | C
106 |       MJ(N) = N
107 |       NA = N - 1
108 |       DO 34 JK = 1, NA
109 |         K = N - JK
110 |         MJ(K) = K + 1
111 |    32   MJK = MJ(K)
112 |         MJMJK = MJ(MJK)
113 |         A = FLOAT(MJK - MJMJK)
114 |         B = FLOAT(K - MJK)
115 |         IF (MJK .EQ. N .OR. (X(K) - X(MJK))*A .LT. (X(MJK) - X(MJMJK))
116 |      +          *B) GO TO 34
117 |         MJ(K) = MJMJK
118 |         GO TO 32
119 |    34 CONTINUE
120 | C
121 | C     Start the cycling.
122 | C     Collect the change points for the GCM from HIGH to LOW.
123 | C
124 |    40 IC = 1
125 |       GCM(1) = HIGH
126 |    42 IGCM1 = GCM(IC)
127 |       IC = IC + 1
128 |       GCM(IC) = MN(IGCM1)
129 |       IF (GCM(IC) .GT. LOW) GO TO 42
130 |       ICX = IC
131 | C
132 | C     Collect the change points for the LCM from LOW to HIGH.
133 | C
134 |       IC = 1
135 |       LCM(1) = LOW
136 |    44 LCM1 = LCM(IC)
137 |       IC = IC + 1
138 |       LCM(IC) = MJ(LCM1)
139 |       IF (LCM(IC) .LT. HIGH) GO TO 44
140 |       ICV = IC
141 | C
142 | C     ICX, IX, IG are counters for the convex minorant,
143 | C     ICV, IV, IH are counters for the concave majorant.
144 | C
145 |       IG = ICX
146 |       IH = ICV
147 | C
148 | C     Find the largest distance greater than 'DIP' between the GCM and
149 | C     the LCM from LOW to HIGH.
150 | C
151 |       IX = ICX - 1
152 |       IV = 2
153 |       D = ZERO
154 |       IF (ICX .NE. 2 .OR. ICV .NE. 2) GO TO 50
155 |       D = ONE / FN
156 |       GO TO 65
157 |    50 IGCMX = GCM(IX)
158 |       LCMIV = LCM(IV)
159 |       IF (IGCMX .GT. LCMIV) GO TO 55
160 | C
161 | C     If the next point of either the GCM or LCM is from the LCM,
162 | C     calculate the distance here.
163 | C
164 |       LCMIV1 = LCM(IV - 1)
165 |       A = FLOAT(LCMIV - LCMIV1)
166 |       B = FLOAT(IGCMX - LCMIV1 - 1)
167 |       DX = (X(IGCMX) - X(LCMIV1))*A / (FN*(X(LCMIV) - X(LCMIV1)))
168 |      +          - B / FN
169 |       IX = IX - 1
170 |       IF (DX .LT. D) GO TO 60
171 |       D = DX
172 |       IG = IX + 1
173 |       IH = IV
174 |       GO TO 60
175 | C
176 | C     If the next point of either the GCM or LCM is from the GCM,
177 | C     calculate the distance here.
178 | C
179 |    55 LCMIV = LCM(IV)
180 |       IGCM = GCM(IX)
181 |       IGCM1 = GCM(IX + 1)
182 |       A = FLOAT(LCMIV - IGCM1 + 1)
183 |       B = FLOAT(IGCM - IGCM1)
184 |       DX = A / FN - ((X(LCMIV) - X(IGCM1))*B) / (FN * (X(IGCM)
185 |      +          - X(IGCM1)))
186 |       IV = IV + 1
187 |       IF (DX .LT. D) GO TO 60
188 |       D = DX
189 |       IG = IX + 1
190 |       IH = IV - 1
191 |    60 IF (IX .LT. 1) IX = 1
192 |       IF (IV .GT. ICV) IV = ICV
193 |       IF (GCM(IX) .NE. LCM(IV)) GO TO 50
194 |    65 IF (D .LT. DIP) GO TO 100
195 | C
196 | C     Calculate the DIPs for the current LOW and HIGH.
197 | C
198 | C     The DIP for the convex minorant.
199 | C
200 |       DL = ZERO
201 |       IF (IG .EQ. ICX) GO TO 80
202 |       ICXA = ICX - 1
203 |       DO 76 J = IG, ICXA
204 |         TEMP = ONE / FN
205 |         JB = GCM(J + 1)
206 |         JE = GCM(J)
207 |         IF (JE - JB .LE. 1) GO TO 74
208 |         IF (X(JE) .EQ. X(JB)) GO TO 74
209 |         A = FLOAT(JE - JB)
210 |         CONST = A / (FN * (X(JE) - X(JB)))
211 |         DO 72 JR = JB, JE
212 |           B = FLOAT(JR - JB + 1)
213 |           T = B / FN - (X(JR) - X(JB))*CONST
214 |           IF (T .GT. TEMP) TEMP = T
215 |    72   CONTINUE
216 |    74   IF (DL .LT. TEMP) DL = TEMP
217 |    76 CONTINUE
218 | C
219 | C     The DIP for the concave majorant.
220 | C
221 |    80 DU = ZERO
222 |       IF (IH .EQ. ICV) GO TO 90
223 |       ICVA = ICV - 1
224 |       DO 88 K = IH, ICVA
225 |         TEMP = ONE / FN
226 |         KB = LCM(K)
227 |         KE = LCM(K + 1)
228 |         IF (KE - KB .LE. 1) GO TO 86
229 |         IF (X(KE) .EQ. X(KB)) GO TO 86
230 |         A = FLOAT(KE - KB)
231 |         CONST = A / (FN * (X(KE) - X(KB)))
232 |         DO 84 KR = KB, KE
233 |           B = FLOAT(KR - KB - 1)
234 |           T = (X(KR) - X(KB))*CONST - B / FN
235 |           IF (T .GT. TEMP) TEMP = T
236 |    84   CONTINUE
237 |    86   IF (DU .LT. TEMP) DU = TEMP
238 |    88 CONTINUE
239 | C
240 | C     Determine the current maximum.
241 | C
242 |    90 DIPNEW = DL
243 |       IF (DU .GT. DL) DIPNEW = DU
244 |       IF (DIP .LT. DIPNEW) DIP = DIPNEW
245 |       LOW = GCM(IG)
246 |       HIGH = LCM(IH)
247 | C
248 | C     Recycle
249 | C
250 |       GO TO 40
251 | C
252 |   100 DIP = HALF * DIP
253 |       XL = X(LOW)
254 |       XU = X(HIGH)
255 | C
256 |       RETURN
257 |       END
258 | 
259 | 


--------------------------------------------------------------------------------
/stuff/new-sim-analysis.R:
--------------------------------------------------------------------------------
  1 | ### From the large (Ns = 100001) simulation in
  2 | ### ./new-simul.R
  3 | ###  ~~~~~~~~~~~~
  4 | 
  5 | Ns <- 1000001
  6 | 
  7 | if(require("diptest") && is.character(data(qDiptab))) {
  8 |     dn <- dimnames(qDiptab)
  9 | 
 10 |     nn  <- as.numeric(dn[[1]])
 11 |     P.p <- as.numeric(dn[[2]])
 12 | 
 13 | } else { ## no longer needed, now we have  'qDiptab'
 14 |     setwd("/u/maechler/R/Pkgs/diptest/stuff")
 15 | 
 16 |     ## "wrong" dip stat: load("/u/maechler/R/Pkgs/diptest/stuff/dipSim_1e5.rda")
 17 |     load("dipSim_1e6.rda")
 18 | 
 19 |     stopifnot(identical(P.p, as.numeric(colnames(P.dip))),
 20 |               identical(nn,  as.numeric(rownames(P.dip))))
 21 | 
 22 |     names(dimnames(P.dip)) <- c("n","Pr")
 23 | 
 24 |     data(qDiptab, package="diptest")
 25 |     identical(P.dip, qDiptab) # !
 26 | 
 27 |     attr(P.dip, "N_1") <- as.integer(Ns - 1)
 28 | 
 29 |     ## new data set!
 30 |     qDiptab <- P.dip
 31 | 
 32 | } # end {else: no longer needed}
 33 | 
 34 | 
 35 | Pp <- P.p  [  P.p < 1]# not max() = 100% percentile
 36 | qDip.Rn <- qDiptab[, P.p < 1]*sqrt(nn)
 37 | nP <- length(Pp)
 38 | 
 39 | ## Titles
 40 | nqdip.tit <-
 41 |     expression(sqrt(n) %*% " percentile of  "* list(dip(X),
 42 |         " "* X *" ~ " * U * group("[",list(0,1),"]"))*
 43 |         "   vs.  " * n)
 44 | y.tit <- expression(sqrt(n) *" " %*% " " * qDip)
 45 | (y.titL <- substitute(F * "  [log scaled]", list(F = y.tit[[1]])))
 46 | 
 47 | mPerclegend <- function(x,y, pr) {
 48 |     nP <- length(pr)
 49 |     legend(x, y, legend=
 50 |            rev(paste(c(paste(c(1:9,0)),letters)[1:nP],
 51 |                      paste(100*pr,"%",sep=""), sep=": ")),
 52 |            col = rev(rep(1:6,length=nP)),
 53 |            lty = rev(rep(1:5,length=nP)), bty='n')
 54 | }
 55 | 
 56 | if(FALSE)## for paper --- write a vignette !! ---
 57 | sfsmisc::pdf.latex("dip_critical.pdf")
 58 | ## for print out, or "sending along":
 59 | 
 60 | pdf.do("dip_critical.pdf", paper = "a4")
 61 | 
 62 | matplot(nn, qDip.Rn, type = "n", yaxt = "n",
 63 |         xlab = 'n  [log scale]', ylab = y.tit,
 64 |         xlim = range(1.5,nn), log='x', main= nqdip.tit)
 65 | abline(h = seq(0.1, 0.9, by = 0.05), col = "gray80", lty = 3)
 66 | op <- par(las=2); for(i in c(2,4)) axis(i, at = seq(0.1,0.9, by=0.1)); par(op)
 67 | mPerclegend(1.0, 0.95, Pp)
 68 | matlines(nn, qDip.Rn[, "0.5"], lwd = 3, col = "dark gray")
 69 | matlines(nn, qDip.Rn, type = 'o')
 70 | mtext(paste(Ns, " simulated samples"), 3, line=0)
 71 | mtext("© Martin Maechler, ETH Zurich", 1, line = 3.2, adj = 1)
 72 | 
 73 | pdf.end()
 74 | 
 75 | mtext(paste(getwd(), date(), sep="\n"), 4, cex=.8, adj=0)
 76 | 
 77 | ## "research" :
 78 | ## 1. prove that min(dip) = 1/(2 * n)
 79 | ##	{I'm sure this follows from the "string" equivalence;
 80 | ##       but that is not according to theory D(F) dip definition, where
 81 | ##	 D(F) = 0  <==> F itself is unimodal
 82 | ##    Can we prove that  dip(F_n) = 1/(2n)  ===>  F_n is unimodal ?
 83 | ##
 84 | ## 2. derive the function  ff(n) :=  Prob[dip = 1/(2 * n)]
 85 | 
 86 | 
 87 | ## log y: --->> more symmetric distributions, but still skewed to the right
 88 | ## -----        (asymptotic) is of interest, but also: how to do interpolation
 89 | nP <- sum(smP <- 0.01 < Pp & Pp <= .999)# only "relevant subset"
 90 | matplot(nn, qDip.Rn[, smP], type='n', log='xy', xlim = range(1.5,nn),
 91 |         xlab = 'n  [log scale]',ylab = y.titL, main = nqdip.tit)
 92 | mPerclegend(1.0, .75, Pp[smP])
 93 | matlines(nn, qDip.Rn[, "0.5"], lwd = 3, col = "dark gray")
 94 | matlines(nn, qDip.Rn[, smP], type = 'o')
 95 | mtext(paste(Ns, " simulated samples"), side = 3, line = 0)
 96 | 
 97 | 
 98 | 
 99 | ### only larger N   to see if it became constant:
100 | nN <- sum(nL <- nn > 100)
101 | matplot(nn[nL], qDip.Rn[nL,], type='o', xlim = range(50,nn[nL]),
102 |         log='x', xlab = 'n  [log scale]', ylab = y.tit, main = nqdip.tit)
103 | mtext(paste(Ns, " simulated samples"), side = 3, line = 0)
104 | mPerclegend("topleft", NULL, Pp)
105 | matlines(nn[nL], qDip.Rn[nL, "0.5"], lwd = 4,
106 |          col = adjustcolor("black", 0.4))
107 | 
108 | 
109 | ##-- Asymptotic : see more in ./asymp-distrib.R
110 | ##                              ~~~~~~~~~~~~~~~
111 | 
112 | 
113 | mult.fig(9, main = "dip(U[0,1]) distribution {simulated} -- for small n")
114 | for (cn in nn[1:9]) {
115 |     plot(qDiptab[paste(cn),], P.p,
116 |          xlab = "dip = d(x[1 .. n])", ylab = expression(P(D >= d)),
117 |          type = 'o', cex = 0.6, main = paste("n = ",cn))
118 |     abline(h=0:1, col="gray")
119 | }
120 | ###
121 | 
122 | 
123 | ## Another thing: Draw "density" from quantiles only:
124 | ## Use derivative of cubic spline interpolation or so?
125 | 
126 | 
127 | P2dens <- function(x, probs, eps.p = 1e-7, xlim = NULL, f.lim = 0.5,
128 |                    method = c("interpSpline", "monoH.FC", "natural"))
129 | {
130 |     ## Purpose: Density from probabilty/quantiles -- return a *function*
131 |     ## ----------------------------------------------------------------------
132 |     ## Arguments:     x: quantiles , x[1:n]
133 |     ##            probs: probabilities, i.e., Pr{X <= x[i]} == probs[i]
134 |     ##            eps.p: small value used in:
135 |     ##            xlim: if(probs[1] > eps.p and/or probs[n] < 1-eps.p),  use
136 |     ##                   x[0] = xlim[1] and/or x[n+1] = xlim[2]  with
137 |     ##                   probs[0] = 0 and/or probs[n+1] = 1
138 |     ##                  Per default, xlim = range(x) +  f.lim * c(-d,d),
139 |     ##                  where d = diff(range(x)) = max(x) - min(x)
140 |     ## ----------------------------------------------------------------------
141 |     ## Author: Martin Maechler, Date: 14 Jul 2003, 14:57
142 |     if((n <- length(x)) != length(probs))
143 |         stop("'x' and 'probs' must have same length")
144 |     if(any(0 > probs | probs > 1))
145 |         stop("'probs' must be in [0,1]")
146 |     if(is.unsorted(probs)) {
147 |         sp <- sort(probs, index.return=TRUE)
148 |         x <- x[sp$ix]
149 |         probs <- sp$x
150 |         rm(sp)
151 |     }
152 | 
153 |     if((L <- probs[1] > eps.p) |
154 |        (R <- probs[n] < 1-eps.p)) {
155 |         d <- diff(r <- range(x))
156 |         if(L) { x <- c(r[1] - f.lim*d, x); probs <- c(0, probs) }
157 |         if(R) { x <- c(x, r[2] + f.lim*d); probs <- c(probs, 1) }
158 |         rm(r,d,L,R)
159 |     }
160 | 
161 |     method <- match.arg(method)
162 |     switch(method,
163 | 	   "interpSpline" = {
164 | 	       library(splines)
165 | 	       Fspl <- interpSpline(x, probs)
166 | 	       rm(x,probs)
167 | 	       function(x) predict(Fspl, x, deriv = 1) $y
168 | 	   },
169 | 	   "monoH.FC" = {
170 | 	       f <- splinefun(x, probs, method="mono")
171 | 	       formals(f)[["deriv"]] <- 1
172 | 	       f
173 | 	   },
174 | 	   "natural" = {
175 | 	       f <- splinefun(x, probs, method="natural")
176 | 	       formals(f)[["deriv"]] <- 1
177 | 	       f
178 | 	   },
179 | 	   ## otherwise
180 | 	   stop("invalid method ", method))
181 | }
182 | 
183 | d1 <- P2dens(-3:3, pr=pnorm(-3:3))
184 | plot(d1, -7, 7, n = 501)
185 | points(-3:3, d1(-3:3))
186 | ## quite fine :
187 | curve(dnorm, col = 2, add=TRUE, n = 501)
188 | 
189 | ## Now with *monotone* Hermite interpolation --- this is *WORSE* !!
190 | d2 <- P2dens(-3:3, pr=pnorm(-3:3), method = "mono")
191 | plot(d2, -7, 7, n = 501, add=TRUE, col="midnightblue")
192 | points(-3:3, d2(-3:3))
193 | 
194 | ## and more experiments suggest the best (here!) solution being "natural"
195 | d2 <- P2dens(-3:3, pr=pnorm(-3:3), method = "natural")
196 | plot(d2, -7, 7, n = 501, add=TRUE, col="midnightblue")
197 | points(-3:3, d2(-3:3))
198 | ## well,
199 | x <- seq(-7,7, len=1001)
200 | all.equal(d1(x), d2(x), tol = 1e-15)# TRUE
201 | 
202 | str(get("Fspl", envir= environment(d1)))
203 | ##- List of 2
204 | ##-  $ knots       : num [1:9] -6 -3 -2 -1 0 1 2 3 6
205 | ##-  $ coefficients: num [1:9, 1:4] 0.00000 0.00135 0.02275 0.15866 0.50000 ...
206 | ##-  - attr(*, ...........
207 | 
208 | x0 <- c(0, 2^(-3:6))
209 | d2 <- P2dens(x0, pr=pgamma(x0, shape = 1.5))
210 | plot(d2, 0, max(x0), n = 501)
211 | rug(x0)
212 | # not bad, too :
213 | curve(dgamma(x,shape=1.5), col = 2, add=TRUE, n = 501)
214 | 
215 | 
216 | ##-- But the thing I wanted fails because of point masses (left border):
217 | dDips <- apply(qDiptab, 1, function(qd) P2dens(qd, pr = P.p))
218 | ## Error in interpSpline..(.) : values of x must be distinct << !
219 | 


--------------------------------------------------------------------------------
/stuff/unimodality.Rnw:
--------------------------------------------------------------------------------
  1 | % -*- mode: noweb; noweb-default-code-mode: R-mode; -*-
  2 | \documentclass[a4paper,12pt]{article}
  3 | \usepackage[utf8]{inputenc}
  4 | \usepackage[UKenglish]{babel}
  5 | \usepackage{graphicx,natbib,array,subfig}%,lineno}
  6 | \usepackage[user,titleref]{zref}% para fazer referencia ao nome da seção (a o invés do número)
  7 | %\usepackage[font=sf,textfont=md]{caption} %% atenção: incompatível com o pacote subfig!
  8 | %\newcolumntype{M}{>{\small\sffamily}l}
  9 | % \newcolumntype{N}{>{\small\sffamily}c}
 10 | % \setlength\extrarowheight{2pt}
 11 | 
 12 | \usepackage{ifthen}
 13 | \newboolean{ONLYME}
 14 | \setboolean{ONLYME}{false} % modificar \SweaveOpts abaixo
 15 | %%%%%%
 16 | %%%%%%%%%%%%%%
 17 | %%% how to use Sweave:
 18 | % Once the Sweave file (this file) has been written, it can be
 19 | % processed in R by typing into a running R session
 20 | %   Sweave("filename.Snw")
 21 | % where filename.Snw is the Sweave file. If R is run in a different directory,
 22 | % one has to provide the full path name. This generates the file filename.tex
 23 | % which can be processed as usual with LATEX.
 24 | %%%
 25 | 
 26 | \title{Checking for unimodality}
 27 | \author{Og DeSouza \& Karsten Schönrogge}
 28 | \date{\today}
 29 | \usepackage{xspace} % para manter espaço em branco após os comandos
 30 |                     % montados com newcommand{}
 31 | \begin{document}
 32 | %%%%%%%%%%%%%%%%%%%%%%%% opção global do ``echo''
 33 | \SweaveOpts{echo=T} 
 34 | %%%%%%%%%%%%%%%%%%%%%%%
 35 | \setkeys{Gin}{width=0.5\linewidth} % largura dos gráficos no output final
 36 | \maketitle
 37 | 
 38 | <<echo=FALSE>>=
 39 | # largura da linha do R no output final
 40 | options(width=66)   
 41 | 
 42 | # parâmetros dos gráficos do R
 43 | 
 44 | options(SweaveHooks=list(fig=function() par(pty="s")))
 45 | 
 46 | @ 
 47 | \newcommand\x{\textsf{x }\xspace} %  x em sans-serif
 48 | \newcommand\y{\textsf{y }\xspace} % y em sans-serif
 49 | %\linenumbers
 50 | \section{Introduction}
 51 | 
 52 | 
 53 | 
 54 | The R package \texttt{diptest} computes Hartigan's dip test statistic for testing unimodality. It derives from \citet{HartiganHartigan1985Dip}.%, which is itself a \textit{bimodal} husband-wife collaboration!
 55 | The package was written by Martin Maechler (ETH Zürich), based on Fortran and S-plus from Dario Ringach, NYU.edu. It is currently mantainned by Martin Maechler.
 56 | 
 57 | The dip value measures the  departure of a sample from unimodality, measuring the maximum
 58 | difference, over all sample points, between the empirical distribution function,
 59 | and the unimodal distribution function that minimizes that maximum
 60 | difference. The test gives the \textit{dip} value, which after conveniently transformed, inform a P-value for the hipothesis that the dataset is best described by an unimodal, as opposed to bimodal, distribution. P-values are calculated by comparing the dip statistic obtained with those for repeated samples of the same size from a uniform distribution.
 61 | 
 62 | 
 63 | \section{The procedure in R}
 64 | We start by calling the appropriate package\footnote{If you to not have the package installed, simply type \texttt{> install.packages()}, choose your closest mirror and then choose the \texttt{diptest} package from the list.}:
 65 | <<>>=
 66 | library(diptest)
 67 | @ 
 68 | \subsection{Calling and inspecting the dataset}
 69 | Now we call the dataset:
 70 | <<>>=
 71 | microdon <- c(10,10,10,15,15,15,15,20,20,20,25,25,25,30,30,30,30,30,35,35,35,35,35,35,35,35,39,39,39,39,39)
 72 | @ 
 73 | 
 74 | \begin{figure}%
 75 | \centering
 76 | <<fig=T>>=
 77 | hist(microdon)
 78 | @ 
 79 | \caption{Histogram for Microdon's raw data}
 80 | \label{fig:histogram}
 81 | \end{figure}
 82 | \begin{figure}
 83 | \centering
 84 | <<fig=T>>=
 85 |  plot(density(microdon))
 86 | @ 
 87 | \caption{Density function for Microdon's raw data.}%
 88 | \label{fig:density}%
 89 |  \end{figure}
 90 | 
 91 | \subsection{Calculating}
 92 | 
 93 | To calculate the \textit{dip} value, simple type:
 94 | <<>>=
 95 | dvalue <- dip(microdon)
 96 | dvalue
 97 | @  
 98 | \section{P-values}\label{sec:pvalues}
 99 | 
100 | The are two equivalent ways to calculate the P-value for this empirical \textit{dip} value. One way would be to inspect the table of quantiles which is given by \texttt{qDiptab}. This table has probability values in the columns and number of observations in the rows. A drawback is that this table only holds dip values for a limited number of observations, and you end having to use a p-value for the number of observations available in the table that is closest to your actual numbers. This should be no problem if your p-value is not poised in a decision boundary (e.g. p = 0.045). If this is the case, then you would be better off by using the second procedure, that involves simulating dip values for that particular number of observations.
101 | 
102 | \subsection{Extracting p-values from \texttt{qDiptab}}\label{sec:qDiptab}
103 | 
104 | In such table, you should locate your empirical value (dip = \Sexpr{round(dvalue,4)}), taking note of its row and column, for they are respectively the number of observations in your dataset (here, n = \Sexpr{length(microdon)}) and the respective Pr value. Then you take one minus the Pr value you find on top of the table, and that is the P value for the hipothesis that your dataset was drawn from a unimodal distribution. A P value of, say, 0.04 would mean that your dataset has only a small chance to belong to an unimodal distribution.
105 | 
106 | 
107 | So, your dip value is (d=\Sexpr{round(dvalue,4)}), and your dataset has \Sexpr{length(microdon)} observations. You will notice that the \texttt{qDiptab} has only a limited set of rows, meaning that you won't get a row for n =  \Sexpr{length(microdon)} observations. You should then look for the closest one, which would the row for n = 30. There you will find the following dip values:
108 | 
109 | <<>>=
110 | n30 <- qDiptab["30",]
111 | n30
112 | @ 
113 | 
114 | In this table, the dip values which are closest to your emprical dip value d = \Sexpr{round(dvalue,4)} are
115 | <<echo=F,results=hide>>=
116 | lrgdvalue <- names(n30[n30>dvalue])[1] # finds the Pr value for dip > empirical dip
117 | lrgdvalue
118 | smldvalue <- names(n30[n30<dvalue]) # finds all Pr values for dip < empirical dip
119 | smldvalue
120 | smldvalue <- smldvalue[length(smldvalue)] # chooses only the smallest Pr value for dip < empirical dip
121 | smldvalue
122 | @ 
123 | \Sexpr{round(n30[[paste(smldvalue)]],4)} (which sits under the column Pr = \Sexpr{smldvalue}),  and  \Sexpr{round(n30[[paste(lrgdvalue)]],4)} (sitting under the column Pr = \Sexpr{lrgdvalue}). That indicates that p-values for your empirical dip value  (d=\Sexpr{round(dvalue,4)}) would be $\Sexpr{1-as.numeric(smldvalue)}<p-value<\Sexpr{1-as.numeric(lrgdvalue)}$, since p-value = 1 - Pr. 
124 | 
125 | \subsection{Calculating p-values}
126 | 
127 | Rather than using the \texttt{qDiptab} table, you could calculate the actual p-values for your number of observations. This is done by drawing several dip values out of a uniform distribution having the same number of observations and, then, compare your empirical dip value to see if it differs from that.
128 | 
129 | First you draw dip values out of 1000 rectangular (uniform) distributions:
130 | <<>>=
131 | rdip <- replicate(1000,dip(runif(length(microdon))))
132 | pvalue <- sum(rdip>dip(microdon))/1000
133 | pvalue
134 | @ 
135 | 
136 | The pvalue now changes to \Sexpr{round(pvalue,4)}, which is actually not too different from that we've calculated at section~\ref{sec:qDiptab} (recall that there we've only approximated the pvalue, because the actual number of observations was not available in \texttt{qDiptab} table).
137 | 
138 | You migth want to take a look on this new set of dip values, and where does your empirical dip value is located. As you can see in Fig.~\ref{fig:simulate}, most of the simulated dip values are actually larger that your empirical dip value. That is, there is \Sexpr{pvalue*100}\% chance to find a dip value larger than yours in this simulation, therefore, we should consider this empirical value to belong to that uniform distribution, and hence from an unimodal data set. You should recall that  that the dip value is actually a mesurement of the difference between the empirical distribution and its closest unimodal distribution. Therefore, we are only concerned about large dip values, since a small dip value actually means that the empirical distribution is very close a theoretical unimodal distribution.
139 | \begin{figure}
140 | <<fig=T>>=
141 | hist(rdip)
142 | abline(v=dip(microdon),lty=2)
143 | text(0.09,200,paste("pvalue",pvalue,sep=" = "))
144 | @ 
145 | \caption{A distribution dip values simulated out of a 1000 uniform distributions. The dashed line shows where your empirical dip value is located.}
146 | \label{fig:simulate}
147 | \end{figure}
148 | 
149 | \section{Acknowledgements}
150 | We thank Martin Maechler for revising this document.
151 | 
152 | \begin{thebibliography}{1}
153 | \providecommand{\natexlab}[1]{#1}
154 | \providecommand{\url}[1]{\texttt{#1}}
155 | \providecommand{\urlprefix}{URL }
156 | \expandafter\ifx\csname urlstyle\endcsname\relax
157 |   \providecommand{\doi}[1]{doi:\discretionary{}{}{}#1}\else
158 |   \providecommand{\doi}{doi:\discretionary{}{}{}\begingroup
159 |   \urlstyle{rm}\Url}\fi
160 | \providecommand{\eprint}[2][]{\url{#2}}
161 | 
162 | \bibitem[{Hartigan \& Hartigan(1985)}]{HartiganHartigan1985Dip}
163 | \textbf{Hartigan, J.A. \& Hartigan, P.M.} (1985) The dip test of unimodality.
164 | \newblock \emph{Annals of Statistics} 13, 70–84.
165 | 
166 | \end{thebibliography}
167 | 
168 | \end{document}
169 | 


--------------------------------------------------------------------------------
/stuff/asymp-distrib.R:
--------------------------------------------------------------------------------
  1 | ####---- More  "full simulations" of the asymptotic limit:  sqrt(n) * D_n
  2 | ####                                     ================
  3 | 
  4 | setwd("/u/maechler/R/Pkgs/diptest/stuff") ## will load all the LARGE   dip<n>k.rda files
  5 | 
  6 | ## These all have  n.sim = 1000'001  samples of
  7 | ## dip(runif(N)) for "large N"
  8 | ## where produced by scripts such as  ./d20k_do.R
  9 | N.k.set <- c(8,12,16,20,24,32,36,40)
 10 | ## or automatically
 11 | patt <- "^dip(.*)k\\.rda"
 12 | N.k.set <- sort(as.integer(sub(patt, "\\1", list.files(pattern = patt))))
 13 | ## for now: drop 5
 14 | if(5 %in% N.k.set) { N.k.set <- N.k.set[N.k.set != 5] }
 15 | N.k.set
 16 | 
 17 | dip.nm <- function(N.k, file=FALSE)
 18 |     paste("dip", N.k, if(file)"k.rda" else "k", sep='')
 19 | for(N.k in N.k.set)
 20 |     load(dip.nm(print(N.k), file=TRUE))
 21 | 
 22 | if(interactive()) # show more interesting table below
 23 | invisible(lapply(dip.nm(N.k.set), function(nm) {cat(nm,": "); str(get(nm)) }))
 24 | ## all are vectors of length 1'000'001
 25 | 
 26 | d.dip <- function(N.k, scaleUp = TRUE)
 27 | {
 28 |     ## Purpose: Simulation data for N = N.k * 1000, possibly sqrt(N) scaled
 29 |     ## ----------------------------------------------------------------------
 30 |     ## Author: Martin Maechler, Date: 15 Apr 2009, 10:36
 31 |     N <- N.k * 1000
 32 |     nm <- dip.nm(N.k)
 33 |     if(scaleUp) sqrt(N) * get(nm) else get(nm)
 34 | }
 35 | Nk2char <- function(Nk) {
 36 |     dput(unname(Nk),
 37 |          textConnection(".val", "w", local=TRUE),
 38 |          control = "S_compatible")# no "10L" but "10"
 39 |     .val
 40 | }
 41 | mFormat <- function(n) sub("([0-9]{3})$", "'\\1", format(n))
 42 | 
 43 | if(!dev.interactive(orNone=TRUE)) pdf("asymp-distrib.pdf")
 44 | 
 45 | names(N.k.set) <- paste(format(N.k.set), "'000", sep='')
 46 | t(sums <- sapply(N.k.set, function(Nk) summary(d.dip(Nk))))
 47 | ##          Min. 1st Qu. Median   Mean 3rd Qu.   Max.
 48 | ##  6'000 0.1777  0.3274 0.3759 0.3876  0.4357 0.9858
 49 | ##  8'000 0.1795  0.3278 0.3765 0.3881  0.4364 0.9772
 50 | ## 10'000 0.1750  0.3280 0.3767 0.3884  0.4368 0.9522
 51 | ## 12'000 0.1779  0.3283 0.3770 0.3887  0.4370 0.9421
 52 | ## 16'000 0.1768  0.3285 0.3774 0.3890  0.4373 1.0080
 53 | ## 20'000 0.1689  0.3288 0.3774 0.3891  0.4373 0.9699
 54 | ## 24'000 0.1814  0.3288 0.3775 0.3893  0.4375 1.0570
 55 | ## 32'000 0.1758  0.3290 0.3779 0.3896  0.4380 0.9757
 56 | ## 36'000 0.1724  0.3295 0.3783 0.3899  0.4382 1.0500
 57 | ## 40'000 0.1705  0.3294 0.3781 0.3899  0.4382 1.0270
 58 | ## 44'000 0.1742  0.3295 0.3782 0.3898  0.4381 0.9693
 59 | ## 52'000 0.1752  0.3295 0.3783 0.3900  0.4383 0.9463
 60 | ## 60'000 0.1755  0.3295 0.3785 0.3901  0.4384 1.0060
 61 | ## 72'000 0.1729  0.3296 0.3785 0.3902  0.4386 1.0100
 62 | 
 63 | ## And the empirical densities of the sqrt(n)-blown up look
 64 | ## "practically" identical:
 65 | plot (density(d.dip(36)), main = "")
 66 | abline(h=0, col="dark gray", lty=3)
 67 | for(N.k in head(N.k.set,-1))
 68 |     lines(density(d.dip(N.k)))
 69 | title(expression("Dip  distributions"~~~ sqrt(n) * D[n] ~~~
 70 |     "superimposed for"))
 71 | mtext(paste(" n = 1000 *", Nk2char(N.k.set)), line = .5)
 72 | 
 73 | ## Could Log-normal fit ?
 74 | plot (d <- density(log(d.dip(max(N.k.set)))), main="")
 75 | abline(h=0, col="dark gray", lty=3)
 76 | for(N.k in head(N.k.set,-1))
 77 |     lines(density(log(d.dip(N.k)), bw = d$bw))
 78 | title(expression("log(Dip)  distributions"~~~ log(sqrt(n) * D[n]) ~~~
 79 |     "superimposed for"))
 80 | mtext(paste(" n = 1000 *", Nk2char(N.k.set)), line = .5)
 81 | 
 82 | 
 83 | ## Now only for larger N:
 84 | (N.k.L <- tail(N.k.set, 4))
 85 | plot (d <- density(log(d.dip(max(N.k.L)))), main="")
 86 | abline(h=0, col="dark gray", lty=3)
 87 | for(N.k in head(N.k.L, -1))
 88 |     lines(density(log(d.dip(N.k)), bw = d$bw))
 89 | title(expression("log(Dip)  distributions"~~~ log(sqrt(n) * D[n]) ~~~
 90 |     "superimposed for"))
 91 | mtext(paste(" n = 1000 *", Nk2char(N.k.L)), line = .5)
 92 | ## -- below, we *add* to this plot !
 93 | 
 94 | ## looks quite symmetric (but is not quite, see below!)
 95 | 
 96 | x <- sort(sqrt(20000)* dip20k) # sorting, just for possible convenience
 97 | lx <- log(x)
 98 | 
 99 | if(FALSE) { ## Attention! Plotting 1 mio points on Xlib("cairo") is deadly!
100 |     x11(type = "Xlib")
101 |     qqnorm(lx) #-> already clear that (log)normal does  NOT fit
102 | }
103 | library(MASS)
104 | fd <- fitdistr(x, "lognormal")
105 | fd
106 | ##       meanlog          sdlog
107 | ##   -0.9656529434    0.2074114508
108 | ##  ( 0.0002074113) ( 0.0001466620)
109 | logLik(fd) # 1119766
110 | 
111 | fd. <- fitdistr(lx, "normal")
112 | fd. # exact same parameters as above
113 | logLik(fd.) # 154112.4 (df=2) --- very different to  fd's
114 | 
115 | dlnormFit <- function(x) do.call(dlnorm, c(list(x=x), coef(fd)))
116 | dnormFit <- function(x) do.call(dnorm, c(list(x=x), coef(fd.)))
117 | curve(dnormFit, add = TRUE, col = "tomato")
118 | mtext(expression(dnorm(paste(symbol("\341"),# == "left angle", see ?plotmath,
119 |                              "fit to ", log(sqrt(n) * D[n]),
120 |                              symbol("\361")# == right angle
121 |     ))), col = "tomato", line = -1, adj = .95)
122 | ##-->  log-normal clearly does *not* fit !
123 | 
124 | 
125 | fdg <- fitdistr(x, "gamma") ## this takes a few seconds!
126 | fdg
127 | ##       shape         rate
128 | ##   23.09376812   59.34742081
129 | ##  ( 0.03242641) ( 0.08424100)
130 | dgammaFit <- function(x) do.call(dgamma, c(list(x=x), coef(fdg)))
131 | 
132 | fdw <- fitdistr(x, "weibull") ## this takes a few seconds!
133 | fdw <- fitdistr(x, "weibull", control=list(trace=2))
134 | fdw
135 | ##       shape          scale
136 | ##   4.665147e+00   4.231184e-01
137 | ##  (3.277802e-03) (9.629305e-05)
138 | dweibullFit <- function(x) do.call(dweibull, c(list(x=x), coef(fdw)))
139 | 
140 | ## In original scale :
141 | plot (density(d.dip(36)))
142 | lines(density(d.dip(32)), col="gray")
143 | lines(density(d.dip(24)), col="gray30")
144 | 
145 | curve(dlnormFit, add = TRUE, col = "tomato")
146 | ## does not fit
147 | curve(dgammaFit, add = TRUE, col = "blue3")
148 | ## is even worse than log-normal
149 | curve(dweibullFit, add = TRUE, col = "forest green")
150 | ## is much worse even
151 | 
152 | 
153 | ###---------> "back" to look at  CDFs ---------------------------------
154 | 
155 | Fn12k <- ecdf(d.dip(12))
156 | Fn20k <- ecdf(d.dip(20))
157 | Fn24k <- ecdf(d.dip(24))
158 | Fn32k <- ecdf(d.dip(32))
159 | Fn36k <- ecdf(d.dip(36))
160 | 
161 | 
162 | plot(Fn20k, do.points = FALSE)# still slow
163 | lines(ecdf(sqrt(12000) * dip12k), col=2, do.points = FALSE)
164 | 
165 | ks.test(sqrt(12000) * dip12k,
166 |         sqrt(20000) * dip20k)
167 | ## 	Two-sample Kolmogorov-Smirnov test
168 | 
169 | ## data:  sqrt(12000) * dip12k and sqrt(20000) * dip20k
170 | ## D = 0.0029, p-value = 0.0004664  <<<--- ***
171 | ## alternative hypothesis: two-sided
172 | 
173 | ## and comparing the 8000 with 12'000 case even
174 | ## has  p-value = 4.98 e-5
175 | 
176 | ## Now, with new "24'000":
177 | ks.test(sqrt(24000) * dip24k,
178 |         sqrt(20000) * dip20k)  ## Heureka !
179 | ##
180 | ## 	Two-sample Kolmogorov-Smirnov test
181 | 
182 | ## data:  sqrt(24000) * dip24k and sqrt(20000) * dip20k
183 | ## D = 0.0014, p-value = 0.292
184 | ## alternative hypothesis: two-sided
185 | ##
186 | 
187 | ##--- Compute P-values of all pairwise KS tests: -- takes several minutes!!
188 | m <- length(N.k.set)
189 | P.vals <- matrix(NA, m,m, dimnames = list(names(N.k.set),names(N.k.set)))
190 | diag(P.vals) <- 0
191 | for(i in 1:(m-1)) {
192 |     cat("i = ", i)
193 |     d.i <- d.dip(N.k.set[i])
194 |     for(j in (i+1):m) {
195 |         cat(".")
196 |         p <- ks.test(d.i, d.dip(N.k.set[j])) $ p.value
197 |         P.vals[i,j] <- P.vals[j,i] <- p
198 |     }; cat("\n")
199 | }
200 | 
201 | Matrix::Matrix(round(100* P.vals[,7:14, drop=FALSE],1))
202 | ##        24'000 32'000 36'000 40'000 44'000 52'000 60'000 72'000
203 | ##  6'000    .      .      .      .      .      .      .      .
204 | ##  8'000    .      .      .      .      .      .      .      .
205 | ## 10'000    .      .      .      .      .      .      .      .
206 | ## 12'000    .      .      .      .      .      .      .      .
207 | ## 16'000    3.0    .      .      .      .      .      .      .
208 | ## 20'000   29.2    .      .      .      .      .      .      .
209 | ## 24'000    .      2.6    .      .      .      .      .      .
210 | ## 32'000    2.6    .      0.2    2.1    0.1    .      .      .
211 | ## 36'000    .      0.2    .     40.8   47.1   87.0   13.6   20.9
212 | ## 40'000    .      2.1   40.8    .     45.8    8.6    0.9    1.4
213 | ## 44'000    .      0.1   47.1   45.8    .     20.4    1.9    2.4
214 | ## 52'000    .      .     87.0    8.6   20.4    .     34.2   57.5
215 | ## 60'000    .      .     13.6    0.9    1.9   34.2    .     57.1
216 | ## 72'000    .      .     20.9    1.4    2.4   57.5   57.1    .
217 | 
218 | ## Hmm,  .. why is 36'000 different from 32'000 ??
219 | 
220 | save(P.vals, fd, fd., fdg, fdw,
221 |      file = "asymp-res.rda")
222 | 
223 | (xl. <- extendrange(d.dip(20)))
224 | xi <- seq(xl.[1], xl.[2], length.out = 2000)
225 | 
226 | plot(xi, Fn20k(xi) - Fn12k(xi), type = "l", col = 2)
227 | ## looks like the difference is largest where the density is large:
228 | den20k <- density(sqrt(20000)* dip20k,
229 |                   n = 2048, from = xl.[1], to = xl.[2], cut = 1)
230 | f.scale <- .99 * par("usr")[3] / max(den20k$y)
231 | with(den20k, lines(f.scale * y ~ x, col = "pink"))
232 | 
233 | ## Graphical "KS test"
234 | plot(xi, Fn24k(xi) - Fn20k(xi), type = "l", col = 2)
235 | ## looks much less systematic ...
236 | f.scale <- .99 * par("usr")[3] / max(den20k$y)
237 | with(den20k, lines(f.scale * y ~ x, col = "pink"))
238 | 
239 | ## 36k  vs  32k  which looks "bad" in  KS test:
240 | plot(xi, Fn36k(xi) - Fn32k(xi), type = "l", col = 2)
241 | ## looks much less systematic ...
242 | f.scale <- .99 * par("usr")[3] / max(den20k$y)
243 | with(den20k, lines(f.scale * y ~ x, col = "pink"))
244 | 
245 | ##--- Ok, look even closer for systematic:
246 | require(RColorBrewer)
247 | 
248 | Nmax <- max(N.k.set)# too large now
249 | ## rather just
250 | Nmax <- 44
251 | Fn <- ecdf(d.dip(Nmax))
252 | Fn.xi <- Fn(xi)
253 | ii <- seq_along(Ns <- N.k.set[N.k.set < Nmax])
254 | opal <- palette(brewer.pal(length(Ns), "Set3"))# "Dark2" if  nColor <= 8
255 | 
256 | yrng <- range(Fn.xi - ecdf(d.dip(min(Ns)))(xi),
257 |               Fn.xi - ecdf(d.dip(max(Ns)))(xi))
258 | plot(range(xi), yrng, type = "n",
259 |      ylab = "", xlab = "D_N  =^=  dip( runif(N) )",
260 |      main = paste("F_n(D_{N=",Nmax,"'000})  -  F_n(D_N) ;   n=",
261 |                   mFormat(length(d.dip(Nmax))), sep=''))
262 | abline(h=0, col="gray")
263 | for(i in ii)
264 |     lines(xi, Fn(xi) - ecdf(d.dip(Ns[i]))(xi), col = i+1)
265 | legend("right", legend=rev(paste("N = ", format(Ns), "'000", sep='')),
266 |        col= rev(ii+1), lty=1, lwd=2, inset = .05)
267 | 
268 | palette(opal)
269 | 
270 | ## How does the upper tail look?
271 | ## log(1 - P) = log(P{X >= x})
272 | 


--------------------------------------------------------------------------------
/src/dip.c:
--------------------------------------------------------------------------------
  1 | /*   ALGORITHM AS 217 APPL. STATIST. (1985) VOL.34, NO.3
  2 | 
  3 |   @article{HarP85,
  4 |      author = {P. M. Hartigan},
  5 |      title = {Computation of the Dip Statistic to Test for Unimodality},
  6 |      year = 1985,
  7 |      journal = {Applied Statistics},
  8 |      pages = {320--325},
  9 |      volume = 34 }
 10 |   @article{HarJH85,
 11 |      author = {J. A. Hartigan and P. M. Hartigan},
 12 |      title = {The Dip Test of Unimodality},
 13 |      year = 1985,
 14 |      journal = {Ann. of Statistics},
 15 |      pages = {70--84},
 16 |      volume = 13 }
 17 | 
 18 |   Does the dip calculation for an ordered vector X using the
 19 |   greatest convex minorant and the least concave majorant, skipping
 20 |   through the data using the change points of these distributions.
 21 | 
 22 |   It returns the dip statistic 'DIP' and the modal interval (XL, XU).
 23 | 				===			     ======
 24 | 
 25 |    dip.f -- translated by f2c (version of 22 July 1992	22:54:52).
 26 | 
 27 |    Pretty-Edited and extended (debug argument)
 28 |    by Martin Maechler <maechler@stat.math.ethz.ch>
 29 |       ETH Seminar fuer Statistik
 30 |       8092 Zurich	 SWITZERLAND
 31 | 
 32 | ---------------
 33 | 
 34 |    Two Bug Fixes:
 35 |        =========
 36 | 
 37 |    1)	July 30 1994 : For unimodal data, gave "infinite loop"  (end of code)
 38 |    2)	Oct  31 2003 : Yong Lu <lyongu+@cs.cmu.edu> : ")" typo in Fortran
 39 |                        gave wrong result (larger dip than possible) in some cases
 40 | */
 41 | 
 42 | #include <R.h>
 43 | 
 44 | // for the "registration part":
 45 | #include <Rinternals.h>
 46 | 
 47 | #include <R_ext/Rdynload.h>
 48 | 
 49 | 
 50 | /* Subroutine */
 51 | void diptst(const double x[], const int *n_,
 52 | 	    double *dip, int *lo_hi, int *ifault,
 53 | 	    int *gcm, int *lcm, int *mn, int *mj,
 54 | 	    const int *min_is_0, const int *debug)
 55 | {
 56 | #define low   lo_hi[0]
 57 | #define high  lo_hi[1]
 58 | #define l_gcm lo_hi[2]
 59 | #define l_lcm lo_hi[3]
 60 | 
 61 |     const int n = *n_;
 62 |     int mnj, mnmnj, mjk, mjmjk, ig, ih, iv, ix,  i, j, k;
 63 |     double dip_l, dip_u, dipnew;
 64 | 
 65 |     /* Parameter adjustments, so I can do "as with index 1" : x[1]..x[n] */
 66 |     --mj;    --mn;
 67 |     --lcm;   --gcm;
 68 |     --x;
 69 | 
 70 | /*-------- Function Body ------------------------------ */
 71 | 
 72 |     *ifault = 1;    if (n <= 0) return;
 73 |     *ifault = 0;
 74 | 
 75 | /* Check that X is sorted --- if not, return with  ifault = 2*/
 76 | 
 77 |     *ifault = 2;    for (k = 2; k <= n; ++k) if (x[k] < x[k - 1]) return;
 78 |     *ifault = 0;
 79 | 
 80 | /* Check for all values of X identical, */
 81 | /*     and for 1 <= n < 4. */
 82 | 
 83 | /* LOW contains the index of the current estimate  of the lower end
 84 |    of the modal interval, HIGH contains the index for the upper end.
 85 | */
 86 |     low = 1;	high = n; /*-- IDEA:  *xl = x[low];    *xu = x[high]; --*/
 87 | 
 88 | /* M.Maechler -- speedup: it saves many divisions by n when we just work with
 89 |  * (2n * dip) everywhere but the very end! */
 90 |     *dip = (*min_is_0) ? 0. : 1.;
 91 |     if (n < 2 || x[n] == x[1])		goto L_END;
 92 | 
 93 |     if(*debug)
 94 | 	Rprintf("dip() in C: n = %d; starting with  2N*dip = %g.\n",
 95 | 		n, *dip);
 96 | 
 97 | /* Establish the indices   mn[1..n]  over which combination is necessary
 98 |    for the convex MINORANT (GCM) fit.
 99 | */
100 |     mn[1] = 1;
101 |     for (j = 2; j <= n; ++j) {
102 | 	mn[j] = j - 1;
103 | 	while(1) {
104 | 	  mnj = mn[j];
105 | 	  mnmnj = mn[mnj];
106 | 	  if (mnj == 1 ||
107 | 	      ( x[j]  - x[mnj]) * (mnj - mnmnj) <
108 | 	      (x[mnj] - x[mnmnj]) * (j - mnj)) break;
109 | 	  mn[j] = mnmnj;
110 | 	}
111 |     }
112 | 
113 | /* Establish the indices   mj[1..n]  over which combination is necessary
114 |    for the concave MAJORANT (LCM) fit.
115 | */
116 |     mj[n] = n;
117 |     for (k = n - 1; k >= 1; k--) {
118 | 	mj[k] = k + 1;
119 | 	while(1) {
120 | 	  mjk = mj[k];
121 | 	  mjmjk = mj[mjk];
122 | 	  if (mjk == n ||
123 | 	      ( x[k]  - x[mjk]) * (mjk - mjmjk) <
124 | 	      (x[mjk] - x[mjmjk]) * (k - mjk)) break;
125 | 	  mj[k] = mjmjk;
126 | 	}
127 |     }
128 | 
129 | /* ----------------------- Start the cycling. ------------------------------- */
130 | LOOP_Start:
131 | 
132 |     /* Collect the change points for the GCM from HIGH to LOW. */
133 |     gcm[1] = high;
134 |     for(i = 1; gcm[i] > low; i++)
135 | 	gcm[i+1] = mn[gcm[i]];
136 |     ig = l_gcm = i; // l_gcm == relevant_length(GCM)
137 |     ix = ig - 1; //  ix, ig  are counters for the convex minorant.
138 | 
139 |     /* Collect the change points for the LCM from LOW to HIGH. */
140 |     lcm[1] = low;
141 |     for(i = 1; lcm[i] < high; i++)
142 | 	lcm[i+1] = mj[lcm[i]];
143 |     ih = l_lcm = i; // l_lcm == relevant_length(LCM)
144 |     iv = 2; //  iv, ih  are counters for the concave majorant.
145 | 
146 |     if(*debug) {
147 | 	Rprintf("'dip': LOOP-BEGIN: 2n*D= %-8.5g  [low,high] = [%3d,%3d]", *dip, low,high);
148 | 	if(*debug >= 3) {
149 | 	    Rprintf(" :\n gcm[1:%d] = ", l_gcm);
150 | 	    for(i = 1; i <= l_gcm; i++) Rprintf("%d%s", gcm[i], (i < l_gcm)? ", " : "\n");
151 | 	    Rprintf(" lcm[1:%d] = ", l_lcm);
152 | 	    for(i = 1; i <= l_lcm; i++) Rprintf("%d%s", lcm[i], (i < l_lcm)? ", " : "\n");
153 | 	} else { // debug <= 2 :
154 | 	    Rprintf("; l_lcm/gcm = (%2d,%2d)\n", l_lcm,l_gcm);
155 | 	}
156 |     }
157 | 
158 | /*	Find the largest distance greater than 'DIP' between the GCM and
159 |  *	the LCM from LOW to HIGH. */
160 | 
161 |     // FIXME: <Rconfig.h>  should provide LDOUBLE or something like it
162 |     long double d = 0.;// <<-- see if this makes 32-bit/64-bit difference go..
163 |     if (l_gcm != 2 || l_lcm != 2) {
164 | 	if(*debug) Rprintf("  while(gcm[ix] != lcm[iv]) :%s",
165 | 			   (*debug >= 2) ? "\n" : " ");
166 |       do { /* gcm[ix] != lcm[iv]  (after first loop) */
167 | 	  long double dx;
168 | 	  int gcmix = gcm[ix],
169 | 	      lcmiv = lcm[iv];
170 | 	  if (gcmix > lcmiv) {
171 | 	      /* If the next point of either the GCM or LCM is from the LCM,
172 | 	       * calculate the distance here. */
173 | 	      int gcmi1 = gcm[ix + 1];
174 | 	      dx = (lcmiv - gcmi1 + 1) -
175 | 		  ((long double) x[lcmiv] - x[gcmi1]) * (gcmix - gcmi1)/(x[gcmix] - x[gcmi1]);
176 | 	      ++iv;
177 | 	      if (dx >= d) {
178 | 		  d = dx;
179 | 		  ig = ix + 1;
180 | 		  ih = iv - 1;
181 | 		  if(*debug >= 2) Rprintf(" L(%d,%d)", ig,ih);
182 | 	      }
183 | 	  }
184 | 	  else {
185 | 	      /* If the next point of either the GCM or LCM is from the GCM,
186 | 	       * calculate the distance here. */
187 | 	      int lcmiv1 = lcm[iv - 1];
188 | /* Fix by Yong Lu {symmetric to above!}; original Fortran: only ")" misplaced! :*/
189 | 	      dx = ((long double)x[gcmix] - x[lcmiv1]) * (lcmiv - lcmiv1) /
190 | 		  (x[lcmiv] - x[lcmiv1])- (gcmix - lcmiv1 - 1);
191 | 	      --ix;
192 | 	      if (dx >= d) {
193 | 		  d = dx;
194 | 		  ig = ix + 1;
195 | 		  ih = iv;
196 | 		  if(*debug >= 2) Rprintf(" G(%d,%d)", ig,ih);
197 | 	      }
198 | 	  }
199 | 	  if (ix < 1)	ix = 1;
200 | 	  if (iv > l_lcm)	iv = l_lcm;
201 | 	  if(*debug) {
202 | 	      if(*debug >= 2) Rprintf(" --> ix = %d, iv = %d\n", ix,iv);
203 | 	      else Rprintf(".");
204 | 	  }
205 |       } while (gcm[ix] != lcm[iv]);
206 |       if(*debug && *debug < 2) Rprintf("\n");
207 |     }
208 |     else { /* l_gcm or l_lcm == 2 */
209 | 	d = (*min_is_0) ? 0. : 1.;
210 | 	if(*debug)
211 | 	    Rprintf("  ** (l_lcm,l_gcm) = (%d,%d) ==> d := %g\n", l_lcm, l_gcm, (double)d);
212 |     }
213 | 
214 |     if (d < *dip)	goto L_END;
215 | 
216 | /*     Calculate the DIPs for the current LOW and HIGH. */
217 |     if(*debug) Rprintf("  calculating dip ..");
218 | 
219 |     int j_best, j_l = -1, j_u = -1;
220 | 
221 |     /* The DIP for the convex minorant. */
222 |     dip_l = 0.;
223 |     for (j = ig; j < l_gcm; ++j) {
224 | 	double max_t = 1.;
225 | 	int j_ = -1, jb = gcm[j + 1], je = gcm[j];
226 | 	if (je - jb > 1 && x[je] != x[jb]) {
227 | 	  double C = (je - jb) / (x[je] - x[jb]);
228 | 	  for (int jj = jb; jj <= je; ++jj) {
229 | 	    double t = (jj - jb + 1) - (x[jj] - x[jb]) * C;
230 | 	    if (max_t < t) {
231 | 		max_t = t; j_ = jj;
232 | 	    }
233 | 	  }
234 | 	}
235 | 	if (dip_l < max_t) {
236 | 	    dip_l = max_t; j_l = j_;
237 | 	}
238 |     }
239 | 
240 |     /* The DIP for the concave majorant. */
241 |     dip_u = 0.;
242 |     for (j = ih; j < l_lcm; ++j) {
243 | 	double max_t = 1.;
244 | 	int j_ = -1, jb = lcm[j], je = lcm[j + 1];
245 | 	if (je - jb > 1 && x[je] != x[jb]) {
246 | 	  double C = (je - jb) / (x[je] - x[jb]);
247 | 	  for (int jj = jb; jj <= je; ++jj) {
248 | 	    double t = (x[jj] - x[jb]) * C - (jj - jb - 1);
249 | 	    if (max_t < t) {
250 | 		max_t = t; j_ = jj;
251 | 	    }
252 | 	  }
253 | 	}
254 | 	if (dip_u < max_t) {
255 | 	    dip_u = max_t; j_u = j_;
256 | 	}
257 |     }
258 | 
259 |     if(*debug) Rprintf(" (dip_l, dip_u) = (%g, %g)", dip_l, dip_u);
260 | 
261 |     /* Determine the current maximum. */
262 |     if(dip_u > dip_l) {
263 | 	dipnew = dip_u; j_best = j_u;
264 |     } else {
265 | 	dipnew = dip_l; j_best = j_l;
266 |     }
267 |     if (*dip < dipnew) {
268 | 	*dip = dipnew;
269 | 	if(*debug)
270 | 	    Rprintf(" -> new larger dip %g (j_best = %d)\n", dipnew, j_best);
271 |     }
272 |     else if(*debug) Rprintf("\n");
273 | 
274 |     /*--- The following if-clause is NECESSARY  (may loop infinitely otherwise)!
275 |       --- Martin Maechler, Statistics, ETH Zurich, July 30 1994 ---------- */
276 |     if (low == gcm[ig] && high == lcm[ih]) {
277 |       if(*debug)
278 | 	Rprintf("No improvement in  low = %d  nor  high = %d --> END\n",
279 | 		low, high);
280 |     } else {
281 | 	low  = gcm[ig];
282 | 	high = lcm[ih];	   goto LOOP_Start; /* Recycle */
283 |     }
284 | /*---------------------------------------------------------------------------*/
285 | 
286 | L_END:
287 |     /* do this in the caller :
288 |      *   *xl = x[low];  *xu = x[high];
289 |      * rather return the (low, high) indices -- automagically via lo_hi[]  */
290 |     *dip /= (2*n);
291 |     return;
292 | } /* diptst */
293 | #undef low
294 | #undef high
295 | 
296 | //----------------- Registration <==> ../NAMESPACE
297 | #define CDEF(name)  {#name, (DL_FUNC) &name, sizeof(name ## _t)/sizeof(name ## _t[0]), name ##_t}
298 | 
299 | #define CALLDEF(name, n)  {#name, (DL_FUNC) &name, n}
300 | 
301 | 
302 | // void diptst(double *x, int *n_,
303 | // 	    double *dip, int *lo_hi, int *ifault,
304 | // 	    int *gcm, int *lcm, int *mn, int *mj,
305 | // 	    int *min_is_0, int *debug)
306 | static R_NativePrimitiveArgType diptst_t[] = {
307 |     REALSXP, INTSXP, /* dip: */ REALSXP, INTSXP, INTSXP,
308 |     /* gcm: */ INTSXP, INTSXP,  INTSXP, INTSXP,
309 |     /* min_is_0:*/ LGLSXP, INTSXP
310 | };
311 | 
312 | 
313 | static const R_CMethodDef CEntries[]  = {
314 |     CDEF(diptst),
315 |     {NULL, NULL, 0}
316 | };
317 | 
318 | /* static R_CallMethodDef CallEntries[] = { */
319 | /*     CALLDEF(sinc_c, 1), */
320 | 
321 | /*     {NULL, NULL, 0} */
322 | /* }; */
323 | 
324 | /**
325 |  * register routines
326 |  * @param dll pointer
327 |  * @return none
328 |  * @author Martin Maechler
329 |  */
330 | void
331 | #ifdef HAVE_VISIBILITY_ATTRIBUTE
332 | __attribute__ ((visibility ("default")))
333 | #endif
334 | R_init_diptest(DllInfo *dll)
335 | {
336 |     R_registerRoutines(dll, CEntries, NULL, NULL, NULL);
337 |     /* R_registerRoutines(dll, CEntries, CallEntries, NULL, NULL); */
338 |     R_useDynamicSymbols(dll, FALSE);
339 | }
340 | 


--------------------------------------------------------------------------------
/tests/ex1.Rout.save-32b:
--------------------------------------------------------------------------------
  1 | 
  2 | R version 2.15.1 Patched (2012-08-11 r60235) -- "Roasted Marshmallows"
  3 | Copyright (C) 2012 The R Foundation for Statistical Computing
  4 | ISBN 3-900051-07-0
  5 | Platform: i686-pc-linux-gnu (32-bit)
  6 | 
  7 | R is free software and comes with ABSOLUTELY NO WARRANTY.
  8 | You are welcome to redistribute it under certain conditions.
  9 | Type 'license()' or 'licence()' for distribution details.
 10 | 
 11 | R is a collaborative project with many contributors.
 12 | Type 'contributors()' for more information and
 13 | 'citation()' on how to cite R or R packages in publications.
 14 | 
 15 | Type 'demo()' for some demos, 'help()' for on-line help, or
 16 | 'help.start()' for an HTML browser interface to help.
 17 | Type 'q()' to quit R.
 18 | 
 19 | > library(diptest)
 20 | > 
 21 | > stopifnot(dip(c(1,1,2,2)) == 1/4)# the maximal value possible: two point dist
 22 | > 
 23 | > ## very first small "unimodal" example --- the  1/(2*n) result:
 24 | > n <- length(u <- cumsum(0:3))
 25 | > d <- dip(u, debug=TRUE)# shows the final if() {added by MM} is really needed
 26 | dip() in C: n = 4; starting with  2N*dip = 1.
 27 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1,  4]; l_lcm/gcm = ( 4, 2)
 28 |   while(gcm[ix] != lcm[iv]) : ..
 29 |   calculating dip .. (dip_l, dip_u) = (0, 1)
 30 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1,  3]; l_lcm/gcm = ( 3, 2)
 31 |   while(gcm[ix] != lcm[iv]) : .
 32 |   calculating dip .. (dip_l, dip_u) = (0, 1)
 33 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1,  2]; l_lcm/gcm = ( 2, 2)
 34 |   ** (l_lcm,l_gcm) = (2,2) ==> d := 1
 35 |   calculating dip .. (dip_l, dip_u) = (0, 0)
 36 | No improvement in  low = 1  nor  high = 2 --> END
 37 | > stopifnot(d == dip(-u), d == 1/(2*n))# exact "=" for n = 4 !
 38 | > ## Note that I believe this should *not* give 0 (as fmechler@.. did),
 39 | > ## but rather 1/(2n) because that's  (1/n) / 2  and
 40 | > ## (1/n) is the correct distance between LCM and GCM
 41 | > 
 42 | > ## Small example -- but MM sees difference (32-bit / 64-bit):
 43 | > x <- c(0,2:3,5:6)
 44 | > d1 <- dip(x,   full=TRUE, debug=2)
 45 | dip() in C: n = 5; starting with  2N*dip = 1.
 46 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1,  5]; l_lcm/gcm = ( 2, 4)
 47 |   while(gcm[ix] != lcm[iv]) :
 48 |  G(3,2) --> ix = 2, iv = 2
 49 |  --> ix = 1, iv = 2
 50 |   calculating dip .. (dip_l, dip_u) = (1, 0)
 51 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  2,  5]; l_lcm/gcm = ( 3, 3)
 52 |   while(gcm[ix] != lcm[iv]) :
 53 |  L(3,2) --> ix = 2, iv = 3
 54 |  G(2,3) --> ix = 1, iv = 3
 55 |   calculating dip .. (dip_l, dip_u) = (1.33333, 0) -> new larger dip 1.33333 (j_best = 3)
 56 | 'dip': LOOP-BEGIN: 2n*D= 1.3333    [low,high] = [  4,  5]; l_lcm/gcm = ( 2, 2)
 57 |   ** (l_lcm,l_gcm) = (2,2) ==> d := 1
 58 | > d2 <- dip(6-x, full=TRUE, debug=2)
 59 | dip() in C: n = 5; starting with  2N*dip = 1.
 60 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1,  5]; l_lcm/gcm = ( 4, 2)
 61 |   while(gcm[ix] != lcm[iv]) :
 62 |  L(2,2) --> ix = 1, iv = 3
 63 |  L(2,3) --> ix = 1, iv = 4
 64 |   calculating dip .. (dip_l, dip_u) = (0, 1)
 65 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1,  4]; l_lcm/gcm = ( 3, 3)
 66 |   while(gcm[ix] != lcm[iv]) :
 67 |  L(3,2) --> ix = 2, iv = 3
 68 |  G(2,3) --> ix = 1, iv = 3
 69 |   calculating dip .. (dip_l, dip_u) = (1.33333, 0) -> new larger dip 1.33333 (j_best = 2)
 70 | 'dip': LOOP-BEGIN: 2n*D= 1.3333    [low,high] = [  3,  4]; l_lcm/gcm = ( 2, 2)
 71 |   ** (l_lcm,l_gcm) = (2,2) ==> d := 1
 72 | > str(d1)
 73 | List of 15
 74 |  $ call       : language dip(x = x, full.result = TRUE, debug = 2)
 75 |  $ x          : num [1:5] 0 2 3 5 6
 76 |  $ n          : int 5
 77 |  $ dip        : num 0.133
 78 |  $ lo.hi      : int [1:2] 4 5
 79 |  $ ifault     : int 0
 80 |  $ gcm        : int [1:2] 5 4
 81 |  $ lcm        : int [1:2] 4 5
 82 |  $ mn         : int [1:5] 1 1 2 2 4
 83 |  $ mj         : int [1:5] 5 3 5 5 5
 84 |  $ min.is.0   : logi FALSE
 85 |  $ debug      : int 2
 86 |  $ xl         : num 5
 87 |  $ xu         : num 6
 88 |  $ full.result: logi TRUE
 89 |  - attr(*, "class")= chr "dip"
 90 | > str(d2)
 91 | List of 15
 92 |  $ call       : language dip(x = 6 - x, full.result = TRUE, debug = 2)
 93 |  $ x          : num [1:5] 0 1 3 4 6
 94 |  $ n          : int 5
 95 |  $ dip        : num 0.133
 96 |  $ lo.hi      : int [1:2] 3 4
 97 |  $ ifault     : int 0
 98 |  $ gcm        : int [1:2] 4 3
 99 |  $ lcm        : int [1:2] 3 4
100 |  $ mn         : int [1:5] 1 1 1 3 1
101 |  $ mj         : int [1:5] 2 4 4 5 5
102 |  $ min.is.0   : logi FALSE
103 |  $ debug      : int 2
104 |  $ xl         : num 3
105 |  $ xu         : num 4
106 |  $ full.result: logi TRUE
107 |  - attr(*, "class")= chr "dip"
108 | > 
109 | > if(!dev.interactive(orNone=TRUE)) pdf("ex1.pdf")
110 | > par(mfrow = 2:1, mar = .1+c(3,4,2,1), mgp=c(1.5,.6,0), oma = c(0,0,2.1,0))
111 | > #
112 | > plot(d1)
113 | > abline(v=-1:7, h = seq(0,1,by=0.2), lty="83", col = "gray")
114 | > #
115 | > plot(d2)
116 | > abline(v=-1:7, h = seq(0,1,by=0.2), lty="83", col = "gray")
117 | > #
118 | > ## "title" only now
119 | > mtext("dip() problem with 'mirror x'", side=3, line = 0.8,
120 | +       outer=TRUE, cex = 1.5, font = 2)
121 | > 
122 | > 
123 | > ##  Yong Lu <lyongu+@cs.cmu.edu> example -- a bit smaller
124 | > x2 <- c(1, rep(2, 9))
125 | > stopifnot(dip(x2) == dip(3 - x2))
126 | > str(dip(x2, full=TRUE))
127 | List of 15
128 |  $ call       : language dip(x = x2, full.result = TRUE)
129 |  $ x          : num [1:10] 1 2 2 2 2 2 2 2 2 2
130 |  $ n          : int 10
131 |  $ dip        : num 0.05
132 |  $ lo.hi      : int [1:2] 2 10
133 |  $ ifault     : int 0
134 |  $ gcm        : int [1:2] 10 2
135 |  $ lcm        : int [1:2] 2 10
136 |  $ mn         : int [1:10] 1 1 2 2 2 2 2 2 2 2
137 |  $ mj         : int [1:10] 10 10 10 10 10 10 10 10 10 10
138 |  $ min.is.0   : logi FALSE
139 |  $ debug      : int 0
140 |  $ xl         : num 2
141 |  $ xu         : num 2
142 |  $ full.result: logi TRUE
143 |  - attr(*, "class")= chr "dip"
144 | > cat('Time elapsed: ', (.pt <- proc.time()),'\n') # "stats"
145 | Time elapsed:  0.41 0.042 0.504 0 0.008 
146 | > 
147 | > ## Real data examples :
148 | > 
149 | > data(statfaculty)
150 | > 
151 | > str(dip(statfaculty, full = "all", debug = 3), vec.len = 8)
152 | dip() in C: n = 63; starting with  2N*dip = 1.
153 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1, 63] :
154 |  gcm[1:6] = 63, 62, 7, 3, 2, 1
155 |  lcm[1:5] = 1, 44, 58, 59, 63
156 |   while(gcm[ix] != lcm[iv]) :
157 |  G(5,2) --> ix = 4, iv = 2
158 |  G(4,2) --> ix = 3, iv = 2
159 |  G(3,2) --> ix = 2, iv = 2
160 |  L(3,2) --> ix = 2, iv = 3
161 |  L(3,3) --> ix = 2, iv = 4
162 |  --> ix = 2, iv = 5
163 |  --> ix = 1, iv = 5
164 |   calculating dip .. (dip_l, dip_u) = (2, 2.11111) -> new larger dip 2.11111 (j_best = 61)
165 | 'dip': LOOP-BEGIN: 2n*D= 2.1111    [low,high] = [  7, 58] :
166 |  gcm[1:5] = 58, 55, 51, 48, 7
167 |  lcm[1:6] = 7, 11, 15, 42, 44, 58
168 |   while(gcm[ix] != lcm[iv]) :
169 |  L(5,2) --> ix = 4, iv = 3
170 |  L(5,3) --> ix = 4, iv = 4
171 |  L(5,4) --> ix = 4, iv = 5
172 |  L(5,5) --> ix = 4, iv = 6
173 |  --> ix = 3, iv = 6
174 |  --> ix = 2, iv = 6
175 |  --> ix = 1, iv = 6
176 |   calculating dip .. (dip_l, dip_u) = (0, 7.5) -> new larger dip 7.5 (j_best = 48)
177 | 'dip': LOOP-BEGIN: 2n*D= 7.5       [low,high] = [  7, 44] :
178 |  gcm[1:4] = 44, 43, 38, 7
179 |  lcm[1:5] = 7, 11, 15, 42, 44
180 |   while(gcm[ix] != lcm[iv]) :
181 |  L(4,2) --> ix = 3, iv = 3
182 |  L(4,3) --> ix = 3, iv = 4
183 |  --> ix = 2, iv = 4
184 |  --> ix = 2, iv = 5
185 |  --> ix = 1, iv = 5
186 | List of 17
187 |  $ call       : language dip(x = statfaculty, full.result = "all", debug = 3)
188 |  $ x          : num [1:63] 30 33 35 36 37 37 39 39 39 39 39 40 40 40 40 41 42 43 43 43 ...
189 |  $ n          : int 63
190 |  $ dip        : num 0.0595
191 |  $ lo.hi      : int [1:2] 7 44
192 |  $ ifault     : int 0
193 |  $ gcm        : int [1:4] 44 43 38 7
194 |  $ lcm        : int [1:5] 7 11 15 42 44
195 |  $ mn         : int [1:63] 1 1 2 3 3 5 3 7 7 7 7 7 12 12 12 7 7 7 18 18 ...
196 |  $ mj         : int [1:63] 44 44 15 15 6 15 11 11 11 11 15 15 15 15 42 42 20 20 20 42 ...
197 |  $ min.is.0   : logi FALSE
198 |  $ debug      : int 3
199 |  $ xl         : num 39
200 |  $ xu         : num 54
201 |  $ full.result: chr "all"
202 |  $ GCM        : int [1:6] 63 62 7 3 2 1
203 |  $ LCM        : int [1:5] 1 44 58 59 63
204 |  - attr(*, "class")= chr "dip"
205 | > 
206 | > data(faithful)
207 | > fE <- faithful$eruptions
208 | > str(dip(fE, full = "all", debug = 3),
209 | +     vec.len= 8)
210 | dip() in C: n = 272; starting with  2N*dip = 1.
211 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1,272] :
212 |  gcm[1:7] = 272, 135, 120, 119, 4, 2, 1
213 |  lcm[1:10] = 1, 40, 58, 60, 66, 79, 91, 261, 268, 272
214 |   while(gcm[ix] != lcm[iv]) :
215 |  G(6,2) --> ix = 5, iv = 2
216 |  G(5,2) --> ix = 4, iv = 2
217 |  L(5,2) --> ix = 4, iv = 3
218 |  L(5,3) --> ix = 4, iv = 4
219 |  L(5,4) --> ix = 4, iv = 5
220 |  L(5,5) --> ix = 4, iv = 6
221 |  L(5,6) --> ix = 4, iv = 7
222 |  L(5,7) --> ix = 4, iv = 8
223 |  G(4,8) --> ix = 3, iv = 8
224 |  G(3,8) --> ix = 2, iv = 8
225 |  --> ix = 1, iv = 8
226 |  --> ix = 1, iv = 9
227 |  --> ix = 1, iv = 10
228 |   calculating dip .. (dip_l, dip_u) = (50.2553, 3) -> new larger dip 50.2553 (j_best = 91)
229 | 'dip': LOOP-BEGIN: 2n*D= 50.255    [low,high] = [120,261] :
230 |  gcm[1:7] = 261, 260, 252, 181, 146, 135, 120
231 |  lcm[1:5] = 120, 124, 233, 246, 261
232 |   while(gcm[ix] != lcm[iv]) :
233 |  L(7,2) --> ix = 6, iv = 3
234 |  G(6,3) --> ix = 5, iv = 3
235 |  G(5,3) --> ix = 4, iv = 3
236 |  G(4,3) --> ix = 3, iv = 3
237 |  --> ix = 3, iv = 4
238 |  --> ix = 3, iv = 5
239 |  --> ix = 2, iv = 5
240 |  --> ix = 1, iv = 5
241 | List of 17
242 |  $ call       : language dip(x = fE, full.result = "all", debug = 3)
243 |  $ x          : num [1:272] 1.6 1.67 1.7 1.73 1.75 1.75 1.75 1.75 1.75 1.75 ...
244 |  $ n          : int 272
245 |  $ dip        : num 0.0924
246 |  $ lo.hi      : int [1:2] 120 261
247 |  $ ifault     : int 0
248 |  $ gcm        : int [1:7] 261 260 252 181 146 135 120
249 |  $ lcm        : int [1:5] 120 124 233 246 261
250 |  $ mn         : int [1:272] 1 1 2 2 4 5 5 5 5 5 5 11 5 13 13 13 13 17 17 13 ...
251 |  $ mj         : int [1:272] 40 40 40 10 10 10 10 10 10 40 12 36 16 16 16 26 19 19 26 26 ...
252 |  $ min.is.0   : logi FALSE
253 |  $ debug      : int 3
254 |  $ xl         : num 3.83
255 |  $ xu         : num 4.83
256 |  $ full.result: chr "all"
257 |  $ GCM        : int [1:7] 272 135 120 119 4 2 1
258 |  $ LCM        : int [1:10] 1 40 58 60 66 79 91 261 268 272
259 |  - attr(*, "class")= chr "dip"
260 | > 
261 | > data(precip)
262 | > str(dip(precip, full = TRUE, debug = TRUE))
263 | dip() in C: n = 70; starting with  2N*dip = 1.
264 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1, 70]; l_lcm/gcm = ( 6, 4)
265 |   while(gcm[ix] != lcm[iv]) : ......
266 |   calculating dip .. (dip_l, dip_u) = (5, 2.5) -> new larger dip 5 (j_best = 13)
267 | 'dip': LOOP-BEGIN: 2n*D= 5         [low,high] = [ 19, 64]; l_lcm/gcm = ( 6, 6)
268 |   while(gcm[ix] != lcm[iv]) : ........
269 |   calculating dip .. (dip_l, dip_u) = (3.875, 3.44828)
270 | 'dip': LOOP-BEGIN: 2n*D= 5         [low,high] = [ 31, 55]; l_lcm/gcm = ( 4, 3)
271 |   while(gcm[ix] != lcm[iv]) : ...
272 | List of 15
273 |  $ call       : language dip(x = precip, full.result = TRUE, debug = TRUE)
274 |  $ x          : num [1:70] 7 7.2 7.8 7.8 11.5 13 14 14.6 15 15.2 ...
275 |  $ n          : int 70
276 |  $ dip        : num 0.0357
277 |  $ lo.hi      : int [1:2] 31 55
278 |  $ ifault     : int 0
279 |  $ gcm        : int [1:3] 55 49 31
280 |  $ lcm        : int [1:4] 31 32 35 55
281 |  $ mn         : int [1:70] 1 1 1 3 1 1 6 7 8 9 ...
282 |  $ mj         : int [1:70] 2 4 4 64 55 10 10 10 10 55 ...
283 |  $ min.is.0   : logi FALSE
284 |  $ debug      : int 1
285 |  $ xl         : num 35.9
286 |  $ xu         : num 43.4
287 |  $ full.result: logi TRUE
288 |  - attr(*, "class")= chr "dip"
289 | > 
290 | > cat('Time elapsed: ', proc.time() - .pt,'\n') # "stats"
291 | Time elapsed:  0.061 0.007 0.085 0 0 
292 | > 
293 | > if(!interactive()) warnings()
294 | NULL
295 | > 
296 | > proc.time()
297 |    user  system elapsed 
298 |   0.472   0.057   0.590 
299 | 


--------------------------------------------------------------------------------
/stuff/jeremy-unimodality.R:
--------------------------------------------------------------------------------
  1 | ##-*- mode: R; kept-new-versions: 21; kept-old-versions: 12; -*-
  2 | 
  3 | #### From  http://www.stat.washington.edu/wxs/Stat593-s03/Code/jeremy-unimodality.R
  4 | ####
  5 | #### After recommendation by Mark Difford,
  6 | ####	on R-help Sep 09, 2008,  Subject  "Re: Modality Test"
  7 | #### https://stat.ethz.ch/pipermail/r-help/2008-September/173308.html
  8 | 
  9 | ## TODO [MM] : compare this "seriously" with dip() from my package 'diptest'
 10 | ##
 11 | ####-----------------------------------------------------------------------------------
 12 | ## Diagnostic plots for clustering and the DIP test for unimodality
 13 | ## Code written by Jeremy Tantrum, Winter 2003
 14 | ##=================================================================
 15 | ##
 16 | ## plot.ucdf(x) - plots the cdf of x and the closest unimodal cdf of x.
 17 | ##
 18 | ## plot.silverman(x) - plots the unimodal Gaussian smoother closest to the
 19 | ##                     x and the closest bimodal Gaussian smoother.
 20 | ##
 21 | ## calcdip(x) - calculates the dip test statistic for x, using the mode found
 22 | ##              by the closest unimodal Gaussian smoother.
 23 | ##
 24 | ## unisample(cd.out,n) - generates a sample from the unimodal distribution
 25 | ##                       returned by the output of calcdip.
 26 | ##
 27 | ##
 28 | ## An example of it working: Olive oil data - region 2 - are areas 5 and 6
 29 | ## different.
 30 | ## ---> see new file  ./jeremy-unimodality-olives.R
 31 | ##                    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 32 | 
 33 | ##=================================================================
 34 | 
 35 | plot.ucdf <- function(x,plot.it = TRUE,COL1 = 1,COL2 = 2,LWD1 = 3,LWD2 = 2)
 36 | {
 37 |     x.cdf <- (1:length(x))/length(x)
 38 |     plot(c(min(x),sort(x)),c(0,x.cdf),type = 'n',xlab = "data",ylab = "cdf")
 39 |     lines(c(par('usr')[1],sort(c(x,x)),par('usr')[2]),sort(c(0,0,x.cdf,x.cdf)),
 40 |           lwd = LWD1,col = COL1)
 41 |     h.0 <- critwidth(x,4)
 42 |     x.f0 <- density(x,width = h.0$low)
 43 |     x.mode <- x.f0$x[order(x.f0$y)[length(x.f0$x)]]
 44 |     x.mode2 <- x[order(abs(x-x.mode))[1]]
 45 |     if(x.mode2 < sort(x)[4])
 46 |         x.mode2 <- sort(x)[4]
 47 |     if(x.mode2 > sort(x)[length(x)-4])
 48 |         x.mode2 <- sort(x)[length(x)-4]
 49 |     x.ord <- sort(x)
 50 | 
 51 |     x.split <- x.ord < x.mode2
 52 |     x.split2 <- x.ord >= x.mode2
 53 |     hull1 <- chull(c(x.ord[x.split],x.mode2),c(0,x.cdf[x.split]))
 54 |     n.1 <- sum(x.split)+1
 55 |     hlist <- c(hull1,hull1,hull1)
 56 |     start <- (1:length(hlist))[hlist == n.1][2]
 57 |     if(hlist[start+1] == 1) hlist <- rev(hlist)
 58 |     start <- (1:length(hlist))[hlist == n.1][2]
 59 |     x.hull1 <- n.1
 60 |     i <- start
 61 |     if(findslope(x.ord,x.cdf,1,hlist[i]) > findslope(x.ord,x.cdf,1,hlist[i+1]))
 62 |         while(hlist[i] > 1) {
 63 |             i <- i + 1
 64 |             x.hull1 <- c(x.hull1, hlist[i])
 65 |         }
 66 |     else
 67 |         while(hlist[i] > 1) {
 68 |             i <- i - 1
 69 |             x.hull1 <- c(x.hull1, hlist[i])
 70 |         }
 71 |     x.hull1 <- sort(x.hull1)
 72 |     hull2 <- chull(x.ord[x.split2],x.cdf[x.split2])
 73 |     n.2 <- sum(x.split2)
 74 |     hlist <- c(hull2,hull2,hull2)
 75 |     start <- (1:length(hlist))[hlist == n.2][2]
 76 |     if(hlist[start+1] == 1) hlist <- rev(hlist)
 77 |     start <- (1:length(hlist))[hlist == n.2][2]
 78 |     x.hull2 <- n.2
 79 |     i <- start
 80 |     if(findslope(x.ord[x.split2],x.cdf[x.split2],1,hlist[i]) <
 81 |        findslope(x.ord[x.split2],x.cdf[x.split2],1,hlist[i+1]))
 82 |         while(hlist[i] > 1) {
 83 |             i <- i +1
 84 |             x.hull2 <- c(x.hull2, hlist[i])
 85 |         }
 86 |     else
 87 |         while(hlist[i] > 1) {
 88 |             i <- i - 1
 89 |             x.hull2 <- c(x.hull2, hlist[i])
 90 |         }
 91 |     x.hull2 <- sort(x.hull2)
 92 |     lines(c(x.ord[x.split],x.mode2)[x.hull1],c(0,x.cdf[x.split])[x.hull1],
 93 |           col = COL2,lwd = LWD2)
 94 |     lines(x.ord[x.split2][x.hull2],x.cdf[x.split2][x.hull2],col = COL2,lwd = LWD2)
 95 | 
 96 |     hull.out <- list(hull1 = x.hull1, hull2 = x.hull2+n.1-1,
 97 |                      x = x.ord[sort(unique(c(x.hull1,x.hull2+n.1-1)))],
 98 |                      y = x.cdf[sort(unique(c(x.hull1,x.hull2+n.1-1)))])
 99 |     delta <- rep(0,length(x))
100 |     for(i in 1:length(x))
101 |         delta[i] <- abs(x.cdf[i] - fofx(sort(x)[i],hull.out))
102 | 
103 |     return(invisible(list(cdf = x.cdf,unicurve = hull.out, mode = x.mode2,
104 |                           dip = max(delta), delta = delta,
105 |                           dipwhere = order(delta)[length(delta)] )))
106 | }
107 | 
108 | ##-----------------------------------------------------------------
109 | 
110 | plot.silverman <- function(x,COL1 = 1,COL2 = "grey",...)
111 | {
112 |   h.0 <- critwidth(x,4,tol = 0.0001)$high
113 |   h.1 <- critwidth2(x,h.0,tol = 0.001)$high
114 |   f.c <- density(x,window = "g",width = h.0,n = 100)
115 |   f.n <- density(x,width = h.1,n = 100)
116 |   plot(c(f.c$x,f.n$x),c(f.c$y,f.n$y),type = 'n',xlab = "",ylab = "density",...)
117 |   lines(f.n,lwd = 3,col = COL1)
118 |   lines(f.c,lwd = 3,col = COL2)
119 |   points(x,rep(0,length(x)),pch = '|')
120 | }
121 | 
122 | ## BELOW HERE ARE functions used by plot.silverman:
123 | 
124 | critwidth <- function(g.data,start,tol = 0.001,n.points = 200)
125 | {
126 |     if(is.unimodal(density(g.data,window = "g",width = start,n = n.points))) {
127 |         high <- start
128 |         low <- start/2
129 |         while(is.unimodal(density(g.data,window = "g",width = low,n = n.points)))
130 |             low <- low/2
131 |     }
132 |     else {
133 |         low <- start
134 |         high <- start*2
135 |         while(!is.unimodal(density(g.data,window = "g",width = high,n = n.points)))
136 |             high <- high*2
137 |     }
138 |     ## is.unimodal(low)=F and is.unimodal(high)=T
139 |     while(high-low > tol) {
140 |         wdth <- 0.5 * (high+low)
141 |         if(is.unimodal(density(g.data,window = "g",width = wdth,n = n.points)))
142 |             high <- wdth
143 |         else
144 |             low <- wdth
145 |     }
146 |     return(list(low = low,high = high))
147 | }
148 | 
149 | ###########################################################################
150 | critwidth2 <- function(g.data,h.0,tol = 0.001,n.points = 200)
151 | {
152 |     ## h.0 is the critical width for a is.unimodal
153 |     start <- h.0 + 2 * tol
154 |     if(is.bimodal(density(g.data,window = "g",width = start,n = n.points))) {
155 |         high <- start
156 |         low <- start/2
157 |         while(is.bimodal(density(g.data,window = "g",width = low,n = n.points)))
158 |             low <- low/2
159 |     }
160 |     else {
161 |         low <- start
162 |         high <- start*2
163 |         while(!is.bimodal(density(g.data,window = "g",width = high,n = n.points)))
164 |             high <- high*2
165 |     }
166 |     ## is.unimodal(low)=F and is.unimodal(high)=T
167 |     while(high-low > tol) {
168 |         wdth <- 0.5 * (high+low)
169 |         if(is.bimodal(density(g.data,window = "g",width = wdth,n = n.points)))
170 |             high <- wdth
171 |         else
172 |             low <- wdth
173 |     }
174 |     return(list(low = low,high = high))
175 | }
176 | 
177 | is.unimodal <- function(dens)
178 | {
179 | ## dens is a list of dens$x and dens$y
180 |   cdf <- cumsum(dens$y)
181 |   n <- length(cdf)
182 |   cdf.diff1 <- cdf[-1] - cdf[-n]
183 |   cdf.diff2 <- cdf.diff1[-1] - cdf.diff1[-(n-1)]
184 |   return(!any(order(-sign(cdf.diff2)) - 1:(n-2) > 0))
185 | }
186 | 
187 | is.bimodal <- function(dens)
188 | {
189 |  ## dens is a list of dens$x and dens$y
190 |   cdf <- cumsum(dens$y)
191 |   n <- length(cdf)
192 |   cdf.diff1 <- cdf[-1] - cdf[-n]
193 |   cdf.diff2 <- cdf.diff1[-1] - cdf.diff1[-(n-1)]
194 |   return(sum(sign(cdf.diff2)[-1] - sign(cdf.diff2)[-(n-2)] < 0) <= 2)
195 | }
196 | 
197 | ##-----------------------------------------------------------------
198 | 
199 | calcdip <- function(x, plot.it = TRUE, calc.it = TRUE)
200 | {
201 |     x <- sort(x)
202 |     stopifnot((n <- length(x)) >= 4)
203 |     h.0 <- critwidth(x,4)
204 |     x.f0 <- density(x,width = h.0$low)
205 |     x.mode <- x.f0$x[order(x.f0$y)[length(x.f0$x)]]
206 |     x.mode2 <- x[order(abs(x-x.mode))[1]]
207 |     if(x.mode2 < x[4])
208 |         x.mode2 <- x[4]
209 |     if(x.mode2 > x[n-4])
210 |         x.mode2 <- x[n-4]
211 |     x.cdf <- (1:n)/n
212 |     hull.out <- findhulls(x,x.cdf,x.mode2,plot.it = plot.it,xlab = "",ylab = "CDF")
213 |     delta <- rep(0,n)
214 |     if(calc.it)
215 |         for(i in 1:n)
216 |             delta[i] <- abs(x.cdf[i] - fofx(x[i],hull.out))
217 |     return(list(dip = max(delta),unicurve = hull.out))
218 | }
219 | 
220 | ## and the other functions needed:
221 | unisample <- function(hull.out,size)
222 | {
223 |     n <- length(hull.out$x)
224 |     min.x <- hull.out$x[1] - hull.out$y[1]/findslope(hull.out$x,hull.out$y,1,2)
225 |     probs <- hull.out$y[-1] - c(0,hull.out$y[-c(1,n)])
226 |     where <- sample(1:(n-1),size,replace = TRUE,prob = probs)
227 |     out <- numeric(0)
228 |     x <- c(min.x,hull.out$x[-1])
229 |     for(i in 2:n) {
230 |         x.s <- sum(where == i-1)
231 |         if(x.s > 0)
232 |             out <- c(out,runif(x.s,x[i-1],x[i]))
233 |     }
234 |     return(out)
235 | }
236 | 
237 | findhulls <- function(x.ord,x.cdf,x.mode,plot.it = TRUE,...)
238 | {
239 |   x.split <- x.ord <= x.mode
240 |   x.split2 <- x.ord >= x.mode
241 |   hull1 <- chull(x.ord[x.split],x.cdf[x.split])
242 |   n.1 <- sum(x.split)
243 |   hlist <- c(hull1,hull1,hull1)
244 |   start <- (1:length(hlist))[hlist == n.1][2]
245 |   if(hlist[start+1] == 1) hlist <- rev(hlist)
246 |   start <- (1:length(hlist))[hlist == n.1][2]
247 |   x.hull1 <- n.1
248 |   i <- start
249 |   if(findslope(x.ord,x.cdf,1,hlist[i]) > findslope(x.ord,x.cdf,1,hlist[i+1]))
250 |     while(hlist[i] > 1) {
251 |         i <- i + 1
252 |         x.hull1 <- c(x.hull1, hlist[i])
253 |       }
254 |   else
255 |     while(hlist[i] > 1) {
256 |         i <- i - 1
257 |         x.hull1 <- c(x.hull1, hlist[i])
258 |       }
259 |   x.hull1 <- sort(x.hull1)
260 |   hull2 <- chull(x.ord[x.split2],x.cdf[x.split2])
261 |   n.2 <- sum(x.split2)
262 |   hlist <- c(hull2,hull2,hull2)
263 |   start <- (1:length(hlist))[hlist == n.2][2]
264 |   if(hlist[start+1] == 1) hlist <- rev(hlist)
265 |   start <- (1:length(hlist))[hlist == n.2][2]
266 |   x.hull2 <- n.2
267 |   i <- start
268 |   if(findslope(x.ord[x.split2],x.cdf[x.split2],1,hlist[i]) <
269 |      findslope(x.ord[x.split2],x.cdf[x.split2],1,hlist[i+1]))
270 |     while(hlist[i] > 1) {
271 |         i <- i +1
272 |         x.hull2 <- c(x.hull2, hlist[i])
273 |       }
274 |   else
275 |     while(hlist[i] > 1) {
276 |         i <- i - 1
277 |         x.hull2 <- c(x.hull2, hlist[i])
278 |   }
279 |   x.hull2 <- sort(x.hull2)
280 |   if(plot.it) {
281 |     plot(x.ord,x.cdf,...)
282 |     lines(x.ord[x.split][x.hull1],x.cdf[x.split][x.hull1])
283 |     lines(x.ord[x.split2][x.hull2],x.cdf[x.split2][x.hull2])
284 |   }
285 |   return(list(hull1 = x.hull1, hull2 = x.hull2+n.1 -1,
286 |               x = x.ord[sort(unique(c(x.hull1,x.hull2+n.1-1)))],
287 |               y = x.cdf[sort(unique(c(x.hull1,x.hull2+n.1-1)))]))
288 | }
289 | 
290 | ###########################################################################
291 | findslope <- function(x,y,i,j)  return((y[j] - y[i])/(x[j]-x[i]))
292 | 
293 | ###########################################################################
294 | fofx <- function(x,hull.out)
295 | {
296 |     n <- length(hull.out$x)
297 |     if(x <= hull.out$x[1])
298 |         return(0)
299 |     if(x >= hull.out$x[n])
300 |         return(1)
301 |     where <- (1:n)[order(c(x,hull.out$x)) == 1]
302 |     return( (x-hull.out$x[where-1])/(hull.out$x[where]-hull.out$x[where-1]) *
303 |            (hull.out$y[where]-hull.out$y[where-1]) + hull.out$y[where-1])
304 | }
305 | 
306 | 


--------------------------------------------------------------------------------
/tests/ex1.Rout.save-64b:
--------------------------------------------------------------------------------
  1 | 
  2 | R version 4.5.1 Patched (2025-08-11 r88580) -- "Great Square Root"
  3 | Copyright (C) 2025 The R Foundation for Statistical Computing
  4 | Platform: x86_64-pc-linux-gnu
  5 | 
  6 | R is free software and comes with ABSOLUTELY NO WARRANTY.
  7 | You are welcome to redistribute it under certain conditions.
  8 | Type 'license()' or 'licence()' for distribution details.
  9 | 
 10 | R is a collaborative project with many contributors.
 11 | Type 'contributors()' for more information and
 12 | 'citation()' on how to cite R or R packages in publications.
 13 | 
 14 | Type 'demo()' for some demos, 'help()' for on-line help, or
 15 | 'help.start()' for an HTML browser interface to help.
 16 | Type 'q()' to quit R.
 17 | 
 18 | > library(diptest)
 19 | > 
 20 | > stopifnot(dip(c(1,1,2,2)) == 1/4)# the maximal value possible: two point dist
 21 | > 
 22 | > ## very first small "unimodal" example --- the  1/(2*n) result:
 23 | > n <- length(u <- cumsum(0:3))
 24 | > d <- dip(u, debug=TRUE)# shows the final if() {added by MM} is really needed
 25 | dip() in C: n = 4; starting with  2N*dip = 1.
 26 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1,  4]; l_lcm/gcm = ( 4, 2)
 27 |   while(gcm[ix] != lcm[iv]) : ..
 28 |   calculating dip .. (dip_l, dip_u) = (0, 1)
 29 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1,  3]; l_lcm/gcm = ( 3, 2)
 30 |   while(gcm[ix] != lcm[iv]) : .
 31 |   calculating dip .. (dip_l, dip_u) = (0, 1)
 32 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1,  2]; l_lcm/gcm = ( 2, 2)
 33 |   ** (l_lcm,l_gcm) = (2,2) ==> d := 1
 34 |   calculating dip .. (dip_l, dip_u) = (0, 0)
 35 | No improvement in  low = 1  nor  high = 2 --> END
 36 | > stopifnot(d == dip(-u), d == 1/(2*n))# exact "=" for n = 4 !
 37 | > ## Note that I believe this should *not* give 0 (as fmechler@.. did),
 38 | > ## but rather 1/(2n) because that's  (1/n) / 2  and
 39 | > ## (1/n) is the correct distance between LCM and GCM
 40 | > 
 41 | > ## Small example -- but MM sees difference (32-bit / 64-bit) *and* on M1mac:
 42 | > x <- c(0,2:3,5:6)
 43 | > ## IGNORE_RDIFF_BEGIN
 44 | > d1 <- dip(x,   full=TRUE, debug=2)
 45 | dip() in C: n = 5; starting with  2N*dip = 1.
 46 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1,  5]; l_lcm/gcm = ( 2, 4)
 47 |   while(gcm[ix] != lcm[iv]) :
 48 |  G(3,2) --> ix = 2, iv = 2
 49 |  --> ix = 1, iv = 2
 50 |   calculating dip .. (dip_l, dip_u) = (1, 0)
 51 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  2,  5]; l_lcm/gcm = ( 3, 3)
 52 |   while(gcm[ix] != lcm[iv]) :
 53 |  L(3,2) --> ix = 2, iv = 3
 54 |  G(2,3) --> ix = 1, iv = 3
 55 |   calculating dip .. (dip_l, dip_u) = (1.33333, 0) -> new larger dip 1.33333 (j_best = 3)
 56 | 'dip': LOOP-BEGIN: 2n*D= 1.3333    [low,high] = [  4,  5]; l_lcm/gcm = ( 2, 2)
 57 |   ** (l_lcm,l_gcm) = (2,2) ==> d := 1
 58 | > d2 <- dip(6-x, full=TRUE, debug=2)
 59 | dip() in C: n = 5; starting with  2N*dip = 1.
 60 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1,  5]; l_lcm/gcm = ( 4, 2)
 61 |   while(gcm[ix] != lcm[iv]) :
 62 |  L(2,2) --> ix = 1, iv = 3
 63 |  L(2,3) --> ix = 1, iv = 4
 64 |   calculating dip .. (dip_l, dip_u) = (0, 1)
 65 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1,  4]; l_lcm/gcm = ( 3, 3)
 66 |   while(gcm[ix] != lcm[iv]) :
 67 |  L(3,2) --> ix = 2, iv = 3
 68 |  G(2,3) --> ix = 1, iv = 3
 69 |   calculating dip .. (dip_l, dip_u) = (1.33333, 0) -> new larger dip 1.33333 (j_best = 2)
 70 | 'dip': LOOP-BEGIN: 2n*D= 1.3333    [low,high] = [  3,  4]; l_lcm/gcm = ( 2, 2)
 71 |   ** (l_lcm,l_gcm) = (2,2) ==> d := 1
 72 | > str(d2) # differences to M1mac (!)
 73 | List of 15
 74 |  $ call       : language dip(x = 6 - x, full.result = TRUE, debug = 2)
 75 |  $ x          : num [1:5] 0 1 3 4 6
 76 |  $ n          : int 5
 77 |  $ dip        : num 0.133
 78 |  $ lo.hi      : int [1:2] 3 4
 79 |  $ ifault     : int 0
 80 |  $ gcm        : int [1:2] 4 3
 81 |  $ lcm        : int [1:2] 3 4
 82 |  $ mn         : int [1:5] 1 1 1 3 1
 83 |  $ mj         : int [1:5] 2 4 4 5 5
 84 |  $ min.is.0   : logi FALSE
 85 |  $ debug      : int 2
 86 |  $ xl         : num 3
 87 |  $ xu         : num 4
 88 |  $ full.result: logi TRUE
 89 |  - attr(*, "class")= chr "dip"
 90 | > ## IGNORE_RDIFF_END
 91 | > str(d1)
 92 | List of 15
 93 |  $ call       : language dip(x = x, full.result = TRUE, debug = 2)
 94 |  $ x          : num [1:5] 0 2 3 5 6
 95 |  $ n          : int 5
 96 |  $ dip        : num 0.133
 97 |  $ lo.hi      : int [1:2] 4 5
 98 |  $ ifault     : int 0
 99 |  $ gcm        : int [1:2] 5 4
100 |  $ lcm        : int [1:2] 4 5
101 |  $ mn         : int [1:5] 1 1 2 2 4
102 |  $ mj         : int [1:5] 5 3 5 5 5
103 |  $ min.is.0   : logi FALSE
104 |  $ debug      : int 2
105 |  $ xl         : num 5
106 |  $ xu         : num 6
107 |  $ full.result: logi TRUE
108 |  - attr(*, "class")= chr "dip"
109 | > 
110 | > if(!dev.interactive(orNone=TRUE)) pdf("ex1.pdf")
111 | > par(mfrow = 2:1, mar = .1+c(3,4,2,1), mgp=c(1.5,.6,0), oma = c(0,0,2.1,0))
112 | > #
113 | > plot(d1)
114 | > abline(v=-1:7, h = seq(0,1,by=0.2), lty="83", col = "gray")
115 | > #
116 | > plot(d2)
117 | > abline(v=-1:7, h = seq(0,1,by=0.2), lty="83", col = "gray")
118 | > #
119 | > ## "title" only now
120 | > mtext("dip() problem with 'mirror x'", side=3, line = 0.8,
121 | +       outer=TRUE, cex = 1.5, font = 2)
122 | > 
123 | > 
124 | > ##  Yong Lu <lyongu+@cs.cmu.edu> example -- a bit smaller
125 | > x2 <- c(1, rep(2, 9))
126 | > stopifnot(dip(x2) == dip(3 - x2))
127 | > str(dip(x2, full=TRUE))
128 | List of 15
129 |  $ call       : language dip(x = x2, full.result = TRUE)
130 |  $ x          : num [1:10] 1 2 2 2 2 2 2 2 2 2
131 |  $ n          : int 10
132 |  $ dip        : num 0.05
133 |  $ lo.hi      : int [1:2] 2 10
134 |  $ ifault     : int 0
135 |  $ gcm        : int [1:2] 10 2
136 |  $ lcm        : int [1:2] 2 10
137 |  $ mn         : int [1:10] 1 1 2 2 2 2 2 2 2 2
138 |  $ mj         : int [1:10] 10 10 10 10 10 10 10 10 10 10
139 |  $ min.is.0   : logi FALSE
140 |  $ debug      : int 0
141 |  $ xl         : num 2
142 |  $ xu         : num 2
143 |  $ full.result: logi TRUE
144 |  - attr(*, "class")= chr "dip"
145 | > cat('Time elapsed: ', (.pt <- proc.time()),'\n') # "stats"
146 | Time elapsed:  0.222 0.029 0.249 0.002 0.006 
147 | > 
148 | > ## Real data examples :
149 | > 
150 | > data(statfaculty)
151 | > 
152 | > str(dip(statfaculty, full = "all", debug = 3), vec.len = 8)
153 | dip() in C: n = 63; starting with  2N*dip = 1.
154 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1, 63] :
155 |  gcm[1:6] = 63, 62, 7, 3, 2, 1
156 |  lcm[1:5] = 1, 44, 58, 59, 63
157 |   while(gcm[ix] != lcm[iv]) :
158 |  G(5,2) --> ix = 4, iv = 2
159 |  G(4,2) --> ix = 3, iv = 2
160 |  G(3,2) --> ix = 2, iv = 2
161 |  L(3,2) --> ix = 2, iv = 3
162 |  L(3,3) --> ix = 2, iv = 4
163 |  --> ix = 2, iv = 5
164 |  --> ix = 1, iv = 5
165 |   calculating dip .. (dip_l, dip_u) = (2, 2.11111) -> new larger dip 2.11111 (j_best = 61)
166 | 'dip': LOOP-BEGIN: 2n*D= 2.1111    [low,high] = [  7, 58] :
167 |  gcm[1:5] = 58, 55, 51, 48, 7
168 |  lcm[1:6] = 7, 11, 15, 42, 44, 58
169 |   while(gcm[ix] != lcm[iv]) :
170 |  L(5,2) --> ix = 4, iv = 3
171 |  L(5,3) --> ix = 4, iv = 4
172 |  L(5,4) --> ix = 4, iv = 5
173 |  L(5,5) --> ix = 4, iv = 6
174 |  --> ix = 3, iv = 6
175 |  --> ix = 2, iv = 6
176 |  --> ix = 1, iv = 6
177 |   calculating dip .. (dip_l, dip_u) = (0, 7.5) -> new larger dip 7.5 (j_best = 48)
178 | 'dip': LOOP-BEGIN: 2n*D= 7.5       [low,high] = [  7, 44] :
179 |  gcm[1:4] = 44, 43, 38, 7
180 |  lcm[1:5] = 7, 11, 15, 42, 44
181 |   while(gcm[ix] != lcm[iv]) :
182 |  L(4,2) --> ix = 3, iv = 3
183 |  L(4,3) --> ix = 3, iv = 4
184 |  --> ix = 2, iv = 4
185 |  --> ix = 2, iv = 5
186 |  --> ix = 1, iv = 5
187 | List of 17
188 |  $ call       : language dip(x = statfaculty, full.result = "all", debug = 3)
189 |  $ x          : num [1:63] 30 33 35 36 37 37 39 39 39 39 39 40 40 40 40 41 42 43 43 43 ...
190 |  $ n          : int 63
191 |  $ dip        : num 0.0595
192 |  $ lo.hi      : int [1:2] 7 44
193 |  $ ifault     : int 0
194 |  $ gcm        : int [1:4] 44 43 38 7
195 |  $ lcm        : int [1:5] 7 11 15 42 44
196 |  $ mn         : int [1:63] 1 1 2 3 3 5 3 7 7 7 7 7 12 12 12 7 7 7 18 18 ...
197 |  $ mj         : int [1:63] 44 44 15 15 6 15 11 11 11 11 15 15 15 15 42 42 20 20 20 42 ...
198 |  $ min.is.0   : logi FALSE
199 |  $ debug      : int 3
200 |  $ xl         : num 39
201 |  $ xu         : num 54
202 |  $ full.result: chr "all"
203 |  $ GCM        : int [1:6] 63 62 7 3 2 1
204 |  $ LCM        : int [1:5] 1 44 58 59 63
205 |  - attr(*, "class")= chr "dip"
206 | > 
207 | > data(faithful)
208 | > fE <- faithful$eruptions
209 | > str(dip(fE, full = "all", debug = 3),
210 | +     vec.len= 8)
211 | dip() in C: n = 272; starting with  2N*dip = 1.
212 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1,272] :
213 |  gcm[1:7] = 272, 135, 120, 119, 4, 2, 1
214 |  lcm[1:10] = 1, 40, 58, 60, 66, 79, 91, 261, 268, 272
215 |   while(gcm[ix] != lcm[iv]) :
216 |  G(6,2) --> ix = 5, iv = 2
217 |  G(5,2) --> ix = 4, iv = 2
218 |  L(5,2) --> ix = 4, iv = 3
219 |  L(5,3) --> ix = 4, iv = 4
220 |  L(5,4) --> ix = 4, iv = 5
221 |  L(5,5) --> ix = 4, iv = 6
222 |  L(5,6) --> ix = 4, iv = 7
223 |  L(5,7) --> ix = 4, iv = 8
224 |  G(4,8) --> ix = 3, iv = 8
225 |  G(3,8) --> ix = 2, iv = 8
226 |  --> ix = 1, iv = 8
227 |  --> ix = 1, iv = 9
228 |  --> ix = 1, iv = 10
229 |   calculating dip .. (dip_l, dip_u) = (50.2553, 3) -> new larger dip 50.2553 (j_best = 91)
230 | 'dip': LOOP-BEGIN: 2n*D= 50.255    [low,high] = [120,261] :
231 |  gcm[1:7] = 261, 260, 252, 181, 146, 135, 120
232 |  lcm[1:5] = 120, 124, 233, 246, 261
233 |   while(gcm[ix] != lcm[iv]) :
234 |  L(7,2) --> ix = 6, iv = 3
235 |  G(6,3) --> ix = 5, iv = 3
236 |  G(5,3) --> ix = 4, iv = 3
237 |  G(4,3) --> ix = 3, iv = 3
238 |  --> ix = 3, iv = 4
239 |  --> ix = 3, iv = 5
240 |  --> ix = 2, iv = 5
241 |  --> ix = 1, iv = 5
242 | List of 17
243 |  $ call       : language dip(x = fE, full.result = "all", debug = 3)
244 |  $ x          : num [1:272] 1.6 1.67 1.7 1.73 1.75 1.75 1.75 1.75 1.75 1.75 ...
245 |  $ n          : int 272
246 |  $ dip        : num 0.0924
247 |  $ lo.hi      : int [1:2] 120 261
248 |  $ ifault     : int 0
249 |  $ gcm        : int [1:7] 261 260 252 181 146 135 120
250 |  $ lcm        : int [1:5] 120 124 233 246 261
251 |  $ mn         : int [1:272] 1 1 2 2 4 5 5 5 5 5 5 11 5 13 13 13 13 17 17 13 ...
252 |  $ mj         : int [1:272] 40 40 40 10 10 10 10 10 10 40 12 36 16 16 16 26 19 19 26 26 ...
253 |  $ min.is.0   : logi FALSE
254 |  $ debug      : int 3
255 |  $ xl         : num 3.83
256 |  $ xu         : num 4.83
257 |  $ full.result: chr "all"
258 |  $ GCM        : int [1:7] 272 135 120 119 4 2 1
259 |  $ LCM        : int [1:10] 1 40 58 60 66 79 91 261 268 272
260 |  - attr(*, "class")= chr "dip"
261 | > 
262 | > data(precip)
263 | > str(dip(precip, full = TRUE, debug = TRUE))
264 | dip() in C: n = 70; starting with  2N*dip = 1.
265 | 'dip': LOOP-BEGIN: 2n*D= 1         [low,high] = [  1, 70]; l_lcm/gcm = ( 6, 4)
266 |   while(gcm[ix] != lcm[iv]) : ......
267 |   calculating dip .. (dip_l, dip_u) = (5, 2.5) -> new larger dip 5 (j_best = 13)
268 | 'dip': LOOP-BEGIN: 2n*D= 5         [low,high] = [ 19, 64]; l_lcm/gcm = ( 6, 6)
269 |   while(gcm[ix] != lcm[iv]) : ........
270 |   calculating dip .. (dip_l, dip_u) = (3.875, 3.44828)
271 | 'dip': LOOP-BEGIN: 2n*D= 5         [low,high] = [ 31, 55]; l_lcm/gcm = ( 4, 3)
272 |   while(gcm[ix] != lcm[iv]) : ...
273 | List of 15
274 |  $ call       : language dip(x = precip, full.result = TRUE, debug = TRUE)
275 |  $ x          : num [1:70] 7 7.2 7.8 7.8 11.5 13 14 14.6 15 15.2 ...
276 |  $ n          : int 70
277 |  $ dip        : num 0.0357
278 |  $ lo.hi      : int [1:2] 31 55
279 |  $ ifault     : int 0
280 |  $ gcm        : int [1:3] 55 49 31
281 |  $ lcm        : int [1:4] 31 32 35 55
282 |  $ mn         : int [1:70] 1 1 1 3 1 1 6 7 8 9 ...
283 |  $ mj         : int [1:70] 2 4 4 64 55 10 10 10 10 55 ...
284 |  $ min.is.0   : logi FALSE
285 |  $ debug      : int 1
286 |  $ xl         : Named num 35.9
287 |   ..- attr(*, "names")= chr "Dallas"
288 |  $ xu         : Named num 43.4
289 |   ..- attr(*, "names")= chr "Hartford"
290 |  $ full.result: logi TRUE
291 |  - attr(*, "class")= chr "dip"
292 | > 
293 | > ## current qDiptab <--> n = 72'000 is "asymptotic" boundary
294 | > set.seed(123); x <- rnorm(72000)
295 | > dt72k <- dip.test(x)
296 | > ## gave error  in qDiptab[i2, ] : subscript out of bounds  -- in diptest <= 0.77-0
297 | > stopifnot(all.equal(list(statistic = c(D = 0.0005171098381181), p.value = 1, nobs = 72000L),
298 | +                     dt72k[c("statistic", "p.value", "nobs")], tolerance = 1e-13))
299 | > 
300 | > 
301 | > cat('Time elapsed: ', proc.time() - .pt,'\n') # "stats"
302 | Time elapsed:  0.092 0.015 0.108 0 0 
303 | > 
304 | > if(!interactive()) warnings()
305 | > 
306 | > proc.time()
307 |    user  system elapsed 
308 |   0.280   0.043   0.313 
309 | 


--------------------------------------------------------------------------------
/vignettes/diptest-issues.Rnw:
--------------------------------------------------------------------------------
  1 | %\documentclass[article]{jss}
  2 | \documentclass[nojss,article]{jss}
  3 | %              ----- for the package-vignette, don't use JSS logo, etc
  4 | %
  5 | % \author{Martin Maechler\\ Seminar f\"ur Statistik \\ ETH Zurich, \ Switzerland
  6 | %   \\\email{maechler@stat.math.ethz.ch}}
  7 | \author{Martin M\"achler \\ ETH Zurich}
  8 | \title{Dip Test Distributions, P-values, and other Explorations}
  9 | % \def\mythanks{a version of this paper, for \pkg{nacopula} 0.4\_4, has been published
 10 | %     in JSS, \url{http://www.jstatsoft.org/v39/i09}.}
 11 | %% for pretty printing and a nice hypersummary also set:
 12 | \Plainauthor{Martin M\"achler} %% comma-separated
 13 | \Plaintitle{Dip Test Distributions, P-values, and other Explorations}
 14 | % \Shorttitle{}
 15 | %\date{April 2009 ({\tiny typeset on \tiny\today})}
 16 | %%\VignetteIndexEntry{Dip Test Distributions, P-values, and other Explorations}
 17 | %%\VignetteDepends{diptest}
 18 | \SweaveOpts{engine=R,keep.source=TRUE,strip.white=true}
 19 | %		     ^^^^^^^^^^^^^^^^
 20 | \SweaveOpts{eps=FALSE,pdf=TRUE,width=7,height=4}
 21 | 
 22 | %% an abstract and keywords
 23 | \Abstract{
 24 | 
 25 | ... % FIXME
 26 | 
 27 | ... % FIXME
 28 | }
 29 | %
 30 | \Keywords{MPFR, Abitrary Precision, Multiple Precision Floating-Point, R}
 31 | %% at least one keyword must be supplied
 32 | 
 33 | %% publication information
 34 | %% NOTE: Typically, this can be left commented and will be filled out by the technical editor
 35 | %% \Volume{13}
 36 | %% \Issue{9}
 37 | %% \Month{September}
 38 | %% \Year{2004}
 39 | %% \Submitdate{2004-09-29}
 40 | %% \Acceptdate{2004-09-29}
 41 | 
 42 | %% The address of (at least) one author should be given
 43 | %% in the following format:
 44 | \Address{
 45 | 	Martin M\"achler\\
 46 | 	Seminar f\"ur Statistik, HG G~14.2\\
 47 | 	ETH Zurich\\
 48 | 	8092 Zurich, Switzerland\\
 49 | 	E-mail: \email{maechler@stat.math.ethz.ch}\\
 50 | 	URL: \url{https://people.math.ethz.ch/~maechler/}
 51 | }
 52 | %% It is also possible to add a telephone and fax number
 53 | %% before the e-mail in the following format:
 54 | %% Telephone: +43/1/31336-5053
 55 | %% Fax: +43/1/31336-734
 56 | 
 57 | %% for those who use Sweave please include the following line (with % symbols):
 58 | %% MM: this is "substituted" by  jss.cls:
 59 | %% need no \usepackage{Sweave.sty}
 60 | 
 61 | % \usepackage{myVignette}
 62 | % \usepackage{fullpage}% save trees ;-) --- FIXME use {geometry} package
 63 | % \usepackage[authoryear,round,longnamesfirst]{natbib}
 64 | % \bibliographystyle{plainnat}
 65 | %
 66 | %% Marius' packages
 67 | \usepackage[american]{babel}%for American English
 68 | % \usepackage{microtype}%for character protrusion and font expansion (only with pdflatex)
 69 | \usepackage{amsmath}%sophisticated mathematical formulas with amstex (includes \text{})
 70 | \usepackage{mathtools}%fix amsmath deficiencies
 71 | \usepackage{amssymb}%sophisticated mathematical symbols with amstex (includes \mathbb{})
 72 | % \usepackage{amsthm}%theorem environments
 73 | % \usepackage{bm}%for bold math symbols: \bm (= bold math)
 74 | % %NON-STANDARD:\RequirePackage{bbm}%only for indicator functions
 75 | % \usepackage{enumitem}%for automatic numbering of new enumerate environments
 76 | % \usepackage[
 77 | %   format=hang,
 78 | %   % NOT for JSS: labelsep=space,
 79 | %   justification=justified,
 80 | %   singlelinecheck=false%,
 81 | %   % NOT for JSS: labelfont=bf
 82 | % ]{caption}%for captions
 83 | % \usepackage{tikz}%sophisticated graphics package
 84 | % \usepackage{tabularx}%for special table environment (tabularx-table)
 85 | % \usepackage{booktabs}%for table layout
 86 | 
 87 | % This is already in jss above -- but withOUT the  fontsize=\small part !!
 88 | \DefineVerbatimEnvironment{Sinput}{Verbatim}{fontsize=\small,fontshape=sl}
 89 | \DefineVerbatimEnvironment{Soutput}{Verbatim}{fontsize=\small}
 90 | \DefineVerbatimEnvironment{Scode}{Verbatim}{fontsize=\small,fontshape=sl}
 91 | % but when submitting, do get rid of too much vertical space between R
 92 | % input & output, i.e. between Sinput and Soutput:
 93 | \fvset{listparameters={\setlength{\topsep}{0pt}}}% !! quite an effect!
 94 | %%
 95 | %
 96 | \newcommand*{\R}{\proglang{R}}%{\textsf{R}}
 97 | \newcommand*{\Arg}[1]{\texttt{\itshape $\langle$#1$\rangle$}}
 98 | \newcommand*{\file}[1]{{`\normalfont\texttt{#1}'}}
 99 | \newcommand*{\eps}{\varepsilon}
100 | %
101 | %% Probability P[.],  Expectation E[.]  etc
102 | \makeatletter
103 | %% ==  subsection of our flexible-style "texab.sty" :
104 | \newcommand{\@pkl}{[}     % Probability Klammer links
105 | \newcommand{\@pkr}{]}
106 | \newcommand{\@ekl}{[}     % Erwartungswert Klammer links
107 | \newcommand{\@ekr}{]}     % Erwartungswert Klammer rechts
108 | \DeclareMathOperator{\PRSymbol}{P}
109 | % Next line (\makeright): if #1 == \left then \right #2 else #1 #2
110 | \newcommand{\makeright}[2]{\ifx#1\left\right#2\else#1#2\fi}
111 | %% the real commands
112 | \newcommand{\PR}[2][\left]  {\PRSymbol     #1\@pkl #2 \makeright{#1}{\@pkr}}
113 | \newcommand{\ERW}[2][\left] {\ERWSymbol    #1\@ekl #2 \makeright{#1}{\@ekr}}
114 | \makeatother
115 | \newcommand{\isD}{\ {\stackrel{\mathcal{D}}{=}}\ \ }
116 | \newcommand*{\iid}{\mbox{ i.i.d. }}
117 | 
118 | %
119 | \begin{document}
120 | \setkeys{Gin}{width=\textwidth}
121 | % Manuel has
122 | \setlength{\abovecaptionskip}{-5pt}
123 | %
124 | %% include your article here, just as usual
125 | %% Note that you should use the \pkg{}, \proglang{} and \code{} commands.
126 | % \section[About Java]{About \proglang{Java}}
127 | %% Note: If there is markup in \(sub)section, then it has to be escape as above.
128 | %% Note: These are explained in '?RweaveLatex' :
129 | \begin{footnotesize}
130 | <<preliminaries, echo=FALSE, results=hide>>=
131 | options(SweaveHooks= list(fig=function() par(mar=c(5.1, 4.1, 1.1, 2.1))),
132 |         width = 75,
133 |         digits = 7, # <-- here, keep R's default!
134 |         prompt = "R> ", # <- "yuck!" - required by JSS
135 |         continue=" ")
136 | set.seed(47)
137 | Sys.setenv(LANGUAGE = "en")
138 | if(.Platform$OS.type != "windows")
139 |   Sys.setlocale("LC_MESSAGES","C")
140 | 
141 | ## In order to  save() and load() expensive results
142 | thisDir <- system.file('doc', package='diptest')
143 | ## not yet used:
144 | xtraDir <- if(Sys.getenv("USER") == "maechler")
145 |     "~/R/Pkgs/diptest/stuff" else thisDir
146 | res1.file <- file.path(thisDir, "aggr_results.Rdata")
147 | 
148 | <<diagnose-lib, echo=FALSE>>=
149 | if(nzchar(Sys.getenv("R_MM_PKG_CHECKING"))) print( .libPaths() )
150 | @
151 | \end{footnotesize}
152 | % \maketitle
153 | % \begin{abstract}
154 | % \end{abstract}
155 | 
156 | \section[Introduction]{Introduction}% \small~\footnote{\mythanks}}
157 | \label{sec:Intro}
158 | 
159 | %% MM
160 | FIXME: Need notation
161 | 
162 | $D_n :=$\texttt{dip( runif(n) )};
163 | 
164 | but more generally,
165 | \begin{equation}
166 |   \label{eq:Dn.F}
167 |   D_n(F) := D(X_1, X_2, \dots, X_n),  \mbox{ \ \ \texttt{where} } X_i \iid, X_i \sim F.
168 | \end{equation}
169 | 
170 | \citet{HarJH85} in their ``seminal'' paper on the dip statistic $D_n$
171 | already proved that $ \sqrt{n} \; D_n$ converges in distribution, i.e.,
172 | \begin{equation}
173 |   \label{eq:D.infty}
174 |  \lim_{n\to\infty}\sqrt{n} \; D_n \isD  D_\infty.
175 | \end{equation}
176 | 
177 | A considerable part of this paper is devoted to explore the distribution of $D_\infty$.
178 | 
179 | \bigskip
180 | \section[History of the diptest package]{History of the \texttt{diptest} \textsf{R} package}
181 | 
182 | \citet{HarP85} published an implementation in Fortran of a concrete algorithm,
183 | % ALGORITHM AS 217 APPL. STATIST. (1985) VOL.34, NO.3
184 | where the code was also made available on Statlib\footnote{Statlib is now a
185 |   website, of course, \url{http://lib.stat.cmu.edu/}, but then was \emph{the} preferred way
186 |   for distributing algorithms for statistical computing, available years
187 |   before the existence of the WWW, and entailing e-mail and (anonymous) FTP}
188 | 
189 | On July 28, 1994, Dario Ringach, then at NY University, asked on Snews (the
190 | mailing list for S and S-plus users) about distributions and was helped by
191 | me and then about \texttt{dyn.load} problems, again helped by
192 | me. Subsequently he provided me with S-plus code which interfaced to
193 | (a \texttt{f2c}ed version of) Hartigan's Fortran code, for computing the dip statistic.
194 | and ended the (then private) e-mail with
195 | \begin{quotation}
196 | I am not going to have time to set this up for submission to StatLib.
197 | If you want to do it, please go ahead.
198 | 
199 | Regards,
200 | Dario
201 | \end{quotation}
202 | 
203 | 
204 | 
205 | - several important bug fixes;
206 |   last one Oct./Nov.~2003
207 | 
208 |   However, the Fortran code file \url{http://lib.stat.cmu.edu/apstat/217},
209 |   was last changed {Thu 04 Aug 2005 03:43:28 PM CEST}.
210 | 
211 |   We have some results of the dip.dist of \emph{before} the bug fix;
212 |   notably the ``dip of the dip'' probabilities have changed considerably!!
213 | 
214 | - see rcs log of ../../src/dip.c
215 | 
216 | \section{21st Century Improvement of  Hartigan$^2$'s Table}
217 | 
218 | ((
219 | 
220 | Use listing package (or so to more or less ``cut \& paste'' the nice code in
221 | \texttt{../../stuff/new-simul.Rout-1e6}
222 | 
223 | ))
224 | 
225 | \section{The Dip in the Dip's Distribution}
226 | \label{sec:dip_dip}
227 | We have found empirically that the dip distribution itself starts with a ``dip''.
228 | Specifically, the minimal possible value of $D_n$ is $\frac{1}{2n}$
229 | \emph{and} the probability of reaching that value,
230 | \begin{equation}
231 |   \label{eq:P.Dn_min}
232 |   \PR{D_n = \frac{1}{2n}},
233 | \end{equation}
234 | is large for small $n$.
235 | 
236 | E.g., consider an approximation of the dip distribution for $n=5$,
237 | <<dip_n-is-5,fig=TRUE>>=
238 | require("diptest") # after installing it ..
239 | D5 <- replicate(10000, dip(runif(5)))
240 | hist(D5, breaks=128, main = "Histogram of  replicate(10'000, dip(runif(5))))")
241 | @
242 | 
243 | which looks as if there was a bug in the software --- but that look is misleading!
244 | Note how the phenomenon is still visible for $n=8$,
245 | <<dip_n-is-8,fig=TRUE>>=
246 | D8 <- replicate(10000, dip(runif(8)))
247 | hist(D8, breaks=128, main = "Histogram of  replicate(10'000, dip(runif(8))))")
248 | @
249 | 
250 | Note that there is another phenomenon, in addition to the point mass at $1/(2n)$,
251 | particularly visible, if we use \emph{many} replicates,
252 | <<sim--n-eq-11, eval=FALSE>>=
253 |  set.seed(11)
254 |  n <- 11
255 |  B.s11 <- 500000
256 |  D11 <- replicate(B.s11, dip(runif(n)))
257 | <<2nd-small-sample-phenomen--n-eq-11, echo=false>>=
258 | if(file.exists(ff <- file.path(thisDir, "hist-D11.rda"))) {
259 |   load(ff)
260 | } else { ## takes a few minutes
261 | <<sim--n-eq-11>>
262 |   hD11 <- hist(D11, breaks=1e-6+(63:298)/(2*11*64), plot=FALSE)
263 |   save(hD11, n, B.s11, file= ff)
264 | }
265 | <<2nd-small-sample-phenomen--n-eq-11, echo=false, fig=true>>=
266 | B.str <- format(B.s11, sci=FALSE, big.mark="'")
267 | plot(hD11, main = "",
268 |      ## main = sprintf("Histogram of  replicate(%s, dip(runif(%d)))", B.str, n),
269 |      border=NA, col="dark gray",
270 |      xlab = substitute("Dip" ~~ D[.N.](U(group("[",list(0,1),"]"))), list(.N. = n)))
271 | title(xlab= substitute(B == .B.SIM. ~ "replicates", list(.B.SIM. = B.str)),
272 |       adj = .88)
273 | lcol <- adjustcolor("orange4", 0.4)
274 | abline(v = (1:3)/(2*n), col=lcol, lty=3, lwd=2)
275 | axis(1, pos=0, at = (1:3)/(2*n),
276 |      labels = expression(1/22, 2/22, 3/22), col=lcol, col.axis=lcol)
277 | @
278 | 
279 | FIXME:\\
280 | use \file{../../stuff/sim-minProb.R} \\
281 | and \file{../../stuff/minProb-anal.R}
282 | 
283 | Further, it can be seen that the \emph{maximal} dip statistic
284 | is $\frac 1 4 = 0.25$ and this upper bound can be reached simply (for even
285 | $n$) using the the data $(0,0,\dots,0, \; 1, 1,\dots,1)$, a bi-point mass
286 | with equal mass at both points.
287 | 
288 | \section{P-values for the Dip Test}
289 | \label{sec:Pvals}
290 | Note that it is not obvious how to compute $p$-values for ``the dip test'',
291 | as that means determining the distribution of the test statistic, i.e.,
292 | $D_n$ under the null hypothesis, but a natural null,
293 | $H_o: F \in \{F \mathrm{cadlag} \mid f := \frac d{dx} F is unimodal \}$
294 | is too large.  Hartigans'(1985) argued for using the uniform $U[0,1]$ i.e.,
295 | $F'(x) = f(x)= \mathbf{1}_{[0,1]}(x) = [0 \le x \le 1]$ (Iverson bracket)
296 | instead, even though they showed that it is not quite the ``least
297 | favorable'' one.
298 | Following Hartigans', we will define the $p$-value of an observed $d_n$ as
299 | \begin{equation}
300 |   \label{eq:Pval}
301 |   P_{d_n} := \PR{D_n \ge d_n} := \PR{\mathrm{dip}(U_1,\dots,U_n) \ge d_n}, \ \
302 |   \mathrm{where} \ U_i \sim U[0,1], \ \, \iid
303 | \end{equation}
304 | 
305 | \subsection{Interpolating the Dip Table}
306 | \label{sec:interpol}
307 | Because of the asymptotic distribution,
308 | $ \lim_{n\to\infty}\sqrt{n} \; D_n \isD  D_\infty$,
309 | it is makes sense to consider the ``$\sqrt{n} D_n$''--scale,
310 | even for finite $n$ values:
311 | <<sqrt-n-qdip,fig=true>>=
312 | data(qDiptab)
313 | dnqd <- dimnames(qDiptab)
314 | (nn. <- as.integer(dnqd[["n"]]))
315 | matplot(nn., qDiptab*sqrt(nn.), type ="o", pch=1, cex = 0.4,
316 |         log="x", xlab="n   [log scaled]",
317 |         ylab = expression(sqrt(n) %*% q[D[n]]))
318 | ## Note that  1/2n  is the first possible value (with finite mass),,
319 | ## clearly visible for (very) small n:
320 | lines(nn., sqrt(nn.)/(2*nn.), col=adjustcolor("yellow2",0.5), lwd=3)
321 | 
322 | P.p <- as.numeric(print(noquote(dnqd[["Pr"]])))
323 | ## Now look at one well known data set:
324 | D <- dip(x <- faithful$waiting)
325 | n <- length(x)
326 | points(n, sqrt(n)*D, pch=13, cex=2, col= adjustcolor("blue2",.5), lwd=2)
327 | ## a simulated (approximate) $p$-value for D  is
328 | mean(D <= replicate(10000, dip(runif(n)))) ## ~ 0.002
329 | @
330 | 
331 | but we can use our table to compute a deterministic (but still approximate,
332 | as the table is from simulation too) $p$-value:
333 | <<interpolate-dip-table>>=
334 | ## We are in this interval:
335 | n0 <- nn.[i.n <- findInterval(n, nn.)]
336 | n1 <- nn.[i.n +1] ; c(n0, n1)
337 | f.n <- (n - n0)/(n1 - n0)# in [0, 1]
338 | ## Now "find" y-interval:
339 | y.0 <- sqrt(n0)* qDiptab[i.n  ,]
340 | y.1 <- sqrt(n1)* qDiptab[i.n+1,]
341 | (Pval <- 1 - approx(y.0 + f.n*(y.1 - y.0),
342 |                     P.p,
343 |                     xout = sqrt(n) * D)[["y"]])
344 | ## 0.018095
345 | @
346 | 
347 | Finally, in May 2011, after several years of people asking for it,
348 | I have implemented a \code{dip.test} function which makes use of a ---
349 | somewhat more sophisticated --- interpolation scheme like the one above,
350 | to compute a $p$-value.
351 | As \code{qDiptab} has been based on $10^6$ samples, the interpolation
352 | yields accurate $p$-values, unless in very extreme cases.
353 | Here is the small ($n=63$) example from Hartigan$^2$,
354 | <<statfac-dip.test>>=
355 | data(statfaculty)
356 | dip.test(statfaculty)
357 | @
358 | where, from a $p$-value of 8.7\%, we'd conclude that there is not enough
359 | evidence against unimodality.
360 | 
361 | \subsection{Asymptotic Dip Distribution}
362 | \label{sec:asymp}
363 | We have conducted extensive simulations in order to explore the limit
364 | distribution of $D_\infty$, i.e., the limit of $\sqrt{n} \; D_n$, (\ref{eq:D.infty}).
365 | 
366 | Our current \R\ code is in \file{ ../../stuff/asymp-distrib.R }
367 | but the simulation results (7 Megabytes for each $n$) cannot be assumed to
368 | be part of the package, nor maybe even to be simply accessible via the internet.
369 | %% is bandwidth a problem ?  probably no longer in the near future?
370 | 
371 | 
372 | %% Maybe
373 | \section{Less Conservative Dip Testing}
374 | 
375 | 
376 | \section{Session Info}
377 | 
378 | <<sessionInfo, results=tex>>=
379 | toLatex(sessionInfo())
380 | @
381 | 
382 | \bibliography{diptest}
383 | 
384 | \end{document}
385 | 


--------------------------------------------------------------------------------
/stuff/Stuetzle-stat593-S2003-olive.dat:
--------------------------------------------------------------------------------
  1 | 1 1 1075 75 226 7823 672 36 60 29
  2 | 1 1 1088 73 224 7709 781 31 61 29
  3 | 1 1 911 54 246 8113 549 31 63 29
  4 | 1 1 966 57 240 7952 619 50 78 35
  5 | 1 1 1051 67 259 7771 672 50 80 46
  6 | 1 1 911 49 268 7924 678 51 70 44
  7 | 1 1 922 66 264 7990 618 49 56 29
  8 | 1 1 1100 61 235 7728 734 39 64 35
  9 | 1 1 1082 60 239 7745 709 46 83 33
 10 | 1 1 1037 55 213 7944 633 26 52 30
 11 | 1 1 1051 35 219 7978 605 21 65 24
 12 | 1 1 1036 59 235 7868 661 30 62 44
 13 | 1 1 1074 70 214 7728 747 50 79 33
 14 | 1 1 875 52 243 8018 655 41 79 32
 15 | 1 1 952 49 254 7795 780 50 75 41
 16 | 1 1 1155 98 201 7606 816 32 60 29
 17 | 1 1 943 94 183 7840 788 42 75 31
 18 | 1 1 1278 69 205 7344 957 45 70 28
 19 | 1 1 961 70 195 7958 742 46 75 30
 20 | 1 1 952 77 258 7820 736 43 78 33
 21 | 1 1 1074 67 236 7692 716 56 83 45
 22 | 1 1 995 46 288 7806 679 56 86 40
 23 | 1 1 1056 53 247 7703 700 54 89 51
 24 | 1 1 1065 39 234 7876 703 42 74 26
 25 | 1 1 1065 45 245 7779 696 47 82 38
 26 | 1 2 1315 139 230 7299 832 42 60 32
 27 | 1 2 1321 136 217 7174 950 43 63 30
 28 | 1 2 1359 115 246 7234 874 45 63 18
 29 | 1 2 1378 111 272 7127 940 46 64 23
 30 | 1 2 1295 109 245 7253 903 43 62 38
 31 | 1 2 1275 121 215 7285 892 40 68 41
 32 | 1 2 1336 120 318 7083 915 50 70 38
 33 | 1 2 1309 122 241 7257 870 46 72 35
 34 | 1 2 1340 114 189 7337 820 48 72 21
 35 | 1 2 1299 116 253 7309 823 40 69 27
 36 | 1 2 1221 107 221 7441 798 54 70 28
 37 | 1 2 1245 72 283 7395 829 44 67 28
 38 | 1 2 1285 129 244 7323 819 57 65 36
 39 | 1 2 1248 107 313 7299 840 46 66 33
 40 | 1 2 1356 106 236 7209 866 48 75 36
 41 | 1 2 1260 102 228 7354 870 49 64 28
 42 | 1 2 1261 121 312 7238 877 47 65 25
 43 | 1 2 1304 124 279 7160 928 48 61 37
 44 | 1 2 1344 117 287 7129 897 51 65 41
 45 | 1 2 1323 96 300 7351 757 47 54 26
 46 | 1 2 1292 117 215 7351 839 48 61 32
 47 | 1 2 1254 118 244 7394 786 46 71 24
 48 | 1 2 1312 131 259 7167 939 41 69 20
 49 | 1 2 1213 109 301 7261 925 47 65 31
 50 | 1 2 1359 98 351 7262 780 41 56 16
 51 | 1 2 1266 97 263 7435 743 45 69 29
 52 | 1 2 1298 99 311 7311 787 45 67 23
 53 | 1 2 1272 116 279 7258 872 43 72 27
 54 | 1 2 1278 87 332 7379 771 44 53 24
 55 | 1 2 1184 112 311 7391 819 48 57 28
 56 | 1 2 1382 110 268 7241 828 39 60 30
 57 | 1 2 1183 146 292 7580 618 38 51 23
 58 | 1 2 1261 153 219 7355 818 52 70 26
 59 | 1 2 1198 136 239 7639 633 27 55 19
 60 | 1 2 1225 134 232 7658 616 36 49 26
 61 | 1 2 1339 166 208 7190 923 40 69 25
 62 | 1 2 1132 157 240 7641 638 45 60 31
 63 | 1 2 1381 183 245 7385 609 47 70 25
 64 | 1 2 1409 128 257 7257 759 43 57 16
 65 | 1 2 1306 127 250 7254 869 47 68 24
 66 | 1 2 1372 120 250 7355 702 44 68 28
 67 | 1 2 1336 113 242 7293 855 38 60 18
 68 | 1 2 1401 151 238 7164 857 45 72 36
 69 | 1 2 1390 119 234 7236 823 40 62 41
 70 | 1 2 1432 152 281 7029 949 39 55 25
 71 | 1 2 1412 124 298 7182 790 45 68 28
 72 | 1 2 1366 147 291 7197 783 51 70 34
 73 | 1 2 1383 118 273 7282 738 45 68 29
 74 | 1 2 1283 102 263 7400 763 54 65 28
 75 | 1 2 1296 136 260 7380 780 48 51 18
 76 | 1 2 1287 108 287 7343 826 44 44 23
 77 | 1 2 1351 159 296 7229 810 36 60 22
 78 | 1 2 1241 97 268 7499 709 52 69 36
 79 | 1 2 1267 101 300 7230 898 74 65 34
 80 | 1 2 1235 138 252 7322 861 54 66 36
 81 | 1 2 1255 103 223 7395 848 47 56 30
 82 | 1 3 1454 183 196 7057 1014 27 46 19
 83 | 1 3 1347 194 197 7277 895 25 46 15
 84 | 1 3 1364 204 225 6929 1084 21 50 14
 85 | 1 3 1410 199 216 7130 955 21 48 19
 86 | 1 3 1384 178 208 7105 999 29 67 26
 87 | 1 3 1412 185 217 6842 1203 34 72 32
 88 | 1 3 1410 232 280 6715 1233 32 60 24
 89 | 1 3 1509 209 257 6647 1240 42 62 30
 90 | 1 3 1317 197 256 7036 1067 40 60 22
 91 | 1 3 1286 192 203 7132 1053 38 65 28
 92 | 1 3 1273 191 202 6862 1303 43 70 28
 93 | 1 3 1463 183 183 6747 1307 36 60 24
 94 | 1 3 1399 187 191 6861 1233 38 60 17
 95 | 1 3 1413 193 208 6875 1202 30 60 18
 96 | 1 3 1369 206 203 6953 1168 35 50 16
 97 | 1 3 1488 172 170 6920 1144 37 54 14
 98 | 1 3 1323 160 205 6911 1298 24 50 17
 99 | 1 3 1311 166 170 6902 1312 41 69 28
100 | 1 3 1286 163 183 7040 1230 29 57 12
101 | 1 3 1380 173 188 7038 1139 31 44 14
102 | 1 3 1394 164 223 7086 1042 24 43 23
103 | 1 3 1324 174 198 6863 1289 36 70 21
104 | 1 3 1290 157 192 7000 1263 26 51 19
105 | 1 3 1361 163 196 6888 1273 37 58 24
106 | 1 3 1387 182 242 6913 1101 44 68 30
107 | 1 3 1369 180 181 7000 1130 39 45 24
108 | 1 3 1303 165 175 7025 1243 31 41 16
109 | 1 3 1346 160 169 7072 1151 39 48 15
110 | 1 3 1369 171 184 6937 1246 30 48 15
111 | 1 3 1305 172 169 7004 1260 28 50 11
112 | 1 3 1351 179 186 6935 1243 36 50 19
113 | 1 3 1283 151 182 7000 1271 40 52 21
114 | 1 3 1449 175 198 6883 1162 40 70 22
115 | 1 3 1310 180 183 7054 1202 26 32 12
116 | 1 3 1360 163 176 6901 1280 28 65 27
117 | 1 3 1300 187 196 6920 1253 41 76 25
118 | 1 3 1368 171 218 7010 1057 41 54 26
119 | 1 3 1207 151 156 7159 1234 27 51 14
120 | 1 3 1348 154 183 6917 1277 48 56 16
121 | 1 3 1334 186 229 7261 827 34 56 20
122 | 1 3 1301 156 207 7003 1229 41 48 14
123 | 1 3 1226 181 213 6961 1230 47 74 26
124 | 1 3 1201 168 190 7100 1216 43 64 16
125 | 1 3 1297 153 177 7004 1260 35 60 16
126 | 1 3 1248 163 158 7103 1222 31 60 14
127 | 1 3 1335 159 197 6974 1220 36 60 17
128 | 1 3 1219 167 171 7087 1254 35 50 16
129 | 1 3 1318 179 177 7030 1194 35 42 25
130 | 1 3 1264 167 166 7130 1187 22 52 12
131 | 1 3 1201 175 201 7129 1193 36 49 15
132 | 1 3 1252 180 181 7055 1214 31 59 38
133 | 1 3 1273 182 209 6965 1191 43 74 23
134 | 1 3 1351 179 170 7034 1154 35 66 10
135 | 1 3 1336 155 212 7103 1086 33 55 20
136 | 1 3 1499 201 182 6803 1204 30 56 24
137 | 1 3 1425 198 193 7032 1041 31 52 17
138 | 1 3 1358 204 227 6962 1109 41 65 34
139 | 1 3 1346 181 257 7147 933 40 60 36
140 | 1 3 1392 186 256 6732 1278 53 64 29
141 | 1 3 1311 166 222 7006 1147 41 80 27
142 | 1 3 1314 171 229 6923 1198 47 76 42
143 | 1 3 1409 200 207 6842 1224 31 60 27
144 | 1 3 1342 174 221 6993 1147 36 64 23
145 | 1 3 1387 182 206 7100 1020 34 54 17
146 | 1 3 1413 202 205 6920 1165 36 46 13
147 | 1 3 1430 209 225 6800 1200 32 59 27
148 | 1 3 1336 185 223 6956 1155 56 73 16
149 | 1 3 1372 200 200 6916 1189 33 50 22
150 | 1 3 1330 157 228 7055 1108 42 55 25
151 | 1 3 1412 207 208 6822 1239 36 51 28
152 | 1 3 1321 209 217 6948 1178 42 62 23
153 | 1 3 1401 200 217 6980 1073 40 68 21
154 | 1 3 1401 214 217 6734 1293 44 69 27
155 | 1 3 1457 168 242 6724 1266 54 59 30
156 | 1 3 1451 199 221 6835 1177 37 51 29
157 | 1 3 1438 206 248 6806 1183 34 57 28
158 | 1 3 1462 204 237 6644 1309 42 54 28
159 | 1 3 1529 215 203 6602 1310 45 69 27
160 | 1 3 1510 189 245 6752 1188 36 52 28
161 | 1 3 1437 222 184 6803 1240 43 56 16
162 | 1 3 1327 129 247 7024 1157 38 56 22
163 | 1 3 1438 172 252 6630 1380 40 64 24
164 | 1 3 1447 176 189 6849 1180 42 64 26
165 | 1 3 1355 144 214 6972 1198 33 60 24
166 | 1 3 1369 156 241 6890 1209 42 63 30
167 | 1 3 1471 188 276 6697 1269 34 51 16
168 | 1 3 1456 179 240 6738 1267 41 65 14
169 | 1 3 1314 140 207 7020 1220 28 59 12
170 | 1 3 1408 176 192 6909 1195 45 50 25
171 | 1 3 1397 172 191 7107 1018 36 50 29
172 | 1 3 1413 191 186 6937 1180 31 46 13
173 | 1 3 1539 194 213 6764 1178 38 58 16
174 | 1 3 1304 159 234 7019 1174 38 53 19
175 | 1 3 1341 160 231 7033 1069 40 67 33
176 | 1 3 1508 208 249 6641 1311 25 43 20
177 | 1 3 1515 226 257 6595 1287 41 63 16
178 | 1 3 1262 165 235 7120 1113 32 51 21
179 | 1 3 1307 197 238 7003 1144 37 50 24
180 | 1 3 1294 159 253 7009 1190 30 52 13
181 | 1 3 1460 187 215 6843 1172 35 56 32
182 | 1 3 1476 187 203 6837 1197 36 48 22
183 | 1 3 1482 178 197 6814 1201 40 64 24
184 | 1 3 1388 176 185 7008 1111 48 53 31
185 | 1 3 1367 172 235 7066 1054 35 45 26
186 | 1 3 1272 207 205 7152 1098 37 52 22
187 | 1 3 1323 157 234 7132 1022 38 58 31
188 | 1 3 1206 218 242 7193 1002 37 54 25
189 | 1 3 1383 157 217 7018 1090 40 60 37
190 | 1 3 1521 190 238 6956 986 36 50 23
191 | 1 3 1350 168 227 6986 1165 29 58 17
192 | 1 3 1422 181 218 6813 1230 30 59 21
193 | 1 3 1298 166 224 6986 1162 34 65 31
194 | 1 3 1447 236 245 6607 1336 33 51 21
195 | 1 3 1347 197 211 6795 1300 32 59 34
196 | 1 3 1339 170 253 6989 1110 29 63 23
197 | 1 3 1388 183 216 6867 1208 28 61 21
198 | 1 3 1527 260 232 6488 1370 31 45 20
199 | 1 3 1495 237 236 6571 1318 32 58 26
200 | 1 3 1487 246 251 6504 1390 29 53 19
201 | 1 3 1399 180 232 6855 1190 32 66 22
202 | 1 3 1489 215 242 6777 1145 30 60 22
203 | 1 3 1339 166 226 6928 1198 30 60 23
204 | 1 3 1482 246 238 6444 1462 27 50 20
205 | 1 3 1434 172 255 6646 1354 27 59 25
206 | 1 3 1347 156 214 6850 1313 25 48 19
207 | 1 3 1340 158 233 6848 1272 32 63 25
208 | 1 3 1453 180 244 6752 1238 34 54 23
209 | 1 3 1306 149 226 7082 1097 33 61 24
210 | 1 3 1349 161 217 6997 1138 31 62 23
211 | 1 3 1254 151 205 7319 947 28 54 23
212 | 1 3 1168 144 220 7230 1109 31 52 28
213 | 1 3 1346 167 224 6959 1111 30 49 23
214 | 1 3 1390 184 212 6898 1189 29 44 19
215 | 1 3 1283 149 224 7077 1104 30 57 32
216 | 1 3 1214 137 232 7269 1005 32 55 23
217 | 1 3 1491 227 205 6941 988 33 68 34
218 | 1 3 1479 218 207 7039 887 36 65 36
219 | 1 3 1445 174 228 6875 1123 29 69 31
220 | 1 3 1439 183 218 6775 1226 32 66 29
221 | 1 3 1387 154 204 6991 1090 34 74 32
222 | 1 3 1426 169 192 7025 1043 31 64 27
223 | 1 3 1451 200 208 6980 1006 30 62 31
224 | 1 3 1493 204 188 6913 1044 32 61 35
225 | 1 3 1419 192 207 6996 1014 36 70 36
226 | 1 3 1342 177 199 7172 952 34 65 33
227 | 1 3 1349 152 236 7145 949 35 75 29
228 | 1 3 1440 196 208 6938 1070 32 61 26
229 | 1 3 1460 215 197 6918 1081 28 55 23
230 | 1 3 1249 133 205 7417 827 33 72 33
231 | 1 3 1348 159 238 7017 1081 31 67 25
232 | 1 3 1341 155 244 6958 1144 32 68 26
233 | 1 3 1398 149 204 7182 907 29 76 30
234 | 1 3 1454 200 199 6910 1090 30 62 25
235 | 1 3 1334 153 219 6928 1214 33 66 24
236 | 1 3 1438 204 189 7107 910 33 63 27
237 | 1 3 1303 138 212 7170 1016 34 69 25
238 | 1 3 1323 147 210 7108 1070 33 61 20
239 | 1 3 1417 169 207 6875 1184 34 57 27
240 | 1 3 1360 167 225 6883 1220 31 55 27
241 | 1 3 1420 179 214 6923 1121 33 56 27
242 | 1 3 1472 218 214 6724 1238 29 53 23
243 | 1 3 1368 174 205 7042 1066 31 57 26
244 | 1 3 1367 173 228 6948 1141 32 53 24
245 | 1 3 1403 173 209 6843 1210 33 63 33
246 | 1 3 1413 197 206 6737 1387 34 60 31
247 | 1 3 1201 138 207 7011 1269 37 64 35
248 | 1 3 1359 180 207 6895 1203 33 61 30
249 | 1 3 1518 198 225 6681 1243 29 57 24
250 | 1 3 1434 185 189 6771 1269 30 62 25
251 | 1 3 1367 162 179 6772 1368 33 64 27
252 | 1 3 1461 181 197 6783 1246 26 57 23
253 | 1 3 1368 161 198 7030 1095 33 59 31
254 | 1 3 1419 159 215 6862 1193 35 60 31
255 | 1 3 1514 162 298 6725 1119 45 93 30
256 | 1 3 1328 171 253 6987 1030 38 83 39
257 | 1 3 1469 160 337 6675 1127 44 94 36
258 | 1 4 1222 133 227 7425 824 36 69 35
259 | 1 4 1639 172 331 6510 1124 46 91 32
260 | 1 4 1345 133 272 6801 1194 48 83 37
261 | 1 4 1339 170 275 6838 1060 46 88 43
262 | 1 4 1194 135 263 7277 889 44 95 41
263 | 1 4 1112 68 375 7770 448 52 69 45
264 | 1 4 1222 70 329 7605 566 48 67 43
265 | 1 4 1136 72 341 7616 661 49 65 32
266 | 1 4 926 41 277 7815 784 45 65 25
267 | 1 4 1105 69 373 7714 532 51 68 37
268 | 1 4 1109 79 305 7576 763 45 64 36
269 | 1 4 1284 93 265 7235 893 43 77 46
270 | 1 4 1120 69 277 7416 946 42 59 36
271 | 1 4 916 52 281 7870 694 42 64 58
272 | 1 4 905 49 288 7747 812 49 71 56
273 | 1 4 1206 55 287 7329 935 44 74 42
274 | 1 4 1457 182 267 7020 863 41 84 37
275 | 1 4 1327 140 193 7328 823 36 87 35
276 | 1 4 1303 100 251 7045 1049 40 86 40
277 | 1 4 1444 175 259 6876 1027 34 78 32
278 | 1 4 1505 243 226 6962 858 30 72 27
279 | 1 4 1429 162 223 6917 1041 37 77 40
280 | 1 4 1491 162 211 6994 928 37 97 38
281 | 1 4 1393 128 211 7189 870 38 93 40
282 | 1 4 1404 134 210 7110 923 40 101 43
283 | 1 4 1222 130 214 7374 856 38 89 45
284 | 1 4 1153 74 316 7593 705 42 64 32
285 | 1 4 1169 76 307 7553 728 43 69 32
286 | 1 4 1369 104 237 7375 775 39 70 15
287 | 1 4 993 58 267 7743 773 41 62 44
288 | 1 4 980 53 254 7719 815 44 69 47
289 | 1 4 967 55 273 7692 833 45 63 47
290 | 1 4 1128 73 354 7527 728 44 76 38
291 | 1 4 1188 85 273 7445 814 44 73 42
292 | 1 4 1257 95 247 7405 812 43 70 35
293 | 1 4 1262 88 301 7471 704 43 71 31
294 | 1 3 1283 153 196 7107 1115 37 60 28
295 | 1 3 1263 155 199 7140 1148 31 42 18
296 | 1 3 1369 158 215 7160 958 38 69 32
297 | 1 3 1353 172 175 6965 1212 28 75 19
298 | 1 3 1187 139 185 7427 952 29 56 22
299 | 1 3 1732 231 156 6437 1313 45 62 23
300 | 1 3 1620 255 166 6628 1212 29 62 27
301 | 1 3 1543 172 193 6740 1157 52 87 34
302 | 1 3 1498 170 195 6804 1206 35 66 23
303 | 1 3 1399 169 171 7011 1100 36 72 16
304 | 1 3 1293 156 191 7101 1111 32 60 31
305 | 1 3 1420 175 152 7004 1149 27 50 20
306 | 1 3 1721 238 255 6300 1350 35 70 28
307 | 1 3 1742 221 156 6415 1315 43 82 23
308 | 1 3 1391 187 189 6975 1062 52 70 45
309 | 1 3 1517 206 249 6680 1205 33 80 27
310 | 1 3 1269 157 193 7140 1148 31 40 18
311 | 1 3 1577 204 208 6732 1183 20 52 20
312 | 1 3 1590 241 195 6705 1149 27 68 21
313 | 1 3 1621 280 197 6608 1179 28 58 27
314 | 1 3 1753 275 236 6367 1214 23 61 27
315 | 1 3 1679 260 177 6568 1191 30 59 33
316 | 1 3 1419 203 176 6973 1083 38 78 27
317 | 1 3 1693 236 174 6499 1204 51 102 37
318 | 1 3 1692 270 234 6499 1196 31 59 15
319 | 1 3 1638 252 215 6570 1199 39 53 29
320 | 1 3 1497 247 219 6621 1270 36 73 32
321 | 1 3 1442 222 194 6677 1314 36 72 38
322 | 1 3 1680 270 170 6440 1310 31 62 28
323 | 1 3 1463 164 185 6909 1154 49 58 17
324 | 2 5 1129 120 222 7272 1112 43 98 2
325 | 2 5 1042 135 210 7376 1116 35 90 3
326 | 2 5 1103 96 210 7380 1085 32 94 3
327 | 2 5 1118 97 221 7279 1154 35 94 2
328 | 2 5 1052 95 215 7388 1126 31 92 1
329 | 2 5 1116 102 231 7290 1168 26 66 1
330 | 2 5 1108 132 231 7319 1101 20 66 2
331 | 2 5 1129 108 212 7386 1074 28 62 3
332 | 2 5 1085 91 223 7384 1126 28 62 3
333 | 2 5 1104 103 233 7322 1147 27 61 2
334 | 2 5 1098 88 212 7338 1140 28 67 1
335 | 2 6 1135 98 251 7120 1314 20 61 2
336 | 2 6 1158 108 245 7065 1326 22 75 1
337 | 2 6 1133 110 241 7080 1342 21 68 3
338 | 2 6 1095 125 250 7120 1305 21 83 1
339 | 2 6 1201 87 238 6990 1383 25 75 3
340 | 2 6 1213 112 245 7007 1335 22 65 3
341 | 2 5 1108 92 231 7367 1110 29 62 3
342 | 2 5 1075 103 207 7413 1096 32 68 2
343 | 2 5 1059 96 228 7386 1128 25 72 2
344 | 2 5 1176 92 207 7347 1057 35 82 1
345 | 2 5 1159 98 213 7320 1108 38 64 1
346 | 2 5 1132 80 201 7398 1095 27 67 2
347 | 2 5 1107 75 220 7399 1096 29 90 1
348 | 2 5 1092 104 234 7355 1126 28 58 2
349 | 2 5 1119 81 219 7409 1057 33 81 2
350 | 2 5 1106 93 212 7381 1104 35 68 1
351 | 2 5 1047 101 238 7385 1120 28 89 1
352 | 2 5 1165 99 214 7331 1101 22 67 3
353 | 2 5 1158 84 201 7327 1123 29 77 2
354 | 2 5 1095 88 203 7415 1093 37 78 1
355 | 2 5 1176 75 205 7396 1107 33 74 2
356 | 2 5 1103 109 220 7335 1140 28 59 2
357 | 2 5 1112 92 209 7356 1125 32 73 2
358 | 2 5 1091 93 222 7377 1113 20 53 2
359 | 2 5 1080 98 219 7371 1125 33 78 1
360 | 2 5 1051 108 227 7403 1114 30 66 3
361 | 2 5 1096 84 211 7415 1091 30 71 2
362 | 2 5 1142 97 225 7341 1101 28 65 1
363 | 2 5 1047 96 236 7399 1107 32 80 3
364 | 2 5 1114 86 210 7359 1116 31 83 2
365 | 2 5 1140 93 241 7324 1098 23 74 1
366 | 2 5 1075 91 200 7410 1107 36 80 1
367 | 2 5 1092 106 219 7427 1125 33 77 1
368 | 2 5 1076 95 204 7408 1130 27 79 2
369 | 2 5 1178 89 201 7381 1099 34 87 2
370 | 2 5 1095 104 223 7367 1111 43 56 2
371 | 2 6 1166 97 272 6971 1390 20 83 3
372 | 2 6 1154 119 257 7130 1253 22 61 1
373 | 2 6 1177 111 241 6882 1470 22 95 2
374 | 2 6 1160 96 240 7043 1357 24 79 2
375 | 2 6 1122 104 241 7145 1313 15 58 1
376 | 2 6 1132 99 257 7065 1362 24 90 3
377 | 2 6 1096 100 260 7162 1282 25 74 2
378 | 2 6 1131 87 233 7144 1307 25 72 3
379 | 2 6 1184 105 258 7020 1340 26 66 2
380 | 2 6 1135 94 235 7123 1320 24 67 2
381 | 2 6 1084 96 240 7164 1330 28 57 1
382 | 2 6 1086 127 252 7159 1285 28 62 2
383 | 2 6 1140 95 258 7085 1347 23 71 3
384 | 2 6 1138 101 254 7103 1310 25 68 1
385 | 2 6 1159 110 261 7068 1297 27 77 2
386 | 2 5 1051 78 211 7421 1146 30 82 2
387 | 2 5 1048 79 213 7439 1130 28 61 2
388 | 2 5 1061 86 220 7421 1102 29 79 3
389 | 2 5 1105 88 210 7353 1142 28 72 1
390 | 2 5 1145 35 237 7208 1118 20 46 2
391 | 2 5 1049 96 219 7303 1168 22 47 2
392 | 2 5 1105 120 218 7302 1158 23 45 3
393 | 2 5 1030 84 214 7403 1177 21 70 1
394 | 2 5 1070 98 215 7280 1240 28 68 3
395 | 2 5 1103 81 208 7310 1177 30 90 3
396 | 2 5 1040 101 205 7368 1176 25 85 3
397 | 2 5 1100 95 210 7320 1113 22 72 3
398 | 2 5 1118 85 199 7415 1060 36 86 3
399 | 2 5 1065 98 230 7345 1163 24 74 1
400 | 2 5 1131 78 221 7358 1120 22 69 2
401 | 2 5 1080 120 218 7296 1145 35 105 2
402 | 2 5 1075 86 231 7403 1109 22 73 3
403 | 2 5 1040 103 228 7364 1173 25 66 2
404 | 2 5 1128 82 203 7320 1148 30 88 1
405 | 2 5 1060 111 231 7363 1149 20 65 1
406 | 2 5 1103 78 220 7365 1149 20 65 2
407 | 2 5 1110 91 201 7318 1185 24 74 2
408 | 2 5 1091 108 218 7383 1183 28 88 3
409 | 2 5 1094 96 220 7341 1127 26 96 2
410 | 2 6 1131 87 208 7170 1308 28 57 2
411 | 2 6 1175 108 214 7076 1307 33 85 2
412 | 2 6 1076 77 202 7243 1305 29 67 1
413 | 2 6 1120 90 240 7068 1383 23 75 1
414 | 2 6 1152 111 238 7080 1372 25 81 2
415 | 2 6 1141 95 250 7035 1388 22 68 2
416 | 2 6 1098 103 267 7135 1301 24 76 2
417 | 2 6 1126 100 236 7062 1380 26 69 1
418 | 2 6 1087 89 243 7200 1302 18 60 1
419 | 2 6 1115 96 236 7085 1372 20 75 2
420 | 2 6 1178 92 241 7006 1376 22 84 1
421 | 2 6 1162 106 242 7025 1368 25 71 2
422 | 3 9 1085 70 180 7955 605 20 50 1
423 | 3 9 1085 70 185 7955 600 25 55 1
424 | 3 9 1090 60 190 7950 600 28 47 2
425 | 3 9 1080 65 189 7960 602 35 20 1
426 | 3 9 1090 60 195 7955 600 28 42 2
427 | 3 9 1105 55 200 7900 600 37 55 2
428 | 3 9 1060 75 175 7975 610 20 55 2
429 | 3 9 1050 70 170 7977 605 28 65 1
430 | 3 9 1100 55 198 7905 600 35 50 3
431 | 3 9 1065 65 178 7965 605 22 65 2
432 | 3 9 1085 60 188 7955 602 30 50 2
433 | 3 9 1080 65 180 7960 605 25 55 1
434 | 3 9 1085 60 190 7955 602 30 53 1
435 | 3 9 1075 68 195 7960 602 20 40 3
436 | 3 9 1090 58 192 7950 600 35 40 3
437 | 3 9 1095 60 198 7945 600 38 34 2
438 | 3 9 1090 58 195 7950 600 30 42 2
439 | 3 9 1095 58 198 7950 602 35 32 1
440 | 3 9 1090 58 195 7940 600 35 42 2
441 | 3 9 1095 58 198 7945 600 35 34 1
442 | 3 9 1095 55 200 7940 600 35 45 3
443 | 3 9 1080 70 188 7965 608 28 36 3
444 | 3 9 1090 60 195 7950 600 32 38 2
445 | 3 9 1105 55 200 7900 595 39 56 1
446 | 3 9 1110 50 205 7900 595 40 52 1
447 | 3 9 1075 70 198 7978 608 28 33 2
448 | 3 9 1075 65 185 7980 608 35 42 3
449 | 3 9 1065 75 180 7975 610 25 50 3
450 | 3 9 1070 75 188 7980 602 22 45 2
451 | 3 9 1070 75 188 7980 602 22 45 1
452 | 3 9 1100 70 200 7910 610 39 44 1
453 | 3 9 1075 70 185 7960 610 22 58 2
454 | 3 9 1050 78 175 7990 610 18 59 3
455 | 3 9 1090 60 198 7945 600 32 35 2
456 | 3 9 1050 78 188 7990 608 28 23 3
457 | 3 9 1075 70 190 7975 605 28 27 3
458 | 3 9 1098 54 202 7945 595 42 32 2
459 | 3 9 1105 15 198 8005 575 52 20 2
460 | 3 9 1110 75 220 7915 510 55 65 2
461 | 3 9 1058 50 178 7988 626 40 55 3
462 | 3 9 1115 30 225 7955 600 55 15 2
463 | 3 9 1105 30 198 7995 570 52 20 3
464 | 3 9 1072 49 178 7980 615 48 48 2
465 | 3 9 1110 15 210 7990 570 50 20 2
466 | 3 9 1110 80 215 7910 525 50 60 1
467 | 3 9 1055 60 175 7985 620 45 50 1
468 | 3 9 1100 80 215 7930 535 45 60 2
469 | 3 9 1105 55 205 7965 600 25 20 2
470 | 3 9 1095 50 210 7948 600 25 35 2
471 | 3 9 1110 50 220 7950 600 52 10 2
472 | 3 9 1092 37 210 7955 600 40 40 3
473 | 3 7 1290 60 260 7550 670 70 100 2
474 | 3 7 1170 80 230 7690 720 40 70 1
475 | 3 7 1100 90 250 7680 760 30 80 2
476 | 3 7 1120 70 240 7720 730 40 80 2
477 | 3 7 1160 70 250 7650 750 30 90 1
478 | 3 7 1200 50 210 7770 690 20 50 3
479 | 3 7 1140 50 200 7990 580 10 20 1
480 | 3 7 1220 80 240 7610 760 30 60 2
481 | 3 7 1180 90 250 7520 800 50 100 2
482 | 3 7 1210 70 250 7560 780 40 90 2
483 | 3 7 1220 80 220 7540 770 60 100 2
484 | 3 7 1180 100 190 7520 820 50 100 1
485 | 3 7 1160 90 220 7580 790 40 90 1
486 | 3 7 1130 100 240 7620 780 30 90 1
487 | 3 7 1080 100 260 7710 750 20 70 2
488 | 3 7 1090 90 280 7730 720 50 100 1
489 | 3 7 1020 100 270 7770 710 40 90 1
490 | 3 7 1090 90 250 7680 760 60 80 1
491 | 3 7 1120 100 260 7720 680 30 80 2
492 | 3 7 1080 80 240 7830 670 30 70 2
493 | 3 7 1160 70 230 7860 640 10 20 1
494 | 3 7 1100 80 240 7820 670 20 70 2
495 | 3 7 1050 100 250 7930 630 10 30 3
496 | 3 7 1090 90 270 7780 690 30 50 3
497 | 3 7 1120 80 260 7750 680 30 80 3
498 | 3 7 1120 100 250 7680 730 40 70 2
499 | 3 7 1190 90 230 7670 710 30 80 2
500 | 3 7 1170 110 250 7620 740 20 90 1
501 | 3 7 1120 100 230 7720 730 20 70 1
502 | 3 7 1190 80 270 7690 720 10 40 2
503 | 3 7 1400 90 270 7420 800 0 20 2
504 | 3 7 1350 80 250 7520 760 10 30 1
505 | 3 7 1090 60 220 7890 670 10 60 2
506 | 3 7 1150 90 230 7790 650 30 60 1
507 | 3 7 1240 90 220 7820 590 10 30 1
508 | 3 7 1220 100 240 7890 530 0 10 2
509 | 3 7 1180 80 250 7870 580 10 30 2
510 | 3 7 1170 110 240 7730 630 30 90 1
511 | 3 7 1170 100 280 7710 640 20 70 3
512 | 3 7 1180 80 220 7790 680 10 40 1
513 | 3 7 1200 90 240 7820 590 10 50 2
514 | 3 7 1140 90 240 7880 570 20 60 3
515 | 3 7 1160 70 210 7870 580 30 80 3
516 | 3 7 1130 80 250 7780 650 40 60 3
517 | 3 7 1150 80 240 7800 630 30 70 2
518 | 3 7 1110 70 240 7820 670 20 70 3
519 | 3 7 1150 70 220 7850 620 20 40 2
520 | 3 7 1180 80 240 7760 670 20 50 2
521 | 3 7 1020 80 250 7920 680 10 30 3
522 | 3 7 610 80 230 8410 650 0 20 3
523 | 3 8 1190 150 290 7340 1020 0 10 2
524 | 3 8 1110 130 210 7550 1000 0 0 1
525 | 3 8 1020 100 220 7530 1030 0 0 3
526 | 3 8 1070 120 210 7600 990 0 10 3
527 | 3 8 1010 90 350 7480 1050 10 10 1
528 | 3 8 1060 140 240 7680 830 10 40 2
529 | 3 8 1060 140 270 7620 880 10 20 1
530 | 3 8 1030 100 230 7740 900 0 0 2
531 | 3 8 1120 130 250 7530 970 0 0 3
532 | 3 8 1030 110 220 7760 980 0 0 2
533 | 3 8 1070 100 230 7600 990 10 0 1
534 | 3 8 1140 180 220 7610 850 10 10 2
535 | 3 8 1090 180 230 7590 860 10 40 2
536 | 3 8 980 110 300 7720 910 10 0 3
537 | 3 8 980 90 330 7540 1040 0 0 2
538 | 3 8 960 90 200 7810 940 0 0 2
539 | 3 8 990 90 210 7780 930 0 0 2
540 | 3 8 1060 120 210 7600 1010 0 0 1
541 | 3 8 1240 150 250 7610 730 10 10 1
542 | 3 8 1060 90 310 7850 690 0 0 2
543 | 3 8 1020 100 290 7620 960 0 10 2
544 | 3 8 970 90 220 7700 1020 0 0 3
545 | 3 8 1180 130 220 7450 1010 0 10 2
546 | 3 8 1060 140 240 7690 850 10 10 1
547 | 3 8 990 100 250 7630 1030 0 0 3
548 | 3 8 1010 90 350 7630 940 10 0 3
549 | 3 8 1040 90 250 7780 820 10 10 1
550 | 3 8 1040 90 250 7810 810 10 10 2
551 | 3 8 1020 90 350 7620 920 10 0 3
552 | 3 8 1020 90 260 7620 1010 0 0 3
553 | 3 8 1010 90 350 7610 930 10 0 3
554 | 3 8 920 110 340 7720 910 0 0 3
555 | 3 8 1030 100 250 7710 900 0 10 2
556 | 3 8 960 90 300 7820 830 0 0 3
557 | 3 8 1030 110 210 7810 840 0 0 1
558 | 3 8 1010 100 240 7710 910 10 20 2
559 | 3 8 1020 90 240 7800 850 0 0 2
560 | 3 8 1120 90 300 7650 830 0 10 1
561 | 3 8 1090 90 290 7710 800 10 0 2
562 | 3 8 1100 120 280 7630 770 10 10 2
563 | 3 8 1090 80 240 7820 760 10 0 2
564 | 3 8 1150 90 250 7720 810 0 10 3
565 | 3 8 1110 90 230 7810 750 0 10 2
566 | 3 8 1010 110 210 7720 950 0 0 1
567 | 3 8 1070 100 220 7730 870 10 10 2
568 | 3 8 1280 110 290 7490 790 10 10 2
569 | 3 8 1060 100 270 7740 810 10 10 3
570 | 3 8 1010 90 210 7720 970 0 0 2
571 | 3 8 990 120 250 7750 870 10 10 2
572 | 3 8 960 80 240 7950 740 10 20 2
573 | 


--------------------------------------------------------------------------------