├── DESCRIPTION
├── MD5
├── NAMESPACE
├── R
│   ├── censored.mean.R
│   ├── composite.stack.R
│   ├── dummy.code.R
│   ├── gam.mask.R
│   ├── gam.style.R
│   ├── huber.R
│   ├── mcqrnn.R
│   ├── qquantile.nw.R
│   ├── qrnn-rbf.R
│   ├── qrnn.R
│   ├── qrnn2.R
│   ├── quantile.dtn.R
│   ├── tilted.abs.R
│   └── transfer.R
├── README.md
├── data
│   └── YVRprecip.rda
├── inst
│   └── CITATION
└── man
    ├── YVRprecip.Rd
    ├── adam.Rd
    ├── censored.mean.Rd
    ├── composite.stack.Rd
    ├── dummy.code.Rd
    ├── gam.style.Rd
    ├── huber.Rd
    ├── mcqrnn.Rd
    ├── qrnn-package.Rd
    ├── qrnn-rbf.Rd
    ├── qrnn.cost.Rd
    ├── qrnn.fit.Rd
    ├── qrnn.initialize.Rd
    ├── qrnn.predict.Rd
    ├── qrnn2.Rd
    ├── quantile.dtn.Rd
    ├── tilted.abs.Rd
    └── transfer.Rd

/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: qrnn
 2 | Type: Package
 3 | Title: Quantile Regression Neural Network
 4 | Version: 2.1.1
 5 | Authors@R: person(given="Alex J.", family="Cannon",
 6 |     email="alex.cannon@ec.gc.ca", role=c("aut", "cre"),
 7 |     comment=c(ORCID="0000-0002-8025-3790"))
 8 | Description: Fit quantile regression neural network models with optional
 9 |     left censoring, partial monotonicity constraints, generalized additive
10 |     model constraints, and the ability to fit multiple non-crossing quantile
11 |     functions following Cannon (2011) <doi:10.1016/j.cageo.2010.07.005>
12 |     and Cannon (2018) <doi:10.1007/s00477-018-1573-6>.
13 | Date: 2024-02-29
14 | License: GPL-2
15 | LazyLoad: yes
16 | Repository: CRAN
17 | NeedsCompilation: no
18 | Author: Alex J. Cannon [aut, cre] (<https://orcid.org/0000-0002-8025-3790>)
19 | Maintainer: Alex J. Cannon <alex.cannon@ec.gc.ca>
20 | Packaged: 2024-02-29 19:22:04 UTC; rac001
21 | Date/Publication: 2024-02-29 22:30:12 UTC
22 | 
--------------------------------------------------------------------------------
/MD5:
--------------------------------------------------------------------------------
 1 | c60ea52bfbea1fce30a99a9195515c78 *DESCRIPTION
 2 | 25d7ed8a6ecda14dfbbc4868c7164d04 *NAMESPACE
 3 | 5b556573ed5cac147cf00319800ec152 *R/censored.mean.R
 4 | d9a671906fc8673cae7afbf94fa2e371 *R/composite.stack.R
 5 | 10973fa8c93a6d3a37647239e0db3c3b *R/dummy.code.R
 6 | 70135124ddbf84ebeb9ee6a0380652e5 *R/gam.mask.R
 7 | f3c7b98d65bceb46745a1c05025f7a2f *R/gam.style.R
 8 | e301bccca3d61624821eafe475632123 *R/huber.R
 9 | 0a321710dbe27052b3093641a8e6be9c *R/mcqrnn.R
10 | 67c941ead7c5a6c6698e751cb5200b6a *R/qquantile.nw.R
11 | 6a8d084ac045a78dd671a752bf3a10d1 *R/qrnn-rbf.R
12 | 2576734a9522d2c255a9fc07694ce9a0 *R/qrnn.R
13 | 49c5a3ac581435450f322fdb3bc160bc *R/qrnn2.R
14 | 66e1246d2cd079be22416bd0a7f09a20 *R/quantile.dtn.R
15 | 0fcfe460ea1a483ebbb53b9a1ae26ab5 *R/tilted.abs.R
16 | 340d635152450645e4e2554857bcd1e9 *R/transfer.R
17 | 223987b9336d28ff9a2f3a862d6a16c8 *README.md
18 | 9e548031a8253a0a83b1646f06220646 *data/YVRprecip.rda
19 | 31c0e537e466a5864e67baffb69ea5a3 *inst/CITATION
20 | c0a89d514d7eb8b2387b41ad1c7e69d1 *man/YVRprecip.Rd
21 | 306f5d619e3f5a17762981965023e2af *man/adam.Rd
22 | f354e64966e7c02998460f06af0713b5 *man/censored.mean.Rd
23 | 54f360483c40ca919117fc915be89b07 *man/composite.stack.Rd
24 | 59a4c66edd4f0251fb0e199e6a977f14 *man/dummy.code.Rd
25 | 563d8a9ee3808e653a4267e91cd872c8 *man/gam.style.Rd
26 | 1de261206c277a00b066ccaa717c3325 *man/huber.Rd
27 | d57bd9dd53f9a30014e34d9279fe2fc5 *man/mcqrnn.Rd
28 | 5d5a34e1461ba5d15339f4728aa90d00 *man/qrnn-package.Rd
29 | 031612666629bf8e182b345366ef6907 *man/qrnn-rbf.Rd
30 | 9a131dfd45bbc902cbc1934035ce3c6d *man/qrnn.cost.Rd
31 | 7dc52ac4a4f98c55c39ebac47b879e05 *man/qrnn.fit.Rd
32 | 131cb3ce4d694ff9c790081bb6d516ea *man/qrnn.initialize.Rd
33 | 0252a78e00e1ec13ab992a6ad4a0b3b2 *man/qrnn.predict.Rd
34 | 
a7aee67072c0535a8a6bc4086b78d568 *man/qrnn2.Rd 35 | 0e13072f70ec4bbcbaaed90c8e22ddba *man/quantile.dtn.Rd 36 | bb3755702e88d2d48d32c79894a3e6ab *man/tilted.abs.Rd 37 | c185338987c9a8a3eea056932be23d50 *man/transfer.Rd 38 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | export(censored.mean) 2 | export(composite.stack) 3 | export(dquantile) 4 | export(dummy.code) 5 | export(elu.prime) 6 | export(elu) 7 | export(gam.style) 8 | export(hramp.prime) 9 | export(hramp) 10 | export(huber.prime) 11 | export(huber) 12 | export(linear.prime) 13 | export(linear) 14 | export(logistic.prime) 15 | export(logistic) 16 | export(lrelu.prime) 17 | export(lrelu) 18 | export(pquantile) 19 | export(pquantile.nw) 20 | export(qquantile) 21 | export(qquantile.nw) 22 | export(qrnn.cost) 23 | export(qrnn.fit) 24 | export(qrnn2.fit) 25 | export(qrnn.initialize) 26 | export(qrnn.predict) 27 | export(qrnn2.predict) 28 | export(mcqrnn.fit) 29 | export(mcqrnn.predict) 30 | export(qrnn.rbf) 31 | export(relu.prime) 32 | export(relu) 33 | export(rquantile) 34 | export(rquantile.nw) 35 | export(sigmoid.prime) 36 | export(sigmoid) 37 | export(softmax) 38 | export(softplus.prime) 39 | export(softplus) 40 | export(tilted.abs) 41 | export(tilted.approx.prime) 42 | export(tilted.approx) 43 | export(adam) 44 | importFrom(stats, nlm) 45 | importFrom(stats, approx) 46 | importFrom(stats, integrate) 47 | importFrom(stats, median) 48 | importFrom(stats, runif) 49 | importFrom(stats, uniroot) 50 | importFrom(stats, sd) 51 | importFrom(graphics, abline) 52 | importFrom(graphics, segments) 53 | importFrom(grDevices, dev.new) 54 | -------------------------------------------------------------------------------- /R/censored.mean.R: -------------------------------------------------------------------------------- 1 | censored.mean <- 2 | function(x, lower, trim = 0) 3 | { 4 | x.mean <- mean(x, trim = trim) 5 | x.median <- median(x) 6 | if (isTRUE(all.equal(x.median, lower))){ 7 | return(x.median) 8 | } else{ 9 | return(x.mean) 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /R/composite.stack.R: -------------------------------------------------------------------------------- 1 | composite.stack <- function(x, y, tau){ 2 | n <- nrow(x) 3 | x <- kronecker(rep(1, length(tau)), x) 4 | y <- kronecker(rep(1, length(tau)), y) 5 | tau <- rep(tau, each=n) 6 | list(x=x, y=y, tau=tau) 7 | } 8 | 9 | -------------------------------------------------------------------------------- /R/dummy.code.R: -------------------------------------------------------------------------------- 1 | dummy.code <- 2 | function(x) { 3 | if(!is.factor(x)) stop("\"x\" is not a factor") 4 | lvls <- levels(x) 5 | n.lvls <- length(lvls) 6 | lvls <- lvls[-n.lvls] 7 | categs <- matrix(0, ncol=n.lvls-1, nrow=length(x)) 8 | for(i in seq_along(lvls)) categs[x==lvls[i],i] <- 1 9 | colnames(categs) <- lvls 10 | attr(categs, "levels") <- levels(x) 11 | categs 12 | } 13 | -------------------------------------------------------------------------------- /R/gam.mask.R: -------------------------------------------------------------------------------- 1 | gam.mask <- 2 | function(x, n.hidden) 3 | { 4 | mask <- matrix(1, ncol(x)+1, n.hidden) 5 | nh <- ncol(mask)/ncol(x) 6 | ij <- matrix(seq(nh*ncol(x)), nrow=nh) 7 | for(i in seq(ncol(x))){ 8 | mask[i,c(ij[,-i])] <- 0 9 | } 10 | mask 11 | } 12 | 
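# Illustration (added; not in the package source): gam.mask ties each
# block of hidden nodes to a single covariate. For a two-column x and
# n.hidden = 4, gam.mask(x, 4) returns
#
#      [,1] [,2] [,3] [,4]
# [1,]    1    1    0    0   <- x[,1] feeds hidden nodes 1-2 only
# [2,]    0    0    1    1   <- x[,2] feeds hidden nodes 3-4 only
# [3,]    1    1    1    1   <- bias row feeds every hidden node
#
# Multiplying W1 elementwise by this mask in qrnn.cost/qrnn.eval is
# what enforces the additive (GAM-style) structure when additive=TRUE
# in qrnn.fit.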
-------------------------------------------------------------------------------- /R/gam.style.R: -------------------------------------------------------------------------------- 1 | gam.style <- 2 | function(x, parms, column, baseline=mean(x[,column]), 3 | epsilon=1e-5, seg.len=0.02, seg.cols="black", plot=TRUE, 4 | return.results=FALSE, trim=0, ...) 5 | { 6 | effect <- 7 | function(x, column, parms, baseline, ensemble.average, trim) 8 | { 9 | parms$lower <- -Inf 10 | x.baseline <- x 11 | x.baseline[,column] <- baseline 12 | p.baseline <- qrnn.predict(x.baseline, parms) 13 | p <- qrnn.predict(x, parms) 14 | if(ensemble.average){ 15 | p.baseline <- apply(p.baseline, 1, censored.mean, 16 | lower=parms$lower, trim=trim) 17 | p <- apply(p, 1, censored.mean, lower=parms$lower, trim=trim) 18 | } 19 | p - p.baseline 20 | } 21 | partial <- 22 | function(x, column, parms, epsilon, ensemble.average, trim) 23 | { 24 | parms$lower <- -Inf 25 | x.plus <- x.minus <- x 26 | x.plus[,column] <- x.plus[,column] + epsilon 27 | x.minus[,column] <- x.minus[,column] - epsilon 28 | p.plus <- qrnn.predict(x.plus, parms) 29 | p.minus <- qrnn.predict(x.minus, parms) 30 | if(ensemble.average){ 31 | p.plus <- apply(p.plus, 1, censored.mean, lower=parms$lower, 32 | trim=trim) 33 | p.minus <- apply(p.minus, 1, censored.mean, lower=parms$lower, 34 | trim=trim) 35 | } 36 | (p.plus - p.minus)/(2*epsilon) 37 | } 38 | effects <- effect(x, column, parms, baseline, plot, trim) 39 | partials <- partial(x, column, parms, epsilon, plot, trim) 40 | if(plot){ 41 | if(is.null(colnames(x))) 42 | colnames(x) <- paste("x", 1:ncol(x), sep="") 43 | xlab <- colnames(x)[column] 44 | x.var <- x[,column] 45 | if(length(seg.cols)==1) seg.cols <- rep(seg.cols, length(x.var)) 46 | ylab <- paste("Effects: tau =", parms$tau) 47 | theta <- atan(partials) 48 | ymin <- min(effects) 49 | ymax <- max(effects) 50 | xmin <- min(x.var) 51 | xmax <- max(x.var) 52 | aspect <- (ymax - ymin)/(xmax - xmin) 53 | xdev <- seg.len*(xmax-xmin)*cos(theta) 54 | ydev <- seg.len*(xmax-xmin)*sin(theta) 55 | scale <- sqrt(xdev**2 + (ydev/aspect)**2) 56 | xdev <- xdev*(seg.len*(xmax-xmin))/scale 57 | ydev <- ydev*(seg.len*(xmax-xmin))/scale 58 | dev.new() 59 | plot(x.var, effects, type="n", xlab=xlab, ylab=ylab, ...) 
60 |     for(case in seq_along(x.var)){
61 |         xi <- x.var[case]
62 |         yi <- effects[case]
63 |         xd <- xdev[case]
64 |         yd <- ydev[case]
65 |         segments(xi, yi, xi+xd, yi+yd, col=seg.cols[case])
66 |     }
67 |     abline(v=baseline, lty=3)
68 | }
69 | if(return.results) return(list(effects=effects, partials=partials))
70 | }
71 | 
--------------------------------------------------------------------------------
/R/huber.R:
--------------------------------------------------------------------------------
 1 | huber.prime <-
 2 | function(x, eps)
 3 | {
 4 |     dh <- x/eps
 5 |     dh[x>eps] <- 1
 6 |     dh[x< -eps] <- -1
 7 |     dh[is.nan(dh)] <- 0
 8 |     dh
 9 | }
10 | 
11 | huber <-
12 | function(x, eps)
13 | {
14 |     h <- ifelse(abs(x)>eps, abs(x)-eps/2, (x^2)/(2*eps))
15 |     h[is.nan(h)] <- 0
16 |     h
17 | }
18 | 
19 | tilted.approx.prime <-
20 | function(x, tau, eps)
21 | {
22 |     ifelse(x>0, tau*huber.prime(x, eps), (1-tau)*huber.prime(x, eps))
23 | }
24 | tilted.approx <-
25 | function(x, tau, eps)
26 | {
27 |     ifelse(x>0, tau*huber(x, eps), (1-tau)*huber(x, eps))
28 | }
29 | hramp.prime <-
30 | function(x, lower, eps)
31 | {
32 |     if(length(lower) > 1){
33 |         mapply(hramp.prime, x, lower, eps)
34 |     } else{
35 |         if (lower==-Inf){
36 |             return(1)
37 |         } else{
38 |             dhr <- (x-lower)/eps
39 |             dhr[x>(lower+eps)] <- 1
40 |             dhr[x < lower] <- 0
41 |             return(dhr)
42 |         }
43 |     }
44 | }
45 | hramp <-
46 | function(x, lower, eps)
47 | {
48 |     if(length(lower) > 1){
49 |         mapply(hramp, x, lower, eps)
50 |     } else{
51 |         if (lower==-Inf){
52 |             return(x)
53 |         } else{
54 |             return(ifelse(x>lower, huber(x-lower, eps), 0)+lower)
55 |         }
56 |     }
57 | }
58 | 
--------------------------------------------------------------------------------
/R/mcqrnn.R:
--------------------------------------------------------------------------------
 1 | mcqrnn.fit <- function(x, y, n.hidden=2, n.hidden2=NULL, w=NULL,
 2 |                        tau=c(0.1, 0.5, 0.9), iter.max=5000,
 3 |                        n.trials=5, lower=-Inf,
 4 |                        init.range = c(-0.5, 0.5, -0.5, 0.5, -0.5, 0.5),
 5 |                        monotone=NULL, eps.seq=2^seq(-8, -32, by=-4),
 6 |                        Th=sigmoid, Th.prime=sigmoid.prime, penalty=0,
 7 |                        n.errors.max=10, trace=TRUE,
 8 |                        method=c("nlm", "adam"), scale.y=TRUE, ...){
 9 |     if(length(tau)==1 && is.integer(tau)){
10 |         if(trace) cat(paste("Stochastic estimation of quantile regression process using", tau, "samples\n"))
11 |         xs <- matrix(NA, ncol=ncol(x), nrow=tau)
12 |         ys <- matrix(NA, ncol=1, nrow=tau)
13 |         taus <- rep(NA, tau)
14 |         if(!is.null(w)) ws <- rep(NA, tau)
15 |         for(i in seq(tau)){
16 |             case.i <- sample(nrow(x), size=1)
17 |             x.i <- x[case.i,,drop=FALSE]
18 |             y.i <- y[case.i,,drop=FALSE]
19 |             tau.i <- runif(1)
20 |             x.y.tau.i <- composite.stack(x.i, y.i, tau.i)
21 |             xs[i,] <- x.y.tau.i$x
22 |             ys[i,] <- x.y.tau.i$y
23 |             taus[i] <- x.y.tau.i$tau
24 |             if(!is.null(w)) ws[i] <- w[case.i]
25 |         }
26 |         xs <- cbind(taus, xs)
27 |         if(!is.null(w)) w <- ws
28 |     } else{
29 |         if(length(tau)==1)
30 |             stop("Improper \'tau\' for stochastic estimation of quantile regression process (e.g., \'tau\' is not an integer)")
31 |         x.y.tau <- composite.stack(x, y, tau)
32 |         taus <- x.y.tau$tau
33 |         xs <- cbind(taus, x.y.tau$x)
34 |         ys <- x.y.tau$y
35 |     }
36 |     if(is.null(monotone)){
37 |         monotone <- 1
38 |     } else{
39 |         monotone <- c(1, monotone+1)
40 |     }
41 |     if(is.null(n.hidden2)){
42 |         if(!is.list(init.range) && length(init.range) > 4)
43 |             init.range <- init.range[1:4]
44 |         parms <- qrnn.fit(x=xs, y=ys, n.hidden=n.hidden, w=w, tau=taus,
45 |                           n.ensemble=1, iter.max=iter.max,
46 |                           n.trials=n.trials, bag=FALSE, lower=lower,
47 |                           init.range=init.range, monotone=monotone,
48 |                           eps.seq=eps.seq, Th=Th, Th.prime=Th.prime,
49 |                           penalty=penalty, unpenalized=1,
50 |                           n.errors.max=n.errors.max,
trace=trace,
51 |                           scale.y=scale.y, ...)
52 |     } else{
53 |         parms <- qrnn2.fit(x=xs, y=ys, n.hidden=n.hidden, n.hidden2=n.hidden2,
54 |                            w=w, tau=taus, n.ensemble=1, iter.max=iter.max,
55 |                            n.trials=n.trials, bag=FALSE, lower=lower,
56 |                            init.range=init.range, monotone=monotone,
57 |                            eps.seq=eps.seq, Th=Th, Th.prime=Th.prime,
58 |                            penalty=penalty, unpenalized=1,
59 |                            n.errors.max=n.errors.max, trace=trace,
60 |                            method=method, scale.y=scale.y, ...)
61 |     }
62 |     parms
63 | }
64 | 
65 | mcqrnn.predict <- function(x, parms, tau=NULL){
66 |     if(is.null(tau)) tau <- unique(parms$tau)
67 |     x.tau.stack <- composite.stack(x, NA, tau)
68 |     taus <- x.tau.stack$tau
69 |     xs <- cbind(taus, x.tau.stack$x)
70 |     if(any(grepl('W3', sapply(parms$weights, names)))){
71 |         pred <- matrix(qrnn2.predict(xs, parms), ncol=length(tau))
72 |     } else{
73 |         pred <- matrix(qrnn.predict(xs, parms), ncol=length(tau))
74 |     }
75 |     colnames(pred) <- paste0('tau=', tau)
76 |     pred
77 | }
--------------------------------------------------------------------------------
/R/qquantile.nw.R:
--------------------------------------------------------------------------------
 1 | ###############################################################################
 2 | # Quantile, cumulative distribution, and random variate functions based on
 3 | # interpolation of [tau, quant] pairs using the Nadaraya-Watson estimator
 4 | # with a beta kernel (bandwidth = h):
 5 | # Passow, C., R.V. Donner, 2020. Regression-based distribution mapping for
 6 | # bias correction of climate model outputs using linear quantile regression.
 7 | # Stochastic Environmental Research and Risk Assessment, 34:87-102.
 8 | # doi:10.1007/s00477-019-01750-7
 9 | 
10 | qquantile.nw <- function(p, tau, quant, h=0.001){
11 |     # Quantile function based on [tau, quant] pairs
12 |     K <- function(p, tau, h){
13 |         ((p^(tau/h))*(1-p)^((1-tau)/h))/
14 |             beta(tau/h + 1, (1-tau)/h + 1)
15 |     }
16 |     q <- sum(K(p, tau, h)*quant)/sum(K(p, tau, h))
17 |     q
18 | }
19 | 
20 | pquantile.nw <- function(q, tau, quant, h=0.001, ...){
21 |     # Cumulative distribution function based on [tau, quant] pairs
22 |     func <- function(p, q, tau, quant, h){
23 |         qq <- qquantile.nw(p, tau=tau, quant=quant, h=h)
24 |         q-qq
25 |     }
26 |     p <- uniroot(f=func, q=q, lower=min(tau), upper=max(tau), tau=tau,
27 |                  quant=quant, h=h, ...)$root
28 |     p
29 | }
30 | 
31 | rquantile.nw <- function(n, tau, quant, h=0.001){
32 |     # Random variate function based on [tau, quant] pairs
33 |     sapply(runif(n), qquantile.nw, tau=tau, quant=quant, h=h)
34 | }
35 | 
36 | ################################################################################
--------------------------------------------------------------------------------
/R/qrnn-rbf.R:
--------------------------------------------------------------------------------
 1 | qrnn.rbf <-
 2 | function(x, x.basis, sigma)
 3 | {
 4 |     kern <- matrix(0, nrow=nrow(x), ncol=nrow(x.basis))
 5 |     for (k in seq(nrow(x.basis))){
 6 |         x.basis.test <- matrix(x.basis[k,], nrow=nrow(x),
 7 |                                ncol=ncol(x.basis), byrow=TRUE)
 8 |         kern[,k] <- exp(-apply(((x-x.basis.test)^2)/(2*sigma^2), 1, sum))
 9 |     }
10 |     kern
11 | }
12 | 
--------------------------------------------------------------------------------
/R/qrnn.R:
--------------------------------------------------------------------------------
 1 | qrnn.cost <-
 2 | function(weights, x, y, n.hidden, w, tau, lower, monotone, additive, eps,
 3 |          Th, Th.prime, penalty, unpenalized)
 4 | {
 5 |     penalty2 <- ifelse(identical(Th, linear), penalty, 0)
 6 |     w1w2 <- qrnn.reshape(x, y, weights,
n.hidden) 7 | W1 <- w1w2$W1; rW1 <- nrow(W1); cW1 <- ncol(W1) 8 | W2 <- w1w2$W2; rW2 <- nrow(W2); cW2 <- ncol(W2) 9 | if (!is.null(monotone)) { 10 | W1[monotone,] <- exp(W1[monotone,]) 11 | W2[1:(rW2-1), ] <- exp(W2[1:(rW2-1),]) 12 | } 13 | if(!is.logical(additive)){ 14 | W1 <- W1*additive 15 | } 16 | # Forward pass 17 | x <- cbind(x, 1) 18 | h1 <- x %*% W1 19 | y1 <- Th(h1) 20 | aug.y1 <- cbind(y1, 1) 21 | h2 <- aug.y1 %*% W2 22 | y2 <- hramp(h2, lower, eps) 23 | E <- y-y2 24 | # Backward pass 25 | delta2 <- hramp.prime(h2, lower, eps)*tilted.approx.prime(E, tau, eps) 26 | gradient.W2 <- -(t(aug.y1) %*% sweep(delta2, 1, w, '*')) 27 | if (!is.null(monotone)){ 28 | gradient.W2[1:(rW2-1),] <- gradient.W2[1:(rW2-1),]*W2[1:(rW2-1),] 29 | } 30 | gradient.W2.penalty <- 2*penalty2*rbind(W2[1:(rW2-1),,drop=FALSE], 0)/ 31 | (length(W2)-cW2) 32 | E1 <- delta2 %*% t(W2[1:(rW2-1),,drop=FALSE]) 33 | delta1 <- Th.prime(h1)*E1 34 | gradient.W1 = -(t(x) %*% sweep(delta1, 1, w, '*')) 35 | if (!is.null(monotone)){ 36 | gradient.W1[monotone,] <- gradient.W1[monotone,]*W1[monotone,] 37 | } 38 | W1p <- W1; W1p[c(unpenalized, rW1),] <- 0 39 | gradient.W1.penalty <- 2*penalty*W1p/sum(W1p != 0) 40 | # Error & gradient 41 | cost <- sum(w*tilted.approx(E, tau, eps)) + 42 | penalty*sum(W1p^2)/sum(W1p != 0) + 43 | penalty2*sum(W2[1:(rW2-1),,drop=FALSE]^2)/(length(W2)-cW2) 44 | gradient <- c(gradient.W1 + gradient.W1.penalty, 45 | gradient.W2 + gradient.W2.penalty) 46 | attr(cost, "gradient") <- gradient 47 | cost 48 | } 49 | qrnn.eval <- 50 | function(x, W1, W2, lower, monotone, additive, eps, Th) 51 | { 52 | if (!is.null(monotone)) { 53 | W1[monotone, ] <- exp(W1[monotone, ]) 54 | W2[1:(nrow(W2) - 1), ] <- exp(W2[1:(nrow(W2) - 1),]) 55 | } 56 | if(!is.logical(additive)){ 57 | W1 <- W1*additive 58 | } 59 | x <- cbind(x, 1) 60 | h1 <- x %*% W1 61 | y1 <- Th(h1) 62 | aug.y1 <- cbind(y1, 1) 63 | y2 <- aug.y1 %*% W2 64 | y2 <- hramp(y2, lower, eps) 65 | y2 66 | } 67 | qrnn.fit <- 68 | function(x, y, n.hidden, w=NULL, tau=0.5, n.ensemble=1, iter.max=5000, 69 | n.trials=5, bag=FALSE, lower=-Inf, init.range=c(-0.5, 0.5, -0.5, 0.5), 70 | monotone=NULL, additive=FALSE, eps.seq=2^seq(-8, -32, by=-4), 71 | Th=sigmoid, Th.prime=sigmoid.prime, penalty=0, unpenalized=NULL, 72 | n.errors.max=10, trace=TRUE, scale.y=TRUE, ...) 
73 | 74 | { 75 | if (!is.matrix(x)) stop("\"x\" must be a matrix") 76 | if (!is.matrix(y)) stop("\"y\" must be a matrix") 77 | if (any(is.na(c(x, y)))) stop("missing values in \"x\" or \"y\"") 78 | if (any(apply(x, 2, sd) < .Machine$double.eps^0.5)) stop("zero variance column(s) in \"x\"") 79 | if (ncol(y) != 1) stop("\"y\" must be univariate") 80 | if (any((tau > 1) | (tau < 0))) stop("invalid \"tau\"") 81 | if (!identical(Th, linear) && missing(n.hidden)) stop("must specify \"n.hidden\"") 82 | if (identical(Th, linear)) n.hidden <- 1 83 | is.whole <- function(x, tol = .Machine$double.eps^0.5) 84 | abs(x - round(x)) < tol 85 | if (additive && !is.whole(n.hidden/ncol(x))) stop("\"n.hidden\" must be an integer multiple of \"ncol(x)\" when \"additive=TRUE\"") 86 | if(is.null(w)) w <- rep(1/nrow(y), nrow(y)) 87 | if (any(w < 0)) stop("invalid \"w\"") 88 | x <- scale(x) 89 | x.center <- attr(x, "scaled:center") 90 | x.scale <- attr(x, "scaled:scale") 91 | if(scale.y){ 92 | y <- scale(y) 93 | } else{ 94 | attr(y, "scaled:center") <- 0 95 | attr(y, "scaled:scale") <- 1 96 | } 97 | y.center <- attr(y, "scaled:center") 98 | y.scale <- attr(y, "scaled:scale") 99 | lower.scaled <- (lower-y.center)/y.scale 100 | if(additive) 101 | additive <- gam.mask(x, n.hidden) 102 | weights <- vector("list", n.ensemble) 103 | if(trace){ 104 | if(length(unique(tau)) <= 100){ 105 | cat("tau =", unique(tau), "\n", sep=" ") 106 | } else{ 107 | cat("tau = [", range(tau), "]; n.tau =", length(tau), "\n", sep=" ") 108 | } 109 | } 110 | for (i in seq(n.ensemble)){ 111 | if(trace) cat(i, "/", n.ensemble, "\n", sep="") 112 | w.tmp <- NA 113 | class(w.tmp) <- "try-error" 114 | n.errors <- 0 115 | while (inherits(w.tmp, "try-error")) { 116 | w.tmp <- try(qrnn.nlm(x, y, n.hidden, w, tau, iter.max, 117 | n.trials, bag, lower.scaled, 118 | init.range, monotone, additive, eps.seq, 119 | Th, Th.prime, penalty, unpenalized, 120 | trace, ...), 121 | silent = TRUE) 122 | n.errors <- n.errors + 1 123 | if (n.errors > n.errors.max) stop("nlm optimization failed") 124 | } 125 | weights[[i]] <- w.tmp 126 | } 127 | if(trace) cat("\n") 128 | parms <- list(weights=weights, lower=lower, eps.seq=eps.seq, 129 | tau=tau, Th=Th, x.center=x.center, 130 | x.scale=x.scale, y.center=y.center, y.scale=y.scale, 131 | monotone=monotone, additive=additive) 132 | parms 133 | } 134 | qrnn.initialize <- 135 | function(x, y, n.hidden, init.range=c(-0.5, 0.5, -0.5, 0.5)) 136 | { 137 | if(!is.list(init.range)){ 138 | if(length(init.range)==4){ 139 | r11 <- init.range[1] 140 | r12 <- init.range[2] 141 | r21 <- init.range[3] 142 | r22 <- init.range[4] 143 | } else{ 144 | r11 <- r21 <- init.range[1] 145 | r12 <- r22 <- init.range[2] 146 | } 147 | W1 <- matrix(runif((ncol(x)+1)*n.hidden, r11, r12), ncol(x)+1, n.hidden) 148 | W2 <- matrix(runif((n.hidden+1)*ncol(y), r21, r22), n.hidden+1, ncol(y)) 149 | weights <- c(W1, W2) 150 | } else{ 151 | weights <- unlist(init.range) 152 | } 153 | weights 154 | } 155 | qrnn.nlm <- 156 | function(x, y, n.hidden, w, tau, iter.max, n.trials, bag, lower, init.range, 157 | monotone, additive, eps.seq, Th, Th.prime, penalty, unpenalized, 158 | trace, ...) 
159 | { 160 | cases <- seq(nrow(x)) 161 | if (bag) cases <- sample(nrow(x), replace=TRUE) 162 | x <- x[cases,,drop=FALSE] 163 | y <- y[cases,,drop=FALSE] 164 | w <- w[cases] 165 | if(length(tau) > 1) tau <- tau[cases] 166 | if(length(lower) > 1) lower <- lower[cases] 167 | eps.seq <- sort(eps.seq, decreasing=TRUE) 168 | cost.best <- Inf 169 | for(i in seq(n.trials)){ 170 | weights <- qrnn.initialize(x, y, n.hidden, init.range) 171 | if(any(lower != -Inf)){ 172 | for(eps in eps.seq){ 173 | fit <- suppressWarnings(nlm(qrnn.cost, weights, 174 | iterlim=iter.max, x=x, y=y, n.hidden=n.hidden, 175 | w=w, tau=tau, lower=-Inf, monotone=monotone, 176 | additive=additive, eps=eps, Th=Th, 177 | Th.prime=Th.prime, penalty=penalty, 178 | unpenalized=unpenalized, 179 | check.analyticals=FALSE, ...)) 180 | weights <- fit$estimate 181 | } 182 | } 183 | for(eps in eps.seq){ 184 | fit <- suppressWarnings(nlm(qrnn.cost, weights, iterlim=iter.max, 185 | x=x, y=y, n.hidden=n.hidden, w=w, tau=tau, lower=lower, 186 | monotone=monotone, additive=additive, eps=eps, Th=Th, 187 | Th.prime=Th.prime, penalty=penalty, 188 | unpenalized=unpenalized, 189 | check.analyticals=FALSE, ...)) 190 | weights <- fit$estimate 191 | } 192 | cost <- fit$minimum 193 | if(trace) cat(i, cost, "\n") 194 | if(cost < cost.best){ 195 | cost.best <- cost 196 | weights.best <- fit$estimate 197 | } 198 | } 199 | if(trace) cat("*", cost.best, "\n") 200 | weights.best <- qrnn.reshape(x, y, weights.best, n.hidden) 201 | if(!is.logical(additive)){ 202 | weights.best$W1 <- weights.best$W1*additive 203 | } 204 | weights.best 205 | } 206 | qrnn.predict <- 207 | function(x, parms) 208 | { 209 | if (!is.matrix(x)) stop("\"x\" must be a matrix") 210 | weights <- parms$weights 211 | lower <- parms$lower 212 | monotone <- parms$monotone 213 | additive <- parms$additive 214 | eps <- min(parms$eps.seq) 215 | Th <- parms$Th 216 | x.center <- parms$x.center 217 | x.scale <- parms$x.scale 218 | y.center <- parms$y.center 219 | y.scale <- parms$y.scale 220 | lower <- (lower-y.center)/y.scale 221 | x <- sweep(x, 2, x.center, "-") 222 | x <- sweep(x, 2, x.scale, "/") 223 | y.bag <- matrix(0, ncol=length(weights), nrow=nrow(x)) 224 | for (i in seq_along(weights)){ 225 | y.bag[,i] <- qrnn.eval(x, weights[[i]]$W1, weights[[i]]$W2, 226 | lower, monotone, additive, eps, Th) 227 | y.bag[,i] <- y.bag[,i]*y.scale + y.center 228 | } 229 | y.bag 230 | } 231 | qrnn.reshape <- 232 | function(x, y, weights, n.hidden) 233 | { 234 | N11 <- ncol(x)+1 235 | N12 <- n.hidden 236 | N1 <- N11*N12 237 | W1 <- weights[1:N1] 238 | W1 <- matrix(W1, N11, N12) 239 | N21 <- n.hidden+1 240 | N22 <- ncol(y) 241 | N2 <- N1 + N21*N22 242 | W2 <- weights[(N1+1):N2] 243 | W2 <- matrix(W2, N21, N22) 244 | list(W1=W1, W2=W2) 245 | } 246 | -------------------------------------------------------------------------------- /R/qrnn2.R: -------------------------------------------------------------------------------- 1 | qrnn2.cost <- 2 | function(weights, x, y, n.hidden, n.hidden2, w, tau, lower, monotone, eps, 3 | Th, Th.prime, penalty, unpenalized) 4 | { 5 | w1w2w3 <- qrnn2.reshape(x, y, weights, n.hidden, n.hidden2) 6 | W1 <- w1w2w3$W1; rW1 <- nrow(W1); cW1 <- ncol(W1) 7 | W2 <- w1w2w3$W2; rW2 <- nrow(W2); cW2 <- ncol(W2) 8 | W3 <- w1w2w3$W3; rW3 <- nrow(W3); cW3 <- ncol(W3) 9 | if (!is.null(monotone)) { 10 | W1[monotone,] <- exp(W1[monotone,]) 11 | W2[1:(rW2-1), ] <- exp(W2[1:(rW2-1),]) 12 | W3[1:(rW3-1), ] <- exp(W3[1:(rW3-1),]) 13 | } 14 | # Forward pass 15 | x <- cbind(x, 1) 16 | h1 <- x %*% W1 17 
| y1 <- Th(h1) 18 | aug.y1 <- cbind(y1, 1) 19 | h2 <- aug.y1 %*% W2 20 | y2 <- Th(h2) 21 | aug.y2 <- cbind(y2, 1) 22 | h3 <- aug.y2 %*% W3 23 | y3 <- hramp(h3, lower, eps) 24 | E <- y-y3 25 | # Backward pass 26 | delta3 <- hramp.prime(h3, lower, eps)*tilted.approx.prime(E, tau, eps) 27 | gradient.W3 <- -(t(aug.y2) %*% sweep(delta3, 1, w, "*")) 28 | if (!is.null(monotone)){ 29 | gradient.W3[1:(rW3-1),] <- gradient.W3[1:(rW3-1),]*W3[1:(rW3-1),] 30 | } 31 | E2 <- delta3 %*% t(W3[1:(rW3-1),,drop=FALSE]) 32 | delta2 <- Th.prime(h2)*E2 33 | gradient.W2 <- -(t(aug.y1) %*% sweep(delta2, 1, w, "*")) 34 | if (!is.null(monotone)){ 35 | gradient.W2[1:(rW2-1),] <- gradient.W2[1:(rW2-1),]*W2[1:(rW2-1),] 36 | } 37 | gradient.W2.penalty <- 2*penalty*rbind(W2[1:(rW2-1),,drop=FALSE], 0)/ 38 | (length(W2)-cW2) 39 | E1 <- delta2 %*% t(W2[1:(rW2-1),,drop=FALSE]) 40 | delta1 <- Th.prime(h1)*E1 41 | gradient.W1 = -(t(x) %*% sweep(delta1, 1, w, "*")) 42 | if (!is.null(monotone)){ 43 | gradient.W1[monotone,] <- gradient.W1[monotone,]*W1[monotone,] 44 | } 45 | W1p <- W1; W1p[c(unpenalized, rW1),] <- 0 46 | gradient.W1.penalty <- 2*penalty*W1p/sum(W1p != 0) 47 | # Error & gradient 48 | cost <- sum(w*tilted.approx(E, tau, eps)) + 49 | penalty*sum(W1p^2)/sum(W1p != 0) + 50 | penalty*sum(W2[1:(rW2-1),,drop=FALSE]^2)/(length(W2)-cW2) 51 | gradient <- c(gradient.W1 + gradient.W1.penalty, 52 | gradient.W2 + gradient.W2.penalty, 53 | gradient.W3) 54 | attr(cost, "gradient") <- gradient 55 | cost 56 | } 57 | qrnn2.eval <- 58 | function(x, W1, W2, W3, lower, monotone, eps, Th) 59 | { 60 | if (!is.null(monotone)) { 61 | W1[monotone, ] <- exp(W1[monotone, ]) 62 | W2[1:(nrow(W2) - 1), ] <- exp(W2[1:(nrow(W2) - 1),]) 63 | W3[1:(nrow(W3) - 1), ] <- exp(W3[1:(nrow(W3) - 1),]) 64 | } 65 | x <- cbind(x, 1) 66 | h1 <- x %*% W1 67 | y1 <- Th(h1) 68 | aug.y1 <- cbind(y1, 1) 69 | h2 <- aug.y1 %*% W2 70 | y2 <- Th(h2) 71 | aug.y2 <- cbind(y2, 1) 72 | h3 <- aug.y2 %*% W3 73 | y3 <- hramp(h3, lower, eps) 74 | y3 75 | } 76 | qrnn2.initialize <- 77 | function(x, y, n.hidden, n.hidden2, 78 | init.range=c(-0.5, 0.5, -0.5, 0.5, -0.5, 0.5)) 79 | { 80 | if(!is.list(init.range)){ 81 | if(length(init.range)==6){ 82 | r11 <- init.range[1] 83 | r12 <- init.range[2] 84 | r21 <- init.range[3] 85 | r22 <- init.range[4] 86 | r31 <- init.range[5] 87 | r32 <- init.range[6] 88 | } else{ 89 | r11 <- r21 <- r31 <- init.range[1] 90 | r12 <- r22 <- r32 <- init.range[2] 91 | } 92 | W1 <- matrix(runif((ncol(x)+1)*n.hidden, r11, r12), ncol(x)+1, n.hidden) 93 | W2 <- matrix(runif((n.hidden+1)*n.hidden2, r21, r22), n.hidden+1, n.hidden2) 94 | W3 <- matrix(runif((n.hidden2+1)*ncol(y), r31, r32), n.hidden2+1, ncol(y)) 95 | weights <- c(W1, W2, W3) 96 | } else{ 97 | weights <- unlist(init.range) 98 | } 99 | weights 100 | } 101 | adam <- function(f, p, x, y, w, tau, ..., iterlim=5000, iterbreak=iterlim, 102 | alpha=0.01, minibatch=nrow(x), beta1=0.9, beta2=0.999, 103 | epsilon=1e-8, print.level=10){ 104 | minibatch <- min(minibatch, nrow(x)) 105 | if(minibatch < 1) minibatch <- 1 106 | minibatches <- suppressWarnings(matrix(seq_along(y), nrow=minibatch)) 107 | f.best <- f(p, x=x, y=y, w=w, tau=tau, ...) 
108 | p.best <- p 109 | i.break <- 0 110 | M <- R <- p*0 111 | if(print.level > 0) 112 | cat("minibatch = ", minibatch, "\n", 0, f.best, i.break, f.best, "\n") 113 | for(iter in seq(iterlim)){ 114 | cases.random <- sample(nrow(x)) 115 | for(i in seq(ncol(minibatches))){ 116 | cases <- cases.random[minibatches[,i]] 117 | grad <- attr(f(p, x=x[cases,,drop=FALSE], y=y[cases,,drop=FALSE], 118 | w=w[cases], tau=tau[cases], ...), "gradient") 119 | M <- beta1*M + (1-beta1)*grad 120 | R <- beta2*R + (1-beta2)*grad^2 121 | m_k_hat <- M/(1-beta1^iter) 122 | r_k_hat <- R/(1-beta2^iter) 123 | p <- p - alpha*m_k_hat/(sqrt(r_k_hat) + epsilon) 124 | } 125 | f.iter <- f(p, x=x, y=y, w=w, tau=tau, ...) 126 | if(f.iter < f.best){ 127 | f.best <- f.iter 128 | p.best <- p 129 | i.break <- 0 130 | } else{ 131 | i.break <- i.break + 1 132 | } 133 | if(print.level > 0 && iter%%print.level==0) 134 | cat(iter, f.iter, i.break, f.best, "\n") 135 | if(i.break > iterbreak){ 136 | break 137 | } 138 | } 139 | if(print.level > 0) cat("****", f.best, "\n") 140 | list(estimate=p.best, minimum=f.best) 141 | } 142 | qrnn2.fit <- 143 | function(x, y, n.hidden=2, n.hidden2=2, w=NULL, tau=0.5, n.ensemble=1, 144 | iter.max=5000, n.trials=5, bag=FALSE, lower=-Inf, 145 | init.range=c(-0.5, 0.5, -0.5, 0.5, -0.5, 0.5), 146 | monotone=NULL, eps.seq=2^seq(-8, -32, by=-4), Th=sigmoid, 147 | Th.prime=sigmoid.prime, penalty=0, unpenalized=NULL, n.errors.max=10, 148 | trace=TRUE, method=c("nlm", "adam"), scale.y=TRUE, ...) 149 | 150 | { 151 | method <- match.arg(method) 152 | if (!is.matrix(x)) stop("\"x\" must be a matrix") 153 | if (!is.matrix(y)) stop("\"y\" must be a matrix") 154 | if (ncol(y) != 1) stop("\"y\" must be univariate") 155 | if (any(is.na(c(x, y)))) stop("missing values in \"x\" or \"y\"") 156 | if (any(apply(x, 2, sd) < .Machine$double.eps^0.5)) stop("zero variance column(s) in \"x\"") 157 | if (any((tau > 1) | (tau < 0))) stop("invalid \"tau\"") 158 | if (identical(Th, linear)) 159 | stop("use \"qrnn.fit\" for linear models") 160 | if(is.null(w)) w <- rep(1/nrow(y), nrow(y)) 161 | if (any(w < 0)) stop("invalid \"w\"") 162 | x <- scale(x) 163 | x.center <- attr(x, "scaled:center") 164 | x.scale <- attr(x, "scaled:scale") 165 | if(scale.y){ 166 | y <- scale(y) 167 | } else{ 168 | attr(y, "scaled:center") <- 0 169 | attr(y, "scaled:scale") <- 1 170 | } 171 | y.center <- attr(y, "scaled:center") 172 | y.scale <- attr(y, "scaled:scale") 173 | lower.scaled <- (lower-y.center)/y.scale 174 | weights <- vector("list", n.ensemble) 175 | if(trace){ 176 | if(length(unique(tau)) <= 100){ 177 | cat("tau =", unique(tau), "\n", sep=" ") 178 | } else{ 179 | cat("tau = [", range(tau), "]; n.tau =", length(tau), "\n", sep=" ") 180 | } 181 | } 182 | for (i in seq(n.ensemble)){ 183 | if(trace) cat(i, "/", n.ensemble, "\n", sep="") 184 | w.tmp <- NA 185 | class(w.tmp) <- "try-error" 186 | n.errors <- 0 187 | while (inherits(w.tmp, "try-error")) { 188 | w.tmp <- try(qrnn2.optimize(x, y, n.hidden, n.hidden2, w, tau, 189 | iter.max, n.trials, bag, lower.scaled, 190 | init.range, monotone, eps.seq, Th, Th.prime, 191 | penalty, unpenalized, trace, method, ...), 192 | silent = TRUE) 193 | n.errors <- n.errors + 1 194 | if (n.errors > n.errors.max) stop("optimization failed") 195 | } 196 | weights[[i]] <- w.tmp 197 | } 198 | if(trace) cat("\n") 199 | parms <- list(weights=weights, lower=lower, eps.seq=eps.seq, 200 | tau=tau, Th=Th, x.center=x.center, 201 | x.scale=x.scale, y.center=y.center, y.scale=y.scale, 202 | monotone=monotone) 203 | parms 
204 | } 205 | qrnn2.optimize <- 206 | function(x, y, n.hidden, n.hidden2, w, tau, iter.max, n.trials, bag, lower, 207 | init.range, monotone, eps.seq, Th, Th.prime, penalty, unpenalized, 208 | trace, method, ...) 209 | { 210 | cases <- seq(nrow(x)) 211 | if (bag) cases <- sample(nrow(x), replace=TRUE) 212 | x <- x[cases,,drop=FALSE] 213 | y <- y[cases,,drop=FALSE] 214 | w <- w[cases] 215 | if(length(lower) > 1) lower <- lower[cases] 216 | if(length(tau)==1) tau <- rep(tau, length(y)) 217 | tau <- tau[cases] 218 | eps.seq <- sort(eps.seq, decreasing=TRUE) 219 | cost.best <- Inf 220 | for(i in seq(n.trials)){ 221 | weights <- qrnn2.initialize(x, y, n.hidden, n.hidden2, init.range) 222 | if(any(lower != -Inf)){ 223 | for(eps in eps.seq){ 224 | if(method=="nlm"){ 225 | fit <- suppressWarnings(nlm(qrnn2.cost, weights, 226 | iterlim=iter.max, x=x, y=y, n.hidden=n.hidden, 227 | n.hidden2=n.hidden2, w=w, tau=tau, lower=-Inf, 228 | monotone=monotone, eps=eps, Th=Th, 229 | Th.prime=Th.prime, penalty=penalty, 230 | unpenalized=unpenalized, 231 | check.analyticals=FALSE, ...)) 232 | } else if(method=="adam"){ 233 | fit <- suppressWarnings(adam(qrnn2.cost, weights, x, y, 234 | w, tau, iterlim=iter.max, n.hidden=n.hidden, 235 | n.hidden2=n.hidden2, lower=-Inf, 236 | monotone=monotone, eps=eps, Th=Th, 237 | Th.prime=Th.prime, penalty=penalty, 238 | unpenalized=unpenalized, ...)) 239 | } 240 | weights <- fit$estimate 241 | } 242 | } 243 | for(eps in eps.seq){ 244 | if(method=="nlm"){ 245 | fit <- suppressWarnings(nlm(qrnn2.cost, weights, 246 | iterlim=iter.max, x=x, y=y, n.hidden=n.hidden, 247 | n.hidden2=n.hidden2, w=w, tau=tau, lower=lower, 248 | monotone=monotone, eps=eps, Th=Th, Th.prime=Th.prime, 249 | penalty=penalty, unpenalized=unpenalized, 250 | check.analyticals=FALSE, ...)) 251 | } else if(method=="adam"){ 252 | fit <- suppressWarnings(adam(qrnn2.cost, weights, x, y, w, tau, 253 | iterlim=iter.max, n.hidden=n.hidden, 254 | n.hidden2=n.hidden2, lower=lower, monotone=monotone, 255 | eps=eps, Th=Th, Th.prime=Th.prime, 256 | penalty=penalty, unpenalized=unpenalized, ...)) 257 | } 258 | weights <- fit$estimate 259 | } 260 | cost <- fit$minimum 261 | if(trace) cat(i, cost, "\n") 262 | if(cost < cost.best){ 263 | cost.best <- cost 264 | weights.best <- fit$estimate 265 | } 266 | } 267 | if(trace) cat("*", cost.best, "\n") 268 | weights.best <- qrnn2.reshape(x, y, weights.best, n.hidden, n.hidden2) 269 | weights.best 270 | } 271 | qrnn2.predict <- 272 | function(x, parms) 273 | { 274 | if (!is.matrix(x)) stop("\"x\" must be a matrix") 275 | weights <- parms$weights 276 | lower <- parms$lower 277 | monotone <- parms$monotone 278 | eps <- min(parms$eps.seq) 279 | Th <- parms$Th 280 | x.center <- parms$x.center 281 | x.scale <- parms$x.scale 282 | y.center <- parms$y.center 283 | y.scale <- parms$y.scale 284 | lower <- (lower-y.center)/y.scale 285 | x <- sweep(x, 2, x.center, "-") 286 | x <- sweep(x, 2, x.scale, "/") 287 | y.bag <- matrix(0, ncol=length(weights), nrow=nrow(x)) 288 | for (i in seq_along(weights)){ 289 | y.bag[,i] <- qrnn2.eval(x, weights[[i]]$W1, weights[[i]]$W2, 290 | weights[[i]]$W3, lower, monotone, 291 | eps, Th) 292 | y.bag[,i] <- y.bag[,i]*y.scale + y.center 293 | } 294 | y.bag 295 | } 296 | qrnn2.reshape <- 297 | function(x, y, weights, n.hidden, n.hidden2) 298 | { 299 | N11 <- ncol(x)+1 300 | N12 <- n.hidden 301 | N1 <- N11*N12 302 | W1 <- weights[1:N1] 303 | W1 <- matrix(W1, N11, N12) 304 | N21 <- n.hidden+1 305 | N22 <- n.hidden2 306 | N2 <- N1 + N21*N22 307 | W2 <- 
weights[(N1+1):N2] 308 | W2 <- matrix(W2, N21, N22) 309 | N31 <- n.hidden2+1 310 | N32 <- ncol(y) 311 | N3 <- N2 + N31*N32 312 | W3 <- weights[(N2+1):N3] 313 | W3 <- matrix(W3, N31, N32) 314 | list(W1=W1, W2=W2, W3=W3) 315 | } -------------------------------------------------------------------------------- /R/quantile.dtn.R: -------------------------------------------------------------------------------- 1 | dquantile <- 2 | function (x, tau, quant, lower = -Inf) 3 | { 4 | if (length(tau) != length(quant)) 5 | stop("\"tau\" and \"quant\" must be same length") 6 | if (any(tau < 0) | any(tau > 1)) 7 | stop("\"tau\" must be in range [0, 1]") 8 | quant[quant < lower] <- lower 9 | if (is.unsorted(tau) | is.unsorted(quant)) { 10 | warning("sorting \"tau\" or \"quant\"") 11 | tau <- sort(tau) 12 | quant <- sort(quant) 13 | } 14 | if (any(x < lower)) { 15 | warning("\"x\" < lower limit; replacing values with \"lower\"") 16 | x[x < lower] <- lower 17 | } 18 | if ((lower != -Inf) & (!(0 %in% tau))) { 19 | quant <- c(lower, quant) 20 | tau <- c(0, tau) 21 | } 22 | dq <- function(x, tau, quant) { 23 | n <- length(quant) 24 | d <- 0 25 | z1 <- (tau[2]-tau[1])/(quant[2]-quant[1]) 26 | b1 <- tau[1]/z1 27 | zn <- (tau[n]-tau[n-1])/(quant[n]-quant[n-1]) 28 | bn <- (1-tau[n])/zn 29 | run <- TRUE 30 | if (x < quant[1]) { 31 | d <- z1*exp(-abs(x-quant[1])/b1) 32 | run <- FALSE 33 | } 34 | if (x >= quant[n]) { 35 | d <- zn*exp(-abs(x-quant[n])/bn) 36 | run <- FALSE 37 | } 38 | if (run) { 39 | for (j in 2:n) { 40 | if (quant[j] > x) { 41 | d <- (tau[j]-tau[j-1])/(quant[j]-quant[j-1]) 42 | break 43 | } 44 | } 45 | } 46 | d 47 | } 48 | d <- sapply(x, dq, tau = tau, quant = quant) 49 | if((lower != -Inf) & any(x == lower)) { 50 | d[x == lower] <- max(tau[quant == lower]) 51 | } 52 | d 53 | } 54 | pquantile <- 55 | function(q, tau, quant, lower = -Inf, ...) 56 | { 57 | if (length(tau) != length(quant)) 58 | stop("\"tau\" and \"quant\" must be same length") 59 | if (any(tau < 0) | any(tau > 1)) 60 | stop("\"tau\" must be in range [0, 1]") 61 | quant[quant < lower] <- lower 62 | if (is.unsorted(tau) | is.unsorted(quant)) { 63 | warning("sorting \"tau\" or \"quant\"") 64 | tau <- sort(tau) 65 | quant <- sort(quant) 66 | } 67 | if ((lower != -Inf) & (!(0 %in% tau))) { 68 | quant <- c(lower, quant) 69 | tau <- c(0, tau) 70 | } else if (lower == -Inf) { 71 | tau <- tau[quant != -Inf] 72 | quant <- quant[quant != -Inf] 73 | } 74 | p <- q 75 | p[q >= lower] <- approx(x = quant, y = tau, xout = q[q >= lower], 76 | ties = max)$y 77 | pq.lu <- function(q, tau, quant, ...) { 78 | if (q < quant[1]){ 79 | p <- integrate(dquantile, lower = -Inf, upper = q, 80 | tau = tau, quant = quant, ...)$value 81 | } else { 82 | p <- integrate(dquantile, lower = quant[length(quant)], 83 | upper = q, tau = tau, quant = quant, 84 | ...)$value + tau[length(quant)] 85 | } 86 | p 87 | } 88 | if (any(is.na(p))) { 89 | p[is.na(p)] <- sapply(q[is.na(p)], pq.lu, tau = tau, 90 | quant = quant, ...) 91 | } 92 | if (any(q < lower)) { 93 | warning("\"q\" < lower limit") 94 | p[q < lower] <- NA 95 | } 96 | if (lower == -Inf) p[q == -Inf] <- 0 97 | p[q == Inf] <- 1 98 | p 99 | } 100 | qquantile <- 101 | function(p, tau, quant, lower = -Inf, tol = .Machine$double.eps^0.25, 102 | maxiter = 1000, range.mult = 1.1, max.error = 100, ...) 
103 | { 104 | if (length(tau) != length(quant)) 105 | stop("\"tau\" and \"quant\" must be same length") 106 | if (any(tau < 0) | any(tau > 1)) 107 | stop("\"tau\" must be in range [0, 1]") 108 | if (any(p < 0) | any(p > 1)) 109 | stop("\"p\" must be in range [0, 1]") 110 | quant[quant < lower] <- lower 111 | if (is.unsorted(tau) | is.unsorted(quant)) { 112 | warning("sorting \"tau\" or \"quant\"") 113 | tau <- sort(tau) 114 | quant <- sort(quant) 115 | } 116 | if ((lower != -Inf) & (!(0 %in% tau))) { 117 | quant <- c(lower, quant) 118 | tau <- c(0, tau) 119 | } else if (lower == -Inf) { 120 | tau <- tau[quant != -Inf] 121 | quant <- quant[quant != -Inf] 122 | } 123 | q <- approx(x = tau, y = quant, xout = p)$y 124 | qq.lu <- function(p, tau, quant, min.max, tol, maxiter, ...) { 125 | cost <- function(q, p, tau, quant, ...){ 126 | pp <- pquantile(q, tau = tau, quant = quant, ...) 127 | pp-p 128 | } 129 | if(p < tau[1]){ 130 | q <- uniroot(f = cost, lower = min(min.max), 131 | upper = quant[1], tol = tol, 132 | maxiter = maxiter, p = p, tau = tau, 133 | quant = quant, ...)$root 134 | } else { 135 | q <- uniroot(f = cost, lower = quant[length(quant)], 136 | upper = max(min.max), tol = tol, 137 | maxiter = maxiter, p = p, tau = tau, 138 | quant = quant, ...)$root 139 | } 140 | q 141 | } 142 | if (any(is.na(q))) { 143 | error <- TRUE 144 | n.error <- 0 145 | quant.range <- diff(range(quant)) 146 | while (error & (n.error < max.error)) { 147 | quant.range <- range.mult*quant.range 148 | min.max <- c(quant[1] - quant.range, 149 | quant[length(quant)] + quant.range) 150 | qq <- try(sapply(p[is.na(q)], qq.lu, tau = tau, 151 | quant = quant, min.max = min.max, 152 | tol = tol, maxiter = maxiter, ...), 153 | silent = TRUE) 154 | error <- inherits(qq, "try-error") 155 | n.error <- n.error + 1 156 | } 157 | if (error) stop("n.error > max.error") 158 | q[is.na(q)] <- qq 159 | } 160 | if (lower == -Inf) q[p == 0] <- -Inf 161 | q[p == 1] <- Inf 162 | q 163 | } 164 | rquantile <- 165 | function(n, tau, quant, lower = -Inf, tol = .Machine$double.eps^0.25, 166 | maxiter = 1000, range.mult = 1.1, max.error = 100, ...) 167 | { 168 | if (length(tau) != length(quant)) 169 | stop("\"tau\" and \"quant\" must be same length") 170 | if (any(tau < 0) | any(tau > 1)) 171 | stop("\"tau\" must be in range [0, 1]") 172 | quant[quant < lower] <- lower 173 | if (is.unsorted(tau) | is.unsorted(quant)) { 174 | warning("sorting \"tau\" or \"quant\"") 175 | tau <- sort(tau) 176 | quant <- sort(quant) 177 | } 178 | q <- qquantile(runif(n), tau = tau, quant = quant, 179 | lower = lower, tol = tol, maxiter = maxiter, 180 | range.mult = range.mult, max.error = max.error, 181 | ...) 
182 | q 183 | } 184 | -------------------------------------------------------------------------------- /R/tilted.abs.R: -------------------------------------------------------------------------------- 1 | tilted.abs <- 2 | function(x, tau) 3 | { 4 | ifelse(x>0, x*tau, x*(tau-1)) 5 | } 6 | 7 | -------------------------------------------------------------------------------- /R/transfer.R: -------------------------------------------------------------------------------- 1 | linear.prime <- 2 | function(x) 3 | { 4 | x*0 + 1 5 | } 6 | 7 | linear <- 8 | function(x) 9 | { 10 | x 11 | } 12 | 13 | elu.prime <- 14 | function(x, alpha=1) 15 | { 16 | ifelse(x >= 0, 1, elu(x, alpha) + alpha) 17 | } 18 | 19 | elu <- 20 | function(x, alpha=1) 21 | { 22 | ifelse(x >= 0, x, alpha*(exp(x)-1)) 23 | } 24 | 25 | sigmoid.prime <- 26 | function(x) 27 | { 28 | (0.5)*(1-tanh(0.5*x)^2) 29 | } 30 | 31 | sigmoid <- 32 | function(x) 33 | { 34 | tanh(0.5*x) 35 | } 36 | 37 | softplus.prime <- 38 | function(x, alpha=2) 39 | { 40 | y <- exp(alpha*x)/(1+exp(alpha*x)) 41 | ifelse(is.nan(y), 1, y) 42 | } 43 | 44 | softplus <- 45 | function(x, alpha=2) 46 | { 47 | y <- log(1+exp(alpha*x))/alpha 48 | ifelse(is.infinite(y), x, y) 49 | } 50 | 51 | relu.prime <- function(x){ 52 | ifelse(x >= 0, 1, 0) 53 | } 54 | 55 | relu <- function(x){ 56 | ifelse(x >= 0, x, 0) 57 | } 58 | 59 | logistic.prime <- function(x){ 60 | 0.25/(cosh(x/2)^2) 61 | } 62 | 63 | logistic <- function(x){ 64 | 0.5 + 0.5*tanh(x/2) 65 | } 66 | 67 | lrelu.prime <- function(x){ 68 | ifelse(x >= 0, 1, 0.01) 69 | } 70 | 71 | lrelu <- function(x){ 72 | ifelse(x >= 0, x, 0.01*x) 73 | } 74 | 75 | softmax <- function(x){ 76 | exp(x)/sum(exp(x)) 77 | } 78 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | 3 | The [qrnn](https://cran.r-project.org/package=qrnn) package for R implements 4 | the quantile regression neural network (QRNN) (Taylor, 2000; Cannon, 2011; 5 | Cannon, 2018), which is a flexible nonlinear form of quantile regression. 6 | While low level modelling functions are available, it is recommended that the 7 | `mcqrnn.fit` and `mcqrnn.predict` wrappers be used for most applications. More 8 | information is provided below. 9 | 10 | The goal of quantile regression is to estimate conditional quantiles of a 11 | response variable that depend on covariates in some form of regression 12 | equation. The QRNN adopts the multi-layer perceptron neural network 13 | architecture. The implementation follows from previous work on the estimation 14 | of censored regression quantiles, thus allowing predictions for mixed 15 | discrete-continuous variables like precipitation (Friederichs and Hense, 2007). 16 | A differentiable approximation to the quantile regression cost function is 17 | adopted so that a simplified form of the finite smoothing algorithm 18 | (Chen, 2007) can be used to estimate model parameters. This approximation can 19 | also be used to force the model to solve a standard least squares regression 20 | problem or an expectile regression problem (Cannon, 2018). Weight penalty 21 | regularization can be added to help avoid overfitting, and ensemble models 22 | with bootstrap aggregation are also provided. 23 | 24 | An optional monotone constraint can be invoked, which guarantees monotonic 25 | non-decreasing behaviour of model outputs with respect to specified covariates 26 | (Zhang, 1999). 
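A minimal sketch of invoking the constraint on simulated data (the data and
settings here are illustrative only, not from the package):

```
library(qrnn)

# Simulated data in which y increases, on average, with x
set.seed(1)
x <- matrix(runif(200), ncol=1)
y <- matrix(x + 0.1*rnorm(200), ncol=1)

# monotone=1 forces the fitted conditional median to be
# non-decreasing in column 1 of x
fit <- qrnn.fit(x, y, n.hidden=2, tau=0.5, monotone=1,
                n.trials=1, iter.max=500)
pred <- qrnn.predict(x, fit)
```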
The input-hidden layer weight matrix can also be constrained
27 | so that model relationships are strictly additive (see `gam.style`;
28 | Cannon, 2018). Borrowing strength by using a composite model for multiple
29 | regression quantiles (Zou et al., 2008; Xu et al., 2017) is also possible
30 | (see `composite.stack`). Weights can be applied to individual cases
31 | (Jiang et al., 2012).
32 | 
33 | Applying the monotone constraint in combination with the composite model allows
34 | one to simultaneously estimate multiple non-crossing quantiles (Cannon, 2018);
35 | the resulting monotone composite QRNN (MCQRNN) is provided by the
36 | `mcqrnn.fit` and `mcqrnn.predict` wrapper functions. Examples for `qrnn.fit`
37 | and `qrnn2.fit` show how the same functionality can be achieved using the low
38 | level `composite.stack` and fitting functions.
39 | 
40 | QRNN models with a single layer of hidden nodes can be fitted using the
41 | `qrnn.fit` function. Predictions from a fitted model are made using
42 | the `qrnn.predict` function. The function `gam.style` can be used to visualize
43 | and investigate fitted covariate/response relationships from `qrnn.fit`
44 | (Plate et al., 2000). Note: a single hidden layer is usually sufficient
45 | for most modelling tasks. With added monotonicity constraints, a second hidden
46 | layer may sometimes be beneficial (Lang, 2005; Minin et al., 2010). QRNN models
47 | with two hidden layers are available using the `qrnn2.fit` and `qrnn2.predict`
48 | functions. For non-crossing quantiles, the `mcqrnn.fit` and `mcqrnn.predict`
49 | wrappers also allow models with one or two hidden layers to be fitted and
50 | predictions to be made from the fitted models.
51 | 
52 | In general, `mcqrnn.fit` offers a convenient, single function for fitting
53 | multiple quantiles simultaneously. Note, however, that default settings in
54 | `mcqrnn.fit` and the other model fitting functions are not optimized for
55 | speed, memory efficiency, or accuracy and should be adjusted for a particular
56 | regression problem as needed. In particular, the sequence of smoothing
57 | parameters `eps.seq` used to approximate the quantile regression cost function,
58 | the number of trials `n.trials`, and the number of iterations `iter.max` can
59 | all influence fitting speed (and accuracy), as can changing the optimization
60 | algorithm via `method`. Non-crossing quantiles are implemented by stacking
61 | multiple copies of the `x` and `y` data, one copy per value of `tau`. Depending
62 | on the dataset size, this can lead to large matrices being passed to the
63 | optimization routine. In the `adam` adaptive stochastic gradient descent
64 | method, the `minibatch` size can be adjusted to help offset this cost. Model
65 | complexity is determined via the number of hidden nodes, `n.hidden` and
66 | `n.hidden2`, as well as the optional weight penalty `penalty`; values of these
67 | hyperparameters are crucial to obtaining a well-performing model.
68 | 
69 | When using `mcqrnn.fit`, it is also possible to estimate the full quantile
70 | regression process by specifying a single integer value for `tau`. In this
71 | case, `tau` is the number of random samples used in the stochastic estimation
72 | (see the sketch below). For more information, see Tagasovska and Lopez-Paz
73 | (2019). It may be necessary to restart the optimization multiple times from
74 | the previous weights and biases, in which case `init.range` can be set to the
75 | `weights` values from the previously completed optimization run.
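A minimal sketch of this stochastic estimation (simulated data; `tau=1000L`,
`minibatch=64`, and the other settings are illustrative rather than tuned
values):

```
library(qrnn)

set.seed(1)
x <- matrix(runif(500), ncol=1)
y <- matrix(x + 0.1*rnorm(500), ncol=1)

# An integer tau requests stochastic estimation of the full quantile
# regression process from tau random (x, y, tau) draws; n.hidden2 is
# set so that the adam/minibatch options take effect via qrnn2.fit
fit <- mcqrnn.fit(x, y, n.hidden=2, n.hidden2=2, tau=1000L,
                  method="adam", minibatch=64, n.trials=1,
                  iter.max=500)

# Predictions can then be made for arbitrary quantiles
pred <- mcqrnn.predict(x, fit, tau=c(0.1, 0.5, 0.9))
```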
For large datasets, it is recommended 76 | that the `adam` method with an integer `tau` and an appropriate `minibatch` 77 | size be used for optimization. 78 | 79 | If models for multiple quantiles have been fitted, for example by 80 | `mcqrnn.fit` or multiple calls to either `qrnn.fit` or `qrnn2.fit`, the 81 | (experimental) `dquantile` function and its companion functions are available to 82 | create proper probability density, distribution, and quantile functions 83 | (Quiñonero-Candela et al., 2006; Cannon, 2011). Alternative distribution, 84 | quantile, and random variate functions based on the Nadaraya-Watson estimator 85 | (Passow and Donner, 2020) are also available in `[p,q,r]quantile.nw`. These can 86 | be useful for assessing probabilistic calibration and evaluating model 87 | performance. 88 | 89 | Note: the user cannot easily change the output layer transfer function 90 | to be different than `hramp`, which provides either the identity function or a 91 | ramp function to accommodate optional left censoring. Some applications, for 92 | example fitting smoothed binary quantile regression models for a binary target 93 | variable (Kordas, 2006), require an alternative like the logistic sigmoid. 94 | While not straightforward, it is possible to change the output layer transfer 95 | function by switching off `scale.y` in the call to the fitting 96 | function and reassigning `hramp` and `hramp.prime` as follows: 97 | 98 | ``` 99 | library(qrnn) 100 | 101 | # Use the logistic sigmoid as the output layer transfer function 102 | To.logistic <- function(x, lower, eps) 0.5 + 0.5*tanh(x/2) 103 | environment(To.logistic) <- asNamespace("qrnn") 104 | assignInNamespace("hramp", To.logistic, ns="qrnn") 105 | 106 | # Change the derivative of the output layer transfer function 107 | To.logistic.prime <- function(x, lower, eps) 0.25/(cosh(x/2)^2) 108 | environment(To.logistic.prime) <- asNamespace("qrnn") 109 | assignInNamespace("hramp.prime", To.logistic.prime, ns="qrnn") 110 | 111 | ``` 112 | 113 | ### References 114 | 115 | Cannon, A.J., 2011. Quantile regression neural networks: implementation 116 | in R and application to precipitation downscaling. Computers & Geosciences, 117 | 37: 1277-1284. doi:10.1016/j.cageo.2010.07.005 118 | 119 | Cannon, A.J., 2018. Non-crossing nonlinear regression quantiles by 120 | monotone composite quantile regression neural network, with application 121 | to rainfall extremes. Stochastic Environmental Research and Risk Assessment, 122 | 32(11): 3207-3225. doi:10.1007/s00477-018-1573-6 123 | 124 | Chen, C., 2007. A finite smoothing algorithm for quantile regression. 125 | Journal of Computational and Graphical Statistics, 16: 136-164. 126 | 127 | Friederichs, P. and A. Hense, 2007. Statistical downscaling of extreme 128 | precipitation events using censored quantile regression. Monthly Weather 129 | Review, 135: 2365-2378. 130 | 131 | Jiang, X., J. Jiang, and X. Song, 2012. Oracle model selection for nonlinear 132 | models based on weighted composite quantile regression. Statistica Sinica, 133 | 22(4): 1479-1506. 134 | 135 | Kordas, G., 2006. Smoothed binary regression quantiles. Journal of Applied 136 | Econometrics, 21(3): 387-407. 137 | 138 | Lang, B., 2005. Monotonic multi-layer perceptron networks as universal 139 | approximators. International Conference on Artificial Neural Networks, 140 | Artificial Neural Networks: Formal Models and Their Applications-ICANN 2005, 141 | pp. 31-37. 142 | 143 | Minin, A., M. Velikova, B. Lang, and H. Daniels, 2010. 
Comparison of universal
144 | approximators incorporating partial monotonicity by structure.
145 | Neural Networks, 23(4): 471-475.
146 | 
147 | Passow, C., R.V. Donner, 2020. Regression-based distribution mapping for
148 | bias correction of climate model outputs using linear quantile regression.
149 | Stochastic Environmental Research and Risk Assessment, 34: 87-102.
150 | 
151 | Plate, T., J. Bert, J. Grace, and P. Band, 2000. Visualizing the function
152 | computed by a feedforward neural network. Neural Computation,
153 | 12(6): 1337-1354.
154 | 
155 | Quiñonero-Candela, J., C. Rasmussen, F. Sinz, O. Bousquet,
156 | B. Scholkopf, 2006. Evaluating Predictive Uncertainty Challenge.
157 | Lecture Notes in Artificial Intelligence, 3944: 1-27.
158 | 
159 | Tagasovska, N., D. Lopez-Paz, 2019. Single-model uncertainties for deep
160 | learning. Advances in Neural Information Processing Systems, 32,
161 | NeurIPS 2019. doi:10.48550/arXiv.1811.00908
162 | 
163 | Taylor, J.W., 2000. A quantile regression neural network approach to
164 | estimating the conditional density of multiperiod returns. Journal of
165 | Forecasting, 19(4): 299-311.
166 | 
167 | Xu, Q., K. Deng, C. Jiang, F. Sun, and X. Huang, 2017. Composite quantile
168 | regression neural network with applications. Expert Systems with Applications,
169 | 76, 129-139.
170 | 
171 | Zhang, H. and Zhang, Z., 1999. Feedforward networks with monotone
172 | constraints. In: International Joint Conference on Neural Networks,
173 | vol. 3, p. 1820-1823. doi:10.1109/IJCNN.1999.832655
174 | 
175 | Zou, H. and M. Yuan, 2008. Composite quantile regression and the oracle model
176 | selection theory. The Annals of Statistics, 1108-1126.
177 | 
--------------------------------------------------------------------------------
/data/YVRprecip.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/qrnn/61ef772fff87c1a46322c156834aec260c840aaa/data/YVRprecip.rda
--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | citation(auto = meta)
 2 | 
 3 | bibentry(bibtype = "Article",
 4 |          title = "Quantile regression neural networks: implementation in R and application to precipitation downscaling",
 5 |          author = person(c("Alex", "J."), "Cannon"),
 6 |          year = "2011",
 7 |          journal = "Computers \\& Geosciences",
 8 |          volume = "37",
 9 |          pages = "1277-1284",
10 |          doi = "10.1016/j.cageo.2010.07.005")
11 | 
12 | bibentry(bibtype = "Article",
13 |          title = "Non-crossing nonlinear regression quantiles by monotone composite quantile regression neural network, with application to rainfall extremes",
14 |          author = person(c("Alex", "J."), "Cannon"),
15 |          year = "2018",
16 |          journal = "Stochastic Environmental Research and Risk Assessment",
17 |          volume = "32(11)",
18 |          pages = "3207-3225",
19 |          doi = "10.1007/s00477-018-1573-6")
20 | 
--------------------------------------------------------------------------------
/man/YVRprecip.Rd:
--------------------------------------------------------------------------------
 1 | \name{YVRprecip}
 2 | \alias{YVRprecip}
 3 | \docType{data}
 4 | \title{
 5 | Daily precipitation data at Vancouver Int'l Airport (YVR)
 6 | }
 7 | \description{
 8 | Daily precipitation totals (mm) at Vancouver Int'l Airport (YVR) for
 9 | the period 1971-2000.
10 | 11 | Covariates for a simple downscaling task include daily sea-level 12 | pressures (Pa), 700-hPa specific humidities (kg/kg), and 500-hPa 13 | geopotential heights (m) from the NCEP/NCAR Reanalysis 14 | (Kalnay et al., 1996) grid point centered on 50 deg. N and 237.5 deg. E. 15 | 16 | NCEP/NCAR Reanalysis data provided by the NOAA/OAR/ESRL PSD, Boulder, 17 | Colorado, USA, from their Web site at \url{https://psl.noaa.gov/}. 18 | } 19 | \references{ 20 | Kalnay, E. et al., 1996. The NCEP/NCAR 40-year reanalysis project, 21 | Bulletin of the American Meteorological Society, 77: 437-470. 22 | } 23 | \examples{ 24 | ## YVR precipitation data with seasonal cycle and NCEP/NCAR Reanalysis 25 | ## covariates 26 | 27 | data(YVRprecip) 28 | y <- YVRprecip$precip 29 | x <- cbind(sin(2*pi*seq_along(y)/365.25), 30 | cos(2*pi*seq_along(y)/365.25), 31 | YVRprecip$ncep) 32 | 33 | ## Fit QRNN and quantile regression models for the conditional 75th 34 | ## percentile using the final 3 years of record for training and the 35 | ## remaining years for testing. 36 | train <- as.numeric(format(YVRprecip$date, "\%Y")) >= 1998 37 | test <- !train 38 | 39 | set.seed(1) 40 | w.qrnn <- qrnn.fit(x=x[train,], y=y[train,,drop=FALSE], 41 | n.hidden=1, tau=0.75, iter.max=200, 42 | n.trials=1, lower=0) 43 | p.qrnn <- qrnn.predict(x=x[test,], parms=w.qrnn) 44 | w.qreg <- qrnn.fit(x=x[train,], y=y[train,,drop=FALSE], 45 | tau=0.75, n.trials=1, lower=0, 46 | Th=linear, Th.prime=linear.prime) 47 | p.qreg <- qrnn.predict(x=x[test,], parms=w.qreg) 48 | 49 | ## Tilted absolute value cost function on test dataset 50 | qvs.qrnn <- mean(tilted.abs(y[test]-p.qrnn, 0.75)) 51 | qvs.qreg <- mean(tilted.abs(y[test]-p.qreg, 0.75)) 52 | cat("Cost QRNN", qvs.qrnn, "\n") 53 | cat("Cost QREG", qvs.qreg, "\n") 54 | 55 | ## Plot first year of test dataset 56 | plot(y[test][1:365], type="h", xlab="Day", ylab="Precip. (mm)") 57 | points(p.qrnn[1:365], col="red", pch=19) 58 | points(p.qreg[1:365], col="blue", pch=19) 59 | 60 | } 61 | \keyword{datasets} 62 | -------------------------------------------------------------------------------- /man/adam.Rd: -------------------------------------------------------------------------------- 1 | \name{adam} 2 | \alias{adam} 3 | \title{ 4 | Adaptive stochastic gradient descent optimization algorithm (Adam) 5 | } 6 | \description{ 7 | From Kingma and Ba (2015): "We introduce Adam, an algorithm for first-order 8 | gradient-based optimization of stochastic objective functions, based on 9 | adaptive estimates of lower-order moments. The method is straightforward to 10 | implement, is computationally efficient, has little memory requirements, is 11 | invariant to diagonal rescaling of the gradients, and is well suited for 12 | problems that are large in terms of data and/or parameters. The method is 13 | also appropriate for non-stationary objectives and problems with very noisy 14 | and/or sparse gradients. The hyper-parameters have intuitive interpretations 15 | and typically require little tuning. Some connections to related algorithms, 16 | on which Adam was inspired, are discussed. We also analyze the theoretical 17 | convergence properties of the algorithm and provide a regret bound on the 18 | convergence rate that is comparable to the best known results under the 19 | online convex optimization framework. Empirical results demonstrate that Adam 20 | works well in practice and compares favorably to other stochastic 21 | optimization methods. 
Finally, we discuss AdaMax, a variant of Adam based on 22 | the infinity norm." 23 | } 24 | \usage{ 25 | adam(f, p, x, y, w, tau, ..., iterlim=5000, iterbreak=iterlim, 26 | alpha=0.01, minibatch=nrow(x), beta1=0.9, beta2=0.999, 27 | epsilon=1e-8, print.level=10) 28 | 29 | } 30 | \arguments{ 31 | \item{f}{ 32 | the function to be minimized, including gradient information 33 | contained in the \code{gradient} attribute. 34 | } 35 | \item{p}{ 36 | the starting parameters for the minimization. 37 | } 38 | \item{x}{ 39 | covariate matrix with number of rows equal to the number of 40 | samples and number of columns equal to the number of variables. 41 | } 42 | \item{y}{ 43 | response column matrix with number of rows equal to the number 44 | of samples. 45 | } 46 | \item{w}{ 47 | vector of weights with length equal to the number of samples. 48 | } 49 | \item{tau}{ 50 | vector of desired tau-quantile(s) with length equal to the 51 | number of samples. 52 | } 53 | \item{\dots}{ 54 | additional parameters passed to the \code{f} cost function. 55 | } 56 | \item{iterlim}{ 57 | the maximum number of iterations before the optimization is stopped. 58 | } 59 | \item{iterbreak}{ 60 | the maximum number of iterations without progress before the 61 | optimization is stopped. 62 | } 63 | \item{alpha}{ 64 | size of the learning rate. 65 | } 66 | \item{minibatch}{ 67 | number of samples in each minibatch. 68 | } 69 | \item{beta1}{ 70 | controls the exponential decay rate used to scale the biased first 71 | moment estimate. 72 | } 73 | \item{beta2}{ 74 | controls the exponential decay rate used to scale the biased second 75 | raw moment estimate. 76 | } 77 | \item{epsilon}{ 78 | smoothing term to avoid division by zero. 79 | } 80 | \item{print.level}{ 81 | the level of printing which is done during optimization. A value of 82 | \code{0} suppresses any progress reporting, whereas positive values 83 | report the value of \code{f} every \code{print.level} iterations. 84 | } 85 | } 86 | \value{ 87 | A list with elements: 88 | \item{estimate}{The best set of parameters found.} 89 | \item{minimum}{The value of \code{f} corresponding to \code{estimate}.} 90 | } 91 | \references{ 92 | Kingma, D.P. and J. Ba, 2015. Adam: A method for stochastic optimization. 93 | The International Conference on Learning Representations (ICLR) 2015. 94 | http://arxiv.org/abs/1412.6980 95 | } 96 | -------------------------------------------------------------------------------- /man/censored.mean.Rd: -------------------------------------------------------------------------------- 1 | \name{censored.mean} 2 | \alias{censored.mean} 3 | \title{ 4 | A hybrid mean/median function for left censored variables 5 | } 6 | \description{ 7 | Returns the median if the majority of values are censored and the mean otherwise. 8 | } 9 | \usage{ 10 | censored.mean(x, lower, trim=0) 11 | } 12 | \arguments{ 13 | \item{x}{ 14 | numeric vector. 15 | } 16 | \item{lower}{ 17 | left censoring point. 18 | } 19 | \item{trim}{ 20 | fraction of observations to be trimmed from each end of \code{x} before the mean is computed. 
21 | } 22 | } 23 | \seealso{ 24 | \code{\link{qrnn.fit}}, \code{\link{qrnn.predict}}} 25 | \examples{ 26 | x <- c(0, 0, 1, 2, 3) 27 | print(censored.mean(x, lower=0)) 28 | x.cens <- c(0, 0, 0, 1, 2) 29 | print(censored.mean(x.cens, lower=0)) 30 | } 31 | -------------------------------------------------------------------------------- /man/composite.stack.Rd: -------------------------------------------------------------------------------- 1 | \name{composite.stack} 2 | \alias{composite.stack} 3 | \title{ 4 | Reformat data matrices for composite quantile regression 5 | } 6 | \description{ 7 | Returns stacked \code{x} and \code{y} matrices and \code{tau} vector, 8 | which can be passed to \code{qrnn.fit} to fit composite quantile 9 | regression and composite QRNN models (Zou et al., 2008; 10 | Xu et al., 2017). In combination with the partial monotonicity 11 | constraints, stacking can be used to fit multiple non-crossing 12 | quantile functions (see \code{\link{mcqrnn}}). More details 13 | are provided in Cannon (2018). 14 | } 15 | \usage{ 16 | composite.stack(x, y, tau) 17 | } 18 | \arguments{ 19 | \item{x}{ 20 | covariate matrix with number of rows equal to the number of samples and number of columns equal to 21 | the number of variables. 22 | } 23 | \item{y}{ 24 | response column matrix with number of rows equal to the number of samples. 25 | } 26 | \item{tau}{ 27 | vector of tau-quantiles. 28 | } 29 | } 30 | \seealso{ 31 | \code{\link{qrnn.fit}}, \code{\link{mcqrnn}} 32 | } 33 | \examples{ 34 | x <- as.matrix(iris[,"Petal.Length",drop=FALSE]) 35 | y <- as.matrix(iris[,"Petal.Width",drop=FALSE]) 36 | 37 | cases <- order(x) 38 | x <- x[cases,,drop=FALSE] 39 | y <- y[cases,,drop=FALSE] 40 | 41 | tau <- seq(0.05, 0.95, by=0.05) 42 | x.y.tau <- composite.stack(x, y, tau) 43 | binary.tau <- dummy.code(as.factor(x.y.tau$tau)) 44 | 45 | set.seed(1) 46 | 47 | # Composite QR 48 | fit.cqr <- qrnn.fit(cbind(binary.tau, x.y.tau$x), x.y.tau$y, 49 | tau=x.y.tau$tau, n.hidden=1, n.trials=1, 50 | Th=linear, Th.prime=linear.prime) 51 | pred.cqr <- matrix(qrnn.predict(cbind(binary.tau, x.y.tau$x), fit.cqr), 52 | ncol=length(tau)) 53 | coef.cqr <- lm.fit(cbind(1, x), pred.cqr)$coef 54 | colnames(coef.cqr) <- tau 55 | print(coef.cqr) 56 | 57 | # Composite QRNN 58 | fit.cqrnn <- qrnn.fit(x.y.tau$x, x.y.tau$y, tau=x.y.tau$tau, 59 | n.hidden=1, n.trials=1, Th=sigmoid, 60 | Th.prime=sigmoid.prime) 61 | pred.cqrnn <- qrnn.predict(x.y.tau$x, fit.cqrnn) 62 | pred.cqrnn <- matrix(pred.cqrnn, ncol=length(tau), byrow=FALSE) 63 | 64 | matplot(x, pred.cqrnn, col="red", type="l") 65 | points(x, y, pch=20) 66 | 67 | } 68 | 69 | \references{ 70 | Cannon, A.J., 2018. Non-crossing nonlinear regression quantiles by 71 | monotone composite quantile regression neural network, with application 72 | to rainfall extremes. Stochastic Environmental Research and Risk Assessment, 73 | 32(11): 3207-3225. doi:10.1007/s00477-018-1573-6 74 | 75 | Xu, Q., K. Deng, C. Jiang, F. Sun, and X. Huang, 2017. Composite quantile 76 | regression neural network with applications. Expert Systems with Applications, 77 | 76, 129-139. 78 | 79 | Zou, H. and M. Yuan, 2008. Composite quantile regression and the oracle model 80 | selection theory. The Annals of Statistics, 1108-1126. 
81 | } 82 | -------------------------------------------------------------------------------- /man/dummy.code.Rd: -------------------------------------------------------------------------------- 1 | \name{dummy.code} 2 | \alias{dummy.code} 3 | \title{ 4 | Convert a factor to a matrix of dummy codes 5 | } 6 | \description{ 7 | Converts a factor (categorical) variable to a matrix of dummy codes 8 | using a 1 of C-1 binary coding scheme. 9 | } 10 | \usage{ 11 | dummy.code(x) 12 | } 13 | \arguments{ 14 | \item{x}{ 15 | a factor variable. 16 | } 17 | } 18 | \value{ 19 | a matrix with the number of rows equal to the number of cases in \code{x} 20 | and the number of columns equal to one less than the number of levels of 21 | \code{x}. The last level serves as the reference group. 22 | } 23 | \examples{ 24 | print(dummy.code(iris$Species)) 25 | } 26 | -------------------------------------------------------------------------------- /man/gam.style.Rd: -------------------------------------------------------------------------------- 1 | \name{gam.style} 2 | \alias{gam.style} 3 | \title{ 4 | Modified generalized additive model plots for interpreting QRNN models 5 | } 6 | \description{ 7 | Generalized additive model (GAM)-style effects plots provide a graphical 8 | means of interpreting relationships between covariates and conditional 9 | quantiles predicted by a QRNN. From Plate et al. (2000): The effect of the 10 | \code{i}th input variable at a particular input point \code{Delta.i.x} 11 | is the change in \code{f} resulting from changing \code{X1} to \code{x1} 12 | from \code{b1} (the baseline value [...]) while keeping the other 13 | inputs constant. The effects are plotted as short line segments, centered 14 | at (\code{x.i}, \code{Delta.i.x}), where the slope of the segment 15 | is given by the partial derivative. Variables that strongly influence 16 | the function value have a large total vertical range of effects. 17 | Functions without interactions appear as possibly broken straight lines 18 | (linear functions) or curves (nonlinear functions). Interactions show up as 19 | vertical spread at a particular horizontal location, that is, a vertical 20 | scattering of segments. Interactions are present when the effect of 21 | a variable depends on the values of other variables. 22 | } 23 | 24 | \usage{ 25 | gam.style(x, parms, column, baseline=mean(x[,column]), 26 | epsilon=1e-5, seg.len=0.02, seg.cols="black", 27 | plot=TRUE, return.results=FALSE, trim=0, 28 | ...) 29 | } 30 | \arguments{ 31 | \item{x}{ 32 | matrix with number of rows equal to the number of samples and number of columns equal to the number of covariate variables. 33 | } 34 | \item{parms}{ 35 | list returned by \code{\link{qrnn.fit}}. 36 | } 37 | \item{column}{ 38 | column of \code{x} for which effects plots should be returned. 39 | } 40 | \item{baseline}{ 41 | value of \code{x[,column]} to be used as the baseline for calculation of covariate effects; defaults to \code{mean(x[,column])}. 42 | } 43 | \item{epsilon}{ 44 | step-size used in the finite difference calculation of the partial derivatives. 45 | } 46 | \item{seg.len}{ 47 | length of effects line segments expressed as a fraction of the range of \code{x[,column]}. 48 | } 49 | \item{seg.cols}{ 50 | colors of effects line segments. 51 | } 52 | \item{plot}{ 53 | if \code{TRUE} (the default) then an effects plot for the given model is produced. 54 | } 55 | \item{return.results}{ 56 | if \code{TRUE} then values of effects and partial derivatives are returned.
57 | } 58 | \item{trim}{ 59 | if \code{plot=TRUE} and \code{parms} is for a model with \code{n.ensemble > 1}, value of \code{trim} passed to \code{\link{censored.mean}}. 60 | } 61 | \item{\dots}{ 62 | further arguments to be passed to \code{plot}. 63 | } 64 | } 65 | \seealso{ 66 | \code{\link{qrnn.fit}}, \code{\link{qrnn.predict}} 67 | } 68 | \value{ 69 | A list with elements: 70 | \item{effects}{a matrix of covariate effects.} 71 | \item{partials}{a matrix of covariate partial derivatives.} 72 | } 73 | \examples{ 74 | ## YVR precipitation data with seasonal cycle and NCEP/NCAR Reanalysis 75 | ## covariates 76 | data(YVRprecip) 77 | 78 | y <- YVRprecip$precip 79 | x <- cbind(sin(2*pi*seq_along(y)/365.25), 80 | cos(2*pi*seq_along(y)/365.25), 81 | YVRprecip$ncep) 82 | 83 | ## Fit QRNN, additive QRNN (QADD), and quantile regression (QREG) 84 | ## models for the conditional 75th percentile 85 | set.seed(1) 86 | train <- c(TRUE, rep(FALSE, 49)) 87 | w.qrnn <- qrnn.fit(x=x[train,], y=y[train,,drop=FALSE], 88 | n.hidden=2, tau=0.75, iter.max=500, 89 | n.trials=1, lower=0, penalty=0.01) 90 | w.qadd <- qrnn.fit(x=x[train,], y=y[train,,drop=FALSE], 91 | n.hidden=ncol(x), tau=0.75, iter.max=250, 92 | n.trials=1, lower=0, additive=TRUE) 93 | w.qreg <- qrnn.fit(x=x[train,], y=y[train,,drop=FALSE], 94 | tau=0.75, iter.max=100, n.trials=1, 95 | lower=0, Th=linear, Th.prime=linear.prime) 96 | 97 | ## GAM-style plots for slp, sh700, and z500 98 | for (column in 3:5) { 99 | gam.style(x[train,], parms=w.qrnn, column=column, 100 | main="QRNN") 101 | gam.style(x[train,], parms=w.qadd, column=column, 102 | main="QADD") 103 | gam.style(x[train,], parms=w.qreg, column=column, 104 | main="QREG") 105 | } 106 | } 107 | \references{ 108 | Cannon, A.J. and I.G. McKendry, 2002. A graphical sensitivity analysis 109 | for interpreting statistical climate models: Application to Indian 110 | monsoon rainfall prediction by artificial neural networks and 111 | multiple linear regression models. International Journal of 112 | Climatology, 22:1687-1708. 113 | 114 | Plate, T., J. Bert, J. Grace, and P. Band, 2000. Visualizing the function 115 | computed by a feedforward neural network. Neural Computation, 116 | 12(6): 1337-1354. 117 | } 118 | -------------------------------------------------------------------------------- /man/huber.Rd: -------------------------------------------------------------------------------- 1 | \name{huber} 2 | \alias{huber} 3 | \alias{huber.prime} 4 | \alias{hramp} 5 | \alias{hramp.prime} 6 | \alias{tilted.approx} 7 | \alias{tilted.approx.prime} 8 | \title{ 9 | Huber norm and Huber approximations to the ramp and tilted absolute value functions 10 | } 11 | \description{ 12 | Huber norm function providing a hybrid L1/L2 norm. Huber approximations to the ramp \code{hramp} and tilted absolute value \code{tilted.approx} functions. \code{huber.prime}, \code{hramp.prime}, and \code{tilted.approx.prime} provide the corresponding derivatives. 13 | } 14 | \usage{ 15 | huber(x, eps) 16 | huber.prime(x, eps) 17 | hramp(x, lower, eps) 18 | hramp.prime(x, lower, eps) 19 | tilted.approx(x, tau, eps) 20 | tilted.approx.prime(x, tau, eps) 21 | } 22 | \arguments{ 23 | \item{x}{ 24 | numeric vector. 25 | } 26 | \item{eps}{ 27 | epsilon value used in \code{\link{huber}} and related functions. 28 | } 29 | \item{tau}{ 30 | desired tau-quantile. 31 | } 32 | \item{lower}{ 33 | left censoring point. 
34 | } 35 | } 36 | \seealso{ 37 | \code{\link{tilted.abs}}, \code{\link{qrnn.cost}} 38 | } 39 | \examples{ 40 | x <- seq(-10, 10, length=100) 41 | plot(x, huber(x, eps=1), type="l", col="black", ylim=c(-2, 10), ylab="") 42 | lines(x, hramp(x, lower=0, eps=1), col="red") 43 | lines(x, tilted.approx(x, tau=0.1, eps=1), col="blue") 44 | lines(x, huber.prime(x, eps=1), col="black", lty=2) 45 | lines(x, hramp.prime(x, lower=0, eps=1), lty=2, col="red") 46 | lines(x, tilted.approx.prime(x, tau=0.1, eps=1), lty=2, col="blue") 47 | } 48 | -------------------------------------------------------------------------------- /man/mcqrnn.Rd: -------------------------------------------------------------------------------- 1 | \name{mcqrnn} 2 | \alias{mcqrnn} 3 | \alias{mcqrnn.fit} 4 | \alias{mcqrnn.predict} 5 | \title{ 6 | Monotone composite quantile regression neural network (MCQRNN) for simultaneous estimation of multiple non-crossing quantiles 7 | } 8 | \description{ 9 | High level wrapper functions for fitting and making predictions from a 10 | monotone composite quantile regression neural network (MCQRNN) model for 11 | multiple non-crossing regression quantiles (Cannon, 2018). 12 | 13 | Uses \code{composite.stack} and monotonicity constraints in 14 | \code{qrnn.fit} or \code{qrnn2.fit} to fit MCQRNN models with 15 | one or two hidden layers. Note: \code{Th} must be a non-decreasing 16 | function to guarantee non-crossing. 17 | 18 | Following Tagasovska and Lopez-Paz (2019), it is also possible to estimate the 19 | full quantile regression process by specifying a single integer value for 20 | \code{tau}. In this case, tau is the number of random samples used in the 21 | stochastic estimation. It may be necessary to restart the optimization multiple 22 | times from the previous weights and biases, in which case \code{init.range} can 23 | be set to the weights values from the previously completed optimization run. 24 | For large datasets, it is recommended that the \code{adam} method with an 25 | appropriate \code{minibatch} size be used for optimization. 26 | 27 | } 28 | \usage{ 29 | mcqrnn.fit(x, y, n.hidden=2, n.hidden2=NULL, w=NULL, 30 | tau=c(0.1, 0.5, 0.9), iter.max=5000, n.trials=5, 31 | lower=-Inf, init.range=c(-0.5, 0.5, -0.5, 0.5, -0.5, 0.5), 32 | monotone=NULL, eps.seq=2^seq(-8, -32, by=-4), Th=sigmoid, 33 | Th.prime=sigmoid.prime, penalty=0, n.errors.max=10, 34 | trace=TRUE, method=c("nlm", "adam"), scale.y=TRUE, ...) 35 | mcqrnn.predict(x, parms, tau=NULL) 36 | } 37 | \arguments{ 38 | \item{x}{ 39 | covariate matrix with number of rows equal to the number of samples and number of columns equal to the number of variables. 40 | } 41 | \item{y}{ 42 | response column matrix with number of rows equal to the number of samples. 43 | } 44 | \item{n.hidden}{ 45 | number of hidden nodes in the first hidden layer. 46 | } 47 | \item{n.hidden2}{ 48 | number of hidden nodes in the second hidden layer; \code{NULL} fits a model with a single hidden layer. 49 | } 50 | \item{w}{ 51 | if \code{tau} specifies a finite number of tau-quantiles, a vector of weights with length equal to the number of samples 52 | times the length of \code{tau}; see \code{composite.stack}. Otherwise, a vector of weights with length equal to the 53 | number of samples. \code{NULL} gives equal weight to each sample. 54 | } 55 | \item{tau}{ 56 | desired tau-quantiles; \code{NULL} in \code{mcqrnn.predict} uses values from the original call to \code{mcqrnn.fit}. 
57 | If \code{tau} is an integer, specifies the number of random samples used for stochastic estimation of the full quantile 58 | regression process. 59 | } 60 | \item{iter.max}{ 61 | maximum number of iterations of the optimization algorithm. 62 | } 63 | \item{n.trials}{ 64 | number of repeated trials used to avoid local minima. 65 | } 66 | \item{lower}{ 67 | left censoring point. 68 | } 69 | \item{init.range}{ 70 | initial weight range for input-hidden, hidden-hidden, and hidden-output weight matrices. If supplied with a list 71 | of weight matrices from a prior run of \code{mcqrnn.fit}, will restart model fitting with these values. 72 | } 73 | \item{monotone}{ 74 | column indices of covariates for which the monotonicity constraint should hold. 75 | } 76 | \item{eps.seq}{ 77 | sequence of \code{eps} values for the finite smoothing algorithm. 78 | } 79 | \item{Th}{ 80 | hidden layer transfer function; use \code{\link{sigmoid}}, \code{\link{elu}}, \code{\link{relu}}, 81 | \code{\link{lrelu}}, \code{\link{softplus}}, or other non-decreasing function. 82 | } 83 | \item{Th.prime}{ 84 | derivative of the hidden layer transfer function \code{Th}. 85 | } 86 | \item{penalty}{ 87 | weight penalty for weight decay regularization. 88 | } 89 | \item{n.errors.max}{ 90 | maximum number of \code{nlm} optimization failures allowed before quitting. 91 | } 92 | \item{trace}{ 93 | logical variable indicating whether or not diagnostic messages are printed during optimization. 94 | } 95 | \item{method}{ 96 | character string indicating which optimization algorithm to use when \code{n.hidden2 != NULL}. 97 | } 98 | \item{scale.y}{ 99 | logical variable indicating whether \code{y} should be scaled to zero mean and unit standard deviation. 100 | } 101 | \item{\dots}{ 102 | additional parameters passed to the \code{\link{nlm}} or \code{\link{adam}} optimization routines. 103 | } 104 | \item{parms}{ 105 | list containing MCQRNN weight matrices and other parameters. 
106 | } 107 | } 108 | \seealso{ 109 | \code{\link{composite.stack}}, \code{\link{qrnn.fit}}, 110 | \code{\link{qrnn2.fit}}, \code{\link{qrnn.predict}}, 111 | \code{\link{qrnn2.predict}}, \code{\link{adam}} 112 | } 113 | \examples{ 114 | x <- as.matrix(iris[,"Petal.Length",drop=FALSE]) 115 | y <- as.matrix(iris[,"Petal.Width",drop=FALSE]) 116 | 117 | cases <- order(x) 118 | x <- x[cases,,drop=FALSE] 119 | y <- y[cases,,drop=FALSE] 120 | 121 | set.seed(1) 122 | 123 | ## MCQRNN model w/ 2 hidden layers for simultaneous estimation of 124 | ## multiple non-crossing quantile functions 125 | fit.mcqrnn <- mcqrnn.fit(x, y, tau=seq(0.1, 0.9, by=0.1), 126 | n.hidden=2, n.hidden2=2, n.trials=1, 127 | iter.max=500) 128 | pred.mcqrnn <- mcqrnn.predict(x, fit.mcqrnn) 129 | 130 | ## Estimate the full quantile regression process by specifying 131 | ## the number of samples/random values of tau used in training 132 | 133 | fit.full <- mcqrnn.fit(x, y, tau=1000L, n.hidden=3, n.hidden2=3, 134 | n.trials=1, iter.max=300, eps.seq=1e-6, 135 | method="adam", minibatch=64, print.level=100) 136 | # Show how to initialize from previous weights 137 | fit.full <- mcqrnn.fit(x, y, tau=1000L, n.hidden=3, n.hidden2=3, 138 | n.trials=1, iter.max=300, eps.seq=1e-6, 139 | method="adam", minibatch=64, print.level=100, 140 | init.range=fit.full$weights) 141 | pred.full <- mcqrnn.predict(x, fit.full, tau=seq(0.1, 0.9, by=0.1)) 142 | 143 | par(mfrow=c(1, 2)) 144 | matplot(x, pred.mcqrnn, col="blue", type="l") 145 | points(x, y) 146 | matplot(x, pred.full, col="blue", type="l") 147 | points(x, y) 148 | 149 | } 150 | \references{ 151 | Cannon, A.J., 2011. Quantile regression neural networks: implementation 152 | in R and application to precipitation downscaling. Computers & Geosciences, 153 | 37: 1277-1284. doi:10.1016/j.cageo.2010.07.005 154 | 155 | Cannon, A.J., 2018. Non-crossing nonlinear regression quantiles by 156 | monotone composite quantile regression neural network, with application 157 | to rainfall extremes. Stochastic Environmental Research and Risk Assessment, 158 | 32(11): 3207-3225. doi:10.1007/s00477-018-1573-6 159 | 160 | Tagasovska, N., D. Lopez-Paz, 2019. Single-model uncertainties for deep 161 | learning. Advances in Neural Information Processing Systems, 32, 162 | NeurIPS 2019. doi:10.48550/arXiv.1811.00908 163 | } 164 | -------------------------------------------------------------------------------- /man/qrnn-package.Rd: -------------------------------------------------------------------------------- 1 | \name{qrnn-package} 2 | \alias{qrnn-package} 3 | \alias{qrnn} 4 | \docType{package} 5 | \title{Quantile Regression Neural Network} 6 | \encoding{UTF-8} 7 | \description{ 8 | This package implements the quantile regression neural network (QRNN) 9 | (Taylor, 2000; Cannon, 2011; Cannon, 2018), which is a flexible nonlinear form 10 | of quantile regression. While low level modelling functions are available, it is 11 | recommended that the \code{\link{mcqrnn.fit}} and \code{\link{mcqrnn.predict}} 12 | wrappers be used for most applications. More information is provided below. 13 | 14 | The goal of quantile regression is to estimate conditional quantiles of a 15 | response variable that depend on covariates in some form of regression equation. 16 | The QRNN adopts the multi-layer perceptron neural network architecture. 
The 17 | implementation follows from previous work on the estimation of censored 18 | regression quantiles, thus allowing predictions for mixed discrete-continuous 19 | variables like precipitation (Friederichs and Hense, 2007). A differentiable 20 | approximation to the quantile regression cost function is adopted so that a 21 | simplified form of the finite smoothing algorithm (Chen, 2007) can be used to 22 | estimate model parameters. This approximation can also be used to force the 23 | model to solve a standard least squares regression problem or an expectile 24 | regression problem (Cannon, 2018). Weight penalty regularization can be added 25 | to help avoid overfitting, and ensemble models with bootstrap aggregation are 26 | also provided. 27 | 28 | An optional monotone constraint can be invoked, which guarantees monotonic 29 | non-decreasing behaviour of model outputs with respect to specified covariates 30 | (Zhang and Zhang, 1999). The input-hidden layer weight matrix can also be constrained 31 | so that model relationships are strictly additive (see \code{\link{gam.style}}; 32 | Cannon, 2018). Borrowing strength by using a composite model for multiple 33 | regression quantiles (Zou et al., 2008; Xu et al., 2017) is also possible 34 | (see \code{\link{composite.stack}}). Weights can be applied to individual cases 35 | (Jiang et al., 2012). 36 | 37 | Applying the monotone constraint in combination with the composite model allows 38 | one to simultaneously estimate multiple non-crossing quantiles (Cannon, 2018); 39 | the resulting monotone composite QRNN (MCQRNN) is provided by the 40 | \code{\link{mcqrnn.fit}} and \code{\link{mcqrnn.predict}} wrapper functions. 41 | Examples for \code{\link{qrnn.fit}} and \code{\link{qrnn2.fit}} show how the 42 | same functionality can be achieved using the low-level 43 | \code{\link{composite.stack}} and fitting functions. 44 | 45 | QRNN models with a single layer of hidden nodes can be fitted using the 46 | \code{\link{qrnn.fit}} function. Predictions from a fitted model are made using 47 | the \code{\link{qrnn.predict}} function. The function \code{\link{gam.style}} 48 | can be used to visualize and investigate fitted covariate/response relationships 49 | from \code{\link{qrnn.fit}} (Plate et al., 2000). Note: a single hidden layer 50 | is usually sufficient for most modelling tasks. With added monotonicity 51 | constraints, a second hidden layer may sometimes be beneficial 52 | (Lang, 2005; Minin et al., 2010). QRNN models with two hidden layers are 53 | available using the \code{\link{qrnn2.fit}} and 54 | \code{\link{qrnn2.predict}} functions. For non-crossing quantiles, the 55 | \code{\link{mcqrnn.fit}} and \code{\link{mcqrnn.predict}} wrappers also allow 56 | models with one or two hidden layers to be fitted and predictions to be made 57 | from the fitted models. 58 | 59 | In general, \code{\link{mcqrnn.fit}} offers a convenient, single function for 60 | fitting multiple quantiles simultaneously. Note, however, that default 61 | settings in \code{\link{mcqrnn.fit}} and other model fitting functions are 62 | not optimized for general speed, memory efficiency, or accuracy and should be 63 | adjusted for a particular regression problem as needed. In particular, the 64 | \code{eps.seq} sequence controlling the approximation to the quantile regression cost function, the 65 | number of trials \code{n.trials}, and the number of iterations \code{iter.max} 66 | can all influence fitting speed (and accuracy), as can changing the 67 | optimization algorithm via \code{method}.
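For example, fitting speed might be tuned along these lines (a minimal sketch; the hyperparameter values are illustrative assumptions, not recommendations):

\preformatted{
## Coarser eps sequence, fewer trials/iterations, and minibatch
## Adam optimization for a larger dataset
fit <- mcqrnn.fit(x, y, tau=c(0.1, 0.5, 0.9),
                  n.hidden=4, n.hidden2=2,
                  n.trials=2, iter.max=1000,
                  eps.seq=2^seq(-8, -24, by=-8),
                  method="adam", minibatch=128)
}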
Non-crossing quantiles are 68 | implemented by stacking multiple copies of the \code{x} and \code{y} data, 69 | one copy per value of \code{tau}. Depending on the dataset size, this can 70 | lead to large matrices being passed to the optimization routine. In the 71 | \code{\link{adam}} adaptive stochastic gradient descent method, the 72 | \code{minibatch} size can be adjusted to help offset this cost. Model complexity 73 | is determined via the number of hidden nodes, \code{n.hidden} and 74 | \code{n.hidden2}, as well as the optional weight penalty \code{penalty}; values 75 | of these hyperparameters are crucial to obtaining a well-performing model. 76 | 77 | When using \code{\link{mcqrnn.fit}}, it is also possible to estimate the full 78 | quantile regression process by specifying a single integer value for \code{tau}. 79 | In this case, \code{tau} is the number of random samples used in the stochastic 80 | estimation. For more information, see Tagasovska and Lopez-Paz (2019). It may be 81 | necessary to restart the optimization multiple times from the previous weights 82 | and biases, in which case \code{init.range} can be set to the \code{weights} 83 | values from the previously completed optimization run. For large datasets, it is 84 | recommended that the \code{\link{adam}} method with an appropriate integer 85 | \code{tau} and \code{minibatch} size be used for optimization. 86 | 87 | If models for multiple quantiles have been fitted, for example by 88 | \code{\link{mcqrnn.fit}} or multiple calls to either \code{\link{qrnn.fit}} 89 | or \code{\link{qrnn2.fit}}, the (experimental) \code{\link{dquantile}} 90 | function and its companion functions are available to create proper 91 | probability density, distribution, and quantile functions 92 | (Quiñonero-Candela et al., 2006; Cannon, 2011). Alternative distribution, 93 | quantile, and random variate functions based on the Nadaraya-Watson estimator 94 | (Passow and Donner, 2020) are also available in \code{[p,q,r]quantile.nw}. 95 | These can be useful for assessing probabilistic calibration and evaluating 96 | model performance. 97 | 98 | Note: the user cannot easily change the output layer transfer function 99 | to be different from \code{hramp}, which provides either the identity function or a 100 | ramp function to accommodate optional left censoring. Some applications, for 101 | example fitting smoothed binary quantile regression models for a binary target 102 | variable (Kordas, 2006), require an alternative like the logistic sigmoid.
103 | While not straightforward, it is possible to change the output layer transfer 104 | function by switching off \code{scale.y} in the call to the fitting 105 | function and reassigning \code{hramp} and \code{hramp.prime} as follows: 106 | 107 | \preformatted{ 108 | library(qrnn) 109 | 110 | # Use the logistic sigmoid as the output layer transfer function 111 | To.logistic <- function(x, lower, eps) 0.5 + 0.5*tanh(x/2) 112 | environment(To.logistic) <- asNamespace("qrnn") 113 | assignInNamespace("hramp", To.logistic, ns="qrnn") 114 | 115 | # Change the derivative of the output layer transfer function 116 | To.logistic.prime <- function(x, lower, eps) 0.25/(cosh(x/2)^2) 117 | environment(To.logistic.prime) <- asNamespace("qrnn") 118 | assignInNamespace("hramp.prime", To.logistic.prime, ns="qrnn") 119 | 120 | } 121 | 122 | } 123 | \details{ 124 | \tabular{ll}{ 125 | Package: \tab qrnn\cr 126 | Type: \tab Package\cr 127 | License: \tab GPL-2\cr 128 | LazyLoad: \tab yes\cr 129 | } 130 | } 131 | \references{ 132 | Cannon, A.J., 2011. Quantile regression neural networks: implementation 133 | in R and application to precipitation downscaling. Computers & Geosciences, 134 | 37: 1277-1284. doi:10.1016/j.cageo.2010.07.005 135 | 136 | Cannon, A.J., 2018. Non-crossing nonlinear regression quantiles by 137 | monotone composite quantile regression neural network, with application 138 | to rainfall extremes. Stochastic Environmental Research and Risk Assessment, 139 | 32(11): 3207-3225. doi:10.1007/s00477-018-1573-6 140 | 141 | Chen, C., 2007. A finite smoothing algorithm for quantile regression. 142 | Journal of Computational and Graphical Statistics, 16: 136-164. 143 | 144 | Friederichs, P. and A. Hense, 2007. Statistical downscaling of extreme 145 | precipitation events using censored quantile regression. Monthly Weather 146 | Review, 135: 2365-2378. 147 | 148 | Jiang, X., J. Jiang, and X. Song, 2012. Oracle model selection for nonlinear 149 | models based on weighted composite quantile regression. Statistica Sinica, 150 | 22(4): 1479-1506. 151 | 152 | Kordas, G., 2006. Smoothed binary regression quantiles. Journal of Applied 153 | Econometrics, 21(3): 387-407. 154 | 155 | Lang, B., 2005. Monotonic multi-layer perceptron networks as universal 156 | approximators. International Conference on Artificial Neural Networks, 157 | Artificial Neural Networks: Formal Models and Their Applications-ICANN 2005, 158 | pp. 31-37. 159 | 160 | Minin, A., M. Velikova, B. Lang, and H. Daniels, 2010. Comparison of universal 161 | approximators incorporating partial monotonicity by structure. 162 | Neural Networks, 23(4): 471-475. 163 | 164 | Passow, C., R.V. Donner, 2020. Regression-based distribution mapping for 165 | bias correction of climate model outputs using linear quantile regression. 166 | Stochastic Environmental Research and Risk Assessment, 34: 87-102. 167 | 168 | Plate, T., J. Bert, J. Grace, and P. Band, 2000. Visualizing the function 169 | computed by a feedforward neural network. Neural Computation, 170 | 12(6): 1337-1354. 171 | 172 | Quiñonero-Candela, J., C. Rasmussen, F. Sinz, O. Bousquet, 173 | B. Scholkopf, 2006. Evaluating Predictive Uncertainty Challenge. 174 | Lecture Notes in Artificial Intelligence, 3944: 1-27. 175 | 176 | Tagasovska, N., D. Lopez-Paz, 2019. Single-model uncertainties for deep 177 | learning. Advances in Neural Information Processing Systems, 32, 178 | NeurIPS 2019. doi:10.48550/arXiv.1811.00908 179 | 180 | Taylor, J.W., 2000. 
A quantile regression neural network approach to 181 | estimating the conditional density of multiperiod returns. Journal of 182 | Forecasting, 19(4): 299-311. 183 | 184 | Xu, Q., K. Deng, C. Jiang, F. Sun, and X. Huang, 2017. Composite quantile 185 | regression neural network with applications. Expert Systems with Applications, 186 | 76, 129-139. 187 | 188 | Zhang, H. and Zhang, Z., 1999. Feedforward networks with monotone 189 | constraints. In: International Joint Conference on Neural Networks, 190 | vol. 3, p. 1820-1823. doi:10.1109/IJCNN.1999.832655 191 | 192 | Zou, H. and M. Yuan, 2008. Composite quantile regression and the oracle model 193 | selection theory. The Annals of Statistics, 1108-1126. 194 | } 195 | -------------------------------------------------------------------------------- /man/qrnn-rbf.Rd: -------------------------------------------------------------------------------- 1 | \name{qrnn.rbf} 2 | \alias{qrnn.rbf} 3 | \title{ 4 | Radial basis function kernel 5 | } 6 | \description{ 7 | Evaluate a kernel matrix based on the radial basis function kernel. Can 8 | be used in conjunction with \code{\link{qrnn.fit}} with \code{Th} set to 9 | \code{\link{linear}} and \code{penalty} set to a nonzero value for 10 | kernel quantile ridge regression. 11 | } 12 | \usage{ 13 | qrnn.rbf(x, x.basis, sigma) 14 | } 15 | \arguments{ 16 | \item{x}{ 17 | covariate matrix with number of rows equal to the number of samples and number of columns equal to the number of variables. 18 | } 19 | \item{x.basis}{ 20 | covariate matrix with number of rows equal to the number of basis functions and number of columns equal to the number of variables. 21 | } 22 | \item{sigma}{ 23 | kernel width 24 | } 25 | } 26 | \value{ 27 | kernel matrix with number of rows equal to the number of samples and number of columns equal to the number of basis functions. 28 | } 29 | \seealso{ 30 | \code{\link{qrnn.fit}} 31 | } 32 | \examples{ 33 | x <- as.matrix(iris[,"Petal.Length",drop=FALSE]) 34 | y <- as.matrix(iris[,"Petal.Width",drop=FALSE]) 35 | 36 | cases <- order(x) 37 | x <- x[cases,,drop=FALSE] 38 | y <- y[cases,,drop=FALSE] 39 | 40 | set.seed(1) 41 | kern <- qrnn.rbf(x, x.basis=x, sigma=1) 42 | 43 | parms <- qrnn.fit(x=kern, y=y, tau=0.5, penalty=0.1, 44 | Th=linear, Th.prime=linear.prime, 45 | iter.max=500, n.trials=1) 46 | p <- qrnn.predict(x=kern, parms=parms) 47 | 48 | matplot(x, cbind(y, p), type=c("p", "l"), pch=1, lwd=1) 49 | } 50 | -------------------------------------------------------------------------------- /man/qrnn.cost.Rd: -------------------------------------------------------------------------------- 1 | \name{qrnn.cost} 2 | \alias{qrnn.cost} 3 | \title{ 4 | Smooth approximation to the tilted absolute value cost function 5 | } 6 | \description{ 7 | Smooth approximation to the tilted absolute value cost function 8 | used to fit a QRNN model. Optional left censoring, monotone constraints, 9 | and additive constraints are supported. 10 | } 11 | \usage{ 12 | qrnn.cost(weights, x, y, n.hidden, w, tau, lower, monotone, 13 | additive, eps, Th, Th.prime, penalty, unpenalized) 14 | } 15 | \arguments{ 16 | \item{weights}{ 17 | weight vector of length returned by \code{\link{qrnn.initialize}}. 18 | } 19 | \item{x}{ 20 | covariate matrix with number of rows equal to the number of samples and number of columns equal to the number of variables. 21 | } 22 | \item{y}{ 23 | response column matrix with number of rows equal to the number of samples. 
24 | } 25 | \item{n.hidden}{ 26 | number of hidden nodes in the QRNN model. 27 | } 28 | \item{w}{ 29 | vector of weights with length equal to the number of samples; 30 | \code{NULL} gives equal weight to each sample. 31 | } 32 | \item{tau}{ 33 | desired tau-quantile. 34 | } 35 | \item{lower}{ 36 | left censoring point. 37 | } 38 | \item{monotone}{ 39 | column indices of covariates for which the monotonicity constraint should hold. 40 | } 41 | \item{additive}{ 42 | force additive relationships. 43 | } 44 | \item{eps}{ 45 | epsilon value used in the approximation functions. 46 | } 47 | \item{Th}{ 48 | hidden layer transfer function; use \code{\link{sigmoid}}, \code{\link{elu}}, \code{\link{relu}}, \code{\link{lrelu}}, 49 | \code{\link{softplus}}, or other non-decreasing function for a nonlinear model and \code{\link{linear}} for a linear model. 50 | } 51 | \item{Th.prime}{ 52 | derivative of the hidden layer transfer function \code{Th}. 53 | } 54 | \item{penalty}{ 55 | weight penalty for weight decay regularization. 56 | } 57 | \item{unpenalized}{ 58 | column indices of covariates for which the weight penalty should not be applied to input-hidden layer weights. 59 | } 60 | } 61 | \value{ 62 | numeric value of the smooth approximation to the tilted absolute value cost function, with the gradient vector stored in the \code{gradient} attribute. 63 | } 64 | \seealso{ 65 | \code{\link{qrnn.fit}} 66 | } 67 | 68 | -------------------------------------------------------------------------------- /man/qrnn.fit.Rd: -------------------------------------------------------------------------------- 1 | \name{qrnn.fit} 2 | \alias{qrnn.fit} 3 | \title{ 4 | Main function used to fit a QRNN model or ensemble of QRNN models 5 | } 6 | \description{ 7 | Function used to fit a QRNN model or ensemble of QRNN models. 8 | } 9 | \details{ 10 | Fit a censored quantile regression neural network model for the 11 | \code{tau}-quantile by minimizing a cost function based on smooth 12 | Huber-norm approximations to the tilted absolute value and ramp functions. 13 | Left censoring can be turned on by setting \code{lower} to a value 14 | greater than \code{-Inf}. A simplified form of the finite smoothing 15 | algorithm, in which the \code{\link{nlm}} optimization algorithm 16 | is run with values of the \code{eps} approximation tolerance progressively 17 | reduced in magnitude over the sequence \code{eps.seq}, is used to set the 18 | QRNN weights and biases. Local minima of the cost function can be 19 | avoided by setting \code{n.trials}, which controls the number of 20 | repeated runs from different starting weights and biases, to a value 21 | greater than one. 22 | 23 | (Note: if \code{eps.seq} is set to a single, sufficiently large value and \code{tau} 24 | is set to \code{0.5}, then the result will be a standard least squares 25 | regression model. The same value of \code{eps.seq} combined with other values 26 | of \code{tau} leads to expectile regression.) 27 | 28 | If invoked, the \code{monotone} argument enforces non-decreasing behaviour 29 | between specified columns of \code{x} and model outputs. This holds if 30 | \code{Th} and \code{To} are monotone non-decreasing functions. In this case, 31 | the \code{exp} function is applied to the relevant weights following 32 | initialization and during optimization; manual adjustment of 33 | \code{init.range} or \code{qrnn.initialize} may be needed due to 34 | differences in scaling of the constrained and unconstrained weights.
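For example, a model constrained to be non-decreasing in the first column of \code{x} might be requested as follows (a minimal sketch for illustration):

\preformatted{
## Monotone non-decreasing response in column 1 of x
fit.mon <- qrnn.fit(x, y, n.hidden=2, tau=0.9, monotone=1)
}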
35 | Non-increasing behaviour can be forced by transforming the relevant 36 | covariates, e.g., by reversing sign. 37 | 38 | The \code{additive} argument sets relevant input-hidden layer weights 39 | to zero, resulting in a purely additive model. Interactions between covariates 40 | are thus suppressed, leading to a compromise in flexibility between 41 | linear quantile regression and the quantile regression neural network. 42 | 43 | Borrowing strength by using a composite model for multiple regression quantiles 44 | is also possible (see \code{\link{composite.stack}}). Applying the monotone 45 | constraint in combination with the composite model allows 46 | one to simultaneously estimate multiple non-crossing quantiles; 47 | the resulting monotone composite QRNN (MCQRNN) is demonstrated in 48 | \code{\link{mcqrnn}}. 49 | 50 | In the linear case, model complexity does not depend on the number 51 | of hidden nodes; the value of \code{n.hidden} is ignored and is instead 52 | set to one internally. In the nonlinear case, \code{n.hidden} 53 | controls the overall complexity of the model. As an added means of 54 | avoiding overfitting, weight penalty regularization for the magnitude 55 | of the input-hidden layer weights (excluding biases) can be applied 56 | by setting \code{penalty} to a nonzero value. (For the linear model, 57 | this penalizes both input-hidden and hidden-output layer weights, 58 | leading to a quantile ridge regression model. In this case, kernel 59 | quantile ridge regression can be performed with the aid of the 60 | \code{\link{qrnn.rbf}} function.) Finally, if the \code{bag} argument 61 | is set to \code{TRUE}, models are trained on bootstrapped \code{x} and 62 | \code{y} sample pairs; bootstrap aggregation (bagging) can be turned 63 | on by setting \code{n.ensemble} to a value greater than one. Averaging 64 | over an ensemble of bagged models will also tend to alleviate 65 | overfitting. 66 | 67 | The \code{\link{gam.style}} function can be used to plot modified 68 | generalized additive model effects plots, which are useful for visualizing 69 | the modelled covariate-response relationships. 70 | 71 | Note: values of \code{x} and \code{y} need not be standardized or 72 | rescaled by the user. All variables are automatically scaled to zero 73 | mean and unit standard deviation prior to fitting and parameters are 74 | automatically rescaled by \code{\link{qrnn.predict}} and other prediction 75 | functions. Values of \code{eps.seq} are relative to the residuals in 76 | standard deviation units. Note: scaling of \code{y} can be turned off using 77 | the \code{scale.y} argument. 78 | } 79 | \usage{ 80 | qrnn.fit(x, y, n.hidden, w=NULL, tau=0.5, n.ensemble=1, 81 | iter.max=5000, n.trials=5, bag=FALSE, lower=-Inf, 82 | init.range=c(-0.5, 0.5, -0.5, 0.5), monotone=NULL, 83 | additive=FALSE, eps.seq=2^seq(-8, -32, by=-4), 84 | Th=sigmoid, Th.prime=sigmoid.prime, penalty=0, 85 | unpenalized=NULL, n.errors.max=10, trace=TRUE, 86 | scale.y=TRUE, ...) 87 | } 88 | \arguments{ 89 | \item{x}{ 90 | covariate matrix with number of rows equal to the number of samples and number of columns equal to the number of variables. 91 | } 92 | \item{y}{ 93 | response column matrix with number of rows equal to the number of samples. 94 | } 95 | \item{n.hidden}{ 96 | number of hidden nodes in the QRNN model. 97 | } 98 | \item{w}{ 99 | vector of weights with length equal to the number of samples; 100 | \code{NULL} gives equal weight to each sample. 
101 | } 102 | \item{tau}{ 103 | desired tau-quantile(s). 104 | } 105 | \item{n.ensemble}{ 106 | number of ensemble members to fit. 107 | } 108 | \item{iter.max}{ 109 | maximum number of iterations of the optimization algorithm. 110 | } 111 | \item{n.trials}{ 112 | number of repeated trials used to avoid local minima. 113 | } 114 | \item{bag}{ 115 | logical variable indicating whether or not bootstrap aggregation (bagging) should be used. 116 | } 117 | \item{lower}{ 118 | left censoring point. 119 | } 120 | \item{init.range}{ 121 | initial weight range for input-hidden and hidden-output weight matrices. 122 | } 123 | \item{monotone}{ 124 | column indices of covariates for which the monotonicity constraint should hold. 125 | } 126 | \item{additive}{ 127 | force additive relationships. 128 | } 129 | \item{eps.seq}{ 130 | sequence of \code{eps} values for the finite smoothing algorithm. 131 | } 132 | \item{Th}{ 133 | hidden layer transfer function; use \code{\link{sigmoid}}, \code{\link{elu}}, \code{\link{relu}}, \code{\link{lrelu}}, 134 | \code{\link{softplus}}, or other non-decreasing function for a nonlinear model and \code{\link{linear}} for a linear model. 135 | } 136 | \item{Th.prime}{ 137 | derivative of the hidden layer transfer function \code{Th}. 138 | } 139 | \item{penalty}{ 140 | weight penalty for weight decay regularization. 141 | } 142 | \item{unpenalized}{ 143 | column indices of covariates for which the weight penalty should not be applied to input-hidden layer weights. 144 | } 145 | \item{n.errors.max}{ 146 | maximum number of \code{nlm} optimization failures allowed before quitting. 147 | } 148 | \item{trace}{ 149 | logical variable indicating whether or not diagnostic messages are printed during optimization. 150 | } 151 | \item{scale.y}{ 152 | logical variable indicating whether \code{y} should be scaled to zero mean and unit standard deviation. 153 | } 154 | \item{\dots}{ 155 | additional parameters passed to the \code{\link{nlm}} optimization routine. 
156 | } 157 | } 158 | \value{ 159 | a list containing elements 160 | \item{weights}{a list containing fitted weight matrices} 161 | \item{lower}{left censoring point} 162 | \item{eps.seq}{sequence of \code{eps} values for the finite smoothing algorithm} 163 | \item{tau}{desired tau-quantile(s)} 164 | \item{Th}{hidden layer transfer function} 165 | \item{x.center}{vector of column means for \code{x}} 166 | \item{x.scale}{vector of column standard deviations for \code{x}} 167 | \item{y.center}{vector of column means for \code{y}} 168 | \item{y.scale}{vector of column standard deviations for \code{y}} 169 | \item{monotone}{column indices indicating covariate monotonicity constraints.} 170 | \item{additive}{force additive relationships.} 171 | } 172 | \seealso{ 173 | \code{\link{qrnn.predict}}, \code{\link{qrnn.cost}}, \code{\link{composite.stack}}, \code{\link{mcqrnn}}, \code{\link{gam.style}} 174 | } 175 | \examples{ 176 | x <- as.matrix(iris[,"Petal.Length",drop=FALSE]) 177 | y <- as.matrix(iris[,"Petal.Width",drop=FALSE]) 178 | 179 | cases <- order(x) 180 | x <- x[cases,,drop=FALSE] 181 | y <- y[cases,,drop=FALSE] 182 | 183 | tau <- c(0.05, 0.5, 0.95) 184 | 185 | set.seed(1) 186 | 187 | ## QRNN models for conditional 5th, 50th, and 95th percentiles 188 | w <- p <- vector("list", length(tau)) 189 | for(i in seq_along(tau)){ 190 | w[[i]] <- qrnn.fit(x=x, y=y, n.hidden=3, tau=tau[i], 191 | iter.max=200, n.trials=1) 192 | p[[i]] <- qrnn.predict(x, w[[i]]) 193 | } 194 | 195 | ## Monotone composite QRNN (MCQRNN) for simultaneous estimation of 196 | ## multiple non-crossing quantile functions 197 | x.y.tau <- composite.stack(x, y, tau) 198 | fit.mcqrnn <- qrnn.fit(cbind(x.y.tau$tau, x.y.tau$x), x.y.tau$y, 199 | tau=x.y.tau$tau, n.hidden=3, n.trials=1, 200 | iter.max=500, monotone=1) 201 | pred.mcqrnn <- matrix(qrnn.predict(cbind(x.y.tau$tau, x.y.tau$x), 202 | fit.mcqrnn), ncol=length(tau)) 203 | 204 | par(mfrow=c(1, 2)) 205 | matplot(x, matrix(unlist(p), nrow=nrow(x), ncol=length(p)), col="red", 206 | type="l") 207 | points(x, y) 208 | matplot(x, pred.mcqrnn, col="blue", type="l") 209 | points(x, y) 210 | 211 | } 212 | \references{ 213 | Cannon, A.J., 2011. Quantile regression neural networks: implementation 214 | in R and application to precipitation downscaling. Computers & Geosciences, 215 | 37: 1277-1284. doi:10.1016/j.cageo.2010.07.005 216 | 217 | Cannon, A.J., 2018. Non-crossing nonlinear regression quantiles by 218 | monotone composite quantile regression neural network, with application 219 | to rainfall extremes. Stochastic Environmental Research and Risk Assessment, 220 | 32(11): 3207-3225. doi:10.1007/s00477-018-1573-6 221 | } 222 | -------------------------------------------------------------------------------- /man/qrnn.initialize.Rd: -------------------------------------------------------------------------------- 1 | \name{qrnn.initialize} 2 | \alias{qrnn.initialize} 3 | \title{ 4 | Initialize a QRNN weight vector 5 | } 6 | \description{ 7 | Random initialization of the weight vector used during fitting of a QRNN model. 8 | } 9 | \usage{ 10 | qrnn.initialize(x, y, n.hidden, init.range=c(-0.5, 0.5, -0.5, 0.5)) 11 | } 12 | \arguments{ 13 | \item{x}{ 14 | covariate matrix with number of rows equal to the number of samples and number of columns equal to the number of variables. 15 | } 16 | \item{y}{ 17 | response column matrix with number of rows equal to the number of samples. 18 | } 19 | \item{n.hidden}{ 20 | number of hidden nodes in the QRNN model. 
21 | } 22 | \item{init.range}{ 23 | initial weight range for input-hidden and hidden-output weight matrices. 24 | } 25 | } 26 | 27 | -------------------------------------------------------------------------------- /man/qrnn.predict.Rd: -------------------------------------------------------------------------------- 1 | \name{qrnn.predict} 2 | \alias{qrnn.predict} 3 | \title{ 4 | Evaluate quantiles from trained QRNN model 5 | } 6 | \description{ 7 | Evaluate a fitted QRNN model or ensemble of models, resulting in a list 8 | containing the predicted quantiles. 9 | } 10 | \usage{ 11 | qrnn.predict(x, parms) 12 | } 13 | \arguments{ 14 | \item{x}{ 15 | covariate matrix with number of rows equal to the number of samples and number of columns equal to the number of variables. 16 | } 17 | \item{parms}{ 18 | list containing QRNN input-hidden and hidden-output layer weight matrices and other parameters from \code{\link{qrnn.fit}}. 19 | } 20 | } 21 | \value{ 22 | a list with number of elements equal to that of \code{parms}, each containing a column matrix of predicted quantiles. 23 | } 24 | \seealso{ 25 | \code{\link{qrnn.fit}} 26 | } 27 | \examples{ 28 | x <- as.matrix(iris[,"Petal.Length",drop=FALSE]) 29 | y <- as.matrix(iris[,"Petal.Width",drop=FALSE]) 30 | 31 | cases <- order(x) 32 | x <- x[cases,,drop=FALSE] 33 | y <- y[cases,,drop=FALSE] 34 | y[y < 0.5] <- 0.5 35 | 36 | set.seed(1) 37 | parms <- qrnn.fit(x=x, y=y, n.hidden=3, tau=0.5, lower=0.5, 38 | iter.max=500, n.trials=1) 39 | p <- qrnn.predict(x=x, parms=parms) 40 | 41 | matplot(x, cbind(y, p), type=c("p", "l"), pch=1, lwd=1) 42 | } 43 | 44 | -------------------------------------------------------------------------------- /man/qrnn2.Rd: -------------------------------------------------------------------------------- 1 | \name{qrnn2} 2 | \alias{qrnn2.fit} 3 | \alias{qrnn2.predict} 4 | \title{ 5 | Fit and make predictions from QRNN models with two hidden layers 6 | } 7 | \description{ 8 | Functions used to fit and make predictions from QRNN models with two hidden layers. 9 | Note: \code{Th} must be a non-decreasing function if \code{monotone != NULL}. 10 | } 11 | \usage{ 12 | qrnn2.fit(x, y, n.hidden=2, n.hidden2=2, w=NULL, tau=0.5, 13 | n.ensemble=1, iter.max=5000, n.trials=5, bag=FALSE, 14 | lower=-Inf, init.range=c(-0.5, 0.5, -0.5, 0.5, -0.5, 0.5), 15 | monotone=NULL, eps.seq=2^seq(-8, -32, by=-4), Th=sigmoid, 16 | Th.prime=sigmoid.prime, penalty=0, unpenalized=NULL, 17 | n.errors.max=10, trace=TRUE, method=c("nlm", "adam"), 18 | scale.y=TRUE, ...) 19 | qrnn2.predict(x, parms) 20 | } 21 | \arguments{ 22 | \item{x}{ 23 | covariate matrix with number of rows equal to the number of samples and number of columns equal to the number of variables. 24 | } 25 | \item{y}{ 26 | response column matrix with number of rows equal to the number of samples. 27 | } 28 | \item{n.hidden}{ 29 | number of hidden nodes in the first hidden layer. 30 | } 31 | \item{n.hidden2}{ 32 | number of hidden nodes in the second hidden layer. 33 | } 34 | \item{w}{ 35 | vector of weights with length equal to the number of samples; 36 | \code{NULL} gives equal weight to each sample. 37 | } 38 | \item{tau}{ 39 | desired tau-quantile(s). 40 | } 41 | \item{n.ensemble}{ 42 | number of ensemble members to fit. 43 | } 44 | \item{iter.max}{ 45 | maximum number of iterations of the optimization algorithm. 46 | } 47 | \item{n.trials}{ 48 | number of repeated trials used to avoid local minima. 
49 | } 50 | \item{bag}{ 51 | logical variable indicating whether or not bootstrap aggregation (bagging) should be used. 52 | } 53 | \item{lower}{ 54 | left censoring point. 55 | } 56 | \item{init.range}{ 57 | initial weight range for input-hidden, hidden-hidden, and hidden-output weight matrices. 58 | } 59 | \item{monotone}{ 60 | column indices of covariates for which the monotonicity constraint should hold. 61 | } 62 | \item{eps.seq}{ 63 | sequence of \code{eps} values for the finite smoothing algorithm. 64 | } 65 | \item{Th}{ 66 | hidden layer transfer function; use \code{\link{sigmoid}}, \code{\link{elu}}, \code{\link{relu}}, \code{\link{lrelu}}, 67 | \code{\link{softplus}}, or other non-decreasing function for a nonlinear model and \code{\link{linear}} for a linear model. 68 | } 69 | \item{Th.prime}{ 70 | derivative of the hidden layer transfer function \code{Th}. 71 | } 72 | \item{penalty}{ 73 | weight penalty for weight decay regularization. 74 | } 75 | \item{unpenalized}{ 76 | column indices of covariates for which the weight penalty should not be applied to input-hidden layer weights. 77 | } 78 | \item{n.errors.max}{ 79 | maximum number of \code{nlm} optimization failures allowed before quitting. 80 | } 81 | \item{trace}{ 82 | logical variable indicating whether or not diagnostic messages are printed during optimization. 83 | } 84 | \item{method}{ 85 | character string indicating which optimization algorithm to use. 86 | } 87 | \item{scale.y}{ 88 | logical variable indicating whether \code{y} should be scaled to zero mean and unit standard deviation. 89 | } 90 | \item{\dots}{ 91 | additional parameters passed to the \code{\link{nlm}} or \code{\link{adam}} optimization routines. 92 | } 93 | \item{parms}{ 94 | list containing QRNN weight matrices and other parameters from \code{\link{qrnn2.fit}}. 95 | } 96 | } 97 | \seealso{ 98 | \code{\link{qrnn.fit}}, \code{\link{qrnn.predict}}, 99 | \code{\link{qrnn.cost}}, \code{\link{composite.stack}}, 100 | \code{\link{mcqrnn}}, \code{\link{adam}} 101 | } 102 | \examples{ 103 | x <- as.matrix(iris[,"Petal.Length",drop=FALSE]) 104 | y <- as.matrix(iris[,"Petal.Width",drop=FALSE]) 105 | 106 | cases <- order(x) 107 | x <- x[cases,,drop=FALSE] 108 | y <- y[cases,,drop=FALSE] 109 | 110 | tau <- c(0.05, 0.5, 0.95) 111 | 112 | set.seed(1) 113 | 114 | ## QRNN models w/ 2 hidden layers (tau=0.05, 0.50, 0.95) 115 | w <- p <- vector("list", length(tau)) 116 | for(i in seq_along(tau)){ 117 | w[[i]] <- qrnn2.fit(x=x, y=y, n.hidden=3, n.hidden2=3, 118 | tau=tau[i], iter.max=200, n.trials=1) 119 | p[[i]] <- qrnn2.predict(x, w[[i]]) 120 | } 121 | 122 | ## MCQRNN model w/ 2 hidden layers for simultaneous estimation of 123 | ## multiple non-crossing quantile functions 124 | x.y.tau <- composite.stack(x, y, tau) 125 | fit.mcqrnn <- qrnn2.fit(cbind(x.y.tau$tau, x.y.tau$x), x.y.tau$y, 126 | tau=x.y.tau$tau, n.hidden=3, n.hidden2=3, 127 | n.trials=1, iter.max=500, monotone=1) 128 | pred.mcqrnn <- matrix(qrnn2.predict(cbind(x.y.tau$tau, x.y.tau$x), 129 | fit.mcqrnn), ncol=length(tau)) 130 | 131 | par(mfrow=c(1, 2)) 132 | matplot(x, matrix(unlist(p), nrow=nrow(x), ncol=length(p)), col="red", 133 | type="l") 134 | points(x, y) 135 | matplot(x, pred.mcqrnn, col="blue", type="l") 136 | points(x, y) 137 | 138 | } 139 | \references{ 140 | Cannon, A.J., 2011. Quantile regression neural networks: implementation 141 | in R and application to precipitation downscaling. Computers & Geosciences, 142 | 37: 1277-1284. doi:10.1016/j.cageo.2010.07.005 143 | 144 | Cannon, A.J., 2018. 
Non-crossing nonlinear regression quantiles by 145 | monotone composite quantile regression neural network, with application 146 | to rainfall extremes. Stochastic Environmental Research and Risk Assessment, 147 | 32(11): 3207-3225. doi:10.1007/s00477-018-1573-6 148 | } 149 | -------------------------------------------------------------------------------- /man/quantile.dtn.Rd: -------------------------------------------------------------------------------- 1 | \name{quantile.dtn} 2 | \alias{dquantile} 3 | \alias{pquantile} 4 | \alias{qquantile} 5 | \alias{rquantile} 6 | \alias{pquantile.nw} 7 | \alias{qquantile.nw} 8 | \alias{rquantile.nw} 9 | \encoding{UTF-8} 10 | \title{ 11 | Interpolated quantile distribution with exponential tails and Nadaraya-Watson quantile distribution 12 | } 13 | \description{ 14 | \code{dquantile} gives a probability density function (pdf) by combining 15 | step-interpolation of probability densities for specified 16 | \code{tau}-quantiles (\code{quant}) with exponential lower/upper tails 17 | (Quiñonero-Candela, 2006; Cannon, 2011). Point mass (e.g., as might occur 18 | when using censored QRNN models) can be defined by setting \code{lower} to 19 | the left censoring point. \code{pquantile} gives the cumulative distribution 20 | function (cdf); the \code{\link{integrate}} function is used for values 21 | outside the range of \code{quant}. The inverse cdf is given by 22 | \code{qquantile}; the \code{\link{uniroot}} function is used for values 23 | outside the range of \code{tau}. \code{rquantile} is used for 24 | generating random variates. 25 | 26 | Alternative formulations (without left censoring) based on the 27 | Nadaraya-Watson estimator \code{[p,q,r]quantile.nw} are also provided 28 | (Passow and Donner, 2020). 29 | 30 | Note: these functions have not been extensively tested or optimized and 31 | should be considered experimental. 32 | 33 | } 34 | \usage{ 35 | dquantile(x, tau, quant, lower=-Inf) 36 | pquantile(q, tau, quant, lower=-Inf, ...) 37 | pquantile.nw(q, tau, quant, h=0.001, ...) 38 | qquantile(p, tau, quant, lower=-Inf, 39 | tol=.Machine$double.eps^0.25, maxiter=1000, 40 | range.mult=1.1, max.error=100, ...) 41 | qquantile.nw(p, tau, quant, h=0.001) 42 | rquantile(n, tau, quant, lower=-Inf, 43 | tol=.Machine$double.eps^0.25, maxiter=1000, 44 | range.mult=1.1, max.error=100, ...) 45 | rquantile.nw(n, tau, quant, h=0.001) 46 | } 47 | \arguments{ 48 | \item{x, q}{ 49 | vector of quantiles. 50 | } 51 | \item{p}{ 52 | vector of cumulative probabilities. 53 | } 54 | \item{n}{ 55 | number of random samples. 56 | } 57 | \item{tau}{ 58 | ordered vector of cumulative probabilities associated with \code{quant} argument. 59 | } 60 | \item{quant}{ 61 | ordered vector of quantiles associated with \code{tau} argument. 62 | } 63 | \item{lower}{ 64 | left censoring point. 65 | } 66 | \item{tol}{ 67 | tolerance passed to \code{\link{uniroot}}. 68 | } 69 | \item{h}{ 70 | bandwidth for Nadaraya-Watson kernel. 71 | } 72 | 73 | \item{maxiter}{ 74 | maximum number of iterations passed to \code{\link{uniroot}}. 75 | } 76 | \item{range.mult}{ 77 | values of \code{lower} and \code{upper} in \code{\link{uniroot}} are initialized to \cr \code{quant[1]-range.mult*diff(range(quant))} and \cr \code{quant[length(quant)]+range.mult*diff(range(quant))} respectively; \code{range.mult} is squared, \code{lower} and \code{upper} are recalculated, and \code{\link{uniroot}} is rerun if the current values lead to an exception. 
159 | }
--------------------------------------------------------------------------------
/man/tilted.abs.Rd:
--------------------------------------------------------------------------------
1 | \name{tilted.abs}
2 | \alias{tilted.abs}
3 | \title{
4 | Tilted absolute value function
5 | }
6 | \description{
7 | Tilted absolute value function, also known as the check function, hinge function, or pinball loss. The \code{tau}-quantile of a sample minimizes the mean of \code{tilted.abs} evaluated at the deviations from it.
8 | }
9 | \usage{
10 | tilted.abs(x, tau)
11 | }
12 | \arguments{
13 | \item{x}{
14 | numeric vector.
15 | }
16 | \item{tau}{
17 | desired tau-quantile.
18 | }
19 | }
20 | \seealso{\code{\link{tilted.approx}}}
21 | \examples{
22 | x <- seq(-2, 2, length=200)
23 | plot(x, tilted.abs(x, tau=0.75), type="l")
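24 | 
25 | ## The tau-quantile minimizes the mean tilted absolute value of the
26 | ## deviations; a small numerical check (added for illustration), assuming
27 | ## the default tolerance of stats::optimize is adequate
28 | set.seed(1)
29 | y <- rnorm(1000)
30 | q75 <- optimize(function(b) mean(tilted.abs(y - b, tau=0.75)),
31 |                 interval=range(y))$minimum
32 | print(c(q75, quantile(y, 0.75)))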
33 | }
--------------------------------------------------------------------------------
/man/transfer.Rd:
--------------------------------------------------------------------------------
1 | \name{transfer}
2 | \alias{sigmoid}
3 | \alias{sigmoid.prime}
4 | \alias{elu}
5 | \alias{elu.prime}
6 | \alias{softplus}
7 | \alias{softplus.prime}
8 | \alias{logistic}
9 | \alias{logistic.prime}
10 | \alias{lrelu}
11 | \alias{lrelu.prime}
12 | \alias{relu}
13 | \alias{relu.prime}
14 | \alias{linear}
15 | \alias{linear.prime}
16 | \alias{softmax}
17 | \title{
18 | Transfer functions and their derivatives
19 | }
20 | \description{
21 | The \code{sigmoid}, exponential linear \code{elu}, \code{softplus},
22 | \code{logistic}, \code{lrelu}, and \code{relu} functions can be used as
23 | the hidden layer transfer function for a nonlinear QRNN model.
24 | \code{sigmoid} is used by default. The \code{linear} function is used as
25 | the hidden layer transfer function for linear QRNN models.
26 | \code{sigmoid.prime}, \code{elu.prime}, \code{softplus.prime},
27 | \code{logistic.prime}, \code{lrelu.prime}, \code{relu.prime}, and
28 | \code{linear.prime} provide the corresponding derivatives.
29 | }
30 | \usage{
31 | sigmoid(x)
32 | sigmoid.prime(x)
33 | elu(x, alpha=1)
34 | elu.prime(x, alpha=1)
35 | softplus(x, alpha=2)
36 | softplus.prime(x, alpha=2)
37 | logistic(x)
38 | logistic.prime(x)
39 | lrelu(x)
40 | lrelu.prime(x)
41 | relu(x)
42 | relu.prime(x)
43 | linear(x)
44 | linear.prime(x)
45 | }
46 | \arguments{
47 | \item{x}{
48 | numeric vector.
49 | }
50 | \item{alpha}{
51 | transition parameter for the \code{elu} and \code{softplus} functions.
52 | }
53 | }
54 | \examples{
55 | x <- seq(-10, 10, length=100)
56 | plot(x, sigmoid(x), type="l", col="black", ylab="")
57 | lines(x, sigmoid.prime(x), lty=2, col="black")
58 | lines(x, elu(x), col="red")
59 | lines(x, elu.prime(x), lty=2, col="red")
60 | lines(x, softplus(x), col="blue")
61 | lines(x, softplus.prime(x), lty=2, col="blue")
62 | lines(x, logistic(x), col="brown")
63 | lines(x, logistic.prime(x), lty=2, col="brown")
64 | lines(x, lrelu(x), col="orange")
65 | lines(x, lrelu.prime(x), lty=2, col="orange")
66 | lines(x, relu(x), col="pink")
67 | lines(x, relu.prime(x), lty=2, col="pink")
68 | lines(x, linear(x), col="green")
69 | lines(x, linear.prime(x), lty=2, col="green")
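70 | 
71 | ## Numerical sanity check (added for illustration): each smooth
72 | ## transfer function's *.prime should closely match a central
73 | ## finite-difference derivative at the plotted points
74 | h <- 1e-6
75 | print(max(abs(sigmoid.prime(x) - (sigmoid(x + h) - sigmoid(x - h))/(2*h))))
76 | print(max(abs(softplus.prime(x) - (softplus(x + h) - softplus(x - h))/(2*h))))
77 | }
78 | 
--------------------------------------------------------------------------------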