├── .gitattributes
├── .gitignore
├── tests
│   ├── testthat.R
│   └── testthat
│       ├── test_multiclass.R
│       └── test_neuralnet.R
├── .Rbuildignore
├── NAMESPACE
├── NEWS
├── man
│   ├── compute.Rd
│   ├── predict.nn.Rd
│   ├── prediction.Rd
│   ├── confidence.interval.Rd
│   ├── gwplot.Rd
│   ├── plot.nn.Rd
│   ├── neuralnet-package.Rd
│   └── neuralnet.Rd
├── README.md
├── .travis.yml
├── appveyor.yml
├── R
│   ├── compute.r
│   ├── convert_functions.R
│   ├── print.R
│   ├── predict.R
│   ├── neuralnet-package.R
│   ├── prediction.r
│   ├── gwplot.r
│   ├── plot.nn.r
│   ├── confidence.interval.r
│   ├── neuralnet.r
│   └── fit_neuralnet.R
└── DESCRIPTION

/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto
2 | data/* binary
3 | src/* text=lf
4 | R/* text=lf
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | *.Rproj
6 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(neuralnet)
3 |
4 | test_check("neuralnet")
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^appveyor\.yml$
4 | ^\.travis\.yml$
5 | ^README\.md$
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method(plot,nn)
4 | S3method(predict,nn)
5 | export(compute)
6 | export(confidence.interval)
7 | export(gwplot)
8 | export(neuralnet)
9 | export(prediction)
10 | import(stats)
11 |
--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
1 |
2 | ##### Version 1.44.6
3 | * Bug fixes
4 |
5 | ##### Version 1.44.5
6 | * Allow prediction with NA weights (set to 0 as in old version)
7 | * Fix prediction for networks grown with older versions
8 |
9 | ##### Version 1.44.4
10 | * Add ReLU activation function
11 |
12 | ##### Version 1.44.3
13 | * Bug fixes
14 |
15 | ##### Version 1.44.2
16 | * Make compute() return the same as in previous versions
17 | * Soft-deprecate compute() for now
18 |
19 | ##### Version 1.44.1
20 | * Add support for dots in formula
21 | * Allow direct multiclass outcomes with factors
22 |
23 | ##### Version 1.44.0
24 | * New maintainer: Marvin N. Wright
25 | * Add a predict() function to replace compute()
26 | * Use Deriv package for symbolic derivation
27 | * Many internal changes to improve code quality
28 | * Bug fixes
--------------------------------------------------------------------------------
/man/compute.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/compute.r
3 | \name{compute}
4 | \alias{compute}
5 | \title{Deprecated function}
6 | \usage{
7 | compute(x, covariate, rep = 1)
8 | }
9 | \arguments{
10 | \item{x}{an object of class \code{nn}.}
11 |
12 | \item{covariate}{a dataframe or matrix containing the variables that had
13 | been used to train the neural network.}
14 |
15 | \item{rep}{an integer indicating the
neural network's repetition which 16 | should be used.} 17 | } 18 | \value{ 19 | \code{compute} returns a list containing the following components: 20 | \item{neurons}{a list of the neurons' output for each layer of the neural 21 | network.} \item{net.result}{a matrix containing the overall result of the 22 | neural network.} 23 | } 24 | \description{ 25 | The function \code{compute} is deprecated. Please refer to the new function \code{\link{predict.nn}}. 26 | } 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Travis Build Status](https://travis-ci.org/bips-hb/neuralnet.svg?branch=master)](https://travis-ci.org/bips-hb/neuralnet) 2 | [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/bips-hb/neuralnet?branch=master&svg=true)](https://ci.appveyor.com/project/mnwright/neuralnet) 3 | ![CRAN Downloads month](http://cranlogs.r-pkg.org/badges/neuralnet?color=brightgreen) 4 | ![CRAN Downloads overall](http://cranlogs.r-pkg.org/badges/grand-total/neuralnet?color=brightgreen) 5 | # neuralnet 6 | Training of Neural Networks 7 | 8 | The **neuralnet** package is now on GitHub! New maintainer will be [@mnwright](https://github.com/mnwright). 9 | 10 | Please note that **neuralnet** will not turn into a super fast deep learning framework. We will continue to focus on ease of use and flexibility. 11 | 12 | Please post any issue you have with the package here on GitHub. Code contributions are also very welcome! 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | cache: packages 3 | warnings_are_errors: true 4 | r: 5 | - oldrel 6 | - release 7 | - devel 8 | 9 | r_github_packages: 10 | - r-lib/covr 11 | 12 | after_success: 13 | - Rscript -e 'covr::coveralls()' 14 | 15 | notifications: 16 | email: 17 | recipients: 18 | secure: qh6JWWLG+P2D9MstM/7zk9C1eZEehdgMA3qIRMfa+gW02YPMXJK7+I1CTPPvVzVQP7iQJKavC+LWoYKEPWdCyu0PzbzDI50PBArPbsOHNF0ZKGA+z0bFsRlCo/BzStdSO/bg3m60+zo6BCPSpPWqGWsnED4Wb2YGf20M7TLUeKZ2htmcIgQx5VmfOPTZG0lh8u3/c4SFM9jF7jxjuV3QI8C1gnvFfidic24OJH91NvIajIttWsFU7bSRLM0eKbutWFnuqPRDjVIw62zQsigXJjy3wo6Yo2CY41X95wAu88AOgO14i4GyRM6Hv7juRLJJfuts8OFUMtG8btzemx2fBx81YUeG1QRD1Puxax61elDDJtNALEghzAscAnguUoTUCdUpw9ras1nIf8R+HDErSZd9ZEJ0QpGBGJBbl5pe/4V2XWo9EPhvQe8pAO+3iXIsFnEcItxBBwe0tJqCnW+pdTd3N99szdCKP6HyuV+8SJqy7tgilBFJZpC3SGQ1ynP4FRSF59ekLUtAGlm1RYGnUTMtVAWHZVQOMmLKbZfPW3JqUwVuaMCzG7XOFEgCtpT1VmIcggvyeGMMCdXPlW4Ns42tPuSEEcxo7kmVo7v5SDIWdGu6Znqbrs9UG17GqL7oLehQsGnrbGMIuLu3CNW+5/6m7YrO45lsi4nqHaqEW8Y= 19 | on_success: change 20 | on_failure: change 21 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # DO NOT CHANGE the "init" and "install" sections below 2 | 3 | # Download script file from GitHub 4 | init: 5 | ps: | 6 | $ErrorActionPreference = "Stop" 7 | Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" 8 | Import-Module '..\appveyor-tool.ps1' 9 | 10 | install: 11 | ps: Bootstrap 12 | 13 | cache: 14 | - C:\RLibrary 15 | 16 | # Adapt as necessary starting from here 17 | 18 | build_script: 19 | - travis-tool.sh install_deps 20 | 21 | test_script: 22 | - travis-tool.sh run_tests 23 | 24 | on_failure: 25 
| - 7z a failure.zip *.Rcheck\* 26 | - appveyor PushArtifact failure.zip 27 | 28 | artifacts: 29 | - path: '*.Rcheck\**\*.log' 30 | name: Logs 31 | 32 | - path: '*.Rcheck\**\*.out' 33 | name: Logs 34 | 35 | - path: '*.Rcheck\**\*.fail' 36 | name: Logs 37 | 38 | - path: '*.Rcheck\**\*.Rout' 39 | name: Logs 40 | 41 | - path: '\*_*.tar.gz' 42 | name: Bits 43 | 44 | - path: '\*_*.zip' 45 | name: Bits 46 | -------------------------------------------------------------------------------- /R/compute.r: -------------------------------------------------------------------------------- 1 | 2 | #' Deprecated function 3 | #' 4 | #' The function \code{compute} is deprecated. Please refer to the new function \code{\link{predict.nn}}. 5 | #' 6 | #' @param x an object of class \code{nn}. 7 | #' @param covariate a dataframe or matrix containing the variables that had 8 | #' been used to train the neural network. 9 | #' @param rep an integer indicating the neural network's repetition which 10 | #' should be used. 11 | #' @return \code{compute} returns a list containing the following components: 12 | #' \item{neurons}{a list of the neurons' output for each layer of the neural 13 | #' network.} \item{net.result}{a matrix containing the overall result of the 14 | #' neural network.} 15 | #' 16 | #' @export 17 | compute <- function(x, covariate, rep = 1) { 18 | #.Deprecated("predict", package = "neuralnet") 19 | pred <- predict.nn(x, newdata = covariate, rep = rep, all.units = TRUE) 20 | 21 | # Create old format by adding intercept 22 | for (i in 1:(length(pred) - 1)) { 23 | pred[[i]] <- cbind(1, pred[[i]]) 24 | } 25 | 26 | list(neurons = pred[-length(pred)], net.result = pred[[length(pred)]]) 27 | } 28 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: neuralnet 2 | Type: Package 3 | Title: Training of Neural Networks 4 | Version: 1.44.6 5 | Date: 2019-04-17 6 | Authors@R: c( 7 | person(given="Stefan", family="Fritsch", role="aut"), 8 | person(given="Frauke", family="Guenther", role=c("aut")), 9 | person(given="Marvin N.", family="Wright", role=c("aut","cre"),email="wright@leibniz-bips.de"), 10 | person(given="Marc", family="Suling", role="ctb"), 11 | person(given="Sebastian M.", family="Mueller", role = "ctb")) 12 | Depends: R (>= 2.9.0) 13 | Imports: grid, MASS, grDevices, stats, utils, Deriv 14 | Description: Training of neural networks using backpropagation, 15 | resilient backpropagation with (Riedmiller, 1994) or without 16 | weight backtracking (Riedmiller and Braun, 1993) or the 17 | modified globally convergent version by Anastasiadis et al. 18 | (2005). The package allows flexible settings through 19 | custom-choice of error and activation function. Furthermore, 20 | the calculation of generalized weights (Intrator O & Intrator 21 | N, 1993) is implemented. 
22 | License: GPL (>= 2) 23 | Suggests: testthat 24 | URL: https://github.com/bips-hb/neuralnet 25 | BugReports: https://github.com/bips-hb/neuralnet/issues 26 | RoxygenNote: 6.1.0 27 | -------------------------------------------------------------------------------- /R/convert_functions.R: -------------------------------------------------------------------------------- 1 | # Convert named activation functions in R functions, including derivatives 2 | convert.activation.function <- function(fun) { 3 | if (fun == "tanh") { 4 | fct <- function(x) { 5 | tanh(x) 6 | } 7 | attr(fct, "type") <- "tanh" 8 | deriv.fct <- function(x) { 9 | 1 - x^2 10 | } 11 | } else if (fun == "logistic") { 12 | fct <- function(x) { 13 | 1/(1 + exp(-x)) 14 | } 15 | attr(fct, "type") <- "logistic" 16 | deriv.fct <- function(x) { 17 | x * (1 - x) 18 | } 19 | } else if (fun == "relu" || fun == "ReLu") { 20 | fct <- function(x) { 21 | x * (x > 0) 22 | } 23 | attr(fct, "type") <- "relu" 24 | deriv.fct <- function(x) { 25 | 1 * (x > 0) 26 | } 27 | } else { 28 | stop("Unknown function.", call. = FALSE) 29 | } 30 | list(fct = fct, deriv.fct = deriv.fct) 31 | } 32 | 33 | # Convert named error functions in R functions, including derivatives 34 | convert.error.function <- function(fun) { 35 | if (fun == "sse") { 36 | fct <- function(x, y) { 37 | 1/2 * (y - x)^2 38 | } 39 | attr(fct, "type") <- "sse" 40 | deriv.fct <- function(x, y) { 41 | x - y 42 | } 43 | } else if (fun == "ce") { 44 | fct <- function(x, y) { 45 | -(y * log(x) + (1 - y) * log(1 - x)) 46 | } 47 | attr(fct, "type") <- "ce" 48 | deriv.fct <- function(x, y) { 49 | (1 - y)/(1 - x) - y/x 50 | } 51 | } else { 52 | stop("Unknown function.", call. = FALSE) 53 | } 54 | list(fct = fct, deriv.fct = deriv.fct) 55 | } -------------------------------------------------------------------------------- /man/predict.nn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.R 3 | \name{predict.nn} 4 | \alias{predict.nn} 5 | \title{Neural network prediction} 6 | \usage{ 7 | \method{predict}{nn}(object, newdata, rep = 1, all.units = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{object}{Neural network of class \code{nn}.} 11 | 12 | \item{newdata}{New data of class \code{data.frame} or \code{matrix}.} 13 | 14 | \item{rep}{Integer indicating the neural network's repetition which should be used.} 15 | 16 | \item{all.units}{Return output for all units instead of final output only.} 17 | 18 | \item{...}{further arguments passed to or from other methods.} 19 | } 20 | \value{ 21 | Matrix of predictions. Each column represents one output unit. 22 | If \code{all.units=TRUE}, a list of matrices with output for each unit. 23 | } 24 | \description{ 25 | Prediction of artificial neural network of class \code{nn}, produced by \code{neuralnet()}. 
26 | } 27 | \examples{ 28 | library(neuralnet) 29 | 30 | # Split data 31 | train_idx <- sample(nrow(iris), 2/3 * nrow(iris)) 32 | iris_train <- iris[train_idx, ] 33 | iris_test <- iris[-train_idx, ] 34 | 35 | # Binary classification 36 | nn <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, iris_train, linear.output = FALSE) 37 | pred <- predict(nn, iris_test) 38 | table(iris_test$Species == "setosa", pred[, 1] > 0.5) 39 | 40 | # Multiclass classification 41 | nn <- neuralnet((Species == "setosa") + (Species == "versicolor") + (Species == "virginica") 42 | ~ Petal.Length + Petal.Width, iris_train, linear.output = FALSE) 43 | pred <- predict(nn, iris_test) 44 | table(iris_test$Species, apply(pred, 1, which.max)) 45 | 46 | } 47 | \author{ 48 | Marvin N. Wright 49 | } 50 | -------------------------------------------------------------------------------- /man/prediction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prediction.r 3 | \name{prediction} 4 | \alias{prediction} 5 | \title{Summarizes the output of the neural network, the data and the fitted values 6 | of glm objects (if available)} 7 | \usage{ 8 | prediction(x, list.glm = NULL) 9 | } 10 | \arguments{ 11 | \item{x}{neural network} 12 | 13 | \item{list.glm}{an optional list of glm objects} 14 | } 15 | \value{ 16 | a list of the summaries of the repetitions of the neural networks, 17 | the data and the glm objects (if available). 18 | } 19 | \description{ 20 | \code{prediction}, a method for objects of class \code{nn}, typically 21 | produced by \code{neuralnet}. In a first step, the dataframe will be 22 | amended by a mean response, the mean of all responses corresponding to the 23 | same covariate-vector. The calculated data.error is the error function 24 | between the original response and the new mean response. In a second step, 25 | all duplicate rows will be erased to get a quick overview of the data. To 26 | obtain an overview of the results of the neural network and the glm objects, 27 | the covariate matrix will be bound to the output of the neural network and 28 | the fitted values of the glm object(if available) and will be reduced by all 29 | duplicate rows. 30 | } 31 | \examples{ 32 | 33 | Var1 <- rpois(100,0.5) 34 | Var2 <- rbinom(100,2,0.6) 35 | Var3 <- rbinom(100,1,0.5) 36 | SUM <- as.integer(abs(Var1+Var2+Var3+(rnorm(100)))) 37 | sum.data <- data.frame(Var1,Var2,Var3, SUM) 38 | print(net.sum <- neuralnet( SUM~Var1+Var2+Var3, sum.data, hidden=1, 39 | act.fct="tanh")) 40 | main <- glm(SUM~Var1+Var2+Var3, sum.data, family=poisson()) 41 | full <- glm(SUM~Var1*Var2*Var3, sum.data, family=poisson()) 42 | prediction(net.sum, list.glm=list(main=main, full=full)) 43 | 44 | } 45 | \seealso{ 46 | \code{\link{neuralnet}} 47 | } 48 | \author{ 49 | Stefan Fritsch, Frauke Guenther \email{guenther@leibniz-bips.de} 50 | } 51 | \keyword{neural} 52 | -------------------------------------------------------------------------------- /man/confidence.interval.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/confidence.interval.r 3 | \name{confidence.interval} 4 | \alias{confidence.interval} 5 | \title{Calculates confidence intervals of the weights} 6 | \usage{ 7 | confidence.interval(x, alpha = 0.05) 8 | } 9 | \arguments{ 10 | \item{x}{neural network} 11 | 12 | \item{alpha}{numerical. 
Sets the confidence level to (1-alpha).}
13 | }
14 | \value{
15 | \code{confidence.interval} returns a list containing the following
16 | components:
17 |
18 | \item{ lower.ci }{a list containing the lower confidence bounds of all
19 | weights of the neural network differentiated by the repetitions.} \item{
20 | upper.ci }{a list containing the upper confidence bounds of all weights of
21 | the neural network differentiated by the repetitions.} \item{ nic }{a vector
22 | containing the information criteria NIC for every repetition.}
23 | }
24 | \description{
25 | \code{confidence.interval}, a method for objects of class \code{nn},
26 | typically produced by \code{neuralnet}. Calculates confidence intervals of
27 | the weights (White, 1989) and the network information criteria NIC (Murata
28 | et al. 1994). All confidence intervals are calculated under the assumption
29 | of a local identification of the given neural network. If this assumption
30 | is violated, the results will not be reasonable. Please also make sure that
31 | the chosen error function equals the negative log-likelihood function;
32 | otherwise the results will not be meaningful either.
33 | }
34 | \examples{
35 |
36 | data(infert, package="datasets")
37 | print(net.infert <- neuralnet(case~parity+induced+spontaneous,
38 | infert, err.fct="ce", linear.output=FALSE))
39 | confidence.interval(net.infert)
40 |
41 | }
42 | \references{
43 | White (1989) \emph{Learning in artificial neural networks. A
44 | statistical perspective.} Neural Computation (1), pages 425-464
45 |
46 | Murata et al. (1994) \emph{Network information criterion - determining the
47 | number of hidden units for an artificial neural network model.} IEEE
48 | Transactions on Neural Networks 5 (6), pages 865-871
49 | }
50 | \seealso{
51 | \code{\link{neuralnet}}
52 | }
53 | \author{
54 | Stefan Fritsch, Frauke Guenther \email{guenther@leibniz-bips.de}
55 | }
56 | \keyword{neural}
57 |
--------------------------------------------------------------------------------
/R/print.R:
--------------------------------------------------------------------------------
1 | print.nn <-
2 | function (x, ...)
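# Print method for objects of class 'nn' (undocumented in this file): shows the
# matched call and, for each repetition (ordered by error when there is more than
# one), the error, AIC/BIC if computed, the reached threshold and the number of steps.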
3 | { 4 | matrix <- x$result.matrix 5 | cat("Call: ", deparse(x$call), "\n\n", sep = "") 6 | if (!is.null(matrix)) { 7 | if (ncol(matrix) > 1) { 8 | cat(ncol(matrix), " repetitions were calculated.\n\n", 9 | sep = "") 10 | sorted.matrix <- matrix[, order(matrix["error", ])] 11 | if (any(rownames(sorted.matrix) == "aic")) { 12 | print(t(rbind(Error = sorted.matrix["error", 13 | ], AIC = sorted.matrix["aic", ], BIC = sorted.matrix["bic", 14 | ], `Reached Threshold` = sorted.matrix["reached.threshold", 15 | ], Steps = sorted.matrix["steps", ]))) 16 | } 17 | else { 18 | print(t(rbind(Error = sorted.matrix["error", 19 | ], `Reached Threshold` = sorted.matrix["reached.threshold", 20 | ], Steps = sorted.matrix["steps", ]))) 21 | } 22 | } 23 | else { 24 | cat(ncol(matrix), " repetition was calculated.\n\n", 25 | sep = "") 26 | if (any(rownames(matrix) == "aic")) { 27 | print(t(matrix(c(matrix["error", ], matrix["aic", 28 | ], matrix["bic", ], matrix["reached.threshold", 29 | ], matrix["steps", ]), dimnames = list(c("Error", 30 | "AIC", "BIC", "Reached Threshold", "Steps"), 31 | c(1))))) 32 | } 33 | else { 34 | print(t(matrix(c(matrix["error", ], matrix["reached.threshold", 35 | ], matrix["steps", ]), dimnames = list(c("Error", 36 | "Reached Threshold", "Steps"), c(1))))) 37 | } 38 | } 39 | } 40 | cat("\n") 41 | } -------------------------------------------------------------------------------- /man/gwplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gwplot.r 3 | \name{gwplot} 4 | \alias{gwplot} 5 | \title{Plot method for generalized weights} 6 | \usage{ 7 | gwplot(x, rep = NULL, max = NULL, min = NULL, file = NULL, 8 | selected.covariate = 1, selected.response = 1, highlight = FALSE, 9 | type = "p", col = "black", ...) 10 | } 11 | \arguments{ 12 | \item{x}{an object of class \code{nn}} 13 | 14 | \item{rep}{an integer indicating the repetition to plot. If rep="best", the 15 | repetition with the smallest error will be plotted. If not stated all 16 | repetitions will be plotted.} 17 | 18 | \item{max}{maximum of the y axis. In default, max is set to the highest 19 | y-value.} 20 | 21 | \item{min}{minimum of the y axis. In default, min is set to the smallest 22 | y-value.} 23 | 24 | \item{file}{a character string naming the plot to write to. If not stated, 25 | the plot will not be saved.} 26 | 27 | \item{selected.covariate}{either a string of the covariate's name or an 28 | integer of the ordered covariates, indicating the reference covariate in the 29 | generalized weights plot. Defaulting to the first covariate.} 30 | 31 | \item{selected.response}{either a string of the response variable's name or 32 | an integer of the ordered response variables, indicating the reference 33 | response in the generalized weights plot. Defaulting to the first response 34 | variable.} 35 | 36 | \item{highlight}{a logical value, indicating whether to highlight (red 37 | color) the best repetition (smallest error). Only reasonable if rep=NULL. 
38 | Default is FALSE} 39 | 40 | \item{type}{a character indicating the type of plotting; actually any of the 41 | types as in \code{\link{plot.default}}.} 42 | 43 | \item{col}{a color of the generalized weights.} 44 | 45 | \item{\dots}{Arguments to be passed to methods, such as graphical parameters 46 | (see \code{\link{par}}).} 47 | } 48 | \description{ 49 | \code{gwplot}, a method for objects of class \code{nn}, typically produced 50 | by \code{neuralnet}. Plots the generalized weights (Intrator and Intrator, 51 | 1993) for one specific covariate and one response variable. 52 | } 53 | \examples{ 54 | 55 | data(infert, package="datasets") 56 | print(net.infert <- neuralnet(case~parity+induced+spontaneous, infert, 57 | err.fct="ce", linear.output=FALSE, likelihood=TRUE)) 58 | gwplot(net.infert, selected.covariate="parity") 59 | gwplot(net.infert, selected.covariate="induced") 60 | gwplot(net.infert, selected.covariate="spontaneous") 61 | 62 | } 63 | \references{ 64 | Intrator O. and Intrator N. (1993) \emph{Using Neural Nets for 65 | Interpretation of Nonlinear Models.} Proceedings of the Statistical 66 | Computing Section, 244-249 San Francisco: American Statistical Society 67 | (eds.) 68 | } 69 | \seealso{ 70 | \code{\link{neuralnet}} 71 | } 72 | \author{ 73 | Stefan Fritsch, Frauke Guenther \email{guenther@leibniz-bips.de} 74 | } 75 | \keyword{neural} 76 | -------------------------------------------------------------------------------- /man/plot.nn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.nn.r 3 | \name{plot.nn} 4 | \alias{plot.nn} 5 | \title{Plot method for neural networks} 6 | \usage{ 7 | \method{plot}{nn}(x, rep = NULL, x.entry = NULL, x.out = NULL, 8 | radius = 0.15, arrow.length = 0.2, intercept = TRUE, 9 | intercept.factor = 0.4, information = TRUE, information.pos = 0.1, 10 | col.entry.synapse = "black", col.entry = "black", 11 | col.hidden = "black", col.hidden.synapse = "black", 12 | col.out = "black", col.out.synapse = "black", 13 | col.intercept = "blue", fontsize = 12, dimension = 6, 14 | show.weights = TRUE, file = NULL, ...) 15 | } 16 | \arguments{ 17 | \item{x}{an object of class \code{nn}} 18 | 19 | \item{rep}{repetition of the neural network. If rep="best", the repetition 20 | with the smallest error will be plotted. If not stated all repetitions will 21 | be plotted, each in a separate window.} 22 | 23 | \item{x.entry}{x-coordinate of the entry layer. Depends on the arrow.length 24 | in default.} 25 | 26 | \item{x.out}{x-coordinate of the output layer.} 27 | 28 | \item{radius}{radius of the neurons.} 29 | 30 | \item{arrow.length}{length of the entry and out arrows.} 31 | 32 | \item{intercept}{a logical value indicating whether to plot the intercept.} 33 | 34 | \item{intercept.factor}{x-position factor of the intercept. 
The closer the 35 | factor is to 0, the closer the intercept is to its left neuron.} 36 | 37 | \item{information}{a logical value indicating whether to add the error and 38 | steps to the plot.} 39 | 40 | \item{information.pos}{y-position of the information.} 41 | 42 | \item{col.entry.synapse}{color of the synapses leading to the input neurons.} 43 | 44 | \item{col.entry}{color of the input neurons.} 45 | 46 | \item{col.hidden}{color of the neurons in the hidden layer.} 47 | 48 | \item{col.hidden.synapse}{color of the weighted synapses.} 49 | 50 | \item{col.out}{color of the output neurons.} 51 | 52 | \item{col.out.synapse}{color of the synapses leading away from the output 53 | neurons.} 54 | 55 | \item{col.intercept}{color of the intercept.} 56 | 57 | \item{fontsize}{fontsize of the text.} 58 | 59 | \item{dimension}{size of the plot in inches.} 60 | 61 | \item{show.weights}{a logical value indicating whether to print the 62 | calculated weights above the synapses.} 63 | 64 | \item{file}{a character string naming the plot to write to. If not stated, 65 | the plot will not be saved.} 66 | 67 | \item{\dots}{arguments to be passed to methods, such as graphical parameters 68 | (see \code{\link{par}}).} 69 | } 70 | \description{ 71 | \code{plot.nn}, a method for the \code{plot} generic. It is designed for an 72 | inspection of the weights for objects of class \code{nn}, typically produced 73 | by \code{neuralnet}. 74 | } 75 | \examples{ 76 | 77 | XOR <- c(0,1,1,0) 78 | xor.data <- data.frame(expand.grid(c(0,1), c(0,1)), XOR) 79 | print(net.xor <- neuralnet( XOR~Var1+Var2, xor.data, hidden=2, rep=5)) 80 | plot(net.xor, rep="best") 81 | 82 | } 83 | \seealso{ 84 | \code{\link{neuralnet}} 85 | } 86 | \author{ 87 | Stefan Fritsch, Frauke Guenther \email{guenther@leibniz-bips.de} 88 | } 89 | \keyword{neural} 90 | -------------------------------------------------------------------------------- /R/predict.R: -------------------------------------------------------------------------------- 1 | 2 | #' Neural network prediction 3 | #' 4 | #' Prediction of artificial neural network of class \code{nn}, produced by \code{neuralnet()}. 5 | #' 6 | #' @param object Neural network of class \code{nn}. 7 | #' @param newdata New data of class \code{data.frame} or \code{matrix}. 8 | #' @param rep Integer indicating the neural network's repetition which should be used. 9 | #' @param all.units Return output for all units instead of final output only. 10 | #' @param ... further arguments passed to or from other methods. 11 | #' 12 | #' @return Matrix of predictions. Each column represents one output unit. 13 | #' If \code{all.units=TRUE}, a list of matrices with output for each unit. 14 | #' 15 | #' @examples 16 | #' library(neuralnet) 17 | #' 18 | #' # Split data 19 | #' train_idx <- sample(nrow(iris), 2/3 * nrow(iris)) 20 | #' iris_train <- iris[train_idx, ] 21 | #' iris_test <- iris[-train_idx, ] 22 | #' 23 | #' # Binary classification 24 | #' nn <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, iris_train, linear.output = FALSE) 25 | #' pred <- predict(nn, iris_test) 26 | #' table(iris_test$Species == "setosa", pred[, 1] > 0.5) 27 | #' 28 | #' # Multiclass classification 29 | #' nn <- neuralnet((Species == "setosa") + (Species == "versicolor") + (Species == "virginica") 30 | #' ~ Petal.Length + Petal.Width, iris_train, linear.output = FALSE) 31 | #' pred <- predict(nn, iris_test) 32 | #' table(iris_test$Species, apply(pred, 1, which.max)) 33 | #' 34 | #' @author Marvin N. 
Wright 35 | #' @export 36 | predict.nn <- function(object, newdata, rep = 1, all.units = FALSE, ...) { 37 | weights <- object$weights[[rep]] 38 | num_hidden_layers <- length(weights) - 1 39 | 40 | # Set NA weights to 0 41 | for (i in 1:length(weights)) { 42 | if (any(is.na(weights[[i]]))) { 43 | weights[[i]][is.na(weights[[i]])] <- 0 44 | } 45 | } 46 | 47 | # Init prediction with data, subset if necessary 48 | if (ncol(newdata) == length(object$model.list$variables) | is.null(object$model.list)) { 49 | pred <- as.matrix(newdata) 50 | } else { 51 | pred <- as.matrix(newdata[, object$model.list$variables]) 52 | } 53 | 54 | 55 | # Init units if requested 56 | if (all.units) { 57 | units <- list(pred) 58 | } 59 | 60 | # Hidden layers 61 | if (num_hidden_layers > 0) { 62 | for (i in 1:num_hidden_layers) { 63 | pred <- object$act.fct(cbind(1, pred) %*% weights[[i]]) 64 | 65 | # Save unit outputs if requested 66 | if (all.units) { 67 | units <- append(units, list(pred)) 68 | } 69 | } 70 | } 71 | 72 | # Output layer: Only apply activation function if non-linear output 73 | pred <- cbind(1, pred) %*% weights[[num_hidden_layers + 1]] 74 | if (!object$linear.output) { 75 | if (is.null(object$output.act.fct)) { 76 | # Networks from version <1.44.4, no separate output activation 77 | pred <- object$act.fct(pred) 78 | } else { 79 | pred <- object$output.act.fct(pred) 80 | } 81 | } 82 | 83 | # Save unit outputs if requested 84 | if (all.units) { 85 | units <- append(units, list(pred)) 86 | } 87 | 88 | # Return result 89 | if (all.units) { 90 | units 91 | } else { 92 | pred 93 | } 94 | } 95 | 96 | -------------------------------------------------------------------------------- /tests/testthat/test_multiclass.R: -------------------------------------------------------------------------------- 1 | library(neuralnet) 2 | context("neuralnet_multiclass") 3 | 4 | # Scale iris data 5 | dat <- iris 6 | dat[, -5] <- scale(dat[, -5]) 7 | 8 | test_that("Manual multiclass returns correct dimensions", { 9 | nn_multi <- neuralnet((Species == "setosa") + (Species == "versicolor") ~ Petal.Length + Petal.Width, dat, 10 | hidden = c(2, 3), linear.output = FALSE) 11 | 12 | # Skip if not converged 13 | skip_if(is.null(nn_multi$weights)) 14 | 15 | pred_multi <- predict(nn_multi, dat[, c("Petal.Length", "Petal.Width")]) 16 | 17 | expect_length(nn_multi$weights[[1]], 3) # 2 hidden + 1 output layer 18 | expect_equal(dim(nn_multi$weights[[1]][[1]]), c(3, 2)) # First hidden layer: 3 in (1 bias) x 2 units 19 | expect_equal(dim(nn_multi$weights[[1]][[2]]), c(3, 3)) # Second hidden layer: 3 in (1 bias) x 3 units 20 | expect_equal(dim(nn_multi$weights[[1]][[3]]), c(4, 2)) # Output layer: 4 in (1 bias) x 2 units 21 | 22 | expect_equal(dim(pred_multi), c(nrow(dat), 2)) # Predict 2 outputs 23 | }) 24 | 25 | test_that("Auto multiclass returns correct dimensions", { 26 | nn_multi <- neuralnet(Species ~ Petal.Length + Petal.Width, dat, hidden = c(2, 3), linear.output = FALSE) 27 | 28 | # Skip if not converged 29 | skip_if(is.null(nn_multi$weights)) 30 | 31 | pred_multi <- predict(nn_multi, dat[, c("Petal.Length", "Petal.Width")]) 32 | 33 | expect_length(nn_multi$weights[[1]], 3) # 2 hidden + 1 output layer 34 | expect_equal(dim(nn_multi$weights[[1]][[1]]), c(3, 2)) # First hidden layer: 3 in (1 bias) x 2 units 35 | expect_equal(dim(nn_multi$weights[[1]][[2]]), c(3, 3)) # Second hidden layer: 3 in (1 bias) x 3 units 36 | expect_equal(dim(nn_multi$weights[[1]][[3]]), c(4, 3)) # Output layer: 4 in (1 bias) x 3 units 37 | 38 | 
expect_equal(dim(pred_multi), c(nrow(dat), 3)) # Predict 3 outputs 39 | }) 40 | 41 | 42 | test_that("Same results with manual multiclass", { 43 | set.seed(42) 44 | nn_manual <- neuralnet((Species == "setosa") + (Species == "versicolor") + (Species == "virginica") ~ Petal.Length + Petal.Width, dat, linear.output = FALSE) 45 | 46 | set.seed(42) 47 | nn_auto <- neuralnet(Species ~ Petal.Length + Petal.Width, dat, linear.output = FALSE) 48 | 49 | expect_equal(nn_manual$weights[[1]][[1]], nn_auto$weights[[1]][[1]]) 50 | expect_equal(nn_manual$weights[[1]][[2]], nn_auto$weights[[1]][[2]]) 51 | expect_equal(nn_manual$net.result[[1]], nn_auto$net.result[[1]]) 52 | }) 53 | 54 | test_that("Response vector has correct factor levels", { 55 | nn <- neuralnet(Species ~ Petal.Length + Petal.Width, dat, linear.output = FALSE) 56 | expect_equal(unname(apply(nn$response, 1, which)), as.numeric(dat$Species)) 57 | 58 | dat_char <- dat 59 | dat_char$Species <- as.character(dat_char$Species) 60 | nn <- neuralnet(Species ~ Petal.Length + Petal.Width, dat_char, linear.output = FALSE) 61 | expect_equal(colnames(nn$response)[apply(nn$response, 1, which)], dat_char$Species) 62 | 63 | dat_reordered <- rbind(dat[101:150, ], dat[51:100, ], dat[1:50, ]) 64 | nn <- neuralnet(Species ~ Petal.Length + Petal.Width, dat_reordered, linear.output = FALSE) 65 | expect_equal(unname(apply(nn$response, 1, which)), as.numeric(dat_reordered$Species)) 66 | 67 | dat_char_reordered <- rbind(dat_char[101:150, ], dat_char[51:100, ], dat_char[1:50, ]) 68 | nn <- neuralnet(Species ~ Petal.Length + Petal.Width, dat_char_reordered, linear.output = FALSE) 69 | expect_equal(colnames(nn$response)[apply(nn$response, 1, which)], dat_char_reordered$Species) 70 | }) 71 | 72 | -------------------------------------------------------------------------------- /man/neuralnet-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/neuralnet-package.R 3 | \docType{package} 4 | \name{neuralnet-package} 5 | \alias{neuralnet-package} 6 | \title{Training of Neural Networks} 7 | \description{ 8 | Training of neural networks using the backpropagation, resilient 9 | backpropagation with (Riedmiller, 1994) or without weight backtracking 10 | (Riedmiller, 1993) or the modified globally convergent version by 11 | Anastasiadis et al. (2005). The package allows flexible settings through 12 | custom-choice of error and activation function. Furthermore, the calculation 13 | of generalized weights (Intrator O & Intrator N, 1993) is implemented. 14 | } 15 | \note{ 16 | This work has been supported by the German Research Foundation\cr 17 | (DFG: \url{http://www.dfg.de}) under grant scheme PI 345/3-1. 
18 | } 19 | \examples{ 20 | 21 | AND <- c(rep(0,7),1) 22 | OR <- c(0,rep(1,7)) 23 | binary.data <- data.frame(expand.grid(c(0,1), c(0,1), c(0,1)), AND, OR) 24 | print(net <- neuralnet(AND+OR~Var1+Var2+Var3, binary.data, hidden=0, 25 | rep=10, err.fct="ce", linear.output=FALSE)) 26 | 27 | XOR <- c(0,1,1,0) 28 | xor.data <- data.frame(expand.grid(c(0,1), c(0,1)), XOR) 29 | print(net.xor <- neuralnet(XOR~Var1+Var2, xor.data, hidden=2, rep=5)) 30 | plot(net.xor, rep="best") 31 | 32 | data(infert, package="datasets") 33 | print(net.infert <- neuralnet(case~parity+induced+spontaneous, infert, 34 | err.fct="ce", linear.output=FALSE, likelihood=TRUE)) 35 | gwplot(net.infert, selected.covariate="parity") 36 | gwplot(net.infert, selected.covariate="induced") 37 | gwplot(net.infert, selected.covariate="spontaneous") 38 | confidence.interval(net.infert) 39 | 40 | Var1 <- runif(50, 0, 100) 41 | sqrt.data <- data.frame(Var1, Sqrt=sqrt(Var1)) 42 | print(net.sqrt <- neuralnet(Sqrt~Var1, sqrt.data, hidden=10, 43 | threshold=0.01)) 44 | predict(net.sqrt, data.frame(Var1 = (1:10)^2)) 45 | 46 | Var1 <- rpois(100,0.5) 47 | Var2 <- rbinom(100,2,0.6) 48 | Var3 <- rbinom(100,1,0.5) 49 | SUM <- as.integer(abs(Var1+Var2+Var3+(rnorm(100)))) 50 | sum.data <- data.frame(Var1,Var2,Var3, SUM) 51 | print(net.sum <- neuralnet(SUM~Var1+Var2+Var3, sum.data, hidden=1, 52 | act.fct="tanh")) 53 | prediction(net.sum) 54 | 55 | } 56 | \references{ 57 | Riedmiller M. (1994) \emph{Rprop - Description and 58 | Implementation Details.} Technical Report. University of Karlsruhe. 59 | 60 | Riedmiller M. and Braun H. (1993) \emph{A direct adaptive method for faster 61 | backpropagation learning: The RPROP algorithm.} Proceedings of the IEEE 62 | International Conference on Neural Networks (ICNN), pages 586-591. San 63 | Francisco. 64 | 65 | Anastasiadis A. et. al. (2005) \emph{New globally convergent training scheme 66 | based on the resilient propagation algorithm.} Neurocomputing 64, pages 67 | 253-270. 68 | 69 | Intrator O. and Intrator N. (1993) \emph{Using Neural Nets for 70 | Interpretation of Nonlinear Models.} Proceedings of the Statistical 71 | Computing Section, 244-249 San Francisco: American Statistical Society 72 | (eds). 73 | } 74 | \seealso{ 75 | \code{\link{plot.nn}} for plotting of the neural network. 76 | 77 | \code{\link{gwplot}} for plotting of the generalized weights. 78 | 79 | \code{\link{compute}} for computation of the calculated network. 80 | 81 | \code{\link{confidence.interval}} for calculation of a confidence interval 82 | for the weights. 83 | 84 | \code{\link{prediction}} for calculation of a prediction. 85 | } 86 | \author{ 87 | Stefan Fritsch, Frauke Guenther \email{guenther@leibniz-bips.de}, 88 | 89 | Maintainer: Frauke Guenther \email{guenther@leibniz-bips.de} 90 | } 91 | \keyword{neural} 92 | -------------------------------------------------------------------------------- /R/neuralnet-package.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' Training of Neural Networks 4 | #' 5 | #' Training of neural networks using the backpropagation, resilient 6 | #' backpropagation with (Riedmiller, 1994) or without weight backtracking 7 | #' (Riedmiller, 1993) or the modified globally convergent version by 8 | #' Anastasiadis et al. (2005). The package allows flexible settings through 9 | #' custom-choice of error and activation function. Furthermore, the calculation 10 | #' of generalized weights (Intrator O & Intrator N, 1993) is implemented. 
11 | #' 12 | #' @name neuralnet-package 13 | #' @docType package 14 | #' @note This work has been supported by the German Research Foundation\cr 15 | #' (DFG: \url{http://www.dfg.de}) under grant scheme PI 345/3-1. 16 | #' @author Stefan Fritsch, Frauke Guenther \email{guenther@@leibniz-bips.de}, 17 | #' 18 | #' Maintainer: Frauke Guenther \email{guenther@@leibniz-bips.de} 19 | #' @seealso \code{\link{plot.nn}} for plotting of the neural network. 20 | #' 21 | #' \code{\link{gwplot}} for plotting of the generalized weights. 22 | #' 23 | #' \code{\link{compute}} for computation of the calculated network. 24 | #' 25 | #' \code{\link{confidence.interval}} for calculation of a confidence interval 26 | #' for the weights. 27 | #' 28 | #' \code{\link{prediction}} for calculation of a prediction. 29 | #' @references Riedmiller M. (1994) \emph{Rprop - Description and 30 | #' Implementation Details.} Technical Report. University of Karlsruhe. 31 | #' 32 | #' Riedmiller M. and Braun H. (1993) \emph{A direct adaptive method for faster 33 | #' backpropagation learning: The RPROP algorithm.} Proceedings of the IEEE 34 | #' International Conference on Neural Networks (ICNN), pages 586-591. San 35 | #' Francisco. 36 | #' 37 | #' Anastasiadis A. et. al. (2005) \emph{New globally convergent training scheme 38 | #' based on the resilient propagation algorithm.} Neurocomputing 64, pages 39 | #' 253-270. 40 | #' 41 | #' Intrator O. and Intrator N. (1993) \emph{Using Neural Nets for 42 | #' Interpretation of Nonlinear Models.} Proceedings of the Statistical 43 | #' Computing Section, 244-249 San Francisco: American Statistical Society 44 | #' (eds). 45 | #' @keywords neural 46 | #' @examples 47 | #' 48 | #' AND <- c(rep(0,7),1) 49 | #' OR <- c(0,rep(1,7)) 50 | #' binary.data <- data.frame(expand.grid(c(0,1), c(0,1), c(0,1)), AND, OR) 51 | #' print(net <- neuralnet(AND+OR~Var1+Var2+Var3, binary.data, hidden=0, 52 | #' rep=10, err.fct="ce", linear.output=FALSE)) 53 | #' 54 | #' XOR <- c(0,1,1,0) 55 | #' xor.data <- data.frame(expand.grid(c(0,1), c(0,1)), XOR) 56 | #' print(net.xor <- neuralnet(XOR~Var1+Var2, xor.data, hidden=2, rep=5)) 57 | #' plot(net.xor, rep="best") 58 | #' 59 | #' data(infert, package="datasets") 60 | #' print(net.infert <- neuralnet(case~parity+induced+spontaneous, infert, 61 | #' err.fct="ce", linear.output=FALSE, likelihood=TRUE)) 62 | #' gwplot(net.infert, selected.covariate="parity") 63 | #' gwplot(net.infert, selected.covariate="induced") 64 | #' gwplot(net.infert, selected.covariate="spontaneous") 65 | #' confidence.interval(net.infert) 66 | #' 67 | #' Var1 <- runif(50, 0, 100) 68 | #' sqrt.data <- data.frame(Var1, Sqrt=sqrt(Var1)) 69 | #' print(net.sqrt <- neuralnet(Sqrt~Var1, sqrt.data, hidden=10, 70 | #' threshold=0.01)) 71 | #' predict(net.sqrt, data.frame(Var1 = (1:10)^2)) 72 | #' 73 | #' Var1 <- rpois(100,0.5) 74 | #' Var2 <- rbinom(100,2,0.6) 75 | #' Var3 <- rbinom(100,1,0.5) 76 | #' SUM <- as.integer(abs(Var1+Var2+Var3+(rnorm(100)))) 77 | #' sum.data <- data.frame(Var1,Var2,Var3, SUM) 78 | #' print(net.sum <- neuralnet(SUM~Var1+Var2+Var3, sum.data, hidden=1, 79 | #' act.fct="tanh")) 80 | #' prediction(net.sum) 81 | #' 82 | NULL 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /tests/testthat/test_neuralnet.R: -------------------------------------------------------------------------------- 1 | library(neuralnet) 2 | context("neuralnet") 3 | 4 | # Scale iris data 5 | dat <- iris 6 | dat[, -5] <- scale(dat[, -5]) 7 | 8 | nn <- 
neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, linear.output = FALSE) 9 | nn2 <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, hidden = c(3, 2), linear.output = FALSE) 10 | 11 | if (!is.null(nn$weights)) { 12 | pred <- predict(nn, dat[, c("Petal.Length", "Petal.Width")]) 13 | } 14 | if (!is.null(nn2$weights)) { 15 | pred2 <- predict(nn2, dat[, c("Petal.Length", "Petal.Width")]) 16 | } 17 | 18 | test_that("Fitting returns nn object with correct size", { 19 | skip_if(is.null(nn$weights)) 20 | expect_is(nn, "nn") 21 | expect_length(nn, 15) 22 | }) 23 | 24 | test_that("Prediction returns numeric with correct size", { 25 | skip_if(is.null(nn$weights)) 26 | expect_is(pred, "matrix") 27 | expect_equal(dim(pred), c(nrow(dat), 1)) 28 | }) 29 | 30 | test_that("predict() function returns list of correct size for unit prediction", { 31 | skip_if(is.null(nn$weights)) 32 | pred_all <- predict(nn, dat[, c("Petal.Length", "Petal.Width")], all.units = TRUE) 33 | expect_equal(length(pred_all), 3) 34 | 35 | skip_if(is.null(nn2$weights)) 36 | pred_all2 <- predict(nn2, dat[, c("Petal.Length", "Petal.Width")], all.units = TRUE) 37 | expect_equal(length(pred_all2), 4) 38 | }) 39 | 40 | test_that("predict() works if more variables in data", { 41 | skip_if(is.null(nn$weights)) 42 | pred_all <- predict(nn, dat) 43 | expect_equal(dim(pred_all), c(nrow(dat), 1)) 44 | }) 45 | 46 | test_that("Custom activation function works", { 47 | expect_silent(neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, 48 | linear.output = FALSE, act.fct = function(x) log(1 + exp(x)))) 49 | expect_silent(neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, 50 | linear.output = FALSE, act.fct = function(x) {log(1 + exp(x))})) 51 | 52 | }) 53 | 54 | test_that("Same result with custom activation function", { 55 | set.seed(10) 56 | nn_custom <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, 57 | linear.output = FALSE, act.fct = function(x) 1/(1 + exp(-x))) 58 | 59 | set.seed(10) 60 | nn_custom2 <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, 61 | linear.output = FALSE, act.fct = function(x) {1/(1 + exp(-x))}) 62 | 63 | set.seed(10) 64 | nn_default <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, 65 | linear.output = FALSE, act.fct = "logistic") 66 | 67 | expect_equal(nn_custom$net.result, nn_custom2$net.result) 68 | expect_equal(nn_custom$net.result, nn_default$net.result) 69 | 70 | expect_equal(nn_custom$result.matrix, nn_custom2$result.matrix) 71 | expect_equal(nn_custom$result.matrix, nn_default$result.matrix) 72 | }) 73 | 74 | test_that("Same result with custom error function", { 75 | set.seed(10) 76 | nn_custom <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, 77 | linear.output = FALSE, err.fct = function(x, y) {1/2 * (y - x)^2}) 78 | 79 | set.seed(10) 80 | nn_default <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, 81 | linear.output = FALSE, err.fct = "sse") 82 | 83 | expect_equal(nn_custom$net.result, nn_default$net.result) 84 | expect_equal(nn_custom$result.matrix, nn_default$result.matrix) 85 | }) 86 | 87 | test_that("Error if 'ce' error function used in non-binary outcome", { 88 | expect_error(neuralnet(Sepal.Length ~ Petal.Length + Petal.Width, 89 | dat, linear.output = TRUE, err.fct = "ce"), 90 | "Error function 'ce' only implemented for binary response\\.") 91 | }) 92 | 93 | test_that("Dots in formula work", { 94 | set.seed(42) 95 | nn_dot <- neuralnet(Species ~ ., dat, 
linear.output = FALSE) 96 | 97 | set.seed(42) 98 | nn_nodot <- neuralnet(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, dat, linear.output = FALSE) 99 | 100 | expect_equal(nn_dot$weights[[1]][[1]], nn_nodot$weights[[1]][[1]]) 101 | expect_equal(nn_dot$weights[[1]][[2]], nn_nodot$weights[[1]][[2]]) 102 | expect_equal(nn_dot$net.result[[1]], nn_nodot$net.result[[1]]) 103 | }) 104 | 105 | test_that("No error if replications don't converge", { 106 | # One fail, one success (2 reps) 107 | set.seed(1) 108 | expect_warning(nn <- neuralnet(Species ~ ., iris, linear.output = FALSE, rep = 2, stepmax = 1e4)) 109 | expect_is(nn$net.result, "list") 110 | expect_length(nn$net.result, 2) 111 | expect_null(nn$net.result[[1]]) 112 | 113 | # Both fail (2 reps) 114 | set.seed(1) 115 | expect_warning(nn <- neuralnet(Species ~ ., iris, linear.output = FALSE, rep = 2, stepmax = 10)) 116 | expect_null(nn$net.result) 117 | }) 118 | 119 | test_that("Works with ReLu activation", { 120 | expect_silent(neuralnet(Species ~ ., dat, linear.output = FALSE, act.fct = "relu")) 121 | expect_silent(neuralnet(Species ~ ., dat, linear.output = FALSE, act.fct = "ReLu")) 122 | }) 123 | -------------------------------------------------------------------------------- /R/prediction.r: -------------------------------------------------------------------------------- 1 | #' Summarizes the output of the neural network, the data and the fitted values 2 | #' of glm objects (if available) 3 | #' 4 | #' \code{prediction}, a method for objects of class \code{nn}, typically 5 | #' produced by \code{neuralnet}. In a first step, the dataframe will be 6 | #' amended by a mean response, the mean of all responses corresponding to the 7 | #' same covariate-vector. The calculated data.error is the error function 8 | #' between the original response and the new mean response. In a second step, 9 | #' all duplicate rows will be erased to get a quick overview of the data. To 10 | #' obtain an overview of the results of the neural network and the glm objects, 11 | #' the covariate matrix will be bound to the output of the neural network and 12 | #' the fitted values of the glm object(if available) and will be reduced by all 13 | #' duplicate rows. 14 | #' 15 | #' 16 | #' @param x neural network 17 | #' @param list.glm an optional list of glm objects 18 | #' @return a list of the summaries of the repetitions of the neural networks, 19 | #' the data and the glm objects (if available). 
20 | #' @author Stefan Fritsch, Frauke Guenther \email{guenther@@leibniz-bips.de} 21 | #' @seealso \code{\link{neuralnet}} 22 | #' @keywords neural 23 | #' @examples 24 | #' 25 | #' Var1 <- rpois(100,0.5) 26 | #' Var2 <- rbinom(100,2,0.6) 27 | #' Var3 <- rbinom(100,1,0.5) 28 | #' SUM <- as.integer(abs(Var1+Var2+Var3+(rnorm(100)))) 29 | #' sum.data <- data.frame(Var1,Var2,Var3, SUM) 30 | #' print(net.sum <- neuralnet( SUM~Var1+Var2+Var3, sum.data, hidden=1, 31 | #' act.fct="tanh")) 32 | #' main <- glm(SUM~Var1+Var2+Var3, sum.data, family=poisson()) 33 | #' full <- glm(SUM~Var1*Var2*Var3, sum.data, family=poisson()) 34 | #' prediction(net.sum, list.glm=list(main=main, full=full)) 35 | #' 36 | #' @export prediction 37 | prediction <- 38 | function (x, list.glm = NULL) 39 | { 40 | nn <- x 41 | data.result <- calculate.data.result(response = nn$response, 42 | model.list = nn$model.list, covariate = nn$covariate) 43 | predictions <- calculate.predictions(covariate = nn$covariate, 44 | data.result = data.result, list.glm = list.glm, matrix = nn$result.matrix, 45 | list.net.result = nn$net.result, model.list = nn$model.list) 46 | if (attr(nn$err.fct, "type") == "ce" && all(data.result >= 0) && 47 | all(data.result <= 1)) 48 | data.error <- sum(nn$err.fct(data.result, nn$response), 49 | na.rm = T) 50 | else data.error <- sum(nn$err.fct(data.result, nn$response)) 51 | message("Data Error:\t", data.error, ";") 52 | predictions 53 | } 54 | calculate.predictions <- 55 | function (covariate, data.result, list.glm, matrix, list.net.result, 56 | model.list) 57 | { 58 | not.duplicated <- !duplicated(covariate) 59 | nrow.notdupl <- sum(not.duplicated) 60 | covariate.mod <- matrix(covariate[not.duplicated, ], nrow = nrow.notdupl) 61 | predictions <- list(data = cbind(covariate.mod, matrix(data.result[not.duplicated, 62 | ], nrow = nrow.notdupl))) 63 | if (!is.null(matrix)) { 64 | for (i in length(list.net.result):1) { 65 | pred.temp <- cbind(covariate.mod, matrix(list.net.result[[i]][not.duplicated, 66 | ], nrow = nrow.notdupl)) 67 | predictions <- eval(parse(text = paste("c(list(rep", 68 | i, "=pred.temp), predictions)", sep = ""))) 69 | } 70 | } 71 | if (!is.null(list.glm)) { 72 | for (i in 1:length(list.glm)) { 73 | pred.temp <- cbind(covariate.mod, matrix(list.glm[[i]]$fitted.values[not.duplicated], 74 | nrow = nrow.notdupl)) 75 | text <- paste("c(predictions, list(glm.", names(list.glm[i]), 76 | "=pred.temp))", sep = "") 77 | predictions <- eval(parse(text = text)) 78 | } 79 | } 80 | for (i in 1:length(predictions)) { 81 | colnames(predictions[[i]]) <- c(model.list$variables, 82 | model.list$response) 83 | if (nrow(covariate) > 1) 84 | for (j in (1:ncol(covariate))) predictions[[i]] <- predictions[[i]][order(predictions[[i]][, 85 | j]), ] 86 | rownames(predictions[[i]]) <- 1:nrow(predictions[[i]]) 87 | } 88 | predictions 89 | } 90 | calculate.data.result <- 91 | function (response, covariate, model.list) 92 | { 93 | duplicated <- duplicated(covariate) 94 | if (!any(duplicated)) { 95 | return(response) 96 | } 97 | which.duplicated <- seq_along(duplicated)[duplicated] 98 | which.not.duplicated <- seq_along(duplicated)[!duplicated] 99 | ncol.response <- ncol(response) 100 | if (ncol(covariate) == 1) { 101 | for (each in which.not.duplicated) { 102 | out <- NULL 103 | if (length(which.duplicated) > 0) { 104 | out <- covariate[which.duplicated, ] == covariate[each, 105 | ] 106 | if (any(out)) { 107 | rows <- c(each, which.duplicated[out]) 108 | response[rows, ] = matrix(colMeans(matrix(response[rows, 109 | 
], ncol = ncol.response)), ncol = ncol.response, 110 | nrow = length(rows), byrow = T) 111 | which.duplicated <- which.duplicated[-out] 112 | } 113 | } 114 | } 115 | } 116 | else { 117 | tcovariate <- t(covariate) 118 | for (each in which.not.duplicated) { 119 | out <- NULL 120 | if (length(which.duplicated) > 0) { 121 | out <- apply(tcovariate[, which.duplicated] == 122 | covariate[each, ], 2, FUN = all) 123 | if (any(out)) { 124 | rows <- c(each, which.duplicated[out]) 125 | response[rows, ] = matrix(colMeans(matrix(response[rows, 126 | ], ncol = ncol.response)), ncol = ncol.response, 127 | nrow = length(rows), byrow = T) 128 | which.duplicated <- which.duplicated[-out] 129 | } 130 | } 131 | } 132 | } 133 | response 134 | } 135 | -------------------------------------------------------------------------------- /R/gwplot.r: -------------------------------------------------------------------------------- 1 | #' Plot method for generalized weights 2 | #' 3 | #' \code{gwplot}, a method for objects of class \code{nn}, typically produced 4 | #' by \code{neuralnet}. Plots the generalized weights (Intrator and Intrator, 5 | #' 1993) for one specific covariate and one response variable. 6 | #' 7 | #' 8 | #' @param x an object of class \code{nn} 9 | #' @param rep an integer indicating the repetition to plot. If rep="best", the 10 | #' repetition with the smallest error will be plotted. If not stated all 11 | #' repetitions will be plotted. 12 | #' @param max maximum of the y axis. In default, max is set to the highest 13 | #' y-value. 14 | #' @param min minimum of the y axis. In default, min is set to the smallest 15 | #' y-value. 16 | #' @param file a character string naming the plot to write to. If not stated, 17 | #' the plot will not be saved. 18 | #' @param selected.covariate either a string of the covariate's name or an 19 | #' integer of the ordered covariates, indicating the reference covariate in the 20 | #' generalized weights plot. Defaulting to the first covariate. 21 | #' @param selected.response either a string of the response variable's name or 22 | #' an integer of the ordered response variables, indicating the reference 23 | #' response in the generalized weights plot. Defaulting to the first response 24 | #' variable. 25 | #' @param highlight a logical value, indicating whether to highlight (red 26 | #' color) the best repetition (smallest error). Only reasonable if rep=NULL. 27 | #' Default is FALSE 28 | #' @param type a character indicating the type of plotting; actually any of the 29 | #' types as in \code{\link{plot.default}}. 30 | #' @param col a color of the generalized weights. 31 | #' @param \dots Arguments to be passed to methods, such as graphical parameters 32 | #' (see \code{\link{par}}). 33 | #' @author Stefan Fritsch, Frauke Guenther \email{guenther@@leibniz-bips.de} 34 | #' @seealso \code{\link{neuralnet}} 35 | #' @references Intrator O. and Intrator N. (1993) \emph{Using Neural Nets for 36 | #' Interpretation of Nonlinear Models.} Proceedings of the Statistical 37 | #' Computing Section, 244-249 San Francisco: American Statistical Society 38 | #' (eds.) 
39 | #' @keywords neural 40 | #' @examples 41 | #' 42 | #' data(infert, package="datasets") 43 | #' print(net.infert <- neuralnet(case~parity+induced+spontaneous, infert, 44 | #' err.fct="ce", linear.output=FALSE, likelihood=TRUE)) 45 | #' gwplot(net.infert, selected.covariate="parity") 46 | #' gwplot(net.infert, selected.covariate="induced") 47 | #' gwplot(net.infert, selected.covariate="spontaneous") 48 | #' 49 | #' @export gwplot 50 | gwplot <- 51 | function (x, rep = NULL, max = NULL, min = NULL, file = NULL, 52 | selected.covariate = 1, selected.response = 1, highlight = FALSE, 53 | type = "p", col = "black", ...) 54 | { 55 | net <- x 56 | if (is.null(net$generalized.weights)) 57 | stop("generalized weights were not calculated", call. = F) 58 | if (!is.null(file)) { 59 | if (!is.character(file)) 60 | stop("'file' must be a string") 61 | if (file.exists(file)) 62 | stop(sprintf("%s already exists", sQuote(file))) 63 | } 64 | if (!is.numeric(selected.covariate)) 65 | for (i in 1:length(net$model.list$variables)) if (net$model.list$variables[i] == 66 | selected.covariate) 67 | selected.covariate = i 68 | if (!is.numeric(selected.covariate) || selected.covariate < 69 | 1 || selected.covariate > ncol(net$covariate)) 70 | stop("'selected.covariate' does not exist") 71 | if (!is.numeric(selected.response)) 72 | for (i in 1:length(net$model.list$response)) if (net$model.list$response[i] == 73 | selected.response) 74 | selected.response = i 75 | if (!is.numeric(selected.response) || selected.response < 76 | 1 || selected.response > ncol(net$response)) 77 | stop("'selected.response' does not exist") 78 | if (!is.null(rep)) { 79 | if (rep == "best") 80 | rep <- as.integer(which.min(net$result.matrix["error", 81 | ])) 82 | if (length(net$generalized.weights) < rep) 83 | stop("'rep' does not exist") 84 | } 85 | covariate <- as.vector(net$covariate[, selected.covariate]) 86 | variablename <- net$model.list$variables[selected.covariate] 87 | column <- (selected.response - 1) * ncol(net$covariate) + 88 | selected.covariate 89 | if (is.null(rep)) { 90 | matrix <- as.matrix(sapply(net$generalized.weights, function(x) rbind(x[, 91 | column]))) 92 | item.to.print <- min(which.min(net$result.matrix["error", 93 | ])) 94 | } 95 | else { 96 | highlight = F 97 | matrix <- as.matrix(net$generalized.weights[[rep]][, 98 | column]) 99 | item.to.print <- 1 100 | } 101 | if (is.null(max)) 102 | max <- max(matrix) 103 | if (is.null(min)) 104 | min <- min(matrix) 105 | ylim <- c(min, max) 106 | if (!highlight || item.to.print != 1 || ncol(matrix) == 1) 107 | graphics::plot(x = covariate, y = matrix[, 1], ylim = ylim, xlab = variablename, 108 | ylab = "GW", type = type, col = col, ...) 109 | else graphics::plot(x = covariate, y = matrix[, 2], ylim = ylim, xlab = variablename, 110 | ylab = "GW", type = type, col = col, ...) 111 | if (ncol(matrix) >= 2) { 112 | for (i in 2:ncol(matrix)) if (!highlight || (i != item.to.print)) 113 | graphics::lines(x = covariate, y = matrix[, i], type = type, 114 | col = col, ...) 115 | } 116 | if (highlight) { 117 | graphics::lines(x = covariate, y = matrix[, item.to.print], type = type, 118 | col = "red", ...) 119 | graphics::legend("topright", paste("Minimal Error: ", round(net$result.matrix["error", 120 | item.to.print], 3), sep = ""), col = "red", ...) 
121 | } 122 | graphics::title(paste("Response: ", net$model.list$response[selected.response], 123 | sep = "")) 124 | if (!is.null(file)) { 125 | weight.plot <- grDevices::recordPlot() 126 | save(weight.plot, file = file) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /man/neuralnet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/neuralnet.r 3 | \name{neuralnet} 4 | \alias{neuralnet} 5 | \alias{print.nn} 6 | \title{Training of neural networks} 7 | \usage{ 8 | neuralnet(formula, data, hidden = 1, threshold = 0.01, 9 | stepmax = 1e+05, rep = 1, startweights = NULL, 10 | learningrate.limit = NULL, learningrate.factor = list(minus = 0.5, 11 | plus = 1.2), learningrate = NULL, lifesign = "none", 12 | lifesign.step = 1000, algorithm = "rprop+", err.fct = "sse", 13 | act.fct = "logistic", output.act.fct = "logistic", 14 | linear.output = TRUE, exclude = NULL, constant.weights = NULL, 15 | likelihood = FALSE) 16 | } 17 | \arguments{ 18 | \item{formula}{a symbolic description of the model to be fitted.} 19 | 20 | \item{data}{a data frame containing the variables specified in 21 | \code{formula}.} 22 | 23 | \item{hidden}{a vector of integers specifying the number of hidden neurons 24 | (vertices) in each layer.} 25 | 26 | \item{threshold}{a numeric value specifying the threshold for the partial 27 | derivatives of the error function as stopping criteria.} 28 | 29 | \item{stepmax}{the maximum steps for the training of the neural network. 30 | Reaching this maximum leads to a stop of the neural network's training 31 | process.} 32 | 33 | \item{rep}{the number of repetitions for the neural network's training.} 34 | 35 | \item{startweights}{a vector containing starting values for the weights. 36 | Set to \code{NULL} for random initialization.} 37 | 38 | \item{learningrate.limit}{a vector or a list containing the lowest and 39 | highest limit for the learning rate. Used only for RPROP and GRPROP.} 40 | 41 | \item{learningrate.factor}{a vector or a list containing the multiplication 42 | factors for the upper and lower learning rate. Used only for RPROP and 43 | GRPROP.} 44 | 45 | \item{learningrate}{a numeric value specifying the learning rate used by 46 | traditional backpropagation. Used only for traditional backpropagation.} 47 | 48 | \item{lifesign}{a string specifying how much the function will print during 49 | the calculation of the neural network. 'none', 'minimal' or 'full'.} 50 | 51 | \item{lifesign.step}{an integer specifying the stepsize to print the minimal 52 | threshold in full lifesign mode.} 53 | 54 | \item{algorithm}{a string containing the algorithm type to calculate the 55 | neural network. The following types are possible: 'backprop', 'rprop+', 56 | 'rprop-', 'sag', or 'slr'. 'backprop' refers to backpropagation, 'rprop+' 57 | and 'rprop-' refer to the resilient backpropagation with and without weight 58 | backtracking, while 'sag' and 'slr' induce the usage of the modified 59 | globally convergent algorithm (grprop). See Details for more information.} 60 | 61 | \item{err.fct}{a differentiable function that is used for the calculation of 62 | the error. 
Alternatively, the strings 'sse' and 'ce' which stand for the sum 63 | of squared errors and the cross-entropy can be used.} 64 | 65 | \item{act.fct}{a differentiable function that is used for smoothing the 66 | result of the cross product of the covariate or neurons and the weights. 67 | Additionally, the strings 'logistic', 'tanh' and 'relu' are possible for the 68 | logistic function, tangent hyperbolicus and rectified linear unit.} 69 | 70 | \item{output.act.fct}{activation function used in the output layer 71 | (if \code{linear.output = FALSE}). Use same functions/strings as in \code{act.fct}.} 72 | 73 | \item{linear.output}{logical. If output.act.fct should not be applied to the output 74 | neurons, set linear output to TRUE, otherwise to FALSE.} 75 | 76 | \item{exclude}{a vector or a matrix specifying the weights that are 77 | excluded from the calculation. If given as a vector, the exact positions of 78 | the weights must be known. A matrix with n-rows and 3 columns will exclude n 79 | weights, where the first column stands for the layer, the second column for 80 | the input neuron and the third column for the output neuron of the weight.} 81 | 82 | \item{constant.weights}{a vector specifying the values of the weights that 83 | are excluded from the training process and treated as fixed.} 84 | 85 | \item{likelihood}{logical. If the error function is equal to the negative 86 | log-likelihood function, the information criteria AIC and BIC will be 87 | calculated. Furthermore, the usage of confidence.interval is meaningful.} 88 | } 89 | \value{ 90 | \code{neuralnet} returns an object of class \code{nn}. An object of 91 | class \code{nn} is a list containing at most the following components: 92 | 93 | \item{ call }{ the matched call. } 94 | \item{ response }{ extracted from the \code{data argument}. } 95 | \item{ covariate }{ the variables extracted from the \code{data argument}. } 96 | \item{ model.list }{ a list containing the covariates and the response variables extracted from the \code{formula argument}. } 97 | \item{ err.fct }{ the error function. } 98 | \item{ act.fct }{ the activation function. } 99 | \item{ data }{ the \code{data argument}.} 100 | \item{ net.result }{ a list containing the overall result of the neural network for every repetition.} 101 | \item{ weights }{ a list containing the fitted weights of the neural network for every repetition. } 102 | \item{ generalized.weights }{ a list containing the generalized weights of the neural network for every repetition. } 103 | \item{ result.matrix }{ a matrix containing the reached threshold, needed steps, error, AIC and BIC (if computed) and weights for every repetition. Each column represents one repetition. } 104 | \item{ startweights }{ a list containing the startweights of the neural network for every repetition. } 105 | } 106 | \description{ 107 | Train neural networks using backpropagation, 108 | resilient backpropagation (RPROP) with (Riedmiller, 1994) or without weight 109 | backtracking (Riedmiller and Braun, 1993) or the modified globally 110 | convergent version (GRPROP) by Anastasiadis et al. (2005). The function 111 | allows flexible settings through custom-choice of error and activation 112 | function. Furthermore, the calculation of generalized weights (Intrator O. 113 | and Intrator N., 1993) is implemented.
114 | } 115 | \details{ 116 | The globally convergent algorithm is based on the resilient backpropagation 117 | without weight backtracking and additionally modifies one learning rate, 118 | either the learningrate associated with the smallest absolute gradient (sag) 119 | or the smallest learningrate (slr) itself. The learning rates in the grprop 120 | algorithm are limited to the boundaries defined in learningrate.limit. 121 | } 122 | \examples{ 123 | 124 | library(neuralnet) 125 | 126 | # Binary classification 127 | nn <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, iris, linear.output = FALSE) 128 | \dontrun{print(nn)} 129 | \dontrun{plot(nn)} 130 | 131 | # Multiclass classification 132 | nn <- neuralnet(Species ~ Petal.Length + Petal.Width, iris, linear.output = FALSE) 133 | \dontrun{print(nn)} 134 | \dontrun{plot(nn)} 135 | 136 | # Custom activation function 137 | softplus <- function(x) log(1 + exp(x)) 138 | nn <- neuralnet((Species == "setosa") ~ Petal.Length + Petal.Width, iris, 139 | linear.output = FALSE, hidden = c(3, 2), act.fct = softplus) 140 | \dontrun{print(nn)} 141 | \dontrun{plot(nn)} 142 | 143 | } 144 | \references{ 145 | Riedmiller M. (1994) \emph{Rprop - Description and 146 | Implementation Details.} Technical Report. University of Karlsruhe. 147 | 148 | Riedmiller M. and Braun H. (1993) \emph{A direct adaptive method for faster 149 | backpropagation learning: The RPROP algorithm.} Proceedings of the IEEE 150 | International Conference on Neural Networks (ICNN), pages 586-591. San 151 | Francisco. 152 | 153 | Anastasiadis A. et. al. (2005) \emph{New globally convergent training scheme 154 | based on the resilient propagation algorithm.} Neurocomputing 64, pages 155 | 253-270. 156 | 157 | Intrator O. and Intrator N. (1993) \emph{Using Neural Nets for 158 | Interpretation of Nonlinear Models.} Proceedings of the Statistical 159 | Computing Section, 244-249 San Francisco: American Statistical Society 160 | (eds). 161 | } 162 | \seealso{ 163 | \code{\link{plot.nn}} for plotting the neural network. 164 | 165 | \code{\link{gwplot}} for plotting the generalized weights. 166 | 167 | \code{\link{predict.nn}} for computation of a given neural network for given 168 | covariate vectors (formerly \code{compute}). 169 | 170 | \code{\link{confidence.interval}} for calculation of confidence intervals of 171 | the weights. 172 | 173 | \code{\link{prediction}} for a summary of the output of the neural network. 174 | } 175 | \author{ 176 | Stefan Fritsch, Frauke Guenther, Marvin N. Wright 177 | } 178 | \keyword{neural} 179 | -------------------------------------------------------------------------------- /R/plot.nn.r: -------------------------------------------------------------------------------- 1 | #' Plot method for neural networks 2 | #' 3 | #' \code{plot.nn}, a method for the \code{plot} generic. It is designed for an 4 | #' inspection of the weights for objects of class \code{nn}, typically produced 5 | #' by \code{neuralnet}. 6 | #' 7 | #' 8 | #' @param x an object of class \code{nn} 9 | #' @param rep repetition of the neural network. If rep="best", the repetition 10 | #' with the smallest error will be plotted. If not stated all repetitions will 11 | #' be plotted, each in a separate window. 12 | #' @param x.entry x-coordinate of the entry layer. Depends on the arrow.length 13 | #' in default. 14 | #' @param x.out x-coordinate of the output layer. 15 | #' @param radius radius of the neurons. 16 | #' @param arrow.length length of the entry and out arrows. 
17 | #' @param intercept a logical value indicating whether to plot the intercept. 18 | #' @param intercept.factor x-position factor of the intercept. The closer the 19 | #' factor is to 0, the closer the intercept is to its left neuron. 20 | #' @param information a logical value indicating whether to add the error and 21 | #' steps to the plot. 22 | #' @param information.pos y-position of the information. 23 | #' @param col.entry.synapse color of the synapses leading to the input neurons. 24 | #' @param col.entry color of the input neurons. 25 | #' @param col.hidden color of the neurons in the hidden layer. 26 | #' @param col.hidden.synapse color of the weighted synapses. 27 | #' @param col.out color of the output neurons. 28 | #' @param col.out.synapse color of the synapses leading away from the output 29 | #' neurons. 30 | #' @param col.intercept color of the intercept. 31 | #' @param fontsize fontsize of the text. 32 | #' @param dimension size of the plot in inches. 33 | #' @param show.weights a logical value indicating whether to print the 34 | #' calculated weights above the synapses. 35 | #' @param file a character string naming the plot to write to. If not stated, 36 | #' the plot will not be saved. 37 | #' @param \dots arguments to be passed to methods, such as graphical parameters 38 | #' (see \code{\link{par}}). 39 | #' @author Stefan Fritsch, Frauke Guenther \email{guenther@@leibniz-bips.de} 40 | #' @seealso \code{\link{neuralnet}} 41 | #' @keywords neural 42 | #' @examples 43 | #' 44 | #' XOR <- c(0,1,1,0) 45 | #' xor.data <- data.frame(expand.grid(c(0,1), c(0,1)), XOR) 46 | #' print(net.xor <- neuralnet( XOR~Var1+Var2, xor.data, hidden=2, rep=5)) 47 | #' plot(net.xor, rep="best") 48 | #' 49 | #' @S3method plot nn 50 | plot.nn <- 51 | function (x, rep = NULL, x.entry = NULL, x.out = NULL, radius = 0.15, 52 | arrow.length = 0.2, intercept = TRUE, intercept.factor = 0.4, 53 | information = TRUE, information.pos = 0.1, col.entry.synapse = "black", 54 | col.entry = "black", col.hidden = "black", col.hidden.synapse = "black", 55 | col.out = "black", col.out.synapse = "black", col.intercept = "blue", 56 | fontsize = 12, dimension = 6, show.weights = TRUE, file = NULL, 57 | ...) 58 | { 59 | net <- x 60 | if (is.null(net$weights)) 61 | stop("weights were not calculated") 62 | if (!is.null(file) && !is.character(file)) 63 | stop("'file' must be a string") 64 | if (is.null(rep)) { 65 | for (i in 1:length(net$weights)) { 66 | if (!is.null(file)) 67 | file.rep <- paste(file, ".", i, sep = "") 68 | else file.rep <- NULL 69 | grDevices::dev.new() 70 | plot.nn(net, rep = i, x.entry, x.out, radius, arrow.length, 71 | intercept, intercept.factor, information, information.pos, 72 | col.entry.synapse, col.entry, col.hidden, col.hidden.synapse, 73 | col.out, col.out.synapse, col.intercept, fontsize, 74 | dimension, show.weights, file.rep, ...) 
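        # each repetition is drawn in its own graphics device; 'file.rep' appends the repetition index to 'file' so every plot can be saved to a separate file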
75 | } 76 | } 77 | else { 78 | if (is.character(file) && file.exists(file)) 79 | stop(sprintf("%s already exists", sQuote(file))) 80 | result.matrix <- t(net$result.matrix) 81 | if (rep == "best") 82 | rep <- as.integer(which.min(result.matrix[, "error"])) 83 | if (rep > length(net$weights)) 84 | stop("'rep' does not exist") 85 | weights <- net$weights[[rep]] 86 | if (is.null(x.entry)) 87 | x.entry <- 0.5 - (arrow.length/2) * length(weights) 88 | if (is.null(x.out)) 89 | x.out <- 0.5 + (arrow.length/2) * length(weights) 90 | width <- max(x.out - x.entry + 0.2, 0.8) * 8 91 | radius <- radius/dimension 92 | entry.label <- net$model.list$variables 93 | out.label <- net$model.list$response 94 | neuron.count <- array(0, length(weights) + 1) 95 | neuron.count[1] <- nrow(weights[[1]]) - 1 96 | neuron.count[2] <- ncol(weights[[1]]) 97 | x.position <- array(0, length(weights) + 1) 98 | x.position[1] <- x.entry 99 | x.position[length(weights) + 1] <- x.out 100 | if (length(weights) > 1) 101 | for (i in 2:length(weights)) { 102 | neuron.count[i + 1] <- ncol(weights[[i]]) 103 | x.position[i] <- x.entry + (i - 1) * (x.out - 104 | x.entry)/length(weights) 105 | } 106 | y.step <- 1/(neuron.count + 1) 107 | y.position <- array(0, length(weights) + 1) 108 | y.intercept <- 1 - 2 * radius 109 | information.pos <- min(min(y.step) - 0.1, 0.2) 110 | if (length(entry.label) != neuron.count[1]) { 111 | if (length(entry.label) < neuron.count[1]) { 112 | tmp <- NULL 113 | for (i in 1:(neuron.count[1] - length(entry.label))) { 114 | tmp <- c(tmp, "no name") 115 | } 116 | entry.label <- c(entry.label, tmp) 117 | } 118 | } 119 | if (length(out.label) != neuron.count[length(neuron.count)]) { 120 | if (length(out.label) < neuron.count[length(neuron.count)]) { 121 | tmp <- NULL 122 | for (i in 1:(neuron.count[length(neuron.count)] - 123 | length(out.label))) { 124 | tmp <- c(tmp, "no name") 125 | } 126 | out.label <- c(out.label, tmp) 127 | } 128 | } 129 | grid::grid.newpage() 130 | for (k in 1:length(weights)) { 131 | for (i in 1:neuron.count[k]) { 132 | y.position[k] <- y.position[k] + y.step[k] 133 | y.tmp <- 0 134 | for (j in 1:neuron.count[k + 1]) { 135 | y.tmp <- y.tmp + y.step[k + 1] 136 | result <- calculate.delta(c(x.position[k], 137 | x.position[k + 1]), c(y.position[k], y.tmp), 138 | radius) 139 | x <- c(x.position[k], x.position[k + 1] - result[1]) 140 | y <- c(y.position[k], y.tmp + result[2]) 141 | grid::grid.lines(x = x, y = y, arrow = grid::arrow(length = grid::unit(0.15, 142 | "cm"), type = "closed"), gp = grid::gpar(fill = col.hidden.synapse, 143 | col = col.hidden.synapse, ...)) 144 | if (show.weights) 145 | draw.text(label = weights[[k]][neuron.count[k] - 146 | i + 2, neuron.count[k + 1] - j + 1], x = c(x.position[k], 147 | x.position[k + 1]), y = c(y.position[k], 148 | y.tmp), xy.null = 1.25 * result, color = col.hidden.synapse, 149 | fontsize = fontsize - 2, ...) 150 | } 151 | if (k == 1) { 152 | grid::grid.lines(x = c((x.position[1] - arrow.length), 153 | x.position[1] - radius), y = y.position[k], 154 | arrow = grid::arrow(length = grid::unit(0.15, "cm"), 155 | type = "closed"), gp = grid::gpar(fill = col.entry.synapse, 156 | col = col.entry.synapse, ...)) 157 | draw.text(label = entry.label[(neuron.count[1] + 158 | 1) - i], x = c((x.position - arrow.length), 159 | x.position[1] - radius), y = c(y.position[k], 160 | y.position[k]), xy.null = c(0, 0), color = col.entry.synapse, 161 | fontsize = fontsize, ...) 
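          # input-layer neuron: a white circle outlined in 'col.entry'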
162 | grid::grid.circle(x = x.position[k], y = y.position[k], 163 | r = radius, gp = grid::gpar(fill = "white", col = col.entry, 164 | ...)) 165 | } 166 | else { 167 | grid::grid.circle(x = x.position[k], y = y.position[k], 168 | r = radius, gp = grid::gpar(fill = "white", col = col.hidden, 169 | ...)) 170 | } 171 | } 172 | } 173 | out <- length(neuron.count) 174 | for (i in 1:neuron.count[out]) { 175 | y.position[out] <- y.position[out] + y.step[out] 176 | grid::grid.lines(x = c(x.position[out] + radius, x.position[out] + 177 | arrow.length), y = y.position[out], arrow = grid::arrow(length = grid::unit(0.15, 178 | "cm"), type = "closed"), gp = grid::gpar(fill = col.out.synapse, 179 | col = col.out.synapse, ...)) 180 | draw.text(label = out.label[(neuron.count[out] + 181 | 1) - i], x = c((x.position[out] + radius), x.position[out] + 182 | arrow.length), y = c(y.position[out], y.position[out]), 183 | xy.null = c(0, 0), color = col.out.synapse, fontsize = fontsize, 184 | ...) 185 | grid::grid.circle(x = x.position[out], y = y.position[out], 186 | r = radius, gp = grid::gpar(fill = "white", col = col.out, 187 | ...)) 188 | } 189 | if (intercept) { 190 | for (k in 1:length(weights)) { 191 | y.tmp <- 0 192 | x.intercept <- (x.position[k + 1] - x.position[k]) * 193 | intercept.factor + x.position[k] 194 | for (i in 1:neuron.count[k + 1]) { 195 | y.tmp <- y.tmp + y.step[k + 1] 196 | result <- calculate.delta(c(x.intercept, x.position[k + 197 | 1]), c(y.intercept, y.tmp), radius) 198 | x <- c(x.intercept, x.position[k + 1] - result[1]) 199 | y <- c(y.intercept, y.tmp + result[2]) 200 | grid::grid.lines(x = x, y = y, arrow = grid::arrow(length = grid::unit(0.15, 201 | "cm"), type = "closed"), gp = grid::gpar(fill = col.intercept, 202 | col = col.intercept, ...)) 203 | xy.null <- cbind(x.position[k + 1] - x.intercept - 204 | 2 * result[1], -(y.tmp - y.intercept + 2 * 205 | result[2])) 206 | if (show.weights) 207 | draw.text(label = weights[[k]][1, neuron.count[k + 208 | 1] - i + 1], x = c(x.intercept, x.position[k + 209 | 1]), y = c(y.intercept, y.tmp), xy.null = xy.null, 210 | color = col.intercept, alignment = c("right", 211 | "bottom"), fontsize = fontsize - 2, ...) 212 | } 213 | grid::grid.circle(x = x.intercept, y = y.intercept, 214 | r = radius, gp = grid::gpar(fill = "white", col = col.intercept, 215 | ...)) 216 | grid::grid.text(1, x = x.intercept, y = y.intercept, 217 | gp = grid::gpar(col = col.intercept, ...)) 218 | } 219 | } 220 | if (information) 221 | grid::grid.text(paste("Error: ", round(result.matrix[rep, 222 | "error"], 6), " Steps: ", result.matrix[rep, 223 | "steps"], sep = ""), x = 0.5, y = information.pos, 224 | just = "bottom", gp = grid::gpar(fontsize = fontsize + 225 | 2, ...)) 226 | if (!is.null(file)) { 227 | weight.plot <- grDevices::recordPlot() 228 | save(weight.plot, file = file) 229 | } 230 | } 231 | } 232 | calculate.delta <- 233 | function (x, y, r) 234 | { 235 | delta.x <- x[2] - x[1] 236 | delta.y <- y[2] - y[1] 237 | x.null <- r/sqrt(delta.x^2 + delta.y^2) * delta.x 238 | if (y[1] < y[2]) 239 | y.null <- -sqrt(r^2 - x.null^2) 240 | else if (y[1] > y[2]) 241 | y.null <- sqrt(r^2 - x.null^2) 242 | else y.null <- 0 243 | c(x.null, y.null) 244 | } 245 | draw.text <- 246 | function (label, x, y, xy.null = c(0, 0), color, alignment = c("left", 247 | "bottom"), ...) 
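# Internal helper for plot.nn: draws 'label' next to the synapse running from (x[1], y[1]) to (x[2], y[2]), offset by 'xy.null' and rotated to the slope of that line; numeric labels are rounded to 5 decimal places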
248 | { 249 | x.label <- x[1] + xy.null[1] 250 | y.label <- y[1] - xy.null[2] 251 | x.delta <- x[2] - x[1] 252 | y.delta <- y[2] - y[1] 253 | angle = atan(y.delta/x.delta) * (180/pi) 254 | if (angle < 0) 255 | angle <- angle + 0 256 | else if (angle > 0) 257 | angle <- angle - 0 258 | if (is.numeric(label)) 259 | label <- round(label, 5) 260 | vp <- grid::viewport(x = x.label, y = y.label, width = 0, height = , 261 | angle = angle, name = "vp1", just = alignment) 262 | grid::grid.text(label, x = 0, y = grid::unit(0.75, "mm"), just = alignment, 263 | gp = grid::gpar(col = color, ...), vp = vp) 264 | } 265 | -------------------------------------------------------------------------------- /R/confidence.interval.r: -------------------------------------------------------------------------------- 1 | #' Calculates confidence intervals of the weights 2 | #' 3 | #' \code{confidence.interval}, a method for objects of class \code{nn}, 4 | #' typically produced by \code{neuralnet}. Calculates confidence intervals of 5 | #' the weights (White, 1989) and the network information criteria NIC (Murata 6 | #' et al. 1994). All confidence intervals are calculated under the assumption 7 | #' of a local identification of the given neural network. If this assumption 8 | #' is violated, the results will not be reasonable. Please make also sure that 9 | #' the chosen error function equals the negative log-likelihood function, 10 | #' otherwise the results are not meaningfull, too. 11 | #' 12 | #' 13 | #' @param x neural network 14 | #' @param alpha numerical. Sets the confidence level to (1-alpha). 15 | #' @return \code{confidence.interval} returns a list containing the following 16 | #' components: 17 | #' 18 | #' \item{ lower.ci }{a list containing the lower confidence bounds of all 19 | #' weights of the neural network differentiated by the repetitions.} \item{ 20 | #' upper.ci }{a list containing the upper confidence bounds of all weights of 21 | #' the neural network differentiated by the repetitions.} \item{ nic }{a vector 22 | #' containg the information criteria NIC for every repetition.} 23 | #' @author Stefan Fritsch, Frauke Guenther \email{guenther@@leibniz-bips.de} 24 | #' @seealso \code{\link{neuralnet}} 25 | #' @references White (1989) \emph{Learning in artificial neural networks. A 26 | #' statistical perspective.} Neural Computation (1), pages 425-464 27 | #' 28 | #' Murata et al. 
(1994) \emph{Network information criterion - determining the 29 | #' number of hidden units for an artificial neural network model.} IEEE 30 | #' Transactions on Neural Networks 5 (6), pages 865-871 31 | #' @keywords neural 32 | #' @examples 33 | #' 34 | #' data(infert, package="datasets") 35 | #' print(net.infert <- neuralnet(case~parity+induced+spontaneous, 36 | #' infert, err.fct="ce", linear.output=FALSE)) 37 | #' confidence.interval(net.infert) 38 | #' 39 | #' @export confidence.interval 40 | confidence.interval <- 41 | function (x, alpha = 0.05) 42 | { 43 | net <- x 44 | covariate <- cbind(1, net$covariate) 45 | response <- net$response 46 | err.fct <- net$err.fct 47 | act.fct <- net$act.fct 48 | linear.output <- net$linear.output 49 | exclude <- net$exclude 50 | list.weights <- net$weights 51 | rep <- length(list.weights) 52 | if (is.null(list.weights)) 53 | stop("weights were not calculated") 54 | nrow.weights <- sapply(list.weights[[1]], nrow) 55 | ncol.weights <- sapply(list.weights[[1]], ncol) 56 | lower.ci <- NULL 57 | upper.ci <- NULL 58 | nic <- NULL 59 | for (i in 1:rep) { 60 | weights <- list.weights[[i]] 61 | error <- net$result.matrix["error", i] 62 | if (length(weights) > 2) 63 | stop("nic and confidence intervals will not be calculated for more than one hidden layer of neurons", 64 | call. = FALSE) 65 | result.nic <- calculate.information.matrices(covariate, 66 | response, weights, err.fct, act.fct, exclude, linear.output) 67 | nic <- c(nic, error + result.nic$trace) 68 | if (!is.null(result.nic$variance)) { 69 | if (all(diag(result.nic$variance) >= 0)) { 70 | weights.vector <- unlist(weights) 71 | if (!is.null(exclude)) { 72 | d <- rep(NA, length(weights.vector)) 73 | d[-exclude] <- stats::qnorm(1 - alpha/2) * sqrt(diag(result.nic$variance))/sqrt(nrow(covariate)) 74 | } 75 | else { 76 | d <- stats::qnorm(1 - alpha/2) * sqrt(diag(result.nic$variance))/sqrt(nrow(covariate)) 77 | } 78 | lower.ci <- c(lower.ci, list(relist(weights.vector - 79 | d, nrow.weights, ncol.weights))) 80 | upper.ci <- c(upper.ci, list(relist(weights.vector + 81 | d, nrow.weights, ncol.weights))) 82 | } 83 | } 84 | } 85 | if (length(lower.ci) < rep) 86 | warning(sprintf("%s of %s repetition(s) could not calculate confidence intervals for the weights; varify that the neural network does not contain irrelevant neurons", 87 | length(lower.ci), rep), call. 
= F) 88 | list(lower.ci = lower.ci, upper.ci = upper.ci, nic = nic) 89 | } 90 | calculate.information.matrices <- 91 | function (covariate, response, weights, err.fct, act.fct, exclude, 92 | linear.output) 93 | { 94 | temp <- act.fct 95 | if (attr(act.fct, "type") == "logistic") { 96 | act.deriv.fct <- function(x) { 97 | act.fct(x) * (1 - act.fct(x)) 98 | } 99 | act.deriv2.fct <- function(x) { 100 | act.fct(x) * (1 - act.fct(x)) * (1 - (2 * act.fct(x))) 101 | } 102 | } 103 | else { 104 | attr(temp, "type") <- NULL 105 | act.deriv.fct <- Deriv::Deriv(temp, nderiv = 1, x = "x") 106 | act.deriv2.fct <- Deriv::Deriv(temp, nderiv = 2, x = "x") 107 | } 108 | temp <- err.fct 109 | attr(temp, "type") <- NULL 110 | err.deriv.fct <- Deriv::Deriv(temp, nderiv = 1, x = "x") 111 | err.deriv2.fct <- Deriv::Deriv(temp, nderiv = 2, x = "x") 112 | length.weights <- length(weights) 113 | nrow.weights <- sapply(weights, nrow) 114 | ncol.weights <- sapply(weights, ncol) 115 | if (linear.output) { 116 | output.act.fct <- function(x) { 117 | x 118 | } 119 | output.act.deriv.fct <- function(x) { 120 | matrix(1, nrow(x), ncol(x)) 121 | } 122 | output.act.deriv2.fct <- function(x) { 123 | matrix(0, nrow(x), ncol(x)) 124 | } 125 | } 126 | else { 127 | output.act.fct <- act.fct 128 | output.act.deriv.fct <- act.deriv.fct 129 | output.act.deriv2.fct <- act.deriv2.fct 130 | } 131 | neuron.deriv <- NULL 132 | neuron.deriv2 <- NULL 133 | neurons <- list(covariate) 134 | if (length.weights > 1) 135 | for (i in 1:(length.weights - 1)) { 136 | temp <- neurons[[i]] %*% weights[[i]] 137 | act.temp <- act.fct(temp) 138 | neuron.deriv[[i]] <- act.deriv.fct(temp) 139 | neuron.deriv2[[i]] <- act.deriv2.fct(temp) 140 | neurons[[i + 1]] <- cbind(1, act.temp) 141 | } 142 | if (!is.list(neuron.deriv)) 143 | neuron.deriv <- list(neuron.deriv) 144 | if (!is.list(neuron.deriv2)) 145 | neuron.deriv2 <- list(neuron.deriv2) 146 | temp <- neurons[[length.weights]] %*% weights[[length.weights]] 147 | net.result <- output.act.fct(temp) 148 | neuron.deriv[[length.weights]] <- output.act.deriv.fct(temp) 149 | neuron.deriv2[[length.weights]] <- output.act.deriv2.fct(temp) 150 | err.deriv <- err.deriv.fct(net.result, response) 151 | err.deriv2 <- err.deriv2.fct(net.result, response) 152 | if (any(is.na(unlist(neuron.deriv2)))) { 153 | return(list(nic = NA, hessian = NULL)) 154 | warning("neuron.deriv2 contains NA; this might be caused by a wrong choice of 'act.fct'") 155 | } 156 | if (any(is.na(err.deriv)) || any(is.na(err.deriv2))) { 157 | if (attr(err.fct, "type") == "ce") { 158 | one <- which(net.result == 1) 159 | if (length(one) > 0) { 160 | for (i in 1:length(one)) { 161 | if (response[one[i]] == 1) { 162 | err.deriv[one[i]] <- 1 163 | err.deriv2[one[i]] <- 1 164 | } 165 | } 166 | } 167 | zero <- which(net.result == 0) 168 | if (length(zero) > 0) { 169 | for (i in 1:length(zero)) { 170 | if (response[zero[i]] == 0) { 171 | err.deriv[zero[i]] <- 1 172 | err.deriv2[zero[i]] <- -1 173 | } 174 | } 175 | } 176 | } 177 | } 178 | if (any(is.na(err.deriv))) { 179 | return(list(nic = NA, hessian = NULL)) 180 | warning("err.deriv contains NA; this might be caused by a wrong choice of 'act.fct'") 181 | } 182 | if (any(is.na(err.deriv2))) { 183 | return(list(nic = NA, hessian = NULL)) 184 | warning("err.deriv2 contains NA; this might be caused by a wrong choice of 'act.fct'") 185 | } 186 | if (length.weights == 2) { 187 | length.betha <- (nrow.weights * ncol.weights)[1] 188 | length.alpha <- (nrow.weights * ncol.weights)[2] 189 | total.length.weights 
<- length.alpha + length.betha 190 | betha.ind <- matrix(1:length.betha, nrow = nrow.weights[1], 191 | ncol = ncol.weights[1]) 192 | alpha.ind <- matrix(1:length.alpha, nrow = nrow.weights[2], 193 | ncol = ncol.weights[2]) 194 | Hesse <- matrix(NA, nrow = total.length.weights, ncol = total.length.weights) 195 | Cross.Gradient <- matrix(NA, nrow = total.length.weights, 196 | ncol = total.length.weights) 197 | Cross.Gradient2 <- matrix(NA, nrow = total.length.weights, 198 | ncol = total.length.weights) 199 | for (i in 1:total.length.weights) { 200 | for (j in 1:total.length.weights) { 201 | if (is.null(exclude) || all(i != exclude & j != 202 | exclude)) { 203 | if (i <= length.betha) { 204 | temp <- which(betha.ind == i, arr.ind = T) 205 | r <- temp[1] 206 | s <- temp[2] 207 | } 208 | else { 209 | temp <- which(alpha.ind == (i - length.betha), 210 | arr.ind = T) 211 | r <- temp[1] 212 | s <- temp[2] 213 | } 214 | if (j <= length.betha) { 215 | temp <- which(betha.ind == j, arr.ind = T) 216 | u <- temp[1] 217 | v <- temp[2] 218 | } 219 | else { 220 | temp <- which(alpha.ind == (j - length.betha), 221 | arr.ind = T) 222 | u <- temp[1] 223 | v <- temp[2] 224 | } 225 | if ((i <= length.betha) && (j <= length.betha)) { 226 | Cross.Gradient[i, j] <- sum(((err.deriv^2 * 227 | neuron.deriv[[2]]^2) %*% (weights[[2]][(s + 228 | 1), ] * weights[[2]][(v + 1), ])) * neuron.deriv[[1]][, 229 | s] * neurons[[1]][, r] * neuron.deriv[[1]][, 230 | v] * neurons[[1]][, u]) 231 | Cross.Gradient2[i, j] <- sum(((err.deriv2 * 232 | neuron.deriv[[2]]^2) %*% (weights[[2]][(s + 233 | 1), ] * weights[[2]][(v + 1), ])) * neuron.deriv[[1]][, 234 | s] * neurons[[1]][, r] * neuron.deriv[[1]][, 235 | v] * neurons[[1]][, u]) 236 | if (s == v) 237 | Hesse[i, j] <- sum(neuron.deriv[[1]][, 238 | s] * neurons[[1]][, r] * neuron.deriv[[1]][, 239 | v] * neurons[[1]][, u] * ((neuron.deriv2[[2]] * 240 | err.deriv) %*% (weights[[2]][(s + 1), 241 | ] * weights[[2]][(v + 1), ]))) + sum(neuron.deriv2[[1]][, 242 | s] * neurons[[1]][, r] * neurons[[1]][, 243 | u] * ((neuron.deriv[[2]] * err.deriv) %*% 244 | weights[[2]][(s + 1), ])) 245 | else Hesse[i, j] <- sum(neuron.deriv[[1]][, 246 | s] * neurons[[1]][, r] * neuron.deriv[[1]][, 247 | v] * neurons[[1]][, u] * ((neuron.deriv2[[2]] * 248 | err.deriv) %*% (weights[[2]][(s + 1), ] * 249 | weights[[2]][(v + 1), ]))) 250 | } 251 | else if ((i > length.betha) && (j > length.betha)) { 252 | if (v == s) { 253 | Cross.Gradient[i, j] <- sum(err.deriv[, 254 | v]^2 * (neuron.deriv[[2]][, s] * neurons[[2]][, 255 | r] * neuron.deriv[[2]][, v] * neurons[[2]][, 256 | u])) 257 | Cross.Gradient2[i, j] <- sum(err.deriv2[, 258 | v] * (neuron.deriv[[2]][, s] * neurons[[2]][, 259 | r] * neuron.deriv[[2]][, v] * neurons[[2]][, 260 | u])) 261 | } 262 | else { 263 | Cross.Gradient[i, j] <- 0 264 | Cross.Gradient2[i, j] <- 0 265 | } 266 | if (v == s) 267 | Hesse[i, j] <- sum(neuron.deriv2[[2]][, 268 | s] * err.deriv[, s] * neurons[[2]][, 269 | u] * neurons[[2]][, r]) 270 | else Hesse[i, j] <- 0 271 | } 272 | else if ((i > length.betha) && (j <= length.betha)) { 273 | Cross.Gradient[i, j] <- sum(err.deriv[, s]^2 * 274 | (neuron.deriv[[2]][, s] * neurons[[2]][, 275 | r] * (neuron.deriv[[2]][, s] * weights[[2]][(v + 276 | 1), s]) * neuron.deriv[[1]][, v] * neurons[[1]][, 277 | u])) 278 | Cross.Gradient2[i, j] <- sum(err.deriv2[, 279 | s] * (neuron.deriv[[2]][, s] * neurons[[2]][, 280 | r] * (neuron.deriv[[2]][, s] * weights[[2]][(v + 281 | 1), s]) * neuron.deriv[[1]][, v] * neurons[[1]][, 282 | u])) 283 | if (v == 
r) 284 | Hesse[i, j] <- sum(neurons[[2]][, r] * 285 | neuron.deriv[[1]][, v] * neurons[[1]][, 286 | u] * neuron.deriv2[[2]][, s] * err.deriv[, 287 | s] * weights[[2]][(v + 1), s]) + sum(neuron.deriv[[2]][, 288 | s] * err.deriv[, s] * neurons[[1]][, 289 | u] * neuron.deriv[[1]][, v]) 290 | else Hesse[i, j] <- sum(neurons[[2]][, r] * 291 | neuron.deriv[[1]][, v] * neurons[[1]][, 292 | u] * neuron.deriv2[[2]][, s] * err.deriv[, 293 | s] * weights[[2]][(v + 1), s]) 294 | } 295 | else { 296 | Cross.Gradient[i, j] <- sum(err.deriv[, v]^2 * 297 | (neuron.deriv[[2]][, v] * neurons[[2]][, 298 | u] * (neuron.deriv[[2]][, v] * weights[[2]][(s + 299 | 1), v]) * neuron.deriv[[1]][, s] * neurons[[1]][, 300 | r])) 301 | Cross.Gradient2[i, j] <- sum(err.deriv2[, 302 | v] * (neuron.deriv[[2]][, v] * neurons[[2]][, 303 | u] * (neuron.deriv[[2]][, v] * weights[[2]][(s + 304 | 1), v]) * neuron.deriv[[1]][, s] * neurons[[1]][, 305 | r])) 306 | if (s == u) 307 | Hesse[i, j] <- sum(neurons[[2]][, u] * 308 | neuron.deriv[[1]][, s] * neurons[[1]][, 309 | r] * neuron.deriv2[[2]][, v] * err.deriv[, 310 | v] * weights[[2]][(s + 1), v]) + sum(neuron.deriv[[2]][, 311 | v] * err.deriv[, v] * neurons[[1]][, 312 | r] * neuron.deriv[[1]][, s]) 313 | else Hesse[i, j] <- sum(neurons[[2]][, u] * 314 | neuron.deriv[[1]][, s] * neurons[[1]][, 315 | r] * neuron.deriv2[[2]][, v] * err.deriv[, 316 | v] * weights[[2]][(s + 1), v]) 317 | } 318 | } 319 | } 320 | } 321 | } 322 | else if (length.weights == 1) { 323 | length.alpha <- sum(nrow.weights * ncol.weights) 324 | alpha.ind <- matrix(1:length.alpha, nrow = nrow.weights[1], 325 | ncol = ncol.weights[1]) 326 | Hesse <- matrix(NA, nrow = length.alpha, ncol = length.alpha) 327 | Cross.Gradient <- matrix(NA, nrow = length.alpha, ncol = length.alpha) 328 | Cross.Gradient2 <- matrix(NA, nrow = length.alpha, ncol = length.alpha) 329 | for (i in 1:length.alpha) { 330 | for (j in 1:length.alpha) { 331 | if (is.null(exclude) || all(i != exclude & j != 332 | exclude)) { 333 | r <- which(alpha.ind == i, arr.ind = T)[1] 334 | s <- which(alpha.ind == i, arr.ind = T)[2] 335 | u <- which(alpha.ind == j, arr.ind = T)[1] 336 | v <- which(alpha.ind == j, arr.ind = T)[2] 337 | if (s == v) { 338 | Hesse[i, j] <- sum(neuron.deriv2[[1]][, s] * 339 | err.deriv[, s] * neurons[[1]][, r] * neurons[[1]][, 340 | u]) 341 | Cross.Gradient[i, j] <- sum(neuron.deriv[[1]][, 342 | s]^2 * err.deriv[, s]^2 * neurons[[1]][, 343 | r] * neurons[[1]][, u]) 344 | Cross.Gradient2[i, j] <- sum(neuron.deriv[[1]][, 345 | s]^2 * err.deriv2[, s] * neurons[[1]][, 346 | r] * neurons[[1]][, u]) 347 | } 348 | else { 349 | Hesse[i, j] <- 0 350 | Cross.Gradient[i, j] <- 0 351 | Cross.Gradient2[i, j] <- 0 352 | } 353 | } 354 | } 355 | } 356 | } 357 | B <- Cross.Gradient/nrow(neurons[[1]]) 358 | A <- (Cross.Gradient2 + Hesse)/nrow(neurons[[1]]) 359 | if (!is.null(exclude)) { 360 | B <- as.matrix(B[-exclude, -exclude]) 361 | A <- as.matrix(A[-exclude, -exclude]) 362 | } 363 | if (det(A) == 0) { 364 | trace <- NA 365 | variance <- NULL 366 | } 367 | else { 368 | A.inv <- MASS::ginv(A) 369 | variance <- A.inv %*% B %*% A.inv 370 | trace <- sum(diag(B %*% A.inv)) 371 | } 372 | return(list(trace = trace, variance = variance)) 373 | } 374 | -------------------------------------------------------------------------------- /R/neuralnet.r: -------------------------------------------------------------------------------- 1 | #' Training of neural networks 2 | #' 3 | #' Train neural networks using backpropagation, 4 | #' resilient backpropagation 
(RPROP) with (Riedmiller, 1994) or without weight 5 | #' backtracking (Riedmiller and Braun, 1993) or the modified globally 6 | #' convergent version (GRPROP) by Anastasiadis et al. (2005). The function 7 | #' allows flexible settings through custom-choice of error and activation 8 | #' function. Furthermore, the calculation of generalized weights (Intrator O. 9 | #' and Intrator N., 1993) is implemented. 10 | #' 11 | #' The globally convergent algorithm is based on the resilient backpropagation 12 | #' without weight backtracking and additionally modifies one learning rate, 13 | #' either the learningrate associated with the smallest absolute gradient (sag) 14 | #' or the smallest learningrate (slr) itself. The learning rates in the grprop 15 | #' algorithm are limited to the boundaries defined in learningrate.limit. 16 | #' 17 | #' @aliases neuralnet print.nn 18 | #' @param formula a symbolic description of the model to be fitted. 19 | #' @param data a data frame containing the variables specified in 20 | #' \code{formula}. 21 | #' @param hidden a vector of integers specifying the number of hidden neurons 22 | #' (vertices) in each layer. 23 | #' @param threshold a numeric value specifying the threshold for the partial 24 | #' derivatives of the error function as stopping criteria. 25 | #' @param stepmax the maximum steps for the training of the neural network. 26 | #' Reaching this maximum leads to a stop of the neural network's training 27 | #' process. 28 | #' @param rep the number of repetitions for the neural network's training. 29 | #' @param startweights a vector containing starting values for the weights. 30 | #' Set to \code{NULL} for random initialization. 31 | #' @param learningrate.limit a vector or a list containing the lowest and 32 | #' highest limit for the learning rate. Used only for RPROP and GRPROP. 33 | #' @param learningrate.factor a vector or a list containing the multiplication 34 | #' factors for the upper and lower learning rate. Used only for RPROP and 35 | #' GRPROP. 36 | #' @param learningrate a numeric value specifying the learning rate used by 37 | #' traditional backpropagation. Used only for traditional backpropagation. 38 | #' @param lifesign a string specifying how much the function will print during 39 | #' the calculation of the neural network. 'none', 'minimal' or 'full'. 40 | #' @param lifesign.step an integer specifying the stepsize to print the minimal 41 | #' threshold in full lifesign mode. 42 | #' @param algorithm a string containing the algorithm type to calculate the 43 | #' neural network. The following types are possible: 'backprop', 'rprop+', 44 | #' 'rprop-', 'sag', or 'slr'. 'backprop' refers to backpropagation, 'rprop+' 45 | #' and 'rprop-' refer to the resilient backpropagation with and without weight 46 | #' backtracking, while 'sag' and 'slr' induce the usage of the modified 47 | #' globally convergent algorithm (grprop). See Details for more information. 48 | #' @param err.fct a differentiable function that is used for the calculation of 49 | #' the error. Alternatively, the strings 'sse' and 'ce' which stand for the sum 50 | #' of squared errors and the cross-entropy can be used. 51 | #' @param act.fct a differentiable function that is used for smoothing the 52 | #' result of the cross product of the covariate or neurons and the weights. 53 | #' Additionally the strings, 'logistic', 'tanh' and 'relu' are possible for the 54 | #' logistic function, tangent hyperbolicus and rectified linear unit. 
55 | #' @param output.act.fct activation function used in the output layer 56 | #' (if \code{linear.output = FALSE}). Use same functions/strings as in \code{act.fct}. 57 | #' @param linear.output logical. If output.act.fct should not be applied to the output 58 | #' neurons set linear output to TRUE, otherwise to FALSE. 59 | #' @param exclude a vector or a matrix specifying the weights, that are 60 | #' excluded from the calculation. If given as a vector, the exact positions of 61 | #' the weights must be known. A matrix with n-rows and 3 columns will exclude n 62 | #' weights, where the first column stands for the layer, the second column for 63 | #' the input neuron and the third column for the output neuron of the weight. 64 | #' @param constant.weights a vector specifying the values of the weights that 65 | #' are excluded from the training process and treated as fix. 66 | #' @param likelihood logical. If the error function is equal to the negative 67 | #' log-likelihood function, the information criteria AIC and BIC will be 68 | #' calculated. Furthermore the usage of confidence.interval is meaningful. 69 | #' 70 | #' @return \code{neuralnet} returns an object of class \code{nn}. An object of 71 | #' class \code{nn} is a list containing at most the following components: 72 | #' 73 | #' \item{ call }{ the matched call. } 74 | #' \item{ response }{ extracted from the \code{data argument}. } 75 | #' \item{ covariate }{ the variables extracted from the \code{data argument}. } 76 | #' \item{ model.list }{ a list containing the covariates and the response variables extracted from the \code{formula argument}. } 77 | #' \item{ err.fct }{ the error function. } 78 | #' \item{ act.fct }{ the activation function. } 79 | #' \item{ data }{ the \code{data argument}.} 80 | #' \item{ net.result }{ a list containing the overall result of the neural network for every repetition.} 81 | #' \item{ weights }{ a list containing the fitted weights of the neural network for every repetition. } 82 | #' \item{ generalized.weights }{ a list containing the generalized weights of the neural network for every repetition. } 83 | #' \item{ result.matrix }{ a matrix containing the reached threshold, needed steps, error, AIC and BIC (if computed) and weights for every repetition. Each column represents one repetition. } 84 | #' \item{ startweights }{ a list containing the startweights of the neural network for every repetition. } 85 | #' 86 | #' @author Stefan Fritsch, Frauke Guenther, Marvin N. Wright 87 | #' 88 | #' @seealso \code{\link{plot.nn}} for plotting the neural network. 89 | #' 90 | #' \code{\link{gwplot}} for plotting the generalized weights. 91 | #' 92 | #' \code{\link{predict.nn}} for computation of a given neural network for given 93 | #' covariate vectors (formerly \code{compute}). 94 | #' 95 | #' \code{\link{confidence.interval}} for calculation of confidence intervals of 96 | #' the weights. 97 | #' 98 | #' \code{\link{prediction}} for a summary of the output of the neural network. 99 | #' 100 | #' @references Riedmiller M. (1994) \emph{Rprop - Description and 101 | #' Implementation Details.} Technical Report. University of Karlsruhe. 102 | #' 103 | #' Riedmiller M. and Braun H. (1993) \emph{A direct adaptive method for faster 104 | #' backpropagation learning: The RPROP algorithm.} Proceedings of the IEEE 105 | #' International Conference on Neural Networks (ICNN), pages 586-591. San 106 | #' Francisco. 107 | #' 108 | #' Anastasiadis A. et. al. 
(2005) \emph{New globally convergent training scheme 109 | #' based on the resilient propagation algorithm.} Neurocomputing 64, pages 110 | #' 253-270. 111 | #' 112 | #' Intrator O. and Intrator N. (1993) \emph{Using Neural Nets for 113 | #' Interpretation of Nonlinear Models.} Proceedings of the Statistical 114 | #' Computing Section, 244-249 San Francisco: American Statistical Society 115 | #' (eds). 116 | #' @keywords neural 117 | #' @examples 118 | #' 119 | #' library(neuralnet) 120 | #' 121 | #' # Binary classification 122 | #' nn <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, iris, linear.output = FALSE) 123 | #' \dontrun{print(nn)} 124 | #' \dontrun{plot(nn)} 125 | #' 126 | #' # Multiclass classification 127 | #' nn <- neuralnet(Species ~ Petal.Length + Petal.Width, iris, linear.output = FALSE) 128 | #' \dontrun{print(nn)} 129 | #' \dontrun{plot(nn)} 130 | #' 131 | #' # Custom activation function 132 | #' softplus <- function(x) log(1 + exp(x)) 133 | #' nn <- neuralnet((Species == "setosa") ~ Petal.Length + Petal.Width, iris, 134 | #' linear.output = FALSE, hidden = c(3, 2), act.fct = softplus) 135 | #' \dontrun{print(nn)} 136 | #' \dontrun{plot(nn)} 137 | #' 138 | #' @import stats 139 | #' @export 140 | neuralnet <- 141 | function (formula, data, hidden = 1, threshold = 0.01, stepmax = 1e+05, 142 | rep = 1, startweights = NULL, learningrate.limit = NULL, 143 | learningrate.factor = list(minus = 0.5, plus = 1.2), learningrate = NULL, 144 | lifesign = "none", lifesign.step = 1000, algorithm = "rprop+", 145 | err.fct = "sse", act.fct = "logistic", output.act.fct = "logistic", linear.output = TRUE, 146 | exclude = NULL, constant.weights = NULL, likelihood = FALSE) { 147 | 148 | # Save call 149 | call <- match.call() 150 | 151 | # Check arguments 152 | if (is.null(data)) { 153 | stop("Missing 'data' argument.", call. = FALSE) 154 | } 155 | data <- as.data.frame(data) 156 | 157 | if (is.null(formula)) { 158 | stop("Missing 'formula' argument.", call. = FALSE) 159 | } 160 | formula <- stats::as.formula(formula) 161 | 162 | # Learning rate limit 163 | if (!is.null(learningrate.limit)) { 164 | if (length(learningrate.limit) != 2) { 165 | stop("Argument 'learningrate.factor' must consist of two components.", 166 | call. = FALSE) 167 | } 168 | learningrate.limit <- as.list(learningrate.limit) 169 | names(learningrate.limit) <- c("min", "max") 170 | 171 | if (is.na(learningrate.limit$min) || is.na(learningrate.limit$max)) { 172 | stop("'learningrate.limit' must be a numeric vector", 173 | call. = FALSE) 174 | } 175 | } else { 176 | learningrate.limit <- list(min = 1e-10, max = 0.1) 177 | } 178 | 179 | # Learning rate factor 180 | if (!is.null(learningrate.factor)) { 181 | if (length(learningrate.factor) != 2) { 182 | stop("Argument 'learningrate.factor' must consist of two components.", 183 | call. = FALSE) 184 | } 185 | learningrate.factor <- as.list(learningrate.factor) 186 | names(learningrate.factor) <- c("minus", "plus") 187 | 188 | if (is.na(learningrate.factor$minus) || is.na(learningrate.factor$plus)) { 189 | stop("'learningrate.factor' must be a numeric vector", 190 | call. = FALSE) 191 | } 192 | } else { 193 | learningrate.factor <- list(minus = 0.5, plus = 1.2) 194 | } 195 | 196 | # Learning rate (backprop) 197 | if (algorithm == "backprop") { 198 | if (is.null(learningrate) || !is.numeric(learningrate)) { 199 | stop("Argument 'learningrate' must be a numeric value, if the backpropagation algorithm is used.", 200 | call. 
= FALSE) 201 | } 202 | } 203 | 204 | # TODO: Rename? 205 | # Lifesign 206 | if (!(lifesign %in% c("none", "minimal", "full"))) { 207 | stop("Argument 'lifesign' must be one of 'none', 'minimal', 'full'.", call. = FALSE) 208 | } 209 | 210 | # Algorithm 211 | if (!(algorithm %in% c("rprop+", "rprop-", "slr", "sag", "backprop"))) { 212 | stop("Unknown algorithm.", call. = FALSE) 213 | } 214 | 215 | # Threshold 216 | if (is.na(threshold)) { 217 | stop("Argument 'threshold' must be a numeric value.", call. = FALSE) 218 | } 219 | 220 | # Hidden units 221 | if (any(is.na(hidden))) { 222 | stop("Argument 'hidden' must be an integer vector or a single integer.", 223 | call. = FALSE) 224 | } 225 | if (length(hidden) > 1 && any(hidden == 0)) { 226 | stop("Argument 'hidden' contains at least one 0.", call. = FALSE) 227 | } 228 | 229 | # Replications 230 | if (is.na(rep)) { 231 | stop("Argument 'rep' must be an integer", call. = FALSE) 232 | } 233 | 234 | # Max steps 235 | if (is.na(stepmax)) { 236 | stop("Argument 'stepmax' must be an integer", call. = FALSE) 237 | } 238 | 239 | # Activation function 240 | if (!(is.function(act.fct) || act.fct %in% c("logistic", "tanh", "relu", "ReLu"))) { 241 | stop("Unknown activation function.", call. = FALSE) 242 | } 243 | if (!(is.function(output.act.fct) || output.act.fct %in% c("logistic", "tanh", "relu", "ReLu"))) { 244 | stop("Unknown output activation function.", call. = FALSE) 245 | } 246 | 247 | # Error function 248 | if (!(is.function(err.fct) || err.fct %in% c("sse", "ce"))) { 249 | stop("Unknown error function.", call. = FALSE) 250 | } 251 | 252 | # Formula interface 253 | model.list <- list(response = attr(terms(as.formula(call("~", formula[[2]]))), "term.labels"), 254 | variables = attr(terms(formula, data = data), "term.labels")) 255 | response <- as.matrix(model.frame(as.formula(call("~", formula[[2]])), data)) 256 | covariate <- cbind(intercept = 1, as.matrix(data[, model.list$variables])) 257 | 258 | # Multiclass response 259 | if (is.character(response)) { 260 | response <- model.matrix( ~ response[, 1] - 1) == 1 261 | colnames(response) <- gsub("response\\[, 1\\]", "", colnames(response)) 262 | model.list$response <- colnames(response) 263 | } 264 | 265 | # Activation function 266 | if (is.function(act.fct)) { 267 | act.deriv.fct <- Deriv::Deriv(act.fct) 268 | attr(act.fct, "type") <- "function" 269 | } else { 270 | converted.fct <- convert.activation.function(act.fct) 271 | act.fct <- converted.fct$fct 272 | act.deriv.fct <- converted.fct$deriv.fct 273 | } 274 | 275 | # Output activation function 276 | if (is.function(output.act.fct)) { 277 | output.act.deriv.fct <- Deriv::Deriv(output.act.fct) 278 | attr(output.act.fct, "type") <- "function" 279 | } else { 280 | converted.fct <- convert.activation.function(output.act.fct) 281 | output.act.fct <- converted.fct$fct 282 | output.act.deriv.fct <- converted.fct$deriv.fct 283 | } 284 | 285 | # Error function 286 | if (is.function(err.fct)) { 287 | attr(err.fct, "type") <- "function" 288 | err.deriv.fct <- Deriv::Deriv(err.fct, x = "x") 289 | } else { 290 | converted.fct <- convert.error.function(err.fct) 291 | err.fct <- converted.fct$fct 292 | err.deriv.fct <- converted.fct$deriv.fct 293 | } 294 | 295 | if (attr(err.fct, "type") == "ce" && !all(response %in% 0:1)) { 296 | stop("Error function 'ce' only implemented for binary response.", call. 
= FALSE) 297 | } 298 | 299 | # Fit network for each replication 300 | list.result <- lapply(1:rep, function(i) { 301 | # Show progress 302 | if (lifesign != "none") { 303 | lifesign <- display(hidden, threshold, rep, i, lifesign) 304 | } 305 | 306 | # Fit network 307 | calculate.neuralnet(learningrate.limit = learningrate.limit, 308 | learningrate.factor = learningrate.factor, covariate = covariate, 309 | response = response, data = data, model.list = model.list, 310 | threshold = threshold, lifesign.step = lifesign.step, 311 | stepmax = stepmax, hidden = hidden, lifesign = lifesign, 312 | startweights = startweights, algorithm = algorithm, 313 | err.fct = err.fct, err.deriv.fct = err.deriv.fct, 314 | act.fct = act.fct, act.deriv.fct = act.deriv.fct, 315 | output.act.fct = output.act.fct, output.act.deriv.fct = output.act.deriv.fct, 316 | rep = i, linear.output = linear.output, exclude = exclude, 317 | constant.weights = constant.weights, likelihood = likelihood, 318 | learningrate.bp = learningrate) 319 | }) 320 | null_reps <- sapply(list.result, function(x) {is.null(x$output.vector)}) 321 | matrix <- sapply(list.result[!null_reps], function(x) {x$output.vector}) 322 | if (is.matrix(matrix)) { 323 | ncol.matrix <- ncol(matrix) 324 | } else { 325 | list.result <- NULL 326 | matrix <- NULL 327 | ncol.matrix <- 0 328 | } 329 | 330 | # Warning if some replications did not converge 331 | if (ncol.matrix < rep) { 332 | warning(sprintf("Algorithm did not converge in %s of %s repetition(s) within the stepmax.", 333 | (rep - ncol.matrix), rep), call. = FALSE) 334 | } 335 | 336 | # Return output 337 | generate.output(covariate, call, rep, threshold, matrix, 338 | startweights, model.list, response, err.fct, act.fct, output.act.fct, 339 | data, list.result, linear.output, exclude) 340 | } 341 | 342 | # Display output of replication 343 | display <- function (hidden, threshold, rep, i.rep, lifesign) { 344 | message("hidden: ", paste(hidden, collapse = ", "), " thresh: ", 345 | threshold, " rep: ", strrep(" ", nchar(rep) - nchar(i.rep)), 346 | i.rep, "/", rep, " steps: ", appendLF = FALSE) 347 | utils::flush.console() 348 | 349 | if (lifesign == "full") { 350 | lifesign <- sum(nchar(hidden)) + 2 * length(hidden) - 351 | 2 + max(nchar(threshold)) + 2 * nchar(rep) + 41 352 | } 353 | return(lifesign) 354 | } 355 | 356 | # Generate output object 357 | generate.output <- function(covariate, call, rep, threshold, matrix, startweights, 358 | model.list, response, err.fct, act.fct, output.act.fct, 359 | data, list.result, linear.output, exclude) { 360 | 361 | nn <- list(call = call, response = response, covariate = covariate[, -1, drop = FALSE], 362 | model.list = model.list, err.fct = err.fct, act.fct = act.fct, output.act.fct = output.act.fct, 363 | linear.output = linear.output, data = data, exclude = exclude) 364 | 365 | if (!is.null(matrix)) { 366 | nn$net.result <- lapply(list.result, function(x) {x$net.result}) 367 | nn$weights <- lapply(list.result, function(x) {x$weights}) 368 | nn$generalized.weights <- lapply(list.result, function(x) {x$generalized.weights}) 369 | nn$startweights <- lapply(list.result, function(x) {x$startweights}) 370 | nn$result.matrix <- matrix 371 | null_reps <- sapply(list.result, function(x) {is.null(x$output.vector)}) 372 | rownames(nn$result.matrix) <- c(rownames(matrix)[rownames(matrix) != ""], 373 | get_weight_names(nn$weights[[which(!null_reps)[1]]], model.list)) 374 | } 375 | 376 | class(nn) <- c("nn") 377 | return(nn) 378 | } 379 | 380 | # Get names of all weights in 
network 381 | get_weight_names <- function(weights, model.list) { 382 | # All hidden unit names 383 | if (length(weights) > 1) { 384 | hidden_units <- lapply(1:(length(weights) - 1), function(i) { 385 | paste0(i, "layhid", 1:ncol(weights[[i]])) 386 | }) 387 | } else { 388 | hidden_units <- list() 389 | } 390 | 391 | # All unit names including input and output 392 | units <- c(list(model.list$variables), 393 | hidden_units, 394 | list(model.list$response)) 395 | 396 | # Combine each layer with the next, add intercept 397 | weight_names <- do.call(c, lapply(1:(length(units) - 1), function(i) { 398 | as.vector(outer(c("Intercept", units[[i]]), units[[i + 1]], paste, sep = ".to.")) 399 | })) 400 | return(weight_names) 401 | } 402 | -------------------------------------------------------------------------------- /R/fit_neuralnet.R: -------------------------------------------------------------------------------- 1 | calculate.neuralnet <- 2 | function (data, model.list, hidden, stepmax, rep, threshold, 3 | learningrate.limit, learningrate.factor, lifesign, covariate, 4 | response, lifesign.step, startweights, algorithm, act.fct, 5 | act.deriv.fct, output.act.fct, output.act.deriv.fct, 6 | err.fct, err.deriv.fct, linear.output, likelihood, 7 | exclude, constant.weights, learningrate.bp) 8 | { 9 | time.start.local <- Sys.time() 10 | result <- generate.startweights(model.list, hidden, startweights, 11 | rep, exclude, constant.weights) 12 | weights <- result$weights 13 | exclude <- result$exclude 14 | nrow.weights <- sapply(weights, nrow) 15 | ncol.weights <- sapply(weights, ncol) 16 | result <- rprop(weights = weights, threshold = threshold, 17 | response = response, covariate = covariate, learningrate.limit = learningrate.limit, 18 | learningrate.factor = learningrate.factor, stepmax = stepmax, 19 | lifesign = lifesign, lifesign.step = lifesign.step, act.fct = act.fct, 20 | act.deriv.fct = act.deriv.fct, output.act.fct = output.act.fct, 21 | output.act.deriv.fct = output.act.deriv.fct, 22 | err.fct = err.fct, err.deriv.fct = err.deriv.fct, 23 | algorithm = algorithm, linear.output = linear.output, 24 | exclude = exclude, learningrate.bp = learningrate.bp) 25 | startweights <- weights 26 | weights <- result$weights 27 | step <- result$step 28 | reached.threshold <- result$reached.threshold 29 | net.result <- result$net.result 30 | error <- sum(err.fct(net.result, response)) 31 | if (is.na(error) & attr(err.fct, "type") == "ce") 32 | if (all(net.result <= 1, net.result >= 0)) 33 | error <- sum(err.fct(net.result, response), na.rm = T) 34 | if (!is.null(constant.weights) && any(constant.weights != 35 | 0)) 36 | exclude <- exclude[-which(constant.weights != 0)] 37 | if (length(exclude) == 0) 38 | exclude <- NULL 39 | aic <- NULL 40 | bic <- NULL 41 | if (likelihood) { 42 | synapse.count <- length(unlist(weights)) - length(exclude) 43 | aic <- 2 * error + (2 * synapse.count) 44 | bic <- 2 * error + log(nrow(response)) * synapse.count 45 | } 46 | if (is.na(error)) 47 | warning("'err.fct' does not fit 'data' or 'act.fct'", 48 | call. 
= F) 49 | if (lifesign != "none") { 50 | if (reached.threshold <= threshold) { 51 | message(rep(" ", (max(nchar(stepmax), nchar("stepmax")) - 52 | nchar(step))), step, appendLF = FALSE) 53 | message("\terror: ", round(error, 5), rep(" ", 6 - (nchar(round(error, 54 | 5)) - nchar(round(error, 0)))), appendLF = FALSE) 55 | if (!is.null(aic)) { 56 | message("\taic: ", round(aic, 5), rep(" ", 6 - (nchar(round(aic, 57 | 5)) - nchar(round(aic, 0)))), appendLF = FALSE) 58 | } 59 | if (!is.null(bic)) { 60 | message("\tbic: ", round(bic, 5), rep(" ", 6 - (nchar(round(bic, 61 | 5)) - nchar(round(bic, 0)))), appendLF = FALSE) 62 | } 63 | time <- difftime(Sys.time(), time.start.local) 64 | message("\ttime: ", round(time, 2), " ", attr(time, "units")) 65 | } 66 | } 67 | if (reached.threshold > threshold) 68 | return(result = list(output.vector = NULL, weights = NULL)) 69 | output.vector <- c(error = error, reached.threshold = reached.threshold, 70 | steps = step) 71 | if (!is.null(aic)) { 72 | output.vector <- c(output.vector, aic = aic) 73 | } 74 | if (!is.null(bic)) { 75 | output.vector <- c(output.vector, bic = bic) 76 | } 77 | for (w in 1:length(weights)) output.vector <- c(output.vector, 78 | as.vector(weights[[w]])) 79 | generalized.weights <- calculate.generalized.weights(weights, 80 | neuron.deriv = result$neuron.deriv, net.result = net.result) 81 | startweights <- unlist(startweights) 82 | weights <- unlist(weights) 83 | if (!is.null(exclude)) { 84 | startweights[exclude] <- NA 85 | weights[exclude] <- NA 86 | } 87 | startweights <- relist(startweights, nrow.weights, ncol.weights) 88 | weights <- relist(weights, nrow.weights, ncol.weights) 89 | return(list(generalized.weights = generalized.weights, weights = weights, 90 | startweights = startweights, net.result = result$net.result, 91 | output.vector = output.vector)) 92 | } 93 | generate.startweights <- 94 | function (model.list, hidden, startweights, rep, exclude, constant.weights) 95 | { 96 | input.count <- length(model.list$variables) 97 | output.count <- length(model.list$response) 98 | if (!(length(hidden) == 1 && hidden == 0)) { 99 | length.weights <- length(hidden) + 1 100 | nrow.weights <- array(0, dim = c(length.weights)) 101 | ncol.weights <- array(0, dim = c(length.weights)) 102 | nrow.weights[1] <- (input.count + 1) 103 | ncol.weights[1] <- hidden[1] 104 | if (length(hidden) > 1) 105 | for (i in 2:length(hidden)) { 106 | nrow.weights[i] <- hidden[i - 1] + 1 107 | ncol.weights[i] <- hidden[i] 108 | } 109 | nrow.weights[length.weights] <- hidden[length.weights - 110 | 1] + 1 111 | ncol.weights[length.weights] <- output.count 112 | } 113 | else { 114 | length.weights <- 1 115 | nrow.weights <- array((input.count + 1), dim = c(1)) 116 | ncol.weights <- array(output.count, dim = c(1)) 117 | } 118 | length <- sum(ncol.weights * nrow.weights) 119 | vector <- rep(0, length) 120 | if (!is.null(exclude)) { 121 | if (is.matrix(exclude)) { 122 | exclude <- matrix(as.integer(exclude), ncol = ncol(exclude), 123 | nrow = nrow(exclude)) 124 | if (nrow(exclude) >= length || ncol(exclude) != 3) 125 | stop("'exclude' has wrong dimensions", call. = FALSE) 126 | if (any(exclude < 1)) 127 | stop("'exclude' contains at least one invalid weight", 128 | call. 
= FALSE) 129 | temp <- relist(vector, nrow.weights, ncol.weights) 130 | for (i in 1:nrow(exclude)) { 131 | if (exclude[i, 1] > length.weights || exclude[i, 132 | 2] > nrow.weights[exclude[i, 1]] || exclude[i, 133 | 3] > ncol.weights[exclude[i, 1]]) 134 | stop("'exclude' contains at least one invalid weight", 135 | call. = FALSE) 136 | temp[[exclude[i, 1]]][exclude[i, 2], exclude[i, 137 | 3]] <- 1 138 | } 139 | exclude <- which(unlist(temp) == 1) 140 | } 141 | else if (is.vector(exclude)) { 142 | exclude <- as.integer(exclude) 143 | if (max(exclude) > length || min(exclude) < 1) { 144 | stop("'exclude' contains at least one invalid weight", 145 | call. = FALSE) 146 | } 147 | } 148 | else { 149 | stop("'exclude' must be a vector or matrix", call. = FALSE) 150 | } 151 | if (length(exclude) >= length) 152 | stop("all weights are excluded", call. = FALSE) 153 | } 154 | length <- length - length(exclude) 155 | if (!is.null(exclude)) { 156 | if (is.null(startweights) || length(startweights) < (length * 157 | rep)) 158 | vector[-exclude] <- stats::rnorm(length) 159 | else vector[-exclude] <- startweights[((rep - 1) * length + 160 | 1):(length * rep)] 161 | } 162 | else { 163 | if (is.null(startweights) || length(startweights) < (length * 164 | rep)) 165 | vector <- stats::rnorm(length) 166 | else vector <- startweights[((rep - 1) * length + 1):(length * 167 | rep)] 168 | } 169 | if (!is.null(exclude) && !is.null(constant.weights)) { 170 | if (length(exclude) < length(constant.weights)) 171 | stop("constant.weights contains more weights than exclude", 172 | call. = FALSE) 173 | else vector[exclude[1:length(constant.weights)]] <- constant.weights 174 | } 175 | weights <- relist(vector, nrow.weights, ncol.weights) 176 | return(list(weights = weights, exclude = exclude)) 177 | } 178 | rprop <- 179 | function (weights, response, covariate, threshold, learningrate.limit, 180 | learningrate.factor, stepmax, lifesign, lifesign.step, act.fct, 181 | act.deriv.fct, output.act.fct, output.act.deriv.fct, err.fct, err.deriv.fct, algorithm, linear.output, 182 | exclude, learningrate.bp) 183 | { 184 | step <- 1 185 | nchar.stepmax <- max(nchar(stepmax), 7) 186 | length.weights <- length(weights) 187 | nrow.weights <- sapply(weights, nrow) 188 | ncol.weights <- sapply(weights, ncol) 189 | length.unlist <- length(unlist(weights)) - length(exclude) 190 | learningrate <- as.vector(matrix(0.1, nrow = 1, ncol = length.unlist)) 191 | gradients.old <- as.vector(matrix(0, nrow = 1, ncol = length.unlist)) 192 | if (is.null(exclude)) 193 | exclude <- length(unlist(weights)) + 1 194 | if (attr(act.fct, "type") == "tanh" || attr(act.fct, "type") == "logistic" || attr(act.fct, "type") == "relu") 195 | special <- TRUE 196 | else special <- FALSE 197 | if (attr(output.act.fct, "type") == "tanh" || attr(output.act.fct, "type") == "logistic" || attr(output.act.fct, "type") == "relu") 198 | output.special <- TRUE 199 | else output.special <- FALSE 200 | if (linear.output) { 201 | output.act.fct <- function(x) { 202 | x 203 | } 204 | output.act.deriv.fct <- function(x) { 205 | matrix(1, nrow(x), ncol(x)) 206 | } 207 | } 208 | else { 209 | if (attr(err.fct, "type") == "ce" && attr(act.fct, "type") == "logistic") { 210 | err.deriv.fct <- function(x, y) { 211 | x * (1 - y) - y * (1 - x) 212 | } 213 | linear.output <- TRUE 214 | } 215 | #output.act.fct <- act.fct 216 | #output.act.deriv.fct <- act.deriv.fct 217 | } 218 | result <- compute.net(weights, length.weights, covariate = covariate, 219 | act.fct = act.fct, act.deriv.fct =
act.deriv.fct, output.act.fct = output.act.fct, 220 | output.act.deriv.fct = output.act.deriv.fct, special, output.special) 221 | err.deriv <- err.deriv.fct(result$net.result, response) 222 | gradients <- calculate.gradients(weights = weights, length.weights = length.weights, 223 | neurons = result$neurons, neuron.deriv = result$neuron.deriv, 224 | err.deriv = err.deriv, exclude = exclude, linear.output = linear.output) 225 | reached.threshold <- max(abs(gradients)) 226 | min.reached.threshold <- reached.threshold 227 | while (step < stepmax && reached.threshold > threshold) { 228 | if (!is.character(lifesign) && step%%lifesign.step == 229 | 0) { 230 | text <- paste("%", nchar.stepmax, "s", sep = "") 231 | message(sprintf(eval(expression(text)), step), "\tmin thresh: ", 232 | min.reached.threshold, "\n", rep(" ", lifesign), appendLF = FALSE) 233 | utils::flush.console() 234 | } 235 | if (algorithm == "rprop+") 236 | result <- plus(gradients, gradients.old, weights, 237 | nrow.weights, ncol.weights, learningrate, learningrate.factor, 238 | learningrate.limit, exclude) 239 | else if (algorithm == "backprop") 240 | result <- backprop(gradients, weights, length.weights, 241 | nrow.weights, ncol.weights, learningrate.bp, 242 | exclude) 243 | else result <- minus(gradients, gradients.old, weights, 244 | length.weights, nrow.weights, ncol.weights, learningrate, 245 | learningrate.factor, learningrate.limit, algorithm, 246 | exclude) 247 | gradients.old <- result$gradients.old 248 | weights <- result$weights 249 | learningrate <- result$learningrate 250 | result <- compute.net(weights, length.weights, covariate = covariate, 251 | act.fct = act.fct, act.deriv.fct = act.deriv.fct, 252 | output.act.fct = output.act.fct, output.act.deriv.fct = output.act.deriv.fct, 253 | special, output.special) 254 | err.deriv <- err.deriv.fct(result$net.result, response) 255 | gradients <- calculate.gradients(weights = weights, length.weights = length.weights, 256 | neurons = result$neurons, neuron.deriv = result$neuron.deriv, 257 | err.deriv = err.deriv, exclude = exclude, linear.output = linear.output) 258 | reached.threshold <- max(abs(gradients)) 259 | if (reached.threshold < min.reached.threshold) { 260 | min.reached.threshold <- reached.threshold 261 | } 262 | step <- step + 1 263 | } 264 | if (lifesign != "none" && reached.threshold > threshold) { 265 | message("stepmax\tmin thresh: ", min.reached.threshold) 266 | } 267 | return(list(weights = weights, step = as.integer(step), reached.threshold = reached.threshold, 268 | net.result = result$net.result, neuron.deriv = result$neuron.deriv)) 269 | } 270 | compute.net <- 271 | function (weights, length.weights, covariate, act.fct, act.deriv.fct, 272 | output.act.fct, output.act.deriv.fct, special, output.special) 273 | { 274 | neuron.deriv <- NULL 275 | neurons <- list(covariate) 276 | if (length.weights > 1) 277 | for (i in 1:(length.weights - 1)) { 278 | temp <- neurons[[i]] %*% weights[[i]] 279 | act.temp <- act.fct(temp) 280 | if (special) 281 | neuron.deriv[[i]] <- act.deriv.fct(act.temp) 282 | else neuron.deriv[[i]] <- act.deriv.fct(temp) 283 | neurons[[i + 1]] <- cbind(1, act.temp) 284 | } 285 | if (!is.list(neuron.deriv)) 286 | neuron.deriv <- list(neuron.deriv) 287 | temp <- neurons[[length.weights]] %*% weights[[length.weights]] 288 | net.result <- output.act.fct(temp) 289 | if (output.special) 290 | neuron.deriv[[length.weights]] <- output.act.deriv.fct(net.result) 291 | else neuron.deriv[[length.weights]] <- output.act.deriv.fct(temp) 292 | if
(any(is.na(neuron.deriv))) 293 | stop("neuron derivatives contain an NA; verify that the derivative function does not divide by 0", 294 | call. = FALSE) 295 | list(neurons = neurons, neuron.deriv = neuron.deriv, net.result = net.result) 296 | } 297 | calculate.gradients <- 298 | function (weights, length.weights, neurons, neuron.deriv, err.deriv, 299 | exclude, linear.output) 300 | { 301 | if (any(is.na(err.deriv))) 302 | stop("the error derivative contains an NA; verify that the derivative function does not divide by 0 (e.g. cross entropy)", 303 | call. = FALSE) 304 | if (!linear.output) 305 | delta <- neuron.deriv[[length.weights]] * err.deriv 306 | else delta <- err.deriv 307 | gradients <- crossprod(neurons[[length.weights]], delta) 308 | if (length.weights > 1) 309 | for (w in (length.weights - 1):1) { 310 | delta <- neuron.deriv[[w]] * tcrossprod(delta, weights[[w + 1]][-1,, drop = FALSE]) 311 | gradients <- c(crossprod(neurons[[w]], delta), gradients) 312 | } 313 | gradients[-exclude] 314 | } 315 | plus <- 316 | function (gradients, gradients.old, weights, nrow.weights, ncol.weights, 317 | learningrate, learningrate.factor, learningrate.limit, exclude) 318 | { 319 | weights <- unlist(weights) 320 | sign.gradient <- sign(gradients) 321 | temp <- gradients.old * sign.gradient 322 | positive <- temp > 0 323 | negative <- temp < 0 324 | not.negative <- !negative 325 | if (any(positive)) { 326 | learningrate[positive] <- pmin.int(learningrate[positive] * 327 | learningrate.factor$plus, learningrate.limit$max) 328 | } 329 | if (any(negative)) { 330 | weights[-exclude][negative] <- weights[-exclude][negative] + 331 | gradients.old[negative] * learningrate[negative] 332 | learningrate[negative] <- pmax.int(learningrate[negative] * 333 | learningrate.factor$minus, learningrate.limit$min) 334 | gradients.old[negative] <- 0 335 | if (any(not.negative)) { 336 | weights[-exclude][not.negative] <- weights[-exclude][not.negative] - 337 | sign.gradient[not.negative] * learningrate[not.negative] 338 | gradients.old[not.negative] <- sign.gradient[not.negative] 339 | } 340 | } 341 | else { 342 | weights[-exclude] <- weights[-exclude] - sign.gradient * 343 | learningrate 344 | gradients.old <- sign.gradient 345 | } 346 | list(gradients.old = gradients.old, weights = relist(weights, 347 | nrow.weights, ncol.weights), learningrate = learningrate) 348 | } 349 | backprop <- 350 | function (gradients, weights, length.weights, nrow.weights, ncol.weights, 351 | learningrate.bp, exclude) 352 | { 353 | weights <- unlist(weights) 354 | if (!is.null(exclude)) 355 | weights[-exclude] <- weights[-exclude] - gradients * 356 | learningrate.bp 357 | else weights <- weights - gradients * learningrate.bp 358 | list(gradients.old = gradients, weights = relist(weights, 359 | nrow.weights, ncol.weights), learningrate = learningrate.bp) 360 | } 361 | minus <- 362 | function (gradients, gradients.old, weights, length.weights, 363 | nrow.weights, ncol.weights, learningrate, learningrate.factor, 364 | learningrate.limit, algorithm, exclude) 365 | { 366 | weights <- unlist(weights) 367 | temp <- gradients.old * gradients 368 | positive <- temp > 0 369 | negative <- temp < 0 370 | if (any(positive)) 371 | learningrate[positive] <- pmin.int(learningrate[positive] * 372 | learningrate.factor$plus, learningrate.limit$max) 373 | if (any(negative)) 374 | learningrate[negative] <- pmax.int(learningrate[negative] * 375 | learningrate.factor$minus, learningrate.limit$min) 376 | if (algorithm != "rprop-") { 377 | delta <- 10^-6 378 |
notzero <- gradients != 0 379 | gradients.notzero <- gradients[notzero] 380 | if (algorithm == "slr") { 381 | min <- which.min(learningrate[notzero]) 382 | } 383 | else if (algorithm == "sag") { 384 | min <- which.min(abs(gradients.notzero)) 385 | } 386 | if (length(min) != 0) { 387 | temp <- learningrate[notzero] * gradients.notzero 388 | sum <- sum(temp[-min]) + delta 389 | learningrate[notzero][min] <- min(max(-sum/gradients.notzero[min], 390 | learningrate.limit$min), learningrate.limit$max) 391 | } 392 | } 393 | weights[-exclude] <- weights[-exclude] - sign(gradients) * 394 | learningrate 395 | list(gradients.old = gradients, weights = relist(weights, 396 | nrow.weights, ncol.weights), learningrate = learningrate) 397 | } 398 | calculate.generalized.weights <- 399 | function (weights, neuron.deriv, net.result) 400 | { 401 | for (w in 1:length(weights)) { 402 | weights[[w]] <- weights[[w]][-1,, drop = FALSE] 403 | } 404 | generalized.weights <- NULL 405 | for (k in 1:ncol(net.result)) { 406 | for (w in length(weights):1) { 407 | if (w == length(weights)) { 408 | temp <- neuron.deriv[[length(weights)]][, k] * 409 | 1/(net.result[, k] * (1 - (net.result[, k]))) 410 | delta <- tcrossprod(temp, weights[[w]][, k]) 411 | } 412 | else { 413 | delta <- tcrossprod(delta * neuron.deriv[[w]], 414 | weights[[w]]) 415 | } 416 | } 417 | generalized.weights <- cbind(generalized.weights, delta) 418 | } 419 | return(generalized.weights) 420 | } 421 | 422 | relist <- 423 | function (x, nrow, ncol) 424 | { 425 | list.x <- NULL 426 | for (w in 1:length(nrow)) { 427 | length <- nrow[w] * ncol[w] 428 | list.x[[w]] <- matrix(x[1:length], nrow = nrow[w], ncol = ncol[w]) 429 | x <- x[-(1:length)] 430 | } 431 | list.x 432 | } --------------------------------------------------------------------------------
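For orientation, a minimal sketch of how the internal routines in fit_neuralnet.R are reached in practice, assuming only the documented neuralnet() interface and the base-R infert data set used in the package examples (this sketch is illustrative and not part of the package sources):

library(neuralnet)

# Train a small network with resilient backpropagation (the default "rprop+"),
# cross-entropy error and a logistic output; neuralnet() prepares the model list
# and calls calculate.neuralnet() once per repetition, which in turn runs rprop().
set.seed(42)
nn <- neuralnet(case ~ age + parity + induced + spontaneous,
                data = infert, hidden = 2,
                err.fct = "ce", linear.output = FALSE)

# Forward pass through the fitted weights (compute.net() performs the same
# propagation internally during training).
head(predict(nn, infert))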