├── .gitattributes
├── .gitignore
├── tests
│   ├── testthat.R
│   └── testthat
│       ├── test_multiclass.R
│       └── test_neuralnet.R
├── .Rbuildignore
├── NAMESPACE
├── NEWS
├── man
│   ├── compute.Rd
│   ├── predict.nn.Rd
│   ├── prediction.Rd
│   ├── confidence.interval.Rd
│   ├── gwplot.Rd
│   ├── plot.nn.Rd
│   ├── neuralnet-package.Rd
│   └── neuralnet.Rd
├── README.md
├── .travis.yml
├── appveyor.yml
├── R
│   ├── compute.r
│   ├── convert_functions.R
│   ├── print.R
│   ├── predict.R
│   ├── neuralnet-package.R
│   ├── prediction.r
│   ├── gwplot.r
│   ├── plot.nn.r
│   ├── confidence.interval.r
│   ├── neuralnet.r
│   └── fit_neuralnet.R
└── DESCRIPTION

/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto
2 | data/* binary
3 | src/* text=lf
4 | R/* text=lf
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | *.Rproj
6 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(neuralnet)
3 |
4 | test_check("neuralnet")
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^appveyor\.yml$
4 | ^\.travis\.yml$
5 | ^README\.md$
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method(plot,nn)
4 | S3method(predict,nn)
5 | export(compute)
6 | export(confidence.interval)
7 | export(gwplot)
8 | export(neuralnet)
9 | export(prediction)
10 | import(stats)
11 |
--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
1 |
2 | ##### Version 1.44.6
3 | * Bug fixes
4 |
5 | ##### Version 1.44.5
6 | * Allow prediction with NA weights (set to 0 as in old version)
7 | * Fix prediction for networks grown with older versions
8 |
9 | ##### Version 1.44.4
10 | * Add ReLU activation function
11 |
12 | ##### Version 1.44.3
13 | * Bug fixes
14 |
15 | ##### Version 1.44.2
16 | * Make compute() return the same as in previous versions
17 | * Soft-deprecate compute() for now
18 |
19 | ##### Version 1.44.1
20 | * Add support for dots in formula
21 | * Allow direct multiclass outcomes with factors
22 |
23 | ##### Version 1.44.0
24 | * New maintainer: Marvin N. Wright
25 | * Add a predict() function to replace compute()
26 | * Use Deriv package for symbolic derivation
27 | * Many internal changes to improve code quality
28 | * Bug fixes
--------------------------------------------------------------------------------
/man/compute.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/compute.r
3 | \name{compute}
4 | \alias{compute}
5 | \title{Deprecated function}
6 | \usage{
7 | compute(x, covariate, rep = 1)
8 | }
9 | \arguments{
10 | \item{x}{an object of class \code{nn}.}
11 |
12 | \item{covariate}{a dataframe or matrix containing the variables that had
13 | been used to train the neural network.}
14 |
15 | \item{rep}{an integer indicating the
neural network's repetition which 16 | should be used.} 17 | } 18 | \value{ 19 | \code{compute} returns a list containing the following components: 20 | \item{neurons}{a list of the neurons' output for each layer of the neural 21 | network.} \item{net.result}{a matrix containing the overall result of the 22 | neural network.} 23 | } 24 | \description{ 25 | The function \code{compute} is deprecated. Please refer to the new function \code{\link{predict.nn}}. 26 | } 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Travis Build Status](https://travis-ci.org/bips-hb/neuralnet.svg?branch=master)](https://travis-ci.org/bips-hb/neuralnet) 2 | [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/bips-hb/neuralnet?branch=master&svg=true)](https://ci.appveyor.com/project/mnwright/neuralnet) 3 | ![CRAN Downloads month](http://cranlogs.r-pkg.org/badges/neuralnet?color=brightgreen) 4 | ![CRAN Downloads overall](http://cranlogs.r-pkg.org/badges/grand-total/neuralnet?color=brightgreen) 5 | # neuralnet 6 | Training of Neural Networks 7 | 8 | The **neuralnet** package is now on GitHub! New maintainer will be [@mnwright](https://github.com/mnwright). 9 | 10 | Please note that **neuralnet** will not turn into a super fast deep learning framework. We will continue to focus on ease of use and flexibility. 11 | 12 | Please post any issue you have with the package here on GitHub. Code contributions are also very welcome! 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | cache: packages 3 | warnings_are_errors: true 4 | r: 5 | - oldrel 6 | - release 7 | - devel 8 | 9 | r_github_packages: 10 | - r-lib/covr 11 | 12 | after_success: 13 | - Rscript -e 'covr::coveralls()' 14 | 15 | notifications: 16 | email: 17 | recipients: 18 | secure: qh6JWWLG+P2D9MstM/7zk9C1eZEehdgMA3qIRMfa+gW02YPMXJK7+I1CTPPvVzVQP7iQJKavC+LWoYKEPWdCyu0PzbzDI50PBArPbsOHNF0ZKGA+z0bFsRlCo/BzStdSO/bg3m60+zo6BCPSpPWqGWsnED4Wb2YGf20M7TLUeKZ2htmcIgQx5VmfOPTZG0lh8u3/c4SFM9jF7jxjuV3QI8C1gnvFfidic24OJH91NvIajIttWsFU7bSRLM0eKbutWFnuqPRDjVIw62zQsigXJjy3wo6Yo2CY41X95wAu88AOgO14i4GyRM6Hv7juRLJJfuts8OFUMtG8btzemx2fBx81YUeG1QRD1Puxax61elDDJtNALEghzAscAnguUoTUCdUpw9ras1nIf8R+HDErSZd9ZEJ0QpGBGJBbl5pe/4V2XWo9EPhvQe8pAO+3iXIsFnEcItxBBwe0tJqCnW+pdTd3N99szdCKP6HyuV+8SJqy7tgilBFJZpC3SGQ1ynP4FRSF59ekLUtAGlm1RYGnUTMtVAWHZVQOMmLKbZfPW3JqUwVuaMCzG7XOFEgCtpT1VmIcggvyeGMMCdXPlW4Ns42tPuSEEcxo7kmVo7v5SDIWdGu6Znqbrs9UG17GqL7oLehQsGnrbGMIuLu3CNW+5/6m7YrO45lsi4nqHaqEW8Y= 19 | on_success: change 20 | on_failure: change 21 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # DO NOT CHANGE the "init" and "install" sections below 2 | 3 | # Download script file from GitHub 4 | init: 5 | ps: | 6 | $ErrorActionPreference = "Stop" 7 | Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" 8 | Import-Module '..\appveyor-tool.ps1' 9 | 10 | install: 11 | ps: Bootstrap 12 | 13 | cache: 14 | - C:\RLibrary 15 | 16 | # Adapt as necessary starting from here 17 | 18 | build_script: 19 | - travis-tool.sh install_deps 20 | 21 | test_script: 22 | - travis-tool.sh run_tests 23 | 24 | on_failure: 25 
| - 7z a failure.zip *.Rcheck\* 26 | - appveyor PushArtifact failure.zip 27 | 28 | artifacts: 29 | - path: '*.Rcheck\**\*.log' 30 | name: Logs 31 | 32 | - path: '*.Rcheck\**\*.out' 33 | name: Logs 34 | 35 | - path: '*.Rcheck\**\*.fail' 36 | name: Logs 37 | 38 | - path: '*.Rcheck\**\*.Rout' 39 | name: Logs 40 | 41 | - path: '\*_*.tar.gz' 42 | name: Bits 43 | 44 | - path: '\*_*.zip' 45 | name: Bits 46 | -------------------------------------------------------------------------------- /R/compute.r: -------------------------------------------------------------------------------- 1 | 2 | #' Deprecated function 3 | #' 4 | #' The function \code{compute} is deprecated. Please refer to the new function \code{\link{predict.nn}}. 5 | #' 6 | #' @param x an object of class \code{nn}. 7 | #' @param covariate a dataframe or matrix containing the variables that had 8 | #' been used to train the neural network. 9 | #' @param rep an integer indicating the neural network's repetition which 10 | #' should be used. 11 | #' @return \code{compute} returns a list containing the following components: 12 | #' \item{neurons}{a list of the neurons' output for each layer of the neural 13 | #' network.} \item{net.result}{a matrix containing the overall result of the 14 | #' neural network.} 15 | #' 16 | #' @export 17 | compute <- function(x, covariate, rep = 1) { 18 | #.Deprecated("predict", package = "neuralnet") 19 | pred <- predict.nn(x, newdata = covariate, rep = rep, all.units = TRUE) 20 | 21 | # Create old format by adding intercept 22 | for (i in 1:(length(pred) - 1)) { 23 | pred[[i]] <- cbind(1, pred[[i]]) 24 | } 25 | 26 | list(neurons = pred[-length(pred)], net.result = pred[[length(pred)]]) 27 | } 28 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: neuralnet 2 | Type: Package 3 | Title: Training of Neural Networks 4 | Version: 1.44.6 5 | Date: 2019-04-17 6 | Authors@R: c( 7 | person(given="Stefan", family="Fritsch", role="aut"), 8 | person(given="Frauke", family="Guenther", role=c("aut")), 9 | person(given="Marvin N.", family="Wright", role=c("aut","cre"),email="wright@leibniz-bips.de"), 10 | person(given="Marc", family="Suling", role="ctb"), 11 | person(given="Sebastian M.", family="Mueller", role = "ctb")) 12 | Depends: R (>= 2.9.0) 13 | Imports: grid, MASS, grDevices, stats, utils, Deriv 14 | Description: Training of neural networks using backpropagation, 15 | resilient backpropagation with (Riedmiller, 1994) or without 16 | weight backtracking (Riedmiller and Braun, 1993) or the 17 | modified globally convergent version by Anastasiadis et al. 18 | (2005). The package allows flexible settings through 19 | custom-choice of error and activation function. Furthermore, 20 | the calculation of generalized weights (Intrator O & Intrator 21 | N, 1993) is implemented. 
22 | License: GPL (>= 2) 23 | Suggests: testthat 24 | URL: https://github.com/bips-hb/neuralnet 25 | BugReports: https://github.com/bips-hb/neuralnet/issues 26 | RoxygenNote: 6.1.0 27 | -------------------------------------------------------------------------------- /R/convert_functions.R: -------------------------------------------------------------------------------- 1 | # Convert named activation functions in R functions, including derivatives 2 | convert.activation.function <- function(fun) { 3 | if (fun == "tanh") { 4 | fct <- function(x) { 5 | tanh(x) 6 | } 7 | attr(fct, "type") <- "tanh" 8 | deriv.fct <- function(x) { 9 | 1 - x^2 10 | } 11 | } else if (fun == "logistic") { 12 | fct <- function(x) { 13 | 1/(1 + exp(-x)) 14 | } 15 | attr(fct, "type") <- "logistic" 16 | deriv.fct <- function(x) { 17 | x * (1 - x) 18 | } 19 | } else if (fun == "relu" || fun == "ReLu") { 20 | fct <- function(x) { 21 | x * (x > 0) 22 | } 23 | attr(fct, "type") <- "relu" 24 | deriv.fct <- function(x) { 25 | 1 * (x > 0) 26 | } 27 | } else { 28 | stop("Unknown function.", call. = FALSE) 29 | } 30 | list(fct = fct, deriv.fct = deriv.fct) 31 | } 32 | 33 | # Convert named error functions in R functions, including derivatives 34 | convert.error.function <- function(fun) { 35 | if (fun == "sse") { 36 | fct <- function(x, y) { 37 | 1/2 * (y - x)^2 38 | } 39 | attr(fct, "type") <- "sse" 40 | deriv.fct <- function(x, y) { 41 | x - y 42 | } 43 | } else if (fun == "ce") { 44 | fct <- function(x, y) { 45 | -(y * log(x) + (1 - y) * log(1 - x)) 46 | } 47 | attr(fct, "type") <- "ce" 48 | deriv.fct <- function(x, y) { 49 | (1 - y)/(1 - x) - y/x 50 | } 51 | } else { 52 | stop("Unknown function.", call. = FALSE) 53 | } 54 | list(fct = fct, deriv.fct = deriv.fct) 55 | } -------------------------------------------------------------------------------- /man/predict.nn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.R 3 | \name{predict.nn} 4 | \alias{predict.nn} 5 | \title{Neural network prediction} 6 | \usage{ 7 | \method{predict}{nn}(object, newdata, rep = 1, all.units = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{object}{Neural network of class \code{nn}.} 11 | 12 | \item{newdata}{New data of class \code{data.frame} or \code{matrix}.} 13 | 14 | \item{rep}{Integer indicating the neural network's repetition which should be used.} 15 | 16 | \item{all.units}{Return output for all units instead of final output only.} 17 | 18 | \item{...}{further arguments passed to or from other methods.} 19 | } 20 | \value{ 21 | Matrix of predictions. Each column represents one output unit. 22 | If \code{all.units=TRUE}, a list of matrices with output for each unit. 23 | } 24 | \description{ 25 | Prediction of artificial neural network of class \code{nn}, produced by \code{neuralnet()}. 
26 | } 27 | \examples{ 28 | library(neuralnet) 29 | 30 | # Split data 31 | train_idx <- sample(nrow(iris), 2/3 * nrow(iris)) 32 | iris_train <- iris[train_idx, ] 33 | iris_test <- iris[-train_idx, ] 34 | 35 | # Binary classification 36 | nn <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, iris_train, linear.output = FALSE) 37 | pred <- predict(nn, iris_test) 38 | table(iris_test$Species == "setosa", pred[, 1] > 0.5) 39 | 40 | # Multiclass classification 41 | nn <- neuralnet((Species == "setosa") + (Species == "versicolor") + (Species == "virginica") 42 | ~ Petal.Length + Petal.Width, iris_train, linear.output = FALSE) 43 | pred <- predict(nn, iris_test) 44 | table(iris_test$Species, apply(pred, 1, which.max)) 45 | 46 | } 47 | \author{ 48 | Marvin N. Wright 49 | } 50 | -------------------------------------------------------------------------------- /man/prediction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prediction.r 3 | \name{prediction} 4 | \alias{prediction} 5 | \title{Summarizes the output of the neural network, the data and the fitted values 6 | of glm objects (if available)} 7 | \usage{ 8 | prediction(x, list.glm = NULL) 9 | } 10 | \arguments{ 11 | \item{x}{neural network} 12 | 13 | \item{list.glm}{an optional list of glm objects} 14 | } 15 | \value{ 16 | a list of the summaries of the repetitions of the neural networks, 17 | the data and the glm objects (if available). 18 | } 19 | \description{ 20 | \code{prediction}, a method for objects of class \code{nn}, typically 21 | produced by \code{neuralnet}. In a first step, the dataframe will be 22 | amended by a mean response, the mean of all responses corresponding to the 23 | same covariate-vector. The calculated data.error is the error function 24 | between the original response and the new mean response. In a second step, 25 | all duplicate rows will be erased to get a quick overview of the data. To 26 | obtain an overview of the results of the neural network and the glm objects, 27 | the covariate matrix will be bound to the output of the neural network and 28 | the fitted values of the glm object(if available) and will be reduced by all 29 | duplicate rows. 30 | } 31 | \examples{ 32 | 33 | Var1 <- rpois(100,0.5) 34 | Var2 <- rbinom(100,2,0.6) 35 | Var3 <- rbinom(100,1,0.5) 36 | SUM <- as.integer(abs(Var1+Var2+Var3+(rnorm(100)))) 37 | sum.data <- data.frame(Var1,Var2,Var3, SUM) 38 | print(net.sum <- neuralnet( SUM~Var1+Var2+Var3, sum.data, hidden=1, 39 | act.fct="tanh")) 40 | main <- glm(SUM~Var1+Var2+Var3, sum.data, family=poisson()) 41 | full <- glm(SUM~Var1*Var2*Var3, sum.data, family=poisson()) 42 | prediction(net.sum, list.glm=list(main=main, full=full)) 43 | 44 | } 45 | \seealso{ 46 | \code{\link{neuralnet}} 47 | } 48 | \author{ 49 | Stefan Fritsch, Frauke Guenther \email{guenther@leibniz-bips.de} 50 | } 51 | \keyword{neural} 52 | -------------------------------------------------------------------------------- /man/confidence.interval.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/confidence.interval.r 3 | \name{confidence.interval} 4 | \alias{confidence.interval} 5 | \title{Calculates confidence intervals of the weights} 6 | \usage{ 7 | confidence.interval(x, alpha = 0.05) 8 | } 9 | \arguments{ 10 | \item{x}{neural network} 11 | 12 | \item{alpha}{numerical. 
Sets the confidence level to (1-alpha).}
13 | }
14 | \value{
15 | \code{confidence.interval} returns a list containing the following
16 | components:
17 |
18 | \item{ lower.ci }{a list containing the lower confidence bounds of all
19 | weights of the neural network differentiated by the repetitions.} \item{
20 | upper.ci }{a list containing the upper confidence bounds of all weights of
21 | the neural network differentiated by the repetitions.} \item{ nic }{a vector
22 | containing the information criteria NIC for every repetition.}
23 | }
24 | \description{
25 | \code{confidence.interval}, a method for objects of class \code{nn},
26 | typically produced by \code{neuralnet}. Calculates confidence intervals of
27 | the weights (White, 1989) and the network information criteria NIC (Murata
28 | et al. 1994). All confidence intervals are calculated under the assumption
29 | of a local identification of the given neural network. If this assumption
30 | is violated, the results will not be reasonable. Please also make sure that
31 | the chosen error function equals the negative log-likelihood function;
32 | otherwise the results will not be meaningful either.
33 | }
34 | \examples{
35 |
36 | data(infert, package="datasets")
37 | print(net.infert <- neuralnet(case~parity+induced+spontaneous,
38 | infert, err.fct="ce", linear.output=FALSE))
39 | confidence.interval(net.infert)
40 |
41 | }
42 | \references{
43 | White (1989) \emph{Learning in artificial neural networks. A
44 | statistical perspective.} Neural Computation (1), pages 425-464
45 |
46 | Murata et al. (1994) \emph{Network information criterion - determining the
47 | number of hidden units for an artificial neural network model.} IEEE
48 | Transactions on Neural Networks 5 (6), pages 865-871
49 | }
50 | \seealso{
51 | \code{\link{neuralnet}}
52 | }
53 | \author{
54 | Stefan Fritsch, Frauke Guenther \email{guenther@leibniz-bips.de}
55 | }
56 | \keyword{neural}
57 |
--------------------------------------------------------------------------------
/R/print.R:
--------------------------------------------------------------------------------
1 | print.nn <-
2 | function (x, ...)
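# Print method for objects of class 'nn' (undocumented in this file): shows the
# matched call and, for each repetition (ordered by error when there is more than
# one), the error, AIC/BIC if computed, the reached threshold and the number of steps.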
3 | { 4 | matrix <- x$result.matrix 5 | cat("Call: ", deparse(x$call), "\n\n", sep = "") 6 | if (!is.null(matrix)) { 7 | if (ncol(matrix) > 1) { 8 | cat(ncol(matrix), " repetitions were calculated.\n\n", 9 | sep = "") 10 | sorted.matrix <- matrix[, order(matrix["error", ])] 11 | if (any(rownames(sorted.matrix) == "aic")) { 12 | print(t(rbind(Error = sorted.matrix["error", 13 | ], AIC = sorted.matrix["aic", ], BIC = sorted.matrix["bic", 14 | ], `Reached Threshold` = sorted.matrix["reached.threshold", 15 | ], Steps = sorted.matrix["steps", ]))) 16 | } 17 | else { 18 | print(t(rbind(Error = sorted.matrix["error", 19 | ], `Reached Threshold` = sorted.matrix["reached.threshold", 20 | ], Steps = sorted.matrix["steps", ]))) 21 | } 22 | } 23 | else { 24 | cat(ncol(matrix), " repetition was calculated.\n\n", 25 | sep = "") 26 | if (any(rownames(matrix) == "aic")) { 27 | print(t(matrix(c(matrix["error", ], matrix["aic", 28 | ], matrix["bic", ], matrix["reached.threshold", 29 | ], matrix["steps", ]), dimnames = list(c("Error", 30 | "AIC", "BIC", "Reached Threshold", "Steps"), 31 | c(1))))) 32 | } 33 | else { 34 | print(t(matrix(c(matrix["error", ], matrix["reached.threshold", 35 | ], matrix["steps", ]), dimnames = list(c("Error", 36 | "Reached Threshold", "Steps"), c(1))))) 37 | } 38 | } 39 | } 40 | cat("\n") 41 | } -------------------------------------------------------------------------------- /man/gwplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gwplot.r 3 | \name{gwplot} 4 | \alias{gwplot} 5 | \title{Plot method for generalized weights} 6 | \usage{ 7 | gwplot(x, rep = NULL, max = NULL, min = NULL, file = NULL, 8 | selected.covariate = 1, selected.response = 1, highlight = FALSE, 9 | type = "p", col = "black", ...) 10 | } 11 | \arguments{ 12 | \item{x}{an object of class \code{nn}} 13 | 14 | \item{rep}{an integer indicating the repetition to plot. If rep="best", the 15 | repetition with the smallest error will be plotted. If not stated all 16 | repetitions will be plotted.} 17 | 18 | \item{max}{maximum of the y axis. In default, max is set to the highest 19 | y-value.} 20 | 21 | \item{min}{minimum of the y axis. In default, min is set to the smallest 22 | y-value.} 23 | 24 | \item{file}{a character string naming the plot to write to. If not stated, 25 | the plot will not be saved.} 26 | 27 | \item{selected.covariate}{either a string of the covariate's name or an 28 | integer of the ordered covariates, indicating the reference covariate in the 29 | generalized weights plot. Defaulting to the first covariate.} 30 | 31 | \item{selected.response}{either a string of the response variable's name or 32 | an integer of the ordered response variables, indicating the reference 33 | response in the generalized weights plot. Defaulting to the first response 34 | variable.} 35 | 36 | \item{highlight}{a logical value, indicating whether to highlight (red 37 | color) the best repetition (smallest error). Only reasonable if rep=NULL. 
38 | Default is FALSE} 39 | 40 | \item{type}{a character indicating the type of plotting; actually any of the 41 | types as in \code{\link{plot.default}}.} 42 | 43 | \item{col}{a color of the generalized weights.} 44 | 45 | \item{\dots}{Arguments to be passed to methods, such as graphical parameters 46 | (see \code{\link{par}}).} 47 | } 48 | \description{ 49 | \code{gwplot}, a method for objects of class \code{nn}, typically produced 50 | by \code{neuralnet}. Plots the generalized weights (Intrator and Intrator, 51 | 1993) for one specific covariate and one response variable. 52 | } 53 | \examples{ 54 | 55 | data(infert, package="datasets") 56 | print(net.infert <- neuralnet(case~parity+induced+spontaneous, infert, 57 | err.fct="ce", linear.output=FALSE, likelihood=TRUE)) 58 | gwplot(net.infert, selected.covariate="parity") 59 | gwplot(net.infert, selected.covariate="induced") 60 | gwplot(net.infert, selected.covariate="spontaneous") 61 | 62 | } 63 | \references{ 64 | Intrator O. and Intrator N. (1993) \emph{Using Neural Nets for 65 | Interpretation of Nonlinear Models.} Proceedings of the Statistical 66 | Computing Section, 244-249 San Francisco: American Statistical Society 67 | (eds.) 68 | } 69 | \seealso{ 70 | \code{\link{neuralnet}} 71 | } 72 | \author{ 73 | Stefan Fritsch, Frauke Guenther \email{guenther@leibniz-bips.de} 74 | } 75 | \keyword{neural} 76 | -------------------------------------------------------------------------------- /man/plot.nn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.nn.r 3 | \name{plot.nn} 4 | \alias{plot.nn} 5 | \title{Plot method for neural networks} 6 | \usage{ 7 | \method{plot}{nn}(x, rep = NULL, x.entry = NULL, x.out = NULL, 8 | radius = 0.15, arrow.length = 0.2, intercept = TRUE, 9 | intercept.factor = 0.4, information = TRUE, information.pos = 0.1, 10 | col.entry.synapse = "black", col.entry = "black", 11 | col.hidden = "black", col.hidden.synapse = "black", 12 | col.out = "black", col.out.synapse = "black", 13 | col.intercept = "blue", fontsize = 12, dimension = 6, 14 | show.weights = TRUE, file = NULL, ...) 15 | } 16 | \arguments{ 17 | \item{x}{an object of class \code{nn}} 18 | 19 | \item{rep}{repetition of the neural network. If rep="best", the repetition 20 | with the smallest error will be plotted. If not stated all repetitions will 21 | be plotted, each in a separate window.} 22 | 23 | \item{x.entry}{x-coordinate of the entry layer. Depends on the arrow.length 24 | in default.} 25 | 26 | \item{x.out}{x-coordinate of the output layer.} 27 | 28 | \item{radius}{radius of the neurons.} 29 | 30 | \item{arrow.length}{length of the entry and out arrows.} 31 | 32 | \item{intercept}{a logical value indicating whether to plot the intercept.} 33 | 34 | \item{intercept.factor}{x-position factor of the intercept. 
The closer the 35 | factor is to 0, the closer the intercept is to its left neuron.} 36 | 37 | \item{information}{a logical value indicating whether to add the error and 38 | steps to the plot.} 39 | 40 | \item{information.pos}{y-position of the information.} 41 | 42 | \item{col.entry.synapse}{color of the synapses leading to the input neurons.} 43 | 44 | \item{col.entry}{color of the input neurons.} 45 | 46 | \item{col.hidden}{color of the neurons in the hidden layer.} 47 | 48 | \item{col.hidden.synapse}{color of the weighted synapses.} 49 | 50 | \item{col.out}{color of the output neurons.} 51 | 52 | \item{col.out.synapse}{color of the synapses leading away from the output 53 | neurons.} 54 | 55 | \item{col.intercept}{color of the intercept.} 56 | 57 | \item{fontsize}{fontsize of the text.} 58 | 59 | \item{dimension}{size of the plot in inches.} 60 | 61 | \item{show.weights}{a logical value indicating whether to print the 62 | calculated weights above the synapses.} 63 | 64 | \item{file}{a character string naming the plot to write to. If not stated, 65 | the plot will not be saved.} 66 | 67 | \item{\dots}{arguments to be passed to methods, such as graphical parameters 68 | (see \code{\link{par}}).} 69 | } 70 | \description{ 71 | \code{plot.nn}, a method for the \code{plot} generic. It is designed for an 72 | inspection of the weights for objects of class \code{nn}, typically produced 73 | by \code{neuralnet}. 74 | } 75 | \examples{ 76 | 77 | XOR <- c(0,1,1,0) 78 | xor.data <- data.frame(expand.grid(c(0,1), c(0,1)), XOR) 79 | print(net.xor <- neuralnet( XOR~Var1+Var2, xor.data, hidden=2, rep=5)) 80 | plot(net.xor, rep="best") 81 | 82 | } 83 | \seealso{ 84 | \code{\link{neuralnet}} 85 | } 86 | \author{ 87 | Stefan Fritsch, Frauke Guenther \email{guenther@leibniz-bips.de} 88 | } 89 | \keyword{neural} 90 | -------------------------------------------------------------------------------- /R/predict.R: -------------------------------------------------------------------------------- 1 | 2 | #' Neural network prediction 3 | #' 4 | #' Prediction of artificial neural network of class \code{nn}, produced by \code{neuralnet()}. 5 | #' 6 | #' @param object Neural network of class \code{nn}. 7 | #' @param newdata New data of class \code{data.frame} or \code{matrix}. 8 | #' @param rep Integer indicating the neural network's repetition which should be used. 9 | #' @param all.units Return output for all units instead of final output only. 10 | #' @param ... further arguments passed to or from other methods. 11 | #' 12 | #' @return Matrix of predictions. Each column represents one output unit. 13 | #' If \code{all.units=TRUE}, a list of matrices with output for each unit. 14 | #' 15 | #' @examples 16 | #' library(neuralnet) 17 | #' 18 | #' # Split data 19 | #' train_idx <- sample(nrow(iris), 2/3 * nrow(iris)) 20 | #' iris_train <- iris[train_idx, ] 21 | #' iris_test <- iris[-train_idx, ] 22 | #' 23 | #' # Binary classification 24 | #' nn <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, iris_train, linear.output = FALSE) 25 | #' pred <- predict(nn, iris_test) 26 | #' table(iris_test$Species == "setosa", pred[, 1] > 0.5) 27 | #' 28 | #' # Multiclass classification 29 | #' nn <- neuralnet((Species == "setosa") + (Species == "versicolor") + (Species == "virginica") 30 | #' ~ Petal.Length + Petal.Width, iris_train, linear.output = FALSE) 31 | #' pred <- predict(nn, iris_test) 32 | #' table(iris_test$Species, apply(pred, 1, which.max)) 33 | #' 34 | #' @author Marvin N. 
Wright 35 | #' @export 36 | predict.nn <- function(object, newdata, rep = 1, all.units = FALSE, ...) { 37 | weights <- object$weights[[rep]] 38 | num_hidden_layers <- length(weights) - 1 39 | 40 | # Set NA weights to 0 41 | for (i in 1:length(weights)) { 42 | if (any(is.na(weights[[i]]))) { 43 | weights[[i]][is.na(weights[[i]])] <- 0 44 | } 45 | } 46 | 47 | # Init prediction with data, subset if necessary 48 | if (ncol(newdata) == length(object$model.list$variables) | is.null(object$model.list)) { 49 | pred <- as.matrix(newdata) 50 | } else { 51 | pred <- as.matrix(newdata[, object$model.list$variables]) 52 | } 53 | 54 | 55 | # Init units if requested 56 | if (all.units) { 57 | units <- list(pred) 58 | } 59 | 60 | # Hidden layers 61 | if (num_hidden_layers > 0) { 62 | for (i in 1:num_hidden_layers) { 63 | pred <- object$act.fct(cbind(1, pred) %*% weights[[i]]) 64 | 65 | # Save unit outputs if requested 66 | if (all.units) { 67 | units <- append(units, list(pred)) 68 | } 69 | } 70 | } 71 | 72 | # Output layer: Only apply activation function if non-linear output 73 | pred <- cbind(1, pred) %*% weights[[num_hidden_layers + 1]] 74 | if (!object$linear.output) { 75 | if (is.null(object$output.act.fct)) { 76 | # Networks from version <1.44.4, no separate output activation 77 | pred <- object$act.fct(pred) 78 | } else { 79 | pred <- object$output.act.fct(pred) 80 | } 81 | } 82 | 83 | # Save unit outputs if requested 84 | if (all.units) { 85 | units <- append(units, list(pred)) 86 | } 87 | 88 | # Return result 89 | if (all.units) { 90 | units 91 | } else { 92 | pred 93 | } 94 | } 95 | 96 | -------------------------------------------------------------------------------- /tests/testthat/test_multiclass.R: -------------------------------------------------------------------------------- 1 | library(neuralnet) 2 | context("neuralnet_multiclass") 3 | 4 | # Scale iris data 5 | dat <- iris 6 | dat[, -5] <- scale(dat[, -5]) 7 | 8 | test_that("Manual multiclass returns correct dimensions", { 9 | nn_multi <- neuralnet((Species == "setosa") + (Species == "versicolor") ~ Petal.Length + Petal.Width, dat, 10 | hidden = c(2, 3), linear.output = FALSE) 11 | 12 | # Skip if not converged 13 | skip_if(is.null(nn_multi$weights)) 14 | 15 | pred_multi <- predict(nn_multi, dat[, c("Petal.Length", "Petal.Width")]) 16 | 17 | expect_length(nn_multi$weights[[1]], 3) # 2 hidden + 1 output layer 18 | expect_equal(dim(nn_multi$weights[[1]][[1]]), c(3, 2)) # First hidden layer: 3 in (1 bias) x 2 units 19 | expect_equal(dim(nn_multi$weights[[1]][[2]]), c(3, 3)) # Second hidden layer: 3 in (1 bias) x 3 units 20 | expect_equal(dim(nn_multi$weights[[1]][[3]]), c(4, 2)) # Output layer: 4 in (1 bias) x 2 units 21 | 22 | expect_equal(dim(pred_multi), c(nrow(dat), 2)) # Predict 2 outputs 23 | }) 24 | 25 | test_that("Auto multiclass returns correct dimensions", { 26 | nn_multi <- neuralnet(Species ~ Petal.Length + Petal.Width, dat, hidden = c(2, 3), linear.output = FALSE) 27 | 28 | # Skip if not converged 29 | skip_if(is.null(nn_multi$weights)) 30 | 31 | pred_multi <- predict(nn_multi, dat[, c("Petal.Length", "Petal.Width")]) 32 | 33 | expect_length(nn_multi$weights[[1]], 3) # 2 hidden + 1 output layer 34 | expect_equal(dim(nn_multi$weights[[1]][[1]]), c(3, 2)) # First hidden layer: 3 in (1 bias) x 2 units 35 | expect_equal(dim(nn_multi$weights[[1]][[2]]), c(3, 3)) # Second hidden layer: 3 in (1 bias) x 3 units 36 | expect_equal(dim(nn_multi$weights[[1]][[3]]), c(4, 3)) # Output layer: 4 in (1 bias) x 3 units 37 | 38 | 
expect_equal(dim(pred_multi), c(nrow(dat), 3)) # Predict 3 outputs 39 | }) 40 | 41 | 42 | test_that("Same results with manual multiclass", { 43 | set.seed(42) 44 | nn_manual <- neuralnet((Species == "setosa") + (Species == "versicolor") + (Species == "virginica") ~ Petal.Length + Petal.Width, dat, linear.output = FALSE) 45 | 46 | set.seed(42) 47 | nn_auto <- neuralnet(Species ~ Petal.Length + Petal.Width, dat, linear.output = FALSE) 48 | 49 | expect_equal(nn_manual$weights[[1]][[1]], nn_auto$weights[[1]][[1]]) 50 | expect_equal(nn_manual$weights[[1]][[2]], nn_auto$weights[[1]][[2]]) 51 | expect_equal(nn_manual$net.result[[1]], nn_auto$net.result[[1]]) 52 | }) 53 | 54 | test_that("Response vector has correct factor levels", { 55 | nn <- neuralnet(Species ~ Petal.Length + Petal.Width, dat, linear.output = FALSE) 56 | expect_equal(unname(apply(nn$response, 1, which)), as.numeric(dat$Species)) 57 | 58 | dat_char <- dat 59 | dat_char$Species <- as.character(dat_char$Species) 60 | nn <- neuralnet(Species ~ Petal.Length + Petal.Width, dat_char, linear.output = FALSE) 61 | expect_equal(colnames(nn$response)[apply(nn$response, 1, which)], dat_char$Species) 62 | 63 | dat_reordered <- rbind(dat[101:150, ], dat[51:100, ], dat[1:50, ]) 64 | nn <- neuralnet(Species ~ Petal.Length + Petal.Width, dat_reordered, linear.output = FALSE) 65 | expect_equal(unname(apply(nn$response, 1, which)), as.numeric(dat_reordered$Species)) 66 | 67 | dat_char_reordered <- rbind(dat_char[101:150, ], dat_char[51:100, ], dat_char[1:50, ]) 68 | nn <- neuralnet(Species ~ Petal.Length + Petal.Width, dat_char_reordered, linear.output = FALSE) 69 | expect_equal(colnames(nn$response)[apply(nn$response, 1, which)], dat_char_reordered$Species) 70 | }) 71 | 72 | -------------------------------------------------------------------------------- /man/neuralnet-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/neuralnet-package.R 3 | \docType{package} 4 | \name{neuralnet-package} 5 | \alias{neuralnet-package} 6 | \title{Training of Neural Networks} 7 | \description{ 8 | Training of neural networks using the backpropagation, resilient 9 | backpropagation with (Riedmiller, 1994) or without weight backtracking 10 | (Riedmiller, 1993) or the modified globally convergent version by 11 | Anastasiadis et al. (2005). The package allows flexible settings through 12 | custom-choice of error and activation function. Furthermore, the calculation 13 | of generalized weights (Intrator O & Intrator N, 1993) is implemented. 14 | } 15 | \note{ 16 | This work has been supported by the German Research Foundation\cr 17 | (DFG: \url{http://www.dfg.de}) under grant scheme PI 345/3-1. 
18 | } 19 | \examples{ 20 | 21 | AND <- c(rep(0,7),1) 22 | OR <- c(0,rep(1,7)) 23 | binary.data <- data.frame(expand.grid(c(0,1), c(0,1), c(0,1)), AND, OR) 24 | print(net <- neuralnet(AND+OR~Var1+Var2+Var3, binary.data, hidden=0, 25 | rep=10, err.fct="ce", linear.output=FALSE)) 26 | 27 | XOR <- c(0,1,1,0) 28 | xor.data <- data.frame(expand.grid(c(0,1), c(0,1)), XOR) 29 | print(net.xor <- neuralnet(XOR~Var1+Var2, xor.data, hidden=2, rep=5)) 30 | plot(net.xor, rep="best") 31 | 32 | data(infert, package="datasets") 33 | print(net.infert <- neuralnet(case~parity+induced+spontaneous, infert, 34 | err.fct="ce", linear.output=FALSE, likelihood=TRUE)) 35 | gwplot(net.infert, selected.covariate="parity") 36 | gwplot(net.infert, selected.covariate="induced") 37 | gwplot(net.infert, selected.covariate="spontaneous") 38 | confidence.interval(net.infert) 39 | 40 | Var1 <- runif(50, 0, 100) 41 | sqrt.data <- data.frame(Var1, Sqrt=sqrt(Var1)) 42 | print(net.sqrt <- neuralnet(Sqrt~Var1, sqrt.data, hidden=10, 43 | threshold=0.01)) 44 | predict(net.sqrt, data.frame(Var1 = (1:10)^2)) 45 | 46 | Var1 <- rpois(100,0.5) 47 | Var2 <- rbinom(100,2,0.6) 48 | Var3 <- rbinom(100,1,0.5) 49 | SUM <- as.integer(abs(Var1+Var2+Var3+(rnorm(100)))) 50 | sum.data <- data.frame(Var1,Var2,Var3, SUM) 51 | print(net.sum <- neuralnet(SUM~Var1+Var2+Var3, sum.data, hidden=1, 52 | act.fct="tanh")) 53 | prediction(net.sum) 54 | 55 | } 56 | \references{ 57 | Riedmiller M. (1994) \emph{Rprop - Description and 58 | Implementation Details.} Technical Report. University of Karlsruhe. 59 | 60 | Riedmiller M. and Braun H. (1993) \emph{A direct adaptive method for faster 61 | backpropagation learning: The RPROP algorithm.} Proceedings of the IEEE 62 | International Conference on Neural Networks (ICNN), pages 586-591. San 63 | Francisco. 64 | 65 | Anastasiadis A. et. al. (2005) \emph{New globally convergent training scheme 66 | based on the resilient propagation algorithm.} Neurocomputing 64, pages 67 | 253-270. 68 | 69 | Intrator O. and Intrator N. (1993) \emph{Using Neural Nets for 70 | Interpretation of Nonlinear Models.} Proceedings of the Statistical 71 | Computing Section, 244-249 San Francisco: American Statistical Society 72 | (eds). 73 | } 74 | \seealso{ 75 | \code{\link{plot.nn}} for plotting of the neural network. 76 | 77 | \code{\link{gwplot}} for plotting of the generalized weights. 78 | 79 | \code{\link{compute}} for computation of the calculated network. 80 | 81 | \code{\link{confidence.interval}} for calculation of a confidence interval 82 | for the weights. 83 | 84 | \code{\link{prediction}} for calculation of a prediction. 85 | } 86 | \author{ 87 | Stefan Fritsch, Frauke Guenther \email{guenther@leibniz-bips.de}, 88 | 89 | Maintainer: Frauke Guenther \email{guenther@leibniz-bips.de} 90 | } 91 | \keyword{neural} 92 | -------------------------------------------------------------------------------- /R/neuralnet-package.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' Training of Neural Networks 4 | #' 5 | #' Training of neural networks using the backpropagation, resilient 6 | #' backpropagation with (Riedmiller, 1994) or without weight backtracking 7 | #' (Riedmiller, 1993) or the modified globally convergent version by 8 | #' Anastasiadis et al. (2005). The package allows flexible settings through 9 | #' custom-choice of error and activation function. Furthermore, the calculation 10 | #' of generalized weights (Intrator O & Intrator N, 1993) is implemented. 
11 | #' 12 | #' @name neuralnet-package 13 | #' @docType package 14 | #' @note This work has been supported by the German Research Foundation\cr 15 | #' (DFG: \url{http://www.dfg.de}) under grant scheme PI 345/3-1. 16 | #' @author Stefan Fritsch, Frauke Guenther \email{guenther@@leibniz-bips.de}, 17 | #' 18 | #' Maintainer: Frauke Guenther \email{guenther@@leibniz-bips.de} 19 | #' @seealso \code{\link{plot.nn}} for plotting of the neural network. 20 | #' 21 | #' \code{\link{gwplot}} for plotting of the generalized weights. 22 | #' 23 | #' \code{\link{compute}} for computation of the calculated network. 24 | #' 25 | #' \code{\link{confidence.interval}} for calculation of a confidence interval 26 | #' for the weights. 27 | #' 28 | #' \code{\link{prediction}} for calculation of a prediction. 29 | #' @references Riedmiller M. (1994) \emph{Rprop - Description and 30 | #' Implementation Details.} Technical Report. University of Karlsruhe. 31 | #' 32 | #' Riedmiller M. and Braun H. (1993) \emph{A direct adaptive method for faster 33 | #' backpropagation learning: The RPROP algorithm.} Proceedings of the IEEE 34 | #' International Conference on Neural Networks (ICNN), pages 586-591. San 35 | #' Francisco. 36 | #' 37 | #' Anastasiadis A. et. al. (2005) \emph{New globally convergent training scheme 38 | #' based on the resilient propagation algorithm.} Neurocomputing 64, pages 39 | #' 253-270. 40 | #' 41 | #' Intrator O. and Intrator N. (1993) \emph{Using Neural Nets for 42 | #' Interpretation of Nonlinear Models.} Proceedings of the Statistical 43 | #' Computing Section, 244-249 San Francisco: American Statistical Society 44 | #' (eds). 45 | #' @keywords neural 46 | #' @examples 47 | #' 48 | #' AND <- c(rep(0,7),1) 49 | #' OR <- c(0,rep(1,7)) 50 | #' binary.data <- data.frame(expand.grid(c(0,1), c(0,1), c(0,1)), AND, OR) 51 | #' print(net <- neuralnet(AND+OR~Var1+Var2+Var3, binary.data, hidden=0, 52 | #' rep=10, err.fct="ce", linear.output=FALSE)) 53 | #' 54 | #' XOR <- c(0,1,1,0) 55 | #' xor.data <- data.frame(expand.grid(c(0,1), c(0,1)), XOR) 56 | #' print(net.xor <- neuralnet(XOR~Var1+Var2, xor.data, hidden=2, rep=5)) 57 | #' plot(net.xor, rep="best") 58 | #' 59 | #' data(infert, package="datasets") 60 | #' print(net.infert <- neuralnet(case~parity+induced+spontaneous, infert, 61 | #' err.fct="ce", linear.output=FALSE, likelihood=TRUE)) 62 | #' gwplot(net.infert, selected.covariate="parity") 63 | #' gwplot(net.infert, selected.covariate="induced") 64 | #' gwplot(net.infert, selected.covariate="spontaneous") 65 | #' confidence.interval(net.infert) 66 | #' 67 | #' Var1 <- runif(50, 0, 100) 68 | #' sqrt.data <- data.frame(Var1, Sqrt=sqrt(Var1)) 69 | #' print(net.sqrt <- neuralnet(Sqrt~Var1, sqrt.data, hidden=10, 70 | #' threshold=0.01)) 71 | #' predict(net.sqrt, data.frame(Var1 = (1:10)^2)) 72 | #' 73 | #' Var1 <- rpois(100,0.5) 74 | #' Var2 <- rbinom(100,2,0.6) 75 | #' Var3 <- rbinom(100,1,0.5) 76 | #' SUM <- as.integer(abs(Var1+Var2+Var3+(rnorm(100)))) 77 | #' sum.data <- data.frame(Var1,Var2,Var3, SUM) 78 | #' print(net.sum <- neuralnet(SUM~Var1+Var2+Var3, sum.data, hidden=1, 79 | #' act.fct="tanh")) 80 | #' prediction(net.sum) 81 | #' 82 | NULL 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /tests/testthat/test_neuralnet.R: -------------------------------------------------------------------------------- 1 | library(neuralnet) 2 | context("neuralnet") 3 | 4 | # Scale iris data 5 | dat <- iris 6 | dat[, -5] <- scale(dat[, -5]) 7 | 8 | nn <- 
neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, linear.output = FALSE) 9 | nn2 <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, hidden = c(3, 2), linear.output = FALSE) 10 | 11 | if (!is.null(nn$weights)) { 12 | pred <- predict(nn, dat[, c("Petal.Length", "Petal.Width")]) 13 | } 14 | if (!is.null(nn2$weights)) { 15 | pred2 <- predict(nn2, dat[, c("Petal.Length", "Petal.Width")]) 16 | } 17 | 18 | test_that("Fitting returns nn object with correct size", { 19 | skip_if(is.null(nn$weights)) 20 | expect_is(nn, "nn") 21 | expect_length(nn, 15) 22 | }) 23 | 24 | test_that("Prediction returns numeric with correct size", { 25 | skip_if(is.null(nn$weights)) 26 | expect_is(pred, "matrix") 27 | expect_equal(dim(pred), c(nrow(dat), 1)) 28 | }) 29 | 30 | test_that("predict() function returns list of correct size for unit prediction", { 31 | skip_if(is.null(nn$weights)) 32 | pred_all <- predict(nn, dat[, c("Petal.Length", "Petal.Width")], all.units = TRUE) 33 | expect_equal(length(pred_all), 3) 34 | 35 | skip_if(is.null(nn2$weights)) 36 | pred_all2 <- predict(nn2, dat[, c("Petal.Length", "Petal.Width")], all.units = TRUE) 37 | expect_equal(length(pred_all2), 4) 38 | }) 39 | 40 | test_that("predict() works if more variables in data", { 41 | skip_if(is.null(nn$weights)) 42 | pred_all <- predict(nn, dat) 43 | expect_equal(dim(pred_all), c(nrow(dat), 1)) 44 | }) 45 | 46 | test_that("Custom activation function works", { 47 | expect_silent(neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, 48 | linear.output = FALSE, act.fct = function(x) log(1 + exp(x)))) 49 | expect_silent(neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, 50 | linear.output = FALSE, act.fct = function(x) {log(1 + exp(x))})) 51 | 52 | }) 53 | 54 | test_that("Same result with custom activation function", { 55 | set.seed(10) 56 | nn_custom <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, 57 | linear.output = FALSE, act.fct = function(x) 1/(1 + exp(-x))) 58 | 59 | set.seed(10) 60 | nn_custom2 <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, 61 | linear.output = FALSE, act.fct = function(x) {1/(1 + exp(-x))}) 62 | 63 | set.seed(10) 64 | nn_default <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, 65 | linear.output = FALSE, act.fct = "logistic") 66 | 67 | expect_equal(nn_custom$net.result, nn_custom2$net.result) 68 | expect_equal(nn_custom$net.result, nn_default$net.result) 69 | 70 | expect_equal(nn_custom$result.matrix, nn_custom2$result.matrix) 71 | expect_equal(nn_custom$result.matrix, nn_default$result.matrix) 72 | }) 73 | 74 | test_that("Same result with custom error function", { 75 | set.seed(10) 76 | nn_custom <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, 77 | linear.output = FALSE, err.fct = function(x, y) {1/2 * (y - x)^2}) 78 | 79 | set.seed(10) 80 | nn_default <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, dat, 81 | linear.output = FALSE, err.fct = "sse") 82 | 83 | expect_equal(nn_custom$net.result, nn_default$net.result) 84 | expect_equal(nn_custom$result.matrix, nn_default$result.matrix) 85 | }) 86 | 87 | test_that("Error if 'ce' error function used in non-binary outcome", { 88 | expect_error(neuralnet(Sepal.Length ~ Petal.Length + Petal.Width, 89 | dat, linear.output = TRUE, err.fct = "ce"), 90 | "Error function 'ce' only implemented for binary response\\.") 91 | }) 92 | 93 | test_that("Dots in formula work", { 94 | set.seed(42) 95 | nn_dot <- neuralnet(Species ~ ., dat, 
linear.output = FALSE) 96 | 97 | set.seed(42) 98 | nn_nodot <- neuralnet(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, dat, linear.output = FALSE) 99 | 100 | expect_equal(nn_dot$weights[[1]][[1]], nn_nodot$weights[[1]][[1]]) 101 | expect_equal(nn_dot$weights[[1]][[2]], nn_nodot$weights[[1]][[2]]) 102 | expect_equal(nn_dot$net.result[[1]], nn_nodot$net.result[[1]]) 103 | }) 104 | 105 | test_that("No error if replications don't converge", { 106 | # One fail, one success (2 reps) 107 | set.seed(1) 108 | expect_warning(nn <- neuralnet(Species ~ ., iris, linear.output = FALSE, rep = 2, stepmax = 1e4)) 109 | expect_is(nn$net.result, "list") 110 | expect_length(nn$net.result, 2) 111 | expect_null(nn$net.result[[1]]) 112 | 113 | # Both fail (2 reps) 114 | set.seed(1) 115 | expect_warning(nn <- neuralnet(Species ~ ., iris, linear.output = FALSE, rep = 2, stepmax = 10)) 116 | expect_null(nn$net.result) 117 | }) 118 | 119 | test_that("Works with ReLu activation", { 120 | expect_silent(neuralnet(Species ~ ., dat, linear.output = FALSE, act.fct = "relu")) 121 | expect_silent(neuralnet(Species ~ ., dat, linear.output = FALSE, act.fct = "ReLu")) 122 | }) 123 | -------------------------------------------------------------------------------- /R/prediction.r: -------------------------------------------------------------------------------- 1 | #' Summarizes the output of the neural network, the data and the fitted values 2 | #' of glm objects (if available) 3 | #' 4 | #' \code{prediction}, a method for objects of class \code{nn}, typically 5 | #' produced by \code{neuralnet}. In a first step, the dataframe will be 6 | #' amended by a mean response, the mean of all responses corresponding to the 7 | #' same covariate-vector. The calculated data.error is the error function 8 | #' between the original response and the new mean response. In a second step, 9 | #' all duplicate rows will be erased to get a quick overview of the data. To 10 | #' obtain an overview of the results of the neural network and the glm objects, 11 | #' the covariate matrix will be bound to the output of the neural network and 12 | #' the fitted values of the glm object(if available) and will be reduced by all 13 | #' duplicate rows. 14 | #' 15 | #' 16 | #' @param x neural network 17 | #' @param list.glm an optional list of glm objects 18 | #' @return a list of the summaries of the repetitions of the neural networks, 19 | #' the data and the glm objects (if available). 
20 | #' @author Stefan Fritsch, Frauke Guenther \email{guenther@@leibniz-bips.de} 21 | #' @seealso \code{\link{neuralnet}} 22 | #' @keywords neural 23 | #' @examples 24 | #' 25 | #' Var1 <- rpois(100,0.5) 26 | #' Var2 <- rbinom(100,2,0.6) 27 | #' Var3 <- rbinom(100,1,0.5) 28 | #' SUM <- as.integer(abs(Var1+Var2+Var3+(rnorm(100)))) 29 | #' sum.data <- data.frame(Var1,Var2,Var3, SUM) 30 | #' print(net.sum <- neuralnet( SUM~Var1+Var2+Var3, sum.data, hidden=1, 31 | #' act.fct="tanh")) 32 | #' main <- glm(SUM~Var1+Var2+Var3, sum.data, family=poisson()) 33 | #' full <- glm(SUM~Var1*Var2*Var3, sum.data, family=poisson()) 34 | #' prediction(net.sum, list.glm=list(main=main, full=full)) 35 | #' 36 | #' @export prediction 37 | prediction <- 38 | function (x, list.glm = NULL) 39 | { 40 | nn <- x 41 | data.result <- calculate.data.result(response = nn$response, 42 | model.list = nn$model.list, covariate = nn$covariate) 43 | predictions <- calculate.predictions(covariate = nn$covariate, 44 | data.result = data.result, list.glm = list.glm, matrix = nn$result.matrix, 45 | list.net.result = nn$net.result, model.list = nn$model.list) 46 | if (attr(nn$err.fct, "type") == "ce" && all(data.result >= 0) && 47 | all(data.result <= 1)) 48 | data.error <- sum(nn$err.fct(data.result, nn$response), 49 | na.rm = T) 50 | else data.error <- sum(nn$err.fct(data.result, nn$response)) 51 | message("Data Error:\t", data.error, ";") 52 | predictions 53 | } 54 | calculate.predictions <- 55 | function (covariate, data.result, list.glm, matrix, list.net.result, 56 | model.list) 57 | { 58 | not.duplicated <- !duplicated(covariate) 59 | nrow.notdupl <- sum(not.duplicated) 60 | covariate.mod <- matrix(covariate[not.duplicated, ], nrow = nrow.notdupl) 61 | predictions <- list(data = cbind(covariate.mod, matrix(data.result[not.duplicated, 62 | ], nrow = nrow.notdupl))) 63 | if (!is.null(matrix)) { 64 | for (i in length(list.net.result):1) { 65 | pred.temp <- cbind(covariate.mod, matrix(list.net.result[[i]][not.duplicated, 66 | ], nrow = nrow.notdupl)) 67 | predictions <- eval(parse(text = paste("c(list(rep", 68 | i, "=pred.temp), predictions)", sep = ""))) 69 | } 70 | } 71 | if (!is.null(list.glm)) { 72 | for (i in 1:length(list.glm)) { 73 | pred.temp <- cbind(covariate.mod, matrix(list.glm[[i]]$fitted.values[not.duplicated], 74 | nrow = nrow.notdupl)) 75 | text <- paste("c(predictions, list(glm.", names(list.glm[i]), 76 | "=pred.temp))", sep = "") 77 | predictions <- eval(parse(text = text)) 78 | } 79 | } 80 | for (i in 1:length(predictions)) { 81 | colnames(predictions[[i]]) <- c(model.list$variables, 82 | model.list$response) 83 | if (nrow(covariate) > 1) 84 | for (j in (1:ncol(covariate))) predictions[[i]] <- predictions[[i]][order(predictions[[i]][, 85 | j]), ] 86 | rownames(predictions[[i]]) <- 1:nrow(predictions[[i]]) 87 | } 88 | predictions 89 | } 90 | calculate.data.result <- 91 | function (response, covariate, model.list) 92 | { 93 | duplicated <- duplicated(covariate) 94 | if (!any(duplicated)) { 95 | return(response) 96 | } 97 | which.duplicated <- seq_along(duplicated)[duplicated] 98 | which.not.duplicated <- seq_along(duplicated)[!duplicated] 99 | ncol.response <- ncol(response) 100 | if (ncol(covariate) == 1) { 101 | for (each in which.not.duplicated) { 102 | out <- NULL 103 | if (length(which.duplicated) > 0) { 104 | out <- covariate[which.duplicated, ] == covariate[each, 105 | ] 106 | if (any(out)) { 107 | rows <- c(each, which.duplicated[out]) 108 | response[rows, ] = matrix(colMeans(matrix(response[rows, 109 | 
], ncol = ncol.response)), ncol = ncol.response, 110 | nrow = length(rows), byrow = T) 111 | which.duplicated <- which.duplicated[-out] 112 | } 113 | } 114 | } 115 | } 116 | else { 117 | tcovariate <- t(covariate) 118 | for (each in which.not.duplicated) { 119 | out <- NULL 120 | if (length(which.duplicated) > 0) { 121 | out <- apply(tcovariate[, which.duplicated] == 122 | covariate[each, ], 2, FUN = all) 123 | if (any(out)) { 124 | rows <- c(each, which.duplicated[out]) 125 | response[rows, ] = matrix(colMeans(matrix(response[rows, 126 | ], ncol = ncol.response)), ncol = ncol.response, 127 | nrow = length(rows), byrow = T) 128 | which.duplicated <- which.duplicated[-out] 129 | } 130 | } 131 | } 132 | } 133 | response 134 | } 135 | -------------------------------------------------------------------------------- /R/gwplot.r: -------------------------------------------------------------------------------- 1 | #' Plot method for generalized weights 2 | #' 3 | #' \code{gwplot}, a method for objects of class \code{nn}, typically produced 4 | #' by \code{neuralnet}. Plots the generalized weights (Intrator and Intrator, 5 | #' 1993) for one specific covariate and one response variable. 6 | #' 7 | #' 8 | #' @param x an object of class \code{nn} 9 | #' @param rep an integer indicating the repetition to plot. If rep="best", the 10 | #' repetition with the smallest error will be plotted. If not stated all 11 | #' repetitions will be plotted. 12 | #' @param max maximum of the y axis. In default, max is set to the highest 13 | #' y-value. 14 | #' @param min minimum of the y axis. In default, min is set to the smallest 15 | #' y-value. 16 | #' @param file a character string naming the plot to write to. If not stated, 17 | #' the plot will not be saved. 18 | #' @param selected.covariate either a string of the covariate's name or an 19 | #' integer of the ordered covariates, indicating the reference covariate in the 20 | #' generalized weights plot. Defaulting to the first covariate. 21 | #' @param selected.response either a string of the response variable's name or 22 | #' an integer of the ordered response variables, indicating the reference 23 | #' response in the generalized weights plot. Defaulting to the first response 24 | #' variable. 25 | #' @param highlight a logical value, indicating whether to highlight (red 26 | #' color) the best repetition (smallest error). Only reasonable if rep=NULL. 27 | #' Default is FALSE 28 | #' @param type a character indicating the type of plotting; actually any of the 29 | #' types as in \code{\link{plot.default}}. 30 | #' @param col a color of the generalized weights. 31 | #' @param \dots Arguments to be passed to methods, such as graphical parameters 32 | #' (see \code{\link{par}}). 33 | #' @author Stefan Fritsch, Frauke Guenther \email{guenther@@leibniz-bips.de} 34 | #' @seealso \code{\link{neuralnet}} 35 | #' @references Intrator O. and Intrator N. (1993) \emph{Using Neural Nets for 36 | #' Interpretation of Nonlinear Models.} Proceedings of the Statistical 37 | #' Computing Section, 244-249 San Francisco: American Statistical Society 38 | #' (eds.) 
39 | #' @keywords neural 40 | #' @examples 41 | #' 42 | #' data(infert, package="datasets") 43 | #' print(net.infert <- neuralnet(case~parity+induced+spontaneous, infert, 44 | #' err.fct="ce", linear.output=FALSE, likelihood=TRUE)) 45 | #' gwplot(net.infert, selected.covariate="parity") 46 | #' gwplot(net.infert, selected.covariate="induced") 47 | #' gwplot(net.infert, selected.covariate="spontaneous") 48 | #' 49 | #' @export gwplot 50 | gwplot <- 51 | function (x, rep = NULL, max = NULL, min = NULL, file = NULL, 52 | selected.covariate = 1, selected.response = 1, highlight = FALSE, 53 | type = "p", col = "black", ...) 54 | { 55 | net <- x 56 | if (is.null(net$generalized.weights)) 57 | stop("generalized weights were not calculated", call. = F) 58 | if (!is.null(file)) { 59 | if (!is.character(file)) 60 | stop("'file' must be a string") 61 | if (file.exists(file)) 62 | stop(sprintf("%s already exists", sQuote(file))) 63 | } 64 | if (!is.numeric(selected.covariate)) 65 | for (i in 1:length(net$model.list$variables)) if (net$model.list$variables[i] == 66 | selected.covariate) 67 | selected.covariate = i 68 | if (!is.numeric(selected.covariate) || selected.covariate < 69 | 1 || selected.covariate > ncol(net$covariate)) 70 | stop("'selected.covariate' does not exist") 71 | if (!is.numeric(selected.response)) 72 | for (i in 1:length(net$model.list$response)) if (net$model.list$response[i] == 73 | selected.response) 74 | selected.response = i 75 | if (!is.numeric(selected.response) || selected.response < 76 | 1 || selected.response > ncol(net$response)) 77 | stop("'selected.response' does not exist") 78 | if (!is.null(rep)) { 79 | if (rep == "best") 80 | rep <- as.integer(which.min(net$result.matrix["error", 81 | ])) 82 | if (length(net$generalized.weights) < rep) 83 | stop("'rep' does not exist") 84 | } 85 | covariate <- as.vector(net$covariate[, selected.covariate]) 86 | variablename <- net$model.list$variables[selected.covariate] 87 | column <- (selected.response - 1) * ncol(net$covariate) + 88 | selected.covariate 89 | if (is.null(rep)) { 90 | matrix <- as.matrix(sapply(net$generalized.weights, function(x) rbind(x[, 91 | column]))) 92 | item.to.print <- min(which.min(net$result.matrix["error", 93 | ])) 94 | } 95 | else { 96 | highlight = F 97 | matrix <- as.matrix(net$generalized.weights[[rep]][, 98 | column]) 99 | item.to.print <- 1 100 | } 101 | if (is.null(max)) 102 | max <- max(matrix) 103 | if (is.null(min)) 104 | min <- min(matrix) 105 | ylim <- c(min, max) 106 | if (!highlight || item.to.print != 1 || ncol(matrix) == 1) 107 | graphics::plot(x = covariate, y = matrix[, 1], ylim = ylim, xlab = variablename, 108 | ylab = "GW", type = type, col = col, ...) 109 | else graphics::plot(x = covariate, y = matrix[, 2], ylim = ylim, xlab = variablename, 110 | ylab = "GW", type = type, col = col, ...) 111 | if (ncol(matrix) >= 2) { 112 | for (i in 2:ncol(matrix)) if (!highlight || (i != item.to.print)) 113 | graphics::lines(x = covariate, y = matrix[, i], type = type, 114 | col = col, ...) 115 | } 116 | if (highlight) { 117 | graphics::lines(x = covariate, y = matrix[, item.to.print], type = type, 118 | col = "red", ...) 119 | graphics::legend("topright", paste("Minimal Error: ", round(net$result.matrix["error", 120 | item.to.print], 3), sep = ""), col = "red", ...) 
121 | } 122 | graphics::title(paste("Response: ", net$model.list$response[selected.response], 123 | sep = "")) 124 | if (!is.null(file)) { 125 | weight.plot <- grDevices::recordPlot() 126 | save(weight.plot, file = file) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /man/neuralnet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/neuralnet.r 3 | \name{neuralnet} 4 | \alias{neuralnet} 5 | \alias{print.nn} 6 | \title{Training of neural networks} 7 | \usage{ 8 | neuralnet(formula, data, hidden = 1, threshold = 0.01, 9 | stepmax = 1e+05, rep = 1, startweights = NULL, 10 | learningrate.limit = NULL, learningrate.factor = list(minus = 0.5, 11 | plus = 1.2), learningrate = NULL, lifesign = "none", 12 | lifesign.step = 1000, algorithm = "rprop+", err.fct = "sse", 13 | act.fct = "logistic", output.act.fct = "logistic", 14 | linear.output = TRUE, exclude = NULL, constant.weights = NULL, 15 | likelihood = FALSE) 16 | } 17 | \arguments{ 18 | \item{formula}{a symbolic description of the model to be fitted.} 19 | 20 | \item{data}{a data frame containing the variables specified in 21 | \code{formula}.} 22 | 23 | \item{hidden}{a vector of integers specifying the number of hidden neurons 24 | (vertices) in each layer.} 25 | 26 | \item{threshold}{a numeric value specifying the threshold for the partial 27 | derivatives of the error function as stopping criteria.} 28 | 29 | \item{stepmax}{the maximum steps for the training of the neural network. 30 | Reaching this maximum leads to a stop of the neural network's training 31 | process.} 32 | 33 | \item{rep}{the number of repetitions for the neural network's training.} 34 | 35 | \item{startweights}{a vector containing starting values for the weights. 36 | Set to \code{NULL} for random initialization.} 37 | 38 | \item{learningrate.limit}{a vector or a list containing the lowest and 39 | highest limit for the learning rate. Used only for RPROP and GRPROP.} 40 | 41 | \item{learningrate.factor}{a vector or a list containing the multiplication 42 | factors for the upper and lower learning rate. Used only for RPROP and 43 | GRPROP.} 44 | 45 | \item{learningrate}{a numeric value specifying the learning rate used by 46 | traditional backpropagation. Used only for traditional backpropagation.} 47 | 48 | \item{lifesign}{a string specifying how much the function will print during 49 | the calculation of the neural network. 'none', 'minimal' or 'full'.} 50 | 51 | \item{lifesign.step}{an integer specifying the stepsize to print the minimal 52 | threshold in full lifesign mode.} 53 | 54 | \item{algorithm}{a string containing the algorithm type to calculate the 55 | neural network. The following types are possible: 'backprop', 'rprop+', 56 | 'rprop-', 'sag', or 'slr'. 'backprop' refers to backpropagation, 'rprop+' 57 | and 'rprop-' refer to the resilient backpropagation with and without weight 58 | backtracking, while 'sag' and 'slr' induce the usage of the modified 59 | globally convergent algorithm (grprop). See Details for more information.} 60 | 61 | \item{err.fct}{a differentiable function that is used for the calculation of 62 | the error. 
Alternatively, the strings 'sse' and 'ce' which stand for the sum 63 | of squared errors and the cross-entropy can be used.} 64 | 65 | \item{act.fct}{a differentiable function that is used for smoothing the 66 | result of the cross product of the covariate or neurons and the weights. 67 | Additionally, the strings 'logistic', 'tanh' and 'relu' are possible for the 68 | logistic function, tangent hyperbolicus and rectified linear unit.} 69 | 70 | \item{output.act.fct}{activation function used in the output layer 71 | (if \code{linear.output = FALSE}). Use same functions/strings as in \code{act.fct}.} 72 | 73 | \item{linear.output}{logical. If output.act.fct should not be applied to the output 74 | neurons, set linear output to TRUE, otherwise to FALSE.} 75 | 76 | \item{exclude}{a vector or a matrix specifying the weights that are 77 | excluded from the calculation. If given as a vector, the exact positions of 78 | the weights must be known. A matrix with n-rows and 3 columns will exclude n 79 | weights, where the first column stands for the layer, the second column for 80 | the input neuron and the third column for the output neuron of the weight.} 81 | 82 | \item{constant.weights}{a vector specifying the values of the weights that 83 | are excluded from the training process and treated as fixed.} 84 | 85 | \item{likelihood}{logical. If the error function is equal to the negative 86 | log-likelihood function, the information criteria AIC and BIC will be 87 | calculated. Furthermore, the usage of confidence.interval is meaningful.} 88 | } 89 | \value{ 90 | \code{neuralnet} returns an object of class \code{nn}. An object of 91 | class \code{nn} is a list containing at most the following components: 92 | 93 | \item{ call }{ the matched call. } 94 | \item{ response }{ extracted from the \code{data argument}. } 95 | \item{ covariate }{ the variables extracted from the \code{data argument}. } 96 | \item{ model.list }{ a list containing the covariates and the response variables extracted from the \code{formula argument}. } 97 | \item{ err.fct }{ the error function. } 98 | \item{ act.fct }{ the activation function. } 99 | \item{ data }{ the \code{data argument}.} 100 | \item{ net.result }{ a list containing the overall result of the neural network for every repetition.} 101 | \item{ weights }{ a list containing the fitted weights of the neural network for every repetition. } 102 | \item{ generalized.weights }{ a list containing the generalized weights of the neural network for every repetition. } 103 | \item{ result.matrix }{ a matrix containing the reached threshold, needed steps, error, AIC and BIC (if computed) and weights for every repetition. Each column represents one repetition. } 104 | \item{ startweights }{ a list containing the startweights of the neural network for every repetition. } 105 | } 106 | \description{ 107 | Train neural networks using backpropagation, 108 | resilient backpropagation (RPROP) with (Riedmiller, 1994) or without weight 109 | backtracking (Riedmiller and Braun, 1993) or the modified globally 110 | convergent version (GRPROP) by Anastasiadis et al. (2005). The function 111 | allows flexible settings through custom-choice of error and activation 112 | function. Furthermore, the calculation of generalized weights (Intrator O. 113 | and Intrator N., 1993) is implemented.
114 | } 115 | \details{ 116 | The globally convergent algorithm is based on the resilient backpropagation 117 | without weight backtracking and additionally modifies one learning rate, 118 | either the learningrate associated with the smallest absolute gradient (sag) 119 | or the smallest learningrate (slr) itself. The learning rates in the grprop 120 | algorithm are limited to the boundaries defined in learningrate.limit. 121 | } 122 | \examples{ 123 | 124 | library(neuralnet) 125 | 126 | # Binary classification 127 | nn <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, iris, linear.output = FALSE) 128 | \dontrun{print(nn)} 129 | \dontrun{plot(nn)} 130 | 131 | # Multiclass classification 132 | nn <- neuralnet(Species ~ Petal.Length + Petal.Width, iris, linear.output = FALSE) 133 | \dontrun{print(nn)} 134 | \dontrun{plot(nn)} 135 | 136 | # Custom activation function 137 | softplus <- function(x) log(1 + exp(x)) 138 | nn <- neuralnet((Species == "setosa") ~ Petal.Length + Petal.Width, iris, 139 | linear.output = FALSE, hidden = c(3, 2), act.fct = softplus) 140 | \dontrun{print(nn)} 141 | \dontrun{plot(nn)} 142 | 143 | } 144 | \references{ 145 | Riedmiller M. (1994) \emph{Rprop - Description and 146 | Implementation Details.} Technical Report. University of Karlsruhe. 147 | 148 | Riedmiller M. and Braun H. (1993) \emph{A direct adaptive method for faster 149 | backpropagation learning: The RPROP algorithm.} Proceedings of the IEEE 150 | International Conference on Neural Networks (ICNN), pages 586-591. San 151 | Francisco. 152 | 153 | Anastasiadis A. et. al. (2005) \emph{New globally convergent training scheme 154 | based on the resilient propagation algorithm.} Neurocomputing 64, pages 155 | 253-270. 156 | 157 | Intrator O. and Intrator N. (1993) \emph{Using Neural Nets for 158 | Interpretation of Nonlinear Models.} Proceedings of the Statistical 159 | Computing Section, 244-249 San Francisco: American Statistical Society 160 | (eds). 161 | } 162 | \seealso{ 163 | \code{\link{plot.nn}} for plotting the neural network. 164 | 165 | \code{\link{gwplot}} for plotting the generalized weights. 166 | 167 | \code{\link{predict.nn}} for computation of a given neural network for given 168 | covariate vectors (formerly \code{compute}). 169 | 170 | \code{\link{confidence.interval}} for calculation of confidence intervals of 171 | the weights. 172 | 173 | \code{\link{prediction}} for a summary of the output of the neural network. 174 | } 175 | \author{ 176 | Stefan Fritsch, Frauke Guenther, Marvin N. Wright 177 | } 178 | \keyword{neural} 179 | -------------------------------------------------------------------------------- /R/plot.nn.r: -------------------------------------------------------------------------------- 1 | #' Plot method for neural networks 2 | #' 3 | #' \code{plot.nn}, a method for the \code{plot} generic. It is designed for an 4 | #' inspection of the weights for objects of class \code{nn}, typically produced 5 | #' by \code{neuralnet}. 6 | #' 7 | #' 8 | #' @param x an object of class \code{nn} 9 | #' @param rep repetition of the neural network. If rep="best", the repetition 10 | #' with the smallest error will be plotted. If not stated all repetitions will 11 | #' be plotted, each in a separate window. 12 | #' @param x.entry x-coordinate of the entry layer. Depends on the arrow.length 13 | #' in default. 14 | #' @param x.out x-coordinate of the output layer. 15 | #' @param radius radius of the neurons. 16 | #' @param arrow.length length of the entry and out arrows. 
17 | #' @param intercept a logical value indicating whether to plot the intercept. 18 | #' @param intercept.factor x-position factor of the intercept. The closer the 19 | #' factor is to 0, the closer the intercept is to its left neuron. 20 | #' @param information a logical value indicating whether to add the error and 21 | #' steps to the plot. 22 | #' @param information.pos y-position of the information. 23 | #' @param col.entry.synapse color of the synapses leading to the input neurons. 24 | #' @param col.entry color of the input neurons. 25 | #' @param col.hidden color of the neurons in the hidden layer. 26 | #' @param col.hidden.synapse color of the weighted synapses. 27 | #' @param col.out color of the output neurons. 28 | #' @param col.out.synapse color of the synapses leading away from the output 29 | #' neurons. 30 | #' @param col.intercept color of the intercept. 31 | #' @param fontsize fontsize of the text. 32 | #' @param dimension size of the plot in inches. 33 | #' @param show.weights a logical value indicating whether to print the 34 | #' calculated weights above the synapses. 35 | #' @param file a character string naming the plot to write to. If not stated, 36 | #' the plot will not be saved. 37 | #' @param \dots arguments to be passed to methods, such as graphical parameters 38 | #' (see \code{\link{par}}). 39 | #' @author Stefan Fritsch, Frauke Guenther \email{guenther@@leibniz-bips.de} 40 | #' @seealso \code{\link{neuralnet}} 41 | #' @keywords neural 42 | #' @examples 43 | #' 44 | #' XOR <- c(0,1,1,0) 45 | #' xor.data <- data.frame(expand.grid(c(0,1), c(0,1)), XOR) 46 | #' print(net.xor <- neuralnet( XOR~Var1+Var2, xor.data, hidden=2, rep=5)) 47 | #' plot(net.xor, rep="best") 48 | #' 49 | #' @S3method plot nn 50 | plot.nn <- 51 | function (x, rep = NULL, x.entry = NULL, x.out = NULL, radius = 0.15, 52 | arrow.length = 0.2, intercept = TRUE, intercept.factor = 0.4, 53 | information = TRUE, information.pos = 0.1, col.entry.synapse = "black", 54 | col.entry = "black", col.hidden = "black", col.hidden.synapse = "black", 55 | col.out = "black", col.out.synapse = "black", col.intercept = "blue", 56 | fontsize = 12, dimension = 6, show.weights = TRUE, file = NULL, 57 | ...) 58 | { 59 | net <- x 60 | if (is.null(net$weights)) 61 | stop("weights were not calculated") 62 | if (!is.null(file) && !is.character(file)) 63 | stop("'file' must be a string") 64 | if (is.null(rep)) { 65 | for (i in 1:length(net$weights)) { 66 | if (!is.null(file)) 67 | file.rep <- paste(file, ".", i, sep = "") 68 | else file.rep <- NULL 69 | grDevices::dev.new() 70 | plot.nn(net, rep = i, x.entry, x.out, radius, arrow.length, 71 | intercept, intercept.factor, information, information.pos, 72 | col.entry.synapse, col.entry, col.hidden, col.hidden.synapse, 73 | col.out, col.out.synapse, col.intercept, fontsize, 74 | dimension, show.weights, file.rep, ...) 
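        # each repetition is drawn in its own graphics device; 'file.rep' appends the repetition index to 'file' so every plot can be saved to a separate file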
75 | } 76 | } 77 | else { 78 | if (is.character(file) && file.exists(file)) 79 | stop(sprintf("%s already exists", sQuote(file))) 80 | result.matrix <- t(net$result.matrix) 81 | if (rep == "best") 82 | rep <- as.integer(which.min(result.matrix[, "error"])) 83 | if (rep > length(net$weights)) 84 | stop("'rep' does not exist") 85 | weights <- net$weights[[rep]] 86 | if (is.null(x.entry)) 87 | x.entry <- 0.5 - (arrow.length/2) * length(weights) 88 | if (is.null(x.out)) 89 | x.out <- 0.5 + (arrow.length/2) * length(weights) 90 | width <- max(x.out - x.entry + 0.2, 0.8) * 8 91 | radius <- radius/dimension 92 | entry.label <- net$model.list$variables 93 | out.label <- net$model.list$response 94 | neuron.count <- array(0, length(weights) + 1) 95 | neuron.count[1] <- nrow(weights[[1]]) - 1 96 | neuron.count[2] <- ncol(weights[[1]]) 97 | x.position <- array(0, length(weights) + 1) 98 | x.position[1] <- x.entry 99 | x.position[length(weights) + 1] <- x.out 100 | if (length(weights) > 1) 101 | for (i in 2:length(weights)) { 102 | neuron.count[i + 1] <- ncol(weights[[i]]) 103 | x.position[i] <- x.entry + (i - 1) * (x.out - 104 | x.entry)/length(weights) 105 | } 106 | y.step <- 1/(neuron.count + 1) 107 | y.position <- array(0, length(weights) + 1) 108 | y.intercept <- 1 - 2 * radius 109 | information.pos <- min(min(y.step) - 0.1, 0.2) 110 | if (length(entry.label) != neuron.count[1]) { 111 | if (length(entry.label) < neuron.count[1]) { 112 | tmp <- NULL 113 | for (i in 1:(neuron.count[1] - length(entry.label))) { 114 | tmp <- c(tmp, "no name") 115 | } 116 | entry.label <- c(entry.label, tmp) 117 | } 118 | } 119 | if (length(out.label) != neuron.count[length(neuron.count)]) { 120 | if (length(out.label) < neuron.count[length(neuron.count)]) { 121 | tmp <- NULL 122 | for (i in 1:(neuron.count[length(neuron.count)] - 123 | length(out.label))) { 124 | tmp <- c(tmp, "no name") 125 | } 126 | out.label <- c(out.label, tmp) 127 | } 128 | } 129 | grid::grid.newpage() 130 | for (k in 1:length(weights)) { 131 | for (i in 1:neuron.count[k]) { 132 | y.position[k] <- y.position[k] + y.step[k] 133 | y.tmp <- 0 134 | for (j in 1:neuron.count[k + 1]) { 135 | y.tmp <- y.tmp + y.step[k + 1] 136 | result <- calculate.delta(c(x.position[k], 137 | x.position[k + 1]), c(y.position[k], y.tmp), 138 | radius) 139 | x <- c(x.position[k], x.position[k + 1] - result[1]) 140 | y <- c(y.position[k], y.tmp + result[2]) 141 | grid::grid.lines(x = x, y = y, arrow = grid::arrow(length = grid::unit(0.15, 142 | "cm"), type = "closed"), gp = grid::gpar(fill = col.hidden.synapse, 143 | col = col.hidden.synapse, ...)) 144 | if (show.weights) 145 | draw.text(label = weights[[k]][neuron.count[k] - 146 | i + 2, neuron.count[k + 1] - j + 1], x = c(x.position[k], 147 | x.position[k + 1]), y = c(y.position[k], 148 | y.tmp), xy.null = 1.25 * result, color = col.hidden.synapse, 149 | fontsize = fontsize - 2, ...) 150 | } 151 | if (k == 1) { 152 | grid::grid.lines(x = c((x.position[1] - arrow.length), 153 | x.position[1] - radius), y = y.position[k], 154 | arrow = grid::arrow(length = grid::unit(0.15, "cm"), 155 | type = "closed"), gp = grid::gpar(fill = col.entry.synapse, 156 | col = col.entry.synapse, ...)) 157 | draw.text(label = entry.label[(neuron.count[1] + 158 | 1) - i], x = c((x.position - arrow.length), 159 | x.position[1] - radius), y = c(y.position[k], 160 | y.position[k]), xy.null = c(0, 0), color = col.entry.synapse, 161 | fontsize = fontsize, ...) 
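          # input-layer neuron: a white circle outlined in 'col.entry'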
162 | grid::grid.circle(x = x.position[k], y = y.position[k], 163 | r = radius, gp = grid::gpar(fill = "white", col = col.entry, 164 | ...)) 165 | } 166 | else { 167 | grid::grid.circle(x = x.position[k], y = y.position[k], 168 | r = radius, gp = grid::gpar(fill = "white", col = col.hidden, 169 | ...)) 170 | } 171 | } 172 | } 173 | out <- length(neuron.count) 174 | for (i in 1:neuron.count[out]) { 175 | y.position[out] <- y.position[out] + y.step[out] 176 | grid::grid.lines(x = c(x.position[out] + radius, x.position[out] + 177 | arrow.length), y = y.position[out], arrow = grid::arrow(length = grid::unit(0.15, 178 | "cm"), type = "closed"), gp = grid::gpar(fill = col.out.synapse, 179 | col = col.out.synapse, ...)) 180 | draw.text(label = out.label[(neuron.count[out] + 181 | 1) - i], x = c((x.position[out] + radius), x.position[out] + 182 | arrow.length), y = c(y.position[out], y.position[out]), 183 | xy.null = c(0, 0), color = col.out.synapse, fontsize = fontsize, 184 | ...) 185 | grid::grid.circle(x = x.position[out], y = y.position[out], 186 | r = radius, gp = grid::gpar(fill = "white", col = col.out, 187 | ...)) 188 | } 189 | if (intercept) { 190 | for (k in 1:length(weights)) { 191 | y.tmp <- 0 192 | x.intercept <- (x.position[k + 1] - x.position[k]) * 193 | intercept.factor + x.position[k] 194 | for (i in 1:neuron.count[k + 1]) { 195 | y.tmp <- y.tmp + y.step[k + 1] 196 | result <- calculate.delta(c(x.intercept, x.position[k + 197 | 1]), c(y.intercept, y.tmp), radius) 198 | x <- c(x.intercept, x.position[k + 1] - result[1]) 199 | y <- c(y.intercept, y.tmp + result[2]) 200 | grid::grid.lines(x = x, y = y, arrow = grid::arrow(length = grid::unit(0.15, 201 | "cm"), type = "closed"), gp = grid::gpar(fill = col.intercept, 202 | col = col.intercept, ...)) 203 | xy.null <- cbind(x.position[k + 1] - x.intercept - 204 | 2 * result[1], -(y.tmp - y.intercept + 2 * 205 | result[2])) 206 | if (show.weights) 207 | draw.text(label = weights[[k]][1, neuron.count[k + 208 | 1] - i + 1], x = c(x.intercept, x.position[k + 209 | 1]), y = c(y.intercept, y.tmp), xy.null = xy.null, 210 | color = col.intercept, alignment = c("right", 211 | "bottom"), fontsize = fontsize - 2, ...) 212 | } 213 | grid::grid.circle(x = x.intercept, y = y.intercept, 214 | r = radius, gp = grid::gpar(fill = "white", col = col.intercept, 215 | ...)) 216 | grid::grid.text(1, x = x.intercept, y = y.intercept, 217 | gp = grid::gpar(col = col.intercept, ...)) 218 | } 219 | } 220 | if (information) 221 | grid::grid.text(paste("Error: ", round(result.matrix[rep, 222 | "error"], 6), " Steps: ", result.matrix[rep, 223 | "steps"], sep = ""), x = 0.5, y = information.pos, 224 | just = "bottom", gp = grid::gpar(fontsize = fontsize + 225 | 2, ...)) 226 | if (!is.null(file)) { 227 | weight.plot <- grDevices::recordPlot() 228 | save(weight.plot, file = file) 229 | } 230 | } 231 | } 232 | calculate.delta <- 233 | function (x, y, r) 234 | { 235 | delta.x <- x[2] - x[1] 236 | delta.y <- y[2] - y[1] 237 | x.null <- r/sqrt(delta.x^2 + delta.y^2) * delta.x 238 | if (y[1] < y[2]) 239 | y.null <- -sqrt(r^2 - x.null^2) 240 | else if (y[1] > y[2]) 241 | y.null <- sqrt(r^2 - x.null^2) 242 | else y.null <- 0 243 | c(x.null, y.null) 244 | } 245 | draw.text <- 246 | function (label, x, y, xy.null = c(0, 0), color, alignment = c("left", 247 | "bottom"), ...) 
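# Internal helper for plot.nn: draws 'label' next to the synapse running from (x[1], y[1]) to (x[2], y[2]), offset by 'xy.null' and rotated to the slope of that line; numeric labels are rounded to 5 decimal places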
248 | { 249 | x.label <- x[1] + xy.null[1] 250 | y.label <- y[1] - xy.null[2] 251 | x.delta <- x[2] - x[1] 252 | y.delta <- y[2] - y[1] 253 | angle = atan(y.delta/x.delta) * (180/pi) 254 | if (angle < 0) 255 | angle <- angle + 0 256 | else if (angle > 0) 257 | angle <- angle - 0 258 | if (is.numeric(label)) 259 | label <- round(label, 5) 260 | vp <- grid::viewport(x = x.label, y = y.label, width = 0, height = , 261 | angle = angle, name = "vp1", just = alignment) 262 | grid::grid.text(label, x = 0, y = grid::unit(0.75, "mm"), just = alignment, 263 | gp = grid::gpar(col = color, ...), vp = vp) 264 | } 265 | -------------------------------------------------------------------------------- /R/confidence.interval.r: -------------------------------------------------------------------------------- 1 | #' Calculates confidence intervals of the weights 2 | #' 3 | #' \code{confidence.interval}, a method for objects of class \code{nn}, 4 | #' typically produced by \code{neuralnet}. Calculates confidence intervals of 5 | #' the weights (White, 1989) and the network information criteria NIC (Murata 6 | #' et al. 1994). All confidence intervals are calculated under the assumption 7 | #' of a local identification of the given neural network. If this assumption 8 | #' is violated, the results will not be reasonable. Please make also sure that 9 | #' the chosen error function equals the negative log-likelihood function, 10 | #' otherwise the results are not meaningfull, too. 11 | #' 12 | #' 13 | #' @param x neural network 14 | #' @param alpha numerical. Sets the confidence level to (1-alpha). 15 | #' @return \code{confidence.interval} returns a list containing the following 16 | #' components: 17 | #' 18 | #' \item{ lower.ci }{a list containing the lower confidence bounds of all 19 | #' weights of the neural network differentiated by the repetitions.} \item{ 20 | #' upper.ci }{a list containing the upper confidence bounds of all weights of 21 | #' the neural network differentiated by the repetitions.} \item{ nic }{a vector 22 | #' containg the information criteria NIC for every repetition.} 23 | #' @author Stefan Fritsch, Frauke Guenther \email{guenther@@leibniz-bips.de} 24 | #' @seealso \code{\link{neuralnet}} 25 | #' @references White (1989) \emph{Learning in artificial neural networks. A 26 | #' statistical perspective.} Neural Computation (1), pages 425-464 27 | #' 28 | #' Murata et al. 
(1994) \emph{Network information criterion - determining the 29 | #' number of hidden units for an artificial neural network model.} IEEE 30 | #' Transactions on Neural Networks 5 (6), pages 865-871 31 | #' @keywords neural 32 | #' @examples 33 | #' 34 | #' data(infert, package="datasets") 35 | #' print(net.infert <- neuralnet(case~parity+induced+spontaneous, 36 | #' infert, err.fct="ce", linear.output=FALSE)) 37 | #' confidence.interval(net.infert) 38 | #' 39 | #' @export confidence.interval 40 | confidence.interval <- 41 | function (x, alpha = 0.05) 42 | { 43 | net <- x 44 | covariate <- cbind(1, net$covariate) 45 | response <- net$response 46 | err.fct <- net$err.fct 47 | act.fct <- net$act.fct 48 | linear.output <- net$linear.output 49 | exclude <- net$exclude 50 | list.weights <- net$weights 51 | rep <- length(list.weights) 52 | if (is.null(list.weights)) 53 | stop("weights were not calculated") 54 | nrow.weights <- sapply(list.weights[[1]], nrow) 55 | ncol.weights <- sapply(list.weights[[1]], ncol) 56 | lower.ci <- NULL 57 | upper.ci <- NULL 58 | nic <- NULL 59 | for (i in 1:rep) { 60 | weights <- list.weights[[i]] 61 | error <- net$result.matrix["error", i] 62 | if (length(weights) > 2) 63 | stop("nic and confidence intervals will not be calculated for more than one hidden layer of neurons", 64 | call. = FALSE) 65 | result.nic <- calculate.information.matrices(covariate, 66 | response, weights, err.fct, act.fct, exclude, linear.output) 67 | nic <- c(nic, error + result.nic$trace) 68 | if (!is.null(result.nic$variance)) { 69 | if (all(diag(result.nic$variance) >= 0)) { 70 | weights.vector <- unlist(weights) 71 | if (!is.null(exclude)) { 72 | d <- rep(NA, length(weights.vector)) 73 | d[-exclude] <- stats::qnorm(1 - alpha/2) * sqrt(diag(result.nic$variance))/sqrt(nrow(covariate)) 74 | } 75 | else { 76 | d <- stats::qnorm(1 - alpha/2) * sqrt(diag(result.nic$variance))/sqrt(nrow(covariate)) 77 | } 78 | lower.ci <- c(lower.ci, list(relist(weights.vector - 79 | d, nrow.weights, ncol.weights))) 80 | upper.ci <- c(upper.ci, list(relist(weights.vector + 81 | d, nrow.weights, ncol.weights))) 82 | } 83 | } 84 | } 85 | if (length(lower.ci) < rep) 86 | warning(sprintf("%s of %s repetition(s) could not calculate confidence intervals for the weights; varify that the neural network does not contain irrelevant neurons", 87 | length(lower.ci), rep), call. 
= F) 88 | list(lower.ci = lower.ci, upper.ci = upper.ci, nic = nic) 89 | } 90 | calculate.information.matrices <- 91 | function (covariate, response, weights, err.fct, act.fct, exclude, 92 | linear.output) 93 | { 94 | temp <- act.fct 95 | if (attr(act.fct, "type") == "logistic") { 96 | act.deriv.fct <- function(x) { 97 | act.fct(x) * (1 - act.fct(x)) 98 | } 99 | act.deriv2.fct <- function(x) { 100 | act.fct(x) * (1 - act.fct(x)) * (1 - (2 * act.fct(x))) 101 | } 102 | } 103 | else { 104 | attr(temp, "type") <- NULL 105 | act.deriv.fct <- Deriv::Deriv(temp, nderiv = 1, x = "x") 106 | act.deriv2.fct <- Deriv::Deriv(temp, nderiv = 2, x = "x") 107 | } 108 | temp <- err.fct 109 | attr(temp, "type") <- NULL 110 | err.deriv.fct <- Deriv::Deriv(temp, nderiv = 1, x = "x") 111 | err.deriv2.fct <- Deriv::Deriv(temp, nderiv = 2, x = "x") 112 | length.weights <- length(weights) 113 | nrow.weights <- sapply(weights, nrow) 114 | ncol.weights <- sapply(weights, ncol) 115 | if (linear.output) { 116 | output.act.fct <- function(x) { 117 | x 118 | } 119 | output.act.deriv.fct <- function(x) { 120 | matrix(1, nrow(x), ncol(x)) 121 | } 122 | output.act.deriv2.fct <- function(x) { 123 | matrix(0, nrow(x), ncol(x)) 124 | } 125 | } 126 | else { 127 | output.act.fct <- act.fct 128 | output.act.deriv.fct <- act.deriv.fct 129 | output.act.deriv2.fct <- act.deriv2.fct 130 | } 131 | neuron.deriv <- NULL 132 | neuron.deriv2 <- NULL 133 | neurons <- list(covariate) 134 | if (length.weights > 1) 135 | for (i in 1:(length.weights - 1)) { 136 | temp <- neurons[[i]] %*% weights[[i]] 137 | act.temp <- act.fct(temp) 138 | neuron.deriv[[i]] <- act.deriv.fct(temp) 139 | neuron.deriv2[[i]] <- act.deriv2.fct(temp) 140 | neurons[[i + 1]] <- cbind(1, act.temp) 141 | } 142 | if (!is.list(neuron.deriv)) 143 | neuron.deriv <- list(neuron.deriv) 144 | if (!is.list(neuron.deriv2)) 145 | neuron.deriv2 <- list(neuron.deriv2) 146 | temp <- neurons[[length.weights]] %*% weights[[length.weights]] 147 | net.result <- output.act.fct(temp) 148 | neuron.deriv[[length.weights]] <- output.act.deriv.fct(temp) 149 | neuron.deriv2[[length.weights]] <- output.act.deriv2.fct(temp) 150 | err.deriv <- err.deriv.fct(net.result, response) 151 | err.deriv2 <- err.deriv2.fct(net.result, response) 152 | if (any(is.na(unlist(neuron.deriv2)))) { 153 | return(list(nic = NA, hessian = NULL)) 154 | warning("neuron.deriv2 contains NA; this might be caused by a wrong choice of 'act.fct'") 155 | } 156 | if (any(is.na(err.deriv)) || any(is.na(err.deriv2))) { 157 | if (attr(err.fct, "type") == "ce") { 158 | one <- which(net.result == 1) 159 | if (length(one) > 0) { 160 | for (i in 1:length(one)) { 161 | if (response[one[i]] == 1) { 162 | err.deriv[one[i]] <- 1 163 | err.deriv2[one[i]] <- 1 164 | } 165 | } 166 | } 167 | zero <- which(net.result == 0) 168 | if (length(zero) > 0) { 169 | for (i in 1:length(zero)) { 170 | if (response[zero[i]] == 0) { 171 | err.deriv[zero[i]] <- 1 172 | err.deriv2[zero[i]] <- -1 173 | } 174 | } 175 | } 176 | } 177 | } 178 | if (any(is.na(err.deriv))) { 179 | return(list(nic = NA, hessian = NULL)) 180 | warning("err.deriv contains NA; this might be caused by a wrong choice of 'act.fct'") 181 | } 182 | if (any(is.na(err.deriv2))) { 183 | return(list(nic = NA, hessian = NULL)) 184 | warning("err.deriv2 contains NA; this might be caused by a wrong choice of 'act.fct'") 185 | } 186 | if (length.weights == 2) { 187 | length.betha <- (nrow.weights * ncol.weights)[1] 188 | length.alpha <- (nrow.weights * ncol.weights)[2] 189 | total.length.weights 
<- length.alpha + length.betha 190 | betha.ind <- matrix(1:length.betha, nrow = nrow.weights[1], 191 | ncol = ncol.weights[1]) 192 | alpha.ind <- matrix(1:length.alpha, nrow = nrow.weights[2], 193 | ncol = ncol.weights[2]) 194 | Hesse <- matrix(NA, nrow = total.length.weights, ncol = total.length.weights) 195 | Cross.Gradient <- matrix(NA, nrow = total.length.weights, 196 | ncol = total.length.weights) 197 | Cross.Gradient2 <- matrix(NA, nrow = total.length.weights, 198 | ncol = total.length.weights) 199 | for (i in 1:total.length.weights) { 200 | for (j in 1:total.length.weights) { 201 | if (is.null(exclude) || all(i != exclude & j != 202 | exclude)) { 203 | if (i <= length.betha) { 204 | temp <- which(betha.ind == i, arr.ind = T) 205 | r <- temp[1] 206 | s <- temp[2] 207 | } 208 | else { 209 | temp <- which(alpha.ind == (i - length.betha), 210 | arr.ind = T) 211 | r <- temp[1] 212 | s <- temp[2] 213 | } 214 | if (j <= length.betha) { 215 | temp <- which(betha.ind == j, arr.ind = T) 216 | u <- temp[1] 217 | v <- temp[2] 218 | } 219 | else { 220 | temp <- which(alpha.ind == (j - length.betha), 221 | arr.ind = T) 222 | u <- temp[1] 223 | v <- temp[2] 224 | } 225 | if ((i <= length.betha) && (j <= length.betha)) { 226 | Cross.Gradient[i, j] <- sum(((err.deriv^2 * 227 | neuron.deriv[[2]]^2) %*% (weights[[2]][(s + 228 | 1), ] * weights[[2]][(v + 1), ])) * neuron.deriv[[1]][, 229 | s] * neurons[[1]][, r] * neuron.deriv[[1]][, 230 | v] * neurons[[1]][, u]) 231 | Cross.Gradient2[i, j] <- sum(((err.deriv2 * 232 | neuron.deriv[[2]]^2) %*% (weights[[2]][(s + 233 | 1), ] * weights[[2]][(v + 1), ])) * neuron.deriv[[1]][, 234 | s] * neurons[[1]][, r] * neuron.deriv[[1]][, 235 | v] * neurons[[1]][, u]) 236 | if (s == v) 237 | Hesse[i, j] <- sum(neuron.deriv[[1]][, 238 | s] * neurons[[1]][, r] * neuron.deriv[[1]][, 239 | v] * neurons[[1]][, u] * ((neuron.deriv2[[2]] * 240 | err.deriv) %*% (weights[[2]][(s + 1), 241 | ] * weights[[2]][(v + 1), ]))) + sum(neuron.deriv2[[1]][, 242 | s] * neurons[[1]][, r] * neurons[[1]][, 243 | u] * ((neuron.deriv[[2]] * err.deriv) %*% 244 | weights[[2]][(s + 1), ])) 245 | else Hesse[i, j] <- sum(neuron.deriv[[1]][, 246 | s] * neurons[[1]][, r] * neuron.deriv[[1]][, 247 | v] * neurons[[1]][, u] * ((neuron.deriv2[[2]] * 248 | err.deriv) %*% (weights[[2]][(s + 1), ] * 249 | weights[[2]][(v + 1), ]))) 250 | } 251 | else if ((i > length.betha) && (j > length.betha)) { 252 | if (v == s) { 253 | Cross.Gradient[i, j] <- sum(err.deriv[, 254 | v]^2 * (neuron.deriv[[2]][, s] * neurons[[2]][, 255 | r] * neuron.deriv[[2]][, v] * neurons[[2]][, 256 | u])) 257 | Cross.Gradient2[i, j] <- sum(err.deriv2[, 258 | v] * (neuron.deriv[[2]][, s] * neurons[[2]][, 259 | r] * neuron.deriv[[2]][, v] * neurons[[2]][, 260 | u])) 261 | } 262 | else { 263 | Cross.Gradient[i, j] <- 0 264 | Cross.Gradient2[i, j] <- 0 265 | } 266 | if (v == s) 267 | Hesse[i, j] <- sum(neuron.deriv2[[2]][, 268 | s] * err.deriv[, s] * neurons[[2]][, 269 | u] * neurons[[2]][, r]) 270 | else Hesse[i, j] <- 0 271 | } 272 | else if ((i > length.betha) && (j <= length.betha)) { 273 | Cross.Gradient[i, j] <- sum(err.deriv[, s]^2 * 274 | (neuron.deriv[[2]][, s] * neurons[[2]][, 275 | r] * (neuron.deriv[[2]][, s] * weights[[2]][(v + 276 | 1), s]) * neuron.deriv[[1]][, v] * neurons[[1]][, 277 | u])) 278 | Cross.Gradient2[i, j] <- sum(err.deriv2[, 279 | s] * (neuron.deriv[[2]][, s] * neurons[[2]][, 280 | r] * (neuron.deriv[[2]][, s] * weights[[2]][(v + 281 | 1), s]) * neuron.deriv[[1]][, v] * neurons[[1]][, 282 | u])) 283 | if (v == 
r) 284 | Hesse[i, j] <- sum(neurons[[2]][, r] * 285 | neuron.deriv[[1]][, v] * neurons[[1]][, 286 | u] * neuron.deriv2[[2]][, s] * err.deriv[, 287 | s] * weights[[2]][(v + 1), s]) + sum(neuron.deriv[[2]][, 288 | s] * err.deriv[, s] * neurons[[1]][, 289 | u] * neuron.deriv[[1]][, v]) 290 | else Hesse[i, j] <- sum(neurons[[2]][, r] * 291 | neuron.deriv[[1]][, v] * neurons[[1]][, 292 | u] * neuron.deriv2[[2]][, s] * err.deriv[, 293 | s] * weights[[2]][(v + 1), s]) 294 | } 295 | else { 296 | Cross.Gradient[i, j] <- sum(err.deriv[, v]^2 * 297 | (neuron.deriv[[2]][, v] * neurons[[2]][, 298 | u] * (neuron.deriv[[2]][, v] * weights[[2]][(s + 299 | 1), v]) * neuron.deriv[[1]][, s] * neurons[[1]][, 300 | r])) 301 | Cross.Gradient2[i, j] <- sum(err.deriv2[, 302 | v] * (neuron.deriv[[2]][, v] * neurons[[2]][, 303 | u] * (neuron.deriv[[2]][, v] * weights[[2]][(s + 304 | 1), v]) * neuron.deriv[[1]][, s] * neurons[[1]][, 305 | r])) 306 | if (s == u) 307 | Hesse[i, j] <- sum(neurons[[2]][, u] * 308 | neuron.deriv[[1]][, s] * neurons[[1]][, 309 | r] * neuron.deriv2[[2]][, v] * err.deriv[, 310 | v] * weights[[2]][(s + 1), v]) + sum(neuron.deriv[[2]][, 311 | v] * err.deriv[, v] * neurons[[1]][, 312 | r] * neuron.deriv[[1]][, s]) 313 | else Hesse[i, j] <- sum(neurons[[2]][, u] * 314 | neuron.deriv[[1]][, s] * neurons[[1]][, 315 | r] * neuron.deriv2[[2]][, v] * err.deriv[, 316 | v] * weights[[2]][(s + 1), v]) 317 | } 318 | } 319 | } 320 | } 321 | } 322 | else if (length.weights == 1) { 323 | length.alpha <- sum(nrow.weights * ncol.weights) 324 | alpha.ind <- matrix(1:length.alpha, nrow = nrow.weights[1], 325 | ncol = ncol.weights[1]) 326 | Hesse <- matrix(NA, nrow = length.alpha, ncol = length.alpha) 327 | Cross.Gradient <- matrix(NA, nrow = length.alpha, ncol = length.alpha) 328 | Cross.Gradient2 <- matrix(NA, nrow = length.alpha, ncol = length.alpha) 329 | for (i in 1:length.alpha) { 330 | for (j in 1:length.alpha) { 331 | if (is.null(exclude) || all(i != exclude & j != 332 | exclude)) { 333 | r <- which(alpha.ind == i, arr.ind = T)[1] 334 | s <- which(alpha.ind == i, arr.ind = T)[2] 335 | u <- which(alpha.ind == j, arr.ind = T)[1] 336 | v <- which(alpha.ind == j, arr.ind = T)[2] 337 | if (s == v) { 338 | Hesse[i, j] <- sum(neuron.deriv2[[1]][, s] * 339 | err.deriv[, s] * neurons[[1]][, r] * neurons[[1]][, 340 | u]) 341 | Cross.Gradient[i, j] <- sum(neuron.deriv[[1]][, 342 | s]^2 * err.deriv[, s]^2 * neurons[[1]][, 343 | r] * neurons[[1]][, u]) 344 | Cross.Gradient2[i, j] <- sum(neuron.deriv[[1]][, 345 | s]^2 * err.deriv2[, s] * neurons[[1]][, 346 | r] * neurons[[1]][, u]) 347 | } 348 | else { 349 | Hesse[i, j] <- 0 350 | Cross.Gradient[i, j] <- 0 351 | Cross.Gradient2[i, j] <- 0 352 | } 353 | } 354 | } 355 | } 356 | } 357 | B <- Cross.Gradient/nrow(neurons[[1]]) 358 | A <- (Cross.Gradient2 + Hesse)/nrow(neurons[[1]]) 359 | if (!is.null(exclude)) { 360 | B <- as.matrix(B[-exclude, -exclude]) 361 | A <- as.matrix(A[-exclude, -exclude]) 362 | } 363 | if (det(A) == 0) { 364 | trace <- NA 365 | variance <- NULL 366 | } 367 | else { 368 | A.inv <- MASS::ginv(A) 369 | variance <- A.inv %*% B %*% A.inv 370 | trace <- sum(diag(B %*% A.inv)) 371 | } 372 | return(list(trace = trace, variance = variance)) 373 | } 374 | -------------------------------------------------------------------------------- /R/neuralnet.r: -------------------------------------------------------------------------------- 1 | #' Training of neural networks 2 | #' 3 | #' Train neural networks using backpropagation, 4 | #' resilient backpropagation 
(RPROP) with (Riedmiller, 1994) or without weight 5 | #' backtracking (Riedmiller and Braun, 1993) or the modified globally 6 | #' convergent version (GRPROP) by Anastasiadis et al. (2005). The function 7 | #' allows flexible settings through custom-choice of error and activation 8 | #' function. Furthermore, the calculation of generalized weights (Intrator O. 9 | #' and Intrator N., 1993) is implemented. 10 | #' 11 | #' The globally convergent algorithm is based on the resilient backpropagation 12 | #' without weight backtracking and additionally modifies one learning rate, 13 | #' either the learningrate associated with the smallest absolute gradient (sag) 14 | #' or the smallest learningrate (slr) itself. The learning rates in the grprop 15 | #' algorithm are limited to the boundaries defined in learningrate.limit. 16 | #' 17 | #' @aliases neuralnet print.nn 18 | #' @param formula a symbolic description of the model to be fitted. 19 | #' @param data a data frame containing the variables specified in 20 | #' \code{formula}. 21 | #' @param hidden a vector of integers specifying the number of hidden neurons 22 | #' (vertices) in each layer. 23 | #' @param threshold a numeric value specifying the threshold for the partial 24 | #' derivatives of the error function as stopping criteria. 25 | #' @param stepmax the maximum steps for the training of the neural network. 26 | #' Reaching this maximum leads to a stop of the neural network's training 27 | #' process. 28 | #' @param rep the number of repetitions for the neural network's training. 29 | #' @param startweights a vector containing starting values for the weights. 30 | #' Set to \code{NULL} for random initialization. 31 | #' @param learningrate.limit a vector or a list containing the lowest and 32 | #' highest limit for the learning rate. Used only for RPROP and GRPROP. 33 | #' @param learningrate.factor a vector or a list containing the multiplication 34 | #' factors for the upper and lower learning rate. Used only for RPROP and 35 | #' GRPROP. 36 | #' @param learningrate a numeric value specifying the learning rate used by 37 | #' traditional backpropagation. Used only for traditional backpropagation. 38 | #' @param lifesign a string specifying how much the function will print during 39 | #' the calculation of the neural network. 'none', 'minimal' or 'full'. 40 | #' @param lifesign.step an integer specifying the stepsize to print the minimal 41 | #' threshold in full lifesign mode. 42 | #' @param algorithm a string containing the algorithm type to calculate the 43 | #' neural network. The following types are possible: 'backprop', 'rprop+', 44 | #' 'rprop-', 'sag', or 'slr'. 'backprop' refers to backpropagation, 'rprop+' 45 | #' and 'rprop-' refer to the resilient backpropagation with and without weight 46 | #' backtracking, while 'sag' and 'slr' induce the usage of the modified 47 | #' globally convergent algorithm (grprop). See Details for more information. 48 | #' @param err.fct a differentiable function that is used for the calculation of 49 | #' the error. Alternatively, the strings 'sse' and 'ce' which stand for the sum 50 | #' of squared errors and the cross-entropy can be used. 51 | #' @param act.fct a differentiable function that is used for smoothing the 52 | #' result of the cross product of the covariate or neurons and the weights. 53 | #' Additionally the strings, 'logistic', 'tanh' and 'relu' are possible for the 54 | #' logistic function, tangent hyperbolicus and rectified linear unit. 
55 | #' @param output.act.fct activation function used in the output layer 56 | #' (if \code{linear.output = FALSE}). Use same functions/strings as in \code{act.fct}. 57 | #' @param linear.output logical. If output.act.fct should not be applied to the output 58 | #' neurons set linear output to TRUE, otherwise to FALSE. 59 | #' @param exclude a vector or a matrix specifying the weights, that are 60 | #' excluded from the calculation. If given as a vector, the exact positions of 61 | #' the weights must be known. A matrix with n-rows and 3 columns will exclude n 62 | #' weights, where the first column stands for the layer, the second column for 63 | #' the input neuron and the third column for the output neuron of the weight. 64 | #' @param constant.weights a vector specifying the values of the weights that 65 | #' are excluded from the training process and treated as fix. 66 | #' @param likelihood logical. If the error function is equal to the negative 67 | #' log-likelihood function, the information criteria AIC and BIC will be 68 | #' calculated. Furthermore the usage of confidence.interval is meaningful. 69 | #' 70 | #' @return \code{neuralnet} returns an object of class \code{nn}. An object of 71 | #' class \code{nn} is a list containing at most the following components: 72 | #' 73 | #' \item{ call }{ the matched call. } 74 | #' \item{ response }{ extracted from the \code{data argument}. } 75 | #' \item{ covariate }{ the variables extracted from the \code{data argument}. } 76 | #' \item{ model.list }{ a list containing the covariates and the response variables extracted from the \code{formula argument}. } 77 | #' \item{ err.fct }{ the error function. } 78 | #' \item{ act.fct }{ the activation function. } 79 | #' \item{ data }{ the \code{data argument}.} 80 | #' \item{ net.result }{ a list containing the overall result of the neural network for every repetition.} 81 | #' \item{ weights }{ a list containing the fitted weights of the neural network for every repetition. } 82 | #' \item{ generalized.weights }{ a list containing the generalized weights of the neural network for every repetition. } 83 | #' \item{ result.matrix }{ a matrix containing the reached threshold, needed steps, error, AIC and BIC (if computed) and weights for every repetition. Each column represents one repetition. } 84 | #' \item{ startweights }{ a list containing the startweights of the neural network for every repetition. } 85 | #' 86 | #' @author Stefan Fritsch, Frauke Guenther, Marvin N. Wright 87 | #' 88 | #' @seealso \code{\link{plot.nn}} for plotting the neural network. 89 | #' 90 | #' \code{\link{gwplot}} for plotting the generalized weights. 91 | #' 92 | #' \code{\link{predict.nn}} for computation of a given neural network for given 93 | #' covariate vectors (formerly \code{compute}). 94 | #' 95 | #' \code{\link{confidence.interval}} for calculation of confidence intervals of 96 | #' the weights. 97 | #' 98 | #' \code{\link{prediction}} for a summary of the output of the neural network. 99 | #' 100 | #' @references Riedmiller M. (1994) \emph{Rprop - Description and 101 | #' Implementation Details.} Technical Report. University of Karlsruhe. 102 | #' 103 | #' Riedmiller M. and Braun H. (1993) \emph{A direct adaptive method for faster 104 | #' backpropagation learning: The RPROP algorithm.} Proceedings of the IEEE 105 | #' International Conference on Neural Networks (ICNN), pages 586-591. San 106 | #' Francisco. 107 | #' 108 | #' Anastasiadis A. et. al. 
(2005) \emph{New globally convergent training scheme 109 | #' based on the resilient propagation algorithm.} Neurocomputing 64, pages 110 | #' 253-270. 111 | #' 112 | #' Intrator O. and Intrator N. (1993) \emph{Using Neural Nets for 113 | #' Interpretation of Nonlinear Models.} Proceedings of the Statistical 114 | #' Computing Section, 244-249 San Francisco: American Statistical Society 115 | #' (eds). 116 | #' @keywords neural 117 | #' @examples 118 | #' 119 | #' library(neuralnet) 120 | #' 121 | #' # Binary classification 122 | #' nn <- neuralnet(Species == "setosa" ~ Petal.Length + Petal.Width, iris, linear.output = FALSE) 123 | #' \dontrun{print(nn)} 124 | #' \dontrun{plot(nn)} 125 | #' 126 | #' # Multiclass classification 127 | #' nn <- neuralnet(Species ~ Petal.Length + Petal.Width, iris, linear.output = FALSE) 128 | #' \dontrun{print(nn)} 129 | #' \dontrun{plot(nn)} 130 | #' 131 | #' # Custom activation function 132 | #' softplus <- function(x) log(1 + exp(x)) 133 | #' nn <- neuralnet((Species == "setosa") ~ Petal.Length + Petal.Width, iris, 134 | #' linear.output = FALSE, hidden = c(3, 2), act.fct = softplus) 135 | #' \dontrun{print(nn)} 136 | #' \dontrun{plot(nn)} 137 | #' 138 | #' @import stats 139 | #' @export 140 | neuralnet <- 141 | function (formula, data, hidden = 1, threshold = 0.01, stepmax = 1e+05, 142 | rep = 1, startweights = NULL, learningrate.limit = NULL, 143 | learningrate.factor = list(minus = 0.5, plus = 1.2), learningrate = NULL, 144 | lifesign = "none", lifesign.step = 1000, algorithm = "rprop+", 145 | err.fct = "sse", act.fct = "logistic", output.act.fct = "logistic", linear.output = TRUE, 146 | exclude = NULL, constant.weights = NULL, likelihood = FALSE) { 147 | 148 | # Save call 149 | call <- match.call() 150 | 151 | # Check arguments 152 | if (is.null(data)) { 153 | stop("Missing 'data' argument.", call. = FALSE) 154 | } 155 | data <- as.data.frame(data) 156 | 157 | if (is.null(formula)) { 158 | stop("Missing 'formula' argument.", call. = FALSE) 159 | } 160 | formula <- stats::as.formula(formula) 161 | 162 | # Learning rate limit 163 | if (!is.null(learningrate.limit)) { 164 | if (length(learningrate.limit) != 2) { 165 | stop("Argument 'learningrate.factor' must consist of two components.", 166 | call. = FALSE) 167 | } 168 | learningrate.limit <- as.list(learningrate.limit) 169 | names(learningrate.limit) <- c("min", "max") 170 | 171 | if (is.na(learningrate.limit$min) || is.na(learningrate.limit$max)) { 172 | stop("'learningrate.limit' must be a numeric vector", 173 | call. = FALSE) 174 | } 175 | } else { 176 | learningrate.limit <- list(min = 1e-10, max = 0.1) 177 | } 178 | 179 | # Learning rate factor 180 | if (!is.null(learningrate.factor)) { 181 | if (length(learningrate.factor) != 2) { 182 | stop("Argument 'learningrate.factor' must consist of two components.", 183 | call. = FALSE) 184 | } 185 | learningrate.factor <- as.list(learningrate.factor) 186 | names(learningrate.factor) <- c("minus", "plus") 187 | 188 | if (is.na(learningrate.factor$minus) || is.na(learningrate.factor$plus)) { 189 | stop("'learningrate.factor' must be a numeric vector", 190 | call. = FALSE) 191 | } 192 | } else { 193 | learningrate.factor <- list(minus = 0.5, plus = 1.2) 194 | } 195 | 196 | # Learning rate (backprop) 197 | if (algorithm == "backprop") { 198 | if (is.null(learningrate) || !is.numeric(learningrate)) { 199 | stop("Argument 'learningrate' must be a numeric value, if the backpropagation algorithm is used.", 200 | call. 
= FALSE) 201 | } 202 | } 203 | 204 | # TODO: Rename? 205 | # Lifesign 206 | if (!(lifesign %in% c("none", "minimal", "full"))) { 207 | stop("Argument 'lifesign' must be one of 'none', 'minimal', 'full'.", call. = FALSE) 208 | } 209 | 210 | # Algorithm 211 | if (!(algorithm %in% c("rprop+", "rprop-", "slr", "sag", "backprop"))) { 212 | stop("Unknown algorithm.", call. = FALSE) 213 | } 214 | 215 | # Threshold 216 | if (is.na(threshold)) { 217 | stop("Argument 'threshold' must be a numeric value.", call. = FALSE) 218 | } 219 | 220 | # Hidden units 221 | if (any(is.na(hidden))) { 222 | stop("Argument 'hidden' must be an integer vector or a single integer.", 223 | call. = FALSE) 224 | } 225 | if (length(hidden) > 1 && any(hidden == 0)) { 226 | stop("Argument 'hidden' contains at least one 0.", call. = FALSE) 227 | } 228 | 229 | # Replications 230 | if (is.na(rep)) { 231 | stop("Argument 'rep' must be an integer", call. = FALSE) 232 | } 233 | 234 | # Max steps 235 | if (is.na(stepmax)) { 236 | stop("Argument 'stepmax' must be an integer", call. = FALSE) 237 | } 238 | 239 | # Activation function 240 | if (!(is.function(act.fct) || act.fct %in% c("logistic", "tanh", "relu", "ReLu"))) { 241 | stop("Unknown activation function.", call. = FALSE) 242 | } 243 | if (!(is.function(output.act.fct) || output.act.fct %in% c("logistic", "tanh", "relu", "ReLu"))) { 244 | stop("Unknown output activation function.", call. = FALSE) 245 | } 246 | 247 | # Error function 248 | if (!(is.function(err.fct) || err.fct %in% c("sse", "ce"))) { 249 | stop("Unknown error function.", call. = FALSE) 250 | } 251 | 252 | # Formula interface 253 | model.list <- list(response = attr(terms(as.formula(call("~", formula[[2]]))), "term.labels"), 254 | variables = attr(terms(formula, data = data), "term.labels")) 255 | response <- as.matrix(model.frame(as.formula(call("~", formula[[2]])), data)) 256 | covariate <- cbind(intercept = 1, as.matrix(data[, model.list$variables])) 257 | 258 | # Multiclass response 259 | if (is.character(response)) { 260 | response <- model.matrix( ~ response[, 1] - 1) == 1 261 | colnames(response) <- gsub("response\\[, 1\\]", "", colnames(response)) 262 | model.list$response <- colnames(response) 263 | } 264 | 265 | # Activation function 266 | if (is.function(act.fct)) { 267 | act.deriv.fct <- Deriv::Deriv(act.fct) 268 | attr(act.fct, "type") <- "function" 269 | } else { 270 | converted.fct <- convert.activation.function(act.fct) 271 | act.fct <- converted.fct$fct 272 | act.deriv.fct <- converted.fct$deriv.fct 273 | } 274 | 275 | # Output activation function 276 | if (is.function(output.act.fct)) { 277 | output.act.deriv.fct <- Deriv::Deriv(output.act.fct) 278 | attr(output.act.fct, "type") <- "function" 279 | } else { 280 | converted.fct <- convert.activation.function(output.act.fct) 281 | output.act.fct <- converted.fct$fct 282 | output.act.deriv.fct <- converted.fct$deriv.fct 283 | } 284 | 285 | # Error function 286 | if (is.function(err.fct)) { 287 | attr(err.fct, "type") <- "function" 288 | err.deriv.fct <- Deriv::Deriv(err.fct, x = "x") 289 | } else { 290 | converted.fct <- convert.error.function(err.fct) 291 | err.fct <- converted.fct$fct 292 | err.deriv.fct <- converted.fct$deriv.fct 293 | } 294 | 295 | if (attr(err.fct, "type") == "ce" && !all(response %in% 0:1)) { 296 | stop("Error function 'ce' only implemented for binary response.", call. 
= FALSE) 297 | } 298 | 299 | # Fit network for each replication 300 | list.result <- lapply(1:rep, function(i) { 301 | # Show progress 302 | if (lifesign != "none") { 303 | lifesign <- display(hidden, threshold, rep, i, lifesign) 304 | } 305 | 306 | # Fit network 307 | calculate.neuralnet(learningrate.limit = learningrate.limit, 308 | learningrate.factor = learningrate.factor, covariate = covariate, 309 | response = response, data = data, model.list = model.list, 310 | threshold = threshold, lifesign.step = lifesign.step, 311 | stepmax = stepmax, hidden = hidden, lifesign = lifesign, 312 | startweights = startweights, algorithm = algorithm, 313 | err.fct = err.fct, err.deriv.fct = err.deriv.fct, 314 | act.fct = act.fct, act.deriv.fct = act.deriv.fct, 315 | output.act.fct = output.act.fct, output.act.deriv.fct = output.act.deriv.fct, 316 | rep = i, linear.output = linear.output, exclude = exclude, 317 | constant.weights = constant.weights, likelihood = likelihood, 318 | learningrate.bp = learningrate) 319 | }) 320 | null_reps <- sapply(list.result, function(x) {is.null(x$output.vector)}) 321 | matrix <- sapply(list.result[!null_reps], function(x) {x$output.vector}) 322 | if (is.matrix(matrix)) { 323 | ncol.matrix <- ncol(matrix) 324 | } else { 325 | list.result <- NULL 326 | matrix <- NULL 327 | ncol.matrix <- 0 328 | } 329 | 330 | # Warning if some replications did not converge 331 | if (ncol.matrix < rep) { 332 | warning(sprintf("Algorithm did not converge in %s of %s repetition(s) within the stepmax.", 333 | (rep - ncol.matrix), rep), call. = FALSE) 334 | } 335 | 336 | # Return output 337 | generate.output(covariate, call, rep, threshold, matrix, 338 | startweights, model.list, response, err.fct, act.fct, output.act.fct, 339 | data, list.result, linear.output, exclude) 340 | } 341 | 342 | # Display output of replication 343 | display <- function (hidden, threshold, rep, i.rep, lifesign) { 344 | message("hidden: ", paste(hidden, collapse = ", "), " thresh: ", 345 | threshold, " rep: ", strrep(" ", nchar(rep) - nchar(i.rep)), 346 | i.rep, "/", rep, " steps: ", appendLF = FALSE) 347 | utils::flush.console() 348 | 349 | if (lifesign == "full") { 350 | lifesign <- sum(nchar(hidden)) + 2 * length(hidden) - 351 | 2 + max(nchar(threshold)) + 2 * nchar(rep) + 41 352 | } 353 | return(lifesign) 354 | } 355 | 356 | # Generate output object 357 | generate.output <- function(covariate, call, rep, threshold, matrix, startweights, 358 | model.list, response, err.fct, act.fct, output.act.fct, 359 | data, list.result, linear.output, exclude) { 360 | 361 | nn <- list(call = call, response = response, covariate = covariate[, -1, drop = FALSE], 362 | model.list = model.list, err.fct = err.fct, act.fct = act.fct, output.act.fct = output.act.fct, 363 | linear.output = linear.output, data = data, exclude = exclude) 364 | 365 | if (!is.null(matrix)) { 366 | nn$net.result <- lapply(list.result, function(x) {x$net.result}) 367 | nn$weights <- lapply(list.result, function(x) {x$weights}) 368 | nn$generalized.weights <- lapply(list.result, function(x) {x$generalized.weights}) 369 | nn$startweights <- lapply(list.result, function(x) {x$startweights}) 370 | nn$result.matrix <- matrix 371 | null_reps <- sapply(list.result, function(x) {is.null(x$output.vector)}) 372 | rownames(nn$result.matrix) <- c(rownames(matrix)[rownames(matrix) != ""], 373 | get_weight_names(nn$weights[[which(!null_reps)[1]]], model.list)) 374 | } 375 | 376 | class(nn) <- c("nn") 377 | return(nn) 378 | } 379 | 380 | # Get names of all weights in 
network 381 | get_weight_names <- function(weights, model.list) { 382 | # All hidden unit names 383 | if (length(weights) > 1) { 384 | hidden_units <- lapply(1:(length(weights) - 1), function(i) { 385 | paste0(i, "layhid", 1:ncol(weights[[i]])) 386 | }) 387 | } else { 388 | hidden_units <- list() 389 | } 390 | 391 | # All unit names including input and output 392 | units <- c(list(model.list$variables), 393 | hidden_units, 394 | list(model.list$response)) 395 | 396 | # Combine each layer with the next, add intercept 397 | weight_names <- do.call(c, lapply(1:(length(units) - 1), function(i) { 398 | as.vector(outer(c("Intercept", units[[i]]), units[[i + 1]], paste, sep = ".to.")) 399 | })) 400 | return(weight_names) 401 | } 402 | -------------------------------------------------------------------------------- /R/fit_neuralnet.R: -------------------------------------------------------------------------------- 1 | calculate.neuralnet <- 2 | function (data, model.list, hidden, stepmax, rep, threshold, 3 | learningrate.limit, learningrate.factor, lifesign, covariate, 4 | response, lifesign.step, startweights, algorithm, act.fct, 5 | act.deriv.fct, output.act.fct, output.act.deriv.fct, 6 | err.fct, err.deriv.fct, linear.output, likelihood, 7 | exclude, constant.weights, learningrate.bp) 8 | { 9 | time.start.local <- Sys.time() 10 | result <- generate.startweights(model.list, hidden, startweights, 11 | rep, exclude, constant.weights) 12 | weights <- result$weights 13 | exclude <- result$exclude 14 | nrow.weights <- sapply(weights, nrow) 15 | ncol.weights <- sapply(weights, ncol) 16 | result <- rprop(weights = weights, threshold = threshold, 17 | response = response, covariate = covariate, learningrate.limit = learningrate.limit, 18 | learningrate.factor = learningrate.factor, stepmax = stepmax, 19 | lifesign = lifesign, lifesign.step = lifesign.step, act.fct = act.fct, 20 | act.deriv.fct = act.deriv.fct, output.act.fct = output.act.fct, 21 | output.act.deriv.fct = output.act.deriv.fct, 22 | err.fct = err.fct, err.deriv.fct = err.deriv.fct, 23 | algorithm = algorithm, linear.output = linear.output, 24 | exclude = exclude, learningrate.bp = learningrate.bp) 25 | startweights <- weights 26 | weights <- result$weights 27 | step <- result$step 28 | reached.threshold <- result$reached.threshold 29 | net.result <- result$net.result 30 | error <- sum(err.fct(net.result, response)) 31 | if (is.na(error) & attr(err.fct, "type") == "ce") 32 | if (all(net.result <= 1, net.result >= 0)) 33 | error <- sum(err.fct(net.result, response), na.rm = T) 34 | if (!is.null(constant.weights) && any(constant.weights != 35 | 0)) 36 | exclude <- exclude[-which(constant.weights != 0)] 37 | if (length(exclude) == 0) 38 | exclude <- NULL 39 | aic <- NULL 40 | bic <- NULL 41 | if (likelihood) { 42 | synapse.count <- length(unlist(weights)) - length(exclude) 43 | aic <- 2 * error + (2 * synapse.count) 44 | bic <- 2 * error + log(nrow(response)) * synapse.count 45 | } 46 | if (is.na(error)) 47 | warning("'err.fct' does not fit 'data' or 'act.fct'", 48 | call. 
= F) 49 | if (lifesign != "none") { 50 | if (reached.threshold <= threshold) { 51 | message(rep(" ", (max(nchar(stepmax), nchar("stepmax")) - 52 | nchar(step))), step, appendLF = FALSE) 53 | message("\terror: ", round(error, 5), rep(" ", 6 - (nchar(round(error, 54 | 5)) - nchar(round(error, 0)))), appendLF = FALSE) 55 | if (!is.null(aic)) { 56 | message("\taic: ", round(aic, 5), rep(" ", 6 - (nchar(round(aic, 57 | 5)) - nchar(round(aic, 0)))), appendLF = FALSE) 58 | } 59 | if (!is.null(bic)) { 60 | message("\tbic: ", round(bic, 5), rep(" ", 6 - (nchar(round(bic, 61 | 5)) - nchar(round(bic, 0)))), appendLF = FALSE) 62 | } 63 | time <- difftime(Sys.time(), time.start.local) 64 | message("\ttime: ", round(time, 2), " ", attr(time, "units")) 65 | } 66 | } 67 | if (reached.threshold > threshold) 68 | return(result = list(output.vector = NULL, weights = NULL)) 69 | output.vector <- c(error = error, reached.threshold = reached.threshold, 70 | steps = step) 71 | if (!is.null(aic)) { 72 | output.vector <- c(output.vector, aic = aic) 73 | } 74 | if (!is.null(bic)) { 75 | output.vector <- c(output.vector, bic = bic) 76 | } 77 | for (w in 1:length(weights)) output.vector <- c(output.vector, 78 | as.vector(weights[[w]])) 79 | generalized.weights <- calculate.generalized.weights(weights, 80 | neuron.deriv = result$neuron.deriv, net.result = net.result) 81 | startweights <- unlist(startweights) 82 | weights <- unlist(weights) 83 | if (!is.null(exclude)) { 84 | startweights[exclude] <- NA 85 | weights[exclude] <- NA 86 | } 87 | startweights <- relist(startweights, nrow.weights, ncol.weights) 88 | weights <- relist(weights, nrow.weights, ncol.weights) 89 | return(list(generalized.weights = generalized.weights, weights = weights, 90 | startweights = startweights, net.result = result$net.result, 91 | output.vector = output.vector)) 92 | } 93 | generate.startweights <- 94 | function (model.list, hidden, startweights, rep, exclude, constant.weights) 95 | { 96 | input.count <- length(model.list$variables) 97 | output.count <- length(model.list$response) 98 | if (!(length(hidden) == 1 && hidden == 0)) { 99 | length.weights <- length(hidden) + 1 100 | nrow.weights <- array(0, dim = c(length.weights)) 101 | ncol.weights <- array(0, dim = c(length.weights)) 102 | nrow.weights[1] <- (input.count + 1) 103 | ncol.weights[1] <- hidden[1] 104 | if (length(hidden) > 1) 105 | for (i in 2:length(hidden)) { 106 | nrow.weights[i] <- hidden[i - 1] + 1 107 | ncol.weights[i] <- hidden[i] 108 | } 109 | nrow.weights[length.weights] <- hidden[length.weights - 110 | 1] + 1 111 | ncol.weights[length.weights] <- output.count 112 | } 113 | else { 114 | length.weights <- 1 115 | nrow.weights <- array((input.count + 1), dim = c(1)) 116 | ncol.weights <- array(output.count, dim = c(1)) 117 | } 118 | length <- sum(ncol.weights * nrow.weights) 119 | vector <- rep(0, length) 120 | if (!is.null(exclude)) { 121 | if (is.matrix(exclude)) { 122 | exclude <- matrix(as.integer(exclude), ncol = ncol(exclude), 123 | nrow = nrow(exclude)) 124 | if (nrow(exclude) >= length || ncol(exclude) != 3) 125 | stop("'exclude' has wrong dimensions", call. = FALSE) 126 | if (any(exclude < 1)) 127 | stop("'exclude' contains at least one invalid weight", 128 | call. 
= FALSE) 129 | temp <- relist(vector, nrow.weights, ncol.weights) 130 | for (i in 1:nrow(exclude)) { 131 | if (exclude[i, 1] > length.weights || exclude[i, 132 | 2] > nrow.weights[exclude[i, 1]] || exclude[i, 133 | 3] > ncol.weights[exclude[i, 1]]) 134 | stop("'exclude' contains at least one invalid weight", 135 | call. = FALSE) 136 | temp[[exclude[i, 1]]][exclude[i, 2], exclude[i, 137 | 3]] <- 1 138 | } 139 | exclude <- which(unlist(temp) == 1) 140 | } 141 | else if (is.vector(exclude)) { 142 | exclude <- as.integer(exclude) 143 | if (max(exclude) > length || min(exclude) < 1) { 144 | stop("'exclude' contains at least one invalid weight", 145 | call. = FALSE) 146 | } 147 | } 148 | else { 149 | stop("'exclude' must be a vector or matrix", call. = FALSE) 150 | } 151 | if (length(exclude) >= length) 152 | stop("all weights are excluded", call. = FALSE) 153 | } 154 | length <- length - length(exclude) 155 | if (!is.null(exclude)) { 156 | if (is.null(startweights) || length(startweights) < (length * 157 | rep)) 158 | vector[-exclude] <- stats::rnorm(length) 159 | else vector[-exclude] <- startweights[((rep - 1) * length + 160 | 1):(length * rep)] 161 | } 162 | else { 163 | if (is.null(startweights) || length(startweights) < (length * 164 | rep)) 165 | vector <- stats::rnorm(length) 166 | else vector <- startweights[((rep - 1) * length + 1):(length * 167 | rep)] 168 | } 169 | if (!is.null(exclude) && !is.null(constant.weights)) { 170 | if (length(exclude) < length(constant.weights)) 171 | stop("constant.weights contains more weights than exclude", 172 | call. = FALSE) 173 | else vector[exclude[1:length(constant.weights)]] <- constant.weights 174 | } 175 | weights <- relist(vector, nrow.weights, ncol.weights) 176 | return(list(weights = weights, exclude = exclude)) 177 | } 178 | rprop <- 179 | function (weights, response, covariate, threshold, learningrate.limit, 180 | learningrate.factor, stepmax, lifesign, lifesign.step, act.fct, 181 | act.deriv.fct, output.act.fct, output.act.deriv.fct, err.fct, err.deriv.fct, algorithm, linear.output, 182 | exclude, learningrate.bp) 183 | { 184 | step <- 1 185 | nchar.stepmax <- max(nchar(stepmax), 7) 186 | length.weights <- length(weights) 187 | nrow.weights <- sapply(weights, nrow) 188 | ncol.weights <- sapply(weights, ncol) 189 | length.unlist <- length(unlist(weights)) - length(exclude) 190 | learningrate <- as.vector(matrix(0.1, nrow = 1, ncol = length.unlist)) 191 | gradients.old <- as.vector(matrix(0, nrow = 1, ncol = length.unlist)) 192 | if (is.null(exclude)) 193 | exclude <- length(unlist(weights)) + 1 194 | if (attr(act.fct, "type") == "tanh" || attr(act.fct, "type") == "logistic" || attr(act.fct, "type") == "relu") 195 | special <- TRUE 196 | else special <- FALSE 197 | if (attr(output.act.fct, "type") == "tanh" || attr(output.act.fct, "type") == "logistic" || attr(output.act.fct, "type") == "relu") 198 | output.special <- TRUE 199 | else output.special <- FALSE 200 | if (linear.output) { 201 | output.act.fct <- function(x) { 202 | x 203 | } 204 | output.act.deriv.fct <- function(x) { 205 | matrix(1, nrow(x), ncol(x)) 206 | } 207 | } 208 | else { 209 | if (attr(err.fct, "type") == "ce" && attr(act.fct, "type") == "logistic") { 210 | err.deriv.fct <- function(x, y) { 211 | x * (1 - y) - y * (1 - x) 212 | } 213 | linear.output <- TRUE 214 | } 215 | #output.act.fct <- act.fct 216 | #output.act.deriv.fct <- act.deriv.fct 217 | } 218 | result <- compute.net(weights, length.weights, covariate = covariate, 219 | act.fct = act.fct, act.deriv.fct =
act.deriv.fct, output.act.fct = output.act.fct, 220 | output.act.deriv.fct = output.act.deriv.fct, special, output.special) 221 | err.deriv <- err.deriv.fct(result$net.result, response) 222 | gradients <- calculate.gradients(weights = weights, length.weights = length.weights, 223 | neurons = result$neurons, neuron.deriv = result$neuron.deriv, 224 | err.deriv = err.deriv, exclude = exclude, linear.output = linear.output) 225 | reached.threshold <- max(abs(gradients)) 226 | min.reached.threshold <- reached.threshold 227 | while (step < stepmax && reached.threshold > threshold) { 228 | if (!is.character(lifesign) && step%%lifesign.step == 229 | 0) { 230 | text <- paste("%", nchar.stepmax, "s", sep = "") 231 | message(sprintf(eval(expression(text)), step), "\tmin thresh: ", 232 | min.reached.threshold, "\n", rep(" ", lifesign), appendLF = FALSE) 233 | utils::flush.console() 234 | } 235 | if (algorithm == "rprop+") 236 | result <- plus(gradients, gradients.old, weights, 237 | nrow.weights, ncol.weights, learningrate, learningrate.factor, 238 | learningrate.limit, exclude) 239 | else if (algorithm == "backprop") 240 | result <- backprop(gradients, weights, length.weights, 241 | nrow.weights, ncol.weights, learningrate.bp, 242 | exclude) 243 | else result <- minus(gradients, gradients.old, weights, 244 | length.weights, nrow.weights, ncol.weights, learningrate, 245 | learningrate.factor, learningrate.limit, algorithm, 246 | exclude) 247 | gradients.old <- result$gradients.old 248 | weights <- result$weights 249 | learningrate <- result$learningrate 250 | result <- compute.net(weights, length.weights, covariate = covariate, 251 | act.fct = act.fct, act.deriv.fct = act.deriv.fct, 252 | output.act.fct = output.act.fct, output.act.deriv.fct = output.act.deriv.fct, 253 | special, output.special) 254 | err.deriv <- err.deriv.fct(result$net.result, response) 255 | gradients <- calculate.gradients(weights = weights, length.weights = length.weights, 256 | neurons = result$neurons, neuron.deriv = result$neuron.deriv, 257 | err.deriv = err.deriv, exclude = exclude, linear.output = linear.output) 258 | reached.threshold <- max(abs(gradients)) 259 | if (reached.threshold < min.reached.threshold) { 260 | min.reached.threshold <- reached.threshold 261 | } 262 | step <- step + 1 263 | } 264 | if (lifesign != "none" && reached.threshold > threshold) { 265 | message("stepmax\tmin thresh: ", min.reached.threshold) 266 | } 267 | return(list(weights = weights, step = as.integer(step), reached.threshold = reached.threshold, 268 | net.result = result$net.result, neuron.deriv = result$neuron.deriv)) 269 | } 270 | compute.net <- 271 | function (weights, length.weights, covariate, act.fct, act.deriv.fct, 272 | output.act.fct, output.act.deriv.fct, special, output.special) 273 | { 274 | neuron.deriv <- NULL 275 | neurons <- list(covariate) 276 | if (length.weights > 1) 277 | for (i in 1:(length.weights - 1)) { 278 | temp <- neurons[[i]] %*% weights[[i]] 279 | act.temp <- act.fct(temp) 280 | if (special) 281 | neuron.deriv[[i]] <- act.deriv.fct(act.temp) 282 | else neuron.deriv[[i]] <- act.deriv.fct(temp) 283 | neurons[[i + 1]] <- cbind(1, act.temp) 284 | } 285 | if (!is.list(neuron.deriv)) 286 | neuron.deriv <- list(neuron.deriv) 287 | temp <- neurons[[length.weights]] %*% weights[[length.weights]] 288 | net.result <- output.act.fct(temp) 289 | if (output.special) 290 | neuron.deriv[[length.weights]] <- output.act.deriv.fct(net.result) 291 | else neuron.deriv[[length.weights]] <- output.act.deriv.fct(temp) 292 | if
(any(is.na(neuron.deriv))) 293 | stop("neuron derivatives contain an NA; verify that the derivative function does not divide by 0", 294 | call. = FALSE) 295 | list(neurons = neurons, neuron.deriv = neuron.deriv, net.result = net.result) 296 | } 297 | calculate.gradients <- 298 | function (weights, length.weights, neurons, neuron.deriv, err.deriv, 299 | exclude, linear.output) 300 | { 301 | if (any(is.na(err.deriv))) 302 | stop("the error derivative contains an NA; verify that the derivative function does not divide by 0 (e.g. cross entropy)", 303 | call. = FALSE) 304 | if (!linear.output) 305 | delta <- neuron.deriv[[length.weights]] * err.deriv 306 | else delta <- err.deriv 307 | gradients <- crossprod(neurons[[length.weights]], delta) 308 | if (length.weights > 1) 309 | for (w in (length.weights - 1):1) { 310 | delta <- neuron.deriv[[w]] * tcrossprod(delta, weights[[w + 1]][-1,, drop = FALSE]) 311 | gradients <- c(crossprod(neurons[[w]], delta), gradients) 312 | } 313 | gradients[-exclude] 314 | } 315 | plus <- 316 | function (gradients, gradients.old, weights, nrow.weights, ncol.weights, 317 | learningrate, learningrate.factor, learningrate.limit, exclude) 318 | { 319 | weights <- unlist(weights) 320 | sign.gradient <- sign(gradients) 321 | temp <- gradients.old * sign.gradient 322 | positive <- temp > 0 323 | negative <- temp < 0 324 | not.negative <- !negative 325 | if (any(positive)) { 326 | learningrate[positive] <- pmin.int(learningrate[positive] * 327 | learningrate.factor$plus, learningrate.limit$max) 328 | } 329 | if (any(negative)) { 330 | weights[-exclude][negative] <- weights[-exclude][negative] + 331 | gradients.old[negative] * learningrate[negative] 332 | learningrate[negative] <- pmax.int(learningrate[negative] * 333 | learningrate.factor$minus, learningrate.limit$min) 334 | gradients.old[negative] <- 0 335 | if (any(not.negative)) { 336 | weights[-exclude][not.negative] <- weights[-exclude][not.negative] - 337 | sign.gradient[not.negative] * learningrate[not.negative] 338 | gradients.old[not.negative] <- sign.gradient[not.negative] 339 | } 340 | } 341 | else { 342 | weights[-exclude] <- weights[-exclude] - sign.gradient * 343 | learningrate 344 | gradients.old <- sign.gradient 345 | } 346 | list(gradients.old = gradients.old, weights = relist(weights, 347 | nrow.weights, ncol.weights), learningrate = learningrate) 348 | } 349 | backprop <- 350 | function (gradients, weights, length.weights, nrow.weights, ncol.weights, 351 | learningrate.bp, exclude) 352 | { 353 | weights <- unlist(weights) 354 | if (!is.null(exclude)) 355 | weights[-exclude] <- weights[-exclude] - gradients * 356 | learningrate.bp 357 | else weights <- weights - gradients * learningrate.bp 358 | list(gradients.old = gradients, weights = relist(weights, 359 | nrow.weights, ncol.weights), learningrate = learningrate.bp) 360 | } 361 | minus <- 362 | function (gradients, gradients.old, weights, length.weights, 363 | nrow.weights, ncol.weights, learningrate, learningrate.factor, 364 | learningrate.limit, algorithm, exclude) 365 | { 366 | weights <- unlist(weights) 367 | temp <- gradients.old * gradients 368 | positive <- temp > 0 369 | negative <- temp < 0 370 | if (any(positive)) 371 | learningrate[positive] <- pmin.int(learningrate[positive] * 372 | learningrate.factor$plus, learningrate.limit$max) 373 | if (any(negative)) 374 | learningrate[negative] <- pmax.int(learningrate[negative] * 375 | learningrate.factor$minus, learningrate.limit$min) 376 | if (algorithm != "rprop-") { 377 | delta <- 10^-6 378 |
notzero <- gradients != 0 379 | gradients.notzero <- gradients[notzero] 380 | if (algorithm == "slr") { 381 | min <- which.min(learningrate[notzero]) 382 | } 383 | else if (algorithm == "sag") { 384 | min <- which.min(abs(gradients.notzero)) 385 | } 386 | if (length(min) != 0) { 387 | temp <- learningrate[notzero] * gradients.notzero 388 | sum <- sum(temp[-min]) + delta 389 | learningrate[notzero][min] <- min(max(-sum/gradients.notzero[min], 390 | learningrate.limit$min), learningrate.limit$max) 391 | } 392 | } 393 | weights[-exclude] <- weights[-exclude] - sign(gradients) * 394 | learningrate 395 | list(gradients.old = gradients, weights = relist(weights, 396 | nrow.weights, ncol.weights), learningrate = learningrate) 397 | } 398 | calculate.generalized.weights <- 399 | function (weights, neuron.deriv, net.result) 400 | { 401 | for (w in 1:length(weights)) { 402 | weights[[w]] <- weights[[w]][-1,, drop = FALSE] 403 | } 404 | generalized.weights <- NULL 405 | for (k in 1:ncol(net.result)) { 406 | for (w in length(weights):1) { 407 | if (w == length(weights)) { 408 | temp <- neuron.deriv[[length(weights)]][, k] * 409 | 1/(net.result[, k] * (1 - (net.result[, k]))) 410 | delta <- tcrossprod(temp, weights[[w]][, k]) 411 | } 412 | else { 413 | delta <- tcrossprod(delta * neuron.deriv[[w]], 414 | weights[[w]]) 415 | } 416 | } 417 | generalized.weights <- cbind(generalized.weights, delta) 418 | } 419 | return(generalized.weights) 420 | } 421 | 422 | relist <- 423 | function (x, nrow, ncol) 424 | { 425 | list.x <- NULL 426 | for (w in 1:length(nrow)) { 427 | length <- nrow[w] * ncol[w] 428 | list.x[[w]] <- matrix(x[1:length], nrow = nrow[w], ncol = ncol[w]) 429 | x <- x[-(1:length)] 430 | } 431 | list.x 432 | } --------------------------------------------------------------------------------
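For orientation, a minimal sketch of how the internal routines in fit_neuralnet.R are reached in practice, assuming only the documented neuralnet() interface and the base-R infert data set used in the package examples (this sketch is illustrative and not part of the package sources):

library(neuralnet)

# Train a small network with resilient backpropagation (the default "rprop+"),
# cross-entropy error and a logistic output; neuralnet() prepares the model list
# and calls calculate.neuralnet() once per repetition, which in turn runs rprop().
set.seed(42)
nn <- neuralnet(case ~ age + parity + induced + spontaneous,
                data = infert, hidden = 2,
                err.fct = "ce", linear.output = FALSE)

# Forward pass through the fitted weights (compute.net() performs the same
# propagation internally during training).
head(predict(nn, infert))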