├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── NAMESPACE ├── R ├── AR.R ├── backpropagate_delta.R ├── batch_normalization.R ├── calculate_mu_sigma.R ├── dropout.R ├── error_functions.R ├── finetune_SGD.R ├── new_dnn.R ├── rectified_linear_unit_function.R ├── rsq.R ├── run_dnn.R ├── train_dnn.R └── util.R ├── README.Rmd ├── README.md ├── cran-comments.md ├── deeplearning.Rproj ├── inst ├── examples_classification.R ├── examples_regression.R ├── test_ReLU.R ├── test_batch_normalization_differential.R ├── test_fineTuneFunctions.R ├── test_finetune_SGD_bn.R ├── test_new_dnn.R ├── test_run_dnn.R └── test_train_dnn.R └── man ├── AR.DArch.Rd ├── AR.Rd ├── AR.default.Rd ├── AR.numeric.Rd ├── applyDropoutMask.Rd ├── backpropagate_delta_bn.Rd ├── batch_normalization.Rd ├── batch_normalization_differential.Rd ├── calcualte_population_mu_sigma.Rd ├── classification_error.Rd ├── convert_categorical.Rd ├── crossEntropyErr.Rd ├── finetune_SGD_bn.Rd ├── generateDropoutMask.Rd ├── generateDropoutMasksForDarch.Rd ├── matMult.Rd ├── meanSquareErr.Rd ├── new_dnn.Rd ├── print_weight.Rd ├── rectified_linear_unit_function.Rd ├── reset_population_mu_sigma.Rd ├── rsq.DArch.Rd ├── rsq.Rd ├── rsq.lm.Rd ├── run_dnn.Rd ├── train_dnn.Rd └── verticalize.Rd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^cran-comments\.md$ 4 | ^\.travis\.yml$ 5 | ^README\.Rmd$ 6 | ^README-.*\.png$ 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: R 2 | cache: packages -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: deeplearning 2 | Type: Package 3 | Title: An Implementation of Deep Neural Network for Regression and Classification 4 | Description: An implementation of deep neural network with rectifier linear units trained 5 | with stochastic gradient descent method and batch normalization. A combination of these methods have 6 | achieved state-of-the-art performance in ImageNet classification by overcoming the gradient saturation 7 | problem experienced by many deep architecture neural network models in the past. In addition, 8 | batch normalization and dropout are implemented as a means of regularization. The deeplearning package is 9 | inspired by the darch package and uses its class DArch. 
10 | Version: 0.1.0 11 | Date: 2016-04-10 12 | Authors@R: c( 13 | person(given = "Zhi", family = "Ruan", email = "ryan.zhiruan@gmail.com", role = c("aut", "cre")), 14 | person("Martin", "Drees", email = "mdrees@stud.fh-dortmund.de", role = c("cph")) 15 | ) 16 | LazyData: TRUE 17 | URL: https://github.com/rz1988/deeplearning 18 | BugReports: https://github.com/rz1988/deeplearning/issues 19 | Depends: 20 | R (>= 3.2.4), 21 | methods, 22 | darch (>= 0.10.0), 23 | Imports: 24 | plotly, 25 | futile.logger, 26 | graphics, 27 | stats 28 | License: GPL (>= 2) 29 | RoxygenNote: 5.0.1 30 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(AR,DArch) 4 | S3method(AR,default) 5 | S3method(AR,numeric) 6 | S3method(rsq,DArch) 7 | S3method(rsq,lm) 8 | export(AR) 9 | export(classification_error) 10 | export(convert_categorical) 11 | export(crossEntropyErr) 12 | export(meanSquareErr) 13 | export(new_dnn) 14 | export(print_weight) 15 | export(rectified_linear_unit_function) 16 | export(rsq) 17 | export(train_dnn) 18 | import(futile.logger) 19 | importClassesFrom(darch,DArch) 20 | importFrom(darch,createDataSet) 21 | importFrom(darch,generateWeights) 22 | importFrom(darch,getDropoutMask) 23 | importFrom(darch,getEpochs) 24 | importFrom(darch,getLayer) 25 | importFrom(darch,getLayerWeights) 26 | importFrom(darch,getLayers) 27 | importFrom(darch,getMomentum) 28 | importFrom(darch,linearUnitDerivative) 29 | importFrom(darch,validateDataSet) 30 | importFrom(graphics,plot) 31 | importFrom(methods,new) 32 | importFrom(stats,predict) 33 | -------------------------------------------------------------------------------- /R/AR.R: -------------------------------------------------------------------------------- 1 | #' Calculates the Accuracy Ratio of a classifier 2 | #' 3 | #' This function calculates the Accuracy Ratio of a binary classification 4 | #' model 5 | #' 6 | #' 7 | #' @param x model 8 | #' @param ... additional inputs 9 | #' 10 | #' @export 11 | 12 | AR <- function(x, ...) { 13 | UseMethod("AR") 14 | } 15 | 16 | 17 | #' Calculates the Accruacy Ratio of a given set of probability 18 | #' 19 | #' This function calculates the Accuracy Ratio of a binary classification model 20 | #' output against its targets 21 | #' 22 | #' @param x a list of model output in the form of probabilities 23 | #' @param target binary response 24 | #' @param ... additional inputs 25 | #' @export 26 | 27 | AR.numeric <- function(x, target, ...) { 28 | AR.default(x, target) 29 | } 30 | 31 | 32 | #' Calculates the Accruacy Ratio of a given set of probability 33 | #' 34 | #' This function calculates the Accuracy Ratio of a binary classification model 35 | #' output against its targets 36 | #' 37 | #' @param x a list of model output in the form of probabilities 38 | #' @param target binary response 39 | #' @param ... additional inputs 40 | #' @importFrom graphics plot 41 | #' 42 | #' @export 43 | 44 | AR.default <- function(x, target, ...) 
{ 45 | N <- length(x) 46 | seq = order(x, decreasing = T) 47 | target <- target[seq] 48 | auc <- 0 49 | totTarget <- sum(target) 50 | y <- c() 51 | for (i in 1:N) { 52 | lorenzeCurve <- sum(target[1:i]) / totTarget 53 | auc <- auc + lorenzeCurve * 1 / N 54 | y <- cbind(y, lorenzeCurve) 55 | } 56 | auc <- auc 57 | pd <- sum(target) / N 58 | ar <- (2 * auc - 1) / (1 - pd) 59 | plot(as.vector(y), xlab = "Population", ylab = "Fraction of Positive") 60 | if(ar > 1) ar <- 1 61 | return (ar) 62 | } 63 | 64 | #' Calculates the Accruacy Ratio of a given set of probability 65 | #' 66 | #' This function calculates the Accuracy Ratio of a trained darch instance 67 | #' 68 | #' @param x a DArch instance 69 | #' @param input the input matrix 70 | #' @param target binary response 71 | #' @param ... additional inputs 72 | #' 73 | #' @importFrom stats predict 74 | #' 75 | #' @export 76 | 77 | 78 | 79 | AR.DArch <- function(x, input = x@dataSet@data, 80 | target = x@dataSet@targets, ...) { 81 | pred <- predict(x, newdata = input) 82 | AR.default(pred, target) 83 | } 84 | 85 | 86 | -------------------------------------------------------------------------------- /R/backpropagate_delta.R: -------------------------------------------------------------------------------- 1 | #' Calculates the delta functions using backpropagation 2 | #' 3 | #' function that calculates the delta function of a darch object with batch 4 | #' normalization 5 | #' 6 | #' @param darch a darch instance 7 | #' @param trainData training input 8 | #' @param targetData training target 9 | #' @param errorFunc error function to minimize during training. Right now mean squared 10 | #' erros and cross entropy errors are supported. 11 | #' @param with_BN traing with batch normalization on or off 12 | #' 13 | #' @importFrom darch getLayer 14 | #' @importFrom darch getDropoutMask 15 | #' 16 | #' 17 | #' @references Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift 18 | #' Sergey Ioffe, Christian Szegedy 19 | #' @seealso \url{http://jmlr.org/proceedings/papers/v37/ioffe15.pdf} Pg 4 20 | 21 | 22 | backpropagate_delta_bn <- function(darch, 23 | trainData, 24 | targetData, 25 | errorFunc = meanSquareErr, 26 | with_BN = TRUE) { 27 | 28 | numLayers <- length(darch@layers) 29 | layers <- list() 30 | epsilon <- exp(-12) # a numerical stablizaer used in batch normalization 31 | numObs <- dim(trainData)[[1]] 32 | 33 | for(i in 1:numLayers) { 34 | ret <- getLayer(darch, i)[[1]] 35 | dimV_input <- dim(ret)[[1]] - 1 36 | dimV_output <- dim(ret)[[2]] 37 | 38 | layers[["weight"]][[i]] <- ret[1:dimV_input, ] 39 | 40 | if(length(getLayer(darch, i)) < 4 | with_BN == FALSE) { 41 | layers[["gamma"]][[i]] <- 42 | matrix(rep(1, dimV_output * numObs), numObs, byrow = TRUE) 43 | } else { 44 | layers[["gamma"]][[i]] <- 45 | matrix(rep(getLayer(darch, i)[[4]], numObs), numObs, byrow = TRUE) 46 | } 47 | 48 | layers[["beta"]][[i]] <- verticalize(ret[(dimV_input + 1),], numObs) 49 | 50 | layers[["x"]][[i]] <- list() 51 | layers[["mu"]][[i]] <- list() 52 | layers[["sigma_2"]][[i]] <- list() 53 | layers[["x_hat"]][[i]] <- list() 54 | layers[["y"]][[i]] <- list() 55 | 56 | layers[["delta_weight"]][[i]] <- list() 57 | layers[["delta_x"]][[i]] <- list() 58 | layers[["delta_y"]][[i]] <- list() 59 | layers[["delta_beta"]][[i]] <- list() 60 | layers[["delta_gamma"]][[i]] <- list() 61 | layers[["output"]][[i]] <- list() 62 | layers[["derivative"]][[i]] <- list() 63 | } 64 | 65 | # apply input dropout mask to data 66 | # TODO same input 
dropout mask for all data in a batch? 67 | trainData <- applyDropoutMask(trainData, getDropoutMask(darch, 0)) 68 | 69 | # 1. Forwardpropagate 70 | data <- trainData 71 | for (i in 1:numLayers){ 72 | weights <- layers[["weight"]][[i]] 73 | func <- getLayer(darch, i)[[2]] 74 | # Batch Normalization 75 | layers[["x"]][[i]] <- data %*% weights 76 | 77 | if(length(getLayer(darch, i)) < 4 | with_BN == FALSE) { 78 | ret <- batch_normalization(layers[["x"]][[i]], 79 | layers[["gamma"]][[i]], 80 | layers[["beta"]][[i]], 81 | mu = verticalize(rep(0, dim(layers[["gamma"]][[i]])[[2]]), numObs), 82 | sigma_2 = verticalize(rep(1 - epsilon, dim(layers[["gamma"]][[i]])[[2]]), numObs), 83 | epsilon = epsilon 84 | ) 85 | 86 | } else { 87 | ret <- batch_normalization(layers[["x"]][[i]], 88 | layers[["gamma"]][[i]], 89 | layers[["beta"]][[i]], 90 | epsilon = epsilon ) 91 | } 92 | layers[["mu"]][[i]] <- ret[[1]] 93 | layers[["sigma_2"]][[i]] <- ret[[2]] 94 | layers[["x_hat"]][[i]] <- ret[[3]] 95 | layers[["y"]][[i]] <- ret[[4]] 96 | 97 | ret <- list() 98 | 99 | unit_matrix <- diag(dim(layers[['y']][[i]])[[2]]) 100 | ret <- func(layers[["y"]][[i]],unit_matrix) 101 | # apply dropout masks to output, unless we're on the last layer 102 | if (i < numLayers) 103 | { 104 | ret[[1]] <- applyDropoutMask(ret[[1]], getDropoutMask(darch, i)) 105 | ret[[2]] <- applyDropoutMask(ret[[2]], getDropoutMask(darch, i)) 106 | } 107 | 108 | layers[["output"]][[i]] <- ret[[1]] 109 | data <- ret[[1]] 110 | layers[["derivative"]][[i]] <- ret[[2]] 111 | } 112 | 113 | # End of forward propagation 114 | 115 | # 2. Calculate the Error on the network output layer 116 | errorDerivative <- errorFunc(layers[["output"]][[numLayers]], targetData)[[2]] 117 | layers[["delta_y"]][[numLayers]] <- errorDerivative * layers[["derivative"]][[numLayers]] 118 | 119 | if(length(getLayer(darch, numLayers)) < 4 | with_BN == FALSE) { 120 | ret <- batch_normalization_differential(layers[["delta_y"]][[numLayers]], 121 | layers[["mu"]][[numLayers]], 122 | layers[["sigma_2"]][[numLayers]], 123 | layers[["x"]][[numLayers]], 124 | layers[["x_hat"]][[numLayers]], 125 | layers[["y"]][[numLayers]], 126 | layers[["gamma"]][[numLayers]], 127 | layers[["beta"]][[numLayers]], 128 | with_BN = FALSE 129 | ) 130 | 131 | } else { 132 | ret <- batch_normalization_differential(layers[["delta_y"]][[numLayers]], 133 | layers[["mu"]][[numLayers]], 134 | layers[["sigma_2"]][[numLayers]], 135 | layers[["x"]][[numLayers]], 136 | layers[["x_hat"]][[numLayers]], 137 | layers[["y"]][[numLayers]], 138 | layers[["gamma"]][[numLayers]], 139 | layers[["beta"]][[numLayers]], 140 | with_BN = TRUE) 141 | } 142 | 143 | layers[["delta_x"]][[numLayers]] <- ret[[1]] 144 | layers[["delta_gamma"]][[numLayers]] <- ret[[2]] 145 | layers[["delta_beta"]][[numLayers]] <- ret[[3]] 146 | 147 | if (numLayers > 1) { 148 | layers[["delta_weight"]][[numLayers]] <- t(layers[["output"]][[numLayers - 1]]) %*% 149 | layers[["delta_y"]][[numLayers]] 150 | } else { 151 | layers[["delta_weight"]][[numLayers]] <- t(trainData) %*% 152 | layers[["delta_y"]][[numLayers]] 153 | } 154 | # End of calculation 155 | 156 | # 3. 
Backpropagate the error 157 | for(i in (numLayers-1):1){ 158 | error <- layers[["delta_x"]][[i+1]] %*% t(layers[["weight"]][[i + 1]]) 159 | # zero derivatives makes sure that dropout nodes' delta functions are zeros 160 | layers[["delta_y"]][[i]] <- error * layers[["derivative"]][[i]] 161 | 162 | if(length(getLayer(darch, i)) < 4 | with_BN == FALSE) { 163 | ret <- batch_normalization_differential(layers[["delta_y"]][[i]], 164 | layers[["mu"]][[i]], 165 | layers[["sigma_2"]][[i]], 166 | layers[["x"]][[i]], 167 | layers[["x_hat"]][[i]], 168 | layers[["y"]][[i]], 169 | layers[["gamma"]][[i]], 170 | layers[["beta"]][[i]], 171 | with_BN = FALSE) 172 | 173 | } else { 174 | ret <- batch_normalization_differential(layers[["delta_y"]][[i]], 175 | layers[["mu"]][[i]], 176 | layers[["sigma_2"]][[i]], 177 | layers[["x"]][[i]], 178 | layers[["x_hat"]][[i]], 179 | layers[["y"]][[i]], 180 | layers[["gamma"]][[i]], 181 | layers[["beta"]][[i]], 182 | with_BN = TRUE) 183 | } 184 | 185 | layers[["delta_x"]][[i]] <- ret[[1]] 186 | layers[["delta_gamma"]][[i]] <- ret[[2]] 187 | layers[["delta_beta"]][[i]] <- ret[[3]] 188 | 189 | if (i > 1) { 190 | layers[["delta_weight"]][[i]] <- t(layers[["output"]][[i - 1]]) %*% layers[["delta_y"]][[i]] 191 | } else { 192 | layers[["delta_weight"]][[i]] <- t(trainData) %*% layers[["delta_y"]][[i]] 193 | } 194 | 195 | } 196 | 197 | ret <- list() 198 | ret[[1]] <- layers[["delta_weight"]] 199 | ret[[2]] <- layers[["delta_beta"]] 200 | ret[[3]] <- layers[["delta_gamma"]] 201 | ret[[4]] <- layers[["output"]] 202 | ret[[5]] <- layers[["derivative"]] 203 | ret[[6]] <- layers[["delta_mu"]] 204 | ret[[7]] <- layers[["delta_sigma_2"]] 205 | ret[[8]] <- layers[["mu"]] 206 | ret[[9]] <- layers[["sigma_2"]] 207 | return(ret) 208 | } 209 | -------------------------------------------------------------------------------- /R/batch_normalization.R: -------------------------------------------------------------------------------- 1 | #' Batch Normalization Function that normalizes the input before applying non-linearity 2 | #' 3 | #' This function normalizes the distribution of inputs to hidden layers in 4 | #' a neural network 5 | #' @param x weighted sum of outputs from the previous layer 6 | #' @param gamma the gamma coefficient 7 | #' @param beta the beta coefficient 8 | #' @param mu the mean of the input neurons. If NULL, it will be caluclated in the function. 9 | #' @param sigma_2 the variance of the input nerurons. If NULL, it will be calcualted in the function. 
10 | #' @param epsilon a constant added to the variance for numerical stability 11 | #' @references Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift 12 | #' Sergey Ioffe, Christian Szegedy 13 | #' @seealso \url{http://jmlr.org/proceedings/papers/v37/ioffe15.pdf} Pg 4 14 | 15 | batch_normalization <- function(x, 16 | gamma, 17 | beta, 18 | mu = NULL, 19 | sigma_2 = NULL, 20 | epsilon = exp(-12)) { 21 | 22 | # helper function that repeat a row vector N times 23 | verticalize <- function(vector, N) { 24 | return(matrix(rep(vector, N), N, byrow = T)) 25 | } 26 | 27 | numObs <- dim(x)[[1]] 28 | if(is.null(mu)) { 29 | mu <-verticalize(colMeans(x), numObs) 30 | } 31 | 32 | if(is.null(sigma_2)) { 33 | sigma_2 <- numObs / (numObs - 1) * (verticalize(colMeans(x^2), numObs) - mu^2) 34 | } 35 | 36 | 37 | 38 | x_hat <- (x - mu) / sqrt(sigma_2 + epsilon) 39 | y <- x_hat * gamma + beta 40 | 41 | ret <- list() 42 | ret[[1]] <- mu 43 | ret[[2]] <- sigma_2 44 | ret[[3]] <- x_hat 45 | ret[[4]] <- y 46 | return(ret) 47 | } 48 | 49 | #' Function that calcualtes the differentials in the batch normalization mode 50 | #' 51 | #' Calculates the differentials in batch normalization 52 | #' 53 | #' @param delta_y derivative wrt y 54 | #' @param mu mean of the input 55 | #' @param sigma_2 variance of the input 56 | #' @param x input 57 | #' @param x_hat normalized input 58 | #' @param y transformed input after batch normalization 59 | #' @param gamma gamma coefficient 60 | #' @param beta beta coefficient 61 | #' @param epsilon the contant added to the variance for numeric stability 62 | #' @param with_BN logical value, set to TRUE to turn on batch normalization 63 | #' 64 | #' @references Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift 65 | #' Sergey Ioffe, Christian Szegedy 66 | #' @seealso \url{http://jmlr.org/proceedings/papers/v37/ioffe15.pdf} Pg 4 67 | 68 | batch_normalization_differential <- function(delta_y, 69 | mu, 70 | sigma_2, 71 | x, 72 | x_hat, 73 | y, 74 | gamma, 75 | beta, 76 | epsilon = exp(-12), 77 | with_BN = T) { 78 | # helper function that repeat a row vector N times 79 | verticalize <- function(vector, N) { 80 | return(matrix(rep(vector, N), N, byrow = T)) 81 | } 82 | numObs <- dim(x)[[1]] 83 | 84 | delta_x_hat <- delta_y * gamma 85 | 86 | if(with_BN) { 87 | delta_sigma_2 <- verticalize(colSums(delta_x_hat * (x - mu) * (-0.5) * (sigma_2 + epsilon)^(-1.5)), numObs) 88 | 89 | tmp1 <- verticalize(colSums(delta_x_hat * (-1) / sqrt(sigma_2 + epsilon)), numObs) 90 | tmp2 <- delta_sigma_2 * verticalize(colMeans(-2 * (x- mu)), numObs) 91 | 92 | delta_mu <- tmp1 + tmp2 93 | 94 | delta_gamma <- verticalize(colSums(delta_y * x_hat), numObs) 95 | } else { 96 | delta_sigma_2 <- verticalize(rep(0, dim(delta_y)[[2]]), numObs) 97 | delta_mu <- verticalize(rep(0, dim(delta_y)[[2]]), numObs) 98 | delta_gamma <- verticalize(rep(0, dim(delta_y)[[2]]), numObs) 99 | } 100 | 101 | tmp1 <- delta_x_hat / sqrt(sigma_2 + epsilon) 102 | tmp2 <- delta_sigma_2 * 2 * (x - mu) / numObs 103 | tmp3 <- delta_mu / numObs 104 | delta_x <- tmp1 + tmp2 + tmp3 105 | 106 | delta_beta <- verticalize(colSums(delta_y), numObs) 107 | 108 | ret <- list() 109 | ret[[1]] <- delta_x 110 | ret[[2]] <- delta_gamma 111 | ret[[3]] <- delta_beta 112 | ret[[4]] <- delta_x_hat 113 | ret[[5]] <- delta_sigma_2 114 | ret[[6]] <- delta_mu 115 | return(ret) 116 | } 117 | -------------------------------------------------------------------------------- 
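# A usage sketch (not part of the package sources): a quick check of the
# forward transform implemented in batch_normalization() above, assuming the
# function has been sourced. The toy inputs x, gamma and beta below are
# illustrative only. Before gamma and beta are applied, the normalized
# activations x_hat should have roughly zero mean and unit variance per column.
x     <- matrix(rnorm(50), nrow = 10, ncol = 5)  # 10 observations, 5 hidden units
gamma <- matrix(1, nrow = 10, ncol = 5)          # scale, repeated for every observation
beta  <- matrix(0, nrow = 10, ncol = 5)          # shift, repeated for every observation
ret   <- batch_normalization(x, gamma, beta)     # mu and sigma_2 estimated from the batch
round(colMeans(ret[[3]]), 6)                     # x_hat column means, approximately 0
round(apply(ret[[3]], 2, var), 6)                # x_hat column variances, approximately 1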
/R/calculate_mu_sigma.R: -------------------------------------------------------------------------------- 1 | #' Calculates the mu and sigmas of a darch instance 2 | #' 3 | #' This function calculates the mu and sigmas of hidden layers in a darch instance 4 | #' @param darch a darch instance 5 | #' @param input input data 6 | #' 7 | #' @importFrom darch getLayer 8 | #' 9 | #' 10 | 11 | 12 | 13 | 14 | calcualte_population_mu_sigma <- function (darch, input) { 15 | numLayers <- length(darch@layers) 16 | layers <- list() 17 | epsilon <- exp(-12) # a numerical stablizaer used in batch normalization 18 | numObs <- dim(input)[[1]] 19 | 20 | for(i in 1:numLayers) { 21 | ret <- getLayer(darch, i)[[1]] 22 | dimV_input <- dim(ret)[[1]] - 1 23 | dimV_output <- dim(ret)[[2]] 24 | 25 | layers[["weight"]][[i]] <- ret[1:dimV_input, ] 26 | 27 | layers[["gamma"]][[i]] <- 28 | matrix(rep(getLayer(darch, i)[[4]], numObs), numObs, byrow = T) 29 | 30 | layers[["beta"]][[i]] <- verticalize(ret[(dimV_input + 1),], numObs) 31 | 32 | layers[["x"]][[i]] <- list() 33 | layers[["mu"]][[i]] <- list() 34 | layers[["sigma_2"]][[i]] <- list() 35 | layers[["x_hat"]][[i]] <- list() 36 | layers[["y"]][[i]] <- list() 37 | } 38 | 39 | # Forwardpropagate 40 | data <- input 41 | for (i in 1:numLayers){ 42 | weights <- layers[["weight"]][[i]] 43 | func <- getLayer(darch, i)[[2]] 44 | # Batch Normalization 45 | layers[["x"]][[i]] <- data %*% weights 46 | 47 | ret <- batch_normalization(layers[["x"]][[i]], 48 | layers[["gamma"]][[i]], 49 | layers[["beta"]][[i]], 50 | epsilon = epsilon ) 51 | 52 | layers[["mu"]][[i]] <- ret[[1]] 53 | layers[["sigma_2"]][[i]] <- ret[[2]] 54 | layers[["x_hat"]][[i]] <- ret[[3]] 55 | layers[["y"]][[i]] <- ret[[4]] 56 | 57 | ret <- list() 58 | 59 | unit_matrix <- diag(dim(layers[['y']][[i]])[[2]]) 60 | ret <- func(layers[["y"]][[i]],unit_matrix) 61 | 62 | layers[["output"]][[i]] <- ret[[1]] 63 | data <- ret[[1]] 64 | layers[["derivative"]][[i]] <- ret[[2]] 65 | } 66 | 67 | # End of forward propagation 68 | 69 | for (i in 1:numLayers) { 70 | darch@layers[[i]][[5]] <- layers[["mu"]][[i]][1, ] 71 | darch@layers[[i]][[6]] <- layers[["sigma_2"]][[i]][1, ] 72 | } 73 | 74 | return (darch) 75 | } 76 | 77 | #' Resets the mu and sigmas of a darch instance to 0 and 1 78 | #' 79 | #' This function resets the mu and sigmas of hidden layers in a darch instance 80 | #' to 0 and 1 81 | #' @param darch a darch instance 82 | #' 83 | #' @importFrom darch getLayer 84 | #' 85 | 86 | 87 | 88 | reset_population_mu_sigma <- function (darch) { 89 | numLayers <- length(darch@layers) 90 | epsilon <- exp(-12) # a numerical stablizaer used in batch normalization 91 | 92 | for(i in 1:numLayers) { 93 | ret <- getLayer(darch, i)[[1]] 94 | dimV_output <- dim(ret)[[2]] 95 | darch@layers[[i]][[5]] <- rep(0, dimV_output) 96 | darch@layers[[i]][[6]] <- rep(1 - epsilon, dimV_output) 97 | } 98 | 99 | return (darch) 100 | } 101 | 102 | -------------------------------------------------------------------------------- /R/dropout.R: -------------------------------------------------------------------------------- 1 | #' Generates dropout masks for dnn 2 | #' 3 | #' This function generates dropout maks for dnn 4 | #' @param darch, a DArch instance 5 | #' @param dropout_input, the dropout rate for the input layer 6 | #' @param dropout_hidden, the dropout rate for the hidden layer 7 | #' 8 | #' @importFrom darch getLayers getLayerWeights 9 | #' @references Dropout: A Simple Way to Prevent Neural Networks from 10 | #' Overfitting, Nitish Srivastava 
11 | #' @seealso \url{https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf} 12 | 13 | 14 | 15 | generateDropoutMasksForDarch <- function(darch, dropout_input, dropout_hidden) 16 | { 17 | dropoutMasks <- list() 18 | numLayers <- length(getLayers(darch)) 19 | # generate dropout masks 20 | darch@dropoutMasks[[1]]<- 21 | generateDropoutMask(nrow(getLayerWeights(darch, 1)[]) - 1, 22 | dropout_input) 23 | 24 | for (i in 1:(numLayers - 1)) 25 | { 26 | darch@dropoutMasks[[i + 1]] <- 27 | generateDropoutMask(nrow(getLayerWeights(darch, i+1)[])-1, 28 | dropout_hidden) 29 | } 30 | 31 | return (darch) 32 | } 33 | 34 | #' Generates the dropout mask for the deep neural network 35 | #' 36 | #' This function generates the dropout mask for the deep neural network 37 | #' @param length, the dimension of the layer 38 | #' @param dropoutRate, the dropout rate 39 | #' 40 | #' @references Dropout: A Simple Way to Prevent Neural Networks from 41 | #' Overfitting, Nitish Srivastava 42 | #' @seealso \url{https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf} 43 | 44 | 45 | 46 | generateDropoutMask <- function(length, dropoutRate) 47 | { 48 | if (dropoutRate == 0) 49 | { 50 | ret <- rep(1, length) 51 | } 52 | else 53 | { 54 | ret <- sample(c(0, 1/(1 - dropoutRate)), length, replace = T, 55 | prob = c(dropoutRate, 1 - dropoutRate)) 56 | } 57 | 58 | return (ret) 59 | } 60 | 61 | 62 | 63 | #' Applies the given dropout mask to the given data row-wise. 64 | #' 65 | #' This function multiplies each row with the dropout mask. To apply the dropout 66 | #' mask by row, it can simply be multiplied with the data matrix. This does not 67 | #' work of the mask is to be applied row-wise, hence this function. 68 | #' 69 | #' @param data Data to which the dropout mask should be applied 70 | #' @param mask The dropout mask, a vector of 0 and 1. 
71 | #' @return Data with applied dropout mask
72 | #'
73 | #' @references Dropout: A Simple Way to Prevent Neural Networks from
74 | #' Overfitting, Nitish Srivastava
75 | #' @seealso \url{https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf}
76 |
77 |
78 | applyDropoutMask <- function(data, mask)
79 | {
80 |   return (data * matrix(rep(mask, nrow(data)), nrow=nrow(data), byrow=T))
81 | }
82 |
83 |
--------------------------------------------------------------------------------
/R/error_functions.R:
--------------------------------------------------------------------------------
 1 | #' Calculates the cross entropy error
 2 | #'
 3 | #' This function calculates the cross entropy error and its first order derivatives
 4 | #'
 5 | #' @param output the output value
 6 | #' @param target the target value
 7 | #'
 8 | #' @export
 9 |
10 | crossEntropyErr <- function(output, target) {
11 |   # err <- - sum(target[] * log(output[]) + (1 - target[]) * log(1 - output[]))
12 |   err <- - sum(target * log(output) + (1 - target) * log(1 - output))
13 |   err2 <- (1-target)/(1-output) - target/output
14 |
15 |   ret <- list()
16 |   ret[[1]] <- err
17 |   ret[[2]] <- err2
18 |   ret[[3]] <- "Cross Entropy Error"
19 |   return(ret)
20 | }
21 |
22 | #' Calculates the mean squared error
23 | #'
24 | #' This function calculates the mean squared error and its first order derivatives
25 | #'
26 | #' @param output the output value
27 | #' @param target the target value
28 | #'
29 | #' @export
30 |
31 | meanSquareErr <- function(output, target) {
32 |   err <- 1/2 * sum((output - target)^2) / dim(output)[[1]]
33 |   err2 <- (output - target)
34 |   ret <- list()
35 |   ret[[1]] <- err
36 |   ret[[2]] <- err2
37 |   ret[[3]] <- "Mean Squared Error"
38 |   return(ret)
39 |
40 | }
41 |
42 | #' Calculates the classification error
43 | #'
44 | #' This function calculates the classification error
45 | #'
46 | #' @param output the output of a classifier in the form of probability. Probability > 0.5
47 | #' will be treated as positive (target = 1).
48 | #' @param target the target variable 49 | #' 50 | #' @export 51 | 52 | classification_error <- function(output, target) { 53 | boolOut <- (output > 0.5) * 1 54 | boolOutTarget <- cbind(boolOut, target) 55 | rows <- nrow(target) 56 | cols <- ncol(target) 57 | classification_error <- sum(apply(boolOutTarget, 1, function(y) 58 | { any(y[1:cols] != y[(cols+1):(2*cols)])})) / rows * 100 59 | 60 | ret <- list() 61 | ret[[1]] <- classification_error 62 | ret[[2]] <- "Classification Error" 63 | return (ret) 64 | } 65 | -------------------------------------------------------------------------------- /R/finetune_SGD.R: -------------------------------------------------------------------------------- 1 | #' Updates a deep neural network's parameters using stochastic gradient descent 2 | #' method and batch normalization 3 | #' 4 | #' This function finetunes a DArch network using SGD approach 5 | #' 6 | #' @param darch a darch instance 7 | #' @param trainData training input 8 | #' @param targetData training target 9 | #' @param learn_rate_weight leanring rate for the weight matrices 10 | #' @param learn_rate_bias learning rate for the biases 11 | #' @param learn_rate_gamma learning rate for the gammas 12 | #' @param errorFunc the error function to minimize during training 13 | #' @param with_BN logical value, T to train the neural net with batch normalization 14 | #' 15 | #' @importFrom darch getLayer getDropoutMask getMomentum 16 | #' 17 | #' @return a darch instance with parameters updated with stochastic gradient descent 18 | #' 19 | 20 | finetune_SGD_bn <- function(darch, 21 | trainData, 22 | targetData, 23 | learn_rate_weight = exp(-10), 24 | learn_rate_bias = exp(-10), 25 | learn_rate_gamma = exp(-10), 26 | errorFunc = meanSquareErr, 27 | with_BN = T) { 28 | # stats <- getStats(darch) 29 | 30 | ret <- backpropagate_delta_bn(darch, trainData, targetData, errorFunc, with_BN) 31 | delta_weight <- ret[[1]] 32 | delta_beta <- ret[[2]] 33 | delta_gamma <- ret[[3]] 34 | 35 | learnRateBiases <- learn_rate_bias 36 | learnRateWeights <- learn_rate_weight 37 | learnRateGamma <- learn_rate_gamma 38 | 39 | numLayers <- length(delta_weight) 40 | 41 | for(i in numLayers:1) { 42 | weights <- getLayer(darch, i)[[1]] 43 | biases <- weights[nrow(weights),,drop=F] 44 | weights <- weights[1:(nrow(weights)-1),,drop=F] 45 | gamma <- getLayer(darch, i)[[4]] 46 | weightsChange_prev <- getLayer(darch, i)[[3]] 47 | 48 | # Calculate the change in weights 49 | # apply dropout mask to momentum 50 | weightsInc <- (learnRateWeights * delta_weight[[i]]) 51 | weightsChange <- weightsInc + (getMomentum(darch) * 52 | weightsChange_prev * getDropoutMask(darch, i-1) 53 | ) 54 | weights <- weights - weightsChange 55 | 56 | # Calculate the change in beta (biases) 57 | biasesInc <- learnRateBiases * delta_beta[[i]][1,] 58 | biases <- biases - biasesInc 59 | 60 | # Calculate the change in gamma 61 | gammaInc <- learnRateGamma * delta_gamma[[i]][1,] 62 | gamma <- gamma - gammaInc 63 | 64 | darch@layers[[i]][[1]] <- rbind(weights,biases) 65 | darch@layers[[i]][[3]] <- weightsInc 66 | darch@layers[[i]][[4]] <- gamma 67 | } 68 | 69 | # setStats(darch) <- stats 70 | return(darch) 71 | 72 | } 73 | -------------------------------------------------------------------------------- /R/new_dnn.R: -------------------------------------------------------------------------------- 1 | #' Creats a new instance of darch class 2 | #' 3 | #' This function creates a new instance of darch class 4 | #' 5 | #' @param layer_structure a int vector that 
specifies the number and width of layers 6 | #' @param layer_functions a list of activation functions used by each layer 7 | #' @param output_layer_default the activation function for the output layer 8 | #' @param hidden_layer_default the activation function for the hidden layers 9 | #' @param weight_initiliazaiton function that initialize a layer's weight matrix 10 | #' 11 | #' @importFrom darch linearUnitDerivative generateWeights createDataSet 12 | #' @importFrom methods new 13 | #' @importClassesFrom darch DArch 14 | #' @examples 15 | #' # create a new deep neural network for classificaiton 16 | #' dnn_regression <- new_dnn( 17 | #' c(2, 50, 50, 20, 1), 18 | #' # The layer structure of the deep neural network. 19 | #' # The first element is the number of input variables. 20 | #' # The last element is the number of output variables. 21 | #' hidden_layer_default = rectified_linear_unit_function, 22 | #' # for hidden layers, use rectified_linear_unit_function 23 | #' output_layer_default = sigmoidUnitDerivative 24 | #' # for classification, use sigmoidUnitDerivative function 25 | #' ) 26 | #' 27 | #' # create a new deep neural network for classificaiton 28 | #'dnn_regression <- new_dnn( 29 | #' c(2, 50, 50, 20, 1), 30 | #' # The layer structure of the deep neural network. 31 | #' # The first element is the number of input variables. 32 | #' # The last element is the number of output variables. 33 | #' hidden_layer_default = rectified_linear_unit_function, 34 | #' # for hidden layers, use rectified_linear_unit_function 35 | #' output_layer_default = linearUnitDerivative 36 | #' # for regression, use linearUnitDerivative function 37 | #') 38 | #' @export 39 | 40 | new_dnn <- function(layer_structure, 41 | layer_functions = NULL, 42 | output_layer_default = linearUnitDerivative, 43 | hidden_layer_default = rectified_linear_unit_function, 44 | weight_initiliazaiton = generateWeights) { 45 | if (!is.null(layer_structure)) { 46 | # new a darch instance 47 | darch <-new("DArch") 48 | 49 | # set up the darch stats veriable 50 | darch@stats <- 51 | list("dataErrors" = list("raw"=c(), "class" = c()), 52 | "validErrors" = list("raw"=c(), "class" = c()), 53 | "times" = c(), "preTrainTime" = 0, "fineTuneTime" = 0) 54 | 55 | # set up the layers 56 | numLayers <- length(layer_structure) 57 | for (i in 1:(numLayers -1)) # first layer is an input layer 58 | { 59 | layer <- list() 60 | # element 1: initialize the layer weights 61 | dim_1 <- layer_structure[[i]] 62 | dim_2 <- layer_structure[[i + 1]] 63 | layer[[1]] <- weight_initiliazaiton(dim_1 + 1, dim_2) 64 | 65 | # element 2: set up the layer activation function 66 | if (is.null(layer_functions[[as.character(i)]])) 67 | { 68 | if (i < (numLayers - 1)) { 69 | layer[[2]] <- hidden_layer_default 70 | } else { 71 | layer[[2]] <- output_layer_default 72 | } 73 | } 74 | else 75 | { 76 | layer[[2]] <- layer_functions[[as.character(i)]] 77 | } 78 | 79 | # element 3: weight increase 80 | layer[[3]] <- matrix(0, dim_1, dim_2) 81 | 82 | # element 4: gamma coefficient in batch normalization 83 | layer[[4]] <- rep(1, dim_2) 84 | 85 | # element 5: mu coefficient in batch normalization 86 | layer[[5]] <- rep(0, dim_2) 87 | 88 | # element 6: sigma_2 coefficient in batch normalization 89 | layer[[6]] <- rep(1 - exp(-12), dim_2) 90 | 91 | # add layer to darch@layers 92 | darch@layers[[i]] <- layer 93 | } 94 | 95 | # set up the slots necessary for predict.DArch function 96 | darch@dataSet <- createDataSet(matrix(0, 1, layer_structure[[1]]), NULL) 97 | darch@ff <- F 98 | 
99 | # set up the execution function 100 | darch@executeFunction <- run_dnn 101 | } else { 102 | darch <- NULL 103 | flog.fatal("Illegal layer structures!") 104 | } 105 | return (darch) 106 | } 107 | -------------------------------------------------------------------------------- /R/rectified_linear_unit_function.R: -------------------------------------------------------------------------------- 1 | #' Rectified Linear Unit Function 2 | #' 3 | #' This functions calculates the value and the derivative of a rectified linear 4 | #' function. Reference Vinod Nair, Geoffrey Hinton, Rectified Linear Units 5 | #' Improve Restricted Boltzmann Machines 6 | #' 7 | #' @param data the data matrix for calculation 8 | #' @param weights the connection (weight matrix/filter) and the bias 9 | #' @return A list of function values and derivatives 10 | #' @export 11 | 12 | rectified_linear_unit_function <- function(data, weights) { 13 | ret <- list() 14 | a <- data %*% weights 15 | x <- a 16 | x[a<0] <- 0 17 | derivatives <- matrix(1, dim(a)[[1]], dim(a)[[2]]) 18 | derivatives[a<0] <- 0 19 | ret[[1]] <- x 20 | ret[[2]] <- derivatives 21 | return (ret) 22 | } 23 | -------------------------------------------------------------------------------- /R/rsq.R: -------------------------------------------------------------------------------- 1 | #' Calculate the RSQ of a regression model 2 | #' Utilitiy function that calcualtes RSQ of a model. It measures the goodness-of- 3 | #' fit of a regression model. 4 | #' 5 | #' @param x Regression Model 6 | #' @param ... Additional Input 7 | #' 8 | #' @import futile.logger 9 | #' @export 10 | 11 | rsq <- function(x, ...) { 12 | UseMethod("rsq", x) 13 | } 14 | 15 | #' Utilitiy function that calcualtes RSQ of a DArch instance 16 | #' 17 | #' Calcualte a regression model's RSQ of a deep neural network 18 | #' 19 | #' @param x DArch Model 20 | #' @param input Input data 21 | #' @param target Target data 22 | #' @param ... addtional inputs 23 | #' @import futile.logger 24 | #' @importFrom stats predict 25 | #' @importFrom graphics plot 26 | #' @export 27 | 28 | rsq.DArch <- function(x, 29 | input = x@dataSet@data, 30 | target = x@dataSet@targets, ...) { 31 | y <- target 32 | pred <- predict(x, newdata = input) 33 | plot(y, pred, xlab = "target", ylab = "prediction") 34 | RSQ <- 1 - sum((pred-y)^2)/sum((y-mean(y))^2) 35 | flog.info(paste0("RSQ = ", RSQ)) 36 | } 37 | 38 | #' Utilitiy function that calcualtes RSQ of a linear model 39 | #' 40 | #' Calcualte a regression model's RSQ 41 | #' 42 | #' @param x linear Model 43 | #' @param input Input data 44 | #' @param target Target data 45 | #' @param ... additional inputs 46 | #' @importFrom stats predict 47 | #' @importFrom graphics plot 48 | #' @import futile.logger 49 | #' @export 50 | 51 | rsq.lm <- function(x, input, target, ...) 
{ 52 | y <- target 53 | pred <- predict(x, newdata = data.frame(input)) 54 | plot(y, pred) 55 | plot(y, pred, xlab = "target", ylab = "prediction") 56 | RSQ <- 1 - sum((pred-y)^2)/sum((y-mean(y))^2) 57 | flog.info(paste0("RSQ = ", RSQ)) 58 | } 59 | -------------------------------------------------------------------------------- /R/run_dnn.R: -------------------------------------------------------------------------------- 1 | #' Execution function that runs in the batch normalization mode 2 | #' 3 | #' This function calcualtes the output of a deep neural network with input data 4 | #' 5 | #' @param darch a darch instance 6 | #' @param data input data 7 | 8 | 9 | run_dnn <- function(darch, data){ 10 | darch@executeOutput <- list() 11 | layers <- darch@layers 12 | # If there's only one row of input data, convert vector to matrix 13 | # TODO make sure that data is matrix before passing it to this function 14 | if(is.null(dim(data))){ 15 | data <- t(as.matrix(data)) 16 | } 17 | 18 | numRows <- dim(data)[1] 19 | 20 | output <- list() 21 | derivative <- list() 22 | 23 | for(i in 1:length(layers)){ 24 | ret <- layers[[i]][[1]] 25 | dimV_input <- dim(ret)[[1]] - 1 26 | dimV_output <- dim(ret)[[2]] 27 | 28 | weight <- ret[1:(dimV_input), ] 29 | beta <- verticalize(ret[(dimV_input + 1), ], numRows) 30 | 31 | gamma <- darch@layers[[i]][[4]] 32 | gamma <- verticalize(gamma, numRows) 33 | 34 | x <- data %*% weight 35 | 36 | mu <- verticalize(layers[[i]][[5]], numRows) 37 | 38 | sigma_2 <- verticalize(layers[[i]][[6]], numRows) 39 | 40 | ret <- batch_normalization(x, gamma, beta, mu, sigma_2) 41 | 42 | y <- ret[[4]] 43 | 44 | unit_matrix <- diag(dim(y)[[2]]) 45 | ret <- layers[[i]][[2]](y, unit_matrix) 46 | data <- ret[[1]] 47 | output[[i]] <- ret[[1]] 48 | derivative[[i]] <- ret[[2]] 49 | } 50 | 51 | darch@executeOutput <- output 52 | return(darch) 53 | } 54 | -------------------------------------------------------------------------------- /R/train_dnn.R: -------------------------------------------------------------------------------- 1 | #' Train a deep neural network 2 | #' 3 | #' This function trains a deep neural network 4 | #' 5 | #' @param darch a darch instance 6 | #' @param input input data for training 7 | #' @param target target data for training 8 | #' @param input_valid input data for validation 9 | #' @param target_valid target data for validation 10 | #' @param ... additional input 11 | #' @param learn_rate_weight learning rate for the weight matrices 12 | #' @param learn_rate_bias learning rate for the biases 13 | #' @param learn_rate_gamma learning rate for the gamma 14 | #' @param batch_size batch size during training 15 | #' @param batch_normalization logical value that determines whether to turn on 16 | #' batch normalization during training. Recommneded value: T 17 | #' @param dropout_input dropout ratio at input layer. Recommneded value: 0.2 18 | #' @param dropout_hidden dropout ratio at hidden layers. Recommended value: 0.5 19 | #' @param momentum_initial momentum ratio during training. Recommended value: 0.6 20 | #' @param momentum_final final momentum during training. Recommended value: 0.9 21 | #' @param momentum_switch afther which epoch the final momentum ratio is used during training 22 | #' @param num_epochs number of iterations of the training 23 | #' @param error_function error function to minimize during training 24 | #' @param report_classification_error logical value. 
T to report the classification error 25 | #' during training 26 | #' 27 | #' @importFrom darch createDataSet validateDataSet getEpochs 28 | #' @importFrom stats predict 29 | #' 30 | #' @examples 31 | #' # Example of Regression 32 | #' 33 | #' input <- matrix(runif(1000), 500, 2) 34 | #' input_valid <- matrix(runif(100), 50, 2) 35 | #' target <- rowSums(input + input^2) 36 | #' target_valid <- rowSums(input_valid + input_valid^2) 37 | #' # create a new deep neural network for classificaiton 38 | #' dnn_regression <- new_dnn( 39 | #' c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 40 | #' # The first element is the number of input variables. 41 | #' # The last element is the number of output variables. 42 | #' hidden_layer_default = rectified_linear_unit_function, 43 | #' # for hidden layers, use rectified_linear_unit_function 44 | #' output_layer_default = linearUnitDerivative 45 | #' # for regression, use linearUnitDerivative function 46 | #') 47 | #' 48 | #' dnn_regression <- train_dnn( 49 | #' dnn_regression, 50 | #' 51 | #' # training data 52 | #' input, # input variable for training 53 | #' target, # target variable for training 54 | #' input_valid, # input variable for validation 55 | #' target_valid, # target variable for validation 56 | #' 57 | #' # training parameters 58 | #' learn_rate_weight = exp(-8) * 10, 59 | #' # learning rate for weights, higher if use dropout 60 | #' learn_rate_bias = exp(-8) * 10, 61 | #' # learning rate for biases, hihger if use dropout 62 | #' learn_rate_gamma = exp(-8) * 10, 63 | #' # learning rate for the gamma factor used 64 | #' batch_size = 10, 65 | #' # number of observations in a batch during training. 66 | #' # Higher for faster training. Lower for faster convergence 67 | #' batch_normalization = TRUE, 68 | #' # logical value, T to use batch normalization 69 | #' dropout_input = 0.2, 70 | #' # dropout ratio in input. 71 | #' dropout_hidden = 0.5, 72 | #' # dropout ratio in hidden layers 73 | #' momentum_initial = 0.6, 74 | #' # initial momentum in Stochastic Gradient Descent training 75 | #' momentum_final = 0.9, 76 | #' # final momentum in Stochastic Gradient Descent training 77 | #' momentum_switch = 100, 78 | #' # after which the momentum is switched from initial to final momentum 79 | #' num_epochs = 5, 80 | #' # number of iterations in training 81 | #' # increase numbef of epochs to 100 for better model fit 82 | #' 83 | #' 84 | #' # Error function 85 | #' error_function = meanSquareErr, 86 | #' # error function to minimize during training. 
For regression, use meanSquareErr 87 | #' report_classification_error = FALSE 88 | #' # whether to print classification error during training 89 | #') 90 | #' 91 | #' 92 | #' # the prediciton by dnn_regression 93 | #' pred <- predict(dnn_regression) 94 | #' 95 | #' # calculate the r-squared of the prediciton 96 | #' rsq(dnn_regression) 97 | #' 98 | #' 99 | #' # calcualte the r-squared of the prediciton in validation 100 | #' rsq(dnn_regression, input = input_valid, target = target_valid) 101 | #' 102 | #' # print the layer weights 103 | #' # this function can print heatmap, histogram, or a surface 104 | #' print_weight(dnn_regression, 1, type = "heatmap") 105 | #' 106 | #' print_weight(dnn_regression, 2, type = "surface") 107 | #' 108 | #' print_weight(dnn_regression, 3, type = "histogram") 109 | #' 110 | #' 111 | #' # Examples of classification 112 | #' 113 | #'input <- matrix(runif(1000), 500, 2) 114 | #'input_valid <- matrix(runif(100), 50, 2) 115 | #'target <- (cos(rowSums(input + input^2)) > 0.5) * 1 116 | #'target_valid <- (cos(rowSums(input_valid + input_valid^2)) > 0.5) * 1 117 | #' 118 | #'# create a new deep neural network for classificaiton 119 | #'dnn_classification <- new_dnn( 120 | #' c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 121 | #' # The first element is the number of input variables. 122 | #' # The last element is the number of output variables. 123 | #' hidden_layer_default = rectified_linear_unit_function, 124 | #' # for hidden layers, use rectified_linear_unit_function 125 | #' output_layer_default = sigmoidUnitDerivative 126 | #' # for classification, use sigmoidUnitDerivative function 127 | #') 128 | #' 129 | #'dnn_classification <- train_dnn( 130 | #' dnn_classification, 131 | #' 132 | #' # training data 133 | #' input, # input variable for training 134 | #' target, # target variable for training 135 | #' input_valid, # input variable for validation 136 | #' target_valid, # target variable for validation 137 | #' 138 | #' # training parameters 139 | #' learn_rate_weight = exp(-8) * 10, 140 | #' # learning rate for weights, higher if use dropout 141 | #' learn_rate_bias = exp(-8) * 10, 142 | #' # learning rate for biases, hihger if use dropout 143 | #' learn_rate_gamma = exp(-8) * 10, 144 | #' # learning rate for the gamma factor used 145 | #' batch_size = 10, 146 | #' # number of observations in a batch during training. 147 | #' # Higher for faster training. Lower for faster convergence 148 | #' batch_normalization = TRUE, 149 | #' # logical value, T to use batch normalization 150 | #' dropout_input = 0.2, 151 | #' # dropout ratio in input. 152 | #' dropout_hidden = 0.5, 153 | #' # dropout ratio in hidden layers 154 | #' momentum_initial = 0.6, 155 | #' # initial momentum in Stochastic Gradient Descent training 156 | #' momentum_final = 0.9, 157 | #' # final momentum in Stochastic Gradient Descent training 158 | #' momentum_switch = 100, 159 | #' # after which the momentum is switched from initial to final momentum 160 | #' num_epochs = 5, 161 | #' # number of iterations in training 162 | #' # increase num_epochs to 100 for better model fit 163 | #' 164 | #' # Error function 165 | #' error_function = crossEntropyErr, 166 | #' # error function to minimize during training. 
For regression, use crossEntropyErr 167 | #' report_classification_error = TRUE 168 | #' # whether to print classification error during training 169 | #') 170 | #' 171 | #'# the prediciton by dnn_regression 172 | #'pred <- predict(dnn_classification) 173 | #' 174 | #'hist(pred) 175 | #' 176 | #'# calculate the r-squared of the prediciton 177 | #'AR(dnn_classification) 178 | #' 179 | #'# calcualte the r-squared of the prediciton in validation 180 | #'AR(dnn_classification, input = input_valid, target = target_valid) 181 | #' 182 | #' 183 | #' @return a trained deep neural network (darch instance) 184 | #' @export 185 | #' 186 | 187 | train_dnn <- function(darch, # darch instance to train 188 | input, # input data matrix 189 | target, # target data matrix 190 | input_valid = NULL, # validation data input 191 | target_valid = NULL, # validation data target 192 | ..., 193 | # training parameters 194 | learn_rate_weight = exp(-10), 195 | learn_rate_bias = exp(-10), 196 | learn_rate_gamma = 1, 197 | batch_size = 10, 198 | batch_normalization = TRUE, 199 | dropout_input = 0, 200 | dropout_hidden = 0, 201 | momentum_initial = .6, 202 | momentum_final = .9, 203 | momentum_switch = 100, 204 | num_epochs = 0, 205 | 206 | # target types 207 | error_function = meanSquareErr, 208 | report_classification_error = FALSE 209 | ) { 210 | # 1. set up the inputs 211 | timeStart <- Sys.time() 212 | dataSet <- createDataSet(input, target) 213 | numObs <- nrow(input) 214 | darch@dataSet <- dataSet # add the training dataset to the darch instance 215 | 216 | # set the stats of darch 217 | if (is.null(darch@stats) || length(darch@stats) < 1){ 218 | stats <- 219 | list("dataErrors"=list("raw"=c(),"class"=c()), 220 | "validErrors"=list("raw"=c(),"class"=c()), 221 | "times"= 0) 222 | 223 | darch@stats <- stats 224 | } 225 | 226 | trainData <- as.matrix(input) 227 | trainTarget <- as.matrix(target) 228 | 229 | if(!is.null(input_valid)) { 230 | validData <- as.matrix(input_valid) 231 | validTarget <- as.matrix(target_valid) 232 | } else { 233 | validData <- NULL 234 | validTarget <- NULL 235 | } 236 | 237 | if (!validateDataSet(dataSet, darch)) 238 | { 239 | stop("Invalid dataset provided.") 240 | } 241 | 242 | if (!is.null(validData)) { 243 | if (dim(trainData)[[2]] != dim(validData)[[2]] | 244 | dim(as.matrix(trainTarget))[[2]] != dim(as.matrix(validTarget))[[2]]) { 245 | stop("Invalid validation dataset.") 246 | } 247 | } 248 | 249 | # 2. 
train the neural net 250 | flog.info("Start training the neural net.") 251 | start_epoch <- getEpochs(darch) 252 | flog.info(paste("The neural net has been trained ", start_epoch, " times.")) 253 | 254 | for(epoch in (1 + start_epoch):(num_epochs + start_epoch)) { 255 | flog.info(paste("Epoch numebr: ", epoch)) 256 | 257 | # make the batches 258 | batch <- make_batches(dim(trainData)[[1]], batch_size) 259 | num_batches <- max(batch[, 2]) 260 | 261 | for(i in 1:num_batches) { 262 | # Generate a new dropout mask for each batch 263 | darch <- generateDropoutMasksForDarch(darch, dropout_input, dropout_hidden) 264 | # Train the neural net 265 | darch <- finetune_SGD_bn(darch, 266 | trainData[batch[,2] == i,], 267 | trainTarget[batch[,2] == i,], 268 | learn_rate_weight = learn_rate_weight, 269 | learn_rate_bias = learn_rate_bias, 270 | learn_rate_gamma = learn_rate_gamma, 271 | errorFunc = error_function, 272 | with_BN = batch_normalization 273 | ) 274 | } 275 | 276 | # calculates the new mu and sigma of darch 277 | if (batch_normalization) { 278 | darch <- calcualte_population_mu_sigma(darch, trainData) 279 | } else { 280 | darch <- reset_population_mu_sigma(darch) 281 | } 282 | 283 | # calcualtes the error 284 | 285 | # training errors 286 | pred_train <- predict(darch, newdata = trainData) 287 | error_train <- error_function(pred_train, trainTarget) 288 | flog.info(paste(error_train[[3]], "in training: ", error_train[[1]])) 289 | darch@stats$dataErrors$raw <- c(darch@stats$dataErrors$raw, error_train[[1]]) 290 | 291 | if(report_classification_error) { 292 | ce_train <- classification_error(pred_train, trainTarget) 293 | flog.info(paste(ce_train[[2]], "in training: ", ce_train[[1]])) 294 | darch@stats$dataErrors$class <- c(darch@stats$dataErrors$class, ce_train[[1]]) 295 | } 296 | 297 | # validation errors 298 | if(!is.null(validData)) { 299 | pred_valid <- predict(darch, newdata = validData) 300 | error_valid <- error_function(pred_valid, validTarget) 301 | flog.info(paste(error_valid[[3]], "in validation:", error_valid[[1]])) 302 | darch@stats$validErrors$raw <- c(darch@stats$validErrors$raw, error_valid[[1]]) 303 | 304 | if(report_classification_error) { 305 | ce_valid <- classification_error(pred_valid, validTarget) 306 | flog.info(paste(ce_valid[[2]], "in validation:", ce_valid[[1]])) 307 | darch@stats$validErrors$class <- c(darch@stats$validErrors$class, ce_valid[[1]]) 308 | } 309 | } 310 | # increase the epoch by 1 311 | darch@epochs <- darch@epochs + 1 312 | } 313 | flog.info("End of the training") 314 | 315 | # 3. 
Save the training statistics 316 | if (is.null(darch@stats[["times"]])) { 317 | darch@stats[["times"]] <- 0 318 | } 319 | darch@stats[["times"]] <- darch@stats[["times"]] + as.double(Sys.time() - timeStart, "secs") 320 | 321 | return (darch) 322 | } 323 | 324 | # Helper function for train_dnn 325 | 326 | make_batches <- function(numObs, batchsize) { 327 | order <- sample(1:numObs, numObs) 328 | group <- c() 329 | num_batches <- ceiling(numObs / batchsize) 330 | for (i in 1:numObs) { 331 | group <- c(group, (i %% num_batches + 1)) 332 | } 333 | batch <- cbind(order, group) 334 | batch <- batch[order(order), ] 335 | return (batch) 336 | } 337 | -------------------------------------------------------------------------------- /R/util.R: -------------------------------------------------------------------------------- 1 | #' Prints out the weight of a deep neural network 2 | #' 3 | #' This function prints out the weight in a heat map, 3D surface, or histogram 4 | #' 5 | #' @param darch DArch instance 6 | #' @param num_of_layer the number of the layer to print 7 | #' @param show_derivative T to show the weight value. F to show the percentage 8 | #' weight change in the finetuning stage. This helps spot the network saturation problem. 9 | #' @param type type of the graph. It supports "heatmap", "surface", and "histogram" 10 | #' 11 | #' @importFrom darch getLayer 12 | #' 13 | #' @examples 14 | #' # Example of Regression 15 | #' 16 | #' input <- matrix(runif(1000), 500, 2) 17 | #' input_valid <- matrix(runif(100), 50, 2) 18 | #' target <- rowSums(input + input^2) 19 | #' target_valid <- rowSums(input_valid + input_valid^2) 20 | #' # create a new deep neural network for classificaiton 21 | #' dnn_regression <- new_dnn( 22 | #' c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 23 | #' # The first element is the number of input variables. 24 | #' # The last element is the number of output variables. 
25 | #' hidden_layer_default = rectified_linear_unit_function,
26 | #' # for hidden layers, use rectified_linear_unit_function
27 | #' output_layer_default = linearUnitDerivative
28 | #' # for regression, use linearUnitDerivative function
29 | #')
30 | #'
31 | #' # print the layer weights
32 | #' # this function can print heatmap, histogram, or a surface
33 | #' print_weight(dnn_regression, 1, type = "heatmap")
34 | #'
35 | #' print_weight(dnn_regression, 2, type = "surface")
36 | #'
37 | #' print_weight(dnn_regression, 3, type = "histogram")
38 | #'
39 | #'
40 | #' @export
41 |
42 | print_weight <- function(darch, num_of_layer, show_derivative = F, type = "heatmap") {
43 |   weight <- getLayer(darch, num_of_layer)[[1]]
44 |   weight_change <- getLayer(darch, num_of_layer)[[3]] / weight[1:(dim(weight)[[1]]-1), ]
45 |
46 |   if(type == "histogram") {
47 |     if(!show_derivative) {
48 |       plotly::plot_ly(x = c(weight), type = type)
49 |     } else {
50 |       plotly::plot_ly(x = c(weight_change), type = type)
51 |     }
52 |   } else {
53 |     if(!show_derivative) {
54 |       plotly::plot_ly(z = weight, type = type, colorscale = "hot")
55 |     } else {
56 |       plotly::plot_ly(z = weight_change, type = type, colorscale = "hot")
57 |     }
58 |   }
59 | }
60 |
61 |
62 |
63 | #' Calculates the matrix product of two matrices
64 | #'
65 | #' Calculates the matrix product of the data matrix and the weight matrix
66 | #'
67 | #' @param data the data matrix
68 | #' @param weight the weight matrix
69 | #'
70 |
71 | matMult <- function(data, weight) {
72 |   return(data %*% weight)
73 | }
74 |
75 |
76 |
77 |
78 | #' Data preprocessing function that converts a categorical input to a continuous input or
79 | #' vectorizes it
80 | #'
81 | #' Preprocesses a data set. It converts categorical data into binary variables
82 | #' if it is unordered, or into a continuous variable from 0 to 1 if it is ordinal
83 | #' @param x input variable
84 | #' @param type ordinal or other
85 | #' @param ordered_list the rank ordering of an ordinal variable. Users are expected to
86 | #' provide a complete list of the rank ordering. Otherwise, a default rank ordering
87 | #' will be used.
88 | #' @param var_name the name of the input variable. This is used to create vectorized
89 | #' input variables
90 | #' @param ... other inputs
91 | #'
92 | #' @export
93 |
94 | convert_categorical <- function(x,
95 |                                 type = "ordinal",
96 |                                 ordered_list = list(),
97 |                                 var_name = "var",
98 |                                 ...)
{
 99 |
100 |   if(type == "ordinal") {
101 |     unique_x <- unique(x)
102 |
103 |     if(is.null(ordered_list)) {
104 |       ordered_list <- sort(unique_x)  # list_x has all unique values in vector x
105 |     }
106 |
107 |     if(any(!(unique_x %in% ordered_list))) {
108 |       ordered_list <- sort(unique_x)  # list_x has all unique values in vector x
109 |     }
110 |
111 |     num_categories <- length(ordered_list)
112 |     mapped_value <- c(0:(num_categories - 1))/(num_categories - 1)
113 |     ret <- mapped_value[match(x, ordered_list)]
114 |   } else {
115 |     unique_x <- unique(x)
116 |     ordered_list <- sort(unique_x)
117 |     num_categories <- length(ordered_list)
118 |     mapped_value <- c(1:num_categories)
119 |     numeric_x <- mapped_value[match(x, ordered_list)]
120 |     vectorized_x <- matrix(0, nrow = length(x), ncol = length(unique_x))
121 |     for( i in 1:length(x)) {
122 |       vectorized_x[i, numeric_x[i]] <- 1
123 |     }
124 |
125 |     ret <- data.frame(vectorized_x)
126 |     colnames(ret) <- paste0(var_name, " = ", ordered_list)
127 |   }
128 |
129 |   return(ret)
130 | }
131 |
132 | #' Creates a matrix by repeating a row vector N times
133 | #'
134 | #' helper function that repeats a row vector N times
135 | #'
136 | #' @param vector the row vector
137 | #' @param N number of rows in the output matrix
138 | #' @return a matrix
139 |
140 | verticalize <- function(vector, N) {
141 |   return(matrix(rep(vector, N), N, byrow = T))
142 | }
143 |
144 |
145 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | output:
 3 |   md_document:
 4 |     variant: markdown_github
 5 | ---
 6 |
 7 |
 8 |
 9 | ```{r, echo = FALSE}
10 | knitr::opts_chunk$set(
11 |   collapse = TRUE,
12 |   comment = "#>",
13 |   fig.path = "README-"
14 | )
15 | ```
16 |
17 | deeplearning
18 | =====
19 |
20 | #### Create and train deep neural networks of ReLU type with SGD and batch normalization
21 |
22 | ### About
23 | The deeplearning package is an R package that implements deep neural networks in R. It employs rectified linear unit functions as its building blocks and trains a neural network with the stochastic gradient descent method and batch normalization to speed up training and promote regularization. Neural networks with this kind of architecture and training method are state of the art and have even surpassed human-level performance in the ImageNet competition. The deeplearning package is inspired by another R package, darch, which implements layerwise Restricted Boltzmann Machine pretraining and dropout, and it uses darch's class DArch as its default class.
24 |
25 | ### Installation
26 |
27 | Install deeplearning from CRAN
28 | ```
29 | install.packages("deeplearning")
30 | ```
31 |
32 | Or install it from GitHub
33 | ```
34 | devtools::install_github("rz1988/deeplearning")
35 |
36 | ```
37 |
38 | ### Use deeplearning
39 |
40 | The deeplearning package is designed to be easy and fun to use. It only takes two steps to run your first neural network.
41 |
42 | In step one, the user will create a new neural network. You will need to specify the structure of the neural network, that is, the number of layers and neurons in the network and the type of activation functions. The default activation for the hidden layers is the rectified linear unit function, but you can also use other types of activation, such as a sigmoidal function, or write your own activation function.
43 |
44 | In step two, the user will train the neural network with a training input and a training target. There are a number of other training parameters; for guidance on how to choose them, please refer to https://github.com/rz1988/deeplearning.
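A user-defined activation only needs to follow the same contract as rectified_linear_unit_function: it receives the incoming data and the layer's weight matrix and returns a list holding the activations and their derivatives. As a sketch (the leaky ReLU below is an illustration, not a function exported by the package), such a function could look like this and then be passed to new_dnn via hidden_layer_default:

```
leaky_relu_function <- function(data, weights) {
  a <- data %*% weights                      # weighted input, as in rectified_linear_unit_function
  x <- a
  x[a < 0] <- 0.01 * a[a < 0]                # small slope below zero instead of a hard cut-off
  derivatives <- matrix(1, dim(a)[[1]], dim(a)[[2]])
  derivatives[a < 0] <- 0.01
  list(x, derivatives)                       # element 1: activations, element 2: derivatives
}
```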
There are a number of other training parameters. For guidance on how to choose these training parameters, please refer to https://github.com/rz1988/deeplearning. 45 | 46 | ### Examples 47 | 48 | #### Train a neural network for regression 49 | 50 | ``` 51 | input <- matrix(runif(1000), 500, 2) 52 | input_valid <- matrix(runif(100), 50, 2) 53 | target <- rowSums(input + input^2) 54 | target_valid <- rowSums(input_valid + input_valid^2) 55 | 56 | 57 | # create a new deep neural network for regression 58 | dnn_regression <- new_dnn( 59 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 60 | # The first element is the number of input variables. 61 | # The last element is the number of output variables. 62 | hidden_layer_default = rectified_linear_unit_function, 63 | # for hidden layers, use rectified_linear_unit_function 64 | output_layer_default = linearUnitDerivative # for regression, use linearUnitDerivative function 65 | ) 66 | 67 | dnn_regression <- train_dnn( 68 | dnn_regression, 69 | 70 | # training data 71 | input, # input variable for training 72 | target, # target variable for training 73 | input_valid, # input variable for validation 74 | target_valid, # target variable for validation 75 | 76 | # training parameters 77 | learn_rate_weight = exp(-8) * 10, # learning rate for weights, higher if using dropout 78 | learn_rate_bias = exp(-8) * 10, # learning rate for biases, higher if using dropout 79 | learn_rate_gamma = exp(-8) * 10, # learning rate for the gamma factor used in batch normalization 80 | batch_size = 10, # number of observations in a batch during training. Higher for faster training. Lower for faster convergence 81 | batch_normalization = T, # logical value, T to use batch normalization 82 | dropout_input = 0.2, # dropout ratio in the input layer 83 | dropout_hidden = 0.5, # dropout ratio in hidden layers 84 | momentum_initial = 0.6, # initial momentum in Stochastic Gradient Descent training 85 | momentum_final = 0.9, # final momentum in Stochastic Gradient Descent training 86 | momentum_switch = 100, # the epoch after which the momentum switches from the initial to the final value 87 | num_epochs = 300, # number of training epochs 88 | 89 | # Error function 90 | error_function = meanSquareErr, # error function to minimize during training. For regression, use meanSquareErr 91 | report_classification_error = F # whether to print classification error during training 92 | ) 93 | 94 | # the prediction by dnn_regression on the training data 95 | pred <- predict(dnn_regression) 96 | 97 | # calculate the r-squared of the prediction 98 | rsq(dnn_regression) 99 | 100 | # calculate the r-squared of the prediction on the validation data 101 | rsq(dnn_regression, input = input_valid, target = target_valid) 102 | ``` 103 | 104 | #### Train a neural network for classification 105 | 106 | ``` 107 | 108 | input <- matrix(runif(1000), 500, 2) 109 | input_valid <- matrix(runif(100), 50, 2) 110 | target <- (cos(rowSums(input + input^2)) > 0.5) * 1 111 | target_valid <- (cos(rowSums(input_valid + input_valid^2)) > 0.5) * 1 112 | 113 | # create a new deep neural network for classification 114 | dnn_classification <- new_dnn( 115 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 116 | # The first element is the number of input variables. 117 | # The last element is the number of output variables.
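 # Here: 2 input variables, three hidden layers of 50, 50 and 20 units, and 1 output variable.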
118 | hidden_layer_default = rectified_linear_unit_function, # for hidden layers, use rectified_linear_unit_function 119 | output_layer_default = sigmoidUnitDerivative # for classification, use sigmoidUnitDerivative function 120 | ) 121 | 122 | dnn_classification <- train_dnn( 123 | dnn_classification, 124 | 125 | # training data 126 | input, # input variable for training 127 | target, # target variable for training 128 | input_valid, # input variable for validation 129 | target_valid, # target variable for validation 130 | 131 | # training parameters 132 | learn_rate_weight = exp(-8) * 10, # learning rate for weights, higher if using dropout 133 | learn_rate_bias = exp(-8) * 10, # learning rate for biases, higher if using dropout 134 | learn_rate_gamma = exp(-8) * 10, # learning rate for the gamma factor used in batch normalization 135 | batch_size = 10, # number of observations in a batch during training. Higher for faster training. Lower for faster convergence 136 | batch_normalization = T, # logical value, T to use batch normalization 137 | dropout_input = 0.2, # dropout ratio in the input layer 138 | dropout_hidden = 0.5, # dropout ratio in hidden layers 139 | momentum_initial = 0.6, # initial momentum in Stochastic Gradient Descent training 140 | momentum_final = 0.9, # final momentum in Stochastic Gradient Descent training 141 | momentum_switch = 100, # the epoch after which the momentum switches from the initial to the final value 142 | num_epochs = 100, # number of training epochs 143 | 144 | # Error function 145 | error_function = crossEntropyErr, # error function to minimize during training. For classification, use crossEntropyErr 146 | report_classification_error = T # whether to print classification error during training 147 | ) 148 | 149 | # the prediction by dnn_classification on the training data 150 | pred <- predict(dnn_classification) 151 | 152 | hist(pred) 153 | 154 | # calculate the accuracy ratio of the prediction 155 | AR(dnn_classification) 156 | 157 | # calculate the accuracy ratio of the prediction on the validation data 158 | AR(dnn_classification, input = input_valid, target = target_valid) 159 | 160 | # print the layer weights 161 | # this function can print a heatmap, histogram, or surface 162 | print_weight(dnn_classification, 1, type = "heatmap") 163 | 164 | print_weight(dnn_classification, 2, type = "surface") 165 | 166 | print_weight(dnn_classification, 3, type = "histogram") 167 | ``` 168 | 169 | #### References 170 | Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever, Ruslan Salakhutdinov, 2014, Dropout: A Simple Way to Prevent Neural Networks from Overfitting, Journal of Machine Learning Research 15 (2014) 1929-1958 171 | 172 | Sergey Ioffe, Christian Szegedy, 2015, Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift, Proceedings of the 32nd International Conference on Machine Learning, Lille, France, 2015. 173 | 174 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun, 2015, Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification, arXiv 175 | 176 | X. Glorot, A. Bordes, and Y. Bengio, 2011, Deep Sparse Rectifier Neural Networks. In Proceedings of the 14th International Conference on Artificial Intelligence and Statistics, pages 315–323 177 | 178 | 179 | Drees, Martin (2013). "Implementierung und Analyse von tiefen Architekturen 180 | in R". German. Master's thesis. Fachhochschule Dortmund. 181 | 182 | Rueckert, Johannes (2015). "Extending the Darch library for deep 183 | architectures". Project thesis. Fachhochschule Dortmund.
184 | URL: [saviola.de](http://static.saviola.de/publications/rueckert_2015.pdf) 185 | 186 | 187 | 188 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | deeplearning 3 | ============ 4 | 5 | #### Create and train deep neural network of ReLU type with SGD and batch normalization 6 | 7 | ### About 8 | 9 | The deeplearning package is an R package that implements deep neural networks in R. It employes Rectifier Linear Unit functions as its building blocks and trains a neural network with stochastic gradient descent method with batch normalization to speed up the training and promote regularization. Neural networks of such kind of architecture and training methods are state of the art and even achieved suplassing human-level performance in ImageNet competition. The deeplearning package is inspired by another R package darch which implements layerwise Restricted Boltzmann Machine pretraining and dropout and uses its class DArch as the default class. 10 | 11 | ### Installtion 12 | 13 | Install deeplearning from CRAN 14 | 15 | install.packages("deeplearning") 16 | 17 | Or install it from github 18 | 19 | devtools::install_github("rz1988/deeplearning") 20 | 21 | ### Use deeplearning 22 | 23 | Using the deeplearning package is designed to be easy and fun. It only takes two steps to run your first neural network. 24 | 25 | In step one, the user will create a new neural network. You will need to specify the strucutre of the neural network which are the number of layers and neurons in the network and the type of activation functions. The default activation is rectifier linear unit function for the hidden layers but you can also use other types of activation such as sigmoidal function or write your own activation function. 26 | 27 | In step two, the user will train the neural network with a training input and a traing target. There are a number of other training parameters. For how to choose these training parameters please refer to . 28 | 29 | ### Examples 30 | 31 | #### Train a neural networ for regression 32 | 33 | input <- matrix(runif(1000), 500, 2) 34 | input_valid <- matrix(runif(100), 50, 2) 35 | target <- rowSums(input + input^2) 36 | target_valid <- rowSums(input_valid + input_valid^2) 37 | 38 | 39 | # create a new deep neural network for classificaiton 40 | dnn_regression <- new_dnn( 41 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 42 | # The first element is the number of input variables. 43 | # The last element is the number of output variables. 44 | hidden_layer_default = rectified_linear_unit_function, 45 | # for hidden layers, use rectified_linear_unit_function 46 | output_layer_default = linearUnitDerivative # for regression, use linearUnitDerivative function 47 | ) 48 | 49 | dnn_regression <- train_dnn( 50 | dnn_regression, 51 | 52 | # training data 53 | input, # input variable for training 54 | target, # target variable for training 55 | input_valid, # input variable for validation 56 | target_valid, # target variable for validation 57 | 58 | # training parameters 59 | learn_rate_weight = exp(-8) * 10, # learning rate for weights, higher if use dropout 60 | learn_rate_bias = exp(-8) * 10, # learning rate for biases, hihger if use dropout 61 | learn_rate_gamma = exp(-8) * 10, # learning rate for the gamma factor used 62 | batch_size = 10, # number of observations in a batch during training. Higher for faster training. 
Lower for faster convergence 63 | batch_normalization = T, # logical value, T to use batch normalization 64 | dropout_input = 0.2, # dropout ratio in input. 65 | dropout_hidden = 0.5, # dropout ratio in hidden layers 66 | momentum_initial = 0.6, # initial momentum in Stochastic Gradient Descent training 67 | momentum_final = 0.9, # final momentum in Stochastic Gradient Descent training 68 | momentum_switch = 100, # after which the momentum is switched from initial to final momentum 69 | num_epochs = 300, # number of iterations in training 70 | 71 | # Error function 72 | error_function = meanSquareErr, # error function to minimize during training. For regression, use meanSquareErr 73 | report_classification_error = F # whether to print classification error during training 74 | ) 75 | 76 | # the prediciton by dnn_regression 77 | pred <- predict(dnn_regression) 78 | 79 | # calculate the r-squared of the prediciton 80 | rsq(dnn_regression) 81 | 82 | # calcualte the r-squared of the prediciton in validation 83 | rsq(dnn_regression, input = input_valid, target = target_valid) 84 | 85 | #### Train a neural network for classification 86 | 87 | 88 | input <- matrix(runif(1000), 500, 2) 89 | input_valid <- matrix(runif(100), 50, 2) 90 | target <- (cos(rowSums(input + input^2)) > 0.5) * 1 91 | target_valid <- (cos(rowSums(input_valid + input_valid^2)) > 0.5) * 1 92 | 93 | # create a new deep neural network for classificaiton 94 | dnn_classification <- new_dnn( 95 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 96 | # The first element is the number of input variables. 97 | # The last element is the number of output variables. 98 | hidden_layer_default = rectified_linear_unit_function, # for hidden layers, use rectified_linear_unit_function 99 | output_layer_default = sigmoidUnitDerivative # for classification, use sigmoidUnitDerivative function 100 | ) 101 | 102 | dnn_classification <- train_dnn( 103 | dnn_classification, 104 | 105 | # training data 106 | input, # input variable for training 107 | target, # target variable for training 108 | input_valid, # input variable for validation 109 | target_valid, # target variable for validation 110 | 111 | # training parameters 112 | learn_rate_weight = exp(-8) * 10, # learning rate for weights, higher if use dropout 113 | learn_rate_bias = exp(-8) * 10, # learning rate for biases, hihger if use dropout 114 | learn_rate_gamma = exp(-8) * 10, # learning rate for the gamma factor used 115 | batch_size = 10, # number of observations in a batch during training. Higher for faster training. Lower for faster convergence 116 | batch_normalization = T, # logical value, T to use batch normalization 117 | dropout_input = 0.2, # dropout ratio in input. 118 | dropout_hidden = 0.5, # dropout ratio in hidden layers 119 | momentum_initial = 0.6, # initial momentum in Stochastic Gradient Descent training 120 | momentum_final = 0.9, # final momentum in Stochastic Gradient Descent training 121 | momentum_switch = 100, # after which the momentum is switched from initial to final momentum 122 | num_epochs = 100, # number of iterations in training 123 | 124 | # Error function 125 | error_function = crossEntropyErr, # error function to minimize during training. 
For regression, use crossEntropyErr 126 | report_classification_error = T # whether to print classification error during training 127 | ) 128 | 129 | # the prediciton by dnn_regression 130 | pred <- predict(dnn_classification) 131 | 132 | hist(pred) 133 | 134 | # calculate the r-squared of the prediciton 135 | AR(dnn_classification) 136 | 137 | # calcualte the r-squared of the prediciton in validation 138 | AR(dnn_classification, input = input_valid, target = target_valid) 139 | 140 | # print the layer weights 141 | # this function can print heatmap, histogram, or a surface 142 | print_weight(dnn_regression, 1, type = "heatmap") 143 | 144 | print_weight(dnn_regression, 2, type = "surface") 145 | 146 | print_weight(dnn_regression, 3, type = "histogram") 147 | 148 | #### References 149 | 150 | Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever, Ruslan Salakhutdinov, 2013, Dropout: A Simple Way to Prevent Neural Networks from Overfitting, Journal of Machine Learning Research 15 (2014) 1929-1958 151 | 152 | Sergey Ioffe, Christian Szegedy, 2015, Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift, Proceedings of the 32 nd International Conference on Machine Learning, Lille, France, 2015. 153 | 154 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun, 2015, Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification, arXiv 155 | 156 | X. Glorot, A. Bordes, and Y. Bengio, 2011,Deep sparse rectifier networks. In Proceedings of the 14th International Conference on Artificial Intelligence and Statistics, pages 315–323 157 | 158 | Drees, Martin (2013). "Implementierung und Analyse von tiefen Architekturen in R". German. Master's thesis. Fachhochschule Dortmund. 159 | 160 | Rueckert, Johannes (2015). "Extending the Darch library for deep architectures". Project thesis. Fachhochschule Dortmund. URL: [saviola.de](http://static.saviola.de/publications/rueckert_2015.pdf) 161 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Resubmission 2 | This is a resubmission. In this version I have: 3 | 4 | * Converted the DESCRIPTION title to title case. 5 | 6 | * Removed the maintainer from DESCRIPTION. Maintainer will be generated by Author@R. 7 | 8 | * More clearly identified the copyright holders in the DESCRIPTION. 9 | 10 | * Added the following to NAMESPACE file. 11 | importFrom("graphics", "plot") 12 | importFrom("methods", "new") 13 | importFrom("stats", "predict") 14 | importClassesFrom(darch,DArch) 15 | 16 | * Updated the examples so they run in less than 5 s. 17 | 18 | 19 | ## Test environments 20 | * Windows 10, R 3.2.4 21 | * ubuntu 12.04 (on travis-ci), R 3.2.4 22 | 23 | ## R CMD check results 24 | * There is no warning or note. 25 | 26 | ## Downstream dependencies 27 | * This is a new package. There is no any downstream dependency. 
28 | -------------------------------------------------------------------------------- /deeplearning.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /inst/examples_classification.R: -------------------------------------------------------------------------------- 1 | # Examples of classification 2 | 3 | input <- matrix(runif(1000), 500, 2) 4 | input_valid <- matrix(runif(100), 50, 2) 5 | target <- (cos(rowSums(input + input^2)) > 0.5) * 1 6 | target_valid <- (cos(rowSums(input_valid + input_valid^2)) > 0.5) * 1 7 | 8 | # create a new deep neural network for classificaiton 9 | dnn_classification <- new_dnn( 10 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 11 | # The first element is the number of input variables. 12 | # The last element is the number of output variables. 13 | hidden_layer_default = rectified_linear_unit_function, # for hidden layers, use rectified_linear_unit_function 14 | output_layer_default = sigmoidUnitDerivative # for classification, use sigmoidUnitDerivative function 15 | ) 16 | 17 | dnn_classification <- train_dnn( 18 | dnn_classification, 19 | 20 | # training data 21 | input, # input variable for training 22 | target, # target variable for training 23 | input_valid, # input variable for validation 24 | target_valid, # target variable for validation 25 | 26 | # training parameters 27 | learn_rate_weight = exp(-8) * 10, # learning rate for weights, higher if use dropout 28 | learn_rate_bias = exp(-8) * 10, # learning rate for biases, hihger if use dropout 29 | learn_rate_gamma = exp(-8) * 10, # learning rate for the gamma factor used 30 | batch_size = 10, # number of observations in a batch during training. Higher for faster training. Lower for faster convergence 31 | batch_normalization = T, # logical value, T to use batch normalization 32 | dropout_input = 0.2, # dropout ratio in input. 33 | dropout_hidden = 0.5, # dropout ratio in hidden layers 34 | momentum_initial = 0.6, # initial momentum in Stochastic Gradient Descent training 35 | momentum_final = 0.9, # final momentum in Stochastic Gradient Descent training 36 | momentum_switch = 100, # after which the momentum is switched from initial to final momentum 37 | num_epochs = 100, # number of iterations in training 38 | 39 | # Error function 40 | error_function = crossEntropyErr, # error function to minimize during training. 
For regression, use crossEntropyErr 41 | report_classification_error = T # whether to print classification error during training 42 | ) 43 | 44 | # the prediciton by dnn_regression 45 | pred <- predict(dnn_classification) 46 | 47 | hist(pred) 48 | 49 | # calculate the r-squared of the prediciton 50 | AR(dnn_classification) 51 | 52 | # calcualte the r-squared of the prediciton in validation 53 | AR(dnn_classification, input = input_valid, target = target_valid) 54 | 55 | # print the layer weights 56 | # this function can print heatmap, histogram, or a surface 57 | print_weight(dnn_regression, 1, type = "heatmap") 58 | 59 | print_weight(dnn_regression, 2, type = "surface") 60 | 61 | print_weight(dnn_regression, 3, type = "histogram") 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /inst/examples_regression.R: -------------------------------------------------------------------------------- 1 | # Examples of Regression 2 | 3 | input <- matrix(runif(1000), 500, 2) 4 | input_valid <- matrix(runif(100), 50, 2) 5 | target <- rowSums(input + input^2) 6 | target_valid <- rowSums(input_valid + input_valid^2) 7 | 8 | 9 | # create a new deep neural network for classificaiton 10 | dnn_regression <- new_dnn( 11 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 12 | # The first element is the number of input variables. 13 | # The last element is the number of output variables. 14 | hidden_layer_default = rectified_linear_unit_function, # for hidden layers, use rectified_linear_unit_function 15 | output_layer_default = linearUnitDerivative # for regression, use linearUnitDerivative function 16 | ) 17 | 18 | dnn_regression <- train_dnn( 19 | dnn_regression, 20 | 21 | # training data 22 | input, # input variable for training 23 | target, # target variable for training 24 | input_valid, # input variable for validation 25 | target_valid, # target variable for validation 26 | 27 | # training parameters 28 | learn_rate_weight = exp(-8) * 1, # learning rate for weights, higher if use dropout 29 | learn_rate_bias = exp(-8) * 1, # learning rate for biases, hihger if use dropout 30 | learn_rate_gamma = exp(-8) * 1, # learning rate for the gamma factor used 31 | batch_size = 10, # number of observations in a batch during training. Higher for faster training. Lower for faster convergence 32 | batch_normalization = T, # logical value, T to use batch normalization 33 | dropout_input = 0.2, # dropout ratio in input. 34 | dropout_hidden = 0.5, # dropout ratio in hidden layers 35 | momentum_initial = 0.6, # initial momentum in Stochastic Gradient Descent training 36 | momentum_final = 0.9, # final momentum in Stochastic Gradient Descent training 37 | momentum_switch = 100, # after which the momentum is switched from initial to final momentum 38 | num_epochs = 100, # number of iterations in training 39 | 40 | # Error function 41 | error_function = meanSquareErr, # error function to minimize during training. 
For regression, use meanSquareErr 42 | report_classification_error = F # whether to print classification error during training 43 | ) 44 | 45 | # the prediciton by dnn_regression 46 | pred <- predict(dnn_regression) 47 | 48 | # calculate the r-squared of the prediciton 49 | rsq(dnn_regression) 50 | 51 | # calcualte the r-squared of the prediciton in validation 52 | rsq(dnn_regression, input = input_valid, target = target_valid) 53 | 54 | # print the layer weights 55 | # this function can print heatmap, histogram, or a surface 56 | print_weight(dnn_regression, 1, type = "heatmap") 57 | 58 | print_weight(dnn_regression, 2, type = "surface") 59 | 60 | print_weight(dnn_regression, 3, type = "histogram") 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /inst/test_ReLU.R: -------------------------------------------------------------------------------- 1 | 2 | num_dim <- 3 # N is the dimension of the input vector 3 | num_training <- 1000 4 | num_valid <- 100 5 | x <- matrix(runif(num_dim * num_training), num_training, num_dim) 6 | y <- rowSums(sin(x)+cos(x)^2) 7 | 8 | y <- sample(0:1, size = num_training, replace = T) 9 | 10 | x_valid <- matrix(runif(num_dim * num_valid), num_valid, num_dim) 11 | y_valid <- rowSums(sin(x_valid)+cos(x_valid)^2) 12 | 13 | # Run a deep neural net with sigmoidal unit function a 14 | # Pretraining RBM 15 | 16 | darch <- darch(x = x, 17 | y = y, 18 | # darch = darch, 19 | # xValid = x_valid, 20 | # yValid = y_valid, 21 | layers = c(num_dim, 50, 50, 1), 22 | rbm.numEpochs = 0, 23 | darch.bootstrap = F, 24 | # darch.layerFunctionDefault = rectified_linear_unit_function, 25 | darch.layerFunctionDefault = sigmoidUnitDerivative, 26 | darch.layerFunctions = c("3" = sigmoidUnitDerivative), 27 | darch.isBin = T, 28 | darch.isClass = T, 29 | darch.batchSize = 10, 30 | darch.numEpochs = 6 31 | ) 32 | 33 | rsq(darch) 34 | rsq(darch, x_valid, y_valid) 35 | 36 | # Run a deep neural net with ReLU without pretraining 37 | 38 | darch_ReLU <- darch(x = x, 39 | y = y, 40 | # xValid = x_valid, 41 | # yValid = y_valid, 42 | layers = c(num_dim, 50, 50, 1), 43 | rbm.numEpochs = 0, 44 | darch.bootstrap = F, 45 | darch.layerFunctionDefault = rectified_linear_unit_function, 46 | # darch.layerFunctionDefault = sigmoidUnitDerivative, 47 | darch.layerFunctions = c("3" = linearUnitDerivative), 48 | darch.isBin = F, 49 | darch.isClass = F, 50 | darch.batchSize = 10, 51 | darch.numEpochs = 10 52 | ) 53 | 54 | rsq(darch_ReLU) 55 | 56 | fprop1 <- forward_propagate(darch_ReLU, x) 57 | 58 | n_layer <- 2 59 | 60 | plotly::plot_ly(z = fprop1[[1]][[n_layer]], type = "heatmap", colorscale = "hot") 61 | 62 | plotly::plot_ly(x = c(fprop1[[1]][[n_layer]]), type = "histogram") 63 | 64 | head(fprop1[[2]][[2]]) 65 | 66 | head(getLayer(darch_ReLU,1)[[1]]) 67 | 68 | # Run a linear model 69 | data_lm <- data.frame(x, y) 70 | mod <- gam( y ~ s(X1) + s(X2) + s(X3) + s(X4) + s(X5), data = data_lm) 71 | 72 | rsq(mod, x, y) 73 | rsq(mod, x_valid, y_valid) 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /inst/test_batch_normalization_differential.R: -------------------------------------------------------------------------------- 1 | x <- matrix(runif(50), 10, 5) 2 | beta <- matrix(1, 10, 5) 3 | gamma <- matrix(.5, 10, 5) 4 | 5 | ret <- batch_normalization(x, gamma, beta) 6 | 7 | mu <- ret[[1]] 8 | sigma_2 <- ret[[2]] 9 | x_hat <- ret[[3]] 10 | y <- ret[[4]] 11 | 12 | target <- matrix(1, 10, 5) 13 | delta_y <- y - 
target 14 | 15 | ret <- batch_normalization_differential(delta_y, 16 | mu, 17 | sigma_2, 18 | x, 19 | x_hat, 20 | y, 21 | gamma, 22 | beta) 23 | 24 | delta_x <- ret[[1]] 25 | delta_gamma <- ret[[2]] 26 | delta_beta <- ret[[3]] 27 | delta_x_hat <- ret[[4]] 28 | delta_sigma_2 <- ret[[5]] 29 | delta_mu <- ret[[6]] 30 | 31 | write_2_csv <- function(data, file_name) { 32 | file_name <- paste0('excl/test_batch_normalization_differential/', file_name, '.csv') 33 | write.csv(data, file = file_name) 34 | } 35 | 36 | write_2_csv(x, "x.csv") 37 | write_2_csv(y, "y.csv") 38 | write_2_csv(delta_y, "delta_y.csv") 39 | write_2_csv(delta_x, "delta_x.csv") 40 | write_2_csv(delta_gamma, "delta_gamma.csv") 41 | write_2_csv(delta_beta, "detla_beta.csv") 42 | 43 | write_2_csv(delta_x_hat, "delta_x_hat") 44 | write_2_csv(delta_sigma_2, "detla_sigma_2") 45 | write_2_csv(delta_mu, "delta_mu") 46 | 47 | write_2_csv(mu, "mu") 48 | write_2_csv(sigma_2, "sigma_2") 49 | 50 | -------------------------------------------------------------------------------- /inst/test_fineTuneFunctions.R: -------------------------------------------------------------------------------- 1 | # New a DArch instance 2 | source("inst/dropout.R") 3 | 4 | 5 | darch = newDArch(c(10,20,1), batchSize = 10) 6 | setDropoutOneMaskPerEpoch(darch) = F 7 | setFineTuneFunction(darch) <- minimizeClassifier 8 | setFineTuneFunction(darch) <- backpropagation 9 | setFineTuneFunction(darch) <- backpropSGD 10 | darch = generateDropoutMasksForDarch(darch) 11 | 12 | # New a dataset 13 | input <- matrix(runif(250), 50, 5) 14 | target <- rowSums(cos(input) + sin(input)^2) 15 | 16 | mean_v <- mean(target) 17 | target <- as.numeric(target > mean_v * 1.02 ) 18 | 19 | input_test <- matrix(runif(100), 20, 5) 20 | target_test <- rowSums(cos(input_test) + sin(input_test)^2) 21 | mean_v <- mean(target_test) 22 | target_test <- as.numeric(target_test > mean_v * 1.02) 23 | 24 | # Compare with the benchmark - backpropagation 25 | 26 | darch_1 = darch( x = input, 27 | y = target, 28 | layers = c(5, 100, 50, 1), 29 | # darch = darch, 30 | darch.layerFunctionDefault = rectified_linear_unit_function, 31 | darch.layerFunctions = c("3" = sigmoidUnitDerivative), 32 | darch.bootstrap = F, 33 | darch.isBin = F, 34 | darch.isClass = F, 35 | darch.learnRateWeights = 0.01, 36 | darch.learnRateBiases = 0.01, 37 | darch.dropoutInput = 0., 38 | darch.dropoutHidden = 0., 39 | darch.fineTuneFunction = backpropagation, # finetune_SGD, 40 | darch.batchSize = 10, 41 | darch.numEpochs = 50 42 | ) 43 | 44 | 45 | darch_2 = darch( x = input, 46 | y = target, 47 | layers = c(5, 100, 50, 1), 48 | # darch = darch, 49 | darch.layerFunctionDefault = rectified_linear_unit_function, 50 | darch.layerFunctions = c("3" = sigmoidUnitDerivative), 51 | darch.bootstrap = F, 52 | darch.isBin = F, 53 | darch.isClass = F, 54 | darch.learnRateWeights = 0.01, 55 | darch.learnRateBiases = 0.01, 56 | darch.dropoutInput = 0., 57 | darch.dropoutHidden = 0., 58 | # darch.errorFunction = crossEntropyError, 59 | darch.fineTuneFunction = finetune_SGD_bn, 60 | errorFunc = meanSquareErr, 61 | darch.batchSize = 10, 62 | darch.numEpochs = 50 63 | ) 64 | 65 | AR(darch_1) 66 | AR(darch_2) 67 | 68 | AR(darch_1, input_test, target_test) 69 | AR(darch_2, input_test, target_test) 70 | 71 | plot(predict(darch_1), predict(darch_2)) 72 | 73 | # Just use the finetuneDArch method. 
74 | # This function should be seperated to a train_dnn function 75 | dataset <- createDataSet(input, target) 76 | 77 | darch3 = fineTuneDArch(darch_1, dataset, 78 | dataSetValid = NULL, 79 | numEpochs = 5, 80 | bootstrap = F, 81 | isBin = T, 82 | isClass = T, 83 | stopErr = -Inf, 84 | stopClassErr = -Inf, 85 | stopValidErr = -Inf, 86 | stopValidClassErr = 101 87 | ) 88 | 89 | # Use the fineTune function directly 90 | 91 | darch2 = darch 92 | 93 | # Backpropagation/ Steepest Descent 94 | darch2 = backpropagation(darch, dataset@data, dataset@targets) 95 | 96 | # Conjugate Gradient Descent - Doesn't seem to work well. A bug in the code? 97 | # darch3 = backpropCGD(darch, dataset@data, dataset@targets, length = 3, switchLayers = 0) 98 | 99 | # Modified Steepest Gradient Descent 100 | darch2 <- backpropSGD(darch, dataset@data, dataset@targets, crossEntropyErr) 101 | 102 | testFunc2(darch2, dataset@data, dataset@targets, "Train set") 103 | 104 | getLayer(darch,1)[[1]][1,] 105 | getLayer(darch2,1)[[1]][1,] 106 | getLayer(darch3,1)[[1]][1,] 107 | 108 | testFunc2(darch3, dataset@data, dataset@targets, "Train Set") 109 | 110 | 111 | gr1 <- calcGradient(par, darch2, dims, data, target, crossEntropyErr, epochSwitch) 112 | gr2 <- fr(par, darch2, dims, data, target, epochSwitch ) 113 | gr1 - gr2 114 | 115 | -------------------------------------------------------------------------------- /inst/test_finetune_SGD_bn.R: -------------------------------------------------------------------------------- 1 | # a toy model to test the finetune_SGD_bn function 2 | 3 | input <- matrix(runif(100), 50, 2) 4 | target <- rowSums(input + input^2) 5 | 6 | 7 | # new a darch instance using new_darch 8 | darch <- new_dnn(c(2, 20, 30, 20, 1)) 9 | 10 | 11 | for (i in 1:100) { 12 | darch <- generateDropoutMasksForDarch(darch, dropout_input = 0.2, dropout_hidden = 0.5) 13 | darch <- finetune_SGD_bn(darch, input, target, 14 | learn_rate_weight = exp(-10), 15 | learn_rate_bias = exp(-10), 16 | learn_rate_gamma = exp(-10), 17 | with_BN = T) 18 | darch <- calcualte_population_mu_sigma(darch, input) 19 | ret <- mseError(target, predict(darch, newdata = input)) 20 | print(paste0(ret[[1]], ", ", ret[[2]])) 21 | } 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | darch = darch( x = input, 41 | y = target, 42 | layers = c(2, 10, 1), 43 | # darch = darch, 44 | darch.layerFunctionDefault = rectified_linear_unit_function, 45 | darch.layerFunctions = c("2" = linearUnitDerivative), 46 | darch.bootstrap = F, 47 | darch.isBin = F, 48 | darch.isClass = F, 49 | darch.learnRateWeights = 0.01, 50 | darch.learnRateBiases = 0.01, 51 | darch.dropoutInput = 0., 52 | darch.dropoutHidden = 0., 53 | darch.fineTuneFunction = backpropagation, # finetune_SGD, 54 | darch.batchSize = 5, 55 | darch.numEpochs = 1 56 | ) 57 | 58 | darch@executeFunction <- runDArch 59 | 60 | plot(target, predict(darch)) 61 | 62 | # run finetune_SGD_bn with batch normalization off 63 | 64 | darch@learnRateBiases <- exp(1) 65 | darch@learnRateWeights <- exp(1) 66 | 67 | for(i in 1:100) { 68 | darch <- finetune_SGD_bn(darch, input, target, learn_rate_gamma = exp(-8), with_BN = F) 69 | ret <- backpropagate_delta_bn(darch, input, target, with_BN = F) 70 | output <- ret[[4]][[2]] 71 | delta_weight <- ret[[1]] 72 | mse_err <- mseError(target, output) 73 | print(paste0(mse_err[[1]], ": ", mse_err[[2]])) 74 | } 75 | 76 | plot(target, output) 77 | 78 | darch = darch( x = input, 79 | y = target, 80 | layers = c(2, 10, 1), 81 | # darch = darch, 
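                 # the arguments below configure ReLU hidden layers with a linear output layer and use
                 # finetune_SGD_bn as the fine-tune function with batch normalization turned off (with_BN = F)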
82 | darch.layerFunctionDefault = rectified_linear_unit_function, 83 | darch.layerFunctions = c("2" = linearUnitDerivative), 84 | darch.bootstrap = F, 85 | darch.isBin = F, 86 | darch.isClass = F, 87 | darch.learnRateWeights = 0.01, 88 | darch.learnRateBiases = 0.01, 89 | darch.dropoutInput = 0., 90 | darch.dropoutHidden = 0., 91 | darch.fineTuneFunction = finetune_SGD_bn, # , 92 | with_BN = F, 93 | darch.batchSize = 5, 94 | darch.numEpochs = 100 95 | ) 96 | 97 | darch = darch( x = input, 98 | y = target, 99 | layers = c(2, 10, 1), 100 | # darch = darch, 101 | darch.layerFunctionDefault = rectified_linear_unit_function, 102 | darch.layerFunctions = c("2" = linearUnitDerivative), 103 | darch.bootstrap = F, 104 | darch.isBin = F, 105 | darch.isClass = F, 106 | darch.learnRateWeights = 0.01, 107 | darch.learnRateBiases = 0.01, 108 | darch.dropoutInput = 0., 109 | darch.dropoutHidden = 0., 110 | darch.fineTuneFunction = backpropagation, 111 | darch.batchSize = 5, 112 | darch.numEpochs = 100 113 | ) 114 | 115 | 116 | # test run_darch_bn and backpropagate_delta_bn functions 117 | 118 | darch = darch( x = input, 119 | y = target, 120 | layers = c(2, 10, 1), 121 | # darch = darch, 122 | darch.layerFunctionDefault = rectified_linear_unit_function, 123 | darch.layerFunctions = c("2" = linearUnitDerivative), 124 | darch.bootstrap = F, 125 | darch.isBin = F, 126 | darch.isClass = F, 127 | darch.learnRateWeights = 0.01, 128 | darch.learnRateBiases = 0.01, 129 | darch.dropoutInput = 0., 130 | darch.dropoutHidden = 0., 131 | darch.fineTuneFunction = finetune_SGD_bn, 132 | with_BN = T, 133 | darch.batchSize = 5, 134 | darch.numEpochs = 1 135 | ) 136 | 137 | output1 <- predict(darch, newdata = input) 138 | ret <- backpropagate_delta_bn(darch, input, target, with_BN = T) 139 | output2 <- ret[[4]][[2]] 140 | -------------------------------------------------------------------------------- /inst/test_new_dnn.R: -------------------------------------------------------------------------------- 1 | input <- matrix(runif(6), 3, 2) 2 | target <- rowSums(input) 3 | 4 | darch <- new_dnn(c(2, 1)) 5 | predict(darch, newdata = input) 6 | 7 | x <- cbind(input, 1) 8 | weight <- getLayer(darch, 1)[[1]] 9 | 10 | y <- x %*% weight 11 | 12 | y 13 | -------------------------------------------------------------------------------- /inst/test_run_dnn.R: -------------------------------------------------------------------------------- 1 | input <- matrix(runif(6), 3, 2) 2 | target <- rowSums(input) 3 | 4 | darch <- new_dnn(c(2, 2, 1)) 5 | 6 | # use the runDArch as execution function 7 | darch@executeFunction <- runDArch 8 | predict(darch, newdata = input) 9 | 10 | # now change the execution function to run_dnn 11 | darch@executeFunction <- run_dnn 12 | predict(darch, newdata = input) 13 | 14 | 15 | # now change the sigma of the hidden layer 16 | # should expect different output 17 | darch@layers[[1]][[6]] <- rep(0.01, 2) 18 | predict(darch, newdata = input) 19 | 20 | 21 | # compare the results with backpropagate_delta_bn function 22 | 23 | # set up the dropout mask 24 | dropoutMasks <- list() 25 | numLayers <- length(getLayers(darch)) 26 | 27 | # generate dropout masks 28 | generateDropoutMask <- function(length, dropoutRate) 29 | { 30 | if (dropoutRate == 0) 31 | { 32 | ret <- rep(1, length) 33 | } 34 | else 35 | { 36 | ret <- sample(0:1, length, replace = T, 37 | prob = c(dropoutRate, 1 - dropoutRate)) 38 | } 39 | 40 | return (ret) 41 | } 42 | 43 | setDropoutMask(darch, 0) <- 44 | generateDropoutMask(nrow(getLayerWeights(darch, 
1)[]) - 1, 45 | darch@dropoutInput) 46 | 47 | for (i in 1:(numLayers - 2)) 48 | { 49 | setDropoutMask(darch, i) <- 50 | generateDropoutMask(nrow(getLayerWeights(darch, i+1)[])-1, 51 | darch@dropoutHidden) 52 | } 53 | 54 | 55 | output <- backpropagate_delta_bn(darch, input, target, with_BN = F)[[4]] 56 | 57 | y <- predict(darch, newdata = input) 58 | 59 | -------------------------------------------------------------------------------- /inst/test_train_dnn.R: -------------------------------------------------------------------------------- 1 | ########################################################################################################################### 2 | # Test case 1: test the basic functionality of run_dnn 3 | 4 | input <- matrix(runif(100), 50, 2) 5 | input_valid <- matrix(runif(10), 5, 2) 6 | target <- rowSums(input + input^2) 7 | target_valid <- rowSums(input_valid + input_valid^2) 8 | 9 | darch <- new_dnn(c(2,5,1)) 10 | darch <- train_dnn(darch, 11 | input, 12 | target, 13 | # input_valid, 14 | # target_valid, 15 | 16 | learn_rate_weight = exp(-5), 17 | learn_rate_bias = exp(-5), 18 | learn_rate_gamma = exp(-5), 19 | batch_size = 10, 20 | batch_normalization = F, 21 | dropout_input = 0, 22 | dropout_hidden = 0, 23 | momentunm_initial = 0.6, 24 | momentum_final = 0.9, 25 | momentum_switch = 100, 26 | num_epochs = 100, 27 | 28 | # target types 29 | error_function = meanSquareErr, 30 | report_classification_error = F 31 | ) 32 | 33 | # test the dropout 34 | 35 | 36 | ########################################################################################################################### 37 | # Test case 2: Test mixed training of BN and No BN 38 | # 2.1 39 | # first train with BN on 40 | # then train with BN off 41 | rm(darch) 42 | darch <- new_dnn(c(2, 5, 10, 1)) 43 | darch <- train_dnn(darch, 44 | input, 45 | target, 46 | input_valid, 47 | target_valid, 48 | # training parameters 49 | learn_rate_weight = exp(-5), 50 | learn_rate_bias = exp(-5), 51 | learn_rate_gamma = exp(-5), 52 | batch_size = 10, 53 | batch_normalization = T, 54 | dropout_input = 0.4, 55 | dropout_hidden = 0.8, 56 | momentunm_initial = 0.6, 57 | momentum_final = 0.9, 58 | momentum_switch = 100, 59 | num_epochs = 50, 60 | # target types 61 | error_function = meanSquareErr, 62 | report_classification_error = F 63 | ) 64 | 65 | darch <- train_dnn(darch, 66 | input, 67 | target, 68 | input_valid, 69 | target_valid, 70 | # training parameters 71 | learn_rate_weight = exp(-5), 72 | learn_rate_bias = exp(-5), 73 | learn_rate_gamma = exp(-5), 74 | batch_size = 10, 75 | batch_normalization = F, 76 | dropout_input = 0.4, 77 | dropout_hidden = 0.8, 78 | momentunm_initial = 0.6, 79 | momentum_final = 0.9, 80 | momentum_switch = 100, 81 | num_epochs = 50, 82 | # target types 83 | error_function = meanSquareErr, 84 | report_classification_error = F 85 | ) 86 | 87 | plot(darch@stats$dataErrors$raw) 88 | plot(darch@stats$validErrors$raw) 89 | 90 | # 2.2 91 | # firt train with BN off 92 | # then trian with BN on 93 | 94 | rm(darch) 95 | darch <- new_dnn(c(2, 5, 10, 1)) 96 | darch <- train_dnn(darch, 97 | input, 98 | target, 99 | input_valid, 100 | target_valid, 101 | # training parameters 102 | learn_rate_weight = exp(-5), 103 | learn_rate_bias = exp(-5), 104 | learn_rate_gamma = exp(-5), 105 | batch_size = 10, 106 | batch_normalization = F, 107 | dropout_input = 0.4, 108 | dropout_hidden = 0.8, 109 | momentunm_initial = 0.6, 110 | momentum_final = 0.9, 111 | momentum_switch = 100, 112 | num_epochs = 50, 113 | # target 
types 114 | error_function = meanSquareErr, 115 | report_classification_error = F 116 | ) 117 | 118 | 119 | darch <- train_dnn(darch, 120 | input, 121 | target, 122 | input_valid, 123 | target_valid, 124 | # training parameters 125 | learn_rate_weight = exp(-5), 126 | learn_rate_bias = exp(-5), 127 | learn_rate_gamma = exp(-5), 128 | batch_size = 10, 129 | batch_normalization = T, 130 | dropout_input = 0.4, 131 | dropout_hidden = 0.8, 132 | momentunm_initial = 0.6, 133 | momentum_final = 0.9, 134 | momentum_switch = 100, 135 | num_epochs = 50, 136 | # target types 137 | error_function = meanSquareErr, 138 | report_classification_error = F 139 | ) 140 | 141 | plot(darch@stats$dataErrors$raw) 142 | plot(darch@stats$validErrors$raw) 143 | 144 | 145 | ########################################################################################################################### 146 | # Test 3: Evaluate the Batch Normalization 147 | # Compare BN training with no BN training 148 | 149 | input <- matrix(runif(500), 250, 2) 150 | input_valid <- matrix(runif(50), 25, 2) 151 | target <- rowSums(cos(input) + sin(input)^2 + tan(input)^3) 152 | target_valid <- as.matrix(rowSums(cos(input_valid) + sin(input_valid)^2 + tan(input_valid)^3)) 153 | 154 | 155 | rm(darch) 156 | darch <- new_dnn(c(2, 10, 10, 1)) 157 | darch <- train_dnn(darch, 158 | input, 159 | target, 160 | input_valid, 161 | target_valid, 162 | # training parameters 163 | learn_rate_weight = exp(-8), 164 | learn_rate_bias = exp(-8), 165 | learn_rate_gamma = exp(-8), 166 | batch_size = 10, 167 | batch_normalization = T, 168 | dropout_input = 0., 169 | dropout_hidden = 0., 170 | momentunm_initial = 0.6, 171 | momentum_final = 0.9, 172 | momentum_switch = 100, 173 | num_epochs = 250, 174 | # target types 175 | error_function = meanSquareErr, 176 | report_classification_error = F 177 | ) 178 | 179 | rsq(darch, input = input, target = target) 180 | # 50 Iter: .959 181 | # 250 Iter: .961 182 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 183 | 184 | rsq(darch, input = input_valid, target = target_valid) 185 | # 50 iterations: .965 186 | # 250 iterations: .964 187 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 188 | 189 | plot(darch@stats$dataErrors$raw) 190 | 191 | plot(darch@stats$validErrors$raw) 192 | 193 | 194 | rm(darch_2) 195 | darch_2 <- new_dnn(c(2, 10, 10, 1)) 196 | darch_2 <- train_dnn(darch_2, 197 | input, 198 | target, 199 | input_valid, 200 | target_valid, 201 | # training parameters 202 | learn_rate_weight = exp(-8), 203 | learn_rate_bias = exp(-8), 204 | learn_rate_gamma = exp(-8), 205 | batch_size = 10, 206 | batch_normalization = F, 207 | dropout_input = 0., 208 | dropout_hidden = 0., 209 | momentunm_initial = 0.6, 210 | momentum_final = 0.9, 211 | momentum_switch = 100, 212 | num_epochs = 250, 213 | # target types 214 | error_function = meanSquareErr, 215 | report_classification_error = F 216 | ) 217 | 218 | rsq(darch_2, input = input, target = target) 219 | # 50 iterations: .687 220 | # 250 iterations: .780 221 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 222 | 223 | rsq(darch_2, input = input_valid, target = target_valid) 224 | # 50 iterations: .728 225 | # 250 iterations: .881 226 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 227 | 228 | plot(darch_2@stats$dataErrors$raw) 229 | 230 | plot(darch_2@stats$validErrors$raw) 231 | 232 | 233 | 234 | 
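# The test cases in this file repeatedly compare the raw training and validation error
# traces stored in darch@stats. A small helper keeps that comparison in a single plot.
# This is only a sketch: it assumes the dataErrors/validErrors slots are populated by
# train_dnn()/darch() exactly as in the calls above and below.
plot_error_traces <- function(dnn, main = "") {
  err_train <- dnn@stats$dataErrors$raw
  err_valid <- dnn@stats$validErrors$raw
  plot(err_train, type = "l", xlab = "epoch", ylab = "raw error", main = main,
       ylim = range(c(err_train, err_valid)))
  lines(err_valid, col = "red")
  legend("topright", legend = c("training", "validation"), col = c("black", "red"), lty = 1)
}
# e.g. plot_error_traces(darch, "with batch normalization")
#      plot_error_traces(darch_2, "without batch normalization")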
########################################################################################################################### 235 | # Test case 4: 236 | # Cross comparison between train_dnn and darch 237 | # differences: 238 | # 1) batch generation 239 | # 2) batch normalization 240 | # 3) Cross Entropy Error 241 | # 4) Bug in runDArch with dropout 242 | 243 | # 4.1 benchmark - 1 batch. use no batch normalization 244 | 245 | input <- matrix(runif(500), 250, 2) 246 | input_valid <- matrix(runif(50), 25, 2) 247 | target <- rowSums(cos(input) + sin(input)^2) 248 | target_valid <- as.matrix(rowSums(cos(input_valid) + sin(input_valid)^2)) 249 | 250 | # use train_dnn function from deeplearning library 251 | 252 | rm(darch) 253 | darch <- new_dnn(c(2, 20, 20, 1)) 254 | darch <- train_dnn(darch, 255 | input, 256 | target, 257 | input_valid, 258 | target_valid, 259 | # training parameters 260 | learn_rate_weight = exp(-8), 261 | learn_rate_bias = exp(-8), 262 | learn_rate_gamma = exp(-8), 263 | batch_size = 250, 264 | batch_normalization = F, 265 | dropout_input = 0., 266 | dropout_hidden = 0., 267 | momentunm_initial = 0.6, 268 | momentum_final = 0.9, 269 | momentum_switch = 100, 270 | num_epochs = 500, 271 | # target types 272 | error_function = meanSquareErr, 273 | report_classification_error = F 274 | ) 275 | 276 | 277 | rsq(darch, input = input_valid, target = target_valid) # .983 278 | lines(x = c(2,3), y = c(2, 3), col = "red") 279 | 280 | plot(darch@stats$dataErrors$raw) 281 | 282 | plot(darch@stats$validErrors$raw) 283 | 284 | # use darch function from the darch library 285 | 286 | rm(darch) 287 | 288 | darch <- darch( input, 289 | target, 290 | layers = c(2, 20, 20, 1), 291 | xvalid = input_valid, 292 | yValid = target_valid, 293 | # training parameters 294 | darch.learnRateBiases = exp(-8), 295 | darch.learnRateWeights = exp(-8), 296 | darch.layerFunctionDefault = rectified_linear_unit_function, 297 | darch.layerFunctions = list("3" = linearUnitDerivative), 298 | darch.batchSize = 250, 299 | darch.dropoutInput = 0., 300 | darch.dropoutHidden = 0., 301 | darch.momentumSwitch = 100, 302 | darch.initialMomentum = 0.6, 303 | darch.finalMomentum = 0.9, 304 | darch.numEpochs = 500, 305 | darch.isBin = F, 306 | darch.isClass = F 307 | ) 308 | 309 | rsq(darch, input = input_valid, target = target_valid) # .986 310 | lines(x = c(2,3), y = c(2, 3), col = "red") 311 | 312 | plot(darch@stats$dataErrors$raw) 313 | 314 | plot(darch@stats$validErrors$raw) 315 | 316 | 317 | ######################################################################## 318 | # 4.2 test batch initialization - 50 batches. 
use no batch normalization 319 | 320 | 321 | input <- matrix(runif(500), 250, 2) 322 | input_valid <- matrix(runif(50), 25, 2) 323 | target <- rowSums(cos(input) + sin(input)^2 + tan(input)^3) 324 | target_valid <- as.matrix(rowSums(cos(input_valid) + sin(input_valid)^2 + tan(input_valid)^3)) 325 | 326 | # use train_dnn function from deeplearning library 327 | 328 | rm(darch) 329 | darch <- new_dnn(c(2, 20, 20, 1)) 330 | darch <- train_dnn(darch, 331 | input, 332 | target, 333 | input_valid, 334 | target_valid, 335 | # training parameters 336 | learn_rate_weight = exp(-8), 337 | learn_rate_bias = exp(-8), 338 | learn_rate_gamma = exp(-8), 339 | batch_size = 10, 340 | batch_normalization = F, 341 | dropout_input = 0., 342 | dropout_hidden = 0., 343 | momentunm_initial = 0.6, 344 | momentum_final = 0.9, 345 | momentum_switch = 100, 346 | num_epochs = 500, 347 | # target types 348 | error_function = meanSquareErr, 349 | report_classification_error = F 350 | ) 351 | 352 | rsq(darch, input = input, target = target) 353 | # 100 iterations: .760 354 | # 500 iterations: .987 355 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 356 | 357 | rsq(darch, input = input_valid, target = target_valid) 358 | # 100 iterations: .770 359 | # 500 iterations; .979 360 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 361 | 362 | plot(darch@stats$dataErrors$raw) 363 | 364 | plot(darch@stats$validErrors$raw) 365 | 366 | # use darch function from the darch library 367 | 368 | darch_2 <- darch( input, 369 | target, 370 | layers = c(2, 20, 20, 1), 371 | xvalid = input_valid, 372 | yValid = target_valid, 373 | # training parameters 374 | darch.learnRateBiases = exp(-8), 375 | darch.learnRateWeights = exp(-8), 376 | darch.layerFunctionDefault = rectified_linear_unit_function, 377 | darch.layerFunctions = list("3" = linearUnitDerivative), 378 | darch.batchSize = 10, 379 | darch.dropoutInput = 0., 380 | darch.dropoutHidden = 0., 381 | darch.momentumSwitch = 100, 382 | darch.initialMomentum = 0.6, 383 | darch.finalMomentum = 0.9, 384 | darch.numEpochs = 500, 385 | darch.isBin = F, 386 | darch.isClass = F 387 | ) 388 | 389 | rsq(darch_2) 390 | # 100 iterations: .767 391 | # 500 iterations: .980 392 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 393 | 394 | rsq(darch_2, input = input_valid, target = target_valid) 395 | # 100 iterations: .733 396 | # 500 iterations: .974 397 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 398 | 399 | plot(darch_2@stats$dataErrors$raw) 400 | 401 | plot(darch_2@stats$validErrors$raw) 402 | 403 | 404 | 405 | #################################################################### 406 | # 4.3 test batchnormalization - 50 batches. 
use batch normalization 407 | 408 | input <- matrix(runif(500), 250, 2) 409 | input_valid <- matrix(runif(50), 25, 2) 410 | target <- rowSums(cos(input) + sin(input)^2 + tan(input)^3) 411 | target_valid <- as.matrix(rowSums(cos(input_valid) + sin(input_valid)^2 + tan(input_valid)^3)) 412 | 413 | # use train_dnn function from deeplearning library 414 | 415 | rm(darch) 416 | darch <- new_dnn(c(2, 20, 20, 1)) 417 | darch <- train_dnn(darch, 418 | input, 419 | target, 420 | input_valid, 421 | target_valid, 422 | # training parameters 423 | learn_rate_weight = exp(-8), 424 | learn_rate_bias = exp(-8), 425 | learn_rate_gamma = exp(-8), 426 | batch_size = 10, 427 | batch_normalization = T, 428 | dropout_input = 0., 429 | dropout_hidden = 0, 430 | momentunm_initial = 0.6, 431 | momentum_final = 0.9, 432 | momentum_switch = 100, 433 | num_epochs = 100, 434 | # target types 435 | error_function = meanSquareErr, 436 | report_classification_error = F 437 | ) 438 | 439 | rsq(darch, input = input, target = target) 440 | # 100 Iterations: .968 441 | # 500 Iterations: .971 442 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 443 | 444 | rsq(darch, input = input_valid, target = target_valid) 445 | # 100 Iterations: .946 446 | # 500 Iterations: .930 447 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 448 | 449 | plot(darch@stats$dataErrors$raw) 450 | 451 | plot(darch@stats$validErrors$raw) 452 | 453 | 454 | 455 | 456 | # use darch function from the darch library 457 | darch_2 <- darch( input, 458 | target, 459 | layers = c(2, 20, 20, 1), 460 | xvalid = input_valid, 461 | yValid = target_valid, 462 | # training parameters 463 | darch.learnRateBiases = exp(-8), 464 | darch.learnRateWeights = exp(-8), 465 | darch.layerFunctionDefault = rectified_linear_unit_function, 466 | darch.layerFunctions = list("3" = linearUnitDerivative), 467 | darch.batchSize = 10, 468 | darch.dropoutInput = 0., 469 | darch.dropoutHidden = 0, 470 | darch.momentumSwitch = 100, 471 | darch.initialMomentum = 0.6, 472 | darch.finalMomentum = 0.9, 473 | darch.numEpochs = 500, 474 | darch.isBin = F, 475 | darch.isClass = F 476 | ) 477 | 478 | rsq(darch_2) 479 | # 100 Iterations: .727 480 | # 500 Iterations: .974 481 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 482 | 483 | rsq(darch_2, input = input_valid, target = target_valid) 484 | # 100 Iterations: .742 485 | # 500 Iterations: .974 486 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 487 | 488 | plot(darch_2@stats$dataErrors$raw) 489 | 490 | plot(darch_2@stats$validErrors$raw) 491 | 492 | 493 | 494 | ################################################################################# 495 | # 4.4 test batchnormalization - 50 batches. use batch normalization. 
use dropout 496 | 497 | input <- matrix(runif(500), 250, 2) 498 | input_valid <- matrix(runif(50), 25, 2) 499 | target <- rowSums(cos(input) + sin(input)^2 + tan(input)^3) 500 | target_valid <- as.matrix(rowSums(cos(input_valid) + sin(input_valid)^2 + tan(input_valid)^3)) 501 | 502 | # use train_dnn function from deeplearning library 503 | 504 | rm(darch) 505 | darch <- new_dnn(c(2, 40, 40, 1), hidden_layer_default = sigmoidUnitDerivative) 506 | darch <- train_dnn(darch, 507 | input, 508 | target, 509 | input_valid, 510 | target_valid, 511 | # training parameters 512 | 513 | learn_rate_weight = exp(-8) * 100, 514 | learn_rate_bias = exp(-8) * 100, 515 | learn_rate_gamma = exp(-8) * 100, 516 | batch_size = 10, 517 | batch_normalization = T, 518 | dropout_input = 0.2, 519 | dropout_hidden = 0, 520 | momentunm_initial = 0.6, 521 | momentum_final = 0.9, 522 | momentum_switch = 100, 523 | num_epochs = 500, 524 | # target types 525 | error_function = meanSquareErr, 526 | report_classification_error = F 527 | ) 528 | 529 | rsq(darch, input = input, target = target) 530 | # learn rate: exp(-8) * 100 531 | # dropout input/hidden: .2/.3 532 | # 100 Iterations: .742 533 | # 500 Iterations: .937 534 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 535 | 536 | rsq(darch, input = input_valid, target = target_valid) 537 | # learn rate: exp(-8) * 100 538 | # dropout input/hidden: .2/.3 539 | # 100 Iterations: .782 540 | # 500 Iterations: .914 541 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 542 | 543 | plot(darch@stats$dataErrors$raw) 544 | 545 | plot(darch@stats$validErrors$raw) 546 | 547 | # use darch function from the darch library 548 | 549 | 550 | 551 | darch_2 <- darch( input, 552 | target, 553 | layers = c(2, 40, 40, 1), 554 | xvalid = input_valid, 555 | yValid = target_valid, 556 | # training parameters 557 | darch.learnRateBiases = exp(-8) * 100, 558 | darch.learnRateWeights = exp(-8) * 100, 559 | darch.layerFunctionDefault = sigmoidUnitDerivative, 560 | darch.layerFunctions = list("3" = linearUnitDerivative), 561 | darch.batchSize = 10, 562 | darch.dropoutInput = 0.2, 563 | darch.dropoutHidden = 0.3, 564 | darch.momentumSwitch = 100, 565 | darch.initialMomentum = 0.6, 566 | darch.finalMomentum = 0.9, 567 | darch.numEpochs = 100, 568 | darch.isBin = F, 569 | darch.isClass = F 570 | ) 571 | 572 | # drop out fails with ReLU!!!!!!!!!!!!!!!!!!!!!! 
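# A hand-rolled R-squared for cross-checking the rsq() calls in these tests.
# This is only a sketch: it assumes rsq() reports the usual 1 - SS_res / SS_tot
# of the network's predictions on the supplied data.
rsq_manual <- function(dnn, input, target) {
  pred <- predict(dnn, newdata = input)
  1 - sum((target - pred)^2) / sum((target - mean(target))^2)
}
# e.g. rsq_manual(darch_2, input, target)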
573 | 574 | rsq(darch_2, input = input, target = target) 575 | # learn rate: exp(-8) * 100 576 | # dropout input/hidden: .2/.3 577 | # 100 Iterations: .638 578 | # 500 Iterations: .657 579 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 580 | 581 | rsq(darch_2, input = input_valid, target = target_valid) 582 | # learn rate: exp(-8) * 100 583 | # dropout input/hidden: .2/.3 584 | # 100 Iterations: .600 585 | # 500 Iterations: .347 586 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 587 | 588 | plot(darch_2@stats$dataErrors$raw) 589 | 590 | plot(darch_2@stats$validErrors$raw) 591 | 592 | 593 | 594 | 595 | ################################################################################# 596 | # 4.5 test error functions 597 | 598 | input <- matrix(runif(500), 250, 2) 599 | input_valid <- matrix(runif(50), 25, 2) 600 | target <- rowSums(cos(input) + sin(input)^2 + tan(input)^3) 601 | target_valid <- as.matrix(rowSums(cos(input_valid) + sin(input_valid)^2 + tan(input_valid)^3)) 602 | med <- median(target) 603 | target <- 1 * (target < (med * runif(1) * 2 )) 604 | target_valid <- 1 * (target_valid < (med * runif(1) * 2)) 605 | 606 | 607 | # use train_dnn function from deeplearning library 608 | rm(darch) 609 | darch <- new_dnn(c(2, 20, 20, 1), output_layer_default = sigmoidUnitDerivative) 610 | darch <- train_dnn(darch, 611 | input, 612 | target, 613 | input_valid, 614 | target_valid, 615 | # training parameters 616 | 617 | learn_rate_weight = exp(-8) , 618 | learn_rate_bias = exp(-8), 619 | learn_rate_gamma = exp(-8), 620 | batch_size = 10, 621 | batch_normalization = T, 622 | dropout_input = 0., 623 | dropout_hidden = 0., 624 | momentunm_initial = 0.6, 625 | momentum_final = 0.9, 626 | momentum_switch = 100, 627 | num_epochs = 50, 628 | # target types 629 | error_function = crossEntropyErr, 630 | report_classification_error = T 631 | ) 632 | 633 | AR(darch, input = input, target = target) 634 | 635 | # 100 Iterations: .916 636 | # 500 Iterations: 637 | 638 | AR(darch, input = input_valid, target = target_valid) 639 | 640 | # 100 Iterations: 1 641 | # 500 Iterations: 642 | 643 | plot(darch@stats$dataErrors$raw) 644 | 645 | plot(darch@stats$validErrors$raw) 646 | 647 | # use darch function from the darch library 648 | 649 | 650 | 651 | darch_2 <- darch( input, 652 | target, 653 | layers = c(2, 20, 20, 1), 654 | xvalid = input_valid, 655 | yValid = target_valid, 656 | # training parameters 657 | darch.learnRateBiases = exp(-8) * 1, 658 | darch.learnRateWeights = exp(-8) * 1, 659 | darch.layerFunctionDefault = rectified_linear_unit_function, 660 | darch.layerFunctions = list("3" = sigmoidUnitDerivative), 661 | darch.batchSize = 10, 662 | darch.dropoutInput = 0., 663 | darch.dropoutHidden = 0., 664 | darch.momentumSwitch = 100, 665 | darch.initialMomentum = 0.6, 666 | darch.finalMomentum = 0.9, 667 | darch.numEpochs = 100, 668 | darch.isBin = T, 669 | darch.isClass = F 670 | ) 671 | 672 | # drop out fails with ReLU!!!!!!!!!!!!!!!!!!!!!! 
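# A hand-rolled accuracy-ratio check for the AR() calls below. This is only a sketch:
# it assumes AR is the usual accuracy ratio (Gini coefficient) of a binary classifier,
# i.e. 2 * AUC - 1, which is not necessarily the package's exact implementation.
ar_manual <- function(dnn, input, target) {
  p <- as.vector(predict(dnn, newdata = input))
  pos <- p[target == 1]
  neg <- p[target == 0]
  auc <- mean(outer(pos, neg, ">") + 0.5 * outer(pos, neg, "=="))
  2 * auc - 1
}
# e.g. ar_manual(darch_2, input, target)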
673 | 674 | AR(darch_2, input = input, target = target) 675 | 676 | # 100 Iterations: .94 677 | # 500 Iterations: 678 | 679 | AR(darch_2, input = input_valid, target = target_valid) 680 | 681 | # 100 Iterations: 1 682 | # 500 Iterations: 683 | 684 | plot(darch_2@stats$dataErrors$raw) 685 | 686 | plot(darch_2@stats$validErrors$raw) 687 | 688 | 689 | 690 | 691 | 692 | -------------------------------------------------------------------------------- /man/AR.DArch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AR.R 3 | \name{AR.DArch} 4 | \alias{AR.DArch} 5 | \title{Calculates the Accruacy Ratio of a given set of probability} 6 | \usage{ 7 | \method{AR}{DArch}(x, input = x@dataSet@data, target = x@dataSet@targets, 8 | ...) 9 | } 10 | \arguments{ 11 | \item{x}{a DArch instance} 12 | 13 | \item{input}{the input matrix} 14 | 15 | \item{target}{binary response} 16 | 17 | \item{...}{additional inputs} 18 | } 19 | \description{ 20 | This function calculates the Accuracy Ratio of a trained darch instance 21 | } 22 | 23 | -------------------------------------------------------------------------------- /man/AR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AR.R 3 | \name{AR} 4 | \alias{AR} 5 | \title{Calculates the Accuracy Ratio of a classifier} 6 | \usage{ 7 | AR(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{model} 11 | 12 | \item{...}{additional inputs} 13 | } 14 | \description{ 15 | This function calculates the Accuracy Ratio of a binary classification 16 | model 17 | } 18 | 19 | -------------------------------------------------------------------------------- /man/AR.default.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AR.R 3 | \name{AR.default} 4 | \alias{AR.default} 5 | \title{Calculates the Accruacy Ratio of a given set of probability} 6 | \usage{ 7 | \method{AR}{default}(x, target, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a list of model output in the form of probabilities} 11 | 12 | \item{target}{binary response} 13 | 14 | \item{...}{additional inputs} 15 | } 16 | \description{ 17 | This function calculates the Accuracy Ratio of a binary classification model 18 | output against its targets 19 | } 20 | 21 | -------------------------------------------------------------------------------- /man/AR.numeric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AR.R 3 | \name{AR.numeric} 4 | \alias{AR.numeric} 5 | \title{Calculates the Accruacy Ratio of a given set of probability} 6 | \usage{ 7 | \method{AR}{numeric}(x, target, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{a list of model output in the form of probabilities} 11 | 12 | \item{target}{binary response} 13 | 14 | \item{...}{additional inputs} 15 | } 16 | \description{ 17 | This function calculates the Accuracy Ratio of a binary classification model 18 | output against its targets 19 | } 20 | 21 | -------------------------------------------------------------------------------- /man/applyDropoutMask.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dropout.R 3 | \name{applyDropoutMask} 4 | \alias{applyDropoutMask} 5 | \title{Applies the given dropout mask to the given data row-wise.} 6 | \usage{ 7 | applyDropoutMask(data, mask) 8 | } 9 | \arguments{ 10 | \item{data}{Data to which the dropout mask should be applied} 11 | 12 | \item{mask}{The dropout mask, a vector of 0 and 1.} 13 | } 14 | \value{ 15 | Data with applied dropout mask 16 | } 17 | \description{ 18 | This function multiplies each row with the dropout mask. To apply the dropout 19 | mask by row, it can simply be multiplied with the data matrix. This does not 20 | work of the mask is to be applied row-wise, hence this function. 21 | } 22 | \references{ 23 | Dropout: A Simple Way to Prevent Neural Networks from 24 | Overfitting, Nitish Srivastava 25 | } 26 | \seealso{ 27 | \url{https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf} 28 | } 29 | 30 | -------------------------------------------------------------------------------- /man/backpropagate_delta_bn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/backpropagate_delta.R 3 | \name{backpropagate_delta_bn} 4 | \alias{backpropagate_delta_bn} 5 | \title{Calculates the delta functions using backpropagation} 6 | \usage{ 7 | backpropagate_delta_bn(darch, trainData, targetData, 8 | errorFunc = meanSquareErr, with_BN = TRUE) 9 | } 10 | \arguments{ 11 | \item{darch}{a darch instance} 12 | 13 | \item{trainData}{training input} 14 | 15 | \item{targetData}{training target} 16 | 17 | \item{errorFunc}{error function to minimize during training. 
Right now mean squared 18 | erros and cross entropy errors are supported.} 19 | 20 | \item{with_BN}{traing with batch normalization on or off} 21 | } 22 | \description{ 23 | function that calculates the delta function of a darch object with batch 24 | normalization 25 | } 26 | \references{ 27 | Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift 28 | Sergey Ioffe, Christian Szegedy 29 | } 30 | \seealso{ 31 | \url{http://jmlr.org/proceedings/papers/v37/ioffe15.pdf} Pg 4 32 | } 33 | 34 | -------------------------------------------------------------------------------- /man/batch_normalization.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/batch_normalization.R 3 | \name{batch_normalization} 4 | \alias{batch_normalization} 5 | \title{Batch Normalization Function that normalizes the input before applying non-linearity} 6 | \usage{ 7 | batch_normalization(x, gamma, beta, mu = NULL, sigma_2 = NULL, 8 | epsilon = exp(-12)) 9 | } 10 | \arguments{ 11 | \item{x}{weighted sum of outputs from the previous layer} 12 | 13 | \item{gamma}{the gamma coefficient} 14 | 15 | \item{beta}{the beta coefficient} 16 | 17 | \item{mu}{the mean of the input neurons. If NULL, it will be caluclated in the function.} 18 | 19 | \item{sigma_2}{the variance of the input nerurons. If NULL, it will be calcualted in the function.} 20 | 21 | \item{epsilon}{a constant added to the variance for numerical stability} 22 | } 23 | \description{ 24 | This function normalizes the distribution of inputs to hidden layers in 25 | a neural network 26 | } 27 | \references{ 28 | Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift 29 | Sergey Ioffe, Christian Szegedy 30 | } 31 | \seealso{ 32 | \url{http://jmlr.org/proceedings/papers/v37/ioffe15.pdf} Pg 4 33 | } 34 | 35 | -------------------------------------------------------------------------------- /man/batch_normalization_differential.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/batch_normalization.R 3 | \name{batch_normalization_differential} 4 | \alias{batch_normalization_differential} 5 | \title{Function that calcualtes the differentials in the batch normalization mode} 6 | \usage{ 7 | batch_normalization_differential(delta_y, mu, sigma_2, x, x_hat, y, gamma, beta, 8 | epsilon = exp(-12), with_BN = T) 9 | } 10 | \arguments{ 11 | \item{delta_y}{derivative wrt y} 12 | 13 | \item{mu}{mean of the input} 14 | 15 | \item{sigma_2}{variance of the input} 16 | 17 | \item{x}{input} 18 | 19 | \item{x_hat}{normalized input} 20 | 21 | \item{y}{transformed input after batch normalization} 22 | 23 | \item{gamma}{gamma coefficient} 24 | 25 | \item{beta}{beta coefficient} 26 | 27 | \item{epsilon}{the contant added to the variance for numeric stability} 28 | 29 | \item{with_BN}{logical value, set to TRUE to turn on batch normalization} 30 | } 31 | \description{ 32 | Calculates the differentials in batch normalization 33 | } 34 | \references{ 35 | Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift 36 | Sergey Ioffe, Christian Szegedy 37 | } 38 | \seealso{ 39 | \url{http://jmlr.org/proceedings/papers/v37/ioffe15.pdf} Pg 4 40 | } 41 | 42 | -------------------------------------------------------------------------------- 
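The batch_normalization and batch_normalization_differential help pages above describe the forward transform of Ioffe and Szegedy: the pre-activations are normalized with the batch (or population) mean and variance and then scaled and shifted by gamma and beta. A minimal stand-alone sketch of that forward pass, using the documented argument names but not the package's own code, could look like this:

bn_sketch <- function(x, gamma, beta, mu = NULL, sigma_2 = NULL, epsilon = exp(-12)) {
  if (is.null(mu))      mu      <- colMeans(x)        # batch mean per neuron
  if (is.null(sigma_2)) sigma_2 <- apply(x, 2, var)   # batch variance per neuron
  x_hat <- sweep(x, 2, mu, "-")                           # center
  x_hat <- sweep(x_hat, 2, sqrt(sigma_2 + epsilon), "/")  # normalize
  sweep(sweep(x_hat, 2, gamma, "*"), 2, beta, "+")        # y = gamma * x_hat + beta
}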
/man/calcualte_population_mu_sigma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calculate_mu_sigma.R 3 | \name{calcualte_population_mu_sigma} 4 | \alias{calcualte_population_mu_sigma} 5 | \title{Calculates the mu and sigmas of a darch instance} 6 | \usage{ 7 | calcualte_population_mu_sigma(darch, input) 8 | } 9 | \arguments{ 10 | \item{darch}{a darch instance} 11 | 12 | \item{input}{input data} 13 | } 14 | \description{ 15 | This function calculates the mu and sigmas of hidden layers in a darch instance 16 | } 17 | 18 | -------------------------------------------------------------------------------- /man/classification_error.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/error_functions.R 3 | \name{classification_error} 4 | \alias{classification_error} 5 | \title{Calculates the classification error} 6 | \usage{ 7 | classification_error(output, target) 8 | } 9 | \arguments{ 10 | \item{output}{the output of a classifier in the form of probability. Probability > 1 11 | will be treated as positive (target = 1).} 12 | 13 | \item{target}{the target variable} 14 | } 15 | \description{ 16 | This function calculates the classification error 17 | } 18 | 19 | -------------------------------------------------------------------------------- /man/convert_categorical.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{convert_categorical} 4 | \alias{convert_categorical} 5 | \title{Data proprosess function that covnerts a categorical input to continuous input or 6 | vectorize it} 7 | \usage{ 8 | convert_categorical(x, type = "ordinal", ordered_list = list(), 9 | var_name = "var", ...) 10 | } 11 | \arguments{ 12 | \item{x}{input variable} 13 | 14 | \item{type}{ordinal or other} 15 | 16 | \item{ordered_list}{the rank ordering of an ordinal variable. Users are expected to 17 | provide a complete list of the rank ordering. Otherwise, a default rank ordering 18 | will be used.} 19 | 20 | \item{var_name}{the name of the input variable. This is used to to create vectorized 21 | input variables} 22 | 23 | \item{...}{other inputs} 24 | } 25 | \description{ 26 | Proprosess a data set. 
It converts categorical data into binary variables 27 | if it is unordered or continuous variable from 0 to 1 if it is ordinal 28 | } 29 | 30 | -------------------------------------------------------------------------------- /man/crossEntropyErr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/error_functions.R 3 | \name{crossEntropyErr} 4 | \alias{crossEntropyErr} 5 | \title{Calculates the cross entropy error} 6 | \usage{ 7 | crossEntropyErr(output, target) 8 | } 9 | \arguments{ 10 | \item{output}{the output value} 11 | 12 | \item{target}{the target value} 13 | } 14 | \description{ 15 | This function calculates the cross entropy error and its first order derivatives 16 | } 17 | 18 | -------------------------------------------------------------------------------- /man/finetune_SGD_bn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/finetune_SGD.R 3 | \name{finetune_SGD_bn} 4 | \alias{finetune_SGD_bn} 5 | \title{Updates a deep neural network's parameters using stochastic gradient descent 6 | method and batch normalization} 7 | \usage{ 8 | finetune_SGD_bn(darch, trainData, targetData, learn_rate_weight = exp(-10), 9 | learn_rate_bias = exp(-10), learn_rate_gamma = exp(-10), 10 | errorFunc = meanSquareErr, with_BN = T) 11 | } 12 | \arguments{ 13 | \item{darch}{a darch instance} 14 | 15 | \item{trainData}{training input} 16 | 17 | \item{targetData}{training target} 18 | 19 | \item{learn_rate_weight}{leanring rate for the weight matrices} 20 | 21 | \item{learn_rate_bias}{learning rate for the biases} 22 | 23 | \item{learn_rate_gamma}{learning rate for the gammas} 24 | 25 | \item{errorFunc}{the error function to minimize during training} 26 | 27 | \item{with_BN}{logical value, T to train the neural net with batch normalization} 28 | } 29 | \value{ 30 | a darch instance with parameters updated with stochastic gradient descent 31 | } 32 | \description{ 33 | This function finetunes a DArch network using SGD approach 34 | } 35 | 36 | -------------------------------------------------------------------------------- /man/generateDropoutMask.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dropout.R 3 | \name{generateDropoutMask} 4 | \alias{generateDropoutMask} 5 | \title{Generates the dropout mask for the deep neural network} 6 | \usage{ 7 | generateDropoutMask(length, dropoutRate) 8 | } 9 | \arguments{ 10 | \item{length, }{the dimension of the layer} 11 | 12 | \item{dropoutRate, }{the dropout rate} 13 | } 14 | \description{ 15 | This function generates the dropout mask for the deep neural network 16 | } 17 | \references{ 18 | Dropout: A Simple Way to Prevent Neural Networks from 19 | Overfitting, Nitish Srivastava 20 | } 21 | \seealso{ 22 | \url{https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf} 23 | } 24 | 25 | -------------------------------------------------------------------------------- /man/generateDropoutMasksForDarch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dropout.R 3 | \name{generateDropoutMasksForDarch} 4 | \alias{generateDropoutMasksForDarch} 5 | \title{Generates dropout masks for dnn} 6 | 
\usage{ 7 | generateDropoutMasksForDarch(darch, dropout_input, dropout_hidden) 8 | } 9 | \arguments{ 10 | \item{darch, }{a DArch instance} 11 | 12 | \item{dropout_input, }{the dropout rate for the input layer} 13 | 14 | \item{dropout_hidden, }{the dropout rate for the hidden layer} 15 | } 16 | \description{ 17 | This function generates dropout maks for dnn 18 | } 19 | \references{ 20 | Dropout: A Simple Way to Prevent Neural Networks from 21 | Overfitting, Nitish Srivastava 22 | } 23 | \seealso{ 24 | \url{https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf} 25 | } 26 | 27 | -------------------------------------------------------------------------------- /man/matMult.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{matMult} 4 | \alias{matMult} 5 | \title{Calculates the outer product of two matricies} 6 | \usage{ 7 | matMult(data, weight) 8 | } 9 | \arguments{ 10 | \item{data}{the date matrix} 11 | 12 | \item{weight}{the weight matrix} 13 | } 14 | \description{ 15 | Calcualtes the outer product of two matrices 16 | } 17 | 18 | -------------------------------------------------------------------------------- /man/meanSquareErr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/error_functions.R 3 | \name{meanSquareErr} 4 | \alias{meanSquareErr} 5 | \title{Calculates the mean squared error} 6 | \usage{ 7 | meanSquareErr(output, target) 8 | } 9 | \arguments{ 10 | \item{output}{the output value} 11 | 12 | \item{target}{the target value} 13 | } 14 | \description{ 15 | This function calculates the mean squared error and its first order derivatives 16 | } 17 | 18 | -------------------------------------------------------------------------------- /man/new_dnn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/new_dnn.R 3 | \name{new_dnn} 4 | \alias{new_dnn} 5 | \title{Creats a new instance of darch class} 6 | \usage{ 7 | new_dnn(layer_structure, layer_functions = NULL, 8 | output_layer_default = linearUnitDerivative, 9 | hidden_layer_default = rectified_linear_unit_function, 10 | weight_initiliazaiton = generateWeights) 11 | } 12 | \arguments{ 13 | \item{layer_structure}{a int vector that specifies the number and width of layers} 14 | 15 | \item{layer_functions}{a list of activation functions used by each layer} 16 | 17 | \item{output_layer_default}{the activation function for the output layer} 18 | 19 | \item{hidden_layer_default}{the activation function for the hidden layers} 20 | 21 | \item{weight_initiliazaiton}{function that initialize a layer's weight matrix} 22 | } 23 | \description{ 24 | This function creates a new instance of darch class 25 | } 26 | \examples{ 27 | # create a new deep neural network for classificaiton 28 | dnn_regression <- new_dnn( 29 | c(2, 50, 50, 20, 1), 30 | # The layer structure of the deep neural network. 31 | # The first element is the number of input variables. 32 | # The last element is the number of output variables. 
33 | hidden_layer_default = rectified_linear_unit_function, 34 | # for hidden layers, use rectified_linear_unit_function 35 | output_layer_default = sigmoidUnitDerivative 36 | # for classification, use sigmoidUnitDerivative function 37 | ) 38 | 39 | # create a new deep neural network for classificaiton 40 | dnn_regression <- new_dnn( 41 | c(2, 50, 50, 20, 1), 42 | # The layer structure of the deep neural network. 43 | # The first element is the number of input variables. 44 | # The last element is the number of output variables. 45 | hidden_layer_default = rectified_linear_unit_function, 46 | # for hidden layers, use rectified_linear_unit_function 47 | output_layer_default = linearUnitDerivative 48 | # for regression, use linearUnitDerivative function 49 | ) 50 | } 51 | 52 | -------------------------------------------------------------------------------- /man/print_weight.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{print_weight} 4 | \alias{print_weight} 5 | \title{Prints out the weight of a deep neural network} 6 | \usage{ 7 | print_weight(darch, num_of_layer, show_derivative = F, type = "heatmap") 8 | } 9 | \arguments{ 10 | \item{darch}{DArch instance} 11 | 12 | \item{num_of_layer}{the number of the layer to print} 13 | 14 | \item{show_derivative}{T to show the weight value. F to show the percentage 15 | weight change in the finetuning stage. This helps spot the network saturation problem.} 16 | 17 | \item{type}{type of the graph. It supports "heatmap", "surface", and "histogram"} 18 | } 19 | \description{ 20 | This function prints out the weight in a heat map, 3D surface, or histogram 21 | } 22 | \examples{ 23 | # Example of Regression 24 | 25 | input <- matrix(runif(1000), 500, 2) 26 | input_valid <- matrix(runif(100), 50, 2) 27 | target <- rowSums(input + input^2) 28 | target_valid <- rowSums(input_valid + input_valid^2) 29 | # create a new deep neural network for classificaiton 30 | dnn_regression <- new_dnn( 31 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 32 | # The first element is the number of input variables. 33 | # The last element is the number of output variables. 
34 | hidden_layer_default = rectified_linear_unit_function, 35 | # for hidden layers, use rectified_linear_unit_function 36 | output_layer_default = linearUnitDerivative 37 | # for regression, use linearUnitDerivative function 38 | ) 39 | 40 | # print the layer weights 41 | # this function can print heatmap, histogram, or a surface 42 | print_weight(dnn_regression, 1, type = "heatmap") 43 | 44 | print_weight(dnn_regression, 2, type = "surface") 45 | 46 | print_weight(dnn_regression, 3, type = "histogram") 47 | 48 | 49 | } 50 | 51 | -------------------------------------------------------------------------------- /man/rectified_linear_unit_function.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rectified_linear_unit_function.R 3 | \name{rectified_linear_unit_function} 4 | \alias{rectified_linear_unit_function} 5 | \title{Rectified Linear Unit Function} 6 | \usage{ 7 | rectified_linear_unit_function(data, weights) 8 | } 9 | \arguments{ 10 | \item{data}{the data matrix for calculation} 11 | 12 | \item{weights}{the connection (weight matrix/filter) and the bias} 13 | } 14 | \value{ 15 | A list of function values and derivatives 16 | } 17 | \description{ 18 | This functions calculates the value and the derivative of a rectified linear 19 | function. Reference Vinod Nair, Geoffrey Hinton, Rectified Linear Units 20 | Improve Restricted Boltzmann Machines 21 | } 22 | 23 | -------------------------------------------------------------------------------- /man/reset_population_mu_sigma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calculate_mu_sigma.R 3 | \name{reset_population_mu_sigma} 4 | \alias{reset_population_mu_sigma} 5 | \title{Resets the mu and sigmas of a darch instance to 0 and 1} 6 | \usage{ 7 | reset_population_mu_sigma(darch) 8 | } 9 | \arguments{ 10 | \item{darch}{a darch instance} 11 | } 12 | \description{ 13 | This function resets the mu and sigmas of hidden layers in a darch instance 14 | to 0 and 1 15 | } 16 | 17 | -------------------------------------------------------------------------------- /man/rsq.DArch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rsq.R 3 | \name{rsq.DArch} 4 | \alias{rsq.DArch} 5 | \title{Utilitiy function that calcualtes RSQ of a DArch instance} 6 | \usage{ 7 | \method{rsq}{DArch}(x, input = x@dataSet@data, target = x@dataSet@targets, 8 | ...) 9 | } 10 | \arguments{ 11 | \item{x}{DArch Model} 12 | 13 | \item{input}{Input data} 14 | 15 | \item{target}{Target data} 16 | 17 | \item{...}{addtional inputs} 18 | } 19 | \description{ 20 | Calcualte a regression model's RSQ of a deep neural network 21 | } 22 | 23 | -------------------------------------------------------------------------------- /man/rsq.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rsq.R 3 | \name{rsq} 4 | \alias{rsq} 5 | \title{Calculate the RSQ of a regression model 6 | Utilitiy function that calcualtes RSQ of a model. It measures the goodness-of- 7 | fit of a regression model.} 8 | \usage{ 9 | rsq(x, ...) 
10 | } 11 | \arguments{ 12 | \item{x}{Regression Model} 13 | 14 | \item{...}{Additional Input} 15 | } 16 | \description{ 17 | Calculate the RSQ of a regression model 18 | Utilitiy function that calcualtes RSQ of a model. It measures the goodness-of- 19 | fit of a regression model. 20 | } 21 | 22 | -------------------------------------------------------------------------------- /man/rsq.lm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rsq.R 3 | \name{rsq.lm} 4 | \alias{rsq.lm} 5 | \title{Utilitiy function that calcualtes RSQ of a linear model} 6 | \usage{ 7 | \method{rsq}{lm}(x, input, target, ...) 8 | } 9 | \arguments{ 10 | \item{x}{linear Model} 11 | 12 | \item{input}{Input data} 13 | 14 | \item{target}{Target data} 15 | 16 | \item{...}{additional inputs} 17 | } 18 | \description{ 19 | Calcualte a regression model's RSQ 20 | } 21 | 22 | -------------------------------------------------------------------------------- /man/run_dnn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run_dnn.R 3 | \name{run_dnn} 4 | \alias{run_dnn} 5 | \title{Execution function that runs in the batch normalization mode} 6 | \usage{ 7 | run_dnn(darch, data) 8 | } 9 | \arguments{ 10 | \item{darch}{a darch instance} 11 | 12 | \item{data}{input data} 13 | } 14 | \description{ 15 | This function calcualtes the output of a deep neural network with input data 16 | } 17 | 18 | -------------------------------------------------------------------------------- /man/train_dnn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/train_dnn.R 3 | \name{train_dnn} 4 | \alias{train_dnn} 5 | \title{Train a deep neural network} 6 | \usage{ 7 | train_dnn(darch, input, target, input_valid = NULL, target_valid = NULL, 8 | ..., learn_rate_weight = exp(-10), learn_rate_bias = exp(-10), 9 | learn_rate_gamma = 1, batch_size = 10, batch_normalization = TRUE, 10 | dropout_input = 0, dropout_hidden = 0, momentum_initial = 0.6, 11 | momentum_final = 0.9, momentum_switch = 100, num_epochs = 0, 12 | error_function = meanSquareErr, report_classification_error = FALSE) 13 | } 14 | \arguments{ 15 | \item{darch}{a darch instance} 16 | 17 | \item{input}{input data for training} 18 | 19 | \item{target}{target data for training} 20 | 21 | \item{input_valid}{input data for validation} 22 | 23 | \item{target_valid}{target data for validation} 24 | 25 | \item{...}{additional input} 26 | 27 | \item{learn_rate_weight}{learning rate for the weight matrices} 28 | 29 | \item{learn_rate_bias}{learning rate for the biases} 30 | 31 | \item{learn_rate_gamma}{learning rate for the gamma} 32 | 33 | \item{batch_size}{batch size during training} 34 | 35 | \item{batch_normalization}{logical value that determines whether to turn on 36 | batch normalization during training. Recommneded value: T} 37 | 38 | \item{dropout_input}{dropout ratio at input layer. Recommneded value: 0.2} 39 | 40 | \item{dropout_hidden}{dropout ratio at hidden layers. Recommended value: 0.5} 41 | 42 | \item{momentum_initial}{momentum ratio during training. Recommended value: 0.6} 43 | 44 | \item{momentum_final}{final momentum during training. 
Recommended value: 0.9} 45 | 46 | \item{momentum_switch}{afther which epoch the final momentum ratio is used during training} 47 | 48 | \item{num_epochs}{number of iterations of the training} 49 | 50 | \item{error_function}{error function to minimize during training} 51 | 52 | \item{report_classification_error}{logical value. T to report the classification error 53 | during training} 54 | } 55 | \value{ 56 | a trained deep neural network (darch instance) 57 | } 58 | \description{ 59 | This function trains a deep neural network 60 | } 61 | \examples{ 62 | # Example of Regression 63 | 64 | input <- matrix(runif(1000), 500, 2) 65 | input_valid <- matrix(runif(100), 50, 2) 66 | target <- rowSums(input + input^2) 67 | target_valid <- rowSums(input_valid + input_valid^2) 68 | # create a new deep neural network for classificaiton 69 | dnn_regression <- new_dnn( 70 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 71 | # The first element is the number of input variables. 72 | # The last element is the number of output variables. 73 | hidden_layer_default = rectified_linear_unit_function, 74 | # for hidden layers, use rectified_linear_unit_function 75 | output_layer_default = linearUnitDerivative 76 | # for regression, use linearUnitDerivative function 77 | ) 78 | 79 | dnn_regression <- train_dnn( 80 | dnn_regression, 81 | 82 | # training data 83 | input, # input variable for training 84 | target, # target variable for training 85 | input_valid, # input variable for validation 86 | target_valid, # target variable for validation 87 | 88 | # training parameters 89 | learn_rate_weight = exp(-8) * 10, 90 | # learning rate for weights, higher if use dropout 91 | learn_rate_bias = exp(-8) * 10, 92 | # learning rate for biases, hihger if use dropout 93 | learn_rate_gamma = exp(-8) * 10, 94 | # learning rate for the gamma factor used 95 | batch_size = 10, 96 | # number of observations in a batch during training. 97 | # Higher for faster training. Lower for faster convergence 98 | batch_normalization = TRUE, 99 | # logical value, T to use batch normalization 100 | dropout_input = 0.2, 101 | # dropout ratio in input. 102 | dropout_hidden = 0.5, 103 | # dropout ratio in hidden layers 104 | momentum_initial = 0.6, 105 | # initial momentum in Stochastic Gradient Descent training 106 | momentum_final = 0.9, 107 | # final momentum in Stochastic Gradient Descent training 108 | momentum_switch = 100, 109 | # after which the momentum is switched from initial to final momentum 110 | num_epochs = 5, 111 | # number of iterations in training 112 | # increase numbef of epochs to 100 for better model fit 113 | 114 | 115 | # Error function 116 | error_function = meanSquareErr, 117 | # error function to minimize during training. 
For regression, use meanSquareErr 118 | report_classification_error = FALSE 119 | # whether to print classification error during training 120 | ) 121 | 122 | 123 | # the prediciton by dnn_regression 124 | pred <- predict(dnn_regression) 125 | 126 | # calculate the r-squared of the prediciton 127 | rsq(dnn_regression) 128 | 129 | 130 | # calcualte the r-squared of the prediciton in validation 131 | rsq(dnn_regression, input = input_valid, target = target_valid) 132 | 133 | # print the layer weights 134 | # this function can print heatmap, histogram, or a surface 135 | print_weight(dnn_regression, 1, type = "heatmap") 136 | 137 | print_weight(dnn_regression, 2, type = "surface") 138 | 139 | print_weight(dnn_regression, 3, type = "histogram") 140 | 141 | 142 | # Examples of classification 143 | 144 | input <- matrix(runif(1000), 500, 2) 145 | input_valid <- matrix(runif(100), 50, 2) 146 | target <- (cos(rowSums(input + input^2)) > 0.5) * 1 147 | target_valid <- (cos(rowSums(input_valid + input_valid^2)) > 0.5) * 1 148 | 149 | # create a new deep neural network for classificaiton 150 | dnn_classification <- new_dnn( 151 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 152 | # The first element is the number of input variables. 153 | # The last element is the number of output variables. 154 | hidden_layer_default = rectified_linear_unit_function, 155 | # for hidden layers, use rectified_linear_unit_function 156 | output_layer_default = sigmoidUnitDerivative 157 | # for classification, use sigmoidUnitDerivative function 158 | ) 159 | 160 | dnn_classification <- train_dnn( 161 | dnn_classification, 162 | 163 | # training data 164 | input, # input variable for training 165 | target, # target variable for training 166 | input_valid, # input variable for validation 167 | target_valid, # target variable for validation 168 | 169 | # training parameters 170 | learn_rate_weight = exp(-8) * 10, 171 | # learning rate for weights, higher if use dropout 172 | learn_rate_bias = exp(-8) * 10, 173 | # learning rate for biases, hihger if use dropout 174 | learn_rate_gamma = exp(-8) * 10, 175 | # learning rate for the gamma factor used 176 | batch_size = 10, 177 | # number of observations in a batch during training. 178 | # Higher for faster training. Lower for faster convergence 179 | batch_normalization = TRUE, 180 | # logical value, T to use batch normalization 181 | dropout_input = 0.2, 182 | # dropout ratio in input. 183 | dropout_hidden = 0.5, 184 | # dropout ratio in hidden layers 185 | momentum_initial = 0.6, 186 | # initial momentum in Stochastic Gradient Descent training 187 | momentum_final = 0.9, 188 | # final momentum in Stochastic Gradient Descent training 189 | momentum_switch = 100, 190 | # after which the momentum is switched from initial to final momentum 191 | num_epochs = 5, 192 | # number of iterations in training 193 | # increase num_epochs to 100 for better model fit 194 | 195 | # Error function 196 | error_function = crossEntropyErr, 197 | # error function to minimize during training. 
For classification, use crossEntropyErr 198 | report_classification_error = TRUE 199 | # whether to print classification error during training 200 | ) 201 | 202 | # the prediction by dnn_classification 203 | pred <- predict(dnn_classification) 204 | 205 | hist(pred) 206 | 207 | # calculate the accuracy ratio of the prediction 208 | AR(dnn_classification) 209 | 210 | # calculate the accuracy ratio of the prediction in validation 211 | AR(dnn_classification, input = input_valid, target = target_valid) 212 | 213 | 214 | } 215 | 216 | -------------------------------------------------------------------------------- /man/verticalize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{verticalize} 4 | \alias{verticalize} 5 | \title{Creates a matrix by repeating a row vector N times} 6 | \usage{ 7 | verticalize(vector, N) 8 | } 9 | \arguments{ 10 | \item{vector}{the row vector} 11 | 12 | \item{N}{number of rows in the output matrix} 13 | } 14 | \value{ 15 | a matrix 16 | } 17 | \description{ 18 | helper function that repeats a row vector N times 19 | } 20 | 21 | --------------------------------------------------------------------------------
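verticalize() is documented above as repeating a row vector N times to form a matrix; in base R the same behaviour can be sketched in one line (an illustrative equivalent, not the package source):

verticalize_sketch <- function(vector, N) {
  matrix(rep(vector, N), nrow = N, byrow = TRUE)  # N identical rows
}
# verticalize_sketch(c(1, 2, 3), 2) returns a 2 x 3 matrix whose rows are both c(1, 2, 3)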