├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── NAMESPACE ├── R ├── AR.R ├── backpropagate_delta.R ├── batch_normalization.R ├── calculate_mu_sigma.R ├── dropout.R ├── error_functions.R ├── finetune_SGD.R ├── new_dnn.R ├── rectified_linear_unit_function.R ├── rsq.R ├── run_dnn.R ├── train_dnn.R └── util.R ├── README.Rmd ├── README.md ├── cran-comments.md ├── deeplearning.Rproj ├── inst ├── examples_classification.R ├── examples_regression.R ├── test_ReLU.R ├── test_batch_normalization_differential.R ├── test_fineTuneFunctions.R ├── test_finetune_SGD_bn.R ├── test_new_dnn.R ├── test_run_dnn.R └── test_train_dnn.R └── man ├── AR.DArch.Rd ├── AR.Rd ├── AR.default.Rd ├── AR.numeric.Rd ├── applyDropoutMask.Rd ├── backpropagate_delta_bn.Rd ├── batch_normalization.Rd ├── batch_normalization_differential.Rd ├── calcualte_population_mu_sigma.Rd ├── classification_error.Rd ├── convert_categorical.Rd ├── crossEntropyErr.Rd ├── finetune_SGD_bn.Rd ├── generateDropoutMask.Rd ├── generateDropoutMasksForDarch.Rd ├── matMult.Rd ├── meanSquareErr.Rd ├── new_dnn.Rd ├── print_weight.Rd ├── rectified_linear_unit_function.Rd ├── reset_population_mu_sigma.Rd ├── rsq.DArch.Rd ├── rsq.Rd ├── rsq.lm.Rd ├── run_dnn.Rd ├── train_dnn.Rd └── verticalize.Rd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^cran-comments\.md$ 4 | ^\.travis\.yml$ 5 | ^README\.Rmd$ 6 | ^README-.*\.png$ 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: R 2 | cache: packages -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: deeplearning 2 | Type: Package 3 | Title: An Implementation of Deep Neural Network for Regression and Classification 4 | Description: An implementation of deep neural network with rectifier linear units trained 5 | with stochastic gradient descent method and batch normalization. A combination of these methods have 6 | achieved state-of-the-art performance in ImageNet classification by overcoming the gradient saturation 7 | problem experienced by many deep architecture neural network models in the past. In addition, 8 | batch normalization and dropout are implemented as a means of regularization. The deeplearning package is 9 | inspired by the darch package and uses its class DArch. 
10 | Version: 0.1.0 11 | Date: 2016-04-10 12 | Authors@R: c( 13 | person(given = "Zhi", family = "Ruan", email = "ryan.zhiruan@gmail.com", role = c("aut", "cre")), 14 | person("Martin", "Drees", email = "mdrees@stud.fh-dortmund.de", role = c("cph")) 15 | ) 16 | LazyData: TRUE 17 | URL: https://github.com/rz1988/deeplearning 18 | BugReports: https://github.com/rz1988/deeplearning/issues 19 | Depends: 20 | R (>= 3.2.4), 21 | methods, 22 | darch (>= 0.10.0), 23 | Imports: 24 | plotly, 25 | futile.logger, 26 | graphics, 27 | stats 28 | License: GPL (>= 2) 29 | RoxygenNote: 5.0.1 30 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(AR,DArch) 4 | S3method(AR,default) 5 | S3method(AR,numeric) 6 | S3method(rsq,DArch) 7 | S3method(rsq,lm) 8 | export(AR) 9 | export(classification_error) 10 | export(convert_categorical) 11 | export(crossEntropyErr) 12 | export(meanSquareErr) 13 | export(new_dnn) 14 | export(print_weight) 15 | export(rectified_linear_unit_function) 16 | export(rsq) 17 | export(train_dnn) 18 | import(futile.logger) 19 | importClassesFrom(darch,DArch) 20 | importFrom(darch,createDataSet) 21 | importFrom(darch,generateWeights) 22 | importFrom(darch,getDropoutMask) 23 | importFrom(darch,getEpochs) 24 | importFrom(darch,getLayer) 25 | importFrom(darch,getLayerWeights) 26 | importFrom(darch,getLayers) 27 | importFrom(darch,getMomentum) 28 | importFrom(darch,linearUnitDerivative) 29 | importFrom(darch,validateDataSet) 30 | importFrom(graphics,plot) 31 | importFrom(methods,new) 32 | importFrom(stats,predict) 33 | -------------------------------------------------------------------------------- /R/AR.R: -------------------------------------------------------------------------------- 1 | #' Calculates the Accuracy Ratio of a classifier 2 | #' 3 | #' This function calculates the Accuracy Ratio of a binary classification 4 | #' model 5 | #' 6 | #' 7 | #' @param x model 8 | #' @param ... additional inputs 9 | #' 10 | #' @export 11 | 12 | AR <- function(x, ...) { 13 | UseMethod("AR") 14 | } 15 | 16 | 17 | #' Calculates the Accruacy Ratio of a given set of probability 18 | #' 19 | #' This function calculates the Accuracy Ratio of a binary classification model 20 | #' output against its targets 21 | #' 22 | #' @param x a list of model output in the form of probabilities 23 | #' @param target binary response 24 | #' @param ... additional inputs 25 | #' @export 26 | 27 | AR.numeric <- function(x, target, ...) { 28 | AR.default(x, target) 29 | } 30 | 31 | 32 | #' Calculates the Accruacy Ratio of a given set of probability 33 | #' 34 | #' This function calculates the Accuracy Ratio of a binary classification model 35 | #' output against its targets 36 | #' 37 | #' @param x a list of model output in the form of probabilities 38 | #' @param target binary response 39 | #' @param ... additional inputs 40 | #' @importFrom graphics plot 41 | #' 42 | #' @export 43 | 44 | AR.default <- function(x, target, ...) 
{ 45 | N <- length(x) 46 | seq = order(x, decreasing = T) 47 | target <- target[seq] 48 | auc <- 0 49 | totTarget <- sum(target) 50 | y <- c() 51 | for (i in 1:N) { 52 | lorenzeCurve <- sum(target[1:i]) / totTarget 53 | auc <- auc + lorenzeCurve * 1 / N 54 | y <- cbind(y, lorenzeCurve) 55 | } 56 | auc <- auc 57 | pd <- sum(target) / N 58 | ar <- (2 * auc - 1) / (1 - pd) 59 | plot(as.vector(y), xlab = "Population", ylab = "Fraction of Positive") 60 | if(ar > 1) ar <- 1 61 | return (ar) 62 | } 63 | 64 | #' Calculates the Accruacy Ratio of a given set of probability 65 | #' 66 | #' This function calculates the Accuracy Ratio of a trained darch instance 67 | #' 68 | #' @param x a DArch instance 69 | #' @param input the input matrix 70 | #' @param target binary response 71 | #' @param ... additional inputs 72 | #' 73 | #' @importFrom stats predict 74 | #' 75 | #' @export 76 | 77 | 78 | 79 | AR.DArch <- function(x, input = x@dataSet@data, 80 | target = x@dataSet@targets, ...) { 81 | pred <- predict(x, newdata = input) 82 | AR.default(pred, target) 83 | } 84 | 85 | 86 | -------------------------------------------------------------------------------- /R/backpropagate_delta.R: -------------------------------------------------------------------------------- 1 | #' Calculates the delta functions using backpropagation 2 | #' 3 | #' function that calculates the delta function of a darch object with batch 4 | #' normalization 5 | #' 6 | #' @param darch a darch instance 7 | #' @param trainData training input 8 | #' @param targetData training target 9 | #' @param errorFunc error function to minimize during training. Right now mean squared 10 | #' erros and cross entropy errors are supported. 11 | #' @param with_BN traing with batch normalization on or off 12 | #' 13 | #' @importFrom darch getLayer 14 | #' @importFrom darch getDropoutMask 15 | #' 16 | #' 17 | #' @references Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift 18 | #' Sergey Ioffe, Christian Szegedy 19 | #' @seealso \url{http://jmlr.org/proceedings/papers/v37/ioffe15.pdf} Pg 4 20 | 21 | 22 | backpropagate_delta_bn <- function(darch, 23 | trainData, 24 | targetData, 25 | errorFunc = meanSquareErr, 26 | with_BN = TRUE) { 27 | 28 | numLayers <- length(darch@layers) 29 | layers <- list() 30 | epsilon <- exp(-12) # a numerical stablizaer used in batch normalization 31 | numObs <- dim(trainData)[[1]] 32 | 33 | for(i in 1:numLayers) { 34 | ret <- getLayer(darch, i)[[1]] 35 | dimV_input <- dim(ret)[[1]] - 1 36 | dimV_output <- dim(ret)[[2]] 37 | 38 | layers[["weight"]][[i]] <- ret[1:dimV_input, ] 39 | 40 | if(length(getLayer(darch, i)) < 4 | with_BN == FALSE) { 41 | layers[["gamma"]][[i]] <- 42 | matrix(rep(1, dimV_output * numObs), numObs, byrow = TRUE) 43 | } else { 44 | layers[["gamma"]][[i]] <- 45 | matrix(rep(getLayer(darch, i)[[4]], numObs), numObs, byrow = TRUE) 46 | } 47 | 48 | layers[["beta"]][[i]] <- verticalize(ret[(dimV_input + 1),], numObs) 49 | 50 | layers[["x"]][[i]] <- list() 51 | layers[["mu"]][[i]] <- list() 52 | layers[["sigma_2"]][[i]] <- list() 53 | layers[["x_hat"]][[i]] <- list() 54 | layers[["y"]][[i]] <- list() 55 | 56 | layers[["delta_weight"]][[i]] <- list() 57 | layers[["delta_x"]][[i]] <- list() 58 | layers[["delta_y"]][[i]] <- list() 59 | layers[["delta_beta"]][[i]] <- list() 60 | layers[["delta_gamma"]][[i]] <- list() 61 | layers[["output"]][[i]] <- list() 62 | layers[["derivative"]][[i]] <- list() 63 | } 64 | 65 | # apply input dropout mask to data 66 | # TODO same input 
dropout mask for all data in a batch? 67 | trainData <- applyDropoutMask(trainData, getDropoutMask(darch, 0)) 68 | 69 | # 1. Forwardpropagate 70 | data <- trainData 71 | for (i in 1:numLayers){ 72 | weights <- layers[["weight"]][[i]] 73 | func <- getLayer(darch, i)[[2]] 74 | # Batch Normalization 75 | layers[["x"]][[i]] <- data %*% weights 76 | 77 | if(length(getLayer(darch, i)) < 4 | with_BN == FALSE) { 78 | ret <- batch_normalization(layers[["x"]][[i]], 79 | layers[["gamma"]][[i]], 80 | layers[["beta"]][[i]], 81 | mu = verticalize(rep(0, dim(layers[["gamma"]][[i]])[[2]]), numObs), 82 | sigma_2 = verticalize(rep(1 - epsilon, dim(layers[["gamma"]][[i]])[[2]]), numObs), 83 | epsilon = epsilon 84 | ) 85 | 86 | } else { 87 | ret <- batch_normalization(layers[["x"]][[i]], 88 | layers[["gamma"]][[i]], 89 | layers[["beta"]][[i]], 90 | epsilon = epsilon ) 91 | } 92 | layers[["mu"]][[i]] <- ret[[1]] 93 | layers[["sigma_2"]][[i]] <- ret[[2]] 94 | layers[["x_hat"]][[i]] <- ret[[3]] 95 | layers[["y"]][[i]] <- ret[[4]] 96 | 97 | ret <- list() 98 | 99 | unit_matrix <- diag(dim(layers[['y']][[i]])[[2]]) 100 | ret <- func(layers[["y"]][[i]],unit_matrix) 101 | # apply dropout masks to output, unless we're on the last layer 102 | if (i < numLayers) 103 | { 104 | ret[[1]] <- applyDropoutMask(ret[[1]], getDropoutMask(darch, i)) 105 | ret[[2]] <- applyDropoutMask(ret[[2]], getDropoutMask(darch, i)) 106 | } 107 | 108 | layers[["output"]][[i]] <- ret[[1]] 109 | data <- ret[[1]] 110 | layers[["derivative"]][[i]] <- ret[[2]] 111 | } 112 | 113 | # End of forward propagation 114 | 115 | # 2. Calculate the Error on the network output layer 116 | errorDerivative <- errorFunc(layers[["output"]][[numLayers]], targetData)[[2]] 117 | layers[["delta_y"]][[numLayers]] <- errorDerivative * layers[["derivative"]][[numLayers]] 118 | 119 | if(length(getLayer(darch, numLayers)) < 4 | with_BN == FALSE) { 120 | ret <- batch_normalization_differential(layers[["delta_y"]][[numLayers]], 121 | layers[["mu"]][[numLayers]], 122 | layers[["sigma_2"]][[numLayers]], 123 | layers[["x"]][[numLayers]], 124 | layers[["x_hat"]][[numLayers]], 125 | layers[["y"]][[numLayers]], 126 | layers[["gamma"]][[numLayers]], 127 | layers[["beta"]][[numLayers]], 128 | with_BN = FALSE 129 | ) 130 | 131 | } else { 132 | ret <- batch_normalization_differential(layers[["delta_y"]][[numLayers]], 133 | layers[["mu"]][[numLayers]], 134 | layers[["sigma_2"]][[numLayers]], 135 | layers[["x"]][[numLayers]], 136 | layers[["x_hat"]][[numLayers]], 137 | layers[["y"]][[numLayers]], 138 | layers[["gamma"]][[numLayers]], 139 | layers[["beta"]][[numLayers]], 140 | with_BN = TRUE) 141 | } 142 | 143 | layers[["delta_x"]][[numLayers]] <- ret[[1]] 144 | layers[["delta_gamma"]][[numLayers]] <- ret[[2]] 145 | layers[["delta_beta"]][[numLayers]] <- ret[[3]] 146 | 147 | if (numLayers > 1) { 148 | layers[["delta_weight"]][[numLayers]] <- t(layers[["output"]][[numLayers - 1]]) %*% 149 | layers[["delta_y"]][[numLayers]] 150 | } else { 151 | layers[["delta_weight"]][[numLayers]] <- t(trainData) %*% 152 | layers[["delta_y"]][[numLayers]] 153 | } 154 | # End of calculation 155 | 156 | # 3. 
Backpropagate the error 157 | for(i in (numLayers-1):1){ 158 | error <- layers[["delta_x"]][[i+1]] %*% t(layers[["weight"]][[i + 1]]) 159 | # zero derivatives makes sure that dropout nodes' delta functions are zeros 160 | layers[["delta_y"]][[i]] <- error * layers[["derivative"]][[i]] 161 | 162 | if(length(getLayer(darch, i)) < 4 | with_BN == FALSE) { 163 | ret <- batch_normalization_differential(layers[["delta_y"]][[i]], 164 | layers[["mu"]][[i]], 165 | layers[["sigma_2"]][[i]], 166 | layers[["x"]][[i]], 167 | layers[["x_hat"]][[i]], 168 | layers[["y"]][[i]], 169 | layers[["gamma"]][[i]], 170 | layers[["beta"]][[i]], 171 | with_BN = FALSE) 172 | 173 | } else { 174 | ret <- batch_normalization_differential(layers[["delta_y"]][[i]], 175 | layers[["mu"]][[i]], 176 | layers[["sigma_2"]][[i]], 177 | layers[["x"]][[i]], 178 | layers[["x_hat"]][[i]], 179 | layers[["y"]][[i]], 180 | layers[["gamma"]][[i]], 181 | layers[["beta"]][[i]], 182 | with_BN = TRUE) 183 | } 184 | 185 | layers[["delta_x"]][[i]] <- ret[[1]] 186 | layers[["delta_gamma"]][[i]] <- ret[[2]] 187 | layers[["delta_beta"]][[i]] <- ret[[3]] 188 | 189 | if (i > 1) { 190 | layers[["delta_weight"]][[i]] <- t(layers[["output"]][[i - 1]]) %*% layers[["delta_y"]][[i]] 191 | } else { 192 | layers[["delta_weight"]][[i]] <- t(trainData) %*% layers[["delta_y"]][[i]] 193 | } 194 | 195 | } 196 | 197 | ret <- list() 198 | ret[[1]] <- layers[["delta_weight"]] 199 | ret[[2]] <- layers[["delta_beta"]] 200 | ret[[3]] <- layers[["delta_gamma"]] 201 | ret[[4]] <- layers[["output"]] 202 | ret[[5]] <- layers[["derivative"]] 203 | ret[[6]] <- layers[["delta_mu"]] 204 | ret[[7]] <- layers[["delta_sigma_2"]] 205 | ret[[8]] <- layers[["mu"]] 206 | ret[[9]] <- layers[["sigma_2"]] 207 | return(ret) 208 | } 209 | -------------------------------------------------------------------------------- /R/batch_normalization.R: -------------------------------------------------------------------------------- 1 | #' Batch Normalization Function that normalizes the input before applying non-linearity 2 | #' 3 | #' This function normalizes the distribution of inputs to hidden layers in 4 | #' a neural network 5 | #' @param x weighted sum of outputs from the previous layer 6 | #' @param gamma the gamma coefficient 7 | #' @param beta the beta coefficient 8 | #' @param mu the mean of the input neurons. If NULL, it will be caluclated in the function. 9 | #' @param sigma_2 the variance of the input nerurons. If NULL, it will be calcualted in the function. 
10 | #' @param epsilon a constant added to the variance for numerical stability 11 | #' @references Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift 12 | #' Sergey Ioffe, Christian Szegedy 13 | #' @seealso \url{http://jmlr.org/proceedings/papers/v37/ioffe15.pdf} Pg 4 14 | 15 | batch_normalization <- function(x, 16 | gamma, 17 | beta, 18 | mu = NULL, 19 | sigma_2 = NULL, 20 | epsilon = exp(-12)) { 21 | 22 | # helper function that repeat a row vector N times 23 | verticalize <- function(vector, N) { 24 | return(matrix(rep(vector, N), N, byrow = T)) 25 | } 26 | 27 | numObs <- dim(x)[[1]] 28 | if(is.null(mu)) { 29 | mu <-verticalize(colMeans(x), numObs) 30 | } 31 | 32 | if(is.null(sigma_2)) { 33 | sigma_2 <- numObs / (numObs - 1) * (verticalize(colMeans(x^2), numObs) - mu^2) 34 | } 35 | 36 | 37 | 38 | x_hat <- (x - mu) / sqrt(sigma_2 + epsilon) 39 | y <- x_hat * gamma + beta 40 | 41 | ret <- list() 42 | ret[[1]] <- mu 43 | ret[[2]] <- sigma_2 44 | ret[[3]] <- x_hat 45 | ret[[4]] <- y 46 | return(ret) 47 | } 48 | 49 | #' Function that calcualtes the differentials in the batch normalization mode 50 | #' 51 | #' Calculates the differentials in batch normalization 52 | #' 53 | #' @param delta_y derivative wrt y 54 | #' @param mu mean of the input 55 | #' @param sigma_2 variance of the input 56 | #' @param x input 57 | #' @param x_hat normalized input 58 | #' @param y transformed input after batch normalization 59 | #' @param gamma gamma coefficient 60 | #' @param beta beta coefficient 61 | #' @param epsilon the contant added to the variance for numeric stability 62 | #' @param with_BN logical value, set to TRUE to turn on batch normalization 63 | #' 64 | #' @references Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift 65 | #' Sergey Ioffe, Christian Szegedy 66 | #' @seealso \url{http://jmlr.org/proceedings/papers/v37/ioffe15.pdf} Pg 4 67 | 68 | batch_normalization_differential <- function(delta_y, 69 | mu, 70 | sigma_2, 71 | x, 72 | x_hat, 73 | y, 74 | gamma, 75 | beta, 76 | epsilon = exp(-12), 77 | with_BN = T) { 78 | # helper function that repeat a row vector N times 79 | verticalize <- function(vector, N) { 80 | return(matrix(rep(vector, N), N, byrow = T)) 81 | } 82 | numObs <- dim(x)[[1]] 83 | 84 | delta_x_hat <- delta_y * gamma 85 | 86 | if(with_BN) { 87 | delta_sigma_2 <- verticalize(colSums(delta_x_hat * (x - mu) * (-0.5) * (sigma_2 + epsilon)^(-1.5)), numObs) 88 | 89 | tmp1 <- verticalize(colSums(delta_x_hat * (-1) / sqrt(sigma_2 + epsilon)), numObs) 90 | tmp2 <- delta_sigma_2 * verticalize(colMeans(-2 * (x- mu)), numObs) 91 | 92 | delta_mu <- tmp1 + tmp2 93 | 94 | delta_gamma <- verticalize(colSums(delta_y * x_hat), numObs) 95 | } else { 96 | delta_sigma_2 <- verticalize(rep(0, dim(delta_y)[[2]]), numObs) 97 | delta_mu <- verticalize(rep(0, dim(delta_y)[[2]]), numObs) 98 | delta_gamma <- verticalize(rep(0, dim(delta_y)[[2]]), numObs) 99 | } 100 | 101 | tmp1 <- delta_x_hat / sqrt(sigma_2 + epsilon) 102 | tmp2 <- delta_sigma_2 * 2 * (x - mu) / numObs 103 | tmp3 <- delta_mu / numObs 104 | delta_x <- tmp1 + tmp2 + tmp3 105 | 106 | delta_beta <- verticalize(colSums(delta_y), numObs) 107 | 108 | ret <- list() 109 | ret[[1]] <- delta_x 110 | ret[[2]] <- delta_gamma 111 | ret[[3]] <- delta_beta 112 | ret[[4]] <- delta_x_hat 113 | ret[[5]] <- delta_sigma_2 114 | ret[[6]] <- delta_mu 115 | return(ret) 116 | } 117 | -------------------------------------------------------------------------------- 
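# A usage sketch (not part of the package sources): a quick check of the
# forward transform implemented in batch_normalization() above, assuming the
# function has been sourced. The toy inputs x, gamma and beta below are
# illustrative only. Before gamma and beta are applied, the normalized
# activations x_hat should have roughly zero mean and unit variance per column.
x     <- matrix(rnorm(50), nrow = 10, ncol = 5)  # 10 observations, 5 hidden units
gamma <- matrix(1, nrow = 10, ncol = 5)          # scale, repeated for every observation
beta  <- matrix(0, nrow = 10, ncol = 5)          # shift, repeated for every observation
ret   <- batch_normalization(x, gamma, beta)     # mu and sigma_2 estimated from the batch
round(colMeans(ret[[3]]), 6)                     # x_hat column means, approximately 0
round(apply(ret[[3]], 2, var), 6)                # x_hat column variances, approximately 1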
/R/calculate_mu_sigma.R: -------------------------------------------------------------------------------- 1 | #' Calculates the mu and sigmas of a darch instance 2 | #' 3 | #' This function calculates the mu and sigmas of hidden layers in a darch instance 4 | #' @param darch a darch instance 5 | #' @param input input data 6 | #' 7 | #' @importFrom darch getLayer 8 | #' 9 | #' 10 | 11 | 12 | 13 | 14 | calcualte_population_mu_sigma <- function (darch, input) { 15 | numLayers <- length(darch@layers) 16 | layers <- list() 17 | epsilon <- exp(-12) # a numerical stablizaer used in batch normalization 18 | numObs <- dim(input)[[1]] 19 | 20 | for(i in 1:numLayers) { 21 | ret <- getLayer(darch, i)[[1]] 22 | dimV_input <- dim(ret)[[1]] - 1 23 | dimV_output <- dim(ret)[[2]] 24 | 25 | layers[["weight"]][[i]] <- ret[1:dimV_input, ] 26 | 27 | layers[["gamma"]][[i]] <- 28 | matrix(rep(getLayer(darch, i)[[4]], numObs), numObs, byrow = T) 29 | 30 | layers[["beta"]][[i]] <- verticalize(ret[(dimV_input + 1),], numObs) 31 | 32 | layers[["x"]][[i]] <- list() 33 | layers[["mu"]][[i]] <- list() 34 | layers[["sigma_2"]][[i]] <- list() 35 | layers[["x_hat"]][[i]] <- list() 36 | layers[["y"]][[i]] <- list() 37 | } 38 | 39 | # Forwardpropagate 40 | data <- input 41 | for (i in 1:numLayers){ 42 | weights <- layers[["weight"]][[i]] 43 | func <- getLayer(darch, i)[[2]] 44 | # Batch Normalization 45 | layers[["x"]][[i]] <- data %*% weights 46 | 47 | ret <- batch_normalization(layers[["x"]][[i]], 48 | layers[["gamma"]][[i]], 49 | layers[["beta"]][[i]], 50 | epsilon = epsilon ) 51 | 52 | layers[["mu"]][[i]] <- ret[[1]] 53 | layers[["sigma_2"]][[i]] <- ret[[2]] 54 | layers[["x_hat"]][[i]] <- ret[[3]] 55 | layers[["y"]][[i]] <- ret[[4]] 56 | 57 | ret <- list() 58 | 59 | unit_matrix <- diag(dim(layers[['y']][[i]])[[2]]) 60 | ret <- func(layers[["y"]][[i]],unit_matrix) 61 | 62 | layers[["output"]][[i]] <- ret[[1]] 63 | data <- ret[[1]] 64 | layers[["derivative"]][[i]] <- ret[[2]] 65 | } 66 | 67 | # End of forward propagation 68 | 69 | for (i in 1:numLayers) { 70 | darch@layers[[i]][[5]] <- layers[["mu"]][[i]][1, ] 71 | darch@layers[[i]][[6]] <- layers[["sigma_2"]][[i]][1, ] 72 | } 73 | 74 | return (darch) 75 | } 76 | 77 | #' Resets the mu and sigmas of a darch instance to 0 and 1 78 | #' 79 | #' This function resets the mu and sigmas of hidden layers in a darch instance 80 | #' to 0 and 1 81 | #' @param darch a darch instance 82 | #' 83 | #' @importFrom darch getLayer 84 | #' 85 | 86 | 87 | 88 | reset_population_mu_sigma <- function (darch) { 89 | numLayers <- length(darch@layers) 90 | epsilon <- exp(-12) # a numerical stablizaer used in batch normalization 91 | 92 | for(i in 1:numLayers) { 93 | ret <- getLayer(darch, i)[[1]] 94 | dimV_output <- dim(ret)[[2]] 95 | darch@layers[[i]][[5]] <- rep(0, dimV_output) 96 | darch@layers[[i]][[6]] <- rep(1 - epsilon, dimV_output) 97 | } 98 | 99 | return (darch) 100 | } 101 | 102 | -------------------------------------------------------------------------------- /R/dropout.R: -------------------------------------------------------------------------------- 1 | #' Generates dropout masks for dnn 2 | #' 3 | #' This function generates dropout maks for dnn 4 | #' @param darch, a DArch instance 5 | #' @param dropout_input, the dropout rate for the input layer 6 | #' @param dropout_hidden, the dropout rate for the hidden layer 7 | #' 8 | #' @importFrom darch getLayers getLayerWeights 9 | #' @references Dropout: A Simple Way to Prevent Neural Networks from 10 | #' Overfitting, Nitish Srivastava 
11 | #' @seealso \url{https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf} 12 | 13 | 14 | 15 | generateDropoutMasksForDarch <- function(darch, dropout_input, dropout_hidden) 16 | { 17 | dropoutMasks <- list() 18 | numLayers <- length(getLayers(darch)) 19 | # generate dropout masks 20 | darch@dropoutMasks[[1]]<- 21 | generateDropoutMask(nrow(getLayerWeights(darch, 1)[]) - 1, 22 | dropout_input) 23 | 24 | for (i in 1:(numLayers - 1)) 25 | { 26 | darch@dropoutMasks[[i + 1]] <- 27 | generateDropoutMask(nrow(getLayerWeights(darch, i+1)[])-1, 28 | dropout_hidden) 29 | } 30 | 31 | return (darch) 32 | } 33 | 34 | #' Generates the dropout mask for the deep neural network 35 | #' 36 | #' This function generates the dropout mask for the deep neural network 37 | #' @param length, the dimension of the layer 38 | #' @param dropoutRate, the dropout rate 39 | #' 40 | #' @references Dropout: A Simple Way to Prevent Neural Networks from 41 | #' Overfitting, Nitish Srivastava 42 | #' @seealso \url{https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf} 43 | 44 | 45 | 46 | generateDropoutMask <- function(length, dropoutRate) 47 | { 48 | if (dropoutRate == 0) 49 | { 50 | ret <- rep(1, length) 51 | } 52 | else 53 | { 54 | ret <- sample(c(0, 1/(1 - dropoutRate)), length, replace = T, 55 | prob = c(dropoutRate, 1 - dropoutRate)) 56 | } 57 | 58 | return (ret) 59 | } 60 | 61 | 62 | 63 | #' Applies the given dropout mask to the given data row-wise. 64 | #' 65 | #' This function multiplies each row with the dropout mask. To apply the dropout 66 | #' mask by row, it can simply be multiplied with the data matrix. This does not 67 | #' work of the mask is to be applied row-wise, hence this function. 68 | #' 69 | #' @param data Data to which the dropout mask should be applied 70 | #' @param mask The dropout mask, a vector of 0 and 1. 
71 | #' @return Data with applied dropout mask
72 | #'
73 | #' @references Dropout: A Simple Way to Prevent Neural Networks from
74 | #' Overfitting, Nitish Srivastava
75 | #' @seealso \url{https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf}
76 |
77 |
78 | applyDropoutMask <- function(data, mask)
79 | {
80 |   return (data * matrix(rep(mask, nrow(data)), nrow=nrow(data), byrow=T))
81 | }
82 |
83 |
--------------------------------------------------------------------------------
/R/error_functions.R:
--------------------------------------------------------------------------------
 1 | #' Calculates the cross entropy error
 2 | #'
 3 | #' This function calculates the cross entropy error and its first order derivatives
 4 | #'
 5 | #' @param output the output value
 6 | #' @param target the target value
 7 | #'
 8 | #' @export
 9 |
10 | crossEntropyErr <- function(output, target) {
11 |   # err <- - sum(target[] * log(output[]) + (1 - target[]) * log(1 - output[]))
12 |   err <- - sum(target * log(output) + (1 - target) * log(1 - output))
13 |   err2 <- (1-target)/(1-output) - target/output
14 |
15 |   ret <- list()
16 |   ret[[1]] <- err
17 |   ret[[2]] <- err2
18 |   ret[[3]] <- "Cross Entropy Error"
19 |   return(ret)
20 | }
21 |
22 | #' Calculates the mean squared error
23 | #'
24 | #' This function calculates the mean squared error and its first order derivatives
25 | #'
26 | #' @param output the output value
27 | #' @param target the target value
28 | #'
29 | #' @export
30 |
31 | meanSquareErr <- function(output, target) {
32 |   err <- 1/2 * sum((output - target)^2) / dim(output)[[1]]
33 |   err2 <- (output - target)
34 |   ret <- list()
35 |   ret[[1]] <- err
36 |   ret[[2]] <- err2
37 |   ret[[3]] <- "Mean Squared Error"
38 |   return(ret)
39 |
40 | }
41 |
42 | #' Calculates the classification error
43 | #'
44 | #' This function calculates the classification error
45 | #'
46 | #' @param output the output of a classifier in the form of probability. Probability > 0.5
47 | #' will be treated as positive (target = 1).
48 | #' @param target the target variable 49 | #' 50 | #' @export 51 | 52 | classification_error <- function(output, target) { 53 | boolOut <- (output > 0.5) * 1 54 | boolOutTarget <- cbind(boolOut, target) 55 | rows <- nrow(target) 56 | cols <- ncol(target) 57 | classification_error <- sum(apply(boolOutTarget, 1, function(y) 58 | { any(y[1:cols] != y[(cols+1):(2*cols)])})) / rows * 100 59 | 60 | ret <- list() 61 | ret[[1]] <- classification_error 62 | ret[[2]] <- "Classification Error" 63 | return (ret) 64 | } 65 | -------------------------------------------------------------------------------- /R/finetune_SGD.R: -------------------------------------------------------------------------------- 1 | #' Updates a deep neural network's parameters using stochastic gradient descent 2 | #' method and batch normalization 3 | #' 4 | #' This function finetunes a DArch network using SGD approach 5 | #' 6 | #' @param darch a darch instance 7 | #' @param trainData training input 8 | #' @param targetData training target 9 | #' @param learn_rate_weight leanring rate for the weight matrices 10 | #' @param learn_rate_bias learning rate for the biases 11 | #' @param learn_rate_gamma learning rate for the gammas 12 | #' @param errorFunc the error function to minimize during training 13 | #' @param with_BN logical value, T to train the neural net with batch normalization 14 | #' 15 | #' @importFrom darch getLayer getDropoutMask getMomentum 16 | #' 17 | #' @return a darch instance with parameters updated with stochastic gradient descent 18 | #' 19 | 20 | finetune_SGD_bn <- function(darch, 21 | trainData, 22 | targetData, 23 | learn_rate_weight = exp(-10), 24 | learn_rate_bias = exp(-10), 25 | learn_rate_gamma = exp(-10), 26 | errorFunc = meanSquareErr, 27 | with_BN = T) { 28 | # stats <- getStats(darch) 29 | 30 | ret <- backpropagate_delta_bn(darch, trainData, targetData, errorFunc, with_BN) 31 | delta_weight <- ret[[1]] 32 | delta_beta <- ret[[2]] 33 | delta_gamma <- ret[[3]] 34 | 35 | learnRateBiases <- learn_rate_bias 36 | learnRateWeights <- learn_rate_weight 37 | learnRateGamma <- learn_rate_gamma 38 | 39 | numLayers <- length(delta_weight) 40 | 41 | for(i in numLayers:1) { 42 | weights <- getLayer(darch, i)[[1]] 43 | biases <- weights[nrow(weights),,drop=F] 44 | weights <- weights[1:(nrow(weights)-1),,drop=F] 45 | gamma <- getLayer(darch, i)[[4]] 46 | weightsChange_prev <- getLayer(darch, i)[[3]] 47 | 48 | # Calculate the change in weights 49 | # apply dropout mask to momentum 50 | weightsInc <- (learnRateWeights * delta_weight[[i]]) 51 | weightsChange <- weightsInc + (getMomentum(darch) * 52 | weightsChange_prev * getDropoutMask(darch, i-1) 53 | ) 54 | weights <- weights - weightsChange 55 | 56 | # Calculate the change in beta (biases) 57 | biasesInc <- learnRateBiases * delta_beta[[i]][1,] 58 | biases <- biases - biasesInc 59 | 60 | # Calculate the change in gamma 61 | gammaInc <- learnRateGamma * delta_gamma[[i]][1,] 62 | gamma <- gamma - gammaInc 63 | 64 | darch@layers[[i]][[1]] <- rbind(weights,biases) 65 | darch@layers[[i]][[3]] <- weightsInc 66 | darch@layers[[i]][[4]] <- gamma 67 | } 68 | 69 | # setStats(darch) <- stats 70 | return(darch) 71 | 72 | } 73 | -------------------------------------------------------------------------------- /R/new_dnn.R: -------------------------------------------------------------------------------- 1 | #' Creats a new instance of darch class 2 | #' 3 | #' This function creates a new instance of darch class 4 | #' 5 | #' @param layer_structure a int vector that 
specifies the number and width of layers 6 | #' @param layer_functions a list of activation functions used by each layer 7 | #' @param output_layer_default the activation function for the output layer 8 | #' @param hidden_layer_default the activation function for the hidden layers 9 | #' @param weight_initiliazaiton function that initialize a layer's weight matrix 10 | #' 11 | #' @importFrom darch linearUnitDerivative generateWeights createDataSet 12 | #' @importFrom methods new 13 | #' @importClassesFrom darch DArch 14 | #' @examples 15 | #' # create a new deep neural network for classificaiton 16 | #' dnn_regression <- new_dnn( 17 | #' c(2, 50, 50, 20, 1), 18 | #' # The layer structure of the deep neural network. 19 | #' # The first element is the number of input variables. 20 | #' # The last element is the number of output variables. 21 | #' hidden_layer_default = rectified_linear_unit_function, 22 | #' # for hidden layers, use rectified_linear_unit_function 23 | #' output_layer_default = sigmoidUnitDerivative 24 | #' # for classification, use sigmoidUnitDerivative function 25 | #' ) 26 | #' 27 | #' # create a new deep neural network for classificaiton 28 | #'dnn_regression <- new_dnn( 29 | #' c(2, 50, 50, 20, 1), 30 | #' # The layer structure of the deep neural network. 31 | #' # The first element is the number of input variables. 32 | #' # The last element is the number of output variables. 33 | #' hidden_layer_default = rectified_linear_unit_function, 34 | #' # for hidden layers, use rectified_linear_unit_function 35 | #' output_layer_default = linearUnitDerivative 36 | #' # for regression, use linearUnitDerivative function 37 | #') 38 | #' @export 39 | 40 | new_dnn <- function(layer_structure, 41 | layer_functions = NULL, 42 | output_layer_default = linearUnitDerivative, 43 | hidden_layer_default = rectified_linear_unit_function, 44 | weight_initiliazaiton = generateWeights) { 45 | if (!is.null(layer_structure)) { 46 | # new a darch instance 47 | darch <-new("DArch") 48 | 49 | # set up the darch stats veriable 50 | darch@stats <- 51 | list("dataErrors" = list("raw"=c(), "class" = c()), 52 | "validErrors" = list("raw"=c(), "class" = c()), 53 | "times" = c(), "preTrainTime" = 0, "fineTuneTime" = 0) 54 | 55 | # set up the layers 56 | numLayers <- length(layer_structure) 57 | for (i in 1:(numLayers -1)) # first layer is an input layer 58 | { 59 | layer <- list() 60 | # element 1: initialize the layer weights 61 | dim_1 <- layer_structure[[i]] 62 | dim_2 <- layer_structure[[i + 1]] 63 | layer[[1]] <- weight_initiliazaiton(dim_1 + 1, dim_2) 64 | 65 | # element 2: set up the layer activation function 66 | if (is.null(layer_functions[[as.character(i)]])) 67 | { 68 | if (i < (numLayers - 1)) { 69 | layer[[2]] <- hidden_layer_default 70 | } else { 71 | layer[[2]] <- output_layer_default 72 | } 73 | } 74 | else 75 | { 76 | layer[[2]] <- layer_functions[[as.character(i)]] 77 | } 78 | 79 | # element 3: weight increase 80 | layer[[3]] <- matrix(0, dim_1, dim_2) 81 | 82 | # element 4: gamma coefficient in batch normalization 83 | layer[[4]] <- rep(1, dim_2) 84 | 85 | # element 5: mu coefficient in batch normalization 86 | layer[[5]] <- rep(0, dim_2) 87 | 88 | # element 6: sigma_2 coefficient in batch normalization 89 | layer[[6]] <- rep(1 - exp(-12), dim_2) 90 | 91 | # add layer to darch@layers 92 | darch@layers[[i]] <- layer 93 | } 94 | 95 | # set up the slots necessary for predict.DArch function 96 | darch@dataSet <- createDataSet(matrix(0, 1, layer_structure[[1]]), NULL) 97 | darch@ff <- F 98 | 
99 | # set up the execution function 100 | darch@executeFunction <- run_dnn 101 | } else { 102 | darch <- NULL 103 | flog.fatal("Illegal layer structures!") 104 | } 105 | return (darch) 106 | } 107 | -------------------------------------------------------------------------------- /R/rectified_linear_unit_function.R: -------------------------------------------------------------------------------- 1 | #' Rectified Linear Unit Function 2 | #' 3 | #' This functions calculates the value and the derivative of a rectified linear 4 | #' function. Reference Vinod Nair, Geoffrey Hinton, Rectified Linear Units 5 | #' Improve Restricted Boltzmann Machines 6 | #' 7 | #' @param data the data matrix for calculation 8 | #' @param weights the connection (weight matrix/filter) and the bias 9 | #' @return A list of function values and derivatives 10 | #' @export 11 | 12 | rectified_linear_unit_function <- function(data, weights) { 13 | ret <- list() 14 | a <- data %*% weights 15 | x <- a 16 | x[a<0] <- 0 17 | derivatives <- matrix(1, dim(a)[[1]], dim(a)[[2]]) 18 | derivatives[a<0] <- 0 19 | ret[[1]] <- x 20 | ret[[2]] <- derivatives 21 | return (ret) 22 | } 23 | -------------------------------------------------------------------------------- /R/rsq.R: -------------------------------------------------------------------------------- 1 | #' Calculate the RSQ of a regression model 2 | #' Utilitiy function that calcualtes RSQ of a model. It measures the goodness-of- 3 | #' fit of a regression model. 4 | #' 5 | #' @param x Regression Model 6 | #' @param ... Additional Input 7 | #' 8 | #' @import futile.logger 9 | #' @export 10 | 11 | rsq <- function(x, ...) { 12 | UseMethod("rsq", x) 13 | } 14 | 15 | #' Utilitiy function that calcualtes RSQ of a DArch instance 16 | #' 17 | #' Calcualte a regression model's RSQ of a deep neural network 18 | #' 19 | #' @param x DArch Model 20 | #' @param input Input data 21 | #' @param target Target data 22 | #' @param ... addtional inputs 23 | #' @import futile.logger 24 | #' @importFrom stats predict 25 | #' @importFrom graphics plot 26 | #' @export 27 | 28 | rsq.DArch <- function(x, 29 | input = x@dataSet@data, 30 | target = x@dataSet@targets, ...) { 31 | y <- target 32 | pred <- predict(x, newdata = input) 33 | plot(y, pred, xlab = "target", ylab = "prediction") 34 | RSQ <- 1 - sum((pred-y)^2)/sum((y-mean(y))^2) 35 | flog.info(paste0("RSQ = ", RSQ)) 36 | } 37 | 38 | #' Utilitiy function that calcualtes RSQ of a linear model 39 | #' 40 | #' Calcualte a regression model's RSQ 41 | #' 42 | #' @param x linear Model 43 | #' @param input Input data 44 | #' @param target Target data 45 | #' @param ... additional inputs 46 | #' @importFrom stats predict 47 | #' @importFrom graphics plot 48 | #' @import futile.logger 49 | #' @export 50 | 51 | rsq.lm <- function(x, input, target, ...) 
{ 52 | y <- target 53 | pred <- predict(x, newdata = data.frame(input)) 54 | plot(y, pred) 55 | plot(y, pred, xlab = "target", ylab = "prediction") 56 | RSQ <- 1 - sum((pred-y)^2)/sum((y-mean(y))^2) 57 | flog.info(paste0("RSQ = ", RSQ)) 58 | } 59 | -------------------------------------------------------------------------------- /R/run_dnn.R: -------------------------------------------------------------------------------- 1 | #' Execution function that runs in the batch normalization mode 2 | #' 3 | #' This function calcualtes the output of a deep neural network with input data 4 | #' 5 | #' @param darch a darch instance 6 | #' @param data input data 7 | 8 | 9 | run_dnn <- function(darch, data){ 10 | darch@executeOutput <- list() 11 | layers <- darch@layers 12 | # If there's only one row of input data, convert vector to matrix 13 | # TODO make sure that data is matrix before passing it to this function 14 | if(is.null(dim(data))){ 15 | data <- t(as.matrix(data)) 16 | } 17 | 18 | numRows <- dim(data)[1] 19 | 20 | output <- list() 21 | derivative <- list() 22 | 23 | for(i in 1:length(layers)){ 24 | ret <- layers[[i]][[1]] 25 | dimV_input <- dim(ret)[[1]] - 1 26 | dimV_output <- dim(ret)[[2]] 27 | 28 | weight <- ret[1:(dimV_input), ] 29 | beta <- verticalize(ret[(dimV_input + 1), ], numRows) 30 | 31 | gamma <- darch@layers[[i]][[4]] 32 | gamma <- verticalize(gamma, numRows) 33 | 34 | x <- data %*% weight 35 | 36 | mu <- verticalize(layers[[i]][[5]], numRows) 37 | 38 | sigma_2 <- verticalize(layers[[i]][[6]], numRows) 39 | 40 | ret <- batch_normalization(x, gamma, beta, mu, sigma_2) 41 | 42 | y <- ret[[4]] 43 | 44 | unit_matrix <- diag(dim(y)[[2]]) 45 | ret <- layers[[i]][[2]](y, unit_matrix) 46 | data <- ret[[1]] 47 | output[[i]] <- ret[[1]] 48 | derivative[[i]] <- ret[[2]] 49 | } 50 | 51 | darch@executeOutput <- output 52 | return(darch) 53 | } 54 | -------------------------------------------------------------------------------- /R/train_dnn.R: -------------------------------------------------------------------------------- 1 | #' Train a deep neural network 2 | #' 3 | #' This function trains a deep neural network 4 | #' 5 | #' @param darch a darch instance 6 | #' @param input input data for training 7 | #' @param target target data for training 8 | #' @param input_valid input data for validation 9 | #' @param target_valid target data for validation 10 | #' @param ... additional input 11 | #' @param learn_rate_weight learning rate for the weight matrices 12 | #' @param learn_rate_bias learning rate for the biases 13 | #' @param learn_rate_gamma learning rate for the gamma 14 | #' @param batch_size batch size during training 15 | #' @param batch_normalization logical value that determines whether to turn on 16 | #' batch normalization during training. Recommneded value: T 17 | #' @param dropout_input dropout ratio at input layer. Recommneded value: 0.2 18 | #' @param dropout_hidden dropout ratio at hidden layers. Recommended value: 0.5 19 | #' @param momentum_initial momentum ratio during training. Recommended value: 0.6 20 | #' @param momentum_final final momentum during training. Recommended value: 0.9 21 | #' @param momentum_switch afther which epoch the final momentum ratio is used during training 22 | #' @param num_epochs number of iterations of the training 23 | #' @param error_function error function to minimize during training 24 | #' @param report_classification_error logical value. 
T to report the classification error 25 | #' during training 26 | #' 27 | #' @importFrom darch createDataSet validateDataSet getEpochs 28 | #' @importFrom stats predict 29 | #' 30 | #' @examples 31 | #' # Example of Regression 32 | #' 33 | #' input <- matrix(runif(1000), 500, 2) 34 | #' input_valid <- matrix(runif(100), 50, 2) 35 | #' target <- rowSums(input + input^2) 36 | #' target_valid <- rowSums(input_valid + input_valid^2) 37 | #' # create a new deep neural network for classificaiton 38 | #' dnn_regression <- new_dnn( 39 | #' c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 40 | #' # The first element is the number of input variables. 41 | #' # The last element is the number of output variables. 42 | #' hidden_layer_default = rectified_linear_unit_function, 43 | #' # for hidden layers, use rectified_linear_unit_function 44 | #' output_layer_default = linearUnitDerivative 45 | #' # for regression, use linearUnitDerivative function 46 | #') 47 | #' 48 | #' dnn_regression <- train_dnn( 49 | #' dnn_regression, 50 | #' 51 | #' # training data 52 | #' input, # input variable for training 53 | #' target, # target variable for training 54 | #' input_valid, # input variable for validation 55 | #' target_valid, # target variable for validation 56 | #' 57 | #' # training parameters 58 | #' learn_rate_weight = exp(-8) * 10, 59 | #' # learning rate for weights, higher if use dropout 60 | #' learn_rate_bias = exp(-8) * 10, 61 | #' # learning rate for biases, hihger if use dropout 62 | #' learn_rate_gamma = exp(-8) * 10, 63 | #' # learning rate for the gamma factor used 64 | #' batch_size = 10, 65 | #' # number of observations in a batch during training. 66 | #' # Higher for faster training. Lower for faster convergence 67 | #' batch_normalization = TRUE, 68 | #' # logical value, T to use batch normalization 69 | #' dropout_input = 0.2, 70 | #' # dropout ratio in input. 71 | #' dropout_hidden = 0.5, 72 | #' # dropout ratio in hidden layers 73 | #' momentum_initial = 0.6, 74 | #' # initial momentum in Stochastic Gradient Descent training 75 | #' momentum_final = 0.9, 76 | #' # final momentum in Stochastic Gradient Descent training 77 | #' momentum_switch = 100, 78 | #' # after which the momentum is switched from initial to final momentum 79 | #' num_epochs = 5, 80 | #' # number of iterations in training 81 | #' # increase numbef of epochs to 100 for better model fit 82 | #' 83 | #' 84 | #' # Error function 85 | #' error_function = meanSquareErr, 86 | #' # error function to minimize during training. 
For regression, use meanSquareErr 87 | #' report_classification_error = FALSE 88 | #' # whether to print classification error during training 89 | #') 90 | #' 91 | #' 92 | #' # the prediciton by dnn_regression 93 | #' pred <- predict(dnn_regression) 94 | #' 95 | #' # calculate the r-squared of the prediciton 96 | #' rsq(dnn_regression) 97 | #' 98 | #' 99 | #' # calcualte the r-squared of the prediciton in validation 100 | #' rsq(dnn_regression, input = input_valid, target = target_valid) 101 | #' 102 | #' # print the layer weights 103 | #' # this function can print heatmap, histogram, or a surface 104 | #' print_weight(dnn_regression, 1, type = "heatmap") 105 | #' 106 | #' print_weight(dnn_regression, 2, type = "surface") 107 | #' 108 | #' print_weight(dnn_regression, 3, type = "histogram") 109 | #' 110 | #' 111 | #' # Examples of classification 112 | #' 113 | #'input <- matrix(runif(1000), 500, 2) 114 | #'input_valid <- matrix(runif(100), 50, 2) 115 | #'target <- (cos(rowSums(input + input^2)) > 0.5) * 1 116 | #'target_valid <- (cos(rowSums(input_valid + input_valid^2)) > 0.5) * 1 117 | #' 118 | #'# create a new deep neural network for classificaiton 119 | #'dnn_classification <- new_dnn( 120 | #' c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 121 | #' # The first element is the number of input variables. 122 | #' # The last element is the number of output variables. 123 | #' hidden_layer_default = rectified_linear_unit_function, 124 | #' # for hidden layers, use rectified_linear_unit_function 125 | #' output_layer_default = sigmoidUnitDerivative 126 | #' # for classification, use sigmoidUnitDerivative function 127 | #') 128 | #' 129 | #'dnn_classification <- train_dnn( 130 | #' dnn_classification, 131 | #' 132 | #' # training data 133 | #' input, # input variable for training 134 | #' target, # target variable for training 135 | #' input_valid, # input variable for validation 136 | #' target_valid, # target variable for validation 137 | #' 138 | #' # training parameters 139 | #' learn_rate_weight = exp(-8) * 10, 140 | #' # learning rate for weights, higher if use dropout 141 | #' learn_rate_bias = exp(-8) * 10, 142 | #' # learning rate for biases, hihger if use dropout 143 | #' learn_rate_gamma = exp(-8) * 10, 144 | #' # learning rate for the gamma factor used 145 | #' batch_size = 10, 146 | #' # number of observations in a batch during training. 147 | #' # Higher for faster training. Lower for faster convergence 148 | #' batch_normalization = TRUE, 149 | #' # logical value, T to use batch normalization 150 | #' dropout_input = 0.2, 151 | #' # dropout ratio in input. 152 | #' dropout_hidden = 0.5, 153 | #' # dropout ratio in hidden layers 154 | #' momentum_initial = 0.6, 155 | #' # initial momentum in Stochastic Gradient Descent training 156 | #' momentum_final = 0.9, 157 | #' # final momentum in Stochastic Gradient Descent training 158 | #' momentum_switch = 100, 159 | #' # after which the momentum is switched from initial to final momentum 160 | #' num_epochs = 5, 161 | #' # number of iterations in training 162 | #' # increase num_epochs to 100 for better model fit 163 | #' 164 | #' # Error function 165 | #' error_function = crossEntropyErr, 166 | #' # error function to minimize during training. 
For regression, use crossEntropyErr 167 | #' report_classification_error = TRUE 168 | #' # whether to print classification error during training 169 | #') 170 | #' 171 | #'# the prediciton by dnn_regression 172 | #'pred <- predict(dnn_classification) 173 | #' 174 | #'hist(pred) 175 | #' 176 | #'# calculate the r-squared of the prediciton 177 | #'AR(dnn_classification) 178 | #' 179 | #'# calcualte the r-squared of the prediciton in validation 180 | #'AR(dnn_classification, input = input_valid, target = target_valid) 181 | #' 182 | #' 183 | #' @return a trained deep neural network (darch instance) 184 | #' @export 185 | #' 186 | 187 | train_dnn <- function(darch, # darch instance to train 188 | input, # input data matrix 189 | target, # target data matrix 190 | input_valid = NULL, # validation data input 191 | target_valid = NULL, # validation data target 192 | ..., 193 | # training parameters 194 | learn_rate_weight = exp(-10), 195 | learn_rate_bias = exp(-10), 196 | learn_rate_gamma = 1, 197 | batch_size = 10, 198 | batch_normalization = TRUE, 199 | dropout_input = 0, 200 | dropout_hidden = 0, 201 | momentum_initial = .6, 202 | momentum_final = .9, 203 | momentum_switch = 100, 204 | num_epochs = 0, 205 | 206 | # target types 207 | error_function = meanSquareErr, 208 | report_classification_error = FALSE 209 | ) { 210 | # 1. set up the inputs 211 | timeStart <- Sys.time() 212 | dataSet <- createDataSet(input, target) 213 | numObs <- nrow(input) 214 | darch@dataSet <- dataSet # add the training dataset to the darch instance 215 | 216 | # set the stats of darch 217 | if (is.null(darch@stats) || length(darch@stats) < 1){ 218 | stats <- 219 | list("dataErrors"=list("raw"=c(),"class"=c()), 220 | "validErrors"=list("raw"=c(),"class"=c()), 221 | "times"= 0) 222 | 223 | darch@stats <- stats 224 | } 225 | 226 | trainData <- as.matrix(input) 227 | trainTarget <- as.matrix(target) 228 | 229 | if(!is.null(input_valid)) { 230 | validData <- as.matrix(input_valid) 231 | validTarget <- as.matrix(target_valid) 232 | } else { 233 | validData <- NULL 234 | validTarget <- NULL 235 | } 236 | 237 | if (!validateDataSet(dataSet, darch)) 238 | { 239 | stop("Invalid dataset provided.") 240 | } 241 | 242 | if (!is.null(validData)) { 243 | if (dim(trainData)[[2]] != dim(validData)[[2]] | 244 | dim(as.matrix(trainTarget))[[2]] != dim(as.matrix(validTarget))[[2]]) { 245 | stop("Invalid validation dataset.") 246 | } 247 | } 248 | 249 | # 2. 
train the neural net 250 | flog.info("Start training the neural net.") 251 | start_epoch <- getEpochs(darch) 252 | flog.info(paste("The neural net has been trained ", start_epoch, " times.")) 253 | 254 | for(epoch in (1 + start_epoch):(num_epochs + start_epoch)) { 255 | flog.info(paste("Epoch numebr: ", epoch)) 256 | 257 | # make the batches 258 | batch <- make_batches(dim(trainData)[[1]], batch_size) 259 | num_batches <- max(batch[, 2]) 260 | 261 | for(i in 1:num_batches) { 262 | # Generate a new dropout mask for each batch 263 | darch <- generateDropoutMasksForDarch(darch, dropout_input, dropout_hidden) 264 | # Train the neural net 265 | darch <- finetune_SGD_bn(darch, 266 | trainData[batch[,2] == i,], 267 | trainTarget[batch[,2] == i,], 268 | learn_rate_weight = learn_rate_weight, 269 | learn_rate_bias = learn_rate_bias, 270 | learn_rate_gamma = learn_rate_gamma, 271 | errorFunc = error_function, 272 | with_BN = batch_normalization 273 | ) 274 | } 275 | 276 | # calculates the new mu and sigma of darch 277 | if (batch_normalization) { 278 | darch <- calcualte_population_mu_sigma(darch, trainData) 279 | } else { 280 | darch <- reset_population_mu_sigma(darch) 281 | } 282 | 283 | # calcualtes the error 284 | 285 | # training errors 286 | pred_train <- predict(darch, newdata = trainData) 287 | error_train <- error_function(pred_train, trainTarget) 288 | flog.info(paste(error_train[[3]], "in training: ", error_train[[1]])) 289 | darch@stats$dataErrors$raw <- c(darch@stats$dataErrors$raw, error_train[[1]]) 290 | 291 | if(report_classification_error) { 292 | ce_train <- classification_error(pred_train, trainTarget) 293 | flog.info(paste(ce_train[[2]], "in training: ", ce_train[[1]])) 294 | darch@stats$dataErrors$class <- c(darch@stats$dataErrors$class, ce_train[[1]]) 295 | } 296 | 297 | # validation errors 298 | if(!is.null(validData)) { 299 | pred_valid <- predict(darch, newdata = validData) 300 | error_valid <- error_function(pred_valid, validTarget) 301 | flog.info(paste(error_valid[[3]], "in validation:", error_valid[[1]])) 302 | darch@stats$validErrors$raw <- c(darch@stats$validErrors$raw, error_valid[[1]]) 303 | 304 | if(report_classification_error) { 305 | ce_valid <- classification_error(pred_valid, validTarget) 306 | flog.info(paste(ce_valid[[2]], "in validation:", ce_valid[[1]])) 307 | darch@stats$validErrors$class <- c(darch@stats$validErrors$class, ce_valid[[1]]) 308 | } 309 | } 310 | # increase the epoch by 1 311 | darch@epochs <- darch@epochs + 1 312 | } 313 | flog.info("End of the training") 314 | 315 | # 3. 
Save the training statistics 316 | if (is.null(darch@stats[["times"]])) { 317 | darch@stats[["times"]] <- 0 318 | } 319 | darch@stats[["times"]] <- darch@stats[["times"]] + as.double(Sys.time() - timeStart, "secs") 320 | 321 | return (darch) 322 | } 323 | 324 | # Helper function for train_dnn 325 | 326 | make_batches <- function(numObs, batchsize) { 327 | order <- sample(1:numObs, numObs) 328 | group <- c() 329 | num_batches <- ceiling(numObs / batchsize) 330 | for (i in 1:numObs) { 331 | group <- c(group, (i %% num_batches + 1)) 332 | } 333 | batch <- cbind(order, group) 334 | batch <- batch[order(order), ] 335 | return (batch) 336 | } 337 | -------------------------------------------------------------------------------- /R/util.R: -------------------------------------------------------------------------------- 1 | #' Prints out the weight of a deep neural network 2 | #' 3 | #' This function prints out the weight in a heat map, 3D surface, or histogram 4 | #' 5 | #' @param darch DArch instance 6 | #' @param num_of_layer the number of the layer to print 7 | #' @param show_derivative T to show the weight value. F to show the percentage 8 | #' weight change in the finetuning stage. This helps spot the network saturation problem. 9 | #' @param type type of the graph. It supports "heatmap", "surface", and "histogram" 10 | #' 11 | #' @importFrom darch getLayer 12 | #' 13 | #' @examples 14 | #' # Example of Regression 15 | #' 16 | #' input <- matrix(runif(1000), 500, 2) 17 | #' input_valid <- matrix(runif(100), 50, 2) 18 | #' target <- rowSums(input + input^2) 19 | #' target_valid <- rowSums(input_valid + input_valid^2) 20 | #' # create a new deep neural network for classificaiton 21 | #' dnn_regression <- new_dnn( 22 | #' c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 23 | #' # The first element is the number of input variables. 24 | #' # The last element is the number of output variables. 
25 | #' hidden_layer_default = rectified_linear_unit_function,
26 | #' # for hidden layers, use rectified_linear_unit_function
27 | #' output_layer_default = linearUnitDerivative
28 | #' # for regression, use linearUnitDerivative function
29 | #')
30 | #'
31 | #' # print the layer weights
32 | #' # this function can print heatmap, histogram, or a surface
33 | #' print_weight(dnn_regression, 1, type = "heatmap")
34 | #'
35 | #' print_weight(dnn_regression, 2, type = "surface")
36 | #'
37 | #' print_weight(dnn_regression, 3, type = "histogram")
38 | #'
39 | #'
40 | #' @export
41 |
42 | print_weight <- function(darch, num_of_layer, show_derivative = F, type = "heatmap") {
43 |   weight <- getLayer(darch, num_of_layer)[[1]]
44 |   weight_change <- getLayer(darch, num_of_layer)[[3]] / weight[1:(dim(weight)[[1]]-1), ]
45 |
46 |   if(type == "histogram") {
47 |     if(!show_derivative) {
48 |       plotly::plot_ly(x = c(weight), type = type)
49 |     } else {
50 |       plotly::plot_ly(x = c(weight_change), type = type)
51 |     }
52 |   } else {
53 |     if(!show_derivative) {
54 |       plotly::plot_ly(z = weight, type = type, colorscale = "hot")
55 |     } else {
56 |       plotly::plot_ly(z = weight_change, type = type, colorscale = "hot")
57 |     }
58 |   }
59 | }
60 |
61 |
62 |
63 | #' Calculates the matrix product of two matrices
64 | #'
65 | #' Calculates the matrix product of the data matrix and the weight matrix
66 | #'
67 | #' @param data the data matrix
68 | #' @param weight the weight matrix
69 | #'
70 |
71 | matMult <- function(data, weight) {
72 |   return(data %*% weight)
73 | }
74 |
75 |
76 |
77 |
78 | #' Data preprocessing function that converts a categorical input to a continuous input or
79 | #' vectorizes it
80 | #'
81 | #' Preprocesses a data set. It converts categorical data into binary variables
82 | #' if it is unordered, or into a continuous variable from 0 to 1 if it is ordinal
83 | #' @param x input variable
84 | #' @param type ordinal or other
85 | #' @param ordered_list the rank ordering of an ordinal variable. Users are expected to
86 | #' provide a complete list of the rank ordering. Otherwise, a default rank ordering
87 | #' will be used.
88 | #' @param var_name the name of the input variable. This is used to create vectorized
89 | #' input variables
90 | #' @param ... other inputs
91 | #'
92 | #' @export
93 |
94 | convert_categorical <- function(x,
95 |                                 type = "ordinal",
96 |                                 ordered_list = list(),
97 |                                 var_name = "var",
98 |                                 ...)
{
 99 |
100 |   if(type == "ordinal") {
101 |     unique_x <- unique(x)
102 |
103 |     if(is.null(ordered_list)) {
104 |       ordered_list <- sort(unique_x)  # list_x has all unique values in vector x
105 |     }
106 |
107 |     if(any(!(unique_x %in% ordered_list))) {
108 |       ordered_list <- sort(unique_x)  # list_x has all unique values in vector x
109 |     }
110 |
111 |     num_categories <- length(ordered_list)
112 |     mapped_value <- c(0:(num_categories - 1))/(num_categories - 1)
113 |     ret <- mapped_value[match(x, ordered_list)]
114 |   } else {
115 |     unique_x <- unique(x)
116 |     ordered_list <- sort(unique_x)
117 |     num_categories <- length(ordered_list)
118 |     mapped_value <- c(1:num_categories)
119 |     numeric_x <- mapped_value[match(x, ordered_list)]
120 |     vectorized_x <- matrix(0, nrow = length(x), ncol = length(unique_x))
121 |     for( i in 1:length(x)) {
122 |       vectorized_x[i, numeric_x[i]] <- 1
123 |     }
124 |
125 |     ret <- data.frame(vectorized_x)
126 |     colnames(ret) <- paste0(var_name, " = ", ordered_list)
127 |   }
128 |
129 |   return(ret)
130 | }
131 |
132 | #' Creates a matrix by repeating a row vector N times
133 | #'
134 | #' helper function that repeats a row vector N times
135 | #'
136 | #' @param vector the row vector
137 | #' @param N number of rows in the output matrix
138 | #' @return a matrix
139 |
140 | verticalize <- function(vector, N) {
141 |   return(matrix(rep(vector, N), N, byrow = T))
142 | }
143 |
144 |
145 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | output:
 3 |   md_document:
 4 |     variant: markdown_github
 5 | ---
 6 |
 7 |
 8 |
 9 | ```{r, echo = FALSE}
10 | knitr::opts_chunk$set(
11 |   collapse = TRUE,
12 |   comment = "#>",
13 |   fig.path = "README-"
14 | )
15 | ```
16 |
17 | deeplearning
18 | =====
19 |
20 | #### Create and train deep neural networks of ReLU type with SGD and batch normalization
21 |
22 | ### About
23 | The deeplearning package is an R package that implements deep neural networks in R. It employs rectified linear unit functions as its building blocks and trains a neural network with the stochastic gradient descent method and batch normalization to speed up training and promote regularization. Neural networks with this kind of architecture and training method are state of the art and have even surpassed human-level performance in the ImageNet competition. The deeplearning package is inspired by another R package, darch, which implements layerwise Restricted Boltzmann Machine pretraining and dropout, and it uses darch's class DArch as its default class.
24 |
25 | ### Installation
26 |
27 | Install deeplearning from CRAN
28 | ```
29 | install.packages("deeplearning")
30 | ```
31 |
32 | Or install it from GitHub
33 | ```
34 | devtools::install_github("rz1988/deeplearning")
35 |
36 | ```
37 |
38 | ### Use deeplearning
39 |
40 | The deeplearning package is designed to be easy and fun to use. It only takes two steps to run your first neural network.
41 |
42 | In step one, the user will create a new neural network. You will need to specify the structure of the neural network, that is, the number of layers and neurons in the network and the type of activation functions. The default activation for the hidden layers is the rectified linear unit function, but you can also use other types of activation, such as a sigmoidal function, or write your own activation function.
43 |
44 | In step two, the user will train the neural network with a training input and a training target. There are a number of other training parameters; for guidance on how to choose them, please refer to https://github.com/rz1988/deeplearning.
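A user-defined activation only needs to follow the same contract as rectified_linear_unit_function: it receives the incoming data and the layer's weight matrix and returns a list holding the activations and their derivatives. As a sketch (the leaky ReLU below is an illustration, not a function exported by the package), such a function could look like this and then be passed to new_dnn via hidden_layer_default:

```
leaky_relu_function <- function(data, weights) {
  a <- data %*% weights                      # weighted input, as in rectified_linear_unit_function
  x <- a
  x[a < 0] <- 0.01 * a[a < 0]                # small slope below zero instead of a hard cut-off
  derivatives <- matrix(1, dim(a)[[1]], dim(a)[[2]])
  derivatives[a < 0] <- 0.01
  list(x, derivatives)                       # element 1: activations, element 2: derivatives
}
```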
There are a number of other training parameters. For guidance on how to choose these training parameters, please refer to https://github.com/rz1988/deeplearning. 45 | 46 | ### Examples 47 | 48 | #### Train a neural network for regression 49 | 50 | ``` 51 | input <- matrix(runif(1000), 500, 2) 52 | input_valid <- matrix(runif(100), 50, 2) 53 | target <- rowSums(input + input^2) 54 | target_valid <- rowSums(input_valid + input_valid^2) 55 | 56 | 57 | # create a new deep neural network for regression 58 | dnn_regression <- new_dnn( 59 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 60 | # The first element is the number of input variables. 61 | # The last element is the number of output variables. 62 | hidden_layer_default = rectified_linear_unit_function, 63 | # for hidden layers, use rectified_linear_unit_function 64 | output_layer_default = linearUnitDerivative # for regression, use linearUnitDerivative function 65 | ) 66 | 67 | dnn_regression <- train_dnn( 68 | dnn_regression, 69 | 70 | # training data 71 | input, # input variable for training 72 | target, # target variable for training 73 | input_valid, # input variable for validation 74 | target_valid, # target variable for validation 75 | 76 | # training parameters 77 | learn_rate_weight = exp(-8) * 10, # learning rate for weights, higher if using dropout 78 | learn_rate_bias = exp(-8) * 10, # learning rate for biases, higher if using dropout 79 | learn_rate_gamma = exp(-8) * 10, # learning rate for the gamma factor used in batch normalization 80 | batch_size = 10, # number of observations in a batch during training. Higher for faster training. Lower for faster convergence 81 | batch_normalization = T, # logical value, T to use batch normalization 82 | dropout_input = 0.2, # dropout ratio in the input layer 83 | dropout_hidden = 0.5, # dropout ratio in hidden layers 84 | momentum_initial = 0.6, # initial momentum in Stochastic Gradient Descent training 85 | momentum_final = 0.9, # final momentum in Stochastic Gradient Descent training 86 | momentum_switch = 100, # the epoch after which the momentum switches from the initial to the final value 87 | num_epochs = 300, # number of training epochs 88 | 89 | # Error function 90 | error_function = meanSquareErr, # error function to minimize during training. For regression, use meanSquareErr 91 | report_classification_error = F # whether to print classification error during training 92 | ) 93 | 94 | # the prediction by dnn_regression on the training data 95 | pred <- predict(dnn_regression) 96 | 97 | # calculate the r-squared of the prediction 98 | rsq(dnn_regression) 99 | 100 | # calculate the r-squared of the prediction on the validation data 101 | rsq(dnn_regression, input = input_valid, target = target_valid) 102 | ``` 103 | 104 | #### Train a neural network for classification 105 | 106 | ``` 107 | 108 | input <- matrix(runif(1000), 500, 2) 109 | input_valid <- matrix(runif(100), 50, 2) 110 | target <- (cos(rowSums(input + input^2)) > 0.5) * 1 111 | target_valid <- (cos(rowSums(input_valid + input_valid^2)) > 0.5) * 1 112 | 113 | # create a new deep neural network for classification 114 | dnn_classification <- new_dnn( 115 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 116 | # The first element is the number of input variables. 117 | # The last element is the number of output variables.
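 # Here: 2 input variables, three hidden layers of 50, 50 and 20 units, and 1 output variable.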
118 | hidden_layer_default = rectified_linear_unit_function, # for hidden layers, use rectified_linear_unit_function 119 | output_layer_default = sigmoidUnitDerivative # for classification, use sigmoidUnitDerivative function 120 | ) 121 | 122 | dnn_classification <- train_dnn( 123 | dnn_classification, 124 | 125 | # training data 126 | input, # input variable for training 127 | target, # target variable for training 128 | input_valid, # input variable for validation 129 | target_valid, # target variable for validation 130 | 131 | # training parameters 132 | learn_rate_weight = exp(-8) * 10, # learning rate for weights, higher if using dropout 133 | learn_rate_bias = exp(-8) * 10, # learning rate for biases, higher if using dropout 134 | learn_rate_gamma = exp(-8) * 10, # learning rate for the gamma factor used in batch normalization 135 | batch_size = 10, # number of observations in a batch during training. Higher for faster training. Lower for faster convergence 136 | batch_normalization = T, # logical value, T to use batch normalization 137 | dropout_input = 0.2, # dropout ratio in the input layer 138 | dropout_hidden = 0.5, # dropout ratio in hidden layers 139 | momentum_initial = 0.6, # initial momentum in Stochastic Gradient Descent training 140 | momentum_final = 0.9, # final momentum in Stochastic Gradient Descent training 141 | momentum_switch = 100, # the epoch after which the momentum switches from the initial to the final value 142 | num_epochs = 100, # number of training epochs 143 | 144 | # Error function 145 | error_function = crossEntropyErr, # error function to minimize during training. For classification, use crossEntropyErr 146 | report_classification_error = T # whether to print classification error during training 147 | ) 148 | 149 | # the prediction by dnn_classification on the training data 150 | pred <- predict(dnn_classification) 151 | 152 | hist(pred) 153 | 154 | # calculate the accuracy ratio of the prediction 155 | AR(dnn_classification) 156 | 157 | # calculate the accuracy ratio of the prediction on the validation data 158 | AR(dnn_classification, input = input_valid, target = target_valid) 159 | 160 | # print the layer weights 161 | # this function can print a heatmap, histogram, or surface 162 | print_weight(dnn_classification, 1, type = "heatmap") 163 | 164 | print_weight(dnn_classification, 2, type = "surface") 165 | 166 | print_weight(dnn_classification, 3, type = "histogram") 167 | ``` 168 | 169 | #### References 170 | Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever, Ruslan Salakhutdinov, 2014, Dropout: A Simple Way to Prevent Neural Networks from Overfitting, Journal of Machine Learning Research 15 (2014) 1929-1958 171 | 172 | Sergey Ioffe, Christian Szegedy, 2015, Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift, Proceedings of the 32nd International Conference on Machine Learning, Lille, France, 2015. 173 | 174 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun, 2015, Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification, arXiv 175 | 176 | X. Glorot, A. Bordes, and Y. Bengio, 2011, Deep Sparse Rectifier Neural Networks. In Proceedings of the 14th International Conference on Artificial Intelligence and Statistics, pages 315–323 177 | 178 | 179 | Drees, Martin (2013). "Implementierung und Analyse von tiefen Architekturen 180 | in R". German. Master's thesis. Fachhochschule Dortmund. 181 | 182 | Rueckert, Johannes (2015). "Extending the Darch library for deep 183 | architectures". Project thesis. Fachhochschule Dortmund.
184 | URL: [saviola.de](http://static.saviola.de/publications/rueckert_2015.pdf) 185 | 186 | 187 | 188 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | deeplearning 3 | ============ 4 | 5 | #### Create and train deep neural network of ReLU type with SGD and batch normalization 6 | 7 | ### About 8 | 9 | The deeplearning package is an R package that implements deep neural networks in R. It employes Rectifier Linear Unit functions as its building blocks and trains a neural network with stochastic gradient descent method with batch normalization to speed up the training and promote regularization. Neural networks of such kind of architecture and training methods are state of the art and even achieved suplassing human-level performance in ImageNet competition. The deeplearning package is inspired by another R package darch which implements layerwise Restricted Boltzmann Machine pretraining and dropout and uses its class DArch as the default class. 10 | 11 | ### Installtion 12 | 13 | Install deeplearning from CRAN 14 | 15 | install.packages("deeplearning") 16 | 17 | Or install it from github 18 | 19 | devtools::install_github("rz1988/deeplearning") 20 | 21 | ### Use deeplearning 22 | 23 | Using the deeplearning package is designed to be easy and fun. It only takes two steps to run your first neural network. 24 | 25 | In step one, the user will create a new neural network. You will need to specify the strucutre of the neural network which are the number of layers and neurons in the network and the type of activation functions. The default activation is rectifier linear unit function for the hidden layers but you can also use other types of activation such as sigmoidal function or write your own activation function. 26 | 27 | In step two, the user will train the neural network with a training input and a traing target. There are a number of other training parameters. For how to choose these training parameters please refer to . 28 | 29 | ### Examples 30 | 31 | #### Train a neural networ for regression 32 | 33 | input <- matrix(runif(1000), 500, 2) 34 | input_valid <- matrix(runif(100), 50, 2) 35 | target <- rowSums(input + input^2) 36 | target_valid <- rowSums(input_valid + input_valid^2) 37 | 38 | 39 | # create a new deep neural network for classificaiton 40 | dnn_regression <- new_dnn( 41 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 42 | # The first element is the number of input variables. 43 | # The last element is the number of output variables. 44 | hidden_layer_default = rectified_linear_unit_function, 45 | # for hidden layers, use rectified_linear_unit_function 46 | output_layer_default = linearUnitDerivative # for regression, use linearUnitDerivative function 47 | ) 48 | 49 | dnn_regression <- train_dnn( 50 | dnn_regression, 51 | 52 | # training data 53 | input, # input variable for training 54 | target, # target variable for training 55 | input_valid, # input variable for validation 56 | target_valid, # target variable for validation 57 | 58 | # training parameters 59 | learn_rate_weight = exp(-8) * 10, # learning rate for weights, higher if use dropout 60 | learn_rate_bias = exp(-8) * 10, # learning rate for biases, hihger if use dropout 61 | learn_rate_gamma = exp(-8) * 10, # learning rate for the gamma factor used 62 | batch_size = 10, # number of observations in a batch during training. Higher for faster training. 
Lower for faster convergence 63 | batch_normalization = T, # logical value, T to use batch normalization 64 | dropout_input = 0.2, # dropout ratio in input. 65 | dropout_hidden = 0.5, # dropout ratio in hidden layers 66 | momentum_initial = 0.6, # initial momentum in Stochastic Gradient Descent training 67 | momentum_final = 0.9, # final momentum in Stochastic Gradient Descent training 68 | momentum_switch = 100, # after which the momentum is switched from initial to final momentum 69 | num_epochs = 300, # number of iterations in training 70 | 71 | # Error function 72 | error_function = meanSquareErr, # error function to minimize during training. For regression, use meanSquareErr 73 | report_classification_error = F # whether to print classification error during training 74 | ) 75 | 76 | # the prediciton by dnn_regression 77 | pred <- predict(dnn_regression) 78 | 79 | # calculate the r-squared of the prediciton 80 | rsq(dnn_regression) 81 | 82 | # calcualte the r-squared of the prediciton in validation 83 | rsq(dnn_regression, input = input_valid, target = target_valid) 84 | 85 | #### Train a neural network for classification 86 | 87 | 88 | input <- matrix(runif(1000), 500, 2) 89 | input_valid <- matrix(runif(100), 50, 2) 90 | target <- (cos(rowSums(input + input^2)) > 0.5) * 1 91 | target_valid <- (cos(rowSums(input_valid + input_valid^2)) > 0.5) * 1 92 | 93 | # create a new deep neural network for classificaiton 94 | dnn_classification <- new_dnn( 95 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 96 | # The first element is the number of input variables. 97 | # The last element is the number of output variables. 98 | hidden_layer_default = rectified_linear_unit_function, # for hidden layers, use rectified_linear_unit_function 99 | output_layer_default = sigmoidUnitDerivative # for classification, use sigmoidUnitDerivative function 100 | ) 101 | 102 | dnn_classification <- train_dnn( 103 | dnn_classification, 104 | 105 | # training data 106 | input, # input variable for training 107 | target, # target variable for training 108 | input_valid, # input variable for validation 109 | target_valid, # target variable for validation 110 | 111 | # training parameters 112 | learn_rate_weight = exp(-8) * 10, # learning rate for weights, higher if use dropout 113 | learn_rate_bias = exp(-8) * 10, # learning rate for biases, hihger if use dropout 114 | learn_rate_gamma = exp(-8) * 10, # learning rate for the gamma factor used 115 | batch_size = 10, # number of observations in a batch during training. Higher for faster training. Lower for faster convergence 116 | batch_normalization = T, # logical value, T to use batch normalization 117 | dropout_input = 0.2, # dropout ratio in input. 118 | dropout_hidden = 0.5, # dropout ratio in hidden layers 119 | momentum_initial = 0.6, # initial momentum in Stochastic Gradient Descent training 120 | momentum_final = 0.9, # final momentum in Stochastic Gradient Descent training 121 | momentum_switch = 100, # after which the momentum is switched from initial to final momentum 122 | num_epochs = 100, # number of iterations in training 123 | 124 | # Error function 125 | error_function = crossEntropyErr, # error function to minimize during training. 
For regression, use crossEntropyErr 126 | report_classification_error = T # whether to print classification error during training 127 | ) 128 | 129 | # the prediciton by dnn_regression 130 | pred <- predict(dnn_classification) 131 | 132 | hist(pred) 133 | 134 | # calculate the r-squared of the prediciton 135 | AR(dnn_classification) 136 | 137 | # calcualte the r-squared of the prediciton in validation 138 | AR(dnn_classification, input = input_valid, target = target_valid) 139 | 140 | # print the layer weights 141 | # this function can print heatmap, histogram, or a surface 142 | print_weight(dnn_regression, 1, type = "heatmap") 143 | 144 | print_weight(dnn_regression, 2, type = "surface") 145 | 146 | print_weight(dnn_regression, 3, type = "histogram") 147 | 148 | #### References 149 | 150 | Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever, Ruslan Salakhutdinov, 2013, Dropout: A Simple Way to Prevent Neural Networks from Overfitting, Journal of Machine Learning Research 15 (2014) 1929-1958 151 | 152 | Sergey Ioffe, Christian Szegedy, 2015, Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift, Proceedings of the 32 nd International Conference on Machine Learning, Lille, France, 2015. 153 | 154 | Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun, 2015, Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification, arXiv 155 | 156 | X. Glorot, A. Bordes, and Y. Bengio, 2011,Deep sparse rectifier networks. In Proceedings of the 14th International Conference on Artificial Intelligence and Statistics, pages 315–323 157 | 158 | Drees, Martin (2013). "Implementierung und Analyse von tiefen Architekturen in R". German. Master's thesis. Fachhochschule Dortmund. 159 | 160 | Rueckert, Johannes (2015). "Extending the Darch library for deep architectures". Project thesis. Fachhochschule Dortmund. URL: [saviola.de](http://static.saviola.de/publications/rueckert_2015.pdf) 161 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Resubmission 2 | This is a resubmission. In this version I have: 3 | 4 | * Converted the DESCRIPTION title to title case. 5 | 6 | * Removed the maintainer from DESCRIPTION. Maintainer will be generated by Author@R. 7 | 8 | * More clearly identified the copyright holders in the DESCRIPTION. 9 | 10 | * Added the following to NAMESPACE file. 11 | importFrom("graphics", "plot") 12 | importFrom("methods", "new") 13 | importFrom("stats", "predict") 14 | importClassesFrom(darch,DArch) 15 | 16 | * Updated the examples so they run in less than 5 s. 17 | 18 | 19 | ## Test environments 20 | * Windows 10, R 3.2.4 21 | * ubuntu 12.04 (on travis-ci), R 3.2.4 22 | 23 | ## R CMD check results 24 | * There is no warning or note. 25 | 26 | ## Downstream dependencies 27 | * This is a new package. There is no any downstream dependency. 
28 | -------------------------------------------------------------------------------- /deeplearning.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /inst/examples_classification.R: -------------------------------------------------------------------------------- 1 | # Examples of classification 2 | 3 | input <- matrix(runif(1000), 500, 2) 4 | input_valid <- matrix(runif(100), 50, 2) 5 | target <- (cos(rowSums(input + input^2)) > 0.5) * 1 6 | target_valid <- (cos(rowSums(input_valid + input_valid^2)) > 0.5) * 1 7 | 8 | # create a new deep neural network for classificaiton 9 | dnn_classification <- new_dnn( 10 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 11 | # The first element is the number of input variables. 12 | # The last element is the number of output variables. 13 | hidden_layer_default = rectified_linear_unit_function, # for hidden layers, use rectified_linear_unit_function 14 | output_layer_default = sigmoidUnitDerivative # for classification, use sigmoidUnitDerivative function 15 | ) 16 | 17 | dnn_classification <- train_dnn( 18 | dnn_classification, 19 | 20 | # training data 21 | input, # input variable for training 22 | target, # target variable for training 23 | input_valid, # input variable for validation 24 | target_valid, # target variable for validation 25 | 26 | # training parameters 27 | learn_rate_weight = exp(-8) * 10, # learning rate for weights, higher if use dropout 28 | learn_rate_bias = exp(-8) * 10, # learning rate for biases, hihger if use dropout 29 | learn_rate_gamma = exp(-8) * 10, # learning rate for the gamma factor used 30 | batch_size = 10, # number of observations in a batch during training. Higher for faster training. Lower for faster convergence 31 | batch_normalization = T, # logical value, T to use batch normalization 32 | dropout_input = 0.2, # dropout ratio in input. 33 | dropout_hidden = 0.5, # dropout ratio in hidden layers 34 | momentum_initial = 0.6, # initial momentum in Stochastic Gradient Descent training 35 | momentum_final = 0.9, # final momentum in Stochastic Gradient Descent training 36 | momentum_switch = 100, # after which the momentum is switched from initial to final momentum 37 | num_epochs = 100, # number of iterations in training 38 | 39 | # Error function 40 | error_function = crossEntropyErr, # error function to minimize during training. 
For regression, use crossEntropyErr 41 | report_classification_error = T # whether to print classification error during training 42 | ) 43 | 44 | # the prediciton by dnn_regression 45 | pred <- predict(dnn_classification) 46 | 47 | hist(pred) 48 | 49 | # calculate the r-squared of the prediciton 50 | AR(dnn_classification) 51 | 52 | # calcualte the r-squared of the prediciton in validation 53 | AR(dnn_classification, input = input_valid, target = target_valid) 54 | 55 | # print the layer weights 56 | # this function can print heatmap, histogram, or a surface 57 | print_weight(dnn_regression, 1, type = "heatmap") 58 | 59 | print_weight(dnn_regression, 2, type = "surface") 60 | 61 | print_weight(dnn_regression, 3, type = "histogram") 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /inst/examples_regression.R: -------------------------------------------------------------------------------- 1 | # Examples of Regression 2 | 3 | input <- matrix(runif(1000), 500, 2) 4 | input_valid <- matrix(runif(100), 50, 2) 5 | target <- rowSums(input + input^2) 6 | target_valid <- rowSums(input_valid + input_valid^2) 7 | 8 | 9 | # create a new deep neural network for classificaiton 10 | dnn_regression <- new_dnn( 11 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 12 | # The first element is the number of input variables. 13 | # The last element is the number of output variables. 14 | hidden_layer_default = rectified_linear_unit_function, # for hidden layers, use rectified_linear_unit_function 15 | output_layer_default = linearUnitDerivative # for regression, use linearUnitDerivative function 16 | ) 17 | 18 | dnn_regression <- train_dnn( 19 | dnn_regression, 20 | 21 | # training data 22 | input, # input variable for training 23 | target, # target variable for training 24 | input_valid, # input variable for validation 25 | target_valid, # target variable for validation 26 | 27 | # training parameters 28 | learn_rate_weight = exp(-8) * 1, # learning rate for weights, higher if use dropout 29 | learn_rate_bias = exp(-8) * 1, # learning rate for biases, hihger if use dropout 30 | learn_rate_gamma = exp(-8) * 1, # learning rate for the gamma factor used 31 | batch_size = 10, # number of observations in a batch during training. Higher for faster training. Lower for faster convergence 32 | batch_normalization = T, # logical value, T to use batch normalization 33 | dropout_input = 0.2, # dropout ratio in input. 34 | dropout_hidden = 0.5, # dropout ratio in hidden layers 35 | momentum_initial = 0.6, # initial momentum in Stochastic Gradient Descent training 36 | momentum_final = 0.9, # final momentum in Stochastic Gradient Descent training 37 | momentum_switch = 100, # after which the momentum is switched from initial to final momentum 38 | num_epochs = 100, # number of iterations in training 39 | 40 | # Error function 41 | error_function = meanSquareErr, # error function to minimize during training. 
For regression, use meanSquareErr 42 | report_classification_error = F # whether to print classification error during training 43 | ) 44 | 45 | # the prediciton by dnn_regression 46 | pred <- predict(dnn_regression) 47 | 48 | # calculate the r-squared of the prediciton 49 | rsq(dnn_regression) 50 | 51 | # calcualte the r-squared of the prediciton in validation 52 | rsq(dnn_regression, input = input_valid, target = target_valid) 53 | 54 | # print the layer weights 55 | # this function can print heatmap, histogram, or a surface 56 | print_weight(dnn_regression, 1, type = "heatmap") 57 | 58 | print_weight(dnn_regression, 2, type = "surface") 59 | 60 | print_weight(dnn_regression, 3, type = "histogram") 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /inst/test_ReLU.R: -------------------------------------------------------------------------------- 1 | 2 | num_dim <- 3 # N is the dimension of the input vector 3 | num_training <- 1000 4 | num_valid <- 100 5 | x <- matrix(runif(num_dim * num_training), num_training, num_dim) 6 | y <- rowSums(sin(x)+cos(x)^2) 7 | 8 | y <- sample(0:1, size = num_training, replace = T) 9 | 10 | x_valid <- matrix(runif(num_dim * num_valid), num_valid, num_dim) 11 | y_valid <- rowSums(sin(x_valid)+cos(x_valid)^2) 12 | 13 | # Run a deep neural net with sigmoidal unit function a 14 | # Pretraining RBM 15 | 16 | darch <- darch(x = x, 17 | y = y, 18 | # darch = darch, 19 | # xValid = x_valid, 20 | # yValid = y_valid, 21 | layers = c(num_dim, 50, 50, 1), 22 | rbm.numEpochs = 0, 23 | darch.bootstrap = F, 24 | # darch.layerFunctionDefault = rectified_linear_unit_function, 25 | darch.layerFunctionDefault = sigmoidUnitDerivative, 26 | darch.layerFunctions = c("3" = sigmoidUnitDerivative), 27 | darch.isBin = T, 28 | darch.isClass = T, 29 | darch.batchSize = 10, 30 | darch.numEpochs = 6 31 | ) 32 | 33 | rsq(darch) 34 | rsq(darch, x_valid, y_valid) 35 | 36 | # Run a deep neural net with ReLU without pretraining 37 | 38 | darch_ReLU <- darch(x = x, 39 | y = y, 40 | # xValid = x_valid, 41 | # yValid = y_valid, 42 | layers = c(num_dim, 50, 50, 1), 43 | rbm.numEpochs = 0, 44 | darch.bootstrap = F, 45 | darch.layerFunctionDefault = rectified_linear_unit_function, 46 | # darch.layerFunctionDefault = sigmoidUnitDerivative, 47 | darch.layerFunctions = c("3" = linearUnitDerivative), 48 | darch.isBin = F, 49 | darch.isClass = F, 50 | darch.batchSize = 10, 51 | darch.numEpochs = 10 52 | ) 53 | 54 | rsq(darch_ReLU) 55 | 56 | fprop1 <- forward_propagate(darch_ReLU, x) 57 | 58 | n_layer <- 2 59 | 60 | plotly::plot_ly(z = fprop1[[1]][[n_layer]], type = "heatmap", colorscale = "hot") 61 | 62 | plotly::plot_ly(x = c(fprop1[[1]][[n_layer]]), type = "histogram") 63 | 64 | head(fprop1[[2]][[2]]) 65 | 66 | head(getLayer(darch_ReLU,1)[[1]]) 67 | 68 | # Run a linear model 69 | data_lm <- data.frame(x, y) 70 | mod <- gam( y ~ s(X1) + s(X2) + s(X3) + s(X4) + s(X5), data = data_lm) 71 | 72 | rsq(mod, x, y) 73 | rsq(mod, x_valid, y_valid) 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /inst/test_batch_normalization_differential.R: -------------------------------------------------------------------------------- 1 | x <- matrix(runif(50), 10, 5) 2 | beta <- matrix(1, 10, 5) 3 | gamma <- matrix(.5, 10, 5) 4 | 5 | ret <- batch_normalization(x, gamma, beta) 6 | 7 | mu <- ret[[1]] 8 | sigma_2 <- ret[[2]] 9 | x_hat <- ret[[3]] 10 | y <- ret[[4]] 11 | 12 | target <- matrix(1, 10, 5) 13 | delta_y <- y - 
target 14 | 15 | ret <- batch_normalization_differential(delta_y, 16 | mu, 17 | sigma_2, 18 | x, 19 | x_hat, 20 | y, 21 | gamma, 22 | beta) 23 | 24 | delta_x <- ret[[1]] 25 | delta_gamma <- ret[[2]] 26 | delta_beta <- ret[[3]] 27 | delta_x_hat <- ret[[4]] 28 | delta_sigma_2 <- ret[[5]] 29 | delta_mu <- ret[[6]] 30 | 31 | write_2_csv <- function(data, file_name) { 32 | file_name <- paste0('excl/test_batch_normalization_differential/', file_name, '.csv') 33 | write.csv(data, file = file_name) 34 | } 35 | 36 | write_2_csv(x, "x.csv") 37 | write_2_csv(y, "y.csv") 38 | write_2_csv(delta_y, "delta_y.csv") 39 | write_2_csv(delta_x, "delta_x.csv") 40 | write_2_csv(delta_gamma, "delta_gamma.csv") 41 | write_2_csv(delta_beta, "detla_beta.csv") 42 | 43 | write_2_csv(delta_x_hat, "delta_x_hat") 44 | write_2_csv(delta_sigma_2, "detla_sigma_2") 45 | write_2_csv(delta_mu, "delta_mu") 46 | 47 | write_2_csv(mu, "mu") 48 | write_2_csv(sigma_2, "sigma_2") 49 | 50 | -------------------------------------------------------------------------------- /inst/test_fineTuneFunctions.R: -------------------------------------------------------------------------------- 1 | # New a DArch instance 2 | source("inst/dropout.R") 3 | 4 | 5 | darch = newDArch(c(10,20,1), batchSize = 10) 6 | setDropoutOneMaskPerEpoch(darch) = F 7 | setFineTuneFunction(darch) <- minimizeClassifier 8 | setFineTuneFunction(darch) <- backpropagation 9 | setFineTuneFunction(darch) <- backpropSGD 10 | darch = generateDropoutMasksForDarch(darch) 11 | 12 | # New a dataset 13 | input <- matrix(runif(250), 50, 5) 14 | target <- rowSums(cos(input) + sin(input)^2) 15 | 16 | mean_v <- mean(target) 17 | target <- as.numeric(target > mean_v * 1.02 ) 18 | 19 | input_test <- matrix(runif(100), 20, 5) 20 | target_test <- rowSums(cos(input_test) + sin(input_test)^2) 21 | mean_v <- mean(target_test) 22 | target_test <- as.numeric(target_test > mean_v * 1.02) 23 | 24 | # Compare with the benchmark - backpropagation 25 | 26 | darch_1 = darch( x = input, 27 | y = target, 28 | layers = c(5, 100, 50, 1), 29 | # darch = darch, 30 | darch.layerFunctionDefault = rectified_linear_unit_function, 31 | darch.layerFunctions = c("3" = sigmoidUnitDerivative), 32 | darch.bootstrap = F, 33 | darch.isBin = F, 34 | darch.isClass = F, 35 | darch.learnRateWeights = 0.01, 36 | darch.learnRateBiases = 0.01, 37 | darch.dropoutInput = 0., 38 | darch.dropoutHidden = 0., 39 | darch.fineTuneFunction = backpropagation, # finetune_SGD, 40 | darch.batchSize = 10, 41 | darch.numEpochs = 50 42 | ) 43 | 44 | 45 | darch_2 = darch( x = input, 46 | y = target, 47 | layers = c(5, 100, 50, 1), 48 | # darch = darch, 49 | darch.layerFunctionDefault = rectified_linear_unit_function, 50 | darch.layerFunctions = c("3" = sigmoidUnitDerivative), 51 | darch.bootstrap = F, 52 | darch.isBin = F, 53 | darch.isClass = F, 54 | darch.learnRateWeights = 0.01, 55 | darch.learnRateBiases = 0.01, 56 | darch.dropoutInput = 0., 57 | darch.dropoutHidden = 0., 58 | # darch.errorFunction = crossEntropyError, 59 | darch.fineTuneFunction = finetune_SGD_bn, 60 | errorFunc = meanSquareErr, 61 | darch.batchSize = 10, 62 | darch.numEpochs = 50 63 | ) 64 | 65 | AR(darch_1) 66 | AR(darch_2) 67 | 68 | AR(darch_1, input_test, target_test) 69 | AR(darch_2, input_test, target_test) 70 | 71 | plot(predict(darch_1), predict(darch_2)) 72 | 73 | # Just use the finetuneDArch method. 
74 | # This function should be seperated to a train_dnn function 75 | dataset <- createDataSet(input, target) 76 | 77 | darch3 = fineTuneDArch(darch_1, dataset, 78 | dataSetValid = NULL, 79 | numEpochs = 5, 80 | bootstrap = F, 81 | isBin = T, 82 | isClass = T, 83 | stopErr = -Inf, 84 | stopClassErr = -Inf, 85 | stopValidErr = -Inf, 86 | stopValidClassErr = 101 87 | ) 88 | 89 | # Use the fineTune function directly 90 | 91 | darch2 = darch 92 | 93 | # Backpropagation/ Steepest Descent 94 | darch2 = backpropagation(darch, dataset@data, dataset@targets) 95 | 96 | # Conjugate Gradient Descent - Doesn't seem to work well. A bug in the code? 97 | # darch3 = backpropCGD(darch, dataset@data, dataset@targets, length = 3, switchLayers = 0) 98 | 99 | # Modified Steepest Gradient Descent 100 | darch2 <- backpropSGD(darch, dataset@data, dataset@targets, crossEntropyErr) 101 | 102 | testFunc2(darch2, dataset@data, dataset@targets, "Train set") 103 | 104 | getLayer(darch,1)[[1]][1,] 105 | getLayer(darch2,1)[[1]][1,] 106 | getLayer(darch3,1)[[1]][1,] 107 | 108 | testFunc2(darch3, dataset@data, dataset@targets, "Train Set") 109 | 110 | 111 | gr1 <- calcGradient(par, darch2, dims, data, target, crossEntropyErr, epochSwitch) 112 | gr2 <- fr(par, darch2, dims, data, target, epochSwitch ) 113 | gr1 - gr2 114 | 115 | -------------------------------------------------------------------------------- /inst/test_finetune_SGD_bn.R: -------------------------------------------------------------------------------- 1 | # a toy model to test the finetune_SGD_bn function 2 | 3 | input <- matrix(runif(100), 50, 2) 4 | target <- rowSums(input + input^2) 5 | 6 | 7 | # new a darch instance using new_darch 8 | darch <- new_dnn(c(2, 20, 30, 20, 1)) 9 | 10 | 11 | for (i in 1:100) { 12 | darch <- generateDropoutMasksForDarch(darch, dropout_input = 0.2, dropout_hidden = 0.5) 13 | darch <- finetune_SGD_bn(darch, input, target, 14 | learn_rate_weight = exp(-10), 15 | learn_rate_bias = exp(-10), 16 | learn_rate_gamma = exp(-10), 17 | with_BN = T) 18 | darch <- calcualte_population_mu_sigma(darch, input) 19 | ret <- mseError(target, predict(darch, newdata = input)) 20 | print(paste0(ret[[1]], ", ", ret[[2]])) 21 | } 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | darch = darch( x = input, 41 | y = target, 42 | layers = c(2, 10, 1), 43 | # darch = darch, 44 | darch.layerFunctionDefault = rectified_linear_unit_function, 45 | darch.layerFunctions = c("2" = linearUnitDerivative), 46 | darch.bootstrap = F, 47 | darch.isBin = F, 48 | darch.isClass = F, 49 | darch.learnRateWeights = 0.01, 50 | darch.learnRateBiases = 0.01, 51 | darch.dropoutInput = 0., 52 | darch.dropoutHidden = 0., 53 | darch.fineTuneFunction = backpropagation, # finetune_SGD, 54 | darch.batchSize = 5, 55 | darch.numEpochs = 1 56 | ) 57 | 58 | darch@executeFunction <- runDArch 59 | 60 | plot(target, predict(darch)) 61 | 62 | # run finetune_SGD_bn with batch normalization off 63 | 64 | darch@learnRateBiases <- exp(1) 65 | darch@learnRateWeights <- exp(1) 66 | 67 | for(i in 1:100) { 68 | darch <- finetune_SGD_bn(darch, input, target, learn_rate_gamma = exp(-8), with_BN = F) 69 | ret <- backpropagate_delta_bn(darch, input, target, with_BN = F) 70 | output <- ret[[4]][[2]] 71 | delta_weight <- ret[[1]] 72 | mse_err <- mseError(target, output) 73 | print(paste0(mse_err[[1]], ": ", mse_err[[2]])) 74 | } 75 | 76 | plot(target, output) 77 | 78 | darch = darch( x = input, 79 | y = target, 80 | layers = c(2, 10, 1), 81 | # darch = darch, 
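                 # the arguments below configure ReLU hidden layers with a linear output layer and use
                 # finetune_SGD_bn as the fine-tune function with batch normalization turned off (with_BN = F)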
82 | darch.layerFunctionDefault = rectified_linear_unit_function, 83 | darch.layerFunctions = c("2" = linearUnitDerivative), 84 | darch.bootstrap = F, 85 | darch.isBin = F, 86 | darch.isClass = F, 87 | darch.learnRateWeights = 0.01, 88 | darch.learnRateBiases = 0.01, 89 | darch.dropoutInput = 0., 90 | darch.dropoutHidden = 0., 91 | darch.fineTuneFunction = finetune_SGD_bn, # , 92 | with_BN = F, 93 | darch.batchSize = 5, 94 | darch.numEpochs = 100 95 | ) 96 | 97 | darch = darch( x = input, 98 | y = target, 99 | layers = c(2, 10, 1), 100 | # darch = darch, 101 | darch.layerFunctionDefault = rectified_linear_unit_function, 102 | darch.layerFunctions = c("2" = linearUnitDerivative), 103 | darch.bootstrap = F, 104 | darch.isBin = F, 105 | darch.isClass = F, 106 | darch.learnRateWeights = 0.01, 107 | darch.learnRateBiases = 0.01, 108 | darch.dropoutInput = 0., 109 | darch.dropoutHidden = 0., 110 | darch.fineTuneFunction = backpropagation, 111 | darch.batchSize = 5, 112 | darch.numEpochs = 100 113 | ) 114 | 115 | 116 | # test run_darch_bn and backpropagate_delta_bn functions 117 | 118 | darch = darch( x = input, 119 | y = target, 120 | layers = c(2, 10, 1), 121 | # darch = darch, 122 | darch.layerFunctionDefault = rectified_linear_unit_function, 123 | darch.layerFunctions = c("2" = linearUnitDerivative), 124 | darch.bootstrap = F, 125 | darch.isBin = F, 126 | darch.isClass = F, 127 | darch.learnRateWeights = 0.01, 128 | darch.learnRateBiases = 0.01, 129 | darch.dropoutInput = 0., 130 | darch.dropoutHidden = 0., 131 | darch.fineTuneFunction = finetune_SGD_bn, 132 | with_BN = T, 133 | darch.batchSize = 5, 134 | darch.numEpochs = 1 135 | ) 136 | 137 | output1 <- predict(darch, newdata = input) 138 | ret <- backpropagate_delta_bn(darch, input, target, with_BN = T) 139 | output2 <- ret[[4]][[2]] 140 | -------------------------------------------------------------------------------- /inst/test_new_dnn.R: -------------------------------------------------------------------------------- 1 | input <- matrix(runif(6), 3, 2) 2 | target <- rowSums(input) 3 | 4 | darch <- new_dnn(c(2, 1)) 5 | predict(darch, newdata = input) 6 | 7 | x <- cbind(input, 1) 8 | weight <- getLayer(darch, 1)[[1]] 9 | 10 | y <- x %*% weight 11 | 12 | y 13 | -------------------------------------------------------------------------------- /inst/test_run_dnn.R: -------------------------------------------------------------------------------- 1 | input <- matrix(runif(6), 3, 2) 2 | target <- rowSums(input) 3 | 4 | darch <- new_dnn(c(2, 2, 1)) 5 | 6 | # use the runDArch as execution function 7 | darch@executeFunction <- runDArch 8 | predict(darch, newdata = input) 9 | 10 | # now change the execution function to run_dnn 11 | darch@executeFunction <- run_dnn 12 | predict(darch, newdata = input) 13 | 14 | 15 | # now change the sigma of the hidden layer 16 | # should expect different output 17 | darch@layers[[1]][[6]] <- rep(0.01, 2) 18 | predict(darch, newdata = input) 19 | 20 | 21 | # compare the results with backpropagate_delta_bn function 22 | 23 | # set up the dropout mask 24 | dropoutMasks <- list() 25 | numLayers <- length(getLayers(darch)) 26 | 27 | # generate dropout masks 28 | generateDropoutMask <- function(length, dropoutRate) 29 | { 30 | if (dropoutRate == 0) 31 | { 32 | ret <- rep(1, length) 33 | } 34 | else 35 | { 36 | ret <- sample(0:1, length, replace = T, 37 | prob = c(dropoutRate, 1 - dropoutRate)) 38 | } 39 | 40 | return (ret) 41 | } 42 | 43 | setDropoutMask(darch, 0) <- 44 | generateDropoutMask(nrow(getLayerWeights(darch, 
1)[]) - 1, 45 | darch@dropoutInput) 46 | 47 | for (i in 1:(numLayers - 2)) 48 | { 49 | setDropoutMask(darch, i) <- 50 | generateDropoutMask(nrow(getLayerWeights(darch, i+1)[])-1, 51 | darch@dropoutHidden) 52 | } 53 | 54 | 55 | output <- backpropagate_delta_bn(darch, input, target, with_BN = F)[[4]] 56 | 57 | y <- predict(darch, newdata = input) 58 | 59 | -------------------------------------------------------------------------------- /inst/test_train_dnn.R: -------------------------------------------------------------------------------- 1 | ########################################################################################################################### 2 | # Test case 1: test the basic functionality of run_dnn 3 | 4 | input <- matrix(runif(100), 50, 2) 5 | input_valid <- matrix(runif(10), 5, 2) 6 | target <- rowSums(input + input^2) 7 | target_valid <- rowSums(input_valid + input_valid^2) 8 | 9 | darch <- new_dnn(c(2,5,1)) 10 | darch <- train_dnn(darch, 11 | input, 12 | target, 13 | # input_valid, 14 | # target_valid, 15 | 16 | learn_rate_weight = exp(-5), 17 | learn_rate_bias = exp(-5), 18 | learn_rate_gamma = exp(-5), 19 | batch_size = 10, 20 | batch_normalization = F, 21 | dropout_input = 0, 22 | dropout_hidden = 0, 23 | momentunm_initial = 0.6, 24 | momentum_final = 0.9, 25 | momentum_switch = 100, 26 | num_epochs = 100, 27 | 28 | # target types 29 | error_function = meanSquareErr, 30 | report_classification_error = F 31 | ) 32 | 33 | # test the dropout 34 | 35 | 36 | ########################################################################################################################### 37 | # Test case 2: Test mixed training of BN and No BN 38 | # 2.1 39 | # first train with BN on 40 | # then train with BN off 41 | rm(darch) 42 | darch <- new_dnn(c(2, 5, 10, 1)) 43 | darch <- train_dnn(darch, 44 | input, 45 | target, 46 | input_valid, 47 | target_valid, 48 | # training parameters 49 | learn_rate_weight = exp(-5), 50 | learn_rate_bias = exp(-5), 51 | learn_rate_gamma = exp(-5), 52 | batch_size = 10, 53 | batch_normalization = T, 54 | dropout_input = 0.4, 55 | dropout_hidden = 0.8, 56 | momentunm_initial = 0.6, 57 | momentum_final = 0.9, 58 | momentum_switch = 100, 59 | num_epochs = 50, 60 | # target types 61 | error_function = meanSquareErr, 62 | report_classification_error = F 63 | ) 64 | 65 | darch <- train_dnn(darch, 66 | input, 67 | target, 68 | input_valid, 69 | target_valid, 70 | # training parameters 71 | learn_rate_weight = exp(-5), 72 | learn_rate_bias = exp(-5), 73 | learn_rate_gamma = exp(-5), 74 | batch_size = 10, 75 | batch_normalization = F, 76 | dropout_input = 0.4, 77 | dropout_hidden = 0.8, 78 | momentunm_initial = 0.6, 79 | momentum_final = 0.9, 80 | momentum_switch = 100, 81 | num_epochs = 50, 82 | # target types 83 | error_function = meanSquareErr, 84 | report_classification_error = F 85 | ) 86 | 87 | plot(darch@stats$dataErrors$raw) 88 | plot(darch@stats$validErrors$raw) 89 | 90 | # 2.2 91 | # firt train with BN off 92 | # then trian with BN on 93 | 94 | rm(darch) 95 | darch <- new_dnn(c(2, 5, 10, 1)) 96 | darch <- train_dnn(darch, 97 | input, 98 | target, 99 | input_valid, 100 | target_valid, 101 | # training parameters 102 | learn_rate_weight = exp(-5), 103 | learn_rate_bias = exp(-5), 104 | learn_rate_gamma = exp(-5), 105 | batch_size = 10, 106 | batch_normalization = F, 107 | dropout_input = 0.4, 108 | dropout_hidden = 0.8, 109 | momentunm_initial = 0.6, 110 | momentum_final = 0.9, 111 | momentum_switch = 100, 112 | num_epochs = 50, 113 | # target 
types 114 | error_function = meanSquareErr, 115 | report_classification_error = F 116 | ) 117 | 118 | 119 | darch <- train_dnn(darch, 120 | input, 121 | target, 122 | input_valid, 123 | target_valid, 124 | # training parameters 125 | learn_rate_weight = exp(-5), 126 | learn_rate_bias = exp(-5), 127 | learn_rate_gamma = exp(-5), 128 | batch_size = 10, 129 | batch_normalization = T, 130 | dropout_input = 0.4, 131 | dropout_hidden = 0.8, 132 | momentunm_initial = 0.6, 133 | momentum_final = 0.9, 134 | momentum_switch = 100, 135 | num_epochs = 50, 136 | # target types 137 | error_function = meanSquareErr, 138 | report_classification_error = F 139 | ) 140 | 141 | plot(darch@stats$dataErrors$raw) 142 | plot(darch@stats$validErrors$raw) 143 | 144 | 145 | ########################################################################################################################### 146 | # Test 3: Evaluate the Batch Normalization 147 | # Compare BN training with no BN training 148 | 149 | input <- matrix(runif(500), 250, 2) 150 | input_valid <- matrix(runif(50), 25, 2) 151 | target <- rowSums(cos(input) + sin(input)^2 + tan(input)^3) 152 | target_valid <- as.matrix(rowSums(cos(input_valid) + sin(input_valid)^2 + tan(input_valid)^3)) 153 | 154 | 155 | rm(darch) 156 | darch <- new_dnn(c(2, 10, 10, 1)) 157 | darch <- train_dnn(darch, 158 | input, 159 | target, 160 | input_valid, 161 | target_valid, 162 | # training parameters 163 | learn_rate_weight = exp(-8), 164 | learn_rate_bias = exp(-8), 165 | learn_rate_gamma = exp(-8), 166 | batch_size = 10, 167 | batch_normalization = T, 168 | dropout_input = 0., 169 | dropout_hidden = 0., 170 | momentunm_initial = 0.6, 171 | momentum_final = 0.9, 172 | momentum_switch = 100, 173 | num_epochs = 250, 174 | # target types 175 | error_function = meanSquareErr, 176 | report_classification_error = F 177 | ) 178 | 179 | rsq(darch, input = input, target = target) 180 | # 50 Iter: .959 181 | # 250 Iter: .961 182 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 183 | 184 | rsq(darch, input = input_valid, target = target_valid) 185 | # 50 iterations: .965 186 | # 250 iterations: .964 187 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 188 | 189 | plot(darch@stats$dataErrors$raw) 190 | 191 | plot(darch@stats$validErrors$raw) 192 | 193 | 194 | rm(darch_2) 195 | darch_2 <- new_dnn(c(2, 10, 10, 1)) 196 | darch_2 <- train_dnn(darch_2, 197 | input, 198 | target, 199 | input_valid, 200 | target_valid, 201 | # training parameters 202 | learn_rate_weight = exp(-8), 203 | learn_rate_bias = exp(-8), 204 | learn_rate_gamma = exp(-8), 205 | batch_size = 10, 206 | batch_normalization = F, 207 | dropout_input = 0., 208 | dropout_hidden = 0., 209 | momentunm_initial = 0.6, 210 | momentum_final = 0.9, 211 | momentum_switch = 100, 212 | num_epochs = 250, 213 | # target types 214 | error_function = meanSquareErr, 215 | report_classification_error = F 216 | ) 217 | 218 | rsq(darch_2, input = input, target = target) 219 | # 50 iterations: .687 220 | # 250 iterations: .780 221 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 222 | 223 | rsq(darch_2, input = input_valid, target = target_valid) 224 | # 50 iterations: .728 225 | # 250 iterations: .881 226 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 227 | 228 | plot(darch_2@stats$dataErrors$raw) 229 | 230 | plot(darch_2@stats$validErrors$raw) 231 | 232 | 233 | 234 | 
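# The test cases in this file repeatedly compare the raw training and validation error
# traces stored in darch@stats. A small helper keeps that comparison in a single plot.
# This is only a sketch: it assumes the dataErrors/validErrors slots are populated by
# train_dnn()/darch() exactly as in the calls above and below.
plot_error_traces <- function(dnn, main = "") {
  err_train <- dnn@stats$dataErrors$raw
  err_valid <- dnn@stats$validErrors$raw
  plot(err_train, type = "l", xlab = "epoch", ylab = "raw error", main = main,
       ylim = range(c(err_train, err_valid)))
  lines(err_valid, col = "red")
  legend("topright", legend = c("training", "validation"), col = c("black", "red"), lty = 1)
}
# e.g. plot_error_traces(darch, "with batch normalization")
#      plot_error_traces(darch_2, "without batch normalization")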
########################################################################################################################### 235 | # Test case 4: 236 | # Cross comparison between train_dnn and darch 237 | # differences: 238 | # 1) batch generation 239 | # 2) batch normalization 240 | # 3) Cross Entropy Error 241 | # 4) Bug in runDArch with dropout 242 | 243 | # 4.1 benchmark - 1 batch. use no batch normalization 244 | 245 | input <- matrix(runif(500), 250, 2) 246 | input_valid <- matrix(runif(50), 25, 2) 247 | target <- rowSums(cos(input) + sin(input)^2) 248 | target_valid <- as.matrix(rowSums(cos(input_valid) + sin(input_valid)^2)) 249 | 250 | # use train_dnn function from deeplearning library 251 | 252 | rm(darch) 253 | darch <- new_dnn(c(2, 20, 20, 1)) 254 | darch <- train_dnn(darch, 255 | input, 256 | target, 257 | input_valid, 258 | target_valid, 259 | # training parameters 260 | learn_rate_weight = exp(-8), 261 | learn_rate_bias = exp(-8), 262 | learn_rate_gamma = exp(-8), 263 | batch_size = 250, 264 | batch_normalization = F, 265 | dropout_input = 0., 266 | dropout_hidden = 0., 267 | momentunm_initial = 0.6, 268 | momentum_final = 0.9, 269 | momentum_switch = 100, 270 | num_epochs = 500, 271 | # target types 272 | error_function = meanSquareErr, 273 | report_classification_error = F 274 | ) 275 | 276 | 277 | rsq(darch, input = input_valid, target = target_valid) # .983 278 | lines(x = c(2,3), y = c(2, 3), col = "red") 279 | 280 | plot(darch@stats$dataErrors$raw) 281 | 282 | plot(darch@stats$validErrors$raw) 283 | 284 | # use darch function from the darch library 285 | 286 | rm(darch) 287 | 288 | darch <- darch( input, 289 | target, 290 | layers = c(2, 20, 20, 1), 291 | xvalid = input_valid, 292 | yValid = target_valid, 293 | # training parameters 294 | darch.learnRateBiases = exp(-8), 295 | darch.learnRateWeights = exp(-8), 296 | darch.layerFunctionDefault = rectified_linear_unit_function, 297 | darch.layerFunctions = list("3" = linearUnitDerivative), 298 | darch.batchSize = 250, 299 | darch.dropoutInput = 0., 300 | darch.dropoutHidden = 0., 301 | darch.momentumSwitch = 100, 302 | darch.initialMomentum = 0.6, 303 | darch.finalMomentum = 0.9, 304 | darch.numEpochs = 500, 305 | darch.isBin = F, 306 | darch.isClass = F 307 | ) 308 | 309 | rsq(darch, input = input_valid, target = target_valid) # .986 310 | lines(x = c(2,3), y = c(2, 3), col = "red") 311 | 312 | plot(darch@stats$dataErrors$raw) 313 | 314 | plot(darch@stats$validErrors$raw) 315 | 316 | 317 | ######################################################################## 318 | # 4.2 test batch initialization - 50 batches. 
use no batch normalization 319 | 320 | 321 | input <- matrix(runif(500), 250, 2) 322 | input_valid <- matrix(runif(50), 25, 2) 323 | target <- rowSums(cos(input) + sin(input)^2 + tan(input)^3) 324 | target_valid <- as.matrix(rowSums(cos(input_valid) + sin(input_valid)^2 + tan(input_valid)^3)) 325 | 326 | # use train_dnn function from deeplearning library 327 | 328 | rm(darch) 329 | darch <- new_dnn(c(2, 20, 20, 1)) 330 | darch <- train_dnn(darch, 331 | input, 332 | target, 333 | input_valid, 334 | target_valid, 335 | # training parameters 336 | learn_rate_weight = exp(-8), 337 | learn_rate_bias = exp(-8), 338 | learn_rate_gamma = exp(-8), 339 | batch_size = 10, 340 | batch_normalization = F, 341 | dropout_input = 0., 342 | dropout_hidden = 0., 343 | momentunm_initial = 0.6, 344 | momentum_final = 0.9, 345 | momentum_switch = 100, 346 | num_epochs = 500, 347 | # target types 348 | error_function = meanSquareErr, 349 | report_classification_error = F 350 | ) 351 | 352 | rsq(darch, input = input, target = target) 353 | # 100 iterations: .760 354 | # 500 iterations: .987 355 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 356 | 357 | rsq(darch, input = input_valid, target = target_valid) 358 | # 100 iterations: .770 359 | # 500 iterations; .979 360 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 361 | 362 | plot(darch@stats$dataErrors$raw) 363 | 364 | plot(darch@stats$validErrors$raw) 365 | 366 | # use darch function from the darch library 367 | 368 | darch_2 <- darch( input, 369 | target, 370 | layers = c(2, 20, 20, 1), 371 | xvalid = input_valid, 372 | yValid = target_valid, 373 | # training parameters 374 | darch.learnRateBiases = exp(-8), 375 | darch.learnRateWeights = exp(-8), 376 | darch.layerFunctionDefault = rectified_linear_unit_function, 377 | darch.layerFunctions = list("3" = linearUnitDerivative), 378 | darch.batchSize = 10, 379 | darch.dropoutInput = 0., 380 | darch.dropoutHidden = 0., 381 | darch.momentumSwitch = 100, 382 | darch.initialMomentum = 0.6, 383 | darch.finalMomentum = 0.9, 384 | darch.numEpochs = 500, 385 | darch.isBin = F, 386 | darch.isClass = F 387 | ) 388 | 389 | rsq(darch_2) 390 | # 100 iterations: .767 391 | # 500 iterations: .980 392 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 393 | 394 | rsq(darch_2, input = input_valid, target = target_valid) 395 | # 100 iterations: .733 396 | # 500 iterations: .974 397 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 398 | 399 | plot(darch_2@stats$dataErrors$raw) 400 | 401 | plot(darch_2@stats$validErrors$raw) 402 | 403 | 404 | 405 | #################################################################### 406 | # 4.3 test batchnormalization - 50 batches. 
use batch normalization 407 | 408 | input <- matrix(runif(500), 250, 2) 409 | input_valid <- matrix(runif(50), 25, 2) 410 | target <- rowSums(cos(input) + sin(input)^2 + tan(input)^3) 411 | target_valid <- as.matrix(rowSums(cos(input_valid) + sin(input_valid)^2 + tan(input_valid)^3)) 412 | 413 | # use train_dnn function from deeplearning library 414 | 415 | rm(darch) 416 | darch <- new_dnn(c(2, 20, 20, 1)) 417 | darch <- train_dnn(darch, 418 | input, 419 | target, 420 | input_valid, 421 | target_valid, 422 | # training parameters 423 | learn_rate_weight = exp(-8), 424 | learn_rate_bias = exp(-8), 425 | learn_rate_gamma = exp(-8), 426 | batch_size = 10, 427 | batch_normalization = T, 428 | dropout_input = 0., 429 | dropout_hidden = 0, 430 | momentunm_initial = 0.6, 431 | momentum_final = 0.9, 432 | momentum_switch = 100, 433 | num_epochs = 100, 434 | # target types 435 | error_function = meanSquareErr, 436 | report_classification_error = F 437 | ) 438 | 439 | rsq(darch, input = input, target = target) 440 | # 100 Iterations: .968 441 | # 500 Iterations: .971 442 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 443 | 444 | rsq(darch, input = input_valid, target = target_valid) 445 | # 100 Iterations: .946 446 | # 500 Iterations: .930 447 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 448 | 449 | plot(darch@stats$dataErrors$raw) 450 | 451 | plot(darch@stats$validErrors$raw) 452 | 453 | 454 | 455 | 456 | # use darch function from the darch library 457 | darch_2 <- darch( input, 458 | target, 459 | layers = c(2, 20, 20, 1), 460 | xvalid = input_valid, 461 | yValid = target_valid, 462 | # training parameters 463 | darch.learnRateBiases = exp(-8), 464 | darch.learnRateWeights = exp(-8), 465 | darch.layerFunctionDefault = rectified_linear_unit_function, 466 | darch.layerFunctions = list("3" = linearUnitDerivative), 467 | darch.batchSize = 10, 468 | darch.dropoutInput = 0., 469 | darch.dropoutHidden = 0, 470 | darch.momentumSwitch = 100, 471 | darch.initialMomentum = 0.6, 472 | darch.finalMomentum = 0.9, 473 | darch.numEpochs = 500, 474 | darch.isBin = F, 475 | darch.isClass = F 476 | ) 477 | 478 | rsq(darch_2) 479 | # 100 Iterations: .727 480 | # 500 Iterations: .974 481 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 482 | 483 | rsq(darch_2, input = input_valid, target = target_valid) 484 | # 100 Iterations: .742 485 | # 500 Iterations: .974 486 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 487 | 488 | plot(darch_2@stats$dataErrors$raw) 489 | 490 | plot(darch_2@stats$validErrors$raw) 491 | 492 | 493 | 494 | ################################################################################# 495 | # 4.4 test batchnormalization - 50 batches. use batch normalization. 
use dropout 496 | 497 | input <- matrix(runif(500), 250, 2) 498 | input_valid <- matrix(runif(50), 25, 2) 499 | target <- rowSums(cos(input) + sin(input)^2 + tan(input)^3) 500 | target_valid <- as.matrix(rowSums(cos(input_valid) + sin(input_valid)^2 + tan(input_valid)^3)) 501 | 502 | # use train_dnn function from deeplearning library 503 | 504 | rm(darch) 505 | darch <- new_dnn(c(2, 40, 40, 1), hidden_layer_default = sigmoidUnitDerivative) 506 | darch <- train_dnn(darch, 507 | input, 508 | target, 509 | input_valid, 510 | target_valid, 511 | # training parameters 512 | 513 | learn_rate_weight = exp(-8) * 100, 514 | learn_rate_bias = exp(-8) * 100, 515 | learn_rate_gamma = exp(-8) * 100, 516 | batch_size = 10, 517 | batch_normalization = T, 518 | dropout_input = 0.2, 519 | dropout_hidden = 0, 520 | momentunm_initial = 0.6, 521 | momentum_final = 0.9, 522 | momentum_switch = 100, 523 | num_epochs = 500, 524 | # target types 525 | error_function = meanSquareErr, 526 | report_classification_error = F 527 | ) 528 | 529 | rsq(darch, input = input, target = target) 530 | # learn rate: exp(-8) * 100 531 | # dropout input/hidden: .2/.3 532 | # 100 Iterations: .742 533 | # 500 Iterations: .937 534 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 535 | 536 | rsq(darch, input = input_valid, target = target_valid) 537 | # learn rate: exp(-8) * 100 538 | # dropout input/hidden: .2/.3 539 | # 100 Iterations: .782 540 | # 500 Iterations: .914 541 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 542 | 543 | plot(darch@stats$dataErrors$raw) 544 | 545 | plot(darch@stats$validErrors$raw) 546 | 547 | # use darch function from the darch library 548 | 549 | 550 | 551 | darch_2 <- darch( input, 552 | target, 553 | layers = c(2, 40, 40, 1), 554 | xvalid = input_valid, 555 | yValid = target_valid, 556 | # training parameters 557 | darch.learnRateBiases = exp(-8) * 100, 558 | darch.learnRateWeights = exp(-8) * 100, 559 | darch.layerFunctionDefault = sigmoidUnitDerivative, 560 | darch.layerFunctions = list("3" = linearUnitDerivative), 561 | darch.batchSize = 10, 562 | darch.dropoutInput = 0.2, 563 | darch.dropoutHidden = 0.3, 564 | darch.momentumSwitch = 100, 565 | darch.initialMomentum = 0.6, 566 | darch.finalMomentum = 0.9, 567 | darch.numEpochs = 100, 568 | darch.isBin = F, 569 | darch.isClass = F 570 | ) 571 | 572 | # drop out fails with ReLU!!!!!!!!!!!!!!!!!!!!!! 
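# A hand-rolled R-squared for cross-checking the rsq() calls in these tests.
# This is only a sketch: it assumes rsq() reports the usual 1 - SS_res / SS_tot
# of the network's predictions on the supplied data.
rsq_manual <- function(dnn, input, target) {
  pred <- predict(dnn, newdata = input)
  1 - sum((target - pred)^2) / sum((target - mean(target))^2)
}
# e.g. rsq_manual(darch_2, input, target)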
573 | 574 | rsq(darch_2, input = input, target = target) 575 | # learn rate: exp(-8) * 100 576 | # dropout input/hidden: .2/.3 577 | # 100 Iterations: .638 578 | # 500 Iterations: .657 579 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 580 | 581 | rsq(darch_2, input = input_valid, target = target_valid) 582 | # learn rate: exp(-8) * 100 583 | # dropout input/hidden: .2/.3 584 | # 100 Iterations: .600 585 | # 500 Iterations: .347 586 | lines(x = c(min(target), max(target)), y = c(min(target), max(target)), col = "red") 587 | 588 | plot(darch_2@stats$dataErrors$raw) 589 | 590 | plot(darch_2@stats$validErrors$raw) 591 | 592 | 593 | 594 | 595 | ################################################################################# 596 | # 4.5 test error functions 597 | 598 | input <- matrix(runif(500), 250, 2) 599 | input_valid <- matrix(runif(50), 25, 2) 600 | target <- rowSums(cos(input) + sin(input)^2 + tan(input)^3) 601 | target_valid <- as.matrix(rowSums(cos(input_valid) + sin(input_valid)^2 + tan(input_valid)^3)) 602 | med <- median(target) 603 | target <- 1 * (target < (med * runif(1) * 2 )) 604 | target_valid <- 1 * (target_valid < (med * runif(1) * 2)) 605 | 606 | 607 | # use train_dnn function from deeplearning library 608 | rm(darch) 609 | darch <- new_dnn(c(2, 20, 20, 1), output_layer_default = sigmoidUnitDerivative) 610 | darch <- train_dnn(darch, 611 | input, 612 | target, 613 | input_valid, 614 | target_valid, 615 | # training parameters 616 | 617 | learn_rate_weight = exp(-8) , 618 | learn_rate_bias = exp(-8), 619 | learn_rate_gamma = exp(-8), 620 | batch_size = 10, 621 | batch_normalization = T, 622 | dropout_input = 0., 623 | dropout_hidden = 0., 624 | momentunm_initial = 0.6, 625 | momentum_final = 0.9, 626 | momentum_switch = 100, 627 | num_epochs = 50, 628 | # target types 629 | error_function = crossEntropyErr, 630 | report_classification_error = T 631 | ) 632 | 633 | AR(darch, input = input, target = target) 634 | 635 | # 100 Iterations: .916 636 | # 500 Iterations: 637 | 638 | AR(darch, input = input_valid, target = target_valid) 639 | 640 | # 100 Iterations: 1 641 | # 500 Iterations: 642 | 643 | plot(darch@stats$dataErrors$raw) 644 | 645 | plot(darch@stats$validErrors$raw) 646 | 647 | # use darch function from the darch library 648 | 649 | 650 | 651 | darch_2 <- darch( input, 652 | target, 653 | layers = c(2, 20, 20, 1), 654 | xvalid = input_valid, 655 | yValid = target_valid, 656 | # training parameters 657 | darch.learnRateBiases = exp(-8) * 1, 658 | darch.learnRateWeights = exp(-8) * 1, 659 | darch.layerFunctionDefault = rectified_linear_unit_function, 660 | darch.layerFunctions = list("3" = sigmoidUnitDerivative), 661 | darch.batchSize = 10, 662 | darch.dropoutInput = 0., 663 | darch.dropoutHidden = 0., 664 | darch.momentumSwitch = 100, 665 | darch.initialMomentum = 0.6, 666 | darch.finalMomentum = 0.9, 667 | darch.numEpochs = 100, 668 | darch.isBin = T, 669 | darch.isClass = F 670 | ) 671 | 672 | # drop out fails with ReLU!!!!!!!!!!!!!!!!!!!!!! 
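# A hand-rolled accuracy-ratio check for the AR() calls below. This is only a sketch:
# it assumes AR is the usual accuracy ratio (Gini coefficient) of a binary classifier,
# i.e. 2 * AUC - 1, which is not necessarily the package's exact implementation.
ar_manual <- function(dnn, input, target) {
  p <- as.vector(predict(dnn, newdata = input))
  pos <- p[target == 1]
  neg <- p[target == 0]
  auc <- mean(outer(pos, neg, ">") + 0.5 * outer(pos, neg, "=="))
  2 * auc - 1
}
# e.g. ar_manual(darch_2, input, target)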
673 | 674 | AR(darch_2, input = input, target = target) 675 | 676 | # 100 Iterations: .94 677 | # 500 Iterations: 678 | 679 | AR(darch_2, input = input_valid, target = target_valid) 680 | 681 | # 100 Iterations: 1 682 | # 500 Iterations: 683 | 684 | plot(darch_2@stats$dataErrors$raw) 685 | 686 | plot(darch_2@stats$validErrors$raw) 687 | 688 | 689 | 690 | 691 | 692 | -------------------------------------------------------------------------------- /man/AR.DArch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AR.R 3 | \name{AR.DArch} 4 | \alias{AR.DArch} 5 | \title{Calculates the Accruacy Ratio of a given set of probability} 6 | \usage{ 7 | \method{AR}{DArch}(x, input = x@dataSet@data, target = x@dataSet@targets, 8 | ...) 9 | } 10 | \arguments{ 11 | \item{x}{a DArch instance} 12 | 13 | \item{input}{the input matrix} 14 | 15 | \item{target}{binary response} 16 | 17 | \item{...}{additional inputs} 18 | } 19 | \description{ 20 | This function calculates the Accuracy Ratio of a trained darch instance 21 | } 22 | 23 | -------------------------------------------------------------------------------- /man/AR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AR.R 3 | \name{AR} 4 | \alias{AR} 5 | \title{Calculates the Accuracy Ratio of a classifier} 6 | \usage{ 7 | AR(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{model} 11 | 12 | \item{...}{additional inputs} 13 | } 14 | \description{ 15 | This function calculates the Accuracy Ratio of a binary classification 16 | model 17 | } 18 | 19 | -------------------------------------------------------------------------------- /man/AR.default.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AR.R 3 | \name{AR.default} 4 | \alias{AR.default} 5 | \title{Calculates the Accruacy Ratio of a given set of probability} 6 | \usage{ 7 | \method{AR}{default}(x, target, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a list of model output in the form of probabilities} 11 | 12 | \item{target}{binary response} 13 | 14 | \item{...}{additional inputs} 15 | } 16 | \description{ 17 | This function calculates the Accuracy Ratio of a binary classification model 18 | output against its targets 19 | } 20 | 21 | -------------------------------------------------------------------------------- /man/AR.numeric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AR.R 3 | \name{AR.numeric} 4 | \alias{AR.numeric} 5 | \title{Calculates the Accruacy Ratio of a given set of probability} 6 | \usage{ 7 | \method{AR}{numeric}(x, target, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{a list of model output in the form of probabilities} 11 | 12 | \item{target}{binary response} 13 | 14 | \item{...}{additional inputs} 15 | } 16 | \description{ 17 | This function calculates the Accuracy Ratio of a binary classification model 18 | output against its targets 19 | } 20 | 21 | -------------------------------------------------------------------------------- /man/applyDropoutMask.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dropout.R 3 | \name{applyDropoutMask} 4 | \alias{applyDropoutMask} 5 | \title{Applies the given dropout mask to the given data row-wise.} 6 | \usage{ 7 | applyDropoutMask(data, mask) 8 | } 9 | \arguments{ 10 | \item{data}{Data to which the dropout mask should be applied} 11 | 12 | \item{mask}{The dropout mask, a vector of 0 and 1.} 13 | } 14 | \value{ 15 | Data with applied dropout mask 16 | } 17 | \description{ 18 | This function multiplies each row with the dropout mask. To apply the dropout 19 | mask by row, it can simply be multiplied with the data matrix. This does not 20 | work of the mask is to be applied row-wise, hence this function. 21 | } 22 | \references{ 23 | Dropout: A Simple Way to Prevent Neural Networks from 24 | Overfitting, Nitish Srivastava 25 | } 26 | \seealso{ 27 | \url{https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf} 28 | } 29 | 30 | -------------------------------------------------------------------------------- /man/backpropagate_delta_bn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/backpropagate_delta.R 3 | \name{backpropagate_delta_bn} 4 | \alias{backpropagate_delta_bn} 5 | \title{Calculates the delta functions using backpropagation} 6 | \usage{ 7 | backpropagate_delta_bn(darch, trainData, targetData, 8 | errorFunc = meanSquareErr, with_BN = TRUE) 9 | } 10 | \arguments{ 11 | \item{darch}{a darch instance} 12 | 13 | \item{trainData}{training input} 14 | 15 | \item{targetData}{training target} 16 | 17 | \item{errorFunc}{error function to minimize during training. 
Right now mean squared 18 | erros and cross entropy errors are supported.} 19 | 20 | \item{with_BN}{traing with batch normalization on or off} 21 | } 22 | \description{ 23 | function that calculates the delta function of a darch object with batch 24 | normalization 25 | } 26 | \references{ 27 | Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift 28 | Sergey Ioffe, Christian Szegedy 29 | } 30 | \seealso{ 31 | \url{http://jmlr.org/proceedings/papers/v37/ioffe15.pdf} Pg 4 32 | } 33 | 34 | -------------------------------------------------------------------------------- /man/batch_normalization.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/batch_normalization.R 3 | \name{batch_normalization} 4 | \alias{batch_normalization} 5 | \title{Batch Normalization Function that normalizes the input before applying non-linearity} 6 | \usage{ 7 | batch_normalization(x, gamma, beta, mu = NULL, sigma_2 = NULL, 8 | epsilon = exp(-12)) 9 | } 10 | \arguments{ 11 | \item{x}{weighted sum of outputs from the previous layer} 12 | 13 | \item{gamma}{the gamma coefficient} 14 | 15 | \item{beta}{the beta coefficient} 16 | 17 | \item{mu}{the mean of the input neurons. If NULL, it will be caluclated in the function.} 18 | 19 | \item{sigma_2}{the variance of the input nerurons. If NULL, it will be calcualted in the function.} 20 | 21 | \item{epsilon}{a constant added to the variance for numerical stability} 22 | } 23 | \description{ 24 | This function normalizes the distribution of inputs to hidden layers in 25 | a neural network 26 | } 27 | \references{ 28 | Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift 29 | Sergey Ioffe, Christian Szegedy 30 | } 31 | \seealso{ 32 | \url{http://jmlr.org/proceedings/papers/v37/ioffe15.pdf} Pg 4 33 | } 34 | 35 | -------------------------------------------------------------------------------- /man/batch_normalization_differential.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/batch_normalization.R 3 | \name{batch_normalization_differential} 4 | \alias{batch_normalization_differential} 5 | \title{Function that calcualtes the differentials in the batch normalization mode} 6 | \usage{ 7 | batch_normalization_differential(delta_y, mu, sigma_2, x, x_hat, y, gamma, beta, 8 | epsilon = exp(-12), with_BN = T) 9 | } 10 | \arguments{ 11 | \item{delta_y}{derivative wrt y} 12 | 13 | \item{mu}{mean of the input} 14 | 15 | \item{sigma_2}{variance of the input} 16 | 17 | \item{x}{input} 18 | 19 | \item{x_hat}{normalized input} 20 | 21 | \item{y}{transformed input after batch normalization} 22 | 23 | \item{gamma}{gamma coefficient} 24 | 25 | \item{beta}{beta coefficient} 26 | 27 | \item{epsilon}{the contant added to the variance for numeric stability} 28 | 29 | \item{with_BN}{logical value, set to TRUE to turn on batch normalization} 30 | } 31 | \description{ 32 | Calculates the differentials in batch normalization 33 | } 34 | \references{ 35 | Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift 36 | Sergey Ioffe, Christian Szegedy 37 | } 38 | \seealso{ 39 | \url{http://jmlr.org/proceedings/papers/v37/ioffe15.pdf} Pg 4 40 | } 41 | 42 | -------------------------------------------------------------------------------- 
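The batch_normalization and batch_normalization_differential help pages above describe the forward transform of Ioffe and Szegedy: the pre-activations are normalized with the batch (or population) mean and variance and then scaled and shifted by gamma and beta. A minimal stand-alone sketch of that forward pass, using the documented argument names but not the package's own code, could look like this:

bn_sketch <- function(x, gamma, beta, mu = NULL, sigma_2 = NULL, epsilon = exp(-12)) {
  if (is.null(mu))      mu      <- colMeans(x)        # batch mean per neuron
  if (is.null(sigma_2)) sigma_2 <- apply(x, 2, var)   # batch variance per neuron
  x_hat <- sweep(x, 2, mu, "-")                           # center
  x_hat <- sweep(x_hat, 2, sqrt(sigma_2 + epsilon), "/")  # normalize
  sweep(sweep(x_hat, 2, gamma, "*"), 2, beta, "+")        # y = gamma * x_hat + beta
}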
/man/calcualte_population_mu_sigma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calculate_mu_sigma.R 3 | \name{calcualte_population_mu_sigma} 4 | \alias{calcualte_population_mu_sigma} 5 | \title{Calculates the mu and sigmas of a darch instance} 6 | \usage{ 7 | calcualte_population_mu_sigma(darch, input) 8 | } 9 | \arguments{ 10 | \item{darch}{a darch instance} 11 | 12 | \item{input}{input data} 13 | } 14 | \description{ 15 | This function calculates the mu and sigmas of hidden layers in a darch instance 16 | } 17 | 18 | -------------------------------------------------------------------------------- /man/classification_error.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/error_functions.R 3 | \name{classification_error} 4 | \alias{classification_error} 5 | \title{Calculates the classification error} 6 | \usage{ 7 | classification_error(output, target) 8 | } 9 | \arguments{ 10 | \item{output}{the output of a classifier in the form of probability. Probability > 1 11 | will be treated as positive (target = 1).} 12 | 13 | \item{target}{the target variable} 14 | } 15 | \description{ 16 | This function calculates the classification error 17 | } 18 | 19 | -------------------------------------------------------------------------------- /man/convert_categorical.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{convert_categorical} 4 | \alias{convert_categorical} 5 | \title{Data proprosess function that covnerts a categorical input to continuous input or 6 | vectorize it} 7 | \usage{ 8 | convert_categorical(x, type = "ordinal", ordered_list = list(), 9 | var_name = "var", ...) 10 | } 11 | \arguments{ 12 | \item{x}{input variable} 13 | 14 | \item{type}{ordinal or other} 15 | 16 | \item{ordered_list}{the rank ordering of an ordinal variable. Users are expected to 17 | provide a complete list of the rank ordering. Otherwise, a default rank ordering 18 | will be used.} 19 | 20 | \item{var_name}{the name of the input variable. This is used to to create vectorized 21 | input variables} 22 | 23 | \item{...}{other inputs} 24 | } 25 | \description{ 26 | Proprosess a data set. 
It converts categorical data into binary variables 27 | if it is unordered or continuous variable from 0 to 1 if it is ordinal 28 | } 29 | 30 | -------------------------------------------------------------------------------- /man/crossEntropyErr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/error_functions.R 3 | \name{crossEntropyErr} 4 | \alias{crossEntropyErr} 5 | \title{Calculates the cross entropy error} 6 | \usage{ 7 | crossEntropyErr(output, target) 8 | } 9 | \arguments{ 10 | \item{output}{the output value} 11 | 12 | \item{target}{the target value} 13 | } 14 | \description{ 15 | This function calculates the cross entropy error and its first order derivatives 16 | } 17 | 18 | -------------------------------------------------------------------------------- /man/finetune_SGD_bn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/finetune_SGD.R 3 | \name{finetune_SGD_bn} 4 | \alias{finetune_SGD_bn} 5 | \title{Updates a deep neural network's parameters using stochastic gradient descent 6 | method and batch normalization} 7 | \usage{ 8 | finetune_SGD_bn(darch, trainData, targetData, learn_rate_weight = exp(-10), 9 | learn_rate_bias = exp(-10), learn_rate_gamma = exp(-10), 10 | errorFunc = meanSquareErr, with_BN = T) 11 | } 12 | \arguments{ 13 | \item{darch}{a darch instance} 14 | 15 | \item{trainData}{training input} 16 | 17 | \item{targetData}{training target} 18 | 19 | \item{learn_rate_weight}{leanring rate for the weight matrices} 20 | 21 | \item{learn_rate_bias}{learning rate for the biases} 22 | 23 | \item{learn_rate_gamma}{learning rate for the gammas} 24 | 25 | \item{errorFunc}{the error function to minimize during training} 26 | 27 | \item{with_BN}{logical value, T to train the neural net with batch normalization} 28 | } 29 | \value{ 30 | a darch instance with parameters updated with stochastic gradient descent 31 | } 32 | \description{ 33 | This function finetunes a DArch network using SGD approach 34 | } 35 | 36 | -------------------------------------------------------------------------------- /man/generateDropoutMask.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dropout.R 3 | \name{generateDropoutMask} 4 | \alias{generateDropoutMask} 5 | \title{Generates the dropout mask for the deep neural network} 6 | \usage{ 7 | generateDropoutMask(length, dropoutRate) 8 | } 9 | \arguments{ 10 | \item{length, }{the dimension of the layer} 11 | 12 | \item{dropoutRate, }{the dropout rate} 13 | } 14 | \description{ 15 | This function generates the dropout mask for the deep neural network 16 | } 17 | \references{ 18 | Dropout: A Simple Way to Prevent Neural Networks from 19 | Overfitting, Nitish Srivastava 20 | } 21 | \seealso{ 22 | \url{https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf} 23 | } 24 | 25 | -------------------------------------------------------------------------------- /man/generateDropoutMasksForDarch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dropout.R 3 | \name{generateDropoutMasksForDarch} 4 | \alias{generateDropoutMasksForDarch} 5 | \title{Generates dropout masks for dnn} 6 | 
\usage{ 7 | generateDropoutMasksForDarch(darch, dropout_input, dropout_hidden) 8 | } 9 | \arguments{ 10 | \item{darch, }{a DArch instance} 11 | 12 | \item{dropout_input, }{the dropout rate for the input layer} 13 | 14 | \item{dropout_hidden, }{the dropout rate for the hidden layer} 15 | } 16 | \description{ 17 | This function generates dropout maks for dnn 18 | } 19 | \references{ 20 | Dropout: A Simple Way to Prevent Neural Networks from 21 | Overfitting, Nitish Srivastava 22 | } 23 | \seealso{ 24 | \url{https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf} 25 | } 26 | 27 | -------------------------------------------------------------------------------- /man/matMult.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{matMult} 4 | \alias{matMult} 5 | \title{Calculates the outer product of two matricies} 6 | \usage{ 7 | matMult(data, weight) 8 | } 9 | \arguments{ 10 | \item{data}{the date matrix} 11 | 12 | \item{weight}{the weight matrix} 13 | } 14 | \description{ 15 | Calcualtes the outer product of two matrices 16 | } 17 | 18 | -------------------------------------------------------------------------------- /man/meanSquareErr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/error_functions.R 3 | \name{meanSquareErr} 4 | \alias{meanSquareErr} 5 | \title{Calculates the mean squared error} 6 | \usage{ 7 | meanSquareErr(output, target) 8 | } 9 | \arguments{ 10 | \item{output}{the output value} 11 | 12 | \item{target}{the target value} 13 | } 14 | \description{ 15 | This function calculates the mean squared error and its first order derivatives 16 | } 17 | 18 | -------------------------------------------------------------------------------- /man/new_dnn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/new_dnn.R 3 | \name{new_dnn} 4 | \alias{new_dnn} 5 | \title{Creats a new instance of darch class} 6 | \usage{ 7 | new_dnn(layer_structure, layer_functions = NULL, 8 | output_layer_default = linearUnitDerivative, 9 | hidden_layer_default = rectified_linear_unit_function, 10 | weight_initiliazaiton = generateWeights) 11 | } 12 | \arguments{ 13 | \item{layer_structure}{a int vector that specifies the number and width of layers} 14 | 15 | \item{layer_functions}{a list of activation functions used by each layer} 16 | 17 | \item{output_layer_default}{the activation function for the output layer} 18 | 19 | \item{hidden_layer_default}{the activation function for the hidden layers} 20 | 21 | \item{weight_initiliazaiton}{function that initialize a layer's weight matrix} 22 | } 23 | \description{ 24 | This function creates a new instance of darch class 25 | } 26 | \examples{ 27 | # create a new deep neural network for classificaiton 28 | dnn_regression <- new_dnn( 29 | c(2, 50, 50, 20, 1), 30 | # The layer structure of the deep neural network. 31 | # The first element is the number of input variables. 32 | # The last element is the number of output variables. 
33 | hidden_layer_default = rectified_linear_unit_function, 34 | # for hidden layers, use rectified_linear_unit_function 35 | output_layer_default = sigmoidUnitDerivative 36 | # for classification, use sigmoidUnitDerivative function 37 | ) 38 | 39 | # create a new deep neural network for classificaiton 40 | dnn_regression <- new_dnn( 41 | c(2, 50, 50, 20, 1), 42 | # The layer structure of the deep neural network. 43 | # The first element is the number of input variables. 44 | # The last element is the number of output variables. 45 | hidden_layer_default = rectified_linear_unit_function, 46 | # for hidden layers, use rectified_linear_unit_function 47 | output_layer_default = linearUnitDerivative 48 | # for regression, use linearUnitDerivative function 49 | ) 50 | } 51 | 52 | -------------------------------------------------------------------------------- /man/print_weight.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{print_weight} 4 | \alias{print_weight} 5 | \title{Prints out the weight of a deep neural network} 6 | \usage{ 7 | print_weight(darch, num_of_layer, show_derivative = F, type = "heatmap") 8 | } 9 | \arguments{ 10 | \item{darch}{DArch instance} 11 | 12 | \item{num_of_layer}{the number of the layer to print} 13 | 14 | \item{show_derivative}{T to show the weight value. F to show the percentage 15 | weight change in the finetuning stage. This helps spot the network saturation problem.} 16 | 17 | \item{type}{type of the graph. It supports "heatmap", "surface", and "histogram"} 18 | } 19 | \description{ 20 | This function prints out the weight in a heat map, 3D surface, or histogram 21 | } 22 | \examples{ 23 | # Example of Regression 24 | 25 | input <- matrix(runif(1000), 500, 2) 26 | input_valid <- matrix(runif(100), 50, 2) 27 | target <- rowSums(input + input^2) 28 | target_valid <- rowSums(input_valid + input_valid^2) 29 | # create a new deep neural network for classificaiton 30 | dnn_regression <- new_dnn( 31 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 32 | # The first element is the number of input variables. 33 | # The last element is the number of output variables. 
34 | hidden_layer_default = rectified_linear_unit_function, 35 | # for hidden layers, use rectified_linear_unit_function 36 | output_layer_default = linearUnitDerivative 37 | # for regression, use linearUnitDerivative function 38 | ) 39 | 40 | # print the layer weights 41 | # this function can print heatmap, histogram, or a surface 42 | print_weight(dnn_regression, 1, type = "heatmap") 43 | 44 | print_weight(dnn_regression, 2, type = "surface") 45 | 46 | print_weight(dnn_regression, 3, type = "histogram") 47 | 48 | 49 | } 50 | 51 | -------------------------------------------------------------------------------- /man/rectified_linear_unit_function.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rectified_linear_unit_function.R 3 | \name{rectified_linear_unit_function} 4 | \alias{rectified_linear_unit_function} 5 | \title{Rectified Linear Unit Function} 6 | \usage{ 7 | rectified_linear_unit_function(data, weights) 8 | } 9 | \arguments{ 10 | \item{data}{the data matrix for calculation} 11 | 12 | \item{weights}{the connection (weight matrix/filter) and the bias} 13 | } 14 | \value{ 15 | A list of function values and derivatives 16 | } 17 | \description{ 18 | This functions calculates the value and the derivative of a rectified linear 19 | function. Reference Vinod Nair, Geoffrey Hinton, Rectified Linear Units 20 | Improve Restricted Boltzmann Machines 21 | } 22 | 23 | -------------------------------------------------------------------------------- /man/reset_population_mu_sigma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calculate_mu_sigma.R 3 | \name{reset_population_mu_sigma} 4 | \alias{reset_population_mu_sigma} 5 | \title{Resets the mu and sigmas of a darch instance to 0 and 1} 6 | \usage{ 7 | reset_population_mu_sigma(darch) 8 | } 9 | \arguments{ 10 | \item{darch}{a darch instance} 11 | } 12 | \description{ 13 | This function resets the mu and sigmas of hidden layers in a darch instance 14 | to 0 and 1 15 | } 16 | 17 | -------------------------------------------------------------------------------- /man/rsq.DArch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rsq.R 3 | \name{rsq.DArch} 4 | \alias{rsq.DArch} 5 | \title{Utilitiy function that calcualtes RSQ of a DArch instance} 6 | \usage{ 7 | \method{rsq}{DArch}(x, input = x@dataSet@data, target = x@dataSet@targets, 8 | ...) 9 | } 10 | \arguments{ 11 | \item{x}{DArch Model} 12 | 13 | \item{input}{Input data} 14 | 15 | \item{target}{Target data} 16 | 17 | \item{...}{addtional inputs} 18 | } 19 | \description{ 20 | Calcualte a regression model's RSQ of a deep neural network 21 | } 22 | 23 | -------------------------------------------------------------------------------- /man/rsq.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rsq.R 3 | \name{rsq} 4 | \alias{rsq} 5 | \title{Calculate the RSQ of a regression model 6 | Utilitiy function that calcualtes RSQ of a model. It measures the goodness-of- 7 | fit of a regression model.} 8 | \usage{ 9 | rsq(x, ...) 
10 | } 11 | \arguments{ 12 | \item{x}{Regression Model} 13 | 14 | \item{...}{Additional Input} 15 | } 16 | \description{ 17 | Calculate the RSQ of a regression model 18 | Utilitiy function that calcualtes RSQ of a model. It measures the goodness-of- 19 | fit of a regression model. 20 | } 21 | 22 | -------------------------------------------------------------------------------- /man/rsq.lm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rsq.R 3 | \name{rsq.lm} 4 | \alias{rsq.lm} 5 | \title{Utilitiy function that calcualtes RSQ of a linear model} 6 | \usage{ 7 | \method{rsq}{lm}(x, input, target, ...) 8 | } 9 | \arguments{ 10 | \item{x}{linear Model} 11 | 12 | \item{input}{Input data} 13 | 14 | \item{target}{Target data} 15 | 16 | \item{...}{additional inputs} 17 | } 18 | \description{ 19 | Calcualte a regression model's RSQ 20 | } 21 | 22 | -------------------------------------------------------------------------------- /man/run_dnn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run_dnn.R 3 | \name{run_dnn} 4 | \alias{run_dnn} 5 | \title{Execution function that runs in the batch normalization mode} 6 | \usage{ 7 | run_dnn(darch, data) 8 | } 9 | \arguments{ 10 | \item{darch}{a darch instance} 11 | 12 | \item{data}{input data} 13 | } 14 | \description{ 15 | This function calcualtes the output of a deep neural network with input data 16 | } 17 | 18 | -------------------------------------------------------------------------------- /man/train_dnn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/train_dnn.R 3 | \name{train_dnn} 4 | \alias{train_dnn} 5 | \title{Train a deep neural network} 6 | \usage{ 7 | train_dnn(darch, input, target, input_valid = NULL, target_valid = NULL, 8 | ..., learn_rate_weight = exp(-10), learn_rate_bias = exp(-10), 9 | learn_rate_gamma = 1, batch_size = 10, batch_normalization = TRUE, 10 | dropout_input = 0, dropout_hidden = 0, momentum_initial = 0.6, 11 | momentum_final = 0.9, momentum_switch = 100, num_epochs = 0, 12 | error_function = meanSquareErr, report_classification_error = FALSE) 13 | } 14 | \arguments{ 15 | \item{darch}{a darch instance} 16 | 17 | \item{input}{input data for training} 18 | 19 | \item{target}{target data for training} 20 | 21 | \item{input_valid}{input data for validation} 22 | 23 | \item{target_valid}{target data for validation} 24 | 25 | \item{...}{additional input} 26 | 27 | \item{learn_rate_weight}{learning rate for the weight matrices} 28 | 29 | \item{learn_rate_bias}{learning rate for the biases} 30 | 31 | \item{learn_rate_gamma}{learning rate for the gamma} 32 | 33 | \item{batch_size}{batch size during training} 34 | 35 | \item{batch_normalization}{logical value that determines whether to turn on 36 | batch normalization during training. Recommneded value: T} 37 | 38 | \item{dropout_input}{dropout ratio at input layer. Recommneded value: 0.2} 39 | 40 | \item{dropout_hidden}{dropout ratio at hidden layers. Recommended value: 0.5} 41 | 42 | \item{momentum_initial}{momentum ratio during training. Recommended value: 0.6} 43 | 44 | \item{momentum_final}{final momentum during training. 
Recommended value: 0.9} 45 | 46 | \item{momentum_switch}{afther which epoch the final momentum ratio is used during training} 47 | 48 | \item{num_epochs}{number of iterations of the training} 49 | 50 | \item{error_function}{error function to minimize during training} 51 | 52 | \item{report_classification_error}{logical value. T to report the classification error 53 | during training} 54 | } 55 | \value{ 56 | a trained deep neural network (darch instance) 57 | } 58 | \description{ 59 | This function trains a deep neural network 60 | } 61 | \examples{ 62 | # Example of Regression 63 | 64 | input <- matrix(runif(1000), 500, 2) 65 | input_valid <- matrix(runif(100), 50, 2) 66 | target <- rowSums(input + input^2) 67 | target_valid <- rowSums(input_valid + input_valid^2) 68 | # create a new deep neural network for classificaiton 69 | dnn_regression <- new_dnn( 70 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 71 | # The first element is the number of input variables. 72 | # The last element is the number of output variables. 73 | hidden_layer_default = rectified_linear_unit_function, 74 | # for hidden layers, use rectified_linear_unit_function 75 | output_layer_default = linearUnitDerivative 76 | # for regression, use linearUnitDerivative function 77 | ) 78 | 79 | dnn_regression <- train_dnn( 80 | dnn_regression, 81 | 82 | # training data 83 | input, # input variable for training 84 | target, # target variable for training 85 | input_valid, # input variable for validation 86 | target_valid, # target variable for validation 87 | 88 | # training parameters 89 | learn_rate_weight = exp(-8) * 10, 90 | # learning rate for weights, higher if use dropout 91 | learn_rate_bias = exp(-8) * 10, 92 | # learning rate for biases, hihger if use dropout 93 | learn_rate_gamma = exp(-8) * 10, 94 | # learning rate for the gamma factor used 95 | batch_size = 10, 96 | # number of observations in a batch during training. 97 | # Higher for faster training. Lower for faster convergence 98 | batch_normalization = TRUE, 99 | # logical value, T to use batch normalization 100 | dropout_input = 0.2, 101 | # dropout ratio in input. 102 | dropout_hidden = 0.5, 103 | # dropout ratio in hidden layers 104 | momentum_initial = 0.6, 105 | # initial momentum in Stochastic Gradient Descent training 106 | momentum_final = 0.9, 107 | # final momentum in Stochastic Gradient Descent training 108 | momentum_switch = 100, 109 | # after which the momentum is switched from initial to final momentum 110 | num_epochs = 5, 111 | # number of iterations in training 112 | # increase numbef of epochs to 100 for better model fit 113 | 114 | 115 | # Error function 116 | error_function = meanSquareErr, 117 | # error function to minimize during training. 
For regression, use meanSquareErr 118 | report_classification_error = FALSE 119 | # whether to print classification error during training 120 | ) 121 | 122 | 123 | # the prediciton by dnn_regression 124 | pred <- predict(dnn_regression) 125 | 126 | # calculate the r-squared of the prediciton 127 | rsq(dnn_regression) 128 | 129 | 130 | # calcualte the r-squared of the prediciton in validation 131 | rsq(dnn_regression, input = input_valid, target = target_valid) 132 | 133 | # print the layer weights 134 | # this function can print heatmap, histogram, or a surface 135 | print_weight(dnn_regression, 1, type = "heatmap") 136 | 137 | print_weight(dnn_regression, 2, type = "surface") 138 | 139 | print_weight(dnn_regression, 3, type = "histogram") 140 | 141 | 142 | # Examples of classification 143 | 144 | input <- matrix(runif(1000), 500, 2) 145 | input_valid <- matrix(runif(100), 50, 2) 146 | target <- (cos(rowSums(input + input^2)) > 0.5) * 1 147 | target_valid <- (cos(rowSums(input_valid + input_valid^2)) > 0.5) * 1 148 | 149 | # create a new deep neural network for classificaiton 150 | dnn_classification <- new_dnn( 151 | c(2, 50, 50, 20, 1), # The layer structure of the deep neural network. 152 | # The first element is the number of input variables. 153 | # The last element is the number of output variables. 154 | hidden_layer_default = rectified_linear_unit_function, 155 | # for hidden layers, use rectified_linear_unit_function 156 | output_layer_default = sigmoidUnitDerivative 157 | # for classification, use sigmoidUnitDerivative function 158 | ) 159 | 160 | dnn_classification <- train_dnn( 161 | dnn_classification, 162 | 163 | # training data 164 | input, # input variable for training 165 | target, # target variable for training 166 | input_valid, # input variable for validation 167 | target_valid, # target variable for validation 168 | 169 | # training parameters 170 | learn_rate_weight = exp(-8) * 10, 171 | # learning rate for weights, higher if use dropout 172 | learn_rate_bias = exp(-8) * 10, 173 | # learning rate for biases, hihger if use dropout 174 | learn_rate_gamma = exp(-8) * 10, 175 | # learning rate for the gamma factor used 176 | batch_size = 10, 177 | # number of observations in a batch during training. 178 | # Higher for faster training. Lower for faster convergence 179 | batch_normalization = TRUE, 180 | # logical value, T to use batch normalization 181 | dropout_input = 0.2, 182 | # dropout ratio in input. 183 | dropout_hidden = 0.5, 184 | # dropout ratio in hidden layers 185 | momentum_initial = 0.6, 186 | # initial momentum in Stochastic Gradient Descent training 187 | momentum_final = 0.9, 188 | # final momentum in Stochastic Gradient Descent training 189 | momentum_switch = 100, 190 | # after which the momentum is switched from initial to final momentum 191 | num_epochs = 5, 192 | # number of iterations in training 193 | # increase num_epochs to 100 for better model fit 194 | 195 | # Error function 196 | error_function = crossEntropyErr, 197 | # error function to minimize during training. 
For classification, use crossEntropyErr 198 | report_classification_error = TRUE 199 | # whether to print classification error during training 200 | ) 201 | 202 | # the prediction by dnn_classification 203 | pred <- predict(dnn_classification) 204 | 205 | hist(pred) 206 | 207 | # calculate the accuracy ratio of the prediction 208 | AR(dnn_classification) 209 | 210 | # calculate the accuracy ratio of the prediction in validation 211 | AR(dnn_classification, input = input_valid, target = target_valid) 212 | 213 | 214 | } 215 | 216 | -------------------------------------------------------------------------------- /man/verticalize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{verticalize} 4 | \alias{verticalize} 5 | \title{Creates a matrix by repeating a row vector N times} 6 | \usage{ 7 | verticalize(vector, N) 8 | } 9 | \arguments{ 10 | \item{vector}{the row vector} 11 | 12 | \item{N}{number of rows in the output matrix} 13 | } 14 | \value{ 15 | a matrix 16 | } 17 | \description{ 18 | helper function that repeats a row vector N times 19 | } 20 | 21 | --------------------------------------------------------------------------------
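verticalize() is documented above as repeating a row vector N times to form a matrix; in base R the same behaviour can be sketched in one line (an illustrative equivalent, not the package source):

verticalize_sketch <- function(vector, N) {
  matrix(rep(vector, N), nrow = N, byrow = TRUE)  # N identical rows
}
# verticalize_sketch(c(1, 2, 3), 2) returns a 2 x 3 matrix whose rows are both c(1, 2, 3)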