├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ ├── pkgdown.yaml │ ├── pr-commands.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── arcsinh_x.R ├── bestLogConstant.R ├── bestNormalize-package.R ├── bestNormalize.R ├── binarize.R ├── boxcox.R ├── data.R ├── double_reverse_log.R ├── exp_x.R ├── lambert.R ├── log_x.R ├── no_transform.R ├── orderNorm.R ├── plots.R ├── sqrt_x.R ├── step_best_normalize.R ├── step_orderNorm.R └── yeojohnson.R ├── README.Rmd ├── README.md ├── bestNormalize.Rproj ├── codecov.yml ├── cran-comments.md ├── data └── autotrader.RData ├── inst └── CITATION ├── man ├── arcsinh_x.Rd ├── autotrader.Rd ├── bestLogConstant.Rd ├── bestNormalize-package.Rd ├── bestNormalize.Rd ├── binarize.Rd ├── boxcox.Rd ├── double_reverse_log.Rd ├── exp_x.Rd ├── lambert.Rd ├── log_x.Rd ├── no_transform.Rd ├── orderNorm.Rd ├── plot.bestNormalize.Rd ├── reexports.Rd ├── sqrt_x.Rd ├── step_best_normalize.Rd ├── step_orderNorm.Rd └── yeojohnson.Rd ├── revdep ├── .gitignore ├── README.md ├── cran.md ├── failures.md └── problems.md ├── tests ├── testthat-bestNormalize.R ├── testthat-custom-parallel.R ├── testthat-main-transforms.R ├── testthat-methods-step.R ├── testthat-minor-transforms.R └── testthat │ ├── Rplots.pdf │ ├── test_arcsinh_x.R │ ├── test_bestNormalize.R │ ├── test_binarize.R │ ├── test_bn_custom_normstat.R │ ├── test_bn_parallel.R │ ├── test_boxcox.R │ ├── test_custom_fn.R │ ├── test_double_reverse_log.R │ ├── test_exp_x.R │ ├── test_lambert.R │ ├── test_log_x.R │ ├── test_no_transform.R │ ├── test_orderNorm.R │ ├── test_plot_methods.R │ ├── test_print_methods.R │ ├── test_sqrt_x.R │ ├── test_step_fns.R │ └── test_yeojohnson.R └── vignettes ├── bestNormalize.Rmd ├── customization.Rmd └── parallel_timings.jpg /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^cran-comments\.md$ 4 | 
^README\.Rmd$ 5 | ^README-.*\.png$ 6 | ^\.travis\.yml$ 7 | ^doc$ 8 | ^Meta$ 9 | ^CRAN-RELEASE$ 10 | ^\.github$ 11 | ^codecov\.yml$ 12 | ^revdep$ 13 | ^\.git$ 14 | ^CRAN-SUBMISSION$ 15 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v3 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: any::rcmdcheck 45 | needs: check 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | with: 49 | upload-snapshots: true 50 | -------------------------------------------------------------------------------- 
/.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - master 6 | 7 | name: pkgdown 8 | 9 | jobs: 10 | pkgdown: 11 | runs-on: ubuntu-18.04 12 | env: 13 | RSPM: https://packagemanager.rstudio.com/cran/__linux__/bionic/latest 14 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | 19 | - uses: r-lib/actions/setup-r@v1 20 | id: install-r 21 | 22 | - uses: r-lib/actions/setup-pandoc@v1 23 | 24 | - name: Install pak and query dependencies 25 | run: | 26 | install.packages("pak", repos = "https://r-lib.github.io/p/pak/dev/") 27 | saveRDS(pak::pkg_deps("local::.", dependencies = TRUE), ".github/r-depends.rds") 28 | shell: Rscript {0} 29 | 30 | - name: Restore R package cache 31 | uses: actions/cache@v2 32 | with: 33 | path: | 34 | ${{ env.R_LIBS_USER }}/* 35 | !${{ env.R_LIBS_USER }}/pak 36 | key: ubuntu-18.04-${{ steps.install-r.outputs.installed-r-version }}-1-${{ hashFiles('.github/r-depends.rds') }} 37 | restore-keys: ubuntu-18.04-${{ steps.install-r.outputs.installed-r-version }}-1- 38 | 39 | - name: Install system dependencies 40 | if: runner.os == 'Linux' 41 | run: | 42 | pak::local_system_requirements(execute = TRUE) 43 | pak::pkg_system_requirements("pkgdown", execute = TRUE) 44 | shell: Rscript {0} 45 | 46 | - name: Install dependencies 47 | run: | 48 | pak::local_install_dev_deps(upgrade = TRUE, dependencies = c("all", "Config/Needs/website")) 49 | pak::pkg_install("pkgdown") 50 | shell: Rscript {0} 51 | 52 | - name: Install package 53 | run: R CMD INSTALL . 
54 | 55 | - name: Build and deploy pkgdown site 56 | run: | 57 | git config --local user.name "$GITHUB_ACTOR" 58 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 59 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' 60 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | issue_comment: 3 | types: [created] 4 | name: Commands 5 | jobs: 6 | document: 7 | if: startsWith(github.event.comment.body, '/document') 8 | name: document 9 | runs-on: macOS-latest 10 | env: 11 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 12 | steps: 13 | - uses: actions/checkout@v2 14 | - uses: r-lib/actions/pr-fetch@v1 15 | with: 16 | repo-token: ${{ secrets.GITHUB_TOKEN }} 17 | - uses: r-lib/actions/setup-r@v1 18 | - name: Install dependencies 19 | run: Rscript -e 'install.packages(c("remotes", "roxygen2"))' -e 'remotes::install_deps(dependencies = TRUE)' 20 | - name: Document 21 | run: Rscript -e 'roxygen2::roxygenise()' 22 | - name: commit 23 | run: | 24 | git config --local user.email "actions@github.com" 25 | git config --local user.name "GitHub Actions" 26 | git add man/\* NAMESPACE 27 | git commit -m 'Document' 28 | - uses: r-lib/actions/pr-push@v1 29 | with: 30 | repo-token: ${{ secrets.GITHUB_TOKEN }} 31 | style: 32 | if: startsWith(github.event.comment.body, '/style') 33 | name: style 34 | runs-on: macOS-latest 35 | env: 36 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 37 | steps: 38 | - uses: actions/checkout@v2 39 | - uses: r-lib/actions/pr-fetch@v1 40 | with: 41 | repo-token: ${{ secrets.GITHUB_TOKEN }} 42 | - uses: r-lib/actions/setup-r@v1 43 | - name: Install dependencies 44 | run: Rscript -e 'install.packages("styler")' 45 | - name: Style 46 | run: Rscript -e 'styler::style_pkg()' 47 | - name: commit 48 | run: | 49 | git config --local user.email "actions@github.com" 50 | git config --local 
user.name "GitHub Actions" 51 | git add \*.R 52 | git commit -m 'Style' 53 | - uses: r-lib/actions/pr-push@v1 54 | with: 55 | repo-token: ${{ secrets.GITHUB_TOKEN }} 56 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - master 6 | pull_request: 7 | branches: 8 | - main 9 | - master 10 | 11 | name: test-coverage 12 | 13 | jobs: 14 | test-coverage: 15 | runs-on: ubuntu-18.04 16 | env: 17 | RSPM: https://packagemanager.rstudio.com/cran/__linux__/bionic/latest 18 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | 23 | - uses: r-lib/actions/setup-r@v1 24 | id: install-r 25 | 26 | - name: Install pak and query dependencies 27 | run: | 28 | install.packages("pak", repos = "https://r-lib.github.io/p/pak/dev/") 29 | saveRDS(pak::pkg_deps("local::.", dependencies = TRUE), ".github/r-depends.rds") 30 | shell: Rscript {0} 31 | 32 | - name: Restore R package cache 33 | uses: actions/cache@v2 34 | with: 35 | path: | 36 | ${{ env.R_LIBS_USER }}/* 37 | !${{ env.R_LIBS_USER }}/pak 38 | key: ubuntu-18.04-${{ steps.install-r.outputs.installed-r-version }}-1-${{ hashFiles('.github/r-depends.rds') }} 39 | restore-keys: ubuntu-18.04-${{ steps.install-r.outputs.installed-r-version }}-1- 40 | 41 | - name: Install system dependencies 42 | if: runner.os == 'Linux' 43 | run: | 44 | pak::local_system_requirements(execute = TRUE) 45 | pak::pkg_system_requirements("covr", execute = TRUE) 46 | shell: Rscript {0} 47 | 48 | - name: Install dependencies 49 | run: | 50 | pak::local_install_dev_deps(upgrade = TRUE) 51 | pak::pkg_install("covr") 52 | shell: Rscript {0} 53 | 54 | - name: Test coverage 55 | run: covr::codecov() 56 | shell: Rscript {0} 57 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | doc 4 | Meta 5 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bestNormalize 2 | Type: Package 3 | Title: Normalizing Transformation Functions 4 | Version: 1.9.1.9000 5 | Date: 2023-09-20 6 | Authors@R: person("Ryan Andrew", "Peterson", 7 | email = "ryan.a.peterson@cuanschutz.edu", 8 | role = c("aut", "cre"), 9 | comment = c(ORCID = "0000-0002-4650-5798")) 10 | Description: Estimate a suite of normalizing transformations, including 11 | a new adaptation of a technique based on ranks which can guarantee 12 | normally distributed transformed data if there are no ties: ordered 13 | quantile normalization (ORQ). ORQ normalization combines a rank-mapping 14 | approach with a shifted logit approximation that allows 15 | the transformation to work on data outside the original domain. It is 16 | also able to handle new data within the original domain via linear 17 | interpolation. The package is built to estimate the best normalizing 18 | transformation for a vector consistently and accurately. It implements 19 | the Box-Cox transformation, the Yeo-Johnson transformation, three types 20 | of Lambert WxF transformations, and the ordered quantile normalization 21 | transformation. It estimates the normalization efficacy of other 22 | commonly used transformations, and it allows users to specify 23 | custom transformations or normalization statistics. Finally, functionality 24 | can be integrated into a machine learning workflow via recipes. 
25 | URL: 26 | https://petersonr.github.io/bestNormalize/, 27 | https://github.com/petersonR/bestNormalize 28 | License: GPL-3 29 | Depends: 30 | R (>= 3.1.0) 31 | Imports: 32 | LambertW (>= 0.6.5), 33 | nortest, 34 | dplyr, 35 | doParallel, 36 | foreach, 37 | doRNG, 38 | recipes, 39 | tibble, 40 | methods, 41 | butcher, 42 | purrr, 43 | generics 44 | Suggests: 45 | knitr, 46 | rmarkdown, 47 | MASS, 48 | testthat, 49 | mgcv, 50 | parallel, 51 | ggplot2, 52 | scales, 53 | rlang, 54 | covr 55 | VignetteBuilder: knitr 56 | LazyData: true 57 | RoxygenNote: 7.3.1 58 | Encoding: UTF-8 59 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(axe_env,step_best_normalize) 4 | S3method(axe_env,step_orderNorm) 5 | S3method(bake,step_best_normalize) 6 | S3method(bake,step_orderNorm) 7 | S3method(plot,bestNormalize) 8 | S3method(plot,boxcox) 9 | S3method(plot,lambert) 10 | S3method(plot,orderNorm) 11 | S3method(plot,yeojohnson) 12 | S3method(predict,arcsinh_x) 13 | S3method(predict,bestLogConstant) 14 | S3method(predict,bestNormalize) 15 | S3method(predict,binarize) 16 | S3method(predict,boxcox) 17 | S3method(predict,center_scale) 18 | S3method(predict,double_reverse_log) 19 | S3method(predict,exp_x) 20 | S3method(predict,lambert) 21 | S3method(predict,log_x) 22 | S3method(predict,no_transform) 23 | S3method(predict,orderNorm) 24 | S3method(predict,sqrt_x) 25 | S3method(predict,yeojohnson) 26 | S3method(prep,step_best_normalize) 27 | S3method(prep,step_orderNorm) 28 | S3method(print,arcsinh_x) 29 | S3method(print,bestLogConstant) 30 | S3method(print,bestNormalize) 31 | S3method(print,binarize) 32 | S3method(print,boxcox) 33 | S3method(print,center_scale) 34 | S3method(print,double_reverse_log) 35 | S3method(print,exp_x) 36 | S3method(print,lambert) 37 | S3method(print,log_x) 38 | 
S3method(print,no_transform) 39 | S3method(print,orderNorm) 40 | S3method(print,sqrt_x) 41 | S3method(print,step_best_normalize) 42 | S3method(print,step_orderNorm) 43 | S3method(print,yeojohnson) 44 | S3method(required_pkgs,step_best_normalize) 45 | S3method(required_pkgs,step_orderNorm) 46 | S3method(tidy,bestNormalize) 47 | S3method(tidy,no_transform) 48 | S3method(tidy,step_best_normalize) 49 | S3method(tidy,step_orderNorm) 50 | export(arcsinh_x) 51 | export(bestLogConstant) 52 | export(bestNormalize) 53 | export(binarize) 54 | export(boxcox) 55 | export(center_scale) 56 | export(double_reverse_log) 57 | export(exp_x) 58 | export(lambert) 59 | export(log_x) 60 | export(no_transform) 61 | export(orderNorm) 62 | export(required_pkgs) 63 | export(sqrt_x) 64 | export(step_bestNormalize) 65 | export(step_bestNormalize_new) 66 | export(step_best_normalize) 67 | export(step_orderNorm) 68 | export(yeojohnson) 69 | importFrom(butcher,axe_env) 70 | importFrom(doParallel,registerDoParallel) 71 | importFrom(doRNG,"%dorng%") 72 | importFrom(foreach,"%dopar%") 73 | importFrom(foreach,foreach) 74 | importFrom(generics,required_pkgs) 75 | importFrom(graphics,legend) 76 | importFrom(graphics,lines) 77 | importFrom(graphics,plot) 78 | importFrom(graphics,points) 79 | importFrom(methods,is) 80 | importFrom(purrr,map) 81 | importFrom(recipes,add_step) 82 | importFrom(recipes,bake) 83 | importFrom(recipes,check_type) 84 | importFrom(recipes,ellipse_check) 85 | importFrom(recipes,is_trained) 86 | importFrom(recipes,prep) 87 | importFrom(recipes,print_step) 88 | importFrom(recipes,rand_id) 89 | importFrom(recipes,recipe) 90 | importFrom(recipes,recipes_eval_select) 91 | importFrom(recipes,sel2char) 92 | importFrom(recipes,step) 93 | importFrom(recipes,tidy) 94 | importFrom(stats,approx) 95 | importFrom(stats,density) 96 | importFrom(stats,fitted) 97 | importFrom(stats,glm) 98 | importFrom(stats,lm) 99 | importFrom(stats,median) 100 | importFrom(stats,optimize) 101 | 
importFrom(stats,pnorm) 102 | importFrom(stats,predict) 103 | importFrom(stats,predict.glm) 104 | importFrom(stats,qnorm) 105 | importFrom(stats,quantile) 106 | importFrom(stats,sd) 107 | importFrom(stats,var) 108 | importFrom(tibble,as_tibble) 109 | importFrom(tibble,tibble) 110 | -------------------------------------------------------------------------------- /R/arcsinh_x.R: -------------------------------------------------------------------------------- 1 | #' arcsinh(x) Transformation 2 | #' 3 | #' @name arcsinh_x 4 | #' @aliases predict.arcsinh_x 5 | #' 6 | #' @description Perform a arcsinh(x) transformation 7 | #' @param x A vector to normalize with with x 8 | #' @param standardize If TRUE, the transformed values are also centered and 9 | #' scaled, such that the transformation attempts a standard normal 10 | #' @param object an object of class 'arcsinh_x' 11 | #' @param newdata a vector of data to be (potentially reverse) transformed 12 | #' @param inverse if TRUE, performs reverse transformation 13 | #' @param ... additional arguments 14 | #' @details \code{arcsinh_x} performs an arcsinh transformation in the context of 15 | #' bestNormalize, such that it creates a transformation that can be estimated 16 | #' and applied to new data via the \code{predict} function. 
#'
#' The function is explicitly: log(x + sqrt(x^2 + 1))
#'
#' @return A list of class \code{arcsinh_x} with elements
#'   \item{x.t}{transformed original data}
#'   \item{x}{original data}
#'   \item{mean}{mean after transformation but prior to standardization}
#'   \item{sd}{sd after transformation but prior to standardization}
#'   \item{n}{number of nonmissing observations}
#'   \item{norm_stat}{Pearson's P / degrees of freedom}
#'   \item{standardize}{was the transformation standardized}
#'
#' The \code{predict} function returns the numeric value of the transformation
#' performed on new data, and allows for the inverse transformation as well.
#'
#' @examples
#' x <- rgamma(100, 1, 1)
#'
#' arcsinh_x_obj <- arcsinh_x(x)
#' arcsinh_x_obj
#' p <- predict(arcsinh_x_obj)
#' x2 <- predict(arcsinh_x_obj, newdata = p, inverse = TRUE)
#'
#' all.equal(x2, x)
#'
#' @importFrom stats sd
#' @export
arcsinh_x <- function(x, standardize = TRUE, ...) {
  stopifnot(is.numeric(x))

  # asinh(x) = log(x + sqrt(x^2 + 1)); defined for all real x
  x.t <- asinh(x)
  mu <- mean(x.t, na.rm = TRUE)
  sigma <- sd(x.t, na.rm = TRUE)
  if (standardize) x.t <- (x.t - mu) / sigma

  # Normality statistic: Pearson P / df (lower => more normal)
  ptest <- nortest::pearson.test(x.t)

  val <- list(
    x.t = x.t,
    x = x,
    mean = mu,
    sd = sigma,
    n = length(x.t) - sum(is.na(x)),
    norm_stat = unname(ptest$statistic / ptest$df),
    standardize = standardize
  )
  class(val) <- c('arcsinh_x', class(val))
  val
}

#' @rdname arcsinh_x
#' @method predict arcsinh_x
#' @export
predict.arcsinh_x <- function(object, newdata = NULL, inverse = FALSE, ...) {
  # Default to the fitted data (the transformed data when inverting).
  # Note: `&&` (scalar, short-circuiting) replaces the elementwise `&`
  # originally used in these scalar `if` conditions.
  if (is.null(newdata) && !inverse)
    newdata <- object$x
  if (is.null(newdata) && inverse)
    newdata <- object$x.t

  if (inverse) {
    # Undo standardization first, then invert asinh via sinh
    if (object$standardize)
      newdata <- newdata * object$sd + object$mean
    newdata <- sinh(newdata)
  } else {
    newdata <- asinh(newdata)
    if (object$standardize)
      newdata <- (newdata - object$mean) / object$sd
  }
  unname(newdata)
}

#' @rdname arcsinh_x
#' @method print arcsinh_x
#' @export
print.arcsinh_x <- function(x, ...) {
  cat(ifelse(x$standardize, "Standardized", "Non-Standardized"),
      'asinh(x) Transformation with', x$n, 'nonmissing obs.:\n',
      'Relevant statistics:\n',
      '- mean (before standardization) =', x$mean, '\n',
      '- sd (before standardization) =', x$sd, '\n')
}

# ------------------------------------------------------------------------------
# /R/bestLogConstant.R
# ------------------------------------------------------------------------------
#' Calculate and perform best normalizing log transformation (experimental)
#'
#' @aliases predict.bestLogConstant
#'
#' @description Similar to bestNormalize, this selects the
#'   best candidate constant for a log transformation on the basis
#'   of the Pearson P test statistic for normality. The
#'   transformation that has the lowest P (calculated on the transformed data)
#'   is selected. This function is currently in development and may not behave
#'   as expected.
#'
#'   See details for more information.
#'
#' @param x A vector to normalize
#' @param standardize If TRUE, the transformed values are also centered and
#'   scaled, such that the transformation attempts a standard normal. This will
#'   not change the normality statistic.
#' @param newdata a vector of data to be (reverse) transformed
#' @param inverse if TRUE, performs reverse transformation
#' @param object an object of class 'bestLogConstant'
#' @param a (optional) a list of candidate constants to choose from
#' @param ... additional arguments.

#' @details \code{bestLogConstant} estimates the optimal normalizing constant
#'   for a log transformation. This transformation can be performed on new data, and
#'   inverted, via the \code{predict} function.
#'
#' @return A list of class \code{bestLogConstant} with elements
#'
#'   \item{x.t}{transformed original data} \item{x}{original data}
#'   \item{norm_stats}{Pearson's P / degrees of freedom}
#'   \item{method}{out-of-sample or in-sample, number of folds + repeats}
#'   \item{chosen_transform}{the chosen constant transformation (of class `log_x`)}
#'   \item{other_transforms}{the other transformations (of class `log_x`)}
#'
#'   The \code{predict} function returns the numeric value of the transformation
#'   performed on new data, and allows for the inverse transformation as well.
#'
#'
#' @seealso \code{\link[bestNormalize]{bestNormalize}}, \code{\link{log_x}},
#' @export
bestLogConstant <- function(x,
                            a,
                            standardize = TRUE,
                            ...) {
  stopifnot(is.numeric(x))
  x.t <- list()

  # Default grid of candidate constants: 0.01, 0.1, 1, 10, ..., 1e9
  if (missing(a))
    a <- 10^(-2:9)

  args <- lapply(a, function(x) list("a" = x, "standardize" = standardize))

  method_calls <- rep("log_x", length = length(a))
  names(args) <- method_calls

  # Fit a log_x transformation for each candidate constant; failures are
  # kept as try-errors so a single bad constant does not abort the search
  for (i in seq_along(method_calls)) {
    args_i <- args[[i]]
    args_i$x <- x

    trans_i <- try(do.call(method_calls[i], args_i), silent = TRUE)
    x.t[[i]] <- trans_i
  }

  # Pearson P / df (lower => more normal)
  norm_stat_fn <- function(x) {
    val <- nortest::pearson.test(x)
    unname(val$stat / val$df)
  }

  norm_stats <- unlist(lapply(x.t, function(x) norm_stat_fn(x$x.t)))
  names(norm_stats) <- paste0("a=", a)
  best_idx <- which.min(norm_stats)

  val <- list(
    x.t = x.t[[best_idx]]$x.t,
    x = x,
    a = a,
    best_a = a[best_idx],
    norm_stats = norm_stats,
    chosen_transform = x.t[[best_idx]],
    # Bug fix: x.t has no names, so the previous subset
    # `x.t[names(x.t) != best_idx]` compared NULL names and always
    # returned an empty list. Drop the chosen fit by position instead.
    other_transforms = x.t[-best_idx],
    standardize = standardize
  )
  class(val) <- 'bestLogConstant'
  val
}

#' @rdname bestLogConstant
#' @method predict bestLogConstant
#' @importFrom stats predict
#' @export
predict.bestLogConstant <- function(object, newdata = NULL, inverse = FALSE, ...) {
  # Delegate to the chosen log_x transformation's predict method
  predict(object$chosen_transform, newdata = newdata, inverse = inverse, ...)
}

#' @rdname bestLogConstant
#' @method print bestLogConstant
#' @export
print.bestLogConstant <- function(x, ...) {

  results <- paste0(" - ", x$a, ": ", round(x$norm_stats, 4), collapse = "\n")

  prettyD <- paste0(
    'Estimated Normality Statistics\n',
    "(Pearson P / df, lower => more normal):\n",
    results, '\n')

  cat('Best Normalizing log constant transformation with', x$chosen_transform$n, 'Observations\n',
      prettyD, '\nBased off these, bestLogConstant chose:\n')
  print(x$chosen_transform)
}

# ------------------------------------------------------------------------------
# /R/bestNormalize-package.R
# ------------------------------------------------------------------------------
#' bestNormalize: Flexibly calculate the best normalizing transformation for a
#' vector
#'
#' The \code{bestNormalize} package provides several normalizing transformations, and introduces a
#' new transformation based off of the order statistics, \code{orderNorm}.
#' Perhaps the most useful function is \code{bestNormalize}, which attempts all
#' of these transformations and picks the best one based off of a goodness of
#' fit statistic.
#'
#' @docType package
#' @aliases bestNormalize-package
"_PACKAGE"

# ------------------------------------------------------------------------------
# /R/binarize.R
# ------------------------------------------------------------------------------
#' Binarize
#'
#' @name binarize
#' @aliases predict.binarize
#'
#' @description This function will perform a binarizing transformation, which
#'   could be used as a last resort if the data cannot be adequately normalized.
#'   This may be useful when accidentally attempting normalization of a binary
#'   vector (which could occur if implementing bestNormalize in an automated
#'   fashion).
#'
#'   Note that the transformation is not one-to-one, in contrast to the other
#'   functions in this package.
#'
#' @param x A vector to binarize
#' @param location_measure which location measure should be used? can either be
#'   "median", "mean", "mode", a number, or a function.
#' @param newdata a vector of data to be (reverse) transformed
#' @param inverse if TRUE, performs reverse transformation
#' @param object an object of class 'binarize'
#' @param ... additional arguments
#'
#' @return A list of class \code{binarize} with elements
#'   \item{x.t}{transformed original data}
#'   \item{x}{original data}
#'   \item{method}{location_measure used for original fitting}
#'   \item{location}{estimated location_measure}
#'   \item{n}{number of nonmissing observations}
#'   \item{norm_stat}{Pearson's P / degrees of freedom}
#'
#' The \code{predict} function with \code{inverse = FALSE} returns the numeric
#' value (0 or 1) of the transformation on \code{newdata} (which defaults to
#' the original data).
#'
#' If \code{inverse = TRUE}, since the transform is not 1-1, it will create
#' and return a factor that indicates where the original data was cut.
#'
#' @examples
#' x <- rgamma(100, 1, 1)
#' binarize_obj <- binarize(x)
#' (p <- predict(binarize_obj))
#'
#' predict(binarize_obj, newdata = p, inverse = TRUE)
#' @importFrom stats density median
#' @export
binarize <- function(x, location_measure = 'median') {
  stopifnot(is.numeric(x))

  # Check and set location measure; a number or a function is used directly,
  # otherwise the string selects a built-in estimator.
  if (is.numeric(location_measure)) {
    loc <- location_measure
  } else if (is.function(location_measure)) {
    loc <- location_measure(x)
  } else if (location_measure == 'median') {
    loc <- median(x, na.rm = TRUE)
  } else if (location_measure == 'mean') {
    loc <- mean(x, na.rm = TRUE)
  } else if (location_measure == 'mode') {
    # Mode estimated as the peak of a kernel density fit
    dens <- density(x[!is.na(x)])
    loc <- dens$x[which.max(dens$y)]
  } else {
    # Previously an unrecognized string fell through and later failed with
    # an opaque "object 'loc' not found"; fail fast with a clear message.
    stop("unrecognized location_measure; must be 'median', 'mean', 'mode', ",
         "a number, or a function", call. = FALSE)
  }

  x.t <- as.numeric(x > loc)

  ptest <- nortest::pearson.test(x.t)

  val <- list(
    x.t = x.t,
    x = x,
    method = location_measure,
    location = loc,
    n = length(x.t) - sum(is.na(x)),
    norm_stat = unname(ptest$statistic / ptest$df)
  )
  class(val) <- 'binarize'
  val
}


#' @rdname binarize
#' @method predict binarize
#' @export
predict.binarize <- function(object, newdata = NULL, inverse = FALSE, ...) {
  # Default to the original data (transformed data when inverting).
  # `&&` replaces the elementwise `&` originally used in this scalar `if`.
  if (is.null(newdata) && !inverse)
    newdata <- object$x
  if (is.null(newdata))
    newdata <- object$x.t

  if (!inverse)
    return(as.numeric(newdata > object$location))
  # Inverse is not 1-1; return a factor indicating which side of the
  # cutpoint each observation fell on.
  prettyLoc <- round(object$location,
                     getOption('digits', 2))
  labels <- c(paste0('< ', prettyLoc),
              paste0('>= ', prettyLoc))
  factor(newdata, levels = 0:1, labels = labels)
}


#' @rdname binarize
#' @method print binarize
#' @export
print.binarize <- function(x, ...) {
  cat('Binarize Transformation with', x$n,
      'nonmissing obs.\nEstimated Statistic:\n -', x$method,
      '=', x$location)
}

# ------------------------------------------------------------------------------
# /R/boxcox.R
# ------------------------------------------------------------------------------
#' Box-Cox Normalization
#'
#' @name boxcox
#' @aliases predict.boxcox
#'
#' @description Perform a Box-Cox transformation and center/scale a vector to
#'   attempt normalization
#' @param x A vector to normalize with Box-Cox
#' @param standardize If TRUE, the transformed values are also centered and
#'   scaled, such that the transformation attempts a standard normal
#' @param ... Additional arguments that can be passed to the estimation of the
#'   lambda parameter (lower, upper, epsilon)
#' @param object an object of class 'boxcox'
#' @param newdata a vector of data to be (reverse) transformed
#' @param inverse if TRUE, performs reverse transformation
#' @details \code{boxcox} estimates the optimal value of lambda for the Box-Cox
#'   transformation. This transformation can be performed on new data, and
#'   inverted, via the \code{predict} function.
#'
#'   The function will return an error if a user attempts to transform
#'   nonpositive data.
#'
#'
#' @return A list of class \code{boxcox} with elements
#'   \item{x.t}{transformed original data}
#'   \item{x}{original data}
#'   \item{mean}{mean after transformation but prior to standardization}
#'   \item{sd}{sd after transformation but prior to standardization}
#'   \item{lambda}{estimated lambda value for skew transformation}
#'   \item{n}{number of nonmissing observations}
#'   \item{norm_stat}{Pearson's P / degrees of freedom}
#'   \item{standardize}{was the transformation standardized}
#'
#' The \code{predict} function returns the numeric value of the transformation
#' performed on new data, and allows for the inverse transformation as well.
#'
#' @references Box, G. E. P. and Cox, D. R. (1964) An analysis of
#'   transformations. Journal of the Royal Statistical Society B, 26, 211-252.
#'
#' @examples
#' x <- rgamma(100, 1, 1)
#'
#' bc_obj <- boxcox(x)
#' bc_obj
#' p <- predict(bc_obj)
#' x2 <- predict(bc_obj, newdata = p, inverse = TRUE)
#'
#' all.equal(x2, x)
#' @seealso \code{\link[MASS]{boxcox}}
#' @importFrom stats sd
#' @export
boxcox <- function(x, standardize = TRUE, ...) {
  stopifnot(is.numeric(x))
  # Estimate lambda by profile likelihood (errors on nonpositive x)
  l <- estimate_boxcox_lambda(x, ...)
  x.t <- boxcox_trans(x, l)
  mu <- mean(x.t, na.rm = TRUE)
  sigma <- sd(x.t, na.rm = TRUE)
  if (standardize) x.t <- (x.t - mu) / sigma

  # Normality statistic: Pearson P / df (lower => more normal)
  ptest <- nortest::pearson.test(x.t)

  val <- list(
    x.t = x.t,
    x = x,
    mean = mu,
    sd = sigma,
    lambda = l,
    n = length(x.t) - sum(is.na(x)),
    norm_stat = unname(ptest$statistic / ptest$df),
    standardize = standardize
  )
  class(val) <- c('boxcox', class(val))
  val
}

#' @rdname boxcox
#' @method predict boxcox
#' @export
predict.boxcox <- function(object, newdata = NULL, inverse = FALSE, ...) {
  # Default to the fitted data (the transformed data when inverting).
  # `&&` replaces the elementwise `&` originally used in these scalar `if`s.
  if (is.null(newdata) && !inverse)
    newdata <- object$x
  if (is.null(newdata) && inverse)
    newdata <- object$x.t

  if (inverse) {
    # Undo standardization first, then invert the Box-Cox transform
    if (object$standardize) newdata <- newdata * object$sd + object$mean
    newdata <- inv_boxcox_trans(newdata, object$lambda)
  } else {
    newdata <- boxcox_trans(newdata, object$lambda)
    if (object$standardize) newdata <- (newdata - object$mean) / object$sd
  }
  unname(newdata)
}

#' @rdname boxcox
#' @method print boxcox
#' @export
print.boxcox <- function(x, ...) {
  cat(ifelse(x$standardize, "Standardized", "Non-Standardized"),
      'Box Cox Transformation with', x$n, 'nonmissing obs.:\n',
      'Estimated statistics:\n',
      '- lambda =', x$lambda, '\n',
      '- mean (before standardization) =', x$mean, '\n',
      '- sd (before standardization) =', x$sd, '\n')
}

# Modified version of boxcox from MASS package: profile-likelihood
# estimate of lambda on the interval [lower, upper].
#' @importFrom stats lm optimize
estimate_boxcox_lambda <- function(x, lower = -1, upper = 2, eps = .001) {
  n <- length(x)
  ccID <- !is.na(x)
  x <- x[ccID]

  if (any(x <= 0))
    stop("x must be positive")

  log_x <- log(x)
  xbar <- exp(mean(log_x))  # geometric mean, used to rescale the likelihood

  boxcox_loglik <- function(lambda) {
    gm0 <- xbar ^ (lambda - 1)
    # Near lambda = 0 the transform degenerates to log(x)
    z <- if (abs(lambda) <= eps)
      log_x / gm0
    else
      (x ^ lambda - 1) / (lambda * gm0)
    var_z <- var(z) * (n - 1) / n
    - .5 * n * log(var_z)
  }

  results <- optimize(boxcox_loglik, lower = lower,
                      upper = upper, maximum = TRUE,
                      tol = .0001)

  results$maximum
}

# Internal transformation functions
# Forward Box-Cox: (x^lambda - 1) / lambda, or log(x) when |lambda| < eps.
boxcox_trans <- function(x, lambda, eps = .001) {
  # Negative x is undefined for negative lambda; mark as NA
  if (lambda < 0)
    x[x < 0] <- NA
  if (abs(lambda) < eps)
    val <- log(x)
  else
    val <- (sign(x) * abs(x) ^ lambda - 1) / lambda
  val
}

# Inverse Box-Cox: (lambda * x + 1)^(1/lambda), or exp(x) when |lambda| < eps.
inv_boxcox_trans <- function(x, lambda, eps = .001) {
  # Values beyond -1/lambda are outside the transform's range; mark as NA
  if (lambda < 0)
    x[x > -1 / lambda] <- NA
  if (abs(lambda) < eps)
    val <- exp(x)
  else {
    x <- x * lambda + 1
    val <- sign(x) * abs(x) ^ (1 / lambda)
  }
  val
}

# ------------------------------------------------------------------------------
# /R/data.R
# ------------------------------------------------------------------------------
#' Prices of 6,283 cars listed on Autotrader
#'
#' A dataset containing the prices and other attributes of over 6000 cars in the Minneapolis area.
#'
#' @format A data frame with 6283 rows and 10 variables:
#' \describe{
#'   \item{price}{price, in US dollars}
#'   \item{Car_Info}{Raw description from website}
#'   \item{Link}{hyperlink to listing (must be appended to https://www.autotrader.com/)}
#'   \item{Make}{Car manufacturer}
#'   \item{Year}{Year car manufactured}
#'   \item{Location}{Location of listing}
#'   \item{Radius}{Radius chosen for search}
#'   \item{mileage}{mileage on vehicle}
#'   \item{status}{used/new/certified}
#'   \item{model}{make and model, separated by space}
#' }
#' @source \url{https://www.autotrader.com/}
"autotrader"

# ------------------------------------------------------------------------------
# /R/double_reverse_log.R
# ------------------------------------------------------------------------------
#' Double Reverse Log(x + a) Transformation
#'
#' @name double_reverse_log
#' @aliases predict.double_reverse_log
#'
#' @description First reverses scores, then perform a log_b(x)
#'   normalization transformation, and then reverses scores again.
#'
#' @param x A vector to normalize with x
#' @param b The base of the log (defaults to 10)
#' @param standardize If TRUE, the transformed values are also centered and
#'   scaled, such that the transformation attempts a standard normal
#' @param eps The cushion for the transformation range (defaults to 10 percent)
#' @param warn Should a warning result from infinite values?
#' @param object an object of class 'double_reverse_log'
#' @param newdata a vector of data to be (potentially reverse) transformed
#' @param inverse if TRUE, performs reverse transformation
#' @param ... additional arguments
#' @details \code{double_reverse_log} performs a simple log transformation in
#'   the context of bestNormalize, such that it creates a transformation that
#'   can be estimated and applied to new data via the \code{predict} function.
#'   The parameter a is essentially estimated by the training set by default
#'   (estimated as the minimum possible to some extent epsilon), while the
#'   base must be specified beforehand.
#'
#' @return A list of class \code{double_reverse_log} with elements
#'   \item{x.t}{transformed original data}
#'   \item{x}{original data}
#'   \item{mean}{mean after transformation but prior to standardization}
#'   \item{sd}{sd after transformation but prior to standardization}
#'   \item{b}{estimated base b value}
#'   \item{n}{number of nonmissing observations}
#'   \item{norm_stat}{Pearson's P / degrees of freedom}
#'   \item{standardize}{was the transformation standardized}
#'
#' The \code{predict} function returns the numeric value of the transformation
#' performed on new data, and allows for the inverse transformation as well.
#'
#' @examples
#' x <- rgamma(100, 1, 1)
#'
#' double_reverse_log_obj <- double_reverse_log(x)
#' double_reverse_log_obj
#' p <- predict(double_reverse_log_obj)
#' x2 <- predict(double_reverse_log_obj, newdata = p, inverse = TRUE)
#'
#' all.equal(x2, x)
#'
#' @importFrom stats sd
#' @export
double_reverse_log <- function(x,
                               b = 10,
                               standardize = TRUE,
                               eps = diff(range(x, na.rm = TRUE)) / 10,
                               warn = TRUE,
                               ...) {
  stopifnot(is.numeric(x))

  # Pad the observed range so new data slightly outside it still transforms.
  max_x <- max(x, na.rm = TRUE) + eps
  min_x <- min(x, na.rm = TRUE) - eps

  # Padded maximum on the transformed scale, used to re-reverse scores.
  max_xt <- log(max_x - min_x, base = b)

  # Perform transformation (reverse, log, reverse)
  x_rev <- max_x - x
  x.t_rev <- log(x_rev, base = b)
  x.t <- max_xt - x.t_rev

  # All-infinite transforms are unusable; partial infinities disable
  # standardization (mean/sd would be undefined).
  stopifnot(!all(infinite_idx <- is.infinite(x.t)))
  if (any(infinite_idx)) {
    # FIX: respect the documented `warn` argument; previously the warning
    # fired unconditionally, making the parameter a no-op.
    if (warn) warning("Some values are infinite")
    standardize <- FALSE
  }

  mu <- mean(x.t, na.rm = TRUE)
  sigma <- sd(x.t, na.rm = TRUE)
  if (standardize) x.t <- (x.t - mu) / sigma

  ptest <- nortest::pearson.test(x.t)

  val <- list(
    x.t = x.t,
    x = x,
    mean = mu,
    sd = sigma,
    b = b,
    eps = eps,
    n = length(x.t) - sum(is.na(x)),
    norm_stat = unname(ptest$statistic / ptest$df),
    standardize = standardize,
    max_x = max_x,
    min_x = min_x,
    max_xt = max_xt
  )
  class(val) <- c('double_reverse_log', class(val))
  val
}

#' @rdname double_reverse_log
#' @method predict double_reverse_log
#' @export
predict.double_reverse_log <- function(object, newdata = NULL, inverse = FALSE, ...) {
  # Default to the training data (or its transform, when inverting).
  if (is.null(newdata) & !inverse)
    newdata <- object$x
  if (is.null(newdata) & inverse)
    newdata <- object$x.t

  if (inverse) {
    # Undo standardization first, then invert reverse-log-reverse.
    if (object$standardize)
      newdata <- newdata * object$sd + object$mean
    new_xt_rev <- object$max_xt - newdata
    new_x_rev <- object$b^new_xt_rev
    newdata <- object$max_x - new_x_rev
  } else {
    new_x_rev <- object$max_x - newdata
    new_xt_rev <- log(new_x_rev, object$b)
    newdata <- object$max_xt - new_xt_rev
    if (object$standardize)
      newdata <- (newdata - object$mean) / object$sd
  }

  unname(newdata)
}

#' @rdname double_reverse_log
#' @method print double_reverse_log
#' @export
print.double_reverse_log <- function(x, ...) {
  # FIX: dropped the "- a =" line; double_reverse_log objects carry no `a`
  # element (that field belongs to log_x), so x$a was always NULL.
  cat(ifelse(x$standardize, "Standardized", "Non-Standardized"),
      'double reversed Log_b(x + a) Transformation with', x$n, 'nonmissing obs.:\n',
      'Relevant statistics:\n',
      '- b =', x$b, '\n',
      '- max(x) =', x$max_x, '; min(x) =', x$min_x, '\n',
      '- mean (before standardization) =', x$mean, '\n',
      '- sd (before standardization) =', x$sd, '\n')
}

# -- file: R/exp_x.R ----------------------------------------------------------

#' exp(x) Transformation
#'
#' @name exp_x
#' @aliases predict.exp_x
#'
#' @description Perform a exp(x) transformation
#' @param x A vector to normalize with x
#' @param standardize If TRUE, the transformed values are also centered and
#'   scaled, such that the transformation attempts a standard normal
#' @param warn Should a warning result from infinite values?
#' @param object an object of class 'exp_x'
#' @param newdata a vector of data to be (potentially reverse) transformed
#' @param inverse if TRUE, performs reverse transformation
#' @param ... additional arguments
#' @details \code{exp_x} performs a simple exponential transformation in the
#'   context of bestNormalize, such that it creates a transformation that can
#'   be estimated and applied to new data via the \code{predict} function.
#'
#' @return A list of class \code{exp_x} with elements
#'   \item{x.t}{transformed original data}
#'   \item{x}{original data}
#'   \item{mean}{mean after transformation but prior to standardization}
#'   \item{sd}{sd after transformation but prior to standardization}
#'   \item{n}{number of nonmissing observations}
#'   \item{norm_stat}{Pearson's P / degrees of freedom}
#'   \item{standardize}{was the transformation standardized}
#'
#' The \code{predict} function returns the numeric value of the transformation
#' performed on new data, and allows for the inverse transformation as well.
#'
#' @examples
#' x <- rgamma(100, 1, 1)
#'
#' exp_x_obj <- exp_x(x)
#' exp_x_obj
#' p <- predict(exp_x_obj)
#' x2 <- predict(exp_x_obj, newdata = p, inverse = TRUE)
#'
#' all.equal(x2, x)
#'
#' @importFrom stats sd
#' @export
exp_x <- function(x, standardize = TRUE, warn = TRUE, ...) {
  stopifnot(is.numeric(x))

  # Exponentiate, recording center/scale on the transformed scale.
  transformed <- exp(x)
  mu <- mean(transformed, na.rm = TRUE)
  sigma <- sd(transformed, na.rm = TRUE)
  blown_up <- is.infinite(transformed)
  if (standardize) transformed <- (transformed - mu) / sigma

  # exp() overflows to Inf for large x; such fits are unusable.
  if (any(blown_up)) {
    stop("infinite post-transformation values")
  }

  ptest <- nortest::pearson.test(transformed)

  fit <- list(
    x.t = transformed,
    x = x,
    mean = mu,
    sd = sigma,
    n = length(transformed) - sum(is.na(x)),
    norm_stat = unname(ptest$statistic / ptest$df),
    standardize = standardize
  )
  class(fit) <- c('exp_x', class(fit))
  fit
}

#' @rdname exp_x
#' @method predict exp_x
#' @export
predict.exp_x <- function(object, newdata = NULL, inverse = FALSE, ...) {
  # Default to the training data (or its transform, when inverting).
  if (is.null(newdata)) {
    newdata <- if (inverse) object$x.t else object$x
  }

  if (inverse) {
    # Undo standardization first, then take logs to invert exp().
    if (object$standardize)
      newdata <- newdata * object$sd + object$mean
    newdata <- log(newdata)
  } else {
    newdata <- exp(newdata)
    if (object$standardize)
      newdata <- (newdata - object$mean) / object$sd
  }
  unname(newdata)
}

#' @rdname exp_x
#' @method print exp_x
#' @export
print.exp_x <- function(x, ...) {
  # Human-readable summary of the fitted transformation.
  cat(ifelse(x$standardize, "Standardized", "Non-Standardized"),
      'exp(x) Transformation with', x$n, 'nonmissing obs.:\n',
      'Relevant statistics:\n',
      '- mean (before standardization) =', x$mean, '\n',
      '- sd (before standardization) =', x$sd, '\n')
}

# -- file: R/lambert.R --------------------------------------------------------

#' Lambert W x F Normalization
#'
#' @name lambert
#' @aliases predict.lambert
#'
#' @description Perform Lambert's W x F transformation and center/scale a
#'   vector to attempt normalization via the \code{LambertW} package.
#' @param x A vector to normalize with Box-Cox
#' @param type a character indicating which transformation to perform (options
#'   are "s", "h", and "hh", see details)
#' @param standardize If TRUE, the transformed values are also centered and
#'   scaled, such that the transformation attempts a standard normal
#' @param ... Additional arguments that can be passed to the
#'   LambertW::Gaussianize function
#' @param newdata a vector of data to be (reverse) transformed
#' @param inverse if TRUE, performs reverse transformation
#' @param object an object of class 'lambert'
#' @param warn should the function show warnings
#'
#' @details \code{lambert} uses the \code{LambertW} package to estimate a
#'   normalizing (or "Gaussianizing") transformation. This transformation can
#'   be performed on new data, and inverted, via the \code{predict} function.
#'
#' NOTE: The type = "s" argument is the only one that does the 1-1 transform
#' consistently, and so it is the only method currently used in
#' \code{bestNormalize()}. Use type = "h" or type = 'hh' at risk of not having
#' this estimate 1-1 transform.
#' These alternative types are effective when the data has exceptionally
#' heavy tails, e.g. the Cauchy distribution.
#'
#' Additionally, sometimes (depending on the distribution) this method will be
#' unable to extrapolate beyond the observed bounds. In these cases, NaN is
#' returned.
#'
#' @return A list of class \code{lambert} with elements
#'   \item{x.t}{transformed original data}
#'   \item{x}{original data}
#'   \item{mean}{mean after transformation but prior to standardization}
#'   \item{sd}{sd after transformation but prior to standardization}
#'   \item{tau.mat}{estimated parameters of LambertW::Gaussianize}
#'   \item{n}{number of nonmissing observations}
#'   \item{norm_stat}{Pearson's P / degrees of freedom}
#'   \item{standardize}{was the transformation standardized}
#'
#' The \code{predict} function returns the numeric value of the transformation
#' performed on new data, and allows for the inverse transformation as well.
#'
#' @references Georg M. Goerg (2016). LambertW: An R package for Lambert W x F
#'   Random Variables. R package version 0.6.4.
#'
#'   Georg M. Goerg (2011): Lambert W random variables - a new family of
#'   generalized skewed distributions with applications to risk estimation.
#'   Annals of Applied Statistics 3(5). 2197-2230.
#'
#'   Georg M. Goerg (2014): The Lambert Way to Gaussianize heavy-tailed data
#'   with the inverse of Tukey's h transformation as a special case. The
#'   Scientific World Journal.
#'
#' @examples
#' \dontrun{
#' x <- rgamma(100, 1, 1)
#'
#' lambert_obj <- lambert(x)
#' lambert_obj
#' p <- predict(lambert_obj)
#' x2 <- predict(lambert_obj, newdata = p, inverse = TRUE)
#'
#' all.equal(x2, x)
#' }
#'
#' @seealso \code{\link[LambertW]{Gaussianize}}
#' @importFrom stats sd
#' @export
lambert <- function(x, type = 's', standardize = TRUE, warn = FALSE, ...) {
  stopifnot(is.numeric(x))

  # Fit the Gaussianizing transform on complete cases only; NAs are
  # re-inserted afterward so the output aligns with the input.
  miss <- is.na(x)
  complete_x <- x[!miss]
  gfit <- unname(LambertW::Gaussianize(complete_x, type = type,
                                       return.tau.mat = TRUE, ...))

  transformed <- x
  transformed[!miss] <- gfit[[1]]
  tau.mat <- gfit[[2]]

  mu <- mean(transformed, na.rm = TRUE)
  sigma <- sd(transformed, na.rm = TRUE)

  if (standardize) transformed <- (transformed - mu) / sigma

  # Gaussianize returns a matrix; strip dims/names back to a plain vector.
  attributes(transformed) <- NULL

  ptest <- nortest::pearson.test(transformed)

  fit <- list(
    x.t = unname(transformed),
    x = x,
    mean = mu,
    sd = sigma,
    tau.mat = tau.mat,
    n = length(transformed) - sum(miss),
    type = type,
    norm_stat = unname(ptest$statistic / ptest$df),
    standardize = standardize
  )

  class(fit) <- 'lambert'
  fit
}

#' @rdname lambert
#' @method predict lambert
#' @export
predict.lambert <- function(object,
                            newdata = NULL,
                            inverse = FALSE,
                            ...) {
  # Default to the training data (or its transform, when inverting).
  if (is.null(newdata) & !inverse)
    newdata <- object$x
  if (is.null(newdata) & inverse)
    newdata <- object$x.t

  # When inverting, undo standardization before un-Gaussianizing.
  if (inverse & object$standardize)
    newdata <- newdata * object$sd + object$mean

  stopifnot(is.numeric(newdata))

  miss <- is.na(newdata)

  # Apply (or invert) the fitted Gaussianize on the non-missing entries.
  newdata[!miss] <- LambertW::Gaussianize(
    as.matrix(newdata[!miss]),
    type = object$type,
    tau.mat = object$tau.mat,
    inverse = inverse
  )

  if (!inverse & object$standardize)
    newdata <- (newdata - object$mean) / object$sd

  attributes(newdata) <- NULL
  unname(newdata)
}

#' @rdname lambert
#' @method print lambert
#' @export
print.lambert <- function(x, ...) {
  # Format each estimated tau parameter (beyond the first two rows,
  # which hold location/scale) as its own "- name = value" line.
  prettyTau <- apply(cbind('- ',
                           rownames(x$tau.mat)[-c(1:2)], ' = ',
                           round(x$tau.mat[-c(1:2)], 4), '\n'
                           ), 1, paste, collapse = '')

  cat(ifelse(x$standardize, "Standardized", "Non-Standardized"),
      'Lambert WxF Transformation of type', x$type,
      'with', x$n, 'nonmissing obs.:\n',
      'Estimated statistics:\n',
      prettyTau,
      '- mean (before standardization) =', x$mean, '\n',
      '- sd (before standardization) =', x$sd, '\n')
}

# -- file: R/log_x.R ----------------------------------------------------------

#' Log(x + a) Transformation
#'
#' @name log_x
#' @aliases predict.log_x
#'
#' @description Perform a log_b (x+a) normalization transformation
#' @param x A vector to normalize with x
#' @param standardize If TRUE, the transformed values are also centered and
#'   scaled, such that the transformation attempts a standard normal
#' @param a The constant to add to x (defaults to max(0, -min(x) + eps));
#'   see \code{bestLogConstant}
#' @param b The base of the log (defaults to 10)
#' @param eps The allowed error in the expression for the selected a
#' @param warn Should a warning result from infinite values?
#' @param object an object of class 'log_x'
#' @param newdata a vector of data to be (potentially reverse) transformed
#' @param inverse if TRUE, performs reverse transformation
#' @param ... additional arguments
#' @details \code{log_x} performs a simple log transformation in the context
#'   of bestNormalize, such that it creates a transformation that can be
#'   estimated and applied to new data via the \code{predict} function. The
#'   parameter a is essentially estimated by the training set by default
#'   (estimated as the minimum possible to some extent epsilon), while the
#'   base must be specified beforehand.
#'
#' @return A list of class \code{log_x} with elements
#'   \item{x.t}{transformed original data}
#'   \item{x}{original data}
#'   \item{mean}{mean after transformation but prior to standardization}
#'   \item{sd}{sd after transformation but prior to standardization}
#'   \item{a}{estimated a value}
#'   \item{b}{estimated base b value}
#'   \item{n}{number of nonmissing observations}
#'   \item{norm_stat}{Pearson's P / degrees of freedom}
#'   \item{standardize}{was the transformation standardized}
#'
#' The \code{predict} function returns the numeric value of the transformation
#' performed on new data, and allows for the inverse transformation as well.
#'
#' @examples
#' x <- rgamma(100, 1, 1)
#'
#' log_x_obj <- log_x(x)
#' log_x_obj
#' p <- predict(log_x_obj)
#' x2 <- predict(log_x_obj, newdata = p, inverse = TRUE)
#'
#' all.equal(x2, x)
#'
#' @importFrom stats sd
#' @export
log_x <- function(x, a = NULL, b = 10, standardize = TRUE, eps = .001, warn = TRUE, ...) {
  stopifnot(is.numeric(x))

  # Smallest admissible shift so that x + a > 0 (up to eps).
  min_a <- max(0, -(min(x, na.rm = TRUE) - eps))
  if (!length(a))
    a <- min_a
  if (a < min_a) {
    # FIX: the two message pieces previously ran together with no separator
    # ("...issuesStandardize..."); punctuation added between them.
    warning("Setting a < max(0, -(min(x) - eps)) can lead to transformation issues. ",
            "Standardize set to FALSE")
    standardize <- FALSE
  }

  x.t <- log(x + a, base = b)

  # All-infinite transforms are unusable; partial infinities disable
  # standardization (mean/sd would be undefined).
  stopifnot(!all(infinite_idx <- is.infinite(x.t)))
  if (any(infinite_idx)) {
    # FIX: respect the documented `warn` argument; previously the warning
    # fired unconditionally, making the parameter a no-op.
    if (warn) warning("Some values are infinite")
    standardize <- FALSE
  }

  mu <- mean(x.t, na.rm = TRUE)
  sigma <- sd(x.t, na.rm = TRUE)
  if (standardize) x.t <- (x.t - mu) / sigma

  ptest <- nortest::pearson.test(x.t)

  val <- list(
    x.t = x.t,
    x = x,
    mean = mu,
    sd = sigma,
    a = a,
    b = b,
    eps = eps,
    n = length(x.t) - sum(is.na(x)),
    norm_stat = unname(ptest$statistic / ptest$df),
    standardize = standardize
  )
  class(val) <- c('log_x', class(val))
  val
}

#' @rdname log_x
#' @method predict log_x
#' @export
predict.log_x <- function(object, newdata = NULL, inverse = FALSE, ...) {
  # Default to the training data (or its transform, when inverting).
  if (is.null(newdata) & !inverse)
    newdata <- object$x
  if (is.null(newdata) & inverse)
    newdata <- object$x.t

  if (inverse) {
    # Undo standardization first, then invert log_b(x + a).
    if (object$standardize)
      newdata <- newdata * object$sd + object$mean
    newdata <- object$b^newdata - object$a
  } else if (!inverse) {
    newdata <- log(newdata + object$a, object$b)
    if (object$standardize)
      newdata <- (newdata - object$mean) / object$sd
  }
  unname(newdata)
}

#' @rdname log_x
#' @method print log_x
#' @export
print.log_x <- function(x, ...) {
  # Human-readable summary of the fitted transformation.
  cat(ifelse(x$standardize, "Standardized", "Non-Standardized"),
      'Log_b(x + a) Transformation with', x$n, 'nonmissing obs.:\n',
      'Relevant statistics:\n',
      '- a =', x$a, '\n',
      '- b =', x$b, '\n',
      '- mean (before standardization) =', x$mean, '\n',
      '- sd (before standardization) =', x$sd, '\n')
}

# -- file: R/no_transform.R ---------------------------------------------------

#' Identity transformation and center/scale transform
#'
#' @name no_transform
#' @aliases predict.no_transform center_scale
#'
#' @description Perform an identity transformation. Admittedly it seems odd to
#'   have a dedicated function to essentially do I(x), but it makes sense to
#'   keep the same syntax as the other transformations so it plays nicely
#'   with them. As a benefit, the bestNormalize function will also show
#'   a comparable normalization statistic for the untransformed data. If
#'   \code{standardize == TRUE}, \code{center_scale} passes to bestNormalize
#'   instead.
#' @param x A vector
#' @param warn Should a warning result from infinite values?
#' @param object an object of class 'no_transform'
#' @param newdata a vector of data to be (potentially reverse) transformed
#' @param inverse if TRUE, performs reverse transformation
#' @param ... additional arguments
#' @details \code{no_transform} creates a identity transformation object
#'   that can be applied to new data via the \code{predict} function.
#'
#' @return A list of class \code{no_transform} with elements
#'   \item{x.t}{transformed original data}
#'   \item{x}{original data}
#'   \item{n}{number of nonmissing observations}
#'   \item{norm_stat}{Pearson's P / degrees of freedom}
#'
#' The \code{predict} function returns the numeric value of the transformation
#' performed on new data, and allows for the inverse transformation as well.
#'
#' @examples
#' x <- rgamma(100, 1, 1)
#'
#' no_transform_obj <- no_transform(x)
#' no_transform_obj
#' p <- predict(no_transform_obj)
#' x2 <- predict(no_transform_obj, newdata = p, inverse = TRUE)
#'
#' all.equal(x2, x)
#'
#' @importFrom stats sd
#' @export
no_transform <- function(x, warn = TRUE, ...) {
  stopifnot(is.numeric(x))

  # Identity: the "transform" is the data itself.
  x.t <- x

  if (all(infinite_idx <- is.infinite(x.t))) {
    stop("Transformation infinite for all x")
  }
  if (any(infinite_idx) & warn) {
    warning("Some values (but not all) transformed values are infinite")
  }

  # mean/sd are recorded for interface parity with the other transforms.
  mu <- mean(x.t, na.rm = TRUE)
  sigma <- sd(x.t, na.rm = TRUE)

  ptest <- nortest::pearson.test(x.t)

  val <- list(
    x.t = x.t,
    x = x,
    mean = mu,
    sd = sigma,
    n = length(x.t) - sum(is.na(x)),
    norm_stat = unname(ptest$statistic / ptest$df)
  )
  class(val) <- c('no_transform', class(val))
  val
}

#' @rdname no_transform
#' @method predict no_transform
#' @export
predict.no_transform <- function(object, newdata = NULL, inverse = FALSE, ...) {
  # Identity transform: forward and inverse both return the input unchanged.
  # FIX: removed the original's two redundant no-op branches
  # (newdata <- newdata in both directions).
  if (is.null(newdata))
    newdata <- if (inverse) object$x.t else object$x
  unname(newdata)
}

#' @rdname no_transform
#' @method print no_transform
#' @export
print.no_transform <- function(x, ...) {
  cat('I(x) Transformation with', x$n, 'nonmissing obs.\n')
}

#' @rdname no_transform
#' @importFrom stats sd
#' @export
center_scale <- function(x, warn = TRUE, ...) {
  stopifnot(is.numeric(x))

  x.t <- x

  if (all(infinite_idx <- is.infinite(x.t))) {
    stop("Transformation infinite for all x")
  }
  if (any(infinite_idx) & warn) {
    warning("Some values (but not all) transformed values are infinite")
  }

  # Center and scale; mean/sd are stored so predict() can invert.
  mu <- mean(x.t, na.rm = TRUE)
  sigma <- sd(x.t, na.rm = TRUE)

  x.t <- (x.t - mu) / sigma

  ptest <- nortest::pearson.test(x.t)

  val <- list(
    x.t = x.t,
    x = x,
    mean = mu,
    sd = sigma,
    n = length(x.t) - sum(is.na(x)),
    norm_stat = unname(ptest$statistic / ptest$df)
  )
  class(val) <- c('center_scale', class(val))
  val
}

#' @rdname no_transform
#' @method predict center_scale
#' @export
predict.center_scale <- function(object, newdata = NULL, inverse = FALSE, ...) {
  # Default to the training data (or its transform, when inverting).
  if (is.null(newdata) & !inverse)
    newdata <- object$x
  if (is.null(newdata) & inverse)
    newdata <- object$x.t

  if (inverse) {
    newdata <- newdata * object$sd + object$mean
  } else if (!inverse) {
    newdata <- (newdata - object$mean) / object$sd
  }
  unname(newdata)
}

#' @rdname no_transform
#' @method print center_scale
#' @export
print.center_scale <- function(x, ...) {
  cat('center_scale(x) Transformation with', x$n, 'nonmissing obs.\n',
      'Estimated statistics:\n',
      '- mean (before standardization) =', x$mean, '\n',
      '- sd (before standardization) =', x$sd, '\n')
}

#' @rdname no_transform
#' @param x A `no_transform` object.
#' @param ... not used
#' @importFrom tibble tibble
#' @export
tidy.no_transform <- function(x, ...) {
  # FIX: previously the function body ended in an assignment
  # (`value <- tibble(...)`), so the tibble was returned *invisibly*;
  # return it directly so `tidy(obj)` prints at the console as expected.
  tibble(
    "transform" = c("no_transform"),
    "norm_stat" = x$norm_stat,
    "chosen" = 1
  )
}

# -- file: R/plots.R ----------------------------------------------------------

#' Transformation plotting
#'
#' Plots transformation functions for objects produced by the bestNormalize
#' package
#'
#' @name plot.bestNormalize
#' @aliases plot.orderNorm plot.boxcox plot.yeojohnson plot.lambert
#'
#' @details The plots produced by the individual transformations are simply
#'   plots of the original values by the newly transformed values, with a line
#'   denoting where transformations would take place for new data.
#'
#'   For the bestNormalize object, this plots each of the possible
#'   transformations run by the original call to bestNormalize. The first
#'   argument in the "cols" parameter refers to the color of the chosen
#'   transformation.
#'
#' @rdname plot.bestNormalize
#' @param x a fitted transformation
#' @param inverse if TRUE, plots the inverse transformation
#' @param bounds a vector of bounds to plot for the transformation
#' @param cols a vector of colors to use for the transforms (see details)
#' @param methods a vector of transformations to plot
#' @param leg_loc the location of the legend on the plot
#' @param ...
#'   further parameters to be passed to \code{plot} and \code{lines}
#' @importFrom graphics legend lines plot points
#' @export
plot.bestNormalize <- function(x, inverse = FALSE, bounds = NULL,
                               cols = NULL,
                               methods = NULL,
                               leg_loc = 'top',
                               ...) {

  # Direction of the mapping: forward plots x -> g(x); inverse plots g(x) -> x.
  if (!inverse) {
    xvals <- x$x
    x_t <- x$x.t
  } else {
    xvals <- x$x.t
    x_t <- x$x
  }

  # Evaluation grid for each transformation curve.
  if (is.null(bounds)) {
    grid_x <- seq(min(xvals, na.rm = TRUE), max(xvals, na.rm = TRUE), length = 1000)
  } else {
    grid_x <- seq(min(bounds), max(bounds), length = 1000)
  }

  chosen_y <- predict(x, newdata = grid_x, inverse = inverse, warn = FALSE)

  # Curves for the competing transformations (all of them by default).
  if (is.null(methods)) {
    methods <- c(names(x$other_transforms))
    other_y <- lapply(x$other_transforms, function(obj) {
      predict(obj, newdata = grid_x, inverse = inverse, warn = FALSE)
    })
  } else {
    found <- intersect(names(x$other_transforms), methods)
    if (length(found) != length(methods))
      stop("Not all specified methods found")
    methods <- found

    keep <- x$other_transforms[names(x$other_transforms) %in% methods]

    other_y <- lapply(keep, function(obj) {
      predict(obj, newdata = grid_x, inverse = inverse, warn = FALSE)
    })
  }

  # Default palette: one color per curve, chosen transform first.
  if (is.null(cols)) {
    cols <- 1:(length(methods) + 1)
  }

  plot(grid_x, chosen_y, ylim = range(chosen_y, other_y, na.rm = TRUE),
       xlim = range(grid_x), type = 'l',
       col = cols[1], lwd = 2,
       xlab = ifelse(inverse, "g(x)", "x"),
       ylab = ifelse(!inverse, "g(x)", "x"),
       ...)

  lapply(1:length(other_y), function(i) {
    lines(grid_x, other_y[[i]], col = cols[i + 1], lwd = 2, ...)
  })

  labs <- c(class(x$chosen_transform)[1], methods)

  legend(leg_loc, labs, col = cols, bty = 'n', lwd = 2)

  # Rug of observed values along the appropriate axis.
  if (!inverse)
    points(x = jitter(xvals), y = rep(min(range(other_y, chosen_y, na.rm = TRUE)), length(xvals)), pch = '|')
  else
    points(x = rep(min(xvals, bounds), length(x$x)), y = x$x, pch = '_')
  invisible(x)
}

#' @rdname plot.bestNormalize
#' @importFrom graphics lines plot
#' @export
plot.orderNorm <- function(x, inverse = FALSE, bounds = NULL, ...) {

  # Direction of the mapping, as in plot.bestNormalize.
  if (!inverse) {
    xvals <- x$x
    x_t <- x$x.t
  } else {
    xvals <- x$x.t
    x_t <- x$x
  }

  # NB: a user-supplied `bounds` uses a coarser 100-point grid.
  if (is.null(bounds)) {
    grid_x <- seq(min(xvals, na.rm = TRUE), max(xvals, na.rm = TRUE), length = 1000)
  } else {
    grid_x <- seq(min(bounds, na.rm = TRUE), max(bounds, na.rm = TRUE), length = 100)
  }

  fitted_y <- predict(x, newdata = grid_x, inverse = inverse, warn = FALSE)
  plot(xvals, x_t, pch = 20, ylim = range(fitted_y, na.rm = TRUE), xlim = range(grid_x), ...)
  lines(grid_x, fitted_y, col = 'slateblue', lwd = 2, ...)
  invisible(x)
}

#' @rdname plot.bestNormalize
#' @importFrom graphics lines plot
#' @export
plot.boxcox <- plot.orderNorm

#' @rdname plot.bestNormalize
#' @importFrom graphics lines plot
#' @export
plot.yeojohnson <- plot.orderNorm

#' @rdname plot.bestNormalize
#' @importFrom graphics lines plot
#' @export
plot.lambert <- plot.orderNorm

# -- file: R/sqrt_x.R ---------------------------------------------------------

#' sqrt(x + a) Normalization
#'
#' @name sqrt_x
#' @aliases predict.sqrt_x
#'
#' @description Perform a sqrt (x+a) normalization transformation
#' @param x A vector to normalize with x
#' @param standardize If TRUE, the transformed values are also centered and
#'   scaled, such that the transformation attempts a standard normal
#' @param a The constant to add to x (defaults to max(0, -min(x)))
#' @param object an object of class 'sqrt_x'
#' @param newdata a vector of data to be (potentially reverse) transformed
#' @param inverse if TRUE, performs reverse transformation
#' @param ... additional arguments
#' @details \code{sqrt_x} performs a simple square-root transformation in the
#'   context of bestNormalize, such that it creates a transformation that can
#'   be estimated and applied to new data via the \code{predict} function. The
#'   parameter a is essentially estimated by the training set by default
#'   (estimated as the minimum possible), while the base
#'   must be specified beforehand.
21 | #' 22 | #' @return A list of class \code{sqrt_x} with elements \item{x.t}{transformed 23 | #' original data} 24 | #' \item{x}{original data} 25 | #' \item{mean}{mean after transformation but prior to standardization} 26 | #' \item{sd}{sd after transformation but prior to standardization} 27 | #' \item{n}{number of nonmissing observations} 28 | #' \item{norm_stat}{Pearson's P / degrees of freedom} 29 | #' \item{standardize}{was the transformation standardized} 30 | #' 31 | #' The \code{predict} function returns the numeric value of the transformation 32 | #' performed on new data, and allows for the inverse transformation as well. 33 | #' 34 | #' @examples 35 | #' x <- rgamma(100, 1, 1) 36 | #' 37 | #' sqrt_x_obj <- sqrt_x(x) 38 | #' sqrt_x_obj 39 | #' p <- predict(sqrt_x_obj) 40 | #' x2 <- predict(sqrt_x_obj, newdata = p, inverse = TRUE) 41 | #' 42 | #' all.equal(x2, x) 43 | #' 44 | #' @importFrom stats sd 45 | #' @export 46 | sqrt_x <- function(x, a = NULL, standardize = TRUE, ...) { 47 | stopifnot(is.numeric(x)) 48 | 49 | min_a <- max(0, -(min(x, na.rm = TRUE))) 50 | if(!length(a)) 51 | a <- min_a 52 | if(a < min_a) { 53 | warning("Setting a < max(0, -(min(x))) can lead to transformation issues", 54 | "Standardize set to FALSE") 55 | standardize <- FALSE 56 | } 57 | 58 | 59 | x.t <- sqrt(x + a) 60 | mu <- mean(x.t, na.rm = TRUE) 61 | sigma <- sd(x.t, na.rm = TRUE) 62 | if (standardize) x.t <- (x.t - mu) / sigma 63 | 64 | ptest <- nortest::pearson.test(x.t) 65 | 66 | val <- list( 67 | x.t = x.t, 68 | x = x, 69 | mean = mu, 70 | sd = sigma, 71 | a = a, 72 | n = length(x.t) - sum(is.na(x)), 73 | norm_stat = unname(ptest$statistic / ptest$df), 74 | standardize = standardize 75 | ) 76 | class(val) <- c('sqrt_x', class(val)) 77 | val 78 | } 79 | 80 | #' @rdname sqrt_x 81 | #' @method predict sqrt_x 82 | #' @export 83 | predict.sqrt_x <- function(object, newdata = NULL, inverse = FALSE, ...) 
{ 83 | if (is.null(newdata) & !inverse) 84 | newdata <- object$x 85 | if (is.null(newdata) & inverse) 86 | newdata <- object$x.t 87 | 88 | if (inverse) { 89 | if (object$standardize) 90 | newdata <- newdata * object$sd + object$mean 91 | newdata <- newdata^2 - object$a 92 | } else if (!inverse) { 93 | newdata <- sqrt(newdata + object$a) 94 | if (object$standardize) 95 | newdata <- (newdata - object$mean) / object$sd 96 | } 97 | unname(newdata) 98 | } 99 | 100 | #' @rdname sqrt_x 101 | #' @method print sqrt_x 102 | #' @export 103 | print.sqrt_x <- function(x, ...) { 104 | cat(ifelse(x$standardize, "Standardized", "Non-Standardized"), 105 | 'sqrt(x + a) Transformation with', x$n, 'nonmissing obs.:\n', 106 | 'Relevant statistics:\n', 107 | '- a =', x$a, '\n', 108 | '- mean (before standardization) =', x$mean, '\n', 109 | '- sd (before standardization) =', x$sd, '\n') 110 | } 111 | 112 | 113 | -------------------------------------------------------------------------------- /R/step_best_normalize.R: -------------------------------------------------------------------------------- 1 | #' Run bestNormalize transformation for \code{recipes} implementation 2 | #' 3 | #' @aliases step_bestNormalize step_bestNormalize_new 4 | #' 5 | #' @description `step_best_normalize` creates a specification of a recipe step 6 | #' (see `recipes` package) that will transform data using the best of a suite 7 | #' of normalization transformations estimated (by default) using 8 | #' cross-validation. 9 | #' 10 | #' @param recipe A formula or recipe 11 | #' @param ... One or more selector functions to choose which variables are 12 | #' affected by the step. See [selections()] for more details. For the `tidy` 13 | #' method, these are not currently used. 14 | #' @param role Not used by this step since no new variables are created. 15 | #' @param transform_info The fitted bestNormalize transformation object for 16 | #' each selected variable. This is `NULL` until computed by [prep.recipe()].
17 | #' @param transform_options options to be passed to bestNormalize 18 | #' @param num_unique An integer; variables with fewer unique values than this 19 | #' will not be evaluated for a transformation. 20 | #' @param trained For recipes functionality 21 | #' @param skip For recipes functionality 22 | #' @param id For recipes functionality 23 | #' 24 | #' 25 | #' 26 | #' @return An updated version of `recipe` with the new step added to the 27 | #' sequence of existing steps (if any). For the `tidy` method, a tibble with 28 | #' columns `terms` (the selectors or variables selected), `chosen_transform` 29 | #' (the transformation selected), and `cv_info` (cross-validation results). 30 | #' @concept preprocessing 31 | #' @concept transformation_methods 32 | #' @export 33 | #' 34 | #' @details The bestNormalize transformation can be used to rescale a variable 35 | #' to be more similar to a normal distribution. See `?bestNormalize` for more 36 | #' information; `step_best_normalize` is the implementation of `bestNormalize` 37 | #' in the `recipes` context. 38 | #' 39 | #' As of version 1.7, the `butcher` package can be used to (hopefully) improve 40 | #' scalability of this function on bigger data sets.
41 | #' 42 | #' @examples 43 | #' 44 | #' library(recipes) 45 | #' rec <- recipe(~ ., data = as.data.frame(iris)) 46 | #' 47 | #' bn_trans <- step_best_normalize(rec, all_numeric()) 48 | #' 49 | #' bn_estimates <- prep(bn_trans, training = as.data.frame(iris)) 50 | #' 51 | #' bn_data <- bake(bn_estimates, as.data.frame(iris)) 52 | #' 53 | #' plot(density(iris[, "Petal.Length"]), main = "before") 54 | #' plot(density(bn_data$Petal.Length), main = "after") 55 | #' 56 | #' tidy(bn_trans, number = 1) 57 | #' tidy(bn_estimates, number = 1) 58 | #' 59 | #' @seealso \code{\link[bestNormalize]{bestNormalize}} \code{\link{orderNorm}}, 60 | #' [recipe()] [prep.recipe()] [bake.recipe()] 61 | #' 62 | #' @importFrom recipes recipe rand_id add_step ellipse_check step 63 | #' 64 | step_best_normalize <- 65 | function(recipe, 66 | ..., 67 | role = NA, 68 | trained = FALSE, 69 | transform_info = NULL, 70 | transform_options = list(), 71 | num_unique = 5, 72 | skip = FALSE, 73 | id = rand_id("best_normalize")) { 74 | add_step( 75 | recipe, 76 | step_best_normalize_new( 77 | terms = ellipse_check(...), 78 | role = role, 79 | trained = trained, 80 | transform_info = transform_info, 81 | transform_options = transform_options, 82 | num_unique = num_unique, 83 | skip = skip, 84 | id = id 85 | ) 86 | ) 87 | } 88 | 89 | step_best_normalize_new <- 90 | function(terms, role, trained, transform_info, transform_options, num_unique, skip, id) { 91 | step( 92 | subclass = "best_normalize", 93 | terms = terms, 94 | role = role, 95 | trained = trained, 96 | transform_info = transform_info, 97 | transform_options = transform_options, 98 | num_unique = num_unique, 99 | skip = skip, 100 | id = id 101 | ) 102 | } 103 | 104 | #' @export 105 | #' @importFrom recipes prep recipes_eval_select check_type 106 | prep.step_best_normalize <- function(x, training, info = NULL, ...) 
{ 107 | col_names <- recipes_eval_select(x$terms, training, info) 108 | check_type(training[, col_names]) 109 | 110 | values <- apply( 111 | training[, col_names], 112 | 2, 113 | estimate_bn, 114 | transform_options = x$transform_options, 115 | num_unique = x$num_unique 116 | ) 117 | 118 | step_best_normalize_new( 119 | terms = x$terms, 120 | role = x$role, 121 | trained = TRUE, 122 | transform_info = values, 123 | transform_options = x$transform_options, 124 | num_unique = x$num_unique, 125 | skip = x$skip, 126 | id = x$id 127 | ) 128 | } 129 | 130 | #' @export 131 | #' @importFrom recipes bake 132 | #' @importFrom tibble as_tibble 133 | bake.step_best_normalize <- function(object, new_data, ...) { 134 | if (length(object$transform_info) == 0) 135 | return(as_tibble(new_data)) 136 | param <- names(object$transform_info) 137 | for (i in seq_along(object$transform_info)) 138 | new_data[, param[i]] <- 139 | predict(object$transform_info[[param[i]]], getElement(new_data, param[i]), warn = FALSE) 140 | as_tibble(new_data) 141 | } 142 | #' @export 143 | #' @importFrom recipes print_step 144 | print.step_best_normalize <- 145 | function(x, width = max(20, options()$width - 35), ...) 
{ 146 | title <- "bestNormalize transformation on " 147 | print_step(names(x$transform_info), x$terms, x$trained, width = width, title = title) 148 | invisible(x) 149 | } 150 | 151 | ## estimates the transformations 152 | estimate_bn <- function(dat, 153 | transform_options = list(), 154 | num_unique = 5) { 155 | 156 | # Returns the identity transformation if not enough unique values 157 | if (length(unique(dat)) < num_unique) 158 | return(no_transform(dat)) 159 | 160 | transform_options$x <- dat 161 | 162 | # Set some new defaults to bestNormalize unless otherwise specified 163 | if(is.null(transform_options$r)) 164 | transform_options$r <- 1 165 | if(is.null(transform_options$warn)) 166 | transform_options$warn <- FALSE 167 | if(is.null(transform_options$quiet)) 168 | transform_options$quiet <- TRUE 169 | 170 | res <- do.call(bestNormalize, transform_options) 171 | res 172 | } 173 | 174 | #' @rdname step_best_normalize 175 | #' @param x A `step_best_normalize` object. 176 | #' @export 177 | #' @importFrom recipes tidy is_trained sel2char 178 | #' @importFrom tibble tibble 179 | #' 180 | tidy.step_best_normalize <- function(x, ...) { 181 | if (is_trained(x)) { 182 | val <- lapply(x$transform_info, tidy) 183 | chosen <- sapply(val, function(xx) xx$transform[xx$chosen == 1]) 184 | res <- tibble(terms = names(x$transform_info), 185 | chosen_transform = chosen, 186 | cv_info = val) 187 | } else { 188 | term_names <- sel2char(x$terms) 189 | res <- tibble(terms = term_names, chosen = as.double(NA), cv_info = as.double(NA)) 190 | } 191 | res$id <- x$id 192 | res 193 | } 194 | 195 | #' @export 196 | step_bestNormalize <- function(...) { 197 | .Deprecated("step_best_normalize", package = "bestNormalize", old = "step_bestNormalize") 198 | step_best_normalize(...) 199 | } 200 | 201 | #' @export 202 | step_bestNormalize_new <- function(...) 
{ 203 | .Deprecated("step_best_normalize_new", package = "bestNormalize", old = "step_bestNormalize_new") 204 | step_best_normalize_new(...) 205 | } 206 | 207 | #' @rdname step_best_normalize 208 | #' @param x A `step_best_normalize` object. 209 | #' @importFrom butcher axe_env 210 | #' @importFrom purrr map 211 | #' @export 212 | axe_env.step_best_normalize <- function(x, ...) { 213 | x$terms <- purrr::map(x$terms, function(z) butcher::axe_env(z, ...)) 214 | x 215 | } 216 | 217 | #' @export 218 | required_pkgs.step_best_normalize <- function(x, ...) { 219 | c("bestNormalize") 220 | } 221 | -------------------------------------------------------------------------------- /R/step_orderNorm.R: -------------------------------------------------------------------------------- 1 | #' ORQ normalization (orderNorm) for \code{recipes} implementation 2 | #' 3 | #' @description `step_orderNorm` creates a specification of a recipe step (see 4 | #' `recipes` package) that will transform data using the ORQ (orderNorm) 5 | #' transformation, which approximates the "true" normalizing transformation if 6 | #' one exists. This is considerably faster than `step_bestNormalize`. 7 | #' 8 | #' @param recipe A formula or recipe 9 | #' @param ... One or more selector functions to choose which variables are 10 | #' affected by the step. See [selections()] for more details. For the `tidy` 11 | #' method, these are not currently used. 12 | #' @param role Not used by this step since no new variables are created. 13 | #' @param transform_info The fitted orderNorm transformation object for each 14 | #' selected variable. This is `NULL` until computed by [prep.recipe()]. 15 | #' @param transform_options options to be passed to orderNorm 16 | #' @param num_unique An integer; variables with fewer unique values than this 17 | #' will not be evaluated for a transformation.
18 | #' @param trained For recipes functionality 19 | #' @param skip For recipes functionality 20 | #' @param id For recipes functionality 21 | #' 22 | #' 23 | #' @return An updated version of `recipe` with the new step added to the 24 | #' sequence of existing steps (if any). For the `tidy` method, a tibble with 25 | #' columns `terms` (the selectors or variables selected) and `value` (the 26 | #' lambda estimate). 27 | #' @concept preprocessing 28 | #' @concept transformation_methods 29 | #' @export 30 | #' 31 | #' @details The orderNorm transformation can be used to rescale a variable to be 32 | #' more similar to a normal distribution. See `?orderNorm` for more 33 | #' information; `step_orderNorm` is the implementation of `orderNorm` in the 34 | #' `recipes` context. 35 | #' 36 | #' As of version 1.7, the `butcher` package can be used to (hopefully) improve 37 | #' scalability of this function on bigger data sets. 38 | #' 39 | #' @examples 40 | #' library(recipes) 41 | #' rec <- recipe(~ ., data = as.data.frame(iris)) 42 | #' 43 | #' orq_trans <- step_orderNorm(rec, all_numeric()) 44 | #' 45 | #' orq_estimates <- prep(orq_trans, training = as.data.frame(iris)) 46 | #' 47 | #' orq_data <- bake(orq_estimates, as.data.frame(iris)) 48 | #' 49 | #' plot(density(iris[, "Petal.Length"]), main = "before") 50 | #' plot(density(orq_data$Petal.Length), main = "after") 51 | #' 52 | #' tidy(orq_trans, number = 1) 53 | #' tidy(orq_estimates, number = 1) 54 | #' 55 | #' 56 | #' @seealso \code{\link[bestNormalize]{orderNorm}} \code{\link{bestNormalize}}, 57 | #' [recipe()] [prep.recipe()] [bake.recipe()] 58 | #' 59 | #' @references Ryan A. Peterson (2019). Ordered quantile normalization: a 60 | #' semiparametric transformation built for the cross-validation era. Journal 61 | #' of Applied Statistics, 1-16. 
62 | #' 63 | #' @importFrom recipes recipe rand_id add_step ellipse_check step 64 | #' 65 | step_orderNorm <- 66 | function(recipe, 67 | ..., 68 | role = NA, 69 | trained = FALSE, 70 | transform_info = NULL, 71 | transform_options = list(), 72 | num_unique = 5, 73 | skip = FALSE, 74 | id = rand_id("orderNorm")) { 75 | add_step( 76 | recipe, 77 | step_orderNorm_new( 78 | terms = ellipse_check(...), 79 | role = role, 80 | trained = trained, 81 | transform_info = transform_info, 82 | transform_options = transform_options, 83 | num_unique = num_unique, 84 | skip = skip, 85 | id = id 86 | ) 87 | ) 88 | } 89 | 90 | step_orderNorm_new <- 91 | function(terms, role, trained, transform_info, transform_options, num_unique, skip, id) { 92 | step( 93 | subclass = "orderNorm", 94 | terms = terms, 95 | role = role, 96 | trained = trained, 97 | transform_info = transform_info, 98 | transform_options = transform_options, 99 | num_unique = num_unique, 100 | skip = skip, 101 | id = id 102 | ) 103 | } 104 | 105 | #' @export 106 | #' @importFrom recipes prep recipes_eval_select check_type 107 | prep.step_orderNorm <- function(x, training, info = NULL, ...) { 108 | col_names <- recipes_eval_select(x$terms, training, info) 109 | check_type(training[, col_names]) 110 | 111 | values <- apply( 112 | training[, col_names], 113 | 2, 114 | estimate_orq, 115 | transform_options = x$transform_options, 116 | num_unique = x$num_unique 117 | ) 118 | 119 | step_orderNorm_new( 120 | terms = x$terms, 121 | role = x$role, 122 | trained = TRUE, 123 | transform_info = values, 124 | transform_options = x$transform_options, 125 | num_unique = x$num_unique, 126 | skip = x$skip, 127 | id = x$id 128 | ) 129 | } 130 | 131 | #' @export 132 | #' @importFrom tibble as_tibble 133 | bake.step_orderNorm <- function(object, new_data, ...) 
{ 134 | if (length(object$transform_info) == 0) 135 | return(as_tibble(new_data)) 136 | param <- names(object$transform_info) 137 | for (i in seq_along(object$transform_info)) 138 | new_data[, param[i]] <- 139 | predict(object$transform_info[[param[i]]], getElement(new_data, param[i]), warn = FALSE) 140 | as_tibble(new_data) 141 | } 142 | 143 | #' @export 144 | #' @importFrom recipes print_step 145 | print.step_orderNorm <- 146 | function(x, width = max(20, options()$width - 35), ...) { 147 | title <- "orderNorm transformation on " 148 | print_step(names(x$transform_info), x$terms, x$trained, width = width, title = title) 149 | invisible(x) 150 | } 151 | 152 | ## estimates the transformations 153 | estimate_orq <- function(dat, 154 | transform_options = list(), 155 | num_unique = 5) { 156 | 157 | # Returns the identity transformation if not enough unique values 158 | if (length(unique(dat)) < num_unique) 159 | return(no_transform(dat)) 160 | 161 | transform_options$x <- dat 162 | 163 | # Set some new defaults to orderNorm unless otherwise specified 164 | if(is.null(transform_options$warn)) 165 | transform_options$warn <- FALSE 166 | 167 | res <- do.call(orderNorm, transform_options) 168 | res 169 | } 170 | 171 | #' @rdname step_orderNorm 172 | #' @param x A `step_orderNorm` object. 173 | #' @export 174 | #' @importFrom recipes tidy is_trained sel2char 175 | #' @importFrom tibble tibble 176 | tidy.step_orderNorm <- function(x, ...) { 177 | if (is_trained(x)) { 178 | res <- tibble(terms = names(x$transform_info), 179 | value = x$transform_info) 180 | } else { 181 | term_names <- sel2char(x$terms) 182 | res <- tibble(terms = term_names, value = as.double(NA)) 183 | } 184 | res$id <- x$id 185 | res 186 | } 187 | 188 | #' @rdname step_orderNorm 189 | #' @param x A `step_orderNorm` object. 190 | #' @importFrom butcher axe_env 191 | #' @importFrom purrr map 192 | #' @export 193 | axe_env.step_orderNorm <- function(x, ...) 
{ 194 | x$terms <- purrr::map(x$terms, function(z) butcher::axe_env(z, ...)) 195 | x 196 | } 197 | 198 | 199 | #' @importFrom generics required_pkgs 200 | #' @export 201 | generics::required_pkgs 202 | 203 | #' @export 204 | required_pkgs.step_orderNorm <- function(x, ...) { 205 | c("bestNormalize") 206 | } 207 | -------------------------------------------------------------------------------- /R/yeojohnson.R: -------------------------------------------------------------------------------- 1 | #'Yeo-Johnson Normalization 2 | #' 3 | #'@name yeojohnson 4 | #'@aliases predict.yeojohnson 5 | #' 6 | #'@description Perform a Yeo-Johnson Transformation and center/scale a vector to 7 | #' attempt normalization 8 | #'@param x A vector to normalize with Yeo-Johnson 9 | #'@param eps A value to compare lambda against to see if it is equal to zero 10 | #' @param standardize If TRUE, the transformed values are also centered and 11 | #' scaled, such that the transformation attempts a standard normal 12 | #'@param ... Additional arguments that can be passed to the estimation of the 13 | #' lambda parameter (lower, upper) 14 | #'@param newdata a vector of data to be (reverse) transformed 15 | #'@param inverse if TRUE, performs reverse transformation 16 | #'@param object an object of class 'yeojohnson' 17 | #'@details \code{yeojohnson} estimates the optimal value of lambda for the 18 | #' Yeo-Johnson transformation. This transformation can be performed on new 19 | #' data, and inverted, via the \code{predict} function. 20 | #' 21 | #' The Yeo-Johnson is similar to the Box-Cox method, however it allows for the 22 | #' transformation of nonpositive data as well. The \code{step_YeoJohnson} 23 | #' function in the \code{recipes} package is another useful resource (see 24 | #' references). 
25 | #' 26 | #'@return A list of class \code{yeojohnson} with elements 27 | #' 28 | #' \item{x.t}{transformed original data} 29 | #' \item{x}{original data} 30 | #' \item{mean}{mean after transformation but prior to standardization} 31 | #' \item{sd}{sd after transformation but prior to standardization} 32 | #' \item{lambda}{estimated lambda value for skew transformation} 33 | #' \item{n}{number of nonmissing observations} 34 | #' \item{norm_stat}{Pearson's P / degrees of freedom} 35 | #' \item{standardize}{Was the transformation standardized} 36 | #' 37 | #' The \code{predict} function returns the numeric value of the transformation 38 | #' performed on new data, and allows for the inverse transformation as well. 39 | #' 40 | #'@references Yeo, I. K., & Johnson, R. A. (2000). A new family of power 41 | #' transformations to improve normality or symmetry. Biometrika. 42 | #' 43 | #' Max Kuhn and Hadley Wickham (2017). recipes: Preprocessing Tools to Create 44 | #' Design Matrices. R package version 0.1.0.9000. 45 | #' https://github.com/topepo/recipes 46 | #' 47 | #' 48 | #' 49 | #' @examples 50 | #' 51 | #' x <- rgamma(100, 1, 1) 52 | #' 53 | #' yeojohnson_obj <- yeojohnson(x) 54 | #' yeojohnson_obj 55 | #' p <- predict(yeojohnson_obj) 56 | #' x2 <- predict(yeojohnson_obj, newdata = p, inverse = TRUE) 57 | #' 58 | #' all.equal(x2, x) 59 | #' 60 | #'@importFrom stats sd 61 | #'@export 62 | yeojohnson <- function(x, eps = .001, standardize = TRUE, ...) { 63 | stopifnot(is.numeric(x)) 64 | lambda <- estimate_yeojohnson_lambda(x, eps = eps, ...) 
65 | x.t <- x 66 | na_idx <- is.na(x) 67 | x.t[!na_idx] <- yeojohnson_trans(x[!na_idx], lambda, eps) 68 | mu <- mean(x.t, na.rm = TRUE) 69 | sigma <- sd(x.t, na.rm = TRUE) 70 | if (standardize) x.t <- (x.t - mu) / sigma 71 | 72 | ptest <- nortest::pearson.test(x.t) 73 | 74 | val <- list( 75 | x.t = x.t, 76 | x = x, 77 | mean = mu, 78 | sd = sigma, 79 | lambda = lambda, 80 | eps = eps, 81 | n = length(x.t) - sum(na_idx), 82 | norm_stat = unname(ptest$statistic / ptest$df), 83 | standardize = standardize 84 | ) 85 | class(val) <- 'yeojohnson' 86 | val 87 | } 88 | 89 | #' @rdname yeojohnson 90 | #' @method predict yeojohnson 91 | #' @export 92 | predict.yeojohnson <- function(object, 93 | newdata = NULL, 94 | inverse = FALSE, 95 | ...) { 96 | if (is.null(newdata) & !inverse) 97 | newdata <- object$x 98 | if (is.null(newdata)) 99 | newdata <- object$x.t 100 | 101 | na_idx <- is.na(newdata) 102 | 103 | if (inverse) { 104 | if(object$standardize) newdata <- newdata * object$sd + object$mean 105 | newdata[!na_idx] <- inv_yeojohnson_trans(newdata[!na_idx], object$lambda) 106 | } else { 107 | newdata[!na_idx] <- yeojohnson_trans(newdata[!na_idx], object$lambda) 108 | if(object$standardize) newdata <- (newdata - object$mean) / object$sd 109 | } 110 | 111 | unname(newdata) 112 | } 113 | 114 | #' @rdname yeojohnson 115 | #' @method print yeojohnson 116 | #' @export 117 | print.yeojohnson <- function(x, ...) { 118 | cat(ifelse(x$standardize, "Standardized", "Non-Standardized"), 119 | 'Yeo-Johnson Transformation with', x$n, 'nonmissing obs.:\n', 120 | 'Estimated statistics:\n', 121 | '- lambda =', x$lambda, '\n', 122 | '- mean (before standardization) =', x$mean, '\n', 123 | '- sd (before standardization) =', x$sd, '\n') 124 | } 125 | 126 | # Helper functions that estimates yj lambda parameter 127 | #' @importFrom stats var optimize 128 | estimate_yeojohnson_lambda <- function(x, lower = -5, upper = 5, eps = .001, ...) 
{ 129 | n <- length(x) 130 | ccID <- !is.na(x) 131 | x <- x[ccID] 132 | pos_idx = which(x >= 0) 133 | neg_idx = which(x < 0) 134 | 135 | constant <- sum(sign(x) * log(abs(x) + 1)) 136 | 137 | # See references, Yeo & Johnson Biometrika (2000) 138 | yj_loglik <- function(lambda) { 139 | x_t <- yeojohnson_trans(x, lambda, eps, pos_idx = pos_idx, neg_idx = neg_idx) 140 | x_t_bar <- mean(x_t) 141 | x_t_var <- var(x_t) * (n - 1) / n 142 | -0.5 * n * log(x_t_var) + (lambda - 1) * constant 143 | } 144 | 145 | results <- optimize(yj_loglik, lower = lower, 146 | upper = upper, maximum = TRUE, 147 | tol = .0001) 148 | 149 | results$maximum 150 | } 151 | 152 | yeojohnson_trans <- function(x, lambda, eps = .001, pos_idx, neg_idx) { 153 | if(missing(pos_idx)) 154 | pos_idx <- which(x >= 0) 155 | if(missing(neg_idx)) 156 | neg_idx <- which(x < 0) 157 | 158 | # Transform nonnegative values 159 | if (length(pos_idx)>0) { 160 | if (abs(lambda) < eps) { 161 | x[pos_idx] <- log(x[pos_idx] + 1) 162 | } else { 163 | x[pos_idx] <- ((x[pos_idx] + 1) ^ lambda - 1) / lambda 164 | } 165 | } 166 | 167 | # Transform negative values 168 | if (length(neg_idx)>0){ 169 | if (abs(lambda - 2) < eps) { 170 | x[neg_idx] <- - log(-x[neg_idx] + 1) 171 | } else { 172 | x[neg_idx] <- - ((-x[neg_idx] + 1) ^ (2 - lambda) - 1) / (2 - lambda) 173 | } 174 | } 175 | x 176 | } 177 | 178 | inv_yeojohnson_trans <- function(x, lambda, eps = .001) { 179 | val <- x 180 | neg_idx <- x < 0 181 | 182 | if(any(!neg_idx)) { 183 | if(abs(lambda) < eps) { 184 | val[!neg_idx] <- exp(x[!neg_idx]) - 1 185 | } else { 186 | val[!neg_idx] <- (x[!neg_idx] * lambda + 1) ^ (1 / lambda) - 1 187 | } 188 | } 189 | if(any(neg_idx)) { 190 | if(abs(lambda - 2) < eps) { 191 | val[neg_idx] <- -expm1(-x[neg_idx]) 192 | } else { 193 | val[neg_idx] <- 1 - (-(2 - lambda) * x[neg_idx] + 1) ^ (1 / (2 - lambda)) 194 | } 195 | } 196 | val 197 | } 198 | -------------------------------------------------------------------------------- /README.Rmd:
-------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, echo = FALSE, include=FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "README-" 12 | ) 13 | library(bestNormalize) 14 | ``` 15 | 16 | 17 | # bestNormalize: Flexibly calculate the best normalizing transformation for a vector 18 | 19 | [![CRAN version](https://www.r-pkg.org/badges/version/bestNormalize)](https://cran.r-project.org/package=bestNormalize) 20 | [![R-CMD-check](https://github.com/petersonR/bestNormalize/workflows/R-CMD-check/badge.svg)](https://github.com/petersonR/bestNormalize/actions) 21 | [![downloads](https://cranlogs.r-pkg.org/badges/bestNormalize)](https://cran.r-project.org/package=bestNormalize) 22 | [![Codecov test coverage](https://codecov.io/gh/petersonR/bestNormalize/branch/master/graph/badge.svg)](https://app.codecov.io/gh/petersonR/bestNormalize?branch=master) 23 | 24 | 25 | The `bestNormalize` R package was designed to help find a normalizing transformation for a vector. There are many techniques that have been developed in this aim, however each has been subject to their own strengths/weaknesses, and it is unclear on how to decide which will work best until the data is observed. This package will look at a range of possible transformations and return the best one, i.e. the one that makes it look the *most* normal. 26 | 27 | Note that some authors use the term "normalize" differently than in this package. We define "normalize": to transform a vector of data in such a way that the transformed values follow a Gaussian distribution (or equivalently, a bell curve). This is in contrast to other such techniques designed to transform values to the 0-1 range, or to the -1 to 1 range. 28 | 29 | This package also introduces a new adaptation of a normalization technique, which we call Ordered Quantile normalization (`orderNorm()`, or ORQ). 
ORQ transforms the data based off of a rank mapping to the normal distribution. This allows us to *guarantee* normally distributed transformed data (if ties are not present). The adaptation uses a shifted logit approximation on the ranks transformation to perform the transformation on newly observed data outside of the original domain. On new data within the original domain, the transformation uses linear interpolation of the fitted transformation. 30 | 31 | To evaluate the efficacy of the normalization technique, the `bestNormalize()` function implements repeated cross-validation to estimate the Pearson's P statistic divided by its degrees of freedom. This is called the "Normality statistic", and if it is close to 1 (or less), then the transformation can be thought of as working well. The function is designed to select the transformation that produces the lowest P / df value, when estimated on out-of-sample data (estimating this on in-sample data will always choose the orderNorm technique, and is generally not the main goal of these procedures). 
32 | 33 | ## Installation 34 | 35 | You can install the most recent (devel) version of bestNormalize from GitHub with: 36 | 37 | ```{r gh-installation, eval = FALSE} 38 | # install.packages("devtools") 39 | devtools::install_github("petersonR/bestNormalize") 40 | ``` 41 | 42 | Or, you can download it from CRAN with: 43 | 44 | ```{r cran-installation, eval = FALSE} 45 | install.packages("bestNormalize") 46 | ``` 47 | 48 | ## Example 49 | 50 | In this example, we generate 1000 draws from a gamma distribution, and normalize them: 51 | 52 | ```{r, eval = FALSE} 53 | library(bestNormalize) 54 | ``` 55 | 56 | ```{r example} 57 | set.seed(100) 58 | x <- rgamma(1000, 1, 1) 59 | 60 | # Estimate best transformation with repeated cross-validation 61 | BN_obj <- bestNormalize(x, allow_lambert_s = TRUE) 62 | BN_obj 63 | 64 | # Perform transformation 65 | gx <- predict(BN_obj) 66 | 67 | # Perform reverse transformation 68 | x2 <- predict(BN_obj, newdata = gx, inverse = TRUE) 69 | 70 | # Prove the transformation is 1:1 71 | all.equal(x2, x) 72 | 73 | ``` 74 | 75 | As of version 1.3, the package supports leave-one-out cross-validation as well. ORQ normalization works very well when the size of the test dataset is low relative to the training data set, so it will often be selected via leave-one-out cross-validation (which is why we set `allow_orderNorm = FALSE` here). 76 | 77 | ```{r} 78 | (BN_loo <- bestNormalize(x, allow_orderNorm = FALSE, allow_lambert_s = TRUE, loo = TRUE)) 79 | ``` 80 | 81 | It is also possible to visualize these transformations: 82 | 83 | ```{r, eval = FALSE} 84 | plot(BN_obj, leg_loc = "bottomright") 85 | ``` 86 | 87 | 88 | For a more in depth tutorial, please consult [the package vignette](https://CRAN.R-project.org/package=bestNormalize/vignettes/bestNormalize.html), or the [package website](https://petersonr.github.io/bestNormalize/). 
89 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # bestNormalize: Flexibly calculate the best normalizing transformation for a vector 5 | 6 | [![CRAN 7 | version](https://www.r-pkg.org/badges/version/bestNormalize)](https://cran.r-project.org/package=bestNormalize) 8 | [![R-CMD-check](https://github.com/petersonR/bestNormalize/workflows/R-CMD-check/badge.svg)](https://github.com/petersonR/bestNormalize/actions) 9 | [![downloads](https://cranlogs.r-pkg.org/badges/bestNormalize)](https://cran.r-project.org/package=bestNormalize) 10 | [![Codecov test 11 | coverage](https://codecov.io/gh/petersonR/bestNormalize/branch/master/graph/badge.svg)](https://app.codecov.io/gh/petersonR/bestNormalize?branch=master) 12 | 13 | The `bestNormalize` R package was designed to help find a normalizing 14 | transformation for a vector. There are many techniques that have been 15 | developed in this aim, however each has been subject to their own 16 | strengths/weaknesses, and it is unclear on how to decide which will work 17 | best until the data is observed. This package will look at a range of 18 | possible transformations and return the best one, i.e. the one that 19 | makes it look the *most* normal. 20 | 21 | Note that some authors use the term “normalize” differently than in this 22 | package. We define “normalize”: to transform a vector of data in such a 23 | way that the transformed values follow a Gaussian distribution (or 24 | equivalently, a bell curve). This is in contrast to other such 25 | techniques designed to transform values to the 0-1 range, or to the -1 26 | to 1 range. 27 | 28 | This package also introduces a new adaptation of a normalization 29 | technique, which we call Ordered Quantile normalization (`orderNorm()`, 30 | or ORQ). ORQ transforms the data based off of a rank mapping to the 31 | normal distribution. 
This allows us to *guarantee* normally distributed 32 | transformed data (if ties are not present). The adaptation uses a 33 | shifted logit approximation on the ranks transformation to perform the 34 | transformation on newly observed data outside of the original domain. On 35 | new data within the original domain, the transformation uses linear 36 | interpolation of the fitted transformation. 37 | 38 | To evaluate the efficacy of the normalization technique, the 39 | `bestNormalize()` function implements repeated cross-validation to 40 | estimate the Pearson’s P statistic divided by its degrees of freedom. 41 | This is called the “Normality statistic”, and if it is close to 1 (or 42 | less), then the transformation can be thought of as working well. The 43 | function is designed to select the transformation that produces the 44 | lowest P / df value, when estimated on out-of-sample data (estimating 45 | this on in-sample data will always choose the orderNorm technique, and 46 | is generally not the main goal of these procedures). 
47 | 48 | ## Installation 49 | 50 | You can install the most recent (devel) version of bestNormalize from 51 | GitHub with: 52 | 53 | ``` r 54 | # install.packages("devtools") 55 | devtools::install_github("petersonR/bestNormalize") 56 | ``` 57 | 58 | Or, you can download it from CRAN with: 59 | 60 | ``` r 61 | install.packages("bestNormalize") 62 | ``` 63 | 64 | ## Example 65 | 66 | In this example, we generate 1000 draws from a gamma distribution, and 67 | normalize them: 68 | 69 | ``` r 70 | library(bestNormalize) 71 | ``` 72 | 73 | ``` r 74 | set.seed(100) 75 | x <- rgamma(1000, 1, 1) 76 | 77 | # Estimate best transformation with repeated cross-validation 78 | BN_obj <- bestNormalize(x, allow_lambert_s = TRUE) 79 | #> Warning: package 'lamW' was built under R version 4.0.5 80 | BN_obj 81 | #> Best Normalizing transformation with 1000 Observations 82 | #> Estimated Normality Statistics (Pearson P / df, lower => more normal): 83 | #> - arcsinh(x): 3.6204 84 | #> - Box-Cox: 0.96 85 | #> - Center+scale: 6.7851 86 | #> - Exp(x): 50.8513 87 | #> - Lambert's W (type s): 1.0572 88 | #> - Log_b(x+a): 1.908 89 | #> - orderNorm (ORQ): 1.0516 90 | #> - sqrt(x + a): 1.4556 91 | #> - Yeo-Johnson: 1.7385 92 | #> Estimation method: Out-of-sample via CV with 10 folds and 5 repeats 93 | #> 94 | #> Based off these, bestNormalize chose: 95 | #> Standardized Box Cox Transformation with 1000 nonmissing obs.: 96 | #> Estimated statistics: 97 | #> - lambda = 0.2739638 98 | #> - mean (before standardization) = -0.3870903 99 | #> - sd (before standardization) = 1.045498 100 | 101 | # Perform transformation 102 | gx <- predict(BN_obj) 103 | 104 | # Perform reverse transformation 105 | x2 <- predict(BN_obj, newdata = gx, inverse = TRUE) 106 | 107 | # Prove the transformation is 1:1 108 | all.equal(x2, x) 109 | #> [1] TRUE 110 | ``` 111 | 112 | As of version 1.3, the package supports leave-one-out cross-validation 113 | as well. 
ORQ normalization works very well when the size of the test 114 | dataset is low relative to the training data set, so it will often be 115 | selected via leave-one-out cross-validation (which is why we set 116 | `allow_orderNorm = FALSE` here). 117 | 118 | ``` r 119 | (BN_loo <- bestNormalize(x, allow_orderNorm = FALSE, allow_lambert_s = TRUE, loo = TRUE)) 120 | #> Best Normalizing transformation with 1000 Observations 121 | #> Estimated Normality Statistics (Pearson P / df, lower => more normal): 122 | #> - arcsinh(x): 14.0712 123 | #> - Box-Cox: 0.8077 124 | #> - Center+scale: 26.5181 125 | #> - Exp(x): 451.435 126 | #> - Lambert's W (type s): 1.269 127 | #> - Log_b(x+a): 4.5374 128 | #> - sqrt(x + a): 3.3655 129 | #> - Yeo-Johnson: 5.7997 130 | #> Estimation method: Out-of-sample via leave-one-out CV 131 | #> 132 | #> Based off these, bestNormalize chose: 133 | #> Standardized Box Cox Transformation with 1000 nonmissing obs.: 134 | #> Estimated statistics: 135 | #> - lambda = 0.2739638 136 | #> - mean (before standardization) = -0.3870903 137 | #> - sd (before standardization) = 1.045498 138 | ``` 139 | 140 | It is also possible to visualize these transformations: 141 | 142 | ``` r 143 | plot(BN_obj, leg_loc = "bottomright") 144 | ``` 145 | 146 | For a more in depth tutorial, please consult [the package 147 | vignette](https://CRAN.R-project.org/package=bestNormalize/vignettes/bestNormalize.html), 148 | or the [package website](https://petersonr.github.io/bestNormalize/). 
149 | -------------------------------------------------------------------------------- /bestNormalize.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace 19 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | This is a submission of bestNormalize 1.9.1 that 2 | fixes issues introduced by an update to lamW. 3 | 4 | ## Test environments 5 | - GitHub Actions (ubuntu-18.04): 3.5, 3.6, oldrel, release, devel 6 | - GitHub Actions (windows): 3.6, release 7 | - Github Actions (macOS): release 8 | - Local (macOS): release 9 | - CRAN Windows Builder: release 10 | 11 | ## R CMD check results 12 | There were no ERRORs, WARNINGs, or NOTEs. 13 | 14 | ## revdepcheck results 15 | 16 | We checked 3 reverse dependencies (2 from CRAN + 1 from Bioconductor), comparing R CMD check results across CRAN and dev versions of this package. 
17 | 18 | * We saw 0 new problems 19 | * We failed to check 0 packages 20 | 21 | -------------------------------------------------------------------------------- /data/autotrader.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petersonR/bestNormalize/46c0ea73e57eb2130bac9078b501688fd48d65c9/data/autotrader.RData -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite 'bestNormalize' software in publications, use (1); to cite the orderNorm (ORQ) technique use (2), or cite both.") 2 | 3 | bibentry(bibtype = "article", 4 | title = "{Finding Optimal Normalizing Transformations via 5 | bestNormalize}", 6 | author = as.person("Ryan A. Peterson"), 7 | year = "2021", 8 | journal = "{The R Journal}", 9 | doi = "10.32614/RJ-2021-041", 10 | pages = "310--329", 11 | volume = "13", 12 | number = "1", 13 | textVersion = 14 | "(1) Peterson, R. A. (2021). Finding Optimal Normalizing Transformations via bestNormalize. The R Journal, 13:1, 310-329, DOI:10.32614/RJ-2021-041" 15 | ) 16 | 17 | bibentry(bibtype = "article", 18 | title = "Ordered quantile normalization: a semiparametric transformation built for the cross-validation era", 19 | author = c(as.person("Ryan A. Peterson"), 20 | as.person("Joseph E. Cavanaugh")), 21 | year = "2020", 22 | journal = "Journal of Applied Statistics", 23 | publisher = "Taylor & Francis", 24 | pages = "2312-2327", 25 | doi = "10.1080/02664763.2019.1630372", 26 | volume = "47", 27 | number = "13-15", 28 | textVersion = 29 | "(2) Ryan A. Peterson & Joseph E. Cavanaugh (2020). Ordered quantile normalization: a semiparametric transformation built for the cross-validation era. 
Journal of Applied Statistics, 47:13-15, 2312-2327, DOI: 10.1080/02664763.2019.1630372" 30 | ) 31 | 32 | -------------------------------------------------------------------------------- /man/arcsinh_x.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/arcsinh_x.R 3 | \name{arcsinh_x} 4 | \alias{arcsinh_x} 5 | \alias{predict.arcsinh_x} 6 | \alias{print.arcsinh_x} 7 | \title{arcsinh(x) Transformation} 8 | \usage{ 9 | arcsinh_x(x, standardize = TRUE, ...) 10 | 11 | \method{predict}{arcsinh_x}(object, newdata = NULL, inverse = FALSE, ...) 12 | 13 | \method{print}{arcsinh_x}(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{A vector to normalize with with x} 17 | 18 | \item{standardize}{If TRUE, the transformed values are also centered and 19 | scaled, such that the transformation attempts a standard normal} 20 | 21 | \item{...}{additional arguments} 22 | 23 | \item{object}{an object of class 'arcsinh_x'} 24 | 25 | \item{newdata}{a vector of data to be (potentially reverse) transformed} 26 | 27 | \item{inverse}{if TRUE, performs reverse transformation} 28 | } 29 | \value{ 30 | A list of class \code{arcsinh_x} with elements 31 | \item{x.t}{transformed 32 | original data} 33 | \item{x}{original data} 34 | \item{mean}{mean after transformation but prior to standardization} 35 | \item{sd}{sd after transformation but prior to standardization} 36 | \item{n}{number of nonmissing observations} 37 | \item{norm_stat}{Pearson's P / degrees of freedom} 38 | \item{standardize}{was the transformation standardized} 39 | 40 | The \code{predict} function returns the numeric value of the transformation 41 | performed on new data, and allows for the inverse transformation as well. 
42 | } 43 | \description{ 44 | Perform an arcsinh(x) transformation 45 | } 46 | \details{ 47 | \code{arcsinh_x} performs an arcsinh transformation in the context of 48 | bestNormalize, such that it creates a transformation that can be estimated 49 | and applied to new data via the \code{predict} function. 50 | 51 | The function is explicitly: log(x + sqrt(x^2 + 1)) 52 | } 53 | \examples{ 54 | x <- rgamma(100, 1, 1) 55 | 56 | arcsinh_x_obj <- arcsinh_x(x) 57 | arcsinh_x_obj 58 | p <- predict(arcsinh_x_obj) 59 | x2 <- predict(arcsinh_x_obj, newdata = p, inverse = TRUE) 60 | 61 | all.equal(x2, x) 62 | 63 | } 64 | -------------------------------------------------------------------------------- /man/autotrader.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{autotrader} 5 | \alias{autotrader} 6 | \title{Prices of 6,283 cars listed on Autotrader} 7 | \format{ 8 | A data frame with 6283 rows and 10 variables: 9 | \describe{ 10 | \item{price}{price, in US dollars} 11 | \item{Car_Info}{Raw description from website} 12 | \item{Link}{hyperlink to listing (must be appended to https://www.autotrader.com/)} 13 | \item{Make}{Car manufacturer} 14 | \item{Year}{Year car manufactured} 15 | \item{Location}{Location of listing} 16 | \item{Radius}{Radius chosen for search} 17 | \item{mileage}{mileage on vehicle} 18 | \item{status}{used/new/certified} 19 | \item{model}{make and model, separated by space} 20 | } 21 | } 22 | \source{ 23 | \url{https://www.autotrader.com/} 24 | } 25 | \usage{ 26 | autotrader 27 | } 28 | \description{ 29 | A dataset containing the prices and other attributes of over 6000 cars in the Minneapolis area.
30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /man/bestLogConstant.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bestLogConstant.R 3 | \name{bestLogConstant} 4 | \alias{bestLogConstant} 5 | \alias{predict.bestLogConstant} 6 | \alias{print.bestLogConstant} 7 | \title{Calculate and perform best normalizing log transformation (experimental)} 8 | \usage{ 9 | bestLogConstant(x, a, standardize = TRUE, ...) 10 | 11 | \method{predict}{bestLogConstant}(object, newdata = NULL, inverse = FALSE, ...) 12 | 13 | \method{print}{bestLogConstant}(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{A vector to normalize} 17 | 18 | \item{a}{(optional) a list of candidate constants to choose from} 19 | 20 | \item{standardize}{If TRUE, the transformed values are also centered and 21 | scaled, such that the transformation attempts a standard normal. This will 22 | not change the normality statistic.} 23 | 24 | \item{...}{additional arguments.} 25 | 26 | \item{object}{an object of class 'bestLogConstant'} 27 | 28 | \item{newdata}{a vector of data to be (reverse) transformed} 29 | 30 | \item{inverse}{if TRUE, performs reverse transformation} 31 | } 32 | \value{ 33 | A list of class \code{bestLogConstant} with elements 34 | 35 | \item{x.t}{transformed original data} \item{x}{original data} 36 | \item{norm_stats}{Pearson's Pearson's P / degrees of freedom} 37 | \item{method}{out-of-sample or in-sample, number of folds + repeats} 38 | \item{chosen_constant}{the chosen constant transformation (of class `log_x`)} 39 | \item{other_transforms}{the other transformations (of class `log_x`)} 40 | 41 | The \code{predict} function returns the numeric value of the transformation 42 | performed on new data, and allows for the inverse transformation as well. 
43 | } 44 | \description{ 45 | Similar to bestNormalize, this selects the 46 | best candidate constant for a log transformation on the basis 47 | of the Pearson P test statistic for normality. The 48 | transformation that has the lowest P (calculated on the transformed data) 49 | is selected. This function is currently in development and may not behave 50 | as expected. 51 | 52 | See details for more information. 53 | } 54 | \details{ 55 | \code{bestLogConstant} estimates the optimal normalizing constant 56 | for a log transformation. This transformation can be performed on new data, and 57 | inverted, via the \code{predict} function. 58 | } 59 | \seealso{ 60 | \code{\link[bestNormalize]{bestNormalize}}, \code{\link{log_x}}, 61 | } 62 | -------------------------------------------------------------------------------- /man/bestNormalize-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bestNormalize-package.R 3 | \docType{package} 4 | \name{bestNormalize-package} 5 | \alias{bestNormalize-package} 6 | \title{bestNormalize: Flexibly calculate the best normalizing transformation for a 7 | vector} 8 | \description{ 9 | The \code{bestNormalize} package provides several normalizing transformations, and introduces a 10 | new transformation based off of the order statistics, \code{orderNorm}. 11 | Perhaps the most useful function is \code{bestNormalize}, which attempts all 12 | of these transformations and picks the best one based off of a goodness of 13 | fit statistic. 
14 | } 15 | \seealso{ 16 | Useful links: 17 | \itemize{ 18 | \item \url{https://petersonr.github.io/bestNormalize/} 19 | \item \url{https://github.com/petersonR/bestNormalize} 20 | } 21 | 22 | } 23 | \author{ 24 | \strong{Maintainer}: Ryan Andrew Peterson \email{ryan.a.peterson@cuanschutz.edu} (\href{https://orcid.org/0000-0002-4650-5798}{ORCID}) 25 | 26 | } 27 | -------------------------------------------------------------------------------- /man/bestNormalize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bestNormalize.R 3 | \name{bestNormalize} 4 | \alias{bestNormalize} 5 | \alias{predict.bestNormalize} 6 | \alias{print.bestNormalize} 7 | \alias{tidy.bestNormalize} 8 | \title{Calculate and perform best normalizing transformation} 9 | \usage{ 10 | bestNormalize( 11 | x, 12 | standardize = TRUE, 13 | allow_orderNorm = TRUE, 14 | allow_lambert_s = FALSE, 15 | allow_lambert_h = FALSE, 16 | allow_exp = TRUE, 17 | out_of_sample = TRUE, 18 | cluster = NULL, 19 | k = 10, 20 | r = 5, 21 | loo = FALSE, 22 | warn = FALSE, 23 | quiet = FALSE, 24 | tr_opts = list(), 25 | new_transforms = list(), 26 | norm_stat_fn = NULL, 27 | ... 28 | ) 29 | 30 | \method{predict}{bestNormalize}(object, newdata = NULL, inverse = FALSE, ...) 31 | 32 | \method{print}{bestNormalize}(x, ...) 33 | 34 | \method{tidy}{bestNormalize}(x, ...) 35 | } 36 | \arguments{ 37 | \item{x}{A `bestNormalize` object.} 38 | 39 | \item{standardize}{If TRUE, the transformed values are also centered and 40 | scaled, such that the transformation attempts a standard normal. This will 41 | not change the normality statistic.} 42 | 43 | \item{allow_orderNorm}{set to FALSE if orderNorm should not be applied} 44 | 45 | \item{allow_lambert_s}{Set to FALSE if the lambertW of type "s" should not 46 | be applied (see details). 
Expect about 2-3x elapsed computing time if TRUE.} 47 | 48 | \item{allow_lambert_h}{Set to TRUE if the lambertW of type "h" and "hh" 49 | should be applied (see details). Expect about 2-4x elapsed computing time.} 50 | 51 | \item{allow_exp}{Set to TRUE if the exponential transformation should be 52 | applied (sometimes this will cause errors with heavy right skew)} 53 | 54 | \item{out_of_sample}{if FALSE, estimates quickly in-sample performance} 55 | 56 | \item{cluster}{name of cluster set using \code{makeCluster}} 57 | 58 | \item{k}{number of folds} 59 | 60 | \item{r}{number of repeats} 61 | 62 | \item{loo}{should leave-one-out CV be used instead of repeated CV? (see 63 | details)} 64 | 65 | \item{warn}{Should bestNormalize warn when a method doesn't work?} 66 | 67 | \item{quiet}{Should a progress-bar not be displayed for cross-validation 68 | progress?} 69 | 70 | \item{tr_opts}{a list (of lists), specifying options to be passed to each 71 | transformation (see details)} 72 | 73 | \item{new_transforms}{a named list of new transformation functions and their 74 | predict methods (see details)} 75 | 76 | \item{norm_stat_fn}{if specified, a function to calculate to assess normality 77 | (default is the Pearson chi-squared statistic divided by its d.f.)} 78 | 79 | \item{...}{not used} 80 | 81 | \item{object}{an object of class 'bestNormalize'} 82 | 83 | \item{newdata}{a vector of data to be (reverse) transformed} 84 | 85 | \item{inverse}{if TRUE, performs reverse transformation} 86 | } 87 | \value{ 88 | A list of class \code{bestNormalize} with elements 89 | 90 | \item{x.t}{transformed original data} \item{x}{original data} 91 | \item{norm_stats}{Pearson's Pearson's P / degrees of freedom} 92 | \item{method}{out-of-sample or in-sample, number of folds + repeats} 93 | \item{chosen_transform}{the chosen transformation (of appropriate class)} 94 | \item{other_transforms}{the other transformations (of appropriate class)} 95 | \item{oos_preds}{Out-of-sample predictions (if 
loo == TRUE) or 96 | normalization stats} 97 | 98 | The \code{predict} function returns the numeric value of the transformation 99 | performed on new data, and allows for the inverse transformation as well. 100 | } 101 | \description{ 102 | Performs a suite of normalizing transformations, and selects the 103 | best one on the basis of the Pearson P test statistic for normality. The 104 | transformation that has the lowest P (calculated on the transformed data) 105 | is selected. See details for more information. 106 | } 107 | \details{ 108 | \code{bestNormalize} estimates the optimal normalizing 109 | transformation. This transformation can be performed on new data, and 110 | inverted, via the \code{predict} function. 111 | 112 | This function currently estimates the Yeo-Johnson transformation, 113 | the Box Cox transformation (if the data is positive), the log_10(x+a) 114 | transformation, the square-root (x+a) transformation, and the arcsinh 115 | transformation. a is set to max(0, -min(x) + eps) by default. If 116 | allow_orderNorm == TRUE and if out_of_sample == FALSE then the ordered 117 | quantile normalization technique will likely be chosen since it essentially 118 | forces the data to follow a normal distribution. More information on the 119 | orderNorm technique can be found in the package vignette, or using 120 | \code{?orderNorm}. 121 | 122 | 123 | Repeated cross-validation is used by default to estimate the out-of-sample 124 | performance of each transformation if out_of_sample = TRUE. While this can 125 | take some time, users can speed it up by creating a cluster via the 126 | \code{parallel} package's \code{makeCluster} function, and passing the name 127 | of this cluster to \code{bestNormalize} via the cl argument. For best 128 | performance, we recommend the number of clusters to be set to the number of 129 | repeats r. 
Care should be taken to account for the number of observations 130 | per fold; too small a number and the estimated normality statistic could be 131 | inaccurate, or at least suffer from high variability. 132 | 133 | 134 | As of version 1.3, users can use leave-one-out cross-validation as well for 135 | each method by setting \code{loo} to \code{TRUE}. This will take a lot of 136 | time for bigger vectors, but it will have the most accurate estimate of 137 | normalization efficacy. Note that if this method is selected, arguments 138 | \code{k, r} are ignored. This method will still work in parallel with the 139 | \code{cluster} argument. 140 | 141 | 142 | Note that the Lambert transformation of type "h" or "hh" can be done by 143 | setting allow_lambert_h = TRUE, however this can take significantly longer 144 | to run. 145 | 146 | Use \code{tr_opts} in order to set options for each transformation. For 147 | instance, if you want to override the default a selection for \code{log_x}, 148 | set \code{tr_opts$log_x = list(a = 1)}. 149 | 150 | See the package's vignette on how to use custom functions with 151 | bestNormalize. All it takes is to create an S3 class and predict method for 152 | the new transformation and load it into the environment, then the new 153 | custom function (and its predict method) can be passed to bestNormalize 154 | with \code{new_transforms}.
155 | } 156 | \examples{ 157 | 158 | x <- rgamma(100, 1, 1) 159 | 160 | \dontrun{ 161 | # With Repeated CV 162 | BN_obj <- bestNormalize(x) 163 | BN_obj 164 | p <- predict(BN_obj) 165 | x2 <- predict(BN_obj, newdata = p, inverse = TRUE) 166 | 167 | all.equal(x2, x) 168 | } 169 | 170 | 171 | \dontrun{ 172 | # With leave-one-out CV 173 | BN_obj <- bestNormalize(x, loo = TRUE) 174 | BN_obj 175 | p <- predict(BN_obj) 176 | x2 <- predict(BN_obj, newdata = p, inverse = TRUE) 177 | 178 | all.equal(x2, x) 179 | } 180 | 181 | # Without CV 182 | BN_obj <- bestNormalize(x, allow_orderNorm = FALSE, out_of_sample = FALSE) 183 | BN_obj 184 | p <- predict(BN_obj) 185 | x2 <- predict(BN_obj, newdata = p, inverse = TRUE) 186 | 187 | all.equal(x2, x) 188 | 189 | } 190 | \seealso{ 191 | \code{\link[bestNormalize]{boxcox}}, \code{\link{orderNorm}}, 192 | \code{\link{yeojohnson}} 193 | } 194 | -------------------------------------------------------------------------------- /man/binarize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/binarize.R 3 | \name{binarize} 4 | \alias{binarize} 5 | \alias{predict.binarize} 6 | \alias{print.binarize} 7 | \title{Binarize} 8 | \usage{ 9 | binarize(x, location_measure = "median") 10 | 11 | \method{predict}{binarize}(object, newdata = NULL, inverse = FALSE, ...) 12 | 13 | \method{print}{binarize}(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{A vector to binarize} 17 | 18 | \item{location_measure}{which location measure should be used? 
can either be 19 | "median", "mean", "mode", a number, or a function.} 20 | 21 | \item{object}{an object of class 'binarize'} 22 | 23 | \item{newdata}{a vector of data to be (reverse) transformed} 24 | 25 | \item{inverse}{if TRUE, performs reverse transformation} 26 | 27 | \item{...}{additional arguments} 28 | } 29 | \value{ 30 | A list of class \code{binarize} with elements 31 | \item{x.t}{transformed original data} 32 | \item{x}{original data} 33 | \item{method}{location_measure used for original fitting} 34 | \item{location}{estimated location_measure} 35 | \item{n}{number of nonmissing observations} 36 | \item{norm_stat}{Pearson's P / degrees of freedom} 37 | 38 | The \code{predict} function with \code{inverse = FALSE} returns the numeric 39 | value (0 or 1) of the transformation on \code{newdata} (which defaults to 40 | the original data). 41 | 42 | If \code{inverse = TRUE}, since the transform is not 1-1, it will create 43 | and return a factor that indicates where the original data was cut. 44 | } 45 | \description{ 46 | This function will perform a binarizing transformation, which 47 | could be used as a last resort if the data cannot be adequately normalized. 48 | This may be useful when accidentally attempting normalization of a binary 49 | vector (which could occur if implementing bestNormalize in an automated 50 | fashion). 51 | 52 | Note that the transformation is not one-to-one, in contrast to the other 53 | functions in this package. 
54 | } 55 | \examples{ 56 | x <- rgamma(100, 1, 1) 57 | binarize_obj <- binarize(x) 58 | (p <- predict(binarize_obj)) 59 | 60 | predict(binarize_obj, newdata = p, inverse = TRUE) 61 | } 62 | -------------------------------------------------------------------------------- /man/boxcox.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/boxcox.R 3 | \name{boxcox} 4 | \alias{boxcox} 5 | \alias{predict.boxcox} 6 | \alias{print.boxcox} 7 | \title{Box-Cox Normalization} 8 | \usage{ 9 | boxcox(x, standardize = TRUE, ...) 10 | 11 | \method{predict}{boxcox}(object, newdata = NULL, inverse = FALSE, ...) 12 | 13 | \method{print}{boxcox}(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{A vector to normalize with Box-Cox} 17 | 18 | \item{standardize}{If TRUE, the transformed values are also centered and 19 | scaled, such that the transformation attempts a standard normal} 20 | 21 | \item{...}{Additional arguments that can be passed to the estimation of the 22 | lambda parameter (lower, upper, epsilon)} 23 | 24 | \item{object}{an object of class 'boxcox'} 25 | 26 | \item{newdata}{a vector of data to be (reverse) transformed} 27 | 28 | \item{inverse}{if TRUE, performs reverse transformation} 29 | } 30 | \value{ 31 | A list of class \code{boxcox} with elements 32 | \item{x.t}{transformed 33 | original data} 34 | \item{x}{original data} 35 | \item{mean}{mean after transformation but prior to standardization} 36 | \item{sd}{sd after transformation but prior to standardization} 37 | \item{lambda}{estimated lambda value for skew transformation} 38 | \item{n}{number of nonmissing observations} 39 | \item{norm_stat}{Pearson's P / degrees of freedom} 40 | \item{standardize}{was the transformation standardized} 41 | 42 | The \code{predict} function returns the numeric value of the transformation 43 | performed on new data, and allows for the inverse transformation as well. 
44 | } 45 | \description{ 46 | Perform a Box-Cox transformation and center/scale a vector to 47 | attempt normalization 48 | } 49 | \details{ 50 | \code{boxcox} estimates the optimal value of lambda for the Box-Cox 51 | transformation. This transformation can be performed on new data, and 52 | inverted, via the \code{predict} function. 53 | 54 | The function will return an error if a user attempt to transform nonpositive 55 | data. 56 | } 57 | \examples{ 58 | x <- rgamma(100, 1, 1) 59 | 60 | bc_obj <- boxcox(x) 61 | bc_obj 62 | p <- predict(bc_obj) 63 | x2 <- predict(bc_obj, newdata = p, inverse = TRUE) 64 | 65 | all.equal(x2, x) 66 | } 67 | \references{ 68 | Box, G. E. P. and Cox, D. R. (1964) An analysis of 69 | transformations. Journal of the Royal Statistical Society B, 26, 211-252. 70 | } 71 | \seealso{ 72 | \code{\link[MASS]{boxcox}} 73 | } 74 | -------------------------------------------------------------------------------- /man/double_reverse_log.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/double_reverse_log.R 3 | \name{double_reverse_log} 4 | \alias{double_reverse_log} 5 | \alias{predict.double_reverse_log} 6 | \alias{print.double_reverse_log} 7 | \title{Double Reverse Log(x + a) Transformation} 8 | \usage{ 9 | double_reverse_log( 10 | x, 11 | b = 10, 12 | standardize = TRUE, 13 | eps = diff(range(x, na.rm = TRUE))/10, 14 | warn = TRUE, 15 | ... 16 | ) 17 | 18 | \method{predict}{double_reverse_log}(object, newdata = NULL, inverse = FALSE, ...) 19 | 20 | \method{print}{double_reverse_log}(x, ...) 
21 | } 22 | \arguments{ 23 | \item{x}{A vector to normalize with x} 24 | 25 | \item{b}{The base of the log (defaults to 10)} 26 | 27 | \item{standardize}{If TRUE, the transformed values are also centered and 28 | scaled, such that the transformation attempts a standard normal} 29 | 30 | \item{eps}{The cushion for the transformation range (defaults to 10 percent)} 31 | 32 | \item{warn}{Should a warning result from infinite values?} 33 | 34 | \item{...}{additional arguments} 35 | 36 | \item{object}{an object of class 'double_reverse_log'} 37 | 38 | \item{newdata}{a vector of data to be (potentially reverse) transformed} 39 | 40 | \item{inverse}{if TRUE, performs reverse transformation} 41 | } 42 | \value{ 43 | A list of class \code{double_reverse_log} with elements 44 | \item{x.t}{transformed 45 | original data} 46 | \item{x}{original data} 47 | \item{mean}{mean after transformation but prior to standardization} 48 | \item{sd}{sd after transformation but prior to standardization} 49 | \item{b}{estimated base b value} 50 | \item{n}{number of nonmissing observations} 51 | \item{norm_stat}{Pearson's P / degrees of freedom} 52 | \item{standardize}{was the transformation standardized} 53 | 54 | The \code{predict} function returns the numeric value of the transformation 55 | performed on new data, and allows for the inverse transformation as well. 56 | } 57 | \description{ 58 | First reverses scores, then performs a log_b(x) 59 | normalization transformation, and then reverses scores again. 60 | } 61 | \details{ 62 | \code{double_reverse_log} performs a simple log transformation in the 63 | context of bestNormalize, such that it creates a transformation that can be 64 | estimated and applied to new data via the \code{predict} function. The parameter 65 | a is essentially estimated by the training set by default (estimated as the 66 | minimum possible to some extent epsilon), while the base must be specified 67 | beforehand.
68 | } 69 | \examples{ 70 | x <- rgamma(100, 1, 1) 71 | 72 | double_reverse_log_obj <- double_reverse_log(x) 73 | double_reverse_log_obj 74 | p <- predict(double_reverse_log_obj) 75 | x2 <- predict(double_reverse_log_obj, newdata = p, inverse = TRUE) 76 | 77 | all.equal(x2, x) 78 | 79 | } 80 | -------------------------------------------------------------------------------- /man/exp_x.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exp_x.R 3 | \name{exp_x} 4 | \alias{exp_x} 5 | \alias{predict.exp_x} 6 | \alias{print.exp_x} 7 | \title{exp(x) Transformation} 8 | \usage{ 9 | exp_x(x, standardize = TRUE, warn = TRUE, ...) 10 | 11 | \method{predict}{exp_x}(object, newdata = NULL, inverse = FALSE, ...) 12 | 13 | \method{print}{exp_x}(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{A vector to normalize with with x} 17 | 18 | \item{standardize}{If TRUE, the transformed values are also centered and 19 | scaled, such that the transformation attempts a standard normal} 20 | 21 | \item{warn}{Should a warning result from infinite values?} 22 | 23 | \item{...}{additional arguments} 24 | 25 | \item{object}{an object of class 'exp_x'} 26 | 27 | \item{newdata}{a vector of data to be (potentially reverse) transformed} 28 | 29 | \item{inverse}{if TRUE, performs reverse transformation} 30 | } 31 | \value{ 32 | A list of class \code{exp_x} with elements 33 | \item{x.t}{transformed 34 | original data} 35 | \item{x}{original data} 36 | \item{mean}{mean after transformation but prior to standardization} 37 | \item{sd}{sd after transformation but prior to standardization} 38 | \item{n}{number of nonmissing observations} 39 | \item{norm_stat}{Pearson's P / degrees of freedom} 40 | \item{standardize}{was the transformation standardized} 41 | 42 | The \code{predict} function returns the numeric value of the transformation 43 | performed on new data, and allows for the 
inverse transformation as well. 44 | } 45 | \description{ 46 | Perform a exp(x) transformation 47 | } 48 | \details{ 49 | \code{exp_x} performs a simple exponential transformation in the context of 50 | bestNormalize, such that it creates a transformation that can be estimated 51 | and applied to new data via the \code{predict} function. 52 | } 53 | \examples{ 54 | x <- rgamma(100, 1, 1) 55 | 56 | exp_x_obj <- exp_x(x) 57 | exp_x_obj 58 | p <- predict(exp_x_obj) 59 | x2 <- predict(exp_x_obj, newdata = p, inverse = TRUE) 60 | 61 | all.equal(x2, x) 62 | 63 | } 64 | -------------------------------------------------------------------------------- /man/lambert.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lambert.R 3 | \name{lambert} 4 | \alias{lambert} 5 | \alias{predict.lambert} 6 | \alias{print.lambert} 7 | \title{Lambert W x F Normalization} 8 | \usage{ 9 | lambert(x, type = "s", standardize = TRUE, warn = FALSE, ...) 10 | 11 | \method{predict}{lambert}(object, newdata = NULL, inverse = FALSE, ...) 12 | 13 | \method{print}{lambert}(x, ...) 
14 | } 15 | \arguments{ 16 | \item{x}{A vector to normalize with the Lambert W x F transformation} 17 | 18 | \item{type}{a character indicating which transformation to perform (options 19 | are "s", "h", and "hh", see details)} 20 | 21 | \item{standardize}{If TRUE, the transformed values are also centered and 22 | scaled, such that the transformation attempts a standard normal} 23 | 24 | \item{warn}{should the function show warnings} 25 | 26 | \item{...}{Additional arguments that can be passed to the 27 | LambertW::Gaussianize function} 28 | 29 | \item{object}{an object of class 'lambert'} 30 | 31 | \item{newdata}{a vector of data to be (reverse) transformed} 32 | 33 | \item{inverse}{if TRUE, performs reverse transformation} 34 | } 35 | \value{ 36 | A list of class \code{lambert} with elements 37 | \item{x.t}{transformed original data} 38 | \item{x}{original data} 39 | \item{mean}{mean after transformation but prior to standardization} 40 | \item{sd}{sd after transformation but prior to standardization} 41 | \item{tau.mat}{estimated parameters of LambertW::Gaussianize} 42 | \item{n}{number of nonmissing observations} 43 | \item{norm_stat}{Pearson's P / degrees of freedom} 44 | \item{standardize}{was the transformation standardized} 45 | 46 | The \code{predict} function returns the numeric value of the transformation 47 | performed on new data, and allows for the inverse transformation as well. 48 | } 49 | \description{ 50 | Perform Lambert's W x F transformation and center/scale a vector 51 | to attempt normalization via the \code{LambertW} package. 52 | } 53 | \details{ 54 | \code{lambert} uses the \code{LambertW} package to estimate a 55 | normalizing (or "Gaussianizing") transformation. This transformation can be 56 | performed on new data, and inverted, via the \code{predict} function. 57 | 58 | NOTE: The type = "s" argument is the only one that does the 1-1 transform 59 | consistently, and so it is the only method currently used in 60 | \code{bestNormalize()}.
Use type = "h" or type = 'hh' at risk of not having 61 | this estimate 1-1 transform. These alternative types are effective when the 62 | data has exceptionally heavy tails, e.g. the Cauchy distribution. 63 | 64 | Additionally, sometimes (depending on the distribution) this method will be 65 | unable to extrapolate beyond the observed bounds. In these cases, NaN is 66 | returned. 67 | } 68 | \examples{ 69 | \dontrun{ 70 | x <- rgamma(100, 1, 1) 71 | 72 | lambert_obj <- lambert(x) 73 | lambert_obj 74 | p <- predict(lambert_obj) 75 | x2 <- predict(lambert_obj, newdata = p, inverse = TRUE) 76 | 77 | all.equal(x2, x) 78 | } 79 | 80 | } 81 | \references{ 82 | Georg M. Goerg (2016). LambertW: An R package for Lambert W x F 83 | Random Variables. R package version 0.6.4. 84 | 85 | Georg M. Goerg (2011): Lambert W random variables - a new family of 86 | generalized skewed distributions with applications to risk estimation. 87 | Annals of Applied Statistics 3(5). 2197-2230. 88 | 89 | Georg M. Goerg (2014): The Lambert Way to Gaussianize heavy-tailed data 90 | with the inverse of Tukey's h transformation as a special case. The 91 | Scientific World Journal. 92 | } 93 | \seealso{ 94 | \code{\link[LambertW]{Gaussianize}} 95 | } 96 | -------------------------------------------------------------------------------- /man/log_x.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/log_x.R 3 | \name{log_x} 4 | \alias{log_x} 5 | \alias{predict.log_x} 6 | \alias{print.log_x} 7 | \title{Log(x + a) Transformation} 8 | \usage{ 9 | log_x(x, a = NULL, b = 10, standardize = TRUE, eps = 0.001, warn = TRUE, ...) 10 | 11 | \method{predict}{log_x}(object, newdata = NULL, inverse = FALSE, ...) 12 | 13 | \method{print}{log_x}(x, ...) 
14 | } 15 | \arguments{ 16 | \item{x}{A vector to normalize with with x} 17 | 18 | \item{a}{The constant to add to x (defaults to max(0, -min(x) + eps)); 19 | see \code{bestLogConstant}} 20 | 21 | \item{b}{The base of the log (defaults to 10)} 22 | 23 | \item{standardize}{If TRUE, the transformed values are also centered and 24 | scaled, such that the transformation attempts a standard normal} 25 | 26 | \item{eps}{The allowed error in the expression for the selected a} 27 | 28 | \item{warn}{Should a warning result from infinite values?} 29 | 30 | \item{...}{additional arguments} 31 | 32 | \item{object}{an object of class 'log_x'} 33 | 34 | \item{newdata}{a vector of data to be (potentially reverse) transformed} 35 | 36 | \item{inverse}{if TRUE, performs reverse transformation} 37 | } 38 | \value{ 39 | A list of class \code{log_x} with elements 40 | \item{x.t}{transformed 41 | original data} 42 | \item{x}{original data} 43 | \item{mean}{mean after transformation but prior to standardization} 44 | \item{sd}{sd after transformation but prior to standardization} 45 | \item{a}{estimated a value} 46 | \item{b}{estimated base b value} 47 | \item{n}{number of nonmissing observations} 48 | \item{norm_stat}{Pearson's P / degrees of freedom} 49 | \item{standardize}{was the transformation standardized} 50 | 51 | The \code{predict} function returns the numeric value of the transformation 52 | performed on new data, and allows for the inverse transformation as well. 53 | } 54 | \description{ 55 | Perform a log_b (x+a) normalization transformation 56 | } 57 | \details{ 58 | \code{log_x} performs a simple log transformation in the context of 59 | bestNormalize, such that it creates a transformation that can be estimated 60 | and applied to new data via the \code{predict} function. The parameter a is 61 | essentially estimated by the training set by default (estimated as the minimum 62 | possible to some extent epsilon), while the base must be 63 | specified beforehand. 
64 | } 65 | \examples{ 66 | x <- rgamma(100, 1, 1) 67 | 68 | log_x_obj <- log_x(x) 69 | log_x_obj 70 | p <- predict(log_x_obj) 71 | x2 <- predict(log_x_obj, newdata = p, inverse = TRUE) 72 | 73 | all.equal(x2, x) 74 | 75 | } 76 | -------------------------------------------------------------------------------- /man/no_transform.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/no_transform.R 3 | \name{no_transform} 4 | \alias{no_transform} 5 | \alias{predict.no_transform} 6 | \alias{center_scale} 7 | \alias{print.no_transform} 8 | \alias{predict.center_scale} 9 | \alias{print.center_scale} 10 | \alias{tidy.no_transform} 11 | \title{Identity transformation and center/scale transform} 12 | \usage{ 13 | no_transform(x, warn = TRUE, ...) 14 | 15 | \method{predict}{no_transform}(object, newdata = NULL, inverse = FALSE, ...) 16 | 17 | \method{print}{no_transform}(x, ...) 18 | 19 | center_scale(x, warn = TRUE, ...) 20 | 21 | \method{predict}{center_scale}(object, newdata = NULL, inverse = FALSE, ...) 22 | 23 | \method{print}{center_scale}(x, ...) 24 | 25 | \method{tidy}{no_transform}(x, ...) 
26 | } 27 | \arguments{ 28 | \item{x}{A `no_transform` object.} 29 | 30 | \item{warn}{Should a warning result from infinite values?} 31 | 32 | \item{...}{not used} 33 | 34 | \item{object}{an object of class 'no_transform'} 35 | 36 | \item{newdata}{a vector of data to be (potentially reverse) transformed} 37 | 38 | \item{inverse}{if TRUE, performs reverse transformation} 39 | } 40 | \value{ 41 | A list of class \code{no_transform} with elements 42 | \item{x.t}{transformed original data} 43 | \item{x}{original data} 44 | \item{n}{number of nonmissing observations} 45 | \item{norm_stat}{Pearson's P / degrees of freedom} 46 | 47 | The \code{predict} function returns the numeric value of the transformation 48 | performed on new data, and allows for the inverse transformation as well. 49 | } 50 | \description{ 51 | Perform an identity transformation. Admittedly it seems odd to 52 | have a dedicated function to essentially do I(x), but it makes sense to 53 | keep the same syntax as the other transformations so it plays nicely 54 | with them. As a benefit, the bestNormalize function will also show 55 | a comparable normalization statistic for the untransformed data. If 56 | \code{standardize == TRUE}, \code{center_scale} passes to bestNormalize instead. 57 | } 58 | \details{ 59 | \code{no_transform} creates a identity transformation object 60 | that can be applied to new data via the \code{predict} function. 
61 | } 62 | \examples{ 63 | x <- rgamma(100, 1, 1) 64 | 65 | no_transform_obj <- no_transform(x) 66 | no_transform_obj 67 | p <- predict(no_transform_obj) 68 | x2 <- predict(no_transform_obj, newdata = p, inverse = TRUE) 69 | 70 | all.equal(x2, x) 71 | 72 | } 73 | -------------------------------------------------------------------------------- /man/orderNorm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/orderNorm.R 3 | \name{orderNorm} 4 | \alias{orderNorm} 5 | \alias{predict.orderNorm} 6 | \alias{print.orderNorm} 7 | \title{Calculate and perform Ordered Quantile normalizing transformation} 8 | \usage{ 9 | orderNorm(x, n_logit_fit = min(length(x), 10000), ..., warn = TRUE) 10 | 11 | \method{predict}{orderNorm}(object, newdata = NULL, inverse = FALSE, warn = TRUE, ...) 12 | 13 | \method{print}{orderNorm}(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{A vector to normalize} 17 | 18 | \item{n_logit_fit}{Number of points used to fit logit approximation} 19 | 20 | \item{...}{additional arguments} 21 | 22 | \item{warn}{transforms outside observed range or ties will yield warning} 23 | 24 | \item{object}{an object of class 'orderNorm'} 25 | 26 | \item{newdata}{a vector of data to be (reverse) transformed} 27 | 28 | \item{inverse}{if TRUE, performs reverse transformation} 29 | } 30 | \value{ 31 | A list of class \code{orderNorm} with elements 32 | 33 | \item{x.t}{transformed original data} \item{x}{original data} 34 | \item{n}{number of nonmissing observations} \item{ties_status}{indicator if 35 | ties are present} \item{fit}{fit to be used for extrapolation, if needed} 36 | \item{norm_stat}{Pearson's P / degrees of freedom} 37 | 38 | The \code{predict} function returns the numeric value of the transformation 39 | performed on new data, and allows for the inverse transformation as well. 
40 | } 41 | \description{ 42 | The Ordered Quantile (ORQ) normalization transformation, 43 | \code{orderNorm()}, is a rank-based procedure by which the values of a 44 | vector are mapped to their percentile, which is then mapped to the same 45 | percentile of the normal distribution. Without the presence of ties, this 46 | essentially guarantees that the transformation leads to a uniform 47 | distribution. 48 | 49 | The transformation is: \deqn{g(x) = \Phi ^ {-1} ((rank(x) - .5) / 50 | (length(x)))} 51 | 52 | Where \eqn{\Phi} refers to the standard normal cdf, rank(x) refers to each 53 | observation's rank, and length(x) refers to the number of observations. 54 | 55 | By itself, this method is certainly not new; the earliest mention of it 56 | that I could find is in a 1947 paper by Bartlett (see references). This 57 | formula was outlined explicitly in Van der Waerden, and expounded upon in 58 | Beasley (2009). However there is a key difference to this version of it, as 59 | explained below. 60 | 61 | Using linear interpolation between these percentiles, the ORQ normalization 62 | becomes a 1-1 transformation that can be applied to new data. However, 63 | outside of the observed domain of x, it is unclear how to extrapolate the 64 | transformation. In the ORQ normalization procedure, a binomial glm with a 65 | logit link is used on the ranks in order to extrapolate beyond the bounds 66 | of the original domain of x. The inverse normal CDF is then applied to 67 | these extrapolated predictions in order to extrapolate the transformation. 68 | This mitigates the influence of heavy-tailed distributions while preserving 69 | the 1-1 nature of the transformation. The extrapolation will provide a 70 | warning unless warn = FALSE.) However, we found that the extrapolation was 71 | able to perform very well even on data as heavy-tailed as a Cauchy 72 | distribution (paper to be published). 
73 | 74 | The fit used to perform the extrapolation uses a default of 10000 75 | observations (or length(x) if that is less). This added approximation 76 | improves the scalability, both computationally and in terms of memory used. 77 | Do not set this value to be too low (e.g. <100), as there is no benefit to 78 | doing so. Increase if your test data set is large relative to 10000 and/or 79 | if you are worried about losing signal in the extremes of the range. 80 | 81 | This transformation can be performed on new data and inverted via the 82 | \code{predict} function. 83 | } 84 | \examples{ 85 | 86 | x <- rgamma(100, 1, 1) 87 | 88 | orderNorm_obj <- orderNorm(x) 89 | orderNorm_obj 90 | p <- predict(orderNorm_obj) 91 | x2 <- predict(orderNorm_obj, newdata = p, inverse = TRUE) 92 | 93 | all.equal(x2, x) 94 | } 95 | \references{ 96 | Bartlett, M. S. "The Use of Transformations." Biometrics, vol. 3, no. 1, 97 | 1947, pp. 39-52. JSTOR www.jstor.org/stable/3001536. 98 | 99 | Van der Waerden BL. Order tests for the two-sample problem and their power. 100 | 1952;55:453-458. Ser A. 101 | 102 | Beasley TM, Erickson S, Allison DB. Rank-based inverse normal transformations 103 | are increasingly used, but are they merited? Behav. Genet. 2009;39(5): 104 | 580-595. 
pmid:19526352 105 | } 106 | \seealso{ 107 | \code{\link{boxcox}}, \code{\link{lambert}}, 108 | \code{\link{bestNormalize}}, \code{\link{yeojohnson}} 109 | } 110 | -------------------------------------------------------------------------------- /man/plot.bestNormalize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plots.R 3 | \name{plot.bestNormalize} 4 | \alias{plot.bestNormalize} 5 | \alias{plot.orderNorm} 6 | \alias{plot.boxcox} 7 | \alias{plot.yeojohnson} 8 | \alias{plot.lambert} 9 | \title{Transformation plotting} 10 | \usage{ 11 | \method{plot}{bestNormalize}( 12 | x, 13 | inverse = FALSE, 14 | bounds = NULL, 15 | cols = NULL, 16 | methods = NULL, 17 | leg_loc = "top", 18 | ... 19 | ) 20 | 21 | \method{plot}{orderNorm}(x, inverse = FALSE, bounds = NULL, ...) 22 | 23 | \method{plot}{boxcox}(x, inverse = FALSE, bounds = NULL, ...) 24 | 25 | \method{plot}{yeojohnson}(x, inverse = FALSE, bounds = NULL, ...) 26 | 27 | \method{plot}{lambert}(x, inverse = FALSE, bounds = NULL, ...) 28 | } 29 | \arguments{ 30 | \item{x}{a fitted transformation} 31 | 32 | \item{inverse}{if TRUE, plots the inverse transformation} 33 | 34 | \item{bounds}{a vector of bounds to plot for the transformation} 35 | 36 | \item{cols}{a vector of colors to use for the transforms (see details)} 37 | 38 | \item{methods}{a vector of transformations to plot} 39 | 40 | \item{leg_loc}{the location of the legend on the plot} 41 | 42 | \item{...}{further parameters to be passed to \code{plot} and \code{lines}} 43 | } 44 | \description{ 45 | Plots transformation functions for objects produced by the bestNormalize 46 | package 47 | } 48 | \details{ 49 | The plots produced by the individual transformations are simply 50 | plots of the original values by the newly transformed values, with a line 51 | denoting where transformations would take place for new data. 
52 | 53 | For the bestNormalize object, this plots each of the possible 54 | transformations run by the original call to bestNormalize. The first 55 | argument in the "cols" parameter refers to the color of the chosen 56 | transformation. 57 | } 58 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step_orderNorm.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{required_pkgs} 7 | \title{Objects exported from other packages} 8 | \keyword{internal} 9 | \description{ 10 | These objects are imported from other packages. Follow the links 11 | below to see their documentation. 12 | 13 | \describe{ 14 | \item{generics}{\code{\link[generics]{required_pkgs}}} 15 | }} 16 | 17 | -------------------------------------------------------------------------------- /man/sqrt_x.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sqrt_x.R 3 | \name{sqrt_x} 4 | \alias{sqrt_x} 5 | \alias{predict.sqrt_x} 6 | \alias{print.sqrt_x} 7 | \title{sqrt(x + a) Normalization} 8 | \usage{ 9 | sqrt_x(x, a = NULL, standardize = TRUE, ...) 10 | 11 | \method{predict}{sqrt_x}(object, newdata = NULL, inverse = FALSE, ...) 12 | 13 | \method{print}{sqrt_x}(x, ...) 
14 | } 15 | \arguments{ 16 | \item{x}{A vector to normalize with sqrt(x + a)} 17 | 18 | \item{a}{The constant to add to x (defaults to max(0, -min(x)))} 19 | 20 | \item{standardize}{If TRUE, the transformed values are also centered and 21 | scaled, such that the transformation attempts a standard normal} 22 | 23 | \item{...}{additional arguments} 24 | 25 | \item{object}{an object of class 'sqrt_x'} 26 | 27 | \item{newdata}{a vector of data to be (potentially reverse) transformed} 28 | 29 | \item{inverse}{if TRUE, performs reverse transformation} 30 | } 31 | \value{ 32 | A list of class \code{sqrt_x} with elements \item{x.t}{transformed 33 | original data} 34 | \item{x}{original data} 35 | \item{mean}{mean after transformation but prior to standardization} 36 | \item{sd}{sd after transformation but prior to standardization} 37 | \item{n}{number of nonmissing observations} 38 | \item{norm_stat}{Pearson's P / degrees of freedom} 39 | \item{standardize}{was the transformation standardized} 40 | 41 | The \code{predict} function returns the numeric value of the transformation 42 | performed on new data, and allows for the inverse transformation as well. 43 | } 44 | \description{ 45 | Perform a sqrt (x+a) normalization transformation 46 | } 47 | \details{ 48 | \code{sqrt_x} performs a simple square-root transformation in the 49 | context of bestNormalize, such that it creates a transformation that can be 50 | estimated and applied to new data via the \code{predict} function. The 51 | parameter a is essentially estimated by the training set by default 52 | (estimated as the minimum value 53 | such that x + a is nonnegative).
54 | } 55 | \examples{ 56 | x <- rgamma(100, 1, 1) 57 | 58 | sqrt_x_obj <- sqrt_x(x) 59 | sqrt_x_obj 60 | p <- predict(sqrt_x_obj) 61 | x2 <- predict(sqrt_x_obj, newdata = p, inverse = TRUE) 62 | 63 | all.equal(x2, x) 64 | 65 | } 66 | -------------------------------------------------------------------------------- /man/step_best_normalize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step_best_normalize.R 3 | \name{step_best_normalize} 4 | \alias{step_best_normalize} 5 | \alias{step_bestNormalize} 6 | \alias{step_bestNormalize_new} 7 | \alias{tidy.step_best_normalize} 8 | \alias{axe_env.step_best_normalize} 9 | \title{Run bestNormalize transformation for \code{recipes} implementation} 10 | \usage{ 11 | step_best_normalize( 12 | recipe, 13 | ..., 14 | role = NA, 15 | trained = FALSE, 16 | transform_info = NULL, 17 | transform_options = list(), 18 | num_unique = 5, 19 | skip = FALSE, 20 | id = rand_id("best_normalize") 21 | ) 22 | 23 | \method{tidy}{step_best_normalize}(x, ...) 24 | 25 | \method{axe_env}{step_best_normalize}(x, ...) 26 | } 27 | \arguments{ 28 | \item{recipe}{A formula or recipe} 29 | 30 | \item{...}{One or more selector functions to choose which variables are 31 | affected by the step. See [selections()] for more details. For the `tidy` 32 | method, these are not currently used.} 33 | 34 | \item{role}{Not used by this step since no new variables are created.} 35 | 36 | \item{trained}{For recipes functionality} 37 | 38 | \item{transform_info}{A numeric vector of transformation values. 
This (was 39 | transform_info) is `NULL` until computed by [prep.recipe()].} 40 | 41 | \item{transform_options}{options to be passed to bestNormalize} 42 | 43 | \item{num_unique}{An integer where data that have fewer possible values will 44 | not be evaluated for a transformation.} 45 | 46 | \item{skip}{For recipes functionality} 47 | 48 | \item{id}{For recipes functionality} 49 | 50 | \item{x}{A `step_best_normalize` object.} 51 | } 52 | \value{ 53 | An updated version of `recipe` with the new step added to the 54 | sequence of existing steps (if any). For the `tidy` method, a tibble with 55 | columns `terms` (the selectors or variables selected) and `value` (the 56 | lambda estimate). 57 | } 58 | \description{ 59 | `step_best_normalize` creates a specification of a recipe step 60 | (see `recipes` package) that will transform data using the best of a suite 61 | of normalization transformations estimated (by default) using 62 | cross-validation. 63 | } 64 | \details{ 65 | The bestnormalize transformation can be used to rescale a variable 66 | to be more similar to a normal distribution. See `?bestNormalize` for more 67 | information; `step_best_normalize` is the implementation of `bestNormalize` 68 | in the `recipes` context. 69 | 70 | As of version 1.7, the `butcher` package can be used to (hopefully) improve 71 | scalability of this function on bigger data sets.
72 | } 73 | \examples{ 74 | 75 | library(recipes) 76 | rec <- recipe(~ ., data = as.data.frame(iris)) 77 | 78 | bn_trans <- step_best_normalize(rec, all_numeric()) 79 | 80 | bn_estimates <- prep(bn_trans, training = as.data.frame(iris)) 81 | 82 | bn_data <- bake(bn_estimates, as.data.frame(iris)) 83 | 84 | plot(density(iris[, "Petal.Length"]), main = "before") 85 | plot(density(bn_data$Petal.Length), main = "after") 86 | 87 | tidy(bn_trans, number = 1) 88 | tidy(bn_estimates, number = 1) 89 | 90 | } 91 | \seealso{ 92 | \code{\link[bestNormalize]{bestNormalize}} \code{\link{orderNorm}}, 93 | [recipe()] [prep.recipe()] [bake.recipe()] 94 | } 95 | \concept{preprocessing} 96 | \concept{transformation_methods} 97 | -------------------------------------------------------------------------------- /man/step_orderNorm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step_orderNorm.R 3 | \name{step_orderNorm} 4 | \alias{step_orderNorm} 5 | \alias{tidy.step_orderNorm} 6 | \alias{axe_env.step_orderNorm} 7 | \title{ORQ normalization (orderNorm) for \code{recipes} implementation} 8 | \usage{ 9 | step_orderNorm( 10 | recipe, 11 | ..., 12 | role = NA, 13 | trained = FALSE, 14 | transform_info = NULL, 15 | transform_options = list(), 16 | num_unique = 5, 17 | skip = FALSE, 18 | id = rand_id("orderNorm") 19 | ) 20 | 21 | \method{tidy}{step_orderNorm}(x, ...) 22 | 23 | \method{axe_env}{step_orderNorm}(x, ...) 24 | } 25 | \arguments{ 26 | \item{recipe}{A formula or recipe} 27 | 28 | \item{...}{One or more selector functions to choose which variables are 29 | affected by the step. See [selections()] for more details. 
For the `tidy` 30 | method, these are not currently used.} 31 | 32 | \item{role}{Not used by this step since no new variables are created.} 33 | 34 | \item{trained}{For recipes functionality} 35 | 36 | \item{transform_info}{A numeric vector of transformation values. This (was 37 | transform_info) is `NULL` until computed by [prep.recipe()].} 38 | 39 | \item{transform_options}{options to be passed to orderNorm} 40 | 41 | \item{num_unique}{An integer where data that have fewer possible values will 42 | not be evaluated for a transformation.} 43 | 44 | \item{skip}{For recipes functionality} 45 | 46 | \item{id}{For recipes functionality} 47 | 48 | \item{x}{A `step_orderNorm` object.} 49 | } 50 | \value{ 51 | An updated version of `recipe` with the new step added to the 52 | sequence of existing steps (if any). For the `tidy` method, a tibble with 53 | columns `terms` (the selectors or variables selected) and `value` (the 54 | lambda estimate). 55 | } 56 | \description{ 57 | `step_orderNorm` creates a specification of a recipe step (see 58 | `recipes` package) that will transform data using the ORQ (orderNorm) 59 | transformation, which approximates the "true" normalizing transformation if 60 | one exists. This is considerably faster than `step_bestNormalize`. 61 | } 62 | \details{ 63 | The orderNorm transformation can be used to rescale a variable to be 64 | more similar to a normal distribution. See `?orderNorm` for more 65 | information; `step_orderNorm` is the implementation of `orderNorm` in the 66 | `recipes` context. 67 | 68 | As of version 1.7, the `butcher` package can be used to (hopefully) improve 69 | scalability of this function on bigger data sets. 
70 | } 71 | \examples{ 72 | library(recipes) 73 | rec <- recipe(~ ., data = as.data.frame(iris)) 74 | 75 | orq_trans <- step_orderNorm(rec, all_numeric()) 76 | 77 | orq_estimates <- prep(orq_trans, training = as.data.frame(iris)) 78 | 79 | orq_data <- bake(orq_estimates, as.data.frame(iris)) 80 | 81 | plot(density(iris[, "Petal.Length"]), main = "before") 82 | plot(density(orq_data$Petal.Length), main = "after") 83 | 84 | tidy(orq_trans, number = 1) 85 | tidy(orq_estimates, number = 1) 86 | 87 | 88 | } 89 | \references{ 90 | Ryan A. Peterson (2019). Ordered quantile normalization: a 91 | semiparametric transformation built for the cross-validation era. Journal 92 | of Applied Statistics, 1-16. 93 | } 94 | \seealso{ 95 | \code{\link[bestNormalize]{orderNorm}} \code{\link{bestNormalize}}, 96 | [recipe()] [prep.recipe()] [bake.recipe()] 97 | } 98 | \concept{preprocessing} 99 | \concept{transformation_methods} 100 | -------------------------------------------------------------------------------- /man/yeojohnson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/yeojohnson.R 3 | \name{yeojohnson} 4 | \alias{yeojohnson} 5 | \alias{predict.yeojohnson} 6 | \alias{print.yeojohnson} 7 | \title{Yeo-Johnson Normalization} 8 | \usage{ 9 | yeojohnson(x, eps = 0.001, standardize = TRUE, ...) 10 | 11 | \method{predict}{yeojohnson}(object, newdata = NULL, inverse = FALSE, ...) 12 | 13 | \method{print}{yeojohnson}(x, ...) 
14 | } 15 | \arguments{ 16 | \item{x}{A vector to normalize with Yeo-Johnson} 17 | 18 | \item{eps}{A value to compare lambda against to see if it is equal to zero} 19 | 20 | \item{standardize}{If TRUE, the transformed values are also centered and 21 | scaled, such that the transformation attempts a standard normal} 22 | 23 | \item{...}{Additional arguments that can be passed to the estimation of the 24 | lambda parameter (lower, upper)} 25 | 26 | \item{object}{an object of class 'yeojohnson'} 27 | 28 | \item{newdata}{a vector of data to be (reverse) transformed} 29 | 30 | \item{inverse}{if TRUE, performs reverse transformation} 31 | } 32 | \value{ 33 | A list of class \code{yeojohnson} with elements 34 | 35 | \item{x.t}{transformed original data} 36 | \item{x}{original data} 37 | \item{mean}{mean after transformation but prior to standardization} 38 | \item{sd}{sd after transformation but prior to standardization} 39 | \item{lambda}{estimated lambda value for skew transformation} 40 | \item{n}{number of nonmissing observations} 41 | \item{norm_stat}{Pearson's P / degrees of freedom} 42 | \item{standardize}{Was the transformation standardized} 43 | 44 | The \code{predict} function returns the numeric value of the transformation 45 | performed on new data, and allows for the inverse transformation as well. 46 | } 47 | \description{ 48 | Perform a Yeo-Johnson Transformation and center/scale a vector to 49 | attempt normalization 50 | } 51 | \details{ 52 | \code{yeojohnson} estimates the optimal value of lambda for the 53 | Yeo-Johnson transformation. This transformation can be performed on new 54 | data, and inverted, via the \code{predict} function. 55 | 56 | The Yeo-Johnson is similar to the Box-Cox method, however it allows for the 57 | transformation of nonpositive data as well. The \code{step_YeoJohnson} 58 | function in the \code{recipes} package is another useful resource (see 59 | references). 
60 | } 61 | \examples{ 62 | 63 | x <- rgamma(100, 1, 1) 64 | 65 | yeojohnson_obj <- yeojohnson(x) 66 | yeojohnson_obj 67 | p <- predict(yeojohnson_obj) 68 | x2 <- predict(yeojohnson_obj, newdata = p, inverse = TRUE) 69 | 70 | all.equal(x2, x) 71 | 72 | } 73 | \references{ 74 | Yeo, I. K., & Johnson, R. A. (2000). A new family of power 75 | transformations to improve normality or symmetry. Biometrika. 76 | 77 | Max Kuhn and Hadley Wickham (2017). recipes: Preprocessing Tools to Create 78 | Design Matrices. R package version 0.1.0.9000. 79 | https://github.com/topepo/recipes 80 | } 81 | -------------------------------------------------------------------------------- /revdep/.gitignore: -------------------------------------------------------------------------------- 1 | checks 2 | library 3 | checks.noindex 4 | library.noindex 5 | cloud.noindex 6 | data.sqlite 7 | *.html 8 | -------------------------------------------------------------------------------- /revdep/README.md: -------------------------------------------------------------------------------- 1 | # Platform 2 | 3 | |field |value | 4 | |:--------|:----------------------------------------| 5 | |version |R version 4.2.1 (2022-06-23) | 6 | |os |macOS Ventura 13.0.1 | 7 | |system |x86_64, darwin17.0 | 8 | |ui |RStudio | 9 | |language |(EN) | 10 | |collate |en_US.UTF-8 | 11 | |ctype |en_US.UTF-8 | 12 | |tz |America/Denver | 13 | |date |2023-08-17 | 14 | |rstudio |2022.12.0+353 Elsbeth Geranium (desktop) | 15 | |pandoc |NA | 16 | 17 | # Dependencies 18 | 19 | |package |old |new |Δ | 20 | |:-------------|:-----|:------|:--| 21 | |bestNormalize |1.9.0 |1.9.1 |* | 22 | |clock |NA |0.7.0 |* | 23 | |cpp11 |NA |0.4.6 |* | 24 | |digest |NA |0.6.33 |* | 25 | |dplyr |NA |1.1.2 |* | 26 | |future |NA |1.33.0 |* | 27 | |future.apply |NA |1.11.0 |* | 28 | |ggplot2 |NA |3.4.3 |* | 29 | |lamW |NA |2.2.0 |* | 30 | |parallelly |NA |1.36.0 |* | 31 | |progressr |NA |0.14.0 |* | 32 | |purrr |NA |1.0.2 |* | 33 | |Rcpp |NA |1.0.11 |* | 
34 | |recipes |NA |1.0.7 |* | 35 | |rlang |NA |1.1.1 |* | 36 | |tzdb |NA |0.4.0 |* | 37 | |vctrs |NA |0.6.3 |* | 38 | |viridisLite |NA |0.4.2 |* | 39 | 40 | # Revdeps 41 | 42 | -------------------------------------------------------------------------------- /revdep/cran.md: -------------------------------------------------------------------------------- 1 | ## revdepcheck results 2 | 3 | We checked 3 reverse dependencies (2 from CRAN + 1 from Bioconductor), comparing R CMD check results across CRAN and dev versions of this package. 4 | 5 | * We saw 0 new problems 6 | * We failed to check 0 packages 7 | 8 | -------------------------------------------------------------------------------- /revdep/failures.md: -------------------------------------------------------------------------------- 1 | *Wow, no problems at all. :)* -------------------------------------------------------------------------------- /revdep/problems.md: -------------------------------------------------------------------------------- 1 | *Wow, no problems at all. 
:)* -------------------------------------------------------------------------------- /tests/testthat-bestNormalize.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(bestNormalize) 3 | 4 | test_check(package = 'bestNormalize', filter = "bestNormalize") 5 | -------------------------------------------------------------------------------- /tests/testthat-custom-parallel.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(bestNormalize) 3 | 4 | test_check(package = 'bestNormalize', filter = "parallel|custom") 5 | -------------------------------------------------------------------------------- /tests/testthat-main-transforms.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(bestNormalize) 3 | 4 | test_check(package = 'bestNormalize', filter = "boxcox|yeojohnson|lambert|orderNorm") 5 | -------------------------------------------------------------------------------- /tests/testthat-methods-step.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(bestNormalize) 3 | 4 | test_check(package = 'bestNormalize', filter = "methods|step") 5 | -------------------------------------------------------------------------------- /tests/testthat-minor-transforms.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(bestNormalize) 3 | 4 | test_check(package = 'bestNormalize', filter = "arcsinh|binarize|exp_x|log|no_transform|sqrt|center_scale") 5 | -------------------------------------------------------------------------------- /tests/testthat/Rplots.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petersonR/bestNormalize/46c0ea73e57eb2130bac9078b501688fd48d65c9/tests/testthat/Rplots.pdf 
-------------------------------------------------------------------------------- /tests/testthat/test_arcsinh_x.R: -------------------------------------------------------------------------------- 1 | context('arcsinh_x functionality') 2 | 3 | data(iris) 4 | train <- iris$Petal.Width 5 | arcsinh_x_obj <- arcsinh_x(train) 6 | 7 | test_that('arcsinh_x Transforms original data consistently', { 8 | expect_equal(arcsinh_x_obj$x.t, predict(arcsinh_x_obj)) 9 | expect_equal(arcsinh_x_obj$x, predict(arcsinh_x_obj, inverse = TRUE)) 10 | }) 11 | 12 | test_that('arcsinh_x Transforms new data consistently', { 13 | nd <- seq(0, 4, length = 100) 14 | pred <- predict(arcsinh_x_obj, newdata = nd) 15 | expect_true(!any(is.na(pred))) 16 | 17 | nd2 <- predict(arcsinh_x_obj, newdata = pred, inverse = TRUE) 18 | expect_equal(nd, nd2) 19 | }) 20 | 21 | test_that('arcsinh_x correctly handles missing original data', { 22 | b <- arcsinh_x(c(NA, train)) 23 | expect_equal(as.numeric(NA), b$x.t[1]) 24 | expect_equal(as.numeric(NA), predict(b)[1]) 25 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 26 | }) 27 | 28 | test_that('arcsinh_x correctly handles missing new data', { 29 | b <- arcsinh_x(train) 30 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 31 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 32 | }) 33 | 34 | # Test standardization 35 | arcsinh_x_obj <- arcsinh_x(train, standardize = FALSE) 36 | 37 | test_that('arcsinh_x Transforms original data consistently', { 38 | expect_equal(arcsinh_x_obj$x.t, predict(arcsinh_x_obj)) 39 | expect_equal(arcsinh_x_obj$x, predict(arcsinh_x_obj, inverse = TRUE)) 40 | }) 41 | 42 | test_that('arcsinh_x Transforms new data consistently', { 43 | nd <- seq(0, 4, length = 100) 44 | pred <- predict(arcsinh_x_obj, newdata = nd) 45 | expect_true(!any(is.na(pred))) 46 | 47 | nd2 <- predict(arcsinh_x_obj, newdata = pred, inverse = TRUE) 48 | expect_equal(nd, nd2) 49 | }) 50 | 51 | 
test_that('arcsinh_x correctly handles missing original data', { 52 | b <- arcsinh_x(c(NA, train), standardize = FALSE) 53 | expect_equal(as.numeric(NA), b$x.t[1]) 54 | expect_equal(as.numeric(NA), predict(b)[1]) 55 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 56 | }) 57 | 58 | test_that('arcsinh_x correctly handles missing new data', { 59 | b <- arcsinh_x(train, standardize = FALSE) 60 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 61 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 62 | }) 63 | 64 | -------------------------------------------------------------------------------- /tests/testthat/test_bestNormalize.R: -------------------------------------------------------------------------------- 1 | context('bestNormalize functionality') 2 | 3 | data(iris) 4 | train <- iris$Petal.Width 5 | 6 | BNobject <- suppressWarnings(bestNormalize(train, out_of_sample = FALSE, quiet = TRUE)) 7 | BNobject4 <- bestNormalize(train, allow_orderNorm = FALSE, out_of_sample = FALSE, quiet = TRUE) 8 | BNobject5 <- suppressWarnings(bestNormalize(train, out_of_sample = TRUE, quiet = TRUE)) 9 | # BNobject6 <- suppressWarnings(bestNormalize(train, quiet = TRUE, loo = TRUE, allow_lambert_s = TRUE)) 10 | 11 | # Test transformations 12 | test_that('BestNormalize transformations with positive data', { 13 | 14 | working_transforms <- c(names(BNobject$other_transforms), class(BNobject$chosen_transform)) 15 | 16 | expect_equal(working_transforms, 17 | c("arcsinh_x", "boxcox", "center_scale", "double_reverse_log", "exp_x", 18 | "log_x", "sqrt_x", "yeojohnson", "orderNorm")) 19 | 20 | expect_equal(BNobject$x.t, predict(BNobject)) 21 | expect_equal(BNobject$x, predict(BNobject, inverse = TRUE)) 22 | expect_equal(BNobject4$x.t, predict(BNobject4)) 23 | expect_equal(BNobject4$x, predict(BNobject4, inverse = TRUE)) 24 | }) 25 | 26 | # 27 | BNobject <- suppressWarnings(bestNormalize(c(-1, train), quiet = TRUE)) 28 | BNobject4 <- 
suppressWarnings(bestNormalize(c(-1, train), allow_orderNorm = FALSE, out_of_sample = FALSE, quiet = TRUE)) 29 | test_that('BestNormalize transformations with mixed data, in-sample', { 30 | expect_equal(BNobject$x.t, predict(BNobject)) 31 | expect_equal(BNobject$x, predict(BNobject, inverse = TRUE)) 32 | expect_equal(BNobject4$x.t, predict(BNobject4), check.attributes = FALSE) 33 | expect_equal(BNobject4$x, predict(BNobject4, inverse = TRUE)) 34 | }) 35 | 36 | 37 | BNobject <- suppressWarnings(bestNormalize(c(-train), out_of_sample = FALSE, quiet = TRUE)) 38 | BNobject4 <- suppressWarnings(bestNormalize(c(-train), allow_orderNorm = FALSE, out_of_sample = FALSE, quiet = TRUE)) 39 | test_that('BestNormalize transformations with negative data', { 40 | expect_equal(BNobject$x.t, predict(BNobject)) 41 | expect_equal(BNobject$x, predict(BNobject, inverse = TRUE)) 42 | expect_equal(BNobject4$x.t, predict(BNobject4)) 43 | expect_equal(BNobject4$x, predict(BNobject4, inverse = TRUE)) 44 | }) 45 | 46 | train2 <- c(train, -1, NA) 47 | BNobject <- suppressWarnings(bestNormalize(train2, out_of_sample = FALSE, quiet = TRUE)) 48 | BNobject4 <- suppressWarnings(bestNormalize(train2, allow_orderNorm = FALSE, out_of_sample = FALSE, quiet = TRUE)) 49 | test_that('BestNormalize transformations with mixed data and missing values', { 50 | expect_equal(BNobject$x.t, predict(BNobject)) 51 | expect_equal(BNobject$x, predict(BNobject, inverse = TRUE)) 52 | expect_equal(BNobject4$x.t, predict(BNobject4), check.attributes = FALSE) 53 | expect_equal(BNobject4$x, predict(BNobject4, inverse = TRUE)) 54 | }) 55 | 56 | test_that('bestNormalize handles missing original data', { 57 | suppressWarnings(b <- bestNormalize(c(NA, train), out_of_sample = FALSE, quiet = TRUE)) 58 | expect_equal(as.numeric(NA), b$x.t[1]) 59 | expect_equal(as.numeric(NA), predict(b)[1]) 60 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 61 | }) 62 | 63 | test_that('bestNormalize handles missing new data', { 64 
| suppressWarnings(b <- bestNormalize(train, out_of_sample = FALSE, quiet = TRUE)) 65 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 66 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 67 | }) 68 | 69 | # Test standardize = FALSE 70 | train2 <- c(train, -1, NA) 71 | BNobject <- suppressWarnings(bestNormalize(train2, standardize = FALSE, out_of_sample = FALSE, quiet = TRUE)) 72 | BNobject4 <- suppressWarnings(bestNormalize(train2, standardize = FALSE, 73 | allow_orderNorm = FALSE, out_of_sample = FALSE, quiet = TRUE)) 74 | test_that('BestNormalize transformations without standardization', { 75 | expect_equal(BNobject$x.t, predict(BNobject)) 76 | expect_equal(BNobject$x, predict(BNobject, inverse = TRUE)) 77 | expect_equal(BNobject4$x.t, predict(BNobject4), check.attributes = FALSE) 78 | expect_equal(BNobject4$x, predict(BNobject4, inverse = TRUE)) 79 | }) 80 | 81 | test_that('bestNormalize without standardization handles missing original data', { 82 | suppressWarnings(b <- bestNormalize(c(NA, train), standardize = FALSE, out_of_sample = FALSE, quiet = TRUE)) 83 | expect_equal(as.numeric(NA), b$x.t[1]) 84 | expect_equal(as.numeric(NA), predict(b)[1]) 85 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 86 | }) 87 | 88 | test_that('bestNormalize without standardization handles missing new data', { 89 | suppressWarnings(b <- bestNormalize(train, standardize = FALSE, out_of_sample = FALSE, quiet = TRUE)) 90 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 91 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 92 | }) 93 | 94 | # Test lambert functionality in bestNormalize 95 | test_that("bestNormalize works with lambert of type s", { 96 | skip_on_cran() 97 | skip_on_ci() 98 | b <- suppressWarnings(bestNormalize(train, allow_lambert_s = TRUE, quiet = TRUE, out_of_sample = FALSE)) 99 | expect_true(!is.null(b$other_transforms$lambert_s)) 100 | 
expect_true(is.null(b$other_transforms$lambert_h)) 101 | expect_true(is.null(b$other_transforms$lambert_hh)) 102 | }) 103 | 104 | test_that("bestNormalize works with lambert of type h", { 105 | skip_on_cran() 106 | skip_on_ci() 107 | b <- suppressWarnings(bestNormalize(train, allow_lambert_h = TRUE, allow_lambert_s = FALSE, quiet = TRUE, out_of_sample = FALSE)) 108 | expect_true(is.null(b$other_transforms$lambert_s)) 109 | expect_true(!is.null(b$other_transforms$lambert_h)) 110 | expect_true(!is.null(b$other_transforms$lambert_hh)) 111 | }) 112 | 113 | test_that("bestNormalize works with lambert of type h", { 114 | skip_on_cran() 115 | skip_on_ci() 116 | b <- suppressWarnings(bestNormalize(train, allow_lambert_h = TRUE, allow_lambert_s = FALSE, quiet = TRUE, out_of_sample = FALSE)) 117 | expect_true(is.null(b$other_transforms$lambert_s)) 118 | expect_true(!is.null(b$other_transforms$lambert_h)) 119 | expect_true(!is.null(b$other_transforms$lambert_hh)) 120 | }) 121 | 122 | test_that("options work for bestNormalize", { 123 | ## Log_x 124 | expect_silent(b <- bestNormalize(train, tr_opts = list(log_x = list(a = 1)), warn = FALSE)) 125 | expect_equal(b$other_transforms$log_x$a, 1) 126 | expect_silent(b <- bestNormalize(train, tr_opts = list(log_x = list(a = 100)), warn = FALSE)) 127 | expect_equal(b$other_transforms$log_x$a, 100) 128 | 129 | expect_silent(b <- bestNormalize(train, tr_opts = list(log_x = list(a = 1, b = 5)), warn = FALSE)) 130 | expect_equal(b$other_transforms$log_x$b, 5) 131 | expect_silent(b <- bestNormalize(train, tr_opts = list(log_x = list(a = 100, b = 10)), warn = FALSE)) 132 | expect_equal(b$other_transforms$log_x$b, 10) 133 | 134 | expect_silent(b <- bestNormalize(train, tr_opts = list(log_x = list(eps = 1)), warn = FALSE)) 135 | expect_equal(b$other_transforms$log_x$eps, 1) 136 | expect_silent(b <- bestNormalize(train, tr_opts = list(log_x = list(eps = 100)), warn = FALSE)) 137 | expect_equal(b$other_transforms$log_x$eps, 100) 138 | 139 | ## 
Sqrt_x 140 | expect_silent(b <- bestNormalize(train, tr_opts = list(sqrt_x = list(a = 1)), warn = FALSE)) 141 | expect_equal(b$other_transforms$sqrt_x$a, 1) 142 | expect_silent(b <- bestNormalize(train, tr_opts = list(sqrt_x = list(a = 100)), warn = FALSE)) 143 | expect_equal(b$other_transforms$sqrt_x$a, 100) 144 | 145 | ## yeo_johnson 146 | expect_silent(b <- bestNormalize(train, tr_opts = list(yeojohnson = list(eps = 0.1)), warn = FALSE)) 147 | expect_equal(b$other_transforms$yeojohnson$eps, .1) 148 | expect_silent(b <- bestNormalize(train, tr_opts = list(yeojohnson = list(eps = .01)), warn = FALSE)) 149 | expect_equal(b$other_transforms$yeojohnson$eps, .01) 150 | }) 151 | -------------------------------------------------------------------------------- /tests/testthat/test_binarize.R: -------------------------------------------------------------------------------- 1 | context('binarize functionality') 2 | 3 | data(iris) 4 | 5 | train <- iris$Petal.Width 6 | binarize.obj <- binarize(train) 7 | bn2 <- binarize(train, "mean") 8 | bn3 <- binarize(train, "mode") 9 | 10 | test_that('binarize ransforms original data consistently' , { 11 | expect_equal(binarize.obj$x.t, predict(binarize.obj)) 12 | expect_equal(bn2$x.t, predict(bn2)) 13 | expect_equal(bn3$x.t, predict(bn3)) 14 | }) 15 | 16 | test_that('binarize Transforms new data', { 17 | nd <- seq(0, 4, length = 100) 18 | pred <- predict(binarize.obj, newdata = nd) 19 | expect_true(!any(is.na(pred))) 20 | 21 | pred <- predict(bn2, newdata = nd) 22 | expect_true(!any(is.na(pred))) 23 | 24 | pred <- predict(bn3, newdata = nd) 25 | expect_true(!any(is.na(pred))) 26 | }) 27 | 28 | test_that('binarize correctly handles missing original data', { 29 | b <- binarize(c(NA, train)) 30 | expect_equal(as.numeric(NA), b$x.t[1]) 31 | expect_equal(as.numeric(NA), predict(b)[1]) 32 | expect_equal(as.numeric(NA), as.numeric(predict(b, inverse = TRUE)[1])) 33 | 34 | b <- binarize(c(NA, train), "mean") 35 | expect_equal(as.numeric(NA), 
b$x.t[1]) 36 | expect_equal(as.numeric(NA), predict(b)[1]) 37 | expect_equal(as.numeric(NA), as.numeric(predict(b, inverse = TRUE)[1])) 38 | 39 | b <- binarize(c(NA, train), "mode") 40 | expect_equal(as.numeric(NA), b$x.t[1]) 41 | expect_equal(as.numeric(NA), predict(b)[1]) 42 | expect_equal(as.numeric(NA), as.numeric(predict(b, inverse = TRUE)[1])) 43 | }) 44 | 45 | test_that('binarize correctly handles missing new data', { 46 | b <- binarize(train) 47 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 48 | expect_equal(as.numeric(NA), as.numeric(predict(b, newdata = c(1, NA), inverse = TRUE)[2])) 49 | 50 | b <- binarize(train, "mean") 51 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 52 | expect_equal(as.numeric(NA), as.numeric(predict(b, newdata = c(1, NA), inverse = TRUE)[2])) 53 | 54 | b <- binarize(train, "mode") 55 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 56 | expect_equal(as.numeric(NA), as.numeric(predict(b, newdata = c(1, NA), inverse = TRUE)[2])) 57 | }) -------------------------------------------------------------------------------- /tests/testthat/test_bn_parallel.R: -------------------------------------------------------------------------------- 1 | context('bestNormalize parallel functionality') 2 | 3 | skip_on_cran() 4 | set.seed(1) 5 | 6 | data(iris) 7 | iris <- iris[1:30,] 8 | 9 | train <- iris$Petal.Width 10 | 11 | cl <- parallel::makeCluster(2) 12 | 13 | test_that("Parallel functionality works for RCV", { 14 | b <- bestNormalize(train, warn = FALSE, cluster = cl, r = 2, quiet = TRUE) 15 | expect_true(is.null(b$other_transforms$lambert_h)) 16 | 17 | b <- bestNormalize(train, warn = FALSE, cluster = cl, allow_lambert_s = TRUE, r = 2, quiet = TRUE) 18 | expect_true(!is.null(b$other_transforms$lambert_s)) 19 | expect_true(is.null(b$other_transforms$lambert_h)) 20 | 21 | b <- bestNormalize(train, warn = FALSE, cluster = cl, allow_lambert_h = TRUE, allow_lambert_s = FALSE, r = 2, quiet = TRUE) 
22 | expect_true(is.null(b$other_transforms$lambert_s)) 23 | expect_true(!is.null(b$other_transforms$lambert_h)) 24 | 25 | b <- bestNormalize(train, warn = FALSE, cluster = cl, allow_lambert_h = TRUE, allow_lambert_s = TRUE, r = 2, quiet = TRUE) 26 | expect_true(!is.null(b$other_transforms$lambert_s)) 27 | expect_true(!is.null(b$other_transforms$lambert_h)) 28 | 29 | }) 30 | 31 | train <- iris$Petal.Width 32 | 33 | test_that("Parallel functionality works for LOO", { 34 | b <- bestNormalize(train, warn = FALSE, cluster = cl, loo = TRUE, quiet = TRUE) 35 | b <- bestNormalize(train, warn = FALSE, allow_lambert_h = TRUE, 36 | allow_lambert_s = TRUE, loo = TRUE, quiet = FALSE) 37 | expect_true(!is.null(b$other_transforms$lambert_s)) 38 | expect_true(!is.null(b$other_transforms$lambert_h)) 39 | 40 | }) 41 | 42 | ## Test custom functions and parallelization 43 | 44 | ## Define user-function 45 | cuberoot_x <- function(x, a = NULL, standardize = TRUE) { 46 | stopifnot(is.numeric(x)) 47 | 48 | min_a <- max(0, -(min(x, na.rm = TRUE))) 49 | if(!length(a)) 50 | a <- min_a 51 | if(a < min_a) { 52 | warning("Setting a < max(0, -(min(x))) can lead to transformation issues", 53 | "Standardize set to FALSE") 54 | standardize <- FALSE 55 | } 56 | 57 | 58 | x.t <- (x + a)^(1/3) 59 | mu <- mean(x.t, na.rm = TRUE) 60 | sigma <- sd(x.t, na.rm = TRUE) 61 | if (standardize) x.t <- (x.t - mu) / sigma 62 | 63 | # Get in-sample normality statistic results 64 | ptest <- nortest::pearson.test(x.t) 65 | 66 | val <- list( 67 | x.t = x.t, 68 | x = x, 69 | mean = mu, 70 | sd = sigma, 71 | a = a, 72 | n = length(x.t) - sum(is.na(x)), 73 | norm_stat = unname(ptest$statistic / ptest$df), 74 | standardize = standardize 75 | ) 76 | 77 | # Assign class 78 | class(val) <- c('cuberoot_x', class(val)) 79 | val 80 | } 81 | 82 | predict.cuberoot_x <- function(object, newdata = NULL, inverse = FALSE, ...) 
{ 83 | 84 | # If no data supplied and not inverse 85 | if (is.null(newdata) & !inverse) 86 | newdata <- object$x 87 | 88 | # If no data supplied and inverse 89 | if (is.null(newdata) & inverse) 90 | newdata <- object$x.t 91 | 92 | # Actually performing transformations 93 | 94 | # Perform inverse transformation as estimated 95 | if (inverse) { 96 | 97 | # Reverse-standardize 98 | if (object$standardize) 99 | newdata <- newdata * object$sd + object$mean 100 | 101 | # Reverse-cube-root (cube) 102 | newdata <- newdata^3 - object$a 103 | 104 | 105 | # Otherwise, perform transformation as estimated 106 | } else if (!inverse) { 107 | # Take cube root 108 | newdata <- (newdata + object$a)^(1/3) 109 | 110 | # Standardize to mean 0, sd 1 111 | if (object$standardize) 112 | newdata <- (newdata - object$mean) / object$sd 113 | } 114 | 115 | # Return transformed data 116 | unname(newdata) 117 | } 118 | 119 | ## Define user-function 120 | quadroot_x <- function(x, a = NULL, standardize = TRUE) { 121 | stopifnot(is.numeric(x)) 122 | 123 | min_a <- max(0, -(min(x, na.rm = TRUE))) 124 | if(!length(a)) 125 | a <- min_a 126 | if(a < min_a) { 127 | warning("Setting a < max(0, -(min(x))) can lead to transformation issues", 128 | "Standardize set to FALSE") 129 | standardize <- FALSE 130 | } 131 | 132 | 133 | x.t <- (x + a)^(1/4) 134 | mu <- mean(x.t, na.rm = TRUE) 135 | sigma <- sd(x.t, na.rm = TRUE) 136 | if (standardize) x.t <- (x.t - mu) / sigma 137 | 138 | # Get in-sample normality statistic results 139 | ptest <- nortest::pearson.test(x.t) 140 | 141 | val <- list( 142 | x.t = x.t, 143 | x = x, 144 | mean = mu, 145 | sd = sigma, 146 | a = a, 147 | n = length(x.t) - sum(is.na(x)), 148 | norm_stat = unname(ptest$statistic / ptest$df), 149 | standardize = standardize 150 | ) 151 | 152 | # Assign class 153 | class(val) <- c('quadroot_x', class(val)) 154 | val 155 | } 156 | 157 | predict.quadroot_x <- function(object, newdata = NULL, inverse = FALSE, ...) 
{ 158 | 159 | # If no data supplied and not inverse 160 | if (is.null(newdata) & !inverse) 161 | newdata <- object$x 162 | 163 | # If no data supplied and inverse 164 | if (is.null(newdata) & inverse) 165 | newdata <- object$x.t 166 | 167 | # Actually performing transformations 168 | 169 | # Perform inverse transformation as estimated 170 | if (inverse) { 171 | 172 | # Reverse-standardize 173 | if (object$standardize) 174 | newdata <- newdata * object$sd + object$mean 175 | 176 | # Reverse-quad-root (quad) 177 | newdata <- newdata^4 - object$a 178 | 179 | 180 | # Otherwise, perform transformation as estimated 181 | } else if (!inverse) { 182 | # Take quad root 183 | newdata <- (newdata + object$a)^(1/4) 184 | 185 | # Standardize to mean 0, sd 1 186 | if (object$standardize) 187 | newdata <- (newdata - object$mean) / object$sd 188 | } 189 | 190 | # Return transformed data 191 | unname(newdata) 192 | } 193 | 194 | 195 | new_transforms <- list( 196 | quadroot_x = quadroot_x, 197 | predict.quadroot_x = predict.quadroot_x, 198 | cuberoot_x = cuberoot_x, 199 | predict.cuberoot_x = predict.cuberoot_x 200 | ) 201 | 202 | test_that("Parallel functionality works for RCV", { 203 | expect_silent(b <- bestNormalize(train, warn = FALSE, cluster = cl, r = 2, quiet = TRUE, new_transforms = new_transforms)) 204 | expect_true(is.null(b$other_transforms$lambert_h)) 205 | 206 | expect_silent(b <- bestNormalize(train, warn = FALSE, cluster = cl, allow_lambert_s = TRUE, 207 | r = 2, quiet = TRUE, new_transforms = new_transforms)) 208 | expect_true(!is.null(b$other_transforms$lambert_s)) 209 | expect_true(is.null(b$other_transforms$lambert_h)) 210 | 211 | expect_silent(b <- bestNormalize(train, warn = FALSE, cluster = cl, allow_lambert_h = TRUE, 212 | r = 2, quiet = TRUE, new_transforms = new_transforms)) 213 | expect_true(!is.null(b$other_transforms$lambert_h)) 214 | 215 | expect_silent(b <- bestNormalize(train, warn = FALSE, cluster = cl, allow_lambert_h = TRUE, 216 | allow_lambert_s = 
TRUE, r = 2, quiet = TRUE, new_transforms = new_transforms)) 217 | expect_true(!is.null(b$other_transforms$lambert_s)) 218 | expect_true(!is.null(b$other_transforms$lambert_h)) 219 | 220 | }) 221 | 222 | train <- iris$Petal.Width 223 | 224 | test_that("Parallel functionality works for LOO", { 225 | b <- bestNormalize(train, warn = FALSE, cluster = cl, loo = TRUE, quiet = TRUE, new_transforms = new_transforms) 226 | b <- bestNormalize(train, warn = FALSE, cluster = cl, allow_lambert_h = TRUE, 227 | allow_lambert_s = TRUE, loo = TRUE, quiet = TRUE, new_transforms = new_transforms) 228 | expect_true(!is.null(b$other_transforms$lambert_s)) 229 | expect_true(!is.null(b$other_transforms$lambert_h)) 230 | 231 | }) 232 | 233 | 234 | parallel::stopCluster(cl) 235 | 236 | -------------------------------------------------------------------------------- /tests/testthat/test_boxcox.R: -------------------------------------------------------------------------------- 1 | context('boxcox functionality') 2 | 3 | data(iris) 4 | train <- iris$Petal.Width 5 | boxcox_obj <- boxcox(train) 6 | 7 | test_that('boxcox Transforms original data consistently', { 8 | expect_equal(boxcox_obj$x.t, predict(boxcox_obj)) 9 | expect_equal(boxcox_obj$x, predict(boxcox_obj, inverse = T)) 10 | }) 11 | 12 | test_that('boxcox Transforms new data consistently', { 13 | nd <- seq(0, 4, length = 100) 14 | pred <- predict(boxcox_obj, newdata = nd) 15 | expect_true(!any(is.na(pred))) 16 | 17 | nd2 <- predict(boxcox_obj, newdata = pred, inverse = TRUE) 18 | expect_equal(nd, nd2) 19 | }) 20 | 21 | test_that('boxcox does not try to estimate with negatives' , { 22 | expect_error(boxcox(c(-1, 2, 3))) 23 | }) 24 | 25 | test_that('boxcox correctly handles missing original data', { 26 | b <- boxcox(c(NA, train)) 27 | expect_equal(as.numeric(NA), b$x.t[1]) 28 | expect_equal(as.numeric(NA), predict(b)[1]) 29 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 30 | }) 31 | 32 | test_that('boxcox correctly handles 
missing new data', { 33 | b <- boxcox(train) 34 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 35 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 36 | }) 37 | 38 | # Test standardization 39 | boxcox_obj <- boxcox(train, standardize = FALSE) 40 | 41 | test_that('boxcox Transforms original data consistently', { 42 | expect_equal(boxcox_obj$x.t, predict(boxcox_obj)) 43 | expect_equal(boxcox_obj$x, predict(boxcox_obj, inverse = TRUE)) 44 | }) 45 | 46 | test_that('boxcox Transforms new data consistently', { 47 | nd <- seq(0, 4, length = 100) 48 | pred <- predict(boxcox_obj, newdata = nd) 49 | expect_true(!any(is.na(pred))) 50 | 51 | nd2 <- predict(boxcox_obj, newdata = pred, inverse = TRUE) 52 | expect_equal(nd, nd2) 53 | }) 54 | 55 | test_that('boxcox does not try to estimate with negatives' , { 56 | expect_error(boxcox(c(-1, 2, 3), standardize = FALSE)) 57 | }) 58 | 59 | test_that('boxcox correctly handles missing original data', { 60 | b <- boxcox(c(NA, train), standardize = FALSE) 61 | expect_equal(as.numeric(NA), b$x.t[1]) 62 | expect_equal(as.numeric(NA), predict(b)[1]) 63 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 64 | }) 65 | 66 | test_that('boxcox correctly handles missing new data', { 67 | b <- boxcox(train, standardize = FALSE) 68 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 69 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 70 | }) 71 | -------------------------------------------------------------------------------- /tests/testthat/test_double_reverse_log.R: -------------------------------------------------------------------------------- 1 | context('double_reverse_log functionality') 2 | 3 | data(iris) 4 | train <- iris$Petal.Width 5 | double_reverse_log_obj <- double_reverse_log(train) 6 | 7 | test_that('double_reverse_log Transforms original data consistently', { 8 | expect_equal(double_reverse_log_obj$x.t, 
predict(double_reverse_log_obj)) 9 | expect_equal(double_reverse_log_obj$x, predict(double_reverse_log_obj, inverse = TRUE)) 10 | }) 11 | 12 | test_that('double_reverse_log Transforms new data consistently', { 13 | nd <- seq(0, 2.7, length = 100) 14 | pred <- predict(double_reverse_log_obj, newdata = nd) 15 | expect_true(!any(is.na(pred))) 16 | 17 | nd2 <- predict(double_reverse_log_obj, newdata = pred, inverse = TRUE) 18 | expect_equal(nd, nd2) 19 | }) 20 | 21 | test_that('double_reverse_log correctly handles missing original data', { 22 | b <- double_reverse_log(c(NA, train)) 23 | expect_equal(as.numeric(NA), b$x.t[1]) 24 | expect_equal(as.numeric(NA), predict(b)[1]) 25 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 26 | }) 27 | 28 | test_that('double_reverse_log correctly handles missing new data', { 29 | b <- double_reverse_log(train) 30 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 31 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 32 | }) 33 | 34 | # Test standardization 35 | double_reverse_log_obj <- double_reverse_log(train, standardize = FALSE) 36 | 37 | test_that('double_reverse_log Transforms original data consistently', { 38 | expect_equal(double_reverse_log_obj$x.t, predict(double_reverse_log_obj)) 39 | expect_equal(double_reverse_log_obj$x, predict(double_reverse_log_obj, inverse = TRUE)) 40 | }) 41 | 42 | test_that('double_reverse_log Transforms new data consistently', { 43 | nd <- seq(1, 2.7, length = 100) 44 | pred <- predict(double_reverse_log_obj, newdata = nd) 45 | expect_true(!any(is.na(pred))) 46 | 47 | nd2 <- predict(double_reverse_log_obj, newdata = pred, inverse = TRUE) 48 | expect_equal(nd, nd2) 49 | }) 50 | 51 | test_that('double_reverse_log correctly handles missing original data', { 52 | b <- double_reverse_log(c(NA, train), standardize = FALSE) 53 | expect_equal(as.numeric(NA), b$x.t[1]) 54 | expect_equal(as.numeric(NA), predict(b)[1]) 55 | 
expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 56 | }) 57 | 58 | test_that('double_reverse_log correctly handles missing new data', { 59 | b <- double_reverse_log(train, standardize = FALSE) 60 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 61 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 62 | }) 63 | 64 | 65 | double_reverse_log_obj <- double_reverse_log(train, b = exp(1)) 66 | 67 | test_that('double_reverse_log Transforms new data the same regardless of predict order', { 68 | nd <- seq(0, 2.7, length = 100) 69 | 70 | pred <- predict(double_reverse_log_obj, newdata = nd) 71 | nd2 <- predict(double_reverse_log_obj, newdata = pred, inverse = TRUE) 72 | expect_equal(nd, nd2) 73 | 74 | p1 <- sapply(nd, function(nd_i) predict(double_reverse_log_obj, newdata = nd_i)) 75 | p2 <- predict(double_reverse_log_obj, newdata = nd) 76 | expect_equal(p1, p2) 77 | 78 | }) 79 | 80 | double_reverse_log_obj <- double_reverse_log(train, eps = 1.6) 81 | 82 | test_that('double_reverse_log Transforms new data well if padding increased', { 83 | nd <- seq(0, 4, length = 100) 84 | 85 | pred <- predict(double_reverse_log_obj, newdata = nd) 86 | nd2 <- predict(double_reverse_log_obj, newdata = pred, inverse = TRUE) 87 | expect_equal(nd, nd2) 88 | 89 | p1 <- sapply(nd, function(nd_i) predict(double_reverse_log_obj, newdata = nd_i)) 90 | p2 <- predict(double_reverse_log_obj, newdata = nd) 91 | expect_equal(p1, p2) 92 | 93 | }) 94 | -------------------------------------------------------------------------------- /tests/testthat/test_exp_x.R: -------------------------------------------------------------------------------- 1 | context('exp_x functionality') 2 | 3 | data(iris) 4 | train <- iris$Petal.Width 5 | exp_x_obj <- exp_x(train) 6 | 7 | test_that('exp_x Transforms original data consistently', { 8 | expect_equal(exp_x_obj$x.t, predict(exp_x_obj)) 9 | expect_equal(exp_x_obj$x, predict(exp_x_obj, inverse = T)) 10 | }) 11 | 12 | 
test_that('exp_x Transforms new data consistently', { 13 | nd <- seq(0, 4, length = 100) 14 | pred <- predict(exp_x_obj, newdata = nd) 15 | expect_true(!any(is.na(pred))) 16 | 17 | nd2 <- predict(exp_x_obj, newdata = pred, inverse = TRUE) 18 | expect_equal(nd, nd2) 19 | }) 20 | 21 | test_that('exp_x correctly handles missing original data', { 22 | b <- exp_x(c(NA, train)) 23 | expect_equal(as.numeric(NA), b$x.t[1]) 24 | expect_equal(as.numeric(NA), predict(b)[1]) 25 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 26 | }) 27 | 28 | test_that('exp_x correctly handles missing new data', { 29 | b <- exp_x(train) 30 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 31 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 32 | }) 33 | 34 | # Test standardization 35 | exp_x_obj <- exp_x(train, standardize = FALSE) 36 | 37 | test_that('exp_x Transforms original data consistently', { 38 | expect_equal(exp_x_obj$x.t, predict(exp_x_obj)) 39 | expect_equal(exp_x_obj$x, predict(exp_x_obj, inverse = TRUE)) 40 | }) 41 | 42 | test_that('exp_x Transforms new data consistently', { 43 | nd <- seq(0, 4, length = 100) 44 | pred <- predict(exp_x_obj, newdata = nd) 45 | expect_true(!any(is.na(pred))) 46 | 47 | nd2 <- predict(exp_x_obj, newdata = pred, inverse = TRUE) 48 | expect_equal(nd, nd2) 49 | }) 50 | 51 | test_that('exp_x correctly handles missing original data', { 52 | b <- exp_x(c(NA, train), standardize = FALSE) 53 | expect_equal(as.numeric(NA), b$x.t[1]) 54 | expect_equal(as.numeric(NA), predict(b)[1]) 55 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 56 | }) 57 | 58 | test_that('exp_x correctly handles missing new data', { 59 | b <- exp_x(train, standardize = FALSE) 60 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 61 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 62 | }) 63 | 64 | 
-------------------------------------------------------------------------------- /tests/testthat/test_lambert.R: -------------------------------------------------------------------------------- 1 | context('lambert functionality') 2 | 3 | ## LambertW package doesn't work quite right on linux and 4 | # solaris machines (it uses too many threads) 5 | skip_on_os("linux") 6 | skip_on_os("solaris") 7 | 8 | data(iris) 9 | 10 | train <- iris$Petal.Width 11 | 12 | lambert_obj <- lambert(train) 13 | 14 | test_that('lambert Transforms original data consistently' , { 15 | expect_equal(lambert_obj$x.t, predict(lambert_obj)) 16 | expect_equal(lambert_obj$x, predict(lambert_obj, inverse = TRUE)) 17 | }) 18 | 19 | test_that('lambert Transforms new data consistently', { 20 | nd <- seq(-1, 4, length = 100) 21 | pred <- predict(lambert_obj, newdata = nd) 22 | expect_true(!any(is.na(pred))) 23 | 24 | nd2 <- predict(lambert_obj, newdata = pred, inverse = TRUE) 25 | expect_equal(nd, nd2) 26 | }) 27 | 28 | lambert_obj <- lambert(train, standardize = FALSE) 29 | 30 | test_that('lambert without standardization Transforms original data consistently' , { 31 | expect_equal(lambert_obj$x.t, predict(lambert_obj)) 32 | expect_equal(lambert_obj$x, predict(lambert_obj, inverse = TRUE)) 33 | }) 34 | 35 | test_that('lambert without standardization Transforms new data consistently', { 36 | nd <- seq(-1, 4, length = 100) 37 | pred <- predict(lambert_obj, newdata = nd) 38 | expect_true(!any(is.na(pred))) 39 | 40 | nd2 <- predict(lambert_obj, newdata = pred, inverse = TRUE) 41 | expect_equal(nd, nd2) 42 | }) 43 | 44 | # for type = 'h' 45 | 46 | lambert_obj <- lambert(train, type = 'h') 47 | 48 | test_that('lambert Transforms original data consistently' , { 49 | expect_equal(lambert_obj$x.t, predict(lambert_obj)) 50 | expect_equal(lambert_obj$x, predict(lambert_obj, inverse = TRUE)) 51 | }) 52 | 53 | test_that('lambert Transforms new data consistently', { 54 | nd <- seq(0, 4, length = 100) 55 | pred <- 
predict(lambert_obj, newdata = nd) 56 | expect_true(!any(is.na(pred))) 57 | 58 | nd2 <- predict(lambert_obj, newdata = pred, inverse = TRUE) 59 | expect_equal(nd, nd2, tolerance = .001) 60 | }) 61 | 62 | test_that('lambert correctly handles missing original data', { 63 | b <- lambert(c(NA, train)) 64 | expect_equal(as.numeric(NA), b$x.t[1]) 65 | expect_equal(as.numeric(NA), predict(b)[1]) 66 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 67 | }) 68 | 69 | test_that('lambert correctly handles missing new data', { 70 | b <- lambert(train) 71 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 72 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 73 | }) 74 | -------------------------------------------------------------------------------- /tests/testthat/test_log_x.R: -------------------------------------------------------------------------------- 1 | context('log_x functionality') 2 | 3 | data(iris) 4 | train <- iris$Petal.Width 5 | log_x_obj <- log_x(train) 6 | 7 | test_that('log_x Transforms original data consistently', { 8 | expect_equal(log_x_obj$x.t, predict(log_x_obj)) 9 | expect_equal(log_x_obj$x, predict(log_x_obj, inverse = TRUE)) 10 | }) 11 | 12 | test_that('log_x Transforms new data consistently', { 13 | nd <- seq(0, 4, length = 100) 14 | pred <- predict(log_x_obj, newdata = nd) 15 | expect_true(!any(is.na(pred))) 16 | 17 | nd2 <- predict(log_x_obj, newdata = pred, inverse = TRUE) 18 | expect_equal(nd, nd2) 19 | }) 20 | 21 | test_that('log_x correctly handles missing original data', { 22 | b <- log_x(c(NA, train)) 23 | expect_equal(as.numeric(NA), b$x.t[1]) 24 | expect_equal(as.numeric(NA), predict(b)[1]) 25 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 26 | }) 27 | 28 | test_that('log_x correctly handles missing new data', { 29 | b <- log_x(train) 30 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 31 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), 
inverse = TRUE)[2]) 32 | }) 33 | 34 | # Test standardization 35 | log_x_obj <- log_x(train, standardize = FALSE) 36 | 37 | test_that('log_x Transforms original data consistently', { 38 | expect_equal(log_x_obj$x.t, predict(log_x_obj)) 39 | expect_equal(log_x_obj$x, predict(log_x_obj, inverse = TRUE)) 40 | }) 41 | 42 | test_that('log_x Transforms new data consistently', { 43 | nd <- seq(0, 4, length = 100) 44 | pred <- predict(log_x_obj, newdata = nd) 45 | expect_true(!any(is.na(pred))) 46 | 47 | nd2 <- predict(log_x_obj, newdata = pred, inverse = TRUE) 48 | expect_equal(nd, nd2) 49 | }) 50 | 51 | test_that('log_x correctly handles missing original data', { 52 | b <- log_x(c(NA, train), standardize = FALSE) 53 | expect_equal(as.numeric(NA), b$x.t[1]) 54 | expect_equal(as.numeric(NA), predict(b)[1]) 55 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 56 | }) 57 | 58 | test_that('log_x correctly handles missing new data', { 59 | b <- log_x(train, standardize = FALSE) 60 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 61 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 62 | }) 63 | 64 | 65 | log_x_obj <- log_x(train, a = 1) 66 | 67 | test_that('log_x Transforms new data consistently (given a)', { 68 | nd <- seq(0, 4, length = 100) 69 | pred <- predict(log_x_obj, newdata = nd) 70 | expect_true(!any(is.na(pred))) 71 | 72 | nd2 <- predict(log_x_obj, newdata = pred, inverse = TRUE) 73 | expect_equal(nd, nd2) 74 | }) 75 | 76 | log_x_obj <- log_x(train, a = 1, b = exp(1)) 77 | 78 | test_that('log_x Transforms new data consistently (given a and b)', { 79 | nd <- seq(0, 4, length = 100) 80 | pred <- predict(log_x_obj, newdata = nd) 81 | expect_true(!any(is.na(pred))) 82 | 83 | nd2 <- predict(log_x_obj, newdata = pred, inverse = TRUE) 84 | expect_equal(nd, nd2) 85 | }) 86 | -------------------------------------------------------------------------------- /tests/testthat/test_no_transform.R: 
-------------------------------------------------------------------------------- 1 | context('no_transform functionality') 2 | 3 | data(iris) 4 | train <- iris$Petal.Width 5 | no_transform_obj <- no_transform(train) 6 | 7 | test_that('no_transform Transforms original data consistently', { 8 | expect_equal(no_transform_obj$x.t, predict(no_transform_obj)) 9 | expect_equal(no_transform_obj$x, predict(no_transform_obj, inverse = TRUE)) 10 | }) 11 | 12 | test_that('no_transform Transforms new data consistently', { 13 | nd <- seq(0, 4, length = 100) 14 | pred <- predict(no_transform_obj, newdata = nd) 15 | expect_true(!any(is.na(pred))) 16 | 17 | nd2 <- predict(no_transform_obj, newdata = pred, inverse = TRUE) 18 | expect_equal(nd, nd2) 19 | }) 20 | 21 | test_that('no_transform correctly handles missing original data', { 22 | b <- no_transform(c(NA, train)) 23 | expect_equal(as.numeric(NA), b$x.t[1]) 24 | expect_equal(as.numeric(NA), predict(b)[1]) 25 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 26 | }) 27 | 28 | test_that('no_transform correctly handles missing new data', { 29 | b <- no_transform(train) 30 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 31 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 32 | }) 33 | 34 | data(iris) 35 | train <- iris$Petal.Width 36 | center_scale_obj <- center_scale(train) 37 | 38 | test_that('center_scale Transforms original data consistently', { 39 | expect_equal(center_scale_obj$x.t, predict(center_scale_obj)) 40 | expect_equal(center_scale_obj$x, predict(center_scale_obj, inverse = TRUE)) 41 | }) 42 | 43 | test_that('center_scale Transforms new data consistently', { 44 | nd <- seq(0, 4, length = 100) 45 | pred <- predict(center_scale_obj, newdata = nd) 46 | expect_true(!any(is.na(pred))) 47 | 48 | nd2 <- predict(center_scale_obj, newdata = pred, inverse = TRUE) 49 | expect_equal(nd, nd2) 50 | }) 51 | 52 | test_that('center_scale correctly handles missing original 
data', { 53 | b <- center_scale(c(NA, train)) 54 | expect_equal(as.numeric(NA), b$x.t[1]) 55 | expect_equal(as.numeric(NA), predict(b)[1]) 56 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 57 | }) 58 | 59 | test_that('center_scale correctly handles missing new data', { 60 | b <- center_scale(train) 61 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 62 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 63 | }) 64 | 65 | -------------------------------------------------------------------------------- /tests/testthat/test_orderNorm.R: -------------------------------------------------------------------------------- 1 | context('orderNorm functionality') 2 | 3 | data(iris) 4 | 5 | train <- iris$Petal.Width 6 | orderNorm_obj <- suppressWarnings(orderNorm(train)) 7 | 8 | test_that('orderNorm transforms original data consistently', { 9 | expect_equal(orderNorm_obj$x.t, predict(orderNorm_obj)) 10 | expect_equal(orderNorm_obj$x, predict(orderNorm_obj, inverse = TRUE)) 11 | }) 12 | 13 | test_that('orderNorm Transforms new data consistently', { 14 | nd <- seq(0, 4, length = 100) 15 | expect_warning(pred <- predict(orderNorm_obj, newdata = nd)) 16 | expect_true(!any(is.na(pred))) 17 | expect_warning(nd2 <- predict(orderNorm_obj, newdata = pred, inverse = TRUE)) 18 | expect_equal(nd, nd2) 19 | }) 20 | 21 | test_that('orderNorm correctly handles missing original data', { 22 | suppressWarnings(b <- orderNorm(c(NA, train))) 23 | expect_equal(as.numeric(NA), b$x.t[1]) 24 | expect_equal(as.numeric(NA), predict(b)[1]) 25 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 26 | }) 27 | 28 | test_that('orderNorm correctly handles missing new data', { 29 | suppressWarnings(b <- orderNorm(train)) 30 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 31 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 32 | }) 33 | 34 | test_that('orderNorm Transforms new data 
consistently using n_logit_fit < n', { 35 | orderNorm_obj <- suppressWarnings(orderNorm(train, n_logit_fit = 50)) 36 | nd <- seq(0, 4, length = 100) 37 | expect_warning(pred <- predict(orderNorm_obj, newdata = nd)) 38 | expect_true(!any(is.na(pred))) 39 | expect_warning(nd2 <- predict(orderNorm_obj, newdata = pred, inverse = TRUE)) 40 | expect_equal(nd, nd2) 41 | 42 | expect_equal(nrow(orderNorm_obj$fit$model), 50) 43 | }) -------------------------------------------------------------------------------- /tests/testthat/test_plot_methods.R: -------------------------------------------------------------------------------- 1 | context('plot method functionality') 2 | 3 | data(iris) 4 | 5 | train <- c(NA, -1, iris$Petal.Width) 6 | 7 | bn <- suppressWarnings(bestNormalize(train, quiet = TRUE)) 8 | test_that('bestNormalize plot method runs (in-sample) without error or warning', { 9 | expect_silent(plot(bn)) 10 | }) 11 | 12 | m2plot <- names(bn$other_transforms) 13 | m2plot <- m2plot[!(m2plot %in% c("exp_x"))] 14 | 15 | test_that('bestNormalize plot method runs (out-of-sample) without error or warning', { 16 | expect_warning(plot(bn, bounds = c(-2, 10), methods = m2plot)) 17 | }) 18 | 19 | on <- suppressWarnings(orderNorm(train)) 20 | test_that('orderNorm plot method runs without error or warning', { 21 | expect_silent(plot(on)) 22 | expect_silent(plot(on, bounds = c(-2, 10))) 23 | }) 24 | 25 | test_that('lambert plot method runs without error or warning', { 26 | skip_on_cran() 27 | skip_on_travis() 28 | expect_silent(lw <- suppressWarnings(lambert(train))) 29 | expect_silent(plot(lw)) 30 | }) 31 | 32 | bc <- suppressWarnings(boxcox(train[train > 0])) 33 | test_that('boxcox plot method runs without error or warning', { 34 | expect_silent(plot(bc)) 35 | }) 36 | 37 | yj <- suppressWarnings(yeojohnson(train)) 38 | test_that('yeojohnson plot method runs without error or warning', { 39 | expect_silent(plot(yj)) 40 | }) 41 | 
-------------------------------------------------------------------------------- /tests/testthat/test_print_methods.R: -------------------------------------------------------------------------------- 1 | context('print method functionality') 2 | 3 | data(iris) 4 | 5 | train <- c(NA, -1, iris$Petal.Width) 6 | 7 | bn <- suppressWarnings(bestNormalize(train, quiet = TRUE)) 8 | test_that('bestNormalize print method runs without error or warning', { 9 | expect_output(print(bn)) 10 | }) 11 | 12 | on <- suppressWarnings(orderNorm(train)) 13 | test_that('orderNorm print method runs without error or warning', { 14 | expect_output(print(on)) 15 | }) 16 | 17 | test_that('lambert print method runs without error or warning', { 18 | skip_on_cran() 19 | skip_on_travis() 20 | lw <- suppressWarnings(lambert(train)) 21 | 22 | expect_output(print(lw)) 23 | }) 24 | 25 | bc <- suppressWarnings(boxcox(train[train > 0])) 26 | test_that('boxcox print method runs without error or warning', { 27 | expect_output(print(bc)) 28 | }) 29 | 30 | ex <- suppressWarnings(exp_x(train)) 31 | test_that('exp_x print method runs without error or warning', { 32 | expect_output(print(ex)) 33 | }) 34 | 35 | lx <- suppressWarnings(log_x(train)) 36 | test_that('log_x print method runs without error or warning', { 37 | expect_output(print(lx)) 38 | }) 39 | 40 | nt <- suppressWarnings(no_transform(train)) 41 | test_that('no_transform print method runs without error or warning', { 42 | expect_output(print(nt)) 43 | expect_silent(val <- tidy(nt)) 44 | }) 45 | 46 | cs <- suppressWarnings(center_scale(train)) 47 | test_that('center_scale print method runs without error or warning', { 48 | expect_output(print(cs)) 49 | }) 50 | 51 | sx <- suppressWarnings(sqrt_x(train)) 52 | test_that('sqrt_x print method runs without error or warning', { 53 | expect_output(print(sx)) 54 | }) 55 | 56 | yj <- suppressWarnings(yeojohnson(train)) 57 | test_that('yeojohnson print method runs without error or warning', { 58 | 
expect_output(print(yj)) 59 | }) 60 | 61 | bn <- suppressWarnings(binarize(train)) 62 | test_that('binarize print method runs without error or warning', { 63 | expect_output(print(bn)) 64 | }) 65 | 66 | as <- suppressWarnings(arcsinh_x(train)) 67 | test_that('arcsinh_x print method runs without error or warning', { 68 | expect_output(print(as)) 69 | }) 70 | 71 | 72 | -------------------------------------------------------------------------------- /tests/testthat/test_sqrt_x.R: -------------------------------------------------------------------------------- 1 | context('sqrt_x functionality') 2 | 3 | data(iris) 4 | train <- iris$Petal.Width 5 | sqrt_x_obj <- sqrt_x(train) 6 | 7 | test_that('sqrt_x Transforms original data consistently', { 8 | expect_equal(sqrt_x_obj$x.t, predict(sqrt_x_obj)) 9 | expect_equal(sqrt_x_obj$x, predict(sqrt_x_obj, inverse = TRUE)) 10 | }) 11 | 12 | test_that('sqrt_x Transforms new data consistently', { 13 | nd <- seq(0, 4, length = 100) 14 | pred <- predict(sqrt_x_obj, newdata = nd) 15 | expect_true(!any(is.na(pred))) 16 | 17 | nd2 <- predict(sqrt_x_obj, newdata = pred, inverse = TRUE) 18 | expect_equal(nd, nd2) 19 | }) 20 | 21 | test_that('sqrt_x correctly handles missing original data', { 22 | b <- sqrt_x(c(NA, train)) 23 | expect_equal(as.numeric(NA), b$x.t[1]) 24 | expect_equal(as.numeric(NA), predict(b)[1]) 25 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 26 | }) 27 | 28 | test_that('sqrt_x correctly handles missing new data', { 29 | b <- sqrt_x(train) 30 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 31 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 32 | }) 33 | 34 | # Test standardization 35 | sqrt_x_obj <- sqrt_x(train, standardize = FALSE) 36 | 37 | test_that('sqrt_x Transforms original data consistently', { 38 | expect_equal(sqrt_x_obj$x.t, predict(sqrt_x_obj)) 39 | expect_equal(sqrt_x_obj$x, predict(sqrt_x_obj, inverse = TRUE)) 40 | }) 41 | 42 | 
test_that('sqrt_x Transforms new data consistently', { 43 | nd <- seq(0, 4, length = 100) 44 | pred <- predict(sqrt_x_obj, newdata = nd) 45 | expect_true(!any(is.na(pred))) 46 | 47 | nd2 <- predict(sqrt_x_obj, newdata = pred, inverse = TRUE) 48 | expect_equal(nd, nd2) 49 | }) 50 | 51 | test_that('sqrt_x correctly handles missing original data', { 52 | b <- sqrt_x(c(NA, train), standardize = FALSE) 53 | expect_equal(as.numeric(NA), b$x.t[1]) 54 | expect_equal(as.numeric(NA), predict(b)[1]) 55 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 56 | }) 57 | 58 | test_that('sqrt_x correctly handles missing new data', { 59 | b <- sqrt_x(train, standardize = FALSE) 60 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 61 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 62 | }) 63 | 64 | 65 | sqrt_x_obj <- sqrt_x(train, a = 1) 66 | 67 | test_that('sqrt_x Transforms new data consistently (given a)', { 68 | nd <- seq(0, 4, length = 100) 69 | pred <- predict(sqrt_x_obj, newdata = nd) 70 | expect_true(!any(is.na(pred))) 71 | 72 | nd2 <- predict(sqrt_x_obj, newdata = pred, inverse = TRUE) 73 | expect_equal(nd, nd2) 74 | }) -------------------------------------------------------------------------------- /tests/testthat/test_step_fns.R: -------------------------------------------------------------------------------- 1 | context('step_* recipes functionality') 2 | library(recipes) 3 | 4 | # par(mfrow = c(2,1)) 5 | rec <- recipe(~ ., data = as.data.frame(iris)) 6 | set.seed(1) 7 | 8 | test_that('step_* transformations with iris data', { 9 | dt1 <- orderNorm(iris$Petal.Length, warn = FALSE)$x.t 10 | 11 | ## Using BoxCox 12 | 13 | expect_silent(bc_trans <- step_BoxCox(rec, all_numeric())) 14 | expect_silent(bc_estimates <- prep(bc_trans, training = as.data.frame(iris))) 15 | expect_silent(bc_data <- bake(bc_estimates, as.data.frame(iris))) 16 | # plot(density(iris[, "Petal.Length"]), main = "before") 17 | # 
plot(density(bc_data$Petal.Length), main = "after") 18 | expect_equal(nrow(tidy(bc_trans, number = 1)), 1) 19 | expect_equal(nrow(tidy(bc_estimates, number = 1)), 4) 20 | 21 | ## Using bestNormalize 22 | 23 | # Check deprecation 24 | expect_warning(bn_trans <- step_bestNormalize(rec, all_numeric())) 25 | 26 | # Check step_best_normalize 27 | expect_silent(bn_trans <- step_best_normalize(rec, all_numeric())) 28 | expect_silent(bn_estimates <- prep(bn_trans, training = as.data.frame(iris))) 29 | expect_silent(bn_data <- bake(bn_estimates, as.data.frame(iris))) 30 | # plot(density(iris[, "Petal.Length"]), main = "before") 31 | # plot(density(bn_data$Petal.Length), main = "after") 32 | expect_equal(nrow(tidy(bn_trans, number = 1)), 1) 33 | expect_equal(nrow(tidy(bn_estimates, number = 1)), 4) 34 | expect_equal(unname(tidy(bn_estimates, number = 1)$chosen_transform[3]), "orderNorm") 35 | expect_identical(dt1, bn_data$Petal.Length) 36 | 37 | ## LOO 38 | expect_silent(bn_trans <- step_best_normalize(rec, all_numeric(), transform_options = list(loo = TRUE, allow_orderNorm = FALSE))) 39 | expect_silent(bn_estimates <- prep(bn_trans, training = as.data.frame(iris))) 40 | expect_silent(bn_data <- bake(bn_estimates, as.data.frame(iris))) 41 | # plot(density(iris[, "Petal.Length"]), main = "before") 42 | # plot(density(bn_data$Petal.Length), main = "after") 43 | expect_equal(nrow(tidy(bn_trans, number = 1)), 1) 44 | expect_equal(nrow(tidy(bn_estimates, number = 1)), 4) 45 | expect_s3_class(tidy(bn_estimates, number = 1)$cv_info[[3]]$tr_object[[1]], "double_reverse_log") 46 | 47 | ## Faster (use in-sample metrics, does NOT use orderNorm) 48 | expect_silent(bn_trans <- step_best_normalize(rec, all_numeric(), transform_options = list(out_of_sample = FALSE, allow_orderNorm = FALSE))) 49 | expect_silent(bn_estimates <- prep(bn_trans, training = as.data.frame(iris))) 50 | expect_silent(bn_data <- bake(bn_estimates, as.data.frame(iris))) 51 | # plot(density(iris[, "Petal.Length"]), 
main = "before") 52 | # plot(density(bn_data$Petal.Length), main = "after") 53 | expect_equal(nrow(tidy(bn_trans, number = 1)), 1) 54 | expect_equal(nrow(tidy(bn_estimates, number = 1)), 4) 55 | expect_s3_class(tidy(bn_estimates, number = 1)$cv_info[[3]]$tr_object[[1]], "double_reverse_log") 56 | 57 | ## Fastest (only use ORQ (orderNorm) transformation) 58 | expect_silent(orq_trans <- step_orderNorm(rec, all_numeric())) 59 | expect_silent(orq_estimates <- prep(orq_trans, training = as.data.frame(iris))) 60 | expect_silent(orq_data <- bake(orq_estimates, as.data.frame(iris))) 61 | # plot(density(iris[, "Petal.Length"]), main = "before") 62 | # plot(density(orq_data$Petal.Length), main = "after") 63 | expect_equal(nrow(tidy(orq_trans, number = 1)), 1) 64 | expect_equal(nrow(tidy(orq_estimates, number = 1)), 4) 65 | expect_s3_class(tidy(orq_estimates, number = 1)$value[[3]], "orderNorm") 66 | expect_identical(dt1, orq_data$Petal.Length) 67 | 68 | }) 69 | 70 | iris2 <- iris 71 | iris2$Petal.Length[c(5,10, 19)] <- c(NA, -1, -3) 72 | iris2$group <- rep(1:2, nrow(iris)/2) 73 | 74 | test_that('step_* transformations with missing/negative/discrete data', { 75 | dt1 <- orderNorm(iris2$Petal.Length, warn = FALSE)$x.t 76 | 77 | ## Using BoxCox (should have warning due to negative data) 78 | expect_silent(bc_trans <- step_BoxCox(rec, all_numeric())) 79 | if(packageVersion("recipes") > "0.1.16") 80 | expect_warning(bc_estimates <- prep(bc_trans, training = as.data.frame(iris2))) else 81 | expect_silent(bc_estimates <- prep(bc_trans, training = as.data.frame(iris2))) 82 | expect_silent(bc_data <- bake(bc_estimates, as.data.frame(iris2))) 83 | # plot(density(iris2[, "Petal.Length"]), main = "before") 84 | # plot(density(bc_data$Petal.Length), main = "after") 85 | expect_equal(nrow(tidy(bc_trans, number = 1)), 1) 86 | expect_equal(nrow(tidy(bc_estimates, number = 1)), 3) 87 | 88 | ## Using bestNormalize 89 | expect_silent(bn_trans <- step_best_normalize(rec, all_numeric())) 90 | 
expect_silent(bn_estimates <- prep(bn_trans, training = as.data.frame(iris2))) 91 | expect_silent(bn_data <- bake(bn_estimates, as.data.frame(iris2))) 92 | # plot(density(iris2[, "Petal.Length"]), main = "before") 93 | # plot(density(bn_data$Petal.Length), main = "after") 94 | expect_equal(nrow(tidy(bn_trans, number = 1)), 1) 95 | expect_equal(nrow(tidy(bn_estimates, number = 1)), 4) 96 | expect_s3_class(tidy(bn_estimates, number = 1)$cv_info[[3]]$tr_object[[1]], "orderNorm") 97 | expect_identical(dt1, bn_data$Petal.Length) 98 | 99 | ## LOO 100 | expect_silent(bn_trans <- step_best_normalize(rec, all_numeric(), transform_options = list(loo = TRUE, allow_orderNorm = FALSE))) 101 | expect_silent(bn_estimates <- prep(bn_trans, training = as.data.frame(iris2))) 102 | expect_silent(bn_data <- bake(bn_estimates, as.data.frame(iris2))) 103 | # plot(density(iris2[, "Petal.Length"]), main = "before") 104 | # plot(density(bn_data$Petal.Length), main = "after") 105 | expect_equal(nrow(tidy(bn_trans, number = 1)), 1) 106 | expect_equal(nrow(tidy(bn_estimates, number = 1)), 4) 107 | # expect_s3_class(tidy(bn_estimates, number = 1)$cv_info[[3]]$tr_object[[1]], "lambert") 108 | 109 | ## Faster (use in-sample metrics, does NOT use orderNorm) 110 | expect_silent(bn_trans <- step_best_normalize(rec, all_numeric(), transform_options = list(out_of_sample = FALSE, allow_orderNorm = FALSE))) 111 | expect_silent(bn_estimates <- prep(bn_trans, training = as.data.frame(iris2))) 112 | expect_silent(bn_data <- bake(bn_estimates, as.data.frame(iris2))) 113 | # plot(density(iris2[, "Petal.Length"]), main = "before") 114 | # plot(density(bn_data$Petal.Length), main = "after") 115 | expect_equal(nrow(tidy(bn_trans, number = 1)), 1) 116 | expect_equal(nrow(tidy(bn_estimates, number = 1)), 4) 117 | expect_s3_class(tidy(bn_estimates, number = 1)$cv_info[[3]]$tr_object[[1]], "double_reverse_log") 118 | 119 | ## Fastest (only use ORQ (orderNorm) transformation) 120 | expect_silent(orq_trans <- 
step_orderNorm(rec, all_numeric())) 121 | expect_silent(orq_estimates <- prep(orq_trans, training = as.data.frame(iris2))) 122 | expect_silent(orq_data <- bake(orq_estimates, as.data.frame(iris2))) 123 | # plot(density(iris2[, "Petal.Length"]), main = "before") 124 | # plot(density(orq_data$Petal.Length), main = "after") 125 | expect_equal(nrow(tidy(orq_trans, number = 1)), 1) 126 | expect_equal(nrow(tidy(orq_estimates, number = 1)), 4) 127 | expect_s3_class(tidy(orq_estimates, number = 1)$value[[3]], "orderNorm") 128 | expect_identical(dt1, orq_data$Petal.Length) 129 | }) 130 | 131 | ## A little help from butcher package 132 | terms_empty_env <- function(axed, step_number) { 133 | expect_identical(attr(axed$steps[[step_number]]$terms[[1]], ".Environment"), 134 | rlang::base_env()) 135 | } 136 | 137 | impute_empty_env <- function(axed, step_number) { 138 | expect_identical(attr(axed$steps[[step_number]]$impute_with[[1]], ".Environment"), 139 | rlang::base_env()) 140 | } 141 | 142 | test_that("recipe + step_best_normalize + axe_env() works", { 143 | rec <- recipe(~ ., data = as.data.frame(state.x77)) %>% 144 | step_best_normalize(rec, all_numeric()) 145 | x <- axe_env(rec) 146 | terms_empty_env(x, 1) 147 | }) 148 | 149 | test_that("recipe + step_orderNorm + axe_env() works", { 150 | rec <- recipe(~ ., data = as.data.frame(state.x77)) %>% 151 | step_orderNorm(rec, all_numeric()) 152 | x <- axe_env(rec) 153 | terms_empty_env(x, 1) 154 | }) 155 | -------------------------------------------------------------------------------- /tests/testthat/test_yeojohnson.R: -------------------------------------------------------------------------------- 1 | context('yeojohnson functionality') 2 | 3 | data(iris) 4 | 5 | train <- iris$Petal.Width 6 | 7 | yeojohnson_obj <- yeojohnson(train) 8 | 9 | test_that('yeojohnson Transforms original data consistently', { 10 | expect_equal(yeojohnson_obj$x.t, predict(yeojohnson_obj)) 11 | expect_equal(yeojohnson_obj$x, predict(yeojohnson_obj, 
inverse = TRUE)) 12 | }) 13 | 14 | test_that('yeojohnson Transforms new data consistently', { 15 | nd <- seq(0, 4, length = 100) 16 | pred <- predict(yeojohnson_obj, newdata = nd) 17 | expect_true(!any(is.na(pred))) 18 | 19 | nd2 <- predict(yeojohnson_obj, newdata = pred, inverse = TRUE) 20 | expect_equal(nd, nd2) 21 | }) 22 | 23 | test_that('yeojohnson correctly handles missing original data', { 24 | b <- yeojohnson(c(NA, train)) 25 | expect_equal(as.numeric(NA), b$x.t[1]) 26 | expect_equal(as.numeric(NA), predict(b)[1]) 27 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 28 | }) 29 | 30 | test_that('yeojohnson correctly handles missing new data', { 31 | b <- yeojohnson(train) 32 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 33 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 34 | }) 35 | 36 | # without standardization 37 | 38 | yeojohnson_obj <- yeojohnson(train, standardize = FALSE) 39 | 40 | test_that('yeojohnson without standardization Transforms original data consistently', { 41 | expect_equal(yeojohnson_obj$x.t, predict(yeojohnson_obj)) 42 | expect_equal(yeojohnson_obj$x, predict(yeojohnson_obj, inverse = TRUE)) 43 | }) 44 | 45 | test_that('yeojohnson without standardization Transforms new data consistently', { 46 | nd <- seq(0, 4, length = 100) 47 | pred <- predict(yeojohnson_obj, newdata = nd) 48 | expect_true(!any(is.na(pred))) 49 | 50 | nd2 <- predict(yeojohnson_obj, newdata = pred, inverse = TRUE) 51 | expect_equal(nd, nd2) 52 | }) 53 | 54 | test_that('yeojohnson without standardization correctly handles missing original data', { 55 | b <- yeojohnson(c(NA, train), standardize = FALSE) 56 | expect_equal(as.numeric(NA), b$x.t[1]) 57 | expect_equal(as.numeric(NA), predict(b)[1]) 58 | expect_equal(as.numeric(NA), predict(b, inverse = TRUE)[1]) 59 | }) 60 | 61 | test_that('yeojohnson without standardization correctly handles missing new data', { 62 | b <- yeojohnson(train, standardize = FALSE) 
63 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA))[2]) 64 | expect_equal(as.numeric(NA), predict(b, newdata = c(1, NA), inverse = TRUE)[2]) 65 | }) 66 | -------------------------------------------------------------------------------- /vignettes/customization.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Customization within bestNormalize" 3 | author: "Ryan A Peterson" 4 | date: "`r Sys.Date()`" 5 | output: 6 | rmarkdown::html_vignette: 7 | toc: true 8 | vignette: > 9 | %\VignetteIndexEntry{Customization within bestNormalize} 10 | %\VignetteEngine{knitr::rmarkdown} 11 | %\VignetteEncoding{UTF-8} 12 | --- 13 | 14 | ```{r setup, include=FALSE} 15 | knitr::opts_chunk$set(echo = TRUE, fig.height = 5, fig.width = 7) 16 | library(bestNormalize) 17 | ``` 18 | 19 | # Custom functions with bestNormalize 20 | 21 | This vignette will go over the steps required to implement a custom user-defined function within the `bestNormalize` framework. 22 | 23 | There are 3 steps. 24 | 25 | 1) Create transformation function 26 | 27 | 2) Create predict method for transformation function (that can be applied to new data) 28 | 29 | 3) Pass through new function and predict method to bestNormalize 30 | 31 | ## Example: cube-root 32 | 33 | ## S3 methods 34 | 35 | Here, we start by defining a new function that we'll call `cuberoot_x`, which will take an argument `a` (as does the `sqrt_x` function) which will try to add a constant if it sees any negative numbers in `x`. It will also take the argument `standardize` which will center and scale the transformed data so that it's centered at 0 with SD = 1. 36 | 37 | ```{r} 38 | ## Define user-function 39 | cuberoot_x <- function(x, a = NULL, standardize = TRUE, ...) 
{ 40 | stopifnot(is.numeric(x)) 41 | 42 | min_a <- max(0, -(min(x, na.rm = TRUE))) 43 | if(!length(a)) 44 | a <- min_a 45 | if(a < min_a) { 46 | warning("Setting a < max(0, -(min(x))) can lead to transformation issues", 47 | "Standardize set to FALSE") 48 | standardize <- FALSE 49 | } 50 | 51 | 52 | x.t <- (x + a)^(1/3) 53 | mu <- mean(x.t, na.rm = TRUE) 54 | sigma <- sd(x.t, na.rm = TRUE) 55 | if (standardize) x.t <- (x.t - mu) / sigma 56 | 57 | # Get in-sample normality statistic results 58 | ptest <- nortest::pearson.test(x.t) 59 | 60 | val <- list( 61 | x.t = x.t, 62 | x = x, 63 | mean = mu, 64 | sd = sigma, 65 | a = a, 66 | n = length(x.t) - sum(is.na(x)), 67 | norm_stat = unname(ptest$statistic / ptest$df), 68 | standardize = standardize 69 | ) 70 | 71 | # Assign class 72 | class(val) <- c('cuberoot_x', class(val)) 73 | val 74 | } 75 | 76 | ``` 77 | 78 | Note that we assigned the returned object a class of the same name as the function; this is necessary for successful implementation within `bestNormalize`. We'll also need an associated `predict` method that is used to apply the transformation to newly observed data. 79 | 80 | ```{r} 81 | 82 | predict.cuberoot_x <- function(object, newdata = NULL, inverse = FALSE, ...)
{ 83 | 84 | # If no data supplied and not inverse 85 | if (is.null(newdata) & !inverse) 86 | newdata <- object$x 87 | 88 | # If no data supplied and inverse 89 | if (is.null(newdata) & inverse) 90 | newdata <- object$x.t 91 | 92 | # Actually performing transformations 93 | 94 | # Perform inverse transformation as estimated 95 | if (inverse) { 96 | 97 | # Reverse-standardize 98 | if (object$standardize) 99 | newdata <- newdata * object$sd + object$mean 100 | 101 | # Reverse-cube-root (cube) 102 | newdata <- newdata^3 - object$a 103 | 104 | 105 | # Otherwise, perform transformation as estimated 106 | } else if (!inverse) { 107 | # Take cube root 108 | newdata <- (newdata + object$a)^(1/3) 109 | 110 | # Standardize to mean 0, sd 1 111 | if (object$standardize) 112 | newdata <- (newdata - object$mean) / object$sd 113 | } 114 | 115 | # Return transformed data 116 | unname(newdata) 117 | } 118 | 119 | ``` 120 | 121 | ## Optional: print method 122 | 123 | This will be printed when bestNormalize selects your custom method or when you print an object returned by your new custom function. 124 | 125 | ```{r} 126 | print.cuberoot_x <- function(x, ...) { 127 | cat(ifelse(x$standardize, "Standardized", "Non-Standardized"), 128 | 'cuberoot(x + a) Transformation with', x$n, 'nonmissing obs.:\n', 129 | 'Relevant statistics:\n', 130 | '- a =', x$a, '\n', 131 | '- mean (before standardization) =', x$mean, '\n', 132 | '- sd (before standardization) =', x$sd, '\n') 133 | } 134 | 135 | ``` 136 | 137 | Note: if you can find a similar transformation in the source code, it's easy to model your code after it. For instance, for `cuberoot_x` and `predict.cuberoot_x`, I used `sqrt_x.R` as a template file. 
138 | 139 | ## Implementing with bestNormalize 140 | 141 | ```{r} 142 | # Store custom functions into list 143 | custom_transform <- list( 144 | cuberoot_x = cuberoot_x, 145 | predict.cuberoot_x = predict.cuberoot_x, 146 | print.cuberoot_x = print.cuberoot_x 147 | ) 148 | 149 | set.seed(123129) 150 | x <- rgamma(100, 1, 1) 151 | (b <- bestNormalize(x = x, new_transforms = custom_transform, standardize = FALSE)) 152 | ``` 153 | 154 | Evidently, the cube-rooting was the best normalizing transformation! 155 | 156 | ## Sanity check 157 | 158 | Is this code actually performing the cube-rooting? 159 | 160 | ```{r} 161 | all.equal(x^(1/3), b$chosen_transform$x.t) 162 | all.equal(x^(1/3), predict(b)) 163 | ``` 164 | 165 | It does indeed. 166 | 167 | # Using custom normalization statistics 168 | 169 | The bestNormalize package can estimate any univariate statistic using its CV framework. A user-defined function can be passed in through the `norm_stat_fn` argument, and this function will then be applied in lieu of the Pearson test statistic divided by its degree of freedom. 170 | 171 | The user-defined function must take an argument `x`, which indicates the data on which a user wants to evaluate the statistic. 172 | 173 | Here is an example using Lilliefors (Kolmogorov-Smirnov) normality test statistic: 174 | 175 | ```{r} 176 | bestNormalize(x, norm_stat_fn = function(x) nortest::lillie.test(x)$stat) 177 | ``` 178 | 179 | Here is an example using Lilliefors (Kolmogorov-Smirnov) normality test's p-value: 180 | 181 | ```{r} 182 | (dont_do_this <- bestNormalize(x, norm_stat_fn = function(x) nortest::lillie.test(x)$p)) 183 | ``` 184 | 185 | Note: `bestNormalize` will attempt to minimize this statistic by default, which is definitely not what you want to do when calculating the p-value. This is seen in the example above, as the WORST normalization transformation is chosen. 
186 | 187 | In this case, a user is advised to either manually select the best one: 188 | 189 | ```{r} 190 | best_transform <- names(which.max(dont_do_this$norm_stats)) 191 | (do_this <- dont_do_this$other_transforms[[best_transform]]) 192 | ``` 193 | 194 | Or, the user can reverse their defined statistic (in this case by subtracting it from 1): 195 | 196 | ```{r} 197 | (do_this <- bestNormalize(x, norm_stat_fn = function(x) 1-nortest::lillie.test(x)$p)) 198 | ``` 199 | 200 | -------------------------------------------------------------------------------- /vignettes/parallel_timings.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/petersonR/bestNormalize/46c0ea73e57eb2130bac9078b501688fd48d65c9/vignettes/parallel_timings.jpg --------------------------------------------------------------------------------