├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── DESIGN.md
├── LICENSE
├── NAMESPACE
├── NEWS.md
├── R
├── archive
│ ├── na_explicit.R
│ └── na_replace.r
├── data.R
├── drop_cols.R
├── drop_rows.R
├── impute.R
├── impute_funs.R
├── make_impute.R
├── na_predict.R
├── utils.R
└── zzz.R
├── README.md
├── TODO.md
├── data
├── nacars.rda
├── nacars_dt.rda
├── nairis.rda
└── nairis_dt.rda
├── inst
└── extdata
│ └── nacars.R
├── man
├── assign_these.Rd
├── drop_cols.Rd
├── drop_rows.Rd
├── figures
│ ├── hexagon_blue_tidyimpute_v1.0.png
│ └── hexagon_blue_tidyimpute_v1.0.svg
├── impute.Rd
├── impute_functions.Rd
├── make_impute.Rd
├── na_predict.Rd
├── nacars.Rd
└── ns_export.Rd
├── tests
├── testthat.R
└── testthat
│ ├── test-impute.R.off
│ ├── test-impute_.R
│ ├── test-impute_max.R
│ ├── test-impute_mean.R
│ ├── test-impute_median.R
│ ├── test-impute_mode.R
│ └── test-impute_zero.R
└── tidyimpute.Rproj
/.Rbuildignore:
--------------------------------------------------------------------------------
1 |
2 | .git.orig
3 |
4 | ^.*\.Rproj$
5 | ^\.Rproj\.user$
6 | ^docs$
7 |
8 | DESIGN.md
9 | TODO.md
10 |
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .git.orig
2 |
3 | # History files
4 | .Rhistory
5 | # Example code in package build process
6 | *-Ex.R
7 | # R data files from past sessions
8 | .Rdata
9 | .Rproj.user
10 | .RData
11 | src/zoo
12 | inst/doc
13 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: tidyimpute
2 | Title: Impute the Tidyverse Way
3 | Version: 0.2.0
4 | Date: 2018-03-30
5 | Authors@R: c(
6 | person("Christopher", "Brown", , "chris.brown@decisionpatterns.com", c("aut", "cre") ),
7 | person("Decision Patterns", role = "cph")
8 | )
9 | URL: https://github.com/decisionpatterns/tidyimpute
10 | Description:
11 | Functions and methods for imputing missing values (NA) in tables and lists
12 | patterned after the tidyverse approach of 'dplyr' and 'rlang'; works with
13 | data.tables as well.
14 | BugReports: https://github.com/decisionpatterns/tidyimpute/issues
15 | Depends:
16 | R (>= 3.1.0)
17 | Imports:
18 | methods,
19 | dplyr (>= 0.7.2),
20 | rlang (>= 0.1.2),
21 | na.tools (>= 0.1.0)
22 | Suggests:
23 | testthat (>= 1.0.2),
24 | data.table (>= 1.10),
25 | catcont (>= 0.5.0),
26 | magrittr
27 | License: GPL-3 | file LICENSE
28 | LazyData: true
29 | RoxygenNote: 6.0.1.9000
30 | Roxygen: list(markdown = TRUE)
31 | Repository: CRAN
32 | Encoding: UTF-8
33 | Collate:
34 | 'data.R'
35 | 'drop_cols.R'
36 | 'drop_rows.R'
37 | 'impute.R'
38 | 'make_impute.R'
39 | 'utils.R'
40 | 'impute_funs.R'
41 | 'na_predict.R'
42 | 'zzz.R'
43 |
--------------------------------------------------------------------------------
/DESIGN.md:
--------------------------------------------------------------------------------
1 | # DESIGN.md - Design notes for na.actions package
2 |
3 | This document tracks opinionated decisions about the **na.actions** package,
4 | largely concerning the design choices made.
5 |
6 | ## Goals
7 |
8 | The package should be the single repository for functions/methods for
9 | working with missing values (NA) for all data science workflows.
10 |
11 | It should be extensible and be able to handle
12 |
13 | - recursive and atomistic values
14 | - types: categorical and continuous variables
15 | - classes: specific classes
16 |
17 |
18 | ## Naming and Labels
19 |
20 | - The explicit value for `forcats::fct_explicit_na` is `(Missing)`; this is not
21 | adopted here because it is needlessly long, '(NA)' is used instead.
22 | - beginning variables with
23 |
24 |
25 |
26 | ## na.replace and
27 |
28 | ## Organization
29 |
30 | - na_replace works only on atomic vectors;
31 | - na_explicit additionally supports recursive objects and calls na_replace.
32 |
33 | ## Style Elements
34 |
35 |
36 | ## Behaviors
37 |
38 | - replacement is type/class-safe; the type will not be affected by the
39 | replacement.
40 | - Function names should follow the lower_snake_case naming conventions. This prevents
41 | collisions with functions from the *stats* package. It may make sense for
42 | `na.*` functions to operate at a low level on vectors, similar to the *stats*
43 | package, while `na_*` functions operate
44 | on a higher level.
45 |
46 |
47 | - Follow tidyverse styles
48 | - Arguments should be lower_snake_case
49 | - Arguments names should often be preceded by a '.' when following tidyverse
50 | styles
51 | - `NA_explicit_`
52 | - cannot be "NA" since it becomes impossible to distinguish it from true
53 | `NAs` when printing to the console.
54 |
55 |
56 | ## Simple Imputation
57 |
58 | na_replace( x, .na=mean )
59 | na_replace( tbl, col1 = mean ) Or,
60 | na_replace( tbl, col1 = mean(col1) )
61 |
62 |
63 | ## Complex Imputation
64 |
65 | Imputation should be performed when the replacement value is a rhs-formula:
66 |
67 | na_replace(tbl, col1 = ~col2, .method=lm )
68 |
69 | This has the effect of creating a model for col1 ~ col2
70 |
71 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This package is licensed under GPLv2 and is provided without warranty or
2 | support. For a copy of this license, refer to
3 | http://www.r-project.org/Licenses/GPL-2. If you are interested in other
4 | licensing arrangements, please contact the package maintainer.
5 |
6 | Copyright (c) 2018 Decision Patterns. Oakland, CA, USA.
7 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(drop_cols_all_na)
4 | export(drop_cols_any_na)
5 | export(drop_na_cols)
6 | export(drop_rows_all_na)
7 | export(drop_rows_any_na)
8 | export(filter_all_na)
9 | export(filter_any_na)
10 | export(impute)
11 | export(impute_all)
12 | export(impute_at)
13 | export(impute_if)
14 | export(make_impute_all)
15 | export(make_impute_at)
16 | export(make_impute_if)
17 | export(make_imputes)
18 | export(na_predict)
19 | exportPattern("^impute_.*$")
20 | import(na.tools)
21 | importFrom(dplyr,select_vars)
22 | importFrom(dplyr,vars)
23 | importFrom(methods,as)
24 | importFrom(rlang,eval_tidy)
25 | importFrom(rlang,quos)
26 | importFrom(stats,na.pass)
27 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | ## Version 0.2.0
2 |
3 | - Replace `impute_most_freq*` with `impute_mode*`
4 | - Add `NA_explicit_` as an exported constant for explicit categorical values.
5 | - Convert man to use markdown.
6 | - Fix exports
7 |
8 | ## Version 0.1.1 (2018-01-22)
9 |
10 | - Fix `na_replace` (and `na_explicit`) to add levels for values if
11 | they do not already exist.
12 | - Add tests
13 | - Fix documentation
14 |
15 | ## Version 0.7.0 (2017-08-22)
16 |
17 | - Add na_explicit and na_implicit
18 |
19 | ## Version 0.6.2
20 |
21 | - na_replace: revert from using `ifelse` because of edge cases
22 | - add `zzz.R`
23 | - add `NEWS.md`
24 | - add tests for `na_replace`
25 |
26 | ## Version 0.6.1
27 |
28 | - `na_replace` now uses `ifelse` and prevent recycling `value`
29 |
--------------------------------------------------------------------------------
/R/archive/na_explicit.R:
--------------------------------------------------------------------------------
1 | #' na_explicit
2 | #'
3 | #' Set missing values to an explicit value,
4 | #'
5 | #' @param x object; either atomic or recursive.
6 | #' @param .na either a single element vector list or named vectors; See #Details.
7 | #' @param ... (used for recursive structures only) a list of name=value pairs
8 | #'
9 | #' @details
10 | #'
11 | #' `na_explicit` replaces missing (`NA`) values in `x` with
12 | #' an explicit value set by the `.na` argument. It is largely an extension of
13 | #' [impute()] and supports table-like objects in a *tidyverse* compliant way.
14 | #'
15 | #'
16 | #' atomic x | .na==NULL | ... missing : error
17 | #' atomic x | .na==NULL | ... there : error
18 | #' atomic x | .na!=NULL | ... missing : all columns impute by .na
19 | #' atomic x | ,na!==NULL | ... there : all columns impute by .na, ... is extra args to f
20 | #'
21 | #' rec x | .na==NULL | ... missing : error
22 | #' rec x | .na==NULL | ... there : columns transformed by ...
23 | #' rec x | .na!=NULL | ... missing : all columns impute by .na (Should this be supported( ?) impute_all
24 | #' rec x | ,na!==NULL | ... there : all columns impute by .na, ... is extra args to f impute_
25 | #'
26 | #'
27 | #' impute( .tbl, ... )
28 | #'
29 | #' x .na!=NULL | ... missing : all columns set to `.na``
30 | #' .na==NULL | col=x | col=na
31 | #'
32 | #' na_explicit( .tbl, col1=mean( . , na.rm=TRUE )... )
33 | #' na_explicit
34 | #'
35 | #' **input: `x`**
36 | #'
37 | #' `x` can be either an *atomic* or *recursive* object. If *atomic* then
38 | #' `na_explicit` behaves as [impute()] an replaces missing values with `.na`
39 | #' and `...` is used to specify additional arguments if `.na` is a function.
40 | #'
41 | #' If `x` is *recursive* then missing values are replaced by `.na`/`...` in a
42 | #' dplyr manner.
43 | #'
44 | #' The default is to use
45 | #' [NA_explicit]
46 | #' [impute()] but explicitly but works on recursive
47 | #'
48 | #' It is also intended to be single argument function.
49 | #'
50 | #' @seealso
51 | #'
52 | #' - [impute()]
53 | #' - [na_implicit()]
54 | #' - forcats::fct_explicit_na()
55 | #'
56 | #' @examples
57 | #'
58 | #' na_explicit( c(1, NA, 3, 4), 0 )
59 | #' na_explicit( c("A",NA,"c","D" ) )
60 | #'
61 | #' na_explicit( c("A",NA,"c","D") )
62 | #'
63 | #' na_explicit( )
64 | #'
65 | #' @md
66 | #' @export
67 |
68 | na_explicit <- function(x, .na, ...) {
69 |   UseMethod("na_explicit")
70 | }
71 |
72 | #' @export
73 | na_explicit.default <- function(x, .na, ...) {
74 |   if (!is.recursive(x)) return(.na_explicit.atomic(x, .na, ...))
75 |   .na_explicit.recursive(x, .na, ...)
76 | }
77 |
78 |
79 | # Do not export -
80 | # these are internal functions that are dispatched by `na_explicit.default`
81 |
82 | .na_explicit.recursive <- function(x, .na = NULL, ...) {
83 |
84 |   # Each name=value pair in `...` names an element of `x` and the
85 |   # replacement for its NAs; a function value is first applied to that
86 |   # element to compute the replacement.
87 |   #
88 |   # The original had a dangling `if( ! is.null(.na) )` directly above this
89 |   # loop, so the pairs were only processed when `.na` was supplied.
90 |   # NOTE(review): `.na` itself is still unused here -- confirm whether it
91 |   # should impute every element when no pairs are given.
92 |   pairs <- list(...)
93 |   for (i in seq_along(pairs)) {
94 |     key <- names(pairs)[[i]]
95 |     val <- pairs[[i]]
96 |     if (is.function(val)) val <- val(x[[key]])
97 |     x[[key]] <- na_explicit(x[[key]], .na = val)
98 |   }
99 |
100 |   x
101 | }
102 |
103 |
104 | # Atomic input: hand `x` and `.na` to impute(); `...` is accepted but unused.
105 | .na_explicit.atomic <- function(x, .na, ...) {
106 |   impute(x, .na)
107 | }
108 |
109 | #' @export
110 | na_explicit.character <- function(x, .na = NA_explicit_)
111 |   impute(x, .na)
112 |
113 |
114 | #' @export
115 | na_explicit.factor <- function(x, .na = NA_explicit_)
116 |   impute(x, .na)
117 |
118 |
--------------------------------------------------------------------------------
/R/archive/na_replace.r:
--------------------------------------------------------------------------------
1 | #' na_replace - replace missing values in tables and recursive structures
2 | #'
3 | #' Replaces missing values (`NA`) in tables and lists
4 | #'
5 | #' @param tbl table or list
6 | #' @param ... specification of var=expr where expression can be a
7 | #' constant, vector of length( tbl[[var]] ) or function. See Details
8 | #'
9 | #' @examples
10 | #'
11 | #' data(mtcars)
12 | #' mtcars <- head(mtcars)
13 | #' mtcars[3,] <- NA_real_
14 | #'
15 | #' na_replace( mtcars, mpg=-99, cyl=length )
16 | #'
17 | #' data.table::setDT(mtcars)
18 | #' na_replace( mtcars, mpg=-99, cyl=length )
19 | #'
20 | #'
21 | #' @details
22 |
23 |
24 | na_replace <- function( .tbl, ... ) UseMethod('na_replace')
25 |
26 |
27 | #' @export
28 | na_replace.default <- function(.tbl, ...) {
29 |   # Atomic input: delegate directly to the vector-level na.replace().
30 |   if (!is.recursive(.tbl)) {
31 |     return(na.replace(x = .tbl, ...))
32 |   }
33 |
34 |   # Recursive input: each name=value pair in `...` names an element of
35 |   # `.tbl`; its NAs are replaced via na.replace( element, .na = value ).
36 |   # seq_along() makes a call without any pairs return `.tbl` unchanged.
37 |   pairs <- list(...)
38 |   for (i in seq_along(pairs)) {
39 |     key <- names(pairs)[[i]]
40 |     .tbl[[key]] <- na.replace(.tbl[[key]], .na = pairs[[i]])
41 |   }
42 |   .tbl
43 | }
44 |
--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
1 | #' @docType data
2 | #' @title data with missing values
3 | #'
4 | #' @aliases nacars nacars_dt nairis nairis_dt
5 | #'
6 | #' @details
7 | #'
8 | #' **cars** and **iris** data sets with missing data for demonstration purposes.
9 | #'
10 | #' @md
11 |
12 | "nacars"
13 |
--------------------------------------------------------------------------------
/R/drop_cols.R:
--------------------------------------------------------------------------------
1 | #' @title Remove columns with missing values
2 | #'
3 | #' @description Remove columns of a table whose values are all `NA` or who have any `NA`
4 | #'
5 | #' @param .tbl table-like object
6 | #'
7 | #' @details
8 | #' `drop_cols_all_na` removes all cols whose only values are `NA`.
9 | #' `drop_cols_any_na` removes columns that have any `NA`. They work on all
10 | #' table-like objects.
11 | #'
12 | #' @return
13 | #' An object of the same class as `.tbl` with cols containing all
14 | #' `NA` values removed
15 | #'
16 | #' @seealso
17 | #' * [dplyr::select()]
18 | #'
19 | #' @md
20 | #' @import na.tools
21 | #' @rdname drop_cols
22 | #' @export
23 |
24 | drop_cols_all_na <- function(.tbl)
25 |   .tbl[ , ! apply( .tbl, 2, all_na ), drop = FALSE ]  # drop = FALSE: a single remaining column stays a table
26 |
27 |
28 | #' @rdname drop_cols
29 | #' @export
30 |
31 | drop_cols_any_na <- function(.tbl)
32 |   .tbl[ , ! apply( .tbl, 2, any_na ), drop = FALSE ]  # drop = FALSE: a single remaining column stays a table
33 |
34 |
35 |
36 | #' @rdname drop_cols
37 | #' @export
38 |
39 | drop_na_cols <- drop_cols_all_na
40 |
--------------------------------------------------------------------------------
/R/drop_rows.R:
--------------------------------------------------------------------------------
1 | #' drop_rows_all_na, drop_rows_any_na
2 | #'
3 | #' Drop rows of a table whose values are all NA
4 | #'
5 | #' @param .tbl data-like object
6 | #'
7 | #' @details
8 | #'
9 | #' `drop_rows_all_na` removes all rows whose only values are NA;
10 | #' `drop_rows_any_na` removes rows that contain any NA. They work for all table-like objects.
11 | #'
12 | #' @return
13 | #' An object of the same class as `.tbl` with rows containing all
14 | #' `NA` values removed
15 | #'
16 | #' @seealso
17 | #' * [dplyr::filter()]
18 | #'
19 | #' @examples
20 | #'
21 | #' data(iris)
22 | #'
23 | #' .tbl <- iris[1:5,]
24 | #' .tbl[1:2,] <- NA
25 | #' .tbl[3,1] <- NA
26 | #' .tbl
27 | #'
28 | #' filter_all_na(.tbl)
29 | #' filter_any_na(.tbl)
30 | #'
31 | #' drop_rows_all_na(.tbl)
32 | #' drop_rows_any_na(.tbl)
33 | #'
34 | #' @md
35 | #' @rdname drop_rows
36 | #' @export
37 |
38 | drop_rows_all_na <- function(.tbl)
39 |   .tbl[ ! apply( .tbl, 1, all_na ), , drop = FALSE ]  # drop = FALSE: a one-column table stays a table
40 |
41 | #' @rdname drop_rows
42 | #' @export
43 | filter_all_na <- drop_rows_all_na
44 |
45 |
46 | #' @rdname drop_rows
47 | #' @export
48 | drop_rows_any_na <- function(.tbl)
49 |   .tbl[ ! apply( .tbl, 1, any_na ), , drop = FALSE ]  # drop = FALSE: a one-column table stays a table
50 |
51 | #' @rdname drop_rows
52 | #' @export
53 |
54 | filter_any_na <- drop_rows_any_na
55 |
--------------------------------------------------------------------------------
/R/impute.R:
--------------------------------------------------------------------------------
1 | #' Replace missing values in tables and lists
2 | #'
3 | #' Replace missing values (`NA`) in a table and lists
4 | #'
5 | #' @param .tbl list-like or table-like structure.
6 | #' @param .na scalar, vector or function as described in [na.tools::na.replace()]
7 | #' @param .vars character; names of columns to be imputed
8 | #' @param .predicate dplyr-type predicate function
9 | #' @param ... additional args; either a unnamed list of columns (quoted or not)
10 | #' or name=function pairs. See Details.
11 | #'
12 | #' @details
13 | #'
14 | #' `impute` is similar to other *dplyr* verbs especially [dplyr::mutate()]. Like
15 | #' [dplyr::mutate()] it operates on columns. It changes only missing values
16 | #' (`NA`) to the value specified by `.na`.
17 | #'
18 | #' **Behavior**:
19 | #'
20 | #' Behavior depends on the values of `.na` and `...`.
21 | #'
22 | #' `impute` can be used for three replacement operations:
23 | #'
24 | #' 1. `impute( .tbl, .na )` : ( missing `...` ) Replace missing values
25 | #' in **ALL COLS** by `.na`. This is analogous to `impute_all`.
26 | #'
27 | #' 2. `impute( .tbl, .na, ... )` : ( `...` is an unnamed list) Replace
28 | #' column(s) specified in `...` by `.na`. Columns are specified as an
29 | #' unnamed list of quoted or unquoted column names. This is analogous to
30 | #' `impute_at` where `...` specifies `.vars`
31 | #'
32 | #' 3. `impute( .tbl, col1=na.*, col2=na.* )` : ( missing `.na` ) :
33 | #' Replace by column-specific `.na`
34 | #'
35 | #' Additional arguments to `.na` are not used; use `impute_at` for
36 | #' this or create your own lambda functions.
37 | #'
38 | #' @return
39 | #'
40 | #' Returns an object of the same type as `.tbl`. Columns are mutated to replace
41 | #' missing values (`NA`) with the value specified by `.na` and `...`
42 | #'
43 | #' @seealso
44 | #' * The **na.tools** package.
45 | #' * `impute_functions`
46 | #'
47 | #' @examples
48 | #'
49 | #' data(nacars)
50 | #'
51 | #' \dontrun{
52 | #' nacars %>% impute(0, mpg, cyl)
53 | #' nacars %>% impute(1:6, mpg, cyl)
54 | #'
55 | #' nacars %>% impute( na.mean )
56 | #' nacars %>% impute( mean ) # unsafe
57 | #' nacars %>% impute( length, mpg, disp )
58 | #' nacars %>% impute( mean, mpg, disp )
59 | #' nacars %>% impute( mpg=na.mean, cyl=na.max )
60 | #' nacars %>% impute( na.mean, c('mpg','disp') )
61 | #' }
62 | #' @md
63 | #' @import na.tools
64 | #' @importFrom rlang eval_tidy quos
65 | #' @importFrom dplyr select_vars vars
66 | #' @export
67 |
68 |
69 | impute <- function (.tbl, .na, ...)
70 | {
71 |
72 |   if( ! is.list(.tbl) && ! is.data.frame(.tbl) )
73 |     stop( "`impute` only works on lists and table")
74 |
75 |   # USAGE 1: no `...` -- every element is imputed by `.na`.
76 |   # seq_along() keeps an empty `.tbl` from being indexed at position 1.
77 |   if ( missing(...) ) {
78 |     for( j in seq_along(.tbl) )
79 |       .tbl[[j]] <- na.replace( .tbl[[j]], .na )
80 |     return( .tbl )
81 |   }
82 |
83 |   # Capture `...` once and work out which entries carry a name.
84 |   dots <- quos(...)
85 |   nms <- names(dots)
86 |   if( is.null(nms) ) nms <- rep( "", length(dots) )
87 |   all_named <- all( nzchar(nms) )
88 |
89 |   if( ! missing(.na) ) {
90 |     # `.na` plus col=.na pairs is contradictory.  This guard used to sit at
91 |     # the end and tested `is_named( quos )` (the function itself), so it
92 |     # never fired and the call fell through, returning NULL.
93 |     if( all_named )
94 |       stop( "Specifying .na and col=.na is not allowed")
95 |
96 |     # USAGE 2: `...` selects the columns to impute by `.na`.
97 |     cols <- select_vars( names(.tbl), ... )
98 |     for( j in cols )
99 |       .tbl[[j]] <- na.replace( .tbl[[j]], .na )
100 |     return(.tbl)
101 |   }
102 |
103 |   # USAGE 3: `...` is col=na.fun pairs; every entry must be named.
104 |   if( ! all_named )
105 |     stop( "Without `.na`, every argument in `...` must be a named col=.na pair")
106 |
107 |   # The original compared `names(vars)` (a function, not the pairs), so
108 |   # unknown columns were never detected.
109 |   unknown <- setdiff( nms, names(.tbl) )
110 |   if( length(unknown) > 0 )
111 |     stop( paste( "Unknown columns:", paste(unknown, collapse=", ")))
112 |
113 |   for( i in seq_along(dots) )
114 |     .tbl[[ nms[[i]] ]] <- na.replace( .tbl[[ nms[[i]] ]], .na=rlang::eval_tidy( dots[[i]] ) )
115 |   .tbl
116 | }
117 |
118 |
119 |
120 | #' @note
121 | #' `...` is used to specify columns in `impute` but is used as additional
122 | #' arguments to `.na` in the other `impute_*` functions.
123 | #'
124 | #' @examples
125 | #'
126 | #' \dontrun{
127 | #' nacars %>% impute_at( -99, .vars=1:3 )
128 | #' nacars %>% impute_at( .na=na.mean, .vars=1:6 )
129 | #'
130 | #' # Same, uses `...` for additional args
131 | #' nacars %>%
132 | #' impute_at( .na=mean , .vars=1:6, na.rm = TRUE )
133 | #'
134 | #' nacars %>% impute_at( .na=na.mean, .vars = c('mpg','cyl', 'disp') )
135 | #' }
136 | #'
137 | #' @importFrom dplyr select_vars
138 | #' @rdname impute
139 | #' @export
140 |
141 | impute_at <- function(.tbl, .na, .vars, ...) {
142 |   # Resolve `.vars` against the element names, then impute each selected
143 |   # element; `...` is handed on to na.replace().
144 |   selected <- dplyr::select_vars(names(.tbl), .vars)
145 |   for (col in selected)
146 |     .tbl[[col]] <- na.replace(x = .tbl[[col]], .na = .na, ...)
147 |
148 |   .tbl
149 | }
150 |
151 |
152 | #' @details
153 | #' `impute_all` is like `impute` without specifying `...`. `...` is used
154 | #' for additional arguments to `.na`
155 | #'
156 | #' @examples
157 | #'
158 | #' \dontrun{
159 | #' nacars %>% impute_all( -99 )
160 | #' nacars %>% impute_all( na.min )
161 | #' }
162 | #'
163 | #' @rdname impute
164 | #' @export
165 |
166 | impute_all <- function(.tbl, .na, ... ) {
167 |
168 |   # seq_along() (not 1:length()) so an empty `.tbl` is returned untouched
169 |   for( i in seq_along(.tbl) )
170 |     .tbl[[i]] <- na.replace( .tbl[[i]], .na, ... )
171 |   .tbl
172 | }
173 |
174 |
175 | #' @rdname impute
176 | #' @export
177 | impute_if <- function( .tbl, .na, .predicate, ... ) {
178 |
179 |   # seq_along() (not 1:length()) so an empty `.tbl` is returned untouched;
180 |   # only elements for which `.predicate` is TRUE are imputed.
181 |   for( i in seq_along(.tbl) )
182 |     if( .predicate(.tbl[[i]] ) )
183 |       .tbl[[i]] <- na.replace( .tbl[[i]], .na=.na, ... )
184 |   .tbl }
185 |
--------------------------------------------------------------------------------
/R/impute_funs.R:
--------------------------------------------------------------------------------
1 | #' Table imputation methods
2 | #'
3 | #' Replace missing value methods with a variety of methods
4 | #'
5 | #' @param .tbl table-like or list-like structure
6 | #' @param .na value/function to be used for replacement
7 | #' @param .vars list of columns generated by vars(), or a character vector of
8 | #' column names, or a numeric vector of column positions.
9 | #' @param .predicate A predicate function to be applied to the columns or a
10 | #' logical vector.
11 | #' @param ... additional arguments passed to the imputation method
12 | #'
13 | #' @details
14 | #'
15 | #' These methods are modelled closely after [dplyr::mutate()] and the select
16 | #' style verbs. Most of the functions depend on the **na.tools** package.
17 | #'
18 | #' Function List:
19 | #' * explicit: `impute_explicit`, `impute_explicit_at`, `impute_explicit_all`, `impute_explicit_if`
20 | #' * zero: `impute_zero`, `impute_zero_at`, `impute_zero_all`, `impute_zero_if`
21 | #' * inf: `impute_inf`, `impute_inf_at`, `impute_inf_all`, `impute_inf_if`
22 | #' * neginf: `impute_neginf`, `impute_neginf_at`, `impute_neginf_all`, `impute_neginf_if`
23 | #' * constant: `impute_constant`, `impute_constant_at`, `impute_constant_all`, `impute_constant_if`
24 | #'
25 | #' * max: `impute_max`, `impute_max_at`, `impute_max_all`, `impute_max_if`
26 | #' * min: `impute_min`, `impute_min_at`, `impute_min_all`, `impute_min_if`
27 | #' * median: `impute_median`, `impute_median_at`, `impute_median_all`, `impute_median_if`
28 | #' * mean: `impute_mean`, `impute_mean_at`, `impute_mean_all`, `impute_mean_if`
29 | #' * mode: `impute_mode`, `impute_mode_at`, `impute_mode_all`, `impute_mode_if`
30 | #' * quantile: `impute_quantile`, `impute_quantile_at`, `impute_quantile_all`, `impute_quantile_if`
31 | #' * sample: `impute_sample`, `impute_sample_at`, `impute_sample_all`, `impute_sample_if`
32 | #' * random: `impute_random`, `impute_random_at`, `impute_random_all`, `impute_random_if`
33 | #' * replace: `impute_replace`, `impute_replace_at`, `impute_replace_all`, `impute_replace_if`
34 | #'
35 | #' @examples
36 | #'
37 | #' \dontrun{
38 | #' nacars %>% impute_zero()
39 | #' nacars %>% impute_zero( mpg, cyl )
40 | #' nacars %>% impute_zero( "mpg", "cyl" )
41 | #' nacars %>% impute_zero( c("mpg","cyl") )
42 | #' nacars %>% impute_zero( 1:2 )
43 | #' }
44 | #' @md
45 | #' @rawNamespace exportPattern("^impute_.*$")
46 | #' @include utils.R make_impute.R
47 | #' @aliases
48 | #' impute_explicit impute_explicit_at impute_explicit_all impute_explicit_if
49 | #' impute_zero impute_zero_at impute_zero_all impute_zero_if
50 | #' impute_true impute_true_at impute_true_all impute_true_if
51 | #' impute_false impute_false_at impute_false_all impute_false_if
52 | #' impute_inf impute_inf_at impute_inf_all impute_inf_if
53 | #' impute_neginf impute_neginf_at impute_neginf_all impute_neginf_if
54 | #' impute_constant impute_constant_at impute_constant_all impute_constant_if
55 | #' impute_max impute_max_at impute_max_all impute_max_if
56 | #' impute_min impute_min_at impute_min_all impute_min_if
57 | #' impute_median impute_median_at impute_median_all impute_median_if
58 | #' impute_mean impute_mean_at impute_mean_all impute_mean_if
59 | #' impute_mode impute_mode_at impute_mode_all impute_mode_if
60 | # impute_most_freq impute_most_freq_at impute_most_freq_all impute_most_freq_if
61 | #' impute_quantile impute_quantile_at impute_quantile_all impute_quantile_if
62 | #' impute_sample impute_sample_at impute_sample_all impute_sample_if
63 | #' impute_random impute_random_at impute_random_all impute_random_if
64 | #' impute_replace impute_replace_at impute_replace_all impute_replace_if
65 | #' @importFrom stats na.pass
66 | #' @importFrom methods as
67 | #' @rdname impute_functions
68 |
69 | impute_functions <- function( .tbl, .na, .vars, .predicate ) {} # stub: only anchors the roxygen block above
70 | # Each call below builds the four variants impute_<name>, _at, _all and _if with make_imputes() and assigns them by name.
71 | assign_these( make_imputes(na.explicit) )
72 | # Constant-value replacements
73 | assign_these( make_imputes(na.zero) )
74 | assign_these( make_imputes(na.true) )
75 | assign_these( make_imputes(na.false) )
76 | assign_these( make_imputes(na.inf) )
77 | assign_these( make_imputes(na.neginf) )
78 | assign_these( make_imputes(na.constant) )
79 |
80 | # Replacements computed from the data
81 | assign_these( make_imputes(na.max) )
82 | assign_these( make_imputes(na.min) )
83 | assign_these( make_imputes(na.mean) )
84 | assign_these( make_imputes(na.median) )
85 | assign_these( make_imputes(na.mode) )
86 | # assign_these( make_imputes(na.most_freq) )
87 | assign_these( make_imputes(na.quantile) )
88 | # Sampling-based and general replacements
89 | assign_these( make_imputes(na.sample) )
90 | assign_these( make_imputes(na.random) )
91 | assign_these( make_imputes(na.replace) )
92 |
--------------------------------------------------------------------------------
/R/make_impute.R:
--------------------------------------------------------------------------------
1 | #' Imputation metaprogramming
2 | #'
3 | #' Create functions that use a function for imputing
4 | #'
5 | #' @param fun value or function used for imputing. See Details.
6 | #'
7 | #' @details
8 | #'
9 | #' These functions make mutate-style impute functions using [impute()] and the
10 | #' supplied `.na` argument.
11 | #'
12 | #' `make_imputes` is a wrapper around the other functions and returns each as
13 | #' a list.
14 | #'
15 | #' @keywords internal
16 | #' @rdname make_impute
17 |
18 | make_impute <- function(fun) function(.tbl, ..., .na=fun ) {
19 |   na_value <- .na  # copy so impute() never sees a defaulted `.na` as missing()
20 |   impute(.tbl, .na=na_value, ... )
21 | }
22 | #' @rdname make_impute
23 | #' @export
24 | make_impute_at <- function(fun) function( .tbl, .vars, ..., .na=fun )
25 |   impute_at( .tbl, .na=.na, .vars=.vars, ... )  # `...` was accepted but never forwarded to the imputation method
26 |
27 | #' @rdname make_impute
28 | #' @export
29 | make_impute_all <- function(fun)
30 |   function(.tbl, .na = fun, ...) impute_all(.tbl, .na, ...)  # wraps impute_all with `fun` as the default `.na`
31 | #' @rdname make_impute
32 | #' @export
33 | make_impute_if <- function(fun)
34 |   function(.tbl, .predicate, ...) impute_if(.tbl, .na = fun, .predicate = .predicate, ...)  # `fun` is fixed as `.na`
35 |
36 | #' @rdname make_impute
37 | #' @export
38 | make_imputes <- function(.na) {
39 |   # Derive the name part from the expression passed as `.na`, e.g. na.zero -> "zero".
40 |   suffix <- sub("^na\\.", "", deparse(substitute(.na)), perl = TRUE)
41 |
42 |   imputers <- list(
43 |     make_impute(.na),
44 |     make_impute_at(.na),
45 |     make_impute_all(.na),
46 |     make_impute_if(.na)
47 |   )
48 |   names(imputers) <- paste0("impute_", suffix, c("", "_at", "_all", "_if"))
49 |   imputers
50 | }
51 |
52 |
53 |
54 | #' #' @rdname imports
55 | #' #' @aliases impute_inf impute_inf_at impute_inf_all impute_inf_if
56 | #' #' @export
57 | #' na.inf %>% make_impute() %>% assign_these()
58 | #'
59 | #' # list(
60 | #' # impute = make_impute(.na)
61 | #' # , impute_at = make_impute_at(.na)
62 | #' # , impute_all = make_impute_all(.na)
63 | #' # , impute_if = make_impute_if(.na)
64 | #' # )
65 | #' # }
66 | #'
67 | #'
68 | #' # impute_inf <- make_impute(na.inf)
69 | #' # imputes_inf <- make_imputes(na.inf)
70 | #'
71 |
72 |
--------------------------------------------------------------------------------
/R/na_predict.R:
--------------------------------------------------------------------------------
1 | #' na_predict
2 | #'
3 | #' replace `NA` values by predictions of a model
4 | #'
5 | #' @param x data
6 | #' @param object object with predict method
7 | #' @param data data object
8 | #'
9 | #' @export
10 |
11 | na_predict <- function( x, object, data=x ) {
12 |   # NOTE(review): stub -- `object` is not used yet and `x` is returned unchanged.
13 |   # NROW() counts a plain vector's elements, where nrow() gives NULL and broke the test.
14 |   if( length(x) != NROW(data) ) stop( "`x` and `data` must have the same number of observations" )
15 |   x
16 | }
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
1 | # Key value iteration
2 | #
3 | # Create a list with `k` and `v` elements useful for iteration
4 | #
5 | # @param x object such as vector or list to separate into key value pairs
6 | # @param ... additional args
7 | #
8 | # @details
9 | #
10 | # No magic here, just something simple to convert `x` into a list of
11 | # lists. Each element of `x` is broken into a list with elements `k`
12 | # (key) and `v` (value). See examples.
13 | #
14 | # For many cases, key-value iteration can be done with `*apply` or
15 | # `paste` functions. This is made to be explicit and work on a variety of
16 | # objects.
17 | #
18 | # @references
19 | # * [SO: for-loop-in-r-with-key-value](http://stackoverflow.com/questions/18572921/for-loop-in-r-with-key-value)
20 | # * [SO: iterate-over-key-value-pair-from-a-list](http://stackoverflow.com/questions/4500106/iterate-over-key-value-pair-from-a-list)
21 | #
22 | # @return
23 | # A named list of list; each element of `x` becomes a one element list
24 | # with elements `k` and `v` representing the keys and values
25 | #
26 | # @author
27 | # This function is taken from the *kv* package and is used with permission.
28 | #
29 | # @examples
30 | #
31 | # # Lists
32 | # li <- list(a=1,b=2,c=3)
33 | # kv(li)
34 | #
35 | # for( kv in kv(li) )
36 | # cat( kv$k, ":", kv$v, "\n")
37 | #
38 | #
39 | # # vectors
40 | # v <- c(a=1, b=2, c=3 )
41 | # kv(li)
42 | #
43 | # for( kv in kv(li) )
44 | # cat( kv$k, ":", kv$v, "\n")
45 | #
46 | # @md
47 |
48 | kv <- function(x, ...) UseMethod('kv')
49 |
50 | # @export
51 | # @rdname kv
52 |
53 | kv.default <- function(x, ...) {
54 |   # seq_along() rather than 1:length(x): an empty `x` yields an empty list
55 |   # instead of failing on names(x)[[1]].  `...` matches the generic's signature.
56 |   pairs <- lapply( seq_along(x), function(i) list( k=names(x)[[i]], v=x[[i]] ) )
57 |   names(pairs) <- names(x)
58 |   pairs
59 | }
60 |
61 |
62 |
63 |
64 | # @author decision patterns / christopher brown
65 | # Taken from the base.tools package with permission
66 | # Returns the arguments of the call, as written, as a character vector.
67 | qw <- function(...) {
68 |   words <- as.character(match.call())
69 |   words[-1]
70 | }
71 |
72 | # @author decision patterns / christopher brown
73 | # Taken from the dimensional package with permission
74 | # Most frequent value of `x` (NA is tallied too), converted back to class(x).
75 | most_freq <- function(x, na.action = stats::na.pass) {
76 |   counts <- table(na.action(x), useNA = "always"); as(names(which.max(counts)), class(x)) }
77 | # is_named/is_unnamed: is a `names` attribute present?  *.quosure: does every element have a non-empty name (duplicates allowed)?
78 | is_named <- function(x) ! is.null( attr(x, "names") )
79 | is_unnamed <- function(x) is.null( attr(x, "names") )
80 | is_named.quosure <- function(x) length(x) == 0 || ( ! is.null( names(x) ) && all( nzchar( names(x) ) ) )
81 | is_unnamed.quosure <- function(x) ! is_named.quosure(x)
82 |
83 |
84 | #' Assign each element of a named list, under its own name, into the calling frame (used to set up exports)
85 | #' @param x named list; names(x) supplies the variable names to assign
86 | #' @keywords internal
87 | assign_these <- function(x)
88 | for( nm in names(x) )
89 | assign( nm, x[[nm]], envir=sys.frame( sys.nframe() -1 ) )
90 | # Alternative kept for reference: assign( nm, x[[nm]], parent.frame() )
91 |
92 |
#' Export functions in list
#'
#' Build a NAMESPACE `export()` directive, as a single string, from a set of
#' object names.
#'
#' @param nms character vector of names to export
#' @param x list; named list of functions to export
#'
#' @return A length-one character vector of the form `"export(a,b,c)"`.
#'
#' @details
#'
#' This uses the `@evalNamespace` directive to declare exports for objects by
#' name or when provided in a function.
#'
#' `ns_export()` takes the names directly; `ns_export_function_list()` takes a
#' named list and exports its names. Typical use in a roxygen block:
#'
#'     #' @evalNamespace ns_export(names(fns))
#'
#' @seealso
#'  * [assign_these()]
#'
#' @keywords internal

ns_export <- function(nms) {
  sprintf("export(%s)", paste(nms, collapse = ","))
}

#' @rdname ns_export
ns_export_function_list <- function(x) {
  # Delegate so the directive format is defined in exactly one place.
  ns_export(names(x))
}
128 |
--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
# Package attach hook.
#
# In interactive sessions, emits a startup banner of the form
# "<pkgname>-<version> - Copyright (c) <year> Decision Patterns"; the version
# part is omitted when it cannot be determined.
#
# @param libname library path; passed to utils::packageVersion() as `lib.loc`
# @param pkgname name of the package being attached
.onAttach <- function( libname, pkgname ) {

  # Best-effort version lookup. The result is taken from tryCatch() directly
  # rather than probed with exists(): exists() also searches enclosing
  # environments, so an unrelated `v` defined elsewhere could be picked up
  # whenever the lookup fails.
  v <- suppressWarnings(
    tryCatch(
      utils::packageVersion(pkgname, libname),
      error = function(e) NULL
    )
  )
  version <- if (is.null(v)) "" else paste0("-", v)

  if (interactive()) {
    packageStartupMessage(
      pkgname,
      version,
      " - Copyright \u00a9 ", format(Sys.Date(), "%Y"),
      " Decision Patterns",
      domain = NA
    )
  }

}
16 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | ## tidyimpute
3 |
4 |
5 | > **Impute the tidyverse way**
6 |
7 | 
8 | 
9 | [](https://www.tidyverse.org/lifecycle/#stable)
10 | [](https://www.r-pkg.org/pkg/tidyimpute)
11 | [](https://cran.rstudio.com/web/packages/tidyimpute/index.html)
12 |
13 |
14 |
15 | **tidyimpute** is a tidyverse/dplyr-compliant toolkit for imputing missing
16 | values (`NA`) in list-like and table-like structures, including data.tables.
17 | It has two goals: 1) extend the existing `na.*` functions from the stats package
18 | and 2) provide **dplyr**/**tidyverse** compliant methods for tables and lists.
19 |
20 | This package is based on the handy **na.tools** package which provides tools
21 | for working with missing values in vectors.
22 |
23 | ## Feature List
24 |
25 | * Over **80** functions for imputing missing values (see [Function List](#function-list) below).
26 | * **dplyr**/**tidyverse** compliant interface:
27 | * `impute_*` family of functions for table- or list-based imputations.
28 | * `impute_*_at`, `impute_*_all` and `impute_*_if` functions
29 | * Uses the **na.tools** package to ensure
30 | * Type/class and length-safe replacement. (**tidyimpute** will never
31 | produce an object with a different length/nrow or type/class than its target.)
32 | * General imputation methods
33 | * Generic imputation: `impute`, `impute_at`, `impute_all`, `impute_if`
34 | * Specialized imputation methods
35 | * Common imputations for:
36 | * constants: `0`, `-Inf`, `Inf`
37 | * univariate, commutative summary functions: `mean`, `median`, `max`, `min`, `zero`
38 | * (Coming Soon) univariate, non-commutative/ordered/time-series data: `loess`, `locf`, `nocb`
39 | * (Coming Soon) model-based imputation
40 | * Support for recursive (lists and table-like structures)
41 | * Support for `tibble`
42 | * Support for `data.table`
43 | * Four extensible types of imputations
44 |
45 |
46 | ### Upcoming features
47 |
48 | * recall/track which values have been replaced
49 | * `by-group` calculations
50 | * Time-series/ordered/non-commutative methods
51 | * Model-based imputation
52 | - Model-based + by-groups
53 |
54 |
55 | ## Installation
56 |
57 | ### Github (Development Version)
58 |
59 | library(devtools)
60 | install_github( "decisionpatterns/tidyimpute")
61 |
62 |
63 | ### CRAN
64 |
65 | R> install.packages("tidyimpute")
66 |
67 |
68 | ## Coming Soon ...
69 |
70 | * Impute by model
71 | * Memorable imputing
72 |
73 |
74 | ## Function List
75 |
76 | There are four types of imputation methods. They are distinguished by
77 | how the replacement values are calculated. Each type is described below,
78 | along with the functions that implement it.
79 |
80 | **Constants**
81 |
82 | In "constant" imputation methods, missing values are replaced by an
83 | *a priori* selected constant value. The vector containing missing values
84 | is not used to calculate the replacement value. These take the form: `na.fun(x, ...)`
85 |
86 | * `impute_zero` - 0
87 | * `impute_inf` / `impute_neginf` - Inf/-Inf
88 | * `impute_constant` - Impute with a constant
89 |
90 |
91 | **Univariate**
92 |
93 | (Impute using function(s) of the target variable. When imputing in a table this
94 | is also called *column-based imputation*, since the values used to derive the
95 | imputed values come from that single column alone.)
96 |
97 | In "univariate" replacement methods, values are calculated using
98 | only the target vector, ie the one containing the missing values. The functions
99 | for performing the imputation are nominally univariate summary functions.
100 | Generally, the ordering of the vector does not affect imputed values. In general,
101 | one value is used to replace all missing values (`NA`) for a variable.
102 |
103 | * `impute_max` - maximum
104 | * `impute_min` - minimum
105 | * `impute_mean` - mean
106 | * `impute_median` - median value
107 | * `impute_quantile` - quantile value
108 | * `impute_sample` - randomly sampled value via bootstrap.
109 |
110 |
111 | **Ordered Univariate (Coming Soon)**
112 |
113 | (Impute using function(s) of the target variable. Variable ordering relevant.
114 | This is a super class of the previous **column-based imputation**.)
115 |
116 | In "ordered univariate" methods, replacement values are calculated
117 | from the vector, which is assumed to be ordered. These types are very
118 | often used with **time-series** data. (Many of these functions are taken from
119 | or patterned after functions in the **zoo** package.)
120 |
121 | * `impute_loess` - loess smoother, assumes values are ordered
122 | * `impute_locf` - last observation carried forward, assumes ordered
123 | * `impute_nocb` - next observation carried backwards, assumes ordered
124 |
125 |
131 |
132 | **Multivariate (Coming Soon)**
133 |
134 | (Impute with multiple variables from the same observation. In tables, this is
135 | also called **row-based imputation** because imputed values derive from other
136 | measurements for the same observation.)
137 |
138 | In "Multivariate" imputation, any value from the same row (observation) can be
139 | used to derive the replacement value. This is generally implemented as a model
140 | trained on the data with a formula of the form `var ~ ...`
141 |
142 | * `impute_fit`,`impute_predict` - use a model
143 | * `impute_by_group` - use by-group imputation
144 |
145 |
146 | **Generalized (Coming Soon)**
147 |
148 | (Impute with column and rows.)
149 |
150 |
151 | **Future:**
152 |
153 | * `unimpute`/`impute_restore` - restore NAs to the vector; remembering
154 | replacement
155 | * `impute_toggle` - toggle between `NA` and replacement values
156 |
157 |
158 |
159 | ## Examples
160 |
161 | tbl <- data.frame( col_1 = letters[1:3], col_2=c(1,NA_real_,3), col_3=3:1)
162 |
163 | impute( tbl, 2)
164 | impute_mean( tbl )
165 |
166 |
--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
1 | ## TODO ##
2 |
3 | - [ ] Add `impute_mode_*`
4 |
5 | - [x] Add `impute_false_*` and `impute_true_*`
6 |
7 | - [ ] Support easybake(TM) "better recipes"
8 |
9 | - [ ] Decide whether `impute_max` / `impute_min` should return -Inf and +Inf for
10 | all-`NA` rows, or instead be consistent with `na.mean` and `na.median`
11 |
12 | - [x] Create Logo **Circle Ban** of `NA`
13 |
14 | - [x] `na[._]impute` as alias for `na[._]replace` respectively.
15 |
16 | - [x] `na.explicit` and `na_explicit` apply to factors only
17 |
18 | - [ ] Row-based imputation does not need to calculate replacement values for
19 | all observations, only for the missing ones. This is different from column-based
20 | imputations, which need values from all observations. There might be some
21 | efficiency gains from doing this.
22 |
23 | - [ ] There is a generalized imputation that uses both rows and columns and
24 | might automatically consider by-groups (how does the values of )
25 |
26 | - [x] Is recall a part of
27 | - na.tools::na.replace only deals with vectors and references to multivariate
28 | replacement might be misplaced.
29 | - [x] tidyimpute::impute deals with tables and therefore is the pro
30 | - How to implement recall?
31 | - Recall should be able to take the same object. Identify the input.
32 | A recollection should:
33 | - test for the form of the input,
34 | - apply one or more
35 | - recall:
36 | - test for input: digest(.tbl[0,])
37 | - a functions with arguments modified except the input.
38 | impute( . , .na, ... )
39 |
40 |
41 | ### `coerce_safe`
42 |
43 | - [ ] Move `coerce_safe` to the **coercion** package. [ ] Import **coercion**.
44 |
45 | - _all, _if, _at
46 | - na_replace_all( .tbl, .funs, ... )
47 | - na_replace_if( .tbl, .predicate, .funs, ... )
48 | - na_replace_at( .tbl, .predicate, .funs, ... )
49 |
50 |
51 |
52 | So .tbl %>% na_replace_all( iris, 3, ...)
53 | na_replace( iris, mean, na.rm=TRUE )
54 | na_replace_if( iris, is.cont, mean, na.rm=TRUE )
55 | na_replace_at( iris, ! Species, mean, na.rm=TRUE )
56 |
57 |
58 | - [ ] Support atomics with `impute`
59 | - na_replace and na_explicit
60 | - [x] replace by scalar? (Low-level)
61 | - [x] replace by vector? (Other)
62 | - [x] replace by unary function? ()
63 | - [x] replace by function of multiple args: `impute_*`
64 | - [ ] replace by model/formula? ()
65 | - [ ] store .na value (if scalar)
66 | - [ ] store replaced idxs --- like na.omit
67 |
68 | - Implement slow functions with Rcpp
69 |
70 | - Might there be a clever way to allow something like:
71 | NA_explicit_ <- . %>% mean(., na.rm=TRUE)
72 | This will not work
73 |
74 | - [ ] Consider having an option for values for the na_level, e.g.
75 | options( na_explicit = "(Missing)" ) or,
76 | options( na_explicit = mean )
77 | - [x] Categorical and continuous variables must be different.
78 | - [ ] Explicit value might depend on the class, type (cat vs. cont), or on a
79 | specific attribute, `na_explicit` or `na` of the specific variable.
80 |
81 | - [ ] Consider how explicit NA will be treated in sorting
82 | - [ ] Do we want exceptional values first or last?
83 |
84 | - [ ] Use **catcont** package?
85 |
86 | - [x] `na_replace` and `na_explicit` are getting very similar and should probably be
87 | made aliases
88 |
89 | - [ ] Devise syntax related to list-like/recursive objects
90 | - applying a function to an **entire** vs.
91 | - applying to each **element** object
92 | See na_explicit and na_implicit
93 |
94 |
95 | ### Completed
96 |
97 | - [x] na_drop_rows, na_drop_cols for table-like objects
98 | - Remove rows/cols with all NAs
99 | - [x] Make na_replace vectorized, e.g. na_replace( value=... )
100 | - [x] na_ifelse for na_replace
101 |
102 |
--------------------------------------------------------------------------------
/data/nacars.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decisionpatterns/tidyimpute/9e07748f4b434105e2e122df1603817f3c2ab856/data/nacars.rda
--------------------------------------------------------------------------------
/data/nacars_dt.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decisionpatterns/tidyimpute/9e07748f4b434105e2e122df1603817f3c2ab856/data/nacars_dt.rda
--------------------------------------------------------------------------------
/data/nairis.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decisionpatterns/tidyimpute/9e07748f4b434105e2e122df1603817f3c2ab856/data/nairis.rda
--------------------------------------------------------------------------------
/data/nairis_dt.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decisionpatterns/tidyimpute/9e07748f4b434105e2e122df1603817f3c2ab856/data/nairis_dt.rda
--------------------------------------------------------------------------------
/inst/extdata/nacars.R:
--------------------------------------------------------------------------------
#' Make nacars
#'
#' Script that builds the `nacars` and `nacars_dt` demonstration data sets:
#' the first six rows of `mtcars` with rows 3 and 5 and columns 3 and 5 set
#' to `NA`, saved once as a data.frame and once as a data.table.

data(mtcars)
nacars <- head(mtcars)
nacars[ c(3,5),] <- NA_real_      # rows 3 and 5: every value missing
nacars[ , c(3,5) ] <- NA_real_    # columns 3 and 5: every value missing
nacars

# NOTE(review): use_data() has moved from devtools to usethis in current
# releases -- confirm which one the build environment provides.
devtools::use_data(nacars, overwrite=TRUE)

library(data.table)
# Build the data.table variant in a single step. An intermediate
# `nacars_dt <- nacars; setDT(nacars_dt)` would be overwritten by this
# assignment anyway, and setDT() converts by reference, which risks altering
# the object `nacars` still refers to.
nacars_dt <- data.table( nacars )

devtools::use_data(nacars_dt, overwrite = TRUE )
17 |
18 |
19 |
--------------------------------------------------------------------------------
/man/assign_these.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{assign_these}
4 | \alias{assign_these}
5 | \title{Assign named list to calling frame for setting up exports}
6 | \usage{
7 | assign_these(x)
8 | }
9 | \description{
10 | Assign named list to calling frame for setting up exports
11 | }
12 | \keyword{internal}
13 |
--------------------------------------------------------------------------------
/man/drop_cols.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/drop_cols.R
3 | \name{drop_cols_all_na}
4 | \alias{drop_cols_all_na}
5 | \alias{drop_cols_any_na}
6 | \alias{drop_na_cols}
7 | \title{Remove columns with missing values}
8 | \usage{
9 | drop_cols_all_na(.tbl)
10 |
11 | drop_cols_any_na(.tbl)
12 |
13 | drop_na_cols(.tbl)
14 | }
15 | \arguments{
16 | \item{.tbl}{table-like object}
17 | }
18 | \value{
19 | An object of the same class as \code{data} with cols containing all
20 | \code{NA} values removed
21 | }
22 | \description{
23 | Remove columns of a table whose values are all \code{NA} or who have any \code{NA}
24 | }
25 | \details{
26 | \code{drop_cols_all_na} removes all cols whose only values are \code{NA}.
27 | \code{drop_cols_any_na} removes columns that have any \code{NA}. They work on all
28 | table-like objects.
29 | }
30 | \seealso{
31 | \itemize{
32 | \item \code{\link[dplyr:select]{dplyr::select()}}
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/man/drop_rows.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/drop_rows.R
3 | \name{drop_rows_all_na}
4 | \alias{drop_rows_all_na}
5 | \alias{filter_all_na}
6 | \alias{drop_rows_any_na}
7 | \alias{filter_any_na}
8 | \title{drop_rows_all_na, drop_rows_any_na}
9 | \usage{
10 | drop_rows_all_na(.tbl)
11 |
12 | filter_all_na(.tbl)
13 |
14 | drop_rows_any_na(.tbl)
15 |
16 | filter_any_na(.tbl)
17 | }
18 | \arguments{
19 | \item{.tbl}{data-like object}
20 | }
21 | \value{
22 | An object of the same class as \code{.tbl} with rows containing all
23 | \code{NA} values removed
24 | }
25 | \description{
26 | Drop rows of a table whose values are all NA
27 | }
28 | \details{
29 | \code{na_drop_rows} removes all rows whose only values are NA. It works for all
30 | table-like objects.
31 | }
32 | \examples{
33 |
34 | data(iris)
35 |
36 | .tbl <- iris[1:5,]
37 | .tbl[1:2,] <- NA
38 | .tbl[3,1] <- NA
39 | .tbl
40 |
41 | filter_all_na(.tbl)
42 | filter_any_na(.tbl)
43 |
44 | drop_rows_all_na(.tbl)
45 | drop_rows_any_na(.tbl)
46 |
47 | }
48 | \seealso{
49 | \itemize{
50 | \item \code{\link[dplyr:filter]{dplyr::filter()}}
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/man/figures/hexagon_blue_tidyimpute_v1.0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/decisionpatterns/tidyimpute/9e07748f4b434105e2e122df1603817f3c2ab856/man/figures/hexagon_blue_tidyimpute_v1.0.png
--------------------------------------------------------------------------------
/man/figures/hexagon_blue_tidyimpute_v1.0.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | ]>
13 |
674 |
--------------------------------------------------------------------------------
/man/impute.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/impute.R
3 | \name{impute}
4 | \alias{impute}
5 | \alias{impute_at}
6 | \alias{impute_all}
7 | \alias{impute_if}
8 | \title{Replace missing values in tables and lists}
9 | \usage{
10 | impute(.tbl, .na, ...)
11 |
12 | impute_at(.tbl, .na, .vars, ...)
13 |
14 | impute_all(.tbl, .na, ...)
15 |
16 | impute_if(.tbl, .na, .predicate, ...)
17 | }
18 | \arguments{
19 | \item{.tbl}{list-like or table-like structure.}
20 |
21 | \item{.na}{scalar, vector or function as described in \code{\link[na.tools:na.replace]{na.tools::na.replace()}}}
22 |
23 | \item{...}{additional args; either a unnamed list of columns (quoted or not)
24 | or name=function pairs. See Details.}
25 |
26 | \item{.vars}{character; names of columns to be imputed}
27 |
28 | \item{.predicate}{dply-type predicate functions}
29 | }
30 | \value{
31 | Returns a object as the same type as \code{.tbl}. Columns are mutated to replace
32 | missing values (\code{NA}) with value specied by \code{.na} and \code{...}
33 | }
34 | \description{
35 | Replace missing values (\code{NA}) in a table and lists
36 | }
37 | \details{
38 | \code{impute} is similar to other \emph{dplyr} verbs especially \code{\link[dplyr:mutate]{dplyr::mutate()}}. Like
39 | \code{\link[dplyr:mutate]{dplyr::mutate()}} it operates on columns. It changes only missing values
40 | (\code{NA}) to the value specified by \code{.na}.
41 |
42 | \strong{Behavior}:
43 |
44 | Behavior depends on the values of \code{.na} and \code{...}.
45 |
46 | \code{impute} can be used for three replacement operatations:
47 | \enumerate{
48 | \item \code{impute( .tbl, .na )} : ( missing \code{...} ) Replace missing values
49 | in \strong{ALL COLS} by \code{.na}. This is analogous to \code{impute_all}.
50 | \item \code{impute( .tbl, .na, ... )} : ( \code{...} is an unnamed list) Replace
51 | column(s) specified in \code{...} by \code{.na}. Columns are specified as an
52 | unnamed list of quoted or unquoted column names. This is analogous to
53 | \code{impute_at} where \code{...} specifies \code{.vars}
54 | \item \code{impute( .tbl. col1=na.*, col2=na.* )} : ( missing \code{.na} ) :
55 | Replace by column-specific \code{.na}
56 | }
57 |
58 | Additional arguments are to \code{.na} are not used; Use \code{impute_at} for
59 | this or create your own lambda functions.
60 |
61 | \code{impute_all} is like \code{impute} without specifying \code{...}. \code{...} is used
62 | for additional arguments to \code{.na}
63 | }
64 | \note{
65 | \code{...} is used to specify columns in \code{impute} but is used as additional
66 | arguments to \code{.na} in the other \code{impute_*} functions.
67 | }
68 | \examples{
69 |
70 | data(nacars)
71 |
72 | \dontrun{
73 | nacars \%>\% impute(0, mpg, cyl)
74 | nacars \%>\% impute(1:6, mpg, cyl)
75 |
76 | nacars \%>\% impute( na.mean )
77 | nacars \%>\% impute( mean ) # unsafe
78 | nacars \%>\% impute( length, mpg, disp )
79 | nacars \%>\% impute( mean, mpg, disp )
80 | nacars \%>\% impute( mpg=na.mean, cyl=na.max )
81 | nacars \%>\% impute( na.mean, c('mpg','disp') )
82 | }
83 |
84 | \dontrun{
85 | nacars \%>\% impute_at( -99, .vars=1:3 )
86 | nacars \%>\% impute_at( .na=na.mean, .vars=1:6 )
87 |
88 | # Same, uses `...` for additional args
89 | nacars \%>\%
90 | impute_at( .na=mean , .vars=1:6, na.rm = TRUE )
91 |
92 | nacars \%>\% impute_at( .na=na.mean, .vars = c('mpg','cyl', 'disp') )
93 | }
94 |
95 |
96 | \dontrun{
97 | nacars \%>\% impute_all( -99 )
98 | nacars \%>\% impute_all( na.min )
99 | }
100 |
101 | }
102 | \seealso{
103 | \itemize{
104 | \item The \strong{na.tools} package.
105 | \item \code{impute_functions}
106 | }
107 | }
108 |
--------------------------------------------------------------------------------
/man/impute_functions.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/impute_funs.R
3 | \name{impute_functions}
4 | \alias{impute_functions}
5 | \alias{impute_explicit}
6 | \alias{impute_explicit_at}
7 | \alias{impute_explicit_all}
8 | \alias{impute_explicit_if}
9 | \alias{impute_zero}
10 | \alias{impute_zero_at}
11 | \alias{impute_zero_all}
12 | \alias{impute_zero_if}
13 | \alias{impute_true}
14 | \alias{impute_true_at}
15 | \alias{impute_true_all}
16 | \alias{impute_true_if}
17 | \alias{impute_false}
18 | \alias{impute_false_at}
19 | \alias{impute_false_all}
20 | \alias{impute_false_if}
21 | \alias{impute_inf}
22 | \alias{impute_inf_at}
23 | \alias{impute_inf_all}
24 | \alias{impute_inf_if}
25 | \alias{impute_neginf}
26 | \alias{impute_neginf_at}
27 | \alias{impute_neginf_all}
28 | \alias{impute_neginf_if}
29 | \alias{impute_constant}
30 | \alias{impute_constant_at}
31 | \alias{impute_constant_all}
32 | \alias{impute_constant_if}
33 | \alias{impute_max}
34 | \alias{impute_max_at}
35 | \alias{impute_max_all}
36 | \alias{impute_max_if}
37 | \alias{impute_min}
38 | \alias{impute_min_at}
39 | \alias{impute_min_all}
40 | \alias{impute_min_if}
41 | \alias{impute_median}
42 | \alias{impute_median_at}
43 | \alias{impute_median_all}
44 | \alias{impute_median_if}
45 | \alias{impute_mean}
46 | \alias{impute_mean_at}
47 | \alias{impute_mean_all}
48 | \alias{impute_mean_if}
49 | \alias{impute_mode}
50 | \alias{impute_mode_at}
51 | \alias{impute_mode_all}
52 | \alias{impute_mode_if}
53 | \alias{impute_quantile}
54 | \alias{impute_quantile_at}
55 | \alias{impute_quantile_all}
56 | \alias{impute_quantile_if}
57 | \alias{impute_sample}
58 | \alias{impute_sample_at}
59 | \alias{impute_sample_all}
60 | \alias{impute_sample_if}
61 | \alias{impute_random}
62 | \alias{impute_random_at}
63 | \alias{impute_random_all}
64 | \alias{impute_random_if}
65 | \alias{impute_replace}
66 | \alias{impute_replace_at}
67 | \alias{impute_replace_all}
68 | \alias{impute_replace_if}
69 | \title{Table imputation methods}
70 | \usage{
71 | impute_functions(.tbl, .na, .vars, .predicate)
72 | }
73 | \arguments{
74 | \item{.tbl}{table-like or list-like structure}
75 |
76 | \item{.na}{value/function to be used for replacement}
77 |
78 | \item{.vars}{list of columns generated by vars(), or a character vector of
79 | column names, or a numeric vector of column positions.}
80 |
81 | \item{.predicate}{A predicate function to be applied to the columns or a
82 | logical vector.}
83 |
84 | \item{...}{addition passed to the imputation method}
85 | }
86 | \description{
87 | Replace missing value methods with a variety of methods
88 | }
89 | \details{
90 | These methods are modelled closely after \code{\link[dplyr:mutate]{dplyr::mutate()}} and the select
91 | style verbs. Most of the functions depend on the \strong{na.tools} package.
92 |
93 | Function List:
94 | \itemize{
95 | \item explicit: \code{impute_explicit}, \code{impute_explicit_at}, \code{impute_explicit_all}, \code{impute_explicit_if}
96 | \item zero: \code{impute_zero}, \code{impute_zero_at}, \code{impute_zero_all}, \code{impute_zero_if}
97 | \item inf: \code{impute_inf}, \code{impute_inf_at}, \code{impute_inf_all}, \code{impute_inf_if}
98 | \item neginf: \code{impute_neginf}, \code{impute_neginf_at}, \code{impute_neginf_all}, \code{impute_neginf_if}
99 | \item constant: \code{impute_constant}, \code{impute_constant_at}, \code{impute_constant_all}, \code{impute_constant_if}
100 | \item max: \code{impute_max}, \code{impute_max_at}, \code{impute_max_all}, \code{impute_max_if}
101 | \item min: \code{impute_min}, \code{impute_min_at}, \code{impute_min_all}, \code{impute_min_if}
102 | \item median: \code{impute_median}, \code{impute_median_at}, \code{impute_median_all}, \code{impute_median_if}
103 | \item mean: \code{impute_mean}, \code{impute_mean_at}, \code{impute_mean_all}, \code{impute_mean_if}
104 | \item most_freq: \code{impute_most_freq}, \code{impute_most_freq_at}, \code{impute_most_freq_all}, \code{impute_most_freq_if}
105 | \item quantile: \code{impute_quantile}, \code{impute_quantile_at}, \code{impute_quantile_all}, \code{impute_quantile_if}
106 | \item sample: \code{impute_sample}, \code{impute_sample_at}, \code{impute_sample_all}, \code{impute_sample_if}
107 | \item random: \code{impute_random}, \code{impute_random_at}, \code{impute_random_all}, \code{impute_random_if}
108 | \item replace: \code{impute_replace}, \code{impute_replace_at}, \code{impute_replace_all}, \code{impute_replace_if}
109 | }
110 | }
111 | \examples{
112 |
113 | \dontrun{
114 | nacars \%>\% impute_zero()
115 | nacars \%>\% impute_zero( mpg, cyl )
116 | nacars \%>\% impute_zero( "mpg", "cyl" )
117 | nacars \%>\% impute_zero( c("mpg","cyl") )
118 | nacars \%>\% impute_zero( 1:2 )
119 | }
120 | }
121 |
--------------------------------------------------------------------------------
/man/make_impute.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/make_impute.R
3 | \name{make_impute}
4 | \alias{make_impute}
5 | \alias{make_impute_at}
6 | \alias{make_impute_all}
7 | \alias{make_impute_if}
8 | \alias{make_imputes}
9 | \title{Imputation metaprogramming}
10 | \usage{
11 | make_impute(fun)
12 |
13 | make_impute_at(fun)
14 |
15 | make_impute_all(fun)
16 |
17 | make_impute_if(fun)
18 |
19 | make_imputes(.na)
20 | }
21 | \arguments{
22 | \item{fun}{value or function used for imputing. See Details.}
23 | }
24 | \description{
25 | Create functions that use a function for imputing
26 | }
27 | \details{
28 | These functions make mutate-style impute functions using \code{\link[=impute]{impute()}} and the
29 | supplied \code{.na} argument.
30 |
31 | \code{make_imputes} is a wrapper around the other functions and returns each as
32 | a list.
33 | }
34 | \keyword{internal}
35 |
--------------------------------------------------------------------------------
/man/na_predict.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/na_predict.R
3 | \name{na_predict}
4 | \alias{na_predict}
5 | \title{na_predict}
6 | \usage{
7 | na_predict(x, object, data = x)
8 | }
9 | \arguments{
10 | \item{x}{data}
11 |
12 | \item{object}{object with predict method}
13 |
14 | \item{data}{data object}
15 | }
16 | \description{
17 | replace \code{NA} values by predictions of a model
18 | }
19 |
--------------------------------------------------------------------------------
/man/nacars.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{nacars}
5 | \alias{nacars}
6 | \alias{nacars_dt}
7 | \alias{nairis}
8 | \alias{nairis_dt}
9 | \title{data with missing values}
10 | \format{An object of class \code{data.frame} with 6 rows and 11 columns.}
11 | \usage{
12 | nacars
13 | }
14 | \description{
15 | data with missing values
16 | }
17 | \details{
18 | \strong{cars} and \strong{iris} data sets with missing data for demonstration purposes.
19 | }
20 | \keyword{datasets}
21 |
--------------------------------------------------------------------------------
/man/ns_export.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{ns_export}
4 | \alias{ns_export}
5 | \alias{ns_export_function_list}
6 | \title{Export functions in list}
7 | \usage{
8 | ns_export(nms)
9 |
10 | ns_export_function_list(x)
11 | }
12 | \arguments{
13 | \item{nms}{character list of name to export}
14 |
15 | \item{x}{list; named list of functions to export}
16 | }
17 | \description{
18 | Export functions in list
19 | }
20 | \details{
21 | This uses the \code{@evalNamespace} directive to declare exports for objects by
22 | name or when provided in a function.
23 |
24 | ns_export <- function(nms) {
25 | sprintf("export(%s)", paste(nms, collapse = ","))
26 | }
27 |
28 | ns_export_named <-function(x) {
29 | nms <- names(x)
30 | sprintf("export(%s)", paste(nms, collapse = ","))
31 | }
32 |
33 | #' @evalNamespace ns_export(names(fns))
34 | }
35 | \seealso{
36 | \itemize{
37 | \item \code{\link[=assign_these]{assign_these()}}
38 | }
39 | }
40 | \keyword{internal}
41 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(tidyimpute)
3 |
4 | test_check("tidyimpute")
5 |
--------------------------------------------------------------------------------
/tests/testthat/test-impute.R.off:
--------------------------------------------------------------------------------
1 | library(magrittr)
2 |
3 | context("na_explicit")
4 |
5 |
6 | context( ".. data.frame")
# Smoke test: imputing a data.frame that contains an all-NA row must not error.
test_that( "na_explicit.data.frame", {

  data(iris)
  iris <- head(iris)
  iris[3,] <- NA

  # Wrapped in an explicit expectation so the test is not reported as empty;
  # `NA` as the pattern asserts that no error is raised.
  expect_error( iris %>% na_explicit(3), NA )

})
17 |
18 |
19 |
20 |
21 |
22 | ## NOTE:
23 | ## since na_explicit extends na_explicit ... the following test are just copied
24 | ## from test-na_explicit.R with the na_explicit => na_explicit
25 |
26 | context(".. continous-integer")
# Integer vectors: the result is expected to stay integer and to have its NAs
# filled from a scalar or from a same-length vector.
test_that("na_explicit-continuous-integer", {

  # Scalar replacement, one missing value
  one_gap <- 1:3
  one_gap[2] <- NA_integer_

  filled <- na_explicit(one_gap, 0)
  expect_is(filled, "integer")
  expect_equal(filled, c(1,0,3))

  filled <- na_explicit(one_gap, 2)
  expect_is(filled, "integer")
  expect_equal(filled, c(1,2,3))

  # Scalar replacement, two missing values
  two_gaps <- 1:4
  two_gaps[ c(2,4) ] <- NA_integer_

  filled <- na_explicit(two_gaps, 2)
  expect_is(filled, "integer")
  expect_equal(filled, c(1,2,3,2))

  # Vector replacement: each NA takes the value at its own position
  filled <- na_explicit(two_gaps, 1:4)
  expect_is(filled, "integer")
  expect_equal(filled, 1:4)

  # A replacement of an inconsistent class/type must be rejected
  expect_error( na_explicit(two_gaps, "a") )

})
56 |
57 |
58 | context(".. continous-numeric")
# Numeric (double) vectors: the result is expected to stay numeric and to have
# its NAs filled from a scalar or from a same-length vector.
# The test name says "numeric" so it can be told apart from the integer test
# in test reports.
test_that("na_explicit-continuous-numeric", {

  # REPLACE with scalar
  v <- 1:3 %>% as.numeric()
  v[2] <- NA_real_

  v %>% na_explicit(0) %T>%
    expect_is("numeric") %>%
    expect_equal( c(1,0,3) )

  v %>% na_explicit(2) %T>%
    expect_is("numeric") %>%
    expect_equal( c(1,2,3) )

  # Assigning NA_real_ into the integer sequence turns `v` into a double vector.
  v <- 1:4
  v[ c(2,4) ] <- NA_real_
  v %>% na_explicit(2) %T>%
    expect_is("numeric") %>%
    expect_equal( c(1,2,3,2) )   # 1 2 3 2

  # REPLACE with VECTOR
  v %>% na_explicit(1:4) %T>%
    expect_is("numeric") %>%
    expect_equal(1:4)          # 1 2 3 4

  # REPLACE with non-consistent class/type
  expect_error( v %>% na_explicit("a") )

})
88 |
89 |
90 |
91 | context(".. character")
# Character vectors: the result is expected to stay character for scalar,
# vector, default and differently-typed replacements.
test_that("na_explicit-character", {

  # Fixture: positions 2 and 4 are missing
  chr <- letters[1:4]
  chr[c(2,4)] <- NA_character_

  # Scalar replacement
  out <- na_explicit(chr, "x")
  expect_is(out, "character")
  expect_equivalent(out, c("a","x","c","x"))

  # Vector replacement
  out <- na_explicit(chr, letters[1:4])
  expect_is(out, "character")
  expect_equivalent(out, letters[1:4])

  # Default replacement value
  out <- na_explicit(chr)
  expect_is(out, "character")
  expect_equivalent(out, c("a","(NA)","c","(NA)"))

  # A replacement of another class is expected to come back as character
  out <- na_explicit(chr, 1L)
  expect_is(out, "character")
  expect_equivalent(out, c("a","1","c","1"))

})
120 |
121 |
122 | context( ".. factor")
123 | test_that( "na_explicit factor", {
124 |   # Levels are compared with levels(.) directly: `. %>% f` only builds a function and never runs it.
125 |   fct <- letters[1:5]
126 |   fct[c(2,5)] <- NA
127 |   fct <- as.factor(fct)   # a, NA, c, d, NA
128 |
129 |   # REPLACE with scalar
130 |   ans <- factor( qw(a, z, c, d, z) )
131 |   fct %>% na_explicit("z") %T>%
132 |     expect_is("factor") %T>%
133 |     { expect_equal( levels(.), levels(ans) ) } %>%
134 |     expect_equivalent( as.factor(ans) )
135 |
136 |   # REPLACE with vector
137 |   ans <- factor( letters[1:5], levels=qw(a,c,d,b,e) )
138 |   fct %>% na_explicit(letters[1:5]) %T>%
139 |     expect_is("factor") %T>%
140 |     { expect_equal( levels(.), levels(ans) ) } %>%
141 |     expect_equivalent(ans)
142 |
143 |   # REPLACE with default
144 |   ans <- factor( qw(a,(NA),c,d,(NA)), levels=qw(a,c,d,(NA)) )
145 |   fct %>% na_explicit() %T>%
146 |     expect_is("factor") %T>%
147 |     { expect_equal( levels(.), levels(ans) ) } %>%
148 |     expect_equivalent(ans)
149 |
150 | })
151 |
--------------------------------------------------------------------------------
/tests/testthat/test-impute_.R:
--------------------------------------------------------------------------------
1 | context("impute_ ")
2 |
3 | # Just here to make the tests look pretty.
--------------------------------------------------------------------------------
/tests/testthat/test-impute_max.R:
--------------------------------------------------------------------------------
1 | library(magrittr)
2 |
3 |
4 | data("nairis")
5 |
6 | context( ".. impute_max")
7 | test_that("impute_max", {
8 |   # Each successful variant is compared on row 3, columns 1:4 of the result.
9 |   ans <- c(5.4, 3.9, 1.7, 0.4)
10 |
11 |   res <- nairis %>% impute_max(1:4)
12 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
13 |
14 |   res <- nairis %>% impute_max_at( .vars=c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") )
15 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
16 |
17 |   expect_error(res <- nairis %>% impute_max_all)
18 |   # res %>% .[3,1:4] %>% expect_equivalent(ans)
19 |   # Keep the _if result in `res`; without the assignment the _at result was re-checked.
20 |   res <- nairis %>% impute_max_if(is.numeric)
21 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
22 |
23 |
24 | })
25 |
26 |
--------------------------------------------------------------------------------
/tests/testthat/test-impute_mean.R:
--------------------------------------------------------------------------------
1 | library(magrittr)
2 |
3 |
4 | data("nairis")
5 |
6 | context( ".. impute_mean")
7 | test_that("impute_mean", {
8 |   # Each variant is compared on row 3, columns 1:4 of the result.
9 |   ans <- c(5,3.42,1.48,0.24)
10 |
11 |   res <- nairis %>% impute_mean(1:4)
12 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
13 |
14 |   res <- nairis %>% impute_mean_at( .vars=c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") )
15 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
16 |
17 |   expect_warning(res <- nairis %>% impute_mean_all)
18 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
19 |   # Keep the _if result in `res`; without the assignment the _all result was re-checked.
20 |   res <- nairis %>% impute_mean_if(is.numeric)
21 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
22 |
23 |
24 | })
25 |
26 |
--------------------------------------------------------------------------------
/tests/testthat/test-impute_median.R:
--------------------------------------------------------------------------------
1 |
2 | data("nairis")
3 |
4 | context( ".. impute_median")
5 | test_that("impute_median", {
6 |   # Each successful variant is compared on row 3, columns 1:4 of the result.
7 |   ans <- c(5, 3.5, 1.4, 0.2)
8 |
9 |   res <- nairis %>% impute_median(1:4)
10 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
11 |
12 |   res <- nairis %>% impute_median_at( .vars=c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") )
13 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
14 |
15 |   expect_error(res <- nairis %>% impute_median_all)
16 |   # res %>% .[3,1:4] %>% expect_equivalent(ans)
17 |   # Keep the _if result in `res`; without the assignment the _at result was re-checked.
18 |   res <- nairis %>% impute_median_if(is.numeric)
19 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
20 |
21 |
22 | })
23 |
24 |
--------------------------------------------------------------------------------
/tests/testthat/test-impute_mode.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(magrittr)
3 | library(tidyimpute)
4 |
5 | data("nairis")
6 |
7 | context( ".. impute_mode")
8 | test_that("impute_mode", {
9 |   # Each successful variant is compared on row 3, columns 1:4 of the result.
10 |   ans <- c(4.6,3,1.4,0.2)
11 |
12 |   res <- nairis %>% impute_mode(1:4)
13 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
14 |
15 |   res <- nairis %>% impute_mode_at( .vars=c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") )
16 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
17 |   # _all is expected to error, so `res` is not reassigned and there is nothing new to check.
18 |   expect_error(res <- nairis %>% impute_mode_all)
19 |   # res %>% .[3,1:4] %>% expect_equivalent(ans)
20 |   # Keep the _if result in `res`; without the assignment the _at result was re-checked.
21 |   res <- nairis %>% impute_mode_if(is.numeric)
22 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
23 |
24 |
25 | })
26 |
27 |
--------------------------------------------------------------------------------
/tests/testthat/test-impute_zero.R:
--------------------------------------------------------------------------------
1 | library(magrittr)
2 |
3 |
4 | data("nairis")
5 |
6 | context( ".. impute_zero")
7 | test_that("impute_zero", {
8 |   # Each variant is compared on row 3, columns 1:4 of the result.
9 |   ans <- rep(0.0,4)
10 |
11 |   res <- nairis %>% impute_zero(1:4)
12 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
13 |
14 |   res <- nairis %>% impute_zero_at( .vars=c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") )
15 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
16 |
17 |   # expect_warning(
18 |   res <- nairis %>% impute_zero_all
19 |   #)
20 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
21 |   # Keep the _if result in `res`; without the assignment the _all result was re-checked.
22 |   res <- nairis %>% impute_zero_if(is.numeric)
23 |   res %>% .[3,1:4] %>% expect_equivalent(ans)
24 |
25 |
26 | })
27 |
28 |
--------------------------------------------------------------------------------
/tidyimpute.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | BuildType: Package
16 | PackageInstallArgs: --no-multiarch --with-keep.source
17 |
--------------------------------------------------------------------------------