├── .Rbuildignore
├── .github
    ├── .gitignore
    └── workflows
    │   ├── R-CMD-check.yaml
    │   └── check-standard.yaml
├── .gitignore
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── R
    └── hablar.R
├── README.Rmd
├── README.md
├── cran-comments.md
├── hablar.Rproj
├── man
    ├── aggregators.Rd
    ├── as_reliable.Rd
    ├── check_df.Rd
    ├── convert.Rd
    ├── could_this_be_that.Rd
    ├── create_dummy.Rd
    ├── cumulative_.Rd
    ├── find_in_df.Rd
    ├── given.Rd
    ├── if_else_.Rd
    ├── math.Rd
    ├── n_unique.Rd
    ├── rationalize.Rd
    ├── repeat_df.Rd
    ├── replacers.Rd
    ├── retype.Rd
    ├── s.Rd
    ├── set_wd_to_script_path.Rd
    └── this_date.Rd
├── tests
    ├── testthat.R
    └── testthat
    │   ├── .DS_Store
    │   ├── test.check_df.R
    │   ├── test.convert.R
    │   ├── test.could_this_be_that.R
    │   ├── test.create_dummy.R
    │   ├── test.cumulative.R
    │   ├── test.find_in_df.R
    │   ├── test.ifs.R
    │   ├── test.math.R
    │   ├── test.n_unique.R
    │   ├── test.rationalize.R
    │   ├── test.retype.R
    │   ├── test.s.R
    │   └── test.session_funs.R
└── vignettes
    ├── .DS_Store
    ├── .gitignore
    ├── convert.Rmd
    ├── hablar.Rmd
    ├── retype.Rmd
    └── s.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^\.travis\.yml$
 4 | ^README\.Rmd$
 5 | ^README-.*\.png$
 6 | ^cran-comments.md
 7 | ^LICENSE\.md$
 8 | NEWS.md
 9 | ^\.github$
10 | ^cran-comments\.md$
11 | ^CRAN-SUBMISSION$
12 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: R-CMD-check
10 | 
11 | jobs:
12 |   R-CMD-check:
13 |     runs-on: ubuntu-latest
14 |     env:
15 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
16 |       R_KEEP_PKG_SOURCE: yes
17 |     steps:
18 |       - uses: actions/checkout@v3
19 | 
20 |       - uses: r-lib/actions/setup-r@v2
21 |         with:
22 |           use-public-rspm: true
23 | 
24 |       - uses: r-lib/actions/setup-r-dependencies@v2
25 |         with:
26 |           extra-packages: any::rcmdcheck
27 |           needs: check
28 | 
29 |       - uses: r-lib/actions/check-r-package@v2
30 | 


--------------------------------------------------------------------------------
/.github/workflows/check-standard.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: R-CMD-check
10 | 
11 | jobs:
12 |   R-CMD-check:
13 |     runs-on: ${{ matrix.config.os }}
14 | 
15 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
16 | 
17 |     strategy:
18 |       fail-fast: false
19 |       matrix:
20 |         config:
21 |           - {os: macos-latest,   r: 'release'}
22 |           - {os: windows-latest, r: 'release'}
23 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
24 |           - {os: ubuntu-latest,   r: 'release'}
25 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
26 | 
27 |     env:
28 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29 |       R_KEEP_PKG_SOURCE: yes
30 | 
31 |     steps:
32 |       - uses: actions/checkout@v3
33 | 
34 |       - uses: r-lib/actions/setup-pandoc@v2
35 | 
36 |       - uses: r-lib/actions/setup-r@v2
37 |         with:
38 |           r-version: ${{ matrix.config.r }}
39 |           http-user-agent: ${{ matrix.config.http-user-agent }}
40 |           use-public-rspm: true
41 | 
42 |       - uses: r-lib/actions/setup-r-dependencies@v2
43 |         with:
44 |           extra-packages: any::rcmdcheck
45 |           needs: check
46 | 
47 |       - uses: r-lib/actions/check-r-package@v2
48 |         with:
49 |           upload-snapshots: true
50 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: hablar
 2 | Type: Package
 3 | Title: Non-Astonishing Results in R
 4 | Version: 0.3.2
 5 | Author: David Sjoberg
 6 | Maintainer: David Sjoberg <dav.sjob@gmail.com>
 7 | Description: Simple tools for converting columns to new data types. Intuitive functions for columns with missing values. 
 8 | License: MIT + file LICENSE
 9 | URL: https://davidsjoberg.github.io/
10 | BugReports: https://github.com/davidsjoberg/hablar/issues
11 | Encoding: UTF-8
12 | RoxygenNote: 7.1.2
13 | Imports: 
14 |     dplyr (>= 0.8.0),
15 |     purrr,
16 |     lubridate
17 | Suggests: 
18 |     testthat,
19 |     knitr,
20 |     rmarkdown,
21 |     webshot,
22 |     gapminder,
23 |     DiagrammeR,
24 |     rstudioapi
25 | VignetteBuilder: knitr
26 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2022
2 | COPYRIGHT HOLDER: David Sjoberg
3 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2022 David Sjoberg
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(rationalize,data.frame)
 4 | S3method(rationalize,default)
 5 | S3method(rationalize,numeric)
 6 | S3method(retype,Date)
 7 | S3method(retype,POSIXct)
 8 | S3method(retype,data.frame)
 9 | S3method(retype,default)
10 | S3method(retype,integer)
11 | S3method(retype,list)
12 | S3method(retype,logical)
13 | S3method(retype,numeric)
14 | export("%minus_%")
15 | export("%plus_%")
16 | export()
17 | export(as_reliable_dte)
18 | export(as_reliable_dtm)
19 | export(as_reliable_int)
20 | export(as_reliable_lgl)
21 | export(as_reliable_num)
22 | export(check_complete_set)
23 | export(check_duplicates)
24 | export(check_inf)
25 | export(check_irrational)
26 | export(check_na)
27 | export(check_nan)
28 | export(chr)
29 | export(convert)
30 | export(could_chr_be_dte)
31 | export(could_chr_be_dtm)
32 | export(could_chr_be_int)
33 | export(could_chr_be_num)
34 | export(could_num_be_int)
35 | export(cum_unique)
36 | export(cum_unique_)
37 | export(cummean_)
38 | export(cumsum_)
39 | export(dbl)
40 | export(dte)
41 | export(dtm)
42 | export(dummy)
43 | export(dummy_)
44 | export(fct)
45 | export(find_duplicates)
46 | export(find_inf)
47 | export(find_irrational)
48 | export(find_na)
49 | export(find_nan)
50 | export(first_)
51 | export(first_non_na)
52 | export(given)
53 | export(given_)
54 | export(if_else_)
55 | export(if_inf)
56 | export(if_na)
57 | export(if_nan)
58 | export(if_not_na)
59 | export(if_zero)
60 | export(inf_if)
61 | export(int)
62 | export(last_)
63 | export(lgl)
64 | export(max_)
65 | export(mean_)
66 | export(median_)
67 | export(min_)
68 | export(n_unique)
69 | export(n_unique_)
70 | export(na_if)
71 | export(nan_if)
72 | export(num)
73 | export(rationalize)
74 | export(repeat_df)
75 | export(retype)
76 | export(s)
77 | export(sd_)
78 | export(set_wd_to_script_path)
79 | export(squeeze)
80 | export(squeeze_)
81 | export(sum_)
82 | export(this_day)
83 | export(this_month)
84 | export(this_year)
85 | export(var_)
86 | export(zero_if)
87 | importFrom(dplyr,"%>%")
88 | importFrom(stats,median)
89 | importFrom(stats,na.omit)
90 | importFrom(stats,sd)
91 | importFrom(stats,var)
92 | importFrom(utils,install.packages)
93 | importFrom(utils,installed.packages)
94 | 


--------------------------------------------------------------------------------
/R/hablar.R:
--------------------------------------------------------------------------------
   1 | #' @importFrom dplyr %>% 
   2 | #' @importFrom stats median sd var na.omit
   3 | #' @importFrom utils install.packages installed.packages
   4 | #'
   5 | #' @export
   6 | #' 
   7 | ## quiets concerns of R CMD check re: the .'s that appear in pipelines
   8 | if(getRversion() >= "2.15.1") utils::globalVariables(c("."))
   9 | requireNamespace("rstudioapi", quietly = TRUE)
  10 | 
  11 | # could_this_be_that -----------------------------------------------------------
  12 |  
  13 | #' @title Tests is a vector could be of another data type
  14 | #' @name could_this_be_that
  15 | #' @aliases  could_chr_be_num
  16 | #' @aliases  could_chr_be_int
  17 | #' @aliases  could_num_be_int
  18 | #' @aliases  could_chr_be_dtm
  19 | #' @aliases  could_dtm_be_dte
  20 | #'
  21 | #' @description
  22 | #' Tests if vector could be a another data type without errors.
  23 | #'
  24 | #' @param .x vector of the data type that should be tested.
  25 | #'
  26 | #' @details The name logic of \code{could_chr_be_num} should be interpreted as:
  27 | #' Could this character vector be a numeric vector?
  28 | #' The same logic goes for all functions named could_this_be_that.
  29 | #'
  30 | #' @return TRUE or FALSE
  31 | #' 
  32 | #' @seealso \code{vignette("s")}, \code{vignette("hablar")}
  33 | #'
  34 | #' @examples
  35 | #' x <- c("1", "3", "7")
  36 | #' could_chr_be_num(x)
  37 | #' could_chr_be_int(x)
  38 | #'
  39 | #' x <- c("abc", "3", "Hello world")
  40 | #' could_chr_be_num(x)
  41 | #'
  42 | #' x <- c(NA, "3.45", "5,98")
  43 | #' could_chr_be_num(x)
  44 | #' could_chr_be_int(x)
  45 | #'
  46 | #' x <- as.numeric(c(3.45, 1.5))
  47 | #' could_num_be_int(x)
  48 | #'
  49 | #' x <- as.numeric(c(7, 2))
  50 | #' could_num_be_int(x)
  51 | #'
  52 | #' @rdname could_this_be_that
  53 | #' @export
  54 | could_chr_be_num <- function(.x) {
  55 |   if(!is.character(.x)) {
  56 |     stop("Only works with character vectors")}
  57 |   if(all(is.na(.x)) | length(.x) == 0) {
  58 |     return(FALSE)}
  59 |   .test <- tryCatch(as.numeric(.x),
  60 |                     error=function(e) e,
  61 |                     warning=function(w) w)
  62 |   if(any(attributes(.test)$class == "warning")) {
  63 |     return(FALSE)
  64 |   } else {TRUE}
  65 | }
  66 | 
  67 | #' @rdname could_this_be_that
  68 | #' @export
  69 | could_chr_be_int <- function(.x) {
  70 |   if(could_chr_be_num(.x) != TRUE) {
  71 |     return(FALSE)
  72 |   }
  73 |   if(all(is.na(.x)) | length(.x) == 0) {
  74 |     return(FALSE)}
  75 |   .x <- as.numeric(.x)
  76 |   if(all(is.na(.x)) | length(.x) == 0 | any(.x > .Machine$integer.max, na.rm=T)) {
  77 |     return(FALSE)}
  78 |   ifelse(all(.x[!is.na(.x)] == as.integer(.x[!is.na(.x)])), TRUE, FALSE)
  79 | }
  80 | 
  81 | #' @rdname could_this_be_that
  82 | #' @export
  83 | could_num_be_int <- function(.x) {
  84 |   if(!is.numeric(.x)) {
  85 |     stop("Only works with numeric vectors")}
  86 |   if(all(is.na(.x)) | length(.x) == 0 | any(is.nan(.x) | any(is.infinite(.x))) | any(.x > .Machine$integer.max, na.rm=T)) {
  87 |     return(FALSE)}
  88 |   ifelse(all(.x[!is.na(.x)] == as.integer(.x[!is.na(.x)])), TRUE, FALSE)
  89 | }
  90 | 
  91 | #' @rdname could_this_be_that
  92 | #' @export
  93 | could_chr_be_dtm <- function(.x) {
  94 |   if(!is.character(.x)) {
  95 |     stop("Only works with character vectors")}
  96 |   if(all(is.na(.x)) | length(.x) == 0) {
  97 |     return(FALSE)}
  98 |   res <- try(as.POSIXct(.x), silent = TRUE)
  99 |   test <- ifelse(all(class(res) != "try-error") == TRUE, TRUE, FALSE)
 100 |   if(test) {
 101 |     if(all(strftime(res, format="%H:%M:%S") %in% c("00:00:00", NA_character_))) {
 102 |       return(FALSE)
 103 |     } else {
 104 |       return(TRUE)
 105 |     }
 106 |   } else {
 107 |     return(FALSE)
 108 |   }
 109 | }
 110 | 
 111 | #' @rdname could_this_be_that
 112 | #' @export
 113 | could_chr_be_dte <- function(.x) {
 114 |   if(!is.character(.x)) {
 115 |     stop("Only works with character vectors")}
 116 |   if(all(is.na(.x)) | length(.x) == 0) {
 117 |     return(FALSE)}
 118 |   .test <- tryCatch(as.Date(.x),
 119 |                     error=function(e) e,
 120 |                     warning=function(w) w)
 121 |   if(any(attributes(.test)$class %in% c("warning", "error"))) {
 122 |     return(FALSE)
 123 |   } else {TRUE}
 124 | }
 125 | 
 126 | 
 127 | # S3 rationalize ---------------------------------------------------------------
 128 | 
 129 | 
 130 | #' @title Only allow rational values in numeric vectors
 131 | #'
 132 | #' \code{rationalize} transforms all numeric elements to be rational values or NA,
 133 | #' thus removes all \code{NaN,Inf} and replaces them with \code{NA}.
 134 | #'
 135 | #' @param .x vector or data.frame
 136 | #' @param ... columns to be evaluated. Only applicable if .x is a data frame.
 137 | #'
 138 | #' @return For vectors: same data type/class as .x.
 139 | #' @return For data.frame: a tbl data frame.
 140 | #' 
 141 | #' @details #' If a non-numeric vector is passed, it is unchanged. If a data.frame is
 142 | #' passed, it evaluates all columns separately.
 143 | #'
 144 | #' @seealso \code{\link{s}}, \code{\link{rationalize}}, \code{vignette("s")}, \code{vignette("hablar")}
 145 | #'
 146 | #' @examples
 147 | #' x <- c(3, -Inf, 6.56, 9.3, NaN, 5, -Inf)
 148 | #' rationalize(x)
 149 | #'
 150 | #' df <- data.frame(num_col = c(Inf, 3, NaN), 
 151 | #'                  chr_col = c("a", "b", "c"), 
 152 | #'                  stringsAsFactors = FALSE)
 153 | #' df
 154 | #' rationalize(df)
 155 | #'
 156 | #' @rdname rationalize
 157 | #' @export
 158 | 
 159 | rationalize <- function(.x, ...) {
 160 |   UseMethod("rationalize")
 161 | }
 162 | 
 163 | 
 164 | #' @return \code{NULL}
 165 | #'
 166 | #' @rdname rationalize
 167 | #' @method rationalize default
 168 | #' @export
 169 | 
 170 | rationalize.default <- function(.x, ...) {
 171 |   .x
 172 | }
 173 | 
 174 | 
 175 | #' @return \code{NULL}
 176 | #'
 177 | #' @rdname rationalize
 178 | #' @method rationalize numeric
 179 | #' @export
 180 | 
 181 | rationalize.numeric <- function(.x, ...) {
 182 |   .x[is.infinite(.x)] <- NA
 183 |   .x[is.nan(.x)] <- NA
 184 |   return(.x)
 185 | }
 186 | 
 187 | 
 188 | #' @return \code{NULL}
 189 | #'
 190 | #' @rdname rationalize
 191 | #' @method rationalize data.frame
 192 | #' @export
 193 | 
 194 | rationalize.data.frame <- function(.x, ...) {
 195 |   if(length(dplyr::quos(...)) == 0){
 196 |     .vars <- numeric(0)
 197 |   } else {
 198 |     .vars <- dplyr::quos(...)
 199 |   }
 200 | 
 201 |   if(length(.vars) != 0){
 202 |     .x <- .x %>%
 203 |       dplyr::mutate_at(dplyr::quos(!!!.vars),
 204 |                 ~rationalize(.))
 205 |   } else {
 206 |     .x <- .x %>%
 207 |       dplyr::mutate_at(dplyr::quos(dplyr::everything()),
 208 |                 ~rationalize(.))
 209 |   }
 210 |   return(.x)
 211 | }
 212 | 
 213 | 
 214 | # S3 retype --------------------------------------------------------------------
 215 | 
 216 | #' Return simple data types
 217 | #'
 218 | #' \code{retype} transforms all elements into simple classes. The simple classes
 219 | #' are date, numeric and character. By transforming all elements to these
 220 | #' classes no information is lost, while simplifying the object. See details below for
 221 | #' more information or type \code{vignette("retype")} in the console.
 222 | #'
 223 | #' @param .x vector or data.frame
 224 | #' @param ... column names to be evaluated. Only if .x is a data frame.
 225 | #'
 226 | #' @return For vectors: simple class of .x.
 227 | #' @return For data.frame: a tbl data frame with simple classes.
 228 | #'
 229 | #' @seealso \code{\link{s}}, \code{\link{rationalize}} #' \code{vignette("retype")}, \code{vignette("s")}, \code{vignette("hablar")}
 230 | #'
 231 | #' @examples
 232 | #' # Dates
 233 | #' dte <- as.Date(c("2018-01-01", "2016-03-21", "1970-01-05"))
 234 | #' retype(dte)
 235 | #' retype(dte)
 236 | #'
 237 | #' # Factors
 238 | #' fct <- as.factor(c("good", "bad", "average"))
 239 | #' retype(dte)
 240 | #'
 241 | #' # Character that only contains numeric elements
 242 | #' num_chr <- c("3","4.0", "3,5")
 243 | #' retype(num_chr)
 244 | #'
 245 | #' # Logical
 246 | #' lgl <- c(TRUE, FALSE, TRUE)
 247 | #' retype(lgl)
 248 | #'
 249 | #' # Data frame with all the above vectors
 250 | #' df <- data.frame(dte = dte, 
 251 | #'                  fct = fct, 
 252 | #'                  num_chr = num_chr, 
 253 | #'                  lgl = lgl, 
 254 | #'                  stringsAsFactos = FALSE)
 255 | #' df
 256 | #' retype(df)
 257 | #'
 258 | #' @details Each vector past to \code{retype} is reclassified into the highest position in
 259 | #' a simplification hierarchy without loosing any information. This means that:
 260 | #' Factors are converted to characters.
 261 | #' However, character vectors (or vectors changed to character initially)
 262 | #' are checked to see if they could be a numeric vector without error.
 263 | #' If so, it is transformed into a numeric vector which is higher in the hierarchy.
 264 | #' Vectors of class logical, integer are changed to numerical.
 265 | #' Dates and date time (POSIXct) goes through the same procedure.
 266 | #' Lists and complex vectors are left unchanged because the are neither simple nor complicated.
 267 | #'
 268 | #' @rdname retype
 269 | #' @export
 270 | 
 271 | # S3 retype
 272 | retype<- function(.x, ...) {
 273 |   UseMethod("retype")
 274 | }
 275 | 
 276 | #' @return \code{NULL}
 277 | #'
 278 | #' @rdname retype
 279 | #' @method retype default
 280 | #' @export
 281 | retype.default <- function(.x, ...) {
 282 |   .x <- as.character(.x)
 283 | 
 284 |   # Numericals
 285 |   if(could_chr_be_num(.x) == TRUE) {
 286 |     .x <- as.numeric(gsub(",", ".", .x))
 287 |     if(could_num_be_int(.x) == TRUE) {
 288 |       return(as.integer(.x))
 289 |     } else {
 290 |       return(.x)
 291 |     }
 292 |     }
 293 | 
 294 |   # Dates
 295 |   if(could_chr_be_dtm(.x) == TRUE) {
 296 |       return(as.POSIXct(.x))
 297 |   }
 298 |   if (could_chr_be_dte(.x) == TRUE) {
 299 |     return(as.Date(.x))
 300 |   } else {
 301 |     return(.x)
 302 |   }
 303 | }
 304 | 
 305 | #' @return \code{NULL}
 306 | #'
 307 | #' @rdname retype
 308 | #' @method retype logical
 309 | #' @export
 310 | retype.logical <- function(.x, ...) {
 311 |   as.integer(.x)
 312 | }
 313 | 
 314 | #' @return \code{NULL}
 315 | #'
 316 | #' @rdname retype
 317 | #' @method retype integer
 318 | #' @export
 319 | retype.integer <- function(.x, ...) {
 320 |   .x
 321 | }
 322 | 
 323 | #' @return \code{NULL}
 324 | #'
 325 | #' @rdname retype
 326 | #' @method retype Date
 327 | #' @export
 328 | retype.Date <- function(.x, ...) {
 329 |   .x
 330 | }
 331 | 
 332 | #' @return \code{NULL}
 333 | #'
 334 | #' @rdname retype
 335 | #' @method retype POSIXct
 336 | #' @export
 337 | retype.POSIXct <- function(.x, ...) {
 338 |   .x
 339 | }
 340 | 
 341 | #' @return \code{NULL}
 342 | #'
 343 | #' @rdname retype
 344 | #' @method retype numeric
 345 | #' @export
 346 | retype.numeric <- function(.x, ...) {
 347 |   if(could_num_be_int(.x) == TRUE) {
 348 |     .x <- as.integer(.x)
 349 |   }
 350 |   return(.x)
 351 | }
 352 | 
 353 | #' @return \code{NULL}
 354 | #'
 355 | #' @rdname retype
 356 | #' @method retype list
 357 | #' @export
 358 | retype.list <- function(.x, ...) {
 359 |   .x
 360 | }
 361 | 
 362 | #' @return \code{NULL}
 363 | #'
 364 | #' @rdname retype
 365 | #' @method retype data.frame
 366 | #' @export
 367 | retype.data.frame <- function(.x, ...) {
 368 |   if(length(dplyr::quos(...)) == 0){
 369 |     .vars <- numeric(0)
 370 |   } else {
 371 |     .vars <- dplyr::quos(...)
 372 |   }
 373 | 
 374 |   if(length(.vars) != 0){
 375 |     .x <- .x %>%
 376 |       dplyr::mutate_at(dplyr::vars(!!!.vars),
 377 |                 ~retype(.))
 378 |   } else {
 379 |     .x <- .x %>%
 380 |       dplyr::mutate_all(retype)
 381 |   }
 382 |   return(.x)
 383 | }
 384 | 
 385 | 
 386 | 
 387 | # as_reliable_[data type] ------------------------------------------------------
 388 | 
 389 | #' @title Reliable conversion to another data type
 390 | #' @name as_reliable
 391 | #' @aliases as_reliable_num
 392 | #' @aliases as_reliable_int
 393 | #' @aliases as_reliable_lgl
 394 | #'
 395 | #' @description
 396 | #' Support functions for the \code{convert} function. These functions coerces vectors to a new data type, e.g. \code{as.numeric}
 397 | #' except that it converts factors to character first.
 398 | #' See \code{\link{convert}} for more information.
 399 | #'
 400 | #' @usage as_reliable_num(.x, ...)
 401 | #'
 402 | #' as_reliable_int(.x, ...)
 403 | #'
 404 | #' as_reliable_lgl(.x, ...)
 405 | #'
 406 | #' as_reliable_dte(.x, origin = "1970-01-01", ...)
 407 | #'
 408 | #' as_reliable_dtm(.x, origin = "1970-01-01", tz = "UTC", ...)
 409 | #'
 410 | #' @param .x vector
 411 | #' @param origin argument to set origin for date/date time.
 412 | #' @param tz argument to set time zone for date/date time. Default is UTC.
 413 | #' @param ... additional arguments
 414 | #'
 415 | #' @return vector
 416 | #' 
 417 | #' @seealso \code{vignette("convert")}, \code{vignette("hablar")}
 418 | #'
 419 | #' @examples
 420 | #' x <- as.factor(c("1", "3.5"))
 421 | #' as_reliable_num(x)
 422 | #'
 423 | #' x <- as.factor(c("9", "7"))
 424 | #' as_reliable_int(x)
 425 | #'
 426 | #' x <- as.factor(c("1", "0"))
 427 | #' as_reliable_lgl(x)
 428 | #'
 429 | #' @rdname as_reliable
 430 | #' @export
 431 | 
 432 | as_reliable_num <- function(.x, ...) {
 433 |   if(is.factor(.x)) {
 434 |     return(as.numeric(as.character(.x), ...))}
 435 |   if(TRUE) {
 436 |     return(as.numeric(.x, ...))}
 437 | }
 438 | 
 439 | #' @rdname as_reliable
 440 | #' @export
 441 | as_reliable_int <- function(.x, ...) {
 442 |   if(is.factor(.x)) {
 443 |     return(as.integer(as.character(.x), ...))}
 444 |   if(TRUE) {
 445 |     return(as.integer(.x, ...))}
 446 | }
 447 | 
 448 | 
 449 | #' @rdname as_reliable
 450 | #' @export
 451 | as_reliable_lgl <- function(.x, ...) {
 452 |   if(is.logical(.x)) {
 453 |     return(.x)}
 454 |   if(any(class(.x) %in% c("POSIXct", "Date"))) {
 455 |     stop("Date and Date-time vectors can't be converted to logical.")}
 456 |   if(is.factor(.x)) {
 457 |     .x <- as.character(.x)}
 458 |   if(is.character(.x)){
 459 |     if(could_chr_be_int(.x)) {
 460 |       return(as.logical(as.integer(.x)))}
 461 |   }
 462 |   if(TRUE) {
 463 |     return(as.logical(.x, ...))}
 464 | }
 465 | 
 466 | 
 467 | 
 468 | #' @rdname as_reliable
 469 | #' @export
 470 | as_reliable_dte <- function(.x, origin = "1970-01-01", ...) {
 471 |   if(any(class(.x) == "Date")) {
 472 |     return(.x)}
 473 |   if(is.logical(.x)) {
 474 |     stop("Logical vectors can't be converted to date.")}
 475 |   if(is.factor(.x)) {
 476 |     .x <- as.character(.x)}
 477 |   if(any(class(.x) == "POSIXct")) {
 478 |     .x <- strftime(.x)}
 479 |   if(TRUE) {
 480 |     return(as.Date(.x, origin = origin, ...))}
 481 | }
 482 | 
 483 | 
 484 | #' @rdname as_reliable
 485 | #' @export
 486 | as_reliable_dtm <- function(.x, origin = "1970-01-01", tz = "UTC", ...) {
 487 |   if(any(class(.x) == "POSIXct")) {
 488 |     return(.x)}
 489 |   if(is.logical(.x)) {
 490 |     stop("Logical vectors can't be converted to date time.")}
 491 |   if(is.factor(.x)) {
 492 |     .x <- as.character(.x)}
 493 |   if(TRUE) {
 494 |     return(as.POSIXct(.x, origin = origin, tz = tz, ...))}
 495 | }
 496 | 
 497 | 
 498 | 
 499 | # convert ----------------------------------------------------------------------
 500 | 
 501 | #' @title Convert data type of columns
 502 | #' @name convert
 503 | #' @aliases convert
 504 | #' @aliases num
 505 | #' @aliases dbl
 506 | #' @aliases int
 507 | #' @aliases dte
 508 | #' @aliases dtm
 509 | #' @aliases fct
 510 | #' @aliases lgl
 511 | #'
 512 | #' \code{convert} converts columns to new classes through scoping functions. Always converts factors to
 513 | #' character before conversion Type \code{vignette("convert")} in the console for more information.
 514 | #'
 515 | #' @param .x A data.frame
 516 | #' @param ... Scoping functions, see details
 517 | #' @param .args extra argument to be passed to support function.
 518 | #'
 519 | #' @return a tbl data frame
 520 | #' 
 521 | #' @seealso \code{vignette("convert")}, \code{vignette("hablar")}
 522 | #'
 523 | #' @examples
 524 | #' \dontrun{
 525 | #' 
 526 | #' # Change one column to numeric and another to character
 527 | #' mtcars %>% 
 528 | #'   convert(num(gear),
 529 | #'           chr(mpg))
 530 | #'
 531 | #'
 532 | #' # Changing multiple data types on multiple columns
 533 | #' mtcars %>% 
 534 | #'   convert(int(hp,
 535 | #'               wt),
 536 | #'           fct(qsec,
 537 | #'               cyl,
 538 | #'               drat))
 539 | #' 
 540 | #' # Also works with tidyselect convenience functions
 541 | #' mtcars %>%
 542 | #'   convert(int(vs:carb),
 543 | #'           fct(last_col()))
 544 | #' 
 545 | #' }
 546 | #'
 547 | #' @rdname convert
 548 | #' @export
 549 | num <- function(..., .args = list()){
 550 |   list(vars = dplyr::quos(...),
 551 |        fun = ~as_reliable_num(., !!!.args))}
 552 | 
 553 | #' @rdname convert
 554 | #' @export
 555 | chr <- function(..., .args = list()){
 556 |   list(vars = dplyr::quos(...),
 557 |        fun = ~as.character(., !!!.args))}
 558 | 
 559 | #' @rdname convert
 560 | #' @export
 561 | lgl <- function(..., .args = list()){
 562 |   list(vars = dplyr::quos(...),
 563 |        fun = ~as_reliable_lgl(., !!!.args))}
 564 | 
 565 | #' @rdname convert
 566 | #' @export
 567 | int <- function(..., .args = list()){
 568 |   list(vars = dplyr::quos(...),
 569 |        fun = ~as_reliable_int(., !!!.args))}
 570 | 
 571 | #' @rdname convert
 572 | #' @export
 573 | dbl <- function(..., .args = list()){
 574 |   list(vars = dplyr::quos(...),
 575 |        fun = ~as_reliable_num(., !!!.args))}
 576 | 
 577 | #' @rdname convert
 578 | #' @export
 579 | fct <- function(..., .args = list()){
 580 |   list(vars = dplyr::quos(...),
 581 |        fun = ~factor(., !!!.args))}
 582 | 
 583 | #' @rdname convert
 584 | #' @export
 585 | dtm <- function(..., .args = list()){
 586 |   list(vars = dplyr::quos(...),
 587 |        fun = ~as_reliable_dtm(., !!!.args))}
 588 | 
 589 | #' @rdname convert
 590 | #' @export
 591 | dte <- function(..., .args = list()){
 592 |   list(vars = dplyr::quos(...),
 593 |        fun = ~as_reliable_dte(., !!!.args))}
 594 | 
 595 | #' @rdname convert
 596 | #' @export
 597 | convert <- function(.x, ...){
 598 |   if(!is.data.frame(.x)) {
 599 |     stop("convert only works with data frames.")
 600 |   }
 601 |   args <- list(...)
 602 |   
 603 |   for(i in seq_along(args)) {
 604 |     .vars <- args[[i]]$vars
 605 |     .fun  <- args[[i]]$fun
 606 |     .x <- .x %>% dplyr::mutate_at(dplyr::vars(!!!.vars), .fun)
 607 |   }
 608 |   return(.x)
 609 | }
 610 | 
 611 | 
 612 | # s ----------------------------------------------------------------------------
 613 | 
 614 | #' Make vector shorter and simpler
 615 | #'
 616 | #' \code{s} means simple and short. It removes all non-values, i.e. \code{NA,Inf,NaN}  from a vector.
 617 | #' However, if the length is 0 it returns NA.
 618 | #' It is useful in combination with summary functions, e.g. mean, sum or min, when
 619 | #' an answer is desired, if there is one in the data. In any other case NA is returned. 
 620 | #' Type \code{vignette("s")} in the console for more information.
 621 | #'
 622 | #' @param .x one vector. Does not work for factors.
 623 | #' @param ignore_na if TRUE then NA omitted from results, as long as any non-NA element is left.
 624 | #'
 625 | #' @return a shortened and simplified vector
 626 | #' 
 627 | #' @seealso \code{\link{retype}}, \code{\link{rationalize}}, \code{vignette("s")}, \code{vignette("hablar")}
 628 | #'
 629 | #' @examples
 630 | #' \dontrun{
 631 | #' library(dplyr)
 632 | #' 
 633 | #' ## s on a weird numeric vector
 634 | #' vector <- c(7, NaN, 6, -Inf, 5, 4, NA)
 635 | #' s(vector)
 636 | #'
 637 | #' ## Sum vector with non-rational values
 638 | #' vector <- c(7, NaN, -Inf, 4)
 639 | #' # Base R
 640 | #' sum(vector)
 641 | #' # With s
 642 | #' sum(s(vector))
 643 | #'
 644 | #' ## Max of vector with only NA
 645 | #' # Base R
 646 | #' max(vector, na.rm = TRUE)
 647 | #' # With s
 648 | #' max(s(vector))
 649 | #'
 650 | #' ## First of vector when NA is first element
 651 | #' vector <- c(NA, "X", "Y")
 652 | #' # dplyr R
 653 | #' first(vector)
 654 | #' # With s
 655 | #' first(s(vector))
 656 | #'
 657 | #' ## Use of s when NA should not be removes
 658 | #' vector <- c(7, Inf, NA, 4)
 659 | #' # Base R
 660 | #' sum(vector)
 661 | #' # With s
 662 | #' sum(s(vector, ignore_na = FALSE))
 663 | #'
 664 | #' ## s when summarizing a weird data.frame
 665 | #' df_test <- data.frame(a = c(NaN, 1, -Inf, 3), 
 666 | #'                       b = c(NA, "Q", "P", "P"), 
 667 | #'                       c = c(NA, NA, NA, NA), 
 668 | #'                       stringsAsFactors = FALSE) 
 669 | #' df_test
 670 | #' 
 671 | #' # Base R aggregation with dplyr's summarize
 672 | #' summarise(df_test, mean_a = mean(a), 
 673 | #'                    min_c = min(c, na.rm = TRUE))
 674 | #' # With s
 675 | #' summarise(df_test, mean_a = mean(s(a)), 
 676 | #'                    min_c = min(s(c)))
 677 | #' }
 678 | #' @rdname s
 679 | #' @export
 680 | 
 681 | s <- function(.x, ignore_na = TRUE) {
 682 |   if(is.factor(.x)){
 683 |     stop("s does not work with factors. Consider converting it into another data type with hablar::convert or hablar::retype.")
 684 |   }
 685 |   .v <- rationalize(.x)
 686 |   if(all(is.na(.v))) {
 687 |     return(.v[1])
 688 |   }
 689 |   if(length(.v) == 0) {
 690 |     na <- c(NA, .x[1])[1]
 691 |     return(na)
 692 |   }
 693 |   if(ignore_na) {return(c(.v[!is.na(.v)]))}
 694 |   return(.v)
 695 | }
 696 | 
 697 | 
 698 | 
 699 | # simplifying summary functions ---------------------------------------------------
 700 | 
 701 | #' @title Combine aggregate functions and s
 702 | #' @name wrapper - s and summary funs
 703 | #' @aliases sum_
 704 | #' @aliases mean_
 705 | #' @aliases max_
 706 | #' @aliases min_
 707 | #' @aliases sd_
 708 | #' @aliases var_
 709 | #' @aliases first_
 710 | #' @aliases last_
 711 | #'
 712 | #' @description
 713 | #' \code{[summary function_*]} functions are simple wrappers of aggregate function
 714 | #' and the \code{s} function. \code{s} removes all non-values,
 715 | #' i.e. \code{NA,Inf,NaN}  from a vector.
 716 | #' However, if the length is 0 it returns NA. The result is then passed to the
 717 | #' corresponding aggregation function. For example,
 718 | #' \code{min_(x)} is identical to \code{min(s(x))}. Please read \code{vignette("s")} for more information.
 719 | #'
 720 | #' @param .x a single vector
 721 | #' @param ignore_na if false missing values are not omitted.
 722 | #' 
 723 | #' @details 'first_non_na' is a faster version of 'first' since it only search for a non NA value until it finds one.
 724 | #' 'squeeze' on the other hand checks if all elements are equal and then returns only that value.
 725 | #'
 726 | #' @return a single aggregated value
 727 | #' 
 728 | #' @seealso \code{vignette("convert")}, \code{vignette("hablar")}
 729 | #'
 730 | #' @examples
 731 | #' ## sum_ on non-rational numeric vector
 732 | #' vector <- c(7, NaN, -Inf, 4)
 733 | #' sum_(vector)
 734 | #'
 735 | #' ## Min of vector with length 0
 736 | #' vector <- c()
 737 | #' # With a wrapped s
 738 | #' min_(vector)
 739 | #'
 740 | #' ## Max of vector with only NA
 741 | #' # With a wrapped s
 742 | #' max_(vector)
 743 | #'
 744 | #' ## Use of s when NA should not be removed
 745 | #' vector <- c(7, Inf, NA, 4)
 746 | #' # With a wrapped s
 747 | #' sum_(vector, ignore_na = FALSE)
 748 | #'
 749 | #' @rdname aggregators
 750 | #' @export
 751 | 
 752 | max_ <- function(.x, ignore_na = TRUE) {
 753 |   max(s(.x, ignore_na = ignore_na))}
 754 | 
 755 | #' @rdname aggregators
 756 | #' @export
 757 | min_ <- function(.x, ignore_na = TRUE) {
 758 |   min(s(.x, ignore_na = ignore_na))}
 759 | 
 760 | #' @rdname aggregators
 761 | #' @export
 762 | sum_ <- function(.x, ignore_na = TRUE) {
 763 |   sum(s(.x, ignore_na = ignore_na))}
 764 | 
 765 | #' @rdname aggregators
 766 | #' @export
 767 | mean_ <- function(.x, ignore_na = TRUE) {
 768 |   mean(s(.x, ignore_na = ignore_na))}
 769 | 
 770 | #' @rdname aggregators
 771 | #' @export
 772 | median_ <- function(.x, ignore_na = TRUE) { 
 773 |   stats::median(s(.x, ignore_na = ignore_na))}
 774 | 
 775 | #' @rdname aggregators
 776 | #' @export
 777 | sd_ <- function(.x, ignore_na = TRUE) {
 778 |   stats::sd(s(.x, ignore_na = ignore_na))}
 779 | 
 780 | #' @rdname aggregators
 781 | #' @export
 782 | var_ <- function(.x, ignore_na = TRUE) {
 783 |   stats::var(s(.x, ignore_na = ignore_na))}
 784 | 
 785 | #' @rdname aggregators
 786 | #' @export
 787 | first_ <- function(.x, ignore_na = TRUE) {
 788 |   dplyr::first(s(.x, ignore_na = ignore_na))}
 789 | 
 790 | #' @rdname aggregators
 791 | #' @export
 792 | last_ <- function(.x, ignore_na = TRUE) {
 793 |   dplyr::last(s(.x, ignore_na = ignore_na))}
 794 | 
 795 | #' @rdname aggregators
 796 | #' @export
 797 | first_non_na <- function(.x) {
 798 |   .x <- rationalize(.x)
 799 |   .x[base::Position(function(..x)!is.na(..x), .x)]
 800 | }
 801 | 
 802 | #' @rdname aggregators
 803 | #' @export
 804 | squeeze <- function(.x, ignore_na = FALSE) {
 805 |   .uniques <- unique(rationalize(.x))
 806 |   if(ignore_na == FALSE & length(.uniques) > 1) {
 807 |     stop("More than one unique value")
 808 |   }
 809 |   if(ignore_na == FALSE & length(na.omit(.uniques)) == 0) {
 810 |     stop("No non missing values, to ignore missing use 'squeeze_'")
 811 |   }
 812 |   if(ignore_na == TRUE & length(na.omit(.uniques)) > 1) {
 813 |     stop("More than one unique non missing value")
 814 |   }
 815 |   if(length(na.omit(.uniques)) == 0) {
 816 |     return(.uniques[1])
 817 |   }
 818 |   .uniques[!is.na(.uniques)]
 819 | }
 820 | 
 821 | #' @rdname aggregators
 822 | #' @export
 823 | squeeze_ <- function(.x, ignore_na = TRUE) {
 824 |   squeeze(.x, ignore_na = ignore_na)
 825 | }
 826 | 
 827 | 
 828 | # simplifying math functions ---------------------------------------------------
 829 | #' @title Ignore NA in math
 830 | #' @name math ignore NA in math funs
 831 | #' @aliases %minus_%
 832 | #' @aliases %plus_%
 833 | #'
 834 | #' @description
 835 | #' Simplifying math functions are simple wrappers of math function (- +).
 836 | #' If any of the left-hand side or right-hand side is NA, Inf or NaN it 
 837 | #' returns any rational value, if there is any. 
 838 | #' 
 839 | #' However, if the both values are irrational it returns NA. 
 840 | #' The result is then passed to the
 841 | #' corresponding math function.
 842 | #'
 843 | #' @param .x numeric or integer element
 844 | #' @param .y numeric or integer element
 845 | #'
 846 | #' @return a single value
 847 | #' 
 848 | #' @seealso \code{vignette("s")}, \code{vignette("hablar")}
 849 | #'
 850 | #' @examples
 851 | #' \dontrun{# The simplest case
 852 | #' 3 %minus_% 2
 853 | #' 
 854 | #' # But with NA it returns 3 as if the NA were zero
 855 | #' 3 %minus_% NA
 856 | #' 
 857 | #' # It doesnt matter if the irrational number is on left- or right-hand.
 858 | #' NA %plus_% 5
 859 | #' }
 860 | #'
 861 | #' @rdname math
 862 | #' @export
 863 | `%minus_%` <- function(.x, .y) {
 864 |   if(!all(c(class(.x), class(.y)) %in% c("integer",
 865 |                                          "numeric"))){
 866 |     stop("Input must be of type 'numeric' or 'integer'")
 867 |   }
 868 |   if(length(.x) != length(.y) & (length(.x) != 1 & length(.y) != 1)) {
 869 |     stop("LHS need to have the same length as RHS or length 1")
 870 |   }
 871 |   
 872 |   ifelse(is.na(.x), 0, .x) - ifelse(is.na(.y), 0, .y)
 873 | }
 874 | 
 875 | #' @rdname math
 876 | #' @export
 877 | `%plus_%` <- function(.x, .y) {
 878 |   if(!all(c(class(.x), class(.y)) %in% c("integer",
 879 |                                          "numeric"))){
 880 |     stop("Input must be of type 'numeric' or 'integer'")
 881 |   }
 882 |   if(length(.x) != length(.y) & (length(.x) != 1 & length(.y) != 1)) {
 883 |     stop("LHS need to have the same length as RHS or length 1")
 884 |   }
 885 |   
 886 |   ifelse(is.na(.x), 0, .x) + ifelse(is.na(.y), 0, .y)
 887 | }
 888 | 
 889 | 
 890 | 
 891 | # Count unique elements --------------------------------------------------------
 892 | #' @title n_unique
 893 | #' @name n_unique count unique elements
 894 | #' @aliases n_unique
 895 | #' @aliases n_unique_
 896 | #'
 897 | #' @description
 898 | #' Simple wrapper for length(unique(.x)). If you use n_unique_(.x) then NA
 899 | #' is ignored when counting.
 900 | #' 
 901 | #' @usage n_unique(.x, ignore_na = FALSE)
 902 | #' 
 903 | #' n_unique_(.x, ignore_na = TRUE)
 904 | #'
 905 | #' @param .x a vector
 906 | #' @param ignore_na a logical indicating whether missing values should be removed
 907 | #'
 908 | #' @return a single numeric vector of the same length as the data frame it
 909 | #' is applied to.
 910 | #' 
 911 | #' @seealso \code{vignette("s")}, \code{vignette("hablar")}
 912 | #'
 913 | #' @examples
 914 | #' 
 915 | #' # Simple
 916 | #' n_unique(c(1, 2, 2, 3))
 917 | #' 
 918 | #' # Same result as above eventhough vector includes NA
 919 | #' n_unique_(c(1, 2, 2, 3, NA))
 920 | #'
 921 | #' @rdname n_unique
 922 | #' @export
 923 | n_unique <- function(.x, ignore_na = FALSE) {
 924 |   if(ignore_na) {
 925 |     length(na.omit(unique(.x)))
 926 |   } else {
 927 |     length(unique(.x))
 928 |   }
 929 | }
 930 | 
 931 | #' @rdname n_unique
 932 | #' @export
 933 | n_unique_ <- function(.x, ignore_na = TRUE) {
 934 |   if(ignore_na) {
 935 |     length(na.omit(unique(.x)))
 936 |   } else {
 937 |     length(unique(.x))
 938 |   }
 939 | }
 940 | 
 941 | 
 942 | 
 943 | # if_else_ ---------------------------------------------------------------------
 944 | #' @title if_this_else_that_
 945 | #' @name if_else_
 946 | #'
 947 | #' @description
 948 | #' A vectorised if or else function. It checks that the true or false (or the optional missing)
 949 | #' arguments have the same type. However it accepts a generic NA. Built upon
 950 | #' dplyr's [if_else()] function. The only difference is that the user do not have to specify
 951 | #' the type of NA. if_else_ is faster than base [ifelse()] and a tad slower than 
 952 | #' dplyr's [if_else()]. Attributes are taken from either true or false because one
 953 | #' generic NA.
 954 | #' 
 955 | #' @usage if_else_(condition, true, false, missing = NULL)
 956 | 
 957 | #' @param condition logical vector
 958 | #' @param true value to replace if condition is true. Must be same length as condition or 1.
 959 | #' @param false value to replace if condition is false. Must be same length as condition or 1.
 960 | #' @param missing optional. a replacement if condition returns NA. Must be same length as condition or 1.
 961 | #'
 962 | #' @return a vector
 963 | #' 
 964 | #' @details If the returning vector have attributes (e.g. for factors) it returns the attributes
 965 | #' for the first non-generic NA in the order true, false and then missing.
 966 | #' 
 967 | #' @seealso \code{vignette("s")}, \code{vignette("hablar")}
 968 | #'
 969 | #' @examples
 970 | #' 
 971 | #' v <- c(TRUE, FALSE, TRUE, FALSE)
 972 | #' if_else_(v, "true", "false")
 973 | #' 
 974 | #' v <- c(TRUE, FALSE, NA, FALSE)
 975 | #' if_else_(v, 1, NA, 999)
 976 | #' 
 977 | #' @rdname if_else_
 978 | #' @export
 979 | if_else_ <- function(condition, true, false, missing = NULL){
 980 |   if(!check_single_generic_na(true)){
 981 |     templ <- true[1]
 982 |   } else if(!check_single_generic_na(false)){
 983 |     templ <- false[1]
 984 |   } else if(!is.null(missing)){
 985 |     if(!check_single_generic_na(missing)){
 986 |       templ <- missing[1]
 987 |     }
 988 |   } else{
 989 |     templ <- as.logical(T)
 990 |   }
 991 |   
 992 |   if(check_single_generic_na(true)){
 993 |     templ[2] <- NA
 994 |     true <- templ[2]
 995 |   }
 996 |   if(check_single_generic_na(false)){
 997 |     templ[2] <- NA
 998 |     false <- templ[2]
 999 |   }
1000 |   if(!is.null(missing)){
1001 |     if(check_single_generic_na(missing)){
1002 |       templ[2] <- NA
1003 |       missing <- templ[2]
1004 |     }
1005 |   }
1006 |   
1007 |   if (is.factor(templ)) {
1008 |     if (is.null(missing)) {
1009 |       if (length(unique(list(levels(true),
1010 |                              levels(false)))) != 1) {
1011 |         warning("Factor levels differs")
1012 |       }
1013 |       } else {
1014 |         if (length(unique(list(
1015 |           levels(true),
1016 |           levels(false),
1017 |           levels(missing)
1018 |         ))) != 1) {
1019 |           warning("Factor levels differs")
1020 |         }
1021 |       }
1022 |   }
1023 |   dplyr::if_else(condition, true, false, missing)
1024 | }
1025 | 
1026 | 
1027 | # replacement and specials ------------------------------------------------------------------------
1028 | #' @title replacemnt and specials
1029 | #' @name replacers
1030 | #' @aliases if_na
1031 | #' @aliases if_inf
1032 | #' @aliases if_nan
1033 | #' @aliases if_zero
1034 | #' @aliases na_if
1035 | #' @aliases inf_if
1036 | #' @aliases nan_if
1037 | #' @aliases zero_if
1038 | #'
1039 | #' @description
1040 | #' If-this-type-then replace with x. And the other way around; replace with x
1041 | #' if this.
1042 | #' 
1043 | #' @usage if_na(.x, replacement, missing = NULL)
1044 | #' @usage if_nan(.x, replacement, missing = NULL)
1045 | #' @usage if_inf(.x, replacement, missing = NULL)
1046 | #' @usage if_zero(.x, replacement, missing = NULL)
1047 | #' @usage na_if(.x, condition, replace_na = FALSE)
1048 | #' @usage nan_if(.x, condition, replace_na = FALSE)
1049 | #' @usage inf_if(.x, condition, replace_na = FALSE)
1050 | #' @usage zero_if(.x, condition, replace_na = FALSE)
1051 | #'
1052 | #' @param .x a vector
1053 | #' @param condition a predicament
1054 | #' @param missing a value that replace missing values in condition.
1055 | #' @param replacement a replacement if condition is TRUE
1056 | #' @param replace_na if TRUE, missing values in condition will be replaced as well
1057 | #'
1058 | #' @return a vector
1059 | #' 
1060 | #' @seealso \code{vignette("s")}, \code{vignette("hablar")}
1061 | #'
1062 | #' @examples
1063 | #' 
1064 | #' v <- c(1, NA, 2)
1065 | #' if_na(v, 100)
1066 | #' 
1067 | #' v <- c(999, NA, 2)
1068 | #' zero_if(v, v == 999)
1069 | #' 
1070 | #' @rdname replacers
1071 | #' @export
1072 | if_na <- function(.x, replacement, missing = NULL){
1073 |   if_else_(is.na(.x), replacement, .x, missing)
1074 | }
1075 | 
1076 | #' @rdname replacers
1077 | #' @export
1078 | if_not_na <- function(.x, replacement, missing = NULL){
1079 |   if_else_(!is.na(.x), replacement, .x, missing)
1080 | }
1081 | 
1082 | #' @rdname replacers
1083 | #' @export
1084 | if_inf <- function(.x, replacement, missing = NULL){
1085 |   if_else_(is.infinite(.x), replacement, .x, missing)
1086 | }
1087 | 
1088 | #' @rdname replacers
1089 | #' @export
1090 | if_nan <- function(.x, replacement, missing = NULL){
1091 |   if_else_(is.nan(.x), replacement, .x, missing)
1092 | }
1093 | 
1094 | #' @rdname replacers
1095 | #' @export
1096 | if_zero <- function(.x, replacement, missing = NULL){
1097 |   if_else_(.x == 0, replacement, .x, missing)
1098 | }
1099 | 
1100 | #' @rdname replacers
1101 | #' @export
1102 | na_if <- function(.x, condition, replace_na = FALSE){
1103 |   if(replace_na){
1104 |     missing <- NA
1105 |   } else {
1106 |     missing <- NA
1107 |   }
1108 |   if_else_(condition, NA, .x, missing)
1109 | }
1110 | 
1111 | #' @rdname replacers
1112 | #' @export
1113 | inf_if <- function(.x, condition, replace_na = FALSE){
1114 |   if(!class(.x) %in% c("numeric")){
1115 |     stop("inf_if only works on numeric vector")
1116 |   }
1117 |   if(replace_na){
1118 |     missing <- Inf
1119 |   } else {
1120 |     missing <- NA
1121 |   }
1122 |   if_else_(condition, Inf, .x)
1123 | }
1124 | 
1125 | #' @rdname replacers
1126 | #' @export
1127 | nan_if <- function(.x, condition, replace_na = FALSE){
1128 |   if(!class(.x) %in% c("numeric")){
1129 |     stop("nan_if only works on numeric vector")
1130 |   }
1131 |   if(replace_na){
1132 |     missing <- NaN
1133 |   } else {
1134 |     missing <- NA
1135 |   }
1136 |   if_else_(condition, NaN, .x)
1137 | }
1138 | 
1139 | #' @rdname replacers
1140 | #' @export
1141 | zero_if <- function(.x, condition, replace_na = FALSE){
1142 |   if(!class(.x) %in% c("integer", "numeric")){
1143 |     stop("zero_if only works on numeric and integer vector")
1144 |   }
1145 |   if(replace_na){
1146 |     if(is.integer(.x)){
1147 |       missing <- 0L
1148 |     } else {
1149 |       missing <- 0
1150 |     }
1151 |   } else {
1152 |     missing <- NA
1153 |   }
1154 |   if_else_(condition, 0, .x, missing)
1155 | }
1156 | 
1157 | # create_dummy -----------------------------------------------------------------
1158 | #' @title Create a simple dummy
1159 | #' @name create_dummy
1160 | #' @aliases dummy
1161 | #' @aliases dummy_
1162 | #'
1163 | #' @description
1164 | #' Creates a vector of the integers 1 and 0. If condition is true it returns 1. If false 0.
1165 | #' If condition returns NA it returns NA, if not explicitly not stated than NA
1166 | #' should be replaced.
1167 | #' 
1168 | #' @usage dummy(condition, missing = NA)
1169 | #' dummy_(condition, missing = 0L)
1170 | #'
1171 | #' @param condition a predicament
1172 | #' @param missing a replacement if condition is NA
1173 | #'
1174 | #' @return a vector of the integers 1, 0 and NA (if not dummy_ is used).
1175 | #' 
1176 | #' @seealso \code{vignette("hablar")}
1177 | #'
1178 | #' @examples
1179 | #' v <- c(10, 5, 3, NA, 9)
1180 | #' dummy(v > 5)
1181 | #' dummy_(v > 5)
1182 | #' 
1183 | #' @rdname create_dummy
1184 | #' @export
1185 | dummy <- function(condition, missing = NA){
1186 |   if_else_(condition, 1L, 0L, as.integer(missing))
1187 | }
1188 | 
1189 | #' @rdname create_dummy
1190 | #' @export
1191 | dummy_ <- function(condition, missing = 0L){
1192 |   if_else_(condition, 1L, 0L, as.integer(missing))
1193 | }
1194 | 
1195 | 
1196 | # repeat_df -----------------------------------------------------------------
1197 | #' @title repeat_df
1198 | #' @name repeat_df
1199 | #'
1200 | #' @description
1201 | #' Repeats a data frame n times. Useful for testing on large data frames.
1202 | #'
1203 | #' @param .df a data frame
1204 | #' @param n times the data frame should be repeated
1205 | #' @param id a character element that creates a column with a number for each repetition
1206 | #'
1207 | #' @return a vector of the integers 1, 0 and NA (if not dummy_ is used).
1208 | #' 
1209 | #' @seealso \code{vignette("hablar")}
1210 | #'
1211 | #' @examples
1212 | #' repeat_df(mtcars, 2)
1213 | #' 
1214 | #' @rdname repeat_df
1215 | #' @export
1216 | repeat_df <- function(.df, n, id = NULL) {
1217 |   purrr::map_dfr(1:n, ~.df, .id = id)
1218 | }
1219 | 
1220 | 
1221 | 
1222 | # find in df ------------------------------------------------------------
1223 | #' @title Special filters
1224 | #' @name find_in_df
1225 | #'
1226 | #' @description
1227 | #' Filters a data frame for special cases. For example, find_duplicates() returns
1228 | #' all rows that are duplicates. If variables are passed to the function
1229 | #' then duplicates for those variables are returned.
1230 | #'
1231 | #' @param .data a data frame
1232 | #' @param ... variables that should be considered. If empty, all variables are used.
1233 | #'
1234 | #' @return a filtered data frame
1235 | #' 
1236 | #' @details irrational values are Inf and NaN
1237 | #' 
1238 | #' @seealso \code{vignette("s")}, \code{vignette("hablar")}
1239 | #' 
1240 | #' @seealso \code{\link{check_df}} to return TRUE or FALSE instead of rows.
1241 | #'
1242 | #' @examples
1243 | #' \dontrun{
1244 | #' df <- data.frame(a = c("A", NA, "B", "C", "C"),
1245 | #'                  b = c(NA, 1, 1, 3, 3), 
1246 | #'                  c = c(7, 8, 2, 3, 3), 
1247 | #'                  stringsAsFactors = FALSE)
1248 | #' 
1249 | #' # Returns duplicated rows
1250 | #' df %>% find_duplicates()
1251 | #' 
1252 | #' # Returns duplicates in specific variables
1253 | #' df %>% find_duplicates(b:c)
1254 | #' 
1255 | #' # Returns rows where NA in variable b
1256 | #' df %>% find_na(b)
1257 | #' }
1258 | #' 
1259 | #' @rdname find_in_df
1260 | #' @export
1261 | find_duplicates <- function(.data, ...){
1262 |   if(!is.data.frame(.data)) {
1263 |     stop("find_duplicates() only works with data frames.")
1264 |   }
1265 |   vars <- apply_columns_quosure(...)
1266 |   
1267 |   .data %>%
1268 |     dplyr::group_by_at(dplyr::vars(!!!vars)) %>%
1269 |     dplyr::filter(dplyr::n() > 1) %>%
1270 |     dplyr::ungroup()
1271 | }
1272 | 
1273 | #' @rdname find_in_df
1274 | #' @export
1275 | find_na <- function(.data, ...){
1276 |   if(!is.data.frame(.data)) {
1277 |     stop("find_na() only works with data frames.")
1278 |   }
1279 |   vars <- apply_columns_quosure(...)
1280 |   
1281 |   .data %>%
1282 |     dplyr::filter_at(dplyr::vars(!!!vars), dplyr::any_vars(is.na(.))) 
1283 | }
1284 | 
1285 | #' @rdname find_in_df
1286 | #' @export
1287 | find_irrational <- function(.data, ...){
1288 |   if(!is.data.frame(.data)) {
1289 |     stop("find_irrational() only works with data frames.")
1290 |   }
1291 |   vars <- apply_columns_quosure(...)
1292 |   
1293 |   .data %>%
1294 |     dplyr::filter_at(dplyr::vars(!!!vars), dplyr::any_vars(is.nan(.) | is.infinite(.))) 
1295 | }
1296 | 
1297 | #' @rdname find_in_df
1298 | #' @export
1299 | find_nan <- function(.data, ...){
1300 |   if(!is.data.frame(.data)) {
1301 |     stop("find_nan() only works with data frames.")
1302 |   }
1303 |   vars <- apply_columns_quosure(...)
1304 |   
1305 |   .data %>%
1306 |     dplyr::filter_at(dplyr::vars(!!!vars), dplyr::any_vars(is.nan(.)))
1307 | }
1308 | 
1309 | #' @rdname find_in_df
1310 | #' @export
1311 | find_inf <- function(.data, ...){
1312 |   if(!is.data.frame(.data)) {
1313 |     stop("find_inf() only works with data frames.")
1314 |   }
1315 |   vars <- apply_columns_quosure(...)
1316 |   
1317 |   .data %>%
1318 |     dplyr::filter_at(dplyr::vars(!!!vars), dplyr::any_vars(is.infinite(.))) }
1319 | 
1320 | 
1321 | # check df ------------------------------------------------------------
1322 | #' @title Special checks
1323 | #' @name check_df
1324 | #'
1325 | #' @description
1326 | #' Returns TRUE if data frame have the specified special cases. For example, find_duplicates() returns
1327 | #' TRUE if any rows are duplicates. If variables are passed to the function
1328 | #' then TRUE or FALSE is returned for those variables.
1329 | #'
1330 | #' @param .data a data frame
1331 | #' @param ... variables that should be considered. If empty, all variables are used.
1332 | #'
1333 | #' @return TRUE or FALSE
1334 | #' 
1335 | #' @details irrational values are Inf and NaN. 'check_complete_set' tests 
1336 | #' if all combinations of elements exists in the data frame.
1337 | #' 
1338 | #' @seealso \code{\link{find_in_df}} to return rows instead of TRUE or FALSE. 
1339 | #' \code{vignette("s")}, \code{vignette("hablar")}
1340 | #'
1341 | #' @examples
1342 | #' \dontrun{
1343 | #' df <- data.frame(a = c("A", NA, "B", "C", "C"),
1344 | #'                  b = c(7, 8, 2, 3, 3),
1345 | #'                  c = c(NA, 1, NaN, 3, 2),
1346 | #'                  stringsAsFactors = FALSE)
1347 | #' 
1348 | #' # Returns FALSE because there is no duplicates
1349 | #' df %>% check_duplicates()
1350 | #' 
1351 | #' # Returns TRUE because there is duplicates in column a through b
1352 | #' df %>% check_duplicates(a:b)
1353 | #' 
1354 | #' # Returns FALSE because there is no NA column b
1355 | #' df %>% check_na(b)
1356 | #' 
1357 | #' # Returns TRUE because there is no NaN column c
1358 | #' df %>% check_nan(c)
1359 | #' }
1360 | #' 
1361 | #' @rdname check_df
1362 | #' @export
1363 | check_duplicates <- function(.data, ...){
1364 |   if(!is.data.frame(.data)) {
1365 |     stop("check_duplicates() only works with data frames.")
1366 |   }
1367 |   vars <- apply_columns_quosure(...)
1368 |   
1369 |   .data %>%
1370 |     dplyr::group_by_at(dplyr::vars(!!!vars)) %>%
1371 |     dplyr::filter(dplyr::n() > 1) %>%
1372 |     dplyr::ungroup() %>%
1373 |     has_rows()
1374 | }
1375 | 
1376 | #' @rdname check_df
1377 | #' @export
1378 | check_na <- function(.data, ...){
1379 |   if(!is.data.frame(.data)) {
1380 |     stop("check_na() only works with data frames.")
1381 |   }
1382 |   vars <- apply_columns_quosure(...)
1383 |   
1384 |   .data %>%
1385 |     dplyr::filter_at(dplyr::vars(!!!vars), dplyr::any_vars(is.na(.))) %>%
1386 |     has_rows()
1387 | }
1388 | 
1389 | #' @rdname check_df
1390 | #' @export
1391 | check_irrational <- function(.data, ...){
1392 |   if(!is.data.frame(.data)) {
1393 |     stop("check_irrational() only works with data frames.")
1394 |   }
1395 |   vars <- apply_columns_quosure(...)
1396 |   
1397 |   .data %>%
1398 |     dplyr::filter_at(dplyr::vars(!!!vars), dplyr::any_vars(is.nan(.) | is.infinite(.))) %>%
1399 |     has_rows()
1400 | }
1401 | 
1402 | #' @rdname check_df
1403 | #' @export
1404 | check_nan <- function(.data, ...){
1405 |   if(!is.data.frame(.data)) {
1406 |     stop("check_nan() only works with data frames.")
1407 |   }
1408 |   vars <- apply_columns_quosure(...)
1409 |   
1410 |   .data %>%
1411 |     dplyr::filter_at(dplyr::vars(!!!vars), dplyr::any_vars(is.nan(.))) %>%
1412 |     has_rows()
1413 | }
1414 | 
1415 | #' @rdname check_df
1416 | #' @export
1417 | check_inf <- function(.data, ...){
1418 |   if(!is.data.frame(.data)) {
1419 |     stop("check_inf() only works with data frames.")
1420 |   }
1421 |   vars <- apply_columns_quosure(...)
1422 |   
1423 |   .data %>%
1424 |     dplyr::filter_at(dplyr::vars(!!!vars), dplyr::any_vars(is.infinite(.))) %>%
1425 |     has_rows()
1426 | }
1427 | 
1428 | #' @rdname check_df
1429 | #' @export
1430 | check_complete_set <- function(.data, ...) {
1431 |   if(!is.data.frame(.data)) {
1432 |     stop("check_duplicates() only works with data frames.")
1433 |   }
1434 |   vars <- apply_columns_quosure(...)
1435 |   if(length(.data %>% dplyr::slice(1) %>% dplyr::select(!!!vars) %>% names()) < 2) {
1436 |     stop("You need to provide at least two columns check for complete set")
1437 |     }
1438 |   data_distinct_nrow <- .data %>%
1439 |     dplyr::select(!!!vars) %>% 
1440 |     dplyr::distinct() %>% 
1441 |     nrow()
1442 |   complete_set_nrow <- purrr::cross_df(purrr::map(.data, ~unique(.))) %>% nrow()
1443 |   if(complete_set_nrow == data_distinct_nrow) {
1444 |     return(TRUE)
1445 |   } else {
1446 |     return(FALSE)
1447 |   }
1448 | }
1449 | 
1450 | # this_date -----------------------------------------------------------------
1451 | #' @title this_date
1452 | #' @name this_date
1453 | #'
1454 | #' @description
1455 | #' Returns the current day, month or year. Day and month returns dates and year a 4 digit number.
1456 | #' 
1457 | #' @examples 
1458 | #' this_day() 
1459 | #' this_month() 
1460 | #' this_year() 
1461 | #' 
1462 | #' @return a date or number
1463 | #' 
1464 | #' @rdname this_date
1465 | #' @export
1466 | this_day <- function() {
1467 |   lubridate::floor_date(Sys.Date(), "day")
1468 | }
1469 | #' @rdname this_date
1470 | #' @export
1471 | this_month <- function() {
1472 |   lubridate::floor_date(Sys.Date(), "month")
1473 | }
1474 | #' @rdname this_date
1475 | #' @export
1476 | this_year <- function() {
1477 |   lubridate::year(lubridate::floor_date(Sys.Date(), "year"))
1478 | }
1479 | 
1480 | 
1481 | # cumulative funs --------------------------------------------------------------
1482 | #' @title cumulative_
1483 | #' @name cumulative_
1484 | #'
1485 | #' @description
1486 | #' cumulative functions. 'cumsum_' is the cumulative sum ignoring missing values.
1487 | #' 'cum_unique' counts the cumulative unique value including NA as ONE value. 
1488 | #' 'cum_unique_' ignores missing values
1489 | #' 
1490 | #' @param .v a vector
1491 | #' @param ignore_na should missing values be ignores?
1492 | #' 
1493 | #' @return a vector
1494 | #' 
1495 | #' @rdname cumulative_
1496 | #' @export
1497 | cumsum_ <- function(.v, ignore_na = TRUE) {
1498 |   purrr::map_dbl(1:length(.v), ~sum_(.v[1:.x], ignore_na = ignore_na))
1499 | }
1500 | 
1501 | #' @rdname cumulative_
1502 | #' @export
1503 | cummean_ <- function(.v, ignore_na = TRUE) {
1504 |   purrr::map_dbl(1:length(.v), ~mean_(.v[1:.x], ignore_na = ignore_na))
1505 | }
1506 | 
1507 | #' @rdname cumulative_
1508 | #' @export
1509 | cum_unique <- function(.v, ignore_na = FALSE) {
1510 |   purrr::map_dbl(1:length(.v), ~n_unique(.v[1:.x], ignore_na = ignore_na))
1511 | }
1512 | 
1513 | #' @rdname cumulative_
1514 | #' @export
1515 | cum_unique_ <- function(.v, ignore_na = TRUE) {
1516 |   cum_unique(.v, ignore_na = ignore_na)
1517 | }
1518 | 
1519 | 
1520 | # given ------------------------------------------------------------------------
1521 | #' @title given
1522 | #' @name given
1523 | #'
1524 | #' @description
1525 | #' Simple function that filters a vector while helping with missing values.
1526 | #' Replacing expression like 'x[x > 3 & !is.null(x)]'
1527 | #' 
1528 | #' @param .x the vector to filter
1529 | #' @param .y a logical vector to filter with
1530 | #' @param ignore_na should NA be removed?
1531 | #' 
1532 | #' @return a vector
1533 | #' 
1534 | #' @examples 
1535 | #' \dontrun{
1536 | #' x <- c(1, 2, NA, 4)
1537 | #' x %>% given_(x >= 2)
1538 | #' }
1539 | #' 
1540 | #' @rdname given
1541 | #' @export
1542 | given <- function(.x, .y, ignore_na = FALSE) {
1543 |   if(ignore_na) {.y <- if_na(.y, FALSE)}
1544 |   if(all(.y == FALSE)) return(NA)
1545 |   .x[.y]
1546 | }
1547 | 
1548 | #' @rdname given
1549 | #' @export
1550 | given_ <- function(.x, .y, ignore_na = TRUE) {
1551 |   given(.x, .y, ignore_na = ignore_na)
1552 | }
1553 | 
1554 | 
1555 | # Set wd to script path --------------------------------------------------------
1556 | #' @title Set wd to script path
1557 | #' @name set_wd_to_script_path
1558 | #'
1559 | #' @description
1560 | #' Sets working directory to the path where the R-script is located. Only works
1561 | #' inside [Rstudio] and in a script (i.e. not in the console). Additionally, the R-script needs to
1562 | #' be saved in a path to work.
1563 | #' 
1564 | #' @usage set_wd_to_script_path()
1565 | #'
1566 | #' @return NULL. In the background the working directory has changed if not any errors occurred.
1567 | #' 
1568 | #' @rdname set_wd_to_script_path
1569 | #' @export
1570 | set_wd_to_script_path <- function(){
1571 |   if(Sys.getenv("RSTUDIO") == "1") {
1572 |     path <- dirname(rstudioapi::getActiveDocumentContext()$path)
1573 |     if(path != "") {
1574 |       setwd(path)
1575 |     } else {
1576 |       stop("You have to save the script before you can set wd to script path.")
1577 |     }} else {
1578 |       stop("set_wd_to_script_path() only works in a script executed in RStudio, i.e. does not work in console.")
1579 |     }
1580 | }
1581 | 
1582 | 
1583 | 
1584 | # Unexported functions / Support Functions -------------------------------------
1585 | 
1586 | # Gets the .Data from mutate call if called from inside a function in a mutate call.
1587 | # That is df %>% mutate(a = f()) and the function f() contains get_caller_df()
1588 |   # get_caller_df <- function (.vars) {
1589 |   #   frames <- sys.frames()
1590 |   #   frames[[length(frames) - 4]]$.data
1591 |   # }
1592 | 
1593 | # A function that sets a class on a generic NA. If not a generic NA, the function returns input.
1594 | check_single_generic_na <- function(.x){
1595 |   if(length(.x) != 1){
1596 |     return(FALSE)
1597 |   }
1598 |   if(!is.na(.x)){
1599 |     return(FALSE)
1600 |   }
1601 |   if(!is.logical(.x)){
1602 |     return(FALSE)
1603 |   }
1604 |   return(TRUE)
1605 | }
1606 | 
1607 | # If not variables are choosed all should be chosen
1608 | apply_columns_quosure <- function(...){
1609 |   .vars <- dplyr::quos(...)
1610 |   if(length(.vars) == 0) .vars <- dplyr::quos(dplyr::everything())
1611 |   return(.vars)
1612 | }
1613 | 
1614 | # Checks if df has any rows
1615 | has_rows <- function(.data, ...) {
1616 |   nrow(.data) > 0
1617 | }
1618 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  6 | 
  7 | <!-- badges: start -->
  8 | [![CRAN status](https://www.r-pkg.org/badges/version/hablar)](https://CRAN.R-project.org/package=hablar)
  9 | [![CRAN Downloads](https://cranlogs.r-pkg.org/badges/hablar)
 10 | [![R-CMD-check](https://github.com/davidsjoberg/hablar/workflows/R-CMD-check/badge.svg)](https://github.com/davidsjoberg/hablar/actions)
 11 | [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable)
 12 | [![R-CMD-check](https://github.com/davidsjoberg/hablar/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/davidsjoberg/hablar/actions/workflows/R-CMD-check.yaml)
 13 | <!-- badges: end -->
 14 | 
 15 | ```{r, echo = FALSE, include = FALSE}
 16 | knitr::opts_chunk$set(
 17 |   collapse = FALSE,
 18 |   comment = "#>",
 19 |   fig.path = "README-"
 20 | )
 21 | options(tibble.print_min = 4L, tibble.print_max = 4L)
 22 | library(dplyr)
 23 | library(hablar)
 24 | mtcars <- as_tibble(mtcars)
 25 | ```
 26 | 
 27 | # hablar
 28 | 
 29 | The mission of `hablar` is for you to get non-astonishing results! That means that functions return what you expected. R has some intuitive quirks that beginners and experienced programmers fail to identify. Some of the first weird features of R that `hablar` solves:
 30 | 
 31 | * Missing values `NA` and irrational values `Inf`, `NaN` is dominant. For example, in R `sum(c(1, 2, NA))` is `NA` and not 3. In `hablar` the addition of an underscore `sum_(c(1, 2, NA))` returns 3, as is often expected.
 32 | 
 33 | * Factors (categorical variables) that are converted to numeric returns the number of the category rather than the value. In `hablar` the `convert()` function always changes the type of the values.
 34 | 
 35 | * Finding duplicates, and rows with `NA` can be cumbersome. The functions `find_duplicates()` and `find_na()` make it easy to find where the data frame needs to be fixed. When the issues are found the utility replacement functions, e.g. `if_else_()`, `if_na()`, `zero_if()` easily fixes many of the most common problems you face.
 36 | 
 37 | `hablar` follows the syntax API of `tidyverse` and works seamlessly with `dplyr` and `tidyselect`. 
 38 | 
 39 | ## Installation
 40 | 
 41 | You can install `hablar` from CRAN:
 42 | 
 43 | ```{r cran_installation, eval = FALSE}
 44 | install.packages("hablar")
 45 | ```
 46 | 
 47 | Or preferably:
 48 | 
 49 | ```{r gh_installation, eval = FALSE}
 50 | if (!require("pacman")) install.packages("pacman")
 51 | pacman::p_load(tidyverse, hablar)
 52 | ```
 53 | 
 54 | 
 55 | ## convert
 56 | 
 57 | The most useful function of `hablar` is maybe convert. convert helps the user to quickly and dynamically change data type of columns in a data frame. convert always converts factors to character before further conversion. Works with `tidyselect`.
 58 | 
 59 | 
 60 | ```{r example1}
 61 | mtcars %>% 
 62 |   convert(int(cyl, am),
 63 |           fct(disp:drat),
 64 |           chr(contains("w")))
 65 | ```
 66 | 
 67 | For more information type `vignette("convert")` in the console.
 68 | 
 69 | ## Non-Astonishing summary functions
 70 | 
 71 | Often summary function like min, max and mean return surprising results. Combining `_` with your summary function ensures you that you will get a result, if there is one in your data. It ignores irrational numbers like `Inf` and `NaN` as well as `NA`. If all elements are `NA, Inf, NaN` it returns NA.
 72 | 
 73 | ```{r example2}
 74 | starwars %>% 
 75 |   summarise(min_height_baseR = min(height),
 76 |             min_height_hablar = min_(height))
 77 | ```
 78 | 
 79 | The function `min_` omitted that the variable `height` contained `NA`. For more information type `vignette("s")` in the console.
 80 | 
 81 | ## Find the problem
 82 | 
 83 | When cleaning data you spend a lot of time understanding your data. Sometimes you get more row than you expected when doing a `left_join()`. Or you did not know that certain column contained missing values `NA` or irrational values like `Inf` or `NaN`.
 84 | 
 85 | In `hablar` the `find_*` functions speeds up your search for the problem. To find duplicated rows you simply `df %>% find_duplicates()`. You can also find duplicates in in specific columns, which can be useful before joins.
 86 | 
 87 | ```{r}
 88 | # Create df with duplicates
 89 | df <- mtcars %>% 
 90 |   bind_rows(mtcars %>% slice(1, 5, 9))
 91 | 
 92 | # Return rows with duplicates in cyl and am
 93 | df %>% 
 94 |   find_duplicates(cyl, am)
 95 | ```
 96 | 
 97 | There are also find functions for other cases. For example `find_na()` returns rows with missing values.
 98 | 
 99 | ```{r}
100 | starwars %>% 
101 |   find_na(height)
102 | ```
103 | 
104 | If you rather want a Boolean value instead then e.g. `check_duplicates()` returns `TRUE` if the data frame contains duplicates, otherwise it returns `FALSE`.
105 | 
106 | ##### **...apply the solution**
107 | 
108 | Let's say that we have found a problem is caused by missing values in the column `height` and you want to replace all missing values with the integer 100. `hablar` comes with an additional ways of doing if-or-else. 
109 | 
110 | ```{r}
111 | starwars %>% 
112 |   find_na(height) %>% 
113 |   mutate(height = if_na(height, 100L))
114 | ```
115 | 
116 | In the chunk above we successfully replaced all missing heights with the integer 100. `hablar` also contain the self explained:
117 | 
118 | * `if_zero()` and `zero_if()`
119 | * `if_inf()` and `inf_if()`
120 | * `if_nan()` and `nan_if()`
121 | 
122 | which works in the same way as the examples above.
123 | 
124 | ## retype
125 | 
126 | A function for quick and dirty data type conversion. All columns are evaluated and converted to the simplest possible without loosing any information.
127 | 
128 | ```{r example3}
129 | mtcars %>% retype()
130 | ```
131 | 
132 | All variables with only integer were converted to type integer. For more information type `vignette("retype")` in the console.
133 | 
134 | 
135 | ### Note
136 | Hablar means 'speak R' in Spanish. 
137 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  3 | <!-- badges: start -->
  4 | 
  5 | [![CRAN
  6 | status](https://www.r-pkg.org/badges/version/hablar)](https://CRAN.R-project.org/package=hablar)
  7 | \[![CRAN Downloads](https://cranlogs.r-pkg.org/badges/hablar)
  8 | [![R-CMD-check](https://github.com/davidsjoberg/hablar/workflows/R-CMD-check/badge.svg)](https://github.com/davidsjoberg/hablar/actions)
  9 | [![Lifecycle:
 10 | stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable)
 11 | <!-- badges: end -->
 12 | 
 13 | # hablar
 14 | 
 15 | The mission of `hablar` is for you to get non-astonishing results! That
 16 | means that functions return what you expected. R has some intuitive
 17 | quirks that beginners and experienced programmers fail to identify. Some
 18 | of the first weird features of R that `hablar` solves:
 19 | 
 20 | -   Missing values `NA` and irrational values `Inf`, `NaN` is dominant.
 21 |     For example, in R `sum(c(1, 2, NA))` is `NA` and not 3. In `hablar`
 22 |     the addition of an underscore `sum_(c(1, 2, NA))` returns 3, as is
 23 |     often expected.
 24 | 
 25 | -   Factors (categorical variables) that are converted to numeric
 26 |     returns the number of the category rather than the value. In
 27 |     `hablar` the `convert()` function always changes the type of the
 28 |     values.
 29 | 
 30 | -   Finding duplicates, and rows with `NA` can be cumbersome. The
 31 |     functions `find_duplicates()` and `find_na()` make it easy to find
 32 |     where the data frame needs to be fixed. When the issues are found
 33 |     the utility replacement functions, e.g. `if_else_()`, `if_na()`,
 34 |     `zero_if()` easily fixes many of the most common problems you face.
 35 | 
 36 | `hablar` follows the syntax API of `tidyverse` and works seamlessly with
 37 | `dplyr` and `tidyselect`.
 38 | 
 39 | ## Installation
 40 | 
 41 | You can install `hablar` from CRAN:
 42 | 
 43 | ``` r
 44 | install.packages("hablar")
 45 | ```
 46 | 
 47 | Or preferably:
 48 | 
 49 | ``` r
 50 | if (!require("pacman")) install.packages("pacman")
 51 | pacman::p_load(tidyverse, hablar)
 52 | ```
 53 | 
 54 | ## convert
 55 | 
 56 | The most useful function of `hablar` is maybe convert. convert helps the
 57 | user to quickly and dynamically change data type of columns in a data
 58 | frame. convert always converts factors to character before further
 59 | conversion. Works with `tidyselect`.
 60 | 
 61 | ``` r
 62 | mtcars %>% 
 63 |   convert(int(cyl, am),
 64 |           fct(disp:drat),
 65 |           chr(contains("w")))
 66 | ```
 67 | 
 68 |     #> # A tibble: 32 x 11
 69 |     #>     mpg   cyl disp  hp    drat  wt     qsec    vs    am  gear  carb
 70 |     #>   <dbl> <int> <fct> <fct> <fct> <chr> <dbl> <dbl> <int> <dbl> <dbl>
 71 |     #> 1  21       6 160   110   3.9   2.62   16.5     0     1     4     4
 72 |     #> 2  21       6 160   110   3.9   2.875  17.0     0     1     4     4
 73 |     #> 3  22.8     4 108   93    3.85  2.32   18.6     1     1     4     1
 74 |     #> 4  21.4     6 258   110   3.08  3.215  19.4     1     0     3     1
 75 |     #> # ... with 28 more rows
 76 | 
 77 | For more information type `vignette("convert")` in the console.
 78 | 
 79 | ## Non-Astonishing summary functions
 80 | 
 81 | Often summary function like min, max and mean return surprising results.
 82 | Combining `_` with your summary function ensures you that you will get a
 83 | result, if there is one in your data. It ignores irrational numbers like
 84 | `Inf` and `NaN` as well as `NA`. If all elements are `NA, Inf, NaN` it
 85 | returns NA.
 86 | 
 87 | ``` r
 88 | starwars %>% 
 89 |   summarise(min_height_baseR = min(height),
 90 |             min_height_hablar = min_(height))
 91 | ```
 92 | 
 93 |     #> # A tibble: 1 x 2
 94 |     #>   min_height_baseR min_height_hablar
 95 |     #>              <int>             <int>
 96 |     #> 1               NA                66
 97 | 
 98 | The function `min_` omitted that the variable `height` contained `NA`.
 99 | For more information type `vignette("s")` in the console.
100 | 
101 | ## Find the problem
102 | 
103 | When cleaning data you spend a lot of time understanding your data.
104 | Sometimes you get more row than you expected when doing a `left_join()`.
105 | Or you did not know that certain column contained missing values `NA` or
106 | irrational values like `Inf` or `NaN`.
107 | 
108 | In `hablar` the `find_*` functions speeds up your search for the
109 | problem. To find duplicated rows you simply `df %>% find_duplicates()`.
110 | You can also find duplicates in in specific columns, which can be useful
111 | before joins.
112 | 
113 | ``` r
114 | # Create df with duplicates
115 | df <- mtcars %>% 
116 |   bind_rows(mtcars %>% slice(1, 5, 9))
117 | 
118 | # Return rows with duplicates in cyl and am
119 | df %>% 
120 |   find_duplicates(cyl, am)
121 | ```
122 | 
123 |     #> # A tibble: 35 x 11
124 |     #>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
125 |     #>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
126 |     #> 1  21       6   160   110  3.9   2.62  16.5     0     1     4     4
127 |     #> 2  21       6   160   110  3.9   2.88  17.0     0     1     4     4
128 |     #> 3  22.8     4   108    93  3.85  2.32  18.6     1     1     4     1
129 |     #> 4  21.4     6   258   110  3.08  3.22  19.4     1     0     3     1
130 |     #> # ... with 31 more rows
131 | 
132 | There are also find functions for other cases. For example `find_na()`
133 | returns rows with missing values.
134 | 
135 | ``` r
136 | starwars %>% 
137 |   find_na(height)
138 | ```
139 | 
140 |     #> # A tibble: 6 x 14
141 |     #>   name     height  mass hair_color skin_color eye_color birth_year sex   gender 
142 |     #>   <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr>  
143 |     #> 1 Arvel C~     NA    NA brown      fair       brown             NA male  mascul~
144 |     #> 2 Finn         NA    NA black      dark       dark              NA male  mascul~
145 |     #> 3 Rey          NA    NA brown      light      hazel             NA fema~ femini~
146 |     #> 4 Poe Dam~     NA    NA brown      light      brown             NA male  mascul~
147 |     #> # ... with 2 more rows, and 5 more variables: homeworld <chr>, species <chr>,
148 |     #> #   films <list>, vehicles <list>, starships <list>
149 | 
150 | If you rather want a Boolean value instead then
151 | e.g. `check_duplicates()` returns `TRUE` if the data frame contains
152 | duplicates, otherwise it returns `FALSE`.
153 | 
154 | ##### **…apply the solution**
155 | 
156 | Let’s say that we have found a problem is caused by missing values in
157 | the column `height` and you want to replace all missing values with the
158 | integer 100. `hablar` comes with an additional ways of doing if-or-else.
159 | 
160 | ``` r
161 | starwars %>% 
162 |   find_na(height) %>% 
163 |   mutate(height = if_na(height, 100L))
164 | ```
165 | 
166 |     #> # A tibble: 6 x 14
167 |     #>   name     height  mass hair_color skin_color eye_color birth_year sex   gender 
168 |     #>   <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> <chr>  
169 |     #> 1 Arvel C~    100    NA brown      fair       brown             NA male  mascul~
170 |     #> 2 Finn        100    NA black      dark       dark              NA male  mascul~
171 |     #> 3 Rey         100    NA brown      light      hazel             NA fema~ femini~
172 |     #> 4 Poe Dam~    100    NA brown      light      brown             NA male  mascul~
173 |     #> # ... with 2 more rows, and 5 more variables: homeworld <chr>, species <chr>,
174 |     #> #   films <list>, vehicles <list>, starships <list>
175 | 
176 | In the chunk above we successfully replaced all missing heights with the
177 | integer 100. `hablar` also contain the self explained:
178 | 
179 | -   `if_zero()` and `zero_if()`
180 | -   `if_inf()` and `inf_if()`
181 | -   `if_nan()` and `nan_if()`
182 | 
183 | which works in the same way as the examples above.
184 | 
185 | ## retype
186 | 
187 | A function for quick and dirty data type conversion. All columns are
188 | evaluated and converted to the simplest possible without loosing any
189 | information.
190 | 
191 | ``` r
192 | mtcars %>% retype()
193 | ```
194 | 
195 |     #> # A tibble: 32 x 11
196 |     #>     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
197 |     #>   <dbl> <int> <dbl> <int> <dbl> <dbl> <dbl> <int> <int> <int> <int>
198 |     #> 1  21       6   160   110  3.9   2.62  16.5     0     1     4     4
199 |     #> 2  21       6   160   110  3.9   2.88  17.0     0     1     4     4
200 |     #> 3  22.8     4   108    93  3.85  2.32  18.6     1     1     4     1
201 |     #> 4  21.4     6   258   110  3.08  3.22  19.4     1     0     3     1
202 |     #> # ... with 28 more rows
203 | 
204 | All variables with only integer were converted to type integer. For more
205 | information type `vignette("retype")` in the console.
206 | 
207 | ### Note
208 | 
209 | Hablar means ‘speak R’ in Spanish.
210 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | On latest Windows, Ubuntu, ubuntu-devel and Mac:
2 | 
3 | ## R CMD check results
4 | 
5 | 0 errors | 0 warnings | 0 note
6 | 
7 | 
8 | 
9 | 


--------------------------------------------------------------------------------
/hablar.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | 


--------------------------------------------------------------------------------
/man/aggregators.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{wrapper - s and summary funs}
 4 | \alias{wrapper - s and summary funs}
 5 | \alias{max_}
 6 | \alias{sum_}
 7 | \alias{mean_}
 8 | \alias{min_}
 9 | \alias{sd_}
10 | \alias{var_}
11 | \alias{first_}
12 | \alias{last_}
13 | \alias{median_}
14 | \alias{first_non_na}
15 | \alias{squeeze}
16 | \alias{squeeze_}
17 | \title{Combine aggregate functions and s}
18 | \usage{
19 | max_(.x, ignore_na = TRUE)
20 | 
21 | min_(.x, ignore_na = TRUE)
22 | 
23 | sum_(.x, ignore_na = TRUE)
24 | 
25 | mean_(.x, ignore_na = TRUE)
26 | 
27 | median_(.x, ignore_na = TRUE)
28 | 
29 | sd_(.x, ignore_na = TRUE)
30 | 
31 | var_(.x, ignore_na = TRUE)
32 | 
33 | first_(.x, ignore_na = TRUE)
34 | 
35 | last_(.x, ignore_na = TRUE)
36 | 
37 | first_non_na(.x)
38 | 
39 | squeeze(.x, ignore_na = FALSE)
40 | 
41 | squeeze_(.x, ignore_na = TRUE)
42 | }
43 | \arguments{
44 | \item{.x}{a single vector}
45 | 
46 | \item{ignore_na}{if false missing values are not omitted.}
47 | }
48 | \value{
49 | a single aggregated value
50 | }
51 | \description{
52 | \code{[summary function_*]} functions are simple wrappers of aggregate function
53 | and the \code{s} function. \code{s} removes all non-values,
54 | i.e. \code{NA,Inf,NaN}  from a vector.
55 | However, if the length is 0 it returns NA. The result is then passed to the
56 | corresponding aggregation function. For example,
57 | \code{min_(x)} is identical to \code{min(s(x))}. Please read \code{vignette("s")} for more information.
58 | }
59 | \details{
60 | 'first_non_na' is a faster version of 'first' since it only search for a non NA value until it finds one.
61 | 'squeeze' on the other hand checks if all elements are equal and then returns only that value.
62 | }
63 | \examples{
64 | ## sum_ on non-rational numeric vector
65 | vector <- c(7, NaN, -Inf, 4)
66 | sum_(vector)
67 | 
68 | ## Min of vector with length 0
69 | vector <- c()
70 | # With a wrapped s
71 | min_(vector)
72 | 
73 | ## Max of vector with only NA
74 | # With a wrapped s
75 | max_(vector)
76 | 
77 | ## Use of s when NA should not be removed
78 | vector <- c(7, Inf, NA, 4)
79 | # With a wrapped s
80 | sum_(vector, ignore_na = FALSE)
81 | 
82 | }
83 | \seealso{
84 | \code{vignette("convert")}, \code{vignette("hablar")}
85 | }
86 | 


--------------------------------------------------------------------------------
/man/as_reliable.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{as_reliable}
 4 | \alias{as_reliable}
 5 | \alias{as_reliable_num}
 6 | \alias{as_reliable_int}
 7 | \alias{as_reliable_lgl}
 8 | \alias{as_reliable_dte}
 9 | \alias{as_reliable_dtm}
10 | \title{Reliable conversion to another data type}
11 | \usage{
12 | as_reliable_num(.x, ...)
13 | 
14 | as_reliable_int(.x, ...)
15 | 
16 | as_reliable_lgl(.x, ...)
17 | 
18 | as_reliable_dte(.x, origin = "1970-01-01", ...)
19 | 
20 | as_reliable_dtm(.x, origin = "1970-01-01", tz = "UTC", ...)
21 | 
22 | as_reliable_int(.x, ...)
23 | 
24 | as_reliable_lgl(.x, ...)
25 | 
26 | as_reliable_dte(.x, origin = "1970-01-01", ...)
27 | 
28 | as_reliable_dtm(.x, origin = "1970-01-01", tz = "UTC", ...)
29 | }
30 | \arguments{
31 | \item{.x}{vector}
32 | 
33 | \item{...}{additional arguments}
34 | 
35 | \item{origin}{argument to set origin for date/date time.}
36 | 
37 | \item{tz}{argument to set time zone for date/date time. Default is UTC.}
38 | }
39 | \value{
40 | vector
41 | }
42 | \description{
43 | Support functions for the \code{convert} function. These functions coerces vectors to a new data type, e.g. \code{as.numeric}
44 | except that it converts factors to character first.
45 | See \code{\link{convert}} for more information.
46 | }
47 | \examples{
48 | x <- as.factor(c("1", "3.5"))
49 | as_reliable_num(x)
50 | 
51 | x <- as.factor(c("9", "7"))
52 | as_reliable_int(x)
53 | 
54 | x <- as.factor(c("1", "0"))
55 | as_reliable_lgl(x)
56 | 
57 | }
58 | \seealso{
59 | \code{vignette("convert")}, \code{vignette("hablar")}
60 | }
61 | 


--------------------------------------------------------------------------------
/man/check_df.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{check_df}
 4 | \alias{check_df}
 5 | \alias{check_duplicates}
 6 | \alias{check_na}
 7 | \alias{check_irrational}
 8 | \alias{check_nan}
 9 | \alias{check_inf}
10 | \alias{check_complete_set}
11 | \title{Special checks}
12 | \usage{
13 | check_duplicates(.data, ...)
14 | 
15 | check_na(.data, ...)
16 | 
17 | check_irrational(.data, ...)
18 | 
19 | check_nan(.data, ...)
20 | 
21 | check_inf(.data, ...)
22 | 
23 | check_complete_set(.data, ...)
24 | }
25 | \arguments{
26 | \item{.data}{a data frame}
27 | 
28 | \item{...}{variables that should be considered. If empty, all variables are used.}
29 | }
30 | \value{
31 | TRUE or FALSE
32 | }
33 | \description{
34 | Returns TRUE if data frame have the specified special cases. For example, find_duplicates() returns
35 | TRUE if any rows are duplicates. If variables are passed to the function
36 | then TRUE or FALSE is returned for those variables.
37 | }
38 | \details{
39 | irrational values are Inf and NaN. 'check_complete_set' tests 
40 | if all combinations of elements exists in the data frame.
41 | }
42 | \examples{
43 | \dontrun{
44 | df <- data.frame(a = c("A", NA, "B", "C", "C"),
45 |                  b = c(7, 8, 2, 3, 3),
46 |                  c = c(NA, 1, NaN, 3, 2),
47 |                  stringsAsFactors = FALSE)
48 | 
49 | # Returns FALSE because there is no duplicates
50 | df \%>\% check_duplicates()
51 | 
52 | # Returns TRUE because there is duplicates in column a through b
53 | df \%>\% check_duplicates(a:b)
54 | 
55 | # Returns FALSE because there is no NA column b
56 | df \%>\% check_na(b)
57 | 
58 | # Returns TRUE because there is no NaN column c
59 | df \%>\% check_nan(c)
60 | }
61 | 
62 | }
63 | \seealso{
64 | \code{\link{find_in_df}} to return rows instead of TRUE or FALSE. 
65 | \code{vignette("s")}, \code{vignette("hablar")}
66 | }
67 | 


--------------------------------------------------------------------------------
/man/convert.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{convert}
 4 | \alias{convert}
 5 | \alias{num}
 6 | \alias{dbl}
 7 | \alias{int}
 8 | \alias{dte}
 9 | \alias{dtm}
10 | \alias{fct}
11 | \alias{lgl}
12 | \alias{\code{convert}}
13 | \alias{converts}
14 | \alias{columns}
15 | \alias{to}
16 | \alias{new}
17 | \alias{classes}
18 | \alias{through}
19 | \alias{scoping}
20 | \alias{functions.}
21 | \alias{Always}
22 | \alias{factors}
23 | \alias{character}
24 | \alias{before}
25 | \alias{conversion}
26 | \alias{Type}
27 | \alias{\code{vignette("convert")}}
28 | \alias{in}
29 | \alias{the}
30 | \alias{console}
31 | \alias{for}
32 | \alias{more}
33 | \alias{information.}
34 | \alias{chr}
35 | \title{Convert data type of columns}
36 | \usage{
37 | num(..., .args = list())
38 | 
39 | chr(..., .args = list())
40 | 
41 | lgl(..., .args = list())
42 | 
43 | int(..., .args = list())
44 | 
45 | dbl(..., .args = list())
46 | 
47 | fct(..., .args = list())
48 | 
49 | dtm(..., .args = list())
50 | 
51 | dte(..., .args = list())
52 | 
53 | convert(.x, ...)
54 | }
55 | \arguments{
56 | \item{...}{Scoping functions, see details}
57 | 
58 | \item{.args}{extra argument to be passed to support function.}
59 | 
60 | \item{.x}{A data.frame}
61 | }
62 | \value{
63 | a tbl data frame
64 | }
65 | \description{
66 | Convert data type of columns
67 | }
68 | \examples{
69 | \dontrun{
70 | 
71 | # Change one column to numeric and another to character
72 | mtcars \%>\% 
73 |   convert(num(gear),
74 |           chr(mpg))
75 | 
76 | 
77 | # Changing multiple data types on multiple columns
78 | mtcars \%>\% 
79 |   convert(int(hp,
80 |               wt),
81 |           fct(qsec,
82 |               cyl,
83 |               drat))
84 | 
85 | # Also works with tidyselect convenience functions
86 | mtcars \%>\%
87 |   convert(int(vs:carb),
88 |           fct(last_col()))
89 | 
90 | }
91 | 
92 | }
93 | \seealso{
94 | \code{vignette("convert")}, \code{vignette("hablar")}
95 | }
96 | 


--------------------------------------------------------------------------------
/man/could_this_be_that.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{could_this_be_that}
 4 | \alias{could_this_be_that}
 5 | \alias{could_chr_be_num}
 6 | \alias{could_chr_be_int}
 7 | \alias{could_num_be_int}
 8 | \alias{could_chr_be_dtm}
 9 | \alias{could_dtm_be_dte}
10 | \alias{could_chr_be_dte}
11 | \title{Tests is a vector could be of another data type}
12 | \usage{
13 | could_chr_be_num(.x)
14 | 
15 | could_chr_be_int(.x)
16 | 
17 | could_num_be_int(.x)
18 | 
19 | could_chr_be_dtm(.x)
20 | 
21 | could_chr_be_dte(.x)
22 | }
23 | \arguments{
24 | \item{.x}{vector of the data type that should be tested.}
25 | }
26 | \value{
27 | TRUE or FALSE
28 | }
29 | \description{
30 | Tests if vector could be a another data type without errors.
31 | }
32 | \details{
33 | The name logic of \code{could_chr_be_num} should be interpreted as:
34 | Could this character vector be a numeric vector?
35 | The same logic goes for all functions named could_this_be_that.
36 | }
37 | \examples{
38 | x <- c("1", "3", "7")
39 | could_chr_be_num(x)
40 | could_chr_be_int(x)
41 | 
42 | x <- c("abc", "3", "Hello world")
43 | could_chr_be_num(x)
44 | 
45 | x <- c(NA, "3.45", "5,98")
46 | could_chr_be_num(x)
47 | could_chr_be_int(x)
48 | 
49 | x <- as.numeric(c(3.45, 1.5))
50 | could_num_be_int(x)
51 | 
52 | x <- as.numeric(c(7, 2))
53 | could_num_be_int(x)
54 | 
55 | }
56 | \seealso{
57 | \code{vignette("s")}, \code{vignette("hablar")}
58 | }
59 | 


--------------------------------------------------------------------------------
/man/create_dummy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{create_dummy}
 4 | \alias{create_dummy}
 5 | \alias{dummy}
 6 | \alias{dummy_}
 7 | \title{Create a simple dummy}
 8 | \usage{
 9 | dummy(condition, missing = NA)
10 | dummy_(condition, missing = 0L)
11 | 
12 | dummy_(condition, missing = 0L)
13 | }
14 | \arguments{
15 | \item{condition}{a predicament}
16 | 
17 | \item{missing}{a replacement if condition is NA}
18 | }
19 | \value{
20 | a vector of the integers 1, 0 and NA (if not dummy_ is used).
21 | }
22 | \description{
23 | Creates a vector of the integers 1 and 0. If condition is true it returns 1. If false 0.
24 | If condition returns NA it returns NA, if not explicitly not stated than NA
25 | should be replaced.
26 | }
27 | \examples{
28 | v <- c(10, 5, 3, NA, 9)
29 | dummy(v > 5)
30 | dummy_(v > 5)
31 | 
32 | }
33 | \seealso{
34 | \code{vignette("hablar")}
35 | }
36 | 


--------------------------------------------------------------------------------
/man/cumulative_.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{cumulative_}
 4 | \alias{cumulative_}
 5 | \alias{cumsum_}
 6 | \alias{cummean_}
 7 | \alias{cum_unique}
 8 | \alias{cum_unique_}
 9 | \title{cumulative_}
10 | \usage{
11 | cumsum_(.v, ignore_na = TRUE)
12 | 
13 | cummean_(.v, ignore_na = TRUE)
14 | 
15 | cum_unique(.v, ignore_na = FALSE)
16 | 
17 | cum_unique_(.v, ignore_na = TRUE)
18 | }
19 | \arguments{
20 | \item{.v}{a vector}
21 | 
22 | \item{ignore_na}{should missing values be ignores?}
23 | }
24 | \value{
25 | a vector
26 | }
27 | \description{
28 | cumulative functions. 'cumsum_' is the cumulative sum ignoring missing values.
29 | 'cum_unique' counts the cumulative unique value including NA as ONE value. 
30 | 'cum_unique_' ignores missing values
31 | }
32 | 


--------------------------------------------------------------------------------
/man/find_in_df.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{find_in_df}
 4 | \alias{find_in_df}
 5 | \alias{find_duplicates}
 6 | \alias{find_na}
 7 | \alias{find_irrational}
 8 | \alias{find_nan}
 9 | \alias{find_inf}
10 | \title{Special filters}
11 | \usage{
12 | find_duplicates(.data, ...)
13 | 
14 | find_na(.data, ...)
15 | 
16 | find_irrational(.data, ...)
17 | 
18 | find_nan(.data, ...)
19 | 
20 | find_inf(.data, ...)
21 | }
22 | \arguments{
23 | \item{.data}{a data frame}
24 | 
25 | \item{...}{variables that should be considered. If empty, all variables are used.}
26 | }
27 | \value{
28 | a filtered data frame
29 | }
30 | \description{
31 | Filters a data frame for special cases. For example, find_duplicates() returns
32 | all rows that are duplicates. If variables are passed to the function
33 | then duplicates for those variables are returned.
34 | }
35 | \details{
36 | irrational values are Inf and NaN
37 | }
38 | \examples{
39 | \dontrun{
40 | df <- data.frame(a = c("A", NA, "B", "C", "C"),
41 |                  b = c(NA, 1, 1, 3, 3), 
42 |                  c = c(7, 8, 2, 3, 3), 
43 |                  stringsAsFactors = FALSE)
44 | 
45 | # Returns duplicated rows
46 | df \%>\% find_duplicates()
47 | 
48 | # Returns duplicates in specific variables
49 | df \%>\% find_duplicates(b:c)
50 | 
51 | # Returns rows where NA in variable b
52 | df \%>\% find_na(b)
53 | }
54 | 
55 | }
56 | \seealso{
57 | \code{vignette("s")}, \code{vignette("hablar")}
58 | 
59 | \code{\link{check_df}} to return TRUE or FALSE instead of rows.
60 | }
61 | 


--------------------------------------------------------------------------------
/man/given.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{given}
 4 | \alias{given}
 5 | \alias{given_}
 6 | \title{given}
 7 | \usage{
 8 | given(.x, .y, ignore_na = FALSE)
 9 | 
10 | given_(.x, .y, ignore_na = TRUE)
11 | }
12 | \arguments{
13 | \item{.x}{the vector to filter}
14 | 
15 | \item{.y}{a logical vector to filter with}
16 | 
17 | \item{ignore_na}{should NA be removed?}
18 | }
19 | \value{
20 | a vector
21 | }
22 | \description{
23 | Simple function that filters a vector while helping with missing values.
24 | Replacing expression like 'x[x > 3 & !is.null(x)]'
25 | }
26 | \examples{
27 | \dontrun{
28 | x <- c(1, 2, NA, 4)
29 | x \%>\% given_(x >= 2)
30 | }
31 | 
32 | }
33 | 


--------------------------------------------------------------------------------
/man/if_else_.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{if_else_}
 4 | \alias{if_else_}
 5 | \title{if_this_else_that_}
 6 | \usage{
 7 | if_else_(condition, true, false, missing = NULL)
 8 | }
 9 | \arguments{
10 | \item{condition}{logical vector}
11 | 
12 | \item{true}{value to replace if condition is true. Must be same length as condition or 1.}
13 | 
14 | \item{false}{value to replace if condition is false. Must be same length as condition or 1.}
15 | 
16 | \item{missing}{optional. a replacement if condition returns NA. Must be same length as condition or 1.}
17 | }
18 | \value{
19 | a vector
20 | }
21 | \description{
22 | A vectorised if or else function. It checks that the true or false (or the optional missing)
23 | arguments have the same type. However it accepts a generic NA. Built upon
24 | dplyr's [if_else()] function. The only difference is that the user do not have to specify
25 | the type of NA. if_else_ is faster than base [ifelse()] and a tad slower than 
26 | dplyr's [if_else()]. Attributes are taken from either true or false because one
27 | generic NA.
28 | }
29 | \details{
30 | If the returning vector have attributes (e.g. for factors) it returns the attributes
31 | for the first non-generic NA in the order true, false and then missing.
32 | }
33 | \examples{
34 | 
35 | v <- c(TRUE, FALSE, TRUE, FALSE)
36 | if_else_(v, "true", "false")
37 | 
38 | v <- c(TRUE, FALSE, NA, FALSE)
39 | if_else_(v, 1, NA, 999)
40 | 
41 | }
42 | \seealso{
43 | \code{vignette("s")}, \code{vignette("hablar")}
44 | }
45 | 


--------------------------------------------------------------------------------
/man/math.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{math ignore NA in math funs}
 4 | \alias{math ignore NA in math funs}
 5 | \alias{\%minus_\%}
 6 | \alias{\%plus_\%}
 7 | \title{Ignore NA in math}
 8 | \usage{
 9 | .x \%minus_\% .y
10 | 
11 | .x \%plus_\% .y
12 | }
13 | \arguments{
14 | \item{.x}{numeric or integer element}
15 | 
16 | \item{.y}{numeric or integer element}
17 | }
18 | \value{
19 | a single value
20 | }
21 | \description{
22 | Simplifying math functions are simple wrappers of math function (- +).
23 | If any of the left-hand side or right-hand side is NA, Inf or NaN it 
24 | returns any rational value, if there is any. 
25 | 
26 | However, if the both values are irrational it returns NA. 
27 | The result is then passed to the
28 | corresponding math function.
29 | }
30 | \examples{
31 | \dontrun{# The simplest case
32 | 3 \%minus_\% 2
33 | 
34 | # But with NA it returns 3 as if the NA were zero
35 | 3 \%minus_\% NA
36 | 
37 | # It doesnt matter if the irrational number is on left- or right-hand.
38 | NA \%plus_\% 5
39 | }
40 | 
41 | }
42 | \seealso{
43 | \code{vignette("s")}, \code{vignette("hablar")}
44 | }
45 | 


--------------------------------------------------------------------------------
/man/n_unique.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{n_unique count unique elements}
 4 | \alias{n_unique count unique elements}
 5 | \alias{n_unique}
 6 | \alias{n_unique_}
 7 | \title{n_unique}
 8 | \usage{
 9 | n_unique(.x, ignore_na = FALSE)
10 | 
11 | n_unique_(.x, ignore_na = TRUE)
12 | 
13 | n_unique_(.x, ignore_na = TRUE)
14 | }
15 | \arguments{
16 | \item{.x}{a vector}
17 | 
18 | \item{ignore_na}{a logical indicating whether missing values should be removed}
19 | }
20 | \value{
21 | a single numeric vector of the same length as the data frame it
22 | is applied to.
23 | }
24 | \description{
25 | Simple wrapper for length(unique(.x)). If you use n_unique_(.x) then NA
26 | is ignored when counting.
27 | }
28 | \examples{
29 | 
30 | # Simple
31 | n_unique(c(1, 2, 2, 3))
32 | 
33 | # Same result as above eventhough vector includes NA
34 | n_unique_(c(1, 2, 2, 3, NA))
35 | 
36 | }
37 | \seealso{
38 | \code{vignette("s")}, \code{vignette("hablar")}
39 | }
40 | 


--------------------------------------------------------------------------------
/man/rationalize.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{rationalize}
 4 | \alias{rationalize}
 5 | \alias{rationalize.default}
 6 | \alias{rationalize.numeric}
 7 | \alias{rationalize.data.frame}
 8 | \title{Only allow rational values in numeric vectors
 9 | 
10 | \code{rationalize} transforms all numeric elements to be rational values or NA,
11 | thus removes all \code{NaN,Inf} and replaces them with \code{NA}.}
12 | \usage{
13 | rationalize(.x, ...)
14 | 
15 | \method{rationalize}{default}(.x, ...)
16 | 
17 | \method{rationalize}{numeric}(.x, ...)
18 | 
19 | \method{rationalize}{data.frame}(.x, ...)
20 | }
21 | \arguments{
22 | \item{.x}{vector or data.frame}
23 | 
24 | \item{...}{columns to be evaluated. Only applicable if .x is a data frame.}
25 | }
26 | \value{
27 | For vectors: same data type/class as .x.
28 | 
29 | For data.frame: a tbl data frame.
30 | 
31 | \code{NULL}
32 | 
33 | \code{NULL}
34 | 
35 | \code{NULL}
36 | }
37 | \description{
38 | Only allow rational values in numeric vectors
39 | 
40 | \code{rationalize} transforms all numeric elements to be rational values or NA,
41 | thus removes all \code{NaN,Inf} and replaces them with \code{NA}.
42 | }
43 | \details{
44 | #' If a non-numeric vector is passed, it is unchanged. If a data.frame is
45 | passed, it evaluates all columns separately.
46 | }
47 | \examples{
48 | x <- c(3, -Inf, 6.56, 9.3, NaN, 5, -Inf)
49 | rationalize(x)
50 | 
51 | df <- data.frame(num_col = c(Inf, 3, NaN), 
52 |                  chr_col = c("a", "b", "c"), 
53 |                  stringsAsFactors = FALSE)
54 | df
55 | rationalize(df)
56 | 
57 | }
58 | \seealso{
59 | \code{\link{s}}, \code{\link{rationalize}}, \code{vignette("s")}, \code{vignette("hablar")}
60 | }
61 | 


--------------------------------------------------------------------------------
/man/repeat_df.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{repeat_df}
 4 | \alias{repeat_df}
 5 | \title{repeat_df}
 6 | \usage{
 7 | repeat_df(.df, n, id = NULL)
 8 | }
 9 | \arguments{
10 | \item{.df}{a data frame}
11 | 
12 | \item{n}{times the data frame should be repeated}
13 | 
14 | \item{id}{a character element that creates a column with a number for each repetition}
15 | }
16 | \value{
17 | a vector of the integers 1, 0 and NA (if not dummy_ is used).
18 | }
19 | \description{
20 | Repeats a data frame n times. Useful for testing on large data frames.
21 | }
22 | \examples{
23 | repeat_df(mtcars, 2)
24 | 
25 | }
26 | \seealso{
27 | \code{vignette("hablar")}
28 | }
29 | 


--------------------------------------------------------------------------------
/man/replacers.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{replacers}
 4 | \alias{replacers}
 5 | \alias{if_na}
 6 | \alias{if_inf}
 7 | \alias{if_nan}
 8 | \alias{if_zero}
 9 | \alias{na_if}
10 | \alias{inf_if}
11 | \alias{nan_if}
12 | \alias{zero_if}
13 | \alias{if_not_na}
14 | \title{replacemnt and specials}
15 | \usage{
16 | if_na(.x, replacement, missing = NULL)
17 | 
18 | if_nan(.x, replacement, missing = NULL)
19 | 
20 | if_inf(.x, replacement, missing = NULL)
21 | 
22 | if_zero(.x, replacement, missing = NULL)
23 | 
24 | na_if(.x, condition, replace_na = FALSE)
25 | 
26 | nan_if(.x, condition, replace_na = FALSE)
27 | 
28 | inf_if(.x, condition, replace_na = FALSE)
29 | 
30 | zero_if(.x, condition, replace_na = FALSE)
31 | 
32 | if_not_na(.x, replacement, missing = NULL)
33 | 
34 | if_inf(.x, replacement, missing = NULL)
35 | 
36 | if_nan(.x, replacement, missing = NULL)
37 | 
38 | if_zero(.x, replacement, missing = NULL)
39 | 
40 | na_if(.x, condition, replace_na = FALSE)
41 | 
42 | inf_if(.x, condition, replace_na = FALSE)
43 | 
44 | nan_if(.x, condition, replace_na = FALSE)
45 | 
46 | zero_if(.x, condition, replace_na = FALSE)
47 | }
48 | \arguments{
49 | \item{.x}{a vector}
50 | 
51 | \item{replacement}{a replacement if condition is TRUE}
52 | 
53 | \item{missing}{a value that replace missing values in condition.}
54 | 
55 | \item{condition}{a predicament}
56 | 
57 | \item{replace_na}{if TRUE, missing values in condition will be replaced as well}
58 | }
59 | \value{
60 | a vector
61 | }
62 | \description{
63 | If-this-type-then replace with x. And the other way around; replace with x
64 | if this.
65 | }
66 | \examples{
67 | 
68 | v <- c(1, NA, 2)
69 | if_na(v, 100)
70 | 
71 | v <- c(999, NA, 2)
72 | zero_if(v, v == 999)
73 | 
74 | }
75 | \seealso{
76 | \code{vignette("s")}, \code{vignette("hablar")}
77 | }
78 | 


--------------------------------------------------------------------------------
/man/retype.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/hablar.R
  3 | \name{retype}
  4 | \alias{retype}
  5 | \alias{retype.default}
  6 | \alias{retype.logical}
  7 | \alias{retype.integer}
  8 | \alias{retype.Date}
  9 | \alias{retype.POSIXct}
 10 | \alias{retype.numeric}
 11 | \alias{retype.list}
 12 | \alias{retype.data.frame}
 13 | \title{Return simple data types}
 14 | \usage{
 15 | retype(.x, ...)
 16 | 
 17 | \method{retype}{default}(.x, ...)
 18 | 
 19 | \method{retype}{logical}(.x, ...)
 20 | 
 21 | \method{retype}{integer}(.x, ...)
 22 | 
 23 | \method{retype}{Date}(.x, ...)
 24 | 
 25 | \method{retype}{POSIXct}(.x, ...)
 26 | 
 27 | \method{retype}{numeric}(.x, ...)
 28 | 
 29 | \method{retype}{list}(.x, ...)
 30 | 
 31 | \method{retype}{data.frame}(.x, ...)
 32 | }
 33 | \arguments{
 34 | \item{.x}{vector or data.frame}
 35 | 
 36 | \item{...}{column names to be evaluated. Only if .x is a data frame.}
 37 | }
 38 | \value{
 39 | For vectors: simple class of .x.
 40 | 
 41 | For data.frame: a tbl data frame with simple classes.
 42 | 
 43 | \code{NULL}
 44 | 
 45 | \code{NULL}
 46 | 
 47 | \code{NULL}
 48 | 
 49 | \code{NULL}
 50 | 
 51 | \code{NULL}
 52 | 
 53 | \code{NULL}
 54 | 
 55 | \code{NULL}
 56 | 
 57 | \code{NULL}
 58 | }
 59 | \description{
 60 | \code{retype} transforms all elements into simple classes. The simple classes
 61 | are date, numeric and character. By transforming all elements to these
 62 | classes no information is lost, while simplifying the object. See details below for
 63 | more information or type \code{vignette("retype")} in the console.
 64 | }
 65 | \details{
 66 | Each vector past to \code{retype} is reclassified into the highest position in
 67 | a simplification hierarchy without loosing any information. This means that:
 68 | Factors are converted to characters.
 69 | However, character vectors (or vectors changed to character initially)
 70 | are checked to see if they could be a numeric vector without error.
 71 | If so, it is transformed into a numeric vector which is higher in the hierarchy.
 72 | Vectors of class logical, integer are changed to numerical.
 73 | Dates and date time (POSIXct) goes through the same procedure.
 74 | Lists and complex vectors are left unchanged because the are neither simple nor complicated.
 75 | }
 76 | \examples{
 77 | # Dates
 78 | dte <- as.Date(c("2018-01-01", "2016-03-21", "1970-01-05"))
 79 | retype(dte)
 80 | retype(dte)
 81 | 
 82 | # Factors
 83 | fct <- as.factor(c("good", "bad", "average"))
 84 | retype(dte)
 85 | 
 86 | # Character that only contains numeric elements
 87 | num_chr <- c("3","4.0", "3,5")
 88 | retype(num_chr)
 89 | 
 90 | # Logical
 91 | lgl <- c(TRUE, FALSE, TRUE)
 92 | retype(lgl)
 93 | 
 94 | # Data frame with all the above vectors
 95 | df <- data.frame(dte = dte, 
 96 |                  fct = fct, 
 97 |                  num_chr = num_chr, 
 98 |                  lgl = lgl, 
 99 |                  stringsAsFactos = FALSE)
100 | df
101 | retype(df)
102 | 
103 | }
104 | \seealso{
105 | \code{\link{s}}, \code{\link{rationalize}} #' \code{vignette("retype")}, \code{vignette("s")}, \code{vignette("hablar")}
106 | }
107 | 


--------------------------------------------------------------------------------
/man/s.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{s}
 4 | \alias{s}
 5 | \title{Make vector shorter and simpler}
 6 | \usage{
 7 | s(.x, ignore_na = TRUE)
 8 | }
 9 | \arguments{
10 | \item{.x}{one vector. Does not work for factors.}
11 | 
12 | \item{ignore_na}{if TRUE then NA omitted from results, as long as any non-NA element is left.}
13 | }
14 | \value{
15 | a shortened and simplified vector
16 | }
17 | \description{
18 | \code{s} means simple and short. It removes all non-values, i.e. \code{NA,Inf,NaN}  from a vector.
19 | However, if the length is 0 it returns NA.
20 | It is useful in combination with summary functions, e.g. mean, sum or min, when
21 | an answer is desired, if there is one in the data. In any other case NA is returned. 
22 | Type \code{vignette("s")} in the console for more information.
23 | }
24 | \examples{
25 | \dontrun{
26 | library(dplyr)
27 | 
28 | ## s on a weird numeric vector
29 | vector <- c(7, NaN, 6, -Inf, 5, 4, NA)
30 | s(vector)
31 | 
32 | ## Sum vector with non-rational values
33 | vector <- c(7, NaN, -Inf, 4)
34 | # Base R
35 | sum(vector)
36 | # With s
37 | sum(s(vector))
38 | 
39 | ## Max of vector with only NA
40 | # Base R
41 | max(vector, na.rm = TRUE)
42 | # With s
43 | max(s(vector))
44 | 
45 | ## First of vector when NA is first element
46 | vector <- c(NA, "X", "Y")
47 | # dplyr R
48 | first(vector)
49 | # With s
50 | first(s(vector))
51 | 
52 | ## Use of s when NA should not be removes
53 | vector <- c(7, Inf, NA, 4)
54 | # Base R
55 | sum(vector)
56 | # With s
57 | sum(s(vector, ignore_na = FALSE))
58 | 
59 | ## s when summarizing a weird data.frame
60 | df_test <- data.frame(a = c(NaN, 1, -Inf, 3), 
61 |                       b = c(NA, "Q", "P", "P"), 
62 |                       c = c(NA, NA, NA, NA), 
63 |                       stringsAsFactors = FALSE) 
64 | df_test
65 | 
66 | # Base R aggregation with dplyr's summarize
67 | summarise(df_test, mean_a = mean(a), 
68 |                    min_c = min(c, na.rm = TRUE))
69 | # With s
70 | summarise(df_test, mean_a = mean(s(a)), 
71 |                    min_c = min(s(c)))
72 | }
73 | }
74 | \seealso{
75 | \code{\link{retype}}, \code{\link{rationalize}}, \code{vignette("s")}, \code{vignette("hablar")}
76 | }
77 | 


--------------------------------------------------------------------------------
/man/set_wd_to_script_path.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{set_wd_to_script_path}
 4 | \alias{set_wd_to_script_path}
 5 | \title{Set wd to script path}
 6 | \usage{
 7 | set_wd_to_script_path()
 8 | }
 9 | \value{
10 | NULL. In the background the working directory has changed if not any errors occurred.
11 | }
12 | \description{
13 | Sets working directory to the path where the R-script is located. Only works
14 | inside [Rstudio] and in a script (i.e. not in the console). Additionally, the R-script needs to
15 | be saved in a path to work.
16 | }
17 | 


--------------------------------------------------------------------------------
/man/this_date.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hablar.R
 3 | \name{this_date}
 4 | \alias{this_date}
 5 | \alias{this_day}
 6 | \alias{this_month}
 7 | \alias{this_year}
 8 | \title{this_date}
 9 | \usage{
10 | this_day()
11 | 
12 | this_month()
13 | 
14 | this_year()
15 | }
16 | \value{
17 | a date or number
18 | }
19 | \description{
20 | Returns the current day, month or year. Day and month returns dates and year a 4 digit number.
21 | }
22 | \examples{
23 | this_day() 
24 | this_month() 
25 | this_year() 
26 | 
27 | }
28 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(hablar)
3 | test_check("hablar")
4 | # Keep order of hablar.R for tests
5 | 


--------------------------------------------------------------------------------
/tests/testthat/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidsjoberg/hablar/6698fd2973ec6cb99ce7637d899cfc3040c0da74/tests/testthat/.DS_Store


--------------------------------------------------------------------------------
/tests/testthat/test.check_df.R:
--------------------------------------------------------------------------------
  1 | library(testthat)
  2 | library(hablar)
  3 | library(dplyr)
  4 | 
  5 | context("check_df")
  6 | 
  7 | test_that("check_duplicates", {
  8 |   expect_equal(tibble(a = c(1, 1, 2),
  9 |                           b = c(1, 1, NA),
 10 |                           c = c(3, 3, NA)) %>% check_duplicates(),
 11 |                TRUE
 12 |   )
 13 |   expect_equal(tibble(a = c(1, 1, 2),
 14 |                           b = c(1, 1, NA),
 15 |                           c = c(3, 3, NA)) %>% check_duplicates(a, b),
 16 |                TRUE
 17 |   )
 18 |   expect_equal(tibble(a = c(1, 1, 2),
 19 |                           b = c(1, 1, NA),
 20 |                           c = c(3, 3, NA)) %>% check_duplicates(-c),
 21 |                tibble(a = c(1, 1, 2),
 22 |                           b = c(1, 1, NA),
 23 |                           c = c(3, 3, NA)) %>% check_duplicates(a, b)
 24 |   )
 25 |   expect_equal(tibble(a = c(1, 1, 2),
 26 |                           b = c(1, 1, NA),
 27 |                           c = c(3, 3, NA)) %>% check_duplicates(a:b),
 28 |                TRUE
 29 |   )
 30 |   expect_equal(tibble(a = c(1, 1, 2),
 31 |                           b = c(1, 1, NA),
 32 |                           c = c(3, 3, NA)) %>% check_duplicates(),
 33 |                TRUE
 34 |   )
 35 |   expect_equal(tibble(a = c(1, 2, 2),
 36 |                           b = c(1, 1, NA),
 37 |                           c = c(3, 3, NA)) %>% check_duplicates(),
 38 |                FALSE
 39 |   )
 40 |   expect_equal(tibble(a = c(1, 1, 2),
 41 |                       b = c(1, 1, NA),
 42 |                       c = c(3, 3, NA)) %>% check_duplicates(),
 43 |                TRUE
 44 |   )
 45 |   
 46 |   expect_error(c(1, 2) %>% check_duplicates())
 47 | })
 48 | 
 49 | test_that("check_na", {
 50 |   expect_equal(tibble(a = c(1, 1, 2),
 51 |                           b = c(1, 1, NA),
 52 |                           c = c(3, 3, NA)) %>% check_na(),
 53 |                TRUE
 54 |   )
 55 |   expect_equal(tibble(a = c(1, 1, 2),
 56 |                           b = c(1, 1, NA),
 57 |                           c = c(3, 3, NA)) %>% check_na(b),
 58 |                TRUE
 59 |   )
 60 |   expect_equal(tibble(a = c(1, 1, 2),
 61 |                           b = c(1, 1, NA),
 62 |                           c = c(3, 3, NA)) %>% check_na(a),
 63 |                FALSE
 64 |   )
 65 |   expect_equal(tibble(a = c(1, 1, 2),
 66 |                           b = c(1, 1, NA),
 67 |                           c = c(3, 3, NA)) %>% check_na(-b),
 68 |                tibble(a = c(1, 1, 2),
 69 |                           b = c(1, 1, NA),
 70 |                           c = c(3, 3, NA)) %>% check_na(a, c)
 71 |   )
 72 |   expect_equal(tibble(a = c(1, 1, 2),
 73 |                           b = c(1, 1, NA),
 74 |                           c = c(3, 3, NA)) %>% check_na(a:b),
 75 |                TRUE
 76 |   )
 77 |   
 78 |   expect_error(c(1, 2) %>% check_na())
 79 | })
 80 | 
 81 | test_that("check_nan", {
 82 |   expect_equal(tibble(a = c(1, 1, 2),
 83 |                           b = c(1, 1, NA),
 84 |                           c = c(3, 3, NaN)) %>% check_nan(),
 85 |                TRUE
 86 |   )
 87 |   expect_equal(tibble(a = c(1, 1, 2),
 88 |                           b = c(1, 1, NA),
 89 |                           c = c(3, 3, NaN)) %>% check_nan(b),
 90 |                FALSE
 91 |   )
 92 |   expect_equal(tibble(a = c(NaN, 1, 2),
 93 |                           b = c(1, 1, NA),
 94 |                           c = c(3, 3, NA)) %>% check_nan(a),
 95 |                TRUE
 96 |   )
 97 |   expect_equal(tibble(a = c(NaN, 1, 2),
 98 |                           b = c(1, 1, NA),
 99 |                           c = c(3, 3, NA)) %>% check_nan(-b),
100 |                TRUE
101 |   )
102 |   expect_equal(tibble(a = c(1, 1, 2),
103 |                           b = c(1, NaN, NA),
104 |                           c = c(3, 3, NA)) %>% check_nan(a:b),
105 |                TRUE
106 |   )
107 |   
108 |   expect_error(c(1, 2, NaN) %>% check_nan())
109 | })
110 | 
111 | test_that("check_inf", {
112 |   expect_equal(tibble(a = c(1, 1, 2),
113 |                           b = c(1, 1, NA),
114 |                           c = c(3, 3, Inf)) %>% check_inf(),
115 |                TRUE
116 |   )
117 |   expect_equal(tibble(a = c(1, 1, 2),
118 |                           b = c(1, 1, NA),
119 |                           c = c(3, 3, Inf)) %>% check_inf(b),
120 |                FALSE
121 |   )
122 |   expect_equal(tibble(a = c(Inf, 1, 2),
123 |                           b = c(1, 1, NA),
124 |                           c = c(3, 3, NA)) %>% check_inf(a),
125 |                TRUE
126 |   )
127 |   expect_equal(tibble(a = c(Inf, 1, 2),
128 |                           b = c(1, 1, NA),
129 |                           c = c(3, 3, NA)) %>% check_inf(-b),
130 |                TRUE
131 |   )
132 |   expect_equal(tibble(a = c(1, 1, 2),
133 |                           b = c(1, Inf, NA),
134 |                           c = c(3, 3, NA)) %>% check_inf(a:b),
135 |                TRUE
136 |   )
137 |   
138 |   expect_error(c(1, 2, Inf) %>% check_inf())
139 | })
140 | 
141 | test_that("check_irrational", {
142 |   expect_equal(tibble(a = c(1, 1, 2),
143 |                           b = c(1, 1, NA),
144 |                           c = c(3, 3, Inf)) %>% check_irrational(),
145 |                TRUE
146 |   )
147 |   expect_equal(tibble(a = c(1, 1, 2),
148 |                           b = c(1, 1, NA),
149 |                           c = c(3, 3, Inf)) %>% check_irrational(b),
150 |                FALSE
151 |   )
152 |   expect_equal(tibble(a = c(Inf, 1, 2),
153 |                           b = c(1, 1, NA),
154 |                           c = c(3, 3, NA)) %>% check_irrational(a),
155 |                TRUE
156 |   )
157 |   expect_equal(tibble(a = c(NaN, 1, 2),
158 |                           b = c(1, 1, NA),
159 |                           c = c(3, 3, NA)) %>% check_irrational(-b),
160 |                tibble(a = c(NaN, 1, 2),
161 |                           b = c(1, 1, NA),
162 |                           c = c(3, 3, NA)) %>% check_irrational(a, c)
163 |   )
164 |   expect_equal(tibble(a = c(1, 1, 2),
165 |                           b = c(1, Inf, NA),
166 |                           c = c(3, 3, NA)) %>% check_irrational(a:b),
167 |                TRUE
168 |   )
169 |   
170 |   expect_error(c(1, 2, Inf) %>% check_irrational())
171 | })
172 | 
173 | 
174 | test_that("check_complete_set", {
175 |   expect_equal(tibble(a = c(1, 2),
176 |                           b = c(3, 4)) %>% 
177 |                             check_complete_set(a, b), FALSE
178 |   )
179 |   expect_equal(tibble(a = c(1, 2),
180 |                           b = c(3, 4)) %>% 
181 |                             check_complete_set(a:b), FALSE
182 |   )
183 |   expect_equal(tibble(a = c(1, 2, 1, 2),
184 |                           b = c(3, 4, 4, 3)) %>% 
185 |                             check_complete_set(a, b), TRUE
186 |   )
187 |   expect_error(tibble(a = c(1, 2, 1, 2),
188 |                           b = c(3, 4, 4, 3)) %>% 
189 |                             check_complete_set(a)
190 |   )
191 | })
192 | 
193 | 
194 | 
195 | 


--------------------------------------------------------------------------------
/tests/testthat/test.convert.R:
--------------------------------------------------------------------------------
  1 | library(testthat)
  2 | library(hablar)
  3 | 
  4 | context("convert")
  5 | test_that("from numeric", {
  6 |   expect_equal(tibble(a = as.numeric(c(1, 2))) %>%
  7 |                  convert(int(a)),
  8 |                tibble(a = as.integer(c(1, 2))))
  9 | 
 10 |   expect_equal(tibble(a = as.numeric(c(1, 2))) %>%
 11 |                  convert(num(a)),
 12 |                tibble(a = as.numeric(c(1, 2))))
 13 | 
 14 |   expect_equal(tibble(a = as.numeric(c(1, 2))) %>%
 15 |                  convert(dbl(a)),
 16 |                tibble(a = as.numeric(c(1, 2))))
 17 | 
 18 |   expect_equal(tibble(a = as.numeric(c(1, 2))) %>%
 19 |                  convert(chr(a)),
 20 |                tibble(a = as.character(c("1", "2"))))
 21 | 
 22 |   expect_equal(tibble(a = as.numeric(c(1, 2))) %>%
 23 |                  convert(dte(a)),
 24 |                tibble(a = as.Date(c("1970-01-02", "1970-01-03"))))
 25 | 
 26 |   expect_equal(tibble(a = as.numeric(c(1, 2))) %>%
 27 |                  convert(dtm(a)) %>% lapply(class),
 28 |                tibble(a = as.POSIXct(c("1970-01-01 01:00:01", "1970-01-01 01:00:02"))) %>% lapply(class))
 29 | 
 30 |   expect_equal(tibble(a = as.numeric(c(1, 2))) %>%
 31 |                  convert(lgl(a)),
 32 |                tibble(a = as.logical(c(T, T))))
 33 | 
 34 |   expect_equal(tibble(a = as.numeric(c(1, 2))) %>%
 35 |                  convert(fct(a)),
 36 |                tibble(a = as.factor(c(1, 2))))
 37 |   })
 38 | 
 39 | test_that("from integer", {
 40 |   expect_equal(tibble(a = as.integer(c(1, 2))) %>%
 41 |                  convert(num(a)),
 42 |                tibble(a = as.numeric(c(1, 2))))
 43 | 
 44 |   expect_equal(tibble(a = as.integer(c(1, 2))) %>%
 45 |                  convert(dbl(a)),
 46 |                tibble(a = as.numeric(c(1, 2))))
 47 | 
 48 |   expect_equal(tibble(a = as.integer(c(1, 2))) %>%
 49 |                  convert(int(a)),
 50 |                tibble(a = as.integer(c(1, 2))))
 51 | 
 52 |   expect_equal(tibble(a = as.integer(c(1, 2))) %>%
 53 |                  convert(chr(a)),
 54 |                tibble(a = as.character(c("1", "2"))))
 55 | 
 56 |   expect_equal(tibble(a = as.integer(c(1, 2))) %>%
 57 |                  convert(dte(a)),
 58 |                tibble(a = as.Date(c("1970-01-02", "1970-01-03"))))
 59 | 
 60 |   expect_equal(tibble(a = as.integer(c(1, 2))) %>%
 61 |                  convert(dtm(a)) %>% lapply(class),
 62 |                tibble(a = as.POSIXct(c("1970-01-01 01:00:01", "1970-01-01 01:00:02"), tz="UTC")) %>% lapply(class))
 63 | 
 64 |   expect_equal(tibble(a = as.integer(c(1, 2))) %>%
 65 |                  convert(lgl(a)),
 66 |                tibble(a = as.logical(c(T, T))))
 67 | 
 68 |   expect_equal(tibble(a = as.integer(c(1, 2))) %>%
 69 |                  convert(fct(a)),
 70 |                tibble(a = as.factor(c(1, 2))))
 71 | })
 72 | 
 73 | test_that("from logical", {
 74 |   expect_equal(tibble(a = as.logical(c(T, F, NA))) %>%
 75 |                  convert(num(a)),
 76 |                tibble(a = as.numeric(c(1, 0, NA))))
 77 | 
 78 |   expect_equal(tibble(a = as.logical(c(T, F, NA))) %>%
 79 |                  convert(dbl(a)),
 80 |                tibble(a = as.numeric(c(1, 0, NA))))
 81 | 
 82 |   expect_equal(tibble(a = as.logical(c(T, F, NA))) %>%
 83 |                  convert(chr(a)),
 84 |                tibble(a = as.character(c("TRUE", "FALSE", NA))))
 85 | 
 86 |   expect_error(tibble(a = as.logical(c(T, F, NA))) %>%
 87 |                  convert(dte(a)))
 88 | 
 89 |   expect_error(tibble(a = as.logical(c(T, F, NA))) %>%
 90 |                  convert(dtm(a)))
 91 | 
 92 |   expect_equal(tibble(a = as.logical(c(T, F, NA))) %>%
 93 |                  convert(lgl(a)),
 94 |                tibble(a = as.logical(c(T, F, NA))))
 95 | 
 96 |   expect_equal(tibble(a = as.logical(c(T, F, NA))) %>%
 97 |                  convert(fct(a)),
 98 |                tibble(a = as.factor(c(T, F, NA))))
 99 | })
100 | 
101 | test_that("from Date", {
102 |   expect_equal(tibble(a = as.Date(c("1970-05-03", NA, "1987-04-20"))) %>%
103 |                  convert(num(a)),
104 |                tibble(a = as.numeric(c(122, NA, 6318))))
105 | 
106 |   expect_equal(tibble(a = as.Date(c("1970-05-03", NA, "1987-04-20"))) %>%
107 |                  convert(dbl(a)),
108 |                tibble(a = as.numeric(c(122, NA, 6318))))
109 | 
110 |   expect_equal(tibble(a = as.Date(c("1970-05-03", NA, "1987-04-20"))) %>%
111 |                  convert(chr(a)),
112 |                tibble(a = as.character(c("1970-05-03", NA, "1987-04-20"))))
113 | 
114 |   expect_equal(tibble(a = as.Date(c("1970-05-03", NA, "1987-04-20"))) %>%
115 |                  convert(dte(a)),
116 |                tibble(a = as.Date(c("1970-05-03", NA, "1987-04-20"))))
117 | 
118 |   expect_equal(tibble(a = as.Date(c("1970-05-03", NA, "1987-04-20"))) %>%
119 |                  convert(dtm(a)),
120 |                tibble(a = as.POSIXct(as.Date(c("1970-05-03", NA, "1987-04-20")))))
121 | 
122 |   expect_equal(tibble(a = as.Date(c("1970-05-03", NA, "1987-04-20"))) %>%
123 |                  convert(dte(a)),
124 |                tibble(a = as.Date(c("1970-05-03", NA, "1987-04-20"))))
125 | 
126 |   expect_equal(tibble(a = as.Date(c("1970-05-03", NA, "1987-04-20"))) %>%
127 |                  convert(fct(a)),
128 |                tibble(a = as.factor(c("1970-05-03", NA, "1987-04-20"))))
129 | })
130 | 
131 | 
132 | test_that("from POSIXct", {
133 |   expect_equal(tibble(a = as.POSIXct(c("1970-05-03 01:00", NA, "1987-04-20 03:45"), tz = "UTC")) %>%
134 |                  convert(num(a)),
135 |                tibble(a = as.numeric(c(10544400, NA, 545888700))))
136 | 
137 |   expect_equal(tibble(a = as.POSIXct(c("1970-05-03 01:00", NA, "1987-04-20 03:45"), tz = "UTC")) %>%
138 |                  convert(dbl(a)),
139 |                tibble(a = as.numeric(c(10544400, NA, 545888700))))
140 | 
141 |   expect_equal(tibble(a = as.POSIXct(c("1970-05-03 01:00", NA, "1987-04-20 03:45"))) %>%
142 |                  convert(chr(a)),
143 |                tibble(a = as.character(c("1970-05-03 01:00:00", NA, "1987-04-20 03:45:00"))))
144 | 
145 |   expect_equal(tibble(a = as.POSIXct(c("1970-05-03 01:00", NA, "1987-04-20 03:45"))) %>%
146 |                  convert(dte(a)),
147 |                tibble(a = as.Date(c("1970-05-03", NA, "1987-04-20"))))
148 | 
149 |   expect_equal(tibble(a = as.POSIXct(c("1970-05-03 01:00", NA, "1987-04-20 03:45"))) %>%
150 |                  convert(dtm(a)),
151 |                tibble(a = as.POSIXct(c("1970-05-03 01:00", NA, "1987-04-20 03:45"))))
152 | 
153 |   expect_equal(tibble(a = as.POSIXct(c("1970-05-03 01:00", NA, "1987-04-20 03:45"))) %>%
154 |                  convert(dte(a)),
155 |                tibble(a = as.Date(c("1970-05-03", NA, "1987-04-20"))))
156 | 
157 |   expect_equal(tibble(a = as.POSIXct(c("1970-05-03 01:00", NA, "1987-04-20 03:45"))) %>%
158 |                  convert(fct(a)),
159 |                tibble(a = as.factor(c("1970-05-03 01:00:00", NA, "1987-04-20 03:45:00"))))
160 | })
161 | 
162 | 
163 | test_that("from factor", {
164 |   expect_warning(tibble(a = as.factor(c("1970-05-03", NA, "1"))) %>%
165 |                  convert(num(a)))
166 | 
167 |   expect_warning(tibble(a = as.factor(c("1970-05-03", NA, "1"))) %>%
168 |                  convert(dbl(a)))
169 | 
170 |   expect_equal(tibble(a = as.factor(c("1970-05-03", NA, "1"))) %>%
171 |                  convert(chr(a)),
172 |                tibble(a = as.character(c("1970-05-03", NA, "1"))))
173 | 
174 |   expect_equal(tibble(a = as.factor(c("1970-05-03", NA, "1"))) %>%
175 |                  convert(dte(a)),
176 |                tibble(a = as.Date(c("1970-05-03", NA, NA))))
177 | 
178 |   expect_error(tibble(a = as.factor(c("1970-05-03", NA, "1"))) %>%
179 |                  convert(dtm(a)))
180 | 
181 |   expect_equal(tibble(a = as.factor(c("1970-05-03", NA, "1"))) %>%
182 |                  convert(dte(a)),
183 |                tibble(a = as.Date(c("1970-05-03", NA, NA))))
184 | 
185 |   expect_equal(tibble(a = as.factor(c("1970-05-03", NA, "1"))) %>%
186 |                  convert(fct(a)),
187 |                tibble(a = as.factor(c("1970-05-03", NA, "1")))) 
188 | })
189 | 
190 | test_that("Other tests", {
191 |   expect_equal(dplyr::starwars %>% select(1:6) %>% convert(lgl(height), int(mass)),
192 |                dplyr::starwars %>% select(1:6) %>%
193 |                  mutate_at(vars(height), ~as.logical(.)) %>%
194 |                  mutate_at(vars(mass), ~as.integer(.)))
195 | 
196 |   expect_error(convert(as.numeric(1)))
197 |   })
198 | 


--------------------------------------------------------------------------------
/tests/testthat/test.could_this_be_that.R:
--------------------------------------------------------------------------------
  1 | library(testthat)
  2 | library(hablar)
  3 | 
  4 | context("could this be that")
  5 | test_that("could_chr_be_num", {
  6 |   expect_equal(could_chr_be_num("a"), F)
  7 |   expect_equal(could_chr_be_num("."), F)
  8 |   expect_equal(could_chr_be_num(" 3"), T)
  9 |   expect_equal(could_chr_be_num("3 0"), F)
 10 |   expect_equal(could_chr_be_num("2018-03-01"), F)
 11 |   expect_equal(could_chr_be_num("2018-10-09 19:19:26 CEST"), F)
 12 |   expect_equal(could_chr_be_num("1"), T)
 13 |   expect_equal(could_chr_be_num(".56"), T)
 14 |   expect_equal(could_chr_be_num("7.0"), T)
 15 |   expect_equal(could_chr_be_num("0003"), T)
 16 |   expect_equal(could_chr_be_num("1.98"), T)
 17 |   expect_equal(could_chr_be_num(as.character(c(NA, NA))), F)
 18 |   expect_equal(could_chr_be_num(".98"), T)
 19 |   expect_equal(could_chr_be_num(as.character(NA)), F)
 20 | 
 21 |   expect_error(could_chr_be_num(as.numeric(1)))
 22 |   expect_error(could_chr_be_num())
 23 |   expect_error(could_chr_be_num(c()))
 24 |   expect_error(could_chr_be_num(data.frame(a = c(1,3,4))))
 25 |   expect_error(could_chr_be_num(list(a = c(1,3,4))))
 26 | })
 27 | 
 28 | test_that("could_chr_be_int", {
 29 |   expect_equal(could_chr_be_int("a"), F)
 30 |   expect_equal(could_chr_be_int("."), F)
 31 |   expect_equal(could_chr_be_int(" 3"), T)
 32 |   expect_equal(could_chr_be_int("3 0"), F)
 33 |   expect_equal(could_chr_be_int("2018-03-01"), F)
 34 |   expect_equal(could_chr_be_int("2018-10-09 19:19:26 CEST"), F)
 35 |   expect_equal(could_chr_be_int("1"), T)
 36 |   expect_equal(could_chr_be_int(".56"), F)
 37 |   expect_equal(could_chr_be_int("7.0"), T)
 38 |   expect_equal(could_chr_be_int("0003"), T)
 39 |   expect_equal(could_chr_be_int("1,98"), F)
 40 |   expect_equal(could_chr_be_int(as.character(c())), F)
 41 |   expect_equal(could_chr_be_int(as.character(c(NA, NA))), F)
 42 |   expect_equal(could_chr_be_int(",98"), F)
 43 |   expect_equal(could_chr_be_int(as.character(NA)), F)
 44 | 
 45 |   expect_error(could_chr_be_int(as.numeric(1)))
 46 |   expect_error(could_chr_be_int())
 47 |   expect_error(could_chr_be_int(data.frame(a = c(1,3,4))))
 48 |   expect_error(could_chr_be_num(list(a = c(1,3,4))))
 49 | })
 50 | 
 51 | test_that("could_num_be_int", {
 52 |   expect_equal(could_num_be_int(as.numeric(1)), T)
 53 |   expect_equal(could_num_be_int(c(1, 2)), T)
 54 |   expect_equal(could_num_be_int(c(1, 2.6)), F)
 55 |   expect_equal(could_num_be_int(as.numeric(NA)), F)
 56 |   expect_equal(could_num_be_int(as.numeric(c())), F)
 57 | 
 58 |   expect_error(could_num_be_int(as.character(c())))
 59 |   expect_error(could_num_be_int())
 60 |   expect_error(could_num_be_int("a"))
 61 |   expect_error(could_num_be_int(",98"))
 62 |   expect_error(could_num_be_int(as.character(NA)))
 63 |   expect_error(could_num_be_int(data.frame(a = c(1,3,4))))
 64 |   expect_error(could_chr_be_num(list(a = c(1,3,4))))
 65 | })
 66 | 
 67 | test_that("could_chr_be_dtm", {
 68 |   expect_equal(could_chr_be_dtm("a"), F)
 69 |   expect_equal(could_chr_be_dtm("."), F)
 70 |   expect_equal(could_chr_be_dtm(" 3"), F)
 71 |   expect_equal(could_chr_be_dtm("3 0"), F)
 72 |   expect_equal(could_chr_be_dtm("2018-03-01"), F)
 73 |   expect_equal(could_chr_be_dtm("2018-10-09 19:19:26 CEST"), T)
 74 |   expect_equal(could_chr_be_dtm(as.character(c("2018-10-09 19:19:26 CEST", "2018-10-09 19:19:27 CEST", NA))), T)
 75 |   expect_equal(could_chr_be_dtm("1"), F)
 76 |   expect_equal(could_chr_be_dtm("7.0"), F)
 77 |   expect_equal(could_chr_be_dtm("0003"), F)
 78 |   expect_equal(could_chr_be_dtm(as.character(NA)), F)
 79 | 
 80 |   expect_error(could_chr_be_dtm(as.POSIXct(c("2018-10-09 19:19:26 CEST", "2018-10-09 19:19:27 CEST"))))
 81 |   expect_error(could_chr_be_dtm(as.Date(c(NA, NA))))
 82 |   expect_error(could_chr_be_dtm(as.numeric(1)))
 83 |   expect_error(could_chr_be_dtm(as.factor("2018-03-01")))
 84 |   expect_error(could_chr_be_dtm(data.frame(a = c(1,3,4))))
 85 |   expect_error(could_chr_be_num(list(a = c(1,3,4))))
 86 | })
 87 | 
 88 | test_that("could_chr_be_dte", {
 89 |   expect_equal(could_chr_be_dte(c("2018-03-01")), T)
 90 |   expect_equal(could_chr_be_dte(c(c("2018-03-01", "2018-03-03"))), T)
 91 |   expect_equal(could_chr_be_dte(c(c("2018-10-09 19:19:26 CEST", "2018-10-09 19:19:27 CEST"))), T)
 92 |   expect_equal(could_chr_be_dte(c(c("2018-10-09 19:19:26 CEST", "2018-10-09 19:19:27 CEST", NA))), T)
 93 |   expect_equal(could_chr_be_dte(c(NA_character_)), F)
 94 | 
 95 |   expect_error(could_chr_be_dte(as.Date(c(NA, NA))))
 96 |   expect_error(could_chr_be_dte(as.numeric(1)))
 97 |   expect_error(could_chr_be_dte(as.factor("2018-03-01")))
 98 |   expect_error(could_chr_be_dte(data.frame(a = c(1,3,4))))
 99 |   expect_error(could_chr_be_num(list(a = c(1,3,4))))
100 | })
101 | 
102 | 


--------------------------------------------------------------------------------
/tests/testthat/test.create_dummy.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(hablar)
 3 | 
 4 | context("create_dummy")
 5 | test_that("dummy", {
 6 |   expect_equal(
 7 |     dummy(c(T, F, NA)), 
 8 |     as.integer(c(1, 0, NA))
 9 |   )
10 |   expect_equal(
11 |     dummy(c(T, F, NA)), 
12 |     as.integer(c(1, 0, NA))
13 |   )
14 |   expect_equal(
15 |     dummy(c(T, F, NA), missing = 99), 
16 |     as.integer(c(1, 0, 99))
17 |   )
18 |   
19 |   expect_error(
20 |     dummy(c(1, 2, 3))
21 |   )
22 | })
23 | 
24 | context("create_dummy")
25 | test_that("dummy_", {
26 |   expect_equal(
27 |     dummy_(c(T, F, NA)), 
28 |     as.integer(c(1, 0, 0))
29 |   )
30 |   expect_equal(
31 |     dummy_(c(T, F, NA)), 
32 |     as.integer(c(1, 0, 0))
33 |   )
34 |   expect_equal(
35 |     dummy_(c(T, F, NA), missing = 99), 
36 |     as.integer(c(1, 0, 99))
37 |   )
38 |   
39 |   expect_error(
40 |     dummy(c(1, 2, 3))
41 |   )
42 | })
43 | 
44 | 


--------------------------------------------------------------------------------
/tests/testthat/test.cumulative.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(hablar)
 3 | 
 4 | context("cumulative")
 5 | test_that("cumulative", {
 6 |   expect_equal(cumsum_(c(1, 1, 1, 1)), c(1, 2, 3, 4))
 7 |   expect_equal(cumsum_(c(1, 1, NA, 1)), c(1, 2, 2, 3))
 8 | 
 9 |   expect_equal(cummean_(c(1, 1, NA, 1)), c(1, 1, 1, 1))
10 |   expect_equal(cummean_(c(NA, NA, NA, NA)), as.double(c(NA, NA, NA, NA)))
11 |   
12 |   expect_equal(cum_unique(c(1, 2, 3, 4)), c(1, 2, 3, 4))
13 |   expect_equal(cum_unique(c(1, 2, NA, 4)), c(1, 2, 3, 4))
14 |   expect_equal(cum_unique_(c(1, 2, NA, 4)), c(1, 2, 2, 3))
15 | })
16 | 
17 | 


--------------------------------------------------------------------------------
/tests/testthat/test.find_in_df.R:
--------------------------------------------------------------------------------
  1 | library(testthat)
  2 | library(hablar)
  3 | library(dplyr)
  4 | 
  5 | context("find_in_df")
  6 | 
  7 | test_that("find_duplicates", {
  8 |   expect_equal(tibble(a = c(1, 1, 2),
  9 |                           b = c(1, 1, NA),
 10 |                           c = c(3, 3, NA)) %>% find_duplicates(),
 11 |                tibble(a = c(1, 1),
 12 |                           b = c(1, 1),
 13 |                           c = c(3, 3))
 14 |                )
 15 |   expect_equal(tibble(a = c(1, 1, 2),
 16 |                           b = c(1, 1, NA),
 17 |                           c = c(3, 3, NA)) %>% find_duplicates(a, b),
 18 |                tibble(a = c(1, 1),
 19 |                           b = c(1, 1),
 20 |                           c = c(3, 3))
 21 |   )
 22 |   expect_equal(tibble(a = c(1, 1, 2),
 23 |                           b = c(1, 1, NA),
 24 |                           c = c(3, 3, NA)) %>% find_duplicates(-c),
 25 |                tibble(a = c(1, 1, 2),
 26 |                           b = c(1, 1, NA),
 27 |                           c = c(3, 3, NA)) %>% find_duplicates(a, b)
 28 |   )
 29 |   expect_equal(tibble(a = c(1, 1, 2),
 30 |                           b = c(1, 1, NA),
 31 |                           c = c(3, 3, NA)) %>% find_duplicates(a:b),
 32 |                tibble(a = c(1, 1),
 33 |                           b = c(1, 1),
 34 |                           c = c(3, 3))
 35 |   )
 36 |   expect_equal(tibble(a = c(1, 1, 2),
 37 |                           b = c(1, 1, NA),
 38 |                           c = c(3, 3, NA)) %>% find_duplicates(),
 39 |                tibble(a = c(1, 1),
 40 |                           b = c(1, 1),
 41 |                           c = c(3, 3))
 42 |   )
 43 |   expect_equal(tibble(a = c(1, 1, 2),
 44 |                           b = c(1, 1, NA),
 45 |                           c = c(3, 3, NA)) %>% find_duplicates(),
 46 |                tibble(a = c(1, 1),
 47 |                           b = c(1, 1),
 48 |                           c = c(3, 3))
 49 |   )
 50 |   
 51 |   expect_error(c(1, 2) %>% find_duplicates())
 52 | })
 53 | 
 54 | test_that("find_na", {
 55 |   expect_equal(tibble(a = c(1, 1, 2),
 56 |                           b = c(1, 1, NA),
 57 |                           c = c(3, 3, NA)) %>% find_na(),
 58 |                tibble(a = c(2),
 59 |                           b = as.numeric(c(NA)),
 60 |                           c = as.numeric(c(NA)))
 61 |   )
 62 |   expect_equal(tibble(a = c(1, 1, 2),
 63 |                           b = c(1, 1, NA),
 64 |                           c = c(3, 3, NA)) %>% find_na(b),
 65 |                tibble(a = c(2),
 66 |                           b = as.numeric(c(NA)),
 67 |                           c = as.numeric(c(NA)))
 68 |   )
 69 |   expect_equal(tibble(a = c(1, 1, 2),
 70 |                           b = c(1, 1, NA),
 71 |                           c = c(3, 3, NA)) %>% find_na(a),
 72 |                tibble(a = as.numeric(c()),
 73 |                           b = as.numeric(c()),
 74 |                           c = as.numeric(c()))
 75 |   )
 76 |   expect_equal(tibble(a = c(1, 1, 2),
 77 |                           b = c(1, 1, NA),
 78 |                           c = c(3, 3, NA)) %>% find_na(-b),
 79 |                tibble(a = c(1, 1, 2),
 80 |                           b = c(1, 1, NA),
 81 |                           c = c(3, 3, NA)) %>% find_na(a, c)
 82 |   )
 83 |   expect_equal(tibble(a = c(1, 1, 2),
 84 |                           b = c(1, 1, NA),
 85 |                           c = c(3, 3, NA)) %>% find_na(a:b),
 86 |                tibble(a = c(2),
 87 |                           b = c(NA) %>% as.numeric(),
 88 |                           c = c(NA) %>% as.numeric())
 89 |   )
 90 |   
 91 |   expect_error(c(1, 2) %>% find_na())
 92 | })
 93 | 
 94 | test_that("find_nan", {
 95 |   expect_equal(tibble(a = c(1, 1, 2),
 96 |                           b = c(1, 1, NA),
 97 |                           c = c(3, 3, NaN)) %>% find_nan(),
 98 |                tibble(a = c(2),
 99 |                           b = as.numeric(c(NA)),
100 |                           c = as.numeric(c(NaN)))
101 |   )
102 |   expect_equal(tibble(a = c(1, 1, 2),
103 |                           b = c(1, 1, NA),
104 |                           c = c(3, 3, NaN)) %>% find_nan(b),
105 |                tibble(a = as.numeric(c()),
106 |                           b = as.numeric(c()),
107 |                           c = as.numeric(c()))
108 |   )
109 |   expect_equal(tibble(a = c(NaN, 1, 2),
110 |                           b = c(1, 1, NA),
111 |                           c = c(3, 3, NA)) %>% find_nan(a),
112 |                tibble(a = as.numeric(c(NaN)),
113 |                           b = as.numeric(c(1)),
114 |                           c = as.numeric(c(3)))
115 |   )
116 |   expect_equal(tibble(a = c(NaN, 1, 2),
117 |                           b = c(1, 1, NA),
118 |                           c = c(3, 3, NA)) %>% find_nan(-b),
119 |                tibble(a = c(NaN, 1, 2),
120 |                           b = c(1, 1, NA),
121 |                           c = c(3, 3, NA)) %>% find_nan(a, c)
122 |   )
123 |   expect_equal(tibble(a = c(1, 1, 2),
124 |                           b = c(1, NaN, NA),
125 |                           c = c(3, 3, NA)) %>% find_nan(a:b),
126 |                tibble(a = c(1),
127 |                           b = c(NaN) %>% as.numeric(),
128 |                           c = c(3) %>% as.numeric())
129 |   )
130 |   
131 |   expect_error(c(1, 2) %>% find_nan())
132 | })
133 | 
134 | test_that("find_inf", {
135 |   expect_equal(tibble(a = c(1, 1, 2),
136 |                           b = c(1, 1, NA),
137 |                           c = c(3, 3, Inf)) %>% find_inf(),
138 |                tibble(a = c(2),
139 |                           b = as.numeric(c(NA)),
140 |                           c = as.numeric(c(Inf)))
141 |   )
142 |   expect_equal(tibble(a = c(1, 1, 2),
143 |                           b = c(1, 1, NA),
144 |                           c = c(3, 3, Inf)) %>% find_inf(b),
145 |                tibble(a = as.numeric(c()),
146 |                           b = as.numeric(c()),
147 |                           c = as.numeric(c()))
148 |   )
149 |   expect_equal(tibble(a = c(Inf, 1, 2),
150 |                           b = c(1, 1, NA),
151 |                           c = c(3, 3, NA)) %>% find_inf(a),
152 |                tibble(a = as.numeric(c(Inf)),
153 |                           b = as.numeric(c(1)),
154 |                           c = as.numeric(c(3)))
155 |   )
156 |   expect_equal(tibble(a = c(Inf, 1, 2),
157 |                           b = c(1, 1, NA),
158 |                           c = c(3, 3, NA)) %>% find_inf(-b),
159 |                tibble(a = c(Inf, 1, 2),
160 |                           b = c(1, 1, NA),
161 |                           c = c(3, 3, NA)) %>% find_inf(a, c)
162 |   )
163 |   expect_equal(tibble(a = c(1, 1, 2),
164 |                           b = c(1, Inf, NA),
165 |                           c = c(3, 3, NA)) %>% find_inf(a:b),
166 |                tibble(a = c(1),
167 |                           b = c(Inf) %>% as.numeric(),
168 |                           c = c(3) %>% as.numeric())
169 |   )
170 |   
171 |   expect_error(c(1, 2, Inf) %>% find_inf())
172 | })
173 | 
174 | test_that("find_irrational", {
175 |   expect_equal(tibble(a = c(1, 1, 2),
176 |                           b = c(1, 1, NA),
177 |                           c = c(3, 3, Inf)) %>% find_irrational(),
178 |                tibble(a = c(2),
179 |                           b = as.numeric(c(NA)),
180 |                           c = as.numeric(c(Inf)))
181 |   )
182 |   expect_equal(tibble(a = c(1, 1, 2),
183 |                           b = c(1, 1, NA),
184 |                           c = c(3, 3, Inf)) %>% find_irrational(b),
185 |                tibble(a = as.numeric(c()),
186 |                           b = as.numeric(c()),
187 |                           c = as.numeric(c()))
188 |   )
189 |   expect_equal(tibble(a = c(Inf, 1, 2),
190 |                           b = c(1, 1, NA),
191 |                           c = c(3, 3, NA)) %>% find_irrational(a),
192 |                tibble(a = as.numeric(c(Inf)),
193 |                           b = as.numeric(c(1)),
194 |                           c = as.numeric(c(3)))
195 |   )
196 |   expect_equal(tibble(a = c(NaN, 1, 2),
197 |                           b = c(1, 1, NA),
198 |                           c = c(3, 3, NA)) %>% find_irrational(-b),
199 |                tibble(a = c(NaN, 1, 2),
200 |                           b = c(1, 1, NA),
201 |                           c = c(3, 3, NA)) %>% find_irrational(a, c)
202 |   )
203 |   expect_equal(tibble(a = c(1, 1, 2),
204 |                           b = c(1, Inf, NA),
205 |                           c = c(3, 3, NA)) %>% find_irrational(a:b),
206 |                tibble(a = c(1),
207 |                           b = c(Inf) %>% as.numeric(),
208 |                           c = c(3) %>% as.numeric())
209 |   )
210 |   
211 |   expect_error(c(1, 2, Inf) %>% find_irrational())
212 | })
213 | 
214 | 


--------------------------------------------------------------------------------
/tests/testthat/test.ifs.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(hablar)
 3 | 
 4 | context("ifs")
 5 | test_that("if_else_", {
 6 |   # No missing argument
 7 |   expect_equal(if_else_(c(T, F, NA),
 8 |                         as.numeric(c(1, 2, 3)),
 9 |                         as.numeric(c(99))),
10 |                as.numeric(c(1, 99, NA))
11 |   )
12 |   expect_equal(if_else_(c(T, F, NA),
13 |                         as.numeric(c(1, 2, 3)),
14 |                         as.numeric(c(4, 5, 6))),
15 |                as.numeric(c(1, 5, NA))
16 |   )
17 |   expect_equal(if_else_(c(T, F, NA),
18 |                         as.numeric(c(1, 2, 3)),
19 |                         NA),
20 |                as.numeric(c(1, NA, NA))
21 |   )
22 |   expect_equal(if_else_(c(T, F, NA),
23 |                         NA,
24 |                         "hey"),
25 |                as.character(c(NA, "hey", NA))
26 |   )
27 |   expect_equal(if_else_(c(T, F, NA),
28 |                         NA,
29 |                         c(1L, 2L, 3L)),
30 |                as.integer(c(NA, 2L, NA))
31 |   )
32 |   expect_equal(if_else_(c(T, F, NA),
33 |                         NA,
34 |                         as.factor(c("A", "B", "C"))),
35 |                factor(c(NA, "B", NA), levels = c("A", "B", "C"))
36 |                )
37 |   expect_equal(if_else_(c(T, F, NA),
38 |                         as.factor(c("A", "B", "C")),
39 |                         NA),
40 |                factor(c("A", NA, NA), levels = c("A", "B", "C"))
41 |   )
42 |   expect_equal(if_else_(c(T, F, NA),
43 |                         as.Date(c("2019-01-01", "2019-01-01", "2019-01-01")),
44 |                         as.Date("2018-01-01")),
45 |                as.Date(c("2019-01-01", "2018-01-01", NA))
46 |   )
47 |   expect_equal(if_else_(c(T, F, NA),
48 |                         as.Date(c("2019-01-01", "2019-01-01", "2019-01-01")),
49 |                         NA),
50 |                as.Date(c("2019-01-01", NA, NA)) 
51 |   )
52 | 
53 |   expect_error(if_else_(c(T, F, NA),
54 |                         "1",
55 |                         1L))
56 |   expect_warning(if_else_(c(T, F, NA),
57 |                         as.factor(1),
58 |                         factor(1, levels = c("1", "2"))))
59 | })
60 | 
61 | test_that("replacers if_*", {
62 |   # No missing argument
63 |   expect_equal(if_na(c(1, NA, 3), 99), c(1, 99, 3))
64 |   expect_equal(if_not_na(c(1, NA, 3), 99), c(99, NA, 99))
65 |   expect_equal(if_nan(c(1, NaN, NA), 99), c(1, 99, NA))
66 |   expect_equal(if_inf(c(1, Inf, -Inf, NA), 99), c(1, 99, 99, NA))
67 |   expect_equal(if_zero(c(1, 0, 0, NA), 99), c(1, 99, 99, NA))
68 | 
69 |   expect_equal(if_na(c(1, NA, 3), NA), c(1, NA, 3))
70 |   expect_equal(if_nan(c(1, NaN, NA), NA), c(1, NA, NA))
71 |   expect_equal(if_inf(c(1, Inf, -Inf, NA), NA), c(1, NA, NA, NA))
72 |   expect_equal(if_zero(c(1, 0, 0, NA), NA), c(1, NA, NA, NA))
73 | 
74 |   expect_equal(if_nan(c(1, NaN, NA), 99), c(1, 99, NA))
75 |   expect_equal(if_inf(c(1, Inf, -Inf, NA), 99), c(1, 99, 99, NA))
76 |   expect_equal(if_zero(c(1, 0, 0, NA), 99), c(1, 99, 99, NA))
77 |   
78 |   expect_equal(if_zero(c(1, 0, 0, NA), 99, 9999), c(1, 99, 99, 9999))
79 | 
80 |   expect_warning(if_na(as.factor(c(1, NA, 3)), 
81 |                      as.factor(99)))
82 |   expect_error(if_na(c(1, 2, NA), "hej"))
83 | })
84 | 
85 | test_that("replacers *_if", {
86 |   # No missing argument
87 |   expect_equal(na_if(c(1, 2, 3), c(1, 2, 3) == 2), c(1, NA, 3))
88 |   expect_equal(nan_if(c(1, 2, 3), c(1, 2, 3) == 2), c(1, NaN, 3))
89 |   expect_equal(inf_if(c(1, 2, 3), c(1, 2, 3) == 2), c(1, Inf, 3))
90 |   expect_equal(zero_if(c(1, 2, 3), c(1, 2, 3) == 2), c(1, 0, 3))
91 |   
92 |   expect_equal(na_if(c(1, 2, NA), c(1, 2, NA) == 2, replace_na = TRUE), c(1, NA, NA))
93 | 
94 |   expect_error(na_if(c(1, 2, NA),c(1, 2, NA)))
95 | })
96 | 
97 | 
98 | 
99 | 


--------------------------------------------------------------------------------
/tests/testthat/test.math.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(hablar)
 3 | 
 4 | context("math")
 5 | test_that("two sided minus_", {
 6 |   expect_equal(1 %minus_% 2, 1-2)
 7 |   expect_equal(1 %minus_% as.numeric(NA), 1)
 8 |   expect_equal(as.numeric(NA) %minus_% 3, -3)
 9 | })
10 | 
11 | test_that("two sided plus_", {
12 |   expect_equal(1 %plus_% 2, 1+2)
13 |   expect_equal(1 %plus_% as.numeric(NA), 1)
14 |   expect_equal(as.numeric(NA) %plus_% 3, 3)
15 |   
16 |   df <- tibble(x = c(1, 2, 3),
17 |                y = c(-1, -2, NA))
18 |   expect_equal(df %>% mutate(z = x %plus_% y),
19 |                tibble(x = c(1, 2, 3),
20 |                       y = c(-1, -2, NA),
21 |                       z = c(0, 0, 3)))
22 | })
23 | 
24 | 


--------------------------------------------------------------------------------
/tests/testthat/test.n_unique.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(hablar)
 3 | 
 4 | context("n_unique")
 5 | 
 6 | test_that("n_unique", {
 7 |   expect_equal(n_unique(c(1, 1, 2)), 2L)
 8 |   expect_equal(n_unique(c(1, 1, 2, NA)), 3L)
 9 | })
10 | 
11 | test_that("n_unique_", {
12 |   expect_equal(n_unique_(c(1, 1, 2)), 2L)
13 |   expect_equal(n_unique_(c(1, 1, 2, NA)), 2L)
14 | })
15 | 
16 | 


--------------------------------------------------------------------------------
/tests/testthat/test.rationalize.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(hablar)
 3 | 
 4 | context("rationalize")
 5 | test_that("vectors", {
 6 |   expect_equal(rationalize(as.numeric(c(Inf))), as.numeric(c(NA)))
 7 |   expect_equal(rationalize(as.numeric(c(-Inf))), as.numeric(c(NA)))
 8 |   expect_equal(rationalize(as.numeric(c(NaN))), as.numeric(c(NA)))
 9 |   expect_equal(rationalize(as.numeric(c(NaN, 1, NA))), as.numeric(c(NA, 1, NA)))
10 |   expect_equal(rationalize(as.numeric(c(NaN, Inf, 5))), as.numeric(c(NA, NA, 5)))
11 |   expect_equal(rationalize(as.numeric(c(NaN, Inf, 5.67))), as.numeric(c(NA, NA, 5.67)))
12 |   expect_equal(rationalize(as.numeric()), as.numeric())
13 |   expect_equal(rationalize(as.Date(c(NA))), as.Date(c(NA)))
14 | 
15 |   expect_equal(rationalize(as.integer(c(1, 2, 3))), as.integer(c(1, 2, 3)))
16 |   expect_equal(rationalize(as.integer(c(1, 2, 3, NA))), as.integer(c(1, 2, 3, NA)))
17 |   expect_equal(rationalize(as.character(c("a", "Inf", "b", NA))), as.character(c("a", "Inf", "b", NA)))
18 | 
19 |   expect_equal(rationalize(list("a", "b")), list("a", "b"))
20 | })
21 | 
22 | test_that("data.frame", {
23 |   df <- tibble(a = as.numeric(c(NA, Inf, 3.67, NaN, -Inf)),
24 |                b = as.numeric(c(NA, Inf, 3.67, 3, 4)),
25 |                c = as.numeric(c(NA, NA, NA, NA, NA)),
26 |                d = as.character(c(rep("foo", 3), rep("bar", 2))),
27 |                e = as.integer(c(1, 2, 3, NA, 5)))
28 | 
29 |   expect_equal(rationalize(df), tibble(a = as.numeric(c(NA, NA, 3.67, NA, NA)),
30 |                                        b = as.numeric(c(NA, NA, 3.67, 3, 4)),
31 |                                        c = as.numeric(c(NA, NA, NA, NA, NA)),
32 |                                        d = as.character(c(rep("foo", 3), rep("bar", 2))),
33 |                                        e = as.integer(c(1, 2, 3, NA, 5))))
34 | 
35 |   expect_equal(rationalize(df, a), tibble(a = as.numeric(c(NA, NA, 3.67, NA, NA)),
36 |                                           b = as.numeric(c(NA, Inf, 3.67, 3, 4)),
37 |                                           c = as.numeric(c(NA, NA, NA, NA, NA)),
38 |                                           d = as.character(c(rep("foo", 3), rep("bar", 2))),
39 |                                           e = as.integer(c(1, 2, 3, NA, 5))))
40 | 
41 |   expect_equal(rationalize(df, -a), tibble(a = as.numeric(c(NA, Inf, 3.67, NaN, -Inf)),
42 |                                            b = as.numeric(c(NA, NA, 3.67, 3, 4)),
43 |                                            c = as.numeric(c(NA, NA, NA, NA, NA)),
44 |                                            d = as.character(c(rep("foo", 3), rep("bar", 2))),
45 |                                            e = as.integer(c(1, 2, 3, NA, 5))))
46 | 
47 |   expect_equal(rationalize(df, 1:5), tibble(a = as.numeric(c(NA, NA, 3.67, NA, NA)),
48 |                                             b = as.numeric(c(NA, NA, 3.67, 3, 4)),
49 |                                             c = as.numeric(c(NA, NA, NA, NA, NA)),
50 |                                             d = as.character(c(rep("foo", 3), rep("bar", 2))),
51 |                                             e = as.integer(c(1, 2, 3, NA, 5))))
52 | 
53 |   expect_equal(rationalize(df, c:e), df)
54 |   expect_equal(df %>% rationalize(c:e), df)
55 |   expect_error(rationalize(df, q))
56 | })
57 | 


--------------------------------------------------------------------------------
/tests/testthat/test.retype.R:
--------------------------------------------------------------------------------
  1 | library(testthat)
  2 | library(hablar)
  3 | 
  4 | context("retype")
  5 | test_that("vectors", {
  6 |   expect_equal(retype(as.numeric(c(3.56))), as.numeric(c(3.56)))
  7 |   expect_equal(retype(as.numeric(c(3))), as.integer(c(3)))
  8 |   expect_equal(retype(as.numeric(c(NA, NA))), as.numeric(c(NA, NA)))
  9 | 
 10 |   expect_equal(retype(as.logical(c(T))), as.integer(c(1)))
 11 |   expect_equal(retype(as.logical(c(NA, F))), as.integer(c(NA, 0)))
 12 | 
 13 |   expect_equal(retype(as.integer(c(NA, NA))), as.integer(c(NA, NA)))
 14 |   expect_equal(retype(as.integer(c(1, NA, 3))), as.integer(c(1, NA, 3)))
 15 | 
 16 |   expect_equal(retype(as.character(c("a"))), as.character(c("a")))
 17 |   expect_equal(retype(as.character(c(NA, NA))), as.character(c(NA, NA)))
 18 |   expect_equal(retype(as.character(c("1", "2", NA))), as.integer(c(1, 2, NA)))
 19 |   expect_equal(retype(as.character(c("1", "2,0", NA))), as.character(c("1", "2,0", NA)))
 20 |   expect_equal(retype(as.character(c("1", "2.5", NA))), as.numeric(c(1, 2.5, NA)))
 21 |   expect_equal(retype(as.character(c("2018-01-25", "2018-01-27", NA))), as.Date(c("2018-01-25", "2018-01-27", NA)))
 22 |   expect_equal(retype(as.character(c("2018-10-10 11:30:20 CEST", "2018-10-10 11:30:20 CEST", NA))), 
 23 |                as.POSIXct(c("2018-10-10 11:30:20 CEST", "2018-10-10 11:30:20 CEST", NA)))
 24 |   expect_equal(retype(as.character(c("2018-10-10 11:30:20 CEST", "2018-10-10 11:30:25 CEST", NA))), as.POSIXct(c("2018-10-10 11:30:20 CEST", "2018-10-10 11:30:25 CEST", NA)))
 25 | 
 26 |   expect_equal(retype(as.factor(c("a"))), as.character(c("a")))
 27 |   expect_equal(retype(as.factor(c(NA, NA))), as.character(c(NA, NA)))
 28 |   expect_equal(retype(as.factor(c("3", "2", NA))), as.integer(c(3, 2, NA)))
 29 |   expect_equal(retype(as.factor(c("1", "7,0", NA))), as.character(c("1", "7,0", NA)))
 30 |   expect_equal(retype(as.factor(c("1", "2.5", NA))), as.numeric(c(1, 2.5, NA)))
 31 |   expect_equal(retype(as.factor(c("1", "2,5", NA))), as.character(c("1", "2,5", NA)))
 32 |   expect_equal(retype(as.factor(c("2018-01-25", "2018-01-27", NA))), as.Date(c("2018-01-25", "2018-01-27", NA)))
 33 |   expect_equal(retype(as.factor(c("2018-10-10 11:30:20 CEST", "2018-10-10 11:30:20 CEST", NA))), as.POSIXct(c("2018-10-10 11:30:20 CEST", "2018-10-10 11:30:20 CEST", NA)))
 34 |   expect_equal(retype(as.factor(c("2018-10-10 11:30:20 CEST", "2018-10-10 11:30:25 CEST", NA))), as.POSIXct(c("2018-10-10 11:30:20 CEST", "2018-10-10 11:30:25 CEST", NA)))
 35 | 
 36 |   expect_equal(retype(list("a", "b")), list("a", "b"))
 37 | })
 38 | 
 39 | 
 40 | 
 41 | test_that("data.frame", {
 42 |   df <- tibble(a = as.numeric(c(1, Inf, 3, 2, 4)),
 43 |                b = as.numeric(c(1, 7, 3, 2, 4)),
 44 |                c = as.numeric(c(1, NA, 3, 2, 4)),
 45 |                d = as.numeric(c(Inf, Inf, Inf, Inf, Inf)),
 46 |                e = as.numeric(c(3.5, 3.7, NaN, Inf, Inf)),
 47 | 
 48 |                f = as.integer(c(1, 2, 3, NA, 5)),
 49 | 
 50 |                g = as.numeric(c(NA, NA, NA, NA, NA)),
 51 |                h = as.integer(c(NA, NA, NA, NA, NA)),
 52 |                i = as.Date(c(NA, NA, NA, NA, NA)),
 53 |                j = as.POSIXct(c(NA, NA, NA, NA, NA)),
 54 |                k = as.factor(c(NA, NA, NA, NA, NA)),
 55 | 
 56 |                l = as.factor(c("2018-02-01", "2018-04-15", NA, NA, "2016-05-15")),
 57 |                m = as.factor(c(1, 2, 3, 4, 5)),
 58 |                n = as.factor(c("a", "b", NA, "q", NA)),
 59 | 
 60 |                o = as.character(c("2018-02-01", "2018-04-15", NA, NA, "2016-05-15")),
 61 |                p = as.character(c("2018-02-01 01:00", "2018-04-15 01:00", NA, NA, "2016-05-15 01:00")),
 62 |                q = as.character(c("2018-02-01 01:00", "2018-04-15 16:56", NA, NA, "2016-05-15 17:10")),
 63 |                r = as.character(c("2018-02-01 01:00 CEST", "2018-04-15 16:56 CEST", NA, NA, "2016-05-15 17:10 CEST")),
 64 |                s = as.character(c("2018-02-01 01:00 CEST", "2018-04-15 16:56 CEST", NA, NA, "2016-05-15 17:10 CEST")),
 65 | 
 66 |                t = as.POSIXct(c("2018-02-01 01:00", "2018-04-15 01:00", NA, NA, "2016-05-15 01:00")),
 67 |                u = as.POSIXct(c("2018-02-01 01:00 CEST", "2018-04-15 16:56 CEST", NA, NA, "2016-05-15 17:10 CEST")),
 68 | 
 69 |                v = as.logical(c(T, T, NA, F, T)),
 70 |                w = as.character(c("1", "1.78", NA, "NaN", "1")))
 71 | 
 72 |   expect_equal(retype(df), tibble(a = as.numeric(c(1, Inf, 3, 2, 4)),
 73 |                                       b = as.integer(c(1, 7, 3, 2, 4)),
 74 |                                       c = as.integer(c(1, NA, 3, 2, 4)),
 75 |                                       d = as.numeric(c(Inf, Inf, Inf, Inf, Inf)),
 76 |                                       e = as.numeric(c(3.5, 3.7, NaN, Inf, Inf)),
 77 | 
 78 |                                       f = as.integer(c(1, 2, 3, NA, 5)),
 79 | 
 80 |                                       g = as.numeric(c(NA, NA, NA, NA, NA)),
 81 |                                       h = as.integer(c(NA, NA, NA, NA, NA)),
 82 |                                       i = as.Date(c(NA, NA, NA, NA, NA)),
 83 |                                       j = as.POSIXct(c(NA, NA, NA, NA, NA)),
 84 |                                       k = as.character(c(NA, NA, NA, NA, NA)),
 85 | 
 86 |                                       l = as.Date(c("2018-02-01", "2018-04-15", NA, NA, "2016-05-15")),
 87 |                                       m = as.integer(c(1, 2, 3, 4, 5)),
 88 |                                       n = as.character(c("a", "b", NA, "q", NA)),
 89 | 
 90 |                                       o = as.Date(c("2018-02-01", "2018-04-15", NA, NA, "2016-05-15")),
 91 |                                       p = as.POSIXct(c("2018-02-01 01:00", "2018-04-15 01:00", NA, NA, "2016-05-15 01:00")),
 92 |                                       q = as.POSIXct(c("2018-02-01 01:00", "2018-04-15 16:56", NA, NA, "2016-05-15 17:10")),
 93 |                                       r = as.POSIXct(c("2018-02-01 01:00 CEST", "2018-04-15 16:56 CEST", NA, NA, "2016-05-15 17:10 CEST")),
 94 |                                       s = as.POSIXct(c("2018-02-01 01:00 CEST", "2018-04-15 16:56 CEST", NA, NA, "2016-05-15 17:10 CEST")),
 95 | 
 96 |                                       t = as.POSIXct(c("2018-02-01 01:00", "2018-04-15 01:00", NA, NA, "2016-05-15 01:00")),
 97 |                                       u = as.POSIXct(c("2018-02-01 01:00 CEST", "2018-04-15 16:56 CEST", NA, NA, "2016-05-15 17:10 CEST")),
 98 | 
 99 |                                       v = as.integer(c(1, 1, NA, 0, 1)),
100 |                                       w = as.numeric(c(1, 1.78, NA, NaN, 1))))
101 | 
102 |   expect_equal(retype(df, p), df %>% mutate_at(vars(p), ~retype(.)))
103 |   expect_equal(retype(df, p), df %>% mutate_at(vars(p), ~as.POSIXct(.)))
104 |   expect_equal(retype(df, -t), tibble(a = as.numeric(c(1, Inf, 3, 2, 4)),
105 |                                           b = as.integer(c(1, 7, 3, 2, 4)),
106 |                                           c = as.integer(c(1, NA, 3, 2, 4)),
107 |                                           d = as.numeric(c(Inf, Inf, Inf, Inf, Inf)),
108 |                                           e = as.numeric(c(3.5, 3.7, NaN, Inf, Inf)),
109 | 
110 |                                           f = as.integer(c(1, 2, 3, NA, 5)),
111 | 
112 |                                           g = as.numeric(c(NA, NA, NA, NA, NA)),
113 |                                           h = as.integer(c(NA, NA, NA, NA, NA)),
114 |                                           i = as.Date(c(NA, NA, NA, NA, NA)),
115 |                                           j = as.POSIXct(c(NA, NA, NA, NA, NA)),
116 |                                           k = as.character(c(NA, NA, NA, NA, NA)),
117 | 
118 |                                           l = as.Date(c("2018-02-01", "2018-04-15", NA, NA, "2016-05-15")),
119 |                                           m = as.integer(c(1, 2, 3, 4, 5)),
120 |                                           n = as.character(c("a", "b", NA, "q", NA)),
121 | 
122 |                                           o = as.Date(c("2018-02-01", "2018-04-15", NA, NA, "2016-05-15")),
123 |                                           p = as.POSIXct(c("2018-02-01 01:00", "2018-04-15 01:00", NA, NA, "2016-05-15 01:00")),
124 |                                           q = as.POSIXct(c("2018-02-01 01:00", "2018-04-15 16:56", NA, NA, "2016-05-15 17:10")),
125 |                                           r = as.POSIXct(c("2018-02-01 01:00 CEST", "2018-04-15 16:56 CEST", NA, NA, "2016-05-15 17:10 CEST")),
126 |                                           s = as.POSIXct(c("2018-02-01 01:00 CEST", "2018-04-15 16:56 CEST", NA, NA, "2016-05-15 17:10 CEST")),
127 | 
128 |                                           t = as.POSIXct(c("2018-02-01 01:00", "2018-04-15 01:00", NA, NA, "2016-05-15 01:00")),
129 |                                           u = as.POSIXct(c("2018-02-01 01:00 CEST", "2018-04-15 16:56 CEST", NA, NA, "2016-05-15 17:10 CEST")),
130 | 
131 |                                           v = as.integer(c(1, 1, NA, 0, 1)),
132 |                                           w = as.numeric(c(1, 1.78, NA, NaN, 1))))
133 | })
134 | 


--------------------------------------------------------------------------------
/tests/testthat/test.s.R:
--------------------------------------------------------------------------------
  1 | library(testthat)
  2 | library(hablar)
  3 | 
  4 | context("s")
  5 | test_that("vectors", {
  6 |   expect_equal(s(as.numeric(c(1, 2))), as.numeric(c(1, 2)))
  7 |   expect_equal(s(as.numeric(c(1, NA))), as.numeric(c(1)))
  8 |   expect_equal(s(as.numeric(c(1, NA)), ignore_na = F), as.numeric(c(1, NA)))
  9 |   expect_equal(s(as.numeric(c(1.67, NA)), ignore_na = F), as.numeric(c(1.67, NA)))
 10 |   expect_equal(s(as.numeric(c(1.67, NA)), ignore_na = F), as.numeric(c(1.67, NA)))
 11 |   expect_equal(s(as.numeric(c(1.67, NaN)), ignore_na = F), as.numeric(c(1.67, NA)))
 12 |   expect_equal(s(as.numeric(c(NA, 1.67, NaN))), as.numeric(c(1.67)))
 13 |   expect_equal(s(as.numeric(c(NA, 1.67, NaN)), ignore_na = F), as.numeric(c(NA, 1.67, NA)))
 14 | 
 15 |   expect_equal(s(as.integer(c(9, NA))), as.integer(c(9)))
 16 |   expect_equal(s(as.integer(c(9, NA)), ignore_na = F), as.integer(c(9, NA)))
 17 | 
 18 |   expect_equal(s(as.Date(c("2018-05-18", NA))), as.Date(c("2018-05-18")))
 19 |   expect_equal(s(as.Date(c("2018-05-18", NA)), ignore_na = F), as.Date(c("2018-05-18", NA)))
 20 | 
 21 |   expect_equal(s(as.POSIXct(c("2018-05-18", NA))), as.POSIXct(c("2018-05-18")))
 22 |   expect_equal(s(as.POSIXct(c("2018-05-18", NA)), ignore_na = F), as.POSIXct(c("2018-05-18", NA)))
 23 | 
 24 |   expect_equal(s(as.logical(c(T, NA))), as.logical(c(T)))
 25 |   expect_equal(s(as.logical(c(T, NA)), ignore_na = F), as.logical(c(T, NA)))
 26 | 
 27 |   expect_equal(s(as.numeric(c(NA, NA)), ignore_na = F), as.numeric(NA))
 28 |   expect_equal(s(as.Date(c(NA, NA))), as.Date(NA))
 29 |   expect_equal(s(as.POSIXct(c(NA, NA))), as.POSIXct(NA))
 30 |   expect_equal(s(as.character(c(NA, NA))), as.character(NA))
 31 |   expect_equal(s(as.character(c(NA, NA))), NA_character_)
 32 |   expect_equal(s(as.integer(c(NA, NA))), as.integer(NA))
 33 | 
 34 |   expect_error(s(as.factor(c(NA, NA))))
 35 |   expect_error(s(as.factor(c(2, 3, NA))))
 36 | })
 37 | 
 38 | 
 39 | test_that("s and aggregators", {
 40 |   expect_equal(min(s(as.numeric(c(1, 2)))), as.numeric(c(1)))
 41 |   expect_equal(min(s(as.numeric(c()))), min(as.numeric(c(NA))))
 42 |   expect_equal(min(s(as.numeric(c(1, 2, NA)))), as.numeric(c(1)))
 43 |   expect_equal(min(s(as.numeric(c(NaN, 2, NA)))), as.numeric(c(2)))
 44 |   expect_equal(min(s(as.numeric(c(NaN, 2, Inf)))), as.numeric(c(2)))
 45 |   expect_equal(min(s(as.numeric(c(NaN, NA, Inf)))), as.numeric(NA))
 46 |   expect_equal(min(s(as.numeric(c(NaN, 2, NA)), ignore_na = F)), as.numeric(NA))
 47 | 
 48 |   expect_equal(max(s(as.numeric(c(1, 2)))), as.numeric(c(2)))
 49 |   expect_equal(max(s(as.numeric(c()))), max(as.numeric(c(NA))))
 50 |   expect_equal(max(s(as.numeric(c(1, 2, NA)))), as.numeric(c(2)))
 51 |   expect_equal(max(s(as.numeric(c(NaN, 2, NA)))), as.numeric(c(2)))
 52 |   expect_equal(max(s(as.numeric(c(NaN, 2, Inf)))), as.numeric(c(2)))
 53 |   expect_equal(max(s(as.numeric(c(NaN, NA, Inf)))), as.numeric(NA))
 54 |   expect_equal(max(s(as.numeric(c(NaN, 2, NA)), ignore_na = F)), as.numeric(NA))
 55 | 
 56 |   expect_equal(mean(s(as.numeric(c(1, 2)))), as.numeric(c(1.5)))
 57 |   expect_equal(mean(s(as.numeric(c()))), mean(as.numeric(c(NA))))
 58 |   expect_equal(mean(s(as.numeric(c(1, 2, NA)))), as.numeric(c(1.5)))
 59 |   expect_equal(mean(s(as.numeric(c(NaN, 2, NA)))), as.numeric(c(2)))
 60 |   expect_equal(mean(s(as.numeric(c(NaN, 2, Inf)))), as.numeric(c(2)))
 61 |   expect_equal(mean(s(as.numeric(c(NaN, NA, Inf)))), as.numeric(NA))
 62 |   expect_equal(mean(s(as.numeric(c(NaN, 2, NA)), ignore_na = F)), as.numeric(NA))
 63 | 
 64 |   expect_equal(first(s(as.numeric(c(1, 2)))), as.numeric(c(1)))
 65 |   expect_equal(first(s(as.numeric(c()))), first(as.numeric(c(NA))))
 66 |   expect_equal(first(s(as.numeric(c(1, 2, NA)))), as.numeric(c(1)))
 67 |   expect_equal(first(s(as.numeric(c(NaN, 2, NA)))), as.numeric(c(2)))
 68 |   expect_equal(first(s(as.numeric(c(NaN, 2, Inf)))), as.numeric(c(2)))
 69 |   expect_equal(first(s(as.numeric(c(NaN, NA, Inf)))), as.numeric(NA))
 70 |   expect_equal(first(s(as.numeric(c(NaN, 2, NA)), ignore_na = F)), as.numeric(NA))
 71 | 
 72 |   expect_error(s(as.factor(c(2, 3, NA))))
 73 | })
 74 | 
 75 | 
 76 | test_that("s and aggregators - wrappers", {
 77 |   expect_equal(min_(as.numeric(c(1, 2))), as.numeric(c(1)))
 78 |   expect_equal(min_(as.numeric(c())), min(as.numeric(c(NA))))
 79 |   expect_equal(min_(as.numeric(c(1, 2, NA))) , as.numeric(c(1)))
 80 |   expect_equal(min_(as.numeric(c(NaN, 2, NA))), as.numeric(c(2)))
 81 | 
 82 |   expect_equal(min_(as.numeric(c(NaN, 2, Inf))), as.numeric(c(2)))
 83 |   expect_equal(min_(as.numeric(c(NaN, NA, Inf))), as.numeric(NA))
 84 |   expect_equal(min_(as.numeric(c(NaN, 2, NA)), ignore_na = F), as.numeric(NA))
 85 | 
 86 |   expect_equal(max_(as.numeric(c(1, 2))), as.numeric(c(2)))
 87 |   expect_equal(max_(as.numeric(c())), max(as.numeric(c(NA))))
 88 |   expect_equal(max_(as.numeric(c(1, 2, NA))), as.numeric(c(2)))
 89 |   expect_equal(max_(as.numeric(c(NaN, 2, NA))), as.numeric(c(2)))
 90 |   expect_equal(max_(as.numeric(c(NaN, 2, Inf))), as.numeric(c(2)))
 91 |   expect_equal(max_(as.numeric(c(NaN, NA, Inf))), as.numeric(NA))
 92 |   expect_equal(max_(as.numeric(c(NaN, 2, NA)), ignore_na = F), as.numeric(NA))
 93 | 
 94 |   expect_equal(mean_(as.numeric(c(1, 2))), as.numeric(c(1.5)))
 95 |   expect_equal(mean_(as.numeric(c())), mean(as.numeric(c(NA))))
 96 |   expect_equal(mean_(as.numeric(c(1, 2, NA))), as.numeric(c(1.5)))
 97 |   expect_equal(mean_(as.numeric(c(NaN, 2, NA))), as.numeric(c(2)))
 98 |   expect_equal(mean_(as.numeric(c(NaN, 2, Inf))), as.numeric(c(2)))
 99 |   expect_equal(mean_(as.numeric(c(NaN, NA, Inf))), as.numeric(NA))
100 |   expect_equal(mean_(as.numeric(c(NaN, 2, NA)), ignore_na = F), as.numeric(NA))
101 | 
102 |   expect_equal(first_(as.numeric(c(1, 2))), as.numeric(c(1)))
103 |   expect_equal(first_(as.numeric(c())), first(as.numeric(c(NA))))
104 |   expect_equal(first_(as.numeric(c(1, 2, NA))), as.numeric(c(1)))
105 |   expect_equal(first_(as.numeric(c(NaN, 2, NA))), as.numeric(c(2)))
106 |   expect_equal(first_(as.numeric(c(NaN, 2, Inf))), as.numeric(c(2)))
107 |   expect_equal(first_(as.numeric(c(NaN, NA, Inf))), as.numeric(NA))
108 |   expect_equal(first_(as.numeric(c(NaN, 2, NA)), ignore_na = F), as.numeric(NA))
109 |   
110 |   expect_equal(last_(as.numeric(c(1, 2))), as.numeric(c(2)))
111 |   expect_equal(last_(as.numeric(c())), last(as.numeric(c(NA))))
112 |   expect_equal(last_(as.numeric(c(1, 2, NA))), as.numeric(c(2)))
113 |   expect_equal(last_(as.numeric(c(NaN, 2, NA))), as.numeric(c(2)))
114 |   expect_equal(last_(as.numeric(c(NaN, 2, Inf))), as.numeric(c(2)))
115 |   expect_equal(last_(as.numeric(c(NaN, NA, Inf))), as.numeric(NA))
116 |   expect_equal(last_(as.numeric(c(NaN, 2, NA)), ignore_na = F), as.numeric(NA))
117 |   
118 |   expect_equal(sd_(as.numeric(c(1, 2, 3, 4))), as.numeric(sd(c(1, 2, 3, 4))))
119 |   expect_equal(sd_(as.numeric(c())), last(as.numeric(c(NA))))
120 |   expect_equal(sd_(as.numeric(c(1, 2, NA, 3, 4))), as.numeric(sd(c(1, 2, 3, 4))))
121 |   expect_equal(sd_(as.numeric(c(NaN, 1, 2, NA, 3, 4))), as.numeric(sd(c(1, 2, 3, 4))))
122 |   expect_equal(sd_(as.numeric(c(NaN, 1, 2, Inf, 3, 4))), as.numeric(sd(c(1, 2, 3, 4))))
123 |   expect_equal(sd_(as.numeric(c(NaN, NA, Inf))), as.numeric(NA))
124 |   expect_equal(sd_(as.numeric(c(NaN, 2, NA, 3, 4)), ignore_na = F), as.numeric(NA))
125 |   
126 |   expect_equal(var_(as.numeric(c(1, 2, 3, 4))), as.numeric(var(c(1, 2, 3, 4))))
127 |   expect_equal(var_(as.numeric(c())), last(as.numeric(c(NA))))
128 |   expect_equal(var_(as.numeric(c(1, 2, NA, 3, 4))), as.numeric(var(c(1, 2, 3, 4))))
129 |   expect_equal(var_(as.numeric(c(NaN, 1, 2, NA, 3, 4))), as.numeric(var(c(1, 2, 3, 4))))
130 |   expect_equal(var_(as.numeric(c(NaN, 1, 2, Inf, 3, 4))), as.numeric(var(c(1, 2, 3, 4))))
131 |   expect_equal(var_(as.numeric(c(NaN, NA, Inf))), as.numeric(NA))
132 |   expect_equal(var_(as.numeric(c(NaN, 2, NA, 3, 4)), ignore_na = F), as.numeric(NA))
133 | 
134 |   expect_error(min_(as.factor(c(2, 3, NA))))
135 |   expect_equal(length(mean_(as.numeric(c()))), 1)
136 | })
137 | 
138 | 
139 | test_that("s and aggregators - dplyr", {
140 |   expect_equal(mtcars %>% mutate(max_gear = max_(gear[vs == 2])), mtcars %>% mutate(max_gear = as.numeric(NA)))
141 | })
142 | 
143 | 
144 | test_that("first_non_na and squeeze", {
145 |   expect_equal(squeeze(c(1, 1, 1)), c(1))
146 |   expect_error(squeeze(c(NA, NA, NA)))
147 |   expect_error(squeeze(c(1, 1, NA)))
148 |   expect_error(squeeze(c(1, 2)))
149 |   expect_error(squeeze(c(1, Inf)))
150 |   
151 |   expect_equal(squeeze_(c(1, 1, 1)), c(1))
152 |   expect_equal(squeeze_(c(NA, NA, NA)), c(NA))
153 |   expect_equal(squeeze_(as.numeric(c(NA, NA, NA))), as.numeric(c(NA)))
154 |   expect_equal(squeeze_(c(1, 1, NA)), c(1))
155 |   expect_error(squeeze_(c(1, 2)))
156 |   expect_equal(squeeze_(c(1, Inf)), c(1))
157 |   
158 |   expect_equal(first_non_na(c(1, 1, 1)), c(1))
159 |   expect_equal(first_non_na(c(1, 1, NA)), c(1))
160 |   expect_equal(first_non_na(c(1, 2)), c(1))
161 |   expect_equal(first_non_na(c(Inf, 1)), c(1))
162 |   expect_equal(first_non_na(c(NA, NA)), c(NA))
163 | })
164 | 
165 | test_that("empty groups of non-missing data", {
166 |   # Checks that if grouped df with groups of only NA keeps correct NA class
167 |   expect_silent(data.frame(id = c(1, 1, 2, 2),
168 |                            date = as.Date(c(NA, NA, as.Date("2022-06-20"), NA), origin = "1970-01-01")) %>%
169 |                   group_by(id) %>%
170 |                   summarise(max_date = max_(date)))
171 | })


--------------------------------------------------------------------------------
/tests/testthat/test.session_funs.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(hablar)
3 | 
4 | context("session_fun")
5 | test_that("set_path*", {
6 |   expect_error(set_wd_to_script_path())
7 | })
8 | 
9 | 


--------------------------------------------------------------------------------
/vignettes/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/davidsjoberg/hablar/6698fd2973ec6cb99ce7637d899cfc3040c0da74/vignettes/.DS_Store


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/vignettes/convert.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "convert"
  3 | author: "David Sjoberg"
  4 | output: rmarkdown::html_vignette
  5 | vignette: >
  6 |   %\VignetteIndexEntry{convert}
  7 |   %\VignetteEngine{knitr::rmarkdown}
  8 |   %\VignetteEncoding{UTF-8}
  9 | ---
 10 | 
 11 | ```{r setup, include = FALSE}
 12 | knitr::opts_chunk$set(
 13 |   collapse = F,
 14 |   comment = "#>"
 15 | )
 16 | options(tibble.print_min = 4L, tibble.print_max = 4L)
 17 | 
 18 | library(gapminder)
 19 | library(hablar)
 20 | library(dplyr)
 21 | ```
 22 | 
 23 | ## `convert` your data types
 24 | ### Before everything there was data type conversion
 25 | Best practise of data analysis is to fix data types directly after importing data into R. This helps in many ways:
 26 | 
 27 | * You only need to do it once
 28 | * If there are any errors you know where in the script it should be fixed
 29 | * It scales up your code. For example, all columns that should be numeric could be converted at the same time.
 30 | 
 31 | Additionally, if every column is converted to its appropriate data type then you won't be surprised  by data type errors the next time you run the script. 
 32 | 
 33 | ### Usage
 34 | `convert(.x, ...)`
 35 | where `.x` is a data frame. `...` is a placeholder for data type specific conversion functions.
 36 | 
 37 | ### Support functions
 38 | `convert` must be used in conjunction with data type conversion functions:
 39 | 
 40 | * `chr` converts to character.
 41 | * `num` converts to numeric.
 42 | * `int` converts to integer.
 43 | * `lgl` converts to logical.
 44 | * `fct` converts to factor.
 45 | * `dte` converts to date.
 46 | * `dtm` converts to date time.
 47 | 
 48 | ### The syntax
 49 | Imagine you have a data frame where you want to change columns:
 50 | 
 51 | - `a` and `b` to numerical
 52 | - `c` to date 
 53 | - `d` and `e` to character
 54 | 
 55 | Then you can write:
 56 | 
 57 | `df %>% convert(num(a, b), dte(c), chr(d, e))`
 58 | 
 59 | ### Examples
 60 | 
 61 | The easiest way to understand how simple `convert` is to use is with examples. Have a look at the a gapminder dataset from the package `gapminder`:
 62 | 
 63 | ```{r}
 64 | library(gapminder)
 65 | gapminder
 66 | 
 67 | ```
 68 | 
 69 | We might want to change the country column to character instead of factor. To do this we use `chr` together with the column name inside `convert`:
 70 | 
 71 | ```{r}
 72 | gapminder %>% 
 73 |   convert(chr(country))
 74 | ```
 75 | 
 76 | This converted the country column to the data type character. But we do not have to make this whole procedure for each column if we want to convert more columns. Let's say that we also want to convert continent to character and the column lifeExp to integer, pop to double and gdpPercap to numeric. It is simply done:
 77 | 
 78 | ```{r}
 79 | gapminder %>% 
 80 |   convert(chr(country, 
 81 |               continent),
 82 |           int(lifeExp),
 83 |           dbl(pop),
 84 |           num(gdpPercap))
 85 | ```
 86 | 
 87 | ## I can already convert between data types. Why do I need `convert`?
 88 | You can change alot of data types with little code. Consider using `mutate` from `dplyr` to do the same operation:
 89 | 
 90 | ```{r}
 91 | gapminder %>%
 92 |   mutate(country = as.character(country),
 93 |          continent = as.character(continent),
 94 |          lifeExp = as.integer(lifeExp),
 95 |          pop = as.double(pop),
 96 |          gdpPercap = as.numeric(gdpPercap))
 97 | ```
 98 | 
 99 | Which gives the same result. However, you need to refer to the column name twice and the data type conversion function for each column. Imagine the code to convert 20 columns. 
100 | 
101 | However, `dplyr` have another way of applying the same function to multiple columns which could help, `mutate_at`. The same example would then look like:
102 | 
103 | ```{r}
104 | gapminder %>% 
105 |   mutate_at(vars(country, continent), funs(as.character)) %>% 
106 |   mutate_at(vars(lifeExp), funs(as.integer)) %>% 
107 |   mutate_at(vars(pop), funs(as.double)) %>% 
108 |   mutate_at(vars(gdpPercap), funs(as.numeric))
109 | ```
110 | 
111 | Which is more easily scaled to deal with data type conversion of large numbers of variables. However, `convert` does the same job with much less code. In fact, `convert` uses `mutate_at` internally. The difference is syntax and code readability. Compare again with `convert`:
112 | 
113 | ```{r}
114 | gapminder %>% 
115 |   convert(chr(country, 
116 |                continent),
117 |            int(lifeExp),
118 |            dbl(pop),
119 |            num(gdpPercap))
120 | ```
121 | 
122 | 
123 | ## Adding additional arguments
124 | `convert` also supports functions of `convert` support additional arguments to be passed. For example, if you want to convert a number to a date and want to include an `origin` argument you can write:
125 | ```{r}
126 | tibble(dates = c(12818, 13891),
127 |         sunny = c("yes", "no")) %>% 
128 |   convert(dte(dates, .args = list(origin = "1900-01-01")))
129 | ```
130 | 
131 | 
132 | ## Final note
133 | `convert` is built upon `dplyr` and it will share some amazing features of `dplyr`. For example, `tidyselect` works with `convert` which helps you to select multiple columns at the same time. A simple example, if you want to change all columns with names that includes the letter "e" to factors, you can write:
134 | 
135 | ```{r}
136 | gapminder %>% 
137 |   convert(fct(contains("e")))
138 | ```
139 | 
140 | 


--------------------------------------------------------------------------------
/vignettes/hablar.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "hablar"
  3 | author: David Sjoberg
  4 | output: rmarkdown::html_vignette
  5 | vignette: >
  6 |   %\VignetteIndexEntry{hablar}
  7 |   %\VignetteEngine{knitr::rmarkdown}
  8 |   %\VignetteEncoding{UTF-8}
  9 | ---
 10 | 
 11 | ```{r, include = FALSE}
 12 | knitr::opts_chunk$set(
 13 |   collapse = TRUE,
 14 |   comment = "#>"
 15 | )
 16 | 
 17 | library(hablar)
 18 | library(dplyr)
 19 | library(knitr)
 20 | 
 21 | options(tibble.print_min = 4L, tibble.print_max = 4L)
 22 | ```
 23 | 
 24 | The mission of `hablar` is for you to get non-astonishing results! That means that functions return what you expected. R has some intuitive quirks that beginners and experienced programmers fail to identify. Some of the first weird features of R that `hablar` solves:
 25 | 
 26 | * Missing values `NA` and irrational values `Inf`, `NaN` is dominant. For example, in R `sum(c(1, 2, NA))` is `NA` and not 3. In `hablar` the addition of an underscore `sum_(c(1, 2, NA))` returns 3, as is often expected.
 27 | 
 28 | * Factors (categorical variables) that are converted to numeric returns the number of the category rather than the value. In `hablar` the `convert()` function always changes the type of the values.
 29 | 
 30 | * Finding duplicates, and rows with `NA` can be cumbersome. The functions `find_duplicates()` and `find_na()` make it easy to find where the data frame needs to be fixed. When the issues are found the utility replacement functions, e.g. `if_else_()`, `if_na()`, `zero_if()` easily fixes many of the most common problems you face.
 31 | 
 32 | `hablar` follows the syntax API of `tidyverse` and works seamlessly with `dplyr` and `tidyselect`. 
 33 | 
 34 | ## Missing values that astonishes you
 35 | 
 36 | A common issue in R is how R treats missing values (i.e. `NA`). Sometimes `NA` in your data frame means that there is missing values in the sense that you need to estimate or replace them with values. But often it is not a problem! Often `NA` means that there *is* no value, and should not be. `hablar` provide useful functions that handle `NA` intuitively. Let's take a simple example: 
 37 | 
 38 | ```{r, echo=FALSE}
 39 | df <- tibble(name = c("Fredrik", "Maria", "Astrid"),
 40 |            graduation_date = as.Date(c("2016-06-15",
 41 |                                        NA,
 42 |                                        "2014-06-15")),
 43 |            age = c(21L, 16L, 23L))
 44 | 
 45 | df
 46 | ```
 47 | 
 48 | ##### **Change `min()` to `min_()`**
 49 | 
 50 | The `graduation_date` is missing for Maria. In this case it is not because we do not know. It is because she has not graduated yet, she is younger than Fredrik and Astrid. If we would like to know the first graduation date of the three observation in R with a naive `min()` we get `NA`. But with `min_()` from `hablar` we get the minimum value that is not missing. See:
 51 | 
 52 | ```{r}
 53 | df %>% 
 54 |   mutate(min_baseR = min(graduation_date),
 55 |          min_hablar = min_(graduation_date))
 56 | ```
 57 | 
 58 | The `hablar` package provides the same functionality for 
 59 | 
 60 | * `max_()`
 61 | * `mean_()`
 62 | * `median_()`
 63 | * `sd_()`
 64 | * `first_()` 
 65 | 
 66 | ... and more. For more documentation type `help(min_())` or `vignette("s")` for an in-depth description.
 67 | 
 68 | ## Change type in a snap - safely
 69 | 
 70 | In `hablar` the function `convert` provides a robust, readable and dynamic way to change type of a column.
 71 | 
 72 | ```{r}
 73 | mtcars %>% 
 74 |   convert(int(cyl, am),
 75 |           num(disp:drat))
 76 | ```
 77 | 
 78 | The above chunk converts the columns `cyl` and `am` to integers, and the columns `disp` through `drat` to numeric. If a column is of type `factor` it always converts it to character before further conversion. 
 79 | 
 80 | ##### **Fix all your types in the same function**
 81 | 
 82 | With `convert` and `tidyselect` you can easily change type of a wide range of columns. 
 83 | 
 84 | ```{r}
 85 | mtcars %>% 
 86 |   convert(
 87 |     chr(last_col()),       # Last colum to character
 88 |     int(1:2),              # First two columns to integer
 89 |     fct(hp, wt),           # hp and wt to factors
 90 |     dte(vs),               # vs to date (if you really want)
 91 |     num(contains("car"))   # car as in carb to numeric
 92 |   )           
 93 | ```
 94 | 
 95 | For more information, see `help(hablar)` or `vignette("convert")`.
 96 | 
 97 | ## Find the problem
 98 | 
 99 | When cleaning data you spend a lot of time understanding your data. Sometimes you get more row than you expected when doing a `left_join()`. Or you did not know that certain column contained missing values `NA` or irrational values like `Inf` or `NaN`.
100 | 
101 | In `hablar` the `find_*` functions speeds up your search for the problem. To find duplicated rows you simply `df %>% find_duplicates()`. You can also find duplicates in in specific columns, which can be useful before joins.
102 | 
103 | ```{r}
104 | # Create df with duplicates
105 | df <- mtcars %>% 
106 |   bind_rows(mtcars %>% slice(1, 5, 9))
107 | 
108 | # Return rows with duplicates in cyl and am
109 | df %>% 
110 |   find_duplicates(cyl, am)
111 | ```
112 | 
113 | There are also find functions for other cases. For example `find_na()` returns rows with missing values.
114 | 
115 | ```{r}
116 | starwars %>% 
117 |   find_na(height)
118 | ```
119 | 
120 | If you rather want a Boolean value instead then e.g. `check_duplicates()` returns `TRUE` if the data frame contains duplicates, otherwise it returns `FALSE`.
121 | 
122 | ##### **...apply the solution**
123 | 
124 | Let's say that we have found a problem is caused by missing values in the column `height` and you want to replace all missing values with the integer 100. `hablar` comes with an additional ways of doing if-or-else. 
125 | 
126 | ```{r}
127 | starwars %>% 
128 |   find_na(height) %>% 
129 |   mutate(height = if_na(height, 100L))
130 | ```
131 | 
132 | In the chunk above we successfully replaced all missing heights with the integer 100. `hablar` also contain the self explained:
133 | 
134 | * `if_zero()` and `zero_if()`
135 | * `if_inf()` and `inf_if()`
136 | * `if_nan()` and `nan_if()`
137 | 
138 | which works in the same way as the examples above.
139 | 
140 | ##### **Introducing a third way to if or else**
141 | 
142 | The generic function `if_else_()` provides the same rigidity as `if_else()` in `dplyr` but ads some flexibility. In `dplyr` you need to specify which type `NA` should have. In `if_else_()` you can write:
143 | 
144 | ```{r}
145 | starwars %>% 
146 |   mutate(skin_color = if_else_(hair_color == "brown", NA, hair_color))
147 | ```
148 | 
149 | In `if_else()` from `dplyr` you would have had to specified `NA_character_`.
150 | 
151 | 


--------------------------------------------------------------------------------
/vignettes/retype.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "retype"
  3 | author: "David Sjoberg"
  4 | output: rmarkdown::html_vignette
  5 | vignette: >
  6 |   %\VignetteIndexEntry{retype}
  7 |   %\VignetteEngine{knitr::rmarkdown}
  8 |   %\VignetteEncoding{UTF-8}
  9 | ---
 10 | 
 11 | ```{r setup, include = FALSE}
 12 | knitr::opts_chunk$set(
 13 |   collapse = F,
 14 |   comment = "#>"
 15 | )
 16 | library(DiagrammeR)
 17 | library(hablar)
 18 | library(dplyr)
 19 | ```
 20 | 
 21 | ## Can the data be simpler?
 22 | ### `retype` quick start your analysis
 23 | Getting data into R can be hassle. But once you do, it often have incorrect data types/classes. For instance, it is not uncommon that numeric variables are characters or dates are classed as characters. 
 24 | 
 25 | Data conversion is cumbersome and small coding mistakes can produce large issues. The hablar package facilitates correction of all data types directly after you import the data into R such that you can avoid dangerous operations at later stages!
 26 | 
 27 | ### What does `retype` do?
 28 | `retype` provides an easy approach for quick and dirty data type conversion. It follows a strict simplification hierarchy for each column of your data frame. It only converts the column if it can assume that no important information is lost in the process. For example, the character vector `c("1", "2")` should rather be an integer vector. Similarly, the character `"2010-06-04"` should be a date. Factors have advantages, but they are never the simplest solution and hence it is always converted to character, at least.
 29 | 
 30 | ### Usage
 31 | `retype(x, ...)`
 32 | 
 33 | where `x` is a data frame, and `...` is the column names you want to apply `retype` to. `x` could also be a single vector.
 34 | 
 35 | ### Simple example: numeric
 36 | ```{r}
 37 | x <- as.numeric(3)
 38 | retype(x)
 39 | ```
 40 | ```{r}
 41 | class(retype(x))
 42 | ```
 43 | 
 44 | ### Simple example: character
 45 | ```{r}
 46 | x <- as.character("2017-03-02")
 47 | retype(x)
 48 | ```
 49 | ```{r}
 50 | class(retype(x))
 51 | ```
 52 | 
 53 | ### Simple example: character
 54 | ```{r}
 55 | x <- as.character(c("3,56", "0,78"))
 56 | retype(x)
 57 | ```
 58 | ```{r}
 59 | class(retype(x))
 60 | ```
 61 | 
 62 | ### Simple example: factor
 63 | ```{r}
 64 | x <- as.factor(c(3, 4))
 65 | retype(x)
 66 | ```
 67 | ```{r}
 68 | class(retype(x))
 69 | ```
 70 | 
 71 | 
 72 | ## The simplification hierarchy
 73 | ### Some things are simpler than others
 74 | `retype` uses a procedure to determine which data type is the simplest, without loosing any vital information in your data.
 75 | 
 76 | * The first thing to know about `retype` is that it always converts factors to character. 
 77 | 
 78 | * The second thing to know is that all logical columns are converted to integers.
 79 | 
 80 | * Thirdly, complex and list columns are left unchanged.
 81 | 
 82 | * From there it will test if the data could be coded as numeric. If true it converts the column to numeric.
 83 | 
 84 | * If it is numeric it tests if it could be an integer instead. If true, it converts the column to integer.
 85 | 
 86 | * If it is a character it tests if it could be a date column. If true, it converts it to a date column.
 87 | 
 88 | * If it is a date time column it tests if it could be a date. If true, it converts it to a date column.
 89 | 
 90 | 
 91 | ### A visualization of the hierarchy
 92 | The above procedure could more intuitively be described in a diagram. The arrows imply a test if a column could be converted to another without loosing information in your data. The procedure continues until it cannot be simplified further.
 93 | 
 94 | ```{r, echo=F}
 95 | grViz("digraph d {
 96 | 
 97 |       node [shape = circle, style = filled]
 98 |       factor;character;numeric;integer;'date time';date;logical;list;complex
 99 | 
100 |       logical -> integer;
101 |       factor -> character;
102 |       character -> numeric
103 |       numeric -> integer;
104 |       character -> date;
105 |       date -> 'date time';
106 |       }")
107 | ```
108 | 
109 | ## Example on a data frame
110 | Examine the following dataset `starwars` from the package `dplyr`. First, we use `convert` on some columns to new data types.
111 | ```{r}
112 | df <- starwars %>% 
113 |   select(1:4) %>% 
114 |   convert(fct(name),
115 |            chr(height:mass),
116 |            fct(hair_color)) %>% 
117 |   print()
118 |   
119 | ```
120 | 
121 | We then apply `retype` on `df`:
122 | 
123 | ```{r}
124 | df %>% 
125 |   retype()
126 | ```
127 | 
128 | Which correctly guessed that height preferably should be an integer vector and that mass works better as a numeric column. The factors were converted to character columns.
129 | 
130 | ## Final notes
131 | ### `retype` in production code
132 | Never use `retype` when you need your scripts to work the next time in the exact same way. `retype` may change over time, it could guess wrong and your data may change. Use `hablar::convert` instead where you explicitly state which data type each column should have.
133 | 


--------------------------------------------------------------------------------
/vignettes/s.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "s"
  3 | author: "David Sjoberg"
  4 | output: rmarkdown::html_vignette
  5 | vignette: >
  6 |   %\VignetteIndexEntry{s}
  7 |   %\VignetteEngine{knitr::rmarkdown}
  8 |   %\VignetteEncoding{UTF-8}
  9 | ---
 10 | 
 11 | ```{r setup, include = FALSE}
 12 | knitr::opts_chunk$set(
 13 |   collapse = F,
 14 |   comment = "#>"
 15 | )
 16 | library(dplyr)
 17 | library(hablar)
 18 | ```
 19 | 
 20 | ## Overview
 21 | 
 22 | ### Description
 23 | The `s` function is a simple function that helps you get intuitive results when summarizing data. It is made to be used in conjuction with summarize functions, for example `min` , `sum` and `mean`. `s` takes a vector and mutates it in the following ways:
 24 | 
 25 | * It replaces all non-rational numbers from numeric vectors and replace them with `NA`. Non-rational numbers are `Inf`, `-Inf` and `NaN`.
 26 | 
 27 | * It removes `NA` from the vector by default
 28 | 
 29 | * If the vector has length zero or only consists of `NA` it returns a single `NA`.
 30 | 
 31 | ### Usage
 32 | 
 33 | `s(..., ignore_na = T)`
 34 | 
 35 | where ... is one or more vector(s). If missing values should not be omitted use `ignore_na = F`.
 36 | 
 37 | ### Simple examples
 38 | 
 39 | Removing `NA`:
 40 | ```{r}
 41 | x <- c(NA, 1, 2)
 42 | s(x)
 43 | ```
 44 | 
 45 | Replacing non-rational numbers with `NA` and then removes `NA`:
 46 | ```{r}
 47 | x <- c(NaN, 1, Inf)
 48 | s(x)
 49 | ```
 50 | 
 51 | Empty vectors return a single `NA`:
 52 | ```{r}
 53 | x <- c()
 54 | s(x)
 55 | ```
 56 | 
 57 | In conjuction with a summary function:
 58 | ```{r}
 59 | x <- c(NaN, Inf, 3, 4)
 60 | median(s(x))
 61 | ```
 62 | 
 63 | ## The problems it solves
 64 | ### Principle of least astonishment
 65 | All programming languages have their special cases when you get non-intuitive results that you did not expect. This is also true for R. The s-function provides intuitive outcomes of some of the most basic commands in R. In the next parts of the vignette some problems it solves are explained in greater detail.
 66 | 
 67 | ### Missing values
 68 | When learning R users might be surprised when creating suprised when using simple summary function. A summary function is a function that takes a vector and returns a single one value. For example, `min(x)` , `sum(x)` and `mean(x)`. A simple example:
 69 | 
 70 | ```{r}
 71 | x <- c(1, 2, 3, 4, 5)
 72 | sum(x)
 73 | ```
 74 | 
 75 | In this example the output of sum() was, which is expected since all entries in x sums to 15. However, in more messy data, the output is oftentimes less intuitive. New users to R might be confused that the next example results in NA (a missing value):
 76 | 
 77 | ```{r}
 78 | x <- c(1, 2, 3, NA, 4)
 79 | mean(x)
 80 | 
 81 | ```
 82 | 
 83 | Since the vector above have an a missing value R does not know how to find the mean of the vector. The missing value could be anything, and thus R thus returns the output `NA`. However, since missing values are common when working with real data,  it is also a common practise to ignore missing values. Usually the user tells R to ignore the missing value and return the mean of the vector that have values that could be averaged. The error in the previous example could be fixed by adding `na.rm = TRUE` that drops all missing values before calculating the mean:
 84 | 
 85 | ```{r}
 86 | x <- c(1, 2, 3, NA, 4)
 87 | mean(x, na.rm = TRUE)
 88 | 
 89 | ```
 90 | 
 91 | Generally, R is strict about missing values so that you do not miss them, which often is helpful rather than harsh! However, often the programmer want R to return a 'real' value from the data, if there is one, even if it ignores missing values.
 92 | 
 93 | The `s` function helps you with this. Since it by default removes missing values you can simply enter:
 94 | ```{r}
 95 | x <- c(1, 2, 3, NA, 4)
 96 | mean(s(x))
 97 | 
 98 | ```
 99 | 
100 | ### Sometimes R removes too much
101 | 
102 | Adding an argument to remove all missing is common practise when summarizing data. However, it is not uncommon that some vectors only have missing values. Imagine an example where Amanda, David and Viktor sold sodas by the beach for three days. If someone did not show up they get a missing value.
103 | 
104 | ```{r, echo=F}
105 | df <- tibble(day = c(1, 2, 3, 1, 2, 3, 1, 2, 3),
106 |        name = c(rep("Amanda", 3), rep("David", 3), rep("Viktor", 3)),
107 |        sold_sodas = c(3, NA, 8, NA, NA, NA, 2, 1, 4)) %>% 
108 |   print()
109 | ```
110 | 
111 | Now we want to see the maximum number of sodas each person sold on a single day. The above data frame if saved as `df`.
112 | 
113 | ```{r, warning=FALSE}
114 | df %>% 
115 |   group_by(name) %>% 
116 |   summarize(n_sodas_best_day = max(sold_sodas, na.rm = T))
117 | ```
118 | 
119 | Amanda sold the most sodas in a single day. However, David who was absent on all days, got the output `-Inf`. This means that negative infinity was the number of sodas he sold during his most productive day. That is astonishing! One would perhaps think that the more intuitive output would be `NA`.
120 | 
121 | The reason for result is that we told R to remove all missing values before calculating the maximal value. It is equivalent to:
122 | 
123 | ```{r, warning=FALSE}
124 | x <- c()
125 | max(x)
126 | ```
127 | 
128 | We could try to remove the `na.rm = TRUE` argument from `max()`.
129 | 
130 | ```{r}
131 | df %>% 
132 |   group_by(name) %>% 
133 |   summarize(n_sodas_best_day = max(sold_sodas))
134 | ```
135 | 
136 | Suddenly R tells us that Viktor had the best day and Amanda, who was absent the second day, got NA because R doesn’t not know how to find the maximum value. However, David also got NA this time, which makes sense. 
137 | 
138 | Sometimes, calculating simple descriptive statistics can be a cumbersome task. The s function is there to support you! Since it returns `NA` if the vector is empty we get:
139 | 
140 | ```{r}
141 | df %>% 
142 |   group_by(name) %>% 
143 |   summarize(n_sodas_best_day = max(s(sold_sodas)))
144 | ```
145 | 
146 | 
147 | ### Getting answers when there is none
148 | 
149 | Another astonishing result one might encounter occurs when R tries to return a value when there is none. Take this extract `df` from the `starwars` dataset from the R package `dplyr`. 
150 | 
151 | ```{r, include = F}
152 | df <- starwars %>% 
153 |   select(name, homeworld, species, height)
154 | ```
155 | ```{r}
156 | df %>% head(10)
157 | ```
158 | 
159 | 
160 | Say that we want to calculate find the height of the tallest human from each homeworld. For precautionary reasons, we drop all rows with missing values from the height column so that we do not get the same problem as before.
161 | 
162 | ```{r, warning=FALSE}
163 | df %>% 
164 |   filter(!is.na(height)) %>% 
165 |   group_by(homeworld) %>% 
166 |   summarize(tallest_human = max(height[species == "Human"]))
167 | ```
168 | 
169 | We got negative infinity `-Inf` again. How could this be?
170 | 
171 | This is because some homeworld have no humans, e.g. Cerea. R tries to calculate the maximum value of nothing. The `s` function can help you out! Since it returns `NA` if the vector is empty we get:
172 | 
173 | ```{r}
174 | df %>% 
175 |   filter(!is.na(height)) %>% 
176 |   group_by(homeworld) %>% 
177 |   summarize(tallest_human = max(s(height[species == "Human"])))
178 | ```
179 | 
180 | Now we get missing values for the homeworlds that does not have any humans. Makes sense.
181 | 
182 | 
183 | ### Summarizing weird numbers
184 | 
185 | Numerical vectors in R can include more than numbers and missing values `NA`. They can also include infinite numbers `Inf` and `-Inf` as shown in the examples above. Furthermore, numerical vectors can include `NaN`'s which means 'not-a-number'. If the data frame you are using have `NaN` or `Inf` it may cause you problems when summarizing your data. Some examples:
186 | 
187 | ```{r}
188 | x <- c(NaN, 1)
189 | min(x)
190 | ```
191 | 
192 | ```{r}
193 | x <- c(Inf, 3, 4)
194 | mean(x)
195 | ```
196 | 
197 | ```{r}
198 | x <- c(5, -Inf, 2)
199 | sum(x)
200 | ```
201 | 
202 | Often when you summarize vectors that have `NaN` or `Inf` you want to treat them as a missing value. Maybe they have appeared as a mistake when you accidentally divided a value by zero since `1/0 = Inf` in R. The `s` function solves this for you be replacing them with `NA`.
203 | 
204 | ```{r}
205 | x <- c(NaN, 1)
206 | min(s(x))
207 | ```
208 | 
209 | ```{r}
210 | x <- c(Inf, 3, 4)
211 | mean(s(x))
212 | ```
213 | 
214 | ```{r}
215 | x <- c(5, -Inf, 2)
216 | sum(s(x))
217 | ```
218 | 
219 | ### Wrappers for `s` and summary functions
220 | 
221 | If things get too messy with an extra function you might prefer the wrapper functions of `s`. All major summary functions have an s wrapped alternative in `hablar`. These are accessed by adding an underscore to the name of the summary function, i.e. `min_(x)` and is equal to `min(s(x))`. Repeating the previous exercises using wrappers for `s` would look like: 
222 | 
223 | ```{r}
224 | x <- c(NaN, 1)
225 | min_(x)
226 | ```
227 | 
228 | ```{r}
229 | x <- c(Inf, 3, 4)
230 | mean_(x)
231 | ```
232 | 
233 | ```{r}
234 | x <- c(5, -Inf, 2)
235 | sum_(x)
236 | ```
237 | 
238 | ## Final note
239 | 
240 | To summarize, `s` can help you to get results when you summarize your data, if there is an sensible answer in the vector. If not, you will get `NA`.
241 | 


--------------------------------------------------------------------------------