├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check-hard.yaml │ ├── R-CMD-check.yaml │ ├── html-5-check.yaml │ ├── lint.yaml │ └── pkgdown.yml ├── .gitignore ├── .lintr ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── add_labels.R ├── as_character.R ├── as_factor.R ├── as_label.R ├── as_labelled.R ├── as_numeric.R ├── convert_case.R ├── copy_labels.R ├── drop_labels.R ├── efc.R ├── fill_labels.R ├── get_label.R ├── get_labels.R ├── get_model_labels.R ├── get_na.R ├── get_values.R ├── helpfunctions.R ├── is_labelled.R ├── label_to_colnames.R ├── read.R ├── remove_all_labels.R ├── remove_label.R ├── remove_labels.R ├── select_helpers.R ├── set_label.R ├── set_labels.R ├── set_na.R ├── tidy_labels.R ├── unlabel.R ├── utils_get_dots.R ├── val_labels.R ├── var_labels.R ├── write.R └── zap_labels.R ├── README.md ├── _pkgdown.yml ├── data └── efc.RData ├── inst └── CITATION ├── man ├── add_labels.Rd ├── as_factor.Rd ├── as_label.Rd ├── as_labelled.Rd ├── as_numeric.Rd ├── convert_case.Rd ├── copy_labels.Rd ├── efc.Rd ├── figures │ └── logo.png ├── get_label.Rd ├── get_labels.Rd ├── get_na.Rd ├── get_values.Rd ├── is_labelled.Rd ├── label_to_colnames.Rd ├── read_spss.Rd ├── remove_all_labels.Rd ├── remove_label.Rd ├── set_label.Rd ├── set_labels.Rd ├── set_na.Rd ├── sjlabelled-package.Rd ├── term_labels.Rd ├── tidy_labels.Rd ├── unlabel.Rd ├── write_spss.Rd ├── zap_labels.Rd └── zap_na_tags.Rd ├── sjlabelled.Rproj ├── sjlabelled.code-workspace ├── tests ├── testthat.R └── testthat │ ├── test-as_numeric.R │ └── test-remove_labels.R └── vignettes ├── intro_sjlabelled.Rmd ├── labelleddata.Rmd └── quasiquotation.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.travis\.yml$ 4 | ^revdep$ 5 | ^pkgdown$ 6 | ^docs$ 7 | ^_pkgdown\.yml$ 8 | 9 | ^LICENSE 10 | ^README.rmd 11 | ^Meta$ 12 | ^doc$ 13 | ^docs$ 14 | ^paper$ 15 | ^.github$ 16 | ^.git$ 17 | 18 | \.code-workspace$ 19 | \.lintr$ 20 | ^CRAN-SUBMISSION$ 21 | ^cran-comments.md$ -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check-hard.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | # 4 | # NOTE: This workflow only directly installs "hard" dependencies, i.e. Depends, 5 | # Imports, and LinkingTo dependencies. Notably, Suggests dependencies are never 6 | # installed, with the exception of testthat, knitr, and rmarkdown. The cache is 7 | # never used to avoid accidentally restoring a cache containing a suggested 8 | # dependency. 9 | on: 10 | push: 11 | branches: 12 | - master 13 | pull_request: 14 | branches: 15 | - master 16 | 17 | name: R-CMD-check-hard 18 | 19 | jobs: 20 | R-CMD-check: 21 | runs-on: ${{ matrix.config.os }} 22 | 23 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 24 | 25 | strategy: 26 | fail-fast: false 27 | matrix: 28 | config: 29 | - { os: ubuntu-latest, r: "release" } 30 | 31 | env: 32 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 33 | R_KEEP_PKG_SOURCE: yes 34 | 35 | steps: 36 | - uses: actions/checkout@v3 37 | 38 | # Always try to use the latest pandoc version 39 | # https://github.com/jgm/pandoc/releases 40 | - uses: r-lib/actions/setup-pandoc@v2 41 | with: 42 | pandoc-version: "2.19.2" 43 | 44 | - uses: r-lib/actions/setup-r@v2 45 | with: 46 | r-version: ${{ matrix.config.r }} 47 | http-user-agent: ${{ matrix.config.http-user-agent }} 48 | use-public-rspm: true 49 | 50 | - uses: r-lib/actions/setup-r-dependencies@v2 51 | with: 52 | pak-version: devel 53 | dependencies: '"hard"' 54 | cache: false 55 | extra-packages: | 56 | any::rcmdcheck 57 | any::testthat 58 | any::knitr 59 | any::rmarkdown 60 | needs: check 61 | 62 | - uses: r-lib/actions/check-r-package@v2 63 | with: 64 | upload-snapshots: true -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | 4 | on: 5 | push: 6 | branches: 7 | - master 8 | pull_request: 9 | branches: 10 | - master 11 | 12 | name: R-CMD-check 13 | 14 | jobs: 15 | R-CMD-check: 16 | runs-on: ${{ matrix.config.os }} 17 | 18 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) ${{ matrix.config.locale }} 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | config: 24 | - { os: macOS-latest, r: "release" } 25 | - { os: macOS-latest, r: "oldrel" } 26 | 27 | - { os: windows-latest, r: "devel" } 28 | - { os: windows-latest, r: "release" } 29 | - { os: windows-latest, r: "oldrel" } 30 | 31 | # Use 3.6 to trigger usage of RTools35 32 | - { os: windows-latest, r: "3.6" } 33 | # use 4.1 to check with rtools40's older compiler 34 | - { os: windows-latest, r: "4.1" } 35 | 36 | - { os: ubuntu-latest, r: "devel" } 37 | - { os: ubuntu-latest, r: "release" } 38 | - { os: ubuntu-latest, r: "oldrel" } 39 | 40 | env: 41 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 42 | R_KEEP_PKG_SOURCE: yes 43 | _R_CHECK_CRAN_INCOMING_: false 44 | _R_CHECK_FORCE_SUGGESTS_: false 45 | 46 | steps: 47 | - name: Set locale 48 | if: matrix.config.locale == 'en_US' 49 | run: | 50 | sudo locale-gen en_US 51 | echo "LC_ALL=en_US" >> $GITHUB_ENV 52 | 53 | - uses: actions/checkout@v3 54 | 55 | # Always try to use the latest pandoc version 56 | # https://github.com/jgm/pandoc/releases 57 | - uses: r-lib/actions/setup-pandoc@v2 58 | with: 59 | pandoc-version: "2.19.2" 60 | 61 | - uses: r-lib/actions/setup-r@v2 62 | with: 63 | r-version: ${{ matrix.config.r }} 64 | http-user-agent: ${{ matrix.config.http-user-agent }} 65 | use-public-rspm: true 66 | 67 | # TODO: Check which of the ignore conditions are still relevant given the 68 | # current suggested dependencies and the minimum supported R version. 69 | # Update if anything out of date or not needed anymore. 70 | - uses: r-lib/actions/setup-r-dependencies@v2 71 | with: 72 | pak-version: devel 73 | extra-packages: | 74 | any::rcmdcheck 75 | needs: check 76 | 77 | # Don't error on "note" because if any of the suggested packages are not available 78 | # for a given R version, this generates a NOTE causing unnecessary build failure 79 | - uses: r-lib/actions/check-r-package@v2 80 | with: 81 | error-on: '"warning"' 82 | upload-snapshots: true -------------------------------------------------------------------------------- /.github/workflows/html-5-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | name: HTML5 check 12 | 13 | jobs: 14 | HTML5-check: 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | R_KEEP_PKG_SOURCE: yes 19 | steps: 20 | - uses: actions/checkout@v3 21 | 22 | - uses: r-lib/actions/setup-r@v2 23 | with: 24 | r-version: "devel" 25 | http-user-agent: "release" 26 | use-public-rspm: true 27 | 28 | - uses: r-lib/actions/setup-r-dependencies@v2 29 | with: 30 | pak-version: devel 31 | extra-packages: any::rcmdcheck, any::V8 32 | dependencies: "character()" 33 | 34 | - name: Install pdflatex 35 | run: sudo apt-get install texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-latex-extra 36 | 37 | - name: Install tidy 38 | run: sudo apt install tidy 39 | 40 | - uses: r-lib/actions/check-r-package@v2 41 | with: 42 | args: 'c("--as-cran", "--no-codoc", "--no-examples", "--no-tests", "--no-vignettes", "--no-build-vignettes", "--ignore-vignettes", "--no-install")' 43 | build_args: 'c("--no-build-vignettes")' 44 | error-on: '"note"' 45 | env: 46 | _R_CHECK_CRAN_INCOMING_REMOTE_: false 47 | _R_CHECK_CRAN_INCOMING_: false 48 | _R_CHECK_RD_XREFS_: false -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | name: lint 12 | 13 | jobs: 14 | lint: 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - uses: r-lib/actions/setup-r@v2 22 | with: 23 | use-public-rspm: true 24 | 25 | - uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | pak-version: devel 28 | extra-packages: | 29 | r-lib/lintr 30 | EGAnet=?ignore-before-r=5.0.0 31 | local::. 32 | needs: lint 33 | 34 | # TODO: Revisit to remove some of these allowances after more important lints 35 | # have been removed. 36 | - name: Lint 37 | run: | 38 | library(lintr) 39 | lint_package(linters = linters_with_defaults( 40 | absolute_path_linter = NULL, 41 | commented_code_linter = NULL, 42 | cyclocomp_linter = cyclocomp_linter(50), 43 | implicit_integer_linter = NULL, 44 | line_length_linter(120), 45 | nonportable_path_linter = NULL, 46 | object_name_linter = NULL, 47 | object_length_linter(50), 48 | object_usage_linter = NULL, 49 | todo_comment_linter = NULL, 50 | extraction_operator_linter = NULL, 51 | unneeded_concatenation_linter(allow_single_expression = FALSE), 52 | defaults = linters_with_tags(tags = NULL) 53 | )) 54 | shell: Rscript {0} -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | 25 | - uses: r-lib/actions/setup-pandoc@v2 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | 31 | - uses: r-lib/actions/setup-r-dependencies@v2 32 | with: 33 | extra-packages: any::pkgdown, local::. 34 | needs: website 35 | 36 | - name: Build site 37 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 38 | shell: Rscript {0} 39 | 40 | - name: Deploy to GitHub pages 🚀 41 | if: github.event_name != 'pull_request' 42 | uses: JamesIves/github-pages-deploy-action@4.1.4 43 | with: 44 | clean: false 45 | branch: gh-pages 46 | folder: docs 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | 4 | # Example code in package build process 5 | *-Ex.R 6 | # R data files from past sessions 7 | .Rdata 8 | # ========================= 9 | # Operating System Files 10 | # OSX 11 | .DS_Store 12 | .AppleDouble 13 | .LSOverride 14 | # Icon must end with two \r 15 | Icon 16 | # Thumbnails 17 | ._* 18 | # Files that might appear on external disk 19 | .Spotlight-V100 20 | .Trashes 21 | # Directories potentially created on remote AFP share 22 | .AppleDB 23 | .AppleDesktop 24 | Network Trash Folder 25 | Temporary Items 26 | .apdisk 27 | # Windows 28 | # Windows image file caches 29 | Thumbs.db 30 | ehthumbs.db 31 | # Folder config file 32 | Desktop.ini 33 | # Recycle Bin used on file shares 34 | $RECYCLE.BIN/ 35 | # Windows Installer files 36 | *.cab 37 | *.msi 38 | *.msm 39 | *.msp 40 | .Rproj.user 41 | inst/doc 42 | revdep 43 | vignettes/*.html 44 | -------------------------------------------------------------------------------- /.lintr: -------------------------------------------------------------------------------- 1 | linters: with_defaults(object_name_linter = NULL, 2 | object_length_linter(40), 3 | commented_code_linter = NULL, 4 | object_usage_linter = NULL, 5 | line_length_linter(120), 6 | cyclocomp_linter = cyclocomp_linter(20)) 7 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: sjlabelled 2 | Type: Package 3 | Encoding: UTF-8 4 | Title: Labelled Data Utility Functions 5 | Version: 1.2.0.3 6 | Authors@R: c( 7 | person("Daniel", "Lüdecke", role = c("aut", "cre"), email = "d.luedecke@uke.de", comment = c(ORCID = "0000-0002-8895-3206")), 8 | person("avid", "Ranzolin", role = "ctb", email = "daranzolin@gmail.com"), 9 | person("Jonathan", "De Troye", role = "ctb", email = "detroyejr@outlook.com") 10 | ) 11 | Maintainer: Daniel Lüdecke 12 | Description: Collection of functions dealing with labelled data, like reading and 13 | writing data between R and other statistical software packages like 'SPSS', 14 | 'SAS' or 'Stata', and working with labelled data. This includes easy ways 15 | to get, set or change value and variable label attributes, to convert 16 | labelled vectors into factors or numeric (and vice versa), or to deal with 17 | multiple declared missing values. 18 | License: GPL-3 19 | Depends: 20 | R (>= 3.4) 21 | Imports: 22 | insight, 23 | datawizard, 24 | stats, 25 | tools, 26 | utils 27 | Suggests: 28 | dplyr, 29 | haven (>= 1.1.2), 30 | magrittr, 31 | sjmisc, 32 | sjPlot, 33 | knitr, 34 | rlang, 35 | rmarkdown, 36 | snakecase, 37 | testthat 38 | URL: https://strengejacke.github.io/sjlabelled/ 39 | BugReports: https://github.com/strengejacke/sjlabelled/issues 40 | RoxygenNote: 7.3.1 41 | VignetteBuilder: knitr 42 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method("set_label<-",default) 4 | S3method(as_character,data.frame) 5 | S3method(as_character,default) 6 | S3method(as_factor,data.frame) 7 | S3method(as_factor,default) 8 | S3method(as_label,data.frame) 9 | S3method(as_label,default) 10 | S3method(as_labelled,data.frame) 11 | S3method(as_labelled,default) 12 | S3method(as_labelled,list) 13 | S3method(as_numeric,data.frame) 14 | S3method(as_numeric,default) 15 | S3method(get_label,data.frame) 16 | S3method(get_label,default) 17 | S3method(get_label,list) 18 | S3method(get_labels,data.frame) 19 | S3method(get_labels,default) 20 | S3method(get_labels,list) 21 | S3method(get_na,data.frame) 22 | S3method(get_na,default) 23 | S3method(get_na,list) 24 | S3method(get_values,data.frame) 25 | S3method(get_values,default) 26 | S3method(get_values,list) 27 | S3method(remove_all_labels,data.frame) 28 | S3method(remove_all_labels,default) 29 | S3method(remove_all_labels,list) 30 | export("set_label<-") 31 | export(add_labels) 32 | export(as_character) 33 | export(as_factor) 34 | export(as_label) 35 | export(as_labelled) 36 | export(as_numeric) 37 | export(convert_case) 38 | export(copy_labels) 39 | export(drop_labels) 40 | export(fill_labels) 41 | export(get_dv_labels) 42 | export(get_label) 43 | export(get_labels) 44 | export(get_na) 45 | export(get_term_labels) 46 | export(get_values) 47 | export(is_labelled) 48 | export(label_to_colnames) 49 | export(read_data) 50 | export(read_sas) 51 | export(read_spss) 52 | export(read_stata) 53 | export(remove_all_labels) 54 | export(remove_label) 55 | export(remove_labels) 56 | export(replace_labels) 57 | export(response_labels) 58 | export(set_label) 59 | export(set_labels) 60 | export(set_na) 61 | export(term_labels) 62 | export(tidy_labels) 63 | export(to_character) 64 | export(to_factor) 65 | export(to_label) 66 | export(to_numeric) 67 | export(unlabel) 68 | export(val_labels) 69 | export(var_labels) 70 | export(write_sas) 71 | export(write_spss) 72 | export(write_stata) 73 | export(zap_labels) 74 | export(zap_na_tags) 75 | export(zap_unlabelled) 76 | importFrom(insight,find_parameters) 77 | importFrom(insight,get_data) 78 | importFrom(stats,coef) 79 | importFrom(stats,model.frame) 80 | importFrom(stats,na.omit) 81 | importFrom(stats,terms) 82 | importFrom(tools,file_ext) 83 | importFrom(utils,setTxtProgressBar) 84 | importFrom(utils,txtProgressBar) 85 | -------------------------------------------------------------------------------- /R/add_labels.R: -------------------------------------------------------------------------------- 1 | #' @title Add, replace or remove value labels of variables 2 | #' @name add_labels 3 | #' 4 | #' @description These functions add, replace or remove value labels to or from variables. 5 | #' 6 | #' @seealso \code{\link{set_label}} to manually set variable labels or 7 | #' \code{\link{get_label}} to get variable labels; \code{\link{set_labels}} to 8 | #' add value labels, replacing the existing ones (and removing non-specified 9 | #' value labels). 10 | #' 11 | #' @param x A vector or data frame. 12 | #' @param labels \describe{ 13 | #' \item{For \code{add_labels()}}{A named (numeric) vector of labels 14 | #' that will be added to \code{x} as label attribute.} 15 | #' \item{For \code{remove_labels()}}{Either a numeric vector, indicating 16 | #' the position of one or more label attributes that should be removed; 17 | #' a character vector with names of label attributes that should be 18 | #' removed; or a \code{\link[haven:tagged_na]{tagged_na()}} to remove the labels 19 | #' from specific NA values.} 20 | #' } 21 | #' 22 | #' @inheritParams as_factor 23 | #' 24 | #' @return \code{x} with additional or removed value labels. If \code{x} 25 | #' is a data frame, the complete data frame \code{x} will be returned, 26 | #' with removed or added to variables specified in \code{...}; 27 | #' if \code{...} is not specified, applies to all variables in the 28 | #' data frame. 29 | #' 30 | #' @details \code{add_labels()} adds \code{labels} to the existing value 31 | #' labels of \code{x}, however, unlike \code{\link{set_labels}}, it 32 | #' does \emph{not} remove labels that were \emph{not} specified in 33 | #' \code{labels}. \code{add_labels()} also replaces existing 34 | #' value labels, but preserves the remaining labels. 35 | #' \cr \cr 36 | #' \code{remove_labels()} is the counterpart to \code{add_labels()}. 37 | #' It removes labels from a label attribute of \code{x}. 38 | #' \cr \cr 39 | #' \code{replace_labels()} is an alias for \code{add_labels()}. 40 | #' 41 | #' @examplesIf require("dplyr") && require("haven") 42 | #' # add_labels() 43 | #' data(efc) 44 | #' get_labels(efc$e42dep) 45 | #' 46 | #' x <- add_labels(efc$e42dep, labels = c(`nothing` = 5)) 47 | #' get_labels(x) 48 | #' 49 | #' if (require("dplyr")) { 50 | #' x <- efc %>% 51 | #' # select three variables 52 | #' dplyr::select(e42dep, c172code, c161sex) %>% 53 | #' # only add new label to two of those 54 | #' add_labels(e42dep, c172code, labels = c(`nothing` = 5)) 55 | #' # see data frame, with selected variables having new labels 56 | #' get_labels(x) 57 | #' } 58 | #' 59 | #' x <- add_labels(efc$e42dep, labels = c(`nothing` = 5, `zero value` = 0)) 60 | #' get_labels(x, values = "p") 61 | #' 62 | #' # replace old value labels 63 | #' x <- add_labels( 64 | #' efc$e42dep, 65 | #' labels = c(`not so dependent` = 4, `lorem ipsum` = 5) 66 | #' ) 67 | #' get_labels(x, values = "p") 68 | #' 69 | #' # replace specific missing value (tagged NA) 70 | #' if (require("haven")) { 71 | #' x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), 72 | #' c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), 73 | #' "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) 74 | #' # get current NA values 75 | #' x 76 | #' # tagged NA(c) has currently the value label "First", will be 77 | #' # replaced by "Second" now. 78 | #' replace_labels(x, labels = c("Second" = tagged_na("c"))) 79 | #' } 80 | #' 81 | #' 82 | #' # remove_labels() 83 | #' 84 | #' x <- remove_labels(efc$e42dep, labels = 2) 85 | #' get_labels(x, values = "p") 86 | #' 87 | #' x <- remove_labels(efc$e42dep, labels = "independent") 88 | #' get_labels(x, values = "p") 89 | #' 90 | #' if (require("haven")) { 91 | #' x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), 92 | #' c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), 93 | #' "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) 94 | #' # get current NA values 95 | #' get_na(x) 96 | #' get_na(remove_labels(x, labels = tagged_na("c"))) 97 | #' } 98 | #' @export 99 | add_labels <- function(x, ..., labels) { 100 | 101 | # check for valid value. value must be a named vector 102 | if (is.null(labels)) stop("`labels` is NULL.", call. = FALSE) 103 | if (is.null(names(labels))) stop("`labels` must be a named vector.", call. = FALSE) 104 | 105 | # evaluate arguments, generate data 106 | dots <- as.character(match.call(expand.dots = FALSE)$`...`) 107 | .dat <- .get_dot_data(x, dots) 108 | 109 | if (is.data.frame(x)) { 110 | # iterate variables of data frame 111 | for (i in colnames(.dat)) { 112 | x[[i]] <- add_labels_helper(.dat[[i]], value = labels) 113 | } 114 | } else { 115 | x <- add_labels_helper(.dat, value = labels) 116 | } 117 | 118 | x 119 | } 120 | 121 | add_labels_helper <- function(x, value) { 122 | # get current labels of `x` 123 | current.labels <- get_labels( 124 | x, 125 | attr.only = TRUE, 126 | values = "n", 127 | non.labelled = FALSE, 128 | drop.na = TRUE 129 | ) 130 | 131 | current.na <- NULL 132 | 133 | # if we had already labels, append new ones 134 | if (!is.null(current.labels)) { 135 | # remove multiple value labels 136 | doubles <- names(current.labels) %in% as.character(value) 137 | 138 | # switch value and names attribute, since get_labels() 139 | # returns the values as names, and the value labels 140 | # as "vector content" 141 | val.switch <- as.numeric(names(current.labels)) 142 | names(val.switch) <- as.character(current.labels) 143 | 144 | # update all labels 145 | all.labels <- c(val.switch[!doubles], value) 146 | 147 | # tell user 148 | if (any(doubles)) { 149 | message(sprintf( 150 | "label '%s' was replaced with new value label.\n", 151 | current.labels[doubles] 152 | )) 153 | } 154 | } else { 155 | all.labels <- value 156 | } 157 | 158 | if (requireNamespace("haven", quietly = TRUE)) { 159 | # get current NA values - requires haven 160 | current.na <- get_na(x) 161 | # replace tagged NA 162 | if (any(haven::is_tagged_na(value))) { 163 | # get tagged NAs 164 | value_tag <- haven::na_tag(value)[haven::is_tagged_na(value)] 165 | cna_tag <- haven::na_tag(current.na) 166 | 167 | # find matches (replaced NA), i.e. see if 'x' has any 168 | # tagged NA values that match the tagged NA specified in 'value' 169 | doubles <- na.omit(match(value_tag, cna_tag)) 170 | 171 | # tell user if we found any tagged NA, and that these will be replaced 172 | if (any(doubles)) { 173 | message(sprintf( 174 | "tagged NA '%s' was replaced with new value label.\n", 175 | names(current.na)[doubles] 176 | )) 177 | } 178 | 179 | # remove multiple tagged NA 180 | current.na <- current.na[-doubles] 181 | } 182 | } 183 | 184 | # sort labels by values 185 | all.labels <- all.labels[order(as.numeric(all.labels))] 186 | 187 | # add NA 188 | if (!is.null(current.na)) all.labels <- c(all.labels, current.na) 189 | 190 | # set back value labels 191 | attr(x, "labels") <- all.labels 192 | 193 | x 194 | } 195 | 196 | 197 | #' @rdname add_labels 198 | #' @export 199 | replace_labels <- function(x, ..., labels) { 200 | add_labels(x = x, ..., labels = labels) 201 | } 202 | -------------------------------------------------------------------------------- /R/as_character.R: -------------------------------------------------------------------------------- 1 | #' @rdname as_label 2 | #' @export 3 | as_character <- function(x, ...) { 4 | UseMethod("as_character") 5 | } 6 | 7 | 8 | #' @rdname as_label 9 | #' @export 10 | to_character <- as_character 11 | 12 | 13 | #' @export 14 | as_character.default <- function(x, add.non.labelled = FALSE, prefix = FALSE, var.label = NULL, drop.na = TRUE, drop.levels = FALSE, ...) { 15 | as_character_helper(x, add.non.labelled, prefix, var.label, drop.na, drop.levels) 16 | } 17 | 18 | 19 | #' @rdname as_label 20 | #' @export 21 | as_character.data.frame <- function(x, ..., add.non.labelled = FALSE, prefix = FALSE, var.label = NULL, drop.na = TRUE, drop.levels = FALSE, keep.labels = FALSE) { 22 | dots <- sapply(eval(substitute(alist(...))), deparse) 23 | .dat <- .get_dot_data(x, dots) 24 | 25 | # iterate variables of data frame 26 | for (i in colnames(.dat)) { 27 | x[[i]] <- as_character_helper(.dat[[i]], add.non.labelled, prefix, var.label, drop.na, drop.levels) 28 | } 29 | 30 | x 31 | } 32 | 33 | as_character_helper <- function(x, add.non.labelled = FALSE, prefix = FALSE, var.label = NULL, drop.na = TRUE, drop.levels = FALSE) { 34 | # get variable labels 35 | vl <- get_label(x) 36 | 37 | # to character 38 | x <- as.character(as_label_helper(x, add.non.labelled, prefix, var.label, drop.na, drop.levels, keep.labels = FALSE)) 39 | 40 | # set back variable labels, if any 41 | if (!is.null(vl)) x <- set_label(x, vl) 42 | 43 | x 44 | } 45 | -------------------------------------------------------------------------------- /R/as_factor.R: -------------------------------------------------------------------------------- 1 | #' @title Convert variable into factor and keep value labels 2 | #' @name as_factor 3 | #' 4 | #' @description This function converts a variable into a factor, but preserves 5 | #' variable and value label attributes. 6 | #' 7 | #' @param x A vector or data frame. 8 | #' @param ... Optional, unquoted names of variables that should be selected for 9 | #' further processing. Required, if \code{x} is a data frame (and no 10 | #' vector) and only selected variables from \code{x} should be processed. 11 | #' You may also use functions like \code{:} or tidyselect's select-helpers. 12 | #' See 'Examples'. 13 | #' @param add.non.labelled Logical, if \code{TRUE}, non-labelled values also 14 | #' get value labels. 15 | #' 16 | #' @return A factor, including variable and value labels. If \code{x} 17 | #' is a data frame, the complete data frame \code{x} will be returned, 18 | #' where variables specified in \code{...} are coerced 19 | #' to factors (including variable and value labels); 20 | #' if \code{...} is not specified, applies to all variables in the 21 | #' data frame. 22 | #' 23 | #' @note This function is intended for use with vectors that have value and variable 24 | #' label attributes. Unlike \code{\link{as.factor}}, \code{as_factor} converts 25 | #' a variable into a factor and preserves the value and variable label attributes. 26 | #' \cr \cr 27 | #' Adding label attributes is automatically done by importing data sets 28 | #' with one of the \code{read_*}-functions, like \code{\link{read_spss}}. 29 | #' Else, value and variable labels can be manually added to vectors 30 | #' with \code{\link{set_labels}} and \code{\link{set_label}}. 31 | #' 32 | #' @details \code{as_factor} converts numeric values into a factor with numeric 33 | #' levels. \code{\link{as_label}}, however, converts a vector into 34 | #' a factor and uses value labels as factor levels. 35 | #' 36 | #' @examples 37 | #' if (require("sjmisc") && require("magrittr")) { 38 | #' data(efc) 39 | #' # normal factor conversion, loses value attributes 40 | #' x <- as.factor(efc$e42dep) 41 | #' frq(x) 42 | #' 43 | #' # factor conversion, which keeps value attributes 44 | #' x <- as_factor(efc$e42dep) 45 | #' frq(x) 46 | #' 47 | #' # create partially labelled vector 48 | #' x <- set_labels( 49 | #' efc$e42dep, 50 | #' labels = c( 51 | #' `1` = "independent", 52 | #' `4` = "severe dependency", 53 | #' `9` = "missing value" 54 | #' )) 55 | #' 56 | #' # only copy existing value labels 57 | #' as_factor(x) %>% head() 58 | #' get_labels(as_factor(x), values = "p") 59 | #' 60 | #' # also add labels to non-labelled values 61 | #' as_factor(x, add.non.labelled = TRUE) %>% head() 62 | #' get_labels(as_factor(x, add.non.labelled = TRUE), values = "p") 63 | #' 64 | #' 65 | #' # easily coerce specific variables in a data frame to factor 66 | #' # and keep other variables, with their class preserved 67 | #' as_factor(efc, e42dep, e16sex, c172code) %>% head() 68 | #' 69 | #' # use select-helpers from dplyr-package 70 | #' if (require("dplyr")) { 71 | #' as_factor(efc, contains("cop"), c161sex:c175empl) %>% head() 72 | #' } 73 | #' } 74 | #' @export 75 | as_factor <- function(x, ...) { 76 | UseMethod("as_factor") 77 | } 78 | 79 | 80 | #' @rdname as_factor 81 | #' @export 82 | to_factor <- as_factor 83 | 84 | 85 | #' @export 86 | as_factor.default <- function(x, add.non.labelled = FALSE, ...) { 87 | to_fac_helper(x, add.non.labelled) 88 | } 89 | 90 | 91 | #' @rdname as_factor 92 | #' @export 93 | as_factor.data.frame <- function(x, ..., add.non.labelled = FALSE) { 94 | dots <- sapply(eval(substitute(alist(...))), deparse) 95 | .dat <- .get_dot_data(x, dots) 96 | 97 | for (i in colnames(.dat)) { 98 | x[[i]] <- to_fac_helper(.dat[[i]], add.non.labelled) 99 | } 100 | 101 | x 102 | } 103 | 104 | 105 | to_fac_helper <- function(x, add.non.labelled) { 106 | # is already factor? 107 | if (is.factor(x)) return(x) 108 | 109 | # retrieve value labels 110 | lab <- get_labels( 111 | x, 112 | attr.only = TRUE, 113 | values = "n", 114 | non.labelled = add.non.labelled 115 | ) 116 | 117 | # retrieve variable labels 118 | varlab <- attr(x, "label", exact = TRUE) 119 | na_values <- attr(x, "na_values", exact = TRUE) 120 | if (is.null(na_values)) { 121 | na_values <- attr(x, "na.values", exact = TRUE) 122 | } 123 | 124 | # switch value and names attribute, since get_labels 125 | # returns the values as names, and the value labels 126 | # as "vector content" 127 | if (!is.null(lab)) { 128 | if (is.character(x) || (is.factor(x) && !is.num.fac(x))) 129 | lab.switch <- names(lab) 130 | else 131 | lab.switch <- as.numeric(names(lab)) 132 | 133 | names(lab.switch) <- as.character(lab) 134 | } else { 135 | lab.switch <- NULL 136 | } 137 | 138 | # convert variable to factor 139 | x <- factor(x, exclude = c(NA_character_, "NaN")) 140 | 141 | # set back value labels 142 | x <- suppressMessages( 143 | set_labels( 144 | x, 145 | labels = lab.switch, 146 | force.labels = TRUE, 147 | force.values = FALSE 148 | ) 149 | ) 150 | 151 | # set back variable labels 152 | attr(x, "label") <- varlab 153 | attr(x, "na_values") <- na_values 154 | attr(x, "na.values") <- na_values 155 | 156 | x 157 | } 158 | -------------------------------------------------------------------------------- /R/as_labelled.R: -------------------------------------------------------------------------------- 1 | #' @title Convert vector to labelled class 2 | #' @name as_labelled 3 | #' 4 | #' @description Converts a (labelled) vector of any class into a \code{labelled} 5 | #' class vector, resp. adds a \code{labelled} class-attribute. 6 | #' 7 | #' @param x Variable (vector), \code{data.frame} or \code{list} of variables 8 | #' that should be converted to \code{\link[haven:labelled]{labelled()}}-class 9 | #' objects. 10 | #' @param add.labels Logical, if \code{TRUE}, non-labelled values will be 11 | #' labelled with the corresponding value. 12 | #' @param add.class Logical, if \code{TRUE}, \code{x} preserves its former 13 | #' \code{class}-attribute and \code{labelled} is added as additional 14 | #' attribute. If \code{FALSE} (default), all former \code{class}-attributes 15 | #' will be removed and the class-attribute of \code{x} will only 16 | #' be \code{labelled}. 17 | #' @param skip.strings Logical, if \code{TRUE}, character vector are not converted 18 | #' into labelled-vectors. Else, character vectors are converted to factors 19 | #' vector and the associated values are used as value labels. 20 | #' @param tag.na Logical, if \code{TRUE}, tagged \code{NA} values are replaced 21 | #' by their associated values. This is required, for instance, when writing 22 | #' data back to SPSS. 23 | #' 24 | #' @return \code{x}, as \code{labelled}-class object. 25 | #' 26 | #' @examples 27 | #' data(efc) 28 | #' str(efc$e42dep) 29 | #' 30 | #' x <- as_labelled(efc$e42dep) 31 | #' str(x) 32 | #' 33 | #' x <- as_labelled(efc$e42dep, add.class = TRUE) 34 | #' str(x) 35 | #' 36 | #' a <- c(1, 2, 4) 37 | #' x <- as_labelled(a, add.class = TRUE) 38 | #' str(x) 39 | #' 40 | #' data(efc) 41 | #' x <- set_labels(efc$e42dep, 42 | #' labels = c(`1` = "independent", `4` = "severe dependency")) 43 | #' x1 <- as_labelled(x, add.labels = FALSE) 44 | #' x2 <- as_labelled(x, add.labels = TRUE) 45 | #' 46 | #' str(x1) 47 | #' str(x2) 48 | #' 49 | #' get_values(x1) 50 | #' get_values(x2) 51 | #' @importFrom stats na.omit 52 | #' @export 53 | as_labelled <- function(x, add.labels = FALSE, add.class = FALSE, skip.strings = FALSE, tag.na = FALSE) { 54 | UseMethod("as_labelled") 55 | } 56 | 57 | #' @export 58 | as_labelled.data.frame <- function(x, add.labels = FALSE, add.class = FALSE, skip.strings = FALSE, tag.na = FALSE) { 59 | data_frame(lapply(x, FUN = as_labelled_helper, add.labels, add.class, skip.strings, tag.na)) 60 | } 61 | 62 | #' @export 63 | as_labelled.list <- function(x, add.labels = FALSE, add.class = FALSE, skip.strings = FALSE, tag.na = FALSE) { 64 | lapply(x, FUN = as_labelled_helper, add.labels, add.class, skip.strings, tag.na) 65 | } 66 | 67 | #' @export 68 | as_labelled.default <- function(x, add.labels = FALSE, add.class = FALSE, skip.strings = FALSE, tag.na = FALSE) { 69 | as_labelled_helper(x, add.labels, add.class, skip.strings, tag.na) 70 | } 71 | 72 | as_labelled_helper <- function(x, add.labels, add.class, skip.strings, tag.na) { 73 | # do nothing for labelled class 74 | if (is_labelled(x)) return(x) 75 | 76 | if (is.character(x) && skip.strings) return(x) 77 | 78 | # if factor, convert to numeric 79 | if (is.factor(x)) x <- as_numeric(x, keep.labels = TRUE) 80 | 81 | # return atomics 82 | if (is.null(get_labels(x, attr.only = TRUE))) return(x) 83 | 84 | # fill up missing attributes 85 | if (add.labels) x <- fill_labels(x) 86 | 87 | # reset missings 88 | if (!tag.na) { 89 | xna <- get_na(x) 90 | if (!isempty(xna)) { 91 | x <- set_na(x, na = xna) 92 | } 93 | } else { 94 | if (!requireNamespace("haven", quietly = TRUE)) { 95 | stop("Package 'haven' required for this function. Please install it.") 96 | } 97 | xna <- get_na(x, as.tag = TRUE) 98 | if (!isempty(xna)) { 99 | labels <- attr(x, "labels", exact = TRUE) 100 | new_tags <- unname(gsub("NA\\((.*)\\)", "\\1", xna)) 101 | names(new_tags) <- new_tags 102 | # convert to numeric, if character 103 | numeric_na <- which(is.na(suppressWarnings(as.numeric(new_tags)))) 104 | if (any(numeric_na)) { 105 | names(new_tags)[numeric_na] <- match(new_tags[numeric_na], letters) * -1 106 | } 107 | tagged_missing <- haven::na_tag(x) 108 | for (i in 1:length(xna)) { 109 | x[which(tagged_missing == new_tags[i])] <- as.numeric(names(new_tags[i])) 110 | } 111 | labels[is.na(labels)] <- stats::setNames(attr(x, "na.values"), names(labels[is.na(labels)])) 112 | attr(x, "labels") <- labels 113 | } 114 | } 115 | 116 | # is type of labels same as type of vector? typically, character 117 | # vectors can have numeric labels or vice versa, numeric vectors 118 | # have "numeric" labels as character strings. in this case, 119 | # harmonize types of vector and labels, so haven doesn't complain 120 | 121 | lt <- as.vector(attr(x, "labels", exact = TRUE)) 122 | if (!is.null(lt) && typeof(lt) != typeof(x)) { 123 | lab.at <- attr(x, "labels", exact = TRUE) 124 | nlab <- names(lab.at) 125 | if (is.integer(x) && !is.integer(lt)) { 126 | lab.at <- as.integer(lab.at) 127 | } else if (is.num.chr(lt, na.rm = TRUE)) { 128 | lab.at <- as.numeric(lab.at) 129 | } else { 130 | lab.at <- as.character(lab.at) 131 | } 132 | names(lab.at) <- nlab 133 | attr(x, "labels") <- lab.at 134 | } 135 | 136 | # get former class attributes 137 | xc <- class(x) 138 | if (add.class) 139 | class(x) <- c(xc, "haven_labelled", "vctrs_vctr") 140 | else 141 | class(x) <- c("haven_labelled", "vctrs_vctr") 142 | 143 | # add haven labelled SPSS class 144 | if (tag.na) { 145 | class(x) <- c("haven_labelled_spss", class(x)) 146 | } 147 | 148 | x 149 | } 150 | -------------------------------------------------------------------------------- /R/as_numeric.R: -------------------------------------------------------------------------------- 1 | #' @title Convert factors to numeric variables 2 | #' @name as_numeric 3 | #' 4 | #' @description This function converts (replaces) factor levels with the 5 | #' related factor level index number, thus the factor is converted to 6 | #' a numeric variable. 7 | #' 8 | #' @param start.at Starting index, i.e. the lowest numeric value of the variable's 9 | #' value range. By default, this argument is \code{NULL}, hence the lowest 10 | #' value of the returned numeric variable corresponds to the lowest factor 11 | #' level (if factor levels are numeric) or to \code{1} (if factor levels 12 | #' are not numeric). 13 | #' @param keep.labels Logical, if \code{TRUE}, former factor levels will be added as 14 | #' value labels. For numeric factor levels, values labels will be used, 15 | #' if present. See 'Examples' and \code{\link{set_labels}} for more details. 16 | #' @param use.labels Logical, if \code{TRUE} and \code{x} has numeric value labels, 17 | #' the values defined in the labels (right-hand side of \code{labels}, for instance 18 | #' \code{labels = c(null = 0, one = 1)}) will be set as numeric values (instead 19 | #' of consecutive factor level numbers). See 'Examples'. 20 | #' 21 | #' @return A numeric variable with values ranging either from \code{start.at} to 22 | #' \code{start.at} + length of factor levels, or to the corresponding 23 | #' factor levels (if these were numeric). If \code{x} is a data frame, 24 | #' the complete data frame \code{x} will be returned, where variables 25 | #' specified in \code{...} are coerced to numeric; if \code{...} is 26 | #' not specified, applies to all variables in the data frame. 27 | #' 28 | #' @inheritParams add_labels 29 | #' 30 | #' @examples 31 | #' data(efc) 32 | #' test <- as_label(efc$e42dep) 33 | #' table(test) 34 | #' 35 | #' table(as_numeric(test)) 36 | #' hist(as_numeric(test, start.at = 0)) 37 | #' 38 | #' # set lowest value of new variable to "5". 39 | #' table(as_numeric(test, start.at = 5)) 40 | #' 41 | #' # numeric factor keeps values 42 | #' dummy <- factor(c("3", "4", "6")) 43 | #' table(as_numeric(dummy)) 44 | #' 45 | #' # do not drop unused factor levels 46 | #' dummy <- ordered(c(rep("No", 5), rep("Maybe", 3)), 47 | #' levels = c("Yes", "No", "Maybe")) 48 | #' as_numeric(dummy) 49 | #' 50 | #' # non-numeric factor is converted to numeric 51 | #' # starting at 1 52 | #' dummy <- factor(c("D", "F", "H")) 53 | #' table(as_numeric(dummy)) 54 | #' 55 | #' # for numeric factor levels, value labels will be used, if present 56 | #' dummy1 <- factor(c("3", "4", "6")) 57 | #' dummy1 <- set_labels(dummy1, labels = c("first", "2nd", "3rd")) 58 | #' dummy1 59 | #' as_numeric(dummy1) 60 | #' 61 | #' # for non-numeric factor levels, these will be used. 62 | #' # value labels will be ignored 63 | #' dummy2 <- factor(c("D", "F", "H")) 64 | #' dummy2 <- set_labels(dummy2, labels = c("first", "2nd", "3rd")) 65 | #' dummy2 66 | #' as_numeric(dummy2) 67 | #' 68 | #' 69 | #' # easily coerce specific variables in a data frame to numeric 70 | #' # and keep other variables, with their class preserved 71 | #' data(efc) 72 | #' efc$e42dep <- as.factor(efc$e42dep) 73 | #' efc$e16sex <- as.factor(efc$e16sex) 74 | #' efc$e17age <- as.factor(efc$e17age) 75 | #' 76 | #' # convert back "sex" and "age" into numeric 77 | #' head(as_numeric(efc, e16sex, e17age)) 78 | #' 79 | #' x <- factor(c("None", "Little", "Some", "Lots")) 80 | #' x <- set_labels(x, 81 | #' labels = c(None = "0.5", Little = "1.3", Some = "1.8", Lots = ".2") 82 | #' ) 83 | #' x 84 | #' as_numeric(x) 85 | #' as_numeric(x, use.labels = TRUE) 86 | #' as_numeric(x, use.labels = TRUE, keep.labels = FALSE) 87 | #' @export 88 | as_numeric <- function(x, ...) { 89 | UseMethod("as_numeric") 90 | } 91 | 92 | 93 | #' @rdname as_numeric 94 | #' @export 95 | to_numeric <- as_numeric 96 | 97 | 98 | #' @export 99 | as_numeric.default <- function(x, start.at = NULL, keep.labels = TRUE, use.labels = FALSE, ...) { 100 | as_numeric_helper(x, start.at, keep.labels, use.labels) 101 | } 102 | 103 | 104 | #' @rdname as_numeric 105 | #' @export 106 | as_numeric.data.frame <- function(x, ..., start.at = NULL, keep.labels = TRUE, use.labels = FALSE) { 107 | dots <- sapply(eval(substitute(alist(...))), deparse) 108 | .dat <- .get_dot_data(x, dots) 109 | 110 | # iterate variables of data frame 111 | for (i in colnames(.dat)) { 112 | x[[i]] <- as_numeric_helper(.dat[[i]], start.at, keep.labels, use.labels) 113 | } 114 | 115 | x 116 | } 117 | 118 | 119 | as_numeric_helper <- function(x, start.at, keep.labels, use.labels) { 120 | labels <- NULL 121 | 122 | # is already numeric? 123 | if (is.numeric(x)) return(x) 124 | 125 | # save variable label 126 | varlab <- get_label(x) 127 | 128 | # get labels 129 | labels <- get_labels(x, attr.only = TRUE, values = "n") 130 | 131 | # is character? 132 | if (is.character(x)) { 133 | 134 | # has labels? 135 | if (!is.null(labels)) { 136 | 137 | # sort labels correctly, therefor get "levels" 138 | lvls <- levels(as.factor(x)) 139 | 140 | # do we have more labels than values? If yes, drop unused labels 141 | if (length(labels) > length(lvls)) labels <- labels[names(labels) %in% lvls] 142 | 143 | # it might be that we have more levels than labels, in this case 144 | # drop unused levels - else, ordering won't work 145 | if (length(lvls) > length(labels)) lvls <- lvls[lvls %in% names(labels)] 146 | 147 | # sort labels correctly 148 | labels <- unname(labels[order(names(labels), lvls)]) 149 | } 150 | 151 | # convert to factor 152 | x <- as.factor(x) 153 | } 154 | 155 | # check if we have numeric factor levels 156 | if (is.num.fac(x)) { 157 | 158 | # retrieve "value labels" 159 | if (is.null(labels)) labels <- levels(x) 160 | 161 | # convert to numeric via as.vector 162 | new_value <- as.numeric(as.vector((x))) 163 | 164 | # new minimum value? 165 | if (!is.null(start.at) && is.numeric(start.at)) { 166 | 167 | # check if lowest value of variable differs from 168 | # requested minimum conversion value 169 | val_diff <- start.at - min(new_value, na.rm = TRUE) 170 | 171 | # adjust new_value 172 | new_value <- new_value + val_diff 173 | } 174 | } else { 175 | 176 | # use non-numeric factor levels as new labels 177 | labels <- levels(x) 178 | 179 | # check which numeric values to use. If value labels were 180 | # numeric and 'use.labels = TRUE', value labels as used 181 | # as values 182 | if (use.labels) { 183 | levels(x) <- get_values(x) 184 | } else { 185 | # check start.at value 186 | if (is.null(start.at)) start.at <- 1 187 | 188 | # get amount of categories 189 | l <- nlevels(x) 190 | 191 | # determine highest category value 192 | end <- start.at + l - 1 193 | 194 | # replace labels with numeric values 195 | levels(x) <- start.at:end 196 | } 197 | 198 | # convert to numeric 199 | new_value <- as.numeric(as.character(x)) 200 | } 201 | 202 | # check if we should set back former variable and value labels 203 | if (keep.labels) { 204 | new_value <- set_labels(new_value, labels = labels, force.labels = TRUE) 205 | new_value <- set_label(new_value, label = varlab) 206 | } 207 | 208 | new_value 209 | } 210 | -------------------------------------------------------------------------------- /R/convert_case.R: -------------------------------------------------------------------------------- 1 | #' @title Generic case conversion for labels 2 | #' @name convert_case 3 | #' 4 | #' @description This function wraps \code{to_any_case()} from the \pkg{snakecase} 5 | #' package with certain defaults for the \code{sep_in} and 6 | #' \code{sep_out} arguments, used for instance to convert cases in 7 | #' \code{\link{term_labels}}. 8 | #' 9 | #' @param lab Character vector that should be case converted. 10 | #' @param case Desired target case. Labels will automatically converted into the 11 | #' specified character case. See \code{\link[snakecase:to_any_case]{to_any_case()}} for 12 | #' more details on this argument. 13 | #' @param verbose Toggle warnings and messages on or off. 14 | #' @param ... Further arguments passed down to \code{to_any_case()}, 15 | #' like \code{sep_in} or \code{sep_out}. 16 | #' 17 | #' @return \code{lab}, with converted case. 18 | #' 19 | #' @details When calling \code{to_any_case()} from \pkg{snakecase}, the 20 | #' \code{sep_in} argument is set to \code{"(?% str() 44 | #' } 45 | #' 46 | #' # copy labels from only some columns 47 | #' str(copy_labels(efc.sub, efc, e42dep)) 48 | #' str(copy_labels(efc.sub, efc, -e17age)) 49 | #' @export 50 | copy_labels <- function(df_new, df_origin = NULL, ...) { 51 | # check if old df is NULL. if so, we remove all labels 52 | # from the data frame. 53 | if (is.null(df_origin)) { 54 | # tell user 55 | message("Removing all variable and value labels from data frame.") 56 | # remove all labels 57 | df_new <- remove_all_labels(df_new) 58 | } else { 59 | # check params 60 | if (is.data.frame(df_new) && is.data.frame(df_origin)) { 61 | # get matching colnames, because we only copy attributes from variables 62 | # that also exist in the new data frame (of course) 63 | cn <- intersect(colnames(df_new), colnames(df_origin)) 64 | dots <- as.character(match.call(expand.dots = FALSE)$`...`) 65 | .dat <- .get_dot_data(df_origin, dots) 66 | cn <- intersect(cn, names(.dat)) 67 | 68 | for (i in cn) { 69 | # copy variable and value labels 70 | attr(df_new[[i]], "label") <- attr(df_origin[[i]], "label", exact = TRUE) 71 | attr(df_new[[i]], "labels") <- attr(df_origin[[i]], "labels", exact = TRUE) 72 | attr(df_new[[i]], "na_values") <- attr(df_origin[[i]], "na_values", exact = TRUE) 73 | attr(df_new[[i]], "na.values") <- attr(df_origin[[i]], "na.values", exact = TRUE) 74 | } 75 | } else { 76 | warning("Both `df_origin` and `df_new` must be of class `data.frame`.", call. = FALSE) 77 | } 78 | } 79 | 80 | df_new 81 | } 82 | -------------------------------------------------------------------------------- /R/drop_labels.R: -------------------------------------------------------------------------------- 1 | #' @rdname zap_labels 2 | #' @export 3 | drop_labels <- function(x, ..., drop.na = TRUE) { 4 | dots <- as.character(match.call(expand.dots = FALSE)$`...`) 5 | .dat <- .get_dot_data(x, dots) 6 | 7 | if (is.data.frame(x)) { 8 | # iterate variables of data frame 9 | for (i in colnames(.dat)) { 10 | x[[i]] <- drop_labels_helper(.dat[[i]], drop.na = drop.na) 11 | } 12 | } else { 13 | x <- drop_labels_helper(.dat, drop.na = drop.na) 14 | } 15 | 16 | x 17 | } 18 | 19 | drop_labels_helper <- function(x, drop.na) { 20 | # retrieve named labels 21 | tidy.labels <- attr(x, "labels", exact = TRUE) 22 | if (requireNamespace("haven", quietly = TRUE)) { 23 | tidy.labels <- tidy.labels[!haven::is_tagged_na(tidy.labels)] 24 | } 25 | 26 | # return x, if no attribute 27 | if (is.null(tidy.labels)) return(x) 28 | 29 | # all missing in variable? 30 | if (all(is.na(x))) return(x) 31 | 32 | # remove labels with no values in data 33 | tidy.labels <- tidy.labels[get_values(x, drop.na = drop.na) %in% names(table(x))] 34 | 35 | # check if tidy labels is empty - then remove everything 36 | if (isempty(tidy.labels)) tidy.labels <- "" 37 | 38 | # check if user wants to keep labels for NA values or not. 39 | if (!drop.na) { 40 | current.na <- get_na(x) 41 | if (!is.null(current.na) && length(current.na) > 0) 42 | tidy.labels <- c(tidy.labels, current.na) 43 | } 44 | 45 | # set back labels 46 | if (isempty(tidy.labels)) { 47 | attr(x, "labels") <- NULL 48 | } else { 49 | attr(x, "labels") <- tidy.labels 50 | 51 | # if labels, e.g. due to tagged NA, are no longer of same 52 | # type as labelled vector, remove labelled class attribute - 53 | # else, haven will throw errors 54 | if (inherits(x, c("labelled", "haven_labelled")) && typeof(x) != typeof(tidy.labels)) 55 | x <- unclass(x) 56 | } 57 | 58 | x 59 | } 60 | -------------------------------------------------------------------------------- /R/efc.R: -------------------------------------------------------------------------------- 1 | #' @docType data 2 | #' @title Sample dataset from the EUROFAMCARE project 3 | #' @name efc 4 | #' @keywords data 5 | #' 6 | #' @description A SPSS sample data set, imported with the \code{\link{read_spss}} function. 7 | #' 8 | #' @examples 9 | #' # Attach EFC-data 10 | #' data(efc) 11 | #' 12 | #' # Show structure 13 | #' str(efc) 14 | #' 15 | #' # show first rows 16 | #' head(efc) 17 | #' 18 | #' # show variables 19 | #' \dontrun{ 20 | #' library(sjPlot) 21 | #' view_df(efc) 22 | #' 23 | #' # show variable labels 24 | #' get_label(efc) 25 | #' 26 | #' # plot efc-data frame summary 27 | #' sjt.df(efc, altr.row.col = TRUE)} 28 | #' 29 | NULL 30 | 31 | -------------------------------------------------------------------------------- /R/fill_labels.R: -------------------------------------------------------------------------------- 1 | #' @rdname zap_labels 2 | #' @export 3 | fill_labels <- function(x, ...) { 4 | dots <- as.character(match.call(expand.dots = FALSE)$`...`) 5 | .dat <- .get_dot_data(x, dots) 6 | 7 | if (is.data.frame(x)) { 8 | # iterate variables of data frame 9 | for (i in colnames(.dat)) { 10 | x[[i]] <- fill_labels_helper(.dat[[i]]) 11 | } 12 | } else { 13 | x <- fill_labels_helper(.dat) 14 | } 15 | 16 | x 17 | } 18 | 19 | fill_labels_helper <- function(x) { 20 | # get current labels 21 | current.values <- get_labels(x, attr.only = TRUE, non.labelled = FALSE) 22 | # get all labels, including non-labelled values 23 | all.values <- get_labels(x, 24 | attr.only = TRUE, 25 | values = "n", 26 | non.labelled = TRUE) 27 | # have any values? 28 | if (!is.null(all.values)) { 29 | # set back all labels, if amount of all labels differ 30 | # from the "current" values 31 | if (length(all.values) > length(current.values)) { 32 | # first, we need to switch name attribute and values 33 | all.val.switch <- as.numeric(names(all.values)) 34 | names(all.val.switch) <- as.character(all.values) 35 | # get current NA values 36 | current.na <- get_na(x) 37 | # add NA 38 | if (!is.null(current.na)) all.val.switch <- c(all.val.switch, current.na) 39 | # then set labels 40 | x <- set_labels( 41 | x, 42 | labels = all.val.switch, 43 | force.labels = TRUE, 44 | force.values = TRUE 45 | ) 46 | } 47 | } 48 | 49 | x 50 | } 51 | -------------------------------------------------------------------------------- /R/get_label.R: -------------------------------------------------------------------------------- 1 | #' @title Retrieve variable label(s) of labelled data 2 | #' @name get_label 3 | #' 4 | #' @description This function returns the variable labels of labelled data. 5 | #' 6 | #' @seealso See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package} 7 | #' for more details; \code{\link{set_label}} to manually set variable labels or \code{\link{get_labels}} 8 | #' to get value labels; \code{\link{var_labels}} to set multiple variable 9 | #' labels at once. 10 | 11 | #' @param x A data frame with variables that have label attributes (e.g. 12 | #' from an imported SPSS, SAS or STATA data set, via \code{\link{read_spss}}, 13 | #' \code{\link{read_sas}} or \code{\link{read_stata}}); a variable 14 | #' (vector) with variable label attribute; or a \code{list} of variables 15 | #' with variable label attributes. See 'Examples'. 16 | #' @param ... Optional, names of variables, where labels should be retrieved. 17 | #' Required, if either data is a data frame and no vector, or if only 18 | #' selected variables from \code{x} should be used in the function. 19 | #' Convenient argument to work with pipe-chains (see 'Examples'). 20 | #' @param def.value Optional, a character string which will be returned as label 21 | #' if \code{x} has no label attribute. By default, \code{NULL} is returned. 22 | #' 23 | #' @inheritParams term_labels 24 | #' 25 | #' @return A named character vector with all variable labels from the data frame or list; 26 | #' or a simple character vector (of length 1) with the variable label, if \code{x} is a variable. 27 | #' If \code{x} is a single vector and has no label attribute, the value 28 | #' of \code{def.value} will be returned (which is by default \code{NULL}). 29 | #' 30 | #' @note \code{\link{var_labels}} is an alternative way to set variable labels, 31 | #' which follows the philosophy of tidyvers API design (data as first argument, 32 | #' dots as value pairs indicating variables) 33 | #' 34 | #' @examples 35 | #' # import SPSS data set 36 | #' # mydat <- read_spss("my_spss_data.sav", enc="UTF-8") 37 | #' 38 | #' # retrieve variable labels 39 | #' # mydat.var <- get_label(mydat) 40 | #' 41 | #' # retrieve value labels 42 | #' # mydat.val <- get_labels(mydat) 43 | #' 44 | #' data(efc) 45 | #' 46 | #' # get variable lable 47 | #' get_label(efc$e42dep) 48 | #' 49 | #' # alternative way 50 | #' get_label(efc)["e42dep"] 51 | #' 52 | #' # 'get_label()' also works within pipe-chains 53 | #' library(magrittr) 54 | #' efc %>% get_label(e42dep, e16sex) 55 | #' 56 | #' # set default values 57 | #' get_label(mtcars, mpg, cyl, def.value = "no var labels") 58 | #' 59 | #' # simple barplot 60 | #' barplot(table(efc$e42dep)) 61 | #' # get value labels to annotate barplot 62 | #' barplot(table(efc$e42dep), 63 | #' names.arg = get_labels(efc$e42dep), 64 | #' main = get_label(efc$e42dep)) 65 | #' 66 | #' # get labels from multiple variables 67 | #' get_label(list(efc$e42dep, efc$e16sex, efc$e15relat)) 68 | #' 69 | #' # use case conversion for human-readable labels 70 | #' data(iris) 71 | #' get_label(iris, def.value = colnames(iris)) 72 | #' get_label(iris, def.value = colnames(iris), case = "parsed") 73 | #' @export 74 | get_label <- function(x, ..., def.value = NULL, case = NULL) { 75 | UseMethod("get_label") 76 | } 77 | 78 | 79 | #' @export 80 | get_label.data.frame <- function(x, ..., def.value = NULL, case = NULL) { 81 | dots <- as.character(match.call(expand.dots = FALSE)$`...`) 82 | x <- .get_dot_data(x, dots) 83 | 84 | sapply(seq_along(x), function(i) { 85 | # get label 86 | label <- attr(x[[i]], "label", exact = TRUE) 87 | 88 | # any label? 89 | if (is.null(label)) { 90 | if (!is.null(def.value)) { 91 | # def.value may also apply to data frame arguments, 92 | # so it can be greater than length one 93 | if (i <= length(def.value)) 94 | label <- def.value[i] 95 | else 96 | label <- def.value 97 | } else { 98 | label <- "" 99 | } 100 | } 101 | 102 | names(label) <- colnames(x)[i] 103 | # append to return result 104 | convert_case(label, case) 105 | }) 106 | } 107 | 108 | 109 | #' @export 110 | get_label.list <- function(x, ..., def.value = NULL, case = NULL) { 111 | convert_case(unlist(lapply(x, attr, "label", exact = TRUE)), case) 112 | } 113 | 114 | 115 | #' @export 116 | get_label.default <- function(x, ..., def.value = NULL, case = NULL) { 117 | labels <- attr(x, "label", exact = TRUE) 118 | 119 | if (is.null(labels)) 120 | convert_case(def.value, case) 121 | else 122 | convert_case(labels, case) 123 | } 124 | -------------------------------------------------------------------------------- /R/get_model_labels.R: -------------------------------------------------------------------------------- 1 | #' @title Retrieve labels of model terms from regression models 2 | #' @name term_labels 3 | #' 4 | #' @description This function retrieves variable labels from model terms. In case 5 | #' of categorical variables, where one variable has multiple dummies, 6 | #' variable name and category value is returned. 7 | #' 8 | #' @param models One or more fitted regression models. May also be glm's or 9 | #' mixed models. 10 | #' @param mark.cat Logical, if \code{TRUE}, the returned vector has an 11 | #' attribute with logical values, which indicate whether a label indicates 12 | #' the value from a factor category (attribute value is \code{TRUE}) or 13 | #' a term's variable labels (attribute value is \code{FALSE}). 14 | #' @param case Desired target case. Labels will automatically converted into the 15 | #' specified character case. See \code{\link[snakecase:to_any_case]{to_any_case()}} for 16 | #' more details on this argument. 17 | #' @param prefix Indicates whether the value labels of categorical variables 18 | #' should be prefixed, e.g. with the variable name or variable label. 19 | #' May be abbreviated. See 'Examples', 20 | #' @param mv,multi.resp Logical, if \code{TRUE} and \code{models} is a multivariate 21 | #' response model from a \code{brmsfit} object, then the labels for each 22 | #' dependent variable (multiple responses) are returned. 23 | #' @param ... Further arguments passed down to \code{to_any_case()}, 24 | #' like \code{preprocess} or \code{postprocess}. 25 | #' 26 | #' @return For \code{term_labels()}, a (named) character vector with 27 | #' variable labels of all model terms, which can be used, for instance, 28 | #' as axis labels to annotate plots. \cr \cr For \code{response_labels()}, 29 | #' a character vector with variable labels from all dependent variables 30 | #' of \code{models}. 31 | #' 32 | #' @details Typically, the variable labels from model terms are returned. However, 33 | #' for categorical terms that have estimates for each category, the 34 | #' value labels are returned as well. As the return value is a named 35 | #' vector, you can easily use it with \pkg{ggplot2}'s \code{scale_*()} 36 | #' functions to annotate plots. 37 | #' 38 | #' @examples 39 | #' # use data set with labelled data 40 | #' data(efc) 41 | #' 42 | #' fit <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) 43 | #' term_labels(fit) 44 | #' 45 | #' # make "education" categorical 46 | #' if (require("sjmisc")) { 47 | #' efc$c172code <- to_factor(efc$c172code) 48 | #' fit <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) 49 | #' term_labels(fit) 50 | #' 51 | #' # prefix value of categorical variables with variable name 52 | #' term_labels(fit, prefix = "varname") 53 | #' 54 | #' # prefix value of categorical variables with value label 55 | #' term_labels(fit, prefix = "label") 56 | #' 57 | #' # get label of dv 58 | #' response_labels(fit) 59 | #' } 60 | #' @importFrom insight find_parameters get_data 61 | #' @importFrom stats model.frame coef terms 62 | #' @export 63 | term_labels <- function(models, mark.cat = FALSE, case = NULL, prefix = c("none", "varname", "label"), ...) { 64 | 65 | prefix <- match.arg(prefix) 66 | 67 | # to be generic, make sure argument is a list 68 | if (!inherits(models, "list")) models <- list(models) 69 | 70 | # get model terms and model frame 71 | m <- try(lapply(models, function(.x) insight::find_predictors(.x, flatten = TRUE)), silent = TRUE) 72 | mf <- try(lapply(models, function(.x) insight::get_data(.x)[, -1, drop = FALSE]), silent = TRUE) 73 | 74 | # return NULL on error 75 | if (inherits(m, "try-error") || inherits(mf, "try-error")) { 76 | return(NULL) 77 | } 78 | 79 | 80 | # get all variable labels for predictors 81 | 82 | lbs1 <- unlist(lapply(1:length(m), function(x) { 83 | if (is.null(mf[[x]])) { 84 | m[[x]][-1] 85 | } else { 86 | get_label(mf[[x]], def.value = colnames(mf[[x]])) 87 | } 88 | })) 89 | 90 | 91 | # any empty name? if yes, use label as name 92 | 93 | empty <- nchar(names(lbs1)) 94 | 95 | if (any(empty == 0)) { 96 | empty <- which(empty == 0) 97 | names(lbs1)[empty] <- lbs1[empty] 98 | } 99 | 100 | 101 | # for categorical predictors, we have one term per 102 | # value (factor level), so extract these as well 103 | 104 | lbs2 <- lapply(mf, function(.x) { 105 | unlist(mapply(function(.x, .y) { 106 | if (is.factor(.x)) { 107 | l <- get_labels(.x) 108 | if (!anyNA(suppressWarnings(as.numeric(l)))) 109 | paste0(.y, l) 110 | else 111 | l 112 | } 113 | }, .x, colnames(.x), SIMPLIFY = FALSE)) 114 | }) 115 | 116 | fixed.names <- lapply(mf, function(.x) { 117 | unlist(mapply(function(.x, .y) { 118 | if (is.factor(.x)) paste0(.y, levels(.x)) 119 | }, .x, colnames(.x), SIMPLIFY = FALSE)) 120 | }) 121 | 122 | # flatten, if we have any elements. in case all predictors 123 | # were non-factors, list has only NULLs 124 | 125 | lbs2 <- if (!is.null(unlist(lbs2))) 126 | as.character(unlist(lbs2)) 127 | else 128 | NULL 129 | 130 | fixed.names <- if (!is.null(unlist(fixed.names))) 131 | as.character(unlist(fixed.names)) 132 | else 133 | NULL 134 | 135 | names(lbs2) <- unname(fixed.names) 136 | 137 | # create logical to indicate which labels come from factors 138 | fl1 <- vector(mode = "logical", length = length(lbs1)) 139 | 140 | if (!is.null(lbs2)) { 141 | fl2 <- vector(mode = "logical", length = length(lbs2)) 142 | fl2[1:length(fl2)] <- TRUE 143 | } else { 144 | fl2 <- NULL 145 | } 146 | 147 | 148 | # remove duplicated 149 | lbs <- c(lbs1, lbs2) 150 | fl <- c(fl1, fl2) 151 | 152 | keep <- !(duplicated(lbs) & duplicated(names(lbs))) 153 | 154 | lbs <- lbs[keep] 155 | fl <- fl[keep] 156 | 157 | 158 | # set default names for values 159 | if (is.null(names(lbs))) names(lbs) <- lbs 160 | 161 | # do we have partial empty names? if yes, fill them 162 | en <- which(nchar(names(lbs)) == 0) 163 | if (!isempty(en)) names(lbs)[en] <- lbs[en] 164 | 165 | 166 | # prefix labels 167 | if (prefix != "none") 168 | lbs <- prepare.labels(lbs, catval = fl, style = prefix) 169 | 170 | 171 | # the vector now contains all possible labels, as named vector. 172 | # since ggplot uses named vectors as labels for axis-scales, matching 173 | # of labels is done automatically 174 | lbs <- convert_case(lbs, case, ...) 175 | 176 | # check if attribute is requested 177 | if (mark.cat) attr(lbs, "category.value") <- fl 178 | 179 | lbs 180 | } 181 | 182 | 183 | #' @rdname term_labels 184 | #' @export 185 | get_term_labels <- term_labels 186 | 187 | 188 | prepare.labels <- function(x, catval, style = c("varname", "label")) { 189 | x_var <- names(x[!catval]) 190 | x_val <- names(x[catval]) 191 | 192 | for (i in x_var) { 193 | pos <- string_starts_with(pattern = i, x = x_val) 194 | 195 | if (!isempty(pos) && length(pos) > 0) { 196 | match.vals <- x_val[pos] 197 | if (style == "label") 198 | x[match.vals] <- sprintf("%s: %s", x[i], x[match.vals]) 199 | else 200 | x[match.vals] <- sprintf("%s: %s", i, x[match.vals]) 201 | } 202 | } 203 | 204 | x 205 | } 206 | 207 | 208 | #' @rdname term_labels 209 | #' @importFrom stats model.frame 210 | #' @export 211 | response_labels <- function(models, case = NULL, multi.resp = FALSE, mv = FALSE, ...) { 212 | 213 | if (!missing(multi.resp)) mv <- multi.resp 214 | 215 | # to be generic, make sure argument is a list 216 | if (!inherits(models, "list")) models <- list(models) 217 | 218 | 219 | intercepts.names <- tryCatch({ 220 | lapply(models, function(x) { 221 | if (inherits(x, "brmsfit")) { 222 | if (is.null(stats::formula(x)$formula) && !is.null(stats::formula(x)$responses)) 223 | if (mv) 224 | stats::formula(x)$responses 225 | else 226 | paste(stats::formula(x)$responses, collapse = ", ") 227 | else 228 | deparse(stats::formula(x)$formula[[2L]]) 229 | } else if (inherits(x, "stanmvreg")) { 230 | if (mv) 231 | sapply(stats::formula(x), function(.x) deparse(.x[[2L]], width.cutoff = 500), simplify = TRUE) 232 | else 233 | paste(sapply(stats::formula(x), function(.x) deparse(.x[[2L]], width.cutoff = 500), simplify = TRUE), collapse = ", ") 234 | } else { 235 | deparse(stats::formula(x)[[2L]]) 236 | } 237 | })}, 238 | error = function(x) { NULL }, 239 | warning = function(x) { NULL } 240 | ) 241 | 242 | 243 | mf <- tryCatch({ 244 | mapply( 245 | function(x, y) { 246 | m <- insight::get_data(x) 247 | if (mv && inherits(x, "brmsfit")) 248 | colnames(m) <- gsub(pattern = "_", replacement = "", x = colnames(m), fixed = TRUE) 249 | y <- y[obj_has_name(m, y)] 250 | if (length(y) > 0) { 251 | m[, y, drop = FALSE] 252 | } else { 253 | m[[1]] 254 | } 255 | }, 256 | models, 257 | intercepts.names, 258 | SIMPLIFY = FALSE 259 | )}, 260 | error = function(x) { NULL }, 261 | warning = function(x) { NULL } 262 | ) 263 | 264 | 265 | if (is.null(intercepts.names) || is.null(mf)) { 266 | return(rep_len("Dependent variable", length.out = length(models))) 267 | } 268 | 269 | 270 | # get all labels 271 | 272 | lbs <- mapply(function(.x, .y) get_label(.x, def.value = .y), mf, intercepts.names, SIMPLIFY = FALSE) 273 | 274 | 275 | # flatten list, and check for correct elements 276 | 277 | lbs <- as.character(unlist(lbs)) 278 | 279 | 280 | # There are some formulas that return a rather cryptic 281 | # name. In such cases, the variable name might have more 282 | # than 1 element, and here we need to set a proper default 283 | 284 | if (!mv && length(lbs) > length(models)) lbs <- "Dependent variable" 285 | 286 | convert_case(lbs, case, ...) 287 | } 288 | 289 | 290 | #' @rdname term_labels 291 | #' @export 292 | get_dv_labels <- response_labels -------------------------------------------------------------------------------- /R/get_na.R: -------------------------------------------------------------------------------- 1 | #' @title Retrieve tagged NA values of labelled variables 2 | #' @name get_na 3 | #' 4 | #' @description This function retrieves tagged NA values and their associated 5 | #' value labels from a labelled vector. 6 | #' 7 | #' @param x Variable (vector) with value label attributes, including 8 | #' tagged missing values (see \code{\link[haven:tagged_na]{tagged_na()}}); 9 | #' or a data frame or list with such variables. 10 | #' @param as.tag Logical, if \code{TRUE}, the returned values are not tagged NA's, 11 | #' but their string representative including the tag value. See 'Examples'. 12 | #' @return The tagged missing values and their associated value labels from \code{x}, 13 | #' or \code{NULL} if \code{x} has no tagged missing values. 14 | #' 15 | #' @details Other statistical software packages (like 'SPSS' or 'SAS') allow to define 16 | #' multiple missing values, e.g. \emph{not applicable}, \emph{refused answer} 17 | #' or "real" missing. These missing types may be assigned with 18 | #' different values, so it is possible to distinguish between these 19 | #' missing types. In R, multiple declared missings cannot be represented 20 | #' in a similar way with the regular missing values. However, 21 | #' \code{tagged_na()} values can do this. 22 | #' Tagged \code{NA}s work exactly like regular R missing values 23 | #' except that they store one additional byte of information: a tag, 24 | #' which is usually a letter ("a" to "z") or character number ("0" to "9"). 25 | #' This allows to indicate different missings. 26 | #' \cr \cr 27 | #' Furthermore, see 'Details' in \code{\link{get_values}}. 28 | #' 29 | #' @examples 30 | #' library(haven) 31 | #' x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), 32 | #' c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), 33 | #' "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) 34 | #' # get current NA values 35 | #' x 36 | #' get_na(x) 37 | #' # which NA has which tag? 38 | #' get_na(x, as.tag = TRUE) 39 | #' 40 | #' # replace only the NA, which is tagged as NA(c) 41 | #' if (require("sjmisc")) { 42 | #' replace_na(x, value = 2, tagged.na = "c") 43 | #' get_na(replace_na(x, value = 2, tagged.na = "c")) 44 | #' 45 | #' # data frame as input 46 | #' y <- labelled(c(2:3, 3:1, tagged_na("y"), 4:1), 47 | #' c("Agreement" = 1, "Disagreement" = 4, "Why" = tagged_na("y"))) 48 | #' get_na(data.frame(x, y)) 49 | #' } 50 | #' @export 51 | get_na <- function(x, as.tag = FALSE) { 52 | UseMethod("get_na") 53 | } 54 | 55 | #' @export 56 | get_na.data.frame <- function(x, as.tag = FALSE) { 57 | lapply(x, FUN = get_na_helper, as.tag) 58 | } 59 | 60 | #' @export 61 | get_na.list <- function(x, as.tag = FALSE) { 62 | lapply(x, FUN = get_na_helper, as.tag) 63 | } 64 | 65 | #' @export 66 | get_na.default <- function(x, as.tag = FALSE) { 67 | get_na_helper(x, as.tag) 68 | } 69 | 70 | get_na_helper <- function(x, as.tag) { 71 | # get values 72 | values <- attr(x, "labels", exact = TRUE) 73 | 74 | # any labelled? 75 | if (is.null(values)) return(NULL) 76 | 77 | if (!requireNamespace("haven", quietly = TRUE)) { 78 | stop("Package 'haven' required for this function. Please install it.") 79 | } 80 | 81 | # get NA 82 | nas <- values[haven::is_tagged_na(values)] 83 | 84 | # if we have no *tagged* NA, return NULL 85 | if (length(nas) == 0) nas <- NULL 86 | 87 | # print as tag? 88 | if (as.tag && !is.null(nas)) { 89 | # save names 90 | nn <- names(nas) 91 | # make character vector with NA tags 92 | nas <- paste0("NA(", haven::na_tag(nas), ")") 93 | # set back names 94 | names(nas) <- nn 95 | } 96 | 97 | # return missing values 98 | nas 99 | } 100 | -------------------------------------------------------------------------------- /R/get_values.R: -------------------------------------------------------------------------------- 1 | #' @title Retrieve values of labelled variables 2 | #' @name get_values 3 | #' 4 | #' @description This function retrieves the values associated with value labels 5 | #' from \code{\link[haven]{labelled}} vectors. Data is also labelled 6 | #' when imported from SPSS, SAS or STATA via \code{\link{read_spss}}, 7 | #' \code{\link{read_sas}} or \code{\link{read_stata}}. 8 | #' 9 | #' @seealso \code{\link{get_labels}} for getting value labels and \code{\link{get_na}} 10 | #' to get values for missing values. 11 | #' 12 | #' @param x Variable (vector) with value label attributes; or a data frame or 13 | #' list with such variables. 14 | #' @param sort.val Logical, if \code{TRUE} (default), values of associated value labels 15 | #' are sorted. 16 | #' @param drop.na Logical, if \code{TRUE}, tagged NA values are excluded from 17 | #' the return value. See 'Examples' and \code{\link{get_na}}. 18 | #' 19 | #' @return The values associated with value labels from \code{x}, 20 | #' or \code{NULL} if \code{x} has no label attributes. 21 | #' 22 | #' @details \code{\link[haven]{labelled}} vectors are numeric by default (when imported with read-functions 23 | #' like \code{\link{read_spss}}) and have variable and value labels attributes. 24 | #' The value labels are associated with the values from the labelled vector. 25 | #' This function returns the values associated with the vector's value labels, 26 | #' which may differ from actual values in the vector (e.g. if not all 27 | #' values have a related label). 28 | #' 29 | #' @examples 30 | #' data(efc) 31 | #' str(efc$e42dep) 32 | #' get_values(efc$e42dep) 33 | #' get_labels(efc$e42dep) 34 | #' 35 | #' library(haven) 36 | #' x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), 37 | #' c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), 38 | #' "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) 39 | #' # get all values 40 | #' get_values(x) 41 | #' # drop NA 42 | #' get_values(x, drop.na = TRUE) 43 | #' 44 | #' # data frame as input 45 | #' y <- labelled(c(2:3, 3:1, tagged_na("y"), 4:1), 46 | #' c("Agreement" = 1, "Disagreement" = 4, "Why" = tagged_na("y"))) 47 | #' get_values(data.frame(x, y)) 48 | #' 49 | #' @export 50 | get_values <- function(x, sort.val = TRUE, drop.na = FALSE) { 51 | UseMethod("get_values") 52 | } 53 | 54 | #' @export 55 | get_values.data.frame <- function(x, sort.val = TRUE, drop.na = FALSE) { 56 | lapply(x, FUN = get_values_helper, sort.val, drop.na) 57 | } 58 | 59 | #' @export 60 | get_values.list <- function(x, sort.val = TRUE, drop.na = FALSE) { 61 | lapply(x, FUN = get_values_helper, sort.val, drop.na) 62 | } 63 | 64 | #' @export 65 | get_values.default <- function(x, sort.val = TRUE, drop.na = FALSE) { 66 | get_values_helper(x, sort.val, drop.na) 67 | } 68 | 69 | get_values_helper <- function(x, sort.val = TRUE, drop.na = FALSE) { 70 | # get labels 71 | labels <- attr(x, "labels", exact = TRUE) 72 | 73 | # nothing found? then leave... 74 | if (is.null(labels)) return(NULL) 75 | 76 | # get values 77 | if (is.character(x) || (is.factor(x) && !is.num.fac(x))) 78 | values <- unname(labels) 79 | else 80 | values <- as.numeric(unname(labels)) 81 | 82 | if (requireNamespace("haven", quietly = TRUE)) { 83 | # do we have any tagged NAs? 84 | if (any(haven::is_tagged_na(values)) && !drop.na) { 85 | values[haven::is_tagged_na(values)] <- paste0("NA(", haven::na_tag(values[haven::is_tagged_na(values)]), ")") 86 | } 87 | } 88 | 89 | # sort values 90 | if (sort.val) values <- sort(values) 91 | 92 | # remove missing value codes? 93 | if (drop.na) values <- values[!is.na(values)] 94 | 95 | # return sorted 96 | values 97 | } 98 | -------------------------------------------------------------------------------- /R/helpfunctions.R: -------------------------------------------------------------------------------- 1 | data_frame <- function(...) { 2 | x <- data.frame(..., stringsAsFactors = FALSE) 3 | rownames(x) <- NULL 4 | x 5 | } 6 | 7 | # do we have a stan-model? 8 | is.stan <- function(x) inherits(x, c("stanreg", "stanfit", "brmsfit")) 9 | 10 | # return names of objects passed as ellipses argument 11 | dot_names <- function(dots) unname(unlist(lapply(dots, as.character))) 12 | 13 | 14 | is_float <- function(x) is.numeric(x) && !all(x %% 1 == 0, na.rm = TRUE) 15 | 16 | 17 | is.num.fac <- function(x) { 18 | # check if we have numeric levels 19 | !anyNA(suppressWarnings(as.numeric(levels(x)))) 20 | } 21 | 22 | 23 | .compact_list <- function(x) x[!sapply(x, function(i) length(i) == 0 || is.null(i) || any(i == "NULL"))] 24 | 25 | 26 | #' @importFrom stats na.omit 27 | is.num.chr <- function(x, na.rm = FALSE) { 28 | # check if we have numeric character values only 29 | if (na.rm) x <- stats::na.omit(x) 30 | !anyNA(suppressWarnings(as.numeric(x))) 31 | } 32 | 33 | 34 | isempty <- function(x, first.only = TRUE) { 35 | # do we have a valid vector? 36 | if (!is.null(x)) { 37 | # if it's a character, check if we have only one element in that vector 38 | if (is.character(x)) { 39 | # characters may also be of length 0 40 | if (length(x) == 0) return(TRUE) 41 | # else, check all elements of x 42 | zero_len <- sapply(x, function(y) { 43 | # zero chars, so empty? 44 | l <- nchar(y) == 0 45 | # if 'x' was empty, we have no chars, so zero_len will be integer(0). 46 | # check this here, because zero_len needs to be logical 47 | if (length(l) == 0) l <- TRUE 48 | l 49 | }) 50 | # return result for multiple elements of character vector 51 | if (first.only) { 52 | zero_len <- isTRUE(zero_len[1]) 53 | if (length(x) > 0) x <- x[!is.na(x)][1] 54 | } else { 55 | return(unname(zero_len)) 56 | } 57 | # we have a non-character vector here. check for length 58 | } else if (is.list(x)) { 59 | x <- .compact_list(x) 60 | zero_len <- length(x) == 0 61 | } else { 62 | zero_len <- length(x) == 0 63 | } 64 | } 65 | 66 | any(is.null(x) || zero_len || all(is.na(x))) 67 | } 68 | -------------------------------------------------------------------------------- /R/is_labelled.R: -------------------------------------------------------------------------------- 1 | #' @title Check whether object is of class "labelled" 2 | #' @name is_labelled 3 | #' @description This function checks whether \code{x} is of class \code{labelled}. 4 | #' 5 | #' @param x An object. 6 | #' @return Logical, \code{TRUE} if \code{x} inherits from class \code{labelled}, 7 | #' \code{FALSE} otherwise. 8 | #' 9 | #' @export 10 | is_labelled <- function(x) inherits(x, c("labelled", "haven_labelled")) 11 | -------------------------------------------------------------------------------- /R/label_to_colnames.R: -------------------------------------------------------------------------------- 1 | #' @title Use variable labels as column names 2 | #' @name label_to_colnames 3 | #' 4 | #' @description This function sets variable labels as column names, to use "labelled 5 | #' data" also for those functions that cannot cope with labelled data by default. 6 | #' 7 | #' @param x A data frame. 8 | #' @inheritParams as_factor 9 | #' 10 | #' @return \code{x} with variable labels as column names. For variables without 11 | #' variable labels, the column name is left unchanged. 12 | #' 13 | #' @examples 14 | #' data(iris) 15 | #' 16 | #' iris <- var_labels( 17 | #' iris, 18 | #' Petal.Length = "Petal length (cm)", 19 | #' Petal.Width = "Petal width (cm)" 20 | #' ) 21 | #' 22 | #' colnames(iris) 23 | #' plot(iris) 24 | #' 25 | #' colnames(label_to_colnames(iris)) 26 | #' plot(label_to_colnames(iris)) 27 | #' @export 28 | label_to_colnames <- function(x, ...) { 29 | dots <- as.character(match.call(expand.dots = FALSE)$`...`) 30 | .dat <- .get_dot_data(x, dots) 31 | 32 | if (!is.null(ncol(.dat)) && ncol(.dat) > 0) { 33 | replace_index <- match(colnames(.dat), colnames(x)) 34 | colnames(x)[replace_index] <- get_label(.dat, def.value = colnames(.dat)) 35 | } else { 36 | colnames(x) <- get_label(x, def.value = colnames(x)) 37 | } 38 | 39 | x 40 | } -------------------------------------------------------------------------------- /R/remove_all_labels.R: -------------------------------------------------------------------------------- 1 | #' @title Remove value and variable labels from vector or data frame 2 | #' @name remove_all_labels 3 | #' 4 | #' @description This function removes value and variable label attributes 5 | #' from a vector or data frame. These attributes are typically 6 | #' added to variables when importing foreign data (see 7 | #' \code{\link{read_spss}}) or manually adding label attributes 8 | #' with \code{\link{set_labels}}. 9 | #' 10 | #' @seealso See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package}, 11 | #' and \code{\link{copy_labels}} for adding label attributes 12 | #' (subsetted) data frames. 13 | #' 14 | #' @param x Vector or \code{data.frame} with variable and/or value label attributes 15 | #' @return \code{x} with removed value and variable label attributes. 16 | #' 17 | #' @examples 18 | #' data(efc) 19 | #' str(efc) 20 | #' str(remove_all_labels(efc)) 21 | #' @export 22 | remove_all_labels <- function(x) { 23 | UseMethod("remove_all_labels") 24 | } 25 | 26 | 27 | #' @export 28 | remove_all_labels.data.frame <- function(x) { 29 | as.data.frame(lapply(x, FUN = remove_all_labels_helper), stringsAsFactors = FALSE) 30 | } 31 | 32 | #' @export 33 | remove_all_labels.list <- function(x) { 34 | lapply(x, FUN = remove_all_labels_helper) 35 | } 36 | 37 | #' @export 38 | remove_all_labels.default <- function(x) { 39 | remove_all_labels_helper(x) 40 | } 41 | 42 | remove_all_labels_helper <- function(x) { 43 | # remove attributes 44 | attr(x, "label") <- NULL 45 | attr(x, "labels") <- NULL 46 | attr(x, "na_values") <- NULL 47 | attr(x, "na.values") <- NULL 48 | 49 | # unclass, if labelled. labelled class may throw 50 | # errors / warnings, when not havin label attributes 51 | if (is_labelled(x)) x <- unclass(x) 52 | 53 | # return var 54 | x 55 | } 56 | -------------------------------------------------------------------------------- /R/remove_label.R: -------------------------------------------------------------------------------- 1 | #' @title Remove variable labels from variables 2 | #' @name remove_label 3 | #' 4 | #' @description Remove variable labels from variables. 5 | #' 6 | #' @seealso \code{\link{set_label}} to manually set variable labels or 7 | #' \code{\link{get_label}} to get variable labels; \code{\link{set_labels}} to 8 | #' add value labels, replacing the existing ones (and removing non-specified 9 | #' value labels). 10 | #' 11 | #' @param x A vector or data frame. 12 | #' @inheritParams as_factor 13 | #' 14 | #' @return \code{x} with removed variable labels 15 | #' 16 | #' @examples 17 | #' data(efc) 18 | #' x <- efc[, 1:5] 19 | #' get_label(x) 20 | #' str(x) 21 | #' 22 | #' x <- remove_label(x) 23 | #' get_label(x) 24 | #' str(x) 25 | #' @export 26 | remove_label <- function(x, ...) { 27 | dots <- as.character(match.call(expand.dots = FALSE)$`...`) 28 | .dat <- .get_dot_data(x, dots) 29 | 30 | if (is.data.frame(x)) { 31 | # iterate variables of data frame 32 | for (i in colnames(.dat)) { 33 | attr(x[[i]], "label") <- NULL 34 | } 35 | } else { 36 | attr(x, "label") <- NULL 37 | } 38 | 39 | x 40 | } 41 | -------------------------------------------------------------------------------- /R/remove_labels.R: -------------------------------------------------------------------------------- 1 | #' @rdname add_labels 2 | #' @export 3 | remove_labels <- function(x, ..., labels) { 4 | # check for valid value. value must be a named vector 5 | if (is.null(labels)) stop("`labels` is NULL.", call. = FALSE) 6 | 7 | # if value is NA, it must be tagged 8 | na.labels <- labels[is.na(labels)] 9 | if (length(na.labels)) { 10 | if (!requireNamespace("haven", quietly = TRUE)) { 11 | stop("Package 'haven' required for this function. Please install it.") 12 | } 13 | if (!all(haven::is_tagged_na(na.labels))) stop("`labels` must be a tagged NA.", call. = FALSE) 14 | } 15 | 16 | # evaluate arguments, generate data 17 | dots <- as.character(match.call(expand.dots = FALSE)$`...`) 18 | .dat <- .get_dot_data(x, dots) 19 | 20 | if (is.data.frame(x)) { 21 | # iterate variables of data frame 22 | for (i in colnames(.dat)) { 23 | x[[i]] <- remove_labels_helper(.dat[[i]], labels) 24 | } 25 | } else { 26 | x <- remove_labels_helper(.dat, labels) 27 | } 28 | 29 | x 30 | } 31 | 32 | 33 | remove_labels_helper <- function(x, labels) { 34 | # get current labels of `x` 35 | current.labels <- get_labels(x, 36 | attr.only = TRUE, 37 | values = "n", 38 | non.labelled = FALSE) 39 | 40 | # get current NA values 41 | current.na <- get_na(x) 42 | 43 | # if we have no labels, return 44 | if (is.null(current.labels) && is.null(current.na)) { 45 | message("`x` has no value labels.") 46 | return(x) 47 | } 48 | 49 | if (!requireNamespace("haven", quietly = TRUE)) { 50 | stop("Package 'haven' required for this function. Please install it.") 51 | } 52 | 53 | # remove by index? 54 | if (haven::is_tagged_na(labels[1])) { 55 | current.na <- current.na[haven::na_tag(current.na) != haven::na_tag(labels)] 56 | } else if (is.numeric(labels)) { 57 | current.labels <- current.labels[-labels] 58 | } else if (is.character(labels)) { 59 | # find value labels that should be removes 60 | removers <- as.vector(current.labels) %in% labels 61 | # remove them 62 | current.labels <- current.labels[!removers] 63 | } 64 | 65 | # switch value and names attribute, since get_labels 66 | # returns the values as names, and the value labels 67 | # as "vector content" 68 | all.labels <- names(current.labels) 69 | if (.is_num_chr(all.labels)) all.labels <- as.numeric(all.labels) 70 | names(all.labels) <- as.character(current.labels) 71 | 72 | # sort labels by values 73 | all.labels <- all.labels[order(all.labels)] 74 | 75 | # complete labels, including NA labels 76 | compl.lab <- c(all.labels, current.na) 77 | 78 | # check if any labels left after removing 79 | if (is.null(compl.lab) || isempty(compl.lab)) { 80 | # clear all labels 81 | x <- remove_all_labels(x) 82 | } else { 83 | # set back labels 84 | attr(x, "labels") <- compl.lab 85 | } 86 | 87 | x 88 | } 89 | -------------------------------------------------------------------------------- /R/select_helpers.R: -------------------------------------------------------------------------------- 1 | string_starts_with <- function(pattern, x) { 2 | pattern <- paste0("^\\Q", pattern, "\\E") 3 | grep(pattern, x, perl = TRUE) 4 | } 5 | 6 | string_contains <- function(pattern, x) { 7 | pattern <- paste0("\\Q", pattern, "\\E") 8 | grep(pattern, x, perl = TRUE) 9 | } 10 | 11 | string_ends_with <- function(pattern, x) { 12 | pattern <- paste0("\\Q", pattern, "\\E$") 13 | grep(pattern, x, perl = TRUE) 14 | } 15 | 16 | string_one_of <- function(pattern, x) { 17 | unlist(lapply(pattern, function(.x) grep(.x, x, fixed = TRUE, useBytes = TRUE))) 18 | } 19 | 20 | rownames_as_column <- function(x, var = "rowname") { 21 | rn <- data.frame(rn = rownames(x), stringsAsFactors = FALSE) 22 | x <- cbind(rn, x) 23 | colnames(x)[1] <- var 24 | rownames(x) <- NULL 25 | x 26 | } 27 | 28 | obj_has_name <- function(x, name) { 29 | name %in% names(x) 30 | } 31 | 32 | obj_has_rownames <- function(x) { 33 | !identical(as.character(1:nrow(x)), rownames(x)) 34 | } 35 | 36 | add_cols <- function(data, ..., .after = 1, .before = NULL) { 37 | if (is.character(.after)) 38 | .after <- which(colnames(data) == .after) 39 | 40 | if (!is.null(.before) && is.character(.before)) 41 | .after <- which(colnames(data) == .before) - 1 42 | 43 | if (!is.null(.before) && is.numeric(.before)) 44 | .after <- .before - 1 45 | 46 | dat <- data.frame(..., stringsAsFactors = FALSE) 47 | 48 | if (.after < 1) { 49 | cbind(dat, data) 50 | } else if (is.infinite(.after)) { 51 | cbind(data, dat) 52 | } else { 53 | c1 <- 1:.after 54 | c2 <- (.after + 1):ncol(data) 55 | 56 | x1 <- data[, colnames(data)[c1], drop = FALSE] 57 | x2 <- data[, colnames(data)[c2], drop = FALSE] 58 | 59 | cbind(x1, dat, x2) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /R/set_label.R: -------------------------------------------------------------------------------- 1 | #' @title Add variable label(s) to variables 2 | #' @name set_label 3 | #' 4 | #' @description This function adds variable labels as attribute 5 | #' (named \code{"label"}) to the variable \code{x}, resp. to a 6 | #' set of variables in a data frame or a list-object. \code{var_labels()} 7 | #' is intended for use within pipe-workflows and has a tidyverse-consistent 8 | #' syntax, including support for quasi-quotation (see 'Examples'). 9 | #' 10 | #' @seealso See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package} 11 | #' for more details; \code{\link{set_labels}} to manually set value labels or \code{\link{get_label}} 12 | #' to get variable labels. 13 | #' 14 | #' @param x Variable (vector), list of variables or a data frame where variables 15 | #' labels should be added as attribute. For \code{var_labels()}, \code{x} 16 | #' must be a data frame only. 17 | #' @param ... Pairs of named vectors, where the name equals the variable name, 18 | #' which should be labelled, and the value is the new variable label. 19 | #' @param label If \code{x} is a vector (single variable), use a single character string with 20 | #' the variable label for \code{x}. If \code{x} is a data frame, use a 21 | #' vector with character labels of same length as \code{ncol(x)}. 22 | #' Use \code{label = ""} to remove labels-attribute from \code{x}, resp. 23 | #' set any value of vector \code{label} to \code{""} to remove specific variable 24 | #' label attributes from a data frame's variable. 25 | #' @param value See \code{label}. 26 | #' 27 | #' @return \code{x}, with variable label attribute(s), which contains the 28 | #' variable name(s); or with removed label-attribute if 29 | #' \code{label = ""}. 30 | #' 31 | #' @examples 32 | #' # manually set value and variable labels 33 | #' dummy <- sample(1:4, 40, replace = TRUE) 34 | #' dummy <- set_labels(dummy, labels = c("very low", "low", "mid", "hi")) 35 | #' dummy <- set_label(dummy, label = "Dummy-variable") 36 | #' 37 | #' # or use: 38 | #' # set_label(dummy) <- "Dummy-variable" 39 | #' 40 | #' # Set variable labels for data frame 41 | #' dummy <- data.frame( 42 | #' a = sample(1:4, 10, replace = TRUE), 43 | #' b = sample(1:4, 10, replace = TRUE), 44 | #' c = sample(1:4, 10, replace = TRUE) 45 | #' ) 46 | #' dummy <- set_label(dummy, c("Variable A", "Variable B", "Variable C")) 47 | #' str(dummy) 48 | #' 49 | #' # remove one variable label 50 | #' dummy <- set_label(dummy, c("Variable A", "", "Variable C")) 51 | #' str(dummy) 52 | #' 53 | #' # setting same variable labels to multiple vectors 54 | #' 55 | #' # create a set of dummy variables 56 | #' dummy1 <- sample(1:4, 40, replace = TRUE) 57 | #' dummy2 <- sample(1:4, 40, replace = TRUE) 58 | #' dummy3 <- sample(1:4, 40, replace = TRUE) 59 | #' # put them in list-object 60 | #' dummies <- list(dummy1, dummy2, dummy3) 61 | #' # and set variable labels for all three dummies 62 | #' dummies <- set_label(dummies, c("First Dummy", "2nd Dummy", "Third dummy")) 63 | #' # see result... 64 | #' get_label(dummies) 65 | #' 66 | #' 67 | #' # use 'var_labels()' to set labels within a pipe-workflow, and 68 | #' # when you need "tidyverse-consistent" api. 69 | #' # Set variable labels for data frame 70 | #' dummy <- data.frame( 71 | #' a = sample(1:4, 10, replace = TRUE), 72 | #' b = sample(1:4, 10, replace = TRUE), 73 | #' c = sample(1:4, 10, replace = TRUE) 74 | #' ) 75 | #' 76 | #' if (require("magrittr") && require("rlang")) { 77 | #' dummy %>% 78 | #' var_labels(a = "First variable", c = "third variable") %>% 79 | #' get_label() 80 | #' 81 | #' # with quasi-quotation 82 | #' v1 <- "First variable" 83 | #' v2 <- "Third variable" 84 | #' dummy %>% 85 | #' var_labels(a = !!v1, c = !!v2) %>% 86 | #' get_label() 87 | #' 88 | #' x1 <- "a" 89 | #' x2 <- "c" 90 | #' dummy %>% 91 | #' var_labels(!!x1 := !!v1, !!x2 := !!v2) %>% 92 | #' get_label() 93 | #' } 94 | #' @export 95 | set_label <- function(x, label) { 96 | # do we have all necessary arguments? 97 | if (!is.null(label) && !is.null(x)) { 98 | # if we have a data frame, we need a variable label 99 | # for each column (variable) of the data frame 100 | if (is.data.frame(x) || is.list(x)) { 101 | # get length of data frame or list, i.e. 102 | # determine number of variables 103 | if (is.data.frame(x)) 104 | nvars <- ncol(x) 105 | else 106 | nvars <- length(x) 107 | 108 | # check for matching length of supplied labels 109 | if (nvars != length(label)) { 110 | message("Argument `label` must be of same length as numbers of columns in `x`.") 111 | } else { 112 | # do we have a data frame? If yes, save column names 113 | if (is.data.frame(x)) cnames <- colnames(x) 114 | 115 | # iterate all columns / list elements 116 | for (i in seq_len(nvars)) { 117 | if (isempty(label[i])) { 118 | # empty label value means, remove 119 | # the label attribute 120 | attr(x[[i]], "label") <- NULL 121 | } else { 122 | # set variable label 123 | attr(x[[i]], "label") <- label[i] 124 | # set names attribute. equals variable name 125 | if (is.data.frame(x)) names(attr(x[[i]], "label")) <- cnames[i] 126 | } 127 | } 128 | } 129 | } else { 130 | if (isempty(label)) 131 | # empty label, so remove label attribute 132 | attr(x, "label") <- NULL 133 | else 134 | # set label attribute 135 | attr(x, "label") <- label 136 | } 137 | } 138 | x 139 | } 140 | 141 | 142 | #' @rdname set_label 143 | #' @export 144 | `set_label<-` <- function(x, value) { 145 | UseMethod("set_label<-") 146 | } 147 | 148 | #' @export 149 | `set_label<-.default` <- function(x, value) { 150 | x <- set_label(x, value) 151 | x 152 | } 153 | -------------------------------------------------------------------------------- /R/tidy_labels.R: -------------------------------------------------------------------------------- 1 | #' @title Repair value labels 2 | #' @name tidy_labels 3 | #' 4 | #' @description Duplicated value labels in variables may cause troubles when 5 | #' saving labelled data, or computing cross tabs (cf. 6 | #' \code{sjmisc::flat_table()} or \code{sjPlot::plot_xtab()}). 7 | #' \code{tidy_labels()} repairs duplicated value labels by suffixing 8 | #' them with the associated value. 9 | #' 10 | #' @param sep String that will be used to separate the suffixed value from the 11 | #' old label when creating the new value label. 12 | #' @param remove Logical, if \code{TRUE}, the original, duplicated value label will 13 | #' be replaced by the value (i.e. the value is not the suffix of the 14 | #' value label, but will become the value label itself). The 15 | #' \code{sep}-argument will be ignored in such cases. 16 | #' 17 | #' @inheritParams add_labels 18 | #' 19 | #' @return \code{x}, with "repaired" (unique) value labels for each variable. 20 | #' 21 | #' @examples 22 | #' if (require("sjmisc")) { 23 | #' set.seed(123) 24 | #' x <- set_labels( 25 | #' sample(1:5, size = 20, replace = TRUE), 26 | #' labels = c("low" = 1, ".." = 2, ".." = 3, ".." = 4, "high" = 5) 27 | #' ) 28 | #' frq(x) 29 | #' 30 | #' z <- tidy_labels(x) 31 | #' frq(z) 32 | #' 33 | #' z <- tidy_labels(x, sep = ".") 34 | #' frq(z) 35 | #' 36 | #' z <- tidy_labels(x, remove = TRUE) 37 | #' frq(z) 38 | #' } 39 | #' @export 40 | tidy_labels <- function(x, ..., sep = "_", remove = FALSE) { 41 | dots <- as.character(match.call(expand.dots = FALSE)$`...`) 42 | .dat <- .get_dot_data(x, dots) 43 | 44 | if (is.data.frame(x)) { 45 | # iterate variables of data frame 46 | for (i in colnames(.dat)) { 47 | x[[i]] <- tidy_labels_helper(x = .dat[[i]], sep = sep, remove = remove) 48 | } 49 | } else { 50 | x <- tidy_labels_helper(x = .dat, sep = sep, remove = remove) 51 | } 52 | 53 | x 54 | } 55 | 56 | 57 | tidy_labels_helper <- function(x, sep, remove) { 58 | # get value labels from variable. drop unused labels 59 | labs <- get_labels( 60 | x, 61 | attr.only = TRUE, 62 | values = FALSE, 63 | drop.unused = TRUE, 64 | drop.na = TRUE 65 | ) 66 | 67 | # no labels? then return... 68 | if (is.null(labs)) return(x) 69 | 70 | # get values that are associated with labels 71 | values <- get_values(drop_labels(x), drop.na = TRUE) 72 | 73 | # create table, and check if any value label is duplicated 74 | duped.val <- names(which(table(labs) > 1)) 75 | 76 | # no dupes found? return variable then 77 | if (isempty(duped.val)) return(x) 78 | 79 | # find position of duplicated labels 80 | dupes <- lapply(duped.val, function(.x) which(labs == .x)) 81 | dupes <- as.vector(unlist(dupes)) 82 | 83 | if (remove) { 84 | # replace labels with value 85 | labs[dupes] <- sprintf("%s",values[dupes]) 86 | } else { 87 | # prefix labels with value 88 | labs[dupes] <- sprintf("%s%s%s", labs[dupes], sep, values[dupes]) 89 | } 90 | 91 | 92 | # set back value labels 93 | names(values) <- labs 94 | attr(x, "labels") <- values 95 | 96 | x 97 | } -------------------------------------------------------------------------------- /R/unlabel.R: -------------------------------------------------------------------------------- 1 | #' @title Convert labelled vectors into normal classes 2 | #' @name unlabel 3 | #' 4 | #' @description This function converts \code{labelled} class vectors 5 | #' into a generic data format, which means that simply all \code{labelled} 6 | #' class attributes will be removed, so all vectors / variables will most 7 | #' likely become \code{atomic}. 8 | #' 9 | #' @param x A data frame, which contains \code{labelled} class 10 | #' vectors or a single vector of class \code{labelled}. 11 | #' 12 | #' @inheritParams read_spss 13 | #' 14 | #' @return A data frame or single vector (depending on \code{x}) with common object classes. 15 | #' 16 | #' @note This function is currently only used to avoid possible compatibility issues 17 | #' with \code{\link[haven:labelled]{labelled}} class vectors. Some known issues with 18 | #' \code{labelled} class vectors have already been fixed, so 19 | #' it might be that this function will become redundant in the future. 20 | #' 21 | #' @importFrom utils txtProgressBar setTxtProgressBar 22 | #' @export 23 | unlabel <- function(x, verbose = FALSE) { 24 | # check if complete data frame or only single 25 | # vector should be converted 26 | if (is.data.frame(x)) { 27 | # create progress bar 28 | if (verbose) 29 | pb <- utils::txtProgressBar(min = 0, max = ncol(x), style = 3) 30 | else 31 | pb <- NULL 32 | 33 | # tell user... 34 | if (verbose) message("Converting labelled-classes. Please wait...\n") 35 | 36 | for (i in seq_len(ncol(x))) { 37 | # remove labelled class 38 | if (is_labelled(x[[i]])) x[[i]] <- unclass(x[[i]]) 39 | # update progress bar 40 | if (verbose) utils::setTxtProgressBar(pb, i) 41 | } 42 | 43 | if (!is.null(pb)) close(pb) 44 | 45 | # remove redundant class attributes 46 | x <- as.data.frame(x, stringsAsFactors = FALSE) 47 | } else { 48 | # remove labelled class 49 | if (is_labelled(x)) x <- unclass(x) 50 | } 51 | 52 | x 53 | } 54 | -------------------------------------------------------------------------------- /R/utils_get_dots.R: -------------------------------------------------------------------------------- 1 | # function to evaluate dots in a tidyselect-style and return 2 | # the variable names as character vector 3 | .get_dot_data <- function(dat, dots, verbose = TRUE) { 4 | 5 | if (!is.data.frame(dat) || length(dots) == 0) { 6 | return(dat) 7 | } 8 | 9 | columns <- colnames(dat) 10 | 11 | x <- unlist(lapply(dots, function(i) { 12 | 13 | # contains-token 14 | if (grepl("^contains\\(", i)) { 15 | pattern <- gsub("contains\\(\"(.*)\"\\)", "\\1", i) 16 | columns[string_contains(pattern, columns)] 17 | 18 | # starts-with token 19 | } else if (grepl("^starts\\(", i) || grepl("^starts_with\\(", i)) { 20 | pattern <- gsub("(.*)\\(\"(.*)\"\\)", "\\2", i) 21 | columns[string_starts_with(pattern, columns)] 22 | 23 | # ends-with token 24 | } else if (grepl("^ends\\(", i) || grepl("^ends_with\\(", i)) { 25 | pattern <- gsub("(.*)\\(\"(.*)\"\\)", "\\2", i) 26 | columns[string_ends_with(pattern, columns)] 27 | 28 | # one-of token 29 | } else if (grepl("^one_of\\(", i)) { 30 | pattern <- gsub("(\"|\\s)", "", unlist(strsplit(gsub("one_of\\(\"(.*)\"\\)", "\\1", i), ","))) 31 | columns[string_one_of(pattern, columns)] 32 | 33 | # num_range token 34 | } else if (grepl("^num_range\\(", i)) { 35 | columns[match(.get_num_range(i), columns)] 36 | 37 | # from-to token 38 | } else if (grepl(":", i, fixed = TRUE)) { 39 | 40 | tmp <- unlist(strsplit(i, ":", fixed = TRUE)) 41 | 42 | start <- if (.is_num_chr(tmp[1])) 43 | as.numeric(tmp[1]) 44 | else 45 | which(columns == tmp[1]) 46 | 47 | end <- if (.is_num_chr(tmp[2])) 48 | as.numeric(tmp[2]) 49 | else 50 | which(columns == tmp[2]) 51 | 52 | columns[start:end] 53 | 54 | # simple name 55 | } else { 56 | i 57 | } 58 | })) 59 | 60 | x <- unlist(lapply(x, function(i) { 61 | if (.is_num_chr(i)) 62 | columns[as.numeric(i)] 63 | else if (.is_num_fac(i)) 64 | columns[as.numeric(as.character(i))] 65 | else 66 | i 67 | })) 68 | 69 | not_found <- setdiff(x, columns) 70 | 71 | if (length(not_found) && isTRUE(verbose)) { 72 | insight::print_color(sprintf( 73 | "%i variables were not found in the dataset: %s\n", 74 | length(not_found), 75 | paste0(not_found, collapse = ", ") 76 | ), 77 | color = "red") 78 | } 79 | 80 | dat[, intersect(x, columns), drop = FALSE] 81 | } 82 | 83 | #' @importFrom stats na.omit 84 | .is_num_chr <- function(x) { 85 | is.character(x) && !anyNA(suppressWarnings(as.numeric(stats::na.omit(x)))) 86 | } 87 | 88 | .is_num_fac <- function(x) { 89 | is.factor(x) && !anyNA(suppressWarnings(as.numeric(levels(x)))) 90 | } 91 | 92 | 93 | 94 | .get_num_range <- function(i) { 95 | r1 <- trimws(unlist(strsplit(gsub("num_range\\((.*)\\)", "\\1", i), ","))) 96 | r2 <- gsub("\"", "", trimws(gsub("(.*)(=)(.*)", "\\3", r1)), fixed = TRUE) 97 | es <- grepl("=", r1) 98 | if (any(es)) { 99 | names(r2)[es] <- trimws(gsub("(.*)(=)(.*)", "\\1", r1[es])) 100 | } 101 | 102 | args <- c("prefix", "range", "width") 103 | if (is.null(names(r2))) { 104 | names(r2) <- args[1:length(r2)] 105 | } 106 | 107 | na_names <- which(is.na(names(r2))) 108 | if (length(na_names)) { 109 | names(r2)[na_names] <- args[na_names] 110 | } 111 | 112 | if (length(r2) > 3) { 113 | r2 <- r2[1:3] 114 | } 115 | 116 | from <- as.numeric(gsub("(\\d):(.*)", "\\1", r2["range"])) 117 | to <- as.numeric(gsub("(.*):(\\d)", "\\2", r2["range"])) 118 | width <- as.numeric(r2["width"]) 119 | 120 | if (is.na(width)) { 121 | sprintf("%s%i", r2["prefix"], from:to) 122 | } else { 123 | sprintf("%s%.*i", r2["prefix"], width, from:to) 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /R/val_labels.R: -------------------------------------------------------------------------------- 1 | #' @rdname set_labels 2 | #' @export 3 | val_labels <- function(x, ..., force.labels = FALSE, force.values = TRUE, drop.na = TRUE) { 4 | if (!requireNamespace("rlang", quietly = TRUE)) { 5 | stop("Package 'rlang' required for this function to work. Please install it.") 6 | } 7 | 8 | # get dots 9 | .dots <- rlang::enexprs(...) 10 | labels <- lapply(.dots, function(i) if (is.language(i)) eval(i) else i) 11 | 12 | # select variables 13 | vars <- names(labels) 14 | 15 | # non-matching column names 16 | non.vars <- which(!(vars %in% colnames(x))) 17 | 18 | # check if all variables exist in data frame 19 | if (!isempty(non.vars)) { 20 | # tell user 21 | warning(sprintf( 22 | "Following elements are no valid column names in `x`: %s", 23 | paste(vars[non.vars], collapse = ",") 24 | ), 25 | call. = FALSE) 26 | # remove invalid names 27 | vars <- vars[-non.vars] 28 | labels <- labels[-non.vars] 29 | } 30 | 31 | # set label for all variables 32 | for (i in seq_len(length(vars))) { 33 | x[[vars[i]]] <- set_labels_helper( 34 | x = x[[vars[i]]], 35 | labels = labels[[i]], 36 | force.labels = force.labels, 37 | force.values = force.values, 38 | drop.na = drop.na, 39 | var.name = vars[i] 40 | ) 41 | } 42 | 43 | # return data 44 | x 45 | } 46 | -------------------------------------------------------------------------------- /R/var_labels.R: -------------------------------------------------------------------------------- 1 | #' @rdname set_label 2 | #' @export 3 | var_labels <- function(x, ...) { 4 | # get dots 5 | .dots <- match.call(expand.dots = FALSE)$`...` 6 | 7 | if (inherits(.dots, "pairlist")) { 8 | if (!requireNamespace("rlang", quietly = TRUE)) { 9 | stop("Package 'rlang' required for this function to work. Please install it.") 10 | } 11 | .dots <- unlist(lapply(rlang::ensyms(...), rlang::as_string)) 12 | } else { 13 | .dots <- unlist(.dots) 14 | } 15 | 16 | # select variables 17 | vars <- names(.dots) 18 | # get new labels 19 | labels <- unname(.dots) 20 | 21 | # non-matching column names 22 | non.vars <- which(!(vars %in% colnames(x))) 23 | 24 | # check if all variables exist in data frame 25 | if (!isempty(non.vars)) { 26 | # tell user 27 | warning(sprintf( 28 | "Following elements are no valid column names in `x`: %s", 29 | paste(vars[non.vars], collapse = ",") 30 | ), 31 | call. = FALSE) 32 | # remove invalid names 33 | vars <- vars[-non.vars] 34 | labels <- labels[-non.vars] 35 | } 36 | 37 | # set label for all variables 38 | for (i in seq_len(length(vars))) { 39 | attr(x[[vars[i]]], "label") <- labels[i] 40 | } 41 | 42 | # return data 43 | x 44 | } 45 | -------------------------------------------------------------------------------- /R/write.R: -------------------------------------------------------------------------------- 1 | #' @title Write data to other statistical software packages 2 | #' @name write_spss 3 | #' 4 | #' @description These functions write the content of a data frame to an SPSS, SAS or 5 | #' Stata-file. 6 | #' 7 | #' @param x A data frame that should be saved as file. 8 | #' @param path File path of the output file. 9 | #' @param version File version to use. Supports versions 8-14. 10 | #' @param drop.na Logical, if \code{TRUE}, tagged \code{NA} values with value labels 11 | #' will be converted to regular NA's. Else, tagged \code{NA} values will be replaced 12 | #' with their value labels. See 'Examples' and \code{\link{get_na}}. 13 | #' @param compress Logical, if \code{TRUE} and a SPSS-file should be created, 14 | #' saves \code{x} in \code{zsav} (i.e. compressed SPSS) format. 15 | #' 16 | #' @export 17 | write_spss <- function(x, path, drop.na = FALSE, compress = FALSE) { 18 | .write_data(x = x, path = path, type = "spss", version = 14, drop.na = drop.na, compress = compress) 19 | } 20 | 21 | 22 | #' @rdname write_spss 23 | #' @export 24 | write_stata <- function(x, path, drop.na = FALSE, version = 14) { 25 | .write_data(x = x, path = path, type = "stata", version = version, drop.na = drop.na) 26 | } 27 | 28 | 29 | #' @rdname write_spss 30 | #' @export 31 | write_sas <- function(x, path, drop.na = FALSE) { 32 | .write_data(x = x, path = path, type = "sas", version = 14, drop.na = drop.na) 33 | } 34 | 35 | 36 | .write_data <- function(x, path, type, version, drop.na, compress = FALSE) { 37 | if (!requireNamespace("haven", quietly = TRUE)) { 38 | stop("Package 'haven' required for this function. Please install it.") 39 | } 40 | 41 | # we need to remove empty columns... 42 | empty_columns <- datawizard::empty_columns(x) 43 | if (length(empty_columns)) { 44 | msg <- insight::format_message(sprintf("Following variables have only missing values and were removed from the dataset: %s", paste(colnames(x)[empty_columns], collapse = ", "))) 45 | message(msg) 46 | x <- x[-empty_columns] 47 | } 48 | 49 | # make sure to have tidy labels 50 | message("Tidying value labels. Please wait...") 51 | x <- tidy_labels(x) 52 | 53 | # convert data to labelled 54 | # x <- as_label(x, add.non.labelled = TRUE, drop.na = drop.na) 55 | x <- as_labelled(x, add.labels = TRUE, skip.strings = TRUE, add.class = TRUE, tag.na = TRUE) 56 | 57 | # check for correct column names 58 | for (i in seq_len(ncol(x))) { 59 | # check column name 60 | end.point <- colnames(x)[i] 61 | # if it ends with a dot, add a char. dot is invalid last char for SPSS 62 | if (substr(end.point, nchar(end.point), nchar(end.point)) == ".") { 63 | colnames(x)[i] <- paste0(end.point, i) 64 | } 65 | } 66 | 67 | # tell user 68 | message(sprintf("Writing %s file to '%s'. Please wait...", type, path)) 69 | 70 | if (tolower(tools::file_ext(path)) == "zsav") { 71 | compress <- TRUE 72 | } 73 | 74 | if (isTRUE(compress)) { 75 | compress <- "zsav" 76 | } else { 77 | compress <- "byte" 78 | } 79 | 80 | if (type == "spss") { 81 | # write SPSS 82 | haven::write_sav(data = x, path = path, compress = compress) 83 | } else if (type == "stata") { 84 | # write Stata 85 | haven::write_dta(data = x, path = path, version = version) 86 | } else if (type == "sas") { 87 | # write Stata 88 | haven::write_sas(data = x, path = path) 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /R/zap_labels.R: -------------------------------------------------------------------------------- 1 | #' @title Drop, add or convert (non-)labelled values 2 | #' @name zap_labels 3 | #' 4 | #' @description For (partially) labelled vectors, \code{zap_labels()} will replace 5 | #' all values that have a value label attribute with \code{NA}; 6 | #' \code{zap_unlabelled()}, as counterpart, will replace all values 7 | #' that \emph{don't} have a value label attribute with \code{NA}. 8 | #' \cr \cr 9 | #' \code{drop_labels()} drops all value labels for unused values, 10 | #' i.e. values that are not present in a vector. \code{fill_labels()} is the 11 | #' counterpart to \code{drop_labels()} and adds value labels to 12 | #' a partially labelled vector, i.e. if not all values are 13 | #' labelled, non-labelled values get labels. 14 | #' 15 | #' @param x (partially) \code{\link[haven:labelled]{labelled()}} vector or a data frame 16 | #' with such vectors. 17 | #' 18 | #' @inheritParams add_labels 19 | #' @inheritParams set_labels 20 | #' 21 | #' @return \itemize{ 22 | #' \item For \code{zap_labels()}, \code{x}, where all labelled values are converted to \code{NA}. 23 | #' \item For \code{zap_unlabelled()}, \code{x}, where all non-labelled values are converted to \code{NA}. 24 | #' \item For \code{drop_labels()}, \code{x}, where value labels for non-existing values are removed. 25 | #' \item For \code{fill_labels()}, \code{x}, where labels for non-labelled values are added. 26 | #' } 27 | #' If \code{x} is a data frame, the complete data frame \code{x} will be 28 | #' returned, with variables specified in \code{...} being converted; 29 | #' if \code{...} is not specified, applies to all variables in the 30 | #' data frame. 31 | #' 32 | #' 33 | #' @examples 34 | #' if (require("sjmisc") && require("dplyr")) { 35 | #' 36 | #' # zap_labels() ---- 37 | #' 38 | #' data(efc) 39 | #' str(efc$e42dep) 40 | #' 41 | #' x <- set_labels( 42 | #' efc$e42dep, 43 | #' labels = c("independent" = 1, "severe dependency" = 4) 44 | #' ) 45 | #' table(x) 46 | #' get_values(x) 47 | #' str(x) 48 | #' 49 | #' # zap all labelled values 50 | #' table(zap_labels(x)) 51 | #' get_values(zap_labels(x)) 52 | #' str(zap_labels(x)) 53 | #' 54 | #' # zap all unlabelled values 55 | #' table(zap_unlabelled(x)) 56 | #' get_values(zap_unlabelled(x)) 57 | #' str(zap_unlabelled(x)) 58 | #' 59 | #' # in a pipe-workflow 60 | #' efc %>% 61 | #' select(c172code, e42dep) %>% 62 | #' set_labels( 63 | #' e42dep, 64 | #' labels = c("independent" = 1, "severe dependency" = 4) 65 | #' ) %>% 66 | #' zap_labels() 67 | #' 68 | #' 69 | #' # drop_labels() ---- 70 | #' 71 | #' rp <- rec_pattern(1, 100) 72 | #' rp 73 | #' 74 | #' # sample data 75 | #' data(efc) 76 | #' # recode carers age into groups of width 5 77 | #' x <- rec(efc$c160age, rec = rp$pattern) 78 | #' # add value labels to new vector 79 | #' x <- set_labels(x, labels = rp$labels) 80 | #' 81 | #' # watch result. due to recode-pattern, we have age groups with 82 | #' # no observations (zero-counts) 83 | #' frq(x) 84 | #' # now, let's drop zero's 85 | #' frq(drop_labels(x)) 86 | #' 87 | #' # drop labels, also drop NA value labels, then also zap tagged NA 88 | #' if (require("haven")) { 89 | #' x <- labelled(c(1:3, tagged_na("z"), 4:1), 90 | #' c("Agreement" = 1, "Disagreement" = 4, "Unused" = 5, 91 | #' "Not home" = tagged_na("z"))) 92 | #' x 93 | #' drop_labels(x, drop.na = FALSE) 94 | #' drop_labels(x) 95 | #' zap_na_tags(drop_labels(x)) 96 | #' 97 | #' # fill_labels() ---- 98 | #' 99 | #' # create labelled integer, with tagged missings 100 | #' x <- labelled( 101 | #' c(1:3, tagged_na("a", "c", "z"), 4:1), 102 | #' c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), 103 | #' "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) 104 | #' ) 105 | #' # get current values and labels 106 | #' x 107 | #' get_labels(x) 108 | #' 109 | #' fill_labels(x) 110 | #' get_labels(fill_labels(x)) 111 | #' # same as 112 | #' get_labels(x, non.labelled = TRUE) 113 | #' } 114 | #' } 115 | #' @importFrom stats na.omit 116 | #' @export 117 | zap_labels <- function(x, ...) { 118 | dots <- as.character(match.call(expand.dots = FALSE)$`...`) 119 | .dat <- .get_dot_data(x, dots) 120 | 121 | if (is.data.frame(x)) { 122 | # iterate variables of data frame 123 | for (i in colnames(.dat)) { 124 | x[[i]] <- zap_labels_helper(.dat[[i]]) 125 | } 126 | } else { 127 | x <- zap_labels_helper(.dat) 128 | } 129 | 130 | x 131 | } 132 | 133 | 134 | #' @rdname zap_labels 135 | #' @importFrom stats na.omit 136 | #' @export 137 | zap_unlabelled <- function(x, ...) { 138 | dots <- as.character(match.call(expand.dots = FALSE)$`...`) 139 | .dat <- .get_dot_data(x, dots) 140 | 141 | if (is.data.frame(x)) { 142 | # iterate variables of data frame 143 | for (i in colnames(.dat)) { 144 | x[[i]] <- zap_unlabelled_helper(.dat[[i]]) 145 | } 146 | } else { 147 | x <- zap_unlabelled_helper(.dat) 148 | } 149 | 150 | x 151 | } 152 | 153 | 154 | #' @title Convert tagged NA values into regular NA 155 | #' @name zap_na_tags 156 | #' 157 | #' @description Replaces all \code{\link[haven:tagged_na]{tagged_na()}} values with 158 | #' regular \code{NA}. 159 | #' 160 | #' @param x A \code{\link[haven:labelled]{labelled()}} vector with \code{tagged_na} 161 | #' values, or a data frame with such vectors. 162 | #' 163 | #' @inheritParams add_labels 164 | #' 165 | #' @return \code{x}, where all \code{tagged_na} values are converted to \code{NA}. 166 | #' 167 | #' @examples 168 | #' if (require("haven")) { 169 | #' x <- labelled( 170 | #' c(1:3, tagged_na("a", "c", "z"), 4:1), 171 | #' c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), 172 | #' "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) 173 | #' ) 174 | #' # get current NA values 175 | #' x 176 | #' get_na(x) 177 | #' zap_na_tags(x) 178 | #' get_na(zap_na_tags(x)) 179 | #' 180 | #' # also works with non-labelled vector that have tagged NA values 181 | #' x <- c(1:5, tagged_na("a"), tagged_na("z"), NA) 182 | #' haven::print_tagged_na(x) 183 | #' haven::print_tagged_na(zap_na_tags(x)) 184 | #' } 185 | #' @importFrom stats na.omit 186 | #' @export 187 | zap_na_tags <- function(x, ...) { 188 | dots <- as.character(match.call(expand.dots = FALSE)$`...`) 189 | .dat <- .get_dot_data(x, dots) 190 | 191 | if (is.data.frame(x)) { 192 | # iterate variables of data frame 193 | for (i in colnames(.dat)) { 194 | x[[i]] <- zap_na_tags_helper(.dat[[i]]) 195 | } 196 | } else { 197 | x <- zap_na_tags_helper(.dat) 198 | } 199 | 200 | x 201 | } 202 | 203 | 204 | 205 | 206 | zap_labels_helper <- function(x) { 207 | x <- set_na(x, na = get_values(x, drop.na = TRUE)) 208 | 209 | # remove label attributes 210 | attr(x, "label") <- NULL 211 | if (is_labelled(x)) class(x) <- NULL 212 | 213 | x 214 | } 215 | 216 | zap_unlabelled_helper <- function(x) { 217 | vals <- get_values(x) 218 | x <- set_na(x, na = stats::na.omit(unique(x)[!unique(x) %in% vals])) 219 | if (is_labelled(x)) class(x) <- NULL 220 | x 221 | } 222 | 223 | zap_na_tags_helper <- function(x) { 224 | if (!requireNamespace("haven", quietly = TRUE)) { 225 | stop("Package 'haven' required for this function. Please install it.") 226 | } 227 | 228 | # check if values has only NA's 229 | if (sum(is.na(x)) == length(x)) return(x) 230 | # convert all NA, including tagged NA, into regular NA 231 | x[is.na(x)] <- NA 232 | 233 | # get labels, w/o labelled NA 234 | # retrieve named labels 235 | labs <- attr(x, "labels", exact = TRUE) 236 | labs <- labs[!haven::is_tagged_na(labs)] 237 | 238 | attr(x, "na_values") <- NULL 239 | attr(x, "na.values") <- NULL 240 | 241 | # if no labels left, clear attribute 242 | if (is.null(labs)) { 243 | attr(x, "labels") <- NULL 244 | return(x) 245 | } else { 246 | set_labels(x, labels = labs) 247 | } 248 | } 249 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sjlabelled - Labelled Data Utility Functions 2 | 3 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/sjlabelled)](https://cran.r-project.org/package=sjlabelled)    [![Documentation](https://img.shields.io/badge/documentation-sjlabelled-orange.svg?colorB=E91E63)](https://strengejacke.github.io/sjlabelled/)    [![downloads](https://cranlogs.r-pkg.org/badges/sjlabelled)](https://cranlogs.r-pkg.org/) 4 |    [![total](https://cranlogs.r-pkg.org/badges/grand-total/sjlabelled)](https://cranlogs.r-pkg.org/) 5 | 6 | This package contains utility functions that are useful when working with labelled data (especially intended for people coming from 'SPSS', 'SAS' or 'Stata' and/or who are new to R). 7 | 8 | Basically, this package covers reading and writing data between other statistical packages (like 'SPSS') and R, based on the haven and foreign packages; hence, this package also includes functions to make working with labelled data easier. This includes easy ways to get, set or change value and variable label attributes, to convert labelled vectors into factors or numeric (and vice versa), or to deal with multiple declared missing values. 9 | 10 | ## Installation 11 | 12 | ### Latest development build 13 | 14 | To install the latest development snapshot (see latest changes below), type following commands into the R console: 15 | 16 | ```r 17 | library(devtools) 18 | devtools::install_github("strengejacke/sjlabelled") 19 | ``` 20 | 21 | ### Officiale, stable release 22 | 23 | To install the latest stable release from CRAN, type following command into the R console: 24 | 25 | ```r 26 | install.packages("sjlabelled") 27 | ``` 28 | 29 | ## Citation 30 | 31 | In case you want / have to cite my package, please use `citation('sjlabelled')` for citation information. 32 | 33 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1249215.svg)](https://doi.org/10.5281/zenodo.1249215) 34 | 35 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | authors: 2 | Daniel Lüdecke: 3 | href: https://github.com/strengejacke 4 | 5 | template: 6 | params: 7 | bootswatch: cosmo 8 | 9 | reference: 10 | - title: "Introduction" 11 | contents: 12 | - sjlabelled-package 13 | 14 | - title: "Variable Labels" 15 | contents: 16 | - get_label 17 | - copy_labels 18 | - label_to_colnames 19 | - remove_label 20 | - remove_all_labels 21 | - set_label 22 | - var_labels 23 | 24 | - title: "Value Labels" 25 | contents: 26 | - add_labels 27 | - copy_labels 28 | - get_labels 29 | - remove_labels 30 | - remove_all_labels 31 | - set_labels 32 | - val_labels 33 | 34 | - title: "Labelled Model Predictors" 35 | contents: 36 | - get_term_labels 37 | 38 | - title: "Labelled Values" 39 | contents: 40 | - get_values 41 | - tidy_labels 42 | 43 | - title: "Missing Values" 44 | contents: 45 | - get_na 46 | - set_na 47 | 48 | - title: "Converting Variables" 49 | contents: 50 | - as_character 51 | - as_factor 52 | - as_label 53 | - as_numeric 54 | 55 | - title: "Converting Labelled Variables" 56 | contents: 57 | - as_labelled 58 | - convert_case 59 | - is_labelled 60 | - unlabel 61 | - zap_labels 62 | - zap_na_tags 63 | - zap_unlabelled 64 | 65 | - title: "Importing and Exporting Data" 66 | contents: 67 | - read_data 68 | - write_spss 69 | 70 | - title: "Sample Data" 71 | contents: 72 | - efc 73 | -------------------------------------------------------------------------------- /data/efc.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/strengejacke/sjlabelled/b06b4e3e47775355fef19ce824f63a83dc928d04/data/efc.RData -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | year <- sub("-.*", "", meta$Date) 2 | title <- sprintf("sjlabelled: Labelled Data Utility Functions (Version %s)", meta$Version) 3 | 4 | bibentry(bibtype="manual", 5 | title = title, 6 | author = person("Daniel", "Lüdecke"), 7 | year = year, 8 | url = "https://CRAN.R-project.org/package=sjlabelled", 9 | doi = "10.5281/zenodo.1249215") 10 | -------------------------------------------------------------------------------- /man/add_labels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/add_labels.R, R/remove_labels.R 3 | \name{add_labels} 4 | \alias{add_labels} 5 | \alias{replace_labels} 6 | \alias{remove_labels} 7 | \title{Add, replace or remove value labels of variables} 8 | \usage{ 9 | add_labels(x, ..., labels) 10 | 11 | replace_labels(x, ..., labels) 12 | 13 | remove_labels(x, ..., labels) 14 | } 15 | \arguments{ 16 | \item{x}{A vector or data frame.} 17 | 18 | \item{...}{Optional, unquoted names of variables that should be selected for 19 | further processing. Required, if \code{x} is a data frame (and no 20 | vector) and only selected variables from \code{x} should be processed. 21 | You may also use functions like \code{:} or tidyselect's select-helpers. 22 | See 'Examples'.} 23 | 24 | \item{labels}{\describe{ 25 | \item{For \code{add_labels()}}{A named (numeric) vector of labels 26 | that will be added to \code{x} as label attribute.} 27 | \item{For \code{remove_labels()}}{Either a numeric vector, indicating 28 | the position of one or more label attributes that should be removed; 29 | a character vector with names of label attributes that should be 30 | removed; or a \code{\link[haven:tagged_na]{tagged_na()}} to remove the labels 31 | from specific NA values.} 32 | }} 33 | } 34 | \value{ 35 | \code{x} with additional or removed value labels. If \code{x} 36 | is a data frame, the complete data frame \code{x} will be returned, 37 | with removed or added to variables specified in \code{...}; 38 | if \code{...} is not specified, applies to all variables in the 39 | data frame. 40 | } 41 | \description{ 42 | These functions add, replace or remove value labels to or from variables. 43 | } 44 | \details{ 45 | \code{add_labels()} adds \code{labels} to the existing value 46 | labels of \code{x}, however, unlike \code{\link{set_labels}}, it 47 | does \emph{not} remove labels that were \emph{not} specified in 48 | \code{labels}. \code{add_labels()} also replaces existing 49 | value labels, but preserves the remaining labels. 50 | \cr \cr 51 | \code{remove_labels()} is the counterpart to \code{add_labels()}. 52 | It removes labels from a label attribute of \code{x}. 53 | \cr \cr 54 | \code{replace_labels()} is an alias for \code{add_labels()}. 55 | } 56 | \examples{ 57 | \dontshow{if (require("dplyr") && require("haven")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 58 | # add_labels() 59 | data(efc) 60 | get_labels(efc$e42dep) 61 | 62 | x <- add_labels(efc$e42dep, labels = c(`nothing` = 5)) 63 | get_labels(x) 64 | 65 | if (require("dplyr")) { 66 | x <- efc \%>\% 67 | # select three variables 68 | dplyr::select(e42dep, c172code, c161sex) \%>\% 69 | # only add new label to two of those 70 | add_labels(e42dep, c172code, labels = c(`nothing` = 5)) 71 | # see data frame, with selected variables having new labels 72 | get_labels(x) 73 | } 74 | 75 | x <- add_labels(efc$e42dep, labels = c(`nothing` = 5, `zero value` = 0)) 76 | get_labels(x, values = "p") 77 | 78 | # replace old value labels 79 | x <- add_labels( 80 | efc$e42dep, 81 | labels = c(`not so dependent` = 4, `lorem ipsum` = 5) 82 | ) 83 | get_labels(x, values = "p") 84 | 85 | # replace specific missing value (tagged NA) 86 | if (require("haven")) { 87 | x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), 88 | c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), 89 | "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) 90 | # get current NA values 91 | x 92 | # tagged NA(c) has currently the value label "First", will be 93 | # replaced by "Second" now. 94 | replace_labels(x, labels = c("Second" = tagged_na("c"))) 95 | } 96 | 97 | 98 | # remove_labels() 99 | 100 | x <- remove_labels(efc$e42dep, labels = 2) 101 | get_labels(x, values = "p") 102 | 103 | x <- remove_labels(efc$e42dep, labels = "independent") 104 | get_labels(x, values = "p") 105 | 106 | if (require("haven")) { 107 | x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), 108 | c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), 109 | "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) 110 | # get current NA values 111 | get_na(x) 112 | get_na(remove_labels(x, labels = tagged_na("c"))) 113 | } 114 | \dontshow{\}) # examplesIf} 115 | } 116 | \seealso{ 117 | \code{\link{set_label}} to manually set variable labels or 118 | \code{\link{get_label}} to get variable labels; \code{\link{set_labels}} to 119 | add value labels, replacing the existing ones (and removing non-specified 120 | value labels). 121 | } 122 | -------------------------------------------------------------------------------- /man/as_factor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_factor.R 3 | \name{as_factor} 4 | \alias{as_factor} 5 | \alias{to_factor} 6 | \alias{as_factor.data.frame} 7 | \title{Convert variable into factor and keep value labels} 8 | \usage{ 9 | as_factor(x, ...) 10 | 11 | to_factor(x, ...) 12 | 13 | \method{as_factor}{data.frame}(x, ..., add.non.labelled = FALSE) 14 | } 15 | \arguments{ 16 | \item{x}{A vector or data frame.} 17 | 18 | \item{...}{Optional, unquoted names of variables that should be selected for 19 | further processing. Required, if \code{x} is a data frame (and no 20 | vector) and only selected variables from \code{x} should be processed. 21 | You may also use functions like \code{:} or tidyselect's select-helpers. 22 | See 'Examples'.} 23 | 24 | \item{add.non.labelled}{Logical, if \code{TRUE}, non-labelled values also 25 | get value labels.} 26 | } 27 | \value{ 28 | A factor, including variable and value labels. If \code{x} 29 | is a data frame, the complete data frame \code{x} will be returned, 30 | where variables specified in \code{...} are coerced 31 | to factors (including variable and value labels); 32 | if \code{...} is not specified, applies to all variables in the 33 | data frame. 34 | } 35 | \description{ 36 | This function converts a variable into a factor, but preserves 37 | variable and value label attributes. 38 | } 39 | \details{ 40 | \code{as_factor} converts numeric values into a factor with numeric 41 | levels. \code{\link{as_label}}, however, converts a vector into 42 | a factor and uses value labels as factor levels. 43 | } 44 | \note{ 45 | This function is intended for use with vectors that have value and variable 46 | label attributes. Unlike \code{\link{as.factor}}, \code{as_factor} converts 47 | a variable into a factor and preserves the value and variable label attributes. 48 | \cr \cr 49 | Adding label attributes is automatically done by importing data sets 50 | with one of the \code{read_*}-functions, like \code{\link{read_spss}}. 51 | Else, value and variable labels can be manually added to vectors 52 | with \code{\link{set_labels}} and \code{\link{set_label}}. 53 | } 54 | \examples{ 55 | if (require("sjmisc") && require("magrittr")) { 56 | data(efc) 57 | # normal factor conversion, loses value attributes 58 | x <- as.factor(efc$e42dep) 59 | frq(x) 60 | 61 | # factor conversion, which keeps value attributes 62 | x <- as_factor(efc$e42dep) 63 | frq(x) 64 | 65 | # create partially labelled vector 66 | x <- set_labels( 67 | efc$e42dep, 68 | labels = c( 69 | `1` = "independent", 70 | `4` = "severe dependency", 71 | `9` = "missing value" 72 | )) 73 | 74 | # only copy existing value labels 75 | as_factor(x) \%>\% head() 76 | get_labels(as_factor(x), values = "p") 77 | 78 | # also add labels to non-labelled values 79 | as_factor(x, add.non.labelled = TRUE) \%>\% head() 80 | get_labels(as_factor(x, add.non.labelled = TRUE), values = "p") 81 | 82 | 83 | # easily coerce specific variables in a data frame to factor 84 | # and keep other variables, with their class preserved 85 | as_factor(efc, e42dep, e16sex, c172code) \%>\% head() 86 | 87 | # use select-helpers from dplyr-package 88 | if (require("dplyr")) { 89 | as_factor(efc, contains("cop"), c161sex:c175empl) \%>\% head() 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /man/as_label.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_character.R, R/as_label.R 3 | \name{as_character} 4 | \alias{as_character} 5 | \alias{to_character} 6 | \alias{as_character.data.frame} 7 | \alias{as_label} 8 | \alias{to_label} 9 | \alias{as_label.data.frame} 10 | \title{Convert variable into factor with associated value labels} 11 | \usage{ 12 | as_character(x, ...) 13 | 14 | to_character(x, ...) 15 | 16 | \method{as_character}{data.frame}( 17 | x, 18 | ..., 19 | add.non.labelled = FALSE, 20 | prefix = FALSE, 21 | var.label = NULL, 22 | drop.na = TRUE, 23 | drop.levels = FALSE, 24 | keep.labels = FALSE 25 | ) 26 | 27 | as_label(x, ...) 28 | 29 | to_label(x, ...) 30 | 31 | \method{as_label}{data.frame}( 32 | x, 33 | ..., 34 | add.non.labelled = FALSE, 35 | prefix = FALSE, 36 | var.label = NULL, 37 | drop.na = TRUE, 38 | drop.levels = FALSE, 39 | keep.labels = FALSE 40 | ) 41 | } 42 | \arguments{ 43 | \item{x}{A vector or data frame.} 44 | 45 | \item{...}{Optional, unquoted names of variables that should be selected for 46 | further processing. Required, if \code{x} is a data frame (and no 47 | vector) and only selected variables from \code{x} should be processed. 48 | You may also use functions like \code{:} or tidyselect's select-helpers. 49 | See 'Examples'.} 50 | 51 | \item{add.non.labelled}{Logical, if \code{TRUE}, values without associated 52 | value label will also be converted to labels (as is). See 'Examples'.} 53 | 54 | \item{prefix}{Logical, if \code{TRUE}, the value labels used as factor levels 55 | or character values will be prefixed with their associated values. See 'Examples'.} 56 | 57 | \item{var.label}{Optional string, to set variable label attribute for the 58 | returned variable (see vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package}). 59 | If \code{NULL} (default), variable label attribute of \code{x} will 60 | be used (if present). If empty, variable label attributes will be removed.} 61 | 62 | \item{drop.na}{Logical, if \code{TRUE}, tagged \code{NA} values with value labels 63 | will be converted to regular NA's. Else, tagged \code{NA} values will be replaced 64 | with their value labels. See 'Examples' and \code{\link{get_na}}.} 65 | 66 | \item{drop.levels}{Logical, if \code{TRUE}, unused factor levels will be 67 | dropped (i.e. \code{\link{droplevels}} will be applied before returning 68 | the result).} 69 | 70 | \item{keep.labels}{Logical, if \code{TRUE}, value labels are preserved This 71 | allows users to quickly convert back factors to numeric vectors with 72 | \code{as_numeric()}.} 73 | } 74 | \value{ 75 | A factor with the associated value labels as factor levels. If \code{x} 76 | is a data frame, the complete data frame \code{x} will be returned, 77 | where variables specified in \code{...} are coerced to factors; 78 | if \code{...} is not specified, applies to all variables in the 79 | data frame. \code{as_character()} returns a character vector. 80 | } 81 | \description{ 82 | \code{as_label()} converts (replaces) values of a variable (also of factors 83 | or character vectors) with their associated value labels. Might 84 | be helpful for factor variables. 85 | For instance, if you have a Gender variable with 0/1 value, and associated 86 | labels are male/female, this function would convert all 0 to male and 87 | all 1 to female and returns the new variable as factor. 88 | \code{as_character()} does the same as \code{as_label()}, but returns 89 | a character vector. 90 | } 91 | \details{ 92 | See 'Details' in \code{\link{get_na}}. 93 | } 94 | \note{ 95 | Value label attributes (see \code{\link{get_labels}}) 96 | will be removed when converting variables to factors. 97 | } 98 | \examples{ 99 | \dontshow{if (require("haven")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 100 | data(efc) 101 | print(get_labels(efc)['c161sex']) 102 | head(efc$c161sex) 103 | head(as_label(efc$c161sex)) 104 | 105 | print(get_labels(efc)['e42dep']) 106 | table(efc$e42dep) 107 | table(as_label(efc$e42dep)) 108 | 109 | head(efc$e42dep) 110 | head(as_label(efc$e42dep)) 111 | 112 | # structure of numeric values won't be changed 113 | # by this function, it only applies to labelled vectors 114 | # (typically categorical or factor variables) 115 | 116 | str(efc$e17age) 117 | str(as_label(efc$e17age)) 118 | 119 | 120 | # factor with non-numeric levels 121 | as_label(factor(c("a", "b", "c"))) 122 | 123 | # factor with non-numeric levels, prefixed 124 | x <- factor(c("a", "b", "c")) 125 | x <- set_labels(x, labels = c("ape", "bear", "cat")) 126 | as_label(x, prefix = TRUE) 127 | 128 | 129 | # create vector 130 | x <- c(1, 2, 3, 2, 4, NA) 131 | # add less labels than values 132 | x <- set_labels( 133 | x, 134 | labels = c("yes", "maybe", "no"), 135 | force.labels = FALSE, 136 | force.values = FALSE 137 | ) 138 | 139 | # convert to label w/o non-labelled values 140 | as_label(x) 141 | 142 | # convert to label, including non-labelled values 143 | as_label(x, add.non.labelled = TRUE) 144 | 145 | 146 | # create labelled integer, with missing flag 147 | if (require("haven")) { 148 | x <- labelled( 149 | c(1:3, tagged_na("a", "c", "z"), 4:1, 2:3), 150 | c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), 151 | "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) 152 | ) 153 | 154 | # to labelled factor, with missing labels 155 | as_label(x, drop.na = FALSE) 156 | 157 | # to labelled factor, missings removed 158 | as_label(x, drop.na = TRUE) 159 | 160 | # keep missings, and use non-labelled values as well 161 | as_label(x, add.non.labelled = TRUE, drop.na = FALSE) 162 | } 163 | 164 | # convert labelled character to factor 165 | dummy <- c("M", "F", "F", "X") 166 | dummy <- set_labels( 167 | dummy, 168 | labels = c(`M` = "Male", `F` = "Female", `X` = "Refused") 169 | ) 170 | get_labels(dummy,, "p") 171 | as_label(dummy) 172 | 173 | # drop unused factor levels, but preserve variable label 174 | x <- factor(c("a", "b", "c"), levels = c("a", "b", "c", "d")) 175 | x <- set_labels(x, labels = c("ape", "bear", "cat")) 176 | set_label(x) <- "A factor!" 177 | x 178 | as_label(x, drop.levels = TRUE) 179 | 180 | # change variable label 181 | as_label(x, var.label = "New variable label!", drop.levels = TRUE) 182 | 183 | 184 | # convert to numeric and back again, preserving label attributes 185 | # *and* values in numeric vector 186 | x <- c(0, 1, 0, 4) 187 | x <- set_labels(x, labels = c(`null` = 0, `one` = 1, `four` = 4)) 188 | 189 | # to factor 190 | as_label(x) 191 | 192 | # to factor, back to numeric - values are 1, 2 and 3, 193 | # instead of original 0, 1 and 4 194 | as_numeric(as_label(x)) 195 | 196 | # preserve label-attributes when converting to factor, use these attributes 197 | # to restore original numeric values when converting back to numeric 198 | as_numeric(as_label(x, keep.labels = TRUE), use.labels = TRUE) 199 | 200 | 201 | # easily coerce specific variables in a data frame to factor 202 | # and keep other variables, with their class preserved 203 | as_label(efc, e42dep, e16sex, c172code) 204 | \dontshow{\}) # examplesIf} 205 | } 206 | -------------------------------------------------------------------------------- /man/as_labelled.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_labelled.R 3 | \name{as_labelled} 4 | \alias{as_labelled} 5 | \title{Convert vector to labelled class} 6 | \usage{ 7 | as_labelled( 8 | x, 9 | add.labels = FALSE, 10 | add.class = FALSE, 11 | skip.strings = FALSE, 12 | tag.na = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{x}{Variable (vector), \code{data.frame} or \code{list} of variables 17 | that should be converted to \code{\link[haven:labelled]{labelled()}}-class 18 | objects.} 19 | 20 | \item{add.labels}{Logical, if \code{TRUE}, non-labelled values will be 21 | labelled with the corresponding value.} 22 | 23 | \item{add.class}{Logical, if \code{TRUE}, \code{x} preserves its former 24 | \code{class}-attribute and \code{labelled} is added as additional 25 | attribute. If \code{FALSE} (default), all former \code{class}-attributes 26 | will be removed and the class-attribute of \code{x} will only 27 | be \code{labelled}.} 28 | 29 | \item{skip.strings}{Logical, if \code{TRUE}, character vector are not converted 30 | into labelled-vectors. Else, character vectors are converted to factors 31 | vector and the associated values are used as value labels.} 32 | 33 | \item{tag.na}{Logical, if \code{TRUE}, tagged \code{NA} values are replaced 34 | by their associated values. This is required, for instance, when writing 35 | data back to SPSS.} 36 | } 37 | \value{ 38 | \code{x}, as \code{labelled}-class object. 39 | } 40 | \description{ 41 | Converts a (labelled) vector of any class into a \code{labelled} 42 | class vector, resp. adds a \code{labelled} class-attribute. 43 | } 44 | \examples{ 45 | data(efc) 46 | str(efc$e42dep) 47 | 48 | x <- as_labelled(efc$e42dep) 49 | str(x) 50 | 51 | x <- as_labelled(efc$e42dep, add.class = TRUE) 52 | str(x) 53 | 54 | a <- c(1, 2, 4) 55 | x <- as_labelled(a, add.class = TRUE) 56 | str(x) 57 | 58 | data(efc) 59 | x <- set_labels(efc$e42dep, 60 | labels = c(`1` = "independent", `4` = "severe dependency")) 61 | x1 <- as_labelled(x, add.labels = FALSE) 62 | x2 <- as_labelled(x, add.labels = TRUE) 63 | 64 | str(x1) 65 | str(x2) 66 | 67 | get_values(x1) 68 | get_values(x2) 69 | } 70 | -------------------------------------------------------------------------------- /man/as_numeric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_numeric.R 3 | \name{as_numeric} 4 | \alias{as_numeric} 5 | \alias{to_numeric} 6 | \alias{as_numeric.data.frame} 7 | \title{Convert factors to numeric variables} 8 | \usage{ 9 | as_numeric(x, ...) 10 | 11 | to_numeric(x, ...) 12 | 13 | \method{as_numeric}{data.frame}(x, ..., start.at = NULL, keep.labels = TRUE, use.labels = FALSE) 14 | } 15 | \arguments{ 16 | \item{x}{A vector or data frame.} 17 | 18 | \item{...}{Optional, unquoted names of variables that should be selected for 19 | further processing. Required, if \code{x} is a data frame (and no 20 | vector) and only selected variables from \code{x} should be processed. 21 | You may also use functions like \code{:} or tidyselect's select-helpers. 22 | See 'Examples'.} 23 | 24 | \item{start.at}{Starting index, i.e. the lowest numeric value of the variable's 25 | value range. By default, this argument is \code{NULL}, hence the lowest 26 | value of the returned numeric variable corresponds to the lowest factor 27 | level (if factor levels are numeric) or to \code{1} (if factor levels 28 | are not numeric).} 29 | 30 | \item{keep.labels}{Logical, if \code{TRUE}, former factor levels will be added as 31 | value labels. For numeric factor levels, values labels will be used, 32 | if present. See 'Examples' and \code{\link{set_labels}} for more details.} 33 | 34 | \item{use.labels}{Logical, if \code{TRUE} and \code{x} has numeric value labels, 35 | the values defined in the labels (right-hand side of \code{labels}, for instance 36 | \code{labels = c(null = 0, one = 1)}) will be set as numeric values (instead 37 | of consecutive factor level numbers). See 'Examples'.} 38 | } 39 | \value{ 40 | A numeric variable with values ranging either from \code{start.at} to 41 | \code{start.at} + length of factor levels, or to the corresponding 42 | factor levels (if these were numeric). If \code{x} is a data frame, 43 | the complete data frame \code{x} will be returned, where variables 44 | specified in \code{...} are coerced to numeric; if \code{...} is 45 | not specified, applies to all variables in the data frame. 46 | } 47 | \description{ 48 | This function converts (replaces) factor levels with the 49 | related factor level index number, thus the factor is converted to 50 | a numeric variable. 51 | } 52 | \examples{ 53 | data(efc) 54 | test <- as_label(efc$e42dep) 55 | table(test) 56 | 57 | table(as_numeric(test)) 58 | hist(as_numeric(test, start.at = 0)) 59 | 60 | # set lowest value of new variable to "5". 61 | table(as_numeric(test, start.at = 5)) 62 | 63 | # numeric factor keeps values 64 | dummy <- factor(c("3", "4", "6")) 65 | table(as_numeric(dummy)) 66 | 67 | # do not drop unused factor levels 68 | dummy <- ordered(c(rep("No", 5), rep("Maybe", 3)), 69 | levels = c("Yes", "No", "Maybe")) 70 | as_numeric(dummy) 71 | 72 | # non-numeric factor is converted to numeric 73 | # starting at 1 74 | dummy <- factor(c("D", "F", "H")) 75 | table(as_numeric(dummy)) 76 | 77 | # for numeric factor levels, value labels will be used, if present 78 | dummy1 <- factor(c("3", "4", "6")) 79 | dummy1 <- set_labels(dummy1, labels = c("first", "2nd", "3rd")) 80 | dummy1 81 | as_numeric(dummy1) 82 | 83 | # for non-numeric factor levels, these will be used. 84 | # value labels will be ignored 85 | dummy2 <- factor(c("D", "F", "H")) 86 | dummy2 <- set_labels(dummy2, labels = c("first", "2nd", "3rd")) 87 | dummy2 88 | as_numeric(dummy2) 89 | 90 | 91 | # easily coerce specific variables in a data frame to numeric 92 | # and keep other variables, with their class preserved 93 | data(efc) 94 | efc$e42dep <- as.factor(efc$e42dep) 95 | efc$e16sex <- as.factor(efc$e16sex) 96 | efc$e17age <- as.factor(efc$e17age) 97 | 98 | # convert back "sex" and "age" into numeric 99 | head(as_numeric(efc, e16sex, e17age)) 100 | 101 | x <- factor(c("None", "Little", "Some", "Lots")) 102 | x <- set_labels(x, 103 | labels = c(None = "0.5", Little = "1.3", Some = "1.8", Lots = ".2") 104 | ) 105 | x 106 | as_numeric(x) 107 | as_numeric(x, use.labels = TRUE) 108 | as_numeric(x, use.labels = TRUE, keep.labels = FALSE) 109 | } 110 | -------------------------------------------------------------------------------- /man/convert_case.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/convert_case.R 3 | \name{convert_case} 4 | \alias{convert_case} 5 | \title{Generic case conversion for labels} 6 | \usage{ 7 | convert_case(lab, case = NULL, verbose = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{lab}{Character vector that should be case converted.} 11 | 12 | \item{case}{Desired target case. Labels will automatically converted into the 13 | specified character case. See \code{\link[snakecase:to_any_case]{to_any_case()}} for 14 | more details on this argument.} 15 | 16 | \item{verbose}{Toggle warnings and messages on or off.} 17 | 18 | \item{...}{Further arguments passed down to \code{to_any_case()}, 19 | like \code{sep_in} or \code{sep_out}.} 20 | } 21 | \value{ 22 | \code{lab}, with converted case. 23 | } 24 | \description{ 25 | This function wraps \code{to_any_case()} from the \pkg{snakecase} 26 | package with certain defaults for the \code{sep_in} and 27 | \code{sep_out} arguments, used for instance to convert cases in 28 | \code{\link{term_labels}}. 29 | } 30 | \details{ 31 | When calling \code{to_any_case()} from \pkg{snakecase}, the 32 | \code{sep_in} argument is set to \code{"(?\% str() 59 | } 60 | 61 | # copy labels from only some columns 62 | str(copy_labels(efc.sub, efc, e42dep)) 63 | str(copy_labels(efc.sub, efc, -e17age)) 64 | } 65 | -------------------------------------------------------------------------------- /man/efc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/efc.R 3 | \docType{data} 4 | \name{efc} 5 | \alias{efc} 6 | \title{Sample dataset from the EUROFAMCARE project} 7 | \description{ 8 | A SPSS sample data set, imported with the \code{\link{read_spss}} function. 9 | } 10 | \examples{ 11 | # Attach EFC-data 12 | data(efc) 13 | 14 | # Show structure 15 | str(efc) 16 | 17 | # show first rows 18 | head(efc) 19 | 20 | # show variables 21 | \dontrun{ 22 | library(sjPlot) 23 | view_df(efc) 24 | 25 | # show variable labels 26 | get_label(efc) 27 | 28 | # plot efc-data frame summary 29 | sjt.df(efc, altr.row.col = TRUE)} 30 | 31 | } 32 | \keyword{data} 33 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/strengejacke/sjlabelled/b06b4e3e47775355fef19ce824f63a83dc928d04/man/figures/logo.png -------------------------------------------------------------------------------- /man/get_label.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_label.R 3 | \name{get_label} 4 | \alias{get_label} 5 | \title{Retrieve variable label(s) of labelled data} 6 | \usage{ 7 | get_label(x, ..., def.value = NULL, case = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{A data frame with variables that have label attributes (e.g. 11 | from an imported SPSS, SAS or STATA data set, via \code{\link{read_spss}}, 12 | \code{\link{read_sas}} or \code{\link{read_stata}}); a variable 13 | (vector) with variable label attribute; or a \code{list} of variables 14 | with variable label attributes. See 'Examples'.} 15 | 16 | \item{...}{Optional, names of variables, where labels should be retrieved. 17 | Required, if either data is a data frame and no vector, or if only 18 | selected variables from \code{x} should be used in the function. 19 | Convenient argument to work with pipe-chains (see 'Examples').} 20 | 21 | \item{def.value}{Optional, a character string which will be returned as label 22 | if \code{x} has no label attribute. By default, \code{NULL} is returned.} 23 | 24 | \item{case}{Desired target case. Labels will automatically converted into the 25 | specified character case. See \code{\link[snakecase:to_any_case]{to_any_case()}} for 26 | more details on this argument.} 27 | } 28 | \value{ 29 | A named character vector with all variable labels from the data frame or list; 30 | or a simple character vector (of length 1) with the variable label, if \code{x} is a variable. 31 | If \code{x} is a single vector and has no label attribute, the value 32 | of \code{def.value} will be returned (which is by default \code{NULL}). 33 | } 34 | \description{ 35 | This function returns the variable labels of labelled data. 36 | } 37 | \note{ 38 | \code{\link{var_labels}} is an alternative way to set variable labels, 39 | which follows the philosophy of tidyvers API design (data as first argument, 40 | dots as value pairs indicating variables) 41 | } 42 | \examples{ 43 | # import SPSS data set 44 | # mydat <- read_spss("my_spss_data.sav", enc="UTF-8") 45 | 46 | # retrieve variable labels 47 | # mydat.var <- get_label(mydat) 48 | 49 | # retrieve value labels 50 | # mydat.val <- get_labels(mydat) 51 | 52 | data(efc) 53 | 54 | # get variable lable 55 | get_label(efc$e42dep) 56 | 57 | # alternative way 58 | get_label(efc)["e42dep"] 59 | 60 | # 'get_label()' also works within pipe-chains 61 | library(magrittr) 62 | efc \%>\% get_label(e42dep, e16sex) 63 | 64 | # set default values 65 | get_label(mtcars, mpg, cyl, def.value = "no var labels") 66 | 67 | # simple barplot 68 | barplot(table(efc$e42dep)) 69 | # get value labels to annotate barplot 70 | barplot(table(efc$e42dep), 71 | names.arg = get_labels(efc$e42dep), 72 | main = get_label(efc$e42dep)) 73 | 74 | # get labels from multiple variables 75 | get_label(list(efc$e42dep, efc$e16sex, efc$e15relat)) 76 | 77 | # use case conversion for human-readable labels 78 | data(iris) 79 | get_label(iris, def.value = colnames(iris)) 80 | get_label(iris, def.value = colnames(iris), case = "parsed") 81 | } 82 | \seealso{ 83 | See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package} 84 | for more details; \code{\link{set_label}} to manually set variable labels or \code{\link{get_labels}} 85 | to get value labels; \code{\link{var_labels}} to set multiple variable 86 | labels at once. 87 | } 88 | -------------------------------------------------------------------------------- /man/get_labels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_labels.R 3 | \name{get_labels} 4 | \alias{get_labels} 5 | \title{Retrieve value labels of labelled data} 6 | \usage{ 7 | get_labels( 8 | x, 9 | attr.only = FALSE, 10 | values = NULL, 11 | non.labelled = FALSE, 12 | drop.na = TRUE, 13 | drop.unused = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{x}{A data frame with variables that have value label attributes (e.g. 18 | from an imported SPSS, SAS or STATA data set, via \code{\link{read_spss}}, 19 | \code{\link{read_sas}} or \code{\link{read_stata}}); a variable 20 | (vector) with value label attributes; or a \code{list} of variables 21 | with values label attributes. If \code{x} has no label attributes, 22 | factor levels are returned. See 'Examples'.} 23 | 24 | \item{attr.only}{Logical, if \code{TRUE}, labels are only searched for 25 | in the the vector's \code{attributes}; else, if \code{attr.only = FALSE} 26 | and \code{x} has no label attributes, factor levels or string values 27 | are returned. See 'Examples'.} 28 | 29 | \item{values}{String, indicating whether the values associated with the 30 | value labels are returned as well. If \code{values = "as.name"} 31 | (or \code{values = "n"}), values are set as \code{names} 32 | attribute of the returned object. If \code{values = "as.prefix"} 33 | (or \code{values = "p"}), values are included as prefix 34 | to each label. See 'Examples'.} 35 | 36 | \item{non.labelled}{Logical, if \code{TRUE}, values without labels will 37 | also be included in the returned labels (see \code{\link{fill_labels}}).} 38 | 39 | \item{drop.na}{Logical, whether labels of tagged NA values (see \code{\link[haven:tagged_na]{tagged_na()}}) 40 | should be included in the return value or not. By default, labelled 41 | (tagged) missing values are not returned. See \code{\link{get_na}} 42 | for more details on tagged NA values.} 43 | 44 | \item{drop.unused}{Logical, if \code{TRUE}, unused labels will be removed from 45 | the return value.} 46 | } 47 | \value{ 48 | Either a list with all value labels from all variables if \code{x} 49 | is a \code{data.frame} or \code{list}; a string with the value 50 | labels, if \code{x} is a variable; 51 | or \code{NULL} if no value label attribute was found. 52 | } 53 | \description{ 54 | This function returns the value labels of labelled data. 55 | } 56 | \examples{ 57 | # import SPSS data set 58 | # mydat <- read_spss("my_spss_data.sav") 59 | 60 | # retrieve variable labels 61 | # mydat.var <- get_label(mydat) 62 | 63 | # retrieve value labels 64 | # mydat.val <- get_labels(mydat) 65 | 66 | data(efc) 67 | get_labels(efc$e42dep) 68 | 69 | # simple barplot 70 | barplot(table(efc$e42dep)) 71 | # get value labels to annotate barplot 72 | barplot(table(efc$e42dep), 73 | names.arg = get_labels(efc$e42dep), 74 | main = get_label(efc$e42dep)) 75 | 76 | # include associated values 77 | get_labels(efc$e42dep, values = "as.name") 78 | 79 | # include associated values 80 | get_labels(efc$e42dep, values = "as.prefix") 81 | 82 | # get labels from multiple variables 83 | get_labels(list(efc$e42dep, efc$e16sex, efc$e15relat)) 84 | 85 | 86 | # create a dummy factor 87 | f1 <- factor(c("hi", "low", "mid")) 88 | # search for label attributes only 89 | get_labels(f1, attr.only = TRUE) 90 | # search for factor levels as well 91 | get_labels(f1) 92 | 93 | # same for character vectors 94 | c1 <- c("higher", "lower", "mid") 95 | # search for label attributes only 96 | get_labels(c1, attr.only = TRUE) 97 | # search for string values as well 98 | get_labels(c1) 99 | 100 | 101 | # create vector 102 | x <- c(1, 2, 3, 2, 4, NA) 103 | # add less labels than values 104 | x <- set_labels(x, labels = c("yes", "maybe", "no"), force.values = FALSE) 105 | # get labels for labelled values only 106 | get_labels(x) 107 | # get labels for all values 108 | get_labels(x, non.labelled = TRUE) 109 | 110 | 111 | # get labels, including tagged NA values 112 | library(haven) 113 | x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), 114 | c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), 115 | "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) 116 | # get current NA values 117 | x 118 | get_labels(x, values = "n", drop.na = FALSE) 119 | 120 | 121 | # create vector with unused labels 122 | data(efc) 123 | efc$e42dep <- set_labels( 124 | efc$e42dep, 125 | labels = c("independent" = 1, "dependent" = 4, "not used" = 5) 126 | ) 127 | get_labels(efc$e42dep) 128 | get_labels(efc$e42dep, drop.unused = TRUE) 129 | get_labels(efc$e42dep, non.labelled = TRUE, drop.unused = TRUE) 130 | } 131 | \seealso{ 132 | See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package} 133 | for more details; \code{\link{set_labels}} to manually set value 134 | labels, \code{\link{get_label}} to get variable labels and 135 | \code{\link{get_values}} to retrieve the values associated 136 | with value labels. 137 | } 138 | -------------------------------------------------------------------------------- /man/get_na.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_na.R 3 | \name{get_na} 4 | \alias{get_na} 5 | \title{Retrieve tagged NA values of labelled variables} 6 | \usage{ 7 | get_na(x, as.tag = FALSE) 8 | } 9 | \arguments{ 10 | \item{x}{Variable (vector) with value label attributes, including 11 | tagged missing values (see \code{\link[haven:tagged_na]{tagged_na()}}); 12 | or a data frame or list with such variables.} 13 | 14 | \item{as.tag}{Logical, if \code{TRUE}, the returned values are not tagged NA's, 15 | but their string representative including the tag value. See 'Examples'.} 16 | } 17 | \value{ 18 | The tagged missing values and their associated value labels from \code{x}, 19 | or \code{NULL} if \code{x} has no tagged missing values. 20 | } 21 | \description{ 22 | This function retrieves tagged NA values and their associated 23 | value labels from a labelled vector. 24 | } 25 | \details{ 26 | Other statistical software packages (like 'SPSS' or 'SAS') allow to define 27 | multiple missing values, e.g. \emph{not applicable}, \emph{refused answer} 28 | or "real" missing. These missing types may be assigned with 29 | different values, so it is possible to distinguish between these 30 | missing types. In R, multiple declared missings cannot be represented 31 | in a similar way with the regular missing values. However, 32 | \code{tagged_na()} values can do this. 33 | Tagged \code{NA}s work exactly like regular R missing values 34 | except that they store one additional byte of information: a tag, 35 | which is usually a letter ("a" to "z") or character number ("0" to "9"). 36 | This allows to indicate different missings. 37 | \cr \cr 38 | Furthermore, see 'Details' in \code{\link{get_values}}. 39 | } 40 | \examples{ 41 | library(haven) 42 | x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), 43 | c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), 44 | "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) 45 | # get current NA values 46 | x 47 | get_na(x) 48 | # which NA has which tag? 49 | get_na(x, as.tag = TRUE) 50 | 51 | # replace only the NA, which is tagged as NA(c) 52 | if (require("sjmisc")) { 53 | replace_na(x, value = 2, tagged.na = "c") 54 | get_na(replace_na(x, value = 2, tagged.na = "c")) 55 | 56 | # data frame as input 57 | y <- labelled(c(2:3, 3:1, tagged_na("y"), 4:1), 58 | c("Agreement" = 1, "Disagreement" = 4, "Why" = tagged_na("y"))) 59 | get_na(data.frame(x, y)) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /man/get_values.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_values.R 3 | \name{get_values} 4 | \alias{get_values} 5 | \title{Retrieve values of labelled variables} 6 | \usage{ 7 | get_values(x, sort.val = TRUE, drop.na = FALSE) 8 | } 9 | \arguments{ 10 | \item{x}{Variable (vector) with value label attributes; or a data frame or 11 | list with such variables.} 12 | 13 | \item{sort.val}{Logical, if \code{TRUE} (default), values of associated value labels 14 | are sorted.} 15 | 16 | \item{drop.na}{Logical, if \code{TRUE}, tagged NA values are excluded from 17 | the return value. See 'Examples' and \code{\link{get_na}}.} 18 | } 19 | \value{ 20 | The values associated with value labels from \code{x}, 21 | or \code{NULL} if \code{x} has no label attributes. 22 | } 23 | \description{ 24 | This function retrieves the values associated with value labels 25 | from \code{\link[haven]{labelled}} vectors. Data is also labelled 26 | when imported from SPSS, SAS or STATA via \code{\link{read_spss}}, 27 | \code{\link{read_sas}} or \code{\link{read_stata}}. 28 | } 29 | \details{ 30 | \code{\link[haven]{labelled}} vectors are numeric by default (when imported with read-functions 31 | like \code{\link{read_spss}}) and have variable and value labels attributes. 32 | The value labels are associated with the values from the labelled vector. 33 | This function returns the values associated with the vector's value labels, 34 | which may differ from actual values in the vector (e.g. if not all 35 | values have a related label). 36 | } 37 | \examples{ 38 | data(efc) 39 | str(efc$e42dep) 40 | get_values(efc$e42dep) 41 | get_labels(efc$e42dep) 42 | 43 | library(haven) 44 | x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), 45 | c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), 46 | "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) 47 | # get all values 48 | get_values(x) 49 | # drop NA 50 | get_values(x, drop.na = TRUE) 51 | 52 | # data frame as input 53 | y <- labelled(c(2:3, 3:1, tagged_na("y"), 4:1), 54 | c("Agreement" = 1, "Disagreement" = 4, "Why" = tagged_na("y"))) 55 | get_values(data.frame(x, y)) 56 | 57 | } 58 | \seealso{ 59 | \code{\link{get_labels}} for getting value labels and \code{\link{get_na}} 60 | to get values for missing values. 61 | } 62 | -------------------------------------------------------------------------------- /man/is_labelled.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/is_labelled.R 3 | \name{is_labelled} 4 | \alias{is_labelled} 5 | \title{Check whether object is of class "labelled"} 6 | \usage{ 7 | is_labelled(x) 8 | } 9 | \arguments{ 10 | \item{x}{An object.} 11 | } 12 | \value{ 13 | Logical, \code{TRUE} if \code{x} inherits from class \code{labelled}, 14 | \code{FALSE} otherwise. 15 | } 16 | \description{ 17 | This function checks whether \code{x} is of class \code{labelled}. 18 | } 19 | -------------------------------------------------------------------------------- /man/label_to_colnames.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/label_to_colnames.R 3 | \name{label_to_colnames} 4 | \alias{label_to_colnames} 5 | \title{Use variable labels as column names} 6 | \usage{ 7 | label_to_colnames(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A data frame.} 11 | 12 | \item{...}{Optional, unquoted names of variables that should be selected for 13 | further processing. Required, if \code{x} is a data frame (and no 14 | vector) and only selected variables from \code{x} should be processed. 15 | You may also use functions like \code{:} or tidyselect's select-helpers. 16 | See 'Examples'.} 17 | } 18 | \value{ 19 | \code{x} with variable labels as column names. For variables without 20 | variable labels, the column name is left unchanged. 21 | } 22 | \description{ 23 | This function sets variable labels as column names, to use "labelled 24 | data" also for those functions that cannot cope with labelled data by default. 25 | } 26 | \examples{ 27 | data(iris) 28 | 29 | iris <- var_labels( 30 | iris, 31 | Petal.Length = "Petal length (cm)", 32 | Petal.Width = "Petal width (cm)" 33 | ) 34 | 35 | colnames(iris) 36 | plot(iris) 37 | 38 | colnames(label_to_colnames(iris)) 39 | plot(label_to_colnames(iris)) 40 | } 41 | -------------------------------------------------------------------------------- /man/read_spss.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read.R 3 | \name{read_spss} 4 | \alias{read_spss} 5 | \alias{read_sas} 6 | \alias{read_stata} 7 | \alias{read_data} 8 | \title{Import data from other statistical software packages} 9 | \usage{ 10 | read_spss( 11 | path, 12 | convert.factors = TRUE, 13 | drop.labels = FALSE, 14 | tag.na = FALSE, 15 | encoding = NULL, 16 | verbose = FALSE, 17 | atomic.to.fac = convert.factors 18 | ) 19 | 20 | read_sas( 21 | path, 22 | path.cat = NULL, 23 | convert.factors = TRUE, 24 | drop.labels = FALSE, 25 | encoding = NULL, 26 | verbose = FALSE, 27 | atomic.to.fac = convert.factors 28 | ) 29 | 30 | read_stata( 31 | path, 32 | convert.factors = TRUE, 33 | drop.labels = FALSE, 34 | encoding = NULL, 35 | verbose = FALSE, 36 | atomic.to.fac = convert.factors 37 | ) 38 | 39 | read_data( 40 | path, 41 | convert.factors = TRUE, 42 | drop.labels = FALSE, 43 | encoding = NULL, 44 | verbose = FALSE, 45 | atomic.to.fac = convert.factors 46 | ) 47 | } 48 | \arguments{ 49 | \item{path}{File path to the data file.} 50 | 51 | \item{convert.factors}{Logical, if \code{TRUE}, categorical variables imported 52 | from the dataset (which are imported as \code{atomic}) will be 53 | converted to factors. Variables are considered as categorical if they have 54 | at least the same number of value labels as unique values. This prevents 55 | that ranges of continuous variables, where - for instance - the minimum and 56 | maximum values are labelled only, will also be converted to factors.} 57 | 58 | \item{drop.labels}{Logical, if \code{TRUE}, unused value labels are removed. See 59 | \code{\link{drop_labels}}.} 60 | 61 | \item{tag.na}{Logical, if \code{TRUE}, missing values are imported 62 | as \code{\link[haven:tagged_na]{tagged_na}} values; else, missing values are 63 | converted to regular \code{NA} (default behaviour).} 64 | 65 | \item{encoding}{The character encoding used for the file. This defaults to the encoding 66 | specified in the file, or UTF-8. Use this argument to override the default 67 | encoding stored in the file.} 68 | 69 | \item{verbose}{Logical, if \code{TRUE}, a progress bar is displayed that indicates 70 | the progress of converting the imported data.} 71 | 72 | \item{atomic.to.fac}{Deprecated, please use `convert.factors` instead.} 73 | 74 | \item{path.cat}{Optional, the file path to the SAS catalog file.} 75 | } 76 | \value{ 77 | A data frame containing the imported, labelled data. Retrieve value labels with 78 | \code{\link{get_labels}} and variable labels with \code{\link{get_label}}. 79 | } 80 | \description{ 81 | Import data from SPSS, SAS or Stata, including NA's, value and variable 82 | labels. 83 | } 84 | \details{ 85 | These read-functions behave slightly differently from \pkg{haven}'s 86 | read-functions: 87 | \itemize{ 88 | \item The vectors in the returned data frame are of class \code{atomic}, not of class \code{labelled}. The labelled-class might cause issues with other packages. 89 | \item When importing SPSS data, variables with user defined missings \emph{won't} be read into \code{labelled_spss} objects, but imported as \emph{tagged NA values}. 90 | } 91 | The \code{convert.factors} option only 92 | converts those variables into factors that are of class \code{atomic} and 93 | which have value labels after import. Atomic vectors without value labels 94 | are considered as continuous and not converted to factors. 95 | } 96 | \note{ 97 | These are wrapper functions for \CRANpkg{haven}'s \code{read_*}-functions. 98 | } 99 | \examples{ 100 | \dontrun{ 101 | # import SPSS data set. uses haven's read function 102 | mydat <- read_spss("my_spss_data.sav") 103 | 104 | # use haven's read function, convert atomic to factor 105 | mydat <- read_spss("my_spss_data.sav", convert.factors = TRUE) 106 | 107 | # retrieve variable labels 108 | mydat.var <- get_label(mydat) 109 | 110 | # retrieve value labels 111 | mydat.val <- get_labels(mydat)} 112 | } 113 | \seealso{ 114 | Vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package}. 115 | } 116 | -------------------------------------------------------------------------------- /man/remove_all_labels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/remove_all_labels.R 3 | \name{remove_all_labels} 4 | \alias{remove_all_labels} 5 | \title{Remove value and variable labels from vector or data frame} 6 | \usage{ 7 | remove_all_labels(x) 8 | } 9 | \arguments{ 10 | \item{x}{Vector or \code{data.frame} with variable and/or value label attributes} 11 | } 12 | \value{ 13 | \code{x} with removed value and variable label attributes. 14 | } 15 | \description{ 16 | This function removes value and variable label attributes 17 | from a vector or data frame. These attributes are typically 18 | added to variables when importing foreign data (see 19 | \code{\link{read_spss}}) or manually adding label attributes 20 | with \code{\link{set_labels}}. 21 | } 22 | \examples{ 23 | data(efc) 24 | str(efc) 25 | str(remove_all_labels(efc)) 26 | } 27 | \seealso{ 28 | See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package}, 29 | and \code{\link{copy_labels}} for adding label attributes 30 | (subsetted) data frames. 31 | } 32 | -------------------------------------------------------------------------------- /man/remove_label.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/remove_label.R 3 | \name{remove_label} 4 | \alias{remove_label} 5 | \title{Remove variable labels from variables} 6 | \usage{ 7 | remove_label(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A vector or data frame.} 11 | 12 | \item{...}{Optional, unquoted names of variables that should be selected for 13 | further processing. Required, if \code{x} is a data frame (and no 14 | vector) and only selected variables from \code{x} should be processed. 15 | You may also use functions like \code{:} or tidyselect's select-helpers. 16 | See 'Examples'.} 17 | } 18 | \value{ 19 | \code{x} with removed variable labels 20 | } 21 | \description{ 22 | Remove variable labels from variables. 23 | } 24 | \examples{ 25 | data(efc) 26 | x <- efc[, 1:5] 27 | get_label(x) 28 | str(x) 29 | 30 | x <- remove_label(x) 31 | get_label(x) 32 | str(x) 33 | } 34 | \seealso{ 35 | \code{\link{set_label}} to manually set variable labels or 36 | \code{\link{get_label}} to get variable labels; \code{\link{set_labels}} to 37 | add value labels, replacing the existing ones (and removing non-specified 38 | value labels). 39 | } 40 | -------------------------------------------------------------------------------- /man/set_label.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/set_label.R, R/var_labels.R 3 | \name{set_label} 4 | \alias{set_label} 5 | \alias{set_label<-} 6 | \alias{var_labels} 7 | \title{Add variable label(s) to variables} 8 | \usage{ 9 | set_label(x, label) 10 | 11 | set_label(x) <- value 12 | 13 | var_labels(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{Variable (vector), list of variables or a data frame where variables 17 | labels should be added as attribute. For \code{var_labels()}, \code{x} 18 | must be a data frame only.} 19 | 20 | \item{label}{If \code{x} is a vector (single variable), use a single character string with 21 | the variable label for \code{x}. If \code{x} is a data frame, use a 22 | vector with character labels of same length as \code{ncol(x)}. 23 | Use \code{label = ""} to remove labels-attribute from \code{x}, resp. 24 | set any value of vector \code{label} to \code{""} to remove specific variable 25 | label attributes from a data frame's variable.} 26 | 27 | \item{value}{See \code{label}.} 28 | 29 | \item{...}{Pairs of named vectors, where the name equals the variable name, 30 | which should be labelled, and the value is the new variable label.} 31 | } 32 | \value{ 33 | \code{x}, with variable label attribute(s), which contains the 34 | variable name(s); or with removed label-attribute if 35 | \code{label = ""}. 36 | } 37 | \description{ 38 | This function adds variable labels as attribute 39 | (named \code{"label"}) to the variable \code{x}, resp. to a 40 | set of variables in a data frame or a list-object. \code{var_labels()} 41 | is intended for use within pipe-workflows and has a tidyverse-consistent 42 | syntax, including support for quasi-quotation (see 'Examples'). 43 | } 44 | \examples{ 45 | # manually set value and variable labels 46 | dummy <- sample(1:4, 40, replace = TRUE) 47 | dummy <- set_labels(dummy, labels = c("very low", "low", "mid", "hi")) 48 | dummy <- set_label(dummy, label = "Dummy-variable") 49 | 50 | # or use: 51 | # set_label(dummy) <- "Dummy-variable" 52 | 53 | # Set variable labels for data frame 54 | dummy <- data.frame( 55 | a = sample(1:4, 10, replace = TRUE), 56 | b = sample(1:4, 10, replace = TRUE), 57 | c = sample(1:4, 10, replace = TRUE) 58 | ) 59 | dummy <- set_label(dummy, c("Variable A", "Variable B", "Variable C")) 60 | str(dummy) 61 | 62 | # remove one variable label 63 | dummy <- set_label(dummy, c("Variable A", "", "Variable C")) 64 | str(dummy) 65 | 66 | # setting same variable labels to multiple vectors 67 | 68 | # create a set of dummy variables 69 | dummy1 <- sample(1:4, 40, replace = TRUE) 70 | dummy2 <- sample(1:4, 40, replace = TRUE) 71 | dummy3 <- sample(1:4, 40, replace = TRUE) 72 | # put them in list-object 73 | dummies <- list(dummy1, dummy2, dummy3) 74 | # and set variable labels for all three dummies 75 | dummies <- set_label(dummies, c("First Dummy", "2nd Dummy", "Third dummy")) 76 | # see result... 77 | get_label(dummies) 78 | 79 | 80 | # use 'var_labels()' to set labels within a pipe-workflow, and 81 | # when you need "tidyverse-consistent" api. 82 | # Set variable labels for data frame 83 | dummy <- data.frame( 84 | a = sample(1:4, 10, replace = TRUE), 85 | b = sample(1:4, 10, replace = TRUE), 86 | c = sample(1:4, 10, replace = TRUE) 87 | ) 88 | 89 | if (require("magrittr") && require("rlang")) { 90 | dummy \%>\% 91 | var_labels(a = "First variable", c = "third variable") \%>\% 92 | get_label() 93 | 94 | # with quasi-quotation 95 | v1 <- "First variable" 96 | v2 <- "Third variable" 97 | dummy \%>\% 98 | var_labels(a = !!v1, c = !!v2) \%>\% 99 | get_label() 100 | 101 | x1 <- "a" 102 | x2 <- "c" 103 | dummy \%>\% 104 | var_labels(!!x1 := !!v1, !!x2 := !!v2) \%>\% 105 | get_label() 106 | } 107 | } 108 | \seealso{ 109 | See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package} 110 | for more details; \code{\link{set_labels}} to manually set value labels or \code{\link{get_label}} 111 | to get variable labels. 112 | } 113 | -------------------------------------------------------------------------------- /man/set_labels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/set_labels.R, R/val_labels.R 3 | \name{set_labels} 4 | \alias{set_labels} 5 | \alias{val_labels} 6 | \title{Add value labels to variables} 7 | \usage{ 8 | set_labels( 9 | x, 10 | ..., 11 | labels, 12 | force.labels = FALSE, 13 | force.values = TRUE, 14 | drop.na = TRUE 15 | ) 16 | 17 | val_labels(x, ..., force.labels = FALSE, force.values = TRUE, drop.na = TRUE) 18 | } 19 | \arguments{ 20 | \item{x}{A vector or data frame.} 21 | 22 | \item{...}{For \code{set_labels()}, Optional, unquoted names of variables that should be selected for 23 | further processing. Required, if \code{x} is a data frame (and no 24 | vector) and only selected variables from \code{x} should be processed. 25 | You may also use functions like \code{:} or tidyselect's 26 | select-helpers. \cr \cr For \code{val_labels()}, 27 | pairs of named vectors, where the name equals the variable name, which 28 | should be labelled, and the value is the new variable label. \code{val_labels()} 29 | also supports quasi-quotation (see 'Examples').} 30 | 31 | \item{labels}{(Named) character vector of labels that will be added to \code{x} as 32 | \code{"labels"} or \code{"value.labels"} attribute. 33 | \itemize{ 34 | \item if \code{labels} is \strong{not} a \emph{named vector}, its length must equal the value range of \code{x}, i.e. if \code{x} has values from 1 to 3, \code{labels} should have a length of 3; 35 | \item if length of \code{labels} is intended to differ from length of unique values of \code{x}, a warning is given. You can still add missing labels with the \code{force.labels} or \code{force.values} arguments; see 'Note'. 36 | \item if \code{labels} \strong{is} a \emph{named vector}, value labels will be set accordingly, even if \code{x} has a different length of unique values. See 'Note' and 'Examples'. 37 | \item if \code{x} is a data frame, \code{labels} may also be a \code{list} of (named) character vectors; 38 | \item if \code{labels} is a \code{list}, it must have the same length as number of columns of \code{x}; 39 | \item if \code{labels} is a vector and \code{x} is a data frame, \code{labels} will be applied to each column of \code{x}. 40 | } 41 | Use \code{labels = ""} to remove labels-attribute from \code{x}.} 42 | 43 | \item{force.labels}{Logical; if \code{TRUE}, all \code{labels} are added as value label 44 | attribute, even if \code{x} has less unique values then length of \code{labels} 45 | or if \code{x} has a smaller range then length of \code{labels}. See 'Examples'. 46 | This parameter will be ignored, if \code{labels} is a named vector.} 47 | 48 | \item{force.values}{Logical, if \code{TRUE} (default) and \code{labels} has less 49 | elements than unique values of \code{x}, additional values not covered 50 | by \code{labels} will be added as label as well. See 'Examples'. 51 | This parameter will be ignored, if \code{labels} is a named vector.} 52 | 53 | \item{drop.na}{Logical, whether existing value labels of tagged NA values 54 | (see \code{\link[haven:tagged_na]{tagged_na}}) should be removed (\code{drop.na = TRUE}, 55 | the default) or preserved (\code{drop.na = FALSE}). 56 | See \code{\link{get_na}} for more details on tagged NA values.} 57 | } 58 | \value{ 59 | \code{x} with value label attributes; or with removed label-attributes if 60 | \code{labels = ""}. If \code{x} is a data frame, the complete data 61 | frame \code{x} will be returned, with removed or added to variables 62 | specified in \code{...}; if \code{...} is not specified, applies 63 | to all variables in the data frame. 64 | } 65 | \description{ 66 | This function adds labels as attribute (named \code{"labels"}) 67 | to a variable or vector \code{x}, resp. to a set of variables in a 68 | data frame or a list-object. A use-case is, for instance, the 69 | \pkg{sjPlot}-package, which supports labelled data and automatically 70 | assigns labels to axes or legends in plots or to be used in tables. 71 | \code{val_labels()} is intended for use within pipe-workflows and has a 72 | tidyverse-consistent syntax, including support for quasi-quotation 73 | (see 'Examples'). 74 | } 75 | \note{ 76 | \itemize{ 77 | \item if \code{labels} is a named vector, \code{force.labels} and \code{force.values} will be ignored, and only values defined in \code{labels} will be labelled; 78 | \item if \code{x} has less unique values than \code{labels}, redundant labels will be dropped, see \code{force.labels}; 79 | \item if \code{x} has more unique values than \code{labels}, only matching values will be labelled, other values remain unlabelled, see \code{force.values}; 80 | } 81 | If you only want to change partial value labels, use \code{\link{add_labels}} instead. 82 | Furthermore, see 'Note' in \code{\link{get_labels}}. 83 | } 84 | \examples{ 85 | \dontshow{if (require("sjmisc") && require("haven")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 86 | dummy <- sample(1:4, 40, replace = TRUE) 87 | frq(dummy) 88 | 89 | dummy <- set_labels(dummy, labels = c("very low", "low", "mid", "hi")) 90 | frq(dummy) 91 | 92 | # assign labels with named vector 93 | dummy <- sample(1:4, 40, replace = TRUE) 94 | dummy <- set_labels(dummy, labels = c("very low" = 1, "very high" = 4)) 95 | frq(dummy) 96 | 97 | # force using all labels, even if not all labels 98 | # have associated values in vector 99 | x <- c(2, 2, 3, 3, 2) 100 | # only two value labels 101 | x <- set_labels(x, labels = c("1", "2", "3")) 102 | x 103 | frq(x) 104 | 105 | # all three value labels 106 | x <- set_labels(x, labels = c("1", "2", "3"), force.labels = TRUE) 107 | x 108 | frq(x) 109 | 110 | # create vector 111 | x <- c(1, 2, 3, 2, 4, NA) 112 | # add less labels than values 113 | x <- set_labels(x, labels = c("yes", "maybe", "no"), force.values = FALSE) 114 | x 115 | # add all necessary labels 116 | x <- set_labels(x, labels = c("yes", "maybe", "no"), force.values = TRUE) 117 | x 118 | 119 | # set labels and missings 120 | x <- c(1, 1, 1, 2, 2, -2, 3, 3, 3, 3, 3, 9) 121 | x <- set_labels(x, labels = c("Refused", "One", "Two", "Three", "Missing")) 122 | x 123 | set_na(x, na = c(-2, 9)) 124 | 125 | 126 | x <- labelled( 127 | c(1:3, tagged_na("a", "c", "z"), 4:1), 128 | c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), 129 | "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) 130 | ) 131 | # get current NA values 132 | x 133 | get_na(x) 134 | # lose value labels from tagged NA by default, if not specified 135 | set_labels(x, labels = c("New Three" = 3)) 136 | # do not drop na 137 | set_labels(x, labels = c("New Three" = 3), drop.na = FALSE) 138 | 139 | 140 | # set labels via named vector, 141 | # not using all possible values 142 | data(efc) 143 | get_labels(efc$e42dep) 144 | 145 | x <- set_labels( 146 | efc$e42dep, 147 | labels = c(`independent` = 1, 148 | `severe dependency` = 2, 149 | `missing value` = 9) 150 | ) 151 | get_labels(x, values = "p") 152 | get_labels(x, values = "p", non.labelled = TRUE) 153 | 154 | # labels can also be set for tagged NA value 155 | # create numeric vector 156 | x <- c(1, 2, 3, 4) 157 | # set 2 and 3 as missing, which will automatically set as 158 | # tagged NA by 'set_na()' 159 | x <- set_na(x, na = c(2, 3)) 160 | x 161 | # set label via named vector just for tagged NA(3) 162 | set_labels(x, labels = c(`New Value` = tagged_na("3"))) 163 | 164 | # setting same value labels to multiple vectors 165 | dummies <- data.frame( 166 | dummy1 = sample(1:4, 40, replace = TRUE), 167 | dummy2 = sample(1:4, 40, replace = TRUE), 168 | dummy3 = sample(1:4, 40, replace = TRUE) 169 | ) 170 | 171 | # and set same value labels for two of three variables 172 | test <- set_labels( 173 | dummies, dummy1, dummy2, 174 | labels = c("very low", "low", "mid", "hi") 175 | ) 176 | # see result... 177 | get_labels(test) 178 | 179 | # using quasi-quotation 180 | if (require("rlang") && require("dplyr")) { 181 | dummies <- data.frame( 182 | dummy1 = sample(1:4, 40, replace = TRUE), 183 | dummy2 = sample(1:4, 40, replace = TRUE), 184 | dummy3 = sample(1:4, 40, replace = TRUE) 185 | ) 186 | 187 | x1 <- "dummy1" 188 | x2 <- c("so low", "rather low", "mid", "very hi") 189 | 190 | dummies \%>\% 191 | val_labels( 192 | !!x1 := c("really low", "low", "a bit mid", "hi"), 193 | dummy3 = !!x2 194 | ) \%>\% 195 | get_labels() 196 | 197 | # ... and named vectors to explicitly set value labels 198 | x2 <- c("so low" = 4, "rather low" = 3, "mid" = 2, "very hi" = 1) 199 | dummies \%>\% 200 | val_labels( 201 | !!x1 := c("really low" = 1, "low" = 3, "a bit mid" = 2, "hi" = 4), 202 | dummy3 = !!x2 203 | ) \%>\% get_labels(values = "p") 204 | } 205 | \dontshow{\}) # examplesIf} 206 | } 207 | \seealso{ 208 | See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package} 209 | for more details; \code{\link{set_label}} to manually set variable labels or 210 | \code{\link{get_label}} to get variable labels; \code{\link{add_labels}} to 211 | add additional value labels without replacing the existing ones. 212 | } 213 | -------------------------------------------------------------------------------- /man/set_na.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/set_na.R 3 | \name{set_na} 4 | \alias{set_na} 5 | \title{Replace specific values in vector with NA} 6 | \usage{ 7 | set_na(x, ..., na, drop.levels = TRUE, as.tag = FALSE) 8 | } 9 | \arguments{ 10 | \item{x}{A vector or data frame.} 11 | 12 | \item{...}{Optional, unquoted names of variables that should be selected for 13 | further processing. Required, if \code{x} is a data frame (and no 14 | vector) and only selected variables from \code{x} should be processed. 15 | You may also use functions like \code{:} or tidyselect's select-helpers. 16 | See 'Examples'.} 17 | 18 | \item{na}{Numeric vector with values that should be replaced with NA values, 19 | or a character vector if values of factors or character vectors should be 20 | replaced. For labelled vectors, may also be the name of a value label. In 21 | this case, the associated values for the value labels in each vector 22 | will be replaced with \code{NA}. \code{na} can also be a named vector. 23 | If \code{as.tag = FALSE}, values will be replaced only in those variables 24 | that are indicated by the value names (see 'Examples').} 25 | 26 | \item{drop.levels}{Logical, if \code{TRUE}, factor levels of values that have 27 | been replaced with \code{NA} are dropped. See 'Examples'.} 28 | 29 | \item{as.tag}{Logical, if \code{TRUE}, values in \code{x} will be replaced 30 | by \code{tagged_na}, else by usual \code{NA} values. Use a named 31 | vector to assign the value label to the tagged NA value (see 'Examples').} 32 | } 33 | \value{ 34 | \code{x}, with all values in \code{na} being replaced by \code{NA}. 35 | If \code{x} is a data frame, the complete data frame \code{x} will 36 | be returned, with NA's set for variables specified in \code{...}; 37 | if \code{...} is not specified, applies to all variables in the 38 | data frame. 39 | } 40 | \description{ 41 | This function replaces specific values of variables with \code{NA}. 42 | } 43 | \details{ 44 | \code{set_na()} converts all values defined in \code{na} with 45 | a related \code{NA} or tagged NA value (see \code{\link[haven:tagged_na]{tagged_na()}}). 46 | Tagged \code{NA}s work exactly like regular R missing values 47 | except that they store one additional byte of information: a tag, 48 | which is usually a letter ("a" to "z") or character number ("0" to "9"). 49 | \cr \cr 50 | \strong{Different NA values for different variables} 51 | \cr \cr 52 | If \code{na} is a named vector \emph{and} \code{as.tag = FALSE}, the names 53 | indicate variable names, and the associated values indicate those values 54 | that should be replaced by \code{NA} in the related variable. For instance, 55 | \code{set_na(x, na = c(v1 = 4, v2 = 3))} would replace all 4 in \code{v1} 56 | with \code{NA} and all 3 in \code{v2} with \code{NA}. 57 | \cr \cr 58 | If \code{na} is a named list \emph{and} \code{as.tag = FALSE}, it is possible 59 | to replace different multiple values by \code{NA} for different variables 60 | separately. For example, \code{set_na(x, na = list(v1 = c(1, 4), v2 = 5:7))} 61 | would replace all 1 and 4 in \code{v1} with \code{NA} and all 5 to 7 in 62 | \code{v2} with \code{NA}. 63 | \cr \cr 64 | Furthermore, see also 'Details' in \code{\link{get_na}}. 65 | } 66 | \note{ 67 | Labels from values that are replaced with NA and no longer used will be 68 | removed from \code{x}, however, other value and variable label 69 | attributes are preserved. For more details on labelled data, 70 | see vignette \href{https://cran.r-project.org/package=sjlabelled/vignettes/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package}. 71 | } 72 | \examples{ 73 | if (require("sjmisc") && require("dplyr") && require("haven")) { 74 | # create random variable 75 | dummy <- sample(1:8, 100, replace = TRUE) 76 | # show value distribution 77 | table(dummy) 78 | # set value 1 and 8 as missings 79 | dummy <- set_na(dummy, na = c(1, 8)) 80 | # show value distribution, including missings 81 | table(dummy, useNA = "always") 82 | 83 | # add named vector as further missing value 84 | set_na(dummy, na = c("Refused" = 5), as.tag = TRUE) 85 | 86 | # see different missing types 87 | print_tagged_na(set_na(dummy, na = c("Refused" = 5), as.tag = TRUE)) 88 | 89 | 90 | # create sample data frame 91 | dummy <- data.frame(var1 = sample(1:8, 100, replace = TRUE), 92 | var2 = sample(1:10, 100, replace = TRUE), 93 | var3 = sample(1:6, 100, replace = TRUE)) 94 | # set value 2 and 4 as missings 95 | dummy \%>\% set_na(na = c(2, 4)) \%>\% head() 96 | dummy \%>\% set_na(na = c(2, 4), as.tag = TRUE) \%>\% get_na() 97 | dummy \%>\% set_na(na = c(2, 4), as.tag = TRUE) \%>\% get_values() 98 | 99 | data(efc) 100 | dummy <- data.frame( 101 | var1 = efc$c82cop1, 102 | var2 = efc$c83cop2, 103 | var3 = efc$c84cop3 104 | ) 105 | # check original distribution of categories 106 | lapply(dummy, table, useNA = "always") 107 | # set 3 to NA for two variables 108 | lapply(set_na(dummy, var1, var3, na = 3), table, useNA = "always") 109 | 110 | 111 | # if 'na' is a named vector *and* 'as.tag = FALSE', different NA-values 112 | # can be specified for each variable 113 | set.seed(1) 114 | dummy <- data.frame( 115 | var1 = sample(1:8, 10, replace = TRUE), 116 | var2 = sample(1:10, 10, replace = TRUE), 117 | var3 = sample(1:6, 10, replace = TRUE) 118 | ) 119 | dummy 120 | 121 | # Replace "3" in var1 with NA, "5" in var2 and "6" in var3 122 | set_na(dummy, na = c(var1 = 3, var2 = 5, var3 = 6)) 123 | 124 | # if 'na' is a named list *and* 'as.tag = FALSE', for each 125 | # variable different multiple NA-values can be specified 126 | set_na(dummy, na = list(var1 = 1:3, var2 = c(7, 8), var3 = 6)) 127 | 128 | 129 | # drop unused factor levels when being set to NA 130 | x <- factor(c("a", "b", "c")) 131 | x 132 | set_na(x, na = "b", as.tag = TRUE) 133 | set_na(x, na = "b", drop.levels = FALSE, as.tag = TRUE) 134 | 135 | # set_na() can also remove a missing by defining the value label 136 | # of the value that should be replaced with NA. This is in particular 137 | # helpful if a certain category should be set as NA, however, this category 138 | # is assigned with different values accross variables 139 | x1 <- sample(1:4, 20, replace = TRUE) 140 | x2 <- sample(1:7, 20, replace = TRUE) 141 | x1 <- set_labels(x1, labels = c("Refused" = 3, "No answer" = 4)) 142 | x2 <- set_labels(x2, labels = c("Refused" = 6, "No answer" = 7)) 143 | 144 | tmp <- data.frame(x1, x2) 145 | get_labels(tmp) 146 | table(tmp, useNA = "always") 147 | 148 | get_labels(set_na(tmp, na = "No answer")) 149 | table(set_na(tmp, na = "No answer"), useNA = "always") 150 | 151 | # show values 152 | tmp 153 | set_na(tmp, na = c("Refused", "No answer")) 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /man/sjlabelled-package.Rd: -------------------------------------------------------------------------------- 1 | \encoding{UTF-8} 2 | \name{sjlabelled-package} 3 | \alias{sjlabelled-package} 4 | \alias{sjlabelled} 5 | \docType{package} 6 | \title{Labelled Data Utility Functions} 7 | \description{ 8 | 9 | \strong{Purpose of this package} 10 | 11 | Collection of miscellaneous utility functions (especially intended for people coming from other statistical software packages like 'SPSS', and/or who are new to R), supporting following common tasks when working with labelled data: 12 | \itemize{ 13 | \item Reading and writing data between R and other statistical software packages like 'SPSS', 'SAS' or 'Stata' 14 | \item Easy ways to get, set and change value and variable label attributes, to convert labelled vectors into factors (and vice versa), or to deal with multiple declared missing values etc. 15 | } 16 | 17 | } 18 | \author{ 19 | Daniel Lüdecke \email{d.luedecke@uke.de} 20 | } 21 | -------------------------------------------------------------------------------- /man/term_labels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_model_labels.R 3 | \name{term_labels} 4 | \alias{term_labels} 5 | \alias{get_term_labels} 6 | \alias{response_labels} 7 | \alias{get_dv_labels} 8 | \title{Retrieve labels of model terms from regression models} 9 | \usage{ 10 | term_labels( 11 | models, 12 | mark.cat = FALSE, 13 | case = NULL, 14 | prefix = c("none", "varname", "label"), 15 | ... 16 | ) 17 | 18 | get_term_labels( 19 | models, 20 | mark.cat = FALSE, 21 | case = NULL, 22 | prefix = c("none", "varname", "label"), 23 | ... 24 | ) 25 | 26 | response_labels(models, case = NULL, multi.resp = FALSE, mv = FALSE, ...) 27 | 28 | get_dv_labels(models, case = NULL, multi.resp = FALSE, mv = FALSE, ...) 29 | } 30 | \arguments{ 31 | \item{models}{One or more fitted regression models. May also be glm's or 32 | mixed models.} 33 | 34 | \item{mark.cat}{Logical, if \code{TRUE}, the returned vector has an 35 | attribute with logical values, which indicate whether a label indicates 36 | the value from a factor category (attribute value is \code{TRUE}) or 37 | a term's variable labels (attribute value is \code{FALSE}).} 38 | 39 | \item{case}{Desired target case. Labels will automatically converted into the 40 | specified character case. See \code{\link[snakecase:to_any_case]{to_any_case()}} for 41 | more details on this argument.} 42 | 43 | \item{prefix}{Indicates whether the value labels of categorical variables 44 | should be prefixed, e.g. with the variable name or variable label. 45 | May be abbreviated. See 'Examples',} 46 | 47 | \item{...}{Further arguments passed down to \code{to_any_case()}, 48 | like \code{preprocess} or \code{postprocess}.} 49 | 50 | \item{mv, multi.resp}{Logical, if \code{TRUE} and \code{models} is a multivariate 51 | response model from a \code{brmsfit} object, then the labels for each 52 | dependent variable (multiple responses) are returned.} 53 | } 54 | \value{ 55 | For \code{term_labels()}, a (named) character vector with 56 | variable labels of all model terms, which can be used, for instance, 57 | as axis labels to annotate plots. \cr \cr For \code{response_labels()}, 58 | a character vector with variable labels from all dependent variables 59 | of \code{models}. 60 | } 61 | \description{ 62 | This function retrieves variable labels from model terms. In case 63 | of categorical variables, where one variable has multiple dummies, 64 | variable name and category value is returned. 65 | } 66 | \details{ 67 | Typically, the variable labels from model terms are returned. However, 68 | for categorical terms that have estimates for each category, the 69 | value labels are returned as well. As the return value is a named 70 | vector, you can easily use it with \pkg{ggplot2}'s \code{scale_*()} 71 | functions to annotate plots. 72 | } 73 | \examples{ 74 | # use data set with labelled data 75 | data(efc) 76 | 77 | fit <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) 78 | term_labels(fit) 79 | 80 | # make "education" categorical 81 | if (require("sjmisc")) { 82 | efc$c172code <- to_factor(efc$c172code) 83 | fit <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) 84 | term_labels(fit) 85 | 86 | # prefix value of categorical variables with variable name 87 | term_labels(fit, prefix = "varname") 88 | 89 | # prefix value of categorical variables with value label 90 | term_labels(fit, prefix = "label") 91 | 92 | # get label of dv 93 | response_labels(fit) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /man/tidy_labels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tidy_labels.R 3 | \name{tidy_labels} 4 | \alias{tidy_labels} 5 | \title{Repair value labels} 6 | \usage{ 7 | tidy_labels(x, ..., sep = "_", remove = FALSE) 8 | } 9 | \arguments{ 10 | \item{x}{A vector or data frame.} 11 | 12 | \item{...}{Optional, unquoted names of variables that should be selected for 13 | further processing. Required, if \code{x} is a data frame (and no 14 | vector) and only selected variables from \code{x} should be processed. 15 | You may also use functions like \code{:} or tidyselect's select-helpers. 16 | See 'Examples'.} 17 | 18 | \item{sep}{String that will be used to separate the suffixed value from the 19 | old label when creating the new value label.} 20 | 21 | \item{remove}{Logical, if \code{TRUE}, the original, duplicated value label will 22 | be replaced by the value (i.e. the value is not the suffix of the 23 | value label, but will become the value label itself). The 24 | \code{sep}-argument will be ignored in such cases.} 25 | } 26 | \value{ 27 | \code{x}, with "repaired" (unique) value labels for each variable. 28 | } 29 | \description{ 30 | Duplicated value labels in variables may cause troubles when 31 | saving labelled data, or computing cross tabs (cf. 32 | \code{sjmisc::flat_table()} or \code{sjPlot::plot_xtab()}). 33 | \code{tidy_labels()} repairs duplicated value labels by suffixing 34 | them with the associated value. 35 | } 36 | \examples{ 37 | if (require("sjmisc")) { 38 | set.seed(123) 39 | x <- set_labels( 40 | sample(1:5, size = 20, replace = TRUE), 41 | labels = c("low" = 1, ".." = 2, ".." = 3, ".." = 4, "high" = 5) 42 | ) 43 | frq(x) 44 | 45 | z <- tidy_labels(x) 46 | frq(z) 47 | 48 | z <- tidy_labels(x, sep = ".") 49 | frq(z) 50 | 51 | z <- tidy_labels(x, remove = TRUE) 52 | frq(z) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /man/unlabel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/unlabel.R 3 | \name{unlabel} 4 | \alias{unlabel} 5 | \title{Convert labelled vectors into normal classes} 6 | \usage{ 7 | unlabel(x, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{x}{A data frame, which contains \code{labelled} class 11 | vectors or a single vector of class \code{labelled}.} 12 | 13 | \item{verbose}{Logical, if \code{TRUE}, a progress bar is displayed that indicates 14 | the progress of converting the imported data.} 15 | } 16 | \value{ 17 | A data frame or single vector (depending on \code{x}) with common object classes. 18 | } 19 | \description{ 20 | This function converts \code{labelled} class vectors 21 | into a generic data format, which means that simply all \code{labelled} 22 | class attributes will be removed, so all vectors / variables will most 23 | likely become \code{atomic}. 24 | } 25 | \note{ 26 | This function is currently only used to avoid possible compatibility issues 27 | with \code{\link[haven:labelled]{labelled}} class vectors. Some known issues with 28 | \code{labelled} class vectors have already been fixed, so 29 | it might be that this function will become redundant in the future. 30 | } 31 | -------------------------------------------------------------------------------- /man/write_spss.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/write.R 3 | \name{write_spss} 4 | \alias{write_spss} 5 | \alias{write_stata} 6 | \alias{write_sas} 7 | \title{Write data to other statistical software packages} 8 | \usage{ 9 | write_spss(x, path, drop.na = FALSE, compress = FALSE) 10 | 11 | write_stata(x, path, drop.na = FALSE, version = 14) 12 | 13 | write_sas(x, path, drop.na = FALSE) 14 | } 15 | \arguments{ 16 | \item{x}{A data frame that should be saved as file.} 17 | 18 | \item{path}{File path of the output file.} 19 | 20 | \item{drop.na}{Logical, if \code{TRUE}, tagged \code{NA} values with value labels 21 | will be converted to regular NA's. Else, tagged \code{NA} values will be replaced 22 | with their value labels. See 'Examples' and \code{\link{get_na}}.} 23 | 24 | \item{compress}{Logical, if \code{TRUE} and a SPSS-file should be created, 25 | saves \code{x} in \code{zsav} (i.e. compressed SPSS) format.} 26 | 27 | \item{version}{File version to use. Supports versions 8-14.} 28 | } 29 | \description{ 30 | These functions write the content of a data frame to an SPSS, SAS or 31 | Stata-file. 32 | } 33 | -------------------------------------------------------------------------------- /man/zap_labels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/drop_labels.R, R/fill_labels.R, R/zap_labels.R 3 | \name{drop_labels} 4 | \alias{drop_labels} 5 | \alias{fill_labels} 6 | \alias{zap_labels} 7 | \alias{zap_unlabelled} 8 | \title{Drop, add or convert (non-)labelled values} 9 | \usage{ 10 | drop_labels(x, ..., drop.na = TRUE) 11 | 12 | fill_labels(x, ...) 13 | 14 | zap_labels(x, ...) 15 | 16 | zap_unlabelled(x, ...) 17 | } 18 | \arguments{ 19 | \item{x}{(partially) \code{\link[haven:labelled]{labelled()}} vector or a data frame 20 | with such vectors.} 21 | 22 | \item{...}{Optional, unquoted names of variables that should be selected for 23 | further processing. Required, if \code{x} is a data frame (and no 24 | vector) and only selected variables from \code{x} should be processed. 25 | You may also use functions like \code{:} or tidyselect's select-helpers. 26 | See 'Examples'.} 27 | 28 | \item{drop.na}{Logical, whether existing value labels of tagged NA values 29 | (see \code{\link[haven:tagged_na]{tagged_na}}) should be removed (\code{drop.na = TRUE}, 30 | the default) or preserved (\code{drop.na = FALSE}). 31 | See \code{\link{get_na}} for more details on tagged NA values.} 32 | } 33 | \value{ 34 | \itemize{ 35 | \item For \code{zap_labels()}, \code{x}, where all labelled values are converted to \code{NA}. 36 | \item For \code{zap_unlabelled()}, \code{x}, where all non-labelled values are converted to \code{NA}. 37 | \item For \code{drop_labels()}, \code{x}, where value labels for non-existing values are removed. 38 | \item For \code{fill_labels()}, \code{x}, where labels for non-labelled values are added. 39 | } 40 | If \code{x} is a data frame, the complete data frame \code{x} will be 41 | returned, with variables specified in \code{...} being converted; 42 | if \code{...} is not specified, applies to all variables in the 43 | data frame. 44 | } 45 | \description{ 46 | For (partially) labelled vectors, \code{zap_labels()} will replace 47 | all values that have a value label attribute with \code{NA}; 48 | \code{zap_unlabelled()}, as counterpart, will replace all values 49 | that \emph{don't} have a value label attribute with \code{NA}. 50 | \cr \cr 51 | \code{drop_labels()} drops all value labels for unused values, 52 | i.e. values that are not present in a vector. \code{fill_labels()} is the 53 | counterpart to \code{drop_labels()} and adds value labels to 54 | a partially labelled vector, i.e. if not all values are 55 | labelled, non-labelled values get labels. 56 | } 57 | \examples{ 58 | if (require("sjmisc") && require("dplyr")) { 59 | 60 | # zap_labels() ---- 61 | 62 | data(efc) 63 | str(efc$e42dep) 64 | 65 | x <- set_labels( 66 | efc$e42dep, 67 | labels = c("independent" = 1, "severe dependency" = 4) 68 | ) 69 | table(x) 70 | get_values(x) 71 | str(x) 72 | 73 | # zap all labelled values 74 | table(zap_labels(x)) 75 | get_values(zap_labels(x)) 76 | str(zap_labels(x)) 77 | 78 | # zap all unlabelled values 79 | table(zap_unlabelled(x)) 80 | get_values(zap_unlabelled(x)) 81 | str(zap_unlabelled(x)) 82 | 83 | # in a pipe-workflow 84 | efc \%>\% 85 | select(c172code, e42dep) \%>\% 86 | set_labels( 87 | e42dep, 88 | labels = c("independent" = 1, "severe dependency" = 4) 89 | ) \%>\% 90 | zap_labels() 91 | 92 | 93 | # drop_labels() ---- 94 | 95 | rp <- rec_pattern(1, 100) 96 | rp 97 | 98 | # sample data 99 | data(efc) 100 | # recode carers age into groups of width 5 101 | x <- rec(efc$c160age, rec = rp$pattern) 102 | # add value labels to new vector 103 | x <- set_labels(x, labels = rp$labels) 104 | 105 | # watch result. due to recode-pattern, we have age groups with 106 | # no observations (zero-counts) 107 | frq(x) 108 | # now, let's drop zero's 109 | frq(drop_labels(x)) 110 | 111 | # drop labels, also drop NA value labels, then also zap tagged NA 112 | if (require("haven")) { 113 | x <- labelled(c(1:3, tagged_na("z"), 4:1), 114 | c("Agreement" = 1, "Disagreement" = 4, "Unused" = 5, 115 | "Not home" = tagged_na("z"))) 116 | x 117 | drop_labels(x, drop.na = FALSE) 118 | drop_labels(x) 119 | zap_na_tags(drop_labels(x)) 120 | 121 | # fill_labels() ---- 122 | 123 | # create labelled integer, with tagged missings 124 | x <- labelled( 125 | c(1:3, tagged_na("a", "c", "z"), 4:1), 126 | c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), 127 | "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) 128 | ) 129 | # get current values and labels 130 | x 131 | get_labels(x) 132 | 133 | fill_labels(x) 134 | get_labels(fill_labels(x)) 135 | # same as 136 | get_labels(x, non.labelled = TRUE) 137 | } 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /man/zap_na_tags.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zap_labels.R 3 | \name{zap_na_tags} 4 | \alias{zap_na_tags} 5 | \title{Convert tagged NA values into regular NA} 6 | \usage{ 7 | zap_na_tags(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A \code{\link[haven:labelled]{labelled()}} vector with \code{tagged_na} 11 | values, or a data frame with such vectors.} 12 | 13 | \item{...}{Optional, unquoted names of variables that should be selected for 14 | further processing. Required, if \code{x} is a data frame (and no 15 | vector) and only selected variables from \code{x} should be processed. 16 | You may also use functions like \code{:} or tidyselect's select-helpers. 17 | See 'Examples'.} 18 | } 19 | \value{ 20 | \code{x}, where all \code{tagged_na} values are converted to \code{NA}. 21 | } 22 | \description{ 23 | Replaces all \code{\link[haven:tagged_na]{tagged_na()}} values with 24 | regular \code{NA}. 25 | } 26 | \examples{ 27 | if (require("haven")) { 28 | x <- labelled( 29 | c(1:3, tagged_na("a", "c", "z"), 4:1), 30 | c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), 31 | "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) 32 | ) 33 | # get current NA values 34 | x 35 | get_na(x) 36 | zap_na_tags(x) 37 | get_na(zap_na_tags(x)) 38 | 39 | # also works with non-labelled vector that have tagged NA values 40 | x <- c(1:5, tagged_na("a"), tagged_na("z"), NA) 41 | haven::print_tagged_na(x) 42 | haven::print_tagged_na(zap_na_tags(x)) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /sjlabelled.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | StripTrailingWhitespace: Yes 16 | 17 | BuildType: Package 18 | PackageUseDevtools: Yes 19 | PackageInstallArgs: --no-multiarch --with-keep.source 20 | PackageCheckArgs: --as-cran 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /sjlabelled.code-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "." 5 | } 6 | ] 7 | } -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(sjlabelled) 3 | 4 | test_check("sjlabelled") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-as_numeric.R: -------------------------------------------------------------------------------- 1 | context("sjlabelled, as_numeric") 2 | 3 | library(sjlabelled) 4 | 5 | test_that("as_numeric", { 6 | expect_equal(as_numeric(factor(c(0,1,2)), keep.labels = FALSE), c(0,1,2)) 7 | expect_equal(as_numeric(factor(c(2,3,4)), keep.labels = FALSE), c(2,3,4)) 8 | expect_equal(as_numeric(factor(c("a", "b", "c")), keep.labels = FALSE), c(1,2,3)) 9 | expect_equal(as_numeric(factor(c("d", "e", "f")), keep.labels = FALSE), c(1,2,3)) 10 | }) 11 | 12 | test_that("as_numeric", { 13 | expect_equal(as_numeric(factor(c(0,1,2)), start.at = 4, keep.labels = FALSE), c(4,5,6)) 14 | expect_equal(as_numeric(factor(c(2,3,4)), start.at = 4, keep.labels = FALSE), c(4,5,6)) 15 | expect_equal(as_numeric(factor(c("a", "b", "c")), start.at = 4, keep.labels = FALSE), c(4,5,6)) 16 | expect_equal(as_numeric(factor(c("d", "e", "f")), start.at = 4, keep.labels = FALSE), c(4,5,6)) 17 | }) 18 | -------------------------------------------------------------------------------- /tests/testthat/test-remove_labels.R: -------------------------------------------------------------------------------- 1 | test_that("remove_labels", { 2 | skip_if_not_installed("haven") 3 | z <- factor(LETTERS[3:1], ordered = TRUE) 4 | z <- sjlabelled::set_labels(z, labels = c("yes", "maybe", "no")) 5 | x <- sjlabelled::remove_labels(z, labels = 2) 6 | 7 | expect_equal(attributes(x)$labels, c(yes = "A", no = "C")) 8 | }) 9 | -------------------------------------------------------------------------------- /vignettes/labelleddata.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Working with Labelled Data" 3 | author: "Daniel Lüdecke" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Working with Labelled Data} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r echo = FALSE} 13 | knitr::opts_chunk$set(collapse = TRUE, comment = "#>") 14 | ``` 15 | 16 | This vignette shows a small example how functions to work with labelled data can be implemented in a typical data visualization workflow. 17 | 18 | # Labelled Data 19 | 20 | In software like SPSS, it is common to have value and variable labels as variable attributes. Variable values, even if categorical, are mostly numeric. In R, however, you may use labels as values directly: 21 | 22 | ```{r} 23 | factor(c("low", "high", "mid", "high", "low")) 24 | ``` 25 | 26 | Reading SPSS-data with **haven** or **sjlabelled** keeps the numeric values for variables and adds the value and variable labels as attributes. See following example from the sample-dataset efc, which is part of the **sjlabelled**-package: 27 | 28 | ```{r} 29 | library(sjlabelled) 30 | data(efc) 31 | str(efc$e42dep) 32 | ``` 33 | 34 | While all plotting and table functions of the [sjPlot-package](https://cran.r-project.org/package=sjPlot) make use of these attributes, many packages and/or functions do not consider these attributes, e.g. R base graphics: 35 | 36 | ```{r warning=FALSE, fig.height=6, fig.width=7} 37 | library(sjlabelled) 38 | data(efc) 39 | barplot( 40 | table(efc$e42dep, efc$e16sex), 41 | beside = TRUE, 42 | legend.text = TRUE 43 | ) 44 | ``` 45 | 46 | As you can see in the above figure, the plot has neither axis nor legend labels. 47 | 48 | # Adding value labels as factor values 49 | 50 | `as_label()` is a sjlabelled-function that converts a numeric variable into a factor and sets attribute-value-labels as factor levels. When using factors with valued levels, the bar plot will be labelled. 51 | 52 | ```{r warning=FALSE, fig.height=6, fig.width=7} 53 | barplot( 54 | table(sjlabelled::as_label(efc$e42dep), 55 | sjlabelled::as_label(efc$e16sex)), 56 | beside = TRUE, 57 | legend.text = TRUE 58 | ) 59 | ``` 60 | 61 | # Getting and setting value and variable labels 62 | 63 | There are four functions that let you easily set or get value and variable labels of either a single vector or a complete data frame: 64 | 65 | * `get_label()` to get variable labels 66 | * `get_labels()` to get value labels 67 | * `set_label()` to set variable labels (add them as vector attribute) 68 | * `set_labels()` to set value labels (add them as vector attribute) 69 | 70 | With this function, you can easily add titles to plots dynamically, i.e. depending on the variable that is plotted. 71 | 72 | ```{r warning=FALSE, fig.height=6, fig.width=7} 73 | barplot( 74 | table(sjlabelled::as_label(efc$e42dep), 75 | sjlabelled::as_label(efc$e16sex)), 76 | beside = TRUE, 77 | legend.text = TRUE, 78 | main = get_label(efc$e42dep) 79 | ) 80 | ``` 81 | 82 | # Restore labels from subsetted data 83 | 84 | The base `subset()` function drops label attributes (or vector attributes in general) when subsetting data. In the sjlabelled-package, there are handy functions to deal with this problem: `copy_labels()` and `remove_labels()`. 85 | 86 | `copy_labels()` adds back labels to a subsetted data frame based on the original data frame. And `remove_labels()` removes all label attributes. 87 | 88 | 89 | ## Losing labels during subset 90 | 91 | ```{r} 92 | efc.sub <- subset(efc, subset = e16sex == 1, select = c(4:8)) 93 | str(efc.sub) 94 | ``` 95 | 96 | ## Add back labels 97 | 98 | ```{r, message=FALSE} 99 | efc.sub <- copy_labels(efc.sub, efc) 100 | str(efc.sub) 101 | ``` 102 | 103 | # Conclusion 104 | 105 | When working with labelled data, especially when working with data sets imported from other software packages, it comes very handy to make use of the label attributes. The **sjlabelled**-package supports this feature and offers useful functions for these tasks. 106 | -------------------------------------------------------------------------------- /vignettes/quasiquotation.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Using quasiquotation to add variable and value labels" 3 | author: "Daniel Lüdecke" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Using quasiquotation to add variable and value labels} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r echo = FALSE} 13 | knitr::opts_chunk$set(collapse = TRUE, comment = "#>") 14 | 15 | if (!requireNamespace("sjmisc", quietly = TRUE) || 16 | !requireNamespace("rlang", quietly = TRUE)) { 17 | knitr::opts_chunk$set(eval = FALSE) 18 | } 19 | ``` 20 | 21 | Labelling data is typically a task for end-users and is applied in own scripts or functions rather than in packages. However, sometimes it can be useful for both end-users and package developers to have a flexible way to add variable and value labels to their data. In such cases, [quasiquotation](https://adv-r.hadley.nz/quasiquotation.html) is helpful. 22 | 23 | This vignette demonstrate how to use quasiquotation in _sjlabelled_ to label your data. 24 | 25 | ## Adding value labels to variables using quasiquotation 26 | 27 | Usually, `set_labels()` can be used to add value labels to variables. The syntax of this function is easy to use, and `set_labels()` allows to add value labels to multiple variables at once, if these variables share the same value labels. 28 | 29 | In the following examples, we will use the `frq()` function, that shows an extra **label**-column containing _value labels_, if the data is labelled. If the data has _no_ value labels, this column is not shown in the output. 30 | 31 | ```{r message=FALSE, warning=FALSE} 32 | library(sjlabelled) 33 | library(sjmisc) # for frq()-function 34 | library(rlang) 35 | 36 | # unlabelled data 37 | dummies <- data.frame( 38 | dummy1 = sample(1:3, 40, replace = TRUE), 39 | dummy2 = sample(1:3, 40, replace = TRUE), 40 | dummy3 = sample(1:3, 40, replace = TRUE) 41 | ) 42 | 43 | # set labels for all variables in the data frame 44 | test <- set_labels(dummies, labels = c("low", "mid", "hi")) 45 | 46 | attr(test$dummy1, "labels") 47 | 48 | frq(test, dummy1) 49 | 50 | # and set same value labels for two of three variables 51 | test <- set_labels( 52 | dummies, dummy1, dummy2, 53 | labels = c("low", "mid", "hi") 54 | ) 55 | 56 | frq(test) 57 | ``` 58 | 59 | `val_labels()` does the same job as `set_labels()`, but in a different way. While `set_labels()` requires variables to be specified in the `...`-argument, and labels in the `labels`-argument, `val_labels()` requires both to be specified in the `...`. 60 | 61 | `val_labels()` requires _named_ vectors as argument, with the _left-hand side_ being the name of the variable that should be labelled, and the _right-hand side_ containing the labels for the values. 62 | 63 | ```{r message=FALSE, warning=FALSE} 64 | test <- val_labels(dummies, dummy1 = c("low", "mid", "hi")) 65 | attr(test$dummy1, "labels") 66 | 67 | # remaining variables are not labelled 68 | frq(test) 69 | ``` 70 | 71 | Unlike `set_labels()`, `val_labels()` allows the user to add _different_ value labels to different variables in one function call. Another advantage, or difference, of `val_labels()` is it's flexibility in defining variable names and value labels by using quasiquotation. 72 | 73 | ### Add labels that are stored in a vector 74 | 75 | To use quasiquotation, we need the **rlang** package to be installed and loaded. Now we can have labels in a character vector, and use `!!` to unquote this vector. 76 | 77 | ```{r message=FALSE, warning=FALSE} 78 | labels <- c("low_quote", "mid_quote", "hi_quote") 79 | test <- val_labels(dummies, dummy1 = !! labels) 80 | attr(test$dummy1, "labels") 81 | ``` 82 | 83 | ### Define variable names that are stored in a vector 84 | 85 | The same can be done with the names of _variables_ that should get new value labels. We then need `!!` to unquote the variable name and `:=` as assignment. 86 | 87 | ```{r message=FALSE, warning=FALSE} 88 | variable <- "dummy2" 89 | test <- val_labels(dummies, !! variable := c("lo_var", "mid_var", "high_var")) 90 | 91 | # no value labels 92 | attr(test$dummy1, "labels") 93 | 94 | # value labels 95 | attr(test$dummy2, "labels") 96 | ``` 97 | 98 | ### Both variable names and value labels are stored in a vector 99 | 100 | Finally, we can combine the above approaches to be flexible regarding both variable names and value labels. 101 | 102 | ```{r message=FALSE, warning=FALSE} 103 | variable <- "dummy3" 104 | labels <- c("low", "mid", "hi") 105 | test <- val_labels(dummies, !! variable := !! labels) 106 | attr(test$dummy3, "labels") 107 | ``` 108 | 109 | ## Adding variable labels using quasiquotation 110 | 111 | `set_label()` is the equivalent to `set_labels()` to add variable labels to a variable. The equivalent to `val_labels()` is `var_labels()`, which works in the same way as `val_labels()`. In case of _variable_ labels, a `label`-attribute is added to a vector or factor (instead of a `labels`-attribute, which is used for _value_ labels). 112 | 113 | The following examples show how to use `var_labels()` to add variable labels to the data. We demonstrate this function without further explanation, because it is actually very similar to `val_labels()`. 114 | 115 | 116 | ```{r message=FALSE, warning=FALSE} 117 | dummy <- data.frame( 118 | a = sample(1:4, 10, replace = TRUE), 119 | b = sample(1:4, 10, replace = TRUE), 120 | c = sample(1:4, 10, replace = TRUE) 121 | ) 122 | 123 | # simple usage 124 | test <- var_labels(dummy, a = "first variable", c = "third variable") 125 | 126 | attr(test$a, "label") 127 | attr(test$b, "label") 128 | attr(test$c, "label") 129 | 130 | # quasiquotation for labels 131 | v1 <- "First variable" 132 | v2 <- "Second variable" 133 | test <- var_labels(dummy, a = !! v1, b = !! v2) 134 | 135 | attr(test$a, "label") 136 | attr(test$b, "label") 137 | attr(test$c, "label") 138 | 139 | # quasiquotation for variable names 140 | x1 <- "a" 141 | x2 <- "c" 142 | test <- var_labels(dummy, !! x1 := "First", !! x2 := "Second") 143 | 144 | attr(test$a, "label") 145 | attr(test$b, "label") 146 | attr(test$c, "label") 147 | 148 | # quasiquotation for both variable names and labels 149 | test <- var_labels(dummy, !! x1 := !! v1, !! x2 := !! v2) 150 | 151 | attr(test$a, "label") 152 | attr(test$b, "label") 153 | attr(test$c, "label") 154 | ``` 155 | 156 | ## Conclusion 157 | 158 | As we have demonstrated, `var_labels()` and `val_labels()` are one of the most flexible and easy-to-use ways to add value and variable labels to our data. Another advantage is the consistent design of all functions in **sjlabelled**, which allows seamless integration into pipe-workflows. 159 | --------------------------------------------------------------------------------