├── .Rbuildignore
├── .gitattributes
├── .github
└── workflows
│ ├── check-standard.yaml
│ ├── pkgdown.yaml
│ ├── rhub.yaml
│ └── test-coverage.yaml
├── .gitignore
├── CRAN-SUBMISSION
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
├── checks.R
├── metacore.R
├── spec_builder.R
├── utils-pipe.R
├── utils.R
├── validators.R
├── xml_builders.R
├── xml_helpers.R
└── zzz.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── cran-comments.md
├── inst
└── extdata
│ ├── ADaM_define.xml
│ ├── SDTM_define.xml
│ ├── SDTM_spec_CDISC_pilot.xlsx
│ ├── mock_spec.xlsx
│ ├── p21_mock.xlsx
│ ├── pilot_ADaM.rda
│ └── pilot_SDTM.rda
├── man
├── MetaCore_filter.Rd
├── check_columns.Rd
├── check_structure.Rd
├── check_words.Rd
├── checks.Rd
├── create_tbl.Rd
├── define_to_metacore.Rd
├── figures
│ ├── .DS_Store
│ ├── labeled-code_list.png
│ ├── labeled-derivation.png
│ ├── labeled-ds_spec.png
│ ├── labeled-ds_vars.png
│ ├── labeled-supp.png
│ ├── labeled-value_spec.png
│ ├── labeled-var_spec.png
│ ├── labeled_schema.png
│ ├── metacore.PNG
│ └── schema-colors.png
├── get_control_term.Rd
├── get_keys.Rd
├── is_metacore.Rd
├── load_metacore.Rd
├── metacore.Rd
├── metacore_example.Rd
├── pipe.Rd
├── read_all_sheets.Rd
├── save_metacore.Rd
├── select_dataset.Rd
├── spec_to_metacore.Rd
├── spec_type.Rd
├── spec_type_to_codelist.Rd
├── spec_type_to_derivations.Rd
├── spec_type_to_ds_spec.Rd
├── spec_type_to_ds_vars.Rd
├── spec_type_to_value_spec.Rd
├── spec_type_to_var_spec.Rd
├── xml_to_codelist.Rd
├── xml_to_derivations.Rd
├── xml_to_ds_spec.Rd
├── xml_to_ds_vars.Rd
├── xml_to_value_spec.Rd
└── xml_to_var_spec.Rd
├── metacore.Rproj
├── tests
├── testthat.R
└── testthat
│ ├── define-2021.xml
│ ├── define2-0-0.xsl
│ ├── example_spec.xlsx
│ ├── metacore-figure
│ ├── unnamed-chunk-1-1.png
│ └── unnamed-chunk-2-1.png
│ ├── spec_no_val.xlsx
│ ├── test-checks.R
│ ├── test-metacore.R
│ ├── test-reader.R
│ ├── test-utils.R
│ └── test-validators.R
└── vignettes
├── .gitignore
├── Building_Specification_Readers.Rmd
└── Example.Rmd
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^metacore\.Rproj$
2 | ^\.Rproj\.user$
3 | ^dev$
4 | ^LICENSE\.md$
5 | ^README\.Rmd$
6 | ^codecov\.yml$
7 | ^_pkgdown\.yml$
8 | ^docs$
9 | ^pkgdown$
10 | ^\.github$
11 | cran-comments.md
12 | ^doc$
13 | ^Meta$
14 | ^CRAN-RELEASE$
15 | ^CRAN-SUBMISSION$
16 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | tests/testthat/* linguist-vendored
2 |
--------------------------------------------------------------------------------
/.github/workflows/check-standard.yaml:
--------------------------------------------------------------------------------
1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
3 | on:
4 | push:
5 | branches:
6 | - main
7 | - master
8 | - dev
9 | pull_request:
10 | branches:
11 | - main
12 | - master
13 | - dev
14 |
15 | name: R-CMD-check
16 |
17 | jobs:
18 | R-CMD-check:
19 | runs-on: ${{ matrix.config.os }}
20 |
21 | name: ${{ matrix.config.os }} (${{ matrix.config.r }})
22 |
23 | strategy:
24 | fail-fast: false
25 | matrix:
26 | config:
27 | - {os: windows-latest, r: 'release'}
28 | - {os: macOS-latest, r: 'release'}
29 | - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
30 | - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
31 |
32 | env:
33 | REPOS: ${{ matrix.config.repos }}
34 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
35 | RSPM: ${{ matrix.config.rspm }}
36 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
37 |
38 | steps:
39 | - uses: actions/checkout@v2
40 |
41 | - uses: r-lib/actions/setup-r@v2
42 | with:
43 | r-version: ${{ matrix.config.r }}
44 |
45 | - uses: r-lib/actions/setup-pandoc@v2
46 |
47 | - name: Query dependencies
48 | run: |
49 | install.packages('remotes')
50 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
51 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
52 | shell: Rscript {0}
53 |
54 | - name: Restore R package cache
55 | if: runner.os != 'Windows'
56 | uses: actions/cache@v2
57 | with:
58 | path: ${{ env.R_LIBS_USER }}
59 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
60 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
61 |
62 | - name: Install system dependencies
63 | if: runner.os == 'Linux'
64 | run: |
65 | while read -r cmd
66 | do
67 | eval sudo $cmd
68 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
69 |
70 | - name: Install dependencies
71 | run: |
72 | repos <- Sys.getenv("REPOS")
73 | if (repos != "") options(repos = repos)
74 | remotes::install_deps(dependencies = TRUE)
75 | remotes::install_cran("rcmdcheck")
76 | shell: Rscript {0}
77 |
78 | - name: Check
79 | env:
80 | _R_CHECK_CRAN_INCOMING_REMOTE_: false
81 | run: |
82 | options(crayon.enabled = TRUE)
83 | rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
84 | shell: Rscript {0}
85 |
86 | - name: Upload check results
87 | if: failure()
88 | uses: actions/upload-artifact@main
89 | with:
90 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results
91 | path: check
92 |
--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
1 | on:
2 | push:
3 | branches:
4 | - main
5 | - master
6 |
7 | name: pkgdown
8 |
9 | jobs:
10 | pkgdown:
11 | runs-on: macOS-latest
12 | env:
13 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
14 | steps:
15 | - uses: actions/checkout@v2
16 |
17 | - uses: r-lib/actions/setup-r@v2
18 |
19 | - uses: r-lib/actions/setup-pandoc@v2
20 |
21 | - name: Query dependencies
22 | run: |
23 | install.packages('remotes')
24 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
25 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
26 | shell: Rscript {0}
27 |
28 | - name: Restore R package cache
29 | uses: actions/cache@v2
30 | with:
31 | path: ${{ env.R_LIBS_USER }}
32 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
33 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
34 |
35 | - name: Install dependencies
36 | run: |
37 | remotes::install_deps(dependencies = TRUE)
38 | install.packages("pkgdown", type = "binary")
39 | shell: Rscript {0}
40 |
41 | - name: Install package
42 | run: R CMD INSTALL .
43 |
44 | - name: Deploy package
45 | run: |
46 | git config --local user.email "actions@github.com"
47 | git config --local user.name "GitHub Actions"
48 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)'
49 |
--------------------------------------------------------------------------------
/.github/workflows/rhub.yaml:
--------------------------------------------------------------------------------
1 | # R-hub's generic GitHub Actions workflow file. It's canonical location is at
2 | # https://github.com/r-hub/actions/blob/v1/workflows/rhub.yaml
3 | # You can update this file to a newer version using the rhub2 package:
4 | #
5 | # rhub::rhub_setup()
6 | #
7 | # It is unlikely that you need to modify this file manually.
8 |
9 | name: R-hub
10 | run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}"
11 |
12 | on:
13 | workflow_dispatch:
14 | inputs:
15 | config:
16 | description: 'A comma separated list of R-hub platforms to use.'
17 | type: string
18 | default: 'linux,windows,macos'
19 | name:
20 | description: 'Run name. You can leave this empty now.'
21 | type: string
22 | id:
23 | description: 'Unique ID. You can leave this empty now.'
24 | type: string
25 |
26 | jobs:
27 |
28 | setup:
29 | runs-on: ubuntu-latest
30 | outputs:
31 | containers: ${{ steps.rhub-setup.outputs.containers }}
32 | platforms: ${{ steps.rhub-setup.outputs.platforms }}
33 |
34 | steps:
35 | # NO NEED TO CHECKOUT HERE
36 | - uses: r-hub/actions/setup@v1
37 | with:
38 | config: ${{ github.event.inputs.config }}
39 | id: rhub-setup
40 |
41 | linux-containers:
42 | needs: setup
43 | if: ${{ needs.setup.outputs.containers != '[]' }}
44 | runs-on: ubuntu-latest
45 | name: ${{ matrix.config.label }}
46 | strategy:
47 | fail-fast: false
48 | matrix:
49 | config: ${{ fromJson(needs.setup.outputs.containers) }}
50 | container:
51 | image: ${{ matrix.config.container }}
52 |
53 | steps:
54 | - uses: r-hub/actions/checkout@v1
55 | - uses: r-hub/actions/platform-info@v1
56 | with:
57 | token: ${{ secrets.RHUB_TOKEN }}
58 | job-config: ${{ matrix.config.job-config }}
59 | - uses: r-hub/actions/setup-deps@v1
60 | with:
61 | token: ${{ secrets.RHUB_TOKEN }}
62 | job-config: ${{ matrix.config.job-config }}
63 | - uses: r-hub/actions/run-check@v1
64 | with:
65 | token: ${{ secrets.RHUB_TOKEN }}
66 | job-config: ${{ matrix.config.job-config }}
67 |
68 | other-platforms:
69 | needs: setup
70 | if: ${{ needs.setup.outputs.platforms != '[]' }}
71 | runs-on: ${{ matrix.config.os }}
72 | name: ${{ matrix.config.label }}
73 | strategy:
74 | fail-fast: false
75 | matrix:
76 | config: ${{ fromJson(needs.setup.outputs.platforms) }}
77 |
78 | steps:
79 | - uses: r-hub/actions/checkout@v1
80 | - uses: r-hub/actions/setup-r@v1
81 | with:
82 | job-config: ${{ matrix.config.job-config }}
83 | token: ${{ secrets.RHUB_TOKEN }}
84 | - uses: r-hub/actions/platform-info@v1
85 | with:
86 | token: ${{ secrets.RHUB_TOKEN }}
87 | job-config: ${{ matrix.config.job-config }}
88 | - uses: r-hub/actions/setup-deps@v1
89 | with:
90 | job-config: ${{ matrix.config.job-config }}
91 | token: ${{ secrets.RHUB_TOKEN }}
92 | - uses: r-hub/actions/run-check@v1
93 | with:
94 | job-config: ${{ matrix.config.job-config }}
95 | token: ${{ secrets.RHUB_TOKEN }}
96 |
--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
1 | on:
2 | push:
3 | branches:
4 | - main
5 | - master
6 | - dev
7 | pull_request:
8 | branches:
9 | - main
10 | - master
11 | - dev
12 |
13 | name: test-coverage
14 |
15 | jobs:
16 | test-coverage:
17 | runs-on: macOS-latest
18 | env:
19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
20 | steps:
21 | - uses: actions/checkout@v2
22 |
23 | - uses: r-lib/actions/setup-r@v2
24 |
25 | - uses: r-lib/actions/setup-pandoc@v2
26 |
27 | - name: Query dependencies
28 | run: |
29 | install.packages('remotes')
30 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
31 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
32 | shell: Rscript {0}
33 |
34 | - name: Restore R package cache
35 | uses: actions/cache@v2
36 | with:
37 | path: ${{ env.R_LIBS_USER }}
38 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
39 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
40 |
41 | - name: Install dependencies
42 | run: |
43 | install.packages(c("remotes"))
44 | remotes::install_deps(dependencies = TRUE)
45 | remotes::install_cran("covr")
46 | shell: Rscript {0}
47 |
48 | - name: Test coverage
49 | run: covr::codecov()
50 | shell: Rscript {0}
51 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # History files
2 | .Rhistory
3 | .Rapp.history
4 |
5 | # Session Data files
6 | .RData
7 | .gitignore
8 |
9 | # Example code in package build process
10 | *-Ex.R
11 |
12 | # Output files from R CMD build
13 | /*.tar.gz
14 |
15 | # Output files from R CMD check
16 | /*.Rcheck/
17 |
18 | # RStudio files
19 | .Rproj.user/
20 |
21 | # produced vignettes
22 | vignettes/*.html
23 | vignettes/*.pdf
24 |
25 | # Files to draft tests
26 | */define_examples/*
27 |
28 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
29 | .httr-oauth
30 |
31 | # knitr and R markdown default cache directories
32 | /*_cache/
33 | /cache/
34 |
35 | # Temporary files created by R markdown
36 | *.utf8.md
37 | *.knit.md
38 |
39 | # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html
40 | rsconnect/
41 | .Rproj.user
42 | inst/doc
43 | tests/testthat/temp_data/*
44 |
45 | docs
46 | doc
47 | Meta
48 | .DS_Store?
49 | .DS_Store
50 |
--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 0.1.3
2 | Date: 2024-05-02 08:11:47 UTC
3 | SHA: dfac124015ca54e065a7c11a88aa2a492969b4f4
4 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: metacore
2 | Title: A Centralized Metadata Object Focused on Clinical Trial Data Programming Workflows
3 | Version: 0.1.3
4 | Authors@R:
5 | c(person(given = "Christina",
6 | family = "Fillmore",
7 | role = c("aut", "cre"),
8 | email = "christina.e.fillmore@gsk.com",
9 | comment = c(ORCID = "0000-0003-0595-2302")),
10 | person(given = "Maya",
11 | family = "Gans",
12 | role = "aut",
13 | email = "maya.gans@atorusresearch.com",
14 | comment = c(ORCID = "0000-0002-5452-6089")),
15 | person(given = "Ashley",
16 | family = "Tarasiewicz",
17 | role = "aut",
18 | email = "ashley.tarasiewicz@atorusresearch.com"),
19 | person(given = "Mike",
20 | family = "Stackhouse",
21 | role = "aut",
22 | email = "mike.stackhouse@atorusresearch.com",
23 | comment = c(ORCID = "0000-0001-6030-723X")),
24 | person(given = "Tamara",
25 | family = "Senior",
26 | role = "aut",
27 | email = "tamara.senior@roche.com"),
28 | person(given = "GSK/Atorus JPT",
29 | role = c("cph", "fnd")))
30 | Description: Create an immutable container holding metadata for the purpose of better enabling programming activities and functionality of other packages within the clinical programming workflow.
31 | License: MIT + file LICENSE
32 | Encoding: UTF-8
33 | Roxygen: list(markdown = TRUE, r6 = FALSE)
34 | RoxygenNote: 7.3.1
35 | Depends:
36 | R (>= 3.6)
37 | Suggests:
38 | testthat,
39 | knitr,
40 | rmarkdown,
41 | covr
42 | Imports:
43 | R6,
44 | tidyr,
45 | dplyr,
46 | stringr,
47 | magrittr,
48 | xml2,
49 | purrr,
50 | readxl,
51 | rlang,
52 | tibble,
53 | tidyselect
54 | VignetteBuilder: knitr
55 | URL: https://atorus-research.github.io/metacore/, https://github.com/atorus-research/metacore
56 | BugReports: https://github.com/atorus-research/metacore/issues
57 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2021
2 | COPYRIGHT HOLDER: GSK/Atorus JPT
3 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | Copyright (c) 2021 GSK/Atorus JPT
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export("%>%")
4 | export(check_inconsistent_formats)
5 | export(check_inconsistent_labels)
6 | export(check_inconsistent_types)
7 | export(create_tbl)
8 | export(define_to_metacore)
9 | export(get_control_term)
10 | export(get_keys)
11 | export(is_metacore)
12 | export(load_metacore)
13 | export(metacore)
14 | export(metacore_example)
15 | export(read_all_sheets)
16 | export(save_metacore)
17 | export(select_dataset)
18 | export(spec_to_metacore)
19 | export(spec_type)
20 | export(spec_type_to_codelist)
21 | export(spec_type_to_derivations)
22 | export(spec_type_to_ds_spec)
23 | export(spec_type_to_ds_vars)
24 | export(spec_type_to_value_spec)
25 | export(spec_type_to_var_spec)
26 | export(xml_to_codelist)
27 | export(xml_to_derivations)
28 | export(xml_to_ds_spec)
29 | export(xml_to_ds_vars)
30 | export(xml_to_value_spec)
31 | export(xml_to_var_spec)
32 | import(R6)
33 | importFrom(dplyr,across)
34 | importFrom(dplyr,anti_join)
35 | importFrom(dplyr,arrange)
36 | importFrom(dplyr,bind_rows)
37 | importFrom(dplyr,case_when)
38 | importFrom(dplyr,distinct)
39 | importFrom(dplyr,filter)
40 | importFrom(dplyr,full_join)
41 | importFrom(dplyr,group_by)
42 | importFrom(dplyr,group_split)
43 | importFrom(dplyr,if_else)
44 | importFrom(dplyr,inner_join)
45 | importFrom(dplyr,left_join)
46 | importFrom(dplyr,mutate)
47 | importFrom(dplyr,n)
48 | importFrom(dplyr,n_distinct)
49 | importFrom(dplyr,pull)
50 | importFrom(dplyr,right_join)
51 | importFrom(dplyr,row_number)
52 | importFrom(dplyr,rowwise)
53 | importFrom(dplyr,select)
54 | importFrom(dplyr,summarise)
55 | importFrom(dplyr,ungroup)
56 | importFrom(magrittr,"%>%")
57 | importFrom(purrr,compact)
58 | importFrom(purrr,discard)
59 | importFrom(purrr,keep)
60 | importFrom(purrr,map)
61 | importFrom(purrr,map2)
62 | importFrom(purrr,map2_chr)
63 | importFrom(purrr,map2_lgl)
64 | importFrom(purrr,map_chr)
65 | importFrom(purrr,map_dfr)
66 | importFrom(purrr,map_int)
67 | importFrom(purrr,map_lgl)
68 | importFrom(purrr,pmap_chr)
69 | importFrom(purrr,quietly)
70 | importFrom(purrr,reduce)
71 | importFrom(purrr,safely)
72 | importFrom(readxl,excel_sheets)
73 | importFrom(readxl,read_excel)
74 | importFrom(rlang,"!!")
75 | importFrom(rlang,as_label)
76 | importFrom(rlang,as_name)
77 | importFrom(rlang,as_string)
78 | importFrom(rlang,enexpr)
79 | importFrom(rlang,expr)
80 | importFrom(rlang,prim_name)
81 | importFrom(rlang,sym)
82 | importFrom(stats,na.omit)
83 | importFrom(stats,var)
84 | importFrom(stringr,regex)
85 | importFrom(stringr,str_c)
86 | importFrom(stringr,str_count)
87 | importFrom(stringr,str_detect)
88 | importFrom(stringr,str_extract)
89 | importFrom(stringr,str_glue)
90 | importFrom(stringr,str_length)
91 | importFrom(stringr,str_remove)
92 | importFrom(stringr,str_remove_all)
93 | importFrom(stringr,str_replace)
94 | importFrom(stringr,str_replace_all)
95 | importFrom(stringr,str_replace_na)
96 | importFrom(stringr,str_split)
97 | importFrom(stringr,str_starts)
98 | importFrom(stringr,str_subset)
99 | importFrom(stringr,str_to_lower)
100 | importFrom(stringr,str_trim)
101 | importFrom(tibble,as_tibble)
102 | importFrom(tibble,tibble)
103 | importFrom(tibble,tribble)
104 | importFrom(tidyr,nest)
105 | importFrom(tidyr,replace_na)
106 | importFrom(tidyr,unnest)
107 | importFrom(tidyselect,everything)
108 | importFrom(tidyselect,matches)
109 | importFrom(xml2,read_xml)
110 | importFrom(xml2,xml_attr)
111 | importFrom(xml2,xml_find_all)
112 | importFrom(xml2,xml_find_first)
113 | importFrom(xml2,xml_ns_strip)
114 | importFrom(xml2,xml_parent)
115 | importFrom(xml2,xml_text)
116 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # Metacore 0.1.3
2 | - Add `get_keys` function which returns the dataset keys for a given dataset [#102](https://github.com/atorus-research/metacore/issues/102)
3 | - Fix issues with `select_dataset(simplify = TRUE)` [#97](https://github.com/atorus-research/metacore/issues/97)
4 |
5 | # Metacore 0.1.2
6 | - Update to resolve issues from the dplyr updates
7 |
8 | # Metacore 0.1.1
9 | - Add variable cross checks to check consistent use of label, type, and format
10 | - Add `is_metacore()` function to check if it is a metacore object
11 | - [#64](https://github.com/atorus-research/metacore/issues/64) Resolve issue so define and spec readers work when value levels are empty
12 |
13 | # Metacore 0.1.0
14 | - Updates for latest version of {tidyselect}
15 | - Move from XML to xml2 for define.xml processing
16 | - Correct issue with warning on ds_var$core
17 | - Correct bug with not reading the derivation of predecessor and assigned variables correctly
18 |
19 | # Metacore 0.0.6
20 | - Correct typos
21 | - Limits origin value/make all origins lower case
22 |
23 | # Metacore 0.0.5
24 | - [#47](https://github.com/atorus-research/metacore/issues/47) Improve `yn_to_tf()` to be more robust in parsing yes/no columns
25 |
26 | # Metacore 0.0.4
27 | - Adds a supp table to store the `idvar` and `qeval` information
28 | - Adds `sig_dig`column to the `value_spec` table
29 |
30 |
31 | # Metacore 0.0.3
32 | - Fixes bugs found in the `get_control_term` function and improves error messages for `get_control_term`
33 | - Improves internal naming consistency by renaming `define_to_MetaCore` to `define_to_metacore`
34 | - Adds quiet option to readers, to allow users to optionally ignore the reader warnings
35 | - Creates new example metacore object that is an .rda so much faster to read in
36 | - Fixes issue with select dataset when variables are duplicated
37 |
38 |
39 | # Metacore 0.0.1
40 | This fixes the following issues:
41 |
42 | - [#16](https://github.com/atorus-research/metacore/issues/16) the metacore function now accepts any empty datasets and creates an empty dataset with the correct column names and types
43 | - [#10](https://github.com/atorus-research/metacore/issues/10) yn function checks for logicals and returns them
44 | - [#11](https://github.com/atorus-research/metacore/issues/11) updated function description to make this clearer
45 | - [#12](https://github.com/atorus-research/metacore/issues/12) updated the regex to "[F|f]ormat" so it can accept lower case
46 | - [#14](https://github.com/atorus-research/metacore/issues/14) added supp_flag to ds_vars (on a side note we did a really good job with this it was super easy to change and only required a few edits)
47 | - [#15](https://github.com/atorus-research/metacore/issues/15) modified create_tbl so if there are two potential matches in the same dataset and one is an exact match it uses that
48 |
49 | Additionally, it adds the `get_control_term` function to pull out the control term for a given variable.
50 |
--------------------------------------------------------------------------------
/R/checks.R:
--------------------------------------------------------------------------------
1 | #' Optional checks to consistency of metadata
2 | #'
3 | #' @description These functions check to see if values (e.g. labels, formats)
4 | #' that should be consistent for a variable across all data are actually
5 | #' consistent.
6 | #'
7 | #' @param metacore metacore object to check
8 | #'
9 | #' @return If all variables are consistent it will return a message. If there
10 | #' are inconsistencies it will return a message and a dataset of the variables
11 | #' with inconsistencies.
12 | #'
13 | #' @export
14 | #'
15 | #' @examples
16 | #' ## EXAMPLE WITH DUPLICATES
17 | #' # Loads in a metacore obj called metacore
18 | #' load(metacore_example("pilot_ADaM.rda"))
19 | #' check_inconsistent_labels(metacore)
20 | #'
21 | #' check_inconsistent_types(metacore)
22 | #'
23 | #' ## EXAMPLE WITHOUT DUPLICATES
24 | #' # Loads in a metacore obj called metacore
25 | #' load(metacore_example("pilot_SDTM.rda"))
26 | #' check_inconsistent_labels(metacore)
27 | #'
28 | #' check_inconsistent_formats(metacore)
29 | #'
30 | #' check_inconsistent_types(metacore)
31 | #' @rdname checks
check_inconsistent_labels <- function(metacore) {
  # Thin wrapper: delegate to the shared cross-variable consistency
  # engine, comparing the `label` column of var_spec across datasets.
  basic_check(label, metacore)
}
35 |
#' @export
#' @rdname checks
check_inconsistent_types <- function(metacore) {
  # Thin wrapper: delegate to the shared cross-variable consistency
  # engine, comparing the `type` column of var_spec across datasets.
  basic_check(type, metacore)
}
41 |
#' @export
#' @rdname checks
check_inconsistent_formats <- function(metacore) {
  # Thin wrapper: delegate to the shared cross-variable consistency
  # engine, comparing the `format` column of var_spec across datasets.
  basic_check(format, metacore)
}
47 |
#' Base code for running cross variable checks
#'
#' Strips the "DATASET." prefix from variable names in `var_spec`, then looks
#' for variables whose `col_to_check` value differs between datasets.
#'
#' @param col_to_check bare (unquoted) column of `var_spec` to check, captured
#'   via tidy evaluation (e.g. `label`, `type`, `format`)
#' @param metacore metacore obj
#'
#' @return If inconsistencies are found, a tibble with one row per
#'   variable/value pair (`variable`, the checked column, `n_vars`,
#'   `ls_of_vars`) plus a message; otherwise just a message (invisibly `NULL`).
#' @noRd
#' @importFrom stringr str_glue
#' @importFrom dplyr across
basic_check <- function(col_to_check, metacore){
  if(!is_metacore(metacore)){
    stop("Expects a metacore object", call. = FALSE)
  }

  report_df <- metacore$var_spec %>%
    # var_spec variables can be dataset-qualified ("ADSL.AGE"); drop the
    # prefix so the same variable from different datasets groups together
    mutate(var1 = str_remove(variable, "[[:alnum:]]+\\.")) %>%
    group_by(var1) %>%
    # count distinct values of the checked column within each variable name
    mutate(n_lab = n_distinct({{col_to_check}})) %>%
    filter(n_lab > 1) %>%
    # drop "label" attributes so grouping/summarising below compares values,
    # not attribute-decorated vectors
    mutate(across(everything(), remove_label)) %>%
    group_by(var1, {{col_to_check}}) %>%
    summarise(n_vars = n(),
              ls_of_vars = list(variable),
              .groups = "drop") %>%
    select(variable = var1, everything())

  if(nrow(report_df) > 0){
    message(str_glue("Mismatch {as_label(enexpr(col_to_check))}s detected"))
    return(report_df)
  } else {
    message(str_glue("No mismatch {as_label(enexpr(col_to_check))}s detected"))
  }
}
82 |
remove_label <- function(x) {
  # Return `x` with its "label" attribute (if any) stripped, leaving the
  # values and all other attributes untouched.
  `attr<-`(x, "label", NULL)
}
87 |
--------------------------------------------------------------------------------
/R/metacore.R:
--------------------------------------------------------------------------------
#' This file includes the internal functions needed to create the readonly
#' Metacore R6 object
#'
#' @param ds_spec contains each dataset in the study, with the labels for each
#' @param ds_vars information on what variables are in each dataset plus
#'   dataset-specific variable information
#' @param var_spec variable information that is shared across all datasets
#' @param value_spec parameter-specific information; as the data is long, the
#'   specs for wbc might differ from those for hgb
#' @param derivations contains derivations; it allows different variables to
#'   have the same derivation
#' @param codelist contains the code/decode information
#' @param supp contains the idvar and qeval information for supplemental variables
#'
#' @family Metacore
#' @noRd
#'
#' @importFrom stringr str_to_lower
MetaCore_initialize <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist, supp){

  private$.ds_spec <- ds_spec %>%
    add_labs(dataset = "Dataset Name",
             structure = "Value Structure",
             label = "Dataset Label")

  private$.ds_vars <- ds_vars %>%
    add_labs(dataset = "Dataset Name",
             variable = "Variable Name",
             key_seq = "Sequence Key",
             order = "Variable Order",
             keep = "Keep (Boolean)",
             core = "ADaM core (Expected, Required, Permissible)",
             supp_flag = "Supplemental Flag")

  private$.var_spec <- var_spec %>%
    add_labs(variable = "Variable Name",
             length = "Variable Length",
             label = "Variable Label",
             type = "Variable Class",
             common = "Common Across ADaM",
             format = "Variable Format")

  # Fix: the label key was previously misspelled "orgin", so the `origin`
  # column (used in the mutate below) never received its label.
  private$.value_spec <- value_spec %>%
    add_labs(type = "Value Type",
             origin = "Origin of Value",
             code_id = "ID of the Code List",
             dataset = "Dataset Name",
             variable = "Variable Name",
             where = "Value of the Variable",
             derivation_id = "ID of Derivation") %>%
    mutate(origin = str_to_lower(.data$origin))

  private$.derivations <- derivations %>%
    add_labs(derivation_id = "ID of Derivation",
             derivation = "Derivation")

  # Fix: this assignment was previously duplicated verbatim; the redundant
  # second copy has been removed.
  private$.codelist <- codelist %>%
    add_labs(code_id = "ID of the Code List",
             names = "Name of the Code List",
             type = "Code List/Permitted Values/External Library",
             codes = "List of Codes")

  private$.supp <- supp %>%
    add_labs(dataset = "Dataset Name",
             variable = "Variable Name",
             idvar = "Identifying Variable",
             qeval = "Evaluator")

  self$validate()
  message("\n Metadata successfully imported")
}
79 |
80 |
#' Metacore class print function
#'
#' Reports how many datasets the object holds, based on the number of rows
#' in the ds_spec table.
#'
#' @param ... pass in the dataframes to be validated
#' @family Metacore
#' @noRd
#'
MetaCore_print <- function(...){
  n_ds <- private$.ds_spec %>% pull(.data$dataset) %>% length()
  cat(paste0("Metacore object contains metadata for ", n_ds, " datasets\n"))
}
92 |
93 |
#' Metacore R6 object validation function
#'
#' Runs the column-name check first; if that passes and any table has rows,
#' runs the full set of cross-table consistency checks.
#' @family Metacore
#' @noRd
#'
MetaCore_validate <- function() {
  if (var_name_check(private)) {

    # If every table is empty there is nothing meaningful to cross-check,
    # so warn and skip the remaining validation.
    all_empty <- nrow(private$.ds_spec) == 0 &&
      nrow(private$.ds_vars) == 0 &&
      nrow(private$.var_spec) == 0 &&
      nrow(private$.value_spec) == 0 &&
      nrow(private$.derivations) == 0 &&
      nrow(private$.codelist) == 0 &&
      nrow(private$.supp) == 0

    if (all_empty) {
      # Fix: warning text previously read "preformed"
      warning("Other checks were not performed, because all datasets are empty",
              call. = FALSE)
    } else {
      check_columns(private$.ds_spec,
                    private$.ds_vars,
                    private$.var_spec,
                    private$.value_spec,
                    private$.derivations,
                    private$.codelist,
                    private$.supp
      )

      ds_vars_check(private$.ds_vars, private$.var_spec)
      value_check(private$.ds_vars, private$.value_spec)
      derivation_check(private$.value_spec, private$.derivations)
      codelist_check(private$.value_spec, private$.codelist)
      # NOTE(review): supp_check only runs when the supp table is EMPTY,
      # which looks inverted (one would expect to validate supp rows when
      # they exist). Behavior kept as-is — confirm intent before flipping.
      if (nrow(private$.supp) == 0) {
        supp_check(private$.ds_vars, private$.supp)
      }

    }

  } else {
    # Fix: warning text previously read "preformed"
    warning("Other checks were not performed, because column names were incorrect",
            call. = FALSE)
  }
}
137 |
138 |
139 |
#' readonly function factory
#'
#' This function is used inside the R6 active method and allows us
#' to read the selected dataframe and prevents overwriting
#'
#' @param name the name of the readonly object
#' @param value any attempt at assignment to the readonly object
#' @family Metacore
#' @noRd
#'
readonly <- function(name) {
  # Placeholder so static checks don't flag `private` as undefined here.
  # NOTE(review): at run time `private` is presumably resolved through the
  # R6 active-binding enclosure rather than this NULL — confirm against
  # the R6 class definition before restructuring.
  private <- NULL
  inside <- function(value) {
    # Recover the field name from the attribute stamped on this closure
    # below, by looking up the currently executing function.
    name <- attr(sys.function(sys.parent()), "name")
    if (missing(value)) {
      # Read path: return the matching private field (".<name>").
      private[[paste0(".", name)]]
    } else {
      # Write path: reject any attempt at assignment.
      stop(paste0(name, " is read only"), call. = FALSE)
    }
  }
  # Stamp the field name onto the closure so `inside` can find it when invoked.
  attributes(inside) <- list(name = name)
  inside
}
163 |
#' Select method to subset by a single dataframe
#'
#' Filters every private table of the Metacore object down to the rows that
#' belong to a single dataset. Errors if the dataset is unknown.
#' @param value the dataframe to subset by
#'
MetaCore_filter <- function(value) {

  private$.ds_spec <- private$.ds_spec %>% filter(dataset == value)
  if(nrow(private$.ds_spec) == 0){
    # Bug fix: `call. = FALSE` was previously passed to paste0() instead of
    # stop(), so it was silently pasted into the error message
    stop(paste0(value, " is not a dataset in the metacore object"), call. = FALSE)
  }
  private$.ds_vars <- private$.ds_vars %>% filter(dataset == value)
  private$.value_spec <- private$.value_spec %>% filter(dataset == value)


  # Need clarity on X.Y.Z situation: SUPPY8.QVAL
  private$.var_spec <- private$.var_spec %>%
    # variables have the dataset prefix so we make this into its own column
    mutate(dataset = ifelse(str_detect(variable, "\\."), str_extract(variable, "^.*(?=\\.)"), ""),
           variable = str_remove(variable, "^.*\\.")
    ) %>%
    # then keep the variables that occur once or in the dataset to filter
    filter(dataset == "" | dataset == value) %>%
    # remove the temporary column
    select(-dataset) %>%
    # keep only variables that are actually in the filtered ds_vars
    right_join(private$.ds_vars %>% select(variable), by="variable",
               multiple = "all") %>%
    distinct(variable, .keep_all = TRUE) # for when duplicates get through and have different labels but the same name

  # Get values/variables that need derivations
  val_deriv <- private$.value_spec %>%
    distinct(.data$derivation_id) %>%
    na.omit()

  private$.derivations <- private$.derivations %>%
    right_join(val_deriv, by = "derivation_id", multiple = "all")

  private$.codelist <- private$.codelist %>%
    right_join(private$.value_spec %>%
                 distinct(.data$code_id) %>%
                 na.omit(), by = "code_id", multiple = "all")

  private$.supp <- private$.supp %>% filter(dataset == value)
}
207 |
#' The Metacore R6 Class
#'
#' This uses the initialize, print, and validate functions above to create a single object
#' The user can query
#'
#' @family Metacore
#' @noRd
#
MetaCore <- R6::R6Class("Metacore",
                        public = list(
                          initialize = MetaCore_initialize,
                          print = MetaCore_print,
                          validate = MetaCore_validate,
                          metacore_filter = MetaCore_filter
                        ),
                        # Private fields hold the seven metadata tables; each is
                        # initialized to a zero-row tibble with the expected schema
                        private = list(
                          # one row per dataset in the study
                          .ds_spec = tibble(dataset = character(), structure = character(), label = character()),
                          # one row per dataset/variable pair
                          .ds_vars = tibble(dataset = character(), variable = character(), keep = logical(),
                                            key_seq = integer(), order = integer(), core = character(),
                                            supp_flag = logical()),
                          # variable-level metadata shared across datasets
                          .var_spec = tibble(variable = character(), label = character(), length = integer(),
                                             type = character(), common = character(), format = character()),
                          # value(parameter)-level metadata
                          .value_spec = tibble(dataset = character(),
                                               variable = character(),
                                               where = character(),
                                               type = character(),
                                               sig_dig = integer(),
                                               code_id = character(),
                                               origin = character(),
                                               derivation_id = integer()),
                          .derivations = tibble(derivation_id = integer(), derivation = character()),
                          # code_type == df | permitted_val | external_lib
                          .codelist = tibble(code_id = character(), name = character(), type = character(), codes = list()),
                          # supplemental-qualifier metadata
                          .supp = tibble(dataset = character(), variable = character(), idvar = character(), qeval = character())
                        ),
                        # Read-only accessors: each active binding exposes the
                        # matching private table and errors on assignment
                        active = list(
                          ds_spec = readonly('ds_spec'),
                          ds_vars = readonly('ds_vars'),
                          var_spec = readonly('var_spec'),
                          value_spec = readonly('value_spec'),
                          derivations = readonly('derivations'),
                          codelist = readonly('codelist'),
                          supp = readonly('supp')
                        )
)
253 |
254 |
#' R6 Class wrapper to create your own metacore object
#'
#' @param ds_spec contains each dataset in the study, with the labels for each
#' @param ds_vars information on what variables are in each dataset + plus dataset specific variable information
#' @param var_spec variable information that is shared across all datasets
#' @param value_spec parameter specific information, as data is long the specs for wbc might be different than hgb
#' @param derivations contains derivation, it allows for different variables to have the same derivation
#' @param codelist contains the code/decode information
#' @param supp contains the idvar and qeval information for supplemental variables
#'
#' @family Metacore
#'
#' @export
#'
metacore <- function(ds_spec = tibble(dataset = character(), structure = character(), label = character()),
                     ds_vars = tibble(dataset = character(), variable = character(), keep = logical(),
                                      key_seq = integer(), order = integer(), core = character(),
                                      supp_flag = logical()),
                     var_spec = tibble(variable = character(), label = character(), length = integer(),
                                       type = character(), common = character(), format = character()),
                     value_spec = tibble(dataset = character(),
                                         variable = character(),
                                         where = character(),
                                         type = character(),
                                         sig_dig = integer(),
                                         code_id = character(),
                                         origin = character(),
                                         derivation_id = integer()),
                     derivations = tibble(derivation_id = integer(), derivation = character()),
                     codelist = tibble(code_id = character(), name = character(), type = character(), codes = list()),
                     supp = tibble(dataset = character(), variable = character(), idvar = character(), qeval = character())) {
  # Check if there are any empty datasets that need adding.
  # as.list(environment()) captures all the arguments just bound above;
  # keep(is.null) finds the ones a caller explicitly passed as NULL
  # (the defaults are zero-row tibbles, never NULL)
  is_empty_df <- as.list(environment()) %>%
    keep(is.null)
  if(length(is_empty_df) > 0) {
    # Adding empty datasets: rebuild each NULL argument as a zero-row tibble
    # with correctly typed columns.
    # NOTE(review): all_message() is defined elsewhere in the package and
    # presumably yields one row per expected dataset/column with a `test`
    # predicate column — confirm against checks.R
    to_replace <- all_message() %>%
      #get the type each variable needs to be
      mutate(convert =
               map(.data$test, function(x){
                 # Compare against the primitives to map each check function
                 # to a zero-length vector of the matching type
                 if(identical(x, .Primitive("is.numeric"))){
                   numeric()
                 } else if(identical(x, .Primitive("is.logical"))){
                   logical()
                 } else {
                   character()
                 }
               })) %>%
      filter(dataset %in% names(is_empty_df)) %>%
      group_by(dataset) %>%
      group_split()
    # Turn each group into a zero-row tibble whose columns carry the types
    replaced <- to_replace %>%
      map(function(df){
        names(df$convert) <- df$var
        df$convert %>%
          as_tibble()
      })
    names(replaced) <- to_replace %>% map_chr(~unique(.$dataset))
    # Overwrite the NULL arguments in this function's own environment so the
    # MetaCore$new() call below sees the rebuilt tibbles
    list2env(replaced, environment())
  }
  MetaCore$new(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist, supp)
}
317 |
318 |
319 |
#' Select metacore object to single dataset
#'
#' @param .data the metacore object of dataframes
#' @param dataset the specific dataset to subset by
#' @param simplify return a single dataframe
#'
#' @return a filtered subset of the metacore object, or a single joined
#'   dataframe when `simplify = TRUE`
#' @export
#'
select_dataset <- function(.data, dataset, simplify = FALSE) {

  # Work on a clone so the caller's metacore object is left untouched
  cl <- .data$clone()
  cl$metacore_filter(dataset)

  if (simplify) {
    # Join all the tables into one wide dataframe; codelist is trimmed to
    # code_id/codes so the natural-join keys stay unambiguous.
    # Bug fix: the result was previously assigned to a stray `test` variable,
    # so it was only returned invisibly via the value of the assignment
    suppressMessages(
      list(
        cl$ds_vars,
        cl$var_spec,
        cl$value_spec,
        cl$derivations,
        select(cl$codelist, code_id, codes),
        cl$supp
      ) %>%
        reduce(left_join)
    )
  } else {
    cl
  }
}
352 |
353 |
354 |
#' Get Control Term
#'
#' Returns the control term (a vector for permitted values and a tibble for code
#' lists) for a given variable. The dataset can be optionally specified if there
#' is different control terminology for different datasets
#'
#' @param metacode metacore object
#' @param variable A variable name to get the controlled terms for. This can
#'   either be a string or just the name of the variable
#' @param dataset A dataset name. This is not required if there is only one set
#'   of control terminology across all datasets
#'
#' @return a vector for permitted values and a 2-column tibble for codelists
#' @export
#'
#' @importFrom rlang as_label enexpr as_name
#'
#' @examples
#' \dontrun{
#' meta_ex <- spec_to_metacore(metacore_example("p21_mock.xlsx"))
#' get_control_term(meta_ex, QVAL, SUPPAE)
#' get_control_term(meta_ex, "QVAL", "SUPPAE")
#' }
get_control_term <- function(metacode, variable, dataset = NULL){
  # Accept either a quoted string or a bare symbol for both arguments
  var_str <- ifelse(str_detect(as_label(enexpr(variable)), "\""),
                    as_name(variable), as_label(enexpr(variable)))
  dataset_val <- ifelse(str_detect(as_label(enexpr(dataset)), "\""),
                        as_name(dataset), as_label(enexpr(dataset))) # to make the filter more explicit
  if(!var_str %in% metacode$value_spec$variable){
    stop(paste0(var_str, " not found in the value_spec table. Please check the variable name"))
  }
  if(dataset_val == "NULL"){
    # No dataset given: pull the code_id across all datasets
    var_code_id <- metacode$value_spec %>%
      filter(variable == var_str) %>%
      pull(code_id) %>%
      unique()
  } else {
    subset_data <- metacode$value_spec %>%
      filter(dataset == dataset_val)
    if(nrow(subset_data) == 0){
      stop(paste0(dataset_val, " not found in the value_spec table. Please check the dataset name"))
    }
    var_code_id <- subset_data %>%
      filter(variable == var_str) %>%
      pull(code_id) %>%
      unique()
  }
  if(length(var_code_id) > 1){
    # Typo fixed in the message: "spcificing" -> "specifying"
    stop(paste0(var_str, " does not have a unique control term, consider specifying a dataset"))
  }
  ct <- metacode$codelist %>%
    filter(code_id == var_code_id) %>%
    pull(codes)
  if(length(ct) == 0){
    message(paste0(var_str, " has no control terminology"))
  } else {
    return(ct[[1]])
  }
}
414 |
415 |
#' Get Dataset Keys
#'
#' Returns the dataset keys for a given dataset
#'
#' @param metacode metacore object
#' @param dataset A dataset name
#'
#' @return a 2-column tibble with dataset key variables and key sequence
#' @export
#'
#' @importFrom rlang as_label enexpr as_name
#'
#' @examples
#' \dontrun{
#' meta_ex <- spec_to_metacore(metacore_example("p21_mock.xlsx"))
#' get_keys(meta_ex, "AE")
#' get_keys(meta_ex, AE)
#' }
get_keys <- function(metacode, dataset){
  # Accept the dataset either as a quoted string or a bare symbol
  ds_name <- ifelse(str_detect(as_label(enexpr(dataset)), "\""),
                    as_name(dataset), as_label(enexpr(dataset))) # to make the filter more explicit

  ds_rows <- metacode$ds_vars %>%
    filter(dataset == ds_name)
  if(nrow(ds_rows) == 0){
    stop(paste0(ds_name, " not found in the ds_vars table. Please check the dataset name"))
  }

  # Key variables are the ones with a non-missing key_seq, ordered by it
  ds_rows %>%
    filter(!is.na(key_seq)) %>%
    select(variable, key_seq) %>%
    arrange(key_seq)
}
452 |
453 |
#' save metacore object
#'
#' @param metacore_object the metacore object in memory to save to disc
#' @param path file path and file name to save metacore object. Any extension
#'   other than .rds (or a missing extension) is replaced with .rds
#'
#' @return an .rds file
#' @export
#'
save_metacore <- function(metacore_object, path = NULL) {
  if (is.null(path)) {
    # No path: save to the working directory with the same name as the object
    nm <- deparse(substitute(metacore_object))
    path <- paste0(nm, ".rds")
  } else {
    # Check the suffix of the path; str_extract returns NA when the path has
    # no extension at all, which previously made `suffix == ".rds"` error in
    # the if() condition
    suffix <- str_extract(path, "\\.\\w*$")
    if (is.na(suffix) || suffix != ".rds") {
      # Replace a missing or non-.rds extension with .rds
      # (str_remove is a no-op when there is no extension to strip)
      path <- paste0(str_remove(path, "\\.\\w*$"), ".rds")
    }
  }
  saveRDS(metacore_object, path)
}
484 |
#' load metacore object
#'
#' @param path location of the metacore object to load into memory
#'
#' @return metacore object in memory
#' @export
load_metacore <- function(path = NULL) {
  if (is.null(path)) {
    # Suggest candidate files; pattern anchored to the .rds extension
    # (the old pattern ".rds" matched any character + "rds" anywhere,
    # e.g. "birds.txt")
    rdss <- list.files(".", "\\.rds$")
    if (length(rdss) == 0) {
      stop("please supply path to metacore object ending with extension .rds", call. = FALSE)
    } else {
      stop("metacore object path required, did you mean:",
           paste(" ", rdss, sep = "\n "), call. = FALSE)
    }
  }
  readRDS(path)
}
503 |
--------------------------------------------------------------------------------
/R/spec_builder.R:
--------------------------------------------------------------------------------
#' Specification document to metacore object
#'
#' This function takes the location of an excel specification document and reads
#' it in as a meta core object. At the moment it only supports specification in
#' the format of pinnacle 21 specifications. But, the section level spec builder can
#' be used as building blocks for bespoke specification documents.
#'
#' @param path string of file location
#' @param quiet Option to quietly load in, this will suppress warnings, but not
#'   errors
#' @param where_sep_sheet Option to tell if the where is in a separate sheet,
#'   like in older p21 specs or in a single sheet like newer p21 specs
#'
#' @return given a spec document it returns a metacore object
#' @export
spec_to_metacore <- function(path, quiet = FALSE, where_sep_sheet = TRUE){
  doc <- read_all_sheets(path)

  # Guard clause: only the "by_type" (pinnacle 21 style) layout is supported
  if(spec_type(path) != "by_type"){
    stop("This specification format is not currently supported. You will need to write your own reader",
         call. = FALSE)
  }

  # Build each metacore table from its section of the spec
  ds_spec <- spec_type_to_ds_spec(doc)
  ds_vars <- spec_type_to_ds_vars(doc)
  var_spec <- spec_type_to_var_spec(doc)
  value_spec <- spec_type_to_value_spec(doc, where_sep_sheet = where_sep_sheet)
  derivations <- spec_type_to_derivations(doc)
  code_list <- spec_type_to_codelist(doc)

  if(quiet){
    out <- suppressWarnings(metacore(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist = code_list))
    message("Loading in metacore object with suppressed warnings")
  } else {
    out <- metacore(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist = code_list)
  }
  out
}
38 |
39 |
40 |
41 |
#' Check the type of spec document
#'
#' @param path file location as a string
#'
#' @return returns string indicating the type of spec document
#' @export
#'
spec_type <- function(path){
  sheets <- excel_sheets(path)
  # Character classes fixed: "[D|d]" also matched a literal "|" in a sheet
  # name, which was never intended; "[Dd]" matches just the two letter cases
  if(!any(sheets %>% str_detect("[Dd]omains?|[Dd]atasets?"))){
    stop("File does not contain a Domain/Datasets tab, which is needed. Please either modify the spec document or write a reader (see documentation for more information)",
         call. = FALSE)
  } else if(any(sheets %>% str_detect("ADSL|DM"))){
    # One tab per dataset (e.g. an ADSL or DM sheet)
    type <- "by_ds"
  } else if(any(sheets %>% str_detect("[Vv]ariables?"))){
    # One tab per metadata type (Datasets / Variables / ...)
    type <- "by_type"
  } else {
    stop("File in an unknown format. Please either modify the spec document or write a reader (see documentation for more information)",
         call. = FALSE)
  }
  type
}
64 |
65 |
66 |
67 |
#' Read in all Sheets
#'
#' Given a path to a file, this function reads in all sheets of an excel file
#'
#' @param path string of the file path
#' @export
#'
#' @return a list of datasets
read_all_sheets <- function(path){
  sheet_names <- excel_sheets(path)
  # Read every sheet as text so column types are normalized downstream
  out <- map(sheet_names, function(sh){
    read_excel(path, sheet = sh, col_types = "text")
  })
  names(out) <- sheet_names
  out
}
83 |
84 |
#' Spec to ds_spec
#'
#' Creates the ds_spec from a list of datasets (optionally filtered by the sheet
#' input). The named vector `cols` is used to determine which is the correct
#' sheet and renames the columns
#' @param doc Named list of datasets @seealso [read_all_sheets()] for exact
#'   format
#' @param cols Named vector of column names. The column names can be regular
#'   expressions for more flexibility. But, the names must follow the given pattern
#' @param sheet Regular expression for the sheet name
#'
#' @return a dataset formatted for the metacore object
#' @export
#'
#' @family spec builders
spec_type_to_ds_spec <- function(doc, cols = c("dataset" = "[N|n]ame|[D|d]ataset|[D|d]omain",
                                               "structure" = "[S|s]tructure",
                                               "label" = "[L|l]abel|[D|d]escription"), sheet = NULL){
  # Validate the names of the `cols` vector before doing any work
  valid_names <- all(names(cols) %in% c("dataset", "structure", "label"))
  if(is.null(names(cols)) || !valid_names){
    stop("Supplied column vector must be named using the following names:
            'dataset', 'structure', 'label'")
  }
  # Optionally limit to the sheets matching the regular expression
  if(!is.null(sheet)){
    doc <- doc[str_subset(names(doc), sheet)]
  }

  # Columns metacore expects but that weren't supplied get filled with NA
  missing <- col_vars()$.ds_spec %>%
    discard(~. %in% names(cols))

  create_tbl(doc, cols) %>%
    distinct() %>%
    `is.na<-`(missing)
}
122 |
#' Spec to ds_vars
#'
#' Creates the ds_vars from a list of datasets (optionally filtered by the sheet
#' input). The named vector `cols` is used to determine which is the correct
#' sheet and renames the columns
#'
#' @param doc Named list of datasets @seealso [read_all_sheets()] for exact
#'   format
#' @param cols Named vector of column names. The column names can be regular
#'   expressions for more flexibility. But, the names must follow the given
#'   pattern
#' @param sheet Regular expression for the sheet names
#' @param key_seq_sep_sheet A boolean to indicate if the key sequence is on a
#'   separate sheet. If set to false add the key_seq column name to the `cols`
#'   vector.
#' @param key_seq_cols names vector to get the key_sequence for each dataset
#'
#' @return a dataset formatted for the metacore object
#' @export
#'
#' @family spec builders
spec_type_to_ds_vars <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d]omain",
                                               "variable" = "[V|v]ariable [[N|n]ame]?|[V|v]ariables?",
                                               "order" = "[V|v]ariable [O|o]rder|[O|o]rder",
                                               "keep" = "[K|k]eep|[M|m]andatory"),
                                 key_seq_sep_sheet = TRUE,
                                 key_seq_cols = c("dataset" = "Dataset",
                                                  "key_seq" = "Key Variables"),
                                 sheet = "[V|v]ar|Datasets"){

  # All names in `cols` must come from the allowed ds_vars column set
  name_check <- names(cols) %in% c("variable", "dataset", "order",
                                   "keep", "key_seq", "core", "supp_flag") %>%
    all()

  # key_seq_cols is only validated when the key sequence lives on its own
  # sheet; note magrittr places the piped value at the `.`, so this reads
  # ifelse(key_seq_sep_sheet, <all() result>, TRUE)
  name_check_extra <- names(key_seq_cols) %in% c("dataset", "key_seq") %>%
    all() %>%
    ifelse(key_seq_sep_sheet, ., TRUE) # Adding it cause we only want to check when sep sheet is true

  # Testing for names of vectors
  if(any(!name_check, !name_check_extra, is.null(names(cols)))){
    stop("Supplied column vector must be named using the following names:
         'variable', 'dataset', 'order', 'keep', 'core', 'key_seq', 'supp_flag'")
  }
  # Subsetting sheets
  if(!is.null(sheet)){
    sheet_ls <- str_subset(names(doc), sheet)
    doc <- doc[sheet_ls]
  }
  # Get base doc: find the sheet matching `cols` and rename its columns
  out <-doc %>%
    create_tbl(cols)

  # Getting the key seq values: split the comma-separated "Key Variables"
  # string into one row per key variable, numbered by position
  if(key_seq_sep_sheet){
    key_seq_df <- doc %>%
      create_tbl(key_seq_cols) %>%
      mutate(key_seq = str_split(key_seq, ",\\s"),
             key_seq = map(key_seq, function(x){
               tibble(variable = x) %>%
                 mutate(key_seq = row_number())
             })) %>%
      unnest(key_seq)
    out <- left_join(out, key_seq_df, by = c("dataset", "variable"))
  }

  # Get missing columns (expected by metacore but absent here); these are
  # filled with NA by the `is.na<-` call below
  missing <- col_vars()$.ds_vars %>%
    discard(~. %in% names(out))

  out %>%
    distinct() %>%
    `is.na<-`(missing) %>%
    # Coerce to the column types the metacore object expects
    mutate(key_seq = as.integer(key_seq),
           keep = yn_to_tf(keep),
           core = as.character(core),
           order = as.numeric(order))
}
200 |
201 |
#' Spec to var_spec
#'
#' Creates the var_spec from a list of datasets (optionally filtered by the sheet
#' input). The named vector `cols` is used to determine which is the correct
#' sheet and renames the columns. (Note: the keep column will be converted logical)
#'
#' @param doc Named list of datasets @seealso [read_all_sheets()] for exact
#'   format
#' @param cols Named vector of column names. The column names can be regular
#'   expressions for more flexibility. But, the names must follow the given pattern
#' @param sheet Regular expression for the sheet name
#'
#' @return a dataset formatted for the metacore object
#' @export
#'
#' @family spec builders
spec_type_to_var_spec <- function(doc, cols = c("variable" = "[N|n]ame|[V|v]ariables?",
                                                "length" = "[L|l]ength",
                                                "label" = "[L|l]abel",
                                                "type" = "[T|t]ype",
                                                "dataset" = "[D|d]ataset|[D|d]omain",
                                                "format" = "[F|f]ormat"),
                                  sheet = "[V|v]ar"){
  # Check the names of the `cols` vector against the allowed set
  name_check <- names(cols) %in% c("variable", "length", "label",
                                   "type", "dataset", "common", "format") %>%
    all()
  if(!name_check | is.null(names(cols))){
    stop("Supplied column vector must be named using the following names:
         'variable', 'length', 'label', 'type', 'dataset', 'common', 'format'
         If common is not avaliable it can be excluded and will be automatically filled in.
         Additionally, dataset is only used to clarify if information differs by domain")
  }

  # Check if sheet is specified; if so keep only matching sheets
  if(!is.null(sheet)){
    sheet_ls <- str_subset(names(doc), sheet)
    doc <- doc[sheet_ls]
  }
  out <- create_tbl(doc, cols)
  if(!"dataset" %in% names(out)){
    # Without a dataset column we cannot disambiguate variables that carry
    # different metadata in different datasets, so error if any exist
    dups <- out %>%
      distinct() %>%
      group_by(variable) %>%
      summarise(n = n(), .groups = "drop") %>%
      filter(n > 1)
    if(nrow(dups) > 0){
      dups %>%
        pull(variable) %>%
        paste(collapse = "\n") %>%
        paste0("The following variables are repeated with different metadata for different datasets:\n",
               ., "\nPlease add 'dataset' = [Name of dataset column] to your named cols vector, to correct for this") %>%
        stop(., call. = FALSE)
    }
  } else {
    if(!"common" %in% names(cols)){
      # Get the variables common to all datasets; can only be calculated with
      # a dataset column present (a variable is "common" when it appears in
      # every dataset, computed via the repeated inner join)
      common_vars <- out %>%
        group_by(dataset) %>%
        select(dataset, variable) %>%
        group_split(.keep = FALSE) %>%
        reduce(inner_join, by = "variable") %>%
        mutate(common = TRUE)
      out <- out %>%
        left_join(common_vars, by = "variable") %>%
        replace_na(list(common = FALSE))
    }

    # Remove any multiples, and prefix with the dataset name when a variable
    # has different metadata in different datasets (dataset.variable)
    out <- out %>%
      group_by(variable) %>%
      mutate(unique = n_distinct(length, label, type),
             variable = if_else(unique == 1, variable,
                                paste0(dataset, ".", variable)),
             length = as.numeric(length)) %>%
      distinct(variable, length, label, type, .keep_all = TRUE) %>%
      select(-dataset, -unique)
  }

  # Get missing columns (expected by metacore but absent); filled with NA below
  missing <- col_vars()$.var_spec %>%
    discard(~. %in% names(out))
  out %>%
    `is.na<-`(missing) %>%
    distinct() %>%
    ungroup() %>%
    mutate(length = as.integer(length))
}
290 |
#' Spec to value_spec
#'
#' Creates the value_spec from a list of datasets (optionally filtered by the
#' sheet input). The named vector `cols` is used to determine which is the
#' correct sheet and renames the columns
#'
#' @param doc Named list of datasets @seealso [read_all_sheets()] for exact
#'   format
#' @param cols Named vector of column names. The column names can be regular
#'   expressions for more flexibility. But, the names must follow the given
#'   pattern
#' @param sheet Regular expression for the sheet name
#' @param where_sep_sheet Boolean value to control if the where information in a
#'   separate dataset. If the where information is on a separate sheet, set to
#'   true and provide the column information with the `where_cols` inputs.
#' @param where_cols Named list with an id and where field. All columns in the
#'   where field will be collapsed together
#' @param var_sheet Name of sheet with the Variable information on it. Metacore
#'   expects each variable will have a row in the value_spec. Because many
#'   specification only have information in the value tab this is added. If the
#'   information already exists in the value tab of your specification set to
#'   NULL
#'
#' @return a dataset formatted for the metacore object
#' @export
#'
#' @family spec builders
spec_type_to_value_spec <- function(doc, cols = c("dataset" = "[D|d]ataset|[D|d]omain",
                                                  "variable" = "[N|n]ame|[V|v]ariables?",
                                                  "origin" = "[O|o]rigin",
                                                  "type" = "[T|t]ype",
                                                  "code_id" = "[C|c]odelist|Controlled Term",
                                                  "sig_dig" = "[S|s]ignificant",
                                                  "where" = "[W|w]here",
                                                  "derivation_id" = "[M|m]ethod",
                                                  "predecessor" = "[P|p]redecessor"),
                                    sheet = NULL,
                                    where_sep_sheet = TRUE,
                                    where_cols = c("id" = "ID",
                                                   "where" = c("Variable", "Comparator", "Value")),
                                    var_sheet = "[V|v]ar"){
  # All names in `cols` must come from the allowed value_spec column set
  name_check <- names(cols) %in% c("variable", "origin", "code_id", "sig_dig",
                                   "type", "dataset", "where", "derivation_id",
                                   "predecessor") %>%
    all()

  if(!name_check| is.null(names(cols))){
    stop("Supplied column vector must be named using the following names:
         'dataset', 'variable', 'origin', 'code_id', 'type', 'where', 'sig_dig', 'derivation_id',
         'predecessor'
         If derivation_id is not avaliable it can be excluded and dataset.variable will be used.

         If the where information is on a seperate sheet, put the column with cross ref as where.")
  }

  # Select a subset of sheets if specified
  if(!is.null(sheet)){
    sheet_ls <- str_subset(names(doc), sheet)
    doc <- doc[sheet_ls]
  }

  out <- create_tbl(doc, cols)

  # Does a var sheet exist? If so, `var_sheet` becomes the vector of matching
  # sheet names (possibly empty)
  if(!is.null(var_sheet)){
    var_sheet <- names(doc) %>%
      keep(~str_detect(., var_sheet))
  }

  # If so, add any variables not already covered by the value sheet; their
  # where clause is the constant "TRUE" since they apply unconditionally
  if(length(var_sheet) > 0){
    var_out <- doc[var_sheet] %>%
      map_dfr(function(x){
        var_out <- x %>%
          select_rename_w_dups(cols) %>%
          mutate(where = "TRUE")
        if(nrow(out) > 0){
          # Keep only variables with no row in the value sheet
          var_out %>%
            anti_join(out, by = "variable")
        } else {
          var_out
        }
      })

    # THIS ISN'T VERY PRETTY, IF SOMEONE HAS A BETTER IDEA PLEASE FIX
    # Needed in case the value sheet is empty
    if(nrow(out) > 0 & nrow(var_out) > 0){
      out <- bind_rows(out, var_out)
    } else if(nrow(var_out) > 0) {
      out <- var_out
    } else {
      out
    }

  }

  if(where_sep_sheet & "where" %in% names(out)){
    # Collapse the where sheet's clause columns into one string per id, then
    # swap the cross-reference id in `where` for the collapsed clause
    where_df <- create_tbl(doc, where_cols) %>%
      mutate(
        where_new = pmap_chr(., function(...) {
          # Without c_across this gets a little weird
          # Use pmap and steal out the arg names
          vars <- list(...)
          # Filter down to only args that start with where
          wheres <- as.character(vars[which(str_starts(names(vars), 'where'))])
          # collapse it together
          paste(wheres, collapse=" ")
        })
      ) %>%
      select(id, where_new)
    out <- out %>%
      left_join(where_df, by = c("where" = "id")) %>%
      select(-where, where = where_new)
  } else if(where_sep_sheet) {
    warning("Not able to add where infromation from seperate sheet cause a where column is needed to cross-reference the information",
            call. = FALSE)
  }

  # No derivation_id supplied: synthesize one from origin + dataset.variable
  if(!"derivation_id" %in% names(cols)){
    out <- out %>%
      mutate(derivation_id =
               if_else(str_to_lower(.data$origin) == "assigned",
                       paste0(dataset, ".", variable),
                       paste0("pred.", dataset, ".", variable)))
  }

  # Get missing columns (expected by metacore but absent); filled with NA below
  missing <- col_vars()$.value_spec %>%
    discard(~. %in% names(out))

  out %>%
    `is.na<-`(missing) %>%
    distinct() %>%
    # Fill in derivation ids for predecessor/assigned origins when missing
    mutate(sig_dig = as.integer(.data$sig_dig),
           derivation_id = case_when(
             !is.na(.data$derivation_id) ~ .data$derivation_id,
             str_to_lower(.data$origin) == "predecessor" ~ paste0("pred.", as.character(.data$predecessor)),
             str_to_lower(.data$origin) == "assigned" ~ paste0(.data$dataset, ".", .data$variable))
    ) %>%
    select(-.data$predecessor)

}
433 |
#' Spec to codelist
#'
#' Creates the codelist from a list of datasets (optionally filtered by the
#' sheet input). The named vector `*_cols` is used to determine which is the
#' correct sheet and renames the columns.
#' @param doc Named list of datasets @seealso [read_all_sheets()] for exact
#'   format
#' @param codelist_cols Named vector of column names that make up the codelist.
#'   The column names can be regular expressions for more flexibility. But, the
#'   names must follow the given pattern
#' @param permitted_val_cols Named vector of column names that make up the
#'   permitted value The column names can be regular expressions for more
#'   flexibility. This is optional, can be left as null if there isn't a
#'   permitted value sheet
#' @param dict_cols Named vector of column names that make up the dictionary
#'   value The column names can be regular expressions for more flexibility.
#'   This is optional, can be left as null if there isn't a permitted value
#'   sheet
#' @param sheets Optional, regular expressions of the sheets
#' @param simplify Boolean value, if true will convert code/decode pairs that
#'   are all equal to a permitted value list. False by default
#'
#' @return a dataset formatted for the metacore object
#' @export
#'
#' @family spec builders
spec_type_to_codelist <- function(doc, codelist_cols = c("code_id" = "ID",
                                                         "name" = "[N|n]ame",
                                                         "code" = "^[C|c]ode|^[T|t]erm",
                                                         "decode" = "[D|d]ecode"),
                                  permitted_val_cols = NULL,
                                  dict_cols = c("code_id" = "ID",
                                                "name" = "[N|n]ame",
                                                "dictionary" = "[D|d]ictionary",
                                                "version" = "[V|v]ersion"),
                                  sheets = NULL, simplify = FALSE){
  # Validate each of the three column-name vectors before reading anything
  if(is.null(codelist_cols)){
    stop("Codelist column names must be provided", call. = FALSE)
  } else {
    name_check <- names(codelist_cols) %in% c("code_id", "name", "code", "decode") %>%
      all()
    if(!name_check| is.null(names(codelist_cols))){
      stop("Supplied column vector for codelist_cols must be named using the following names:
         'code_id', 'name', 'code', 'decode'",
           call. = FALSE
      )
    }
  }

  if (!is.null(permitted_val_cols)){
    name_check <- names(permitted_val_cols) %in% c("code_id", "name", "code") %>%
      all()
    if(!name_check){
      stop("Supplied column vector for permitted_val_cols must be named using the following names:
         'code_id', 'name', 'code'",
           call. = FALSE)
    }
  }
  if(!is.null(dict_cols)){
    name_check <- names(dict_cols) %in% c("code_id", "name", "dictionary", "version") %>%
      all()
    if(!name_check){
      stop("Supplied column vector for `dict_cols` must be named using the following names:
         'code_id', 'name', 'dictionary', 'version',
         If a dictionary sheet isn't avaliable set `dict_cols` to NULL",
           call. = FALSE)
    }
  }

  # Select a subset of sheets if specified
  if(!is.null(sheets)){
    sheet_ls <- str_subset(names(doc), sheets)
    doc <- doc[sheet_ls]
  }

  # Create the base table with codes and decodes (min req output); each
  # code_id gets one row with its code/decode pairs nested in `codes`.
  # With simplify = TRUE, lists whose codes all equal their decodes are
  # collapsed to a plain character vector of permitted values
  cd_out <- create_tbl(doc, codelist_cols) %>%
    group_by(code_id) %>%
    mutate(type = case_when(simplify & all(code == decode) ~ "permitted_val",
                            TRUE ~ "code_decode")) %>%
    nest(codes = c(code, decode)) %>%
    mutate(codes = if_else(type == "permitted_val",
                           lapply(codes, function(df) df %>% pull(code)),
                           codes))
  # If available get a permitted value sheet
  # NOTE(review): permitted_val_cols only allows 'code_id', 'name', 'code',
  # yet this nests c(code, decode) — confirm create_tbl supplies a decode
  # column here, otherwise this nest would fail on a missing column
  if(!is.null(permitted_val_cols)){
    pv_out <- create_tbl(doc, permitted_val_cols) %>%
      mutate(type = "permitted_val") %>%
      group_by(code_id) %>%
      nest(codes = c(code, decode))
    cd_out <- bind_rows(cd_out, pv_out)
  }
  # Add dictionary if available (external library references)
  if(!is.null(dict_cols)){
    dic_out <- create_tbl(doc, dict_cols) %>%
      mutate(type = "external_library") %>%
      group_by(code_id) %>%
      nest(codes = c(dictionary, version))
    cd_out <- bind_rows(cd_out, dic_out)
  }
  # Get missing columns (expected by metacore but absent); filled with NA below
  missing <- col_vars()$.codelist %>%
    discard(~. %in% names(cd_out))

  cd_out %>%
    `is.na<-`(missing) %>%
    distinct() %>%
    filter(!is.na(code_id)) %>%
    ungroup()
}
544 |
545 | #' Spec to derivation
546 | #'
547 | #' Creates the derivation table from a list of datasets (optionally filtered by
548 | #' the sheet input). The named vector `cols` is used to determine which is the
549 | #' correct sheet and renames the columns. The derivation will be used for
550 | #' "derived" origins, the comments for "assigned" origins, and predecessor for
551 | #' "predecessor" origins.
552 | #' @param doc Named list of datasets @seealso [read_all_sheets()] for exact
553 | #' format
554 | #' @param cols Named vector of column names. The column names can be regular
555 | #' expressions for more flexibility. But, the names must follow the given
556 | #' pattern
557 | #' @param var_cols Named vector of the name(s) of the origin, predecessor and
558 | #' comment columns. These do not have to be on the specified sheet.
559 | #' @param sheet Regular expression for the sheet name
560 | #'
561 | #' @return a dataset formatted for the metacore object
562 | #' @export
563 | #'
564 | #' @family spec builders
565 | #' @importFrom purrr quietly
spec_type_to_derivations <- function(doc, cols = c("derivation_id" = "ID",
                                                   "derivation" = "[D|d]efinition|[D|d]escription"),
                                     sheet = "Method|Derivations?",
                                     var_cols = c("dataset" = "[D|d]ataset|[D|d]omain",
                                                  "variable" = "[N|n]ame|[V|v]ariables?",
                                                  "origin" = "[O|o]rigin",
                                                  "predecessor" = "[P|p]redecessor",
                                                  "comment" = "[C|c]omment")){

  # Validate cols names (names() of an unnamed vector is NULL, making
  # all(logical(0)) TRUE, hence the additional is.null() test)
  name_check <- names(cols) %in% c("derivation_id", "derivation") %>%
    all()
  if(!name_check| is.null(names(cols))){
    stop("Supplied column vector must be named using the following names:
         'derivation_id', 'derivation'")
  }

  # Validate var_cols names the same way
  name_check <- names(var_cols) %in% c('dataset', 'variable', 'origin', 'predecessor', 'comment') %>%
    all()
  if(!name_check| is.null(names(var_cols))){
    stop("Supplied variable column vector must be named using the following names:
         'dataset', 'variable', 'origin', 'predecessor', 'comment'")
  }
  # Get the predecessor
  # quietly() swallows create_tbl's "several sheets match" warning; $result
  # is a single tibble when one sheet matched, or a plain list of tibbles
  # when several matched
  ls_derivations <- quietly(create_tbl)(doc, var_cols)$result
  if(class(ls_derivations)[1] == "list"){
    # Several sheets matched: stack them into one table
    ls_derivations <- ls_derivations %>%
      reduce(bind_rows)
    # Get the comments: for "assigned" origins, replace the comment id with
    # the comment text looked up from a Comment sheet (when one exists)
    if(any(str_detect(names(doc), "[C|c]omment"))){
      comments <- doc[str_detect(names(doc), "[C|c]omment")][[1]] |>
        select(matches("ID|Description"))
      with_comments <- ls_derivations |>
        filter(str_to_lower(.data$origin) == "assigned") |>
        left_join(comments, by = c("comment" = "ID" )) |>
        mutate(comment = .data$Description) |>
        select(-.data$Description)
      ls_derivations <- ls_derivations |>
        filter(str_to_lower(.data$origin) != "assigned") |>
        bind_rows(with_comments)
    }
  }

  # Derivation rows implied by origin type:
  #   predecessor -> id "pred.<predecessor>", derivation = the predecessor
  #   assigned    -> id "<dataset>.<variable>", derivation = the comment
  # Every other origin is dropped here (handled by the derivation sheet)
  other_derivations <- ls_derivations %>%
    mutate(
      derivation_id = case_when(
        str_to_lower(.data$origin) == "predecessor" ~ paste0("pred.", as.character(.data$predecessor)),
        str_to_lower(.data$origin) == "assigned" ~ paste0(.data$dataset, ".", .data$variable),
        TRUE ~ NA_character_
      ),
      derivation = case_when(
        str_to_lower(.data$origin) == "predecessor" ~ as.character(.data$predecessor),
        str_to_lower(.data$origin) == "assigned" ~ .data$comment,
        TRUE ~ NA_character_
      )) %>%
    filter(!is.na(.data$derivation_id)) %>%
    select(.data$derivation, .data$derivation_id)

  # Select a subset of sheets if specified
  if(!is.null(sheet)){
    sheet_ls <- str_subset(names(doc), sheet)
    doc <- doc[sheet_ls]
  }
  # "Derived" origins come straight off the method/derivation sheet
  out <- create_tbl(doc, cols)

  # Get the metacore derivation columns the spec didn't provide
  missing <- col_vars()$.derivations %>%
    discard(~. %in% names(out))


  out %>%
    `is.na<-`(missing) %>%
    bind_rows(other_derivations) %>%
    distinct() %>%
    filter(!is.na(derivation_id))
}
641 | ### Helper Functions
642 |
643 | #' Create table
644 | #'
645 | #' This function creates a table from excel sheets. This is mainly used
646 | #' internally for building spec readers, but is exported so others who need to
647 | #' build spec readers can use it.
648 | #' @param doc list of sheets from a excel doc
649 | #' @param cols vector of regex to get a datasets base on which columns it has.
650 | #' If the vector is named it will also rename the columns
651 | #'
652 | #' @return dataset (or list of datasets if not specific enough)
653 | #' @export
create_tbl <- function(doc, cols){
  # Keep only the sheets where every requested column pattern matches at
  # least one column name
  matches <- doc %>%
    keep(function(x){
      cols %>%
        map_lgl(~any(str_detect(names(x), .))) %>%
        all()
    })
  if(length(matches) == 0) {
    # No sheet matched: build a helpful error. First, get which patterns
    # can't be matched on each sheet
    mismatch_per_sheet <- doc %>%
      map(function(x){
        cols %>%
          map_lgl(~any(str_detect(names(x), .))) %>%
          discard(~.) # Remove the matched values
      })
    # Find the closest sheet by looking for the sheet(s) with the fewest mismatches
    mis_lens <- mismatch_per_sheet %>%
      map_int(length)
    closest_sheets <- mis_lens %>%
      keep(~ . == min(mis_lens)) %>%
      names()
    # Get the name of the sheets and which columns don't match
    sheets_to_error <- mismatch_per_sheet %>%
      keep(names(.) %in% closest_sheets)

    # Write out the error
    sheets_to_error %>%
      map2_chr(names(sheets_to_error), function(vars, sheet_name){
        paste0("Sheet '", sheet_name, "' is the closest match, but unable to match the following column(s)\n",
               paste(names(vars), collapse = "\n"))
      }) %>%
      paste0(collapse = "\n") %>%
      paste0("Unable to identify a sheet with all columns.\n", . ) %>%
      stop(call. = FALSE)

  } else if(length(matches) == 1){
    # Exactly one sheet matched: check each pattern hits exactly one column
    # and write a better warning message if names don't work
    ds_nm <- matches[[1]] %>%
      names()
    # Patterns matching more (or fewer) than exactly one column
    nm_test <- cols %>%
      map_int(~sum(str_detect(ds_nm, .))) %>%
      keep(~ . != 1)
    if(length(nm_test) > 0) {
      # See if an exact match (anchoring the pattern with ^...$) will
      # disambiguate the ambiguous patterns
      test_exact <- cols[names(nm_test)] %>%
        paste0("^", ., "$") %>%
        map_int(~sum(str_detect(ds_nm, .))) %>%
        keep(~ . != 1)
      if(length(test_exact) == 0){
        # Anchoring fixed every ambiguity: use the anchored patterns
        cols[names(nm_test)] <- cols[names(nm_test)] %>%
          paste0("^", ., "$")
      } else {
        str_c(names(nm_test), " matches ",nm_test, " columns") %>%
          str_c(collapse = "\n ") %>%
          paste0("Unable to rename the following columns in ", names(matches[1]), ":\n ", .,
                 "\nPlease check your regular expression ") %>%
          stop(call. = FALSE)
      }
    }

    # This needs to be done columnwise to allow for duplicate selection of the same column
    select_rename_w_dups(matches[[1]], cols)

  } else {
    # Several sheets matched: warn and return all candidates as a list
    sheets_mats <- matches %>%
      names()
    paste("Column names are not specific enough to identify a single sheet. The following",
          length(sheets_mats),
          "match the criteria set:", paste(sheets_mats, collapse = ", ")) %>%
      warning(., call. = FALSE)
    matches %>%
      map(~select_rename_w_dups(., cols))
  }
}
728 |
729 |
730 | #' Yes No to True False
731 | #'
732 | #' @param x takes in a vector to convert
733 | #'
734 | #' @return returns a logical vector or normal vector with warning
735 | #' @noRd
736 | #'
yn_to_tf <- function(x){
  # Pattern for a standalone yes/no token (y, n, yes, no; any case)
  yes_no_any <- regex("^y$|^n$|^yes$|^no$", ignore_case = TRUE)
  # Convertible when every element is NA or a recognised yes/no token
  if (all(is.na(x) | str_detect(x, yes_no_any))) {
    case_when(
      str_detect(x, regex("^y$|^yes$", ignore_case = TRUE)) ~ TRUE,
      str_detect(x, regex("^n$|^no$", ignore_case = TRUE)) ~ FALSE,
      is.na(x) ~ NA
    )
  } else if (is.logical(x)) {
    # Already logical: nothing to convert
    x
  } else {
    # Not convertible: warn and hand the vector back unchanged
    warning("Keep column needs to be True or False, please correct before converting to a Metacore object",
            call. = FALSE)
    x
  }
}
750 |
751 |
752 | #' Select in a dataset with renames
753 | #'
754 | #' This works like select, but if there are duplicates it won't cause issues
755 | #'
756 | #' @param .data dataset to select columns and rename
757 | #' @param cols named vector
758 | #'
759 | #' @return dataset
760 | #' @noRd
761 | #' @importFrom purrr safely
select_rename_w_dups <- function(.data, cols){
  # select() wrapped in safely() so a bad/unmatchable regex yields a NULL
  # result instead of aborting the whole loop
  pull_safe <- safely(~select(.x, matches(.y, ignore.case = FALSE)))
  cols %>%
    map_dfr(function(col){
      out <- pull_safe(.data, col)$result
      # Guard against a failed selection: when select() errored, result is
      # NULL and the old `if (ncol(out) == 1)` crashed with a cryptic
      # "argument is of length zero" (ncol(NULL) is NULL). Treat a failed
      # selection the same as a non-unique one and drop the column.
      if (!is.null(out) && ncol(out) == 1) {
        out <- out %>% pull(1)
      } else {
        out <- NULL
      }
      out
    })
}
776 |
--------------------------------------------------------------------------------
/R/utils-pipe.R:
--------------------------------------------------------------------------------
1 | #' Pipe operator
2 | #'
3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
4 | #'
5 | #' @name %>%
6 | #' @rdname pipe
7 | #' @keywords internal
8 | #' @export
9 | #' @importFrom magrittr %>%
10 | #' @usage lhs \%>\% rhs
11 | NULL
12 |
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
1 | #' Add a label to a vector
2 | #'
3 | #' @param x Vector to add label to
4 | #' @param label Label to add to vector
5 | #' @return Labeled vector
6 | #' @noRd
7 |
add_lab <- function(x, label) {
  # A zero-length label is treated the same as no label at all: assigning
  # NULL to the attribute removes it
  attr(x, "label") <- if (length(label) == 0) NULL else label
  x
}
15 |
16 | #' Add Labels to Dataframe
17 | #' @param .data Dataframe that you want to add labels to
18 | #' @param ... Labeled vector of labels, with the name of the element equal to
19 | #' the name of the column and the element equaling the label, or comma-separated name-value pair
20 | #' @importFrom purrr map2
21 | #' @importFrom tibble tibble as_tibble
22 | #' @importFrom dplyr filter pull
23 | #' @return Dataframe with labels
24 | #' @noRd
add_labs <- function(.data, ...) {
  # Flatten the name/label pairs into one named vector lookup
  label_lookup <- c(...)
  .data %>%
    purrr::map2(names(.data), function(values, col_name) {
      # All labels registered for this column (usually zero or one)
      matched <- unname(label_lookup[names(label_lookup) == col_name])
      if (length(matched) > 0) {
        add_lab(values, matched)
      } else {
        # No label supplied for this column: leave it untouched
        values
      }
    }) %>%
    as_tibble()
}
42 |
43 |
44 | #' Column Validation Function
45 | #'
46 | #' @param .data the dataframe to check the column for
47 | #' @param col the column to test
48 | #' @param func the function to use to assert column structure
49 | #' @param any_na_acceptable boolean, testing if the column can have missing
50 | #' @param nm name of column to check (for warning and error clarification)
51 | #'
check_structure <- function(.data, col, func, any_na_acceptable, nm) {

  # Column name as a plain string, for building messages
  column <- as_string(col)

  # The vector of values to validate
  vec <- .data %>% pull(!!col)

  if(any(is.na(vec)) & !any_na_acceptable) {
    # Missing values in a column that must be complete -> error (no warning)
    error_message <- paste(column, "from the", nm, "table contains missing values. Actual values are needed.")
    warning_string <- NULL
  } else if (all(is.na(vec))){
    # Entirely missing, but missing is acceptable -> warn only
    warning_string <- paste(column, "from the", nm,
                            "table only contain missing values.")
    error_message <- NULL
  } else {

    # Values for which the assertion function returns FALSE
    failures <- vec %>%
      discard(~do.call(func, list(.))) %>%
      unique()

    all_fails <- paste(" ", failures, collapse = "\n")
    error_message <- NULL

    if (length(failures) > 0 ) {

      if (is.primitive(func)) {

        # Type checks like is.character: report the check by its name
        # (prim_name is defined outside this file -- presumably returns the
        # primitive function's name; confirm in its definition)
        assertion_func <- prim_name(func)
        warning_string <- paste0(nm, "$", column, " fails ", assertion_func, " check \n")

      } else {
        # Word-list (closure) checks: list the offending values
        warning_string <- paste0("The following words in ", nm, "$", column, " are not allowed: \n", all_fails, "\n")
      }

    } else {
      warning_string <- NULL
    }

  }

  # The caller decides how to surface the collected messages
  list(warning = warning_string, error = error_message)
}
93 |
94 | #' Check Words in Column
95 | #'
96 | #' @param ... permissible words in the column
97 | #' @param col the column to check for specific words
check_words <- function(..., col) {
  # Flatten the accepted words into a single vector
  accepted_words <- unlist(c(...))
  # Return a membership predicate. A plain closure replaces the previous
  # expr()/make_function() metaprogramming, which constructed the identical
  # function by splicing accepted_words into a built-up body. The `col`
  # parameter of check_words itself was never used and is kept only for
  # signature compatibility; the returned predicate's own argument shadows it.
  function(col) col %in% accepted_words
}
103 |
make_function <- function(args = pairlist(), body, env = parent.frame()) {
  # Assemble a `function` call from its parts, then evaluate it in the
  # requested environment to produce a closure
  fn_call <- call("function", args, body)
  eval(fn_call, env)
}
107 |
108 |
109 | #' Get path to metacore example
110 | #'
111 | #' metacore comes bundled with a number of sample files in its `inst/extdata`
112 | #' directory. This function make them easy to access. When testing or writing
113 | #' examples in other packages, it is best to use the 'pilot_ADaM.rda' example as
114 | #' it loads fastest.
115 | #' @param file Name of file. If `NULL`, the example files will be listed.
116 | #' @export
117 | #' @examples
118 | #' metacore_example()
119 | #' metacore_example("mock_spec.xlsx")
metacore_example <- function(file = NULL) {
  # With a file name, resolve it inside the package's extdata directory,
  # erroring if it cannot be found (mustWork = TRUE)
  if (!is.null(file)) {
    return(system.file("extdata", file, package = "metacore", mustWork = TRUE))
  }
  # With no file name, list the available example files
  dir(system.file("extdata", package = "metacore"))
}
127 |
--------------------------------------------------------------------------------
/R/validators.R:
--------------------------------------------------------------------------------
1 | #' Checks the variables in ds_var are in var_spec
2 | #'
3 | #' @param ds_vars ds_var table
4 | #' @param var_spec var_spec table
5 | #'
6 | #' @return writes warning to console if there is an issue
7 | #' @noRd
ds_vars_check <- function(ds_vars, var_spec){
  # Variables in ds_vars with no var_spec entry under their plain name
  var_check <- anti_join(ds_vars, var_spec, by = "variable")

  if(var_check %>% nrow()){
    var_ls <- var_check %>%
      pull(.data$variable) %>%
      unique()

    # Retry the unmatched variables using the dataset-qualified form
    # "<dataset>.<variable>", the form var_spec uses when a variable's
    # metadata differs between datasets
    var_check_dbl <- ds_vars %>%
      filter(.data$variable %in% var_ls) %>%
      mutate(var_name = paste0(.data$dataset, ".", .data$variable)) %>%
      anti_join(var_spec, by = c("var_name" = "variable")) %>%
      pull(.data$variable) %>%
      unique()

    # Anything still unmatched genuinely lacks label/length metadata
    if(var_check_dbl %>% length() != 0){
      cat("\n")
      warning(
        "The following variable(s) do not have labels and lengths: ",
        paste(" ", var_check_dbl, sep = "\n "),
        "\n\n",
        call. = FALSE
      )
    }
  }
}
34 |
35 |
36 | #' Check Values
37 | #'
38 | #'
39 | #' Check the value spec table works with the ds_var tables. All variables in the
40 | #' ds_var should be in the value_spec and all variables in value_spec should be
41 | #' in ds_vars
42 | #' @param ds_vars ds_vars table
43 | #' @param value_spec value_spec table
44 | #'
45 | #' @return writes warning to console if there is an issue
46 | #' @noRd
value_check <- function(ds_vars, value_spec){
  # Unique dataset/variable pairs that carry value level metadata
  value_vars <- value_spec %>%
    distinct(.data$dataset, .data$variable)

  # Check the variables in ds_vars that don't have value specs
  not_in_val <- anti_join(ds_vars, value_vars, by = c("dataset", "variable"))
  if(nrow(not_in_val) != 0){
    variables <- not_in_val %>%
      mutate(full = str_c(.data$dataset, .data$variable, sep = ".")) %>%
      pull(.data$full) %>%
      str_c(collapse = ", ")
    message <- paste("The following variables are in the ds_vars table, but don't have value specs:\n",
                     variables, "\n\n")
    warning(message, call. = FALSE)
  }
  # Check the variables in value spec that aren't in ds_vars
  not_in_ds <- anti_join(value_vars, ds_vars, by = c("dataset", "variable"))
  if(nrow(not_in_ds) != 0){
    variables <- not_in_ds %>%
      pull(.data$variable) %>%
      str_c(collapse = ", ")
    # Grammar fix: message previously read "are have value specifications"
    message <- paste("The following variables have value specifications, but aren't in the ds_vars table:\n",
                     variables, "\n\n")
    warning(message, call. = FALSE)
  }
}
73 |
74 |
75 | #' Derivation check
76 | #'
77 | #' @param value_spec value_spec table
78 | #' @param derivations derivation table
79 | #'
80 | #' @return writes warning to console if there is an issue
81 | #' @noRd
derivation_check <- function(value_spec, derivations){
  # Unique variable/derivation id pairs referenced by the value spec
  deriv_vars <- value_spec %>%
    filter(!is.na(.data$derivation_id)) %>%
    distinct(.data$variable,.data$ derivation_id)

  # Check the variables that don't have derivations in derivations
  not_in_val <- anti_join(deriv_vars, derivations, by = c("derivation_id"))
  if(nrow(not_in_val) != 0){
    variables <- not_in_val %>%
      pull(.data$variable) %>%
      str_c(collapse = "\n ")
    message <- paste("The following variables have derivation ids not found in the derivations table:\n",
                     variables, "\n\n")
    warning(message, call. = FALSE)
  }
  # Check the derivations in derivations that aren't in value spec
  not_in_deriv <- anti_join(derivations, deriv_vars, by = c("derivation_id"))
  if(nrow(not_in_deriv) != 0){
    deriv <- not_in_deriv %>%
      mutate(message = paste0(.data$derivation_id, ": ", .data$derivation)) %>%
      pull(.data$message) %>%
      str_c(collapse = "\n ")
    message <- paste("The following derivations are never used:\n",
                     deriv, "\n\n")
    warning(message, call. = FALSE)
  }

}
110 |
111 | #' Codelist Check
112 | #'
113 | #' @param value_spec value spec table
114 | #' @param codelist codelist table
115 | #'
116 | #' @return writes warning to console if there is an issue
117 | #' @noRd
codelist_check <- function(value_spec, codelist){
  # Unique variable/code id pairs that the value spec references
  code_vars <- value_spec %>%
    filter(!is.na(.data$code_id)) %>%
    distinct(.data$variable, .data$code_id)

  # Warn about code ids referenced by variables but absent from the codelist
  missing_codes <- anti_join(code_vars, codelist, by = c("code_id"))
  if (nrow(missing_codes) > 0) {
    affected_vars <- missing_codes %>%
      pull(.data$variable) %>%
      str_c(collapse = "\n ")
    paste("The following variables have code ids not found in the codelist(s):\n",
          affected_vars, "\n") %>%
      warning(call. = FALSE)
  }
  # Warn about codelist entries that no variable ever references
  unused_codelists <- anti_join(codelist, code_vars, by = c("code_id"))
  if (nrow(unused_codelists) > 0) {
    unused_names <- unused_codelists %>%
      pull(.data$name) %>%
      str_c(collapse = "\n ")
    paste("The following codelist(s) are never used:\n",
          unused_names, "\n\n") %>%
      warning(call. = FALSE)
  }
}
144 |
145 |
146 | #' Check Supp
147 | #'
148 | #'
149 | #' Check the supp table works with the ds_var tables. All variables in the
150 | #' ds_var with a TRUE supp flag should be in the supp and all variables in supp
151 | #' should be in ds_vars
152 | #' @param ds_vars ds_vars table
153 | #' @param supp supp table
154 | #'
155 | #' @return writes warning to console if there is an issue
156 | #' @noRd
supp_check <- function(ds_vars, supp){
  # Each dataset/variable pair may appear at most once in the supp table
  dist_test <- supp %>%
    distinct(.data$dataset, .data$variable) %>%
    nrow() == nrow(supp)
  if(!dist_test){
    # call. = FALSE added for consistency with every other validator warning
    warning("Supp table contains non-unique dataset/variable combinations",
            call. = FALSE)
  }

  # Only variables flagged as supplemental need supp metadata
  ds_vars <- ds_vars %>%
    filter(.data$supp_flag)

  # Check the flagged variables in ds_vars that don't have supp specs
  not_in_supp <- anti_join(ds_vars, supp, by = c("dataset", "variable"))
  if(nrow(not_in_supp) != 0){
    variables <- not_in_supp %>%
      mutate(full = str_c(.data$dataset, .data$variable, sep = ".")) %>%
      pull(.data$full) %>%
      str_c(collapse = ", ")
    message <- paste("The following variables are in the ds_vars table and tagged as supplement, but don't have supp specs:\n",
                     variables, "\n\n")
    warning(message, call. = FALSE)
  }
  # Check the variables in supp that aren't flagged in ds_vars
  not_in_ds <- anti_join(supp, ds_vars, by = c("dataset", "variable"))
  if(nrow(not_in_ds) != 0){
    variables <- not_in_ds %>%
      pull(.data$variable) %>%
      str_c(collapse = ", ")
    # Grammar fix: message previously read "are have supp specifications"
    message <- paste("The following variables have supp specifications, but aren't in the ds_vars table:\n",
                     variables, "\n\n")
    warning(message, call. = FALSE)
  }
}
190 |
191 |
192 | #' Column Names by dataset
193 | #'
194 | #' @return list of column names by dataset
195 | #' @noRd
col_vars <- function(){
  # Canonical column names, per metacore table, in their expected order.
  # Used by the name and structure validators.
  list(
    .ds_spec     = c("dataset", "structure", "label"),
    .ds_vars     = c("dataset", "variable", "key_seq", "order", "keep", "core", "supp_flag"),
    .var_spec    = c("variable", "length", "label", "type", "common", "format"),
    .value_spec  = c("dataset", "variable", "type", "origin", "sig_dig", "code_id", "where", "derivation_id"),
    .derivations = c("derivation_id", "derivation"),
    .codelist    = c("code_id", "name", "type", "codes"),
    .supp        = c("dataset", "variable", "idvar", "qeval")
  )
}
205 |
206 |
207 | #' Check Variable names
208 | #'
209 | #' @param envrionment the private environment of the object
210 | #'
211 | #' @return warning messages to the console if there is an issue
212 | #' @noRd
var_name_check <- function(envrionment){
  # Expected column names for every metacore table
  # (note: the parameter name "envrionment" is misspelled but kept for
  # interface compatibility)
  col_names <- col_vars()
  # Get the tables and table names from the environment
  tbl_name <- ls(envrionment, all.names = TRUE)
  tbls <- map(tbl_name, get, envir = envrionment)
  # Check each table's names against the expectation: TRUE when every table
  # matches, FALSE otherwise, warning about each mismatch found.
  # (A stray no-op `name` statement was removed from the start of the
  # anonymous function -- it evaluated the argument and discarded the value.)
  map2_lgl(tbl_name, tbls, function(name, tbl){
    if(is.null(tbl)){
      # Checks for null tables
      print_message <- name %>%
        str_remove("[:punct:]") %>%
        paste("is null")
      warning(print_message, call. = FALSE)
      FALSE
    } else if(!setequal(names(tbl),col_names[[name]])){
      # writes a message if the column names don't match
      print_message <- name %>%
        str_remove("[:punct:]") %>%
        paste0("'", ., "' has incorrect column names. It should be:\n",
               str_c(col_names[[name]], collapse = ", "), "\n")
      warning(print_message, call. = FALSE)
      FALSE
    } else {
      TRUE
    }
  }) %>%
    all()

}
245 |
246 |
247 |
#' Column data check specification table
249 | #'
250 | #' @return a data frame of the datasets, column
251 | #' @noRd
252 | #'
all_message <- function() {
  # One row per column to validate: the table it belongs to, the column name,
  # the predicate the column must satisfy, and whether missing values are
  # acceptable. Consumed row-wise by check_columns()/check_structure().
  tribble(
    ~dataset, ~var, ~test, ~any_na_acceptable,
    "ds_spec", "dataset", is.character, FALSE,
    "ds_spec", "structure", is.character, TRUE,
    "ds_spec", "label", is.character, TRUE,
    "ds_vars", "dataset", is.character, FALSE,
    "ds_vars", "variable", is.character, FALSE,
    "ds_vars", "key_seq", is.numeric, TRUE,
    "ds_vars", "order", is.numeric, TRUE,
    "ds_vars", "keep", is.logical, TRUE,
    "ds_vars", "core", check_words("Expected", "Required", "Permissible", "Conditionally Required", "Conditionally Expected", NA), TRUE,
    "ds_vars", "supp_flag", is.logical, TRUE,
    "var_spec", "variable", is.character, FALSE,
    "var_spec", "type", is.character, TRUE,
    "var_spec", "length", is.numeric, TRUE,
    "var_spec", "label", is.character, TRUE,
    "var_spec", "format", is.character, TRUE,
    "var_spec", "common", is.logical, TRUE,
    "value_spec", "type", is.character, TRUE,
    "value_spec", "sig_dig", is.integer, TRUE,
    "value_spec", "origin", function(x){str_detect(x, "collected|derived|assigned|protocol|predecessor|crf.*")||is.na(x)}, TRUE,
    "value_spec", "code_id", is.character, TRUE,
    "value_spec", "dataset", is.character, FALSE,
    "value_spec", "where", is.character, TRUE,
    "value_spec", "derivation_id", is.character, TRUE,
    "derivations", "derivation_id", is.character, FALSE,
    "derivations", "derivation", is.character, TRUE,
    "codelist", "code_id", is.character, FALSE,
    "codelist", "name", is.character, TRUE,
    "codelist", "codes", function(x){!is.null(x)}, TRUE,
    "codelist", "type", is.character, TRUE,
    "supp", "dataset", is.character, FALSE,
    "supp", "variable", is.character, FALSE,
    "supp", "idvar", is.character, TRUE,
    "supp", "qeval", is.character, TRUE,
  )
}
291 |
292 |
293 | #' Check all data frames include the correct types of columns
294 | #'
295 | #' This function checks for vector types and accepted words
296 | #'
297 | #' @param ds_spec dataset specification
298 | #' @param ds_vars dataset variables
299 | #' @param var_spec variable specification
300 | #' @param value_spec value specification
301 | #' @param derivations derivation information
302 | #' @param codelist codelist information
303 | #' @param supp supp information
304 | #'
check_columns <- function(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist, supp) {


  # Run each structural check declared in all_message(): ..1 is the table
  # name (resolved against this function's arguments via get()), ..2 the
  # column name (passed on as a symbol), ..3 the predicate, ..4 whether NAs
  # are acceptable. Each call yields list(warning = ..., error = ...).
  messages <- purrr::pmap(all_message(),
                          ~check_structure(
                            get(..1), sym(..2), ..3, ..4, ..1)
  )

  # errors: any non-NULL error message aborts, with all errors combined
  errors <- map(messages, "error") %>%
    compact() %>%
    paste0(., collapse = "\n\n")
  if(errors != "")
    stop(paste0(errors, "\n\n"), call. = FALSE)

  # warnings: combined into a single warning so everything surfaces at once
  warnings <- map(messages, "warning") %>%
    compact() %>%
    paste0(., collapse = "\n\n")
  if(warnings != "")
    warning(paste0(warnings, "\n\n"), call. = FALSE)



}
330 |
331 | #' Is metacore object
332 | #'
333 | #' @param x object to check
334 | #'
335 | #' @return `TRUE` if metacore, `FALSE` if not
336 | #' @export
337 | #'
338 | #' @examples
339 | #' # Loads in a metacore obj called metacore
340 | #' load(metacore_example("pilot_ADaM.rda"))
341 | #' is_metacore(metacore)
342 | #'
is_metacore <- function(x){
  # Equivalent to inherits(x, "Metacore"): tests the class attribute
  "Metacore" %in% class(x)
}
346 |
--------------------------------------------------------------------------------
/R/xml_builders.R:
--------------------------------------------------------------------------------
1 | #' Define XML to DataDef Object
2 | #'
3 | #' Given a path, this function converts the define xml to a DataDef Object
4 | #'
5 | #' @param path location of the define xml as a string
6 | #' @param quiet Option to quietly load in, this will suppress warnings, but not errors
7 | #'
8 | #' @return DataDef Object
9 | #' @export
10 | #' @importFrom xml2 read_xml xml_find_all xml_attr xml_ns_strip
define_to_metacore <- function(path, quiet = FALSE){

  xml <- read_xml(path)
  # Strip namespaces so the XPath queries in the xml_to_* helpers don't need
  # namespace prefixes
  xml_ns_strip(xml)

  # NOTE(review): define_version is computed but never used below --
  # presumably kept for the side effect of failing on an unparsable
  # DefineVersion attribute, or for future version-specific handling;
  # confirm before removing
  define_version <- xml_find_all(xml, "//MetaDataVersion") %>%
    xml_attr("DefineVersion") %>%
    as.numeric_version()


  # Build each metacore table from the define.xml document
  ds_spec <- xml_to_ds_spec(xml)
  ds_vars <- xml_to_ds_vars(xml)
  var_spec <- xml_to_var_spec(xml)
  value_spec <- xml_to_value_spec(xml)
  code_list <- xml_to_codelist(xml)
  derivations <- xml_to_derivations(xml)
  if(!quiet){
    out <- metacore(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist = code_list)
  } else{
    # quiet = TRUE: suppress the cross-table validation warnings
    out<- suppressWarnings(metacore(ds_spec, ds_vars, var_spec, value_spec, derivations, codelist = code_list))
    message("Loading in metacore object with suppressed warnings")
  }
  out
}
35 |
36 |
37 | #' XML to Data Set Spec
38 | #'
39 | #' Creates a dataset specification, which has the domain name and label for each dataset
40 | #' @param doc xml document
41 | #'
42 | #' @return data frame with the data set specifications
43 | #' @family xml builder
44 | #' @export
45 | #' @importFrom xml2 xml_attr xml_find_first xml_text
xml_to_ds_spec <- function(doc) {
  # Datasets are the ItemGroupDef nodes whose OID contains 'IG'
  dataset_nodes <- xml_find_all(doc, "//MetaDataVersion/ItemGroupDef[contains(@OID, 'IG')]")
  map_dfr(dataset_nodes, function(ds_node) {
    # One row per dataset: name, structure, and the Description text as label
    tibble(
      dataset = xml_attr(ds_node, "Name"),
      structure = xml_attr(ds_node, "Structure"),
      label = xml_text(xml_find_first(ds_node, "./Description"))
    )
  })
}
57 |
58 |
59 | #' XML to Data Set Var table
60 | #'
61 | #' Creates the ds_vars table, which acts as a key between the datasets and the var spec
62 | #' @param doc xml document
63 | #'
64 | #' @return data frame with the dataset and variables
65 | #' @family xml builder
66 | #' @export
67 | #'
xml_to_ds_vars <- function(doc) {
  # Each dataset is an ItemGroupDef
  xml_find_all(doc, "//ItemGroupDef") %>%
    map_dfr(function(node){
      # Each Variable is a Item Ref
      child_node <- xml_find_all(node, "./ItemRef")
      tibble(
        dataset = xml_attr(node, "Name"),
        oid = xml_attr(child_node, "ItemOID"),
        mandatory = xml_attr(child_node, "Mandatory"),
        key_seq = xml_attr(child_node, "KeySequence") %>%
          as.integer(),
        order = xml_attr(child_node, "OrderNumber") %>%
          as.integer()
      )
    }) %>%
    mutate(
      # id_to_var is defined outside this file -- presumably converts an
      # ItemOID into the bare variable name; confirm in its definition
      variable = id_to_var(.data$oid),
      keep = .data$mandatory == "Yes",
      # core and supp_flag aren't encoded in define.xml, so filled with NA
      core = NA_character_,
      supp_flag = NA
    ) %>%
    select(.data$dataset, .data$variable, .data$key_seq,
           .data$order, .data$keep, .data$core, .data$supp_flag,
           -.data$mandatory, -.data$oid)
}
94 |
95 |
96 | #' XML to variable spec
97 | #'
98 | #' Takes a define xml and returns a dataset with specifications for each
99 | #' variable. The variable will just be the variable, unless the specification
100 | #' for that variable differ between datasets
101 | #'
102 | #' @param doc define xml document
103 | #'
104 | #' @return data frame with variable, length, label columns
105 | #' @family xml builder
106 | #' @export
107 | #' @importFrom xml2 xml_attr xml_find_all xml_text
xml_to_var_spec <- function(doc) {

  # Gets the name, id, and length from the variable node and the description from the child
  var_info <- xml_find_all(doc, "//ItemDef") %>%
    map_dfr(function(node){
      tibble(
        oid = xml_attr(node,"OID") %>% as.character(),
        variable = xml_attr(node, "Name") %>% as.character(),
        type = xml_attr(node, "DataType"),
        length = xml_attr(node, "Length") %>% as.integer(),
        format = xml_attr(node, "DisplayFormat"),
        label = xml_find_first(node, "./Description/TranslatedText") %>%
          xml_text()
      )
    })

  # OIDs actually referenced by at least one dataset's ItemRefs; ItemDefs
  # never referenced are dropped below
  possible_vars <- xml_find_all(doc, "//ItemGroupDef/ItemRef") %>%
    map_chr(function(node){
      oid = xml_attr(node, "ItemOID")
    })

  # Get for each variable, get the number of distinct lengths and labels
  dist_df <- var_info %>%
    filter(.data$oid %in% possible_vars) %>%
    distinct(.data$variable, .data$length, .data$label, .data$type, .keep_all = TRUE) %>%
    group_by(.data$variable) %>%
    mutate(
      n = n(),  # number of distinct metadata versions for this variable name
      common = NA
    ) %>%
    ungroup()

  # For variables with more than one distinct label, this gets all the full
  # variable names with that root. Sometimes 3 variables will have the same root
  # (i.e. ARMCD), 2 of them will match, but one of them won't. This means the
  # two matching will have been collapsed to one in the distinct and we have to
  # bring back the one that got dropped. Cause all of them need to be DS.var
  full_name_vars <- dist_df %>%
    filter(n > 1) %>%
    select(.data$variable) %>%
    inner_join(var_info, by = "variable") %>%
    mutate(variable = str_remove(.data$oid, "^IT\\.")) %>%
    distinct()

  # Combine the variables that need full names with the variables that don't
  dist_df %>%
    filter(n == 1) %>%
    bind_rows(full_name_vars) %>%
    select(.data$variable, .data$type, .data$length, .data$label,
           .data$format, .data$common, -.data$n, -.data$oid)
}
159 |
160 |
#' XML to value spec
#'
#' Takes a define xml and pulls out the value level metadata including codelist_id's,
#' defines_id's, and where clause. There is one row per variable except when there
#' is a where clause, at which point there is one row per value.
#' @param doc xml document
#'
#' @return tibble with the value level information
#' @family xml builder
#' @export
#'
#' @importFrom xml2 xml_attr xml_find_first xml_parent xml_find_all
xml_to_value_spec <- function(doc) {
  # Get information in the item definition: one row per ItemDef, including the
  # origin (with CRF page number when collected) and codelist/comment links
  item_def <- xml_find_all(doc, "//ItemDef") %>%
    map_dfr(function(node){
      tibble(
        oid = xml_attr(node,"OID") %>% as.character(),
        variable = xml_attr(node, "Name") %>% as.character(),
        type = xml_attr(node, "DataType"),
        sig_dig = xml_attr(node, "SignificantDigits") %>% as.integer(),
        origin = xml_find_first(node, "./def:Origin") %>% xml_attr("Type"),
        page_num = xml_find_first(node, "./def:Origin/def:DocumentRef/def:PDFPageRef") %>% xml_attr("PageRefs"),
        predecessor = xml_find_first(node, "./def:Origin") %>% xml_text(),
        comment_id = xml_attr(node,"CommentOID"),
        code_id = xml_find_first(node, "CodeListRef") %>% xml_attr("CodeListOID"),
        varname = xml_attr(node, "SASFieldName") %>% as.character()
      )
    }) %>%
    mutate(
      # Fold the CRF page number into the origin string for collected values
      origin = if_else(.data$origin == "Collected" & !is.na(.data$page_num),
                       paste0(.data$origin,", page_num = ", .data$page_num),
                       .data$origin)
    ) %>%
    select(-.data$page_num)

  # Pull the information from the item reference only for dataset variable, not where level information
  derivations <- xml_find_all(doc, "//ItemGroupDef/ItemRef") %>%
    map_dfr(function(node){
      tibble(
        oid = xml_attr(node, "ItemOID") %>% as.character(),
        dataset = xml_parent(node) %>% xml_attr("Name") %>% as.character(),
        derivation_id = xml_attr(node, "MethodOID")
      )
    })
  # Combine all the item information, keyed on the item OID
  item_info <- left_join(derivations, item_def, by = "oid")

  # Value-level item references; each row links an item to a where clause OID
  where_to_merge <- xml_find_all(doc, "//def:ValueListDef/ItemRef") %>%
    map_dfr(function(node){
      tibble(
        oid = xml_parent(node) %>% xml_attr("OID") %>% as.character(),
        item_oid = xml_attr(node, "ItemOID"),
        ord = xml_attr(node, "OrderNumber"),
        where_oid = xml_find_all(node, "./def:WhereClauseRef") %>%
          xml_attr("WhereClauseOID"),
        derivation_id = xml_attr(node, "MethodOID")
      )
    }
    )

  # Range checks inside each where clause: item (left), comparator, and value(s)
  where_eqs <- xml_find_all(doc, "//def:WhereClauseDef[@OID]/RangeCheck") %>%
    map_dfr(function(node){
      tibble(
        where_oid = xml_parent(node) %>% xml_attr("OID"),
        left = xml_attr(node, "ItemOID"),
        test = xml_attr(node, "Comparator"),
        right = xml_find_all(node, "./CheckValue") %>% xml_text()
      )
    }
    )
  # create 0x4 tibble if where_eqs is 0x0
  # tmp workaround until below bug is resolved in purrr
  # https://github.com/tidyverse/purrr/issues/824
  if(nrow(where_eqs) == 0){
    where_eqs <- tibble(where_oid=character(),
                        left=character(),
                        test=character(),
                        right = character())
  }

  if(nrow(where_to_merge) == 0){
    # No ValueListDef item refs: derive item/method ids straight from the
    # range-check item OIDs. NOTE(review): assumes the "IT"/"MT" OID prefix
    # convention seen elsewhere in this file — confirm against the define
    # version in use
    where_eqs <- where_eqs %>%
      mutate(item_oid = .data$left,
             derivation_id = paste0("MT", str_remove(.data$left, "IT"), ".", .data$right),
             ord = NA,
             oid = .data$left) %>%
      left_join(item_def, by = c("oid")) %>%
      left_join(select(derivations, -.data$derivation_id), by = c("oid"))

  } else{
    where_eqs<- full_join(where_to_merge, where_eqs, by = "where_oid") %>%
      left_join(item_def, by = c("item_oid" = "oid")) %>%
      # Allow for merging with the derivations to get the dataset
      mutate(oid = paste0("IT", str_remove(.data$oid, "^VL")),
             variable = .data$varname) %>%
      left_join(select(derivations, -.data$derivation_id), by = c("oid"))
  }

  # Translate each range check into R syntax and collapse all checks belonging
  # to one where clause into a single expression string
  all_where_eqs <- where_eqs %>%
    group_by(.data$where_oid) %>%
    mutate(var = str_extract(.data$left, "\\w*$"),
           right = paste0("'", .data$right, "'"),
           # Map define comparators to R operators (IN/NOTIN handled below)
           test = case_when(.data$test == "EQ" ~ "==",
                            .data$test == "LT" ~ "<",
                            .data$test == "LE" ~ "<=",
                            .data$test == "GT" ~ ">",
                            .data$test == "GE" ~ ">=",
                            .data$test == "NE" ~ "!=",
                            TRUE ~ .data$test),
           eq = case_when( test == "IN" ~ paste(.data$var, "%in%", "c(",
                                                paste(.data$right, collapse = ","),
                                                ")"),
                           test == "NOTIN" ~ paste("!", .data$var, "%in%", "c(",
                                                   paste(.data$right, collapse = ","),
                                                   ")"),
                           TRUE ~ paste(.data$var, .data$test, .data$right, collapse = " & "))
    ) %>%
    select(-.data$left, -.data$var, -.data$test, -.data$right) %>%
    distinct() %>%
    group_by(.data$item_oid, .data$derivation_id) %>%
    # Multiple where clauses on the same item are OR'd together
    mutate(full_eq = str_c(.data$eq, collapse = "||")) %>%
    filter(!is.na(.data$item_oid)) %>%
    ungroup() %>%
    select(-.data$eq, where = .data$full_eq, .data$derivation_id,
           -.data$where_oid, -.data$ord, -.data$item_oid)

  # Rows with a where clause replace their plain dataset-level counterparts
  val_spec <- item_info %>%
    anti_join(all_where_eqs, by = c("oid")) %>% #remove any variables with a where
    bind_rows(all_where_eqs) %>%
    mutate(derivation_id = case_when(
      # Predecessor/Assigned origins point at the predecessor text or a
      # comment rather than a method definition
      .data$origin == "Predecessor" & !is.na(.data$predecessor) ~ .data$predecessor,
      .data$origin == "Assigned" & !is.na(.data$comment_id) ~ .data$comment_id,
      TRUE ~ .data$derivation_id)) %>%
    select(.data$dataset, .data$variable, .data$code_id, .data$derivation_id,
           .data$type, .data$origin, .data$where, .data$sig_dig,
           -.data$predecessor, -.data$comment_id, -.data$varname,
           -.data$oid)

  val_spec
}
302 |
303 |
#' XML to code list
#'
#' Reads in a define xml and creates a code_list table. The code_list table is a
#' nested tibble where each row is a code list or permitted value list. The code
#' column contains a vector or a tibble depending on if it is a permitted values
#' or code list
#'
#' @param doc xml document
#'
#' @return a tibble containing the code list and permitted value information
#' @family xml builder
#' @export
xml_to_codelist <- function(doc) {
  cl_pv <- xml_find_all(doc, "//CodeList") %>%
    map_dfr(function(node){
      #Values within the code/decode
      node_children_EI <- xml_find_all(node, "./EnumeratedItem")
      node_children_CL <- xml_find_all(node, "./CodeListItem")

      # Decodes only exist for code/decode lists; pad with NA so the tibble
      # below always has a decode column
      decodes <- c(xml_find_all(node_children_EI, "./Decode") %>% xml_text(),
                   xml_find_all(node_children_CL, "./Decode") %>% xml_text())
      if(length(decodes) == 0){
        decodes <- NA_character_
      }

      tibble(
        code_id = xml_attr(node, "OID"),
        name = xml_attr(node, "Name"),
        code = c(xml_attr(node_children_EI, "CodedValue"),
                 xml_attr(node_children_CL, "CodedValue")),
        decode = decodes
      )
    }) %>%
    group_by(.data$code_id) %>%
    # A list with no decodes at all is a permitted value list
    mutate(type = if_else(all(is.na(.data$decode)),
                          "permitted_val", "code_decode"))

  permitted_val <- cl_pv %>%
    filter(.data$type == "permitted_val") %>%
    select(-.data$decode) %>%
    nest(codes = c(.data$code))

  code_decode <- cl_pv %>%
    filter(.data$type == "code_decode") %>%
    nest(codes = c(.data$code, .data$decode))

  # External dictionaries (stored by name and version rather than values)
  external_libs <- xml_find_all(doc, "//CodeList/ExternalCodeList") %>%
    map_dfr(function(node){
      tibble(
        code_id = xml_parent(node) %>% xml_attr("OID"),
        name = xml_parent(node) %>% xml_attr("Name"),
        dictionary = xml_attr(node, "Dictionary"),
        version = xml_attr(node, "Version"),
        type = "external_library"
      )
    })
  if(nrow(external_libs) > 0){
    # Use the magrittr pipe for consistency with the rest of the file (the
    # native |> pipe would require R >= 4.1)
    external_libs <- external_libs %>%
      nest(codes = c(.data$dictionary, .data$version))
  }

  # Combining the code decode with the permitted values and external libraries
  bind_rows(code_decode, permitted_val, external_libs) %>%
    ungroup()
}
371 |
372 |
373 |
#' XML to derivation table
#'
#' This reads in a xml document and gets all the derivations/comments. These can
#' be cross referenced to variables using the derivation_id's
#' @param doc xml document
#'
#' @return dataframe with derivation id's and derivations
#' @family xml builder
#' @export
#'
xml_to_derivations <- function(doc) {

  # Helper: one row per matched node, pairing its OID with the translated
  # description text
  pull_id_and_text <- function(xpath) {
    xml_find_all(doc, xpath) %>%
      map_dfr(function(node){
        tibble(
          derivation_id = xml_attr(node, "OID"),
          derivation = xml_find_first(node, "./Description/TranslatedText") %>%
            xml_text()
        )
      })
  }

  # Derivations live in method definitions, comments in comment definitions
  derivation <- pull_id_and_text("//MethodDef")
  comment <- pull_id_and_text("//def:CommentDef")

  # Predecessor origin text doubles as both the id and the derivation itself;
  # drop items with no (or empty) origin text
  predecessor <- xml_find_all(doc, "//ItemDef") %>%
    map_dfr(function(node){
      origin_txt <- xml_find_first(node, "./def:Origin") %>% xml_text()
      tibble(
        derivation_id = origin_txt,
        derivation = origin_txt
      )
    }) %>%
    filter(!is.na(.data$derivation) & str_length(.data$derivation) > 0)

  bind_rows(derivation, comment, predecessor) %>%
    distinct()
}
417 |
--------------------------------------------------------------------------------
/R/xml_helpers.R:
--------------------------------------------------------------------------------
1 |
#' id to dataset
#'
#' Pulls the dataset segment out of item OIDs of the form IT.<dataset>.<var>
#'
#' @param id vector of id's
#'
#' @return vector of datasets (NA where the id has no dataset segment)
#' @noRd
id_to_ds <- function(id) {
  str_extract(id, "(?<=^IT\\.)[:alnum:]+(?=\\..*)")
}
11 |
#' id to variable
#'
#' Pulls the variable segment out of item OIDs, handling both the
#' IT.<dataset>.<variable> and IT.<variable> forms
#'
#' @param id vector of id's
#'
#' @return vector of variable names
#' @noRd
id_to_var <- function(id) {
  # The dataset segment (if present) decides which pattern applies per element
  ds <- id %>% str_extract("(?<=^IT\\.)[:alnum:]+(?=\\..*)")
  extract <- if_else(is.na(ds), "(?<=^IT\\.)[:alnum:]*",
                     str_c("(?<=^IT\\.", ds, "\\.)[:alnum:]*")
  )
  id %>%
    str_extract(extract)
}
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
#' @import R6
#' @importFrom tidyr replace_na nest unnest
#' @importFrom tidyselect everything matches
#' @importFrom dplyr right_join n_distinct row_number case_when n if_else arrange rowwise anti_join inner_join bind_rows distinct mutate full_join left_join group_by summarise filter pull select group_split ungroup
#' @importFrom tibble tibble tribble as_tibble
#' @importFrom stringr str_replace str_count str_remove_all str_remove str_detect str_subset str_length str_replace_na str_replace_all str_extract str_c str_trim str_split str_starts regex
#' @importFrom rlang !! as_string expr sym prim_name
#' @importFrom purrr keep discard reduce compact map map_chr map_lgl map_dfr map_int map2_lgl map2_chr pmap_chr
#' @importFrom stats var na.omit
#' @importFrom readxl excel_sheets read_excel

NULL

# Declare names used in non-standard evaluation (dplyr/tidyr pipelines and R6
# bodies) so R CMD check does not flag them as undefined global variables
globalVariables(c("private",
                  "self",
                  "keep",
                  ".",
                  ".data",
                  "code",
                  "code_id",
                  "codes",
                  "core",
                  "dataset",
                  "decode",
                  "derivation_id",
                  "dictionary",
                  "id",
                  "key_seq",
                  "lab",
                  "label",
                  "matches",
                  "spec_type_to_code_list",
                  "type",
                  "variable",
                  "where",
                  "where_new",
                  "var1",
                  "n_lab"))
41 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 |
6 |
7 | ```{r, include = FALSE}
8 | knitr::opts_chunk$set(
9 | collapse = TRUE,
10 | comment = "#>",
11 | fig.path = "man/figures/README-"
12 | )
13 | ```
14 |
15 | # metacore
16 |
17 |
18 | [
](https://RValidationHub.slack.com)
19 | [](https://github.com/atorus-research/xportr/actions?workflow=R-CMD-check)
20 | [
](https://app.codecov.io/gh/atorus-research/metacore)
21 | [
](https://github.com/atorus-research/metacore/blob/master/LICENSE)
22 | [](https://lifecycle.r-lib.org/articles/stages.html#experimental-1)
23 | [](https://CRAN.R-project.org/package=metacore)
24 |
25 |
26 | Programming for clinical trial data analysis tends to be very standardized. With data standards such as [CDISC](https://www.cdisc.org/), expectations tend to be very clearly defined. Within these programming activities, there's ample room for the use of metadata. Metadata can be used for several different purposes, such as applying dataset attributes, establishing sort sequences, working with controlled terminology, and more. Despite CDISC standards, organizations tend to have their own means of storing metadata, be it in excel spreadsheets, databases, and more.
27 |
28 | The purpose of metacore is to establish a common foundation for the use of metadata within an R session. This is done by creating an R object that can hold the necessary data in a standardized, immutable structure (using R6) that makes it easy to extract out necessary information when needed. Users can read in their metadata from their various sources. To make this easy, we've provided some helper functions - and even have readers that can read directly from Define.xml 2.0. By establishing a common and consistent object in memory, further packages that support these work flows can have a common foundation upon which tools can be built that leverage metadata in the future. This reduces the need to hold different data structures containing metadata and instead allows programs to pull this information from a centralized source.
29 |
30 | ## Installation
31 |
32 | You can install the current development version of metacore from [github](https://github.com/atorus-research/metacore) with:
33 |
34 | ``` r
35 | devtools::install_github("atorus-research/metacore")
36 | ```
37 |
38 | ## Structure
39 |
A metacore object is made up of 7 different tables, which are connected with a series of identifying columns. The goal of these tables is to normalize the information as much as possible, while keeping together like information. Each table has a basic theme to make them easier to remember. They are as follows:
41 |
42 | - **ds_spec**: Contains dataset level information
43 |
44 | - **ds_vars**: Bridges the dataset and variable level information
45 |
46 | - **var_spec**: Contains variable level information
47 |
48 | - **value_spec**: Contains value level information
49 |
50 | - **derivations**: Contains all derivations
51 |
52 | - **codelist**: Contains information about code/decodes, permitted values and external libraries
53 |
54 | - **supp**: Contains information specific to supplemental variables
55 |
56 | Here is a schema of how all this fits together:
57 |
58 | 
59 |
60 | ### ds_spec
61 |
62 | This table covers the basic information about each dataset. There is only a single row per dataset, with the following information:
63 |
64 | - *dataset*: The abbreviated name of the dataset (e.g. AE)
65 |
- *structure*: Value structure of the dataset as a string
67 |
68 | - *label*: Dataset label
69 |
70 | ### ds_vars
71 |
72 | This table contains the information that bridges between purely dataset level and purely variable level. There is one row per dataset per variable:
73 |
74 | - *dataset*: The abbreviated name of the dataset. This will match to the name in **ds_spec**
75 |
76 | - *variable*: Variable name
77 |
78 | - *key_seq*: Sequence key, which are the variables used to order a dataset. This is a column of integers, where 1 is the first sorting variable and 2 is the second etc. If the variable is not used in sorting it will be left `NA`
79 |
80 | - *order*: Order sets the order of the columns to appear in the dataset. This is also a numeric value
81 |
82 | - *keep*: Logical value about if the variable needs to be kept
83 |
84 | - *core*: ADaM core, which should be one of the following values: "Expected", "Required", "Permissible", "Conditionally Required", "Conditionally Expected", or NA. For more information about core see [CDISC](https://www.cdisc.org/standards/foundational/adam)
85 |
86 | - *supp_flag*: Logical to determine if the variable is in the supplemental datasets
87 |
88 | ### var_spec
89 |
90 | This table contains the purely variable level information. The goal is there is a single row per variable, which is common across all datasets. This helps ensure variables follow the CDISC standard. But, this isn't always possible, so if information for a given variable differs across datasets, the variable will be recorded as dataset.variable in the variable column.
91 |
- *variable*: Variable name, which should match the name in **ds_spec**. Unless the variable needs to be duplicated, then the name will be a combination of the dataset name and variable name from **ds_spec** (dataset.variable)
93 |
94 | - *type*: Variable class
95 |
96 | - *length*: Variable length (while not relevant to R datasets, this is important for when creating XPT files)
97 |
98 | - *label*: Variable label
99 |
100 | - *common*: Common across ADaM datasets
101 |
102 | - *format*: Variable format
103 |
104 | ### value_spec
105 |
This table contains the information at the value level. There will be at least one row per dataset/variable combination. There is more than one row per dataset/variable combination if the combination has values which have differing metadata. For instance LBORRES that are different data types depending on the value. The information contained are as follows:
107 |
108 | - *dataset*: The abbreviated name of the dataset. This will match to the name in **ds_spec**
109 |
110 | - *variable*: Variable name. This will match to the name in **ds_vars**
111 |
112 | - *type*: String of the value type
113 |
114 | - *origin*: Origin of the value
115 |
116 | - *sig_dig*: Significant digits of the value
117 |
118 | - *code_id*: ID for the code list to match the id in the **codelist** table
119 |
120 | - *where*: Value of the variable
121 |
122 | - *derivation_id*: ID for the derivation to match with the **derivation** table
123 |
124 | ### derivations
125 |
126 | This table has all the derivation information, with one row per derivation ID and the following information:
127 |
128 | - *derivation_id*: The ID, which should match to **value_spec**
129 |
130 | - *derivation*: Text describing the derivation
131 |
132 | ### codelist
133 |
134 | This table contains the code lists, permitted value lists, and external libraries nested within a tibble. There is only a single row per list/library, with the following information:
135 |
136 | - *code_id*: the ID used to identify the code list. This should be the same as the *code_id* in **val_spec**
137 |
138 | - *name*: Name of the code list
139 |
140 | - *code*: A list of tibbles (for code / decode combinations) and vectors (for permitted values and libraries), which contain all the codes
141 |
142 | - *type*: An indicator of if the information in the code column is a code/decode table, permitted value, or external library
143 |
144 |
145 |
146 | ### supp
147 |
148 | This table contains the information needed to create supplemental tables. If you want to add a variable which will go into a supplemental qualifier then you can create as normal (i.e. label information going to the **var_spec** table and derivation and origin going into the **value_spec** table), but you need to flag it as supplemental in the **ds_vars** table and add a row to the **supp** table. There is only a single row per dataset/variable, with the following information:
149 |
150 | - *dataset*: The abbreviated name of the dataset. This will match to the name in **ds_spec**
151 |
152 | - *variable*: Variable name. This will match to the name in **ds_spec**
153 |
154 | - *idvar*: ID variable used for the supplemental variable. Can be left missing if not needed
155 |
156 | - *qeval*: Evaluator for the supplemental variable
157 |
158 | To get more information about the metacore objects and how to build a specification reader, please see our vignettes.
159 |
160 |
161 | 
162 |
163 | ## Future Development
164 |
165 | This is an alpha release of this package, so if you have ideas on future improvements please add them to the issue log.
166 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # metacore
5 |
6 |
7 |
8 | [
](https://RValidationHub.slack.com)
9 | [](https://github.com/atorus-research/xportr/actions?workflow=R-CMD-check)
11 | [
](https://app.codecov.io/gh/atorus-research/metacore)
12 | [
](https://github.com/atorus-research/metacore/blob/master/LICENSE)
13 | [](https://lifecycle.r-lib.org/articles/stages.html#experimental-1)
15 | [](https://CRAN.R-project.org/package=metacore)
17 |
18 |
19 | Programming for clinical trial data analysis tends to be very
20 | standardized. With data standards such as
21 | [CDISC](https://www.cdisc.org/), expectations tend to be very clearly
22 | defined. Within these programming activities, there’s ample room for the
23 | use of metadata. Metadata can be used for several different purposes,
24 | such as applying dataset attributes, establishing sort sequences,
25 | working with controlled terminology, and more. Despite CDISC standards,
26 | organizations tend to have their own means of storing metadata, be it in
27 | excel spreadsheets, databases, and more.
28 |
29 | The purpose of metacore is to establish a common foundation for the use
30 | of metadata within an R session. This is done by creating an R object
31 | that can hold the necessary data in a standardized, immutable structure
32 | (using R6) that makes it easy to extract out necessary information when
33 | needed. Users can read in their metadata from their various sources. To
34 | make this easy, we’ve provided some helper functions - and even have
35 | readers that can read directly from Define.xml 2.0. By establishing a
36 | common and consistent object in memory, further packages that support
37 | these work flows can have a common foundation upon which tools can be
38 | built that leverage metadata in the future. This reduces the need to
39 | hold different data structures containing metadata and instead allows
40 | programs to pull this information from a centralized source.
41 |
42 | ## Installation
43 |
44 | You can install the current development version of metacore from
45 | [github](https://github.com/atorus-research/metacore) with:
46 |
47 | ``` r
48 | devtools::install_github("atorus-research/metacore")
49 | ```
50 |
51 | ## Structure
52 |
A metacore object is made up of 7 different tables, which are connected
54 | with a series of identifying columns. The goal of these tables is to
55 | normalize the information as much as possible, while keeping together
56 | like information. Each table has a basic theme to make them easier to
57 | remember. They are as follows:
58 |
59 | - **ds_spec**: Contains dataset level information
60 |
61 | - **ds_vars**: Bridges the dataset and variable level information
62 |
63 | - **var_spec**: Contains variable level information
64 |
65 | - **value_spec**: Contains value level information
66 |
67 | - **derivations**: Contains all derivations
68 |
69 | - **codelist**: Contains information about code/decodes, permitted
70 | values and external libraries
71 |
72 | - **supp**: Contains information specific to supplemental variables
73 |
74 | Here is a schema of how all this fits together:
75 |
76 | 
77 |
78 | ### ds_spec
79 |
80 | This table covers the basic information about each dataset. There is
81 | only a single row per dataset, with the following information:
82 |
83 | - *dataset*: The abbreviated name of the dataset (e.g. AE)
84 |
- *structure*: Value structure of the dataset as a string
86 |
87 | - *label*: Dataset label
88 |
89 | ### ds_vars
90 |
91 | This table contains the information that bridges between purely dataset
92 | level and purely variable level. There is one row per dataset per
93 | variable:
94 |
95 | - *dataset*: The abbreviated name of the dataset. This will match to the
96 | name in **ds_spec**
97 |
98 | - *variable*: Variable name
99 |
100 | - *key_seq*: Sequence key, which are the variables used to order a
101 | dataset. This is a column of integers, where 1 is the first sorting
102 | variable and 2 is the second etc. If the variable is not used in
103 | sorting it will be left `NA`
104 |
105 | - *order*: Order sets the order of the columns to appear in the dataset.
106 | This is also a numeric value
107 |
108 | - *keep*: Logical value about if the variable needs to be kept
109 |
110 | - *core*: ADaM core, which should be one of the following values:
111 | “Expected”, “Required”, “Permissible”, “Conditionally Required”,
112 | “Conditionally Expected”, or NA. For more information about core see
113 | [CDISC](https://www.cdisc.org/standards/foundational/adam)
114 |
115 | - *supp_flag*: Logical to determine if the variable is in the
116 | supplemental datasets
117 |
118 | ### var_spec
119 |
120 | This table contains the purely variable level information. The goal is
121 | there is a single row per variable, which is common across all datasets.
122 | This helps ensure variables follow the CDISC standard. But, this isn’t
123 | always possible, so if information for a given variable differs across
124 | datasets, the variable will be recorded as dataset.variable in the
125 | variable column.
126 |
127 | - *variable*: Variable name, which should match the name in **ds_spec**.
128 | Unless the variable needs to be duplicated, then the name will be a
  combination of the dataset name and variable name from **ds_spec**
130 | (dataset.variable)
131 |
132 | - *type*: Variable class
133 |
134 | - *length*: Variable length (while not relevant to R datasets, this is
135 | important for when creating XPT files)
136 |
137 | - *label*: Variable label
138 |
139 | - *common*: Common across ADaM datasets
140 |
141 | - *format*: Variable format
142 |
143 | ### value_spec
144 |
This table contains the information at the value level.
146 | There will be at least one row per dataset/variable combination. There
147 | is more than one row per dataset/variable combination if the combination
148 | has values which have differing metadata. For instance LBORRES that are
149 | different data types depending on the value. The information contained
150 | are as follows:
151 |
152 | - *dataset*: The abbreviated name of the dataset. This will match to the
153 | name in **ds_spec**
154 |
155 | - *variable*: Variable name. This will match to the name in **ds_vars**
156 |
157 | - *type*: String of the value type
158 |
159 | - *origin*: Origin of the value
160 |
161 | - *sig_dig*: Significant digits of the value
162 |
163 | - *code_id*: ID for the code list to match the id in the **codelist**
164 | table
165 |
166 | - *where*: Value of the variable
167 |
168 | - *derivation_id*: ID for the derivation to match with the
169 | **derivation** table
170 |
171 | ### derivations
172 |
173 | This table has all the derivation information, with one row per
174 | derivation ID and the following information:
175 |
176 | - *derivation_id*: The ID, which should match to **value_spec**
177 |
178 | - *derivation*: Text describing the derivation
179 |
180 | ### codelist
181 |
182 | This table contains the code lists, permitted value lists, and external
183 | libraries nested within a tibble. There is only a single row per
184 | list/library, with the following information:
185 |
186 | - *code_id*: the ID used to identify the code list. This should be the
187 | same as the *code_id* in **val_spec**
188 |
189 | - *name*: Name of the code list
190 |
191 | - *code*: A list of tibbles (for code / decode combinations) and vectors
192 | (for permitted values and libraries), which contain all the codes
193 |
194 | - *type*: An indicator of if the information in the code column is a
195 | code/decode table, permitted value, or external library
196 |
197 | ### supp
198 |
199 | This table contains the information needed to create supplemental
200 | tables. If you want to add a variable which will go into a supplemental
201 | qualifier then you can create as normal (i.e. label information going to
202 | the **var_spec** table and derivation and origin going into the
203 | **value_spec** table), but you need to flag it as supplemental in the
204 | **ds_vars** table and add a row to the **supp** table. There is only a
205 | single row per dataset/variable, with the following information:
206 |
207 | - *dataset*: The abbreviated name of the dataset. This will match to the
208 | name in **ds_spec**
209 |
210 | - *variable*: Variable name. This will match to the name in **ds_spec**
211 |
212 | - *idvar*: ID variable used for the supplemental variable. Can be left
213 | missing if not needed
214 |
215 | - *qeval*: Evaluator for the supplemental variable
216 |
217 | To get more information about the metacore objects and how to build a
218 | specification reader, please see our vignettes.
219 |
220 | 
221 |
222 | ## Future Development
223 |
224 | This is an alpha release of this package, so if you have ideas on future
225 | improvements please add them to the issue log.
226 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | destination: docs
2 |
3 | template:
4 | params:
5 | bootswatch: yeti
6 | opengraph:
7 | image:
8 | src: man/figures/metacore.PNG
9 | alt: "metacore Hex Sticker"
10 |
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## R CMD check results
2 | There were no ERRORs or WARNINGs.
3 |
4 | ## Downstream dependencies
I have checked the downstream dependencies for this package.
6 |
--------------------------------------------------------------------------------
/inst/extdata/SDTM_spec_CDISC_pilot.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/inst/extdata/SDTM_spec_CDISC_pilot.xlsx
--------------------------------------------------------------------------------
/inst/extdata/mock_spec.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/inst/extdata/mock_spec.xlsx
--------------------------------------------------------------------------------
/inst/extdata/p21_mock.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/inst/extdata/p21_mock.xlsx
--------------------------------------------------------------------------------
/inst/extdata/pilot_ADaM.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/inst/extdata/pilot_ADaM.rda
--------------------------------------------------------------------------------
/inst/extdata/pilot_SDTM.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/inst/extdata/pilot_SDTM.rda
--------------------------------------------------------------------------------
/man/MetaCore_filter.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/metacore.R
3 | \name{MetaCore_filter}
4 | \alias{MetaCore_filter}
5 | \title{Select method to subset by a single dataframe}
6 | \usage{
7 | MetaCore_filter(value)
8 | }
9 | \arguments{
10 | \item{value}{the dataframe to subset by}
11 | }
12 | \description{
13 | Select method to subset by a single dataframe
14 | }
15 |
--------------------------------------------------------------------------------
/man/check_columns.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/validators.R
3 | \name{check_columns}
4 | \alias{check_columns}
5 | \title{Check all data frames include the correct types of columns}
6 | \usage{
7 | check_columns(
8 | ds_spec,
9 | ds_vars,
10 | var_spec,
11 | value_spec,
12 | derivations,
13 | codelist,
14 | supp
15 | )
16 | }
17 | \arguments{
18 | \item{ds_spec}{dataset specification}
19 |
20 | \item{ds_vars}{dataset variables}
21 |
22 | \item{var_spec}{variable specification}
23 |
24 | \item{value_spec}{value specification}
25 |
26 | \item{derivations}{derivation information}
27 |
28 | \item{codelist}{codelist information}
29 |
30 | \item{supp}{supp information}
31 | }
32 | \description{
33 | This function checks for vector types and accepted words
34 | }
35 |
--------------------------------------------------------------------------------
/man/check_structure.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{check_structure}
4 | \alias{check_structure}
5 | \title{Column Validation Function}
6 | \usage{
7 | check_structure(.data, col, func, any_na_acceptable, nm)
8 | }
9 | \arguments{
10 | \item{.data}{the dataframe to check the column for}
11 |
12 | \item{col}{the column to test}
13 |
14 | \item{func}{the function to use to assert column structure}
15 |
16 | \item{any_na_acceptable}{boolean, testing if the column can have missing}
17 |
18 | \item{nm}{name of column to check (for warning and error clarification)}
19 | }
20 | \description{
21 | Column Validation Function
22 | }
23 |
--------------------------------------------------------------------------------
/man/check_words.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{check_words}
4 | \alias{check_words}
5 | \title{Check Words in Column}
6 | \usage{
7 | check_words(..., col)
8 | }
9 | \arguments{
10 | \item{...}{permissible words in the column}
11 |
12 | \item{col}{the column to check for specific words}
13 | }
14 | \description{
15 | Check Words in Column
16 | }
17 |
--------------------------------------------------------------------------------
/man/checks.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/checks.R
3 | \name{check_inconsistent_labels}
4 | \alias{check_inconsistent_labels}
5 | \alias{check_inconsistent_types}
6 | \alias{check_inconsistent_formats}
7 | \title{Optional checks to consistency of metadata}
8 | \usage{
9 | check_inconsistent_labels(metacore)
10 |
11 | check_inconsistent_types(metacore)
12 |
13 | check_inconsistent_formats(metacore)
14 | }
15 | \arguments{
16 | \item{metacore}{metacore object to check}
17 | }
18 | \value{
19 | If all variables are consistent it will return a message. If there
20 | are inconsistencies it will return a message and a dataset of the variables
21 | with inconsistencies.
22 | }
23 | \description{
24 | These functions check to see if values (e.g labels, formats)
25 | that should be consistent for a variable across all data are actually
26 | consistent.
27 | }
28 | \examples{
29 | ## EXAMPLE WITH DUPLICATES
30 | # Loads in a metacore obj called metacore
31 | load(metacore_example("pilot_ADaM.rda"))
32 | check_inconsistent_labels(metacore)
33 |
34 | check_inconsistent_types(metacore)
35 |
36 | ## EXAMPLE WITHOUT DUPLICATES
37 | # Loads in a metacore obj called metacore
38 | load(metacore_example("pilot_SDTM.rda"))
39 | check_inconsistent_labels(metacore)
40 |
41 | check_inconsistent_formats(metacore)
42 |
43 | check_inconsistent_types(metacore)
44 | }
45 |
--------------------------------------------------------------------------------
/man/create_tbl.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/spec_builder.R
3 | \name{create_tbl}
4 | \alias{create_tbl}
5 | \title{Create table}
6 | \usage{
7 | create_tbl(doc, cols)
8 | }
9 | \arguments{
10 | \item{doc}{list of sheets from a excel doc}
11 |
12 | \item{cols}{vector of regex to get a datasets base on which columns it has.
13 | If the vector is named it will also rename the columns}
14 | }
15 | \value{
16 | dataset (or list of datasets if not specific enough)
17 | }
18 | \description{
19 | This function creates a table from excel sheets. This is mainly used
20 | internally for building spec readers, but is exported so others who need to
21 | build spec readers can use it.
22 | }
23 |
--------------------------------------------------------------------------------
/man/define_to_metacore.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/xml_builders.R
3 | \name{define_to_metacore}
4 | \alias{define_to_metacore}
5 | \title{Define XML to DataDef Object}
6 | \usage{
7 | define_to_metacore(path, quiet = FALSE)
8 | }
9 | \arguments{
10 | \item{path}{location of the define xml as a string}
11 |
12 | \item{quiet}{Option to quietly load in, this will suppress warnings, but not errors}
13 | }
14 | \value{
15 | DataDef Object
16 | }
17 | \description{
18 | Given a path, this function converts the define xml to a DataDef Object
19 | }
20 |
--------------------------------------------------------------------------------
/man/figures/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/man/figures/.DS_Store
--------------------------------------------------------------------------------
/man/figures/labeled-code_list.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/man/figures/labeled-code_list.png
--------------------------------------------------------------------------------
/man/figures/labeled-derivation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/man/figures/labeled-derivation.png
--------------------------------------------------------------------------------
/man/figures/labeled-ds_spec.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/man/figures/labeled-ds_spec.png
--------------------------------------------------------------------------------
/man/figures/labeled-ds_vars.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/man/figures/labeled-ds_vars.png
--------------------------------------------------------------------------------
/man/figures/labeled-supp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/man/figures/labeled-supp.png
--------------------------------------------------------------------------------
/man/figures/labeled-value_spec.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/man/figures/labeled-value_spec.png
--------------------------------------------------------------------------------
/man/figures/labeled-var_spec.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/man/figures/labeled-var_spec.png
--------------------------------------------------------------------------------
/man/figures/labeled_schema.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/man/figures/labeled_schema.png
--------------------------------------------------------------------------------
/man/figures/metacore.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/man/figures/metacore.PNG
--------------------------------------------------------------------------------
/man/figures/schema-colors.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/man/figures/schema-colors.png
--------------------------------------------------------------------------------
/man/get_control_term.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/metacore.R
3 | \name{get_control_term}
4 | \alias{get_control_term}
5 | \title{Get Control Term}
6 | \usage{
7 | get_control_term(metacode, variable, dataset = NULL)
8 | }
9 | \arguments{
10 | \item{metacode}{metacore object}
11 |
12 | \item{variable}{A variable name to get the controlled terms for. This can
13 | either be a string or just the name of the variable}
14 |
15 | \item{dataset}{A dataset name. This is not required if there is only one set
16 | of control terminology across all datasets}
17 | }
18 | \value{
19 | a vector for permitted values and a 2-column tibble for codelists
20 | }
21 | \description{
22 | Returns the control term (a vector for permitted values and a tibble for code
23 | lists) for a given variable. The dataset can be optionally specified if there
24 | is different control terminology for different datasets
25 | }
26 | \examples{
27 | \dontrun{
28 | meta_ex <- spec_to_metacore(metacore_example("p21_mock.xlsx"))
29 | get_control_term(meta_ex, QVAL, SUPPAE)
30 | get_control_term(meta_ex, "QVAL", "SUPPAE")
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/man/get_keys.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/metacore.R
3 | \name{get_keys}
4 | \alias{get_keys}
5 | \title{Get Dataset Keys}
6 | \usage{
7 | get_keys(metacode, dataset)
8 | }
9 | \arguments{
10 | \item{metacode}{metacore object}
11 |
12 | \item{dataset}{A dataset name}
13 | }
14 | \value{
15 | a 2-column tibble with dataset key variables and key sequence
16 | }
17 | \description{
18 | Returns the dataset keys for a given dataset
19 | }
20 | \examples{
21 | \dontrun{
22 | meta_ex <- spec_to_metacore(metacore_example("p21_mock.xlsx"))
23 | get_keys(meta_ex, "AE")
24 | get_keys(meta_ex, AE)
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/man/is_metacore.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/validators.R
3 | \name{is_metacore}
4 | \alias{is_metacore}
5 | \title{Is metacore object}
6 | \usage{
7 | is_metacore(x)
8 | }
9 | \arguments{
10 | \item{x}{object to check}
11 | }
12 | \value{
13 | \code{TRUE} if metacore, \code{FALSE} if not
14 | }
15 | \description{
16 | Is metacore object
17 | }
18 | \examples{
19 | # Loads in a metacore obj called metacore
20 | load(metacore_example("pilot_ADaM.rda"))
21 | is_metacore(metacore)
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/man/load_metacore.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/metacore.R
3 | \name{load_metacore}
4 | \alias{load_metacore}
5 | \title{load metacore object}
6 | \usage{
7 | load_metacore(path = NULL)
8 | }
9 | \arguments{
10 | \item{path}{location of the metacore object to load into memory}
11 | }
12 | \value{
13 | metacore object in memory
14 | }
15 | \description{
16 | load metacore object
17 | }
18 |
--------------------------------------------------------------------------------
/man/metacore.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/metacore.R
3 | \name{metacore}
4 | \alias{metacore}
5 | \title{R6 Class wrapper to create your own metacore object}
6 | \usage{
7 | metacore(
8 | ds_spec = tibble(dataset = character(), structure = character(), label = character()),
9 | ds_vars = tibble(dataset = character(), variable = character(), keep = logical(),
10 | key_seq = integer(), order = integer(), core = character(), supp_flag = logical()),
11 | var_spec = tibble(variable = character(), label = character(), length = integer(), type
12 | = character(), common = character(), format = character()),
13 | value_spec = tibble(dataset = character(), variable = character(), where = character(),
14 | type = character(), sig_dig = integer(), code_id = character(), origin = character(),
15 | derivation_id = integer()),
16 | derivations = tibble(derivation_id = integer(), derivation = character()),
17 | codelist = tibble(code_id = character(), name = character(), type = character(), codes
18 | = list()),
19 | supp = tibble(dataset = character(), variable = character(), idvar = character(), qeval
20 | = character())
21 | )
22 | }
23 | \arguments{
24 | \item{ds_spec}{contains each dataset in the study, with the labels for each}
25 |
26 | \item{ds_vars}{information on what variables are in each dataset, plus dataset-specific variable information}
27 |
28 | \item{var_spec}{variable information that is shared across all datasets}
29 |
30 | \item{value_spec}{parameter-specific information; as data is long, the specs for wbc might be different from those for hgb}
31 |
32 | \item{derivations}{contains derivation, it allows for different variables to have the same derivation}
33 |
34 | \item{codelist}{contains the code/decode information}
35 |
36 | \item{supp}{contains the idvar and qeval information for supplemental variables}
37 | }
38 | \description{
39 | R6 Class wrapper to create your own metacore object
40 | }
41 | \concept{Metacore}
42 |
--------------------------------------------------------------------------------
/man/metacore_example.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils.R
3 | \name{metacore_example}
4 | \alias{metacore_example}
5 | \title{Get path to metacore example}
6 | \usage{
7 | metacore_example(file = NULL)
8 | }
9 | \arguments{
10 | \item{file}{Name of file. If \code{NULL}, the example files will be listed.}
11 | }
12 | \description{
13 | metacore comes bundled with a number of sample files in its \code{inst/extdata}
14 | directory. This function makes them easy to access. When testing or writing
15 | examples in other packages, it is best to use the 'pilot_ADaM.rda' example as
16 | it loads fastest.
17 | }
18 | \examples{
19 | metacore_example()
20 | metacore_example("mock_spec.xlsx")
21 | }
22 |
--------------------------------------------------------------------------------
/man/pipe.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils-pipe.R
3 | \name{\%>\%}
4 | \alias{\%>\%}
5 | \title{Pipe operator}
6 | \usage{
7 | lhs \%>\% rhs
8 | }
9 | \description{
10 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
11 | }
12 | \keyword{internal}
13 |
--------------------------------------------------------------------------------
/man/read_all_sheets.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/spec_builder.R
3 | \name{read_all_sheets}
4 | \alias{read_all_sheets}
5 | \title{Read in all Sheets}
6 | \usage{
7 | read_all_sheets(path)
8 | }
9 | \arguments{
10 | \item{path}{string of the file path}
11 | }
12 | \value{
13 | a list of datasets
14 | }
15 | \description{
16 | Given a path to a file, this function reads in all sheets of an excel file
17 | }
18 |
--------------------------------------------------------------------------------
/man/save_metacore.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/metacore.R
3 | \name{save_metacore}
4 | \alias{save_metacore}
5 | \title{save metacore object}
6 | \usage{
7 | save_metacore(metacore_object, path = NULL)
8 | }
9 | \arguments{
10 | \item{metacore_object}{the metacore object in memory to save to disk}
11 |
12 | \item{path}{file path and file name to save metacore object}
13 | }
14 | \value{
15 | an .rda file
16 | }
17 | \description{
18 | save metacore object
19 | }
20 |
--------------------------------------------------------------------------------
/man/select_dataset.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/metacore.R
3 | \name{select_dataset}
4 | \alias{select_dataset}
5 | \title{Select metacore object to single dataset}
6 | \usage{
7 | select_dataset(.data, dataset, simplify = FALSE)
8 | }
9 | \arguments{
10 | \item{.data}{the metacore object of dataframes}
11 |
12 | \item{dataset}{the specific dataset to subset by}
13 |
14 | \item{simplify}{return a single dataframe}
15 | }
16 | \value{
17 | a filtered subset of the metacore object
18 | }
19 | \description{
20 | Select metacore object to single dataset
21 | }
22 |
--------------------------------------------------------------------------------
/man/spec_to_metacore.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/spec_builder.R
3 | \name{spec_to_metacore}
4 | \alias{spec_to_metacore}
5 | \title{Specification document to metacore object}
6 | \usage{
7 | spec_to_metacore(path, quiet = FALSE, where_sep_sheet = TRUE)
8 | }
9 | \arguments{
10 | \item{path}{string of file location}
11 |
12 | \item{quiet}{Option to quietly load in, this will suppress warnings, but not
13 | errors}
14 |
15 | \item{where_sep_sheet}{Option to tell if the where is in a separate sheet,
16 | like in older p21 specs or in a single sheet like newer p21 specs}
17 | }
18 | \value{
19 | given a spec document it returns a metacore object
20 | }
21 | \description{
22 | This function takes the location of an excel specification document and reads
23 | it in as a metacore object. At the moment it only supports specifications in
24 | the format of Pinnacle 21 specifications. But, the section-level spec builders can
25 | be used as building blocks for bespoke specification documents.
26 | }
27 |
--------------------------------------------------------------------------------
/man/spec_type.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/spec_builder.R
3 | \name{spec_type}
4 | \alias{spec_type}
5 | \title{Check the type of spec document}
6 | \usage{
7 | spec_type(path)
8 | }
9 | \arguments{
10 | \item{path}{file location as a string}
11 | }
12 | \value{
13 | returns string indicating the type of spec document
14 | }
15 | \description{
16 | Check the type of spec document
17 | }
18 |
--------------------------------------------------------------------------------
/man/spec_type_to_codelist.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/spec_builder.R
3 | \name{spec_type_to_codelist}
4 | \alias{spec_type_to_codelist}
5 | \title{Spec to codelist}
6 | \usage{
7 | spec_type_to_codelist(
8 | doc,
9 | codelist_cols = c(code_id = "ID", name = "[N|n]ame", code = "^[C|c]ode|^[T|t]erm",
10 | decode = "[D|d]ecode"),
11 | permitted_val_cols = NULL,
12 | dict_cols = c(code_id = "ID", name = "[N|n]ame", dictionary = "[D|d]ictionary", version
13 | = "[V|v]ersion"),
14 | sheets = NULL,
15 | simplify = FALSE
16 | )
17 | }
18 | \arguments{
19 | \item{doc}{Named list of datasets @seealso \code{\link[=read_all_sheets]{read_all_sheets()}} for exact
20 | format}
21 |
22 | \item{codelist_cols}{Named vector of column names that make up the codelist.
23 | The column names can be regular expressions for more flexibility. But, the
24 | names must follow the given pattern}
25 |
26 | \item{permitted_val_cols}{Named vector of column names that make up the
27 | permitted value. The column names can be regular expressions for more
28 | flexibility. This is optional, can be left as null if there isn't a
29 | permitted value sheet}
30 |
31 | \item{dict_cols}{Named vector of column names that make up the dictionary
32 | value. The column names can be regular expressions for more flexibility.
33 | This is optional, can be left as null if there isn't a dictionary
34 | sheet}
35 |
36 | \item{sheets}{Optional, regular expressions of the sheets}
37 |
38 | \item{simplify}{Boolean value, if true will convert code/decode pairs that
39 | are all equal to a permitted value list. False by default}
40 | }
41 | \value{
42 | a dataset formatted for the metacore object
43 | }
44 | \description{
45 | Creates the value_spec from a list of datasets (optionally filtered by the
46 | sheet input). The named vector \verb{*_cols} is used to determine which is the
47 | correct sheet and renames the columns.
48 | }
49 | \seealso{
50 | Other spec builders:
51 | \code{\link{spec_type_to_derivations}()},
52 | \code{\link{spec_type_to_ds_spec}()},
53 | \code{\link{spec_type_to_ds_vars}()},
54 | \code{\link{spec_type_to_value_spec}()},
55 | \code{\link{spec_type_to_var_spec}()}
56 | }
57 | \concept{spec builders}
58 |
--------------------------------------------------------------------------------
/man/spec_type_to_derivations.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/spec_builder.R
3 | \name{spec_type_to_derivations}
4 | \alias{spec_type_to_derivations}
5 | \title{Spec to derivation}
6 | \usage{
7 | spec_type_to_derivations(
8 | doc,
9 | cols = c(derivation_id = "ID", derivation = "[D|d]efinition|[D|d]escription"),
10 | sheet = "Method|Derivations?",
11 | var_cols = c(dataset = "[D|d]ataset|[D|d]omain", variable = "[N|n]ame|[V|v]ariables?",
12 | origin = "[O|o]rigin", predecessor = "[P|p]redecessor", comment = "[C|c]omment")
13 | )
14 | }
15 | \arguments{
16 | \item{doc}{Named list of datasets @seealso \code{\link[=read_all_sheets]{read_all_sheets()}} for exact
17 | format}
18 |
19 | \item{cols}{Named vector of column names. The column names can be regular
20 | expressions for more flexibility. But, the names must follow the given
21 | pattern}
22 |
23 | \item{sheet}{Regular expression for the sheet name}
24 |
25 | \item{var_cols}{Named vector of the name(s) of the origin, predecessor and
26 | comment columns. These do not have to be on the specified sheet.}
27 | }
28 | \value{
29 | a dataset formatted for the metacore object
30 | }
31 | \description{
32 | Creates the derivation table from a list of datasets (optionally filtered by
33 | the sheet input). The named vector \code{cols} is used to determine which is the
34 | correct sheet and renames the columns. The derivation will be used for
35 | "derived" origins, the comments for "assigned" origins, and predecessor for
36 | "predecessor" origins.
37 | }
38 | \seealso{
39 | Other spec builders:
40 | \code{\link{spec_type_to_codelist}()},
41 | \code{\link{spec_type_to_ds_spec}()},
42 | \code{\link{spec_type_to_ds_vars}()},
43 | \code{\link{spec_type_to_value_spec}()},
44 | \code{\link{spec_type_to_var_spec}()}
45 | }
46 | \concept{spec builders}
47 |
--------------------------------------------------------------------------------
/man/spec_type_to_ds_spec.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/spec_builder.R
3 | \name{spec_type_to_ds_spec}
4 | \alias{spec_type_to_ds_spec}
5 | \title{Spec to ds_spec}
6 | \usage{
7 | spec_type_to_ds_spec(
8 | doc,
9 | cols = c(dataset = "[N|n]ame|[D|d]ataset|[D|d]omain", structure = "[S|s]tructure",
10 | label = "[L|l]abel|[D|d]escription"),
11 | sheet = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{doc}{Named list of datasets @seealso \code{\link[=read_all_sheets]{read_all_sheets()}} for exact
16 | format}
17 |
18 | \item{cols}{Named vector of column names. The column names can be regular
19 | expressions for more flexibility. But, the names must follow the given pattern}
20 |
21 | \item{sheet}{Regular expression for the sheet name}
22 | }
23 | \value{
24 | a dataset formatted for the metacore object
25 | }
26 | \description{
27 | Creates the ds_spec from a list of datasets (optionally filtered by the sheet
28 | input). The named vector \code{cols} is used to determine which is the correct
29 | sheet and renames the columns
30 | }
31 | \seealso{
32 | Other spec builders:
33 | \code{\link{spec_type_to_codelist}()},
34 | \code{\link{spec_type_to_derivations}()},
35 | \code{\link{spec_type_to_ds_vars}()},
36 | \code{\link{spec_type_to_value_spec}()},
37 | \code{\link{spec_type_to_var_spec}()}
38 | }
39 | \concept{spec builders}
40 |
--------------------------------------------------------------------------------
/man/spec_type_to_ds_vars.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/spec_builder.R
3 | \name{spec_type_to_ds_vars}
4 | \alias{spec_type_to_ds_vars}
5 | \title{Spec to ds_vars}
6 | \usage{
7 | spec_type_to_ds_vars(
8 | doc,
9 | cols = c(dataset = "[D|d]ataset|[D|d]omain", variable =
10 | "[V|v]ariable [[N|n]ame]?|[V|v]ariables?", order =
11 | "[V|v]ariable [O|o]rder|[O|o]rder", keep = "[K|k]eep|[M|m]andatory"),
12 | key_seq_sep_sheet = TRUE,
13 | key_seq_cols = c(dataset = "Dataset", key_seq = "Key Variables"),
14 | sheet = "[V|v]ar|Datasets"
15 | )
16 | }
17 | \arguments{
18 | \item{doc}{Named list of datasets @seealso \code{\link[=read_all_sheets]{read_all_sheets()}} for exact
19 | format}
20 |
21 | \item{cols}{Named vector of column names. The column names can be regular
22 | expressions for more flexibility. But, the names must follow the given
23 | pattern}
24 |
25 | \item{key_seq_sep_sheet}{A boolean to indicate if the key sequence is on a
26 | separate sheet. If set to false add the key_seq column name to the \code{cols}
27 | vector.}
28 |
29 | \item{key_seq_cols}{named vector to get the key_sequence for each dataset}
30 |
31 | \item{sheet}{Regular expression for the sheet names}
32 | }
33 | \value{
34 | a dataset formatted for the metacore object
35 | }
36 | \description{
37 | Creates the ds_vars from a list of datasets (optionally filtered by the sheet
38 | input). The named vector \code{cols} is used to determine which is the correct
39 | sheet and renames the columns
40 | }
41 | \seealso{
42 | Other spec builders:
43 | \code{\link{spec_type_to_codelist}()},
44 | \code{\link{spec_type_to_derivations}()},
45 | \code{\link{spec_type_to_ds_spec}()},
46 | \code{\link{spec_type_to_value_spec}()},
47 | \code{\link{spec_type_to_var_spec}()}
48 | }
49 | \concept{spec builders}
50 |
--------------------------------------------------------------------------------
/man/spec_type_to_value_spec.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/spec_builder.R
3 | \name{spec_type_to_value_spec}
4 | \alias{spec_type_to_value_spec}
5 | \title{Spec to value_spec}
6 | \usage{
7 | spec_type_to_value_spec(
8 | doc,
9 | cols = c(dataset = "[D|d]ataset|[D|d]omain", variable = "[N|n]ame|[V|v]ariables?",
10 | origin = "[O|o]rigin", type = "[T|t]ype", code_id = "[C|c]odelist|Controlled Term",
11 | sig_dig = "[S|s]ignificant", where = "[W|w]here", derivation_id = "[M|m]ethod",
12 | predecessor = "[P|p]redecessor"),
13 | sheet = NULL,
14 | where_sep_sheet = TRUE,
15 | where_cols = c(id = "ID", where = c("Variable", "Comparator", "Value")),
16 | var_sheet = "[V|v]ar"
17 | )
18 | }
19 | \arguments{
20 | \item{doc}{Named list of datasets @seealso \code{\link[=read_all_sheets]{read_all_sheets()}} for exact
21 | format}
22 |
23 | \item{cols}{Named vector of column names. The column names can be regular
24 | expressions for more flexibility. But, the names must follow the given
25 | pattern}
26 |
27 | \item{sheet}{Regular expression for the sheet name}
28 |
29 | \item{where_sep_sheet}{Boolean value to control if the where information is in a
30 | separate dataset. If the where information is on a separate sheet, set to
31 | true and provide the column information with the \code{where_cols} inputs.}
32 |
33 | \item{where_cols}{Named list with an id and where field. All columns in the
34 | where field will be collapsed together}
35 |
36 | \item{var_sheet}{Name of sheet with the Variable information on it. Metacore
37 | expects each variable will have a row in the value_spec. Because many
38 | specifications only have information in the value tab this is added. If the
39 | information already exists in the value tab of your specification set to
40 | NULL}
41 | }
42 | \value{
43 | a dataset formatted for the metacore object
44 | }
45 | \description{
46 | Creates the value_spec from a list of datasets (optionally filtered by the
47 | sheet input). The named vector \code{cols} is used to determine which is the
48 | correct sheet and renames the columns
49 | }
50 | \seealso{
51 | Other spec builders:
52 | \code{\link{spec_type_to_codelist}()},
53 | \code{\link{spec_type_to_derivations}()},
54 | \code{\link{spec_type_to_ds_spec}()},
55 | \code{\link{spec_type_to_ds_vars}()},
56 | \code{\link{spec_type_to_var_spec}()}
57 | }
58 | \concept{spec builders}
59 |
--------------------------------------------------------------------------------
/man/spec_type_to_var_spec.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/spec_builder.R
3 | \name{spec_type_to_var_spec}
4 | \alias{spec_type_to_var_spec}
5 | \title{Spec to var_spec}
6 | \usage{
7 | spec_type_to_var_spec(
8 | doc,
9 | cols = c(variable = "[N|n]ame|[V|v]ariables?", length = "[L|l]ength", label =
10 | "[L|l]abel", type = "[T|t]ype", dataset = "[D|d]ataset|[D|d]omain", format =
11 | "[F|f]ormat"),
12 | sheet = "[V|v]ar"
13 | )
14 | }
15 | \arguments{
16 | \item{doc}{Named list of datasets @seealso \code{\link[=read_all_sheets]{read_all_sheets()}} for exact
17 | format}
18 |
19 | \item{cols}{Named vector of column names. The column names can be regular
20 | expressions for more flexibility. But, the names must follow the given pattern}
21 |
22 | \item{sheet}{Regular expression for the sheet name}
23 | }
24 | \value{
25 | a dataset formatted for the metacore object
26 | }
27 | \description{
28 | Creates the var_spec from a list of datasets (optionally filtered by the sheet
29 | input). The named vector \code{cols} is used to determine which is the correct
30 | sheet and renames the columns. (Note: the keep column will be converted to logical)
31 | }
32 | \seealso{
33 | Other spec builders:
34 | \code{\link{spec_type_to_codelist}()},
35 | \code{\link{spec_type_to_derivations}()},
36 | \code{\link{spec_type_to_ds_spec}()},
37 | \code{\link{spec_type_to_ds_vars}()},
38 | \code{\link{spec_type_to_value_spec}()}
39 | }
40 | \concept{spec builders}
41 |
--------------------------------------------------------------------------------
/man/xml_to_codelist.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/xml_builders.R
3 | \name{xml_to_codelist}
4 | \alias{xml_to_codelist}
5 | \title{XML to code list}
6 | \usage{
7 | xml_to_codelist(doc)
8 | }
9 | \arguments{
10 | \item{doc}{xml document}
11 | }
12 | \value{
13 | a tibble containing the code list and permitted value information
14 | }
15 | \description{
16 | Reads in a define xml and creates a code_list table. The code_list table is a
17 | nested tibble where each row is a code list or permitted value list. The code
18 | column contains a vector or a tibble depending on if it is a permitted values
19 | or code list
20 | }
21 | \seealso{
22 | Other xml builder:
23 | \code{\link{xml_to_derivations}()},
24 | \code{\link{xml_to_ds_spec}()},
25 | \code{\link{xml_to_ds_vars}()},
26 | \code{\link{xml_to_value_spec}()},
27 | \code{\link{xml_to_var_spec}()}
28 | }
29 | \concept{xml builder}
30 |
--------------------------------------------------------------------------------
/man/xml_to_derivations.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/xml_builders.R
3 | \name{xml_to_derivations}
4 | \alias{xml_to_derivations}
5 | \title{XML to derivation table}
6 | \usage{
7 | xml_to_derivations(doc)
8 | }
9 | \arguments{
10 | \item{doc}{xml document}
11 | }
12 | \value{
13 | dataframe with derivation id's and derivations
14 | }
15 | \description{
16 | This reads in a xml document and gets all the derivations/comments. These can
17 | be cross referenced to variables using the derivation_id's
18 | }
19 | \seealso{
20 | Other xml builder:
21 | \code{\link{xml_to_codelist}()},
22 | \code{\link{xml_to_ds_spec}()},
23 | \code{\link{xml_to_ds_vars}()},
24 | \code{\link{xml_to_value_spec}()},
25 | \code{\link{xml_to_var_spec}()}
26 | }
27 | \concept{xml builder}
28 |
--------------------------------------------------------------------------------
/man/xml_to_ds_spec.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/xml_builders.R
3 | \name{xml_to_ds_spec}
4 | \alias{xml_to_ds_spec}
5 | \title{XML to Data Set Spec}
6 | \usage{
7 | xml_to_ds_spec(doc)
8 | }
9 | \arguments{
10 | \item{doc}{xml document}
11 | }
12 | \value{
13 | data frame with the data set specifications
14 | }
15 | \description{
16 | Creates a dataset specification, which has the domain name and label for each dataset
17 | }
18 | \seealso{
19 | Other xml builder:
20 | \code{\link{xml_to_codelist}()},
21 | \code{\link{xml_to_derivations}()},
22 | \code{\link{xml_to_ds_vars}()},
23 | \code{\link{xml_to_value_spec}()},
24 | \code{\link{xml_to_var_spec}()}
25 | }
26 | \concept{xml builder}
27 |
--------------------------------------------------------------------------------
/man/xml_to_ds_vars.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/xml_builders.R
3 | \name{xml_to_ds_vars}
4 | \alias{xml_to_ds_vars}
5 | \title{XML to Data Set Var table}
6 | \usage{
7 | xml_to_ds_vars(doc)
8 | }
9 | \arguments{
10 | \item{doc}{xml document}
11 | }
12 | \value{
13 | data frame with the dataset and variables
14 | }
15 | \description{
16 | Creates the ds_vars table, which acts as a key between the datasets and the var spec
17 | }
18 | \seealso{
19 | Other xml builder:
20 | \code{\link{xml_to_codelist}()},
21 | \code{\link{xml_to_derivations}()},
22 | \code{\link{xml_to_ds_spec}()},
23 | \code{\link{xml_to_value_spec}()},
24 | \code{\link{xml_to_var_spec}()}
25 | }
26 | \concept{xml builder}
27 |
--------------------------------------------------------------------------------
/man/xml_to_value_spec.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/xml_builders.R
3 | \name{xml_to_value_spec}
4 | \alias{xml_to_value_spec}
5 | \title{XML to value spec}
6 | \usage{
7 | xml_to_value_spec(doc)
8 | }
9 | \arguments{
10 | \item{doc}{xml document}
11 | }
12 | \value{
13 | tibble with the value level information
14 | }
15 | \description{
16 | Takes a define xml and pulls out the value level metadata including codelist_id's,
17 | defines_id's, and where clause. There is one row per variable except when there
18 | is a where clause, at which point there is one row per value.
19 | }
20 | \seealso{
21 | Other xml builder:
22 | \code{\link{xml_to_codelist}()},
23 | \code{\link{xml_to_derivations}()},
24 | \code{\link{xml_to_ds_spec}()},
25 | \code{\link{xml_to_ds_vars}()},
26 | \code{\link{xml_to_var_spec}()}
27 | }
28 | \concept{xml builder}
29 |
--------------------------------------------------------------------------------
/man/xml_to_var_spec.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/xml_builders.R
3 | \name{xml_to_var_spec}
4 | \alias{xml_to_var_spec}
5 | \title{XML to variable spec}
6 | \usage{
7 | xml_to_var_spec(doc)
8 | }
9 | \arguments{
10 | \item{doc}{define xml document}
11 | }
12 | \value{
13 | data frame with variable, length, label columns
14 | }
15 | \description{
16 | Takes a define xml and returns a dataset with specifications for each
17 | variable. The variable will just be the variable, unless the specifications
18 | for that variable differ between datasets
19 | }
20 | \seealso{
21 | Other xml builder:
22 | \code{\link{xml_to_codelist}()},
23 | \code{\link{xml_to_derivations}()},
24 | \code{\link{xml_to_ds_spec}()},
25 | \code{\link{xml_to_ds_vars}()},
26 | \code{\link{xml_to_value_spec}()}
27 | }
28 | \concept{xml builder}
29 |
--------------------------------------------------------------------------------
/metacore.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: No
4 | SaveWorkspace: No
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 3
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | LineEndingConversion: Posix
18 |
19 | BuildType: Package
20 | PackageUseDevtools: Yes
21 | PackageInstallArgs: --no-multiarch --with-keep.source
22 | PackageRoxygenize: rd,collate,namespace
23 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(metacore)
3 |
4 | test_check("metacore")
5 |
--------------------------------------------------------------------------------
/tests/testthat/example_spec.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/tests/testthat/example_spec.xlsx
--------------------------------------------------------------------------------
/tests/testthat/metacore-figure/unnamed-chunk-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/tests/testthat/metacore-figure/unnamed-chunk-1-1.png
--------------------------------------------------------------------------------
/tests/testthat/metacore-figure/unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/tests/testthat/metacore-figure/unnamed-chunk-2-1.png
--------------------------------------------------------------------------------
/tests/testthat/spec_no_val.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atorus-research/metacore/4b251e077374152bfa7032cbcbb50cd2912aec1c/tests/testthat/spec_no_val.xlsx
--------------------------------------------------------------------------------
/tests/testthat/test-checks.R:
--------------------------------------------------------------------------------
1 | test_that("Test label Checks", {
2 | load(metacore_example("pilot_ADaM.rda"))
3 | man_label <- tibble::tribble(
4 | ~variable, ~label, ~n_vars, ~ls_of_vars,
5 | "ABLFL" , "ABLFL" , 1L, "ADADAS.ABLFL",
6 | "ABLFL" , "Analysis Baseline Flag" , 1L, "ADNPIX.ABLFL" ,
7 | "ABLFL" , "Baseline Record Flag" , 6L, c("ADLBC.ABLFL", "ADLBCPV.ABLFL", "ADLBH.ABLFL", "ADLBHPV.ABLFL", "ADLBHY.ABLFL", "ADVS.ABLFL"),
8 | "ANL01FL", "Analysis Flag 01" , 5L, c("ADLBC.ANL01FL", "ADLBCPV.ANL01FL", "ADLBH.ANL01FL", "ADLBHPV.ANL01FL", "ADVS.ANL01FL"),
9 | "ANL01FL", "Analysis Record Flag 01" , 3L, c("ADADAS.ANL01FL", "ADCIBC.ANL01FL", "ADNPIX.ANL01FL"),
10 | "CHG" , "Baseline Value" , 1L, "ADADAS.CHG",
11 | "CHG" , "Change from Baseline" , 6L, c("ADLBC.CHG", "ADLBCPV.CHG", "ADLBH.CHG", "ADLBHPV.CHG", "ADNPIX.CHG", "ADVS.CHG"),
12 | "ITTFL" , "Intent-to-Treat Population Flag" , 3L, c("ADADAS.ITTFL", "ADCIBC.ITTFL", "ADNPIX.ITTFL"),
13 | "ITTFL" , "Intent-To-Treat Population Flag" , 1L, "ADSL.ITTFL"
14 | ) %>%
15 | dplyr::arrange(variable, label)
16 | label_df <- check_inconsistent_labels(metacore) %>%
17 | dplyr::arrange(variable, label)
18 | expect_equal(label_df, man_label)
19 |
20 | load(metacore_example("pilot_SDTM.rda"))
21 | expect_message(check_inconsistent_labels(metacore),
22 | "No mismatch labels detected")
23 |
24 | expect_error(check_inconsistent_labels("metacore"),
25 | "Expects a metacore object")
26 | })
27 | test_that("Check formats and types", {
28 | load(metacore_example("pilot_ADaM.rda"))
29 | expect_message(check_inconsistent_formats(metacore),
30 | "No mismatch formats detected"
31 | )
32 | man_types <- tibble::tribble(
33 | ~variable, ~type, ~n_vars, ~ls_of_vars,
34 | "AVAL" , "float" , 6L, c("ADLBC.AVAL", "ADLBCPV.AVAL", "ADLBH.AVAL", "ADLBHPV.AVAL", "ADLBHY.AVAL", "ADVS.AVAL"),
35 | "AVAL", "integer", 4L, c("ADADAS.AVAL", "ADCIBC.AVAL", "ADNPIX.AVAL", "ADTTE.AVAL"),
36 | "BASE", "float" , 6L, c("ADLBC.BASE", "ADLBCPV.BASE", "ADLBH.BASE","ADLBHPV.BASE", "ADLBHY.BASE","ADVS.BASE"),
37 | "BASE" , "integer", 2L, c("ADADAS.BASE", "ADNPIX.BASE"),
38 | "CHG", "float" , 5L, c("ADLBC.CHG", "ADLBCPV.CHG", "ADLBH.CHG", "ADLBHPV.CHG", "ADVS.CHG"),
39 | "CHG", "integer", 2L, c("ADADAS.CHG", "ADNPIX.CHG"),
40 | "PCHG", "float" , 1L, "ADVS.PCHG",
41 | "PCHG", "integer", 2L, c("ADADAS.PCHG", "ADNPIX.PCHG"),
42 | )
43 |
44 | type_df <- check_inconsistent_types(metacore)
45 |
46 | expect_equal(type_df, man_types)
47 | })
48 |
49 |
--------------------------------------------------------------------------------
/tests/testthat/test-metacore.R:
--------------------------------------------------------------------------------
1 | # where should this function go
2 | empty_df <- function(nms, fill) {
3 | df <- as.data.frame(matrix(fill,1,length(nms)))
4 | names(df) <- nms
5 | return(df)
6 | }
7 |
8 | dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = "A")) %>%
9 | setNames(c("ds_spec",
10 | "ds_vars",
11 | "var_spec",
12 | "value_spec",
13 | "derivations",
14 | "codelist",
15 | "supp"))
16 | dfs$ds_vars <- dfs$ds_vars %>%
17 | mutate(supp_flag = FALSE)
18 |
19 | # function from the withr package
20 | with_dir <- function (new, code) {
21 | old <- setwd(dir = new)
22 | on.exit(setwd(old))
23 | force(code)
24 | }
25 |
26 |
27 | test_that("readonly function factory", {
28 | a <- readonly("a")
29 | expect_equal(class(a), "function")
30 | expect_equal(attr(a, "name"), "a")
31 | })
32 |
33 | test_that("metacore wrapper function works", {
34 | wrapper <- suppressWarnings(do.call(metacore, dfs[1:7]))
35 |
36 | r6 <- suppressWarnings(
37 | MetaCore$new(dfs$ds_spec,
38 | dfs$ds_vars,
39 | dfs$var_spec,
40 | dfs$value_spec,
41 | dfs$derivations,
42 | dfs$codelist,
43 | dfs$supp)
44 | )
45 |
46 | expect_equal(wrapper, r6)
47 |
48 | expect_warning(define_to_metacore(metacore_example("ADaM_define.xml")))
49 | expect_warning(spec_to_metacore(metacore_example("p21_mock.xlsx")))
50 | })
51 |
52 |
53 | test_that("Can pass metacore NULL df's", {
54 | wrapper <- suppressWarnings(metacore(dfs$ds_spec, NULL, dfs$var_spec,
55 | dfs$value_spec, dfs$derivations, dfs$codelist, dfs$supp))
56 | dummy <- list(character(), character(), numeric(), numeric(),
57 | logical(), character(), logical())
58 | names(dummy) <- c("dataset", "variable", "key_seq", "order",
59 | "keep", "core", "supp_flag")
60 | dummy <- as_tibble(dummy)
61 | #Because of the labels the dfs are slightly different so checking
62 | # the insides match
63 | expect_equal(names(wrapper$ds_vars), names(dummy))
64 | expect_equal(map_chr(wrapper$ds_vars, mode),
65 | map_chr(dummy, mode))
66 | })
67 |
68 | test_that("subsetting works", {
69 | test <- spec_to_metacore(metacore_example("p21_mock.xlsx"), quiet = TRUE)
70 | subset <- test %>% select_dataset("DM")
71 | expect_equal(unique(subset$ds_spec$dataset), "DM")
72 | })
73 |
74 | test_that("save_metacore creates .rds with no file path", {
75 | wrapper <- suppressWarnings(do.call(metacore, dfs[1:7]))
76 | my_temp_dir <- tempdir()
77 | with_dir(my_temp_dir, save_metacore(wrapper))
78 | expect_true("wrapper.rds" %in% list.files(my_temp_dir))
79 | unlink(my_temp_dir)
80 | })
81 |
82 | test_that("save_metacore replaces file path", {
83 | wrapper <- suppressWarnings(do.call(metacore, dfs[1:7]))
84 | my_temp_dir <- tempdir()
85 | save_metacore(wrapper, file.path(my_temp_dir, "wrapper.csv"))
86 | expect_true("wrapper.rds" %in% list.files(my_temp_dir))
87 | unlink(my_temp_dir)
88 | })
89 |
90 | test_that("save_metacore uses file path", {
91 | wrapper <- suppressWarnings(do.call(metacore, dfs[1:7]))
92 | my_temp_dir <- tempdir()
93 | save_metacore(wrapper, file.path(my_temp_dir, "wrapper.rds"))
94 | expect_true("wrapper.rds" %in% list.files(my_temp_dir))
95 | unlink(my_temp_dir)
96 | })
97 |
98 | test_that("load_metacore loads .rds", {
99 | wrapper <- suppressWarnings(do.call(metacore, dfs[1:7]))
100 | my_temp_dir <- tempdir()
101 | save_metacore(wrapper, file.path(my_temp_dir, "wrapper.rds"))
102 | wrapper <- load_metacore(file.path(my_temp_dir, "wrapper.rds"))
103 | expect_equal(class(wrapper), c("Metacore", "R6"))
104 | unlink(my_temp_dir)
105 | })
106 |
107 | test_that("load metacore fails with no path", {
108 | expect_error(load_metacore())
109 | })
110 |
111 | test_that("load metacore fails with no path and rdss in wd", {
112 | wrapper <- suppressWarnings(do.call(metacore, dfs[1:7]))
113 | my_temp_dir <- tempdir()
114 | save_metacore(wrapper, file.path(my_temp_dir, "wrapper.rds"))
115 | expect_error(
116 | with_dir(my_temp_dir, load_metacore())
117 | )
118 | unlink(my_temp_dir)
119 | })
120 |
121 | test_that("pulling out control terminology works", {
122 | test <- spec_to_metacore(metacore_example("p21_mock.xlsx"), quiet = TRUE)
123 | #Testing Errors
124 | ## Not specific enough
125 | expect_error(get_control_term(test, QVAL))
126 | ## Wrong Dataset name
127 | expect_error(get_control_term(test, QVAL, LB))
128 | ## Wrong variable name
129 | expect_error(get_control_term(test, QVA))
130 | expect_equal(
131 | get_control_term(test, QVAL, SUPPAE),
132 | tibble(code = c("N", "Y"), decode = c("No", "Yes"))
133 | )
134 | expect_equal(
135 | get_control_term(test, "QVAL", "SUPPAE"),
136 | tibble(code = c("N", "Y"), decode = c("No", "Yes"))
137 | )
138 | })
139 |
140 | test_that("get_keys works", {
141 | test <- spec_to_metacore(metacore_example("p21_mock.xlsx"), quiet = TRUE)
142 | #Testing Errors
143 | ## Domain not in ds_vars table
144 | expect_error(get_keys(test, DS))
145 | ## Missing dataset name
146 | expect_error(get_keys(test))
147 | #Testing Correct Output
148 | expect_equal(
149 | get_keys(test, DM),
150 | tibble(variable = c("STUDYID", "USUBJID"), key_seq = c(1L, 2L)) %>%
151 | add_labs(variable = "Variable Name",
152 | key_seq = "Sequence Key")
153 | )
154 | })
155 |
--------------------------------------------------------------------------------
/tests/testthat/test-utils.R:
--------------------------------------------------------------------------------
1 | test_df <- data.frame(
2 | a = c("blah", NA),
3 | b = c("SpecificWord", "Incorrect")
4 | )
5 |
6 | test_that("check_structure function works", {
7 | # error when NA not permissible
8 |
9 | expect_equal(check_structure(test_df, "a", is.character, FALSE, "test_df")$error,
10 | "a from the test_df table contains missing values. Actual values are needed.")
11 |
12 | expect_equal(check_structure(test_df, "a", is.logical, TRUE, "test_df")$warning,
13 | "test_df$a fails is.logical check \n"
14 | )
15 |
16 | expect_null(check_structure(test_df, "b", check_words("SpecificWord", "Incorrect"), TRUE, "test_df")$warning)
17 | expect_equal(check_structure(test_df, "b", check_words("SpecificWord"), TRUE, "test_df")$warning,
18 | "The following words in test_df$b are not allowed: \n Incorrect\n")
19 |
20 | })
21 |
22 | test_that("check_words creates function", {
23 | expect_true(is.function(check_words("word")))
24 | })
25 |
26 |
27 | test_that("add_lab function adds label", {
28 | a <- add_lab("a", "mylabel")
29 | expect_equal(attr(a, "label"), "mylabel")
30 | })
31 |
32 | test_that("label set to null if not supplied", {
33 | a <- add_lab("a", character(0))
34 | expect_null(attr(a, "label"))
35 | })
36 |
37 | test_that("add labels adds multiple labels", {
38 | a <- data.frame(a = "a") %>%
39 | add_labs("a" = "label_a")
40 | expect_equal(attr(a$a, "label"), "label_a")
41 | })
42 |
43 | test_that("add labels adds NULL to missing labels", {
44 | a <- data.frame(a = "a", b = "b") %>%
45 | add_labs("a" = "label_a", "b" = character(0))
46 | expect_null(attr(a$b, "label"))
47 | })
48 |
49 | test_that("metacore example returns file options", {
50 | expect_equal(sort(metacore_example()),
51 | sort(c("ADaM_define.xml", "mock_spec.xlsx", "p21_mock.xlsx", "pilot_ADaM.rda",
52 | "pilot_SDTM.rda",
53 | "SDTM_define.xml", "SDTM_spec_CDISC_pilot.xlsx")))
54 | })
55 |
--------------------------------------------------------------------------------
/tests/testthat/test-validators.R:
--------------------------------------------------------------------------------
1 | # where should this function go
2 | empty_df <- function(nms, fill) {
3 | df <- as.data.frame(matrix(fill,1,length(nms)))
4 | names(df) <- nms
5 | return(df)
6 | }
7 |
8 | # both of these functions only work
9 | # when the data def object is loaded
10 | # and i think its checking the wrong thing
11 |
12 | test_that("specific words and primitive columns fail when character", {
13 |
14 | dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = "A")) %>%
15 | setNames(c("ds_spec",
16 | "ds_vars",
17 | "var_spec",
18 | "value_spec",
19 | "derivations",
20 | "codelist",
21 | "supp"))
22 |
23 | expect_warning(do.call(check_columns, dfs))
24 | })
25 |
26 |
27 | test_that("NA columns fail", {
28 |
29 | dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = NA)) %>%
30 | setNames(c("ds_spec",
31 | "ds_vars",
32 | "var_spec",
33 | "value_spec",
34 | "derivations",
35 | "codelist",
36 | "supp"))
37 |
38 | expect_error(do.call(check_columns, dfs))
39 | })
40 |
41 |
42 | test_that("NA columns fail", {
43 |
44 | dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = "A")) %>%
45 | setNames(c("ds_spec",
46 | "ds_vars",
47 | "var_spec",
48 | "value_spec",
49 | "derivations",
50 | "codelist",
51 | "supp"))
52 |
53 | dfs$ds_spec$label <- NA
54 |
55 | expect_warning(do.call(check_columns, dfs))
56 | })
57 |
58 | test_that("all_message dataframe contains 7 datasets", {
59 | expect_equal(all_message() %>%
60 | distinct(dataset) %>%
61 | nrow(), 7)
62 | })
63 |
64 | test_that("check cross-reference tests", {
65 | dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = "A")) %>%
66 | setNames(c("ds_spec",
67 | "ds_vars",
68 | "var_spec",
69 | "value_spec",
70 | "derivations",
71 | "codelist",
72 | "supp"))
73 |
74 | dfs$var_spec <- dfs$var_spec %>%
75 | mutate(variable = "B")
76 | dfs$derivations <- dfs$derivations %>%
77 | mutate(derivation_id = "C")
78 | dfs$codelist <- dfs$codelist %>%
79 | mutate(code_id = "D")
80 | expect_warning(do.call(metacore, dfs[1:7]))
81 | })
82 |
83 | test_that("test for incorrect column names", {
84 | dfs <- purrr::map(col_vars(), ~ empty_df(.x, fill = NA)) %>%
85 | setNames(c("ds_spec",
86 | "ds_vars",
87 | "var_spec",
88 | "value_spec",
89 | "derivations",
90 | "codelist",
91 | "supp"))
92 |
93 | dfs$codelist <- dfs$codelist %>%
94 | mutate(codelist2 = "A")
95 | expect_warning(do.call(metacore, dfs[1:7]))
96 | })
97 |
98 | test_that("check object works", {
99 | load(metacore_example("pilot_ADaM.rda"))
100 | metacore %>%
101 | is_metacore() %>%
102 | expect_equal(TRUE)
103 |
104 | is_metacore("THIS IS NOT A THING") %>%
105 | expect_equal(FALSE)
106 | })
107 |
--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 |
--------------------------------------------------------------------------------
/vignettes/Building_Specification_Readers.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Building Specification Readers"
3 | output: rmarkdown::html_vignette
4 | vignette: >
5 | %\VignetteIndexEntry{Building Specification Readers}
6 | %\VignetteEncoding{UTF-8}
7 | %\VignetteEngine{knitr::rmarkdown}
8 | editor_options:
9 | chunk_output_type: console
10 | ---
11 |
12 | ```{r, include = FALSE}
13 | knitr::opts_chunk$set(
14 | collapse = TRUE,
15 | comment = "#>"
16 | )
17 | ```
18 |
19 | ```{r setup}
20 | library(metacore)
21 | library(dplyr)
22 | library(purrr)
23 | library(stringr)
24 | ```
25 |
26 | The first thing to do when trying to build a specification reader is to try the default. By default metacore can read in specifications that are in the Pinnacle 21 specification format. If your document isn't in that format, it is still worth trying the default readers, as the error messages can be helpful.
27 |
28 | ```{r, error=TRUE}
29 | spec_to_metacore(metacore_example("mock_spec.xlsx"))
30 | ```
31 |
32 | As we can see, the mock spec we are using here doesn't match the format. Therefore we will have to build a bespoke reader. Before we start, it is important to understand the structure of the metacore object. Each object acts as its own database for all dataset related metadata. The object has 7 tables and their general purposes are as follows:
33 |
34 | - **ds_spec**: Contains dataset level information
35 |
36 | - **ds_vars**: Bridges the dataset and variable level information
37 |
38 | - **var_spec**: Contains variable level information
39 |
40 | - **value_spec**: Contains value level information
41 |
42 | - **derivations**: Contains all derivations
43 |
44 | - **codelist**: Contains information about code/decodes, permitted values and external libraries
45 |
46 | - **supp**: Contains information specific to supplemental variables
47 |
48 | Here is a schema of how all this fits together
49 |
50 | 
51 |
52 | ds_spec is connected to ds_vars by the 'dataset' variable and ds_vars is connected to var_spec by the 'variable' variable, etc. For more information on the make-up of metacore objects please see the README.
53 |
54 | Now that we understand what makes a metacore object, we can start to develop the reader.
55 |
56 | First, we need to identify what type of specification format you have. At the moment we support a specification where each tab contains information relevant to a different table, such as a domain tab, a variable tab, etc. To test this you can use the `spec_type` function.
57 |
58 | ```{r}
59 | metacore:::spec_type(metacore_example("mock_spec.xlsx"))
60 | ```
61 |
62 | Given we have the type style of specification, we can attempt to run with the lower level built-in specification to metacore build. There are 6 lower level specification builders to match each of the 6 datasets needed, spec_type_to\_\*. Even if these fail, the error messages should help identify the issues.
63 |
64 | But, before we start any of that, we need to read in our document using the `read_all_sheets` function. This function reads in a multisheet excel file into a named list, where the name of each dataset is the name of the tab. The lower level specification builders do assume the provided `doc` is a named list. This mock specification has 5 tabs: domain, variables, value level metadata, controlled terms, and computational method. So it looks like we might need to split the information in these tabs to get the 6 tables needed for the metacore object.
65 |
66 | ```{r}
67 | doc <- read_all_sheets(metacore_example("mock_spec.xlsx"))
68 | doc %>% map(head)
69 |
70 | ```
71 |
72 | Let's start with making the ds_spec (dataset specification) table using `spec_type_to_ds_spec`. The ds_spec table is made of 3 columns: the dataset name, the dataset structure, and the dataset label. If we look at our specification document, it looks like all this information is in the Domains tab. Now we know what we need, we can start building the table by trying the `spec_type_to_ds_spec` function.
73 |
74 | This function takes in our named list of datasets (doc), a named vector of columns (cols) and a sheet name (sheet). But, only doc is needed, the other inputs have defaults. So we can try with just the default and see what we get.
75 |
76 | ```{r, error=TRUE}
77 | spec_type_to_ds_spec(doc)
78 | ```
79 |
80 | The error tells us there is an issue with the label column in the Domains table. Meaning, we need to change the default vector for the cols input because the default regular expression isn't specific enough. First, let's check the column names in the Domain tab.
81 |
82 | ```{r}
83 | doc$Domains %>% names()
84 | ```
85 |
86 | If we look at the default input for cols, `"label" = "[L|l]abel|[D|d]escription"`, we can see the label is matching to the `Label` and the `Description` columns.
87 |
88 | We only need the Domain `Name`, `Label`, and `Data Structure` columns. So we can update the expressions to be more specific.
89 |
90 | ```{r}
91 | ds_spec <- spec_type_to_ds_spec(doc,
92 | cols = c("dataset" = "Name",
93 | "structure" = "Data Structure",
94 | "label" = "Label"))
95 | head(ds_spec)
96 | ```
97 |
98 | Regular expressions are used to match the columns, so if you needed a more flexible input, you could do that. Now, we have the ds_spec table we can move on to the ds_vars table.
99 |
100 | The ds_vars table has 7 columns:
101 |
102 | - dataset: dataset name
103 |
104 | - variable: variable name
105 |
106 | - key_seq: integers controlling the sort order of each dataset
107 |
108 | - order: integer controlling the column order of each dataset
109 |
110 | - keep: boolean to determine if the variable is needed
111 |
112 | - core: ADaM core (Expected, Required, Permissible)
113 |
114 | - supp_flag: boolean to determine if the variable is in the supplementals
115 |
116 | When we look back at our specification document we can see all this information is in the variable tab. The inputs for the `spec_type_to_ds_vars` function are the same as before, but with slightly different defaults. By default ds_vars only checks sheets labeled "Variable" (this is because all the settings are defaulted to read in P21 formatted specs). But, those defaults work for our specifications because all the information is in the variable tab; so we can try with just the defaults again.
117 |
118 | ```{r, error=TRUE}
119 | spec_type_to_ds_vars(doc)
120 | ```
121 |
122 | This error means that when it tries to match the sheet entitled Variable, the variable column matches two different columns. This is the same error we had before. We just need to have a quick look at the columns and adjust the regular expression to be more specific. Additionally, the key sequence variable isn't in the variable tab. We saw this information above in the domain tab. So we will need to do two things to fix this. First, adjust the dataset name in the `key_seq_cols` argument. Second, change the sheets to include the variable and the domain sheet.
123 |
124 | ```{r}
125 | doc$Variables %>% head()
126 |
127 | ds_vars<- spec_type_to_ds_vars(doc, cols = c("dataset" = "Domain",
128 | "variable" = "[V|v]ariable [N|n]ame",
129 | "order" = "[V|v]ariable [O|o]rder",
130 | "keep" = "[M|m]andatory"),
131 | key_seq_cols = c("dataset" = "Domain Name",
132 | "key_seq" = "Key"),
133 | sheet = "[V|v]ar|Domains")
134 |
135 | head(ds_vars)
136 | ```
137 |
138 | The next table we have is var_spec, the table of variable level metadata. var_spec is separate from ds_vars because, in accordance with CDISC standards, labels and lengths should be the same for a given variable across all datasets. So, we are able to normalize the data to only have one row per variable, which ensures this rule and helps reduce the size of the object. There are 6 columns in var_spec:
139 |
140 | - variable: variable name
141 |
142 | - length: variable length
143 |
144 | - label: variable label
145 |
146 | - type: variable Class
147 |
148 | - format: variable format
149 |
150 | - common: boolean if variable is common across ADaM
151 |
152 | Looking back at our specification we see this will also be built using the Variable tab. So, we know we need to edit the regular expression for the variable to make it more specific. Additionally, if you look at the default for `cols` you see there is a dataset input. This is because some standards aren't 100% consistent, some variables (e.g. visit) have different lengths depending on the dataset. **So to accommodate this some of the variables in var_spec are in the ds.variable format**. These builders will do this conversion for you, but the dataset is needed. The other thing the builders can automatically deal with is the common variable. If given a dataset column, the builder function will automatically figure out which variables are common to all datasets. This is good because we don't have a common variable in our specs.
153 |
154 | ```{r}
155 | var_spec <- spec_type_to_var_spec(doc, cols = c("variable" = "Variable Name",
156 | "length" = "[L|l]ength",
157 | "label" = "[L|l]abel",
158 | "type" = "[T|t]ype",
159 | "dataset" = "[D|d]ataset|[D|d]omain",
160 | "format" = "Format"))
161 | head(var_spec)
162 | ```
163 |
164 | There is one issue here: the format column also contains the codelist names. This is because the information came from the "Controlled Term or Format" column of my spec document. So the final step of preparing the var_spec table is to remove the controlled terms. It is easy here because all the formats end in a full stop (.), but the controlled terms don't.
165 |
166 | ```{r}
167 | var_spec <- var_spec %>%
168 | mutate(format = if_else(str_detect(format, "\\."), format, ""))
169 | ```
170 |
171 | The next dataset is value_spec, which contains the value level metadata. It is made up of 8 columns:
172 |
173 | - dataset: dataset name
174 |
175 | - variable: variable name
176 |
177 | - origin: origin of data
178 |
179 | - type: value type
180 |
181 | - sig_dig: significant digits of the value
182 |
183 | - code_id: id used to cross-reference the code/decode
184 |
185 | - where: value of the variable
186 |
187 | - derivation_id: id used to cross-reference the derivation
188 |
189 | By default, `spec_type_to_value_spec` is set up to have the where information on a different sheet because that is the format of a P21 spec, but in our spec we don't have that. In fact, we don't have any value level metadata in our spec. But, that is fine - the default builders will just pull what information they can from the variable tab. Additionally, this spec doesn't have a predecessor column, so we can just use the method column.
190 |
191 | ```{r}
192 | value_spec <- spec_type_to_value_spec(doc, cols = c("dataset" = "VLM Name|Domain",
193 | "variable" = "VLM Name|Variable Name",
194 | "origin" = "[O|o]rigin",
195 | "type" = "[T|t]ype",
196 | "code_id" = "Controlled Term",
197 | "where" = "Parameter Code",
198 | "derivation_id" = "Method",
199 | "predecessor" = "Method"),
200 | where_sep_sheet = FALSE)
201 | head(value_spec)
202 | ```
203 |
204 | The derivation table is relatively simple by comparison. It just has two columns, the derivation id and the derivation. But, the derivation comes from the supplied derivation, predecessor, or comment column depending on the origin. In this mock we don't have a predecessor column, so we can set it to the comment column as well.
205 |
206 | ```{r}
207 | derivation <- spec_type_to_derivations(doc, cols = c("derivation_id" = "Name",
208 | "derivation" = "[D|d]efinition|[D|d]escription"),
209 | var_cols = c("dataset" = "Domain Name",
210 | "variable" = "Variable Name|VLM",
211 | "origin" = "[O|o]rigin",
212 | "predecessor" = "Comment",
213 | "comment" = "Comment"))
214 | head(derivation)
215 | ```
216 |
217 | The final table is codelist. This table contains all the code/decode pairs, all lists of permitted values and information about external libraries. What is somewhat special about the structure of this table is there isn't just a code and a decode column, but rather a codes column that contains a list of code/decode tables, permitted value vectors and external dictionary vectors. So there is one row per code (i.e. a row for country and one for yes/no codes etc.). This structure makes it easier to see all the codes at once and allows some code to be numeric and others to be character.
218 |
219 | By default the `spec_type_to_codelist` function expects codelists and external dictionaries. But, in the specification we only have codelists, so `dict_cols` needs to be set to `NULL`.
220 |
221 | ```{r}
222 | codelist <- spec_type_to_codelist(doc, codelist_cols = c("code_id" = "Codelist Code",
223 | "name" = "Codelist Name",
224 | "code" = "Coded Value",
225 | "decode" = "Decoded Value"),
226 | simplify = TRUE,
227 | dict_cols = NULL)
228 | head(codelist)
229 | ```
230 |
231 | Now that we have all the tables we need, we can make the metacore object
232 |
233 | ```{r}
234 | metacore(ds_spec, ds_vars, var_spec, value_spec,
235 | derivation, codelist)
236 | ```
237 |
238 | And we're good to go!
239 |
--------------------------------------------------------------------------------
/vignettes/Example.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Example"
3 | output: rmarkdown::html_vignette
4 | vignette: >
5 | %\VignetteIndexEntry{Example}
6 | %\VignetteEngine{knitr::rmarkdown}
7 | %\VignetteEncoding{UTF-8}
8 | ---
9 |
10 | ```{r, include = FALSE}
11 | knitr::opts_chunk$set(
12 | collapse = TRUE,
13 | comment = "#>"
14 | )
15 | ```
16 |
17 | ```{r setup}
18 | library(metacore)
19 | library(xml2)
20 | ```
21 |
22 | [CDISC](https://www.cdisc.org/) standards provide a standard for submission of data set metadata through a document known as define.xml. The define provides a great deal of useful information that is both machine readable and can be viewed through your web browser. While many organizations wait to produce a define until the datasets are finalized, it can still be advantageous to be able to read metadata directly from a define. For this purpose, we developed readers that can go directly from a define.xml to a metacore object.
23 |
24 | To do this, we've built separate reader functions for each of the metacore tables. For more information on the structure of the metacore tables, check out the README.
25 |
26 | We start by reading the define from disk using the `read_xml()` function from the `xml2` package.
27 |
28 | ```{r}
29 | doc <- read_xml(metacore_example("SDTM_define.xml"))
30 | xml_ns_strip(doc)
31 | ```
32 |
33 |
34 | Next, we use the metacore readers for each of the separate tables necessary for a metacore object.
35 |
36 | ```{r}
37 | ds_spec2 <- xml_to_ds_spec(doc)
38 | ds_vars <- xml_to_ds_vars(doc)
39 | var_spec <- xml_to_var_spec(doc)
40 | value_spec <- xml_to_value_spec(doc)
41 | code_list <- xml_to_codelist(doc)
42 | derivations <- xml_to_derivations(doc)
43 | ```
44 |
45 | Great! Now we're ready to create our metacore object.
46 |
47 | ```{r}
48 | test <- metacore(ds_spec2, ds_vars, var_spec, value_spec, derivations, code_list)
49 | ```
50 |
51 | Something to note about a metacore object is that it inherently holds all data from your source of metadata, be it your specification, define.xml, database, etc. So that means you have _all_ the metadata. In your program, it's likely that you'll just want to keep metadata relevant to the dataset you're currently programming. We've made this process easy, with functions that filter metadata down to information only relevant to a specific dataset.
52 |
53 | ```{r}
54 | # a metacore object with all your dataframes
55 | subset <- test %>% select_dataset("DM")
56 | subset$ds_spec
57 |
58 | # a simplified dataframe
59 | subset_t <- test %>% select_dataset("DM", simplify = TRUE)
60 | ```
61 |
62 | As can be seen above, the metacore object can be filtered directly, or by using the `simplify = TRUE` argument, a simplified data frame can be returned.
63 |
64 | ```{r}
65 | subset_t
66 | ```
67 |
--------------------------------------------------------------------------------