├── .Rbuildignore
├── .classpath
├── .github
├── pull_request_template.md
└── workflows
│ ├── R_CMD_check_Hades.yaml
│ ├── R_CMD_check_main_weekly.yaml
│ └── nightly_cleanup_Hades.yml
├── .gitignore
├── .project
├── .settings
└── org.eclipse.jdt.core.prefs
├── CRAN-SUBMISSION
├── DESCRIPTION
├── FeatureExtraction.Rproj
├── NAMESPACE
├── NEWS.md
├── R
├── Aggregation.R
├── CompareCohorts.R
├── CovariateData.R
├── DefaultCovariateSettings.R
├── DefaultTemporalCovariateSettings.R
├── DefaultTemporalSequenceCovariateSettings.R
├── DetailedCovariateSettings.R
├── FeatureExtraction.R
├── GetCovariates.R
├── GetCovariatesFromCohortAttributes.R
├── GetCovariatesFromOtherCohorts.R
├── GetDefaultCovariates.R
├── HelperFunctions.R
├── Normalization.R
├── Table1.R
└── UnitTestHelperFunctions.R
├── README.md
├── _pkgdown.yml
├── compare_versions
├── cran-comments.md
├── deploy.sh
├── docs
├── 404.html
├── articles
│ ├── CreatingCovariatesBasedOnOtherCohorts.html
│ ├── CreatingCovariatesUsingCohortAttributes.html
│ ├── CreatingCovariatesUsingCohortAttributes_files
│ │ ├── accessible-code-block-0.0.1
│ │ │ └── empty-anchor.js
│ │ ├── header-attrs-2.7
│ │ │ └── header-attrs.js
│ │ └── header-attrs-2.9
│ │ │ └── header-attrs.js
│ ├── CreatingCustomCovariateBuilders.html
│ ├── CreatingCustomCovariateBuildersKorean.html
│ ├── CreatingCustomCovariateBuildersKorean_files
│ │ ├── accessible-code-block-0.0.1
│ │ │ └── empty-anchor.js
│ │ ├── header-attrs-2.7
│ │ │ └── header-attrs.js
│ │ └── header-attrs-2.9
│ │ │ └── header-attrs.js
│ ├── CreatingCustomCovariateBuilders_files
│ │ ├── accessible-code-block-0.0.1
│ │ │ └── empty-anchor.js
│ │ ├── header-attrs-2.7
│ │ │ └── header-attrs.js
│ │ └── header-attrs-2.9
│ │ │ └── header-attrs.js
│ ├── Untitled-1f21019f-1a2d-4b5e-85a6-26b6d323301d.png
│ ├── Untitled-4b3d9353-8da9-49e0-967b-69bc05653585.png
│ ├── Untitled-9bab0a34-d162-407b-aee0-0fc6224987b4.png
│ ├── Untitled-dd762182-9d8f-4065-bddf-6282630a0f99.png
│ ├── UsingFeatureExtraction.html
│ ├── UsingFeatureExtractionKorean.html
│ ├── UsingFeatureExtractionKorean_files
│ │ ├── accessible-code-block-0.0.1
│ │ │ └── empty-anchor.js
│ │ ├── header-attrs-2.7
│ │ │ └── header-attrs.js
│ │ └── header-attrs-2.9
│ │ │ └── header-attrs.js
│ ├── UsingFeatureExtraction_files
│ │ ├── accessible-code-block-0.0.1
│ │ │ └── empty-anchor.js
│ │ ├── header-attrs-2.7
│ │ │ └── header-attrs.js
│ │ └── header-attrs-2.9
│ │ │ └── header-attrs.js
│ └── index.html
├── authors.html
├── bootstrap-toc.css
├── bootstrap-toc.js
├── docsearch.css
├── docsearch.js
├── index.html
├── link.svg
├── news
│ └── index.html
├── pkgdown.css
├── pkgdown.js
├── pkgdown.yml
├── pull_request_template.html
├── reference
│ ├── CovariateData-class.html
│ ├── FeatureExtraction-package.html
│ ├── FeatureExtraction.html
│ ├── Rplot001.png
│ ├── aggregateCovariates.html
│ ├── byMaxFf.html
│ ├── bySumFf.html
│ ├── computeStandardizedDifference.html
│ ├── convertPrespecSettingsToDetailedSettings.html
│ ├── createAnalysisDetails.html
│ ├── createCohortAttrCovariateSettings.html
│ ├── createCohortBasedCovariateSettings.html
│ ├── createCohortBasedTemporalCovariateSettings.html
│ ├── createCovariateSettings.html
│ ├── createDefaultCovariateSettings.html
│ ├── createDefaultTemporalCovariateSettings.html
│ ├── createDetailedCovariateSettings.html
│ ├── createDetailedTemporalCovariateSettings.html
│ ├── createEmptyCovariateData.html
│ ├── createHdpsCovariateSettings.html
│ ├── createTable1.html
│ ├── createTable1CovariateSettings.html
│ ├── createTemporalCovariateSettings.html
│ ├── createTemporalSequenceCovariateSettings.html
│ ├── dot-createLooCovariateSettings.html
│ ├── dot-getDbLooCovariateData.html
│ ├── filterByCohortDefinitionId.html
│ ├── filterByRowId.html
│ ├── filterCovariateDataCovariates.html
│ ├── getDbCohortAttrCovariatesData.html
│ ├── getDbCohortBasedCovariatesData.html
│ ├── getDbCovariateData.html
│ ├── getDbDefaultCovariateData.html
│ ├── getDbHdpsCovariateData.html
│ ├── getDefaultTable1Specifications.html
│ ├── index.html
│ ├── isAggregatedCovariateData.html
│ ├── isCovariateData.html
│ ├── isTemporalCovariateData.html
│ ├── loadCovariateData.html
│ ├── saveCovariateData.html
│ └── tidyCovariateData.html
└── sitemap.xml
├── extras
├── CohortBasedCovariatesVignetteDataFetch.R
├── DefaultCovariateSettingsTemplate.R
├── DetailedCovariateSettingsTemplate.R
├── FeatureExtraction.pdf
├── GetHdpsCovariates.R
├── PackageMaintenance.R
├── TestCode.R
├── TestHashForPostcoordinatedConcepts.R
├── VignetteDataFetch.R
└── uniquePcCombos.rds
├── inst
├── csv
│ ├── OtherParameters.csv
│ ├── OtherSqlToLoad.csv
│ ├── PrespecAnalyses.csv
│ ├── PrespecTemporalAnalyses.csv
│ ├── PrespecTemporalAnnualAnalysis.csv
│ ├── PrespecTemporalSequenceAnalyses.csv
│ ├── Table1Specs.csv
│ └── jarChecksum.txt
├── doc
│ ├── CreatingCovariatesBasedOnOtherCohorts.pdf
│ ├── CreatingCovariatesUsingCohortAttributes.pdf
│ ├── CreatingCustomCovariateBuilders.pdf
│ ├── CreatingCustomCovariateBuildersKorean.pdf
│ ├── UsingFeatureExtraction.pdf
│ └── UsingFeatureExtractionKorean.pdf
├── java
│ ├── SqlRender-1.19.1.jar
│ ├── featureExtraction-3.10.0.jar
│ └── json-20231013.jar
├── sql
│ └── sql_server
│ │ ├── CareSite.sql
│ │ ├── Chads2.sql
│ │ ├── Chads2Vasc.sql
│ │ ├── CharlsonIndex.sql
│ │ ├── CohortBasedBinaryCovariates.sql
│ │ ├── CohortBasedCountCovariates.sql
│ │ ├── ConceptCounts.sql
│ │ ├── CreateCovAnalysisRefTables.sql
│ │ ├── Dcsi.sql
│ │ ├── DemographicsAge.sql
│ │ ├── DemographicsAgeGroup.sql
│ │ ├── DemographicsEthnicity.sql
│ │ ├── DemographicsGender.sql
│ │ ├── DemographicsMonth.sql
│ │ ├── DemographicsRace.sql
│ │ ├── DemographicsTime.sql
│ │ ├── DemographicsYear.sql
│ │ ├── DemographicsYearMonth.sql
│ │ ├── DomainConcept.sql
│ │ ├── DomainConceptGroup.sql
│ │ ├── GetAttrCovariates.sql
│ │ ├── GetHdpsCovariates.sql
│ │ ├── Hfrs.sql
│ │ ├── IncludeDescendants.sql
│ │ ├── MeasObsValueAsConcept.sql
│ │ ├── MeasurementRangeGroup.sql
│ │ ├── MeasurementValue.sql
│ │ ├── RemoveCovariateTempTables.sql
│ │ ├── covariateCohorts.sql
│ │ └── unit_tests
│ │ ├── createTestingData.sql
│ │ └── dropTestingData.sql
└── testdata
│ ├── binaryCovariateData.zip
│ └── continuousCovariateData.zip
├── java
├── FeatureExtraction.jardesc
└── org
│ └── ohdsi
│ └── featureExtraction
│ ├── FeatureExtraction.java
│ ├── JarChecksum.java
│ ├── ReadCSVFile.java
│ ├── ReadCSVFileWithHeader.java
│ ├── Row.java
│ └── StringUtilities.java
├── man-roxygen
└── GetCovarParams.R
├── man
├── CovariateData-class.Rd
├── FeatureExtraction-package.Rd
├── aggregateCovariates.Rd
├── computeStandardizedDifference.Rd
├── convertPrespecSettingsToDetailedSettings.Rd
├── createAnalysisDetails.Rd
├── createCohortAttrCovariateSettings.Rd
├── createCohortBasedCovariateSettings.Rd
├── createCohortBasedTemporalCovariateSettings.Rd
├── createCovariateSettings.Rd
├── createDefaultCovariateSettings.Rd
├── createDefaultTemporalCovariateSettings.Rd
├── createDetailedCovariateSettings.Rd
├── createDetailedTemporalCovariateSettings.Rd
├── createEmptyCovariateData.Rd
├── createTable1.Rd
├── createTable1CovariateSettings.Rd
├── createTemporalCovariateSettings.Rd
├── createTemporalSequenceCovariateSettings.Rd
├── dot-createLooCovariateSettings.Rd
├── dot-getDbLooCovariateData.Rd
├── filterByCohortDefinitionId.Rd
├── filterByRowId.Rd
├── getDbCohortAttrCovariatesData.Rd
├── getDbCohortBasedCovariatesData.Rd
├── getDbCovariateData.Rd
├── getDbDefaultCovariateData.Rd
├── getDefaultTable1Specifications.Rd
├── isAggregatedCovariateData.Rd
├── isCovariateData.Rd
├── isTemporalCovariateData.Rd
├── loadCovariateData.Rd
├── saveCovariateData.Rd
└── tidyCovariateData.Rd
├── nbactions.xml
├── pom.xml
├── tests
├── testBigQuery.R
├── testOracle.R
├── testPostgres.R
├── testRedshift.R
├── testSnowflake.R
├── testSpark.R
├── testSqlServer.R
├── testSqlite.R
└── testthat
│ ├── setup.R
│ ├── test-Aggregation.R
│ ├── test-CompareCohorts.R
│ ├── test-CovariateData.R
│ ├── test-DetailedCovariateSettings.R
│ ├── test-FeatureExtractionInternal.R
│ ├── test-GetCohortBasedCovariates.R
│ ├── test-GetCovariates.R
│ ├── test-GetCovariatesFromCohortAttributes.R
│ ├── test-GetCovariatesTemporalSequence.R
│ ├── test-GetDefaultCovariates.R
│ ├── test-HelperFunctions.R
│ ├── test-PostcoordConcepts.R
│ ├── test-PrespecAnalyses.R
│ ├── test-Table1.R
│ ├── test-query-no-fail.R
│ ├── test-spot-checks.R
│ └── test-tidyCovariates.R
└── vignettes
├── CreatingCovariatesBasedOnOtherCohorts.Rmd
├── CreatingCovariatesUsingCohortAttributes.Rmd
├── CreatingCustomCovariateBuilders.Rmd
├── CreatingCustomCovariateBuildersKorean.Rmd
├── UsingFeatureExtraction.Rmd
└── UsingFeatureExtractionKorean.Rmd
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | pom.xml
2 | extras
3 | docs
4 | man-roxygen
5 | ^.*\.Rproj$
6 | ^\.Rproj\.user$
7 | .classpath
8 | .project
9 | .github
10 | ^\.travis\.yml$
11 | deploy.sh
12 | ^\.git
13 | compare_versions
14 | _pkgdown.yml
15 | nbactions.xml
16 | ^CRAN-SUBMISSION$
17 | ^cran-comments\.md$
18 |
--------------------------------------------------------------------------------
/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | Before you do a pull request, you should always **file an issue** and make sure the package maintainer agrees that it’s a problem, and is happy with your basic proposal for fixing it. We don’t want you to spend a bunch of time on something that we don’t think is a good idea.
2 |
3 | Additional requirements for pull requests:
4 |
5 | - Adhere to the [Developer Guidelines](https://ohdsi.github.io/MethodsLibrary/developerGuidelines.html) as well as the [OHDSI Code Style](https://ohdsi.github.io/MethodsLibrary/codeStyle.html).
6 |
7 | - If possible, add unit tests for new functionality you add.
8 |
9 | - Restrict your pull request to solving the issue at hand. Do not try to 'improve' parts of the code that are not related to the issue. If you feel other parts of the code need better organization, create a separate issue for that.
10 |
11 | - Make sure you pass R check without errors and warnings before submitting.
12 |
13 | - Always target the `develop` branch, and make sure you are up-to-date with the develop branch.
14 |
15 |
--------------------------------------------------------------------------------
/.github/workflows/R_CMD_check_main_weekly.yaml:
--------------------------------------------------------------------------------
1 | on:
2 | schedule:
3 | - cron: '0 5 * * 0' # every Sunday at 5am UTC
4 |
5 | name: 'R check'
6 |
7 | jobs:
8 | R-CMD-check-main:
9 | runs-on: ${{ matrix.config.os }}
10 |
11 | name: ${{ matrix.config.os }} (${{ matrix.config.r }})
12 |
13 | strategy:
14 | fail-fast: false
15 | matrix:
16 | config:
17 | - {os: macOS-latest, r: 'release'}
18 |
19 | env:
20 | GITHUB_PAT: ${{ secrets.GH_TOKEN }}
21 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
22 | RSPM: ${{ matrix.config.rspm }}
23 | CDM5_ORACLE_CDM_SCHEMA: ${{ secrets.CDM5_ORACLE_CDM54_SCHEMA }}
24 | CDM5_ORACLE_OHDSI_SCHEMA: ${{ secrets.CDM5_ORACLE_OHDSI_SCHEMA }}
25 | CDM5_ORACLE_PASSWORD: ${{ secrets.CDM5_ORACLE_PASSWORD }}
26 | CDM5_ORACLE_SERVER: ${{ secrets.CDM5_ORACLE_SERVER }}
27 | CDM5_ORACLE_USER: ${{ secrets.CDM5_ORACLE_USER }}
28 | CDM5_POSTGRESQL_CDM_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_CDM54_SCHEMA }}
29 | CDM5_POSTGRESQL_OHDSI_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_OHDSI_SCHEMA }}
30 | CDM5_POSTGRESQL_PASSWORD: ${{ secrets.CDM5_POSTGRESQL_PASSWORD }}
31 | CDM5_POSTGRESQL_SERVER: ${{ secrets.CDM5_POSTGRESQL_SERVER }}
32 | CDM5_POSTGRESQL_USER: ${{ secrets.CDM5_POSTGRESQL_USER }}
33 | CDM5_SQL_SERVER_CDM_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_CDM54_SCHEMA }}
34 | CDM5_SQL_SERVER_OHDSI_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_OHDSI_SCHEMA }}
35 | CDM5_SQL_SERVER_PASSWORD: ${{ secrets.CDM5_SQL_SERVER_PASSWORD }}
36 | CDM5_SQL_SERVER_SERVER: ${{ secrets.CDM5_SQL_SERVER_SERVER }}
37 | CDM5_SQL_SERVER_USER: ${{ secrets.CDM5_SQL_SERVER_USER }}
38 | CDM5_REDSHIFT_CDM_SCHEMA: ${{ secrets.CDM5_REDSHIFT_CDM54_SCHEMA }}
39 | CDM5_REDSHIFT_OHDSI_SCHEMA: ${{ secrets.CDM5_REDSHIFT_OHDSI_SCHEMA }}
40 | CDM5_REDSHIFT_PASSWORD: ${{ secrets.CDM5_REDSHIFT_PASSWORD }}
41 | CDM5_REDSHIFT_SERVER: ${{ secrets.CDM5_REDSHIFT_SERVER }}
42 | CDM5_REDSHIFT_USER: ${{ secrets.CDM5_REDSHIFT_USER }}
43 | CDM5_SPARK_USER: ${{ secrets.CDM5_SPARK_USER }}
44 | CDM5_SPARK_PASSWORD: ${{ secrets.CDM5_SPARK_PASSWORD }}
45 | CDM5_SPARK_CONNECTION_STRING: ${{ secrets.CDM5_SPARK_CONNECTION_STRING }}
46 |
47 | steps:
48 | - uses: actions/checkout@v3
49 |
50 | - uses: r-lib/actions/setup-r@v2
51 | with:
52 | r-version: ${{ matrix.config.r }}
53 |
54 | - uses: r-lib/actions/setup-tinytex@v2
55 |
56 | - uses: r-lib/actions/setup-pandoc@v2
57 |
58 | - uses: r-lib/actions/setup-r-dependencies@v2
59 | with:
60 | extra-packages: any::rcmdcheck
61 | needs: check
62 |
63 | - uses: r-lib/actions/check-r-package@v2
64 | with:
65 | args: 'c("--no-manual", "--as-cran")'
66 | error-on: '"warning"'
67 | check-dir: '"check"'
68 |
--------------------------------------------------------------------------------
/.github/workflows/nightly_cleanup_Hades.yml:
--------------------------------------------------------------------------------
1 | name: 'nightly artifacts cleanup'
2 | on:
3 | schedule:
4 | - cron: '0 1 * * *' # every night at 1 am UTC
5 |
6 | jobs:
7 | remove-old-artifacts:
8 | runs-on: ubuntu-latest
9 | timeout-minutes: 10
10 |
11 | steps:
12 | - name: Remove old artifacts
13 | uses: c-hive/gha-remove-artifacts@v1
14 | with:
15 | age: '7 days'
16 | # Optional inputs
17 | # skip-tags: true
18 | skip-recent: 1
19 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # History files
2 | .Rhistory
3 |
4 | # Example code in package build process
5 | *-Ex.R
6 |
7 | # R data files from past sessions
8 | .Rdata
9 |
10 | # RStudio files
11 | .Rproj.user/
12 | .Rproj.user
13 |
14 | # SqlRender
15 | statement_*.sql
16 | errorReport.txt
17 |
18 | #C++ objects
19 | src/*.o
20 | src/*.so
21 | src/*.dll
22 | /Debug
23 | standalone/build/*
24 |
25 | # Java compiled files
26 | /bin
27 |
28 | /target/
29 |
30 | *.tex
31 | *.log
32 | .Renviron
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | FeatureExtraction
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 | org.eclipse.m2e.core.maven2Builder
15 |
16 |
17 |
18 |
19 |
20 | org.eclipse.m2e.core.maven2Nature
21 | org.eclipse.jdt.core.javanature
22 |
23 |
24 |
25 | 1628619985913
26 |
27 | 30
28 |
29 | org.eclipse.core.resources.regexFilterMatcher
30 | node_modules|.git|__CREATED_BY_JAVA_LANGUAGE_SERVER__
31 |
32 |
33 |
34 |
35 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3 | org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
6 | org.eclipse.jdt.core.compiler.compliance=1.8
7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
11 | org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
12 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
13 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
14 | org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
15 | org.eclipse.jdt.core.compiler.processAnnotations=disabled
16 | org.eclipse.jdt.core.compiler.release=disabled
17 | org.eclipse.jdt.core.compiler.source=1.8
18 |
--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 3.8.0
2 | Date: 2025-03-19 15:22:25 UTC
3 | SHA: c0961a155c6fba22f3b5e4825b599f2410ed529b
4 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: FeatureExtraction
2 | Type: Package
3 | Title: Generating Features for a Cohort
4 | Version: 3.10.0
5 | Date: 2025-05-08
6 | Authors@R: c(
7 | person("Martijn", "Schuemie", , "schuemie@ohdsi.org", role = c("aut")),
8 | person("Marc", "Suchard", role = c("aut")),
9 | person("Patrick", "Ryan", role = c("aut")),
10 | person("Jenna", "Reps", role = c("aut")),
11 | person("Anthony", "Sena", , "sena@ohdsi.org", role = c("aut")),
12 | person("Ger", "Inberg", , "g.inberg@erasmusmc.nl", role = c("aut", "cre")),
13 | person("Observational Health Data Science and Informatics", role = c("cph"))
14 | )
15 | Maintainer: Ger Inberg <g.inberg@erasmusmc.nl>
16 | Description: An R interface for generating features for a cohort using data in the Common Data Model. Features can be constructed using default or custom made feature definitions. Furthermore it's possible to aggregate features and get the summary statistics.
17 | Depends:
18 | R (>= 3.2.2),
19 | DatabaseConnector (>= 3.0.0),
20 | Andromeda (>= 1.0.0)
21 | Imports:
22 | methods,
23 | dplyr,
24 | rJava,
25 | jsonlite,
26 | SqlRender (>= 1.18.0),
27 | ParallelLogger (>= 2.0.2),
28 | cli,
29 | pillar,
30 | readr,
31 | rlang,
32 | RSQLite,
33 | DBI,
34 | checkmate,
35 | vroom
36 | Suggests:
37 | testthat,
38 | knitr,
39 | rmarkdown,
40 | Eunomia (>= 2.0.0),
41 | withr,
42 | curl,
43 | httr
44 | License: Apache License 2.0
45 | VignetteBuilder: knitr
46 | URL: https://github.com/OHDSI/FeatureExtraction
47 | BugReports: https://github.com/OHDSI/FeatureExtraction/issues
48 | NeedsCompilation: no
49 | RoxygenNote: 7.3.2
50 | Encoding: UTF-8
51 | Language: en-US
52 |
--------------------------------------------------------------------------------
/FeatureExtraction.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 | ProjectId: 7209d25a-3fa0-4681-8605-9ab497fb422c
3 |
4 | RestoreWorkspace: No
5 | SaveWorkspace: No
6 | AlwaysSaveHistory: No
7 |
8 | EnableCodeIndexing: Yes
9 | UseSpacesForTab: Yes
10 | NumSpacesForTab: 2
11 | Encoding: UTF-8
12 |
13 | RnwWeave: Sweave
14 | LaTeX: pdfLaTeX
15 |
16 | BuildType: Package
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | PackageCheckArgs: --no-build-vignettes
19 | PackageRoxygenize: rd,collate,namespace
20 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method(print,summary.CovariateData)
4 | export(aggregateCovariates)
5 | export(computeStandardizedDifference)
6 | export(convertPrespecSettingsToDetailedSettings)
7 | export(createAnalysisDetails)
8 | export(createCohortAttrCovariateSettings)
9 | export(createCohortBasedCovariateSettings)
10 | export(createCohortBasedTemporalCovariateSettings)
11 | export(createCovariateSettings)
12 | export(createDefaultCovariateSettings)
13 | export(createDefaultTemporalCovariateSettings)
14 | export(createDetailedCovariateSettings)
15 | export(createDetailedTemporalCovariateSettings)
16 | export(createEmptyCovariateData)
17 | export(createTable1)
18 | export(createTable1CovariateSettings)
19 | export(createTemporalCovariateSettings)
20 | export(createTemporalSequenceCovariateSettings)
21 | export(filterByCohortDefinitionId)
22 | export(filterByRowId)
23 | export(getDbCohortAttrCovariatesData)
24 | export(getDbCohortBasedCovariatesData)
25 | export(getDbCovariateData)
26 | export(getDbDefaultCovariateData)
27 | export(getDefaultTable1Specifications)
28 | export(isAggregatedCovariateData)
29 | export(isCovariateData)
30 | export(isTemporalCovariateData)
31 | export(loadCovariateData)
32 | export(saveCovariateData)
33 | export(tidyCovariateData)
34 | exportClasses(CovariateData)
35 | exportMethods(show)
36 | exportMethods(summary)
37 | import(Andromeda)
38 | import(DatabaseConnector)
39 | import(dplyr)
40 | importClassesFrom(DBI,DBIConnection)
41 | importClassesFrom(DBI,DBIObject)
42 | importClassesFrom(RSQLite,SQLiteConnection)
43 | importFrom(SqlRender,loadRenderTranslateSql)
44 | importFrom(SqlRender,render)
45 | importFrom(SqlRender,translate)
46 | importFrom(methods,is)
47 | importFrom(rlang,.data)
48 | importFrom(stats,aggregate)
49 | importFrom(stats,quantile)
50 | importFrom(stats,sd)
51 | importFrom(utils,read.csv)
52 |
--------------------------------------------------------------------------------
/R/FeatureExtraction.R:
--------------------------------------------------------------------------------
1 | # @file FeatureExtraction.R
2 | #
3 | # Copyright 2025 Observational Health Data Sciences and Informatics
4 | #
5 | # This file is part of FeatureExtraction
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
19 | #' @keywords internal
20 | "_PACKAGE"
21 |
22 | #' @importFrom SqlRender loadRenderTranslateSql translate render
23 | #' @importFrom methods is
24 | #' @importFrom utils read.csv
25 | #' @importFrom stats aggregate quantile sd
26 | #' @importFrom rlang .data
27 | #' @import DatabaseConnector
28 | #' @import dplyr
29 | NULL
30 |
# Package load hook.
#
# Initialises the Java side of the package through rJava and then compares the
# checksum stored in the package's csv/jarChecksum.txt file with the checksum
# computed on the Java side by JarChecksum.computeJarChecksum. A mismatch only
# produces a warning; loading is never aborted by this check.
.onLoad <- function(libname, pkgname) {
  rJava::.jpackage(pkgname, lib.loc = libname)

  # Checksum shipped with the R code.
  checksumFile <- system.file("csv", "jarChecksum.txt", package = "FeatureExtraction")
  expectedChecksum <- scan(file = checksumFile, what = character(), quiet = TRUE)

  # Checksum computed by the Java class; "" marks that Java could not be reached.
  actualChecksum <- tryCatch(
    rJava::J("org.ohdsi.featureExtraction.JarChecksum", "computeJarChecksum"),
    error = function(e) {
      warning("Problem connecting to Java. This is normal when runing roxygen.")
      return("")
    }
  )

  # Only complain when a checksum was obtained and it differs from the stored one.
  javaUnavailable <- actualChecksum == ""
  if (!(javaUnavailable || expectedChecksum == actualChecksum)) {
    warning("Java library version does not match R package version! Please try reinstalling the FeatureExtraction package.
Make sure to close all instances of R, and open only one instance before reinstalling. Also make sure your
R workspace is not reloaded on startup. Delete your .Rdata file if necessary")
  }
}
48 |
# Serialise an R object to a JSON character string using jsonlite, forcing
# conversion of unknown classes and unboxing length-one vectors.
.toJson <- function(object) {
  json <- jsonlite::toJSON(object, force = TRUE, auto_unbox = TRUE)
  as.character(json)
}
52 |
# Parse a JSON string with jsonlite. Vectors are simplified, but arrays of
# objects are kept as lists rather than being turned into data frames.
.fromJson <- function(json) {
  parsed <- jsonlite::fromJSON(json, simplifyVector = TRUE, simplifyDataFrame = FALSE)
  parsed
}
56 |
--------------------------------------------------------------------------------
/R/HelperFunctions.R:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Observational Health Data Sciences and Informatics
2 | #
3 | # This file is part of FeatureExtraction
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
#' Filter covariates by row ID
#'
#' @param covariateData  An object of type \code{CovariateData}. It must be open
#'                       and must not contain aggregated data.
#' @param rowIds         A vector containing the rowIds to keep.
#'
#' @details
#' Only the \code{covariates} table is filtered; \code{covariateRef} and
#' \code{analysisRef} are copied unchanged. The \code{populationSize} entry of
#' the \code{metaData} attribute is set to \code{length(rowIds)}.
#'
#' @return
#' An object of type \code{CovariateData}.
#'
#' @examples
#' \donttest{
#' covariateData <- FeatureExtraction::createEmptyCovariateData(
#'   cohortIds = 1,
#'   aggregated = FALSE,
#'   temporal = FALSE
#' )
#'
#' covData <- filterByRowId(
#'   covariateData = covariateData,
#'   rowIds = 1
#' )
#' }
#'
#' @export
filterByRowId <- function(covariateData, rowIds) {
  if (!isCovariateData(covariateData)) {
    stop("Data not of class CovariateData")
  }
  if (!Andromeda::isValidAndromeda(covariateData)) {
    stop("CovariateData object is closed")
  }
  if (isAggregatedCovariateData(covariateData)) {
    stop("Cannot filter aggregated data by rowId")
  }

  covariates <- covariateData$covariates %>%
    filter(.data$rowId %in% rowIds)

  result <- Andromeda::andromeda(
    covariates = covariates,
    covariateRef = covariateData$covariateRef,
    analysisRef = covariateData$analysisRef
  )
  metaData <- attr(covariateData, "metaData")
  metaData$populationSize <- length(rowIds)
  attr(result, "metaData") <- metaData
  class(result) <- "CovariateData"
  # Tag the class with its defining package, exactly as
  # filterByCohortDefinitionId() does, so both filters return objects that are
  # identified as FeatureExtraction's CovariateData class in the same way.
  attr(class(result), "package") <- "FeatureExtraction"
  return(result)
}
65 |
#' Filter covariates by cohort definition IDs
#'
#' @param covariateData  An object of type \code{CovariateData}. It must be open
#'                       and must contain aggregated data.
#' @param cohortId       DEPRECATED The cohort definition IDs to keep. When
#'                       supplied, a warning is raised and its value is used in
#'                       place of \code{cohortIds}.
#' @param cohortIds      The cohort definition IDs to keep.
#'
#' @return
#' An object of type \code{covariateData}.
#'
#' @examples
#' \donttest{
#' covariateData <- FeatureExtraction::createEmptyCovariateData(
#'   cohortIds = c(1, 2),
#'   aggregated = TRUE,
#'   temporal = FALSE
#' )
#'
#' covData <- filterByCohortDefinitionId(
#'   covariateData = covariateData,
#'   cohortIds = c(1)
#' )
#' }
#'
#' @export
filterByCohortDefinitionId <- function(covariateData,
                                       cohortId = 1,
                                       cohortIds = c(1)) {
  # Input validation: right class, still open, and aggregated.
  if (!isCovariateData(covariateData)) {
    stop("Data not of class CovariateData")
  }
  if (!Andromeda::isValidAndromeda(covariateData)) {
    stop("CovariateData object is closed")
  }
  if (!isAggregatedCovariateData(covariateData)) {
    stop("Can only filter aggregated data by cohortIds")
  }

  # Backwards compatibility: an explicitly supplied cohortId wins.
  if (!missing(cohortId)) {
    warning("cohortId argument has been deprecated, please use cohortIds")
    cohortIds <- cohortId
  }

  # Either table may be absent; absent tables stay NULL in the result.
  binary <- if (is.null(covariateData$covariates)) {
    NULL
  } else {
    filter(covariateData$covariates, .data$cohortDefinitionId %in% cohortIds)
  }
  continuous <- if (is.null(covariateData$covariatesContinuous)) {
    NULL
  } else {
    filter(covariateData$covariatesContinuous, .data$cohortDefinitionId %in% cohortIds)
  }

  result <- Andromeda::andromeda(
    covariates = binary,
    covariatesContinuous = continuous,
    covariateRef = covariateData$covariateRef,
    analysisRef = covariateData$analysisRef
  )

  # Keep only the population sizes whose names match a retained cohort ID.
  meta <- attr(covariateData, "metaData")
  sizes <- meta$populationSize
  keep <- as.numeric(names(sizes)) %in% cohortIds
  meta$populationSize <- sizes[keep]
  attr(result, "metaData") <- meta

  class(result) <- "CovariateData"
  attr(class(result), "package") <- "FeatureExtraction"
  result
}
132 |
# Assert that `covariateId` is a numeric vector holding whole numbers only
# (covariate IDs may exceed the 32-bit integer range, so they are stored as
# doubles and checked with round()).
#
# covariateId  Value to check.
# len          Exact required length, or NULL for no exact-length check.
# min.len      Minimum required length. NULL (the default) means 1, which is
#              what this function has always enforced; an explicit value is
#              now passed through instead of being ignored.
# null.ok      If TRUE, NULL is accepted and no further checks are made.
# add          Optional checkmate AssertCollection; when given, failures are
#              collected there instead of being thrown immediately.
.assertCovariateId <- function(covariateId, len = NULL, min.len = NULL, null.ok = FALSE, add = NULL) {
  if (is.null(min.len)) {
    min.len <- 1
  }
  checkmate::assertNumeric(covariateId, null.ok = null.ok, len = len, min.len = min.len, add = add)
  if (!is.null(covariateId)) {
    # Recover the expression the caller passed, so the failure message names
    # the caller's variable rather than `covariateId`. This must stay in this
    # function's own frame for eval.parent() to resolve correctly.
    varLabel <- sprintf(
      "Variable '%s' is a (64-bit) integer",
      paste0(deparse(eval.parent(substitute(substitute(covariateId))), width.cutoff = 500L), collapse = "\n")
    )
    # With an `add` collection, assertNumeric() does not stop on non-numeric
    # input, and round() would then raise a raw error. Guarding on
    # is.numeric() turns that into an ordinary collected assertion failure.
    isWhole <- is.numeric(covariateId) && all(covariateId == round(covariateId))
    checkmate::assertTRUE(isWhole, .var.name = varLabel, add = add)
  }
}
143 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | FeatureExtraction
2 | =================
3 |
4 | [Build status](https://github.com/OHDSI/FeatureExtraction/actions?query=workflow%3AR-CMD-check)
5 | [Code coverage](https://app.codecov.io/github/OHDSI/FeatureExtraction?branch=main)
6 | [CRAN status](https://CRAN.R-project.org/package=FeatureExtraction)
7 |
8 | FeatureExtraction is part of [HADES](https://ohdsi.github.io/Hades/).
9 |
10 | Introduction
11 | ============
12 | An R package for generating features (covariates) for a cohort using data in the Common Data Model.
13 |
14 | Features
15 | ========
16 | - Takes a cohort as input.
17 | - Generates baseline features for that cohort.
18 | - Default covariates include all drugs, diagnoses, procedures, as well as age, comorbidity indexes, etc.
19 | - Support for creating custom covariates.
20 | - Generate paper-ready summary table of select population characteristics.
21 |
22 | Technology
23 | ==========
24 | FeatureExtraction is an R package, with some functions implemented in Java.
25 |
26 | System Requirements
27 | ===================
28 | Requires R (version 3.2.2 or higher). Installation on Windows requires [RTools](https://cran.r-project.org/bin/windows/Rtools/). FeatureExtraction requires Java.
29 |
30 | Getting Started
31 | ===============
32 | 1. See the instructions [here](https://ohdsi.github.io/Hades/rSetup.html) for configuring your R environment, including RTools and Java.
33 |
34 | 2. In R, use the following commands to download and install FeatureExtraction:
35 |
36 | ```r
37 | install.packages("drat")
38 | drat::addRepo("OHDSI")
39 | install.packages("FeatureExtraction")
40 | ```
41 |
42 | User Documentation
43 | ==================
44 | The documentation website can be found at [https://ohdsi.github.io/FeatureExtraction/](https://ohdsi.github.io/FeatureExtraction/). PDF versions of the vignettes and package manual are here:
45 |
46 | * Vignette: [Using FeatureExtraction](https://raw.githubusercontent.com/OHDSI/FeatureExtraction/main/inst/doc/UsingFeatureExtraction.pdf)
47 | * Vignette: [Creating covariates using cohort attributes](https://raw.githubusercontent.com/OHDSI/FeatureExtraction/main/inst/doc/CreatingCovariatesUsingCohortAttributes.pdf)
48 | * Vignette: [Creating custom covariate builders](https://raw.githubusercontent.com/OHDSI/FeatureExtraction/main/inst/doc/CreatingCustomCovariateBuilders.pdf)
49 | * Vignette: [Creating covariates based on other cohorts](https://raw.githubusercontent.com/OHDSI/FeatureExtraction/main/inst/doc/CreatingCovariatesBasedOnOtherCohorts.pdf)
50 | * Package manual: [FeatureExtraction manual](https://raw.githubusercontent.com/OHDSI/FeatureExtraction/main/extras/FeatureExtraction.pdf)
51 |
52 | These vignettes are also available in Korean:
53 |
54 | * Vignette: [Using FeatureExtraction](https://raw.githubusercontent.com/OHDSI/FeatureExtraction/main/inst/doc/UsingFeatureExtractionKorean.pdf)
55 | * Vignette: [Creating custom covariate builders](https://raw.githubusercontent.com/OHDSI/FeatureExtraction/main/inst/doc/CreatingCustomCovariateBuildersKorean.pdf)
56 |
57 |
58 | Support
59 | =======
60 | * Developer questions/comments/feedback: [OHDSI Forum](http://forums.ohdsi.org)
61 | * We use the [GitHub issue tracker](https://github.com/OHDSI/FeatureExtraction/issues) for all bugs/issues/enhancements
62 |
63 | Contributing
64 | ============
65 | Read [here](https://ohdsi.github.io/Hades/contribute.html) how you can contribute to this package.
66 |
67 | License
68 | =======
69 | FeatureExtraction is licensed under Apache License 2.0
70 |
71 | Development
72 | ===========
73 | FeatureExtraction is being developed in RStudio.
74 |
75 | ### Development status
76 |
77 | Ready for use
78 |
79 | # Acknowledgements
80 | - This project is supported in part through the National Science Foundation grant IIS 1251151.
81 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | template:
2 | params:
3 | bootswatch: cosmo
4 |
5 | home:
6 | links:
7 | - text: Ask a question
8 | href: http://forums.ohdsi.org
9 |
10 | navbar:
11 | structure:
12 | right: [hades, github]
13 | components:
14 | hades:
15 | text: hadesLogo
16 | href: https://ohdsi.github.io/Hades
17 |
--------------------------------------------------------------------------------
/compare_versions:
--------------------------------------------------------------------------------
#!/usr/bin/perl

# Prints "vMAJOR.MINOR.PATCH" when the version declared in DESCRIPTION is
# newer than the most recent git tag, and prints an empty string otherwise.

# Read the first line of DESCRIPTION that contains 'Version'.
open(R_VERSION, "grep 'Version' DESCRIPTION |");
$version = <R_VERSION>;
close(R_VERSION);

$version =~ /(\d+)\.(\d+)\.(\d+)/;
$r_major = $1;
$r_minor = $2;
$r_mod = $3;

# Read the first line printed by `git describe --tags`.
open(GIT_VERSION, "git describe --tags |");
$git = <GIT_VERSION>;
close(GIT_VERSION);

$git =~ /v(\d+)\.(\d+)\.(\d+)/;
$git_major = $1;
$git_minor = $2;
$git_mod = $3;

# Compare component by component, most significant first: a lower-order
# component only decides the outcome when all higher-order ones are equal.
# (Comparing each component independently would report 1.0.5 as newer
# than 2.0.0.)
if ($r_major > $git_major
    || ($r_major == $git_major && $r_minor > $git_minor)
    || ($r_major == $git_major && $r_minor == $git_minor && $r_mod > $git_mod)) {
  $new_version = "v$r_major.$r_minor.$r_mod";
} else {
  $new_version = "";
}

print($new_version);
28 |
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## R CMD check results
2 |
3 | 0 errors | 0 warnings | 1 note
4 |
5 | * This is a new release.
6 |
--------------------------------------------------------------------------------
/deploy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -o errexit -o nounset
# Inserts the package tarball found in the current directory into the
# gh-pages branch of the OHDSI drat repository and pushes the result.
# Reads GH_TOKEN and GITHUB_RUN_ID from the environment.
addToDrat(){
  # Remember where the package was built; the tarball is referenced relative
  # to this directory after changing into the drat checkout.
  PKG_REPO=$PWD

  ## Locate the package tarball (it must already exist in the current directory)
  export PKG_TARBALL=$(ls *.tar.gz)

  # Work in a fresh sibling directory named 'drat'
  cd ..; mkdir drat; cd drat

  ## Set up Repo parameters
  git init
  git config user.name "Martijn Schuemie"
  git config user.email "schuemie@ohdsi.org"
  git config --global push.default simple

  ## Get drat repo
  git remote add upstream "https://$GH_TOKEN@github.com/OHDSI/drat.git"
  # stderr of the fetch goes to err.txt instead of the console
  # NOTE(review): presumably to keep the token-bearing URL out of the CI log - confirm
  git fetch upstream 2>err.txt
  git checkout gh-pages

  ## Link to local R packages
  echo 'R_LIBS=~/Rlib' > .Renviron

  # Add the tarball to the repository in the current directory; the commit
  # text names the tarball and the GitHub Actions run id.
  Rscript -e "drat::insertPackage('$PKG_REPO/$PKG_TARBALL', \
repodir = '.', \
commit='GitHub Actions release: $PKG_TARBALL run $GITHUB_RUN_ID')"
  git push

}
31 | addToDrat
32 |
--------------------------------------------------------------------------------
/docs/articles/CreatingCovariatesUsingCohortAttributes_files/accessible-code-block-0.0.1/empty-anchor.js:
--------------------------------------------------------------------------------
// Mark empty <a> elements inside highlighted code blocks as aria-hidden for
// screen reader accessibility
// (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786)
// v0.0.1
// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.

document.addEventListener('DOMContentLoaded', function() {
  const codeBlocks = document.getElementsByClassName("sourceCode");
  Array.prototype.forEach.call(codeBlocks, function(block) {
    Array.prototype.forEach.call(block.getElementsByTagName('a'), function(anchor) {
      // Only anchors without any content are hidden
      if (anchor.innerHTML === "") {
        anchor.setAttribute('aria-hidden', 'true');
      }
    });
  });
});
16 |
--------------------------------------------------------------------------------
/docs/articles/CreatingCovariatesUsingCohortAttributes_files/header-attrs-2.7/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 puts attributes on both the section div and its header. Strip
// them from the header (to be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var headingTag = /^h[1-6]$/i;
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(firstChildren, function(node) {
    // Only h1-h6 elements are touched
    if (!headingTag.test(node.tagName)) return;
    var attrs = node.attributes;
    // The attribute map is live, so keep removing the first entry until empty
    while (attrs.length > 0) {
      node.removeAttribute(attrs[0].name);
    }
  });
});
13 |
--------------------------------------------------------------------------------
/docs/articles/CreatingCovariatesUsingCohortAttributes_files/header-attrs-2.9/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 puts attributes on both the section div and its header. Strip
// them from the header (to be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var headingTag = /^h[1-6]$/i;
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(firstChildren, function(node) {
    // Only h1-h6 elements are touched
    if (!headingTag.test(node.tagName)) return;
    var attrs = node.attributes;
    // The attribute map is live, so keep removing the first entry until empty
    while (attrs.length > 0) {
      node.removeAttribute(attrs[0].name);
    }
  });
});
13 |
--------------------------------------------------------------------------------
/docs/articles/CreatingCustomCovariateBuildersKorean_files/accessible-code-block-0.0.1/empty-anchor.js:
--------------------------------------------------------------------------------
// Mark empty <a> elements inside highlighted code blocks as aria-hidden for
// screen reader accessibility
// (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786)
// v0.0.1
// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.

document.addEventListener('DOMContentLoaded', function() {
  const codeBlocks = document.getElementsByClassName("sourceCode");
  Array.prototype.forEach.call(codeBlocks, function(block) {
    Array.prototype.forEach.call(block.getElementsByTagName('a'), function(anchor) {
      // Only anchors without any content are hidden
      if (anchor.innerHTML === "") {
        anchor.setAttribute('aria-hidden', 'true');
      }
    });
  });
});
16 |
--------------------------------------------------------------------------------
/docs/articles/CreatingCustomCovariateBuildersKorean_files/header-attrs-2.7/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 puts attributes on both the section div and its header. Strip
// them from the header (to be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var headingTag = /^h[1-6]$/i;
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(firstChildren, function(node) {
    // Only h1-h6 elements are touched
    if (!headingTag.test(node.tagName)) return;
    var attrs = node.attributes;
    // The attribute map is live, so keep removing the first entry until empty
    while (attrs.length > 0) {
      node.removeAttribute(attrs[0].name);
    }
  });
});
13 |
--------------------------------------------------------------------------------
/docs/articles/CreatingCustomCovariateBuildersKorean_files/header-attrs-2.9/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 puts attributes on both the section div and its header. Strip
// them from the header (to be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var headingTag = /^h[1-6]$/i;
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(firstChildren, function(node) {
    // Only h1-h6 elements are touched
    if (!headingTag.test(node.tagName)) return;
    var attrs = node.attributes;
    // The attribute map is live, so keep removing the first entry until empty
    while (attrs.length > 0) {
      node.removeAttribute(attrs[0].name);
    }
  });
});
13 |
--------------------------------------------------------------------------------
/docs/articles/CreatingCustomCovariateBuilders_files/accessible-code-block-0.0.1/empty-anchor.js:
--------------------------------------------------------------------------------
// Mark empty <a> elements inside highlighted code blocks as aria-hidden for
// screen reader accessibility
// (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786)
// v0.0.1
// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.

document.addEventListener('DOMContentLoaded', function() {
  const codeBlocks = document.getElementsByClassName("sourceCode");
  Array.prototype.forEach.call(codeBlocks, function(block) {
    Array.prototype.forEach.call(block.getElementsByTagName('a'), function(anchor) {
      // Only anchors without any content are hidden
      if (anchor.innerHTML === "") {
        anchor.setAttribute('aria-hidden', 'true');
      }
    });
  });
});
16 |
--------------------------------------------------------------------------------
/docs/articles/CreatingCustomCovariateBuilders_files/header-attrs-2.7/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 puts attributes on both the section div and its header. Strip
// them from the header (to be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var headingTag = /^h[1-6]$/i;
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(firstChildren, function(node) {
    // Only h1-h6 elements are touched
    if (!headingTag.test(node.tagName)) return;
    var attrs = node.attributes;
    // The attribute map is live, so keep removing the first entry until empty
    while (attrs.length > 0) {
      node.removeAttribute(attrs[0].name);
    }
  });
});
13 |
--------------------------------------------------------------------------------
/docs/articles/CreatingCustomCovariateBuilders_files/header-attrs-2.9/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 puts attributes on both the section div and its header. Strip
// them from the header (to be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var headingTag = /^h[1-6]$/i;
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(firstChildren, function(node) {
    // Only h1-h6 elements are touched
    if (!headingTag.test(node.tagName)) return;
    var attrs = node.attributes;
    // The attribute map is live, so keep removing the first entry until empty
    while (attrs.length > 0) {
      node.removeAttribute(attrs[0].name);
    }
  });
});
13 |
--------------------------------------------------------------------------------
/docs/articles/Untitled-1f21019f-1a2d-4b5e-85a6-26b6d323301d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/docs/articles/Untitled-1f21019f-1a2d-4b5e-85a6-26b6d323301d.png
--------------------------------------------------------------------------------
/docs/articles/Untitled-4b3d9353-8da9-49e0-967b-69bc05653585.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/docs/articles/Untitled-4b3d9353-8da9-49e0-967b-69bc05653585.png
--------------------------------------------------------------------------------
/docs/articles/Untitled-9bab0a34-d162-407b-aee0-0fc6224987b4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/docs/articles/Untitled-9bab0a34-d162-407b-aee0-0fc6224987b4.png
--------------------------------------------------------------------------------
/docs/articles/Untitled-dd762182-9d8f-4065-bddf-6282630a0f99.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/docs/articles/Untitled-dd762182-9d8f-4065-bddf-6282630a0f99.png
--------------------------------------------------------------------------------
/docs/articles/UsingFeatureExtractionKorean_files/accessible-code-block-0.0.1/empty-anchor.js:
--------------------------------------------------------------------------------
// Mark empty <a> elements inside highlighted code blocks as aria-hidden for
// screen reader accessibility
// (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786)
// v0.0.1
// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.

document.addEventListener('DOMContentLoaded', function() {
  const codeBlocks = document.getElementsByClassName("sourceCode");
  Array.prototype.forEach.call(codeBlocks, function(block) {
    Array.prototype.forEach.call(block.getElementsByTagName('a'), function(anchor) {
      // Only anchors without any content are hidden
      if (anchor.innerHTML === "") {
        anchor.setAttribute('aria-hidden', 'true');
      }
    });
  });
});
16 |
--------------------------------------------------------------------------------
/docs/articles/UsingFeatureExtractionKorean_files/header-attrs-2.7/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 puts attributes on both the section div and its header. Strip
// them from the header (to be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var headingTag = /^h[1-6]$/i;
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(firstChildren, function(node) {
    // Only h1-h6 elements are touched
    if (!headingTag.test(node.tagName)) return;
    var attrs = node.attributes;
    // The attribute map is live, so keep removing the first entry until empty
    while (attrs.length > 0) {
      node.removeAttribute(attrs[0].name);
    }
  });
});
13 |
--------------------------------------------------------------------------------
/docs/articles/UsingFeatureExtractionKorean_files/header-attrs-2.9/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 puts attributes on both the section div and its header. Strip
// them from the header (to be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var headingTag = /^h[1-6]$/i;
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(firstChildren, function(node) {
    // Only h1-h6 elements are touched
    if (!headingTag.test(node.tagName)) return;
    var attrs = node.attributes;
    // The attribute map is live, so keep removing the first entry until empty
    while (attrs.length > 0) {
      node.removeAttribute(attrs[0].name);
    }
  });
});
13 |
--------------------------------------------------------------------------------
/docs/articles/UsingFeatureExtraction_files/accessible-code-block-0.0.1/empty-anchor.js:
--------------------------------------------------------------------------------
// Mark empty <a> elements inside highlighted code blocks as aria-hidden for
// screen reader accessibility
// (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786)
// v0.0.1
// Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.

document.addEventListener('DOMContentLoaded', function() {
  const codeBlocks = document.getElementsByClassName("sourceCode");
  Array.prototype.forEach.call(codeBlocks, function(block) {
    Array.prototype.forEach.call(block.getElementsByTagName('a'), function(anchor) {
      // Only anchors without any content are hidden
      if (anchor.innerHTML === "") {
        anchor.setAttribute('aria-hidden', 'true');
      }
    });
  });
});
16 |
--------------------------------------------------------------------------------
/docs/articles/UsingFeatureExtraction_files/header-attrs-2.7/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 puts attributes on both the section div and its header. Strip
// them from the header (to be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var headingTag = /^h[1-6]$/i;
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(firstChildren, function(node) {
    // Only h1-h6 elements are touched
    if (!headingTag.test(node.tagName)) return;
    var attrs = node.attributes;
    // The attribute map is live, so keep removing the first entry until empty
    while (attrs.length > 0) {
      node.removeAttribute(attrs[0].name);
    }
  });
});
13 |
--------------------------------------------------------------------------------
/docs/articles/UsingFeatureExtraction_files/header-attrs-2.9/header-attrs.js:
--------------------------------------------------------------------------------
// Pandoc 2.9 puts attributes on both the section div and its header. Strip
// them from the header (to be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  var headingTag = /^h[1-6]$/i;
  var firstChildren = document.querySelectorAll("div.section[class*='level'] > :first-child");
  Array.prototype.forEach.call(firstChildren, function(node) {
    // Only h1-h6 elements are touched
    if (!headingTag.test(node.tagName)) return;
    var attrs = node.attributes;
    // The attribute map is live, so keep removing the first entry until empty
    while (attrs.length > 0) {
      node.removeAttribute(attrs[0].name);
    }
  });
});
13 |
--------------------------------------------------------------------------------
/docs/bootstrap-toc.css:
--------------------------------------------------------------------------------
1 | /*!
2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/)
3 | * Copyright 2015 Aidan Feldman
4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */
5 |
6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */
7 |
8 | /* All levels of nav */
9 | nav[data-toggle='toc'] .nav > li > a {
10 | display: block;
11 | padding: 4px 20px;
12 | font-size: 13px;
13 | font-weight: 500;
14 | color: #767676;
15 | }
16 | nav[data-toggle='toc'] .nav > li > a:hover,
17 | nav[data-toggle='toc'] .nav > li > a:focus {
18 | padding-left: 19px;
19 | color: #563d7c;
20 | text-decoration: none;
21 | background-color: transparent;
22 | border-left: 1px solid #563d7c;
23 | }
24 | nav[data-toggle='toc'] .nav > .active > a,
25 | nav[data-toggle='toc'] .nav > .active:hover > a,
26 | nav[data-toggle='toc'] .nav > .active:focus > a {
27 | padding-left: 18px;
28 | font-weight: bold;
29 | color: #563d7c;
30 | background-color: transparent;
31 | border-left: 2px solid #563d7c;
32 | }
33 |
34 | /* Nav: second level (shown on .active) */
35 | nav[data-toggle='toc'] .nav .nav {
36 | display: none; /* Hide by default, but at >768px, show it */
37 | padding-bottom: 10px;
38 | }
39 | nav[data-toggle='toc'] .nav .nav > li > a {
40 | padding-top: 1px;
41 | padding-bottom: 1px;
42 | padding-left: 30px;
43 | font-size: 12px;
44 | font-weight: normal;
45 | }
46 | nav[data-toggle='toc'] .nav .nav > li > a:hover,
47 | nav[data-toggle='toc'] .nav .nav > li > a:focus {
48 | padding-left: 29px;
49 | }
50 | nav[data-toggle='toc'] .nav .nav > .active > a,
51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a,
52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a {
53 | padding-left: 28px;
54 | font-weight: 500;
55 | }
56 |
57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */
58 | nav[data-toggle='toc'] .nav > .active > ul {
59 | display: block;
60 | }
61 |
--------------------------------------------------------------------------------
/docs/docsearch.js:
--------------------------------------------------------------------------------
$(function() {

  // Move the focus to the search bar when shift + "/" (i.e. "?") is pressed
  $(document).on('keydown', function(event) {
    if (event.shiftKey && event.keyCode == 191) {
      event.preventDefault();
      $("#search-input").focus();
    }
  });

  $(document).ready(function() {
    // Highlight the keyword carried in the "q" query parameter of the URL
    /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */
    var highlightKeyword = function() {
      var pageUrl = document.URL;
      var queryStart = pageUrl.indexOf("?");
      if (queryStart === -1) {
        return;
      }

      // Query string without the leading "?" and without any "#anchor" part
      var params = pageUrl.slice(queryStart + 1).split('#')[0].split('&');
      var keyword = "";

      params.forEach(function(param) {
        var pair = param.split('=');
        // Only well-formed key=value pairs are considered; the last "q" wins
        if (pair.length === 2 && pair[0] == "q") {
          keyword = decodeURIComponent(pair[1].replace(/\+/g, "%20"));
        }
      });

      if (keyword !== "") {
        // Clear existing highlights first, then mark the keyword
        $(".contents").unmark({
          done: function() {
            $(".contents").mark(keyword);
          }
        });
      }
    };

    highlightKeyword();
  });
});
51 |
52 | /* Search term highlighting ------------------------------*/
53 |
/**
 * Collects the distinct matched words of a search hit.
 *
 * @param {Object} hit Search hit carrying a `_highlightResult` with a
 *   `hierarchy` map and an optional `content` entry.
 * @returns {Array} The matched words without duplicates, in order of first
 *   occurrence.
 */
function matchedWords(hit) {
  var highlight = hit._highlightResult;
  var collected = [];

  // Gather the matches of every hierarchy level (lvl0, lvl1, etc.)
  var levels = highlight.hierarchy;
  for (var level in levels) {
    collected = collected.concat(levels[level].matchedWords);
  }

  // The content entry is optional
  var content = highlight.content;
  if (content) {
    collected = collected.concat(content.matchedWords);
  }

  // A Set keeps insertion order, so duplicates after the first are dropped
  return Array.from(new Set(collected));
}
72 |
/**
 * Builds the URL of a search hit with the matched words attached as the
 * "q" query parameter, so the target page can highlight them.
 *
 * @param {Object} hit Search hit; uses `url_without_anchor` and `anchor`
 *   when an anchor is present, `url` otherwise.
 * @returns {string} The hit URL including the "q" parameter.
 */
function updateHitURL(hit) {
  // encodeURIComponent instead of the deprecated escape(): escape() emits
  // %uXXXX sequences for non-Latin-1 characters, which decodeURIComponent
  // rejects, and leaves "+" unescaped, which a reader that maps "+" to a
  // space would misread.
  var query = '?q=' + encodeURIComponent(matchedWords(hit).join(" "));

  if (hit.anchor) {
    // The query string has to come before the fragment
    return hit.url_without_anchor + query + '#' + hit.anchor;
  }
  return hit.url + query;
}
86 |
--------------------------------------------------------------------------------
/docs/link.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
13 |
--------------------------------------------------------------------------------
/docs/pkgdown.js:
--------------------------------------------------------------------------------
1 | /* http://gregfranko.com/blog/jquery-best-practices/ */
2 | (function($) {
$(function() {

  $('.navbar-fixed-top').headroom();

  // Keep the page content below the navbar, also after a window resize
  function padBodyForNavbar() {
    $('body').css('padding-top', $('.navbar').height() + 10);
  }
  padBodyForNavbar();
  $(window).resize(function() {
    padBodyForNavbar();
  });

  $('[data-toggle="tooltip"]').tooltip();

  // Find the navbar link whose path shares the longest prefix with the
  // path of the current page
  var cur_path = paths(location.pathname);
  var links = $("#navbar ul li a");
  var longest = -1;
  var best = -1;
  for (var i = 0; i < links.length; i++) {
    var link = links[i];
    // Skip placeholder links and external links
    if (link.getAttribute("href") === "#" || link.host !== location.host) {
      continue;
    }

    var shared = prefix_length(paths(link.pathname), cur_path);
    if (shared > longest) {
      longest = shared;
      best = i;
    }
  }

  // Mark the parent of the best link as active, and the enclosing
  // li.dropdown as well if the link sits in a dropdown
  if (best >= 0) {
    var active_link = $(links[best]);
    active_link.parent().addClass("active");
    active_link.closest("li.dropdown").addClass("active");
  }
});
41 |
/**
 * Splits a URL pathname into its segments.
 *
 * @param {string} pathname Pathname such as "/a/b/index.html".
 * @returns {Array} The segments after the leading "/", without a trailing
 *   "index.html" or empty segment.
 */
function paths(pathname) {
  // The first piece is dropped because a pathname always starts with "/"
  var segments = pathname.split("/").slice(1);

  var last = segments[segments.length - 1];
  if (last === "index.html" || last === "") {
    segments.pop();
  }
  return segments;
}
51 |
/**
 * Counts the leading elements that `needle` and `haystack` have in common.
 *
 * @param {Array} needle Candidate prefix.
 * @param {Array} haystack Array the candidate is compared against.
 * @returns {number} -1 when `needle` is longer than `haystack`; otherwise
 *   the index of the first position where the two differ, or
 *   `haystack.length` when no position differs.
 */
function prefix_length(needle, haystack) {
  if (needle.length > haystack.length) {
    return -1;
  }

  // An empty haystack needs its own answer, since the scan below would not run
  if (haystack.length === 0) {
    return needle.length === 0 ? 0 : -1;
  }

  // Advance while the elements agree; positions past the end of needle
  // read as undefined and are compared like any other value
  var idx = 0;
  while (idx < haystack.length && needle[idx] == haystack[idx]) {
    idx++;
  }
  return idx;
}
69 |
70 | /* Clipboard --------------------------*/
71 |
/**
 * Shows the tooltip of `element` with `msg` as its text, then puts the
 * previous title attribute back.
 *
 * @param {Element} element Element that carries the tooltip.
 * @param {string} msg Text to display.
 */
function changeTooltipMessage(element, msg) {
  var titleAttr = 'data-original-title';
  var savedTitle = element.getAttribute(titleAttr);

  element.setAttribute(titleAttr, msg);
  $(element).tooltip('show');
  element.setAttribute(titleAttr, savedTitle);
}
78 |
// Adds copy-to-clipboard support to the code blocks of the page when the
// browser supports ClipboardJS.
if(ClipboardJS.isSupported()) {
  $(document).ready(function() {
    // NOTE(review): the button markup is an empty string in this copy, so
    // nothing is inserted below - confirm the markup was not lost.
    var copyButton = "";

    $("div.sourceCode").addClass("hasCopyButton");

    // Insert copy buttons:
    $(copyButton).prependTo(".hasCopyButton");

    // Initialize tooltips:
    $('.btn-copy-ex').tooltip({container: 'body'});

    // Initialize clipboard:
    var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', {
      text: function(trigger) {
        // Copy the text of the enclosing block without the "#>" output lines
        return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, "");
      }
    });

    clipboardBtnCopies.on('success', function(e) {
      changeTooltipMessage(e.trigger, 'Copied!');
      e.clearSelection();
    });

    // The handler has to declare the event parameter: without it the body
    // referenced an undefined `e` and threw instead of showing the hint.
    clipboardBtnCopies.on('error', function(e) {
      changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy');
    });
  });
}
108 | })(window.jQuery || window.$)
109 |
--------------------------------------------------------------------------------
/docs/pkgdown.yml:
--------------------------------------------------------------------------------
1 | pandoc: '3.2'
2 | pkgdown: 2.1.0
3 | pkgdown_sha: ~
4 | articles:
5 | CreatingCovariatesBasedOnOtherCohorts: CreatingCovariatesBasedOnOtherCohorts.html
6 | CreatingCovariatesUsingCohortAttributes: CreatingCovariatesUsingCohortAttributes.html
7 | CreatingCustomCovariateBuilders: CreatingCustomCovariateBuilders.html
8 | CreatingCustomCovariateBuildersKorean: CreatingCustomCovariateBuildersKorean.html
9 | UsingFeatureExtraction: UsingFeatureExtraction.html
10 | UsingFeatureExtractionKorean: UsingFeatureExtractionKorean.html
11 | last_built: 2025-05-08T13:26Z
12 |
--------------------------------------------------------------------------------
/docs/reference/Rplot001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/docs/reference/Rplot001.png
--------------------------------------------------------------------------------
/docs/sitemap.xml:
--------------------------------------------------------------------------------
1 |
2 | /404.html
3 | /articles/CreatingCovariatesBasedOnOtherCohorts.html
4 | /articles/CreatingCovariatesUsingCohortAttributes.html
5 | /articles/CreatingCustomCovariateBuilders.html
6 | /articles/CreatingCustomCovariateBuildersKorean.html
7 | /articles/UsingFeatureExtraction.html
8 | /articles/UsingFeatureExtractionKorean.html
9 | /articles/index.html
10 | /authors.html
11 | /index.html
12 | /news/index.html
13 | /pull_request_template.html
14 | /reference/CovariateData-class.html
15 | /reference/FeatureExtraction-package.html
16 | /reference/aggregateCovariates.html
17 | /reference/byMaxFf.html
18 | /reference/bySumFf.html
19 | /reference/computeStandardizedDifference.html
20 | /reference/convertPrespecSettingsToDetailedSettings.html
21 | /reference/createAnalysisDetails.html
22 | /reference/createCohortAttrCovariateSettings.html
23 | /reference/createCohortBasedCovariateSettings.html
24 | /reference/createCohortBasedTemporalCovariateSettings.html
25 | /reference/createCovariateSettings.html
26 | /reference/createDefaultCovariateSettings.html
27 | /reference/createDefaultTemporalCovariateSettings.html
28 | /reference/createDetailedCovariateSettings.html
29 | /reference/createDetailedTemporalCovariateSettings.html
30 | /reference/createEmptyCovariateData.html
31 | /reference/createHdpsCovariateSettings.html
32 | /reference/createTable1.html
33 | /reference/createTable1CovariateSettings.html
34 | /reference/createTemporalCovariateSettings.html
35 | /reference/createTemporalSequenceCovariateSettings.html
36 | /reference/dot-createLooCovariateSettings.html
37 | /reference/dot-getDbLooCovariateData.html
38 | /reference/filterByCohortDefinitionId.html
39 | /reference/filterByRowId.html
40 | /reference/filterCovariateDataCovariates.html
41 | /reference/getDbCohortAttrCovariatesData.html
42 | /reference/getDbCohortBasedCovariatesData.html
43 | /reference/getDbCovariateData.html
44 | /reference/getDbDefaultCovariateData.html
45 | /reference/getDbHdpsCovariateData.html
46 | /reference/getDefaultTable1Specifications.html
47 | /reference/index.html
48 | /reference/isAggregatedCovariateData.html
49 | /reference/isCovariateData.html
50 | /reference/isTemporalCovariateData.html
51 | /reference/loadCovariateData.html
52 | /reference/saveCovariateData.html
53 | /reference/tidyCovariateData.html
54 |
55 |
56 |
--------------------------------------------------------------------------------
/extras/DefaultCovariateSettingsTemplate.R:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Observational Health Data Sciences and Informatics
2 | #
3 | # This file is part of FeatureExtraction
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | %warning%
18 |
#' Create covariate settings
#'
#' @details
#' creates an object specifying how covariates should be constructed from data in the CDM model.
#'
%roxygen%
#'
#' @return
#' An object of type \code{covariateSettings}, to be used in other functions.
#'
#' @examples
#' settings <- %functionName%(%roxygenArgs%)
#'
#' @export
%functionName% <- function(%arguments%) {
  # Template note: the %...% tokens in this file are placeholders that have
  # to be substituted before the file is valid R.
  covariateSettings <- list(temporal = %temporal%, temporalSequence = FALSE)
  # Walk over every formal argument of this function by name
  formalNames <- names(formals(%functionName%))
  anyUseTrue <- FALSE
  for (name in formalNames) {
    # Look up the value that was passed for this argument
    value <- get(name)
    if (is.null(value)) {
      # NULL is replaced by an empty vector before it is stored
      value <- vector()
    }
    if (grepl("use.*", name)) {
      # Arguments whose name contains "use" are switches: only those that
      # are TRUE are stored, under the name with the first "use" removed
      if (value) {
        covariateSettings[[sub("use", "", name)]] <- value
        anyUseTrue <- TRUE
      }
    } else {
      # All other arguments are stored under their own name
      covariateSettings[[name]] <- value
    }
  }
  # At least one switch must have been TRUE
  if (!anyUseTrue) {
    stop("No covariate analysis selected. Must select at least one")
  }
  # Record the name of the function associated with these settings
  attr(covariateSettings, "fun") <- "getDbDefaultCovariateData"
  class(covariateSettings) <- "covariateSettings"
  return(covariateSettings)
}
58 |
--------------------------------------------------------------------------------
/extras/FeatureExtraction.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/extras/FeatureExtraction.pdf
--------------------------------------------------------------------------------
/extras/TestHashForPostcoordinatedConcepts.R:
--------------------------------------------------------------------------------
1 | # To compute covariate IDs for postcoordinated concepts (concept_id - value_as_concept_id pairs),
2 | # we use a simple hashing function we implement in SQL. The resulting covariate ID uses 52 bits of
3 | # precision, so will fit in an R numeric type without loss of precision.
4 | #
5 | # Below is some code evaluating how likely we are to have collisions in covariate IDs (the same
6 | # covariate ID for different concept_id - value_as_concept_id pairs). Although collisions are
7 | # unlikely, they may occur. In general we are not concerned, as most covariates are used for
8 | # prediction or confounder adjustment, and this may simply lead to one covariate (out of tens
9 | # of thousands) being less predictive.
10 |
11 | # Check in JnJ network ---------------------------------------------------------
12 | uniquePcCombos <- readRDS("extras/uniquePcCombos.rds") # columns used below: conceptId, valueAsConceptId, table
# Hash algorithm 1: XOR the value with itself divided by 2^bits (fraction
# dropped), then keep only the lowest `bits` bits of the result.
hash1 <- function(value, bits) {
  modulus <- 2^bits
  # bitwXor() coerces doubles to integer, which drops the fraction; trunc()
  # makes that step explicit without changing the result.
  shifted <- trunc(value / modulus)
  folded <- bitwXor(value, shifted)
  bitwAnd(folded, modulus - 1)
}
17 |
# Hash algorithm 2: multiply the value by 2654435769, divide by 2^(32 - bits)
# and keep the lowest `bits` bits. The arithmetic is delegated to SQLite.
hash2 <- function(value, bits) {
  # Use Andromeda / SQLite for intermediate steps requiring 64-bit integers:
  store <- Andromeda::andromeda(a = data.frame(value = as.integer(value)))
  divisor <- 2^(32 - bits)
  bitMask <- 2^bits - 1
  query <- sprintf(
    "SELECT CAST((2654435769 * value / %s) & %s AS INT) AS hash FROM a;",
    divisor,
    bitMask
  )
  RSQLite::dbGetQuery(store, query)$hash
}
27 |
28 |
29 | cid <- paste(hash1(uniquePcCombos$conceptId, 18), hash1(uniquePcCombos$valueAsConceptId, 21), uniquePcCombos$table) # hash1 key: 18-bit concept hash, 21-bit value hash, plus table name
30 | sum(duplicated(cid)) # number of keys that repeat an earlier key
31 | # [1] 750
32 | sum(duplicated(cid)) / nrow(uniquePcCombos) # collisions as a fraction of all combinations
33 | # [1] 0.004121423
34 |
35 | cid <- paste(hash2(uniquePcCombos$conceptId, 20), hash2(uniquePcCombos$valueAsConceptId, 22), uniquePcCombos$table) # same check with hash2, using 20 and 22 bits
36 | sum(duplicated(cid))
37 | # [1] 27
38 | sum(duplicated(cid)) / nrow(uniquePcCombos)
39 | # [1] 0.0001483712
40 |
41 | cid <- hash2(uniquePcCombos$conceptId, 20) * 4194304000 + hash2(uniquePcCombos$valueAsConceptId, 22) * 1000 + as.integer(uniquePcCombos$table == "measurement") # numeric ID: 4194304000 = 2^22 * 1000, so the concept hash sits above the 22-bit value hash; the final term is 1 for measurement rows, else 0
42 | sum(duplicated(cid))
43 |
44 | # Find a duplicate for testing:
45 | uniquePcCombos$cid <- cid
46 | dups <- cid[duplicated(cid)] # IDs that occur more than once
47 | dups <- uniquePcCombos[cid %in% dups, ] # all rows sharing one of those IDs
48 | dups <- dups[order(dups$cid), ] # sort so colliding rows are adjacent
49 | dups[1:2, ]
50 | # # A tibble: 2 x 4
51 | # conceptId valueAsConceptId table cid
52 | #
53 | # 1 3048564 4069590 measurement 7.41e14
54 | # 2 40483078 4069590 measurement 7.41e14
55 |
56 | # Demonstration of hash algorithm 1 in RSQLite ---------------------------------
57 | connection <- DatabaseConnector::connect(dbms = "sqlite", server = ":memory:") # in-memory SQLite database
58 |
59 | # For reference:
60 | hash1(380844, 18) * 2^21 + hash1(2821462, 21) # covariate ID computed in R, to compare with the SQL results below
61 | # [1] 248934763863
62 |
63 | # XOR not available in SQLite, but can implement using (a|b)-(a&b)
64 | # 2^18 = 262144 (262143 = 2^18 - 1 is used as the mask)
65 | # 2^21 = 2097152 (2097151 = 2^21 - 1 is used as the mask)
66 | sql <- "
67 | SELECT (((a | a/262144) - (a & a/262144)) & 262143)*2097152 +
68 | (((b | b/2097152) - (b & b/2097152)) & 2097151) AS covariate_id
69 | FROM (
70 | SELECT 380844 AS a,
71 | 2821462 AS b
72 | ) tmp;
73 | "
74 | DatabaseConnector::renderTranslateQuerySql(connection, sql)
75 | # # COVARIATE_ID
76 | # 1 248934763863
77 |
78 | # OR not available in Oracle, but can be implemented using a + b - (a&b), so XOR becomes a + b - 2*(a&b)
79 | sql <- "
80 | SELECT (((a + a/262144 - 2*(a & a/262144))) & 262143)*2097152 +
81 | (((b + b/2097152 - 2*(b & b/2097152))) & 2097151) AS covariate_id
82 | FROM (
83 | SELECT 380844 AS a,
84 | 2821462 AS b
85 | ) tmp;
86 | "
87 | DatabaseConnector::renderTranslateQuerySql(connection, sql)
88 | # # COVARIATE_ID
89 | # 1 248934763863
90 |
91 |
92 | DatabaseConnector::disconnect(connection)
93 |
94 | # Demonstration of hash algorithm 2 in RSQLite ---------------------------------
95 | connection <- DatabaseConnector::connect(dbms = "sqlite", server = ":memory:") # in-memory SQLite database
96 |
97 | # For reference:
98 | format(hash2(380844, 20) * 2^22 + hash2(2821462, 22), scientific = FALSE) # covariate ID computed via hash2, to compare with the SQL result below
99 | # [1] 2358966384914
100 | # Constants below: 4096 = 2^(32-20), 1048575 = 2^20 - 1, 4194304 = 2^22, 1024 = 2^(32-22), 4194303 = 2^22 - 1
101 | sql <- "
102 | SELECT ((2654435769 * a / 4096) & 1048575)*4194304 +
103 | ((2654435769 * b / 1024) & 4194303) AS covariate_id
104 | FROM (
105 | SELECT 380844 AS a,
106 | 2821462 AS b
107 | ) tmp;
108 | "
109 | format(DatabaseConnector::renderTranslateQuerySql(connection, sql)[1, 1], scientific = FALSE)
110 | # # COVARIATE_ID
111 | # 1 2358966384914
112 |
113 | DatabaseConnector::disconnect(connection)
114 |
--------------------------------------------------------------------------------
/extras/uniquePcCombos.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/extras/uniquePcCombos.rds
--------------------------------------------------------------------------------
/inst/csv/OtherParameters.csv:
--------------------------------------------------------------------------------
1 | name,type,description,defaultValue
2 | includedCovariateConceptIds,common,A list of concept IDs that should be used to construct covariates.,[]
3 | addDescendantsToInclude,common,Should descendant concept IDs be added to the list of concepts to include?,false
4 | excludedCovariateConceptIds,common,A list of concept IDs that should NOT be used to construct covariates.,[]
5 | addDescendantsToExclude,common,Should descendant concept IDs be added to the list of concepts to exclude?,false
6 | includedCovariateIds,common,A list of covariate IDs to which the constructed covariates should be restricted.,[]
7 | longTermStartDays,days,What is the start day (relative to the index date) of the long-term window?,-365
8 | mediumTermStartDays,days,What is the start day (relative to the index date) of the medium-term window?,-180
9 | shortTermStartDays,days,What is the start day (relative to the index date) of the short-term window?,-30
10 | endDays,days,What is the end day (relative to the index date) of the window?,0
11 | sequenceEndDay,temporal_sequence,What is the end day (relative to the index date) of the feature extraction?,0
12 | sequenceStartDay,temporal_sequence,What is the start day (relative to the index date) of the feature extraction?,-99999
13 | timePart,temporal_sequence,"What is the interval type (day, month, year) of the timeIds?",'month'
14 | timeInterval,temporal_sequence,"What is the interval length (1,7,30,...) of the timeIds?",1
15 | temporalStartDays,temporal,"A list of integers representing the start of a time period, relative to the index date. 0 indicates the index date, -1 indicates the day before the index date, etc. The start day is included in the time period.","[-365,-364,-363,-362,-361,-360,-359,-358,-357,-356,-355,-354,-353,-352,-351,-350,-349,-348,-347,-346,-345,-344,-343,-342,-341,-340,-339,-338,-337,-336,-335,-334,-333,-332,-331,-330,-329,-328,-327,-326,-325,-324,-323,-322,-321,-320,-319,-318,-317,-316,-315,-314,-313,-312,-311,-310,-309,-308,-307,-306,-305,-304,-303,-302,-301,-300,-299,-298,-297,-296,-295,-294,-293,-292,-291,-290,-289,-288,-287,-286,-285,-284,-283,-282,-281,-280,-279,-278,-277,-276,-275,-274,-273,-272,-271,-270,-269,-268,-267,-266,-265,-264,-263,-262,-261,-260,-259,-258,-257,-256,-255,-254,-253,-252,-251,-250,-249,-248,-247,-246,-245,-244,-243,-242,-241,-240,-239,-238,-237,-236,-235,-234,-233,-232,-231,-230,-229,-228,-227,-226,-225,-224,-223,-222,-221,-220,-219,-218,-217,-216,-215,-214,-213,-212,-211,-210,-209,-208,-207,-206,-205,-204,-203,-202,-201,-200,-199,-198,-197,-196,-195,-194,-193,-192,-191,-190,-189,-188,-187,-186,-185,-184,-183,-182,-181,-180,-179,-178,-177,-176,-175,-174,-173,-172,-171,-170,-169,-168,-167,-166,-165,-164,-163,-162,-161,-160,-159,-158,-157,-156,-155,-154,-153,-152,-151,-150,-149,-148,-147,-146,-145,-144,-143,-142,-141,-140,-139,-138,-137,-136,-135,-134,-133,-132,-131,-130,-129,-128,-127,-126,-125,-124,-123,-122,-121,-120,-119,-118,-117,-116,-115,-114,-113,-112,-111,-110,-109,-108,-107,-106,-105,-104,-103,-102,-101,-100,-99,-98,-97,-96,-95,-94,-93,-92,-91,-90,-89,-88,-87,-86,-85,-84,-83,-82,-81,-80,-79,-78,-77,-76,-75,-74,-73,-72,-71,-70,-69,-68,-67,-66,-65,-64,-63,-62,-61,-60,-59,-58,-57,-56,-55,-54,-53,-52,-51,-50,-49,-48,-47,-46,-45,-44,-43,-42,-41,-40,-39,-38,-37,-36,-35,-34,-33,-32,-31,-30,-29,-28,-27,-26,-25,-24,-23,-22,-21,-20,-19,-18,-17,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1]"
16 | temporalEndDays,temporal,"A list of integers representing the end of a time period, relative to the index date. 0 indicates the index date, -1 indicates the day before the index date, etc. The end day is included in the time period.","[-365,-364,-363,-362,-361,-360,-359,-358,-357,-356,-355,-354,-353,-352,-351,-350,-349,-348,-347,-346,-345,-344,-343,-342,-341,-340,-339,-338,-337,-336,-335,-334,-333,-332,-331,-330,-329,-328,-327,-326,-325,-324,-323,-322,-321,-320,-319,-318,-317,-316,-315,-314,-313,-312,-311,-310,-309,-308,-307,-306,-305,-304,-303,-302,-301,-300,-299,-298,-297,-296,-295,-294,-293,-292,-291,-290,-289,-288,-287,-286,-285,-284,-283,-282,-281,-280,-279,-278,-277,-276,-275,-274,-273,-272,-271,-270,-269,-268,-267,-266,-265,-264,-263,-262,-261,-260,-259,-258,-257,-256,-255,-254,-253,-252,-251,-250,-249,-248,-247,-246,-245,-244,-243,-242,-241,-240,-239,-238,-237,-236,-235,-234,-233,-232,-231,-230,-229,-228,-227,-226,-225,-224,-223,-222,-221,-220,-219,-218,-217,-216,-215,-214,-213,-212,-211,-210,-209,-208,-207,-206,-205,-204,-203,-202,-201,-200,-199,-198,-197,-196,-195,-194,-193,-192,-191,-190,-189,-188,-187,-186,-185,-184,-183,-182,-181,-180,-179,-178,-177,-176,-175,-174,-173,-172,-171,-170,-169,-168,-167,-166,-165,-164,-163,-162,-161,-160,-159,-158,-157,-156,-155,-154,-153,-152,-151,-150,-149,-148,-147,-146,-145,-144,-143,-142,-141,-140,-139,-138,-137,-136,-135,-134,-133,-132,-131,-130,-129,-128,-127,-126,-125,-124,-123,-122,-121,-120,-119,-118,-117,-116,-115,-114,-113,-112,-111,-110,-109,-108,-107,-106,-105,-104,-103,-102,-101,-100,-99,-98,-97,-96,-95,-94,-93,-92,-91,-90,-89,-88,-87,-86,-85,-84,-83,-82,-81,-80,-79,-78,-77,-76,-75,-74,-73,-72,-71,-70,-69,-68,-67,-66,-65,-64,-63,-62,-61,-60,-59,-58,-57,-56,-55,-54,-53,-52,-51,-50,-49,-48,-47,-46,-45,-44,-43,-42,-41,-40,-39,-38,-37,-36,-35,-34,-33,-32,-31,-30,-29,-28,-27,-26,-25,-24,-23,-22,-21,-20,-19,-18,-17,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1]"
17 |
--------------------------------------------------------------------------------
/inst/csv/OtherSqlToLoad.csv:
--------------------------------------------------------------------------------
1 | analysisName,sqlFileName
2 | cohort,CohortBasedBinaryCovariates.sql
3 | cohortCount,CohortBasedCountCovariates.sql
4 |
--------------------------------------------------------------------------------
/inst/csv/PrespecTemporalAnnualAnalysis.csv:
--------------------------------------------------------------------------------
1 | analysisId,analysisName
2 | 4,ConditionEraShortTerm
3 | 9,ConditionEraStartLongTerm
4 | 10,ConditionEraAnyTimePrior
5 | 12,DrugExposureLongTerm
6 | 37,DrugEraStartShortTerm
7 | 39,MeasurementAnyTimePrior
8 | 40,MeasurementMediumTerm
9 | 43,DrugEraShortTerm
10 | 44,DrugEraOverlapping
11 | 52,DeviceExposureAnyTimePrior
12 | 53,ObservationLongTerm
13 | 55,ProcedureOccurrenceShortTerm
14 | 56,ObservationMediumTerm
15 | 57,DeviceExposureLongTerm
16 | 59,DeviceExposureMediumTerm
17 | 60,MeasurementLongTerm
18 | 62,DrugEraStartMediumTerm
19 | 65,DeviceExposureShortTerm
20 | 67,ConditionOccurrenceLongTerm
21 | 69,ConditionOccurrenceAnyTimePrior
22 | 76,ConditionOccurrenceMediumTerm
23 | 83,ConditionOccurrencePrimaryInpatientLongTerm
24 | 84,ProcedureOccurrenceLongTerm
25 | 85,ConditionOccurrencePrimaryInpatientAnyTimePrior
26 | 86,DrugEraLongTerm
27 | 87,ProcedureOccurrenceAnyTimePrior
28 | 88,DrugEraMediumTerm
29 | 89,DrugEraAnyTimePrior
30 | 90,ConditionOccurrenceShortTerm
31 | 102,ConditionOccurrencePrimaryInpatientShortTerm
32 | 104,ConditionOccurrencePrimaryInpatientMediumTerm
33 | 16,ConditionEraMediumTerm
34 | 17,ConditionEraOverlapping
35 | 18,ConditionEraStartShortTerm
36 | 21,ConditionEraStartMediumTerm
37 | 22,ProcedureOccurrenceMediumTerm
38 | 23,ConditionEraLongTerm
39 | 28,DrugExposureAnyTimePrior
40 | 31,DrugExposureShortTerm
41 | 33,DrugExposureMediumTerm
42 | 34,ObservationShortTerm
43 | 35,DrugEraStartLongTerm
44 |
--------------------------------------------------------------------------------
/inst/csv/PrespecTemporalSequenceAnalyses.csv:
--------------------------------------------------------------------------------
1 | analysisId,analysisName,sqlFileName,subType,domainId,domainTable,domainConceptId,domainStartDate,domainEndDate,isDefault,description
2 | 1,DemographicsGender,DemographicsGender.sql,,Demographics,,,,,TRUE,Gender of the subject.
3 | 2,DemographicsAge,DemographicsAge.sql,,Demographics,,,,,FALSE,Age of the subject on the index date (in years).
4 | 3,DemographicsAgeGroup,DemographicsAgeGroup.sql,,Demographics,,,,,TRUE,Age of the subject on the index date (in 5 year age groups)
5 | 4,DemographicsRace,DemographicsRace.sql,,Demographics,,,,,TRUE,Race of the subject.
6 | 5,DemographicsEthnicity,DemographicsEthnicity.sql,,Demographics,,,,,TRUE,Ethnicity of the subject.
7 | 6,DemographicsIndexYear,DemographicsYear.sql,,Demographics,,,,,FALSE,Year of the index date.
8 | 7,DemographicsIndexMonth,DemographicsMonth.sql,,Demographics,,,,,FALSE,Month of the index date.
9 | 101,ConditionOccurrence,DomainConcept.sql,all,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition in the condition_occurrence table starting in the time window.
10 | 102,ConditionOccurrencePrimaryInpatient,DomainConcept.sql,inpatient,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition observed as a primary diagnosis in an inpatient setting in the condition_occurrence table starting in the time window.
11 | 201,ConditionEraStart,DomainConcept.sql,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition in the condition_era table starting in the time window.
12 | 203,ConditionEraGroupStart,DomainConceptGroup.sql,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,TRUE,One covariate per condition era rolled up to SNOMED groups in the condition_era table starting in the time window.
13 | 301,DrugExposure,DomainConcept.sql,all,Drug,drug_exposure,drug_concept_id,drug_exposure_start_date,drug_exposure_start_date,FALSE,One covariate per drug in the drug_exposure table starting in the time window.
14 | 401,DrugEraStart,DomainConcept.sql,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug in the drug_era table starting in the time window.
15 | 403,DrugEraGroupStart,DomainConceptGroup.sql,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,TRUE,One covariate per drug rolled up to ATC groups in the drug_era table starting in the time window.
16 | 501,ProcedureOccurrence,DomainConcept.sql,all,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,FALSE,One covariate per procedure in the procedure_occurrence table in the time window.
17 | 601,DeviceExposure,DomainConcept.sql,all,Device,device_exposure,device_concept_id,device_exposure_start_date,device_exposure_start_date,FALSE,One covariate per device in the device_exposure table starting in the time window.
18 | 701,Measurement,DomainConcept.sql,all,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,FALSE,One covariate per measurement in the measurement table in the time window.
19 | 702,MeasurementValue,MeasurementValue.sql,,Measurement,,,,,FALSE,"One covariate containing the value per measurement-unit combination in the time window. If multiple values are found, the last is taken."
20 | 801,Observation,DomainConcept.sql,all,Observation,observation,observation_concept_id,observation_date,observation_date,FALSE,One covariate per observation in the observation table in the time window.
21 |
--------------------------------------------------------------------------------
/inst/csv/Table1Specs.csv:
--------------------------------------------------------------------------------
1 | label,analysisId,covariateIds
2 | Age group,3,
3 | Gender: female,1,8532001
4 | Race,4,
5 | Ethnicity,5,
6 | Medical history: General,210,"4006969210,438409210,4212540210,255573210,201606210,4182210210,440383210,201820210,318800210,192671210,439727210,432867210,316866210,4104000210,433736210,80180210,255848210,140168210,4030518210,80809210,435783210,4279309210,81893210,81902210,197494210,4134440210"
7 | Medical history: Cardiovascular disease,210,"313217210,381591210,317576210,321588210,316139210,4185932210,321052210,440417210,444247210"
8 | Medical history: Neoplasms,210,"4044013210,432571210,40481902210,443392210,4112853210,4180790210,443388210,197508210,200962210"
9 | Medication use,410,"21601782410,21602796410,21604686410,21604389410,21603932410,21601387410,21602028410,21600960410,21601664410,21601744410,21601461410,21600046410,21603248410,21600712410,21603890410,21601853410,21604254410,21604489410,21604752410"
10 | Charlson comorbidity index,901,1901
11 | CHADS2Vasc,904,1904
12 | DCSI,902,1902
13 |
--------------------------------------------------------------------------------
/inst/csv/jarChecksum.txt:
--------------------------------------------------------------------------------
1 | 7bf91a9f369dbfd67bc2734313402fca30962298d27c5154ec03354b5dede3d4
2 |
--------------------------------------------------------------------------------
/inst/doc/CreatingCovariatesBasedOnOtherCohorts.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/doc/CreatingCovariatesBasedOnOtherCohorts.pdf
--------------------------------------------------------------------------------
/inst/doc/CreatingCovariatesUsingCohortAttributes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/doc/CreatingCovariatesUsingCohortAttributes.pdf
--------------------------------------------------------------------------------
/inst/doc/CreatingCustomCovariateBuilders.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/doc/CreatingCustomCovariateBuilders.pdf
--------------------------------------------------------------------------------
/inst/doc/CreatingCustomCovariateBuildersKorean.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/doc/CreatingCustomCovariateBuildersKorean.pdf
--------------------------------------------------------------------------------
/inst/doc/UsingFeatureExtraction.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/doc/UsingFeatureExtraction.pdf
--------------------------------------------------------------------------------
/inst/doc/UsingFeatureExtractionKorean.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/doc/UsingFeatureExtractionKorean.pdf
--------------------------------------------------------------------------------
/inst/java/SqlRender-1.19.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/java/SqlRender-1.19.1.jar
--------------------------------------------------------------------------------
/inst/java/featureExtraction-3.10.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/java/featureExtraction-3.10.0.jar
--------------------------------------------------------------------------------
/inst/java/json-20231013.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/java/json-20231013.jar
--------------------------------------------------------------------------------
/inst/sql/sql_server/CareSite.sql:
--------------------------------------------------------------------------------
-- Feature construction: one covariate per care site, with covariate ID equal
-- to care_site_id * 1000 + analysis ID. The care site is taken from a
-- visit_detail record spanning the cohort start date, else from a spanning
-- visit_occurrence record, else from the person record. Only the first row
-- (rn = 1) per cohort definition and row ID is kept.
-- In temporal and temporal sequence mode a NULL time_id column is emitted
-- through a single conditional, matching the other covariate templates.
SELECT
  CAST(care_site_id AS BIGINT) * 1000 + @analysis_id AS covariate_id,
{@temporal | @temporal_sequence} ? {
  CAST(NULL AS INT) AS time_id,
}
{@aggregated} ? {
  cohort_definition_id,
  COUNT(*) AS sum_value
} : {
  row_id,
  1 AS covariate_value
}
INTO @covariate_table
FROM (
  SELECT cohort.cohort_definition_id,
    cohort.@row_id_field AS row_id,
    CASE
      WHEN visit_detail.care_site_id IS NOT NULL THEN visit_detail.care_site_id
      WHEN visit_occurrence.care_site_id IS NOT NULL THEN visit_occurrence.care_site_id
      ELSE person.care_site_id
    END AS care_site_id,
    ROW_NUMBER() OVER (PARTITION BY cohort_definition_id, cohort.@row_id_field ORDER BY visit_detail.visit_detail_end_date, visit_occurrence.visit_end_date) AS rn
  FROM @cohort_table cohort
  INNER JOIN @cdm_database_schema.person
    ON cohort.subject_id = person.person_id
  LEFT JOIN @cdm_database_schema.visit_occurrence
    ON cohort.subject_id = visit_occurrence.person_id
      AND visit_occurrence.visit_start_date <= cohort.cohort_start_date
      AND visit_occurrence.visit_end_date >= cohort.cohort_start_date
  LEFT JOIN @cdm_database_schema.visit_detail
    ON cohort.subject_id = visit_detail.person_id
      AND visit_detail.visit_detail_start_date <= cohort.cohort_start_date
      AND visit_detail.visit_detail_end_date >= cohort.cohort_start_date
  WHERE NOT (person.care_site_id IS NULL
      AND visit_occurrence.care_site_id IS NULL
      AND visit_detail.care_site_id IS NULL
    )
{@cohort_definition_id != -1} ? {    AND cohort.cohort_definition_id IN (@cohort_definition_id)}
  ) care_site
WHERE rn = 1
{@included_cov_table != ''} ? {  AND CAST(care_site_id AS BIGINT) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)}

{@aggregated} ? {
GROUP BY cohort_definition_id,
  care_site_id
}
;
52 |
53 | -- Reference construction: add one #cov_ref row per distinct covariate ID, naming it after the care site ID recovered from the covariate ID
54 | INSERT INTO #cov_ref (
55 | covariate_id,
56 | covariate_name,
57 | analysis_id,
58 | concept_id
59 | )
60 | SELECT covariate_id,
61 | CAST(CONCAT('care site ID = ', CAST((covariate_id - @analysis_id) / 1000 AS INT)) AS VARCHAR(512)) AS covariate_name,
62 | @analysis_id AS analysis_id,
63 | 0 AS concept_id
64 | FROM (
65 | SELECT DISTINCT covariate_id
66 | FROM @covariate_table
67 | ) t1;
68 | -- Analysis reference: add one #analysis_ref row for this analysis, flagged binary, with NULL start and end days outside temporal mode
69 | INSERT INTO #analysis_ref (
70 | analysis_id,
71 | analysis_name,
72 | domain_id,
73 | {!@temporal} ? {
74 | start_day,
75 | end_day,
76 | }
77 | is_binary,
78 | missing_means_zero
79 | )
80 | SELECT @analysis_id AS analysis_id,
81 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name,
82 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id,
83 | {!@temporal} ? {
84 | CAST(NULL AS INT) AS start_day,
85 | CAST(NULL AS INT) AS end_day,
86 | }
87 | CAST('Y' AS VARCHAR(1)) AS is_binary,
88 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero;
89 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/CohortBasedBinaryCovariates.sql:
--------------------------------------------------------------------------------
1 | -- Feature construction: one binary covariate per covariate cohort that overlaps the window (covariate cohort starts on or before the window end, and its end date, or its start date when the end date is NULL, is on or after the window start)
2 | SELECT
3 | CAST(covariate_cohort_id AS BIGINT) * 1000 + @analysis_id AS covariate_id,
4 | {@temporal | @temporal_sequence} ? {
5 | time_id,
6 | }
7 | {@aggregated} ? {
8 | cohort_definition_id,
9 | COUNT(*) AS sum_value
10 | } : {
11 | row_id,
12 | 1 AS covariate_value
13 | }
14 | INTO @covariate_table
15 | FROM (
16 | SELECT DISTINCT covariate_cohort.cohort_definition_id AS covariate_cohort_id,
17 | {@temporal} ? {
18 | time_id,
19 | }
20 | {@temporal_sequence} ? {
21 | FLOOR(DATEDIFF(@time_part, covariate_cohort.cohort_start_date, cohort.cohort_start_date)*1.0/@time_interval ) as time_id,
22 | }
23 | {@aggregated} ? {
24 | cohort.cohort_definition_id,
25 | cohort.subject_id,
26 | cohort.cohort_start_date
27 | } : {
28 | cohort.@row_id_field AS row_id
29 | }
30 | FROM @cohort_table cohort
31 | INNER JOIN @covariate_cohort_table covariate_cohort
32 | ON cohort.subject_id = covariate_cohort.subject_id
33 | INNER JOIN #covariate_cohort_ref covariate_cohort_ref
34 | ON covariate_cohort.cohort_definition_id = CAST(covariate_cohort_ref.cohort_id AS INT)
35 | {@temporal} ? {
36 | INNER JOIN #time_period time_period
37 | ON covariate_cohort.cohort_start_date <= DATEADD(DAY, time_period.end_day, cohort.cohort_start_date)
38 | WHERE CASE WHEN covariate_cohort.cohort_end_date IS NULL THEN covariate_cohort.cohort_start_date ELSE covariate_cohort.cohort_end_date END >= DATEADD(DAY, time_period.start_day, cohort.cohort_start_date)
39 | } : {
40 | WHERE covariate_cohort.cohort_start_date <= DATEADD(DAY, {@temporal_sequence} ? {@sequence_end_day} : {@end_day}, cohort.cohort_start_date)
41 | {@start_day != 'anyTimePrior'} ? {
42 | AND CASE WHEN covariate_cohort.cohort_end_date IS NULL THEN covariate_cohort.cohort_start_date ELSE covariate_cohort.cohort_end_date END >= DATEADD(DAY, {@temporal_sequence} ? {@sequence_start_day} : {@start_day}, cohort.cohort_start_date)
43 | }
44 | }
45 | {@included_cov_table != ''} ? { AND CAST(covariate_cohort.cohort_definition_id AS BIGINT) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)}
46 | {@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)}
47 | ) by_row_id
48 | {@aggregated} ? {
49 | GROUP BY cohort_definition_id,
50 | covariate_cohort_id
51 | {@temporal | @temporal_sequence} ? {
52 | ,time_id
53 | }
54 | }
55 | ;
56 |
57 | -- Reference construction: add one #cov_ref row per distinct covariate ID, named after the cohort looked up in #covariate_cohort_ref (the name includes the window unless in temporal or temporal sequence mode)
58 | INSERT INTO #cov_ref (
59 | covariate_id,
60 | covariate_name,
61 | analysis_id,
62 | concept_id
63 | )
64 | SELECT covariate_id,
65 | {@temporal | @temporal_sequence} ? {
66 | CAST(CONCAT('cohort: ', cohort_name) AS VARCHAR(512)) AS covariate_name,
67 | } : {
68 | {@start_day == 'anyTimePrior'} ? {
69 | CAST(CONCAT('cohort any time prior through @end_day days relative to index: ', cohort_name) AS VARCHAR(512)) AS covariate_name,
70 | } : {
71 | CAST(CONCAT('cohort during day @start_day through @end_day days relative to index: ', cohort_name) AS VARCHAR(512)) AS covariate_name,
72 | }
73 | }
74 | @analysis_id AS analysis_id,
75 | 0 AS concept_id
76 | FROM (
77 | SELECT DISTINCT covariate_id
78 | FROM @covariate_table
79 | ) t1
80 | LEFT JOIN #covariate_cohort_ref
81 | ON CAST(cohort_id AS INT) = CAST((covariate_id - @analysis_id) / 1000 AS INT);
82 | -- Analysis reference: add one #analysis_ref row for this analysis, flagged binary. Outside temporal mode start_day is NULL for anyTimePrior, and the sequence start and end days are used in temporal sequence mode
83 | INSERT INTO #analysis_ref (
84 | analysis_id,
85 | analysis_name,
86 | domain_id,
87 | {!@temporal} ? {
88 | start_day,
89 | end_day,
90 | }
91 | is_binary,
92 | missing_means_zero
93 | )
94 | SELECT @analysis_id AS analysis_id,
95 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name,
96 | CAST('cohort' AS VARCHAR(20)) AS domain_id,
97 | {!@temporal} ? {
98 | {@start_day == 'anyTimePrior'} ? {
99 | CAST(NULL AS INT) AS start_day,
100 | } : {
101 |
102 | {@temporal_sequence} ? {@sequence_start_day} : {@start_day} AS start_day,
103 | }
104 | {@temporal_sequence} ? {@sequence_end_day} : {@end_day} AS end_day,
105 | }
106 | CAST('Y' AS VARCHAR(1)) AS is_binary,
107 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero;
108 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/CreateCovAnalysisRefTables.sql:
--------------------------------------------------------------------------------
1 | IF OBJECT_ID('tempdb..#cov_ref', 'U') IS NOT NULL
2 | DROP TABLE #cov_ref;
3 | -- Both temp reference tables are dropped when they already exist and then created empty below
4 | IF OBJECT_ID('tempdb..#analysis_ref', 'U') IS NOT NULL
5 | DROP TABLE #analysis_ref;
6 | -- #cov_ref: covariate reference with ID, name, analysis ID, concept ID, value_as_concept_id and a collisions count
7 | CREATE TABLE #cov_ref (
8 | covariate_id BIGINT,
9 | covariate_name VARCHAR(512),
10 | analysis_id INT,
11 | concept_id INT,
12 | value_as_concept_id INT,
13 | collisions INT
14 | );
15 | -- #analysis_ref: analysis reference. The start_day and end_day columns exist only outside temporal mode
16 | CREATE TABLE #analysis_ref (
17 | analysis_id BIGINT,
18 | analysis_name VARCHAR(512),
19 | domain_id VARCHAR(20),
20 | {!@temporal} ? {
21 | start_day INT,
22 | end_day INT,
23 | }
24 | is_binary VARCHAR(1),
25 | missing_means_zero VARCHAR(1)
26 | );
27 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/DemographicsAgeGroup.sql:
--------------------------------------------------------------------------------
1 | -- Feature construction: one binary covariate per 5-year age group, where age is the year of cohort start minus the year of birth and the covariate ID is the group number * 1000 + analysis ID
2 | SELECT CAST(FLOOR((YEAR(cohort_start_date) - year_of_birth) / 5) * 1000 + @analysis_id AS BIGINT) AS covariate_id,
3 | {@temporal | @temporal_sequence} ? {
4 | CAST(NULL AS INT) AS time_id,
5 | }
6 | {@aggregated} ? {
7 | cohort_definition_id,
8 | COUNT(*) AS sum_value
9 | } : {
10 | cohort.@row_id_field AS row_id,
11 | 1 AS covariate_value
12 | }
13 | INTO @covariate_table
14 | FROM @cohort_table cohort
15 | INNER JOIN @cdm_database_schema.person
16 | ON cohort.subject_id = person.person_id
17 | {@included_cov_table != ''} ? {WHERE FLOOR((YEAR(cohort_start_date) - year_of_birth) / 5) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)}
18 | {@cohort_definition_id != -1} ? {
19 | {@included_cov_table != ''} ? { AND} :{WHERE} cohort.cohort_definition_id IN (@cohort_definition_id)
20 | }
21 | {@aggregated} ? {
22 | GROUP BY cohort_definition_id,
23 | FLOOR((YEAR(cohort_start_date) - year_of_birth) / 5)
24 | }
25 | ;
26 |
27 | -- Reference construction: add one #cov_ref row per distinct covariate ID, named with the lower age of the 5-year group and the upper age (lower + 4), each trimmed to its last 3 characters by RIGHT
28 | INSERT INTO #cov_ref (
29 | covariate_id,
30 | covariate_name,
31 | analysis_id,
32 | concept_id
33 | )
34 | SELECT covariate_id,
35 | CAST(CONCAT (
36 | 'age group: ',
37 | RIGHT(CONCAT(' ', CAST(CAST(5 * (covariate_id - @analysis_id) / 1000 AS INTEGER) AS VARCHAR)), 3),
38 | ' - ',
39 | RIGHT(CONCAT(' ', CAST((CAST(5 * (covariate_id - @analysis_id) / 1000 AS INTEGER)) + 4 AS VARCHAR)), 3)
40 | ) AS VARCHAR(512)) AS covariate_name,
41 | @analysis_id AS analysis_id,
42 | 0 AS concept_id
43 | FROM (
44 | SELECT DISTINCT covariate_id
45 | FROM @covariate_table
46 | ) t1;
47 | -- Analysis reference: add one #analysis_ref row for this analysis, flagged binary, with NULL start and end days outside temporal mode
48 | INSERT INTO #analysis_ref (
49 | analysis_id,
50 | analysis_name,
51 | domain_id,
52 | {!@temporal} ? {
53 | start_day,
54 | end_day,
55 | }
56 | is_binary,
57 | missing_means_zero
58 | )
59 | SELECT @analysis_id AS analysis_id,
60 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name,
61 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id,
62 | {!@temporal} ? {
63 | CAST(NULL AS INT) AS start_day,
64 | CAST(NULL AS INT) AS end_day,
65 | }
66 | CAST('Y' AS VARCHAR(1)) AS is_binary,
67 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero;
68 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/DemographicsEthnicity.sql:
--------------------------------------------------------------------------------
1 | -- Feature construction: one binary covariate per ethnicity concept of the person, restricted to concepts whose concept_class_id is ethnicity (compared in lower case), with optional concept and covariate include and exclude filters
2 | SELECT
3 | CAST(ethnicity_concept_id AS BIGINT) * 1000 + @analysis_id AS covariate_id,
4 | {@temporal | @temporal_sequence} ? {
5 | CAST(NULL AS INT) AS time_id,
6 | }
7 | {@aggregated} ? {
8 | cohort_definition_id,
9 | COUNT(*) AS sum_value
10 | } : {
11 | cohort.@row_id_field AS row_id,
12 | 1 AS covariate_value
13 | }
14 | INTO @covariate_table
15 | FROM @cohort_table cohort
16 | INNER JOIN @cdm_database_schema.person
17 | ON cohort.subject_id = person.person_id
18 | WHERE ethnicity_concept_id IN (
19 | SELECT concept_id
20 | FROM @cdm_database_schema.concept
21 | WHERE LOWER(concept_class_id) = 'ethnicity'
22 | )
23 | {@excluded_concept_table != ''} ? { AND ethnicity_concept_id NOT IN (SELECT id FROM @excluded_concept_table)}
24 | {@included_concept_table != ''} ? { AND ethnicity_concept_id IN (SELECT id FROM @included_concept_table)}
25 | {@included_cov_table != ''} ? { AND CAST(ethnicity_concept_id AS BIGINT) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)}
26 | {@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)}
27 | {@aggregated} ? {
28 | GROUP BY cohort_definition_id,
29 | ethnicity_concept_id
30 | }
31 | ;
32 |
33 | -- Reference construction
34 | INSERT INTO #cov_ref (
35 | covariate_id,
36 | covariate_name,
37 | analysis_id,
38 | concept_id
39 | )
40 | SELECT covariate_id,
41 | CAST(CONCAT('ethnicity = ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END) AS VARCHAR(512)) AS covariate_name,
42 | @analysis_id AS analysis_id,
43 | CAST((covariate_id - @analysis_id) / 1000 AS INT) AS concept_id
44 | FROM (
45 | SELECT DISTINCT covariate_id
46 | FROM @covariate_table
47 | ) t1
48 | LEFT JOIN @cdm_database_schema.concept
49 | ON concept_id = CAST((covariate_id - @analysis_id) / 1000 AS INT);
50 |
51 | INSERT INTO #analysis_ref (
52 | analysis_id,
53 | analysis_name,
54 | domain_id,
55 | {!@temporal} ? {
56 | start_day,
57 | end_day,
58 | }
59 | is_binary,
60 | missing_means_zero
61 | )
62 | SELECT @analysis_id AS analysis_id,
63 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name,
64 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id,
65 | {!@temporal} ? {
66 | CAST(NULL AS INT) AS start_day,
67 | CAST(NULL AS INT) AS end_day,
68 | }
69 | CAST('Y' AS VARCHAR(1)) AS is_binary,
70 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero;
71 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/DemographicsGender.sql:
--------------------------------------------------------------------------------
1 | -- Feature construction. One covariate per non-zero gender concept, with covariate_id = gender_concept_id * 1000 + analysis_id
2 | SELECT
3 | CAST(gender_concept_id AS BIGINT) * 1000 + @analysis_id AS covariate_id,
4 | {@temporal | @temporal_sequence} ? {
5 | CAST(NULL AS INT) AS time_id,
6 | }
7 | -- time_id is always a NULL placeholder in this analysis. It is emitted by a
8 | -- single guard covering both temporal modes, so the column appears at most
9 | -- once even when both flags are set.
10 | {@aggregated} ? {
11 | cohort_definition_id,
12 | COUNT(*) AS sum_value
13 | } : {
14 | cohort.@row_id_field AS row_id,
15 | 1 AS covariate_value
16 | }
17 | INTO @covariate_table
18 | FROM @cohort_table cohort
19 | INNER JOIN @cdm_database_schema.person
20 | ON cohort.subject_id = person.person_id
21 | WHERE gender_concept_id != 0
22 | {@excluded_concept_table != ''} ? { AND gender_concept_id NOT IN (SELECT id FROM @excluded_concept_table)}
23 | {@included_concept_table != ''} ? { AND gender_concept_id IN (SELECT id FROM @included_concept_table)}
24 | {@included_cov_table != ''} ? { AND CAST(gender_concept_id AS BIGINT) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)}
25 | {@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)}
26 | {@aggregated} ? {
27 | GROUP BY cohort_definition_id,
28 | gender_concept_id
29 | }
30 | ;
31 |
32 | -- Reference construction. Names each covariate after its concept and recovers concept_id as (covariate_id - analysis_id) / 1000
33 | INSERT INTO #cov_ref (
34 | covariate_id,
35 | covariate_name,
36 | analysis_id,
37 | concept_id
38 | )
39 | SELECT covariate_id,
40 | CAST(CONCAT('gender = ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END) AS VARCHAR(512)) AS covariate_name,
41 | @analysis_id AS analysis_id,
42 | CAST((covariate_id - @analysis_id) / 1000 AS INT) AS concept_id
43 | FROM (
44 | SELECT DISTINCT covariate_id
45 | FROM @covariate_table
46 | ) t1
47 | LEFT JOIN @cdm_database_schema.concept -- LEFT JOIN keeps covariates whose concept is missing from the vocabulary
48 | ON concept_id = CAST((covariate_id - @analysis_id) / 1000 AS INT);
49 | -- Analysis reference row. start_day and end_day are written as NULL and only when the temporal flag is off
50 | INSERT INTO #analysis_ref (
51 | analysis_id,
52 | analysis_name,
53 | domain_id,
54 | {!@temporal} ? {
55 | start_day,
56 | end_day,
57 | }
58 | is_binary,
59 | missing_means_zero
60 | )
61 | SELECT @analysis_id AS analysis_id,
62 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name,
63 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id,
64 | {!@temporal} ? {
65 | CAST(NULL AS INT) AS start_day,
66 | CAST(NULL AS INT) AS end_day,
67 | }
68 | CAST('Y' AS VARCHAR(1)) AS is_binary,
69 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero;
70 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/DemographicsMonth.sql:
--------------------------------------------------------------------------------
1 | -- Feature construction. One covariate per calendar month of cohort_start_date, with covariate_id = month * 1000 + analysis_id
2 | SELECT CAST(MONTH(cohort_start_date) * 1000 + @analysis_id AS BIGINT) AS covariate_id,
3 | {@temporal | @temporal_sequence} ? {
4 | CAST(NULL AS INT) AS time_id,
5 | }
6 | {@aggregated} ? {
7 | cohort_definition_id,
8 | COUNT(*) AS sum_value
9 | } : {
10 | cohort.@row_id_field AS row_id,
11 | 1 AS covariate_value
12 | }
13 | INTO @covariate_table
14 | FROM @cohort_table cohort
15 | INNER JOIN @cdm_database_schema.person
16 | ON cohort.subject_id = person.person_id -- the optional filters below open with WHERE or AND depending on which are rendered
17 | {@included_cov_table != ''} ? {WHERE MONTH(cohort_start_date) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)}
18 | {@cohort_definition_id != -1} ? {
19 | {@included_cov_table != ''} ? { AND} :{WHERE} cohort.cohort_definition_id IN (@cohort_definition_id)
20 | }
21 | {@aggregated} ? {
22 | GROUP BY cohort_definition_id,
23 | MONTH(cohort_start_date)
24 | }
25 | ;
26 |
27 | -- Reference construction. The month number is recovered from covariate_id for the name, and concept_id is fixed at 0
28 | INSERT INTO #cov_ref (
29 | covariate_id,
30 | covariate_name,
31 | analysis_id,
32 | concept_id
33 | )
34 | SELECT covariate_id,
35 | CAST(CONCAT ('index month: ', CAST((covariate_id - @analysis_id) / 1000 AS INTEGER)) AS VARCHAR(512)) AS covariate_name,
36 | @analysis_id AS analysis_id,
37 | 0 AS concept_id
38 | FROM (
39 | SELECT DISTINCT covariate_id
40 | FROM @covariate_table
41 | ) t1;
42 | -- Analysis reference row. start_day and end_day are written as NULL and only when the temporal flag is off
43 | INSERT INTO #analysis_ref (
44 | analysis_id,
45 | analysis_name,
46 | domain_id,
47 | {!@temporal} ? {
48 | start_day,
49 | end_day,
50 | }
51 | is_binary,
52 | missing_means_zero
53 | )
54 | SELECT @analysis_id AS analysis_id,
55 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name,
56 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id,
57 | {!@temporal} ? {
58 | CAST(NULL AS INT) AS start_day,
59 | CAST(NULL AS INT) AS end_day,
60 | }
61 | CAST('Y' AS VARCHAR(1)) AS is_binary,
62 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero;
63 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/DemographicsRace.sql:
--------------------------------------------------------------------------------
1 | -- Feature construction. One covariate per race concept, with covariate_id = race_concept_id * 1000 + analysis_id
2 | SELECT
3 | CAST(race_concept_id AS BIGINT) * 1000 + @analysis_id AS covariate_id,
4 | {@temporal | @temporal_sequence} ? {
5 | CAST(NULL AS INT) AS time_id,
6 | }
7 | {@aggregated} ? {
8 | cohort_definition_id,
9 | COUNT(*) AS sum_value
10 | } : {
11 | cohort.@row_id_field AS row_id,
12 | 1 AS covariate_value
13 | }
14 | INTO @covariate_table
15 | FROM @cohort_table cohort
16 | INNER JOIN @cdm_database_schema.person
17 | ON cohort.subject_id = person.person_id
18 | WHERE race_concept_id IN (
19 | SELECT concept_id
20 | FROM @cdm_database_schema.concept
21 | WHERE LOWER(concept_class_id) = 'race' -- case-insensitive match on the concept class
22 | )
23 | {@excluded_concept_table != ''} ? { AND race_concept_id NOT IN (SELECT id FROM @excluded_concept_table)}
24 | {@included_concept_table != ''} ? { AND race_concept_id IN (SELECT id FROM @included_concept_table)}
25 | {@included_cov_table != ''} ? { AND CAST(race_concept_id AS BIGINT) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)}
26 | {@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)}
27 | {@aggregated} ? {
28 | GROUP BY cohort_definition_id,
29 | race_concept_id
30 | }
31 | ;
32 |
33 | -- Reference construction. Names each covariate after its concept and recovers concept_id as (covariate_id - analysis_id) / 1000
34 | INSERT INTO #cov_ref (
35 | covariate_id,
36 | covariate_name,
37 | analysis_id,
38 | concept_id
39 | )
40 | SELECT covariate_id,
41 | CAST(CONCAT('race = ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END) AS VARCHAR(512)) AS covariate_name,
42 | @analysis_id AS analysis_id,
43 | CAST((covariate_id - @analysis_id) / 1000 AS INT) AS concept_id
44 | FROM (
45 | SELECT DISTINCT covariate_id
46 | FROM @covariate_table
47 | ) t1
48 | LEFT JOIN @cdm_database_schema.concept -- LEFT JOIN keeps covariates whose concept is missing from the vocabulary
49 | ON concept_id = CAST((covariate_id - @analysis_id) / 1000 AS INT);
50 | -- Analysis reference row. start_day and end_day are written as NULL and only when the temporal flag is off
51 | INSERT INTO #analysis_ref (
52 | analysis_id,
53 | analysis_name,
54 | domain_id,
55 | {!@temporal} ? {
56 | start_day,
57 | end_day,
58 | }
59 | is_binary,
60 | missing_means_zero
61 | )
62 | SELECT @analysis_id AS analysis_id,
63 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name,
64 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id,
65 | {!@temporal} ? {
66 | CAST(NULL AS INT) AS start_day,
67 | CAST(NULL AS INT) AS end_day,
68 | }
69 | CAST('Y' AS VARCHAR(1)) AS is_binary,
70 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero;
71 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/DemographicsYear.sql:
--------------------------------------------------------------------------------
1 | -- Feature construction. One covariate per calendar year of cohort_start_date, with covariate_id = year * 1000 + analysis_id
2 | SELECT CAST(YEAR(cohort_start_date) * 1000 + @analysis_id AS BIGINT) AS covariate_id,
3 | {@temporal | @temporal_sequence} ? {
4 | CAST(NULL AS INT) AS time_id,
5 | }
6 | {@aggregated} ? {
7 | cohort_definition_id,
8 | COUNT(*) AS sum_value
9 | } : {
10 | cohort.@row_id_field AS row_id,
11 | 1 AS covariate_value
12 | }
13 | INTO @covariate_table
14 | FROM @cohort_table cohort
15 | INNER JOIN @cdm_database_schema.person
16 | ON cohort.subject_id = person.person_id -- the optional filters below open with WHERE or AND depending on which are rendered
17 | {@included_cov_table != ''} ? {WHERE YEAR(cohort_start_date) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)}
18 | {@cohort_definition_id != -1} ? {
19 | {@included_cov_table != ''} ? { AND} :{WHERE} cohort.cohort_definition_id IN (@cohort_definition_id)
20 | }
21 | {@aggregated} ? {
22 | GROUP BY cohort_definition_id,
23 | YEAR(cohort_start_date)
24 | }
25 | ;
26 |
27 | -- Reference construction. The year is recovered from covariate_id for the name, and concept_id is fixed at 0
28 | INSERT INTO #cov_ref (
29 | covariate_id,
30 | covariate_name,
31 | analysis_id,
32 | concept_id
33 | )
34 | SELECT covariate_id,
35 | CAST(CONCAT ('index year: ', CAST((covariate_id - @analysis_id) / 1000 AS INTEGER)) AS VARCHAR(512)) AS covariate_name,
36 | @analysis_id AS analysis_id,
37 | 0 AS concept_id
38 | FROM (
39 | SELECT DISTINCT covariate_id
40 | FROM @covariate_table
41 | ) t1;
42 | -- Analysis reference row. start_day and end_day are written as NULL and only when the temporal flag is off
43 | INSERT INTO #analysis_ref (
44 | analysis_id,
45 | analysis_name,
46 | domain_id,
47 | {!@temporal} ? {
48 | start_day,
49 | end_day,
50 | }
51 | is_binary,
52 | missing_means_zero
53 | )
54 | SELECT @analysis_id AS analysis_id,
55 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name,
56 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id,
57 | {!@temporal} ? {
58 | CAST(NULL AS INT) AS start_day,
59 | CAST(NULL AS INT) AS end_day,
60 | }
61 | CAST('Y' AS VARCHAR(1)) AS is_binary,
62 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero;
63 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/DemographicsYearMonth.sql:
--------------------------------------------------------------------------------
1 | -- Feature construction. One covariate per year and month of cohort_start_date, with covariate_id = year * 100000 + month * 1000 + analysis_id. The NULL time_id column is emitted in both temporal modes.
2 | SELECT CAST(YEAR(cohort_start_date)*100000 + MONTH(cohort_start_date)*1000 + @analysis_id AS BIGINT) AS covariate_id,
3 | {@temporal | @temporal_sequence} ? {
4 | CAST(NULL AS INT) AS time_id,
5 | }
6 | {@aggregated} ? {
7 | cohort_definition_id,
8 | COUNT(*) AS sum_value
9 | } : {
10 | cohort.@row_id_field AS row_id,
11 | 1 AS covariate_value
12 | }
13 | INTO @covariate_table
14 | FROM @cohort_table cohort
15 | INNER JOIN @cdm_database_schema.person
16 | ON cohort.subject_id = person.person_id -- the optional filters below open with WHERE or AND depending on which are rendered
17 | {@included_cov_table != ''} ? {WHERE YEAR(cohort_start_date)*100000 + MONTH(cohort_start_date)*1000 + @analysis_id IN (SELECT id FROM @included_cov_table)}
18 | {@cohort_definition_id != -1} ? {
19 | {@included_cov_table != ''} ? { AND} :{WHERE} cohort.cohort_definition_id IN (@cohort_definition_id)
20 | }
21 | {@aggregated} ? {
22 | GROUP BY cohort_definition_id,
23 | YEAR(cohort_start_date)*100000 + MONTH(cohort_start_date)*1000 + @analysis_id
24 | }
25 | ;
26 |
27 | -- Reference construction. The name carries (covariate_id - analysis_id) / 1000, which is year * 100 + month, and concept_id is fixed at 0
28 | INSERT INTO #cov_ref (
29 | covariate_id,
30 | covariate_name,
31 | analysis_id,
32 | concept_id
33 | )
34 | SELECT covariate_id,
35 | CAST(CONCAT('index year and month: ', CAST((covariate_id - @analysis_id) / 1000 AS INTEGER)) AS VARCHAR(512)) AS covariate_name,
36 | @analysis_id AS analysis_id,
37 | 0 AS concept_id
38 | FROM (
39 | SELECT DISTINCT covariate_id
40 | FROM @covariate_table
41 | ) t1;
42 | -- Analysis reference row. start_day and end_day are written as NULL and only when the temporal flag is off
43 | INSERT INTO #analysis_ref (
44 | analysis_id,
45 | analysis_name,
46 | domain_id,
47 | {!@temporal} ? {
48 | start_day,
49 | end_day,
50 | }
51 | is_binary,
52 | missing_means_zero
53 | )
54 | SELECT @analysis_id AS analysis_id,
55 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name,
56 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id,
57 | {!@temporal} ? {
58 | CAST(NULL AS INT) AS start_day,
59 | CAST(NULL AS INT) AS end_day,
60 | }
61 | CAST('Y' AS VARCHAR(1)) AS is_binary,
62 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero;
63 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/DomainConcept.sql:
--------------------------------------------------------------------------------
1 | -- Feature construction. One covariate per non-zero concept found in the domain table, with covariate_id = concept id * 1000 + analysis_id
2 | SELECT
3 | CAST(@domain_concept_id AS BIGINT) * 1000 + @analysis_id AS covariate_id,
4 | {@temporal | @temporal_sequence} ? {
5 | time_id,
6 | }
7 | {@aggregated} ? {
8 | cohort_definition_id,
9 | COUNT(*) AS sum_value
10 | } : {
11 | row_id,
12 | 1 AS covariate_value
13 | }
14 | {@temporal_annual} ? {, event_year}
15 | INTO @covariate_table
16 | FROM (
17 | {@temporal_annual} ? {
18 | SELECT @domain_concept_id,
19 | cohort_definition_id,
20 | subject_id,
21 | cohort_start_date,
22 | {@temporal} ? {time_id,}
23 | event_year FROM (
24 | }
25 | SELECT DISTINCT @domain_concept_id, -- DISTINCT collapses repeated records of the same concept for the same cohort entry
26 | {@temporal} ? {
27 | time_id,
28 | }
29 | {@temporal_sequence} ? {
30 | FLOOR(DATEDIFF(@time_part, @cdm_database_schema.@domain_table.@domain_start_date, cohort.cohort_start_date)*1.0/@time_interval ) as time_id,
31 | }
32 | {@temporal_annual} ? {
33 | DATEPART(year, @domain_table.@domain_start_date) event_year,
34 | }
35 | {@aggregated} ? {
36 | cohort_definition_id,
37 | cohort.subject_id,
38 | cohort.cohort_start_date
39 | } : {
40 | cohort.@row_id_field AS row_id
41 | }
42 | FROM @cohort_table cohort
43 | INNER JOIN @cdm_database_schema.@domain_table
44 | ON cohort.subject_id = @domain_table.person_id
45 | {@sub_type == 'inpatient'} ? {
46 | INNER JOIN @cdm_database_schema.visit_occurrence vo
47 | ON vo.person_id = @domain_table.person_id
48 | AND vo.visit_start_date <= @domain_table.@domain_start_date
49 | AND vo.visit_end_date >= @domain_table.@domain_start_date
50 | INNER JOIN @cdm_database_schema.concept_ancestor ca
51 | ON ca.ancestor_concept_id IN (9201, 38004311, 8920, 262)
52 | AND ca.descendant_concept_id = vo.visit_concept_id
53 | }
54 | {@temporal} ? {
55 | INNER JOIN #time_period time_period
56 | ON @domain_start_date <= DATEADD(DAY, time_period.end_day, cohort.cohort_start_date)
57 | AND @domain_end_date >= DATEADD(DAY, time_period.start_day, cohort.cohort_start_date)
58 | WHERE @domain_concept_id != 0
59 | } : {
60 |
61 | WHERE @domain_start_date <= DATEADD(DAY, {@temporal_sequence} ? {@sequence_end_day} : {@end_day}, cohort.cohort_start_date)
62 | {@start_day != 'anyTimePrior'} ? { AND
63 |
64 | {@temporal_sequence} ? {@domain_start_date} : {@domain_end_date}
65 |
66 | >= DATEADD(DAY, {@temporal_sequence} ? {@sequence_start_day} : {@start_day}, cohort.cohort_start_date)}
67 | AND @domain_concept_id != 0
68 |
69 | }
70 | {@excluded_concept_table != ''} ? { AND @domain_concept_id NOT IN (SELECT id FROM @excluded_concept_table)}
71 | {@included_concept_table != ''} ? { AND @domain_concept_id IN (SELECT id FROM @included_concept_table)}
72 | {@included_cov_table != ''} ? { AND CAST(@domain_concept_id AS BIGINT) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)}
73 | {@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)}
74 | ) by_row_id -- NOTE review - the inpatient filter above keeps records dated within a visit whose concept descends from the listed ancestor ids, presumably inpatient-type visits, confirm against the vocabulary
75 | {@temporal_annual} ? {
76 | ) by_year
77 | }
78 | {@aggregated} ? {
79 | GROUP BY cohort_definition_id,
80 | @domain_concept_id
81 | {@temporal | @temporal_sequence} ? {
82 | ,time_id
83 | }
84 | {@temporal_annual} ? {
85 | ,event_year
86 | }
87 | }
88 | ;
89 |
90 | -- Reference construction. The name embeds the domain table, the time window when not temporal, the concept name and an inpatient suffix when that sub type is selected
91 | INSERT INTO #cov_ref (
92 | covariate_id,
93 | covariate_name,
94 | analysis_id,
95 | concept_id
96 | )
97 | SELECT covariate_id,
98 | {@temporal | @temporal_sequence} ? {
99 | CAST(CONCAT('@domain_table: ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END {@sub_type == 'inpatient'} ? {, ' (inpatient)'}) AS VARCHAR(512)) AS covariate_name,
100 | } : {
101 | {@start_day == 'anyTimePrior'} ? {
102 | CAST(CONCAT('@domain_table any time prior through @end_day days relative to index: ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END {@sub_type == 'inpatient'} ? {, ' (inpatient)'}) AS VARCHAR(512)) AS covariate_name,
103 | } : {
104 | CAST(CONCAT('@domain_table during day @start_day through @end_day days relative to index: ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END {@sub_type == 'inpatient'} ? {, ' (inpatient)'}) AS VARCHAR(512)) AS covariate_name,
105 | }
106 | }
107 | @analysis_id AS analysis_id,
108 | CAST((covariate_id - @analysis_id) / 1000 AS INT) AS concept_id
109 | FROM (
110 | SELECT DISTINCT covariate_id
111 | FROM @covariate_table
112 | ) t1
113 | LEFT JOIN @cdm_database_schema.concept -- LEFT JOIN keeps covariates whose concept is missing from the vocabulary
114 | ON concept_id = CAST((covariate_id - @analysis_id) / 1000 AS INT);
115 | -- Analysis reference row. When not temporal, start_day is NULL for anyTimePrior and otherwise comes from the start day parameter of the active mode, end_day likewise from the end day parameter
116 | INSERT INTO #analysis_ref (
117 | analysis_id,
118 | analysis_name,
119 | domain_id,
120 | {!@temporal} ? {
121 | start_day,
122 | end_day,
123 | }
124 | is_binary,
125 | missing_means_zero
126 | )
127 | SELECT @analysis_id AS analysis_id,
128 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name,
129 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id,
130 | {!@temporal} ? {
131 | {@start_day == 'anyTimePrior'} ? {
132 | CAST(NULL AS INT) AS start_day,
133 | } : {
134 |
135 | {@temporal_sequence} ? {@sequence_start_day} : {@start_day} AS start_day,
136 | }
137 | {@temporal_sequence} ? {@sequence_end_day} : {@end_day} AS end_day,
138 | }
139 | CAST('Y' AS VARCHAR(1)) AS is_binary,
140 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero;
141 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/GetAttrCovariates.sql:
--------------------------------------------------------------------------------
1 | /************************************************************************
2 | @file GetAttrCovariates.sql
3 |
4 | Copyright 2025 Observational Health Data Sciences and Informatics
5 |
6 | This file is part of FeatureExtraction
7 |
8 | Licensed under the Apache License, Version 2.0 (the "License");
9 | you may not use this file except in compliance with the License.
10 | You may obtain a copy of the License at
11 |
12 | http://www.apache.org/licenses/LICENSE-2.0
13 |
14 | Unless required by applicable law or agreed to in writing, software
15 | distributed under the License is distributed on an "AS IS" BASIS,
16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | See the License for the specific language governing permissions and
18 | limitations under the License.
19 | ************************************************************************/
20 |
21 | {DEFAULT @attr_database_schema = 'CDM_SIM.dbo' }
22 | {DEFAULT @cohort_table = '#cohort_person'}
23 | {DEFAULT @cohort_id = -1}
24 | {DEFAULT @row_id_field = 'person_id'}
25 | {DEFAULT @cohort_attribute_table = 'cohort_attribute'}
26 | {DEFAULT @has_include_attr_ids = FALSE}
27 | -- Returns one row per cohort attribute value, with the attribute definition id as covariate_id and value_as_number as covariate_value
28 | SELECT cohort.@row_id_field AS row_id,
29 | cohort_attribute.attribute_definition_id AS covariate_id,
30 | cohort_attribute.value_as_number AS covariate_value
31 | FROM @attr_database_schema.@cohort_attribute_table cohort_attribute
32 | INNER JOIN @cohort_table cohort
33 | ON cohort_attribute.subject_id = cohort.subject_id
34 | AND cohort_attribute.cohort_definition_id = cohort.cohort_definition_id
35 | -- NOTE review - attributes are matched on subject and cohort definition only, confirm whether cohort_start_date should also be part of the join key
36 | {@has_include_attr_ids} ? {
37 | INNER JOIN #included_attr included_attr
38 | ON included_attr.attribute_definition_id = cohort_attribute.attribute_definition_id
39 | }
40 | {@cohort_id != -1} ? {
41 | WHERE cohort.cohort_definition_id = @cohort_id
42 | }
43 | ;
44 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/IncludeDescendants.sql:
--------------------------------------------------------------------------------
1 | /************************************************************************
2 | Copyright 2025 Observational Health Data Sciences and Informatics
3 |
4 | This file is part of FeatureExtraction
5 |
6 | Licensed under the Apache License, Version 2.0 (the "License");
7 | you may not use this file except in compliance with the License.
8 | You may obtain a copy of the License at
9 |
10 | http://www.apache.org/licenses/LICENSE-2.0
11 |
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | ************************************************************************/
18 | {DEFAULT @table_name = '#include_concepts'}
19 | {DEFAULT @cdm_database_schema = 'cdm'}
20 | -- Appends to the concept table every descendant, other than the concept itself, of the concepts it already holds
21 | INSERT INTO @table_name (concept_id)
22 | SELECT descendant_concept_id
23 | FROM @table_name this_table
24 | INNER JOIN @cdm_database_schema.concept_ancestor
25 | ON concept_id = ancestor_concept_id
26 | WHERE concept_id != descendant_concept_id;
27 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/MeasurementRangeGroup.sql:
--------------------------------------------------------------------------------
1 | -- Feature construction. One covariate per measurement concept and range group, with covariate_id = measurement_concept_id * 10000 + range_group * 1000 + analysis_id
2 | SELECT
3 | (CAST(measurement_concept_id AS BIGINT) * 10000) + (range_group * 1000) + @analysis_id AS covariate_id,
4 | {@temporal} ? {
5 | time_id,
6 | }
7 | {@aggregated} ? {
8 | cohort_definition_id,
9 | COUNT(*) AS sum_value
10 | } : {
11 | row_id,
12 | 1 AS covariate_value
13 | }
14 | INTO @covariate_table
15 | FROM (
16 | {@aggregated} ? {
17 | SELECT DISTINCT measurement_concept_id,
18 | range_group,
19 | {@temporal} ? {
20 | time_id,
21 | }
22 | cohort_definition_id,
23 | subject_id,
24 | cohort_start_date
25 | FROM (
26 | }
27 | SELECT measurement_concept_id,
28 | CASE -- range_group is 1 below range_low, 3 above range_high and 2 otherwise
29 | WHEN value_as_number < range_low THEN 1
30 | WHEN value_as_number > range_high THEN 3
31 | ELSE 2 -- NOTE review - a NULL value_as_number fails both comparisons and therefore lands in group 2, confirm this is intended
32 | END AS range_group,
33 | {@temporal} ? {
34 | time_id,
35 | }
36 | {@aggregated} ? {
37 | cohort_definition_id,
38 | cohort.subject_id,
39 | cohort.cohort_start_date
40 | } : {
41 | cohort.@row_id_field AS row_id
42 | }
43 | FROM @cohort_table cohort
44 | INNER JOIN @cdm_database_schema.measurement
45 | ON cohort.subject_id = measurement.person_id
46 | {@temporal} ? {
47 | INNER JOIN #time_period time_period
48 | ON measurement_date <= DATEADD(DAY, time_period.end_day, cohort.cohort_start_date)
49 | AND measurement_date >= DATEADD(DAY, time_period.start_day, cohort.cohort_start_date)
50 | WHERE measurement_concept_id != 0
51 | } : {
52 | WHERE measurement_date <= DATEADD(DAY, @end_day, cohort.cohort_start_date)
53 | {@start_day != 'anyTimePrior'} ? { AND measurement_date >= DATEADD(DAY, @start_day, cohort.cohort_start_date)}
54 | AND measurement_concept_id != 0
55 | }
56 | AND range_low IS NOT NULL -- only measurements with both range bounds recorded are classified
57 | AND range_high IS NOT NULL
58 | {@excluded_concept_table != ''} ? { AND measurement_concept_id NOT IN (SELECT id FROM @excluded_concept_table)}
59 | {@included_concept_table != ''} ? { AND measurement_concept_id IN (SELECT id FROM @included_concept_table)}
60 | {@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)}
61 | {@aggregated} ? {
62 | ) grouped_1
63 | }
64 | ) grouped_2
65 | {@included_cov_table != ''} ? {WHERE (CAST(measurement_concept_id AS BIGINT) * 10000) + (range_group * 1000) + @analysis_id IN (SELECT id FROM @included_cov_table)}
66 | GROUP BY measurement_concept_id,
67 | range_group
68 | {@aggregated} ? {
69 | ,cohort_definition_id
70 | } : {
71 | ,row_id
72 | }
73 | {@temporal} ? {
74 | ,time_id
75 | }
76 | ;
77 |
78 | -- Reference construction. The range group digit and the concept id are decoded back out of covariate_id to build the name
79 | INSERT INTO #cov_ref (
80 | covariate_id,
81 | covariate_name,
82 | analysis_id,
83 | concept_id
84 | )
85 | SELECT covariate_id,
86 | {@temporal} ? {
87 | CAST(CONCAT('measurement ', range_name, ': ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END) AS VARCHAR(512)) AS covariate_name,
88 | } : {
89 | {@start_day == 'anyTimePrior'} ? {
90 | CAST(CONCAT('measurement ', range_name, ' during any time prior through @end_day days relative to index: ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END) AS VARCHAR(512)) AS covariate_name,
91 | } : {
92 | CAST(CONCAT('measurement ', range_name, ' during day @start_day through @end_day days relative to index: ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END) AS VARCHAR(512)) AS covariate_name,
93 | }
94 | }
95 | @analysis_id AS analysis_id,
96 | CAST(FLOOR(covariate_id / 10000.0) AS INT) AS concept_id
97 | FROM (
98 | SELECT DISTINCT covariate_id,
99 | CASE -- the thousands digit of covariate_id is the range group
100 | WHEN FLOOR(covariate_id / 1000.0) - (FLOOR(covariate_id / 10000.0) * 10) = 1 THEN 'below normal range'
101 | WHEN FLOOR(covariate_id / 1000.0) - (FLOOR(covariate_id / 10000.0) * 10) = 2 THEN 'within normal range'
102 | WHEN FLOOR(covariate_id / 1000.0) - (FLOOR(covariate_id / 10000.0) * 10) = 3 THEN 'above normal range'
103 | END AS range_name
104 | FROM @covariate_table
105 | ) t1
106 | LEFT JOIN @cdm_database_schema.concept -- LEFT JOIN keeps covariates whose concept is missing from the vocabulary
107 | ON concept_id = FLOOR(covariate_id / 10000.0);
108 | -- Analysis reference row. When not temporal, start_day is NULL for anyTimePrior and otherwise the start day parameter, end_day is the end day parameter
109 | INSERT INTO #analysis_ref (
110 | analysis_id,
111 | analysis_name,
112 | domain_id,
113 | {!@temporal} ? {
114 | start_day,
115 | end_day,
116 | }
117 | is_binary,
118 | missing_means_zero
119 | )
120 | SELECT @analysis_id AS analysis_id,
121 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name,
122 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id,
123 | {!@temporal} ? {
124 | {@start_day == 'anyTimePrior'} ? {
125 | CAST(NULL AS INT) AS start_day,
126 | } : {
127 | @start_day AS start_day,
128 | }
129 | @end_day AS end_day,
130 | }
131 | CAST('Y' AS VARCHAR(1)) AS is_binary,
132 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero;
133 |
134 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/RemoveCovariateTempTables.sql:
--------------------------------------------------------------------------------
1 | /************************************************************************
2 | Copyright 2025 Observational Health Data Sciences and Informatics
3 |
4 | This file is part of FeatureExtraction
5 |
6 | Licensed under the Apache License, Version 2.0 (the "License");
7 | you may not use this file except in compliance with the License.
8 | You may obtain a copy of the License at
9 |
10 | http://www.apache.org/licenses/LICENSE-2.0
11 |
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | ************************************************************************/
18 | TRUNCATE TABLE #cov;
19 | -- The covariate temp table was emptied above and is now dropped
20 | DROP TABLE #cov;
21 | -- Same truncate-then-drop sequence for the covariate reference temp table
22 | TRUNCATE TABLE #cov_ref;
23 | -- The covariate reference temp table is dropped last
24 | DROP TABLE #cov_ref;
25 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/covariateCohorts.sql:
--------------------------------------------------------------------------------
1 | /************************
2 | File covariateCohorts.sql
3 | Recreates the cohort table and fills it with two cohorts:
4 | cohort 1 from diclofenac drug eras, and cohort 2 from type 2 diabetes
5 | conditions followed within 30 days by a diabetes drug exposure.
6 | *************************/
7 | DROP TABLE IF EXISTS @cohort_database_schema.@cohort_table;
8 |
9 | CREATE TABLE @cohort_database_schema.@cohort_table (
10 | cohort_definition_id INT,
11 | subject_id BIGINT,
12 | cohort_start_date DATE,
13 | cohort_end_date DATE
14 | );
15 | -- Cohort 1 has one row per person with the earliest era start and the earliest era end of the selected drug
16 | INSERT INTO @cohort_database_schema.@cohort_table (
17 | cohort_definition_id,
18 | subject_id,
19 | cohort_start_date,
20 | cohort_end_date
21 | )
22 | SELECT 1,
23 | person_id,
24 | MIN(drug_era_start_date),
25 | MIN(drug_era_end_date)
26 | FROM @cdm_database_schema.drug_era
27 | WHERE drug_concept_id = 1124300 --diclofenac
28 | GROUP BY person_id;
29 | -- Cohort 2 has one row per person, starting at the earliest qualifying condition date inside an observation period
30 | INSERT INTO @cohort_database_schema.@cohort_table (
31 | cohort_definition_id,
32 | subject_id,
33 | cohort_start_date,
34 | cohort_end_date
35 | )
36 | SELECT 2,
37 | condition_occurrence.person_id,
38 | MIN(condition_start_date),
39 | MIN(observation_period_end_date)
40 | FROM @cdm_database_schema.condition_occurrence
41 | INNER JOIN @cdm_database_schema.drug_exposure
42 | ON condition_occurrence.person_id = drug_exposure.person_id
43 | AND drug_exposure_start_date >= condition_start_date
44 | AND drug_exposure_start_date < DATEADD(DAY, 30, condition_start_date)
45 | INNER JOIN @cdm_database_schema.observation_period
46 | ON condition_occurrence.person_id = observation_period.person_id
47 | AND condition_start_date >= observation_period_start_date
48 | AND condition_start_date <= observation_period_end_date
49 | WHERE condition_concept_id IN (
50 | SELECT descendant_concept_id
51 | FROM @cdm_database_schema.concept_ancestor
52 | WHERE ancestor_concept_id = 201826 -- Type 2 diabetes mellitus
53 | )
54 | AND drug_concept_id IN (
55 | SELECT descendant_concept_id
56 | FROM @cdm_database_schema.concept_ancestor
57 | WHERE ancestor_concept_id = 21600712 -- DRUGS USED IN DIABETES (ATC A10)
58 | )
59 | GROUP BY condition_occurrence.person_id;
57 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/unit_tests/createTestingData.sql:
--------------------------------------------------------------------------------
1 | SELECT first_use.* -- builds the test cohort table from first use of two drugs, one cohort per drug concept
2 | INTO @cohort_table
3 | FROM (
4 | SELECT drug_concept_id AS cohort_definition_id,
5 | MIN(drug_era_start_date) AS cohort_start_date,
6 | MIN(drug_era_end_date) AS cohort_end_date,
7 | person_id AS subject_id
8 | FROM @cdm_database_schema.drug_era
9 | WHERE drug_concept_id = 1118084-- celecoxib
10 | OR drug_concept_id = 1124300 --diclofenac
11 | GROUP BY drug_concept_id,
12 | person_id
13 | ) first_use
14 | INNER JOIN @cdm_database_schema.observation_period
15 | ON first_use.subject_id = observation_period.person_id
16 | AND cohort_start_date >= observation_period_start_date
17 | AND cohort_end_date <= observation_period_end_date
18 | WHERE DATEDIFF(DAY, observation_period_start_date, cohort_start_date) >= 365 -- requires at least 365 days of observation before cohort start
19 | ;
20 | -- Drop the attribute tables if they already exist so the SELECT INTO statements below can recreate them
21 | IF OBJECT_ID('@cohort_database_schema.@cohort_attribute_table', 'U') IS NOT NULL
22 | DROP TABLE @cohort_database_schema.@cohort_attribute_table;
23 |
24 | IF OBJECT_ID('@cohort_database_schema.@attribute_definition_table', 'U') IS NOT NULL
25 | DROP TABLE @cohort_database_schema.@attribute_definition_table;
26 |
27 | -- Cohort attribute 1 holds the number of days between observation period start and cohort start
28 | SELECT cohort_definition_id,
29 | subject_id,
30 | cohort_start_date,
31 | 1 AS attribute_definition_id,
32 | DATEDIFF(DAY, observation_period_start_date, cohort_start_date) AS value_as_number
33 | INTO @cohort_database_schema.@cohort_attribute_table
34 | FROM @cohort_table cohort
35 | INNER JOIN @cdm_database_schema.observation_period op
36 | ON op.person_id = cohort.subject_id
37 | WHERE cohort.cohort_start_date >= op.observation_period_start_date
38 | AND cohort.cohort_start_date <= op.observation_period_end_date
39 | {@cohort_definition_ids != ''} ? {
40 | AND cohort.cohort_definition_id IN (@cohort_definition_ids)
41 | }
42 | ;
43 | -- Single-row definition table naming attribute 1
44 | SELECT 1 AS attribute_definition_id,
45 | 'Length of observation in days' AS attribute_name
46 | INTO @cohort_database_schema.@attribute_definition_table
47 | ;
48 |
--------------------------------------------------------------------------------
/inst/sql/sql_server/unit_tests/dropTestingData.sql:
--------------------------------------------------------------------------------
-- Clean up everything created by createTestingData.sql.

-- The cohort table is looked up in tempdb.
IF OBJECT_ID('tempdb..@cohort_table', 'U') IS NOT NULL
	DROP TABLE @cohort_table;

-- Cohort attribute values.
IF OBJECT_ID('@cohort_database_schema.@cohort_attribute_table', 'U') IS NOT NULL
	DROP TABLE @cohort_database_schema.@cohort_attribute_table;

-- Attribute definitions.
IF OBJECT_ID('@cohort_database_schema.@attribute_definition_table', 'U') IS NOT NULL
	DROP TABLE @cohort_database_schema.@attribute_definition_table;
9 |
--------------------------------------------------------------------------------
/inst/testdata/binaryCovariateData.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/testdata/binaryCovariateData.zip
--------------------------------------------------------------------------------
/inst/testdata/continuousCovariateData.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/testdata/continuousCovariateData.zip
--------------------------------------------------------------------------------
/java/FeatureExtraction.jardesc:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/java/org/ohdsi/featureExtraction/JarChecksum.java:
--------------------------------------------------------------------------------
1 | package org.ohdsi.featureExtraction;
2 |
3 | import java.io.File;
4 | import java.io.FileInputStream;
5 | import java.io.IOException;
6 | import java.security.MessageDigest;
7 | import java.security.NoSuchAlgorithmException;
8 |
9 | /**
10 | * Provides a function for computing the checksum of the current JAR file.
11 | *
12 | * @author mschuemi
13 | *
14 | */
public class JarChecksum {

	/**
	 * Compute the SHA-256 checksum of the location this class was loaded from (normally the package's JAR file).
	 * This can be used by R to verify that the JAR version is in sync with the R package.
	 *
	 * @return The checksum as a lower-case hexadecimal string. If the location cannot be resolved or read (for
	 *         example when the class was not loaded from a JAR file), the stack trace is printed and an empty
	 *         string is returned.
	 */
	public static String computeJarChecksum() {
		StringBuilder checksum = new StringBuilder();
		try {
			// Go through toURI() rather than URL.getPath(): getPath() keeps percent-escapes such as "%20", so a
			// JAR installed under a directory containing spaces would not be found.
			File currentJavaJarFile = new File(JarChecksum.class.getProtectionDomain().getCodeSource().getLocation().toURI());
			MessageDigest messageDigest = MessageDigest.getInstance("SHA-256");
			// try-with-resources closes the stream even when read() fails half-way.
			try (FileInputStream fileInputStream = new FileInputStream(currentJavaJarFile.getAbsolutePath())) {
				byte[] dataBytes = new byte[1024];
				int nread;
				while ((nread = fileInputStream.read(dataBytes)) != -1)
					messageDigest.update(dataBytes, 0, nread);
			}
			// Adding 0x100 before formatting guarantees two hex digits per byte (leading zero preserved).
			for (byte mdByte : messageDigest.digest())
				checksum.append(Integer.toString((mdByte & 0xff) + 0x100, 16).substring(1));
		} catch (NoSuchAlgorithmException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (java.net.URISyntaxException e) {
			e.printStackTrace();
		} catch (IllegalArgumentException e) {
			// Thrown by new File(URI) when the code source is not a plain file location.
			e.printStackTrace();
		}
		return checksum.toString();
	}
}
47 |
--------------------------------------------------------------------------------
/java/org/ohdsi/featureExtraction/ReadCSVFile.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright 2025 Observational Health Data Sciences and Informatics
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | ******************************************************************************/
16 | package org.ohdsi.featureExtraction;
17 |
18 | import java.io.BufferedReader;
19 | import java.io.FileInputStream;
20 | import java.io.FileNotFoundException;
21 | import java.io.IOException;
22 | import java.io.InputStream;
23 | import java.io.InputStreamReader;
24 | import java.io.UnsupportedEncodingException;
25 | import java.util.Iterator;
26 | import java.util.List;
27 |
28 | public class ReadCSVFile implements Iterable> {
29 | protected BufferedReader bufferedReader;
30 | public boolean EOF = false;
31 | private char delimiter = ',';
32 |
33 |
34 | public ReadCSVFile(String filename, char delimiter) {
35 | this(filename);
36 | this.delimiter = delimiter;
37 | }
38 |
39 | public ReadCSVFile(String filename) {
40 | try {
41 | FileInputStream textFileStream = new FileInputStream(filename);
42 | bufferedReader = new BufferedReader(new InputStreamReader(textFileStream, "ISO-8859-1"));
43 | } catch (FileNotFoundException e) {
44 | e.printStackTrace();
45 | } catch (UnsupportedEncodingException e) {
46 | e.printStackTrace();
47 | }
48 | }
49 |
50 | public ReadCSVFile(InputStream inputstream, char delimiter) {
51 | this(inputstream);
52 | this.delimiter = delimiter;
53 | }
54 |
55 | public ReadCSVFile(InputStream inputstream) {
56 | try {
57 | bufferedReader = new BufferedReader(new InputStreamReader(inputstream, "ISO-8859-1"));
58 | } catch (UnsupportedEncodingException e) {
59 | e.printStackTrace();
60 | }
61 | }
62 |
63 | public Iterator> getIterator() {
64 | return iterator();
65 | }
66 |
67 | private class CSVFileIterator implements Iterator> {
68 | private String buffer;
69 |
70 | public CSVFileIterator() {
71 | try {
72 | buffer = bufferedReader.readLine();
73 | if (buffer == null) {
74 | EOF = true;
75 | bufferedReader.close();
76 | }
77 | } catch (IOException e) {
78 | e.printStackTrace();
79 | }
80 |
81 | }
82 |
83 | public boolean hasNext() {
84 | return !EOF;
85 | }
86 |
87 | public List next() {
88 | String result = buffer;
89 | try {
90 | buffer = bufferedReader.readLine();
91 | if (buffer == null) {
92 | EOF = true;
93 | bufferedReader.close();
94 | }
95 | } catch (IOException e) {
96 | e.printStackTrace();
97 | }
98 |
99 | return line2columns(result);
100 | }
101 |
102 | public void remove() {
103 | System.err.println("Unimplemented method 'remove' called");
104 | }
105 | }
106 |
107 | public Iterator> iterator() {
108 | return new CSVFileIterator();
109 | }
110 |
111 | private List line2columns(String line) {
112 | List columns = StringUtilities.safeSplit(line, delimiter);
113 | for (int i = 0; i < columns.size(); i++) {
114 | String column = columns.get(i);
115 | if (column.startsWith("\"") && column.endsWith("\"") && column.length() > 1)
116 | column = column.substring(1, column.length() - 1);
117 | column = column.replace("\\\"", "\"");
118 | column = column.replaceAll("\\\\\\\\", "\\\\");
119 | columns.set(i, column);
120 | }
121 | return columns;
122 | }
123 |
124 | public void setDelimiter(char delimiter) {
125 | this.delimiter = delimiter;
126 | }
127 |
128 | public char getDelimiter() {
129 | return delimiter;
130 | }
131 | }
132 |
--------------------------------------------------------------------------------
/java/org/ohdsi/featureExtraction/ReadCSVFileWithHeader.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright 2025 Observational Health Data Sciences and Informatics
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | ******************************************************************************/
16 | package org.ohdsi.featureExtraction;
17 |
18 | import java.io.FileInputStream;
19 | import java.io.FileNotFoundException;
20 | import java.io.InputStream;
21 | import java.util.HashMap;
22 | import java.util.Iterator;
23 | import java.util.List;
24 | import java.util.Map;
25 |
26 | public class ReadCSVFileWithHeader implements Iterable {
27 | private InputStream inputstream;
28 | private char delimiter = ',';
29 |
30 | public ReadCSVFileWithHeader(String filename, char delimiter) {
31 | this(filename);
32 | this.delimiter = delimiter;
33 | }
34 |
35 | public ReadCSVFileWithHeader(String filename) {
36 | try {
37 | inputstream = new FileInputStream(filename);
38 | } catch (FileNotFoundException e) {
39 | e.printStackTrace();
40 | }
41 | }
42 |
43 | public ReadCSVFileWithHeader(InputStream inputstream) {
44 | this.inputstream = inputstream;
45 | }
46 |
47 | @Override
48 | public Iterator iterator() {
49 | return new RowIterator();
50 | }
51 |
52 | public class RowIterator implements Iterator {
53 |
54 | private Iterator> iterator;
55 | private Map fieldName2ColumnIndex;
56 |
57 | public RowIterator() {
58 | iterator = new ReadCSVFile(inputstream, delimiter).iterator();
59 | fieldName2ColumnIndex = new HashMap();
60 | for (String header : iterator.next())
61 | fieldName2ColumnIndex.put(header, fieldName2ColumnIndex.size());
62 | }
63 |
64 | @Override
65 | public boolean hasNext() {
66 | return iterator.hasNext();
67 | }
68 |
69 | @Override
70 | public Row next() {
71 | return new Row(iterator.next(), fieldName2ColumnIndex);
72 | }
73 |
74 | @Override
75 | public void remove() {
76 | throw new RuntimeException("Remove not supported");
77 | }
78 |
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/java/org/ohdsi/featureExtraction/Row.java:
--------------------------------------------------------------------------------
1 | package org.ohdsi.featureExtraction;
2 |
3 | import java.util.ArrayList;
4 | import java.util.HashMap;
5 | import java.util.List;
6 | import java.util.Map;
7 |
8 | public class Row {
9 | private List cells;
10 | private Map fieldName2ColumnIndex;
11 |
12 | public Row() {
13 | fieldName2ColumnIndex = new HashMap();
14 | cells = new ArrayList();
15 | }
16 |
17 | public Row(List cells, Map fieldName2ColumnIndex) {
18 | this.cells = cells;
19 | this.fieldName2ColumnIndex = fieldName2ColumnIndex;
20 | }
21 |
22 | public Row(Row row) {
23 | cells = new ArrayList(row.cells);
24 | fieldName2ColumnIndex = new HashMap(row.fieldName2ColumnIndex);
25 | }
26 |
27 | public String get(String fieldName) {
28 | int index;
29 | try {
30 | index = fieldName2ColumnIndex.get(fieldName);
31 | } catch (NullPointerException e) {
32 | throw new RuntimeException("Field \"" + fieldName + "\" not found");
33 | }
34 | if (cells.size() <= index)
35 | return null;
36 | else
37 | return cells.get(index);
38 | }
39 |
40 | public List getFieldNames() {
41 | List names = new ArrayList(fieldName2ColumnIndex.size());
42 | for (int i = 0; i < fieldName2ColumnIndex.size(); i++)
43 | names.add(null);
44 | for (Map.Entry entry : fieldName2ColumnIndex.entrySet())
45 | names.set(entry.getValue(), entry.getKey());
46 | return names;
47 | }
48 |
49 | public int getInt(String fieldName) {
50 | return Integer.parseInt(get(fieldName).trim());
51 | }
52 |
53 | public long getLong(String fieldName) {
54 | return Long.parseLong(get(fieldName));
55 | }
56 |
57 | public double getDouble(String fieldName) {
58 | return Double.parseDouble(get(fieldName));
59 | }
60 |
61 | public void add(String fieldName, String value) {
62 | fieldName2ColumnIndex.put(fieldName, cells.size());
63 | cells.add(value);
64 | }
65 |
66 | public void add(String fieldName, int value) {
67 | add(fieldName, Integer.toString(value));
68 | }
69 |
70 | public void add(String fieldName, boolean value) {
71 | add(fieldName, Boolean.toString(value));
72 | }
73 |
74 | public void add(String fieldName, double value) {
75 | add(fieldName, Double.toString(value));
76 | }
77 |
78 | public void add(String fieldName, long value) {
79 | add(fieldName, Long.toString(value));
80 | }
81 |
82 | public void set(String fieldName, String value) {
83 | cells.set(fieldName2ColumnIndex.get(fieldName), value);
84 | }
85 |
86 | public void set(String fieldName, int value) {
87 | set(fieldName, Integer.toString(value));
88 | }
89 |
90 | public void set(String fieldName, long value) {
91 | set(fieldName, Long.toString(value));
92 | }
93 |
94 | public void set(String fieldName, double value) {
95 | set(fieldName, Double.toString(value));
96 | }
97 |
98 | public List getCells() {
99 | return cells;
100 | }
101 |
102 | protected Map getfieldName2ColumnIndex() {
103 | return fieldName2ColumnIndex;
104 | }
105 |
106 | public String toString() {
107 | List data = new ArrayList(cells);
108 | for (String fieldName : fieldName2ColumnIndex.keySet()) {
109 | int index = fieldName2ColumnIndex.get(fieldName);
110 | if (data.size() > index)
111 | data.set(index, "[" + fieldName + ": " + data.get(index) + "]");
112 | }
113 | return StringUtilities.join(data, ",");
114 | }
115 |
116 | public void remove(String field) {
117 | Integer index = fieldName2ColumnIndex.remove(field);
118 | cells.remove((int)index);
119 | Map tempMap = new HashMap();
120 | for (Map.Entry entry : fieldName2ColumnIndex.entrySet())
121 | if (entry.getValue() > index)
122 | tempMap.put(entry.getKey(), entry.getValue() - 1);
123 | else
124 | tempMap.put(entry.getKey(), entry.getValue());
125 | fieldName2ColumnIndex = tempMap;
126 | }
127 |
128 | public int size() {
129 | return cells.size();
130 | }
131 |
132 | public void upperCaseFieldNames() {
133 | Map tempMap = new HashMap();
134 | for (Map.Entry entry : fieldName2ColumnIndex.entrySet())
135 | tempMap.put(entry.getKey().toUpperCase(), entry.getValue());
136 | fieldName2ColumnIndex = tempMap;
137 | }
138 | }
139 |
--------------------------------------------------------------------------------
/java/org/ohdsi/featureExtraction/StringUtilities.java:
--------------------------------------------------------------------------------
1 | package org.ohdsi.featureExtraction;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Collection;
5 | import java.util.Iterator;
6 | import java.util.List;
7 |
8 | public class StringUtilities {
9 |
10 | // private static String[] UPPER_CASE_LETTERS = new String[]{'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S',
11 | // 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'};
12 |
13 | public static String join(Collection> s, String delimiter) {
14 | StringBuffer buffer = new StringBuffer();
15 | Iterator> iter = s.iterator();
16 | if (iter.hasNext()) {
17 | buffer.append(iter.next().toString());
18 | }
19 | while (iter.hasNext()) {
20 | buffer.append(delimiter);
21 | buffer.append(iter.next().toString());
22 | }
23 | return buffer.toString();
24 | }
25 |
26 | public static String join(Object[] objects, String delimiter) {
27 | StringBuffer buffer = new StringBuffer();
28 | if (objects.length != 0)
29 | buffer.append(objects[0].toString());
30 | for (int i = 1; i < objects.length; i++) {
31 | buffer.append(delimiter);
32 | buffer.append(objects[i].toString());
33 | }
34 | return buffer.toString();
35 | }
36 |
37 | public static List safeSplit(String string, char delimiter) {
38 | List result = new ArrayList();
39 | if (string.length() == 0) {
40 | result.add("");
41 | return result;
42 | }
43 | boolean literal = false;
44 | boolean escape = false;
45 | int startpos = 0;
46 | int i = 0;
47 | char currentchar;
48 | while (i < string.length()) {
49 | currentchar = string.charAt(i);
50 | if (currentchar == '"' && !escape) {
51 | literal = !literal;
52 | }
53 | if (!literal && (currentchar == delimiter && !escape)) {
54 | result.add(string.substring(startpos, i));
55 | startpos = i + 1;
56 | }
57 | if (currentchar == '\\') {
58 | escape = !escape;
59 | } else {
60 | escape = false;
61 | }
62 | i++;
63 | }
64 | result.add(string.substring(startpos, i));
65 | return result;
66 | }
67 |
68 | public static String camelCaseToSnakeCase(String string) {
69 | StringBuilder result = new StringBuilder();
70 | int start = 0;
71 | for (int i = 0; i < string.length(); i++) {
72 | int charInt = (int) string.charAt(i);
73 | if (charInt < 91 && charInt > 64) {
74 | result.append(string.substring(start, i) + "_" + string.substring(i, i + 1).toLowerCase());
75 | start = i + 1;
76 | }
77 | }
78 | if (start < string.length())
79 | result.append(string.substring(start));
80 | return result.toString();
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/man-roxygen/GetCovarParams.R:
--------------------------------------------------------------------------------
1 | #' @details
2 | #' This function uses the data in the CDM to construct a large set of covariates for the provided
3 | #' cohort. The cohort is assumed to be in an existing temp table with these fields: 'subject_id',
4 | #' 'cohort_definition_id', 'cohort_start_date'. Optionally, an extra field can be added containing the
5 | #' unique identifier that will be used as rowID in the output. Typically, users don't call this
6 | #' function directly but rather use the \code{\link{getDbCovariateData}} function instead.
7 | #'
8 | #' @param connection A connection to the server containing the schema as created using the
9 | #' \code{connect} function in the \code{DatabaseConnector} package.
10 | #' @param oracleTempSchema DEPRECATED: use \code{tempEmulationSchema} instead.
11 | #' @param tempEmulationSchema Some database platforms like Oracle and Impala do not truly support
12 | #' temp tables. To emulate temp tables, provide a schema with write
13 | #' privileges where temp tables can be created.
14 | #' @param cdmDatabaseSchema The name of the database schema that contains the OMOP CDM instance.
15 | #' Requires read permissions to this database. On SQL Server, this should
16 | #' specify both the database and the schema, so for example
17 | #' 'cdm_instance.dbo'.
18 | #' @param cohortTable Name of the table holding the cohort for which we want to construct
19 | #' covariates. If it is a temp table, the name should have a hash prefix,
20 | #' e.g. '#temp_table'. If it is a non-temp table, it should include the
21 | #' database schema, e.g. 'cdm_database.cohort'.
22 | #' @param cohortId DEPRECATED: For which cohort ID should covariates be constructed? If set to -1,
23 | #' covariates will be constructed for all cohorts in the specified cohort
24 | #' table.
25 | #' @param cohortIds For which cohort ID(s) should covariates be constructed? If set to c(-1),
26 | #' covariates will be constructed for all cohorts in the specified cohort
27 | #' table.
28 | #' @param cdmVersion The version of the Common Data Model used. Currently only
29 | #' \code{cdmVersion = "5"} is supported.
30 | #' @param rowIdField The name of the field in the cohort temp table that is to be used as the
31 | #' row_id field in the output table. This can be especially useful if there
32 | #' is more than one period per person.
33 | #' @param aggregated Should aggregate statistics be computed instead of covariates per
34 | #' cohort entry?
35 | #'
36 | #' @return
37 | #' Returns an object of type \code{CovariateData}, which is an Andromeda object containing information on the baseline covariates.
38 | #' Information about multiple outcomes can be captured at once for efficiency reasons. This object is
39 | #' a list with the following components: \describe{ \item{covariates}{An Andromeda table listing the
40 | #' baseline covariates per person in the cohorts. This is done using a sparse representation:
41 | #' covariates with a value of 0 are omitted to save space. The covariates object will have three
42 | #' columns: rowId, covariateId, and covariateValue. The rowId is usually equal to the person_id,
43 | #' unless specified otherwise in the rowIdField argument.} \item{covariateRef}{A table
44 | #' describing the covariates that have been extracted.} }. The CovariateData object will also have a \code{metaData} attribute, a list of objects with
45 | #' information on how the covariateData object was constructed.
46 |
--------------------------------------------------------------------------------
/man/CovariateData-class.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CovariateData.R
3 | \docType{class}
4 | \name{CovariateData-class}
5 | \alias{CovariateData-class}
6 | \alias{CovariateData}
7 | \alias{show,CovariateData-method}
8 | \alias{summary,CovariateData-method}
9 | \title{Covariate Data}
10 | \usage{
11 | \S4method{show}{CovariateData}(object)
12 |
13 | \S4method{summary}{CovariateData}(object)
14 | }
15 | \arguments{
16 | \item{object}{An object of class `CovariateData`.}
17 | }
18 | \description{
19 | \code{CovariateData} is an S4 class that inherits from \code{\link[Andromeda]{Andromeda}}. It contains
20 | information on covariates, which can be either captured on a per-person basis, or aggregated across
21 | the cohort(s).
22 |
23 | By default covariates refer to a specific time period, with for example different covariate IDs for
24 | whether a diagnosis code was observed in the year before and month before index date. However, a
25 | \code{CovariateData} can also be temporal, meaning that next to a covariate ID there is also a time ID,
26 | which identifies the (user specified) time window the covariate was captured.
27 |
28 | A \code{CovariateData} object is typically created using \code{\link{getDbCovariateData}}, can only be saved using
29 | \code{\link{saveCovariateData}}, and loaded using \code{\link{loadCovariateData}}.
30 | }
31 | \seealso{
32 | \code{\link{isCovariateData}}, \code{\link{isAggregatedCovariateData}}, \code{\link{isTemporalCovariateData}}
33 | }
34 |
--------------------------------------------------------------------------------
/man/FeatureExtraction-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/FeatureExtraction.R
3 | \docType{package}
4 | \name{FeatureExtraction-package}
5 | \alias{FeatureExtraction}
6 | \alias{FeatureExtraction-package}
7 | \title{FeatureExtraction: Generating Features for a Cohort}
8 | \description{
9 | An R interface for generating features for a cohort using data in the Common Data Model. Features can be constructed using default or custom made feature definitions. Furthermore it's possible to aggregate features and get the summary statistics.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 | \item \url{https://github.com/OHDSI/FeatureExtraction}
15 | \item Report bugs at \url{https://github.com/OHDSI/FeatureExtraction/issues}
16 | }
17 |
18 | }
19 | \author{
20 | \strong{Maintainer}: Ger Inberg \email{g.inberg@erasmusmc.nl}
21 |
22 | Authors:
23 | \itemize{
24 | \item Martijn Schuemie \email{schuemie@ohdsi.org}
25 | \item Marc Suchard
26 | \item Patrick Ryan
27 | \item Jenna Reps
28 | \item Anthony Sena \email{sena@ohdsi.org}
29 | }
30 |
31 | Other contributors:
32 | \itemize{
33 | \item Observational Health Data Science and Informatics [copyright holder]
34 | }
35 |
36 | }
37 | \keyword{internal}
38 |
--------------------------------------------------------------------------------
/man/aggregateCovariates.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/Aggregation.R
3 | \name{aggregateCovariates}
4 | \alias{aggregateCovariates}
5 | \title{Aggregate covariate data}
6 | \usage{
7 | aggregateCovariates(covariateData)
8 | }
9 | \arguments{
10 | \item{covariateData}{An object of type \code{covariateData} as generated using
11 | \code{getDbCovariateData}.}
12 | }
13 | \value{
14 | An object of class \code{covariateData}.
15 | }
16 | \description{
17 | Aggregate covariate data
18 | }
19 | \examples{
20 | \donttest{
21 | covariateData <- FeatureExtraction::createEmptyCovariateData(
22 | cohortIds = 1,
23 | aggregated = FALSE,
24 | temporal = FALSE
25 | )
26 | aggregatedCovariateData <- aggregateCovariates(covariateData)
27 | }
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/man/computeStandardizedDifference.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CompareCohorts.R
3 | \name{computeStandardizedDifference}
4 | \alias{computeStandardizedDifference}
5 | \title{Compute standardized difference of mean for all covariates.}
6 | \usage{
7 | computeStandardizedDifference(
8 | covariateData1,
9 | covariateData2,
10 | cohortId1 = NULL,
11 | cohortId2 = NULL
12 | )
13 | }
14 | \arguments{
15 | \item{covariateData1}{The covariate data of the first cohort. Needs to be in aggregated format.}
16 |
17 | \item{covariateData2}{The covariate data of the second cohort. Needs to be in aggregated format.}
18 |
19 | \item{cohortId1}{If provided, \code{covariateData1} will be restricted to this cohort. If not
20 | provided, \code{covariateData1} is assumed to contain data on only 1 cohort.}
21 |
22 | \item{cohortId2}{If provided, \code{covariateData2} will be restricted to this cohort. If not
23 | provided, \code{covariateData2} is assumed to contain data on only 1 cohort.}
24 | }
25 | \value{
26 | A data frame with means and standard deviations per cohort as well as the standardized difference
27 | of mean.
28 | }
29 | \description{
30 | Computes the standardized difference for all covariates between two cohorts. The standardized
31 | difference is defined as the difference between the means divided by the overall standard deviation.
32 | }
33 | \examples{
34 | \donttest{
35 | binaryCovDataFile <- system.file("testdata/binaryCovariateData.zip",
36 | package = "FeatureExtraction"
37 | )
38 | covariateData1 <- loadCovariateData(binaryCovDataFile)
39 | covariateData2 <- loadCovariateData(binaryCovDataFile)
40 | covDataDiff <- computeStandardizedDifference(
41 | covariateData1,
42 | covariateData2,
43 | cohortId1 = 1,
44 | cohortId2 = 2
45 | )
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/man/convertPrespecSettingsToDetailedSettings.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/DetailedCovariateSettings.R
3 | \name{convertPrespecSettingsToDetailedSettings}
4 | \alias{convertPrespecSettingsToDetailedSettings}
5 | \title{Convert prespecified covariate settings into detailed covariate settings}
6 | \usage{
7 | convertPrespecSettingsToDetailedSettings(covariateSettings)
8 | }
9 | \arguments{
10 | \item{covariateSettings}{An object of type \code{covariateSettings} as created for example by the
11 | \code{\link{createCovariateSettings}} function.}
12 | }
13 | \value{
14 | An object of type \code{covariateSettings}, to be used in other functions.
15 | }
16 | \description{
17 | Convert prespecified covariate settings into detailed covariate settings
18 | }
19 | \details{
20 | For advanced users only.
21 | }
22 | \examples{
23 | \donttest{
24 | covSettings <- createDefaultCovariateSettings()
25 | detailedSettings <- convertPrespecSettingsToDetailedSettings(covariateSettings = covSettings)
26 | }
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/man/createAnalysisDetails.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/DetailedCovariateSettings.R
3 | \name{createAnalysisDetails}
4 | \alias{createAnalysisDetails}
5 | \title{Create detailed covariate settings}
6 | \usage{
7 | createAnalysisDetails(
8 | analysisId,
9 | sqlFileName,
10 | parameters,
11 | includedCovariateConceptIds = c(),
12 | addDescendantsToInclude = FALSE,
13 | excludedCovariateConceptIds = c(),
14 | addDescendantsToExclude = FALSE,
15 | includedCovariateIds = c()
16 | )
17 | }
18 | \arguments{
19 | \item{analysisId}{An integer between 0 and 999 that uniquely identifies this
20 | analysis.}
21 |
22 | \item{sqlFileName}{The name of the parameterized SQL file embedded in the
23 | \code{featureExtraction} package.}
24 |
25 | \item{parameters}{The list of parameter values used to render the template SQL.}
26 |
27 | \item{includedCovariateConceptIds}{A list of concept IDs that should be used to construct
28 | covariates.}
29 |
30 | \item{addDescendantsToInclude}{Should descendant concept IDs be added to the list of concepts
31 | to include?}
32 |
33 | \item{excludedCovariateConceptIds}{A list of concept IDs that should NOT be used to construct
34 | covariates.}
35 |
36 | \item{addDescendantsToExclude}{Should descendant concept IDs be added to the list of concepts
37 | to exclude?}
38 |
39 | \item{includedCovariateIds}{A list of covariate IDs that should be restricted to.}
40 | }
41 | \value{
42 | An object of type \code{analysisDetail}, to be used in
43 | \code{\link{createDetailedCovariateSettings}} or
44 | \code{\link{createDetailedTemporalCovariateSettings}}.
45 | }
46 | \description{
47 | Create detailed covariate settings
48 | }
49 | \details{
50 | Creates an object specifying in detail how covariates should be constructed from data in the CDM
51 | model. Warning: this function is for advanced users only.
52 | }
53 | \examples{
54 | analysisDetails <- createAnalysisDetails(
55 | analysisId = 1,
56 | sqlFileName = "DemographicsGender.sql",
57 | parameters = list(
58 | analysisId = 1,
59 | analysisName = "Gender",
60 | domainId = "Demographics"
61 | ),
62 | includedCovariateConceptIds = c(),
63 | addDescendantsToInclude = FALSE,
64 | excludedCovariateConceptIds = c(),
65 | addDescendantsToExclude = FALSE,
66 | includedCovariateIds = c()
67 | )
68 |
69 | }
70 |
--------------------------------------------------------------------------------
/man/createCohortAttrCovariateSettings.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/GetCovariatesFromCohortAttributes.R
3 | \name{createCohortAttrCovariateSettings}
4 | \alias{createCohortAttrCovariateSettings}
5 | \title{Create cohort attribute covariate settings}
6 | \usage{
7 | createCohortAttrCovariateSettings(
8 | analysisId = -1,
9 | attrDatabaseSchema,
10 | attrDefinitionTable = "attribute_definition",
11 | cohortAttrTable = "cohort_attribute",
12 | includeAttrIds = c(),
13 | isBinary = FALSE,
14 | missingMeansZero = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{analysisId}{A unique identifier for this analysis.}
19 |
20 | \item{attrDatabaseSchema}{The database schema where the attribute definition and cohort attribute
21 | table can be found.}
22 |
23 | \item{attrDefinitionTable}{The name of the attribute definition table.}
24 |
25 | \item{cohortAttrTable}{The name of the cohort attribute table.}
26 |
27 | \item{includeAttrIds}{(optional) A list of attribute definition IDs to restrict to.}
28 |
29 | \item{isBinary}{Needed for aggregation: Are these binary variables? Binary
30 | variables should only have the values 0 or 1.}
31 |
32 | \item{missingMeansZero}{Needed for aggregation: For continuous values, should missing
33 | values be interpreted as 0?}
34 | }
35 | \value{
36 | An object of type \code{covariateSettings}, to be used in other functions.
37 | }
38 | \description{
39 | Create cohort attribute covariate settings
40 | }
41 | \details{
42 | Creates an object specifying where the cohort attributes can be found to construct covariates. The
43 | attributes should be defined in a table with the same structure as the attribute_definition table
44 | in the Common Data Model. It should at least have these columns: \describe{
45 | \item{attribute_definition_id}{A unique identifier of type integer.} \item{attribute_name}{A short
46 | description of the attribute.} } The cohort attributes themselves should be stored in a table with
47 | the same format as the cohort_attribute table in the Common Data Model. It should at least have
48 | these columns: \describe{ \item{cohort_definition_id}{A key to link to the cohort table.}
49 | \item{subject_id}{A key to link to the cohort table.} \item{cohort_start_date}{A key to link to the
50 | cohort table.} \item{attribute_definition_id}{A foreign key linking to the attribute definition
51 | table.} \item{value_as_number}{A real number.} }
52 | }
53 | \examples{
54 | \donttest{
55 | covariateSettings <- createCohortAttrCovariateSettings(
56 | analysisId = 1,
57 | attrDatabaseSchema = "main",
58 | attrDefinitionTable = "attribute_definition",
59 | cohortAttrTable = "cohort_attribute",
60 | includeAttrIds = c(1),
61 | isBinary = FALSE,
62 | missingMeansZero = FALSE
63 | )
64 | }
65 |
66 | }
67 |
--------------------------------------------------------------------------------
/man/createCohortBasedCovariateSettings.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/GetCovariatesFromOtherCohorts.R
3 | \name{createCohortBasedCovariateSettings}
4 | \alias{createCohortBasedCovariateSettings}
5 | \title{Create settings for covariates based on other cohorts}
6 | \usage{
7 | createCohortBasedCovariateSettings(
8 | analysisId,
9 | covariateCohortDatabaseSchema = NULL,
10 | covariateCohortTable = NULL,
11 | covariateCohorts,
12 | valueType = "binary",
13 | startDay = -365,
14 | endDay = 0,
15 | includedCovariateIds = c(),
16 | warnOnAnalysisIdOverlap = TRUE
17 | )
18 | }
19 | \arguments{
20 | \item{analysisId}{A unique identifier for this analysis.}
21 |
22 | \item{covariateCohortDatabaseSchema}{The database schema where the cohorts used to define the covariates
23 | can be found. If set to \code{NULL}, the database schema will be
24 | guessed, for example using the same one as for the main cohorts.}
25 |
26 | \item{covariateCohortTable}{The table where the cohorts used to define the covariates
27 | can be found. If set to \code{NULL}, the table will be
28 | guessed, for example using the same one as for the main cohorts.}
29 |
30 | \item{covariateCohorts}{A data frame with at least two columns: 'cohortId' and 'cohortName'. The
31 | cohort ID should correspond to the \code{cohort_definition_id} of the cohort
32 | to use for creating a covariate.}
33 |
34 | \item{valueType}{Either 'binary' or 'count'. When \code{valueType = 'count'}, the covariate
35 | value will be the number of times the cohort was observed in the window.}
36 |
37 | \item{startDay}{What is the start day (relative to the index date) of the covariate window?}
38 |
39 | \item{endDay}{What is the end day (relative to the index date) of the covariate window?}
40 |
41 | \item{includedCovariateIds}{A list of covariate IDs that should be restricted to.}
42 |
43 | \item{warnOnAnalysisIdOverlap}{Warn if the provided `analysisId` overlaps with any predefined analysis as
44 | available in the `createCovariateSettings()` function.}
45 | }
46 | \value{
47 | An object of type \code{covariateSettings}, to be used in other functions.
48 | }
49 | \description{
50 | Create settings for covariates based on other cohorts
51 | }
52 | \details{
53 | Creates an object specifying covariates to be constructed based on the presence of other cohorts.
54 | }
55 |
--------------------------------------------------------------------------------
/man/createCohortBasedTemporalCovariateSettings.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/GetCovariatesFromOtherCohorts.R
3 | \name{createCohortBasedTemporalCovariateSettings}
4 | \alias{createCohortBasedTemporalCovariateSettings}
5 | \title{Create settings for temporal covariates based on other cohorts}
6 | \usage{
7 | createCohortBasedTemporalCovariateSettings(
8 | analysisId,
9 | covariateCohortDatabaseSchema = NULL,
10 | covariateCohortTable = NULL,
11 | covariateCohorts,
12 | valueType = "binary",
13 | temporalStartDays = -365:-1,
14 | temporalEndDays = -365:-1,
15 | includedCovariateIds = c(),
16 | warnOnAnalysisIdOverlap = TRUE
17 | )
18 | }
19 | \arguments{
20 | \item{analysisId}{A unique identifier for this analysis.}
21 |
22 | \item{covariateCohortDatabaseSchema}{The database schema where the cohorts used to define the covariates
23 | can be found. If set to \code{NULL}, the database schema will be
24 | guessed, for example using the same one as for the main cohorts.}
25 |
26 | \item{covariateCohortTable}{The table where the cohorts used to define the covariates
27 | can be found. If set to \code{NULL}, the table will be
28 | guessed, for example using the same one as for the main cohorts.}
29 |
30 | \item{covariateCohorts}{A data frame with at least two columns: 'cohortId' and 'cohortName'. The
31 | cohort ID should correspond to the \code{cohort_definition_id} of the cohort
32 | to use for creating a covariate.}
33 |
34 | \item{valueType}{Either 'binary' or 'count'. When \code{valueType = 'count'}, the covariate
35 | value will be the number of times the cohort was observed in the window.}
36 |
37 | \item{temporalStartDays}{A list of integers representing the start of a time
38 | period, relative to the index date. 0 indicates the
39 | index date, -1 indicates the day before the index
40 | date, etc. The start day is included in the time
41 | period.}
42 |
43 | \item{temporalEndDays}{A list of integers representing the end of a time
44 | period, relative to the index date. 0 indicates the
45 | index date, -1 indicates the day before the index
46 | date, etc. The end day is included in the time
47 | period.}
48 |
49 | \item{includedCovariateIds}{A list of covariate IDs that should be restricted to.}
50 |
51 | \item{warnOnAnalysisIdOverlap}{Warn if the provided `analysisId` overlaps with any predefined analysis as
52 | available in the `createTemporalCovariateSettings()` function.}
53 | }
54 | \value{
55 | An object of type \code{covariateSettings}, to be used in other functions.
56 | }
57 | \description{
58 | Create settings for temporal covariates based on other cohorts
59 | }
60 | \details{
61 | Creates an object specifying temporal covariates to be constructed based on the presence of other cohorts.
62 | }
63 |
--------------------------------------------------------------------------------
/man/createDefaultCovariateSettings.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/DetailedCovariateSettings.R
3 | \name{createDefaultCovariateSettings}
4 | \alias{createDefaultCovariateSettings}
5 | \title{Create default covariate settings}
6 | \usage{
7 | createDefaultCovariateSettings(
8 | includedCovariateConceptIds = c(),
9 | addDescendantsToInclude = FALSE,
10 | excludedCovariateConceptIds = c(),
11 | addDescendantsToExclude = FALSE,
12 | includedCovariateIds = c()
13 | )
14 | }
15 | \arguments{
16 | \item{includedCovariateConceptIds}{A list of concept IDs that should be used to construct
17 | covariates.}
18 |
19 | \item{addDescendantsToInclude}{Should descendant concept IDs be added to the list of concepts
20 | to include?}
21 |
22 | \item{excludedCovariateConceptIds}{A list of concept IDs that should NOT be used to construct
23 | covariates.}
24 |
25 | \item{addDescendantsToExclude}{Should descendant concept IDs be added to the list of concepts
26 | to exclude?}
27 |
28 | \item{includedCovariateIds}{A list of covariate IDs that should be restricted to.}
29 | }
30 | \value{
31 | An object of type \code{covariateSettings}, to be used in other functions.
32 | }
33 | \description{
34 | Create default covariate settings
35 | }
36 | \examples{
37 | \donttest{
38 | covSettings <- createDefaultCovariateSettings(
39 | includedCovariateConceptIds = c(1),
40 | addDescendantsToInclude = FALSE,
41 | excludedCovariateConceptIds = c(2),
42 | addDescendantsToExclude = FALSE,
43 | includedCovariateIds = c(1)
44 | )
45 | }
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/man/createDefaultTemporalCovariateSettings.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/DetailedCovariateSettings.R
3 | \name{createDefaultTemporalCovariateSettings}
4 | \alias{createDefaultTemporalCovariateSettings}
5 | \title{Create default temporal covariate settings}
6 | \usage{
7 | createDefaultTemporalCovariateSettings(
8 | includedCovariateConceptIds = c(),
9 | addDescendantsToInclude = FALSE,
10 | excludedCovariateConceptIds = c(),
11 | addDescendantsToExclude = FALSE,
12 | includedCovariateIds = c()
13 | )
14 | }
15 | \arguments{
16 | \item{includedCovariateConceptIds}{A list of concept IDs that should be used to construct
17 | covariates.}
18 |
19 | \item{addDescendantsToInclude}{Should descendant concept IDs be added to the list of concepts
20 | to include?}
21 |
22 | \item{excludedCovariateConceptIds}{A list of concept IDs that should NOT be used to construct
23 | covariates.}
24 |
25 | \item{addDescendantsToExclude}{Should descendant concept IDs be added to the list of concepts
26 | to exclude?}
27 |
28 | \item{includedCovariateIds}{A list of covariate IDs that should be restricted to.}
29 | }
30 | \value{
31 | An object of type \code{covariateSettings}, to be used in other functions.
32 | }
33 | \description{
34 | Create default temporal covariate settings
35 | }
36 | \examples{
37 | \donttest{
38 | covSettings <- createDefaultTemporalCovariateSettings(
39 | includedCovariateConceptIds = c(1),
40 | addDescendantsToInclude = FALSE,
41 | excludedCovariateConceptIds = c(2),
42 | addDescendantsToExclude = FALSE,
43 | includedCovariateIds = c(1)
44 | )
45 | }
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/man/createDetailedCovariateSettings.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/DetailedCovariateSettings.R
3 | \name{createDetailedCovariateSettings}
4 | \alias{createDetailedCovariateSettings}
5 | \title{Create detailed covariate settings}
6 | \usage{
7 | createDetailedCovariateSettings(analyses = list())
8 | }
9 | \arguments{
10 | \item{analyses}{A list of \code{analysisDetail} objects as created using
11 | \code{\link{createAnalysisDetails}}.}
12 | }
13 | \value{
14 | An object of type \code{covariateSettings}, to be used in other functions.
15 | }
16 | \description{
17 | Create detailed covariate settings
18 | }
19 | \details{
20 | Creates an object specifying in detail how covariates should be constructed from data in the CDM
21 | model. Warning: this function is for advanced users only.
22 | }
23 | \examples{
24 | \donttest{
25 | analysisDetails <- createAnalysisDetails(
26 | analysisId = 1,
27 | sqlFileName = "DemographicsGender.sql",
28 | parameters = list(
29 | analysisId = 1,
30 | analysisName = "Gender",
31 | domainId = "Demographics"
32 | ),
33 | includedCovariateConceptIds = c(),
34 | addDescendantsToInclude = FALSE,
35 | excludedCovariateConceptIds = c(),
36 | addDescendantsToExclude = FALSE,
37 | includedCovariateIds = c()
38 | )
39 | covSettings <- createDetailedCovariateSettings(analyses = analysisDetails)
40 | }
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/man/createDetailedTemporalCovariateSettings.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/DetailedCovariateSettings.R
3 | \name{createDetailedTemporalCovariateSettings}
4 | \alias{createDetailedTemporalCovariateSettings}
5 | \title{Create detailed temporal covariate settings}
6 | \usage{
7 | createDetailedTemporalCovariateSettings(
8 | analyses = list(),
9 | temporalStartDays = -365:-1,
10 | temporalEndDays = -365:-1
11 | )
12 | }
13 | \arguments{
14 | \item{analyses}{A list of analysis detail objects as created using
15 | \code{\link{createAnalysisDetails}}.}
16 |
17 | \item{temporalStartDays}{A list of integers representing the start of a time period, relative to
18 | the index date. 0 indicates the index date, -1 indicates the day before
19 | the index date, etc. The start day is included in the time period.}
20 |
21 | \item{temporalEndDays}{A list of integers representing the end of a time period, relative to the
22 | index date. 0 indicates the index date, -1 indicates the day before the
23 | index date, etc. The end day is included in the time period.}
24 | }
25 | \value{
26 | An object of type \code{covariateSettings}, to be used in other functions.
27 | }
28 | \description{
29 | Create detailed temporal covariate settings
30 | }
31 | \details{
32 | Creates an object specifying in detail how temporal covariates should be constructed from data in
33 | the CDM model. Warning: this function is for advanced users only.
34 | }
35 | \examples{
36 | \donttest{
37 | analysisDetails <- createAnalysisDetails(
38 | analysisId = 1,
39 | sqlFileName = "DemographicsGender.sql",
40 | parameters = list(
41 | analysisId = 1,
42 | analysisName = "Gender",
43 | domainId = "Demographics"
44 | ),
45 | includedCovariateConceptIds = c(),
46 | addDescendantsToInclude = FALSE,
47 | excludedCovariateConceptIds = c(),
48 | addDescendantsToExclude = FALSE,
49 | includedCovariateIds = c()
50 | )
51 | covSettings <- createDetailedTemporalCovariateSettings(
52 | analyses = analysisDetails,
53 | temporalStartDays = -365:-1,
54 | temporalEndDays = -365:-1
55 | )
56 | }
57 |
58 | }
59 |
--------------------------------------------------------------------------------
/man/createEmptyCovariateData.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CovariateData.R
3 | \name{createEmptyCovariateData}
4 | \alias{createEmptyCovariateData}
5 | \title{Creates an empty covariate data object}
6 | \usage{
7 | createEmptyCovariateData(cohortIds, aggregated, temporal)
8 | }
9 | \arguments{
10 | \item{cohortIds}{For which cohort IDs should the covariate data be created?}
11 |
12 | \item{aggregated}{if the data should be aggregated}
13 |
14 | \item{temporal}{if the data is temporal}
15 | }
16 | \value{
17 | an empty object of class \code{CovariateData}
18 | }
19 | \description{
20 | Creates an empty covariate data object
21 | }
22 | \examples{
23 | \donttest{
24 | covariateData <- FeatureExtraction::createEmptyCovariateData(
25 | cohortIds = 1,
26 | aggregated = FALSE,
27 | temporal = FALSE
28 | )
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/man/createTable1.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/Table1.R
3 | \name{createTable1}
4 | \alias{createTable1}
5 | \title{Create a table 1}
6 | \usage{
7 | createTable1(
8 | covariateData1,
9 | covariateData2 = NULL,
10 | cohortId1 = NULL,
11 | cohortId2 = NULL,
12 | specifications = getDefaultTable1Specifications(),
13 | output = "two columns",
14 | showCounts = FALSE,
15 | showPercent = TRUE,
16 | percentDigits = 1,
17 | valueDigits = 1,
18 | stdDiffDigits = 2
19 | )
20 | }
21 | \arguments{
22 | \item{covariateData1}{The covariate data of the cohort to be included in the table.}
23 |
24 | \item{covariateData2}{The covariate data of the cohort to also be included, when comparing two
25 | cohorts.}
26 |
27 | \item{cohortId1}{If provided, \code{covariateData1} will be restricted to this cohort. If not
28 | provided, \code{covariateData1} is assumed to contain data on only 1 cohort.}
29 |
30 | \item{cohortId2}{If provided, \code{covariateData2} will be restricted to this cohort. If not
31 | provided, \code{covariateData2} is assumed to contain data on only 1 cohort.}
32 |
33 | \item{specifications}{Specifications of which covariates to display, and how.}
34 |
35 | \item{output}{The output format for the table. Options are \code{output = "two columns"},
36 | \code{output = "one column"}, or \code{output = "list"}.}
37 |
38 | \item{showCounts}{Show the number of cohort entries having the binary covariate?}
39 |
40 | \item{showPercent}{Show the percentage of cohort entries having the binary covariate?}
41 |
42 | \item{percentDigits}{Number of digits to be used for percentages.}
43 |
44 | \item{valueDigits}{Number of digits to be used for the values of continuous variables.}
45 |
46 | \item{stdDiffDigits}{Number of digits to be used for the standardized differences.}
47 | }
48 | \value{
49 | A data frame, or, when \code{output = "list"} a list of two data frames.
50 | }
51 | \description{
52 | Creates a formatted table of cohort characteristics, to be included in publications or reports.
53 | Allows for creating a table describing a single cohort, or a table comparing two cohorts.
54 | }
55 | \examples{
56 | \donttest{
57 | eunomiaConnectionDetails <- Eunomia::getEunomiaConnectionDetails()
58 | covSettings <- createDefaultCovariateSettings()
59 | Eunomia::createCohorts(
60 | connectionDetails = eunomiaConnectionDetails,
61 | cdmDatabaseSchema = "main",
62 | cohortDatabaseSchema = "main",
63 | cohortTable = "cohort"
64 | )
65 | covData1 <- getDbCovariateData(
66 | connectionDetails = eunomiaConnectionDetails,
67 | tempEmulationSchema = NULL,
68 | cdmDatabaseSchema = "main",
69 | cdmVersion = "5",
70 | cohortTable = "cohort",
71 | cohortDatabaseSchema = "main",
72 | cohortTableIsTemp = FALSE,
73 | cohortId = 1,
74 | rowIdField = "subject_id",
75 | covariateSettings = covSettings,
76 | aggregated = TRUE
77 | )
78 | covData2 <- getDbCovariateData(
79 | connectionDetails = eunomiaConnectionDetails,
80 | tempEmulationSchema = NULL,
81 | cdmDatabaseSchema = "main",
82 | cdmVersion = "5",
83 | cohortTable = "cohort",
84 | cohortDatabaseSchema = "main",
85 | cohortTableIsTemp = FALSE,
86 | cohortId = 2,
87 | rowIdField = "subject_id",
88 | covariateSettings = covSettings,
89 | aggregated = TRUE
90 | )
91 | table1 <- createTable1(
92 | covariateData1 = covData1,
93 | covariateData2 = covData2,
94 | cohortId1 = 1,
95 | cohortId2 = 2,
96 | specifications = getDefaultTable1Specifications(),
97 | output = "one column",
98 | showCounts = FALSE,
99 | showPercent = TRUE,
100 | percentDigits = 1,
101 | valueDigits = 1,
102 | stdDiffDigits = 2
103 | )
104 | }
105 |
106 | }
107 |
--------------------------------------------------------------------------------
/man/createTable1CovariateSettings.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/Table1.R
3 | \name{createTable1CovariateSettings}
4 | \alias{createTable1CovariateSettings}
5 | \title{Create covariate settings for a table 1}
6 | \usage{
7 | createTable1CovariateSettings(
8 | specifications = getDefaultTable1Specifications(),
9 | covariateSettings = createDefaultCovariateSettings(),
10 | includedCovariateConceptIds = c(),
11 | addDescendantsToInclude = FALSE,
12 | excludedCovariateConceptIds = c(),
13 | addDescendantsToExclude = FALSE,
14 | includedCovariateIds = c()
15 | )
16 | }
17 | \arguments{
18 | \item{specifications}{A specifications object for generating a table using the
19 | \code{\link{createTable1}} function.}
20 |
21 | \item{covariateSettings}{The covariate settings object to use as the basis for the
22 | filtered covariate settings.}
23 |
24 | \item{includedCovariateConceptIds}{A list of concept IDs that should be used to construct
25 | covariates.}
26 |
27 | \item{addDescendantsToInclude}{Should descendant concept IDs be added to the list of concepts
28 | to include?}
29 |
30 | \item{excludedCovariateConceptIds}{A list of concept IDs that should NOT be used to construct
31 | covariates.}
32 |
33 | \item{addDescendantsToExclude}{Should descendant concept IDs be added to the list of concepts
34 | to exclude?}
35 |
36 | \item{includedCovariateIds}{A list of covariate IDs that should be restricted to.}
37 | }
38 | \value{
39 | A covariate settings object, for example to be used when calling the
40 | \code{\link{getDbCovariateData}} function.
41 | }
42 | \description{
43 | Creates a covariate settings object for generating only those covariates that will be included in a
44 | table 1. This function works by filtering the \code{covariateSettings} object for the covariates in
45 | the \code{specifications} object.
46 | }
47 | \examples{
48 | \donttest{
49 | table1CovSettings <- createTable1CovariateSettings(
50 | specifications = getDefaultTable1Specifications(),
51 | covariateSettings = createDefaultCovariateSettings(),
52 | includedCovariateConceptIds = c(),
53 | addDescendantsToInclude = FALSE,
54 | excludedCovariateConceptIds = c(),
55 | addDescendantsToExclude = FALSE,
56 | includedCovariateIds = c()
57 | )
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/man/dot-createLooCovariateSettings.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/UnitTestHelperFunctions.R
3 | \name{.createLooCovariateSettings}
4 | \alias{.createLooCovariateSettings}
5 | \title{Get covariate settings}
6 | \usage{
7 | .createLooCovariateSettings(useLengthOfObs = TRUE)
8 | }
9 | \arguments{
10 | \item{useLengthOfObs}{if length of observations should be used}
11 | }
12 | \value{
13 | Returns an object of type \code{covariateSettings}, containing settings for the covariates.
14 | }
15 | \description{
16 | Get covariate settings
17 | }
18 |
--------------------------------------------------------------------------------
/man/dot-getDbLooCovariateData.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/UnitTestHelperFunctions.R
3 | \name{.getDbLooCovariateData}
4 | \alias{.getDbLooCovariateData}
5 | \title{Get covariate information from the database}
6 | \usage{
7 | .getDbLooCovariateData(
8 | connection,
9 | tempEmulationSchema = NULL,
10 | cdmDatabaseSchema,
11 | cohortTable = "#cohort_person",
12 | cohortIds = c(-1),
13 | cdmVersion = "5",
14 | rowIdField = "subject_id",
15 | covariateSettings,
16 | aggregated = FALSE,
17 | minCharacterizationMean = 0
18 | )
19 | }
20 | \arguments{
21 | \item{connection}{A connection to the server containing the schema as created using the
22 | \code{connect} function in the \code{DatabaseConnector} package.
23 | Either the \code{connection} or \code{connectionDetails} argument
24 | should be specified.}
25 |
26 | \item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support
27 | temp tables. To emulate temp tables, provide a schema with write
28 | privileges where temp tables can be created.}
29 |
30 | \item{cdmDatabaseSchema}{The name of the database schema that contains the OMOP CDM instance.
31 | Requires read permissions to this database. On SQL Server, this should
32 | specify both the database and the schema, so for example
33 | 'cdm_instance.dbo'.}
34 |
35 | \item{cohortTable}{Name of the (temp) table holding the cohort for which we want to
36 | construct covariates}
37 |
38 | \item{cohortIds}{For which cohort ID(s) should covariates be constructed? If set to -1,
39 | covariates will be constructed for all cohorts in the specified cohort
40 | table.}
41 |
42 | \item{cdmVersion}{Define the OMOP CDM version used: currently supported is "5".}
43 |
44 | \item{rowIdField}{The name of the field in the cohort table that is to be used as the
45 | row_id field in the output table. This can be especially useful if
46 | there is more than one period per person.}
47 |
48 | \item{covariateSettings}{Either an object of type \code{covariateSettings} as created using one
49 | of the createCovariate functions, or a list of such objects.}
50 |
51 | \item{aggregated}{Should aggregate statistics be computed instead of covariates per
52 | cohort entry?}
53 |
54 | \item{minCharacterizationMean}{The minimum mean value for binary characterization output. Values below this will be cut off from output. This
55 | will help reduce the file size of the characterization output, but will remove information
56 | on covariates that have very low values. The default is 0.}
57 | }
58 | \value{
59 | Returns an object of type \code{covariateData}, containing information on the covariates.
60 | }
61 | \description{
62 | Get covariate information from the database
63 | }
64 |
--------------------------------------------------------------------------------
/man/filterByCohortDefinitionId.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/HelperFunctions.R
3 | \name{filterByCohortDefinitionId}
4 | \alias{filterByCohortDefinitionId}
5 | \title{Filter covariates by cohort definition IDs}
6 | \usage{
7 | filterByCohortDefinitionId(covariateData, cohortId = 1, cohortIds = c(1))
8 | }
9 | \arguments{
10 | \item{covariateData}{An object of type \code{CovariateData}}
11 |
12 | \item{cohortId}{DEPRECATED: The cohort definition IDs to keep.}
13 |
14 | \item{cohortIds}{The cohort definition IDs to keep.}
15 | }
16 | \value{
17 | An object of type \code{covariateData}.
18 | }
19 | \description{
20 | Filter covariates by cohort definition IDs
21 | }
22 | \examples{
23 | \donttest{
24 | covariateData <- FeatureExtraction::createEmptyCovariateData(
25 | cohortIds = c(1, 2),
26 | aggregated = TRUE,
27 | temporal = FALSE
28 | )
29 |
30 | covData <- filterByCohortDefinitionId(
31 | covariateData = covariateData,
32 | cohortIds = c(1)
33 | )
34 | }
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/man/filterByRowId.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/HelperFunctions.R
3 | \name{filterByRowId}
4 | \alias{filterByRowId}
5 | \title{Filter covariates by row ID}
6 | \usage{
7 | filterByRowId(covariateData, rowIds)
8 | }
9 | \arguments{
10 | \item{covariateData}{An object of type \code{CovariateData}}
11 |
12 | \item{rowIds}{A vector containing the rowIds to keep.}
13 | }
14 | \value{
15 | An object of type \code{covariateData}.
16 | }
17 | \description{
18 | Filter covariates by row ID
19 | }
20 | \examples{
21 | \donttest{
22 | covariateData <- FeatureExtraction::createEmptyCovariateData(
23 | cohortIds = 1,
24 | aggregated = FALSE,
25 | temporal = FALSE
26 | )
27 |
28 | covData <- filterByRowId(
29 | covariateData = covariateData,
30 | rowIds = 1
31 | )
32 | }
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/man/getDbCohortBasedCovariatesData.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/GetCovariatesFromOtherCohorts.R
3 | \name{getDbCohortBasedCovariatesData}
4 | \alias{getDbCohortBasedCovariatesData}
5 | \title{Get covariate information from the database based on other cohorts}
6 | \usage{
7 | getDbCohortBasedCovariatesData(
8 | connection,
9 | oracleTempSchema = NULL,
10 | cdmDatabaseSchema,
11 | cohortTable = "#cohort_person",
12 | cohortId = -1,
13 | cohortIds = c(-1),
14 | cdmVersion = "5",
15 | rowIdField = "subject_id",
16 | covariateSettings,
17 | aggregated = FALSE,
18 | minCharacterizationMean = 0,
19 | tempEmulationSchema = getOption("sqlRenderTempEmulationSchema")
20 | )
21 | }
22 | \arguments{
23 | \item{connection}{A connection to the server containing the schema as created using the
24 | \code{connect} function in the \code{DatabaseConnector} package.}
25 |
26 | \item{oracleTempSchema}{DEPRECATED: use \code{tempEmulationSchema} instead.}
27 |
28 | \item{cdmDatabaseSchema}{The name of the database schema that contains the OMOP CDM instance.
29 | Requires read permissions to this database. On SQL Server, this should
30 | specify both the database and the schema, so for example
31 | 'cdm_instance.dbo'.}
32 |
33 | \item{cohortTable}{Name of the table holding the cohort for which we want to construct
34 | covariates. If it is a temp table, the name should have a hash prefix,
35 | e.g. '#temp_table'. If it is a non-temp table, it should include the
36 | database schema, e.g. 'cdm_database.cohort'.}
37 |
38 | \item{cohortId}{DEPRECATED: For which cohort ID should covariates be constructed? If set to -1,
39 | covariates will be constructed for all cohorts in the specified cohort
40 | table.}
41 |
42 | \item{cohortIds}{For which cohort ID(s) should covariates be constructed? If set to c(-1),
43 | covariates will be constructed for all cohorts in the specified cohort
44 | table.}
45 |
46 | \item{cdmVersion}{The version of the Common Data Model used. Currently only
47 | \code{cdmVersion = "5"} is supported.}
48 |
49 | \item{rowIdField}{The name of the field in the cohort temp table that is to be used as the
50 | row_id field in the output table. This can be especially useful if there
51 | is more than one period per person.}
52 |
53 | \item{covariateSettings}{An object of type \code{covariateSettings} as created using the
54 | \code{\link{createCohortBasedCovariateSettings}} or
55 | \code{\link{createCohortBasedTemporalCovariateSettings}} functions.}
56 |
57 | \item{aggregated}{Should aggregate statistics be computed instead of covariates per
58 | cohort entry?}
59 |
60 | \item{minCharacterizationMean}{The minimum mean value for binary characterization output. Values below this will be cut off from output. This
61 | will help reduce the file size of the characterization output, but will remove information
62 | on covariates that have very low values. The default is 0.}
63 |
64 | \item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support
65 | temp tables. To emulate temp tables, provide a schema with write
66 | privileges where temp tables can be created.}
67 | }
68 | \value{
69 | Returns an object of type \code{CovariateData}, which is an Andromeda object containing information on the baseline covariates.
70 | Information about multiple outcomes can be captured at once for efficiency reasons. This object is
71 | a list with the following components: \describe{ \item{covariates}{An ffdf object listing the
72 | baseline covariates per person in the cohorts. This is done using a sparse representation:
73 | covariates with a value of 0 are omitted to save space. The covariates object will have three
74 | columns: rowId, covariateId, and covariateValue. The rowId is usually equal to the person_id,
75 | unless specified otherwise in the rowIdField argument.} \item{covariateRef}{A table
76 | describing the covariates that have been extracted.} }. The CovariateData object will also have a \code{metaData} attribute, a list of objects with
77 | information on how the covariateData object was constructed.
78 | }
79 | \description{
80 | Constructs covariates using other cohorts.
81 | }
82 | \details{
83 | This function uses the data in the CDM to construct a large set of covariates for the provided
84 | cohort. The cohort is assumed to be in an existing temp table with these fields: 'subject_id',
85 | 'cohort_definition_id', 'cohort_start_date'. Optionally, an extra field can be added containing the
86 | unique identifier that will be used as rowID in the output. Typically, users don't call this
87 | function directly but rather use the \code{\link{getDbCovariateData}} function instead.
88 | }
89 |
--------------------------------------------------------------------------------
/man/getDefaultTable1Specifications.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/Table1.R
3 | \name{getDefaultTable1Specifications}
4 | \alias{getDefaultTable1Specifications}
5 | \title{Get the default table 1 specifications}
6 | \usage{
7 | getDefaultTable1Specifications()
8 | }
9 | \value{
10 | A specifications object.
11 | }
12 | \description{
13 | Loads the default specifications for a table 1, to be used with the \code{\link{createTable1}}
14 | function.
15 | }
16 | \examples{
17 | \donttest{
18 | defaultTable1Specs <- getDefaultTable1Specifications()
19 | }
20 |
21 | }
22 |
--------------------------------------------------------------------------------
/man/isAggregatedCovariateData.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CovariateData.R
3 | \name{isAggregatedCovariateData}
4 | \alias{isAggregatedCovariateData}
5 | \title{Check whether covariate data is aggregated}
6 | \usage{
7 | isAggregatedCovariateData(x)
8 | }
9 | \arguments{
10 | \item{x}{The covariate data object to check.}
11 | }
12 | \value{
13 | A logical value.
14 | }
15 | \description{
16 | Check whether covariate data is aggregated
17 | }
18 | \examples{
19 | \donttest{
20 | covariateData <- FeatureExtraction::createEmptyCovariateData(
21 | cohortIds = 1,
22 | aggregated = FALSE,
23 | temporal = FALSE
24 | )
25 | isAggrCovData <- isAggregatedCovariateData(covariateData)
26 | }
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/man/isCovariateData.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CovariateData.R
3 | \name{isCovariateData}
4 | \alias{isCovariateData}
5 | \title{Check whether an object is a CovariateData object}
6 | \usage{
7 | isCovariateData(x)
8 | }
9 | \arguments{
10 | \item{x}{The object to check.}
11 | }
12 | \value{
13 | A logical value.
14 | }
15 | \description{
16 | Check whether an object is a CovariateData object
17 | }
18 | \examples{
19 | \donttest{
20 | binaryCovDataFile <- system.file("testdata/binaryCovariateData.zip",
21 | package = "FeatureExtraction"
22 | )
23 | covData <- loadCovariateData(binaryCovDataFile)
24 | isCovData <- isCovariateData(covData)
25 | }
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/man/isTemporalCovariateData.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CovariateData.R
3 | \name{isTemporalCovariateData}
4 | \alias{isTemporalCovariateData}
5 | \title{Check whether covariate data is temporal}
6 | \usage{
7 | isTemporalCovariateData(x)
8 | }
9 | \arguments{
10 | \item{x}{The covariate data object to check.}
11 | }
12 | \value{
13 | A logical value.
14 | }
15 | \description{
16 | Check whether covariate data is temporal
17 | }
18 | \examples{
19 | \donttest{
20 | covariateData <- FeatureExtraction::createEmptyCovariateData(
21 | cohortIds = 1,
22 | aggregated = FALSE,
23 | temporal = FALSE
24 | )
25 | isTempCovData <- isTemporalCovariateData(covariateData)
26 | }
27 |
28 | }
29 |
--------------------------------------------------------------------------------
/man/loadCovariateData.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CovariateData.R
3 | \name{loadCovariateData}
4 | \alias{loadCovariateData}
5 | \title{Load the covariate data from a folder}
6 | \usage{
7 | loadCovariateData(file, readOnly)
8 | }
9 | \arguments{
10 | \item{file}{The name of the folder containing the data.}
11 |
12 | \item{readOnly}{DEPRECATED: If true, the data is opened read only.}
13 | }
14 | \value{
15 | An object of class \code{CovariateData}.
16 | }
17 | \description{
18 | \code{loadCovariateData} loads an object of type covariateData from a folder in the file system.
19 | }
20 | \details{
21 | The data will be read from a set of files in the folder specified by the user.
22 | }
23 | \examples{
24 | \donttest{
25 | binaryCovDataFile <- system.file("testdata/binaryCovariateData.zip",
26 | package = "FeatureExtraction"
27 | )
28 | covData <- loadCovariateData(binaryCovDataFile)
29 | }
30 |
31 | }
32 |
--------------------------------------------------------------------------------
/man/saveCovariateData.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/CovariateData.R
3 | \name{saveCovariateData}
4 | \alias{saveCovariateData}
5 | \title{Save the covariate data to a folder}
6 | \usage{
7 | saveCovariateData(covariateData, file)
8 | }
9 | \arguments{
10 | \item{covariateData}{An object of type \code{covariateData} as generated using
11 | \code{getDbCovariateData}.}
12 |
13 | \item{file}{The name of the folder where the data will be written. The folder should not
14 | yet exist.}
15 | }
16 | \value{
17 | No return value, called for side effects.
18 | }
19 | \description{
20 | \code{saveCovariateData} saves an object of type covariateData to a folder.
21 | }
22 | \details{
23 | The data will be written to a set of files in the folder specified by the user.
24 | }
25 | \examples{
26 | \donttest{
27 | covariateData <- FeatureExtraction::createEmptyCovariateData(
28 | cohortIds = 1,
29 | aggregated = FALSE,
30 | temporal = FALSE
31 | )
32 | # For this example we'll use a temporary file location:
33 | fileName <- tempfile()
34 | saveCovariateData(covariateData = covariateData, file = fileName)
35 | # Cleaning up the file used in this example:
36 | unlink(fileName)
37 | }
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/man/tidyCovariateData.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/Normalization.R
3 | \name{tidyCovariateData}
4 | \alias{tidyCovariateData}
5 | \title{Tidy covariate data}
6 | \usage{
7 | tidyCovariateData(
8 | covariateData,
9 | minFraction = 0.001,
10 | normalize = TRUE,
11 | removeRedundancy = TRUE
12 | )
13 | }
14 | \arguments{
15 | \item{covariateData}{An object as generated using the \code{\link{getDbCovariateData}}
16 | function.}
17 |
18 | \item{minFraction}{Minimum fraction of the population that should have a non-zero value for a
19 | covariate for that covariate to be kept. Set to 0 to disable filtering on
20 | frequency.}
21 |
22 | \item{normalize}{Normalize the covariates? (dividing by the max).}
23 |
24 | \item{removeRedundancy}{Should redundant covariates be removed?}
25 | }
26 | \value{
27 | An object of class \code{CovariateData}.
28 | }
29 | \description{
30 | Tidy covariate data
31 | }
32 | \details{
33 | Normalize covariate values by dividing by the max and/or remove redundant covariates and/or remove
34 | infrequent covariates. For temporal covariates, redundancy is evaluated per time ID.
35 | }
36 | \examples{
37 | \donttest{
38 | covariateData <- FeatureExtraction::createEmptyCovariateData(
39 | cohortIds = 1,
40 | aggregated = FALSE,
41 | temporal = FALSE
42 | )
43 |
44 | covData <- tidyCovariateData(
45 | covariateData = covariateData,
46 | minFraction = 0.001,
47 | normalize = TRUE,
48 | removeRedundancy = TRUE
49 | )
50 | }
51 |
52 | }
53 |
--------------------------------------------------------------------------------
/nbactions.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | debug
5 |
6 | jar
7 |
8 |
9 | process-classes
10 | org.codehaus.mojo:exec-maven-plugin:1.5.0:exec
11 |
12 |
13 | -agentlib:jdwp=transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath org.ohdsi.featureExtraction.FeatureExtraction
14 | java
15 | true
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/tests/testBigQuery.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | options(dbms = "bigquery") # Selects the database platform the test suite runs against
3 | runTestsOnBigQuery <- !all(Sys.getenv(c("CDM_BIG_QUERY_CONNECTION_STRING", "CDM_BIG_QUERY_KEY_FILE", "CDM_BIG_QUERY_CDM_SCHEMA", "CDM_BIG_QUERY_OHDSI_SCHEMA")) == "") # TRUE when at least one of these settings is non-empty
4 | if (runTestsOnBigQuery) {
5 |   test_check("FeatureExtraction")
6 | }
7 |
--------------------------------------------------------------------------------
/tests/testOracle.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | options(dbms = "oracle") # Selects the database platform the test suite runs against
3 | runTestsOnOracle <- !all(Sys.getenv(c("CDM5_ORACLE_USER", "CDM5_ORACLE_PASSWORD", "CDM5_ORACLE_SERVER", "CDM5_ORACLE_CDM_SCHEMA", "CDM5_ORACLE_OHDSI_SCHEMA")) == "") # TRUE when at least one of these settings is non-empty
4 | if (runTestsOnOracle) {
5 |   test_check("FeatureExtraction")
6 | }
7 |
--------------------------------------------------------------------------------
/tests/testPostgres.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | options(dbms = "postgresql") # Selects the database platform the test suite runs against
3 | runTestsOnPostgreSQL <- !all(Sys.getenv(c("CDM5_POSTGRESQL_USER", "CDM5_POSTGRESQL_PASSWORD", "CDM5_POSTGRESQL_SERVER", "CDM5_POSTGRESQL_CDM_SCHEMA", "CDM5_POSTGRESQL_OHDSI_SCHEMA")) == "") # TRUE when at least one of these settings is non-empty
4 | if (runTestsOnPostgreSQL) {
5 |   test_check("FeatureExtraction")
6 | }
7 |
--------------------------------------------------------------------------------
/tests/testRedshift.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | options(dbms = "redshift")
3 | runTestsOnRedshift <- FALSE # !(Sys.getenv("CDM5_REDSHIFT_USER") == "" & Sys.getenv("CDM5_REDSHIFT_PASSWORD") == "" & Sys.getenv("CDM5_REDSHIFT_SERVER") == "" & Sys.getenv("CDM5_REDSHIFT_CDM_SCHEMA") == "" & Sys.getenv("CDM5_REDSHIFT_OHDSI_SCHEMA") == "")
4 | if (runTestsOnRedshift) {
5 | test_check("FeatureExtraction")
6 | }
7 |
--------------------------------------------------------------------------------
/tests/testSnowflake.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | options(dbms = "snowflake") # Selects the database platform the test suite runs against
3 | runTestsOnSnowflake <- !all(Sys.getenv(c("CDM_SNOWFLAKE_CONNECTION_STRING", "CDM_SNOWFLAKE_USER", "CDM_SNOWFLAKE_PASSWORD", "CDM_SNOWFLAKE_CDM53_SCHEMA", "CDM_SNOWFLAKE_OHDSI_SCHEMA")) == "") # TRUE when at least one of these settings is non-empty
4 | if (runTestsOnSnowflake) {
5 |   test_check("FeatureExtraction")
6 | }
7 |
--------------------------------------------------------------------------------
/tests/testSpark.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | options(dbms = "spark") # Selects the database platform the test suite runs against
3 | runTestsOnSpark <- !all(Sys.getenv(c("CDM5_SPARK_CONNECTION_STRING", "CDM5_SPARK_USER", "CDM_SPARK_PASSWORD", "CDM5_SPARK_CDM_SCHEMA", "CDM5_SPARK_OHDSI_SCHEMA")) == "") # TRUE when at least one of these settings is non-empty
4 | if (runTestsOnSpark) {
5 |   test_check("FeatureExtraction")
6 | }
7 |
--------------------------------------------------------------------------------
/tests/testSqlServer.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | options(dbms = "sql server") # Selects the database platform the test suite runs against
3 | runTestsOnSQLServer <- !all(Sys.getenv(c("CDM5_SQL_SERVER_USER", "CDM5_SQL_SERVER_PASSWORD", "CDM5_SQL_SERVER_SERVER", "CDM5_SQL_SERVER_CDM_SCHEMA", "CDM5_SQL_SERVER_OHDSI_SCHEMA")) == "") # TRUE when at least one of these settings is non-empty
4 | if (runTestsOnSQLServer) {
5 |   test_check("FeatureExtraction")
6 | }
7 |
--------------------------------------------------------------------------------
/tests/testSqlite.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | options(dbms = "sqlite")
3 | runTestsOnEunomia <- TRUE
4 | if (runTestsOnEunomia) {
5 | test_check("FeatureExtraction")
6 | }
7 |
--------------------------------------------------------------------------------
/tests/testthat/test-Aggregation.R:
--------------------------------------------------------------------------------
1 | # This file covers the code in Aggregation.R. View coverage for this file using
2 | # library(testthat); library(FeatureExtraction)
3 | # covr::file_report(covr::file_coverage("R/Aggregation.R", "tests/testthat/test-Aggregation.R"))
4 |
5 | test_that("aggregateCovariates works", {
6 | skip_on_cran()
7 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
8 | settings <- createCovariateSettings(useDemographicsAgeGroup = TRUE, useChads2Vasc = TRUE)
9 | covariateData <- getDbCovariateData(
10 | connectionDetails = eunomiaConnectionDetails,
11 | cdmDatabaseSchema = eunomiaCdmDatabaseSchema,
12 | cohortDatabaseSchema = eunomiaOhdsiDatabaseSchema,
13 | cohortIds = c(1),
14 | covariateSettings = settings,
15 | aggregated = FALSE
16 | )
17 |
18 | aggregatedCovariateData <- aggregateCovariates(covariateData)
19 | expect_true(isAggregatedCovariateData(aggregatedCovariateData))
20 | expect_error(aggregateCovariates("blah"), "not of class CovariateData")
21 | expect_error(aggregateCovariates(aggregatedCovariateData), "already be aggregated")
22 |
23 | # create example where missing does not mean zero
24 | covariateData$analysisRef <- covariateData$analysisRef %>%
25 | mutate(missingMeansZero = ifelse(.data$analysisName == "Chads2Vasc", "N", .data$missingMeansZero))
26 | expect_true(isAggregatedCovariateData(aggregateCovariates(covariateData)))
27 |
28 | Andromeda::close(covariateData)
29 | expect_error(aggregateCovariates(covariateData), "object is closed")
30 | })
31 |
32 | test_that("aggregateCovariates handles temporalCovariates", {
33 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
34 | settings <- createTemporalCovariateSettings(useDemographicsGender = TRUE)
35 | covariateData <- getDbCovariateData(
36 | connectionDetails = eunomiaConnectionDetails,
37 | cdmDatabaseSchema = eunomiaCdmDatabaseSchema,
38 | cohortDatabaseSchema = eunomiaOhdsiDatabaseSchema,
39 | cohortIds = c(1),
40 | covariateSettings = settings
41 | )
42 | expect_error(aggregateCovariates(covariateData), "temporal covariates")
43 | })
44 |
--------------------------------------------------------------------------------
/tests/testthat/test-CompareCohorts.R:
--------------------------------------------------------------------------------
1 | # View coverage for this file using
2 | # library(testthat); library(FeatureExtraction)
3 | # covr::file_report(covr::file_coverage("R/CompareCohorts.R", "tests/testthat/test-CompareCohorts.R"))
4 |
5 | test_that("Test stdDiff continuous variable computation", {
6 | # NOTE: Data stored in "inst/testdata/continuousCovariateData.zip" created by:
7 | # ------------------------------------------------------------------------------
8 | # connectionDetails <- Eunomia::getEunomiaConnectionDetails()
9 | # Eunomia::createCohorts(connectionDetails)
10 | # data <- FeatureExtraction::getDbCovariateData(connectionDetails = connectionDetails,
11 | # cdmDatabaseSchema = "main",
12 | # cohortTable = "cohort",
13 | # aggregated = TRUE,
14 | # covariateSettings = FeatureExtraction::createCovariateSettings(useCharlsonIndex = TRUE))
15 | # FeatureExtraction::saveCovariateData(data, "inst/testdata/continuousCovariateData.zip")
16 | # ------------------------------------------------------------------------------
17 | data <- loadCovariateData(getTestResourceFilePath("continuousCovariateData.zip"))
18 | # Compute the expected value based on cohorts 1 & 2's values from
19 | # the loaded covariate data
20 | testData <- data.frame(
21 | mean1 = 0.614,
22 | sd1 = 0.387,
23 | mean2 = 0.404,
24 | sd2 = 0.345
25 | )
26 |
27 | output <- computeStandardizedDifference(
28 | covariateData1 = data,
29 | covariateData2 = data,
30 | cohortId1 = 1,
31 | cohortId2 = 2
32 | )
33 | testData$sd <- sqrt((testData$sd1^2 + testData$sd2^2) / 2)
34 | testData$stdDiff <- (testData$mean2 - testData$mean1) / testData$sd
35 |
36 | # Compute the standardized difference of mean using the source data
37 | expect_equal(output$stdDiff, testData$stdDiff, tolerance = 0.001, scale = 1)
38 | })
39 |
40 | test_that("Test stdDiff binary variable computation", {
41 | skip_on_cran()
42 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
43 | connectionDetails <- Eunomia::getEunomiaConnectionDetails()
44 | Eunomia::createCohorts(connectionDetails)
45 | data <- FeatureExtraction::getDbCovariateData(
46 | connectionDetails = connectionDetails,
47 | cdmDatabaseSchema = "main",
48 | cohortTable = "cohort",
49 | aggregated = TRUE,
50 | covariateSettings = FeatureExtraction::createCovariateSettings(useConditionOccurrenceLongTerm = TRUE)
51 | )
52 | output <- computeStandardizedDifference(
53 | covariateData1 = data,
54 | covariateData2 = data,
55 | cohortId1 = 1,
56 | cohortId2 = 2
57 | )
58 | # Filter to: condition_occurrence during day -365 through 0 days relative to index: Diverticular disease
59 | singleCovariate <- output[output$covariateId == 4266809102, ]
60 |
61 | # Compute the expected value based on cohorts 1 & 2's values from
62 | # the loaded covariate data for covariateId == 4266809102
63 | testBinaryData <- data.frame(
64 | popSize1 = 1844,
65 | sumValue1 = 341,
66 | popSize2 = 850,
67 | sumValue2 = 64
68 | )
69 |
70 | testBinaryData$mean1 <- testBinaryData$sumValue1 / testBinaryData$popSize1
71 | testBinaryData$mean2 <- testBinaryData$sumValue2 / testBinaryData$popSize2
72 | testBinaryData$sd1 <- sqrt(testBinaryData$mean1 * (1 - testBinaryData$mean1))
73 | testBinaryData$sd2 <- sqrt(testBinaryData$mean2 * (1 - testBinaryData$mean2))
74 | testBinaryData$sd <- sqrt((testBinaryData$sd1^2 + testBinaryData$sd2^2) / 2)
75 | testBinaryData$stdDiff <- (testBinaryData$mean2 - testBinaryData$mean1) / testBinaryData$sd
76 |
77 | # Test the results
78 | expect_equal(singleCovariate$mean1, testBinaryData$mean1, tolerance = 0.001, scale = 1)
79 | expect_equal(singleCovariate$sd1, testBinaryData$sd1, tolerance = 0.001, scale = 1)
80 | expect_equal(singleCovariate$mean2, testBinaryData$mean2, tolerance = 0.001, scale = 1)
81 | expect_equal(singleCovariate$sd2, testBinaryData$sd2, tolerance = 0.001, scale = 1)
82 | expect_equal(singleCovariate$sd, testBinaryData$sd, tolerance = 0.001, scale = 1)
83 | expect_equal(singleCovariate$stdDiff, testBinaryData$stdDiff, tolerance = 0.001, scale = 1)
84 | })
85 |
--------------------------------------------------------------------------------
/tests/testthat/test-DetailedCovariateSettings.R:
--------------------------------------------------------------------------------
1 | # This file covers the code in DetailedCovariateSettings.R.
2 | test_that("test createDetailedCovariateSettings", {
3 | analysisDetails <- createAnalysisDetails(
4 | analysisId = 1,
5 | sqlFileName = "DemographicsGender.sql",
6 | parameters = list(
7 | analysisId = 1,
8 | analysisName = "Gender",
9 | domainId = "Demographics"
10 | ),
11 | includedCovariateConceptIds = c(),
12 | addDescendantsToInclude = FALSE,
13 | excludedCovariateConceptIds = c(),
14 | addDescendantsToExclude = FALSE,
15 | includedCovariateIds = c()
16 | )
17 |
18 | settings <- createDetailedCovariateSettings(list(analysisDetails))
19 | temporalSettings <- createDetailedTemporalCovariateSettings(list(analysisDetails))
20 | expect_s3_class(settings, "covariateSettings")
21 | expect_s3_class(temporalSettings, "covariateSettings")
22 | expect_equal(temporalSettings$temporalStartDays, -365:-1)
23 | })
24 |
25 | test_that("test createDetailedTemporalCovariateSettings", {
26 | analysisDetails <- createAnalysisDetails(
27 | analysisId = 1,
28 | sqlFileName = "DemographicsGender.sql",
29 | parameters = list(
30 | analysisId = 1,
31 | analysisName = "Gender",
32 | domainId = "Demographics"
33 | ),
34 | includedCovariateConceptIds = c(),
35 | addDescendantsToInclude = FALSE,
36 | excludedCovariateConceptIds = c(),
37 | addDescendantsToExclude = FALSE,
38 | includedCovariateIds = c()
39 | )
40 |
41 | temporalSettings <- createDetailedTemporalCovariateSettings(list(analysisDetails))
42 | expect_s3_class(temporalSettings, "covariateSettings")
43 | expect_equal(temporalSettings$temporalStartDays, -365:-1)
44 | })
45 |
46 | test_that("test convertPrespecSettingsToDetailedSettings", {
47 |   settings <- createCovariateSettings(useDemographicsAgeGroup = TRUE, useChads2Vasc = TRUE)
48 |   convertedSettings <- convertPrespecSettingsToDetailedSettings(settings)
49 |   expect_s3_class(convertedSettings, "covariateSettings")
50 |   expect_equal(names(convertedSettings), c("temporal", "temporalSequence", "temporalAnnual", "analyses"))
51 |   expect_equal(sum(vapply(convertedSettings$analyses, function(analysis) analysis$sqlFileName, character(1)) %in% c("DemographicsAgeGroup.sql", "Chads2Vasc.sql")), 2) # both requested analyses must appear among the converted analyses
52 | })
53 |
54 | test_that("test createDefaultCovariateSettings", {
55 | settings <- createDefaultCovariateSettings()
56 | expect_s3_class(settings, "covariateSettings")
57 | })
58 |
59 | test_that("test createDefaultTemporalCovariateSettings", {
60 | settings <- createDefaultTemporalCovariateSettings()
61 | expect_s3_class(settings, "covariateSettings")
62 | })
63 |
--------------------------------------------------------------------------------
/tests/testthat/test-FeatureExtractionInternal.R:
--------------------------------------------------------------------------------
1 | # View coverage for this file using
2 | # library(testthat); library(FeatureExtraction)
3 | # covr::file_report(covr::file_coverage("R/FeatureExtraction.R", "tests/testthat/test-FeatureExtractionInternal.R"))
4 |
5 | test_that("Test .onLoad()", {
6 | expect_silent(
7 | FeatureExtraction:::.onLoad(libname = "FeatureExtraction", pkgname = "FeatureExtraction")
8 | )
9 | })
10 |
11 | test_that("Test JSON functions", {
12 | expectedToJsonResult <- "{\"id\":\"1\"}"
13 | expectedFromJsonResult <- list("id" = "1")
14 | toJsonResult <- FeatureExtraction:::.toJson(expectedFromJsonResult)
15 | expect_equal(toJsonResult, expectedToJsonResult)
16 |
17 | fromJsonResult <- FeatureExtraction:::.fromJson(expectedToJsonResult)
18 | expect_equal(fromJsonResult, expectedFromJsonResult)
19 | })
20 |
--------------------------------------------------------------------------------
/tests/testthat/test-GetCovariatesFromCohortAttributes.R:
--------------------------------------------------------------------------------
1 | # This file covers the code in GetCovariatesFromCohortAttributes.R.
2 | # NOTE: Functionality is described in detail in the following vignette:
3 | # http://ohdsi.github.io/FeatureExtraction/articles/CreatingCovariatesUsingCohortAttributes.html
4 | #
5 | # View coverage for this file using
6 | # library(testthat); library(FeatureExtraction)
7 | # covr::file_report(covr::file_coverage("R/GetCovariatesFromCohortAttributes.R", "tests/testthat/test-GetCovariatesFromCohortAttributes.R"))
8 |
9 | test_that("getDbCohortAttrCovariatesData aggregation not supported check", {
10 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
11 | expect_error(getDbCohortAttrCovariatesData(
12 | connection = eunomiaConnection,
13 | cdmDatabaseSchema = eunomiaCdmDatabaseSchema,
14 | covariateSettings = createDefaultCovariateSettings(),
15 | aggregated = TRUE
16 | ))
17 | })
18 |
19 | test_that("getDbCohortAttrCovariatesData CDM v4 not supported check", {
20 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
21 | expect_error(getDbCohortAttrCovariatesData(
22 | connection = eunomiaConnection,
23 | cdmDatabaseSchema = eunomiaCdmDatabaseSchema,
24 | cdmVersion = "4",
25 | covariateSettings = createDefaultCovariateSettings()
26 | ))
27 | })
28 |
29 | test_that("getDbCohortAttrCovariatesData hasIncludedAttributes == 0", {
30 | skip_on_cran()
31 | # TODO: This test is probably good to run on all DB platforms
32 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
33 | covariateSettings <- createCohortAttrCovariateSettings(
34 | attrDatabaseSchema = eunomiaOhdsiDatabaseSchema,
35 | cohortAttrTable = cohortAttributeTable,
36 | attrDefinitionTable = attributeDefinitionTable,
37 | includeAttrIds = c(),
38 | isBinary = FALSE,
39 | missingMeansZero = FALSE
40 | )
41 | result <- getDbCohortAttrCovariatesData(
42 | connection = eunomiaConnection,
43 | cdmDatabaseSchema = eunomiaCdmDatabaseSchema,
44 | cohortTable = cohortTable,
45 | covariateSettings = covariateSettings
46 | )
47 | expect_equal(class(result), "CovariateData")
48 | })
49 |
50 | test_that("getDbCohortAttrCovariatesData hasIncludedAttributes > 0", {
51 | skip_on_cran()
52 | # TODO: This test is probably good to run on all DB platforms
53 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
54 | covariateSettings <- createCohortAttrCovariateSettings(
55 | attrDatabaseSchema = eunomiaOhdsiDatabaseSchema,
56 | cohortAttrTable = cohortAttributeTable,
57 | attrDefinitionTable = attributeDefinitionTable,
58 | includeAttrIds = c(1),
59 | isBinary = FALSE,
60 | missingMeansZero = TRUE
61 | )
62 | result <- getDbCohortAttrCovariatesData(
63 | connection = eunomiaConnection,
64 | cdmDatabaseSchema = eunomiaCdmDatabaseSchema,
65 | cohortTable = cohortTable,
66 | covariateSettings = covariateSettings,
67 | cohortIds = c(1, 2)
68 | )
69 | expect_equal(class(result), "CovariateData")
70 | })
71 |
72 | test_that("createCohortAttrCovariateSettings check", {
73 |   skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
74 |   result <- createCohortAttrCovariateSettings(attrDatabaseSchema = "main")
75 |   expect_s3_class(result, "covariateSettings") # same class check the other settings tests use
76 | })
77 |
78 | test_that("getDbCohortAttrCovariatesData cohortId warning", {
79 | skip_on_cran()
80 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
81 | covariateSettings <- createCohortAttrCovariateSettings(
82 | attrDatabaseSchema = eunomiaOhdsiDatabaseSchema,
83 | cohortAttrTable = cohortAttributeTable,
84 | attrDefinitionTable = attributeDefinitionTable,
85 | includeAttrIds = c(1),
86 | isBinary = FALSE,
87 | missingMeansZero = TRUE
88 | )
89 | # cohortId argument
90 | expect_warning(getDbCohortAttrCovariatesData(
91 | connection = eunomiaConnection,
92 | cdmDatabaseSchema = eunomiaCdmDatabaseSchema,
93 | cohortTable = cohortTable,
94 | covariateSettings = covariateSettings,
95 | cohortId = 1
96 | ), "cohortId argument has been deprecated, please use cohortIds")
97 | })
98 |
--------------------------------------------------------------------------------
/tests/testthat/test-GetCovariatesTemporalSequence.R:
--------------------------------------------------------------------------------
1 | # View coverage for this file using
2 | # library(testthat); library(FeatureExtraction)
3 | # covr::file_report(covr::file_coverage("R/DefaultTemporalSequenceCovariateSettings.R", "tests/testthat/test-GetCovariatesTemporalSequence.R"))
4 |
5 | test_that("createTemporalSequenceCovariateSettings correctly sets list", {
6 |   settings <- createTemporalSequenceCovariateSettings(
7 |     useDemographicsGender = TRUE,
8 |     useConditionEraGroupStart = TRUE,
9 |     useDrugEraStart = TRUE,
10 |     timePart = "month",
11 |     timeInterval = 1,
12 |     sequenceEndDay = -1,
13 |     sequenceStartDay = -365 * 5
14 |   )
15 |
16 |   testthat::expect_equal(settings$temporalSequence, TRUE)
17 |   testthat::expect_equal(settings$temporal, FALSE)
18 |
19 |   testthat::expect_equal(sum(c("DemographicsGender", "ConditionEraGroupStart", "DrugEraStart") %in% names(settings)), 3) # the three requested analyses are present
20 |   testthat::expect_equal(sum(c("DemographicsAge", "ConditionEraStart", "DrugEraGroupStart") %in% names(settings)), 0) # analyses that were not requested are absent
21 |
22 |   testthat::expect_equal(settings$timePart, "month")
23 |   testthat::expect_equal(settings$timeInterval, 1)
24 |
25 |   testthat::expect_equal(settings$sequenceEndDay, -1)
26 |   testthat::expect_equal(settings$sequenceStartDay, -365 * 5)
27 |
28 |   testthat::expect_s3_class(settings, "covariateSettings")
29 | })
30 |
31 |
32 | test_that("createTemporalSequenceCovariateSettings correctly sets function", {
33 |   settings <- createTemporalSequenceCovariateSettings(
34 |     useDemographicsGender = TRUE,
35 |     useConditionEraGroupStart = TRUE,
36 |     useDrugEraStart = TRUE,
37 |     timePart = "month",
38 |     timeInterval = 1,
39 |     sequenceEndDay = -1,
40 |     sequenceStartDay = -365 * 5
41 |   )
42 |
43 |   testthat::expect_equal(attr(settings, "fun"), "getDbDefaultCovariateData") # the "fun" attribute names the function expected to build these covariates
44 | })
45 |
46 |
47 | # check extraction
48 | test_that("getDbCovariateData works with createTemporalSequenceCovariateSettings", {
49 |   skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
50 |   covSet <- createTemporalSequenceCovariateSettings(
51 |     useDemographicsGender = TRUE,
52 |     useDemographicsAge = TRUE,
53 |     useDemographicsRace = TRUE,
54 |     useDemographicsEthnicity = TRUE,
55 |     useDemographicsAgeGroup = TRUE,
56 |     useConditionEraGroupStart = TRUE,
57 |     useDrugEraStart = TRUE,
58 |     useMeasurement = TRUE,
59 |     useMeasurementValue = TRUE,
60 |     timePart = "month",
61 |     timeInterval = 1,
62 |     sequenceEndDay = -1,
63 |     sequenceStartDay = -365 * 5
64 |   )
65 |
66 |
67 |   result <- getDbCovariateData(
68 |     connection = eunomiaConnection,
69 |     cdmDatabaseSchema = "main",
70 |     cohortTable = "cohort",
71 |     cohortIds = c(1),
72 |     covariateSettings = covSet
73 |   )
74 |
75 |   expect_true(is(result, "CovariateData"))
76 |
77 |   # check timeId is 60 or less
78 |   expect_true(max(as.data.frame(result$covariates)$timeId, na.rm = TRUE) <= 60)
79 | })
80 |
81 | # Check backwards compatibility
82 | test_that("Temporal Covariate Settings are backwards compatible", {
83 | skip_on_cran()
84 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
85 |
86 | # Temporal covariate settings created previously will not have
87 | # the temporalSequence property
88 | covSet <- FeatureExtraction::createDefaultTemporalCovariateSettings()
89 | covSet$temporalSequence <- NULL
90 |
91 | result <- getDbCovariateData(
92 | connection = eunomiaConnection,
93 | cdmDatabaseSchema = "main",
94 | cohortTable = "cohort",
95 | cohortIds = c(1),
96 | covariateSettings = covSet
97 | )
98 | expect_true(is(result, "CovariateData"))
99 | })
100 |
--------------------------------------------------------------------------------
/tests/testthat/test-GetDefaultCovariates.R:
--------------------------------------------------------------------------------
1 | # This file covers the code in GetDefaultCovariates.R. View coverage for this file using
2 | # library(testthat); library(FeatureExtraction)
3 | # covr::file_report(covr::file_coverage("R/GetDefaultCovariates.R", "tests/testthat/test-GetDefaultCovariates.R"))
4 |
5 | test_that("Test exit conditions", {
6 |   skip_on_cran()
7 |   skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
8 |
9 |   # A bare list is supplied where a covariate settings object is expected
10 |   expect_error(getDbDefaultCovariateData(
11 |     covariateSettings = list(),
12 |     connection = eunomiaConnection,
13 |     cdmDatabaseSchema = "main",
14 |     targetDatabaseSchema = "main",
15 |     targetCovariateTable = "cov",
16 |     targetCovariateRefTable = "cov_ref",
17 |     targetAnalysisRefTable = "cov_analysis_ref"
18 |   ))
19 |   # cdmVersion "4" is requested
20 |   expect_error(getDbDefaultCovariateData(
21 |     cdmVersion = "4",
22 |     covariateSettings = createDefaultCovariateSettings(),
23 |     connection = eunomiaConnection,
24 |     cdmDatabaseSchema = "main",
25 |     targetDatabaseSchema = "main",
26 |     targetCovariateTable = "cov",
27 |     targetCovariateRefTable = "cov_ref",
28 |     targetAnalysisRefTable = "cov_analysis_ref"
29 |   ))
30 |
31 |   # aggregated = TRUE is combined with a target covariate table
32 |   expect_error(getDbDefaultCovariateData(
33 |     aggregated = TRUE,
34 |     cohortId = -1,
35 |     covariateSettings = createDefaultCovariateSettings(),
36 |     connection = eunomiaConnection,
37 |     cdmDatabaseSchema = "main",
38 |     targetDatabaseSchema = "main",
39 |     targetCovariateTable = "cov",
40 |     targetCovariateRefTable = "cov_ref",
41 |     targetAnalysisRefTable = "cov_analysis_ref"
42 |   ))
43 | })
44 |
45 | # AGS - This test fails, likely due to a bug when using SQLite
46 | # test_that("Test target table", {
47 | # connection <- DatabaseConnector::connect(connectionDetails)
48 | # Eunomia::createCohorts(connectionDetails)
49 | #
50 | # results <- getDbDefaultCovariateData(connection = connection,
51 | # cdmDatabaseSchema = "main",
52 | # cohortTable = "cohort",
53 | # covariateSettings = createDefaultCovariateSettings(),
54 | # targetDatabaseSchema = "main",
55 | # targetCovariateTable = "ut_cov",
56 | # targetCovariateRefTable = "ut_cov_ref",
57 | # targetAnalysisRefTable = "ut_cov_analysis_ref")
58 | #
59 | # on.exit(DatabaseConnector::disconnect(connection))
60 | # })
61 | #
62 | # unlink(connectionDetails$server())
63 |
--------------------------------------------------------------------------------
/tests/testthat/test-HelperFunctions.R:
--------------------------------------------------------------------------------
1 | # This file covers the code in HelperFunctions.R. View coverage for this file using
2 | # library(testthat); library(FeatureExtraction)
3 | # covr::file_report(covr::file_coverage("R/HelperFunctions.R", "tests/testthat/test-HelperFunctions.R"))
4 |
5 | test_that("Test helper functions for non-aggregated covariate data", {
6 |   skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
7 |   expect_error(filterByRowId("blah", 1), "not of class CovariateData")
8 |
9 |   covariateData <- getDbCovariateData(
10 |     connection = eunomiaConnection,
11 |     cdmDatabaseSchema = eunomiaCdmDatabaseSchema,
12 |     cohortDatabaseSchema = eunomiaOhdsiDatabaseSchema,
13 |     cohortIds = 1:2,
14 |     covariateSettings = createCovariateSettings(useDemographicsAgeGroup = TRUE, useChads2Vasc = TRUE),
15 |     aggregated = FALSE
16 |   )
17 |   # Filtering on rowId 1 must leave that rowId as the only one present.
18 |   covariateDataFiltered <- filterByRowId(covariateData, rowIds = 1)
19 |   expect_equal(unique(pull(covariateDataFiltered$covariates, rowId)), 1)
20 |   # Data aggregated locally is expected to be rejected by the cohort filter ...
21 |   locallyAggregated <- aggregateCovariates(covariateData)
22 |   expect_error(filterByCohortDefinitionId(locallyAggregated, cohortIds = c(1)))
23 |   # ... and so is the original non-aggregated data.
24 |   expect_error(filterByCohortDefinitionId(covariateData, cohortIds = c(1)), "Can only filter aggregated")
25 |   # Once the object has been closed, filtering must fail with a "closed" error.
26 |   Andromeda::close(covariateData)
27 |   expect_error(filterByRowId(covariateData, 1), "closed")
28 | })
29 |
30 | test_that("Test helper functions for aggregated covariate data", {
31 |   skip_on_cran()
32 |   skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
33 |   expect_error(filterByCohortDefinitionId("blah", 1), "not of class CovariateData")
34 |
35 |   aggregatedCovariateData <- getDbCovariateData(
36 |     connection = eunomiaConnection,
37 |     cdmDatabaseSchema = eunomiaCdmDatabaseSchema,
38 |     cohortDatabaseSchema = eunomiaOhdsiDatabaseSchema,
39 |     cohortIds = 1:2,
40 |     covariateSettings = createCovariateSettings(useDemographicsAgeGroup = TRUE, useChads2Vasc = TRUE),
41 |     aggregated = TRUE
42 |   )
43 |   # Keeping only cohort 1 must leave that cohortDefinitionId as the only one present.
44 |   aggCovariateDataFiltered <- filterByCohortDefinitionId(aggregatedCovariateData, cohortIds = c(1))
45 |   # Row-level filtering of aggregated data and any use of a closed object must fail.
46 |   expect_equal(unique(pull(aggCovariateDataFiltered$covariates, cohortDefinitionId)), 1)
47 |   expect_error(filterByRowId(aggregatedCovariateData, 1), "Cannot filter aggregated")
48 |   Andromeda::close(aggregatedCovariateData)
49 |   expect_error(filterByCohortDefinitionId(aggregatedCovariateData, cohortIds = c(1)), "closed")
50 | })
51 |
--------------------------------------------------------------------------------
/tests/testthat/test-PostcoordConcepts.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(FeatureExtraction)
3 | library(dplyr)
4 |
5 | test_that("Postcoordinated concepts on Eunomia", {
6 |   skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
7 |   # Four cohort entries (subjects 1 to 4), uploaded as the temp table #pcc_cohort.
8 |   cohortRows <- data.frame(
9 |     cohortDefinitionId = rep(1, 4),
10 |     cohortStartDate = as.Date(c("2000-02-01", "2000-08-01", "2000-02-01", "2000-01-02")),
11 |     cohortEndDate = as.Date(c("2000-02-14", "2000-09-14", "2000-02-01", "2000-01-02")),
12 |     subjectId = c(1, 2, 3, 4)
13 |   )
14 |   DatabaseConnector::insertTable(
15 |     connection = eunomiaConnection,
16 |     tableName = "#pcc_cohort",
17 |     data = cohortRows,
18 |     tempTable = TRUE,
19 |     createTable = TRUE,
20 |     dropTableIfExists = TRUE,
21 |     camelCaseToSnakeCase = TRUE,
22 |     progressBar = FALSE
23 |   )
24 |   measurementRows <- data.frame(
25 |     measurementId = rep(0, 4),
26 |     measurementTypeConceptId = rep(0, 4),
27 |     personId = c(1, 1, 3, 4),
28 |     measurementConceptId = rep(3000963, 4),
29 |     valueAsConceptId = c(4083207, 4084765, 4084765, 4084765),
30 |     measurementDate = as.Date(c("2000-01-14", "2000-01-01", "2000-01-14", "2000-01-01"))
31 |   )
32 |   DatabaseConnector::insertTable(
33 |     connection = eunomiaConnection,
34 |     databaseSchema = "main",
35 |     tableName = "measurement",
36 |     data = measurementRows,
37 |     tempTable = FALSE,
38 |     createTable = FALSE,
39 |     dropTableIfExists = FALSE,
40 |     camelCaseToSnakeCase = TRUE,
41 |     progressBar = FALSE
42 |   )
43 |   settings <- createCovariateSettings(
44 |     shortTermStartDays = -30,
45 |     useMeasurementValueAsConceptShortTerm = TRUE
46 |   )
47 |
48 |   covariateData <- getDbCovariateData(
49 |     connection = eunomiaConnection,
50 |     cdmDatabaseSchema = "main",
51 |     cohortTableIsTemp = TRUE,
52 |     cohortTable = "#pcc_cohort",
53 |     covariateSettings = settings
54 |   )
55 |   # Covariate rows are expected for rowIds 1, 3 and 4 only; the measurements
56 |   # above were inserted for persons 1, 3 and 4.
57 |   covariates <- arrange(collect(covariateData$covariates), rowId)
58 |   expect_equal(covariates$rowId, c(1, 3, 4))
59 |   expect_equal(covariates$covariateId, c(583329995308716, 583329563103716, 583329563103716))
60 |   expect_equal(covariates$covariateValue, c(1, 1, 1))
61 |
62 |   # The reference table, sorted by covariateId, should hold one entry per
63 |   # measurement concept / value concept combination.
64 |   covariateRef <- arrange(collect(covariateData$covariateRef), covariateId)
65 |   expect_equal(covariateRef$covariateId, c(583329563103716, 583329995308716))
66 |   expect_equal(covariateRef$conceptId, c(3000963, 3000963))
67 |   expect_equal(covariateRef$valueAsConceptId, c(4084765, 4083207))
68 |
69 |   # A single analysis (716) is expected.
70 |   analysisRef <- collect(covariateData$analysisRef)
71 |   expect_equal(analysisRef$analysisId, 716)
72 |
73 |   # Add measurements for which the extraction is expected to warn about collisions.
74 |   collidingRows <- data.frame(
75 |     measurementId = rep(0, 4),
76 |     measurementTypeConceptId = rep(0, 4),
77 |     personId = c(1, 1, 3, 4),
78 |     measurementConceptId = c(3048564, 3048564, 40483078, 40483078),
79 |     valueAsConceptId = rep(4069590, 4),
80 |     measurementDate = as.Date(c("2000-01-14", "2000-01-01", "2000-01-14", "2000-01-01"))
81 |   )
82 |   DatabaseConnector::insertTable(
83 |     connection = eunomiaConnection,
84 |     databaseSchema = "main",
85 |     tableName = "measurement",
86 |     data = collidingRows,
87 |     tempTable = FALSE,
88 |     createTable = FALSE,
89 |     dropTableIfExists = FALSE,
90 |     camelCaseToSnakeCase = TRUE,
91 |     progressBar = FALSE
92 |   )
93 |   settings <- createCovariateSettings(
94 |     shortTermStartDays = -30,
95 |     useMeasurementValueAsConceptShortTerm = TRUE
96 |   )
97 |
98 |   expect_warning(
99 |     {
100 |       covariateData <- getDbCovariateData(
101 |         connection = eunomiaConnection,
102 |         cdmDatabaseSchema = "main",
103 |         cohortTableIsTemp = TRUE,
104 |         cohortTable = "#pcc_cohort",
105 |         covariateSettings = settings
106 |       )
107 |     },
108 |     regexp = "Collisions"
109 |   )
110 | })
111 |
--------------------------------------------------------------------------------
/tests/testthat/test-PrespecAnalyses.R:
--------------------------------------------------------------------------------
1 | # This file contains tests for all the PrespecAnalyses files in the inst/csv folder.
2 |
3 | test_that("PrespecAnalyses check for uniqueness", {
4 |   # Match every "...Analyses*.csv" file; the dot before "csv" is escaped so it is literal.
5 |   analysesFiles <- list.files(system.file("csv", package = "FeatureExtraction"),
6 |     pattern = "Analyses.*\\.csv$", full.names = TRUE
7 |   )
8 |   expect_true(length(analysesFiles) > 0) # otherwise the loop below would pass vacuously
9 |   for (filePath in analysesFiles) {
10 |     prespecAnalyses <- read.csv(filePath)
11 |     # Each file must parse to a data frame exposing the expected columns.
12 |     expect_s3_class(prespecAnalyses, "data.frame")
13 |     expect_true(all(c(
14 |       "analysisId", "analysisName", "sqlFileName", "subType", "domainId",
15 |       "domainTable", "domainConceptId", "domainStartDate", "domainEndDate",
16 |       "isDefault", "description"
17 |     ) %in% colnames(prespecAnalyses)))
18 |
19 |     # analysisId should be unique as well as the combination of other columns
20 |     expect_equal(length(unique(prespecAnalyses$analysisId)), length(prespecAnalyses$analysisId))
21 |
22 |     prespecAnalyses <- prespecAnalyses %>%
23 |       dplyr::select(-analysisId)
24 |     expect_equal(nrow(unique(prespecAnalyses)), nrow(prespecAnalyses))
25 |   }
26 | })
27 |
--------------------------------------------------------------------------------
/tests/testthat/test-tidyCovariates.R:
--------------------------------------------------------------------------------
1 | # View coverage for this file using
2 | # library(testthat); library(FeatureExtraction)
3 | # covr::file_report(covr::file_coverage("R/Normalization.R", "tests/testthat/test-tidyCovariates.R"))
4 |
5 | test_that("Test exit conditions ", {
6 |   # 1. Input that is not a CovariateData object
7 |   expect_error(tidyCovariateData(covariateData = list()))
8 |
9 |   # 2. CovariateData object that has been closed beforehand
10 |   closedData <- FeatureExtraction::createEmptyCovariateData(
11 |     cohortIds = 1,
12 |     aggregated = FALSE,
13 |     temporal = FALSE
14 |   )
15 |   Andromeda::close(closedData)
16 |   expect_error(tidyCovariateData(covariateData = closedData))
17 |
18 |   # 3. Aggregated CovariateData object
19 |   aggregatedData <- FeatureExtraction::createEmptyCovariateData(
20 |     cohortIds = 1, aggregated = TRUE, temporal = FALSE
21 |   )
22 |   expect_error(tidyCovariateData(covariateData = aggregatedData))
23 | })
24 |
25 | test_that("Test empty covariateData", {
26 |   # Tidying an empty CovariateData object should leave the covariate count unchanged.
27 |   emptyData <- FeatureExtraction::createEmptyCovariateData(cohortIds = 1, aggregated = FALSE, temporal = FALSE)
28 |   tidied <- tidyCovariateData(covariateData = emptyData)
29 |
30 |   idsBefore <- pull(emptyData$covariates, covariateId)
31 |   idsAfter <- pull(tidied$covariates, covariateId)
32 |   expect_equal(length(idsAfter), length(idsBefore))
33 | })
34 |
35 | test_that("tidyCovariates works", {
36 |   # Helper: i rows of value 1 for the covariate with id i * 1000 + analysisId.
37 |   makeRows <- function(i, analysisId) {
38 |     tibble(
39 |       covariateId = rep(i * 1000 + analysisId, i),
40 |       covariateValue = rep(1, i)
41 |     )
42 |   }
43 |   covariates <- do.call("rbind", lapply(1:10, makeRows, analysisId = 1))
44 |   covariates$rowId <- seq_len(nrow(covariates))
45 |   metaData <- list(populationSize = nrow(covariates))
46 |   # Analysis 2: one covariate spread over 40 rows; analysis 3: one covariate on a single row.
47 |   frequentCovariate <- makeRows(40, analysisId = 2)
48 |   frequentCovariate$rowId <- sample.int(metaData$populationSize, nrow(frequentCovariate), replace = FALSE)
49 |   infrequentCovariate <- makeRows(1, analysisId = 3)
50 |   infrequentCovariate$rowId <- sample.int(metaData$populationSize, nrow(infrequentCovariate), replace = FALSE)
51 |   covariates <- rbind(covariates, frequentCovariate, infrequentCovariate)
52 |
53 |   covariateRef <- tibble(
54 |     covariateId = c(1:10 * 1000 + 1, 40002, 1003),
55 |     analysisId = rep(c(1, 2, 3), times = c(10, 1, 1))
56 |   )
57 |
58 |   # Store both tables in an Andromeda object and tag it as CovariateData by hand.
59 |   covariateData <- Andromeda::andromeda(
60 |     covariates = covariates, covariateRef = covariateRef
61 |   )
62 |   attr(covariateData, "metaData") <- metaData
63 |   class(covariateData) <- "CovariateData"
64 |
65 |   tidy <- tidyCovariateData(covariateData, normalize = TRUE, removeRedundancy = TRUE, minFraction = 0.1)
66 |
67 |   # The most prevalent covariate of analysis 1 (10001) should be gone:
68 |   expect_true(nrow(collect(filter(tidy$covariates, covariateId == 10001))) == 0)
69 |
70 |   # The infrequent covariate of analysis 1 (1001) should still be there:
71 |   expect_true(nrow(collect(filter(tidy$covariates, covariateId == 1001))) != 0)
72 |
73 |   # The infrequent covariate 1003 should be gone:
74 |   expect_true(nrow(collect(filter(tidy$covariates, covariateId == 1003))) == 0)
75 |
76 |   # The frequent covariate 40002 should still be there:
77 |   expect_true(nrow(collect(filter(tidy$covariates, covariateId == 40002))) != 0)
78 | })
79 |
80 | test_that("tidyCovariateData on Temporal Data", {
81 |   skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
82 |   temporalSettings <- createTemporalCovariateSettings(
83 |     useDrugExposure = TRUE, temporalStartDays = -2:-1, temporalEndDays = -2:-1
84 |   )
85 |   rawData <- getDbCovariateData(
86 |     connection = eunomiaConnection,
87 |     cdmDatabaseSchema = eunomiaCdmDatabaseSchema,
88 |     cohortIds = 1,
89 |     covariateSettings = temporalSettings
90 |   )
91 |   tidied <- tidyCovariateData(rawData)
92 |   # Tidying should keep the number of analysisRef entries unchanged.
93 |   nAnalysesBefore <- length(pull(rawData$analysisRef, analysisId))
94 |   expect_equal(length(pull(tidied$analysisRef, analysisId)), nAnalysesBefore)
95 | })
96 |
--------------------------------------------------------------------------------