├── .Rbuildignore ├── .classpath ├── .github ├── pull_request_template.md └── workflows │ ├── R_CMD_check_Hades.yaml │ ├── R_CMD_check_main_weekly.yaml │ └── nightly_cleanup_Hades.yml ├── .gitignore ├── .project ├── .settings └── org.eclipse.jdt.core.prefs ├── CRAN-SUBMISSION ├── DESCRIPTION ├── FeatureExtraction.Rproj ├── NAMESPACE ├── NEWS.md ├── R ├── Aggregation.R ├── CompareCohorts.R ├── CovariateData.R ├── DefaultCovariateSettings.R ├── DefaultTemporalCovariateSettings.R ├── DefaultTemporalSequenceCovariateSettings.R ├── DetailedCovariateSettings.R ├── FeatureExtraction.R ├── GetCovariates.R ├── GetCovariatesFromCohortAttributes.R ├── GetCovariatesFromOtherCohorts.R ├── GetDefaultCovariates.R ├── HelperFunctions.R ├── Normalization.R ├── Table1.R └── UnitTestHelperFunctions.R ├── README.md ├── _pkgdown.yml ├── compare_versions ├── cran-comments.md ├── deploy.sh ├── docs ├── 404.html ├── articles │ ├── CreatingCovariatesBasedOnOtherCohorts.html │ ├── CreatingCovariatesUsingCohortAttributes.html │ ├── CreatingCovariatesUsingCohortAttributes_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ ├── header-attrs-2.7 │ │ │ └── header-attrs.js │ │ └── header-attrs-2.9 │ │ │ └── header-attrs.js │ ├── CreatingCustomCovariateBuilders.html │ ├── CreatingCustomCovariateBuildersKorean.html │ ├── CreatingCustomCovariateBuildersKorean_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ ├── header-attrs-2.7 │ │ │ └── header-attrs.js │ │ └── header-attrs-2.9 │ │ │ └── header-attrs.js │ ├── CreatingCustomCovariateBuilders_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ ├── header-attrs-2.7 │ │ │ └── header-attrs.js │ │ └── header-attrs-2.9 │ │ │ └── header-attrs.js │ ├── Untitled-1f21019f-1a2d-4b5e-85a6-26b6d323301d.png │ ├── Untitled-4b3d9353-8da9-49e0-967b-69bc05653585.png │ ├── Untitled-9bab0a34-d162-407b-aee0-0fc6224987b4.png │ ├── Untitled-dd762182-9d8f-4065-bddf-6282630a0f99.png │ ├── 
UsingFeatureExtraction.html │ ├── UsingFeatureExtractionKorean.html │ ├── UsingFeatureExtractionKorean_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ ├── header-attrs-2.7 │ │ │ └── header-attrs.js │ │ └── header-attrs-2.9 │ │ │ └── header-attrs.js │ ├── UsingFeatureExtraction_files │ │ ├── accessible-code-block-0.0.1 │ │ │ └── empty-anchor.js │ │ ├── header-attrs-2.7 │ │ │ └── header-attrs.js │ │ └── header-attrs-2.9 │ │ │ └── header-attrs.js │ └── index.html ├── authors.html ├── bootstrap-toc.css ├── bootstrap-toc.js ├── docsearch.css ├── docsearch.js ├── index.html ├── link.svg ├── news │ └── index.html ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml ├── pull_request_template.html ├── reference │ ├── CovariateData-class.html │ ├── FeatureExtraction-package.html │ ├── FeatureExtraction.html │ ├── Rplot001.png │ ├── aggregateCovariates.html │ ├── byMaxFf.html │ ├── bySumFf.html │ ├── computeStandardizedDifference.html │ ├── convertPrespecSettingsToDetailedSettings.html │ ├── createAnalysisDetails.html │ ├── createCohortAttrCovariateSettings.html │ ├── createCohortBasedCovariateSettings.html │ ├── createCohortBasedTemporalCovariateSettings.html │ ├── createCovariateSettings.html │ ├── createDefaultCovariateSettings.html │ ├── createDefaultTemporalCovariateSettings.html │ ├── createDetailedCovariateSettings.html │ ├── createDetailedTemporalCovariateSettings.html │ ├── createEmptyCovariateData.html │ ├── createHdpsCovariateSettings.html │ ├── createTable1.html │ ├── createTable1CovariateSettings.html │ ├── createTemporalCovariateSettings.html │ ├── createTemporalSequenceCovariateSettings.html │ ├── dot-createLooCovariateSettings.html │ ├── dot-getDbLooCovariateData.html │ ├── filterByCohortDefinitionId.html │ ├── filterByRowId.html │ ├── filterCovariateDataCovariates.html │ ├── getDbCohortAttrCovariatesData.html │ ├── getDbCohortBasedCovariatesData.html │ ├── getDbCovariateData.html │ ├── getDbDefaultCovariateData.html │ ├── 
getDbHdpsCovariateData.html │ ├── getDefaultTable1Specifications.html │ ├── index.html │ ├── isAggregatedCovariateData.html │ ├── isCovariateData.html │ ├── isTemporalCovariateData.html │ ├── loadCovariateData.html │ ├── saveCovariateData.html │ └── tidyCovariateData.html └── sitemap.xml ├── extras ├── CohortBasedCovariatesVignetteDataFetch.R ├── DefaultCovariateSettingsTemplate.R ├── DetailedCovariateSettingsTemplate.R ├── FeatureExtraction.pdf ├── GetHdpsCovariates.R ├── PackageMaintenance.R ├── TestCode.R ├── TestHashForPostcoordinatedConcepts.R ├── VignetteDataFetch.R └── uniquePcCombos.rds ├── inst ├── csv │ ├── OtherParameters.csv │ ├── OtherSqlToLoad.csv │ ├── PrespecAnalyses.csv │ ├── PrespecTemporalAnalyses.csv │ ├── PrespecTemporalAnnualAnalysis.csv │ ├── PrespecTemporalSequenceAnalyses.csv │ ├── Table1Specs.csv │ └── jarChecksum.txt ├── doc │ ├── CreatingCovariatesBasedOnOtherCohorts.pdf │ ├── CreatingCovariatesUsingCohortAttributes.pdf │ ├── CreatingCustomCovariateBuilders.pdf │ ├── CreatingCustomCovariateBuildersKorean.pdf │ ├── UsingFeatureExtraction.pdf │ └── UsingFeatureExtractionKorean.pdf ├── java │ ├── SqlRender-1.19.1.jar │ ├── featureExtraction-3.10.0.jar │ └── json-20231013.jar ├── sql │ └── sql_server │ │ ├── CareSite.sql │ │ ├── Chads2.sql │ │ ├── Chads2Vasc.sql │ │ ├── CharlsonIndex.sql │ │ ├── CohortBasedBinaryCovariates.sql │ │ ├── CohortBasedCountCovariates.sql │ │ ├── ConceptCounts.sql │ │ ├── CreateCovAnalysisRefTables.sql │ │ ├── Dcsi.sql │ │ ├── DemographicsAge.sql │ │ ├── DemographicsAgeGroup.sql │ │ ├── DemographicsEthnicity.sql │ │ ├── DemographicsGender.sql │ │ ├── DemographicsMonth.sql │ │ ├── DemographicsRace.sql │ │ ├── DemographicsTime.sql │ │ ├── DemographicsYear.sql │ │ ├── DemographicsYearMonth.sql │ │ ├── DomainConcept.sql │ │ ├── DomainConceptGroup.sql │ │ ├── GetAttrCovariates.sql │ │ ├── GetHdpsCovariates.sql │ │ ├── Hfrs.sql │ │ ├── IncludeDescendants.sql │ │ ├── MeasObsValueAsConcept.sql │ │ ├── 
MeasurementRangeGroup.sql │ │ ├── MeasurementValue.sql │ │ ├── RemoveCovariateTempTables.sql │ │ ├── covariateCohorts.sql │ │ └── unit_tests │ │ ├── createTestingData.sql │ │ └── dropTestingData.sql └── testdata │ ├── binaryCovariateData.zip │ └── continuousCovariateData.zip ├── java ├── FeatureExtraction.jardesc └── org │ └── ohdsi │ └── featureExtraction │ ├── FeatureExtraction.java │ ├── JarChecksum.java │ ├── ReadCSVFile.java │ ├── ReadCSVFileWithHeader.java │ ├── Row.java │ └── StringUtilities.java ├── man-roxygen └── GetCovarParams.R ├── man ├── CovariateData-class.Rd ├── FeatureExtraction-package.Rd ├── aggregateCovariates.Rd ├── computeStandardizedDifference.Rd ├── convertPrespecSettingsToDetailedSettings.Rd ├── createAnalysisDetails.Rd ├── createCohortAttrCovariateSettings.Rd ├── createCohortBasedCovariateSettings.Rd ├── createCohortBasedTemporalCovariateSettings.Rd ├── createCovariateSettings.Rd ├── createDefaultCovariateSettings.Rd ├── createDefaultTemporalCovariateSettings.Rd ├── createDetailedCovariateSettings.Rd ├── createDetailedTemporalCovariateSettings.Rd ├── createEmptyCovariateData.Rd ├── createTable1.Rd ├── createTable1CovariateSettings.Rd ├── createTemporalCovariateSettings.Rd ├── createTemporalSequenceCovariateSettings.Rd ├── dot-createLooCovariateSettings.Rd ├── dot-getDbLooCovariateData.Rd ├── filterByCohortDefinitionId.Rd ├── filterByRowId.Rd ├── getDbCohortAttrCovariatesData.Rd ├── getDbCohortBasedCovariatesData.Rd ├── getDbCovariateData.Rd ├── getDbDefaultCovariateData.Rd ├── getDefaultTable1Specifications.Rd ├── isAggregatedCovariateData.Rd ├── isCovariateData.Rd ├── isTemporalCovariateData.Rd ├── loadCovariateData.Rd ├── saveCovariateData.Rd └── tidyCovariateData.Rd ├── nbactions.xml ├── pom.xml ├── tests ├── testBigQuery.R ├── testOracle.R ├── testPostgres.R ├── testRedshift.R ├── testSnowflake.R ├── testSpark.R ├── testSqlServer.R ├── testSqlite.R └── testthat │ ├── setup.R │ ├── test-Aggregation.R │ ├── test-CompareCohorts.R │ ├── 
test-CovariateData.R │ ├── test-DetailedCovariateSettings.R │ ├── test-FeatureExtractionInternal.R │ ├── test-GetCohortBasedCovariates.R │ ├── test-GetCovariates.R │ ├── test-GetCovariatesFromCohortAttributes.R │ ├── test-GetCovariatesTemporalSequence.R │ ├── test-GetDefaultCovariates.R │ ├── test-HelperFunctions.R │ ├── test-PostcoordConcepts.R │ ├── test-PrespecAnalyses.R │ ├── test-Table1.R │ ├── test-query-no-fail.R │ ├── test-spot-checks.R │ └── test-tidyCovariates.R └── vignettes ├── CreatingCovariatesBasedOnOtherCohorts.Rmd ├── CreatingCovariatesUsingCohortAttributes.Rmd ├── CreatingCustomCovariateBuilders.Rmd ├── CreatingCustomCovariateBuildersKorean.Rmd ├── UsingFeatureExtraction.Rmd └── UsingFeatureExtractionKorean.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | pom.xml 2 | extras 3 | docs 4 | man-roxygen 5 | ^.*\.Rproj$ 6 | ^\.Rproj\.user$ 7 | .classpath 8 | .project 9 | .github 10 | ^\.travis\.yml$ 11 | deploy.sh 12 | ^\.git 13 | compare_versions 14 | _pkgdown.yml 15 | nbactions.xml 16 | ^CRAN-SUBMISSION$ 17 | ^cran-comments\.md$ 18 | -------------------------------------------------------------------------------- /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Before you do a pull request, you should always **file an issue** and make sure the package maintainer agrees that it’s a problem, and is happy with your basic proposal for fixing it. We don’t want you to spend a bunch of time on something that we don’t think is a good idea. 
2 | 3 | Additional requirements for pull requests: 4 | 5 | - Adhere to the [Developer Guidelines](https://ohdsi.github.io/MethodsLibrary/developerGuidelines.html) as well as the [OHDSI Code Style](https://ohdsi.github.io/MethodsLibrary/codeStyle.html). 6 | 7 | - If possible, add unit tests for new functionality you add. 8 | 9 | - Restrict your pull request to solving the issue at hand. Do not try to 'improve' parts of the code that are not related to the issue. If you feel other parts of the code need better organization, create a separate issue for that. 10 | 11 | - Make sure you pass R check without errors and warnings before submitting. 12 | 13 | - Always target the `develop` branch, and make sure you are up-to-date with the develop branch. 14 | 15 | -------------------------------------------------------------------------------- /.github/workflows/R_CMD_check_main_weekly.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | schedule: 3 | - cron: '0 5 * * 0' # every Sunday at 5am UTC 4 | 5 | name: 'R check' 6 | 7 | jobs: 8 | R-CMD-check-main: 9 | runs-on: ${{ matrix.config.os }} 10 | 11 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 12 | 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | config: 17 | - {os: macOS-latest, r: 'release'} 18 | 19 | env: 20 | GITHUB_PAT: ${{ secrets.GH_TOKEN }} 21 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 22 | RSPM: ${{ matrix.config.rspm }} 23 | CDM5_ORACLE_CDM_SCHEMA: ${{ secrets.CDM5_ORACLE_CDM54_SCHEMA }} 24 | CDM5_ORACLE_OHDSI_SCHEMA: ${{ secrets.CDM5_ORACLE_OHDSI_SCHEMA }} 25 | CDM5_ORACLE_PASSWORD: ${{ secrets.CDM5_ORACLE_PASSWORD }} 26 | CDM5_ORACLE_SERVER: ${{ secrets.CDM5_ORACLE_SERVER }} 27 | CDM5_ORACLE_USER: ${{ secrets.CDM5_ORACLE_USER }} 28 | CDM5_POSTGRESQL_CDM_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_CDM54_SCHEMA }} 29 | CDM5_POSTGRESQL_OHDSI_SCHEMA: ${{ secrets.CDM5_POSTGRESQL_OHDSI_SCHEMA }} 30 | CDM5_POSTGRESQL_PASSWORD: ${{ secrets.CDM5_POSTGRESQL_PASSWORD }} 
31 | CDM5_POSTGRESQL_SERVER: ${{ secrets.CDM5_POSTGRESQL_SERVER }} 32 | CDM5_POSTGRESQL_USER: ${{ secrets.CDM5_POSTGRESQL_USER }} 33 | CDM5_SQL_SERVER_CDM_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_CDM54_SCHEMA }} 34 | CDM5_SQL_SERVER_OHDSI_SCHEMA: ${{ secrets.CDM5_SQL_SERVER_OHDSI_SCHEMA }} 35 | CDM5_SQL_SERVER_PASSWORD: ${{ secrets.CDM5_SQL_SERVER_PASSWORD }} 36 | CDM5_SQL_SERVER_SERVER: ${{ secrets.CDM5_SQL_SERVER_SERVER }} 37 | CDM5_SQL_SERVER_USER: ${{ secrets.CDM5_SQL_SERVER_USER }} 38 | CDM5_REDSHIFT_CDM_SCHEMA: ${{ secrets.CDM5_REDSHIFT_CDM54_SCHEMA }} 39 | CDM5_REDSHIFT_OHDSI_SCHEMA: ${{ secrets.CDM5_REDSHIFT_OHDSI_SCHEMA }} 40 | CDM5_REDSHIFT_PASSWORD: ${{ secrets.CDM5_REDSHIFT_PASSWORD }} 41 | CDM5_REDSHIFT_SERVER: ${{ secrets.CDM5_REDSHIFT_SERVER }} 42 | CDM5_REDSHIFT_USER: ${{ secrets.CDM5_REDSHIFT_USER }} 43 | CDM5_SPARK_USER: ${{ secrets.CDM5_SPARK_USER }} 44 | CDM5_SPARK_PASSWORD: ${{ secrets.CDM5_SPARK_PASSWORD }} 45 | CDM5_SPARK_CONNECTION_STRING: ${{ secrets.CDM5_SPARK_CONNECTION_STRING }} 46 | 47 | steps: 48 | - uses: actions/checkout@v3 49 | 50 | - uses: r-lib/actions/setup-r@v2 51 | with: 52 | r-version: ${{ matrix.config.r }} 53 | 54 | - uses: r-lib/actions/setup-tinytex@v2 55 | 56 | - uses: r-lib/actions/setup-pandoc@v2 57 | 58 | - uses: r-lib/actions/setup-r-dependencies@v2 59 | with: 60 | extra-packages: any::rcmdcheck 61 | needs: check 62 | 63 | - uses: r-lib/actions/check-r-package@v2 64 | with: 65 | args: 'c("--no-manual", "--as-cran")' 66 | error-on: '"warning"' 67 | check-dir: '"check"' 68 | -------------------------------------------------------------------------------- /.github/workflows/nightly_cleanup_Hades.yml: -------------------------------------------------------------------------------- 1 | name: 'nightly artifacts cleanup' 2 | on: 3 | schedule: 4 | - cron: '0 1 * * *' # every night at 1 am UTC 5 | 6 | jobs: 7 | remove-old-artifacts: 8 | runs-on: ubuntu-latest 9 | timeout-minutes: 10 10 | 11 | steps: 12 | - name: Remove old 
artifacts 13 | uses: c-hive/gha-remove-artifacts@v1 14 | with: 15 | age: '7 days' 16 | # Optional inputs 17 | # skip-tags: true 18 | skip-recent: 1 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | 4 | # Example code in package build process 5 | *-Ex.R 6 | 7 | # R data files from past sessions 8 | .Rdata 9 | 10 | # RStudio files 11 | .Rproj.user/ 12 | .Rproj.user 13 | 14 | # SqlRender 15 | statement_*.sql 16 | errorReport.txt 17 | 18 | #C++ objects 19 | src/*.o 20 | src/*.so 21 | src/*.dll 22 | /Debug 23 | standalone/build/* 24 | 25 | # Java compiled files 26 | /bin 27 | 28 | /target/ 29 | 30 | *.tex 31 | *.log 32 | .Renviron -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | FeatureExtraction 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | org.eclipse.m2e.core.maven2Builder 15 | 16 | 17 | 18 | 19 | 20 | org.eclipse.m2e.core.maven2Nature 21 | org.eclipse.jdt.core.javanature 22 | 23 | 24 | 25 | 1628619985913 26 | 27 | 30 28 | 29 | org.eclipse.core.resources.regexFilterMatcher 30 | node_modules|.git|__CREATED_BY_JAVA_LANGUAGE_SERVER__ 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 3 | org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate 4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 6 | org.eclipse.jdt.core.compiler.compliance=1.8 7 | 
org.eclipse.jdt.core.compiler.debug.lineNumber=generate 8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 11 | org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled 12 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 13 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning 14 | org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore 15 | org.eclipse.jdt.core.compiler.processAnnotations=disabled 16 | org.eclipse.jdt.core.compiler.release=disabled 17 | org.eclipse.jdt.core.compiler.source=1.8 18 | -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | Version: 3.8.0 2 | Date: 2025-03-19 15:22:25 UTC 3 | SHA: c0961a155c6fba22f3b5e4825b599f2410ed529b 4 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: FeatureExtraction 2 | Type: Package 3 | Title: Generating Features for a Cohort 4 | Version: 3.10.0 5 | Date: 2025-05-08 6 | Authors@R: c( 7 | person("Martijn", "Schuemie", , "schuemie@ohdsi.org", role = c("aut")), 8 | person("Marc", "Suchard", role = c("aut")), 9 | person("Patrick", "Ryan", role = c("aut")), 10 | person("Jenna", "Reps", role = c("aut")), 11 | person("Anthony", "Sena", , "sena@ohdsi.org", role = c("aut")), 12 | person("Ger", "Inberg", , "g.inberg@erasmusmc.nl", role = c("aut", "cre")), 13 | person("Observational Health Data Science and Informatics", role = c("cph")) 14 | ) 15 | Maintainer: Ger Inberg 16 | Description: An R interface for generating features for a cohort using data in the Common Data Model. Features can be constructed using default or custom made feature definitions. 
Furthermore it's possible to aggregate features and get the summary statistics. 17 | Depends: 18 | R (>= 3.2.2), 19 | DatabaseConnector (>= 3.0.0), 20 | Andromeda (>= 1.0.0) 21 | Imports: 22 | methods, 23 | dplyr, 24 | rJava, 25 | jsonlite, 26 | SqlRender (>= 1.18.0), 27 | ParallelLogger (>= 2.0.2), 28 | cli, 29 | pillar, 30 | readr, 31 | rlang, 32 | RSQLite, 33 | DBI, 34 | checkmate, 35 | vroom 36 | Suggests: 37 | testthat, 38 | knitr, 39 | rmarkdown, 40 | Eunomia (>= 2.0.0), 41 | withr, 42 | curl, 43 | httr 44 | License: Apache License 2.0 45 | VignetteBuilder: knitr 46 | URL: https://github.com/OHDSI/FeatureExtraction 47 | BugReports: https://github.com/OHDSI/FeatureExtraction/issues 48 | NeedsCompilation: no 49 | RoxygenNote: 7.3.2 50 | Encoding: UTF-8 51 | Language: en-US 52 | -------------------------------------------------------------------------------- /FeatureExtraction.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: 7209d25a-3fa0-4681-8605-9ab497fb422c 3 | 4 | RestoreWorkspace: No 5 | SaveWorkspace: No 6 | AlwaysSaveHistory: No 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 2 11 | Encoding: UTF-8 12 | 13 | RnwWeave: Sweave 14 | LaTeX: pdfLaTeX 15 | 16 | BuildType: Package 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageCheckArgs: --no-build-vignettes 19 | PackageRoxygenize: rd,collate,namespace 20 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,summary.CovariateData) 4 | export(aggregateCovariates) 5 | export(computeStandardizedDifference) 6 | export(convertPrespecSettingsToDetailedSettings) 7 | export(createAnalysisDetails) 8 | export(createCohortAttrCovariateSettings) 9 | export(createCohortBasedCovariateSettings) 10 | 
export(createCohortBasedTemporalCovariateSettings) 11 | export(createCovariateSettings) 12 | export(createDefaultCovariateSettings) 13 | export(createDefaultTemporalCovariateSettings) 14 | export(createDetailedCovariateSettings) 15 | export(createDetailedTemporalCovariateSettings) 16 | export(createEmptyCovariateData) 17 | export(createTable1) 18 | export(createTable1CovariateSettings) 19 | export(createTemporalCovariateSettings) 20 | export(createTemporalSequenceCovariateSettings) 21 | export(filterByCohortDefinitionId) 22 | export(filterByRowId) 23 | export(getDbCohortAttrCovariatesData) 24 | export(getDbCohortBasedCovariatesData) 25 | export(getDbCovariateData) 26 | export(getDbDefaultCovariateData) 27 | export(getDefaultTable1Specifications) 28 | export(isAggregatedCovariateData) 29 | export(isCovariateData) 30 | export(isTemporalCovariateData) 31 | export(loadCovariateData) 32 | export(saveCovariateData) 33 | export(tidyCovariateData) 34 | exportClasses(CovariateData) 35 | exportMethods(show) 36 | exportMethods(summary) 37 | import(Andromeda) 38 | import(DatabaseConnector) 39 | import(dplyr) 40 | importClassesFrom(DBI,DBIConnection) 41 | importClassesFrom(DBI,DBIObject) 42 | importClassesFrom(RSQLite,SQLiteConnection) 43 | importFrom(SqlRender,loadRenderTranslateSql) 44 | importFrom(SqlRender,render) 45 | importFrom(SqlRender,translate) 46 | importFrom(methods,is) 47 | importFrom(rlang,.data) 48 | importFrom(stats,aggregate) 49 | importFrom(stats,quantile) 50 | importFrom(stats,sd) 51 | importFrom(utils,read.csv) 52 | -------------------------------------------------------------------------------- /R/FeatureExtraction.R: -------------------------------------------------------------------------------- 1 | # @file FeatureExtraction.R 2 | # 3 | # Copyright 2025 Observational Health Data Sciences and Informatics 4 | # 5 | # This file is part of FeatureExtraction 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this 
# Package load hook. Registers the JARs bundled with the package through
# rJava, then compares the checksum stored in inst/csv/jarChecksum.txt with
# the checksum reported by the loaded Java library.
#
# libname: library directory the package is loaded from (supplied by R).
# pkgname: name of the package being loaded (supplied by R).
#
# Called for its side effects only. Checksum problems are reported as
# warnings so that they never prevent the package from loading.
.onLoad <- function(libname, pkgname) {
  rJava::.jpackage(pkgname, lib.loc = libname)

  # Verify checksum of JAR:
  checksumFile <- system.file("csv", "jarChecksum.txt", package = "FeatureExtraction")
  # system.file() returns "" when the file is missing, and scan(file = "")
  # would then read from the console, so only scan a real path.
  storedChecksum <- if (nzchar(checksumFile)) {
    scan(file = checksumFile, what = character(), quiet = TRUE)
  } else {
    character()
  }
  computedChecksum <- tryCatch(
    rJava::J("org.ohdsi.featureExtraction.JarChecksum", "computeJarChecksum"),
    error = function(e) {
      warning("Problem connecting to Java. This is normal when running roxygen.")
      return("")
    }
  )

  # identical() always yields a single TRUE/FALSE, so an empty or multi-token
  # checksum file leads to the mismatch warning instead of an error in `if`.
  javaReachable <- !identical(computedChecksum, "")
  checksumsMatch <- identical(as.character(storedChecksum), as.character(computedChecksum))
  if (javaReachable && !checksumsMatch) {
    # Built with paste() so the message does not carry the line breaks and
    # indentation of the source file.
    warning(paste(
      "Java library version does not match R package version!",
      "Please try reinstalling the FeatureExtraction package.",
      "Make sure to close all instances of R, and open only one instance before reinstalling.",
      "Also make sure your R workspace is not reloaded on startup.",
      "Delete your .Rdata file if necessary"
    ))
  }
}

# Serialize an R object to a single JSON string via jsonlite::toJSON, with
# `force = TRUE` and `auto_unbox = TRUE`.
.toJson <- function(object) {
  return(as.character(jsonlite::toJSON(object, force = TRUE, auto_unbox = TRUE)))
}

# Parse a JSON string via jsonlite::fromJSON, simplifying vectors but not
# data frames.
.fromJson <- function(json) {
  return(jsonlite::fromJSON(json, simplifyVector = TRUE, simplifyDataFrame = FALSE))
}
#' Filter covariates by row ID
#'
#' @param covariateData  An object of type \code{CovariateData}
#' @param rowIds         A vector containing the rowIds to keep.
#'
#' @details
#' Only per-person (non-aggregated) data can be filtered by row ID. The
#' \code{populationSize} entry of the metadata is set to the number of
#' distinct \code{rowIds} supplied.
#'
#' @return
#' An object of type \code{CovariateData}.
#'
#' @examples
#' \donttest{
#' covariateData <- FeatureExtraction::createEmptyCovariateData(
#'   cohortIds = 1,
#'   aggregated = FALSE,
#'   temporal = FALSE
#' )
#'
#' covData <- filterByRowId(
#'   covariateData = covariateData,
#'   rowIds = 1
#' )
#' }
#'
#' @export
filterByRowId <- function(covariateData, rowIds) {
  if (!isCovariateData(covariateData)) {
    stop("Data not of class CovariateData")
  }
  if (!Andromeda::isValidAndromeda(covariateData)) {
    stop("CovariateData object is closed")
  }
  if (isAggregatedCovariateData(covariateData)) {
    stop("Cannot filter aggregated data by rowId")
  }

  covariates <- covariateData$covariates %>%
    filter(.data$rowId %in% rowIds)

  result <- Andromeda::andromeda(
    covariates = covariates,
    covariateRef = covariateData$covariateRef,
    analysisRef = covariateData$analysisRef
  )
  metaData <- attr(covariateData, "metaData")
  # `%in%` ignores repeated IDs, so count each requested row ID once.
  metaData$populationSize <- length(unique(rowIds))
  attr(result, "metaData") <- metaData
  class(result) <- "CovariateData"
  # Tag the class with its package, as filterByCohortDefinitionId() does.
  attr(class(result), "package") <- "FeatureExtraction"
  return(result)
}
#' Filter covariates by cohort definition IDs
#'
#' @param covariateData  An object of type \code{CovariateData}
#' @param cohortId       DEPRECATED The cohort definition IDs to keep.
#' @param cohortIds      The cohort definition IDs to keep.
#'
#' @return
#' An object of type \code{CovariateData}.
#'
#' @examples
#' \donttest{
#' covariateData <- FeatureExtraction::createEmptyCovariateData(
#'   cohortIds = c(1, 2),
#'   aggregated = TRUE,
#'   temporal = FALSE
#' )
#'
#' covData <- filterByCohortDefinitionId(
#'   covariateData = covariateData,
#'   cohortIds = c(1)
#' )
#' }
#'
#' @export
filterByCohortDefinitionId <- function(covariateData,
                                       cohortId = 1,
                                       cohortIds = c(1)) {
  if (!isCovariateData(covariateData)) {
    stop("Data not of class CovariateData")
  }
  if (!Andromeda::isValidAndromeda(covariateData)) {
    stop("CovariateData object is closed")
  }
  if (!isAggregatedCovariateData(covariateData)) {
    stop("Can only filter aggregated data by cohortIds")
  }
  # An explicitly supplied (deprecated) cohortId overrides cohortIds.
  if (!missing(cohortId)) {
    warning("cohortId argument has been deprecated, please use cohortIds")
    cohortIds <- cohortId
  }

  # Either covariate table may be absent; filter only the ones present.
  if (is.null(covariateData$covariates)) {
    covariates <- NULL
  } else {
    covariates <- covariateData$covariates %>%
      filter(.data$cohortDefinitionId %in% cohortIds)
  }
  if (is.null(covariateData$covariatesContinuous)) {
    covariatesContinuous <- NULL
  } else {
    covariatesContinuous <- covariateData$covariatesContinuous %>%
      filter(.data$cohortDefinitionId %in% cohortIds)
  }
  result <- Andromeda::andromeda(
    covariates = covariates,
    covariatesContinuous = covariatesContinuous,
    covariateRef = covariateData$covariateRef,
    analysisRef = covariateData$analysisRef
  )
  metaData <- attr(covariateData, "metaData")
  # populationSize is indexed by cohort ID through its names; keep only the
  # entries for the requested cohorts.
  metaData$populationSize <- metaData$populationSize[as.numeric(names(metaData$populationSize)) %in% cohortIds]
  attr(result, "metaData") <- metaData
  class(result) <- "CovariateData"
  attr(class(result), "package") <- "FeatureExtraction"
  return(result)
}

# Assert that `covariateId` is a numeric vector of whole numbers.
#
# covariateId: value to check.
# len:         exact required length, or NULL for no constraint.
# min.len:     minimum required length; NULL keeps the historical minimum of 1.
# null.ok:     whether NULL is an acceptable value for `covariateId`.
# add:         optional checkmate AssertCollection to collect failures in.
.assertCovariateId <- function(covariateId, len = NULL, min.len = NULL, null.ok = FALSE, add = NULL) {
  # Previously `min.len` was accepted but a hard-coded 1 was always used.
  if (is.null(min.len)) {
    min.len <- 1
  }
  checkmate::assertNumeric(covariateId, null.ok = null.ok, len = len, min.len = min.len, add = add)
  if (!is.null(covariateId)) {
    # Recover the expression the caller passed so the failure message names
    # the caller's variable rather than `covariateId`.
    message <- sprintf(
      "Variable '%s' is a (64-bit) integer",
      paste0(deparse(eval.parent(substitute(substitute(covariateId))), width.cutoff = 500L), collapse = "\n")
    )
    checkmate::assertTRUE(all(covariateId == round(covariateId)), .var.name = message, add = add)
  }
}
Installation on Windows requires [RTools](https://cran.r-project.org/bin/windows/Rtools/). FeatureExtraction requires Java. 29 | 30 | Getting Started 31 | =============== 32 | 1. See the instructions [here](https://ohdsi.github.io/Hades/rSetup.html) for configuring your R environment, including RTools and Java. 33 | 34 | 2. In R, use the following commands to download and install FeatureExtraction: 35 | 36 | ```r 37 | install.packages("drat") 38 | drat::addRepo("OHDSI") 39 | install.packages("FeatureExtraction") 40 | ``` 41 | 42 | User Documentation 43 | ================== 44 | The documentation website can be found at [https://ohdsi.github.io/FeatureExtraction/](https://ohdsi.github.io/FeatureExtraction/). PDF versions of the vignettes and package manual are here: 45 | 46 | * Vignette: [Using FeatureExtraction](https://raw.githubusercontent.com/OHDSI/FeatureExtraction/main/inst/doc/UsingFeatureExtraction.pdf) 47 | * Vignette: [Creating covariates using cohort attributes](https://raw.githubusercontent.com/OHDSI/FeatureExtraction/main/inst/doc/CreatingCovariatesUsingCohortAttributes.pdf) 48 | * Vignette: [Creating custom covariate builders](https://raw.githubusercontent.com/OHDSI/FeatureExtraction/main/inst/doc/CreatingCustomCovariateBuilders.pdf) 49 | * Vignette: [Creating covariates based on other cohorts](https://raw.githubusercontent.com/OHDSI/FeatureExtraction/main/inst/doc/CreatingCovariatesBasedOnOtherCohorts.pdf) 50 | * Package manual: [FeatureExtraction manual](https://raw.githubusercontent.com/OHDSI/FeatureExtraction/main/extras/FeatureExtraction.pdf) 51 | 52 | These vignettes are also available in Korean: 53 | 54 | * Vignette: [Using FeatureExtraction](https://raw.githubusercontent.com/OHDSI/FeatureExtraction/main/inst/doc/UsingFeatureExtractionKorean.pdf) 55 | * Vignette: [Creating custom covariate builders](https://raw.githubusercontent.com/OHDSI/FeatureExtraction/main/inst/doc/CreatingCustomCovariateBuildersKorean.pdf) 56 | 57 | 58 | Support 59 | 
======= 60 | * Developer questions/comments/feedback: OHDSI Forum 61 | * We use the GitHub issue tracker for all bugs/issues/enhancements 62 | 63 | Contributing 64 | ============ 65 | Read [here](https://ohdsi.github.io/Hades/contribute.html) how you can contribute to this package. 66 | 67 | License 68 | ======= 69 | FeatureExtraction is licensed under Apache License 2.0 70 | 71 | Development 72 | =========== 73 | FeatureExtraction is being developed in R Studio. 74 | 75 | ### Development status 76 | 77 | Ready for use 78 | 79 | # Acknowledgements 80 | - This project is supported in part through the National Science Foundation grant IIS 1251151. 81 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | template: 2 | params: 3 | bootswatch: cosmo 4 | 5 | home: 6 | links: 7 | - text: Ask a question 8 | href: http://forums.ohdsi.org 9 | 10 | navbar: 11 | structure: 12 | right: [hades, github] 13 | components: 14 | hades: 15 | text: hadesLogo 16 | href: https://ohdsi.github.io/Hades 17 | -------------------------------------------------------------------------------- /compare_versions: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | open(R_VERSION, "grep 'Version' DESCRIPTION |"); 4 | $version = ; 5 | close(R_VERSION); 6 | 7 | $version =~ /(\d+)\.(\d+)\.(\d+)/; 8 | $r_major = $1; 9 | $r_minor = $2; 10 | $r_mod = $3; 11 | 12 | open(GIT_VERSION, "git describe --tags |"); 13 | $git = ; 14 | close(GIT_VERSION); 15 | 16 | $git =~ /v(\d+)\.(\d+)\.(\d+)/; 17 | $git_major = $1; 18 | $git_minor = $2; 19 | $git_mod = $3; 20 | 21 | if ($r_major > $git_major || $r_minor > $git_minor || $r_mod > $git_mod) { 22 | $new_version = "v$r_major.$r_minor.$r_mod"; 23 | } else { 24 | $new_version = ""; 25 | } 26 | 27 | print($new_version); 28 | 
-------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 1 note 4 | 5 | * This is a new release. 6 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o errexit -o nounset 3 | addToDrat(){ 4 | PKG_REPO=$PWD 5 | 6 | ## Build package tar ball 7 | export PKG_TARBALL=$(ls *.tar.gz) 8 | 9 | cd ..; mkdir drat; cd drat 10 | 11 | ## Set up Repo parameters 12 | git init 13 | git config user.name "Martijn Schuemie" 14 | git config user.email "schuemie@ohdsi.org" 15 | git config --global push.default simple 16 | 17 | ## Get drat repo 18 | git remote add upstream "https://$GH_TOKEN@github.com/OHDSI/drat.git" 19 | git fetch upstream 2>err.txt 20 | git checkout gh-pages 21 | 22 | ## Link to local R packages 23 | echo 'R_LIBS=~/Rlib' > .Renviron 24 | 25 | Rscript -e "drat::insertPackage('$PKG_REPO/$PKG_TARBALL', \ 26 | repodir = '.', \ 27 | commit='GitHub Actions release: $PKG_TARBALL run $GITHUB_RUN_ID')" 28 | git push 29 | 30 | } 31 | addToDrat 32 | -------------------------------------------------------------------------------- /docs/articles/CreatingCovariatesUsingCohortAttributes_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 
4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/CreatingCovariatesUsingCohortAttributes_files/header-attrs-2.7/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/CreatingCovariatesUsingCohortAttributes_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/CreatingCustomCovariateBuildersKorean_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/CreatingCustomCovariateBuildersKorean_files/header-attrs-2.7/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/CreatingCustomCovariateBuildersKorean_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/CreatingCustomCovariateBuilders_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 
4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/CreatingCustomCovariateBuilders_files/header-attrs-2.7/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/CreatingCustomCovariateBuilders_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/Untitled-1f21019f-1a2d-4b5e-85a6-26b6d323301d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/docs/articles/Untitled-1f21019f-1a2d-4b5e-85a6-26b6d323301d.png -------------------------------------------------------------------------------- /docs/articles/Untitled-4b3d9353-8da9-49e0-967b-69bc05653585.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/docs/articles/Untitled-4b3d9353-8da9-49e0-967b-69bc05653585.png -------------------------------------------------------------------------------- /docs/articles/Untitled-9bab0a34-d162-407b-aee0-0fc6224987b4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/docs/articles/Untitled-9bab0a34-d162-407b-aee0-0fc6224987b4.png -------------------------------------------------------------------------------- /docs/articles/Untitled-dd762182-9d8f-4065-bddf-6282630a0f99.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/docs/articles/Untitled-dd762182-9d8f-4065-bddf-6282630a0f99.png 
-------------------------------------------------------------------------------- /docs/articles/UsingFeatureExtractionKorean_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/UsingFeatureExtractionKorean_files/header-attrs-2.7/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/UsingFeatureExtractionKorean_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/UsingFeatureExtraction_files/accessible-code-block-0.0.1/empty-anchor.js: -------------------------------------------------------------------------------- 1 | // Hide empty tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) --> 2 | // v0.0.1 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020. 
4 | 5 | document.addEventListener('DOMContentLoaded', function() { 6 | const codeList = document.getElementsByClassName("sourceCode"); 7 | for (var i = 0; i < codeList.length; i++) { 8 | var linkList = codeList[i].getElementsByTagName('a'); 9 | for (var j = 0; j < linkList.length; j++) { 10 | if (linkList[j].innerHTML === "") { 11 | linkList[j].setAttribute('aria-hidden', 'true'); 12 | } 13 | } 14 | } 15 | }); 16 | -------------------------------------------------------------------------------- /docs/articles/UsingFeatureExtraction_files/header-attrs-2.7/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/UsingFeatureExtraction_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | 
padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 
44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('.navbar-fixed-top').headroom(); 6 | 7 | $('body').css('padding-top', $('.navbar').height() + 10); 8 | $(window).resize(function(){ 9 | $('body').css('padding-top', $('.navbar').height() + 10); 10 | }); 11 | 12 | $('[data-toggle="tooltip"]').tooltip(); 13 | 14 | var cur_path = paths(location.pathname); 15 | var links = $("#navbar ul li a"); 16 | var max_length = -1; 17 | var pos = -1; 18 | for (var i = 0; i < links.length; i++) { 19 | if (links[i].getAttribute("href") === "#") 20 | continue; 21 | // Ignore external links 22 | if (links[i].host !== 
location.host) 23 | continue; 24 | 25 | var nav_path = paths(links[i].pathname); 26 | 27 | var length = prefix_length(nav_path, cur_path); 28 | if (length > max_length) { 29 | max_length = length; 30 | pos = i; 31 | } 32 | } 33 | 34 | // Add class to parent
<li>, and enclosing <li> if in dropdown 35 | if (pos >= 0) { 36 | var menu_anchor = $(links[pos]); 37 | menu_anchor.parent().addClass("active"); 38 | menu_anchor.closest("li.dropdown").addClass("active"); 39 | } 40 | }); 41 | 42 | function paths(pathname) { 43 | var pieces = pathname.split("/"); 44 | pieces.shift(); // always starts with / 45 | 46 | var end = pieces[pieces.length - 1]; 47 | if (end === "index.html" || end === "") 48 | pieces.pop(); 49 | return(pieces); 50 | } 51 | 52 | // Returns -1 if not found 53 | function prefix_length(needle, haystack) { 54 | if (needle.length > haystack.length) 55 | return(-1); 56 | 57 | // Special case for length-0 haystack, since for loop won't run 58 | if (haystack.length === 0) { 59 | return(needle.length === 0 ? 0 : -1); 60 | } 61 | 62 | for (var i = 0; i < haystack.length; i++) { 63 | if (needle[i] != haystack[i]) 64 | return(i); 65 | } 66 | 67 | return(haystack.length); 68 | } 69 | 70 | /* Clipboard --------------------------*/ 71 | 72 | function changeTooltipMessage(element, msg) { 73 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 74 | element.setAttribute('data-original-title', msg); 75 | $(element).tooltip('show'); 76 | element.setAttribute('data-original-title', tooltipOriginalTitle); 77 | } 78 | 79 | if(ClipboardJS.isSupported()) { 80 | $(document).ready(function() { 81 | var copyButton = ""; 82 | 83 | $("div.sourceCode").addClass("hasCopyButton"); 84 | 85 | // Insert copy buttons: 86 | $(copyButton).prependTo(".hasCopyButton"); 87 | 88 | // Initialize tooltips: 89 | $('.btn-copy-ex').tooltip({container: 'body'}); 90 | 91 | // Initialize clipboard: 92 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { 93 | text: function(trigger) { 94 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 95 | } 96 | }); 97 | 98 | clipboardBtnCopies.on('success', function(e) { 99 | changeTooltipMessage(e.trigger, 'Copied!'); 100 | e.clearSelection(); 101 | }); 102 | 103 | 
clipboardBtnCopies.on('error', function() { 104 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 105 | }); 106 | }); 107 | } 108 | })(window.jQuery || window.$) 109 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: '3.2' 2 | pkgdown: 2.1.0 3 | pkgdown_sha: ~ 4 | articles: 5 | CreatingCovariatesBasedOnOtherCohorts: CreatingCovariatesBasedOnOtherCohorts.html 6 | CreatingCovariatesUsingCohortAttributes: CreatingCovariatesUsingCohortAttributes.html 7 | CreatingCustomCovariateBuilders: CreatingCustomCovariateBuilders.html 8 | CreatingCustomCovariateBuildersKorean: CreatingCustomCovariateBuildersKorean.html 9 | UsingFeatureExtraction: UsingFeatureExtraction.html 10 | UsingFeatureExtractionKorean: UsingFeatureExtractionKorean.html 11 | last_built: 2025-05-08T13:26Z 12 | -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | /404.html 3 | /articles/CreatingCovariatesBasedOnOtherCohorts.html 4 | /articles/CreatingCovariatesUsingCohortAttributes.html 5 | /articles/CreatingCustomCovariateBuilders.html 6 | /articles/CreatingCustomCovariateBuildersKorean.html 7 | /articles/UsingFeatureExtraction.html 8 | /articles/UsingFeatureExtractionKorean.html 9 | /articles/index.html 10 | /authors.html 11 | /index.html 12 | /news/index.html 13 | /pull_request_template.html 14 | /reference/CovariateData-class.html 15 | /reference/FeatureExtraction-package.html 16 | 
/reference/aggregateCovariates.html 17 | /reference/byMaxFf.html 18 | /reference/bySumFf.html 19 | /reference/computeStandardizedDifference.html 20 | /reference/convertPrespecSettingsToDetailedSettings.html 21 | /reference/createAnalysisDetails.html 22 | /reference/createCohortAttrCovariateSettings.html 23 | /reference/createCohortBasedCovariateSettings.html 24 | /reference/createCohortBasedTemporalCovariateSettings.html 25 | /reference/createCovariateSettings.html 26 | /reference/createDefaultCovariateSettings.html 27 | /reference/createDefaultTemporalCovariateSettings.html 28 | /reference/createDetailedCovariateSettings.html 29 | /reference/createDetailedTemporalCovariateSettings.html 30 | /reference/createEmptyCovariateData.html 31 | /reference/createHdpsCovariateSettings.html 32 | /reference/createTable1.html 33 | /reference/createTable1CovariateSettings.html 34 | /reference/createTemporalCovariateSettings.html 35 | /reference/createTemporalSequenceCovariateSettings.html 36 | /reference/dot-createLooCovariateSettings.html 37 | /reference/dot-getDbLooCovariateData.html 38 | /reference/filterByCohortDefinitionId.html 39 | /reference/filterByRowId.html 40 | /reference/filterCovariateDataCovariates.html 41 | /reference/getDbCohortAttrCovariatesData.html 42 | /reference/getDbCohortBasedCovariatesData.html 43 | /reference/getDbCovariateData.html 44 | /reference/getDbDefaultCovariateData.html 45 | /reference/getDbHdpsCovariateData.html 46 | /reference/getDefaultTable1Specifications.html 47 | /reference/index.html 48 | /reference/isAggregatedCovariateData.html 49 | /reference/isCovariateData.html 50 | /reference/isTemporalCovariateData.html 51 | /reference/loadCovariateData.html 52 | /reference/saveCovariateData.html 53 | /reference/tidyCovariateData.html 54 | 55 | 56 | -------------------------------------------------------------------------------- /extras/DefaultCovariateSettingsTemplate.R: 
-------------------------------------------------------------------------------- 1 | # Copyright 2025 Observational Health Data Sciences and Informatics 2 | # 3 | # This file is part of FeatureExtraction 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | %warning% 18 | 19 | #' Create covariate settings 20 | #' 21 | #' @details 22 | #' creates an object specifying how covariates should be constructed from data in the CDM model. 23 | #' 24 | %roxygen% 25 | #' 26 | #' @return 27 | #' An object of type \code{covariateSettings}, to be used in other functions. 28 | #' 29 | #' @examples 30 | #' settings <- %functionName%(%roxygenArgs%) 31 | #' 32 | #' @export 33 | %functionName% <- function(%arguments%) { 34 | covariateSettings <- list(temporal = %temporal%, temporalSequence = FALSE) 35 | formalNames <- names(formals(%functionName%)) 36 | anyUseTrue <- FALSE 37 | for (name in formalNames) { 38 | value <- get(name) 39 | if (is.null(value)) { 40 | value <- vector() 41 | } 42 | if (grepl("use.*", name)) { 43 | if (value) { 44 | covariateSettings[[sub("use", "", name)]] <- value 45 | anyUseTrue <- TRUE 46 | } 47 | } else { 48 | covariateSettings[[name]] <- value 49 | } 50 | } 51 | if (!anyUseTrue) { 52 | stop("No covariate analysis selected. 
Must select at least one") 53 | } 54 | attr(covariateSettings, "fun") <- "getDbDefaultCovariateData" 55 | class(covariateSettings) <- "covariateSettings" 56 | return(covariateSettings) 57 | } 58 | -------------------------------------------------------------------------------- /extras/FeatureExtraction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/extras/FeatureExtraction.pdf -------------------------------------------------------------------------------- /extras/TestHashForPostcoordinatedConcepts.R: -------------------------------------------------------------------------------- 1 | # To compute covariate IDs for postcoordinated concepts (concept_id - value_as_concept_id pairs), 2 | # we use a simple hashing function we implement in SQL. The resulting covariate ID uses 52 bits of 3 | # precision, so will fit in an R numeric type without loss of precision. 4 | # 5 | # Below is some code evaluating how likely we are to have collisions in covariate IDs (the same 6 | # covariate ID for different concept_id - value_as_concept_id pairs). Although collisions are 7 | # unlikely, they may occur. In general we are not concerned, as most covariates are used for 8 | # prediction or confounder adjustment, and this may simply lead to one covariate (out of tens 9 | # of thousands) being less predictive. 

# Check in JnJ network ---------------------------------------------------------
uniquePcCombos <- readRDS("extras/uniquePcCombos.rds")

# Hash algorithm 1: XOR-fold `value` with itself shifted right by `bits` bits,
# then keep the lowest `bits` bits.
#
# value: numeric vector of (non-negative) concept IDs.
# bits:  number of bits in the resulting hash.
#
# Returns an integer vector of hashes in [0, 2^bits - 1].
hash1 <- function(value, bits) {
  power <- 2^bits
  # bitwXor() coerces numeric arguments to integer, which drops the fractional
  # part of value / power, so the division acts as a right shift.
  return(bitwAnd(bitwXor(value, value / power), power - 1))
}

# Hash algorithm 2: multiply `value` by the constant 2654435769, divide by
# 2^(32 - bits) and keep the lowest `bits` bits.
#
# value: vector of concept IDs (coerced to integer).
# bits:  number of bits in the resulting hash.
#
# Returns a vector with one hash per element of `value`.
hash2 <- function(value, bits) {
  # Use Andromeda / SQLite for intermediate steps requiring 64-bit integers:
  a <- Andromeda::andromeda(a = data.frame(value = as.integer(value)))
  # Release the temporary Andromeda object even if the query fails; this
  # function is called repeatedly and would otherwise leak one object per call.
  on.exit(Andromeda::close(a), add = TRUE)
  shift <- 2^(32 - bits)
  mask <- (2^bits) - 1
  sql <- sprintf("SELECT CAST((2654435769 * value / %s) & %s AS INT) AS hash FROM a;", shift, mask)
  hash <- RSQLite::dbGetQuery(a, sql)
  return(hash$hash)
}


# Collisions with hash algorithm 1 (18 + 21 bits):
cid <- paste(hash1(uniquePcCombos$conceptId, 18), hash1(uniquePcCombos$valueAsConceptId, 21), uniquePcCombos$table)
sum(duplicated(cid))
# [1] 750
sum(duplicated(cid)) / nrow(uniquePcCombos)
# [1] 0.004121423

# Collisions with hash algorithm 2 (20 + 22 bits):
cid <- paste(hash2(uniquePcCombos$conceptId, 20), hash2(uniquePcCombos$valueAsConceptId, 22), uniquePcCombos$table)
sum(duplicated(cid))
# [1] 27
sum(duplicated(cid)) / nrow(uniquePcCombos)
# [1] 0.0001483712

# Same hashes combined into a single numeric covariate ID
# (4194304000 = 2^22 * 1000; the last digits encode the source table):
cid <- hash2(uniquePcCombos$conceptId, 20) * 4194304000 + hash2(uniquePcCombos$valueAsConceptId, 22) * 1000 + as.integer(uniquePcCombos$table == "measurement")
sum(duplicated(cid))

# Find a duplicate for testing:
uniquePcCombos$cid <- cid
dups <- cid[duplicated(cid)]
dups <- uniquePcCombos[cid %in% dups, ]
dups <- dups[order(dups$cid), ]
dups[1:2, ]
# # A tibble: 2 x 4
# conceptId valueAsConceptId table cid
#
# 1 3048564 4069590 measurement 7.41e14
# 2 40483078 4069590 measurement 7.41e14

# Demonstration of hash algorithm 1 in RSQLite ---------------------------------
connection <- DatabaseConnector::connect(dbms = "sqlite", server = ":memory:")

# For reference:
hash1(380844, 18) * 2^21 + hash1(2821462, 21)
# [1] 248934763863

# XOR not available in SQLite, but can implement using (a|b)-(a&b)
# 2^18 = 262144
# 2^21 = 2097152
sql <- "
SELECT (((a | a/262144) - (a & a/262144)) & 262143)*2097152 +
  (((b | b/2097152) - (b & b/2097152)) & 2097151) AS covariate_id
FROM (
  SELECT 380844 AS a,
    2821462 AS b
) tmp;
"
DatabaseConnector::renderTranslateQuerySql(connection, sql)
# # COVARIATE_ID
# 1 248934763863

# OR not available in Oracle, but can be implemented using a + b - (a&b).
# Substituting that into the XOR expression (a|b)-(a&b) above gives
# a + b - 2*(a&b), which is what the query below uses.
sql <- "
SELECT (((a + a/262144 - 2*(a & a/262144))) & 262143)*2097152 +
  (((b + b/2097152 - 2*(b & b/2097152))) & 2097151) AS covariate_id
FROM (
  SELECT 380844 AS a,
    2821462 AS b
) tmp;
"
DatabaseConnector::renderTranslateQuerySql(connection, sql)
# # COVARIATE_ID
# 1 248934763863


DatabaseConnector::disconnect(connection)

# Demonstration of hash algorithm 2 in RSQLite ---------------------------------
connection <- DatabaseConnector::connect(dbms = "sqlite", server = ":memory:")

# For reference:
format(hash2(380844, 20) * 2^22 + hash2(2821462, 22), scientific = FALSE)
# [1] 2358966384914

# 4096 = 2^(32 - 20), 1048575 = 2^20 - 1, 4194304 = 2^22
# 1024 = 2^(32 - 22), 4194303 = 2^22 - 1
sql <- "
SELECT ((2654435769 * a / 4096) & 1048575)*4194304 +
  ((2654435769 * b / 1024) & 4194303) AS covariate_id
FROM (
  SELECT 380844 AS a,
    2821462 AS b
) tmp;
"
format(DatabaseConnector::renderTranslateQuerySql(connection, sql)[1, 1], scientific = FALSE)
# # COVARIATE_ID
# 1 2358966384914

DatabaseConnector::disconnect(connection)

--------------------------------------------------------------------------------
/extras/uniquePcCombos.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/extras/uniquePcCombos.rds -------------------------------------------------------------------------------- /inst/csv/OtherParameters.csv: -------------------------------------------------------------------------------- 1 | name,type,description,defaultValue 2 | includedCovariateConceptIds,common,A list of concept IDs that should be used to construct covariates.,[] 3 | addDescendantsToInclude,common,Should descendant concept IDs be added to the list of concepts to include?,false 4 | excludedCovariateConceptIds,common,A list of concept IDs that should NOT be used to construct covariates.,[] 5 | addDescendantsToExclude,common,Should descendant concept IDs be added to the list of concepts to exclude?,false 6 | includedCovariateIds,common,A list of covariate IDs that should be restricted to.,[] 7 | longTermStartDays,days,What is the start day (relative to the index date) of the long-term window?,-365 8 | mediumTermStartDays,days,What is the start day (relative to the index date) of the medium-term window?,-180 9 | shortTermStartDays,days,What is the start day (relative to the index date) of the short-term window?,-30 10 | endDays,days,What is the end day (relative to the index date) of the window?,0 11 | sequenceEndDay,temporal_sequence,What is the end day (relative to the index date) of the feature extraction?,0 12 | sequenceStartDay,temporal_sequence,What is the start day (relative to the index date) of the feature extraction?,-99999 13 | timePart,temporal_sequence,"What is the interval type (day, month, year) of the timeIds?",'month' 14 | timeInterval,temporal_sequence,"What is the interval length (1,7,30,...) of the timeIds?",1 15 | temporalStartDays,temporal,"A list of integers representing the start of a time period, relative to the index date. 0 indicates the index date, -1 indicates the day before the index date, etc. 
The start day is included in the time period.","[-365,-364,-363,-362,-361,-360,-359,-358,-357,-356,-355,-354,-353,-352,-351,-350,-349,-348,-347,-346,-345,-344,-343,-342,-341,-340,-339,-338,-337,-336,-335,-334,-333,-332,-331,-330,-329,-328,-327,-326,-325,-324,-323,-322,-321,-320,-319,-318,-317,-316,-315,-314,-313,-312,-311,-310,-309,-308,-307,-306,-305,-304,-303,-302,-301,-300,-299,-298,-297,-296,-295,-294,-293,-292,-291,-290,-289,-288,-287,-286,-285,-284,-283,-282,-281,-280,-279,-278,-277,-276,-275,-274,-273,-272,-271,-270,-269,-268,-267,-266,-265,-264,-263,-262,-261,-260,-259,-258,-257,-256,-255,-254,-253,-252,-251,-250,-249,-248,-247,-246,-245,-244,-243,-242,-241,-240,-239,-238,-237,-236,-235,-234,-233,-232,-231,-230,-229,-228,-227,-226,-225,-224,-223,-222,-221,-220,-219,-218,-217,-216,-215,-214,-213,-212,-211,-210,-209,-208,-207,-206,-205,-204,-203,-202,-201,-200,-199,-198,-197,-196,-195,-194,-193,-192,-191,-190,-189,-188,-187,-186,-185,-184,-183,-182,-181,-180,-179,-178,-177,-176,-175,-174,-173,-172,-171,-170,-169,-168,-167,-166,-165,-164,-163,-162,-161,-160,-159,-158,-157,-156,-155,-154,-153,-152,-151,-150,-149,-148,-147,-146,-145,-144,-143,-142,-141,-140,-139,-138,-137,-136,-135,-134,-133,-132,-131,-130,-129,-128,-127,-126,-125,-124,-123,-122,-121,-120,-119,-118,-117,-116,-115,-114,-113,-112,-111,-110,-109,-108,-107,-106,-105,-104,-103,-102,-101,-100,-99,-98,-97,-96,-95,-94,-93,-92,-91,-90,-89,-88,-87,-86,-85,-84,-83,-82,-81,-80,-79,-78,-77,-76,-75,-74,-73,-72,-71,-70,-69,-68,-67,-66,-65,-64,-63,-62,-61,-60,-59,-58,-57,-56,-55,-54,-53,-52,-51,-50,-49,-48,-47,-46,-45,-44,-43,-42,-41,-40,-39,-38,-37,-36,-35,-34,-33,-32,-31,-30,-29,-28,-27,-26,-25,-24,-23,-22,-21,-20,-19,-18,-17,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1]" 16 | temporalEndDays,temporal,"A list of integers representing the end of a time period, relative to the index date. 0 indicates the index date, -1 indicates the day before the index date, etc. 
The end day is included in the time period.","[-365,-364,-363,-362,-361,-360,-359,-358,-357,-356,-355,-354,-353,-352,-351,-350,-349,-348,-347,-346,-345,-344,-343,-342,-341,-340,-339,-338,-337,-336,-335,-334,-333,-332,-331,-330,-329,-328,-327,-326,-325,-324,-323,-322,-321,-320,-319,-318,-317,-316,-315,-314,-313,-312,-311,-310,-309,-308,-307,-306,-305,-304,-303,-302,-301,-300,-299,-298,-297,-296,-295,-294,-293,-292,-291,-290,-289,-288,-287,-286,-285,-284,-283,-282,-281,-280,-279,-278,-277,-276,-275,-274,-273,-272,-271,-270,-269,-268,-267,-266,-265,-264,-263,-262,-261,-260,-259,-258,-257,-256,-255,-254,-253,-252,-251,-250,-249,-248,-247,-246,-245,-244,-243,-242,-241,-240,-239,-238,-237,-236,-235,-234,-233,-232,-231,-230,-229,-228,-227,-226,-225,-224,-223,-222,-221,-220,-219,-218,-217,-216,-215,-214,-213,-212,-211,-210,-209,-208,-207,-206,-205,-204,-203,-202,-201,-200,-199,-198,-197,-196,-195,-194,-193,-192,-191,-190,-189,-188,-187,-186,-185,-184,-183,-182,-181,-180,-179,-178,-177,-176,-175,-174,-173,-172,-171,-170,-169,-168,-167,-166,-165,-164,-163,-162,-161,-160,-159,-158,-157,-156,-155,-154,-153,-152,-151,-150,-149,-148,-147,-146,-145,-144,-143,-142,-141,-140,-139,-138,-137,-136,-135,-134,-133,-132,-131,-130,-129,-128,-127,-126,-125,-124,-123,-122,-121,-120,-119,-118,-117,-116,-115,-114,-113,-112,-111,-110,-109,-108,-107,-106,-105,-104,-103,-102,-101,-100,-99,-98,-97,-96,-95,-94,-93,-92,-91,-90,-89,-88,-87,-86,-85,-84,-83,-82,-81,-80,-79,-78,-77,-76,-75,-74,-73,-72,-71,-70,-69,-68,-67,-66,-65,-64,-63,-62,-61,-60,-59,-58,-57,-56,-55,-54,-53,-52,-51,-50,-49,-48,-47,-46,-45,-44,-43,-42,-41,-40,-39,-38,-37,-36,-35,-34,-33,-32,-31,-30,-29,-28,-27,-26,-25,-24,-23,-22,-21,-20,-19,-18,-17,-16,-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1]" 17 | -------------------------------------------------------------------------------- /inst/csv/OtherSqlToLoad.csv: -------------------------------------------------------------------------------- 1 | analysisName,sqlFileName 2 | 
cohort,CohortBasedBinaryCovariates.sql 3 | cohortCount,CohortBasedCountCovariates.sql 4 | -------------------------------------------------------------------------------- /inst/csv/PrespecTemporalAnnualAnalysis.csv: -------------------------------------------------------------------------------- 1 | analysisId,analysisName 2 | 4,ConditionEraShortTerm 3 | 9,ConditionEraStartLongTerm 4 | 10,ConditionEraAnyTimePrior 5 | 12,DrugExposureLongTerm 6 | 37,DrugEraStartShortTerm 7 | 39,MeasurementAnyTimePrior 8 | 40,MeasurementMediumTerm 9 | 43,DrugEraShortTerm 10 | 44,DrugEraOverlapping 11 | 52,DeviceExposureAnyTimePrior 12 | 53,ObservationLongTerm 13 | 55,ProcedureOccurrenceShortTerm 14 | 56,ObservationMediumTerm 15 | 57,DeviceExposureLongTerm 16 | 59,DeviceExposureMediumTerm 17 | 60,MeasurementLongTerm 18 | 62,DrugEraStartMediumTerm 19 | 65,DeviceExposureShortTerm 20 | 67,ConditionOccurrenceLongTerm 21 | 69,ConditionOccurrenceAnyTimePrior 22 | 76,ConditionOccurrenceMediumTerm 23 | 83,ConditionOccurrencePrimaryInpatientLongTerm 24 | 84,ProcedureOccurrenceLongTerm 25 | 85,ConditionOccurrencePrimaryInpatientAnyTimePrior 26 | 86,DrugEraLongTerm 27 | 87,ProcedureOccurrenceAnyTimePrior 28 | 88,DrugEraMediumTerm 29 | 89,DrugEraAnyTimePrior 30 | 90,ConditionOccurrenceShortTerm 31 | 102,ConditionOccurrencePrimaryInpatientShortTerm 32 | 104,ConditionOccurrencePrimaryInpatientMediumTerm 33 | 16,ConditionEraMediumTerm 34 | 17,ConditionEraOverlapping 35 | 18,ConditionEraStartShortTerm 36 | 21,ConditionEraStartMediumTerm 37 | 22,ProcedureOccurrenceMediumTerm 38 | 23,ConditionEraLongTerm 39 | 28,DrugExposureAnyTimePrior 40 | 31,DrugExposureShortTerm 41 | 33,DrugExposureMediumTerm 42 | 34,ObservationShortTerm 43 | 35,DrugEraStartLongTerm 44 | -------------------------------------------------------------------------------- /inst/csv/PrespecTemporalSequenceAnalyses.csv: -------------------------------------------------------------------------------- 1 | 
analysisId,analysisName,sqlFileName,subType,domainId,domainTable,domainConceptId,domainStartDate,domainEndDate,isDefault,description 2 | 1,DemographicsGender,DemographicsGender.sql,,Demographics,,,,,TRUE,Gender of the subject. 3 | 2,DemographicsAge,DemographicsAge.sql,,Demographics,,,,,FALSE,Age of the subject on the index date (in years). 4 | 3,DemographicsAgeGroup,DemographicsAgeGroup.sql,,Demographics,,,,,TRUE,Age of the subject on the index date (in 5 year age groups) 5 | 4,DemographicsRace,DemographicsRace.sql,,Demographics,,,,,TRUE,Race of the subject. 6 | 5,DemographicsEthnicity,DemographicsEthnicity.sql,,Demographics,,,,,TRUE,Ethnicity of the subject. 7 | 6,DemographicsIndexYear,DemographicsYear.sql,,Demographics,,,,,FALSE,Year of the index date. 8 | 7,DemographicsIndexMonth,DemographicsMonth.sql,,Demographics,,,,,FALSE,Month of the index date. 9 | 101,ConditionOccurrence,DomainConcept.sql,all,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition in the condition_occurrence table starting in the time window. 10 | 102,ConditionOccurrencePrimaryInpatient,DomainConcept.sql,inpatient,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition observed as a primary diagnosis in an inpatient setting in the condition_occurrence table starting in the time window. 11 | 201,ConditionEraStart,DomainConcept.sql,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition in the condition_era table starting in the time window. 12 | 203,ConditionEraGroupStart,DomainConceptGroup.sql,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,TRUE,One covariate per condition era rolled up to SNOMED groups in the condition_era table starting in the time window. 
13 | 301,DrugExposure,DomainConcept.sql,all,Drug,drug_exposure,drug_concept_id,drug_exposure_start_date,drug_exposure_start_date,FALSE,One covariate per drug in the drug_exposure table starting in the time window. 14 | 401,DrugEraStart,DomainConcept.sql,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug in the drug_era table starting in the time window. 15 | 403,DrugEraGroupStart,DomainConceptGroup.sql,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,TRUE,One covariate per drug rolled up to ATC groups in the drug_era table starting in the time window. 16 | 501,ProcedureOccurrence,DomainConcept.sql,all,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,FALSE,One covariate per procedure in the procedure_occurrence table in the time window. 17 | 601,DeviceExposure,DomainConcept.sql,all,Device,device_exposure,device_concept_id,device_exposure_start_date,device_exposure_start_date,FALSE,One covariate per device in the device_exposure table starting in the time window. 18 | 701,Measurement,DomainConcept.sql,all,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,FALSE,One covariate per measurement in the measurement table in the time window. 19 | 702,MeasurementValue,MeasurementValue.sql,,Measurement,,,,,FALSE,"One covariate containing the value per measurement-unit combination in the time window. If multiple values are found, the last is taken." 20 | 801,Observation,DomainConcept.sql,all,Observation,observation,observation_concept_id,observation_date,observation_date,FALSE,One covariate per observation in the observation table in the time window. 
21 | -------------------------------------------------------------------------------- /inst/csv/Table1Specs.csv: -------------------------------------------------------------------------------- 1 | label,analysisId,covariateIds 2 | Age group,3, 3 | Gender: female,1,8532001 4 | Race,4, 5 | Ethnicity,5, 6 | Medical history: General,210,"4006969210,438409210,4212540210,255573210,201606210,4182210210,440383210,201820210,318800210,192671210,439727210,432867210,316866210,4104000210,433736210,80180210,255848210,140168210,4030518210,80809210,435783210,4279309210,81893210,81902210,197494210,4134440210" 7 | Medical history: Cardiovascular disease,210,"313217210,381591210,317576210,321588210,316139210,4185932210,321052210,440417210,444247210" 8 | Medical history: Neoplasms,210,"4044013210,432571210,40481902210,443392210,4112853210,4180790210,443388210,197508210,200962210" 9 | Medication use,410,"21601782410,21602796410,21604686410,21604389410,21603932410,21601387410,21602028410,21600960410,21601664410,21601744410,21601461410,21600046410,21603248410,21600712410,21603890410,21601853410,21604254410,21604489410,21604752410" 10 | Charlson comorbidity index,901,1901 11 | CHADS2Vasc,904,1904 12 | DCSI,902,1902 13 | -------------------------------------------------------------------------------- /inst/csv/jarChecksum.txt: -------------------------------------------------------------------------------- 1 | 7bf91a9f369dbfd67bc2734313402fca30962298d27c5154ec03354b5dede3d4 2 | -------------------------------------------------------------------------------- /inst/doc/CreatingCovariatesBasedOnOtherCohorts.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/doc/CreatingCovariatesBasedOnOtherCohorts.pdf -------------------------------------------------------------------------------- /inst/doc/CreatingCovariatesUsingCohortAttributes.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/doc/CreatingCovariatesUsingCohortAttributes.pdf -------------------------------------------------------------------------------- /inst/doc/CreatingCustomCovariateBuilders.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/doc/CreatingCustomCovariateBuilders.pdf -------------------------------------------------------------------------------- /inst/doc/CreatingCustomCovariateBuildersKorean.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/doc/CreatingCustomCovariateBuildersKorean.pdf -------------------------------------------------------------------------------- /inst/doc/UsingFeatureExtraction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/doc/UsingFeatureExtraction.pdf -------------------------------------------------------------------------------- /inst/doc/UsingFeatureExtractionKorean.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/doc/UsingFeatureExtractionKorean.pdf -------------------------------------------------------------------------------- /inst/java/SqlRender-1.19.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/java/SqlRender-1.19.1.jar 
-------------------------------------------------------------------------------- /inst/java/featureExtraction-3.10.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/java/featureExtraction-3.10.0.jar -------------------------------------------------------------------------------- /inst/java/json-20231013.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/java/json-20231013.jar -------------------------------------------------------------------------------- /inst/sql/sql_server/CareSite.sql: -------------------------------------------------------------------------------- 1 | -- Feature construction 2 | SELECT 3 | CAST(care_site_id AS BIGINT) * 1000 + @analysis_id AS covariate_id, 4 | {@temporal} ? { 5 | CAST(NULL AS INT) AS time_id, 6 | } 7 | {@temporal_sequence} ? { 8 | CAST(NULL AS INT) AS time_id, 9 | } 10 | {@aggregated} ? 
{ 11 | cohort_definition_id, 12 | COUNT(*) AS sum_value 13 | } : { 14 | row_id, 15 | 1 AS covariate_value 16 | } 17 | INTO @covariate_table 18 | FROM ( 19 | SELECT cohort.cohort_definition_id, 20 | cohort.@row_id_field AS row_id, 21 | CASE 22 | WHEN visit_detail.care_site_id IS NOT NULL THEN visit_detail.care_site_id 23 | WHEN visit_occurrence.care_site_id IS NOT NULL THEN visit_occurrence.care_site_id 24 | ELSE person.care_site_id 25 | END AS care_site_id, 26 | ROW_NUMBER() OVER (PARTITION BY cohort_definition_id, cohort.@row_id_field ORDER BY visit_detail.visit_detail_end_date, visit_occurrence.visit_end_date) AS rn 27 | FROM @cohort_table cohort 28 | INNER JOIN @cdm_database_schema.person 29 | ON cohort.subject_id = person.person_id 30 | LEFT JOIN @cdm_database_schema.visit_occurrence 31 | ON cohort.subject_id = visit_occurrence.person_id 32 | AND visit_occurrence.visit_start_date <= cohort.cohort_start_date 33 | AND visit_occurrence.visit_end_date >= cohort.cohort_start_date 34 | LEFT JOIN @cdm_database_schema.visit_detail 35 | ON cohort.subject_id = visit_detail.person_id 36 | AND visit_detail.visit_detail_start_date <= cohort.cohort_start_date 37 | AND visit_detail.visit_detail_end_date >= cohort.cohort_start_date 38 | WHERE NOT (person.care_site_id IS NULL 39 | AND visit_occurrence.care_site_id IS NULL 40 | AND visit_detail.care_site_id IS NULL 41 | ) 42 | {@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)} 43 | ) care_site 44 | WHERE rn = 1 45 | {@included_cov_table != ''} ? { AND CAST(care_site_id AS BIGINT) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)} 46 | 47 | {@aggregated} ? 
{ 48 | GROUP BY cohort_definition_id, 49 | care_site_id 50 | } 51 | ; 52 | 53 | -- Reference construction 54 | INSERT INTO #cov_ref ( 55 | covariate_id, 56 | covariate_name, 57 | analysis_id, 58 | concept_id 59 | ) 60 | SELECT covariate_id, 61 | CAST(CONCAT('care site ID = ', CAST((covariate_id - @analysis_id) / 1000 AS INT)) AS VARCHAR(512)) AS covariate_name, 62 | @analysis_id AS analysis_id, 63 | 0 AS concept_id 64 | FROM ( 65 | SELECT DISTINCT covariate_id 66 | FROM @covariate_table 67 | ) t1; 68 | 69 | INSERT INTO #analysis_ref ( 70 | analysis_id, 71 | analysis_name, 72 | domain_id, 73 | {!@temporal} ? { 74 | start_day, 75 | end_day, 76 | } 77 | is_binary, 78 | missing_means_zero 79 | ) 80 | SELECT @analysis_id AS analysis_id, 81 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name, 82 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id, 83 | {!@temporal} ? { 84 | CAST(NULL AS INT) AS start_day, 85 | CAST(NULL AS INT) AS end_day, 86 | } 87 | CAST('Y' AS VARCHAR(1)) AS is_binary, 88 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero; 89 | -------------------------------------------------------------------------------- /inst/sql/sql_server/CohortBasedBinaryCovariates.sql: -------------------------------------------------------------------------------- 1 | -- Feature construction 2 | SELECT 3 | CAST(covariate_cohort_id AS BIGINT) * 1000 + @analysis_id AS covariate_id, 4 | {@temporal | @temporal_sequence} ? { 5 | time_id, 6 | } 7 | {@aggregated} ? { 8 | cohort_definition_id, 9 | COUNT(*) AS sum_value 10 | } : { 11 | row_id, 12 | 1 AS covariate_value 13 | } 14 | INTO @covariate_table 15 | FROM ( 16 | SELECT DISTINCT covariate_cohort.cohort_definition_id AS covariate_cohort_id, 17 | {@temporal} ? { 18 | time_id, 19 | } 20 | {@temporal_sequence} ? { 21 | FLOOR(DATEDIFF(@time_part, covariate_cohort.cohort_start_date, cohort.cohort_start_date)*1.0/@time_interval ) as time_id, 22 | } 23 | {@aggregated} ? 
{ 24 | cohort.cohort_definition_id, 25 | cohort.subject_id, 26 | cohort.cohort_start_date 27 | } : { 28 | cohort.@row_id_field AS row_id 29 | } 30 | FROM @cohort_table cohort 31 | INNER JOIN @covariate_cohort_table covariate_cohort 32 | ON cohort.subject_id = covariate_cohort.subject_id 33 | INNER JOIN #covariate_cohort_ref covariate_cohort_ref 34 | ON covariate_cohort.cohort_definition_id = CAST(covariate_cohort_ref.cohort_id AS INT) 35 | {@temporal} ? { 36 | INNER JOIN #time_period time_period 37 | ON covariate_cohort.cohort_start_date <= DATEADD(DAY, time_period.end_day, cohort.cohort_start_date) 38 | WHERE CASE WHEN covariate_cohort.cohort_end_date IS NULL THEN covariate_cohort.cohort_start_date ELSE covariate_cohort.cohort_end_date END >= DATEADD(DAY, time_period.start_day, cohort.cohort_start_date) 39 | } : { 40 | WHERE covariate_cohort.cohort_start_date <= DATEADD(DAY, {@temporal_sequence} ? {@sequence_end_day} : {@end_day}, cohort.cohort_start_date) 41 | {@start_day != 'anyTimePrior'} ? { 42 | AND CASE WHEN covariate_cohort.cohort_end_date IS NULL THEN covariate_cohort.cohort_start_date ELSE covariate_cohort.cohort_end_date END >= DATEADD(DAY, {@temporal_sequence} ? {@sequence_start_day} : {@start_day}, cohort.cohort_start_date) 43 | } 44 | } 45 | {@included_cov_table != ''} ? { AND CAST(covariate_cohort.cohort_definition_id AS BIGINT) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)} 46 | {@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)} 47 | ) by_row_id 48 | {@aggregated} ? { 49 | GROUP BY cohort_definition_id, 50 | covariate_cohort_id 51 | {@temporal | @temporal_sequence} ? { 52 | ,time_id 53 | } 54 | } 55 | ; 56 | 57 | -- Reference construction 58 | INSERT INTO #cov_ref ( 59 | covariate_id, 60 | covariate_name, 61 | analysis_id, 62 | concept_id 63 | ) 64 | SELECT covariate_id, 65 | {@temporal | @temporal_sequence} ? 
{ 66 | CAST(CONCAT('cohort: ', cohort_name) AS VARCHAR(512)) AS covariate_name, 67 | } : { 68 | {@start_day == 'anyTimePrior'} ? { 69 | CAST(CONCAT('cohort any time prior through @end_day days relative to index: ', cohort_name) AS VARCHAR(512)) AS covariate_name, 70 | } : { 71 | CAST(CONCAT('cohort during day @start_day through @end_day days relative to index: ', cohort_name) AS VARCHAR(512)) AS covariate_name, 72 | } 73 | } 74 | @analysis_id AS analysis_id, 75 | 0 AS concept_id 76 | FROM ( 77 | SELECT DISTINCT covariate_id 78 | FROM @covariate_table 79 | ) t1 80 | LEFT JOIN #covariate_cohort_ref 81 | ON CAST(cohort_id AS INT) = CAST((covariate_id - @analysis_id) / 1000 AS INT); 82 | 83 | INSERT INTO #analysis_ref ( 84 | analysis_id, 85 | analysis_name, 86 | domain_id, 87 | {!@temporal} ? { 88 | start_day, 89 | end_day, 90 | } 91 | is_binary, 92 | missing_means_zero 93 | ) 94 | SELECT @analysis_id AS analysis_id, 95 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name, 96 | CAST('cohort' AS VARCHAR(20)) AS domain_id, 97 | {!@temporal} ? { 98 | {@start_day == 'anyTimePrior'} ? { 99 | CAST(NULL AS INT) AS start_day, 100 | } : { 101 | 102 | {@temporal_sequence} ? {@sequence_start_day} : {@start_day} AS start_day, 103 | } 104 | {@temporal_sequence} ? 
{@sequence_end_day} : {@end_day} AS end_day, 105 | } 106 | CAST('Y' AS VARCHAR(1)) AS is_binary, 107 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero; 108 | -------------------------------------------------------------------------------- /inst/sql/sql_server/CreateCovAnalysisRefTables.sql: -------------------------------------------------------------------------------- 1 | IF OBJECT_ID('tempdb..#cov_ref', 'U') IS NOT NULL 2 | DROP TABLE #cov_ref; 3 | 4 | IF OBJECT_ID('tempdb..#analysis_ref', 'U') IS NOT NULL 5 | DROP TABLE #analysis_ref; 6 | 7 | CREATE TABLE #cov_ref ( 8 | covariate_id BIGINT, 9 | covariate_name VARCHAR(512), 10 | analysis_id INT, 11 | concept_id INT, 12 | value_as_concept_id INT, 13 | collisions INT 14 | ); 15 | 16 | CREATE TABLE #analysis_ref ( 17 | analysis_id BIGINT, 18 | analysis_name VARCHAR(512), 19 | domain_id VARCHAR(20), 20 | {!@temporal} ? { 21 | start_day INT, 22 | end_day INT, 23 | } 24 | is_binary VARCHAR(1), 25 | missing_means_zero VARCHAR(1) 26 | ); 27 | -------------------------------------------------------------------------------- /inst/sql/sql_server/DemographicsAgeGroup.sql: -------------------------------------------------------------------------------- 1 | -- Feature construction 2 | SELECT CAST(FLOOR((YEAR(cohort_start_date) - year_of_birth) / 5) * 1000 + @analysis_id AS BIGINT) AS covariate_id, 3 | {@temporal | @temporal_sequence} ? { 4 | CAST(NULL AS INT) AS time_id, 5 | } 6 | {@aggregated} ? { 7 | cohort_definition_id, 8 | COUNT(*) AS sum_value 9 | } : { 10 | cohort.@row_id_field AS row_id, 11 | 1 AS covariate_value 12 | } 13 | INTO @covariate_table 14 | FROM @cohort_table cohort 15 | INNER JOIN @cdm_database_schema.person 16 | ON cohort.subject_id = person.person_id 17 | {@included_cov_table != ''} ? {WHERE FLOOR((YEAR(cohort_start_date) - year_of_birth) / 5) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)} 18 | {@cohort_definition_id != -1} ? { 19 | {@included_cov_table != ''} ? 
{ AND} :{WHERE} cohort.cohort_definition_id IN (@cohort_definition_id) 20 | } 21 | {@aggregated} ? { 22 | GROUP BY cohort_definition_id, 23 | FLOOR((YEAR(cohort_start_date) - year_of_birth) / 5) 24 | } 25 | ; 26 | 27 | -- Reference construction 28 | INSERT INTO #cov_ref ( 29 | covariate_id, 30 | covariate_name, 31 | analysis_id, 32 | concept_id 33 | ) 34 | SELECT covariate_id, 35 | CAST(CONCAT ( 36 | 'age group: ', 37 | RIGHT(CONCAT(' ', CAST(CAST(5 * (covariate_id - @analysis_id) / 1000 AS INTEGER) AS VARCHAR)), 3), 38 | ' - ', 39 | RIGHT(CONCAT(' ', CAST((CAST(5 * (covariate_id - @analysis_id) / 1000 AS INTEGER)) + 4 AS VARCHAR)), 3) 40 | ) AS VARCHAR(512)) AS covariate_name, 41 | @analysis_id AS analysis_id, 42 | 0 AS concept_id 43 | FROM ( 44 | SELECT DISTINCT covariate_id 45 | FROM @covariate_table 46 | ) t1; 47 | 48 | INSERT INTO #analysis_ref ( 49 | analysis_id, 50 | analysis_name, 51 | domain_id, 52 | {!@temporal} ? { 53 | start_day, 54 | end_day, 55 | } 56 | is_binary, 57 | missing_means_zero 58 | ) 59 | SELECT @analysis_id AS analysis_id, 60 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name, 61 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id, 62 | {!@temporal} ? { 63 | CAST(NULL AS INT) AS start_day, 64 | CAST(NULL AS INT) AS end_day, 65 | } 66 | CAST('Y' AS VARCHAR(1)) AS is_binary, 67 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero; 68 | -------------------------------------------------------------------------------- /inst/sql/sql_server/DemographicsEthnicity.sql: -------------------------------------------------------------------------------- 1 | -- Feature construction 2 | SELECT 3 | CAST(ethnicity_concept_id AS BIGINT) * 1000 + @analysis_id AS covariate_id, 4 | {@temporal | @temporal_sequence} ? { 5 | CAST(NULL AS INT) AS time_id, 6 | } 7 | {@aggregated} ? 
{ 8 | cohort_definition_id, 9 | COUNT(*) AS sum_value 10 | } : { 11 | cohort.@row_id_field AS row_id, 12 | 1 AS covariate_value 13 | } 14 | INTO @covariate_table 15 | FROM @cohort_table cohort 16 | INNER JOIN @cdm_database_schema.person 17 | ON cohort.subject_id = person.person_id 18 | WHERE ethnicity_concept_id IN ( 19 | SELECT concept_id 20 | FROM @cdm_database_schema.concept 21 | WHERE LOWER(concept_class_id) = 'ethnicity' 22 | ) 23 | {@excluded_concept_table != ''} ? { AND ethnicity_concept_id NOT IN (SELECT id FROM @excluded_concept_table)} 24 | {@included_concept_table != ''} ? { AND ethnicity_concept_id IN (SELECT id FROM @included_concept_table)} 25 | {@included_cov_table != ''} ? { AND CAST(ethnicity_concept_id AS BIGINT) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)} 26 | {@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)} 27 | {@aggregated} ? { 28 | GROUP BY cohort_definition_id, 29 | ethnicity_concept_id 30 | } 31 | ; 32 | 33 | -- Reference construction 34 | INSERT INTO #cov_ref ( 35 | covariate_id, 36 | covariate_name, 37 | analysis_id, 38 | concept_id 39 | ) 40 | SELECT covariate_id, 41 | CAST(CONCAT('ethnicity = ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END) AS VARCHAR(512)) AS covariate_name, 42 | @analysis_id AS analysis_id, 43 | CAST((covariate_id - @analysis_id) / 1000 AS INT) AS concept_id 44 | FROM ( 45 | SELECT DISTINCT covariate_id 46 | FROM @covariate_table 47 | ) t1 48 | LEFT JOIN @cdm_database_schema.concept 49 | ON concept_id = CAST((covariate_id - @analysis_id) / 1000 AS INT); 50 | 51 | INSERT INTO #analysis_ref ( 52 | analysis_id, 53 | analysis_name, 54 | domain_id, 55 | {!@temporal} ? { 56 | start_day, 57 | end_day, 58 | } 59 | is_binary, 60 | missing_means_zero 61 | ) 62 | SELECT @analysis_id AS analysis_id, 63 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name, 64 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id, 65 | {!@temporal} ? 
{ 66 | CAST(NULL AS INT) AS start_day, 67 | CAST(NULL AS INT) AS end_day, 68 | } 69 | CAST('Y' AS VARCHAR(1)) AS is_binary, 70 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero; 71 | -------------------------------------------------------------------------------- /inst/sql/sql_server/DemographicsGender.sql: -------------------------------------------------------------------------------- 1 | -- Feature construction 2 | SELECT 3 | CAST(gender_concept_id AS BIGINT) * 1000 + @analysis_id AS covariate_id, 4 | {@temporal} ? { 5 | CAST(NULL AS INT) AS time_id, 6 | } 7 | {@temporal_sequence} ? { 8 | CAST(NULL AS INT) AS time_id, 9 | } 10 | {@aggregated} ? { 11 | cohort_definition_id, 12 | COUNT(*) AS sum_value 13 | } : { 14 | cohort.@row_id_field AS row_id, 15 | 1 AS covariate_value 16 | } 17 | INTO @covariate_table 18 | FROM @cohort_table cohort 19 | INNER JOIN @cdm_database_schema.person 20 | ON cohort.subject_id = person.person_id 21 | WHERE gender_concept_id != 0 22 | {@excluded_concept_table != ''} ? { AND gender_concept_id NOT IN (SELECT id FROM @excluded_concept_table)} 23 | {@included_concept_table != ''} ? { AND gender_concept_id IN (SELECT id FROM @included_concept_table)} 24 | {@included_cov_table != ''} ? { AND CAST(gender_concept_id AS BIGINT) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)} 25 | {@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)} 26 | {@aggregated} ? 
{ 27 | GROUP BY cohort_definition_id, 28 | gender_concept_id 29 | } 30 | ; 31 | 32 | -- Reference construction 33 | INSERT INTO #cov_ref ( 34 | covariate_id, 35 | covariate_name, 36 | analysis_id, 37 | concept_id 38 | ) 39 | SELECT covariate_id, 40 | CAST(CONCAT('gender = ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END) AS VARCHAR(512)) AS covariate_name, 41 | @analysis_id AS analysis_id, 42 | CAST((covariate_id - @analysis_id) / 1000 AS INT) AS concept_id 43 | FROM ( 44 | SELECT DISTINCT covariate_id 45 | FROM @covariate_table 46 | ) t1 47 | LEFT JOIN @cdm_database_schema.concept 48 | ON concept_id = CAST((covariate_id - @analysis_id) / 1000 AS INT); 49 | 50 | INSERT INTO #analysis_ref ( 51 | analysis_id, 52 | analysis_name, 53 | domain_id, 54 | {!@temporal} ? { 55 | start_day, 56 | end_day, 57 | } 58 | is_binary, 59 | missing_means_zero 60 | ) 61 | SELECT @analysis_id AS analysis_id, 62 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name, 63 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id, 64 | {!@temporal} ? { 65 | CAST(NULL AS INT) AS start_day, 66 | CAST(NULL AS INT) AS end_day, 67 | } 68 | CAST('Y' AS VARCHAR(1)) AS is_binary, 69 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero; 70 | -------------------------------------------------------------------------------- /inst/sql/sql_server/DemographicsMonth.sql: -------------------------------------------------------------------------------- 1 | -- Feature construction 2 | SELECT CAST(MONTH(cohort_start_date) * 1000 + @analysis_id AS BIGINT) AS covariate_id, 3 | {@temporal | @temporal_sequence} ? { 4 | CAST(NULL AS INT) AS time_id, 5 | } 6 | {@aggregated} ? { 7 | cohort_definition_id, 8 | COUNT(*) AS sum_value 9 | } : { 10 | cohort.@row_id_field AS row_id, 11 | 1 AS covariate_value 12 | } 13 | INTO @covariate_table 14 | FROM @cohort_table cohort 15 | INNER JOIN @cdm_database_schema.person 16 | ON cohort.subject_id = person.person_id 17 | {@included_cov_table != ''} ? 
{WHERE MONTH(cohort_start_date) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)} 18 | {@cohort_definition_id != -1} ? { 19 | {@included_cov_table != ''} ? { AND} :{WHERE} cohort.cohort_definition_id IN (@cohort_definition_id) 20 | } 21 | {@aggregated} ? { 22 | GROUP BY cohort_definition_id, 23 | MONTH(cohort_start_date) 24 | } 25 | ; 26 | 27 | -- Reference construction 28 | INSERT INTO #cov_ref ( 29 | covariate_id, 30 | covariate_name, 31 | analysis_id, 32 | concept_id 33 | ) 34 | SELECT covariate_id, 35 | CAST(CONCAT ('index month: ', CAST((covariate_id - @analysis_id) / 1000 AS INTEGER)) AS VARCHAR(512)) AS covariate_name, 36 | @analysis_id AS analysis_id, 37 | 0 AS concept_id 38 | FROM ( 39 | SELECT DISTINCT covariate_id 40 | FROM @covariate_table 41 | ) t1; 42 | 43 | INSERT INTO #analysis_ref ( 44 | analysis_id, 45 | analysis_name, 46 | domain_id, 47 | {!@temporal} ? { 48 | start_day, 49 | end_day, 50 | } 51 | is_binary, 52 | missing_means_zero 53 | ) 54 | SELECT @analysis_id AS analysis_id, 55 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name, 56 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id, 57 | {!@temporal} ? { 58 | CAST(NULL AS INT) AS start_day, 59 | CAST(NULL AS INT) AS end_day, 60 | } 61 | CAST('Y' AS VARCHAR(1)) AS is_binary, 62 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero; 63 | -------------------------------------------------------------------------------- /inst/sql/sql_server/DemographicsRace.sql: -------------------------------------------------------------------------------- 1 | -- Feature construction 2 | SELECT 3 | CAST(race_concept_id AS BIGINT) * 1000 + @analysis_id AS covariate_id, 4 | {@temporal | @temporal_sequence} ? { 5 | CAST(NULL AS INT) AS time_id, 6 | } 7 | {@aggregated} ? 
{ 8 | cohort_definition_id, 9 | COUNT(*) AS sum_value 10 | } : { 11 | cohort.@row_id_field AS row_id, 12 | 1 AS covariate_value 13 | } 14 | INTO @covariate_table 15 | FROM @cohort_table cohort 16 | INNER JOIN @cdm_database_schema.person 17 | ON cohort.subject_id = person.person_id 18 | WHERE race_concept_id IN ( 19 | SELECT concept_id 20 | FROM @cdm_database_schema.concept 21 | WHERE LOWER(concept_class_id) = 'race' 22 | ) 23 | {@excluded_concept_table != ''} ? { AND race_concept_id NOT IN (SELECT id FROM @excluded_concept_table)} 24 | {@included_concept_table != ''} ? { AND race_concept_id IN (SELECT id FROM @included_concept_table)} 25 | {@included_cov_table != ''} ? { AND CAST(race_concept_id AS BIGINT) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)} 26 | {@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)} 27 | {@aggregated} ? { 28 | GROUP BY cohort_definition_id, 29 | race_concept_id 30 | } 31 | ; 32 | 33 | -- Reference construction 34 | INSERT INTO #cov_ref ( 35 | covariate_id, 36 | covariate_name, 37 | analysis_id, 38 | concept_id 39 | ) 40 | SELECT covariate_id, 41 | CAST(CONCAT('race = ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END) AS VARCHAR(512)) AS covariate_name, 42 | @analysis_id AS analysis_id, 43 | CAST((covariate_id - @analysis_id) / 1000 AS INT) AS concept_id 44 | FROM ( 45 | SELECT DISTINCT covariate_id 46 | FROM @covariate_table 47 | ) t1 48 | LEFT JOIN @cdm_database_schema.concept 49 | ON concept_id = CAST((covariate_id - @analysis_id) / 1000 AS INT); 50 | 51 | INSERT INTO #analysis_ref ( 52 | analysis_id, 53 | analysis_name, 54 | domain_id, 55 | {!@temporal} ? { 56 | start_day, 57 | end_day, 58 | } 59 | is_binary, 60 | missing_means_zero 61 | ) 62 | SELECT @analysis_id AS analysis_id, 63 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name, 64 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id, 65 | {!@temporal} ? 
{ 66 | CAST(NULL AS INT) AS start_day, 67 | CAST(NULL AS INT) AS end_day, 68 | } 69 | CAST('Y' AS VARCHAR(1)) AS is_binary, 70 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero; 71 | -------------------------------------------------------------------------------- /inst/sql/sql_server/DemographicsYear.sql: -------------------------------------------------------------------------------- 1 | -- Feature construction 2 | SELECT CAST(YEAR(cohort_start_date) * 1000 + @analysis_id AS BIGINT) AS covariate_id, 3 | {@temporal | @temporal_sequence} ? { 4 | CAST(NULL AS INT) AS time_id, 5 | } 6 | {@aggregated} ? { 7 | cohort_definition_id, 8 | COUNT(*) AS sum_value 9 | } : { 10 | cohort.@row_id_field AS row_id, 11 | 1 AS covariate_value 12 | } 13 | INTO @covariate_table 14 | FROM @cohort_table cohort 15 | INNER JOIN @cdm_database_schema.person 16 | ON cohort.subject_id = person.person_id 17 | {@included_cov_table != ''} ? {WHERE YEAR(cohort_start_date) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)} 18 | {@cohort_definition_id != -1} ? { 19 | {@included_cov_table != ''} ? { AND} :{WHERE} cohort.cohort_definition_id IN (@cohort_definition_id) 20 | } 21 | {@aggregated} ? { 22 | GROUP BY cohort_definition_id, 23 | YEAR(cohort_start_date) 24 | } 25 | ; 26 | 27 | -- Reference construction 28 | INSERT INTO #cov_ref ( 29 | covariate_id, 30 | covariate_name, 31 | analysis_id, 32 | concept_id 33 | ) 34 | SELECT covariate_id, 35 | CAST(CONCAT ('index year: ', CAST((covariate_id - @analysis_id) / 1000 AS INTEGER)) AS VARCHAR(512)) AS covariate_name, 36 | @analysis_id AS analysis_id, 37 | 0 AS concept_id 38 | FROM ( 39 | SELECT DISTINCT covariate_id 40 | FROM @covariate_table 41 | ) t1; 42 | 43 | INSERT INTO #analysis_ref ( 44 | analysis_id, 45 | analysis_name, 46 | domain_id, 47 | {!@temporal} ? 
{ 48 | start_day, 49 | end_day, 50 | } 51 | is_binary, 52 | missing_means_zero 53 | ) 54 | SELECT @analysis_id AS analysis_id, 55 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name, 56 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id, 57 | {!@temporal} ? { 58 | CAST(NULL AS INT) AS start_day, 59 | CAST(NULL AS INT) AS end_day, 60 | } 61 | CAST('Y' AS VARCHAR(1)) AS is_binary, 62 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero; 63 | -------------------------------------------------------------------------------- /inst/sql/sql_server/DemographicsYearMonth.sql: -------------------------------------------------------------------------------- 1 | -- Feature construction: one binary covariate per index year and month (covariate id = YYYYMM * 1000 + analysis id); time_id is a NULL placeholder in both temporal layouts, matching the other index-date demographics templates 2 | SELECT CAST(YEAR(cohort_start_date)*100000 + MONTH(cohort_start_date)*1000 + @analysis_id AS BIGINT) AS covariate_id, 3 | {@temporal | @temporal_sequence} ? { 4 | CAST(NULL AS INT) AS time_id, 5 | } 6 | {@aggregated} ? { 7 | cohort_definition_id, 8 | COUNT(*) AS sum_value 9 | } : { 10 | cohort.@row_id_field AS row_id, 11 | 1 AS covariate_value 12 | } 13 | INTO @covariate_table 14 | FROM @cohort_table cohort 15 | INNER JOIN @cdm_database_schema.person 16 | ON cohort.subject_id = person.person_id 17 | {@included_cov_table != ''} ? {WHERE YEAR(cohort_start_date)*100000 + MONTH(cohort_start_date)*1000 + @analysis_id IN (SELECT id FROM @included_cov_table)} 18 | {@cohort_definition_id != -1} ? { 19 | {@included_cov_table != ''} ? { AND} :{WHERE} cohort.cohort_definition_id IN (@cohort_definition_id) 20 | } 21 | {@aggregated} ? 
{ 22 | GROUP BY cohort_definition_id, 23 | YEAR(cohort_start_date)*100000 + MONTH(cohort_start_date)*1000 + @analysis_id 24 | } 25 | ; 26 | 27 | -- Reference construction 28 | INSERT INTO #cov_ref ( 29 | covariate_id, 30 | covariate_name, 31 | analysis_id, 32 | concept_id 33 | ) 34 | SELECT covariate_id, 35 | CAST(CONCAT('index year and month: ', CAST((covariate_id - @analysis_id) / 1000 AS INTEGER)) AS VARCHAR(512)) AS covariate_name, 36 | @analysis_id AS analysis_id, 37 | 0 AS concept_id 38 | FROM ( 39 | SELECT DISTINCT covariate_id 40 | FROM @covariate_table 41 | ) t1; 42 | 43 | INSERT INTO #analysis_ref ( 44 | analysis_id, 45 | analysis_name, 46 | domain_id, 47 | {!@temporal} ? { 48 | start_day, 49 | end_day, 50 | } 51 | is_binary, 52 | missing_means_zero 53 | ) 54 | SELECT @analysis_id AS analysis_id, 55 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name, 56 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id, 57 | {!@temporal} ? { 58 | CAST(NULL AS INT) AS start_day, 59 | CAST(NULL AS INT) AS end_day, 60 | } 61 | CAST('Y' AS VARCHAR(1)) AS is_binary, 62 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero; 63 | -------------------------------------------------------------------------------- /inst/sql/sql_server/DomainConcept.sql: -------------------------------------------------------------------------------- 1 | -- Feature construction 2 | SELECT 3 | CAST(@domain_concept_id AS BIGINT) * 1000 + @analysis_id AS covariate_id, 4 | {@temporal | @temporal_sequence} ? { 5 | time_id, 6 | } 7 | {@aggregated} ? { 8 | cohort_definition_id, 9 | COUNT(*) AS sum_value 10 | } : { 11 | row_id, 12 | 1 AS covariate_value 13 | } 14 | {@temporal_annual} ? {, event_year} 15 | INTO @covariate_table 16 | FROM ( 17 | {@temporal_annual} ? { 18 | SELECT @domain_concept_id, 19 | cohort_definition_id, 20 | subject_id, 21 | cohort_start_date, 22 | {@temporal} ? {time_id,} 23 | event_year FROM ( 24 | } 25 | SELECT DISTINCT @domain_concept_id, 26 | {@temporal} ? 
{ 27 | time_id, 28 | } 29 | {@temporal_sequence} ? { 30 | FLOOR(DATEDIFF(@time_part, @cdm_database_schema.@domain_table.@domain_start_date, cohort.cohort_start_date)*1.0/@time_interval ) as time_id, 31 | } 32 | {@temporal_annual} ? { 33 | DATEPART(year, @domain_table.@domain_start_date) event_year, 34 | } 35 | {@aggregated} ? { 36 | cohort_definition_id, 37 | cohort.subject_id, 38 | cohort.cohort_start_date 39 | } : { 40 | cohort.@row_id_field AS row_id 41 | } 42 | FROM @cohort_table cohort 43 | INNER JOIN @cdm_database_schema.@domain_table 44 | ON cohort.subject_id = @domain_table.person_id 45 | {@sub_type == 'inpatient'} ? { 46 | INNER JOIN @cdm_database_schema.visit_occurrence vo 47 | ON vo.person_id = @domain_table.person_id 48 | AND vo.visit_start_date <= @domain_table.@domain_start_date 49 | AND vo.visit_end_date >= @domain_table.@domain_start_date 50 | INNER JOIN @cdm_database_schema.concept_ancestor ca 51 | ON ca.ancestor_concept_id IN (9201, 38004311, 8920, 262) 52 | AND ca.descendant_concept_id = vo.visit_concept_id 53 | } 54 | {@temporal} ? { 55 | INNER JOIN #time_period time_period 56 | ON @domain_start_date <= DATEADD(DAY, time_period.end_day, cohort.cohort_start_date) 57 | AND @domain_end_date >= DATEADD(DAY, time_period.start_day, cohort.cohort_start_date) 58 | WHERE @domain_concept_id != 0 59 | } : { 60 | 61 | WHERE @domain_start_date <= DATEADD(DAY, {@temporal_sequence} ? {@sequence_end_day} : {@end_day}, cohort.cohort_start_date) 62 | {@start_day != 'anyTimePrior'} ? { AND 63 | 64 | {@temporal_sequence} ? {@domain_start_date} : {@domain_end_date} 65 | 66 | >= DATEADD(DAY, {@temporal_sequence} ? {@sequence_start_day} : {@start_day}, cohort.cohort_start_date)} 67 | AND @domain_concept_id != 0 68 | 69 | } 70 | {@excluded_concept_table != ''} ? { AND @domain_concept_id NOT IN (SELECT id FROM @excluded_concept_table)} 71 | {@included_concept_table != ''} ? 
{ AND @domain_concept_id IN (SELECT id FROM @included_concept_table)} 72 | {@included_cov_table != ''} ? { AND CAST(@domain_concept_id AS BIGINT) * 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)} 73 | {@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)} 74 | ) by_row_id 75 | {@temporal_annual} ? { 76 | ) by_year 77 | } 78 | {@aggregated} ? { 79 | GROUP BY cohort_definition_id, 80 | @domain_concept_id 81 | {@temporal | @temporal_sequence} ? { 82 | ,time_id 83 | } 84 | {@temporal_annual} ? { 85 | ,event_year 86 | } 87 | } 88 | ; 89 | 90 | -- Reference construction 91 | INSERT INTO #cov_ref ( 92 | covariate_id, 93 | covariate_name, 94 | analysis_id, 95 | concept_id 96 | ) 97 | SELECT covariate_id, 98 | {@temporal | @temporal_sequence} ? { 99 | CAST(CONCAT('@domain_table: ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END {@sub_type == 'inpatient'} ? {, ' (inpatient)'}) AS VARCHAR(512)) AS covariate_name, 100 | } : { 101 | {@start_day == 'anyTimePrior'} ? { 102 | CAST(CONCAT('@domain_table any time prior through @end_day days relative to index: ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END {@sub_type == 'inpatient'} ? {, ' (inpatient)'}) AS VARCHAR(512)) AS covariate_name, 103 | } : { 104 | CAST(CONCAT('@domain_table during day @start_day through @end_day days relative to index: ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END {@sub_type == 'inpatient'} ? 
{, ' (inpatient)'}) AS VARCHAR(512)) AS covariate_name, 105 | } 106 | } 107 | @analysis_id AS analysis_id, 108 | CAST((covariate_id - @analysis_id) / 1000 AS INT) AS concept_id 109 | FROM ( 110 | SELECT DISTINCT covariate_id 111 | FROM @covariate_table 112 | ) t1 113 | LEFT JOIN @cdm_database_schema.concept 114 | ON concept_id = CAST((covariate_id - @analysis_id) / 1000 AS INT); 115 | 116 | INSERT INTO #analysis_ref ( 117 | analysis_id, 118 | analysis_name, 119 | domain_id, 120 | {!@temporal} ? { 121 | start_day, 122 | end_day, 123 | } 124 | is_binary, 125 | missing_means_zero 126 | ) 127 | SELECT @analysis_id AS analysis_id, 128 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name, 129 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id, 130 | {!@temporal} ? { 131 | {@start_day == 'anyTimePrior'} ? { 132 | CAST(NULL AS INT) AS start_day, 133 | } : { 134 | 135 | {@temporal_sequence} ? {@sequence_start_day} : {@start_day} AS start_day, 136 | } 137 | {@temporal_sequence} ? {@sequence_end_day} : {@end_day} AS end_day, 138 | } 139 | CAST('Y' AS VARCHAR(1)) AS is_binary, 140 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero; 141 | -------------------------------------------------------------------------------- /inst/sql/sql_server/GetAttrCovariates.sql: -------------------------------------------------------------------------------- 1 | /************************************************************************ 2 | @file GetAttrCovariates.sql 3 | 4 | Copyright 2025 Observational Health Data Sciences and Informatics 5 | 6 | This file is part of FeatureExtraction 7 | 8 | Licensed under the Apache License, Version 2.0 (the "License"); 9 | you may not use this file except in compliance with the License. 
10 | You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | ************************************************************************/ 20 | 21 | {DEFAULT @attr_database_schema = 'CDM_SIM.dbo' } 22 | {DEFAULT @cohort_table = '#cohort_person'} 23 | {DEFAULT @cohort_id = -1} 24 | {DEFAULT @row_id_field = 'person_id'} 25 | {DEFAULT @cohort_attribute_table = 'cohort_attribute'} 26 | {DEFAULT @has_include_attr_ids = FALSE} 27 | -- Returns one (row_id, covariate_id, covariate_value) row per cohort entry and attribute; optionally restricted to the attribute ids in #included_attr and to a single cohort definition 28 | SELECT cohort.@row_id_field AS row_id, 29 | cohort_attribute.attribute_definition_id AS covariate_id, 30 | cohort_attribute.value_as_number AS covariate_value 31 | FROM @attr_database_schema.@cohort_attribute_table cohort_attribute 32 | INNER JOIN @cohort_table cohort 33 | ON cohort_attribute.subject_id = cohort.subject_id 34 | AND cohort_attribute.cohort_definition_id = cohort.cohort_definition_id 35 | AND cohort_attribute.cohort_start_date = cohort.cohort_start_date -- match the specific cohort entry (was a repeated subject_id predicate); assumes the attribute table carries cohort_start_date 36 | {@has_include_attr_ids} ? { 37 | INNER JOIN #included_attr included_attr 38 | ON included_attr.attribute_definition_id = cohort_attribute.attribute_definition_id 39 | } 40 | {@cohort_id != -1} ? 
{ 41 | WHERE cohort.cohort_definition_id = @cohort_id 42 | } 43 | ; 44 | -------------------------------------------------------------------------------- /inst/sql/sql_server/IncludeDescendants.sql: -------------------------------------------------------------------------------- 1 | /************************************************************************ 2 | Copyright 2025 Observational Health Data Sciences and Informatics 3 | 4 | This file is part of FeatureExtraction 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | ************************************************************************/ 18 | {DEFAULT @table_name = '#include_concepts'} 19 | {DEFAULT @cdm_database_schema = 'cdm'} 20 | -- Appends every descendant (per concept_ancestor) of the concepts already in the table; self-links are skipped so existing concepts are not re-inserted by this statement 21 | INSERT INTO @table_name (concept_id) 22 | SELECT descendant_concept_id 23 | FROM @table_name this_table 24 | INNER JOIN @cdm_database_schema.concept_ancestor 25 | ON concept_id = ancestor_concept_id 26 | WHERE concept_id != descendant_concept_id; 27 | -------------------------------------------------------------------------------- /inst/sql/sql_server/MeasurementRangeGroup.sql: -------------------------------------------------------------------------------- 1 | -- Feature construction 2 | SELECT 3 | (CAST(measurement_concept_id AS BIGINT) * 10000) + (range_group * 1000) + @analysis_id AS covariate_id, 4 | {@temporal} ? { 5 | time_id, 6 | } 7 | {@aggregated} ? 
{ 8 | cohort_definition_id, 9 | COUNT(*) AS sum_value 10 | } : { 11 | row_id, 12 | 1 AS covariate_value 13 | } 14 | INTO @covariate_table 15 | FROM ( 16 | {@aggregated} ? { 17 | SELECT DISTINCT measurement_concept_id, 18 | range_group, 19 | {@temporal} ? { 20 | time_id, 21 | } 22 | cohort_definition_id, 23 | subject_id, 24 | cohort_start_date 25 | FROM ( 26 | } 27 | SELECT measurement_concept_id, 28 | CASE 29 | WHEN value_as_number < range_low THEN 1 30 | WHEN value_as_number > range_high THEN 3 31 | ELSE 2 32 | END AS range_group, 33 | {@temporal} ? { 34 | time_id, 35 | } 36 | {@aggregated} ? { 37 | cohort_definition_id, 38 | cohort.subject_id, 39 | cohort.cohort_start_date 40 | } : { 41 | cohort.@row_id_field AS row_id 42 | } 43 | FROM @cohort_table cohort 44 | INNER JOIN @cdm_database_schema.measurement 45 | ON cohort.subject_id = measurement.person_id 46 | {@temporal} ? { 47 | INNER JOIN #time_period time_period 48 | ON measurement_date <= DATEADD(DAY, time_period.end_day, cohort.cohort_start_date) 49 | AND measurement_date >= DATEADD(DAY, time_period.start_day, cohort.cohort_start_date) 50 | WHERE measurement_concept_id != 0 51 | } : { 52 | WHERE measurement_date <= DATEADD(DAY, @end_day, cohort.cohort_start_date) 53 | {@start_day != 'anyTimePrior'} ? { AND measurement_date >= DATEADD(DAY, @start_day, cohort.cohort_start_date)} 54 | AND measurement_concept_id != 0 55 | } 56 | AND range_low IS NOT NULL 57 | AND range_high IS NOT NULL 58 | {@excluded_concept_table != ''} ? { AND measurement_concept_id NOT IN (SELECT id FROM @excluded_concept_table)} 59 | {@included_concept_table != ''} ? { AND measurement_concept_id IN (SELECT id FROM @included_concept_table)} 60 | {@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)} 61 | {@aggregated} ? { 62 | ) grouped_1 63 | } 64 | ) grouped_2 65 | {@included_cov_table != ''} ? 
{WHERE (CAST(measurement_concept_id AS BIGINT) * 10000) + (range_group * 1000) + @analysis_id IN (SELECT id FROM @included_cov_table)} 66 | GROUP BY measurement_concept_id, 67 | range_group 68 | {@aggregated} ? { 69 | ,cohort_definition_id 70 | } : { 71 | ,row_id 72 | } 73 | {@temporal} ? { 74 | ,time_id 75 | } 76 | ; 77 | 78 | -- Reference construction 79 | INSERT INTO #cov_ref ( 80 | covariate_id, 81 | covariate_name, 82 | analysis_id, 83 | concept_id 84 | ) 85 | SELECT covariate_id, 86 | {@temporal} ? { 87 | CAST(CONCAT('measurement ', range_name, ': ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END) AS VARCHAR(512)) AS covariate_name, 88 | } : { 89 | {@start_day == 'anyTimePrior'} ? { 90 | CAST(CONCAT('measurement ', range_name, ' during any time prior through @end_day days relative to index: ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END) AS VARCHAR(512)) AS covariate_name, 91 | } : { 92 | CAST(CONCAT('measurement ', range_name, ' during day @start_day through @end_day days relative to index: ', CASE WHEN concept_name IS NULL THEN 'Unknown concept' ELSE concept_name END) AS VARCHAR(512)) AS covariate_name, 93 | } 94 | } 95 | @analysis_id AS analysis_id, 96 | CAST(FLOOR(covariate_id / 10000.0) AS INT) AS concept_id 97 | FROM ( 98 | SELECT DISTINCT covariate_id, 99 | CASE 100 | WHEN FLOOR(covariate_id / 1000.0) - (FLOOR(covariate_id / 10000.0) * 10) = 1 THEN 'below normal range' 101 | WHEN FLOOR(covariate_id / 1000.0) - (FLOOR(covariate_id / 10000.0) * 10) = 2 THEN 'within normal range' 102 | WHEN FLOOR(covariate_id / 1000.0) - (FLOOR(covariate_id / 10000.0) * 10) = 3 THEN 'above normal range' 103 | END AS range_name 104 | FROM @covariate_table 105 | ) t1 106 | LEFT JOIN @cdm_database_schema.concept 107 | ON concept_id = FLOOR(covariate_id / 10000.0); 108 | 109 | INSERT INTO #analysis_ref ( 110 | analysis_id, 111 | analysis_name, 112 | domain_id, 113 | {!@temporal} ? 
{ 114 | start_day, 115 | end_day, 116 | } 117 | is_binary, 118 | missing_means_zero 119 | ) 120 | SELECT @analysis_id AS analysis_id, 121 | CAST('@analysis_name' AS VARCHAR(512)) AS analysis_name, 122 | CAST('@domain_id' AS VARCHAR(20)) AS domain_id, 123 | {!@temporal} ? { 124 | {@start_day == 'anyTimePrior'} ? { 125 | CAST(NULL AS INT) AS start_day, 126 | } : { 127 | @start_day AS start_day, 128 | } 129 | @end_day AS end_day, 130 | } 131 | CAST('Y' AS VARCHAR(1)) AS is_binary, 132 | CAST(NULL AS VARCHAR(1)) AS missing_means_zero; 133 | 134 | -------------------------------------------------------------------------------- /inst/sql/sql_server/RemoveCovariateTempTables.sql: -------------------------------------------------------------------------------- 1 | /************************************************************************ 2 | Copyright 2025 Observational Health Data Sciences and Informatics 3 | 4 | This file is part of FeatureExtraction 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 
17 | ************************************************************************/ 18 | TRUNCATE TABLE #cov; 19 | 20 | DROP TABLE #cov; 21 | 22 | TRUNCATE TABLE #cov_ref; 23 | 24 | DROP TABLE #cov_ref; 25 | -------------------------------------------------------------------------------- /inst/sql/sql_server/covariateCohorts.sql: -------------------------------------------------------------------------------- 1 | /************************ 2 | File covariateCohorts.sql 3 | *************************/ 4 | DROP TABLE IF EXISTS @cohort_database_schema.@cohort_table; 5 | 6 | CREATE TABLE @cohort_database_schema.@cohort_table ( 7 | cohort_definition_id INT, 8 | subject_id BIGINT, 9 | cohort_start_date DATE, 10 | cohort_end_date DATE 11 | ); 12 | 13 | INSERT INTO @cohort_database_schema.@cohort_table ( 14 | cohort_definition_id, 15 | subject_id, 16 | cohort_start_date, 17 | cohort_end_date 18 | ) 19 | SELECT 1, 20 | person_id, 21 | MIN(drug_era_start_date), 22 | MIN(drug_era_end_date) 23 | FROM @cdm_database_schema.drug_era 24 | WHERE drug_concept_id = 1124300 --diclofenac 25 | GROUP BY person_id; 26 | 27 | INSERT INTO @cohort_database_schema.@cohort_table ( 28 | cohort_definition_id, 29 | subject_id, 30 | cohort_start_date, 31 | cohort_end_date 32 | ) 33 | SELECT 2, 34 | condition_occurrence.person_id, 35 | MIN(condition_start_date), 36 | MIN(observation_period_end_date) 37 | FROM @cdm_database_schema.condition_occurrence 38 | INNER JOIN @cdm_database_schema.drug_exposure 39 | ON condition_occurrence.person_id = drug_exposure.person_id 40 | AND drug_exposure_start_date >= condition_start_date 41 | AND drug_exposure_start_date < DATEADD(DAY, 30, condition_start_date) 42 | INNER JOIN @cdm_database_schema.observation_period 43 | ON condition_occurrence.person_id = observation_period.person_id 44 | AND condition_start_date >= observation_period_start_date 45 | AND condition_start_date <= observation_period_end_date 46 | WHERE condition_concept_id IN ( 47 | SELECT 
descendant_concept_id 48 | FROM @cdm_database_schema.concept_ancestor 49 | WHERE ancestor_concept_id = 201826 -- Type 2 diabetes mellitus 50 | ) 51 | AND drug_concept_id IN ( 52 | SELECT descendant_concept_id 53 | FROM @cdm_database_schema.concept_ancestor 54 | WHERE ancestor_concept_id = 21600712 -- DRUGS USED IN DIABETES (ATC A10) 55 | ) 56 | GROUP BY condition_occurrence.person_id; 57 | -------------------------------------------------------------------------------- /inst/sql/sql_server/unit_tests/createTestingData.sql: -------------------------------------------------------------------------------- 1 | SELECT first_use.* 2 | INTO @cohort_table 3 | FROM ( 4 | SELECT drug_concept_id AS cohort_definition_id, 5 | MIN(drug_era_start_date) AS cohort_start_date, 6 | MIN(drug_era_end_date) AS cohort_end_date, 7 | person_id AS subject_id 8 | FROM @cdm_database_schema.drug_era 9 | WHERE drug_concept_id = 1118084-- celecoxib 10 | OR drug_concept_id = 1124300 --diclofenac 11 | GROUP BY drug_concept_id, 12 | person_id 13 | ) first_use 14 | INNER JOIN @cdm_database_schema.observation_period 15 | ON first_use.subject_id = observation_period.person_id 16 | AND cohort_start_date >= observation_period_start_date 17 | AND cohort_end_date <= observation_period_end_date 18 | WHERE DATEDIFF(DAY, observation_period_start_date, cohort_start_date) >= 365 19 | ; 20 | 21 | IF OBJECT_ID('@cohort_database_schema.@cohort_attribute_table', 'U') IS NOT NULL 22 | DROP TABLE @cohort_database_schema.@cohort_attribute_table; 23 | 24 | IF OBJECT_ID('@cohort_database_schema.@attribute_definition_table', 'U') IS NOT NULL 25 | DROP TABLE @cohort_database_schema.@attribute_definition_table; 26 | 27 | 28 | SELECT cohort_definition_id, 29 | subject_id, 30 | cohort_start_date, 31 | 1 AS attribute_definition_id, 32 | DATEDIFF(DAY, observation_period_start_date, cohort_start_date) AS value_as_number 33 | INTO @cohort_database_schema.@cohort_attribute_table 34 | FROM @cohort_table cohort 35 | INNER JOIN 
@cdm_database_schema.observation_period op 36 | ON op.person_id = cohort.subject_id 37 | WHERE cohort.cohort_start_date >= op.observation_period_start_date 38 | AND cohort.cohort_start_date <= op.observation_period_end_date 39 | {@cohort_definition_ids != ''} ? { 40 | AND cohort.cohort_definition_id IN (@cohort_definition_ids) 41 | } 42 | ; 43 | 44 | SELECT 1 AS attribute_definition_id, 45 | 'Length of observation in days' AS attribute_name 46 | INTO @cohort_database_schema.@attribute_definition_table 47 | ; 48 | -------------------------------------------------------------------------------- /inst/sql/sql_server/unit_tests/dropTestingData.sql: -------------------------------------------------------------------------------- 1 | IF OBJECT_ID('tempdb..@cohort_table', 'U') IS NOT NULL 2 | DROP TABLE @cohort_table; 3 | 4 | IF OBJECT_ID('@cohort_database_schema.@cohort_attribute_table', 'U') IS NOT NULL 5 | DROP TABLE @cohort_database_schema.@cohort_attribute_table; 6 | 7 | IF OBJECT_ID('@cohort_database_schema.@attribute_definition_table', 'U') IS NOT NULL 8 | DROP TABLE @cohort_database_schema.@attribute_definition_table; 9 | -------------------------------------------------------------------------------- /inst/testdata/binaryCovariateData.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/testdata/binaryCovariateData.zip -------------------------------------------------------------------------------- /inst/testdata/continuousCovariateData.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/FeatureExtraction/d514584c7bfe68f9439cac257da19eaa4c8519c0/inst/testdata/continuousCovariateData.zip -------------------------------------------------------------------------------- /java/FeatureExtraction.jardesc: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /java/org/ohdsi/featureExtraction/JarChecksum.java: -------------------------------------------------------------------------------- 1 | package org.ohdsi.featureExtraction; 2 | 3 | import java.io.File; 4 | import java.io.FileInputStream; 5 | import java.io.IOException; 6 | import java.security.MessageDigest; 7 | import java.security.NoSuchAlgorithmException; 8 | 9 | /** 10 | * Provides a function for computing the checksum of the current JAR file. 11 | * 12 | * @author mschuemi 13 | * 14 | */ 15 | public class JarChecksum { 16 | 17 | /** 18 | * Compute the checksum of the current JAR file. This can be used by R to verify that the JAR version is in sync with the R package. Note: will throw an 19 | * error if not running from a JAR file. 20 | * 21 | * @return The checksum of the current JAR file 22 | */ 23 | public static String computeJarChecksum() { 24 | File currentJavaJarFile = new File(JarChecksum.class.getProtectionDomain().getCodeSource().getLocation().getPath()); 25 | String filepath = currentJavaJarFile.getAbsolutePath(); 26 | StringBuilder checksum = new StringBuilder(); 27 | try { 28 | MessageDigest messageDigest = MessageDigest.getInstance("SHA-256"); 29 | FileInputStream fileInputStream = new FileInputStream(filepath); 30 | byte[] dataBytes = new byte[1024]; 31 | int nread = 0; 32 | while ((nread = fileInputStream.read(dataBytes)) != -1) 33 | messageDigest.update(dataBytes, 0, nread); 34 | fileInputStream.close(); 35 | byte[] mdBytes = messageDigest.digest(); 36 | 37 | for (int i = 0; i < mdBytes.length; i++) 38 | checksum.append(Integer.toString((mdBytes[i] & 0xff) + 0x100, 16).substring(1)); 39 | } catch (NoSuchAlgorithmException e) { 40 | e.printStackTrace(); 41 | } catch (IOException e) { 42 | 
e.printStackTrace(); 43 | } 44 | return (checksum.toString()); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /java/org/ohdsi/featureExtraction/ReadCSVFile.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2025 Observational Health Data Sciences and Informatics 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | ******************************************************************************/ 16 | package org.ohdsi.featureExtraction; 17 | 18 | import java.io.BufferedReader; 19 | import java.io.FileInputStream; 20 | import java.io.FileNotFoundException; 21 | import java.io.IOException; 22 | import java.io.InputStream; 23 | import java.io.InputStreamReader; 24 | import java.io.UnsupportedEncodingException; 25 | import java.util.Iterator; 26 | import java.util.List; 27 | 28 | public class ReadCSVFile implements Iterable> { 29 | protected BufferedReader bufferedReader; 30 | public boolean EOF = false; 31 | private char delimiter = ','; 32 | 33 | 34 | public ReadCSVFile(String filename, char delimiter) { 35 | this(filename); 36 | this.delimiter = delimiter; 37 | } 38 | 39 | public ReadCSVFile(String filename) { 40 | try { 41 | FileInputStream textFileStream = new FileInputStream(filename); 42 | bufferedReader = new BufferedReader(new InputStreamReader(textFileStream, "ISO-8859-1")); 43 | } catch (FileNotFoundException e) { 44 | e.printStackTrace(); 45 | } catch (UnsupportedEncodingException e) { 46 | e.printStackTrace(); 47 | } 48 | } 49 | 50 | public ReadCSVFile(InputStream inputstream, char delimiter) { 51 | this(inputstream); 52 | this.delimiter = delimiter; 53 | } 54 | 55 | public ReadCSVFile(InputStream inputstream) { 56 | try { 57 | bufferedReader = new BufferedReader(new InputStreamReader(inputstream, "ISO-8859-1")); 58 | } catch (UnsupportedEncodingException e) { 59 | e.printStackTrace(); 60 | } 61 | } 62 | 63 | public Iterator> getIterator() { 64 | return iterator(); 65 | } 66 | 67 | private class CSVFileIterator implements Iterator> { 68 | private String buffer; 69 | 70 | public CSVFileIterator() { 71 | try { 72 | buffer = bufferedReader.readLine(); 73 | if (buffer == null) { 74 | EOF = true; 75 | bufferedReader.close(); 76 | } 77 | } catch (IOException e) { 78 | e.printStackTrace(); 79 | } 80 | 81 | } 82 | 83 | public boolean hasNext() { 84 | return !EOF; 85 | } 
86 | 87 | public List next() { 88 | String result = buffer; 89 | try { 90 | buffer = bufferedReader.readLine(); 91 | if (buffer == null) { 92 | EOF = true; 93 | bufferedReader.close(); 94 | } 95 | } catch (IOException e) { 96 | e.printStackTrace(); 97 | } 98 | 99 | return line2columns(result); 100 | } 101 | 102 | public void remove() { 103 | System.err.println("Unimplemented method 'remove' called"); 104 | } 105 | } 106 | 107 | public Iterator> iterator() { 108 | return new CSVFileIterator(); 109 | } 110 | 111 | private List line2columns(String line) { 112 | List columns = StringUtilities.safeSplit(line, delimiter); 113 | for (int i = 0; i < columns.size(); i++) { 114 | String column = columns.get(i); 115 | if (column.startsWith("\"") && column.endsWith("\"") && column.length() > 1) 116 | column = column.substring(1, column.length() - 1); 117 | column = column.replace("\\\"", "\""); 118 | column = column.replaceAll("\\\\\\\\", "\\\\"); 119 | columns.set(i, column); 120 | } 121 | return columns; 122 | } 123 | 124 | public void setDelimiter(char delimiter) { 125 | this.delimiter = delimiter; 126 | } 127 | 128 | public char getDelimiter() { 129 | return delimiter; 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /java/org/ohdsi/featureExtraction/ReadCSVFileWithHeader.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright 2025 Observational Health Data Sciences and Informatics 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | ******************************************************************************/ 16 | package org.ohdsi.featureExtraction; 17 | 18 | import java.io.FileInputStream; 19 | import java.io.FileNotFoundException; 20 | import java.io.InputStream; 21 | import java.util.HashMap; 22 | import java.util.Iterator; 23 | import java.util.List; 24 | import java.util.Map; 25 | 26 | public class ReadCSVFileWithHeader implements Iterable { 27 | private InputStream inputstream; 28 | private char delimiter = ','; 29 | 30 | public ReadCSVFileWithHeader(String filename, char delimiter) { 31 | this(filename); 32 | this.delimiter = delimiter; 33 | } 34 | 35 | public ReadCSVFileWithHeader(String filename) { 36 | try { 37 | inputstream = new FileInputStream(filename); 38 | } catch (FileNotFoundException e) { 39 | e.printStackTrace(); 40 | } 41 | } 42 | 43 | public ReadCSVFileWithHeader(InputStream inputstream) { 44 | this.inputstream = inputstream; 45 | } 46 | 47 | @Override 48 | public Iterator iterator() { 49 | return new RowIterator(); 50 | } 51 | 52 | public class RowIterator implements Iterator { 53 | 54 | private Iterator> iterator; 55 | private Map fieldName2ColumnIndex; 56 | 57 | public RowIterator() { 58 | iterator = new ReadCSVFile(inputstream, delimiter).iterator(); 59 | fieldName2ColumnIndex = new HashMap(); 60 | for (String header : iterator.next()) 61 | fieldName2ColumnIndex.put(header, fieldName2ColumnIndex.size()); 62 | } 63 | 64 | @Override 65 | public boolean hasNext() { 66 | return iterator.hasNext(); 67 | } 68 | 69 | @Override 
70 | public Row next() { 71 | return new Row(iterator.next(), fieldName2ColumnIndex); 72 | } 73 | 74 | @Override 75 | public void remove() { 76 | throw new RuntimeException("Remove not supported"); 77 | } 78 | 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /java/org/ohdsi/featureExtraction/Row.java: -------------------------------------------------------------------------------- 1 | package org.ohdsi.featureExtraction; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | 8 | public class Row { 9 | private List cells; 10 | private Map fieldName2ColumnIndex; 11 | 12 | public Row() { 13 | fieldName2ColumnIndex = new HashMap(); 14 | cells = new ArrayList(); 15 | } 16 | 17 | public Row(List cells, Map fieldName2ColumnIndex) { 18 | this.cells = cells; 19 | this.fieldName2ColumnIndex = fieldName2ColumnIndex; 20 | } 21 | 22 | public Row(Row row) { 23 | cells = new ArrayList(row.cells); 24 | fieldName2ColumnIndex = new HashMap(row.fieldName2ColumnIndex); 25 | } 26 | 27 | public String get(String fieldName) { 28 | int index; 29 | try { 30 | index = fieldName2ColumnIndex.get(fieldName); 31 | } catch (NullPointerException e) { 32 | throw new RuntimeException("Field \"" + fieldName + "\" not found"); 33 | } 34 | if (cells.size() <= index) 35 | return null; 36 | else 37 | return cells.get(index); 38 | } 39 | 40 | public List getFieldNames() { 41 | List names = new ArrayList(fieldName2ColumnIndex.size()); 42 | for (int i = 0; i < fieldName2ColumnIndex.size(); i++) 43 | names.add(null); 44 | for (Map.Entry entry : fieldName2ColumnIndex.entrySet()) 45 | names.set(entry.getValue(), entry.getKey()); 46 | return names; 47 | } 48 | 49 | public int getInt(String fieldName) { 50 | return Integer.parseInt(get(fieldName).trim()); 51 | } 52 | 53 | public long getLong(String fieldName) { 54 | return Long.parseLong(get(fieldName)); 55 | } 56 | 57 | public double 
getDouble(String fieldName) { 58 | return Double.parseDouble(get(fieldName)); 59 | } 60 | 61 | public void add(String fieldName, String value) { 62 | fieldName2ColumnIndex.put(fieldName, cells.size()); 63 | cells.add(value); 64 | } 65 | 66 | public void add(String fieldName, int value) { 67 | add(fieldName, Integer.toString(value)); 68 | } 69 | 70 | public void add(String fieldName, boolean value) { 71 | add(fieldName, Boolean.toString(value)); 72 | } 73 | 74 | public void add(String fieldName, double value) { 75 | add(fieldName, Double.toString(value)); 76 | } 77 | 78 | public void add(String fieldName, long value) { 79 | add(fieldName, Long.toString(value)); 80 | } 81 | 82 | public void set(String fieldName, String value) { 83 | cells.set(fieldName2ColumnIndex.get(fieldName), value); 84 | } 85 | 86 | public void set(String fieldName, int value) { 87 | set(fieldName, Integer.toString(value)); 88 | } 89 | 90 | public void set(String fieldName, long value) { 91 | set(fieldName, Long.toString(value)); 92 | } 93 | 94 | public void set(String fieldName, double value) { 95 | set(fieldName, Double.toString(value)); 96 | } 97 | 98 | public List getCells() { 99 | return cells; 100 | } 101 | 102 | protected Map getfieldName2ColumnIndex() { 103 | return fieldName2ColumnIndex; 104 | } 105 | 106 | public String toString() { 107 | List data = new ArrayList(cells); 108 | for (String fieldName : fieldName2ColumnIndex.keySet()) { 109 | int index = fieldName2ColumnIndex.get(fieldName); 110 | if (data.size() > index) 111 | data.set(index, "[" + fieldName + ": " + data.get(index) + "]"); 112 | } 113 | return StringUtilities.join(data, ","); 114 | } 115 | 116 | public void remove(String field) { 117 | Integer index = fieldName2ColumnIndex.remove(field); 118 | cells.remove((int)index); 119 | Map tempMap = new HashMap(); 120 | for (Map.Entry entry : fieldName2ColumnIndex.entrySet()) 121 | if (entry.getValue() > index) 122 | tempMap.put(entry.getKey(), entry.getValue() - 1); 123 | else 
124 | tempMap.put(entry.getKey(), entry.getValue()); 125 | fieldName2ColumnIndex = tempMap; 126 | } 127 | 128 | public int size() { 129 | return cells.size(); 130 | } 131 | 132 | public void upperCaseFieldNames() { 133 | Map tempMap = new HashMap(); 134 | for (Map.Entry entry : fieldName2ColumnIndex.entrySet()) 135 | tempMap.put(entry.getKey().toUpperCase(), entry.getValue()); 136 | fieldName2ColumnIndex = tempMap; 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /java/org/ohdsi/featureExtraction/StringUtilities.java: -------------------------------------------------------------------------------- 1 | package org.ohdsi.featureExtraction; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collection; 5 | import java.util.Iterator; 6 | import java.util.List; 7 | 8 | public class StringUtilities { 9 | 10 | // private static String[] UPPER_CASE_LETTERS = new String[]{'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 11 | // 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'}; 12 | 13 | public static String join(Collection s, String delimiter) { 14 | StringBuffer buffer = new StringBuffer(); 15 | Iterator iter = s.iterator(); 16 | if (iter.hasNext()) { 17 | buffer.append(iter.next().toString()); 18 | } 19 | while (iter.hasNext()) { 20 | buffer.append(delimiter); 21 | buffer.append(iter.next().toString()); 22 | } 23 | return buffer.toString(); 24 | } 25 | 26 | public static String join(Object[] objects, String delimiter) { 27 | StringBuffer buffer = new StringBuffer(); 28 | if (objects.length != 0) 29 | buffer.append(objects[0].toString()); 30 | for (int i = 1; i < objects.length; i++) { 31 | buffer.append(delimiter); 32 | buffer.append(objects[i].toString()); 33 | } 34 | return buffer.toString(); 35 | } 36 | 37 | public static List safeSplit(String string, char delimiter) { 38 | List result = new ArrayList(); 39 | if (string.length() == 0) { 40 | result.add(""); 41 | return result; 
42 | } 43 | boolean literal = false; 44 | boolean escape = false; 45 | int startpos = 0; 46 | int i = 0; 47 | char currentchar; 48 | while (i < string.length()) { 49 | currentchar = string.charAt(i); 50 | if (currentchar == '"' && !escape) { 51 | literal = !literal; 52 | } 53 | if (!literal && (currentchar == delimiter && !escape)) { 54 | result.add(string.substring(startpos, i)); 55 | startpos = i + 1; 56 | } 57 | if (currentchar == '\\') { 58 | escape = !escape; 59 | } else { 60 | escape = false; 61 | } 62 | i++; 63 | } 64 | result.add(string.substring(startpos, i)); 65 | return result; 66 | } 67 | 68 | public static String camelCaseToSnakeCase(String string) { 69 | StringBuilder result = new StringBuilder(); 70 | int start = 0; 71 | for (int i = 0; i < string.length(); i++) { 72 | int charInt = (int) string.charAt(i); 73 | if (charInt < 91 && charInt > 64) { 74 | result.append(string.substring(start, i) + "_" + string.substring(i, i + 1).toLowerCase()); 75 | start = i + 1; 76 | } 77 | } 78 | if (start < string.length()) 79 | result.append(string.substring(start)); 80 | return result.toString(); 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /man-roxygen/GetCovarParams.R: -------------------------------------------------------------------------------- 1 | #' @details 2 | #' This function uses the data in the CDM to construct a large set of covariates for the provided 3 | #' cohort. The cohort is assumed to be in an existing temp table with these fields: 'subject_id', 4 | #' 'cohort_definition_id', 'cohort_start_date'. Optionally, an extra field can be added containing the 5 | #' unique identifier that will be used as rowID in the output. Typically, users don't call this 6 | #' function directly but rather use the \code{\link{getDbCovariateData}} function instead. 
7 | #' 8 | #' @param connection A connection to the server containing the schema as created using the 9 | #' \code{connect} function in the \code{DatabaseConnector} package. 10 | #' @param oracleTempSchema DEPRECATED: use \code{tempEmulationSchema} instead. 11 | #' @param tempEmulationSchema Some database platforms like Oracle and Impala do not truly support 12 | #' temp tables. To emulate temp tables, provide a schema with write 13 | #' privileges where temp tables can be created. 14 | #' @param cdmDatabaseSchema The name of the database schema that contains the OMOP CDM instance. 15 | #' Requires read permissions to this database. On SQL Server, this should 16 | #' specify both the database and the schema, so for example 17 | #' 'cdm_instance.dbo'. 18 | #' @param cohortTable Name of the table holding the cohort for which we want to construct 19 | #' covariates. If it is a temp table, the name should have a hash prefix, 20 | #' e.g. '#temp_table'. If it is a non-temp table, it should include the 21 | #' database schema, e.g. 'cdm_database.cohort'. 22 | #' @param cohortId DEPRECATED: For which cohort ID should covariates be constructed? If set to -1, 23 | #' covariates will be constructed for all cohorts in the specified cohort 24 | #' table. 25 | #' @param cohortIds For which cohort ID(s) should covariates be constructed? If set to c(-1), 26 | #' covariates will be constructed for all cohorts in the specified cohort 27 | #' table. 28 | #' @param cdmVersion The version of the Common Data Model used. Currently only 29 | #' \code{cdmVersion = "5"} is supported. 30 | #' @param rowIdField The name of the field in the cohort temp table that is to be used as the 31 | #' row_id field in the output table. This can be especially useful if there 32 | #' is more than one period per person. 33 | #' @param aggregated Should aggregate statistics be computed instead of covariates per 34 | #' cohort entry?
35 | #' 36 | #' @return 37 | #' Returns an object of type \code{CovariateData}, which is an Andromeda object containing information on the baseline covariates. 38 | #' Information about multiple outcomes can be captured at once for efficiency reasons. This object is 39 | #' a list with the following components: \describe{ \item{covariates}{A table listing the 40 | #' baseline covariates per person in the cohorts. This is done using a sparse representation: 41 | #' covariates with a value of 0 are omitted to save space. The covariates object will have three 42 | #' columns: rowId, covariateId, and covariateValue. The rowId is usually equal to the person_id, 43 | #' unless specified otherwise in the rowIdField argument.} \item{covariateRef}{A table 44 | #' describing the covariates that have been extracted.} }. The CovariateData object will also have a \code{metaData} attribute, a list of objects with 45 | #' information on how the covariateData object was constructed. 46 | -------------------------------------------------------------------------------- /man/CovariateData-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CovariateData.R 3 | \docType{class} 4 | \name{CovariateData-class} 5 | \alias{CovariateData-class} 6 | \alias{CovariateData} 7 | \alias{show,CovariateData-method} 8 | \alias{summary,CovariateData-method} 9 | \title{Covariate Data} 10 | \usage{ 11 | \S4method{show}{CovariateData}(object) 12 | 13 | \S4method{summary}{CovariateData}(object) 14 | } 15 | \arguments{ 16 | \item{object}{An object of class `CovariateData`.} 17 | } 18 | \description{ 19 | \code{CovariateData} is an S4 class that inherits from \code{\link[Andromeda]{Andromeda}}. It contains 20 | information on covariates, which can be either captured on a per-person basis, or aggregated across 21 | the cohort(s).
22 | 23 | By default covariates refer to a specific time period, with for example different covariate IDs for 24 | whether a diagnosis code was observed in the year before and month before index date. However, a 25 | \code{CovariateData} can also be temporal, meaning that next to a covariate ID there is also a time ID, 26 | which identifies the (user specified) time window the covariate was captured. 27 | 28 | A \code{CovariateData} object is typically created using \code{\link{getDbCovariateData}}, can only be saved using 29 | \code{\link{saveCovariateData}}, and loaded using \code{\link{loadCovariateData}}. 30 | } 31 | \seealso{ 32 | \code{\link{isCovariateData}}, \code{\link{isAggregatedCovariateData}}, \code{\link{isTemporalCovariateData}} 33 | } 34 | -------------------------------------------------------------------------------- /man/FeatureExtraction-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureExtraction.R 3 | \docType{package} 4 | \name{FeatureExtraction-package} 5 | \alias{FeatureExtraction} 6 | \alias{FeatureExtraction-package} 7 | \title{FeatureExtraction: Generating Features for a Cohort} 8 | \description{ 9 | An R interface for generating features for a cohort using data in the Common Data Model. Features can be constructed using default or custom made feature definitions. Furthermore it's possible to aggregate features and get the summary statistics. 
10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://github.com/OHDSI/FeatureExtraction} 15 | \item Report bugs at \url{https://github.com/OHDSI/FeatureExtraction/issues} 16 | } 17 | 18 | } 19 | \author{ 20 | \strong{Maintainer}: Ger Inberg \email{g.inberg@erasmusmc.nl} 21 | 22 | Authors: 23 | \itemize{ 24 | \item Martijn Schuemie \email{schuemie@ohdsi.org} 25 | \item Marc Suchard 26 | \item Patrick Ryan 27 | \item Jenna Reps 28 | \item Anthony Sena \email{sena@ohdsi.org} 29 | } 30 | 31 | Other contributors: 32 | \itemize{ 33 | \item Observational Health Data Science and Informatics [copyright holder] 34 | } 35 | 36 | } 37 | \keyword{internal} 38 | -------------------------------------------------------------------------------- /man/aggregateCovariates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Aggregation.R 3 | \name{aggregateCovariates} 4 | \alias{aggregateCovariates} 5 | \title{Aggregate covariate data} 6 | \usage{ 7 | aggregateCovariates(covariateData) 8 | } 9 | \arguments{ 10 | \item{covariateData}{An object of type \code{covariateData} as generated using 11 | \code{getDbCovariateData}.} 12 | } 13 | \value{ 14 | An object of class \code{covariateData}. 
15 | } 16 | \description{ 17 | Aggregate covariate data 18 | } 19 | \examples{ 20 | \donttest{ 21 | covariateData <- FeatureExtraction::createEmptyCovariateData( 22 | cohortIds = 1, 23 | aggregated = FALSE, 24 | temporal = FALSE 25 | ) 26 | aggregatedCovariateData <- aggregateCovariates(covariateData) 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /man/computeStandardizedDifference.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CompareCohorts.R 3 | \name{computeStandardizedDifference} 4 | \alias{computeStandardizedDifference} 5 | \title{Compute standardized difference of mean for all covariates.} 6 | \usage{ 7 | computeStandardizedDifference( 8 | covariateData1, 9 | covariateData2, 10 | cohortId1 = NULL, 11 | cohortId2 = NULL 12 | ) 13 | } 14 | \arguments{ 15 | \item{covariateData1}{The covariate data of the first cohort. Needs to be in aggregated format.} 16 | 17 | \item{covariateData2}{The covariate data of the second cohort. Needs to be in aggregated format.} 18 | 19 | \item{cohortId1}{If provided, \code{covariateData1} will be restricted to this cohort. If not 20 | provided, \code{covariateData1} is assumed to contain data on only 1 cohort.} 21 | 22 | \item{cohortId2}{If provided, \code{covariateData2} will be restricted to this cohort. If not 23 | provided, \code{covariateData2} is assumed to contain data on only 1 cohort.} 24 | } 25 | \value{ 26 | A data frame with means and standard deviations per cohort as well as the standardized difference 27 | of mean. 28 | } 29 | \description{ 30 | Computes the standardized difference for all covariates between two cohorts. The standardized 31 | difference is defined as the difference between the mean divided by the overall standard deviation. 
32 | } 33 | \examples{ 34 | \donttest{ 35 | binaryCovDataFile <- system.file("testdata/binaryCovariateData.zip", 36 | package = "FeatureExtraction" 37 | ) 38 | covariateData1 <- loadCovariateData(binaryCovDataFile) 39 | covariateData2 <- loadCovariateData(binaryCovDataFile) 40 | covDataDiff <- computeStandardizedDifference( 41 | covariateData1, 42 | covariateData2, 43 | cohortId1 = 1, 44 | cohortId2 = 2 45 | ) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /man/convertPrespecSettingsToDetailedSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DetailedCovariateSettings.R 3 | \name{convertPrespecSettingsToDetailedSettings} 4 | \alias{convertPrespecSettingsToDetailedSettings} 5 | \title{Convert prespecified covariate settings into detailed covariate settings} 6 | \usage{ 7 | convertPrespecSettingsToDetailedSettings(covariateSettings) 8 | } 9 | \arguments{ 10 | \item{covariateSettings}{An object of type \code{covariateSettings} as created for example by the 11 | \code{\link{createCovariateSettings}} function.} 12 | } 13 | \value{ 14 | An object of type \code{covariateSettings}, to be used in other functions. 15 | } 16 | \description{ 17 | Convert prespecified covariate settings into detailed covariate settings 18 | } 19 | \details{ 20 | For advanced users only. 
21 | } 22 | \examples{ 23 | \donttest{ 24 | covSettings <- createDefaultCovariateSettings() 25 | detailedSettings <- convertPrespecSettingsToDetailedSettings(covariateSettings = covSettings) 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/createAnalysisDetails.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DetailedCovariateSettings.R 3 | \name{createAnalysisDetails} 4 | \alias{createAnalysisDetails} 5 | \title{Create detailed covariate settings} 6 | \usage{ 7 | createAnalysisDetails( 8 | analysisId, 9 | sqlFileName, 10 | parameters, 11 | includedCovariateConceptIds = c(), 12 | addDescendantsToInclude = FALSE, 13 | excludedCovariateConceptIds = c(), 14 | addDescendantsToExclude = FALSE, 15 | includedCovariateIds = c() 16 | ) 17 | } 18 | \arguments{ 19 | \item{analysisId}{An integer between 0 and 999 that uniquely identifies this 20 | analysis.} 21 | 22 | \item{sqlFileName}{The name of the parameterized SQL file embedded in the 23 | \code{featureExtraction} package.} 24 | 25 | \item{parameters}{The list of parameter values used to render the template SQL.} 26 | 27 | \item{includedCovariateConceptIds}{A list of concept IDs that should be used to construct 28 | covariates.} 29 | 30 | \item{addDescendantsToInclude}{Should descendant concept IDs be added to the list of concepts 31 | to include?} 32 | 33 | \item{excludedCovariateConceptIds}{A list of concept IDs that should NOT be used to construct 34 | covariates.} 35 | 36 | \item{addDescendantsToExclude}{Should descendant concept IDs be added to the list of concepts 37 | to exclude?} 38 | 39 | \item{includedCovariateIds}{A list of covariate IDs that should be restricted to.} 40 | } 41 | \value{ 42 | An object of type \code{analysisDetail}, to be used in 43 | \code{\link{createDetailedCovariateSettings}} or 44 | 
\code{\link{createDetailedTemporalCovariateSettings}}. 45 | } 46 | \description{ 47 | Create detailed covariate settings 48 | } 49 | \details{ 50 | creates an object specifying in detail how covariates should be constructed from data in the CDM 51 | model. Warning: this function is for advanced users only. 52 | } 53 | \examples{ 54 | analysisDetails <- createAnalysisDetails( 55 | analysisId = 1, 56 | sqlFileName = "DemographicsGender.sql", 57 | parameters = list( 58 | analysisId = 1, 59 | analysisName = "Gender", 60 | domainId = "Demographics" 61 | ), 62 | includedCovariateConceptIds = c(), 63 | addDescendantsToInclude = FALSE, 64 | excludedCovariateConceptIds = c(), 65 | addDescendantsToExclude = FALSE, 66 | includedCovariateIds = c() 67 | ) 68 | 69 | } 70 | -------------------------------------------------------------------------------- /man/createCohortAttrCovariateSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GetCovariatesFromCohortAttributes.R 3 | \name{createCohortAttrCovariateSettings} 4 | \alias{createCohortAttrCovariateSettings} 5 | \title{Create cohort attribute covariate settings} 6 | \usage{ 7 | createCohortAttrCovariateSettings( 8 | analysisId = -1, 9 | attrDatabaseSchema, 10 | attrDefinitionTable = "attribute_definition", 11 | cohortAttrTable = "cohort_attribute", 12 | includeAttrIds = c(), 13 | isBinary = FALSE, 14 | missingMeansZero = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{analysisId}{A unique identifier for this analysis.} 19 | 20 | \item{attrDatabaseSchema}{The database schema where the attribute definition and cohort attribute 21 | table can be found.} 22 | 23 | \item{attrDefinitionTable}{The name of the attribute definition table.} 24 | 25 | \item{cohortAttrTable}{The name of the cohort attribute table.} 26 | 27 | \item{includeAttrIds}{(optional) A list of attribute definition IDs to restrict to.} 28 | 29 
| \item{isBinary}{Needed for aggregation: Are these binary variables? Binary 30 | variables should only have the values 0 or 1.} 31 | 32 | \item{missingMeansZero}{Needed for aggregation: For continuous values, should missing 33 | values be interpreted as 0?} 34 | } 35 | \value{ 36 | An object of type \code{covariateSettings}, to be used in other functions. 37 | } 38 | \description{ 39 | Create cohort attribute covariate settings 40 | } 41 | \details{ 42 | Creates an object specifying where the cohort attributes can be found to construct covariates. The 43 | attributes should be defined in a table with the same structure as the attribute_definition table 44 | in the Common Data Model. It should at least have these columns: \describe{ 45 | \item{attribute_definition_id}{A unique identifier of type integer.} \item{attribute_name}{A short 46 | description of the attribute.} } The cohort attributes themselves should be stored in a table with 47 | the same format as the cohort_attribute table in the Common Data Model. 
It should at least have 48 | these columns: \describe{ \item{cohort_definition_id}{A key to link to the cohort table.} 49 | \item{subject_id}{A key to link to the cohort table.} \item{cohort_start_date}{A key to link to the 50 | cohort table.} \item{attribute_definition_id}{A foreign key linking to the attribute definition 51 | table.} \item{value_as_number}{A real number.} } 52 | } 53 | \examples{ 54 | \donttest{ 55 | covariateSettings <- createCohortAttrCovariateSettings( 56 | analysisId = 1, 57 | attrDatabaseSchema = "main", 58 | attrDefinitionTable = "attribute_definition", 59 | cohortAttrTable = "cohort_attribute", 60 | includeAttrIds = c(1), 61 | isBinary = FALSE, 62 | missingMeansZero = FALSE 63 | ) 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /man/createCohortBasedCovariateSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GetCovariatesFromOtherCohorts.R 3 | \name{createCohortBasedCovariateSettings} 4 | \alias{createCohortBasedCovariateSettings} 5 | \title{Create settings for covariates based on other cohorts} 6 | \usage{ 7 | createCohortBasedCovariateSettings( 8 | analysisId, 9 | covariateCohortDatabaseSchema = NULL, 10 | covariateCohortTable = NULL, 11 | covariateCohorts, 12 | valueType = "binary", 13 | startDay = -365, 14 | endDay = 0, 15 | includedCovariateIds = c(), 16 | warnOnAnalysisIdOverlap = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{analysisId}{A unique identifier for this analysis.} 21 | 22 | \item{covariateCohortDatabaseSchema}{The database schema where the cohorts used to define the covariates 23 | can be found. If set to \code{NULL}, the database schema will be 24 | guessed, for example using the same one as for the main cohorts.} 25 | 26 | \item{covariateCohortTable}{The table where the cohorts used to define the covariates 27 | can be found. 
If set to \code{NULL}, the table will be 28 | guessed, for example using the same one as for the main cohorts.} 29 | 30 | \item{covariateCohorts}{A data frame with at least two columns: 'cohortId' and 'cohortName'. The 31 | cohort ID should correspond to the \code{cohort_definition_id} of the cohort 32 | to use for creating a covariate.} 33 | 34 | \item{valueType}{Either 'binary' or 'count'. When \code{valueType = 'count'}, the covariate 35 | value will be the number of times the cohort was observed in the window.} 36 | 37 | \item{startDay}{What is the start day (relative to the index date) of the covariate window?} 38 | 39 | \item{endDay}{What is the end day (relative to the index date) of the covariate window?} 40 | 41 | \item{includedCovariateIds}{A list of covariate IDs that should be restricted to.} 42 | 43 | \item{warnOnAnalysisIdOverlap}{Warn if the provided `analysisId` overlaps with any predefined analysis as 44 | available in the `createCovariateSettings()` function.} 45 | } 46 | \value{ 47 | An object of type \code{covariateSettings}, to be used in other functions. 48 | } 49 | \description{ 50 | Create settings for covariates based on other cohorts 51 | } 52 | \details{ 53 | Creates an object specifying covariates to be constructed based on the presence of other cohorts. 
54 | } 55 | -------------------------------------------------------------------------------- /man/createCohortBasedTemporalCovariateSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GetCovariatesFromOtherCohorts.R 3 | \name{createCohortBasedTemporalCovariateSettings} 4 | \alias{createCohortBasedTemporalCovariateSettings} 5 | \title{Create settings for temporal covariates based on other cohorts} 6 | \usage{ 7 | createCohortBasedTemporalCovariateSettings( 8 | analysisId, 9 | covariateCohortDatabaseSchema = NULL, 10 | covariateCohortTable = NULL, 11 | covariateCohorts, 12 | valueType = "binary", 13 | temporalStartDays = -365:-1, 14 | temporalEndDays = -365:-1, 15 | includedCovariateIds = c(), 16 | warnOnAnalysisIdOverlap = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{analysisId}{A unique identifier for this analysis.} 21 | 22 | \item{covariateCohortDatabaseSchema}{The database schema where the cohorts used to define the covariates 23 | can be found. If set to \code{NULL}, the database schema will be 24 | guessed, for example using the same one as for the main cohorts.} 25 | 26 | \item{covariateCohortTable}{The table where the cohorts used to define the covariates 27 | can be found. If set to \code{NULL}, the table will be 28 | guessed, for example using the same one as for the main cohorts.} 29 | 30 | \item{covariateCohorts}{A data frame with at least two columns: 'cohortId' and 'cohortName'. The 31 | cohort ID should correspond to the \code{cohort_definition_id} of the cohort 32 | to use for creating a covariate.} 33 | 34 | \item{valueType}{Either 'binary' or 'count'. When \code{valueType = 'count'}, the covariate 35 | value will be the number of times the cohort was observed in the window.} 36 | 37 | \item{temporalStartDays}{A list of integers representing the start of a time 38 | period, relative to the index date. 
0 indicates the 39 | index date, -1 indicates the day before the index 40 | date, etc. The start day is included in the time 41 | period.} 42 | 43 | \item{temporalEndDays}{A list of integers representing the end of a time 44 | period, relative to the index date. 0 indicates the 45 | index date, -1 indicates the day before the index 46 | date, etc. The end day is included in the time 47 | period.} 48 | 49 | \item{includedCovariateIds}{A list of covariate IDs that should be restricted to.} 50 | 51 | \item{warnOnAnalysisIdOverlap}{Warn if the provided `analysisId` overlaps with any predefined analysis as 52 | available in the `createTemporalCovariateSettings()` function.} 53 | } 54 | \value{ 55 | An object of type \code{covariateSettings}, to be used in other functions. 56 | } 57 | \description{ 58 | Create settings for temporal covariates based on other cohorts 59 | } 60 | \details{ 61 | Creates an object specifying temporal covariates to be constructed based on the presence of other cohorts. 
62 | } 63 | -------------------------------------------------------------------------------- /man/createDefaultCovariateSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DetailedCovariateSettings.R 3 | \name{createDefaultCovariateSettings} 4 | \alias{createDefaultCovariateSettings} 5 | \title{Create default covariate settings} 6 | \usage{ 7 | createDefaultCovariateSettings( 8 | includedCovariateConceptIds = c(), 9 | addDescendantsToInclude = FALSE, 10 | excludedCovariateConceptIds = c(), 11 | addDescendantsToExclude = FALSE, 12 | includedCovariateIds = c() 13 | ) 14 | } 15 | \arguments{ 16 | \item{includedCovariateConceptIds}{A list of concept IDs that should be used to construct 17 | covariates.} 18 | 19 | \item{addDescendantsToInclude}{Should descendant concept IDs be added to the list of concepts 20 | to include?} 21 | 22 | \item{excludedCovariateConceptIds}{A list of concept IDs that should NOT be used to construct 23 | covariates.} 24 | 25 | \item{addDescendantsToExclude}{Should descendant concept IDs be added to the list of concepts 26 | to exclude?} 27 | 28 | \item{includedCovariateIds}{A list of covariate IDs that should be restricted to.} 29 | } 30 | \value{ 31 | An object of type \code{covariateSettings}, to be used in other functions. 
32 | } 33 | \description{ 34 | Create default covariate settings 35 | } 36 | \examples{ 37 | \donttest{ 38 | covSettings <- createDefaultCovariateSettings( 39 | includedCovariateConceptIds = c(1), 40 | addDescendantsToInclude = FALSE, 41 | excludedCovariateConceptIds = c(2), 42 | addDescendantsToExclude = FALSE, 43 | includedCovariateIds = c(1) 44 | ) 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /man/createDefaultTemporalCovariateSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DetailedCovariateSettings.R 3 | \name{createDefaultTemporalCovariateSettings} 4 | \alias{createDefaultTemporalCovariateSettings} 5 | \title{Create default temporal covariate settings} 6 | \usage{ 7 | createDefaultTemporalCovariateSettings( 8 | includedCovariateConceptIds = c(), 9 | addDescendantsToInclude = FALSE, 10 | excludedCovariateConceptIds = c(), 11 | addDescendantsToExclude = FALSE, 12 | includedCovariateIds = c() 13 | ) 14 | } 15 | \arguments{ 16 | \item{includedCovariateConceptIds}{A list of concept IDs that should be used to construct 17 | covariates.} 18 | 19 | \item{addDescendantsToInclude}{Should descendant concept IDs be added to the list of concepts 20 | to include?} 21 | 22 | \item{excludedCovariateConceptIds}{A list of concept IDs that should NOT be used to construct 23 | covariates.} 24 | 25 | \item{addDescendantsToExclude}{Should descendant concept IDs be added to the list of concepts 26 | to exclude?} 27 | 28 | \item{includedCovariateIds}{A list of covariate IDs that should be restricted to.} 29 | } 30 | \value{ 31 | An object of type \code{covariateSettings}, to be used in other functions. 
32 | } 33 | \description{ 34 | Create default temporal covariate settings 35 | } 36 | \examples{ 37 | \donttest{ 38 | covSettings <- createDefaultTemporalCovariateSettings( 39 | includedCovariateConceptIds = c(1), 40 | addDescendantsToInclude = FALSE, 41 | excludedCovariateConceptIds = c(2), 42 | addDescendantsToExclude = FALSE, 43 | includedCovariateIds = c(1) 44 | ) 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /man/createDetailedCovariateSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DetailedCovariateSettings.R 3 | \name{createDetailedCovariateSettings} 4 | \alias{createDetailedCovariateSettings} 5 | \title{Create detailed covariate settings} 6 | \usage{ 7 | createDetailedCovariateSettings(analyses = list()) 8 | } 9 | \arguments{ 10 | \item{analyses}{A list of \code{analysisDetail} objects as created using 11 | \code{\link{createAnalysisDetails}}.} 12 | } 13 | \value{ 14 | An object of type \code{covariateSettings}, to be used in other functions. 15 | } 16 | \description{ 17 | Create detailed covariate settings 18 | } 19 | \details{ 20 | Creates an object specifying in detail how covariates should be constructed from data in the CDM 21 | model. Warning: this function is for advanced users only. 
22 | } 23 | \examples{ 24 | \donttest{ 25 | analysisDetails <- createAnalysisDetails( 26 | analysisId = 1, 27 | sqlFileName = "DemographicsGender.sql", 28 | parameters = list( 29 | analysisId = 1, 30 | analysisName = "Gender", 31 | domainId = "Demographics" 32 | ), 33 | includedCovariateConceptIds = c(), 34 | addDescendantsToInclude = FALSE, 35 | excludedCovariateConceptIds = c(), 36 | addDescendantsToExclude = FALSE, 37 | includedCovariateIds = c() 38 | ) 39 | covSettings <- createDetailedCovariateSettings(analyses = analysisDetails) 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /man/createDetailedTemporalCovariateSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DetailedCovariateSettings.R 3 | \name{createDetailedTemporalCovariateSettings} 4 | \alias{createDetailedTemporalCovariateSettings} 5 | \title{Create detailed temporal covariate settings} 6 | \usage{ 7 | createDetailedTemporalCovariateSettings( 8 | analyses = list(), 9 | temporalStartDays = -365:-1, 10 | temporalEndDays = -365:-1 11 | ) 12 | } 13 | \arguments{ 14 | \item{analyses}{A list of analysis detail objects as created using 15 | \code{\link{createAnalysisDetails}}.} 16 | 17 | \item{temporalStartDays}{A list of integers representing the start of a time period, relative to 18 | the index date. 0 indicates the index date, -1 indicates the day before 19 | the index date, etc. The start day is included in the time period.} 20 | 21 | \item{temporalEndDays}{A list of integers representing the end of a time period, relative to the 22 | index date. 0 indicates the index date, -1 indicates the day before the 23 | index date, etc. The end day is included in the time period.} 24 | } 25 | \value{ 26 | An object of type \code{covariateSettings}, to be used in other functions. 
27 | } 28 | \description{ 29 | Create detailed temporal covariate settings 30 | } 31 | \details{ 32 | Creates an object specifying in detail how temporal covariates should be constructed from data in 33 | the CDM model. Warning: this function is for advanced users only. 34 | } 35 | \examples{ 36 | \donttest{ 37 | analysisDetails <- createAnalysisDetails( 38 | analysisId = 1, 39 | sqlFileName = "DemographicsGender.sql", 40 | parameters = list( 41 | analysisId = 1, 42 | analysisName = "Gender", 43 | domainId = "Demographics" 44 | ), 45 | includedCovariateConceptIds = c(), 46 | addDescendantsToInclude = FALSE, 47 | excludedCovariateConceptIds = c(), 48 | addDescendantsToExclude = FALSE, 49 | includedCovariateIds = c() 50 | ) 51 | covSettings <- createDetailedTemporalCovariateSettings( 52 | analyses = analysisDetails, 53 | temporalStartDays = -365:-1, 54 | temporalEndDays = -365:-1 55 | ) 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /man/createEmptyCovariateData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CovariateData.R 3 | \name{createEmptyCovariateData} 4 | \alias{createEmptyCovariateData} 5 | \title{Creates an empty covariate data object} 6 | \usage{ 7 | createEmptyCovariateData(cohortIds, aggregated, temporal) 8 | } 9 | \arguments{ 10 | \item{cohortIds}{For which cohort IDs should the covariate data be created?} 11 | 12 | \item{aggregated}{Whether the data should be aggregated.} 13 | 14 | \item{temporal}{Whether the data is temporal.} 15 | } 16 | \value{ 17 | An empty object of class \code{CovariateData}. 18 | } 19 | \description{ 20 | Creates an empty covariate data object 21 | } 22 | \examples{ 23 | \donttest{ 24 | covariateData <- FeatureExtraction::createEmptyCovariateData( 25 | cohortIds = 1, 26 | aggregated = FALSE, 27 | temporal = FALSE 28 | ) 29 | } 30 | } 31 | 
-------------------------------------------------------------------------------- /man/createTable1.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Table1.R 3 | \name{createTable1} 4 | \alias{createTable1} 5 | \title{Create a table 1} 6 | \usage{ 7 | createTable1( 8 | covariateData1, 9 | covariateData2 = NULL, 10 | cohortId1 = NULL, 11 | cohortId2 = NULL, 12 | specifications = getDefaultTable1Specifications(), 13 | output = "two columns", 14 | showCounts = FALSE, 15 | showPercent = TRUE, 16 | percentDigits = 1, 17 | valueDigits = 1, 18 | stdDiffDigits = 2 19 | ) 20 | } 21 | \arguments{ 22 | \item{covariateData1}{The covariate data of the cohort to be included in the table.} 23 | 24 | \item{covariateData2}{The covariate data of the cohort to also be included, when comparing two 25 | cohorts.} 26 | 27 | \item{cohortId1}{If provided, \code{covariateData1} will be restricted to this cohort. If not 28 | provided, \code{covariateData1} is assumed to contain data on only 1 cohort.} 29 | 30 | \item{cohortId2}{If provided, \code{covariateData2} will be restricted to this cohort. If not 31 | provided, \code{covariateData2} is assumed to contain data on only 1 cohort.} 32 | 33 | \item{specifications}{Specifications of which covariates to display, and how.} 34 | 35 | \item{output}{The output format for the table. 
Options are \code{output = "two columns"}, 36 | \code{output = "one column"}, or \code{output = "list"}.} 37 | 38 | \item{showCounts}{Show the number of cohort entries having the binary covariate?} 39 | 40 | \item{showPercent}{Show the percentage of cohort entries having the binary covariate?} 41 | 42 | \item{percentDigits}{Number of digits to be used for percentages.} 43 | 44 | \item{valueDigits}{Number of digits to be used for the values of continuous variables.} 45 | 46 | \item{stdDiffDigits}{Number of digits to be used for the standardized differences.} 47 | } 48 | \value{ 49 | A data frame, or, when \code{output = "list"} a list of two data frames. 50 | } 51 | \description{ 52 | Creates a formatted table of cohort characteristics, to be included in publications or reports. 53 | Allows for creating a table describing a single cohort, or a table comparing two cohorts. 54 | } 55 | \examples{ 56 | \donttest{ 57 | eunomiaConnectionDetails <- Eunomia::getEunomiaConnectionDetails() 58 | covSettings <- createDefaultCovariateSettings() 59 | Eunomia::createCohorts( 60 | connectionDetails = eunomiaConnectionDetails, 61 | cdmDatabaseSchema = "main", 62 | cohortDatabaseSchema = "main", 63 | cohortTable = "cohort" 64 | ) 65 | covData1 <- getDbCovariateData( 66 | connectionDetails = eunomiaConnectionDetails, 67 | tempEmulationSchema = NULL, 68 | cdmDatabaseSchema = "main", 69 | cdmVersion = "5", 70 | cohortTable = "cohort", 71 | cohortDatabaseSchema = "main", 72 | cohortTableIsTemp = FALSE, 73 | cohortId = 1, 74 | rowIdField = "subject_id", 75 | covariateSettings = covSettings, 76 | aggregated = TRUE 77 | ) 78 | covData2 <- getDbCovariateData( 79 | connectionDetails = eunomiaConnectionDetails, 80 | tempEmulationSchema = NULL, 81 | cdmDatabaseSchema = "main", 82 | cdmVersion = "5", 83 | cohortTable = "cohort", 84 | cohortDatabaseSchema = "main", 85 | cohortTableIsTemp = FALSE, 86 | cohortId = 2, 87 | rowIdField = "subject_id", 88 | covariateSettings = covSettings, 89 | 
aggregated = TRUE 90 | ) 91 | table1 <- createTable1( 92 | covariateData1 = covData1, 93 | covariateData2 = covData2, 94 | cohortId1 = 1, 95 | cohortId2 = 2, 96 | specifications = getDefaultTable1Specifications(), 97 | output = "one column", 98 | showCounts = FALSE, 99 | showPercent = TRUE, 100 | percentDigits = 1, 101 | valueDigits = 1, 102 | stdDiffDigits = 2 103 | ) 104 | } 105 | 106 | } 107 | -------------------------------------------------------------------------------- /man/createTable1CovariateSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Table1.R 3 | \name{createTable1CovariateSettings} 4 | \alias{createTable1CovariateSettings} 5 | \title{Create covariate settings for a table 1} 6 | \usage{ 7 | createTable1CovariateSettings( 8 | specifications = getDefaultTable1Specifications(), 9 | covariateSettings = createDefaultCovariateSettings(), 10 | includedCovariateConceptIds = c(), 11 | addDescendantsToInclude = FALSE, 12 | excludedCovariateConceptIds = c(), 13 | addDescendantsToExclude = FALSE, 14 | includedCovariateIds = c() 15 | ) 16 | } 17 | \arguments{ 18 | \item{specifications}{A specifications object for generating a table using the 19 | \code{\link{createTable1}} function.} 20 | 21 | \item{covariateSettings}{The covariate settings object to use as the basis for the 22 | filtered covariate settings.} 23 | 24 | \item{includedCovariateConceptIds}{A list of concept IDs that should be used to construct 25 | covariates.} 26 | 27 | \item{addDescendantsToInclude}{Should descendant concept IDs be added to the list of concepts 28 | to include?} 29 | 30 | \item{excludedCovariateConceptIds}{A list of concept IDs that should NOT be used to construct 31 | covariates.} 32 | 33 | \item{addDescendantsToExclude}{Should descendant concept IDs be added to the list of concepts 34 | to exclude?} 35 | 36 | \item{includedCovariateIds}{A list of 
covariate IDs that should be restricted to.} 37 | } 38 | \value{ 39 | A covariate settings object, for example to be used when calling the 40 | \code{\link{getDbCovariateData}} function. 41 | } 42 | \description{ 43 | Creates a covariate settings object for generating only those covariates that will be included in a 44 | table 1. This function works by filtering the \code{covariateSettings} object for the covariates in 45 | the \code{specifications} object. 46 | } 47 | \examples{ 48 | \donttest{ 49 | table1CovSettings <- createTable1CovariateSettings( 50 | specifications = getDefaultTable1Specifications(), 51 | covariateSettings = createDefaultCovariateSettings(), 52 | includedCovariateConceptIds = c(), 53 | addDescendantsToInclude = FALSE, 54 | excludedCovariateConceptIds = c(), 55 | addDescendantsToExclude = FALSE, 56 | includedCovariateIds = c() 57 | ) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /man/dot-createLooCovariateSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/UnitTestHelperFunctions.R 3 | \name{.createLooCovariateSettings} 4 | \alias{.createLooCovariateSettings} 5 | \title{Get covariate settings} 6 | \usage{ 7 | .createLooCovariateSettings(useLengthOfObs = TRUE) 8 | } 9 | \arguments{ 10 | \item{useLengthOfObs}{if length of observations should be used} 11 | } 12 | \value{ 13 | Returns an object of type \code{covariateSettings}, containing settings for the covariates. 
14 | } 15 | \description{ 16 | Get covariate settings 17 | } 18 | -------------------------------------------------------------------------------- /man/dot-getDbLooCovariateData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/UnitTestHelperFunctions.R 3 | \name{.getDbLooCovariateData} 4 | \alias{.getDbLooCovariateData} 5 | \title{Get covariate information from the database} 6 | \usage{ 7 | .getDbLooCovariateData( 8 | connection, 9 | tempEmulationSchema = NULL, 10 | cdmDatabaseSchema, 11 | cohortTable = "#cohort_person", 12 | cohortIds = c(-1), 13 | cdmVersion = "5", 14 | rowIdField = "subject_id", 15 | covariateSettings, 16 | aggregated = FALSE, 17 | minCharacterizationMean = 0 18 | ) 19 | } 20 | \arguments{ 21 | \item{connection}{A connection to the server containing the schema as created using the 22 | \code{connect} function in the \code{DatabaseConnector} package. 23 | Either the \code{connection} or \code{connectionDetails} argument 24 | should be specified.} 25 | 26 | \item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support 27 | temp tables. To emulate temp tables, provide a schema with write 28 | privileges where temp tables can be created.} 29 | 30 | \item{cdmDatabaseSchema}{The name of the database schema that contains the OMOP CDM instance. 31 | Requires read permissions to this database. On SQL Server, this should 32 | specify both the database and the schema, so for example 33 | 'cdm_instance.dbo'.} 34 | 35 | \item{cohortTable}{Name of the (temp) table holding the cohort for which we want to 36 | construct covariates} 37 | 38 | \item{cohortIds}{For which cohort ID(s) should covariates be constructed? 
If set to -1, 39 | covariates will be constructed for all cohorts in the specified cohort 40 | table.} 41 | 42 | \item{cdmVersion}{Define the OMOP CDM version used: currently only "5" is supported.} 43 | 44 | \item{rowIdField}{The name of the field in the cohort table that is to be used as the 45 | row_id field in the output table. This can be especially useful if 46 | there is more than one period per person.} 47 | 48 | \item{covariateSettings}{Either an object of type \code{covariateSettings} as created using one 49 | of the createCovariate functions, or a list of such objects.} 50 | 51 | \item{aggregated}{Should aggregate statistics be computed instead of covariates per 52 | cohort entry?} 53 | 54 | \item{minCharacterizationMean}{The minimum mean value for binary characterization output. Values below this will be cut off from output. This 55 | will help reduce the file size of the characterization output, but will remove information 56 | on covariates that have very low values. The default is 0.} 57 | } 58 | \value{ 59 | Returns an object of type \code{covariateData}, containing information on the covariates. 
60 | } 61 | \description{ 62 | Get covariate information from the database 63 | } 64 | -------------------------------------------------------------------------------- /man/filterByCohortDefinitionId.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/HelperFunctions.R 3 | \name{filterByCohortDefinitionId} 4 | \alias{filterByCohortDefinitionId} 5 | \title{Filter covariates by cohort definition IDs} 6 | \usage{ 7 | filterByCohortDefinitionId(covariateData, cohortId = 1, cohortIds = c(1)) 8 | } 9 | \arguments{ 10 | \item{covariateData}{An object of type \code{CovariateData}} 11 | 12 | \item{cohortId}{DEPRECATED The cohort definition IDs to keep.} 13 | 14 | \item{cohortIds}{The cohort definition IDs to keep.} 15 | } 16 | \value{ 17 | An object of type \code{covariateData}. 18 | } 19 | \description{ 20 | Filter covariates by cohort definition IDs 21 | } 22 | \examples{ 23 | \donttest{ 24 | covariateData <- FeatureExtraction::createEmptyCovariateData( 25 | cohortIds = c(1, 2), 26 | aggregated = TRUE, 27 | temporal = FALSE 28 | ) 29 | 30 | covData <- filterByCohortDefinitionId( 31 | covariateData = covariateData, 32 | cohortIds = c(1) 33 | ) 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /man/filterByRowId.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/HelperFunctions.R 3 | \name{filterByRowId} 4 | \alias{filterByRowId} 5 | \title{Filter covariates by row ID} 6 | \usage{ 7 | filterByRowId(covariateData, rowIds) 8 | } 9 | \arguments{ 10 | \item{covariateData}{An object of type \code{CovariateData}} 11 | 12 | \item{rowIds}{A vector containing the rowIds to keep.} 13 | } 14 | \value{ 15 | An object of type \code{covariateData}. 
16 | } 17 | \description{ 18 | Filter covariates by row ID 19 | } 20 | \examples{ 21 | \donttest{ 22 | covariateData <- FeatureExtraction::createEmptyCovariateData( 23 | cohortIds = 1, 24 | aggregated = FALSE, 25 | temporal = FALSE 26 | ) 27 | 28 | covData <- filterByRowId( 29 | covariateData = covariateData, 30 | rowIds = 1 31 | ) 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /man/getDbCohortBasedCovariatesData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GetCovariatesFromOtherCohorts.R 3 | \name{getDbCohortBasedCovariatesData} 4 | \alias{getDbCohortBasedCovariatesData} 5 | \title{Get covariate information from the database based on other cohorts} 6 | \usage{ 7 | getDbCohortBasedCovariatesData( 8 | connection, 9 | oracleTempSchema = NULL, 10 | cdmDatabaseSchema, 11 | cohortTable = "#cohort_person", 12 | cohortId = -1, 13 | cohortIds = c(-1), 14 | cdmVersion = "5", 15 | rowIdField = "subject_id", 16 | covariateSettings, 17 | aggregated = FALSE, 18 | minCharacterizationMean = 0, 19 | tempEmulationSchema = getOption("sqlRenderTempEmulationSchema") 20 | ) 21 | } 22 | \arguments{ 23 | \item{connection}{A connection to the server containing the schema as created using the 24 | \code{connect} function in the \code{DatabaseConnector} package.} 25 | 26 | \item{oracleTempSchema}{DEPRECATED: use \code{tempEmulationSchema} instead.} 27 | 28 | \item{cdmDatabaseSchema}{The name of the database schema that contains the OMOP CDM instance. 29 | Requires read permissions to this database. On SQL Server, this should 30 | specify both the database and the schema, so for example 31 | 'cdm_instance.dbo'.} 32 | 33 | \item{cohortTable}{Name of the table holding the cohort for which we want to construct 34 | covariates. If it is a temp table, the name should have a hash prefix, 35 | e.g. '#temp_table'. 
If it is a non-temp table, it should include the 36 | database schema, e.g. 'cdm_database.cohort'.} 37 | 38 | \item{cohortId}{DEPRECATED: For which cohort ID should covariates be constructed? If set to -1, 39 | covariates will be constructed for all cohorts in the specified cohort 40 | table.} 41 | 42 | \item{cohortIds}{For which cohort ID(s) should covariates be constructed? If set to c(-1), 43 | covariates will be constructed for all cohorts in the specified cohort 44 | table.} 45 | 46 | \item{cdmVersion}{The version of the Common Data Model used. Currently only 47 | \code{cdmVersion = "5"} is supported.} 48 | 49 | \item{rowIdField}{The name of the field in the cohort temp table that is to be used as the 50 | row_id field in the output table. This can be especially useful if there 51 | is more than one period per person.} 52 | 53 | \item{covariateSettings}{An object of type \code{covariateSettings} as created using the 54 | \code{\link{createCohortBasedCovariateSettings}} or 55 | \code{\link{createCohortBasedTemporalCovariateSettings}} functions.} 56 | 57 | \item{aggregated}{Should aggregate statistics be computed instead of covariates per 58 | cohort entry?} 59 | 60 | \item{minCharacterizationMean}{The minimum mean value for binary characterization output. Values below this will be cut off from output. This 61 | will help reduce the file size of the characterization output, but will remove information 62 | on covariates that have very low values. The default is 0.} 63 | 64 | \item{tempEmulationSchema}{Some database platforms like Oracle and Impala do not truly support 65 | temp tables. To emulate temp tables, provide a schema with write 66 | privileges where temp tables can be created.} 67 | } 68 | \value{ 69 | Returns an object of type \code{CovariateData}, which is an Andromeda object containing information on the baseline covariates. 70 | Information about multiple outcomes can be captured at once for efficiency reasons. 
This object is 71 | a list with the following components: \describe{ \item{covariates}{A table listing the 72 | baseline covariates per person in the cohorts. This is done using a sparse representation: 73 | covariates with a value of 0 are omitted to save space. The covariates object will have three 74 | columns: rowId, covariateId, and covariateValue. The rowId is usually equal to the person_id, 75 | unless specified otherwise in the rowIdField argument.} \item{covariateRef}{A table 76 | describing the covariates that have been extracted.} } The CovariateData object will also have a \code{metaData} attribute, a list of objects with 77 | information on how the covariateData object was constructed. 78 | } 79 | \description{ 80 | Constructs covariates using other cohorts. 81 | } 82 | \details{ 83 | This function uses the data in the CDM to construct a large set of covariates for the provided 84 | cohort. The cohort is assumed to be in an existing temp table with these fields: 'subject_id', 85 | 'cohort_definition_id', 'cohort_start_date'. Optionally, an extra field can be added containing the 86 | unique identifier that will be used as rowID in the output. Typically, users don't call this 87 | function directly but rather use the \code{\link{getDbCovariateData}} function instead. 88 | } 89 | -------------------------------------------------------------------------------- /man/getDefaultTable1Specifications.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Table1.R 3 | \name{getDefaultTable1Specifications} 4 | \alias{getDefaultTable1Specifications} 5 | \title{Get the default table 1 specifications} 6 | \usage{ 7 | getDefaultTable1Specifications() 8 | } 9 | \value{ 10 | A specifications object. 11 | } 12 | \description{ 13 | Loads the default specifications for a table 1, to be used with the \code{\link{createTable1}} 14 | function. 
15 | } 16 | \examples{ 17 | \donttest{ 18 | defaultTable1Specs <- getDefaultTable1Specifications() 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /man/isAggregatedCovariateData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CovariateData.R 3 | \name{isAggregatedCovariateData} 4 | \alias{isAggregatedCovariateData} 5 | \title{Check whether covariate data is aggregated} 6 | \usage{ 7 | isAggregatedCovariateData(x) 8 | } 9 | \arguments{ 10 | \item{x}{The covariate data object to check.} 11 | } 12 | \value{ 13 | A logical value. 14 | } 15 | \description{ 16 | Check whether covariate data is aggregated 17 | } 18 | \examples{ 19 | \donttest{ 20 | covariateData <- FeatureExtraction::createEmptyCovariateData( 21 | cohortIds = 1, 22 | aggregated = FALSE, 23 | temporal = FALSE 24 | ) 25 | isAggrCovData <- isAggregatedCovariateData(covariateData) 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/isCovariateData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CovariateData.R 3 | \name{isCovariateData} 4 | \alias{isCovariateData} 5 | \title{Check whether an object is a CovariateData object} 6 | \usage{ 7 | isCovariateData(x) 8 | } 9 | \arguments{ 10 | \item{x}{The object to check.} 11 | } 12 | \value{ 13 | A logical value. 
14 | } 15 | \description{ 16 | Check whether an object is a CovariateData object 17 | } 18 | \examples{ 19 | \donttest{ 20 | binaryCovDataFile <- system.file("testdata/binaryCovariateData.zip", 21 | package = "FeatureExtraction" 22 | ) 23 | covData <- loadCovariateData(binaryCovDataFile) 24 | isCovData <- isCovariateData(covData) 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /man/isTemporalCovariateData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CovariateData.R 3 | \name{isTemporalCovariateData} 4 | \alias{isTemporalCovariateData} 5 | \title{Check whether covariate data is temporal} 6 | \usage{ 7 | isTemporalCovariateData(x) 8 | } 9 | \arguments{ 10 | \item{x}{The covariate data object to check.} 11 | } 12 | \value{ 13 | A logical value. 14 | } 15 | \description{ 16 | Check whether covariate data is temporal 17 | } 18 | \examples{ 19 | \donttest{ 20 | covariateData <- FeatureExtraction::createEmptyCovariateData( 21 | cohortIds = 1, 22 | aggregated = FALSE, 23 | temporal = FALSE 24 | ) 25 | isTempCovData <- isTemporalCovariateData(covariateData) 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/loadCovariateData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CovariateData.R 3 | \name{loadCovariateData} 4 | \alias{loadCovariateData} 5 | \title{Load the covariate data from a folder} 6 | \usage{ 7 | loadCovariateData(file, readOnly) 8 | } 9 | \arguments{ 10 | \item{file}{The name of the folder containing the data.} 11 | 12 | \item{readOnly}{DEPRECATED: If true, the data is opened read only.} 13 | } 14 | \value{ 15 | An object of class \code{CovariateData}. 
16 | } 17 | \description{ 18 | \code{loadCovariateData} loads an object of type covariateData from a folder in the file system. 19 | } 20 | \details{ 21 | The data will be read from a set of files in the folder specified by the user. 22 | } 23 | \examples{ 24 | \donttest{ 25 | binaryCovDataFile <- system.file("testdata/binaryCovariateData.zip", 26 | package = "FeatureExtraction" 27 | ) 28 | covData <- loadCovariateData(binaryCovDataFile) 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /man/saveCovariateData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CovariateData.R 3 | \name{saveCovariateData} 4 | \alias{saveCovariateData} 5 | \title{Save the covariate data to folder} 6 | \usage{ 7 | saveCovariateData(covariateData, file) 8 | } 9 | \arguments{ 10 | \item{covariateData}{An object of type \code{covariateData} as generated using 11 | \code{getDbCovariateData}.} 12 | 13 | \item{file}{The name of the folder where the data will be written. The folder should not 14 | yet exist.} 15 | } 16 | \value{ 17 | No return value, called for side effects. 18 | } 19 | \description{ 20 | \code{saveCovariateData} saves an object of type covariateData to a folder. 21 | } 22 | \details{ 23 | The data will be written to a set of files in the folder specified by the user.
24 | } 25 | \examples{ 26 | \donttest{ 27 | covariateData <- FeatureExtraction::createEmptyCovariateData( 28 | cohortIds = 1, 29 | aggregated = FALSE, 30 | temporal = FALSE 31 | ) 32 | # For this example we'll use a temporary file location: 33 | fileName <- tempfile() 34 | saveCovariateData(covariateData = covariateData, file = fileName) 35 | # Cleaning up the file used in this example: 36 | unlink(fileName) 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /man/tidyCovariateData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Normalization.R 3 | \name{tidyCovariateData} 4 | \alias{tidyCovariateData} 5 | \title{Tidy covariate data} 6 | \usage{ 7 | tidyCovariateData( 8 | covariateData, 9 | minFraction = 0.001, 10 | normalize = TRUE, 11 | removeRedundancy = TRUE 12 | ) 13 | } 14 | \arguments{ 15 | \item{covariateData}{An object as generated using the \code{\link{getDbCovariateData}} 16 | function.} 17 | 18 | \item{minFraction}{Minimum fraction of the population that should have a non-zero value for a 19 | covariate for that covariate to be kept. Set to 0 to disable filtering on 20 | frequency.} 21 | 22 | \item{normalize}{Normalize the covariates? (dividing by the max).} 23 | 24 | \item{removeRedundancy}{Should redundant covariates be removed?} 25 | } 26 | \value{ 27 | An object of class \code{CovariateData}. 28 | } 29 | \description{ 30 | Tidy covariate data 31 | } 32 | \details{ 33 | Normalize covariate values by dividing by the max and/or remove redundant covariates and/or remove 34 | infrequent covariates. For temporal covariates, redundancy is evaluated per time ID.
35 | } 36 | \examples{ 37 | \donttest{ 38 | covariateData <- FeatureExtraction::createEmptyCovariateData( 39 | cohortIds = 1, 40 | aggregated = FALSE, 41 | temporal = FALSE 42 | ) 43 | 44 | covData <- tidyCovariateData( 45 | covariateData = covariateData, 46 | minFraction = 0.001, 47 | normalize = TRUE, 48 | removeRedundancy = TRUE 49 | ) 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /nbactions.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | debug 5 | 6 | jar 7 | 8 | 9 | process-classes 10 | org.codehaus.mojo:exec-maven-plugin:1.5.0:exec 11 | 12 | 13 | -agentlib:jdwp=transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath org.ohdsi.featureExtraction.FeatureExtraction 14 | java 15 | true 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /tests/testBigQuery.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | options(dbms = "bigquery") 3 | runTestsOnBigQuery <- !(Sys.getenv("CDM_BIG_QUERY_CONNECTION_STRING") == "" & Sys.getenv("CDM_BIG_QUERY_KEY_FILE") == "" & Sys.getenv("CDM_BIG_QUERY_CDM_SCHEMA") == "" & Sys.getenv("CDM_BIG_QUERY_OHDSI_SCHEMA") == "") 4 | if (runTestsOnBigQuery) { 5 | test_check("FeatureExtraction") 6 | } 7 | -------------------------------------------------------------------------------- /tests/testOracle.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | options(dbms = "oracle") 3 | runTestsOnOracle <- !(Sys.getenv("CDM5_ORACLE_USER") == "" & Sys.getenv("CDM5_ORACLE_PASSWORD") == "" & Sys.getenv("CDM5_ORACLE_SERVER") == "" & Sys.getenv("CDM5_ORACLE_CDM_SCHEMA") == "" & Sys.getenv("CDM5_ORACLE_OHDSI_SCHEMA") == "") 4 | if (runTestsOnOracle) { 5 | test_check("FeatureExtraction") 6 | } 7 | 
-------------------------------------------------------------------------------- /tests/testPostgres.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | options(dbms = "postgresql") 3 | runTestsOnPostgreSQL <- !(Sys.getenv("CDM5_POSTGRESQL_USER") == "" & Sys.getenv("CDM5_POSTGRESQL_PASSWORD") == "" & Sys.getenv("CDM5_POSTGRESQL_SERVER") == "" & Sys.getenv("CDM5_POSTGRESQL_CDM_SCHEMA") == "" & Sys.getenv("CDM5_POSTGRESQL_OHDSI_SCHEMA") == "") 4 | if (runTestsOnPostgreSQL) { 5 | test_check("FeatureExtraction") 6 | } 7 | -------------------------------------------------------------------------------- /tests/testRedshift.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | options(dbms = "redshift") 3 | runTestsOnRedshift <- FALSE # !(Sys.getenv("CDM5_REDSHIFT_USER") == "" & Sys.getenv("CDM5_REDSHIFT_PASSWORD") == "" & Sys.getenv("CDM5_REDSHIFT_SERVER") == "" & Sys.getenv("CDM5_REDSHIFT_CDM_SCHEMA") == "" & Sys.getenv("CDM5_REDSHIFT_OHDSI_SCHEMA") == "") 4 | if (runTestsOnRedshift) { 5 | test_check("FeatureExtraction") 6 | } 7 | -------------------------------------------------------------------------------- /tests/testSnowflake.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | options(dbms = "snowflake") 3 | runTestsOnSnowflake <- !(Sys.getenv("CDM_SNOWFLAKE_CONNECTION_STRING") == "" & Sys.getenv("CDM_SNOWFLAKE_USER") == "" & Sys.getenv("CDM_SNOWFLAKE_PASSWORD") == "" & Sys.getenv("CDM_SNOWFLAKE_CDM53_SCHEMA") == "" & Sys.getenv("CDM_SNOWFLAKE_OHDSI_SCHEMA") == "") 4 | if (runTestsOnSnowflake) { 5 | test_check("FeatureExtraction") 6 | } 7 | -------------------------------------------------------------------------------- /tests/testSpark.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | options(dbms = "spark") 3 | runTestsOnSpark <- 
!(Sys.getenv("CDM5_SPARK_CONNECTION_STRING") == "" & Sys.getenv("CDM5_SPARK_USER") == "" & Sys.getenv("CDM_SPARK_PASSWORD") == "" & Sys.getenv("CDM5_SPARK_CDM_SCHEMA") == "" & Sys.getenv("CDM5_SPARK_OHDSI_SCHEMA") == "") 4 | if (runTestsOnSpark) { 5 | test_check("FeatureExtraction") 6 | } 7 | -------------------------------------------------------------------------------- /tests/testSqlServer.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | options(dbms = "sql server") 3 | runTestsOnSQLServer <- !(Sys.getenv("CDM5_SQL_SERVER_USER") == "" & Sys.getenv("CDM5_SQL_SERVER_PASSWORD") == "" & Sys.getenv("CDM5_SQL_SERVER_SERVER") == "" & Sys.getenv("CDM5_SQL_SERVER_CDM_SCHEMA") == "" & Sys.getenv("CDM5_SQL_SERVER_OHDSI_SCHEMA") == "") 4 | if (runTestsOnSQLServer) { 5 | test_check("FeatureExtraction") 6 | } 7 | -------------------------------------------------------------------------------- /tests/testSqlite.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | options(dbms = "sqlite") 3 | runTestsOnEunomia <- TRUE 4 | if (runTestsOnEunomia) { 5 | test_check("FeatureExtraction") 6 | } 7 | -------------------------------------------------------------------------------- /tests/testthat/test-Aggregation.R: -------------------------------------------------------------------------------- 1 | # This file covers the code in Aggregation.R. 
View coverage for this file using 2 | # library(testthat); library(FeatureExtraction) 3 | # covr::file_report(covr::file_coverage("R/Aggregation.R", "tests/testthat/test-Aggregation.R")) 4 | 5 | test_that("aggregateCovariates works", { 6 | skip_on_cran() 7 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection")) 8 | settings <- createCovariateSettings(useDemographicsAgeGroup = TRUE, useChads2Vasc = TRUE) 9 | covariateData <- getDbCovariateData( 10 | connectionDetails = eunomiaConnectionDetails, 11 | cdmDatabaseSchema = eunomiaCdmDatabaseSchema, 12 | cohortDatabaseSchema = eunomiaOhdsiDatabaseSchema, 13 | cohortIds = c(1), 14 | covariateSettings = settings, 15 | aggregated = FALSE 16 | ) 17 | 18 | aggregatedCovariateData <- aggregateCovariates(covariateData) 19 | expect_true(isAggregatedCovariateData(aggregatedCovariateData)) 20 | expect_error(aggregateCovariates("blah"), "not of class CovariateData") 21 | expect_error(aggregateCovariates(aggregatedCovariateData), "already be aggregated") 22 | 23 | # create example where missing does not mean zero 24 | covariateData$analysisRef <- covariateData$analysisRef %>% 25 | mutate(missingMeansZero = ifelse(.data$analysisName == "Chads2Vasc", "N", .data$missingMeansZero)) 26 | expect_true(isAggregatedCovariateData(aggregateCovariates(covariateData))) 27 | 28 | Andromeda::close(covariateData) 29 | expect_error(aggregateCovariates(covariateData), "object is closed") 30 | }) 31 | 32 | test_that("aggregateCovariates handles temporalCovariates", { 33 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection")) 34 | settings <- createTemporalCovariateSettings(useDemographicsGender = TRUE) 35 | covariateData <- getDbCovariateData( 36 | connectionDetails = eunomiaConnectionDetails, 37 | cdmDatabaseSchema = eunomiaCdmDatabaseSchema, 38 | cohortDatabaseSchema = eunomiaOhdsiDatabaseSchema, 39 | cohortIds = c(1), 40 | covariateSettings = settings 41 | ) 42 | expect_error(aggregateCovariates(covariateData), "temporal covariates") 
43 | }) 44 | -------------------------------------------------------------------------------- /tests/testthat/test-CompareCohorts.R: -------------------------------------------------------------------------------- 1 | # View coverage for this file using 2 | # library(testthat); library(FeatureExtraction) 3 | # covr::file_report(covr::file_coverage("R/CompareCohorts.R", "tests/testthat/test-CompareCohorts.R")) 4 | 5 | test_that("Test stdDiff continuous variable computation", { 6 | # NOTE: Data stored in "inst/testdata/continuousCovariateData.zip" created by: 7 | # ------------------------------------------------------------------------------ 8 | # connectionDetails <- Eunomia::getEunomiaConnectionDetails() 9 | # Eunomia::createCohorts(connectionDetails) 10 | # data <- FeatureExtraction::getDbCovariateData(connectionDetails = connectionDetails, 11 | # cdmDatabaseSchema = "main", 12 | # cohortTable = "cohort", 13 | # aggregated = TRUE, 14 | # covariateSettings = FeatureExtraction::createCovariateSettings(useCharlsonIndex = TRUE)) 15 | # FeatureExtraction::saveCovariateData(data, "inst/testdata/continuousCovariateData.zip") 16 | # ------------------------------------------------------------------------------ 17 | data <- loadCovariateData(getTestResourceFilePath("continuousCovariateData.zip")) 18 | # Compute the expected value based on cohorts 1 & 2's values from 19 | # the loaded covariate data 20 | testData <- data.frame( 21 | mean1 = 0.614, 22 | sd1 = 0.387, 23 | mean2 = 0.404, 24 | sd2 = 0.345 25 | ) 26 | 27 | output <- computeStandardizedDifference( 28 | covariateData1 = data, 29 | covariateData2 = data, 30 | cohortId1 = 1, 31 | cohortId2 = 2 32 | ) 33 | testData$sd <- sqrt((testData$sd1^2 + testData$sd2^2) / 2) 34 | testData$stdDiff <- (testData$mean2 - testData$mean1) / testData$sd 35 | 36 | # Compute the standardized difference of mean using the source data 37 | expect_equal(output$stdDiff, testData$stdDiff, tolerance = 0.001, scale = 1) 38 | }) 39 | 40 | 
test_that("Test stdDiff binary variable computation", { 41 | skip_on_cran() 42 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection")) 43 | connectionDetails <- Eunomia::getEunomiaConnectionDetails() 44 | Eunomia::createCohorts(connectionDetails) 45 | data <- FeatureExtraction::getDbCovariateData( 46 | connectionDetails = connectionDetails, 47 | cdmDatabaseSchema = "main", 48 | cohortTable = "cohort", 49 | aggregated = TRUE, 50 | covariateSettings = FeatureExtraction::createCovariateSettings(useConditionOccurrenceLongTerm = TRUE) 51 | ) 52 | output <- computeStandardizedDifference( 53 | covariateData1 = data, 54 | covariateData2 = data, 55 | cohortId1 = 1, 56 | cohortId2 = 2 57 | ) 58 | # Filter to: condition_occurrence during day -365 through 0 days relative to index: Diverticular disease 59 | singleCovariate <- output[output$covariateId == 4266809102, ] 60 | 61 | # Compute the expected value based on cohorts 1 & 2's values from 62 | # the loaded covariate data for covariateId == 4266809102 63 | testBinaryData <- data.frame( 64 | popSize1 = 1844, 65 | sumValue1 = 341, 66 | popSize2 = 850, 67 | sumValue2 = 64 68 | ) 69 | 70 | testBinaryData$mean1 <- testBinaryData$sumValue1 / testBinaryData$popSize1 71 | testBinaryData$mean2 <- testBinaryData$sumValue2 / testBinaryData$popSize2 72 | testBinaryData$sd1 <- sqrt(testBinaryData$mean1 * (1 - testBinaryData$mean1)) 73 | testBinaryData$sd2 <- sqrt(testBinaryData$mean2 * (1 - testBinaryData$mean2)) 74 | testBinaryData$sd <- sqrt((testBinaryData$sd1^2 + testBinaryData$sd2^2) / 2) 75 | testBinaryData$stdDiff <- (testBinaryData$mean2 - testBinaryData$mean1) / testBinaryData$sd 76 | 77 | # Test the results 78 | expect_equal(singleCovariate$mean1, testBinaryData$mean1, tolerance = 0.001, scale = 1) 79 | expect_equal(singleCovariate$sd1, testBinaryData$sd1, tolerance = 0.001, scale = 1) 80 | expect_equal(singleCovariate$mean2, testBinaryData$mean2, tolerance = 0.001, scale = 1) 81 | expect_equal(singleCovariate$sd2, 
testBinaryData$sd2, tolerance = 0.001, scale = 1) 82 | expect_equal(singleCovariate$sd, testBinaryData$sd, tolerance = 0.001, scale = 1) 83 | expect_equal(singleCovariate$stdDiff, testBinaryData$stdDiff, tolerance = 0.001, scale = 1) 84 | }) 85 | -------------------------------------------------------------------------------- /tests/testthat/test-DetailedCovariateSettings.R: -------------------------------------------------------------------------------- 1 | # This file covers the code in DetailedCovariateSettings.R. View coverage for this file using 2 | test_that("test createDetailedCovariateSettings", { 3 | analysisDetails <- createAnalysisDetails( 4 | analysisId = 1, 5 | sqlFileName = "DemographicsGender.sql", 6 | parameters = list( 7 | analysisId = 1, 8 | analysisName = "Gender", 9 | domainId = "Demographics" 10 | ), 11 | includedCovariateConceptIds = c(), 12 | addDescendantsToInclude = FALSE, 13 | excludedCovariateConceptIds = c(), 14 | addDescendantsToExclude = FALSE, 15 | includedCovariateIds = c() 16 | ) 17 | 18 | settings <- createDetailedCovariateSettings(list(analysisDetails)) 19 | temporalSettings <- createDetailedTemporalCovariateSettings(list(analysisDetails)) 20 | expect_s3_class(settings, "covariateSettings") 21 | expect_s3_class(temporalSettings, "covariateSettings") 22 | expect_equal(temporalSettings$temporalStartDays, -365:-1) 23 | }) 24 | 25 | test_that("test createDetailedTemporalCovariateSettings", { 26 | analysisDetails <- createAnalysisDetails( 27 | analysisId = 1, 28 | sqlFileName = "DemographicsGender.sql", 29 | parameters = list( 30 | analysisId = 1, 31 | analysisName = "Gender", 32 | domainId = "Demographics" 33 | ), 34 | includedCovariateConceptIds = c(), 35 | addDescendantsToInclude = FALSE, 36 | excludedCovariateConceptIds = c(), 37 | addDescendantsToExclude = FALSE, 38 | includedCovariateIds = c() 39 | ) 40 | 41 | temporalSettings <- createDetailedTemporalCovariateSettings(list(analysisDetails)) 42 | expect_s3_class(temporalSettings,
"covariateSettings") 43 | expect_equal(temporalSettings$temporalStartDays, -365:-1) 44 | }) 45 | 46 | test_that("test convertPrespecSettingsToDetailedSettings", { 47 | settings <- createCovariateSettings(useDemographicsAgeGroup = TRUE, useChads2Vasc = TRUE) 48 | convertedSettings <- convertPrespecSettingsToDetailedSettings(settings) 49 | expect_s3_class(convertedSettings, "covariateSettings") 50 | expect_equal(names(convertedSettings), c("temporal", "temporalSequence", "temporalAnnual", "analyses")) 51 | expect_equal(sum(unlist(lapply(1:length(convertedSettings$analyses), function(i) convertedSettings$analyses[[i]]$sqlFileName)) %in% c("DemographicsAgeGroup.sql", "Chads2Vasc.sql")), 2) 52 | }) 53 | 54 | test_that("test createDefaultCovariateSettings", { 55 | settings <- createDefaultCovariateSettings() 56 | expect_s3_class(settings, "covariateSettings") 57 | }) 58 | 59 | test_that("test createDefaultTemporalCovariateSettings", { 60 | settings <- createDefaultTemporalCovariateSettings() 61 | expect_s3_class(settings, "covariateSettings") 62 | }) 63 | -------------------------------------------------------------------------------- /tests/testthat/test-FeatureExtractionInternal.R: -------------------------------------------------------------------------------- 1 | # View coverage for this file using 2 | # library(testthat); library(FeatureExtraction) 3 | # covr::file_report(covr::file_coverage("R/FeatureExtraction.R", "tests/testthat/test-FeatureExtractionInternal.R")) 4 | 5 | test_that("Test .onLoad()", { 6 | expect_silent( 7 | FeatureExtraction:::.onLoad(libname = "FeatureExtraction", pkgname = "FeatureExtraction") 8 | ) 9 | }) 10 | 11 | test_that("Test JSON functions", { 12 | expectedToJsonResult <- "{\"id\":\"1\"}" 13 | expectedFromJsonResult <- list("id" = "1") 14 | toJsonResult <- FeatureExtraction:::.toJson(expectedFromJsonResult) 15 | expect_equal(toJsonResult, expectedToJsonResult) 16 | 17 | fromJsonResult <- 
FeatureExtraction:::.fromJson(expectedToJsonResult) 18 | expect_equal(fromJsonResult, expectedFromJsonResult) 19 | }) 20 | -------------------------------------------------------------------------------- /tests/testthat/test-GetCovariatesFromCohortAttributes.R: -------------------------------------------------------------------------------- 1 | # This file covers the code in GetCovariatesFromCohortAttributes.R. 2 | # NOTE: Functionality is described in detail in the following vignette: 3 | # http://ohdsi.github.io/FeatureExtraction/articles/CreatingCovariatesUsingCohortAttributes.html 4 | # 5 | # View coverage for this file using 6 | # library(testthat); library(FeatureExtraction) 7 | # covr::file_report(covr::file_coverage("R/GetCovariatesFromCohortAttributes.R", "tests/testthat/test-GetCovariatesFromCohortAttributes.R")) 8 | 9 | test_that("getDbCohortAttrCovariatesData aggregation not supported check", { 10 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection")) 11 | expect_error(getDbCohortAttrCovariatesData( 12 | connection = eunomiaConnection, 13 | cdmDatabaseSchema = eunomiaCdmDatabaseSchema, 14 | covariateSettings = createDefaultCovariateSettings(), 15 | aggregated = TRUE 16 | )) 17 | }) 18 | 19 | test_that("getDbCohortAttrCovariatesData CDM v4 not supported check", { 20 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection")) 21 | expect_error(getDbCohortAttrCovariatesData( 22 | connection = eunomiaConnection, 23 | cdmDatabaseSchema = eunomiaCdmDatabaseSchema, 24 | cdmVersion = "4", 25 | covariateSettings = createDefaultCovariateSettings() 26 | )) 27 | }) 28 | 29 | test_that("getDbCohortAttrCovariatesData hasIncludedAttributes == 0", { 30 | skip_on_cran() 31 | # TODO: This test is probably good to run on all DB platforms 32 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection")) 33 | covariateSettings <- createCohortAttrCovariateSettings( 34 | attrDatabaseSchema = eunomiaOhdsiDatabaseSchema, 35 | cohortAttrTable = cohortAttributeTable, 
36 | attrDefinitionTable = attributeDefinitionTable, 37 | includeAttrIds = c(), 38 | isBinary = FALSE, 39 | missingMeansZero = FALSE 40 | ) 41 | result <- getDbCohortAttrCovariatesData( 42 | connection = eunomiaConnection, 43 | cdmDatabaseSchema = eunomiaCdmDatabaseSchema, 44 | cohortTable = cohortTable, 45 | covariateSettings = covariateSettings 46 | ) 47 | expect_equal(class(result), "CovariateData") 48 | }) 49 | 50 | test_that("getDbCohortAttrCovariatesData hasIncludedAttributes > 0", { 51 | skip_on_cran() 52 | # TODO: This test is probably good to run on all DB platforms 53 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection")) 54 | covariateSettings <- createCohortAttrCovariateSettings( 55 | attrDatabaseSchema = eunomiaOhdsiDatabaseSchema, 56 | cohortAttrTable = cohortAttributeTable, 57 | attrDefinitionTable = attributeDefinitionTable, 58 | includeAttrIds = c(1), 59 | isBinary = FALSE, 60 | missingMeansZero = TRUE 61 | ) 62 | result <- getDbCohortAttrCovariatesData( 63 | connection = eunomiaConnection, 64 | cdmDatabaseSchema = eunomiaCdmDatabaseSchema, 65 | cohortTable = cohortTable, 66 | covariateSettings = covariateSettings, 67 | cohortIds = c(1, 2) 68 | ) 69 | expect_equal(class(result), "CovariateData") 70 | }) 71 | 72 | test_that("createCohortAttrCovariateSettings check", { 73 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection")) 74 | result <- createCohortAttrCovariateSettings(attrDatabaseSchema = "main") 75 | expect_equal(class(result), "covariateSettings") 76 | }) 77 | 78 | test_that("getDbCohortAttrCovariatesData cohortId warning", { 79 | skip_on_cran() 80 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection")) 81 | covariateSettings <- createCohortAttrCovariateSettings( 82 | attrDatabaseSchema = eunomiaOhdsiDatabaseSchema, 83 | cohortAttrTable = cohortAttributeTable, 84 | attrDefinitionTable = attributeDefinitionTable, 85 | includeAttrIds = c(1), 86 | isBinary = FALSE, 87 | missingMeansZero = TRUE 88 | ) 89 | # cohortId 
argument 90 | expect_warning(getDbCohortAttrCovariatesData( 91 | connection = eunomiaConnection, 92 | cdmDatabaseSchema = eunomiaCdmDatabaseSchema, 93 | cohortTable = cohortTable, 94 | covariateSettings = covariateSettings, 95 | cohortId = 1 96 | ), "cohortId argument has been deprecated, please use cohortIds") 97 | }) 98 | -------------------------------------------------------------------------------- /tests/testthat/test-GetCovariatesTemporalSequence.R: -------------------------------------------------------------------------------- 1 | # View coverage for this file using 2 | # library(testthat); library(FeatureExtraction) 3 | # covr::file_report(covr::file_coverage("R/DefaultTemporalSequenceCovariateSettings.R", "tests/testthat/test-GetCovariatesTemporalSequence.R")) 4 | 5 | test_that("createTemporalSequenceCovariateSettings correctly sets list", { 6 | settings <- createTemporalSequenceCovariateSettings( 7 | useDemographicsGender = T, 8 | useConditionEraGroupStart = T, 9 | useDrugEraStart = T, 10 | timePart = "month", 11 | timeInterval = 1, 12 | sequenceEndDay = -1, 13 | sequenceStartDay = -365 * 5 14 | ) 15 | 16 | testthat::expect_equal(settings$temporalSequence, T) 17 | testthat::expect_equal(settings$temporal, F) 18 | 19 | testthat::expect_equal(sum(c("DemographicsGender", "ConditionEraGroupStart", "DrugEraStart") %in% names(settings)), 3) 20 | testthat::expect_equal(sum(c("DemographicsAge", "ConditionEraStart", "DrugEraGroupStart") %in% names(settings)), 0) 21 | 22 | testthat::expect_equal(settings$timePart, "month") 23 | testthat::expect_equal(settings$timeInterval, 1) 24 | 25 | testthat::expect_equal(settings$sequenceEndDay, -1) 26 | testthat::expect_equal(settings$sequenceStartDay, -365 * 5) 27 | 28 | testthat::expect_equal(class(settings), "covariateSettings") 29 | }) 30 | 31 | 32 | test_that("createTemporalSequenceCovariateSettings correctly sets function", { 33 | settings <- createTemporalSequenceCovariateSettings( 34 | useDemographicsGender = T, 35 | 
useConditionEraGroupStart = T, 36 | useDrugEraStart = T, 37 | timePart = "month", 38 | timeInterval = 1, 39 | sequenceEndDay = -1, 40 | sequenceStartDay = -365 * 5 41 | ) 42 | 43 | testthat::expect_equal(attr(settings, "fun"), "getDbDefaultCovariateData") 44 | }) 45 | 46 | 47 | # check extraction 48 | test_that("getDbCovariateData works with createTemporalSequenceCovariateSettings", { 49 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection")) 50 | covSet <- createTemporalSequenceCovariateSettings( 51 | useDemographicsGender = T, 52 | useDemographicsAge = T, 53 | useDemographicsRace = T, 54 | useDemographicsEthnicity = T, 55 | useDemographicsAgeGroup = T, 56 | useConditionEraGroupStart = T, 57 | useDrugEraStart = T, 58 | useMeasurement = T, 59 | useMeasurementValue = T, 60 | timePart = "month", 61 | timeInterval = 1, 62 | sequenceEndDay = -1, 63 | sequenceStartDay = -365 * 5 64 | ) 65 | 66 | 67 | result <- getDbCovariateData( 68 | connection = eunomiaConnection, 69 | cdmDatabaseSchema = "main", 70 | cohortTable = "cohort", 71 | cohortIds = c(1), 72 | covariateSettings = covSet 73 | ) 74 | 75 | expect_true(is(result, "CovariateData")) 76 | 77 | # check timeId is 60 or less 78 | expect_true(max(as.data.frame(result$covariates)$timeId, na.rm = T) <= 60) 79 | }) 80 | 81 | # Check backwards compatibility 82 | test_that("Temporal Covariate Settings are backwards compatible", { 83 | skip_on_cran() 84 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection")) 85 | 86 | # Temporal covariate settings created previously will not have 87 | # the temporalSequence property 88 | covSet <- FeatureExtraction::createDefaultTemporalCovariateSettings() 89 | covSet$temporalSequence <- NULL 90 | 91 | result <- getDbCovariateData( 92 | connection = eunomiaConnection, 93 | cdmDatabaseSchema = "main", 94 | cohortTable = "cohort", 95 | cohortIds = c(1), 96 | covariateSettings = covSet 97 | ) 98 | expect_true(is(result, "CovariateData")) 99 | }) 100 |
-------------------------------------------------------------------------------- /tests/testthat/test-GetDefaultCovariates.R: -------------------------------------------------------------------------------- 1 | # This file covers the code in GetDefaultCovariates.R. View coverage for this file using 2 | # library(testthat); library(FeatureExtraction) 3 | # covr::file_report(covr::file_coverage("R/GetDefaultCovariates.R", "tests/testthat/test-GetDefaultCovariates.R")) 4 | 5 | test_that("Test exit conditions", { 6 | skip_on_cran() 7 | skip_if_not(dbms == "sqlite" && exists("eunomiaConnection")) 8 | 9 | # covariateSettings object type 10 | expect_error(getDbDefaultCovariateData( 11 | connection = eunomiaConnection, 12 | cdmDatabaseSchema = "main", 13 | covariateSettings = list(), 14 | targetDatabaseSchema = "main", 15 | targetCovariateTable = "cov", 16 | targetCovariateRefTable = "cov_ref", 17 | targetAnalysisRefTable = "cov_analysis_ref" 18 | )) 19 | # CDM 4 not supported 20 | expect_error(getDbDefaultCovariateData( 21 | connection = eunomiaConnection, 22 | cdmDatabaseSchema = "main", 23 | cdmVersion = "4", 24 | covariateSettings = createDefaultCovariateSettings(), 25 | targetDatabaseSchema = "main", 26 | targetCovariateTable = "cov", 27 | targetCovariateRefTable = "cov_ref", 28 | targetAnalysisRefTable = "cov_analysis_ref" 29 | )) 30 | 31 | # targetCovariateTable and aggregated not supported 32 | expect_error(getDbDefaultCovariateData( 33 | connection = eunomiaConnection, 34 | cdmDatabaseSchema = "main", 35 | cohortId = -1, 36 | covariateSettings = createDefaultCovariateSettings(), 37 | targetDatabaseSchema = "main", 38 | targetCovariateTable = "cov", 39 | targetCovariateRefTable = "cov_ref", 40 | targetAnalysisRefTable = "cov_analysis_ref", 41 | aggregated = TRUE 42 | )) 43 | }) 44 | 45 | # AGS - This test fails and is likely due to a bug when using SqlLite 46 | # test_that("Test target table", { 47 | # connection <- DatabaseConnector::connect(connectionDetails) 48 
#   Eunomia::createCohorts(connectionDetails)
#
#   results <- getDbDefaultCovariateData(connection = connection,
#                                        cdmDatabaseSchema = "main",
#                                        cohortTable = "cohort",
#                                        covariateSettings = createDefaultCovariateSettings(),
#                                        targetDatabaseSchema = "main",
#                                        targetCovariateTable = "ut_cov",
#                                        targetCovariateRefTable = "ut_cov_ref",
#                                        targetAnalysisRefTable = "ut_cov_analysis_ref")
#
#   on.exit(DatabaseConnector::disconnect(connection))
# })
#
# unlink(connectionDetails$server())
# --------------------------------------------------------------------------------
# /tests/testthat/test-HelperFunctions.R:
# --------------------------------------------------------------------------------
# This file covers the code in HelperFunctions.R. View coverage for this file using
# library(testthat); library(FeatureExtraction)
# covr::file_report(covr::file_coverage("R/HelperFunctions.R", "tests/testthat/test-HelperFunctions.R"))

test_that("Test helper functions for non-aggregated covariate data", {
  skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))

  # Anything that is not a CovariateData object is rejected up front
  expect_error(filterByRowId("blah", 1), "not of class CovariateData")

  covariateData <- getDbCovariateData(
    connection = eunomiaConnection,
    cdmDatabaseSchema = eunomiaCdmDatabaseSchema,
    cohortDatabaseSchema = eunomiaOhdsiDatabaseSchema,
    cohortIds = 1:2,
    covariateSettings = createCovariateSettings(useDemographicsAgeGroup = TRUE, useChads2Vasc = TRUE),
    # Spelled out: `F` is an ordinary variable that can be reassigned
    aggregated = FALSE
  )

  # Filtering by row ID keeps only the requested row
  covariateDataFiltered <- filterByRowId(covariateData, rowIds = 1)
  expect_equal(unique(pull(covariateDataFiltered$covariates, rowId)), 1)

  # Data aggregated locally (after extraction) cannot be filtered by cohort ID
  locallyAggregated <- aggregateCovariates(covariateData)
  expect_error(filterByCohortDefinitionId(locallyAggregated, cohortIds = c(1)))

  # Per-person data cannot be filtered by cohort ID either
  expect_error(filterByCohortDefinitionId(covariateData, cohortIds = c(1)), "Can only filter aggregated")

  # A closed object must be refused
  Andromeda::close(covariateData)
  expect_error(filterByRowId(covariateData, 1), "closed")
})

test_that("Test helper functions for aggregated covariate data", {
  skip_on_cran()
  skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))

  # Anything that is not a CovariateData object is rejected up front
  expect_error(filterByCohortDefinitionId("blah", 1), "not of class CovariateData")

  aggregatedCovariateData <- getDbCovariateData(
    connection = eunomiaConnection,
    cdmDatabaseSchema = eunomiaCdmDatabaseSchema,
    cohortDatabaseSchema = eunomiaOhdsiDatabaseSchema,
    cohortIds = 1:2,
    covariateSettings = createCovariateSettings(useDemographicsAgeGroup = TRUE, useChads2Vasc = TRUE),
    aggregated = TRUE
  )

  # Filtering by cohort ID keeps only the requested cohort
  aggCovariateDataFiltered <- filterByCohortDefinitionId(aggregatedCovariateData, cohortIds = c(1))
  expect_equal(unique(pull(aggCovariateDataFiltered$covariates, cohortDefinitionId)), 1)

  # Aggregated data has no per-person rows to filter on
  expect_error(filterByRowId(aggregatedCovariateData, 1), "Cannot filter aggregated")

  # A closed object must be refused. The argument is spelled `cohortIds` in
  # full, as in the calls above, instead of relying on partial matching of
  # `cohortId`.
  Andromeda::close(aggregatedCovariateData)
  expect_error(filterByCohortDefinitionId(aggregatedCovariateData, cohortIds = c(1)), "closed")
})
# --------------------------------------------------------------------------------
# /tests/testthat/test-PostcoordConcepts.R:
# --------------------------------------------------------------------------------
library(testthat)
library(FeatureExtraction)
library(dplyr)

test_that("Postcoordinated concepts on Eunomia", {
  skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))
  # eunomiaConnection <- DatabaseConnector::connect(Eunomia::getEunomiaConnectionDetails())

  # Four single-day-to-few-week cohort entries, one per subject, all in cohort 1
  cohort <- data.frame(
    cohortDefinitionId = c(1, 1, 1, 1),
    cohortStartDate = as.Date(c("2000-02-01", "2000-08-01", "2000-02-01", "2000-01-02")),
    cohortEndDate = as.Date(c("2000-02-14", "2000-09-14", "2000-02-01", "2000-01-02")),
    subjectId = c(1, 2, 3, 4)
  )
  DatabaseConnector::insertTable(
    connection = eunomiaConnection,
    tableName = "#pcc_cohort",
    data = cohort,
    dropTableIfExists = TRUE,
    tempTable = TRUE,
    createTable = TRUE,
    progressBar = FALSE,
    camelCaseToSnakeCase = TRUE
  )

  # Appends four measurement records (persons 1, 1, 3, 4) to the existing
  # main.measurement table; only the concept / value-as-concept IDs vary
  # between the two scenarios in this test.
  # NOTE(review): the rows go into a permanent table and nothing in this test
  # removes them again, so later tests on the same connection will see them.
  appendMeasurements <- function(measurementConceptId, valueAsConceptId) {
    measurement <- data.frame(
      measurementId = c(0, 0, 0, 0),
      measurementTypeConceptId = c(0, 0, 0, 0),
      personId = c(1, 1, 3, 4),
      measurementConceptId = measurementConceptId,
      valueAsConceptId = valueAsConceptId,
      measurementDate = as.Date(c("2000-01-14", "2000-01-01", "2000-01-14", "2000-01-01"))
    )
    DatabaseConnector::insertTable(
      connection = eunomiaConnection,
      tableName = "measurement",
      databaseSchema = "main",
      data = measurement,
      dropTableIfExists = FALSE,
      tempTable = FALSE,
      createTable = FALSE,
      progressBar = FALSE,
      camelCaseToSnakeCase = TRUE
    )
  }

  # The same settings are used for both extractions below
  settings <- createCovariateSettings(
    useMeasurementValueAsConceptShortTerm = TRUE,
    shortTermStartDays = -30
  )

  # Scenario 1: one measurement concept with two different value concepts
  appendMeasurements(
    measurementConceptId = c(3000963, 3000963, 3000963, 3000963),
    valueAsConceptId = c(4083207, 4084765, 4084765, 4084765)
  )

  covariateData <- getDbCovariateData(
    connection = eunomiaConnection,
    cdmDatabaseSchema = "main",
    cohortTable = "#pcc_cohort",
    cohortTableIsTemp = TRUE,
    covariateSettings = settings
  )

  covariates <- covariateData$covariates %>%
    collect() %>%
    arrange(rowId)
  expect_equal(covariates$rowId, c(1, 3, 4))
  expect_equal(covariates$covariateId, c(583329995308716, 583329563103716, 583329563103716))
  expect_equal(covariates$covariateValue, c(1, 1, 1))

  covariateRef <- covariateData$covariateRef %>%
    collect() %>%
    arrange(covariateId)
  expect_equal(covariateRef$covariateId, c(583329563103716, 583329995308716))
  expect_equal(covariateRef$conceptId, c(3000963, 3000963))
  expect_equal(covariateRef$valueAsConceptId, c(4084765, 4083207))

  analysisRef <- covariateData$analysisRef %>%
    collect()
  expect_equal(analysisRef$analysisId, 716)

  # Introduce collisions: two different measurement concepts sharing one value
  # concept; the extraction is expected to warn about them
  appendMeasurements(
    measurementConceptId = c(3048564, 3048564, 40483078, 40483078),
    valueAsConceptId = c(4069590, 4069590, 4069590, 4069590)
  )

  expect_warning(
    getDbCovariateData(
      connection = eunomiaConnection,
      cdmDatabaseSchema = "main",
      cohortTable = "#pcc_cohort",
      cohortTableIsTemp = TRUE,
      covariateSettings = settings
    ),
    "Collisions"
  )
})
# --------------------------------------------------------------------------------
# /tests/testthat/test-PrespecAnalyses.R:
# --------------------------------------------------------------------------------
# This file contains tests for all the PrespecAnalyses files in the inst/csv folder.

test_that("PrespecAnalyses check for uniqueness", {
  # `pattern` is a regular expression, not a glob: the dot is escaped and the
  # match is anchored on the literal "Analyses.csv" file-name suffix.
  analysesFiles <- list.files(
    system.file("csv", package = "FeatureExtraction"),
    pattern = "Analyses\\.csv$",
    full.names = TRUE
  )

  # Without this guard the test would pass vacuously when no file is found
  expect_gt(length(analysesFiles), 0)

  requiredColumns <- c(
    "analysisId", "analysisName", "sqlFileName", "subType", "domainId",
    "domainTable", "domainConceptId", "domainStartDate", "domainEndDate",
    "isDefault", "description"
  )

  for (filePath in analysesFiles) {
    prespecAnalyses <- read.csv(filePath)

    expect_s3_class(prespecAnalyses, "data.frame")
    expect_true(all(requiredColumns %in% colnames(prespecAnalyses)))

    # analysisId should be unique as well as the combination of other columns.
    # anyDuplicated() returns 0 when there is no duplicate, otherwise the index
    # of the first duplicated element / row.
    expect_equal(anyDuplicated(prespecAnalyses$analysisId), 0L)

    # Base subsetting is used so this file does not depend on dplyr's pipe
    # having been attached by another test file.
    otherColumns <- setdiff(colnames(prespecAnalyses), "analysisId")
    expect_equal(anyDuplicated(prespecAnalyses[, otherColumns, drop = FALSE]), 0L)
  }
})
# --------------------------------------------------------------------------------
# /tests/testthat/test-tidyCovariates.R:
# --------------------------------------------------------------------------------
# View coverage for this file using
# library(testthat); library(FeatureExtraction)
# covr::file_report(covr::file_coverage("R/Normalization.R", "tests/testthat/test-tidyCovariates.R"))

test_that("Test exit conditions ", {
  # Covariate Data object check
  expect_error(tidyCovariateData(covariateData = list()))

  # CovariateData object closed
  cvData <- FeatureExtraction::createEmptyCovariateData(
    cohortIds = 1,
    aggregated = FALSE,
    temporal = FALSE
  )
  Andromeda::close(cvData)
  expect_error(tidyCovariateData(covariateData = cvData))

  # CovariateData aggregated
  cvData <- FeatureExtraction::createEmptyCovariateData(
    cohortIds = 1,
    aggregated = TRUE,
    temporal = FALSE
  )
  expect_error(tidyCovariateData(covariateData = cvData))
  # Release the still-open object once the expectation has run
  Andromeda::close(cvData)
})

test_that("Test empty covariateData", {
  cvData <- FeatureExtraction::createEmptyCovariateData(
    cohortIds = 1,
    aggregated = FALSE,
    temporal = FALSE
  )
  result <- tidyCovariateData(covariateData = cvData)

  # Tidying must leave the number of covariate rows of an empty object unchanged
  expect_equal(
    length(pull(result$covariates, covariateId)),
    length(pull(cvData$covariates, covariateId))
  )
})

test_that("tidyCovariates works", {
  # Generate some data: covariate `i * 1000 + analysisId` with `i` rows of value 1
  createCovariate <- function(i, analysisId) {
    return(tibble(
      covariateId = rep(i * 1000 + analysisId, i),
      covariateValue = rep(1, i)
    ))
  }

  # Analysis 1: ten covariates with 1..10 rows each, every row a distinct rowId
  covariates <- lapply(1:10, createCovariate, analysisId = 1)
  covariates <- do.call("rbind", covariates)
  # seq_len() stays correct even for a zero-row table, unlike 1:nrow()
  covariates$rowId <- seq_len(nrow(covariates))
  metaData <- list(populationSize = nrow(covariates))

  # Analysis 2: one covariate present in 40 distinct rows of the population
  frequentCovariate <- createCovariate(40, analysisId = 2)
  frequentCovariate$rowId <- sample.int(metaData$populationSize, nrow(frequentCovariate), replace = FALSE)

  # Analysis 3: one covariate present in a single row
  infrequentCovariate <- createCovariate(1, analysisId = 3)
  infrequentCovariate$rowId <- sample.int(metaData$populationSize, nrow(infrequentCovariate), replace = FALSE)

  covariates <- rbind(covariates, frequentCovariate, infrequentCovariate)

  covariateRef <- tibble(
    covariateId = c(1:10 * 1000 + 1, 40002, 1003),
    analysisId = c(rep(1, 10), 2, 3)
  )

  covariateData <- Andromeda::andromeda(
    covariates = covariates,
    covariateRef = covariateRef
  )
  attr(covariateData, "metaData") <- metaData
  class(covariateData) <- "CovariateData"

  tidy <- tidyCovariateData(covariateData, minFraction = 0.1, normalize = TRUE, removeRedundancy = TRUE)

  # Row counts are compared with expect_equal()/expect_gt() so that a failure
  # reports the actual count instead of a bare FALSE.

  # Test: most prevalent covariate in analysis 1 is dropped:
  expect_equal(nrow(filter(tidy$covariates, covariateId == 10001) %>% collect()), 0L)

  # Test: infrequent covariate in analysis 1 isn't dropped:
  expect_gt(nrow(filter(tidy$covariates, covariateId == 1001) %>% collect()), 0)

  # Test: infrequent covariate is dropped:
  expect_equal(nrow(filter(tidy$covariates, covariateId == 1003) %>% collect()), 0L)

  # Test: frequent covariate isn't dropped:
  expect_gt(nrow(filter(tidy$covariates, covariateId == 40002) %>% collect()), 0)
})

test_that("tidyCovariateData on Temporal Data", {
  skip_if_not(dbms == "sqlite" && exists("eunomiaConnection"))

  covariateSettings <- createTemporalCovariateSettings(
    useDrugExposure = TRUE,
    temporalStartDays = -2:-1,
    temporalEndDays = -2:-1
  )
  covariateData <- getDbCovariateData(
    connection = eunomiaConnection,
    cdmDatabaseSchema = eunomiaCdmDatabaseSchema,
    cohortIds = c(1),
    covariateSettings = covariateSettings
  )
  tidy <- tidyCovariateData(covariateData)

  # Tidying temporal data must keep the same number of analysisRef entries
  expect_equal(
    length(pull(tidy$analysisRef, analysisId)),
    length(pull(covariateData$analysisRef, analysisId))
  )
})
# --------------------------------------------------------------------------------