├── .Rbuildignore ├── .Renviron ├── .gitignore ├── DESCRIPTION ├── LICENSE.txt ├── NAMESPACE ├── R ├── changeOutDirectory.R ├── exploreConcepts.R ├── findPatients.R ├── getClinicalData.R ├── getDemographics.R ├── getEncounters.R ├── makeDataOntology.R ├── showDataTypes.R ├── summarizeDemographics.R ├── utils.R └── zzz.R ├── README.md ├── ROMOP.Rproj ├── man ├── changeOutDirectory.Rd ├── exploreConcepts.Rd ├── findPatients.Rd ├── getClinicalData.Rd ├── getConditions.Rd ├── getDemographics.Rd ├── getDevices.Rd ├── getEncounters.Rd ├── getMeasurements.Rd ├── getMedications.Rd ├── getObservations.Rd ├── getProcedures.Rd ├── makeDataOntology.Rd ├── showDataTypes.Rd └── summarizeDemographics.Rd └── www ├── figure1a_v3.png └── figure1b_v3.png /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.Renviron: -------------------------------------------------------------------------------- 1 | driver = "" 2 | host = "" 3 | username = "" 4 | password = "" 5 | dbname = "" 6 | port = "3306" 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .DS_Store 6 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: ROMOP 2 | Type: Package 3 | Title: A Light-Weight R Package for Interfacing with OMOP-Formatted Electronic Health Record Data 4 | Version: 0.3.0 5 | Author: Benjamin S. Glicksberg 6 | Maintainer: Benjamin S. Glicksberg 7 | Description: ROMOP streamlines typical EHR-related data processes for Observational Medical Outcomes Partnership (OMOP)-formatted data. 
Its functions include exploration of data types, extraction and summarization of patient clinical and demographic data, and patient searches using any Common Data Model (CDM) vocabulary concept. 8 | License: MIT License 9 | Encoding: UTF-8 10 | LazyData: true 11 | Imports: dplyr, data.table, DBI, RMySQL, DatabaseConnector, DatabaseConnectorJars, SqlRender 12 | Depends: R (>= 3.4) 13 | RoxygenNote: 6.0.1 14 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Benjamin S. Glicksberg 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
# Generated by roxygen2: do not edit by hand

export(changeOutDirectory)
export(exploreConcepts)
export(findPatients)
export(getClinicalData)
export(getConditions)
export(getDemographics)
export(getDevices)
export(getEncounters)
export(getMeasurements)
export(getMedications)
export(getObservations)
export(getProcedures)
export(makeDataOntology)
export(showDataTypes)
export(summarizeDemographics)
import(DBI)
import(data.table)
import(dplyr)

--------------------------------------------------------------------------------
/R/changeOutDirectory.R:
--------------------------------------------------------------------------------

#' Change outDirectory
#'
#' Sets the current outDirectory which will store the Data Ontology and all function output. Option to create directory if does not exist.
#' The stored option always ends in "/" because other functions paste file names directly onto it.
#'
#' @param outdir directory path
#' @param create TRUE/FALSE (will create the directory, including missing parent directories, if it does not exist)
#'
#' @return none (called for side effect: sets outDirectory). The option is left untouched when the directory does not exist and is not (or cannot be) created.
#' @export
#'
#' @examples
#' changeOutDirectory(outdir = "~/", create = FALSE)
changeOutDirectory <- function(outdir, create = FALSE) {

  alreadyThere <- dir.exists(outdir)

  if (!alreadyThere) {
    if (!isTRUE(create)) {
      message(paste0(outdir, " does not exist. Please set 'create = TRUE' if you wish to create it or choose an already existing directory. OutDirectory not set. "))
      return(invisible(NULL))
    }

    # dir.create() reports failure through its return value (plus a warning);
    # never point the option at a directory that was not actually created.
    if (!isTRUE(dir.create(outdir, recursive = TRUE))) {
      message(paste0(outdir, " could not be created. OutDirectory not set. "))
      return(invisible(NULL))
    }
  }

  # normalise to a trailing slash exactly once for both branches
  options("outDirectory" = if (endsWith(outdir, "/")) outdir else paste0(outdir, "/"))

  if (alreadyThere) {
    message(paste0(outdir, " set as OutDirectory. "))
  } else {
    message(paste0(outdir, " does not exist. Created and set to OutDirectory. "))
  }

  invisible(NULL)
}

--------------------------------------------------------------------------------
/R/exploreConcepts.R:
--------------------------------------------------------------------------------
#' Extract synonyms and descendants for concepts of interest.
#'
#' For given vocabulary and concept, returns the mapped standard concept(s) as well as descendant concept(s). Requires dataOntology to have been created (makeDataOntology function).
#'
#' @param vocabulary Comma-separated string of relevant vocabularies for inclusion criteria
#' @param codes Semi-colon separated string of code concepts for inclusion criteria, corresponding to the order for vocabulary. Multiple codes can be used per vocabulary and should be comma-separated.
#'
#' @return Returns a table of concepts contained under (i.e., below in the hierarchy) the query concept. Returns NULL (after a message) when dataOntology is missing or no criteria could be mapped.
#' @export
#'
#' @examples
#' conceptsInfo <- exploreConcepts(vocabulary = "ATC, ICD10CM", codes = "A01A; K50, K51")
exploreConcepts <- function(vocabulary, codes) {

  if (!exists("dataOntology")) { # ensure dataOntology exists
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  criteriaMapped <- unpackAndMap(vocabulary, codes)

  if (!(nrow(criteriaMapped) > 0)) {
    message("Error: none of the inclusion criteria were able to map to the ontology. Please check terms and try again.")
    return(invisible(NULL))
  }

  # the original concepts are kept alongside their synonyms so that both are
  # used as ancestors in the descendant lookup
  codesFormatted <- paste0(criteriaMapped$concept_id, collapse = ",")
  synonymDataFiltered <- identifySynonyms(codesFormatted)
  synonymCodes <- paste(c(codesFormatted, unique(synonymDataFiltered$concept_id_2)), collapse = ",")

  identifyMappings(synonymCodes)
}

--------------------------------------------------------------------------------
/R/findPatients.R:
--------------------------------------------------------------------------------


#' Find patients based on clinical criteria
#'
#' Identify patients based on clinical data inclusion (and exclusion, if desired) criteria. Flexible to allow for multiple data types, vocabularies, and concepts.
#' @param strategy_in "mapped" or "direct" (dictates the strategy for how inclusion criteria are treated. "direct" searches for codes as provided, "mapped" maps criteria to standard concepts and finds descendants.)
#' @param vocabulary_in vocabularies for inclusion criteria (comma-separated string of vocabularies)
#' @param codes_in specific concept codes for inclusion criteria (semi-colon separated string of code concepts, corresponding to the order for vocabulary_in. Multiple codes can be used per vocabulary and should be comma-separated.)
#' @param function_in "and" or "or" (dictates how multiple inclusion should be treated. "and" necessitates that all inclusion criteria are met (i.e., intersection), while "or" allows for any criteria to be met (i.e., union) )
#' @param strategy_out "mapped", "direct", or NULL (default) (dictates the strategy for how exclusion are treated. NULL indicates no exclusion criteria.)
#' @param vocabulary_out vocabularies for exclusion criteria or NULL (default) (comma-separated string of relevant vocabularies for exclusion criteria. NULL indicates no exclusion criteria)
#' @param codes_out specific concept codes for exclusion criteria or NULL (default) (semi-colon separated string of code concepts for exclusion criteria, corresponding to the order for vocabulary_out.
#'   Multiple codes can be used per vocabulary and should be comma-separated. NULL indicates no exclusion criteria.)
#' @param function_out "and", "or", or NULL (default) (dictates how multiple exclusion should be treated. "and" necessitates that all exclusion criteria are met (i.e., intersection), while "or" allows for any criteria to be met (i.e., union). NULL indicates no exclusion criteria.)
#' @param declare TRUE/FALSE will output status and data information during the process
#' @param save TRUE/FALSE whether intermediate components of the search should be saved (e.g., mapped concepts found with unique patient counts per concept).
#' @param out_name name assigned to search query or NULL (if save = TRUE, saves query using provided name. If the provided name already exists as a directory (or is NULL), the directory defaults to datetime name)
#'
#' @return List of patients that meet inclusion criteria (and not exclusion criteria if entered). Returns NULL (after a message) when dataOntology is missing, the strategy/function parameters are invalid, or no inclusion criteria could be mapped.
#' @import data.table DBI
#' @export
#'
#' @examples
#' patient_list = findPatients(strategy_in="mapped", vocabulary_in = "ICD10CM", codes_in = "F41", strategy_out="mapped", vocabulary_out = "MeSH", codes_out = "D002998", function_out = "and")
findPatients <- function(strategy_in = "mapped", vocabulary_in, codes_in, function_in = "or",
                         strategy_out = NULL, vocabulary_out = NULL, codes_out = NULL,
                         function_out = NULL, declare = FALSE, save = FALSE, out_name = NULL) {

  if (!exists("dataOntology")) { # ensure dataOntology exists
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  ## strategy:
  #### mapped- map to common ontology, find descendants, and search | RECOMMENDED
  #### direct- search directly for included codes only
  ## function:
  #### and- criteria require INTERSECTION (i.e. criteria 1 AND criteria 2 AND ...)
  #### or- criteria require UNION (i.e. criteria 1 OR criteria 2 OR ...)
  strategy_in <- tolower(strategy_in) # force lowercase
  function_in <- tolower(function_in)
  if (!is.null(strategy_out)) strategy_out <- tolower(strategy_out)
  if (!is.null(function_out)) function_out <- tolower(function_out)

  if (!isTRUE(checkParameters(strategy_in, function_in, strategy_out, function_out))) {
    message("Error: invalid strategies and/or functions selected. Please use either 'direct' or 'mapped' for strategies. Please use either 'and' or 'or' for functions.")
    return(invisible(NULL))
  }

  verbose <- isTRUE(declare)
  saving <- isTRUE(save)

  # exclusion only runs when all four exclusion arguments are supplied
  # (scalar && instead of vectorised & inside the condition)
  excludeRequested <- !is.null(vocabulary_out) && !is.null(codes_out) &&
    !is.null(strategy_out) && !is.null(function_out)

  outdir <- NULL
  if (saving) {
    outdir <- findPatientsOutputDir(out_name)
    # writeLines() opens and closes the file itself, so an error can no longer
    # leave the console diverted the way an unbalanced sink() could
    writeLines(
      c(
        paste0("inclusion strategy: ", strategy_in),
        paste0("inclusion vocabularies: ", vocabulary_in),
        paste0("inclusion codes: ", codes_in),
        paste0("inclusion function: ", function_in),
        paste0("exclusion strategy: ", strategy_out),
        paste0("exclusion vocabularies: ", vocabulary_out),
        paste0("exclusion codes: ", codes_out),
        paste0("exclusion function: ", function_out)
      ),
      paste0(outdir, "/query.txt")
    )
  }

  ## vocabulary_in / vocabulary_out: format = "VOCABULARY1, VOCABULARY2, VOCABULARY3"
  ###### e.g. vocabulary_in = "ATC, ICD10CM, SNOMED"
  ## codes_in / codes_out: codes corresponding to the order of the vocabularies
  ###### e.g. codes_in = "L01XC19;J01X, Y07.9;O33.7, 25343008"

  # 1/2 - INCLUSION: map the criteria and search every domain table
  inclusionCriteriaMapped <- unpackAndMap(vocabulary_in, codes_in)

  if (!(nrow(inclusionCriteriaMapped) > 0)) {
    message("Error: none of the inclusion criteria were able to map to the ontology. Please check terms and try again.")
    return(invisible(NULL))
  }

  include <- findPatientsRunCriteria(inclusionCriteriaMapped, strategy_in, "INCLUSION",
                                     declare = verbose, outdir = outdir)

  # 3/4 - EXCLUSION: same pipeline, only when requested and mappable
  exclude <- NULL
  if (excludeRequested) {
    exclusionCriteriaMapped <- unpackAndMap(vocabulary_out, codes_out)

    if (nrow(exclusionCriteriaMapped) > 0) {
      exclude <- findPatientsRunCriteria(exclusionCriteriaMapped, strategy_out, "EXCLUSION",
                                         declare = verbose, outdir = outdir)
    } else {
      message("Warning: exclusion criteria were not able to map to ontology. Therefore, query running for inclusion criteria only.")
    }
  }

  # 5 - PROCESS INCLUSION(/EXCLUSION) depending on functions
  include_patient_list <- findPatientsCombine(function_in, inclusionCriteriaMapped, include)
  patient_list <- include_patient_list

  # `exclude` doubles as the "exclusion was applied" flag; the previous
  # exists("...") test could be fooled by a global variable of the same name
  if (!is.null(exclude)) {
    exclude_patient_list <- findPatientsCombine(function_out, exclusionCriteriaMapped, exclude)

    inclusion_exclusion_overlapping_patients <- intersect(patient_list, exclude_patient_list)
    print(paste0(length(inclusion_exclusion_overlapping_patients), " overlapping patients excluded from the original inclusion input based on the exclusion criteria."))

    # remove overlapping patients
    patient_list <- setdiff(patient_list, inclusion_exclusion_overlapping_patients)
  }

  print(paste0(length(patient_list), " patients found that meet the inclusion criteria."))

  if (saving) {
    if (!is.null(exclude)) {
      outcome <- c(
        paste0(length(include_patient_list), " patients found from the inclusion criteria ONLY."),
        paste0(length(exclude_patient_list), " patients found from the exclusion criteria ONLY."),
        paste0(length(inclusion_exclusion_overlapping_patients), " overlapping patients excluded from the original inclusion input based on the exclusion criteria."),
        paste0(length(patient_list), " patients found that meet the inclusion and exclusion criteria.")
      )
    } else {
      outcome <- paste0(length(patient_list), " patients found that meet the inclusion criteria.")
    }
    writeLines(outcome, paste0(outdir, "/outcome.txt"))

    write.table(data.frame(patient_list), file = paste0(outdir, "/patient_list.txt"),
                sep = "\t", row.names = FALSE, quote = FALSE)
    message(paste0("Outcome from query saved in: ", outdir))
  }

  patient_list
}


################## findPatients internals (not exported)

# Creates and returns the directory that receives the saved query artefacts.
# Uses <outDirectory><out_name>; falls back to a datetime-named directory when
# out_name is NULL or already exists.
findPatientsOutputDir <- function(out_name = NULL) {
  base <- getOption("outDirectory")
  timestamped <- function() paste0(base, gsub(" ", "_", Sys.time()))

  if (is.null(out_name)) {
    outdir <- timestamped()
    dir.create(outdir)
    return(outdir)
  }

  outdir <- paste0(base, out_name)
  if (!dir.exists(outdir)) {
    dir.create(outdir)
  } else {
    outdir <- timestamped()
    dir.create(outdir)
    message(paste0(base, out_name, " directory already exists. Saving results to: ", outdir))
  }
  outdir
}

# Runs one criteria set (label is "INCLUSION" or "EXCLUSION") end to end:
# resolves the concepts to search, queries each domain table, and optionally
# saves intermediate files into `outdir` (NULL = do not save).
# Returns list(synonymDataFiltered, mappingDataInfo, found); `found` holds one
# entry per domain (NULL where the domain had no codes to query).
findPatientsRunCriteria <- function(criteriaMapped, strategy, label, declare = FALSE, outdir = NULL) {
  filePrefix <- tolower(label)
  saving <- !is.null(outdir)

  if (declare) {
    message(paste0("The following ", label, " criteria are being used: \n"))
    print(criteriaMapped)
  }

  if (saving) {
    write.table(criteriaMapped, file = paste0(outdir, "/", filePrefix, "_criteria_mapped.txt"),
                sep = "\t", row.names = FALSE, quote = FALSE)
  }

  synonymDataFiltered <- NULL
  mappingDataInfo <- NULL

  if (strategy == "direct") {

    useSource <- "_source" # search _source_concept_id
    searchTable <- identifyTablesDirect(criteriaMapped)

  } else { # "mapped" (checkParameters has already validated the value)
    ## RECOMMENDED, but can lead to:
    #### cross-mapping (i.e. from ICD code (e.g. diabetes) to procedure/measurement)
    #### extra mapping (i.e. ICD 10 code --> ICD9 & 10 results)

    # set explicitly for every criteria set: previously a "mapped" exclusion
    # silently inherited "_source" from a "direct" inclusion
    useSource <- "" # search _concept_id

    # get common ontology synonyms
    codesFormatted <- paste0(criteriaMapped$concept_id, collapse = ",")
    synonymDataFiltered <- identifySynonyms(codesFormatted)
    synonymData <- merge(synonymDataFiltered[, "concept_id_2"], dataOntology[, c("concept_id", "domain_id", "vocabulary_id")], by.x = "concept_id_2", by.y = "concept_id")
    colnames(synonymData) <- c("concept_id", "domain_id", "vocabulary_id")
    ## adds original codes into ancestor query (b/c of scenarios with ATC)
    synonymCodes <- paste(union(criteriaMapped$concept_id, synonymDataFiltered$concept_id_2), collapse = ",")

    # get descendants
    mappingDataInfo <- identifyMappings(synonymCodes)
    mappingData <- mappingDataInfo[, c("descendant_concept_id", "domain_id", "vocabulary_id")]
    colnames(mappingData) <- c("concept_id", "domain_id", "vocabulary_id")

    combined <- rbind(criteriaMapped[, c("concept_id", "domain_id", "vocabulary_id")], synonymData)
    combined <- rbind(combined, mappingData)
    combined <- combined[!duplicated(combined), ]

    if (declare) {
      message(paste0("The following ", label, " mapped concepts are being queried (along with mapped input and synonyms): \n"))
      print(mappingDataInfo)
    }

    # get tables to search for mapped concepts
    searchTable <- identifyTablesMapped(combined)
  }

  # TRUE only when *no* domain has any concept to search. The earlier
  # `all(array(sapply(x, length)))==0` compared the result of all() with 0 and
  # therefore fired as soon as a single domain was empty.
  nothingToSearch <- all(lengths(searchTable) == 0)
  if (nothingToSearch && declare) {
    message(paste0("Warning: no concepts could be mapped to ", label, " criteria standard concepts for the domain of interest and, as such, no patients will be identified. Please refer to README for more details."))
  }

  # one entry per domain; each domain reads its *own* codes (the device search
  # used the Drug codes and the exclusion observation search the Condition codes)
  domains <- list(
    condition   = list(codes = searchTable$Condition,   note = "querying Conditions...", run = searchCondition),
    observation = list(codes = searchTable$Observation, note = "querying Observations",  run = searchObservation),
    measurement = list(codes = searchTable$Measurement, note = "querying Measurements",  run = searchMeasurement),
    drug        = list(codes = searchTable$Drug,        note = "querying Drugs",         run = searchDrug),
    device      = list(codes = searchTable$Device,      note = "querying Devices",       run = searchDevice),
    procedure   = list(codes = searchTable$Procedure,   note = "querying Procedures",    run = searchProcedure)
  )

  found <- lapply(domains, function(domain) {
    if (length(domain$codes) == 0) {
      return(NULL)
    }
    if (declare) message(domain$note)
    domain$run(useSource, paste(domain$codes, collapse = ","))
  })

  # save mapped concepts with patient counts
  if (saving && !nothingToSearch) {
    conceptCounts <- summarizeFoundConcepts(found$condition, found$observation, found$measurement,
                                            found$device, found$drug, found$procedure)
    # merge pt counts with all concepts; the "direct" strategy has no descendant
    # table, so the counts are written on their own in that case
    if (!is.null(mappingDataInfo)) {
      conceptCounts <- merge(mappingDataInfo, conceptCounts, by.x = "descendant_concept_id", by.y = "concept_id", all.x = TRUE)
    }
    write.table(conceptCounts, file = paste0(outdir, "/", filePrefix, "_criteria_mapped_concepts.txt"),
                sep = "\t", row.names = FALSE, quote = FALSE)
  }

  list(synonymDataFiltered = synonymDataFiltered, mappingDataInfo = mappingDataInfo, found = found)
}

# Turns the per-domain search results of one criteria set into a patient list
# using the union ("or") or intersection ("and") helper.
# NOTE(review): with the "direct" strategy no synonym/descendant tables exist,
# so identifyPatientsAND receives NULL for them -- confirm it supports that.
findPatientsCombine <- function(combineFunction, criteriaMapped, run) {
  pts <- run$found
  if (combineFunction == "or") {
    identifyPatientsOR(pts$condition, pts$observation, pts$measurement,
                       pts$device, pts$drug, pts$procedure)
  } else if (combineFunction == "and") {
    identifyPatientsAND(criteriaMapped, run$synonymDataFiltered, run$mappingDataInfo,
                        pts$condition, pts$observation, pts$measurement,
                        pts$device, pts$drug, pts$procedure)
  }
}


################## specific table search functions

# Shared implementation of the per-domain searches: selects the distinct
# (person_id, <prefix>_concept_id) pairs from `tableName` whose
# <prefix><useSource>_concept_id is in the comma-separated `codes`.
# useSource is "" (standard concept column) or "_source" (source concept column).
#' @import data.table DBI
searchDomainTable <- function(tableName, conceptPrefix, useSource, codes) {
  query <- paste0('SELECT person_id, ', conceptPrefix, '_concept_id FROM ', tableName,
                  ' WHERE ', conceptPrefix, useSource, '_concept_id IN (', codes, ') ')
  hits <- data.table(sqlQuery(query))
  hits[!duplicated(hits)]
}

#' @import data.table DBI
searchCondition <- function(useSource,codes) {
  searchDomainTable("condition_occurrence", "condition", useSource, codes)
}

#' @import data.table DBI
searchObservation <- function(useSource,codes) {
  searchDomainTable("observation", "observation", useSource, codes)
}

#' @import data.table DBI
searchMeasurement <- function(useSource,codes) {
  searchDomainTable("measurement", "measurement", useSource, codes)
}

#' @import data.table DBI
searchDrug <- function(useSource,codes) {
  searchDomainTable("drug_exposure", "drug", useSource, codes)
}

#' @import data.table DBI
searchDevice <- function(useSource,codes) {
  searchDomainTable("device_exposure", "device", useSource, codes)
}

#' @import data.table DBI
searchProcedure <- function(useSource,codes) {
  searchDomainTable("procedure_occurrence", "procedure", useSource, codes)
}



--------------------------------------------------------------------------------
/R/getClinicalData.R:
--------------------------------------------------------------------------------
#' Retrieves all patient clinical data
#'
#' Wrapper for domain-specific getData functions (e.g., getObservations). Produces a list of tables for all relevant domains.
#'
#' @param patient_list Comma-separated string of patient ids
#' @param declare TRUE/FALSE will output status and data information during the process
#'
#' @return a list of tables of all data within each domain (e.g., Condition) for all patients provided (can access by ptClinicalData$Condition).
#' @import data.table DBI
#' @export
#'
#' @examples
#' ptClinicalData <- getClinicalData("1,2", declare=TRUE)
getClinicalData <- function(patient_list, declare=FALSE) {

  # Every domain getter maps concepts through dataOntology; without it there
  # is nothing to do except tell the user.
  if (!exists("dataOntology")) {
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  # A vector of ids is flattened into the comma-separated form the getters
  # paste into their SQL.
  if (length(patient_list) > 1) {
    patient_list <- paste(patient_list, collapse = ",")
  }

  # One element per clinical domain; the element names are what callers use
  # to access a table (e.g. ptClinicalData$Condition).
  list(
    Observation = getObservations(patient_list, declare = declare),
    Condition = getConditions(patient_list, declare = declare),
    Procedures = getProcedures(patient_list, declare = declare),
    Medications = getMedications(patient_list, declare = declare),
    Measurements = getMeasurements(patient_list, declare = declare),
    Devices = getDevices(patient_list, declare = declare)
  )

}


################################# modality specific functions

#' Retrieves all patient clinical data from Observations table
#'
#' Produces a table for relevant concepts contained in the 'observation' table mapped through the data ontology for a patient list. Data retrieved include: observation_type, value, etc.
#'
#' @param patient_list Comma-separated string of patient ids (a vector of ids is collapsed into one)
#' @param declare TRUE/FALSE will output status and data information during the process
#'
#' @return a table of relevant clinical data contained within the 'observation' table
#' @import data.table DBI
#' @export
#'
#' @examples
#' ptObsData <- getObservations("1,2", declare=TRUE)
getObservations <- function(patient_list, declare=FALSE) {

  # Nothing can be mapped without the concept dictionary.
  if (!exists("dataOntology")) {
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  # Emits a progress message only when the caller asked for one.
  announce <- function(text) {
    if (declare == TRUE) {
      message(text)
    }
  }

  # Left-joins `lookup` onto `tbl` (tbl[[idColumn]] against concept_id),
  # renames the joined columns per `relabel` (old name = new name) and then
  # drops the id column that was used for the join.
  annotate <- function(tbl, lookup, idColumn, relabel) {
    tbl <- merge(tbl, lookup, by.x = idColumn, by.y = "concept_id", all.x = TRUE)
    for (oldName in names(relabel)) {
      names(tbl)[names(tbl) == oldName] <- relabel[[oldName]]
    }
    keep <- setdiff(names(tbl), idColumn)
    tbl[, keep, with = FALSE]
  }

  if (length(patient_list) > 1) {
    patient_list <- paste(patient_list, collapse = ",")
  }

  ## observation
  # observation_concept_id is SNOMED
  wanted <- c("person_id", "observation_concept_id", "observation_source_concept_id",
              "observation_datetime", "observation_type_concept_id", "value_as_number",
              "value_as_string", "value_as_concept_id", "visit_occurrence_id",
              "observation_source_value", "unit_source_value")
  queryStatement <- paste0('SELECT ', paste(wanted, collapse = ", "),
                           ' FROM observation WHERE person_id IN (', patient_list, ') ')

  announce("Loading Observations data......")

  obs <- data.table(sqlQuery(queryStatement))

  # An empty result is handed back untouched (still as a data.table).
  if (nrow(obs) == 0) {
    message("No observation data found for patient list")
    return(obs)
  }

  announce("Observation data loaded; formatting...")

  # Domain-restricted dictionary for the clinical codes, full dictionary
  # (id -> name only) for the metadata columns.
  observationTableOntology <- dataOntology[domain_id == "Observation"]
  domainConcepts <- observationTableOntology[, c("concept_id", "vocabulary_id", "concept_code", "concept_name")]
  conceptNames <- dataOntology[, c("concept_id", "concept_name")]

  # format clinical data
  obs <- annotate(obs, domainConcepts, "observation_concept_id",
                  c(concept_code = "observation_concept_code",
                    concept_name = "observation_concept_name",
                    vocabulary_id = "observation_concept_vocabulary"))
  obs <- annotate(obs, domainConcepts, "observation_source_concept_id",
                  c(concept_code = "observation_source_code",
                    concept_name = "observation_source_name",
                    vocabulary_id = "observation_source_vocabulary"))

  # format metadata
  obs <- annotate(obs, conceptNames, "observation_type_concept_id",
                  c(concept_name = "observation_type"))
  obs <- annotate(obs, conceptNames, "value_as_concept_id",
                  c(concept_name = "value_concept"))

  announce("Observation data formatted successfully ")

  obs

}



#' Retrieves all patient clinical data from Condition table
#'
#' Produces a table for relevant concepts contained in the 'condition_occurrence' table mapped through the data ontology for a patient list. Data retrieved include: condition_type, condition_status, etc.
#'
#' @param patient_list Comma-separated string of patient ids (a vector of ids is collapsed into one)
#' @param declare TRUE/FALSE will output status and data information during the process
#'
#' @return a table of relevant clinical data contained within the 'condition_occurrence' table
#' @import data.table DBI
#' @export
#'
#' @examples
#' ptCondData <- getConditions("1,2", declare=TRUE)
getConditions <- function(patient_list, declare=FALSE) {

  # Nothing can be mapped without the concept dictionary.
  if (!exists("dataOntology")) {
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  # Emits a progress message only when the caller asked for one.
  announce <- function(text) {
    if (declare == TRUE) {
      message(text)
    }
  }

  # Left-joins `lookup` onto `tbl` (tbl[[idColumn]] against concept_id),
  # renames the joined columns per `relabel` (old name = new name) and then
  # drops the id column that was used for the join.
  annotate <- function(tbl, lookup, idColumn, relabel) {
    tbl <- merge(tbl, lookup, by.x = idColumn, by.y = "concept_id", all.x = TRUE)
    for (oldName in names(relabel)) {
      names(tbl)[names(tbl) == oldName] <- relabel[[oldName]]
    }
    keep <- setdiff(names(tbl), idColumn)
    tbl[, keep, with = FALSE]
  }

  if (length(patient_list) > 1) {
    patient_list <- paste(patient_list, collapse = ",")
  }

  wanted <- c("person_id", "condition_concept_id", "condition_start_datetime",
              "visit_occurrence_id", "condition_type_concept_id", "condition_source_value",
              "condition_source_concept_id", "condition_status_concept_id")
  queryStatement <- paste0('SELECT ', paste(wanted, collapse = ", "),
                           ' FROM condition_occurrence WHERE person_id IN (', patient_list, ') ')

  announce("Loading Condition data...")

  conditions <- data.table(sqlQuery(queryStatement))

  # An empty result is handed back untouched (still as a data.table).
  if (nrow(conditions) == 0) {
    message("No condition data found for patient list")
    return(conditions)
  }

  announce("Condition data loaded; formatting...")

  # Domain dictionary: every concept whose domain_id contains "Condition".
  conditionTableOntology <- dataOntology[grep("Condition", domain_id)]
  domainConcepts <- conditionTableOntology[, c("concept_id", "vocabulary_id", "concept_code", "concept_name")]
  conceptNames <- dataOntology[, c("concept_id", "concept_name")]

  # format clinical data
  conditions <- annotate(conditions, domainConcepts, "condition_concept_id",
                         c(concept_code = "condition_concept_code",
                           concept_name = "condition_concept_name",
                           vocabulary_id = "condition_concept_vocabulary"))
  conditions <- annotate(conditions, domainConcepts, "condition_source_concept_id",
                         c(concept_code = "condition_source_code",
                           concept_name = "condition_source_name",
                           vocabulary_id = "condition_source_vocabulary"))

  # format metadata
  conditions <- annotate(conditions, conceptNames, "condition_type_concept_id",
                         c(concept_name = "condition_type"))
  conditions <- annotate(conditions, conceptNames, "condition_status_concept_id",
                         c(concept_name = "condition_status_type"))

  announce("Condition data formatted successfully. ")

  conditions

}



#' Retrieves all patient clinical data from Procedures table
#'
#' Produces a table for relevant concepts contained in the 'procedure_occurrence' table mapped through the data ontology for a patient list. Data retrieved include: procedure_type, etc.
#'
#' @param patient_list Comma-separated string of patient ids (a vector of ids is collapsed into one)
#' @param declare TRUE/FALSE will output status and data information during the process
#'
#' @return a table of relevant clinical data contained within the 'procedure_occurrence' table
#' @import data.table DBI
#' @export
#'
#' @examples
#' ptProcData <- getProcedures("1,2", declare=TRUE)
getProcedures <- function(patient_list, declare=FALSE) {

  # Nothing can be mapped without the concept dictionary.
  if (!exists("dataOntology")) {
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  # Emits a progress message only when the caller asked for one.
  announce <- function(text) {
    if (declare == TRUE) {
      message(text)
    }
  }

  # Left-joins `lookup` onto `tbl` (tbl[[idColumn]] against concept_id),
  # renames the joined columns per `relabel` (old name = new name) and then
  # drops the id column that was used for the join.
  annotate <- function(tbl, lookup, idColumn, relabel) {
    tbl <- merge(tbl, lookup, by.x = idColumn, by.y = "concept_id", all.x = TRUE)
    for (oldName in names(relabel)) {
      names(tbl)[names(tbl) == oldName] <- relabel[[oldName]]
    }
    keep <- setdiff(names(tbl), idColumn)
    tbl[, keep, with = FALSE]
  }

  if (length(patient_list) > 1) {
    patient_list <- paste(patient_list, collapse = ",")
  }

  wanted <- c("person_id", "procedure_concept_id", "procedure_datetime", "quantity",
              "visit_occurrence_id", "procedure_type_concept_id", "procedure_source_value",
              "procedure_source_concept_id")
  queryStatement <- paste0('SELECT ', paste(wanted, collapse = ", "),
                           ' FROM procedure_occurrence WHERE person_id IN (', patient_list, ') ')

  announce("Loading Procedures data...")

  procedures <- data.table(sqlQuery(queryStatement))

  # An empty result is handed back untouched (still as a data.table).
  if (nrow(procedures) == 0) {
    message("No procedure data found for patient list")
    return(procedures)
  }

  announce("Procedure data loaded; formatting...")

  # Domain-restricted dictionary for the clinical codes, full dictionary
  # (id -> name only) for the metadata column.
  procedureTableOntology <- dataOntology[domain_id == "Procedure"]
  domainConcepts <- procedureTableOntology[, c("concept_id", "vocabulary_id", "concept_code", "concept_name")]
  conceptNames <- dataOntology[, c("concept_id", "concept_name")]

  # format clinical data
  procedures <- annotate(procedures, domainConcepts, "procedure_concept_id",
                         c(concept_code = "procedure_concept_code",
                           concept_name = "procedure_concept_name",
                           vocabulary_id = "procedure_concept_vocabulary"))
  procedures <- annotate(procedures, domainConcepts, "procedure_source_concept_id",
                         c(concept_code = "procedure_source_code",
                           concept_name = "procedure_source_name",
                           vocabulary_id = "procedure_source_vocabulary"))

  # format metadata
  procedures <- annotate(procedures, conceptNames, "procedure_type_concept_id",
                         c(concept_name = "procedure_type"))

  announce("Procedure data formatted successfully.")

  procedures
}



#' Retrieves all patient clinical data from Medications table
#'
#' Produces a table for relevant concepts contained in the 'drug_exposure' table mapped through the data ontology for a patient list. Data retrieved include: drug_type, route, etc.
#'
#' @param patient_list Comma-separated string of patient ids (a vector of ids is collapsed into one)
#' @param declare TRUE/FALSE will output status and data information during the process
#'
#' @return a table of relevant clinical data contained within the 'drug_exposure' table
#' @import data.table DBI
#' @export
#'
#' @examples
#' ptsMedsData <- getMedications("1,2", declare=TRUE)
getMedications <- function(patient_list, declare=FALSE) {

  # Nothing can be mapped without the concept dictionary.
  if (!exists("dataOntology")) {
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  # Emits a progress message only when the caller asked for one.
  announce <- function(text) {
    if (declare == TRUE) {
      message(text)
    }
  }

  # Left-joins `lookup` onto `tbl` (tbl[[idColumn]] against concept_id),
  # renames the joined columns per `relabel` (old name = new name) and then
  # drops the id column that was used for the join.
  annotate <- function(tbl, lookup, idColumn, relabel) {
    tbl <- merge(tbl, lookup, by.x = idColumn, by.y = "concept_id", all.x = TRUE)
    for (oldName in names(relabel)) {
      names(tbl)[names(tbl) == oldName] <- relabel[[oldName]]
    }
    keep <- setdiff(names(tbl), idColumn)
    tbl[, keep, with = FALSE]
  }

  if (length(patient_list) > 1) {
    patient_list <- paste(patient_list, collapse = ",")
  }

  wanted <- c("person_id", "drug_concept_id", "drug_exposure_start_datetime",
              "drug_exposure_end_datetime", "drug_type_concept_id", "stop_reason",
              "refills", "quantity", "days_supply", "sig", "route_concept_id",
              "dose_unit_source_value", "visit_occurrence_id", "drug_source_value",
              "drug_source_concept_id", "route_source_value")
  queryStatement <- paste0('SELECT ', paste(wanted, collapse = ", "),
                           ' FROM drug_exposure WHERE person_id IN (', patient_list, ') ')

  announce("Loading Medications data...")

  meds <- data.table(sqlQuery(queryStatement))

  # An empty result is handed back untouched (still as a data.table).
  if (nrow(meds) == 0) {
    message("No medication data found for patient list")
    return(meds)
  }

  announce("Medication data loaded; formatting...")

  # Domain-restricted dictionary for the clinical codes, full dictionary
  # (id -> name only) for the metadata columns.
  medicationTableOntology <- dataOntology[domain_id == "Drug"]
  domainConcepts <- medicationTableOntology[, c("concept_id", "vocabulary_id", "concept_code", "concept_name")]
  conceptNames <- dataOntology[, c("concept_id", "concept_name")]

  # format clinical data (output columns are prefixed "medication_", not "drug_")
  meds <- annotate(meds, domainConcepts, "drug_concept_id",
                   c(concept_code = "medication_concept_code",
                     concept_name = "medication_concept_name",
                     vocabulary_id = "medication_concept_vocabulary"))
  meds <- annotate(meds, domainConcepts, "drug_source_concept_id",
                   c(concept_code = "medication_source_code",
                     concept_name = "medication_source_name",
                     vocabulary_id = "medication_source_vocabulary"))

  # format metadata
  meds <- annotate(meds, conceptNames, "drug_type_concept_id",
                   c(concept_name = "drug_type"))
  meds <- annotate(meds, conceptNames, "route_concept_id",
                   c(concept_name = "route_concept"))

  announce("Medication data formatted successfully.")

  meds
}


#' Retrieves all patient clinical data from Measurement table
#'
#' Produces a table for relevant concepts contained in the 'measurement' table mapped through the data ontology for a patient list. Data retrieved include: measurement_type, value, unit, etc.
#'
#' @param patient_list Comma-separated string of patient ids (a vector of ids is collapsed into one)
#' @param declare TRUE/FALSE will output status and data information during the process
#'
#' @return a table of relevant clinical data contained within the 'measurement' table
#' @import data.table DBI
#' @export
#'
#' @examples
#' ptMeasData <- getMeasurements("1,2", declare=TRUE)
getMeasurements <- function(patient_list, declare=FALSE) {

  # Nothing can be mapped without the concept dictionary.
  if (!exists("dataOntology")) {
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  # Emits a progress message only when the caller asked for one.
  announce <- function(text) {
    if (declare == TRUE) {
      message(text)
    }
  }

  # Left-joins `lookup` onto `tbl` (tbl[[idColumn]] against concept_id),
  # renames the joined columns per `relabel` (old name = new name) and then
  # drops the id column that was used for the join.
  annotate <- function(tbl, lookup, idColumn, relabel) {
    tbl <- merge(tbl, lookup, by.x = idColumn, by.y = "concept_id", all.x = TRUE)
    for (oldName in names(relabel)) {
      names(tbl)[names(tbl) == oldName] <- relabel[[oldName]]
    }
    keep <- setdiff(names(tbl), idColumn)
    tbl[, keep, with = FALSE]
  }

  if (length(patient_list) > 1) {
    patient_list <- paste(patient_list, collapse = ",")
  }

  wanted <- c("person_id", "measurement_concept_id", "measurement_datetime",
              "measurement_type_concept_id", "value_as_number", "value_as_concept_id",
              "unit_concept_id", "visit_occurrence_id", "measurement_source_value",
              "measurement_source_concept_id")
  queryStatement <- paste0('SELECT ', paste(wanted, collapse = ", "),
                           ' FROM measurement WHERE person_id IN (', patient_list, ') ')

  announce("Loading Measurements data...")

  measurements <- data.table(sqlQuery(queryStatement))

  # An empty result is handed back untouched (still as a data.table).
  if (nrow(measurements) == 0) {
    message("No measurement data found for patient list")
    return(measurements)
  }

  announce("Measurement data loaded; formatting...")

  # Domain-restricted dictionary for the clinical codes, full dictionary
  # (id -> name only) for the metadata columns.
  measurementTableOntology <- dataOntology[domain_id == "Measurement"]
  domainConcepts <- measurementTableOntology[, c("concept_id", "vocabulary_id", "concept_code", "concept_name")]
  conceptNames <- dataOntology[, c("concept_id", "concept_name")]

  # format clinical data
  measurements <- annotate(measurements, domainConcepts, "measurement_concept_id",
                           c(concept_code = "measurement_concept_code",
                             concept_name = "measurement_concept_name",
                             vocabulary_id = "measurement_concept_vocabulary"))
  measurements <- annotate(measurements, domainConcepts, "measurement_source_concept_id",
                           c(concept_code = "measurement_source_code",
                             concept_name = "measurement_source_name",
                             vocabulary_id = "measurement_source_vocabulary"))

  # format metadata
  measurements <- annotate(measurements, conceptNames, "measurement_type_concept_id",
                           c(concept_name = "measurement_type"))
  measurements <- annotate(measurements, conceptNames, "value_as_concept_id",
                           c(concept_name = "value_concept"))
  measurements <- annotate(measurements, conceptNames, "unit_concept_id",
                           c(concept_name = "unit_concept"))

  announce("Measurement data formatted successfully.")

  measurements

}


#' Retrieves all patient clinical data from Device table
#'
#' Produces a table for relevant concepts contained in the 'device_exposure' table mapped through the data ontology for a patient list. Data retrieved include: device_type, etc.
#'
#' @param patient_list Comma-separated string of patient ids (a vector of ids is collapsed into one)
#' @param declare TRUE/FALSE will output status and data information during the process
#'
#' @return a table of relevant clinical data contained within the 'device_exposure' table
#' @import data.table DBI
#' @export
#'
#' @examples
#' ptDeviceData <- getDevices("1,2", declare=TRUE)
getDevices <- function(patient_list, declare=FALSE) {

  # Nothing can be mapped without the concept dictionary.
  if (!exists("dataOntology")) {
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  # Emits a progress message only when the caller asked for one.
  announce <- function(text) {
    if (declare == TRUE) {
      message(text)
    }
  }

  # Left-joins `lookup` onto `tbl` (tbl[[idColumn]] against concept_id),
  # renames the joined columns per `relabel` (old name = new name) and then
  # drops the id column that was used for the join.
  annotate <- function(tbl, lookup, idColumn, relabel) {
    tbl <- merge(tbl, lookup, by.x = idColumn, by.y = "concept_id", all.x = TRUE)
    for (oldName in names(relabel)) {
      names(tbl)[names(tbl) == oldName] <- relabel[[oldName]]
    }
    keep <- setdiff(names(tbl), idColumn)
    tbl[, keep, with = FALSE]
  }

  if (length(patient_list) > 1) {
    patient_list <- paste(patient_list, collapse = ",")
  }

  wanted <- c("person_id", "device_concept_id", "device_exposure_start_datetime",
              "device_exposure_end_datetime", "device_type_concept_id", "device_source_value",
              "visit_occurrence_id", "device_source_concept_id")
  queryStatement <- paste0('SELECT ', paste(wanted, collapse = ", "),
                           ' FROM device_exposure WHERE person_id IN (', patient_list, ') ')

  announce("Loading Devices data...")

  devices <- data.table(sqlQuery(queryStatement))

  # An empty result is handed back untouched (still as a data.table).
  if (nrow(devices) == 0) {
    message("No device data found for patient list")
    return(devices)
  }

  announce("Device data loaded; formatting...")

  # Domain dictionary: every concept whose domain_id contains "Device".
  deviceTableOntology <- dataOntology[grep("Device", domain_id)]
  domainConcepts <- deviceTableOntology[, c("concept_id", "vocabulary_id", "concept_code", "concept_name")]
  conceptNames <- dataOntology[, c("concept_id", "concept_name")]

  # format clinical data
  devices <- annotate(devices, domainConcepts, "device_concept_id",
                      c(concept_code = "device_concept_code",
                        concept_name = "device_concept_name",
                        vocabulary_id = "device_concept_vocabulary"))
  devices <- annotate(devices, domainConcepts, "device_source_concept_id",
                      c(concept_code = "device_source_code",
                        concept_name = "device_source_name",
                        vocabulary_id = "device_source_vocabulary"))

  # format metadata
  devices <- annotate(devices, conceptNames, "device_type_concept_id",
                      c(concept_name = "device_type"))

  announce("Device data formatted successfully.")

  devices
}
--------------------------------------------------------------------------------
/R/getDemographics.R:
--------------------------------------------------------------------------------
#' Retrieves patient demographic data
#'
#' Compiles demographic data for all patients or a given patient list if provided. Concepts are mapped through the created data ontology.
#' Demographic data are retrieved from 'person' and 'death' tables and include: birthdate, deathdate, gender, ethnicity, and race.
#'
#' @param patient_list NULL or comma-separated string of patient ids (a vector of ids is collapsed into one). A provided patient_list will restrict search to ids. NULL will return demographic data for all available patients.
#' @param declare TRUE/FALSE will output status and data information during the process
#'
#' @return table of mapped demographic concepts for all patients or specific ones for a provided csv formatted string of ids. Returns NULL (invisibly) when no patients are found or dataOntology does not exist.
#' @import data.table DBI
#' @export
#'
#' @examples
#' ptDemo <- getDemographics(patient_list=NULL,declare=TRUE)
getDemographics <- function(patient_list=NULL, declare=FALSE) { # patient list will restrict search

  if (!exists("dataOntology")) { # ensure dataOntology exists
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  if (length(patient_list) > 1) {
    patient_list <- paste(patient_list, collapse = ",")
  }

  queryStatement <- "SELECT person_id, birth_datetime, year_of_birth, gender_concept_id, ethnicity_concept_id, race_concept_id FROM person" # year_of_birth added in case birth_datetime IS NULL
  deathqueryStatement <- "SELECT person_id, death_date FROM death"

  if (!is.null(patient_list)) { # if patient_list not null, append the same WHERE clause to both queries
    restriction <- paste0(' WHERE person_id IN (', patient_list, ') ')
    queryStatement <- paste0(queryStatement, restriction)
    deathqueryStatement <- paste0(deathqueryStatement, restriction)
  }

  # first get main patient data
  ptDemo <- sqlQuery(queryStatement)

  if (nrow(ptDemo) == 0) { # no patients: report only on request, return nothing
    if (declare == TRUE) {message("No patients found for current input")}
    return(invisible(NULL))
  }

  if (declare == TRUE) {message("Data loaded; formatting...")}

  ptDemo <- data.table(ptDemo) # convert to data.table
  current_year <- as.numeric(format(Sys.Date(), "%Y")) # get current year to calculate age
  # age is relative to the current year for every patient, deceased or not
  ptDemo$age <- current_year - ptDemo$year_of_birth

  # map concepts to reference table
  ptDemo <- merge(ptDemo, dataOntology[domain_id == "Gender", c("concept_id", "concept_name")], by.x = "gender_concept_id", by.y = "concept_id", all.x = TRUE) # Gender
  names(ptDemo)[names(ptDemo) == 'concept_name'] <- 'Gender' # rename column
  ptDemo <- markNAasUnknown(ptDemo, "Gender", declare)

  ptDemo <- merge(ptDemo, dataOntology[domain_id == "Race", c("concept_id", "concept_name")], by.x = "race_concept_id", by.y = "concept_id", all.x = TRUE) # Race
  names(ptDemo)[names(ptDemo) == 'concept_name'] <- 'Race' # rename column
  ptDemo <- markNAasUnknown(ptDemo, "Race", declare)

  ptDemo <- merge(ptDemo, dataOntology[domain_id == "Ethnicity", c("concept_id", "concept_name")], by.x = "ethnicity_concept_id", by.y = "concept_id", all.x = TRUE) # Ethnicity
  names(ptDemo)[names(ptDemo) == 'concept_name'] <- 'Ethnicity' # rename column
  ptDemo <- markNAasUnknown(ptDemo, "Ethnicity", declare)

  ### clean up extra columns
  ptDemo <- ptDemo[, -c("ethnicity_concept_id", "race_concept_id", "gender_concept_id")]

  # add in death date
  ptDeath <- sqlQuery(deathqueryStatement)
  ptDeath <- data.table(ptDeath) # convert to data.table

  # merge with patient data (left join: patients without a death record keep NA)
  ptDemo <- merge(ptDemo, ptDeath, by = "person_id", all.x = TRUE)
  # mark Alive/Deceased
  ptDemo$Status <- ifelse(is.na(ptDemo$death_date), "Alive", "Deceased")

  return(ptDemo)

}
--------------------------------------------------------------------------------
/R/getEncounters.R:
--------------------------------------------------------------------------------
#' Retrieves patient clinical encounter data
#'
#' Compiles encounter data for a given patient list.
#' Concepts are mapped through the created data ontology. Encounter data are retrieved from visit_occurrence table and include: visit_type, encounter_type, etc.
#' @param patient_list Comma-separated string of patient ids (a vector of ids is collapsed into one)
#' @param declare TRUE/FALSE will output status and data information during the process
#'
#' @return table of mapped encounter concepts for specific patients contained in a provided csv formatted string of ids.
#' @import data.table DBI
#' @export
#'
#' @examples
#' ptEncs <- getEncounters("1,2,3,4",declare=TRUE)
getEncounters <- function(patient_list, declare=FALSE) {

  # Nothing can be mapped without the concept dictionary.
  if (!exists("dataOntology")) {
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  if (length(patient_list) > 1) {
    patient_list <- paste(patient_list, collapse = ",")
  }

  wanted <- c("person_id", "visit_occurrence_id", "visit_concept_id", "visit_start_datetime",
              "visit_end_datetime", "visit_source_concept_id", "visit_source_value",
              "admitting_source_concept_id", "discharge_to_concept_id")
  queryStatement <- paste0('SELECT ', paste(wanted, collapse = ", "),
                           ' FROM visit_occurrence WHERE person_id IN (', patient_list, ') ')

  if (declare == TRUE) {
    message("Loading encounters data...")
  }

  # get visit data
  visits <- sqlQuery(queryStatement)

  # An empty result is returned exactly as sqlQuery delivered it
  # (no data.table conversion on this path).
  if (nrow(visits) == 0) {
    message("No encounter data found for current patient list")
    return(visits)
  }

  if (declare == TRUE) {
    message("Encounters data loaded; formatting...")
  }

  visits <- data.table(visits) # convert to data.table
  conceptNames <- dataOntology[, c("concept_id", "concept_name")]

  # id column -> name of the readable column that replaces it; processed in
  # this order, one left join per entry.
  readableName <- c(visit_concept_id = "visit_concept",
                    visit_source_concept_id = "visit_source_concept",
                    admitting_source_concept_id = "admitting_concept",
                    discharge_to_concept_id = "discharge_concept")

  for (idColumn in names(readableName)) {
    visits <- merge(visits, conceptNames, by.x = idColumn, by.y = "concept_id", all.x = TRUE)
    names(visits)[names(visits) == 'concept_name'] <- readableName[[idColumn]]
    keep <- setdiff(names(visits), idColumn) # drop the id column just resolved
    visits <- visits[, keep, with = FALSE]
  }

  visits

}
--------------------------------------------------------------------------------
/R/makeDataOntology.R:
--------------------------------------------------------------------------------

#' Creates general data ontology
#'
#' Creates general data ontology used by all data tables from the concept table. Option to save/load as .rds file.
#' @param declare TRUE/FALSE will output status and data information during the process
#' @param store_ontology TRUE/FALSE If TRUE: will attempt to load .rds file from the current outDirectory; will create and save it if it does not exist. If FALSE: will build table.
#'
#' @return Returns an ontology table dictionary of concepts contained in the 'concept' table.
#' @import data.table DBI
#' @export
#'
#' @examples
#' \dontrun{
#' dataOntology <- makeDataOntology(declare=FALSE,store_ontology=TRUE)
#' }
makeDataOntology <- function(declare=FALSE, store_ontology=FALSE) {
  if (declare==TRUE) {message("Retrieving concept data...")}

  # The cached ontology lives in the package's current output directory.
  ontology_file <- paste0(getOption("outDirectory"),"dataOntology.rds")
  loaded_from_file <- FALSE

  if (store_ontology==TRUE) {
    if (file.exists(ontology_file)) {
      message("Data Ontology found; loading... ")
      dataOntology <- readRDS(ontology_file)
      message("Data Ontology loaded from memory successfully. ")
      loaded_from_file <- TRUE
    } else {
      message("Data Ontology file not found in declared out_directory; creating... ")
    }
  }

  if (!loaded_from_file) {
    # only currently valid concepts are kept (invalid_reason empty or NULL)
    conceptQuery <- "SELECT concept_id, concept_name, domain_id, vocabulary_id, concept_class_id, concept_code FROM concept WHERE (invalid_reason = '' OR invalid_reason IS NULL);"
    dataOntology <- sqlQuery(conceptQuery)
    dataOntology <- data.table(dataOntology)
  }

  if (declare==TRUE) {
    message("Concept data loaded; data found for: ")
    message(paste0(length(unique(dataOntology$domain_id)), " unique domains."))
    message(paste0(length(unique(dataOntology$vocabulary_id)), " unique vocabularies."))
    message(paste0(length(unique(dataOntology$concept_class_id)), " unique concept classes."))
  }

  # save data ontology, but only when it was freshly built from the database
  if (store_ontology == TRUE && !loaded_from_file) {
    message(paste0("Storing Data Ontology: ", ontology_file))
    saveRDS(dataOntology, ontology_file)
  }

  return(dataOntology)

}

# --------------------------------------------------------------------------------
# /R/showDataTypes.R:
# --------------------------------------------------------------------------------
#' Shows available data types from the OMOP ontology
#'
#' Lists, for each clinical domain, the vocabularies that occur in it. Requires dataOntology to have been created (makeDataOntology function).
#' @return Returns a table of vocabularies contained within clinical domains: Condition, Observation, Measurement, Device, Procedure, Drug.
#' @export
#'
#' @examples
#' showDataTypes()
#'
showDataTypes <- function() {

  # nothing can be listed until the ontology has been built
  if (!exists("dataOntology")) {
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  clinical_domains <- c("Condition","Observation","Measurement","Device","Procedure","Drug")

  # one row per (domain, vocabulary) pair, ordered by domain
  domain_vocabularies <- dataOntology[domain_id %in% clinical_domains, c("domain_id", "vocabulary_id")]
  domain_vocabularies <- unique(domain_vocabularies)
  domain_vocabularies <- domain_vocabularies[order(domain_id),]

  return(domain_vocabularies)

}

# --------------------------------------------------------------------------------
# /R/summarizeDemographics.R:
# --------------------------------------------------------------------------------

#' Summarizes patient demographic data
#'
#' Summarizes patient demographic data from the getDemographics function.
#' @param ptDemo patient demographics table: ptDemo is the patient demographics object from the getDemographics function output.
#'
#' @return none (called for side effect: prints table)
#' @import dplyr data.table
#' @export
#'
#' @examples
#' \dontrun{
#' summarizeDemographics(ptDemo)
#' }
summarizeDemographics <- function(ptDemo) {

  message(paste0("# of patients: ", nrow(ptDemo)))
  message(paste0("Mean age: ",round(mean(ptDemo$age),3)))
  message(paste0("Median age: ",round(median(ptDemo$age),3)))
  message(paste0("STD age: ",round(sd(ptDemo$age),3)))

  # count and proportion of patients per level of each categorical field
  printed <- NULL
  for (field in c("Status", "Gender", "Race", "Ethnicity")) {
    message(paste0(field, " breakdown:"))
    breakdown <- ptDemo %>%
      group_by_at(field) %>%
      summarise(n = n()) %>%
      mutate(proportion = n / sum(n))
    printed <- print(data.table(breakdown))
  }

  # as before, the last printed table is the (invisible) value of the call
  invisible(printed)

}

# --------------------------------------------------------------------------------
# /R/utils.R:
# --------------------------------------------------------------------------------


#############################
######### GENERAL ##########
#############################

# define standard_concepts: for each clinical domain, the comma-separated
# vocabularies treated as its standard vocabularies
standard_concepts <- function(){
  data.table(
    "domain_type" = c("Measurement","Condition","Drug","Observation","Device","Procedure"),
    "concepts" = c("LOINC,SNOMED,CPT4","SNOMED","RxNorm,CPT4,NDC","SNOMED,CPT4,LOINC,HCPCS","SNOMED,HCPCS","SNOMED,CPT4,HCPCS")
  )
}

### function to indicate which variables to add to dbConnect function based on whether they are defined ###
# Returns the text of a DBI::dbConnect() call. The text refers to a variable
# `drv` and to Sys.getenv() lookups, so callers evaluate it in a frame where
# `drv` is defined. 'driver' and 'dbname' need to be defined; every other
# credential is only added to the call when its environment variable is set.
setConnectFunction <- function() {
  optional_arguments <- c(
    username = ', user=Sys.getenv("username")',
    password = ', password=Sys.getenv("password")',
    host     = ', host=Sys.getenv("host")',
    port     = ', port= as.integer(Sys.getenv("port"))'
  )
  is_defined <- Sys.getenv(names(optional_arguments)) != ""

  connectString <- paste0('dbname=Sys.getenv("dbname")',
                          paste(optional_arguments[is_defined], collapse = ""))

  fullConnectString <- paste0('DBI::dbConnect(drv, ', connectString , ')')

  return(fullConnectString)
}


### general query function ###
# Opens a connection using the credentials in the environment, runs `query`,
# returns the full result as a data.frame and closes the connection on exit.
sqlQuery <- function(query) {

  if (tolower(Sys.getenv("driver"))=="mysql") {

    # creating connection object
    drv <- dbDriver("MySQL")
    # the evaluated text is built from constants only (credentials are read
    # via Sys.getenv() at evaluation time, never pasted into the code)
    fullConnectString <- setConnectFunction()
    con <- eval(parse(text = fullConnectString))

    # close db connection after function
    on.exit(DBI::dbDisconnect(con), add = TRUE)

    # send query, fetch every row and release the result set
    result <- DBI::dbGetQuery(con, query)

  } else {

    # creating connection object using DatabaseConnector
    con <- DatabaseConnector::connect(dbms = tolower(Sys.getenv("driver")),
                                      server = Sys.getenv("host"),
                                      user = Sys.getenv("username"),
                                      password = Sys.getenv("password"),
                                      schema = Sys.getenv("dbname"),
                                      port = Sys.getenv("port"))

    # close db connection after function
    on.exit(DatabaseConnector::disconnect(con), add = TRUE)

    # translate query using SqlRender
    translated_query <- SqlRender::translate(query, targetDialect = tolower(Sys.getenv("driver")))

    # query using DatabaseConnector function
    result <- DatabaseConnector::querySql(con, translated_query)

    # coerce columns to lowercase
    colnames(result) <- tolower(colnames(result))
  }
  return(result)
}

#############################
######## PROCESSING #########
#############################

### mark any empty Demographics fields as Unknown
# `tbl` is updated by reference (data.table `:=`) and also returned.
markNAasUnknown <- function(tbl, ColToUse, declare=FALSE) {

  if (!(ColToUse %in% colnames(tbl))) {
    message(paste0("column ", ColToUse, " not found"))
    return(tbl)
  }

  missing_rows <- which(is.na(tbl[[ColToUse]]))

  if (length(missing_rows) > 0) {
    tbl[missing_rows, (ColToUse) := "Unknown"]
  } else if (declare==TRUE) { # no NA values in column
    message(paste0("no NA values found for ", ColToUse))
  }

  return(tbl)

}


## check search input parameters
# Returns TRUE when the inclusion strategy/function are valid and, if BOTH
# exclusion arguments are supplied, they are valid too. When only one of the
# exclusion arguments is supplied it is not validated (unchanged behaviour).
# NULL or otherwise unusable inclusion arguments yield FALSE.
checkParameters <- function(strategy_in, function_in, strategy_out, function_out) {
  is_strategy <- function(x) isTRUE(x %in% c("direct","mapped"))
  is_function <- function(x) isTRUE(x %in% c("and","or"))

  if (!(is_strategy(strategy_in) && is_function(function_in))) {
    return(FALSE)
  }

  if (!is.null(strategy_out) && !is.null(function_out)) {
    return(is_strategy(strategy_out) && is_function(function_out))
  }

  return(TRUE)

}




## unpack vocabularies and codes for search function
# vocabularies_input: comma-separated vocabularies; codes_input: comma-separated
# groups of codes (one group per vocabulary), codes within a group separated
# by ";". Returns the dataOntology rows matching each (vocabulary, code) pair.
#' @import data.table
unpackAndMap <- function(vocabularies_input, codes_input) {
  vocabularies_split <- trimws(strsplit(vocabularies_input,",")[[1]])
  codes_split <- trimws(strsplit(codes_input,",")[[1]])

  # match to one another
  dataCriteria <- data.table::data.table(vocabularies = vocabularies_split, codes = codes_split)

  dataCriteria <- dataCriteria[, list( # unpack codes
    codes = trimws(unlist(strsplit(codes, ";")))),
    by = vocabularies]

  # map inclusion criteria to dataOntology; the join is on the code alone, so
  # rows whose vocabulary differs from the requested one are dropped afterwards
  dataCriteriaMapped <- merge(dataCriteria, dataOntology, by.x= "codes", by.y = "concept_code")
  dataCriteriaMapped <- dataCriteriaMapped[vocabularies==vocabulary_id]

  return(dataCriteriaMapped)

}

# for 'Mapped' strategy; map input concept codes to common ontology
identifySynonyms <- function(codesFormatted) {
  synonymQuery <- paste0('SELECT concept_id_1, concept_id_2, relationship_id, invalid_reason FROM concept_relationship WHERE concept_id_1 IN (',codesFormatted,');')
  synonymData <- sqlQuery(synonymQuery)
  synonymData <- data.table::data.table(synonymData)
  # keep valid relationships only: invalid_reason is either empty or NULL/NA
  # (same rule as the concept query in makeDataOntology)
  synonymData <- synonymData[is.na(invalid_reason) | invalid_reason == ""]
  synonymData <- synonymData[,-"invalid_reason"]

  # check for "Maps to" or "%- RxNorm%" or "%- SNOMED%" | standard concepts
  synonymDataFiltered <- synonymData[(relationship_id == "Maps to") | (grepl("- RxNorm",relationship_id)) | (grepl("- SNOMED",relationship_id)) ]

  return(synonymDataFiltered)

}

# for 'Mapped' strategy; map input concept codes (from common ontology) to common ontology descendants
#' @import data.table
identifyMappings <- function(synonymCodes) {

  mappingQuery <- paste0('SELECT ancestor_concept_id, descendant_concept_id FROM concept_ancestor A WHERE A.ancestor_concept_id IN (', synonymCodes,' );')
  mappingData <- sqlQuery(mappingQuery)
  mappingData <- data.table::data.table(mappingData)

  # attach ontology details of every descendant concept
  mappingDataInfo <- merge(mappingData,dataOntology, by.x = "descendant_concept_id", by.y = "concept_id")

  return(mappingDataInfo)

}


# identify tables to search for concepts of interest (direct strategy)
# Returns a list keyed by domain holding the concept ids of that domain.
identifyTablesDirect <- function(criteriaTable) {

  searchTable <- list()

  for(d in unique(standard_concepts()$domain_type)){ # scan through all domain types
    searchTable[[d]] <- criteriaTable[domain_id == d]$concept_id # compile codes per domain type into one table
  }

  return(searchTable)
}


# identify tables to search for concepts of interest (mapped strategy)
identifyTablesMapped <- function(mappingDataInfo) {

  searchTable <- list()
  standards <- standard_concepts()

  for(d in unique(standards$domain_type)) { # scan through all domain types

    # "A,B,C" -> regex "A|B|C"; this is a substring match on vocabulary_id
    vocabulary_pattern <- gsub(",","|",standards[domain_type==d,concepts])

    mappingDataInfoFiltered <- mappingDataInfo[domain_id==d]
    mappingDataInfoFiltered <- mappingDataInfoFiltered[(grep(vocabulary_pattern,vocabulary_id))] # map to common concepts specifically used to the domain
    # NOTE(review): the table returned by identifyMappings names its id column
    # descendant_concept_id; confirm the caller supplies a concept_id column.
    mappingCodes <- mappingDataInfoFiltered$concept_id
    searchTable[[d]] <- mappingCodes
  }

  return(searchTable)

}

### identifyPatients based on function
# function = OR (union)
# Each argument is either NULL or a table with a person_id column; the result
# is the union of all person ids (NULL when every argument is NULL).
identifyPatientsOR <- function(pts_condition, pts_observation, pts_measurement, pts_device, pts_drug, pts_procedure) {

  patient_list <- c()

  domain_tables <- list(pts_condition, pts_observation, pts_measurement, pts_device, pts_drug, pts_procedure)

  for (pts in domain_tables) {
    if (!is.null(pts)) {
      patient_list <- union(patient_list, unique(pts$person_id))
    }
  }

  return(patient_list)

}

# function = AND (intersect)
# To identify overlapping patients, we have to backmap the
# descendant terms to the original concepts
#' @import data.table
identifyPatientsAND <- function(criteriaMapped, synonymDataFiltered, mappingDataInfo, pts_condition, pts_observation, pts_measurement, pts_device, pts_drug, pts_procedure) {

  names(mappingDataInfo)[names(mappingDataInfo) == 'vocabulary_id'] <- 'mapped_vocabulary_id'
  names(mappingDataInfo)[names(mappingDataInfo) == 'concept_name'] <- 'mapped_concept_name'

  synonymMapped <- merge(mappingDataInfo[,c("descendant_concept_id","ancestor_concept_id","mapped_vocabulary_id","mapped_concept_name")], synonymDataFiltered[,c("concept_id_1","concept_id_2")], by.x = "ancestor_concept_id", by.y = "concept_id_2", allow.cartesian=TRUE)
  synonymMapped <- synonymMapped[!duplicated(synonymMapped)]

  combinedMapped <- merge(synonymMapped, criteriaMapped, by.x = "concept_id_1", by.y = "concept_id", allow.cartesian=TRUE)
  combinedMapped <- combinedMapped[!duplicated(combinedMapped)]

  combinedDirect <- merge(mappingDataInfo, criteriaMapped, by.x = "ancestor_concept_id", by.y = "concept_id", allow.cartesian=TRUE)
  combinedDirect <- combinedDirect[!duplicated(combinedDirect)]


  ### derive patient list by concept_codes
  # create code dictionary per original concept input:
  # code_map[[code]] holds every concept id that counts as a hit for that code

  unique_codes <- unique(criteriaMapped$codes)

  code_map <- list()

  for (input_code in unique_codes) {
    # keep the seed ids as a vector: a single comma-joined string would never
    # match in %in% when a code resolves to more than one concept id
    seed_ids <- criteriaMapped[codes == input_code]$concept_id
    code_map[[input_code]] <- c(
      seed_ids, # original concept(s) (i.e. in case of ATC category)
      combinedDirect[ancestor_concept_id %in% seed_ids]$descendant_concept_id, # direct mapped descendants
      combinedMapped[concept_id_1 %in% seed_ids]$descendant_concept_id # synonym codes and descendants
    )
  }

  # per domain: the table of found patients and its concept id column
  domain_tables <- list(
    Condition   = list(pts = pts_condition,   concept_col = "condition_concept_id"),
    Observation = list(pts = pts_observation, concept_col = "observation_concept_id"),
    Measurement = list(pts = pts_measurement, concept_col = "measurement_concept_id"),
    Device      = list(pts = pts_device,      concept_col = "device_concept_id"),
    Drug        = list(pts = pts_drug,        concept_col = "drug_concept_id"),
    Procedure   = list(pts = pts_procedure,   concept_col = "procedure_concept_id")
  )

  # patients found per input code
  # NOTE(review): a code only gets an entry when the table of its own domain
  # is non-NULL; codes without an entry do not take part in the intersection
  # (unchanged behaviour) - confirm this is intended.
  patient_list <- list()

  for (domain in names(domain_tables)) {
    pts <- domain_tables[[domain]]$pts
    if (is.null(pts)) {
      next
    }
    concept_col <- domain_tables[[domain]]$concept_col

    for (input_code in unique(criteriaMapped[domain_id == domain]$codes)) {
      matched_patients <- pts[["person_id"]][pts[[concept_col]] %in% code_map[[input_code]]]
      patient_list[[input_code]] <- union(patient_list[[input_code]], matched_patients)
    }
  }

  # get intersected list
  patient_list_intersected <- Reduce(intersect,patient_list)

  return(patient_list_intersected)

}


# add counts to search query concepts
# NOTE(review): pt_count is the number of rows per concept id; it equals the
# number of unique patients only if the input tables hold one row per
# patient and concept - verify against caller.
#' @import dplyr data.table
summarizeFoundConcepts <- function(pts_condition, pts_observation, pts_measurement, pts_device, pts_drug, pts_procedure){

  conceptCount <- data.table(matrix(nrow=0,ncol=2))
  colnames(conceptCount) <- c("concept_id","pt_count")

  # rows per value of `colname` in `tblname`, as (concept_id, pt_count)
  summarizeConcepts <- function(tblname, colname) {
    tbl_concepts <- tblname %>%
      group_by_at(colname) %>%
      summarise(COUNT = n())
    tbl_concepts <- data.table(tbl_concepts)
    colnames(tbl_concepts) <- c("concept_id","pt_count")
    return(tbl_concepts)
  }

  # name = concept id column of the table, value = table (NULL when absent)
  domain_tables <- list(
    condition_concept_id   = pts_condition,
    observation_concept_id = pts_observation,
    measurement_concept_id = pts_measurement,
    device_concept_id      = pts_device,
    drug_concept_id        = pts_drug,
    procedure_concept_id   = pts_procedure
  )

  for (concept_col in names(domain_tables)) {
    pts <- domain_tables[[concept_col]]
    if (!is.null(pts)) {
      conceptCount <- rbind(conceptCount, summarizeConcepts(pts, concept_col))
    }
  }

  return(conceptCount)

}



# --------------------------------------------------------------------------------
# /R/zzz.R:
# --------------------------------------------------------------------------------
library(DBI)
library(data.table)


### securely retrieve credentials stored in environment variables
# ~/.Renviron


#############################
###### INITIALIZATION #######
#############################

# check credentials exist
# TRUE when all six environment variables are present and the driver is one
# of the supported types; the driver packages are attached as a side effect.
checkCredentialsExist <- function() {
  env_vars <- c("driver", "username", "password", "dbname", "host", "port")

  if (!all(env_vars %in% names(Sys.getenv()))) {
    return(FALSE)
  }

  pass <- TRUE
  driver <- tolower(Sys.getenv("driver"))

  # load required drivers
  if (driver=="mysql") {
    library(RMySQL)
  } else if (driver %in% c("oracle", "postgresql", "redshift", "sql server", "pdw", "bigquery")) {
    library(DatabaseConnector)
    library(SqlRender)
  } else {
    pass <- FALSE
    message("Invalid driver type, please select either: 'mysql', 'oracle', 'postgresql', 'redshift', 'sql server', 'pdw', 'bigquery'")
  }

  return(pass)
}


# open a connection to the OMOP server with the credentials in the environment
openOMOPconnection <- function() {
  if (tolower(Sys.getenv("driver"))=="mysql") {
    # the driver name is fixed (as in sqlQuery) because the 'driver' variable
    # is documented as case insensitive, e.g. "mysql"
    drv <- dbDriver("MySQL")
    fullConnectString <- setConnectFunction()
    eval(parse(text = fullConnectString))
  } else {
    # creating connection object using DatabaseConnector
    DatabaseConnector::connect(dbms = tolower(Sys.getenv("driver")),
                               server = Sys.getenv("host"),
                               user = Sys.getenv("username"),
                               password = Sys.getenv("password"),
                               schema = Sys.getenv("dbname"),
                               port = Sys.getenv("port"))
  }
}

# close a connection opened by openOMOPconnection
closeOMOPconnection <- function(con) {
  if (tolower(Sys.getenv("driver"))=="mysql") {
    dbDisconnect(con)
  } else {
    DatabaseConnector::disconnect(con)
  }
}


# check that successful connection can be made to OMOP server
# Returns TRUE/FALSE; never raises because of a failed connection.
checkOMOPconnection <- function() {

  con <- tryCatch(
    # warnings are ignored (muffled) without aborting the connection attempt
    withCallingHandlers(
      openOMOPconnection(),
      warning = function(w) invokeRestart("muffleWarning")
    ),
    error = function(e) {
      message("Unable to establish connection to OMOP server.")
      # print the text only; passing the condition object itself to message()
      # would signal it again to any enclosing error handler
      message(conditionMessage(e))
      NULL
    }
  )

  if (is.null(con)) {
    return(FALSE)
  }

  # only a connection that was actually opened is closed
  on.exit(closeOMOPconnection(con), add = TRUE)

  message("Can successfully connect to OMOP server.")
  return(TRUE)

}


# check that relevant tables exist in OMOP database
checkOMOPtables <- function() {

  necessaryTables <- c("concept","concept_ancestor","concept_relationship","condition_occurrence","death","device_exposure","drug_exposure","measurement","observation","person","procedure_occurrence","visit_occurrence")

  con <- openOMOPconnection()
  # registered before the first query so the connection is released on error
  on.exit(closeOMOPconnection(con), add = TRUE)

  foundTablesData <- tolower(dbListTables(con))

  is_mysql <- tolower(Sys.getenv("driver"))=="mysql"
  missingTables <- FALSE

  for (tbls in necessaryTables) {
    if (!tbls %in% foundTablesData) { # check if table exists
      missingTables <- TRUE
      message(paste0("missing required table: " , tbls ))
    } else { # check if any data in found table
      if (is_mysql) {
        dataCheckQuery <- paste0("SELECT * FROM " , tbls , " LIMIT 1;")
      } else {
        dataCheckQuery <- paste0("SELECT TOP 1 * FROM " , tbls, ";")
      }
      dataCheck <- sqlQuery(dataCheckQuery)
      if (nrow(dataCheck)==0) {
        message(paste0("Warning: no data found in table ", tbls))
      }
    }
  }

  if (missingTables) {
    return(FALSE)
  }

  message("All required tables found!")
  return(TRUE)

}

#############################
###### INITIALIZATION #######
#############################


# .onLoad checks
# Runs the three start-up checks in order (credentials, connection, tables)
# and stops at the first one that fails, reporting what to do next.

.onLoad <- function(...) {
  packageStartupMessage(
    paste0("Welcome to ROMOP: please refer to https://github.com/BenGlicksberg/ROMOP for detailed instructions on how to use package with examples.\n
      Current OutDirectory is set to ",getwd(), ". Please use changeOutDirectory function to set.\n e.g., changeOutDirectory('path/to/outdir', create = TRUE) \n
      Now checking for required credentials and server connection (note this package will not function without them). Please wait...\n")
  )

  ### initialize outDirectory as current working directory
  options("outDirectory" = paste0(getwd(),"/"))

  ## Verify credentials exist
  if (checkCredentialsExist() != TRUE) { # require credentials
    message("Please refer to the ReadMe to set and format server credentials in the .Renviron file.")
    return(invisible(NULL))
  }

  ## Verify connection
  if (checkOMOPconnection() != TRUE) { # require successful connection
    message("Unable to connect; package will not funciton correctly.")
    return(invisible(NULL))
  }

  # check if relevant tables exist
  if (checkOMOPtables() != TRUE) { # require correct tables
    message("Missing required tables; package will not funciton correctly.")
    return(invisible(NULL))
  }

  message("Success! Please create 'dataOntology' using the makeDataOntology function.\n e.g., dataOntology = makeDataOntology(declare=TRUE,store_ontology = TRUE)")
}

-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ROMOP Readme 2 | ================ 3 | Benjamin S. Glicksberg 4 | 9/14/2018 5 | 6 | ## ROMOP 7 | 8 | ROMOP is a flexible R package to interface with the [Observational 9 | Health Data Sciences and Informatics (OHDSI)](https://www.ohdsi.org/) 10 | [OMOP Common Data Model](https://www.ohdsi.org/data-standardization/). 11 | Briefly, OMOP is a standardized relational database schema for 12 | Electronic Health Record (EHR) or Electronic Medical Record (EMR) data 13 | (i.e., patient data collected during clinical visits to a health 14 | system). The main benefit of a standardized schema is that it allows for 15 | interoperability between institutions, even if the underlying EHR 16 | vendors are disparate. 17 | 18 | For a detailed description of the OMOP common data model, please visit 19 | this [helpful wiki](https://github.com/OHDSI/CommonDataModel/wiki). 20 | 21 | In its backend, OMOP relies on standardized data ontologies and 22 | metathesauri, such as the [Unified Medical Language System 23 | (UMLS)](https://www.nlm.nih.gov/research/umls/), and as such, the 24 | queries within ROMOP heavily rely on these vocabularies.
25 | [Athena](http://athena.ohdsi.org/) is a great tool to better understand 26 | the concepts in these ontologies and identify ideal search terms of 27 | interest. 28 | 29 | ![Features of 30 | ROMOP](www/figure1a_v3.png) 31 | 32 | Manuscript information: 33 | Glicksberg BS, Oskotsky B, Giangreco N, Thangaraj PM, Rudrapatna V, Datta D, Frazier R, Lee N, Larsen R, Tatonetti NP, Butte AJ. ROMOP: a light-weight R package for interfacing with OMOP-formatted electronic health record data. JAMIA open. 2019 Apr;2(1):10-4. 34 | 35 | ## Sandbox Server 36 | 37 | The Centers for Medicare and Medicaid Services (CMS) have released a 38 | synthetic clinical dataset 39 | [DE-SynPUF](https://www.cms.gov/Research-Statistics-Data-and-Systems/Downloadable-Public-Use-Files/SynPUFs/DE_Syn_PUF.html) 40 | in the public domain with the aim of being reflective of the patient 41 | population but containing no protected health information. The OHDSI 42 | group has undertaken the task of converting these data into the [OMOP CDM 43 | format](https://github.com/OHDSI/ETL-CMS). Users are certainly able to 44 | set up this configuration on their own system following the instructions 45 | on the GitHub page. We obtained all data files from the [OHDSI FTP 46 | server](ftp://ftp.ohdsi.org/synpuf) (accessed June 17th, 2018) and 47 | created the CDM (DDL and indexes) according to their [official 48 | instructions](https://github.com/OHDSI/CommonDataModel/tree/master/PostgreSQL), 49 | but modified for MySQL. For space considerations, we only uploaded one 50 | million rows of each of the data files. The sandbox server is an R Shiny 51 | server running as an Elastic Compute Cloud (EC2) instance on Amazon Web 52 | Services (AWS) querying a MySQL database server (AWS Aurora MySQL). 53 | 54 | ## Requirements 55 | 56 | #### Clinical Data 57 | 58 | ROMOP requires EHR data to be in OMOP format and on a server accessible 59 | to the user.
In its current form, ROMOP can connect to databases in 60 | *MySQL* using the RMySQL driver or many other formats, including 61 | *Oracle*, *PostgreSQL*, *Microsoft SQL Server*, *Amazon Redshift*, 62 | *Google BigQuery*, and *Microsoft Parallel Data Warehouse*, through 63 | utilization of the DatabaseConnector and SqlRender packages developed by 64 | the OHDSI group (see below). 65 | 66 | Users without access to EHR data might consider using synthetic public 67 | data following the instructions provided by the OHDSI group 68 | [here](https://github.com/OHDSI/ETL-CMS). 69 | 70 | #### Programming Language 71 | 72 | ROMOP is built in the R environment and developed on version 3.4.4 73 | (2018-03-15). 74 | 75 | ROMOP requires the following R packages: 76 | 77 | - [DBI](https://cran.r-project.org/web/packages/DBI/index.html) 78 | (developed on version 79 | 1.0.0) 80 | - [data.table](https://cran.r-project.org/web/packages/data.table/data.table.pdf) 81 | (developed on version 1.10.4-3). 82 | - [dplyr](https://dplyr.tidyverse.org/) (developed on version 0.7.4). 83 | 84 | Driver-specific: 85 | 86 | - [RMySQL](https://cran.r-project.org/web/packages/RMySQL/index.html) 87 | (developed on version 88 | 0.10.14).
89 | - [DatabaseConnector](https://cran.r-project.org/web/packages/DatabaseConnector/index.html) 90 | (developed on version 91 | 2.2.0) 92 | - [DatabaseConnectorJars](https://cran.r-project.org/web/packages/DatabaseConnectorJars/index.html) 93 | (developed on version 94 | 1.0.0) 95 | - [SqlRender](https://cran.r-project.org/web/packages/SqlRender/index.html) 96 | (developed on version 1.5.2) 97 | 98 | ## Installation 99 | 100 | ### Download 101 | 102 | ROMOP can be installed easily from github using the 103 | [devtools](https://cran.r-project.org/web/packages/devtools/index.html) 104 | package: 105 | 106 | library(devtools) 107 | install_github("BenGlicksberg/ROMOP") 108 | 109 | Alternatively, the package can be downloaded directly from the [github 110 | page](https://github.com/BenGlicksberg/ROMOP) and installed by the 111 | following steps: 112 | 113 | 1. Unzip ROMOP-master.zip 114 | 2. R CMD INSTALL ROMOP-master 115 | 116 | Please see the [Setup](#setup) section to properly configure the package 117 | to work. 118 | 119 |   120 | 121 | ### Setup 122 | 123 | #### Credentials 124 | 125 | In accordance with best practices for storing sensitive information, 126 | credentials are not saved in plain text but in the .Renviron file. A 127 | formatted .Renviron file is provided with the package with the following 128 | fields to fill in: 129 | 130 | ``` 131 | driver = "" 132 | host = "" 133 | username = "" 134 | password = "" 135 | dbname = "" 136 | port = "3306" 137 | ``` 138 | 139 | - driver (case insensitive): “mysql” for MySQL or (according to [OHDSI 140 | DatabaseConnector 141 | package](https://github.com/OHDSI/DatabaseConnector)) “postgresql” 142 | for PostgreSQL, “oracle” for Oracle, “sql server” for Microsoft SQL 143 | Server, “redshift” for Amazon Redshift, “pdw” for Microsoft Parallel 144 | Data Warehouse, or “bigquery” for Google BigQuery. 
145 | - host (or server depending on database format) 146 | - dbname: OMOP EHR database name (or schema depending on database 147 | format) 148 | 149 | Note that this .Renviron file has to be in the same directory where R is 150 | launched. If already using an .Renviron file, add this information to 151 | it. 152 | 153 | #### Checks 154 | 155 | With credentials correctly configured, the package can be loaded. ROMOP 156 | will now check for 3 conditions to be met: 157 | 158 | 1. Check that the credentials exist and can be retrieved from .Renviron 159 | file: 160 | *requires driver, host, username, password, dbname, and port exist* 161 | 162 | 2. Check that connection to OMOP EHR server and database can be made: 163 | *uses the above credentials* 164 | 165 | 3. Check to ensure all required OMOP tables exist and contain (any) 166 | data: 167 | *the required tables 168 | are:* 169 | 170 | 171 | 172 | "concept","concept_ancestor","concept_relationship","condition_occurrence","death", 173 | "device_exposure","drug_exposure","measurement","observation","person","procedure_occurrence","visit_occurrence" 174 | 175 | - if any of the above tables are missing, a warning message will be 176 | produced and the package will not be able to load properly. 177 | - if any of the above tables exist, but do not contain any data, a 178 | warning message will be produced but the package will still be able 179 | to function. 180 | 181 | #### On start 182 | 183 | Successfully passing all checks will allow the user to begin using ROMOP. 184 | 185 | 1. Set an output directory to use with the 186 | [changeOutDirectory](#changeoutdirectory) function (note: the 187 | default output directory will be declared on package load). 188 | 2. Create/load the Data ontology (required to decode data types) using 189 | the [makeDataOntology](#makedataontology) function.
For the first time 190 | running this package, the concept ontology will have to first be 191 | built, but if the store\_ontology option is selected, the ontology 192 | will be saved as an .rds file for subsequent loading. 193 | 194 |   195 | 196 | ## Functions 197 | 198 | ### Utility 199 | 200 | #### getDemographics 201 | 202 | *Description*:  Retrieves and formats patient demographic data from the 203 | **person** and **death** tables. Option to restrict to patientlist of 204 | interest. 205 | 206 | *Usage*:  ptDemo \<- getDemographics(patient\_list=NULL,declare=TRUE) 207 | 208 | *Arguments*: 209 | 210 |   patient\_list         *comma-separated string of patient ids* 211 |          a provided patientlist will restrict search to ids. NULL will 212 | return demographic data for all available patients 213 | 214 |   declare         *TRUE/FALSE* 215 |          if TRUE, outputs status and updates to the screen 216 | 217 | *Value*: 218 | 219 |   Returns a data.table with demographic data: person\_id, 220 | birth\_datetime, age, Gender, Race, Ethnicity, death\_date, Status 221 | (Alive/Deceased) 222 | 223 | *Details*: 224 | 225 | - patient\_list should be in the following format: “patient\_id\_1, 226 | patient\_id\_2, …” 227 | 228 |   229 | 230 | #### getEncounters 231 | 232 | *Description*:  Retrieves and formats patient encounter data from the 233 | **visit\_occurrence** table. Requires patientlist input. 234 | 235 | *Usage*:  ptEncs \<- getEncounters(patient\_list,declare=TRUE) 236 | 237 | *Arguments*: 238 | 239 |   patient\_list         *comma-separated string of patient ids* 240 |          searches for all encounter data for the patientlist input. 
241 | 242 |   declare         *TRUE/FALSE* 243 |          if TRUE, outputs status and updates to the screen 244 | 245 | *Value*: 246 | 247 |   Returns a data.table with encounter data: person\_id, 248 | visit\_occurrence\_id, visit\_start\_datetime, visit\_end\_datetime, 249 | visit\_source\_value, visit\_concept, visit\_source\_concept, 250 | admitting\_concept, discharge\_concept 251 | 252 | *Details*: 253 | 254 | - patient\_list should be in the following format: “patient\_id\_1, 255 | patient\_id\_2, …” 256 | 257 |   258 | 259 | #### getClinicalData 260 | 261 | *Description*:  Retrieves all relevant clinical data for individuals in 262 | a patientlist. Wrapper for domain-specific getData functions (which can 263 | also be used separately). 264 | 265 | *Usage*:  ptClinicalData \<- getClinicalData(patient\_list, 266 | declare=TRUE) 267 | 268 | *Arguments*: 269 | 270 |   patient\_list         *comma-separated string of patient ids* 271 |          a provided patientlist will restrict search to ids. NULL will 272 | return demographic data for all available patients 273 | 274 |   declare         *TRUE/FALSE* 275 |          if TRUE, outputs status and updates to the screen 276 | 277 | *Value*: 278 |   Returns a list of data.tables stratified by domain type (e.g., 279 | ptClinicalData$Condition, ptClinicalData$Observation, etc…) 280 | 281 | *Details*: 282 | 283 | - patient\_list should be in the following format: “patient\_id\_1, 284 | patient\_id\_2, …” 285 | - getClinicalData calls domain-specific getData functions for the 286 | following domains: Observation, Condition, Procedure, Medication 287 | (Drug), Measurement, and Device. Each function can also be run 288 | individually (e.g., getConditions; getMedications). 
289 | - In addition to datetimes, visit\_occurrence\_ids, 290 | \_concept\_ids and \_source\_concept\_ids, other 291 | domain-specific concepts and values are retrieved and mapped: 292 | - Observation: observation\_type\_concept, value\_as\_number, 293 | value\_as\_string, value\_as\_concept, unit\_source\_value 294 | - Condition: condition\_type\_concept, condition\_status 295 | - Procedure: procedure\_type\_concept, quantity, 296 | - Medication: drug\_type\_concept, stop\_reason, refills, 297 | quantity, days\_supply, sig, route\_concept, 298 | effective\_drug\_dose, dose\_unit\_concept, 299 | route\_source\_value, frequency, frequency\_unit, 300 | rx\_quantity\_unit\_source\_value 301 | - Measurement: measurement\_type\_concept, value\_as\_number, 302 | value\_as\_concept, unit\_concept 303 | - Device: device\_type\_concept 304 | 305 |   306 | 307 | #### findPatients 308 | 309 | *Description*:  Main function to identify patients based on clinical 310 | data inclusion (and exclusion, if desired) criteria. Flexible to allow 311 | for multiple data types, vocabularies, and concepts. 312 | 313 | *Usage*:   patientlist \<- findPatients(strategy\_in=“mapped”, 314 | vocabulary\_in, codes\_in, function\_in = “or”, strategy\_out = NULL, 315 | vocabulary\_out = NULL, codes\_out = NULL, function\_out = NULL, 316 | declare=FALSE, save=FALSE, out\_name=NULL) 317 | 318 | *Arguments*: 319 | 320 |   strategy\_in         *mapped* or *direct* 321 |          dictates the strategy for how inclusion criteria are treated 322 | (see Details). 323 | 324 |   vocabulary\_in         *vocabularies for inclusion criteria* 325 |          comma-separated string of relevant vocabularies for inclusion 326 | criteria (see Details). 327 | 328 |   codes\_in         *specific concept codes for inclusion criteria* 329 |          semi-colon separated string of code concepts for inclusion 330 | criteria, corresponding to the order for vocabulary\_in. 
Multiple codes 331 | can be used per vocabulary and should be comma-separated (see Details). 332 | 333 |   function\_in         *and* or *or* 334 |          dictates how multiple inclusion criteria should be treated. *and* 335 | necessitates that all inclusion criteria are met (i.e., intersection), 336 | while *or* allows for any criteria to be met (i.e., union) (see Details). 337 | 338 |   strategy\_out         *mapped* or *direct* or NULL (default) 339 |          dictates the strategy for how exclusion criteria are treated. NULL 340 | indicates no exclusion criteria. 341 | 342 |   vocabulary\_out         *vocabularies for exclusion criteria* or NULL 343 | (default) 344 |          comma-separated string of relevant vocabularies for exclusion 345 | criteria. NULL indicates no exclusion criteria. 346 | 347 |   codes\_out         *specific concept codes for exclusion criteria* or 348 | NULL (default) 349 |          semi-colon separated string of code concepts for exclusion 350 | criteria, corresponding to the order for vocabulary\_out. Multiple codes 351 | can be used per vocabulary and should be comma-separated. NULL indicates 352 | no exclusion criteria. 353 | 354 |   function\_out         *and* or *or* or NULL 355 |          dictates how multiple exclusion criteria should be treated. *and* 356 | necessitates that all exclusion criteria are met (i.e., intersection), 357 | while *or* allows for any criteria to be met (i.e., union). NULL 358 | indicates no exclusion criteria. 359 | 360 |   declare         *TRUE/FALSE* 361 |          if TRUE, outputs status and updates to the screen. 362 | 363 |   save         *TRUE/FALSE* 364 |          if TRUE, various query output saved to outDirectory (see 365 | Details). 366 | 367 |   out\_name         *name assigned to search query* or NULL 368 |          if save == TRUE, saves query using provided name. If the 369 | provided name already exists as a directory (or is NULL), the directory 370 | defaults to datetime name (see Details). 
371 | 372 | *Value*: 373 |   Returns a list of patients that meet inclusion criteria (and not 374 | exclusion criteria if entered). 375 | 376 | *Details*: 377 | 378 | - *direct* strategy queries the concepts directly by \_source\_concept 379 | in clinical tables. *mapped* maps to common ontology (via 380 | **concept\_synonym**) and identifies relevant descendants (via 381 | **concept\_ancestor**) to search for in \_concept fields. 382 | - the [exploreConcepts](#exploreconcepts) function can be used to find 383 | ideal concepts to search for. 384 | - vocabulary\_ input for multiple inputs should use relevant 385 | vocabularies (see [showDataTypes](#showdatatypes)) as a 386 | comma-separated string, e.g., “ATC, ICD10CM, SNOMED”. 387 | - codes\_ input corresponds to the order of the vocabulary\_ input and 388 | should be a semi-colon separated string in the same order as above. 389 | Multiple terms per vocabulary type should be comma-separated, e.g., 390 | “A01A; K50, K51; 235599003” corresponds to “A01A” for ATC, “K50” 391 | and “K51” for ICD10CM, and “235599003” for SNOMED. 392 | - function\_ corresponds to how criteria should be treated. *and* 393 | necessitates patients meet all criteria while *or* allows for 394 | patients to meet any of the criteria. 395 | - Please note that if no standard common concepts are found per search 396 | domain, a warning message will appear and the search will not be 397 | able to be performed (see [Helpful Hints](#helpful-hints) for more 398 | details). 399 | - if save == TRUE, the following information is saved in a directory 400 | per query: 401 | - query: all arguments for the search. 402 | - \_criteria\_mapped: all original criteria for inclusion (and 403 | exclusion if applicable) that are mapped to dataOntology. 404 | - criteria\_mapped\_concepts: all mapped concepts used for 405 | inclusion (and exclusion if applicable) that are used to search 406 | in clinical data tables. 
Additionally, the pt\_count column 407 | displays the number of unique patients that have a record with 408 | the corresponding concept. 409 | - outcome: results of the search (most relevant when exclusion 410 | criteria are applied). 411 | - patient\_list: list of patients that meet inclusion (and not 412 | exclusion, if applicable) criteria. 413 | 414 |   415 | 416 | ### Misc. 417 | 418 | #### changeOutDirectory 419 | 420 | *Description*:   Sets the current outDirectory which will store the Data 421 | Ontology and all function output. Option to create directory if does not 422 | exist. 423 | 424 | *Usage*:  changeOutDirectory(outdir=“path/to/directory”, create=FALSE) 425 | 426 | *Arguments*: 427 |   outdir         directory path 428 | 429 |   create         *TRUE/FALSE* 430 |          will create the directory if it does not exist 431 | 432 | *Value*: 433 |    Nothing returned; simply sets (and creates if set to) output 434 | directory 435 | 436 | *Details*: 437 | 438 | - If directory does not exist and create=FALSE, a warning message will 439 | appear and the output directory will not be changed. 440 | 441 |   442 | 443 | #### makeDataOntology 444 | 445 | *Description*:  Creates general Data Ontology used by all data tables 446 | from the **concept** table. Option to save/load. 447 | 448 | *Usage*:  dataOntology \<- 449 | makeDataOntology(declare=TRUE,store\_ontology=FALSE) 450 | 451 | *Arguments*: 452 |   declare         *TRUE/FALSE* 453 |          if TRUE, outputs status and updates to the screen 454 | 455 |   store\_ontology         *TRUE/FALSE* 456 |          if TRUE, will save/load the ontology instead of active querying 457 | 458 | *Value*: 459 |   Returns a data.table with concept data. 460 | 461 | *Details*: 462 | 463 | - Generating the Data Ontology takes ~31.2 secs and is ~491.6 Mb. 464 | - If declare == TRUE, the following information will be returned: 465 | 466 | 467 | 468 | Retrieving concept data... 
469 | Concept data loaded; data found for: 470 | ## unique domains. 471 | ## unique vocabularies. 472 | ### unique concept classes. 473 | 474 | - If store\_ontology == TRUE, attempts to load a previously saved ontology (from the 475 | outDirectory) and saves it if it does not exist (~53 Mb). Loading takes ~8 476 | secs. 477 | 478 |   479 | 480 | #### summarizeDemographics 481 | 482 | *Description*:  Summarizes patient demographic data from the 483 | [getDemographics](#getdemographics) function. 484 | 485 | *Usage*:  summarizeDemographics(ptDemo) 486 | 487 | *Arguments*: 488 | 489 |   ptDemo         *patient demographics table* 490 |          ptDemo is the patient demographics object from the 491 | getDemographics function output 492 | 493 | *Value*: 494 | 495 |   N/A; outputs message with descriptive summary statistics for the 496 | relevant patient demographic data. 497 | 498 |   499 | 500 | #### showDataTypes 501 | 502 | *Description*:  Details relevant vocabularies per domain. Requires 503 | dataOntology to have been created (via 504 | [makeDataOntology](#makedataontology)). 505 | 506 | *Usage*:  showDataTypes() 507 | 508 | *Arguments*: 509 | 510 | N/A 511 | 512 | *Value*: 513 | 514 |   Returns a table of vocabularies contained within clinical domains: 515 | Condition, Observation, Measurement, Device, Procedure, Drug. 516 | 517 |   518 | 519 | #### exploreConcepts 520 | 521 | *Description*:  For given vocabulary and concept, returns the mapped 522 | standard concept(s) as well as descendant concept(s). 523 | 524 | *Usage*:  conceptsInfo \<- exploreConcepts(vocabulary, codes) 525 | 526 | *Arguments*: 527 | 528 |   vocabulary         *vocabulary* 529 |          comma-separated string of relevant vocabularies for inclusion 530 | criteria (see Details). 531 | 532 |   codes         *concept codes* 533 |          semi-colon separated string of code concepts for inclusion 534 | criteria, corresponding to the order for vocabulary. 
Multiple codes can 535 | be used per vocabulary and should be comma-separated (see Details). 536 | 537 | *Value*: 538 | 539 |   Returns a table of concepts contained under (i.e., below in the 540 | hierarchy) the query concept. 541 | 542 | *Details*: 543 | 544 | - vocabulary input for multiple inputs should use relevant 545 | vocabularies (see [showDataTypes](#showdatatypes)) as a 546 | comma-separated string, e.g., “ATC, ICD10CM”. 547 | - codes input corresponds to the order of the vocabulary input and 548 | should be a semi-colon separated string in the same order as above. 549 | Multiple terms per vocabulary type should be comma-separated, e.g., 550 | “A01A; K50, K51” corresponds to “A01A” for ATC and “K50” and “K51” 551 | for ICD10CM. 552 | 553 |   554 | 555 | ## Examples 556 | 557 | Both simple and advanced [findPatients](#findpatients) queries will be 558 | outlined. See the [Output](#output) section for description of output if 559 | save == TRUE. For the process timing provided, all queries were run on 560 | an Amazon Elastic Compute Cloud (EC2) instance. 561 | 562 | ### Simple 563 | 564 | 1. Disease category (ICD10CM): find all “Type 2 Diabetes Mellitus” 565 | patients (E11) 566 | 567 | Here we will set a single inclusion criterion. The inclusion vocabulary 568 | is set to *ICD10CM* and the inclusion code is *E11* corresponding to the 569 | vocabulary. Because the inclusion strategy is set as “mapped”, ROMOP 570 | will map the ICD10CM code to a common ontology (SNOMED) term and find 571 | all descendants to search for (see [Code Breakdown](#code-breakdown) for 572 | details on how this 573 | works). 574 | 575 | *query* 576 | 577 | ``` 578 | patient_list = findPatients(strategy_in="mapped", vocabulary_in = "ICD10CM", codes_in = "E11") 579 | ``` 580 | 581 | *time*: 15.3 secs 582 | 583 |   584 | 585 | 2. 
Specific disease (ICD9CM): find all patients with “Diabetes with 586 | ketoacidosis, type I \[juvenile type\], not stated as uncontrolled” 587 | **only** (250.11) 588 | 589 | Here we will search for patients that have the specific *ICD9CM* code 590 | *250.11* **only**, i.e., without mapping to the common ontology (see [Code 591 | Breakdown](#code-breakdown) for the importance of this 592 | distinction). 593 | 594 | *query* 595 | 596 | ``` 597 | patient_list = findPatients(strategy_in="direct", vocabulary_in = "ICD9CM", codes_in = "250.11") 598 | ``` 599 | 600 | *time*: 1.1 min 601 | 602 |   603 | 604 | 3. Multiple diseases (ICD10CM): find all patients with “Essential 605 | (primary) hypertension” (I10) **and** “Angina pectoris with 606 | documented spasm” (I20.1) 607 | 608 | Here we will search for patients that have multiple ICD10CM codes. 609 | While we put a single inclusion vocabulary, we will put two inclusion 610 | codes separated by a comma. Also we set the inclusion function to “and” 611 | which requires **both** criteria to be 612 | met. 613 | 614 | *query* 615 | 616 | ``` 617 | patient_list = findPatients(strategy_in="mapped", vocabulary_in = "ICD10CM", codes_in = "I10, I20.1", function_in = "and") 618 | ``` 619 | 620 | *time*: 23.8 secs 621 | 622 |   623 | 624 | 4. Drug class (ATC): find all patients prescribed with any “Serotonin 625 | receptor antagonists” (A03AE) 626 | 627 | Here we will search for patients by drug ATC code. As the inclusion 628 | strategy is set to “mapped”, all drugs that fall into this category will 629 | automatically be identified and searched for (see [Code 630 | Breakdown](#code-breakdown) for details on how this 631 | works). 632 | 633 | *query* 634 | 635 | ``` 636 | patient_list = findPatients(strategy_in="mapped", vocabulary_in = "ATC", codes_in = "A03AE") 637 | ``` 638 | 639 | *time*: 1.1 secs 640 | 641 |   642 | 643 | 5. 
Disease category (ICD10CM) but not Drug (MeSH): find all patients 644 | with “Other anxiety disorders” (F41), but *not* prescribed with 645 | “Clonazepam” (D002998) 646 | 647 | Here we will search for patients by ICD10CM code as before. We also 648 | identify all patients prescribed with the MeSH term for “Clonazepam”, 649 | which will be removed from the original 650 | list. 651 | 652 | *query* 653 | 654 | ``` 655 | patient_list = findPatients(strategy_in="mapped", vocabulary_in = "ICD10CM", codes_in = "F41", strategy_out="mapped", vocabulary_out = "MeSH", codes_out = "D002998", function_out = "and") 656 | ``` 657 | 658 | *time*: 16.5 secs 659 | 660 |   661 | 662 | ### Advanced 663 | 664 | 1. Multiple disease categories (ICD10CM) and lab test (LOINC) but not 665 | multiple disease categories (ICD10CM) nor drug class (RxNorm): find 666 | all patients with “Crohn’s disease” (K50) and “Malignant neoplasm of 667 | prostate” (C61) with “CBC W Auto Differential panel - Blood” 668 | (57021-8), but *not* “Gastroenteritis and colitis due to radiation” 669 | (K52.0) nor “Allergic and dietetic gastroenteritis and colitis” 670 | (K52.2) nor prescribed with any “Aminosalicylate” (113374) 671 | 672 | Here we will search for patients that meet all of the inclusion criteria (both ICD10CM codes and the LOINC code), as the inclusion function is set to “and”. We also 673 | identify all patients that have either of the excluded ICD10CM codes or are prescribed any drug under the RxNorm concept (the exclusion function is set to “or”), 674 | which will be removed from the original list. 
675 | 676 | *query* 677 | 678 | ``` 679 | vocabulary_in = "ICD10CM, LOINC" 680 | codes_in = "K50, C61; 57021-8" 681 | vocabulary_out = "ICD10CM, RxNorm" 682 | codes_out = "K52.0, K52.2; 113374" 683 | 684 | patient_list = findPatients(strategy_in="mapped", vocabulary_in = vocabulary_in, codes_in = codes_in, function_in = "and", strategy_out="mapped", vocabulary_out = vocabulary_out, codes_out = codes_out, function_out = "or") 685 | ``` 686 | 687 | *time*: 5.9 mins 688 | 689 |   690 | 691 | ## Output 692 | 693 | All output is saved in the output directory (use 694 | [changeOutDirectory](#changeoutdirectory) to set). Additionally, the 695 | data ontology file will be loaded from here, and saved here if the store\_ontology option is set when using 696 | the [makeDataOntology](#makedataontology) function. 697 | 698 | If save==TRUE is selected for [findPatients](#findpatients) queries, 699 | various information will be saved in a created query-specific directory 700 | within the outDirectory: 701 | \+ query: all arguments for the search. + \_criteria\_mapped: all 702 | original criteria for inclusion (and exclusion if applicable) that are 703 | mapped to dataOntology. + criteria\_mapped\_concepts: all mapped 704 | concepts used for inclusion (and exclusion if applicable) that are used 705 | to search in clinical data tables. Additionally, the pt\_count column 706 | displays the number of unique patients that have a record with the 707 | corresponding concept. 708 | \+ outcome: results of the search (most relevant when exclusion criteria 709 | are applied). 710 | \+ patient\_list: list of patients that meet inclusion (and not 711 | exclusion, if applicable) criteria. 
712 | 713 | We will detail the respective output files that are derived from Simple 714 | [Examples](#examples) \#5: 715 | 716 | ### query.txt 717 | 718 | cat query.txt 719 | 720 | inclusion strategy: mapped 721 | inclusion vocabularies: ICD10CM 722 | inclusion codes: F41 723 | inclusion function: or 724 | exclusion strategy: mapped 725 | exclusion vocabularies: MeSH 726 | exclusion codes: D002998 727 | exclusion function: and 728 | 729 | ### inclusion\_criteria\_mapped.txt 730 | 731 | ``` 732 | cat inclusion_criteria_mapped.txt 733 | ``` 734 | 735 | codes vocabularies concept_id concept_name domain_id vocabulary_id concept_class_id 736 | F41 ICD10CM 1568230 Other anxiety disorders Condition ICD10CM 3-char nonbill code 737 | 738 | ### inclusion\_criteria\_mapped\_concepts.txt 739 | 740 | ``` 741 | head inclusion_criteria_mapped_concepts.txt 742 | ``` 743 | 744 | descendant_concept_id ancestor_concept_id concept_name domain_id vocabulary_id concept_class_id concept_code pt_count 745 | 381537 442077 Organic anxiety disorder Condition SNOMED Clinical Finding 17496003 NA 746 | 432600 442077 Stress reaction causing mixed disturbance of emotion and conduct Condition SNOMED Clinical Finding 192044009 NA 747 | 433178 442077 Anxiety disorder of childhood OR adolescence Condition SNOMED Clinical Finding 109006 NA 748 | 434613 442077 Generalized anxiety disorder Condition SNOMED Clinical Finding 21897009 NA 749 | 434628 442077 Separation anxiety Condition SNOMED Clinical Finding 126943008 NA 750 | 436074 442077 Panic disorder Condition SNOMED Clinical Finding 371631005 NA 751 | 436390 442077 Psychogenic rumination Condition SNOMED Clinical Finding 192014006 NA 752 | 436676 442077 Posttraumatic stress disorder Condition SNOMED Clinical Finding 47505003 NA 753 | 437537 442077 Shyness disorder of childhood Condition SNOMED Clinical Finding 83253003 NA 754 | 755 | ### exclusion\_criteria\_mapped.txt 756 | 757 | ``` 758 | cat exclusion_criteria_mapped.txt 759 | ``` 760 | 761 | 
codes vocabularies concept_id concept_name domain_id vocabulary_id concept_class_id 762 | D002998 MeSH 45612901 Clonazepam Drug MeSH Main Heading 763 | 764 | ### exclusion\_criteria\_mapped\_concepts.txt 765 | 766 | ``` 767 | head exclusion_criteria_mapped_concepts.txt 768 | ``` 769 | 770 | descendant_concept_id ancestor_concept_id concept_name domain_id vocabulary_id concept_class_id concept_code pt_count 771 | 798874 798874 Clonazepam Drug RxNorm Ingredient 2598 NA 772 | 798875 798874 Clonazepam 0.5 MG Oral Tablet Drug RxNorm Clinical Drug 197527 NA 773 | 798876 798874 Clonazepam 1 MG Oral Tablet Drug RxNorm Clinical Drug 197528 NA 774 | 798877 798874 Clonazepam 2 MG Oral Tablet Drug RxNorm Clinical Drug 197529 NA 775 | 798893 798874 Clonazepam 0.125 MG Oral Tablet [Klonopin] Drug RxNorm Branded Drug 211761 NA 776 | 798894 798874 Clonazepam 0.25 MG Oral Tablet [Klonopin] Drug RxNorm Branded Drug 211762 NA 777 | 798896 798874 Clonazepam 1 MG/ML Injectable Solution Drug RxNorm Clinical Drug 249943 NA 778 | 798897 798874 Clonazepam 0.5 MG Drug RxNorm Clinical Drug Comp 315699 NA 779 | 798899 798874 Clonazepam 2 MG Drug RxNorm Clinical Drug Comp 317336 NA 780 | 781 | ### outcome.txt 782 | 783 | ``` 784 | cat outcome.txt 785 | ``` 786 | 787 | # patients found from the inclusion criteria ONLY. 788 | # patients found from the exclusion criteria ONLY. 789 | # overlapping patients excluded from the original inclusion input based on the exclusion criteria. 790 | # patients found that meet the inclusion and exclusion criteria. 
791 | 792 | ### patient\_list.txt 793 | 794 | ``` 795 | head patient_list.txt 796 | ``` 797 | 798 | patient_list 799 | 1 800 | 2 801 | 3 802 | 803 | ## Code Breakdown 804 | 805 | ![Workflow of ROMOP 806 | functionality](www/figure1b_v3.png) 807 | 808 | ROMOP first requires the creation of a data dictionary (using the 809 | [makeDataOntology](#makedataontology) function) of the ontology (from the 810 | *concept* table) that is referenced and utilized to map to all concepts 811 | for all functions. Using this ontology, all searches and extractions are 812 | optimized to only query tables in which the data could be found. 813 | 814 | ### Data Retrieval 815 | 816 | The majority of data in clinical tables are stored as concepts. When 817 | data are extracted, ROMOP first maps the relevant concepts (e.g., 818 | device\_type\_concept\_id) to the data dictionary and then returns the 819 | mapped concepts to the user. 820 | 821 | ### Searching 822 | 823 | In the OMOP data structure, there is a distinction between how concepts 824 | are recorded and what can be directly searched for. For instance, if the 825 | user is interested in the medication idelalisib, it is not possible to 826 | directly identify records by searching for the general concept (e.g., 827 | RxNorm code 1544460) as the data are recorded by the bottom-most (i.e., 828 | most specific) concepts of the hierarchy (e.g., idelalisib 150 MG 829 | Delayed Release Oral Tablet). The hierarchical structure of these 830 | concepts in the OMOP CDM back-end, however, facilitates more powerful 831 | searches. In most extracted EHR systems, the user has to define all 832 | medications to search, for instance through a pre-populated list or by 833 | wildcard string matching (e.g., all drug names LIKE “%statin%”). 
This 834 | strategy is ultimately not ideal as it is not extensible to other 835 | systems (e.g., one system might prescribe a version or formulation of a 836 | drug that is not in another) and requires extensive manual 837 | quality-control (e.g., removing “nystatin” drugs from the string 838 | matching results). For the [findPatients](#findpatients) function, if 839 | the “mapped” option is selected, searching for a broad code like ATC 840 | level 3 code A05A (bile therapies), or even a specific term code like 841 | RxNorm code 1544460 for idelalisib, will automatically identify and 842 | query for all bottom-level (e.g., idelalisib 150 MG Delayed Release Oral 843 | Tablet) codes contained underneath that seed concept. This works by 844 | ROMOP first mapping the initial search criteria to a standard concept 845 | (SNOMED or RxNorm) and finding all descendants underneath it. Another 846 | benefit to this “mapped” option is that terms are not reliant on how the 847 | data were originally entered. For instance, if a health system switches 848 | from ICD-9CM to ICD-10CM coding, there might be discrepancies in 849 | prevalence of codes over time. Mapping to a common concept, however, 850 | often alleviates this issue as codes from both vocabularies are 851 | typically linked to a common code in the standard vocabulary. Of course, 852 | the user can search for only the concepts they entered by using the 853 | “direct” option (i.e., search for ICD-9CM code 230.0 only). 854 | 855 | ## Helpful Hints 856 | 857 | - We recommend using the *mapped* argument for the 858 | [findPatients](#findpatients) function because the concepts will not 859 | depend on the format in which the data were entered (i.e., the 860 | *source\_concept*). This is important as different institutions may 861 | utilize different underlying terminologies, as well as switch 862 | primary data entry vocabularies over time (i.e., the switch from 863 | ICD-9 to ICD-10). 
For example, if the user is interested in 864 | “Trigeminal neuralgia” and uses the ICD-10 code “G50.0” with the 865 | *direct* argument, all prior entries that utilized the corresponding 866 | ICD-9 code (“350.1”) most likely will not be found as many data 867 | warehouses do not “back-map” codes. Using the *mapped* argument will 868 | bypass this issue as the standard concept will be used which should 869 | capture both options. 870 | - Standard vocabularies: while the OMOP common data model utilizes 871 | many ontologies, **SNOMED** and **RxNorm** are used primarily for 872 | common concepts in the clinical data tables. As such, while any 873 | vocabulary can be used for [findPatients](#findpatients), the 874 | *mapped* function will only be able to find data contained within 875 | the following common concepts per domain: 876 | 877 | 878 | 879 | ## domain_type concepts 880 | ## 1 Measurement LOINC,SNOMED,CPT4 881 | ## 2 Condition SNOMED 882 | ## 3 Drug RxNorm,CPT4,NDC 883 | ## 4 Observation SNOMED,CPT4,LOINC,HCPCS 884 | ## 5 Device SNOMED,HCPCS 885 | ## 6 Procedure SNOMED,CPT4,HCPCS 886 | 887 | Consequently, if inclusion/exclusion criteria can be mapped to the 888 | data ontology, but no synonym/descendants are contained within the above 889 | common concepts, no search will be performed (as no patients would be 890 | returned). This most directly affects searching for *Drug* concepts, for 891 | which we recommend not using standard common concepts (e.g., RxNorm, 892 | ATC) for search criteria. 893 | 894 | - To ensure complete capture of data concepts of interest, we 895 | recommend identifying multiple vocabularies/codes to use via the 896 | [Athena](http://athena.ohdsi.org/search-terms/terms) resource. For 897 | instance, if interested in finding all individuals taking a 898 | Benzodiazepine, consider using both the relevant ATC classes (e.g., 899 | N03AE) as well as the relevant Substance (SNOMED) codes (e.g., 900 | 16047007). 
The [exploreConcepts](#exploreconcepts) function can be 901 | used to identify and prioritize which codes are optimal to use. 902 | 903 | ## License 904 | 905 | MIT License 906 | 907 | Copyright (c) 2018 Benjamin S. Glicksberg 908 | 909 | Permission is hereby granted, free of charge, to any person obtaining a 910 | copy of this software and associated documentation files (the 911 | “Software”), to deal in the Software without restriction, including 912 | without limitation the rights to use, copy, modify, merge, publish, 913 | distribute, sublicense, and/or sell copies of the Software, and to 914 | permit persons to whom the Software is furnished to do so, subject to 915 | the following conditions: 916 | 917 | The above copyright notice and this permission notice shall be included 918 | in all copies or substantial portions of the Software. 919 | 920 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS 921 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 922 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 923 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 924 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 925 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 926 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
927 | 928 | ## Contact 929 | 930 | For questions, comments, errors, bug reports, or issues, please contact: 931 | 932 | For general correspondence, please contact: 933 | -------------------------------------------------------------------------------- /ROMOP.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /man/changeOutDirectory.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/changeOutDirectory.R 3 | \name{changeOutDirectory} 4 | \alias{changeOutDirectory} 5 | \title{Change outDirectory} 6 | \usage{ 7 | changeOutDirectory(outdir, create = FALSE) 8 | } 9 | \arguments{ 10 | \item{outdir}{directory path} 11 | 12 | \item{create}{TRUE/FALSE (will create the directory if it does not exist)} 13 | } 14 | \value{ 15 | none (called for side effect: sets outDirectory) 16 | } 17 | \description{ 18 | Sets the current outDirectory which will store the Data Ontology and all function output. Option to create directory if it does not exist. 
19 | } 20 | \examples{ 21 | changeOutDirectory(outdir=".", create=FALSE) 22 | } 23 | -------------------------------------------------------------------------------- /man/exploreConcepts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exploreConcepts.R 3 | \name{exploreConcepts} 4 | \alias{exploreConcepts} 5 | \title{Extract synonyms and descendants for concepts of interest.} 6 | \usage{ 7 | exploreConcepts(vocabulary, codes) 8 | } 9 | \arguments{ 10 | \item{vocabulary}{Comma-separated string of relevant vocabularies for inclusion criteria} 11 | 12 | \item{codes}{Semi-colon separated string of code concepts for inclusion criteria, corresponding to the order for vocabulary. Multiple codes can be used per vocabulary and should be comma-separated.} 13 | } 14 | \value{ 15 | Returns a table of concepts contained under (i.e., below in the hierarchy) the query concept. 16 | } 17 | \description{ 18 | For given vocabulary and concept, returns the mapped standard concept(s) as well as descendant concept(s). Requires dataOntology to have been created (makeDataOntology function). 
19 | } 20 | \examples{ 21 | conceptsInfo <- exploreConcepts(vocabulary = "ATC, ICD10CM", codes = "A01A; K50, K51") 22 | } 23 | -------------------------------------------------------------------------------- /man/findPatients.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/findPatients.R 3 | \name{findPatients} 4 | \alias{findPatients} 5 | \title{Find patients based on clinical criteria} 6 | \usage{ 7 | findPatients(strategy_in = "mapped", vocabulary_in, codes_in, 8 | function_in = "or", strategy_out = NULL, vocabulary_out = NULL, 9 | codes_out = NULL, function_out = NULL, declare = FALSE, save = FALSE, 10 | out_name = NULL) 11 | } 12 | \arguments{ 13 | \item{strategy_in}{"mapped" or "direct" (dictates the strategy for how inclusion criteria are treated. "direct" searches for codes as provided, "mapped" maps criteria to standard concepts and finds descendants.)} 14 | 15 | \item{vocabulary_in}{vocabularies for inclusion criteria (comma-separated string of vocabularies)} 16 | 17 | \item{codes_in}{specific concept codes for inclusion criteria (semi-colon separated string of code concepts, corresponding to the order for vocabulary_in. Multiple codes can be used per vocabulary and should be comma-separated.)} 18 | 19 | \item{function_in}{"and" or "or" (dictates how multiple inclusion criteria should be treated. "and" necessitates that all inclusion criteria are met (i.e., intersection), while "or" allows for any criteria to be met (i.e., union) )} 20 | 21 | \item{strategy_out}{"mapped", "direct", or NULL (default) (dictates the strategy for how exclusion criteria are treated. NULL indicates no exclusion criteria.)} 22 | 23 | \item{vocabulary_out}{vocabularies for exclusion criteria or NULL (default) (comma-separated string of relevant vocabularies for exclusion criteria. 
NULL indicates no exclusion criteria)} 24 | 25 | \item{codes_out}{specific concept codes for exclusion criteria or NULL (default) (semi-colon separated string of code concepts for exclusion criteria, corresponding to the order for vocabulary_out. Multiple codes can be used per vocabulary and should be comma-separated. NULL indicates no exclusion criteria.)} 26 | 27 | \item{function_out}{"and", "or", or NULL (default) (dictates how multiple exclusion criteria should be treated. "and" necessitates that all exclusion criteria are met (i.e., intersection), while "or" allows for any criteria to be met (i.e., union). NULL indicates no exclusion criteria. )} 28 | 29 | \item{declare}{TRUE/FALSE will output status and data information during the process} 30 | 31 | \item{save}{TRUE/FALSE whether intermediate components of the search should be saved (e.g., mapped concepts found with unique patient counts per concept).} 32 | 33 | \item{out_name}{name assigned to search query or NULL (if save = TRUE, saves query using provided name. If the provided name already exists as a directory (or is NULL), the directory defaults to datetime name)} 34 | } 35 | \value{ 36 | List of patients that meet inclusion criteria (and not exclusion criteria if entered). 37 | } 38 | \description{ 39 | Identify patients based on clinical data inclusion (and exclusion, if desired) criteria. Flexible to allow for multiple data types, vocabularies, and concepts. 
40 | } 41 | \examples{ 42 | patient_list = findPatients(strategy_in="mapped", vocabulary_in = "ICD10CM", codes_in = "F41", strategy_out="mapped", vocabulary_out = "MeSH", codes_out = "D002998", function_out = "and") 43 | } 44 | -------------------------------------------------------------------------------- /man/getClinicalData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getClinicalData.R 3 | \name{getClinicalData} 4 | \alias{getClinicalData} 5 | \title{Retrieves all patient clinical data} 6 | \usage{ 7 | getClinicalData(patient_list, declare = FALSE) 8 | } 9 | \arguments{ 10 | \item{patient_list}{Comma-separated string of patient ids} 11 | 12 | \item{declare}{TRUE/FALSE will output status and data information during the process} 13 | } 14 | \value{ 15 | a list of tables containing all data within each domain (e.g., Condition) for all patients provided (can access by ptClinicalData$Condition). 16 | } 17 | \description{ 18 | Wrapper for domain-specific getData functions (e.g., getObservations). Produces a list of tables for all relevant domains. 
19 | } 20 | \examples{ 21 | ptClinicalData <- getClinicalData("1,2", declare=TRUE) 22 | } 23 | -------------------------------------------------------------------------------- /man/getConditions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getClinicalData.R 3 | \name{getConditions} 4 | \alias{getConditions} 5 | \title{Retrieves all patient clinical data from Condition table} 6 | \usage{ 7 | getConditions(patient_list, declare = FALSE) 8 | } 9 | \arguments{ 10 | \item{patient_list}{Comma-separated string of patient ids} 11 | 12 | \item{declare}{TRUE/FALSE will output status and data information during the process} 13 | } 14 | \value{ 15 | a table of relevant clinical data contained within the 'condition_occurrence' table 16 | } 17 | \description{ 18 | Produces a table for relevant concepts contained in the 'condition_occurrence' table mapped through the data ontology for a patient list. Data retrieved include: condition_type, condition_status, etc. 19 | } 20 | \examples{ 21 | ptCondData <- getConditions("1,2", declare=TRUE) 22 | } 23 | -------------------------------------------------------------------------------- /man/getDemographics.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getDemographics.R 3 | \name{getDemographics} 4 | \alias{getDemographics} 5 | \title{Retrieves patient demographic data} 6 | \usage{ 7 | getDemographics(patient_list = NULL, declare = FALSE) 8 | } 9 | \arguments{ 10 | \item{patient_list}{NULL or comma-separated string of patient ids. A provided patient_list will restrict search to ids. 
NULL will return demographic data for all available patients.} 11 | 12 | \item{declare}{TRUE/FALSE will output status and data information during the process} 13 | } 14 | \value{ 15 | table of mapped demographic concepts for all patients or specific ones for a provided CSV-formatted string of ids 16 | } 17 | \description{ 18 | Compiles demographic data for all patients or a given patient list if provided. Concepts are mapped through the created data ontology. Demographic data are retrieved from 'person' and 'death' tables and include: birthdate, deathdate, gender, ethnicity, and race. 19 | } 20 | \examples{ 21 | ptDemo <- getDemographics(patient_list=NULL,declare=TRUE) 22 | } 23 | -------------------------------------------------------------------------------- /man/getDevices.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getClinicalData.R 3 | \name{getDevices} 4 | \alias{getDevices} 5 | \title{Retrieves all patient clinical data from Device table} 6 | \usage{ 7 | getDevices(patient_list, declare = FALSE) 8 | } 9 | \arguments{ 10 | \item{patient_list}{Comma-separated string of patient ids} 11 | 12 | \item{declare}{TRUE/FALSE will output status and data information during the process} 13 | } 14 | \value{ 15 | a table of relevant clinical data contained within the 'device_exposure' table 16 | } 17 | \description{ 18 | Produces a table for relevant concepts contained in the 'device_exposure' table mapped through the data ontology for a patient list. Data retrieved include: device_type, etc. 
19 | } 20 | \examples{ 21 | ptDeviceData <- getDevices("1,2", declare=TRUE) 22 | } 23 | -------------------------------------------------------------------------------- /man/getEncounters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getEncounters.R 3 | \name{getEncounters} 4 | \alias{getEncounters} 5 | \title{Retrieves patient clinical encounter data} 6 | \usage{ 7 | getEncounters(patient_list, declare = FALSE) 8 | } 9 | \arguments{ 10 | \item{patient_list}{Comma-separated string of patient ids} 11 | 12 | \item{declare}{TRUE/FALSE will output status and data information during the process} 13 | } 14 | \value{ 15 | table of mapped encounter concepts for specific patients contained in a provided CSV-formatted string of ids. 16 | } 17 | \description{ 18 | Compiles encounter data for a given patient list. Concepts are mapped through the created data ontology. Encounter data are retrieved from visit_occurrence table and include: visit_type, encounter_type, etc. 
19 | } 20 | \examples{ 21 | ptEncs <- getEncounters("1,2,3,4",declare=TRUE) 22 | } 23 | -------------------------------------------------------------------------------- /man/getMeasurements.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getClinicalData.R 3 | \name{getMeasurements} 4 | \alias{getMeasurements} 5 | \title{Retrieves all patient clinical data from Measurement table} 6 | \usage{ 7 | getMeasurements(patient_list, declare = FALSE) 8 | } 9 | \arguments{ 10 | \item{patient_list}{Comma-separated string of patient ids} 11 | 12 | \item{declare}{TRUE/FALSE will output status and data information during the process} 13 | } 14 | \value{ 15 | a table of relevant clinical data contained within the 'measurement' table 16 | } 17 | \description{ 18 | Produces a table for relevant concepts contained in the 'measurement' table mapped through the data ontology for a patient list. Data retrieved include: measurement_type, value, unit, etc. 
19 | } 20 | \examples{ 21 | ptMeasData <- getMeasurements("1,2", declare=TRUE) 22 | } 23 | -------------------------------------------------------------------------------- /man/getMedications.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getClinicalData.R 3 | \name{getMedications} 4 | \alias{getMedications} 5 | \title{Retrieves all patient clinical data from Medications table} 6 | \usage{ 7 | getMedications(patient_list, declare = FALSE) 8 | } 9 | \arguments{ 10 | \item{patient_list}{Comma-separated string of patient ids} 11 | 12 | \item{declare}{TRUE/FALSE will output status and data information during the process} 13 | } 14 | \value{ 15 | a table of relevant clinical data contained within the 'drug_exposure' table 16 | } 17 | \description{ 18 | Produces a table for relevant concepts contained in the 'drug_exposure' table mapped through the data ontology for a patient list. Data retrieved include: drug_type, route, etc. 
19 | } 20 | \examples{ 21 | ptsMedsData <- getMedications("1,2", declare=TRUE) 22 | } 23 | -------------------------------------------------------------------------------- /man/getObservations.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getClinicalData.R 3 | \name{getObservations} 4 | \alias{getObservations} 5 | \title{Retrieves all patient clinical data from Observations table} 6 | \usage{ 7 | getObservations(patient_list, declare = FALSE) 8 | } 9 | \arguments{ 10 | \item{patient_list}{Comma-separated string of patient ids} 11 | 12 | \item{declare}{TRUE/FALSE will output status and data information during the process} 13 | } 14 | \value{ 15 | a table of relevant clinical data contained within the 'observation' table 16 | } 17 | \description{ 18 | Produces a table for relevant concepts contained in the 'observation' table mapped through the data ontology for a patient list. Data retrieved include: observation_type, value, etc. 
19 | } 20 | \examples{ 21 | ptObsData <- getObservations("1,2", declare=TRUE) 22 | } 23 | -------------------------------------------------------------------------------- /man/getProcedures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getClinicalData.R 3 | \name{getProcedures} 4 | \alias{getProcedures} 5 | \title{Retrieves all patient clinical data from Procedures table} 6 | \usage{ 7 | getProcedures(patient_list, declare = FALSE) 8 | } 9 | \arguments{ 10 | \item{patient_list}{Comma-separated string of patient ids} 11 | 12 | \item{declare}{TRUE/FALSE will output status and data information during the process} 13 | } 14 | \value{ 15 | a table of relevant clinical data contained within the 'procedure_occurrence' table 16 | } 17 | \description{ 18 | Produces a table for relevant concepts contained in the 'procedure_occurrence' table mapped through the data ontology for a patient list. Data retrieved include: procedure_type, etc. 19 | } 20 | \examples{ 21 | ptProcData <- getProcedures("1,2", declare=TRUE) 22 | } 23 | -------------------------------------------------------------------------------- /man/makeDataOntology.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/makeDataOntology.R 3 | \name{makeDataOntology} 4 | \alias{makeDataOntology} 5 | \title{Creates general data ontology} 6 | \usage{ 7 | makeDataOntology(declare = FALSE, store_ontology = FALSE) 8 | } 9 | \arguments{ 10 | \item{declare}{TRUE/FALSE will output status and data information during the process} 11 | 12 | \item{store_ontology}{TRUE/FALSE If TRUE: will attempt to load .rds file from the current outDirectory; will create and save it if it does not exist. 
If FALSE: will build table.} 13 | } 14 | \value{ 15 | Returns an ontology table dictionary of concepts contained in the 'concept' table. 16 | } 17 | \description{ 18 | Creates general data ontology used by all data tables from the concept table. Option to save/load as .rds file. 19 | } 20 | \examples{ 21 | \dontrun{ 22 | dataOntology <- makeDataOntology(declare=FALSE,store_ontology=TRUE) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /man/showDataTypes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/showDataTypes.R 3 | \name{showDataTypes} 4 | \alias{showDataTypes} 5 | \title{Shows available data types from the OMOP ontology} 6 | \usage{ 7 | showDataTypes() 8 | } 9 | \value{ 10 | Returns a table of vocabularies contained within clinical domains: Condition, Observation, Measurement, Device, Procedure, Drug. 11 | } 12 | \description{ 13 | Details relevant vocabularies per ontological domain. Requires dataOntology to have been created (makeDataOntology function). 14 | } 15 | \examples{ 16 | showDataTypes() 17 | 18 | } 19 | -------------------------------------------------------------------------------- /man/summarizeDemographics.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarizeDemographics.R 3 | \name{summarizeDemographics} 4 | \alias{summarizeDemographics} 5 | \title{Summarizes patient demographic data} 6 | \usage{ 7 | summarizeDemographics(ptDemo) 8 | } 9 | \arguments{ 10 | \item{ptDemo}{patient demographics table: ptDemo is the patient demographics object from the getDemographics function output.} 11 | } 12 | \value{ 13 | none (called for side effect: prints table) 14 | } 15 | \description{ 16 | Summarizes patient demographic data from the getDemographics function. 
17 | } 18 | \examples{ 19 | \dontrun{ 20 | summarizeDemographics(ptDemo) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /www/figure1a_v3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenGlicksberg/ROMOP/dc37ea9adeaea7a3ba330e9b909f70d198673a5f/www/figure1a_v3.png -------------------------------------------------------------------------------- /www/figure1b_v3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenGlicksberg/ROMOP/dc37ea9adeaea7a3ba330e9b909f70d198673a5f/www/figure1b_v3.png --------------------------------------------------------------------------------