├── .Rbuildignore
├── .Renviron
├── .gitignore
├── DESCRIPTION
├── LICENSE.txt
├── NAMESPACE
├── R
    ├── changeOutDirectory.R
    ├── exploreConcepts.R
    ├── findPatients.R
    ├── getClinicalData.R
    ├── getDemographics.R
    ├── getEncounters.R
    ├── makeDataOntology.R
    ├── showDataTypes.R
    ├── summarizeDemographics.R
    ├── utils.R
    └── zzz.R
├── README.md
├── ROMOP.Rproj
├── man
    ├── changeOutDirectory.Rd
    ├── exploreConcepts.Rd
    ├── findPatients.Rd
    ├── getClinicalData.Rd
    ├── getConditions.Rd
    ├── getDemographics.Rd
    ├── getDevices.Rd
    ├── getEncounters.Rd
    ├── getMeasurements.Rd
    ├── getMedications.Rd
    ├── getObservations.Rd
    ├── getProcedures.Rd
    ├── makeDataOntology.Rd
    ├── showDataTypes.Rd
    └── summarizeDemographics.Rd
└── www
    ├── figure1a_v3.png
    └── figure1b_v3.png


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.Renviron:
--------------------------------------------------------------------------------
1 | driver = ""
2 | host = ""
3 | username = ""
4 | password = ""
5 | dbname = ""
6 | port = "3306"
7 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | .DS_Store
6 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: ROMOP
 2 | Type: Package
 3 | Title: A Light-Weight R Package for Interfacing with OMOP-Formatted Electronic Health Record Data
 4 | Version: 0.3.0
 5 | Author: Benjamin S. Glicksberg
 6 | Maintainer: Benjamin S. Glicksberg <benjamin.glicksberg@ucsf.edu>
 7 | Description: ROMOP streamlines typical EHR-related data processes for Observational Medical Outcomes Partnership (OMOP)-formatted data. Its functions include exploration of data types, extraction and summarization of patient clinical and demographic data, and patient searches using any Common Data Model (CDM) vocabulary concept.
 8 | License: MIT License
 9 | Encoding: UTF-8
10 | LazyData: true
11 | Imports: dplyr, data.table, DBI, RMySQL, DatabaseConnector, DatabaseConnectorJars, SqlRender
12 | Depends: R (>= 3.4)
13 | RoxygenNote: 6.0.1
14 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2018 Benjamin S. Glicksberg
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6 | 
7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8 | 
9 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(changeOutDirectory)
 4 | export(exploreConcepts)
 5 | export(findPatients)
 6 | export(getClinicalData)
 7 | export(getConditions)
 8 | export(getDemographics)
 9 | export(getDevices)
10 | export(getEncounters)
11 | export(getMeasurements)
12 | export(getMedications)
13 | export(getObservations)
14 | export(getProcedures)
15 | export(makeDataOntology)
16 | export(showDataTypes)
17 | export(summarizeDemographics)
18 | import(DBI)
19 | import(data.table)
20 | import(dplyr)
21 | 


--------------------------------------------------------------------------------
/R/changeOutDirectory.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' Change outDirectory
 3 | #'
 4 | #' Sets the current outDirectory which will store the Data Ontology and all function output. Option to create directory if does not exist.
 5 | #'
 6 | #' @param outdir directory path
 7 | #' @param create TRUE/FALSE (will create the directory if it does not exist)
 8 | #'
 9 | #' @return none (called for side effect: sets outDirectory)
10 | #' @export
11 | #'
12 | #' @examples
13 | #' changeOutDirectory(outdir=“~/”, create=FALSE)
14 | changeOutDirectory <- function(outdir, create = FALSE) {
15 | 
16 |   if (dir.exists(outdir)) {
17 |     message(paste0(outdir, " set as OutDirectory. "))
18 |     if (endsWith(outdir, "/")) {
19 |     options("outDirectory" = outdir)
20 |     } else {
21 |       options("outDirectory" = paste0(outdir,"/"))
22 |     }
23 |   } else {
24 |     if (create == TRUE) {
25 |       dir.create(outdir)
26 |       if (endsWith(outdir, "/")) {
27 |       options("outDirectory" = outdir)
28 |       } else {
29 |         options("outDirectory" = paste0(outdir,"/"))
30 |       }
31 |       message(paste0(outdir, " does not exist. Created and set to OutDirectory. "))
32 |     } else {
33 |       message(paste0(outdir, " does not exist. Please set 'create = TRUE' if you wish to create it or choose an already existing directory. OutDirectory not set. "))
34 |     }
35 |   }
36 | 
37 | }
38 | 


--------------------------------------------------------------------------------
/R/exploreConcepts.R:
--------------------------------------------------------------------------------
 1 | #' Extract synonyms and descendants for concepts of interest.
 2 | #'
 3 | #' For given vocabulary and concept, returns the mapped standard concept(s) as well as decendent concept(s). Requires dataOntology to have been created (makeDataOntology funciton).
 4 | #'
 5 | #' @param vocabulary Comma-separated string of relevant vocabularies for inclusion criteria
 6 | #' @param codes Semi-colon separated string of code concepts for inclusion criteria, corresponding to the order for vocabulary. Multiple codes can be used per vocabulary and should be comma-separated.
 7 | #'
 8 | #' @return Returns a table of concepts contained under (i.e., below in the heirarchy) the query concept.
 9 | #' @export
10 | #'
11 | #' @examples
12 | #' conceptsInfo <- exploreConcepts(vocabulary = “ATC, ICD10CM”, codes = “A01A; K50, K51”)
13 | exploreConcepts <- function(vocabulary, codes) {
14 | 
15 |   if (exists("dataOntology")) { # ensure dataOntology exists
16 | 
17 |     criteriaMapped <- unpackAndMap(vocabulary,codes)
18 | 
19 |     if (nrow(criteriaMapped)>0) {
20 |       codesFormatted <- paste0(criteriaMapped$concept_id,collapse=",")
21 |       synonymDataFiltered <- identifySynonyms(codesFormatted)
22 |       synonymCodes <- paste(c(codesFormatted, unique(synonymDataFiltered$concept_id_2)),collapse=",")
23 |       mappingDataInfo <- identifyMappings(synonymCodes)
24 | 
25 |       return(mappingDataInfo)
26 | 
27 |     } else {
28 |       message("Error: none of the inclusion criteria were able to map to the ontology. Please check terms and try again.")
29 |     }
30 | 
31 |   } else { #endif dataOntology exists
32 |     message("Error: dataOntology does not exist. Please first run makeDataOntology.")
33 |   }
34 | }
35 | 


--------------------------------------------------------------------------------
/R/findPatients.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #' Find patients based on clinical critera
  4 | #'
  5 | #' Identify patients based on clinical data inclusion (and exclusion, if desired) criteria. Flexible to allow for multiple data types, vocabularies, and concepts.
  6 | #' @param strategy_in "mapped" or "direct" (dictates the strategy for how inclusion criteria are treated. "direct" searches for codes as provided, "mapped" maps criteria to standard concepts and finds descendants.
  7 | #' @param vocabulary_in vocabularies for inclusion criteria (comma-separated string of vocabularies)
  8 | #' @param codes_in specific concept codes for inclusion criteria (semi-colon separated string of code concepts, corresponding to the order for vocabulary_in. Multiple codes can be used per vocabulary and should be comma-separated.)
  9 | #' @param function_in  "and" or "or" (dictates how multiple inclusion should be treated. "and" necessitates that all inclusion criteria are met (i.e., intersection), while "or" allows for any critera to be met (i.e., union) )
 10 | #' @param strategy_out "mapped", "direct", or NULL (default) (dictates the strategy for how exclusion are treated. NULL indicates no exclusion criteria.)
 11 | #' @param vocabulary_out vocabularies for exclusion criteria or NULL (default) (comma-separated string of relevant vocabularies for exclusion criteria. NULL indicates no exclusion criteria)
 12 | #' @param codes_out specific concept codes for exclusion criteria or NULL (default) (semi-colon separated string of code concepts for inclusion criteria, corresponding to the order for vocabulary_out. Multiple codes can be used per vocabulary and should be comma-separated. NULL indicates no exclusion criteria.)
 13 | #' @param function_out "and", "or", or NULL (default) (dictates how multiple exclusion should be treated. and necessitates that all exclusion criteria are met (i.e., intersection), while or allows for any critera to be met (i.e., union). NULL indicates no exclusion criteria. )
 14 | #' @param declare TRUE/FALSE will output status and data information during the process
 15 | #' @param save TRUE/FALSE whether intermediate components of the search should be saved (e.g., mapped concepts found with unique patient counts per concept).
 16 | #' @param out_name name assigned to search query or NULL (if save = TRUE, saves query using provided name. If the provided name already exists as a directory (or is NULL), the directory defaults to datetime name)
 17 | #'
 18 | #' @return List of patients that meet inclusion criteria (and not exclusion criteria if entered).
 19 | #' @import data.table DBI
 20 | #' @export
 21 | #'
 22 | #' @examples
 23 | #' patient_list = findPatients(strategy_in="mapped", vocabulary_in = "ICD10CM", codes_in = "F41", strategy_out="mapped", vocabulary_out = "MeSH", codes_out = "D002998", function_out = "and")
 24 | findPatients <- function(strategy_in="mapped", vocabulary_in, codes_in, function_in = "or", strategy_out = NULL, vocabulary_out = NULL, codes_out = NULL, function_out = NULL, declare=FALSE, save=FALSE, out_name=NULL) {
 25 | 
 26 |   if (exists("dataOntology")) { # ensure dataOntology exists
 27 | 
 28 |     ## strategy:
 29 |     #### mapped- map to common ontology, find descendants, and search | RECOMMENDED
 30 |     #### direct- search directly for included codes only
 31 |     strategy_in <- tolower(strategy_in) # force lowercase
 32 |     if (!is.null(strategy_out)) {strategy_out = tolower(strategy_out)}
 33 | 
 34 |     ## function:
 35 |     #### and- criteria require INTERSECTION (i.e. criteria 1 AND criteria 2 AND ...)
 36 |     #### or- criteria require UNION (i.e. criteria 1 OR criteria 2 OR ...)
 37 |     function_in = tolower(function_in) # force lowercase
 38 |     if (!is.null(function_out)) {function_out = tolower(function_out)}
 39 | 
 40 |     # check parameters
 41 |     pass_requirements <- checkParameters(strategy_in, function_in, strategy_out, function_out)
 42 | 
 43 |     # require correct parameters
 44 |     if (pass_requirements == TRUE) {
 45 | 
 46 |       if (save==TRUE) {
 47 | 
 48 |         if (is.null(out_name)) {
 49 |           outdir <- paste0(getOption("outDirectory"),gsub(" ", "_",Sys.time()))
 50 |           dir.create(outdir)
 51 |         } else {
 52 |           # check to see if directory already exists
 53 |           outdir <- paste0(getOption("outDirectory"),out_name)
 54 |           if (!dir.exists(outdir)) {
 55 |             dir.create(paste0(getOption("outDirectory"),out_name))
 56 |           } else {
 57 |             outdir <- paste0(getOption("outDirectory"),gsub(" ", "_",Sys.time()))
 58 |             dir.create(outdir)
 59 |             message(paste0(getOption("outDirectory"), out_name, " directory already exists. Saving results to: ", outdir))
 60 |           }
 61 |         }
 62 | 
 63 |         fout <- paste0(outdir,"/query.txt")
 64 | 
 65 |         sink(fout)
 66 |         cat(paste0("inclusion strategy: ", strategy_in ,"\n"))
 67 |         cat(paste0("inclusion vocabularies: ", vocabulary_in ,"\n"))
 68 |         cat(paste0("inclusion codes: ", codes_in ,"\n"))
 69 |         cat(paste0("inclusion function: ", function_in ,"\n"))
 70 |         cat(paste0("exclusion strategy: ", strategy_out ,"\n"))
 71 |         cat(paste0("exclusion vocabularies: ", vocabulary_out ,"\n"))
 72 |         cat(paste0("exclusion codes: ", codes_out ,"\n"))
 73 |         cat(paste0("exclusion function: ", function_out ,"\n"))
 74 |         sink()
 75 | 
 76 |       }
 77 | 
 78 | 
 79 |       ## vocabulary_in: vocabularies that will be used for INCLUSION criteria
 80 |       #### format = "VOCABULARY1, VOCABULARY2, VOCABULARY3"
 81 |       ###### vocabulary_in = "ATC, ICD10CM, SNOMED"
 82 | 
 83 |       ## codes_in: codes corresponding to order of vocabularies_in that will be used for INCLUSION criteria
 84 |       #### format = "c1v1;c2v1, c1v2;c2v2;c3v2, c1v3"
 85 |       ###### codes_in = "L01XC19;J01X, Y07.9;O33.7, 25343008"
 86 | 
 87 |       ## vocabulary_out: vocabularies that will be used for EXCLUSION criteria
 88 |       #### same format as above
 89 | 
 90 |       ## codes_out: codes corresponding to order of vocabularies_out that will be used for EXCLUSION criteria
 91 |       #### same format as above
 92 | 
 93 |       # 1- INCLUSION
 94 | 
 95 |       #### unpack vocabulary_in and codes_in
 96 |       inclusionCriteriaMapped <- unpackAndMap(vocabulary_in,codes_in)
 97 | 
 98 |       ### check to see if any codes matched
 99 |       if (nrow(inclusionCriteriaMapped)>0) {
100 | 
101 |         includeCodesFormatted <- paste0(inclusionCriteriaMapped$concept_id,collapse=",")
102 | 
103 |         if (declare == TRUE) {
104 |           message("The following INCLUSION criteria are being used: \n")
105 |           print(inclusionCriteriaMapped)
106 |         }
107 | 
108 |         if (save == TRUE) {
109 |           fout = paste0(outdir,"/inclusion_criteria_mapped.txt")
110 |           write.table(inclusionCriteriaMapped, file = fout, sep='\t', row.names=F, quote=F)
111 |         }
112 | 
113 | 
114 |         # MAPPING
115 |         if (strategy_in == "direct") {
116 | 
117 |           useSource <- "_source" # search _source_concept_id
118 |           includeSearchTable <- identifyTablesDirect(inclusionCriteriaMapped)
119 | 
120 |         } else if (strategy_in == "mapped") {
121 |           ## RECOMMENDED, but can lead to:
122 |           #### cross-mapping (i.e. from ICD code (e.g. diabetes) to procedure/measurement)
123 |           #### extra mapping (i.e. ICD 10 code --> ICD9 & 10 results)
124 | 
125 |           useSource <- "" # search _concept_id
126 | 
127 |           # get common ontology synonyms
128 |           includeSynonymDataFiltered <- identifySynonyms(includeCodesFormatted)
129 |           includeSynonymData <- merge(includeSynonymDataFiltered[,"concept_id_2"], dataOntology[,c("concept_id","domain_id","vocabulary_id")], by.x="concept_id_2",by.y = "concept_id")
130 |           colnames(includeSynonymData) <- c("concept_id","domain_id","vocabulary_id")
131 |           includeSynonymCodes <- paste(union(inclusionCriteriaMapped$concept_id, includeSynonymDataFiltered$concept_id_2),collapse = ",") ## adds original codes into ancestor query (b/c of scenarios with ATC))
132 | 
133 |           # get descendents
134 |           includeMappingDataInfo <- identifyMappings(includeSynonymCodes)
135 |           includeMappingData <- includeMappingDataInfo[,c("descendant_concept_id","domain_id","vocabulary_id")]
136 |           colnames(includeMappingData) <- c("concept_id","domain_id","vocabulary_id")
137 | 
138 |           includeCombined <- rbind(inclusionCriteriaMapped[,c("concept_id","domain_id","vocabulary_id")],includeSynonymData)
139 |           includeCombined <- rbind(includeCombined, includeMappingData)
140 |           includeCombined <- includeCombined[!duplicated(includeCombined),]
141 | 
142 |           if (declare == TRUE) {
143 |             message("The following INCLUSION mapped concepts are being queried (along with mapped input and synonyms): \n")
144 |             print(includeMappingDataInfo)
145 |           }
146 |           # save mapped concepts after patient count per concept added
147 | 
148 |           # get tables to search for mapped concepts
149 |           includeSearchTable <- identifyTablesMapped(includeCombined)
150 | 
151 | 
152 |         } #endif strategy_in == mapped
153 | 
154 |         # 2- SEARCH INCLUSION
155 | 
156 |         # if standard concepts are found for criteria after synonym and descendant search
157 |         if (all(array(sapply(includeSearchTable, length)))==0) {
158 |           if (declare == TRUE){
159 |             message("Warning: no concepts could be mapped to INCLUSION criteria standard concepts for the domain of interest and, as such, no patients will be identified. Please refer to README for more details.")
160 |           }
161 |         }
162 | 
163 |         # if any condition table codes
164 |         if (length(includeSearchTable$Condition)>0) {
165 |           if (declare==TRUE) {message("querying Conditions...")}
166 |           condition_codes <- paste(includeSearchTable$Condition,collapse=",")
167 |           pts_condition_include <- searchCondition(useSource,condition_codes)
168 |         } else {
169 |           pts_condition_include <- NULL
170 |         }
171 | 
172 |         # if any observation table codes
173 |         if (length(includeSearchTable$Observation)>0) {
174 |           if (declare==TRUE){message("querying Observations") }
175 |           observation_codes <- paste(includeSearchTable$Observation,collapse=",")
176 |           pts_observation_include <- searchObservation(useSource,observation_codes)
177 |         } else {
178 |           pts_observation_include <- NULL
179 |         }
180 | 
181 |         # if any measurement table codes
182 |         if (length(includeSearchTable$Measurement)>0) {
183 |           if (declare==TRUE) {message("querying Measurements")}
184 |           measurement_codes <- paste(includeSearchTable$Measurement,collapse=",")
185 |           pts_measurement_include <- searchMeasurement(useSource,measurement_codes)
186 |         } else {
187 |           pts_measurement_include <- NULL
188 |         }
189 | 
190 |         # if any drug table codes
191 |         if (length(includeSearchTable$Drug)>0) {
192 |           if (declare==TRUE) {message("querying Drugs")}
193 |           drug_codes <- paste(includeSearchTable$Drug,collapse=",")
194 |           pts_drug_include <- searchDrug(useSource,drug_codes)
195 |         } else {
196 |           pts_drug_include <- NULL
197 |         }
198 | 
199 |         # if any device table codes
200 |         if (length(includeSearchTable$Device)>0) {
201 |           if (declare==TRUE) {message("querying Devices")}
202 |           device_codes <- paste(includeSearchTable$Drug,collapse=",")
203 |           pts_device_include <- searchDevice(useSource,device_codes)
204 |         } else {
205 |           pts_device_include <- NULL
206 |         }
207 | 
208 |         # if any procedure table codes
209 |         if (length(includeSearchTable$Procedure)>0) {
210 |           if (declare==TRUE) {message("querying Procedures")}
211 |           procedure_codes <- paste(includeSearchTable$Procedure,collapse=",")
212 |           pts_procedure_include <- searchProcedure(useSource,procedure_codes)
213 |         }else{
214 |           pts_procedure_include <- NULL
215 |         }
216 | 
217 |         # save mapped concepts with patient counts
218 |         if (save == TRUE) {
219 |           if (!(all(array(sapply(includeSearchTable, length)))==0)) {
220 |           fout = paste0(outdir,"/inclusion_criteria_mapped_concepts.txt")
221 |           includeDataInfowPatients <- summarizeFoundConcepts(pts_condition_include, pts_observation_include, pts_measurement_include, pts_device_include, pts_drug_include, pts_procedure_include)
222 |           # merge pt counts with all concepts
223 |           includeMappingCombined <- merge(includeMappingDataInfo, includeDataInfowPatients, by.x = "descendant_concept_id", by.y = "concept_id", all.x = TRUE)
224 |           write.table(includeMappingCombined, file = fout, sep='\t', row.names=F, quote=F)
225 |           }
226 |         }
227 | 
228 | 
229 |         # 3- EXCLUSION
230 |         # vocabulary_out = "ICD9CM"
231 |         # codes_out = "250.00"
232 | 
233 |         if (!is.null(vocabulary_out) & !is.null(codes_out) & !is.null(strategy_out) & !is.null(function_out)) { # if any exclusion criteria
234 | 
235 |           #### unpack vocabulary_in and codes_in
236 |           exclusionCriteriaMapped <- unpackAndMap(vocabulary_out,codes_out)
237 | 
238 |           if (nrow(exclusionCriteriaMapped)>0) {
239 | 
240 |             excludeCodesFormatted <- paste0(exclusionCriteriaMapped$concept_id,collapse=",")
241 | 
242 |             if (declare == TRUE) {
243 |               message("The following EXCLUSION criteria are being used: \n")
244 |               print(exclusionCriteriaMapped)
245 |             }
246 | 
247 |             if (save == TRUE) {
248 |               fout <- paste0(outdir,"/exclusion_criteria_mapped.txt")
249 |               write.table(exclusionCriteriaMapped, file = fout, sep='\t', row.names=F, quote=F)
250 |             }
251 | 
252 | 
253 | 
254 |             if (strategy_out == "direct") {
255 | 
256 |               useSource <- "_source" # search _source_concept_id
257 |               excludeSearchTable <- identifyTablesDirect(exclusionCriteriaMapped)
258 | 
259 |             } else if (strategy_out == "mapped") {
260 | 
261 |               # get common ontology synonyms
262 |               excludeSynonymDataFiltered <- identifySynonyms(excludeCodesFormatted)
263 |               excludeSynonymData <- merge(excludeSynonymDataFiltered[,"concept_id_2"], dataOntology[,c("concept_id","domain_id","vocabulary_id")], by.x="concept_id_2",by.y = "concept_id")
264 |               colnames(excludeSynonymData) <- c("concept_id","domain_id","vocabulary_id")
265 |               excludeSynonymCodes <- paste(union(exclusionCriteriaMapped$concept_id, excludeSynonymDataFiltered$concept_id_2),collapse = ",") ## adds original codes into ancestor query (b/c of scenarios with ATC))
266 | 
267 | 
268 |               # get descendents
269 |               excludeMappingDataInfo <- identifyMappings(excludeSynonymCodes)
270 |               excludeMappingData <- excludeMappingDataInfo[,c("descendant_concept_id","domain_id","vocabulary_id")]
271 |               colnames(excludeMappingData) <- c("concept_id","domain_id","vocabulary_id")
272 | 
273 | 
274 |               excludeCombined <- rbind(exclusionCriteriaMapped[,c("concept_id","domain_id","vocabulary_id")],excludeSynonymData)
275 |               excludeCombined <- rbind(excludeCombined, excludeMappingData)
276 |               excludeCombined <- excludeCombined[!duplicated(excludeCombined),]
277 | 
278 |               if (declare == TRUE) {
279 |                 message("The following EXCLUSION mapped concepts are being queried (along with mapped input and synonyms): \n")
280 |                 print(excludeMappingDataInfo)
281 |               }
282 | 
283 |               # save mapped concepts once patient counts are added
284 | 
285 |               # get tables to search for mapped concepts
286 |               excludeSearchTable <- identifyTablesMapped(excludeCombined)
287 | 
288 |             }
289 | 
290 |             # 4- SEARCH EXCLUSION
291 | 
292 |             # if standard concepts are found for criteria after synonym and descendant search
293 |             if (all(array(sapply(excludeSearchTable, length)))==0) {
294 |               if (declare == TRUE){
295 |                 message("Warning: no concepts could be mapped to EXCLUSION criteria standard concepts for the domain of interest and, as such, no patients will be identified. Please refer to README for more details.")
296 |               }
297 |             }
298 | 
299 | 
300 |             # if any condition table codes
301 |             if (length(excludeSearchTable$Condition)>0) {
302 |               if (declare==TRUE) {message("querying Conditions...")}
303 |               condition_codes <- paste(excludeSearchTable$Condition,collapse=",")
304 |               pts_condition_exclude <- searchCondition(useSource,condition_codes)
305 |             } else {
306 |               pts_condition_exclude <- NULL
307 |             }
308 | 
309 |             # if any observation table codes
310 |             if (length(excludeSearchTable$Observation)>0) {
311 |               if (declare==TRUE) {message("querying Observations")}
312 |               observation_codes <- paste(excludeSearchTable$Observation,collapse=",")
313 |               pts_observation_exclude <- searchObservation(useSource,condition_codes)
314 |             } else {
315 |               pts_observation_exclude <- NULL
316 |             }
317 | 
318 |             # if any measurement table codes
319 |             if (length(excludeSearchTable$Measurement)>0) {
320 |               if (declare==TRUE) {message("querying Measurements")}
321 |               measurement_codes <- paste(excludeSearchTable$Measurement,collapse=",")
322 |               pts_measurement_exclude <- searchMeasurement(useSource,measurement_codes)
323 |             } else {
324 |               pts_measurement_exclude <- NULL
325 |             }
326 | 
327 | 
328 |             # if any drug table codes
329 |             if (length(excludeSearchTable$Drug)>0) {
330 |               if (declare==TRUE) {message("querying Drugs")}
331 |               drug_codes <- paste(excludeSearchTable$Drug,collapse=",")
332 |               pts_drug_exclude <- searchDrug(useSource,drug_codes)
333 |             } else {
334 |               pts_drug_exclude <- NULL
335 |             }
336 | 
337 |             # if any device table codes
338 |             if (length(excludeSearchTable$Device)>0) {
339 |               if (declare==TRUE) {message("querying Devices")}
340 |               device_codes <- paste(excludeSearchTable$Device,collapse=",")
341 |               pts_device_exclude <- searchDevice(useSource,device_codes)
342 |             } else {
343 |               pts_device_exclude <- NULL
344 |             }
345 | 
346 |             # if any procedure table codes
347 |             if (length(excludeSearchTable$Procedure)>0) {
348 |               if (declare==TRUE) {message("querying Procedures")}
349 |               procedure_codes <- paste(excludeSearchTable$Procedure,collapse=",")
350 |               pts_procedure_exclude <- searchProcedure(useSource,procedure_codes)
351 |             } else {
352 |               pts_procedure_exclude <- NULL
353 |             }
354 | 
355 |             # save mapped concepts with patient counts
356 |             if (save == TRUE) {
357 |               if (!(all(array(sapply(excludeSearchTable, length)))==0)) {
358 |                 fout <- paste0(outdir,"/exclusion_criteria_mapped_concepts.txt")
359 |                 excludeDataInfowPatients <- summarizeFoundConcepts(pts_condition_exclude, pts_observation_exclude, pts_measurement_exclude, pts_device_exclude, pts_drug_exclude, pts_procedure_exclude)
360 |                 # merge pt counts with all concepts
361 |                 excludeMappingCombined <- merge(excludeMappingDataInfo, excludeDataInfowPatients, by.x = "descendant_concept_id", by.y = "concept_id", all.x = TRUE)
362 |                 write.table(excludeMappingCombined, file = fout, sep='\t', row.names=F, quote=F)
363 |               }
364 |             }
365 | 
366 |           } else { #endif exclusion criteria match
367 |             message("Warning: exclusion criteria were not able to map to ontology. Therefore, query running for inclusion criteria only.")
368 |           }
369 |         } # endif exclusion
370 | 
371 |         # 5 - PROCESS INCLUSION(/EXCLUSION) depending on functions
372 | 
373 |         if (function_in=="or") {
374 |           include_patient_list <- identifyPatientsOR(pts_condition_include, pts_observation_include, pts_measurement_include, pts_device_include, pts_drug_include, pts_procedure_include)
375 |         } else if (function_in=="and") {
376 |           include_patient_list <- identifyPatientsAND(inclusionCriteriaMapped, includeSynonymDataFiltered, includeMappingDataInfo, pts_condition_include, pts_observation_include, pts_measurement_include, pts_device_include, pts_drug_include, pts_procedure_include)
377 |         }
378 | 
379 |         patient_list <- include_patient_list
380 | 
381 |         if (!is.null(vocabulary_out) & !is.null(codes_out) & !is.null(strategy_out) & !is.null(function_out)) { # if any exclusion criteria
382 | 
383 |           if (nrow(exclusionCriteriaMapped)>0) { #verify that exclusion criteria were found
384 | 
385 |             if (function_out=="or") {
386 |               exclude_patient_list <- identifyPatientsOR(pts_condition_exclude, pts_observation_exclude, pts_measurement_exclude, pts_device_exclude, pts_drug_exclude, pts_procedure_exclude)
387 |             } else if (function_out=="and") {
388 |               exclude_patient_list <- identifyPatientsAND(exclusionCriteriaMapped, excludeSynonymDataFiltered, excludeMappingDataInfo, pts_condition_exclude, pts_observation_exclude, pts_measurement_exclude, pts_device_exclude, pts_drug_exclude, pts_procedure_exclude)
389 |             }
390 | 
391 |             inclusion_exclusion_overlapping_patients <- intersect(patient_list,exclude_patient_list)
392 |             print(paste0(length(inclusion_exclusion_overlapping_patients), " overlapping patients excluded from the original inclusion input based on the exclusion criteria."))
393 | 
394 |             # remove overlapping patients
395 |             patient_list <- setdiff(patient_list,inclusion_exclusion_overlapping_patients)
396 | 
397 |           } # endif exclusion criteria found
398 | 
399 |         } # endif exclusion null
400 | 
401 | 
402 |         print(paste0(length(patient_list), " patients found that meet the inclusion criteria."))
403 | 
404 | 
405 |         if (save == TRUE) {
406 |           fout = paste0(outdir,"/outcome.txt")
407 | 
408 |           sink(fout)
409 | 
410 |           if (exists("inclusion_exclusion_overlapping_patients")) {
411 |             cat(paste0(length(include_patient_list), " patients found from the inclusion criteria ONLY.\n"))
412 |             cat(paste0(length(exclude_patient_list), " patients found from the exclusion criteria ONLY.\n"))
413 |             cat(paste0(length(inclusion_exclusion_overlapping_patients), " overlapping patients excluded from the original inclusion input based on the exclusion criteria.\n"))
414 |             cat(paste0(length(patient_list), " patients found that meet the inclusion and exclusion criteria.\n"))
415 |           } else {
416 |             cat(paste0(length(patient_list), " patients found that meet the inclusion criteria.\n"))
417 |           }
418 |           sink()
419 | 
420 |           fout <- paste0(outdir,"/patient_list.txt")
421 |           write.table(data.frame(patient_list),file=fout, sep='\t', row.names = F, quote=F)
422 |           message(paste0("Outcome from query saved in: ",outdir))
423 | 
424 |         }
425 | 
426 | 
427 |         return(patient_list)
428 | 
429 |       } else { # endif no inclusion found
430 |         message("Error: none of the inclusion criteria were able to map to the ontology. Please check terms and try again.")
431 |       }
432 | 
433 |     } else {  #endif pass_requirements
434 |       message("Error: invalid strategies and/or functions selected. Please use either 'direct' or 'mapped' for strategies. Please use either 'and' or 'or' for functions.")
435 |     }
436 | 
437 |   } else { #endif dataOntology exists
438 |     message("Error: dataOntology does not exist. Please first run makeDataOntology.")
439 |   }
440 | 
441 | }
442 | 
443 | 
444 | 
445 | ################## specific table search functions
446 | 
#' Find patients with matching condition concepts
#'
#' Internal helper: queries 'condition_occurrence' and returns the distinct (person_id, condition_concept_id) rows whose filtered concept column matches one of the supplied codes. The returned concept column is always condition_concept_id, whichever column is filtered.
#'
#' @param useSource text spliced between 'condition' and '_concept_id' to pick the column that is filtered (NOTE(review): presumably "" or "_source" -- confirm against callers)
#' @param codes text placed verbatim inside the SQL IN (...) clause (presumably a comma-separated list of concept ids)
#'
#' @return data.table holding the query result with duplicate rows removed
#' @import data.table DBI
#' @noRd
searchCondition <- function(useSource, codes) {
  # column the WHERE clause filters on
  filterColumn <- paste0("condition", useSource, "_concept_id")
  sql <- paste0("SELECT person_id, condition_concept_id FROM condition_occurrence WHERE ", filterColumn, " IN (", codes, ") ")

  # keep one row per distinct person/concept pair
  unique(data.table(sqlQuery(sql)))
}
455 | 
#' Find patients with matching observation concepts
#'
#' Internal helper: queries 'observation' and returns the distinct (person_id, observation_concept_id) rows whose filtered concept column matches one of the supplied codes. The returned concept column is always observation_concept_id, whichever column is filtered.
#'
#' @param useSource text spliced between 'observation' and '_concept_id' to pick the column that is filtered (NOTE(review): presumably "" or "_source" -- confirm against callers)
#' @param codes text placed verbatim inside the SQL IN (...) clause (presumably a comma-separated list of concept ids)
#'
#' @return data.table holding the query result with duplicate rows removed
#' @import data.table DBI
#' @noRd
searchObservation <- function(useSource, codes) {
  # column the WHERE clause filters on
  filterColumn <- paste0("observation", useSource, "_concept_id")
  sql <- paste0("SELECT person_id, observation_concept_id FROM observation WHERE ", filterColumn, " IN (", codes, ") ")

  # keep one row per distinct person/concept pair
  unique(data.table(sqlQuery(sql)))
}
464 | 
#' Find patients with matching measurement concepts
#'
#' Internal helper: queries 'measurement' and returns the distinct (person_id, measurement_concept_id) rows whose filtered concept column matches one of the supplied codes. The returned concept column is always measurement_concept_id, whichever column is filtered.
#'
#' @param useSource text spliced between 'measurement' and '_concept_id' to pick the column that is filtered (NOTE(review): presumably "" or "_source" -- confirm against callers)
#' @param codes text placed verbatim inside the SQL IN (...) clause (presumably a comma-separated list of concept ids)
#'
#' @return data.table holding the query result with duplicate rows removed
#' @import data.table DBI
#' @noRd
searchMeasurement <- function(useSource, codes) {
  # column the WHERE clause filters on
  filterColumn <- paste0("measurement", useSource, "_concept_id")
  sql <- paste0("SELECT person_id, measurement_concept_id FROM measurement WHERE ", filterColumn, " IN (", codes, ") ")

  # keep one row per distinct person/concept pair
  unique(data.table(sqlQuery(sql)))
}
473 | 
#' Find patients with matching drug concepts
#'
#' Internal helper: queries 'drug_exposure' and returns the distinct (person_id, drug_concept_id) rows whose filtered concept column matches one of the supplied codes. The returned concept column is always drug_concept_id, whichever column is filtered.
#'
#' @param useSource text spliced between 'drug' and '_concept_id' to pick the column that is filtered (NOTE(review): presumably "" or "_source" -- confirm against callers)
#' @param codes text placed verbatim inside the SQL IN (...) clause (presumably a comma-separated list of concept ids)
#'
#' @return data.table holding the query result with duplicate rows removed
#' @import data.table DBI
#' @noRd
searchDrug <- function(useSource, codes) {
  # column the WHERE clause filters on
  filterColumn <- paste0("drug", useSource, "_concept_id")
  sql <- paste0("SELECT person_id, drug_concept_id FROM drug_exposure WHERE ", filterColumn, " IN (", codes, ") ")

  # keep one row per distinct person/concept pair
  unique(data.table(sqlQuery(sql)))
}
482 | 
#' Find patients with matching device concepts
#'
#' Internal helper: queries 'device_exposure' and returns the distinct (person_id, device_concept_id) rows whose filtered concept column matches one of the supplied codes. The returned concept column is always device_concept_id, whichever column is filtered.
#'
#' @param useSource text spliced between 'device' and '_concept_id' to pick the column that is filtered (NOTE(review): presumably "" or "_source" -- confirm against callers)
#' @param codes text placed verbatim inside the SQL IN (...) clause (presumably a comma-separated list of concept ids)
#'
#' @return data.table holding the query result with duplicate rows removed
#' @import data.table DBI
#' @noRd
searchDevice <- function(useSource, codes) {
  # column the WHERE clause filters on
  filterColumn <- paste0("device", useSource, "_concept_id")
  sql <- paste0("SELECT person_id, device_concept_id FROM device_exposure WHERE ", filterColumn, " IN (", codes, ") ")

  # keep one row per distinct person/concept pair
  unique(data.table(sqlQuery(sql)))
}
491 | 
#' Find patients with matching procedure concepts
#'
#' Internal helper: queries 'procedure_occurrence' and returns the distinct (person_id, procedure_concept_id) rows whose filtered concept column matches one of the supplied codes. The returned concept column is always procedure_concept_id, whichever column is filtered.
#'
#' @param useSource text spliced between 'procedure' and '_concept_id' to pick the column that is filtered (NOTE(review): presumably "" or "_source" -- confirm against callers)
#' @param codes text placed verbatim inside the SQL IN (...) clause (presumably a comma-separated list of concept ids)
#'
#' @return data.table holding the query result with duplicate rows removed
#' @import data.table DBI
#' @noRd
searchProcedure <- function(useSource, codes) {
  # column the WHERE clause filters on
  filterColumn <- paste0("procedure", useSource, "_concept_id")
  sql <- paste0("SELECT person_id, procedure_concept_id FROM procedure_occurrence WHERE ", filterColumn, " IN (", codes, ") ")

  # keep one row per distinct person/concept pair
  unique(data.table(sqlQuery(sql)))
}
500 | 
501 | 
502 | 


--------------------------------------------------------------------------------
/R/getClinicalData.R:
--------------------------------------------------------------------------------
  1 | #' Retrieves all patient clinical data
  2 | #'
  3 | #' Wrapper for domain-specific getData functions (e.g., getObservations). Produces a list of tables for all relevant domains.
  4 | #'
  5 | #' @param patient_list Comma-separated string of patient ids
  6 | #' @param declare TRUE/FALSE will output status and data information during the process
  7 | #'
  8 | #' @return a list of tables for each all data within each domain (e.g., Condition) for all patients provided (can access by ptClinicalData$Condition).
  9 | #' @import data.table DBI
 10 | #' @export
 11 | #'
 12 | #' @examples
 13 | #' ptClinicalData <- getClinicalData("1,2", declare=TRUE)
 14 | getClinicalData<- function(patient_list, declare=FALSE) {
 15 | 
 16 |   if (exists("dataOntology")) { # ensure dataOntology exists
 17 | 
 18 |     if (length(patient_list) > 1){
 19 |       patient_list <- paste(patient_list, collapse = ",")
 20 |     }
 21 | 
 22 |     ### retrieves data from each data function below
 23 |     ptObsData <- getObservations(patient_list,declare=declare)
 24 |     ptCondData <- getConditions(patient_list,declare=declare)
 25 |     ptProcData <- getProcedures(patient_list,declare=declare)
 26 |     ptsMedsData <- getMedications(patient_list,declare=declare)
 27 |     ptMeasData <- getMeasurements(patient_list,declare=declare)
 28 |     ptDeviceData <- getDevices(patient_list,declare=declare)
 29 | 
 30 |     ptClinicalData <- list(ptObsData,ptCondData,ptProcData,ptsMedsData,ptMeasData,ptDeviceData)
 31 |     names(ptClinicalData) <- c("Observation", "Condition", "Procedures", "Medications","Measurements","Devices")
 32 | 
 33 |     return(ptClinicalData)
 34 | 
 35 |   } else { #endif dataOntology exists
 36 |     message("Error: dataOntology does not exist. Please first run makeDataOntology.")
 37 |   }
 38 | 
 39 | }
 40 | 
 41 | 
 42 | #################################  modality specific functions
 43 | 
 44 | #' Retrieves all patient clinical data from Observations table
 45 | #'
 46 | #' Produces a table for relevant concepts contained in the 'observation' table mapped through the data ontology for a patient list. Data retrieved include: observation_type, value, etc.
 47 | #'
 48 | #' @param patient_list Comma-separated string of patient ids
 49 | #' @param declare TRUE/FALSE will output status and data information during the process
 50 | #'
 51 | #' @return a table of relevant clinical data contained with in the 'observation' table
 52 | #' @import data.table DBI
 53 | #' @export
 54 | #'
 55 | #' @examples
 56 | #' ptObsData <- getObservations("1,2", declare=TRUE)
 57 | getObservations <- function(patient_list, declare=FALSE) {
 58 | 
 59 |   if (exists("dataOntology")) { # ensure dataOntology exists
 60 | 
 61 |     if (length(patient_list) > 1){
 62 |       patient_list <- paste(patient_list, collapse = ",")
 63 |     }
 64 | 
 65 |     ## observation
 66 |     # observation_concept_id is SNOMED
 67 |     queryStatement <- paste0('SELECT person_id, observation_concept_id, observation_source_concept_id, observation_datetime, observation_type_concept_id, value_as_number, value_as_string, value_as_concept_id, visit_occurrence_id, observation_source_value, unit_source_value FROM observation WHERE person_id IN (', patient_list,') ')
 68 | 
 69 |     if (declare==TRUE) {message("Loading Observations data......")}
 70 | 
 71 |     ptObsData <- sqlQuery(queryStatement)
 72 |     ptObsData <- data.table(ptObsData) # convert to data.table
 73 | 
 74 |     ### check for any data
 75 |     if (nrow(ptObsData)==0) {
 76 |       message("No observation data found for patient list")
 77 |     } else {
 78 |       if (declare==TRUE) {message("Observation data loaded; formatting...")}
 79 | 
 80 |       # obtain table specific ontology
 81 |       observationTableOntology <- dataOntology[domain_id=="Observation"]
 82 | 
 83 |       # format clinical data
 84 |       ptObsData <- merge(ptObsData, observationTableOntology[,c("concept_id","vocabulary_id","concept_code","concept_name")], by.x="observation_concept_id",by.y="concept_id",all.x=TRUE)
 85 |       names(ptObsData)[names(ptObsData) == 'concept_code'] <- 'observation_concept_code' # rename column
 86 |       names(ptObsData)[names(ptObsData) == 'concept_name'] <- 'observation_concept_name' # rename column
 87 |       names(ptObsData)[names(ptObsData) == 'vocabulary_id'] <- 'observation_concept_vocabulary' # rename column
 88 |       ptObsData <- ptObsData[,-"observation_concept_id"]
 89 | 
 90 |       ptObsData <- merge(ptObsData, observationTableOntology[,c("concept_id","vocabulary_id", "concept_code","concept_name")], by.x="observation_source_concept_id",by.y="concept_id",all.x=TRUE)
 91 |       names(ptObsData)[names(ptObsData) == 'concept_code'] <- 'observation_source_code' # rename column
 92 |       names(ptObsData)[names(ptObsData) == 'concept_name'] <- 'observation_source_name' # rename column
 93 |       names(ptObsData)[names(ptObsData) == 'vocabulary_id'] <- 'observation_source_vocabulary' # rename column
 94 |       ptObsData <- ptObsData[,-"observation_source_concept_id"]
 95 | 
 96 |       # format metadata
 97 |       ptObsData <- merge(ptObsData,dataOntology[,c("concept_id","concept_name")],by.x="observation_type_concept_id",by.y="concept_id", all.x=TRUE)
 98 |       names(ptObsData)[names(ptObsData) == 'concept_name'] <- 'observation_type' # rename column
 99 |       ptObsData <- ptObsData[,-"observation_type_concept_id"]
100 | 
101 |       ptObsData=merge(ptObsData,dataOntology[,c("concept_id","concept_name")],by.x="value_as_concept_id",by.y="concept_id", all.x=TRUE)
102 |       names(ptObsData)[names(ptObsData) == 'concept_name'] <- 'value_concept' # rename column
103 |       ptObsData <- ptObsData[,-"value_as_concept_id"]
104 | 
105 |       if (declare==TRUE) {message("Observation data formatted successfully ")}
106 | 
107 |     }
108 | 
109 |     return(ptObsData)
110 | 
111 |   } else { #endif dataOntology exists
112 |     message("Error: dataOntology does not exist. Please first run makeDataOntology.")
113 |   }
114 | 
115 | }
116 | 
117 | 
118 | 
#' Retrieve condition records for a set of patients
#'
#' Queries the 'condition_occurrence' table for the supplied patients. When rows are found, the condition, condition-source, condition-type and condition-status concept id columns are replaced by labels looked up in dataOntology (code, name and vocabulary for the first two; name only for the last two).
#'
#' @param patient_list Patient ids, either as a single comma-separated string or as a vector (a vector of length > 1 is collapsed into a comma-separated string)
#' @param declare TRUE/FALSE; when TRUE, status messages are emitted while loading and formatting
#'
#' @return a data.table of records from the 'condition_occurrence' table with concept ids mapped to labels. If no rows are found, a message is emitted and the empty table is returned; if dataOntology does not exist, a message is emitted and NULL is returned invisibly.
#' @import data.table DBI
#' @export
#'
#' @examples
#' ptCondData <- getConditions("1,2", declare=TRUE)
getConditions <- function(patient_list, declare = FALSE) {

  # concepts cannot be mapped without the ontology
  if (!exists("dataOntology")) {
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  # left-join `lookup` onto `tbl` via `idCol`, rename the joined columns
  # (names of `relabel` are the old names, values the new ones), drop `idCol`
  attachConcept <- function(tbl, lookup, idCol, relabel) {
    tbl <- merge(tbl, lookup, by.x = idCol, by.y = "concept_id", all.x = TRUE)
    for (from in names(relabel)) {
      names(tbl)[names(tbl) == from] <- relabel[[from]]
    }
    tbl[, !idCol, with = FALSE]
  }

  ids <- patient_list
  if (length(ids) > 1) {
    ids <- paste(ids, collapse = ",")
  }

  sql <- paste0("SELECT person_id, condition_concept_id, condition_start_datetime, visit_occurrence_id, condition_type_concept_id, condition_source_value, condition_source_concept_id, condition_status_concept_id FROM condition_occurrence WHERE person_id IN (", ids, ") ")

  verbose <- declare == TRUE
  if (verbose) {
    message("Loading Condition data...")
  }

  cond <- data.table(sqlQuery(sql))

  # nothing to format when the query came back empty
  if (nrow(cond) == 0) {
    message("No condition data found for patient list")
    return(cond)
  }

  if (verbose) {
    message("Condition data loaded; formatting...")
  }

  # labels from every domain whose id contains "Condition"
  domainLookup <- dataOntology[grep("Condition", domain_id)][, c("concept_id", "vocabulary_id", "concept_code", "concept_name")]

  cond <- attachConcept(cond, domainLookup, "condition_concept_id",
                        c(concept_code = "condition_concept_code",
                          concept_name = "condition_concept_name",
                          vocabulary_id = "condition_concept_vocabulary"))

  cond <- attachConcept(cond, domainLookup, "condition_source_concept_id",
                        c(concept_code = "condition_source_code",
                          concept_name = "condition_source_name",
                          vocabulary_id = "condition_source_vocabulary"))

  # metadata concepts are looked up across the whole ontology
  nameLookup <- dataOntology[, c("concept_id", "concept_name")]

  cond <- attachConcept(cond, nameLookup, "condition_type_concept_id",
                        c(concept_name = "condition_type"))
  cond <- attachConcept(cond, nameLookup, "condition_status_concept_id",
                        c(concept_name = "condition_status_type"))

  if (verbose) {
    message("Condition data formatted successfully. ")
  }

  cond
}
192 | 
193 | 
194 | 
#' Retrieve procedure records for a set of patients
#'
#' Queries the 'procedure_occurrence' table for the supplied patients. When rows are found, the procedure, procedure-source and procedure-type concept id columns are replaced by labels looked up in dataOntology (code, name and vocabulary for the first two; name only for the type).
#'
#' @param patient_list Patient ids, either as a single comma-separated string or as a vector (a vector of length > 1 is collapsed into a comma-separated string)
#' @param declare TRUE/FALSE; when TRUE, status messages are emitted while loading and formatting
#'
#' @return a data.table of records from the 'procedure_occurrence' table with concept ids mapped to labels. If no rows are found, a message is emitted and the empty table is returned; if dataOntology does not exist, a message is emitted and NULL is returned invisibly.
#' @import data.table DBI
#' @export
#'
#' @examples
#' ptProcData <- getProcedures("1,2", declare=TRUE)
getProcedures <- function(patient_list, declare = FALSE) {

  # concepts cannot be mapped without the ontology
  if (!exists("dataOntology")) {
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  # left-join `lookup` onto `tbl` via `idCol`, rename the joined columns
  # (names of `relabel` are the old names, values the new ones), drop `idCol`
  attachConcept <- function(tbl, lookup, idCol, relabel) {
    tbl <- merge(tbl, lookup, by.x = idCol, by.y = "concept_id", all.x = TRUE)
    for (from in names(relabel)) {
      names(tbl)[names(tbl) == from] <- relabel[[from]]
    }
    tbl[, !idCol, with = FALSE]
  }

  ids <- patient_list
  if (length(ids) > 1) {
    ids <- paste(ids, collapse = ",")
  }

  sql <- paste0("SELECT person_id, procedure_concept_id, procedure_datetime, quantity, visit_occurrence_id, procedure_type_concept_id, procedure_source_value, procedure_source_concept_id  FROM procedure_occurrence WHERE person_id IN (", ids, ") ")

  verbose <- declare == TRUE
  if (verbose) {
    message("Loading Procedures data...")
  }

  procs <- data.table(sqlQuery(sql))

  # nothing to format when the query came back empty
  if (nrow(procs) == 0) {
    message("No procedure data found for patient list")
    return(procs)
  }

  if (verbose) {
    message("Procedure data loaded; formatting...")
  }

  # labels restricted to the table's own domain
  domainLookup <- dataOntology[domain_id == "Procedure"][, c("concept_id", "vocabulary_id", "concept_code", "concept_name")]

  procs <- attachConcept(procs, domainLookup, "procedure_concept_id",
                         c(concept_code = "procedure_concept_code",
                           concept_name = "procedure_concept_name",
                           vocabulary_id = "procedure_concept_vocabulary"))

  procs <- attachConcept(procs, domainLookup, "procedure_source_concept_id",
                         c(concept_code = "procedure_source_code",
                           concept_name = "procedure_source_name",
                           vocabulary_id = "procedure_source_vocabulary"))

  # metadata concepts are looked up across the whole ontology
  nameLookup <- dataOntology[, c("concept_id", "concept_name")]

  procs <- attachConcept(procs, nameLookup, "procedure_type_concept_id",
                         c(concept_name = "procedure_type"))

  if (verbose) {
    message("Procedure data formatted successfully.")
  }

  procs
}
260 | 
261 | 
262 | 
#' Retrieve medication records for a set of patients
#'
#' Queries the 'drug_exposure' table for the supplied patients. When rows are found, the drug, drug-source, drug-type and route concept id columns are replaced by labels looked up in dataOntology (code, name and vocabulary for the first two, stored in medication_* columns; name only for drug_type and route_concept).
#'
#' @param patient_list Patient ids, either as a single comma-separated string or as a vector (a vector of length > 1 is collapsed into a comma-separated string)
#' @param declare TRUE/FALSE; when TRUE, status messages are emitted while loading and formatting
#'
#' @return a data.table of records from the 'drug_exposure' table with concept ids mapped to labels. If no rows are found, a message is emitted and the empty table is returned; if dataOntology does not exist, a message is emitted and NULL is returned invisibly.
#' @import data.table DBI
#' @export
#'
#' @examples
#' ptsMedsData <- getMedications("1,2", declare=TRUE)
getMedications <- function(patient_list, declare = FALSE) {

  # concepts cannot be mapped without the ontology
  if (!exists("dataOntology")) {
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  # left-join `lookup` onto `tbl` via `idCol`, rename the joined columns
  # (names of `relabel` are the old names, values the new ones), drop `idCol`
  attachConcept <- function(tbl, lookup, idCol, relabel) {
    tbl <- merge(tbl, lookup, by.x = idCol, by.y = "concept_id", all.x = TRUE)
    for (from in names(relabel)) {
      names(tbl)[names(tbl) == from] <- relabel[[from]]
    }
    tbl[, !idCol, with = FALSE]
  }

  ids <- patient_list
  if (length(ids) > 1) {
    ids <- paste(ids, collapse = ",")
  }

  sql <- paste0("SELECT person_id, drug_concept_id, drug_exposure_start_datetime, drug_exposure_end_datetime, drug_type_concept_id, stop_reason, refills, quantity, days_supply, sig, route_concept_id, dose_unit_source_value, visit_occurrence_id, drug_source_value, drug_source_concept_id, route_source_value FROM drug_exposure WHERE person_id IN (", ids, ") ")

  verbose <- declare == TRUE
  if (verbose) {
    message("Loading Medications data...")
  }

  meds <- data.table(sqlQuery(sql))

  # nothing to format when the query came back empty
  if (nrow(meds) == 0) {
    message("No medication data found for patient list")
    return(meds)
  }

  if (verbose) {
    message("Medication data loaded; formatting...")
  }

  # labels restricted to the table's own domain
  domainLookup <- dataOntology[domain_id == "Drug"][, c("concept_id", "vocabulary_id", "concept_code", "concept_name")]

  meds <- attachConcept(meds, domainLookup, "drug_concept_id",
                        c(concept_code = "medication_concept_code",
                          concept_name = "medication_concept_name",
                          vocabulary_id = "medication_concept_vocabulary"))

  meds <- attachConcept(meds, domainLookup, "drug_source_concept_id",
                        c(concept_code = "medication_source_code",
                          concept_name = "medication_source_name",
                          vocabulary_id = "medication_source_vocabulary"))

  # metadata concepts are looked up across the whole ontology
  nameLookup <- dataOntology[, c("concept_id", "concept_name")]

  meds <- attachConcept(meds, nameLookup, "drug_type_concept_id",
                        c(concept_name = "drug_type"))
  meds <- attachConcept(meds, nameLookup, "route_concept_id",
                        c(concept_name = "route_concept"))

  if (verbose) {
    message("Medication data formatted successfully.")
  }

  meds
}
333 | 
334 | 
#' Retrieve measurement records for a set of patients
#'
#' Queries the 'measurement' table for the supplied patients. When rows are found, the measurement, measurement-source, measurement-type, value-as-concept and unit concept id columns are replaced by labels looked up in dataOntology (code, name and vocabulary for the first two; name only for the remaining three).
#'
#' @param patient_list Patient ids, either as a single comma-separated string or as a vector (a vector of length > 1 is collapsed into a comma-separated string)
#' @param declare TRUE/FALSE; when TRUE, status messages are emitted while loading and formatting
#'
#' @return a data.table of records from the 'measurement' table with concept ids mapped to labels. If no rows are found, a message is emitted and the empty table is returned; if dataOntology does not exist, a message is emitted and NULL is returned invisibly.
#' @import data.table DBI
#' @export
#'
#' @examples
#' ptMeasData <- getMeasurements("1,2", declare=TRUE)
getMeasurements <- function(patient_list, declare = FALSE) {

  # concepts cannot be mapped without the ontology
  if (!exists("dataOntology")) {
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  # left-join `lookup` onto `tbl` via `idCol`, rename the joined columns
  # (names of `relabel` are the old names, values the new ones), drop `idCol`
  attachConcept <- function(tbl, lookup, idCol, relabel) {
    tbl <- merge(tbl, lookup, by.x = idCol, by.y = "concept_id", all.x = TRUE)
    for (from in names(relabel)) {
      names(tbl)[names(tbl) == from] <- relabel[[from]]
    }
    tbl[, !idCol, with = FALSE]
  }

  ids <- patient_list
  if (length(ids) > 1) {
    ids <- paste(ids, collapse = ",")
  }

  sql <- paste0("SELECT person_id, measurement_concept_id, measurement_datetime, measurement_type_concept_id, value_as_number, value_as_concept_id, unit_concept_id, visit_occurrence_id, measurement_source_value, measurement_source_concept_id FROM measurement WHERE person_id IN (", ids, ") ")

  verbose <- declare == TRUE
  if (verbose) {
    message("Loading Measurements data...")
  }

  meas <- data.table(sqlQuery(sql))

  # nothing to format when the query came back empty
  if (nrow(meas) == 0) {
    message("No measurement data found for patient list")
    return(meas)
  }

  if (verbose) {
    message("Measurement data loaded; formatting...")
  }

  # labels restricted to the table's own domain
  domainLookup <- dataOntology[domain_id == "Measurement"][, c("concept_id", "vocabulary_id", "concept_code", "concept_name")]

  meas <- attachConcept(meas, domainLookup, "measurement_concept_id",
                        c(concept_code = "measurement_concept_code",
                          concept_name = "measurement_concept_name",
                          vocabulary_id = "measurement_concept_vocabulary"))

  meas <- attachConcept(meas, domainLookup, "measurement_source_concept_id",
                        c(concept_code = "measurement_source_code",
                          concept_name = "measurement_source_name",
                          vocabulary_id = "measurement_source_vocabulary"))

  # metadata concepts are looked up across the whole ontology
  nameLookup <- dataOntology[, c("concept_id", "concept_name")]

  meas <- attachConcept(meas, nameLookup, "measurement_type_concept_id",
                        c(concept_name = "measurement_type"))
  meas <- attachConcept(meas, nameLookup, "value_as_concept_id",
                        c(concept_name = "value_concept"))
  meas <- attachConcept(meas, nameLookup, "unit_concept_id",
                        c(concept_name = "unit_concept"))

  if (verbose) {
    message("Measurement data formatted successfully.")
  }

  meas
}
409 | 
410 | 
#' Retrieve device records for a set of patients
#'
#' Queries the 'device_exposure' table for the supplied patients. When rows are found, the device, device-source and device-type concept id columns are replaced by labels looked up in dataOntology (code, name and vocabulary for the first two; name only for the type).
#'
#' @param patient_list Patient ids, either as a single comma-separated string or as a vector (a vector of length > 1 is collapsed into a comma-separated string)
#' @param declare TRUE/FALSE; when TRUE, status messages are emitted while loading and formatting
#'
#' @return a data.table of records from the 'device_exposure' table with concept ids mapped to labels. If no rows are found, a message is emitted and the empty table is returned; if dataOntology does not exist, a message is emitted and NULL is returned invisibly.
#' @import data.table DBI
#' @export
#'
#' @examples
#' ptDeviceData <- getDevices("1,2", declare=TRUE)
getDevices <- function(patient_list, declare = FALSE) {

  # concepts cannot be mapped without the ontology
  if (!exists("dataOntology")) {
    message("Error: dataOntology does not exist. Please first run makeDataOntology.")
    return(invisible(NULL))
  }

  # left-join `lookup` onto `tbl` via `idCol`, rename the joined columns
  # (names of `relabel` are the old names, values the new ones), drop `idCol`
  attachConcept <- function(tbl, lookup, idCol, relabel) {
    tbl <- merge(tbl, lookup, by.x = idCol, by.y = "concept_id", all.x = TRUE)
    for (from in names(relabel)) {
      names(tbl)[names(tbl) == from] <- relabel[[from]]
    }
    tbl[, !idCol, with = FALSE]
  }

  ids <- patient_list
  if (length(ids) > 1) {
    ids <- paste(ids, collapse = ",")
  }

  sql <- paste0("SELECT person_id, device_concept_id, device_exposure_start_datetime, device_exposure_end_datetime, device_type_concept_id, device_source_value, visit_occurrence_id, device_source_concept_id FROM device_exposure WHERE person_id IN (", ids, ") ")

  verbose <- declare == TRUE
  if (verbose) {
    message("Loading Devices data...")
  }

  devices <- data.table(sqlQuery(sql))

  # nothing to format when the query came back empty
  if (nrow(devices) == 0) {
    message("No device data found for patient list")
    return(devices)
  }

  if (verbose) {
    message("Device data loaded; formatting...")
  }

  # labels from every domain whose id contains "Device"
  domainLookup <- dataOntology[grep("Device", domain_id)][, c("concept_id", "vocabulary_id", "concept_code", "concept_name")]

  devices <- attachConcept(devices, domainLookup, "device_concept_id",
                           c(concept_code = "device_concept_code",
                             concept_name = "device_concept_name",
                             vocabulary_id = "device_concept_vocabulary"))

  devices <- attachConcept(devices, domainLookup, "device_source_concept_id",
                           c(concept_code = "device_source_code",
                             concept_name = "device_source_name",
                             vocabulary_id = "device_source_vocabulary"))

  # metadata concepts are looked up across the whole ontology
  nameLookup <- dataOntology[, c("concept_id", "concept_name")]

  devices <- attachConcept(devices, nameLookup, "device_type_concept_id",
                           c(concept_name = "device_type"))

  if (verbose) {
    message("Device data formatted successfully.")
  }

  devices
}
475 | 


--------------------------------------------------------------------------------
/R/getDemographics.R:
--------------------------------------------------------------------------------
 1 | #' Retrieves patient demographic data
 2 | #'
 3 | #' Compiles demographic data for all patients or a given patient list if provided. Concepts are mapped through the created data ontology. Demographic data are retrieved from 'person' and 'death' tables and include: birthdate, deathdate, gender, ethnicity, and race. Age is calculated as the current year minus year_of_birth.
 4 | #'
 5 | #' @param patient_list NULL, a comma-separated string of patient ids, or a vector of ids (collapsed into a comma-separated string). A provided patient_list will restrict search to ids. NULL will return demographic data for all available patients.
 6 | #' @param declare TRUE/FALSE will output status and data information during the process
 7 | #'
 8 | #' @return table of mapped demographic concepts for all patients or specific ones for a provided csv formatted string of ids; NULL (invisibly) if no patients are found or dataOntology does not exist
 9 | #' @import data.table DBI
10 | #' @export
11 | #'
12 | #' @examples
13 | #' ptDemo <- getDemographics(patient_list=NULL,declare=TRUE)
14 | getDemographics <- function(patient_list=NULL, declare=FALSE) { # patient list will restrict search
15 | 
16 |   if (!exists("dataOntology")) { # ensure dataOntology exists
17 |     message("Error: dataOntology does not exist. Please first run makeDataOntology.")
18 |     return(invisible(NULL))
19 |   }
20 | 
21 |   if (length(patient_list) > 1) { # collapse a vector of ids into a csv string
22 |     patient_list <- paste(patient_list, collapse = ",")
23 |   }
24 | 
25 |   queryStatement <- "SELECT person_id, birth_datetime, year_of_birth, gender_concept_id, ethnicity_concept_id, race_concept_id FROM person" # year_of_birth added in case birth_datetime IS NULL
26 |   deathqueryStatement <- "SELECT person_id, death_date FROM death"
27 | 
28 |   if (!is.null(patient_list)) { # if patient_list not null, append the same WHERE statement to both queries
29 |     whereStatement <- paste0(' WHERE person_id IN (', patient_list, ') ')
30 |     queryStatement <- paste0(queryStatement, whereStatement)
31 |     deathqueryStatement <- paste0(deathqueryStatement, whereStatement)
32 |   }
33 | 
34 |   # first get main patient data
35 |   ptDemo <- sqlQuery(queryStatement)
36 | 
37 |   if (nrow(ptDemo) == 0) { # check if any pts found
38 |     if (declare == TRUE) {message("No patients found for current input")}
39 |     return(invisible(NULL))
40 |   }
41 | 
42 |   if (declare == TRUE) {message("Data loaded; formatting...")}
43 | 
44 |   ptDemo <- data.table(ptDemo) # convert to data.table
45 |   current_year <- as.numeric(format(Sys.Date(), "%Y")) # get current year to calculate age
46 |   ptDemo$age <- current_year - ptDemo$year_of_birth # calculate age
47 | 
48 |   # map concepts to reference table
49 |   # concept id column -> ontology domain (also used as the name of the new column)
50 |   demographicDomains <- c(gender_concept_id = "Gender", race_concept_id = "Race", ethnicity_concept_id = "Ethnicity")
51 | 
52 |   for (idColumn in names(demographicDomains)) {
53 |     domainName <- demographicDomains[[idColumn]]
54 |     # all.x keeps patients whose concept id is not in the ontology (marked Unknown below)
55 |     ptDemo <- merge(ptDemo, dataOntology[domain_id == domainName, c("concept_id", "concept_name")], by.x = idColumn, by.y = "concept_id", all.x = TRUE)
56 |     names(ptDemo)[names(ptDemo) == 'concept_name'] <- domainName # rename column
57 |     ptDemo <- markNAasUnknown(ptDemo, domainName, declare)
58 |   }
59 | 
60 |   ### clean up extra columns
61 |   ptDemo <- ptDemo[, -c("ethnicity_concept_id", "race_concept_id", "gender_concept_id")]
62 | 
63 |   # add in death date
64 |   ptDeath <- sqlQuery(deathqueryStatement)
65 |   ptDeath <- data.table(ptDeath) # convert to data.table
66 | 
67 |   # merge with patient data
68 |   ptDemo <- merge(ptDemo, ptDeath, by = "person_id", all.x = TRUE)
69 | 
70 |   # mark Alive/Deceased
71 |   ptDemo$Status <- ifelse(is.na(ptDemo$death_date), "Alive", "Deceased")
72 | 
73 |   return(ptDemo)
74 | 
75 | }
76 | 


--------------------------------------------------------------------------------
/R/getEncounters.R:
--------------------------------------------------------------------------------
 1 | #' Retrieves patient clinical encounter data
 2 | #'
 3 | #' Compiles encounter data for a given patient list. Concepts are mapped through the created data ontology. Encounter data are retrieved from visit_occurrence table and include: visit_type, encounter_type, etc.
 4 | #' @param patient_list Comma-separated string of patient ids
 5 | #' @param declare TRUE/FALSE will output status and data information during the process
 6 | #'
 7 | #' @return table of mapped encounter concepts for specific patients contained in a provided csv formatted string of ids.
 8 | #' @import data.table DBI
 9 | #' @export
10 | #'
11 | #' @examples
12 | #' ptEncs <- getEncounters("1,2,3,4",declare=TRUE)
13 | getEncounters <- function(patient_list, declare=FALSE) {
14 | 
15 |   if (!exists("dataOntology")) { # concept mapping requires dataOntology
16 |     message("Error: dataOntology does not exist. Please first run makeDataOntology.")
17 |     return(invisible(NULL))
18 |   }
19 | 
20 |   if (length(patient_list) > 1) { # a vector of ids is collapsed into a csv string
21 |     patient_list <- paste(patient_list, collapse = ",")
22 |   }
23 | 
24 |   encounterQuery <- paste0('SELECT person_id, visit_occurrence_id, visit_concept_id, visit_start_datetime, visit_end_datetime, visit_source_concept_id, visit_source_value, admitting_source_concept_id, discharge_to_concept_id FROM visit_occurrence WHERE person_id IN (', patient_list,') ')
25 | 
26 |   if (declare==TRUE) {message("Loading encounters data...")}
27 | 
28 |   # get visit data
29 |   ptEncs <- sqlQuery(encounterQuery)
30 | 
31 |   if (nrow(ptEncs)==0) { # nothing to map: hand back the empty query result
32 |     message("No encounter data found for current patient list")
33 |     return(ptEncs)
34 |   }
35 | 
36 |   if (declare==TRUE) {message("Encounters data loaded; formatting...")}
37 | 
38 |   ptEncs <- data.table(ptEncs) # convert to data.table
39 |   conceptLookup <- dataOntology[,c("concept_id","concept_name")]
40 | 
41 |   # concept id column -> name of the column that will hold the mapped concept name
42 |   mappedColumns <- c(
43 |     visit_concept_id = "visit_concept",
44 |     visit_source_concept_id = "visit_source_concept",
45 |     admitting_source_concept_id = "admitting_concept",
46 |     discharge_to_concept_id = "discharge_concept"
47 |   )
48 | 
49 |   # merge in relevant information concept ids, one id column at a time
50 |   for (idColumn in names(mappedColumns)) {
51 |     ptEncs <- merge(ptEncs, conceptLookup, by.x=idColumn, by.y="concept_id", all.x=TRUE)
52 |     names(ptEncs)[names(ptEncs) == 'concept_name'] <- mappedColumns[[idColumn]] # rename column
53 |     ptEncs <- ptEncs[, setdiff(names(ptEncs), idColumn), with=FALSE] # drop the id column
54 |   }
55 | 
56 |   return(ptEncs)
57 | 
58 | }
59 | 


--------------------------------------------------------------------------------
/R/makeDataOntology.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' Creates general data ontology
 3 | #'
 4 | #' Creates general data ontology used by all data tables from the concept table. Option to save/load as .rds file.
 5 | #' @param declare TRUE/FALSE will output status and data information during the process
 6 | #' @param store_ontology TRUE/FALSE If TRUE: will attempt to load dataOntology.rds from the current outDirectory; will create and save it if it does not exist. If FALSE: will build table.
 7 | #'
 8 | #' @return Returns an ontology table dictionary of concepts contained in the 'concept' table.
 9 | #' @import data.table DBI
10 | #' @export
11 | #'
12 | #' @examples
13 | #' \dontrun{
14 | #' dataOntology <- makeDataOntology(declare=FALSE,store_ontology=TRUE)
15 | #' }
16 | makeDataOntology <- function(declare=FALSE, store_ontology=FALSE) {
17 |   if (declare==TRUE) {message("Retrieving concept data...")}
18 | 
19 |   # path of the stored ontology; built by plain concatenation, so the
20 |   # outDirectory option has to end with a path separator
21 |   ontologyFile <- paste0(getOption("outDirectory"),"dataOntology.rds")
22 |   found <- FALSE # becomes TRUE when the ontology is read from ontologyFile
23 | 
24 |   if (store_ontology==TRUE) {
25 |     if (file.exists(ontologyFile)) {
26 |       message("Data Ontology found; loading... ")
27 |       dataOntology <- readRDS(ontologyFile)
28 |       message("Data Ontology loaded from memory successfully. ")
29 |       found <- TRUE
30 |     } else {
31 |       message("Data Ontology file not found in declared out_directory; creating... ")
32 |     }
33 |   }
34 | 
35 |   if (!found) { # build from the concept table, keeping concepts without an invalid_reason
36 |     conceptQuery <- "SELECT concept_id, concept_name, domain_id, vocabulary_id, concept_class_id, concept_code FROM concept WHERE (invalid_reason = '' OR invalid_reason IS NULL);"
37 |     dataOntology <- sqlQuery(conceptQuery)
38 |     dataOntology <- data.table(dataOntology)
39 |   }
40 | 
41 |   if (declare==TRUE) {
42 |     message("Concept data loaded; data found for: ")
43 |     message(paste0(length(unique(dataOntology$domain_id)), " unique domains."))
44 |     message(paste0(length(unique(dataOntology$vocabulary_id)), " unique vocabularies."))
45 |     message(paste0(length(unique(dataOntology$concept_class_id)), " unique concept classes."))
46 |   }
47 | 
48 |   # save data ontology when storing was requested and the table had to be built
49 |   if (store_ontology == TRUE && !found) {
50 |     message(paste0("Storing Data Ontology: ", ontologyFile))
51 |     saveRDS(dataOntology, ontologyFile)
52 |   }
53 | 
54 |   return(dataOntology)
55 | 
56 | }
57 | 
58 | 


--------------------------------------------------------------------------------
/R/showDataTypes.R:
--------------------------------------------------------------------------------
 1 | #' Shows available data types from the OMOP ontology
 2 | #'
 3 | #' Details relevant vocabularies per ontological domain. Requires dataOntology to have been created (makeDataOntology function).
 4 | #' @return Returns a table of vocabularies contained within clinical domains: Condition, Observation, Measurement, Device, Procedure, Drug.
 5 | #' @export
 6 | #'
 7 | #' @examples
 8 | #' showDataTypes()
 9 | #'
10 | showDataTypes <- function() {
11 | 
12 |   if (!exists("dataOntology")) { # nothing to list without the ontology
13 |     message("Error: dataOntology does not exist. Please first run makeDataOntology.")
14 |     return(invisible(NULL))
15 |   }
16 | 
17 |   clinicalDomains <- c("Condition","Observation","Measurement","Device","Procedure","Drug")
18 |   # keep one row per (domain, vocabulary) pair, ordered by domain
19 |   domainVocabularies <- dataOntology[domain_id %in% clinicalDomains, c("domain_id", "vocabulary_id")]
20 |   domainVocabularies <- domainVocabularies[!duplicated(domainVocabularies)]
21 |   domainVocabularies <- domainVocabularies[order(domain_id),]
22 | 
23 |   return(domainVocabularies)
24 | }
25 | 


--------------------------------------------------------------------------------
/R/summarizeDemographics.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' Summarizes patient demographic data
 3 | #'
 4 | #' Summarizes patient demographic data from the getDemographics function: reports the number of patients and age statistics, and prints count/proportion breakdowns by Status, Gender, Race, and Ethnicity.
 5 | #' @param ptDemo patient demographics table: ptDemo is the patient demographics object from the getDemographics function output.
 6 | #'
 7 | #' @return none (called for side effect: prints table)
 8 | #' @import dplyr data.table
 9 | #' @export
10 | #'
11 | #' @examples
12 | #' \dontrun{
13 | #' summarizeDemographics(ptDemo)
14 | #' }
15 | summarizeDemographics <- function(ptDemo) {
16 |   patientAges <- ptDemo$age
17 |   message(paste0("# of patients: ", ptDemo %>% tally()))
18 |   message(paste0("Mean age: ",round(mean(patientAges),3)))
19 |   message(paste0("Median age: ",round(median(patientAges),3)))
20 |   message(paste0("STD age: ",round(sd(patientAges),3)))
21 | 
22 |   # count the rows of each group and add each group's share of the total
23 |   breakdown <- function(groupedDemo) {
24 |     data.table(groupedDemo %>% summarise (n = n()) %>% mutate(proportion = n / sum(n)))
25 |   }
26 | 
27 |   message("Status breakdown:")
28 |   print(breakdown(ptDemo %>% group_by(Status)))
29 | 
30 |   message("Gender breakdown:")
31 |   print(breakdown(ptDemo %>% group_by(Gender)))
32 | 
33 |   message("Race breakdown:")
34 |   print(breakdown(ptDemo %>% group_by(Race)))
35 | 
36 |   message("Ethnicity breakdown:")
37 |   print(breakdown(ptDemo %>% group_by(Ethnicity)))
38 | }
39 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #############################
  4 | ######### GENERAL ##########
  5 | #############################
  6 | 
  7 | standard_concepts <- function(){ # standard vocabularies (comma-separated) per clinical domain
  8 |   vocabularies <- c(Measurement = "LOINC,SNOMED,CPT4", Condition = "SNOMED", Drug = "RxNorm,CPT4,NDC", Observation = "SNOMED,CPT4,LOINC,HCPCS", Device = "SNOMED,HCPCS", Procedure = "SNOMED,CPT4,HCPCS")
  9 |   data.table("domain_type" = names(vocabularies), "concepts" = unname(vocabularies))
 10 | }
 11 | 
 12 | ### function to indicate which variables to add to dbConnect function based on whether they are defined ###
 13 | # Returns the text of a DBI::dbConnect call; the text refers to a variable named
 14 | # drv, which must exist where the text is evaluated. 'driver' and 'dbname' need to be defined.
 15 | setConnectFunction <- function() {
 16 |   # environment variable -> text of the optional dbConnect argument it supplies
 17 |   optionalArgs <- c(
 18 |     username = ', user=Sys.getenv("username")',
 19 |     password = ', password=Sys.getenv("password")',
 20 |     host = ', host=Sys.getenv("host")',
 21 |     port = ', port= as.integer(Sys.getenv("port"))'
 22 |   )
 23 | 
 24 |   # only include the arguments whose environment variable is non-empty
 25 |   isDefined <- Sys.getenv(names(optionalArgs)) != ""
 26 |   connectString <- paste0('dbname=Sys.getenv("dbname")', paste(optionalArgs[isDefined], collapse = ""))
 27 | 
 28 |   fullConnectString <- paste0('DBI::dbConnect(drv, ', connectString , ')')
 29 | 
 30 |   return(fullConnectString)
 31 | 
 32 | }
 33 | 
 34 | 
 35 | ### general query function ###
 36 | # Runs query against the database described by the environment variables and
 37 | # returns the fetched rows; the connection is closed when the function exits.
 38 | sqlQuery <- function(query) {
 39 |   if (tolower(Sys.getenv("driver"))=="mysql") {
 40 | 
 41 |     # creating connection object (the connect string refers to drv)
 42 |     drv <- dbDriver("MySQL")
 43 |     fullConnectString <- setConnectFunction()
 44 |     con <- eval(parse(text = fullConnectString))
 45 | 
 46 |     # close db connection after function
 47 |     on.exit(DBI::dbDisconnect(con), add = TRUE)
 48 | 
 49 |     # send query
 50 |     res <- DBI::dbSendQuery(con, query)
 51 | 
 52 |     # get elements from results; the result set is always cleared (even if
 53 |     # fetching fails) so it is not left open when the connection is closed
 54 |     result <- tryCatch(DBI::dbFetch(res, n = -1), finally = DBI::dbClearResult(res))
 55 | 
 56 |   } else {
 57 | 
 58 |     # creating connection object using DatabaseConnector
 59 |     con <- DatabaseConnector::connect(dbms = tolower(Sys.getenv("driver")),
 60 |                    server = Sys.getenv("host"),
 61 |                    user = Sys.getenv("username"),
 62 |                    password = Sys.getenv("password"),
 63 |                    schema = Sys.getenv("dbname"),
 64 |                    port = Sys.getenv("port"))
 65 | 
 66 |     # close db connection after function
 67 |     on.exit(DatabaseConnector::disconnect(con), add = TRUE)
 68 | 
 69 |     # translate query using SqlRender
 70 |     translated_query <- SqlRender::translate(query, targetDialect = tolower(Sys.getenv("driver")))
 71 | 
 72 |     # query using DatabaseConnector function
 73 |     result <- DatabaseConnector::querySql(con, translated_query)
 74 |     # coerce columns to lowercase
 75 |     colnames(result) <- tolower(colnames(result))
 76 |   }
 77 |   return(result)
 78 | }
 78 | 
 79 | #############################
 80 | ######## PROCESSING #########
 81 | #############################
 82 | 
 83 | ### mark any empty Demographics fields as Unknown
 84 | # Replaces the NA values of column ColToUse in the data.table tbl with "Unknown"
 85 | # (assigned with :=) and returns tbl; a missing column is only reported.
 86 | markNAasUnknown <- function(tbl, ColToUse, declare=FALSE) {
 87 | 
 88 |   if (!ColToUse %in% colnames(tbl)) {
 89 |     message(paste0("column ", ColToUse, " not found"))
 90 |     return(tbl)
 91 |   }
 92 | 
 93 |   missingValues <- is.na(tbl[[ColToUse]])
 94 |   if (any(missingValues)) {
 95 |     tbl[missingValues, (ColToUse) := "Unknown"]
 96 |   } else if (declare==TRUE) { # no NA values in column
 97 |     message(paste0("no NA values found for ", ColToUse))
 98 |   }
 99 | 
100 |   return(tbl)
101 | 
102 | }
103 | 
104 | 
105 | ## check search input parameters
106 | # TRUE when the inclusion strategy/function are valid and the exclusion pair is
107 | # either not (fully) supplied or valid as well; FALSE otherwise
108 | checkParameters <- function(strategy_in, function_in, strategy_out, function_out) {
109 | 
110 |   # a strategy/function pair is valid when both are single recognised values
111 |   validPair <- function(strategy, fun) {
112 |     isTRUE(strategy %in% c("direct","mapped")) && isTRUE(fun %in% c("and","or"))
113 |   }
114 | 
115 |   if (!validPair(strategy_in, function_in)) {
116 |     return(FALSE)
117 |   }
118 | 
119 |   # exclusion criteria are optional: they are only validated when both are given
120 |   is.null(strategy_out) || is.null(function_out) || validPair(strategy_out, function_out)
121 | }
122 | 
123 | 
124 | 
125 | 
126 | ## unpack vocabularies and codes for search function
127 | #' @import data.table
128 | unpackAndMap <- function(vocabularies_input, codes_input) {
129 | 
130 |   # split a comma-separated string into trimmed entries
131 |   splitOnComma <- function(txt) {
132 |     trimws(strsplit(txt, ",")[[1]])
133 |   }
134 | 
135 |   # match to one another (by position), then unpack the ";"-separated codes into one row per code
136 |   criteria <- data.table::data.table(vocabularies = splitOnComma(vocabularies_input), codes = splitOnComma(codes_input))
137 |   criteria <- criteria[, list(codes = trimws(unlist(strsplit(codes, ";")))), by = vocabularies]
138 | 
139 |   # map inclusion criteria to dataOntology, keeping matches from the stated vocabulary only
140 |   criteriaMapped <- merge(criteria, dataOntology, by.x= "codes", by.y = "concept_code")
141 |   criteriaMapped <- criteriaMapped[vocabularies==vocabulary_id]
142 | 
143 |   return(criteriaMapped)
144 | 
145 | }
146 | 
147 | # for 'Mapped' strategy; map input concept codes to common ontology
148 | # codesFormatted: text placed inside the SQL IN (...) list matched against concept_id_1
149 | identifySynonyms <- function(codesFormatted) {
150 |   synonymQuery <- paste0('SELECT concept_id_1, concept_id_2, relationship_id, invalid_reason FROM concept_relationship WHERE concept_id_1 IN (',codesFormatted,');')
151 |   synonymData <- data.table::data.table(sqlQuery(synonymQuery))
152 | 
153 |   # keep valid relationships only: invalid_reason is either empty or NULL (NA in R),
154 |   # the same validity rule makeDataOntology applies to the concept table
155 |   synonymData <- synonymData[is.na(invalid_reason) | invalid_reason == ""]
156 |   synonymData <- synonymData[,-"invalid_reason"]
157 | 
158 |   # check for "Maps to" or "%- RxNorm%" or "%- SNOMED%" | standard concepts
159 |   synonymData[(relationship_id == "Maps to") | (grepl("- RxNorm",relationship_id)) | (grepl("- SNOMED",relationship_id)) ]
160 | }
161 | 
162 | # for 'Mapped' strategy; map input concept codes (from common ontology) to common ontology descendants
163 | #' @import data.table
164 | identifyMappings <- function(synonymCodes) {
165 | 
166 |   # fetch every (ancestor, descendant) pair whose ancestor is one of the input codes
167 |   descendantQuery <- paste0('SELECT ancestor_concept_id, descendant_concept_id FROM concept_ancestor A WHERE A.ancestor_concept_id IN (', synonymCodes,' );')
168 |   descendants <- data.table::data.table(sqlQuery(descendantQuery))
169 | 
170 |   # attach the ontology details of each descendant concept
171 |   descendantInfo <- merge(descendants, dataOntology, by.x = "descendant_concept_id", by.y = "concept_id")
172 | 
173 |   return(descendantInfo)
174 | }
175 | 
176 | 
177 | # identify tables to search for concepts of interest (direct strategy)
178 | # returns a list, named by domain type, of the concept ids in criteriaTable that belong to that domain
179 | identifyTablesDirect <- function(criteriaTable) {
180 | 
181 |   codesByDomain <- list()
182 |   domainTypes <- unique(standard_concepts()$domain_type)
183 | 
184 |   for (domainName in domainTypes) { # scan through all domain types
185 |     codesByDomain[[domainName]] <- criteriaTable[domain_id == domainName]$concept_id
186 |   }
187 | 
188 |   return(codesByDomain)
189 | }
190 | 
191 | 
192 | # identify tables to search for concepts of interest (mapped strategy)
193 | identifyTablesMapped <- function(mappingDataInfo) {
194 | 
195 |   codesByDomain <- list()
196 |   standardVocabularies <- standard_concepts()
197 | 
198 |   for (domainName in unique(standardVocabularies$domain_type)) { # scan through all domain types
199 |     # "A,B,C" -> regular expression "A|B|C" of the common concepts specifically used for the domain
200 |     vocabularyPattern <- gsub(",", "|", standardVocabularies[domain_type == domainName, concepts])
201 |     domainRows <- mappingDataInfo[domain_id == domainName]
202 |     domainRows <- domainRows[grep(vocabularyPattern, vocabulary_id)]
203 |     codesByDomain[[domainName]] <- domainRows$concept_id
204 |   }
205 | 
206 |   return(codesByDomain)
207 | }
208 | 
209 | ### identifyPatients based on function
210 | # function = OR (union)
211 | # Returns the unique person_ids found in any of the supplied tables, in order of
212 | # first appearance; tables that are NULL are skipped, and NULL is returned when
213 | # every table is NULL.
214 | identifyPatientsOR <- function(pts_condition, pts_observation, pts_measurement, pts_device, pts_drug, pts_procedure) {
215 | 
216 |   # tables in argument order; list() keeps the NULL entries
217 |   domainTables <- list(
218 |     pts_condition,
219 |     pts_observation,
220 |     pts_measurement,
221 |     pts_device,
222 |     pts_drug,
223 |     pts_procedure
224 |   )
225 | 
226 |   patient_list <- c()
227 | 
228 |   for (domainTable in domainTables) {
229 | 
230 |     if (is.null(domainTable)) { # no data supplied for this domain
231 |       next
232 |     }
233 | 
234 |     # add the patients of this table that are not in the list yet
235 |     patient_list <- union(patient_list, unique(domainTable$person_id))
236 | 
237 |   }
238 | 
239 |   return(patient_list)
240 | 
241 | }
242 | 
243 | # function = AND (intersect)
244 | # To identify overlapping patients, we have to backmap the descendant terms to the original concepts
245 | #' @import data.table
246 | identifyPatientsAND <- function(criteriaMapped, synonymDataFiltered, mappingDataInfo, pts_condition, pts_observation, pts_measurement, pts_device, pts_drug, pts_procedure) {
247 | 
248 |   names(mappingDataInfo)[names(mappingDataInfo) == 'vocabulary_id'] <- 'mapped_vocabulary_id'
249 |   names(mappingDataInfo)[names(mappingDataInfo) == 'concept_name'] <- 'mapped_concept_name'
250 | 
251 |   # descendants reached through a synonym: input concept (concept_id_1) -> synonym (concept_id_2) -> descendant
252 |   synonymMapped <- merge(mappingDataInfo[,c("descendant_concept_id","ancestor_concept_id","mapped_vocabulary_id","mapped_concept_name")], synonymDataFiltered[,c("concept_id_1","concept_id_2")], by.x = "ancestor_concept_id", by.y = "concept_id_2", allow.cartesian=TRUE)
253 |   synonymMapped <- synonymMapped[!duplicated(synonymMapped)]
254 | 
255 |   combinedMapped <- merge(synonymMapped, criteriaMapped, by.x = "concept_id_1", by.y = "concept_id", allow.cartesian=TRUE)
256 |   combinedMapped <- combinedMapped[!duplicated(combinedMapped)]
257 | 
258 |   # descendants reached directly from the input concept
259 |   combinedDirect <- merge(mappingDataInfo, criteriaMapped, by.x = "ancestor_concept_id", by.y = "concept_id", allow.cartesian=TRUE)
260 |   combinedDirect <- combinedDirect[!duplicated(combinedDirect)]
261 | 
262 | 
263 |   ### derive patient list by concept_codes
264 |   # create code dictionary per original concept input
265 | 
266 |   unique_codes <- unique(criteriaMapped$codes)
267 | 
268 |   code_map <- list()
269 | 
270 |   for (input_code in unique_codes) {
271 |     # The seed concept ids are kept as a vector: a single comma-separated
272 |     # string would only match in %in% when the code maps to exactly one
273 |     # concept id.
274 |     seed_ids <- criteriaMapped[codes == input_code]$concept_id
275 | 
276 |     code_map[[input_code]] <- c(
277 |       seed_ids, # original concept ids (i.e. in case of ATC category)
278 |       combinedDirect[ancestor_concept_id %in% seed_ids]$descendant_concept_id, # direct mapped descendants
279 |       combinedMapped[concept_id_1 %in% seed_ids]$descendant_concept_id # synonym codes and descendants
280 |     )
281 |   }
282 | 
283 |   # domain -> table of found patients and the name of its concept id column
284 |   domain_tables <- list(
285 |     Condition = list(pts = pts_condition, id_col = "condition_concept_id"),
286 |     Observation = list(pts = pts_observation, id_col = "observation_concept_id"),
287 |     Measurement = list(pts = pts_measurement, id_col = "measurement_concept_id"),
288 |     Device = list(pts = pts_device, id_col = "device_concept_id"),
289 |     Drug = list(pts = pts_drug, id_col = "drug_concept_id"),
290 |     Procedure = list(pts = pts_procedure, id_col = "procedure_concept_id")
291 |   )
292 | 
293 |   # NOTE(review): a code only gets an entry in patient_list once a non-NULL table of its
294 |   # domain is processed, so codes whose domain table is NULL do not take part in the
295 |   # intersection below - confirm this is the intended AND behaviour.
296 |   patient_list <- list()
297 | 
298 |   for (domain_name in names(domain_tables)) {
299 |     pts <- domain_tables[[domain_name]]$pts
300 |     id_col <- domain_tables[[domain_name]]$id_col
301 | 
302 |     if (is.null(pts)) { # no patient data for this domain
303 |       next
304 |     }
305 | 
306 |     for (input_code in unique(criteriaMapped[domain_id == domain_name]$codes)) {
307 |       # patients having any concept that belongs to this input code
308 |       code_patients <- pts$person_id[pts[[id_col]] %in% code_map[[input_code]]]
309 |       patient_list[[input_code]] <- union(patient_list[[input_code]], code_patients)
310 |     }
311 |   }
312 | 
313 |   # get intersected list
314 |   patient_list_intersected <- Reduce(intersect, patient_list)
315 | 
316 |   return(patient_list_intersected)
317 | 
318 | }
334 | 
335 | 
336 | # add counts to search query concepts: number of rows found per concept id in each table
337 | # NOTE(review): this equals a count of unique patients only if each table holds at most one row per patient and concept - confirm against the caller
338 | #' @import dplyr data.table
339 | summarizeFoundConcepts <- function(pts_condition, pts_observation, pts_measurement, pts_device, pts_drug, pts_procedure){
340 | 
341 |   # empty table that the per-domain counts are appended to
342 |   conceptCount <- data.table(matrix(nrow=0,ncol=2))
343 |   colnames(conceptCount) <- c("concept_id","pt_count")
344 | 
345 |   # count the rows of tblname per value of the column named colname
346 |   summarizeConcepts <- function(tblname, colname) {
347 |     tbl_concepts <- tblname %>%
348 |       group_by(.data[[colname]]) %>% # string column name via the .data pronoun (group_by_() is deprecated)
349 |       summarise(COUNT = n())
350 |     tbl_concepts <- data.table(tbl_concepts)
351 |     colnames(tbl_concepts) <-  c("concept_id","pt_count")
352 |     return(tbl_concepts)
353 |   }
354 | 
355 |   # concept id column -> table of found patients, in the order the counts are appended
356 |   domainTables <- list(
357 |     condition_concept_id = pts_condition,
358 |     observation_concept_id = pts_observation,
359 |     measurement_concept_id = pts_measurement,
360 |     device_concept_id = pts_device,
361 |     drug_concept_id = pts_drug,
362 |     procedure_concept_id = pts_procedure
363 |   )
364 | 
365 |   for (conceptColumn in names(domainTables)) {
366 |     domainTable <- domainTables[[conceptColumn]]
367 | 
368 |     if (!is.null(domainTable)) { # skip domains without data
369 |       conceptCount <- rbind(conceptCount, summarizeConcepts(domainTable, conceptColumn))
370 |     }
371 |   }
372 | 
373 |   return(conceptCount)
374 | 
375 | }
386 | 
387 | 
388 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
  1 | library(DBI)
  2 | library(data.table)
  3 | 
  4 | 
  5 | ### securely retrieve credentials stored in environment variables
  6 | # ~/.Renviron
  7 | 
  8 | 
  9 | #############################
 10 | ###### INITIALIZATION #######
 11 | #############################
 12 | 
 13 | # check credentials exist (and attach the packages required by the chosen driver)
 14 | checkCredentialsExist <- function() {
 15 |   required_vars <- c("driver", "username", "password", "dbname", "host", "port")
 16 |   if (!all(required_vars %in% names(Sys.getenv()))) {
 17 |     return(FALSE)
 18 |   }
 19 | 
 20 |   driver <- tolower(Sys.getenv("driver"))
 21 | 
 22 |   # load required drivers
 23 |   if (driver == "mysql") {
 24 |     library(RMySQL)
 25 |     return(TRUE)
 26 |   }
 27 |   if (driver %in% c("oracle", "postgresql", "redshift", "sql server", "pdw", "bigquery")) {
 28 |     library(DatabaseConnector)
 29 |     library(SqlRender)
 30 |     return(TRUE)
 31 |   }
 32 | 
 33 |   # unsupported driver
 34 |   message("Invalid driver type, please select either: 'mysql', 'oracle',  'postgresql', 'redshift', 'sql server', 'pdw', 'bigquery'")
 35 |   return(FALSE)
 36 | }
 37 | 
 38 | 
 39 | # check that successful connection can be made to OMOP server
 40 | # returns TRUE if a connection could be opened (it is closed again on exit), FALSE otherwise
 41 | checkOMOPconnection <- function() {
 42 |   useMySQL <- tolower(Sys.getenv("driver"))=="mysql"
 43 | 
 44 |   con <- tryCatch(
 45 |     withCallingHandlers(
 46 |       {
 47 |         if (useMySQL) {
 48 |           drv <- dbDriver("MySQL") # same driver name as used by sqlQuery
 49 |           fullConnectString <- setConnectFunction()
 50 |           eval(parse(text = fullConnectString))
 51 |         } else {
 52 |           # creating connection object using DatabaseConnector
 53 |           DatabaseConnector::connect(dbms = tolower(Sys.getenv("driver")),
 54 |                         server = Sys.getenv("host"),
 55 |                         user = Sys.getenv("username"),
 56 |                         password = Sys.getenv("password"),
 57 |                         schema = Sys.getenv("dbname"),
 58 |                         port = Sys.getenv("port"))
 59 |         }
 60 |       },
 61 |       warning = function(w) invokeRestart("muffleWarning") # ignore: the attempt carries on
 62 |     ),
 63 |     error = function(e) {
 64 |       message("Unable to establish connection to OMOP server.")
 65 |       message(conditionMessage(e))
 66 |       NULL
 67 |     }
 68 |   )
 69 | 
 70 |   if (is.null(con)) { # no connection was opened, so there is nothing to close
 71 |     return(FALSE)
 72 |   }
 73 | 
 74 |   if (useMySQL) {
 75 |     on.exit(dbDisconnect(con), add = TRUE)
 76 |   } else {
 77 |     on.exit(DatabaseConnector::disconnect(con), add = TRUE)
 78 |   }
 79 |   message("Can successfully connect to OMOP server.")
 80 |   return(TRUE)
 81 | }
 82 | 
 83 | 
 84 | # check that relevant tables exist in OMOP database
 85 | # returns TRUE when all required tables exist; messages report any missing or empty table
 86 | checkOMOPtables <- function() {
 87 | 
 88 |   necessaryTables <- c("concept","concept_ancestor","concept_relationship","condition_occurrence","death","device_exposure","drug_exposure","measurement","observation","person","procedure_occurrence","visit_occurrence")
 89 |   useMySQL <- tolower(Sys.getenv("driver"))=="mysql"
 90 | 
 91 |   # the disconnect is registered right after connecting so the connection is closed even if a later step fails
 92 |   if (useMySQL) {
 93 |     drv <- dbDriver("MySQL") # same driver name as used by sqlQuery
 94 |     fullConnectString <- setConnectFunction()
 95 |     con <- eval(parse(text = fullConnectString))
 96 |     on.exit(dbDisconnect(con), add = TRUE)
 97 |   } else {
 98 |     # creating connection object using DatabaseConnector
 99 |     con <- DatabaseConnector::connect(dbms = tolower(Sys.getenv("driver")),
100 |                    server = Sys.getenv("host"),
101 |                    user = Sys.getenv("username"),
102 |                    password = Sys.getenv("password"),
103 |                    schema = Sys.getenv("dbname"),
104 |                    port = Sys.getenv("port"))
105 |     on.exit(DatabaseConnector::disconnect(con), add = TRUE)
106 |   }
107 | 
108 |   foundTablesData <- tolower(dbListTables(con))
109 | 
110 |   missingTables <- FALSE
111 | 
112 |   for (tbls in necessaryTables) {
113 |     if (!tbls %in% foundTablesData) { # check if table exists
114 |       missingTables <- TRUE
115 |       message(paste0("missing required table: " , tbls ))
116 |       next
117 |     }
118 | 
119 |     # check if any data in found table
120 |     if (useMySQL) {
121 |       dataCheckQuery <- paste0("SELECT * FROM " , tbls , " LIMIT 1;")
122 |     } else {
123 |       dataCheckQuery <- paste0("SELECT TOP 1 * FROM " , tbls, ";")
124 |     }
125 |     dataCheck <- sqlQuery(dataCheckQuery)
126 |     if (nrow(dataCheck)==0) {
127 |       message(paste0("Warning: no data found in table ", tbls))
128 |     }
129 |   }
130 | 
131 |   if (missingTables) {
132 |     return(FALSE)
133 |   }
134 | 
135 |   message("All required tables found!")
136 |   return(TRUE)
137 | 
138 | }
139 | 
140 | #############################
141 | ###### INITIALIZATION #######
142 | #############################
143 | 
144 | 
145 | # .onLoad checks
146 | 
147 | .onLoad <- function(...) {
148 |   # Load hook: prints the welcome text, sets the default outDirectory option,
149 |   # then checks credentials, server connection, and required tables in order.
150 |   # NOTE(review): R's ?.onLoad "Good practice" section suggests startup
151 |   # messages belong in .onAttach; left here to preserve current behavior.
152 |   packageStartupMessage(
153 |     paste0("Welcome to ROMOP: please refer to https://github.com/BenGlicksberg/ROMOP for detailed instructions on how to use package with examples.\n
154 | Current OutDirectory is set to ",getwd(), ". Please use changeOutDirectory function to set.\n e.g., changeOutDirectory('path/to/outdir', create = TRUE) \n
155 | Now checking for required credentials and server connection (note this package will not function without them). Please wait...\n")
156 |    )
157 | 
158 |   ### initialize outDirectory as current working directory
159 |   options("outDirectory" = paste0(getwd(),"/"))
160 | 
161 |   ## Verify credentials exist
162 |   credentialsExist <- checkCredentialsExist()
163 |   if (credentialsExist != TRUE) { # require credentials
164 |     message("Please refer to the ReadMe to set and format server credentials in the .Renviron file.")
165 |     return(invisible(NULL))
166 |   }
167 | 
168 |   ## Verify connection
169 |   successfulConnection <- checkOMOPconnection()
170 |   if (successfulConnection != TRUE) { # require successful connection
171 |     message("Unable to connect; package will not function correctly.")
172 |     return(invisible(NULL))
173 |   }
174 | 
175 |   # check if relevant tables exist
176 |   correctTables <- checkOMOPtables()
177 |   if (correctTables != TRUE) { # require correct tables
178 |     message("Missing required tables; package will not function correctly.")
179 |     return(invisible(NULL))
180 |   }
181 | 
182 |   # every check passed: point the user at the next step
183 |   message("Success! Please create 'dataOntology' using the makeDataOntology function.\n e.g., dataOntology =  makeDataOntology(declare=TRUE,store_ontology = TRUE)")
184 |   invisible(NULL)
185 | }
186 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ROMOP Readme
  2 | ================
  3 | Benjamin S. Glicksberg
  4 | 9/14/2018
  5 | 
  6 | ## ROMOP
  7 | 
  8 | ROMOP is a flexible R package to interface with the [Observational
  9 | Health Data Sciences and Informatics (OHDSI)](https://www.ohdsi.org/)
 10 | [OMOP Common Data Model](https://www.ohdsi.org/data-standardization/).
 11 | Briefly, OMOP is a standardized relational database schema for
 12 | Electronic Health Record (EHR) or Electronic Medical Record (EMR) data
 13 | (i.e., patient data collected during clinical visits to a health
 14 | system). The main benefit of a standardized schema is that it allows for
 15 | interoperability between institutions, even if the underlying EHR
 16 | vendors are disparate.
 17 | 
 18 | For a detailed description of the OMOP common data model, please visit
 19 | this [helpful wiki](https://github.com/OHDSI/CommonDataModel/wiki).
 20 | 
 21 | In its backend, OMOP relies on standardized data ontologies and
 22 | metathesauruses, such as the [Unified Medical Language System
 23 | (UMLS)](https://www.nlm.nih.gov/research/umls/), and as such, the
 24 | queries within ROMOP heavily rely on these vocabularies.
 25 | [Athena](http://athena.ohdsi.org/) is a great tool to better understand
 26 | the concepts in these ontologies and identify ideal search terms of
 27 | interest.
 28 | 
 29 | ![Features of
 30 | ROMOP](www/figure1a_v3.png)
 31 | 
 32 | Manuscript information:  
 33 | Glicksberg BS, Oskotsky B, Giangreco N, Thangaraj PM, Rudrapatna V, Datta D, Frazier R, Lee N, Larsen R, Tatonetti NP, Butte AJ. ROMOP: a light-weight R package for interfacing with OMOP-formatted electronic health record data. JAMIA open. 2019 Apr;2(1):10-4.
 34 | 
 35 | ## Sandbox Server
 36 | 
 37 | The Centers for Medicare and Medicaid Services (CMS) have released a
 38 | synthetic clinical dataset
 39 | ([DE-SynPUF](https://www.cms.gov/Research-Statistics-Data-and-Systems/Downloadable-Public-Use-Files/SynPUFs/DE_Syn_PUF.html))
 40 | in the public domain with the aim of being reflective of the patient
 41 | population but containing no protected health information. The OHDSI
 42 | group has undertaken the task of converting these data into the [OMOP CDM
 43 | format](https://github.com/OHDSI/ETL-CMS). Users are certainly able to
 44 | set up this configuration on their own system following the instructions
 45 | on the GitHub page. We obtained all data files from the [OHDSI FTP
 46 | server](ftp://ftp.ohdsi.org/synpuf) (accessed June 17th, 2018) and
 47 | created the CDM (DDL and indexes) according to their [official
 48 | instructions](https://github.com/OHDSI/CommonDataModel/tree/master/PostgreSQL),
 49 | but modified for MySQL. For space considerations, we only uploaded one
 50 | million rows of each of the data files. The sandbox server is an Rshiny
 51 | server running as an Elastic Compute Cloud (EC2) instance on Amazon Web
 52 | Services (AWS) querying a MySQL database server (AWS Aurora MySQL).
 53 | 
 54 | ## Requirements
 55 | 
 56 | #### Clinical Data
 57 | 
 58 | ROMOP requires EHR data to be in OMOP format and on a server accessible
 59 | to the user. In its current form, ROMOP can connect to databases in
 60 | *MySQL* using the RMySQL driver or many other formats, including
 61 | *Oracle*, *PostgreSQL*, *Microsoft SQL Server*, *Amazon Redshift*,
 62 | *Google BigQuery*, and *Microsoft Parallel Data Warehouse*, through
 63 | utilization of the DatabaseConnector and SqlRender packages developed by
 64 | the OHDSI group (see below).
 65 | 
 66 | Users without access to EHR data might consider using synthetic public
 67 | data following the instructions provided by the OHDSI group
 68 | [here](https://github.com/OHDSI/ETL-CMS).
 69 | 
 70 | #### Programming Language
 71 | 
 72 | ROMOP is built in the R environment and developed on version 3.4.4
 73 | (2018-03-15).
 74 | 
 75 | ROMOP requires the following R packages:
 76 | 
 77 |   - [DBI](https://cran.r-project.org/web/packages/DBI/index.html)
 78 |     (developed on version
 79 |     1.0.0)
 80 |   - [data.table](https://cran.r-project.org/web/packages/data.table/data.table.pdf)
 81 |     (developed on version 1.10.4-3).
 82 |   - [dplyr](https://dplyr.tidyverse.org/) (developed on version 0.7.4).
 83 | 
 84 | Driver-specific:
 85 | 
 86 |   - [RMySQL](https://cran.r-project.org/web/packages/RMySQL/index.html)
 87 |     (developed on version
 88 |     0.10.14).
 89 |   - [DatabaseConnector](https://cran.r-project.org/web/packages/DatabaseConnector/index.html)
 90 |     (developed on version
 91 |     2.2.0)
 92 |   - [DatabaseConnectorJars](https://cran.r-project.org/web/packages/DatabaseConnectorJars/index.html)
 93 |     (developed on version
 94 |     1.0.0)
 95 |   - [SqlRender](https://cran.r-project.org/web/packages/SqlRender/index.html)
 96 |     (developed on version 1.5.2)
 97 | 
 98 | ## Installation
 99 | 
100 | ### Download
101 | 
102 | ROMOP can be installed easily from github using the
103 | [devtools](https://cran.r-project.org/web/packages/devtools/index.html)
104 | package:
105 | 
106 |     library(devtools)
107 |     install_github("BenGlicksberg/ROMOP")
108 | 
109 | Alternatively, the package can be downloaded directly from the [github
110 | page](https://github.com/BenGlicksberg/ROMOP) and installed by the
111 | following steps:
112 | 
113 | 1.  Unzip ROMOP-master.zip
114 | 2.  R CMD INSTALL ROMOP-master
115 | 
116 | Please see the [Setup](#setup) section to properly configure the package
117 | to work.
118 | 
119 |  
120 | 
121 | ### Setup
122 | 
123 | #### Credentials
124 | 
125 | In accordance with best practices for storing sensitive information,
126 | credentials are not saved in plain text but in the .Renviron file. A
127 | formatted .Renviron file is provided with the package with the following
128 | fields to fill in:
129 | 
130 | ``` 
131 | driver = ""
132 | host = ""
133 | username = ""
134 | password = ""
135 | dbname = ""
136 | port = "3306" 
137 | ```
138 | 
139 |   - driver (case insensitive): “mysql” for MySQL or (according to [OHDSI
140 |     DatabaseConnector
141 |     package](https://github.com/OHDSI/DatabaseConnector)) “postgresql”
142 |     for PostgreSQL, “oracle” for Oracle, “sql server” for Microsoft SQL
143 |     Server, “redshift” for Amazon Redshift, “pdw” for Microsoft Parallel
144 |     Data Warehouse, or “bigquery” for Google BigQuery.  
145 |   - host (or server depending on database format)  
146 |   - dbname: OMOP EHR database name (or schema depending on database
147 |     format)
148 | 
149 | Note that this .Renviron file has to be in the same directory where R is
150 | launched. If already using an .Renviron file, add this information to
151 | it.
152 | 
153 | #### Checks
154 | 
155 | With credentials correctly configured, the package can be loaded. ROMOP
156 | will now check for 3 conditions to be met:
157 | 
158 | 1.  Check that the credentials exist and can be retrieved from .Renviron
159 |     file:  
160 |     *requires driver, host, username, password, dbname, and port exist*
161 | 
162 | 2.  Check that connection to OMOP EHR server and database can be made:  
163 |     *uses the above credentials*
164 | 
165 | 3.  Check to ensure all required OMOP tables exist and contain (any)
166 |     data:  
167 |     *the required tables
168 |     are:*  
169 | 
170 | <!-- end list -->
171 | 
172 |     "concept","concept_ancestor","concept_relationship","condition_occurrence","death",
173 |     "device_exposure","drug_exposure","measurement","observation","person","procedure_occurrence","visit_occurrence"
174 | 
175 |   - if any of the above tables are missing, a warning message will be
176 |     produced and the package will not be able to load properly.
177 |   - if any of the above tables exist, but do not contain any data, a
178 |     warning message will be produced but the package will still be able
179 |     to function.
180 | 
181 | #### On start
182 | 
183 | Successfully passing all checks will allow the user to begin using ROMOP.
184 | 
185 | 1.  Set an output directory to use with the
186 |     [changeOutDirectory](#changeoutdirectory) function (note: the
187 |     default output directory will be declared on package load).  
188 | 2.  Create/load the Data ontology (required to decode data types) using
189 |     the [makeDataOntology](#makedataontology) function. For the first time
190 |     running this package, the concept ontology will have to first be
191 |     built, but if the store\_ontology option is selected, the ontology
192 |     will be saved as an .rds file for subsequent loading.
193 | 
194 |  
195 | 
196 | ## Functions
197 | 
198 | ### Utility
199 | 
200 | #### getDemographics
201 | 
202 | *Description*:  Retrieves and formats patient demographic data from the
203 | **person** and **death** tables. Option to restrict to patientlist of
204 | interest.
205 | 
206 | *Usage*:  ptDemo \<- getDemographics(patient\_list=NULL,declare=TRUE)
207 | 
208 | *Arguments*:
209 | 
210 |   patient\_list         *comma-separated string of patient ids*  
211 |          a provided patientlist will restrict search to ids. NULL will
212 | return demographic data for all available patients
213 | 
214 |   declare         *TRUE/FALSE*  
215 |          if TRUE, outputs status and updates to the screen
216 | 
217 | *Value*:
218 | 
219 |   Returns a data.table with demographic data: person\_id,
220 | birth\_datetime, age, Gender, Race, Ethnicity, death\_date, Status
221 | (Alive/Deceased)
222 | 
223 | *Details*:
224 | 
225 |   - patient\_list should be in the following format: “patient\_id\_1,
226 |     patient\_id\_2, …”
227 | 
228 |  
229 | 
230 | #### getEncounters
231 | 
232 | *Description*:  Retrieves and formats patient encounter data from the
233 | **visit\_occurrence** table. Requires patientlist input.
234 | 
235 | *Usage*:  ptEncs \<- getEncounters(patient\_list,declare=TRUE)
236 | 
237 | *Arguments*:
238 | 
239 |   patient\_list         *comma-separated string of patient ids*  
240 |          searches for all encounter data for the patientlist input.
241 | 
242 |   declare         *TRUE/FALSE*  
243 |          if TRUE, outputs status and updates to the screen
244 | 
245 | *Value*:
246 | 
247 |   Returns a data.table with encounter data: person\_id,
248 | visit\_occurrence\_id, visit\_start\_datetime, visit\_end\_datetime,
249 | visit\_source\_value, visit\_concept, visit\_source\_concept,
250 | admitting\_concept, discharge\_concept
251 | 
252 | *Details*:
253 | 
254 |   - patient\_list should be in the following format: “patient\_id\_1,
255 |     patient\_id\_2, …”
256 | 
257 |  
258 | 
259 | #### getClinicalData
260 | 
261 | *Description*:  Retrieves all relevant clinical data for individuals in
262 | a patientlist. Wrapper for domain-specific getData functions (which can
263 | also be used separately).
264 | 
265 | *Usage*:  ptClinicalData \<- getClinicalData(patient\_list,
266 | declare=TRUE)
267 | 
268 | *Arguments*:
269 | 
270 |   patient\_list         *comma-separated string of patient ids*  
271 |          a provided patientlist will restrict search to ids. NULL will
272 | return demographic data for all available patients
273 | 
274 |   declare         *TRUE/FALSE*  
275 |          if TRUE, outputs status and updates to the screen
276 | 
277 | *Value*:  
278 |   Returns a list of data.tables stratified by domain type (e.g.,
279 | ptClinicalData$Condition, ptClinicalData$Observation, etc…)
280 | 
281 | *Details*:
282 | 
283 |   - patient\_list should be in the following format: “patient\_id\_1,
284 |     patient\_id\_2, …”  
285 |   - getClinicalData calls domain-specific getData functions for the
286 |     following domains: Observation, Condition, Procedure, Medication
287 |     (Drug), Measurement, and Device. Each function can also be run
288 |     individually (e.g, getConditions; getMedications).  
289 |   - In addition to datetimes, visit\_occurrence\_ids,
290 |     <domain>\_concept\_ids and <domain>\_source\_concept\_ids, other
291 |     domain-specific concepts and values are retrieved and mapped:
292 |       - Observation: observation\_type\_concept, value\_as\_number,
293 |         value\_as\_string, value\_as\_concept, unit\_source\_value  
294 |       - Condition: condition\_type\_concept, condition\_status  
295 |       - Procedure: procedure\_type\_concept, quantity,  
296 |       - Medication: drug\_type\_concept, stop\_reason, refills,
297 |         quantity, days\_supply, sig, route\_concept,
298 |         effective\_drug\_dose, dose\_unit\_concept,
299 |         route\_source\_value, frequency, frequency\_unit,
300 |         rx\_quantity\_unit\_source\_value  
301 |       - Measurement: measurement\_type\_concept, value\_as\_number,
302 |         value\_as\_concept, unit\_concept
303 |       - Device: device\_type\_concept
304 | 
305 |  
306 | 
307 | #### findPatients
308 | 
309 | *Description*:  Main function to identify patients based on clinical
310 | data inclusion (and exclusion, if desired) criteria. Flexible to allow
311 | for multiple data types, vocabularies, and concepts.
312 | 
313 | *Usage*:   patientlist \<- findPatients(strategy\_in=“mapped”,
314 | vocabulary\_in, codes\_in, function\_in = “or”, strategy\_out = NULL,
315 | vocabulary\_out = NULL, codes\_out = NULL, function\_out = NULL,
316 | declare=FALSE, save=FALSE, out\_name=NULL)
317 | 
318 | *Arguments*:
319 | 
320 |   strategy\_in         *mapped* or *direct*  
321 |          dictates the strategy for how inclusion criteria are treated
322 | (see Details).
323 | 
324 |   vocabulary\_in         *vocabularies for inclusion criteria*  
325 |          comma-separated string of relevant vocabularies for inclusion
326 | criteria (see Details).
327 | 
328 |   codes\_in         *specific concept codes for inclusion criteria*  
329 |          semi-colon separated string of code concepts for inclusion
330 | criteria, corresponding to the order for vocabulary\_in. Multiple codes
331 | can be used per vocabulary and should be comma-separated (see Details).
332 | 
333 |   function\_in         *and* or *or*  
334 |          dictates how multiple inclusion should be treated. *and*
335 | necessitates that all inclusion criteria are met (i.e., intersection),
336 | while *or* allows for any criteria to be met (i.e., union) (see Details).
337 | 
338 |   strategy\_out         *mapped* or *direct* or NULL (default)  
339 |          dictates the strategy for how exclusion are treated. NULL
340 | indicates no exclusion criteria.
341 | 
342 |   vocabulary\_out         *vocabularies for exclusion criteria* or NULL
343 | (default)  
344 |          comma-separated string of relevant vocabularies for exclusion
345 | criteria. NULL indicates no exclusion criteria.
346 | 
347 |   codes\_out         *specific concept codes for exclusion criteria* or
348 | NULL (default)  
349 |          semi-colon separated string of code concepts for exclusion
350 | criteria, corresponding to the order for vocabulary\_out. Multiple codes
351 | can be used per vocabulary and should be comma-separated. NULL indicates
352 | no exclusion criteria.
353 | 
354 |   function\_out         *and* or *or* or NULL  
355 |          dictates how multiple exclusion should be treated. *and*
356 | necessitates that all exclusion criteria are met (i.e., intersection),
357 | while *or* allows for any criteria to be met (i.e., union). NULL
358 | indicates no exclusion criteria.
359 | 
360 |   declare         *TRUE/FALSE*  
361 |          if TRUE, outputs status and updates to the screen.
362 | 
363 |   save         *TRUE/FALSE*  
364 |          if TRUE, various query output saved to outDirectory (see
365 | Details).
366 | 
367 |   out\_name         *name assigned to search query* or NULL  
368 |          if save == TRUE, saves query using provided name. If the
369 | provided name already exists as a directory (or is NULL), the directory
370 | defaults to datetime name (see Details).
371 | 
372 | *Value*:  
373 |   Returns a list of patients that meet inclusion criteria (and not
374 | exclusion criteria if entered).
375 | 
376 | *Details*:
377 | 
378 |   - *direct* strategy queries the concepts directly by \_source\_concept
379 |     in clinical tables. *mapped* maps to common ontology (via
380 |     **concept\_synonym**) and identifies relevant descendants (via
381 |     **concept\_ancestor**) to search for in \_concept fields.
382 |   - the [exploreConcepts](#exploreconcepts) function can be used to find
383 |     ideal concepts to search for.  
384 |   - vocabulary\_ input for multiple inputs should use relevant
385 |     vocabularies (see [showDataTypes](#showdatatypes) ) as a
386 |     comma-separated string, e.g., “ATC, ICD10CM, SNOMED”.
387 |   - codes\_ input correspond to the order as the vocabulary\_ input and
388 |     should be a semicolon-separated string in the same order as above.
389 |     Multiple terms per vocabulary type should be comma-separated. e.g.,
390 |     “A01A; K50, K51; 235599003” correspond to “A01A” for ATC, “K50”
391 |     and “K51” for ICD10CM, and “235599003” for SNOMED.  
392 |   - function\_ corresponds to how criteria should be treated. *and*
393 |     necessitates patients meet all criteria while *or* allows for
394 |     patients to meet any of the criteria.
395 |   - Please note that if no standard common concepts are found per search
396 |     domain, a warning message will appear and the search will not be
397 |     able to be performed (see [Helpful Hints](#helpful-hints) for more
398 |     details.)
399 |   - if save == TRUE, the following information is saved in a directory
400 |     per query:
401 |       - query: all arguments for the search.
402 |       - \_criteria\_mapped: all original criteria for inclusion (and
403 |         exclusion if applicable) that are mapped to dataOntology.
404 |       - criteria\_mapped\_concepts: all mapped concepts used for
405 |         inclusion (and exclusion if applicable) that are used to search
406 |         in clinical data tables. Additionally, the pt\_count column
407 |         displays the number of unique patients that have a record with
408 |         the corresponding concept.  
409 |       - outcome: results of the search (most relevant when exclusion
410 |         criteria are applied).  
411 |       - patient\_list: list of patients that meet inclusion (and not
412 |         exclusion, if applicable) criteria.
413 | 
414 |  
415 | 
416 | ### Misc.
417 | 
418 | #### changeOutDirectory
419 | 
420 | *Description*:   Sets the current outDirectory which will store the Data
421 | Ontology and all function output. Option to create directory if does not
422 | exist.
423 | 
424 | *Usage*:  changeOutDirectory(outdir=“path/to/directory”, create=FALSE)
425 | 
426 | *Arguments*:  
427 |   outdir         directory path
428 | 
429 |   create         *TRUE/FALSE*  
430 |          will create the directory if it does not exist
431 | 
432 | *Value*:  
433 |    Nothing returned; simply sets (and creates if set to) output
434 | directory
435 | 
436 | *Details*:
437 | 
438 |   - If directory does not exist and create=FALSE, a warning message will
439 |     appear and the output directory will not be changed.
440 | 
441 |  
442 | 
443 | #### makeDataOntology
444 | 
445 | *Description*:  Creates general Data Ontology used by all data tables
446 | from the **concept** table. Option to save/load.
447 | 
448 | *Usage*:  dataOntology \<-
449 | makeDataOntology(declare=TRUE,store\_ontology=FALSE)
450 | 
451 | *Arguments*:  
452 |   declare         *TRUE/FALSE*  
453 |          if TRUE, outputs status and updates to the screen
454 | 
455 |   store\_ontology         *TRUE/FALSE*  
456 |          if TRUE, will save/load the ontology instead of active querying
457 | 
458 | *Value*:  
459 |   Returns a data.table with concept data.
460 | 
461 | *Details*:
462 | 
463 |   - Generating the Data Ontology takes ~31.2 secs and is ~491.6 Mb.  
464 |   - If declare == TRUE, the following information will be returned:
465 | 
466 | <!-- end list -->
467 | 
468 |     Retrieving concept data...
469 |     Concept data loaded; data found for: 
470 |     ## unique domains.
471 |     ## unique vocabularies.
472 |     ### unique concept classes.
473 | 
474 |   - If store\_ontology == TRUE, attempts to load from memory (in the
475 |     outDirectory) and saves if does not exist (~53 Mb). Loading takes ~8
476 |     secs.
477 | 
478 |  
479 | 
480 | #### summarizeDemographics
481 | 
482 | *Description*:  Summarizes patient demographic data from the
483 | [getDemographics](#getdemographics) function.
484 | 
485 | *Usage*:  summarizeDemographics(ptDemo)
486 | 
487 | *Arguments*:
488 | 
489 |   ptDemo         *patient demographics table*  
490 |          ptDemo is the patient demographics object from the
491 | getDemographics function output
492 | 
493 | *Value*:
494 | 
495 |   N/A; outputs message with descriptive summary statistics for the
496 | relevant patient demographic data.
497 | 
498 |  
499 | 
500 | #### showDataTypes
501 | 
502 | *Description*:  Details relevant vocabularies per domain. Requires
503 | dataOntology to have been created (via
504 | [makeDataOntology](#makedataontology)).
505 | 
506 | *Usage*:  showDataTypes()
507 | 
508 | *Arguments*:
509 | 
510 | N/A
511 | 
512 | *Value*:
513 | 
514 |   Returns a table of vocabularies contained within clinical domains:
515 | Condition, Observation, Measurement, Device, Procedure, Drug.
516 | 
517 |  
518 | 
519 | #### exploreConcepts
520 | 
521 | *Description*:  For given vocabulary and concept, returns the mapped
522 | standard concept(s) as well as descendant concept(s)
523 | 
524 | *Usage*:  conceptsInfo \<- exploreConcepts(vocabulary, codes)
525 | 
526 | *Arguments*:
527 | 
528 |   vocabulary         *vocabulary*  
529 |          comma-separated string of relevant vocabularies for inclusion
530 | criteria (see Details).
531 | 
532 |   codes         *concept codes*  
533 |          semi-colon separated string of code concepts for inclusion
534 | criteria, corresponding to the order for vocabulary. Multiple codes can
535 | be used per vocabulary and should be comma-separated (see Details).
536 | 
537 | *Value*:
538 | 
539 |   Returns a table of concepts contained under (i.e., below in the
540 | hierarchy) the query concept.
541 | 
542 | *Details*:
543 | 
544 |   - vocabulary input for multiple inputs should use relevant
545 |     vocabularies (see [showDataTypes](#showdatatypes) ) as a
546 |     comma-separated string, e.g., “ATC, ICD10CM”.
547 |   - codes input correspond to the order as the vocabulary input and
548 |     should be a semicolon-separated string in the same order as above.
549 |     Multiple terms per vocabulary type should be comma-separated. e.g.,
550 |     “A01A; K50, K51” correspond to “A01A” for ATC and “K50” and “K51”
551 |     for ICD10CM.
552 | 
553 |  
554 | 
555 | ## Examples
556 | 
557 | Both simple and advanced [findPatients](#findpatients) queries will be
558 | outlined. See the [Output](#output) section for description of output if
559 | save == TRUE. For the process timing provided, all queries were run on
560 | an Amazon Elastic Compute Cloud (EC2) instance.
561 | 
562 | ### Simple
563 | 
564 | 1.  Disease category (ICD10CM): find all “Type 2 Diabetes Mellitus”
565 |     patients (E11)
566 | 
567 | Here we will set a single inclusion criterion. The inclusion vocabulary
568 | is set to *ICD10CM* and the inclusion code is *E11* corresponding to the
569 | vocabulary. Because the inclusion strategy is set as “mapped”, ROMOP
570 | will map the ICD10CM code to a common ontology (SNOMED) term and find
571 | all descendants to search for (see [Code Breakdown](#code-breakdown) for
572 | details on how this
573 | works).
574 | 
575 | *query*
576 | 
577 | ``` 
578 | patient_list = findPatients(strategy_in="mapped", vocabulary_in = "ICD10CM", codes_in = "E11")  
579 | ```
580 | 
581 | *time*: 15.3 secs
582 | 
583 |  
584 | 
585 | 2.  Specific disease (ICD9CM): find all patients with “Diabetes with
586 |     ketoacidosis, type I \[juvenile type\], not stated as uncontrolled”
587 |     **only** (250.11)
588 | 
589 | Here we will search for patients that have the specific *ICD9CM* code
590 | *250.11* **only**, i.e., not map to common ontology (see [Code
591 | Breakdown](#code-breakdown) for the importance of this
592 | distinction).
593 | 
594 | *query*
595 | 
596 | ``` 
597 | patient_list = findPatients(strategy_in="direct", vocabulary_in = "ICD9CM", codes_in = "250.11")  
598 | ```
599 | 
600 | *time*: 1.1 min
601 | 
602 |  
603 | 
604 | 3.  Multiple diseases (ICD10CM): find all patients with “Essential
605 |     (primary) hypertension” (I10) **and** “Angina pectoris with
606 |     documented spasm” (I20.1)
607 | 
608 | Here we will search for patients that have the multiple ICD10CM codes.
609 | While we put a single inclusion vocabulary, we will put two inclusion
610 | codes separated by a comma. Also we set the inclusion function to “and”
611 | which requires **both** criteria to be
612 | met.
613 | 
614 | *query*
615 | 
616 | ``` 
617 | patient_list = findPatients(strategy_in="mapped", vocabulary_in = "ICD10CM", codes_in = "I10, I20.1", function_in = "and")  
618 | ```
619 | 
620 | *time*: 23.8 secs
621 | 
622 |  
623 | 
624 | 4.  Drug class (ATC): find all patients prescribed with any “Serotonin
625 |     receptor antagonists” (A03AE)
626 | 
627 | Here we will search for patients by drug ATC code. As the inclusion
628 | strategy is set to “mapped”, all drugs that fall into this category will
629 | automatically be identified and searched for (see [Code
630 | Breakdown](#code-breakdown) for details on how this
631 | works).
632 | 
633 | *query*
634 | 
635 | ``` 
636 | patient_list = findPatients(strategy_in="mapped", vocabulary_in = "ATC", codes_in = "A03AE")  
637 | ```
638 | 
639 | *time*: 1.1 secs
640 | 
641 |  
642 | 
643 | 5.  Disease category (ICD10CM) but not Drug (MeSH): find all patients
644 |     with “Other anxiety disorders” (F41), but *not* prescribed with
645 |     “Clonazepam” (D002998)
646 | 
647 | Here we will search for patients by ICD10CM code as before. We also
648 | identify all patients prescribed with the MeSH term for “Clonazepam”,
649 | which will be removed from the original
650 | list.
651 | 
652 | *query*
653 | 
654 | ``` 
655 | patient_list = findPatients(strategy_in="mapped", vocabulary_in = "ICD10CM", codes_in = "F41", strategy_out="mapped", vocabulary_out = "MeSH", codes_out = "D002998", function_out = "and")  
656 | ```
657 | 
658 | *time*: 16.5 secs
659 | 
660 |  
661 | 
662 | ### Advanced
663 | 
664 | 1.  Multiple disease categories (ICD10CM) and lab test (LOINC) but not
665 |     multiple disease categories (ICD10CM) nor drug class (RxNorm): find
666 |     all patients with “Crohn’s disease” (K50) and “Malignant neoplasm of
667 |     prostate” (C61) with “CBC W Auto Differential panel - Blood”
668 |     (57021-8), but *not* “Gastroenteritis and colitis due to radiation”
669 |     (K52.0) nor “Allergic and dietetic gastroenteritis and colitis”
670 |     (K52.2) nor prescribed with any “Aminosalicylate” (113374)
671 | 
672 | Here we will search for patients who meet all inclusion criteria (both
673 | ICD10CM codes and the LOINC lab test). We also identify all patients with
674 | either excluded ICD10CM code or any drug in the RxNorm class, who will be removed from the original list.
675 | 
676 | *query*
677 | 
678 | ``` 
679 | vocabulary_in = "ICD10CM, LOINC"
680 | codes_in = "K50, C61; 57021-8" 
681 | vocabulary_out = "ICD10CM, RxNorm"
682 | codes_out = "K52.0, K52.2; 113374" 
683 | 
684 | patient_list = findPatients(strategy_in="mapped", vocabulary_in = vocabulary_in, codes_in = codes_in, function_in = "and", strategy_out="mapped", vocabulary_out = vocabulary_out, codes_out = codes_out, function_out = "or")  
685 | ```
686 | 
687 | *time*: 5.9 mins
688 | 
689 |  
690 | 
691 | ## Output
692 | 
693 | All output is saved in the output directory (use
694 | [changeOutDirectory](#changeoutdirectory) to set). Additionally, the
695 | data ontology file will be loaded from here (and saved here, if set to) using
696 | the [makeDataOntology](#makedataontology) function.
697 | 
698 | If save==TRUE is selected for [findPatients](#findpatients) queries,
699 | various information will be saved in a created query-specific directory
700 | within the outDirectory:  
701 |   - query: all arguments for the search.
702 |   - \_criteria\_mapped: all original criteria for inclusion (and exclusion
703 |     if applicable) that are mapped to dataOntology.
704 |   - criteria\_mapped\_concepts: all mapped concepts used for inclusion (and
705 |     exclusion if applicable) that are used to search in clinical data
706 |     tables. Additionally, the pt\_count column displays the number of
707 |     unique patients that have a record with the corresponding concept.
708 |   - outcome: results of the search (most relevant when exclusion criteria
709 |     are applied).
710 |   - patient\_list: list of patients that meet inclusion (and not
711 |     exclusion, if applicable) criteria.
712 | 
713 | We will detail the respective output files that are derived from Simple
714 | [Examples](#examples) \#5:
715 | 
716 | ### query.txt
717 | 
718 |     cat query.txt
719 | 
720 |     inclusion strategy: mapped
721 |     inclusion vocabularies: ICD10CM
722 |     inclusion codes: F41
723 |     inclusion function: or
724 |     exclusion strategy: mapped
725 |     exclusion vocabularies: MeSH
726 |     exclusion codes: D002998
727 |     exclusion function: and
728 | 
729 | ### inclusion\_criteria\_mapped.txt
730 | 
731 | ``` 
732 |  cat inclusion_criteria_mapped.txt
733 | ```
734 | 
735 |     codes   vocabularies    concept_id  concept_name    domain_id   vocabulary_id   concept_class_id
736 |     F41 ICD10CM 1568230 Other anxiety disorders Condition   ICD10CM 3-char nonbill code
737 | 
738 | ### inclusion\_criteria\_mapped\_concepts.txt
739 | 
740 | ``` 
741 |  head inclusion_criteria_mapped_concepts.txt
742 | ```
743 | 
744 |     descendant_concept_id   ancestor_concept_id concept_name    domain_id   vocabulary_id   concept_class_id    concept_code    pt_count
745 |     381537  442077  Organic anxiety disorder    Condition   SNOMED  Clinical Finding    17496003    NA
746 |     432600  442077  Stress reaction causing mixed disturbance of emotion and conduct    Condition   SNOMED  Clinical Finding    192044009   NA
747 |     433178  442077  Anxiety disorder of childhood OR adolescence    Condition   SNOMED  Clinical Finding    109006  NA
748 |     434613  442077  Generalized anxiety disorder    Condition   SNOMED  Clinical Finding    21897009    NA
749 |     434628  442077  Separation anxiety  Condition   SNOMED  Clinical Finding    126943008   NA
750 |     436074  442077  Panic disorder  Condition   SNOMED  Clinical Finding    371631005   NA
751 |     436390  442077  Psychogenic rumination  Condition   SNOMED  Clinical Finding    192014006   NA
752 |     436676  442077  Posttraumatic stress disorder   Condition   SNOMED  Clinical Finding    47505003    NA
753 |     437537  442077  Shyness disorder of childhood   Condition   SNOMED  Clinical Finding    83253003    NA
754 | 
755 | ### exclusion\_criteria\_mapped.txt
756 | 
757 | ``` 
758 |  cat exclusion_criteria_mapped.txt
759 | ```
760 | 
761 |     codes   vocabularies    concept_id  concept_name    domain_id   vocabulary_id   concept_class_id
762 |     D002998 MeSH    45612901    Clonazepam  Drug    MeSH    Main Heading
763 | 
764 | ### exclusion\_criteria\_mapped\_concepts.txt
765 | 
766 | ``` 
767 |  head exclusion_criteria_mapped_concepts.txt
768 | ```
769 | 
770 |     descendant_concept_id   ancestor_concept_id concept_name    domain_id   vocabulary_id   concept_class_id    concept_code    pt_count
771 |     798874  798874  Clonazepam  Drug    RxNorm  Ingredient  2598    NA
772 |     798875  798874  Clonazepam 0.5 MG Oral Tablet   Drug    RxNorm  Clinical Drug   197527  NA
773 |     798876  798874  Clonazepam 1 MG Oral Tablet Drug    RxNorm  Clinical Drug   197528  NA
774 |     798877  798874  Clonazepam 2 MG Oral Tablet Drug    RxNorm  Clinical Drug   197529  NA
775 |     798893  798874  Clonazepam 0.125 MG Oral Tablet [Klonopin]  Drug    RxNorm  Branded Drug    211761  NA
776 |     798894  798874  Clonazepam 0.25 MG Oral Tablet [Klonopin]   Drug    RxNorm  Branded Drug    211762  NA
777 |     798896  798874  Clonazepam 1 MG/ML Injectable Solution  Drug    RxNorm  Clinical Drug   249943  NA
778 |     798897  798874  Clonazepam 0.5 MG   Drug    RxNorm  Clinical Drug Comp  315699  NA
779 |     798899  798874  Clonazepam 2 MG Drug    RxNorm  Clinical Drug Comp  317336  NA
780 | 
781 | ### outcome.txt
782 | 
783 | ``` 
784 |  cat outcome.txt
785 | ```
786 | 
787 |     # patients found from the inclusion criteria ONLY.
788 |     # patients found from the exclusion criteria ONLY.
789 |     # overlapping patients excluded from the original inclusion input based on the exclusion criteria.
790 |     # patients found that meet the inclusion and exclusion criteria.
791 | 
792 | ### patient\_list.txt
793 | 
794 | ``` 
795 |  head patient_list.txt 
796 | ```
797 | 
798 |     patient_list
799 |     1
800 |     2
801 |     3
802 | 
803 | ## Code Breakdown
804 | 
805 | ![Workflow of ROMOP
806 | functionality](www/figure1b_v3.png)
807 | 
808 | ROMOP first requires the creation of a data dictionary (using
809 | [makeDataOntology](#makedataontology) function) of the ontology (from
810 | *concept* table) that is referenced and utilized to map to all concepts
811 | for all functions. Using this ontology, all searches and extractions are
812 | optimized to only query tables in which the data could be found.
813 | 
814 | ### Data Retrieval
815 | 
816 | The majority of data in clinical tables are stored as concepts. When
817 | data is extracted, ROMOP first maps the relevant concepts (e.g.,
818 | device\_type\_concept\_id) to the data dictionary and then returns the
819 | mapped concepts to the user.
820 | 
821 | ### Searching
822 | 
823 | In the OMOP data structure, there is a distinction between how concepts
824 | are recorded and what can be directly searched for. For instance, if the
825 | user is interested in the medication idelalisib, it is not possible to
826 | directly identify records by searching for the general concept (e.g.,
827 | RxNorm code 1544460) as the data are recorded by the bottom-most (i.e.,
828 | most specific) concepts of the hierarchy (e.g., idelalisib 150 MG
829 | Delayed Release Oral Tablet). The hierarchical structure of these
830 | concepts in the OMOP CDM back-end, however, facilitates more powerful
831 | searches. In most extracted EHR systems, the user has to define all
832 | medications to search, for instance through a pre-populated list or by
833 | wildcard string matching (e.g., all drug names LIKE “%statin%”). This
834 | strategy is ultimately not ideal as it is not extensible to other
835 | systems (e.g., one system might prescribe a version or formulation of a
836 | drug that is not in another) and requires extensive manual
837 | quality-control (e.g., removing “nystatin” drugs from the string
838 | matching results). For the [findPatients](#findpatients) function, if
839 | the “mapped” option is selected, searching for a broad code like ATC
840 | level 3 code A05A (bile therapies), or even a specific term code like
841 | RxNorm code 1544460 for idelalisib, will automatically identify and
842 | query for all bottom-level (e.g., idelalisib 150 MG Delayed Release Oral
843 | Tablet) codes contained underneath that seed concept. This works by
844 | ROMOP first mapping the initial search criteria to a standard concept
845 | (SNOMED or RxNorm) and finding all descendants underneath it. Another
846 | benefit to this “mapped” option is that terms are not reliant on how the
847 | data were originally entered. For instance, if a health system switches
848 | from ICD-9CM to ICD-10CM coding, there might be discrepancies in
849 | prevalence of codes over time. Mapping to a common concept, however,
850 | often alleviates this issue as codes from both vocabularies are
851 | typically linked to a common code in the standard vocabulary. Of course
852 | the user can search for the concepts they entered only using the
853 | “direct” option (i.e., search for ICD-9CM code 230.0 only).
854 | 
855 | ## Helpful Hints
856 | 
857 |   - We recommend using the *mapped* argument for the
858 |     [findPatients](#findpatients) function because the concepts will not
859 |     depend on the format in which the data were entered (i.e., the
860 |     *source\_concept*). This is important as different institutions may
861 |     utilize different underlying terminologies, as well as switch
862 |     primary data entry vocabularies over time (i.e., the switch from
863 |     ICD-9 to ICD-10). For example, if the user is interested in
864 |     “Trigeminal neuralgia”, using the ICD-10 code “G50.1” with the
865 |     *direct* argument, all prior entries that utilized the corresponding
866 |     ICD-9 code (“350.1”) most likely will not be found as many data
867 |     warehouses do not “back-map” codes. Using the *mapped* argument will
868 |     bypass this issue as the standard concept will be used which should
869 |     capture both options.
870 |   - Standard vocabularies: while the OMOP common data model utilizes
871 |     many ontologies, **SNOMED** and **RxNorm** are used primarily for
872 |     common concepts in the clinical data tables. As such, while any
873 |     vocabulary can be used for [findPatients](#findpatients), the
874 |     *mapped* function will only be able to find data contained within
875 |     the following common concepts per domain:
876 | 
877 | <!-- end list -->
878 | 
879 |     ##   domain_type                concepts
880 |     ## 1 Measurement       LOINC,SNOMED,CPT4
881 |     ## 2   Condition                  SNOMED
882 |     ## 3        Drug         RxNorm,CPT4,NDC
883 |     ## 4 Observation SNOMED,CPT4,LOINC,HCPCS
884 |     ## 5      Device            SNOMED,HCPCS
885 |     ## 6   Procedure       SNOMED,CPT4,HCPCS
886 | 
887 | Consequently, if inclusion/exclusion criteria can be mapped to the
888 | data ontology, but no synonym/descendants are contained within the above
889 | common concepts, no search will be performed (as no patients would be
890 | returned). This most directly affects searching for *Drug* concepts, in
891 | which we recommend not using standard common concepts (e.g., RxNorm,
892 | ATC) for search criteria.
893 | 
894 |   - To ensure complete capture of data concepts of interest, we
895 |     recommend identifying multiple vocabulary/codes to use using the
896 |     [Athena](http://athena.ohdsi.org/search-terms/terms) resource. For
897 |     instance, if interested in finding all individuals taking a
898 |     Benzodiazepine, consider using both the relevant ATC classes (e.g.,
899 |     N03AE) as well as the relevant Substance (SNOMED) codes (e.g.,
900 |     16047007). The [exploreConcepts](#exploreconcepts) function can be
901 |     used to identify and prioritize which codes are optimal to use.
902 | 
903 | ## License
904 | 
905 | MIT License
906 | 
907 | Copyright (c) 2018 Benjamin S. Glicksberg
908 | 
909 | Permission is hereby granted, free of charge, to any person obtaining a
910 | copy of this software and associated documentation files (the
911 | “Software”), to deal in the Software without restriction, including
912 | without limitation the rights to use, copy, modify, merge, publish,
913 | distribute, sublicense, and/or sell copies of the Software, and to
914 | permit persons to whom the Software is furnished to do so, subject to
915 | the following conditions:
916 | 
917 | The above copyright notice and this permission notice shall be included
918 | in all copies or substantial portions of the Software.
919 | 
920 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
921 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
922 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
923 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
924 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
925 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
926 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
927 | 
928 | ## Contact
929 | 
930 | For questions, comments, errors, bug reports, or issues, please contact:
931 | <benjamin.glicksberg@ucsf.edu>  
932 | For general correspondence, please contact: <atul.butte@ucsf.edu>
933 | 


--------------------------------------------------------------------------------
/ROMOP.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/man/changeOutDirectory.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/changeOutDirectory.R
 3 | \name{changeOutDirectory}
 4 | \alias{changeOutDirectory}
 5 | \title{Change outDirectory}
 6 | \usage{
 7 | changeOutDirectory(outdir, create = FALSE)
 8 | }
 9 | \arguments{
10 | \item{outdir}{directory path}
11 | 
12 | \item{create}{TRUE/FALSE (will create the directory if it does not exist)}
13 | }
14 | \value{
15 | none (called for side effect: sets outDirectory)
16 | }
17 | \description{
18 | Sets the current outDirectory which will store the Data Ontology and all function output. Option to create directory if does not exist.
19 | }
20 | \examples{
21 | changeOutDirectory(outdir=".", create=FALSE)
22 | }
23 | 


--------------------------------------------------------------------------------
/man/exploreConcepts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/exploreConcepts.R
 3 | \name{exploreConcepts}
 4 | \alias{exploreConcepts}
 5 | \title{Extract synonyms and descendants for concepts of interest.}
 6 | \usage{
 7 | exploreConcepts(vocabulary, codes)
 8 | }
 9 | \arguments{
10 | \item{vocabulary}{Comma-separated string of relevant vocabularies for inclusion criteria}
11 | 
12 | \item{codes}{Semi-colon separated string of code concepts for inclusion criteria, corresponding to the order for vocabulary. Multiple codes can be used per vocabulary and should be comma-separated.}
13 | }
14 | \value{
15 | Returns a table of concepts contained under (i.e., below in the hierarchy) the query concept.
16 | }
17 | \description{
18 | For given vocabulary and concept, returns the mapped standard concept(s) as well as descendant concept(s). Requires dataOntology to have been created (makeDataOntology function).
19 | }
20 | \examples{
21 | conceptsInfo <- exploreConcepts(vocabulary = "ATC, ICD10CM", codes = "A01A; K50, K51")
22 | }
23 | 


--------------------------------------------------------------------------------
/man/findPatients.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/findPatients.R
 3 | \name{findPatients}
 4 | \alias{findPatients}
 5 | \title{Find patients based on clinical criteria}
 6 | \usage{
 7 | findPatients(strategy_in = "mapped", vocabulary_in, codes_in,
 8 |   function_in = "or", strategy_out = NULL, vocabulary_out = NULL,
 9 |   codes_out = NULL, function_out = NULL, declare = FALSE, save = FALSE,
10 |   out_name = NULL)
11 | }
12 | \arguments{
13 | \item{strategy_in}{"mapped" or "direct" (dictates the strategy for how inclusion criteria are treated. "direct" searches for codes as provided, "mapped" maps criteria to standard concepts and finds descendants.)}
14 | 
15 | \item{vocabulary_in}{vocabularies for inclusion criteria (comma-separated string of vocabularies)}
16 | 
17 | \item{codes_in}{specific concept codes for inclusion criteria (semi-colon separated string of code concepts, corresponding to the order for vocabulary_in. Multiple codes can be used per vocabulary and should be comma-separated.)}
18 | 
19 | \item{function_in}{"and" or "or" (dictates how multiple inclusion criteria should be treated. "and" necessitates that all inclusion criteria are met (i.e., intersection), while "or" allows for any criteria to be met (i.e., union).)}
20 | 
21 | \item{strategy_out}{"mapped", "direct", or NULL (default) (dictates the strategy for how exclusion criteria are treated. NULL indicates no exclusion criteria.)}
22 | 
23 | \item{vocabulary_out}{vocabularies for exclusion criteria or NULL (default) (comma-separated string of relevant vocabularies for exclusion criteria. NULL indicates no exclusion criteria)}
24 | 
25 | \item{codes_out}{specific concept codes for exclusion criteria or NULL (default) (semi-colon separated string of code concepts for exclusion criteria, corresponding to the order for vocabulary_out. Multiple codes can be used per vocabulary and should be comma-separated. NULL indicates no exclusion criteria.)}
26 | 
27 | \item{function_out}{"and", "or", or NULL (default) (dictates how multiple exclusion criteria should be treated. "and" necessitates that all exclusion criteria are met (i.e., intersection), while "or" allows for any criteria to be met (i.e., union). NULL indicates no exclusion criteria.)}
28 | 
29 | \item{declare}{TRUE/FALSE will output status and data information during the process}
30 | 
31 | \item{save}{TRUE/FALSE whether intermediate components of the search should be saved (e.g., mapped concepts found with unique patient counts per concept).}
32 | 
33 | \item{out_name}{name assigned to search query or NULL (if save = TRUE, saves query using provided name. If the provided name already exists as a directory (or is NULL), the directory defaults to datetime name)}
34 | }
35 | \value{
36 | List of patients that meet inclusion criteria (and not exclusion criteria if entered).
37 | }
38 | \description{
39 | Identify patients based on clinical data inclusion (and exclusion, if desired) criteria. Flexible to allow for multiple data types, vocabularies, and concepts.
40 | }
41 | \examples{
42 | patient_list = findPatients(strategy_in="mapped", vocabulary_in = "ICD10CM", codes_in = "F41", strategy_out="mapped", vocabulary_out = "MeSH", codes_out = "D002998", function_out = "and")
43 | }
44 | 


--------------------------------------------------------------------------------
/man/getClinicalData.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getClinicalData.R
 3 | \name{getClinicalData}
 4 | \alias{getClinicalData}
 5 | \title{Retrieves all patient clinical data}
 6 | \usage{
 7 | getClinicalData(patient_list, declare = FALSE)
 8 | }
 9 | \arguments{
10 | \item{patient_list}{Comma-separated string of patient ids}
11 | 
12 | \item{declare}{TRUE/FALSE will output status and data information during the process}
13 | }
14 | \value{
15 | a list of tables containing all data within each domain (e.g., Condition) for all patients provided (can access by ptClinicalData$Condition).
16 | }
17 | \description{
18 | Wrapper for domain-specific getData functions (e.g., getObservations). Produces a list of tables for all relevant domains.
19 | }
20 | \examples{
21 | ptClinicalData <- getClinicalData("1,2", declare=TRUE)
22 | }
23 | 


--------------------------------------------------------------------------------
/man/getConditions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getClinicalData.R
 3 | \name{getConditions}
 4 | \alias{getConditions}
 5 | \title{Retrieves all patient clinical data from Condition table}
 6 | \usage{
 7 | getConditions(patient_list, declare = FALSE)
 8 | }
 9 | \arguments{
10 | \item{patient_list}{Comma-separated string of patient ids}
11 | 
12 | \item{declare}{TRUE/FALSE will output status and data information during the process}
13 | }
14 | \value{
15 | a table of relevant clinical data contained within the 'condition_occurrence' table
16 | }
17 | \description{
18 | Produces a table for relevant concepts contained in the 'condition_occurrence' table mapped through the data ontology for a patient list. Data retrieved include: condition_type, condition_status, etc.
19 | }
20 | \examples{
21 | ptCondData <- getConditions("1,2", declare=TRUE)
22 | }
23 | 


--------------------------------------------------------------------------------
/man/getDemographics.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getDemographics.R
 3 | \name{getDemographics}
 4 | \alias{getDemographics}
 5 | \title{Retrieves patient demographic data}
 6 | \usage{
 7 | getDemographics(patient_list = NULL, declare = FALSE)
 8 | }
 9 | \arguments{
10 | \item{patient_list}{NULL or comma-separated string of patient ids. A provided patient_list will restrict search to ids. NULL will return demographic data for all available patients.}
11 | 
12 | \item{declare}{TRUE/FALSE will output status and data information during the process}
13 | }
14 | \value{
15 | table of mapped demographic concepts for all patients or specific ones for a provided csv formatted string of ids
16 | }
17 | \description{
18 | Compiles demographic data for all patients or a given patient list if provided. Concepts are mapped through the created data ontology. Demographic data are retrieved from 'person' and 'death' tables and include: birthdate, deathdate, gender, ethnicity, and race.
19 | }
20 | \examples{
21 | ptDemo <- getDemographics(patient_list=NULL,declare=TRUE)
22 | }
23 | 


--------------------------------------------------------------------------------
/man/getDevices.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getClinicalData.R
 3 | \name{getDevices}
 4 | \alias{getDevices}
 5 | \title{Retrieves all patient clinical data from Device table}
 6 | \usage{
 7 | getDevices(patient_list, declare = FALSE)
 8 | }
 9 | \arguments{
10 | \item{patient_list}{Comma-separated string of patient ids}
11 | 
12 | \item{declare}{TRUE/FALSE will output status and data information during the process}
13 | }
14 | \value{
15 | a table of relevant clinical data contained within the 'device_exposure' table
16 | }
17 | \description{
18 | Produces a table for relevant concepts contained in the 'device_exposure' table mapped through the data ontology for a patient list. Data retrieved include: device_type, etc.
19 | }
20 | \examples{
21 | ptDeviceData <- getDevices("1,2", declare=TRUE)
22 | }
23 | 


--------------------------------------------------------------------------------
/man/getEncounters.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getEncounters.R
 3 | \name{getEncounters}
 4 | \alias{getEncounters}
 5 | \title{Retrieves patient clinical encounter data}
 6 | \usage{
 7 | getEncounters(patient_list, declare = FALSE)
 8 | }
 9 | \arguments{
10 | \item{patient_list}{Comma-separated string of patient ids}
11 | 
12 | \item{declare}{TRUE/FALSE will output status and data information during the process}
13 | }
14 | \value{
15 | table of mapped encounter concepts for specific patients contained in a provided csv formatted string of ids.
16 | }
17 | \description{
18 | Compiles encounter data for a given patient list. Concepts are mapped through the created data ontology. Encounter data are retrieved from visit_occurrence table and include: visit_type, encounter_type, etc.
19 | }
20 | \examples{
21 | ptEncs <- getEncounters("1,2,3,4",declare=TRUE)
22 | }
23 | 


--------------------------------------------------------------------------------
/man/getMeasurements.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getClinicalData.R
 3 | \name{getMeasurements}
 4 | \alias{getMeasurements}
 5 | \title{Retrieves all patient clinical data from Measurement table}
 6 | \usage{
 7 | getMeasurements(patient_list, declare = FALSE)
 8 | }
 9 | \arguments{
10 | \item{patient_list}{Comma-separated string of patient ids}
11 | 
12 | \item{declare}{TRUE/FALSE will output status and data information during the process}
13 | }
14 | \value{
15 | a table of relevant clinical data contained within the 'measurement' table
16 | }
17 | \description{
18 | Produces a table for relevant concepts contained in the 'measurement' table mapped through the data ontology for a patient list. Data retrieved include: measurement_type, value, unit, etc.
19 | }
20 | \examples{
21 | ptMeasData <- getMeasurements("1,2", declare=TRUE)
22 | }
23 | 


--------------------------------------------------------------------------------
/man/getMedications.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getClinicalData.R
 3 | \name{getMedications}
 4 | \alias{getMedications}
 5 | \title{Retrieves all patient clinical data from Medications table}
 6 | \usage{
 7 | getMedications(patient_list, declare = FALSE)
 8 | }
 9 | \arguments{
10 | \item{patient_list}{Comma-separated string of patient ids}
11 | 
12 | \item{declare}{TRUE/FALSE will output status and data information during the process}
13 | }
14 | \value{
15 | a table of relevant clinical data contained within the 'drug_exposure' table
16 | }
17 | \description{
18 | Produces a table for relevant concepts contained in the 'drug_exposure' table mapped through the data ontology for a patient list. Data retrieved include: drug_type, route, etc.
19 | }
20 | \examples{
21 | ptsMedsData <- getMedications("1,2", declare=TRUE)
22 | }
23 | 


--------------------------------------------------------------------------------
/man/getObservations.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getClinicalData.R
 3 | \name{getObservations}
 4 | \alias{getObservations}
 5 | \title{Retrieves all patient clinical data from Observations table}
 6 | \usage{
 7 | getObservations(patient_list, declare = FALSE)
 8 | }
 9 | \arguments{
10 | \item{patient_list}{Comma-separated string of patient ids}
11 | 
12 | \item{declare}{TRUE/FALSE will output status and data information during the process}
13 | }
14 | \value{
15 | a table of relevant clinical data contained within the 'observation' table
16 | }
17 | \description{
18 | Produces a table for relevant concepts contained in the 'observation' table mapped through the data ontology for a patient list. Data retrieved include: observation_type, value, etc.
19 | }
20 | \examples{
21 | ptObsData <- getObservations("1,2", declare=TRUE)
22 | }
23 | 


--------------------------------------------------------------------------------
/man/getProcedures.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/getClinicalData.R
 3 | \name{getProcedures}
 4 | \alias{getProcedures}
 5 | \title{Retrieves all patient clinical data from Procedures table}
 6 | \usage{
 7 | getProcedures(patient_list, declare = FALSE)
 8 | }
 9 | \arguments{
10 | \item{patient_list}{Comma-separated string of patient ids}
11 | 
12 | \item{declare}{TRUE/FALSE will output status and data information during the process}
13 | }
14 | \value{
15 | a table of relevant clinical data contained within the 'procedure_occurrence' table
16 | }
17 | \description{
18 | Produces a table for relevant concepts contained in the 'procedure_occurrence' table mapped through the data ontology for a patient list. Data retrieved include: procedure_type, etc.
19 | }
20 | \examples{
21 | ptProcData <- getProcedures("1,2", declare=TRUE)
22 | }
23 | 


--------------------------------------------------------------------------------
/man/makeDataOntology.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/makeDataOntology.R
 3 | \name{makeDataOntology}
 4 | \alias{makeDataOntology}
 5 | \title{Creates general data ontology}
 6 | \usage{
 7 | makeDataOntology(declare = FALSE, store_ontology = FALSE)
 8 | }
 9 | \arguments{
10 | \item{declare}{TRUE/FALSE will output status and data information during the process}
11 | 
12 | \item{store_ontology}{TRUE/FALSE If TRUE: will attempt to load .rds file from the current outDirectory; will create and save it if it does not exist. If FALSE: will build table.}
13 | }
14 | \value{
15 | Returns an ontology table dictionary of concepts contained in the 'concept' table.
16 | }
17 | \description{
18 | Creates general data ontology used by all data tables from the concept table. Option to save/load as .rds file.
19 | }
20 | \examples{
21 | \dontrun{
22 | dataOntology <- makeDataOntology(declare=FALSE,store_ontology=TRUE)
23 | }
24 | }
25 | 


--------------------------------------------------------------------------------
/man/showDataTypes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/showDataTypes.R
 3 | \name{showDataTypes}
 4 | \alias{showDataTypes}
 5 | \title{Shows available data types from the OMOP ontology}
 6 | \usage{
 7 | showDataTypes()
 8 | }
 9 | \value{
10 | Returns a table of vocabularies contained within clinical domains: Condition, Observation, Measurement, Device, Procedure, Drug.
11 | }
12 | \description{
13 | Details relevant vocabularies per ontological domain. Requires dataOntology to have been created (makeDataOntology function).
14 | }
15 | \examples{
16 | showDataTypes()
17 | 
18 | }
19 | 


--------------------------------------------------------------------------------
/man/summarizeDemographics.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/summarizeDemographics.R
 3 | \name{summarizeDemographics}
 4 | \alias{summarizeDemographics}
 5 | \title{Summarizes patient demographic data}
 6 | \usage{
 7 | summarizeDemographics(ptDemo)
 8 | }
 9 | \arguments{
10 | \item{ptDemo}{patient demographics table: ptDemo is the patient demographics object from the getDemographics function output.}
11 | }
12 | \value{
13 | none (called for side effect: prints table)
14 | }
15 | \description{
16 | Summarizes patient demographic data from the getDemographics function.
17 | }
18 | \examples{
19 | \dontrun{
20 | summarizeDemographics(ptDemo)
21 | }
22 | }
23 | 


--------------------------------------------------------------------------------
/www/figure1a_v3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BenGlicksberg/ROMOP/dc37ea9adeaea7a3ba330e9b909f70d198673a5f/www/figure1a_v3.png


--------------------------------------------------------------------------------
/www/figure1b_v3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BenGlicksberg/ROMOP/dc37ea9adeaea7a3ba330e9b909f70d198673a5f/www/figure1b_v3.png


--------------------------------------------------------------------------------