├── .gitignore ├── Comp_test.R ├── Connect_Oracle.R ├── Connect_PostgreSQL.R ├── Connect_SQLServer.R ├── DQTBL_omop_v5.csv ├── DQTBL_pcornet_v3.csv ├── DQTBL_pcornet_v31.csv ├── LICENSE.md ├── README.Rmd ├── README.html ├── RUN.R ├── Report.Rmd ├── _config.yml ├── dmrun.R ├── dmtest_pcornet3.R ├── example ├── .Rapp.history └── reports │ ├── DM_PCORnet_v3_DQe-cDemoPCORnet_14-01-2017.csv │ ├── FRQ_comp_PCORnet_v3_DQe-cDemoPCORnet_14-01-2017.csv │ ├── load_details_PCORnet_v3_DQe-cDemoPCORnet_14-01-2017.csv │ ├── mstabs │ └── DQ_Master_Table_PCORnet_v3_DQe-cDemoPCORnet_14-01-2017.csv │ ├── tablelist_PCORnet_v3_DQe-cDemoPCORnet-14-01-2017.csv │ └── withouts_PCORnet_v3_DQe-cDemoPCORnet_14-01-2017.csv ├── freq.R ├── funcs_pcornet3.R ├── illustration ├── commonvariables.gif ├── fig2-1.png ├── fig2-2.png ├── fig2-3.png ├── fig3.png ├── fig4-1.png ├── fig4-2.png ├── fig5.png ├── fig6.png ├── netwizfig2.gif ├── table1.png └── workflow3.1.jpg ├── keys.R ├── libs.R ├── prep.R ├── reports ├── .DS_Store └── mstabs │ └── .keep ├── sqljdbc4.jar └── without.R /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | DQe-c.Rproj 6 | README.nb.html 7 | DQ_Master_Table_PCORnet_v3_DQe-cDemoPCORnet_14-01-2017.csv -------------------------------------------------------------------------------- /Comp_test.R: -------------------------------------------------------------------------------- 1 | 2 | ########################################### 3 | ############ COMPLETENESS ANALYSIS ######## 4 | ###################++++++################## 5 | ##This scripts counts and stores frequency of missing values 6 | 7 | if (SQL == "SQLServer") { 8 | ############################################################################# 9 | ##loop 3: goes through all columns in all tables and count rows with a NULL/NA value or empty string 10 | ## and store in DQTBL table as a new column, called MS1_FRQ, for each row 11 | 
############################################################################# 12 | 13 | 14 | for (j in seq_along(unique(DQTBL$TabNam))) 15 | ##DQTBL$TabNam has all table names 16 | { 17 | NAM <- unique(DQTBL$TabNam)[j] 18 | ##extracted name of table j in CDM 19 | NAM_Repo <- as.character(tbls2[(tbls2$CDM_Tables == NAM),"Repo_Tables"]) 20 | 21 | # L <- as.numeric(tbls2[(tbls2$CDM_Tables == NAM),"NCols"]) 22 | id.NAM <- which(DQTBL$TabNam == NAM) 23 | id.repotabs <- which(repotabs$TABLE_NAME == NAM_Repo) 24 | ##extracting the row numbers 25 | NAMTB <- DQTBL[id.NAM,] 26 | REPOTB <- repotabs[id.repotabs,] 27 | ##subsetting the DQTBL and repository table to only the rows from table j 28 | ##saving the name of table j as characters 29 | 30 | for (i in seq_len(nrow(REPOTB))) 31 | ##now going through the columns of table j; seq_len() runs zero iterations when table j has no rows in repotabs (1:0 would iterate over 1 and 0) 32 | { 33 | col <- REPOTB$COLUMN_NAME[i] 34 | MS1_FRQ <- as.numeric(dbGetQuery(conn, paste0("SELECT COUNT('", col,"') FROM ",schema,NAM_Repo," WHERE [", col, "] IS NULL OR CAST(", col, " AS CHAR(54)) IN ('')"))) 35 | ##counted the rows in which this column is NULL or an empty string 36 | DQTBL$MS1_FRQ <- ifelse(DQTBL$ColNam == tolower(col) & DQTBL$TabNam == NAM, MS1_FRQ, DQTBL$MS1_FRQ ) 37 | ##stored the count in MS1_FRQ for the DQTBL row matching this table and column 38 | } 39 | } 40 | 41 | 42 | 43 | ############################################################################# 44 | ##loop 4: goes through all columns in all tables and count rows with a + - _ # $ * \ ? . , & ^ % ! 
@ flag, 45 | # meaning that there is nothing in the cell, but also not marked as NULL/NA 46 | ## and store in DQTBL table as a new column, called MS2_FRQ, for each row 47 | ############################################################################# 48 | 49 | 50 | for (j in seq_along(unique(DQTBL$TabNam))) 51 | ##DQTBL$TabNam has all table names 52 | { 53 | NAM <- unique(DQTBL$TabNam)[j] 54 | ##extracted name of table j in CDM 55 | NAM_Repo <- as.character(tbls2[(tbls2$CDM_Tables == NAM),"Repo_Tables"]) 56 | 57 | # L <- as.numeric(tbls2[(tbls2$CDM_Tables == NAM),"NCols"]) 58 | id.NAM <- which(DQTBL$TabNam == NAM) 59 | id.repotabs <- which(repotabs$TABLE_NAME == NAM_Repo) 60 | ##extracting the row numbers 61 | NAMTB <- DQTBL[id.NAM,] 62 | REPOTB <- repotabs[id.repotabs,] 63 | ##subsetting the DQTBL and repository table to only the rows from table j 64 | ##saving the name of table j as characters 65 | 66 | for (i in seq_len(nrow(REPOTB))) 67 | ##now going through the columns of table j; seq_len() runs zero iterations when table j has no rows in repotabs 68 | { 69 | col <- REPOTB$COLUMN_NAME[i] 70 | MS2_FRQ <- as.numeric(dbGetQuery(conn, paste0("SELECT COUNT('", col,"') FROM ",schema,NAM_Repo," WHERE CAST(", col, " AS CHAR(54)) IN ('+', '-', '_','#', '$', '*', '\\', '?', '.', '&', '^', '%', '!', '@','NI')"))) 71 | ##counted the rows whose value is one of the placeholder flags; the backslash is escaped in the R string so the SQL text contains a literal backslash flag (unescaped, R reads it as an escaped quote and the SQL gets an empty string instead) 72 | DQTBL$MS2_FRQ <- ifelse(DQTBL$ColNam == tolower(col) & DQTBL$TabNam == NAM, MS2_FRQ, DQTBL$MS2_FRQ ) 73 | ##stored the count in MS2_FRQ for the DQTBL row matching this table and column 74 | } 75 | } 76 | 77 | } else if (SQL == "Oracle") { 78 | 79 | ############################################################################# 80 | ##loop 3: goes through all columns in all tables and count rows with a NULL/NA value or empty string 81 | ## and store in DQTBL table as a new column, called MS1_FRQ, for each row 82 | ############################################################################# 83 | 84 | 85 | for (j in seq_along(unique(DQTBL$TabNam))) 86 | ##DQTBL$TabNam has all table names 87 | { 88 
| NAM <- unique(DQTBL$TabNam)[j] 89 | ##extracted name of table j in CDM 90 | NAM_Repo <- as.character(tbls2[(tbls2$CDM_Tables == NAM),"Repo_Tables"]) 91 | 92 | # L <- as.numeric(tbls2[(tbls2$CDM_Tables == NAM),"NCols"]) 93 | id.NAM <- which(DQTBL$TabNam == NAM) 94 | id.repotabs <- which(repotabs$TABLE_NAME == NAM_Repo) 95 | ##extracting the row numbers 96 | NAMTB <- DQTBL[id.NAM,] 97 | REPOTB <- repotabs[id.repotabs,] 98 | ##subsetting the DQTBL and repository table to only the rows from table j 99 | ##saving the name of table j as characters 100 | 101 | for (i in seq_len(nrow(REPOTB))) 102 | ##now going through the columns of table j; seq_len() runs zero iterations when table j has no rows in repotabs (1:0 would iterate over 1 and 0) 103 | { 104 | col <- REPOTB$COLUMN_NAME[i] 105 | MS1_FRQ <- as.numeric(dbGetQuery(conn, paste0("SELECT COUNT('", col,"') FROM ",schema,NAM_Repo," WHERE ", col, " IS NULL OR TO_CHAR(", col, ") IN ('')"))) 106 | ##counted the rows in which this column is NULL or an empty string 107 | DQTBL$MS1_FRQ <- ifelse(DQTBL$ColNam == tolower(col) & DQTBL$TabNam == NAM, MS1_FRQ, DQTBL$MS1_FRQ ) 108 | ##stored the count in MS1_FRQ for the DQTBL row matching this table and column 109 | } 110 | } 111 | 112 | 113 | 114 | ############################################################################# 115 | ##loop 4: goes through all columns in all tables and count rows with a + - _ # $ * \ ? . , & ^ % ! 
@ flag, 116 | # meaning that there is nothing in the cell, but also not marked as NULL/NA 117 | ## and store in DQTBL table as a new column, called MS2_FRQ, for each row 118 | ############################################################################# 119 | 120 | 121 | for (j in seq_along(unique(DQTBL$TabNam))) 122 | ##DQTBL$TabNam has all table names 123 | { 124 | NAM <- unique(DQTBL$TabNam)[j] 125 | ##extracted name of table j in CDM 126 | NAM_Repo <- as.character(tbls2[(tbls2$CDM_Tables == NAM),"Repo_Tables"]) 127 | 128 | # L <- as.numeric(tbls2[(tbls2$CDM_Tables == NAM),"NCols"]) 129 | id.NAM <- which(DQTBL$TabNam == NAM) 130 | id.repotabs <- which(repotabs$TABLE_NAME == NAM_Repo) 131 | ##extracting the row numbers 132 | NAMTB <- DQTBL[id.NAM,] 133 | REPOTB <- repotabs[id.repotabs,] 134 | ##subsetting the DQTBL and repository table to only the rows from table j 135 | ##saving the name of table j as characters 136 | 137 | for (i in seq_len(nrow(REPOTB))) 138 | ##now going through the columns of table j; seq_len() runs zero iterations when table j has no rows in repotabs 139 | { 140 | col <- REPOTB$COLUMN_NAME[i] 141 | MS2_FRQ <- as.numeric(dbGetQuery(conn, paste0("SELECT COUNT('", col,"') FROM ",schema,NAM_Repo," WHERE TO_CHAR(",col,") IN ('+', '-', '_','#', '$', '*', '\\', '?', '.', '&', '^', '%', '!', '@','NI')"))) 142 | ##counted the rows whose value is one of the placeholder flags; the backslash is escaped in the R string so the SQL text contains a literal backslash flag (unescaped, R reads it as an escaped quote and the SQL gets an empty string instead) 143 | DQTBL$MS2_FRQ <- ifelse(DQTBL$ColNam == tolower(col) & DQTBL$TabNam == NAM, MS2_FRQ, DQTBL$MS2_FRQ ) 144 | ##stored the count in MS2_FRQ for the DQTBL row matching this table and column 145 | } 146 | } 147 | 148 | } 149 | ############# lets see what is going on with the providers table... 150 | # providchars <- dbGetQuery(conn, "SELECT providerid FROM dbo.pmndiagnosis WHERE CAST(providerid AS CHAR(54)) IN ('+', '-', '_','#', '$', '*', '\', '?', '.', '&', '^', '%', '!', '@')") 151 | # unique(providchars) 152 | ### everything is an @ !!!!!!!!!!!! 
153 | 154 | DQTBL$FRQ <- as.numeric(DQTBL$FRQ) 155 | DQTBL$MS1_FRQ <- as.numeric(DQTBL$MS1_FRQ) 156 | DQTBL$MS2_FRQ <- as.numeric(DQTBL$MS2_FRQ) 157 | 158 | ##calculating the proportion missing (MS1 + MS2 counts divided by the total row count FRQ, rounded to 2 decimals) for each column/table 159 | DQTBL$MSs_PERC <- round((DQTBL$MS1_FRQ+DQTBL$MS2_FRQ)/DQTBL$FRQ,2) 160 | ##saving the master DQ table 161 | write.csv(DQTBL, file = paste("reports/mstabs/DQ_Master_Table_",CDM,"_",org,"_",as.character(format(Sys.Date(),"%d-%m-%Y")),".csv", sep="")) 162 | 163 | ##saving a copy for aggregated analysis, if the aggregated analysis add-on is installed. 164 | 165 | #set the PATH below to aggregated analysis directory 166 | # write.csv(DQTBL, file = paste("PATH/DQ_Master_Table_",CDM,"_",org,"_",as.character(format(Sys.Date(),"%d-%m-%Y")),".csv", sep="")) 167 | 168 | 169 | 170 | 171 | 172 | ##### Creating FRQ_comp table to compare frequencies from MSDQ table over time. 173 | path <- "reports/mstabs" 174 | msnames <- list.files(path) 175 | n <- length(msnames) 176 | 177 | ##reading and storing every master DQ table found in reports/mstabs 178 | compr <- list() 179 | N <- length(msnames) 180 | for (n in seq_len(N)) { 181 | compr[[n]] <- data.frame(read.csv(file.path(path, msnames[n]))) 182 | } 183 | 184 | #binding the tables together to create a masters table 185 | if (CDM %in% c("PCORNET3","PCORNET31")) { 186 | FRQ_comp <- subset(rbindlist(compr), (ColNam == "patid" & TabNam == "demographic") | 187 | (ColNam == "dispensingid" & TabNam == "dispensing") | 188 | (ColNam == "vitalid" & TabNam == "vital") | 189 | (ColNam == "conditionid" & TabNam == "condition") | 190 | (ColNam == "pro_cm_id" & TabNam == "pro_cm") | 191 | (ColNam == "encounterid" & TabNam == "encounter") | 192 | (ColNam == "diagnosisid" & TabNam == "diagnosis") | 193 | (ColNam == "proceduresid" & TabNam == "procedures") | 194 | # (ColNam == "providerid" & TabNam == "encounter") | 195 | (ColNam == "prescribingid" & TabNam == "prescribing") | 196 | (ColNam == "trialid" & TabNam == "pcornet_trial") | 197 | 
(ColNam == "networkid" & TabNam == "harvest") 198 | ) 199 | } 200 | ## NOTE(review): FRQ_comp is assigned only inside the PCORnet branch above; unless it is defined elsewhere, the write.csv below stops with an object-not-found error for any other CDM value 201 | 202 | write.csv(FRQ_comp, file = paste("reports/FRQ_comp_",CDM,"_",org,"_",as.character(format(Sys.Date(),"%d-%m-%Y")),".csv", sep="")) 203 | 204 | 205 | -------------------------------------------------------------------------------- /Connect_Oracle.R: -------------------------------------------------------------------------------- 1 | ######################################################### 2 | ############################################################################ 3 | ####### This script connects your R to your Oracle database. ## 4 | 5 | 6 | # read username and password 7 | source("keys.R") 8 | 9 | # set up connection 10 | path01 <- getwd() 11 | drv <- JDBC(driverClass="oracle.jdbc.OracleDriver", 12 | classPath= paste0(path01,"/ojdbc6.jar"), 13 | identifier.quote="`") 14 | # creating a connection object by calling dbConnect 15 | conn <- dbConnect(drv, 16 | "jdbc:oracle:thin:@//database.hostname.com:port/service_name_or_sid", 17 | usrnm, 18 | pss) 19 | rm(pss) 20 | 21 | 22 | -------------------------------------------------------------------------------- /Connect_PostgreSQL.R: -------------------------------------------------------------------------------- 1 | ######################################################### 2 | ############################################################################ 3 | ####### This script connects your R to your PostgreSQL database. ## 4 | 5 | 6 | # read username and password 7 | source("keys.R") 8 | 9 | # set up connection 10 | drv <- dbDriver("PostgreSQL") 11 | con <- dbConnect(drv, 12 | dbname = "TYPE YOUR DATABASE NAME HERE", 13 | host = "TYPE YOUR HOST NAME HERE", 14 | port = ????, 15 | user = usrnm, 16 | password = pss 17 | ) 18 | rm(pss) 19 | conn <- con ## alias: the Oracle and SQL Server connection scripts name the connection conn, and Comp_test.R queries through conn 20 | ####### 21 | ######## If you don't know your data base name, host, or port, contact your server admin. 
22 | ####### 23 | 24 | 25 | -------------------------------------------------------------------------------- /Connect_SQLServer.R: -------------------------------------------------------------------------------- 1 | ######################################################### 2 | ############################################################################ 3 | ####### This script connects your R to your SQL Server database. ## 4 | 5 | 6 | # read username and password (keys.R is expected to define usrnm and pss, which are used below) 7 | source("keys.R") 8 | 9 | # set up connection: the JDBC driver jar (sqljdbc4.jar) is looked up in the current working directory 10 | path01 <- getwd() 11 | drv <- JDBC("com.microsoft.sqlserver.jdbc.SQLServerDriver", paste0(path01,"/sqljdbc4.jar"), 12 | identifier.quote="`") 13 | # creating a connection object by calling dbConnect; replace DATABASE ADDRESS and DATABASE NAME in the URL with your own values 14 | conn <- dbConnect(drv, 15 | "jdbc:sqlserver://DATABASE ADDRESS;databaseName=DATABASE NAME", 16 | usrnm, 17 | pss) 18 | rm(pss) 19 | # the password object is removed above once the connection has been created 20 | ####### 21 | ######## If you don't know your data base name and address, contact your server admin. 22 | ####### -------------------------------------------------------------------------------- /DQTBL_omop_v5.csv: -------------------------------------------------------------------------------- 1 | TabNam,ColNam,DQLVL,abbr,cat 2 | concept,concept_id,H,cnpt,vocabulary 3 | concept,concept_name,H,cnpt,vocabulary 4 | concept,domain_id,H,cnpt,vocabulary 5 | concept,vocabulary_id,H,cnpt,vocabulary 6 | concept,concept_class_id,H,cnpt,vocabulary 7 | concept,standard_concept,L,cnpt,vocabulary 8 | concept,concept_code,H,cnpt,vocabulary 9 | concept,valid_start_date,H,cnpt,vocabulary 10 | concept,valid_end_date,H,cnpt,vocabulary 11 | concept,invalid_reason,L,cnpt,vocabulary 12 | vocabulary,vocabulary_id,H,vocb,vocabulary 13 | vocabulary,vocabulary_name,H,vocb,vocabulary 14 | vocabulary,vocabulary_reference,H,vocb,vocabulary 15 | vocabulary,vocabulary_version,H,vocb,vocabulary 16 | vocabulary,vocabulary_concept_id,H,vocb,vocabulary 17 | domain,domain_id,H,domn,vocabulary 18 | domain,domain_name,H,domn,vocabulary 19 | 
domain,domain_concept_id,H,domn,vocabulary 20 | concept_class,concept_class_id,H,cncl,vocabulary 21 | concept_class,concept_class_name,H,cncl,vocabulary 22 | concept_class,concept_class_concept_id,H,cncl,vocabulary 23 | concept_relationship,concept_id_1,H,cnrl,vocabulary 24 | concept_relationship,concept_id_2,H,cnrl,vocabulary 25 | concept_relationship,relationship_id,H,cnrl,vocabulary 26 | concept_relationship,valid_start_date,H,cnrl,vocabulary 27 | concept_relationship,valid_end_date,H,cnrl,vocabulary 28 | concept_relationship,invalid_reason,L,cnrl,vocabulary 29 | relationship,relationship_id,H,rlsh,vocabulary 30 | relationship,relationship_name,H,rlsh,vocabulary 31 | relationship,is_hierarchical,H,rlsh,vocabulary 32 | relationship,defines_ancestry,H,rlsh,vocabulary 33 | relationship,reverse_relationship_id,H,rlsh,vocabulary 34 | relationship,relationship_concept_id,H,rlsh,vocabulary 35 | concept_synonym,concept_id,H,cnsy,vocabulary 36 | concept_synonym,concept_synonym_name,H,cnsy,vocabulary 37 | concept_synonym,language_concept_id,H,cnsy,vocabulary 38 | concept_ancestor,ancestor_concept_id,H,cnan,vocabulary 39 | concept_ancestor,descendant_concept_id,H,cnan,vocabulary 40 | concept_ancestor,min_levels_of_separation,H,cnan,vocabulary 41 | concept_ancestor,max_levels_of_separation,H,cnan,vocabulary 42 | source_to_concept_map,source_code,Yes,stcm,vocabulary 43 | source_to_concept_map,source_concept_id,Yes,stcm,vocabulary 44 | source_to_concept_map,source_vocabulary_id,No,stcm,vocabulary 45 | source_to_concept_map,source_code_description,Yes,stcm,vocabulary 46 | source_to_concept_map,target_concept_id,Yes,stcm,vocabulary 47 | source_to_concept_map,target_vocabulary_id,Yes,stcm,vocabulary 48 | source_to_concept_map,valid_start_date,Yes,stcm,vocabulary 49 | source_to_concept_map,valid_end_date,Yes,stcm,vocabulary 50 | source_to_concept_map,invalid_reason,No,stcm,vocabulary 51 | drug_strength,drug_concept_id,Yes,drgs,vocabulary 52 | 
drug_strength,ingredient_concept_id,Yes,drgs,vocabulary 53 | drug_strength,amount_value,No,drgs,vocabulary 54 | drug_strength,amount_unit_concept_id,No,drgs,vocabulary 55 | drug_strength,numerator_value,No,drgs,vocabulary 56 | drug_strength,numerator_unit_concept_id,No,drgs,vocabulary 57 | drug_strength,denominator_unit_concept_id,No,drgs,vocabulary 58 | drug_strength,valid_start_date,Yes,drgs,vocabulary 59 | drug_strength,valid_end_date,Yes,drgs,vocabulary 60 | drug_strength,invalid_reason,No,drgs,vocabulary 61 | cohort_definition,cohort_definition_id,Yes,cohd,vocabulary 62 | cohort_definition,cohort_definition_name,Yes,cohd,vocabulary 63 | cohort_definition,cohort_definition_description,No,cohd,vocabulary 64 | cohort_definition,definition_type_concept_id,Yes,cohd,vocabulary 65 | cohort_definition,cohort_definition_syntax,No,cohd,vocabulary 66 | cohort_definition,subject_concept_id,Yes,cohd,vocabulary 67 | cohort_definition,cohort_instantiation_date,No,cohd,vocabulary 68 | attribute_definition,attribute_definition_id,Yes,atrd,vocabulary 69 | attribute_definition,attribute_name,Yes,atrd,vocabulary 70 | attribute_definition,attribute_description,No,atrd,vocabulary 71 | attribute_definition,attribute_type_concept_id,Yes,atrd,vocabulary 72 | attribute_definition,attribute_syntax,No,atrd,vocabulary 73 | cdm_source,cdm_source_name,Yes,cdms,meta_data 74 | cdm_source,cdm_source_abbreviation,No,cdms,meta_data 75 | cdm_source,cdm_holder,No,cdms,meta_data 76 | cdm_source,source_description,No,cdms,meta_data 77 | cdm_source,source_documentation_reference,No,cdms,meta_data 78 | cdm_source,cdm_etl_reference,No,cdms,meta_data 79 | cdm_source,source_release_date,No,cdms,meta_data 80 | cdm_source,cdm_release_date,No,cdms,meta_data 81 | cdm_source,cdm_version,No,cdms,meta_data 82 | cdm_source,vocabulary_version,No,cdms,meta_data 83 | person,person_id,X,psrn,clinical 84 | person,gender_concept_id,Yes,psrn,clinical 85 | person,year_of_birth,Yes,psrn,clinical 86 | 
person,month_of_birth,No,psrn,clinical 87 | person,day_of_birth,No,psrn,clinical 88 | person,time_of_birth,No,psrn,clinical 89 | person,race_concept_id,Yes,psrn,clinical 90 | person,ethnicity_concept_id,Yes,psrn,clinical 91 | person,location_id,No,psrn,clinical 92 | person,provider_id,No,psrn,clinical 93 | person,care_site_id,No,psrn,clinical 94 | person,person_source_value,No,psrn,clinical 95 | person,gender_source_value,No,psrn,clinical 96 | person,gender_source_concept_id,No,psrn,clinical 97 | person,race_source_value,No,psrn,clinical 98 | person,race_source_concept_id,No,psrn,clinical 99 | person,ethnicity_source_value,No,psrn,clinical 100 | person,ethnicity_source_concept_id,No,psrn,clinical 101 | observation_period,observation_period_id,Yes,obsp,clinical 102 | observation_period,person_id,Yes,obsp,clinical 103 | observation_period,observation_period_start_date,Yes,obsp,clinical 104 | observation_period,observation_period_end_date,Yes,obsp,clinical 105 | observation_period,period_type_concept_id,Yes,obsp,clinical 106 | specimen,specimen_id,Yes,spec,clinical 107 | specimen,person_id,Yes,spec,clinical 108 | specimen,specimen_concept_id,Yes,spec,clinical 109 | specimen,specimen_type_concept_id,Yes,spec,clinical 110 | specimen,specimen_date,Yes,spec,clinical 111 | specimen,specimen_time,No,spec,clinical 112 | specimen,quantity,No,spec,clinical 113 | specimen,unit_concept_id,No,spec,clinical 114 | specimen,anatomic_site_concept_id,No,spec,clinical 115 | specimen,disease_status_concept_id,No,spec,clinical 116 | specimen,specimen_source_id,No,spec,clinical 117 | specimen,specimen_source_value,No,spec,clinical 118 | specimen,unit_source_value,No,spec,clinical 119 | specimen,anatomic_site_source_value,No,spec,clinical 120 | specimen,disease_status_source_value,No,spec,clinical 121 | death,person_id,Yes,deth,clinical 122 | death,death_date,Yes,deth,clinical 123 | death,death_type_concept_id,Yes,deth,clinical 124 | death,cause_concept_id,No,deth,clinical 125 | 
death,cause_source_value,No,deth,clinical 126 | death,cause_source_concept_id,No,deth,clinical 127 | visit_occurrence,visit_occurrence_id,Yes,vsto,clinical 128 | visit_occurrence,person_id,Yes,vsto,clinical 129 | visit_occurrence,visit_concept_id,Yes,vsto,clinical 130 | visit_occurrence,visit_start_date,Yes,vsto,clinical 131 | visit_occurrence,visit_start_time,No,vsto,clinical 132 | visit_occurrence,visit_end_date,Yes,vsto,clinical 133 | visit_occurrence,visit_end_time,No,vsto,clinical 134 | visit_occurrence,visit_type_concept_id,Yes,vsto,clinical 135 | visit_occurrence,provider_id,No,vsto,clinical 136 | visit_occurrence,care_site_id,No,vsto,clinical 137 | visit_occurrence,visit_source_value,No,vsto,clinical 138 | visit_occurrence,visit_source_concept_id,No,vsto,clinical 139 | procedure_occurrence,procedure_occurrence_id,Yes,prco,clinical 140 | procedure_occurrence,person_id,Yes,prco,clinical 141 | procedure_occurrence,procedure_concept_id,Yes,prco,clinical 142 | procedure_occurrence,procedure_date,Yes,prco,clinical 143 | procedure_occurrence,procedure_type_concept_id,Yes,prco,clinical 144 | procedure_occurrence,modifier_concept_id,No,prco,clinical 145 | procedure_occurrence,quantity,No,prco,clinical 146 | procedure_occurrence,provider_id,No,prco,clinical 147 | procedure_occurrence,visit_occurrence_id,No,prco,clinical 148 | procedure_occurrence,procedure_source_value,No,prco,clinical 149 | procedure_occurrence,procedure_source_concept_id,No,prco,clinical 150 | procedure_occurrence,qualifier_source_value,No,prco,clinical 151 | drug_exposure,drug_exposure_id,Yes,drge,clinical 152 | drug_exposure,person_id,Yes,drge,clinical 153 | drug_exposure,drug_concept_id,Yes,drge,clinical 154 | drug_exposure,drug_exposure_start_date,Yes,drge,clinical 155 | drug_exposure,drug_exposure_end_date,No,drge,clinical 156 | drug_exposure,drug_type_concept_id,Yes,drge,clinical 157 | drug_exposure,stop_reason,No,drge,clinical 158 | drug_exposure,refills,No,drge,clinical 159 | 
drug_exposure,quantity,No,drge,clinical 160 | drug_exposure,days_supply,No,drge,clinical 161 | drug_exposure,sig,No,drge,clinical 162 | drug_exposure,route_concept_id,No,drge,clinical 163 | drug_exposure,effective_drug_dose,No,drge,clinical 164 | drug_exposure,dose_unit_concept_id,No,drge,clinical 165 | drug_exposure,lot_number,No,drge,clinical 166 | drug_exposure,provider_id,No,drge,clinical 167 | drug_exposure,visit_occurrence_id,No,drge,clinical 168 | drug_exposure,drug_source_value,No,drge,clinical 169 | drug_exposure,drug_source_concept_id,No,drge,clinical 170 | drug_exposure,route_source_value,No,drge,clinical 171 | drug_exposure,dose_unit_source_value,No,drge,clinical 172 | device_exposure,device_exposure_id,Yes,dvce,clinical 173 | device_exposure,person_id,Yes,dvce,clinical 174 | device_exposure,device_concept_id,Yes,dvce,clinical 175 | device_exposure,device_exposure_start_date,Yes,dvce,clinical 176 | device_exposure,device_exposure_end_date,No,dvce,clinical 177 | device_exposure,device_type_concept_id,Yes,dvce,clinical 178 | device_exposure,unique_device_id,No,dvce,clinical 179 | device_exposure,quantity,No,dvce,clinical 180 | device_exposure,provider_id,No,dvce,clinical 181 | device_exposure,visit_occurrence_id,No,dvce,clinical 182 | device_exposure,device_source_value,No,dvce,clinical 183 | device_exposure,device_source_concept_id,No,dvce,clinical 184 | condition_occurrence,condition_occurrence_id,Yes,cndo,clinical 185 | condition_occurrence,person_id,Yes,cndo,clinical 186 | condition_occurrence,condition_concept_id,Yes,cndo,clinical 187 | condition_occurrence,condition_start_date,Yes,cndo,clinical 188 | condition_occurrence,condition_end_date,No,cndo,clinical 189 | condition_occurrence,condition_type_concept_id,Yes,cndo,clinical 190 | condition_occurrence,stop_reason,No,cndo,clinical 191 | condition_occurrence,provider_id,No,cndo,clinical 192 | condition_occurrence,visit_occurrence_id,No,cndo,clinical 193 | 
condition_occurrence,condition_source_value,No,cndo,clinical 194 | condition_occurrence,condition_source_concept_id,No,cndo,clinical 195 | measurement,measurement_id,Yes,msrm,clinical 196 | measurement,person_id,Yes,msrm,clinical 197 | measurement,measurement_concept_id,Yes,msrm,clinical 198 | measurement,measurement_date,Yes,msrm,clinical 199 | measurement,measurement_time,No,msrm,clinical 200 | measurement,measurement_type_concept_id,Yes,msrm,clinical 201 | measurement,operator_concept_id,No,msrm,clinical 202 | measurement,value_as_number,No,msrm,clinical 203 | measurement,value_as_concept_id,No,msrm,clinical 204 | measurement,unit_concept_id,No,msrm,clinical 205 | measurement,range_low,No,msrm,clinical 206 | measurement,range_high,No,msrm,clinical 207 | measurement,provider_id,No,msrm,clinical 208 | measurement,visit_occurrence_id,No,msrm,clinical 209 | measurement,measurement_source_value,No,msrm,clinical 210 | measurement,measurement_source_concept_id,No,msrm,clinical 211 | measurement,unit_source_value,No,msrm,clinical 212 | measurement,value_source_value,No,msrm,clinical 213 | note,note_id,Yes,note,clinical 214 | note,person_id,Yes,note,clinical 215 | note,note_date,Yes,note,clinical 216 | note,note_time,No,note,clinical 217 | note,note_type_concept_id,Yes,note,clinical 218 | note,note_text,Yes,note,clinical 219 | note,provider_id,No,note,clinical 220 | note,note_source_value,No,note,clinical 221 | note,visit_occurrence_id,No,note,clinical 222 | observation,observation_id,Yes,obsr,clinical 223 | observation,person_id,Yes,obsr,clinical 224 | observation,observation_concept_id,Yes,obsr,clinical 225 | observation,observation_date,Yes,obsr,clinical 226 | observation,observation_time,No,obsr,clinical 227 | observation,observation_type_concept_id,Yes,obsr,clinical 228 | observation,value_as_number,No,obsr,clinical 229 | observation,value_as_string,No,obsr,clinical 230 | observation,value_as_concept_id,No,obsr,clinical 231 | 
observation,qualifier_concept_id,No,obsr,clinical 232 | observation,unit_concept_id,No,obsr,clinical 233 | observation,provider_id,No,obsr,clinical 234 | observation,visit_occurrence_id,No,obsr,clinical 235 | observation,observation_source_value,No,obsr,clinical 236 | observation,observation_source_concept_id,No,obsr,clinical 237 | observation,unit_source_value,No,obsr,clinical 238 | observation,qualifier_source_value,No,obsr,clinical 239 | fact_relationship,domain_concept_id_1,Yes,fctr,clinical 240 | fact_relationship,fact_id_1,Yes,fctr,clinical 241 | fact_relationship,domain_concept_id_2,Yes,fctr,clinical 242 | fact_relationship,fact_id_2,Yes,fctr,clinical 243 | fact_relationship,relationship_concept_id,Yes,fctr,clinical 244 | location,location_id,Yes,loct,health_system 245 | location,address_1,No,loct,health_system 246 | location,address_2,No,loct,health_system 247 | location,city,No,loct,health_system 248 | location,state,No,loct,health_system 249 | location,zip,No,loct,health_system 250 | location,county,No,loct,health_system 251 | location,location_source_value,No,loct,health_system 252 | care_site,care_site_id,Yes,cars,health_system 253 | care_site,care_site_name,No,cars,health_system 254 | care_site,place_of_service_concept_id,No,cars,health_system 255 | care_site,location_id,No,cars,health_system 256 | care_site,care_site_source_value,No,cars,health_system 257 | care_site,place_of_service_source_value,No,cars,health_system 258 | provider,provider_id,Yes,prvd,health_system 259 | provider,provider_name,No,prvd,health_system 260 | provider,npi,No,prvd,health_system 261 | provider,dea,No,prvd,health_system 262 | provider,specialty_concept_id,No,prvd,health_system 263 | provider,care_site_id,No,prvd,health_system 264 | provider,year_of_birth,No,prvd,health_system 265 | provider,gender_concept_id,No,prvd,health_system 266 | provider,provider_source_value,No,prvd,health_system 267 | provider,specialty_source_value,No,prvd,health_system 268 | 
provider,specialty_source_concept_id,No,prvd,health_system 269 | provider,gender_source_value,No,prvd,health_system 270 | provider,gender_source_concept_id,No,prvd,health_system 271 | payer_plan_period,payer_plan_period_id,Yes,pppr,health_econ 272 | payer_plan_period,person_id,Yes,pppr,health_econ 273 | payer_plan_period,payer_plan_period_start_date,Yes,pppr,health_econ 274 | payer_plan_period,payer_plan_period_end_date,Yes,pppr,health_econ 275 | payer_plan_period,payer_source_value,No,pppr,health_econ 276 | payer_plan_period,plan_source_value,No,pppr,health_econ 277 | payer_plan_period,family_source_value,No,pppr,health_econ 278 | visit_cost,visit_cost_id,Yes,vstc,health_econ 279 | visit_cost,visit_occurrence_id,Yes,vstc,health_econ 280 | visit_cost,currency_concept_id,No,vstc,health_econ 281 | visit_cost,paid_copay,No,vstc,health_econ 282 | visit_cost,paid_coinsurance,No,vstc,health_econ 283 | visit_cost,paid_toward_deductible,No,vstc,health_econ 284 | visit_cost,paid_by_payer,No,vstc,health_econ 285 | visit_cost,paid_by_coordination_benefits,No,vstc,health_econ 286 | visit_cost,total_out_of_pocket,No,vstc,health_econ 287 | visit_cost,total_paid,No,vstc,health_econ 288 | visit_cost,payer_plan_period_id,No,vstc,health_econ 289 | procedure_cost,procedure_cost_id,Yes,prcc,health_econ 290 | procedure_cost,procedure_occurrence_id,Yes,prcc,health_econ 291 | procedure_cost,currency_concept_id,No,prcc,health_econ 292 | procedure_cost,paid_copay,No,prcc,health_econ 293 | procedure_cost,paid_coinsurance,No,prcc,health_econ 294 | procedure_cost,paid_toward_deductible,No,prcc,health_econ 295 | procedure_cost,paid_by_payer,No,prcc,health_econ 296 | procedure_cost,paid_by_coordination_benefits,No,prcc,health_econ 297 | procedure_cost,total_out_of_pocket,No,prcc,health_econ 298 | procedure_cost,total_paid,No,prcc,health_econ 299 | procedure_cost,revenue_code_concept_id,No,prcc,health_econ 300 | procedure_cost,payer_plan_period_id,No,prcc,health_econ 301 | 
procedure_cost,revenue_code_source_value,No,prcc,health_econ 302 | drug_cost,drug_cost_id,Yes,drgc,health_econ 303 | drug_cost,drug_exposure_id,Yes,drgc,health_econ 304 | drug_cost,currency_concept_id,No,drgc,health_econ 305 | drug_cost,paid_copay,No,drgc,health_econ 306 | drug_cost,paid_coinsurance,No,drgc,health_econ 307 | drug_cost,paid_toward_deductible,No,drgc,health_econ 308 | drug_cost,paid_by_payer,No,drgc,health_econ 309 | drug_cost,paid_by_coordination_benefits,No,drgc,health_econ 310 | drug_cost,total_out_of_pocket,No,drgc,health_econ 311 | drug_cost,total_paid,No,drgc,health_econ 312 | drug_cost,ingredient_cost,No,drgc,health_econ 313 | drug_cost,dispensing_fee,No,drgc,health_econ 314 | drug_cost,average_wholesale_price,No,drgc,health_econ 315 | drug_cost,payer_plan_period_id,No,drgc,health_econ 316 | device_cost,device_cost_id,Yes,devc,health_econ 317 | device_cost,device_exposure_id,Yes,devc,health_econ 318 | device_cost,currency_concept_id,No,devc,health_econ 319 | device_cost,paid_copay,No,devc,health_econ 320 | device_cost,paid_coinsurance,No,devc,health_econ 321 | device_cost,paid_toward_deductible,No,devc,health_econ 322 | device_cost,paid_by_payer,No,devc,health_econ 323 | device_cost,paid_by_coordination_benefits,No,devc,health_econ 324 | device_cost,total_out_of_pocket,No,devc,health_econ 325 | device_cost,total_paid,No,devc,health_econ 326 | device_cost,payer_plan_period_id,No,devc,health_econ 327 | cohort,cohort_definition_id,Yes,chrt,derived_element 328 | cohort,subject_id,Yes,chrt,derived_element 329 | cohort,cohort_start_date,Yes,chrt,derived_element 330 | cohort,cohort_end_date,Yes,chrt,derived_element 331 | cohort_attribute,cohort_definition_id,Yes,chra,derived_element 332 | cohort_attribute,subject_id,Yes,chra,derived_element 333 | cohort_attribute,cohort_start_date,Yes,chra,derived_element 334 | cohort_attribute,cohort_end_date,Yes,chra,derived_element 335 | cohort_attribute,attribute_definition_id,Yes,chra,derived_element 336 | 
cohort_attribute,value_as_number,No,chra,derived_element 337 | cohort_attribute,value_as_concept_id,No,chra,derived_element 338 | drug_era,drug_era_id,Yes,drge,derived_element 339 | drug_era,person_id,Yes,drge,derived_element 340 | drug_era,drug_concept_id,Yes,drge,derived_element 341 | drug_era,drug_era_start_date,Yes,drge,derived_element 342 | drug_era,drug_era_end_date,Yes,drge,derived_element 343 | drug_era,drug_exposure_count,No,drge,derived_element 344 | drug_era,gap_days,No,drge,derived_element 345 | dose_era,dose_era_id,Yes,dose,derived_element 346 | dose_era,person_id,Yes,dose,derived_element 347 | dose_era,drug_concept_id,Yes,dose,derived_element 348 | dose_era,unit_concept_id,Yes,dose,derived_element 349 | dose_era,dose_value,Yes,dose,derived_element 350 | dose_era,dose_era_start_date,Yes,dose,derived_element 351 | dose_era,dose_era_end_date,Yes,dose,derived_element 352 | condition_era,condition_era_id,Yes,cnde,derived_element 353 | condition_era,person_id,Yes,cnde,derived_element 354 | condition_era,condition_concept_id,Yes,cnde,derived_element 355 | condition_era,condition_era_start_date,Yes,cnde,derived_element 356 | condition_era,condition_era_end_date,Yes,cnde,derived_element 357 | condition_era,condition_occurrence_count,No,cnde,derived_element -------------------------------------------------------------------------------- /DQTBL_pcornet_v3.csv: -------------------------------------------------------------------------------- 1 | TabNam,ColNam,DQLVL,abbr 2 | demographic,patid,X,demog 3 | demographic,birth_date,H,demog 4 | demographic,birth_time,L,demog 5 | demographic,sex,H,demog 6 | demographic,hispanic,H,demog 7 | demographic,race,H,demog 8 | demographic,biobank_flag,L,demog 9 | demographic,raw_sex,L,demog 10 | demographic,raw_hispanic,L,demog 11 | demographic,raw_race,L,demog 12 | enrollment,patid,H,enrlmnt 13 | enrollment,enr_start_date,H,enrlmnt 14 | enrollment,enr_end_date,L,enrlmnt 15 | enrollment,chart,L,enrlmnt 16 | 
enrollment,enr_basis,H,enrlmnt 17 | encounter,encounterid,H,encntr 18 | encounter,patid,H,encntr 19 | encounter,admit_date,H,encntr 20 | encounter,admit_time,L,encntr 21 | encounter,discharge_date,L,encntr 22 | encounter,discharge_time,L,encntr 23 | encounter,providerid,L,encntr 24 | encounter,facility_location,L,encntr 25 | encounter,enc_type,H,encntr 26 | encounter,facilityid,L,encntr 27 | encounter,discharge_disposition,L,encntr 28 | encounter,discharge_status,L,encntr 29 | encounter,drg,L,encntr 30 | encounter,drg_type,L,encntr 31 | encounter,admitting_source,L,encntr 32 | encounter,raw_siteid,L,encntr 33 | encounter,raw_enc_type,L,encntr 34 | encounter,raw_discharge_disposition,L,encntr 35 | encounter,raw_discharge_status,L,encntr 36 | encounter,raw_drg_type,L,encntr 37 | encounter,raw_admitting_source,L,encntr 38 | diagnosis,diagnosisid,H,dx 39 | diagnosis,patid,H,dx 40 | diagnosis,encounterid,H,dx 41 | diagnosis,enc_type,L,dx 42 | diagnosis,admit_date,L,dx 43 | diagnosis,providerid,L,dx 44 | diagnosis,dx,H,dx 45 | diagnosis,dx_type,H,dx 46 | diagnosis,dx_source,H,dx 47 | diagnosis,pdx,L,dx 48 | diagnosis,raw_dx,L,dx 49 | diagnosis,raw_dx_type,L,dx 50 | diagnosis,raw_dx_source,L,dx 51 | diagnosis,raw_pdx,L,dx 52 | procedures,proceduresid,H,px 53 | procedures,patid,H,px 54 | procedures,encounterid,H,px 55 | procedures,enc_type,L,px 56 | procedures,admit_date,L,px 57 | procedures,providerid,L,px 58 | procedures,px_date,L,px 59 | procedures,px,H,px 60 | procedures,px_type,H,px 61 | procedures,px_source,L,px 62 | procedures,raw_px,L,px 63 | procedures,raw_px_type,L,px 64 | vital,vitalid,H,vital 65 | vital,patid,H,vital 66 | vital,encounterid,L,vital 67 | vital,measure_date,H,vital 68 | vital,measure_time,L,vital 69 | vital,vital_source,H,vital 70 | vital,ht,H,vital 71 | vital,wt,H,vital 72 | vital,diastolic,L,vital 73 | vital,systolic,L,vital 74 | vital,original_bmi,L,vital 75 | vital,bp_position,L,vital 76 | vital,smoking,L,vital 77 | vital,tobacco,L,vital 78 | 
vital,tobacco_type,L,vital 79 | vital,raw_diastolic,L,vital 80 | vital,raw_systolic,L,vital 81 | vital,raw_bp_position,L,vital 82 | vital,raw_smoking,L,vital 83 | vital,raw_tobacco,L,vital 84 | vital,raw_tobacco_type,L,vital 85 | dispensing,dispensingid,H,disp 86 | dispensing,patid,H,disp 87 | dispensing,prescribingid,L,disp 88 | dispensing,dispense_date,H,disp 89 | dispensing,ndc,H,disp 90 | dispensing,dispense_sup,L,disp 91 | dispensing,dispense_amt,L,disp 92 | dispensing,raw_ndc,L,disp 93 | lab_result_cm,lab_result_cm_id,H,labs 94 | lab_result_cm,patid,H,labs 95 | lab_result_cm,encounterid,L,labs 96 | lab_result_cm,lab_name,L,labs 97 | lab_result_cm,specimen_source,L,labs 98 | lab_result_cm,lab_loinc,L,labs 99 | lab_result_cm,priority,L,labs 100 | lab_result_cm,result_loc,L,labs 101 | lab_result_cm,lab_px,L,labs 102 | lab_result_cm,lab_px_type,L,labs 103 | lab_result_cm,lab_order_date,L,labs 104 | lab_result_cm,specimen_date,L,labs 105 | lab_result_cm,specimen_time,L,labs 106 | lab_result_cm,result_date,H,labs 107 | lab_result_cm,result_time,L,labs 108 | lab_result_cm,result_qual,L,labs 109 | lab_result_cm,result_num,L,labs 110 | lab_result_cm,result_modifier,L,labs 111 | lab_result_cm,result_unit,L,labs 112 | lab_result_cm,norm_range_low,L,labs 113 | lab_result_cm,norm_modifier_low,L,labs 114 | lab_result_cm,norm_range_high,L,labs 115 | lab_result_cm,norm_modifier_high,L,labs 116 | lab_result_cm,abn_ind,L,labs 117 | lab_result_cm,raw_lab_name,L,labs 118 | lab_result_cm,raw_lab_code,L,labs 119 | lab_result_cm,raw_panel,L,labs 120 | lab_result_cm,raw_result,L,labs 121 | lab_result_cm,raw_unit,L,labs 122 | lab_result_cm,raw_order_dept,L,labs 123 | lab_result_cm,raw_facility_code,L,labs 124 | condition,conditionid,H,cndtn 125 | condition,patid,H,cndtn 126 | condition,encounterid,L,cndtn 127 | condition,report_date,L,cndtn 128 | condition,resolve_date,L,cndtn 129 | condition,onset_date,L,cndtn 130 | condition,condition_status,L,cndtn 131 | 
condition,condition,H,cndtn 132 | condition,condition_type,H,cndtn 133 | condition,condition_source,H,cndtn 134 | condition,raw_condition_status,L,cndtn 135 | condition,raw_condition,L,cndtn 136 | condition,raw_condition_type,L,cndtn 137 | condition,raw_condition_source,L,cndtn 138 | pro_cm,pro_cm_id,H,procm 139 | pro_cm,patid,H,procm 140 | pro_cm,encounterid,L,procm 141 | pro_cm,pro_item,H,procm 142 | pro_cm,pro_loinc,L,procm 143 | pro_cm,pro_date,H,procm 144 | pro_cm,pro_time,L,procm 145 | pro_cm,pro_response,H,procm 146 | pro_cm,pro_method,L,procm 147 | pro_cm,pro_mode,L,procm 148 | pro_cm,pro_cat,L,procm 149 | pro_cm,raw_pro_code,L,procm 150 | pro_cm,raw_pro_response,L,procm 151 | prescribing,prescribingid,H,rx 152 | prescribing,patid,H,rx 153 | prescribing,encounterid,L,rx 154 | prescribing,rx_providerid,L,rx 155 | prescribing,rx_order_date,L,rx 156 | prescribing,rx_order_time,L,rx 157 | prescribing,rx_start_date,L,rx 158 | prescribing,rx_end_date,L,rx 159 | prescribing,rx_quantity,L,rx 160 | prescribing,rx_refills,L,rx 161 | prescribing,rx_days_supply,L,rx 162 | prescribing,rx_frequency,L,rx 163 | prescribing,rx_basis,L,rx 164 | prescribing,rxnorm_cui,L,rx 165 | prescribing,raw_rx_med_name,L,rx 166 | prescribing,raw_rx_frequency,L,rx 167 | prescribing,raw_rxnorm_cui,L,rx 168 | pcornet_trial,patid,H,pcornet 169 | pcornet_trial,trialid,H,pcornet 170 | pcornet_trial,participantid,H,pcornet 171 | pcornet_trial,trial_siteid,L,pcornet 172 | pcornet_trial,trial_enroll_date,L,pcornet 173 | pcornet_trial,trial_end_date,L,pcornet 174 | pcornet_trial,trial_withdraw_date,L,pcornet 175 | pcornet_trial,trial_invite_code,L,pcornet 176 | death,patid,H,dth 177 | death,death_date,H,dth 178 | death,death_date_impute,L,dth 179 | death,death_source,H,dth 180 | death,death_match_confidence,L,dth 181 | death_condition,patid,H,dthc 182 | death_condition,death_cause,H,dthc 183 | death_condition,death_cause_code,H,dthc 184 | death_condition,death_cause_type,H,dthc 185 | 
death_condition,death_cause_source,H,dthc 186 | death_condition,death_cause_confidence,L,dthc 187 | harvest,networkid,H,harv 188 | harvest,network_name,L,harv 189 | harvest,datamartid,H,harv 190 | harvest,datamart_name,L,harv 191 | harvest,datamart_platform,L,harv 192 | harvest,cdm_version,L,harv 193 | harvest,datamart_claims,L,harv 194 | harvest,datamart_ehr,L,harv 195 | harvest,birth_date_mgmt,L,harv 196 | harvest,enr_start_date_mgmt,L,harv 197 | harvest,enr_end_date_mgmt,L,harv 198 | harvest,admit_date_mgmt,L,harv 199 | harvest,discharge_date_mgmt,L,harv 200 | harvest,px_date_mgmt,L,harv 201 | harvest,rx_order_date_mgmt,L,harv 202 | harvest,rx_start_date_mgmt,L,harv 203 | harvest,rx_end_date_mgmt,L,harv 204 | harvest,dispense_date_mgmt,L,harv 205 | harvest,lab_order_date_mgmt,L,harv 206 | harvest,specimen_date_mgmt,L,harv 207 | harvest,result_date_mgmt,L,harv 208 | harvest,measure_date_mgmt,L,harv 209 | harvest,onset_date_mgmt,L,harv 210 | harvest,report_date_mgmt,L,harv 211 | harvest,resolve_date_mgmt,L,harv 212 | harvest,pro_date_mgmt,L,harv 213 | harvest,refresh_demographic_date,L,harv 214 | harvest,refresh_enrollment_date,L,harv 215 | harvest,refresh_encounter_date,L,harv 216 | harvest,refresh_diagnosis_date,L,harv 217 | harvest,refresh_procedures_date,L,harv 218 | harvest,refresh_vital_date,L,harv 219 | harvest,refresh_dispensing_date,L,harv 220 | harvest,refresh_lab_result_cm_date,L,harv 221 | harvest,refresh_condition_date,L,harv 222 | harvest,refresh_pro_cm_date,L,harv 223 | harvest,refresh_prescribing_date,L,harv 224 | harvest,refresh_pcornet_trial_date,L,harv 225 | harvest,refresh_death_date,L,harv 226 | harvest,refresh_death_cause_date,L,harv -------------------------------------------------------------------------------- /DQTBL_pcornet_v31.csv: -------------------------------------------------------------------------------- 1 | TabNam,ColNam,DQLVL,abbr 2 | demographic,patid,X,demog 3 | demographic,birth_date,H,demog 4 | demographic,birth_time,L,demog 
5 | demographic,sex,H,demog 6 | demographic,hispanic,H,demog 7 | demographic,race,H,demog 8 | demographic,biobank_flag,L,demog 9 | demographic,sexual_orientation,L,demog 10 | demographic,gender_identity,L,demog 11 | enrollment,patid,H,enrlmnt 12 | enrollment,enr_start_date,H,enrlmnt 13 | enrollment,enr_end_date,L,enrlmnt 14 | enrollment,chart,L,enrlmnt 15 | enrollment,enr_basis,H,enrlmnt 16 | encounter,encounterid,H,encntr 17 | encounter,patid,H,encntr 18 | encounter,admit_date,H,encntr 19 | encounter,admit_time,L,encntr 20 | encounter,discharge_date,L,encntr 21 | encounter,discharge_time,L,encntr 22 | encounter,providerid,L,encntr 23 | encounter,facility_location,L,encntr 24 | encounter,enc_type,H,encntr 25 | encounter,facilityid,L,encntr 26 | encounter,discharge_disposition,L,encntr 27 | encounter,discharge_status,L,encntr 28 | encounter,drg,L,encntr 29 | encounter,drg_type,L,encntr 30 | encounter,admitting_source,L,encntr 31 | diagnosis,diagnosisid,H,dx 32 | diagnosis,patid,H,dx 33 | diagnosis,encounterid,H,dx 34 | diagnosis,enc_type,L,dx 35 | diagnosis,admit_date,L,dx 36 | diagnosis,providerid,L,dx 37 | diagnosis,dx,H,dx 38 | diagnosis,dx_type,H,dx 39 | diagnosis,dx_source,H,dx 40 | diagnosis,pdx,L,dx 41 | diagnosis,dx_origin,L,dx 42 | procedures,proceduresid,H,px 43 | procedures,patid,H,px 44 | procedures,encounterid,H,px 45 | procedures,enc_type,L,px 46 | procedures,admit_date,L,px 47 | procedures,providerid,L,px 48 | procedures,px_date,L,px 49 | procedures,px,H,px 50 | procedures,px_type,H,px 51 | procedures,px_source,L,px 52 | vital,vitalid,H,vital 53 | vital,patid,H,vital 54 | vital,encounterid,L,vital 55 | vital,measure_date,H,vital 56 | vital,measure_time,L,vital 57 | vital,vital_source,H,vital 58 | vital,ht,H,vital 59 | vital,wt,H,vital 60 | vital,diastolic,L,vital 61 | vital,systolic,L,vital 62 | vital,original_bmi,L,vital 63 | vital,bp_position,L,vital 64 | vital,smoking,L,vital 65 | vital,tobacco,L,vital 66 | vital,tobacco_type,L,vital 67 | 
dispensing,dispensingid,H,disp 68 | dispensing,patid,H,disp 69 | dispensing,prescribingid,L,disp 70 | dispensing,dispense_date,H,disp 71 | dispensing,ndc,H,disp 72 | dispensing,dispense_sup,L,disp 73 | dispensing,dispense_amt,L,disp 74 | lab_result_cm,lab_result_cm_id,H,labs 75 | lab_result_cm,patid,H,labs 76 | lab_result_cm,encounterid,L,labs 77 | lab_result_cm,lab_name,L,labs 78 | lab_result_cm,specimen_source,L,labs 79 | lab_result_cm,lab_loinc,L,labs 80 | lab_result_cm,priority,L,labs 81 | lab_result_cm,result_loc,L,labs 82 | lab_result_cm,lab_px,L,labs 83 | lab_result_cm,lab_px_type,L,labs 84 | lab_result_cm,lab_order_date,L,labs 85 | lab_result_cm,specimen_date,L,labs 86 | lab_result_cm,specimen_time,L,labs 87 | lab_result_cm,result_date,H,labs 88 | lab_result_cm,result_time,L,labs 89 | lab_result_cm,result_qual,L,labs 90 | lab_result_cm,result_num,L,labs 91 | lab_result_cm,result_modifier,L,labs 92 | lab_result_cm,result_unit,L,labs 93 | lab_result_cm,norm_range_low,L,labs 94 | lab_result_cm,norm_modifier_low,L,labs 95 | lab_result_cm,norm_range_high,L,labs 96 | lab_result_cm,norm_modifier_high,L,labs 97 | lab_result_cm,abn_ind,L,labs 98 | condition,conditionid,H,cndtn 99 | condition,patid,H,cndtn 100 | condition,encounterid,L,cndtn 101 | condition,report_date,L,cndtn 102 | condition,resolve_date,L,cndtn 103 | condition,onset_date,L,cndtn 104 | condition,condition_status,L,cndtn 105 | condition,condition,H,cndtn 106 | condition,condition_type,H,cndtn 107 | condition,condition_source,H,cndtn 108 | pro_cm,pro_cm_id,H,procm 109 | pro_cm,patid,H,procm 110 | pro_cm,encounterid,L,procm 111 | pro_cm,pro_item,H,procm 112 | pro_cm,pro_loinc,L,procm 113 | pro_cm,pro_date,H,procm 114 | pro_cm,pro_time,L,procm 115 | pro_cm,pro_response,H,procm 116 | pro_cm,pro_method,L,procm 117 | pro_cm,pro_mode,L,procm 118 | pro_cm,pro_cat,L,procm 119 | prescribing,prescribingid,H,rx 120 | prescribing,patid,H,rx 121 | prescribing,encounterid,L,rx 122 | prescribing,rx_providerid,L,rx 
123 | prescribing,rx_order_date,L,rx 124 | prescribing,rx_order_time,L,rx 125 | prescribing,rx_start_date,L,rx 126 | prescribing,rx_end_date,L,rx 127 | prescribing,rx_quantity,L,rx 128 | prescribing,rx_refills,L,rx 129 | prescribing,rx_days_supply,L,rx 130 | prescribing,rx_frequency,L,rx 131 | prescribing,rx_quantity_unit,L,rx 132 | prescribing,rx_basis,L,rx 133 | prescribing,rxnorm_cui,L,rx 134 | pcornet_trial,patid,H,pcornet 135 | pcornet_trial,trialid,H,pcornet 136 | pcornet_trial,participantid,H,pcornet 137 | pcornet_trial,trial_siteid,L,pcornet 138 | pcornet_trial,trial_enroll_date,L,pcornet 139 | pcornet_trial,trial_end_date,L,pcornet 140 | pcornet_trial,trial_withdraw_date,L,pcornet 141 | pcornet_trial,trial_invite_code,L,pcornet 142 | death,patid,H,dth 143 | death,death_date,L,dth 144 | death,death_date_impute,L,dth 145 | death,death_source,H,dth 146 | death,death_match_confidence,L,dth 147 | death_cause,patid,H,dthc 148 | death_cause,death_cause,H,dthc 149 | death_cause,death_cause_code,H,dthc 150 | death_cause,death_cause_type,H,dthc 151 | death_cause,death_cause_source,H,dthc 152 | death_cause,death_cause_confidence,L,dthc 153 | harvest,networkid,H,harv 154 | harvest,network_name,L,harv 155 | harvest,datamartid,H,harv 156 | harvest,datamart_name,L,harv 157 | harvest,datamart_platform,L,harv 158 | harvest,cdm_version,L,harv 159 | harvest,datamart_claims,L,harv 160 | harvest,datamart_ehr,L,harv 161 | harvest,birth_date_mgmt,L,harv 162 | harvest,enr_start_date_mgmt,L,harv 163 | harvest,enr_end_date_mgmt,L,harv 164 | harvest,admit_date_mgmt,L,harv 165 | harvest,discharge_date_mgmt,L,harv 166 | harvest,px_date_mgmt,L,harv 167 | harvest,rx_order_date_mgmt,L,harv 168 | harvest,rx_start_date_mgmt,L,harv 169 | harvest,rx_end_date_mgmt,L,harv 170 | harvest,dispense_date_mgmt,L,harv 171 | harvest,lab_order_date_mgmt,L,harv 172 | harvest,specimen_date_mgmt,L,harv 173 | harvest,result_date_mgmt,L,harv 174 | harvest,measure_date_mgmt,L,harv 175 | 
harvest,onset_date_mgmt,L,harv 176 | harvest,report_date_mgmt,L,harv 177 | harvest,resolve_date_mgmt,L,harv 178 | harvest,pro_date_mgmt,L,harv 179 | harvest,refresh_demographic_date,L,harv 180 | harvest,refresh_enrollment_date,L,harv 181 | harvest,refresh_encounter_date,L,harv 182 | harvest,refresh_diagnosis_date,L,harv 183 | harvest,refresh_procedures_date,L,harv 184 | harvest,refresh_vital_date,L,harv 185 | harvest,refresh_dispensing_date,L,harv 186 | harvest,refresh_lab_result_cm_date,L,harv 187 | harvest,refresh_condition_date,L,harv 188 | harvest,refresh_pro_cm_date,L,harv 189 | harvest,refresh_prescribing_date,L,harv 190 | harvest,refresh_pcornet_trial_date,L,harv 191 | harvest,refresh_death_date,L,harv 192 | harvest,refresh_death_cause_date,L,harv -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | i2b2 Software License (“Software License”) 2 | 3 | Version 2.1 4 | 5 | 6 | This Software License covers downloads from the i2b2 project (“i2b2”) maintained by The Brigham and Women’s Hospital, Inc. (“BWH”). 7 | 8 | Your downloading, copying, modifying, displaying, distributing or use of any software and/or data from i2b2 (collectively, the “Software”) constitutes acceptance of all of the terms and conditions of this Software License. If you do not agree to such terms and conditions, you have no right to download, copy, modify, display, distribute or use the Software. 9 | 10 | As used in this Software License, "you" means the individual downloading and/or using, reproducing, modifying, displaying and/or distributing the Software and the institution or entity which employs or is otherwise affiliated with such individual in connection therewith. 
The BWH hereby grants you, with right to sublicense, with respect to BWH's rights in the software, and related technical data, if any, which is the subject of this Software License, a royalty-free, non-exclusive license to use, reproduce, make derivative works of, display and distribute the Software, provided that: 11 | (a) You accept and adhere to all of the terms and conditions of this Software License; 12 | (b) In connection with any copy of or sublicense of all or any portion of the Software, all of the terms and conditions in this Software License shall appear in and shall apply to such copy and such sublicensee, including without limitation all source and executable forms and on any user documentation, prefaced with the following words: "All or portions of this licensed product (such portions are the "Software") have been obtained under license from The Brigham and Women's Hospital, Inc. and are subject to the following terms and conditions:" 13 | (c) You preserve and maintain all applicable attributions, copyright notices and licenses included in or applicable to the Software. 14 | Modifications to the Software shall mean any addition to or deletion from the substance or structure of either the Software or any previous Modifications. When Software is released as a series of files, a Modification is: (i) any addition to or deletion from the contents of a file containing the Software or previous Modifications; or (ii) any new file that contains any part of the Software or previous Modifications. 
15 | 16 | (a) You agree that you will deliver, and you will cause all sublicensees to deliver, to BWH copies of all Modifications that are made to the Software, on or before the date on which the Modifications are distributed to any third party, and you hereby grant to BWH, and will cause all of your sublicensees to grant to BWH, a non-exclusive, royalty-free, irrevocable, non-terminable license to use, reproduce, make derivative works of, display, perform, and distribute any and all Modifications in source code and binary code form for any purpose and to sublicense such rights to others. 17 | (b) You agree that all Modifications of the Software must be clearly identified and marked as such, and must not be misrepresented as being the original Software. 18 | (c) You agree to consider making, but are under no obligation to make, the source code of any of your Modifications to the Software freely available to others on an open source basis. 19 | The license granted under this Software License includes without limitation the right to (i) incorporate the Software into proprietary programs (subject to any restrictions applicable to such programs), (ii) add your own copyright statement to your Modifications of the Software, and (iii) provide additional or different license terms and conditions in your sublicenses of Modifications of the Software; provided that in each case your use, reproduction or distribution of such modifications otherwise complies with the conditions stated in this Software License.
20 | This Software License does not grant any rights with respect to third party software, except those rights that BWH has been authorized by a third party to grant to you, and accordingly you are solely responsible for (i) obtaining any permissions from third parties that you need to use, reproduce, make derivative works of, display and distribute the Software, and (ii) informing your sublicensees, including without limitation your end-users, of their obligations to secure any such required permissions. 21 | The Software has been designed for research purposes only and has not been reviewed or approved by the Food and Drug Administration or by any other agency. YOU ACKNOWLEDGE AND AGREE THAT CLINICAL APPLICATIONS ARE NEITHER RECOMMENDED NOR ADVISED. Any commercialization of the Software is at the sole risk of the party or parties engaged in such commercialization. You further agree to use, reproduce, make derivative works of, display and distribute the Software in compliance with all applicable governmental laws, regulations and orders, including without limitation those relating to export and import control. 22 | The Software is provided "AS IS" and neither BWH nor any contributor to the software (each a "Contributor") shall have any obligation to provide maintenance, support, updates, enhancements or modifications thereto. You acknowledge and agree that you are responsible for your selection, testing, modification, use and distribution of the Software, in any form, and that you hereby expressly waive any claim for any loss, damage, or injury that may occur as a result of your use or distribution of the Software. You agree that you are responsible for obtaining and will maintain insurance providing coverage (including but not limited to general liability and product liability insurance) that is appropriate in light of the waivers and assumptions of liability set forth in this Software License. 
You furthermore waive any right of recovery against BWH or any Contributor (including any right of subrogation against BWH or any Contributor or any insurer of BWH or any Contributor) for loss or damage arising out of or incident to your use or distribution of the Software. 23 | BWH AND ALL CONTRIBUTORS SPECIFICALLY DISCLAIM ALL EXPRESS AND IMPLIED WARRANTIES OF ANY KIND INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL BWH OR ANY CONTRIBUTOR BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY ARISING IN ANY WAY RELATED TO THE SOFTWARE, EVEN IF BWH OR ANY CONTRIBUTOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. TO THE MAXIMUM EXTENT NOT PROHIBITED BY LAW OR REGULATION, YOU FURTHER ASSUME ALL LIABILITY FOR YOUR USE, REPRODUCTION, MAKING OF DERIVATIVE WORKS, DISPLAY, LICENSE OR DISTRIBUTION OF THE SOFTWARE AND AGREE TO INDEMNIFY AND HOLD HARMLESS BWH AND ALL CONTRIBUTORS (AND ALL RELATED INDIVIDUALS AND ENTITIES) FROM AND AGAINST ANY AND ALL CLAIMS, SUITS, ACTIONS, DEMANDS AND JUDGMENTS ARISING THEREFROM OR FROM ANY MODIFICATIONS OF THE SOFTWARE DELIVERED TO BWH HEREUNDER. 24 | None of the names, logos or trademarks of BWH or any of BWH's affiliates or any of the Contributors, or any funding agency, may be used to endorse or promote products produced in whole or in part by operation of the Software or derived from or based on the Software without specific prior written permission from the applicable party. 25 | Any use, reproduction or distribution of the Software which is not in accordance with this Software License shall automatically revoke all rights granted to you under this Software License and render Paragraphs 1 and 2 of this Software License null and void. 
26 | This Software License does not grant any rights in or to any intellectual property owned by BWH or any Contributor except those rights expressly granted hereunder. 27 | 28 | Version 2.1, 1/15/09 29 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "DQe-c Report (Example)" 3 | output: 4 | html_document: 5 | highlight: tango 6 | toc: yes 7 | word_document: 8 | toc: yes 9 | --- 10 | 11 | ##DQe-c Architecture 12 | 13 | DQe-c is a modular tool developed in the R statistical language for assessing completeness in EHR data repositories. The tool also performs a data model conformance test that pertains to data completeness. Each run of DQe-c produces a web-based report (.html document) that includes visualizations of the data completeness test at a given time (or data load) and changes in key frequencies over time. Data preparation, analyses, and visualizations are performed under seven modules, where each module consists of one or more R scripts (Figure 1). Modules one to six perform data preparation and analyses and store their outputs as comma-separated flat files in the reports directory. The modular design increases flexibility of the tool for future improvements and facilitates interoperability. 14 | 15 | ### Figure 1. DQe-c Workflow. 16 | ![dqec_workflow](illustration/workflow3.1.jpg) 17 | 18 | ###1- Set up and execution module 19 | The set up and execution module consists of three scripts. All necessary packages are loaded in libs.R, which is called by the RUN.R script. DQe-c’s execution is governed by the RUN.R script, which initiates three modules one after another (the order of initiation is identified in Figure 1). Scripts within each module initiate their dependent scripts, respectively. The latest version of DQe-c (3.1) works on two common data models (CDM), PCORnet version 3 and OMOP version 5.
The tool also operates on two Relational Database Management Systems (RDBMS), MS SQL Server and PostgreSQL, calling SQL queries from within R commands via a JDBC/ODBC connection. This capability increases scalability of the tool against large EHR repositories. 20 | 21 | To run DQe-c, the user needs to: (1) specify execution parameters in the RUN.R script, including CDM, SQL, organization name, and database specifications, and (2) set up SQL connection credentials in the keys.R script. 22 | 23 | ###2- Connectivity module 24 | The third (and final) step to run DQe-c is to specify SQL connection information (i.e., database driver, database name, host address, and connection port) in the connectivity module. This module establishes the JDBC/ODBC connection with the respective RDBMS. 25 | 26 | ###3- Clinical indicators module 27 | The clinical indicators module performs tests to count frequencies of patients without information on key clinical indicators. The development and addition of this module was inspired by a collaboration with DARTNet Institute, while implementing an earlier version of DQe-c into their data repository. Selection of clinical indicators in this module is flexible and can be customized based on local needs. This module initiates the data model module as a dependency. 28 | 29 | ###4- Data model module 30 | The data model module performs data model-related completeness tests. DQe-c version 3.1 checks for the existence of orphan foreign keys in the database, based on the relational constraints. This test looks at completeness from a conformance point of view. More data model-related checks will be added to future versions of DQe-c through this module. The module initiates its dependent data preparation module. 31 | 32 | ###5- Data preparation module 33 | The data preparation module performs the first data operations after the connection to the SQL database is established.
After the execution parameters are set in RUN.R, the prep.R script reads the respective data model template (OMOP v5 vs. PCORnet V3) and prepares the system to call respective SQL queries (MS SQL Server vs. PostgreSQL). Two comma-separated flat files provide the CDM templates for DQe-c to operate. The first step to expanding the tool's functionality to other CDMs is to create a new CDM template and modify the data preparation module. This script creates a reference table for processing in freq.R, which counts and stores frequencies of rows and unique values in each column. Results of these counts are added to the reference table and will be used by the missingness module for further processing. 34 | 35 | ###6- Missingness module 36 | The missingness module calculates the percentage of missing values for each column of each table available in the database. The results complete the reference table created in the data preparation module, and are used by the visualization and presentation module to generate the DQe-c report. 37 | 38 | ###7- Visualization and presentation module 39 | The visualization and presentation module includes an R Markdown document that generates the HTML report from completeness tests conducted through DQe-c. This module uses the outputs of its preceding modules, as they are stored with specific names as comma-separated flat files in the reports directory. We will provide a brief description of the DQe-c report in the next section. 40 | 41 | ##DQe-c Outputs 42 | Each run of DQe-c generates an HTML report that summarizes outputs from its data preparation and analytics in tables and graph visualizations. The report is organized in four sections. 43 | 44 | 45 | ###1-Load and test details 46 | The first section of the report presents a database-level snapshot summary of the latest data loaded in the clinical repository.
The summary includes a table that presents a list of CDM tables, their availability status (in three categories: (1) available, (2) loaded but empty, (3) not loaded), Gigabyte size and number of rows for each table. This information is then presented in three visualizations (Figures 2, 3, and 4). Data for this section of the report was generated by the data preparation module. 47 | 48 | #### Figure 2. Load Details: 49 | #####Available Tables, Compared to all CDM Tables 50 | This figure shows which of the CDM tables were received (and not received). 51 | ![Load_Details1](illustration/fig2-1.png) 52 | 53 | ##### File Size and Row Numbers by Table in the Load 54 | ![Load_Details2](illustration/fig2-2.png) 55 | 56 | 57 | #####Loaded tables against CDM Relational Model. 58 | The figure below shows a network visualization of the CDM data model, as well as highlighting the tables that are available in this load (legend is the same as in Figure 2). 59 | ![Load_Details3](illustration/netwizfig2.gif) 60 | 61 | 62 | 63 | ### 2- Completeness Results 64 | The second section of the report illustrates the results of the data preparation and missingness modules. Upon the completion of each run of DQe-c, a reference table is produced and saved in the `mstabs` directory under the `reports` directory. This table includes frequencies of rows, unique values, missingness, and percent missingness for each column and each table (Table 1 provides a description of the columns and their content in the reference table). We call this table the “Master Completeness Results” (MCR) table. 65 | ![table1](illustration/table1.png) 66 | Table 1. Columns and Content of the Reference Table. 67 | 68 | * category 1 of missingness measures presence of absence 69 | 70 | ** category 2 of missingness measures presence of nonsense 71 | 72 | As we described briefly in the introduction, our approach to missingness/completeness encompasses a broad connotation.
We measure completeness as presence of “sensical” data, regardless of whether a data point is plausible or not. That is, if for any reasons a data point does not include a value or an attribute that is compatible for analysis – i.e., the analyst needs to treat the data point with missing data procedures – we will consider it as missing data. Accordingly, DQe-c differentiates two types of missingness: (1) presence of absence (MS1), and (2) presence of nonsense (MS2). We describe the two types in next sections. Category 1 of missingness (MS1) is the conventional definition for missingness, counting frequency of NULL/NA values or empty strings in each column. Category 2 of missingness (MS2) can be defined by the user. We currently consider data points with characters including '+', '-', '_','#', '$', '*', '\', '?', '.', '&', '^', '%', '!', '@', and 'NI' in MS2 category. This list can be easily modified in the missingness module. 73 | 74 | 75 | ####2-1- Changes in Primary Keys Across Loads 76 | Before presenting the results of missingness for each table, DQe-c first visualizes how completeness in key variables changes over time/data loads. The purpose of this visualization is to compare key quantities of presence over time to track potential significant changes in a clinical data repository. For this purpose, DQe-c profiles changes over time in primary keys for available tables across loads (Figure 3). The tool uses reports from previous runs to automatically compile data for this visualization. 77 | 78 | ##### Figure 3: Changes in primary keys across loads. 79 | ![missingness1](illustration/fig3.png) 80 | 81 | 82 | #### 2-2- Proportion of Missing Data in Loaded Tables 83 | 84 | DQe-c differentiates two types of missingness (MS1 and MS2) and visualizes the output by column for each of the tables available from the CDM. Figure 4 presents an example of two encounter and diagnosis tables from PCORnet v3 CDM, using fabricated data. 
Data for this visualization comes from the MCR table. The figure shows that, for instance, there is around 15% missingness in columns providerid, enc_type, and pdx from tables encounter and diagnosis, all of which is due to the presence of nonsensical characters. 85 | 86 | ##### Figure 4: Missingness percentage by table and column. 87 | ![missingness2](illustration/fig4-1.png) 88 | ![missingness3](illustration/fig4-2.png) 89 | 90 | As Figure 4 illustrates, some columns have missingness of category 1 (NULL/NA/empty string), and some have a combination of cells with both categories of missingness. The bar chart visualizations distinguish between the two but also allow the user(s) to see the overall missingness percentage. 91 | 92 | 93 | ###3- Data Model Test(s) 94 | The data model module in DQe-c provides the capability for the tool to perform data model tests that are related to completeness. The current version of the tool (DQe-c v3.1) performs a test that looks for orphan records among common key variables, based on the CDM constraints. Results are visualized in a series of interactive bar charts. Figure 5 presents an example for the patid variable in the PCORnet v3 CDM. 95 | 96 | #### Figure 5. Orphan rows in common key variables 97 | The figures below visualize the number of unique key variables that are common in multiple tables. 98 | ![orphanrows](illustration/commonvariables.gif) 99 | 100 | * The Reference column on the right comes from the table in which the variable is a primary key, and therefore is a reference for all other tables. 101 | 102 | * Count_Out shows the number of unique key variables that are not present in the reference table -- e.g., a person id from the observation table that does not exist in the person table. 103 | 104 | * Count_In represents the number of unique key variables that are present in the reference table -- e.g., a person id from the observation table that exists in the person table as well. 
105 | 106 | The procedure to identify and visualize orphan records begins with identifying common variables among tables of the CDM. A table is identified as the reference table for each common variable in the data model module. For example, in the PCORnet v3 CDM, the demographic table is the reference table for patid. That is, all other patids in tables that have a patid column should be included in the patids of the demographic table; otherwise they are identified as orphan rows. The data model module has functions to categorize each unique value in common keys under `Count_In` or `Count_Out` (orphan rows), based on the unique values available in the reference table. Figure 5, for instance, shows that there is a small proportion of orphan patids in the diagnosis table – i.e., a small number of patids in the diagnosis table are not available in the demographic table. If the overall frequency of unique values in a common variable (e.g., patid) is more than the frequency of unique values in the reference table, it is likely that there are orphan rows. Orphan rows can also exist due to ETL issues. 107 | 108 | 109 | ###4- Test of Completeness in Key Clinical Indicators 110 | All of the completeness tests that DQe-c performs up to here are informatics-based – i.e., they treat data without reference to their clinical meaning. The clinical indicators module in DQe-c enables the user(s) to evaluate data completeness from a subjective viewpoint. The tool calculates the proportion of patients who are missing data on key clinical indicators, such as height, weight, blood pressure, medication, diagnosis, and encounter records, and demographic data including gender, race, and ethnicity (Figure 6). 111 | 112 | #### Figure 6. Test of completeness in key clinical indicators 113 | Figure 6 shows the percentage of patients missing specific key clinical indicators. 
114 | ![clinicalindicators](illustration/fig6.png) 115 | 116 | 117 | ##info 118 | This is an example report from DQe-c version 3.1 on PCORnet v3 CDM 119 | 120 | For questions and/or inquiries email: `hestiri@mgh.harvard.edu` 121 | 122 | -------------------------------------------------------------------------------- /RUN.R: -------------------------------------------------------------------------------- 1 | 2 | ##install/load required packages 3 | source("libs.R") 4 | 5 | ###identify data model PCORnet V3 6 | CDM = "PCORNET3" #set to PCORNET31, if you have the latest CDM 7 | 8 | ###identify SQL connection Oracle or SQL Server 9 | SQL = "SQLServer" ## SET to "Oracle" if Oracle is your RDBMS 10 | 11 | ## if you have your tables in a particular SQL schema, identify the schema here: 12 | schema = "" ## default is that there is no schema. SET SCHEMA NAME, IF THERE IS ONE 13 | 14 | ## is there a prefix for table names in your database? 15 | prefix = "" ## default at none. SET PREFIX, IF THERE IS ONE 16 | 17 | 18 | ## enter the organization name you are running the test on 19 | org = "" # SET Your Organization Name 20 | 21 | 22 | 23 | ##Now first run the test (file name must match the on-disk case: without.R) 24 | source("without.R") 25 | 26 | source("Comp_test.R") 27 | 28 | 29 | 30 | ## then generate the html report 31 | rmarkdown::render("Report.Rmd") 32 | 33 | 34 | 35 | source("DQe-v_queries.R") 36 | -------------------------------------------------------------------------------- /Report.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "DQe-c Report" 3 | output: 4 | html_document: 5 | highlight: tango 6 | toc: yes 7 | --- 8 | 9 | 10 | ```{r, echo=FALSE, include=FALSE} 11 | require(data.table);require(dplyr);require(ggplot2);require(gridExtra);require(rmarkdown);require(knitr);require(plotly);require(DT);require(treemap); require(visNetwork) 12 | 13 | ## borrowing the following 6 lines of code from 
https://stackoverflow.com/questions/26245554/execute-a-set-of-lines-from-another-r-file 14 | sourcePartial <- function(fn, skip=0, n=-1) { 15 | lines <- scan(fn, what=character(), sep="\n", skip=skip, n=n, quiet=TRUE) 16 | tc <- textConnection(lines) 17 | on.exit(close(tc), add = TRUE) ## close the connection even if source() fails 18 | source(tc) 19 | } 20 | sourcePartial("keys.R",47,12) 21 | 22 | ``` 23 | 24 | ##Load and Test Details 25 | 26 | This DQe-c report is generated from testing completeness in `r CDM` data from `r org` on `r Sys.Date()`. 27 | 28 | ### Table 1. List and Status of Common Data Model (CDM) -- here, `r CDM` -- Tables in this load 29 | The table below provides a list of CDM tables provided (and not provided) in the data load. 30 | 31 | The source data this table and the following graphics in this section are being generated from is `r paste("tablelist_",CDM,"_",org,"_",as.character(format(Sys.Date(),"%d-%m-%Y")),".csv", sep="")` 32 | ```{r, echo=FALSE, fig.align='center', fig.width= 10} 33 | dato <- read.csv(paste("reports/load_details_",CDM,"_",org,"_",as.character(format(Sys.Date(),"%d-%m-%Y")),".csv", sep="")) 34 | dato$TotalSizeKB <- ifelse(is.null(dato$TotalSizeKB) | is.na(dato$TotalSizeKB), 0, dato$TotalSizeKB) 35 | dato$TotalSizeGB <- round(as.numeric(dato$TotalSizeKB)/1000000,4) 36 | dato$Rows <- ifelse(is.null(dato$Rows) | is.na(dato$Rows), 0, dato$Rows) 37 | dato$status <- ifelse((dato$loaded == "Yes" & dato$available == "No"), "Table not available", "Table available") 38 | datatable(select(dato, CDM_Tables, status, TotalSizeGB, Rows), options = list(pageLength = 5), filter = 'bottom') 39 | 40 | ``` 41 | 42 | 43 | ### Figure 1. Available Tables, Compared to all CDM (`r CDM`) Tables 44 | This figure shows which of the CDM tables are loaded and/or available. 
45 | ```{r, echo=FALSE, fig.align='center', fig.width= 10} 46 | treemap(dato, 47 | index=c("CDM_Tables"), 48 | vSize="index", 49 | vColor="status", 50 | type="categorical", 51 | title = "", 52 | title.legend = "Table Availability", 53 | border.col = "white", 54 | position.legend = "bottom" 55 | ) 56 | 57 | ``` 58 | 59 | ### Figure 2. File Size and Row Numbers by Table in the (`r CDM`) Load 60 | ```{r, echo=FALSE, fig.align='center', fig.width= 10} 61 | treemap(dato, 62 | index=c("CDM_Tables"), 63 | vSize="Rows", 64 | vColor="TotalSizeGB", 65 | type="value", 66 | title = "", 67 | title.legend = "Size represents number of rows and color represent file size (in GB) for each table.", 68 | border.col = "white", 69 | position.legend = "bottom" 70 | ) 71 | ``` 72 | 73 | 74 | ### Figure 3. Loaded tables against CDM (`r CDM`) Relational Model. 75 | The figure below shows a network visualization of the CDM data model, as well as highlighting the tables that are available in this load (legend is the same as in Figure 1). 
76 | ```{r, echo=FALSE, fig.align='center', fig.width= 10} 77 | dat.net <- dato 78 | if (CDM %in% c("PCORNET3","PCORNET31")) { 79 | nodes <- data.frame(id = dat.net$X, 80 | label = dat.net$CDM_Tables, 81 | group = c(dat.net$status), 82 | shadow = c(FALSE)) 83 | edges <- data.frame(from = c(4,4,4,4,4,4,4,4,4,4,4,4,4,#demographics's 84 | 7,7,7,7,7,7,7,#encounter's 85 | 12 #prescribing's 86 | ), 87 | to = c(2,3,1,5,6,7,8,15,10,11,12,13,14,#to demographic 88 | 15,1,13,5,14,10,12,# to encounter through encounterid, enc_type, and providerid 89 | 6 # to prescribing through prescribingid 90 | )) 91 | 92 | visNetwork(nodes, edges) %>% 93 | visGroups(groupname = "Table not available", color = "#00C5CD", shape = "circle") %>% 94 | visGroups(groupname = "Table available", color = "#EE9572", shape = "circle") %>% 95 | visOptions(highlightNearest = TRUE, nodesIdSelection = T) %>% 96 | visEdges(arrows = 'from', scaling = list(min = .5, max = 1)) %>% 97 | visInteraction(navigationButtons = T, dragView = FALSE, zoomView = FALSE) 98 | 99 | } else 100 | if (CDM == "OMOPV5") { 101 | # nodes <- data.frame(id = dat.net$X, 102 | # label = dat.net$CDM_Tables, 103 | # group = c(dat.net$status), 104 | # shadow = F) 105 | # edges <- data.frame(from = c(14,14,14,14,14,14,14,14,14,14,14,#person 106 | # 17,17,17,17,17,#provider 107 | # 1,1, #caresite 108 | # 8,8, #drug exposure 109 | # 18,18,18,18, #visit occurence 110 | # 4, #condition occurence 111 | # 16 #procedure occurence 112 | # ), 113 | # to = c(9,11,13,18,9,8,4,16,10,2,5,#person's 114 | # 1,8,4,16,10,#provider's 115 | # 9,12,#care site's 116 | # 7,6,#drug exposure's 117 | # 8, 4, 16, 10, # visit occurence's 118 | # 3, #condition occurence's 119 | # 15 #procedure occurence's 120 | # )) 121 | # 122 | # visGroups(groupname = "Table loaded but empty", color = "#EE9572", shape = "circle") %>% 123 | # visGroups(groupname = "Table available", color = "#00C5CD", shape = "circle") %>% 124 | # visGroups(groupname = "Table not loaded!", color = 
"gray", shape = "circle") %>% 125 | # visOptions(highlightNearest = TRUE, nodesIdSelection = T) %>% 126 | # visEdges(arrows = 'from', scaling = list(min = .5, max = 1)) %>% 127 | # visInteraction(navigationButtons = T, dragView = FALSE, zoomView = FALSE) 128 | } 129 | 130 | 131 | ``` 132 | 133 | ## Completeness Results 134 | ### Table 2. The Master Completeness Results Table 135 | The table below provides results of completeness test at the value/cell level. 136 | 137 | * `TabNam` = `r CDM` table name 138 | * `ColNam` = Column name 139 | * `DQLVL` = Level of importance for completeness test. (`X`: Extremely Important, `H`: Highly Important, `L`:Low Importance) 140 | * `FRQ` = Frequency of rows 141 | * `UNIQFRQ` = Frequency of unique values in each column 142 | * `MS1_FRQ` = Frequency of cells with NULL/NA values or empty strings in each column 143 | * `MS2_FRQ` = Frequency of cells with characters in each column that don't represent meaningful data -- including, '+', '-', '_','#', '$', '*', '\', '?', '.', '&', '^', '%', '!', '@', and 'NI'. 144 | * `MSs_PERC` = Percentage of overall missing data in each column 145 | 146 | Data for this table is generated from `r paste("DQ_Master_Table_",CDM,"_",org,"_",as.character(format(Sys.Date(),"%d-%m-%Y")),".csv", sep="")` saved under report directory. 147 | ```{r, echo=FALSE} 148 | DQTBL <- read.csv(paste("reports/mstabs/DQ_Master_Table_",CDM,"_",org,"_",as.character(format(Sys.Date(),"%d-%m-%Y")),".csv", sep="")) 149 | datatable(DQTBL[,c("TabNam","ColNam","DQLVL","test_date","FRQ","UNIQFRQ","MS1_FRQ","MS2_FRQ","MSs_PERC" )], options = list(pageLength = 10), filter = 'bottom') 150 | ``` 151 | 152 | 153 | ### Figure 4. Changes in Primary Keys Across Loads 154 | Figure below profiles changes in primary keys across loads as a measure of change in patient/record number over time. 
155 | 156 | Data for the figure is stored in `r paste("FRQ_comp_",CDM,"_",org,"_",as.character(format(Sys.Date(),"%d-%m-%Y")),".csv", sep="")` 157 | ```{r, echo=FALSE, warning=FALSE, message=FALSE, fig.align='center', fig.height= 10, fig.width= 10} 158 | comp <- read.csv(paste("reports/FRQ_comp_",CDM,"_",org,"_",as.character(format(Sys.Date(),"%d-%m-%Y")),".csv", sep="")) 159 | 160 | ggplot(comp, aes(x=reorder(test_date,test_date), y=UNIQFRQ, group = ColNam) ) + 161 | geom_line(aes(), alpha = 0.4, size = 1, show.legend = FALSE) + 162 | stat_smooth(colour = "red",level=0.99) + 163 | geom_point( alpha = 0.5, shape = 1, size = 6, colour = "#FF3333", stroke = 2, show.legend = FALSE) + 164 | geom_point( aes(col = test_date), shape = 20, size = 8, show.legend = TRUE) + 165 | theme(plot.title = element_text(family = "Trebuchet MS", color="#666666", face="bold", hjust=0)) + 166 | xlab("Load") + ylab("Unique Counts") + 167 | facet_wrap(~TabNam, ncol = 3, switch = "x", scales = "free") + 168 | theme(axis.text.x=element_text(colour="white", size = 0.1)) 169 | ``` 170 | 171 | ### Figure set 1. Proportion of Missing Data by Type in Loaded Tables 172 | Figures below show proportion of missing cells/values in each column of each table loaded. Figures are generated based on Table 2. 
173 | 174 | * `MS1_FRQ` = Frequency of cells with NULL/NA values and empty strings in each column -- presence of absence 175 | * `MS2_FRQ` = Frequency of cells with characters in each column that don't represent meaningful data -- presence of nonsense 176 | 177 | ```{r, echo=FALSE, ggplot, warning=FALSE, message=FALSE, fig.align='center', fig.height= 5, fig.width= 12} 178 | 179 | DQTBL$MS1_PERC <- ifelse(DQTBL$FRQ == 0, 0, round((DQTBL$MS1_FRQ)/DQTBL$FRQ,2)) 180 | DQTBL$MS2_PERC <- ifelse(DQTBL$FRQ == 0, 0, round((DQTBL$MS2_FRQ)/DQTBL$FRQ,2)) 181 | DF <- subset(melt(DQTBL, id.var=c("TabNam","ColNam")), variable %in% c("MS1_PERC","MS2_PERC")) 182 | DF$value <- as.numeric(DF$value) 183 | colnames(DF)[3] <- "DQ_Issue" 184 | colnames(DF)[4] <- "Ratio" 185 | 186 | #####plotting 187 | plot_list<- list() 188 | table_list <- unique(DQTBL$TabNam) 189 | 190 | # create for loop to produce ggplot2 graphs 191 | for (i in seq_along(table_list)) { 192 | 193 | # create plot for each OMOP table in DQTBL 194 | plot_list[[i]] <- 195 | plot_list[[i]] <- 196 | ggplot(subset(DF, TabNam==table_list[i]), 197 | aes(x=ColNam, y=Ratio, fill=DQ_Issue)) + 198 | geom_bar(stat="identity", width = 1) + 199 | # scale_fill_manual(values=c("red","green","orange","dark red")) + 200 | facet_wrap( ~ ColNam, scale="free_x", nrow = 1) + 201 | # ggtitle("Frequency of Missing Data") + 202 | xlab("Column") + 203 | ylab("Frequency") + 204 | theme(plot.title = element_text(family = "Trebuchet MS", color="#666666", face="bold", hjust=0), 205 | axis.text.x = element_text(vjust = 1), 206 | strip.text.x = element_text(angle = 90, face="bold")) + 207 | theme(axis.text.x=element_blank())+ # theme(legend.position="none") + 208 | ggtitle(paste('Ratio of Missing Data in "', table_list[i], '" table', sep='')) 209 | 210 | } 211 | 212 | plot_list 213 | 214 | ``` 215 | 216 | ##Data Model Tests 217 | 218 | ### Figure set 2. 
Common Key Variables 219 | Figures below visualize number of unique key variables that are common in multiple `r CDM` tables. 220 | 221 | * The Reference column on the right comes from the table in which the variable is a primary key, and therefore is a reference for all other tables. 222 | 223 | * Count_Out shows number of unique key variables that are not present in the reference table -- e.g., person id from observation table that does not exist in person table. 224 | 225 | * Count_In represent number of unique key variables that are present in the reference table -- e.g., person id from observation table that exist in person table as well. 226 | 227 | ```{r, echo=FALSE, fig.align='center', fig.height= 3, fig.width= 10} 228 | DQTBL_KEYS <- read.csv(paste("reports/DM_",CDM,"_",org,"_",as.character(format(Sys.Date(),"%d-%m-%Y")),".csv", sep="")) 229 | 230 | if (CDM %in% c("PCORNET3","PCORNET31")) { 231 | patid<- filter(DQTBL_KEYS, ColNam == "patid") 232 | plot2 <- ggplot(data=patid, aes(x=reorder(TabNam, UNIQFRQ), y=UNIQFRQ)) + 233 | geom_bar(stat="identity", width = 1, aes(fill=Index)) + 234 | scale_fill_manual(values=c("darkolivegreen3","firebrick1", "springgreen3")) + 235 | ggtitle(paste("Count of Unique ",unique(patid$ColNam)," in Tables with ",unique(patid$ColNam), 236 | sep="")) + 237 | xlab(" Table Name") + 238 | ylab("Frequency of Unique Values") 239 | (gg <- ggplotly(plot2)) 240 | } else 241 | if (CDM == "OMOPV5") { 242 | # person_id<- filter(DQTBL_KEYS, ColNam == "person_id") 243 | # plot2 <- ggplot(data=person_id, aes(x=reorder(TabNam, UNIQFRQ), y=UNIQFRQ)) + 244 | # geom_bar(stat="identity", width = 1, aes(fill=Index)) + 245 | # scale_fill_manual(values=c("darkolivegreen3","firebrick1", "springgreen3")) + 246 | # ggtitle(paste("Count of Unique ",unique(person_id$ColNam)," in Tables with ",unique(person_id$ColNam), 247 | # sep="")) + 248 | # xlab(" Table Name") + 249 | # ylab("Frequency of Unique Values") 250 | # (gg <- ggplotly(plot2)) 251 | } 252 | 
253 | 254 | 255 | ``` 256 | 257 | ```{r, echo=FALSE, fig.align='center', fig.height= 3, fig.width= 10} 258 | 259 | 260 | if (CDM %in% c("PCORNET3","PCORNET31")) { 261 | encounterid <- filter(DQTBL_KEYS, ColNam == "encounterid") 262 | 263 | plot3 <- ggplot(data=encounterid, aes(x=reorder(TabNam, UNIQFRQ), y=UNIQFRQ)) + 264 | geom_bar(stat="identity", width = 1, aes(fill=Index)) + 265 | scale_fill_manual(values=c("darkolivegreen3","firebrick1", "springgreen3")) + 266 | ggtitle(paste("Count of Unique ",unique(encounterid$ColNam)," in Tables with ",unique(encounterid$ColNam), 267 | sep="")) + 268 | xlab(" Table Name") + 269 | ylab("Frequency of Unique Values") 270 | (gg <- ggplotly(plot3)) 271 | } else 272 | if (CDM == "OMOPV5") { 273 | # care_site_id<- filter(DQTBL_KEYS, ColNam == "care_site_id") 274 | # 275 | # plot3 <- ggplot(data=care_site_id, aes(x=reorder(TabNam, UNIQFRQ), y=UNIQFRQ)) + 276 | # geom_bar(stat="identity", width = 1, aes(fill=Index)) + 277 | # scale_fill_manual(values=c("darkolivegreen3","firebrick1", "springgreen3")) + 278 | # ggtitle(paste("Count of Unique ",unique(care_site_id$ColNam)," in Tables with ",unique(care_site_id$ColNam), 279 | # sep="")) + 280 | # xlab(" Table Name") + 281 | # ylab("Frequency of Unique Values") 282 | # (gg <- ggplotly(plot3)) 283 | } 284 | 285 | 286 | ``` 287 | 288 | ```{r, echo=FALSE, fig.align='center', fig.height= 3, fig.width= 10} 289 | 290 | if (CDM %in% c("PCORNET3","PCORNET31")) { 291 | providerid <- filter(DQTBL_KEYS, ColNam == "providerid") 292 | 293 | plot4 <- ggplot(data=providerid, aes(x=reorder(TabNam, UNIQFRQ), y=UNIQFRQ)) + 294 | geom_bar(stat="identity", width = 1, aes(fill=Index)) + 295 | scale_fill_manual(values=c("darkolivegreen3","firebrick1", "springgreen3")) + 296 | ggtitle(paste("Count of Unique ",unique(providerid$ColNam)," in Tables with ",unique(providerid$ColNam), 297 | sep="")) + 298 | xlab(" Table Name") + 299 | ylab("Frequency of Unique Values") 300 | (gg <- ggplotly(plot4)) 301 | } else 
302 | if (CDM == "OMOPV5") { 303 | # visit_occurrence_id<- filter(DQTBL_KEYS, ColNam == "visit_occurrence_id") 304 | # 305 | # plot4 <- ggplot(data=visit_occurrence_id, aes(x=reorder(TabNam, UNIQFRQ), y=UNIQFRQ)) + 306 | # geom_bar(stat="identity", width = 1, aes(fill=Index)) + 307 | # scale_fill_manual(values=c("darkolivegreen3","firebrick1", "springgreen3")) + 308 | # ggtitle(paste("Count of Unique ",unique(visit_occurrence_id$ColNam)," in Tables with ",unique(visit_occurrence_id$ColNam), 309 | # sep="")) + 310 | # xlab(" Table Name") + 311 | # ylab("Frequency of Unique Values") 312 | # (gg <- ggplotly(plot4)) 313 | } 314 | 315 | 316 | 317 | ``` 318 | 319 | ```{r, echo=FALSE, fig.align='center', fig.height= 3, fig.width= 10} 320 | 321 | if (CDM %in% c("PCORNET3","PCORNET31")) { 322 | enc_type <- filter(DQTBL_KEYS, ColNam == "enc_type") 323 | 324 | plot5 <- ggplot(data=enc_type, aes(x=reorder(TabNam, UNIQFRQ), y=UNIQFRQ)) + 325 | geom_bar(stat="identity", width = 1, aes(fill=Index)) + 326 | scale_fill_manual(values=c("darkolivegreen3","firebrick1", "springgreen3")) + 327 | ggtitle(paste("Count of Unique ",unique(enc_type$ColNam)," in Tables with ",unique(enc_type$ColNam), 328 | sep="")) + 329 | xlab(" Table Name") + 330 | ylab("Frequency of Unique Values") 331 | (gg <- ggplotly(plot5)) 332 | } else 333 | if (CDM == "OMOPV5") { 334 | # location_id<- filter(DQTBL_KEYS, ColNam == "location_id") 335 | # 336 | # plot5 <- ggplot(data=location_id, aes(x=reorder(TabNam, UNIQFRQ), y=UNIQFRQ)) + 337 | # geom_bar(stat="identity", width = 1, aes(fill=Index)) + 338 | # scale_fill_manual(values=c("darkolivegreen3","firebrick1", "springgreen3")) + 339 | # ggtitle(paste("Count of Unique ",unique(location_id$ColNam)," in Tables with ",unique(location_id$ColNam), 340 | # sep="")) + 341 | # xlab(" Table Name") + 342 | # ylab("Frequency of Unique Values") 343 | # (gg <- ggplotly(plot5)) 344 | } 345 | 346 | 347 | 348 | 349 | ``` 350 | 351 | ```{r, echo=FALSE, fig.align='center', 
fig.height= 3, fig.width= 10} 352 | if (CDM %in% c("PCORNET3","PCORNET31")) { 353 | prescribingid <- filter(DQTBL_KEYS, ColNam == "prescribingid") 354 | 355 | plot6 <- ggplot(data=prescribingid, aes(x=reorder(TabNam, UNIQFRQ), y=UNIQFRQ)) + 356 | geom_bar(stat="identity", width = 1, aes(fill=Index)) + 357 | scale_fill_manual(values=c("darkolivegreen3","firebrick1", "springgreen3")) + 358 | ggtitle(paste("Count of Unique ",unique(prescribingid$ColNam)," in Tables with ",unique(prescribingid$ColNam), 359 | sep="")) + 360 | xlab(" Table Name") + 361 | ylab("Frequency of Unique Values") 362 | (gg <- ggplotly(plot6)) 363 | } else 364 | if (CDM == "OMOPV5") { 365 | # organization_id<- filter(DQTBL_KEYS, ColNam == "organization_id") 366 | # 367 | # plot6 <- ggplot(data=organization_id, aes(x=reorder(TabNam, UNIQFRQ), y=UNIQFRQ)) + 368 | # geom_bar(stat="identity", width = 1, aes(fill=Index)) + 369 | # scale_fill_manual(values=c("darkolivegreen3","firebrick1", "springgreen3")) + 370 | # ggtitle(paste("Count of Unique ",unique(organization_id$ColNam)," in Tables with ",unique(organization_id$ColNam), 371 | # sep="")) + 372 | # xlab(" Table Name") + 373 | # ylab("Frequency of Unique Values") 374 | # (gg <- ggplotly(plot6)) 375 | } 376 | 377 | 378 | 379 | ``` 380 | 381 | 382 | 383 | ##Test of Completeness in Key Clinical Indicators 384 | 385 | ### Figure 5. Missingness in Key Clinical Indicators 386 | Figure 5 shows the percentage of patients missing specific key clinical indicators. 
387 | 388 | ```{r, echo=FALSE, fig.align='center', fig.height= 5, fig.width= 10} 389 | withouts <- read.csv(paste("reports/withouts_",CDM,"_",org,"_",as.character(format(Sys.Date(),"%d-%m-%Y")),".csv", sep="")) 390 | 391 | ggplot(withouts, aes(x=missing.percentage,y=reorder(group,-missing.percentage), label = perc)) + 392 | geom_point(aes(fill = missing.percentage),shape = 21, colour = "black", size = 8, stroke = 3, alpha = 0.9) + 393 | geom_text(vjust = -0.5, hjust = -.38, nudge_y = 0, size = 4)+ 394 | scale_fill_gradient(limits=c(0, 100),low="#FFFAF0", high="#EE2C2C", guide=F,na.value="white")+ 395 | labs(x = "", y = "") + 396 | scale_x_continuous(limits = c(0, 100)) + 397 | theme_minimal() + 398 | theme(panel.grid.major.y = element_line(color = "gray",size = 2.5), 399 | panel.grid.major.x = element_line(colour = "black", linetype = "dotted"), 400 | axis.text.y=element_text(size=20, face="bold")) 401 | ``` 402 | 403 | 404 | ##info 405 | This report is from DQe-c version 3.2 406 | 407 | Ask questions or report issues: `hestiri@mgh.harvard.edu` 408 | 409 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /dmrun.R: -------------------------------------------------------------------------------- 1 | source("freq.R") 2 | 3 | ################################################################################################################################ 4 | ################################################################################################################################ 5 | ################################################################################################################################ 6 | ########## THIS SCRIPT RUNS ORPHAN KEYS' TESTS 7 | 
################################################################################################################################ 8 | ################################################################################################################################ 9 | 10 | 11 | 12 | ## create data frame to store only columns and tables that have related key and/or primary keys for data model test 13 | if (CDM %in% c("PCORNET3","PCORNET31")) { 14 | DQTBL_KEYS <- select(subset(DQTBL, ColNam %in% c("patid","encounterid","providerid","prescribingid","enc_type")),TabNam, ColNam, UNIQFRQ) 15 | ## creating an index for plotting: Count In means number rof unique frequencies that exist in the reference table 16 | DQTBL_KEYS$Index <- "Count_In" 17 | dmtest <- parse(file = "dmtest_pcornet3.R") 18 | 19 | } 20 | 21 | 22 | for (i in seq_along(dmtest)) { 23 | tryCatch(eval(dmtest[[i]]), 24 | error = function(e) message("No Worries!! HE thinks it is fine if there is an ", as.character(e))) 25 | } 26 | 27 | 28 | 29 | 30 | ###### this test is working based on DQTBL_KEYS 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /dmtest_pcornet3.R: -------------------------------------------------------------------------------- 1 | ################ PCORnet V3 Data Model Orphan Keys test 2 | 3 | # Reference means that the column value is reference for all other tables that have the same column 4 | DQTBL_KEYS$Index <- ifelse(((DQTBL_KEYS$TabNam == "demographic" & DQTBL_KEYS$ColNam == "patid") | 5 | (DQTBL_KEYS$TabNam == "prescribing" & DQTBL_KEYS$ColNam == "prescribingid") | 6 | (DQTBL_KEYS$TabNam == "encounter" & DQTBL_KEYS$ColNam == "encounterid") | 7 | (DQTBL_KEYS$TabNam == "encounter" & DQTBL_KEYS$ColNam == "enc_type") | 8 | (DQTBL_KEYS$TabNam == "encounter" & DQTBL_KEYS$ColNam == "providerid")), 9 | "Reference", 10 | DQTBL_KEYS$Index) 11 | 12 | 13 | 14 | #Copy the data 
# frame to store not counted ids (the ones that are not available in the
# reference column)
#
# Every non-"Reference" key row gets a twin row with Index == "Count_Out",
# initialised to 0. It will hold the number of unique ids that are missing
# from the reference column.
DQTBL_KEYS2 <- subset(DQTBL_KEYS, DQTBL_KEYS$Index != "Reference")
DQTBL_KEYS2$Index <- "Count_Out"
DQTBL_KEYS2$UNIQFRQ <- 0

DQTBL_KEYS <- rbind(DQTBL_KEYS, DQTBL_KEYS2)
rm(DQTBL_KEYS2)

### Now let's count the number of unique ids that do not exist in the
### reference column and assign related values to them, and then subtract
### the number of counted outs from the number of counted ins.
#
# Each entry names a reference table, the key column that is checked, and the
# tables whose copy of that column is compared against the reference table.
# The order is the order in which the checks are run.
# NOTE(review): dispensing.prescribingid also receives a Count_Out row above,
# but no orphan check is run for it here -- confirm whether that is intended.
key_checks <- list(
  list(
    reference = "demographic",
    col = "patid",
    tables = c(
      "enrollment", "encounter", "diagnosis", "procedures", "vital",
      "dispensing", "lab_result_cm", "condition", "pro_cm", "prescribing",
      "pcornet_trial", "death", "death_condition"
    )
  ),
  list(
    reference = "encounter",
    col = "enc_type",
    tables = c("diagnosis", "procedures")
  ),
  list(
    reference = "encounter",
    col = "encounterid",
    tables = c(
      "diagnosis", "procedures", "vital", "lab_result_cm", "condition",
      "pro_cm", "prescribing"
    )
  ),
  list(
    reference = "encounter",
    col = "providerid",
    tables = c("diagnosis", "procedures")
  )
)

for (check in key_checks) {
  for (tab in check$tables) {
    # Row masks for this table/column pair; built once and reused below.
    is_key <- DQTBL_KEYS$TabNam == tab & DQTBL_KEYS$ColNam == check$col
    out_rows <- is_key & DQTBL_KEYS$Index == "Count_Out"
    in_rows <- is_key & DQTBL_KEYS$Index == "Count_In"

    # orphankeys() is defined outside this section; presumably it returns the
    # number of unique `col` values of table2 that are absent from table1
    # -- confirm against its definition.
    DQTBL_KEYS[out_rows, "UNIQFRQ"] <-
      orphankeys(table1 = check$reference, table2 = tab, col = check$col)

    # Count_In becomes the ids that ARE present in the reference column.
    DQTBL_KEYS[in_rows, "UNIQFRQ"] <-
      as.numeric(DQTBL_KEYS[in_rows, "UNIQFRQ"]) -
      as.numeric(DQTBL_KEYS[out_rows, "UNIQFRQ"])
  }
}

# Store the key counts as reports/DM_<CDM>_<org>_<dd-mm-YYYY>.csv
write.csv(
  DQTBL_KEYS,
  file = paste0(
    "reports/DM_", CDM, "_", org, "_", format(Sys.Date(), "%d-%m-%Y"), ".csv"
  )
)

-------------------------------------------------------------------------------- /example/.Rapp.history: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/example/.Rapp.history -------------------------------------------------------------------------------- /example/reports/DM_PCORnet_v3_DQe-cDemoPCORnet_14-01-2017.csv: -------------------------------------------------------------------------------- 1 | ,TabNam,ColNam,UNIQFRQ,Index, 2 | 1,demographic,patid,30871,Reference, 3 | 11,enrollment,patid,3394,Count_In, 4 | 16,encounter,encounterid,9498323,Reference, 5 | 17,encounter,patid,30871,Count_In, 6 | 22,encounter,providerid,16844,Reference, 7 | 24,encounter,enc_type,,Reference, 8 | 38,diagnosis,patid,27137,Count_In, 9 | 39,diagnosis,encounterid,2373339,Count_In, 10 | 40,diagnosis,enc_type,,Count_In, 11 | 42,diagnosis,providerid,9996,Count_In, 12 | 52,procedures,patid,30580,Count_In, 13 | 
53,procedures,encounterid,2425131,Count_In, 14 | 54,procedures,enc_type,,Count_In, 15 | 56,procedures,providerid,9645,Count_In, 16 | 64,vital,patid,24412,Count_In, 17 | 65,vital,encounterid,1085747,Count_In, 18 | 85,dispensing,patid,16287,Count_In, 19 | 86,dispensing,prescribingid,,Count_In, 20 | 93,lab_result_cm,patid,23078,Count_In, 21 | 94,lab_result_cm,encounterid,858044,Count_In, 22 | 124,condition,patid,19591,Count_In, 23 | 125,condition,encounterid,94338,Count_In, 24 | 150,prescribing,prescribingid,2976887,Reference, 25 | 151,prescribing,patid,27784,Count_In, 26 | 152,prescribing,encounterid,2645201,Count_In, 27 | 175,death,patid,3154,Count_In, 28 | 111,enrollment,patid,0,Count_Out, 29 | 171,encounter,patid,24,Count_Out, 30 | 381,diagnosis,patid,500,Count_Out, 31 | 391,diagnosis,encounterid,0,Count_Out, 32 | 401,diagnosis,enc_type,0,Count_Out, 33 | 421,diagnosis,providerid,0,Count_Out, 34 | 521,procedures,patid,0,Count_Out, 35 | 531,procedures,encounterid,0,Count_Out, 36 | 541,procedures,enc_type,0,Count_Out, 37 | 561,procedures,providerid,0,Count_Out, 38 | 641,vital,patid,0,Count_Out, 39 | 651,vital,encounterid,0,Count_Out, 40 | 851,dispensing,patid,0,Count_Out, 41 | 861,dispensing,prescribingid,0,Count_Out, 42 | 931,lab_result_cm,patid,0,Count_Out, 43 | 941,lab_result_cm,encounterid,12345,Count_Out, 44 | 1241,condition,patid,0,Count_Out, 45 | 1251,condition,encounterid,0,Count_Out, 46 | 1511,prescribing,patid,0,Count_Out, 47 | 1521,prescribing,encounterid,0,Count_Out, 48 | 1751,death,patid,0,Count_Out, -------------------------------------------------------------------------------- /example/reports/FRQ_comp_PCORnet_v3_DQe-cDemoPCORnet_14-01-2017.csv: -------------------------------------------------------------------------------- 1 | ,X,TabNam,ColNam,DQLVL,abbr,test_date,FRQ,UNIQFRQ,MS1_FRQ,MS2_FRQ,MSs_PERC,organization,CDM 2 | 1,100,demographic,patid,X,demog,13/1/2017,167901,167901,0,0,0,DQe-cDemoonPCORnet,PCORNET3 3 | 
2,101,encounter,encounterid,H,encntr,13/1/2017,28570257,28570257,0,0,0,DQe-cDemoonPCORnet,PCORNET3 4 | 4,103,diagnosis,diagnosisid,H,dx,13/1/2017,19386695,19386695,0,0,0,DQe-cDemoonPCORnet,PCORNET3 5 | 5,104,procedures,proceduresid,H,px,13/1/2017,16792289,16792289,0,0,0,DQe-cDemoonPCORnet,PCORNET3 6 | 6,105,vital,vitalid,H,vital,13/1/2017,28570257,125819,0,6174500,0.21,DQe-cDemoonPCORnet,PCORNET3 7 | 7,106,dispensing,dispensingid,H,disp,13/1/2017,600129,600129,0,0,0,DQe-cDemoonPCORnet,PCORNET3 8 | 8,107,condition,conditionid,H,cndtn,13/1/2017,469664,469664,0,0,0,DQe-cDemoonPCORnet,PCORNET3 9 | 9,108,prescribing,prescribingid,H,rx,13/1/2017,9005948,9005948,0,0,0,DQe-cDemoonPCORnet,PCORNET3 10 | 10,109,harvest,networkid,H,harv,13/1/2017,15,13,0,0,0,DQe-cDemoonPCORnet,PCORNET3 11 | 11,110,demographic,patid,X,demog,29/11/2016,139917.5,139917.5,0,0,0,DQe-cDemoonPCORnet,PCORNET3 12 | 12,111,encounter,encounterid,H,encntr,29/11/2016,19046838,19046838,0,0,0,DQe-cDemoonPCORnet,PCORNET3 13 | 14,113,diagnosis,diagnosisid,H,dx,29/11/2016,19386695,19386695,0,5433235,0.28,DQe-cDemoonPCORnet,PCORNET3 14 | 15,114,procedures,proceduresid,H,px,29/11/2016,4198072.25,4198072.25,0,0,0,DQe-cDemoonPCORnet,PCORNET3 15 | 16,115,vital,vitalid,H,vital,29/11/2016,29141662.14,128335.38,0,2342310,0.08,DQe-cDemoonPCORnet,PCORNET3 16 | 17,116,dispensing,dispensingid,H,disp,29/11/2016,300064.5,300064.5,0,0,0,DQe-cDemoonPCORnet,PCORNET3 17 | 18,117,condition,conditionid,H,cndtn,29/11/2016,391386.6667,391386.6667,0,0,0,DQe-cDemoonPCORnet,PCORNET3 18 | 19,118,prescribing,prescribingid,H,rx,29/11/2016,5003304.444,5003304.444,0,0,0,DQe-cDemoonPCORnet,PCORNET3 19 | 20,119,harvest,networkid,H,harv,29/11/2016,15,13,0,0,0,DQe-cDemoonPCORnet,PCORNET3 -------------------------------------------------------------------------------- /example/reports/load_details_PCORnet_v3_DQe-cDemoPCORnet_14-01-2017.csv: -------------------------------------------------------------------------------- 1 | 
,CDM_Tables,Repo_Tables,Rows,TotalSizeKB,loaded,available,index 2 | 1,condition,condition,527835,49051,Yes,Yes,1 3 | 2,death,death,12051,8724,Yes,Yes,1 4 | 3,death_condition,death_condition,0,0,Yes,No,1 5 | 4,demographic,demographic,123473,16419,Yes,Yes,1 6 | 5,diagnosis,diagnosis,38182630,4557116,Yes,Yes,1 7 | 6,dispensing,dispensing,11966456,993453,Yes,Yes,1 8 | 7,encounter,encounter,3918518,527192,Yes,Yes,1 9 | 8,enrollment,enrollment,123472,14752,Yes,Yes,1 10 | 9,harvest,harvest,0,0,Yes,No,1 11 | 10,lab_result_cm,lab_result_cm,702658,55395,Yes,Yes,1 12 | 11,pcornet_trial,pcornet_trial,746,8408,Yes,Yes,1 13 | 12,prescribing,prescribing,25876657,1906405,Yes,Yes,1 14 | 13,pro_cm,pro_cm,4364073,395534,Yes,Yes,1 15 | 14,procedures,procedures,22400153,1709059,Yes,Yes,1 16 | 15,vital,vital,0,0,No,No,1 -------------------------------------------------------------------------------- /example/reports/mstabs/DQ_Master_Table_PCORnet_v3_DQe-cDemoPCORnet_14-01-2017.csv: -------------------------------------------------------------------------------- 1 | ,TabNam,ColNam,DQLVL,abbr,test_date,FRQ,UNIQFRQ,MS1_FRQ,MS2_FRQ,MSs_PERC,organization,CDM 2 | 1,demographic,patid,X,demog,1/12/17,264901,264901,1000,133412,0.51,DQe-cDemoonPCORnet,PCORNET3 3 | 2,demographic,birth_date,H,demog,1/12/17,264901,76744,32423,543,0.12,DQe-cDemoonPCORnet,PCORNET3 4 | 3,demographic,birth_time,L,demog,1/12/17,264901,5,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 5 | 4,demographic,sex,H,demog,1/12/17,264901,43,3221,23412,0.10,DQe-cDemoonPCORnet,PCORNET3 6 | 5,demographic,hispanic,H,demog,1/12/17,264901,5,0,251416,0.95,DQe-cDemoonPCORnet,PCORNET3 7 | 6,demographic,race,H,demog,1/12/17,264901,8,432,0,0.00,DQe-cDemoonPCORnet,PCORNET3 8 | 7,demographic,biobank_flag,L,demog,1/12/17,264901,9,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 9 | 8,demographic,raw_sex,L,demog,1/12/17,264901,66,432,0,0.00,DQe-cDemoonPCORnet,PCORNET3 10 | 9,demographic,raw_hispanic,L,demog,1/12/17,264901,5,264901,0,1.00,DQe-cDemoonPCORnet,PCORNET3 11 
| 10,demographic,raw_race,L,demog,1/12/17,264901,2,264901,0,1.00,DQe-cDemoonPCORnet,PCORNET3 12 | 11,enrollment,patid,H,enrlmnt,1/12/17,264898,264898,34,0,0.00,DQe-cDemoonPCORnet,PCORNET3 13 | 12,enrollment,enr_start_date,H,enrlmnt,1/12/17,264898,43,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 14 | 13,enrollment,enr_end_date,L,enrlmnt,1/12/17,264898,50635,0,54234,0.20,DQe-cDemoonPCORnet,PCORNET3 15 | 14,enrollment,chart,L,enrlmnt,1/12/17,264898,98,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 16 | 15,enrollment,enr_basis,H,enrlmnt,1/12/17,264898,8,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 17 | 16,encounter,encounterid,H,encntr,1/12/17,85471969,85471969,4545432,0,0.05,DQe-cDemoonPCORnet,PCORNET3 18 | 17,encounter,patid,H,encntr,1/12/17,85471969,264898,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 19 | 18,encounter,admit_date,H,encntr,1/12/17,85471969,5491135,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 20 | 19,encounter,admit_time,L,encntr,1/12/17,85471969,4,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 21 | 20,encounter,discharge_date,L,encntr,1/12/17,85471969,5445085,5359822,5359822,0.13,DQe-cDemoonPCORnet,PCORNET3 22 | 21,encounter,discharge_time,L,encntr,1/12/17,85471969,0,5359822,0,0.06,DQe-cDemoonPCORnet,PCORNET3 23 | 22,encounter,providerid,L,encntr,1/12/17,85471969,138655,0,13960291,0.16,DQe-cDemoonPCORnet,PCORNET3 24 | 23,encounter,facility_location,L,encntr,1/12/17,85471969,0,71952673,0,0.84,DQe-cDemoonPCORnet,PCORNET3 25 | 24,encounter,enc_type,H,encntr,1/12/17,85471969,5,0,15896545,0.19,DQe-cDemoonPCORnet,PCORNET3 26 | 25,encounter,facilityid,L,encntr,1/12/17,85471969,0,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 27 | 26,encounter,discharge_disposition,L,encntr,1/12/17,85471969,67,19331272,66130834,1.00,DQe-cDemoonPCORnet,PCORNET3 28 | 27,encounter,discharge_status,L,encntr,1/12/17,85471969,0,19331272,66130834,1.00,DQe-cDemoonPCORnet,PCORNET3 29 | 28,encounter,drg,L,encntr,1/12/17,85471969,2,85224205,0,1.00,DQe-cDemoonPCORnet,PCORNET3 30 | 
29,encounter,drg_type,L,encntr,1/12/17,85471969,3,85224205,0,1.00,DQe-cDemoonPCORnet,PCORNET3 31 | 30,encounter,admitting_source,L,encntr,1/12/17,85471969,0,85481734,5,1.00,DQe-cDemoonPCORnet,PCORNET3 32 | 31,encounter,raw_siteid,L,encntr,1/12/17,85471969,4,85471969,0,1.00,DQe-cDemoonPCORnet,PCORNET3 33 | 32,encounter,raw_enc_type,L,encntr,1/12/17,85471969,5,0,85471969,1.00,DQe-cDemoonPCORnet,PCORNET3 34 | 33,encounter,raw_discharge_disposition,L,encntr,1/12/17,85471969,0,0,85471969,1.00,DQe-cDemoonPCORnet,PCORNET3 35 | 34,encounter,raw_discharge_status,L,encntr,1/12/17,85471969,0,85418827,0,1.00,DQe-cDemoonPCORnet,PCORNET3 36 | 35,encounter,raw_drg_type,L,encntr,1/12/17,85471969,0,85383197,0,1.00,DQe-cDemoonPCORnet,PCORNET3 37 | 36,encounter,raw_admitting_source,L,encntr,1/12/17,85471969,6,85471959,0,1.00,DQe-cDemoonPCORnet,PCORNET3 38 | 37,diagnosis,diagnosisid,H,dx,1/12/17,57921283,57921283,43523,0,0.00,DQe-cDemoonPCORnet,PCORNET3 39 | 38,diagnosis,patid,H,dx,1/12/17,57921283,262492,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 40 | 39,diagnosis,encounterid,H,dx,1/12/17,57921283,21347107,32,4325,0.00,DQe-cDemoonPCORnet,PCORNET3 41 | 40,diagnosis,enc_type,L,dx,1/12/17,57921283,0,0,6029134,0.10,DQe-cDemoonPCORnet,PCORNET3 42 | 41,diagnosis,admit_date,L,dx,1/12/17,57921283,98596,54,0,0.00,DQe-cDemoonPCORnet,PCORNET3 43 | 42,diagnosis,providerid,L,dx,1/12/17,57921283,77026,334,233761,0.00,DQe-cDemoonPCORnet,PCORNET3 44 | 43,diagnosis,dx,H,dx,1/12/17,57921283,23770,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 45 | 44,diagnosis,dx_type,H,dx,1/12/17,57921283,0,234,0,0.00,DQe-cDemoonPCORnet,PCORNET3 46 | 45,diagnosis,dx_source,H,dx,1/12/17,57921283,7,0,13488484,0.23,DQe-cDemoonPCORnet,PCORNET3 47 | 46,diagnosis,pdx,L,dx,1/12/17,57921283,0,0,6283855,0.11,DQe-cDemoonPCORnet,PCORNET3 48 | 47,diagnosis,raw_dx,L,dx,1/12/17,57921283,0,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 49 | 48,diagnosis,raw_dx_type,L,dx,1/12/17,57921283,8,54234,343,0.00,DQe-cDemoonPCORnet,PCORNET3 50 | 
49,diagnosis,raw_dx_source,L,dx,1/12/17,57921283,0,542345,0,0.01,DQe-cDemoonPCORnet,PCORNET3 51 | 50,diagnosis,raw_pdx,L,dx,1/12/17,57921283,0,57921283,0,1.00,DQe-cDemoonPCORnet,PCORNET3 52 | 51,procedures,proceduresid,H,px,1/12/17,50138065,50138065,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 53 | 52,procedures,patid,H,px,1/12/17,50138065,262276,342,52345,0.00,DQe-cDemoonPCORnet,PCORNET3 54 | 53,procedures,encounterid,H,px,1/12/17,50138065,21813241,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 55 | 54,procedures,enc_type,L,px,1/12/17,50138065,0,0,3969367,0.08,DQe-cDemoonPCORnet,PCORNET3 56 | 55,procedures,admit_date,L,px,1/12/17,50138065,110965,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 57 | 56,procedures,providerid,L,px,1/12/17,50138065,73867,755,85,0.00,DQe-cDemoonPCORnet,PCORNET3 58 | 57,procedures,px_date,L,px,1/12/17,50138065,0,0,765678,0.02,DQe-cDemoonPCORnet,PCORNET3 59 | 58,procedures,px,H,px,1/12/17,50138065,15460,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 60 | 59,procedures,px_type,H,px,1/12/17,50138065,0,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 61 | 60,procedures,px_source,L,px,1/12/17,50138065,0,0,50138065,1.00,DQe-cDemoonPCORnet,PCORNET3 62 | 61,procedures,raw_px,L,px,1/12/17,50138065,0,50138065,0,1.00,DQe-cDemoonPCORnet,PCORNET3 63 | 62,procedures,raw_px_type,L,px,1/12/17,50138065,0,657856,0,0.01,DQe-cDemoonPCORnet,PCORNET3 64 | 63,vital,vitalid,H,vital,1/12/17,9758782,9758782,0,6456,0.00,DQe-cDemoonPCORnet,PCORNET3 65 | 64,vital,patid,H,vital,1/12/17,9758782,206767,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 66 | 65,vital,encounterid,L,vital,1/12/17,9758782,9758782,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 67 | 66,vital,measure_date,H,vital,1/12/17,9758782,0,765,0,0.00,DQe-cDemoonPCORnet,PCORNET3 68 | 67,vital,measure_time,L,vital,1/12/17,9758782,5,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 69 | 68,vital,vital_source,H,vital,1/12/17,9758782,5,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 70 | 69,vital,ht,H,vital,1/12/17,9758782,234,7416724,0,0.76,DQe-cDemoonPCORnet,PCORNET3 71 | 
70,vital,wt,H,vital,1/12/17,9758782,5,6712063,0,0.69,DQe-cDemoonPCORnet,PCORNET3 72 | 71,vital,diastolic,L,vital,1/12/17,9758782,434,6148942,0,0.63,DQe-cDemoonPCORnet,PCORNET3 73 | 72,vital,systolic,L,vital,1/12/17,9758782,67,6148942,0,0.63,DQe-cDemoonPCORnet,PCORNET3 74 | 73,vital,original_bmi,L,vital,1/12/17,9758782,6,9758782,0,1.00,DQe-cDemoonPCORnet,PCORNET3 75 | 74,vital,bp_position,L,vital,1/12/17,9758782,23,0,9758782,1.00,DQe-cDemoonPCORnet,PCORNET3 76 | 75,vital,smoking,L,vital,1/12/17,9758782,32,756,35634,0.00,DQe-cDemoonPCORnet,PCORNET3 77 | 76,vital,tobacco,L,vital,1/12/17,9758782,23,0,65435,0.01,DQe-cDemoonPCORnet,PCORNET3 78 | 77,vital,tobacco_type,L,vital,1/12/17,9758782,32,9758782,0,1.00,DQe-cDemoonPCORnet,PCORNET3 79 | 78,vital,raw_diastolic,L,vital,1/12/17,9758782,3,0,9758782,1.00,DQe-cDemoonPCORnet,PCORNET3 80 | 79,vital,raw_systolic,L,vital,1/12/17,9758782,0,0,9758782,1.00,DQe-cDemoonPCORnet,PCORNET3 81 | 80,vital,raw_bp_position,L,vital,1/12/17,9758782,0,435234,434,0.04,DQe-cDemoonPCORnet,PCORNET3 82 | 81,vital,raw_smoking,L,vital,1/12/17,9758782,4,9758782,0,1.00,DQe-cDemoonPCORnet,PCORNET3 83 | 82,vital,raw_tobacco,L,vital,1/12/17,9758782,0,7646,53451,0.01,DQe-cDemoonPCORnet,PCORNET3 84 | 83,vital,raw_tobacco_type,L,vital,1/12/17,9758782,0,9758782,0,1.00,DQe-cDemoonPCORnet,PCORNET3 85 | 84,dispensing,dispensingid,H,disp,1/12/17,1561585,1561585,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 86 | 85,dispensing,patid,H,disp,1/12/17,1561585,133639,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 87 | 86,dispensing,prescribingid,L,disp,1/12/17,1561585,0,1561585,0,1.00,DQe-cDemoonPCORnet,PCORNET3 88 | 87,dispensing,dispense_date,H,disp,1/12/17,1561585,0,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 89 | 88,dispensing,ndc,H,disp,1/12/17,1561585,67,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 90 | 89,dispensing,dispense_sup,L,disp,1/12/17,1561585,0,1561585,0,1.00,DQe-cDemoonPCORnet,PCORNET3 91 | 90,dispensing,dispense_amt,L,disp,1/12/17,1561585,0,0,1561585,1.00,DQe-cDemoonPCORnet,PCORNET3 
92 | 91,dispensing,raw_ndc,L,disp,1/12/17,1561585,0,76453,544,0.05,DQe-cDemoonPCORnet,PCORNET3 93 | 92,lab_result_cm,lab_result_cm_id,H,labs,1/12/17,8761270,8761270,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 94 | 93,lab_result_cm,patid,H,labs,1/12/17,8761270,194761,0,858,0.00,DQe-cDemoonPCORnet,PCORNET3 95 | 94,lab_result_cm,encounterid,L,labs,1/12/17,8761270,7709455,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 96 | 95,lab_result_cm,lab_name,L,labs,1/12/17,8761270,5,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 97 | 96,lab_result_cm,specimen_source,L,labs,1/12/17,8761270,34,0,178825,0.02,DQe-cDemoonPCORnet,PCORNET3 98 | 97,lab_result_cm,lab_loinc,L,labs,1/12/17,8761270,55,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 99 | 98,lab_result_cm,priority,L,labs,1/12/17,8761270,66,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 100 | 99,lab_result_cm,result_loc,L,labs,1/12/17,8761270,7,34,53453,0.01,DQe-cDemoonPCORnet,PCORNET3 101 | 100,lab_result_cm,lab_px,L,labs,1/12/17,8761270,8,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 102 | 101,lab_result_cm,lab_px_type,L,labs,1/12/17,8761270,99,45,0,0.00,DQe-cDemoonPCORnet,PCORNET3 103 | 102,lab_result_cm,lab_order_date,L,labs,1/12/17,8761270,3439540,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 104 | 103,lab_result_cm,specimen_date,L,labs,1/12/17,8761270,3439540,654,0,0.00,DQe-cDemoonPCORnet,PCORNET3 105 | 104,lab_result_cm,specimen_time,L,labs,1/12/17,8761270,2312,34566,0,0.00,DQe-cDemoonPCORnet,PCORNET3 106 | 105,lab_result_cm,result_date,H,labs,1/12/17,8761270,3439540,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 107 | 106,lab_result_cm,result_time,L,labs,1/12/17,8761270,3,54,0,0.00,DQe-cDemoonPCORnet,PCORNET3 108 | 107,lab_result_cm,result_qual,L,labs,1/12/17,8761270,354,5,524526,0.06,DQe-cDemoonPCORnet,PCORNET3 109 | 108,lab_result_cm,result_num,L,labs,1/12/17,8761270,7909,187741,0,0.02,DQe-cDemoonPCORnet,PCORNET3 110 | 109,lab_result_cm,result_modifier,L,labs,1/12/17,8761270,5,0,4876,0.00,DQe-cDemoonPCORnet,PCORNET3 111 | 
110,lab_result_cm,result_unit,L,labs,1/12/17,8761270,3,43,8672502,0.99,DQe-cDemoonPCORnet,PCORNET3 112 | 111,lab_result_cm,norm_range_low,L,labs,1/12/17,8761270,467,540349,0,0.06,DQe-cDemoonPCORnet,PCORNET3 113 | 112,lab_result_cm,norm_modifier_low,L,labs,1/12/17,8761270,0,0,540349,0.06,DQe-cDemoonPCORnet,PCORNET3 114 | 113,lab_result_cm,norm_range_high,L,labs,1/12/17,8761270,42,540349,0,0.06,DQe-cDemoonPCORnet,PCORNET3 115 | 114,lab_result_cm,norm_modifier_high,L,labs,1/12/17,8761270,3,0,540349,0.06,DQe-cDemoonPCORnet,PCORNET3 116 | 115,lab_result_cm,abn_ind,L,labs,1/12/17,8761270,4,0,4944394,0.56,DQe-cDemoonPCORnet,PCORNET3 117 | 116,lab_result_cm,raw_lab_name,L,labs,1/12/17,8761270,5,8761270,0,1.00,DQe-cDemoonPCORnet,PCORNET3 118 | 117,lab_result_cm,raw_lab_code,L,labs,1/12/17,8761270,6,8761270,0,1.00,DQe-cDemoonPCORnet,PCORNET3 119 | 118,lab_result_cm,raw_panel,L,labs,1/12/17,8761270,0,8761270,0,1.00,DQe-cDemoonPCORnet,PCORNET3 120 | 119,lab_result_cm,raw_result,L,labs,1/12/17,8761270,8104,102805,0,0.01,DQe-cDemoonPCORnet,PCORNET3 121 | 120,lab_result_cm,raw_unit,L,labs,1/12/17,8761270,7,93050,0,0.01,DQe-cDemoonPCORnet,PCORNET3 122 | 121,lab_result_cm,raw_order_dept,L,labs,1/12/17,8761270,0,4523,4523,0.00,DQe-cDemoonPCORnet,PCORNET3 123 | 122,lab_result_cm,raw_facility_code,L,labs,1/12/17,8761270,7,0,8761270,1.00,DQe-cDemoonPCORnet,PCORNET3 124 | 123,condition,conditionid,H,cndtn,1/12/17,1170190,1170190,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 125 | 124,condition,patid,H,cndtn,1/12/17,1170190,163375,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 126 | 125,condition,encounterid,L,cndtn,1/12/17,1170190,836104,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 127 | 126,condition,report_date,L,cndtn,1/12/17,1170190,543,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 128 | 127,condition,resolve_date,L,cndtn,1/12/17,1170190,45234,1170190,0,1.00,DQe-cDemoonPCORnet,PCORNET3 129 | 128,condition,onset_date,L,cndtn,1/12/17,1170190,0,45,75685,0.06,DQe-cDemoonPCORnet,PCORNET3 130 | 
129,condition,condition_status,L,cndtn,1/12/17,1170190,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 131 | 130,condition,condition,H,cndtn,1/12/17,1170190,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 132 | 131,condition,condition_type,H,cndtn,1/12/17,1170190,0,43254,875,0.04,DQe-cDemoonPCORnet,PCORNET3 133 | 132,condition,condition_source,H,cndtn,1/12/17,1170190,5,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 134 | 133,condition,raw_condition_status,L,cndtn,1/12/17,1170190,4,1170190,0,1.00,DQe-cDemoonPCORnet,PCORNET3 135 | 134,condition,raw_condition,L,cndtn,1/12/17,1170190,0,69383,0,0.06,DQe-cDemoonPCORnet,PCORNET3 136 | 135,condition,raw_condition_type,L,cndtn,1/12/17,1170190,3,6,0,0.00,DQe-cDemoonPCORnet,PCORNET3 137 | 136,condition,raw_condition_source,L,cndtn,1/12/17,1170190,0,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 138 | 150,prescribing,prescribingid,H,rx,1/12/17,26779042,26779042,3234,0,0.00,DQe-cDemoonPCORnet,PCORNET3 139 | 151,prescribing,patid,H,rx,1/12/17,26779042,237112,432,0,0.00,DQe-cDemoonPCORnet,PCORNET3 140 | 152,prescribing,encounterid,L,rx,1/12/17,26779042,23793865,0,3243554,0.12,DQe-cDemoonPCORnet,PCORNET3 141 | 153,prescribing,rx_providerid,L,rx,1/12/17,26779042,61300,4,0,0.00,DQe-cDemoonPCORnet,PCORNET3 142 | 154,prescribing,rx_order_date,L,rx,1/12/17,26779042,2279206,0,9,0.00,DQe-cDemoonPCORnet,PCORNET3 143 | 155,prescribing,rx_order_time,L,rx,1/12/17,26779042,0,342342,0,0.01,DQe-cDemoonPCORnet,PCORNET3 144 | 156,prescribing,rx_start_date,L,rx,1/12/17,26779042,2279206,0,9,0.00,DQe-cDemoonPCORnet,PCORNET3 145 | 157,prescribing,rx_end_date,L,rx,1/12/17,26779042,2544715,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 146 | 158,prescribing,rx_quantity,L,rx,1/12/17,26779042,0,26788807,0,1.00,DQe-cDemoonPCORnet,PCORNET3 147 | 159,prescribing,rx_refills,L,rx,1/12/17,26779042,0,739340,0,0.03,DQe-cDemoonPCORnet,PCORNET3 148 | 160,prescribing,rx_days_supply,L,rx,1/12/17,26779042,0,4645635,0,0.17,DQe-cDemoonPCORnet,PCORNET3 149 | 
161,prescribing,rx_frequency,L,rx,1/12/17,26779042,0,26788807,0,1.00,DQe-cDemoonPCORnet,PCORNET3 150 | 162,prescribing,rx_basis,L,rx,1/12/17,26779042,0,26787925,0,1.00,DQe-cDemoonPCORnet,PCORNET3 151 | 163,prescribing,rxnorm_cui,L,rx,1/12/17,26779042,0,6456345,0,0.24,DQe-cDemoonPCORnet,PCORNET3 152 | 164,prescribing,raw_rx_med_name,L,rx,1/12/17,26779042,0,0,634563,0.02,DQe-cDemoonPCORnet,PCORNET3 153 | 165,prescribing,raw_rx_frequency,L,rx,1/12/17,26779042,0,0,3241235,0.12,DQe-cDemoonPCORnet,PCORNET3 154 | 166,prescribing,raw_rxnorm_cui,L,rx,1/12/17,26779042,0,0,65635,0.00,DQe-cDemoonPCORnet,PCORNET3 155 | 175,death,patid,H,dth,1/12/17,15448,15448,0,445,0.03,DQe-cDemoonPCORnet,PCORNET3 156 | 176,death,death_date,H,dth,1/12/17,15448,65,653,54,0.05,DQe-cDemoonPCORnet,PCORNET3 157 | 177,death,death_date_impute,L,dth,1/12/17,15448,4,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 158 | 178,death,death_source,H,dth,1/12/17,15448,4,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 159 | 179,death,death_match_confidence,L,dth,1/12/17,15448,4,15448,0,1.00,DQe-cDemoonPCORnet,PCORNET3 160 | 186,harvest,networkid,H,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 161 | 187,harvest,network_name,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 162 | 188,harvest,datamartid,H,harv,1/12/17,5,3,5,0,1.00,DQe-cDemoonPCORnet,PCORNET3 163 | 189,harvest,datamart_name,L,harv,1/12/17,5,3,3,0,0.60,DQe-cDemoonPCORnet,PCORNET3 164 | 190,harvest,datamart_platform,L,harv,1/12/17,5,3,1,0,0.20,DQe-cDemoonPCORnet,PCORNET3 165 | 191,harvest,cdm_version,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 166 | 192,harvest,datamart_claims,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 167 | 193,harvest,datamart_ehr,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 168 | 194,harvest,birth_date_mgmt,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 169 | 195,harvest,enr_start_date_mgmt,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 170 | 
196,harvest,enr_end_date_mgmt,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 171 | 197,harvest,admit_date_mgmt,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 172 | 198,harvest,discharge_date_mgmt,L,harv,1/12/17,5,3,2,0,0.40,DQe-cDemoonPCORnet,PCORNET3 173 | 199,harvest,px_date_mgmt,L,harv,1/12/17,5,3,3,0,0.60,DQe-cDemoonPCORnet,PCORNET3 174 | 200,harvest,rx_order_date_mgmt,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 175 | 201,harvest,rx_start_date_mgmt,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 176 | 202,harvest,rx_end_date_mgmt,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 177 | 203,harvest,dispense_date_mgmt,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 178 | 204,harvest,lab_order_date_mgmt,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 179 | 205,harvest,specimen_date_mgmt,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 180 | 206,harvest,result_date_mgmt,L,harv,1/12/17,5,3,0,3,0.60,DQe-cDemoonPCORnet,PCORNET3 181 | 207,harvest,measure_date_mgmt,L,harv,1/12/17,5,3,0,4,0.80,DQe-cDemoonPCORnet,PCORNET3 182 | 208,harvest,onset_date_mgmt,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 183 | 209,harvest,report_date_mgmt,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 184 | 210,harvest,resolve_date_mgmt,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 185 | 211,harvest,pro_date_mgmt,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 186 | 212,harvest,refresh_demographic_date,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 187 | 213,harvest,refresh_enrollment_date,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 188 | 214,harvest,refresh_encounter_date,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 189 | 215,harvest,refresh_diagnosis_date,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 190 | 216,harvest,refresh_procedures_date,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 191 | 
217,harvest,refresh_vital_date,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 192 | 218,harvest,refresh_dispensing_date,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 193 | 219,harvest,refresh_lab_result_cm_date,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 194 | 220,harvest,refresh_condition_date,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 195 | 221,harvest,refresh_pro_cm_date,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 196 | 222,harvest,refresh_prescribing_date,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 197 | 223,harvest,refresh_pcornet_trial_date,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 198 | 224,harvest,refresh_death_date,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 199 | 225,harvest,refresh_death_cause_date,L,harv,1/12/17,5,3,0,0,0.00,DQe-cDemoonPCORnet,PCORNET3 -------------------------------------------------------------------------------- /example/reports/tablelist_PCORnet_v3_DQe-cDemoPCORnet-14-01-2017.csv: -------------------------------------------------------------------------------- 1 | "","CDM_Tables" 2 | "1","enrollment" 3 | "2","vital" 4 | "3","procedures" 5 | "4","diagnosis" 6 | "5","lab_result_cm" 7 | "6","death" 8 | "8","dispensing" 9 | "9","prescribing" 10 | "11","condition" 11 | "13","harvest" 12 | "14","encounter" 13 | "15","demographic" 14 | -------------------------------------------------------------------------------- /example/reports/withouts_PCORnet_v3_DQe-cDemoPCORnet_14-01-2017.csv: -------------------------------------------------------------------------------- 1 | ,group,missing.percentage,missing.population,denominator,perc,organization,test_date,CDM 2 | 1,encounter,34,34,100,34.00%,DQe-cDemoonPCORnet,13/1/2017,PCORNET3 3 | 2,diagnosis,3,3,100,3.00%,DQe-cDemoonPCORnet,13/1/2018,PCORNET3 4 | 3,medication,12,12,100,12.00%,DQe-cDemoonPCORnet,13/1/2019,PCORNET3 5 | 4,ethnicity,0,0,100,0.00%,DQe-cDemoonPCORnet,13/1/2020,PCORNET3 6 | 
5,race,4.5,4.5,100,4.50%,DQe-cDemoonPCORnet,13/1/2021,PCORNET3 7 | 6,gender,0,0,100,0.00%,DQe-cDemoonPCORnet,13/1/2022,PCORNET3 8 | 7,weight,0,0,100,2.00%,DQe-cDemoonPCORnet,13/1/2023,PCORNET3 9 | 8,height,83,83,100,83.00%,DQe-cDemoonPCORnet,13/1/2024,PCORNET3 10 | 9,blood_pressure,71.3,71.3,100,71.30%,DQe-cDemoonPCORnet,13/1/2025,PCORNET3 11 | 10,blood_pressure,6,6,100,19.00%,DQe-cDemoonPCORnet,13/1/2026,PCORNET3 12 | 11,smoking,99.9,99.9,100,99.90%,DQe-cDemoonPCORnet,13/1/2027,PCORNET3 -------------------------------------------------------------------------------- /freq.R: --------------------------------------------------------------------------------
source("prep.R")

# Add the columns needed for the completeness analysis.
# test_date is created first so the column order of DQTBL stays
# test_date, FRQ, UNIQFRQ, MS1_FRQ, MS2_FRQ, MSs_PERC, organization, CDM.
DQTBL$test_date <- as.character(format(Sys.Date(), "%m-%d-%Y"))
DQTBL$FRQ <- 0
DQTBL$UNIQFRQ <- 0
DQTBL$MS1_FRQ <- 0 # for NULL/NAs
DQTBL$MS2_FRQ <- 0 # for ""s
DQTBL$MSs_PERC <- 0 # for percent missingness
DQTBL$organization <- org # ORGANIZATION NAME
DQTBL$CDM <- CDM # Data Model


## Store a table listing every column of every table in the repository.
## Fail loudly for a dialect without a catalogue query instead of leaving
## `repotabs` undefined for the loop below.
repotabs <- switch(
  SQL,
  SQLServer = dbGetQuery(
    conn,
    "SELECT COLUMN_NAME, TABLE_NAME FROM INFORMATION_SCHEMA.COLUMNS"
  ),
  Oracle = dbGetQuery(
    conn,
    "SELECT COLUMN_NAME, TABLE_NAME FROM user_tab_cols"
  ),
  stop("freq.R has no column-catalogue query for SQL = '", SQL, "'.",
       call. = FALSE)
)


#############################################################################
## Go through all tables once and, for every column that exists both in the
## repository and in DQTBL, store
##   * the number of rows of the table      -> DQTBL$FRQ
##   * the number of distinct column values -> DQTBL$UNIQFRQ
#############################################################################

for (cdm_table in unique(as.character(DQTBL$TabNam))) {
  ## name of this CDM table as it is spelled in the repository
  repo_table <- unique(as.character(
    tbls2[which(tbls2$CDM_Tables == cdm_table), "Repo_Tables"]
  ))
  if (length(repo_table) != 1L) {
    warning("Skipping CDM table '", cdm_table, "': expected exactly one ",
            "repository table, found ", length(repo_table), ".",
            call. = FALSE)
    next
  }

  ## columns the repository actually holds for this table
  repo_cols <- unique(as.character(
    repotabs$COLUMN_NAME[which(repotabs$TABLE_NAME == repo_table)]
  ))
  if (length(repo_cols) == 0L) {
    next
  }

  in_table <- DQTBL$TabNam == cdm_table

  ## The row count is a property of the table, so query it once per table
  ## rather than once per column.
  n_rows <- as.numeric(
    dbGetQuery(conn, paste0("SELECT COUNT(*) FROM ", schema, repo_table))
  )
  DQTBL$FRQ[in_table & DQTBL$ColNam %in% tolower(repo_cols)] <- n_rows

  for (repo_col in repo_cols) {
    target <- in_table & DQTBL$ColNam == tolower(repo_col)
    if (!any(target)) {
      ## column is not tracked in DQTBL, so its count would be discarded
      next
    }
    DQTBL$UNIQFRQ[target] <- as.numeric(dbGetQuery(
      conn,
      paste0("SELECT COUNT(DISTINCT ", repo_col, ") FROM ", schema, repo_table)
    ))
  }
}

-------------------------------------------------------------------------------- /funcs_pcornet3.R: --------------------------------------------------------------------------------
## Helper functions for the PCORnet v3/v3.1 checks.
## They read the globals `conn`, `schema`, `prefix`, `tbls2` and `SQL`
## at call time.

## Render a date as a SQL literal for the active dialect.
dq_sql_date <- function(x) {
  x <- as.character(x)
  switch(
    SQL,
    SQLServer = paste0("'", x, "'"),
    Oracle = paste0("TO_DATE('", x, "', 'yyyy-mm-dd')"),
    stop("withoutdem()/without() support SQL = 'SQLServer' or 'Oracle', ",
         "not '", SQL, "'.", call. = FALSE)
  )
}

## Wrap a column in the dialect's cast-to-text expression.
dq_sql_as_char <- function(column) {
  switch(
    SQL,
    SQLServer = paste0("CAST(", column, " AS CHAR(54))"),
    Oracle = paste0("TO_CHAR(", column, ")"),
    stop("withoutdem()/without() support SQL = 'SQLServer' or 'Oracle', ",
         "not '", SQL, "'.", call. = FALSE)
  )
}

## Build a quoted SQL value list "('a','b')"; embedded quotes are doubled.
dq_sql_in_list <- function(values) {
  escaped <- gsub("'", "''", as.character(values), fixed = TRUE)
  paste0("('", paste(escaped, collapse = "','"), "')")
}

## "<demographic table> WHERE <birth date inside the reference window>".
dq_birth_cohort <- function(ref_date1, ref_date2) {
  paste0(
    schema, prefix, "DEMOGRAPHIC WHERE BIRTH_DATE > ", dq_sql_date(ref_date1),
    " AND BIRTH_DATE < ", dq_sql_date(ref_date2)
  )
}

## Schema-qualified repository name of a CDM table.
dq_repo_table <- function(table) {
  paste0(
    schema,
    as.character(
      tbls2$Repo_Tables[which(tbls2$CDM_Tables == tolower(table))]
    )
  )
}

## Run a query that returns a single count and return it as a number.
dq_count <- function(query) {
  as.numeric(dbGetQuery(conn, query)[[1]][1])
}


## Count patients whose value in a DEMOGRAPHIC column is not acceptable.
##   table      only used (unevaluated) as a label in the messages
##   col        name of the demographic column
##   list       the ACCEPTABLE values; its variable name labels the result
##   ref_date1, ref_date2  birth-date window (exclusive bounds)
## Returns a one-row data frame: group, missing.percentage,
## missing.population, denominator.
withoutdem <- function(table, col, list,
                       ref_date1 = "1900-01-01", ref_date2 = Sys.Date()) {
  df.name <- deparse(substitute(table))
  list.name <- deparse(substitute(list))
  column <- toupper(col)
  cohort <- dq_birth_cohort(ref_date1, ref_date2)
  ## NOT IN alone is never true for NULL, so NULLs are tested explicitly;
  ## otherwise patients with no value at all would not count as missing.
  unacceptable <- paste0(
    " AND (", column, " IS NULL OR ", column, " NOT IN ",
    dq_sql_in_list(list), ")"
  )

  ## Denominator: same schema/prefix-qualified table as the numerator.
  denominator <- dq_count(
    paste0("SELECT COUNT(DISTINCT(PATID)) FROM ", cohort)
  )
  ## Patients with an unacceptable value in the given column.
  notin <- dq_count(
    paste0("SELECT COUNT(PATID) FROM ", cohort, unacceptable)
  )
  d1 <- round((notin / denominator) * 100, 4)

  message(d1, "% of patients born between ", ref_date1, " and ", ref_date2,
          " are missing ", list.name, " information.")
  if (isTRUE(d1 > 0)) {
    whattheyhave <- dbGetQuery(
      conn,
      paste0("SELECT DISTINCT(", column, ") FROM ", cohort, unacceptable)
    )[[1]]
    whattheyhave <- ifelse(is.na(whattheyhave), "NULL",
                           as.character(whattheyhave))
    message(notin, " of the ", denominator, " patients born between ",
            ref_date1, " and ", ref_date2, " don't have an acceptable ",
            toupper(list.name), " record in the ", toupper(df.name),
            " table.")
    message("Unacceptable values in column ", column, " are ",
            paste(whattheyhave, collapse = ", "), ".")
  }

  data.frame(
    "group" = list.name,
    "missing percentage" = d1,
    "missing population" = notin,
    "denominator" = denominator
  )
}


## Count patients that have no acceptable record of a condition/drug/lab/...
##   table      CDM table name (character); looked up in tbls2
##   col        column holding the value of interest
##   list       the UNWANTED values (the opposite of `list` in withoutdem)
##   ref_date1, ref_date2  birth-date window (exclusive bounds)
## Returns a one-row data frame: group, missing.percentage,
## missing.population, denominator.
without <- function(table, col, list,
                    ref_date1 = "1900-01-01", ref_date2 = Sys.Date()) {
  df.name <- deparse(substitute(table))
  list.name <- deparse(substitute(list))
  column <- toupper(col)
  column_chr <- dq_sql_as_char(column)
  unwanted <- dq_sql_in_list(list)
  cohort <- dq_birth_cohort(ref_date1, ref_date2)
  target <- dq_repo_table(table)

  ## set the denominator
  denominator <- dq_count(
    paste0("SELECT COUNT(DISTINCT(PATID)) FROM ", cohort)
  )

  ## patients with at least one NULL or unwanted value
  pats_wit_oneout <- dq_count(paste0(
    "SELECT COUNT(DISTINCT(PATID)) FROM ", target,
    " WHERE ", column, " IS NULL OR ", column_chr, " IN ", unwanted
  ))
  ppwo <- round((pats_wit_oneout / denominator) * 100, 4)
  if (isTRUE(ppwo > 1)) {
    message(pats_wit_oneout, " of the patients -- ", ppwo,
            "% of patients -- are missing at least 1 acceptable ", column,
            " value in the ", toupper(table), " table.")
  }

  ## Patients in the cohort with at least one valid record ...
  whatsoever <- dq_count(paste0(
    "SELECT COUNT(DISTINCT(PATID)) FROM ", cohort,
    " AND PATID IN (SELECT DISTINCT(PATID) FROM ", target,
    " WHERE ", column, " IS NOT NULL AND ", column_chr,
    " NOT IN ", unwanted, ")"
  ))
  ## ... so the patients without any valid record are the remainder.
  ## This remainder, not `whatsoever`, is the missing population.
  missing_n <- denominator - whatsoever
  pwse <- round((missing_n / denominator) * 100, 4)
  if (isTRUE(pwse > 1)) {
    message(missing_n, " of the patients -- ", pwse,
            "% of patients -- are missing any acceptable ", column,
            " value in the ", toupper(table), " table.")
  }

  message(pwse, "% of unique patients don't have any '", list.name,
          "' record in the ", df.name, " table.")
  data.frame(
    "group" = list.name,
    "missing percentage" = pwse,
    "missing population" = missing_n,
    "denominator" = denominator
  )
}


## Count orphan foreign keys: distinct values of `col` in `table2` that do
## not occur in `col` of the source table `table1`.
orphankeys <- function(table1, # source table
                       table2, # table to be compared with source table
                       col     # common column
                       ) {
  key <- toupper(col)
  orphans <- dbGetQuery(conn, paste0(
    "SELECT COUNT(DISTINCT(", key, ")) FROM ", dq_repo_table(table2),
    " WHERE ", key, " NOT IN (SELECT DISTINCT(", key, ") FROM ",
    dq_repo_table(table1), ")"
  ))
  as.numeric(orphans)
}


## Format numbers as percentage text labels, e.g. 12.5 -> "12.50%".
percent <- function(x, digits = 2, format = "f", ...) {
  paste0(formatC(x, format = format, digits = digits, ...), "%")
}

164 | 165 | -------------------------------------------------------------------------------- /illustration/commonvariables.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/illustration/commonvariables.gif -------------------------------------------------------------------------------- /illustration/fig2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/illustration/fig2-1.png -------------------------------------------------------------------------------- /illustration/fig2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/illustration/fig2-2.png -------------------------------------------------------------------------------- /illustration/fig2-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/illustration/fig2-3.png -------------------------------------------------------------------------------- /illustration/fig3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/illustration/fig3.png -------------------------------------------------------------------------------- /illustration/fig4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/illustration/fig4-1.png --------------------------------------------------------------------------------
/illustration/fig4-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/illustration/fig4-2.png -------------------------------------------------------------------------------- /illustration/fig5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/illustration/fig5.png -------------------------------------------------------------------------------- /illustration/fig6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/illustration/fig6.png -------------------------------------------------------------------------------- /illustration/netwizfig2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/illustration/netwizfig2.gif -------------------------------------------------------------------------------- /illustration/table1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/illustration/table1.png -------------------------------------------------------------------------------- /illustration/workflow3.1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/illustration/workflow3.1.jpg -------------------------------------------------------------------------------- /keys.R: -------------------------------------------------------------------------------- 1 | ### This script is supposed to be confidential and only used by the person it is 
# generated for.

# tcltk ships with R itself (it is not a CRAN package), so it cannot be
# installed with install.packages(); check that this R build supports it.
if (!capabilities("tcltk")) {
  stop("keys.R needs an R build with Tcl/Tk support to show the login dialog.",
       call. = FALSE)
}
library(tcltk)


## Show a small Tk dialog asking for a login ID and a password.
## The dialog closes when the Login button is pressed or when Enter is hit
## in the password field. Returns (invisibly) a named character vector
## c(loginID = ..., password = ...).
getLoginDetails <- function() {
  ## Based on code by Barry Rowlingson
  ## http://r.789695.n4.nabble.com/tkentry-that-exits-after-RETURN-tt854721.html#none
  tt <- tktoplevel()
  tkwm.title(tt, "Get login details")
  Name <- tclVar("Login ID")
  Password <- tclVar("Password")
  entry.Name <- tkentry(tt, width = "20", textvariable = Name)
  entry.Password <- tkentry(tt, width = "20", show = "*",
                            textvariable = Password)
  tkgrid(tklabel(tt, text = "Please enter your login details."))
  tkgrid(entry.Name)
  tkgrid(entry.Password)

  OnOK <- function() {
    tkdestroy(tt)
  }
  OK.but <- tkbutton(tt, text = " Login ", command = OnOK)
  ## Submit on Enter: the binding needs the <Return> event sequence; an
  ## empty sequence never fires.
  tkbind(entry.Password, "<Return>", OnOK)
  tkgrid(OK.but)
  tkfocus(tt)
  ## block until the dialog has been closed
  tkwait.window(tt)

  invisible(c(loginID = tclvalue(Name), password = tclvalue(Password)))
}
credentials <- getLoginDetails()
## Do what needs to be done



## username for JDBC/ODBC connection?
usrnm <- credentials[[1]]

## password for JDBC/ODBC connection
pss <- credentials[[2]]

## Delete credentials
rm(credentials)


# who is running the test?
tester <- "hossein.estiri"

## reading organization name
org <- org

###identifying data model PCORnet V3, OMOP V4-V5
CDM <- CDM

###identifying SQL connection PostgreSQL or SQL Server
SQL <- SQL

-------------------------------------------------------------------------------- /libs.R: --------------------------------------------------------------------------------
### Install and load all libraries

## Make sure `pkg` is installed and attached.
##   pkg           package name
##   github        optional "owner/repo" to install from with devtools
##   github_first  try GitHub before CRAN (TRUE) or only as a fallback (FALSE)
## A package that had to be installed is attached afterwards, so the rest of
## the run can use it without a restart. Errors if the package still cannot
## be loaded after all installers were tried.
ensure_package <- function(pkg, github = NULL, github_first = TRUE) {
  is_installed <- function() requireNamespace(pkg, quietly = TRUE)
  from_cran <- function() install.packages(pkg)
  from_github <- function() devtools::install_github(github)

  installers <- list(from_cran)
  if (!is.null(github)) {
    installers <- if (github_first) {
      list(from_github, from_cran)
    } else {
      list(from_cran, from_github)
    }
  }

  for (install in installers) {
    if (is_installed()) {
      break
    }
    tryCatch(
      install(),
      error = function(e) {
        message("Installing '", pkg, "' failed: ", conditionMessage(e))
      }
    )
  }

  library(pkg, character.only = TRUE)
  invisible(TRUE)
}

## devtools comes first: it provides the GitHub installer used below.
## The order of the remaining packages is kept, because it decides which
## package masks which (e.g. plyr is attached before dplyr).
ensure_package("devtools")
ensure_package("plyr")
ensure_package("dplyr")
ensure_package("data.table")
ensure_package("dtplyr")
ensure_package("DT", github = "rstudio/DT")
ensure_package("ggplot2", github = "hadley/ggplot2")
ensure_package("gridExtra")
ensure_package("RPostgreSQL")
ensure_package("knitr")
ensure_package("rmarkdown")
ensure_package("plotly")
ensure_package("treemap")
ensure_package("reshape2")
ensure_package("RJDBC")
ensure_package("visNetwork", github = "datastorm-open/visNetwork",
               github_first = FALSE)
ensure_package("rmdformats", github = "juba/rmdformats")
ensure_package("ggbeeswarm", github = "eclarke/ggbeeswarm")

## tcltk is part of R itself and cannot be installed from CRAN; it is only
## usable when R was built with Tcl/Tk support.
if (capabilities("tcltk")) {
  library(tcltk)
} else {
  warning("This R build has no Tcl/Tk support; the login dialog in keys.R ",
          "will not work.", call. = FALSE)
}

-------------------------------------------------------------------------------- /prep.R: --------------------------------------------------------------------------------
####################################
###preparations to run the analysis
####################################

## Qualify table names with the schema. The guard keeps the suffix from
## being appended a second time when this script is sourced again.
if (nzchar(schema) && !grepl("\\.$", schema)) {
  schema <- paste0(schema, ".")
}

## open the database connection for the selected dialect
switch(
  SQL,
  SQLServer = source("Connect_SQLServer.R"),
  PostgreSQL = source("Connect_PostgreSQL.R"),
  Oracle = source("Connect_Oracle.R"),
  stop("Unsupported SQL = '", SQL, "'.", call. = FALSE)
)

## load the data-quality template of the selected data model
dqtbl_file <- switch(
  CDM,
  PCORNET3 = "DQTBL_pcornet_v3.csv",
  PCORNET31 = "DQTBL_pcornet_v31.csv",
  stop("Unsupported CDM = '", CDM, "'.", call. = FALSE)
)
DQTBL <- read.csv(file = dqtbl_file, header = TRUE, sep = ",")
rm(dqtbl_file)
source("funcs_pcornet3.R")


# create a vector of tables
CDM_TABLES <- c(as.character(unique(DQTBL$TabNam)))

# create a list of tables in the SQL database
repo_names <- switch(
  SQL,
  SQLServer = dbGetQuery(
    conn, "SELECT * FROM INFORMATION_SCHEMA.TABLES"
  )$TABLE_NAME,
  Oracle = dbGetQuery(conn, "select table_name from all_tables")[[1]],
  stop("prep.R cannot list the repository tables for SQL = '", SQL, "'.",
       call. = FALSE)
)
repo_list <- data.frame(Repo_Tables = as.character(repo_names),
                        stringsAsFactors = FALSE)
rm(repo_names)

## Repo_Tables2 is the lower-case table name with the prefix stripped.
## With an empty prefix the pattern would be ".*" and would blank every
## name, so the names are only lower-cased in that case.
repo_list$Repo_Tables2 <- tolower(repo_list$Repo_Tables)
if (nzchar(prefix)) {
  repo_list$Repo_Tables2 <- sub(paste0(".*", tolower(prefix)), "",
                                repo_list$Repo_Tables2)
}

# ##manually modifying spelling errors for Oracle
# ########### remove this when tables names are corrected!#############
if (SQL == "Oracle") {
  ## vector-level replacement is a no-op when a name does not occur;
  ## replacing through a zero-row subset of the data frame would error
  spelling_fixes <- c(
    labresults_cm = "lab_result_cm",
    death_cause = "death_condition",
    procedure = "procedures"
  )
  for (wrong in names(spelling_fixes)) {
    repo_list$Repo_Tables2[repo_list$Repo_Tables2 == wrong] <-
      spelling_fixes[[wrong]]
  }
  rm(spelling_fixes, wrong)
}
# #####################################################################

# pick CDM tables from all tables provided and keep the result as tbls2
# (columns: Repo_Tables = name in the repository, CDM_Tables = CDM name)
tbls2 <- repo_list[repo_list$Repo_Tables2 %in% CDM_TABLES, , drop = FALSE]
rm(repo_list)
colnames(tbls2)[2] <- "CDM_Tables"
rownames(tbls2) <- NULL




## write list of provided CDM tables for the record
# write.csv(tbls2, file = paste("reports/tablelist_",CDM,"_",org,"_",as.character(format(Sys.Date(),"%d-%m-%Y")),".csv", sep=""))



##store test date in mm-YYYY format
test_date <- as.character(format(Sys.Date(), "%m-%Y"))



# the size queries below are adapted from:
# http://stackoverflow.com/questions/7892334/get-size-of-all-tables-in-database
# they add the number of rows (Rows) and the size (TotalSizeKB) of every
# repository table to tbls2
if (SQL == "SQLServer") {
  tbls2 <- join(
    tbls2,
    dbGetQuery(
      conn,
      "SELECT
         t.NAME AS Repo_Tables,
         p.rows AS Rows,
         SUM(a.total_pages) * 8 AS TotalSizeKB
       FROM
         sys.tables t
       INNER JOIN
         sys.indexes i ON t.OBJECT_ID = i.object_id
       INNER JOIN
         sys.partitions p ON i.object_id = p.OBJECT_ID AND i.index_id = p.index_id
       INNER JOIN
         sys.allocation_units a ON p.partition_id = a.container_id
       LEFT OUTER JOIN
         sys.schemas s ON t.schema_id = s.schema_id
       WHERE
         t.NAME NOT LIKE 'dt%'
         AND t.is_ms_shipped = 0
         AND i.OBJECT_ID > 255
       GROUP BY
         t.Name, p.Rows
       ORDER BY
         t.Name"
    ),
    by = "Repo_Tables",
    type = "left"
  )
} else if (SQL == "Oracle") {
  x1 <- dbGetQuery(
    conn,
    "select Repo_Tables, TotalSizeKB, NUM_ROWS from
       ((select SEGMENT_NAME Repo_Tables,
         bytes/1000 TotalSizeKB from user_segments where segment_name in (select table_name from all_tables)) d
       inner join
       (select TABLE_NAME, NUM_ROWS from all_tables) t
       on d.Repo_Tables =t.TABLE_NAME)"
  )
  names(x1)[1:3] <- c("Repo_Tables", "TotalSizeKB", "Rows")
  tbls2 <- join(tbls2, x1, by = "Repo_Tables", type = "left")
  rm(x1)
}


rownames(tbls2) <- NULL

## creating a source table, tbls3, that merges tbls2 with CDM tables
tbls3 <- data.frame(unique(DQTBL$TabNam))
colnames(tbls3)[1] <- "CDM_Tables"
tbls3 <- join(tbls3, tbls2, by = "CDM_Tables", type = "left")

## loaded: the table exists in the repository; available: it also has rows
tbls3$loaded <- ifelse(is.na(tbls3$Repo_Tables), "No", "Yes")
tbls3$available <- ifelse(
  (!is.na(tbls3$Rows) & tbls3$Rows > 0 & tbls3$loaded == "Yes"),
  "Yes", "No"
)
tbls3$index <- 1
tbls3$CDM_Tables <- as.character(tbls3$CDM_Tables)

tbls3 <- tbls3[order(CDM_TABLES), ]

rownames(tbls3) <- NULL

report_stamp <- paste0(CDM, "_", org, "_", format(Sys.Date(), "%d-%m-%Y"))

write.csv(tbls3,
          file = paste0("reports/load_details_", report_stamp, ".csv"))



# listing tables that should be deducted from DQTBL
no_tab <- c(as.character(
  tbls3[(tbls3$loaded == "No" | tbls3$available == "No"), "CDM_Tables"]
))


## write list of provided (non-empty) CDM tables for the record
write.csv(tbls2[which(tbls2$Rows > 0), "CDM_Tables", drop = FALSE],
          file = paste0("reports/tablelist_", report_stamp, ".csv"))
rm(report_stamp)


# write.csv(no_tab, file = paste("reports/empty_tablelist_",CDM,"_",org,"_",as.character(format(Sys.Date(),"%d-%m-%Y")),".csv", sep=""))

## dropping the empty/undelivered tables from DQTBL to avoid problems
## running loops in the analysis phase
DQTBL <- subset(DQTBL, !(DQTBL$TabNam %in% no_tab))




# copy the date columns of DQTBL for date testing
dateTBL <- DQTBL[grep("_date", DQTBL[, "ColNam"]), c("TabNam", "ColNam")]


rm(tbls3)

165 | -------------------------------------------------------------------------------- /reports/.DS_Store: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/reports/.DS_Store
--------------------------------------------------------------------------------
/reports/mstabs/.keep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/reports/mstabs/.keep
--------------------------------------------------------------------------------
/sqljdbc4.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hestiri/DQe-c/f98b7ede757a4c932900bd4d45184c4ba6e0e6da/sqljdbc4.jar
--------------------------------------------------------------------------------
/without.R:
--------------------------------------------------------------------------------
source("dmrun.R")

#########################################################################
### Finding out how many patients don't have specific health variables.##
#########################################################################
# For each variable below, a "without" summary is computed against the
# reference date "2014-01-01"; the summaries are stacked into `withouts`,
# stamped with organization / test date / CDM, and written to reports/.
# Only runs for the PCORnet v3 and v3.1 data models.

if (CDM %in% c("PCORNET3", "PCORNET31")) {

  # ---- Demographics: withoutdem() takes the list of accepted values ----

  ## gender: the only wanted values
  gender <- c("M", "F")
  without_gender <- withoutdem(
    table = demographic,
    col = "sex",
    ref_date2 = "2014-01-01",
    list = gender
  )

  ## race -- make sure we understand what values are in the accepted list!
  race <- c("05", "03", "07", "02", "01", "04", "06", "OT")
  without_race <- withoutdem(
    table = demographic,
    col = "race",
    ref_date2 = "2014-01-01",
    list = race
  )

  ## ethnicity
  # NOTE(review): only "Y" is listed as a wanted value -- confirm that is
  # the intended accepted list for the hispanic column.
  ethnicity <- c("Y")
  without_ethnicity <- withoutdem(
    table = demographic,
    col = "hispanic",
    ref_date2 = "2014-01-01",
    list = ethnicity
  )

  # ---- Clinical variables: without() takes the list of unwanted values ----
  # (values treated as unwanted in addition to NULLs)

  ## medication
  medication <- c("", "%", "$", "#", "@", "NI")
  without_medication <- without(
    table = "PRESCRIBING",
    col = "prescribingid",
    ref_date2 = "2014-01-01",
    list = medication
  )

  ## Dx -- same unwanted values as medication
  diagnosis <- medication
  without_diagnosis <- without(
    table = "DIAGNOSIS",
    col = "dx",
    ref_date2 = "2014-01-01",
    list = diagnosis
  )

  ## Encounter -- same unwanted values as medication
  encounter <- medication
  without_encounter <- without(
    table = "ENCOUNTER",
    col = "enc_type",
    ref_date2 = "2014-01-01",
    list = encounter
  )

  ## Weight -- same unwanted values as medication
  weight <- medication
  without_weight <- without(
    table = "VITAL",
    col = "wt",
    ref_date2 = "2014-01-01",
    list = weight
  )

  ## Height -- the original list carries "NI" twice; kept as-is
  height <- c(medication, "NI")
  without_height <- without(
    table = "VITAL",
    col = "ht",
    ref_date2 = "2014-01-01",
    list = height
  )

  ## blood_pressure -- same unwanted values as medication
  blood_pressure <- medication
  without_BP_sys <- without(
    table = "VITAL",
    col = "systolic",
    ref_date2 = "2014-01-01",
    list = blood_pressure
  )
  without_BP_dias <- without(
    table = "VITAL",
    col = "diastolic",
    ref_date2 = "2014-01-01",
    list = blood_pressure
  )

  # Only the systolic summary is reported; the diastolic one is computed
  # but not included.
  # without_BP <- rbind(without_BP_sys,without_BP_dias)
  without_BP <- without_BP_sys

  ## smoking -- same unwanted values as medication
  smoking <- medication
  without_smoking <- without(
    table = "VITAL",
    col = "smoking",
    ref_date2 = "2014-01-01",
    list = smoking
  )

  # ---- Combine, annotate and write the report ----
  withouts <- rbind(
    without_encounter,
    without_diagnosis,
    without_medication,
    without_ethnicity,
    without_race,
    without_gender,
    without_weight,
    without_height,
    without_BP,
    without_smoking
  )

  withouts$perc <- percent(withouts$missing.percentage)
  withouts$organization <- org
  # the test_date column uses mm-dd-YYYY; the file name below uses dd-mm-YYYY
  withouts$test_date <- format(Sys.Date(), "%m-%d-%Y")
  withouts$CDM <- CDM

  write.csv(
    withouts,
    file = paste0(
      "reports/withouts_", CDM, "_", org, "_",
      format(Sys.Date(), "%d-%m-%Y"), ".csv"
    )
  )

  ## make another copy in the comparison directory for comparison
  # write.csv(withouts, file = paste("PATH/withouts_",CDM,"_",org,"_",as.character(format(Sys.Date(),"%d-%m-%Y")),".csv", sep=""))

}

--------------------------------------------------------------------------------