├── BMI ├── BMI_Category_GDPPR.sql ├── BMI_codes_reference_data.csv ├── README.md ├── height_codes_reference_data.csv └── weight_codes_reference_data.csv ├── Blood_glucose ├── Blood_glucose.sql ├── Blood_pressure_codes_reference_data.csv └── README.md ├── Blood_pressure ├── Blood Pressure v2.1.sql ├── Blood_pressure_codes_reference_data.csv └── README.md ├── Ethnic_Category ├── Ethnicity_GDPPR+HES_Modal.sql ├── Ethnicity_GDPPR+HES_Recent.sql ├── Ethnicity_GDPPR_only_Recent.sql ├── README.md ├── ethnicity_reference_data_categories.csv └── ethnicity_reference_data_snomed.csv ├── LICENSE ├── README.md └── Smoking ├── Most Recent Smoking Status.sql ├── README.md └── smoking_status_reference_data.csv /BMI/BMI_Category_GDPPR.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | -- MAGIC %md 3 | -- MAGIC ## CATEGORISE BMI JOURNALS 4 | 5 | -- COMMAND ---------- 6 | 7 | -- create table with only BMI journals in 8 | CREATE OR REPLACE TEMPORARY VIEW BMI_JOURNALS_2 AS 9 | SELECT a.* 10 | , b.ConceptId_Description 11 | , b.BMI_CAT 12 | FROM gdppr_database.gdppr_table AS a 13 | LEFT JOIN ref_data.BMI_CODES AS b 14 | ON a.CODE = b.ConceptID 15 | WHERE a.CODE IN (SELECT DISTINCT ConceptID FROM ref_data.BMI_CODES) 16 | 17 | -- COMMAND ---------- 18 | 19 | -- add categories on to BMI journals 20 | -- add age at time of journal date (allows removal of under 18's) 21 | CREATE OR REPLACE TEMPORARY VIEW BMI_JOURNALS_CAT_2 AS 22 | SELECT * 23 | , CASE WHEN BMI_CAT = 'ALL' AND VALUE1_CONDITION IS NULL THEN 'INDETERMINABLE' 24 | WHEN BMI_CAT = 'ALL' AND VALUE1_CONDITION >= 10 AND VALUE1_CONDITION < 18.5 THEN 'UNDERWEIGHT' 25 | WHEN BMI_CAT = 'ALL' AND VALUE1_CONDITION >= 18.5 AND VALUE1_CONDITION < 25 THEN 'IDEAL' 26 | WHEN BMI_CAT = 'ALL' AND VALUE1_CONDITION >= 25 AND VALUE1_CONDITION < 30 THEN 'OVERWEIGHT' 27 | WHEN BMI_CAT = 'ALL' AND VALUE1_CONDITION >= 30 AND VALUE1_CONDITION <= 55 THEN 'OBESE' 28 | WHEN BMI_CAT = 'ALL' AND VALUE1_CONDITION > 55 AND VALUE1_CONDITION <= 185 THEN 'INDETERMINABLE' -- needs further investigation 29 | WHEN BMI_CAT = 'ALL' AND VALUE1_CONDITION < 10 THEN 'INDETERMINABLE' 30 | WHEN BMI_CAT = 'ALL' AND VALUE1_CONDITION > 185 THEN 'INDETERMINABLE' 31 | WHEN BMI_CAT = 'UNDERWEIGHT' AND VALUE1_CONDITION IS NULL THEN 'UNDERWEIGHT' 32 | WHEN BMI_CAT = 'UNDERWEIGHT' AND VALUE1_CONDITION = 0 THEN 'UNDERWEIGHT' 33 | WHEN BMI_CAT = 'IDEAL' AND VALUE1_CONDITION IS NULL THEN 'IDEAL' 34 | WHEN BMI_CAT = 'IDEAL' AND VALUE1_CONDITION = 0 THEN 'IDEAL' 35 | WHEN BMI_CAT = 'IDEAL' AND VALUE1_CONDITION >= 18.5 AND VALUE1_CONDITION < 25 THEN 'IDEAL' 36 | WHEN BMI_CAT = 'UNDER_IDEAL_BORDER' AND VALUE1_CONDITION IS NULL THEN 'UNDER 20 NULLS' 37 | WHEN BMI_CAT = 'UNDER_IDEAL_BORDER' AND VALUE1_CONDITION = 0 THEN 'UNDER 20 NULLS' 38 | WHEN BMI_CAT = 'UNDER_IDEAL_BORDER' AND VALUE1_CONDITION >= 10 AND VALUE1_CONDITION < 18.5 THEN 'UNDERWEIGHT' 39 | WHEN BMI_CAT = 'UNDER_IDEAL_BORDER' AND VALUE1_CONDITION >= 18.5 AND VALUE1_CONDITION <= 20 THEN 'IDEAL' 40 | WHEN BMI_CAT = 'OVERWEIGHT' AND VALUE1_CONDITION IS NULL THEN 'OVERWEIGHT' 41 | WHEN BMI_CAT = 'OVERWEIGHT' AND VALUE1_CONDITION = 0 THEN 'OVERWEIGHT' 42 | WHEN BMI_CAT = 'OVERWEIGHT' AND VALUE1_CONDITION >= 25 AND VALUE1_CONDITION < 30 THEN 'OVERWEIGHT' 43 | WHEN BMI_CAT = 'OBESE' AND VALUE1_CONDITION IS NULL THEN 'OBESE' 44 | WHEN BMI_CAT = 'OBESE' AND VALUE1_CONDITION = 0 THEN 'OBESE' 45 | WHEN BMI_CAT = 'OBESE' AND VALUE1_CONDITION >= 30 AND VALUE1_CONDITION < 55 THEN 'OBESE' 46 | WHEN BMI_CAT = 'UNSUITABLE/DECLINED' THEN 'UNSUITABLE/DECLINED' 47 | WHEN BMI_CAT = 'UNKNOWN' THEN 'INDETERMINABLE' 48 | WHEN BMI_CAT = 'CHILD' THEN 'CHILD' 49 | ELSE 'INDETERMINABLE' 50 | END AS BMI_CATEGORISED_INITIAL 51 | , (DATEDIFF(DATE, DATE_OF_BIRTH))/365.25 AS AGE_AT_TIME_OF_JOURNAL 52 | FROM BMI_JOURNALS_2 53 | 54 | -- COMMAND ---------- 55 | 56 | -- create table with no unknown BMIs in and only patients over 18 at the time of their BMI recording 57 | CREATE OR REPLACE TEMPORARY VIEW BMI_CATEGORY_NO_UNKNOWNS_2 AS 58 | SELECT * 59 | FROM BMI_JOURNALS_CAT_2 60 | WHERE BMI_CATEGORISED_INITIAL NOT IN ("INDETERMINABLE", "UNSUITABLE/DECLINED", "CHILD") 61 | AND AGE_AT_TIME_OF_JOURNAL >= 18 62 | 63 | -- COMMAND ---------- 64 | 65 | -- MAGIC %md 66 | -- MAGIC ## PULL TOGETHER HEIGHT AND WEIGHT JOURNALS 67 | 68 | -- COMMAND ---------- 69 | 70 | -- create table with only height journals in 71 | CREATE OR REPLACE TEMPORARY VIEW HEIGHT_JOURNALS_2 AS 72 | SELECT a.* 73 | , b.ConceptId_Description 74 | FROM gdppr_database.gdppr_table AS a 75 | LEFT JOIN ref_data.HEIGHT_CODES AS b 76 | ON a.CODE = b.ConceptID 77 | WHERE a.CODE IN (SELECT DISTINCT ConceptID FROM ref_data.HEIGHT_CODES) 78 | 79 | -- COMMAND ---------- 80 | 81 | -- create table with only weight journals in 82 | CREATE OR REPLACE TEMPORARY VIEW WEIGHT_JOURNALS_2 AS 83 | SELECT a.* 84 | , b.ConceptId_Description 85 | FROM gdppr_database.gdppr_table AS a 86 | LEFT JOIN ref_data.WEIGHT_CODES AS b 87 | ON a.CODE = b.ConceptID 88 | WHERE a.CODE IN (SELECT DISTINCT ConceptID FROM ref_data.WEIGHT_CODES) 89 | 90 | -- COMMAND ---------- 91 | 92 | -- join height and weight journals together for journals for the same patient which were recorded on the same date 93 | -- join on several other patient factors 94 | -- calculate age at time of journal to allow removal of under 18s 95 | CREATE OR REPLACE TEMPORARY VIEW HW_JOINT_2 AS 96 | SELECT a.* 97 | , (DATEDIFF(a.DATE, a.DATE_OF_BIRTH))/365.25 AS AGE_AT_TIME_OF_JOURNAL 98 | , CASE WHEN a.VALUE1_CONDITION >=0 AND a.VALUE1_CONDITION <=3 THEN a.VALUE1_CONDITION * 100 99 | ELSE a.VALUE1_CONDITION END AS H_VALUE 100 | , b.RECORD_DATE AS W_RECORD_DATE 101 | , b.CODE AS W_CODE 102 | , b.VALUE1_CONDITION AS W_VALUE 103 | , b.LINKS AS W_LINKS 104 | FROM HEIGHT_JOURNALS_2 AS a 105 | INNER JOIN WEIGHT_JOURNALS_2 AS b 106 | ON a.NHS_NUMBER = b.NHS_NUMBER 107 | AND a.DATE = b.DATE 108 | AND a.PRACTICE = b.PRACTICE 109 | AND a.REPORTING_PERIOD_END_DATE = b.REPORTING_PERIOD_END_DATE 110 | AND a.JOURNAL_REPORTING_PERIOD_END_DATE = b.JOURNAL_REPORTING_PERIOD_END_DATE 111 | WHERE a.VALUE1_CONDITION IS NOT NULL 112 | AND b.VALUE1_CONDITION IS NOT NULL 113 | 114 | -- COMMAND ---------- 115 | 116 | -- calculate BMI 117 | -- remove journals for under 18s at the time of their BMI recording 118 | CREATE OR REPLACE TEMPORARY VIEW HW_ADULT_2 AS 119 | SELECT * 120 | , W_VALUE / POWER((H_VALUE/100),2) AS BMI 121 | FROM HW_JOINT_2 122 | WHERE AGE_AT_TIME_OF_JOURNAL >= 18 123 | 124 | -- COMMAND ---------- 125 | 126 | -- categorise BMIs 127 | CREATE OR REPLACE TEMPORARY VIEW HEIGHT_WEIGHT_CATEGORISED_2 AS 128 | SELECT * 129 | , CASE WHEN BMI IS NULL THEN 'INDETERMINABLE' 130 | WHEN BMI >= 10 AND BMI < 18.5 THEN 'UNDERWEIGHT' 131 | WHEN BMI >= 18.5 AND BMI < 25 THEN 'IDEAL' 132 | WHEN BMI >= 25 AND BMI < 30 THEN 'OVERWEIGHT' 133 | WHEN BMI >= 30 AND BMI <= 55 THEN 'OBESE' 134 | WHEN BMI > 55 AND BMI <= 185 THEN 'INDETERMINABLE' -- needs further investigation 135 | WHEN BMI < 10 THEN 'INDETERMINABLE' 136 | ELSE 'INDETERMINABLE' END AS BMI_CATEGORISED_INITIAL 137 | FROM HW_ADULT_2 138 | 139 | -- COMMAND ---------- 140 | 141 | -- create table with no unknown BMIs in 142 | CREATE OR REPLACE TEMPORARY VIEW HEIGHT_WEIGHT_NO_UNKNOWN_2 AS 143 | SELECT * 144 | FROM HEIGHT_WEIGHT_CATEGORISED_2 145 | WHERE BMI_CATEGORISED_INITIAL NOT IN ("INDETERMINABLE", "UNSUITABLE/DECLINED", "CHILD") 146 | 147 | -- COMMAND ---------- 148 | 149 | -- MAGIC %md 150 | -- MAGIC # ADD IN CHILD JOURNALS 151 | 152 | -- COMMAND ---------- 153 | 154 | -- create table of BMI journals where age at time of measurement was <18 155 | CREATE OR REPLACE TEMPORARY VIEW CHILD_BMI_JOURNALS AS 156 | SELECT * 157 | FROM BMI_JOURNALS_2 158 | WHERE DATEDIFF(DATE, DATE_OF_BIRTH)/365.25 < 18 159 | 160 | -- COMMAND ---------- 161 | 162 | -- add categories on to BMI journals - anything already categorised leave as is (trust clinicians) 163 | -- add age at time of journal date to allow calculation of centile for those that need it 164 | CREATE OR REPLACE TEMPORARY VIEW CHILD_BMI_JOURNALS_CAT AS 165 | SELECT * 166 | , CASE WHEN BMI_CAT = 'ALL' AND VALUE1_CONDITION IS NULL THEN 'INDETERMINABLE' 167 | WHEN BMI_CAT = 'ALL' AND VALUE1_CONDITION IS NOT NULL THEN 'CENTILE' 168 | WHEN BMI_CAT = 'UNDERWEIGHT' AND VALUE1_CONDITION IS NULL THEN 'UNDERWEIGHT' 169 | WHEN BMI_CAT = 'UNDERWEIGHT' AND VALUE1_CONDITION = 0 THEN 'UNDERWEIGHT' 170 | WHEN BMI_CAT = 'UNDERWEIGHT' AND VALUE1_CONDITION IS NOT NULL AND VALUE1_CONDITION != 0 THEN 'UNDERWEIGHT' 171 | WHEN BMI_CAT = 'IDEAL' AND VALUE1_CONDITION IS NULL THEN 'IDEAL' 172 | WHEN BMI_CAT = 'IDEAL' AND VALUE1_CONDITION = 0 THEN 'IDEAL' 173 | WHEN BMI_CAT = 'IDEAL' AND VALUE1_CONDITION IS NOT NULL AND VALUE1_CONDITION != 0 THEN 'CENTILE' 174 | WHEN BMI_CAT = 'UNDER_IDEAL_BORDER' AND VALUE1_CONDITION IS NULL THEN 'UNDER 20 NULLS' 175 | WHEN BMI_CAT = 'UNDER_IDEAL_BORDER' AND VALUE1_CONDITION = 0 THEN 'UNDER 20 NULLS' 176 | WHEN BMI_CAT = 'UNDER_IDEAL_BORDER' AND VALUE1_CONDITION IS NOT NULL AND VALUE1_CONDITION != 0 THEN 'CENTILE' 177 | WHEN BMI_CAT = 'OVERWEIGHT' AND VALUE1_CONDITION IS NULL THEN 'OVERWEIGHT' 178 | WHEN BMI_CAT = 'OVERWEIGHT' AND VALUE1_CONDITION = 0 THEN 'OVERWEIGHT' 179 | WHEN BMI_CAT = 'OVERWEIGHT' AND VALUE1_CONDITION IS NOT NULL AND VALUE1_CONDITION != 0 THEN 'CENTILE' 180 | WHEN BMI_CAT = 'OBESE' AND VALUE1_CONDITION IS NULL THEN 'OBESE' 181 | WHEN BMI_CAT = 'OBESE' AND VALUE1_CONDITION = 0 THEN 'OBESE' 182 | WHEN BMI_CAT = 'OBESE' AND VALUE1_CONDITION IS NOT NULL AND VALUE1_CONDITION != 0 THEN 'CENTILE' 183 | WHEN BMI_CAT = 'UNSUITABLE/DECLINED' THEN 'UNSUITABLE/DECLINED' 184 | WHEN BMI_CAT = 'UNKNOWN' THEN 'INDETERMINABLE' 185 | WHEN BMI_CAT = 'CHILD' THEN 'CHILD' 186 | ELSE 'INDETERMINABLE' 187 | END AS BMI_CATEGORISED_INITIAL 188 | FROM CHILD_BMI_JOURNALS 189 | 190 | -- COMMAND ---------- 191 | 192 | -- categorise child centile codes according to centile boundaries 193 | CREATE OR REPLACE TEMPORARY VIEW CHILD_CENTILE_CODES AS 194 | SELECT * 195 | , CASE WHEN ConceptID IN ('920141000000102', '920161000000101', '920181000000105') THEN 'UNDERWEIGHT' 196 | WHEN ConceptID IN ('920971000000105', '920991000000109') THEN 'OVERWEIGHT' 197 | WHEN ConceptID IN ('921011000000105', '921031000000102', '921051000000109') THEN 'OBESE' 198 | ELSE 'IDEAL' 199 | END AS BMI_CATEGORISED_INITIAL 200 | FROM ref_data.BMI_CODES 201 | WHERE BMI_CAT = 'CHILD' 202 | 203 | -- COMMAND ---------- 204 | 205 | -- attach child BMI category onto child BMI centile journals 206 | CREATE OR REPLACE TEMPORARY VIEW CHILD_CENTILES AS 207 | SELECT a.* 208 | , b. BMI_CATEGORISED_INITIAL 209 | , DATEDIFF(a.DATE, a.DATE_OF_BIRTH)/365.25 AS AGE_AT_TIME_OF_JOURNAL 210 | FROM CHILD_BMI_JOURNALS AS a 211 | LEFT JOIN CHILD_CENTILE_CODES AS b 212 | ON a.CODE = b.ConceptID 213 | WHERE a.BMI_CAT = 'CHILD' 214 | 215 | -- COMMAND ---------- 216 | 217 | -- get BMI journals which need centiles calucalting 218 | CREATE OR REPLACE TEMPORARY VIEW BMI_CENTILE_JOURNALS AS 219 | SELECT * 220 | , (DATEDIFF(DATE, DATE_OF_BIRTH)/365.25) * 12 AS AGE_MONTHS 221 | FROM CHILD_BMI_JOURNALS_CAT 222 | WHERE BMI_CATEGORISED_INITIAL = 'CENTILE' 223 | 224 | -- COMMAND ---------- 225 | 226 | -- calc upper and lower age (months) 227 | CREATE OR REPLACE TEMPORARY VIEW BMI_CENTILE_CALC AS 228 | SELECT * 229 | , FLOOR(AGE_MONTHS) AS AGE_LOWER 230 | , CEIL(AGE_MONTHS) AS AGE_UPPER 231 | , AGE_MONTHS - FLOOR(AGE_MONTHS) AS AGE_DIFF 232 | FROM BMI_CENTILE_JOURNALS 233 | 234 | -- COMMAND ---------- 235 | 236 | -- pull in LMS values based on sex and age 237 | CREATE OR REPLACE TEMPORARY VIEW BMI_CENTILE_CALC_2 AS 238 | SELECT a.* 239 | , CASE WHEN a.SEX = 1 THEN b.BOYS_L 240 | WHEN a.SEX = 2 THEN b.GIRLS_L 241 | ELSE null END AS L_LOW 242 | , CASE WHEN a.SEX = 1 THEN b.BOYS_M 243 | WHEN a.SEX = 2 THEN b.GIRLS_M 244 | ELSE null END AS M_LOW 245 | , CASE WHEN a.SEX = 1 THEN b.BOYS_S 246 | WHEN a.SEX = 2 THEN b.GIRLS_S 247 | ELSE null END AS S_LOW 248 | , CASE WHEN a.SEX = 1 THEN c.BOYS_L 249 | WHEN a.SEX = 2 THEN c.GIRLS_L 250 | ELSE null END AS L_HIGH 251 | , CASE WHEN a.SEX = 1 THEN c.BOYS_M 252 | WHEN a.SEX = 2 THEN c.GIRLS_M 253 | ELSE null END AS M_HIGH 254 | , CASE WHEN a.SEX = 1 THEN c.BOYS_S 255 | WHEN a.SEX = 2 THEN c.GIRLS_S 256 | ELSE null END AS S_HIGH 257 | FROM BMI_CENTILE_CALC AS a 258 | LEFT JOIN ref_data.child_bmi_centiles_ordered AS b 259 | ON a.AGE_LOWER = b.Months_Calculated 260 | LEFT JOIN ref_data.child_bmi_centiles_ordered AS c 261 | ON a.AGE_UPPER = c.Months_Calculated 262 | 263 | -- COMMAND ---------- 264 | 265 | -- calculate LMS values to use 266 | CREATE OR REPLACE TEMPORARY VIEW BMI_CENTILE_CALC_3 AS 267 | SELECT * 268 | , L_LOW + (L_HIGH - L_LOW) * AGE_DIFF AS L_TO_USE 269 | , M_LOW + (M_HIGH - M_LOW) * AGE_DIFF AS M_TO_USE 270 | , S_LOW + (S_HIGH - S_LOW) * AGE_DIFF AS S_TO_USE 271 | FROM BMI_CENTILE_CALC_2 272 | 273 | -- COMMAND ---------- 274 | 275 | -- calculate z score 276 | CREATE OR REPLACE TEMPORARY VIEW BMI_CENTILE_CALC_4 AS 277 | SELECT * 278 | , VALUE1_CONDITION AS BMI 279 | , (POWER((VALUE1_CONDITION/M_TO_USE), L_TO_USE) - 1) / (L_TO_USE * S_TO_USE) AS BMI_Z_SCORE 280 | FROM BMI_CENTILE_CALC_3 281 | 282 | -- COMMAND ---------- 283 | 284 | -- categorise bmi z score 285 | CREATE OR REPLACE TEMPORARY VIEW BMI_CENTILE_CALC_5 AS 286 | SELECT * 287 | , DATEDIFF(DATE, DATE_OF_BIRTH)/365.25 AS AGE_AT_TIME_OF_JOURNAL 288 | , CASE WHEN BMI_Z_SCORE <=-2 THEN "UNDERWEIGHT" 289 | WHEN BMI_Z_SCORE >-2 AND BMI_Z_SCORE <(4/3) THEN "IDEAL" 290 | WHEN BMI_Z_SCORE >= (4/3) AND BMI_Z_SCORE <2 THEN "OVERWEIGHT" 291 | WHEN BMI_Z_SCORE >=2 THEN "OBESE" 292 | ELSE "ERROR" 293 | END AS CENTILE_CLINICAL_BMI_CAT 294 | FROM BMI_CENTILE_CALC_4 295 | 296 | -- COMMAND ---------- 297 | 298 | -- MAGIC %md 299 | -- MAGIC ## CHILD HEIGHT AND WEIGHT 300 | 301 | -- COMMAND ---------- 302 | 303 | -- join height and weight journals together for journals which were recorded on the same date 304 | -- join on several other patient factors 305 | -- calculate age at time of journal to allow removal of over 18s 306 | CREATE OR REPLACE TEMPORARY VIEW HW_JOINT_2_CHILD AS 307 | SELECT a.* 308 | , DATEDIFF(a.DATE, a.DATE_OF_BIRTH)/365.25 AS AGE_AT_TIME_OF_JOURNAL 309 | , CASE WHEN a.VALUE1_CONDITION >=0 AND a.VALUE1_CONDITION <=3 THEN a.VALUE1_CONDITION * 100 310 | ELSE a.VALUE1_CONDITION END AS H_VALUE 311 | , b.RECORD_DATE AS W_RECORD_DATE 312 | , b.CODE AS W_CODE 313 | , b.VALUE1_CONDITION AS W_VALUE 314 | FROM HEIGHT_JOURNALS_2 AS a 315 | INNER JOIN WEIGHT_JOURNALS_2 AS b 316 | ON a.NHS_NUMBER = b.NHS_NUMBER 317 | AND a.DATE = b.DATE 318 | AND a.PRACTICE = b.PRACTICE 319 | AND a.REPORTING_PERIOD_END_DATE = b.REPORTING_PERIOD_END_DATE 320 | AND a.JOURNAL_REPORTING_PERIOD_END_DATE = b.JOURNAL_REPORTING_PERIOD_END_DATE 321 | WHERE a.VALUE1_CONDITION IS NOT NULL 322 | AND a.VALUE1_CONDITION >0 323 | AND a.VALUE1_CONDITION <= 200 324 | AND b.VALUE1_CONDITION IS NOT NULL 325 | AND (DATEDIFF(a.DATE, a.DATE_OF_BIRTH))/365.25 <18 326 | 327 | -- COMMAND ---------- 328 | 329 | -- calculate BMI 330 | -- remove journals for over 18s at the time of their BMI recording 331 | CREATE OR REPLACE TEMPORARY VIEW HW_CHILD AS 332 | SELECT * 333 | , W_VALUE / POWER((H_VALUE/100),2) AS BMI 334 | FROM HW_JOINT_2_CHILD 335 | 336 | -- COMMAND ---------- 337 | 338 | -- MAGIC %md 339 | -- MAGIC ### CENTILES 340 | 341 | -- COMMAND ---------- 342 | 343 | -- select certain fields, add in age (months) 344 | CREATE OR REPLACE TEMPORARY VIEW CHILD_CENTILE_CALC AS 345 | SELECT DATE_OF_BIRTH 346 | , SEX 347 | , NHS_NUMBER 348 | , ETHNIC 349 | , PRACTICE 350 | , GP_SYSTEM_SUPPLIER 351 | , REPORTING_PERIOD_END_DATE 352 | , JOURNAL_REPORTING_PERIOD_END_DATE 353 | , DATE 354 | , RECORD_DATE 355 | , AGE_AT_TIME_OF_JOURNAL 356 | , H_VALUE 357 | , W_VALUE 358 | , BMI 359 | , (DATEDIFF(DATE, DATE_OF_BIRTH)/365.25) * 12 AS AGE_MONTHS 360 | FROM HW_CHILD 361 | 362 | -- COMMAND ---------- 363 | 364 | -- calculate age lower and upper (months) 365 | CREATE OR REPLACE TEMPORARY VIEW CHILD_CENTILE_CALC_2 AS 366 | SELECT * 367 | , FLOOR(AGE_MONTHS) AS AGE_LOWER 368 | , CEIL(AGE_MONTHS) AS AGE_UPPER 369 | , AGE_MONTHS - FLOOR(AGE_MONTHS) AS AGE_DIFF 370 | FROM CHILD_CENTILE_CALC 371 | 372 | -- COMMAND ---------- 373 | 374 | -- pull in LMS based on age and sex 375 | CREATE OR REPLACE TEMPORARY VIEW CHILD_CENTILE_CALC_3 AS 376 | SELECT a.* 377 | , CASE WHEN a.SEX = 1 THEN b.BOYS_L 378 | WHEN a.SEX = 2 THEN b.GIRLS_L 379 | ELSE null END AS L_LOW 380 | , CASE WHEN a.SEX = 1 THEN b.BOYS_M 381 | WHEN a.SEX = 2 THEN b.GIRLS_M 382 | ELSE null END AS M_LOW 383 | , CASE WHEN a.SEX = 1 THEN b.BOYS_S 384 | WHEN a.SEX = 2 THEN b.GIRLS_S 385 | ELSE null END AS S_LOW 386 | , CASE WHEN a.SEX = 1 THEN c.BOYS_L 387 | WHEN a.SEX = 2 THEN c.GIRLS_L 388 | ELSE null END AS L_HIGH 389 | , CASE WHEN a.SEX = 1 THEN c.BOYS_M 390 | WHEN a.SEX = 2 THEN c.GIRLS_M 391 | ELSE null END AS M_HIGH 392 | , CASE WHEN a.SEX = 1 THEN c.BOYS_S 393 | WHEN a.SEX = 2 THEN c.GIRLS_S 394 | ELSE null END AS S_HIGH 395 | FROM CHILD_CENTILE_CALC_2 AS a 396 | LEFT JOIN ref_data.child_bmi_centiles_ordered AS b 397 | ON a.AGE_LOWER = b.Months_Calculated 398 | LEFT JOIN ref_data.child_bmi_centiles_ordered AS c 399 | ON a.AGE_UPPER = c.Months_Calculated 400 | 401 | -- COMMAND ---------- 402 | 403 | -- calculate LMS to use 404 | CREATE OR REPLACE TEMPORARY VIEW CHILD_CENTILE_CALC_4 AS 405 | SELECT * 406 | , L_LOW + (L_HIGH - L_LOW) * AGE_DIFF AS L_TO_USE 407 | , M_LOW + (M_HIGH - M_LOW) * AGE_DIFF AS M_TO_USE 408 | , S_LOW + (S_HIGH - S_LOW) * AGE_DIFF AS S_TO_USE 409 | FROM CHILD_CENTILE_CALC_3 410 | 411 | -- COMMAND ---------- 412 | 413 | -- calcualte bmi z score 414 | CREATE OR REPLACE TEMPORARY VIEW CHILD_CENTILE_CALC_5 AS 415 | SELECT * 416 | , (POWER((BMI/M_TO_USE), L_TO_USE) - 1) / (L_TO_USE * S_TO_USE) AS BMI_Z_SCORE 417 | FROM CHILD_CENTILE_CALC_4 418 | 419 | -- COMMAND ---------- 420 | 421 | -- categorise bmi z score 422 | CREATE OR REPLACE TEMPORARY VIEW CHILD_CENTILE_CALC_6 AS 423 | SELECT * 424 | , CASE WHEN BMI_Z_SCORE <=-2 THEN "UNDERWEIGHT" 425 | WHEN BMI_Z_SCORE >-2 AND BMI_Z_SCORE <(4/3) THEN "IDEAL" 426 | WHEN BMI_Z_SCORE >= (4/3) AND BMI_Z_SCORE <2 THEN "OVERWEIGHT" 427 | WHEN BMI_Z_SCORE >=2 THEN "OBESE" 428 | ELSE "ERROR" 429 | END AS CENTILE_CLINICAL_BMI_CAT 430 | FROM CHILD_CENTILE_CALC_5 431 | 432 | -- COMMAND ---------- 433 | 434 | -- MAGIC %md 435 | -- MAGIC ## APPEND BMI (pre categorised) JOURNALS AND CALCULATED BMI JOURNALS (from height and weight) TOGETHER 436 | 437 | -- COMMAND ---------- 438 | 439 | -- append adult BMI journals to Height/Weight BMI calculated journals 440 | CREATE OR REPLACE TEMPORARY VIEW BMI_HW_COMBINED_2 AS 441 | SELECT DATE_OF_BIRTH 442 | ,SEX 443 | ,LSOA 444 | ,DATE_OF_DEATH 445 | ,NHS_NUMBER 446 | ,ETHNIC 447 | ,PRACTICE 448 | ,GP_SYSTEM_SUPPLIER 449 | ,PROCESSED_TIMESTAMP 450 | ,REPORTING_PERIOD_END_DATE 451 | ,JOURNAL_REPORTING_PERIOD_END_DATE 452 | ,DATE 453 | ,RECORD_DATE 454 | ,CODE 455 | ,SENSITIVE_CODE 456 | ,EPISODE_CONDITION 457 | ,EPISODE_PRESCRIPTION 458 | ,VALUE1_CONDITION 459 | ,VALUE2_CONDITION 460 | ,VALUE1_PRESCRIPTION 461 | ,VALUE2_PRESCRIPTION 462 | ,LINKS 463 | ,ConceptId_Description 464 | ,AGE_AT_TIME_OF_JOURNAL 465 | ,null as H_VALUE 466 | ,null as W_RECORD_DATE 467 | ,null as W_CODE 468 | ,null as W_VALUE 469 | ,null as W_LINKS 470 | ,null as BMI 471 | ,BMI_CAT 472 | ,BMI_CATEGORISED_INITIAL 473 | ,"BMI (ADULT)" AS TABLE_ORIGIN 474 | FROM BMI_CATEGORY_NO_UNKNOWNS_2 475 | 476 | UNION 477 | 478 | SELECT DATE_OF_BIRTH 479 | ,SEX 480 | ,LSOA 481 | ,DATE_OF_DEATH 482 | ,NHS_NUMBER 483 | ,ETHNIC 484 | ,PRACTICE 485 | ,GP_SYSTEM_SUPPLIER 486 | ,PROCESSED_TIMESTAMP 487 | ,REPORTING_PERIOD_END_DATE 488 | ,JOURNAL_REPORTING_PERIOD_END_DATE 489 | ,DATE 490 | ,RECORD_DATE 491 | ,CODE 492 | ,SENSITIVE_CODE 493 | ,EPISODE_CONDITION 494 | ,EPISODE_PRESCRIPTION 495 | ,VALUE1_CONDITION 496 | ,VALUE2_CONDITION 497 | ,VALUE1_PRESCRIPTION 498 | ,VALUE2_PRESCRIPTION 499 | ,LINKS 500 | ,ConceptId_Description 501 | ,AGE_AT_TIME_OF_JOURNAL 502 | ,H_VALUE 503 | ,W_RECORD_DATE 504 | ,W_CODE 505 | ,W_VALUE 506 | ,W_LINKS 507 | ,BMI 508 | ,null as BMI_CAT 509 | ,BMI_CATEGORISED_INITIAL 510 | ,"HW (ADULT)" AS TABLE_ORIGIN 511 | FROM HEIGHT_WEIGHT_NO_UNKNOWN_2 512 | 513 | 514 | -- COMMAND ---------- 515 | 516 | -- append child journals (bmi and height/weight calculated) 517 | CREATE OR REPLACE TEMPORARY VIEW BMI_HW_COMBINED_3 AS 518 | SELECT DATE_OF_BIRTH 519 | ,SEX 520 | ,NHS_NUMBER 521 | ,ETHNIC 522 | ,PRACTICE 523 | ,GP_SYSTEM_SUPPLIER 524 | ,REPORTING_PERIOD_END_DATE 525 | ,JOURNAL_REPORTING_PERIOD_END_DATE 526 | ,DATE 527 | ,RECORD_DATE 528 | ,AGE_AT_TIME_OF_JOURNAL 529 | ,COALESCE(BMI, VALUE1_CONDITION) as BMI 530 | ,BMI_CATEGORISED_INITIAL AS BMI_CATEGORY 531 | ,TABLE_ORIGIN 532 | FROM BMI_HW_COMBINED_2 533 | 534 | UNION 535 | 536 | SELECT DATE_OF_BIRTH 537 | ,SEX 538 | ,NHS_NUMBER 539 | ,ETHNIC 540 | ,PRACTICE 541 | ,GP_SYSTEM_SUPPLIER 542 | ,REPORTING_PERIOD_END_DATE 543 | ,JOURNAL_REPORTING_PERIOD_END_DATE 544 | ,DATE 545 | ,RECORD_DATE 546 | ,AGE_AT_TIME_OF_JOURNAL 547 | ,BMI 548 | ,CENTILE_CLINICAL_BMI_CAT AS BMI_CATEGORY 549 | ,"HW (CHILD) CENTILE" AS TABLE_ORIGIN 550 | FROM CHILD_CENTILE_CALC_6 551 | WHERE CENTILE_CLINICAL_BMI_CAT IN ('UNDERWEIGHT', 'IDEAL', 'UNDER 20 NULLS', 'OVERWEIGHT', 'OBESE') 552 | 553 | UNION 554 | 555 | SELECT DATE_OF_BIRTH 556 | ,SEX 557 | ,NHS_NUMBER 558 | ,ETHNIC 559 | ,PRACTICE 560 | ,GP_SYSTEM_SUPPLIER 561 | ,REPORTING_PERIOD_END_DATE 562 | ,JOURNAL_REPORTING_PERIOD_END_DATE 563 | ,DATE 564 | ,RECORD_DATE 565 | ,AGE_AT_TIME_OF_JOURNAL 566 | ,BMI 567 | ,CENTILE_CLINICAL_BMI_CAT AS BMI_CATEGORY 568 | ,"CHILD BMI CENTILE" AS TABLE_ORIGIN 569 | FROM BMI_CENTILE_CALC_5 570 | WHERE CENTILE_CLINICAL_BMI_CAT IN ('UNDERWEIGHT', 'IDEAL', 'UNDER 20 NULLS', 'OVERWEIGHT', 'OBESE') 571 | 572 | UNION 573 | 574 | SELECT DATE_OF_BIRTH 575 | ,SEX 576 | ,NHS_NUMBER 577 | ,ETHNIC 578 | ,PRACTICE 579 | ,GP_SYSTEM_SUPPLIER 580 | ,REPORTING_PERIOD_END_DATE 581 | ,JOURNAL_REPORTING_PERIOD_END_DATE 582 | ,DATE 583 | ,RECORD_DATE 584 | ,AGE_AT_TIME_OF_JOURNAL 585 | ,VALUE1_CONDITION AS BMI 586 | ,BMI_CATEGORISED_INITIAL AS BMI_CATEGORY 587 | ,"BMI - CENTILE" AS TABLE_ORIGIN 588 | FROM CHILD_CENTILES 589 | WHERE BMI_CATEGORISED_INITIAL IN ('UNDERWEIGHT', 'IDEAL', 'UNDER 20 NULLS', 'OVERWEIGHT', 'OBESE') 590 | 591 | -- COMMAND ---------- 592 | 593 | -- select the most recent known BMI journal date for each patient 594 | CREATE OR REPLACE TEMPORARY VIEW RECENT_BMI_DATES_2 AS 595 | SELECT NHS_NUMBER 596 | , MAX(DATE) AS DATE 597 | FROM BMI_HW_COMBINED_3 598 | GROUP BY NHS_NUMBER 599 | ORDER BY NHS_NUMBER 600 | 601 | -- COMMAND ---------- 602 | 603 | -- create table with only the most recent BMI journal(s) in for each patient - some patients may have multiple journals e.g. both saying there are overweight or one saying overweight and another saying ideal.. 604 | CREATE OR REPLACE TEMPORARY VIEW RECENT_BMI_JOURNALS_2 AS 605 | SELECT a.* 606 | FROM BMI_HW_COMBINED_3 AS a 607 | INNER JOIN RECENT_BMI_DATES_2 AS b 608 | ON a.NHS_NUMBER = b.NHS_NUMBER 609 | AND a.DATE = b.DATE 610 | 611 | -- COMMAND ---------- 612 | 613 | -- create table to remove patients with more than one BMI category in their most recent journals i.e. if patient has 2 journals saying ideal that is fine, but if patient has one journal saying overweight and another saying ideal this needs to be removed and looked at further 614 | -- we do this by counting the number of journals per patient, per BMI category 615 | CREATE OR REPLACE TEMPORARY VIEW BMI_CATS_PER_PATIENT_2 AS 616 | SELECT NHS_NUMBER 617 | , COUNT(DISTINCT BMI_CATEGORY) AS COUNT_CATEGORIES 618 | FROM RECENT_BMI_JOURNALS_2 619 | GROUP BY NHS_NUMBER 620 | 621 | -- COMMAND ---------- 622 | 623 | -- create list of patients with only one BMI category per patient -> patient with more than one BMI category will have a count greater than one 624 | CREATE OR REPLACE TEMPORARY VIEW CATS_PER_PATIENT_2 AS 625 | SELECT DISTINCT NHS_NUMBER 626 | FROM BMI_CATS_PER_PATIENT_2 627 | WHERE COUNT_CATEGORIES = 1 628 | 629 | -- COMMAND ---------- 630 | 631 | ------------------------------------------------------------------------ 632 | ----------------------------------------------------------------------- 633 | ----------- THIS IS THE FINAL TABLE ------------------------------------ 634 | ------------------------------------------------------------------------ 635 | ----------------------------------------------------------------------- 636 | 637 | -- create table with patients with more than one BMI cat removed 638 | CREATE OR REPLACE TEMPORARY VIEW RECENT_BMI_CAT_DUPS_REMOVED_2 AS 639 | SELECT * 640 | FROM RECENT_BMI_JOURNALS_2 641 | WHERE NHS_NUMBER IN (SELECT * FROM CATS_PER_PATIENT_2) -------------------------------------------------------------------------------- /BMI/BMI_codes_reference_data.csv: -------------------------------------------------------------------------------- 1 | ConceptID,ConceptId_Description,BMI_CAT 2 | 736013005,Body weight measurement declined (situation),UNSUITABLE/DECLINED 3 | 914731000000107,Obese class II (body mass index 35.0 - 39.9) (finding),OBESE 4 | 60621009,Body mass index (observable entity),ALL 5 | 414915002,Obese (finding),OBESE 6 | 921051000000109,Child body mass index greater than 99.6th centile (finding),CHILD 7 | 920291000000102,Child body mass index 26th-49th centile (finding),CHILD 8 | 162765000,On examination - weight 10-20% below ideal (finding),UNDERWEIGHT 9 | 920931000000108,Child body mass index on 75th centile (finding),CHILD 10 | 1046351000000106,Unsuitable for body weight measurement (finding),UNSUITABLE/DECLINED 11 | 35425004,Normal body mass index (finding),IDEAL 12 | 920181000000105,Child body mass index on 2nd centile (finding),CHILD 13 | 48499001,Increased body mass index (finding),UNKNOWN 14 | 920311000000101,Child body mass index on 50th centile (finding),CHILD 15 | 162690006,On examination - obese (finding),OBESE 16 | 920251000000105,Child body mass index 10th-24th centile (finding),CHILD 17 | 427090001,Body mass index less than 16.5 (finding),UNDERWEIGHT 18 | 846931000000101,Baseline body mass index (observable entity),ALL 19 | 162764001,On examination - weight greater than 20% below ideal (finding),UNDERWEIGHT 20 | 722595002,Overweight in adulthood with body mass index of 25 or more but less than 30 (finding),OVERWEIGHT 21 | 715456008,Percentage median body mass index for age and sex (observable entity),ALL 22 | 1046331000000104,Unsuitable for body height measurement (finding),UNSUITABLE/DECLINED 23 | 248342006,Underweight (finding),UNDERWEIGHT 24 | 921031000000102,Child body mass index 98.1st-99.6th centile (finding),CHILD 25 | 1037511000000103,Body height measurement declined (situation),UNSUITABLE/DECLINED 26 | 238131007,Overweight (finding),OVERWEIGHT 27 | 921011000000105,Child body mass index on 98th centile (finding),CHILD 28 | 412768003,Body mass index 20-24 - normal (finding),IDEAL 29 | 926011000000101,Down syndrome body mass index centile (observable entity),ALL 30 | 914721000000105,Obese class I (body mass index 30.0 - 34.9) (finding),OBESE 31 | 920271000000101,Child body mass index on 25th centile (finding),CHILD 32 | 443371000124107,Obese class I (finding),OBESE 33 | 920951000000101,Child body mass index 76th-90th centile (finding),CHILD 34 | 920201000000109,Child body mass index 3rd-8th centile (finding),CHILD 35 | 162863004,Body mass index 25-29 - overweight (finding),OVERWEIGHT 36 | 162769006,On examination - Underweight (finding),UNDERWEIGHT 37 | 301331008,Finding of body mass index (finding),ALL 38 | 408512008,Body mass index 40+ - severely obese (finding),OBESE 39 | 275947003,On examination - overweight (finding),OVERWEIGHT 40 | 920971000000105,Child body mass index on 91st centile (finding),CHILD 41 | 6497000,Decreased body mass index (finding),UNKNOWN 42 | 920161000000101,Child body mass index 0.4th-1.9th centile (finding),CHILD 43 | 268915006,On examination - weight 10-20% over ideal (finding),OVERWEIGHT 44 | 920991000000109,Child body mass index 92nd-97th centile (finding),CHILD 45 | 920231000000103,Child body mass index on 9th centile (finding),CHILD 46 | 443381000124105,Obese class II (finding),OBESE 47 | 920141000000102,Child body mass index less than 0.4th centile (finding),CHILD 48 | 914741000000103,Obese class III (body mass index equal to or greater than 40.0) (finding),OBESE 49 | 310252000,Body mass index less than 20 (finding),UNDER_IDEAL_BORDER 50 | 268916007,On examination - weight greater than 20% over ideal (finding),OBESE 51 | 162864005,Body mass index 30+ - obesity (finding),OBESE 52 | 920841000000108,Child body mass index 51st-74th centile (finding),CHILD 53 | -------------------------------------------------------------------------------- /BMI/README.md: -------------------------------------------------------------------------------- 1 | # BACKGROUND 2 | BMI is of particular interest in relation to the COVID-19 pandemic therefore NHS Digital have written some code to understand the quality, coverage, and distribution of BMI recording within the GDPPR dataset. 3 | 4 | BMI category can be recorded via six methods within GDPPR, all of which use SNOMED codes within patient journal tables: 5 | 6 | 1. Snomed codes for BMI values e.g. code for ‘Body mass index (observable entity)’ with an associated value of 18.5 7 | 2. Snomed codes for BMI categories (no values) e.g. code with description of ‘Obese (finding)’ 8 | 3. Snomed codes for height and weight values 9 | 4. Snomed codes for child BMI centiles/categories (no values) e.g. code with description of ‘Child body mass index 10th-24th centile (finding)’ 10 | 5. Snomed codes for child BMI values e.g. code for ‘Body mass index (observable entity)’ with an associated value of 18.5 for a patient below the age of 18 at the date of the journal 11 | 6. Snomed codes for child height and weight values 12 | 13 | Please note that methods 4, 5 and 6 for children are separated out because their BMI categories are calculated using centiles which are based on their sex, age and BMI (or height and weight calculated BMI). 14 | 15 | # METHODOLOGY 16 | 17 | a) For method 1 a BMI category is assigned using [NHS BMI groupings](https://www.nhs.uk/live-well/healthy-weight/bmi-calculator/). Outliers / potential DQ issues are removed and are not counted as having a determinable BMI category. _Please note that cut off points for outliers e.g. cut off low BMI at 10 and high at 55, were determined through initial analyses and were agreed with internal clinicians._ 18 | 19 | b) For methods 2 and 4, the BMI category assigned via the snomed code description was assumed to be correct unless the associated BMI value conflicted with the BMI category. 20 | 21 | c) Patients with an indeterminable BMI category are removed and the most recent BMI category recorded for each patient is selected. 22 | 23 | # NOTES 24 | 25 | GDPPR data = ```gdppr_database.gdppr_table``` 26 | 27 | BMI snomed codes reference data = ```ref_data.BMI_CODES``` 28 | 29 | Height snomed codes reference data = ```ref_data.HEIGHT_CODES``` 30 | 31 | Weight snomed codes reference data = ```ref_data.WEIGHT_CODES``` 32 | 33 | Child BMI centiles reference data = ```ref_data.child_bmi_centiles_ordered ``` 34 | 35 | _Please note that the child centile reference data is avilable from the [Royal College of Paediatric and Child Health](https://www.rcpch.ac.uk/resources/body-mass-index-bmi-chart) and requires a (free) license so is not included within this repository. If you are unable to obtain child centile reference data then it is not possible to accurately categorise a child's BMI._ 36 | -------------------------------------------------------------------------------- /BMI/height_codes_reference_data.csv: -------------------------------------------------------------------------------- 1 | ConceptID,ConceptId_Description 2 | 162756007,On examination - height greater than 20% below average (finding) 3 | 162757003,On examination - height 10-20% below average (finding) 4 | 162758008,On examination - height within 10% average (finding) 5 | 50373000,Body height measure (observable entity) 6 | 162760005,On examination - height greater than 20% over average (finding) 7 | 162755006,On examination - height (finding) 8 | 162759000,On examination - height 10-20% over average (finding) 9 | 248333004,Standing height (observable entity) 10 | -------------------------------------------------------------------------------- /BMI/weight_codes_reference_data.csv: -------------------------------------------------------------------------------- 1 | ConceptID,ConceptId_Description 2 | 248358009,Weight for height (observable entity) 3 | 735395000,Current body weight (observable entity) 4 | 162763007,On examination - weight (finding) 5 | 43664005,Normal weight (finding) 6 | 363808001,Measured body weight (observable entity) 7 | 425024002,Body weight without shoes (observable entity) 8 | 162766004,On examination - weight within 10% ideal (finding) 9 | 424927000,Body weight with shoes (observable entity) 10 | 27113001,Body weight (observable entity) 11 | -------------------------------------------------------------------------------- /Blood_glucose/Blood_glucose.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | -- MAGIC %md # GDPPR Blood Glucose 3 | 4 | ---- Table of all Journal Entries with a blood glucose SNOMED code 5 | ---- Note that any SNOMED code for OGTT/Non-Fasting that states that the time frame is not 120mins/2hours is classed as "indeterminable" as these timeframes are not used for diagnosing diabetes 6 | CREATE OR REPLACE TEMPORARY VIEW ALL_BLOOD_GLUCOSE_JOURNALS AS 7 | SELECT 8 | a.*, 9 | b.ConceptId_Description, 10 | TYPE_OF_BLOOD_GLUCOSE_RECORD, 11 | FASTING_STATUS 12 | FROM 13 | gdppr_database.gdppr_table AS a 14 | INNER JOIN REF_DATA.BLOOD_GLUCOSE_REF_DATA AS b ON a.CODE = b.ConceptID 15 | WHERE 16 | a.CODE IN ( 17 | SELECT 18 | DISTINCT ConceptID 19 | FROM 20 | REF_DATA.BLOOD_GLUCOSE_REF_DATA 21 | ) 22 | 23 | -- COMMAND ---------- 24 | 25 | -- MAGIC %md ## JOURNAL BREAKDOWN 26 | 27 | -- COMMAND ---------- 28 | 29 | CREATE OR REPLACE TEMPORARY VIEW ALL_NON_FASTING_BLOOD_GLUCOSE_JOURNALS AS 30 | SELECT * 31 | FROM ALL_BLOOD_GLUCOSE_JOURNALS 32 | WHERE FASTING_STATUS IN ("NON-FASTING","RANDOM") 33 | 34 | -- COMMAND ---------- 35 | 36 | ---- HbA1c will always have fasting status of "anytime" because HbA1c levels are reflective of the blood glucose levels from the previous 2/3 months 37 | ---- Random tests can only be used to diagnose diabetes and cannot be used to refute the possibility of diabetes 38 | ---- Every single SNOMED code relating to blood glucose is an observable entity so we expect a numeric value from each journal to assign a blood glucose category 39 | 40 | CREATE OR REPLACE TEMPORARY VIEW ALL_NON_FASTING_BLOOD_GLUCOSE_JOURNALS_WITH_CATEGORY AS 41 | SELECT * , 42 | CASE WHEN TYPE_OF_BLOOD_GLUCOSE_RECORD = "HBA1C" AND VALUE1_CONDITION >= 9 AND VALUE1_CONDITION < 20 43 | THEN "LOW" 44 | WHEN TYPE_OF_BLOOD_GLUCOSE_RECORD = "HBA1C" AND VALUE1_CONDITION >= 20 AND VALUE1_CONDITION < 42 45 | THEN "NORMAL" 46 | WHEN TYPE_OF_BLOOD_GLUCOSE_RECORD = "HBA1C" AND VALUE1_CONDITION >= 42 AND VALUE1_CONDITION <= 48 47 | THEN "PRE-DIABETES" 48 | WHEN TYPE_OF_BLOOD_GLUCOSE_RECORD = "HBA1C" AND VALUE1_CONDITION > 48 AND VALUE1_CONDITION <= 162 49 | THEN "DIABETES" 50 | WHEN TYPE_OF_BLOOD_GLUCOSE_RECORD IN ("BLOOD", "PLASMA", "SERUM") AND FASTING_STATUS = "NON-FASTING" AND VALUE1_CONDITION >= 1 AND VALUE1_CONDITION < 4 51 | THEN "LOW" 52 | WHEN TYPE_OF_BLOOD_GLUCOSE_RECORD IN ("BLOOD", "PLASMA", "SERUM") AND FASTING_STATUS = "NON-FASTING" AND VALUE1_CONDITION >= 4 AND VALUE1_CONDITION < 7.8 53 | THEN "NORMAL" 54 | WHEN TYPE_OF_BLOOD_GLUCOSE_RECORD IN ("BLOOD", "PLASMA", "SERUM") AND FASTING_STATUS = "NON-FASTING" AND VALUE1_CONDITION >= 7.8 AND VALUE1_CONDITION < 11.1 55 | THEN "PRE-DIABETES" 56 | WHEN TYPE_OF_BLOOD_GLUCOSE_RECORD IN ("BLOOD", "PLASMA", "SERUM") AND FASTING_STATUS = "NON-FASTING" AND VALUE1_CONDITION >= 11.1 AND VALUE1_CONDITION <= 150 57 | THEN "DIABETES" 58 | WHEN TYPE_OF_BLOOD_GLUCOSE_RECORD IN ("BLOOD", "PLASMA", "SERUM") AND FASTING_STATUS = "RANDOM" AND VALUE1_CONDITION >= 1 AND VALUE1_CONDITION < 11.1 59 | THEN "MORE-TESTING-NEEDED" 60 | WHEN TYPE_OF_BLOOD_GLUCOSE_RECORD IN ("BLOOD", "PLASMA", "SERUM") AND FASTING_STATUS = "RANDOM" AND VALUE1_CONDITION >= 11.1 AND VALUE1_CONDITION <= 150 61 | THEN "DIABETES" 62 | ELSE "INDETERMINABLE" 63 | END AS CATEGORISED_BLOOD_GLUCOSE 64 | FROM ALL_NON_FASTING_BLOOD_GLUCOSE_JOURNALS 65 | 66 | -- COMMAND ---------- 67 | 68 | ---- We can only assign a blood glucose status to OGTT journals if it is matched with a fasting journal value - OGTT results are allocated those statuses on the basis of a combination of the fasting and 2 hour levels: 69 | ---- We have no low blood glucose threshold for the combined OGTT and fasting journals because these tests take place in controlled conditions (so there should be no fasting blood glucose values that meet the criteria of hypoglycaemia) 70 | ---- We can still allocate blood glucose statuses on a single fasting value 71 | CREATE OR REPLACE TEMPORARY VIEW ALL_FASTING_BLOOD_GLUCOSE_JOURNALS AS 72 | SELECT * 73 | FROM ALL_BLOOD_GLUCOSE_JOURNALS 74 | WHERE FASTING_STATUS = "FASTING" 75 | 76 | 77 | -- COMMAND ---------- 78 | 79 | CREATE OR REPLACE TEMPORARY VIEW ALL_OGTT_BLOOD_GLUCOSE_JOURNALS 80 | AS SELECT * 81 | FROM ALL_BLOOD_GLUCOSE_JOURNALS 82 | WHERE FASTING_STATUS = "OGTT" 83 | 84 | -- COMMAND ---------- 85 | 86 | ---- Drop value 2 condition in the Fasting journals (THEY ARE ALL NULL) and replace the value 2 condition with the OGTT journal value 87 | ---- Join together fasting and OGTT journals with left join - so we can still allocate blood glucose statuses to the fasting journals that are not matched with an OGTT journal 88 | ---- We need the value1_condition (fasting value) to be NON-NULL 89 | CREATE OR REPLACE TEMPORARY VIEW FASTING_OGTT_MATCHED_JOURNALS AS 90 | SELECT a.YEAR_OF_BIRTH, 91 | a.SEX, 92 | a.LSOA, 93 | a.YEAR_OF_DEATH, 94 | a.NHS_NUMBER, 95 | a.ETHNIC, 96 | a.PRACTICE, 97 | a.GP_SYSTEM_SUPPLIER, 98 | a.PROCESSED_TIMESTAMP, 99 | a.REPORTING_PERIOD_END_DATE, 100 | a.JOURNAL_REPORTING_PERIOD_END_DATE, 101 | a.DATE, 102 | a.RECORD_DATE, 103 | a.CODE, 104 | a.SENSITIVE_CODE, 105 | a.EPISODE_CONDITION, 106 | a.EPISODE_PRESCRIPTION, 107 | a.VALUE1_CONDITION, 108 | a.VALUE1_PRESCRIPTION, 109 | b.VALUE1_CONDITION AS VALUE2_CONDITION, 110 | b.VALUE1_PRESCRIPTION AS VALUE2_PRESCRIPTION, 111 | a.LINKS, 112 | a.ConceptId_Description, 113 | a.TYPE_OF_BLOOD_GLUCOSE_RECORD, 114 | a.FASTING_STATUS 115 | FROM ALL_FASTING_BLOOD_GLUCOSE_JOURNALS AS a 116 | LEFT JOIN ALL_OGTT_BLOOD_GLUCOSE_JOURNALS AS b 117 | ON a.NHS_NUMBER = b.NHS_NUMBER 118 | AND a.DATE = b.DATE 119 | AND a.PRACTICE = b.PRACTICE 120 | AND a.REPORTING_PERIOD_END_DATE = b.REPORTING_PERIOD_END_DATE 121 | AND a.JOURNAL_REPORTING_PERIOD_END_DATE = b.JOURNAL_REPORTING_PERIOD_END_DATE 122 | 123 | 124 | -- COMMAND ---------- 125 | 126 | ---- We need the value1_condition (fasting value) to be NON-NULL 127 | CREATE OR REPLACE TEMPORARY VIEW FASTING_OGTT_MATCHED_JOURNALS_NON_NULL AS 128 | SELECT * FROM FASTING_OGTT_MATCHED_JOURNALS 129 | WHERE VALUE1_CONDITION IS NOT NULL 130 | 131 | -- COMMAND ---------- 132 | 133 | ---- Val 1 condition is the fasting blood glucose value and value 2 condition the OGTT blood glucose value (2hour) 134 | ---- Clinician - OGTT tests take place in controlled conditions (when the individuals blood glucose is not too low) - i.e. we set the lower bound for plausibility as the threshold for hypoglycaemia 135 | ---- By setting the plausible lower bound to < 4 we lose 0.2% of journals (5,000 out of 2,000,000) journal values 136 | ---- Wherever we have a "LESS THAN" ensure its greater than the lower bound for hypoglycaemia 137 | CREATE OR REPLACE TEMPORARY VIEW FASTING_OGTT_MATCHED_JOURNALS_WITH_CATEGORY AS 138 | SELECT * , 139 | CASE WHEN VALUE1_CONDITION >= 4 AND VALUE1_CONDITION <= 5.4 AND VALUE2_CONDITION >= 4 AND VALUE2_CONDITION < 7.8 140 | THEN "NORMAL" 141 | WHEN VALUE1_CONDITION >= 4 AND VALUE1_CONDITION < 7 AND VALUE2_CONDITION >= 7.8 AND VALUE2_CONDITION < 11.1 142 | THEN "PRE-DIABETES" 143 | WHEN VALUE1_CONDITION >= 5.5 AND VALUE1_CONDITION <= 6.9 AND VALUE2_CONDITION >= 4 AND VALUE2_CONDITION < 7.8 144 | THEN "PRE-DIABETES" 145 | WHEN VALUE1_CONDITION >= 4 AND ((VALUE1_CONDITION >= 7 AND VALUE1_CONDITION <= 150) OR (VALUE2_CONDITION >= 11.1 AND VALUE2_CONDITION <= 150)) 146 | THEN "DIABETES" 147 | WHEN VALUE1_CONDITION >= 3.9 AND VALUE1_CONDITION <= 5.4 AND VALUE2_CONDITION IS NULL 148 | THEN "NORMAL" 149 | WHEN VALUE1_CONDITION >= 5.5 AND VALUE1_CONDITION <= 6.9 AND VALUE2_CONDITION IS NULL 150 | THEN "PRE-DIABETES" 151 | WHEN VALUE1_CONDITION >= 7 AND VALUE1_CONDITION <= 150 AND VALUE2_CONDITION IS NULL 152 | THEN "DIABETES" 153 | ELSE "INDETERMINABLE" 154 | END AS CATEGORISED_BLOOD_GLUCOSE 155 | FROM FASTING_OGTT_MATCHED_JOURNALS_NON_NULL 156 | 157 | -- COMMAND ---------- 158 | 159 | -- MAGIC %md ### COMBINE BOTH BREAKDOWNS AND REMOVE INDETERMINABLE 160 | 161 | -- COMMAND ---------- 162 | 163 | CREATE OR REPLACE TEMPORARY VIEW ALL_JOURNALS_WITH_BLOOD_GLUCOSE_STATUS AS 164 | SELECT * 165 | FROM ALL_NON_FASTING_BLOOD_GLUCOSE_JOURNALS_WITH_CATEGORY 166 | WHERE CATEGORISED_BLOOD_GLUCOSE != "INDETERMINABLE" 167 | 168 | UNION 169 | 170 | SELECT * 171 | FROM FASTING_OGTT_MATCHED_JOURNALS_WITH_CATEGORY 172 | WHERE CATEGORISED_BLOOD_GLUCOSE != "INDETERMINABLE" 173 | 174 | -- COMMAND ---------- 175 | 176 | -- MAGIC %md ## FIND MOST RECENT 177 | 178 | -- COMMAND ---------- 179 | 180 | ---- Get the most recent date per patient 181 | CREATE OR REPLACE TEMPORARY VIEW MAX_DATE AS 182 | SELECT NHS_NUMBER 183 | , MAX(DATE) AS DATE 184 | FROM ALL_JOURNALS_WITH_BLOOD_GLUCOSE_STATUS 185 | GROUP BY NHS_NUMBER 186 | 187 | -- COMMAND ---------- 188 | 189 | ---- full table of most recent records 190 | CREATE OR REPLACE TEMPORARY VIEW TABLE_MOST_RECENT_RECORDS AS 191 | SELECT a.* 192 | FROM ALL_JOURNALS_WITH_BLOOD_GLUCOSE_STATUS AS a 193 | INNER JOIN MAX_DATE AS b 194 | ON a.NHS_NUMBER = b.NHS_NUMBER 195 | AND a.DATE = b.DATE 196 | 197 | -- COMMAND ---------- 198 | 199 | -- MAGIC %md 200 | -- MAGIC ## REMOVE CONFLICTS 201 | 202 | -- COMMAND ---------- 203 | 204 | ---- Need to remove those with conflicting blood glucose status at their most recent record 205 | CREATE OR REPLACE TEMPORARY VIEW TABLE_JOURNAL_COUNTS AS 206 | SELECT NHS_NUMBER 207 | , COUNT(DISTINCT CATEGORISED_BLOOD_GLUCOSE) AS COUNT_CATEGORIES 208 | FROM TABLE_MOST_RECENT_RECORDS 209 | GROUP BY NHS_NUMBER 210 | 211 | -- COMMAND ---------- 212 | 213 | ---- Any individual with conflicting blood glucose statuses at their most recent record is removed 214 | CREATE OR REPLACE TEMPORARY VIEW BLOOD_GLUCOSE_MOST_RECENT_NO_CONFLICTS AS 215 | SELECT * 216 | FROM TABLE_MOST_RECENT_RECORDS 217 | WHERE NHS_NUMBER IN (SELECT DISTINCT NHS_NUMBER FROM TABLE_JOURNAL_COUNTS WHERE COUNT_CATEGORIES = 1) -------------------------------------------------------------------------------- /Blood_glucose/Blood_pressure_codes_reference_data.csv: -------------------------------------------------------------------------------- 1 | ConceptId,ConceptId_Description,TYPE_OF_BLOOD_GLUCOSE_RECORD,FASTING_STATUS 2 | 1012661000000105,150 minute plasma glucose level (observable entity),PLASMA,INDETERMINABLE 3 | 1003121000000103,180 minute plasma glucose level (observable entity),PLASMA,INDETERMINABLE 4 | 1010701000000103,90 minute plasma glucose level (observable entity),PLASMA,INDETERMINABLE 5 | 996311000000105,Extended glucose tolerance test (observable entity),TOLERANCE,OGTT 6 | 1003141000000105,Plasma fasting glucose level (observable entity),PLASMA,FASTING 7 | 1083781000000102,Post-prandial blood glucose concentration (observable entity),BLOOD,INDETERMINABLE 8 | 1003721000000104,120 minute blood glucose level (observable entity),BLOOD,OGTT 9 | 1031331000000106,Plasma random glucose level (observable entity),PLASMA,RANDOM 10 | 1028901000000108,Plasma 2 hours post-prandial glucose level (observable entity),PLASMA,NON-FASTING 11 | 1003131000000101,Serum fasting glucose level (observable entity),SERUM,FASTING 12 | 1107451000000100,Substance concentration of glucose in serum (observable entity),SERUM,NON-FASTING 13 | 1010611000000107,Serum glucose level (observable entity),SERUM,NON-FASTING 14 | 1110521000000108,Substance concentration of glucose in plasma (observable entity),PLASMA,NON-FASTING 15 | 1028891000000107,Serum 2 hours post-prandial glucose level (observable entity),SERUM,NON-FASTING 16 | 1010631000000104,60 minute serum glucose level (observable entity),SERUM,INDETERMINABLE 17 | 1005711000000106,Glucose tolerance test (observable entity),TOLERANCE,OGTT 18 | 1028881000000105,Serum random glucose level (observable entity),SERUM,RANDOM 19 | 998291000000108,300 minute plasma glucose level (observable entity),PLASMA,INDETERMINABLE 20 | 998281000000106,240 minute plasma glucose level (observable entity),PLASMA,INDETERMINABLE 21 | 997681000000108,Fasting blood glucose level (observable entity),BLOOD,FASTING 22 | 1089381000000101,Random blood glucose level (observable entity),BLOOD,RANDOM 23 | 1003711000000105,90 minute blood glucose level (observable entity),BLOOD,INDETERMINABLE 24 | 1003701000000108,60 minute blood glucose level (observable entity),BLOOD,INDETERMINABLE 25 | 1010661000000109,150 minute serum glucose level (observable entity),SERUM,INDETERMINABLE 26 | 1010651000000106,120 minute serum glucose level (observable entity),SERUM,OGTT 27 | 1049321000000109,Haemoglobin A1c level (monitoring ranges) - International Federation of Clinical Chemistry and Laboratory Medicine standardised (observable entity),HBA1C,NON-FASTING 28 | 1049301000000100,Haemoglobin A1c level (diagnostic reference range) - International Federation of Clinical Chemistry and Laboratory Medicine standardised (observable entity),HBA1C,NON-FASTING 29 | 997671000000106,Blood glucose level (observable entity),BLOOD,NON-FASTING 30 | 1010711000000101,120 minute plasma glucose level (observable entity),PLASMA,OGTT 31 | 1003691000000108,30 minute blood glucose level (observable entity),BLOOD,INDETERMINABLE 32 | 994411000000106,Blood glucose series (observable entity),BLOOD,INDETERMINABLE 33 | 1010621000000101,30 minute serum glucose level (observable entity),SERUM,INDETERMINABLE 34 | 1003731000000102,150 minute blood glucose level (observable entity),BLOOD,INDETERMINABLE 35 | 1030551000000103,2 hour post-prandial blood glucose level (observable entity),BLOOD,NON-FASTING 36 | 999791000000106,Haemoglobin A1c level - International Federation of Clinical Chemistry and Laboratory Medicine standardised (observable entity),HBA1C,NON-FASTING 37 | 1010681000000100,30 minute plasma glucose level (observable entity),PLASMA,INDETERMINABLE 38 | 1010641000000108,90 minute serum glucose level (observable entity),SERUM,INDETERMINABLE 39 | 1010691000000103,60 minute plasma glucose level (observable entity),PLASMA,INDETERMINABLE 40 | 1045471000000104,210 minute plasma glucose concentration (observable entity),PLASMA,INDETERMINABLE 41 | 1010671000000102,Plasma glucose level (observable entity),PLASMA,NON-FASTING 42 | 928041000000101,Baseline blood glucose concentration (observable entity),BLOOD,FASTING 43 | -------------------------------------------------------------------------------- /Blood_glucose/README.md: -------------------------------------------------------------------------------- 1 | # BACKGROUND 2 | Blood glucose status may be of interest in relation to the COVID-19 pandemic therefore NHS Digital have written some code to understand the quality, coverage, and distribution of blood glucose status recording within the GDPPR dataset. 3 | 4 | Blood glucose status can be recorded via several methods within GDPPR, all of which use SNOMED codes within patient journal tables. These snomed codes can initially be categorised into the following categories: 5 | 1. Blood 6 | 2. HbA1c 7 | 3. Plasma 8 | 4. Serum 9 | 5. Tolerance 10 | 11 | Some of these categories can then be further classified into the following categories: 12 | 1. Fasting 13 | 2. Non-fasting 14 | 3. OGTT 15 | 4. Random 16 | 17 | These classifications are required to categorise the associated blood glucose reading accordingly and are therefore included within the reference data. *Please note that cut off points for outliers were determined through initial analyses and were agreed with internal clinicians.* 18 | 19 | # METHODOLOGY 20 | 21 | a) All blood glucose SNOMED codes are categorised in the reference data based on previous analyses. The Cluster ID's used to create this reference data are 'GLUC_COD', 'FASPLASGLUC_COD' and 'IFCCHBAM_COD'. _Please note it is possible that further blood pressure clusters may be created and/or blood glucose codes may be added to the GDPPR extract and these would need categorising._ 22 | 23 | b) Journals categorised as HbA1c are categorised according to [these thresholds](https://www.diabetes.co.uk/what-is-hba1c.html) and discussions with internal clinicians 24 | 25 | c) Journals initially categorised as 'Blood', 'Plasma' or 'Serum', and have further sub-classifications of 'Non-Fasting' or 'Random' are categorised according to [these thresholds](https://www.diabetes.co.uk/diabetes_care/blood-sugar-level-ranges.html) and discussions with internal clinicians 26 | 27 | d) Journals with a sub-classification of 'OGTT' can only be categorised if it is joined to an associated 'Fasting' journal as these statuses are determined on the basis of the fasting and 2 hour levels. These journals are matched according to NHS_Number/date of recording etc. and are then categorised according to [these thresholds](https://www.diabetes.co.uk/diabetes_care/blood-sugar-level-ranges.html) and discussions with internal clinicians 28 | 29 | - Journals with a sub-classification of 'Fasting' can be categorised alone without the OGTT journal and are therefore categorised alone if no OGTT journal is also present. They are categorised according to [these thresholds](https://www.diabetes.co.uk/diabetes_care/blood-sugar-level-ranges.html) and discussions with internal clinicians. 30 | 31 | e) Journals without a blood glucose reading (value) or journals with indeterminable statuses are removed and the most recent blood glucose reading for each patient is selected. 32 | 33 | # NOTES 34 | 35 | GDPPR data = ```gdppr_database.gdppr_table``` 36 | 37 | blood glucose snomed code reference data = ```ref_data.blood_glucose_ref_data``` 38 | -------------------------------------------------------------------------------- /Blood_pressure/Blood Pressure v2.1.sql: -------------------------------------------------------------------------------- 1 | -- COMMAND ---------- 2 | 3 | ---- Table of all Journals with a blood pressure SNOMED code 4 | CREATE OR REPLACE TEMPORARY VIEW ALL_BP_JOURNALS AS 5 | SELECT a.* 6 | , b.ConceptId_Description 7 | , b.INITIAL_BREAKDOWN 8 | , b.TYPE_OF_JOURNAL 9 | FROM gdppr_database.gdppr_table AS a 10 | INNER JOIN ref_data.blood_pressure_ref_data AS b 11 | ON a.CODE = b.ConceptID 12 | WHERE a.CODE IN (SELECT DISTINCT ConceptID FROM ref_data.blood_pressure_ref_data) 13 | 14 | -- COMMAND ---------- 15 | 16 | -- MAGIC %md ## PULL TOGETHER SBP AND DBP VALUES AND ADD THESE TO THE ALL CATEGORY 17 | 18 | -- COMMAND ---------- 19 | 20 | CREATE OR REPLACE TEMPORARY VIEW SBP_TABLE AS 21 | SELECT * 22 | FROM ALL_BP_JOURNALS 23 | WHERE INITIAL_BREAKDOWN = "SBP" 24 | 25 | -- COMMAND ---------- 26 | 27 | CREATE OR REPLACE TEMPORARY VIEW DBP_TABLE AS 28 | SELECT * 29 | FROM ALL_BP_JOURNALS 30 | WHERE INITIAL_BREAKDOWN = "DBP" 31 | 32 | -- COMMAND ---------- 33 | 34 | -- MAGIC %md ## NUMERIC JOURNALS 35 | 36 | -- COMMAND ---------- 37 | 38 | ---- Drop value 2 condition in the SBP value and replace the value 2 condition with the DBP value 39 | ---- Join together SBP and DBP journals 40 | ----- union on snomed codes for both SBP and DBP together 41 | CREATE OR REPLACE TEMPORARY VIEW NUMERIC_TABLE AS 42 | SELECT a.YEAR_OF_BIRTH, 43 | a.SEX, 44 | a.LSOA, 45 | a.YEAR_OF_DEATH, 46 | a.NHS_NUMBER_DEID, 47 | a.ETHNIC, 48 | a.PRACTICE, 49 | a.GP_SYSTEM_SUPPLIER, 50 | a.PROCESSED_TIMESTAMP, 51 | a.REPORTING_PERIOD_END_DATE, 52 | a.JOURNAL_REPORTING_PERIOD_END_DATE, 53 | a.DATE, 54 | a.RECORD_DATE, 55 | a.CODE, 56 | a.SENSITIVE_CODE, 57 | a.EPISODE_CONDITION, 58 | a.EPISODE_PRESCRIPTION, 59 | a.VALUE1_CONDITION, 60 | a.VALUE1_PRESCRIPTION, 61 | b.VALUE1_CONDITION AS VALUE2_CONDITION, -- replace with DBP value1 62 | b.VALUE1_PRESCRIPTION AS VALUE2_PRESCRIPTION, 63 | a.LINKS, 64 | a.ConceptId_Description, 65 | a.INITIAL_BREAKDOWN, 66 | a.TYPE_OF_JOURNAL 67 | FROM SBP_TABLE AS a 68 | INNER JOIN DBP_TABLE AS b 69 | ON a.NHS_NUMBER_DEID = b.NHS_NUMBER_DEID 70 | AND a.DATE = b.DATE 71 | AND a.PRACTICE = b.PRACTICE 72 | AND a.REPORTING_PERIOD_END_DATE = b.REPORTING_PERIOD_END_DATE 73 | AND a.JOURNAL_REPORTING_PERIOD_END_DATE = b.JOURNAL_REPORTING_PERIOD_END_DATE 74 | WHERE a.VALUE1_CONDITION IS NOT NULL 75 | AND b.VALUE1_CONDITION IS NOT NULL 76 | 77 | UNION 78 | 79 | SELECT * 80 | FROM ALL_BP_JOURNALS 81 | WHERE INITIAL_BREAKDOWN = "ALL" 82 | AND VALUE1_CONDITION IS NOT NULL 83 | AND VALUE2_CONDITION IS NOT NULL 84 | 85 | -- COMMAND ---------- 86 | 87 | CREATE OR REPLACE TEMPORARY VIEW ALL_NUMERIC_JOURNALS AS 88 | SELECT *, 89 | CASE 90 | WHEN ( 91 | VALUE1_CONDITION < 70 92 | OR VALUE1_CONDITION > 190 93 | OR VALUE2_CONDITION < 40 94 | OR VALUE2_CONDITION > 100 95 | ) THEN "Indeterminable" 96 | WHEN ( 97 | VALUE1_CONDITION >= 70 98 | AND VALUE1_CONDITION < 90 99 | AND VALUE2_CONDITION >= 40 100 | AND VALUE2_CONDITION < 60 101 | ) THEN "Low Blood Pressure" 102 | WHEN ( 103 | VALUE1_CONDITION >= 90 104 | AND VALUE1_CONDITION < 120 105 | AND VALUE2_CONDITION >= 40 106 | AND VALUE2_CONDITION < 80 107 | ) 108 | OR ( 109 | VALUE1_CONDITION >= 70 110 | AND VALUE1_CONDITION < 90 111 | AND VALUE2_CONDITION >= 60 112 | AND VALUE2_CONDITION < 80 113 | ) THEN "Ideal Blood Pressure" 114 | WHEN ( 115 | VALUE1_CONDITION >= 120 116 | AND VALUE1_CONDITION < 140 117 | AND VALUE2_CONDITION >= 40 118 | AND VALUE2_CONDITION < 90 119 | ) 120 | OR ( 121 | VALUE1_CONDITION >= 70 122 | AND VALUE1_CONDITION < 120 123 | AND VALUE2_CONDITION >= 80 124 | AND VALUE2_CONDITION < 90 125 | ) THEN "Pre-High Blood Pressure" 126 | WHEN ( 127 | VALUE1_CONDITION >= 140 128 | AND VALUE1_CONDITION <= 190 129 | AND VALUE2_CONDITION >= 40 130 | AND VALUE2_CONDITION <= 100 131 | ) 132 | OR ( 133 | VALUE1_CONDITION >= 70 134 | AND VALUE1_CONDITION < 140 135 | AND VALUE2_CONDITION >= 90 136 | AND VALUE2_CONDITION <= 100 137 | ) THEN "High Blood Pressure" 138 | WHEN VALUE1_CONDITION IS NULL 139 | AND VALUE2_CONDITION IS NULL THEN "Indeterminable" 140 | ELSE "Indeterminable" 141 | END AS FINAL_BP_CATS 142 | FROM 143 | NUMERIC_TABLE 144 | 145 | -- COMMAND ---------- 146 | 147 | -- MAGIC %md 148 | -- MAGIC ## NON NUMERIC JOURNALS 149 | 150 | -- COMMAND ---------- 151 | 152 | ---- Categorise SBP when only using the description in the code 153 | CREATE OR REPLACE TEMPORARY VIEW SBP_NULL_TABLE AS 154 | SELECT * , 155 | CASE WHEN TYPE_OF_JOURNAL = "Low-Finding" AND VALUE1_CONDITION IS NULL THEN "Low Systolic Blood Pressure" 156 | WHEN TYPE_OF_JOURNAL = "Ideal-Finding" AND VALUE1_CONDITION IS NULL THEN "Ideal Systolic Blood Pressure" 157 | WHEN TYPE_OF_JOURNAL = "Pre-High Finding" AND VALUE1_CONDITION IS NULL THEN "Pre-High Systolic Blood Pressure" 158 | WHEN TYPE_OF_JOURNAL = "High-Finding" AND VALUE1_CONDITION IS NULL THEN "High Systolic Blood Pressure" 159 | WHEN TYPE_OF_JOURNAL = "Indeterminable" THEN "Indeterminable" 160 | ELSE "Indeterminable" 161 | END AS SBP_CATS 162 | FROM SBP_TABLE 163 | WHERE VALUE1_CONDITION IS NULL 164 | 165 | -- COMMAND ---------- 166 | 167 | ---- Categorise DBP when only using the description in the code 168 | CREATE OR REPLACE TEMPORARY VIEW DBP_NULL_TABLE AS 169 | SELECT * , 170 | CASE WHEN TYPE_OF_JOURNAL = "Low-Finding" AND VALUE1_CONDITION IS NULL THEN "Low Diastolic Blood Pressure" 171 | WHEN TYPE_OF_JOURNAL = "Ideal-Finding" AND VALUE1_CONDITION IS NULL THEN "Ideal Diastolic Blood Pressure" 172 | WHEN TYPE_OF_JOURNAL = "Pre-High Finding" AND VALUE1_CONDITION IS NULL THEN "Pre-High Diastolic Blood Pressure" 173 | WHEN TYPE_OF_JOURNAL = "High-Finding" AND VALUE1_CONDITION IS NULL THEN "High Diastolic Blood Pressure" 174 | WHEN TYPE_OF_JOURNAL = "Indeterminable" THEN "Indeterminable" 175 | ELSE "Indeterminable" 176 | END AS DBP_CATS 177 | FROM DBP_TABLE 178 | WHERE VALUE1_CONDITION IS NULL 179 | 180 | -- COMMAND ---------- 181 | 182 | ---- Need the SBP & DBP journal records (assigned value on description) recorded at the same date, practice etc.. 183 | ---- Replace the SBP NULL value with the status assigned by the description 184 | ---- Replace the DBP NULL value with the status assigned by the description 185 | CREATE OR REPLACE TEMPORARY VIEW SBP_DBP_NULL_TABLE AS 186 | SELECT a.YEAR_OF_BIRTH, 187 | a.SEX, 188 | a.LSOA, 189 | a.YEAR_OF_DEATH, 190 | a.NHS_NUMBER_DEID, 191 | a.ETHNIC, 192 | a.PRACTICE, 193 | a.GP_SYSTEM_SUPPLIER, 194 | a.PROCESSED_TIMESTAMP, 195 | a.REPORTING_PERIOD_END_DATE, 196 | a.JOURNAL_REPORTING_PERIOD_END_DATE, 197 | a.DATE, 198 | a.RECORD_DATE, 199 | a.CODE, 200 | a.SENSITIVE_CODE, 201 | a.EPISODE_CONDITION, 202 | a.EPISODE_PRESCRIPTION, 203 | a.SBP_CATS AS VALUE1_CONDITION, 204 | a.VALUE1_PRESCRIPTION, 205 | b.DBP_CATS AS VALUE2_CONDITION, 206 | b.VALUE1_PRESCRIPTION AS VALUE2_PRESCRIPTION, 207 | a.LINKS, 208 | a.ConceptId_Description, 209 | a.INITIAL_BREAKDOWN, 210 | a.TYPE_OF_JOURNAL 211 | FROM SBP_NULL_TABLE AS a 212 | INNER JOIN DBP_NULL_TABLE AS b 213 | ON a.NHS_NUMBER_DEID = b.NHS_NUMBER_DEID 214 | AND a.DATE = b.DATE 215 | AND a.PRACTICE = b.PRACTICE 216 | AND a.REPORTING_PERIOD_END_DATE = b.REPORTING_PERIOD_END_DATE 217 | AND a.JOURNAL_REPORTING_PERIOD_END_DATE = b.JOURNAL_REPORTING_PERIOD_END_DATE 218 | 219 | -- COMMAND ---------- 220 | 221 | ---- Collate non-numeric journals 222 | CREATE OR REPLACE TEMPORARY VIEW ALL_JOURNALS_WITH_MATCHING_NULL AS 223 | 224 | SELECT * 225 | FROM All_BP_Journals 226 | WHERE INITIAL_BREAKDOWN = "ALL" 227 | AND VALUE1_CONDITION IS NULL 228 | AND VALUE2_CONDITION IS NULL 229 | 230 | UNION 231 | 232 | SELECT * 233 | FROM SBP_DBP_NULL_TABLE 234 | 235 | -- COMMAND ---------- 236 | 237 | CREATE OR REPLACE TEMPORARY VIEW ALL_JOURNALS_NHS_BP_LEVELS_NULL AS 238 | SELECT 239 | *, 240 | CASE 241 | WHEN INITIAL_BREAKDOWN = "ALL" 242 | AND TYPE_OF_JOURNAL = "Low-Finding" 243 | AND VALUE1_CONDITION IS NULL 244 | AND VALUE2_CONDITION IS NULL THEN "Low Blood Pressure" 245 | WHEN INITIAL_BREAKDOWN = "ALL" 246 | AND TYPE_OF_JOURNAL = "Ideal-Finding" 247 | AND VALUE1_CONDITION IS NULL 248 | AND VALUE2_CONDITION IS NULL THEN "Ideal Blood Pressure" 249 | WHEN INITIAL_BREAKDOWN = "ALL" 250 | AND TYPE_OF_JOURNAL = "Pre-High Finding" 251 | AND VALUE1_CONDITION IS NULL 252 | AND VALUE2_CONDITION IS NULL THEN "Pre-High Blood Pressure" 253 | WHEN INITIAL_BREAKDOWN = "ALL" 254 | AND TYPE_OF_JOURNAL = "High-Finding" 255 | AND VALUE1_CONDITION IS NULL 256 | AND VALUE2_CONDITION IS NULL THEN "High Blood Pressure" 257 | WHEN VALUE1_CONDITION = "Low Systolic Blood Pressure" 258 | AND VALUE2_CONDITION = "Low Diastolic Blood Pressure" THEN "Low Blood Pressure" 259 | WHEN ( 260 | VALUE1_CONDITION = "Ideal Systolic Blood Pressure" 261 | AND ( 262 | VALUE2_CONDITION = "Low Diastolic Blood Pressure" 263 | OR VALUE2_CONDITION = "Ideal Diastolic Blood Pressure" 264 | ) 265 | ) 266 | OR ( 267 | VALUE1_CONDITION = "Low Systolic Blood Pressure" 268 | AND VALUE2_CONDITION = "Ideal Diastolic Blood Pressure" 269 | ) THEN "Ideal Blood Pressure" 270 | WHEN ( 271 | VALUE1_CONDITION = "Pre-High Systolic Blood Pressure" 272 | AND ( 273 | VALUE2_CONDITION = "Low Diastolic Blood Pressure" 274 | OR VALUE2_CONDITION = "Ideal Diastolic Blood Pressure" 275 | OR VALUE2_CONDITION = "Pre-High Diastolic Blood Pressure" 276 | ) 277 | ) 278 | OR ( 279 | ( 280 | VALUE1_CONDITION = "Low Systolic Blood Pressure" 281 | OR VALUE1_CONDITION = "Ideal Systolic Blood Pressure" 282 | ) 283 | AND VALUE2_CONDITION = "Pre-High Diastolic Blood Pressure" 284 | ) THEN "Pre-High Blood Pressure" 285 | WHEN ( 286 | VALUE1_CONDITION = "High Systolic Blood Pressure" 287 | AND ( 288 | VALUE2_CONDITION = "Low Diastolic Blood Pressure" 289 | OR VALUE2_CONDITION = "Ideal Diastolic Blood Pressure" 290 | OR VALUE2_CONDITION = "Pre-High Diastolic Blood Pressure" 291 | OR VALUE2_CONDITION = "High Diastolic Blood Pressure" 292 | ) 293 | ) 294 | OR ( 295 | ( 296 | VALUE1_CONDITION = "Low Systolic Blood Pressure" 297 | OR VALUE1_CONDITION = "Ideal Systolic Blood Pressure" 298 | OR VALUE1_CONDITION = "Pre-High Systolic Blood Pressure" 299 | ) 300 | AND VALUE2_CONDITION = "High Diastolic Blood Pressure" 301 | ) THEN "High Blood Pressure" 302 | ELSE "Indeterminable" 303 | END AS FINAL_BP_CATS 304 | FROM 305 | ALL_JOURNALS_WITH_MATCHING_NULL 306 | 307 | -- COMMAND ---------- 308 | 309 | -- MAGIC %md 310 | -- MAGIC ## REMOVE INDETERMINABLE AND FIND MOST RECENT 311 | 312 | -- COMMAND ---------- 313 | 314 | CREATE OR REPLACE TEMPORARY VIEW ALL_JOURNALS_WITH_BP_STATUS AS 315 | 316 | SELECT * 317 | FROM ALL_NUMERIC_JOURNALS 318 | WHERE FINAL_BP_CATS != "Indeterminable" 319 | 320 | UNION 321 | 322 | SELECT * 323 | FROM ALL_JOURNALS_NHS_BP_LEVELS_NULL 324 | WHERE FINAL_BP_CATS != "Indeterminable" 325 | 326 | -- COMMAND ---------- 327 | 328 | ---- Get the most recent record 329 | CREATE OR REPLACE TEMPORARY VIEW MAX_DATE AS 330 | SELECT NHS_NUMBER_DEID 331 | , MAX(DATE) AS DATE 332 | FROM ALL_JOURNALS_WITH_BP_STATUS 333 | GROUP BY NHS_NUMBER_DEID 334 | 335 | -- COMMAND ---------- 336 | 337 | ---- full table of most recent records 338 | CREATE OR REPLACE TEMPORARY VIEW TABLE_MOST_RECENT_RECORDS AS 339 | SELECT a.* 340 | FROM ALL_JOURNALS_WITH_BP_STATUS AS a 341 | INNER JOIN MAX_DATE AS b 342 | ON a.NHS_NUMBER_DEID = b.NHS_NUMBER_DEID 343 | AND a.DATE = b.DATE 344 | 345 | -- COMMAND ---------- 346 | 347 | -- MAGIC %md 348 | -- MAGIC ## REMOVE CONFLICTS 349 | 350 | -- COMMAND ---------- 351 | 352 | ---- Need to remove those with conflicting BP status at their most recent record 353 | CREATE OR REPLACE TEMPORARY VIEW TABLE_JOURNAL_COUNTS AS 354 | SELECT NHS_NUMBER_DEID 355 | , COUNT(DISTINCT FINAL_BP_CATS) AS COUNT_CATEGORIES 356 | FROM TABLE_MOST_RECENT_RECORDS 357 | GROUP BY NHS_NUMBER_DEID 358 | 359 | -- COMMAND ---------- 360 | 361 | ---- Any individual with conflicting BP statuses at their most recent record is removed 362 | CREATE OR REPLACE TEMPORARY VIEW BP_MOST_RECENT_NO_CONFLICTS AS 363 | SELECT *, 364 | (YEAR(DATE)) - YEAR_OF_BIRTH AS AGE_AT_TIME_OF_JOURNAL -- add in age 365 | FROM TABLE_MOST_RECENT_RECORDS 366 | WHERE NHS_NUMBER_DEID IN (SELECT DISTINCT NHS_NUMBER_DEID FROM TABLE_JOURNAL_COUNTS WHERE COUNT_CATEGORIES = 1) -------------------------------------------------------------------------------- /Blood_pressure/Blood_pressure_codes_reference_data.csv: -------------------------------------------------------------------------------- 1 | ConceptId,ConceptId_Description,INITIAL_BREAKDOWN,TYPE_OF_JOURNAL 2 | 723237002,Non-invasive blood pressure (observable entity),ALL,OE 3 | 251076008,Non-invasive arterial pressure (observable entity),ALL,OE 4 | 407557002,Lying diastolic blood pressure (observable entity),DBP,OE 5 | 164783007,Ambulatory blood pressure recording (procedure),ALL,Procedure 6 | 314440001,Average systolic blood pressure (observable entity),SBP,OE 7 | 163024003,On examination - blood pressure borderline low (disorder),ALL,Ideal-Finding 8 | 386534000,Arterial blood pressure (observable entity),ALL,OE 9 | 163026001,On examination - blood pressure borderline raised (finding),ALL,Pre-High Finding 10 | 400974009,Standing systolic blood pressure (observable entity),SBP,OE 11 | 314449000,Average 24 hour systolic blood pressure (observable entity),SBP,OE 12 | 407555005,Sitting diastolic blood pressure (observable entity),DBP,OE 13 | 174255007,Non-invasive diastolic arterial pressure (observable entity),DBP,OE 14 | 314454009,Minimum day interval diastolic blood pressure (observable entity),DBP,OE 15 | 723236006,Minimum blood pressure (observable entity),ALL,OE 16 | 314455005,Minimum night interval diastolic blood pressure (observable entity),DBP,OE 17 | 163033001,Lying blood pressure (observable entity),ALL,OE 18 | 6797001,Mean blood pressure (observable entity),ALL,OE 19 | 170582005,On examination - check high blood pressure (finding),ALL,High-Finding 20 | 335661000000109,Self measured blood pressure reading (observable entity),ALL,OE 21 | 163020007,On examination - blood pressure reading (finding),ALL,Value-Needed 22 | 163035008,Sitting blood pressure (observable entity),ALL,OE 23 | 314462001,Average 24 hour diastolic blood pressure (observable entity),DBP,OE 24 | 81010002,Decreased systolic arterial pressure (finding),SBP,Low-Finding 25 | 716632005,Baseline diastolic blood pressure (observable entity),DBP,OE 26 | 163025002,On examination - blood pressure reading normal (finding),ALL,Ideal-Finding 27 | 314465004,24 hour diastolic blood pressure (observable entity),DBP,OE 28 | 75367002,Blood pressure (observable entity),ALL,OE 29 | 163031004,On examination - Diastolic blood pressure reading (finding),DBP,Value-Needed 30 | 314459004,Maximum 24 hour diastolic blood pressure (observable entity),DBP,OE 31 | 163027005,On examination - blood pressure reading raised (finding),ALL,High-Finding 32 | 163032006,On examination - blood pressure stable (finding),ALL,Value-Needed 33 | 313005002,On examination - blood pressure reading: no postural drop (situation),ALL,Situation 34 | 271650006,Diastolic blood pressure (observable entity),DBP,OE 35 | 314463006,24 hour blood pressure (observable entity),ALL,OE 36 | 163023009,On examination - blood pressure reading low (disorder),ALL,Low-Finding 37 | 24184005,Finding of increased blood pressure (finding),ALL,High-Finding 38 | 314444005,Maximum day interval systolic blood pressure (observable entity),SBP,OE 39 | 23154005,Increased diastolic arterial pressure (finding),DBP,High-Finding 40 | 198091000000104,Ambulatory diastolic blood pressure (observable entity),DBP,OE 41 | 314456006,Minimum 24 hour diastolic blood pressure (observable entity),DBP,OE 42 | 12929001,Normal systolic arterial pressure (finding),SBP,Ideal-Finding 43 | 251078009,Post-vasodilatation arterial pressure (observable entity),ALL,OE 44 | 271870002,Low blood pressure reading (disorder),ALL,Low-Finding 45 | 12763006,Finding of decreased blood pressure (finding),ALL,Low-Finding 46 | 314438006,Minimum systolic blood pressure (observable entity),SBP,OE 47 | 314464000,24 hour systolic blood pressure (observable entity),SBP,OE 48 | 314443004,Maximum night interval systolic blood pressure (observable entity),SBP,OE 49 | 314448008,Maximum 24 hour systolic blood pressure (observable entity),SBP,OE 50 | 18352002,Abnormal systolic arterial pressure (finding),SBP,Value-Needed 51 | 716579001,Baseline systolic blood pressure (observable entity),SBP,OE 52 | 314458007,Maximum day interval diastolic blood pressure (observable entity),DBP,OE 53 | 928021000000108,Baseline blood pressure (observable entity),ALL,OE 54 | 723235005,Maximum blood pressure (observable entity),ALL,OE 55 | 707303003,Post exercise systolic blood pressure response abnormal (finding),SBP,Indeterminable 56 | 18050000,Increased systolic arterial pressure (finding),SBP,High-Finding 57 | 163029008,On examination - blood pressure reading: postural drop (finding),ALL,Indeterminable 58 | 251070002,Non-invasive systolic arterial pressure (observable entity),SBP,OE 59 | 314457002,Maximum night interval diastolic blood pressure (observable entity),DBP,OE 60 | 49844009,Abnormal diastolic arterial pressure (finding),DBP,Value-Needed 61 | 53813002,Normal diastolic arterial pressure (finding),DBP,Ideal-Finding 62 | 314442009,Minimum night interval systolic blood pressure (observable entity),SBP,OE 63 | 251074006,Non-invasive mean arterial pressure (observable entity),ALL,OE 64 | 2004005,Normal blood pressure (finding),ALL,Ideal-Finding 65 | 413606001,Average home systolic blood pressure (observable entity),SBP,OE 66 | 852291000000105,Maximum mean blood pressure (observable entity),ALL,OE 67 | 314439003,Maximum systolic blood pressure (observable entity),SBP,OE 68 | 163037000,On examination - blood pressure labile (finding),ALL,Indeterminable 69 | 314445006,Average night interval systolic blood pressure (observable entity),SBP,OE 70 | 399304008,Systolic blood pressure on admission (observable entity),SBP,OE 71 | 42689008,Decreased diastolic arterial pressure (finding),DBP,Low-Finding 72 | 400975005,Standing diastolic blood pressure (observable entity),DBP,OE 73 | 314453003,Average diastolic blood pressure (observable entity),DBP,OE 74 | 170581003,On examination - initial high blood pressure (finding),ALL,High-Finding 75 | 314451001,Minimum diastolic blood pressure (observable entity),DBP,OE 76 | 386536003,Systemic blood pressure (observable entity),ALL,OE 77 | 1091811000000102,Diastolic arterial pressure (observable entity),DBP,OE 78 | 364090009,Systemic arterial pressure (observable entity),ALL,OE 79 | 163036009,On examination - blood pressure decreased (finding),ALL,Low-Finding 80 | 163022004,On examination - blood pressure reading very low (disorder),ALL,Low-Finding 81 | 723232008,Average blood pressure (observable entity),ALL,OE 82 | 314452008,Maximum diastolic blood pressure (observable entity),DBP,OE 83 | 26014004,Increased mean arterial pressure (finding),ALL,High-Finding 84 | 72313002,Systolic arterial pressure (observable entity),SBP,OE 85 | 163028000,On examination - blood pressure reading very high (finding),ALL,High-Finding 86 | 852301000000109,Minimum mean blood pressure (observable entity),ALL,OE 87 | 314461008,Average day interval diastolic blood pressure (observable entity),DBP,OE 88 | 413605002,Average home diastolic blood pressure (observable entity),DBP,OE 89 | 198081000000101,Ambulatory systolic blood pressure (observable entity),SBP,OE 90 | 271649006,Systolic blood pressure (observable entity),SBP,OE 91 | 314460009,Average night interval diastolic blood pressure (observable entity),DBP,OE 92 | 314447003,Minimum 24 hour systolic blood pressure (observable entity),SBP,OE 93 | 407556006,Lying systolic blood pressure (observable entity),SBP,OE 94 | 314441002,Minimum day interval systolic blood pressure (observable entity),SBP,OE 95 | 163030003,On examination - Systolic blood pressure reading (finding),SBP,Value-Needed 96 | 314446007,Average day interval systolic blood pressure (observable entity),SBP,OE 97 | 1105331000000106,Blood pressure measurement using oscillometric monitoring device with opportunistic atrial fibrillation detection (procedure),ALL,ERROR 98 | 163034007,Standing blood pressure (observable entity),ALL,OE 99 | 407554009,Sitting systolic blood pressure (observable entity),SBP,OE 100 | -------------------------------------------------------------------------------- /Blood_pressure/README.md: -------------------------------------------------------------------------------- 1 | # BACKGROUND 2 | Blood pressure status may be of interest in relation to the COVID-19 pandemic therefore NHS Digital have written some code to understand the quality, coverage, and distribution of blood pressure status recording within the GDPPR dataset. 3 | 4 | Blood pressure status can be recorded via several methods within GDPPR, all of which use SNOMED codes within patient journal tables: 5 | 6 | 1. SNOMED codes for blood pressure status (no values) e.g. code with description of ‘On examination - blood pressure reading very high (finding)’ 7 | 2. SNOMED codes for either systolic or diastolic blood pressure status (no values) e.g. code with description of ‘Normal diastolic arterial pressure (finding)’ - these need joining to calculate blood pressure status 8 | 3. SNOMED codes for blood pressure with **2** associated values e.g. code for ‘Blood pressure (observable entity)’ with associated values of 120 and 80 9 | 4. SNOMED codes for either systolic or diastolic blood pressure with **1** associate value e.g. 'Diastolic blood pressure (observable entity)' with an associated value of 80 - these need joining to calculate blood pressure status 10 | 11 | # METHODOLOGY 12 | 13 | a) All blood pressure SNOMED codes are categorised in the reference data based on previous analyses. The Cluster ID's used to create this reference data are 'BP_COD', 'ABPM_COD' and 'NDABP_COD'. _Please note it is possible that further blood pressure clusters may be created and/or blood pressure codes may be added to the GDPPR extract and these would need categorising._ 14 | 15 | b) For method 1, journals are assigned the approproate blood pressure status according to the SNOMED code description 16 | 17 | c) For method 2, journals are joined based on NHS number, date etc. to ensure readings were taken on the same day and are then assigned an [NHS data dictionary blood pressure status](https://www.nhs.uk/conditions/high-blood-pressure-hypertension/) 18 | 19 | d) For method 3, journals assigned an [NHS data dictionary blood pressure status](https://www.nhs.uk/conditions/high-blood-pressure-hypertension/) 20 | 21 | e) For method 4, journals are joined based on NHS number, date etc. to ensure readings were taken on the same day and are then assigned an [NHS data dictionary blood pressure status](https://www.nhs.uk/conditions/high-blood-pressure-hypertension/) 22 | 23 | f) Journals with an indeterminable blood pressure status are removed and the most recent blood pressure status is selected for each patient. 24 | 25 | # NOTES 26 | 27 | GDPPR data = ```gdppr_database.gdppr_table``` 28 | 29 | blood pressure snomed code reference data = ```ref_data.blood_pressure_ref_data``` 30 | -------------------------------------------------------------------------------- /Ethnic_Category/Ethnicity_GDPPR+HES_Modal.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | -- MAGIC %md 3 | -- MAGIC # 5 ORIGINAL TABLES 4 | 5 | -- COMMAND ---------- 6 | 7 | -- MAGIC %md 8 | -- MAGIC ## GDPPR 9 | 10 | -- COMMAND ---------- 11 | 12 | -- MAGIC %md 13 | -- MAGIC JOURNALS 14 | 15 | -- COMMAND ---------- 16 | 17 | -- MAGIC %py 18 | -- MAGIC # create spark df of dataset 19 | -- MAGIC data = spark.table('gdppr_database.gdppr_table') 20 | -- MAGIC 21 | -- MAGIC # create spark df of snomed ethnicity reference data 22 | -- MAGIC ethnicity_ref_data = spark.table('reference_database.gdppr_ethnicity_mappings_table') 23 | -- MAGIC 24 | -- MAGIC # join nhs data dictionary ethnic groups onto ethnicity snomed journals 25 | -- MAGIC data_join = data.join(ethnicity_ref_data, data.CODE == ethnicity_ref_data.ConceptId,how='left') 26 | -- MAGIC 27 | -- MAGIC # import functions 28 | -- MAGIC from pyspark.sql.functions import col 29 | -- MAGIC 30 | -- MAGIC ## remove non ethnicity snomed codes i.e. nulls 31 | -- MAGIC snomed_no_null = data_join.where(col("PrimaryCode").isNotNull()) 32 | -- MAGIC 33 | -- MAGIC # remove unknown snomed ethnicity - need to change to is in list but can't work it out in pyspark yet 34 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "Z") 35 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "z") 36 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "X") 37 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "x") 38 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "99") 39 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "9") 40 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "") 41 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != " ") 42 | -- MAGIC 43 | -- MAGIC # turn into temp view 44 | -- MAGIC snomed_no_null.createOrReplaceTempView('gdppr_ethnicity_journals') 45 | 46 | -- COMMAND ---------- 47 | 48 | -- Group by nhs number, record date, and ethnicity to get one journal per ethnicity, per date, per patient (removes duplicate journals) 49 | CREATE OR REPLACE TEMPORARY VIEW JOURNAL_GROUPBY AS 50 | SELECT NHS_NUMBER 51 | , RECORD_DATE 52 | , PrimaryCode AS ETHNICITY 53 | , COUNT(*) AS COUNT_RECORDS 54 | FROM gdppr_ethnicity_journals 55 | GROUP BY NHS_NUMBER 56 | , RECORD_DATE 57 | , PrimaryCode 58 | 59 | -- MAGIC %md 60 | -- MAGIC PATIENT 61 | 62 | -- COMMAND ---------- 63 | 64 | -- MAGIC %py 65 | -- MAGIC # import functions 66 | -- MAGIC from pyspark.sql.functions import col 67 | -- MAGIC 68 | -- MAGIC ## remove blank ethnic field 69 | -- MAGIC field_no_null = data_join.filter(data_join.ETHNIC != "") 70 | -- MAGIC 71 | -- MAGIC # remove unknowns - not stated 72 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "Z") 73 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "z") 74 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "X") 75 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "x") 76 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "99") 77 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "9") 78 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "") 79 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != " ") 80 | -- MAGIC 81 | -- MAGIC ## remove null ethnic field 82 | -- MAGIC field_no_null = field_no_null.where(col("ETHNIC").isNotNull()) 83 | -- MAGIC 84 | -- MAGIC # turn into temp view 85 | -- MAGIC field_no_null.createOrReplaceTempView('gdppr_ethnicity_patients') 86 | 87 | -- COMMAND ---------- 88 | 89 | -- Group by nhs number, reporting period end date, and ethnic field to get one ethnicity per date, per patient (removes duplicates) 90 | CREATE OR REPLACE TEMPORARY VIEW PATIENT_GROUPBY AS 91 | SELECT NHS_NUMBER 92 | , REPORTING_PERIOD_END_DATE 93 | , ETHNIC AS ETHNICITY 94 | , COUNT(*) AS COUNT_RECORDS 95 | FROM gdppr_ethnicity_patients 96 | GROUP BY NHS_NUMBER 97 | , REPORTING_PERIOD_END_DATE 98 | , ETHNIC 99 | 100 | -- MAGIC %md 101 | -- MAGIC ## HES 102 | 103 | -- COMMAND ---------- 104 | 105 | -- MAGIC %python 106 | -- MAGIC 107 | -- MAGIC ## GET THE CURRENT YEAR OF HES AND PREVIOUS 5 YEARS 108 | -- MAGIC 109 | -- MAGIC # import necessary packages/functions 110 | -- MAGIC from pyspark.sql.functions import expr, col, lit, concat, regexp_replace, upper, split, regexp_extract 111 | -- MAGIC 112 | -- MAGIC # select years 113 | -- MAGIC years = [row.year for row in spark.sql("SHOW TABLES IN sensitive_hes").filter(col("isTemporary")==False).withColumn("yeary", split(col("tableName"), '_')[2]).withColumn("year", regexp_extract(col("yeary"),"([0-2][0-9]{3})", 1)).select("year").filter(col("year") != '').distinct().sort(col("year").desc()).limit(6).collect()] 114 | -- MAGIC 115 | -- MAGIC # print years for visual check 116 | -- MAGIC print(years) 117 | 118 | -- COMMAND ---------- 119 | 120 | -- MAGIC %md 121 | -- MAGIC OP 122 | 123 | -- COMMAND ---------- 124 | 125 | -- MAGIC %python 126 | -- MAGIC # create 5 years of HES OP ethnicity records 127 | -- MAGIC stmt = [] 128 | -- MAGIC for year in years: 129 | -- MAGIC stmt.append(f"""SELECT a.NEWNHSNO, b.APPTDATE, b.ETHNOS FROM sensitive_hes.hes_op_{year} AS a INNER JOIN hes.hes_op_{year} AS b ON a.ATTENDKEY = b.ATTENDKEY WHERE b.CR_GP_PRACTICE NOT IN ('Y','Q99')""") 130 | -- MAGIC 131 | -- MAGIC # create temp table 132 | -- MAGIC spark.sql('CREATE OR REPLACE TEMPORARY VIEW OP_5_YEAR AS ' + ' UNION '.join(stmt)) 133 | 134 | 135 | -- MAGIC %md 136 | -- MAGIC AE 137 | 138 | -- COMMAND ---------- 139 | 140 | -- MAGIC %python 141 | -- MAGIC # create 5 years of HES AE ethnicity records 142 | -- MAGIC stmt = [] 143 | -- MAGIC for year in years: 144 | -- MAGIC stmt.append(f"""SELECT a.NEWNHSNO 145 | -- MAGIC , b.ARRIVALDATE 146 | -- MAGIC , b.ETHNOS 147 | -- MAGIC FROM sensitive_hes.hes_ae_{year} AS a 148 | -- MAGIC LEFT JOIN hes.hes_ae_{year} AS b 149 | -- MAGIC ON a.AEKEY = b.AEKEY 150 | -- MAGIC WHERE b.CR_GP_PRACTICE NOT IN ('Y','Q99')""") 151 | -- MAGIC 152 | -- MAGIC # create temp table 153 | -- MAGIC spark.sql('CREATE OR REPLACE TEMPORARY VIEW AE_5_YEAR AS ' + ' UNION '.join(stmt)) 154 | 155 | -- MAGIC %md 156 | -- MAGIC APC 157 | 158 | -- COMMAND ---------- 159 | 160 | -- MAGIC %python 161 | -- MAGIC # create 5 years of HES APC ethnicity records 162 | -- MAGIC stmt = [] 163 | -- MAGIC for year in years: 164 | -- MAGIC stmt.append(f"""SELECT a.NEWNHSNO 165 | -- MAGIC , b.EPIEND 166 | -- MAGIC , b.ETHNOS 167 | -- MAGIC FROM sensitive_hes.hes_apc_{year} AS a 168 | -- MAGIC LEFT JOIN hes.hes_apc_{year} AS b 169 | -- MAGIC ON a.EPIKEY = b.EPIKEY 170 | -- MAGIC WHERE b.CR_GP_PRACTICE NOT IN ('Y','Q99')""") 171 | -- MAGIC 172 | -- MAGIC # create temp table 173 | -- MAGIC spark.sql('CREATE OR REPLACE TEMPORARY VIEW APC_5_YEAR AS ' + ' UNION '.join(stmt)) 174 | 175 | -- COMMAND ---------- 176 | 177 | -- MAGIC %md 178 | -- MAGIC # APPEND AND FIND MOST RECENT OF ALL 179 | 180 | -- COMMAND ---------- 181 | 182 | -- MAGIC %md 183 | -- MAGIC # MODAL 184 | 185 | -- COMMAND ---------- 186 | 187 | -- append gdppr and hes ethnicity records together 188 | CREATE OR REPLACE TEMPORARY VIEW ALL_SOURCES_ETHNICITY_JOINED AS 189 | 190 | SELECT NHS_NUMBER 191 | , ETHNICITY 192 | , "Journal" AS SOURCE 193 | FROM JOURNAL_GROUPBY 194 | 195 | UNION 196 | 197 | SELECT NHS_NUMBER 198 | , ETHNICITY 199 | , "Patient" AS SOURCE 200 | FROM PATIENT_GROUPBY 201 | 202 | UNION 203 | 204 | SELECT NEWNHSNO AS NHS_NUMBER 205 | , ETHNOS AS ETHNICITY 206 | , "OP" AS SOURCE 207 | FROM OP_5_YEAR 208 | WHERE ETHNOS NOT IN ("Z", "z", "X", "x", "99", "9", "", " ") 209 | AND ETHNOS IS NOT NULL 210 | 211 | UNION 212 | 213 | SELECT NEWNHSNO AS NHS_NUMBER 214 | , ETHNOS AS ETHNICITY 215 | , "AE" AS SOURCE 216 | FROM AE_5_YEAR 217 | WHERE ETHNOS NOT IN ("Z", "z", "X", "x", "99", "9", "", " ") 218 | AND ETHNOS IS NOT NULL 219 | 220 | UNION 221 | 222 | SELECT NEWNHSNO AS NHS_NUMBER 223 | , ETHNOS AS ETHNICITY 224 | , "APC" AS SOURCE 225 | FROM APC_5_YEAR 226 | WHERE ETHNOS NOT IN ("Z", "z", "X", "x", "99", "9", "", " ") 227 | AND ETHNOS IS NOT NULL 228 | 229 | -- COMMAND ---------- 230 | 231 | -- count journals per ethnicity per patient 232 | CREATE OR REPLACE TEMPORARY VIEW JOURNALS_PER_ETHNICITY AS 233 | SELECT NHS_NUMBER 234 | , ETHNICITY 235 | , COUNT(*) AS COUNT 236 | FROM ALL_SOURCES_ETHNICITY_JOINED 237 | GROUP BY NHS_NUMBER 238 | , ETHNICITY 239 | 240 | -- COMMAND ---------- 241 | 242 | -- get max count per patient 243 | CREATE OR REPLACE TEMPORARY VIEW MAX_COUNT_ETHNICITY AS 244 | SELECT NHS_NUMBER 245 | , MAX(COUNT) AS MAX_COUNT 246 | FROM JOURNALS_PER_ETHNICITY 247 | GROUP BY NHS_NUMBER 248 | 249 | -- COMMAND ---------- 250 | 251 | -- get the modal ethnicity per patient (may still include duplicates for those with more than one modal ethnic cat) 252 | CREATE OR REPLACE TEMPORARY VIEW MODAL_ETHNICITY AS 253 | SELECT a.* 254 | FROM JOURNALS_PER_ETHNICITY AS a 255 | INNER JOIN MAX_COUNT_ETHNICITY AS b 256 | ON a.NHS_NUMBER = b.NHS_NUMBER 257 | AND a.COUNT = b.MAX_COUNT 258 | 259 | -- COMMAND ---------- 260 | 261 | -- MAGIC %md 262 | -- MAGIC ### REMOVE DUPLICATES 263 | 264 | -- COMMAND ---------- 265 | 266 | -- count ethnicities per NHS number as any with more than one need to be nulled and removed (for now) 267 | CREATE OR REPLACE TEMPORARY VIEW DUPLICATE_ETHNICITY_NHSNUM_MODAL AS 268 | SELECT NHS_NUMBER 269 | , COUNT(DISTINCT ETHNICITY) AS COUNT_ETHNICITIES 270 | FROM MODAL_ETHNICITY 271 | GROUP BY NHS_NUMBER 272 | 273 | -- COMMAND ---------- 274 | 275 | /* ########################################################## 276 | ########### THIS IS THE FINAL TABLE ####################### 277 | ############################################################# */ 278 | 279 | -- select only NHS numbers with one modal ethnicity i.e. remove patients who have more than one 280 | CREATE OR REPLACE TEMPORARY VIEW ETHNICITY_ASSET_MODAL AS 281 | SELECT NHS_NUMBER 282 | , ETHNICITY AS ETHNIC_CATEGORY_CODE 283 | FROM MODAL_ETHNICITY 284 | WHERE NHS_NUMBER IN (SELECT NHS_NUMBER FROM DUPLICATE_ETHNICITY_NHSNUM_MODAL WHERE COUNT_ETHNICITIES = 1) -------------------------------------------------------------------------------- /Ethnic_Category/Ethnicity_GDPPR+HES_Recent.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | -- MAGIC %md 3 | -- MAGIC # 5 ORIGINAL TABLES 4 | 5 | -- COMMAND ---------- 6 | 7 | -- MAGIC %md 8 | -- MAGIC ## GDPPR 9 | 10 | -- COMMAND ---------- 11 | 12 | -- MAGIC %md 13 | -- MAGIC JOURNALS 14 | 15 | -- COMMAND ---------- 16 | 17 | -- MAGIC %py 18 | -- MAGIC # create spark df of dataset 19 | -- MAGIC data = spark.table('gdppr_database.gdppr_table') 20 | -- MAGIC 21 | -- MAGIC # create spark df of snomed ethnicity reference data 22 | -- MAGIC ethnicity_ref_data = spark.table('reference_database.gdppr_ethnicity_mappings_table') 23 | -- MAGIC 24 | -- MAGIC # join nhs data dictionary ethnic groups onto ethnicity snomed journals 25 | -- MAGIC data_join = data.join(ethnicity_ref_data, data.CODE == ethnicity_ref_data.ConceptId,how='left') 26 | -- MAGIC 27 | -- MAGIC # import functions 28 | -- MAGIC from pyspark.sql.functions import col 29 | -- MAGIC 30 | -- MAGIC ## remove non ethnicity snomed codes i.e. nulls 31 | -- MAGIC snomed_no_null = data_join.where(col("PrimaryCode").isNotNull()) 32 | -- MAGIC 33 | -- MAGIC # remove unknown snomed ethnicity - need to change to is in list but can't work it out in pyspark yet 34 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "Z") 35 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "z") 36 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "X") 37 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "x") 38 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "99") 39 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "9") 40 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "") 41 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != " ") 42 | -- MAGIC 43 | -- MAGIC # turn into temp view 44 | -- MAGIC snomed_no_null.createOrReplaceTempView('gdppr_ethnicity_journals') 45 | 46 | -- COMMAND ---------- 47 | 48 | -- Group by nhs number, record date, and ethnicity to get one journal per ethnicity, per date, per patient (removes duplicate journals) 49 | CREATE OR REPLACE TEMPORARY VIEW JOURNAL_GROUPBY AS 50 | SELECT NHS_NUMBER 51 | , RECORD_DATE 52 | , PrimaryCode AS ETHNICITY 53 | , COUNT(*) AS COUNT_RECORDS 54 | FROM gdppr_ethnicity_journals 55 | GROUP BY NHS_NUMBER 56 | , RECORD_DATE 57 | , PrimaryCode 58 | 59 | -- COMMAND ---------- 60 | 61 | -- get most recent record date per patient 62 | CREATE OR REPLACE TEMPORARY VIEW RECENT_JOURNAL_DATE AS 63 | SELECT NHS_NUMBER 64 | , MAX(RECORD_DATE) AS RECENT_ETHNICITY_DATE 65 | FROM JOURNAL_GROUPBY 66 | GROUP BY NHS_NUMBER 67 | 68 | -- COMMAND ---------- 69 | 70 | -- get most recent ethnicity journal for each patient 71 | CREATE OR REPLACE TEMPORARY VIEW GDPPR_JOURNAL_ETHNICITY AS 72 | SELECT a.NHS_NUMBER 73 | , a.ETHNICITY 74 | , a.RECORD_DATE AS RECORDED_DATE 75 | , "GDPPR_JOURNAL" AS SOURCE 76 | , 1 AS PRIORITY 77 | FROM JOURNAL_GROUPBY AS a 78 | INNER JOIN RECENT_JOURNAL_DATE AS b 79 | ON a.NHS_NUMBER = b.NHS_NUMBER 80 | AND a.RECORD_DATE = b.RECENT_ETHNICITY_DATE 81 | 82 | -- COMMAND ---------- 83 | 84 | -- MAGIC %md 85 | -- MAGIC PATIENT 86 | 87 | -- COMMAND ---------- 88 | 89 | -- MAGIC %py 90 | -- MAGIC # import functions 91 | -- MAGIC from pyspark.sql.functions import col 92 | -- MAGIC 93 | -- MAGIC ## remove blank ethnic field 94 | -- MAGIC field_no_null = data_join.filter(data_join.ETHNIC != "") 95 | -- MAGIC 96 | -- MAGIC # remove unknowns - not stated 97 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "Z") 98 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "z") 99 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "X") 100 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "x") 101 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "99") 102 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "9") 103 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "") 104 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != " ") 105 | -- MAGIC 106 | -- MAGIC ## remove null ethnic field 107 | -- MAGIC field_no_null = field_no_null.where(col("ETHNIC").isNotNull()) 108 | -- MAGIC 109 | -- MAGIC # turn into temp view 110 | -- MAGIC field_no_null.createOrReplaceTempView('gdppr_ethnicity_patients') 111 | 112 | -- COMMAND ---------- 113 | 114 | -- Group by nhs number, reporting period end date, and ethnic field to get one ethnicity per date, per patient (removes duplicates) 115 | CREATE OR REPLACE TEMPORARY VIEW PATIENT_GROUPBY AS 116 | SELECT NHS_NUMBER 117 | , REPORTING_PERIOD_END_DATE 118 | , ETHNIC AS ETHNICITY 119 | , COUNT(*) AS COUNT_RECORDS 120 | FROM gdppr_ethnicity_patients 121 | GROUP BY NHS_NUMBER 122 | , REPORTING_PERIOD_END_DATE 123 | , ETHNIC 124 | 125 | -- COMMAND ---------- 126 | 127 | CREATE OR REPLACE TEMPORARY VIEW PATIENT_RECENT_DATE AS 128 | SELECT NHS_NUMBER 129 | , MAX(REPORTING_PERIOD_END_DATE) AS RECENT_ETHNICITY_DATE 130 | FROM PATIENT_GROUPBY 131 | GROUP BY NHS_NUMBER 132 | 133 | -- COMMAND ---------- 134 | 135 | -- get most recent ethnicity for each patient 136 | CREATE OR REPLACE TEMPORARY VIEW GDPPR_PATIENT_ETHNICITY AS 137 | SELECT a.NHS_NUMBER 138 | , a.ETHNICITY 139 | , a.REPORTING_PERIOD_END_DATE AS RECORDED_DATE 140 | , "GDPPR_PATIENT" AS SOURCE 141 | , 2 AS PRIORITY 142 | FROM PATIENT_GROUPBY AS a 143 | INNER JOIN PATIENT_RECENT_DATE AS b 144 | ON a.NHS_NUMBER = b.NHS_NUMBER 145 | AND a.REPORTING_PERIOD_END_DATE = b.RECENT_ETHNICITY_DATE 146 | 147 | -- COMMAND ---------- 148 | 149 | -- MAGIC %md 150 | -- MAGIC ## HES 151 | 152 | -- COMMAND ---------- 153 | 154 | -- MAGIC %python 155 | -- MAGIC 156 | -- MAGIC ## GET THE CURRENT YEAR OF HES AND PREVIOUS 5 YEARS 157 | -- MAGIC 158 | -- MAGIC # import necessary packages/functions 159 | -- MAGIC from pyspark.sql.functions import expr, col, lit, concat, regexp_replace, upper, split, regexp_extract 160 | -- MAGIC 161 | -- MAGIC # select years 162 | -- MAGIC years = [row.year for row in spark.sql("SHOW TABLES IN sensitive_hes").filter(col("isTemporary")==False).withColumn("yeary", split(col("tableName"), '_')[2]).withColumn("year", regexp_extract(col("yeary"),"([0-2][0-9]{3})", 1)).select("year").filter(col("year") != '').distinct().sort(col("year").desc()).limit(6).collect()] 163 | -- MAGIC 164 | -- MAGIC # print years for visual check 165 | -- MAGIC print(years) 166 | 167 | -- COMMAND ---------- 168 | 169 | -- MAGIC %md 170 | -- MAGIC OP 171 | 172 | -- COMMAND ---------- 173 | 174 | -- MAGIC %python 175 | -- MAGIC # create 5 years of HES OP ethnicity records 176 | -- MAGIC stmt = [] 177 | -- MAGIC for year in years: 178 | -- MAGIC stmt.append(f"""SELECT a.NEWNHSNO, b.APPTDATE, b.ETHNOS FROM sensitive_hes.hes_op_{year} AS a INNER JOIN hes.hes_op_{year} AS b ON a.ATTENDKEY = b.ATTENDKEY WHERE b.CR_GP_PRACTICE NOT IN ('Y','Q99')""") 179 | -- MAGIC 180 | -- MAGIC # create temp table 181 | -- MAGIC spark.sql('CREATE OR REPLACE TEMPORARY VIEW OP_5_YEAR AS ' + ' UNION '.join(stmt)) 182 | 183 | -- COMMAND ---------- 184 | 185 | -- remove unknown/blank/null ethnicities 186 | CREATE OR REPLACE TEMPORARY VIEW HES_OP_NO_UNKNOWN AS 187 | SELECT NEWNHSNO AS NHS_NUMBER 188 | , APPTDATE 189 | , ETHNOS AS ETHNICITY 190 | FROM OP_5_YEAR 191 | WHERE ETHNOS NOT IN ("Z", "z", "X", "x", "99", "9", "", " ") 192 | AND ETHNOS IS NOT NULL 193 | 194 | -- COMMAND ---------- 195 | 196 | -- Group by nhs number, appt date, and ethnos field to get one ethnicity per date, per patient (removes duplicates e.g. several appts in one day) 197 | CREATE OR REPLACE TEMPORARY VIEW OP_GROUPBY AS 198 | SELECT NHS_NUMBER 199 | , APPTDATE 200 | , ETHNICITY 201 | , COUNT(*) AS COUNT_RECORDS 202 | FROM HES_OP_NO_UNKNOWN 203 | GROUP BY NHS_NUMBER 204 | , APPTDATE 205 | , ETHNICITY 206 | 207 | -- COMMAND ---------- 208 | 209 | -- get most recent date per patient 210 | CREATE OR REPLACE TEMPORARY VIEW OP_RECENT_DATE AS 211 | SELECT NHS_NUMBER 212 | , MAX(APPTDATE) AS RECENT_ETHNICITY_DATE 213 | FROM OP_GROUPBY 214 | GROUP BY NHS_NUMBER 215 | 216 | -- COMMAND ---------- 217 | 218 | -- get most recent ethnicity for each patient 219 | CREATE OR REPLACE TEMPORARY VIEW HES_OP_ETHNICITY AS 220 | SELECT a.NHS_NUMBER 221 | , a.ETHNICITY 222 | , a.APPTDATE AS RECORDED_DATE 223 | , "HES_OP" AS SOURCE 224 | , 5 AS PRIORITY 225 | FROM OP_GROUPBY AS a 226 | INNER JOIN OP_RECENT_DATE AS b 227 | ON a.NHS_NUMBER = b.NHS_NUMBER 228 | AND a.APPTDATE = b.RECENT_ETHNICITY_DATE 229 | 230 | -- COMMAND ---------- 231 | 232 | -- MAGIC %md 233 | -- MAGIC AE 234 | 235 | -- COMMAND ---------- 236 | 237 | -- MAGIC %python 238 | -- MAGIC # create 5 years of HES AE ethnicity records 239 | -- MAGIC stmt = [] 240 | -- MAGIC for year in years: 241 | -- MAGIC stmt.append(f"""SELECT a.NEWNHSNO 242 | -- MAGIC , b.ARRIVALDATE 243 | -- MAGIC , b.ETHNOS 244 | -- MAGIC FROM sensitive_hes.hes_ae_{year} AS a 245 | -- MAGIC LEFT JOIN hes.hes_ae_{year} AS b 246 | -- MAGIC ON a.AEKEY = b.AEKEY 247 | -- MAGIC WHERE b.CR_GP_PRACTICE NOT IN ('Y','Q99')""") 248 | -- MAGIC 249 | -- MAGIC # create temp table 250 | -- MAGIC spark.sql('CREATE OR REPLACE TEMPORARY VIEW AE_5_YEAR AS ' + ' UNION '.join(stmt)) 251 | 252 | -- COMMAND ---------- 253 | 254 | -- remove unknown/blank/null ethnicities 255 | CREATE OR REPLACE TEMPORARY VIEW HES_AE_NO_UNKNOWN AS 256 | SELECT NEWNHSNO AS NHS_NUMBER 257 | , ARRIVALDATE 258 | , ETHNOS AS ETHNICITY 259 | FROM AE_5_YEAR 260 | WHERE ETHNOS NOT IN ("Z", "z", "X", "x", "99", "9", "", " ") 261 | AND ETHNOS IS NOT NULL 262 | 263 | -- COMMAND ---------- 264 | 265 | -- Group by nhs number, arrival date, and ethnos field to get one ethnicity per date, per patient (removes duplicates e.g. several a+e trips in one day - unlikely?) 266 | CREATE OR REPLACE TEMPORARY VIEW AE_GROUPBY AS 267 | SELECT NHS_NUMBER 268 | , ARRIVALDATE 269 | , ETHNICITY 270 | , COUNT(*) AS COUNT_RECORDS 271 | FROM HES_AE_NO_UNKNOWN 272 | GROUP BY NHS_NUMBER 273 | , ARRIVALDATE 274 | , ETHNICITY 275 | 276 | -- COMMAND ---------- 277 | 278 | -- get most recent date per patient 279 | CREATE OR REPLACE TEMPORARY VIEW AE_RECENT_DATE AS 280 | SELECT NHS_NUMBER 281 | , MAX(ARRIVALDATE) AS RECENT_ETHNICITY_DATE 282 | FROM AE_GROUPBY 283 | GROUP BY NHS_NUMBER 284 | 285 | -- COMMAND ---------- 286 | 287 | -- get most recent ethnicity for each patient 288 | CREATE OR REPLACE TEMPORARY VIEW HES_AE_ETHNICITY AS 289 | SELECT a.NHS_NUMBER 290 | , a.ETHNICITY 291 | , a.ARRIVALDATE AS RECORDED_DATE 292 | , "HES_AE" AS SOURCE 293 | , 4 AS PRIORITY 294 | FROM AE_GROUPBY AS a 295 | INNER JOIN AE_RECENT_DATE AS b 296 | ON a.NHS_NUMBER = b.NHS_NUMBER 297 | AND a.ARRIVALDATE = b.RECENT_ETHNICITY_DATE 298 | 299 | -- COMMAND ---------- 300 | 301 | -- MAGIC %md 302 | -- MAGIC APC 303 | 304 | -- COMMAND ---------- 305 | 306 | -- MAGIC %python 307 | -- MAGIC # create 5 years of HES APC ethnicity records 308 | -- MAGIC stmt = [] 309 | -- MAGIC for year in years: 310 | -- MAGIC stmt.append(f"""SELECT a.NEWNHSNO 311 | -- MAGIC , b.EPIEND 312 | -- MAGIC , b.ETHNOS 313 | -- MAGIC FROM sensitive_hes.hes_apc_{year} AS a 314 | -- MAGIC LEFT JOIN hes.hes_apc_{year} AS b 315 | -- MAGIC ON a.EPIKEY = b.EPIKEY 316 | -- MAGIC WHERE b.CR_GP_PRACTICE NOT IN ('Y','Q99')""") 317 | -- MAGIC 318 | -- MAGIC # create temp table 319 | -- MAGIC spark.sql('CREATE OR REPLACE TEMPORARY VIEW APC_5_YEAR AS ' + ' UNION '.join(stmt)) 320 | 321 | -- COMMAND ---------- 322 | 323 | -- remove unknown/blank/null ethnicities 324 | CREATE OR REPLACE TEMPORARY VIEW HES_APC_NO_UNKNOWN AS 325 | SELECT NEWNHSNO AS NHS_NUMBER 326 | , EPIEND 327 | , ETHNOS AS ETHNICITY 328 | FROM APC_5_YEAR 329 | WHERE ETHNOS NOT IN ("Z", "z", "X", "x", "99", "9", "", " ") 330 | AND ETHNOS IS NOT NULL 331 | 332 | -- COMMAND ---------- 333 | 334 | -- Group by nhs number, episode end date, and ethnos field to get one ethnicity per date, per patient (removes duplicates e.g. several admittals in one day - unlikely?) 335 | CREATE OR REPLACE TEMPORARY VIEW APC_GROUPBY AS 336 | SELECT NHS_NUMBER 337 | , EPIEND 338 | , ETHNICITY 339 | , COUNT(*) AS COUNT_RECORDS 340 | FROM HES_APC_NO_UNKNOWN 341 | GROUP BY NHS_NUMBER 342 | , EPIEND 343 | , ETHNICITY 344 | 345 | -- COMMAND ---------- 346 | 347 | -- get most recent date per patient 348 | CREATE OR REPLACE TEMPORARY VIEW APC_RECENT_DATE AS 349 | SELECT NHS_NUMBER 350 | , MAX(EPIEND) AS RECENT_ETHNICITY_DATE 351 | FROM APC_GROUPBY 352 | GROUP BY NHS_NUMBER 353 | 354 | -- COMMAND ---------- 355 | 356 | -- get most recent ethnicity for each patient 357 | CREATE OR REPLACE TEMPORARY VIEW HES_APC_ETHNICITY AS 358 | SELECT a.NHS_NUMBER 359 | , a.ETHNICITY 360 | , a.EPIEND AS RECORDED_DATE 361 | , "HES_APC" AS SOURCE 362 | , 3 AS PRIORITY 363 | FROM APC_GROUPBY AS a 364 | INNER JOIN APC_RECENT_DATE AS b 365 | ON a.NHS_NUMBER = b.NHS_NUMBER 366 | AND a.EPIEND = b.RECENT_ETHNICITY_DATE 367 | 368 | -- COMMAND ---------- 369 | 370 | -- MAGIC %md 371 | -- MAGIC # APPEND AND FIND MOST RECENT OF ALL 372 | 373 | -- COMMAND ---------- 374 | 375 | -- append gdppr and hes ethnicity records together 376 | CREATE OR REPLACE TEMPORARY VIEW ALL_SOURCES_RECENT_ETHNICITY_JOINED AS 377 | 378 | SELECT NHS_NUMBER 379 | , ETHNICITY 380 | , RECORDED_DATE 381 | , SOURCE 382 | , PRIORITY 383 | FROM GDPPR_JOURNAL_ETHNICITY 384 | 385 | UNION 386 | 387 | SELECT NHS_NUMBER 388 | , ETHNICITY 389 | , RECORDED_DATE 390 | , SOURCE 391 | , PRIORITY 392 | FROM GDPPR_PATIENT_ETHNICITY 393 | 394 | UNION 395 | 396 | SELECT NHS_NUMBER 397 | , ETHNICITY 398 | , RECORDED_DATE 399 | , SOURCE 400 | , PRIORITY 401 | FROM HES_OP_ETHNICITY 402 | 403 | UNION 404 | 405 | SELECT NHS_NUMBER 406 | , ETHNICITY 407 | , RECORDED_DATE 408 | , SOURCE 409 | , PRIORITY 410 | FROM HES_AE_ETHNICITY 411 | 412 | UNION 413 | 414 | SELECT NHS_NUMBER 415 | , ETHNICITY 416 | , RECORDED_DATE 417 | , SOURCE 418 | , PRIORITY 419 | FROM HES_APC_ETHNICITY 420 | 421 | 422 | -- COMMAND ---------- 423 | 424 | -- get most recent date per patient 425 | CREATE OR REPLACE TEMPORARY VIEW ALL_SOURCES_RECENT_DATE AS 426 | SELECT NHS_NUMBER 427 | , MAX(RECORDED_DATE) AS RECENT_ETHNICITY_DATE 428 | FROM ALL_SOURCES_RECENT_ETHNICITY_JOINED 429 | GROUP BY NHS_NUMBER 430 | 431 | 432 | -- COMMAND ---------- 433 | 434 | -- get most recent ethnicity record for each patient 435 | CREATE OR REPLACE TEMPORARY VIEW ALL_ETHNICITY_RECENT AS 436 | SELECT a.NHS_NUMBER 437 | , a.ETHNICITY 438 | , a.RECORDED_DATE 439 | , a.SOURCE 440 | , a.PRIORITY 441 | FROM ALL_SOURCES_RECENT_ETHNICITY_JOINED AS a 442 | INNER JOIN ALL_SOURCES_RECENT_DATE AS b 443 | ON a.NHS_NUMBER = b.NHS_NUMBER 444 | AND a.RECORDED_DATE = b.RECENT_ETHNICITY_DATE 445 | 446 | -- COMMAND ---------- 447 | 448 | -- MAGIC %md 449 | -- MAGIC # REMOVE CONFLICTS 450 | 451 | -- COMMAND ---------- 452 | 453 | -- find highest priority source for each patient (this is the minimum as GDPPR journal is priority 1, GDPPR patient is priority 2 etc.) 454 | CREATE OR REPLACE TEMPORARY VIEW MIN_PRIORITY AS 455 | SELECT NHS_NUMBER 456 | , MIN(PRIORITY) AS MIN_PRIORITY 457 | FROM ALL_ETHNICITY_RECENT 458 | GROUP BY NHS_NUMBER 459 | 460 | -- COMMAND ---------- 461 | 462 | -- get prioritised ethnicity record for each patient (remove lower priority source records) 463 | CREATE OR REPLACE TEMPORARY VIEW PRIORITISED_ETHNICITY AS 464 | SELECT a.NHS_NUMBER 465 | , a.ETHNICITY 466 | , a.RECORDED_DATE 467 | , a.SOURCE 468 | , a.PRIORITY 469 | FROM ALL_ETHNICITY_RECENT AS a 470 | INNER JOIN MIN_PRIORITY AS b 471 | ON a.NHS_NUMBER = b.NHS_NUMBER 472 | AND a.PRIORITY = b.MIN_PRIORITY 473 | 474 | -- COMMAND ---------- 475 | 476 | -- MAGIC %md 477 | -- MAGIC # NULLIFY DUPLICATES 478 | 479 | -- COMMAND ---------- 480 | 481 | -- count ethnicities per NHS number as any with more than one need to be nulled and removed (for now) 482 | CREATE OR REPLACE TEMPORARY VIEW DUPLICATE_ETHNICITY_NHSNUM AS 483 | SELECT NHS_NUMBER 484 | , COUNT(*) AS COUNT_ETHNICITIES 485 | FROM PRIORITISED_ETHNICITY 486 | GROUP BY NHS_NUMBER 487 | 488 | -- COMMAND ---------- 489 | 490 | -- select only NHS numbers with one ethnicity i.e. remove patients who have more than one 491 | 492 | CREATE OR REPLACE TEMPORARY VIEW ETHNICITY_ASSET_ALL AS 493 | SELECT NHS_NUMBER 494 | , ETHNICITY AS ETHNIC_CATEGORY_CODE 495 | , RECORDED_DATE AS DATE_OF_ATTRIBUTION 496 | , SOURCE AS DATA_SOURCE 497 | FROM PRIORITISED_ETHNICITY 498 | WHERE NHS_NUMBER IN (SELECT NHS_NUMBER FROM DUPLICATE_ETHNICITY_NHSNUM WHERE COUNT_ETHNICITIES = 1) 499 | 500 | -- COMMAND ---------- 501 | 502 | -- MAGIC %md 503 | -- MAGIC # REMOVE DECEASED PATIENTS AND OPT OUTS 504 | 505 | -- COMMAND ---------- 506 | 507 | -- create list of gdppr patients who are not deceased (gdppr does not include opt outs so they will be removed where they came in from HES) 508 | CREATE OR REPLACE TEMPORARY VIEW ALIVE_PATIENTS AS 509 | SELECT DISTINCT NHS_NUMBER 510 | FROM gdppr_database.gdppr_table 511 | WHERE DATE_OF_DEATH IS null 512 | 513 | -- COMMAND ---------- 514 | 515 | /* ########################################################## 516 | ########### THIS IS THE FINAL TABLE ####################### 517 | ############################################################# */ 518 | 519 | -- pull only living patients 520 | 521 | CREATE OR REPLACE TEMPORARY VIEW ETHNICITY_ASSET_V1 AS 522 | SELECT NHS_NUMBER 523 | , ETHNIC_CATEGORY_CODE 524 | , DATE_OF_ATTRIBUTION 525 | , DATA_SOURCE 526 | FROM ETHNICITY_ASSET_ALL 527 | WHERE NHS_NUMBER IN (SELECT NHS_NUMBER FROM ALIVE_PATIENTS) 528 | 529 | -------------------------------------------------------------------------------- /Ethnic_Category/Ethnicity_GDPPR_only_Recent.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | -- MAGIC %md 3 | -- MAGIC # 5 ORIGINAL TABLES 4 | 5 | -- COMMAND ---------- 6 | 7 | -- MAGIC %md 8 | -- MAGIC ## GDPPR 9 | 10 | -- COMMAND ---------- 11 | 12 | -- MAGIC %md 13 | -- MAGIC JOURNALS 14 | 15 | -- COMMAND ---------- 16 | 17 | -- MAGIC %py 18 | -- MAGIC # create spark df of dataset 19 | -- MAGIC data = spark.table('gdppr_database.gdppr_table') 20 | -- MAGIC 21 | -- MAGIC # create spark df of snomed ethnicity reference data 22 | -- MAGIC ethnicity_ref_data = spark.table('reference_database.gdppr_ethnicity_mappings_table') 23 | -- MAGIC 24 | -- MAGIC # join nhs data dictionary ethnic groups onto ethnicity snomed journals 25 | -- MAGIC data_join = data.join(ethnicity_ref_data, data.CODE == ethnicity_ref_data.ConceptId,how='left') 26 | -- MAGIC 27 | -- MAGIC # import functions 28 | -- MAGIC from pyspark.sql.functions import col 29 | -- MAGIC 30 | -- MAGIC ## remove non ethnicity snomed codes i.e. nulls 31 | -- MAGIC snomed_no_null = data_join.where(col("PrimaryCode").isNotNull()) 32 | -- MAGIC 33 | -- MAGIC # remove unknown snomed ethnicity - need to change to is in list but can't work it out in pyspark yet 34 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "Z") 35 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "z") 36 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "X") 37 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "x") 38 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "99") 39 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "9") 40 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != "") 41 | -- MAGIC snomed_no_null = snomed_no_null.filter(data_join.PrimaryCode != " ") 42 | -- MAGIC 43 | -- MAGIC # turn into temp view 44 | -- MAGIC snomed_no_null.createOrReplaceTempView('gdppr_ethnicity_journals') 45 | 46 | -- COMMAND ---------- 47 | 48 | -- Group by nhs number, record date, and ethnicity to get one journal per ethnicity, per date, per patient (removes duplicate journals) 49 | CREATE OR REPLACE TEMPORARY VIEW JOURNAL_GROUPBY AS 50 | SELECT NHS_NUMBER 51 | , RECORD_DATE 52 | , PrimaryCode AS ETHNICITY 53 | , COUNT(*) AS COUNT_RECORDS 54 | FROM gdppr_ethnicity_journals 55 | GROUP BY NHS_NUMBER 56 | , RECORD_DATE 57 | , PrimaryCode 58 | 59 | -- COMMAND ---------- 60 | 61 | -- get most recent record date per patient 62 | CREATE OR REPLACE TEMPORARY VIEW RECENT_JOURNAL_DATE AS 63 | SELECT NHS_NUMBER 64 | , MAX(RECORD_DATE) AS RECENT_ETHNICITY_DATE 65 | FROM JOURNAL_GROUPBY 66 | GROUP BY NHS_NUMBER 67 | 68 | -- COMMAND ---------- 69 | 70 | -- get most recent ethnicity journal for each patient 71 | CREATE OR REPLACE TEMPORARY VIEW GDPPR_JOURNAL_ETHNICITY AS 72 | SELECT a.NHS_NUMBER 73 | , a.ETHNICITY 74 | , a.RECORD_DATE AS RECORDED_DATE 75 | , "GDPPR_JOURNAL" AS SOURCE 76 | , 1 AS PRIORITY 77 | FROM JOURNAL_GROUPBY AS a 78 | INNER JOIN RECENT_JOURNAL_DATE AS b 79 | ON a.NHS_NUMBER = b.NHS_NUMBER 80 | AND a.RECORD_DATE = b.RECENT_ETHNICITY_DATE 81 | 82 | -- COMMAND ---------- 83 | 84 | -- MAGIC %md 85 | -- MAGIC PATIENT 86 | 87 | -- COMMAND ---------- 88 | 89 | -- MAGIC %py 90 | -- MAGIC # import functions 91 | -- MAGIC from pyspark.sql.functions import col 92 | -- MAGIC 93 | -- MAGIC ## remove blank ethnic field 94 | -- MAGIC field_no_null = data_join.filter(data_join.ETHNIC != "") 95 | -- MAGIC 96 | -- MAGIC # remove unknowns - not stated 97 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "Z") 98 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "z") 99 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "X") 100 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "x") 101 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "99") 102 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "9") 103 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != "") 104 | -- MAGIC field_no_null = field_no_null.filter(data_join.ETHNIC != " ") 105 | -- MAGIC 106 | -- MAGIC ## remove null ethnic field 107 | -- MAGIC field_no_null = field_no_null.where(col("ETHNIC").isNotNull()) 108 | -- MAGIC 109 | -- MAGIC # turn into temp view 110 | -- MAGIC field_no_null.createOrReplaceTempView('gdppr_ethnicity_patients') 111 | 112 | -- COMMAND ---------- 113 | 114 | -- Group by nhs number, reporting period end date, and ethnic field to get one ethnicity per date, per patient (removes duplicates) 115 | CREATE OR REPLACE TEMPORARY VIEW PATIENT_GROUPBY AS 116 | SELECT NHS_NUMBER 117 | , REPORTING_PERIOD_END_DATE 118 | , ETHNIC AS ETHNICITY 119 | , COUNT(*) AS COUNT_RECORDS 120 | FROM gdppr_ethnicity_patients 121 | GROUP BY NHS_NUMBER 122 | , REPORTING_PERIOD_END_DATE 123 | , ETHNIC 124 | 125 | -- COMMAND ---------- 126 | 127 | CREATE OR REPLACE TEMPORARY VIEW PATIENT_RECENT_DATE AS 128 | SELECT NHS_NUMBER 129 | , MAX(REPORTING_PERIOD_END_DATE) AS RECENT_ETHNICITY_DATE 130 | FROM PATIENT_GROUPBY 131 | GROUP BY NHS_NUMBER 132 | 133 | -- COMMAND ---------- 134 | 135 | -- get most recent ethnicity for each patient 136 | CREATE OR REPLACE TEMPORARY VIEW GDPPR_PATIENT_ETHNICITY AS 137 | SELECT a.NHS_NUMBER 138 | , a.ETHNICITY 139 | , a.REPORTING_PERIOD_END_DATE AS RECORDED_DATE 140 | , "GDPPR_PATIENT" AS SOURCE 141 | , 2 AS PRIORITY 142 | FROM PATIENT_GROUPBY AS a 143 | INNER JOIN PATIENT_RECENT_DATE AS b 144 | ON a.NHS_NUMBER = b.NHS_NUMBER 145 | AND a.REPORTING_PERIOD_END_DATE = b.RECENT_ETHNICITY_DATE 146 | 147 | -- COMMAND ---------- 148 | 149 | -- MAGIC %md 150 | -- MAGIC # GDPPR ONLY 151 | 152 | -- COMMAND ---------- 153 | 154 | -- append gdppr patient and gdppr journal records together 155 | CREATE OR REPLACE TEMPORARY VIEW GDPPR_RECENT_ETHNICITY_JOINED AS 156 | 157 | SELECT NHS_NUMBER 158 | , ETHNICITY 159 | , RECORDED_DATE 160 | , SOURCE 161 | , PRIORITY 162 | FROM GDPPR_JOURNAL_ETHNICITY 163 | 164 | UNION 165 | 166 | SELECT NHS_NUMBER 167 | , ETHNICITY 168 | , RECORDED_DATE 169 | , SOURCE 170 | , PRIORITY 171 | FROM GDPPR_PATIENT_ETHNICITY 172 | 173 | -- COMMAND ---------- 174 | 175 | -- get most recent date per patient 176 | CREATE OR REPLACE TEMPORARY VIEW GDPPR_RECENT_DATE AS 177 | SELECT NHS_NUMBER 178 | , MAX(RECORDED_DATE) AS RECENT_ETHNICITY_DATE 179 | FROM GDPPR_RECENT_ETHNICITY_JOINED 180 | GROUP BY NHS_NUMBER 181 | 182 | 183 | -- COMMAND ---------- 184 | 185 | -- get most recent ethnicity record for each patient 186 | CREATE OR REPLACE TEMPORARY VIEW GDPPR_ETHNICITY_RECENT AS 187 | SELECT a.NHS_NUMBER 188 | , a.ETHNICITY 189 | , a.RECORDED_DATE 190 | , a.SOURCE 191 | , a.PRIORITY 192 | FROM GDPPR_RECENT_ETHNICITY_JOINED AS a 193 | INNER JOIN GDPPR_RECENT_DATE AS b 194 | ON a.NHS_NUMBER = b.NHS_NUMBER 195 | AND a.RECORDED_DATE = b.RECENT_ETHNICITY_DATE 196 | 197 | -- COMMAND ---------- 198 | 199 | -- MAGIC %md 200 | -- MAGIC ## REMOVE CONFLICTS - GDPPR 201 | 202 | -- COMMAND ---------- 203 | 204 | -- find highest priority source for each patient (this is the minimum as GDPPR journal is priority 1, GDPPR patient is priority 2 etc.) 205 | CREATE OR REPLACE TEMPORARY VIEW GDPPR_MIN_PRIORITY AS 206 | SELECT NHS_NUMBER 207 | , MIN(PRIORITY) AS MIN_PRIORITY 208 | FROM GDPPR_ETHNICITY_RECENT 209 | GROUP BY NHS_NUMBER 210 | 211 | -- COMMAND ---------- 212 | 213 | -- get prioritised ethnicity record for each patient (remove lower priority source records) 214 | CREATE OR REPLACE TEMPORARY VIEW GDPPR_PRIORITISED_ETHNICITY AS 215 | SELECT a.NHS_NUMBER 216 | , a.ETHNICITY 217 | , a.RECORDED_DATE 218 | , a.SOURCE 219 | , a.PRIORITY 220 | FROM GDPPR_ETHNICITY_RECENT AS a 221 | INNER JOIN GDPPR_MIN_PRIORITY AS b 222 | ON a.NHS_NUMBER = b.NHS_NUMBER 223 | AND a.PRIORITY = b.MIN_PRIORITY 224 | 225 | -- COMMAND ---------- 226 | 227 | -- MAGIC %md 228 | -- MAGIC ## NULLIFY DUPLICATES - GDPPR 229 | 230 | -- COMMAND ---------- 231 | 232 | -- count ethnicities per NHS number as any with more than one need to be nulled and removed (for now) 233 | CREATE OR REPLACE TEMPORARY VIEW GDPPR_DUPLICATE_ETHNICITY_NHSNUM AS 234 | SELECT NHS_NUMBER 235 | , COUNT(*) AS COUNT_ETHNICITIES 236 | FROM GDPPR_PRIORITISED_ETHNICITY 237 | GROUP BY NHS_NUMBER 238 | 239 | -- COMMAND ---------- 240 | 241 | -- select only NHS numbers with one ethnicity i.e. remove patients who have more than one 242 | 243 | CREATE OR REPLACE TEMPORARY VIEW ETHNICITY_ASSET_GDPPR AS 244 | SELECT NHS_NUMBER 245 | , ETHNICITY AS ETHNIC_CATEGORY_CODE 246 | , RECORDED_DATE AS DATE_OF_ATTRIBUTION 247 | , SOURCE AS DATA_SOURCE 248 | FROM GDPPR_PRIORITISED_ETHNICITY 249 | WHERE NHS_NUMBER IN (SELECT NHS_NUMBER FROM GDPPR_DUPLICATE_ETHNICITY_NHSNUM WHERE COUNT_ETHNICITIES = 1) 250 | 251 | -- COMMAND ---------- 252 | 253 | -- MAGIC %md 254 | -- MAGIC ## REMOVE DECEASED PATIENTS AND OPT OUTS - GDPPR 255 | 256 | -- COMMAND ---------- 257 | 258 | -- create list of gdppr patients who are not deceased (gdppr does not include opt outs) 259 | CREATE OR REPLACE TEMPORARY VIEW GDPPR_ALIVE_PATIENTS AS 260 | SELECT DISTINCT NHS_NUMBER 261 | FROM gdppr_database.gdppr_table 262 | WHERE DATE_OF_DEATH IS null 263 | 264 | -- COMMAND ---------- 265 | 266 | /* ########################################################## 267 | ########### THIS IS THE FINAL TABLE ####################### 268 | ############################################################# */ 269 | 270 | -- pull only living patients 271 | 272 | CREATE OR REPLACE TEMPORARY VIEW GDPPR_ETHNICITY_ASSET_V1 AS 273 | SELECT NHS_NUMBER 274 | , ETHNIC_CATEGORY_CODE 275 | , DATE_OF_ATTRIBUTION 276 | , DATA_SOURCE 277 | FROM ETHNICITY_ASSET_GDPPR 278 | WHERE NHS_NUMBER IN (SELECT NHS_NUMBER FROM GDPPR_ALIVE_PATIENTS) -------------------------------------------------------------------------------- /Ethnic_Category/README.md: -------------------------------------------------------------------------------- 1 | # BACKGROUND 2 | 3 | Ethnicity is of particular interest in relation to the COVID-19 pandemic therefore NHS Digital have produced an ethnicity asset to understand the quality, coverage, and distribution of ethnic category recording within certain datasets held by NHS Digital. The [2011 Census](https://www.ons.gov.uk/census/2011census), published by the Office for National Statistics (ONS), is the gold standard for ethnicity recording in England and Wales, however as this data is nearly 10 years old and may no longer reflect the ethnic breakdown of the current population NHS Digital have amalgamated ethnic category data from [Hospital Episode Statistics (HES)](https://digital.nhs.uk/data-and-information/data-tools-and-services/data-services/hospital-episode-statistics) and [GPES Data for Pandemic Planning and Research COVID (GDPPR-COVID)](https://digital.nhs.uk/coronavirus/gpes-data-for-pandemic-planning-and-research) to provide a near population (England only) level view of ethnic category. Ethnic category coverage management information using this asset is published on the [NHS Digital website](https://digital.nhs.uk/data-and-information/areas-of-interest/ethnicity). 4 | 5 | The HES datasets included within this code are HES Admitted Patient Care (APC), HES Accident and Emergency (AE) and HES Outpatients (OP), from the current year and previous 5 years. Ethnic category can only be recorded via the ETHNOS field in the HES datasets. 6 | 7 | Ethnic category can be recorded via two methods within GDPPR: an ETHNIC field within patient information tables, or a SNOMED code for ethnicity within patient journal tables. [SNOMED codes](https://digital.nhs.uk/services/terminology-and-classifications/snomed-ct) are the clinical coding standards used with GP records. 8 | 9 | # METHODOLOGY 10 | 11 | ## Recency 12 | 13 | __*Note: Almost identical methodology is used for GDPPR only, GDPPR + HES, and GDPPR with HES only when ethnicity is not available in GDPPR, therefore it is only explained once here.*__ 14 | 15 | **a)** Patients and their recorded ethnic categories from each of the 5 data sources (GDPPR-Journal, GDPPR-Patient, HES-APC, HES-AE, HES-OP) are amalgamated. 16 | 17 | **b)** Records where the ethnic category is unknown are removed, and the data source providing the most recent ethnic category recording for each patient is selected to de-duplicate patients with multiple recordings. 18 | 19 | **c)** Where conflicts exist between data sources priority is given in the following order: GDPPR-Journal, GDPPR-Patient, HES-APC, HES-AE, HES-OP. 20 | 21 | **d)** Where conflicts still exist (i.e. where the highest priority data source gives different ethnic categories on the latest date of attribution) ethnic category is set to null and the patient is not counted as having a known ethnic category. 22 | 23 | **e)** Records are removed for patients who have a recorded date of death and/or their postcode is not in England. 24 | 25 | ## Modal 26 | 27 | **a)** Patients and their recorded ethnic categories from each of the 5 data sources (GDPPR-Journal, GDPPR-Patient, HES-APC, HES-AE, HES-OP) are amalgamated and records where ethnic category is unknown are removed. 28 | 29 | **b)** The modal ethnic category recording for each patient is selected to de-duplicate patients with multiple recordings. 30 | 31 | **c)** Where conflicts exist between data sources i.e. a patient has more than one modal ethnic category, ethnic category is set to null and the patient is not counted as having a known ethnic category. 32 | 33 | **d)** Records are removed for patients who have a recorded date of death and/or their postcode is not in England. 34 | 35 | # NOTES 36 | 37 | GDPPR data = ```gdppr_database.gdppr_table``` 38 | 39 | Ethnicity reference data = ```reference_database.gdppr_ethnicity_mappings_table``` 40 | 41 | HES data is stored in two tables - sensitive and non-sensitive; they need joining to get the required data: 42 | 43 | HES OP data = ```sensitive_hes.hes_op_{year}``` and ```hes.hes_op_{year}``` 44 | 45 | HES AE data = ```sensitive_hes.hes_ae_{year}``` and ```hes.hes_ae_{year}``` 46 | 47 | HES APC data = ```sensitive_hes.hes_apc_{year}``` and ```hes.hes_apc_{year}``` 48 | 49 | -------------------------------------------------------------------------------- /Ethnic_Category/ethnicity_reference_data_categories.csv: -------------------------------------------------------------------------------- 1 | ETHNIC CATEGORY ID,ETHNIC CATEGORY LABEL 2 | A,British 3 | B,Irish 4 | C,Any other White background 5 | D,White and Black Caribbean 6 | E,White and Black African 7 | F,White and Asian 8 | G,Any other mixed background 9 | H,Indian 10 | J,Pakistani 11 | K,Bangladeshi 12 | L,Any other Asian background 13 | M,Caribbean 14 | N,African 15 | P,Any other Black background 16 | R,Chinese 17 | S,Any other ethnic group 18 | T,Traveller 19 | W,Arab 20 | Z,Not stated 21 | -------------------------------------------------------------------------------- /Ethnic_Category/ethnicity_reference_data_snomed.csv: -------------------------------------------------------------------------------- 1 | ConceptId,PrimaryCode 2 | 92561000000109,C 3 | 92571000000102,A 4 | 92581000000100,G 5 | 92591000000103,G 6 | 92601000000109,S 7 | 92611000000106,S 8 | 92621000000100,G 9 | 92631000000103,G 10 | 92641000000107,L 11 | 92651000000105,L 12 | 92661000000108,L 13 | 92671000000101,L 14 | 92681000000104,L 15 | 92691000000102,L 16 | 92701000000102,L 17 | 92711000000100,N 18 | 92721000000106,G 19 | 92731000000108,N 20 | 92741000000104,P 21 | 92751000000101,L 22 | 92761000000103,L 23 | 92771000000105,L 24 | 92781000000107,L 25 | 92791000000109,A 26 | 93921000000101,C 27 | 93931000000104,C 28 | 93941000000108,C 29 | 93951000000106,S 30 | 93961000000109,C 31 | 93981000000100,C 32 | 93991000000103,C 33 | 94001000000108,C 34 | 94011000000105,C 35 | 94021000000104,C 36 | 94031000000102,C 37 | 94041000000106,C 38 | 94051000000109,C 39 | 94061000000107,N 40 | 94071000000100,S 41 | 94081000000103,S 42 | 94091000000101,S 43 | 94101000000109,S 44 | 94111000000106,S 45 | 94121000000100,L 46 | 94131000000103,S 47 | 94141000000107,L 48 | 94151000000105,S 49 | 1024701000000100,Z 50 | 107691000000105,M 51 | 110401000000103,S 52 | 110751000000108,H 53 | 110761000000106,A 54 | 110771000000104,G 55 | 110781000000102,L 56 | 110791000000100,P 57 | 110831000000107,L 58 | 494131000000105,A 59 | 494141000000101,A 60 | 494151000000103,A 61 | 494161000000100,B 62 | 494171000000107,B 63 | 494181000000109,B 64 | 86461000000107,L 65 | 88911000000101,T 66 | 88921000000107,T 67 | 88931000000109,T 68 | 88941000000100,C 69 | 88951000000102,C 70 | 88961000000104,C 71 | 88971000000106,C 72 | 88981000000108,C 73 | 88991000000105,S 74 | 89001000000105,W 75 | 89011000000107,S 76 | 89021000000101,S 77 | 89031000000104,S 78 | 92391000000108,A 79 | 92401000000106,B 80 | 92411000000108,C 81 | 92421000000102,D 82 | 92431000000100,E 83 | 92441000000109,F 84 | 92451000000107,G 85 | 92461000000105,J 86 | 92471000000103,K 87 | 92481000000101,L 88 | 92491000000104,N 89 | 92501000000105,P 90 | 92511000000107,R 91 | 92521000000101,S 92 | 92531000000104,Z 93 | 92541000000108,A 94 | 92551000000106,A 95 | 976631000000101,A 96 | 976641000000105,A 97 | 976651000000108,B 98 | 976661000000106,B 99 | 976671000000104,T 100 | 976681000000102,T 101 | 976691000000100,C 102 | 976701000000100,C 103 | 976711000000103,D 104 | 976721000000109,D 105 | 976731000000106,E 106 | 976741000000102,E 107 | 976751000000104,F 108 | 976761000000101,F 109 | 976771000000108,G 110 | 976781000000105,G 111 | 976791000000107,H 112 | 976801000000106,H 113 | 976811000000108,J 114 | 976821000000102,J 115 | 976831000000100,K 116 | 976841000000109,K 117 | 976851000000107,R 118 | 976861000000105,R 119 | 976871000000103,L 120 | 976881000000101,L 121 | 976891000000104,N 122 | 976901000000103,N 123 | 976911000000101,M 124 | 976921000000107,M 125 | 976931000000109,P 126 | 976941000000100,P 127 | 976951000000102,W 128 | 976961000000104,W 129 | 976971000000106,S 130 | 976981000000108,S 131 | 977351000000100,A 132 | 977361000000102,A 133 | 977371000000109,T 134 | 977381000000106,T 135 | 977391000000108,D 136 | 977401000000106,D 137 | 977411000000108,E 138 | 977421000000102,E 139 | 977431000000100,F 140 | 977441000000109,F 141 | 977551000000106,G 142 | 977561000000109,G 143 | 977591000000103,H 144 | 977601000000109,H 145 | 977711000000100,J 146 | 977721000000106,J 147 | 977731000000108,K 148 | 977741000000104,K 149 | 977751000000101,R 150 | 977761000000103,R 151 | 977771000000105,L 152 | 977781000000107,L 153 | 977791000000109,N 154 | 977801000000108,N 155 | 977811000000105,M 156 | 977821000000104,M 157 | 977831000000102,P 158 | 977841000000106,P 159 | 977851000000109,W 160 | 977861000000107,W 161 | 977871000000100,S 162 | 977881000000103,S 163 | 977911000000103,A 164 | 977921000000109,A 165 | 977931000000106,A 166 | 977941000000102,A 167 | 977951000000104,B 168 | 977961000000101,B 169 | 977971000000108,T 170 | 977981000000105,T 171 | 978011000000101,C 172 | 978021000000107,C 173 | 978031000000109,C 174 | 978041000000100,C 175 | 978051000000102,G 176 | 978061000000104,G 177 | 978071000000106,J 178 | 978081000000108,J 179 | 978111000000100,H 180 | 978121000000106,H 181 | 978171000000105,K 182 | 978181000000107,K 183 | 978191000000109,R 184 | 978201000000106,R 185 | 978211000000108,L 186 | 978221000000102,L 187 | 978231000000100,N 188 | 978241000000109,N 189 | 978251000000107,P 190 | 978261000000105,P 191 | 978271000000103,M 192 | 978281000000101,M 193 | 978341000000102,M 194 | 978351000000104,M 195 | 978361000000101,P 196 | 978371000000108,P 197 | 978381000000105,W 198 | 978391000000107,W 199 | 978401000000105,S 200 | 978411000000107,S 201 | 10008004,N 202 | 10117001,C 203 | 10292001,S 204 | 10432001,L 205 | 108342005,S 206 | 113169009,C 207 | 113170005,S 208 | 113171009,S 209 | 11794009,N 210 | 12556008,L 211 | 13233008,S 212 | 1340002,L 213 | 13440006,N 214 | 14045001,C 215 | 14176005,C 216 | 14470009,N 217 | 1451003,C 218 | 14999008,B 219 | 15086000,P 220 | 15801006,S 221 | 160531006,M 222 | 17095009,C 223 | 17789004,S 224 | 18167009,N 225 | 18575005,S 226 | 18583004,C 227 | 185984009,C 228 | 185988007,M 229 | 185989004,P 230 | 185990008,P 231 | 185993005,N 232 | 185995003,H 233 | 185996002,L 234 | 185998001,G 235 | 185999009,G 236 | 186000006,G 237 | 186001005,H 238 | 186002003,J 239 | 186003008,K 240 | 186005001,S 241 | 186006000,S 242 | 186007009,S 243 | 186010002,N 244 | 186012005,H 245 | 186013000,L 246 | 186014006,B 247 | 186017004,C 248 | 186019001,G 249 | 186020007,G 250 | 186021006,F 251 | 186022004,G 252 | 186023009,G 253 | 186035008,S 254 | 186036009,S 255 | 186037000,S 256 | 186039002,S 257 | 186040000,S 258 | 186041001,S 259 | 186042008,S 260 | 186044009,L 261 | 18664001,S 262 | 19085009,S 263 | 1919006,N 264 | 19434008,C 265 | 20140003,S 266 | 20291009,S 267 | 20449009,L 268 | 21047009,S 269 | 21868006,N 270 | 21993009,L 271 | 22007004,S 272 | 23517005,S 273 | 23534002,S 274 | 23922002,N 275 | 24812003,L 276 | 25750005,S 277 | 25804004,A 278 | 26215007,L 279 | 2688009,L 280 | 270460000,M 281 | 270461001,P 282 | 270462008,L 283 | 270463003,M 284 | 270464009,S 285 | 270465005,L 286 | 270466006,C 287 | 270467002,S 288 | 2720008,N 289 | 27301002,S 290 | 275586009,P 291 | 275587000,P 292 | 275588005,P 293 | 275589002,L 294 | 275590006,L 295 | 275591005,M 296 | 275592003,M 297 | 275593008,M 298 | 275594002,S 299 | 275595001,S 300 | 275596000,L 301 | 275597009,L 302 | 275599007,C 303 | 275600005,C 304 | 275601009,S 305 | 275602002,S 306 | 27683006,L 307 | 27700004,S 308 | 28409002,C 309 | 2852001,L 310 | 28562006,C 311 | 286009,C 312 | 28796001,L 313 | 28821007,S 314 | 29343004,C 315 | 296841000000102,L 316 | 309643000,M 317 | 309644006,M 318 | 312859007,L 319 | 312860002,Z 320 | 312861003,Z 321 | 315236000,A 322 | 315237009,B 323 | 315239007,G 324 | 315240009,P 325 | 315279003,P 326 | 315280000,L 327 | 315281001,L 328 | 315283003,T 329 | 315634007,D 330 | 315635008,E 331 | 31637002,C 332 | 32045009,C 333 | 32513008,L 334 | 32873005,S 335 | 33182009,S 336 | 3353005,S 337 | 33897005,R 338 | 34334001,S 339 | 35007000,S 340 | 36329002,C 341 | 3698008,S 342 | 37474002,N 343 | 37843006,N 344 | 38144004,S 345 | 3818007,N 346 | 38361009,L 347 | 38750003,S 348 | 39007007,N 349 | 39764005,N 350 | 401213008,A 351 | 401214002,A 352 | 40165009,L 353 | 40182006,S 354 | 4073004,S 355 | 41076003,N 356 | 413465009,M 357 | 413466005,E 358 | 414152003,C 359 | 414481008,H 360 | 414551003,L 361 | 414661004,S 362 | 414978006,L 363 | 41798002,S 364 | 42632009,L 365 | 4299001,L 366 | 43056000,S 367 | 43608005,S 368 | 43890005,S 369 | 44460002,L 370 | 445343003,C 371 | 45465003,C 372 | 46110004,N 373 | 46723002,L 374 | 47250000,L 375 | 47327008,S 376 | 48118002,S 377 | 48294008,S 378 | 48375000,S 379 | 48679001,L 380 | 49202008,S 381 | 50405005,S 382 | 51750002,N 383 | 51827000,S 384 | 52075006,N 385 | 53195006,L 386 | 53460002,C 387 | 55990000,S 388 | 56056003,C 389 | 57405008,L 390 | 57539009,S 391 | 58047002,N 392 | 59366001,N 393 | 59487007,S 394 | 59597001,S 395 | 60157000,S 396 | 62598008,S 397 | 63457007,S 398 | 6373008,S 399 | 63732001,S 400 | 63736003,L 401 | 64483007,H 402 | 64693008,C 403 | 65776006,S 404 | 66406004,S 405 | 66920001,S 406 | 67165000,L 407 | 67439005,N 408 | 67931002,S 409 | 68486007,C 410 | 69865008,S 411 | 69983001,S 412 | 704385002,L 413 | 704386001,L 414 | 704387005,L 415 | 704388000,L 416 | 704389008,L 417 | 704390004,L 418 | 704391000,L 419 | 704392007,L 420 | 71176007,N 421 | 718131000000106,L 422 | 718958002,T 423 | 718959005,C 424 | 718960000,T 425 | 718961001,C 426 | 718962008,C 427 | 718963003,C 428 | 718964009,T 429 | 71949006,S 430 | 72201005,N 431 | 72248007,N 432 | 72337002,S 433 | 72809004,C 434 | 733078003,C 435 | 733446001,C 436 | 735001008,C 437 | 73524008,S 438 | 73736004,S 439 | 74159009,S 440 | 74302004,S 441 | 75301003,S 442 | 75326007,S 443 | 75704009,S 444 | 76253004,N 445 | 763726001,Z 446 | 76460008,S 447 | 76574004,C 448 | 76768002,L 449 | 76775001,N 450 | 76883002,S 451 | 7695005,C 452 | 77502007,S 453 | 77686000,S 454 | 79434006,S 455 | 80208004,C 456 | 80528001,N 457 | 81035008,J 458 | 8124001,N 459 | 81283004,L 460 | 81403004,C 461 | 81560001,S 462 | 81653003,S 463 | 81846005,L 464 | 82174001,N 465 | 83365001,L 466 | 83584002,N 467 | 83939006,L 468 | 85163001,C 469 | 85371009,N 470 | 85515006,S 471 | 86275006,S 472 | 87323008,S 473 | 88790004,N 474 | 88839008,N 475 | 88934004,N 476 | 89026003,S 477 | 90027003,W 478 | 90348007,L 479 | 90822005,N 480 | 91066000,L 481 | 91191002,L 482 | 91488008,S 483 | 9158000,N 484 | 9533000,C 485 | 870448005,N 486 | 413773004,C 487 | 414752008,G 488 | 415226007,Z 489 | 415794004,Z 490 | 521000220104,P 491 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2021, NHS Digital 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [GDPPR](https://digital.nhs.uk/coronavirus/gpes-data-for-pandemic-planning-and-research) subject matter experts have completed various analyses using the GDPPR dataset and are sharing code to: 2 | 3 | * prevent duplication of work 4 | * allow peer review of code and methodology used in analysis 5 | * increase consistency of methodology across users 6 | * increase general knowledge sharing 7 | 8 | Useful code can be found in the folders within this repository. Each folder will include a README.md file which explains the basic methodology behind the analysis/code, a code script, and reference data (where necessary). 9 | 10 | Please ensure that you read the information in the read me as there may be several scripts for different methodologies. 11 | 12 | Due to the analytical environment NHS Digital uses (databricks) it is possible for python, SQL and markdown code to be used within the same script. 13 | - SQL code will appear as usual - please note that databricks uses spark SQL rather than T-SQL 14 | - Python code will have ```--MAGIC``` prior to it and ```%py``` to signal that this 'chunk' of code is written in python 15 | - Markdown code will have ```--MAGIC``` prior to it and ```%md``` to signal that this 'chunk' of code is written in markdown. Markdown code is minimal and used in databricks to separate code as you would with commenting in SQL. 16 | 17 | Code is included for the following pieces of analysis: 18 | * Ethnicity 19 | * BMI 20 | * Blood pressure status 21 | * Blood glucose status 22 | * Smoking status 23 | 24 | Please note that the majority of this code has been written as part of exploratory analyses so care should be taken to ensure it is appropriate for your own analysis and that relevant clinical advice is sought where appropriate. All code and methodology is subject to change following further analysis. 25 | -------------------------------------------------------------------------------- /Smoking/Most Recent Smoking Status.sql: -------------------------------------------------------------------------------- 1 | -- Databricks notebook source 2 | -- MAGIC %md # SMOKING STATUS GDPPR 3 | 4 | -- COMMAND ---------- 5 | 6 | -- MAGIC %md 7 | -- MAGIC ## SMOKING JOURNALS 8 | 9 | -- COMMAND ---------- 10 | 11 | -- create view of only smoking journals 12 | -- add in code description and category which allows us to assign smoking status 13 | CREATE OR REPLACE TEMPORARY VIEW all_smoking_journals AS 14 | SELECT a.* 15 | , b.Code_description 16 | , b.CATEGORY 17 | FROM gdppr_database.gdppr_table as a 18 | LEFT JOIN reference_database.smoking_ref_data AS b 19 | ON a.CODE = b.SNOMED_concept_ID 20 | WHERE a.CODE IN (SELECT DISTINCT SNOMED_concept_ID FROM reference_database.smoking_ref_data) 21 | 22 | -- COMMAND ---------- 23 | 24 | -- MAGIC %md # CATEGORISE 25 | 26 | -- COMMAND ---------- 27 | 28 | -- categorise journals based on values or snomed codes (based on previous analyses) 29 | CREATE OR REPLACE TEMPORARY VIEW categorise_smoking_journals AS 30 | SELECT * 31 | , CASE WHEN CATEGORY = 'non_smoker_findings_codes' AND VALUE1_CONDITION IS NULL THEN "Non-Smoker" 32 | WHEN CATEGORY = 'smoker_findings_codes' AND VALUE1_CONDITION IS NULL THEN "Smoker" 33 | WHEN CATEGORY = 'Microtest_Error' AND VALUE1_CONDITION IS NULL THEN "Non-Smoker" 34 | WHEN CATEGORY = 'Microtest_Error' AND VALUE1_CONDITION = 0 THEN "Non-Smoker" 35 | WHEN CATEGORY = 'Value_Finding_Smoker' AND VALUE1_CONDITION IS NULL THEN "Smoker" 36 | WHEN CATEGORY = 'Value_Finding_Smoker' AND CODE = '160603005' AND VALUE1_CONDITION >= 1 AND VALUE1_CONDITION <= 9 THEN "Smoker" 37 | WHEN CATEGORY = 'Value_Finding_Smoker' AND CODE = '160604004' AND VALUE1_CONDITION >= 10 AND VALUE1_CONDITION <= 19 THEN "Smoker" 38 | WHEN CATEGORY = 'Value_Finding_Smoker' AND CODE = '160605003' AND VALUE1_CONDITION >= 20 AND VALUE1_CONDITION <= 39 THEN "Smoker" 39 | WHEN CATEGORY = 'Value_Finding_Smoker' AND CODE = '160606002' AND VALUE1_CONDITION >= 40 AND VALUE1_CONDITION <= 100 THEN "Smoker" 40 | WHEN CATEGORY = 'Value_Finding_Smoker' AND CODE = '266920004' AND VALUE1_CONDITION >= 0 AND VALUE1_CONDITION <= 1 THEN "Smoker" 41 | WHEN CATEGORY = 'Value_Finding_Smoker' AND CODE = '56578002' AND VALUE1_CONDITION >=1 AND VALUE1_CONDITION <=20 THEN "Smoker" 42 | WHEN CATEGORY = 'Value_Finding_Smoker' AND CODE = '56771006' AND VALUE1_CONDITION >= 20 AND VALUE1_CONDITION <= 100 THEN "Smoker" 43 | WHEN CATEGORY = 'Value_Finding_Smoker' AND CODE = '59978006' AND VALUE1_CONDITION >=1 AND VALUE1_CONDITION <= 20 THEN "Smoker" 44 | WHEN CATEGORY = 'Value_Finding_Smoker' AND CODE = '65568007' AND VALUE1_CONDITION >= 1 AND VALUE1_CONDITION <= 100 THEN "Smoker" 45 | WHEN CATEGORY = 'Value_Finding_Smoker' AND CODE = '77176002' AND VALUE1_CONDITION >= 1 AND VALUE1_CONDITION <= 100 THEN "Smoker" 46 | WHEN CATEGORY = 'Value_Finding_Smoker' AND CODE = '82302008' AND VALUE1_CONDITION >= 1 AND VALUE1_CONDITION <= 100 THEN "Smoker" 47 | WHEN CATEGORY = 'Value_Finding_Non_Smoker' AND VALUE1_CONDITION IS NULL THEN "Non-Smoker" 48 | WHEN CATEGORY = 'Observable_Entities' AND CODE IN ('160625004', '230056004', '230057008', '230058003', '266918002', '401201003', '413173009', '836001000000109', '228486009', '735112005') AND VALUE1_CONDITION IS NULL THEN "Non-Smoker" 49 | WHEN CATEGORY = 'Observable_Entities' AND CODE IN ('160625004', '735112005') AND VALUE1_CONDITION >= 0 THEN "Non-Smoker" 50 | WHEN CATEGORY = 'Observable_Entities' AND CODE IN ('230056004', '266918002', '401201003') AND VALUE1_CONDITION = 0 THEN "Non-Smoker" 51 | WHEN CATEGORY = 'Observable_Entities' AND CODE = '230056004' AND VALUE1_CONDITION > 0 AND VALUE1_CONDITION <= 100 THEN "Smoker" 52 | WHEN CATEGORY = 'Observable_Entities' AND CODE = '230057008' AND VALUE1_CONDITION > 0 AND VALUE1_CONDITION <= 20 THEN "Smoker" 53 | WHEN CATEGORY = 'Observable_Entities' AND CODE = '230058003' AND VALUE1_CONDITION > 0 AND VALUE1_CONDITION <= 100 THEN "Smoker" 54 | WHEN CATEGORY = 'Observable_Entities' AND CODE = '266918002' AND VALUE1_CONDITION > 0 AND VALUE1_CONDITION <= 100 THEN "Smoker" 55 | WHEN CATEGORY = 'Observable_Entities' AND CODE = '401201003' AND VALUE1_CONDITION > 0 AND VALUE1_CONDITION <= 240 THEN "Smoker" 56 | WHEN CATEGORY = 'Observable_Entities' AND CODE = '413173009' AND VALUE1_CONDITION >= 0 AND VALUE1_CONDITION <= 1440 THEN "Smoker" 57 | WHEN CATEGORY = 'Observable_Entities' AND CODE = '836001000000109' AND VALUE1_CONDITION > 0 AND VALUE1_CONDITION <= 100 THEN "Smoker" 58 | WHEN CATEGORY = 'Observable_Entities' AND CODE IN ('401159003') AND VALUE1_CONDITION IS NULL THEN "Smoker" 59 | ELSE 'Indeterminable' 60 | END AS SMOKING_CATS 61 | FROM all_smoking_journals 62 | 63 | -- COMMAND ---------- 64 | 65 | -- MAGIC %md # ADD DATA DICTIONARY SMOKING CATEGORIES 66 | 67 | -- COMMAND ---------- 68 | 69 | ---- Add in nhs data dictionary categories : current smoker; ex-smoker; non-smoker history unknown; never smoker and not stated 70 | CREATE OR REPLACE TEMPORARY VIEW smoking_categories_2 AS 71 | SELECT *, 72 | CASE WHEN SMOKING_CATS = "Smoker" THEN "Current-Smoker" 73 | WHEN SMOKING_CATS = "Indeterminable" THEN "Indeterminable" 74 | WHEN SMOKING_CATS = "Non-Smoker" AND CODE = "221000119102" THEN "Never-Smoker" 75 | WHEN SMOKING_CATS = "Non-Smoker" AND CODE = "266919005" THEN "Never-Smoker" 76 | WHEN SMOKING_CATS = "Non-Smoker" AND CODE = "401201003" AND VALUE1_CONDITION IS NULL THEN "Never-Smoker" 77 | WHEN SMOKING_CATS = "Non-Smoker" AND CODE = "401201003" AND VALUE1_CONDITION = 0 THEN "Never-Smoker" 78 | WHEN SMOKING_CATS = "Non-Smoker" AND CODE = "105539002" THEN "Non-Smoker History Unknown" 79 | WHEN SMOKING_CATS = "Non-Smoker" AND CODE = "105540000" THEN "Non-Smoker History Unknown" 80 | WHEN SMOKING_CATS = "Non-Smoker" AND CODE = "105541001" THEN "Non-Smoker History Unknown" 81 | WHEN SMOKING_CATS = "Non-Smoker" AND CODE = "160618006" THEN "Non-Smoker History Unknown" 82 | WHEN SMOKING_CATS = "Non-Smoker" AND CODE = "360918006" THEN "Non-Smoker History Unknown" 83 | WHEN SMOKING_CATS = "Non-Smoker" AND CODE = "360929005" THEN "Non-Smoker History Unknown" 84 | WHEN SMOKING_CATS = "Non-Smoker" AND CODE = "405746006" THEN "Non-Smoker History Unknown" 85 | WHEN SMOKING_CATS = "Non-Smoker" AND CODE = "8392000" THEN "Non-Smoker History Unknown" 86 | WHEN SMOKING_CATS = "Non-Smoker" AND CODE = "87739003" THEN "Non-Smoker History Unknown" 87 | ELSE 'Ex-Smoker' 88 | END AS DATA_DICTIONARY_CATS 89 | FROM categorise_smoking_journals 90 | 91 | -- COMMAND ---------- 92 | 93 | -- MAGIC %md # FIND MOST RECENT RECORD 94 | 95 | -- COMMAND ---------- 96 | 97 | ---- Need to remove those that are indeterminable - most recent "known" record 98 | CREATE OR REPLACE TEMPORARY VIEW smoking_no_unknowns AS 99 | SELECT * 100 | FROM smoking_categories_2 101 | WHERE DATA_DICTIONARY_CATS != "Indeterminable" 102 | 103 | -- COMMAND ---------- 104 | 105 | ---- Create table of most recent date per patient 106 | CREATE OR REPLACE TEMPORARY VIEW MAX_DATE AS 107 | SELECT NHS_NUMBER 108 | , MAX(DATE) AS DATE 109 | FROM smoking_no_unknowns 110 | GROUP BY NHS_NUMBER 111 | 112 | -- COMMAND ---------- 113 | 114 | ---- create table with only most recent records 115 | CREATE OR REPLACE TEMPORARY VIEW smoking_Most_Recent_Records AS 116 | SELECT a.* 117 | FROM smoking_no_unknowns AS a 118 | INNER JOIN MAX_DATE AS b 119 | ON a.NHS_NUMBER = b.NHS_NUMBER 120 | AND a.DATE = b.DATE 121 | 122 | -- COMMAND ---------- 123 | 124 | ---- Count number of categories per patient from most recent records - patients may have more than one recent record so some may have conflicting smoking statuses 125 | CREATE OR REPLACE TEMPORARY VIEW smoking_Journal_Counts AS 126 | SELECT NHS_NUMBER 127 | , COUNT(DISTINCT DATA_DICTIONARY_CATS) AS COUNT_CATEGORIES 128 | FROM smoking_Most_Recent_Records 129 | GROUP BY NHS_NUMBER 130 | 131 | -- COMMAND ---------- 132 | 133 | ---- Remove individual with conflicting smoking statuses 134 | -- add in age at time of journal 135 | CREATE OR REPLACE TEMPORARY VIEW recent_smoking_journals_no_conflicts AS 136 | SELECT * 137 | , FLOOR(DATEDIFF(DATE, DATE_OF_BIRTH) / 365.25) AS AGE_AT_TIME_OF_JOURNAL 138 | FROM smoking_Most_Recent_Records 139 | WHERE NHS_NUMBER IN (SELECT DISTINCT NHS_NUMBER FROM smoking_Journal_Counts WHERE COUNT_CATEGORIES = 1) 140 | 141 | -- COMMAND ---------- -------------------------------------------------------------------------------- /Smoking/README.md: -------------------------------------------------------------------------------- 1 | # BACKGROUND 2 | Smoking status may be of interest in relation to the COVID-19 pandemic therefore NHS Digital have written some code to understand the quality, coverage, and distribution of smoking status recording within the GDPPR dataset. 3 | 4 | Smoking status can be recorded via several methods within GDPPR, all of which use SNOMED codes within patient journal tables: 5 | 6 | 1. SNOMED codes for smoking status (no values) e.g. code with description of ‘Non-smoker (finding)’ 7 | 2. SNOMED codes for smoking status with associated values e.g. code for ‘Cigarette consumption (observable entity)’ with an associated value of 10 8 | 3. SNOMED codes for non-smokers which need categorising in terms of their smoking history e.g. never smoker or ex smoker 9 | 10 | # METHODOLOGY 11 | 12 | a) All smoking status SNOMED codes are categorised in the reference data based on previous analyses. _Please note it is possible that further smoking status codes may be added to the GDPPR extract and these would need categorising._ 13 | 14 | b) For methods 1 and 3, journals are assigned an [NHS data dictionary smoking status](https://datadictionary.nhs.uk/attributes/smoking_status.html) 15 | 16 | c) For method 2, journals are assigned an NHS data dictionary smoking status based on the associated value. Categorisations are based on initial analyses and discussions with clinicians. 17 | 18 | # NOTES 19 | 20 | GDPPR data = ```gdppr_database.gdppr_table``` 21 | 22 | smoking status snomed code reference data = ```reference_database.smoking_ref_data``` 23 | -------------------------------------------------------------------------------- /Smoking/smoking_status_reference_data.csv: -------------------------------------------------------------------------------- 1 | SNOMED_concept_ID,Code_description,CATEGORY 2 | 1092031000000108,Ex-smoker amount unknown (finding),non_smoker_findings_codes 3 | 1092041000000104,Ex-very heavy smoker (40+/day) (finding),non_smoker_findings_codes 4 | 1092071000000105,Ex-heavy smoker (20-39/day) (finding),non_smoker_findings_codes 5 | 1092091000000109,Ex-moderate smoker (10-19/day) (finding),non_smoker_findings_codes 6 | 1092111000000104,Ex-light smoker (1-9/day) (finding),non_smoker_findings_codes 7 | 1092131000000107,Ex-trivial smoker (<1/day) (finding),non_smoker_findings_codes 8 | 160617001,Stopped smoking (finding),non_smoker_findings_codes 9 | 160620009,Ex-pipe smoker (finding),non_smoker_findings_codes 10 | 160621008,Ex-cigar smoker (finding),non_smoker_findings_codes 11 | 160625004,Date ceased smoking (observable entity),Observable_Entities 12 | 228486009,Time since stopped smoking (observable entity),Observable_Entities 13 | 266921000,Ex-trivial cigarette smoker (<1/day) (finding),Value_Finding_Non_Smoker 14 | 266922007,Ex-light cigarette smoker (1-9/day) (finding),Value_Finding_Non_Smoker 15 | 266923002,Ex-moderate cigarette smoker (10-19/day) (finding),Value_Finding_Non_Smoker 16 | 266924008,Ex-heavy cigarette smoker (20-39/day) (finding),Value_Finding_Non_Smoker 17 | 266925009,Ex-very heavy cigarette smoker (40+/day) (finding),Value_Finding_Non_Smoker 18 | 266928006,Ex-cigarette smoker amount unknown (finding),non_smoker_findings_codes 19 | 281018007,Ex-cigarette smoker (finding),non_smoker_findings_codes 20 | 360890004,Intolerant ex-smoker (finding),non_smoker_findings_codes 21 | 360900008,Aggressive ex-smoker (finding),non_smoker_findings_codes 22 | 48031000119106,Ex-smoker for more than 1 year (finding),non_smoker_findings_codes 23 | 492191000000103,Ex roll-up cigarette smoker (finding),non_smoker_findings_codes 24 | 53896009,Tolerant ex-smoker (finding),non_smoker_findings_codes 25 | 735112005,Date ceased using moist tobacco (observable entity),Observable_Entities 26 | 735128000,Ex-smoker for less than 1 year (finding),non_smoker_findings_codes 27 | 8517006,Ex-smoker (finding),non_smoker_findings_codes 28 | 105539002,Non-smoker for personal reasons (finding),non_smoker_findings_codes 29 | 105540000,Non-smoker for religious reasons (finding),non_smoker_findings_codes 30 | 105541001,Non-smoker for medical reasons (finding),non_smoker_findings_codes 31 | 1092031000000108,Ex-smoker amount unknown (finding),non_smoker_findings_codes 32 | 1092041000000104,Ex-very heavy smoker (40+/day) (finding),non_smoker_findings_codes 33 | 1092071000000105,Ex-heavy smoker (20-39/day) (finding),non_smoker_findings_codes 34 | 1092091000000109,Ex-moderate smoker (10-19/day) (finding),non_smoker_findings_codes 35 | 1092111000000104,Ex-light smoker (1-9/day) (finding),non_smoker_findings_codes 36 | 1092131000000107,Ex-trivial smoker (<1/day) (finding),non_smoker_findings_codes 37 | 230058003,Pipe tobacco consumption (observable entity),Observable_Entities 38 | 1092481000000104,Number of calculated smoking pack years (observable entity),Observable_Entities 39 | 110483000,Tobacco user (finding),Value_Finding_Smoker 40 | 134406006,Smoking reduced (finding),Value_Finding_Smoker 41 | 160603005,Light cigarette smoker (1-9 cigs/day) (finding),Value_Finding_Smoker 42 | 160604004,Moderate cigarette smoker (10-19 cigs/day) (finding),Value_Finding_Smoker 43 | 160605003,Heavy cigarette smoker (20-39 cigs/day) (finding),Value_Finding_Smoker 44 | 160606002,Very heavy cigarette smoker (40+ cigs/day) (finding),Value_Finding_Smoker 45 | 160612007,Keeps trying to stop smoking (finding),smoker_findings_codes 46 | 160613002,Admitted tobacco consumption possibly untrue (finding),smoker_findings_codes 47 | 160616005,Trying to give up smoking (finding),smoker_findings_codes 48 | 160617001,Stopped smoking (finding),non_smoker_findings_codes 49 | 160618006,Current non-smoker (finding),Microtest_Error 50 | 160619003,Rolls own cigarettes (finding),smoker_findings_codes 51 | 160620009,Ex-pipe smoker (finding),non_smoker_findings_codes 52 | 160621008,Ex-cigar smoker (finding),non_smoker_findings_codes 53 | 160625004,Date ceased smoking (observable entity),Observable_Entities 54 | 203191000000107,Wants to stop smoking (finding),smoker_findings_codes 55 | 221000119102,Never smoked any substance (finding),non_smoker_findings_codes 56 | 225934006,Smokes in bed (finding),smoker_findings_codes 57 | 228486009,Time since stopped smoking (observable entity),Observable_Entities 58 | 230056004,Cigarette consumption (observable entity),Observable_Entities 59 | 230057008,Cigar consumption (observable entity),Observable_Entities 60 | 230059006,Occasional cigarette smoker (finding),Value_Finding_Smoker 61 | 230060001,Light cigarette smoker (finding),Value_Finding_Smoker 62 | 230062009,Moderate cigarette smoker (finding),Value_Finding_Smoker 63 | 230063004,Heavy cigarette smoker (finding),Value_Finding_Smoker 64 | 230064005,Very heavy cigarette smoker (finding),smoker_findings_codes 65 | 230065006,Chain smoker (finding),smoker_findings_codes 66 | 266918002,Tobacco smoking consumption (observable entity),Observable_Entities 67 | 266919005,Never smoked tobacco (finding),Microtest_Error 68 | 266920004,Trivial cigarette smoker (less than one cigarette/day) (finding),Value_Finding_Smoker 69 | 266921000,Ex-trivial cigarette smoker (<1/day) (finding),Value_Finding_Non_Smoker 70 | 266922007,Ex-light cigarette smoker (1-9/day) (finding),Value_Finding_Non_Smoker 71 | 266923002,Ex-moderate cigarette smoker (10-19/day) (finding),Value_Finding_Non_Smoker 72 | 266924008,Ex-heavy cigarette smoker (20-39/day) (finding),Value_Finding_Non_Smoker 73 | 266925009,Ex-very heavy cigarette smoker (40+/day) (finding),Value_Finding_Non_Smoker 74 | 266928006,Ex-cigarette smoker amount unknown (finding),non_smoker_findings_codes 75 | 266929003,Smoking started (finding),smoker_findings_codes 76 | 281018007,Ex-cigarette smoker (finding),non_smoker_findings_codes 77 | 308438006,Smoking restarted (finding),smoker_findings_codes 78 | 360890004,Intolerant ex-smoker (finding),non_smoker_findings_codes 79 | 360900008,Aggressive ex-smoker (finding),non_smoker_findings_codes 80 | 360918006,Aggressive non-smoker (finding),non_smoker_findings_codes 81 | 360929005,Intolerant non-smoker (finding),non_smoker_findings_codes 82 | 365982000,Finding of tobacco smoking consumption (finding),Value_Finding_Smoker 83 | 394871007,Thinking about stopping smoking (finding),smoker_findings_codes 84 | 394872000,Ready to stop smoking (finding),smoker_findings_codes 85 | 394873005,Not interested in stopping smoking (finding),smoker_findings_codes 86 | 401159003,Reason for restarting smoking (observable entity),Observable_Entities 87 | 401201003,Cigarette pack-years (observable entity),Observable_Entities 88 | 405746006,Current non smoker but past smoking history unknown (finding),non_smoker_findings_codes 89 | 413173009,Minutes from waking to first tobacco consumption (observable entity),Observable_Entities 90 | 428041000124106,Occasional tobacco smoker (finding),smoker_findings_codes 91 | 446172000,Failed attempt to stop smoking (finding),smoker_findings_codes 92 | 449345000,Smoked before confirmation of pregnancy (finding),smoker_findings_codes 93 | 449368009,Stopped smoking during pregnancy (finding),non_smoker_findings_codes 94 | 449369001,Stopped smoking before pregnancy (finding),non_smoker_findings_codes 95 | 449868002,Smokes tobacco daily (finding),smoker_findings_codes 96 | 48031000119106,Ex-smoker for more than 1 year (finding),non_smoker_findings_codes 97 | 492191000000103,Ex roll-up cigarette smoker (finding),non_smoker_findings_codes 98 | 517211000000106,Recently stopped smoking (finding),non_smoker_findings_codes 99 | 53896009,Tolerant ex-smoker (finding),non_smoker_findings_codes 100 | 56578002,Moderate smoker (20 or less per day) (finding),Value_Finding_Smoker 101 | 56771006,Heavy smoker (over 20 per day) (finding),Value_Finding_Smoker 102 | 59978006,Cigar smoker (finding),Value_Finding_Smoker 103 | 65568007,Cigarette smoker (finding),Value_Finding_Smoker 104 | 735112005,Date ceased using moist tobacco (observable entity),Observable_Entities 105 | 735128000,Ex-smoker for less than 1 year (finding),non_smoker_findings_codes 106 | 77176002,Smoker (finding),Value_Finding_Smoker 107 | 82302008,Pipe smoker (finding),Value_Finding_Smoker 108 | 836001000000109,Waterpipe tobacco consumption (observable entity),Observable_Entities 109 | 8392000,Non-smoker (finding),non_smoker_findings_codes 110 | 8517006,Ex-smoker (finding),non_smoker_findings_codes 111 | 87739003,Tolerant non-smoker (finding),non_smoker_findings_codes 112 | 221000119102,Never smoked any substance (finding),non_smoker_findings_codes 113 | 266919005,Never smoked tobacco (finding),Microtest_Error 114 | 1092031000000108,Ex-smoker amount unknown (finding),non_smoker_findings_codes 115 | 1092041000000104,Ex-very heavy smoker (40+/day) (finding),non_smoker_findings_codes 116 | 1092071000000105,Ex-heavy smoker (20-39/day) (finding),non_smoker_findings_codes 117 | 1092091000000109,Ex-moderate smoker (10-19/day) (finding),non_smoker_findings_codes 118 | 1092111000000104,Ex-light smoker (1-9/day) (finding),non_smoker_findings_codes 119 | 1092131000000107,Ex-trivial smoker (<1/day) (finding),non_smoker_findings_codes 120 | 134406006,Smoking reduced (finding),Value_Finding_Smoker 121 | 160603005,Light cigarette smoker (1-9 cigs/day) (finding),Value_Finding_Smoker 122 | 160604004,Moderate cigarette smoker (10-19 cigs/day) (finding),Value_Finding_Smoker 123 | 160605003,Heavy cigarette smoker (20-39 cigs/day) (finding),Value_Finding_Smoker 124 | 160606002,Very heavy cigarette smoker (40+ cigs/day) (finding),Value_Finding_Smoker 125 | 160612007,Keeps trying to stop smoking (finding),smoker_findings_codes 126 | 160613002,Admitted tobacco consumption possibly untrue (finding),smoker_findings_codes 127 | 160616005,Trying to give up smoking (finding),smoker_findings_codes 128 | 160617001,Stopped smoking (finding),non_smoker_findings_codes 129 | 160619003,Rolls own cigarettes (finding),smoker_findings_codes 130 | 160620009,Ex-pipe smoker (finding),non_smoker_findings_codes 131 | 160621008,Ex-cigar smoker (finding),non_smoker_findings_codes 132 | 160625004,Date ceased smoking (observable entity),Observable_Entities 133 | 203191000000107,Wants to stop smoking (finding),smoker_findings_codes 134 | 221000119102,Never smoked any substance (finding),non_smoker_findings_codes 135 | 225934006,Smokes in bed (finding),smoker_findings_codes 136 | 228486009,Time since stopped smoking (observable entity),Observable_Entities 137 | 230056004,Cigarette consumption (observable entity),Observable_Entities 138 | 230057008,Cigar consumption (observable entity),Observable_Entities 139 | 230058003,Pipe tobacco consumption (observable entity),Observable_Entities 140 | 230059006,Occasional cigarette smoker (finding),Value_Finding_Smoker 141 | 230060001,Light cigarette smoker (finding),Value_Finding_Smoker 142 | 230062009,Moderate cigarette smoker (finding),Value_Finding_Smoker 143 | 230063004,Heavy cigarette smoker (finding),Value_Finding_Smoker 144 | 230064005,Very heavy cigarette smoker (finding),smoker_findings_codes 145 | 230065006,Chain smoker (finding),smoker_findings_codes 146 | 266918002,Tobacco smoking consumption (observable entity),Observable_Entities 147 | 266919005,Never smoked tobacco (finding),Microtest_Error 148 | 266920004,Trivial cigarette smoker (less than one cigarette/day) (finding),Value_Finding_Smoker 149 | 266921000,Ex-trivial cigarette smoker (<1/day) (finding),Value_Finding_Non_Smoker 150 | 266922007,Ex-light cigarette smoker (1-9/day) (finding),Value_Finding_Non_Smoker 151 | 266923002,Ex-moderate cigarette smoker (10-19/day) (finding),Value_Finding_Non_Smoker 152 | 266924008,Ex-heavy cigarette smoker (20-39/day) (finding),Value_Finding_Non_Smoker 153 | 266925009,Ex-very heavy cigarette smoker (40+/day) (finding),Value_Finding_Non_Smoker 154 | 266928006,Ex-cigarette smoker amount unknown (finding),non_smoker_findings_codes 155 | 266929003,Smoking started (finding),smoker_findings_codes 156 | 281018007,Ex-cigarette smoker (finding),non_smoker_findings_codes 157 | 308438006,Smoking restarted (finding),smoker_findings_codes 158 | 360890004,Intolerant ex-smoker (finding),non_smoker_findings_codes 159 | 360900008,Aggressive ex-smoker (finding),non_smoker_findings_codes 160 | 394871007,Thinking about stopping smoking (finding),smoker_findings_codes 161 | 394872000,Ready to stop smoking (finding),smoker_findings_codes 162 | 394873005,Not interested in stopping smoking (finding),smoker_findings_codes 163 | 401159003,Reason for restarting smoking (observable entity),Observable_Entities 164 | 401201003,Cigarette pack-years (observable entity),Observable_Entities 165 | 413173009,Minutes from waking to first tobacco consumption (observable entity),Observable_Entities 166 | 428041000124106,Occasional tobacco smoker (finding),smoker_findings_codes 167 | 446172000,Failed attempt to stop smoking (finding),smoker_findings_codes 168 | 449868002,Smokes tobacco daily (finding),smoker_findings_codes 169 | 48031000119106,Ex-smoker for more than 1 year (finding),non_smoker_findings_codes 170 | 492191000000103,Ex roll-up cigarette smoker (finding),non_smoker_findings_codes 171 | 53896009,Tolerant ex-smoker (finding),non_smoker_findings_codes 172 | 56578002,Moderate smoker (20 or less per day) (finding),Value_Finding_Smoker 173 | 56771006,Heavy smoker (over 20 per day) (finding),Value_Finding_Smoker 174 | 59978006,Cigar smoker (finding),Value_Finding_Smoker 175 | 65568007,Cigarette smoker (finding),Value_Finding_Smoker 176 | 735112005,Date ceased using moist tobacco (observable entity),Observable_Entities 177 | 735128000,Ex-smoker for less than 1 year (finding),non_smoker_findings_codes 178 | 77176002,Smoker (finding),Value_Finding_Smoker 179 | 82302008,Pipe smoker (finding),Value_Finding_Smoker 180 | 836001000000109,Waterpipe tobacco consumption (observable entity),Observable_Entities 181 | 8517006,Ex-smoker (finding),non_smoker_findings_codes 182 | 134406006,Smoking reduced (finding),Value_Finding_Smoker 183 | 160603005,Light cigarette smoker (1-9 cigs/day) (finding),Value_Finding_Smoker 184 | 160604004,Moderate cigarette smoker (10-19 cigs/day) (finding),Value_Finding_Smoker 185 | 160605003,Heavy cigarette smoker (20-39 cigs/day) (finding),Value_Finding_Smoker 186 | 160606002,Very heavy cigarette smoker (40+ cigs/day) (finding),Value_Finding_Smoker 187 | 160612007,Keeps trying to stop smoking (finding),smoker_findings_codes 188 | 160613002,Admitted tobacco consumption possibly untrue (finding),smoker_findings_codes 189 | 160616005,Trying to give up smoking (finding),smoker_findings_codes 190 | 160619003,Rolls own cigarettes (finding),smoker_findings_codes 191 | 203191000000107,Wants to stop smoking (finding),smoker_findings_codes 192 | 225934006,Smokes in bed (finding),smoker_findings_codes 193 | 230056004,Cigarette consumption (observable entity),Observable_Entities 194 | 230057008,Cigar consumption (observable entity),Observable_Entities 195 | 230058003,Pipe tobacco consumption (observable entity),Observable_Entities 196 | 230059006,Occasional cigarette smoker (finding),Value_Finding_Smoker 197 | 230060001,Light cigarette smoker (finding),Value_Finding_Smoker 198 | 230062009,Moderate cigarette smoker (finding),Value_Finding_Smoker 199 | 230063004,Heavy cigarette smoker (finding),Value_Finding_Smoker 200 | 230064005,Very heavy cigarette smoker (finding),smoker_findings_codes 201 | 230065006,Chain smoker (finding),smoker_findings_codes 202 | 266918002,Tobacco smoking consumption (observable entity),Observable_Entities 203 | 266920004,Trivial cigarette smoker (less than one cigarette/day) (finding),Value_Finding_Smoker 204 | 266929003,Smoking started (finding),smoker_findings_codes 205 | 308438006,Smoking restarted (finding),smoker_findings_codes 206 | 394871007,Thinking about stopping smoking (finding),smoker_findings_codes 207 | 394872000,Ready to stop smoking (finding),smoker_findings_codes 208 | 394873005,Not interested in stopping smoking (finding),smoker_findings_codes 209 | 836001000000109,Waterpipe tobacco consumption (observable entity),Observable_Entities 210 | 82302008,Pipe smoker (finding),Value_Finding_Smoker 211 | 401159003,Reason for restarting smoking (observable entity),Observable_Entities 212 | 413173009,Minutes from waking to first tobacco consumption (observable entity),Observable_Entities 213 | 428041000124106,Occasional tobacco smoker (finding),smoker_findings_codes 214 | 446172000,Failed attempt to stop smoking (finding),smoker_findings_codes 215 | 449868002,Smokes tobacco daily (finding),smoker_findings_codes 216 | 56578002,Moderate smoker (20 or less per day) (finding),Value_Finding_Smoker 217 | 56771006,Heavy smoker (over 20 per day) (finding),Value_Finding_Smoker 218 | 59978006,Cigar smoker (finding),Value_Finding_Smoker 219 | 65568007,Cigarette smoker (finding),Value_Finding_Smoker 220 | 77176002,Smoker (finding),Value_Finding_Smoker 221 | --------------------------------------------------------------------------------