├── data └── .gitkeep ├── sql ├── mimic-iii │ ├── .gitkeep │ ├── time_of_vent_and_demographic.sql │ ├── 1hour_duration.sql │ ├── vent_settings_details.sql │ └── hourly_lab_gcs_vitals.sql ├── mimic-iv │ ├── .gitkeep │ ├── hospitalstay │ │ ├── outcome.sql │ │ └── demographic.sql │ ├── measurement │ │ ├── inflammation.sql │ │ ├── cardiac_marker.sql │ │ ├── rhythm.sql │ │ ├── coagulation.sql │ │ ├── covid.sql │ │ ├── complete_blood_count.sql │ │ ├── enzyme.sql │ │ ├── blood_differential.sql │ │ ├── chemistry.sql │ │ ├── vitalsign.sql │ │ ├── ventilator_setting.sql │ │ ├── oxygen_delivery.sql │ │ ├── gcs.sql │ │ └── bg.sql │ ├── generate_tables.sh │ └── medication │ │ ├── enoxaparin_administration.sql │ │ ├── enoxaparin.sql │ │ ├── heparin_administration.sql │ │ └── heparin.sql └── mimic-covid │ ├── generate_tables.sh │ └── ventilator_duration.sql ├── environment.yml ├── .gitignore └── README.md /data/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sql/mimic-iii/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sql/mimic-iv/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sql/mimic-iv/hospitalstay/outcome.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | a.subject_id 3 | , i.stay_id 4 | , a.dischtime as hospital_discharge_time 5 | , a.hospital_expire_flag 6 | FROM mimic_icu.icustays i 7 | INNER JOIN mimic_core.admissions a 8 | ON i.hadm_id = a.hadm_id 9 | ORDER BY subject_id, stay_id; -------------------------------------------------------------------------------- /sql/mimic-iii/time_of_vent_and_demographic.sql: -------------------------------------------------------------------------------- 1 | SELECT v.* , w.weight , h.height,d.starttime,d.endtime,d.ventnum, d.duration_hours FROM `physionet-data.mimiciii_derived.icustay_detail` as v 2 | left join `physionet-data.mimiciii_derived.ventdurations` as d 3 | on d.icustay_id = v.icustay_id 4 | left join `physionet-data.mimiciii_derived.weightfirstday` as w 5 | on w.icustay_id = v.icustay_id 6 | left join `physionet-data.mimiciii_derived.heightfirstday` as h 7 | on h.icustay_id = v.icustay_id 8 | 9 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: covid-19 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - pymssql=2.1.4 7 | - numpy=1.18.1 8 | - python=3.7 9 | - sqlalchemy=1.3.3 10 | - seaborn=0.9.0 11 | - scikit-learn=0.22.2 12 | - pandas=1.0.3 13 | - psycopg2=2.7.6.1 14 | - matplotlib=3.2.1 15 | - pytest=4.3.1 16 | - jupyter=1.0.0 17 | - lxml=4.3.3 18 | - tqdm=4.31.1 19 | - autopep8 20 | - pylint=2.3.1 21 | - flake8=3.7.7 22 | - regex=2019.11.1 23 | - sympy=1.4 24 | - pip=20.0.2 25 | - shap=0.34.0 26 | - pip: 27 | - xgboost=1.0.2 -------------------------------------------------------------------------------- /sql/mimic-iv/measurement/inflammation.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | MAX(subject_id) AS subject_id 3 | , MAX(hadm_id) AS hadm_id 4 | , MAX(charttime) AS charttime 5 | , le.specimen_id 6 | -- convert from itemid into a meaningful column 7 | , MAX(CASE WHEN itemid = 50889 THEN valuenum ELSE NULL END) AS crp 8 | , MAX(CASE WHEN itemid = 51652 THEN valuenum ELSE NULL END) AS crp_high_sens 9 | , CAST(NULL AS NUMERIC) AS il6 10 | , CAST(NULL AS NUMERIC) AS procalcitonin 11 | FROM mimic_hosp.labevents le 12 | WHERE le.itemid IN 13 | ( 14 | 50889, -- crp 15 | 51652 -- high sensitivity CRP 16 | ) 17 | AND valuenum IS NOT NULL 18 | -- lab values cannot be 0 and cannot be negative 19 | AND valuenum > 0 20 | GROUP BY le.specimen_id 21 | ORDER BY subject_id, charttime; -------------------------------------------------------------------------------- /sql/mimic-iv/measurement/cardiac_marker.sql: -------------------------------------------------------------------------------- 1 | -- begin query that extracts the data 2 | SELECT 3 | MAX(subject_id) AS subject_id 4 | , MAX(hadm_id) AS hadm_id 5 | , MAX(charttime) AS charttime 6 | , le.specimen_id 7 | -- convert from itemid into a meaningful column 8 | , MAX(CASE WHEN itemid = 51002 THEN value ELSE NULL END) AS troponin_i 9 | , MAX(CASE WHEN itemid = 52598 THEN value ELSE NULL END) AS troponin_i_poc 10 | , MAX(CASE WHEN itemid = 51003 THEN value ELSE NULL END) AS troponin_t 11 | , MAX(CASE WHEN itemid = 50911 THEN valuenum ELSE NULL END) AS ck_mb 12 | FROM mimic_hosp.labevents le 13 | WHERE le.itemid IN 14 | ( 15 | 51002, -- Troponin I 16 | 52598, -- Troponin I, point of care 17 | 51003, -- Troponin T 18 | 50911 -- Creatinine Kinase, MB isoenzyme 19 | ) 20 | GROUP BY le.specimen_id 21 | ORDER BY subject_id, charttime; 22 | -------------------------------------------------------------------------------- /sql/mimic-iv/measurement/rhythm.sql: -------------------------------------------------------------------------------- 1 | -- Heart rhythm related documentation 2 | select 3 | ce.subject_id 4 | , ce.charttime 5 | , MAX(case when itemid = 220048 THEN value ELSE NULL END) AS heart_rhythm 6 | , MAX(case when itemid = 224650 THEN value ELSE NULL END) AS ectopy_type 7 | , MAX(case when itemid = 224651 THEN value ELSE NULL END) AS ectopy_frequency 8 | , MAX(case when itemid = 226479 THEN value ELSE NULL END) AS ectopy_type_secondary 9 | , MAX(case when itemid = 226480 THEN value ELSE NULL END) AS ectopy_frequency_secondary 10 | FROM mimic_icu.chartevents ce 11 | where ce.stay_id IS NOT NULL 12 | and ce.itemid in 13 | ( 14 | 220048, -- Heart Rhythm 15 | 224650, -- Ectopy Type 1 16 | 224651, -- Ectopy Frequency 1 17 | 226479, -- Ectopy Type 2 18 | 226480 -- Ectopy Frequency 2 19 | ) 20 | GROUP BY ce.subject_id, ce.charttime 21 | order by ce.subject_id, ce.charttime; 22 | -------------------------------------------------------------------------------- /sql/mimic-iii/1hour_duration.sql: -------------------------------------------------------------------------------- 1 | with co_stg as 2 | ( 3 | select icustay_id, hadm_id 4 | , DATETIME_TRUNC( intime, hour) as intime 5 | , outtime 6 | , generate_array 7 | ( 8 | 1 , CEIL(DATETIME_DIFF(outtime, intime, HOUR)) 9 | ) as hr 10 | FROM `physionet-data.mimiciii_clinical.icustays` ie 11 | inner join `physionet-data.mimiciii_clinical.patients` pt 12 | on ie.subject_id = pt.subject_id 13 | -- filter to adults by removing admissions with DOB ~= admission time 14 | where ie.intime > (DATETIME_ADD(pt.dob, INTERVAL 1 YEAR)) 15 | ) 16 | -- add in the charttime column 17 | , co as 18 | ( 19 | select icustay_id, hadm_id, intime, outtime 20 | , DATETIME_ADD(intime, INTERVAL CAST(hr_flat AS INT64)-1 HOUR) as starttime 21 | , DATETIME_ADD(intime, INTERVAL CAST(hr_flat AS INT64) HOUR) as endtime 22 | , CAST(hr_flat AS INT64) as hour 23 | from co_stg 24 | CROSS JOIN UNNEST(co_stg.hr) AS hr_flat 25 | )select * from co 26 | order by icustay_id , hour 27 | -------------------------------------------------------------------------------- /sql/mimic-iv/hospitalstay/demographic.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | a.subject_id 3 | , a.hadm_id 4 | -- TODO: shift anchor age to admission date to get admission age 5 | , pt.anchor_age AS age 6 | , CASE WHEN pt.gender = 'F' THEN 1 ELSE 0 END AS is_female 7 | -- race/ethnicity 8 | , CASE WHEN a.ethnicity = 'WHITE' THEN 1 ELSE 0 END AS ethnicity_white 9 | , CASE WHEN a.ethnicity = 'BLACK/AFRICAN AMERICAN' THEN 1 ELSE 0 END AS ethnicity_black 10 | , CASE WHEN a.ethnicity = 'HISPANIC/LATINO' THEN 1 ELSE 0 END AS ethnicity_hispanic 11 | -- TODO: BMI on admission 12 | -- one option: 226531 in chartevents has weight - has noisy data tho 13 | -- pregnancy is sourced from ICU data 14 | , CAST(COALESCE(prg.pregnant, 0) AS INT64) AS pregnant 15 | FROM mimic_core.admissions a 16 | INNER JOIN mimic_core.patients pt 17 | ON a.subject_id = pt.subject_id 18 | LEFT JOIN ( 19 | SELECT hadm_id, MAX(valuenum) as pregnant 20 | FROM mimic_icu.chartevents 21 | WHERE itemid = 225082 -- Pregnant 22 | GROUP BY 1 23 | ) prg 24 | ON a.hadm_id = prg.hadm_id 25 | ORDER BY subject_id; -------------------------------------------------------------------------------- /sql/mimic-covid/generate_tables.sh: -------------------------------------------------------------------------------- 1 | # set the project 2 | # gcloud config set project bidmc-covid-19 3 | 4 | export TARGET_DATASET=mimic_covid_derived 5 | 6 | # run the MIMIC-IV queries replacing the dataset with MIMIC-COVID datasets 7 | export MIMICIV_PATH='../mimic-iv' 8 | export DATASET_REGEX='s/mimic_(.+)/mimic_covid_\1/g' 9 | 10 | # generate tables in pivoted subfolder 11 | for d in measurement medication; 12 | do 13 | for fn in `ls ${MIMICIV_PATH}/$d`; 14 | do 15 | # table name is file name minus extension 16 | tbl=`echo $fn | cut -d. -f1` 17 | # do not run bg_art until all other queries are run 18 | if [[ tbl != 'bg_art' ]]; then 19 | echo "${d}/${fn}" 20 | cat ${MIMICIV_PATH}/${d}/${fn} | sed -E $DATASET_REGEX | bq query --use_legacy_sql=False --replace --destination_table=${TARGET_DATASET}.${tbl} 21 | fi 22 | done 23 | done 24 | 25 | # vasopressor medications also have a specific order 26 | # cat medication/vasopressor.sql | sed -E $DATASET_REGEX | bq query --use_legacy_sql=False --replace --destination_table=${TARGET_DATASET}.vasopressor -------------------------------------------------------------------------------- /sql/mimic-iv/measurement/coagulation.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | MAX(subject_id) AS subject_id 3 | , MAX(hadm_id) AS hadm_id 4 | , MAX(charttime) AS charttime 5 | , le.specimen_id 6 | -- convert from itemid into a meaningful column 7 | , MAX(CASE WHEN itemid = 51196 THEN valuenum ELSE NULL END) AS d_dimer 8 | , MAX(CASE WHEN itemid = 51214 THEN valuenum ELSE NULL END) AS fibrinogen 9 | , MAX(CASE WHEN itemid = 51297 THEN valuenum ELSE NULL END) AS thrombin 10 | , MAX(CASE WHEN itemid = 51237 THEN valuenum ELSE NULL END) AS inr 11 | , MAX(CASE WHEN itemid = 51274 THEN valuenum ELSE NULL END) AS pt 12 | , MAX(CASE WHEN itemid = 51275 THEN valuenum ELSE NULL END) AS ptt 13 | FROM mimic_hosp.labevents le 14 | WHERE le.itemid IN 15 | ( 16 | -- 51149, 52750, 52072, 52073 -- Bleeding Time, no data as of 2020/04/01 17 | 51196, -- D-Dimer 18 | 51214, -- Fibrinogen 19 | -- 51280, 52893, -- Reptilase Time, Not measured as of 2020/04/01 20 | -- 51281, 52161, -- Reptilase Time Control, Not measured as of 2020/04/01 21 | 51297, -- thrombin 22 | 51237, -- INR 23 | 51274, -- PT 24 | 51275 -- PTT 25 | ) 26 | AND valuenum IS NOT NULL 27 | GROUP BY le.specimen_id 28 | ORDER BY subject_id, charttime; 29 | -------------------------------------------------------------------------------- /sql/mimic-iv/measurement/covid.sql: -------------------------------------------------------------------------------- 1 | -- begin query that extracts the data 2 | SELECT 3 | MAX(subject_id) AS subject_id 4 | , MAX(hadm_id) AS hadm_id 5 | , MAX(charttime) AS charttime 6 | , le.specimen_id 7 | -- the case statement for covid_status misses ~20 labs 8 | -- these have conflicting cov2/cov4 reports (usually cov2 is correct) 9 | -- other rows have cov5 10 | -- can impute as needed 11 | , MAX(CASE WHEN itemid = 51853 AND storetime IS NULL THEN 'PENDING' 12 | WHEN itemid = 51853 THEN value 13 | ELSE NULL 14 | END) AS covid_status 15 | , MAX(CASE WHEN itemid = 51890 AND storetime IS NULL THEN 'PENDING' 16 | WHEN itemid = 51890 THEN value 17 | ELSE NULL 18 | END) AS covid_rapid 19 | , MAX(CASE WHEN itemid = 51849 THEN value ELSE NULL END) AS cov1 20 | , MAX(CASE WHEN itemid = 51908 THEN value ELSE NULL END) AS cov2 21 | , MAX(CASE WHEN itemid = 51850 THEN value ELSE NULL END) AS cov3 22 | , MAX(CASE WHEN itemid = 51909 THEN value ELSE NULL END) AS cov4 23 | , MAX(CASE WHEN itemid = 51851 THEN value ELSE NULL END) AS cov5 24 | , MAX(CASE WHEN itemid = 51852 THEN value ELSE NULL END) AS cov6 25 | FROM mimic_hosp.labevents le 26 | WHERE le.itemid IN 27 | ( 28 | 51849, -- COV1 29 | 51908, -- COV2 30 | 51850, -- COV3 31 | 51909, -- COV4 32 | 51851, -- COV5 33 | 51852, -- COV6 34 | 51853, -- COVID-19 35 | 51890 -- Rapid COVID 36 | ) 37 | GROUP BY le.specimen_id 38 | ORDER BY charttime; 39 | -------------------------------------------------------------------------------- /sql/mimic-iv/measurement/complete_blood_count.sql: -------------------------------------------------------------------------------- 1 | -- begin query that extracts the data 2 | SELECT 3 | MAX(subject_id) AS subject_id 4 | , MAX(hadm_id) AS hadm_id 5 | , MAX(charttime) AS charttime 6 | , le.specimen_id 7 | -- convert from itemid into a meaningful column 8 | , MAX(CASE WHEN itemid = 51221 THEN valuenum ELSE NULL END) AS hematocrit 9 | , MAX(CASE WHEN itemid = 51222 THEN valuenum ELSE NULL END) AS hemoglobin 10 | , MAX(CASE WHEN itemid = 51248 THEN valuenum ELSE NULL END) AS mch 11 | , MAX(CASE WHEN itemid = 51249 THEN valuenum ELSE NULL END) AS mchc 12 | , MAX(CASE WHEN itemid = 51250 THEN valuenum ELSE NULL END) AS mcv 13 | , MAX(CASE WHEN itemid = 51265 THEN valuenum ELSE NULL END) AS platelets 14 | , MAX(CASE WHEN itemid = 51279 THEN valuenum ELSE NULL END) AS rbc 15 | , MAX(CASE WHEN itemid = 51277 THEN valuenum ELSE NULL END) AS rdw 16 | , MAX(CASE WHEN itemid = 52159 THEN valuenum ELSE NULL END) AS rdwsd 17 | , MAX(CASE WHEN itemid = 51301 THEN valuenum ELSE NULL END) AS wbc 18 | FROM mimic_hosp.labevents le 19 | WHERE le.itemid IN 20 | ( 21 | 51221, -- hematocrit 22 | 51222, -- hemoglobin 23 | 51248, -- MCH 24 | 51249, -- MCHC 25 | 51250, -- MCV 26 | 51265, -- platelets 27 | 51279, -- RBC 28 | 51277, -- RDW 29 | 52159, -- RDW SD 30 | 51301 -- WBC 31 | 32 | ) 33 | AND valuenum IS NOT NULL 34 | -- lab values cannot be 0 and cannot be negative 35 | AND valuenum > 0 36 | GROUP BY le.specimen_id 37 | ORDER BY subject_id, charttime; 38 | -------------------------------------------------------------------------------- /sql/mimic-iv/measurement/enzyme.sql: -------------------------------------------------------------------------------- 1 | -- begin query that extracts the data 2 | SELECT 3 | MAX(subject_id) AS subject_id 4 | , MAX(hadm_id) AS hadm_id 5 | , MAX(charttime) AS charttime 6 | , le.specimen_id 7 | -- convert from itemid into a meaningful column 8 | , MAX(CASE WHEN itemid = 50861 THEN valuenum ELSE NULL END) AS alt 9 | , MAX(CASE WHEN itemid = 50863 THEN valuenum ELSE NULL END) AS alkphos 10 | , MAX(CASE WHEN itemid = 50878 THEN valuenum ELSE NULL END) AS ast 11 | , MAX(CASE WHEN itemid = 50867 THEN valuenum ELSE NULL END) AS amylase 12 | , MAX(CASE WHEN itemid = 50885 THEN valuenum ELSE NULL END) AS bilirubin_total 13 | , MAX(CASE WHEN itemid = 50883 THEN valuenum ELSE NULL END) AS bilirubin_direct 14 | , MAX(CASE WHEN itemid = 50884 THEN valuenum ELSE NULL END) AS bilirubin_indirect 15 | , MAX(CASE WHEN itemid = 50910 THEN valuenum ELSE NULL END) AS ck_cpk 16 | , MAX(CASE WHEN itemid = 50911 THEN valuenum ELSE NULL END) AS ck_mb 17 | , MAX(CASE WHEN itemid = 50954 THEN valuenum ELSE NULL END) AS ld_ldh 18 | FROM mimic_hosp.labevents le 19 | WHERE le.itemid IN 20 | ( 21 | 50861, -- ALT 22 | 50863, -- Alk Phosp 23 | 50878, -- AST 24 | 50867, -- Amylase 25 | 50885, -- total bili 26 | 50884, -- indirect bili 27 | 50883, -- direct bili 28 | 50910, -- ck_cpk 29 | 50911, -- CK-MB 30 | 50954 -- ld_ldh 31 | ) 32 | AND valuenum IS NOT NULL 33 | -- lab values cannot be 0 and cannot be negative 34 | AND valuenum > 0 35 | GROUP BY le.specimen_id 36 | ORDER BY subject_id, charttime; 37 | -------------------------------------------------------------------------------- /sql/mimic-iv/generate_tables.sh: -------------------------------------------------------------------------------- 1 | # set the project 2 | gcloud config set project bidmc-covid-19 3 | 4 | export TARGET_DATASET=mimic_derived 5 | 6 | # generate tables in pivoted subfolder 7 | for d in measurement medication; 8 | do 9 | for fn in `ls $d`; 10 | do 11 | # table name is file name minus extension 12 | tbl=`echo $fn | cut -d. -f1` 13 | # do not run bg_art until all other queries are run 14 | #if [[ tbl != 'bg_art' ]]; then 15 | echo "${d}/${fn}" 16 | cat ${d}/${fn} | bq query --use_legacy_sql=False --replace --destination_table=${TARGET_DATASET}.${tbl} 17 | #fi 18 | done 19 | done 20 | 21 | # vasopressor medications also have a specific order 22 | # cat medication/vasopressor.sql | bq query --use_legacy_sql=False --replace --destination_table=${TARGET_DATASET}.vasopressor 23 | # cat medication/vasopressor_duration.sql | bq query --use_legacy_sql=False --replace --destination_table=${TARGET_DATASET}.vasopressor_duration 24 | 25 | # defines stay_id and inclusion criteria for study 26 | # tbl='vitalsign'; cat measurement/${tbl}.sql | bq query --use_legacy_sql=False --replace --destination_table=${TARGET_DATASET}.${tbl} 27 | # cat measurement/covid.sql | bq query --use_legacy_sql=False --replace --destination_table=${TARGET_DATASET}.covid 28 | # cat cohort/cohort.sql | bq query --use_legacy_sql=False --replace --destination_table=${TARGET_DATASET}.cohort 29 | 30 | # cat hospitalstay/demographic.sql | bq query --use_legacy_sql=False --replace --destination_table=${TARGET_DATASET}.demographic 31 | -------------------------------------------------------------------------------- /sql/mimic-iv/measurement/blood_differential.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | MAX(subject_id) AS subject_id 3 | , MAX(hadm_id) AS hadm_id 4 | , MAX(charttime) AS charttime 5 | , le.specimen_id 6 | -- convert from itemid into a meaningful column 7 | , MAX(CASE WHEN itemid = 52056 THEN valuenum ELSE NULL END) AS abs_basophils 8 | , MAX(CASE WHEN itemid = 52060 THEN valuenum ELSE NULL END) AS abs_eosinophils 9 | , MAX(CASE 10 | WHEN itemid = 51133 THEN valuenum 11 | -- convert #/uL to K/uL 12 | WHEN itemid = 52733 THEN valuenum / 1000.0 13 | ELSE NULL END) AS abs_lymphocytes 14 | , MAX(CASE WHEN itemid = 52061 THEN valuenum ELSE NULL END) AS abs_monocytes 15 | , MAX(CASE WHEN itemid = 52062 THEN valuenum ELSE NULL END) AS abs_neutrophils 16 | , MAX(CASE WHEN itemid = 51143 THEN valuenum ELSE NULL END) AS atyps 17 | , MAX(CASE WHEN itemid = 51144 THEN valuenum ELSE NULL END) AS bands 18 | , MAX(CASE WHEN itemid = 52122 THEN valuenum ELSE NULL END) AS imm_granulocytes 19 | , MAX(CASE WHEN itemid = 51251 THEN valuenum ELSE NULL END) AS metas 20 | , MAX(CASE WHEN itemid = 51257 THEN valuenum ELSE NULL END) AS nrbc 21 | FROM mimic_hosp.labevents le 22 | WHERE le.itemid IN 23 | ( 24 | 52056, -- Absolute basophil count 25 | 52060, -- Absolute Eosinophil count 26 | 51133, -- Absolute Lymphocyte Count, K/uL 27 | 52733, -- Absolute Lymphocyte Count, #/uL 28 | 52061, -- Absolute Monocyte Count 29 | 52062, -- Absolute Neutrophil Count 30 | 51143, -- Atypical lymphocytes 31 | 51144, -- Bands (%) 32 | 52122, -- Immature granulocytes (%) 33 | 51251, -- Metamyelocytes 34 | 51257 -- Nucleated RBC 35 | ) 36 | AND valuenum IS NOT NULL 37 | -- lab values cannot be 0 and cannot be negative 38 | AND valuenum > 0 39 | GROUP BY le.specimen_id 40 | ORDER BY subject_id, charttime; 41 | -------------------------------------------------------------------------------- /sql/mimic-iv/medication/enoxaparin_administration.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | e.subject_id 3 | , e.charttime 4 | , e.event_txt 5 | , e.storetime 6 | -- debug columns 7 | , e.emar_id 8 | , e.emar_seq 9 | , e.medication 10 | , e.poe_id 11 | -- , e.pharmacy_id 12 | , det.dose_due 13 | , det.dose_due_unit 14 | , det.complete_dose_not_given 15 | , det.will_remainder_of_dose_be_given 16 | -- grouped administration info 17 | , dgrp.dose_given 18 | , dgrp.dose_given_unit 19 | , dgrp.dose_given_string 20 | , dgrp.product_amount_given 21 | , dgrp.product_unit 22 | FROM mimic_hosp.emar e 23 | -- dose due is in the "parent" of the emar detail group 24 | -- where parent_field_ordinal is null 25 | -- only 1 of these per emar 26 | LEFT JOIN mimic_hosp.emar_detail det 27 | ON e.emar_id = det.emar_id 28 | AND det.parent_field_ordinal IS NULL 29 | -- group together individual administrations to get total administered 30 | -- since dose is sometimes a string, include a string concat for debug purposes 31 | LEFT JOIN ( 32 | SELECT emar_id 33 | , SUM(CASE 34 | WHEN REGEXP_CONTAINS(dose_given, r'^[0-9]*\.?[0-9]*$') 35 | THEN CAST(dose_given AS NUMERIC) 36 | ELSE NULL END) AS dose_given 37 | , STRING_AGG(dose_given) AS dose_given_string 38 | , MAX(dose_given_unit) AS dose_given_unit 39 | , SUM(CASE 40 | WHEN REGEXP_CONTAINS(product_amount_given, r'^[0-9]*\.?[0-9]*$') 41 | THEN CAST(product_amount_given AS NUMERIC) 42 | ELSE NULL END) AS product_amount_given 43 | , STRING_AGG(product_amount_given) AS product_amount_given_string 44 | , MAX(product_unit) AS product_unit 45 | FROM mimic_hosp.emar_detail 46 | GROUP BY emar_id 47 | ) dgrp 48 | ON e.emar_id = dgrp.emar_id 49 | WHERE e.medication IN 50 | ( 51 | 'Enoxaparin Sodium', 'INV-Enoxaparin', 'Enoxaparin', 52 | 'Enoxaparin ', 'INV Enoxaparin', 'Lovenox', 'IND-Enoxaparin', 53 | 'Enoxaparin (Treatment)', 'Enoxaparin (Prophylaxis)' 54 | ) 55 | ORDER BY subject_id, e.emar_seq, parent_field_ordinal 56 | -------------------------------------------------------------------------------- /sql/mimic-iv/medication/enoxaparin.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | pr.subject_id 3 | , pr.starttime 4 | , pr.stoptime 5 | , ph.entertime 6 | --, ph.verifiedtime 7 | , pr.pharmacy_id 8 | , ph.poe_id 9 | -- aux columns used for debugging 10 | -- , pr.pharmacy_id 11 | , pr.route 12 | , ph.frequency 13 | , ph.disp_sched 14 | , pr.dose_val_rx 15 | , CASE 16 | -- Cast numeric values directly 17 | WHEN REGEXP_CONTAINS(pr.dose_val_rx, r'^\s*[0-9]*\.?[0-9]+\s*$') 18 | THEN CAST(pr.dose_val_rx AS NUMERIC) 19 | -- 25,000 20 | WHEN REGEXP_CONTAINS(pr.dose_val_rx, r'^\s*([0-9]*,?)+\s*$') 21 | THEN CAST(REPLACE(pr.dose_val_rx, ',', '') AS NUMERIC) 22 | -- for doses like 1500-3000, use the lower end 23 | WHEN REGEXP_CONTAINS(pr.dose_val_rx, r'^\s*([0-9]*\.?[0-9]+)-([0-9]*\.?[0-9]+)\s*$') 24 | THEN CAST(REGEXP_EXTRACT(pr.dose_val_rx, r'^\s*([0-9]*\.?[0-9]+)-') AS NUMERIC) 25 | ELSE NULL END AS dose_val_rx_numeric 26 | , pr.dose_unit_rx 27 | -- clean doses per 24 hour 28 | -- fixes 0s present if doses/24hr < 1 29 | -- adds dose when it's missing 30 | , CASE 31 | -- once is set to be equiv to daily 32 | WHEN REGEXP_CONTAINS(ph.frequency, '^(1X|X1|ASDIR|ONCE)') THEN 1 33 | -- daily 34 | WHEN REGEXP_CONTAINS(ph.frequency, '^DAILY') THEN 1 35 | WHEN ph.frequency IN ('Q24H', 'Q 24H', 'HS', 'DINNER', 'QAM', 'QHD', 'QHS', 'QPM') THEN 1 36 | -- twice a day (BID) 37 | WHEN REGEXP_CONTAINS(ph.frequency, '^BID') THEN 0.5 -- BID 38 | WHEN ph.frequency IN ('Q12H', 'Q 12H') THEN 0.5 -- BID 39 | WHEN ph.frequency IN ('QMOWEFR', 'QTUTHSA') THEN 0.5 40 | -- fall back on doses if necessary 41 | ELSE COALESCE(pr.doses_per_24_hrs, 1) END 42 | AS doses_per_24_hrs 43 | -- misc info used to identify drug 44 | , drug 45 | , gsn, ndc 46 | FROM mimic_hosp.prescriptions pr 47 | LEFT JOIN mimic_hosp.pharmacy ph 48 | ON pr.pharmacy_id = ph.pharmacy_id 49 | AND pr.drug = ph.medication 50 | WHERE pr.drug IN 51 | ( 52 | 'Enoxaparin Sodium', 'INV-Enoxaparin', 53 | 'Enoxaparin', 'Enoxaparin ', 'INV Enoxaparin', 54 | 'Lovenox', 'IND-Enoxaparin', 55 | 'Enoxaparin (Prophylaxis)', 'Enoxaparin (Treatment)' 56 | ) 57 | ORDER BY subject_id, starttime, stoptime; 58 | -------------------------------------------------------------------------------- /sql/mimic-iv/medication/heparin_administration.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | e.subject_id 3 | , e.charttime 4 | , e.event_txt 5 | , e.storetime 6 | -- debug columns 7 | , e.emar_id 8 | , e.emar_seq 9 | , e.medication 10 | , e.poe_id 11 | -- , e.pharmacy_id 12 | , det.administration_types 13 | , det.dose_due 14 | , det.dose_due_unit 15 | , det.complete_dose_not_given 16 | , det.will_remainder_of_dose_be_given 17 | -- grouped administration info 18 | , dgrp.dose_given 19 | , dgrp.dose_given_unit 20 | , dgrp.dose_given_string 21 | , dgrp.product_amount_given 22 | , dgrp.product_unit 23 | FROM mimic_hosp.emar e 24 | -- dose due is in the "parent" of the emar detail group 25 | -- where parent_field_ordinal is null 26 | -- only 1 of these per emar 27 | LEFT JOIN mimic_hosp.emar_detail det 28 | ON e.emar_id = det.emar_id 29 | AND det.parent_field_ordinal IS NULL 30 | -- group together individual administrations to get total administered 31 | -- since dose is sometimes a string, include a string concat for debug purposes 32 | LEFT JOIN ( 33 | SELECT emar_id 34 | , SUM(CASE 35 | WHEN REGEXP_CONTAINS(dose_given, r'^[0-9]*\.?[0-9]*$') 36 | THEN CAST(dose_given AS NUMERIC) 37 | ELSE NULL END) AS dose_given 38 | , STRING_AGG(dose_given) AS dose_given_string 39 | , MAX(dose_given_unit) AS dose_given_unit 40 | , SUM(CASE 41 | WHEN REGEXP_CONTAINS(product_amount_given, r'^[0-9]*\.?[0-9]*$') 42 | THEN CAST(product_amount_given AS NUMERIC) 43 | ELSE NULL END) AS product_amount_given 44 | , STRING_AGG(product_amount_given) AS product_amount_given_string 45 | , MAX(product_unit) AS product_unit 46 | FROM mimic_hosp.emar_detail 47 | GROUP BY emar_id 48 | ) dgrp 49 | ON e.emar_id = dgrp.emar_id 50 | WHERE e.medication IN 51 | ( 52 | 'Heparin', 53 | -- 'Heparin (CRRT Machine Priming)', 54 | -- 'Heparin CRRT', 55 | -- 'Heparin (Hemodialysis)', 56 | -- 'Heparin (IABP)', 57 | -- 'Heparin (Impella) – Left ventricle', 58 | -- 'Heparin (Impella) – Right ventricle', 59 | -- 'Heparin LVAD', 60 | 'Heparin (via Anti-Xa Monitoring)', 61 | 'Heparin Desensitization' 62 | -- 'Heparin Dwell (1000 Units/mL)', 63 | -- 'Heparin Flush (1 unit/mL)', 64 | -- 'Heparin Flush (10 units/mL)', 65 | -- 'Heparin Flush (10 units/ml)', 66 | -- 'Heparin Flush (100 units/ml)', 67 | -- 'Heparin Flush (1000 units/mL)', 68 | -- 'Heparin INTRAPERITONEAL' 69 | ) 70 | ORDER BY subject_id, e.emar_seq, parent_field_ordinal 71 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Data folder 2 | data/* 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | -------------------------------------------------------------------------------- /sql/mimic-iv/measurement/chemistry.sql: -------------------------------------------------------------------------------- 1 | -- extract chemistry labs 2 | -- excludes point of care tests (very rare) 3 | -- blood gas measurements are *not* included in this query 4 | -- instead they are in bg.sql 5 | SELECT 6 | MAX(subject_id) AS subject_id 7 | , MAX(hadm_id) AS hadm_id 8 | , MAX(charttime) AS charttime 9 | , le.specimen_id 10 | -- convert from itemid into a meaningful column 11 | , MAX(CASE WHEN itemid = 50862 AND valuenum <= 10 THEN valuenum ELSE NULL END) AS albumin 12 | , MAX(CASE WHEN itemid = 50930 AND valuenum <= 10 THEN valuenum ELSE NULL END) AS globulin 13 | , MAX(CASE WHEN itemid = 50976 AND valuenum <= 20 THEN valuenum ELSE NULL END) AS total_protein 14 | , MAX(CASE WHEN itemid = 50868 AND valuenum <= 10000 THEN valuenum ELSE NULL END) AS aniongap 15 | , MAX(CASE WHEN itemid = 50882 AND valuenum <= 10000 THEN valuenum ELSE NULL END) AS bicarbonate 16 | , MAX(CASE WHEN itemid = 51006 AND valuenum <= 300 THEN valuenum ELSE NULL END) AS bun 17 | , MAX(CASE WHEN itemid = 50893 AND valuenum <= 10000 THEN valuenum ELSE NULL END) AS calcium 18 | , MAX(CASE WHEN itemid = 50902 AND valuenum <= 10000 THEN valuenum ELSE NULL END) AS chloride 19 | , MAX(CASE WHEN itemid = 50912 AND valuenum <= 150 THEN valuenum ELSE NULL END) AS creatinine 20 | , MAX(CASE WHEN itemid = 50931 AND valuenum <= 10000 THEN valuenum ELSE NULL END) AS glucose 21 | , MAX(CASE WHEN itemid = 50983 AND valuenum <= 200 THEN valuenum ELSE NULL END) AS sodium 22 | , MAX(CASE WHEN itemid = 50971 AND valuenum <= 30 THEN valuenum ELSE NULL END) AS potassium 23 | FROM mimic_hosp.labevents le 24 | WHERE le.itemid IN 25 | ( 26 | -- comment is: LABEL | CATEGORY | FLUID | NUMBER OF ROWS IN LABEVENTS 27 | 50862, -- ALBUMIN | CHEMISTRY | BLOOD | 146697 28 | 50930, -- Globulin 29 | 50976, -- Total protein 30 | 50868, -- ANION GAP | CHEMISTRY | BLOOD | 769895 31 | -- 52456, -- Anion gap, point of care test 32 | 50882, -- BICARBONATE | CHEMISTRY | BLOOD | 780733 33 | 50893, -- Calcium 34 | 50912, -- CREATININE | CHEMISTRY | BLOOD | 797476 35 | -- 52502, Creatinine, point of care 36 | 50902, -- CHLORIDE | CHEMISTRY | BLOOD | 795568 37 | 50931, -- GLUCOSE | CHEMISTRY | BLOOD | 748981 38 | -- 52525, Glucose, point of care 39 | 50971, -- POTASSIUM | CHEMISTRY | BLOOD | 845825 40 | -- 52566, -- Potassium, point of care 41 | 50983, -- SODIUM | CHEMISTRY | BLOOD | 808489 42 | -- 52579, -- Sodium, point of care 43 | 51006 -- UREA NITROGEN | CHEMISTRY | BLOOD | 791925 44 | -- 52603, Urea, point of care 45 | ) 46 | AND valuenum IS NOT NULL 47 | -- lab values cannot be 0 and cannot be negative 48 | -- .. except anion gap. 49 | AND (valuenum > 0 OR itemid = 50868) 50 | GROUP BY le.specimen_id 51 | ORDER BY 1, 2, 3, 4; 52 | -------------------------------------------------------------------------------- /sql/mimic-iv/measurement/vitalsign.sql: -------------------------------------------------------------------------------- 1 | -- This query pivots the vital signs for the entire patient stay. 2 | -- Vital signs include heart rate, blood pressure, respiration rate, and temperature 3 | select 4 | ce.subject_id 5 | , ce.charttime 6 | , AVG(case when itemid in (220045) and valuenum > 0 and valuenum < 300 then valuenum else null end) as heart_rate 7 | , AVG(case when itemid in (220179,220050) and valuenum > 0 and valuenum < 400 then valuenum else null end) as sbp 8 | , AVG(case when itemid in (220180,220051) and valuenum > 0 and valuenum < 300 then valuenum else null end) as dbp 9 | , AVG(case when itemid in (220052,220181,225312) and valuenum > 0 and valuenum < 300 then valuenum else null end) as mbp 10 | , AVG(case when itemid = 220179 and valuenum > 0 and valuenum < 400 then valuenum else null end) as sbp_ni 11 | , AVG(case when itemid = 220180 and valuenum > 0 and valuenum < 300 then valuenum else null end) as dbp_ni 12 | , AVG(case when itemid = 220181 and valuenum > 0 and valuenum < 300 then valuenum else null end) as mbp_ni 13 | , AVG(case when itemid in (220210,224690) and valuenum > 0 and valuenum < 70 then valuenum else null end) as resprate 14 | , ROUND( 15 | AVG(case when itemid in (223761) and valuenum > 70 and valuenum < 120 then (valuenum-32)/1.8 -- converted to degC in valuenum call 16 | when itemid in (223762) and valuenum > 10 and valuenum < 50 then valuenum else null end) 17 | , 2) as temperature 18 | , MAX(CASE WHEN itemid = 224642 THEN value ELSE NULL END) AS temperature_site 19 | , AVG(case when itemid in (220277) and valuenum > 0 and valuenum <= 100 then valuenum else null end) as spo2 20 | , AVG(case when itemid in (225664,220621,226537) and valuenum > 0 then valuenum else null end) as glucose 21 | FROM mimic_icu.chartevents ce 22 | where ce.stay_id IS NOT NULL 23 | and ce.itemid in 24 | ( 25 | 220045, -- Heart Rate 26 | 225309, -- ART BP Systolic 27 | 225310, -- ART BP Diastolic 28 | 225312, -- ART BP Mean 29 | 220050, -- Arterial Blood Pressure systolic 30 | 220051, -- Arterial Blood Pressure diastolic 31 | 220052, -- Arterial Blood Pressure mean 32 | 220179, -- Non Invasive Blood Pressure systolic 33 | 220180, -- Non Invasive Blood Pressure diastolic 34 | 220181, -- Non Invasive Blood Pressure mean 35 | 220210, -- Respiratory Rate 36 | 224690, -- Respiratory Rate (Total) 37 | 220277, -- SPO2, peripheral 38 | -- GLUCOSE, both lab and fingerstick 39 | 225664, -- Glucose finger stick 40 | 220621, -- Glucose (serum) 41 | 226537, -- Glucose (whole blood) 42 | -- TEMPERATURE 43 | 223762, -- "Temperature Celsius" 44 | 223761, -- "Temperature Fahrenheit" 45 | 224642 -- Temperature Site 46 | -- 226329 -- Blood Temperature CCO (C) 47 | ) 48 | group by ce.subject_id, ce.charttime 49 | order by ce.subject_id, ce.charttime; 50 | -------------------------------------------------------------------------------- /sql/mimic-iv/medication/heparin.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | pr.subject_id 3 | , pr.starttime 4 | , pr.stoptime 5 | , ph.entertime 6 | --, ph.verifiedtime 7 | , pr.pharmacy_id 8 | , ph.poe_id 9 | -- aux columns used for debugging 10 | -- , pr.pharmacy_id 11 | , pr.route 12 | , ph.frequency 13 | , ph.disp_sched 14 | , pr.dose_val_rx 15 | , CASE 16 | -- Cast numeric values directly 17 | WHEN REGEXP_CONTAINS(pr.dose_val_rx, r'^\s*[0-9]*\.?[0-9]+\s*$') 18 | THEN CAST(pr.dose_val_rx AS NUMERIC) 19 | -- 25,000 20 | WHEN REGEXP_CONTAINS(pr.dose_val_rx, r'^\s*([0-9]*,?)+\s*$') 21 | THEN CAST(REPLACE(pr.dose_val_rx, ',', '') AS NUMERIC) 22 | -- for doses like 1500-3000, use the lower end 23 | WHEN REGEXP_CONTAINS(pr.dose_val_rx, r'^\s*([0-9]*\.?[0-9]+)-([0-9]*\.?[0-9]+)\s*$') 24 | THEN CAST(REGEXP_EXTRACT(pr.dose_val_rx, r'^\s*([0-9]*\.?[0-9]+)-') AS NUMERIC) 25 | ELSE NULL END AS dose_val_rx_numeric 26 | , pr.dose_unit_rx 27 | -- clean doses per 24 hour 28 | -- fixes 0s present if doses/24hr < 1 29 | -- adds dose when it's missing 30 | , CASE 31 | -- once is set to be equiv to daily 32 | WHEN REGEXP_CONTAINS(ph.frequency, '^(1X|X1|ONCE)') THEN 1 33 | -- TODO: ASDIR (maybe Q6H?) 34 | -- daily 35 | WHEN REGEXP_CONTAINS(ph.frequency, '^DAILY') THEN 1 36 | WHEN ph.frequency IN ('Q24H', 'Q 24H', 'HS', 'DINNER', 'QAM', 'QHD', 'QHS', 'QPM') THEN 1 37 | -- twice a day (BID) 38 | WHEN REGEXP_CONTAINS(ph.frequency, '^BID') THEN 0.5 -- BID 39 | WHEN ph.frequency IN ('Q12H', 'Q 12H') THEN 0.5 -- BID 40 | WHEN ph.frequency IN ('QMOWEFR', 'QTUTHSA') THEN 0.5 41 | -- fall back on doses if necessary 42 | ELSE COALESCE(pr.doses_per_24_hrs, 1) END 43 | AS doses_per_24_hrs 44 | -- misc info used to identify drug 45 | , drug 46 | , gsn, ndc 47 | FROM mimic_hosp.prescriptions pr 48 | LEFT JOIN mimic_hosp.pharmacy ph 49 | ON pr.pharmacy_id = ph.pharmacy_id 50 | AND pr.drug = ph.medication 51 | WHERE pr.drug IN 52 | ( 53 | 54 | 'Heparin', 55 | 'Heparin Sodium', 56 | -- '5% Dextrose', 57 | -- 'Heparin Flush (10 units/ml)', 58 | -- 'Heparin Dwell (1000 Units/mL)', 59 | -- 'Heparin Flush (100 units/ml)', 60 | 'Heparin Pres. Free', 61 | -- 'Heparin (Hemodialysis)', 62 | -- 'Heparin (CRRT Machine Priming)', 63 | -- 'Heparin Flush (1000 units/mL)', 64 | -- 'Heparin Flush (10 units/mL)', 65 | -- 'Heparin Flush (1 unit/mL)', 66 | 'Heparin Sodium', 67 | -- 'Heparin PF (0.5 Units/mL)', 68 | -- 'Heparin Desensitization', 69 | -- 'Heparin (Impella)', 70 | -- 'Heparin Dwell (1000 Units/mL)', 71 | -- 'Heparin Flush (1000 units/mL)', 72 | 'Heparin (via Anti-Xa Monitoring)', 73 | 'Heparin', 74 | -- 'NS', 75 | 'Heparin Sodium', 76 | -- 'Heparin (IABP)', 77 | 'Heparin', 78 | 'Heparin Pres. Free', 79 | 'Heparin', 80 | -- 'Heparin INTRAPERITONEAL', 81 | 'Heparin ', 82 | 'Heparin Sodium', 83 | -- 'Heparin CRRT', 84 | -- 'Heparin Flush CRRT (5000 Units/mL)', 85 | 'Heparin', 86 | -- 'Heparin (IABP)', 87 | 'Heparin Sodium' 88 | ) 89 | ORDER BY subject_id, starttime, stoptime; -------------------------------------------------------------------------------- /sql/mimic-iv/measurement/ventilator_setting.sql: -------------------------------------------------------------------------------- 1 | with ce as 2 | ( 3 | SELECT 4 | ce.subject_id 5 | , ce.stay_id 6 | , ce.charttime 7 | , itemid 8 | -- TODO: clean 9 | , value 10 | , case 11 | -- begin fio2 cleaning 12 | when itemid = 223835 13 | then 14 | case 15 | when valuenum >= 0.20 and valuenum <= 1 16 | then valuenum * 100 17 | -- improperly input data - looks like O2 flow in litres 18 | when valuenum > 1 and valuenum < 20 19 | then null 20 | when valuenum >= 20 and valuenum <= 100 21 | then valuenum 22 | ELSE NULL END 23 | -- end of fio2 cleaning 24 | -- begin peep cleaning 25 | WHEN itemid in (220339, 224700) 26 | THEN 27 | CASE 28 | WHEN valuenum > 100 THEN NULL 29 | WHEN valuenum < 0 THEN NULL 30 | ELSE valuenum END 31 | -- end peep cleaning 32 | ELSE valuenum END AS valuenum 33 | , valueuom 34 | , storetime 35 | FROM mimic_icu.chartevents ce 36 | where ce.value IS NOT NULL 37 | AND ce.stay_id IS NOT NULL 38 | AND ce.itemid IN 39 | ( 40 | 224688 -- Respiratory Rate (Set) 41 | , 224689 -- Respiratory Rate (spontaneous) 42 | , 224690 -- Respiratory Rate (Total) 43 | , 224687 -- minute volume 44 | , 224685, 224684, 224686 -- tidal volume 45 | , 224696 -- PlateauPressure 46 | , 220339, 224700 -- PEEP 47 | , 223835 -- fio2 48 | , 223849 -- vent mode 49 | , 229314 -- vent mode (Hamilton) 50 | , 223848 -- vent type 51 | ) 52 | UNION ALL 53 | -- add in the extubation flags from procedureevents_mv 54 | -- note that we only need the start time for the extubation 55 | -- (extubation is always charted as ending 1 minute after it started) 56 | SELECT 57 | subject_id 58 | , stay_id 59 | , starttime as charttime 60 | , itemid 61 | , 'extubated' as value 62 | , 1 as valuenum 63 | , NULL AS valueuom 64 | , storetime 65 | FROM mimic_icu.procedureevents 66 | WHERE itemid IN 67 | ( 68 | 227194 -- "Extubation" 69 | , 225468 -- "Unplanned Extubation (patient-initiated)" 70 | , 225477 -- "Unplanned Extubation (non-patient initiated)" 71 | ) 72 | ) 73 | SELECT 74 | subject_id 75 | , charttime 76 | , MAX(CASE WHEN itemid = 224688 THEN valuenum ELSE NULL END) AS respiratory_rate_set 77 | , MAX(CASE WHEN itemid = 224690 THEN valuenum ELSE NULL END) AS respiratory_rate_total 78 | , MAX(CASE WHEN itemid = 224689 THEN valuenum ELSE NULL END) AS respiratory_rate_spontaneous 79 | , MAX(CASE WHEN itemid = 224687 THEN valuenum ELSE NULL END) AS minute_volume 80 | , MAX(CASE WHEN itemid = 224684 THEN valuenum ELSE NULL END) AS tidal_volume_set 81 | , MAX(CASE WHEN itemid = 224685 THEN valuenum ELSE NULL END) AS tidal_volume_observed 82 | , MAX(CASE WHEN itemid = 224686 THEN valuenum ELSE NULL END) AS tidal_volume_spontaneous 83 | , MAX(CASE WHEN itemid = 224696 THEN valuenum ELSE NULL END) AS plateau_pressure 84 | , MAX(CASE WHEN itemid in (220339, 224700) THEN valuenum ELSE NULL END) AS peep 85 | , MAX(CASE WHEN itemid = 223835 THEN valuenum ELSE NULL END) AS fio2 86 | , MAX(CASE WHEN itemid = 223849 THEN value ELSE NULL END) AS ventilator_mode 87 | , MAX(CASE WHEN itemid = 229314 THEN value ELSE NULL END) AS ventilator_mode_hamilton 88 | , MAX(CASE WHEN itemid = 223848 THEN value ELSE NULL END) AS ventilator_type 89 | , MAX(CASE WHEN itemid in (227194, 225468, 225477) THEN valuenum ELSE NULL END) AS extubated 90 | FROM ce 91 | GROUP BY subject_id, charttime 92 | ORDER BY subject_id, charttime; 93 | -------------------------------------------------------------------------------- /sql/mimic-iv/measurement/oxygen_delivery.sql: -------------------------------------------------------------------------------- 1 | with ce_stg1 as 2 | ( 3 | SELECT 4 | ce.subject_id 5 | , ce.charttime 6 | , CASE 7 | -- merge o2 flows into a single row 8 | WHEN itemid IN (223834, 227582, 224691) THEN 223834 9 | ELSE itemid END AS itemid 10 | , value 11 | , valuenum 12 | , valueuom 13 | , storetime 14 | FROM mimic_icu.chartevents ce 15 | WHERE ce.value IS NOT NULL 16 | AND ce.itemid IN 17 | ( 18 | 223834 -- o2 flow 19 | , 227582 -- bipap o2 flow 20 | , 224691 -- Flow Rate (L) 21 | -- additional o2 flow is its own column 22 | , 227287 -- additional o2 flow 23 | ) 24 | ) 25 | , ce_stg2 AS 26 | ( 27 | select 28 | ce.subject_id 29 | , ce.charttime 30 | , itemid 31 | , value 32 | , valuenum 33 | , valueuom 34 | -- retain only 1 row per charttime 35 | -- prioritizing the last documented value 36 | -- primarily used to subselect o2 flows 37 | , ROW_NUMBER() OVER (PARTITION BY subject_id, charttime, itemid ORDER BY storetime DESC) as rn 38 | FROM ce_stg1 ce 39 | ) 40 | , o2 AS 41 | ( 42 | -- -- The below ITEMID can have multiple entires for charttime/storetime 43 | -- -- These are totally valid entries. 44 | -- 224181 -- Small Volume Neb Drug #1 | Respiratory | Text | chartevents 45 | -- , 227570 -- Small Volume Neb Drug/Dose #1 | Respiratory | Text | chartevents 46 | -- , 224833 -- SBT Deferred | Respiratory | Text | chartevents 47 | -- , 224716 -- SBT Stopped | Respiratory | Text | chartevents 48 | -- , 224740 -- RSBI Deferred | Respiratory | Text | chartevents 49 | -- , 224829 -- Trach Tube Type | Respiratory | Text | chartevents 50 | -- , 226732 -- O2 Delivery Device(s) | Respiratory | Text | chartevents 51 | -- , 226873 -- Inspiratory Ratio | Respiratory | Numeric | chartevents 52 | -- , 226871 -- Expiratory Ratio | Respiratory | Numeric | chartevents 53 | -- maximum of 4 o2 devices on at once 54 | SELECT 55 | subject_id 56 | , charttime 57 | , itemid 58 | , value AS o2_device 59 | , ROW_NUMBER() OVER (PARTITION BY subject_id, charttime, itemid ORDER BY value) as rn 60 | FROM mimic_icu.chartevents 61 | WHERE itemid = 226732 -- oxygen delivery device(s) 62 | ) 63 | , stg AS 64 | ( 65 | select 66 | COALESCE(ce.subject_id, o2.subject_id) AS subject_id 67 | , COALESCE(ce.charttime, o2.charttime) AS charttime 68 | , COALESCE(ce.itemid, o2.itemid) AS itemid 69 | , ce.value 70 | , ce.valuenum 71 | , o2.o2_device 72 | , o2.rn 73 | from ce_stg2 ce 74 | FULL OUTER JOIN o2 75 | ON ce.subject_id = o2.subject_id 76 | AND ce.charttime = o2.charttime 77 | -- limit to 1 row per subject_id/charttime/itemid from ce_stg2 78 | WHERE ce.rn = 1 79 | ) 80 | SELECT 81 | subject_id, charttime 82 | , MAX(CASE WHEN itemid = 223834 THEN valuenum ELSE NULL END) AS o2_flow 83 | , MAX(CASE WHEN itemid = 227287 THEN valuenum ELSE NULL END) AS o2_flow_additional 84 | -- ensure we retain all o2 devices for the patient 85 | , MAX(CASE WHEN rn = 1 THEN o2_device ELSE NULL END) AS o2_delivery_device_1 86 | , MAX(CASE WHEN rn = 2 THEN o2_device ELSE NULL END) AS o2_delivery_device_2 87 | , MAX(CASE WHEN rn = 3 THEN o2_device ELSE NULL END) AS o2_delivery_device_3 88 | , MAX(CASE WHEN rn = 4 THEN o2_device ELSE NULL END) AS o2_delivery_device_4 89 | FROM stg 90 | GROUP BY subject_id, charttime 91 | ORDER BY subject_id, charttime 92 | ; -------------------------------------------------------------------------------- /sql/mimic-iii/vent_settings_details.sql: -------------------------------------------------------------------------------- 1 | select 2 | icustay_id, charttime 3 | -- case statement determining whether it is an instance of mech vent 4 | ,max(case when itemid = 223848 then value else null end) as VentTypeRecorded 5 | ,max(case when itemid = 223849 then value else null end) as ventilatormode 6 | ,max(case when itemid = 220339 then valuenum else null end) as peep_set -- PEEP 7 | ,max(case when itemid = 224700 then valuenum else null end ) as Total_PEEP_level 8 | ,max(case when itemid = 224684 then valuenum else null end ) as Tidal_Volume_set 9 | ,max(case when itemid = 224685 then valuenum else null end ) as Tidal_Volume_observed 10 | ,max(case when itemid = 224686 then valuenum else null end ) as Tidal_Volume_spontaneous 11 | , max(case when itemid = 224687 then value else null end) as minutes_vol 12 | ,max(case when itemid = 224688 then valuenum else null end ) as RR_set_Set 13 | ,max(case when itemid = 224689 then valuenum else null end ) as RR_set_Set_spontaneous 14 | ,max(case when itemid = 224690 then valuenum else null end ) as RR_set_Set_total 15 | , max (case when itemid = 224695 then value else null end ) as Peak_Insp_Pressure 16 | , max (case when itemid = 224696 then value else null end ) as Plateau_Pressure 17 | , max (case when itemid = 224697 then value else null end ) as mean_airway_Pressure 18 | , max(case 19 | -- initiation of oxygen therapy indicates the ventilation has ended 20 | when (itemid = 226732 and value in 21 | ( 22 | 'Nasal cannula', -- 153714 observations 23 | 'Face tent', -- 24601 observations 24 | 'Aerosol-cool', -- 24560 observations 25 | 'Trach mask ', -- 16435 observations 26 | 'High flow neb', -- 10785 observations 27 | 'Non-rebreather', -- 5182 observations 28 | 'Venti mask ', -- 1947 observations 29 | 'Medium conc mask ', -- 1888 observations 30 | 'T-piece', -- 1135 observations 31 | 'High flow nasal cannula', -- 925 observations 32 | 'Ultrasonic neb', -- 9 observations 33 | 'Vapomist' -- 3 observations 34 | )) then value else null end )as O2_Delivery_Device 35 | , max(case when itemid = 227287 then value else null end) as O2_Flow_additional_cannula 36 | , max (case when itemid = 223834 then value else null end ) as o2_flow 37 | , max (case when itemid = 224746 then value else null end ) as Transpulmonary_Press_Exp_Hold 38 | , max (case when itemid = 224747 then value else null end ) as Transpulmonary_Press_Insp_Hold 39 | , max (case when itemid = 226873 then value else null end ) as Inspiratory_Ratio 40 | , max (case when itemid = 224419 then value else null end ) as Negative_Insp_Force 41 | , max (case when itemid = 224738 then value else null end ) as Inspiratory_Time 42 | , max (case when itemid = 224750 then value else null end ) as Nitric_Oxide_Tank_Pressure 43 | , max (case when itemid = 227187 then value else null end ) as Pinsp_Draeger_only 44 | , max (case when itemid = 224701 then value else null end ) as PSVlevel 45 | , max (case when itemid = 224702 then value else null end ) as PCV_Level_Avea 46 | , max (case when itemid = 224705 then value else null end ) as P_High_APRV 47 | , max (case when itemid = 224706 then value else null end ) as P_Low_APRV 48 | , max (case when itemid = 224707 then value else null end ) as T_High_APRV 49 | , max (case when itemid = 224709 then value else null end ) as T_Low_APRV 50 | , max (case when itemid = 229393 then value else null end ) as PF_Ratio 51 | , max (case when itemid = 229394 then value else null end ) as SF_Ratio 52 | ------BIPAP related 53 | , max (case when itemid = 227577 then value else null end ) as BiPap_Mode 54 | , max (case when itemid = 227578 then value else null end ) as BiPap_Mask 55 | , max (case when itemid = 227579 then value else null end ) as BiPap_EPAP 56 | , max (case when itemid = 227580 then value else null end ) as BiPap_IPAP 57 | 58 | 59 | from `physionet-data.mimiciii_clinical.chartevents` ce 60 | where ce.value is not null 61 | and icustay_id is not null 62 | -- exclude rows marked as error 63 | and (ce.error != 1 or ce.error IS NULL) 64 | group by icustay_id, charttime 65 | order by icustay_id, charttime 66 | 67 | 68 | #UNION DISTINCT 69 | #-- add in the extubation flags from procedureevents_mv 70 | #-- note that we only need the start time for the extubation 71 | #-- (extubation is always charted as ending 1 minute after it started) 72 | #select 73 | # icustay_id, starttime as charttime 74 | # , 0 as MechVent 75 | # , 0 as OxygenTherapy 76 | # , 1 as Extubated 77 | # , case when itemid = 225468 then 1 else 0 end as SelfExtubated 78 | #from `physionet-data.mimiciii_clinical.procedureevents_mv` 79 | #where itemid in 80 | #( 81 | # 227194 -- "Extubation" 82 | #, 225468 -- "Unplanned Extubation (patient-initiated)" 83 | #, 225477 -- "Unplanned Extubation (non-patient initiated)" 84 | #); 85 | 86 | -------------------------------------------------------------------------------- /sql/mimic-iv/measurement/gcs.sql: -------------------------------------------------------------------------------- 1 | -- This query extracts the Glasgow Coma Scale, a measure of neurological function. 2 | -- The query has a few special rules: 3 | -- (1) The verbal component can be set to 0 if the patient is ventilated. 4 | -- This is corrected to 5 - the overall GCS is set to 15 in these cases. 5 | -- (2) Often only one of three components is documented. The other components 6 | -- are carried forward. 7 | 8 | -- ITEMIDs used: 9 | 10 | -- METAVISION 11 | -- 223900 GCS - Verbal Response 12 | -- 223901 GCS - Motor Response 13 | -- 220739 GCS - Eye Opening 14 | 15 | -- Note: 16 | -- The GCS for sedated patients is defaulted to 15 in this code. 17 | -- This is in line with how the data is meant to be collected. 18 | -- e.g., from the SAPS II publication: 19 | -- For sedated patients, the Glasgow Coma Score before sedation was used. 20 | -- This was ascertained either from interviewing the physician who ordered the sedation, 21 | -- or by reviewing the patient's medical record. 22 | with base as 23 | ( 24 | select 25 | subject_id 26 | , ce.stay_id, ce.charttime 27 | -- pivot each value into its own column 28 | , max(case when ce.ITEMID = 223901 then ce.valuenum else null end) as GCSMotor 29 | , max(case 30 | when ce.ITEMID = 223900 and ce.VALUE = 'No Response-ETT' then 0 31 | when ce.ITEMID = 223900 then ce.valuenum 32 | else null 33 | end) as GCSVerbal 34 | , max(case when ce.ITEMID = 220739 then ce.valuenum else null end) as GCSEyes 35 | -- convert the data into a number, reserving a value of 0 for ET/Trach 36 | , max(case 37 | -- endotrach/vent is assigned a value of 0 38 | -- flag it here to later parse specially 39 | when ce.ITEMID = 223900 and ce.VALUE = 'No Response-ETT' then 1 -- metavision 40 | else 0 end) 41 | as endotrachflag 42 | , ROW_NUMBER () 43 | OVER (PARTITION BY ce.stay_id ORDER BY ce.charttime ASC) as rn 44 | from mimic_icu.chartevents ce 45 | -- Isolate the desired GCS variables 46 | where ce.ITEMID in 47 | ( 48 | -- GCS components, Metavision 49 | 223900, 223901, 220739 50 | ) 51 | group by ce.subject_id, ce.stay_id, ce.charttime 52 | ) 53 | , gcs as ( 54 | select b.* 55 | , b2.GCSVerbal as GCSVerbalPrev 56 | , b2.GCSMotor as GCSMotorPrev 57 | , b2.GCSEyes as GCSEyesPrev 58 | -- Calculate GCS, factoring in special case when they are intubated and prev vals 59 | -- note that the coalesce are used to implement the following if: 60 | -- if current value exists, use it 61 | -- if previous value exists, use it 62 | -- otherwise, default to normal 63 | , case 64 | -- replace GCS during sedation with 15 65 | when b.GCSVerbal = 0 66 | then 15 67 | when b.GCSVerbal is null and b2.GCSVerbal = 0 68 | then 15 69 | -- if previously they were intub, but they aren't now, do not use previous GCS values 70 | when b2.GCSVerbal = 0 71 | then 72 | coalesce(b.GCSMotor,6) 73 | + coalesce(b.GCSVerbal,5) 74 | + coalesce(b.GCSEyes,4) 75 | -- otherwise, add up score normally, imputing previous value if none available at current time 76 | else 77 | coalesce(b.GCSMotor,coalesce(b2.GCSMotor,6)) 78 | + coalesce(b.GCSVerbal,coalesce(b2.GCSVerbal,5)) 79 | + coalesce(b.GCSEyes,coalesce(b2.GCSEyes,4)) 80 | end as GCS 81 | 82 | from base b 83 | -- join to itself within 6 hours to get previous value 84 | left join base b2 85 | on b.stay_id = b2.stay_id 86 | and b.rn = b2.rn+1 87 | and b2.charttime > DATETIME_ADD(b.charttime, INTERVAL 6 HOUR) 88 | ) 89 | -- combine components with previous within 6 hours 90 | -- filter down to cohort which is not excluded 91 | -- truncate charttime to the hour 92 | , gcs_stg as 93 | ( 94 | select 95 | subject_id 96 | , gs.stay_id, gs.charttime 97 | , GCS 98 | , coalesce(GCSMotor,GCSMotorPrev) as GCSMotor 99 | , coalesce(GCSVerbal,GCSVerbalPrev) as GCSVerbal 100 | , coalesce(GCSEyes,GCSEyesPrev) as GCSEyes 101 | , case when coalesce(GCSMotor,GCSMotorPrev) is null then 0 else 1 end 102 | + case when coalesce(GCSVerbal,GCSVerbalPrev) is null then 0 else 1 end 103 | + case when coalesce(GCSEyes,GCSEyesPrev) is null then 0 else 1 end 104 | as components_measured 105 | , EndoTrachFlag 106 | from gcs gs 107 | ) 108 | -- priority is: 109 | -- (i) complete data, (ii) non-sedated GCS, (iii) lowest GCS, (iv) charttime 110 | , gcs_priority as 111 | ( 112 | select 113 | subject_id 114 | , stay_id 115 | , charttime 116 | , gcs 117 | , gcsmotor 118 | , gcsverbal 119 | , gcseyes 120 | , EndoTrachFlag 121 | , ROW_NUMBER() over 122 | ( 123 | PARTITION BY stay_id, charttime 124 | ORDER BY components_measured DESC, endotrachflag, gcs, charttime DESC 125 | ) as rn 126 | from gcs_stg 127 | ) 128 | select 129 | gs.subject_id 130 | , gs.stay_id 131 | , gs.charttime 132 | , GCS AS gcs 133 | , GCSMotor AS gcs_motor 134 | , GCSVerbal AS gcs_verbal 135 | , GCSEyes AS gcs_eyes 136 | , EndoTrachFlag AS gcs_unable 137 | from gcs_priority gs 138 | where rn = 1 139 | ORDER BY 1, 2; 140 | -------------------------------------------------------------------------------- /sql/mimic-covid/ventilator_duration.sql: -------------------------------------------------------------------------------- 1 | -- Calculate duration of mechanical ventilation. 2 | -- Some useful cases for debugging: 3 | -- stay_id = 30019660 has a tracheostomy placed in the ICU 4 | -- stay_id = 30000117 has explicit documentation of extubation 5 | WITH vs AS 6 | ( 7 | select 8 | vs.subject_id, vs.charttime 9 | -- case statement determining whether it is an instance of mech vent 10 | , MAX(CASE 11 | WHEN COALESCE(extubated, 0) = 1 THEN 0 12 | -- vent type does not differentiate between mech vent and other modes for trach 13 | -- WHEN venttype IS NOT NULL and venttype != 'Other' THEN 1 14 | WHEN ventilator_mode IS NOT NULL THEN 1 15 | WHEN minute_volume IS NOT NULL THEN 1 16 | WHEN tidal_volume_observed IS NOT NULL THEN 1 17 | WHEN tidal_volume_spontaneous IS NOT NULL THEN 1 18 | WHEN tidal_volume_set IS NOT NULL THEN 1 19 | WHEN plateau_pressure IS NOT NULL THEN 1 20 | WHEN peep IS NOT NULL THEN 1 21 | WHEN od.o2_delivery_device_1 = 'Endotracheal tube' THEN 1 22 | WHEN od.o2_delivery_device_2 = 'Endotracheal tube' THEN 1 23 | WHEN od.o2_delivery_device_3 = 'Endotracheal tube' THEN 1 24 | WHEN od.o2_delivery_device_4 = 'Endotracheal tube' THEN 1 25 | -- 224697,224695,224696,224746,224747 -- High/Low/Peak/Mean/Neg insp force ("RespPressure") 26 | -- 226873,224738,224419,224750,227187 -- Insp pressure 27 | -- 224707,224709,224705,224706 -- APRV pressure 28 | -- 224702 -- PCV 29 | -- 224701 -- PSVlevel 30 | WHEN od.o2_delivery_device_1 IN 31 | ( 32 | 'None', 33 | 'Nasal cannula', -- 153714 observations 34 | 'Face tent', -- 24601 observations 35 | 'Aerosol-cool', -- 24560 observations 36 | 'Trach mask ', -- 16435 observations 37 | 'High flow neb', -- 10785 observations 38 | 'Non-rebreather', -- 5182 observations 39 | 'Venti mask ', -- 1947 observations 40 | 'Medium conc mask ', -- 1888 observations 41 | 'T-piece', -- 1135 observations 42 | 'High flow nasal cannula', -- 925 observations 43 | 'Ultrasonic neb', -- 9 observations 44 | 'Vapomist' -- 3 observations 45 | ) 46 | THEN 0 47 | ELSE NULL END 48 | ) as MechVent 49 | , MAX(COALESCE(extubated, 0)) AS Extubated 50 | FROM mimic_covid_derived.ventilator_setting vs 51 | LEFT JOIN mimic_covid_derived.oxygen_delivery od 52 | ON vs.subject_id = od.subject_id 53 | AND vs.charttime = od.charttime 54 | GROUP BY vs.subject_id, vs.charttime 55 | ) 56 | , vd0 AS 57 | ( 58 | select 59 | subject_id 60 | -- this carries over the previous charttime which had a mechanical ventilation event 61 | , case 62 | when MechVent = 1 then 63 | LAG(charttime, 1) OVER (partition by subject_id, MechVent order by charttime) 64 | else null 65 | end as charttime_lag 66 | -- carry forward our extubated flag 67 | -- need the extubated row to be included in the current mechvent partition, 68 | -- so that the endtime is set to the time that extubated is charted 69 | -- to do this, we use lag(extubated) to set mechvent = 0 70 | , LAG(Extubated,1) 71 | OVER 72 | ( 73 | partition by subject_id 74 | order by charttime 75 | ) as ExtubatedLag 76 | , charttime 77 | , MechVent 78 | , Extubated 79 | from vs 80 | ) 81 | , vd1 as 82 | ( 83 | select 84 | subject_id 85 | , charttime_lag 86 | , charttime 87 | , MechVent 88 | , Extubated 89 | 90 | -- calculate the time since the last event 91 | -- since charttime_lag is NULL for non-mechvent rows, this is only present on MechVent=1 rows 92 | , DATETIME_DIFF(charttime, charttime_lag, MINUTE)/60 as ventduration 93 | 94 | -- now we determine if the current mech vent event is a "new", i.e. they've just been intubated 95 | , case 96 | -- if there was an extubation flag on the previously charted row, 97 | -- then this row is a new ventilation event 98 | WHEN ExtubatedLag = 1 THEN 1 99 | -- we want to include the row with the extubation in the current mechvent event 100 | -- this makes our endtime of that event == the time of extubation 101 | WHEN Extubated = 1 THEN 0 102 | -- if we have specified MechVent = 0, then the settings indicated *not* mech vent 103 | -- thus, they must have been extubated previous to this time 104 | when MechVent = 0 then 1 105 | -- if there has been 8 hours since the last mech vent documentation, 106 | -- then we assume they were extubated earlier 107 | when CHARTTIME > DATETIME_ADD(charttime_lag, INTERVAL 8 HOUR) 108 | then 1 109 | else 0 110 | end as newvent 111 | -- use the staging table with only vent settings from chart events 112 | FROM vd0 ventsettings 113 | ) 114 | , vd2 as 115 | ( 116 | select vd1.* 117 | -- create a cumulative sum of the instances of new ventilation 118 | -- this results in a monotonic integer assigned to each instance of ventilation 119 | , case when MechVent=1 or Extubated = 1 then 120 | SUM( newvent ) 121 | OVER ( partition by subject_id order by charttime ) 122 | else null end 123 | as ventnum 124 | --- now we convert CHARTTIME of ventilator settings into durations 125 | from vd1 126 | ) 127 | -- create the durations for each mechanical ventilation instance 128 | select subject_id 129 | , min(charttime) as starttime 130 | , max(charttime) as endtime 131 | from vd2 132 | group by subject_id, vd2.ventnum 133 | having min(charttime) != max(charttime) 134 | -- patient had to be mechanically ventilated at least once 135 | -- i.e. max(mechvent) should be 1 136 | -- this excludes a frequent situation of NIV/oxygen before intub 137 | -- in these cases, ventnum=0 and max(mechvent)=0, so they are ignored 138 | and MAX(mechvent) = 1 139 | order by subject_id, ventnum -------------------------------------------------------------------------------- /sql/mimic-iv/measurement/bg.sql: -------------------------------------------------------------------------------- 1 | -- The aim of this query is to pivot entries related to blood gases 2 | -- which were found in LABEVENTS 3 | WITH bg AS 4 | ( 5 | select 6 | -- specimen_id only ever has 1 measurement for each itemid 7 | -- so, we may simply collapse rows using MAX() 8 | MAX(subject_id) AS subject_id 9 | , MAX(hadm_id) AS hadm_id 10 | , MAX(charttime) AS charttime 11 | -- specimen_id *may* have different storetimes, so this is taking the latest 12 | , MAX(storetime) AS storetime 13 | , le.specimen_id 14 | , MAX(CASE WHEN itemid = 52025 THEN value ELSE NULL END) AS specimen 15 | , MAX(CASE WHEN itemid = 50801 THEN valuenum ELSE NULL END) AS aado2 16 | , MAX(CASE WHEN itemid = 50802 THEN valuenum ELSE NULL END) AS baseexcess 17 | , MAX(CASE WHEN itemid = 50803 THEN valuenum ELSE NULL END) AS bicarbonate 18 | , MAX(CASE WHEN itemid = 50804 THEN valuenum ELSE NULL END) AS totalco2 19 | , MAX(CASE WHEN itemid = 50805 THEN valuenum ELSE NULL END) AS carboxyhemoglobin 20 | , MAX(CASE WHEN itemid = 50806 THEN valuenum ELSE NULL END) AS chloride 21 | , MAX(CASE WHEN itemid = 50808 THEN valuenum ELSE NULL END) AS calcium 22 | , MAX(CASE WHEN itemid = 50809 THEN valuenum ELSE NULL END) AS glucose 23 | , MAX(CASE WHEN itemid = 50810 and valuenum <= 100 THEN valuenum ELSE NULL END) AS hematocrit 24 | , MAX(CASE WHEN itemid = 50811 THEN valuenum ELSE NULL END) AS hemoglobin 25 | , MAX(CASE WHEN itemid = 50813 THEN valuenum ELSE NULL END) AS lactate 26 | , MAX(CASE WHEN itemid = 50814 THEN valuenum ELSE NULL END) AS methemoglobin 27 | , MAX(CASE WHEN itemid = 50815 THEN valuenum ELSE NULL END) AS o2flow 28 | -- fix a common unit conversion error for fio2 29 | -- atmospheric o2 is 20.89%, so any value <= 20 is unphysiologic 30 | -- usually this is a misplaced O2 flow measurement 31 | , MAX(CASE WHEN itemid = 50816 THEN 32 | CASE 33 | WHEN valuenum > 20 AND valuenum <= 100 THEN valuenum 34 | WHEN valuenum > 0.2 AND valuenum <= 1.0 THEN valuenum*100.0 35 | ELSE NULL END 36 | ELSE NULL END) AS fio2 37 | , MAX(CASE WHEN itemid = 50817 AND valuenum <= 100 THEN valuenum ELSE NULL END) AS so2 38 | , MAX(CASE WHEN itemid = 50818 THEN valuenum ELSE NULL END) AS pco2 39 | , MAX(CASE WHEN itemid = 50819 THEN valuenum ELSE NULL END) AS peep 40 | , MAX(CASE WHEN itemid = 50820 THEN valuenum ELSE NULL END) AS ph 41 | , MAX(CASE WHEN itemid = 50821 THEN valuenum ELSE NULL END) AS po2 42 | , MAX(CASE WHEN itemid = 50822 THEN valuenum ELSE NULL END) AS potassium 43 | , MAX(CASE WHEN itemid = 50823 THEN valuenum ELSE NULL END) AS requiredo2 44 | , MAX(CASE WHEN itemid = 50824 THEN valuenum ELSE NULL END) AS sodium 45 | , MAX(CASE WHEN itemid = 50825 THEN valuenum ELSE NULL END) AS temperature 46 | , MAX(CASE WHEN itemid = 50807 THEN value ELSE NULL END) AS comments 47 | FROM mimic_hosp.labevents le 48 | where le.ITEMID in 49 | -- blood gases 50 | ( 51 | 52025 -- specimen 52 | , 50801 -- aado2 53 | , 50802 -- base excess 54 | , 50803 -- bicarb 55 | , 50804 -- calc tot co2 56 | , 50805 -- carboxyhgb 57 | , 50806 -- chloride 58 | -- , 52390 -- chloride, WB CL- 59 | , 50807 -- comments 60 | , 50808 -- free calcium 61 | , 50809 -- glucose 62 | , 50810 -- hct 63 | , 50811 -- hgb 64 | , 50813 -- lactate 65 | , 50814 -- methemoglobin 66 | , 50815 -- o2 flow 67 | , 50816 -- fio2 68 | , 50817 -- o2 sat 69 | , 50818 -- pco2 70 | , 50819 -- peep 71 | , 50820 -- pH 72 | , 50821 -- pO2 73 | , 50822 -- potassium 74 | -- , 52408 -- potassium, WB K+ 75 | , 50823 -- required O2 76 | , 50824 -- sodium 77 | -- , 52411 -- sodium, WB NA + 78 | , 50825 -- temperature 79 | ) 80 | GROUP BY le.specimen_id 81 | ) 82 | , stg_spo2 as 83 | ( 84 | select subject_id, charttime 85 | -- avg here is just used to group SpO2 by charttime 86 | , AVG(valuenum) as SpO2 87 | FROM mimic_icu.chartevents 88 | where ITEMID = 220277 -- O2 saturation pulseoxymetry 89 | and valuenum > 0 and valuenum <= 100 90 | group by subject_id, charttime 91 | ) 92 | , stg_fio2 as 93 | ( 94 | select subject_id, charttime 95 | -- pre-process the FiO2s to ensure they are between 21-100% 96 | , max( 97 | case 98 | when valuenum > 0.2 and valuenum <= 1 99 | then valuenum * 100 100 | -- improperly input data - looks like O2 flow in litres 101 | when valuenum > 1 and valuenum < 20 102 | then null 103 | when valuenum >= 20 and valuenum <= 100 104 | then valuenum 105 | else null end 106 | ) as fio2_chartevents 107 | FROM mimic_icu.chartevents 108 | where ITEMID = 223835 -- Inspired O2 Fraction (FiO2) 109 | and valuenum > 0 and valuenum <= 100 110 | group by subject_id, charttime 111 | ) 112 | , stg2 as 113 | ( 114 | select bg.* 115 | , ROW_NUMBER() OVER (partition by bg.subject_id, bg.charttime order by s1.charttime DESC) as lastRowSpO2 116 | , s1.spo2 117 | from bg 118 | left join stg_spo2 s1 119 | -- same hospitalization 120 | on bg.subject_id = s1.subject_id 121 | -- spo2 occurred at most 2 hours before this blood gas 122 | and s1.charttime between DATETIME_SUB(bg.charttime, INTERVAL 2 HOUR) and bg.charttime 123 | where bg.po2 is not null 124 | ) 125 | , stg3 as 126 | ( 127 | select bg.* 128 | , ROW_NUMBER() OVER (partition by bg.subject_id, bg.charttime order by s2.charttime DESC) as lastRowFiO2 129 | , s2.fio2_chartevents 130 | -- create our specimen prediction 131 | , 1/(1+exp(-(-0.02544 132 | + 0.04598 * po2 133 | + coalesce(-0.15356 * spo2 , -0.15356 * 97.49420 + 0.13429) 134 | + coalesce( 0.00621 * fio2_chartevents , 0.00621 * 51.49550 + -0.24958) 135 | + coalesce( 0.10559 * hemoglobin , 0.10559 * 10.32307 + 0.05954) 136 | + coalesce( 0.13251 * so2 , 0.13251 * 93.66539 + -0.23172) 137 | + coalesce(-0.01511 * pco2 , -0.01511 * 42.08866 + -0.01630) 138 | + coalesce( 0.01480 * fio2 , 0.01480 * 63.97836 + -0.31142) 139 | + coalesce(-0.00200 * aado2 , -0.00200 * 442.21186 + -0.01328) 140 | + coalesce(-0.03220 * bicarbonate , -0.03220 * 22.96894 + -0.06535) 141 | + coalesce( 0.05384 * totalco2 , 0.05384 * 24.72632 + -0.01405) 142 | + coalesce( 0.08202 * lactate , 0.08202 * 3.06436 + 0.06038) 143 | + coalesce( 0.10956 * ph , 0.10956 * 7.36233 + -0.00617) 144 | + coalesce( 0.00848 * o2flow , 0.00848 * 7.59362 + -0.35803) 145 | ))) as specimen_prob 146 | from stg2 bg 147 | left join stg_fio2 s2 148 | -- same patient 149 | on bg.subject_id = s2.subject_id 150 | -- fio2 occurred at most 4 hours before this blood gas 151 | and s2.charttime between DATETIME_SUB(bg.charttime, INTERVAL 4 HOUR) and bg.charttime 152 | AND s2.fio2_chartevents > 0 153 | where bg.lastRowSpO2 = 1 -- only the row with the most recent SpO2 (if no SpO2 found lastRowSpO2 = 1) 154 | ) 155 | select 156 | stg3.subject_id 157 | , stg3.hadm_id 158 | , stg3.charttime 159 | -- raw data indicating sample type, only present 80% of the time 160 | , specimen 161 | -- prediction of specimen for missing data 162 | , case 163 | when specimen is not null then specimen 164 | when specimen_prob > 0.75 then 'ART' 165 | else null end as specimen_pred 166 | , specimen_prob 167 | 168 | -- oxygen related parameters 169 | , so2, spo2 -- note spo2 is FROM `physionet-data.mimiciii_clinical.chartevents` 170 | , po2, pco2 171 | , fio2_chartevents, fio2 172 | , aado2 173 | -- also calculate AADO2 174 | , case 175 | when po2 is not null 176 | and pco2 is not null 177 | and coalesce(fio2, fio2_chartevents) is not null 178 | -- multiple by 100 because fio2 is in a % but should be a fraction 179 | then (coalesce(fio2, fio2_chartevents)/100) * (760 - 47) - (pco2/0.8) - po2 180 | else null 181 | end as aado2_calc 182 | , case 183 | when PO2 is not null and coalesce(fio2, fio2_chartevents) is not null 184 | -- multiply by 100 because fio2 is in a % but should be a fraction 185 | then 100*PO2/(coalesce(fio2, fio2_chartevents)) 186 | else null 187 | end as paO2fio2ratio 188 | -- acid-base parameters 189 | , ph, baseexcess 190 | , bicarbonate, totalco2 191 | 192 | -- blood count parameters 193 | , hematocrit 194 | , hemoglobin 195 | , carboxyhemoglobin 196 | , methemoglobin 197 | 198 | -- chemistry 199 | , chloride, calcium 200 | , temperature 201 | , potassium, sodium 202 | , lactate 203 | , glucose 204 | 205 | -- ventilation stuff that's sometimes input 206 | -- , intubated, tidalvolume, ventilationrate, ventilator 207 | -- , peep, o2flow 208 | -- , requiredo2 209 | from stg3 210 | where lastRowFiO2 = 1 -- only the most recent FiO2 211 | order by 1, charttime; -------------------------------------------------------------------------------- /sql/mimic-iii/hourly_lab_gcs_vitals.sql: -------------------------------------------------------------------------------- 1 | 2 | SELECT 3 | pvt.subject_id, pvt.hadm_id, pvt.icustay_id, pvt.intime,pvt.outtime, pvt.starttime, pvt.endtime 4 | , max(GCS) as gcs_max 5 | , min(GCS) as gcs_min 6 | , max(GCSMotor) as gcsmotor_max 7 | , min(GCSMotor) as gcsmotor_min 8 | , max(GCSVerbal) as gcsverbal_max 9 | , min(GCSVerbal) as gcsverbal_min 10 | , max(GCSEyes) as gcseyes_max 11 | , min(GCSEyes) as gcseyes_min 12 | , max(EndoTrachFlag) as endo_max 13 | , min(EndoTrachFlag) as endo_min 14 | , max(HeartRate) as hr_max 15 | , min(HeartRate) as hr_min 16 | , max(SysBP) as sysbp_max 17 | , min(SysBP) as sysbp_min 18 | , max(DiasBP) as diasbp_max 19 | , min(DiasBP) as diasbp_min 20 | , max(MeanBP) as meanbp_max 21 | , min(MeanBP) as meanbp_min 22 | , max(RespRate) as rr_max 23 | , min(RespRate) as rr_min 24 | , max(TempC) as tempc_max 25 | , min(TempC) as tempc_min 26 | , max(SpO2) as spo2_max 27 | , min(SpO2) as spo2_min 28 | , max(fio2) as fio2_max 29 | , min(fio2) as fio2_min 30 | , min(CASE WHEN label = 'ANION GAP' THEN valuenum ELSE null END) as ANIONGAP_min 31 | , max(CASE WHEN label = 'ANION GAP' THEN valuenum ELSE null END) as ANIONGAP_max 32 | , min(CASE WHEN label = 'ALBUMIN' THEN valuenum ELSE null END) as ALBUMIN_min 33 | , max(CASE WHEN label = 'ALBUMIN' THEN valuenum ELSE null END) as ALBUMIN_max 34 | , min(CASE WHEN label = 'BANDS' THEN valuenum ELSE null END) as BANDS_min 35 | , max(CASE WHEN label = 'BANDS' THEN valuenum ELSE null END) as BANDS_max 36 | , min(CASE WHEN label = 'BICARBONATE' THEN valuenum ELSE null END) as BICARBONATE_min 37 | , max(CASE WHEN label = 'BICARBONATE' THEN valuenum ELSE null END) as BICARBONATE_max 38 | , min(CASE WHEN label = 'BILIRUBIN' THEN valuenum ELSE null END) as BILIRUBIN_min 39 | , max(CASE WHEN label = 'BILIRUBIN' THEN valuenum ELSE null END) as BILIRUBIN_max 40 | , min(CASE WHEN label = 'CREATININE' THEN valuenum ELSE null END) as CREATININE_min 41 | , max(CASE WHEN label = 'CREATININE' THEN valuenum ELSE null END) as CREATININE_max 42 | , min(CASE WHEN label = 'CHLORIDE' THEN valuenum ELSE null END) as CHLORIDE_min 43 | , max(CASE WHEN label = 'CHLORIDE' THEN valuenum ELSE null END) as CHLORIDE_max 44 | , min(CASE WHEN label = 'GLUCOSE' THEN valuenum ELSE null END) as GLUCOSE_min 45 | , max(CASE WHEN label = 'GLUCOSE' THEN valuenum ELSE null END) as GLUCOSE_max 46 | , min(CASE WHEN label = 'HEMATOCRIT' THEN valuenum ELSE null END) as HEMATOCRIT_min 47 | , max(CASE WHEN label = 'HEMATOCRIT' THEN valuenum ELSE null END) as HEMATOCRIT_max 48 | , min(CASE WHEN label = 'HEMOGLOBIN' THEN valuenum ELSE null END) as HEMOGLOBIN_min 49 | , max(CASE WHEN label = 'HEMOGLOBIN' THEN valuenum ELSE null END) as HEMOGLOBIN_max 50 | , min(CASE WHEN label = 'LACTATE' THEN valuenum ELSE null END) as LACTATE_min 51 | , max(CASE WHEN label = 'LACTATE' THEN valuenum ELSE null END) as LACTATE_max 52 | , min(CASE WHEN label = 'PLATELET' THEN valuenum ELSE null END) as PLATELET_min 53 | , max(CASE WHEN label = 'PLATELET' THEN valuenum ELSE null END) as PLATELET_max 54 | , min(CASE WHEN label = 'POTASSIUM' THEN valuenum ELSE null END) as POTASSIUM_min 55 | , max(CASE WHEN label = 'POTASSIUM' THEN valuenum ELSE null END) as POTASSIUM_max 56 | , min(CASE WHEN label = 'PTT' THEN valuenum ELSE null END) as PTT_min 57 | , max(CASE WHEN label = 'PTT' THEN valuenum ELSE null END) as PTT_max 58 | , min(CASE WHEN label = 'INR' THEN valuenum ELSE null END) as INR_min 59 | , max(CASE WHEN label = 'INR' THEN valuenum ELSE null END) as INR_max 60 | , min(CASE WHEN label = 'PT' THEN valuenum ELSE null END) as PT_min 61 | , max(CASE WHEN label = 'PT' THEN valuenum ELSE null END) as PT_max 62 | , min(CASE WHEN label = 'SODIUM' THEN valuenum ELSE null END) as SODIUM_min 63 | , max(CASE WHEN label = 'SODIUM' THEN valuenum ELSE null end) as SODIUM_max 64 | , min(CASE WHEN label = 'BUN' THEN valuenum ELSE null end) as BUN_min 65 | , max(CASE WHEN label = 'BUN' THEN valuenum ELSE null end) as BUN_max 66 | , min(CASE WHEN label = 'WBC' THEN valuenum ELSE null end) as WBC_min 67 | , max(CASE WHEN label = 'WBC' THEN valuenum ELSE null end) as WBC_max 68 | , min(CASE WHEN label = 'alt' THEN valuenum ELSE null end) as alt_min 69 | , max(CASE WHEN label = 'alt' THEN valuenum ELSE null end) as alt_max 70 | , min(CASE WHEN label = 'ast' THEN valuenum ELSE null end) as ast_min 71 | , max(CASE WHEN label = 'ast' THEN valuenum ELSE null end) as ast_max 72 | , min(CASE WHEN label = 'crp' THEN valuenum ELSE null end) as crp_min 73 | , max(CASE WHEN label = 'crp' THEN valuenum ELSE null end) as crp_max 74 | 75 | FROM 76 | ( -- begin query that extracts the data 77 | SELECT ie.subject_id, ie.hadm_id, ie.icustay_id, d.intime, d.outtime, d.starttime, d.endtime 78 | -- here we assign labels to ITEMIDs 79 | -- this also fuses together multiple ITEMIDs containing the same data 80 | , CASE 81 | WHEN itemid = 50868 THEN 'ANION GAP' 82 | WHEN itemid = 50862 THEN 'ALBUMIN' 83 | WHEN itemid = 51144 THEN 'BANDS' 84 | WHEN itemid = 50882 THEN 'BICARBONATE' 85 | WHEN itemid = 50885 THEN 'BILIRUBIN' 86 | WHEN itemid = 50912 THEN 'CREATININE' 87 | WHEN itemid = 50806 THEN 'CHLORIDE' 88 | WHEN itemid = 50902 THEN 'CHLORIDE' 89 | WHEN itemid = 50809 THEN 'GLUCOSE' 90 | WHEN itemid = 50931 THEN 'GLUCOSE' 91 | WHEN itemid = 50810 THEN 'HEMATOCRIT' 92 | WHEN itemid = 51221 THEN 'HEMATOCRIT' 93 | WHEN itemid = 50811 THEN 'HEMOGLOBIN' 94 | WHEN itemid = 51222 THEN 'HEMOGLOBIN' 95 | WHEN itemid = 50813 THEN 'LACTATE' 96 | WHEN itemid = 51265 THEN 'PLATELET' 97 | WHEN itemid = 50822 THEN 'POTASSIUM' 98 | WHEN itemid = 50971 THEN 'POTASSIUM' 99 | WHEN itemid = 51275 THEN 'PTT' 100 | WHEN itemid = 51237 THEN 'INR' 101 | WHEN itemid = 51274 THEN 'PT' 102 | WHEN itemid = 50824 THEN 'SODIUM' 103 | WHEN itemid = 50983 THEN 'SODIUM' 104 | WHEN itemid = 51006 THEN 'BUN' 105 | WHEN itemid = 51300 THEN 'WBC' 106 | WHEN itemid = 51301 THEN 'WBC' 107 | when itemid = 50861 then 'alt' 108 | when itemid = 50878 then 'ast' 109 | when itemid = 50889 then 'crp' 110 | ELSE null 111 | END AS label 112 | , -- add in some sanity checks on the values 113 | -- the where clause below requires all valuenum to be > 0, so these are only upper limit checks 114 | CASE 115 | WHEN itemid = 50862 and valuenum > 10 THEN null -- g/dL 'ALBUMIN' 116 | WHEN itemid = 50868 and valuenum > 10000 THEN null -- mEq/L 'ANION GAP' 117 | WHEN itemid = 51144 and valuenum < 0 THEN null -- immature band forms, % 118 | WHEN itemid = 51144 and valuenum > 100 THEN null -- immature band forms, % 119 | WHEN itemid = 50882 and valuenum > 10000 THEN null -- mEq/L 'BICARBONATE' 120 | WHEN itemid = 50885 and valuenum > 150 THEN null -- mg/dL 'BILIRUBIN' 121 | WHEN itemid = 50806 and valuenum > 10000 THEN null -- mEq/L 'CHLORIDE' 122 | WHEN itemid = 50902 and valuenum > 10000 THEN null -- mEq/L 'CHLORIDE' 123 | WHEN itemid = 50912 and valuenum > 150 THEN null -- mg/dL 'CREATININE' 124 | WHEN itemid = 50809 and valuenum > 10000 THEN null -- mg/dL 'GLUCOSE' 125 | WHEN itemid = 50931 and valuenum > 10000 THEN null -- mg/dL 'GLUCOSE' 126 | WHEN itemid = 50810 and valuenum > 100 THEN null -- % 'HEMATOCRIT' 127 | WHEN itemid = 51221 and valuenum > 100 THEN null -- % 'HEMATOCRIT' 128 | WHEN itemid = 50811 and valuenum > 50 THEN null -- g/dL 'HEMOGLOBIN' 129 | WHEN itemid = 51222 and valuenum > 50 THEN null -- g/dL 'HEMOGLOBIN' 130 | WHEN itemid = 50813 and valuenum > 50 THEN null -- mmol/L 'LACTATE' 131 | WHEN itemid = 51265 and valuenum > 10000 THEN null -- K/uL 'PLATELET' 132 | WHEN itemid = 50822 and valuenum > 30 THEN null -- mEq/L 'POTASSIUM' 133 | WHEN itemid = 50971 and valuenum > 30 THEN null -- mEq/L 'POTASSIUM' 134 | WHEN itemid = 51275 and valuenum > 150 THEN null -- sec 'PTT' 135 | WHEN itemid = 51237 and valuenum > 50 THEN null -- 'INR' 136 | WHEN itemid = 51274 and valuenum > 150 THEN null -- sec 'PT' 137 | WHEN itemid = 50824 and valuenum > 200 THEN null -- mEq/L == mmol/L 'SODIUM' 138 | WHEN itemid = 50983 and valuenum > 200 THEN null -- mEq/L == mmol/L 'SODIUM' 139 | WHEN itemid = 51006 and valuenum > 300 THEN null -- 'BUN' 140 | WHEN itemid = 51300 and valuenum > 1000 THEN null -- 'WBC' 141 | WHEN itemid = 51301 and valuenum > 1000 THEN null -- 'WBC' 142 | WHEN itemid = 50889 and valuenum > 1000 THEN null -- 'crp' 143 | WHEN itemid = 50878 and valuenum > 1000 THEN null -- 'ast' 144 | WHEN itemid = 50861 and valuenum > 1000 THEN null -- 'alt' 145 | ELSE le.valuenum 146 | END AS valuenum 147 | 148 | FROM `mimic-dqn.dribing_pressure.1hour_duration` d 149 | LEFT JOIN `physionet-data.mimiciii_clinical.icustays` ie 150 | ON d.icustay_id = ie.icustay_id 151 | LEFT JOIN `physionet-data.mimiciii_clinical.labevents` le 152 | ON le.subject_id = ie.subject_id AND le.hadm_id = ie.hadm_id 153 | AND le.charttime BETWEEN d.starttime AND d.endtime 154 | AND le.ITEMID in 155 | ( 156 | -- comment is: LABEL | CATEGORY | FLUID | NUMBER OF ROWS IN LABEVENTS 157 | 50868, -- ANION GAP | CHEMISTRY | BLOOD | 769895 158 | 50862, -- ALBUMIN | CHEMISTRY | BLOOD | 146697 159 | 51144, -- BANDS - hematology 160 | 50882, -- BICARBONATE | CHEMISTRY | BLOOD | 780733 161 | 50885, -- BILIRUBIN, TOTAL | CHEMISTRY | BLOOD | 238277 162 | 50912, -- CREATININE | CHEMISTRY | BLOOD | 797476 163 | 50902, -- CHLORIDE | CHEMISTRY | BLOOD | 795568 164 | 50806, -- CHLORIDE, WHOLE BLOOD | BLOOD GAS | BLOOD | 48187 165 | 50931, -- GLUCOSE | CHEMISTRY | BLOOD | 748981 166 | 50809, -- GLUCOSE | BLOOD GAS | BLOOD | 196734 167 | 51221, -- HEMATOCRIT | HEMATOLOGY | BLOOD | 881846 168 | 50810, -- HEMATOCRIT, CALCULATED | BLOOD GAS | BLOOD | 89715 169 | 51222, -- HEMOGLOBIN | HEMATOLOGY | BLOOD | 752523 170 | 50811, -- HEMOGLOBIN | BLOOD GAS | BLOOD | 89712 171 | 50813, -- LACTATE | BLOOD GAS | BLOOD | 187124 172 | 51265, -- PLATELET COUNT | HEMATOLOGY | BLOOD | 778444 173 | 50971, -- POTASSIUM | CHEMISTRY | BLOOD | 845825 174 | 50822, -- POTASSIUM, WHOLE BLOOD | BLOOD GAS | BLOOD | 192946 175 | 51275, -- PTT | HEMATOLOGY | BLOOD | 474937 176 | 51237, -- INR(PT) | HEMATOLOGY | BLOOD | 471183 177 | 51274, -- PT | HEMATOLOGY | BLOOD | 469090 178 | 50983, -- SODIUM | CHEMISTRY | BLOOD | 808489 179 | 50824, -- SODIUM, WHOLE BLOOD | BLOOD GAS | BLOOD | 71503 180 | 51006, -- UREA NITROGEN | CHEMISTRY | BLOOD | 791925 181 | 51301, -- WHITE BLOOD CELLS | HEMATOLOGY | BLOOD | 753301 182 | 51300, -- WBC COUNT | HEMATOLOGY | BLOOD | 2371 183 | 50861, 184 | 50878, 185 | 50889 186 | ) 187 | AND valuenum IS NOT null AND valuenum > 0 -- lab values cannot be 0 and cannot be negative 188 | ) pvt 189 | left join `physionet-data.mimiciii_derived.pivoted_vital` as v 190 | on pvt.icustay_id = v.icustay_id 191 | left join `physionet-data.mimiciii_derived.pivoted_gcs` as g 192 | on pvt.icustay_id =g.icustay_id 193 | left join `physionet-data.mimiciii_derived.pivoted_fio2` as f 194 | on pvt.icustay_id =f.icustay_id 195 | where g.charttime between pvt.starttime and pvt.endtime and 196 | v.charttime between pvt.starttime and pvt.endtime and 197 | f.charttime between pvt.starttime and pvt.endtime 198 | GROUP BY pvt.subject_id, pvt.hadm_id, pvt.icustay_id ,pvt.intime,pvt.outtime, pvt.starttime, pvt.endtime 199 | ORDER BY pvt.subject_id, pvt.hadm_id, pvt.icustay_id, pvt.starttime 200 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # COVID-19 2 | 3 | Analysis of high resolution clinical data for COVID-19 patients could significantly impact guidelines around optimal treatment. 4 | Due to significant administrative challenges in sharing de-identified clinical data, we have created a repository for performing federated analysis of COVID-19 patients. 5 | The idea is simple: reformat your data into our proposed 10-15 views, and you'll be able to leverage all analysis code in this repository. 6 | We are actively seeking collaborators to expand the scope of the analysis to more patients! 7 | 8 | ## Organization 9 | 10 | The organization is as follows: 11 | 12 | * [sql](/sql) - The SQL folder has local scripts for each dataset which convert. 13 | * [mimic-iii](/sql/mimic-iii) - MIMIC-III is a publicly accessible critical care database. Though MIMIC-III does not contain information on patients with COVID-19, its highly accessible nature makes it useful for prototyping. The [MIMIC-III Clinical Database Demo](https://physionet.org/content/mimiciii-demo/1.4/) is openly available and can be used to better understand the queries and ultimate data structure. 14 | * [mimic-iv](/sql/mimic-iv) - MIMIC-IV is a non-public update to MIMIC-III which contains more recent information. 15 | * [data](/data) - A placeholder folder to contain harmonized datasets. Analysis code assumes data is present in this folder. 16 | * [notebooks](/notebooks) - Jupyter notebooks which contain end-to-end analyses of COVID-19 data. 17 | 18 | ## Table structure 19 | 20 | In order to facilitate shared analysis, we have defined a common set of views/tables. 21 | 22 | **The table structure is a work in progress.** 23 | 24 | Table | Content 25 | ----- | ----- 26 | [cohort](#cohort) | Defines `stay_id`, a single ICU stay. 27 | [vitalsign](#vital-signs) | Glasgow coma scale measures. 28 | [gcs](#gcs) | Glasgow coma scale measures. 29 | [rass](#rass) | Richmond Sedation Agitation Scale measurements 30 | [oxygen_delivery](#oxygen-delivery) | Information regarding supplemental oxygen delivery 31 | [ventilator_setting](#ventilator-setting) | Measurements and settings associated with non-invasive and invasive mechanical ventilation 32 | [vitalsign](#vitalsign) | Nurse validated vital sign measurements 33 | [vasopressor](#vasopressor) | Administration and dose of intravenous vasopressors 34 | [bg](#blood-gases) | Blood gas measurements 35 | [cbc](#complete_blood_count) | Counts of the number of blood cells and related measures. 36 | [differential](#differential) | Detailed differential counts of white blood cells 37 | [red cell morphology](#red_cell_morphology) | Morphology of red blood cells 38 | [coagulation](#coagulation) | Measures of blood coagulation 39 | [chemistry](#chemistry) | Electrolyte and protein counts 40 | [enzymes](#enzymes-and-bilirubin) | Enzymes concentrations and bilirubin concentration 41 | [cardiac_markers](#cardiac-markers) | Markers of cardiac function or injury 42 | [inflammation_measures](#inflammation-measures) | Measures of inflammation 43 | 44 | ### Detailed tables 45 | 46 | #### Cohort 47 | 48 | Must create a `stay_id`, `intime`, `outtime` triplet to assign a unique stay in the ICU for each patient. `subject_id` should uniquely define the patient. 49 | 50 | #### Charted data 51 | 52 | ##### Vital signs 53 | 54 | Column | Data type | Unit of measure | Description 55 | ------------- | --------- | ------------------- | ----------- 56 | subject_id | Integer | N/A | Patient identifier. 57 | charttime | Timestamp | N/A | Time at which the charted event was valid. 58 | heart_rate | NUMERIC | Beats per minute | Number of heart beats per minute. 59 | sbp | NUMERIC | mmHg | Systolic blood pressure. 60 | dbp | NUMERIC | mmHg | Diastolic blood pressure. 61 | mbp | NUMERIC | mmHg | Mean blood pressure. 62 | sbp_ni | NUMERIC | mmHg | Non-invasively recorded systolic blood pressure. 63 | dbp_ni | NUMERIC | mmHg | Non-invasively recorded diastolic blood pressure. 64 | mbp_ni | NUMERIC | mmHg | Non-invasively recorded mean blood pressure. 65 | resprate | NUMERIC | Breaths per minute | Respiratory rate. 66 | temperature | NUMERIC | Degrees Celsius | Patient body temperature. 67 | temperature_site | NUMERIC | N/A | Site at which the measurement is taken. 68 | spo2 | NUMERIC | % (percentage) | Peripheral oxygen saturation. 69 | glucose | NUMERIC | mg/dL | Serum glucose measured using a fingerstick. 70 | 71 | ##### GCS 72 | 73 | Column | Data type | Unit of measure | Description 74 | ------------- | ---------- | --------------- | ----------- 75 | subject_id | Integer | N/A | Patient identifier. 76 | charttime | Timestamp | N/A | Time at which the charted event was valid. 77 | gcs | Integer | N/A | Glasgow coma scale. 78 | gcs_motor | Integer | N/A | 79 | gcs_verbal | Integer | N/A | 80 | gcs_eyes | Integer | N/A | 81 | gcs_unable | Integer | N/A | Unable to assess GCS due to sedation/intubation. 82 | 83 | ##### RASS 84 | 85 | Column | Data type | Unit of measure | Description 86 | ------------- | --------- | --------------- | ----------- 87 | subject_id | Integer | N/A | Patient identifier. 88 | stay_id | Integer | N/A | Encounter identifier. 89 | charttime | Timestamp | N/A | Time at which the charted event was valid. 90 | rass | Integer | N/A | Current Richmond Agitation Sedation Scale value 91 | rass_target | Integer | N/A | Desired Richmond Agitation Sedation Scale value 92 | 93 | ##### Oxygen Delivery 94 | 95 | Column | Data type | Unit of measure | Description 96 | ------------- | ---------- | --------------- | ----------- 97 | subject_id | Integer | N/A | Patient identifier. 98 | charttime | Timestamp | N/A | Time at which the charted event was valid. 99 | o2_flow | Numeric | Litres/minute | Oxygen flow provided to the patient. 100 | o2_flow_additional | Numeric | Litres/minute | Additional oxygen flow provided by one or more secondary devices. 101 | o2_delivery_device_1 | Numeric | N/A | Primary oxygen delivery device. 102 | o2_delivery_device_2 | Numeric | N/A | Secondary oxygen delivery device. 103 | o2_delivery_device_3 | Numeric | N/A | Tertiary oxygen delivery device. 104 | o2_delivery_device_4 | Numeric | N/A | Quartenary oxygen delivery device. 105 | 106 | ##### Ventilator Setting 107 | 108 | Column | Data type | Unit of measure | Description 109 | ------------------------- | ---------- | --------------- | ----------- 110 | subject_id | Integer | N/A | Patient identifier. 111 | charttime | Timestamp | N/A | Time at which the charted event was valid. 112 | respiratory_rate_set | Numeric | Breaths/min | Breathing rate set by the ventilator 113 | respiratory_rate_spontaneous | Numeric | Breaths/min | Breathing rate occuring above the set rate 114 | respiratory_rate_total | Numeric | Breaths/min | Actual breathing rate 115 | minute_volume | Numeric | L/min | Litres of air inspired per minute 116 | tidal_volume_set | Numeric | mL | Tidal volume set by the ventilator 117 | tidal_volume_observed | Numeric | mL | Observed tidal volume 118 | tidal_volume_spontaneous | Numeric | mL | Tidal volume of spontaneous breaths over the ventilator 119 | plateau_pressure | Numeric | cm H2O | Maximum pressure observed in the lungs 120 | peep | Numeric | cm H2O | Positive end expiratory pressure 121 | fio2 | Numeric | Proportion | Fraction of inspired oxygen in the air 122 | ventilator_mode | String | N/A | Mode of ventilation (assist control, etc) 123 | ventilator_mode_hamilton | String | N/A | Special mode settings for Hamilton brand ventilators 124 | ventilator_type | String | N/A | Type of ventilator used 125 | 126 | #### Labs 127 | 128 | ##### Blood gases 129 | 130 | Laboratory measures from patients with the time of blood collection and the time at which the result was available. 131 | 132 | Column | Data type | Unit of measure | Description 133 | ------------- | ---------- | --------------- | ----------- 134 | subject_id | Integer | N/A | Patient identifier. 135 | charttime | Timestamp | N/A | Time at which the specimen was drawn from the patient. 136 | specimen_id | Integer | N/A | Unique identifier for the specimen drawn from the patient which the measurements are derived from. 137 | Temperature | Numeric | | 138 | so2 | Numeric | | 139 | pO2 | Numeric | | 140 | pCO2 | Numeric | | 141 | pH | Numeric | | 142 | aado2 | Numeric | | 143 | pafi | Numeric | | 144 | calTCO2 | Numeric | | 145 | Base Excess | Numeric | | 146 | hematocrit | Numeric | | 147 | hemoglobin | Numeric | | 148 | carboxyhemoglobin | Numeric | | 149 | methemoglobin | Numeric | | 150 | chloride | Numeric | | 151 | calcium | Numeric | | 152 | potassium | Numeric | | 153 | sodium | Numeric | | 154 | lactate | Numeric | | 155 | glucose | Numeric | | 156 | 157 | ##### complete_blood_count 158 | 159 | Column | Data type | Unit of measure | Description 160 | ------------- | ---------- | --------------- | ----------- 161 | subject_id | Integer | N/A | Patient identifier. 162 | charttime | Timestamp | N/A | Time at which the specimen was drawn from the patient. 163 | specimen_id | Integer | N/A | Unique identifier for the specimen drawn from the patient which the measurements are derived from. 164 | hct | Numeric | % | Hematocrit 165 | hgb | Numeric | g/dL | Hemoglobin 166 | mch | Numeric | pg | Mean corpuscular hemoglobin 167 | mchc | Numeric | g/dL | Mean corpuscular hemoglobin concentration 168 | mcv | Numeric | fL | Mean corpuscular volume 169 | platelets | Numeric | K/uL | Platelet count 170 | rbc | Numeric | m/uL | Red blood cells 171 | rdw | Numeric | % | Red blood cell distribution width 172 | rdwsd | Numeric | fL | Red blood cell distribution width standard deviation 173 | wbc | Numeric | K/uL | White blood cell count 174 | 175 | ##### differential 176 | 177 | Column | Data type | Unit of measure | Description 178 | ------------- | ---------- | --------------- | ----------- 179 | subject_id | Integer | N/A | Patient identifier. 180 | charttime | Timestamp | N/A | Time at which the specimen was drawn from the patient. 181 | specimen_id | Integer | N/A | Unique identifier for the specimen drawn from the patient which the measurements are derived from. 182 | specimen_type | Text | | 183 | abs_basophils | Numeric | K/uL | Absolute Basophil Count 184 | abs_eosinophils | Numeric | K/uL | Absolute Eosinophil Count 185 | abs_lymphocytes | Numeric | K/uL | Absolute Lymphocyte Count 186 | abs_monocytes | Numeric | K/uL | Absolute Monocyte Count 187 | abs_neutrophils | Numeric | K/uL | Absolute Neutrophil Count 188 | atyps | Numeric | % | Atypical Lymphocytes 189 | bands | Numeric | % | Immature Band Forms 190 | imm_granulocytes | Numeric | % | Immature Granulocytes 191 | metas | Numeric | % | Metamyelocytes 192 | nrbc | Numeric | % | Nucleated Red Blood Cells 193 | 194 | ##### red_cell_morphology 195 | 196 | Column | Data type | Unit of measure | Description 197 | ------------- | ---------- | --------------- | ----------- 198 | subject_id | Integer | N/A | Patient identifier. 199 | charttime | Timestamp | N/A | Time at which the specimen was drawn from the patient. 200 | specimen_id | Integer | N/A | Unique identifier for the specimen drawn from the patient which the measurements are derived from. 201 | rbc morph | Numeric | | 202 | poiklo | Numeric | | 203 | polychr | Numeric | | 204 | ovalocy | Numeric | | 205 | target | Numeric | | 206 | cshisto | Numeric | | 207 | echino | Numeric | | 208 | 209 | ##### coagulation 210 | 211 | Column | Data type | Unit of measure | Description 212 | ------------- | ---------- | --------------- | ----------- 213 | subject_id | Integer | N/A | Patient identifier. 214 | charttime | Timestamp | N/A | Time at which the specimen was drawn from the patient. 215 | specimen_id | Integer | N/A | Unique identifier for the specimen drawn from the patient which the measurements are derived from. 216 | d_dimer | Numeric | ng/mL FEU | D-Dimer 217 | fibrinogen | Numeric | mg/dL | Fibrinogen, Functional 218 | thrombin | Numeric | sec | Thrombin Time 219 | inr | Numeric | N/A (ratio) | International Normalized Ratio 220 | pt | Numeric | sec | Prothrombin Time 221 | ptt | Numeric | sec | Partial Thromboplastin Time 222 | 223 | ##### chemistry 224 | 225 | Column | Data type | Unit of measure | Description 226 | ------------- | ---------- | --------------- | ----------- 227 | subject_id | Integer | N/A | Patient identifier. 228 | charttime | Timestamp | N/A | Time at which the specimen was drawn from the patient. 229 | specimen_id | Integer | N/A | Unique identifier for the specimen drawn from the patient which the measurements are derived from. 230 | Albumin | Numeric | g/dL | 231 | Globulin | Numeric | g/dL | 232 | total_protein | Numeric | g/dL | 233 | aniongap | Numeric | mEq/L | 234 | bicarbonate | Numeric | mEq/L | 235 | bun | Numeric | mg/dL | 236 | calcium | Numeric | mg/dL | 237 | chloride | Numeric | mEq/L | 238 | creatinine | Numeric | mg/dL | 239 | glucose | Numeric | mg/dL | 240 | sodium | Numeric | mEq/L | 241 | potassium | Numeric | mEq/L | 242 | 243 | ##### Enzymes (and Bilirubin) 244 | 245 | Column | Data type | Unit of measure | Description 246 | ------------- | ---------- | --------------- | ----------- 247 | subject_id | Integer | N/A | Patient identifier. 248 | charttime | Timestamp | N/A | Time at which the specimen was drawn from the patient. 249 | specimen_id | Integer | N/A | Unique identifier for the specimen drawn from the patient which the measurements are derived from. 250 | alt | Numeric | | Alanine Aminotransferase 251 | alkphos | Numeric | | Alkaline Phosphatase 252 | ast | Numeric | | Asparate Aminotransferase 253 | amylase | Numeric | | Amylase 254 | bilirubin_total | Numeric | | Total Bilirubin (direct + indirect) 255 | bilirubin_direct | Numeric | | Direct Bilirubin 256 | bilirubin_indirect | Numeric | | Indirect Bilirubin 257 | ck_cpk | Numeric | | Creatinine Kinase 258 | ld_ldh | Numeric | | Lactate Dehydronase. 259 | 260 | ##### Cardiac Markers 261 | 262 | Column | Data type | Unit of measure | Description 263 | --------------- | ---------- | --------------- | ----------- 264 | subject_id | Integer | N/A | Patient identifier. 265 | charttime | Timestamp | N/A | Time at which the specimen was drawn from the patient. 266 | specimen_id | Integer | N/A | Unique identifier for the specimen drawn from the patient which the measurements are derived from. 267 | troponin_i | Numeric | | Troponin I 268 | troponin_i_poc | Numeric | | Troponin I, Point of Care test 269 | troponin_t | Numeric | | Tropinin T 270 | ck_mb | Numeric | | Creatinine Kinase, MB Isoenzyme 271 | 272 | ##### Inflammation measures 273 | 274 | Column | Data type | Unit of measure | Description 275 | ------------- | ---------- | --------------- | ----------- 276 | subject_id | Integer | N/A | Patient identifier. 277 | charttime | Timestamp | N/A | Time at which the specimen was drawn from the patient. 278 | specimen_id | Integer | N/A | Unique identifier for the specimen drawn from the patient which the measurements are derived from. 279 | crp | Numeric | | C-reactive Protein 280 | crp_high_sens | Numeric | | C-reactive Protein, high sensitivity assay 281 | il6 | Numeric | | Interleukin-6 (send out) 282 | procalcitonin | Numeric | | Procalcitonin 283 | 284 | ##### hematologic/other 285 | 286 | * ferritin 287 | * ggt 288 | * transaminase 289 | * 5ntd 290 | * ceruloplasmin 291 | * alpha-fetoprotein 292 | --------------------------------------------------------------------------------