├── utils ├── README.md ├── requirements.txt ├── setup_user_env.sh ├── build_curated_from_psql.sh ├── simple_impute.py ├── postgres_make_extended_concepts.sh ├── Makefile └── niv-durations.sql ├── resources ├── README.md ├── variable_ranges.csv ├── Rohit_itemid.txt ├── outcome_data_spec.json ├── static_data_spec.json ├── item_id_stat.csv └── testing_schemas.pkl ├── notebooks ├── .gitignore ├── README.md ├── mmd_grud_utils.py ├── Baselines for Mortality and LOS prediction - GRU-D.ipynb └── Baselines for Mortality and LOS prediction - Sklearn.ipynb ├── data └── .gitignore ├── SQL_Queries ├── notes.sql ├── codes.sql ├── debug_codes.sql ├── statics.sql └── debug_statics.sql ├── LICENSE ├── .gitignore ├── datapackage_io_util.py ├── mimic_querier.py ├── mimic_extract_env_py36.yml ├── README.md └── heuristic_sentence_splitter.py /utils/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /resources/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/.gitignore: -------------------------------------------------------------------------------- 1 | *Scratchpad.ipynb 2 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything per data use and confidentiality agreements 2 | * 3 | # Except this file 4 | !.gitignore -------------------------------------------------------------------------------- /utils/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.12 2 | pandas>=0.18 3 | scipy>=0.18 4 | jupyter>=1.0 5 | numexpr>=2.6 6 | scikit-learn>=0.19 7 | psycopg2>=2.7 8 | pytables>=3.4 9 | matplotlib>=2.0 
-------------------------------------------------------------------------------- /SQL_Queries/notes.sql: -------------------------------------------------------------------------------- 1 | SELECT n.subject_id, n.hadm_id, i.icustay_id, n.chartdate, n.charttime, n.category, n.description, n.text 2 | FROM noteevents n INNER JOIN icustays i on i.hadm_id = n.hadm_id 3 | WHERE 4 | iserror IS NULL 5 | AND (n.chartdate <= i.outtime OR n.charttime <= i.outtime) 6 | AND n.hadm_id IN ('{hadm_id}') 7 | AND n.subject_id IN ('{subject_id}') 8 | -------------------------------------------------------------------------------- /SQL_Queries/codes.sql: -------------------------------------------------------------------------------- 1 | SET SEARCH_PATH TO public,mimiciii; 2 | SELECT 3 | i.icustay_id, d.subject_id, d.hadm_id, 4 | array_agg(d.icd9_code ORDER BY seq_num ASC) AS icd9_codes 5 | FROM diagnoses_icd d 6 | LEFT OUTER JOIN (SELECT ccs_matched_id, icd9_code from ccs_dx) c 7 | ON c.icd9_code = d.icd9_code 8 | INNER JOIN icustays i 9 | ON i.hadm_id = d.hadm_id AND i.subject_id = d.subject_id 10 | WHERE d.hadm_id IN ('{hadm_id}') AND seq_num IS NOT NULL 11 | GROUP BY i.icustay_id, d.subject_id, d.hadm_id 12 | -------------------------------------------------------------------------------- /SQL_Queries/debug_codes.sql: -------------------------------------------------------------------------------- 1 | \echo "DEBUG ONLY" 2 | SET search_path TO mimiciii; 3 | SELECT 4 | i.icustay_id, d.subject_id, d.hadm_id, 5 | array_agg(d.icd9_code ORDER BY seq_num ASC) AS icd9_codes, 6 | array_agg(c.ccs_matched_id ORDER BY seq_num ASC) AS ccs_codes 7 | FROM mimiciii.diagnoses_icd d 8 | LEFT OUTER JOIN (SELECT ccs_matched_id, icd9_code from mimiciii.ccs_dx) c 9 | ON c.icd9_code = d.icd9_code 10 | INNER JOIN icustays i 11 | ON i.hadm_id = d.hadm_id AND i.subject_id = d.subject_id 12 | 13 | WHERE seq_num IS NOT NULL 14 | GROUP BY i.icustay_id, d.subject_id, d.hadm_id 15 | 16 | 
-------------------------------------------------------------------------------- /utils/setup_user_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export MIMIC_CODE_DIR=$(realpath ../../mimic-code) 4 | export MIMIC_EXTRACT_CODE_DIR=$(realpath ../) 5 | 6 | export MIMIC_DATA_DIR=$MIMIC_EXTRACT_CODE_DIR/data/ 7 | 8 | export MIMIC_EXTRACT_OUTPUT_DIR=$MIMIC_DATA_DIR/curated/ 9 | mkdir -p $MIMIC_EXTRACT_OUTPUT_DIR 10 | 11 | export DBUSER=bnestor 12 | export DBNAME=mimic 13 | export SCHEMA=public,mimiciii 14 | export HOST=mimic 15 | export DBSTRING="dbname=$DBNAME options=--search_path=$SCHEMA" 16 | alias psql="psql -h $HOST -U $DBUSER " 17 | 18 | export PGHOST=$HOST 19 | export PGUSER=$DBUSER 20 | 21 | export PGPASSWORD=$1 22 | -------------------------------------------------------------------------------- /utils/build_curated_from_psql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Build curated dataset of .csv / .npy / .hd5 files 4 | # for patient time-series data extracted from PSQL DB 5 | # 6 | # Takes optional argument POP_SIZE 7 | # 8 | 9 | mkdir -p $MIMIC_EXTRACT_OUTPUT_DIR; 10 | 11 | if [[ -z $POP_SIZE ]]; then 12 | # means extract all available data 13 | POP_SIZE=0; 14 | fi 15 | 16 | python -u $MIMIC_EXTRACT_CODE_DIR/mimic_direct_extract.py \ 17 | --out_path $MIMIC_EXTRACT_OUTPUT_DIR/ \ 18 | --resource_path $MIMIC_EXTRACT_CODE_DIR/resources/ \ 19 | --extract_pop 2 \ 20 | --extract_outcomes 2 \ 21 | --extract_codes 0 \ 22 | --extract_numerics 2 \ 23 | --extract_notes 0\ 24 | --exit_after_loading 0 \ 25 | --plot_hist 0 \ 26 | --pop_size $POP_SIZE \ 27 | --psql_password $PGPASSWORD \ 28 | --psql_host $HOST \ 29 | --min_percent 0 \ 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | 
Copyright (c) 2019 MLforHealth 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | # Related Jupyter Notebooks 2 | 3 | # Testing Cases 4 | `Testing mimic_direct_extract.ipynb` contains tests for different data processing funcations in **MIMIC-Extract**. 5 | 6 | # Demonstrated Usage 7 | * `Baselines for Mortality and LOS prediction - Sklearn.ipynb` 8 | 9 | This notebook demonstrates the use of **MIMIC-Extract** output in mortality and long length-of-stay prediction tasks. Logistic regression and random forest models are fitted using Scikit-Learn. 10 | 11 | * `Baselines for Mortality and LOS prediction - GRU-D.ipynb` 12 | 13 | This notebook demonstrates the use of **MIMIC-Extract** output in mortality and long length-of-stay prediction tasks. 
GRU-D models are fitted. 14 | 15 | * `Baselines for Intervention Prediction - Mechanical Ventilation.ipynb` 16 | 17 | This notebook demonstrates the use of **MIMIC-Extract** output in mechanical ventilation prediction task. Logistic regression and random forest models models are fitted using Scikit-Learn. CNN is fitted using Keras 2.2.4. LSTM is fitted using Tensorflow 1.8.0. 18 | 19 | * `Baselines for Intervention Prediction - Vasopressor.ipynb` 20 | 21 | This notebook demonstrates the use of **MIMIC-Extract** output in vasopressor prediction task. Logistic regression and random forest models models are fitted using Scikit-Learn. CNN is fitted using Keras 2.2.4. LSTM is fitted using Tensorflow 1.8.0. 22 | -------------------------------------------------------------------------------- /utils/simple_impute.py: -------------------------------------------------------------------------------- 1 | import copy, math, os, pickle, time, pandas as pd, numpy as np 2 | 3 | ID_COLS = ['subject_id', 'hadm_id', 'icustay_id'] 4 | 5 | def simple_imputer(df,train_subj): 6 | idx = pd.IndexSlice 7 | df = df.copy() 8 | 9 | df_out = df.loc[:, idx[:, ['mean', 'count']]] 10 | icustay_means = df_out.loc[:, idx[:, 'mean']].groupby(ID_COLS).mean() 11 | global_means = df_out.loc[idx[train_subj,:], idx[:, 'mean']].mean(axis=0) 12 | 13 | df_out.loc[:,idx[:,'mean']] = df_out.loc[:,idx[:,'mean']].groupby(ID_COLS).fillna( 14 | method='ffill' 15 | ).groupby(ID_COLS).fillna(icustay_means).fillna(global_means) 16 | 17 | df_out.loc[:, idx[:, 'count']] = (df.loc[:, idx[:, 'count']] > 0).astype(float) 18 | df_out.rename(columns={'count': 'mask'}, level='Aggregation Function', inplace=True) 19 | 20 | is_absent = (1 - df_out.loc[:, idx[:, 'mask']]) 21 | hours_of_absence = is_absent.cumsum() 22 | time_since_measured = hours_of_absence - hours_of_absence[is_absent==0].fillna(method='ffill') 23 | time_since_measured.rename(columns={'mask': 'time_since_measured'}, level='Aggregation Function', inplace=True) 
24 | 25 | df_out = pd.concat((df_out, time_since_measured), axis=1) 26 | df_out.loc[:, idx[:, 'time_since_measured']] = df_out.loc[:, idx[:, 'time_since_measured']].fillna(100) 27 | 28 | df_out.sort_index(axis=1, inplace=True) 29 | return df_out -------------------------------------------------------------------------------- /utils/postgres_make_extended_concepts.sh: -------------------------------------------------------------------------------- 1 | # This file makes tables for the concepts in this subfolder. 2 | # Be sure to run postgres-functions.sql first, as the concepts rely on those function definitions. 3 | # Note that this may take a large amount of time and hard drive space. 4 | 5 | # string replacements are necessary for some queries 6 | export REGEX_DATETIME_DIFF="s/DATETIME_DIFF\((.+?),\s?(.+?),\s?(DAY|MINUTE|SECOND|HOUR|YEAR)\)/DATETIME_DIFF(\1, \2, '\3')/g" 7 | export REGEX_SCHEMA='s/`physionet-data.(mimiciii_clinical|mimiciii_derived|mimiciii_notes).(.+?)`/\2/g' 8 | export CONNSTR='-d mimic' 9 | 10 | # this is set as the search_path variable for psql 11 | # a search path of "public,mimiciii" will search both public and mimiciii 12 | # schemas for data, but will create tables on the public schema 13 | export PSQL_PREAMBLE='SET search_path TO public,mimiciii' 14 | 15 | echo '' 16 | echo '===' 17 | echo 'Beginning to create tables for MIMIC database.' 18 | echo 'Any notices of the form "NOTICE: TABLE "XXXXXX" does not exist" can be ignored.' 19 | echo 'The scripts drop views before creating them, and these notices indicate nothing existed prior to creating the view.' 
20 | echo '===' 21 | echo '' 22 | 23 | echo 'Directory 5 of 9: fluid_balance' 24 | { echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS colloid_bolus; CREATE TABLE colloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/colloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql ${CONNSTR} 25 | { echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS crystalloid_bolus; CREATE TABLE crystalloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/crystalloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql ${CONNSTR} 26 | 27 | echo 'Finished creating tables.' 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | 107 | *.ipynb_checkpoints* 108 | makejob 109 | 110 | # tags 111 | tags 112 | -------------------------------------------------------------------------------- /utils/Makefile: -------------------------------------------------------------------------------- 1 | SHELL:=/bin/bash 2 | 3 | PSQL_EXECUTABLE:=$(shell which psql) 4 | 5 | MIMIC_CODE_DIR:=${shell grep "MIMIC_CODE_DIR" setup_user_env.sh | cut -d'=' -f2} 6 | 7 | #=== Commands 8 | 9 | .PHONY: build_curated_from_psql 10 | 11 | build_curated_from_psql: setup_user_env.sh has_psql_exe ## Build curated flat files from PSQL db, using this repo 12 | { \ 13 | source ./setup_user_env.sh; \ 14 | bash build_curated_from_psql.sh; \ 15 | } 16 | 17 | #=== Required mimic-code/ repo 18 | # Rules below will download the repo if it doesn't exist 19 | # Expected location MIMIC_CODE_DIR is defined in 
setup_user_env.sh 20 | 21 | .PHONY: clone_mimic_code_repo ${MIMIC_CODE_DIR}/buildmimic/postgres/Makefile 22 | 23 | clone_mimic_code_repo: ${MIMIC_CODE_DIR}/buildmimic/postgres/Makefile 24 | 25 | ${MIMIC_CODE_DIR}/buildmimic/postgres/Makefile: setup_user_env.sh 26 | { \ 27 | source ./setup_user_env.sh; \ 28 | [ -e $@ ] || git clone https://github.com/MIT-LCP/mimic-code/ ${MIMIC_CODE_DIR}/; \ 29 | } 30 | 31 | #=== Build concepts 32 | .PHONY: build_concepts 33 | build_concepts: build_concepts_mimic_code build_extra_concepts 34 | 35 | .PHONY: build_concepts_mimic_code 36 | build_concepts_mimic_code: setup_user_env.sh clone_mimic_code_repo 37 | { \ 38 | source ./setup_user_env.sh; \ 39 | cd ${MIMIC_CODE_DIR}/concepts; \ 40 | psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./make-concepts.sql; \ 41 | cd ../../MIMIC_Extract/utils; \ 42 | } 43 | 44 | .PHONY: build_extra_concepts 45 | build_extra_concepts: setup_user_env.sh niv-durations.sql crystalloid-bolus.sql colloid-bolus.sql 46 | { \ 47 | source ./setup_user_env.sh; \ 48 | psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./niv-durations.sql; \ 49 | psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./crystalloid-bolus.sql; \ 50 | psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./colloid-bolus.sql; \ 51 | } 52 | 53 | #=== Env Checks 54 | 55 | .PHONY: has_psql_exe 56 | has_psql_exe: setup_user_env.sh 57 | ifndef PSQL_EXECUTABLE 58 | $(error "Error: 'psql' is undefined. 
Please install/add to current path.") 59 | endif 60 | -------------------------------------------------------------------------------- /resources/variable_ranges.csv: -------------------------------------------------------------------------------- 1 | LEVEL2,LEVEL1,OUTLIER LOW,VALID LOW,IMPUTE,VALID HIGH,OUTLIER HIGH 2 | Alanine aminotransferase,,0,2,34,10000,11000 3 | Albumin,,0,0.6,3.1,6,60 4 | Alkaline phosphate,,0,20,106,3625,4000 5 | Anion Gap,,0,5,13,50,55 6 | Asparate aminotransferase,,0,6,40,20000,22000 7 | Basophils,,,,,, 8 | Bicarbonate,,0,0,25,60,66 9 | Bilirubin,Bilirubin (total),0,0.1,0.9,60,66 10 | Bilirubin,Bilirubin (conjugated),,,,, 11 | Bilirubin,Bilirubin (unconjugated),,,,, 12 | Blood culture,,,,,, 13 | Blood urea nitrogen,,0,0,23,250,275 14 | Calcium,,,,,, 15 | Calcium ionized,,,,,, 16 | Capillary refill rate,,0,0,0,1,1 17 | Chloride,,0,50,104,175,200 18 | Cholesterol,,0,32,160,600,660 19 | Creatinine,,0,0.1,1,60,66 20 | Diastolic blood pressure,,0,0,59,375,375 21 | Eosinophils,,,,,, 22 | Fraction inspired oxygen,,0.2,0.21,0.21,1,1.1 23 | Glascow coma scale eye opening,,1,1,4,4,4 24 | Glascow coma scale motor response,,1,1,6,6,6 25 | Glascow coma scale total,,3,3,11,15,15 26 | Glascow coma scale verbal response,,1,1,4,5,5 27 | Glucose,,0,33,128,2000,2200 28 | Heart rate,,0,0,86,350,390 29 | Height,,0,0,170,240,275 30 | Hematocrit,,0,0,30.2,75,100 31 | Hemoglobin,,0,0,10.2,25,30 32 | Lactate,,0,0.4,1.8,30,33 33 | Lactate dehydrogenase,,0,54,263,33000,35000 34 | Lactic acid,,,,,, 35 | Lymphocytes,,,,,, 36 | Magnesium,,0,0,2,20,22 37 | Mean blood pressure,,0,14,77,330,375 38 | Mean corpuscular hemoglobin,,,,,, 39 | Mean corpuscular hemoglobin concentration,,,,,, 40 | Mean corpuscular volume,,,,,, 41 | Oxygen saturation,,0,0,98,100,150 42 | Monocytes,,,,,, 43 | Neutrophils,,,,,, 44 | Partial pressure of carbon dioxide,,0,0,40,200,220 45 | Partial pressure of oxygen,,0,32,112,700,770 46 | Partial thromboplastin time,,0,18.8,34.4,150,150 47 | Peak 
inspiratory pressure,,0,14,20,38,40 48 | pH,,6.3,6.3,7.4,8.4,10 49 | Phosphate,,0,0.5,3.4,20,22 50 | Platelets,,0,0,208,2000,2200 51 | Positive end-expiratory pressure,,0,0,6,25,30 52 | Potassium,,0,0,4.1,12,15 53 | Prothrombin time,,0,9.9,14.5,97.1,150 54 | Pupillary response left,,,,,, 55 | Pupillary response right,,,,,, 56 | Pupillary size left,,,,,, 57 | Pupillary size right,,,,,, 58 | Red blood cell count,,,,,, 59 | Respiratory rate,,0,0,19,300,330 60 | Sodium,,0,50,142,225,250 61 | Systolic blood pressure,,0,0,118,375,375 62 | Temperature,,14.2,26,37,45,47 63 | Troponin-I,,0,0.01,2.3,49.6,575 64 | Troponin-T,,0,0.01,0.1,20.85,24 65 | Urine output,,0,0,80,1200,2445 66 | Weight,,0,0,81.8,250,250 67 | White blood cell count,,0,0,9.9,1000,1100 -------------------------------------------------------------------------------- /SQL_Queries/statics.sql: -------------------------------------------------------------------------------- 1 | select distinct 2 | i.subject_id, 3 | i.hadm_id, 4 | i.icustay_id, 5 | i.gender, 6 | i.admission_age as age, 7 | i.ethnicity, 8 | i.hospital_expire_flag, 9 | i.hospstay_seq, 10 | i.los_icu, 11 | i.admittime, 12 | i.dischtime, 13 | i.intime, 14 | i.outtime, 15 | a.diagnosis AS diagnosis_at_admission, 16 | a.admission_type, 17 | a.insurance, 18 | a.deathtime, 19 | a.discharge_location, 20 | CASE when a.deathtime between i.intime and i.outtime THEN 1 ELSE 0 END AS mort_icu, 21 | CASE when a.deathtime between i.admittime and i.dischtime THEN 1 ELSE 0 END AS mort_hosp, 22 | s.first_careunit, 23 | c.fullcode_first, 24 | c.dnr_first, 25 | c.fullcode, 26 | c.dnr, 27 | c.dnr_first_charttime, 28 | c.cmo_first, 29 | c.cmo_last, 30 | c.cmo, 31 | c.timecmo_chart, 32 | sofa.sofa, 33 | sofa.respiration as sofa_, 34 | sofa.coagulation as sofa_, 35 | sofa.liver as sofa_, 36 | sofa.cardiovascular as sofa_, 37 | sofa.cns as sofa_, 38 | sofa.renal as sofa_, 39 | sapsii.sapsii, 40 | sapsii.sapsii_prob, 41 | oasis.oasis, 42 | oasis.oasis_prob, 43 | 
COALESCE(f.readmission_30, 0) AS readmission_30 44 | FROM icustay_detail i 45 | INNER JOIN admissions a ON i.hadm_id = a.hadm_id 46 | INNER JOIN icustays s ON i.icustay_id = s.icustay_id 47 | INNER JOIN code_status c ON i.icustay_id = c.icustay_id 48 | LEFT OUTER JOIN (SELECT d.icustay_id, 1 as readmission_30 49 | FROM icustays c, icustays d 50 | WHERE c.subject_id=d.subject_id 51 | AND c.icustay_id > d.icustay_id 52 | AND c.intime - d.outtime <= interval '30 days' 53 | AND c.outtime = (SELECT MIN(e.outtime) from icustays e 54 | WHERE e.subject_id=c.subject_id 55 | AND e.intime>d.outtime)) f 56 | ON i.icustay_id=f.icustay_id 57 | LEFT OUTER JOIN (SELECT icustay_id, sofa, respiration, coagulation, liver, cardiovascular, cns, renal 58 | FROM sofa) sofa 59 | ON i.icustay_id=sofa.icustay_id 60 | LEFT OUTER JOIN (SELECT icustay_id, sapsii, sapsii_prob 61 | FROM sapsii) sapsii 62 | ON sapsii.icustay_id=i.icustay_id 63 | LEFT OUTER JOIN (SELECT icustay_id, oasis, oasis_prob 64 | FROM oasis) oasis 65 | ON oasis.icustay_id=i.icustay_id 66 | WHERE s.first_careunit NOT like 'NICU' 67 | and i.hadm_id is not null and i.icustay_id is not null 68 | and i.hospstay_seq = 1 69 | and i.icustay_seq = 1 70 | and i.admission_age >= {min_age} 71 | and i.los_icu >= {min_day} 72 | and (i.outtime >= (i.intime + interval '{min_dur} hours')) 73 | and (i.outtime <= (i.intime + interval '{max_dur} hours')) 74 | ORDER BY subject_id 75 | {limit} 76 | -------------------------------------------------------------------------------- /SQL_Queries/debug_statics.sql: -------------------------------------------------------------------------------- 1 | \echo "This file is just for debugging" 2 | SET search_path TO public,mimiciii; 3 | select distinct 4 | i.subject_id, 5 | i.hadm_id, 6 | i.icustay_id, 7 | i.gender, 8 | i.age as age, 9 | i.ethnicity, 10 | i.admission_type, 11 | i.hospital_expire_flag, 12 | i.hospstay_seq, 13 | i.los_icu, 14 | i.admittime, 15 | i.dischtime, 16 | i.intime, 17 | i.outtime, 18 
| a.diagnosis AS diagnosis_at_admission, 19 | a.insurance, 20 | a.deathtime, 21 | a.discharge_location, 22 | CASE when a.deathtime between i.intime and i.outtime THEN 1 ELSE 0 END AS mort_icu, 23 | CASE when a.deathtime between i.admittime and i.dischtime THEN 1 ELSE 0 END AS mort_hosp, 24 | s.first_careunit, 25 | c.fullcode_first, 26 | c.dnr_first, 27 | c.fullcode, 28 | c.dnr, 29 | -- c.timednr_chart, 30 | c.dnr_first_charttime, 31 | c.cmo_first, 32 | c.cmo_last, 33 | c.cmo, 34 | c.cmo_ds, 35 | -- c.timecmo_chart, 36 | c.cmo_first_charttime, 37 | -- c.timecmo_nursingnote, 38 | c.cmo_nursingnote_charttime, 39 | sofa.sofa, 40 | sofa.respiration as sofa_, 41 | sofa.coagulation as sofa_, 42 | sofa.liver as sofa_, 43 | sofa.cardiovascular as sofa_, 44 | sofa.cns as sofa_, 45 | sofa.renal as sofa_, 46 | sapsii.sapsii, 47 | sapsii.sapsii_prob, 48 | oasis.oasis, 49 | oasis.oasis_prob, 50 | COALESCE(f.readmission_30, 0) AS readmission_30 51 | FROM icustay_detail i 52 | INNER JOIN admissions a ON i.hadm_id = a.hadm_id 53 | INNER JOIN icustays s ON i.icustay_id = s.icustay_id 54 | INNER JOIN code_status c ON i.icustay_id = c.icustay_id 55 | LEFT OUTER JOIN (SELECT d.icustay_id, 1 as readmission_30 56 | FROM icustays c, icustays d 57 | WHERE c.subject_id=d.subject_id 58 | AND c.icustay_id > d.icustay_id 59 | AND c.intime - d.outtime <= interval '30 days' 60 | AND c.outtime = (SELECT MIN(e.outtime) from icustays e 61 | WHERE e.subject_id=c.subject_id 62 | AND e.intime>d.outtime)) f 63 | ON i.icustay_id=f.icustay_id 64 | LEFT OUTER JOIN (SELECT icustay_id, sofa, respiration, coagulation, liver, cardiovascular, cns, renal 65 | FROM sofa) sofa 66 | ON i.icustay_id=sofa.icustay_id 67 | LEFT OUTER JOIN (SELECT icustay_id, sapsii, sapsii_prob 68 | FROM sapsii) sapsii 69 | ON sapsii.icustay_id=i.icustay_id 70 | LEFT OUTER JOIN (SELECT icustay_id, oasis, oasis_prob 71 | FROM oasis) oasis 72 | ON oasis.icustay_id=i.icustay_id 73 | WHERE s.first_careunit NOT like 'NICU' 74 | and 
i.hadm_id is not null and i.icustay_id is not null 75 | and i.hospstay_seq = 1 76 | and i.icustay_seq = 1 77 | and i.age >= 16 78 | and i.los_icu >= 1 79 | and (i.outtime >= (i.intime + interval '12 hours')) 80 | and (i.outtime <= (i.intime + interval '250 hours')) 81 | ORDER BY subject_id 82 | -------------------------------------------------------------------------------- /datapackage_io_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import datapackage 4 | 5 | def load_datapackage_schema(json_fpath, resource_id=0): 6 | """ Load schema object 7 | 8 | Returns 9 | ------- 10 | schema : schema object, with attributes 11 | field_names 12 | fields : list of dict 13 | Each dict provides info about the field (data type, etc) 14 | """ 15 | spec = datapackage.DataPackage(json_fpath) 16 | schema = spec.resources[resource_id].schema 17 | return schema 18 | 19 | def load_sanitized_df_from_csv(csv_fpath, schema): 20 | """ Load dataframe from CSV that meets provided schema. 21 | 22 | Returns 23 | ------- 24 | data_df : pandas DataFrame 25 | Will have fields provided by schema 26 | Will have field types (categorical, datetime, etc) provided by schema. 
27 | """ 28 | data_df = pd.read_csv(csv_fpath) 29 | return sanitize_df(data_df, schema) 30 | 31 | def save_sanitized_df_to_csv(csv_fpath, data_df, schema=None): 32 | """ Save sanitized df to .csv file 33 | 34 | Returns 35 | ------- 36 | None 37 | 38 | Post Condition 39 | -------------- 40 | csv_fpath is .csv file containing sanitized data_df 41 | This file could be read by load_sanitized_df_from_csv() 42 | """ 43 | if schema is not None: 44 | data_df = sanitize_df(data_df, schema, setup_index=False) 45 | has_non_numeric_index = ( 46 | getattr(data_df.index, 'name', None) is not None 47 | or getattr(data_df.index, 'names', [None])[0] is not None) 48 | data_df.to_csv(csv_fpath, index=has_non_numeric_index) 49 | 50 | def sanitize_df(data_df, schema, setup_index=True, missing_column_procedure='fill_zero'): 51 | """ Sanitize dataframe according to provided schema 52 | 53 | Returns 54 | ------- 55 | data_df : pandas DataFrame 56 | Will have fields provided by schema 57 | Will have field types (categorical, datetime, etc) provided by schema. 
58 | """ 59 | data_df = data_df.reset_index() 60 | for ff, field_name in enumerate(schema.field_names): 61 | type_ff = schema.fields[ff].descriptor['type'] 62 | if field_name not in data_df.columns: 63 | if missing_column_procedure == 'fill_zero': 64 | if type_ff == 'integer': 65 | data_df[field_name] = 0 66 | elif type_ff == 'number': 67 | data_df[field_name] = 0.0 68 | 69 | # Reorder columns to match schema 70 | data_df = data_df[schema.field_names] 71 | # Cast fields to required type (categorical / datetime) 72 | for ff, name in enumerate(schema.field_names): 73 | ff_spec = schema.descriptor['fields'][ff] 74 | if 'pandas_dtype' in ff_spec and ff_spec['pandas_dtype'] == 'category': 75 | data_df[name] = data_df[name].astype('category') 76 | elif 'type' in ff_spec and ff_spec['type'] == 'datetime': 77 | data_df[name] = pd.to_datetime(data_df[name]) 78 | if hasattr(schema, 'primary_key'): 79 | data_df = data_df.sort_values(schema.primary_key) 80 | if setup_index: 81 | data_df = data_df.set_index(schema.primary_key) 82 | return data_df 83 | 84 | 85 | -------------------------------------------------------------------------------- /mimic_querier.py: -------------------------------------------------------------------------------- 1 | import copy, psycopg2, pandas as pd 2 | 3 | # TODO(mmd): Where should this go? 4 | # TODO(mmd): Rename 5 | # TODO(mmd): eliminate try/except. Just use conditionals. 6 | def get_values_by_name_from_df_column_or_index(data_df, colname): 7 | """ Easily get values for named field, whether a column or an index 8 | 9 | Returns 10 | ------- 11 | values : 1D array 12 | """ 13 | try: 14 | values = data_df[colname] 15 | except KeyError as e: 16 | if colname in data_df.index.names: 17 | values = data_df.index.get_level_values(colname) 18 | else: 19 | raise e 20 | return values 21 | 22 | # TODO(mmd): Maybe make context manager? 
23 | class MIMIC_Querier(): 24 | def __init__( 25 | self, 26 | exclusion_criteria_template_vars={}, 27 | query_args={}, # passed wholesale to psycopg2.connect 28 | schema_name='public,mimiciii' 29 | ): 30 | """ A class to facilitate repeated Queries to a MIMIC psql database """ 31 | self.exclusion_criteria_template_vars = {} 32 | self.query_args = query_args 33 | self.schema_name = schema_name 34 | self.connected = False 35 | self.connection, self.cursor = None, None 36 | 37 | # TODO(mmd): this isn't really doing exclusion criteria. Should maybe also absorb 'WHERE' clause... 38 | def add_exclusion_criteria_from_df(self, df, columns=[]): 39 | self.exclusion_criteria_template_vars.update({ 40 | c: "','".join( 41 | set([str(v) for v in get_values_by_name_from_df_column_or_index(df, c)]) 42 | ) for c in columns 43 | }) 44 | 45 | def clear_exclusion_criteria(self): self.exclusion_criteria_template_vars = {} 46 | 47 | def close(self): 48 | if not self.connected: return 49 | self.connection.close() 50 | self.cursor.close() # TODO(mmd): Maybe don't actually need this to stay open? 51 | self.connected = False 52 | 53 | def connect(self): 54 | self.close() 55 | self.connection = psycopg2.connect(**self.query_args) 56 | self.cursor = self.connection.cursor() 57 | self.cursor.execute('SET search_path TO %s' % self.schema_name) 58 | self.connected = True 59 | 60 | def query(self, query_string=None, query_file=None, extra_template_vars={}): 61 | assert query_string is not None or query_file is not None, "Must pass a query!" 62 | assert query_string is None or query_file is None, "Must only pass one query!" 
63 | 64 | self.connect() 65 | 66 | if query_string is None: 67 | with open(query_file, mode='r') as f: query_string = f.read() 68 | 69 | template_vars = copy.copy(self.exclusion_criteria_template_vars) 70 | template_vars.update(extra_template_vars) 71 | 72 | query_string = query_string.format(**template_vars) 73 | out = pd.read_sql_query(query_string, self.connection) 74 | 75 | self.close() 76 | return out 77 | def add_exclusion_criteria_from_df(self, df, columns=[]): 78 | self.exclusion_criteria_template_vars.update({ 79 | c: "','".join( 80 | set([str(v) for v in get_values_by_name_from_df_column_or_index(df, c)]) 81 | ) for c in columns 82 | }) 83 | 84 | def close(self): 85 | if not self.connected: return 86 | self.connection.close() 87 | self.cursor.close() # TODO(mmd): Maybe don't actually need this to stay open? 88 | self.connected = False 89 | 90 | def connect(self): 91 | self.close() 92 | self.connection = psycopg2.connect(**self.query_args) 93 | self.cursor = self.connection.cursor() 94 | self.cursor.execute('SET search_path TO %s' % self.schema_name) 95 | self.connected = True 96 | 97 | def query(self, query_string=None, query_file=None, extra_template_vars={}): 98 | assert query_string is not None or query_file is not None, "Must pass a query!" 99 | assert query_string is None or query_file is None, "Must only pass one query!" 
100 | 101 | self.connect() 102 | 103 | if query_string is None: 104 | with open(query_file, mode='r') as f: query_string = f.read() 105 | 106 | template_vars = copy.copy(self.exclusion_criteria_template_vars) 107 | template_vars.update(extra_template_vars) 108 | 109 | query_string = query_string.format(**template_vars) 110 | out = pd.read_sql_query(query_string, self.connection) 111 | 112 | self.close() 113 | return out 114 | -------------------------------------------------------------------------------- /resources/Rohit_itemid.txt: -------------------------------------------------------------------------------- 1 | -- Demographic, neuro, status 2 | 198 GCS ready 3 | 581 Weight ignore 4 | 762 AdmitWt No-Level2 5 | 6 | -- Cardiovascular 7 | 455 NBPSys ready 8 | 455 NBPDias value2 Not-exist 9 | 456 NBPMean ready 10 | 1149 NBP Not-exist 11 | 51 SBP ready (may want to separate this from Level 2 called Systolic Blood Pressure) 12 | 51 DBP value2 Not-exist 13 | 52 MAP ready 14 | 211 HR ready 15 | 646 OR 1148 SpO2 ready (1148 Not-exist) 16 | 113 OR 1103 CVP No-Level2 (and count is too low for cvp) 17 | 491 PAPMean No-Level2 18 | 492 PAPSd No-Level2 19 | 116 CrdIndx No-Level2 20 | 626 SVR No-Level2 21 | 90 COtd No-Level2 22 | 89 COfck No-Level2 23 | 504 PCWP No-Level2 24 | 512 PVR No-Level2 25 | 3353 CardiacMurmur No-Level2 26 | 3685 VitaminK No-Level2 27 | 28 | -- Chemistries, Electrolytes, acid/base 29 | 837 OR 1536 Na verify 30 | 829 OR 1535 K verify 31 | 788 OR 1523 Cl verify 32 | 827 Phosphorous No-Level2 33 | 818 OR 1531 Lactic_Acid verify 34 | 787 CO2 No-Level2 35 | 811 Glucose ready 36 | 781 OR 1162 BUN verify 37 | 791 OR 1525 Creatinine verify 38 | 821 OR 1532 Mg verify 39 | 786 OR 1522 Ca verify 40 | 816 IonCa verify 41 | 769 ALT verify 42 | 770 AST verify 43 | 851 Troponin verify 44 | 806 Fibrinogen No-Level2 45 | 848 OR 1538 TBili verify 46 | 803 OR 1527 DBili verify (should probably be separated in terms of level2 from above) 47 | 849 OR 1539 TProtein 
No-Level2 48 | 772 OR 1521 Albumin verify 49 | 818 OR 1531 Lactate verify 50 | 51 | -- Blood Gases 52 | 776 ArtBE No-Level2 53 | 777 ArtCO2 maybe 54 | 778 ArtPaCO2 verify 55 | 779 ArtPaO2 (should probably be separated in terms of level 2 from above) 56 | 780 OR 1126 ArtpH ready 57 | 859 PvO2 No-Level2 58 | 59 | -- Ventilation 60 | 190 FiO2Set No-Level2 61 | 506 PEEPSet No-Level2 62 | 618 RESP ready 63 | 615 RespTot ready (should probably be seperated from above in terms of level2) 64 | 619 RespSet No-Level2 65 | 614 RespSpon ready (should be separated from 615 and 618) 66 | 535 PIP No-Level2 67 | 543 PlateauPres No-Level2 68 | 682 TidVolObs No-Level2 69 | 683 TidVolSet No-Level2 70 | 684 TidVolSpon No-Level2 71 | 834 SaO2 ready 72 | 428 OR 425 LungSounds No-Level2 73 | 74 | 75 | -- Hematology 76 | 813 HCT verify 77 | 814 Hg verify 78 | 815 OR 1530 INR verify 79 | 828 Platelets verify 80 | 824 OR 1286 PT verify 81 | 825 OR 1533 PTT verify 82 | 861 OR 1127 OR 1542 WBC verify 83 | 833 RBC verify 84 | 678 OR 679 TEMP ready 85 | 86 | -- Severity Scores 87 | 20001 SAPS Not-exist 88 | 20002 RespSOFA Not-exist 89 | 20003 HepaticSOFA Not-exist 90 | 20004 HematSOFA Not-exist 91 | 20006 NeuroSOFA Not-exist 92 | 20007 CardioSOFA Not-exist 93 | 94 | -- Categorical 95 | 212 Heart_Rhythm No-Level2 96 | 161 Ectopy_Type No-Level2 97 | 159 Ectopy_Freq No-Level2 98 | 128 Code_Status No-Level2 99 | 1484 FallRisk No-Level2 100 | 479 Orientation No-Level2 101 | 432 ConsciousLevel No-Level2 102 | 184 EyeOpening ready 103 | 454 MotorResponse ready (should probably be separated from 184 in terms of level2) 104 | 1337 RikerSAS No-Level2 105 | 722 Vent No-Level2 106 | 720 VentMode No-Level2 107 | 516 Pacemaker No-Level2 108 | 690 Trach No-Level2 109 | 643 SkinColor No-Level2 110 | 644 SkinIntegrity No-Level2 111 | 1125 ServiceType No-Level2 112 | 113 | -- Medication Data 114 | 142 Integrelin No-Level2 (discrepancy - says Current Goal in csv file) 115 | 119 OR 44 Epinephrine No-Level2 
(discrepancy - says Cervical Collar Type in csv) 116 | 123 Lasix No-Level2 (discrepancy - says Chest Tube Site #2 in csv file) 117 | 51 Vasopressin verify (discrepancy - says Arterial BP [Systolic]) 118 | 50 Nitroprusside No-Level2 (discrepancy - says Apnea Time Interval) 119 | 126 MorphineSulfate Not-exist 120 | 112 Amiodarone No-Level2 (discrepancy - CT #4 Suction Amount) 121 | 124 Midazolam No-Level2 (discrepancy - Chest Tube Site #3) 122 | 43 Dopamine (discrepancy - Angio Appearance #1) 123 | 118 OR 149 Fentanyl No-Level2 (discrepancy - Cerv Collar Status) 124 | 120 OR 47 Levophed Not-Exist (47 says Angio Site #1) 125 | 25 Heparin No-Level2 (discrepancy - AV Interval) 126 | 121 OR 49 Nitroglycerine (discrepancy - 121 is Chest PT [Right] and 49 is Anti-Embolism [Device] 127 | 45 Insulin No-Level2 (discrepancy - Angio Dressing #1) 128 | 127 OR 128 Neosynephrine No-Level2 (discrepancy - 127 is Circulation/SkinInt and 128 is Code Status) 129 | 131 Propofol No-Level2 (discrepancy - Compliance(40-60ml)) 130 | 131 | -- Fluid Input Output Data 132 | 55 OR 69 OR 715 OR 61 OR 57 OR 85 OR 473 OR 405 OR 428 UrineOut No-Level2 (discrepancy) 133 | 144 OR 172 OR 398 InputRBCs (144 is Not-exist, 172 is Education Topic #2 and 398 is Inc #1 [Dressing]) 134 | 179 OR 224 OR 3955 OR 163 OR 319 OR 221 InputOtherBlood No-Level2 (discrepancy) -------------------------------------------------------------------------------- /utils/niv-durations.sql: -------------------------------------------------------------------------------- 1 | -- This query extracts the duration of mechanical ventilation 2 | -- The main goal of the query is to aggregate sequential ventilator settings 3 | -- into single mechanical ventilation "events". The start and end time of these 4 | -- events can then be used for various purposes: calculating the total duration 5 | -- of mechanical ventilation, cross-checking values (e.g. 
PaO2:FiO2 on vent), etc 6 | 7 | SET SEARCH_PATH TO public,mimiciii; 8 | 9 | -- The query's logic is roughly: 10 | -- 1) The presence of a mechanical ventilation setting starts a new ventilation event 11 | -- 2) Any instance of a setting in the next 8 hours continues the event 12 | -- 3) Certain elements end the current ventilation event 13 | -- a) documented extubation ends the current ventilation 14 | -- b) initiation of non-invasive vent and/or oxygen ends the current vent 15 | -- The ventilation events are numbered consecutively by the `num` column. 16 | 17 | 18 | -- First, create a temporary table to store relevant data from CHARTEVENTS. 19 | DROP MATERIALIZED VIEW IF EXISTS nivdurations CASCADE; 20 | create MATERIALIZED VIEW nivdurations as 21 | with nivsettings AS 22 | ( 23 | select 24 | icustay_id, charttime 25 | , max( 26 | case 27 | -- initiation of oxygen therapy 28 | when itemid = 226732 and value in 29 | ( 30 | 'Nasal cannula', -- 153714 observations 31 | 'Face tent', -- 24601 observations 32 | 'Aerosol-cool', -- 24560 observations 33 | 'Trach mask ', -- 16435 observations 34 | 'High flow neb', -- 10785 observations 35 | 'Non-rebreather', -- 5182 observations 36 | 'Venti mask ', -- 1947 observations 37 | 'Medium conc mask ', -- 1888 observations 38 | 'T-piece', -- 1135 observations 39 | 'High flow nasal cannula', -- 925 observations 40 | 'Ultrasonic neb', -- 9 observations 41 | 'Vapomist' -- 3 observations 42 | ) then 1 43 | when itemid in (467,468) and value in 44 | ( 45 | 'Cannula', -- 278252 observations 46 | 'Nasal Cannula', -- 248299 observations 47 | 'None', -- 95498 observations 48 | 'Face Tent', -- 35766 observations 49 | 'Aerosol-Cool', -- 33919 observations 50 | 'Trach Mask', -- 32655 observations 51 | 'Hi Flow Neb', -- 14070 observations 52 | 'Non-Rebreather', -- 10856 observations 53 | 'Venti Mask', -- 4279 observations 54 | 'Medium Conc Mask', -- 2114 observations 55 | 'Vapotherm', -- 1655 observations 56 | 'T-Piece', -- 779 observations 57 
| 'Hood', -- 670 observations 58 | 'Hut', -- 150 observations 59 | 'TranstrachealCat', -- 78 observations 60 | 'Heated Neb', -- 37 observations 61 | 'Ultrasonic Neb' -- 2 observations 62 | ) then 1 63 | when itemid = 469 and value in ('Nasal Cannula', 'Face Tent', 'Trach Mask') then 1 64 | when itemid in (470, 471, 227287, 223834) and valuenum > 0 then 1 65 | else 0 66 | end 67 | ) as OxygenTherapy 68 | from chartevents ce 69 | where ce.value is not null 70 | -- exclude rows marked as error 71 | and ce.error IS DISTINCT FROM 1 72 | and itemid in 73 | ( 74 | -- the below indicate oxygen/NIV 75 | 467 -- O2 Delivery Device 76 | , 468 -- O2 Delivery Device#2 77 | , 469 -- O2 Delivery Mode 78 | , 470 -- O2 Flow (lpm) 79 | , 471 -- O2 Flow (lpm) #2 80 | , 227287 -- O2 Flow (additional cannula) 81 | , 226732 -- O2 Delivery Device(s) 82 | , 223834 -- O2 Flow 83 | ) 84 | group by icustay_id, charttime 85 | ) 86 | , vd0 as 87 | ( 88 | select 89 | icustay_id 90 | -- this carries over the previous charttime which had a mechanical ventilation event 91 | , case 92 | when OxygenTherapy=1 then 93 | LAG(CHARTTIME, 1) OVER (partition by icustay_id, OxygenTherapy order by charttime) 94 | else null 95 | end as charttime_lag 96 | , charttime 97 | , OxygenTherapy 98 | from nivsettings 99 | ) 100 | , vd1 as 101 | ( 102 | select 103 | icustay_id 104 | , charttime_lag 105 | , charttime 106 | , OxygenTherapy 107 | 108 | -- if this is a mechanical ventilation event, we calculate the time since the last event 109 | , case 110 | -- if the current observation indicates mechanical ventilation is present 111 | -- calculate the time since the last vent event 112 | when OxygenTherapy=1 then 113 | CHARTTIME - charttime_lag 114 | else null 115 | end as ventduration 116 | 117 | , case when (CHARTTIME - charttime_lag) > interval '8' hour then 1 118 | else 0 119 | end as newvent 120 | -- use the staging table with only vent settings from chart events 121 | FROM vd0 122 | ) 123 | , vd2 as 124 | ( 125 | 
select vd1.* 126 | -- create a cumulative sum of the instances of new ventilation 127 | -- this results in a monotonic integer assigned to each instance of ventilation 128 | , case when OxygenTherapy=1 then 129 | SUM( newvent ) 130 | OVER ( partition by icustay_id order by charttime ) 131 | else null end 132 | as ventnum 133 | --- now we convert CHARTTIME of ventilator settings into durations 134 | from vd1 135 | ) 136 | -- create the durations for each mechanical ventilation instance 137 | select icustay_id 138 | -- regenerate ventnum so it's sequential 139 | , ROW_NUMBER() over (partition by icustay_id order by ventnum) as ventnum 140 | , min(charttime) as starttime 141 | , max(charttime) as endtime 142 | , extract(epoch from max(charttime)-min(charttime))/60/60 AS duration_hours 143 | from vd2 144 | group by icustay_id, ventnum 145 | having min(charttime) != max(charttime) 146 | -- patient had to be given NIV at least once 147 | -- i.e. max(OxygenTherapy) should be 1 148 | and max(OxygenTherapy) = 1 149 | order by icustay_id, ventnum; 150 | -------------------------------------------------------------------------------- /resources/outcome_data_spec.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "outcome_hourly_data", 3 | "title": "Outcome measurements (hourly) for ICU patients extracted from MIMIC-III", 4 | "description": "Includes ventilator and vasopressor (blood thinner)", 5 | "license": "The use of data is conditional on citing the original data sources.", 6 | "resources": [ 7 | { 8 | "name": "outcome_hourly_tabular_data", 9 | "path": "outcome_hourly_data.csv", 10 | "format": "csv", 11 | "mediatype": "text/csv", 12 | "profile": "tabular-data-resource", 13 | "$schema": "https://frictionlessdata.io/schemas/table-schema.json", 14 | "schema": { 15 | "fields": [ 16 | { 17 | "name": "subject_id", 18 | "description": "ID number for subject within PATIENTS table", 19 | "type": "integer", 20 | "constraints" : 
{ 21 | "required" : true 22 | } 23 | }, 24 | { 25 | "name": "hadm_id", 26 | "description": "ID number for subject within ADMISSIONS table", 27 | "type": "integer", 28 | "constraints" : { 29 | "required" : true 30 | } 31 | }, 32 | { 33 | "name": "icustay_id", 34 | "description": "ID number for subject within ICUSTAYS table", 35 | "type": "integer", 36 | "constraints" : { 37 | "required" : true 38 | } 39 | }, 40 | { 41 | "name": "hours_in", 42 | "description": "Number of hours subject in ICU before measurement taken.", 43 | "type": "integer", 44 | "constraints" : { 45 | "required" : true 46 | } 47 | }, 48 | { 49 | "name": "vent", 50 | "description": "Mechanical ventilator used at current timestep (1 = present, 0 = not).", 51 | "type": "integer", 52 | "constraints" : { 53 | "required" : false 54 | } 55 | }, 56 | { 57 | "name": "vaso", 58 | "description": "Vasopressors given via IV at current timestep (1 = yes, 0 = no).", 59 | "type": "integer", 60 | "constraints" : { 61 | "required" : false 62 | } 63 | }, 64 | { 65 | "name": "adenosine", 66 | "description": "adenosine given via IV at current timestep (1 = yes, 0 = no).", 67 | "type": "integer", 68 | "constraints" : { 69 | "required" : false 70 | } 71 | }, 72 | { 73 | "name": "dobutamine", 74 | "description": "dobutamine given via IV at current timestep (1 = yes, 0 = no).", 75 | "type": "integer", 76 | "constraints" : { 77 | "required" : false 78 | } 79 | }, 80 | { 81 | "name": "dopamine", 82 | "description": "dopamine given via IV at current timestep (1 = yes, 0 = no).", 83 | "type": "integer", 84 | "constraints" : { 85 | "required" : false 86 | } 87 | }, 88 | { 89 | "name": "epinephrine", 90 | "description": "epinephrine given via IV at current timestep (1 = yes, 0 = no).", 91 | "type": "integer", 92 | "constraints" : { 93 | "required" : false 94 | } 95 | }, 96 | { 97 | "name": "isuprel", 98 | "description": "isuprel given via IV at current timestep (1 = yes, 0 = no).", 99 | "type": "integer", 100 | "constraints" : { 
101 | "required" : false 102 | } 103 | }, 104 | { 105 | "name": "milrinone", 106 | "description": "milrinone given via IV at current timestep (1 = yes, 0 = no).", 107 | "type": "integer", 108 | "constraints" : { 109 | "required" : false 110 | } 111 | }, 112 | { 113 | "name": "norepinephrine", 114 | "description": "norepinephrine given via IV at current timestep (1 = yes, 0 = no).", 115 | "type": "integer", 116 | "constraints" : { 117 | "required" : false 118 | } 119 | }, 120 | { 121 | "name": "phenylephrine", 122 | "description": "phenylephrine given via IV at current timestep (1 = yes, 0 = no).", 123 | "type": "integer", 124 | "constraints" : { 125 | "required" : false 126 | } 127 | }, 128 | { 129 | "name": "vasopressin", 130 | "description": "vasopressin given via IV at current timestep (1 = yes, 0 = no).", 131 | "type": "integer", 132 | "constraints" : { 133 | "required" : false 134 | } 135 | }, 136 | { 137 | "name": "colloid_bolus", 138 | "description": "colloid bolus given via IV at current timestep (1 = yes, 0 = no).", 139 | "type": "integer", 140 | "constraints" : { 141 | "required" : false 142 | } 143 | }, 144 | { 145 | "name": "crystalloid_bolus", 146 | "description": "crystalloid bolus given via IV at current timestep (1 = yes, 0 = no).", 147 | "type": "integer", 148 | "constraints" : { 149 | "required" : false 150 | } 151 | }, 152 | { 153 | "name": "nivdurations", 154 | "description": "Non-invasive ventilator used at current timestep (1 = yes, 0 = no).", 155 | "type": "integer", 156 | "constraints" : { 157 | "required" : false 158 | } 159 | } 160 | ], 161 | "missingValues": "nan", 162 | "primaryKey": ["subject_id", "hadm_id", "icustay_id", "hours_in"] 163 | } 164 | } 165 | ] 166 | } 167 | -------------------------------------------------------------------------------- /mimic_extract_env_py36.yml: -------------------------------------------------------------------------------- 1 | name: mimic_data_extraction 2 | channels: 3 | - conda-forge 4 | - defaults 
5 | dependencies: 6 | - boto3=1.14.42=pyh9f0ad1d_0 7 | - botocore=1.17.42=pyh9f0ad1d_0 8 | - cached-property=1.5.1=py_0 9 | - cchardet=2.1.6=py36h831f99a_1 10 | - certifi=2019.3.9=py36_0 11 | - click=7.1.2=pyh9f0ad1d_0 12 | - cython-blis=0.2.4=py36h516909a_1 13 | - datapackage=1.15.0=pyh9f0ad1d_0 14 | - docutils=0.15.2=py36_0 15 | - et_xmlfile=1.0.1=py_1001 16 | - ijson=3.1.1=pyh9f0ad1d_0 17 | - isodate=0.6.0=py_1 18 | - jdcal=1.4.1=py_0 19 | - jmespath=0.10.0=pyh9f0ad1d_0 20 | - jsonlines=1.2.0=pyh9f0ad1d_2 21 | - jsonpointer=2.0=py_0 22 | - linear-tsv=1.1.0=py_1 23 | - openpyxl=3.0.4=py_0 24 | - python_abi=3.6=1_cp36m 25 | - rfc3986=1.4.0=pyh9f0ad1d_0 26 | - s3transfer=0.3.3=py36h9f0ad1d_1 27 | - spacy=2.1.8=py36hc9558a2_0 28 | - sqlalchemy=1.3.18=py36h8c4c3a4_0 29 | - srsly=1.0.2=py36h831f99a_0 30 | - tableschema=1.19.2=pyh9f0ad1d_0 31 | - tabulator=1.52.3=pyh9f0ad1d_0 32 | - thinc=7.0.8=py36hc9558a2_0 33 | - unicodecsv=0.14.1=py_1 34 | - wasabi=0.7.1=pyh9f0ad1d_0 35 | - xlrd=1.2.0=pyh9f0ad1d_1 36 | - _libgcc_mutex=0.1=main 37 | - asn1crypto=0.24.0=py36_0 38 | - attrs=19.1.0=py36_1 39 | - backcall=0.1.0=py36_0 40 | - blas=1.0=mkl 41 | - bleach=3.1.5=py_0 42 | - blosc=1.15.0=hd408876_0 43 | - bzip2=1.0.6=h14c3975_5 44 | - ca-certificates=2019.1.23=0 45 | - cffi=1.12.3=py36h2e261b9_0 46 | - chardet=3.0.4=py36_1003 47 | - cryptography=2.6.1=py36h1ba5d50_0 48 | - cycler=0.10.0=py36_0 49 | - cymem=2.0.2=py36hfd86e86_0 50 | - cytoolz=0.9.0.1=py36h14c3975_1 51 | - dbus=1.13.6=h746ee38_0 52 | - decorator=4.4.0=py36_1 53 | - defusedxml=0.6.0=py_0 54 | - dill=0.2.9=py36_0 55 | - entrypoints=0.3=py36_0 56 | - expat=2.2.6=he6710b0_0 57 | - fontconfig=2.13.0=h9420a91_0 58 | - freetype=2.9.1=h8a8886c_1 59 | - glib=2.56.2=hd408876_0 60 | - gmp=6.1.2=h6c8ec71_1 61 | - gst-plugins-base=1.14.0=hbbd80ab_1 62 | - gstreamer=1.14.0=hb453b48_1 63 | - hdf5=1.10.4=hb1b8bf9_0 64 | - icu=58.2=h9c2bf20_1 65 | - idna=2.8=py36_0 66 | - importlib-metadata=1.7.0=py36_0 67 | - 
importlib_metadata=1.7.0=0 68 | - intel-openmp=2019.3=199 69 | - ipykernel=5.1.0=py36h39e3cac_0 70 | - ipython=7.5.0=py36h39e3cac_0 71 | - ipython_genutils=0.2.0=py36_0 72 | - ipywidgets=7.4.2=py36_0 73 | - jedi=0.13.3=py36_0 74 | - jinja2=2.10.1=py36_0 75 | - jpeg=9b=h024ee3a_2 76 | - jsonschema=3.2.0=py36_0 77 | - jupyter=1.0.0=py36_7 78 | - jupyter_client=5.2.4=py36_0 79 | - jupyter_console=6.0.0=py36_0 80 | - jupyter_core=4.4.0=py36_0 81 | - kiwisolver=1.1.0=py36he6710b0_0 82 | - krb5=1.16.1=h173b8e3_7 83 | - libedit=3.1.20181209=hc058e9b_0 84 | - libffi=3.2.1=hd88cf55_4 85 | - libgcc-ng=8.2.0=hdf63c60_1 86 | - libgfortran-ng=7.3.0=hdf63c60_0 87 | - libpng=1.6.37=hbc83047_0 88 | - libpq=11.2=h20c2e04_0 89 | - libsodium=1.0.16=h1bed415_0 90 | - libstdcxx-ng=8.2.0=hdf63c60_1 91 | - libuuid=1.0.3=h1bed415_2 92 | - libxcb=1.13=h1bed415_1 93 | - libxml2=2.9.9=he19cac6_0 94 | - llvmlite=0.28.0=py36hd408876_0 95 | - lzo=2.10=h49e0be7_2 96 | - markupsafe=1.1.1=py36h7b6447c_0 97 | - matplotlib=3.0.3=py36h5429711_0 98 | - mistune=0.8.4=py36h7b6447c_0 99 | - mkl=2019.3=199 100 | - mkl_fft=1.0.12=py36ha843d7b_0 101 | - mkl_random=1.0.2=py36hd81dba3_0 102 | - msgpack-numpy=0.4.3.2=py36_0 103 | - msgpack-python=0.6.1=py36hfd86e86_1 104 | - murmurhash=1.0.2=py36he6710b0_0 105 | - nbconvert=5.5.0=py_0 106 | - nbformat=4.4.0=py36_0 107 | - ncurses=6.1=he6710b0_1 108 | - nltk=3.4.1=py36_0 109 | - notebook=5.7.8=py36_0 110 | - numba=0.43.1=py36h962f231_0 111 | - numexpr=2.6.9=py36h9e4a6bb_0 112 | - numpy=1.16.3=py36h7e9f1db_0 113 | - numpy-base=1.16.3=py36hde5b4d6_0 114 | - openssl=1.1.1b=h7b6447c_1 115 | - packaging=20.4=py_0 116 | - pandas=0.24.2=py36he6710b0_0 117 | - pandoc=2.2.3.2=0 118 | - pandocfilters=1.4.2=py36_1 119 | - parso=0.4.0=py_0 120 | - pcre=8.43=he6710b0_0 121 | - pexpect=4.7.0=py36_0 122 | - pickleshare=0.7.5=py36_0 123 | - pip=19.1.1=py36_0 124 | - plac=0.9.6=py36_0 125 | - preshed=2.0.1=py36he6710b0_0 126 | - prometheus_client=0.6.0=py36_0 127 | - 
prompt_toolkit=2.0.9=py36_0 128 | - psycopg2=2.7.6.1=py36h1ba5d50_0 129 | - ptyprocess=0.6.0=py36_0 130 | - pycparser=2.19=py36_0 131 | - pygments=2.4.0=py_0 132 | - pyopenssl=19.0.0=py36_0 133 | - pyparsing=2.4.0=py_0 134 | - pyqt=5.9.2=py36h05f1152_2 135 | - pyrsistent=0.16.0=py36h7b6447c_0 136 | - pysocks=1.6.8=py36_0 137 | - pytables=3.5.1=py36h71ec239_0 138 | - python=3.6.8=h0371630_0 139 | - python-dateutil=2.8.0=py36_0 140 | - pytz=2019.1=py_0 141 | - pyzmq=18.0.0=py36he6710b0_0 142 | - qt=5.9.7=h5867ecd_1 143 | - qtconsole=4.4.4=py_0 144 | - readline=7.0=h7b6447c_5 145 | - regex=2019.04.14=py36h7b6447c_0 146 | - requests=2.21.0=py36_0 147 | - scikit-learn=0.20.3=py36hd81dba3_0 148 | - scipy=1.2.1=py36h7c811a0_0 149 | - send2trash=1.5.0=py36_0 150 | - setuptools=41.0.1=py36_0 151 | - sip=4.19.8=py36hf484d3e_0 152 | - six=1.12.0=py36_0 153 | - snappy=1.1.7=hbae5bb6_3 154 | - sqlite=3.28.0=h7b6447c_0 155 | - terminado=0.8.2=py36_0 156 | - testpath=0.4.2=py36_0 157 | - tk=8.6.8=hbc83047_0 158 | - toolz=0.9.0=py36_0 159 | - tornado=6.0.2=py36h7b6447c_0 160 | - tqdm=4.31.1=py36_1 161 | - traitlets=4.3.2=py36_0 162 | - ujson=1.35=py36h14c3975_0 163 | - urllib3=1.24.3=py36_0 164 | - wcwidth=0.1.7=py36_0 165 | - webencodings=0.5.1=py36_1 166 | - wheel=0.33.2=py36_0 167 | - widgetsnbextension=3.4.2=py36_0 168 | - wrapt=1.10.11=py36h14c3975_2 169 | - xz=5.2.4=h14c3975_4 170 | - zeromq=4.3.1=he6710b0_3 171 | - zipp=3.1.0=py_0 172 | - zlib=1.2.11=h7b6447c_3 173 | - pip: 174 | - blis==0.4.1 175 | - catalogue==1.0.0 176 | - https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.1.0/en_core_web_sm-2.1.0.tar.gz 177 | - joblib==0.16.0 178 | - msgpack==0.6.1 179 | - nmslib==2.0.6 180 | - psutil==5.7.2 181 | - pybind11==2.5.0 182 | - pysbd==0.3.1 183 | - scispacy==0.2.5 184 | - tables==3.5.1 185 | prefix: /afs/csail.mit.edu/u/m/mmd/.conda/envs/mimic_extract_py36 186 | -------------------------------------------------------------------------------- 
/resources/static_data_spec.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "static_patient_data", 3 | "title": "Static attributes of ICU patients extracted from MIMIC-III", 4 | "description": "Includes demographics and info about hospital/ICU admissions", 5 | "license": "The use of data is conditional on citing the original data sources.", 6 | "resources": [ 7 | { 8 | "name": "static_tabular_data", 9 | "path": "static_data.csv", 10 | "format": "csv", 11 | "mediatype": "text/csv", 12 | "profile": "tabular-data-resource", 13 | "$schema": "https://frictionlessdata.io/schemas/table-schema.json", 14 | "schema": { 15 | "fields": [ 16 | { 17 | "name": "subject_id", 18 | "description": "ID number for subject within PATIENTS table", 19 | "type": "integer", 20 | "constraints" : { 21 | "required" : true 22 | } 23 | }, 24 | { 25 | "name": "hadm_id", 26 | "description": "ID number for subject within ADMISSIONS table", 27 | "type": "integer", 28 | "constraints" : { 29 | "required" : true 30 | } 31 | }, 32 | { 33 | "name": "icustay_id", 34 | "description": "ID number for subject within ICUSTAYS table", 35 | "type": "integer", 36 | "constraints" : { 37 | "required" : true 38 | } 39 | }, 40 | { 41 | "name": "gender", 42 | "description": "Indicates subject gender. 'M'=male or 'F'=female.", 43 | "type": "string", 44 | "pandas_dtype": "category", 45 | "constraints" : { 46 | "required" : true 47 | } 48 | }, 49 | { 50 | "name": "ethnicity", 51 | "description": "Indicates subject ethnicity. 
Many human-readable categories/sub-categories.", 52 | "type": "string", 53 | "pandas_dtype": "category", 54 | "constraints" : { 55 | "required" : true 56 | } 57 | }, 58 | { 59 | "name": "age", 60 | "type": "number", 61 | "description": "Age of patient at admission (in fractional years)", 62 | "unit" : "year", 63 | "constraints" : { 64 | "required" : true 65 | } 66 | }, 67 | { 68 | "name": "insurance", 69 | "type": "string", 70 | "description": "Insurance type of patient at admission.", 71 | "unit" : "category", 72 | "constraints" : { 73 | "required" : true 74 | } 75 | }, 76 | { 77 | "name": "admittime", 78 | "type": "datetime", 79 | "description": "Datetime of subject's admission.", 80 | "constraints" : { 81 | "required" : true 82 | } 83 | }, 84 | { 85 | "name": "diagnosis_at_admission", 86 | "type": "string", 87 | "description": "The admitting physician's diagnosis for this patient (why were they admitted).", 88 | "constraints" : { 89 | "required" : true 90 | } 91 | }, 92 | { 93 | "name": "dischtime", 94 | "type": "datetime", 95 | "description": "Datetime of subject's discharge.", 96 | "constraints" : { 97 | "required" : true 98 | } 99 | }, 100 | { 101 | "name": "discharge_location", 102 | "type": "string", 103 | "description": "To where the patient was discharged.", 104 | "constraints" : { 105 | "required" : true 106 | } 107 | }, 108 | { 109 | "name": "fullcode_first", 110 | "type": "integer", 111 | "description": "Did the patient arrive with full code status?" 112 | }, 113 | { 114 | "name": "dnr_first", 115 | "type": "integer", 116 | "description": "Did the patient arrive with DNR status?" 117 | }, 118 | { 119 | "name": "fullcode", 120 | "type": "integer", 121 | "description": "Was the patient ever full-code?" 122 | }, 123 | { 124 | "name": "dnr", 125 | "type": "integer", 126 | "description": "Was the patient ever DNR?" 
127 | }, 128 | { 129 | "name": "dnr_first_charttime", 130 | "type": "datetime", 131 | "description": "At what time was the patient transitioned to DNR?" 132 | }, 133 | { 134 | "name": "timecmo_chart", 135 | "type": "datetime", 136 | "description": "At what time was the patient transitioned to CMO?" 137 | }, 138 | { 139 | "name": "cmo_first", 140 | "type": "integer", 141 | "description": "Were comfort measures under order at the beginning of the stay?" 142 | }, 143 | { 144 | "name": "cmo_last", 145 | "type": "integer", 146 | "description": "Were comfort measures under order at the end of the stay?" 147 | }, 148 | { 149 | "name": "cmo", 150 | "type": "integer", 151 | "description": "Were comfort measures ever under order during the stay?" 152 | }, 153 | { 154 | "name": "deathtime", 155 | "type": "datetime", 156 | "description": "Datetime of subject's death. NaN if subject did not die.", 157 | "constraints" : { 158 | "required" : false 159 | } 160 | }, 161 | { 162 | "name": "intime", 163 | "type": "datetime", 164 | "description": "Datetime of subject's intake into ICU.", 165 | "constraints" : { 166 | "required" : true 167 | } 168 | }, 169 | { 170 | "name": "outtime", 171 | "type": "datetime", 172 | "description": "Datetime of subject's exit from ICU.", 173 | "constraints" : { 174 | "required" : true 175 | } 176 | }, 177 | { 178 | "name": "los_icu", 179 | "type": "number", 180 | "description": "Length-of-stay in the ICU in days.", 181 | "unit": "day", 182 | "constraints" : { 183 | "required" : true 184 | } 185 | }, 186 | { 187 | "name": "admission_type", 188 | "type": "string", 189 | "description": "Category of admission: {'ELECTIVE', 'EMERGENCY', 'URGENT'}.", 190 | "pandas_dtype": "category" 191 | }, 192 | { 193 | "name": "first_careunit", 194 | "type": "string", 195 | "description": "Category of hospital unit where first admitted: {'CCU', 'CSRU', 'MICU', 'SICU', 'TSICU'}", 196 | "pandas_dtype": "category" 197 | }, 198 | { 199 | "name": "mort_icu", 200 | "type": 
"integer", 201 | "description": "Indicates if subject died in ICU. 1 if died, 0 otherwise.", 202 | "pandas_dtype": "integer" 203 | }, 204 | { 205 | "name": "mort_hosp", 206 | "type": "integer", 207 | "description": "Indicates if subject died in hospital. 1 if died, 0 otherwise.", 208 | "pandas_dtype": "integer" 209 | }, 210 | { 211 | "name": "hospital_expire_flag", 212 | "type": "integer", 213 | "description": "TODO ???", 214 | "pandas_dtype": "integer" 215 | }, 216 | { 217 | "name": "hospstay_seq", 218 | "type": "integer", 219 | "description": "TODO ???", 220 | "pandas_dtype": "integer" 221 | }, 222 | { 223 | "name": "readmission_30", 224 | "type": "integer", 225 | "description": "Indicates if the patient will be readmitted to icu within 30 days. 1 if readmitted, 0 otherwise.", 226 | "pandas_dtype": "integer" 227 | } 228 | ], 229 | "missingValues": "nan", 230 | "primaryKey": ["subject_id", "hadm_id", "icustay_id"] 231 | } 232 | } 233 | ] 234 | } 235 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # **MIMIC-Extract**:A Data Extraction, Preprocessing, and Representation Pipeline for MIMIC-III 2 | 3 | # About 4 | This repo contains code for **MIMIC-Extract**. It has been divided into the following folders: 5 | * Data: Locally contains the data to be extracted. 6 | * Notebooks: Jupyter Notebooks demonstrating test cases and usage of output data in risk and intervention prediction tasks. 7 | * Resources: Consist of Rohit_itemid.txt which describes the correlation of MIMIC-III item ids with those of MIMIC II as used by Rohit; itemid_to_variable_map.csv which is the main file used in data extraction - consists of groupings of item ids as well as which item ids are ready to extract; variable_ranges.csv which describes the normal variable ranges for the levels assisting in extraction of proper data. 
It also contains expected schema of output tables. 8 | * Utils: scripts and detailed instructions for running **MIMIC-Extract** data pipeline. 9 | * `mimic_direct_extract.py`: extraction script. 10 | 11 | # Paper 12 | If you use this code in your research, please cite the following publication: 13 | 14 | ``` 15 | Shirly Wang, Matthew B. A. McDermott, Geeticka Chauhan, Michael C. Hughes, Tristan Naumann, 16 | and Marzyeh Ghassemi. MIMIC-Extract: A Data Extraction, Preprocessing, and Representation 17 | Pipeline for MIMIC-III. arXiv:1907.08322. 18 | ``` 19 | 20 | # Pre-processed Output 21 | If you simply wish to use the output of this pipeline in your own research, a preprocessed version with 22 | default parameters is available via gcp, 23 | [here](https://console.cloud.google.com/storage/browser/mimic_extract). 24 | 25 | To access this, you will need to be credentialed for MIMIC-III GCP access through physionet. Instructions for 26 | that are available [on physionet](https://mimic.physionet.org/gettingstarted/cloud/). 27 | 28 | This output is released on an as-is basis, with no guarantees, but if you find any issues with it please let 29 | us know via Github issues. 30 | 31 | # Step-by-step instructions 32 | The first several steps are the same here as above. These instructions are tested with mimic-code at version 33 | 762943eab64deb30bdb2abcf7db43602ccb25908 34 | 35 | ## Step 0: Required software and prereqs 36 | 37 | Your local system should have the following executables on the PATH: 38 | 39 | * conda 40 | * psql (PostgreSQL 9.4 or higher) 41 | * git 42 | * MIMIC-iii psql relational database (Refer to [MIT-LCP Repo](https://github.com/MIT-LCP/mimic-code)) 43 | 44 | All instructions below should be executed from a terminal, with current directory set to utils/ 45 | 46 | ## Step 1: Create conda environment 47 | 48 | Next, make a new conda environment from [mimic_extract_env_py36.yml](../mimic_extract_env_py36.yml) and 49 | activate that environment. 
50 | 51 | ``` 52 | conda env create --force -f ../mimic_extract_env_py36.yml 53 | ``` 54 | 55 | This step will _report failure on the pip installation stage_. This is not the end of the world. Instead, 56 | simply activate the environment (which should work despite the former "failure"): 57 | 58 | ``` 59 | conda activate mimic_data_extraction 60 | ``` 61 | 62 | And then install any failed packages with pip (e.g., `pip install [package]`). This may include, in 63 | particular, packages: `datapackage`, `spacy`, and `scispacy`. 64 | You will also then need to install the english language model for spacy, via: 65 | `python -m spacy download en_core_web_sm` 66 | 67 | #### Expected Outcome 68 | 69 | The desired enviroment will be created and activated. 70 | 71 | #### Expected Resources 72 | 73 | Will typically take less than 5 minutes. 74 | Requires a good internet connection. 75 | 76 | ## Step 3: Build Views for Feature Extraction 77 | 78 | Materialized views in the MIMIC PostgreSQL database will be generated. 79 | This includes all concept tables in [MIT-LCP Repo](https://github.com/MIT-LCP/mimic-code) and tables for 80 | extracting non-mechanical ventilation, and injections of crystalloid bolus and colloid bolus. 81 | 82 | Note that you need to have schema edit permission on your postgres user to make concepts in this way. First, 83 | you must clone this github repository to a directory, which here we assume is stored in the environment 84 | variable `$MIMIC_CODE_DIR`. After cloning, follow these instructions: 85 | 86 | ``` 87 | cd $MIMIC_CODE_DIR/concepts 88 | psql -d mimic -f postgres-functions.sql 89 | bash postgres_make_concepts.sh 90 | ``` 91 | 92 | Next, you'll need to build 3 additional materialized views necessary for this pipeline. To do this (again with 93 | schema edit permission), navigate to `utils` and run `bash postgres_make_extended_concepts.sh` followed by 94 | `psql -d mimic -f niv-durations.sql`. 
95 | 96 | ## Step 4: Set Cohort Selection and Extraction Criteria 97 | 98 | Next, navigate to the root directory of _this repository_, activate your conda environment and run 99 | `python mimic_direct_extract.py ...` with your args as desired. 100 | 101 | #### Expected Outcome 102 | 103 | The default setting will create an hdf5 file inside MIMIC_EXTRACT_OUTPUT_DIR with four tables: 104 | * **patients**: static demographics, static outcomes 105 | * One row per (subj_id,hadm_id,icustay_id) 106 | 107 | * **vitals_labs**: time-varying vitals and labs (hourly mean, count and standard deviation) 108 | * One row per (subj_id,hadm_id,icustay_id,hours_in) 109 | 110 | * **vitals_labs_mean**: time-varying vitals and labs (hourly mean only) 111 | * One row per (subj_id,hadm_id,icustay_id,hours_in) 112 | 113 | * **interventions**: hourly binary indicators for administered interventions 114 | * One row per (subj_id,hadm_id,icustay_id,hours_in) 115 | 116 | 117 | #### Expected Resources 118 | 119 | Will probably take 5-10 hours. 120 | Will require a good machine with at least 50GB RAM. 121 | 122 | #### Setting the population size 123 | 124 | By default, this step builds a dataset with all eligible patients. Sometimes, we wish to run with only a small subset of patients (debugging, etc.). 125 | 126 | To do this, just set the POP_SIZE environmental variable. For example, to build a curated dataset with only the first 1000 patients, we could do: 127 | 128 | 129 | # Common Errors / FAQ: 130 | 1. When running `mimic_direct_extract.py`, I encounter an error of the form: 131 | ``` 132 | psycopg2.OperationalError: could not connect to server: No such file or directory 133 | Is the server running locally and accepting 134 | connections on Unix domain socket "/tmp/.s.PGSQL.5432"? 
135 | ``` 136 | or 137 | ``` 138 | psycopg2.OperationalError: could not connect to server: No such file or directory 139 | Is the server running locally and accepting 140 | connections on Unix domain socket "/var/run/postgresql/..."? 141 | ``` 142 | For this issue, see [this stackoverflow 143 | post](https://stackoverflow.com/questions/5500332/cant-connect-the-postgresql-with-psycopg2) and use our 144 | `--psql_host` argument, which you can pass either directly when calling `mimic_direct_extract.py` or use 145 | via the Makefile instructions by setting the `HOST` environment variable. 146 | 2. `relation "code_status" does not exist` 147 | In this error, the table `code_status` hasn't been built successfully, and you'll need to rebuild your 148 | MIMIC-III concepts. Instructions for this can be found in Step 3 of either instruction set. Also see 149 | below for our issues specific to building concepts. 150 | 151 | ## Common Errors with Building Concepts 152 | 1. When I built concepts, the system complained it didn't have permissions to edit schema mimiciii. This 153 | error indicates that your default psql user doesn't have authority to build concepts. You need to login 154 | as a higher-authority postgres user and have it run the commands. This is common in setups where 155 | multiple users have read-only access to postgres at once. If you do this, you may need to take extra 156 | steps to expose the resulting concepts tables to other users. 157 | 2. I built concepts, but now the code can't see them. This can be for a few reasons - firstly, you may not 158 | have permissions to read the new tables, and secondly, they may be in the wrong namespace. Our code 159 | expects them to be fully visible and within the mimiciii namespace. To adjust these properties, login as 160 | the owning postgres user and adjust the permissions and namespaces of those views manually. 
A few 161 | commands that are relevant are: 162 | * `ALTER TABLE code_status SET SCHEMA mimiciii;` 163 | * `GRANT SELECT ON mimiciii.code_status TO [USER];` 164 | Note that you'll need to run these on _every_ concepts table accessed by the script. 165 | -------------------------------------------------------------------------------- /heuristic_sentence_splitter.py: -------------------------------------------------------------------------------- 1 | # Original source taken from https://github.com/wboag/mimic-tokenize/blob/master/heuristic-tokenize.py at 2 | # commit e953d271bbb4c53aee5cc9a7b8be870a6b007604 3 | 4 | import re, nltk 5 | 6 | def is_inline_title(text): 7 | m = re.search('^([a-zA-Z ]+:) ', text) 8 | if not m: return False 9 | return is_title(m.groups()[0]) 10 | 11 | stopwords = set(['of', 'on', 'or']) 12 | def is_title(text): 13 | if not text.endswith(':'): return False 14 | text = text[:-1] 15 | 16 | # be a little loose here... can tighten if it causes errors 17 | text = re.sub('(\([^\)]*?\))', '', text) 18 | 19 | # Are all non-stopwords capitalized? 20 | for word in text.split(): 21 | if word in stopwords: continue 22 | if not word[0].isupper(): return False 23 | 24 | # I noticed this is a common issue (non-title aapears at beginning of line) 25 | if text == 'Disp': return False 26 | 27 | # optionally: could assert that it is less than 6 tokens 28 | return True 29 | 30 | 31 | def sent_tokenize_rules(text): 32 | 33 | # long sections are OBVIOUSLY different sentences 34 | text = re.sub('---+', '\n\n-----\n\n', text) 35 | text = re.sub('___+', '\n\n_____\n\n', text) 36 | text = re.sub('\n\n+', '\n\n', text) 37 | 38 | segments = text.split('\n\n') 39 | 40 | # strategy: break down segments and chip away structure until just prose. 
41 | # once you have prose, use nltk.sent_tokenize() 42 | 43 | ### Separate section headers ### 44 | new_segments = [] 45 | 46 | # deal with this one edge case (multiple headers per line) up front 47 | m1 = re.match('(Admission Date:) (.*) (Discharge Date:) (.*)', segments[0]) 48 | if m1: 49 | new_segments += list(map(lambda s: s.strip(), m1.groups())) 50 | segments = segments[1:] 51 | 52 | m2 = re.match('(Date of Birth:) (.*) (Sex:) (.*)' , segments[0]) 53 | if m2: 54 | new_segments += list(map(lambda s: s.strip(), m2.groups())) 55 | segments = segments[1:] 56 | 57 | for segment in segments: 58 | # find all section headers 59 | possible_headers = re.findall('\n([A-Z][^\n:]+:)', '\n'+segment) 60 | #assert len(possible_headers) < 2, str(possible_headers) 61 | headers = [] 62 | for h in possible_headers: 63 | #print 'cand=[%s]' % h 64 | if is_title(h.strip()): 65 | #print '\tYES=[%s]' % h 66 | headers.append(h.strip()) 67 | 68 | # split text into new segments, delimiting on these headers 69 | for h in headers: 70 | h = h.strip() 71 | 72 | # split this segment into 3 smaller segments 73 | ind = segment.index(h) 74 | prefix = segment[:ind].strip() 75 | rest = segment[ ind+len(h):].strip() 76 | 77 | # add the prefix (potentially empty) 78 | if len(prefix) > 0: 79 | new_segments.append(prefix.strip()) 80 | 81 | # add the header 82 | new_segments.append(h) 83 | 84 | # remove the prefix from processing (very unlikely to be empty) 85 | segment = rest.strip() 86 | 87 | # add the final piece (aka what comes after all headers are processed) 88 | if len(segment) > 0: 89 | new_segments.append(segment.strip()) 90 | 91 | # copy over the new list of segments (further segmented than original segments) 92 | segments = list(new_segments) 93 | new_segments = [] 94 | 95 | 96 | ### Low-hanging fruit: "_____" is a delimiter 97 | for segment in segments: 98 | subsections = segment.split('\n_____\n') 99 | new_segments.append(subsections[0]) 100 | for ss in subsections[1:]: 101 | 
new_segments.append('_____') 102 | new_segments.append(ss) 103 | 104 | segments = list(new_segments) 105 | new_segments = [] 106 | 107 | 108 | ### Low-hanging fruit: "-----" is a delimiter 109 | for segment in segments: 110 | subsections = segment.split('\n-----\n') 111 | new_segments.append(subsections[0]) 112 | for ss in subsections[1:]: 113 | new_segments.append('-----') 114 | new_segments.append(ss) 115 | 116 | segments = list(new_segments) 117 | new_segments = [] 118 | 119 | ''' 120 | for segment in segments: 121 | print '------------START------------' 122 | print segment 123 | print '-------------END-------------' 124 | print 125 | exit() 126 | ''' 127 | 128 | ### Separate enumerated lists ### 129 | for segment in segments: 130 | old_len = len(new_segments) 131 | if not re.search('\n\s*\d+\.', '\n'+segment): 132 | new_segments.append(segment) 133 | continue 134 | 135 | #print '------------START------------' 136 | #print segment 137 | #print '-------------END-------------' 138 | #print 139 | 140 | # generalizes in case the list STARTS this section 141 | segment = '\n'+segment 142 | 143 | # determine whether this segment contains a bulleted list (assumes i,i+1,...,n) 144 | start = int(re.search('\n\s*(\d+)\.', segment).groups()[0]) 145 | n = start 146 | while re.search('\n\s*%d\.'%n,segment): 147 | n += 1 148 | n -= 1 149 | 150 | # no bulleted list 151 | if n < 1 or (n - start) == 0: 152 | new_segments.append(segment) 153 | continue 154 | 155 | #print '------------START------------' 156 | #print segment 157 | #print '-------------END-------------' 158 | #print start,n 159 | #print 160 | 161 | # break each list into its own line 162 | # challenge: not clear how to tell when the list ends if more text happens next 163 | for i in range(start,n+1): 164 | matching_text = re.search('(\n\s*\d+\.)',segment).groups()[0] 165 | prefix = segment[:segment.index(matching_text) ].strip() 166 | segment = segment[ segment.index(matching_text):].strip() 167 | 168 | if 
len(prefix)>0: 169 | new_segments.append(prefix) 170 | 171 | if len(segment)>0: 172 | new_segments.append(segment) 173 | 174 | 175 | #print 'Out Segments:' 176 | #for out_segment in new_segments[old_len:]: 177 | # print '------------START------------' 178 | # print out_segment 179 | # print '-------------END-------------' 180 | #print('\n\n') 181 | 182 | segments = list(new_segments) 183 | new_segments = [] 184 | 185 | ''' 186 | TODO: Big Challenge 187 | There is so much variation in what makes a list. Intuitively, I can tell it's a 188 | list because it shows repeated structure (often following a header) 189 | Examples of some lists (with numbers & symptoms changed around to noise) 190 | Past Medical History: 191 | -- Hyperlipidemia 192 | -- lactose intolerance 193 | -- Hypertension 194 | Physical Exam: 195 | Vitals - T 82.2 BP 123/23 HR 73 R 21 75% on 2L NC 196 | General - well appearing male, sitting up in chair in NAD 197 | Neck - supple, JVP elevated to angle of jaw 198 | CV - distant heart sounds, RRR, faint __PHI_43__ murmur at 199 | Labs: 200 | __PHI_10__ 12:00PM BLOOD WBC-8.8 RBC-8.88* Hgb-88.8* Hct-88.8* 201 | MCV-88 MCH-88.8 MCHC-88.8 RDW-88.8* Plt Ct-888 202 | __PHI_14__ 04:54AM BLOOD WBC-8.8 RBC-8.88* Hgb-88.8* Hct-88.8* 203 | MCV-88 MCH-88.8 MCHC-88.8 RDW-88.8* Plt Ct-888 204 | __PHI_23__ 03:33AM BLOOD WBC-8.8 RBC-8.88* Hgb-88.8* Hct-88.8* 205 | MCV-88 MCH-88.8 MCHC-88.8 RDW-88.8* Plt Ct-888 206 | __PHI_109__ 03:06AM BLOOD WBC-8.8 RBC-8.88* Hgb-88.8* Hct-88.8* 207 | MCV-88 MCH-88.8 MCHC-88.8 RDW-88.8* Plt Ct-888 208 | __PHI_1__ 05:09AM BLOOD WBC-8.8 RBC-8.88* Hgb-88.8* Hct-88.8* 209 | MCV-88 MCH-88.8 MCHC-88.8 RDW-88.8* Plt Ct-888 210 | __PHI_26__ 04:53AM BLOOD WBC-8.8 RBC-8.88* Hgb-88.8* Hct-88.8* 211 | MCV-88 MCH-88.8 MCHC-88.8 RDW-88.8* Plt Ct-888 212 | __PHI_301__ 05:30AM BLOOD WBC-8.8 RBC-8.88* Hgb-88.8* Hct-88.8* 213 | MCV-88 MCH-88.8 MCHC-88.8 RDW-88.8* Plt Ct-888 214 | Medications on Admission: 215 | Allopurinol 100 mg DAILY 216 | Aspirin 250 
mg DAILY 217 | Atorvastatin 10 mg DAILY 218 | Glimepiride 1 mg once a week. 219 | Hexavitamin DAILY 220 | Lasix 50mg M-W-F; 60mg T-Th-Sat-Sun 221 | Metoprolol 12.5mg TID 222 | Prilosec OTC 20 mg once a day 223 | Verapamil 120 mg SR DAILY 224 | ''' 225 | 226 | ### Remove lines with inline titles from larger segments (clearly nonprose) 227 | for segment in segments: 228 | ''' 229 | With: __PHI_6__, MD __PHI_5__ 230 | Building: De __PHI_45__ Building (__PHI_32__ Complex) __PHI_87__ 231 | Campus: WEST 232 | ''' 233 | 234 | lines = segment.split('\n') 235 | 236 | buf = [] 237 | for line in lines: 238 | if is_inline_title(line): 239 | if len(buf) > 0: new_segments.append('\n'.join(buf)) 240 | buf = [] 241 | buf.append(line) 242 | if len(buf) > 0: 243 | new_segments.append('\n'.join(buf)) 244 | 245 | segments = list(new_segments) 246 | new_segments = [] 247 | 248 | # Going to put one-liner answers with their sections 249 | # (aka A A' B B' C D D' --> AA' BB' C DD' ) 250 | N = len(segments) 251 | for i in range(N): 252 | # avoid segfaults 253 | if i==0: 254 | new_segments.append(segments[i]) 255 | continue 256 | if segments[i].count('\n') == 0 and is_title(segments[i-1]) and not is_title(segments[i]): 257 | if (i == N-1) or is_title(segments[i+1]): 258 | new_segments = new_segments[:-1] 259 | new_segments.append(segments[i-1] + ' ' + segments[i]) 260 | else: new_segments.append(segments[i]) 261 | else: 262 | new_segments.append(segments[i]) 263 | 264 | segments = list(new_segments) 265 | new_segments = [] 266 | 267 | ''' 268 | Should do some kind of regex to find "TEST: value" in segments? 269 | Indication: Source of embolism. 270 | BP (mm Hg): 145/89 271 | HR (bpm): 80 272 | Note: I made a temporary hack that fixes this particular problem. 273 | We'll see how it shakes out 274 | ''' 275 | 276 | 277 | ''' 278 | Separate ALL CAPS lines (Warning... is there ever prose that can be all caps?) 
279 | ''' 280 | 281 | 282 | 283 | ''' 284 | for segment in segments: 285 | print '------------START------------' 286 | print segment 287 | print '-------------END-------------' 288 | print 289 | exit() 290 | ''' 291 | 292 | return segments 293 | -------------------------------------------------------------------------------- /notebooks/mmd_grud_utils.py: -------------------------------------------------------------------------------- 1 | import copy, math, os, pickle, time, pandas as pd, numpy as np, scipy.stats as ss 2 | 3 | from sklearn.linear_model import LogisticRegression 4 | from sklearn.ensemble import RandomForestClassifier 5 | from sklearn.metrics import average_precision_score, roc_auc_score, accuracy_score, f1_score 6 | 7 | import torch, torch.utils.data as utils, torch.nn as nn, torch.nn.functional as F, torch.optim as optim 8 | from torch.autograd import Variable 9 | from torch.nn.parameter import Parameter 10 | 11 | def to_3D_tensor(df): 12 | idx = pd.IndexSlice 13 | return np.dstack((df.loc[idx[:,:,:,i], :].values for i in sorted(set(df.index.get_level_values('hours_in'))))) 14 | def prepare_dataloader(df, Ys, batch_size, shuffle=True): 15 | """ 16 | dfs = (df_train, df_dev, df_test). 17 | df_* = (subject, hadm, icustay, hours_in) X (level2, agg fn \ni {mask, mean, time}) 18 | Ys_series = (subject, hadm, icustay) => label. 19 | """ 20 | X = torch.from_numpy(to_3D_tensor(df).astype(np.float32)) 21 | label = torch.from_numpy(Ys.values.astype(np.int64)) 22 | dataset = utils.TensorDataset(X, label) 23 | 24 | return utils.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last = True) 25 | 26 | class FilterLinear(nn.Module): 27 | def __init__(self, in_features, out_features, filter_square_matrix, bias=True): 28 | ''' 29 | filter_square_matrix : filter square matrix, whose each elements is 0 or 1. 
30 | ''' 31 | super(FilterLinear, self).__init__() 32 | self.in_features = in_features 33 | self.out_features = out_features 34 | 35 | assert in_features > 1 and out_features > 1, "Passing in nonsense sizes" 36 | 37 | use_gpu = torch.cuda.is_available() 38 | self.filter_square_matrix = None 39 | if use_gpu: self.filter_square_matrix = Variable(filter_square_matrix.cuda(), requires_grad=False) 40 | else: self.filter_square_matrix = Variable(filter_square_matrix, requires_grad=False) 41 | 42 | self.weight = Parameter(torch.Tensor(out_features, in_features)) 43 | 44 | if bias: self.bias = Parameter(torch.Tensor(out_features)) 45 | else: self.register_parameter('bias', None) 46 | self.reset_parameters() 47 | 48 | def reset_parameters(self): 49 | stdv = 1. / math.sqrt(self.weight.size(1)) 50 | self.weight.data.uniform_(-stdv, stdv) 51 | if self.bias is not None: self.bias.data.uniform_(-stdv, stdv) 52 | 53 | def forward(self, x): 54 | return F.linear( 55 | x, 56 | self.filter_square_matrix.mul(self.weight), 57 | self.bias 58 | ) 59 | 60 | def __repr__(self): 61 | return self.__class__.__name__ + '(' \ 62 | + 'in_features=' + str(self.in_features) \ 63 | + ', out_features=' + str(self.out_features) \ 64 | + ', bias=' + str(self.bias is not None) + ')' 65 | 66 | class GRUD(nn.Module): 67 | def __init__(self, input_size, cell_size, hidden_size, X_mean, batch_size = 0, output_last = False): 68 | """ 69 | With minor modifications from https://github.com/zhiyongc/GRU-D/ 70 | 71 | Recurrent Neural Networks for Multivariate Times Series with Missing Values 72 | GRU-D: GRU exploit two representations of informative missingness patterns, i.e., masking and time interval. 73 | cell_size is the size of cell_state. 
74 | 75 | Implemented based on the paper: 76 | @article{che2018recurrent, 77 | title={Recurrent neural networks for multivariate time series with missing values}, 78 | author={Che, Zhengping and Purushotham, Sanjay and Cho, Kyunghyun and Sontag, David and Liu, Yan}, 79 | journal={Scientific reports}, 80 | volume={8}, 81 | number={1}, 82 | pages={6085}, 83 | year={2018}, 84 | publisher={Nature Publishing Group} 85 | } 86 | 87 | GRU-D: 88 | input_size: variable dimension of each time 89 | hidden_size: dimension of hidden_state 90 | mask_size: dimension of masking vector 91 | X_mean: the mean of the historical input data 92 | """ 93 | 94 | super(GRUD, self).__init__() 95 | 96 | self.hidden_size = hidden_size 97 | self.delta_size = input_size 98 | self.mask_size = input_size 99 | 100 | use_gpu = torch.cuda.is_available() 101 | if use_gpu: 102 | self.identity = torch.eye(input_size).cuda() 103 | self.zeros = Variable(torch.zeros(batch_size, input_size).cuda()) 104 | self.zeros_h = Variable(torch.zeros(batch_size, self.hidden_size).cuda()) 105 | self.X_mean = Variable(torch.Tensor(X_mean).cuda()) 106 | else: 107 | self.identity = torch.eye(input_size) 108 | self.zeros = Variable(torch.zeros(batch_size, input_size)) 109 | self.zeros_h = Variable(torch.zeros(batch_size, self.hidden_size)) 110 | self.X_mean = Variable(torch.Tensor(X_mean)) 111 | 112 | self.zl = nn.Linear(input_size + hidden_size + self.mask_size, hidden_size) # Wz, Uz are part of the same network. the bias is bz 113 | self.rl = nn.Linear(input_size + hidden_size + self.mask_size, hidden_size) # Wr, Ur are part of the same network. the bias is br 114 | self.hl = nn.Linear(input_size + hidden_size + self.mask_size, hidden_size) # W, U are part of the same network. the bias is b 115 | 116 | self.gamma_x_l = FilterLinear(self.delta_size, self.delta_size, self.identity) 117 | 118 | self.gamma_h_l = nn.Linear(self.delta_size, self.hidden_size) # this was wrong in available version. 
remember to raise the issue 119 | 120 | self.output_last = output_last 121 | 122 | self.fc = nn.Linear(self.hidden_size, 2) 123 | self.bn= torch.nn.BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True) 124 | self.drop=nn.Dropout(p=0.5, inplace=False) 125 | 126 | def step(self, x, x_last_obsv, x_mean, h, mask, delta): 127 | """ 128 | Inputs: 129 | x: input tensor 130 | x_last_obsv: input tensor with forward fill applied 131 | x_mean: the mean of each feature 132 | h: the hidden state of the network 133 | mask: the mask of whether or not the current value is observed 134 | delta: the tensor indicating the number of steps since the last time a feature was observed. 135 | 136 | Returns: 137 | h: the updated hidden state of the network 138 | """ 139 | 140 | batch_size = x.size()[0] 141 | dim_size = x.size()[1] 142 | 143 | gamma_x_l_delta = self.gamma_x_l(delta) 144 | delta_x = torch.exp(-torch.max(self.zeros, gamma_x_l_delta)) #exponentiated negative rectifier 145 | 146 | gamma_h_l_delta = self.gamma_h_l(delta) 147 | delta_h = torch.exp(-torch.max(self.zeros_h, gamma_h_l_delta)) #self.zeros became self.zeros_h to accomodate hidden size != input size 148 | 149 | x_mean = x_mean.repeat(batch_size, 1) 150 | 151 | x = mask * x + (1 - mask) * (delta_x * x_last_obsv + (1 - delta_x) * x_mean) 152 | h = delta_h * h 153 | 154 | combined = torch.cat((x, h, mask), 1) 155 | z = torch.sigmoid(self.zl(combined)) #sigmoid(W_z*x_t + U_z*h_{t-1} + V_z*m_t + bz) 156 | r = torch.sigmoid(self.rl(combined)) #sigmoid(W_r*x_t + U_r*h_{t-1} + V_r*m_t + br) 157 | combined_new = torch.cat((x, r*h, mask), 1) 158 | h_tilde = torch.tanh(self.hl(combined_new)) #tanh(W*x_t +U(r_t*h_{t-1}) + V*m_t) + b 159 | h = (1 - z) * h + z * h_tilde 160 | 161 | return h 162 | 163 | def forward(self, X, X_last_obsv, Mask, Delta): 164 | batch_size = X.size(0) 165 | # type_size = input.size(1) 166 | step_size = X.size(1) # num timepoints 167 | spatial_size = X.size(2) # num features 168 | 169 | Hidden_State = 
self.initHidden(batch_size) 170 | # X = torch.squeeze(input[:,0,:,:]) 171 | # X_last_obsv = torch.squeeze(input[:,1,:,:]) 172 | # Mask = torch.squeeze(input[:,2,:,:]) 173 | # Delta = torch.squeeze(input[:,3,:,:]) 174 | 175 | outputs = None 176 | for i in range(step_size): 177 | Hidden_State = self.step( 178 | torch.squeeze(X[:,i:i+1,:], 1), 179 | torch.squeeze(X_last_obsv[:,i:i+1,:], 1), 180 | torch.squeeze(self.X_mean[:,i:i+1,:], 1), 181 | Hidden_State, 182 | torch.squeeze(Mask[:,i:i+1,:], 1), 183 | torch.squeeze(Delta[:,i:i+1,:], 1), 184 | ) 185 | if outputs is None: 186 | outputs = Hidden_State.unsqueeze(1) 187 | else: 188 | outputs = torch.cat((Hidden_State.unsqueeze(1), outputs), 1) 189 | 190 | # we want to predict a binary outcome 191 | #Apply 50% dropout and batch norm here 192 | self.drop(self.bn(self.fc(Hidden_State))) 193 | return self.drop(self.bn(self.fc(Hidden_State))) 194 | 195 | # if self.output_last: 196 | # return outputs[:,-1,:] 197 | # else: 198 | # return outputs 199 | 200 | def initHidden(self, batch_size): 201 | use_gpu = torch.cuda.is_available() 202 | if use_gpu: 203 | Hidden_State = Variable(torch.zeros(batch_size, self.hidden_size).cuda()) 204 | return Hidden_State 205 | else: 206 | Hidden_State = Variable(torch.zeros(batch_size, self.hidden_size)) 207 | return Hidden_State 208 | 209 | 210 | def Train_Model( 211 | model, train_dataloader, valid_dataloader, num_epochs = 300, patience = 3, min_delta = 1e-5, learning_rate=1e-3, batch_size=None 212 | ): 213 | 214 | print('Model Structure: ', model) 215 | print('Start Training ... 
') 216 | 217 | model 218 | 219 | if (type(model) == nn.modules.container.Sequential): 220 | output_last = model[-1].output_last 221 | print('Output type dermined by the last layer') 222 | else: 223 | output_last = model.output_last 224 | print('Output type dermined by the model') 225 | 226 | loss_MSE = torch.nn.MSELoss() 227 | loss_nll=torch.nn.NLLLoss() 228 | loss_CEL=torch.nn.CrossEntropyLoss() 229 | loss_L1 = torch.nn.L1Loss() 230 | 231 | # optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate, alpha=0.99) 232 | optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 233 | use_gpu = False#torch.cuda.is_available() 234 | 235 | interval = 100 236 | losses_train = [] 237 | losses_valid = [] 238 | losses_epochs_train = [] 239 | losses_epochs_valid = [] 240 | 241 | cur_time = time.time() 242 | pre_time = time.time() 243 | 244 | # Variables for Early Stopping 245 | is_best_model = 0 246 | patient_epoch = 0 247 | for epoch in range(num_epochs): 248 | 249 | trained_number = 0 250 | 251 | valid_dataloader_iter = iter(valid_dataloader) 252 | 253 | losses_epoch_train = [] 254 | losses_epoch_valid = [] 255 | 256 | for X, labels in train_dataloader: 257 | X = X.numpy() 258 | mask = torch.from_numpy(X[:, np.arange(0, X.shape[1], 3), :].astype(np.float32)) 259 | measurement = torch.from_numpy(X[:, np.arange(1, X.shape[1], 3), :].astype(np.float32)) 260 | time_ = torch.from_numpy(X[:, np.arange(2, X.shape[1], 3), :].astype(np.float32)) 261 | 262 | mask = torch.transpose(mask, 1, 2) 263 | measurement = torch.transpose(measurement, 1, 2) 264 | time_ = torch.transpose(time_, 1, 2) 265 | measurement_last_obsv = measurement 266 | 267 | assert measurement.size()[0] == batch_size, "Batch Size doesn't match! 
%s" % str(measurement.size()) 268 | 269 | if use_gpu: 270 | convert_to_cuda=lambda x: Variable(x.cuda()) 271 | X, X_last_obsv, Mask, Delta, labels = map(convert_to_cuda, [measurement, measurement_last_obsv, mask, time_, labels]) 272 | else: 273 | # inputs, labels = Variable(inputs), Variable(labels) 274 | convert_to_tensor=lambda x: Variable(x) 275 | X, X_last_obsv, Mask, Delta, labels = map(convert_to_tensor, [measurement, measurement_last_obsv, mask, time_, labels]) 276 | 277 | model.zero_grad() 278 | 279 | # outputs = model(inputs) 280 | prediction=model(X, X_last_obsv, Mask, Delta) 281 | 282 | # print(torch.sum(torch.sum(torch.isnan(prediction)))) 283 | 284 | # print(labels.shape) 285 | # print(prediction.shape) 286 | 287 | if output_last: 288 | loss_train = loss_CEL(torch.squeeze(prediction), torch.squeeze(labels)) 289 | else: 290 | full_labels = torch.cat((inputs[:,1:,:], labels), dim = 1) 291 | loss_train = loss_MSE(outputs, full_labels) 292 | 293 | losses_train.append(loss_train.data) 294 | losses_epoch_train.append(loss_train.data) 295 | 296 | optimizer.zero_grad() 297 | 298 | loss_train.backward() 299 | 300 | optimizer.step() 301 | 302 | # validation 303 | try: 304 | X_val, labels_val = next(valid_dataloader_iter) 305 | X_val = X_val.numpy() 306 | mask_val = torch.from_numpy(X_val[:, np.arange(0, X_val.shape[1], 3), :].astype(np.float32)) 307 | measurement_val = torch.from_numpy(X_val[:, np.arange(1, X_val.shape[1], 3), :].astype(np.float32)) 308 | time_val = torch.from_numpy(X_val[:, np.arange(2, X_val.shape[1], 3), :].astype(np.float32)) 309 | 310 | mask_val = torch.transpose(mask_val, 1, 2) 311 | measurement_val = torch.transpose(measurement_val, 1, 2) 312 | time_val = torch.transpose(time_val, 1, 2) 313 | measurement_last_obsv_val = measurement_val 314 | except StopIteration: 315 | valid_dataloader_iter = iter(valid_dataloader) 316 | X_val, labels_val = next(valid_dataloader_iter) 317 | X_val = X_val.numpy() 318 | mask_val = torch.from_numpy(X_val[:, 
np.arange(0, X_val.shape[1], 3), :].astype(np.float32)) 319 | measurement_val = torch.from_numpy(X_val[:, np.arange(1, X_val.shape[1], 3), :].astype(np.float32)) 320 | time_val = torch.from_numpy(X_val[:, np.arange(2, X_val.shape[1], 3), :].astype(np.float32)) 321 | 322 | mask_val = torch.transpose(mask_val, 1, 2) 323 | measurement_val = torch.transpose(measurement_val, 1, 2) 324 | time_val = torch.transpose(time_val, 1, 2) 325 | measurement_last_obsv_val = measurement_val 326 | 327 | if use_gpu: 328 | convert_to_cuda=lambda x: Variable(x.cuda()) 329 | X_val, X_last_obsv_val, Mask_val, Delta_val, labels_val = map(convert_to_cuda, [measurement_val, measurement_last_obsv_val, mask_val, time_val, labels_val]) 330 | else: 331 | # inputs, labels = Variable(inputs), Variable(labels) 332 | convert_to_tensor=lambda x: Variable(x) 333 | X_val, X_last_obsv_val, Mask_val, Delta_val, labels_val = map(convert_to_tensor, [measurement_val, measurement_last_obsv_val, mask_val, time_val, labels_val]) 334 | 335 | 336 | model.zero_grad() 337 | 338 | # outputs_val = model(inputs_val) 339 | prediction_val = model(X_val, X_last_obsv_val, Mask_val, Delta_val) 340 | 341 | # print(labels.shape) 342 | # print(prediction_val.shape) 343 | 344 | if output_last: 345 | loss_valid =loss_CEL(torch.squeeze(prediction_val), torch.squeeze(labels_val)) 346 | else: 347 | raise NotImplementedError("Should be output last!") 348 | full_labels_val = torch.cat((inputs_val[:,1:,:], labels_val), dim = 1) 349 | loss_valid = loss_MSE(outputs_val, full_labels_val) 350 | 351 | losses_valid.append(loss_valid.data) 352 | losses_epoch_valid.append(loss_valid.data) 353 | 354 | # print(sklearn.metrics.roc_auc_score(labels_val.detach().cpu().numpy(), prediction_val.detach().cpu().numpy()[:,1])) 355 | 356 | # output 357 | trained_number += 1 358 | 359 | avg_losses_epoch_train = sum(losses_epoch_train).cpu().numpy() / float(len(losses_epoch_train)) 360 | avg_losses_epoch_valid = sum(losses_epoch_valid).cpu().numpy() / 
float(len(losses_epoch_valid)) 361 | losses_epochs_train.append(avg_losses_epoch_train) 362 | losses_epochs_valid.append(avg_losses_epoch_valid) 363 | 364 | 365 | # Early Stopping 366 | if epoch == 0: 367 | is_best_model = 1 368 | best_model = model 369 | min_loss_epoch_valid = 10000.0 370 | if avg_losses_epoch_valid < min_loss_epoch_valid: 371 | min_loss_epoch_valid = avg_losses_epoch_valid 372 | else: 373 | if min_loss_epoch_valid - avg_losses_epoch_valid > min_delta: 374 | is_best_model = 1 375 | best_model = model 376 | min_loss_epoch_valid = avg_losses_epoch_valid 377 | patient_epoch = 0 378 | else: 379 | is_best_model = 0 380 | patient_epoch += 1 381 | if patient_epoch >= patience: 382 | print('Early Stopped at Epoch:', epoch) 383 | break 384 | 385 | # Print training parameters 386 | cur_time = time.time() 387 | print('Epoch: {}, train_loss: {}, valid_loss: {}, time: {}, best model: {}'.format( \ 388 | epoch, \ 389 | np.around(avg_losses_epoch_train, decimals=8),\ 390 | np.around(avg_losses_epoch_valid, decimals=8),\ 391 | np.around([cur_time - pre_time] , decimals=2),\ 392 | is_best_model) ) 393 | pre_time = cur_time 394 | # if epoch==1: 395 | # break 396 | 397 | return best_model, [losses_train, losses_valid, losses_epochs_train, losses_epochs_valid] 398 | 399 | def predict_proba(model, dataloader): 400 | """ 401 | Input: 402 | model: GRU-D model 403 | test_dataloader: containing batches of measurement, measurement_last_obsv, mask, time_, labels 404 | Returns: 405 | predictions: size[num_samples, 2] 406 | labels: size[num_samples] 407 | """ 408 | model.eval() 409 | use_gpu = False# torch.cuda.is_available() 410 | 411 | probabilities = [] 412 | labels = [] 413 | ethnicities = [] 414 | genders = [] 415 | for X, label in dataloader: 416 | X = X.numpy() 417 | mask = torch.from_numpy(X[:, np.arange(0, X.shape[1], 3), :].astype(np.float32)) 418 | measurement = torch.from_numpy(X[:, np.arange(1, X.shape[1], 3), :].astype(np.float32)) 419 | time_ = 
torch.from_numpy(X[:, np.arange(2, X.shape[1], 3), :].astype(np.float32)) 420 | 421 | mask = torch.transpose(mask, 1, 2) 422 | measurement = torch.transpose(measurement, 1, 2) 423 | time_ = torch.transpose(time_, 1, 2) 424 | measurement_last_obsv = measurement 425 | 426 | if use_gpu: 427 | convert_to_cuda=lambda x: Variable(x.cuda()) 428 | X, X_last_obsv, Mask, Delta, label = map(convert_to_cuda, [measurement, measurement_last_obsv, mask, time_, label]) 429 | else: 430 | # inputs, labels = Variable(inputs), Variable(labels) 431 | convert_to_tensor=lambda x: Variable(x) 432 | X, X_last_obsv, Mask, Delta, label = map(convert_to_tensor, [measurement, measurement_last_obsv, mask, time_, label]) 433 | 434 | 435 | prob = model(X, X_last_obsv, Mask, Delta) 436 | 437 | probabilities.append(prob.detach().cpu().data.numpy()) 438 | labels.append(label.detach().cpu().data.numpy()) 439 | 440 | return probabilities, labels 441 | -------------------------------------------------------------------------------- /resources/item_id_stat.csv: -------------------------------------------------------------------------------- 1 | itemid,label,LEVEL1,LEVEL2,mean,stdev,missing percent 2 | 211,heart rate,heart rate,heart rate,85.11720532842587,17.145789289466023,48.41582332025113 3 | 618,respiratory rate,respiratory rate,respiratory rate,19.356590679247063,5.721490437680727,49.91458249468185 4 | 646,spo2,pulse oximetry,oxygen saturation,97.01037398666034,3.4533884699224116,51.633746093739354 5 | 220045,heart rate,heart rate,heart rate,84.77979402854999,17.43969038021443,61.99548014179306 6 | 220210,respiratory rate,respiratory rate,respiratory rate,19.24884122911674,5.520934157300327,62.25504940130507 7 | 220277,o2 saturation pulseoxymetry,pulse oximetry,oxygen saturation,96.6730422133314,3.184367241856636,63.02507912477952 8 | 455,nbp [systolic],systolic blood pressure (noninvasive),systolic blood pressure,121.23039763958806,22.036006580520866,71.97837846679212 9 | 8441,nbp 
[diastolic],diastolic blood pressure (noninvasive),diastolic blood pressure,59.00310847406039,14.830207043261264,72.00423089260384 10 | 456,nbp mean,mean blood pressure (noninvasive),mean blood pressure,78.27484762141329,14.700985120201974,72.43122754950808 11 | 51,arterial bp [systolic],systolic blood pressure (arterial),systolic blood pressure,122.45984223083545,24.380945746102867,74.59578891698781 12 | 8368,arterial bp [diastolic],diastolic blood pressure (arterial),diastolic blood pressure,60.19914091791762,13.450017343017635,74.59724283197195 13 | 52,arterial bp mean,mean blood pressure (arterial),mean blood pressure,81.53031031841128,16.761669175274545,74.79583853183665 14 | 220181,non invasive blood pressure mean,mean blood pressure (noninvasive),mean blood pressure,77.18891678098602,15.008928988881522,74.9390491577743 15 | 220179,non invasive blood pressure systolic,systolic blood pressure (noninvasive),systolic blood pressure,121.05626381891265,21.445186669208272,75.103523290355 16 | 220180,non invasive blood pressure diastolic,diastolic blood pressure (noninvasive),diastolic blood pressure,63.59508798924918,14.965460189200936,75.11460939210906 17 | 198,gcs total,glascow coma scale total,glascow coma scale total,12.491195240149807,3.5862187630327247,82.83530687147483 18 | 113,cvp,central venous pressure,central venous pressure,10.632580330008674,5.414234986670563,86.38276856308673 19 | 220052,arterial blood pressure mean,mean blood pressure (arterial),mean blood pressure,81.1309772550919,18.640504934574423,86.91499231696801 20 | 220050,arterial blood pressure systolic,systolic blood pressure (arterial),systolic blood pressure,121.49062275107978,22.30167277954901,86.9640619476827 21 | 220051,arterial blood pressure diastolic,diastolic blood pressure (arterial),diastolic blood pressure,60.3850337967619,13.42507863132073,86.9679239093593 22 | 678,temperature f,temperature (f),temperature,36.94417537612146,0.7951733965056856,87.36275269723947 23 | 
677,temperature c (calc),temperature (c),temperature,36.944185993850084,0.7951668078228821,87.36288900176923 24 | 223761,temperature fahrenheit,temperature (f),temperature,36.8494655237953,0.7444853516121963,90.31997034013432 25 | 492,pap [systolic],pulmonary artery pressure systolic,pulmonary artery pressure systolic,38.42374784657562,12.589739189239921,92.71520440681633 26 | 811,glucose (70-105),glucose,glucose,135.827665056469,53.77577466184741,93.23734162549513 27 | 190,fio2 set,fraction inspired oxygen set,fraction inspired oxygen set,0.5250663193567437,0.18214605265781275,93.51322199373544 28 | 807,fingerstick glucose,glucose,glucose,144.54007815267448,57.77352341407985,93.84339699966469 29 | 220074,central venous pressure,central venous pressure,central venous pressure,13.714740936021288,27.775085476211146,93.96938782000896 30 | 679,temperature f (calc),temperature (f),temperature,37.243650675696934,0.7381676307439387,94.11691475605578 31 | 676,temperature c,temperature (c),temperature,37.24364465740107,0.738169804038111,94.11691475605578 32 | 829,potassium (3.5-5.3),potassium,potassium,4.126247210307182,0.6472945535234909,94.25894407606883 33 | 813,hematocrit,hematocrit,hematocrit,30.716222552062003,4.9231926598853635,94.85468574081966 34 | 1529,glucose,glucose,glucose,132.77146570845525,52.14488532909935,94.87903881680398 35 | 225664,glucose finger stick,glucose (finger stick),glucose,150.2282125131326,60.67695105335787,95.40149407938557 36 | 780,arterial ph,ph (arterial),ph,7.383238570089456,0.0755647129939972,95.50749356869794 37 | 506,peep set,positive end-expiratory pressure set,positive end-expiratory pressure set,6.159088139452178,2.9228705568923634,95.60031695346655 38 | 223835,inspired o2 fraction,fraction inspired oxygen,fraction inspired oxygen,0.5334754600604382,0.18898523530873748,95.7089062288444 39 | 1535,potassium,potassium,potassium,4.114669913878324,0.6396563912120975,95.70913340306068 40 | 615,resp rate (total),respiratory rate 
(total),respiratory rate,18.586152348383074,6.2297633113521735,95.72290016056674 41 | 777,arterial co2(calc),"co2 (etco2, pco2, etc.)","co2 (etco2, pco2, etc.)",25.18672696759632,5.28707499563618,95.78560024425772 42 | 778,arterial paco2,partial pressure of carbon dioxide (arterial),partial pressure of carbon dioxide,40.84872077048675,9.114166286220346,95.78882611812878 43 | 779,arterial pao2,partial pressure of oxygen,partial pressure of oxygen,145.8220365787322,84.89126553607426,95.79586851883319 44 | 1126,art.ph,ph (arterial),ph,7.383119686812778,0.07519230566723935,95.80450113905152 45 | 837,sodium (135-148),sodium,sodium,138.86951490281783,5.113445697937651,96.09151304388915 46 | 116,cardiac index,cardiac index,cardiac index,2.868186533308795,0.8419530320916853,96.45135700246348 47 | 791,creatinine (0-1.3),creatinine,creatinine,1.398509939491261,1.4961551426642252,96.49088531609475 48 | 781,bun (6-20),blood urea nitrogen,blood urea nitrogen,26.59446128625145,22.09455559225942,96.50792338231511 49 | 220545,hematocrit (serum),hematocrit (serum),hematocrit,30.550602759811184,5.22841861296835,96.50874120949369 50 | 828,platelets,platelets,platelets,196.81989868553214,108.8892593297763,96.53195841439667 51 | 821,magnesium (1.6-2.6),magnesium,magnesium,2.0332414522629136,0.4060717620618477,96.53522972311099 52 | 788,chloride (100-112),chloride,chloride,105.61685393752943,6.145410060187982,96.55890127644649 53 | 787,carbon dioxide,co2,co2,24.108687983137326,4.803093298652404,96.56553476356163 54 | 814,hemoglobin,hemoglobin,hemoglobin,10.627113882198122,1.7245841985282473,96.57657543047242 55 | 626,svr,systemic vascular resistance,systemic vascular resistance,996.5979018775078,354.7224767830885,96.59320458310351 56 | 861,"wbc (4-11,000)",white blood cell count,white blood cell count,12.243439531219247,9.996575276533045,96.77058221116843 57 | 227442,potassium (serum),potassium (serum),potassium serum,4.0906582939391685,0.8737147916349557,96.7802598327816 58 | 
833,rbc,red blood cell count,red blood cell count,3.504754382643194,0.6045657055454036,96.7996605108512 59 | 220645,sodium (serum),sodium (serum),sodium,138.84060664795905,5.370440267953136,96.85890754645486 60 | 220602,chloride (serum),chloride (serum),chloride,105.3458225167868,6.377004562546435,96.8671312530839 61 | 491,pap mean,pulmonary artery pressure mean,pulmonary artery pressure mean,29.590011224348547,9.286626776198654,96.91633718832834 62 | 1127,"wbc (4-11,000)",white blood cell count,white blood cell count,12.248704920458804,10.162362458486854,96.92974046708837 63 | 90,c.o.(thermodilution),cardiac output thermodilution,cardiac output thermodilution,5.683717102431174,1.8687379380008704,96.95336658558061 64 | 1536,sodium,sodium,sodium,138.9270348296241,5.084821897778887,96.988805763319 65 | 220339,peep set,positive end-expiratory pressure set,positive end-expiratory pressure set,6.162566992846121,2.859692347492791,97.00170925880323 66 | 220615,creatinine,creatinine,creatinine,1.3813086178762408,1.4321308870645644,97.02070102328354 67 | 225624,bun,blood urea nitrogen,blood urea nitrogen,25.880015928444525,21.425976311535525,97.033468214238 68 | 227443,hco3 (serum),bicarbonate (serum),bicarbonate,24.42982683782072,4.749466430341764,97.05868455224417 69 | 220621,glucose (serum),glucose (serum),glucose,136.1707688338493,61.499888212575534,97.06490912577 70 | 619,respiratory rate set,respiratory rate set,respiratory rate set,14.911526170363905,5.719745146331646,97.12238420248674 71 | 227073,anion gap,anion gap,anion gap,13.314896685047854,3.7742659156408966,97.13269791190548 72 | 220635,magnesium,magnesium,magnesium,2.0695281239420478,0.4079584270012105,97.1817221077769 73 | 220228,hemoglobin,hemoglobin,hemoglobin,10.477677307842182,1.8508923343673749,97.20048669804095 74 | 227457,platelet count,platelets,platelets,203.92415520213353,113.77754779961387,97.20602974891797 75 | 682,tidal volume (obser),tidal volume observed,tidal volume 
observed,575.1758166224059,124.72789449238428,97.22901977960467 76 | 827,phosphorous(2.7-4.5),phosphorous,phosphorous,3.5141597348168134,1.46456848724299,97.2461032806683 77 | 224685,tidal volume (observed),tidal volume observed,tidal volume observed,504.80537478573103,2208.851953853323,97.2645043921863 78 | 220546,wbc,white blood cell count,white blood cell count,11.623559214481318,9.688906692189475,97.27917984655745 79 | 1525,creatinine,creatinine,creatinine,1.395167748555619,1.4718179302730412,97.28426854900192 80 | 1162,bun,blood urea nitrogen,blood urea nitrogen,26.389558030539735,21.956304746558512,97.29530921591274 81 | 786,calcium (8.4-10.2),calcium,calcium,8.272538015442466,0.8114345548114279,97.29980726539492 82 | 1532,magnesium,magnesium,magnesium,2.0579964973335714,0.41366519694224513,97.3105753232462 83 | 1523,chloride,chloride,chloride,105.77032572163749,6.1752064804362945,97.32784056368284 84 | 614,resp rate (spont),respiratory rate (spontaneous),respiratory rate,2.848259597078965,5.030844616613539,97.33501926891702 85 | 224695,peak insp. pressure,peak inspiratory pressure,peak inspiratory pressure,20.40891912516437,6.132278461374045,97.34592363129806 86 | 683,tidal volume (set),tidal volume set,tidal volume set,565.2982436312367,129.72198707778733,97.34674145847664 87 | 224689,respiratory rate (spontaneous),respiratory rate (spontaneous),respiratory rate,9.452999280226122,10.767978594938004,97.38113563482017 88 | 825,ptt(22-35),partial thromboplastin time,partial thromboplastin time,42.1014856999121,24.924086355778606,97.41107719652477 89 | 816,ionized calcium,calcium ionized,calcium ionized,1.4548033491300119,7.258700266632667,97.43056874428089 90 | 535,peak insp. 
pressure,peak inspiratory pressure,peak inspiratory pressure,25.318576669825053,6.144567138276635,97.45723899727118 91 | 225677,phosphorous,phosphorous,phosphorous,3.408323303813618,1.35993873135611,97.47068771087447 92 | 225625,calcium non-ionized,calcium non-ionized,calcium,8.33335374198663,2.8973194475055513,97.47691228440031 93 | 223830,ph (arterial),ph (arterial),ph,7.380746273155249,0.08131153657111036,97.48758947256508 94 | 1542,wbc,white blood cell count,white blood cell count,12.189169491832727,10.4186935629412,97.49385948093418 95 | 815,inr (2-4 ref. range),prothrombin time,prothrombin time inr,1.5276120216548172,1.3212495648680445,97.55496934511126 96 | 824,pt(11-13.5),prothrombin time,prothrombin time pt,15.412690379706046,5.394941850258561,97.55578717228983 97 | 220235,arterial co2 pressure,partial pressure of carbon dioxide (arterial),partial pressure of carbon dioxide,41.157794018595105,9.434863928206854,97.59408874515324 98 | 225698,tco2 (calc) arterial,"co2 (etco2, pco2, etc.)","co2 (etco2, pco2, etc.)",25.19359657241436,5.174422890507092,97.59454309358578 99 | 220059,pulmonary artery pressure systolic,pulmonary artery pressure systolic,pulmonary artery pressure systolic,37.026091266290216,11.13907184662926,97.69445431390206 100 | 1534,phosphorous,phosphorous,phosphorous,3.4739764333108893,1.4325795387739864,97.7672409327955 101 | 1522,calcium,calcium,calcium,8.296089334144861,0.8157925353620858,97.79468357812112 102 | 224690,respiratory rate (total),respiratory rate (total),respiratory rate,18.893311217735548,5.663158357836307,97.90336372318549 103 | 543,plateau pressure,plateau pressure,plateau pressure,20.721389808057822,6.00324975148783,97.9616566270808 104 | 1533,ptt,partial thromboplastin time,partial thromboplastin time,41.441392212163706,24.26243544442964,98.00241168147994 105 | 227466,ptt,partial thromboplastin time,partial thromboplastin time,42.03303366894534,24.972508806894062,98.0324895477143 106 | 1530,inr,prothrombin time,prothrombin 
time inr,1.5209415558434443,1.2103463697710057,98.10727529971093 107 | 1286,pt,prothrombin time,prothrombin time pt,15.614373679408336,5.820295850553224,98.10772964814348 108 | 227467,inr,prothrombin time,prothrombin time inr,1.5048745598589854,0.8876349421905448,98.14189665027074 109 | 227465,prothrombin time,prothrombin time,prothrombin time pt,16.52679300648027,7.5145446551246105,98.141942085114 110 | 684,tidal volume (spont),tidal volume spontaneous,tidal volume spontaneous,457.21894691863207,194.06437358994376,98.1827425743564 111 | 224688,respiratory rate (set),respiratory rate set,respiratory rate set,16.76014530811976,10.680771899698309,98.24158069637076 112 | 224684,tidal volume (set),tidal volume set,tidal volume set,489.39459059415026,88.52825285612052,98.31105057170663 113 | 226537,glucose (whole blood),glucose (whole blood),glucose,131.78947783061918,42.03276652546779,98.42363811329086 114 | 834,sao2,oxygen saturation,oxygen saturation,96.68015091015981,3.3690289177241617,98.44817292864822 115 | 225667,ionized calcium,calcium ionized,calcium ionized,1.1375018536010573,0.8987666521246573,98.508873879236 116 | 224686,tidal volume (spontaneous),tidal volume spontaneous,tidal volume spontaneous,531.9146441360622,3355.6396737253526,98.62500533859408 117 | 223762,temperature celsius,temperature (c),temperature,37.060083241731,0.9077621258050494,98.69515673657877 118 | 225668,lactic acid,lactic acid,lactic acid,2.5086622141267125,2.3069009807165988,98.72786982372189 119 | 818,lactic acid(0.5-2.0),lactic acid,lactic acid,2.9755073601570334,3.277799186893734,98.84254736809584 120 | 226531,admission weight (lbs.),"weight (lbs, admission)",weight,80.71517796342529,23.36829893729347,98.85131629284392 121 | 225312,art bp mean,mean blood pressure (arterial),mean blood pressure,79.65383604325228,18.70728138686971,98.86917218624288 122 | 225309,art bp systolic,systolic blood pressure (arterial),systolic blood 
pressure,115.30353914605523,23.739399173556016,98.87335219182228 123 | 225310,art bp diastolic,diastolic blood pressure (arterial),diastolic blood pressure,59.27203190742385,14.449988507288586,98.8741245841576 124 | 224696,plateau pressure,plateau pressure,plateau pressure,19.305944780698116,4.997881593930517,98.90402071101894 125 | 227464,potassium (whole blood),potassium (whole blood),potassium,4.236190486219362,0.7210840843788271,98.92133138629885 126 | 1531,lactic acid,lactic acid,lactic acid,2.8969093902457725,3.2320433870719487,98.99729844422009 127 | 763,daily weight,weight (daily),weight,84.26841476030812,22.987743827178928,99.18994217961847 128 | 224,iabp mean,mean blood pressure (arterial),mean blood pressure,81.4670694434781,14.49083602684611,99.26218358039287 129 | 89,c.o. (fick),cardiac output fick,cardiac output fick,5.6815246559783406,1.9843659375245477,99.26559119363694 130 | 220227,arterial o2 saturation,oxygen saturation (arterial),oxygen saturation,96.1006185673329,4.109073848979552,99.2657274981667 131 | 224639,daily weight,weight (daily),weight,86.32265755329007,23.65570696817214,99.31366125780002 132 | 225690,total bilirubin,bilirubin (total),bilirubin,2.6861728143468806,5.160533122108008,99.33115367245294 133 | 770,ast,asparate aminotransferase,asparate aminotransferase,404.2892781678141,1299.0477671154677,99.33342541461566 134 | 220587,ast,asparate aminotransferase,asparate aminotransferase,347.40981595092023,1239.8913868625452,99.33347084945892 135 | 769,alt,alanine aminotransferase,alanine aminotransferase,335.13654895009546,984.894038593404,99.33356171914542 136 | 220644,alt,alanine aminotransferase,alanine aminotransferase,281.0920128231362,907.0054864293936,99.3338797630482 137 | 226512,admission weight (kg),"weight (kg, admission)",weight,80.77219622953048,22.47144631077177,99.3396499881415 138 | 224700,total peep level,positive end-expiratory pressure (total),positive end-expiratory 
pressure,7.364976207137844,3.4632607918855878,99.35073608989556 139 | 225612,alkaline phosphate,alkaline phosphate,alkaline phosphate,120.11637508747376,146.4889185297922,99.35073608989556 140 | 773,alk. phosphate,alkaline phosphate,alkaline phosphate,126.72359811440231,157.6099844059226,99.35423457282614 141 | 848,total bili (0-1.5),bilirubin (total),bilirubin,3.232037251775634,6.352632846086666,99.3730900327767 142 | 227429,troponin-t,troponin-t,troponin-t,0.6851146770069206,1.9286944059222382,99.44928426491421 143 | 1538,total bili,bilirubin (total),bilirubin,3.2994932895097273,6.437710719690952,99.50235216183528 144 | 772,albumin (>3.2),albumin,albumin,2.923178992298408,0.6319024849857566,99.51034869424804 145 | 226534,sodium (whole blood),sodium (whole blood),sodium,136.2199218469922,5.0680310977003,99.5775468274212 146 | 227456,albumin,albumin,albumin,3.0758501155496942,0.6561975161958757,99.58713357934786 147 | 1521,albumin,albumin,albumin,2.9533737229745696,0.6362340551027333,99.61752948948501 148 | 226730,height (cm),height (cm),height,168.79796203532825,13.858957585321845,99.65533127907263 149 | 226707,height,height,height,168.79774123385295,13.82883797185507,99.65533127907263 150 | 651,spon rr (mech.),respiratory rate (spontaneous),respiratory rate,22.33264552783046,7.321761402405684,99.65610367140795 151 | 806,fibrinogen (150-400),fibrinogen,fibrinogen,300.5521066920049,178.4770106575294,99.66528150974531 152 | 227468,fibrinogen,fibrinogen,fibrinogen,288.7183806239222,177.50011742710805,99.7101711348806 153 | 1528,fibrinogen,fibrinogen,fibrinogen,298.4862065131056,177.54140366082632,99.71398766171397 154 | 226536,chloride (whole blood),chloride (whole blood),chloride,106.15262321144672,5.902115271065178,99.71421483593024 155 | 504,pcwp,pulmonary capillary wedge pressure,pulmonary capillary wedge pressure,17.11269971987513,7.154823284028659,99.72966268263671 156 | 6701,arterial bp #2 [systolic],systolic blood pressure (arterial),systolic blood 
pressure,109.3032405065662,22.273124921633524,99.73052594465854 157 | 8555,arterial bp #2 [diastolic],diastolic blood pressure (arterial),diastolic blood pressure,57.41475929494896,12.635531634465185,99.7305713795018 158 | 6702,arterial bp mean #2,mean blood pressure (arterial),mean blood pressure,76.85960301282616,13.50967036579858,99.73252507776174 159 | 224422,spont rr,respiratory rate (spontaneous),respiratory rate,20.75559874143994,6.912434510256433,99.75451554189682 160 | 224322,iabp mean,mean blood pressure (arterial),mean blood pressure,79.58123678261505,15.215683735087245,99.79231733148444 161 | 860,venous ph,ph (venous),ph,7.372199163023656,0.07587914823369585,99.82593911549264 162 | 220274,ph (venous),ph (venous),ph,7.37149143610012,0.08704031820404255,99.8275747698498 163 | 189,fio2 (analyzed),fraction inspired oxygen,fraction inspired oxygen,0.568271808654999,0.2016535849428096,99.83139129668317 164 | 512,pvr,post void residual,post void residual,205.5797168637139,134.9887854778189,99.86037872667943 165 | 789,cholesterol (<200),cholesterol,cholesterol,161.7004323656578,49.12364077133579,99.92644098877123 166 | 226062,venous co2 pressure,partial pressure of carbon dioxide,partial pressure of carbon dioxide,44.57086871325931,13.420123431063322,99.93043925497761 167 | 223679,tco2 (calc) venous,"co2 (etco2, pco2, etc.)","co2 (etco2, pco2, etc.)",25.276143790849673,7.79197652299903,99.93048468982086 168 | 851,troponin,troponin-i,troponin-i,7.669527675276747,10.688952561726897,99.93843578739038 169 | 803,direct bili (0-0.3),bilirubin (conjugated),bilirubin,3.201930654058309,4.881562376748818,99.94234318391025 170 | 225651,direct bilirubin,bilirubin (conjugated),bilirubin,3.0232613908872854,4.265646313845083,99.94316101108883 171 | 1524,cholesterol,cholesterol,cholesterol,159.81780366056572,47.42536183359262,99.94538731840828 172 | 220603,cholesterol,cholesterol,cholesterol,160.08823529411765,52.03284289722227,99.95133928287461 173 | 442,manual bp 
[systolic],systolic blood pressure (noninvasive),systolic blood pressure,120.31005433048445,24.741012009665045,99.95261145848573 174 | 857,venous co2(calc),"co2 (etco2, pco2, etc.)","co2 (etco2, pco2, etc.)",25.793240556660038,8.234860617450735,99.95429254768614 175 | 8440,manual bp [diastolic],diastolic blood pressure (noninvasive),diastolic blood pressure,62.113279946613275,14.503790129481999,99.95461059158892 176 | 859,venous pvo2,venous pvo2,venous pvo2,43.87709205020921,14.872487981508149,99.95656428984886 177 | 849,total protein(6.5-8),total protein,total protein,5.660992907801418,1.0911605011574026,99.98078106130342 178 | 224167,manual blood pressure systolic left,systolic blood pressure (noninvasive),systolic blood pressure,120.20458673932787,27.590102229742303,99.98332541252566 179 | 224643,manual blood pressure diastolic left,diastolic blood pressure (noninvasive),diastolic blood pressure,66.04010566762727,16.221242418772487,99.98423410939074 180 | 1539,total protein,total protein,total protein,5.656176470588233,1.096490821739333,99.98455215329352 181 | 227243,manual blood pressure systolic right,systolic blood pressure (noninvasive),systolic blood pressure,123.6897689768977,26.480958939779597,99.98623324249394 182 | 727,vision fio2,fraction inspired oxygen,fraction inspired oxygen,0.43559748427672906,0.18904353258599843,99.99277585992256 183 | 1394,height inches,height (in),height,167.64,17.96051224213831,99.9999091303135 184 | -------------------------------------------------------------------------------- /notebooks/Baselines for Mortality and LOS prediction - GRU-D.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%load_ext autoreload\n", 12 | "from __future__ import print_function, division" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | 
"execution_count": null, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "%autoreload\n", 24 | "\n", 25 | "import copy, math, os, pickle, time, pandas as pd, numpy as np, scipy.stats as ss\n", 26 | "\n", 27 | "from sklearn.linear_model import LogisticRegression\n", 28 | "from sklearn.ensemble import RandomForestClassifier\n", 29 | "from sklearn.metrics import average_precision_score, roc_auc_score, accuracy_score, f1_score\n", 30 | "\n", 31 | "import torch, torch.utils.data as utils, torch.nn as nn, torch.nn.functional as F, torch.optim as optim\n", 32 | "from torch.autograd import Variable\n", 33 | "from torch.nn.parameter import Parameter\n", 34 | "\n", 35 | "\n", 36 | "from mmd_grud_utils import *" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "DATA_FILEPATH = '/scratch/mmd/mimic_data/final/grouping_5/all_hourly_data.h5'\n", 46 | "RAW_DATA_FILEPATH = '/scratch/mmd/mimic_data/final/nogrouping_5/all_hourly_data.h5'\n", 47 | "GAP_TIME = 6 # In hours\n", 48 | "WINDOW_SIZE = 24 # In hours\n", 49 | "SEED = 1\n", 50 | "ID_COLS = ['subject_id', 'hadm_id', 'icustay_id']\n", 51 | "GPU = '2'\n", 52 | "\n", 53 | "os.environ['CUDA_VISIBLE_DEVICES'] = GPU\n", 54 | "np.random.seed(SEED)\n", 55 | "torch.manual_seed(SEED)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "class DictDist():\n", 67 | " def __init__(self, dict_of_rvs): self.dict_of_rvs = dict_of_rvs\n", 68 | " def rvs(self, n):\n", 69 | " a = {k: v.rvs(n) for k, v in self.dict_of_rvs.items()}\n", 70 | " out = []\n", 71 | " for i in range(n): out.append({k: vs[i] for k, vs in a.items()})\n", 72 | " return out\n", 73 | " \n", 74 | "class Choice():\n", 75 | " def __init__(self, options): self.options = options\n", 76 | " def rvs(self, n): return [self.options[i] 
for i in ss.randint(0, len(self.options)).rvs(n)]" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "scrolled": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "%%time\n", 88 | "data_full_lvl2 = pd.read_hdf(DATA_FILEPATH, 'vitals_labs')\n", 89 | "data_full_raw = pd.read_hdf(RAW_DATA_FILEPATH, 'vitals_labs') \n", 90 | "statics = pd.read_hdf(DATA_FILEPATH, 'patients')" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "data_full_lvl2.head()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "data_full_raw.head()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "statics.head()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": true 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "def simple_imputer(df):\n", 129 | " idx = pd.IndexSlice\n", 130 | " df = df.copy()\n", 131 | " if len(df.columns.names) > 2: df.columns = df.columns.droplevel(('label', 'LEVEL1', 'LEVEL2'))\n", 132 | " \n", 133 | " df_out = df.loc[:, idx[:, ['mean', 'count']]]\n", 134 | " icustay_means = df_out.loc[:, idx[:, 'mean']].groupby(ID_COLS).mean()\n", 135 | " \n", 136 | " df_out.loc[:,idx[:,'mean']] = df_out.loc[:,idx[:,'mean']].groupby(ID_COLS).fillna(\n", 137 | " method='ffill'\n", 138 | " ).groupby(ID_COLS).fillna(icustay_means).fillna(0)\n", 139 | " \n", 140 | " df_out.loc[:, idx[:, 'count']] = (df.loc[:, idx[:, 'count']] > 0).astype(float)\n", 141 | " df_out.rename(columns={'count': 'mask'}, level='Aggregation Function', inplace=True)\n", 142 | " \n", 143 | " is_absent = (1 - df_out.loc[:, idx[:, 'mask']])\n", 144 | " hours_of_absence = is_absent.cumsum()\n", 145 | " 
time_since_measured = hours_of_absence - hours_of_absence[is_absent==0].fillna(method='ffill')\n", 146 | " time_since_measured.rename(columns={'mask': 'time_since_measured'}, level='Aggregation Function', inplace=True)\n", 147 | "\n", 148 | " df_out = pd.concat((df_out, time_since_measured), axis=1)\n", 149 | " df_out.loc[:, idx[:, 'time_since_measured']] = df_out.loc[:, idx[:, 'time_since_measured']].fillna(100)\n", 150 | " \n", 151 | " df_out.sort_index(axis=1, inplace=True)\n", 152 | " return df_out" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "Ys = statics[statics.max_hours > WINDOW_SIZE + GAP_TIME][['mort_hosp', 'mort_icu', 'los_icu']]\n", 162 | "Ys['los_3'] = Ys['los_icu'] > 3\n", 163 | "Ys['los_7'] = Ys['los_icu'] > 7\n", 164 | "Ys.drop(columns=['los_icu'], inplace=True)\n", 165 | "Ys.astype(float)\n", 166 | "\n", 167 | "lvl2, raw = [df[\n", 168 | " (df.index.get_level_values('icustay_id').isin(set(Ys.index.get_level_values('icustay_id')))) &\n", 169 | " (df.index.get_level_values('hours_in') < WINDOW_SIZE)\n", 170 | "] for df in (data_full_lvl2, data_full_raw)]\n", 171 | "\n", 172 | "raw.columns = raw.columns.droplevel(level=['label', 'LEVEL1', 'LEVEL2'])\n", 173 | "\n", 174 | "train_frac, dev_frac, test_frac = 0.7, 0.1, 0.2\n", 175 | "lvl2_subj_idx, raw_subj_idx, Ys_subj_idx = [df.index.get_level_values('subject_id') for df in (lvl2, raw, Ys)]\n", 176 | "lvl2_subjects = set(lvl2_subj_idx)\n", 177 | "assert lvl2_subjects == set(Ys_subj_idx), \"Subject ID pools differ!\"\n", 178 | "assert lvl2_subjects == set(raw_subj_idx), \"Subject ID pools differ!\"\n", 179 | "\n", 180 | "np.random.seed(SEED)\n", 181 | "subjects, N = np.random.permutation(list(lvl2_subjects)), len(lvl2_subjects)\n", 182 | "N_train, N_dev, N_test = int(train_frac * N), int(dev_frac * N), int(test_frac * N)\n", 183 | "train_subj = subjects[:N_train]\n", 184 | "dev_subj = 
subjects[N_train:N_train + N_dev]\n", 185 | "test_subj = subjects[N_train+N_dev:]\n", 186 | "\n", 187 | "[(lvl2_train, lvl2_dev, lvl2_test), (raw_train, raw_dev, raw_test), (Ys_train, Ys_dev, Ys_test)] = [\n", 188 | " [df[df.index.get_level_values('subject_id').isin(s)] for s in (train_subj, dev_subj, test_subj)] \\\n", 189 | " for df in (lvl2, raw, Ys)\n", 190 | "]\n", 191 | "\n", 192 | "idx = pd.IndexSlice\n", 193 | "lvl2_means, lvl2_stds = lvl2_train.loc[:, idx[:,'mean']].mean(axis=0), lvl2_train.loc[:, idx[:,'mean']].std(axis=0)\n", 194 | "raw_means, raw_stds = raw_train.loc[:, idx[:,'mean']].mean(axis=0), raw_train.loc[:, idx[:,'mean']].std(axis=0)\n", 195 | "\n", 196 | "lvl2_train.loc[:, idx[:,'mean']] = (lvl2_train.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds\n", 197 | "lvl2_dev.loc[:, idx[:,'mean']] = (lvl2_dev.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds\n", 198 | "lvl2_test.loc[:, idx[:,'mean']] = (lvl2_test.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds\n", 199 | "\n", 200 | "raw_train.loc[:, idx[:,'mean']] = (raw_train.loc[:, idx[:,'mean']] - raw_means)/raw_stds\n", 201 | "raw_dev.loc[:, idx[:,'mean']] = (raw_dev.loc[:, idx[:,'mean']] - raw_means)/raw_stds\n", 202 | "raw_test.loc[:, idx[:,'mean']] = (raw_test.loc[:, idx[:,'mean']] - raw_means)/raw_stds" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test = [\n", 212 | " simple_imputer(df) for df in (raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test)\n", 213 | "]\n", 214 | "raw_flat_train, raw_flat_dev, raw_flat_test, lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test = [\n", 215 | " df.pivot_table(index=['subject_id', 'hadm_id', 'icustay_id'], columns=['hours_in']) for df in (\n", 216 | " raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test\n", 217 | " )\n", 218 | "]\n", 219 | "\n", 220 | "for df in lvl2_train, lvl2_dev, 
lvl2_test, raw_train, raw_dev, raw_test: assert not df.isnull().any().any()" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": { 227 | "collapsed": true 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "Ys = statics[statics.max_hours > WINDOW_SIZE + GAP_TIME][['mort_hosp', 'mort_icu', 'los_icu']]\n", 232 | "Ys['los_3'] = Ys['los_icu'] > 3\n", 233 | "Ys['los_7'] = Ys['los_icu'] > 7\n", 234 | "Ys.drop(columns=['los_icu'], inplace=True)\n", 235 | "Ys.astype(float)\n", 236 | "[(Ys_train, Ys_dev, Ys_test)] = [\n", 237 | " [df[df.index.get_level_values('subject_id').isin(s)] for s in (train_subj, dev_subj, test_subj)] \\\n", 238 | " for df in (Ys,)\n", 239 | "]" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "### Task Prediction" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "#### Hyperparams" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": { 260 | "collapsed": true 261 | }, 262 | "outputs": [], 263 | "source": [ 264 | "N = 10\n", 265 | "\n", 266 | "GRU_D_dist = DictDist({\n", 267 | " 'cell_size': ss.randint(50, 75),\n", 268 | " 'hidden_size': ss.randint(65, 95), \n", 269 | " 'learning_rate': ss.uniform(2e-3, 1e-1),\n", 270 | " 'num_epochs': ss.randint(15, 150),\n", 271 | " 'patience': ss.randint(3, 7),\n", 272 | " 'batch_size': ss.randint(35, 65),\n", 273 | " 'early_stop_frac': ss.uniform(0.05, 0.1),\n", 274 | " 'seed': ss.randint(1, 10000),\n", 275 | "})\n", 276 | "np.random.seed(SEED)\n", 277 | "GRU_D_hyperparams_list = GRU_D_dist.rvs(N)\n", 278 | "\n", 279 | "with open('/scratch/mmd/extraction_baselines_gru-d.pkl', mode='rb') as f: results = pickle.load(f)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "results" 289 | ] 290 | }, 291 | { 292 | 
"cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "### GRU-D" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "scrolled": false 303 | }, 304 | "outputs": [], 305 | "source": [ 306 | "model_name = 'GRU-D'\n", 307 | "hyperparams_list = GRU_D_hyperparams_list\n", 308 | "RERUN = False\n", 309 | "if model_name not in results: results[model_name] = {}\n", 310 | "for t in ['mort_icu', 'los_3', 'mort_hosp', 'los_7']:\n", 311 | " if t not in results[model_name]: results[model_name][t] = {}\n", 312 | " for n, X_train, X_dev, X_test in (\n", 313 | " ('lvl2', lvl2_train, lvl2_dev, lvl2_test),\n", 314 | "# ('raw', raw_train, raw_dev, raw_test)\n", 315 | " ):\n", 316 | " print(\"Running model %s on target %s with representation %s\" % (model_name, t, n))\n", 317 | " X_mean = np.nanmean(\n", 318 | " to_3D_tensor(\n", 319 | " X_train.loc[:, pd.IndexSlice[:, 'mean']] * \n", 320 | " np.where((X_train.loc[:, pd.IndexSlice[:, 'mask']] == 1).values, 1, np.NaN)\n", 321 | " ),\n", 322 | " axis=0, keepdims=True\n", 323 | " ).transpose([0, 2, 1])\n", 324 | " base_params = {'X_mean': X_mean, 'output_last': True, 'input_size': X_mean.shape[2]}\n", 325 | " \n", 326 | " if n in results[model_name][t]:\n", 327 | " if not RERUN: \n", 328 | " print(\"Final results for model %s on target %s with representation %s\" % (model_name, t, n))\n", 329 | " print(results[model_name][t][n])\n", 330 | " continue\n", 331 | " best_s, best_hyperparams = results[model_name][t][n][-1], results[model_name][t][n][1]\n", 332 | " print(\"Loading best hyperparams\", best_hyperparams)\n", 333 | " else:\n", 334 | " best_s, best_hyperparams = -np.Inf, None\n", 335 | " for i, hyperparams in enumerate(hyperparams_list):\n", 336 | " print(\"On sample %d / %d (hyperparams = %s)\" % (i+1, len(hyperparams_list), repr((hyperparams))))\n", 337 | "\n", 338 | " early_stop_frac,batch_size,seed = [hyperparams[k] for k in 
('early_stop_frac','batch_size','seed')]\n", 339 | "\n", 340 | " np.random.seed(seed)\n", 341 | " all_train_subjects = list(\n", 342 | " np.random.permutation(Ys_train.index.get_level_values('subject_id').values)\n", 343 | " )\n", 344 | " N_early_stop = int(len(all_train_subjects) * early_stop_frac)\n", 345 | " train_subjects = all_train_subjects[:-N_early_stop]\n", 346 | " early_stop_subjects = all_train_subjects[-N_early_stop:]\n", 347 | " X_train_obs = X_train[X_train.index.get_level_values('subject_id').isin(train_subjects)]\n", 348 | " Ys_train_obs = Ys_train[Ys_train.index.get_level_values('subject_id').isin(train_subjects)]\n", 349 | "\n", 350 | " X_train_early_stop = X_train[X_train.index.get_level_values('subject_id').isin(early_stop_subjects)]\n", 351 | " Ys_train_early_stop = Ys_train[\n", 352 | " Ys_train.index.get_level_values('subject_id').isin(early_stop_subjects)\n", 353 | " ]\n", 354 | "\n", 355 | " train_dataloader = prepare_dataloader(X_train_obs, Ys_train_obs[t], batch_size=batch_size)\n", 356 | " early_stop_dataloader = prepare_dataloader(\n", 357 | " X_train_early_stop, Ys_train_early_stop[t], batch_size=batch_size\n", 358 | " )\n", 359 | " dev_dataloader = prepare_dataloader(X_dev, Ys_dev[t], batch_size=batch_size)\n", 360 | " test_dataloader = prepare_dataloader(X_test, Ys_test[t], batch_size=batch_size)\n", 361 | "\n", 362 | " model_hyperparams = copy.copy(base_params)\n", 363 | " model_hyperparams.update(\n", 364 | " {k: v for k, v in hyperparams.items() if k in ('cell_size', 'hidden_size', 'batch_size')}\n", 365 | " )\n", 366 | " model = GRUD(**model_hyperparams)\n", 367 | "\n", 368 | " best_model, _ = Train_Model(\n", 369 | " model, train_dataloader, early_stop_dataloader,\n", 370 | " **{k: v for k, v in hyperparams.items() if k in (\n", 371 | " 'num_epochs', 'patience', 'learning_rate', 'batch_size'\n", 372 | " )}\n", 373 | " )\n", 374 | "\n", 375 | " probabilities_dev, labels_dev = predict_proba(best_model, dev_dataloader)\n", 376 | " 
probabilities_dev = np.concatenate(probabilities_dev)[:, 1]\n", 377 | " labels_dev = np.concatenate(labels_dev)\n", 378 | " s = roc_auc_score(labels_dev, probabilities_dev)\n", 379 | " if s > best_s:\n", 380 | " best_s, best_hyperparams = s, hyperparams\n", 381 | " print(\"New Best Score: %.2f @ hyperparams = %s\" % (100*best_s, repr((best_hyperparams))))\n", 382 | " \n", 383 | " ## Test\n", 384 | " np.random.seed(seed)\n", 385 | " hyperparams = best_hyperparams # In case I forgot a replace below\n", 386 | " early_stop_frac,batch_size,seed = [best_hyperparams[k] for k in ('early_stop_frac','batch_size','seed')]\n", 387 | " \n", 388 | " X_train_concat, Ys_train_concat = pd.concat((X_train, X_dev)), pd.concat((Ys_train, Ys_dev))\n", 389 | " \n", 390 | " all_train_subjects = list(np.random.permutation(Ys_train_concat.index.get_level_values('subject_id').values))\n", 391 | " N_early_stop = int(len(all_train_subjects) * early_stop_frac)\n", 392 | " train_subjects, early_stop_subjects = all_train_subjects[:-N_early_stop], all_train_subjects[-N_early_stop:]\n", 393 | " X_train_obs = X_train_concat[X_train_concat.index.get_level_values('subject_id').isin(train_subjects)]\n", 394 | " Ys_train_obs = Ys_train_concat[Ys_train_concat.index.get_level_values('subject_id').isin(train_subjects)]\n", 395 | "\n", 396 | " X_train_early_stop = X_train_concat[X_train_concat.index.get_level_values('subject_id').isin(early_stop_subjects)]\n", 397 | " Ys_train_early_stop = Ys_train_concat[Ys_train_concat.index.get_level_values('subject_id').isin(early_stop_subjects)]\n", 398 | "\n", 399 | " train_dataloader = prepare_dataloader(X_train_obs, Ys_train_obs[t], batch_size=batch_size)\n", 400 | " early_stop_dataloader = prepare_dataloader(X_train_early_stop, Ys_train_early_stop[t], batch_size=batch_size)\n", 401 | " test_dataloader = prepare_dataloader(X_test, Ys_test[t], batch_size=batch_size)\n", 402 | "\n", 403 | " model_hyperparams = copy.copy(base_params)\n", 404 | " 
model_hyperparams.update(\n", 405 | " {k: v for k, v in best_hyperparams.items() if k in ('cell_size', 'hidden_size', 'batch_size')}\n", 406 | " )\n", 407 | " model = GRUD(**model_hyperparams)\n", 408 | "\n", 409 | " best_model, (losses_train, losses_early_stop, losses_epochs_train, losses_epochs_early_stop) = Train_Model(\n", 410 | " model, train_dataloader, early_stop_dataloader,\n", 411 | " **{k: v for k, v in best_hyperparams.items() if k in (\n", 412 | " 'num_epochs', 'patience', 'learning_rate', 'batch_size'\n", 413 | " )}\n", 414 | " )\n", 415 | "\n", 416 | " probabilities_test, labels_test = predict_proba(best_model, test_dataloader)\n", 417 | "\n", 418 | " y_score = np.concatenate(probabilities_test)[:, 1]\n", 419 | " y_pred = np.argmax(probabilities_test)\n", 420 | " y_true = np.concatenate(labels_test)\n", 421 | "\n", 422 | " auc = roc_auc_score(y_true, y_score)\n", 423 | " auprc = average_precision_score(y_true, y_score)\n", 424 | " acc = accuracy_score(y_true, y_pred)\n", 425 | " F1 = f1_score(y_true, y_pred)\n", 426 | " print(\"Final results for model %s on target %s with representation %s\" % (model_name, t, n))\n", 427 | " print(auc, auprc, acc, F1)\n", 428 | " \n", 429 | " results[model_name][t][n] = None, best_hyperparams, auc, auprc, acc, F1, best_s\n", 430 | " with open('/scratch/mmd/extraction_baselines_gru-d.pkl', mode='wb') as f: pickle.dump(results, f)" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": null, 436 | "metadata": {}, 437 | "outputs": [], 438 | "source": [ 439 | "y_score = np.concatenate(probabilities_test)[:, 1]\n", 440 | "y_pred = np.concatenate(probabilities_test).argmax(axis=1)\n", 441 | "y_true = np.concatenate(labels_test)\n", 442 | "\n", 443 | "auc = roc_auc_score(y_true, y_score)\n", 444 | "auprc = average_precision_score(y_true, y_score)\n", 445 | "acc = accuracy_score(y_true, y_pred)\n", 446 | "F1 = f1_score(y_true, y_pred)\n", 447 | "print(\"Final results for model %s on target %s with 
representation %s\" % (model_name, t, n))\n", 448 | "print(auc, auprc, acc, F1)\n", 449 | "\n", 450 | "results[model_name][t][n] = None, best_hyperparams, auc, auprc, acc, F1, best_s\n", 451 | "with open('/scratch/mmd/extraction_baselines_gru-d.pkl', mode='wb') as f: pickle.dump(results, f)" 452 | ] 453 | } 454 | ], 455 | "metadata": { 456 | "kernelspec": { 457 | "display_name": "Python 2", 458 | "language": "python", 459 | "name": "python2" 460 | }, 461 | "language_info": { 462 | "codemirror_mode": { 463 | "name": "ipython", 464 | "version": 2 465 | }, 466 | "file_extension": ".py", 467 | "mimetype": "text/x-python", 468 | "name": "python", 469 | "nbconvert_exporter": "python", 470 | "pygments_lexer": "ipython2", 471 | "version": "2.7.13" 472 | } 473 | }, 474 | "nbformat": 4, 475 | "nbformat_minor": 2 476 | } 477 | -------------------------------------------------------------------------------- /notebooks/Baselines for Mortality and LOS prediction - Sklearn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%load_ext autoreload\n", 12 | "from __future__ import print_function, division" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "%autoreload\n", 24 | "\n", 25 | "import copy, math, os, pickle, time, pandas as pd, numpy as np, scipy.stats as ss\n", 26 | "\n", 27 | "from sklearn.linear_model import LogisticRegression\n", 28 | "from sklearn.ensemble import RandomForestClassifier\n", 29 | "from sklearn.metrics import average_precision_score, roc_auc_score, accuracy_score, f1_score\n", 30 | "\n", 31 | "import torch, torch.utils.data as utils, torch.nn as nn, torch.nn.functional as F, torch.optim as optim\n", 32 | "from torch.autograd import 
Variable\n", 33 | "from torch.nn.parameter import Parameter" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "DATA_FILEPATH = '/scratch/mmd/mimic_data/final/grouping_5/all_hourly_data.h5'\n", 43 | "RAW_DATA_FILEPATH = '/scratch/mmd/mimic_data/final/nogrouping_5/all_hourly_data.h5'\n", 44 | "GAP_TIME = 6 # In hours\n", 45 | "WINDOW_SIZE = 24 # In hours\n", 46 | "SEED = 1\n", 47 | "ID_COLS = ['subject_id', 'hadm_id', 'icustay_id']\n", 48 | "GPU = '2'\n", 49 | "\n", 50 | "os.environ['CUDA_VISIBLE_DEVICES'] = GPU\n", 51 | "np.random.seed(SEED)\n", 52 | "torch.manual_seed(SEED)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": true 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "class DictDist():\n", 64 | " def __init__(self, dict_of_rvs): self.dict_of_rvs = dict_of_rvs\n", 65 | " def rvs(self, n):\n", 66 | " a = {k: v.rvs(n) for k, v in self.dict_of_rvs.items()}\n", 67 | " out = []\n", 68 | " for i in range(n): out.append({k: vs[i] for k, vs in a.items()})\n", 69 | " return out\n", 70 | " \n", 71 | "class Choice():\n", 72 | " def __init__(self, options): self.options = options\n", 73 | " def rvs(self, n): return [self.options[i] for i in ss.randint(0, len(self.options)).rvs(n)]" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "scrolled": false 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "%%time\n", 85 | "data_full_lvl2 = pd.read_hdf(DATA_FILEPATH, 'vitals_labs')\n", 86 | "data_full_raw = pd.read_hdf(RAW_DATA_FILEPATH, 'vitals_labs') \n", 87 | "statics = pd.read_hdf(DATA_FILEPATH, 'patients')" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "data_full_lvl2.head()" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": 
{}, 103 | "outputs": [], 104 | "source": [ 105 | "data_full_raw.head()" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "statics.head()" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "collapsed": true 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "def simple_imputer(df):\n", 126 | " idx = pd.IndexSlice\n", 127 | " df = df.copy()\n", 128 | " if len(df.columns.names) > 2: df.columns = df.columns.droplevel(('label', 'LEVEL1', 'LEVEL2'))\n", 129 | " \n", 130 | " df_out = df.loc[:, idx[:, ['mean', 'count']]]\n", 131 | " icustay_means = df_out.loc[:, idx[:, 'mean']].groupby(ID_COLS).mean()\n", 132 | " \n", 133 | " df_out.loc[:,idx[:,'mean']] = df_out.loc[:,idx[:,'mean']].groupby(ID_COLS).fillna(\n", 134 | " method='ffill'\n", 135 | " ).groupby(ID_COLS).fillna(icustay_means).fillna(0)\n", 136 | " \n", 137 | " df_out.loc[:, idx[:, 'count']] = (df.loc[:, idx[:, 'count']] > 0).astype(float)\n", 138 | " df_out.rename(columns={'count': 'mask'}, level='Aggregation Function', inplace=True)\n", 139 | " \n", 140 | " is_absent = (1 - df_out.loc[:, idx[:, 'mask']])\n", 141 | " hours_of_absence = is_absent.cumsum()\n", 142 | " time_since_measured = hours_of_absence - hours_of_absence[is_absent==0].fillna(method='ffill')\n", 143 | " time_since_measured.rename(columns={'mask': 'time_since_measured'}, level='Aggregation Function', inplace=True)\n", 144 | "\n", 145 | " df_out = pd.concat((df_out, time_since_measured), axis=1)\n", 146 | " df_out.loc[:, idx[:, 'time_since_measured']] = df_out.loc[:, idx[:, 'time_since_measured']].fillna(100)\n", 147 | " \n", 148 | " df_out.sort_index(axis=1, inplace=True)\n", 149 | " return df_out" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "Ys = statics[statics.max_hours > 
WINDOW_SIZE + GAP_TIME][['mort_hosp', 'mort_icu', 'los_icu']]\n", 159 | "Ys['los_3'] = Ys['los_icu'] > 3\n", 160 | "Ys['los_7'] = Ys['los_icu'] > 7\n", 161 | "Ys.drop(columns=['los_icu'], inplace=True)\n", 162 | "Ys.astype(float)\n", 163 | "\n", 164 | "lvl2, raw = [df[\n", 165 | " (df.index.get_level_values('icustay_id').isin(set(Ys.index.get_level_values('icustay_id')))) &\n", 166 | " (df.index.get_level_values('hours_in') < WINDOW_SIZE)\n", 167 | "] for df in (data_full_lvl2, data_full_raw)]\n", 168 | "\n", 169 | "raw.columns = raw.columns.droplevel(level=['label', 'LEVEL1', 'LEVEL2'])\n", 170 | "\n", 171 | "train_frac, dev_frac, test_frac = 0.7, 0.1, 0.2\n", 172 | "lvl2_subj_idx, raw_subj_idx, Ys_subj_idx = [df.index.get_level_values('subject_id') for df in (lvl2, raw, Ys)]\n", 173 | "lvl2_subjects = set(lvl2_subj_idx)\n", 174 | "assert lvl2_subjects == set(Ys_subj_idx), \"Subject ID pools differ!\"\n", 175 | "assert lvl2_subjects == set(raw_subj_idx), \"Subject ID pools differ!\"\n", 176 | "\n", 177 | "np.random.seed(SEED)\n", 178 | "subjects, N = np.random.permutation(list(lvl2_subjects)), len(lvl2_subjects)\n", 179 | "N_train, N_dev, N_test = int(train_frac * N), int(dev_frac * N), int(test_frac * N)\n", 180 | "train_subj = subjects[:N_train]\n", 181 | "dev_subj = subjects[N_train:N_train + N_dev]\n", 182 | "test_subj = subjects[N_train+N_dev:]\n", 183 | "\n", 184 | "[(lvl2_train, lvl2_dev, lvl2_test), (raw_train, raw_dev, raw_test), (Ys_train, Ys_dev, Ys_test)] = [\n", 185 | " [df[df.index.get_level_values('subject_id').isin(s)] for s in (train_subj, dev_subj, test_subj)] \\\n", 186 | " for df in (lvl2, raw, Ys)\n", 187 | "]\n", 188 | "\n", 189 | "idx = pd.IndexSlice\n", 190 | "lvl2_means, lvl2_stds = lvl2_train.loc[:, idx[:,'mean']].mean(axis=0), lvl2_train.loc[:, idx[:,'mean']].std(axis=0)\n", 191 | "raw_means, raw_stds = raw_train.loc[:, idx[:,'mean']].mean(axis=0), raw_train.loc[:, idx[:,'mean']].std(axis=0)\n", 192 | "\n", 193 | "lvl2_train.loc[:, 
idx[:,'mean']] = (lvl2_train.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds\n", 194 | "lvl2_dev.loc[:, idx[:,'mean']] = (lvl2_dev.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds\n", 195 | "lvl2_test.loc[:, idx[:,'mean']] = (lvl2_test.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds\n", 196 | "\n", 197 | "raw_train.loc[:, idx[:,'mean']] = (raw_train.loc[:, idx[:,'mean']] - raw_means)/raw_stds\n", 198 | "raw_dev.loc[:, idx[:,'mean']] = (raw_dev.loc[:, idx[:,'mean']] - raw_means)/raw_stds\n", 199 | "raw_test.loc[:, idx[:,'mean']] = (raw_test.loc[:, idx[:,'mean']] - raw_means)/raw_stds" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test = [\n", 209 | " simple_imputer(df) for df in (raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test)\n", 210 | "]\n", 211 | "raw_flat_train, raw_flat_dev, raw_flat_test, lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test = [\n", 212 | " df.pivot_table(index=['subject_id', 'hadm_id', 'icustay_id'], columns=['hours_in']) for df in (\n", 213 | " raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test\n", 214 | " )\n", 215 | "]\n", 216 | "\n", 217 | "for df in lvl2_train, lvl2_dev, lvl2_test, raw_train, raw_dev, raw_test: assert not df.isnull().any().any()" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "### Task Prediction" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "#### Hyperparams" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": { 238 | "collapsed": true 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "N = 15\n", 243 | "\n", 244 | "LR_dist = DictDist({\n", 245 | " 'C': Choice(np.geomspace(1e-3, 1e3, 10000)),\n", 246 | " 'penalty': Choice(['l1', 'l2']),\n", 247 | " 'solver': Choice(['liblinear', 
'lbfgs']),\n", 248 | " 'max_iter': Choice([100, 500])\n", 249 | "})\n", 250 | "np.random.seed(SEED)\n", 251 | "LR_hyperparams_list = LR_dist.rvs(N)\n", 252 | "for i in range(N):\n", 253 | " if LR_hyperparams_list[i]['solver'] == 'lbfgs': LR_hyperparams_list[i]['penalty'] = 'l2'\n", 254 | "\n", 255 | "RF_dist = DictDist({\n", 256 | " 'n_estimators': ss.randint(50, 500),\n", 257 | " 'max_depth': ss.randint(2, 10),\n", 258 | " 'min_samples_split': ss.randint(2, 75),\n", 259 | " 'min_samples_leaf': ss.randint(1, 50),\n", 260 | "})\n", 261 | "np.random.seed(SEED)\n", 262 | "RF_hyperparams_list = RF_dist.rvs(N)\n", 263 | "\n", 264 | "GRU_D_dist = DictDist({\n", 265 | " 'cell_size': ss.randint(50, 75),\n", 266 | " 'hidden_size': ss.randint(65, 95), \n", 267 | " 'learning_rate': ss.uniform(2e-3, 1e-1),\n", 268 | " 'num_epochs': ss.randint(15, 150),\n", 269 | " 'patience': ss.randint(3, 7),\n", 270 | " 'batch_size': ss.randint(35, 65),\n", 271 | " 'early_stop_frac': ss.uniform(0.05, 0.1),\n", 272 | " 'seed': ss.randint(1, 10000),\n", 273 | "})\n", 274 | "np.random.seed(SEED)\n", 275 | "GRU_D_hyperparams_list = GRU_D_dist.rvs(N)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": { 282 | "collapsed": true 283 | }, 284 | "outputs": [], 285 | "source": [ 286 | "def run_basic(model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, target):\n", 287 | " best_s, best_hyperparams = -np.Inf, None\n", 288 | " for i, hyperparams in enumerate(hyperparams_list):\n", 289 | " print(\"On sample %d / %d (hyperparams = %s)\" % (i+1, len(hyperparams_list), repr((hyperparams))))\n", 290 | " M = model(**hyperparams)\n", 291 | " M.fit(X_flat_train, Ys_train[target])\n", 292 | " s = roc_auc_score(Ys_dev[target], M.predict_proba(X_flat_dev)[:, 1])\n", 293 | " if s > best_s:\n", 294 | " best_s, best_hyperparams = s, hyperparams\n", 295 | " print(\"New Best Score: %.2f @ hyperparams = %s\" % (100*best_s, repr((best_hyperparams))))\n", 296 | 
"\n", 297 | " return run_only_final(model, best_hyperparams, X_flat_train, X_flat_dev, X_flat_test, target)\n", 298 | "\n", 299 | "def run_only_final(model, best_hyperparams, X_flat_train, X_flat_dev, X_flat_test, target):\n", 300 | " best_M = model(**best_hyperparams)\n", 301 | " best_M.fit(pd.concat((X_flat_train, X_flat_dev)), pd.concat((Ys_train, Ys_dev))[target])\n", 302 | " y_true = Ys_test[target]\n", 303 | " y_score = best_M.predict_proba(X_flat_test)[:, 1]\n", 304 | " y_pred = best_M.predict(X_flat_test)\n", 305 | "\n", 306 | " auc = roc_auc_score(y_true, y_score)\n", 307 | " auprc = average_precision_score(y_true, y_score)\n", 308 | " acc = accuracy_score(y_true, y_pred)\n", 309 | " F1 = f1_score(y_true, y_pred)\n", 310 | " \n", 311 | " return best_M, best_hyperparams, auc, auprc, acc, F1" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "### Sklearn" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": { 325 | "collapsed": true 326 | }, 327 | "outputs": [], 328 | "source": [ 329 | "RESULTS_PATH = '/scratch/mmd/extraction_baselines-sklearn.pkl'\n", 330 | "with open(RESULTS_PATH, mode='rb') as f: results = pickle.load(f)\n", 331 | " \n", 332 | "RERUN = True" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "for model_name, model, hyperparams_list in [\n", 342 | " ('RF', RandomForestClassifier, RF_hyperparams_list), ('LR', LogisticRegression, LR_hyperparams_list)\n", 343 | "]:\n", 344 | " if model_name not in results: results[model_name] = {}\n", 345 | " for t in ['mort_icu', 'los_3']:\n", 346 | " if t not in results[model_name]: results[model_name][t] = {}\n", 347 | " for n, X_flat_train, X_flat_dev, X_flat_test in (\n", 348 | " ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),\n", 349 | " ('raw', raw_flat_train, raw_flat_dev, raw_flat_test)\n", 350 
| " ):\n", 351 | " if n in results[model_name][t]:\n", 352 | " print(\"Finished model %s on target %s with representation %s\" % (model_name, t, n))\n", 353 | " if RERUN: \n", 354 | " h = results[model_name][t][n][1]\n", 355 | " results[model_name][t][n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)\n", 356 | " \n", 357 | " print(\"Final results for model %s on target %s with representation %s\" % (model_name, t, n))\n", 358 | " print(results[model_name][t][n][2:])\n", 359 | "\n", 360 | " with open(RESULTS_PATH, mode='wb') as f: pickle.dump(results, f)\n", 361 | " continue\n", 362 | " \n", 363 | " print(\"Running model %s on target %s with representation %s\" % (model_name, t, n))\n", 364 | " results[model_name][t][n] = run_basic(\n", 365 | " model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t\n", 366 | " )\n", 367 | " print(\"Final results for model %s on target %s with representation %s\" % (model_name, t, n))\n", 368 | " print(results[model_name][t][n][2:])\n", 369 | " with open(RESULTS_PATH, mode='wb') as f: pickle.dump(results, f)" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "np.random.seed(SEED+1)\n", 379 | "LR_hyperparams_list_2 = LR_dist.rvs(45)\n", 380 | "for i in range(45):\n", 381 | " if LR_hyperparams_list_2[i]['solver'] == 'lbfgs': LR_hyperparams_list_2[i]['penalty'] = 'l2'\n", 382 | "\n", 383 | "results_2 = {}\n", 384 | "results_2_PATH = '/scratch/mmd/extraction_baselines-sklearn_LR_2_runs.pkl'\n", 385 | "\n", 386 | "for model_name, model, hyperparams_list in [\n", 387 | "# ('RF', RandomForestClassifier, RF_hyperparams_list),\n", 388 | " ('LR', LogisticRegression, LR_hyperparams_list_2)\n", 389 | "]:\n", 390 | " if model_name not in results_2: results_2[model_name] = {}\n", 391 | " for t in ['mort_icu', 'los_3']:\n", 392 | " if t not in results_2[model_name]: results_2[model_name][t] = {}\n", 393 | " for n, 
X_flat_train, X_flat_dev, X_flat_test in (\n", 394 | " ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),\n", 395 | "# ('raw', raw_flat_train, raw_flat_dev, raw_flat_test)\n", 396 | " ):\n", 397 | " if n in results_2[model_name][t]:\n", 398 | " print(\"Finished model %s on target %s with representation %s\" % (model_name, t, n))\n", 399 | " if RERUN: \n", 400 | " h = results_2[model_name][t][n][1]\n", 401 | " results_2[model_name][t][n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)\n", 402 | " \n", 403 | " print(\"Final results_2 for model %s on target %s with representation %s\" % (model_name, t, n))\n", 404 | " print(results_2[model_name][t][n][2:])\n", 405 | "\n", 406 | " with open(results_2_PATH, mode='wb') as f: pickle.dump(results_2, f)\n", 407 | " continue\n", 408 | " \n", 409 | " print(\"Running model %s on target %s with representation %s\" % (model_name, t, n))\n", 410 | " results_2[model_name][t][n] = run_basic(\n", 411 | " model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t\n", 412 | " )\n", 413 | " print(\"Final results_2 for model %s on target %s with representation %s\" % (model_name, t, n))\n", 414 | " print(results_2[model_name][t][n][2:])\n", 415 | " with open(results_2_PATH, mode='wb') as f: pickle.dump(results_2, f)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "metadata": { 422 | "scrolled": false 423 | }, 424 | "outputs": [], 425 | "source": [ 426 | "for model_name, model, hyperparams_list in [\n", 427 | "# ('RF', RandomForestClassifier, RF_hyperparams_list),\n", 428 | " ('LR', LogisticRegression, LR_hyperparams_list_2)\n", 429 | "]:\n", 430 | " if model_name not in results_2: results_2[model_name] = {}\n", 431 | " for t in ['mort_icu', 'los_3']:\n", 432 | " if t not in results_2[model_name]: results_2[model_name][t] = {}\n", 433 | " for n, X_flat_train, X_flat_dev, X_flat_test in (\n", 434 | "# ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),\n", 
435 | " ('raw', raw_flat_train, raw_flat_dev, raw_flat_test),\n", 436 | " ):\n", 437 | " if n in results_2[model_name][t]:\n", 438 | " print(\"Finished model %s on target %s with representation %s\" % (model_name, t, n))\n", 439 | " if RERUN: \n", 440 | " h = results_2[model_name][t][n][1]\n", 441 | " results_2[model_name][t][n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)\n", 442 | " \n", 443 | " print(\"Final results_2 for model %s on target %s with representation %s\" % (model_name, t, n))\n", 444 | " print(results_2[model_name][t][n][2:])\n", 445 | "\n", 446 | " with open(results_2_PATH, mode='wb') as f: pickle.dump(results_2, f)\n", 447 | " continue\n", 448 | " \n", 449 | " print(\"Running model %s on target %s with representation %s\" % (model_name, t, n))\n", 450 | " results_2[model_name][t][n] = run_basic(\n", 451 | " model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t\n", 452 | " )\n", 453 | " print(\"Final results_2 for model %s on target %s with representation %s\" % (model_name, t, n))\n", 454 | " print(results_2[model_name][t][n][2:])\n", 455 | " with open(results_2_PATH, mode='wb') as f: pickle.dump(results_2, f)" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": null, 461 | "metadata": { 462 | "scrolled": false 463 | }, 464 | "outputs": [], 465 | "source": [ 466 | "for model_name, model, hyperparams_list in [\n", 467 | " ('RF', RandomForestClassifier, RF_hyperparams_list), ('LR', LogisticRegression, LR_hyperparams_list)\n", 468 | "]:\n", 469 | " if model_name not in results: results[model_name] = {}\n", 470 | " for t in ['mort_hosp', 'los_7']:\n", 471 | " if t not in results[model_name]: results[model_name][t] = {}\n", 472 | " for n, X_flat_train, X_flat_dev, X_flat_test in (\n", 473 | " ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),\n", 474 | " ('raw', raw_flat_train, raw_flat_dev, raw_flat_test)\n", 475 | " ):\n", 476 | " if n in results[model_name][t]:\n", 477 | " 
print(\"Finished model %s on target %s with representation %s\" % (model_name, t, n))\n", 478 | " if RERUN: \n", 479 | " h = results[model_name][t][n][1]\n", 480 | " results[model_name][t][n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)\n", 481 | " \n", 482 | " print(\"Final results for model %s on target %s with representation %s\" % (model_name, t, n))\n", 483 | " print(results[model_name][t][n][2:])\n", 484 | "\n", 485 | " with open(RESULTS_PATH, mode='wb') as f: pickle.dump(results, f)\n", 486 | " continue\n", 487 | " \n", 488 | " print(\"Running model %s on target %s with representation %s\" % (model_name, t, n))\n", 489 | " results[model_name][t][n] = run_basic(\n", 490 | " model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t\n", 491 | " )\n", 492 | " print(\"Final results for model %s on target %s with representation %s\" % (model_name, t, n))\n", 493 | " print(results[model_name][t][n][2:])\n", 494 | " with open(RESULTS_PATH, mode='wb') as f: pickle.dump(results, f)" 495 | ] 496 | } 497 | ], 498 | "metadata": { 499 | "kernelspec": { 500 | "display_name": "Python 2", 501 | "language": "python", 502 | "name": "python2" 503 | }, 504 | "language_info": { 505 | "codemirror_mode": { 506 | "name": "ipython", 507 | "version": 2 508 | }, 509 | "file_extension": ".py", 510 | "mimetype": "text/x-python", 511 | "name": "python", 512 | "nbconvert_exporter": "python", 513 | "pygments_lexer": "ipython2", 514 | "version": "2.7.13" 515 | } 516 | }, 517 | "nbformat": 4, 518 | "nbformat_minor": 2 519 | } 520 | -------------------------------------------------------------------------------- /resources/testing_schemas.pkl: -------------------------------------------------------------------------------- 1 | (ccopy_reg 2 | _reconstructor 3 | p0 4 | (cpandas.core.frame 5 | DataFrame 6 | p1 7 | c__builtin__ 8 | object 9 | p2 10 | Ntp3 11 | Rp4 12 | (dp5 13 | S'_metadata' 14 | p6 15 | (lp7 16 | sS'_typ' 17 | p8 18 | S'dataframe' 19 | p9 20 | 
sS'_data' 21 | p10 22 | g0 23 | (cpandas.core.internals.managers 24 | BlockManager 25 | p11 26 | g2 27 | Ntp12 28 | Rp13 29 | ((lp14 30 | cpandas.core.indexes.base 31 | _new_Index 32 | p15 33 | (cpandas.core.indexes.base 34 | Index 35 | p16 36 | (dp17 37 | S'data' 38 | p18 39 | cnumpy.core.multiarray 40 | _reconstruct 41 | p19 42 | (cnumpy 43 | ndarray 44 | p20 45 | (I0 46 | tp21 47 | S'b' 48 | p22 49 | tp23 50 | Rp24 51 | (I1 52 | (I17 53 | tp25 54 | cnumpy 55 | dtype 56 | p26 57 | (S'O8' 58 | p27 59 | I0 60 | I1 61 | tp28 62 | Rp29 63 | (I3 64 | S'|' 65 | p30 66 | NNNI-1 67 | I-1 68 | I63 69 | tp31 70 | bI00 71 | (lp32 72 | Vsubject_id 73 | p33 74 | aVhadm_id 75 | p34 76 | aS'gender' 77 | p35 78 | aS'ethnicity' 79 | p36 80 | aS'age' 81 | p37 82 | aS'admittime' 83 | p38 84 | aS'dischtime' 85 | p39 86 | aS'deathtime' 87 | p40 88 | aS'intime' 89 | p41 90 | aS'outtime' 91 | p42 92 | aS'los_icu' 93 | p43 94 | aS'admission_type' 95 | p44 96 | aS'first_careunit' 97 | p45 98 | aS'mort_icu' 99 | p46 100 | aS'mort_hosp' 101 | p47 102 | aS'hospital_expire_flag' 103 | p48 104 | aS'hospstay_seq' 105 | p49 106 | atp50 107 | bsS'name' 108 | p51 109 | Nstp52 110 | Rp53 111 | ag15 112 | (cpandas.core.indexes.numeric 113 | Int64Index 114 | p54 115 | (dp55 116 | g18 117 | g19 118 | (g20 119 | (I0 120 | tp56 121 | g22 122 | tp57 123 | Rp58 124 | (I1 125 | (I0 126 | tp59 127 | g26 128 | (S'i8' 129 | p60 130 | I0 131 | I1 132 | tp61 133 | Rp62 134 | (I3 135 | S'<' 136 | p63 137 | NNNI-1 138 | I-1 139 | I0 140 | tp64 141 | bI00 142 | S'' 143 | p65 144 | tp66 145 | bsg51 146 | S'icustay_id' 147 | p67 148 | stp68 149 | Rp69 150 | a(lp70 151 | g0 152 | (cpandas.core.arrays.categorical 153 | Categorical 154 | p71 155 | g2 156 | Ntp72 157 | Rp73 158 | (dp74 159 | S'_cache' 160 | p75 161 | (dp76 162 | S'ndim' 163 | p77 164 | I1 165 | ssS'_codes' 166 | p78 167 | g19 168 | (g20 169 | (I0 170 | tp79 171 | g22 172 | tp80 173 | Rp81 174 | (I1 175 | (I0 176 | tp82 177 | g26 178 | (S'i1' 179 | p83 
180 | I0 181 | I1 182 | tp84 183 | Rp85 184 | (I3 185 | S'|' 186 | p86 187 | NNNI-1 188 | I-1 189 | I0 190 | tp87 191 | bI00 192 | g65 193 | tp88 194 | bsS'_ordered' 195 | p89 196 | I00 197 | sS'_categories' 198 | p90 199 | g15 200 | (g16 201 | (dp91 202 | g18 203 | g19 204 | (g20 205 | (I0 206 | tp92 207 | g22 208 | tp93 209 | Rp94 210 | (I1 211 | (I2 212 | tp95 213 | g29 214 | I00 215 | (lp96 216 | S'F' 217 | p97 218 | aS'M' 219 | p98 220 | atp99 221 | bsg51 222 | Nstp100 223 | Rp101 224 | sS'_dtype' 225 | p102 226 | g0 227 | (cpandas.core.dtypes.dtypes 228 | CategoricalDtype 229 | p103 230 | g2 231 | Ntp104 232 | Rp105 233 | (dp106 234 | S'ordered' 235 | p107 236 | I00 237 | sS'categories' 238 | p108 239 | g101 240 | sbsbag0 241 | (g71 242 | g2 243 | Ntp109 244 | Rp110 245 | (dp111 246 | g75 247 | (dp112 248 | g77 249 | I1 250 | ssg78 251 | g19 252 | (g20 253 | (I0 254 | tp113 255 | g22 256 | tp114 257 | Rp115 258 | (I1 259 | (I0 260 | tp116 261 | g85 262 | I00 263 | g65 264 | tp117 265 | bsg89 266 | I00 267 | sg90 268 | g15 269 | (g16 270 | (dp118 271 | g18 272 | g19 273 | (g20 274 | (I0 275 | tp119 276 | g22 277 | tp120 278 | Rp121 279 | (I1 280 | (I41 281 | tp122 282 | g29 283 | I00 284 | (lp123 285 | S'AMERICAN INDIAN/ALASKA NATIVE' 286 | p124 287 | aS'AMERICAN INDIAN/ALASKA NATIVE FEDERALLY RECOGNIZED TRIBE' 288 | p125 289 | aS'ASIAN' 290 | p126 291 | aS'ASIAN - ASIAN INDIAN' 292 | p127 293 | aS'ASIAN - CAMBODIAN' 294 | p128 295 | aS'ASIAN - CHINESE' 296 | p129 297 | aS'ASIAN - FILIPINO' 298 | p130 299 | aS'ASIAN - JAPANESE' 300 | p131 301 | aS'ASIAN - KOREAN' 302 | p132 303 | aS'ASIAN - OTHER' 304 | p133 305 | aS'ASIAN - THAI' 306 | p134 307 | aS'ASIAN - VIETNAMESE' 308 | p135 309 | aS'BLACK/AFRICAN' 310 | p136 311 | aS'BLACK/AFRICAN AMERICAN' 312 | p137 313 | aS'BLACK/CAPE VERDEAN' 314 | p138 315 | aS'BLACK/HAITIAN' 316 | p139 317 | aS'CARIBBEAN ISLAND' 318 | p140 319 | aS'HISPANIC OR LATINO' 320 | p141 321 | aS'HISPANIC/LATINO - CENTRAL AMERICAN (OTHER)' 
322 | p142 323 | aS'HISPANIC/LATINO - COLOMBIAN' 324 | p143 325 | aS'HISPANIC/LATINO - CUBAN' 326 | p144 327 | aS'HISPANIC/LATINO - DOMINICAN' 328 | p145 329 | aS'HISPANIC/LATINO - GUATEMALAN' 330 | p146 331 | aS'HISPANIC/LATINO - HONDURAN' 332 | p147 333 | aS'HISPANIC/LATINO - MEXICAN' 334 | p148 335 | aS'HISPANIC/LATINO - PUERTO RICAN' 336 | p149 337 | aS'HISPANIC/LATINO - SALVADORAN' 338 | p150 339 | aS'MIDDLE EASTERN' 340 | p151 341 | aS'MULTI RACE ETHNICITY' 342 | p152 343 | aS'NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER' 344 | p153 345 | aS'OTHER' 346 | p154 347 | aS'PATIENT DECLINED TO ANSWER' 348 | p155 349 | aS'PORTUGUESE' 350 | p156 351 | aS'SOUTH AMERICAN' 352 | p157 353 | aS'UNABLE TO OBTAIN' 354 | p158 355 | aS'UNKNOWN/NOT SPECIFIED' 356 | p159 357 | aS'WHITE' 358 | p160 359 | aS'WHITE - BRAZILIAN' 360 | p161 361 | aS'WHITE - EASTERN EUROPEAN' 362 | p162 363 | aS'WHITE - OTHER EUROPEAN' 364 | p163 365 | aS'WHITE - RUSSIAN' 366 | p164 367 | atp165 368 | bsg51 369 | Nstp166 370 | Rp167 371 | sg102 372 | g0 373 | (g103 374 | g2 375 | Ntp168 376 | Rp169 377 | (dp170 378 | g107 379 | I00 380 | sg108 381 | g167 382 | sbsbag0 383 | (g71 384 | g2 385 | Ntp171 386 | Rp172 387 | (dp173 388 | g75 389 | (dp174 390 | g77 391 | I1 392 | ssg78 393 | g19 394 | (g20 395 | (I0 396 | tp175 397 | g22 398 | tp176 399 | Rp177 400 | (I1 401 | (I0 402 | tp178 403 | g85 404 | I00 405 | g65 406 | tp179 407 | bsg89 408 | I00 409 | sg90 410 | g15 411 | (g16 412 | (dp180 413 | g18 414 | g19 415 | (g20 416 | (I0 417 | tp181 418 | g22 419 | tp182 420 | Rp183 421 | (I1 422 | (I3 423 | tp184 424 | g29 425 | I00 426 | (lp185 427 | S'ELECTIVE' 428 | p186 429 | aS'EMERGENCY' 430 | p187 431 | aS'URGENT' 432 | p188 433 | atp189 434 | bsg51 435 | Nstp190 436 | Rp191 437 | sg102 438 | g0 439 | (g103 440 | g2 441 | Ntp192 442 | Rp193 443 | (dp194 444 | g107 445 | I00 446 | sg108 447 | g191 448 | sbsbag0 449 | (g71 450 | g2 451 | Ntp195 452 | Rp196 453 | (dp197 454 | g75 455 | (dp198 456 | g77 
457 | I1 458 | ssg78 459 | g19 460 | (g20 461 | (I0 462 | tp199 463 | g22 464 | tp200 465 | Rp201 466 | (I1 467 | (I0 468 | tp202 469 | g85 470 | I00 471 | g65 472 | tp203 473 | bsg89 474 | I00 475 | sg90 476 | g15 477 | (g16 478 | (dp204 479 | g18 480 | g19 481 | (g20 482 | (I0 483 | tp205 484 | g22 485 | tp206 486 | Rp207 487 | (I1 488 | (I5 489 | tp208 490 | g29 491 | I00 492 | (lp209 493 | S'CCU' 494 | p210 495 | aS'CSRU' 496 | p211 497 | aS'MICU' 498 | p212 499 | aS'SICU' 500 | p213 501 | aS'TSICU' 502 | p214 503 | atp215 504 | bsg51 505 | Nstp216 506 | Rp217 507 | sg102 508 | g0 509 | (g103 510 | g2 511 | Ntp218 512 | Rp219 513 | (dp220 514 | g107 515 | I00 516 | sg108 517 | g217 518 | sbsbag0 519 | (g71 520 | g2 521 | Ntp221 522 | Rp222 523 | (dp223 524 | g75 525 | (dp224 526 | g77 527 | I1 528 | ssg78 529 | g19 530 | (g20 531 | (I0 532 | tp225 533 | g22 534 | tp226 535 | Rp227 536 | (I1 537 | (I0 538 | tp228 539 | g85 540 | I00 541 | g65 542 | tp229 543 | bsg89 544 | I00 545 | sg90 546 | g15 547 | (g54 548 | (dp230 549 | g18 550 | g19 551 | (g20 552 | (I0 553 | tp231 554 | g22 555 | tp232 556 | Rp233 557 | (I1 558 | (I2 559 | tp234 560 | g62 561 | I00 562 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00' 563 | p235 564 | tp236 565 | bsg51 566 | Nstp237 567 | Rp238 568 | sg102 569 | g0 570 | (g103 571 | g2 572 | Ntp239 573 | Rp240 574 | (dp241 575 | g107 576 | I00 577 | sg108 578 | g238 579 | sbsbag0 580 | (g71 581 | g2 582 | Ntp242 583 | Rp243 584 | (dp244 585 | g75 586 | (dp245 587 | g77 588 | I1 589 | ssg78 590 | g19 591 | (g20 592 | (I0 593 | tp246 594 | g22 595 | tp247 596 | Rp248 597 | (I1 598 | (I0 599 | tp249 600 | g85 601 | I00 602 | g65 603 | tp250 604 | bsg89 605 | I00 606 | sg90 607 | g15 608 | (g54 609 | (dp251 610 | g18 611 | g19 612 | (g20 613 | (I0 614 | tp252 615 | g22 616 | tp253 617 | Rp254 618 | (I1 619 | (I2 620 | tp255 621 | g62 622 | I00 623 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00' 624 | 
p256 625 | tp257 626 | bsg51 627 | Nstp258 628 | Rp259 629 | sg102 630 | g0 631 | (g103 632 | g2 633 | Ntp260 634 | Rp261 635 | (dp262 636 | g107 637 | I00 638 | sg108 639 | g259 640 | sbsbag0 641 | (g71 642 | g2 643 | Ntp263 644 | Rp264 645 | (dp265 646 | g75 647 | (dp266 648 | g77 649 | I1 650 | ssg78 651 | g19 652 | (g20 653 | (I0 654 | tp267 655 | g22 656 | tp268 657 | Rp269 658 | (I1 659 | (I0 660 | tp270 661 | g85 662 | I00 663 | g65 664 | tp271 665 | bsg89 666 | I00 667 | sg90 668 | g15 669 | (g54 670 | (dp272 671 | g18 672 | g19 673 | (g20 674 | (I0 675 | tp273 676 | g22 677 | tp274 678 | Rp275 679 | (I1 680 | (I2 681 | tp276 682 | g62 683 | I00 684 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00' 685 | p277 686 | tp278 687 | bsg51 688 | Nstp279 689 | Rp280 690 | sg102 691 | g0 692 | (g103 693 | g2 694 | Ntp281 695 | Rp282 696 | (dp283 697 | g107 698 | I00 699 | sg108 700 | g280 701 | sbsbag0 702 | (g71 703 | g2 704 | Ntp284 705 | Rp285 706 | (dp286 707 | g75 708 | (dp287 709 | g77 710 | I1 711 | ssg78 712 | g19 713 | (g20 714 | (I0 715 | tp288 716 | g22 717 | tp289 718 | Rp290 719 | (I1 720 | (I0 721 | tp291 722 | g85 723 | I00 724 | g65 725 | tp292 726 | bsg89 727 | I00 728 | sg90 729 | g15 730 | (g54 731 | (dp293 732 | g18 733 | g19 734 | (g20 735 | (I0 736 | tp294 737 | g22 738 | tp295 739 | Rp296 740 | (I1 741 | (I1 742 | tp297 743 | g62 744 | I00 745 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 746 | p298 747 | tp299 748 | bsg51 749 | Nstp300 750 | Rp301 751 | sg102 752 | g0 753 | (g103 754 | g2 755 | Ntp302 756 | Rp303 757 | (dp304 758 | g107 759 | I00 760 | sg108 761 | g301 762 | sbsbag19 763 | (g20 764 | (I0 765 | tp305 766 | g22 767 | tp306 768 | Rp307 769 | (I1 770 | (I5 771 | I0 772 | tp308 773 | g26 774 | (S'M8' 775 | p309 776 | I0 777 | I1 778 | tp310 779 | Rp311 780 | (I4 781 | S'<' 782 | p312 783 | NNNI-1 784 | I-1 785 | I0 786 | ((dp313 787 | (S'ns' 788 | p314 789 | I1 790 | I1 791 | I1 792 | tp315 793 | tp316 794 | tp317 795 
| bI00 796 | g65 797 | tp318 798 | bag19 799 | (g20 800 | (I0 801 | tp319 802 | g22 803 | tp320 804 | Rp321 805 | (I1 806 | (I2 807 | I0 808 | tp322 809 | g26 810 | (S'f8' 811 | p323 812 | I0 813 | I1 814 | tp324 815 | Rp325 816 | (I3 817 | S'<' 818 | p326 819 | NNNI-1 820 | I-1 821 | I0 822 | tp327 823 | bI00 824 | g65 825 | tp328 826 | bag19 827 | (g20 828 | (I0 829 | tp329 830 | g22 831 | tp330 832 | Rp331 833 | (I1 834 | (I2 835 | I0 836 | tp332 837 | g62 838 | I00 839 | g65 840 | tp333 841 | ba(lp334 842 | g15 843 | (g16 844 | (dp335 845 | g18 846 | g19 847 | (g20 848 | (I0 849 | tp336 850 | g22 851 | tp337 852 | Rp338 853 | (I1 854 | (I1 855 | tp339 856 | g29 857 | I00 858 | (lp340 859 | g35 860 | atp341 861 | bsg51 862 | Nstp342 863 | Rp343 864 | ag15 865 | (g16 866 | (dp344 867 | g18 868 | g19 869 | (g20 870 | (I0 871 | tp345 872 | g22 873 | tp346 874 | Rp347 875 | (I1 876 | (I1 877 | tp348 878 | g29 879 | I00 880 | (lp349 881 | g36 882 | atp350 883 | bsg51 884 | Nstp351 885 | Rp352 886 | ag15 887 | (g16 888 | (dp353 889 | g18 890 | g19 891 | (g20 892 | (I0 893 | tp354 894 | g22 895 | tp355 896 | Rp356 897 | (I1 898 | (I1 899 | tp357 900 | g29 901 | I00 902 | (lp358 903 | g44 904 | atp359 905 | bsg51 906 | Nstp360 907 | Rp361 908 | ag15 909 | (g16 910 | (dp362 911 | g18 912 | g19 913 | (g20 914 | (I0 915 | tp363 916 | g22 917 | tp364 918 | Rp365 919 | (I1 920 | (I1 921 | tp366 922 | g29 923 | I00 924 | (lp367 925 | g45 926 | atp368 927 | bsg51 928 | Nstp369 929 | Rp370 930 | ag15 931 | (g16 932 | (dp371 933 | g18 934 | g19 935 | (g20 936 | (I0 937 | tp372 938 | g22 939 | tp373 940 | Rp374 941 | (I1 942 | (I1 943 | tp375 944 | g29 945 | I00 946 | (lp376 947 | g46 948 | atp377 949 | bsg51 950 | Nstp378 951 | Rp379 952 | ag15 953 | (g16 954 | (dp380 955 | g18 956 | g19 957 | (g20 958 | (I0 959 | tp381 960 | g22 961 | tp382 962 | Rp383 963 | (I1 964 | (I1 965 | tp384 966 | g29 967 | I00 968 | (lp385 969 | g47 970 | atp386 971 | bsg51 972 | Nstp387 973 | Rp388 
974 | ag15 975 | (g16 976 | (dp389 977 | g18 978 | g19 979 | (g20 980 | (I0 981 | tp390 982 | g22 983 | tp391 984 | Rp392 985 | (I1 986 | (I1 987 | tp393 988 | g29 989 | I00 990 | (lp394 991 | g48 992 | atp395 993 | bsg51 994 | Nstp396 995 | Rp397 996 | ag15 997 | (g16 998 | (dp398 999 | g18 1000 | g19 1001 | (g20 1002 | (I0 1003 | tp399 1004 | g22 1005 | tp400 1006 | Rp401 1007 | (I1 1008 | (I1 1009 | tp402 1010 | g29 1011 | I00 1012 | (lp403 1013 | g49 1014 | atp404 1015 | bsg51 1016 | Nstp405 1017 | Rp406 1018 | ag15 1019 | (g16 1020 | (dp407 1021 | g18 1022 | g19 1023 | (g20 1024 | (I0 1025 | tp408 1026 | g22 1027 | tp409 1028 | Rp410 1029 | (I1 1030 | (I5 1031 | tp411 1032 | g29 1033 | I00 1034 | (lp412 1035 | g38 1036 | ag39 1037 | ag40 1038 | ag41 1039 | ag42 1040 | atp413 1041 | bsg51 1042 | Nstp414 1043 | Rp415 1044 | ag15 1045 | (g16 1046 | (dp416 1047 | g18 1048 | g19 1049 | (g20 1050 | (I0 1051 | tp417 1052 | g22 1053 | tp418 1054 | Rp419 1055 | (I1 1056 | (I2 1057 | tp420 1058 | g29 1059 | I00 1060 | (lp421 1061 | g37 1062 | ag43 1063 | atp422 1064 | bsg51 1065 | Nstp423 1066 | Rp424 1067 | ag15 1068 | (g16 1069 | (dp425 1070 | g18 1071 | g19 1072 | (g20 1073 | (I0 1074 | tp426 1075 | g22 1076 | tp427 1077 | Rp428 1078 | (I1 1079 | (I2 1080 | tp429 1081 | g29 1082 | I00 1083 | (lp430 1084 | g33 1085 | ag34 1086 | atp431 1087 | bsg51 1088 | Nstp432 1089 | Rp433 1090 | a(dp434 1091 | S'0.14.1' 1092 | p435 1093 | (dp436 1094 | S'axes' 1095 | p437 1096 | g14 1097 | sS'blocks' 1098 | p438 1099 | (lp439 1100 | (dp440 1101 | S'mgr_locs' 1102 | p441 1103 | c__builtin__ 1104 | slice 1105 | p442 1106 | (I2 1107 | I3 1108 | I1 1109 | tp443 1110 | Rp444 1111 | sS'values' 1112 | p445 1113 | g73 1114 | sa(dp446 1115 | g441 1116 | g442 1117 | (I3 1118 | I4 1119 | I1 1120 | tp447 1121 | Rp448 1122 | sg445 1123 | g110 1124 | sa(dp449 1125 | g441 1126 | g442 1127 | (I11 1128 | I12 1129 | I1 1130 | tp450 1131 | Rp451 1132 | sg445 1133 | g172 1134 | sa(dp452 1135 | g441 
1136 | g442 1137 | (I12 1138 | I13 1139 | I1 1140 | tp453 1141 | Rp454 1142 | sg445 1143 | g196 1144 | sa(dp455 1145 | g441 1146 | g442 1147 | (I13 1148 | I14 1149 | I1 1150 | tp456 1151 | Rp457 1152 | sg445 1153 | g222 1154 | sa(dp458 1155 | g441 1156 | g442 1157 | (I14 1158 | I15 1159 | I1 1160 | tp459 1161 | Rp460 1162 | sg445 1163 | g243 1164 | sa(dp461 1165 | g441 1166 | g442 1167 | (I15 1168 | I16 1169 | I1 1170 | tp462 1171 | Rp463 1172 | sg445 1173 | g264 1174 | sa(dp464 1175 | g441 1176 | g442 1177 | (I16 1178 | I17 1179 | I1 1180 | tp465 1181 | Rp466 1182 | sg445 1183 | g285 1184 | sa(dp467 1185 | g441 1186 | g442 1187 | (I5 1188 | I10 1189 | I1 1190 | tp468 1191 | Rp469 1192 | sg445 1193 | g307 1194 | sa(dp470 1195 | g441 1196 | g442 1197 | (I4 1198 | I16 1199 | I6 1200 | tp471 1201 | Rp472 1202 | sg445 1203 | g321 1204 | sa(dp473 1205 | g441 1206 | g442 1207 | (I0 1208 | I2 1209 | I1 1210 | tp474 1211 | Rp475 1212 | sg445 1213 | g331 1214 | sasstp476 1215 | bsbg0 1216 | (g1 1217 | g2 1218 | Ntp477 1219 | Rp478 1220 | (dp479 1221 | g6 1222 | g7 1223 | sg8 1224 | g9 1225 | sg10 1226 | g0 1227 | (g11 1228 | g2 1229 | Ntp480 1230 | Rp481 1231 | ((lp482 1232 | g15 1233 | (g16 1234 | (dp483 1235 | g18 1236 | g19 1237 | (g20 1238 | (I0 1239 | tp484 1240 | g22 1241 | tp485 1242 | Rp486 1243 | (I1 1244 | (I7 1245 | tp487 1246 | g29 1247 | I00 1248 | (lp488 1249 | S'subject_id' 1250 | p489 1251 | aS'hadm_id' 1252 | p490 1253 | aS'icustay_id' 1254 | p491 1255 | aS'charttime' 1256 | p492 1257 | aS'itemid' 1258 | p493 1259 | aS'value' 1260 | p494 1261 | aS'valueuom' 1262 | p495 1263 | atp496 1264 | bsg51 1265 | Nstp497 1266 | Rp498 1267 | ag15 1268 | (g54 1269 | (dp499 1270 | g18 1271 | g19 1272 | (g20 1273 | (I0 1274 | tp500 1275 | g22 1276 | tp501 1277 | Rp502 1278 | (I1 1279 | (I0 1280 | tp503 1281 | g62 1282 | I00 1283 | g65 1284 | tp504 1285 | bsg51 1286 | Nstp505 1287 | Rp506 1288 | a(lp507 1289 | g19 1290 | (g20 1291 | (I0 1292 | tp508 1293 | g22 1294 | tp509 
1295 | Rp510 1296 | (I1 1297 | (I4 1298 | I0 1299 | tp511 1300 | g62 1301 | I00 1302 | g65 1303 | tp512 1304 | bag19 1305 | (g20 1306 | (I0 1307 | tp513 1308 | g22 1309 | tp514 1310 | Rp515 1311 | (I1 1312 | (I1 1313 | I0 1314 | tp516 1315 | g26 1316 | (S'M8' 1317 | p517 1318 | I0 1319 | I1 1320 | tp518 1321 | Rp519 1322 | (I4 1323 | S'<' 1324 | p520 1325 | NNNI-1 1326 | I-1 1327 | I0 1328 | ((dp521 1329 | (S'ns' 1330 | p522 1331 | I1 1332 | I1 1333 | I1 1334 | tp523 1335 | tp524 1336 | tp525 1337 | bI00 1338 | g65 1339 | tp526 1340 | bag19 1341 | (g20 1342 | (I0 1343 | tp527 1344 | g22 1345 | tp528 1346 | Rp529 1347 | (I1 1348 | (I2 1349 | I0 1350 | tp530 1351 | g29 1352 | I00 1353 | (lp531 1354 | tp532 1355 | ba(lp533 1356 | g15 1357 | (g16 1358 | (dp534 1359 | g18 1360 | g19 1361 | (g20 1362 | (I0 1363 | tp535 1364 | g22 1365 | tp536 1366 | Rp537 1367 | (I1 1368 | (I4 1369 | tp538 1370 | g29 1371 | I00 1372 | (lp539 1373 | g489 1374 | ag490 1375 | ag491 1376 | ag493 1377 | atp540 1378 | bsg51 1379 | Nstp541 1380 | Rp542 1381 | ag15 1382 | (g16 1383 | (dp543 1384 | g18 1385 | g19 1386 | (g20 1387 | (I0 1388 | tp544 1389 | g22 1390 | tp545 1391 | Rp546 1392 | (I1 1393 | (I1 1394 | tp547 1395 | g29 1396 | I00 1397 | (lp548 1398 | g492 1399 | atp549 1400 | bsg51 1401 | Nstp550 1402 | Rp551 1403 | ag15 1404 | (g16 1405 | (dp552 1406 | g18 1407 | g19 1408 | (g20 1409 | (I0 1410 | tp553 1411 | g22 1412 | tp554 1413 | Rp555 1414 | (I1 1415 | (I2 1416 | tp556 1417 | g29 1418 | I00 1419 | (lp557 1420 | g494 1421 | ag495 1422 | atp558 1423 | bsg51 1424 | Nstp559 1425 | Rp560 1426 | a(dp561 1427 | g435 1428 | (dp562 1429 | g437 1430 | g482 1431 | sg438 1432 | (lp563 1433 | (dp564 1434 | g441 1435 | g19 1436 | (g20 1437 | (I0 1438 | tp565 1439 | g22 1440 | tp566 1441 | Rp567 1442 | (I1 1443 | (I4 1444 | tp568 1445 | g62 1446 | I00 1447 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00' 1448 | 
p569 1449 | tp570 1450 | bsg445 1451 | g510 1452 | sa(dp571 1453 | g441 1454 | g442 1455 | (I3 1456 | I4 1457 | I1 1458 | tp572 1459 | Rp573 1460 | sg445 1461 | g515 1462 | sa(dp574 1463 | g441 1464 | g442 1465 | (I5 1466 | I7 1467 | I1 1468 | tp575 1469 | Rp576 1470 | sg445 1471 | g529 1472 | sasstp577 1473 | bsbg0 1474 | (g1 1475 | g2 1476 | Ntp578 1477 | Rp579 1478 | (dp580 1479 | g6 1480 | g7 1481 | sg8 1482 | g9 1483 | sg10 1484 | g0 1485 | (g11 1486 | g2 1487 | Ntp581 1488 | Rp582 1489 | ((lp583 1490 | g15 1491 | (g16 1492 | (dp584 1493 | g18 1494 | g19 1495 | (g20 1496 | (I0 1497 | tp585 1498 | g22 1499 | tp586 1500 | Rp587 1501 | (I1 1502 | (I5 1503 | tp588 1504 | g29 1505 | I00 1506 | (lp589 1507 | S'label' 1508 | p590 1509 | aS'dbsource' 1510 | p591 1511 | aS'linksto' 1512 | p592 1513 | aS'category' 1514 | p593 1515 | aS'unitname' 1516 | p594 1517 | atp595 1518 | bsg51 1519 | Nstp596 1520 | Rp597 1521 | ag15 1522 | (g54 1523 | (dp598 1524 | g18 1525 | g19 1526 | (g20 1527 | (I0 1528 | tp599 1529 | g22 1530 | tp600 1531 | Rp601 1532 | (I1 1533 | (I0 1534 | tp602 1535 | g62 1536 | I00 1537 | g65 1538 | tp603 1539 | bsg51 1540 | S'itemid' 1541 | p604 1542 | stp605 1543 | Rp606 1544 | a(lp607 1545 | g19 1546 | (g20 1547 | (I0 1548 | tp608 1549 | g22 1550 | tp609 1551 | Rp610 1552 | (I1 1553 | (I5 1554 | I0 1555 | tp611 1556 | g29 1557 | I00 1558 | (lp612 1559 | tp613 1560 | ba(lp614 1561 | g15 1562 | (g16 1563 | (dp615 1564 | g18 1565 | g19 1566 | (g20 1567 | (I0 1568 | tp616 1569 | g22 1570 | tp617 1571 | Rp618 1572 | (I1 1573 | (I5 1574 | tp619 1575 | g29 1576 | I00 1577 | (lp620 1578 | g590 1579 | ag591 1580 | ag592 1581 | ag593 1582 | ag594 1583 | atp621 1584 | bsg51 1585 | Nstp622 1586 | Rp623 1587 | a(dp624 1588 | g435 1589 | (dp625 1590 | g437 1591 | g583 1592 | sg438 1593 | (lp626 1594 | (dp627 1595 | g441 1596 | g442 1597 | (I0 1598 | I5 1599 | I1 1600 | tp628 1601 | Rp629 1602 | sg445 1603 | g610 1604 | sasstp630 1605 | bsbg0 1606 | (g1 1607 | g2 
1608 | Ntp631 1609 | Rp632 1610 | (dp633 1611 | g6 1612 | g7 1613 | sg8 1614 | g9 1615 | sg10 1616 | g0 1617 | (g11 1618 | g2 1619 | Ntp634 1620 | Rp635 1621 | ((lp636 1622 | g15 1623 | (g16 1624 | (dp637 1625 | g18 1626 | g19 1627 | (g20 1628 | (I0 1629 | tp638 1630 | g22 1631 | tp639 1632 | Rp640 1633 | (I1 1634 | (I5 1635 | tp641 1636 | g29 1637 | I00 1638 | (lp642 1639 | S'OUTLIER_LOW' 1640 | p643 1641 | aS'VALID_LOW' 1642 | p644 1643 | aS'IMPUTE' 1644 | p645 1645 | aS'VALID_HIGH' 1646 | p646 1647 | aS'OUTLIER_HIGH' 1648 | p647 1649 | atp648 1650 | bsg51 1651 | Nstp649 1652 | Rp650 1653 | ag15 1654 | (g16 1655 | (dp651 1656 | g18 1657 | g19 1658 | (g20 1659 | (I0 1660 | tp652 1661 | g22 1662 | tp653 1663 | Rp654 1664 | (I1 1665 | (I0 1666 | tp655 1667 | g29 1668 | I00 1669 | (lp656 1670 | tp657 1671 | bsg51 1672 | S'VARIABLE' 1673 | p658 1674 | stp659 1675 | Rp660 1676 | a(lp661 1677 | g19 1678 | (g20 1679 | (I0 1680 | tp662 1681 | g22 1682 | tp663 1683 | Rp664 1684 | (I1 1685 | (I5 1686 | I0 1687 | tp665 1688 | g325 1689 | I00 1690 | g65 1691 | tp666 1692 | ba(lp667 1693 | g15 1694 | (g16 1695 | (dp668 1696 | g18 1697 | g19 1698 | (g20 1699 | (I0 1700 | tp669 1701 | g22 1702 | tp670 1703 | Rp671 1704 | (I1 1705 | (I5 1706 | tp672 1707 | g29 1708 | I00 1709 | (lp673 1710 | g643 1711 | ag644 1712 | ag645 1713 | ag646 1714 | ag647 1715 | atp674 1716 | bsg51 1717 | Nstp675 1718 | Rp676 1719 | a(dp677 1720 | g435 1721 | (dp678 1722 | g437 1723 | g636 1724 | sg438 1725 | (lp679 1726 | (dp680 1727 | g441 1728 | g442 1729 | (I0 1730 | I5 1731 | I1 1732 | tp681 1733 | Rp682 1734 | sg445 1735 | g664 1736 | sasstp683 1737 | bsbg0 1738 | (g1 1739 | g2 1740 | Ntp684 1741 | Rp685 1742 | (dp686 1743 | g6 1744 | g7 1745 | sg8 1746 | g9 1747 | sg10 1748 | g0 1749 | (g11 1750 | g2 1751 | Ntp687 1752 | Rp688 1753 | ((lp689 1754 | g15 1755 | (g16 1756 | (dp690 1757 | g18 1758 | g19 1759 | (g20 1760 | (I0 1761 | tp691 1762 | g22 1763 | tp692 1764 | Rp693 1765 | (I1 1766 | (I18 
1767 | tp694 1768 | g29 1769 | I00 1770 | (lp695 1771 | S'LEVEL2' 1772 | p696 1773 | aS'LEVEL1' 1774 | p697 1775 | aS'ALTERNATIVE' 1776 | p698 1777 | aS'STATUS' 1778 | p699 1779 | aS'STATUS NOTE' 1780 | p700 1781 | aS'ITEMID' 1782 | p701 1783 | aS'MIMIC LABEL' 1784 | p702 1785 | aS'UNITNAME' 1786 | p703 1787 | aS'LINKSTO' 1788 | p704 1789 | aS'COUNT' 1790 | p705 1791 | aS'CATEGORY' 1792 | p706 1793 | aS'CONCEPTID' 1794 | p707 1795 | aS'FLUID' 1796 | p708 1797 | aS'LOINC_CODE' 1798 | p709 1799 | aS'DBSOURCE' 1800 | p710 1801 | aS'Unnamed: 15' 1802 | p711 1803 | aS'PARAM_TYPE' 1804 | p712 1805 | aS'NOTE' 1806 | p713 1807 | atp714 1808 | bsg51 1809 | Nstp715 1810 | Rp716 1811 | ag15 1812 | (g54 1813 | (dp717 1814 | g18 1815 | g19 1816 | (g20 1817 | (I0 1818 | tp718 1819 | g22 1820 | tp719 1821 | Rp720 1822 | (I1 1823 | (I0 1824 | tp721 1825 | g62 1826 | I00 1827 | g65 1828 | tp722 1829 | bsg51 1830 | Nstp723 1831 | Rp724 1832 | a(lp725 1833 | g19 1834 | (g20 1835 | (I0 1836 | tp726 1837 | g22 1838 | tp727 1839 | Rp728 1840 | (I1 1841 | (I17 1842 | I0 1843 | tp729 1844 | g29 1845 | I00 1846 | (lp730 1847 | tp731 1848 | bag19 1849 | (g20 1850 | (I0 1851 | tp732 1852 | g22 1853 | tp733 1854 | Rp734 1855 | (I1 1856 | (I1 1857 | I0 1858 | tp735 1859 | g62 1860 | I00 1861 | g65 1862 | tp736 1863 | ba(lp737 1864 | g15 1865 | (g16 1866 | (dp738 1867 | g18 1868 | g19 1869 | (g20 1870 | (I0 1871 | tp739 1872 | g22 1873 | tp740 1874 | Rp741 1875 | (I1 1876 | (I17 1877 | tp742 1878 | g29 1879 | I00 1880 | (lp743 1881 | g696 1882 | ag697 1883 | ag698 1884 | ag699 1885 | ag700 1886 | ag702 1887 | ag703 1888 | ag704 1889 | ag705 1890 | ag706 1891 | ag707 1892 | ag708 1893 | ag709 1894 | ag710 1895 | ag711 1896 | ag712 1897 | ag713 1898 | atp744 1899 | bsg51 1900 | Nstp745 1901 | Rp746 1902 | ag15 1903 | (g16 1904 | (dp747 1905 | g18 1906 | g19 1907 | (g20 1908 | (I0 1909 | tp748 1910 | g22 1911 | tp749 1912 | Rp750 1913 | (I1 1914 | (I1 1915 | tp751 1916 | g29 1917 | I00 1918 | 
(lp752 1919 | g701 1920 | atp753 1921 | bsg51 1922 | Nstp754 1923 | Rp755 1924 | a(dp756 1925 | g435 1926 | (dp757 1927 | g437 1928 | g689 1929 | sg438 1930 | (lp758 1931 | (dp759 1932 | g441 1933 | g19 1934 | (g20 1935 | (I0 1936 | tp760 1937 | g22 1938 | tp761 1939 | Rp762 1940 | (I1 1941 | (I17 1942 | tp763 1943 | g62 1944 | I00 1945 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\r\x00\x00\x00\x00\x00\x00\x00\x0e\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x11\x00\x00\x00\x00\x00\x00\x00' 1946 | p764 1947 | tp765 1948 | bsg445 1949 | g728 1950 | sa(dp766 1951 | g441 1952 | g442 1953 | (I5 1954 | I6 1955 | I1 1956 | tp767 1957 | Rp768 1958 | sg445 1959 | g734 1960 | sasstp769 1961 | bsbg0 1962 | (g1 1963 | g2 1964 | Ntp770 1965 | Rp771 1966 | (dp772 1967 | g6 1968 | (lp773 1969 | sg8 1970 | S'dataframe' 1971 | p774 1972 | sg10 1973 | g0 1974 | (g11 1975 | g2 1976 | Ntp775 1977 | Rp776 1978 | ((lp777 1979 | g15 1980 | (cpandas.core.indexes.multi 1981 | MultiIndex 1982 | p778 1983 | (dp779 1984 | S'codes' 1985 | p780 1986 | (lp781 1987 | g19 1988 | (cpandas.core.indexes.frozen 1989 | FrozenNDArray 1990 | p782 1991 | (I0 1992 | tp783 1993 | g22 1994 | tp784 1995 | Rp785 1996 | (I1 1997 | (I2 1998 | tp786 1999 | g26 2000 | (S'i1' 2001 | p787 2002 | I0 2003 | I1 2004 | tp788 2005 | Rp789 2006 | (I3 2007 | S'|' 2008 | p790 2009 | NNNI-1 2010 | I-1 2011 | I0 2012 | tp791 2013 | bI00 2014 | S'\x00\x00' 2015 | p792 2016 | tp793 2017 | bag19 2018 | (g782 2019 | (I0 2020 | tp794 2021 | g22 2022 | tp795 2023 | Rp796 2024 | (I1 2025 | (I2 2026 | tp797 2027 | g789 2028 | I00 2029 | S'\x01\x02' 2030 
| p798 2031 | tp799 2032 | basS'names' 2033 | p800 2034 | (lp801 2035 | S'LEVEL2' 2036 | p802 2037 | aS'Aggregation Function' 2038 | p803 2039 | asS'levels' 2040 | p804 2041 | (lp805 2042 | g15 2043 | (g16 2044 | (dp806 2045 | g18 2046 | g19 2047 | (g20 2048 | (I0 2049 | tp807 2050 | g22 2051 | tp808 2052 | Rp809 2053 | (I1 2054 | (I1 2055 | tp810 2056 | g26 2057 | (S'O8' 2058 | p811 2059 | I0 2060 | I1 2061 | tp812 2062 | Rp813 2063 | (I3 2064 | S'|' 2065 | p814 2066 | NNNI-1 2067 | I-1 2068 | I63 2069 | tp815 2070 | bI00 2071 | (lp816 2072 | S'test_level2' 2073 | p817 2074 | atp818 2075 | bsg51 2076 | g802 2077 | stp819 2078 | Rp820 2079 | ag15 2080 | (g16 2081 | (dp821 2082 | g18 2083 | g19 2084 | (g20 2085 | (I0 2086 | tp822 2087 | g22 2088 | tp823 2089 | Rp824 2090 | (I1 2091 | (I4 2092 | tp825 2093 | g813 2094 | I00 2095 | (lp826 2096 | g65 2097 | aS'count' 2098 | p827 2099 | aS'mean' 2100 | p828 2101 | aS'std' 2102 | p829 2103 | atp830 2104 | bsg51 2105 | g803 2106 | stp831 2107 | Rp832 2108 | asS'sortorder' 2109 | p833 2110 | Nstp834 2111 | Rp835 2112 | ag15 2113 | (g778 2114 | (dp836 2115 | g780 2116 | (lp837 2117 | g19 2118 | (g782 2119 | (I0 2120 | tp838 2121 | g22 2122 | tp839 2123 | Rp840 2124 | (I1 2125 | (I0 2126 | tp841 2127 | g789 2128 | I00 2129 | g65 2130 | tp842 2131 | bag19 2132 | (g782 2133 | (I0 2134 | tp843 2135 | g22 2136 | tp844 2137 | Rp845 2138 | (I1 2139 | (I0 2140 | tp846 2141 | g789 2142 | I00 2143 | g65 2144 | tp847 2145 | bag19 2146 | (g782 2147 | (I0 2148 | tp848 2149 | g22 2150 | tp849 2151 | Rp850 2152 | (I1 2153 | (I0 2154 | tp851 2155 | g789 2156 | I00 2157 | g65 2158 | tp852 2159 | bag19 2160 | (g782 2161 | (I0 2162 | tp853 2163 | g22 2164 | tp854 2165 | Rp855 2166 | (I1 2167 | (I0 2168 | tp856 2169 | g789 2170 | I00 2171 | g65 2172 | tp857 2173 | basg800 2174 | (lp858 2175 | S'subject_id' 2176 | p859 2177 | aS'hadm_id' 2178 | p860 2179 | aS'icustay_id' 2180 | p861 2181 | aS'hours_in' 2182 | p862 2183 | asg804 2184 | (lp863 
2185 | g15 2186 | (g54 2187 | (dp864 2188 | g18 2189 | g19 2190 | (g20 2191 | (I0 2192 | tp865 2193 | g22 2194 | tp866 2195 | Rp867 2196 | (I1 2197 | (I1 2198 | tp868 2199 | g26 2200 | (S'i8' 2201 | p869 2202 | I0 2203 | I1 2204 | tp870 2205 | Rp871 2206 | (I3 2207 | S'<' 2208 | p872 2209 | NNNI-1 2210 | I-1 2211 | I0 2212 | tp873 2213 | bI00 2214 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 2215 | p874 2216 | tp875 2217 | bsg51 2218 | g859 2219 | stp876 2220 | Rp877 2221 | ag15 2222 | (g54 2223 | (dp878 2224 | g18 2225 | g19 2226 | (g20 2227 | (I0 2228 | tp879 2229 | g22 2230 | tp880 2231 | Rp881 2232 | (I1 2233 | (I1 2234 | tp882 2235 | g871 2236 | I00 2237 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 2238 | p883 2239 | tp884 2240 | bsg51 2241 | g860 2242 | stp885 2243 | Rp886 2244 | ag15 2245 | (g54 2246 | (dp887 2247 | g18 2248 | g19 2249 | (g20 2250 | (I0 2251 | tp888 2252 | g22 2253 | tp889 2254 | Rp890 2255 | (I1 2256 | (I1 2257 | tp891 2258 | g871 2259 | I00 2260 | S'\x00\x00\x00\x00\x00\x00\x00\x00' 2261 | p892 2262 | tp893 2263 | bsg51 2264 | g861 2265 | stp894 2266 | Rp895 2267 | ag15 2268 | (g54 2269 | (dp896 2270 | g18 2271 | g19 2272 | (g20 2273 | (I0 2274 | tp897 2275 | g22 2276 | tp898 2277 | Rp899 2278 | (I1 2279 | (I6 2280 | tp900 2281 | g871 2282 | I00 2283 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00' 2284 | p901 2285 | tp902 2286 | bsg51 2287 | g862 2288 | stp903 2289 | Rp904 2290 | asg833 2291 | Nstp905 2292 | Rp906 2293 | a(lp907 2294 | g19 2295 | (g20 2296 | (I0 2297 | tp908 2298 | g22 2299 | tp909 2300 | Rp910 2301 | (I1 2302 | (I2 2303 | I0 2304 | tp911 2305 | g26 2306 | (S'f8' 2307 | p912 2308 | I0 2309 | I1 2310 | tp913 2311 | Rp914 2312 | (I3 2313 | S'<' 2314 | p915 2315 | NNNI-1 2316 | I-1 2317 | I0 2318 | tp916 2319 | bI00 2320 | g65 2321 | tp917 2322 | ba(lp918 2323 | g15 2324 | (g778 2325 | (dp919 
2326 | g780 2327 | (lp920 2328 | g19 2329 | (g782 2330 | (I0 2331 | tp921 2332 | g22 2333 | tp922 2334 | Rp923 2335 | (I1 2336 | (I2 2337 | tp924 2338 | g789 2339 | I00 2340 | S'\x00\x00' 2341 | p925 2342 | tp926 2343 | bag19 2344 | (g782 2345 | (I0 2346 | tp927 2347 | g22 2348 | tp928 2349 | Rp929 2350 | (I1 2351 | (I2 2352 | tp930 2353 | g789 2354 | I00 2355 | S'\x01\x02' 2356 | p931 2357 | tp932 2358 | basg800 2359 | (lp933 2360 | g802 2361 | ag803 2362 | asg804 2363 | (lp934 2364 | g15 2365 | (g16 2366 | (dp935 2367 | g18 2368 | g19 2369 | (g20 2370 | (I0 2371 | tp936 2372 | g22 2373 | tp937 2374 | Rp938 2375 | (I1 2376 | (I1 2377 | tp939 2378 | g813 2379 | I00 2380 | (lp940 2381 | g817 2382 | atp941 2383 | bsg51 2384 | g802 2385 | stp942 2386 | Rp943 2387 | ag15 2388 | (g16 2389 | (dp944 2390 | g18 2391 | g19 2392 | (g20 2393 | (I0 2394 | tp945 2395 | g22 2396 | tp946 2397 | Rp947 2398 | (I1 2399 | (I4 2400 | tp948 2401 | g813 2402 | I00 2403 | (lp949 2404 | g65 2405 | ag827 2406 | ag828 2407 | ag829 2408 | atp950 2409 | bsg51 2410 | g803 2411 | stp951 2412 | Rp952 2413 | asg833 2414 | Nstp953 2415 | Rp954 2416 | a(dp955 2417 | g435 2418 | (dp956 2419 | g437 2420 | g777 2421 | sg438 2422 | (lp957 2423 | (dp958 2424 | g441 2425 | g442 2426 | (I0 2427 | I2 2428 | I1 2429 | tp959 2430 | Rp960 2431 | sg445 2432 | g910 2433 | sasstp961 2434 | bsbtp962 2435 | . --------------------------------------------------------------------------------