├── utils ├── README.md ├── requirements.txt ├── setup_user_env.sh ├── build_curated_from_psql.sh ├── simple_impute.py ├── postgres_make_extended_concepts.sh ├── Makefile └── niv-durations.sql ├── resources ├── README.md ├── variable_ranges.csv ├── Rohit_itemid.txt ├── outcome_data_spec.json ├── static_data_spec.json ├── item_id_stat.csv └── testing_schemas.pkl ├── notebooks ├── .gitignore ├── README.md ├── mmd_grud_utils.py ├── Baselines for Mortality and LOS prediction - GRU-D.ipynb └── Baselines for Mortality and LOS prediction - Sklearn.ipynb ├── data └── .gitignore ├── SQL_Queries ├── notes.sql ├── codes.sql ├── debug_codes.sql ├── statics.sql └── debug_statics.sql ├── LICENSE ├── .gitignore ├── datapackage_io_util.py ├── mimic_querier.py ├── mimic_extract_env_py36.yml ├── README.md └── heuristic_sentence_splitter.py /utils/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /resources/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /notebooks/.gitignore: -------------------------------------------------------------------------------- 1 | *Scratchpad.ipynb 2 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything per data use and confidentiality agreements 2 | * 3 | # Except this file 4 | !.gitignore -------------------------------------------------------------------------------- /utils/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.12 2 | pandas>=0.18 3 | scipy>=0.18 4 | jupyter>=1.0 5 | numexpr>=2.6 6 | scikit-learn>=0.19 7 | psycopg2>=2.7 8 | pytables>=3.4 9 | matplotlib>=2.0 
-------------------------------------------------------------------------------- /SQL_Queries/notes.sql: -------------------------------------------------------------------------------- 1 | SELECT n.subject_id, n.hadm_id, i.icustay_id, n.chartdate, n.charttime, n.category, n.description, n.text 2 | FROM noteevents n INNER JOIN icustays i on i.hadm_id = n.hadm_id 3 | WHERE 4 | iserror IS NULL 5 | AND (n.chartdate <= i.outtime OR n.charttime <= i.outtime) 6 | AND n.hadm_id IN ('{hadm_id}') 7 | AND n.subject_id IN ('{subject_id}') 8 | -------------------------------------------------------------------------------- /SQL_Queries/codes.sql: -------------------------------------------------------------------------------- 1 | SET SEARCH_PATH TO public,mimiciii; 2 | SELECT 3 | i.icustay_id, d.subject_id, d.hadm_id, 4 | array_agg(d.icd9_code ORDER BY seq_num ASC) AS icd9_codes 5 | FROM diagnoses_icd d 6 | LEFT OUTER JOIN (SELECT ccs_matched_id, icd9_code from ccs_dx) c 7 | ON c.icd9_code = d.icd9_code 8 | INNER JOIN icustays i 9 | ON i.hadm_id = d.hadm_id AND i.subject_id = d.subject_id 10 | WHERE d.hadm_id IN ('{hadm_id}') AND seq_num IS NOT NULL 11 | GROUP BY i.icustay_id, d.subject_id, d.hadm_id 12 | -------------------------------------------------------------------------------- /SQL_Queries/debug_codes.sql: -------------------------------------------------------------------------------- 1 | \echo "DEBUG ONLY" 2 | SET search_path TO mimiciii; 3 | SELECT 4 | i.icustay_id, d.subject_id, d.hadm_id, 5 | array_agg(d.icd9_code ORDER BY seq_num ASC) AS icd9_codes, 6 | array_agg(c.ccs_matched_id ORDER BY seq_num ASC) AS ccs_codes 7 | FROM mimiciii.diagnoses_icd d 8 | LEFT OUTER JOIN (SELECT ccs_matched_id, icd9_code from mimiciii.ccs_dx) c 9 | ON c.icd9_code = d.icd9_code 10 | INNER JOIN icustays i 11 | ON i.hadm_id = d.hadm_id AND i.subject_id = d.subject_id 12 | 13 | WHERE seq_num IS NOT NULL 14 | GROUP BY i.icustay_id, d.subject_id, d.hadm_id 15 | 16 | 
-------------------------------------------------------------------------------- /utils/setup_user_env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export MIMIC_CODE_DIR=$(realpath ../../mimic-code) 4 | export MIMIC_EXTRACT_CODE_DIR=$(realpath ../) 5 | 6 | export MIMIC_DATA_DIR=$MIMIC_EXTRACT_CODE_DIR/data/ 7 | 8 | export MIMIC_EXTRACT_OUTPUT_DIR=$MIMIC_DATA_DIR/curated/ 9 | mkdir -p $MIMIC_EXTRACT_OUTPUT_DIR 10 | 11 | export DBUSER=bnestor 12 | export DBNAME=mimic 13 | export SCHEMA=public,mimiciii 14 | export HOST=mimic 15 | export DBSTRING="dbname=$DBNAME options=--search_path=$SCHEMA" 16 | alias psql="psql -h $HOST -U $DBUSER " 17 | 18 | export PGHOST=$HOST 19 | export PGUSER=$DBUSER 20 | 21 | export PGPASSWORD=$1 22 | -------------------------------------------------------------------------------- /utils/build_curated_from_psql.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Build curated dataset of .csv / .npy / .hd5 files 4 | # for patient time-series data extracted from PSQL DB 5 | # 6 | # Takes optional argument POP_SIZE 7 | # 8 | 9 | mkdir -p $MIMIC_EXTRACT_OUTPUT_DIR; 10 | 11 | if [[ -z $POP_SIZE ]]; then 12 | # means extract all available data 13 | POP_SIZE=0; 14 | fi 15 | 16 | python -u $MIMIC_EXTRACT_CODE_DIR/mimic_direct_extract.py \ 17 | --out_path $MIMIC_EXTRACT_OUTPUT_DIR/ \ 18 | --resource_path $MIMIC_EXTRACT_CODE_DIR/resources/ \ 19 | --extract_pop 2 \ 20 | --extract_outcomes 2 \ 21 | --extract_codes 0 \ 22 | --extract_numerics 2 \ 23 | --extract_notes 0\ 24 | --exit_after_loading 0 \ 25 | --plot_hist 0 \ 26 | --pop_size $POP_SIZE \ 27 | --psql_password $PGPASSWORD \ 28 | --psql_host $HOST \ 29 | --min_percent 0 \ 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | 
Copyright (c) 2019 MLforHealth 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | # Related Jupyter Notebooks 2 | 3 | # Testing Cases 4 | `Testing mimic_direct_extract.ipynb` contains tests for different data processing funcations in **MIMIC-Extract**. 5 | 6 | # Demonstrated Usage 7 | * `Baselines for Mortality and LOS prediction - Sklearn.ipynb` 8 | 9 | This notebook demonstrates the use of **MIMIC-Extract** output in mortality and long length-of-stay prediction tasks. Logistic regression and random forest models are fitted using Scikit-Learn. 10 | 11 | * `Baselines for Mortality and LOS prediction - GRU-D.ipynb` 12 | 13 | This notebook demonstrates the use of **MIMIC-Extract** output in mortality and long length-of-stay prediction tasks. 
GRU-D models are fitted. 14 | 15 | * `Baselines for Intervention Prediction - Mechanical Ventilation.ipynb` 16 | 17 | This notebook demonstrates the use of **MIMIC-Extract** output in mechanical ventilation prediction task. Logistic regression and random forest models models are fitted using Scikit-Learn. CNN is fitted using Keras 2.2.4. LSTM is fitted using Tensorflow 1.8.0. 18 | 19 | * `Baselines for Intervention Prediction - Vasopressor.ipynb` 20 | 21 | This notebook demonstrates the use of **MIMIC-Extract** output in vasopressor prediction task. Logistic regression and random forest models models are fitted using Scikit-Learn. CNN is fitted using Keras 2.2.4. LSTM is fitted using Tensorflow 1.8.0. 22 | -------------------------------------------------------------------------------- /utils/simple_impute.py: -------------------------------------------------------------------------------- 1 | import copy, math, os, pickle, time, pandas as pd, numpy as np 2 | 3 | ID_COLS = ['subject_id', 'hadm_id', 'icustay_id'] 4 | 5 | def simple_imputer(df,train_subj): 6 | idx = pd.IndexSlice 7 | df = df.copy() 8 | 9 | df_out = df.loc[:, idx[:, ['mean', 'count']]] 10 | icustay_means = df_out.loc[:, idx[:, 'mean']].groupby(ID_COLS).mean() 11 | global_means = df_out.loc[idx[train_subj,:], idx[:, 'mean']].mean(axis=0) 12 | 13 | df_out.loc[:,idx[:,'mean']] = df_out.loc[:,idx[:,'mean']].groupby(ID_COLS).fillna( 14 | method='ffill' 15 | ).groupby(ID_COLS).fillna(icustay_means).fillna(global_means) 16 | 17 | df_out.loc[:, idx[:, 'count']] = (df.loc[:, idx[:, 'count']] > 0).astype(float) 18 | df_out.rename(columns={'count': 'mask'}, level='Aggregation Function', inplace=True) 19 | 20 | is_absent = (1 - df_out.loc[:, idx[:, 'mask']]) 21 | hours_of_absence = is_absent.cumsum() 22 | time_since_measured = hours_of_absence - hours_of_absence[is_absent==0].fillna(method='ffill') 23 | time_since_measured.rename(columns={'mask': 'time_since_measured'}, level='Aggregation Function', inplace=True) 
24 | 25 | df_out = pd.concat((df_out, time_since_measured), axis=1) 26 | df_out.loc[:, idx[:, 'time_since_measured']] = df_out.loc[:, idx[:, 'time_since_measured']].fillna(100) 27 | 28 | df_out.sort_index(axis=1, inplace=True) 29 | return df_out -------------------------------------------------------------------------------- /utils/postgres_make_extended_concepts.sh: -------------------------------------------------------------------------------- 1 | # This file makes tables for the concepts in this subfolder. 2 | # Be sure to run postgres-functions.sql first, as the concepts rely on those function definitions. 3 | # Note that this may take a large amount of time and hard drive space. 4 | 5 | # string replacements are necessary for some queries 6 | export REGEX_DATETIME_DIFF="s/DATETIME_DIFF\((.+?),\s?(.+?),\s?(DAY|MINUTE|SECOND|HOUR|YEAR)\)/DATETIME_DIFF(\1, \2, '\3')/g" 7 | export REGEX_SCHEMA='s/`physionet-data.(mimiciii_clinical|mimiciii_derived|mimiciii_notes).(.+?)`/\2/g' 8 | export CONNSTR='-d mimic' 9 | 10 | # this is set as the search_path variable for psql 11 | # a search path of "public,mimiciii" will search both public and mimiciii 12 | # schemas for data, but will create tables on the public schema 13 | export PSQL_PREAMBLE='SET search_path TO public,mimiciii' 14 | 15 | echo '' 16 | echo '===' 17 | echo 'Beginning to create tables for MIMIC database.' 18 | echo 'Any notices of the form "NOTICE: TABLE "XXXXXX" does not exist" can be ignored.' 19 | echo 'The scripts drop views before creating them, and these notices indicate nothing existed prior to creating the view.' 
20 | echo '===' 21 | echo '' 22 | 23 | echo 'Directory 5 of 9: fluid_balance' 24 | { echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS colloid_bolus; CREATE TABLE colloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/colloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql ${CONNSTR} 25 | { echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS crystalloid_bolus; CREATE TABLE crystalloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/crystalloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql ${CONNSTR} 26 | 27 | echo 'Finished creating tables.' 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | 107 | *.ipynb_checkpoints* 108 | makejob 109 | 110 | # tags 111 | tags 112 | -------------------------------------------------------------------------------- /utils/Makefile: -------------------------------------------------------------------------------- 1 | SHELL:=/bin/bash 2 | 3 | PSQL_EXECUTABLE:=$(shell which psql) 4 | 5 | MIMIC_CODE_DIR:=${shell grep "MIMIC_CODE_DIR" setup_user_env.sh | cut -d'=' -f2} 6 | 7 | #=== Commands 8 | 9 | .PHONY: build_curated_from_psql 10 | 11 | build_curated_from_psql: setup_user_env.sh has_psql_exe ## Build curated flat files from PSQL db, using this repo 12 | { \ 13 | source ./setup_user_env.sh; \ 14 | bash build_curated_from_psql.sh; \ 15 | } 16 | 17 | #=== Required mimic-code/ repo 18 | # Rules below will download the repo if it doesn't exist 19 | # Expected location MIMIC_CODE_DIR is defined in 
setup_user_env.sh 20 | 21 | .PHONY: clone_mimic_code_repo ${MIMIC_CODE_DIR}/buildmimic/postgres/Makefile 22 | 23 | clone_mimic_code_repo: ${MIMIC_CODE_DIR}/buildmimic/postgres/Makefile 24 | 25 | ${MIMIC_CODE_DIR}/buildmimic/postgres/Makefile: setup_user_env.sh 26 | { \ 27 | source ./setup_user_env.sh; \ 28 | [ -e $@ ] || git clone https://github.com/MIT-LCP/mimic-code/ ${MIMIC_CODE_DIR}/; \ 29 | } 30 | 31 | #=== Build concepts 32 | .PHONY: build_concepts 33 | build_concepts: build_concepts_mimic_code build_extra_concepts 34 | 35 | .PHONY: build_concepts_mimic_code 36 | build_concepts_mimic_code: setup_user_env.sh clone_mimic_code_repo 37 | { \ 38 | source ./setup_user_env.sh; \ 39 | cd ${MIMIC_CODE_DIR}/concepts; \ 40 | psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./make-concepts.sql; \ 41 | cd ../../MIMIC_Extract/utils; \ 42 | } 43 | 44 | .PHONY: build_extra_concepts 45 | build_extra_concepts: setup_user_env.sh niv-durations.sql crystalloid-bolus.sql colloid-bolus.sql 46 | { \ 47 | source ./setup_user_env.sh; \ 48 | psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./niv-durations.sql; \ 49 | psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./crystalloid-bolus.sql; \ 50 | psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./colloid-bolus.sql; \ 51 | } 52 | 53 | #=== Env Checks 54 | 55 | .PHONY: has_psql_exe 56 | has_psql_exe: setup_user_env.sh 57 | ifndef PSQL_EXECUTABLE 58 | $(error "Error: 'psql' is undefined. 
Please install/add to current path.") 59 | endif 60 | -------------------------------------------------------------------------------- /resources/variable_ranges.csv: -------------------------------------------------------------------------------- 1 | LEVEL2,LEVEL1,OUTLIER LOW,VALID LOW,IMPUTE,VALID HIGH,OUTLIER HIGH 2 | Alanine aminotransferase,,0,2,34,10000,11000 3 | Albumin,,0,0.6,3.1,6,60 4 | Alkaline phosphate,,0,20,106,3625,4000 5 | Anion Gap,,0,5,13,50,55 6 | Asparate aminotransferase,,0,6,40,20000,22000 7 | Basophils,,,,,, 8 | Bicarbonate,,0,0,25,60,66 9 | Bilirubin,Bilirubin (total),0,0.1,0.9,60,66 10 | Bilirubin,Bilirubin (conjugated),,,,, 11 | Bilirubin,Bilirubin (unconjugated),,,,, 12 | Blood culture,,,,,, 13 | Blood urea nitrogen,,0,0,23,250,275 14 | Calcium,,,,,, 15 | Calcium ionized,,,,,, 16 | Capillary refill rate,,0,0,0,1,1 17 | Chloride,,0,50,104,175,200 18 | Cholesterol,,0,32,160,600,660 19 | Creatinine,,0,0.1,1,60,66 20 | Diastolic blood pressure,,0,0,59,375,375 21 | Eosinophils,,,,,, 22 | Fraction inspired oxygen,,0.2,0.21,0.21,1,1.1 23 | Glascow coma scale eye opening,,1,1,4,4,4 24 | Glascow coma scale motor response,,1,1,6,6,6 25 | Glascow coma scale total,,3,3,11,15,15 26 | Glascow coma scale verbal response,,1,1,4,5,5 27 | Glucose,,0,33,128,2000,2200 28 | Heart rate,,0,0,86,350,390 29 | Height,,0,0,170,240,275 30 | Hematocrit,,0,0,30.2,75,100 31 | Hemoglobin,,0,0,10.2,25,30 32 | Lactate,,0,0.4,1.8,30,33 33 | Lactate dehydrogenase,,0,54,263,33000,35000 34 | Lactic acid,,,,,, 35 | Lymphocytes,,,,,, 36 | Magnesium,,0,0,2,20,22 37 | Mean blood pressure,,0,14,77,330,375 38 | Mean corpuscular hemoglobin,,,,,, 39 | Mean corpuscular hemoglobin concentration,,,,,, 40 | Mean corpuscular volume,,,,,, 41 | Oxygen saturation,,0,0,98,100,150 42 | Monocytes,,,,,, 43 | Neutrophils,,,,,, 44 | Partial pressure of carbon dioxide,,0,0,40,200,220 45 | Partial pressure of oxygen,,0,32,112,700,770 46 | Partial thromboplastin time,,0,18.8,34.4,150,150 47 | Peak 
inspiratory pressure,,0,14,20,38,40 48 | pH,,6.3,6.3,7.4,8.4,10 49 | Phosphate,,0,0.5,3.4,20,22 50 | Platelets,,0,0,208,2000,2200 51 | Positive end-expiratory pressure,,0,0,6,25,30 52 | Potassium,,0,0,4.1,12,15 53 | Prothrombin time,,0,9.9,14.5,97.1,150 54 | Pupillary response left,,,,,, 55 | Pupillary response right,,,,,, 56 | Pupillary size left,,,,,, 57 | Pupillary size right,,,,,, 58 | Red blood cell count,,,,,, 59 | Respiratory rate,,0,0,19,300,330 60 | Sodium,,0,50,142,225,250 61 | Systolic blood pressure,,0,0,118,375,375 62 | Temperature,,14.2,26,37,45,47 63 | Troponin-I,,0,0.01,2.3,49.6,575 64 | Troponin-T,,0,0.01,0.1,20.85,24 65 | Urine output,,0,0,80,1200,2445 66 | Weight,,0,0,81.8,250,250 67 | White blood cell count,,0,0,9.9,1000,1100 -------------------------------------------------------------------------------- /SQL_Queries/statics.sql: -------------------------------------------------------------------------------- 1 | select distinct 2 | i.subject_id, 3 | i.hadm_id, 4 | i.icustay_id, 5 | i.gender, 6 | i.admission_age as age, 7 | i.ethnicity, 8 | i.hospital_expire_flag, 9 | i.hospstay_seq, 10 | i.los_icu, 11 | i.admittime, 12 | i.dischtime, 13 | i.intime, 14 | i.outtime, 15 | a.diagnosis AS diagnosis_at_admission, 16 | a.admission_type, 17 | a.insurance, 18 | a.deathtime, 19 | a.discharge_location, 20 | CASE when a.deathtime between i.intime and i.outtime THEN 1 ELSE 0 END AS mort_icu, 21 | CASE when a.deathtime between i.admittime and i.dischtime THEN 1 ELSE 0 END AS mort_hosp, 22 | s.first_careunit, 23 | c.fullcode_first, 24 | c.dnr_first, 25 | c.fullcode, 26 | c.dnr, 27 | c.dnr_first_charttime, 28 | c.cmo_first, 29 | c.cmo_last, 30 | c.cmo, 31 | c.timecmo_chart, 32 | sofa.sofa, 33 | sofa.respiration as sofa_, 34 | sofa.coagulation as sofa_, 35 | sofa.liver as sofa_, 36 | sofa.cardiovascular as sofa_, 37 | sofa.cns as sofa_, 38 | sofa.renal as sofa_, 39 | sapsii.sapsii, 40 | sapsii.sapsii_prob, 41 | oasis.oasis, 42 | oasis.oasis_prob, 43 | 
COALESCE(f.readmission_30, 0) AS readmission_30 44 | FROM icustay_detail i 45 | INNER JOIN admissions a ON i.hadm_id = a.hadm_id 46 | INNER JOIN icustays s ON i.icustay_id = s.icustay_id 47 | INNER JOIN code_status c ON i.icustay_id = c.icustay_id 48 | LEFT OUTER JOIN (SELECT d.icustay_id, 1 as readmission_30 49 | FROM icustays c, icustays d 50 | WHERE c.subject_id=d.subject_id 51 | AND c.icustay_id > d.icustay_id 52 | AND c.intime - d.outtime <= interval '30 days' 53 | AND c.outtime = (SELECT MIN(e.outtime) from icustays e 54 | WHERE e.subject_id=c.subject_id 55 | AND e.intime>d.outtime)) f 56 | ON i.icustay_id=f.icustay_id 57 | LEFT OUTER JOIN (SELECT icustay_id, sofa, respiration, coagulation, liver, cardiovascular, cns, renal 58 | FROM sofa) sofa 59 | ON i.icustay_id=sofa.icustay_id 60 | LEFT OUTER JOIN (SELECT icustay_id, sapsii, sapsii_prob 61 | FROM sapsii) sapsii 62 | ON sapsii.icustay_id=i.icustay_id 63 | LEFT OUTER JOIN (SELECT icustay_id, oasis, oasis_prob 64 | FROM oasis) oasis 65 | ON oasis.icustay_id=i.icustay_id 66 | WHERE s.first_careunit NOT like 'NICU' 67 | and i.hadm_id is not null and i.icustay_id is not null 68 | and i.hospstay_seq = 1 69 | and i.icustay_seq = 1 70 | and i.admission_age >= {min_age} 71 | and i.los_icu >= {min_day} 72 | and (i.outtime >= (i.intime + interval '{min_dur} hours')) 73 | and (i.outtime <= (i.intime + interval '{max_dur} hours')) 74 | ORDER BY subject_id 75 | {limit} 76 | -------------------------------------------------------------------------------- /SQL_Queries/debug_statics.sql: -------------------------------------------------------------------------------- 1 | \echo "This file is just for debugging" 2 | SET search_path TO public,mimiciii; 3 | select distinct 4 | i.subject_id, 5 | i.hadm_id, 6 | i.icustay_id, 7 | i.gender, 8 | i.age as age, 9 | i.ethnicity, 10 | i.admission_type, 11 | i.hospital_expire_flag, 12 | i.hospstay_seq, 13 | i.los_icu, 14 | i.admittime, 15 | i.dischtime, 16 | i.intime, 17 | i.outtime, 18 
| a.diagnosis AS diagnosis_at_admission, 19 | a.insurance, 20 | a.deathtime, 21 | a.discharge_location, 22 | CASE when a.deathtime between i.intime and i.outtime THEN 1 ELSE 0 END AS mort_icu, 23 | CASE when a.deathtime between i.admittime and i.dischtime THEN 1 ELSE 0 END AS mort_hosp, 24 | s.first_careunit, 25 | c.fullcode_first, 26 | c.dnr_first, 27 | c.fullcode, 28 | c.dnr, 29 | -- c.timednr_chart, 30 | c.dnr_first_charttime, 31 | c.cmo_first, 32 | c.cmo_last, 33 | c.cmo, 34 | c.cmo_ds, 35 | -- c.timecmo_chart, 36 | c.cmo_first_charttime, 37 | -- c.timecmo_nursingnote, 38 | c.cmo_nursingnote_charttime, 39 | sofa.sofa, 40 | sofa.respiration as sofa_, 41 | sofa.coagulation as sofa_, 42 | sofa.liver as sofa_, 43 | sofa.cardiovascular as sofa_, 44 | sofa.cns as sofa_, 45 | sofa.renal as sofa_, 46 | sapsii.sapsii, 47 | sapsii.sapsii_prob, 48 | oasis.oasis, 49 | oasis.oasis_prob, 50 | COALESCE(f.readmission_30, 0) AS readmission_30 51 | FROM icustay_detail i 52 | INNER JOIN admissions a ON i.hadm_id = a.hadm_id 53 | INNER JOIN icustays s ON i.icustay_id = s.icustay_id 54 | INNER JOIN code_status c ON i.icustay_id = c.icustay_id 55 | LEFT OUTER JOIN (SELECT d.icustay_id, 1 as readmission_30 56 | FROM icustays c, icustays d 57 | WHERE c.subject_id=d.subject_id 58 | AND c.icustay_id > d.icustay_id 59 | AND c.intime - d.outtime <= interval '30 days' 60 | AND c.outtime = (SELECT MIN(e.outtime) from icustays e 61 | WHERE e.subject_id=c.subject_id 62 | AND e.intime>d.outtime)) f 63 | ON i.icustay_id=f.icustay_id 64 | LEFT OUTER JOIN (SELECT icustay_id, sofa, respiration, coagulation, liver, cardiovascular, cns, renal 65 | FROM sofa) sofa 66 | ON i.icustay_id=sofa.icustay_id 67 | LEFT OUTER JOIN (SELECT icustay_id, sapsii, sapsii_prob 68 | FROM sapsii) sapsii 69 | ON sapsii.icustay_id=i.icustay_id 70 | LEFT OUTER JOIN (SELECT icustay_id, oasis, oasis_prob 71 | FROM oasis) oasis 72 | ON oasis.icustay_id=i.icustay_id 73 | WHERE s.first_careunit NOT like 'NICU' 74 | and 
i.hadm_id is not null and i.icustay_id is not null 75 | and i.hospstay_seq = 1 76 | and i.icustay_seq = 1 77 | and i.age >= 16 78 | and i.los_icu >= 1 79 | and (i.outtime >= (i.intime + interval '12 hours')) 80 | and (i.outtime <= (i.intime + interval '250 hours')) 81 | ORDER BY subject_id 82 | -------------------------------------------------------------------------------- /datapackage_io_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import datapackage 4 | 5 | def load_datapackage_schema(json_fpath, resource_id=0): 6 | """ Load schema object 7 | 8 | Returns 9 | ------- 10 | schema : schema object, with attributes 11 | field_names 12 | fields : list of dict 13 | Each dict provides info about the field (data type, etc) 14 | """ 15 | spec = datapackage.DataPackage(json_fpath) 16 | schema = spec.resources[resource_id].schema 17 | return schema 18 | 19 | def load_sanitized_df_from_csv(csv_fpath, schema): 20 | """ Load dataframe from CSV that meets provided schema. 21 | 22 | Returns 23 | ------- 24 | data_df : pandas DataFrame 25 | Will have fields provided by schema 26 | Will have field types (categorical, datetime, etc) provided by schema. 
27 | """ 28 | data_df = pd.read_csv(csv_fpath) 29 | return sanitize_df(data_df, schema) 30 | 31 | def save_sanitized_df_to_csv(csv_fpath, data_df, schema=None): 32 | """ Save sanitized df to .csv file 33 | 34 | Returns 35 | ------- 36 | None 37 | 38 | Post Condition 39 | -------------- 40 | csv_fpath is .csv file containing sanitized data_df 41 | This file could be read by load_sanitized_df_from_csv() 42 | """ 43 | if schema is not None: 44 | data_df = sanitize_df(data_df, schema, setup_index=False) 45 | has_non_numeric_index = ( 46 | getattr(data_df.index, 'name', None) is not None 47 | or getattr(data_df.index, 'names', [None])[0] is not None) 48 | data_df.to_csv(csv_fpath, index=has_non_numeric_index) 49 | 50 | def sanitize_df(data_df, schema, setup_index=True, missing_column_procedure='fill_zero'): 51 | """ Sanitize dataframe according to provided schema 52 | 53 | Returns 54 | ------- 55 | data_df : pandas DataFrame 56 | Will have fields provided by schema 57 | Will have field types (categorical, datetime, etc) provided by schema. 
58 | """ 59 | data_df = data_df.reset_index() 60 | for ff, field_name in enumerate(schema.field_names): 61 | type_ff = schema.fields[ff].descriptor['type'] 62 | if field_name not in data_df.columns: 63 | if missing_column_procedure == 'fill_zero': 64 | if type_ff == 'integer': 65 | data_df[field_name] = 0 66 | elif type_ff == 'number': 67 | data_df[field_name] = 0.0 68 | 69 | # Reorder columns to match schema 70 | data_df = data_df[schema.field_names] 71 | # Cast fields to required type (categorical / datetime) 72 | for ff, name in enumerate(schema.field_names): 73 | ff_spec = schema.descriptor['fields'][ff] 74 | if 'pandas_dtype' in ff_spec and ff_spec['pandas_dtype'] == 'category': 75 | data_df[name] = data_df[name].astype('category') 76 | elif 'type' in ff_spec and ff_spec['type'] == 'datetime': 77 | data_df[name] = pd.to_datetime(data_df[name]) 78 | if hasattr(schema, 'primary_key'): 79 | data_df = data_df.sort_values(schema.primary_key) 80 | if setup_index: 81 | data_df = data_df.set_index(schema.primary_key) 82 | return data_df 83 | 84 | 85 | -------------------------------------------------------------------------------- /mimic_querier.py: -------------------------------------------------------------------------------- 1 | import copy, psycopg2, pandas as pd 2 | 3 | # TODO(mmd): Where should this go? 4 | # TODO(mmd): Rename 5 | # TODO(mmd): eliminate try/except. Just use conditionals. 6 | def get_values_by_name_from_df_column_or_index(data_df, colname): 7 | """ Easily get values for named field, whether a column or an index 8 | 9 | Returns 10 | ------- 11 | values : 1D array 12 | """ 13 | try: 14 | values = data_df[colname] 15 | except KeyError as e: 16 | if colname in data_df.index.names: 17 | values = data_df.index.get_level_values(colname) 18 | else: 19 | raise e 20 | return values 21 | 22 | # TODO(mmd): Maybe make context manager? 
23 | class MIMIC_Querier(): 24 | def __init__( 25 | self, 26 | exclusion_criteria_template_vars={}, 27 | query_args={}, # passed wholesale to psycopg2.connect 28 | schema_name='public,mimiciii' 29 | ): 30 | """ A class to facilitate repeated Queries to a MIMIC psql database """ 31 | self.exclusion_criteria_template_vars = {} 32 | self.query_args = query_args 33 | self.schema_name = schema_name 34 | self.connected = False 35 | self.connection, self.cursor = None, None 36 | 37 | # TODO(mmd): this isn't really doing exclusion criteria. Should maybe also absorb 'WHERE' clause... 38 | def add_exclusion_criteria_from_df(self, df, columns=[]): 39 | self.exclusion_criteria_template_vars.update({ 40 | c: "','".join( 41 | set([str(v) for v in get_values_by_name_from_df_column_or_index(df, c)]) 42 | ) for c in columns 43 | }) 44 | 45 | def clear_exclusion_criteria(self): self.exclusion_criteria_template_vars = {} 46 | 47 | def close(self): 48 | if not self.connected: return 49 | self.connection.close() 50 | self.cursor.close() # TODO(mmd): Maybe don't actually need this to stay open? 51 | self.connected = False 52 | 53 | def connect(self): 54 | self.close() 55 | self.connection = psycopg2.connect(**self.query_args) 56 | self.cursor = self.connection.cursor() 57 | self.cursor.execute('SET search_path TO %s' % self.schema_name) 58 | self.connected = True 59 | 60 | def query(self, query_string=None, query_file=None, extra_template_vars={}): 61 | assert query_string is not None or query_file is not None, "Must pass a query!" 62 | assert query_string is None or query_file is None, "Must only pass one query!" 
63 | 64 | self.connect() 65 | 66 | if query_string is None: 67 | with open(query_file, mode='r') as f: query_string = f.read() 68 | 69 | template_vars = copy.copy(self.exclusion_criteria_template_vars) 70 | template_vars.update(extra_template_vars) 71 | 72 | query_string = query_string.format(**template_vars) 73 | out = pd.read_sql_query(query_string, self.connection) 74 | 75 | self.close() 76 | return out 77 | def add_exclusion_criteria_from_df(self, df, columns=[]): 78 | self.exclusion_criteria_template_vars.update({ 79 | c: "','".join( 80 | set([str(v) for v in get_values_by_name_from_df_column_or_index(df, c)]) 81 | ) for c in columns 82 | }) 83 | 84 | def close(self): 85 | if not self.connected: return 86 | self.connection.close() 87 | self.cursor.close() # TODO(mmd): Maybe don't actually need this to stay open? 88 | self.connected = False 89 | 90 | def connect(self): 91 | self.close() 92 | self.connection = psycopg2.connect(**self.query_args) 93 | self.cursor = self.connection.cursor() 94 | self.cursor.execute('SET search_path TO %s' % self.schema_name) 95 | self.connected = True 96 | 97 | def query(self, query_string=None, query_file=None, extra_template_vars={}): 98 | assert query_string is not None or query_file is not None, "Must pass a query!" 99 | assert query_string is None or query_file is None, "Must only pass one query!" 
100 | 101 | self.connect() 102 | 103 | if query_string is None: 104 | with open(query_file, mode='r') as f: query_string = f.read() 105 | 106 | template_vars = copy.copy(self.exclusion_criteria_template_vars) 107 | template_vars.update(extra_template_vars) 108 | 109 | query_string = query_string.format(**template_vars) 110 | out = pd.read_sql_query(query_string, self.connection) 111 | 112 | self.close() 113 | return out 114 | -------------------------------------------------------------------------------- /resources/Rohit_itemid.txt: -------------------------------------------------------------------------------- 1 | -- Demographic, neuro, status 2 | 198 GCS ready 3 | 581 Weight ignore 4 | 762 AdmitWt No-Level2 5 | 6 | -- Cardiovascular 7 | 455 NBPSys ready 8 | 455 NBPDias value2 Not-exist 9 | 456 NBPMean ready 10 | 1149 NBP Not-exist 11 | 51 SBP ready (may want to separate this from Level 2 called Systolic Blood Pressure) 12 | 51 DBP value2 Not-exist 13 | 52 MAP ready 14 | 211 HR ready 15 | 646 OR 1148 SpO2 ready (1148 Not-exist) 16 | 113 OR 1103 CVP No-Level2 (and count is too low for cvp) 17 | 491 PAPMean No-Level2 18 | 492 PAPSd No-Level2 19 | 116 CrdIndx No-Level2 20 | 626 SVR No-Level2 21 | 90 COtd No-Level2 22 | 89 COfck No-Level2 23 | 504 PCWP No-Level2 24 | 512 PVR No-Level2 25 | 3353 CardiacMurmur No-Level2 26 | 3685 VitaminK No-Level2 27 | 28 | -- Chemistries, Electrolytes, acid/base 29 | 837 OR 1536 Na verify 30 | 829 OR 1535 K verify 31 | 788 OR 1523 Cl verify 32 | 827 Phosphorous No-Level2 33 | 818 OR 1531 Lactic_Acid verify 34 | 787 CO2 No-Level2 35 | 811 Glucose ready 36 | 781 OR 1162 BUN verify 37 | 791 OR 1525 Creatinine verify 38 | 821 OR 1532 Mg verify 39 | 786 OR 1522 Ca verify 40 | 816 IonCa verify 41 | 769 ALT verify 42 | 770 AST verify 43 | 851 Troponin verify 44 | 806 Fibrinogen No-Level2 45 | 848 OR 1538 TBili verify 46 | 803 OR 1527 DBili verify (should probably be separated in terms of level2 from above) 47 | 849 OR 1539 TProtein 
No-Level2 48 | 772 OR 1521 Albumin verify 49 | 818 OR 1531 Lactate verify 50 | 51 | -- Blood Gases 52 | 776 ArtBE No-Level2 53 | 777 ArtCO2 maybe 54 | 778 ArtPaCO2 verify 55 | 779 ArtPaO2 (should probably be separated in terms of level 2 from above) 56 | 780 OR 1126 ArtpH ready 57 | 859 PvO2 No-Level2 58 | 59 | -- Ventilation 60 | 190 FiO2Set No-Level2 61 | 506 PEEPSet No-Level2 62 | 618 RESP ready 63 | 615 RespTot ready (should probably be seperated from above in terms of level2) 64 | 619 RespSet No-Level2 65 | 614 RespSpon ready (should be separated from 615 and 618) 66 | 535 PIP No-Level2 67 | 543 PlateauPres No-Level2 68 | 682 TidVolObs No-Level2 69 | 683 TidVolSet No-Level2 70 | 684 TidVolSpon No-Level2 71 | 834 SaO2 ready 72 | 428 OR 425 LungSounds No-Level2 73 | 74 | 75 | -- Hematology 76 | 813 HCT verify 77 | 814 Hg verify 78 | 815 OR 1530 INR verify 79 | 828 Platelets verify 80 | 824 OR 1286 PT verify 81 | 825 OR 1533 PTT verify 82 | 861 OR 1127 OR 1542 WBC verify 83 | 833 RBC verify 84 | 678 OR 679 TEMP ready 85 | 86 | -- Severity Scores 87 | 20001 SAPS Not-exist 88 | 20002 RespSOFA Not-exist 89 | 20003 HepaticSOFA Not-exist 90 | 20004 HematSOFA Not-exist 91 | 20006 NeuroSOFA Not-exist 92 | 20007 CardioSOFA Not-exist 93 | 94 | -- Categorical 95 | 212 Heart_Rhythm No-Level2 96 | 161 Ectopy_Type No-Level2 97 | 159 Ectopy_Freq No-Level2 98 | 128 Code_Status No-Level2 99 | 1484 FallRisk No-Level2 100 | 479 Orientation No-Level2 101 | 432 ConsciousLevel No-Level2 102 | 184 EyeOpening ready 103 | 454 MotorResponse ready (should probably be separated from 184 in terms of level2) 104 | 1337 RikerSAS No-Level2 105 | 722 Vent No-Level2 106 | 720 VentMode No-Level2 107 | 516 Pacemaker No-Level2 108 | 690 Trach No-Level2 109 | 643 SkinColor No-Level2 110 | 644 SkinIntegrity No-Level2 111 | 1125 ServiceType No-Level2 112 | 113 | -- Medication Data 114 | 142 Integrelin No-Level2 (discrepancy - says Current Goal in csv file) 115 | 119 OR 44 Epinephrine No-Level2 
(discrepancy - says Cervical Collar Type in csv) 116 | 123 Lasix No-Level2 (discrepancy - says Chest Tube Site #2 in csv file) 117 | 51 Vasopressin verify (discrepancy - says Arterial BP [Systolic]) 118 | 50 Nitroprusside No-Level2 (discrepancy - says Apnea Time Interval) 119 | 126 MorphineSulfate Not-exist 120 | 112 Amiodarone No-Level2 (discrepancy - CT #4 Suction Amount) 121 | 124 Midazolam No-Level2 (discrepancy - Chest Tube Site #3) 122 | 43 Dopamine (discrepancy - Angio Appearance #1) 123 | 118 OR 149 Fentanyl No-Level2 (discrepancy - Cerv Collar Status) 124 | 120 OR 47 Levophed Not-Exist (47 says Angio Site #1) 125 | 25 Heparin No-Level2 (discrepancy - AV Interval) 126 | 121 OR 49 Nitroglycerine (discrepancy - 121 is Chest PT [Right] and 49 is Anti-Embolism [Device] 127 | 45 Insulin No-Level2 (discrepancy - Angio Dressing #1) 128 | 127 OR 128 Neosynephrine No-Level2 (discrepancy - 127 is Circulation/SkinInt and 128 is Code Status) 129 | 131 Propofol No-Level2 (discrepancy - Compliance(40-60ml)) 130 | 131 | -- Fluid Input Output Data 132 | 55 OR 69 OR 715 OR 61 OR 57 OR 85 OR 473 OR 405 OR 428 UrineOut No-Level2 (discrepancy) 133 | 144 OR 172 OR 398 InputRBCs (144 is Not-exist, 172 is Education Topic #2 and 398 is Inc #1 [Dressing]) 134 | 179 OR 224 OR 3955 OR 163 OR 319 OR 221 InputOtherBlood No-Level2 (discrepancy) -------------------------------------------------------------------------------- /utils/niv-durations.sql: -------------------------------------------------------------------------------- 1 | -- This query extracts the duration of mechanical ventilation 2 | -- The main goal of the query is to aggregate sequential ventilator settings 3 | -- into single mechanical ventilation "events". The start and end time of these 4 | -- events can then be used for various purposes: calculating the total duration 5 | -- of mechanical ventilation, cross-checking values (e.g. 
PaO2:FiO2 on vent), etc 6 | 7 | SET SEARCH_PATH TO public,mimiciii; 8 | 9 | -- The query's logic is roughly: 10 | -- 1) The presence of a mechanical ventilation setting starts a new ventilation event 11 | -- 2) Any instance of a setting in the next 8 hours continues the event 12 | -- 3) Certain elements end the current ventilation event 13 | -- a) documented extubation ends the current ventilation 14 | -- b) initiation of non-invasive vent and/or oxygen ends the current vent 15 | -- The ventilation events are numbered consecutively by the `num` column. 16 | 17 | 18 | -- First, create a temporary table to store relevant data from CHARTEVENTS. 19 | DROP MATERIALIZED VIEW IF EXISTS nivdurations CASCADE; 20 | create MATERIALIZED VIEW nivdurations as 21 | with nivsettings AS 22 | ( 23 | select 24 | icustay_id, charttime 25 | , max( 26 | case 27 | -- initiation of oxygen therapy 28 | when itemid = 226732 and value in 29 | ( 30 | 'Nasal cannula', -- 153714 observations 31 | 'Face tent', -- 24601 observations 32 | 'Aerosol-cool', -- 24560 observations 33 | 'Trach mask ', -- 16435 observations 34 | 'High flow neb', -- 10785 observations 35 | 'Non-rebreather', -- 5182 observations 36 | 'Venti mask ', -- 1947 observations 37 | 'Medium conc mask ', -- 1888 observations 38 | 'T-piece', -- 1135 observations 39 | 'High flow nasal cannula', -- 925 observations 40 | 'Ultrasonic neb', -- 9 observations 41 | 'Vapomist' -- 3 observations 42 | ) then 1 43 | when itemid in (467,468) and value in 44 | ( 45 | 'Cannula', -- 278252 observations 46 | 'Nasal Cannula', -- 248299 observations 47 | 'None', -- 95498 observations 48 | 'Face Tent', -- 35766 observations 49 | 'Aerosol-Cool', -- 33919 observations 50 | 'Trach Mask', -- 32655 observations 51 | 'Hi Flow Neb', -- 14070 observations 52 | 'Non-Rebreather', -- 10856 observations 53 | 'Venti Mask', -- 4279 observations 54 | 'Medium Conc Mask', -- 2114 observations 55 | 'Vapotherm', -- 1655 observations 56 | 'T-Piece', -- 779 observations 57 
| 'Hood', -- 670 observations 58 | 'Hut', -- 150 observations 59 | 'TranstrachealCat', -- 78 observations 60 | 'Heated Neb', -- 37 observations 61 | 'Ultrasonic Neb' -- 2 observations 62 | ) then 1 63 | when itemid = 469 and value in ('Nasal Cannula', 'Face Tent', 'Trach Mask') then 1 64 | when itemid in (470, 471, 227287, 223834) and valuenum > 0 then 1 65 | else 0 66 | end 67 | ) as OxygenTherapy 68 | from chartevents ce 69 | where ce.value is not null 70 | -- exclude rows marked as error 71 | and ce.error IS DISTINCT FROM 1 72 | and itemid in 73 | ( 74 | -- the below indicate oxygen/NIV 75 | 467 -- O2 Delivery Device 76 | , 468 -- O2 Delivery Device#2 77 | , 469 -- O2 Delivery Mode 78 | , 470 -- O2 Flow (lpm) 79 | , 471 -- O2 Flow (lpm) #2 80 | , 227287 -- O2 Flow (additional cannula) 81 | , 226732 -- O2 Delivery Device(s) 82 | , 223834 -- O2 Flow 83 | ) 84 | group by icustay_id, charttime 85 | ) 86 | , vd0 as 87 | ( 88 | select 89 | icustay_id 90 | -- this carries over the previous charttime which had a mechanical ventilation event 91 | , case 92 | when OxygenTherapy=1 then 93 | LAG(CHARTTIME, 1) OVER (partition by icustay_id, OxygenTherapy order by charttime) 94 | else null 95 | end as charttime_lag 96 | , charttime 97 | , OxygenTherapy 98 | from nivsettings 99 | ) 100 | , vd1 as 101 | ( 102 | select 103 | icustay_id 104 | , charttime_lag 105 | , charttime 106 | , OxygenTherapy 107 | 108 | -- if this is a mechanical ventilation event, we calculate the time since the last event 109 | , case 110 | -- if the current observation indicates mechanical ventilation is present 111 | -- calculate the time since the last vent event 112 | when OxygenTherapy=1 then 113 | CHARTTIME - charttime_lag 114 | else null 115 | end as ventduration 116 | 117 | , case when (CHARTTIME - charttime_lag) > interval '8' hour then 1 118 | else 0 119 | end as newvent 120 | -- use the staging table with only vent settings from chart events 121 | FROM vd0 122 | ) 123 | , vd2 as 124 | ( 125 | 
select vd1.* 126 | -- create a cumulative sum of the instances of new ventilation 127 | -- this results in a monotonic integer assigned to each instance of ventilation 128 | , case when OxygenTherapy=1 then 129 | SUM( newvent ) 130 | OVER ( partition by icustay_id order by charttime ) 131 | else null end 132 | as ventnum 133 | --- now we convert CHARTTIME of ventilator settings into durations 134 | from vd1 135 | ) 136 | -- create the durations for each mechanical ventilation instance 137 | select icustay_id 138 | -- regenerate ventnum so it's sequential 139 | , ROW_NUMBER() over (partition by icustay_id order by ventnum) as ventnum 140 | , min(charttime) as starttime 141 | , max(charttime) as endtime 142 | , extract(epoch from max(charttime)-min(charttime))/60/60 AS duration_hours 143 | from vd2 144 | group by icustay_id, ventnum 145 | having min(charttime) != max(charttime) 146 | -- patient had to be given NIV at least once 147 | -- i.e. max(OxygenTherapy) should be 1 148 | and max(OxygenTherapy) = 1 149 | order by icustay_id, ventnum; 150 | -------------------------------------------------------------------------------- /resources/outcome_data_spec.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "outcome_hourly_data", 3 | "title": "Outcome measurements (hourly) for ICU patients extracted from MIMIC-III", 4 | "description": "Includes ventilator and vasopressor (blood thinner)", 5 | "license": "The use of data is conditional on citing the original data sources.", 6 | "resources": [ 7 | { 8 | "name": "outcome_hourly_tabular_data", 9 | "path": "outcome_hourly_data.csv", 10 | "format": "csv", 11 | "mediatype": "text/csv", 12 | "profile": "tabular-data-resource", 13 | "$schema": "https://frictionlessdata.io/schemas/table-schema.json", 14 | "schema": { 15 | "fields": [ 16 | { 17 | "name": "subject_id", 18 | "description": "ID number for subject within PATIENTS table", 19 | "type": "integer", 20 | "constraints" : 
{ 21 | "required" : true 22 | } 23 | }, 24 | { 25 | "name": "hadm_id", 26 | "description": "ID number for subject within ADMISSIONS table", 27 | "type": "integer", 28 | "constraints" : { 29 | "required" : true 30 | } 31 | }, 32 | { 33 | "name": "icustay_id", 34 | "description": "ID number for subject within ICUSTAYS table", 35 | "type": "integer", 36 | "constraints" : { 37 | "required" : true 38 | } 39 | }, 40 | { 41 | "name": "hours_in", 42 | "description": "Number of hours subject in ICU before measurement taken.", 43 | "type": "integer", 44 | "constraints" : { 45 | "required" : true 46 | } 47 | }, 48 | { 49 | "name": "vent", 50 | "description": "Mechanical ventilator used at current timestep (1 = present, 0 = not).", 51 | "type": "integer", 52 | "constraints" : { 53 | "required" : false 54 | } 55 | }, 56 | { 57 | "name": "vaso", 58 | "description": "Vasopressors given via IV at current timestep (1 = yes, 0 = no).", 59 | "type": "integer", 60 | "constraints" : { 61 | "required" : false 62 | } 63 | }, 64 | { 65 | "name": "adenosine", 66 | "description": "adenosine given via IV at current timestep (1 = yes, 0 = no).", 67 | "type": "integer", 68 | "constraints" : { 69 | "required" : false 70 | } 71 | }, 72 | { 73 | "name": "dobutamine", 74 | "description": "dobutamine given via IV at current timestep (1 = yes, 0 = no).", 75 | "type": "integer", 76 | "constraints" : { 77 | "required" : false 78 | } 79 | }, 80 | { 81 | "name": "dopamine", 82 | "description": "dopamine given via IV at current timestep (1 = yes, 0 = no).", 83 | "type": "integer", 84 | "constraints" : { 85 | "required" : false 86 | } 87 | }, 88 | { 89 | "name": "epinephrine", 90 | "description": "epinephrine given via IV at current timestep (1 = yes, 0 = no).", 91 | "type": "integer", 92 | "constraints" : { 93 | "required" : false 94 | } 95 | }, 96 | { 97 | "name": "isuprel", 98 | "description": "isuprel given via IV at current timestep (1 = yes, 0 = no).", 99 | "type": "integer", 100 | "constraints" : { 
101 | "required" : false 102 | } 103 | }, 104 | { 105 | "name": "milrinone", 106 | "description": "milrinone given via IV at current timestep (1 = yes, 0 = no).", 107 | "type": "integer", 108 | "constraints" : { 109 | "required" : false 110 | } 111 | }, 112 | { 113 | "name": "norepinephrine", 114 | "description": "norepinephrine given via IV at current timestep (1 = yes, 0 = no).", 115 | "type": "integer", 116 | "constraints" : { 117 | "required" : false 118 | } 119 | }, 120 | { 121 | "name": "phenylephrine", 122 | "description": "phenylephrine given via IV at current timestep (1 = yes, 0 = no).", 123 | "type": "integer", 124 | "constraints" : { 125 | "required" : false 126 | } 127 | }, 128 | { 129 | "name": "vasopressin", 130 | "description": "vasopressin given via IV at current timestep (1 = yes, 0 = no).", 131 | "type": "integer", 132 | "constraints" : { 133 | "required" : false 134 | } 135 | }, 136 | { 137 | "name": "colloid_bolus", 138 | "description": "colloid bolus given via IV at current timestep (1 = yes, 0 = no).", 139 | "type": "integer", 140 | "constraints" : { 141 | "required" : false 142 | } 143 | }, 144 | { 145 | "name": "crystalloid_bolus", 146 | "description": "crystalloid bolus given via IV at current timestep (1 = yes, 0 = no).", 147 | "type": "integer", 148 | "constraints" : { 149 | "required" : false 150 | } 151 | }, 152 | { 153 | "name": "nivdurations", 154 | "description": "Non-invasive ventilator used at current timestep (1 = yes, 0 = no).", 155 | "type": "integer", 156 | "constraints" : { 157 | "required" : false 158 | } 159 | } 160 | ], 161 | "missingValues": "nan", 162 | "primaryKey": ["subject_id", "hadm_id", "icustay_id", "hours_in"] 163 | } 164 | } 165 | ] 166 | } 167 | -------------------------------------------------------------------------------- /mimic_extract_env_py36.yml: -------------------------------------------------------------------------------- 1 | name: mimic_data_extraction 2 | channels: 3 | - conda-forge 4 | - defaults 
5 | dependencies: 6 | - boto3=1.14.42=pyh9f0ad1d_0 7 | - botocore=1.17.42=pyh9f0ad1d_0 8 | - cached-property=1.5.1=py_0 9 | - cchardet=2.1.6=py36h831f99a_1 10 | - certifi=2019.3.9=py36_0 11 | - click=7.1.2=pyh9f0ad1d_0 12 | - cython-blis=0.2.4=py36h516909a_1 13 | - datapackage=1.15.0=pyh9f0ad1d_0 14 | - docutils=0.15.2=py36_0 15 | - et_xmlfile=1.0.1=py_1001 16 | - ijson=3.1.1=pyh9f0ad1d_0 17 | - isodate=0.6.0=py_1 18 | - jdcal=1.4.1=py_0 19 | - jmespath=0.10.0=pyh9f0ad1d_0 20 | - jsonlines=1.2.0=pyh9f0ad1d_2 21 | - jsonpointer=2.0=py_0 22 | - linear-tsv=1.1.0=py_1 23 | - openpyxl=3.0.4=py_0 24 | - python_abi=3.6=1_cp36m 25 | - rfc3986=1.4.0=pyh9f0ad1d_0 26 | - s3transfer=0.3.3=py36h9f0ad1d_1 27 | - spacy=2.1.8=py36hc9558a2_0 28 | - sqlalchemy=1.3.18=py36h8c4c3a4_0 29 | - srsly=1.0.2=py36h831f99a_0 30 | - tableschema=1.19.2=pyh9f0ad1d_0 31 | - tabulator=1.52.3=pyh9f0ad1d_0 32 | - thinc=7.0.8=py36hc9558a2_0 33 | - unicodecsv=0.14.1=py_1 34 | - wasabi=0.7.1=pyh9f0ad1d_0 35 | - xlrd=1.2.0=pyh9f0ad1d_1 36 | - _libgcc_mutex=0.1=main 37 | - asn1crypto=0.24.0=py36_0 38 | - attrs=19.1.0=py36_1 39 | - backcall=0.1.0=py36_0 40 | - blas=1.0=mkl 41 | - bleach=3.1.5=py_0 42 | - blosc=1.15.0=hd408876_0 43 | - bzip2=1.0.6=h14c3975_5 44 | - ca-certificates=2019.1.23=0 45 | - cffi=1.12.3=py36h2e261b9_0 46 | - chardet=3.0.4=py36_1003 47 | - cryptography=2.6.1=py36h1ba5d50_0 48 | - cycler=0.10.0=py36_0 49 | - cymem=2.0.2=py36hfd86e86_0 50 | - cytoolz=0.9.0.1=py36h14c3975_1 51 | - dbus=1.13.6=h746ee38_0 52 | - decorator=4.4.0=py36_1 53 | - defusedxml=0.6.0=py_0 54 | - dill=0.2.9=py36_0 55 | - entrypoints=0.3=py36_0 56 | - expat=2.2.6=he6710b0_0 57 | - fontconfig=2.13.0=h9420a91_0 58 | - freetype=2.9.1=h8a8886c_1 59 | - glib=2.56.2=hd408876_0 60 | - gmp=6.1.2=h6c8ec71_1 61 | - gst-plugins-base=1.14.0=hbbd80ab_1 62 | - gstreamer=1.14.0=hb453b48_1 63 | - hdf5=1.10.4=hb1b8bf9_0 64 | - icu=58.2=h9c2bf20_1 65 | - idna=2.8=py36_0 66 | - importlib-metadata=1.7.0=py36_0 67 | - 
importlib_metadata=1.7.0=0 68 | - intel-openmp=2019.3=199 69 | - ipykernel=5.1.0=py36h39e3cac_0 70 | - ipython=7.5.0=py36h39e3cac_0 71 | - ipython_genutils=0.2.0=py36_0 72 | - ipywidgets=7.4.2=py36_0 73 | - jedi=0.13.3=py36_0 74 | - jinja2=2.10.1=py36_0 75 | - jpeg=9b=h024ee3a_2 76 | - jsonschema=3.2.0=py36_0 77 | - jupyter=1.0.0=py36_7 78 | - jupyter_client=5.2.4=py36_0 79 | - jupyter_console=6.0.0=py36_0 80 | - jupyter_core=4.4.0=py36_0 81 | - kiwisolver=1.1.0=py36he6710b0_0 82 | - krb5=1.16.1=h173b8e3_7 83 | - libedit=3.1.20181209=hc058e9b_0 84 | - libffi=3.2.1=hd88cf55_4 85 | - libgcc-ng=8.2.0=hdf63c60_1 86 | - libgfortran-ng=7.3.0=hdf63c60_0 87 | - libpng=1.6.37=hbc83047_0 88 | - libpq=11.2=h20c2e04_0 89 | - libsodium=1.0.16=h1bed415_0 90 | - libstdcxx-ng=8.2.0=hdf63c60_1 91 | - libuuid=1.0.3=h1bed415_2 92 | - libxcb=1.13=h1bed415_1 93 | - libxml2=2.9.9=he19cac6_0 94 | - llvmlite=0.28.0=py36hd408876_0 95 | - lzo=2.10=h49e0be7_2 96 | - markupsafe=1.1.1=py36h7b6447c_0 97 | - matplotlib=3.0.3=py36h5429711_0 98 | - mistune=0.8.4=py36h7b6447c_0 99 | - mkl=2019.3=199 100 | - mkl_fft=1.0.12=py36ha843d7b_0 101 | - mkl_random=1.0.2=py36hd81dba3_0 102 | - msgpack-numpy=0.4.3.2=py36_0 103 | - msgpack-python=0.6.1=py36hfd86e86_1 104 | - murmurhash=1.0.2=py36he6710b0_0 105 | - nbconvert=5.5.0=py_0 106 | - nbformat=4.4.0=py36_0 107 | - ncurses=6.1=he6710b0_1 108 | - nltk=3.4.1=py36_0 109 | - notebook=5.7.8=py36_0 110 | - numba=0.43.1=py36h962f231_0 111 | - numexpr=2.6.9=py36h9e4a6bb_0 112 | - numpy=1.16.3=py36h7e9f1db_0 113 | - numpy-base=1.16.3=py36hde5b4d6_0 114 | - openssl=1.1.1b=h7b6447c_1 115 | - packaging=20.4=py_0 116 | - pandas=0.24.2=py36he6710b0_0 117 | - pandoc=2.2.3.2=0 118 | - pandocfilters=1.4.2=py36_1 119 | - parso=0.4.0=py_0 120 | - pcre=8.43=he6710b0_0 121 | - pexpect=4.7.0=py36_0 122 | - pickleshare=0.7.5=py36_0 123 | - pip=19.1.1=py36_0 124 | - plac=0.9.6=py36_0 125 | - preshed=2.0.1=py36he6710b0_0 126 | - prometheus_client=0.6.0=py36_0 127 | - 
prompt_toolkit=2.0.9=py36_0 128 | - psycopg2=2.7.6.1=py36h1ba5d50_0 129 | - ptyprocess=0.6.0=py36_0 130 | - pycparser=2.19=py36_0 131 | - pygments=2.4.0=py_0 132 | - pyopenssl=19.0.0=py36_0 133 | - pyparsing=2.4.0=py_0 134 | - pyqt=5.9.2=py36h05f1152_2 135 | - pyrsistent=0.16.0=py36h7b6447c_0 136 | - pysocks=1.6.8=py36_0 137 | - pytables=3.5.1=py36h71ec239_0 138 | - python=3.6.8=h0371630_0 139 | - python-dateutil=2.8.0=py36_0 140 | - pytz=2019.1=py_0 141 | - pyzmq=18.0.0=py36he6710b0_0 142 | - qt=5.9.7=h5867ecd_1 143 | - qtconsole=4.4.4=py_0 144 | - readline=7.0=h7b6447c_5 145 | - regex=2019.04.14=py36h7b6447c_0 146 | - requests=2.21.0=py36_0 147 | - scikit-learn=0.20.3=py36hd81dba3_0 148 | - scipy=1.2.1=py36h7c811a0_0 149 | - send2trash=1.5.0=py36_0 150 | - setuptools=41.0.1=py36_0 151 | - sip=4.19.8=py36hf484d3e_0 152 | - six=1.12.0=py36_0 153 | - snappy=1.1.7=hbae5bb6_3 154 | - sqlite=3.28.0=h7b6447c_0 155 | - terminado=0.8.2=py36_0 156 | - testpath=0.4.2=py36_0 157 | - tk=8.6.8=hbc83047_0 158 | - toolz=0.9.0=py36_0 159 | - tornado=6.0.2=py36h7b6447c_0 160 | - tqdm=4.31.1=py36_1 161 | - traitlets=4.3.2=py36_0 162 | - ujson=1.35=py36h14c3975_0 163 | - urllib3=1.24.3=py36_0 164 | - wcwidth=0.1.7=py36_0 165 | - webencodings=0.5.1=py36_1 166 | - wheel=0.33.2=py36_0 167 | - widgetsnbextension=3.4.2=py36_0 168 | - wrapt=1.10.11=py36h14c3975_2 169 | - xz=5.2.4=h14c3975_4 170 | - zeromq=4.3.1=he6710b0_3 171 | - zipp=3.1.0=py_0 172 | - zlib=1.2.11=h7b6447c_3 173 | - pip: 174 | - blis==0.4.1 175 | - catalogue==1.0.0 176 | - https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.1.0/en_core_web_sm-2.1.0.tar.gz 177 | - joblib==0.16.0 178 | - msgpack==0.6.1 179 | - nmslib==2.0.6 180 | - psutil==5.7.2 181 | - pybind11==2.5.0 182 | - pysbd==0.3.1 183 | - scispacy==0.2.5 184 | - tables==3.5.1 185 | prefix: /afs/csail.mit.edu/u/m/mmd/.conda/envs/mimic_extract_py36 186 | -------------------------------------------------------------------------------- 
/resources/static_data_spec.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "static_patient_data", 3 | "title": "Static attributes of ICU patients extracted from MIMIC-III", 4 | "description": "Includes demographics and info about hospital/ICU admissions", 5 | "license": "The use of data is conditional on citing the original data sources.", 6 | "resources": [ 7 | { 8 | "name": "static_tabular_data", 9 | "path": "static_data.csv", 10 | "format": "csv", 11 | "mediatype": "text/csv", 12 | "profile": "tabular-data-resource", 13 | "$schema": "https://frictionlessdata.io/schemas/table-schema.json", 14 | "schema": { 15 | "fields": [ 16 | { 17 | "name": "subject_id", 18 | "description": "ID number for subject within PATIENTS table", 19 | "type": "integer", 20 | "constraints" : { 21 | "required" : true 22 | } 23 | }, 24 | { 25 | "name": "hadm_id", 26 | "description": "ID number for subject within ADMISSIONS table", 27 | "type": "integer", 28 | "constraints" : { 29 | "required" : true 30 | } 31 | }, 32 | { 33 | "name": "icustay_id", 34 | "description": "ID number for subject within ICUSTAYS table", 35 | "type": "integer", 36 | "constraints" : { 37 | "required" : true 38 | } 39 | }, 40 | { 41 | "name": "gender", 42 | "description": "Indicates subject gender. 'M'=male or 'F'=female.", 43 | "type": "string", 44 | "pandas_dtype": "category", 45 | "constraints" : { 46 | "required" : true 47 | } 48 | }, 49 | { 50 | "name": "ethnicity", 51 | "description": "Indicates subject ethnicity. 
Many human-readable categories/sub-categories.", 52 | "type": "string", 53 | "pandas_dtype": "category", 54 | "constraints" : { 55 | "required" : true 56 | } 57 | }, 58 | { 59 | "name": "age", 60 | "type": "number", 61 | "description": "Age of patient at admission (in fractional years)", 62 | "unit" : "year", 63 | "constraints" : { 64 | "required" : true 65 | } 66 | }, 67 | { 68 | "name": "insurance", 69 | "type": "string", 70 | "description": "Insurance type of patient at admission.", 71 | "unit" : "category", 72 | "constraints" : { 73 | "required" : true 74 | } 75 | }, 76 | { 77 | "name": "admittime", 78 | "type": "datetime", 79 | "description": "Datetime of subject's admission.", 80 | "constraints" : { 81 | "required" : true 82 | } 83 | }, 84 | { 85 | "name": "diagnosis_at_admission", 86 | "type": "string", 87 | "description": "The admitting physician's diagnosis for this patient (why were they admitted).", 88 | "constraints" : { 89 | "required" : true 90 | } 91 | }, 92 | { 93 | "name": "dischtime", 94 | "type": "datetime", 95 | "description": "Datetime of subject's discharge.", 96 | "constraints" : { 97 | "required" : true 98 | } 99 | }, 100 | { 101 | "name": "discharge_location", 102 | "type": "string", 103 | "description": "To where the patient was discharged.", 104 | "constraints" : { 105 | "required" : true 106 | } 107 | }, 108 | { 109 | "name": "fullcode_first", 110 | "type": "integer", 111 | "description": "Did the patient arrive with full code status?" 112 | }, 113 | { 114 | "name": "dnr_first", 115 | "type": "integer", 116 | "description": "Did the patient arrive with DNR status?" 117 | }, 118 | { 119 | "name": "fullcode", 120 | "type": "integer", 121 | "description": "Was the patient ever full-code?" 122 | }, 123 | { 124 | "name": "dnr", 125 | "type": "integer", 126 | "description": "Was the patient ever DNR?" 
127 | }, 128 | { 129 | "name": "dnr_first_charttime", 130 | "type": "datetime", 131 | "description": "At what time was the patient transitioned to DNR?" 132 | }, 133 | { 134 | "name": "timecmo_chart", 135 | "type": "datetime", 136 | "description": "At what time was the patient transitioned to CMO?" 137 | }, 138 | { 139 | "name": "cmo_first", 140 | "type": "integer", 141 | "description": "Were comfort measures under order at the beginning of the stay?" 142 | }, 143 | { 144 | "name": "cmo_last", 145 | "type": "integer", 146 | "description": "Were comfort measures under order at the end of the stay?" 147 | }, 148 | { 149 | "name": "cmo", 150 | "type": "integer", 151 | "description": "Were comfort measures ever under order during the stay?" 152 | }, 153 | { 154 | "name": "deathtime", 155 | "type": "datetime", 156 | "description": "Datetime of subject's death. NaN if subject did not die.", 157 | "constraints" : { 158 | "required" : false 159 | } 160 | }, 161 | { 162 | "name": "intime", 163 | "type": "datetime", 164 | "description": "Datetime of subject's intake into ICU.", 165 | "constraints" : { 166 | "required" : true 167 | } 168 | }, 169 | { 170 | "name": "outtime", 171 | "type": "datetime", 172 | "description": "Datetime of subject's exit from ICU.", 173 | "constraints" : { 174 | "required" : true 175 | } 176 | }, 177 | { 178 | "name": "los_icu", 179 | "type": "number", 180 | "description": "Length-of-stay in the ICU in days.", 181 | "unit": "day", 182 | "constraints" : { 183 | "required" : true 184 | } 185 | }, 186 | { 187 | "name": "admission_type", 188 | "type": "string", 189 | "description": "Category of admission: {'ELECTIVE', 'EMERGENCY', 'URGENT'}.", 190 | "pandas_dtype": "category" 191 | }, 192 | { 193 | "name": "first_careunit", 194 | "type": "string", 195 | "description": "Category of hospital unit where first admitted: {'CCU', 'CSRU', 'MICU', 'SICU', 'TSICU'}", 196 | "pandas_dtype": "category" 197 | }, 198 | { 199 | "name": "mort_icu", 200 | "type": 
"integer", 201 | "description": "Indicates if subject died in ICU. 1 if died, 0 otherwise.", 202 | "pandas_dtype": "integer" 203 | }, 204 | { 205 | "name": "mort_hosp", 206 | "type": "integer", 207 | "description": "Indicates if subject died in hospital. 1 if died, 0 otherwise.", 208 | "pandas_dtype": "integer" 209 | }, 210 | { 211 | "name": "hospital_expire_flag", 212 | "type": "integer", 213 | "description": "TODO ???", 214 | "pandas_dtype": "integer" 215 | }, 216 | { 217 | "name": "hospstay_seq", 218 | "type": "integer", 219 | "description": "TODO ???", 220 | "pandas_dtype": "integer" 221 | }, 222 | { 223 | "name": "readmission_30", 224 | "type": "integer", 225 | "description": "Indicates if the patient will be readmitted to icu within 30 days. 1 if readmitted, 0 otherwise.", 226 | "pandas_dtype": "integer" 227 | } 228 | ], 229 | "missingValues": "nan", 230 | "primaryKey": ["subject_id", "hadm_id", "icustay_id"] 231 | } 232 | } 233 | ] 234 | } 235 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # **MIMIC-Extract**:A Data Extraction, Preprocessing, and Representation Pipeline for MIMIC-III 2 | 3 | # About 4 | This repo contains code for **MIMIC-Extract**. It has been divided into the following folders: 5 | * Data: Locally contains the data to be extracted. 6 | * Notebooks: Jupyter Notebooks demonstrating test cases and usage of output data in risk and intervention prediction tasks. 7 | * Resources: Consist of Rohit_itemid.txt which describes the correlation of MIMIC-III item ids with those of MIMIC II as used by Rohit; itemid_to_variable_map.csv which is the main file used in data extraction - consists of groupings of item ids as well as which item ids are ready to extract; variable_ranges.csv which describes the normal variable ranges for the levels assisting in extraction of proper data. 
It also contains expected schema of output tables. 8 | * Utils: scripts and detailed instructions for running **MIMIC-Extract** data pipeline. 9 | * `mimic_direct_extract.py`: extraction script. 10 | 11 | # Paper 12 | If you use this code in your research, please cite the following publication: 13 | 14 | ``` 15 | Shirly Wang, Matthew B. A. McDermott, Geeticka Chauhan, Michael C. Hughes, Tristan Naumann, 16 | and Marzyeh Ghassemi. MIMIC-Extract: A Data Extraction, Preprocessing, and Representation 17 | Pipeline for MIMIC-III. arXiv:1907.08322. 18 | ``` 19 | 20 | # Pre-processed Output 21 | If you simply wish to use the output of this pipeline in your own research, a preprocessed version with 22 | default parameters is available via gcp, 23 | [here](https://console.cloud.google.com/storage/browser/mimic_extract). 24 | 25 | To access this, you will need to be credentialed for MIMIC-III GCP access through physionet. Instructions for 26 | that are available [on physionet](https://mimic.physionet.org/gettingstarted/cloud/). 27 | 28 | This output is released on an as-is basis, with no guarantees, but if you find any issues with it please let 29 | us know via Github issues. 30 | 31 | # Step-by-step instructions 32 | The first several steps are the same here as above. These instructions are tested with mimic-code at version 33 | 762943eab64deb30bdb2abcf7db43602ccb25908 34 | 35 | ## Step 0: Required software and prereqs 36 | 37 | Your local system should have the following executables on the PATH: 38 | 39 | * conda 40 | * psql (PostgreSQL 9.4 or higher) 41 | * git 42 | * MIMIC-iii psql relational database (Refer to [MIT-LCP Repo](https://github.com/MIT-LCP/mimic-code)) 43 | 44 | All instructions below should be executed from a terminal, with current directory set to utils/ 45 | 46 | ## Step 1: Create conda environment 47 | 48 | Next, make a new conda environment from [mimic_extract_env_py36.yml](../mimic_extract_env_py36.yml) and 49 | activate that environment. 
50 | 51 | ``` 52 | conda env create --force -f ../mimic_extract_env_py36.yml 53 | ``` 54 | 55 | This step will _report failure on the pip installation stage_. This is not the end of the world. Instead, 56 | simply activate the environment (which should work despite the former "failure"): 57 | 58 | ``` 59 | conda activate mimic_data_extraction 60 | ``` 61 | 62 | And then install any failed packages with pip (e.g., `pip install [package]`). This may include, in 63 | particular, packages: `datapackage`, `spacy`, and `scispacy`. 64 | You will also then need to install the english language model for spacy, via: 65 | `python -m spacy download en_core_web_sm` 66 | 67 | #### Expected Outcome 68 | 69 | The desired enviroment will be created and activated. 70 | 71 | #### Expected Resources 72 | 73 | Will typically take less than 5 minutes. 74 | Requires a good internet connection. 75 | 76 | ## Step 3: Build Views for Feature Extraction 77 | 78 | Materialized views in the MIMIC PostgreSQL database will be generated. 79 | This includes all concept tables in [MIT-LCP Repo](https://github.com/MIT-LCP/mimic-code) and tables for 80 | extracting non-mechanical ventilation, and injections of crystalloid bolus and colloid bolus. 81 | 82 | Note that you need to have schema edit permission on your postgres user to make concepts in this way. First, 83 | you must clone this github repository to a directory, which here we assume is stored in the environment 84 | variable `$MIMIC_CODE_DIR`. After cloning, follow these instructions: 85 | 86 | ``` 87 | cd $MIMIC_CODE_DIR/concepts 88 | psql -d mimic -f postgres-functions.sql 89 | bash postgres_make_concepts.sh 90 | ``` 91 | 92 | Next, you'll need to build 3 additional materialized views necessary for this pipeline. To do this (again with 93 | schema edit permission), navigate to `utils` and run `bash postgres_make_extended_concepts.sh` followed by 94 | `psql -d mimic -f niv-durations.sql`. 
95 | 96 | ## Step 4: Set Cohort Selection and Extraction Criteria 97 | 98 | Next, navigate to the root directory of _this repository_, activate your conda environment and run 99 | `python mimic_direct_extract.py ...` with your args as desired. 100 | 101 | #### Expected Outcome 102 | 103 | The default setting will create an hdf5 file inside MIMIC_EXTRACT_OUTPUT_DIR with four tables: 104 | * **patients**: static demographics, static outcomes 105 | * One row per (subj_id,hadm_id,icustay_id) 106 | 107 | * **vitals_labs**: time-varying vitals and labs (hourly mean, count and standard deviation) 108 | * One row per (subj_id,hadm_id,icustay_id,hours_in) 109 | 110 | * **vitals_labs_mean**: time-varying vitals and labs (hourly mean only) 111 | * One row per (subj_id,hadm_id,icustay_id,hours_in) 112 | 113 | * **interventions**: hourly binary indicators for administered interventions 114 | * One row per (subj_id,hadm_id,icustay_id,hours_in) 115 | 116 | 117 | #### Expected Resources 118 | 119 | Will probably take 5-10 hours. 120 | Will require a good machine with at least 50GB RAM. 121 | 122 | #### Setting the population size 123 | 124 | By default, this step builds a dataset with all eligible patients. Sometimes, we wish to run with only a small subset of patients (debugging, etc.). 125 | 126 | To do this, just set the POP_SIZE environmental variable. For example, to build a curated dataset with only the first 1000 patients, we could do: 127 | 128 | 129 | # Common Errors / FAQ: 130 | 1. When running `mimic_direct_extract.py`, I encounter an error of the form: 131 | ``` 132 | psycopg2.OperationalError: could not connect to server: No such file or directory 133 | Is the server running locally and accepting 134 | connections on Unix domain socket "/tmp/.s.PGSQL.5432"? 
135 | ``` 136 | or 137 | ``` 138 | psycopg2.OperationalError: could not connect to server: No such file or directory 139 | Is the server running locally and accepting 140 | connections on Unix domain socket "/var/run/postgresql/..."? 141 | ``` 142 | For this issue, see [this stackoverflow 143 | post](https://stackoverflow.com/questions/5500332/cant-connect-the-postgresql-with-psycopg2) and use our 144 | `--psql_host` argument, which you can pass either directly when calling `mimic_direct_extract.py` or use 145 | via the Makefile instructions by setting the `HOST` environment variable. 146 | 2. `relation "code_status" does not exist` 147 | In this error, the table `code_status` hasn't been built successfully, and you'll need to rebuild your 148 | MIMIC-III concepts. Instructions for this can be found in Step 3 of either instruction set. Also see 149 | below for our issues specific to building concepts. 150 | 151 | ## Common Errors with Building Concepts 152 | 1. When I built concepts, the system complained it didn't have permissions to edit schema mimiciii. This 153 | error indicates that your default psql user doesn't have authority to build concepts. You need to login 154 | as a higher-authority postgres user and have it run the commands. This is common in setups where 155 | multiple users have read-only access to postgres at once. If you do this, you may need to take extra 156 | steps to expose the resulting concepts tables to other users. 157 | 2. I built concepts, but now the code can't see them. This can be for a few reasons - firstly, you may not 158 | have permissions to read the new tables, and secondly, they may be in the wrong namespace. Our code 159 | expects them to be fully visible and within the mimiciii namespace. To adjust these properties, login as 160 | the owning postgres user and adjust the permissions and namespaces of those views manually. 
A few 161 | commands that are relevant are: 162 | * `ALTER TABLE code_status SET SCHEMA mimiciii;` 163 | * `GRANT SELECT ON mimiciii.code_status TO [USER];` 164 | Note that you'll need to run these on _every_ concepts table accessed by the script. 165 | -------------------------------------------------------------------------------- /heuristic_sentence_splitter.py: -------------------------------------------------------------------------------- 1 | # Original source taken from https://github.com/wboag/mimic-tokenize/blob/master/heuristic-tokenize.py at 2 | # commit e953d271bbb4c53aee5cc9a7b8be870a6b007604 3 | 4 | import re, nltk 5 | 6 | def is_inline_title(text): 7 | m = re.search('^([a-zA-Z ]+:) ', text) 8 | if not m: return False 9 | return is_title(m.groups()[0]) 10 | 11 | stopwords = set(['of', 'on', 'or']) 12 | def is_title(text): 13 | if not text.endswith(':'): return False 14 | text = text[:-1] 15 | 16 | # be a little loose here... can tighten if it causes errors 17 | text = re.sub('(\([^\)]*?\))', '', text) 18 | 19 | # Are all non-stopwords capitalized? 20 | for word in text.split(): 21 | if word in stopwords: continue 22 | if not word[0].isupper(): return False 23 | 24 | # I noticed this is a common issue (non-title aapears at beginning of line) 25 | if text == 'Disp': return False 26 | 27 | # optionally: could assert that it is less than 6 tokens 28 | return True 29 | 30 | 31 | def sent_tokenize_rules(text): 32 | 33 | # long sections are OBVIOUSLY different sentences 34 | text = re.sub('---+', '\n\n-----\n\n', text) 35 | text = re.sub('___+', '\n\n_____\n\n', text) 36 | text = re.sub('\n\n+', '\n\n', text) 37 | 38 | segments = text.split('\n\n') 39 | 40 | # strategy: break down segments and chip away structure until just prose. 
41 | # once you have prose, use nltk.sent_tokenize() 42 | 43 | ### Separate section headers ### 44 | new_segments = [] 45 | 46 | # deal with this one edge case (multiple headers per line) up front 47 | m1 = re.match('(Admission Date:) (.*) (Discharge Date:) (.*)', segments[0]) 48 | if m1: 49 | new_segments += list(map(lambda s: s.strip(), m1.groups())) 50 | segments = segments[1:] 51 | 52 | m2 = re.match('(Date of Birth:) (.*) (Sex:) (.*)' , segments[0]) 53 | if m2: 54 | new_segments += list(map(lambda s: s.strip(), m2.groups())) 55 | segments = segments[1:] 56 | 57 | for segment in segments: 58 | # find all section headers 59 | possible_headers = re.findall('\n([A-Z][^\n:]+:)', '\n'+segment) 60 | #assert len(possible_headers) < 2, str(possible_headers) 61 | headers = [] 62 | for h in possible_headers: 63 | #print 'cand=[%s]' % h 64 | if is_title(h.strip()): 65 | #print '\tYES=[%s]' % h 66 | headers.append(h.strip()) 67 | 68 | # split text into new segments, delimiting on these headers 69 | for h in headers: 70 | h = h.strip() 71 | 72 | # split this segment into 3 smaller segments 73 | ind = segment.index(h) 74 | prefix = segment[:ind].strip() 75 | rest = segment[ ind+len(h):].strip() 76 | 77 | # add the prefix (potentially empty) 78 | if len(prefix) > 0: 79 | new_segments.append(prefix.strip()) 80 | 81 | # add the header 82 | new_segments.append(h) 83 | 84 | # remove the prefix from processing (very unlikely to be empty) 85 | segment = rest.strip() 86 | 87 | # add the final piece (aka what comes after all headers are processed) 88 | if len(segment) > 0: 89 | new_segments.append(segment.strip()) 90 | 91 | # copy over the new list of segments (further segmented than original segments) 92 | segments = list(new_segments) 93 | new_segments = [] 94 | 95 | 96 | ### Low-hanging fruit: "_____" is a delimiter 97 | for segment in segments: 98 | subsections = segment.split('\n_____\n') 99 | new_segments.append(subsections[0]) 100 | for ss in subsections[1:]: 101 | 
new_segments.append('_____') 102 | new_segments.append(ss) 103 | 104 | segments = list(new_segments) 105 | new_segments = [] 106 | 107 | 108 | ### Low-hanging fruit: "-----" is a delimiter 109 | for segment in segments: 110 | subsections = segment.split('\n-----\n') 111 | new_segments.append(subsections[0]) 112 | for ss in subsections[1:]: 113 | new_segments.append('-----') 114 | new_segments.append(ss) 115 | 116 | segments = list(new_segments) 117 | new_segments = [] 118 | 119 | ''' 120 | for segment in segments: 121 | print '------------START------------' 122 | print segment 123 | print '-------------END-------------' 124 | print 125 | exit() 126 | ''' 127 | 128 | ### Separate enumerated lists ### 129 | for segment in segments: 130 | old_len = len(new_segments) 131 | if not re.search('\n\s*\d+\.', '\n'+segment): 132 | new_segments.append(segment) 133 | continue 134 | 135 | #print '------------START------------' 136 | #print segment 137 | #print '-------------END-------------' 138 | #print 139 | 140 | # generalizes in case the list STARTS this section 141 | segment = '\n'+segment 142 | 143 | # determine whether this segment contains a bulleted list (assumes i,i+1,...,n) 144 | start = int(re.search('\n\s*(\d+)\.', segment).groups()[0]) 145 | n = start 146 | while re.search('\n\s*%d\.'%n,segment): 147 | n += 1 148 | n -= 1 149 | 150 | # no bulleted list 151 | if n < 1 or (n - start) == 0: 152 | new_segments.append(segment) 153 | continue 154 | 155 | #print '------------START------------' 156 | #print segment 157 | #print '-------------END-------------' 158 | #print start,n 159 | #print 160 | 161 | # break each list into its own line 162 | # challenge: not clear how to tell when the list ends if more text happens next 163 | for i in range(start,n+1): 164 | matching_text = re.search('(\n\s*\d+\.)',segment).groups()[0] 165 | prefix = segment[:segment.index(matching_text) ].strip() 166 | segment = segment[ segment.index(matching_text):].strip() 167 | 168 | if 
len(prefix)>0: 169 | new_segments.append(prefix) 170 | 171 | if len(segment)>0: 172 | new_segments.append(segment) 173 | 174 | 175 | #print 'Out Segments:' 176 | #for out_segment in new_segments[old_len:]: 177 | # print '------------START------------' 178 | # print out_segment 179 | # print '-------------END-------------' 180 | #print('\n\n') 181 | 182 | segments = list(new_segments) 183 | new_segments = [] 184 | 185 | ''' 186 | TODO: Big Challenge 187 | There is so much variation in what makes a list. Intuitively, I can tell it's a 188 | list because it shows repeated structure (often following a header) 189 | Examples of some lists (with numbers & symptoms changed around to noise) 190 | Past Medical History: 191 | -- Hyperlipidemia 192 | -- lactose intolerance 193 | -- Hypertension 194 | Physical Exam: 195 | Vitals - T 82.2 BP 123/23 HR 73 R 21 75% on 2L NC 196 | General - well appearing male, sitting up in chair in NAD 197 | Neck - supple, JVP elevated to angle of jaw 198 | CV - distant heart sounds, RRR, faint __PHI_43__ murmur at 199 | Labs: 200 | __PHI_10__ 12:00PM BLOOD WBC-8.8 RBC-8.88* Hgb-88.8* Hct-88.8* 201 | MCV-88 MCH-88.8 MCHC-88.8 RDW-88.8* Plt Ct-888 202 | __PHI_14__ 04:54AM BLOOD WBC-8.8 RBC-8.88* Hgb-88.8* Hct-88.8* 203 | MCV-88 MCH-88.8 MCHC-88.8 RDW-88.8* Plt Ct-888 204 | __PHI_23__ 03:33AM BLOOD WBC-8.8 RBC-8.88* Hgb-88.8* Hct-88.8* 205 | MCV-88 MCH-88.8 MCHC-88.8 RDW-88.8* Plt Ct-888 206 | __PHI_109__ 03:06AM BLOOD WBC-8.8 RBC-8.88* Hgb-88.8* Hct-88.8* 207 | MCV-88 MCH-88.8 MCHC-88.8 RDW-88.8* Plt Ct-888 208 | __PHI_1__ 05:09AM BLOOD WBC-8.8 RBC-8.88* Hgb-88.8* Hct-88.8* 209 | MCV-88 MCH-88.8 MCHC-88.8 RDW-88.8* Plt Ct-888 210 | __PHI_26__ 04:53AM BLOOD WBC-8.8 RBC-8.88* Hgb-88.8* Hct-88.8* 211 | MCV-88 MCH-88.8 MCHC-88.8 RDW-88.8* Plt Ct-888 212 | __PHI_301__ 05:30AM BLOOD WBC-8.8 RBC-8.88* Hgb-88.8* Hct-88.8* 213 | MCV-88 MCH-88.8 MCHC-88.8 RDW-88.8* Plt Ct-888 214 | Medications on Admission: 215 | Allopurinol 100 mg DAILY 216 | Aspirin 250 
mg DAILY 217 | Atorvastatin 10 mg DAILY 218 | Glimepiride 1 mg once a week. 219 | Hexavitamin DAILY 220 | Lasix 50mg M-W-F; 60mg T-Th-Sat-Sun 221 | Metoprolol 12.5mg TID 222 | Prilosec OTC 20 mg once a day 223 | Verapamil 120 mg SR DAILY 224 | ''' 225 | 226 | ### Remove lines with inline titles from larger segments (clearly nonprose) 227 | for segment in segments: 228 | ''' 229 | With: __PHI_6__, MD __PHI_5__ 230 | Building: De __PHI_45__ Building (__PHI_32__ Complex) __PHI_87__ 231 | Campus: WEST 232 | ''' 233 | 234 | lines = segment.split('\n') 235 | 236 | buf = [] 237 | for line in lines: 238 | if is_inline_title(line): 239 | if len(buf) > 0: new_segments.append('\n'.join(buf)) 240 | buf = [] 241 | buf.append(line) 242 | if len(buf) > 0: 243 | new_segments.append('\n'.join(buf)) 244 | 245 | segments = list(new_segments) 246 | new_segments = [] 247 | 248 | # Going to put one-liner answers with their sections 249 | # (aka A A' B B' C D D' --> AA' BB' C DD' ) 250 | N = len(segments) 251 | for i in range(N): 252 | # avoid segfaults 253 | if i==0: 254 | new_segments.append(segments[i]) 255 | continue 256 | if segments[i].count('\n') == 0 and is_title(segments[i-1]) and not is_title(segments[i]): 257 | if (i == N-1) or is_title(segments[i+1]): 258 | new_segments = new_segments[:-1] 259 | new_segments.append(segments[i-1] + ' ' + segments[i]) 260 | else: new_segments.append(segments[i]) 261 | else: 262 | new_segments.append(segments[i]) 263 | 264 | segments = list(new_segments) 265 | new_segments = [] 266 | 267 | ''' 268 | Should do some kind of regex to find "TEST: value" in segments? 269 | Indication: Source of embolism. 270 | BP (mm Hg): 145/89 271 | HR (bpm): 80 272 | Note: I made a temporary hack that fixes this particular problem. 273 | We'll see how it shakes out 274 | ''' 275 | 276 | 277 | ''' 278 | Separate ALL CAPS lines (Warning... is there ever prose that can be all caps?) 
279 | ''' 280 | 281 | 282 | 283 | ''' 284 | for segment in segments: 285 | print '------------START------------' 286 | print segment 287 | print '-------------END-------------' 288 | print 289 | exit() 290 | ''' 291 | 292 | return segments 293 | -------------------------------------------------------------------------------- /notebooks/mmd_grud_utils.py: -------------------------------------------------------------------------------- 1 | import copy, math, os, pickle, time, pandas as pd, numpy as np, scipy.stats as ss 2 | 3 | from sklearn.linear_model import LogisticRegression 4 | from sklearn.ensemble import RandomForestClassifier 5 | from sklearn.metrics import average_precision_score, roc_auc_score, accuracy_score, f1_score 6 | 7 | import torch, torch.utils.data as utils, torch.nn as nn, torch.nn.functional as F, torch.optim as optim 8 | from torch.autograd import Variable 9 | from torch.nn.parameter import Parameter 10 | 11 | def to_3D_tensor(df): 12 | idx = pd.IndexSlice 13 | return np.dstack((df.loc[idx[:,:,:,i], :].values for i in sorted(set(df.index.get_level_values('hours_in'))))) 14 | def prepare_dataloader(df, Ys, batch_size, shuffle=True): 15 | """ 16 | dfs = (df_train, df_dev, df_test). 17 | df_* = (subject, hadm, icustay, hours_in) X (level2, agg fn \ni {mask, mean, time}) 18 | Ys_series = (subject, hadm, icustay) => label. 19 | """ 20 | X = torch.from_numpy(to_3D_tensor(df).astype(np.float32)) 21 | label = torch.from_numpy(Ys.values.astype(np.int64)) 22 | dataset = utils.TensorDataset(X, label) 23 | 24 | return utils.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, drop_last = True) 25 | 26 | class FilterLinear(nn.Module): 27 | def __init__(self, in_features, out_features, filter_square_matrix, bias=True): 28 | ''' 29 | filter_square_matrix : filter square matrix, whose each elements is 0 or 1. 
30 | ''' 31 | super(FilterLinear, self).__init__() 32 | self.in_features = in_features 33 | self.out_features = out_features 34 | 35 | assert in_features > 1 and out_features > 1, "Passing in nonsense sizes" 36 | 37 | use_gpu = torch.cuda.is_available() 38 | self.filter_square_matrix = None 39 | if use_gpu: self.filter_square_matrix = Variable(filter_square_matrix.cuda(), requires_grad=False) 40 | else: self.filter_square_matrix = Variable(filter_square_matrix, requires_grad=False) 41 | 42 | self.weight = Parameter(torch.Tensor(out_features, in_features)) 43 | 44 | if bias: self.bias = Parameter(torch.Tensor(out_features)) 45 | else: self.register_parameter('bias', None) 46 | self.reset_parameters() 47 | 48 | def reset_parameters(self): 49 | stdv = 1. / math.sqrt(self.weight.size(1)) 50 | self.weight.data.uniform_(-stdv, stdv) 51 | if self.bias is not None: self.bias.data.uniform_(-stdv, stdv) 52 | 53 | def forward(self, x): 54 | return F.linear( 55 | x, 56 | self.filter_square_matrix.mul(self.weight), 57 | self.bias 58 | ) 59 | 60 | def __repr__(self): 61 | return self.__class__.__name__ + '(' \ 62 | + 'in_features=' + str(self.in_features) \ 63 | + ', out_features=' + str(self.out_features) \ 64 | + ', bias=' + str(self.bias is not None) + ')' 65 | 66 | class GRUD(nn.Module): 67 | def __init__(self, input_size, cell_size, hidden_size, X_mean, batch_size = 0, output_last = False): 68 | """ 69 | With minor modifications from https://github.com/zhiyongc/GRU-D/ 70 | 71 | Recurrent Neural Networks for Multivariate Times Series with Missing Values 72 | GRU-D: GRU exploit two representations of informative missingness patterns, i.e., masking and time interval. 73 | cell_size is the size of cell_state. 
74 | 75 | Implemented based on the paper: 76 | @article{che2018recurrent, 77 | title={Recurrent neural networks for multivariate time series with missing values}, 78 | author={Che, Zhengping and Purushotham, Sanjay and Cho, Kyunghyun and Sontag, David and Liu, Yan}, 79 | journal={Scientific reports}, 80 | volume={8}, 81 | number={1}, 82 | pages={6085}, 83 | year={2018}, 84 | publisher={Nature Publishing Group} 85 | } 86 | 87 | GRU-D: 88 | input_size: variable dimension of each time 89 | hidden_size: dimension of hidden_state 90 | mask_size: dimension of masking vector 91 | X_mean: the mean of the historical input data 92 | """ 93 | 94 | super(GRUD, self).__init__() 95 | 96 | self.hidden_size = hidden_size 97 | self.delta_size = input_size 98 | self.mask_size = input_size 99 | 100 | use_gpu = torch.cuda.is_available() 101 | if use_gpu: 102 | self.identity = torch.eye(input_size).cuda() 103 | self.zeros = Variable(torch.zeros(batch_size, input_size).cuda()) 104 | self.zeros_h = Variable(torch.zeros(batch_size, self.hidden_size).cuda()) 105 | self.X_mean = Variable(torch.Tensor(X_mean).cuda()) 106 | else: 107 | self.identity = torch.eye(input_size) 108 | self.zeros = Variable(torch.zeros(batch_size, input_size)) 109 | self.zeros_h = Variable(torch.zeros(batch_size, self.hidden_size)) 110 | self.X_mean = Variable(torch.Tensor(X_mean)) 111 | 112 | self.zl = nn.Linear(input_size + hidden_size + self.mask_size, hidden_size) # Wz, Uz are part of the same network. the bias is bz 113 | self.rl = nn.Linear(input_size + hidden_size + self.mask_size, hidden_size) # Wr, Ur are part of the same network. the bias is br 114 | self.hl = nn.Linear(input_size + hidden_size + self.mask_size, hidden_size) # W, U are part of the same network. the bias is b 115 | 116 | self.gamma_x_l = FilterLinear(self.delta_size, self.delta_size, self.identity) 117 | 118 | self.gamma_h_l = nn.Linear(self.delta_size, self.hidden_size) # this was wrong in available version. 
remember to raise the issue 119 | 120 | self.output_last = output_last 121 | 122 | self.fc = nn.Linear(self.hidden_size, 2) 123 | self.bn= torch.nn.BatchNorm1d(2, eps=1e-05, momentum=0.1, affine=True) 124 | self.drop=nn.Dropout(p=0.5, inplace=False) 125 | 126 | def step(self, x, x_last_obsv, x_mean, h, mask, delta): 127 | """ 128 | Inputs: 129 | x: input tensor 130 | x_last_obsv: input tensor with forward fill applied 131 | x_mean: the mean of each feature 132 | h: the hidden state of the network 133 | mask: the mask of whether or not the current value is observed 134 | delta: the tensor indicating the number of steps since the last time a feature was observed. 135 | 136 | Returns: 137 | h: the updated hidden state of the network 138 | """ 139 | 140 | batch_size = x.size()[0] 141 | dim_size = x.size()[1] 142 | 143 | gamma_x_l_delta = self.gamma_x_l(delta) 144 | delta_x = torch.exp(-torch.max(self.zeros, gamma_x_l_delta)) #exponentiated negative rectifier 145 | 146 | gamma_h_l_delta = self.gamma_h_l(delta) 147 | delta_h = torch.exp(-torch.max(self.zeros_h, gamma_h_l_delta)) #self.zeros became self.zeros_h to accomodate hidden size != input size 148 | 149 | x_mean = x_mean.repeat(batch_size, 1) 150 | 151 | x = mask * x + (1 - mask) * (delta_x * x_last_obsv + (1 - delta_x) * x_mean) 152 | h = delta_h * h 153 | 154 | combined = torch.cat((x, h, mask), 1) 155 | z = torch.sigmoid(self.zl(combined)) #sigmoid(W_z*x_t + U_z*h_{t-1} + V_z*m_t + bz) 156 | r = torch.sigmoid(self.rl(combined)) #sigmoid(W_r*x_t + U_r*h_{t-1} + V_r*m_t + br) 157 | combined_new = torch.cat((x, r*h, mask), 1) 158 | h_tilde = torch.tanh(self.hl(combined_new)) #tanh(W*x_t +U(r_t*h_{t-1}) + V*m_t) + b 159 | h = (1 - z) * h + z * h_tilde 160 | 161 | return h 162 | 163 | def forward(self, X, X_last_obsv, Mask, Delta): 164 | batch_size = X.size(0) 165 | # type_size = input.size(1) 166 | step_size = X.size(1) # num timepoints 167 | spatial_size = X.size(2) # num features 168 | 169 | Hidden_State = 
self.initHidden(batch_size) 170 | # X = torch.squeeze(input[:,0,:,:]) 171 | # X_last_obsv = torch.squeeze(input[:,1,:,:]) 172 | # Mask = torch.squeeze(input[:,2,:,:]) 173 | # Delta = torch.squeeze(input[:,3,:,:]) 174 | 175 | outputs = None 176 | for i in range(step_size): 177 | Hidden_State = self.step( 178 | torch.squeeze(X[:,i:i+1,:], 1), 179 | torch.squeeze(X_last_obsv[:,i:i+1,:], 1), 180 | torch.squeeze(self.X_mean[:,i:i+1,:], 1), 181 | Hidden_State, 182 | torch.squeeze(Mask[:,i:i+1,:], 1), 183 | torch.squeeze(Delta[:,i:i+1,:], 1), 184 | ) 185 | if outputs is None: 186 | outputs = Hidden_State.unsqueeze(1) 187 | else: 188 | outputs = torch.cat((Hidden_State.unsqueeze(1), outputs), 1) 189 | 190 | # we want to predict a binary outcome 191 | #Apply 50% dropout and batch norm here 192 | self.drop(self.bn(self.fc(Hidden_State))) 193 | return self.drop(self.bn(self.fc(Hidden_State))) 194 | 195 | # if self.output_last: 196 | # return outputs[:,-1,:] 197 | # else: 198 | # return outputs 199 | 200 | def initHidden(self, batch_size): 201 | use_gpu = torch.cuda.is_available() 202 | if use_gpu: 203 | Hidden_State = Variable(torch.zeros(batch_size, self.hidden_size).cuda()) 204 | return Hidden_State 205 | else: 206 | Hidden_State = Variable(torch.zeros(batch_size, self.hidden_size)) 207 | return Hidden_State 208 | 209 | 210 | def Train_Model( 211 | model, train_dataloader, valid_dataloader, num_epochs = 300, patience = 3, min_delta = 1e-5, learning_rate=1e-3, batch_size=None 212 | ): 213 | 214 | print('Model Structure: ', model) 215 | print('Start Training ... 
') 216 | 217 | model 218 | 219 | if (type(model) == nn.modules.container.Sequential): 220 | output_last = model[-1].output_last 221 | print('Output type dermined by the last layer') 222 | else: 223 | output_last = model.output_last 224 | print('Output type dermined by the model') 225 | 226 | loss_MSE = torch.nn.MSELoss() 227 | loss_nll=torch.nn.NLLLoss() 228 | loss_CEL=torch.nn.CrossEntropyLoss() 229 | loss_L1 = torch.nn.L1Loss() 230 | 231 | # optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate, alpha=0.99) 232 | optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 233 | use_gpu = False#torch.cuda.is_available() 234 | 235 | interval = 100 236 | losses_train = [] 237 | losses_valid = [] 238 | losses_epochs_train = [] 239 | losses_epochs_valid = [] 240 | 241 | cur_time = time.time() 242 | pre_time = time.time() 243 | 244 | # Variables for Early Stopping 245 | is_best_model = 0 246 | patient_epoch = 0 247 | for epoch in range(num_epochs): 248 | 249 | trained_number = 0 250 | 251 | valid_dataloader_iter = iter(valid_dataloader) 252 | 253 | losses_epoch_train = [] 254 | losses_epoch_valid = [] 255 | 256 | for X, labels in train_dataloader: 257 | X = X.numpy() 258 | mask = torch.from_numpy(X[:, np.arange(0, X.shape[1], 3), :].astype(np.float32)) 259 | measurement = torch.from_numpy(X[:, np.arange(1, X.shape[1], 3), :].astype(np.float32)) 260 | time_ = torch.from_numpy(X[:, np.arange(2, X.shape[1], 3), :].astype(np.float32)) 261 | 262 | mask = torch.transpose(mask, 1, 2) 263 | measurement = torch.transpose(measurement, 1, 2) 264 | time_ = torch.transpose(time_, 1, 2) 265 | measurement_last_obsv = measurement 266 | 267 | assert measurement.size()[0] == batch_size, "Batch Size doesn't match! 
%s" % str(measurement.size()) 268 | 269 | if use_gpu: 270 | convert_to_cuda=lambda x: Variable(x.cuda()) 271 | X, X_last_obsv, Mask, Delta, labels = map(convert_to_cuda, [measurement, measurement_last_obsv, mask, time_, labels]) 272 | else: 273 | # inputs, labels = Variable(inputs), Variable(labels) 274 | convert_to_tensor=lambda x: Variable(x) 275 | X, X_last_obsv, Mask, Delta, labels = map(convert_to_tensor, [measurement, measurement_last_obsv, mask, time_, labels]) 276 | 277 | model.zero_grad() 278 | 279 | # outputs = model(inputs) 280 | prediction=model(X, X_last_obsv, Mask, Delta) 281 | 282 | # print(torch.sum(torch.sum(torch.isnan(prediction)))) 283 | 284 | # print(labels.shape) 285 | # print(prediction.shape) 286 | 287 | if output_last: 288 | loss_train = loss_CEL(torch.squeeze(prediction), torch.squeeze(labels)) 289 | else: 290 | full_labels = torch.cat((inputs[:,1:,:], labels), dim = 1) 291 | loss_train = loss_MSE(outputs, full_labels) 292 | 293 | losses_train.append(loss_train.data) 294 | losses_epoch_train.append(loss_train.data) 295 | 296 | optimizer.zero_grad() 297 | 298 | loss_train.backward() 299 | 300 | optimizer.step() 301 | 302 | # validation 303 | try: 304 | X_val, labels_val = next(valid_dataloader_iter) 305 | X_val = X_val.numpy() 306 | mask_val = torch.from_numpy(X_val[:, np.arange(0, X_val.shape[1], 3), :].astype(np.float32)) 307 | measurement_val = torch.from_numpy(X_val[:, np.arange(1, X_val.shape[1], 3), :].astype(np.float32)) 308 | time_val = torch.from_numpy(X_val[:, np.arange(2, X_val.shape[1], 3), :].astype(np.float32)) 309 | 310 | mask_val = torch.transpose(mask_val, 1, 2) 311 | measurement_val = torch.transpose(measurement_val, 1, 2) 312 | time_val = torch.transpose(time_val, 1, 2) 313 | measurement_last_obsv_val = measurement_val 314 | except StopIteration: 315 | valid_dataloader_iter = iter(valid_dataloader) 316 | X_val, labels_val = next(valid_dataloader_iter) 317 | X_val = X_val.numpy() 318 | mask_val = torch.from_numpy(X_val[:, 
np.arange(0, X_val.shape[1], 3), :].astype(np.float32)) 319 | measurement_val = torch.from_numpy(X_val[:, np.arange(1, X_val.shape[1], 3), :].astype(np.float32)) 320 | time_val = torch.from_numpy(X_val[:, np.arange(2, X_val.shape[1], 3), :].astype(np.float32)) 321 | 322 | mask_val = torch.transpose(mask_val, 1, 2) 323 | measurement_val = torch.transpose(measurement_val, 1, 2) 324 | time_val = torch.transpose(time_val, 1, 2) 325 | measurement_last_obsv_val = measurement_val 326 | 327 | if use_gpu: 328 | convert_to_cuda=lambda x: Variable(x.cuda()) 329 | X_val, X_last_obsv_val, Mask_val, Delta_val, labels_val = map(convert_to_cuda, [measurement_val, measurement_last_obsv_val, mask_val, time_val, labels_val]) 330 | else: 331 | # inputs, labels = Variable(inputs), Variable(labels) 332 | convert_to_tensor=lambda x: Variable(x) 333 | X_val, X_last_obsv_val, Mask_val, Delta_val, labels_val = map(convert_to_tensor, [measurement_val, measurement_last_obsv_val, mask_val, time_val, labels_val]) 334 | 335 | 336 | model.zero_grad() 337 | 338 | # outputs_val = model(inputs_val) 339 | prediction_val = model(X_val, X_last_obsv_val, Mask_val, Delta_val) 340 | 341 | # print(labels.shape) 342 | # print(prediction_val.shape) 343 | 344 | if output_last: 345 | loss_valid =loss_CEL(torch.squeeze(prediction_val), torch.squeeze(labels_val)) 346 | else: 347 | raise NotImplementedError("Should be output last!") 348 | full_labels_val = torch.cat((inputs_val[:,1:,:], labels_val), dim = 1) 349 | loss_valid = loss_MSE(outputs_val, full_labels_val) 350 | 351 | losses_valid.append(loss_valid.data) 352 | losses_epoch_valid.append(loss_valid.data) 353 | 354 | # print(sklearn.metrics.roc_auc_score(labels_val.detach().cpu().numpy(), prediction_val.detach().cpu().numpy()[:,1])) 355 | 356 | # output 357 | trained_number += 1 358 | 359 | avg_losses_epoch_train = sum(losses_epoch_train).cpu().numpy() / float(len(losses_epoch_train)) 360 | avg_losses_epoch_valid = sum(losses_epoch_valid).cpu().numpy() / 
float(len(losses_epoch_valid)) 361 | losses_epochs_train.append(avg_losses_epoch_train) 362 | losses_epochs_valid.append(avg_losses_epoch_valid) 363 | 364 | 365 | # Early Stopping 366 | if epoch == 0: 367 | is_best_model = 1 368 | best_model = model 369 | min_loss_epoch_valid = 10000.0 370 | if avg_losses_epoch_valid < min_loss_epoch_valid: 371 | min_loss_epoch_valid = avg_losses_epoch_valid 372 | else: 373 | if min_loss_epoch_valid - avg_losses_epoch_valid > min_delta: 374 | is_best_model = 1 375 | best_model = model 376 | min_loss_epoch_valid = avg_losses_epoch_valid 377 | patient_epoch = 0 378 | else: 379 | is_best_model = 0 380 | patient_epoch += 1 381 | if patient_epoch >= patience: 382 | print('Early Stopped at Epoch:', epoch) 383 | break 384 | 385 | # Print training parameters 386 | cur_time = time.time() 387 | print('Epoch: {}, train_loss: {}, valid_loss: {}, time: {}, best model: {}'.format( \ 388 | epoch, \ 389 | np.around(avg_losses_epoch_train, decimals=8),\ 390 | np.around(avg_losses_epoch_valid, decimals=8),\ 391 | np.around([cur_time - pre_time] , decimals=2),\ 392 | is_best_model) ) 393 | pre_time = cur_time 394 | # if epoch==1: 395 | # break 396 | 397 | return best_model, [losses_train, losses_valid, losses_epochs_train, losses_epochs_valid] 398 | 399 | def predict_proba(model, dataloader): 400 | """ 401 | Input: 402 | model: GRU-D model 403 | test_dataloader: containing batches of measurement, measurement_last_obsv, mask, time_, labels 404 | Returns: 405 | predictions: size[num_samples, 2] 406 | labels: size[num_samples] 407 | """ 408 | model.eval() 409 | use_gpu = False# torch.cuda.is_available() 410 | 411 | probabilities = [] 412 | labels = [] 413 | ethnicities = [] 414 | genders = [] 415 | for X, label in dataloader: 416 | X = X.numpy() 417 | mask = torch.from_numpy(X[:, np.arange(0, X.shape[1], 3), :].astype(np.float32)) 418 | measurement = torch.from_numpy(X[:, np.arange(1, X.shape[1], 3), :].astype(np.float32)) 419 | time_ = 
torch.from_numpy(X[:, np.arange(2, X.shape[1], 3), :].astype(np.float32)) 420 | 421 | mask = torch.transpose(mask, 1, 2) 422 | measurement = torch.transpose(measurement, 1, 2) 423 | time_ = torch.transpose(time_, 1, 2) 424 | measurement_last_obsv = measurement 425 | 426 | if use_gpu: 427 | convert_to_cuda=lambda x: Variable(x.cuda()) 428 | X, X_last_obsv, Mask, Delta, label = map(convert_to_cuda, [measurement, measurement_last_obsv, mask, time_, label]) 429 | else: 430 | # inputs, labels = Variable(inputs), Variable(labels) 431 | convert_to_tensor=lambda x: Variable(x) 432 | X, X_last_obsv, Mask, Delta, label = map(convert_to_tensor, [measurement, measurement_last_obsv, mask, time_, label]) 433 | 434 | 435 | prob = model(X, X_last_obsv, Mask, Delta) 436 | 437 | probabilities.append(prob.detach().cpu().data.numpy()) 438 | labels.append(label.detach().cpu().data.numpy()) 439 | 440 | return probabilities, labels 441 | -------------------------------------------------------------------------------- /resources/item_id_stat.csv: -------------------------------------------------------------------------------- 1 | itemid,label,LEVEL1,LEVEL2,mean,stdev,missing percent 2 | 211,heart rate,heart rate,heart rate,85.11720532842587,17.145789289466023,48.41582332025113 3 | 618,respiratory rate,respiratory rate,respiratory rate,19.356590679247063,5.721490437680727,49.91458249468185 4 | 646,spo2,pulse oximetry,oxygen saturation,97.01037398666034,3.4533884699224116,51.633746093739354 5 | 220045,heart rate,heart rate,heart rate,84.77979402854999,17.43969038021443,61.99548014179306 6 | 220210,respiratory rate,respiratory rate,respiratory rate,19.24884122911674,5.520934157300327,62.25504940130507 7 | 220277,o2 saturation pulseoxymetry,pulse oximetry,oxygen saturation,96.6730422133314,3.184367241856636,63.02507912477952 8 | 455,nbp [systolic],systolic blood pressure (noninvasive),systolic blood pressure,121.23039763958806,22.036006580520866,71.97837846679212 9 | 8441,nbp 
[diastolic],diastolic blood pressure (noninvasive),diastolic blood pressure,59.00310847406039,14.830207043261264,72.00423089260384 10 | 456,nbp mean,mean blood pressure (noninvasive),mean blood pressure,78.27484762141329,14.700985120201974,72.43122754950808 11 | 51,arterial bp [systolic],systolic blood pressure (arterial),systolic blood pressure,122.45984223083545,24.380945746102867,74.59578891698781 12 | 8368,arterial bp [diastolic],diastolic blood pressure (arterial),diastolic blood pressure,60.19914091791762,13.450017343017635,74.59724283197195 13 | 52,arterial bp mean,mean blood pressure (arterial),mean blood pressure,81.53031031841128,16.761669175274545,74.79583853183665 14 | 220181,non invasive blood pressure mean,mean blood pressure (noninvasive),mean blood pressure,77.18891678098602,15.008928988881522,74.9390491577743 15 | 220179,non invasive blood pressure systolic,systolic blood pressure (noninvasive),systolic blood pressure,121.05626381891265,21.445186669208272,75.103523290355 16 | 220180,non invasive blood pressure diastolic,diastolic blood pressure (noninvasive),diastolic blood pressure,63.59508798924918,14.965460189200936,75.11460939210906 17 | 198,gcs total,glascow coma scale total,glascow coma scale total,12.491195240149807,3.5862187630327247,82.83530687147483 18 | 113,cvp,central venous pressure,central venous pressure,10.632580330008674,5.414234986670563,86.38276856308673 19 | 220052,arterial blood pressure mean,mean blood pressure (arterial),mean blood pressure,81.1309772550919,18.640504934574423,86.91499231696801 20 | 220050,arterial blood pressure systolic,systolic blood pressure (arterial),systolic blood pressure,121.49062275107978,22.30167277954901,86.9640619476827 21 | 220051,arterial blood pressure diastolic,diastolic blood pressure (arterial),diastolic blood pressure,60.3850337967619,13.42507863132073,86.9679239093593 22 | 678,temperature f,temperature (f),temperature,36.94417537612146,0.7951733965056856,87.36275269723947 23 | 
677,temperature c (calc),temperature (c),temperature,36.944185993850084,0.7951668078228821,87.36288900176923 24 | 223761,temperature fahrenheit,temperature (f),temperature,36.8494655237953,0.7444853516121963,90.31997034013432 25 | 492,pap [systolic],pulmonary artery pressure systolic,pulmonary artery pressure systolic,38.42374784657562,12.589739189239921,92.71520440681633 26 | 811,glucose (70-105),glucose,glucose,135.827665056469,53.77577466184741,93.23734162549513 27 | 190,fio2 set,fraction inspired oxygen set,fraction inspired oxygen set,0.5250663193567437,0.18214605265781275,93.51322199373544 28 | 807,fingerstick glucose,glucose,glucose,144.54007815267448,57.77352341407985,93.84339699966469 29 | 220074,central venous pressure,central venous pressure,central venous pressure,13.714740936021288,27.775085476211146,93.96938782000896 30 | 679,temperature f (calc),temperature (f),temperature,37.243650675696934,0.7381676307439387,94.11691475605578 31 | 676,temperature c,temperature (c),temperature,37.24364465740107,0.738169804038111,94.11691475605578 32 | 829,potassium (3.5-5.3),potassium,potassium,4.126247210307182,0.6472945535234909,94.25894407606883 33 | 813,hematocrit,hematocrit,hematocrit,30.716222552062003,4.9231926598853635,94.85468574081966 34 | 1529,glucose,glucose,glucose,132.77146570845525,52.14488532909935,94.87903881680398 35 | 225664,glucose finger stick,glucose (finger stick),glucose,150.2282125131326,60.67695105335787,95.40149407938557 36 | 780,arterial ph,ph (arterial),ph,7.383238570089456,0.0755647129939972,95.50749356869794 37 | 506,peep set,positive end-expiratory pressure set,positive end-expiratory pressure set,6.159088139452178,2.9228705568923634,95.60031695346655 38 | 223835,inspired o2 fraction,fraction inspired oxygen,fraction inspired oxygen,0.5334754600604382,0.18898523530873748,95.7089062288444 39 | 1535,potassium,potassium,potassium,4.114669913878324,0.6396563912120975,95.70913340306068 40 | 615,resp rate (total),respiratory rate 
(total),respiratory rate,18.586152348383074,6.2297633113521735,95.72290016056674 41 | 777,arterial co2(calc),"co2 (etco2, pco2, etc.)","co2 (etco2, pco2, etc.)",25.18672696759632,5.28707499563618,95.78560024425772 42 | 778,arterial paco2,partial pressure of carbon dioxide (arterial),partial pressure of carbon dioxide,40.84872077048675,9.114166286220346,95.78882611812878 43 | 779,arterial pao2,partial pressure of oxygen,partial pressure of oxygen,145.8220365787322,84.89126553607426,95.79586851883319 44 | 1126,art.ph,ph (arterial),ph,7.383119686812778,0.07519230566723935,95.80450113905152 45 | 837,sodium (135-148),sodium,sodium,138.86951490281783,5.113445697937651,96.09151304388915 46 | 116,cardiac index,cardiac index,cardiac index,2.868186533308795,0.8419530320916853,96.45135700246348 47 | 791,creatinine (0-1.3),creatinine,creatinine,1.398509939491261,1.4961551426642252,96.49088531609475 48 | 781,bun (6-20),blood urea nitrogen,blood urea nitrogen,26.59446128625145,22.09455559225942,96.50792338231511 49 | 220545,hematocrit (serum),hematocrit (serum),hematocrit,30.550602759811184,5.22841861296835,96.50874120949369 50 | 828,platelets,platelets,platelets,196.81989868553214,108.8892593297763,96.53195841439667 51 | 821,magnesium (1.6-2.6),magnesium,magnesium,2.0332414522629136,0.4060717620618477,96.53522972311099 52 | 788,chloride (100-112),chloride,chloride,105.61685393752943,6.145410060187982,96.55890127644649 53 | 787,carbon dioxide,co2,co2,24.108687983137326,4.803093298652404,96.56553476356163 54 | 814,hemoglobin,hemoglobin,hemoglobin,10.627113882198122,1.7245841985282473,96.57657543047242 55 | 626,svr,systemic vascular resistance,systemic vascular resistance,996.5979018775078,354.7224767830885,96.59320458310351 56 | 861,"wbc (4-11,000)",white blood cell count,white blood cell count,12.243439531219247,9.996575276533045,96.77058221116843 57 | 227442,potassium (serum),potassium (serum),potassium serum,4.0906582939391685,0.8737147916349557,96.7802598327816 58 | 
833,rbc,red blood cell count,red blood cell count,3.504754382643194,0.6045657055454036,96.7996605108512 59 | 220645,sodium (serum),sodium (serum),sodium,138.84060664795905,5.370440267953136,96.85890754645486 60 | 220602,chloride (serum),chloride (serum),chloride,105.3458225167868,6.377004562546435,96.8671312530839 61 | 491,pap mean,pulmonary artery pressure mean,pulmonary artery pressure mean,29.590011224348547,9.286626776198654,96.91633718832834 62 | 1127,"wbc (4-11,000)",white blood cell count,white blood cell count,12.248704920458804,10.162362458486854,96.92974046708837 63 | 90,c.o.(thermodilution),cardiac output thermodilution,cardiac output thermodilution,5.683717102431174,1.8687379380008704,96.95336658558061 64 | 1536,sodium,sodium,sodium,138.9270348296241,5.084821897778887,96.988805763319 65 | 220339,peep set,positive end-expiratory pressure set,positive end-expiratory pressure set,6.162566992846121,2.859692347492791,97.00170925880323 66 | 220615,creatinine,creatinine,creatinine,1.3813086178762408,1.4321308870645644,97.02070102328354 67 | 225624,bun,blood urea nitrogen,blood urea nitrogen,25.880015928444525,21.425976311535525,97.033468214238 68 | 227443,hco3 (serum),bicarbonate (serum),bicarbonate,24.42982683782072,4.749466430341764,97.05868455224417 69 | 220621,glucose (serum),glucose (serum),glucose,136.1707688338493,61.499888212575534,97.06490912577 70 | 619,respiratory rate set,respiratory rate set,respiratory rate set,14.911526170363905,5.719745146331646,97.12238420248674 71 | 227073,anion gap,anion gap,anion gap,13.314896685047854,3.7742659156408966,97.13269791190548 72 | 220635,magnesium,magnesium,magnesium,2.0695281239420478,0.4079584270012105,97.1817221077769 73 | 220228,hemoglobin,hemoglobin,hemoglobin,10.477677307842182,1.8508923343673749,97.20048669804095 74 | 227457,platelet count,platelets,platelets,203.92415520213353,113.77754779961387,97.20602974891797 75 | 682,tidal volume (obser),tidal volume observed,tidal volume 
observed,575.1758166224059,124.72789449238428,97.22901977960467 76 | 827,phosphorous(2.7-4.5),phosphorous,phosphorous,3.5141597348168134,1.46456848724299,97.2461032806683 77 | 224685,tidal volume (observed),tidal volume observed,tidal volume observed,504.80537478573103,2208.851953853323,97.2645043921863 78 | 220546,wbc,white blood cell count,white blood cell count,11.623559214481318,9.688906692189475,97.27917984655745 79 | 1525,creatinine,creatinine,creatinine,1.395167748555619,1.4718179302730412,97.28426854900192 80 | 1162,bun,blood urea nitrogen,blood urea nitrogen,26.389558030539735,21.956304746558512,97.29530921591274 81 | 786,calcium (8.4-10.2),calcium,calcium,8.272538015442466,0.8114345548114279,97.29980726539492 82 | 1532,magnesium,magnesium,magnesium,2.0579964973335714,0.41366519694224513,97.3105753232462 83 | 1523,chloride,chloride,chloride,105.77032572163749,6.1752064804362945,97.32784056368284 84 | 614,resp rate (spont),respiratory rate (spontaneous),respiratory rate,2.848259597078965,5.030844616613539,97.33501926891702 85 | 224695,peak insp. pressure,peak inspiratory pressure,peak inspiratory pressure,20.40891912516437,6.132278461374045,97.34592363129806 86 | 683,tidal volume (set),tidal volume set,tidal volume set,565.2982436312367,129.72198707778733,97.34674145847664 87 | 224689,respiratory rate (spontaneous),respiratory rate (spontaneous),respiratory rate,9.452999280226122,10.767978594938004,97.38113563482017 88 | 825,ptt(22-35),partial thromboplastin time,partial thromboplastin time,42.1014856999121,24.924086355778606,97.41107719652477 89 | 816,ionized calcium,calcium ionized,calcium ionized,1.4548033491300119,7.258700266632667,97.43056874428089 90 | 535,peak insp. 
pressure,peak inspiratory pressure,peak inspiratory pressure,25.318576669825053,6.144567138276635,97.45723899727118 91 | 225677,phosphorous,phosphorous,phosphorous,3.408323303813618,1.35993873135611,97.47068771087447 92 | 225625,calcium non-ionized,calcium non-ionized,calcium,8.33335374198663,2.8973194475055513,97.47691228440031 93 | 223830,ph (arterial),ph (arterial),ph,7.380746273155249,0.08131153657111036,97.48758947256508 94 | 1542,wbc,white blood cell count,white blood cell count,12.189169491832727,10.4186935629412,97.49385948093418 95 | 815,inr (2-4 ref. range),prothrombin time,prothrombin time inr,1.5276120216548172,1.3212495648680445,97.55496934511126 96 | 824,pt(11-13.5),prothrombin time,prothrombin time pt,15.412690379706046,5.394941850258561,97.55578717228983 97 | 220235,arterial co2 pressure,partial pressure of carbon dioxide (arterial),partial pressure of carbon dioxide,41.157794018595105,9.434863928206854,97.59408874515324 98 | 225698,tco2 (calc) arterial,"co2 (etco2, pco2, etc.)","co2 (etco2, pco2, etc.)",25.19359657241436,5.174422890507092,97.59454309358578 99 | 220059,pulmonary artery pressure systolic,pulmonary artery pressure systolic,pulmonary artery pressure systolic,37.026091266290216,11.13907184662926,97.69445431390206 100 | 1534,phosphorous,phosphorous,phosphorous,3.4739764333108893,1.4325795387739864,97.7672409327955 101 | 1522,calcium,calcium,calcium,8.296089334144861,0.8157925353620858,97.79468357812112 102 | 224690,respiratory rate (total),respiratory rate (total),respiratory rate,18.893311217735548,5.663158357836307,97.90336372318549 103 | 543,plateau pressure,plateau pressure,plateau pressure,20.721389808057822,6.00324975148783,97.9616566270808 104 | 1533,ptt,partial thromboplastin time,partial thromboplastin time,41.441392212163706,24.26243544442964,98.00241168147994 105 | 227466,ptt,partial thromboplastin time,partial thromboplastin time,42.03303366894534,24.972508806894062,98.0324895477143 106 | 1530,inr,prothrombin time,prothrombin 
time inr,1.5209415558434443,1.2103463697710057,98.10727529971093 107 | 1286,pt,prothrombin time,prothrombin time pt,15.614373679408336,5.820295850553224,98.10772964814348 108 | 227467,inr,prothrombin time,prothrombin time inr,1.5048745598589854,0.8876349421905448,98.14189665027074 109 | 227465,prothrombin time,prothrombin time,prothrombin time pt,16.52679300648027,7.5145446551246105,98.141942085114 110 | 684,tidal volume (spont),tidal volume spontaneous,tidal volume spontaneous,457.21894691863207,194.06437358994376,98.1827425743564 111 | 224688,respiratory rate (set),respiratory rate set,respiratory rate set,16.76014530811976,10.680771899698309,98.24158069637076 112 | 224684,tidal volume (set),tidal volume set,tidal volume set,489.39459059415026,88.52825285612052,98.31105057170663 113 | 226537,glucose (whole blood),glucose (whole blood),glucose,131.78947783061918,42.03276652546779,98.42363811329086 114 | 834,sao2,oxygen saturation,oxygen saturation,96.68015091015981,3.3690289177241617,98.44817292864822 115 | 225667,ionized calcium,calcium ionized,calcium ionized,1.1375018536010573,0.8987666521246573,98.508873879236 116 | 224686,tidal volume (spontaneous),tidal volume spontaneous,tidal volume spontaneous,531.9146441360622,3355.6396737253526,98.62500533859408 117 | 223762,temperature celsius,temperature (c),temperature,37.060083241731,0.9077621258050494,98.69515673657877 118 | 225668,lactic acid,lactic acid,lactic acid,2.5086622141267125,2.3069009807165988,98.72786982372189 119 | 818,lactic acid(0.5-2.0),lactic acid,lactic acid,2.9755073601570334,3.277799186893734,98.84254736809584 120 | 226531,admission weight (lbs.),"weight (lbs, admission)",weight,80.71517796342529,23.36829893729347,98.85131629284392 121 | 225312,art bp mean,mean blood pressure (arterial),mean blood pressure,79.65383604325228,18.70728138686971,98.86917218624288 122 | 225309,art bp systolic,systolic blood pressure (arterial),systolic blood 
pressure,115.30353914605523,23.739399173556016,98.87335219182228 123 | 225310,art bp diastolic,diastolic blood pressure (arterial),diastolic blood pressure,59.27203190742385,14.449988507288586,98.8741245841576 124 | 224696,plateau pressure,plateau pressure,plateau pressure,19.305944780698116,4.997881593930517,98.90402071101894 125 | 227464,potassium (whole blood),potassium (whole blood),potassium,4.236190486219362,0.7210840843788271,98.92133138629885 126 | 1531,lactic acid,lactic acid,lactic acid,2.8969093902457725,3.2320433870719487,98.99729844422009 127 | 763,daily weight,weight (daily),weight,84.26841476030812,22.987743827178928,99.18994217961847 128 | 224,iabp mean,mean blood pressure (arterial),mean blood pressure,81.4670694434781,14.49083602684611,99.26218358039287 129 | 89,c.o. (fick),cardiac output fick,cardiac output fick,5.6815246559783406,1.9843659375245477,99.26559119363694 130 | 220227,arterial o2 saturation,oxygen saturation (arterial),oxygen saturation,96.1006185673329,4.109073848979552,99.2657274981667 131 | 224639,daily weight,weight (daily),weight,86.32265755329007,23.65570696817214,99.31366125780002 132 | 225690,total bilirubin,bilirubin (total),bilirubin,2.6861728143468806,5.160533122108008,99.33115367245294 133 | 770,ast,asparate aminotransferase,asparate aminotransferase,404.2892781678141,1299.0477671154677,99.33342541461566 134 | 220587,ast,asparate aminotransferase,asparate aminotransferase,347.40981595092023,1239.8913868625452,99.33347084945892 135 | 769,alt,alanine aminotransferase,alanine aminotransferase,335.13654895009546,984.894038593404,99.33356171914542 136 | 220644,alt,alanine aminotransferase,alanine aminotransferase,281.0920128231362,907.0054864293936,99.3338797630482 137 | 226512,admission weight (kg),"weight (kg, admission)",weight,80.77219622953048,22.47144631077177,99.3396499881415 138 | 224700,total peep level,positive end-expiratory pressure (total),positive end-expiratory 
pressure,7.364976207137844,3.4632607918855878,99.35073608989556 139 | 225612,alkaline phosphate,alkaline phosphate,alkaline phosphate,120.11637508747376,146.4889185297922,99.35073608989556 140 | 773,alk. phosphate,alkaline phosphate,alkaline phosphate,126.72359811440231,157.6099844059226,99.35423457282614 141 | 848,total bili (0-1.5),bilirubin (total),bilirubin,3.232037251775634,6.352632846086666,99.3730900327767 142 | 227429,troponin-t,troponin-t,troponin-t,0.6851146770069206,1.9286944059222382,99.44928426491421 143 | 1538,total bili,bilirubin (total),bilirubin,3.2994932895097273,6.437710719690952,99.50235216183528 144 | 772,albumin (>3.2),albumin,albumin,2.923178992298408,0.6319024849857566,99.51034869424804 145 | 226534,sodium (whole blood),sodium (whole blood),sodium,136.2199218469922,5.0680310977003,99.5775468274212 146 | 227456,albumin,albumin,albumin,3.0758501155496942,0.6561975161958757,99.58713357934786 147 | 1521,albumin,albumin,albumin,2.9533737229745696,0.6362340551027333,99.61752948948501 148 | 226730,height (cm),height (cm),height,168.79796203532825,13.858957585321845,99.65533127907263 149 | 226707,height,height,height,168.79774123385295,13.82883797185507,99.65533127907263 150 | 651,spon rr (mech.),respiratory rate (spontaneous),respiratory rate,22.33264552783046,7.321761402405684,99.65610367140795 151 | 806,fibrinogen (150-400),fibrinogen,fibrinogen,300.5521066920049,178.4770106575294,99.66528150974531 152 | 227468,fibrinogen,fibrinogen,fibrinogen,288.7183806239222,177.50011742710805,99.7101711348806 153 | 1528,fibrinogen,fibrinogen,fibrinogen,298.4862065131056,177.54140366082632,99.71398766171397 154 | 226536,chloride (whole blood),chloride (whole blood),chloride,106.15262321144672,5.902115271065178,99.71421483593024 155 | 504,pcwp,pulmonary capillary wedge pressure,pulmonary capillary wedge pressure,17.11269971987513,7.154823284028659,99.72966268263671 156 | 6701,arterial bp #2 [systolic],systolic blood pressure (arterial),systolic blood 
pressure,109.3032405065662,22.273124921633524,99.73052594465854 157 | 8555,arterial bp #2 [diastolic],diastolic blood pressure (arterial),diastolic blood pressure,57.41475929494896,12.635531634465185,99.7305713795018 158 | 6702,arterial bp mean #2,mean blood pressure (arterial),mean blood pressure,76.85960301282616,13.50967036579858,99.73252507776174 159 | 224422,spont rr,respiratory rate (spontaneous),respiratory rate,20.75559874143994,6.912434510256433,99.75451554189682 160 | 224322,iabp mean,mean blood pressure (arterial),mean blood pressure,79.58123678261505,15.215683735087245,99.79231733148444 161 | 860,venous ph,ph (venous),ph,7.372199163023656,0.07587914823369585,99.82593911549264 162 | 220274,ph (venous),ph (venous),ph,7.37149143610012,0.08704031820404255,99.8275747698498 163 | 189,fio2 (analyzed),fraction inspired oxygen,fraction inspired oxygen,0.568271808654999,0.2016535849428096,99.83139129668317 164 | 512,pvr,post void residual,post void residual,205.5797168637139,134.9887854778189,99.86037872667943 165 | 789,cholesterol (<200),cholesterol,cholesterol,161.7004323656578,49.12364077133579,99.92644098877123 166 | 226062,venous co2 pressure,partial pressure of carbon dioxide,partial pressure of carbon dioxide,44.57086871325931,13.420123431063322,99.93043925497761 167 | 223679,tco2 (calc) venous,"co2 (etco2, pco2, etc.)","co2 (etco2, pco2, etc.)",25.276143790849673,7.79197652299903,99.93048468982086 168 | 851,troponin,troponin-i,troponin-i,7.669527675276747,10.688952561726897,99.93843578739038 169 | 803,direct bili (0-0.3),bilirubin (conjugated),bilirubin,3.201930654058309,4.881562376748818,99.94234318391025 170 | 225651,direct bilirubin,bilirubin (conjugated),bilirubin,3.0232613908872854,4.265646313845083,99.94316101108883 171 | 1524,cholesterol,cholesterol,cholesterol,159.81780366056572,47.42536183359262,99.94538731840828 172 | 220603,cholesterol,cholesterol,cholesterol,160.08823529411765,52.03284289722227,99.95133928287461 173 | 442,manual bp 
[systolic],systolic blood pressure (noninvasive),systolic blood pressure,120.31005433048445,24.741012009665045,99.95261145848573 174 | 857,venous co2(calc),"co2 (etco2, pco2, etc.)","co2 (etco2, pco2, etc.)",25.793240556660038,8.234860617450735,99.95429254768614 175 | 8440,manual bp [diastolic],diastolic blood pressure (noninvasive),diastolic blood pressure,62.113279946613275,14.503790129481999,99.95461059158892 176 | 859,venous pvo2,venous pvo2,venous pvo2,43.87709205020921,14.872487981508149,99.95656428984886 177 | 849,total protein(6.5-8),total protein,total protein,5.660992907801418,1.0911605011574026,99.98078106130342 178 | 224167,manual blood pressure systolic left,systolic blood pressure (noninvasive),systolic blood pressure,120.20458673932787,27.590102229742303,99.98332541252566 179 | 224643,manual blood pressure diastolic left,diastolic blood pressure (noninvasive),diastolic blood pressure,66.04010566762727,16.221242418772487,99.98423410939074 180 | 1539,total protein,total protein,total protein,5.656176470588233,1.096490821739333,99.98455215329352 181 | 227243,manual blood pressure systolic right,systolic blood pressure (noninvasive),systolic blood pressure,123.6897689768977,26.480958939779597,99.98623324249394 182 | 727,vision fio2,fraction inspired oxygen,fraction inspired oxygen,0.43559748427672906,0.18904353258599843,99.99277585992256 183 | 1394,height inches,height (in),height,167.64,17.96051224213831,99.9999091303135 184 | -------------------------------------------------------------------------------- /notebooks/Baselines for Mortality and LOS prediction - GRU-D.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%load_ext autoreload\n", 12 | "from __future__ import print_function, division" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | 
"execution_count": null, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "%autoreload\n", 24 | "\n", 25 | "import copy, math, os, pickle, time, pandas as pd, numpy as np, scipy.stats as ss\n", 26 | "\n", 27 | "from sklearn.linear_model import LogisticRegression\n", 28 | "from sklearn.ensemble import RandomForestClassifier\n", 29 | "from sklearn.metrics import average_precision_score, roc_auc_score, accuracy_score, f1_score\n", 30 | "\n", 31 | "import torch, torch.utils.data as utils, torch.nn as nn, torch.nn.functional as F, torch.optim as optim\n", 32 | "from torch.autograd import Variable\n", 33 | "from torch.nn.parameter import Parameter\n", 34 | "\n", 35 | "\n", 36 | "from mmd_grud_utils import *" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "DATA_FILEPATH = '/scratch/mmd/mimic_data/final/grouping_5/all_hourly_data.h5'\n", 46 | "RAW_DATA_FILEPATH = '/scratch/mmd/mimic_data/final/nogrouping_5/all_hourly_data.h5'\n", 47 | "GAP_TIME = 6 # In hours\n", 48 | "WINDOW_SIZE = 24 # In hours\n", 49 | "SEED = 1\n", 50 | "ID_COLS = ['subject_id', 'hadm_id', 'icustay_id']\n", 51 | "GPU = '2'\n", 52 | "\n", 53 | "os.environ['CUDA_VISIBLE_DEVICES'] = GPU\n", 54 | "np.random.seed(SEED)\n", 55 | "torch.manual_seed(SEED)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "class DictDist():\n", 67 | " def __init__(self, dict_of_rvs): self.dict_of_rvs = dict_of_rvs\n", 68 | " def rvs(self, n):\n", 69 | " a = {k: v.rvs(n) for k, v in self.dict_of_rvs.items()}\n", 70 | " out = []\n", 71 | " for i in range(n): out.append({k: vs[i] for k, vs in a.items()})\n", 72 | " return out\n", 73 | " \n", 74 | "class Choice():\n", 75 | " def __init__(self, options): self.options = options\n", 76 | " def rvs(self, n): return [self.options[i] 
for i in ss.randint(0, len(self.options)).rvs(n)]" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "scrolled": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "%%time\n", 88 | "data_full_lvl2 = pd.read_hdf(DATA_FILEPATH, 'vitals_labs')\n", 89 | "data_full_raw = pd.read_hdf(RAW_DATA_FILEPATH, 'vitals_labs') \n", 90 | "statics = pd.read_hdf(DATA_FILEPATH, 'patients')" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "data_full_lvl2.head()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "data_full_raw.head()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "statics.head()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": true 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "def simple_imputer(df):\n", 129 | " idx = pd.IndexSlice\n", 130 | " df = df.copy()\n", 131 | " if len(df.columns.names) > 2: df.columns = df.columns.droplevel(('label', 'LEVEL1', 'LEVEL2'))\n", 132 | " \n", 133 | " df_out = df.loc[:, idx[:, ['mean', 'count']]]\n", 134 | " icustay_means = df_out.loc[:, idx[:, 'mean']].groupby(ID_COLS).mean()\n", 135 | " \n", 136 | " df_out.loc[:,idx[:,'mean']] = df_out.loc[:,idx[:,'mean']].groupby(ID_COLS).fillna(\n", 137 | " method='ffill'\n", 138 | " ).groupby(ID_COLS).fillna(icustay_means).fillna(0)\n", 139 | " \n", 140 | " df_out.loc[:, idx[:, 'count']] = (df.loc[:, idx[:, 'count']] > 0).astype(float)\n", 141 | " df_out.rename(columns={'count': 'mask'}, level='Aggregation Function', inplace=True)\n", 142 | " \n", 143 | " is_absent = (1 - df_out.loc[:, idx[:, 'mask']])\n", 144 | " hours_of_absence = is_absent.cumsum()\n", 145 | " 
time_since_measured = hours_of_absence - hours_of_absence[is_absent==0].fillna(method='ffill')\n", 146 | " time_since_measured.rename(columns={'mask': 'time_since_measured'}, level='Aggregation Function', inplace=True)\n", 147 | "\n", 148 | " df_out = pd.concat((df_out, time_since_measured), axis=1)\n", 149 | " df_out.loc[:, idx[:, 'time_since_measured']] = df_out.loc[:, idx[:, 'time_since_measured']].fillna(100)\n", 150 | " \n", 151 | " df_out.sort_index(axis=1, inplace=True)\n", 152 | " return df_out" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "Ys = statics[statics.max_hours > WINDOW_SIZE + GAP_TIME][['mort_hosp', 'mort_icu', 'los_icu']]\n", 162 | "Ys['los_3'] = Ys['los_icu'] > 3\n", 163 | "Ys['los_7'] = Ys['los_icu'] > 7\n", 164 | "Ys.drop(columns=['los_icu'], inplace=True)\n", 165 | "Ys.astype(float)\n", 166 | "\n", 167 | "lvl2, raw = [df[\n", 168 | " (df.index.get_level_values('icustay_id').isin(set(Ys.index.get_level_values('icustay_id')))) &\n", 169 | " (df.index.get_level_values('hours_in') < WINDOW_SIZE)\n", 170 | "] for df in (data_full_lvl2, data_full_raw)]\n", 171 | "\n", 172 | "raw.columns = raw.columns.droplevel(level=['label', 'LEVEL1', 'LEVEL2'])\n", 173 | "\n", 174 | "train_frac, dev_frac, test_frac = 0.7, 0.1, 0.2\n", 175 | "lvl2_subj_idx, raw_subj_idx, Ys_subj_idx = [df.index.get_level_values('subject_id') for df in (lvl2, raw, Ys)]\n", 176 | "lvl2_subjects = set(lvl2_subj_idx)\n", 177 | "assert lvl2_subjects == set(Ys_subj_idx), \"Subject ID pools differ!\"\n", 178 | "assert lvl2_subjects == set(raw_subj_idx), \"Subject ID pools differ!\"\n", 179 | "\n", 180 | "np.random.seed(SEED)\n", 181 | "subjects, N = np.random.permutation(list(lvl2_subjects)), len(lvl2_subjects)\n", 182 | "N_train, N_dev, N_test = int(train_frac * N), int(dev_frac * N), int(test_frac * N)\n", 183 | "train_subj = subjects[:N_train]\n", 184 | "dev_subj = 
subjects[N_train:N_train + N_dev]\n", 185 | "test_subj = subjects[N_train+N_dev:]\n", 186 | "\n", 187 | "[(lvl2_train, lvl2_dev, lvl2_test), (raw_train, raw_dev, raw_test), (Ys_train, Ys_dev, Ys_test)] = [\n", 188 | " [df[df.index.get_level_values('subject_id').isin(s)] for s in (train_subj, dev_subj, test_subj)] \\\n", 189 | " for df in (lvl2, raw, Ys)\n", 190 | "]\n", 191 | "\n", 192 | "idx = pd.IndexSlice\n", 193 | "lvl2_means, lvl2_stds = lvl2_train.loc[:, idx[:,'mean']].mean(axis=0), lvl2_train.loc[:, idx[:,'mean']].std(axis=0)\n", 194 | "raw_means, raw_stds = raw_train.loc[:, idx[:,'mean']].mean(axis=0), raw_train.loc[:, idx[:,'mean']].std(axis=0)\n", 195 | "\n", 196 | "lvl2_train.loc[:, idx[:,'mean']] = (lvl2_train.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds\n", 197 | "lvl2_dev.loc[:, idx[:,'mean']] = (lvl2_dev.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds\n", 198 | "lvl2_test.loc[:, idx[:,'mean']] = (lvl2_test.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds\n", 199 | "\n", 200 | "raw_train.loc[:, idx[:,'mean']] = (raw_train.loc[:, idx[:,'mean']] - raw_means)/raw_stds\n", 201 | "raw_dev.loc[:, idx[:,'mean']] = (raw_dev.loc[:, idx[:,'mean']] - raw_means)/raw_stds\n", 202 | "raw_test.loc[:, idx[:,'mean']] = (raw_test.loc[:, idx[:,'mean']] - raw_means)/raw_stds" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test = [\n", 212 | " simple_imputer(df) for df in (raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test)\n", 213 | "]\n", 214 | "raw_flat_train, raw_flat_dev, raw_flat_test, lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test = [\n", 215 | " df.pivot_table(index=['subject_id', 'hadm_id', 'icustay_id'], columns=['hours_in']) for df in (\n", 216 | " raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test\n", 217 | " )\n", 218 | "]\n", 219 | "\n", 220 | "for df in lvl2_train, lvl2_dev, 
lvl2_test, raw_train, raw_dev, raw_test: assert not df.isnull().any().any()" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": { 227 | "collapsed": true 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "Ys = statics[statics.max_hours > WINDOW_SIZE + GAP_TIME][['mort_hosp', 'mort_icu', 'los_icu']]\n", 232 | "Ys['los_3'] = Ys['los_icu'] > 3\n", 233 | "Ys['los_7'] = Ys['los_icu'] > 7\n", 234 | "Ys.drop(columns=['los_icu'], inplace=True)\n", 235 | "Ys.astype(float)\n", 236 | "[(Ys_train, Ys_dev, Ys_test)] = [\n", 237 | " [df[df.index.get_level_values('subject_id').isin(s)] for s in (train_subj, dev_subj, test_subj)] \\\n", 238 | " for df in (Ys,)\n", 239 | "]" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "### Task Prediction" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "#### Hyperparams" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": { 260 | "collapsed": true 261 | }, 262 | "outputs": [], 263 | "source": [ 264 | "N = 10\n", 265 | "\n", 266 | "GRU_D_dist = DictDist({\n", 267 | " 'cell_size': ss.randint(50, 75),\n", 268 | " 'hidden_size': ss.randint(65, 95), \n", 269 | " 'learning_rate': ss.uniform(2e-3, 1e-1),\n", 270 | " 'num_epochs': ss.randint(15, 150),\n", 271 | " 'patience': ss.randint(3, 7),\n", 272 | " 'batch_size': ss.randint(35, 65),\n", 273 | " 'early_stop_frac': ss.uniform(0.05, 0.1),\n", 274 | " 'seed': ss.randint(1, 10000),\n", 275 | "})\n", 276 | "np.random.seed(SEED)\n", 277 | "GRU_D_hyperparams_list = GRU_D_dist.rvs(N)\n", 278 | "\n", 279 | "with open('/scratch/mmd/extraction_baselines_gru-d.pkl', mode='rb') as f: results = pickle.load(f)" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "results" 289 | ] 290 | }, 291 | { 292 | 
"cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "### GRU-D" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "scrolled": false 303 | }, 304 | "outputs": [], 305 | "source": [ 306 | "model_name = 'GRU-D'\n", 307 | "hyperparams_list = GRU_D_hyperparams_list\n", 308 | "RERUN = False\n", 309 | "if model_name not in results: results[model_name] = {}\n", 310 | "for t in ['mort_icu', 'los_3', 'mort_hosp', 'los_7']:\n", 311 | " if t not in results[model_name]: results[model_name][t] = {}\n", 312 | " for n, X_train, X_dev, X_test in (\n", 313 | " ('lvl2', lvl2_train, lvl2_dev, lvl2_test),\n", 314 | "# ('raw', raw_train, raw_dev, raw_test)\n", 315 | " ):\n", 316 | " print(\"Running model %s on target %s with representation %s\" % (model_name, t, n))\n", 317 | " X_mean = np.nanmean(\n", 318 | " to_3D_tensor(\n", 319 | " X_train.loc[:, pd.IndexSlice[:, 'mean']] * \n", 320 | " np.where((X_train.loc[:, pd.IndexSlice[:, 'mask']] == 1).values, 1, np.NaN)\n", 321 | " ),\n", 322 | " axis=0, keepdims=True\n", 323 | " ).transpose([0, 2, 1])\n", 324 | " base_params = {'X_mean': X_mean, 'output_last': True, 'input_size': X_mean.shape[2]}\n", 325 | " \n", 326 | " if n in results[model_name][t]:\n", 327 | " if not RERUN: \n", 328 | " print(\"Final results for model %s on target %s with representation %s\" % (model_name, t, n))\n", 329 | " print(results[model_name][t][n])\n", 330 | " continue\n", 331 | " best_s, best_hyperparams = results[model_name][t][n][-1], results[model_name][t][n][1]\n", 332 | " print(\"Loading best hyperparams\", best_hyperparams)\n", 333 | " else:\n", 334 | " best_s, best_hyperparams = -np.Inf, None\n", 335 | " for i, hyperparams in enumerate(hyperparams_list):\n", 336 | " print(\"On sample %d / %d (hyperparams = %s)\" % (i+1, len(hyperparams_list), repr((hyperparams))))\n", 337 | "\n", 338 | " early_stop_frac,batch_size,seed = [hyperparams[k] for k in 
('early_stop_frac','batch_size','seed')]\n", 339 | "\n", 340 | " np.random.seed(seed)\n", 341 | " all_train_subjects = list(\n", 342 | " np.random.permutation(Ys_train.index.get_level_values('subject_id').values)\n", 343 | " )\n", 344 | " N_early_stop = int(len(all_train_subjects) * early_stop_frac)\n", 345 | " train_subjects = all_train_subjects[:-N_early_stop]\n", 346 | " early_stop_subjects = all_train_subjects[-N_early_stop:]\n", 347 | " X_train_obs = X_train[X_train.index.get_level_values('subject_id').isin(train_subjects)]\n", 348 | " Ys_train_obs = Ys_train[Ys_train.index.get_level_values('subject_id').isin(train_subjects)]\n", 349 | "\n", 350 | " X_train_early_stop = X_train[X_train.index.get_level_values('subject_id').isin(early_stop_subjects)]\n", 351 | " Ys_train_early_stop = Ys_train[\n", 352 | " Ys_train.index.get_level_values('subject_id').isin(early_stop_subjects)\n", 353 | " ]\n", 354 | "\n", 355 | " train_dataloader = prepare_dataloader(X_train_obs, Ys_train_obs[t], batch_size=batch_size)\n", 356 | " early_stop_dataloader = prepare_dataloader(\n", 357 | " X_train_early_stop, Ys_train_early_stop[t], batch_size=batch_size\n", 358 | " )\n", 359 | " dev_dataloader = prepare_dataloader(X_dev, Ys_dev[t], batch_size=batch_size)\n", 360 | " test_dataloader = prepare_dataloader(X_test, Ys_test[t], batch_size=batch_size)\n", 361 | "\n", 362 | " model_hyperparams = copy.copy(base_params)\n", 363 | " model_hyperparams.update(\n", 364 | " {k: v for k, v in hyperparams.items() if k in ('cell_size', 'hidden_size', 'batch_size')}\n", 365 | " )\n", 366 | " model = GRUD(**model_hyperparams)\n", 367 | "\n", 368 | " best_model, _ = Train_Model(\n", 369 | " model, train_dataloader, early_stop_dataloader,\n", 370 | " **{k: v for k, v in hyperparams.items() if k in (\n", 371 | " 'num_epochs', 'patience', 'learning_rate', 'batch_size'\n", 372 | " )}\n", 373 | " )\n", 374 | "\n", 375 | " probabilities_dev, labels_dev = predict_proba(best_model, dev_dataloader)\n", 376 | " 
probabilities_dev = np.concatenate(probabilities_dev)[:, 1]\n", 377 | " labels_dev = np.concatenate(labels_dev)\n", 378 | " s = roc_auc_score(labels_dev, probabilities_dev)\n", 379 | " if s > best_s:\n", 380 | " best_s, best_hyperparams = s, hyperparams\n", 381 | " print(\"New Best Score: %.2f @ hyperparams = %s\" % (100*best_s, repr((best_hyperparams))))\n", 382 | " \n", 383 | " ## Test\n", 384 | " np.random.seed(seed)\n", 385 | " hyperparams = best_hyperparams # In case I forgot a replace below\n", 386 | " early_stop_frac,batch_size,seed = [best_hyperparams[k] for k in ('early_stop_frac','batch_size','seed')]\n", 387 | " \n", 388 | " X_train_concat, Ys_train_concat = pd.concat((X_train, X_dev)), pd.concat((Ys_train, Ys_dev))\n", 389 | " \n", 390 | " all_train_subjects = list(np.random.permutation(Ys_train_concat.index.get_level_values('subject_id').values))\n", 391 | " N_early_stop = int(len(all_train_subjects) * early_stop_frac)\n", 392 | " train_subjects, early_stop_subjects = all_train_subjects[:-N_early_stop], all_train_subjects[-N_early_stop:]\n", 393 | " X_train_obs = X_train_concat[X_train_concat.index.get_level_values('subject_id').isin(train_subjects)]\n", 394 | " Ys_train_obs = Ys_train_concat[Ys_train_concat.index.get_level_values('subject_id').isin(train_subjects)]\n", 395 | "\n", 396 | " X_train_early_stop = X_train_concat[X_train_concat.index.get_level_values('subject_id').isin(early_stop_subjects)]\n", 397 | " Ys_train_early_stop = Ys_train_concat[Ys_train_concat.index.get_level_values('subject_id').isin(early_stop_subjects)]\n", 398 | "\n", 399 | " train_dataloader = prepare_dataloader(X_train_obs, Ys_train_obs[t], batch_size=batch_size)\n", 400 | " early_stop_dataloader = prepare_dataloader(X_train_early_stop, Ys_train_early_stop[t], batch_size=batch_size)\n", 401 | " test_dataloader = prepare_dataloader(X_test, Ys_test[t], batch_size=batch_size)\n", 402 | "\n", 403 | " model_hyperparams = copy.copy(base_params)\n", 404 | " 
model_hyperparams.update(\n", 405 | " {k: v for k, v in best_hyperparams.items() if k in ('cell_size', 'hidden_size', 'batch_size')}\n", 406 | " )\n", 407 | " model = GRUD(**model_hyperparams)\n", 408 | "\n", 409 | " best_model, (losses_train, losses_early_stop, losses_epochs_train, losses_epochs_early_stop) = Train_Model(\n", 410 | " model, train_dataloader, early_stop_dataloader,\n", 411 | " **{k: v for k, v in best_hyperparams.items() if k in (\n", 412 | " 'num_epochs', 'patience', 'learning_rate', 'batch_size'\n", 413 | " )}\n", 414 | " )\n", 415 | "\n", 416 | " probabilities_test, labels_test = predict_proba(best_model, test_dataloader)\n", 417 | "\n", 418 | " y_score = np.concatenate(probabilities_test)[:, 1]\n", 419 | " y_pred = np.argmax(probabilities_test)\n", 420 | " y_true = np.concatenate(labels_test)\n", 421 | "\n", 422 | " auc = roc_auc_score(y_true, y_score)\n", 423 | " auprc = average_precision_score(y_true, y_score)\n", 424 | " acc = accuracy_score(y_true, y_pred)\n", 425 | " F1 = f1_score(y_true, y_pred)\n", 426 | " print(\"Final results for model %s on target %s with representation %s\" % (model_name, t, n))\n", 427 | " print(auc, auprc, acc, F1)\n", 428 | " \n", 429 | " results[model_name][t][n] = None, best_hyperparams, auc, auprc, acc, F1, best_s\n", 430 | " with open('/scratch/mmd/extraction_baselines_gru-d.pkl', mode='wb') as f: pickle.dump(results, f)" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": null, 436 | "metadata": {}, 437 | "outputs": [], 438 | "source": [ 439 | "y_score = np.concatenate(probabilities_test)[:, 1]\n", 440 | "y_pred = np.concatenate(probabilities_test).argmax(axis=1)\n", 441 | "y_true = np.concatenate(labels_test)\n", 442 | "\n", 443 | "auc = roc_auc_score(y_true, y_score)\n", 444 | "auprc = average_precision_score(y_true, y_score)\n", 445 | "acc = accuracy_score(y_true, y_pred)\n", 446 | "F1 = f1_score(y_true, y_pred)\n", 447 | "print(\"Final results for model %s on target %s with 
representation %s\" % (model_name, t, n))\n", 448 | "print(auc, auprc, acc, F1)\n", 449 | "\n", 450 | "results[model_name][t][n] = None, best_hyperparams, auc, auprc, acc, F1, best_s\n", 451 | "with open('/scratch/mmd/extraction_baselines_gru-d.pkl', mode='wb') as f: pickle.dump(results, f)" 452 | ] 453 | } 454 | ], 455 | "metadata": { 456 | "kernelspec": { 457 | "display_name": "Python 2", 458 | "language": "python", 459 | "name": "python2" 460 | }, 461 | "language_info": { 462 | "codemirror_mode": { 463 | "name": "ipython", 464 | "version": 2 465 | }, 466 | "file_extension": ".py", 467 | "mimetype": "text/x-python", 468 | "name": "python", 469 | "nbconvert_exporter": "python", 470 | "pygments_lexer": "ipython2", 471 | "version": "2.7.13" 472 | } 473 | }, 474 | "nbformat": 4, 475 | "nbformat_minor": 2 476 | } 477 | -------------------------------------------------------------------------------- /notebooks/Baselines for Mortality and LOS prediction - Sklearn.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%load_ext autoreload\n", 12 | "from __future__ import print_function, division" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "%autoreload\n", 24 | "\n", 25 | "import copy, math, os, pickle, time, pandas as pd, numpy as np, scipy.stats as ss\n", 26 | "\n", 27 | "from sklearn.linear_model import LogisticRegression\n", 28 | "from sklearn.ensemble import RandomForestClassifier\n", 29 | "from sklearn.metrics import average_precision_score, roc_auc_score, accuracy_score, f1_score\n", 30 | "\n", 31 | "import torch, torch.utils.data as utils, torch.nn as nn, torch.nn.functional as F, torch.optim as optim\n", 32 | "from torch.autograd import 
Variable\n", 33 | "from torch.nn.parameter import Parameter" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "DATA_FILEPATH = '/scratch/mmd/mimic_data/final/grouping_5/all_hourly_data.h5'\n", 43 | "RAW_DATA_FILEPATH = '/scratch/mmd/mimic_data/final/nogrouping_5/all_hourly_data.h5'\n", 44 | "GAP_TIME = 6 # In hours\n", 45 | "WINDOW_SIZE = 24 # In hours\n", 46 | "SEED = 1\n", 47 | "ID_COLS = ['subject_id', 'hadm_id', 'icustay_id']\n", 48 | "GPU = '2'\n", 49 | "\n", 50 | "os.environ['CUDA_VISIBLE_DEVICES'] = GPU\n", 51 | "np.random.seed(SEED)\n", 52 | "torch.manual_seed(SEED)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": true 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "class DictDist():\n", 64 | " def __init__(self, dict_of_rvs): self.dict_of_rvs = dict_of_rvs\n", 65 | " def rvs(self, n):\n", 66 | " a = {k: v.rvs(n) for k, v in self.dict_of_rvs.items()}\n", 67 | " out = []\n", 68 | " for i in range(n): out.append({k: vs[i] for k, vs in a.items()})\n", 69 | " return out\n", 70 | " \n", 71 | "class Choice():\n", 72 | " def __init__(self, options): self.options = options\n", 73 | " def rvs(self, n): return [self.options[i] for i in ss.randint(0, len(self.options)).rvs(n)]" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "scrolled": false 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "%%time\n", 85 | "data_full_lvl2 = pd.read_hdf(DATA_FILEPATH, 'vitals_labs')\n", 86 | "data_full_raw = pd.read_hdf(RAW_DATA_FILEPATH, 'vitals_labs') \n", 87 | "statics = pd.read_hdf(DATA_FILEPATH, 'patients')" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "data_full_lvl2.head()" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": 
{}, 103 | "outputs": [], 104 | "source": [ 105 | "data_full_raw.head()" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "statics.head()" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": { 121 | "collapsed": true 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "def simple_imputer(df):\n", 126 | " idx = pd.IndexSlice\n", 127 | " df = df.copy()\n", 128 | " if len(df.columns.names) > 2: df.columns = df.columns.droplevel(('label', 'LEVEL1', 'LEVEL2'))\n", 129 | " \n", 130 | " df_out = df.loc[:, idx[:, ['mean', 'count']]]\n", 131 | " icustay_means = df_out.loc[:, idx[:, 'mean']].groupby(ID_COLS).mean()\n", 132 | " \n", 133 | " df_out.loc[:,idx[:,'mean']] = df_out.loc[:,idx[:,'mean']].groupby(ID_COLS).fillna(\n", 134 | " method='ffill'\n", 135 | " ).groupby(ID_COLS).fillna(icustay_means).fillna(0)\n", 136 | " \n", 137 | " df_out.loc[:, idx[:, 'count']] = (df.loc[:, idx[:, 'count']] > 0).astype(float)\n", 138 | " df_out.rename(columns={'count': 'mask'}, level='Aggregation Function', inplace=True)\n", 139 | " \n", 140 | " is_absent = (1 - df_out.loc[:, idx[:, 'mask']])\n", 141 | " hours_of_absence = is_absent.cumsum()\n", 142 | " time_since_measured = hours_of_absence - hours_of_absence[is_absent==0].fillna(method='ffill')\n", 143 | " time_since_measured.rename(columns={'mask': 'time_since_measured'}, level='Aggregation Function', inplace=True)\n", 144 | "\n", 145 | " df_out = pd.concat((df_out, time_since_measured), axis=1)\n", 146 | " df_out.loc[:, idx[:, 'time_since_measured']] = df_out.loc[:, idx[:, 'time_since_measured']].fillna(100)\n", 147 | " \n", 148 | " df_out.sort_index(axis=1, inplace=True)\n", 149 | " return df_out" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "Ys = statics[statics.max_hours > 
WINDOW_SIZE + GAP_TIME][['mort_hosp', 'mort_icu', 'los_icu']]\n", 159 | "Ys['los_3'] = Ys['los_icu'] > 3\n", 160 | "Ys['los_7'] = Ys['los_icu'] > 7\n", 161 | "Ys.drop(columns=['los_icu'], inplace=True)\n", 162 | "Ys.astype(float)\n", 163 | "\n", 164 | "lvl2, raw = [df[\n", 165 | " (df.index.get_level_values('icustay_id').isin(set(Ys.index.get_level_values('icustay_id')))) &\n", 166 | " (df.index.get_level_values('hours_in') < WINDOW_SIZE)\n", 167 | "] for df in (data_full_lvl2, data_full_raw)]\n", 168 | "\n", 169 | "raw.columns = raw.columns.droplevel(level=['label', 'LEVEL1', 'LEVEL2'])\n", 170 | "\n", 171 | "train_frac, dev_frac, test_frac = 0.7, 0.1, 0.2\n", 172 | "lvl2_subj_idx, raw_subj_idx, Ys_subj_idx = [df.index.get_level_values('subject_id') for df in (lvl2, raw, Ys)]\n", 173 | "lvl2_subjects = set(lvl2_subj_idx)\n", 174 | "assert lvl2_subjects == set(Ys_subj_idx), \"Subject ID pools differ!\"\n", 175 | "assert lvl2_subjects == set(raw_subj_idx), \"Subject ID pools differ!\"\n", 176 | "\n", 177 | "np.random.seed(SEED)\n", 178 | "subjects, N = np.random.permutation(list(lvl2_subjects)), len(lvl2_subjects)\n", 179 | "N_train, N_dev, N_test = int(train_frac * N), int(dev_frac * N), int(test_frac * N)\n", 180 | "train_subj = subjects[:N_train]\n", 181 | "dev_subj = subjects[N_train:N_train + N_dev]\n", 182 | "test_subj = subjects[N_train+N_dev:]\n", 183 | "\n", 184 | "[(lvl2_train, lvl2_dev, lvl2_test), (raw_train, raw_dev, raw_test), (Ys_train, Ys_dev, Ys_test)] = [\n", 185 | " [df[df.index.get_level_values('subject_id').isin(s)] for s in (train_subj, dev_subj, test_subj)] \\\n", 186 | " for df in (lvl2, raw, Ys)\n", 187 | "]\n", 188 | "\n", 189 | "idx = pd.IndexSlice\n", 190 | "lvl2_means, lvl2_stds = lvl2_train.loc[:, idx[:,'mean']].mean(axis=0), lvl2_train.loc[:, idx[:,'mean']].std(axis=0)\n", 191 | "raw_means, raw_stds = raw_train.loc[:, idx[:,'mean']].mean(axis=0), raw_train.loc[:, idx[:,'mean']].std(axis=0)\n", 192 | "\n", 193 | "lvl2_train.loc[:, 
idx[:,'mean']] = (lvl2_train.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds\n", 194 | "lvl2_dev.loc[:, idx[:,'mean']] = (lvl2_dev.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds\n", 195 | "lvl2_test.loc[:, idx[:,'mean']] = (lvl2_test.loc[:, idx[:,'mean']] - lvl2_means)/lvl2_stds\n", 196 | "\n", 197 | "raw_train.loc[:, idx[:,'mean']] = (raw_train.loc[:, idx[:,'mean']] - raw_means)/raw_stds\n", 198 | "raw_dev.loc[:, idx[:,'mean']] = (raw_dev.loc[:, idx[:,'mean']] - raw_means)/raw_stds\n", 199 | "raw_test.loc[:, idx[:,'mean']] = (raw_test.loc[:, idx[:,'mean']] - raw_means)/raw_stds" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test = [\n", 209 | " simple_imputer(df) for df in (raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test)\n", 210 | "]\n", 211 | "raw_flat_train, raw_flat_dev, raw_flat_test, lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test = [\n", 212 | " df.pivot_table(index=['subject_id', 'hadm_id', 'icustay_id'], columns=['hours_in']) for df in (\n", 213 | " raw_train, raw_dev, raw_test, lvl2_train, lvl2_dev, lvl2_test\n", 214 | " )\n", 215 | "]\n", 216 | "\n", 217 | "for df in lvl2_train, lvl2_dev, lvl2_test, raw_train, raw_dev, raw_test: assert not df.isnull().any().any()" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "### Task Prediction" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "#### Hyperparams" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": { 238 | "collapsed": true 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "N = 15\n", 243 | "\n", 244 | "LR_dist = DictDist({\n", 245 | " 'C': Choice(np.geomspace(1e-3, 1e3, 10000)),\n", 246 | " 'penalty': Choice(['l1', 'l2']),\n", 247 | " 'solver': Choice(['liblinear', 
'lbfgs']),\n", 248 | " 'max_iter': Choice([100, 500])\n", 249 | "})\n", 250 | "np.random.seed(SEED)\n", 251 | "LR_hyperparams_list = LR_dist.rvs(N)\n", 252 | "for i in range(N):\n", 253 | " if LR_hyperparams_list[i]['solver'] == 'lbfgs': LR_hyperparams_list[i]['penalty'] = 'l2'\n", 254 | "\n", 255 | "RF_dist = DictDist({\n", 256 | " 'n_estimators': ss.randint(50, 500),\n", 257 | " 'max_depth': ss.randint(2, 10),\n", 258 | " 'min_samples_split': ss.randint(2, 75),\n", 259 | " 'min_samples_leaf': ss.randint(1, 50),\n", 260 | "})\n", 261 | "np.random.seed(SEED)\n", 262 | "RF_hyperparams_list = RF_dist.rvs(N)\n", 263 | "\n", 264 | "GRU_D_dist = DictDist({\n", 265 | " 'cell_size': ss.randint(50, 75),\n", 266 | " 'hidden_size': ss.randint(65, 95), \n", 267 | " 'learning_rate': ss.uniform(2e-3, 1e-1),\n", 268 | " 'num_epochs': ss.randint(15, 150),\n", 269 | " 'patience': ss.randint(3, 7),\n", 270 | " 'batch_size': ss.randint(35, 65),\n", 271 | " 'early_stop_frac': ss.uniform(0.05, 0.1),\n", 272 | " 'seed': ss.randint(1, 10000),\n", 273 | "})\n", 274 | "np.random.seed(SEED)\n", 275 | "GRU_D_hyperparams_list = GRU_D_dist.rvs(N)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": { 282 | "collapsed": true 283 | }, 284 | "outputs": [], 285 | "source": [ 286 | "def run_basic(model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, target):\n", 287 | " best_s, best_hyperparams = -np.Inf, None\n", 288 | " for i, hyperparams in enumerate(hyperparams_list):\n", 289 | " print(\"On sample %d / %d (hyperparams = %s)\" % (i+1, len(hyperparams_list), repr((hyperparams))))\n", 290 | " M = model(**hyperparams)\n", 291 | " M.fit(X_flat_train, Ys_train[target])\n", 292 | " s = roc_auc_score(Ys_dev[target], M.predict_proba(X_flat_dev)[:, 1])\n", 293 | " if s > best_s:\n", 294 | " best_s, best_hyperparams = s, hyperparams\n", 295 | " print(\"New Best Score: %.2f @ hyperparams = %s\" % (100*best_s, repr((best_hyperparams))))\n", 296 | 
"\n", 297 | " return run_only_final(model, best_hyperparams, X_flat_train, X_flat_dev, X_flat_test, target)\n", 298 | "\n", 299 | "def run_only_final(model, best_hyperparams, X_flat_train, X_flat_dev, X_flat_test, target):\n", 300 | " best_M = model(**best_hyperparams)\n", 301 | " best_M.fit(pd.concat((X_flat_train, X_flat_dev)), pd.concat((Ys_train, Ys_dev))[target])\n", 302 | " y_true = Ys_test[target]\n", 303 | " y_score = best_M.predict_proba(X_flat_test)[:, 1]\n", 304 | " y_pred = best_M.predict(X_flat_test)\n", 305 | "\n", 306 | " auc = roc_auc_score(y_true, y_score)\n", 307 | " auprc = average_precision_score(y_true, y_score)\n", 308 | " acc = accuracy_score(y_true, y_pred)\n", 309 | " F1 = f1_score(y_true, y_pred)\n", 310 | " \n", 311 | " return best_M, best_hyperparams, auc, auprc, acc, F1" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "### Sklearn" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": { 325 | "collapsed": true 326 | }, 327 | "outputs": [], 328 | "source": [ 329 | "RESULTS_PATH = '/scratch/mmd/extraction_baselines-sklearn.pkl'\n", 330 | "with open(RESULTS_PATH, mode='rb') as f: results = pickle.load(f)\n", 331 | " \n", 332 | "RERUN = True" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": null, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "for model_name, model, hyperparams_list in [\n", 342 | " ('RF', RandomForestClassifier, RF_hyperparams_list), ('LR', LogisticRegression, LR_hyperparams_list)\n", 343 | "]:\n", 344 | " if model_name not in results: results[model_name] = {}\n", 345 | " for t in ['mort_icu', 'los_3']:\n", 346 | " if t not in results[model_name]: results[model_name][t] = {}\n", 347 | " for n, X_flat_train, X_flat_dev, X_flat_test in (\n", 348 | " ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),\n", 349 | " ('raw', raw_flat_train, raw_flat_dev, raw_flat_test)\n", 350 
| " ):\n", 351 | " if n in results[model_name][t]:\n", 352 | " print(\"Finished model %s on target %s with representation %s\" % (model_name, t, n))\n", 353 | " if RERUN: \n", 354 | " h = results[model_name][t][n][1]\n", 355 | " results[model_name][t][n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)\n", 356 | " \n", 357 | " print(\"Final results for model %s on target %s with representation %s\" % (model_name, t, n))\n", 358 | " print(results[model_name][t][n][2:])\n", 359 | "\n", 360 | " with open(RESULTS_PATH, mode='wb') as f: pickle.dump(results, f)\n", 361 | " continue\n", 362 | " \n", 363 | " print(\"Running model %s on target %s with representation %s\" % (model_name, t, n))\n", 364 | " results[model_name][t][n] = run_basic(\n", 365 | " model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t\n", 366 | " )\n", 367 | " print(\"Final results for model %s on target %s with representation %s\" % (model_name, t, n))\n", 368 | " print(results[model_name][t][n][2:])\n", 369 | " with open(RESULTS_PATH, mode='wb') as f: pickle.dump(results, f)" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": {}, 376 | "outputs": [], 377 | "source": [ 378 | "np.random.seed(SEED+1)\n", 379 | "LR_hyperparams_list_2 = LR_dist.rvs(45)\n", 380 | "for i in range(45):\n", 381 | " if LR_hyperparams_list_2[i]['solver'] == 'lbfgs': LR_hyperparams_list_2[i]['penalty'] = 'l2'\n", 382 | "\n", 383 | "results_2 = {}\n", 384 | "results_2_PATH = '/scratch/mmd/extraction_baselines-sklearn_LR_2_runs.pkl'\n", 385 | "\n", 386 | "for model_name, model, hyperparams_list in [\n", 387 | "# ('RF', RandomForestClassifier, RF_hyperparams_list),\n", 388 | " ('LR', LogisticRegression, LR_hyperparams_list_2)\n", 389 | "]:\n", 390 | " if model_name not in results_2: results_2[model_name] = {}\n", 391 | " for t in ['mort_icu', 'los_3']:\n", 392 | " if t not in results_2[model_name]: results_2[model_name][t] = {}\n", 393 | " for n, 
X_flat_train, X_flat_dev, X_flat_test in (\n", 394 | " ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),\n", 395 | "# ('raw', raw_flat_train, raw_flat_dev, raw_flat_test)\n", 396 | " ):\n", 397 | " if n in results_2[model_name][t]:\n", 398 | " print(\"Finished model %s on target %s with representation %s\" % (model_name, t, n))\n", 399 | " if RERUN: \n", 400 | " h = results_2[model_name][t][n][1]\n", 401 | " results_2[model_name][t][n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)\n", 402 | " \n", 403 | " print(\"Final results_2 for model %s on target %s with representation %s\" % (model_name, t, n))\n", 404 | " print(results_2[model_name][t][n][2:])\n", 405 | "\n", 406 | " with open(results_2_PATH, mode='wb') as f: pickle.dump(results_2, f)\n", 407 | " continue\n", 408 | " \n", 409 | " print(\"Running model %s on target %s with representation %s\" % (model_name, t, n))\n", 410 | " results_2[model_name][t][n] = run_basic(\n", 411 | " model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t\n", 412 | " )\n", 413 | " print(\"Final results_2 for model %s on target %s with representation %s\" % (model_name, t, n))\n", 414 | " print(results_2[model_name][t][n][2:])\n", 415 | " with open(results_2_PATH, mode='wb') as f: pickle.dump(results_2, f)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "metadata": { 422 | "scrolled": false 423 | }, 424 | "outputs": [], 425 | "source": [ 426 | "for model_name, model, hyperparams_list in [\n", 427 | "# ('RF', RandomForestClassifier, RF_hyperparams_list),\n", 428 | " ('LR', LogisticRegression, LR_hyperparams_list_2)\n", 429 | "]:\n", 430 | " if model_name not in results_2: results_2[model_name] = {}\n", 431 | " for t in ['mort_icu', 'los_3']:\n", 432 | " if t not in results_2[model_name]: results_2[model_name][t] = {}\n", 433 | " for n, X_flat_train, X_flat_dev, X_flat_test in (\n", 434 | "# ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),\n", 
435 | " ('raw', raw_flat_train, raw_flat_dev, raw_flat_test),\n", 436 | " ):\n", 437 | " if n in results_2[model_name][t]:\n", 438 | " print(\"Finished model %s on target %s with representation %s\" % (model_name, t, n))\n", 439 | " if RERUN: \n", 440 | " h = results_2[model_name][t][n][1]\n", 441 | " results_2[model_name][t][n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)\n", 442 | " \n", 443 | " print(\"Final results_2 for model %s on target %s with representation %s\" % (model_name, t, n))\n", 444 | " print(results_2[model_name][t][n][2:])\n", 445 | "\n", 446 | " with open(results_2_PATH, mode='wb') as f: pickle.dump(results_2, f)\n", 447 | " continue\n", 448 | " \n", 449 | " print(\"Running model %s on target %s with representation %s\" % (model_name, t, n))\n", 450 | " results_2[model_name][t][n] = run_basic(\n", 451 | " model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t\n", 452 | " )\n", 453 | " print(\"Final results_2 for model %s on target %s with representation %s\" % (model_name, t, n))\n", 454 | " print(results_2[model_name][t][n][2:])\n", 455 | " with open(results_2_PATH, mode='wb') as f: pickle.dump(results_2, f)" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": null, 461 | "metadata": { 462 | "scrolled": false 463 | }, 464 | "outputs": [], 465 | "source": [ 466 | "for model_name, model, hyperparams_list in [\n", 467 | " ('RF', RandomForestClassifier, RF_hyperparams_list), ('LR', LogisticRegression, LR_hyperparams_list)\n", 468 | "]:\n", 469 | " if model_name not in results: results[model_name] = {}\n", 470 | " for t in ['mort_hosp', 'los_7']:\n", 471 | " if t not in results[model_name]: results[model_name][t] = {}\n", 472 | " for n, X_flat_train, X_flat_dev, X_flat_test in (\n", 473 | " ('lvl2', lvl2_flat_train, lvl2_flat_dev, lvl2_flat_test),\n", 474 | " ('raw', raw_flat_train, raw_flat_dev, raw_flat_test)\n", 475 | " ):\n", 476 | " if n in results[model_name][t]:\n", 477 | " 
print(\"Finished model %s on target %s with representation %s\" % (model_name, t, n))\n", 478 | " if RERUN: \n", 479 | " h = results[model_name][t][n][1]\n", 480 | " results[model_name][t][n] = run_only_final(model, h, X_flat_train, X_flat_dev, X_flat_test, t)\n", 481 | " \n", 482 | " print(\"Final results for model %s on target %s with representation %s\" % (model_name, t, n))\n", 483 | " print(results[model_name][t][n][2:])\n", 484 | "\n", 485 | " with open(RESULTS_PATH, mode='wb') as f: pickle.dump(results, f)\n", 486 | " continue\n", 487 | " \n", 488 | " print(\"Running model %s on target %s with representation %s\" % (model_name, t, n))\n", 489 | " results[model_name][t][n] = run_basic(\n", 490 | " model, hyperparams_list, X_flat_train, X_flat_dev, X_flat_test, t\n", 491 | " )\n", 492 | " print(\"Final results for model %s on target %s with representation %s\" % (model_name, t, n))\n", 493 | " print(results[model_name][t][n][2:])\n", 494 | " with open(RESULTS_PATH, mode='wb') as f: pickle.dump(results, f)" 495 | ] 496 | } 497 | ], 498 | "metadata": { 499 | "kernelspec": { 500 | "display_name": "Python 2", 501 | "language": "python", 502 | "name": "python2" 503 | }, 504 | "language_info": { 505 | "codemirror_mode": { 506 | "name": "ipython", 507 | "version": 2 508 | }, 509 | "file_extension": ".py", 510 | "mimetype": "text/x-python", 511 | "name": "python", 512 | "nbconvert_exporter": "python", 513 | "pygments_lexer": "ipython2", 514 | "version": "2.7.13" 515 | } 516 | }, 517 | "nbformat": 4, 518 | "nbformat_minor": 2 519 | } 520 | -------------------------------------------------------------------------------- /resources/testing_schemas.pkl: -------------------------------------------------------------------------------- 1 | (ccopy_reg 2 | _reconstructor 3 | p0 4 | (cpandas.core.frame 5 | DataFrame 6 | p1 7 | c__builtin__ 8 | object 9 | p2 10 | Ntp3 11 | Rp4 12 | (dp5 13 | S'_metadata' 14 | p6 15 | (lp7 16 | sS'_typ' 17 | p8 18 | S'dataframe' 19 | p9 20 | 
sS'_data' 21 | p10 22 | g0 23 | (cpandas.core.internals.managers 24 | BlockManager 25 | p11 26 | g2 27 | Ntp12 28 | Rp13 29 | ((lp14 30 | cpandas.core.indexes.base 31 | _new_Index 32 | p15 33 | (cpandas.core.indexes.base 34 | Index 35 | p16 36 | (dp17 37 | S'data' 38 | p18 39 | cnumpy.core.multiarray 40 | _reconstruct 41 | p19 42 | (cnumpy 43 | ndarray 44 | p20 45 | (I0 46 | tp21 47 | S'b' 48 | p22 49 | tp23 50 | Rp24 51 | (I1 52 | (I17 53 | tp25 54 | cnumpy 55 | dtype 56 | p26 57 | (S'O8' 58 | p27 59 | I0 60 | I1 61 | tp28 62 | Rp29 63 | (I3 64 | S'|' 65 | p30 66 | NNNI-1 67 | I-1 68 | I63 69 | tp31 70 | bI00 71 | (lp32 72 | Vsubject_id 73 | p33 74 | aVhadm_id 75 | p34 76 | aS'gender' 77 | p35 78 | aS'ethnicity' 79 | p36 80 | aS'age' 81 | p37 82 | aS'admittime' 83 | p38 84 | aS'dischtime' 85 | p39 86 | aS'deathtime' 87 | p40 88 | aS'intime' 89 | p41 90 | aS'outtime' 91 | p42 92 | aS'los_icu' 93 | p43 94 | aS'admission_type' 95 | p44 96 | aS'first_careunit' 97 | p45 98 | aS'mort_icu' 99 | p46 100 | aS'mort_hosp' 101 | p47 102 | aS'hospital_expire_flag' 103 | p48 104 | aS'hospstay_seq' 105 | p49 106 | atp50 107 | bsS'name' 108 | p51 109 | Nstp52 110 | Rp53 111 | ag15 112 | (cpandas.core.indexes.numeric 113 | Int64Index 114 | p54 115 | (dp55 116 | g18 117 | g19 118 | (g20 119 | (I0 120 | tp56 121 | g22 122 | tp57 123 | Rp58 124 | (I1 125 | (I0 126 | tp59 127 | g26 128 | (S'i8' 129 | p60 130 | I0 131 | I1 132 | tp61 133 | Rp62 134 | (I3 135 | S'<' 136 | p63 137 | NNNI-1 138 | I-1 139 | I0 140 | tp64 141 | bI00 142 | S'' 143 | p65 144 | tp66 145 | bsg51 146 | S'icustay_id' 147 | p67 148 | stp68 149 | Rp69 150 | a(lp70 151 | g0 152 | (cpandas.core.arrays.categorical 153 | Categorical 154 | p71 155 | g2 156 | Ntp72 157 | Rp73 158 | (dp74 159 | S'_cache' 160 | p75 161 | (dp76 162 | S'ndim' 163 | p77 164 | I1 165 | ssS'_codes' 166 | p78 167 | g19 168 | (g20 169 | (I0 170 | tp79 171 | g22 172 | tp80 173 | Rp81 174 | (I1 175 | (I0 176 | tp82 177 | g26 178 | (S'i1' 179 | p83 
180 | I0 181 | I1 182 | tp84 183 | Rp85 184 | (I3 185 | S'|' 186 | p86 187 | NNNI-1 188 | I-1 189 | I0 190 | tp87 191 | bI00 192 | g65 193 | tp88 194 | bsS'_ordered' 195 | p89 196 | I00 197 | sS'_categories' 198 | p90 199 | g15 200 | (g16 201 | (dp91 202 | g18 203 | g19 204 | (g20 205 | (I0 206 | tp92 207 | g22 208 | tp93 209 | Rp94 210 | (I1 211 | (I2 212 | tp95 213 | g29 214 | I00 215 | (lp96 216 | S'F' 217 | p97 218 | aS'M' 219 | p98 220 | atp99 221 | bsg51 222 | Nstp100 223 | Rp101 224 | sS'_dtype' 225 | p102 226 | g0 227 | (cpandas.core.dtypes.dtypes 228 | CategoricalDtype 229 | p103 230 | g2 231 | Ntp104 232 | Rp105 233 | (dp106 234 | S'ordered' 235 | p107 236 | I00 237 | sS'categories' 238 | p108 239 | g101 240 | sbsbag0 241 | (g71 242 | g2 243 | Ntp109 244 | Rp110 245 | (dp111 246 | g75 247 | (dp112 248 | g77 249 | I1 250 | ssg78 251 | g19 252 | (g20 253 | (I0 254 | tp113 255 | g22 256 | tp114 257 | Rp115 258 | (I1 259 | (I0 260 | tp116 261 | g85 262 | I00 263 | g65 264 | tp117 265 | bsg89 266 | I00 267 | sg90 268 | g15 269 | (g16 270 | (dp118 271 | g18 272 | g19 273 | (g20 274 | (I0 275 | tp119 276 | g22 277 | tp120 278 | Rp121 279 | (I1 280 | (I41 281 | tp122 282 | g29 283 | I00 284 | (lp123 285 | S'AMERICAN INDIAN/ALASKA NATIVE' 286 | p124 287 | aS'AMERICAN INDIAN/ALASKA NATIVE FEDERALLY RECOGNIZED TRIBE' 288 | p125 289 | aS'ASIAN' 290 | p126 291 | aS'ASIAN - ASIAN INDIAN' 292 | p127 293 | aS'ASIAN - CAMBODIAN' 294 | p128 295 | aS'ASIAN - CHINESE' 296 | p129 297 | aS'ASIAN - FILIPINO' 298 | p130 299 | aS'ASIAN - JAPANESE' 300 | p131 301 | aS'ASIAN - KOREAN' 302 | p132 303 | aS'ASIAN - OTHER' 304 | p133 305 | aS'ASIAN - THAI' 306 | p134 307 | aS'ASIAN - VIETNAMESE' 308 | p135 309 | aS'BLACK/AFRICAN' 310 | p136 311 | aS'BLACK/AFRICAN AMERICAN' 312 | p137 313 | aS'BLACK/CAPE VERDEAN' 314 | p138 315 | aS'BLACK/HAITIAN' 316 | p139 317 | aS'CARIBBEAN ISLAND' 318 | p140 319 | aS'HISPANIC OR LATINO' 320 | p141 321 | aS'HISPANIC/LATINO - CENTRAL AMERICAN (OTHER)' 
322 | p142 323 | aS'HISPANIC/LATINO - COLOMBIAN' 324 | p143 325 | aS'HISPANIC/LATINO - CUBAN' 326 | p144 327 | aS'HISPANIC/LATINO - DOMINICAN' 328 | p145 329 | aS'HISPANIC/LATINO - GUATEMALAN' 330 | p146 331 | aS'HISPANIC/LATINO - HONDURAN' 332 | p147 333 | aS'HISPANIC/LATINO - MEXICAN' 334 | p148 335 | aS'HISPANIC/LATINO - PUERTO RICAN' 336 | p149 337 | aS'HISPANIC/LATINO - SALVADORAN' 338 | p150 339 | aS'MIDDLE EASTERN' 340 | p151 341 | aS'MULTI RACE ETHNICITY' 342 | p152 343 | aS'NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER' 344 | p153 345 | aS'OTHER' 346 | p154 347 | aS'PATIENT DECLINED TO ANSWER' 348 | p155 349 | aS'PORTUGUESE' 350 | p156 351 | aS'SOUTH AMERICAN' 352 | p157 353 | aS'UNABLE TO OBTAIN' 354 | p158 355 | aS'UNKNOWN/NOT SPECIFIED' 356 | p159 357 | aS'WHITE' 358 | p160 359 | aS'WHITE - BRAZILIAN' 360 | p161 361 | aS'WHITE - EASTERN EUROPEAN' 362 | p162 363 | aS'WHITE - OTHER EUROPEAN' 364 | p163 365 | aS'WHITE - RUSSIAN' 366 | p164 367 | atp165 368 | bsg51 369 | Nstp166 370 | Rp167 371 | sg102 372 | g0 373 | (g103 374 | g2 375 | Ntp168 376 | Rp169 377 | (dp170 378 | g107 379 | I00 380 | sg108 381 | g167 382 | sbsbag0 383 | (g71 384 | g2 385 | Ntp171 386 | Rp172 387 | (dp173 388 | g75 389 | (dp174 390 | g77 391 | I1 392 | ssg78 393 | g19 394 | (g20 395 | (I0 396 | tp175 397 | g22 398 | tp176 399 | Rp177 400 | (I1 401 | (I0 402 | tp178 403 | g85 404 | I00 405 | g65 406 | tp179 407 | bsg89 408 | I00 409 | sg90 410 | g15 411 | (g16 412 | (dp180 413 | g18 414 | g19 415 | (g20 416 | (I0 417 | tp181 418 | g22 419 | tp182 420 | Rp183 421 | (I1 422 | (I3 423 | tp184 424 | g29 425 | I00 426 | (lp185 427 | S'ELECTIVE' 428 | p186 429 | aS'EMERGENCY' 430 | p187 431 | aS'URGENT' 432 | p188 433 | atp189 434 | bsg51 435 | Nstp190 436 | Rp191 437 | sg102 438 | g0 439 | (g103 440 | g2 441 | Ntp192 442 | Rp193 443 | (dp194 444 | g107 445 | I00 446 | sg108 447 | g191 448 | sbsbag0 449 | (g71 450 | g2 451 | Ntp195 452 | Rp196 453 | (dp197 454 | g75 455 | (dp198 456 | g77 
457 | I1 458 | ssg78 459 | g19 460 | (g20 461 | (I0 462 | tp199 463 | g22 464 | tp200 465 | Rp201 466 | (I1 467 | (I0 468 | tp202 469 | g85 470 | I00 471 | g65 472 | tp203 473 | bsg89 474 | I00 475 | sg90 476 | g15 477 | (g16 478 | (dp204 479 | g18 480 | g19 481 | (g20 482 | (I0 483 | tp205 484 | g22 485 | tp206 486 | Rp207 487 | (I1 488 | (I5 489 | tp208 490 | g29 491 | I00 492 | (lp209 493 | S'CCU' 494 | p210 495 | aS'CSRU' 496 | p211 497 | aS'MICU' 498 | p212 499 | aS'SICU' 500 | p213 501 | aS'TSICU' 502 | p214 503 | atp215 504 | bsg51 505 | Nstp216 506 | Rp217 507 | sg102 508 | g0 509 | (g103 510 | g2 511 | Ntp218 512 | Rp219 513 | (dp220 514 | g107 515 | I00 516 | sg108 517 | g217 518 | sbsbag0 519 | (g71 520 | g2 521 | Ntp221 522 | Rp222 523 | (dp223 524 | g75 525 | (dp224 526 | g77 527 | I1 528 | ssg78 529 | g19 530 | (g20 531 | (I0 532 | tp225 533 | g22 534 | tp226 535 | Rp227 536 | (I1 537 | (I0 538 | tp228 539 | g85 540 | I00 541 | g65 542 | tp229 543 | bsg89 544 | I00 545 | sg90 546 | g15 547 | (g54 548 | (dp230 549 | g18 550 | g19 551 | (g20 552 | (I0 553 | tp231 554 | g22 555 | tp232 556 | Rp233 557 | (I1 558 | (I2 559 | tp234 560 | g62 561 | I00 562 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00' 563 | p235 564 | tp236 565 | bsg51 566 | Nstp237 567 | Rp238 568 | sg102 569 | g0 570 | (g103 571 | g2 572 | Ntp239 573 | Rp240 574 | (dp241 575 | g107 576 | I00 577 | sg108 578 | g238 579 | sbsbag0 580 | (g71 581 | g2 582 | Ntp242 583 | Rp243 584 | (dp244 585 | g75 586 | (dp245 587 | g77 588 | I1 589 | ssg78 590 | g19 591 | (g20 592 | (I0 593 | tp246 594 | g22 595 | tp247 596 | Rp248 597 | (I1 598 | (I0 599 | tp249 600 | g85 601 | I00 602 | g65 603 | tp250 604 | bsg89 605 | I00 606 | sg90 607 | g15 608 | (g54 609 | (dp251 610 | g18 611 | g19 612 | (g20 613 | (I0 614 | tp252 615 | g22 616 | tp253 617 | Rp254 618 | (I1 619 | (I2 620 | tp255 621 | g62 622 | I00 623 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00' 624 | 
p256 625 | tp257 626 | bsg51 627 | Nstp258 628 | Rp259 629 | sg102 630 | g0 631 | (g103 632 | g2 633 | Ntp260 634 | Rp261 635 | (dp262 636 | g107 637 | I00 638 | sg108 639 | g259 640 | sbsbag0 641 | (g71 642 | g2 643 | Ntp263 644 | Rp264 645 | (dp265 646 | g75 647 | (dp266 648 | g77 649 | I1 650 | ssg78 651 | g19 652 | (g20 653 | (I0 654 | tp267 655 | g22 656 | tp268 657 | Rp269 658 | (I1 659 | (I0 660 | tp270 661 | g85 662 | I00 663 | g65 664 | tp271 665 | bsg89 666 | I00 667 | sg90 668 | g15 669 | (g54 670 | (dp272 671 | g18 672 | g19 673 | (g20 674 | (I0 675 | tp273 676 | g22 677 | tp274 678 | Rp275 679 | (I1 680 | (I2 681 | tp276 682 | g62 683 | I00 684 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00' 685 | p277 686 | tp278 687 | bsg51 688 | Nstp279 689 | Rp280 690 | sg102 691 | g0 692 | (g103 693 | g2 694 | Ntp281 695 | Rp282 696 | (dp283 697 | g107 698 | I00 699 | sg108 700 | g280 701 | sbsbag0 702 | (g71 703 | g2 704 | Ntp284 705 | Rp285 706 | (dp286 707 | g75 708 | (dp287 709 | g77 710 | I1 711 | ssg78 712 | g19 713 | (g20 714 | (I0 715 | tp288 716 | g22 717 | tp289 718 | Rp290 719 | (I1 720 | (I0 721 | tp291 722 | g85 723 | I00 724 | g65 725 | tp292 726 | bsg89 727 | I00 728 | sg90 729 | g15 730 | (g54 731 | (dp293 732 | g18 733 | g19 734 | (g20 735 | (I0 736 | tp294 737 | g22 738 | tp295 739 | Rp296 740 | (I1 741 | (I1 742 | tp297 743 | g62 744 | I00 745 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 746 | p298 747 | tp299 748 | bsg51 749 | Nstp300 750 | Rp301 751 | sg102 752 | g0 753 | (g103 754 | g2 755 | Ntp302 756 | Rp303 757 | (dp304 758 | g107 759 | I00 760 | sg108 761 | g301 762 | sbsbag19 763 | (g20 764 | (I0 765 | tp305 766 | g22 767 | tp306 768 | Rp307 769 | (I1 770 | (I5 771 | I0 772 | tp308 773 | g26 774 | (S'M8' 775 | p309 776 | I0 777 | I1 778 | tp310 779 | Rp311 780 | (I4 781 | S'<' 782 | p312 783 | NNNI-1 784 | I-1 785 | I0 786 | ((dp313 787 | (S'ns' 788 | p314 789 | I1 790 | I1 791 | I1 792 | tp315 793 | tp316 794 | tp317 795 
| bI00 796 | g65 797 | tp318 798 | bag19 799 | (g20 800 | (I0 801 | tp319 802 | g22 803 | tp320 804 | Rp321 805 | (I1 806 | (I2 807 | I0 808 | tp322 809 | g26 810 | (S'f8' 811 | p323 812 | I0 813 | I1 814 | tp324 815 | Rp325 816 | (I3 817 | S'<' 818 | p326 819 | NNNI-1 820 | I-1 821 | I0 822 | tp327 823 | bI00 824 | g65 825 | tp328 826 | bag19 827 | (g20 828 | (I0 829 | tp329 830 | g22 831 | tp330 832 | Rp331 833 | (I1 834 | (I2 835 | I0 836 | tp332 837 | g62 838 | I00 839 | g65 840 | tp333 841 | ba(lp334 842 | g15 843 | (g16 844 | (dp335 845 | g18 846 | g19 847 | (g20 848 | (I0 849 | tp336 850 | g22 851 | tp337 852 | Rp338 853 | (I1 854 | (I1 855 | tp339 856 | g29 857 | I00 858 | (lp340 859 | g35 860 | atp341 861 | bsg51 862 | Nstp342 863 | Rp343 864 | ag15 865 | (g16 866 | (dp344 867 | g18 868 | g19 869 | (g20 870 | (I0 871 | tp345 872 | g22 873 | tp346 874 | Rp347 875 | (I1 876 | (I1 877 | tp348 878 | g29 879 | I00 880 | (lp349 881 | g36 882 | atp350 883 | bsg51 884 | Nstp351 885 | Rp352 886 | ag15 887 | (g16 888 | (dp353 889 | g18 890 | g19 891 | (g20 892 | (I0 893 | tp354 894 | g22 895 | tp355 896 | Rp356 897 | (I1 898 | (I1 899 | tp357 900 | g29 901 | I00 902 | (lp358 903 | g44 904 | atp359 905 | bsg51 906 | Nstp360 907 | Rp361 908 | ag15 909 | (g16 910 | (dp362 911 | g18 912 | g19 913 | (g20 914 | (I0 915 | tp363 916 | g22 917 | tp364 918 | Rp365 919 | (I1 920 | (I1 921 | tp366 922 | g29 923 | I00 924 | (lp367 925 | g45 926 | atp368 927 | bsg51 928 | Nstp369 929 | Rp370 930 | ag15 931 | (g16 932 | (dp371 933 | g18 934 | g19 935 | (g20 936 | (I0 937 | tp372 938 | g22 939 | tp373 940 | Rp374 941 | (I1 942 | (I1 943 | tp375 944 | g29 945 | I00 946 | (lp376 947 | g46 948 | atp377 949 | bsg51 950 | Nstp378 951 | Rp379 952 | ag15 953 | (g16 954 | (dp380 955 | g18 956 | g19 957 | (g20 958 | (I0 959 | tp381 960 | g22 961 | tp382 962 | Rp383 963 | (I1 964 | (I1 965 | tp384 966 | g29 967 | I00 968 | (lp385 969 | g47 970 | atp386 971 | bsg51 972 | Nstp387 973 | Rp388 
974 | ag15 975 | (g16 976 | (dp389 977 | g18 978 | g19 979 | (g20 980 | (I0 981 | tp390 982 | g22 983 | tp391 984 | Rp392 985 | (I1 986 | (I1 987 | tp393 988 | g29 989 | I00 990 | (lp394 991 | g48 992 | atp395 993 | bsg51 994 | Nstp396 995 | Rp397 996 | ag15 997 | (g16 998 | (dp398 999 | g18 1000 | g19 1001 | (g20 1002 | (I0 1003 | tp399 1004 | g22 1005 | tp400 1006 | Rp401 1007 | (I1 1008 | (I1 1009 | tp402 1010 | g29 1011 | I00 1012 | (lp403 1013 | g49 1014 | atp404 1015 | bsg51 1016 | Nstp405 1017 | Rp406 1018 | ag15 1019 | (g16 1020 | (dp407 1021 | g18 1022 | g19 1023 | (g20 1024 | (I0 1025 | tp408 1026 | g22 1027 | tp409 1028 | Rp410 1029 | (I1 1030 | (I5 1031 | tp411 1032 | g29 1033 | I00 1034 | (lp412 1035 | g38 1036 | ag39 1037 | ag40 1038 | ag41 1039 | ag42 1040 | atp413 1041 | bsg51 1042 | Nstp414 1043 | Rp415 1044 | ag15 1045 | (g16 1046 | (dp416 1047 | g18 1048 | g19 1049 | (g20 1050 | (I0 1051 | tp417 1052 | g22 1053 | tp418 1054 | Rp419 1055 | (I1 1056 | (I2 1057 | tp420 1058 | g29 1059 | I00 1060 | (lp421 1061 | g37 1062 | ag43 1063 | atp422 1064 | bsg51 1065 | Nstp423 1066 | Rp424 1067 | ag15 1068 | (g16 1069 | (dp425 1070 | g18 1071 | g19 1072 | (g20 1073 | (I0 1074 | tp426 1075 | g22 1076 | tp427 1077 | Rp428 1078 | (I1 1079 | (I2 1080 | tp429 1081 | g29 1082 | I00 1083 | (lp430 1084 | g33 1085 | ag34 1086 | atp431 1087 | bsg51 1088 | Nstp432 1089 | Rp433 1090 | a(dp434 1091 | S'0.14.1' 1092 | p435 1093 | (dp436 1094 | S'axes' 1095 | p437 1096 | g14 1097 | sS'blocks' 1098 | p438 1099 | (lp439 1100 | (dp440 1101 | S'mgr_locs' 1102 | p441 1103 | c__builtin__ 1104 | slice 1105 | p442 1106 | (I2 1107 | I3 1108 | I1 1109 | tp443 1110 | Rp444 1111 | sS'values' 1112 | p445 1113 | g73 1114 | sa(dp446 1115 | g441 1116 | g442 1117 | (I3 1118 | I4 1119 | I1 1120 | tp447 1121 | Rp448 1122 | sg445 1123 | g110 1124 | sa(dp449 1125 | g441 1126 | g442 1127 | (I11 1128 | I12 1129 | I1 1130 | tp450 1131 | Rp451 1132 | sg445 1133 | g172 1134 | sa(dp452 1135 | g441 
1136 | g442 1137 | (I12 1138 | I13 1139 | I1 1140 | tp453 1141 | Rp454 1142 | sg445 1143 | g196 1144 | sa(dp455 1145 | g441 1146 | g442 1147 | (I13 1148 | I14 1149 | I1 1150 | tp456 1151 | Rp457 1152 | sg445 1153 | g222 1154 | sa(dp458 1155 | g441 1156 | g442 1157 | (I14 1158 | I15 1159 | I1 1160 | tp459 1161 | Rp460 1162 | sg445 1163 | g243 1164 | sa(dp461 1165 | g441 1166 | g442 1167 | (I15 1168 | I16 1169 | I1 1170 | tp462 1171 | Rp463 1172 | sg445 1173 | g264 1174 | sa(dp464 1175 | g441 1176 | g442 1177 | (I16 1178 | I17 1179 | I1 1180 | tp465 1181 | Rp466 1182 | sg445 1183 | g285 1184 | sa(dp467 1185 | g441 1186 | g442 1187 | (I5 1188 | I10 1189 | I1 1190 | tp468 1191 | Rp469 1192 | sg445 1193 | g307 1194 | sa(dp470 1195 | g441 1196 | g442 1197 | (I4 1198 | I16 1199 | I6 1200 | tp471 1201 | Rp472 1202 | sg445 1203 | g321 1204 | sa(dp473 1205 | g441 1206 | g442 1207 | (I0 1208 | I2 1209 | I1 1210 | tp474 1211 | Rp475 1212 | sg445 1213 | g331 1214 | sasstp476 1215 | bsbg0 1216 | (g1 1217 | g2 1218 | Ntp477 1219 | Rp478 1220 | (dp479 1221 | g6 1222 | g7 1223 | sg8 1224 | g9 1225 | sg10 1226 | g0 1227 | (g11 1228 | g2 1229 | Ntp480 1230 | Rp481 1231 | ((lp482 1232 | g15 1233 | (g16 1234 | (dp483 1235 | g18 1236 | g19 1237 | (g20 1238 | (I0 1239 | tp484 1240 | g22 1241 | tp485 1242 | Rp486 1243 | (I1 1244 | (I7 1245 | tp487 1246 | g29 1247 | I00 1248 | (lp488 1249 | S'subject_id' 1250 | p489 1251 | aS'hadm_id' 1252 | p490 1253 | aS'icustay_id' 1254 | p491 1255 | aS'charttime' 1256 | p492 1257 | aS'itemid' 1258 | p493 1259 | aS'value' 1260 | p494 1261 | aS'valueuom' 1262 | p495 1263 | atp496 1264 | bsg51 1265 | Nstp497 1266 | Rp498 1267 | ag15 1268 | (g54 1269 | (dp499 1270 | g18 1271 | g19 1272 | (g20 1273 | (I0 1274 | tp500 1275 | g22 1276 | tp501 1277 | Rp502 1278 | (I1 1279 | (I0 1280 | tp503 1281 | g62 1282 | I00 1283 | g65 1284 | tp504 1285 | bsg51 1286 | Nstp505 1287 | Rp506 1288 | a(lp507 1289 | g19 1290 | (g20 1291 | (I0 1292 | tp508 1293 | g22 1294 | tp509 
1295 | Rp510 1296 | (I1 1297 | (I4 1298 | I0 1299 | tp511 1300 | g62 1301 | I00 1302 | g65 1303 | tp512 1304 | bag19 1305 | (g20 1306 | (I0 1307 | tp513 1308 | g22 1309 | tp514 1310 | Rp515 1311 | (I1 1312 | (I1 1313 | I0 1314 | tp516 1315 | g26 1316 | (S'M8' 1317 | p517 1318 | I0 1319 | I1 1320 | tp518 1321 | Rp519 1322 | (I4 1323 | S'<' 1324 | p520 1325 | NNNI-1 1326 | I-1 1327 | I0 1328 | ((dp521 1329 | (S'ns' 1330 | p522 1331 | I1 1332 | I1 1333 | I1 1334 | tp523 1335 | tp524 1336 | tp525 1337 | bI00 1338 | g65 1339 | tp526 1340 | bag19 1341 | (g20 1342 | (I0 1343 | tp527 1344 | g22 1345 | tp528 1346 | Rp529 1347 | (I1 1348 | (I2 1349 | I0 1350 | tp530 1351 | g29 1352 | I00 1353 | (lp531 1354 | tp532 1355 | ba(lp533 1356 | g15 1357 | (g16 1358 | (dp534 1359 | g18 1360 | g19 1361 | (g20 1362 | (I0 1363 | tp535 1364 | g22 1365 | tp536 1366 | Rp537 1367 | (I1 1368 | (I4 1369 | tp538 1370 | g29 1371 | I00 1372 | (lp539 1373 | g489 1374 | ag490 1375 | ag491 1376 | ag493 1377 | atp540 1378 | bsg51 1379 | Nstp541 1380 | Rp542 1381 | ag15 1382 | (g16 1383 | (dp543 1384 | g18 1385 | g19 1386 | (g20 1387 | (I0 1388 | tp544 1389 | g22 1390 | tp545 1391 | Rp546 1392 | (I1 1393 | (I1 1394 | tp547 1395 | g29 1396 | I00 1397 | (lp548 1398 | g492 1399 | atp549 1400 | bsg51 1401 | Nstp550 1402 | Rp551 1403 | ag15 1404 | (g16 1405 | (dp552 1406 | g18 1407 | g19 1408 | (g20 1409 | (I0 1410 | tp553 1411 | g22 1412 | tp554 1413 | Rp555 1414 | (I1 1415 | (I2 1416 | tp556 1417 | g29 1418 | I00 1419 | (lp557 1420 | g494 1421 | ag495 1422 | atp558 1423 | bsg51 1424 | Nstp559 1425 | Rp560 1426 | a(dp561 1427 | g435 1428 | (dp562 1429 | g437 1430 | g482 1431 | sg438 1432 | (lp563 1433 | (dp564 1434 | g441 1435 | g19 1436 | (g20 1437 | (I0 1438 | tp565 1439 | g22 1440 | tp566 1441 | Rp567 1442 | (I1 1443 | (I4 1444 | tp568 1445 | g62 1446 | I00 1447 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00' 1448 | 
p569 1449 | tp570 1450 | bsg445 1451 | g510 1452 | sa(dp571 1453 | g441 1454 | g442 1455 | (I3 1456 | I4 1457 | I1 1458 | tp572 1459 | Rp573 1460 | sg445 1461 | g515 1462 | sa(dp574 1463 | g441 1464 | g442 1465 | (I5 1466 | I7 1467 | I1 1468 | tp575 1469 | Rp576 1470 | sg445 1471 | g529 1472 | sasstp577 1473 | bsbg0 1474 | (g1 1475 | g2 1476 | Ntp578 1477 | Rp579 1478 | (dp580 1479 | g6 1480 | g7 1481 | sg8 1482 | g9 1483 | sg10 1484 | g0 1485 | (g11 1486 | g2 1487 | Ntp581 1488 | Rp582 1489 | ((lp583 1490 | g15 1491 | (g16 1492 | (dp584 1493 | g18 1494 | g19 1495 | (g20 1496 | (I0 1497 | tp585 1498 | g22 1499 | tp586 1500 | Rp587 1501 | (I1 1502 | (I5 1503 | tp588 1504 | g29 1505 | I00 1506 | (lp589 1507 | S'label' 1508 | p590 1509 | aS'dbsource' 1510 | p591 1511 | aS'linksto' 1512 | p592 1513 | aS'category' 1514 | p593 1515 | aS'unitname' 1516 | p594 1517 | atp595 1518 | bsg51 1519 | Nstp596 1520 | Rp597 1521 | ag15 1522 | (g54 1523 | (dp598 1524 | g18 1525 | g19 1526 | (g20 1527 | (I0 1528 | tp599 1529 | g22 1530 | tp600 1531 | Rp601 1532 | (I1 1533 | (I0 1534 | tp602 1535 | g62 1536 | I00 1537 | g65 1538 | tp603 1539 | bsg51 1540 | S'itemid' 1541 | p604 1542 | stp605 1543 | Rp606 1544 | a(lp607 1545 | g19 1546 | (g20 1547 | (I0 1548 | tp608 1549 | g22 1550 | tp609 1551 | Rp610 1552 | (I1 1553 | (I5 1554 | I0 1555 | tp611 1556 | g29 1557 | I00 1558 | (lp612 1559 | tp613 1560 | ba(lp614 1561 | g15 1562 | (g16 1563 | (dp615 1564 | g18 1565 | g19 1566 | (g20 1567 | (I0 1568 | tp616 1569 | g22 1570 | tp617 1571 | Rp618 1572 | (I1 1573 | (I5 1574 | tp619 1575 | g29 1576 | I00 1577 | (lp620 1578 | g590 1579 | ag591 1580 | ag592 1581 | ag593 1582 | ag594 1583 | atp621 1584 | bsg51 1585 | Nstp622 1586 | Rp623 1587 | a(dp624 1588 | g435 1589 | (dp625 1590 | g437 1591 | g583 1592 | sg438 1593 | (lp626 1594 | (dp627 1595 | g441 1596 | g442 1597 | (I0 1598 | I5 1599 | I1 1600 | tp628 1601 | Rp629 1602 | sg445 1603 | g610 1604 | sasstp630 1605 | bsbg0 1606 | (g1 1607 | g2 
1608 | Ntp631 1609 | Rp632 1610 | (dp633 1611 | g6 1612 | g7 1613 | sg8 1614 | g9 1615 | sg10 1616 | g0 1617 | (g11 1618 | g2 1619 | Ntp634 1620 | Rp635 1621 | ((lp636 1622 | g15 1623 | (g16 1624 | (dp637 1625 | g18 1626 | g19 1627 | (g20 1628 | (I0 1629 | tp638 1630 | g22 1631 | tp639 1632 | Rp640 1633 | (I1 1634 | (I5 1635 | tp641 1636 | g29 1637 | I00 1638 | (lp642 1639 | S'OUTLIER_LOW' 1640 | p643 1641 | aS'VALID_LOW' 1642 | p644 1643 | aS'IMPUTE' 1644 | p645 1645 | aS'VALID_HIGH' 1646 | p646 1647 | aS'OUTLIER_HIGH' 1648 | p647 1649 | atp648 1650 | bsg51 1651 | Nstp649 1652 | Rp650 1653 | ag15 1654 | (g16 1655 | (dp651 1656 | g18 1657 | g19 1658 | (g20 1659 | (I0 1660 | tp652 1661 | g22 1662 | tp653 1663 | Rp654 1664 | (I1 1665 | (I0 1666 | tp655 1667 | g29 1668 | I00 1669 | (lp656 1670 | tp657 1671 | bsg51 1672 | S'VARIABLE' 1673 | p658 1674 | stp659 1675 | Rp660 1676 | a(lp661 1677 | g19 1678 | (g20 1679 | (I0 1680 | tp662 1681 | g22 1682 | tp663 1683 | Rp664 1684 | (I1 1685 | (I5 1686 | I0 1687 | tp665 1688 | g325 1689 | I00 1690 | g65 1691 | tp666 1692 | ba(lp667 1693 | g15 1694 | (g16 1695 | (dp668 1696 | g18 1697 | g19 1698 | (g20 1699 | (I0 1700 | tp669 1701 | g22 1702 | tp670 1703 | Rp671 1704 | (I1 1705 | (I5 1706 | tp672 1707 | g29 1708 | I00 1709 | (lp673 1710 | g643 1711 | ag644 1712 | ag645 1713 | ag646 1714 | ag647 1715 | atp674 1716 | bsg51 1717 | Nstp675 1718 | Rp676 1719 | a(dp677 1720 | g435 1721 | (dp678 1722 | g437 1723 | g636 1724 | sg438 1725 | (lp679 1726 | (dp680 1727 | g441 1728 | g442 1729 | (I0 1730 | I5 1731 | I1 1732 | tp681 1733 | Rp682 1734 | sg445 1735 | g664 1736 | sasstp683 1737 | bsbg0 1738 | (g1 1739 | g2 1740 | Ntp684 1741 | Rp685 1742 | (dp686 1743 | g6 1744 | g7 1745 | sg8 1746 | g9 1747 | sg10 1748 | g0 1749 | (g11 1750 | g2 1751 | Ntp687 1752 | Rp688 1753 | ((lp689 1754 | g15 1755 | (g16 1756 | (dp690 1757 | g18 1758 | g19 1759 | (g20 1760 | (I0 1761 | tp691 1762 | g22 1763 | tp692 1764 | Rp693 1765 | (I1 1766 | (I18 
1767 | tp694 1768 | g29 1769 | I00 1770 | (lp695 1771 | S'LEVEL2' 1772 | p696 1773 | aS'LEVEL1' 1774 | p697 1775 | aS'ALTERNATIVE' 1776 | p698 1777 | aS'STATUS' 1778 | p699 1779 | aS'STATUS NOTE' 1780 | p700 1781 | aS'ITEMID' 1782 | p701 1783 | aS'MIMIC LABEL' 1784 | p702 1785 | aS'UNITNAME' 1786 | p703 1787 | aS'LINKSTO' 1788 | p704 1789 | aS'COUNT' 1790 | p705 1791 | aS'CATEGORY' 1792 | p706 1793 | aS'CONCEPTID' 1794 | p707 1795 | aS'FLUID' 1796 | p708 1797 | aS'LOINC_CODE' 1798 | p709 1799 | aS'DBSOURCE' 1800 | p710 1801 | aS'Unnamed: 15' 1802 | p711 1803 | aS'PARAM_TYPE' 1804 | p712 1805 | aS'NOTE' 1806 | p713 1807 | atp714 1808 | bsg51 1809 | Nstp715 1810 | Rp716 1811 | ag15 1812 | (g54 1813 | (dp717 1814 | g18 1815 | g19 1816 | (g20 1817 | (I0 1818 | tp718 1819 | g22 1820 | tp719 1821 | Rp720 1822 | (I1 1823 | (I0 1824 | tp721 1825 | g62 1826 | I00 1827 | g65 1828 | tp722 1829 | bsg51 1830 | Nstp723 1831 | Rp724 1832 | a(lp725 1833 | g19 1834 | (g20 1835 | (I0 1836 | tp726 1837 | g22 1838 | tp727 1839 | Rp728 1840 | (I1 1841 | (I17 1842 | I0 1843 | tp729 1844 | g29 1845 | I00 1846 | (lp730 1847 | tp731 1848 | bag19 1849 | (g20 1850 | (I0 1851 | tp732 1852 | g22 1853 | tp733 1854 | Rp734 1855 | (I1 1856 | (I1 1857 | I0 1858 | tp735 1859 | g62 1860 | I00 1861 | g65 1862 | tp736 1863 | ba(lp737 1864 | g15 1865 | (g16 1866 | (dp738 1867 | g18 1868 | g19 1869 | (g20 1870 | (I0 1871 | tp739 1872 | g22 1873 | tp740 1874 | Rp741 1875 | (I1 1876 | (I17 1877 | tp742 1878 | g29 1879 | I00 1880 | (lp743 1881 | g696 1882 | ag697 1883 | ag698 1884 | ag699 1885 | ag700 1886 | ag702 1887 | ag703 1888 | ag704 1889 | ag705 1890 | ag706 1891 | ag707 1892 | ag708 1893 | ag709 1894 | ag710 1895 | ag711 1896 | ag712 1897 | ag713 1898 | atp744 1899 | bsg51 1900 | Nstp745 1901 | Rp746 1902 | ag15 1903 | (g16 1904 | (dp747 1905 | g18 1906 | g19 1907 | (g20 1908 | (I0 1909 | tp748 1910 | g22 1911 | tp749 1912 | Rp750 1913 | (I1 1914 | (I1 1915 | tp751 1916 | g29 1917 | I00 1918 | 
(lp752 1919 | g701 1920 | atp753 1921 | bsg51 1922 | Nstp754 1923 | Rp755 1924 | a(dp756 1925 | g435 1926 | (dp757 1927 | g437 1928 | g689 1929 | sg438 1930 | (lp758 1931 | (dp759 1932 | g441 1933 | g19 1934 | (g20 1935 | (I0 1936 | tp760 1937 | g22 1938 | tp761 1939 | Rp762 1940 | (I1 1941 | (I17 1942 | tp763 1943 | g62 1944 | I00 1945 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\r\x00\x00\x00\x00\x00\x00\x00\x0e\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x11\x00\x00\x00\x00\x00\x00\x00' 1946 | p764 1947 | tp765 1948 | bsg445 1949 | g728 1950 | sa(dp766 1951 | g441 1952 | g442 1953 | (I5 1954 | I6 1955 | I1 1956 | tp767 1957 | Rp768 1958 | sg445 1959 | g734 1960 | sasstp769 1961 | bsbg0 1962 | (g1 1963 | g2 1964 | Ntp770 1965 | Rp771 1966 | (dp772 1967 | g6 1968 | (lp773 1969 | sg8 1970 | S'dataframe' 1971 | p774 1972 | sg10 1973 | g0 1974 | (g11 1975 | g2 1976 | Ntp775 1977 | Rp776 1978 | ((lp777 1979 | g15 1980 | (cpandas.core.indexes.multi 1981 | MultiIndex 1982 | p778 1983 | (dp779 1984 | S'codes' 1985 | p780 1986 | (lp781 1987 | g19 1988 | (cpandas.core.indexes.frozen 1989 | FrozenNDArray 1990 | p782 1991 | (I0 1992 | tp783 1993 | g22 1994 | tp784 1995 | Rp785 1996 | (I1 1997 | (I2 1998 | tp786 1999 | g26 2000 | (S'i1' 2001 | p787 2002 | I0 2003 | I1 2004 | tp788 2005 | Rp789 2006 | (I3 2007 | S'|' 2008 | p790 2009 | NNNI-1 2010 | I-1 2011 | I0 2012 | tp791 2013 | bI00 2014 | S'\x00\x00' 2015 | p792 2016 | tp793 2017 | bag19 2018 | (g782 2019 | (I0 2020 | tp794 2021 | g22 2022 | tp795 2023 | Rp796 2024 | (I1 2025 | (I2 2026 | tp797 2027 | g789 2028 | I00 2029 | S'\x01\x02' 2030 
| p798 2031 | tp799 2032 | basS'names' 2033 | p800 2034 | (lp801 2035 | S'LEVEL2' 2036 | p802 2037 | aS'Aggregation Function' 2038 | p803 2039 | asS'levels' 2040 | p804 2041 | (lp805 2042 | g15 2043 | (g16 2044 | (dp806 2045 | g18 2046 | g19 2047 | (g20 2048 | (I0 2049 | tp807 2050 | g22 2051 | tp808 2052 | Rp809 2053 | (I1 2054 | (I1 2055 | tp810 2056 | g26 2057 | (S'O8' 2058 | p811 2059 | I0 2060 | I1 2061 | tp812 2062 | Rp813 2063 | (I3 2064 | S'|' 2065 | p814 2066 | NNNI-1 2067 | I-1 2068 | I63 2069 | tp815 2070 | bI00 2071 | (lp816 2072 | S'test_level2' 2073 | p817 2074 | atp818 2075 | bsg51 2076 | g802 2077 | stp819 2078 | Rp820 2079 | ag15 2080 | (g16 2081 | (dp821 2082 | g18 2083 | g19 2084 | (g20 2085 | (I0 2086 | tp822 2087 | g22 2088 | tp823 2089 | Rp824 2090 | (I1 2091 | (I4 2092 | tp825 2093 | g813 2094 | I00 2095 | (lp826 2096 | g65 2097 | aS'count' 2098 | p827 2099 | aS'mean' 2100 | p828 2101 | aS'std' 2102 | p829 2103 | atp830 2104 | bsg51 2105 | g803 2106 | stp831 2107 | Rp832 2108 | asS'sortorder' 2109 | p833 2110 | Nstp834 2111 | Rp835 2112 | ag15 2113 | (g778 2114 | (dp836 2115 | g780 2116 | (lp837 2117 | g19 2118 | (g782 2119 | (I0 2120 | tp838 2121 | g22 2122 | tp839 2123 | Rp840 2124 | (I1 2125 | (I0 2126 | tp841 2127 | g789 2128 | I00 2129 | g65 2130 | tp842 2131 | bag19 2132 | (g782 2133 | (I0 2134 | tp843 2135 | g22 2136 | tp844 2137 | Rp845 2138 | (I1 2139 | (I0 2140 | tp846 2141 | g789 2142 | I00 2143 | g65 2144 | tp847 2145 | bag19 2146 | (g782 2147 | (I0 2148 | tp848 2149 | g22 2150 | tp849 2151 | Rp850 2152 | (I1 2153 | (I0 2154 | tp851 2155 | g789 2156 | I00 2157 | g65 2158 | tp852 2159 | bag19 2160 | (g782 2161 | (I0 2162 | tp853 2163 | g22 2164 | tp854 2165 | Rp855 2166 | (I1 2167 | (I0 2168 | tp856 2169 | g789 2170 | I00 2171 | g65 2172 | tp857 2173 | basg800 2174 | (lp858 2175 | S'subject_id' 2176 | p859 2177 | aS'hadm_id' 2178 | p860 2179 | aS'icustay_id' 2180 | p861 2181 | aS'hours_in' 2182 | p862 2183 | asg804 2184 | (lp863 
2185 | g15 2186 | (g54 2187 | (dp864 2188 | g18 2189 | g19 2190 | (g20 2191 | (I0 2192 | tp865 2193 | g22 2194 | tp866 2195 | Rp867 2196 | (I1 2197 | (I1 2198 | tp868 2199 | g26 2200 | (S'i8' 2201 | p869 2202 | I0 2203 | I1 2204 | tp870 2205 | Rp871 2206 | (I3 2207 | S'<' 2208 | p872 2209 | NNNI-1 2210 | I-1 2211 | I0 2212 | tp873 2213 | bI00 2214 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 2215 | p874 2216 | tp875 2217 | bsg51 2218 | g859 2219 | stp876 2220 | Rp877 2221 | ag15 2222 | (g54 2223 | (dp878 2224 | g18 2225 | g19 2226 | (g20 2227 | (I0 2228 | tp879 2229 | g22 2230 | tp880 2231 | Rp881 2232 | (I1 2233 | (I1 2234 | tp882 2235 | g871 2236 | I00 2237 | S'\x01\x00\x00\x00\x00\x00\x00\x00' 2238 | p883 2239 | tp884 2240 | bsg51 2241 | g860 2242 | stp885 2243 | Rp886 2244 | ag15 2245 | (g54 2246 | (dp887 2247 | g18 2248 | g19 2249 | (g20 2250 | (I0 2251 | tp888 2252 | g22 2253 | tp889 2254 | Rp890 2255 | (I1 2256 | (I1 2257 | tp891 2258 | g871 2259 | I00 2260 | S'\x00\x00\x00\x00\x00\x00\x00\x00' 2261 | p892 2262 | tp893 2263 | bsg51 2264 | g861 2265 | stp894 2266 | Rp895 2267 | ag15 2268 | (g54 2269 | (dp896 2270 | g18 2271 | g19 2272 | (g20 2273 | (I0 2274 | tp897 2275 | g22 2276 | tp898 2277 | Rp899 2278 | (I1 2279 | (I6 2280 | tp900 2281 | g871 2282 | I00 2283 | S'\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00' 2284 | p901 2285 | tp902 2286 | bsg51 2287 | g862 2288 | stp903 2289 | Rp904 2290 | asg833 2291 | Nstp905 2292 | Rp906 2293 | a(lp907 2294 | g19 2295 | (g20 2296 | (I0 2297 | tp908 2298 | g22 2299 | tp909 2300 | Rp910 2301 | (I1 2302 | (I2 2303 | I0 2304 | tp911 2305 | g26 2306 | (S'f8' 2307 | p912 2308 | I0 2309 | I1 2310 | tp913 2311 | Rp914 2312 | (I3 2313 | S'<' 2314 | p915 2315 | NNNI-1 2316 | I-1 2317 | I0 2318 | tp916 2319 | bI00 2320 | g65 2321 | tp917 2322 | ba(lp918 2323 | g15 2324 | (g778 2325 | (dp919 
2326 | g780 2327 | (lp920 2328 | g19 2329 | (g782 2330 | (I0 2331 | tp921 2332 | g22 2333 | tp922 2334 | Rp923 2335 | (I1 2336 | (I2 2337 | tp924 2338 | g789 2339 | I00 2340 | S'\x00\x00' 2341 | p925 2342 | tp926 2343 | bag19 2344 | (g782 2345 | (I0 2346 | tp927 2347 | g22 2348 | tp928 2349 | Rp929 2350 | (I1 2351 | (I2 2352 | tp930 2353 | g789 2354 | I00 2355 | S'\x01\x02' 2356 | p931 2357 | tp932 2358 | basg800 2359 | (lp933 2360 | g802 2361 | ag803 2362 | asg804 2363 | (lp934 2364 | g15 2365 | (g16 2366 | (dp935 2367 | g18 2368 | g19 2369 | (g20 2370 | (I0 2371 | tp936 2372 | g22 2373 | tp937 2374 | Rp938 2375 | (I1 2376 | (I1 2377 | tp939 2378 | g813 2379 | I00 2380 | (lp940 2381 | g817 2382 | atp941 2383 | bsg51 2384 | g802 2385 | stp942 2386 | Rp943 2387 | ag15 2388 | (g16 2389 | (dp944 2390 | g18 2391 | g19 2392 | (g20 2393 | (I0 2394 | tp945 2395 | g22 2396 | tp946 2397 | Rp947 2398 | (I1 2399 | (I4 2400 | tp948 2401 | g813 2402 | I00 2403 | (lp949 2404 | g65 2405 | ag827 2406 | ag828 2407 | ag829 2408 | atp950 2409 | bsg51 2410 | g803 2411 | stp951 2412 | Rp952 2413 | asg833 2414 | Nstp953 2415 | Rp954 2416 | a(dp955 2417 | g435 2418 | (dp956 2419 | g437 2420 | g777 2421 | sg438 2422 | (lp957 2423 | (dp958 2424 | g441 2425 | g442 2426 | (I0 2427 | I2 2428 | I1 2429 | tp959 2430 | Rp960 2431 | sg445 2432 | g910 2433 | sasstp961 2434 | bsbtp962 2435 | . --------------------------------------------------------------------------------