├── .gitignore
├── LICENSE
├── README.md
├── data
    └── mimicdata.sqlite
├── example-patient
    ├── example-patient-matlab.ipynb
    ├── example_patient_matlab.m
    ├── example_patient_matlab_ce.m
    ├── example_patient_matlab_ie.m
    ├── example_patient_matlab_le.m
    ├── expt-query-1.sql
    ├── expt-query-2.sql
    ├── expt-query-3.sql
    ├── expt-query-4.sql
    ├── expt-query-to-csv.sql
    └── mlcc1_example_patient.m
├── installation
    └── sqlite-manager
    │   └── sqlite_manager-0.8.3-tb+sm+fx.xpi
├── intro_to_mimic
    ├── 00-query-mimic.md
    ├── 01-example-patient-heart-failure.ipynb
    └── MozFest2015.key
├── mlcc
    ├── etc
    │   ├── calcRoc.m
    │   └── makeQuery.m
    ├── lab1-data-extraction
    │   ├── mlcc-query-1.sql
    │   ├── mlcc1-problem-set-solutions-ICUSTAYID.sql
    │   ├── mlcc1-problem-set-solutions.sql
    │   ├── mlcc1_introduction.ipynb
    │   └── mlcc1_introduction.m
    ├── lab2-intro-ml
    │   ├── README.md
    │   ├── mlcc2-query.sql
    │   ├── mlcc2_svm_workshop.ipynb
    │   └── mlcc2_svm_workshop.m
    └── lab4-mortality-prediction
    │   ├── matlab_postgres_connection.m
    │   ├── mlcc-extract-data.sql
    │   ├── mlcc-group-by-tutorial.sql
    │   ├── mlcc_mortality_prediction.ipynb
    │   └── mlcc_mortality_prediction.m
├── requirements.txt
└── temp
    ├── 02-example-patient-sepsis.ipynb
    ├── 03-example-patient-ich.ipynb
    ├── 04-example-multiplepatients.ipynb
    └── 06-example-patient-psql.ipynb


/.gitignore:
--------------------------------------------------------------------------------
 1 | # matlab temp files
 2 | *.m~
 3 | 
 4 | # CSV files with example data
 5 | example-patient/*.csv
 6 | 
 7 | # Byte-compiled / optimized / DLL files
 8 | __pycache__/
 9 | *.py[cod]
10 | 
11 | # C extensions
12 | *.so
13 | 
14 | # Distribution / packaging
15 | .Python
16 | env/
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | 
32 | # PyInstaller
33 | #  Usually these files are written by a python script from a template
34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 | 
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 | 
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *,cover
51 | 
52 | # Translations
53 | *.mo
54 | *.pot
55 | 
56 | # Django stuff:
57 | *.log
58 | 
59 | # Sphinx documentation
60 | docs/_build/
61 | 
62 | # PyBuilder
63 | target/
64 | 
65 | # OSX .DS_Store 
66 | .DS_Store 
67 | 
68 | # IPython notebook checkpoints
69 | .ipynb_checkpoints/
70 | 
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Tom Pollard
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MIMIC Critical Care Datathon
 2 | 
 3 | These are training materials for the MIMIC Critical Care Database. The package includes:
 4 | 
 5 | - a demo version of MIMIC which can be quickly installed in the Firefox web browser with the SQLite Plugin.
 6 | - some sample SQL queries which can be used to query the MIMIC data
 7 | - an IPython Notebook which connects to the demo MIMIC database and allows analysis to be carried out using Python.
 8 | 
 9 | ## What is MIMIC-III?
10 | 
11 | MIMIC-III is a widely-used, freely available dataset developed by the MIT Lab for Computational Physiology, comprising deidentified health data associated with >40,000 critical care patients. It includes demographics, vital signs, laboratory tests, medications, and more. Details are available on the MIMIC website: https://mimic.physionet.org/
12 | 
13 | ## Workshop overview 
14 | 
15 | During the workshop, you will:
16 | 
17 | - Learn about MIMIC-III, the publicly accessible critical care database 
18 | - Create a local version of MIMIC-III with a small sample of patients using the Firefox SQLite Plugin
19 | - Explore the patient data using SQL
20 | - Plot and analyse the data using Python
21 | - Get inspiration for future research projects
22 | 
23 | ## Downloading the materials
24 | 
25 | If you are familiar with git, please clone this repository. If not, click the
26 | 'Download ZIP' button on the right and then unzip the materials onto your
27 | computer.
28 | 
29 | ## Installing a demo version of MIMIC-III with SQLite Manager
30 | 
31 | To create the database on your computer, you will need the Firefox SQLite Manager Add-on. Open Firefox, select "Add-ons" from the Tools menu, and then install SQLite Manager. To create the demo database, select "connect to database" from the menu and choose the data/mimicdata.sqlite file.
32 | 
33 | ## Analysing the data using IPython Notebook
34 | 
35 | To analyse the data using IPython Notebook:
36 | 
37 | - If you already have Python and the Pip package manager, run ```pip install ipython```
38 | - If you are new to Python, we suggest installing the Anaconda package from https://www.continuum.io/downloads. Then run ```conda update ipython```.
39 | 
40 | Once IPython is installed, run ```ipython notebook``` from the command line to open IPython Notebook, then open one of the notebook (.ipynb) files (for example, intro_to_mimic/01-example-patient-heart-failure.ipynb).
41 | 
42 | ## Getting access to the full MIMIC-III dataset
43 | 
44 | If after this workshop you would like to gain access to the full MIMIC-III dataset, which contains rich data for over 40,000 patients, please see: https://mimic.physionet.org/gettingstarted/access/
45 | 
46 | ## Help to improve the workshop
47 | 
48 | We hope to improve the workshop contents over time and we welcome your contributions. Please raise an issue and/or submit a pull request!
49 | 
50 | 
51 | 
52 | 
53 | 
54 | 


--------------------------------------------------------------------------------
/data/mimicdata.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MIT-LCP/mimic-workshop/b27eee438a1f62d909dd30d1d458d3516f32b276/data/mimicdata.sqlite


--------------------------------------------------------------------------------
/example-patient/example_patient_matlab.m:
--------------------------------------------------------------------------------
  1 | 
  2 | %% Load chartevents for the patient
  3 | % Read the exported CSV: the first line is the header, the remaining lines are the data.
  4 | fp = fopen('example-patient-chartevents.csv');
  5 | header_ce = fgetl(fp);
  6 | 
  7 | % convert header from a string to a cell array of strings (one entry per CSV column)
  8 | header_ce = regexp(header_ce,',','split');
  9 | 
 10 | frmt = '%f%f%f%s%f%q%q'; % one specifier per column: %f numeric, %s text, %q text that may be double-quoted
 11 | data_ce = textscan(fp,frmt,'delimiter',',');
 12 | fclose(fp);
 13 | 
 14 | % Split the columns by type: numeric columns are concatenated into the matrix data_ce, text columns into the cell array data_ce_str
 15 | idxNumeric = cellfun(@isnumeric, data_ce);
 16 | data_ce_str = [data_ce{~idxNumeric}];
 17 | header_ce_str = header_ce(~idxNumeric);
 18 | data_ce = [data_ce{idxNumeric}];
 19 | header_ce = header_ce(idxNumeric);
 20 | 
 21 | % here's a preview of the string data (column names, then the first 5 rows)
 22 | header_ce_str
 23 | data_ce_str(1:5,:)
 24 | 
 25 | % here's a preview of the numeric data ('\t' is a tab)
 26 | fprintf('%8s\t',header_ce{:});
 27 | fprintf('\n')
 28 | 
 29 | frmt = '%8g\t%8.2f\t%8g\t%8.2f'; % one field per numeric column (the read format above has four %f columns)
 30 | for n=1:5
 31 |     fprintf(frmt,data_ce(n,:));
 32 |     fprintf('\n');
 33 | end
 34 | 
 35 | %% Load the other events tables
 36 | % Time to load in the rest of the data! Each CSV is read like chartevents: header line, textscan, then split into a numeric matrix and a text cell array.
 37 | % LAB DATA
 38 | frmt = '%f%f%f%s%f%q%q'; % 4 numeric columns, 3 text columns
 39 | 
 40 | fp = fopen('example-patient-labevents.csv');
 41 | header_le = fgetl(fp);
 42 | header_le = regexp(header_le,',','split');
 43 | data_le = textscan(fp,frmt,'delimiter',',');
 44 | fclose(fp);
 45 | idxNumeric = cellfun(@isnumeric, data_le);
 46 | data_le_str = [data_le{~idxNumeric}];
 47 | header_le_str = header_le(~idxNumeric);
 48 | data_le = [data_le{idxNumeric}];
 49 | header_le = header_le(idxNumeric);
 50 | 
 51 | % INPUT DATA
 52 | frmt = '%f%f%f%f%f%q%f%q%f%q'; % 7 numeric columns, 3 text columns
 53 | 
 54 | fp = fopen('example-patient-inputevents.csv');
 55 | header_ie = fgetl(fp);
 56 | header_ie = regexp(header_ie,',','split');
 57 | data_ie = textscan(fp,frmt,'delimiter',',');
 58 | fclose(fp);
 59 | idxNumeric = cellfun(@isnumeric, data_ie);
 60 | data_ie_str = [data_ie{~idxNumeric}];
 61 | header_ie_str = header_ie(~idxNumeric);
 62 | data_ie = [data_ie{idxNumeric}];
 63 | header_ie = header_ie(idxNumeric);
 64 | 
 65 | 
 66 | % OUTPUT DATA
 67 | frmt = '%f%f%f%f%q%q'; % 4 numeric columns, 2 text columns
 68 | 
 69 | fp = fopen('example-patient-outputevents.csv');
 70 | header_oe = fgetl(fp);
 71 | header_oe = regexp(header_oe,',','split');
 72 | data_oe = textscan(fp,frmt,'delimiter',',');
 73 | fclose(fp);
 74 | idxNumeric = cellfun(@isnumeric, data_oe);
 75 | data_oe_str = [data_oe{~idxNumeric}];
 76 | header_oe_str = header_oe(~idxNumeric);
 77 | data_oe = [data_oe{idxNumeric}];
 78 | header_oe = header_oe(idxNumeric);
 79 | 
 80 | % PROCEDURE DATA
 81 | frmt = '%f%f%f%f%f%q%f%q'; % 6 numeric columns, 2 text columns
 82 | 
 83 | fp = fopen('example-patient-procedureevents.csv');
 84 | header_pe = fgetl(fp);
 85 | header_pe = regexp(header_pe,',','split');
 86 | data_pe = textscan(fp,frmt,'delimiter',',');
 87 | fclose(fp);
 88 | idxNumeric = cellfun(@isnumeric, data_pe);
 89 | data_pe_str = [data_pe{~idxNumeric}];
 90 | header_pe_str = header_pe(~idxNumeric);
 91 | data_pe = [data_pe{idxNumeric}];
 92 | header_pe = header_pe(idxNumeric);
 93 | 
 94 | %% Initialize some plotting variables
 95 | % Shared by the plotting sub-scripts: col holds one RGB colour per row (8 rows), indexed as col(k,:)
 96 | col = [0.9047    0.1918    0.1988
 97 |     0.2941    0.5447    0.7494
 98 |     0.3718    0.7176    0.3612
 99 |     1.0000    0.5482    0.1000
100 |     0.4550    0.4946    0.4722
101 |     0.6859    0.4035    0.2412
102 |     0.9718    0.5553    0.7741
103 |     0.5313    0.3359    0.6523];
104 | marker = {'d','+','o','x','>','s','<','+','^'}; % marker symbol for the k-th plotted series
105 | ms = repmat(8,1,numel(marker)); % marker size (8) for each marker
106 | savefigflag=0; % set to 1 to export each figure as a PNG via export_fig
107 | %% Plot the vital signs
108 | figure(1); clf; hold all; % reuse figure 1: clear it and keep subsequent plots on the same axes
109 | example_patient_matlab_ce; % sub-script: runs in this workspace and draws into the current axes
110 | 
111 | %% Plot the labs
112 | figure(1); clf; hold all;
113 | example_patient_matlab_le;
114 | 
115 | %% add in IOEVENTS
116 | figure(1); clf; hold all;
117 | example_patient_matlab_ie;
118 | 
119 | %% putting it all together
120 | figure(1); clf;
121 | % stack the three plots vertically: vital signs, labs, then inputs
122 | subplot(3,1,1); hold all;
123 | example_patient_matlab_ce;
124 | subplot(3,1,2); hold all;
125 | example_patient_matlab_le;
126 | subplot(3,1,3); hold all;
127 | example_patient_matlab_ie;
128 | P_PrettyFigure(1); % NOTE(review): P_PrettyFigure is not defined in these scripts - it must be on the MATLAB path
129 | 
130 | if savefigflag==1
131 | export_fig(1,'exampledata10.png','-transparent'); % NOTE(review): export_fig is an external function - confirm it is installed
132 | end
133 | 


--------------------------------------------------------------------------------
/example-patient/example_patient_matlab_ce.m:
--------------------------------------------------------------------------------
 1 | % Plots chartevents for the example patient. Expects data_ce, data_ce_str, data_le, data_le_str, marker, col, ms and savefigflag in the workspace.
 2 | lbl_plot = {'Arterial Blood Pressure mean','Heart Rate','O2 saturation pulseoxymetry','Respiratory Rate'};
 3 | % plot the values: one marker series per label, selected by matching the 3rd text column of chartevents
 4 | for k=1:numel(lbl_plot)
 5 |     idxPlot = ismember(data_ce_str(:,3), lbl_plot{k});
 6 |     plot(data_ce(idxPlot,2), data_ce(idxPlot,4),marker{k},...
 7 |         'Color',col(k,:), 'MarkerFaceColor',col(k,:), 'markersize',ms(k), 'linewidth',2);
 8 | end
 9 | 
10 | 
11 | set(gca,'XLim',[0,72],'YLim',[0,150]);
12 | set(gca,'YTick',0:25:150);
13 | 
14 | xlabel('Hours since admission','FontSize',16);
15 | ylabel('Value of measurement','FontSize',16);
16 | 
17 | %=== add in the legend
18 | legend_str = {'Mean arterial blood pressure','Heart Rate','Peripheral oxygen saturation','Respiratory Rate'};
19 | 
20 | % legend entries are listed in the same order as the series plotted above
21 | hleg=legend(legend_str,'Location','NorthEast');
22 | set(gca,'FontSize',16);
23 | grid on;
24 | 
25 | %% add in GCS
26 | lbl_keep = {'GCS - Eye Opening';'GCS - Motor Response';'GCS - Verbal Response'};
27 | lbl_plot = (135:20:175)+2; % y positions (137, 157, 177) at which each GCS component's text is written
28 | 
29 | % write each recorded value as text at (time, y position) rather than plotting a marker
30 | for k=1:numel(lbl_keep)
31 |     idxPlot = ismember(data_ce_str(:,3), lbl_keep{k});
32 |     data_plot = data_ce_str(idxPlot,1);
33 |     time_plot = data_ce(idxPlot,2);
34 | 
35 |     idxM = find(time_plot < 72); % keep only entries inside the plotted x-range
36 | %     idxM = idxM(1:4:end);
37 |     idxM = idxM(:)'; % row vector so the for loop visits one index at a time
38 |     for m=idxM
39 |     text(time_plot(m),...
40 |         lbl_plot(k),... % y-axis location, defined above
41 |         data_plot{m},...
42 |         'FontName','Helvetica','FontSize',14);
43 |     end
44 | end
45 | lbl_keep = strrep(lbl_keep,'GCS - ',''); % drop the common prefix so the tick labels are shorter
46 | 
47 | % add the GCS stuff to the y-axis: extend the range to 200 and label the three GCS rows
48 | set(gca,'YLim',[0,200],'YTick',[0:50:100,135,155,175,200],...
49 |      'YTickLabel',{'0','50','100',lbl_keep{1},lbl_keep{2},lbl_keep{3},'200'});
50 | if savefigflag==1
51 | export_fig(1,'exampledata3.png','-transparent');
52 | end
53 | %% add in labs
54 | le_lbl = unique(data_le_str(:,3)); % all distinct lab labels (not used below; handy for inspection)
55 | 
56 | lbl_keep = {'CREATININE';
57 |     'HEMOGLOBIN'};
58 | 
59 | 
60 | % plot the values on the same axes, using the 5th and 6th marker/colour with a black outline
61 | for k=1:numel(lbl_keep)
62 |     idxPlot = ismember(data_le_str(:,3), lbl_keep{k});
63 |     plot(data_le(idxPlot,2), data_le(idxPlot,4),marker{k+4},...
64 |         'Color',[0,0,0], 'markerfacecolor',col(k+4,:),...
65 |         'markersize',12,'linewidth',2);
66 | end
67 | % append the lab labels to the existing legend entries
68 | legend_str = legend_str(:)';
69 | legend_str = [legend_str,lbl_keep'];
70 | legend(legend_str,'Location','NorthEast');
71 | 
72 | if savefigflag==1
73 | export_fig(1,'exampledata4.png','-transparent');
74 | end


--------------------------------------------------------------------------------
/example-patient/example_patient_matlab_ie.m:
--------------------------------------------------------------------------------
  1 | %% Plot pain/sedation medication
  2 | lbl1 = {'Midazolam (Versed)','Propofol','Fentanyl'};
  3 | for k=1:numel(lbl1)
  4 |     idxPlot = ismember(data_ie_str(:,3), lbl1{k});
  5 | 
  6 |     % time start/stop
  7 |     time_plot = data_ie(idxPlot,2:3);
  8 | 
  9 |     % rate start/stop (NOTE(review): column meaning taken from the original comment - confirm against the CSV header)
 10 |     data_plot = data_ie(idxPlot,5:6);
 11 |     % only data_plot(:,2) is used below, as the y-value of each infusion
 12 |     idxPlot = find(time_plot(:,1) < 72); % only plot drug infusions that start in the first 72 hr
 13 |     idxPlot = idxPlot(:)'; % ensure it is a row vector for "for" loop
 14 |     for m=idxPlot
 15 |     % starting marker
 16 |     plot(time_plot(m,1), data_plot(m,2), '<',...
 17 |         'color',col(k,:), 'markerfacecolor',col(k,:),...
 18 |         'linewidth',3,'markersize',8,...
 19 |         'HandleVisibility', 'off');
 20 | 
 21 |     % ending marker
 22 |     plot(time_plot(m,2), data_plot(m,2), '>',...
 23 |         'color',col(k,:), 'markerfacecolor',col(k,:),...
 24 |         'linewidth',3,'markersize',8,...
 25 |         'HandleVisibility', 'off');
 26 |     
 27 |     % ensure the plot line only appears in the legend once (only the last infusion's line is visible to the legend)
 28 |     if m==idxPlot(end)
 29 |         visib='on';
 30 |     else
 31 |         visib='off';
 32 |     end
 33 |     
 34 |     % connecting line
 35 |     plot(time_plot(m,1:2), repmat(data_plot(m,2),1,2), '-',...
 36 |         'color',col(k,:), 'markerfacecolor',col(k,:),...
 37 |         'linewidth',3,'markersize',8,...
 38 |         'HandleVisibility', visib);
 39 |     end
 40 | end
 41 | % one legend entry per drug, in the order listed in lbl1
 42 | legend_str = lbl1(:)';
 43 | legend(legend_str,'Location','NorthEast');
 44 | 
 45 | set(gca,'XLim',[0,72],'YLim',[0,200]);
 46 | 
 47 | xlabel('Hours since admission','FontSize',16);
 48 | ylabel('Value of measurement','FontSize',16);
 49 | 
 50 | P_PrettyFigure(1); % NOTE(review): P_PrettyFigure is not defined in these scripts - it must be on the MATLAB path
 51 | if savefigflag==1
 52 | export_fig(1,'exampledata6.png','-transparent');
 53 | end
 54 | 
 55 | % %% OR data
 56 | % idxKeep = data_ie(:,2)<72;
 57 | % ie_lbl = unique(data_ie_str(idxKeep,3));
 58 | % 
 59 | % lbl1 = {'OR Cryoprecipitate Intake';
 60 | %     'OR Crystalloid Intake';'OR FFP Intake';
 61 | %     'OR Packed RBC Intake';'OR Platelet Intake'};
 62 | % for k=1:numel(lbl1)
 63 | %     idxPlot = ismember(data_ie_str(:,3), lbl1{k});
 64 | % 
 65 | %     % time start/stop
 66 | %     time_plot = data_ie(idxPlot,2:3);
 67 | % 
 68 | %     % rate start/stop
 69 | %     data_plot = data_ie(idxPlot,5:6);
 70 | % 
 71 | %     % for OR volumes, it's always a bolus over 1 minute
 72 | %     plot(time_plot(1,1), data_plot(1,1)/100, 's',...
 73 | %         'color',[0,0,0], 'markerfacecolor',col(k+1,:),...
 74 | %         'linewidth',3,'markersize',10);
 75 | % end
 76 | % 
 77 | % ylabel('OR blood (mL/100)');
 78 | % legend_str = [legend_str,lbl1(:)'];
 79 | % legend(legend_str,'Location','NorthEast');
 80 | % 
 81 | % if savefigflag==1
 82 | % export_fig(1,'exampledata7.png','-transparent');
 83 | % end
 84 | %% Plot LR: the first three matching rows as infusions, the fourth as a bolus
 85 | lbl1 = {'LR'};
 86 | for k=1:numel(lbl1)
 87 |     idxPlot = ismember(data_ie_str(:,3), lbl1{k});
 88 |     
 89 |     
 90 |     % time start/stop
 91 |     time_plot = data_ie(idxPlot,2:3);
 92 |     
 93 |     % rate start/stop (NOTE(review): column meaning taken from the original comment - confirm against the CSV header)
 94 |     data_plot = data_ie(idxPlot,5:6);
 95 |     
 96 |     M=3; % number of leading rows drawn as infusions; hard-coded, so at least 4 matching rows are required
 97 |     for m=1:M
 98 |             % starting marker
 99 |             plot(time_plot(m,1), data_plot(m,2), '<',...
100 |                 'color',col(k+7,:), 'markerfacecolor',col(k+7,:),...
101 |                 'linewidth',3,'markersize',8,...
102 |                 'HandleVisibility', 'off');
103 |             
104 |             
105 |             % ending marker
106 |             plot(time_plot(m,2), data_plot(m,2), '>',...
107 |                 'color',col(k+7,:), 'markerfacecolor',col(k+7,:),...
108 |                 'linewidth',3,'markersize',8,...
109 |                 'HandleVisibility', 'off');
110 |             
111 |             % ensure the plot line only appears in the legend once
112 |             if m==M
113 |                 visib='on';
114 |             else
115 |                 visib='off';
116 |             end
117 |             
118 |             % connecting line
119 |             plot(time_plot(m,1:2), repmat(data_plot(m,2),1,2), '-',...
120 |                 'color',col(k+7,:), 'markerfacecolor',col(k+7,:),...
121 |                 'linewidth',3,'markersize',8,...
122 |                 'HandleVisibility', visib);
123 |     end
124 |     
125 |     %=== plot bolus at M=4 (a single square marker; the value is divided by 10 before plotting)
126 |     m=4;
127 |     plot(time_plot(m,1), data_plot(m,1)/10, 's',...
128 |         'color',col(k+7,:), 'markerfacecolor',col(k+7,:),...
129 |         'linewidth',3,'markersize',10);
130 | end
131 | % extend the existing legend with the infusion line and the bolus marker
132 | legend_str = [legend_str,lbl1(:)',strcat(lbl1(:)',' Bolus')];
133 | legend(legend_str,'Location','NorthEast');
134 | 
135 | 
136 | if savefigflag==1
137 | export_fig(1,'exampledata8.png','-transparent');
138 | end


--------------------------------------------------------------------------------
/example-patient/example_patient_matlab_le.m:
--------------------------------------------------------------------------------
 1 | %% Plot the labs
 2 | le_lbl = unique(data_le_str(:,3)); % all distinct lab labels (not used below; handy for inspection)
 3 | marker = {'d','+','o','x','>','d','<','+','^'}; % NOTE: overwrites any marker list already in the workspace
 4 | lbl_keep = {'CREATININE';
 5 |     'HEMOGLOBIN';
 6 |     'PCO2';
 7 |     'PO2';
 8 |     'LACTATE'};
 9 | % Expects data_le, data_le_str, col and savefigflag to already exist in the workspace.
10 | 
11 | % plot the values: one dashed line with markers per lab label
12 | for k=1:numel(lbl_keep)
13 |     idxPlot = ismember(data_le_str(:,3), lbl_keep{k});
14 |     data_plot = data_le(idxPlot,4);
15 |     if ismember(lbl_keep{k},{'PO2','PCO2'})==1
16 |         % convert to kPa (assumes the stored values are in mmHg; 1 kPa = 7.500617 mmHg)
17 |         data_plot = data_plot / 7.500617;
18 |     end
19 |     plot(data_le(idxPlot,2), data_plot, ['--' marker{k}],...
20 |         'Color',col(k,:), 'markerfacecolor',col(k,:),...
21 |         'markersize',12,'linewidth',2);
22 | end
23 | 
24 | legend_str = lbl_keep(:)'; % row cell array of the labels (the legend call below uses lbl_keep directly)
25 | legend(lbl_keep,'Location','NorthEast');
26 | 
27 | set(gca,'XLim',[0,72],'YLim',[0,25]);
28 | 
29 | xlabel('Hours since admission','FontSize',16);
30 | ylabel('Value of measurement','FontSize',16);
31 | 
32 | P_PrettyFigure(1); % NOTE(review): P_PrettyFigure is not defined in these scripts - it must be on the MATLAB path
33 | if savefigflag==1
34 |     % move the legend to the top-left corner before exporting
35 | legend(lbl_keep,'Location','NorthWest');
36 | export_fig(1,'exampledata5.png','-transparent');
37 | end


--------------------------------------------------------------------------------
/example-patient/expt-query-1.sql:
--------------------------------------------------------------------------------
 1 | SELECT ie.icustay_id,
 2 |        di.label,
 3 |        round((julianday(de.charttime) - julianday(ie.intime)) * 24, 4) AS Hours,
 4 |        de.itemid,
 5 |        de.value,
 6 |        de.valuenum
 7 | FROM icustays AS ie
 8 | JOIN chartevents AS de
 9 |   ON de.icustay_id = ie.icustay_id
10 | JOIN d_items AS di
11 |   ON di.itemid = de.itemid
12 | WHERE ie.hadm_id = 103075
13 | ORDER BY de.charttime


--------------------------------------------------------------------------------
/example-patient/expt-query-2.sql:
--------------------------------------------------------------------------------
 1 | SELECT ie.icustay_id,
 2 |        di.label,
 3 |        round((julianday(de.charttime) - julianday(ie.intime)) * 24, 4) AS Hours,
 4 |        de.itemid,
 5 |        de.value,
 6 |        de.valuenum
 7 | FROM icustays AS ie
 8 | JOIN labevents AS de
 9 |   ON de.hadm_id = ie.hadm_id
10 | JOIN d_labitems AS di
11 |   ON di.itemid = de.itemid
12 | WHERE de.hadm_id = 103075
13 | ORDER BY de.charttime


--------------------------------------------------------------------------------
/example-patient/expt-query-3.sql:
--------------------------------------------------------------------------------
 1 | SELECT de.icustay_id,
 2 |        di.label,
 3 |        round((julianday(de.charttime) - julianday(ie.intime)) * 24, 4) AS HOURS,
 4 |        de.itemid,
 5 |        de.value,
 6 |        de.value AS valuenum
 7 | FROM icustays AS ie
 8 | JOIN outputevents AS de
 9 |   ON de.icustay_id = ie.icustay_id
10 | JOIN d_items AS di
11 |   ON di.itemid = de.itemid
12 | WHERE de.hadm_id = 103075
13 | ORDER BY de.charttime


--------------------------------------------------------------------------------
/example-patient/expt-query-4.sql:
--------------------------------------------------------------------------------
 1 | SELECT de.icustay_id,
 2 |        di.label,
 3 |        round((julianday(de.charttime) - julianday(ie.intime)) * 24, 4) AS HOURS,
 4 |        de.itemid,
 5 |        de.amount,
 6 |        de.amountuom,
 7 |        de.rate,
 8 |        de.rateuom
 9 | FROM icustays AS ie
10 | JOIN inputevents_cv AS de
11 |   ON de.icustay_id = ie.icustay_id
12 | JOIN d_items AS di
13 |   ON di.itemid = de.itemid
14 | WHERE de.hadm_id = 103075
15 | ORDER BY de.charttime


--------------------------------------------------------------------------------
/example-patient/expt-query-to-csv.sql:
--------------------------------------------------------------------------------
 1 | -- This script exports data for a single patient from a PostgreSQL instance of MIMIC-III to CSV.
 2 | -- You may need to change the paths to match your local system.
 3 | -- You may also need to set the PostgreSQL search path to the schema with MIMIC-III.
 4 | 
 5 | -- This version extracts data for hospital admission hadm_id = 103075.
 6 | 
 7 |   -- HOURS is the time elapsed from the ICU stay's intime to the event's charttime.
 8 |   -- extract(EPOCH from <interval>) returns seconds, so the value is divided by 3600
 9 |   -- (seconds per hour) and rounded to 4 decimal places.
10 | 
11 |   -- CHARTED DATA
12 |   Copy (
13 |     select ie.icustay_id
14 |         , di.label
15 |         , round(extract(EPOCH from (de.charttime-ie.intime)) :: NUMERIC / 3600,4) as HOURS
16 |         , de.itemid
17 |         , de.value
18 |         , de.valuenum
19 |     from icustays ie
20 |     inner join chartevents de
21 |       on ie.icustay_id = de.icustay_id
22 |     inner join d_items di
23 |       on de.itemid = di.itemid
24 |     where ie.hadm_id = 103075
25 |     order by charttime
26 |   ) To '/data/mimic3/example-patient-chartevents.csv' With CSV HEADER;
27 | 
28 | 
29 |   -- LAB DATA
30 |   Copy (
31 |     select ie.icustay_id
32 |       , di.label
33 |       , round(extract(EPOCH from (de.charttime-ie.intime)) :: NUMERIC / 3600,4) as HOURS
34 |       , de.itemid
35 |       , de.value
36 |       , de.valuenum
37 |     from icustays ie
38 |     inner join labevents de
39 |         on de.hadm_id = ie.hadm_id
40 |     inner join d_labitems di
41 |         on de.itemid = di.itemid
42 |     where de.hadm_id = 103075
43 |     order by charttime
44 |   ) To '/data/mimic3/example-patient-labevents.csv' With CSV HEADER;
45 | 
46 | 
47 |   -- OUTPUT DATA
48 |   Copy (
49 |     select de.icustay_id
50 |       , di.label
51 |       , round(extract(EPOCH from (de.charttime-ie.intime)) :: NUMERIC / 3600,4) as HOURS
52 |       , de.itemid
53 |       , de.value
54 |       , de.value as valuenum
55 |     from icustays ie
56 |     inner join outputevents de
57 |         on de.icustay_id = ie.icustay_id
58 |     inner join d_items di
59 |         on de.itemid = di.itemid
60 |     where de.hadm_id = 103075
61 |     order by charttime
62 |   ) To '/data/mimic3/example-patient-outputevents.csv' With CSV HEADER;
63 | 
64 | 
65 |   -- INPUT DATA
66 |   Copy (
67 |     select de.icustay_id
68 |       , di.label
69 |       , round(extract(EPOCH from (de.charttime-ie.intime)) :: NUMERIC / 3600,4) as HOURS
70 |       , de.itemid
71 |       , de.amount
72 |       , de.amountuom
73 |       , de.rate
74 |       , de.rateuom
75 |     from icustays ie
76 |     inner join inputevents_cv de
77 |         on de.icustay_id = ie.icustay_id
78 |     inner join d_items di
79 |         on de.itemid = di.itemid
80 |     where de.hadm_id = 103075
81 |     order by charttime
82 |   ) To '/data/mimic3/example-patient-inputevents.csv' With CSV HEADER;
83 | 


--------------------------------------------------------------------------------
/example-patient/mlcc1_example_patient.m:
--------------------------------------------------------------------------------
  1 | %%  Plot data for an example patient
  2 | % Runs the expt-query-*.sql queries against a MIMIC database and plots the results, one cell at a time.
  3 | %% 1 - Initialize some plotting variables
  4 | % Some variables used to make pretty plots (col: one RGB colour per row)
  5 | col = [0.9047    0.1918    0.1988
  6 |     0.2941    0.5447    0.7494
  7 |     0.3718    0.7176    0.3612
  8 |     1.0000    0.5482    0.1000
  9 |     0.4550    0.4946    0.4722
 10 |     0.6859    0.4035    0.2412
 11 |     0.9718    0.5553    0.7741
 12 |     0.5313    0.3359    0.6523];
 13 | % repeat the 8 colours so that up to 16 series can be drawn
 14 | col = repmat(col,2,1);
 15 | col_fill = col; % marker fill colours keep the repeated palette
 16 | col(9:end,:) = 0; % when plotting > 8 items, we make the outline black
 17 | 
 18 | marker = {'d','+','o','x','>','s','<','+','^'};
 19 | marker = repmat(marker,1,2); % repeat the marker symbols as well
 20 | ms = 12; % marker size used for every series
 21 | savefigflag=0;
 22 | 
 23 | %% 2 - SQLite instructions
 24 | % STEP 1: Tell Matlab where the driver is
 25 | javaclasspath('sqlite-jdbc-3.8.11.2.jar') % use this for SQLite
 26 | 
 27 | % STEP 2: Connect to the Database (empty data source, user and password; JDBC driver class and URL given explicitly)
 28 | conn = database('','','',...
 29 |     'org.sqlite.JDBC',['jdbc:sqlite:' pwd filesep 'data' filesep 'mimiciii_v1_3_mini.sqlite']);
 30 | % The URL points at <current folder>/data/mimiciii_v1_3_mini.sqlite, so run this from the folder that contains data/.
 31 | % NOTE(review): confirm the database file name matches your local copy of the demo database.
 32 | % Note: Amazon RDS instructions - will be slower as it is the full database
 33 | % % STEP 1: Tell Matlab where the driver is
 34 | % javaclasspath('postgresql-9.4.1207.jre6.jar') % use this for Amazon
 35 | % 
 36 | % % STEP 2: Connect to the Database
 37 | % conn = database('MIMIC','testuser','mitmlcctu','Vendor','PostgreSQL',...
 38 | %                 'Server','<xxxxx>.amazonaws.com',...
 39 | %                 'PortNumber',5432);
 40 | 
 41 | 
 42 | %% 3 - Run the query to extract chartevents data
 43 | query = makeQuery('expt-query-1.sql'); % makeQuery is a helper defined outside this script; given the SQL file name
 44 | data_ce = fetch(conn,query); % indexed below as a cell array: one row per event, one column per selected field
 45 | 
 46 | %% 4 - Plot patient vital signs
 47 | figure(1); clf; hold all;
 48 | 
 49 | lbl_plot = {'Arterial BP Mean',...
 50 |     'Heart Rate',...
 51 |     'SpO2',...
 52 |     'Respiratory Rate'};
 53 | 
 54 | % loop through the above list of labels
 55 | for k=1:numel(lbl_plot)
 56 |     % create an index for only the label we are interested in (the 2nd column is the label)
 57 |     idxPlot = ismember(data_ce(:,2), lbl_plot{k});
 58 |     
 59 |     % the 3rd column is the time, and the 6th column is VALUENUM, the numeric value
 60 |     data_plot = cell2mat(data_ce(idxPlot,6));
 61 |     time_plot = cell2mat(data_ce(idxPlot,3));
 62 |     
 63 |     % plot the data for this label
 64 |     plot(time_plot, data_plot,...
 65 |         'LineStyle','--', 'Marker',marker{k},...
 66 |         'Color', col(k,:), 'MarkerFaceColor', col_fill(k,:),...
 67 |         'markersize', ms, 'linewidth',2);
 68 | end
 69 | 
 70 | set(gca,'XLim',[0,72],'YLim',[0,150]);
 71 | set(gca,'YTick',0:25:150);
 72 | 
 73 | xlabel('Hours since ICU admission','FontSize',16);
 74 | ylabel('Value of measurement','FontSize',16);
 75 | 
 76 | % legend entries follow the order in which the labels were plotted
 77 | hleg=legend(lbl_plot,'Location','NorthEast');
 78 | set(gca,'FontSize',16);
 79 | grid on;
 80 | 
 81 | %% 5 - What else could you add to the above plot? Add labels to lbl_plot.
 82 | 
 83 | % here is a list of the available labels:
 84 | unique(data_ce(:,2))
 85 | 
 86 | 
 87 | %% 6 - Extract lab values
 88 | query = makeQuery('expt-query-2.sql');
 89 | data_le = fetch(conn,query);
 90 | 
 91 | %% 7 - Plot lab values
 92 | figure(1); clf; hold all;
 93 | lbl_plot = {'CREATININE','HEMOGLOBIN','LACTATE'};
 94 | 
 95 | % plot the values: one dashed line with markers per lab label
 96 | for k=1:numel(lbl_plot)
 97 |     
 98 |     % create an index for only the label we are interested in (the 2nd column is the label)
 99 |     idxPlot = ismember(data_le(:,2), lbl_plot{k});
100 |     
101 |     % the 3rd column is the time, and the 6th column is VALUENUM, the numeric value
102 |     data_plot = cell2mat(data_le(idxPlot,6));
103 |     time_plot = cell2mat(data_le(idxPlot,3));
104 |     
105 |     % plot the data for this label
106 |     plot(time_plot, data_plot,...
107 |         'LineStyle','--','Marker',marker{k},...
108 |         'Color',col(k,:), 'markerfacecolor',col_fill(k,:),...
109 |         'markersize',ms,'linewidth',2);
110 | end
111 | legend(lbl_plot,'Location','NorthEast');
112 | set(gca,'XLim',[0,72],'YLim',[0,25],'FontSize',14);
113 | grid on;
114 | 
115 | xlabel('Hours since ICU admission','FontSize',16);
116 | ylabel('Value of measurement','FontSize',16);
117 | 
118 | 
119 | %% 8 - What else could you add to the above plot? Add labels to lbl_plot.
120 | 
121 | % here is a list of the available labels:
122 | unique(data_le(:,2))
123 | 
124 | %% 9 - Extract output values
125 | query = makeQuery('expt-query-3.sql');
126 | data_oe = fetch(conn,query);
127 | 
128 | %% 10 - Plot the outputs
129 | figure(1); clf; hold all;
130 | lbl_plot = {'Urine Out Foley'};
131 | for k=1:numel(lbl_plot)
132 |     
133 |     % create an index for only the label we are interested in (the 2nd column is the label)
134 |     idxPlot = ismember(data_oe(:,2), lbl_plot{k});
135 |     
136 |     % the 3rd column is the time, and the 6th column is VALUENUM (for outputs the query aliases VALUE as VALUENUM)
137 |     data_plot = cell2mat(data_oe(idxPlot,6));
138 |     time_plot = cell2mat(data_oe(idxPlot,3));
139 |     % plot the data for this label
140 |     plot(time_plot, data_plot,...
141 |         'LineStyle','--','Marker',marker{k},...
142 |         'color',col(k,:), 'markerfacecolor',col_fill(k,:),...
143 |         'linewidth',2,'markersize',ms);
144 | end
145 | 
146 | legend(lbl_plot,'Location','NorthEast');
147 | set(gca,'XLim',[0,72],'YLim',[0,1000],'FontSize',14);
148 | 
149 | xlabel('Hours since ICU admission','FontSize',16);
150 | ylabel('Value of measurement','FontSize',16);
151 | grid on;
152 | 
153 | %% 11 - What else could you add to the above plot? Add labels to lbl_plot.
154 | 
155 | % here is a list of the available labels:
156 | unique(data_oe(:,2))
157 | 
 158 | %% 12 - Extract input values
 159 | query = makeQuery('expt-query-4.sql'); % makeQuery is a project helper; presumably returns the SQL text of this file - confirm
 160 | data_ie = fetch(conn,query);
 161 | 
 162 | % this is a fix to replace empty cells with "NaN"
 163 | % SQL represents missing values as empty, but MATLAB represents them as NaN
 164 | data_ie(cellfun(@isempty, data_ie(:,5)),5) = {NaN};
 165 | data_ie(cellfun(@isempty, data_ie(:,7)),7) = {NaN};
 166 | %% 13 - Plot the inputs
 167 | figure(1); clf; hold all;
 168 | lbl_plot = {'Neosynephrine-k','Propofol'};
 169 | for k=1:numel(lbl_plot)
 170 |     
 171 |     % create an index for only the label we are interested in
 172 |     idxPlot = ismember(data_ie(:,2), lbl_plot{k});
 173 |     
 174 |     % the 3rd column is the time
 175 |     % for inputs, the order is slightly different:
 176 |     %   the 5th column is the VOLUME
 177 |     %   the 7th column is the RATE
 178 |     data_plot = cell2mat(data_ie(idxPlot,7)); % column 7 (the RATE) is what gets plotted
 179 |     time_plot = cell2mat(data_ie(idxPlot,3));
 180 |     % plot the data for this label; marker, col, col_fill and ms must already exist in the workspace
 181 |     plot(time_plot, data_plot,...
 182 |         'LineStyle','--','Marker',marker{k},...
 183 |         'color',col(k,:), 'markerfacecolor',col_fill(k,:),...
 184 |         'linewidth',2,'markersize',ms);
 185 | end
 186 | 
 187 | legend(lbl_plot,'Location','NorthWest');
 188 | set(gca,'XLim',[0,72],'YLim',[0,100],'FontSize',14);
 189 | 
 190 | grid on;
 191 | 
 192 | xlabel('Hours since ICU admission','FontSize',16);
 193 | ylabel('Value of measurement','FontSize',16);
 194 | 
 195 | 
 196 | %% 14 - What else could you add to the above plot? Add labels to lbl_plot.
 197 | 
 198 | % here is a list of the available labels (no semicolon, so the list is printed):
 199 | unique(data_ie(:,2))
200 | 
201 | 
202 | %% 15 - Bring it all together
203 | lbl_ce = {'Arterial BP Mean','Heart Rate','SpO2','Respiratory Rate'};
204 | lbl_le = {'CREATININE','HEMOGLOBIN','LACTATE'};
205 | lbl_oe = {'Urine Out Foley'};
206 | lbl_ie = {'Neosynephrine-k','Propofol'};
207 | 
208 | figure(1); clf; hold all;
209 | 
210 | k_offset = 0;
211 | % Plot the chart values
212 | for k=1:numel(lbl_ce)
213 |     % create an index for only the label we are interested in
214 |     idxPlot = ismember(data_ce(:,2), lbl_ce{k});
215 |     
216 |     % the 3rd column is the time, and the 6th column is VALUENUM, the numeric value
217 |     data_plot = cell2mat(data_ce(idxPlot,6));
218 |     time_plot = cell2mat(data_ce(idxPlot,3));
219 |     
220 |     % plot the data for this label
221 |     plot(time_plot, data_plot, marker{k},...
222 |         'Color', col(k+k_offset,:), 'MarkerFaceColor', col_fill(k+k_offset,:),...
223 |         'markersize', ms, 'linewidth',2);
224 | end
225 | k_offset=k_offset+k;
226 | 
227 | % Plot the lab values
228 | for k=1:numel(lbl_le)
229 |     
230 |     % create an index for only the label we are interested in
231 |     idxPlot = ismember(data_le(:,2), lbl_le{k});
232 |     
233 |     % the 3rd column is the time, and the 6th column is VALUENUM, the numeric value
234 |     data_plot = cell2mat(data_le(idxPlot,6));
235 |     time_plot = cell2mat(data_le(idxPlot,3));
236 |     
237 |     % plot the data for this label
238 |     plot(time_plot, data_plot,...
239 |         'LineStyle','--','Marker',marker{k+k_offset},...
240 |         'Color',col(k+k_offset,:), 'markerfacecolor',col_fill(k+k_offset,:),...
241 |         'markersize',ms,'linewidth',2);
242 | end
243 | k_offset=k_offset+k;
244 | 
245 | % Plot the outputs
246 | for k=1:numel(lbl_oe)
247 |     
248 |     % create an index for only the label we are interested in
249 |     idxPlot = ismember(data_oe(:,2), lbl_oe{k});
250 |     
251 |     % the 3rd column is the time, and the 6th column is VALUENUM, the numeric value
252 |     data_plot = cell2mat(data_oe(idxPlot,6));
253 |     time_plot = cell2mat(data_oe(idxPlot,3));
254 |     
255 |     plot(time_plot, data_plot,...
256 |         'LineStyle','--','Marker',marker{k+k_offset},...
257 |         'color',col(k+k_offset,:), 'markerfacecolor',col_fill(k+k_offset,:),...
258 |         'linewidth',2,'markersize',ms);
259 | end
260 | k_offset=k_offset+k;
261 | 
262 | % Plot the inputs
263 | for k=1:numel(lbl_ie)
264 |     % create an index for only the label we are interested in
265 |     idxPlot = ismember(data_ie(:,2), lbl_ie{k});
266 |     
267 |     % the 3rd column is the time
268 |     % for inputs, the order is slightly different:
269 |     %   the 5th column is the VOLUME
270 |     %   the 7th column is the RATE
271 |     data_plot = cell2mat(data_ie(idxPlot,7));
272 |     time_plot = cell2mat(data_ie(idxPlot,3));
273 |     
274 |     plot(time_plot, data_plot,...
275 |         'LineStyle','--','Marker',marker{k+k_offset},...
276 |         'color',col(k+k_offset,:), 'markerfacecolor',col_fill(k+k_offset,:),...
277 |         'linewidth',2,'markersize',ms);
278 | end
279 | k_offset=k_offset+k;
280 | 
281 | 
282 | 
283 | legend([lbl_ce, lbl_le, lbl_oe, lbl_ie],'Location','Best');
284 | xlabel('Hours since ICU admission','FontSize',16);
285 | ylabel('Value of measurement','FontSize',16);
286 | 
287 | 


--------------------------------------------------------------------------------
/installation/sqlite-manager/sqlite_manager-0.8.3-tb+sm+fx.xpi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MIT-LCP/mimic-workshop/b27eee438a1f62d909dd30d1d458d3516f32b276/installation/sqlite-manager/sqlite_manager-0.8.3-tb+sm+fx.xpi


--------------------------------------------------------------------------------
/intro_to_mimic/00-query-mimic.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Introduction to the MIMIC database
  3 | 
  4 | ## What is the MIMIC Critical Care Database?
  5 | 
  6 | MIMIC-III is a freely available relational database developed by the MIT Lab for Computational Physiology, comprising deidentified health data associated with >40,000 critical care patients. It includes demographics, vital signs, laboratory tests, medications, and more. MIMIC-III is used widely around the world in academic research, education, and industry. For further information, see: https://mimic.physionet.org/
  7 | 
  8 | ## Workshop overview
  9 | 
 10 | During the workshop, you will:
 11 | 
 12 | * Learn about MIMIC-III, the publicly accessible critical care database
 13 | * Create a local version of MIMIC-III with a small sample of patients using the Firefox SQLite Plugin
 14 | * Explore the patient data using SQL
 15 | * Plot and analyse the data using Python
 16 | * Get inspiration for future research projects
 17 | 
 18 | ## Set up a mini version of MIMIC-III on your computer
 19 | 
 20 | * MIMIC-III contains over 40,000 patients, but for the workshop we will be working with a subset of patients.
 21 | * To create the database on your computer, you will need to install Firefox and the Firefox SQLite Manager Add-on. Open Firefox, select "Add-ons" from the Tools menu, and then install SQLite Manager.
 22 | * After restarting Firefox, select "SQLite Manager" from the tools menu. In SQLite Manager, click "Connect Database" in the menu, and select the "data/mimicdata.sqlite" database file.
 23 | 
 24 | ## Start exploring the data with SQL
 25 | 
 26 | SQL stands for "structured query language". It is the standard language used for querying relational databases, which are databases comprising several tables linked together by IDs.
 27 | 
 28 | TIP: queries are generally constructed using the following syntax:
 29 | 
 30 | ```sql
 31 | SELECT <columns>  
 32 | FROM <table>  
 33 | WHERE <constraints>;
 34 | ```
 35 | 
 36 | ### Select all of the columns ('\*') from the patients table
 37 | 
 38 | ```sql
 39 | SELECT *  
 40 | FROM patients;
 41 | ```
 42 | 
 43 | ### Select all of the columns ('\*') from the patients table where the patient is female
 44 | 
 45 | ```sql
 46 | SELECT *
 47 | FROM patients
 48 | WHERE gender = 'F';
 49 | ```
 50 | 
 51 | ### Select all of the columns ('\*') from the patients table for a single patient
 52 | 
 53 | ```sql
 54 | SELECT *
 55 | FROM patients
 56 | WHERE subject_id = 40080;
 57 | ```
 58 | 
 59 | ## More example queries
 60 | 
 61 | ### Combine the admissions and patients table using their common link, `subject_id`
 62 | 
 63 | ```sql
 64 | SELECT *
 65 | FROM patients
 66 | INNER JOIN admissions
 67 | ON patients.subject_id = admissions.subject_id;
 68 | ```
 69 | 
 70 | ### Subselect rows using the where clause
 71 | 
 72 | Here we select only the female ('F') patients.
 73 | 
 74 | ```sql
 75 | SELECT *
 76 | FROM patients
 77 | INNER JOIN admissions
 78 | ON patients.subject_id = admissions.subject_id
 79 | WHERE gender = 'F';
 80 | ```
 81 | 
 82 | ### Select a single patient by specifying their `subject_id`
 83 | 
 84 | Note that we need to specify which table the `subject_id` is sourced from (`patients.subject_id`).
 85 | This is because there are two `subject_id` columns: one from patients and the other from admissions.
 86 | SQL will not know which table to choose from, so you must specify it.
 87 | 
 88 | ```sql
 89 | SELECT *
 90 | FROM patients
 91 | INNER JOIN admissions
 92 | ON patients.subject_id = admissions.subject_id
 93 | WHERE gender = 'F'
 94 | AND patients.subject_id = 40080;
 95 | ```
 96 | 
 97 | ### Select only data from the patients table
 98 | 
 99 | We can use the table name with a wild card (\*) to specify all columns from that table.
100 | 
101 | ```sql
102 | SELECT patients.*
103 | FROM patients
104 | INNER JOIN admissions
105 | ON patients.subject_id = admissions.subject_id
106 | WHERE gender = 'F'
107 | AND patients.subject_id = 40080;
108 | ```
109 | 
110 | ### Select only data from the admissions table
111 | 
112 | Similarly, we can select only the columns in the admissions table.
113 | 
114 | ```sql
115 | SELECT admissions.*
116 | FROM patients
117 | INNER JOIN admissions
118 | ON patients.subject_id = admissions.subject_id
119 | WHERE gender = 'F'
120 | AND patients.subject_id = 40080;
121 | ```
122 | 
123 | ### Select single columns from a table
124 | 
125 | Instead of using the wild card, we can specify the columns we would like (in this case, DOB).
126 | 
127 | ```sql
128 | SELECT patients.DOB, admissions.*
129 | FROM patients
130 | INNER JOIN admissions
131 | ON patients.subject_id = admissions.subject_id
132 | WHERE gender = 'F'
133 | AND patients.subject_id = 40080;
134 | ```
135 | 
136 | ### Using aliases for convenience
137 | 
138 | Typing out admissions and patients over and over can be tedious. SQL allows aliases to be defined.
139 | Aliases are simply shorthand for the full table name. An alias is defined by writing a word after the table name in either the FROM or JOIN clause.
140 | For example, we have defined the alias 'pat' for patients, and the alias 'adm' for admissions.
141 | Now, when we select from these tables, we use the alias name, *not* the table name.
142 | 
143 | ```sql
144 | SELECT pat.DOB, adm.*
145 | FROM patients pat
146 | INNER JOIN admissions adm
147 | ON pat.subject_id = adm.subject_id
148 | WHERE gender = 'F'
149 | AND pat.subject_id = 40080;
150 | ```
151 | 
152 | ### Select data for the same patient from chartevents
153 | 
154 | ```sql
155 | SELECT *
156 | FROM chartevents
157 | WHERE subject_id = 40080;
158 | ```
159 | 
160 | ### Define an alias for chartevents and select data from it
161 | 
162 | ```sql
163 | SELECT ce.*
164 | FROM chartevents ce
165 | WHERE subject_id = 40080;
166 | ```
167 | 
168 | ### Join to the d_items table to get a description of what the observations in chartevents are
169 | 
170 | ```sql
171 | SELECT ce.*, di.label
172 | FROM chartevents ce
173 | INNER JOIN d_items di
174 | ON ce.itemid = di.itemid
175 | WHERE subject_id = 40080;
176 | ```
177 | 


--------------------------------------------------------------------------------
/intro_to_mimic/01-example-patient-heart-failure.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Exploring the trajectory of a single patient"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "### Import Python libraries"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "We first need to import some tools for working with data in Python. \n",
 22 |     "- NumPy is for working with numbers\n",
 23 |     "- Pandas is for analysing data\n",
 24 |     "- MatPlotLib is for making plots\n",
 25 |     "- Sqlite3 to connect to the database"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": null,
 31 |    "metadata": {
 32 |     "collapsed": false
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "import numpy as np\n",
 37 |     "import pandas as pd\n",
 38 |     "import matplotlib.pyplot as plt\n",
 39 |     "import sqlite3\n",
 40 |     "%matplotlib inline"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "metadata": {},
 46 |    "source": [
 47 |     "### Connect to the database"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "markdown",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "- We can use the sqlite3 library to connect to the MIMIC database\n",
 55 |     "- Once the connection is established, we'll run a simple SQL query."
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": null,
 61 |    "metadata": {
 62 |     "collapsed": true
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "# Connect to the MIMIC database\n",
 67 |     "conn = sqlite3.connect('data/mimicdata.sqlite')"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": null,
 73 |    "metadata": {
 74 |     "collapsed": true
 75 |    },
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "# Create our test query\n",
 79 |     "test_query = \"\"\"\n",
 80 |     "SELECT subject_id, hadm_id, admittime, dischtime, admission_type, diagnosis\n",
 81 |     "FROM admissions\n",
 82 |     "\"\"\""
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": null,
 88 |    "metadata": {
 89 |     "collapsed": true
 90 |    },
 91 |    "outputs": [],
 92 |    "source": [
 93 |     "# Run the query and assign the results to a variable\n",
 94 |     "test = pd.read_sql_query(test_query,conn)"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": null,
100 |    "metadata": {
101 |     "collapsed": false
102 |    },
103 |    "outputs": [],
104 |    "source": [
105 |     "# Display the first few rows\n",
106 |     "test.head()"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "### Load the chartevents data"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "markdown",
118 |    "metadata": {},
119 |    "source": [
120 |     "- The chartevents table contains data charted at the patient bedside. It includes variables such as heart rate, respiratory rate, temperature, and so on.\n",
121 |     "- We'll begin by loading the chartevents data for a single patient."
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": null,
127 |    "metadata": {
128 |     "collapsed": false
129 |    },
130 |    "outputs": [],
131 |    "source": [
132 |     "query = \"\"\"\n",
133 |     "SELECT de.icustay_id\n",
134 |     "  , (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS\n",
135 |     "  , di.label\n",
136 |     "  , de.value\n",
137 |     "  , de.valuenum\n",
138 |     "  , de.uom\n",
139 |     "FROM chartevents de\n",
140 |     "INNER join d_items di\n",
141 |     "ON de.itemid = di.itemid\n",
142 |     "INNER join icustays ie\n",
143 |     "ON de.icustay_id = ie.icustay_id\n",
144 |     "WHERE de.icustay_id = 252522\n",
145 |     "ORDER BY charttime;\n",
146 |     "\"\"\"\n",
147 |     "\n",
148 |     "ce = pd.read_sql_query(query,conn)\n",
149 |     "\n",
150 |     "\n",
151 |     "# OPTION 2: load chartevents from a CSV file\n",
152 |     "# ce = pd.read_csv('data/example_chartevents.csv', index_col='HOURSSINCEADMISSION')"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": null,
158 |    "metadata": {
159 |     "collapsed": false
160 |    },
161 |    "outputs": [],
162 |    "source": [
163 |     "# Preview the data\n",
164 |     "# Use 'head' to limit the number of rows returned\n",
165 |     "ce.head()"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "markdown",
170 |    "metadata": {},
171 |    "source": [
172 |     "### Review the patient's heart rate"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "markdown",
177 |    "metadata": {},
178 |    "source": [
179 |     "- We can select individual columns using the column name. \n",
180 |     "- For example, if we want to select just the label column, we write **```ce.LABEL```** or alternatively **```ce['LABEL']```**"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": null,
186 |    "metadata": {
187 |     "collapsed": false
188 |    },
189 |    "outputs": [],
190 |    "source": [
191 |     "# Select a single column\n",
192 |     "ce['LABEL']"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "markdown",
197 |    "metadata": {},
198 |    "source": [
199 |     "- In a similar way, we can select rows from data using indexes. \n",
200 |     "- For example, to select rows where the label is equal to 'Heart Rate', we would create an index using **```[ce.LABEL=='Heart Rate']```** "
201 |    ]
202 |   },
203 |   {
204 |    "cell_type": "code",
205 |    "execution_count": null,
206 |    "metadata": {
207 |     "collapsed": false
208 |    },
209 |    "outputs": [],
210 |    "source": [
211 |     "# Select just the heart rate rows using an index\n",
212 |     "ce[ce.LABEL=='Heart Rate']"
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "markdown",
217 |    "metadata": {},
218 |    "source": [
219 |     "### Plot 1: How did the patient's heart rate change over time?"
220 |    ]
221 |   },
222 |   {
223 |    "cell_type": "markdown",
224 |    "metadata": {},
225 |    "source": [
226 |     "- Using the methods described above to select our data of interest, we can create our x and y axis values to create a time series plot of heart rate."
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "code",
231 |    "execution_count": null,
232 |    "metadata": {
233 |     "collapsed": false
234 |    },
235 |    "outputs": [],
236 |    "source": [
237 |     "# Which time stamps (hours since ICU admission) have a corresponding heart rate measurement?\n",
238 |     "print(ce.HOURS[ce.LABEL=='Heart Rate'])"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": null,
244 |    "metadata": {
245 |     "collapsed": false,
246 |     "scrolled": true
247 |    },
248 |    "outputs": [],
249 |    "source": [
250 |     "# Set x equal to the times\n",
251 |     "x_hr = ce.HOURS[ce.LABEL=='Heart Rate']\n",
252 |     "\n",
253 |     "# Set y equal to the heart rates\n",
254 |     "y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']\n",
255 |     "\n",
256 |     "# Plot time against heart rate\n",
257 |     "plt.figure(figsize=(14, 6))\n",
258 |     "plt.plot(x_hr,y_hr)\n",
259 |     "\n",
260 |     "\n",
261 |     "plt.xlabel('Time',fontsize=16)\n",
262 |     "plt.ylabel('Heart rate',fontsize=16)\n",
263 |     "plt.title('Heart rate over time from admission to the intensive care unit')"
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "markdown",
268 |    "metadata": {},
269 |    "source": [
270 |     "### Task 1\n",
271 |     "\n",
272 |     "* What is happening to this patient's heart rate?\n",
273 |     "* Plot respiratory rate over time for the patient.\n",
274 |     "* Is there anything unusual about the patient's respiratory rate?\n"
275 |    ]
276 |   },
277 |   {
278 |    "cell_type": "code",
279 |    "execution_count": null,
280 |    "metadata": {
281 |     "collapsed": true
282 |    },
283 |    "outputs": [],
284 |    "source": [
285 |     "# Exercise 1 here\n",
286 |     "\n"
287 |    ]
288 |   },
289 |   {
290 |    "cell_type": "markdown",
291 |    "metadata": {},
292 |    "source": [
293 |     "### Plot 2: Did the patient's vital signs breach any alarm thresholds?"
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "markdown",
298 |    "metadata": {},
299 |    "source": [
300 |     "- Alarm systems in the intensive care unit are commonly based on high and low thresholds defined by the carer.\n",
301 |     "- False alarms are often a problem and so thresholds may be set arbitrarily to reduce alarms.\n",
302 |     "- As a result, alarm settings carry limited information."
303 |    ]
304 |   },
305 |   {
306 |    "cell_type": "code",
307 |    "execution_count": null,
308 |    "metadata": {
309 |     "collapsed": false
310 |    },
311 |    "outputs": [],
312 |    "source": [
313 |     "plt.figure(figsize=(14, 6))\n",
314 |     "\n",
315 |     "plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'], \n",
316 |     "         ce.VALUENUM[ce.LABEL=='Respiratory Rate'],\n",
317 |     "         'k+', markersize=10, linewidth=4)\n",
318 |     "\n",
319 |     "plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - High'], \n",
320 |     "         ce.VALUENUM[ce.LABEL=='Resp Alarm - High'],\n",
321 |     "         'm--')\n",
322 |     "\n",
323 |     "plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - Low'], \n",
324 |     "         ce.VALUENUM[ce.LABEL=='Resp Alarm - Low'],\n",
325 |     "         'm--')\n",
326 |     "\n",
327 |     "plt.xlabel('Time',fontsize=16)\n",
328 |     "plt.ylabel('Respiratory rate',fontsize=16)\n",
329 |     "plt.title('Respiratory rate over time from admission, with upper and lower alarm thresholds')\n",
330 |     "plt.ylim(0,55)\n"
331 |    ]
332 |   },
333 |   {
334 |    "cell_type": "markdown",
335 |    "metadata": {},
336 |    "source": [
337 |     "### Task 2\n",
338 |     "\n",
339 |     "- Based on the data, does it look like the alarms would have triggered for this patient?\n"
340 |    ]
341 |   },
342 |   {
343 |    "cell_type": "markdown",
344 |    "metadata": {},
345 |    "source": [
346 |     "### Plot 3: What is the patient's level of consciousness?"
347 |    ]
348 |   },
349 |   {
350 |    "cell_type": "markdown",
351 |    "metadata": {},
352 |    "source": [
353 |     "- Glasgow Coma Scale (GCS) is a measure of consciousness.\n",
354 |     "- It is commonly used for monitoring patients in the intensive care unit. \n",
355 |     "- It consists of three components: eye response; verbal response; motor response."
356 |    ]
357 |   },
358 |   {
359 |    "cell_type": "code",
360 |    "execution_count": null,
361 |    "metadata": {
362 |     "collapsed": false
363 |    },
364 |    "outputs": [],
365 |    "source": [
366 |     "# Display the first few rows of the GCS eye response data\n",
367 |     "ce[ce.LABEL=='GCS - Eye Opening'].head()"
368 |    ]
369 |   },
370 |   {
371 |    "cell_type": "code",
372 |    "execution_count": null,
373 |    "metadata": {
374 |     "collapsed": false
375 |    },
376 |    "outputs": [],
377 |    "source": [
378 |     "# Prepare the size of the figure\n",
379 |     "plt.figure(figsize=(18, 10))\n",
380 |     "\n",
381 |     "# Set x equal to the times\n",
382 |     "x_hr = ce.HOURS[ce.LABEL=='Heart Rate']\n",
383 |     "\n",
384 |     "# Set y equal to the heart rates\n",
385 |     "y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']\n",
386 |     "\n",
387 |     "\n",
388 |     "plt.plot(x_hr,y_hr)\n",
389 |     "\n",
390 |     "plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'], \n",
391 |     "         ce.VALUENUM[ce.LABEL=='Respiratory Rate'],\n",
392 |     "         'k', markersize=6)\n",
393 |     "\n",
394 |     "# Add a text label to the y-axis\n",
395 |     "plt.text(-20,155,'GCS - Eye Opening',fontsize=14)\n",
396 |     "plt.text(-20,150,'GCS - Motor Response',fontsize=14)\n",
397 |     "plt.text(-20,145,'GCS - Verbal Response',fontsize=14)   \n",
398 |     "\n",
399 |     "# Iterate over list of GCS labels, plotting every 6th (of the first 65) to avoid overlap\n",
400 |     "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Eye Opening'].values):\n",
401 |     "    if np.mod(i,6)==0 and i < 65:\n",
402 |     "        plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Eye Opening'].values[i],155),fontsize=14)\n",
403 |     "        \n",
404 |     "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Motor Response'].values):\n",
405 |     "    if np.mod(i,6)==0 and i < 65:\n",
406 |     "        plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Motor Response'].values[i],150),fontsize=14)\n",
407 |     "\n",
408 |     "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Verbal Response'].values):\n",
409 |     "    if np.mod(i,6)==0 and i < 65:\n",
410 |     "        plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Verbal Response'].values[i],145),fontsize=14)\n",
411 |     "\n",
412 |     "plt.title('Vital signs and Glasgow Coma Scale over time from admission',fontsize=16)\n",
413 |     "\n",
414 |     "plt.xlabel('Time (hours)',fontsize=16)\n",
415 |     "plt.ylabel('Heart rate or GCS',fontsize=16)\n",
416 |     "plt.ylim(10,165)\n"
417 |    ]
418 |   },
419 |   {
420 |    "cell_type": "markdown",
421 |    "metadata": {},
422 |    "source": [
423 |     "### Task 3\n",
424 |     "\n",
425 |     "- How is the patient's consciousness changing over time?"
426 |    ]
427 |   },
428 |   {
429 |    "cell_type": "markdown",
430 |    "metadata": {},
431 |    "source": [
432 |     "# Stop here..."
433 |    ]
434 |   },
435 |   {
436 |    "cell_type": "markdown",
437 |    "metadata": {},
438 |    "source": [
439 |     "### Plot 4: What other data do we have on the patient?"
440 |    ]
441 |   },
442 |   {
443 |    "cell_type": "markdown",
444 |    "metadata": {},
445 |    "source": [
446 |     "- Using the Pandas 'read_sql_query' function again, we'll now load the outputevents data - this table contains all information about patient outputs (urine output, drains, dialysis)."
447 |    ]
448 |   },
449 |   {
450 |    "cell_type": "code",
451 |    "execution_count": null,
452 |    "metadata": {
453 |     "collapsed": false
454 |    },
455 |    "outputs": [],
456 |    "source": [
457 |     "# OPTION 1: load outputs from the patient\n",
458 |     "query = \"\"\"\n",
459 |     "select de.icustay_id\n",
460 |     "  , (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS\n",
461 |     "  , di.label\n",
462 |     "  , de.value\n",
463 |     "  , de.valueuom\n",
464 |     "from outputevents de \n",
465 |     "inner join icustays ie\n",
466 |     "  on de.icustay_id = ie.icustay_id\n",
467 |     "inner join d_items di\n",
468 |     "  on de.itemid = di.itemid\n",
469 |     "where de.subject_id = 40080\n",
470 |     "order by charttime;\n",
471 |     "\"\"\"\n",
472 |     "\n",
473 |     "oe = pd.read_sql_query(query,conn)"
474 |    ]
475 |   },
476 |   {
477 |    "cell_type": "code",
478 |    "execution_count": null,
479 |    "metadata": {
480 |     "collapsed": false
481 |    },
482 |    "outputs": [],
483 |    "source": [
484 |     "oe.head()"
485 |    ]
486 |   },
487 |   {
488 |    "cell_type": "code",
489 |    "execution_count": null,
490 |    "metadata": {
491 |     "collapsed": false
492 |    },
493 |    "outputs": [],
494 |    "source": [
495 |     "# Create a single figure (a second plt.figure call would leave an empty one behind)\n",
496 |     "plt.figure(figsize=(14, 6))\n",
497 |     "\n",
498 |     "plt.title('Fluid output over time')\n",
499 |     "\n",
500 |     "plt.plot(oe.HOURS, \n",
501 |     "         oe.VALUE.cumsum()/1000, \n",
502 |     "         'ro', markersize=8, label='Output volume, L')\n",
503 |     "\n",
504 |     "plt.xlim(0,72)\n",
505 |     "plt.ylim(0,10)\n",
506 |     "plt.legend()"
507 |    ]
508 |   },
509 |   {
510 |    "cell_type": "markdown",
511 |    "metadata": {},
512 |    "source": [
513 |     "To put this plot in context, it would help to include patient input data. Together, inputs and outputs determine a patient's fluid balance - a key indicator in patient health."
514 |    ]
515 |   },
516 |   {
517 |    "cell_type": "code",
518 |    "execution_count": null,
519 |    "metadata": {
520 |     "collapsed": false
521 |    },
522 |    "outputs": [],
523 |    "source": [
524 |     "# OPTION 1: load inputs given to the patient (usually intravenously) using the database connection\n",
525 |     "query = \"\"\"\n",
526 |     "select de.icustay_id\n",
527 |     "  , (strftime('%s',de.starttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_START\n",
528 |     "  , (strftime('%s',de.endtime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_END\n",
529 |     "  , de.linkorderid\n",
530 |     "  , di.label\n",
531 |     "  , de.amount\n",
532 |     "  , de.amountuom\n",
533 |     "  , de.rate\n",
534 |     "  , de.rateuom\n",
535 |     "from inputevents_mv de \n",
536 |     "inner join icustays ie\n",
537 |     "  on de.icustay_id = ie.icustay_id\n",
538 |     "inner join d_items di\n",
539 |     "  on de.itemid = di.itemid\n",
540 |     "where de.subject_id = 40080\n",
541 |     "order by endtime;\n",
542 |     "\"\"\"\n",
543 |     "\n",
544 |     "ie = pd.read_sql_query(query,conn)\n",
545 |     "\n",
546 |     "# # OPTION 2: load ioevents using the CSV file with endtime as the index\n",
547 |     "# ioe = pd.read_csv('inputevents.csv'\n",
548 |     "#                   ,header=None\n",
549 |     "#                   ,names=['subject_id','itemid','label','starttime','endtime','amount','amountuom','rate','rateuom']\n",
550 |     "#                   ,parse_dates=True)"
551 |    ]
552 |   },
553 |   {
554 |    "cell_type": "code",
555 |    "execution_count": null,
556 |    "metadata": {
557 |     "collapsed": false
558 |    },
559 |    "outputs": [],
560 |    "source": [
561 |     "ie.head()"
562 |    ]
563 |   },
564 |   {
565 |    "cell_type": "markdown",
566 |    "metadata": {},
567 |    "source": [
568 |     "Note that the column headers are different: we have \"HOURS_START\" and \"HOURS_END\". This is because inputs are administered over a fixed period of time."
569 |    ]
570 |   },
571 |   {
572 |    "cell_type": "code",
573 |    "execution_count": null,
574 |    "metadata": {
575 |     "collapsed": false
576 |    },
577 |    "outputs": [],
578 |    "source": [
579 |     "ie['LABEL'].unique()"
580 |    ]
581 |   },
582 |   {
583 |    "cell_type": "code",
584 |    "execution_count": null,
585 |    "metadata": {
586 |     "collapsed": false
587 |    },
588 |    "outputs": [],
589 |    "source": [
590 |     "plt.figure(figsize=(14, 10))\n",
591 |     "\n",
592 |     "# Plot the cumulative input against the cumulative output\n",
593 |     "plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'], \n",
594 |     "         ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000, \n",
595 |     "         'go', markersize=8, label='Intake volume, L')\n",
596 |     "\n",
597 |     "plt.plot(oe.HOURS, \n",
598 |     "         oe.VALUE.cumsum()/1000, \n",
599 |     "         'ro', markersize=8, label='Output volume, L')\n",
600 |     "\n",
601 |     "plt.title('Fluid balance over time',fontsize=16)\n",
602 |     "plt.xlabel('Hours',fontsize=16)\n",
603 |     "plt.ylabel('Volume (litres)',fontsize=16)\n",
604 |     "# plt.ylim(0,38)\n",
605 |     "plt.legend()"
606 |    ]
607 |   },
608 |   {
609 |    "cell_type": "markdown",
610 |    "metadata": {
611 |     "collapsed": true
612 |    },
613 |    "source": [
614 |     "As the plot shows, the patient's intake tends to be above their output (as one would expect!) - but there are periods where they are almost one to one. One of the biggest challenges of working with ICU data is that context is everything - let's look at a treatment (lasix) that we know will affect this graph."
615 |    ]
616 |   },
617 |   {
618 |    "cell_type": "code",
619 |    "execution_count": null,
620 |    "metadata": {
621 |     "collapsed": false
622 |    },
623 |    "outputs": [],
624 |    "source": [
625 |     "plt.figure(figsize=(14, 10))\n",
626 |     "\n",
627 |     "# Plot the cumulative input against the cumulative output\n",
628 |     "plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'], \n",
629 |     "         ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000, \n",
630 |     "         'go', markersize=8, label='Intake volume, L')\n",
631 |     "\n",
632 |     "plt.plot(oe.HOURS, \n",
633 |     "         oe.VALUE.cumsum()/1000, \n",
634 |     "         'ro', markersize=8, label='Output volume, L')\n",
635 |     "\n",
636 |     "# example on getting two columns from a dataframe: ie[['HOURS_START','HOURS_END']].head()\n",
637 |     "\n",
638 |     "for i, idx in enumerate(ie.index[ie.LABEL=='Furosemide (Lasix)']):\n",
639 |     "    plt.plot([ie.HOURS_START[ie.LABEL=='Furosemide (Lasix)'][idx],\n",
640 |     "             ie.HOURS_END[ie.LABEL=='Furosemide (Lasix)'][idx]],\n",
641 |     "            [ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx],\n",
642 |     "             ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx]],\n",
643 |     "            'b-',linewidth=4)\n",
644 |     "    \n",
645 |     "\n",
646 |     "plt.title('Fluid balance over time',fontsize=16)\n",
647 |     "plt.xlabel('Hours',fontsize=16)\n",
648 |     "plt.ylabel('Volume (litres)',fontsize=16)\n",
649 |     "# plt.ylim(0,38)\n",
650 |     "plt.legend()\n"
651 |    ]
652 |   },
653 |   {
654 |    "cell_type": "code",
655 |    "execution_count": null,
656 |    "metadata": {
657 |     "collapsed": false
658 |    },
659 |    "outputs": [],
660 |    "source": [
661 |     "ie['LABEL'].unique()"
662 |    ]
663 |   },
664 |   {
665 |    "cell_type": "markdown",
666 |    "metadata": {},
667 |    "source": [
668 |     "### Exercise 2\n",
669 |     "\n",
670 |     "* Plot the alarms for the mean arterial pressure ('```Arterial Blood Pressure mean```')\n",
671 |     "* HINT: you can use ```ce.LABEL.unique()``` to find a list of variable names\n",
672 |     "* Were the alarm thresholds breached?"
673 |    ]
674 |   },
675 |   {
676 |    "cell_type": "code",
677 |    "execution_count": null,
678 |    "metadata": {
679 |     "collapsed": false
680 |    },
681 |    "outputs": [],
682 |    "source": [
683 |     "# Exercise 2 here\n",
684 |     "\n",
685 |     "\n"
686 |    ]
687 |   },
688 |   {
689 |    "cell_type": "markdown",
690 |    "metadata": {},
691 |    "source": [
692 |     "### Plot 3: Were the patient's other vital signs stable?"
693 |    ]
694 |   },
695 |   {
696 |    "cell_type": "code",
697 |    "execution_count": null,
698 |    "metadata": {
699 |     "collapsed": false
700 |    },
701 |    "outputs": [],
702 |    "source": [
703 |     "plt.figure(figsize=(14, 10))\n",
704 |     "\n",
705 |     "plt.plot(ce.index[ce.LABEL=='Heart Rate'], \n",
706 |     "         ce.VALUENUM[ce.LABEL=='Heart Rate'],\n",
707 |     "         'rx', markersize=8, label='HR')\n",
708 |     "\n",
709 |     "plt.plot(ce.index[ce.LABEL=='O2 saturation pulseoxymetry'], \n",
710 |     "         ce.VALUENUM[ce.LABEL=='O2 saturation pulseoxymetry'], \n",
711 |     "         'g.', markersize=8, label='O2')\n",
712 |     "\n",
713 |     "plt.plot(ce.index[ce.LABEL=='Arterial Blood Pressure mean'], \n",
714 |     "         ce.VALUENUM[ce.LABEL=='Arterial Blood Pressure mean'], \n",
715 |     "         'bv', markersize=8, label='MAP')\n",
716 |     "\n",
717 |     "plt.plot(ce.index[ce.LABEL=='Respiratory Rate'], \n",
718 |     "         ce.VALUENUM[ce.LABEL=='Respiratory Rate'], \n",
719 |     "         'k+', markersize=8, label='RR')\n",
720 |     "\n",
721 |     "plt.title('Vital signs over time from admission')\n",
722 |     "plt.ylim(0,130)\n",
723 |     "plt.legend()"
724 |    ]
725 |   },
726 |   {
727 |    "cell_type": "markdown",
728 |    "metadata": {},
729 |    "source": [
730 |     "### Plot 5: Laboratory measurements"
731 |    ]
732 |   },
733 |   {
734 |    "cell_type": "markdown",
735 |    "metadata": {},
736 |    "source": [
737 |     "Using Pandas again (`read_sql_query` with the database connection, or `read_csv` with the CSV file), we'll now load the labevents data.\n",
738 |     "This data corresponds to measurements made in a laboratory - usually on a sample of patient blood. "
739 |    ]
740 |   },
741 |   {
742 |    "cell_type": "code",
743 |    "execution_count": null,
744 |    "metadata": {
745 |     "collapsed": false
746 |    },
747 |    "outputs": [],
748 |    "source": [
749 |     "# OPTION 1: load labevents data using the database connection\n",
750 |     "query = \"\"\"\n",
751 |     "SELECT de.subject_id\n",
752 |     "  , de.charttime\n",
753 |     "  , di.label, de.value, de.valuenum\n",
754 |     "  , de.uom\n",
755 |     "FROM labevents de\n",
756 |     "INNER JOIN d_labitems di\n",
757 |     "  ON de.itemid = di.itemid\n",
758 |     "where de.subject_id = 40080\n",
759 |     "\"\"\"\n",
760 |     "\n",
761 |     "le = pd.read_sql_query(query,conn)\n",
762 |     "\n",
763 |     "# OPTION 2: load labevents from the CSV file\n",
764 |     "# le = pd.read_csv('data/example_labevents.csv', index_col='HOURSSINCEADMISSION')"
765 |    ]
766 |   },
767 |   {
768 |    "cell_type": "code",
769 |    "execution_count": null,
770 |    "metadata": {
771 |     "collapsed": false
772 |    },
773 |    "outputs": [],
774 |    "source": [
775 |     "# preview the labevents data\n",
776 |     "le.head()"
777 |    ]
778 |   },
779 |   {
780 |    "cell_type": "code",
781 |    "execution_count": null,
782 |    "metadata": {
783 |     "collapsed": false
784 |    },
785 |    "outputs": [],
786 |    "source": [
787 |     "# preview the HEMOGLOBIN rows of the labevents data\n",
788 |     "le[le.LABEL=='HEMOGLOBIN']"
789 |    ]
790 |   },
791 |   {
792 |    "cell_type": "code",
793 |    "execution_count": null,
794 |    "metadata": {
795 |     "collapsed": false
796 |    },
797 |    "outputs": [],
798 |    "source": [
799 |     "plt.figure(figsize=(14, 10))\n",
800 |     "\n",
801 |     "plt.plot(le.index[le.LABEL=='HEMATOCRIT'], \n",
802 |     "         le.VALUENUM[le.LABEL=='HEMATOCRIT'], \n",
803 |     "         'go', markersize=6, label='Haematocrit')\n",
804 |     "\n",
805 |     "plt.plot(le.index[le.LABEL=='HEMOGLOBIN'], \n",
806 |     "         le.VALUENUM[le.LABEL=='HEMOGLOBIN'], \n",
807 |     "         'bv', markersize=8, label='Hemoglobin')\n",
808 |     "\n",
809 |     "plt.title('Laboratory measurements over time from admission')\n",
810 |     "plt.ylim(0,38)\n",
811 |     "plt.legend()"
812 |    ]
813 |   }
814 |  ],
815 |  "metadata": {
816 |   "kernelspec": {
817 |    "display_name": "Python 2",
818 |    "language": "python",
819 |    "name": "python2"
820 |   },
821 |   "language_info": {
822 |    "codemirror_mode": {
823 |     "name": "ipython",
824 |     "version": 2
825 |    },
826 |    "file_extension": ".py",
827 |    "mimetype": "text/x-python",
828 |    "name": "python",
829 |    "nbconvert_exporter": "python",
830 |    "pygments_lexer": "ipython2",
831 |    "version": "2.7.10"
832 |   }
833 |  },
834 |  "nbformat": 4,
835 |  "nbformat_minor": 0
836 | }
837 | 


--------------------------------------------------------------------------------
/intro_to_mimic/MozFest2015.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MIT-LCP/mimic-workshop/b27eee438a1f62d909dd30d1d458d3516f32b276/intro_to_mimic/MozFest2015.key


--------------------------------------------------------------------------------
/mlcc/etc/calcRoc.m:
--------------------------------------------------------------------------------
 1 | function [ rocx, rocy, auc ] = calcRoc( pred, target )
 2 | %CALCROC Sensitivity and 1-specificity at every operating point in PRED.
 3 | %   [ROCX, ROCY] = CALCROC(PRED, TARGET) sorts the predictions PRED in
 4 | %   ascending order and uses each sorted position as an operating point:
 5 | %   observations at or after that position are called positive, those
 6 | %   before it negative. TARGET holds the true labels (1 = positive,
 7 | %   0 = negative), one per element of PRED. ROCX is 1-specificity (false
 8 | %   positive rate) and ROCY is sensitivity (true positive rate); plotting
 9 | %   ROCY against ROCX gives the receiver operator characteristic curve.
10 | %
11 | %   [ROCX, ROCY, AUC] = CALCROC(PRED, TARGET) also returns the area under
12 | %   the curve, computed as the fraction of positive/negative pairs in
13 | %   which the positive case is sorted after the negative case.
14 | %
15 | %   PRED and TARGET may be row or column vectors; ROCX and ROCY are
16 | %   returned as columns. Tied predictions are ordered by SORT and get no
17 | %   half credit in AUC.
18 | 
19 | % Work on columns: sort(...,1), cumsum and flipud all act along the first
20 | % dimension, so a row vector would otherwise give wrong results silently.
21 | % Cast TARGET to double so integer-class labels cannot saturate in cumsum.
22 | pred = pred(:);
23 | target = double(target(:));
24 | if numel(pred) ~= numel(target)
25 |     error('calcRoc:sizeMismatch', ...
26 |         'PRED and TARGET must have the same number of elements.');
27 | end
28 | 
29 | [pred,idxSort] = sort(pred,1,'ascend');
30 | target=target(idxSort);
31 | 
32 | % Confusion-matrix counts for each sorted position i
33 | TP = flipud(target);
34 | FP = cumsum(1-TP);
35 | FP = flipud(FP);            % negatives at or after position i
36 | TP = cumsum(TP);
37 | TP = flipud(TP);            % positives at or after position i
38 | FN = cumsum(target)-target; % positives before position i
39 | TN = numel(target) - TP - FP - FN;
40 | 
41 | %=== 1-Specificity (false positive rate)
42 | rocx = 1- (TN ./ (TN + FP));
43 | 
44 | %=== Sensitivity (true positive rate)
45 | rocy = TP ./ (TP + FN);
46 | 
47 | % AUROC (only computed when the third output is requested)
48 | if nargout > 2
49 |     idxNegative = target==0;
50 |     % Count the number of negative targets below each element
51 |     auc = cumsum(idxNegative,1);
52 | 
53 |     % Now only keep elements for positive cases
54 |     % the result is a vector which counts, for each positive case, how many
55 |     % negative cases are lower in predicted value
56 |     auc = auc(~idxNegative);
57 | 
58 |     % sum the number of negative cases which are below a positive case
59 |     auc = sum(auc,1); %=== count number who are negative
60 | 
61 |     % divide by the number of positive/negative pairs in the data
62 |     % (NaN if TARGET contains only one class)
63 |     auc = auc./(sum(target==1) * sum(target==0));
64 | 
65 |     % the result is the probability a positive case prediction is higher than a
66 |     % negative case prediction: the AUROC.
67 | end
68 | 
69 | end
70 | 
71 | 


--------------------------------------------------------------------------------
/mlcc/etc/makeQuery.m:
--------------------------------------------------------------------------------
 1 | function [ query ] = makeQuery( filename)
 2 | %MAKEQUERY Read an SQL file into a single-line query string.
 3 | %   QUERY = MAKEQUERY(FILENAME) reads the text file FILENAME line by line,
 4 | %   removes everything from the first '--' on each line to the end of that
 5 | %   line, and joins the remaining lines into one string with STRJOIN.
 6 | %
 7 | %   Note: '--' is removed wherever it occurs on a line, including inside a
 8 | %   quoted SQL string literal.
 9 | 
10 | fileID = fopen(filename);
11 | if fileID == -1
12 |     error('makeQuery:fileNotFound','Could not open file: %s',filename);
13 | end
14 | % close the file on exit, even if textscan or the code below errors
15 | closeFile = onCleanup(@() fclose(fileID));
16 | 
17 | text = textscan(fileID,'%s','delimiter','\n');
18 | text = text{1};
19 | idxRem = strfind(text,'--');
20 | for m=1:size(text,1)
21 |     if ~isempty(idxRem{m})
22 |         % strfind returns every occurrence; cut from the first one
23 |         text{m}(idxRem{m}(1):end) = '';
24 |     end
25 | end
26 | query = strjoin(text');
27 | 
28 | end
29 | 
30 | 


--------------------------------------------------------------------------------
/mlcc/lab1-data-extraction/mlcc-query-1.sql:
--------------------------------------------------------------------------------
 1 | select
 2 |   -- ICUSTAY_ID identifies each unique patient ICU stay
 3 |   -- note that if the same person stays in the ICU more than once, each stay would have a *different* ICUSTAY_ID
 4 |   ie.icustay_id
 5 | 
 6 |   -- this is the outcome of interest: in-hospital mortality
 7 |   , max(adm.HOSPITAL_EXPIRE_FLAG) as OUTCOME
 8 | 
 9 |   -- let's read this statement inside out. first, the case statement says:
10 |   --  if the ITEMID = 211, then output the numeric value
11 |   --  otherwise, set it to NULL
12 |   -- that means that there are *only* heart rate values within the brackets
13 |   -- next, we take the minimum - min() - which ignores NULLs
14 |   -- as a result, we get the minimum heart rate value, which we define "as HeartRate_Min"
15 | 
16 |   -- how did we know heart rates were stored using ITEMID 211? Simple, we looked in D_ITEMS!
17 |   -- Try it for yourself: select * from d_items where lower(label) like '%heart rate%'
18 | 
19 |   , min(case when ce.itemid = 211 then ce.valuenum else null end) as HeartRate_Min
20 |   , max(case when ce.itemid = 211 then ce.valuenum else null end) as HeartRate_Max
21 |   , min(case when ce.itemid in (615,618) then ce.valuenum else null end) as RespRate_Min
22 |   , max(case when ce.itemid in (615,618) then ce.valuenum else null end) as RespRate_Max
23 | from icustays ie
24 | -- join to the admissions table to get hospital outcome
25 | inner join admissions adm
26 |   on ie.hadm_id = adm.hadm_id
27 | 
28 | -- join to the chartevents table to get the observations
29 | left join chartevents ce
30 |   -- match the tables on the ICU stay identifier
31 |   on ie.icustay_id = ce.icustay_id
32 |   -- and require that the observation be made after the patient is admitted to the ICU
33 |   and ce.charttime >= ie.intime
34 |   -- and *before* their admission time + 1 day, i.e. the observation must be made on their first day in the ICU
35 |   and ce.charttime <= datetime(ie.intime,'+1 day')
36 |   -- (we use datetime() because date() drops the time of day, which would end the window at midnight)
37 |   -- finally, only look at heart rate/respiratory rate observations
38 |   and ce.itemid in
39 |   (
40 |   211, -- Heart Rate
41 |   618, -- Respiratory Rate
42 |   615 -- Resp Rate (Total)
43 |   )
44 | 
45 | -- Note above that we take the max() and min() of some columns
46 | -- Imagine you have a table with 2 columns and 10 rows
47 | -- If we take the max() of the 2nd column, we now have:
48 | --  10 rows in the first column
49 | --  1 row in the second column (the max value)
50 | -- How does the second column correspond to the first?
51 | -- Should we copy that 1 row to all 10 rows?
52 | -- We need to tell SQL how to *group* the max value
53 | 
54 | -- The below line states "group everything by icustay_id"
55 | -- That means that we take the max( HEART RATE ) grouped by ICUSTAY_ID
56 | -- or, in normal words, we take the maximum heart rate for each patient's ICU stay
57 | group by ie.icustay_id
58 | order by ie.icustay_id;
59 | 


--------------------------------------------------------------------------------
/mlcc/lab1-data-extraction/mlcc1-problem-set-solutions-ICUSTAYID.sql:
--------------------------------------------------------------------------------
  1 | 
  2 | -- Staging table #1: CHARTEVENTS (vital signs charted in the first 24 hours of each ICU stay)
  3 | with ce_stg as
  4 | (
  5 |   select ie.icustay_id
  6 |   , case
  7 |       when chart.itemid in (211,220045) then 1 -- HeartRate
  8 |       when chart.itemid in (456,52,6702,443,220052,220181,225312) then 4 -- MeanBP
  9 |       when chart.itemid in (615,618,220210,224690) then 5 -- RespRate
 10 |       else null end as VitalID
 11 |   , chart.valuenum
 12 |   from icustays ie
 13 |   left join chartevents chart
 14 |     on ie.subject_id = chart.subject_id and ie.hadm_id = chart.hadm_id and ie.icustay_id = chart.icustay_id
 15 |     and chart.charttime >= ie.intime and chart.charttime <= datetime(ie.intime,'+1 day') -- datetime(), not date(): keep the time of day
 16 |     and chart.itemid in
 17 |     (
 18 |     -- HEART RATE
 19 |     211, --"Heart Rate"
 20 |     220045, --"Heart Rate"
 21 | 
 22 |     -- MEAN BLOOD PRESSURE
 23 |     456, --"NBP Mean"
 24 |     52, --"Arterial BP Mean"
 25 |     6702, -- Arterial BP Mean #2
 26 |     443, -- Manual BP Mean(calc)
 27 |     220052, --"Arterial Blood Pressure mean"
 28 |     220181, --"Non Invasive Blood Pressure mean"
 29 |     225312, --"ART BP mean"
 30 | 
 31 |     -- RESPIRATORY RATE
 32 |     618, -- Respiratory Rate
 33 |     615, -- Resp Rate (Total)
 34 |     220210, -- Respiratory Rate
 35 |     224690 -- Respiratory Rate (Total)
 36 |     )
 37 | )
 38 | -- Aggregate table #1: one row per ICU stay with the min/max of each vital sign in ce_stg
 39 | , ce as
 40 | (
 41 |   select stg.icustay_id
 42 |   , min(case stg.VitalID when 1 then stg.valuenum end) as HeartRate_Min
 43 |   , max(case stg.VitalID when 1 then stg.valuenum end) as HeartRate_Max
 44 |   , min(case stg.VitalID when 4 then stg.valuenum end) as MeanBP_Min
 45 |   , max(case stg.VitalID when 4 then stg.valuenum end) as MeanBP_Max
 46 |   , min(case stg.VitalID when 5 then stg.valuenum end) as RespRate_Min
 47 |   , max(case stg.VitalID when 5 then stg.valuenum end) as RespRate_Max
 48 |   from ce_stg stg
 49 |   group by stg.icustay_id
 50 | )
 51 | 
 52 | -- Staging table #2: GCS (one row per ICU stay and CHARTTIME, first 24 hours of the stay)
 53 | -- Because we need to add together GCS components, we do it separately from chartevents
 54 | , gcs_stg as
 55 | (
 56 |   select ie.icustay_id, chart.charttime
 57 |   , max(case when chart.itemid in (723,223900) then chart.valuenum else null end) as GCSVerbal
 58 |   , max(case when chart.itemid in (454,223901) then chart.valuenum else null end) as GCSMotor
 59 |   , max(case when chart.itemid in (184,220739) then chart.valuenum else null end) as GCSEyes
 60 |   from icustays ie
 61 |   left join chartevents chart
 62 |     on ie.subject_id = chart.subject_id and ie.hadm_id = chart.hadm_id and ie.icustay_id = chart.icustay_id
 63 |     and chart.charttime >= ie.intime and chart.charttime <= datetime(ie.intime,'+1 day') -- datetime(), not date(): keep the time of day
 64 |     and chart.itemid in
 65 |     (
 66 |       723, -- GCSVerbal
 67 |       454, -- GCSMotor
 68 |       184, -- GCSEyes
 69 |       223900, -- GCS - Verbal Response
 70 |       223901, -- GCS - Motor Response
 71 |       220739 -- GCS - Eye Opening
 72 |     )
 73 |   group by ie.icustay_id, chart.charttime
 74 | )
 75 | -- Aggregate table #2: lowest and highest GCS total per ICU stay (a total is NULL if any component is NULL)
 76 | , gcs as
 77 | (
 78 |   select stg.icustay_id
 79 |   , min(stg.GCSVerbal + stg.GCSMotor + stg.GCSEyes) as GCS_Min
 80 |   , max(stg.GCSVerbal + stg.GCSMotor + stg.GCSEyes) as GCS_Max
 81 |   from gcs_stg stg
 82 |   group by stg.icustay_id
 83 | )
 84 | -- Staging table #3: LABEVENTS (labs from 6 hours before to 24 hours after ICU admission)
 85 | , le_stg as
 86 | (
 87 |   select ie.icustay_id
 88 |     -- here we assign labels to ITEMIDs
 89 |     -- this also fuses together multiple ITEMIDs containing the same data
 90 |     , case
 91 |           when lab.itemid = 50885 then 'BILIRUBIN'
 92 |           when lab.itemid = 50912 then 'CREATININE'
 93 |           when lab.itemid = 50809 then 'GLUCOSE'
 94 |           when lab.itemid = 50931 then 'GLUCOSE'
 95 |           when lab.itemid = 50811 then 'HEMOGLOBIN'
 96 |           when lab.itemid = 51222 then 'HEMOGLOBIN'
 97 |           when lab.itemid = 50824 then 'SODIUM'
 98 |           when lab.itemid = 50983 then 'SODIUM'
 99 |           when lab.itemid = 51300 then 'WBC'
100 |           when lab.itemid = 51301 then 'WBC'
101 |         else null
102 |       end as label
103 |     , lab.valuenum
104 | 
105 |     from icustays ie
106 |     -- the window uses datetime(), not date(): date() drops the time of day and would snap both bounds to midnight
107 |     left join labevents lab
108 |       on ie.subject_id = lab.subject_id and ie.hadm_id = lab.hadm_id
109 |       and lab.charttime >= datetime(ie.intime,'-6 hour') and lab.charttime <= datetime(ie.intime,'+1 day')
110 |       and lab.ITEMID in
111 |       (
112 |         -- comment is: LABEL | CATEGORY | FLUID | NUMBER OF ROWS IN LABEVENTS
113 |         50885, -- BILIRUBIN, TOTAL | CHEMISTRY | BLOOD | 238277
114 |         50912, -- CREATININE | CHEMISTRY | BLOOD | 797476
115 |         50931, -- GLUCOSE | CHEMISTRY | BLOOD | 748981
116 |         50809, -- GLUCOSE | BLOOD GAS | BLOOD | 196734
117 |         51222, -- HEMOGLOBIN | HEMATOLOGY | BLOOD | 752523
118 |         50811, -- HEMOGLOBIN | BLOOD GAS | BLOOD | 89712
119 |         50983, -- SODIUM | CHEMISTRY | BLOOD | 808489
120 |         50824, -- SODIUM, WHOLE BLOOD | BLOOD GAS | BLOOD | 71503
121 |         51301, -- WHITE BLOOD CELLS | HEMATOLOGY | BLOOD | 753301
122 |         51300  -- WBC COUNT | HEMATOLOGY | BLOOD | 2371
123 |       )
124 |       and lab.valuenum is not null and lab.valuenum > 0 -- lab values cannot be 0 and cannot be negative
125 | )
126 | 
127 | -- Aggregate table #3: one row per ICU stay with the min/max of each labelled lab value
128 | , le as
129 | (
130 |   select
131 |     stg.icustay_id
132 |     -- note: le_stg also labels GLUCOSE rows, but no GLUCOSE column is produced here
133 |     , min(case stg.label when 'BILIRUBIN' then stg.valuenum end) as BILIRUBIN_min
134 |     , max(case stg.label when 'BILIRUBIN' then stg.valuenum end) as BILIRUBIN_max
135 |     , min(case stg.label when 'CREATININE' then stg.valuenum end) as CREATININE_min
136 |     , max(case stg.label when 'CREATININE' then stg.valuenum end) as CREATININE_max
137 |     , min(case stg.label when 'HEMOGLOBIN' then stg.valuenum end) as HEMOGLOBIN_min
138 |     , max(case stg.label when 'HEMOGLOBIN' then stg.valuenum end) as HEMOGLOBIN_max
139 |     , min(case stg.label when 'SODIUM' then stg.valuenum end) as SODIUM_min
140 |     , max(case stg.label when 'SODIUM' then stg.valuenum end) as SODIUM_max
141 |     , min(case stg.label when 'WBC' then stg.valuenum end) as WBC_min
142 |     , max(case stg.label when 'WBC' then stg.valuenum end) as WBC_max
143 | 
144 |   from le_stg stg
145 |   group by stg.icustay_id
146 | )
147 | 
148 | SELECT ie.icustay_id
149 | , adm.HOSPITAL_EXPIRE_FLAG -- whether the patient died within the hospital
150 | , round( (julianday(ie.intime) - julianday(pat.dob))/365.24, 4) as Age
151 | -- Age above: days between date of birth and ICU admission, divided by 365.24 (approximate years)
152 | , HeartRate_Min
153 | , HeartRate_Max
154 | , MeanBP_Min
155 | , MeanBP_Max
156 | , RespRate_Min
157 | , RespRate_Max
158 | -- GCS totals come from the gcs table defined above
159 | , GCS_Min
160 | , GCS_Max
161 | -- laboratory values come from the le table defined above
162 | , BILIRUBIN_min
163 | , BILIRUBIN_max
164 | , CREATININE_min
165 | , CREATININE_max
166 | , HEMOGLOBIN_min
167 | , HEMOGLOBIN_max
168 | , SODIUM_min
169 | , SODIUM_max
170 | , WBC_min
171 | , WBC_max
172 | -- left joins below keep an ICU stay even when ce, gcs or le has no row for it (those columns are then NULL)
173 | FROM icustays ie
174 | inner join admissions adm
175 |   on ie.hadm_id = adm.hadm_id
176 | inner join patients pat
177 |   on ie.subject_id = pat.subject_id
178 | left join ce
179 |   on ie.icustay_id = ce.icustay_id
180 | left join gcs
181 |   on ie.icustay_id = gcs.icustay_id
182 | left join le
183 |   on ie.icustay_id = le.icustay_id
184 | 


--------------------------------------------------------------------------------
/mlcc/lab1-data-extraction/mlcc1-problem-set-solutions.sql:
--------------------------------------------------------------------------------
  1 | 
  2 | -- Vital signs from CHARTEVENTS: min and max per hospital admission
  3 | with ce as
  4 | (
  5 |   SELECT adm.hadm_id
  6 |   , MIN(CASE WHEN chart.itemid IN (211,220045) THEN chart.valuenum END) AS HeartRate_Min
  7 |   , MAX(CASE WHEN chart.itemid IN (211,220045) THEN chart.valuenum END) AS HeartRate_Max
  8 |   , MIN(CASE WHEN chart.itemid IN (456,52,6702,443,220052,220181,225312) THEN chart.valuenum END) AS MeanBP_Min
  9 |   , MAX(CASE WHEN chart.itemid IN (456,52,6702,443,220052,220181,225312) THEN chart.valuenum END) AS MeanBP_Max
 10 |   , MIN(CASE WHEN chart.itemid IN (615,618,220210,224690) THEN chart.valuenum END) AS RespRate_Min
 11 |   , MAX(CASE WHEN chart.itemid IN (615,618,220210,224690) THEN chart.valuenum END) AS RespRate_Max
 12 |   FROM admissions adm
 13 |   LEFT JOIN chartevents chart
 14 |     ON adm.hadm_id = chart.hadm_id
 15 |     AND chart.itemid IN
 16 |     (
 17 |     -- HEART RATE
 18 |     211, -- "Heart Rate"
 19 |     220045, -- "Heart Rate"
 20 | 
 21 |     -- MEAN BLOOD PRESSURE
 22 |     456, -- "NBP Mean"
 23 |     52, -- "Arterial BP Mean"
 24 |     6702, -- Arterial BP Mean #2
 25 |     443, -- Manual BP Mean(calc)
 26 |     220052, -- "Arterial Blood Pressure mean"
 27 |     220181, -- "Non Invasive Blood Pressure mean"
 28 |     225312, -- "ART BP mean"
 29 | 
 30 |     -- RESPIRATORY RATE
 31 |     618, -- Respiratory Rate
 32 |     615, -- Resp Rate (Total)
 33 |     220210, -- Respiratory Rate
 34 |     224690 -- Respiratory Rate (Total)
 35 |     )
 36 |   GROUP BY adm.hadm_id
 37 | )
 38 | -- Laboratory values from LABEVENTS: min and max per hospital admission
 39 | , le as
 40 | (
 41 |   SELECT adm.hadm_id
 42 |   , MIN(CASE WHEN lab.itemid = 50885 THEN lab.valuenum END) AS BILIRUBIN_min
 43 |   , MAX(CASE WHEN lab.itemid = 50885 THEN lab.valuenum END) AS BILIRUBIN_max
 44 |   , MIN(CASE WHEN lab.itemid = 50912 THEN lab.valuenum END) AS CREATININE_min
 45 |   , MAX(CASE WHEN lab.itemid = 50912 THEN lab.valuenum END) AS CREATININE_max
 46 |   , MIN(CASE WHEN lab.itemid IN (50809,50931) THEN lab.valuenum END) AS GLUCOSE_min
 47 |   , MAX(CASE WHEN lab.itemid IN (50809,50931) THEN lab.valuenum END) AS GLUCOSE_max
 48 |   , MIN(CASE WHEN lab.itemid IN (50811,51222) THEN lab.valuenum END) AS HEMOGLOBIN_min
 49 |   , MAX(CASE WHEN lab.itemid IN (50811,51222) THEN lab.valuenum END) AS HEMOGLOBIN_max
 50 |   , MIN(CASE WHEN lab.itemid IN (50824,50983) THEN lab.valuenum END) AS SODIUM_min
 51 |   , MAX(CASE WHEN lab.itemid IN (50824,50983) THEN lab.valuenum END) AS SODIUM_max
 52 |   , MIN(CASE WHEN lab.itemid IN (51300,51301) THEN lab.valuenum END) AS WBC_min
 53 |   , MAX(CASE WHEN lab.itemid IN (51300,51301) THEN lab.valuenum END) AS WBC_max
 54 | 
 55 |     FROM admissions adm
 56 | 
 57 |     LEFT JOIN labevents lab
 58 |       ON adm.subject_id = lab.subject_id AND adm.hadm_id = lab.hadm_id
 59 |       AND lab.charttime BETWEEN adm.admittime AND adm.dischtime
 60 |       AND lab.ITEMID IN
 61 |       (
 62 |         -- comment is: LABEL | CATEGORY | FLUID | NUMBER OF ROWS IN LABEVENTS
 63 |         50885, -- BILIRUBIN, TOTAL | CHEMISTRY | BLOOD | 238277
 64 |         50912, -- CREATININE | CHEMISTRY | BLOOD | 797476
 65 |         50931, -- GLUCOSE | CHEMISTRY | BLOOD | 748981
 66 |         50809, -- GLUCOSE | BLOOD GAS | BLOOD | 196734
 67 |         51222, -- HEMOGLOBIN | HEMATOLOGY | BLOOD | 752523
 68 |         50811, -- HEMOGLOBIN | BLOOD GAS | BLOOD | 89712
 69 |         50983, -- SODIUM | CHEMISTRY | BLOOD | 808489
 70 |         50824, -- SODIUM, WHOLE BLOOD | BLOOD GAS | BLOOD | 71503
 71 |         51301, -- WHITE BLOOD CELLS | HEMATOLOGY | BLOOD | 753301
 72 |         51300  -- WBC COUNT | HEMATOLOGY | BLOOD | 2371
 73 |       )
 74 |       AND lab.valuenum IS NOT NULL AND lab.valuenum > 0 -- lab values cannot be 0 and cannot be negative
 75 |     GROUP BY adm.hadm_id
 76 | )
 77 | 
 78 | -- GCS staging table: one row per hospital admission and CHARTTIME
 79 | -- The GCS components have to be added together, so they are handled separately from the other chartevents
 80 | -- Grouping by CHARTTIME collects the components that were charted at the same time
 81 | -- so that the next table can sum them
 82 | , gcs_stg as
 83 | (
 84 |   SELECT adm.hadm_id, chart.charttime
 85 |   , MAX(CASE WHEN chart.itemid IN (723,223900) THEN chart.valuenum END) AS GCSVerbal
 86 |   , MAX(CASE WHEN chart.itemid IN (454,223901) THEN chart.valuenum END) AS GCSMotor
 87 |   , MAX(CASE WHEN chart.itemid IN (184,220739) THEN chart.valuenum END) AS GCSEyes
 88 |   FROM admissions adm
 89 |   LEFT JOIN chartevents chart
 90 |     ON adm.hadm_id = chart.hadm_id
 91 |     AND chart.itemid IN
 92 |     (
 93 |       723, -- GCSVerbal
 94 |       454, -- GCSMotor
 95 |       184, -- GCSEyes
 96 |       223900, -- GCS - Verbal Response
 97 |       223901, -- GCS - Motor Response
 98 |       220739 -- GCS - Eye Opening
 99 |     )
100 |   GROUP BY adm.hadm_id, chart.charttime
101 | )
102 | -- Aggregate table #2: lowest and highest GCS total per admission (a total is NULL if any component is NULL)
103 | , gcs as
104 | (
105 |   select stg.hadm_id
106 |   , min(stg.GCSVerbal + stg.GCSMotor + stg.GCSEyes) as GCS_Min
107 |   , max(stg.GCSVerbal + stg.GCSMotor + stg.GCSEyes) as GCS_Max
108 |   from gcs_stg stg
109 |   group by stg.hadm_id
110 | )
111 | 
112 | SELECT adm.hadm_id
113 | , adm.HOSPITAL_EXPIRE_FLAG -- whether the patient died within the hospital
114 | , round( (julianday(adm.admittime) - julianday(pat.dob))/365.24, 4) as Age
115 | -- Age above: days between date of birth and hospital admission, divided by 365.24 (approximate years)
116 | , HeartRate_Min
117 | , HeartRate_Max
118 | , MeanBP_Min
119 | , MeanBP_Max
120 | , RespRate_Min
121 | , RespRate_Max
122 | -- GCS totals come from the gcs table defined above
123 | , GCS_Min
124 | , GCS_Max
125 | -- laboratory values come from the le table defined above
126 | , BILIRUBIN_min
127 | , BILIRUBIN_max
128 | , CREATININE_min
129 | , CREATININE_max
130 | , GLUCOSE_min
131 | , GLUCOSE_max
132 | , HEMOGLOBIN_min
133 | , HEMOGLOBIN_max
134 | , SODIUM_min
135 | , SODIUM_max
136 | , WBC_min
137 | , WBC_max
138 | -- left joins below keep an admission even when ce, gcs or le has no row for it (those columns are then NULL)
139 | FROM admissions adm
140 | inner join patients pat
141 |   on adm.subject_id = pat.subject_id
142 | left join ce
143 |   on adm.hadm_id = ce.hadm_id
144 | left join gcs
145 |   on adm.hadm_id = gcs.hadm_id
146 | left join le
147 |   on adm.hadm_id = le.hadm_id
148 | 


--------------------------------------------------------------------------------
/mlcc/lab1-data-extraction/mlcc1_introduction.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Extract data\n",
  8 |     "\n",
  9 |     "First, we extract the data to a csv (alternatively, we could use a database connection here and extract the data directly from the database).\n",
 10 |     "\n",
 11 |     "\n",
 12 |     "```sql\n",
 13 |     "Copy (\n",
 14 |     "  select ce.icustay_id, charttime, itemid, valuenum, adm.hospital_expire_flag\n",
 15 |     "  from mimiciii.chartevents ce\n",
 16 |     "  inner join mimiciii.icustays ie\n",
 17 |     "    on ce.icustay_id = ie.icustay_id\n",
 18 |     "\n",
 19 |     "  inner join mimiciii.admissions adm\n",
 20 |     "    on ce.hadm_id = adm.hadm_id\n",
 21 |     "  inner join mimiciii.patients pat\n",
 22 |     "    on ce.subject_id = pat.subject_id\n",
 23 |     "\n",
 24 |     "  where ce.charttime between ie.intime and (ie.intime + interval '1 day')\n",
 25 |     "  and extract(EPOCH from (ie.intime - pat.dob)) >= (60*60*24*365.242*15) -- older than 15, i.e. an adult\n",
 26 |     "  and itemid in\n",
 27 |     "  (\n",
 28 |     "  618, --\tRespiratory Rate\n",
 29 |     "  220210, --\tRespiratory Rate\n",
 30 |     "\n",
 31 |     "  211, --\tHeart Rate\n",
 32 |     "  220045 --\tHeart Rate\n",
 33 |     "  )\n",
 34 |     "  order by icustay_id, charttime\n",
 35 |     ") To '/data/mimic3/mimic-hr-rr.csv' With CSV HEADER;\n",
 36 |     "```\n",
 37 |     "\n",
 38 |     "We now assume that the data is in a csv file called `mimic-hr-rr.csv` in the local directory."
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 1,
 44 |    "metadata": {
 45 |     "collapsed": false
 46 |    },
 47 |    "outputs": [
 48 |     {
 49 |      "data": {
 50 |       "text/plain": [
 51 |        "icustay_id\t   charttime\t      itemid\t    valuenum\thospital_expire_flag\t\n",
 52 |        "      200001\t   796924.80\t      220210\t          22\t0\n",
 53 |        "      200001\t   796924.80\t      220045\t         114\t0\n",
 54 |        "      200001\t   796924.80\t      220210\t          26\t0\n",
 55 |        "      200001\t   796924.83\t      220045\t         113\t0\n",
 56 |        "      200001\t   796924.83\t      220210\t          20\t0"
 57 |       ]
 58 |      },
 59 |      "execution_count": 1,
 60 |      "metadata": {},
 61 |      "output_type": "execute_result"
 62 |     }
 63 |    ],
 64 |    "source": [
 65 |     "% load the data\n",
 66 |     "\n",
 67 |     "fp = fopen('mimic-hr-rr.csv');\n",
 68 |     "header = fgetl(fp);\n",
 69 |     "\n",
 70 |     "% convert header from a string to a cell array of strings\n",
 71 |     "header = regexp(header,',','split');\n",
 72 |     "\n",
 73 |     "frmt = '%f%s%f%f%f';\n",
 74 |     "data = textscan(fp,frmt,'delimiter',',');\n",
 75 |     "fclose(fp);\n",
 76 |     "\n",
 77 |     "% convert the date string into MATLAB's numeric date format\n",
 78 |     "data{2} = datenum(data{2},'yyyy-mm-dd HH:MM:SS');\n",
 79 |     "\n",
 80 |     "% now we can convert data from a cell array to a matrix\n",
 81 |     "data = [data{:}];\n",
 82 |     "\n",
 83 |     "% here's a preview of the data ('\\t' is a tab)\n",
 84 |     "fprintf('%12s\\t',header{:});\n",
 85 |     "fprintf('\\n')\n",
 86 |     "\n",
 87 |     "frmt = '%12g\\t%12.2f\\t%12g\\t%12g\\t%1d';\n",
 88 |     "for n=1:5\n",
 89 |     "    fprintf(frmt,data(n,:));\n",
 90 |     "    fprintf('\\n');\n",
 91 |     "end"
 92 |    ]
 93 |   },
 94 |   {
 95 |    "cell_type": "markdown",
 96 |    "metadata": {},
 97 |    "source": [
 98 |     "In the above, you can see:\n",
 99 |     "\n",
100 |     "* `ICUSTAY_ID` - This is the unique integer which identifies an ICU stay.\n",
101 |     "* `CHARTTIME` - This is the time at which a measurement is recorded. It represents the number of days since January 0, 0000.\n",
102 |     "* `ITEMID` - This is a unique integer which represents the type of data recorded. 220210 is respiratory rate, and 220045 is heart rate.\n",
103 |     "* `VALUENUM` - This is the actual value of the measurement. So we can see that `ICUSTAY_ID` 200001 had a respiratory rate of 22 breaths per minute (we have not included the unit of measurement here, but it is in the database if you are interested in confirming this).\n",
104 |     "* `HOSPITAL_EXPIRE_FLAG` - This indicates whether the patient died in the hospital (1 means the patient died before hospital discharge, 0 means they survived).\n",
105 |     "\n",
106 |     "We can plot the first patient's data as follows:"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": null,
112 |    "metadata": {
113 |     "collapsed": false
114 |    },
115 |    "outputs": [],
116 |    "source": [
117 |     "id = 200001; % the ICUSTAY_ID whose measurements we want to plot\n",
118 |     "\n",
119 |     "idxID = data(:,1) == id; % rows which belong to this single ICU stay\n",
120 |     "idxHR = ismember(data(:,3), [211, 220045]); % rows holding a heart rate\n",
121 |     "idxRR = ismember(data(:,3), [618, 220210]); % rows holding a respiratory rate\n",
122 |     "\n",
123 |     "% column 2 is CHARTTIME (x axis) and column 4 is VALUENUM (y axis)\n",
124 |     "figure(1); hold all;\n",
125 |     "rowsHR = idxID & idxHR; rowsRR = idxID & idxRR;\n",
126 |     "plot(data(rowsHR,2), data(rowsHR,4), '-', 'Linewidth',2, 'Color',[0.8906, 0.1016, 0.1094]);\n",
127 |     "plot(data(rowsRR,2), data(rowsRR,4), '-', 'Linewidth',2, 'Color',[0.2148, 0.4922, 0.7188]);"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "markdown",
132 |    "metadata": {},
133 |    "source": [
134 |     "Above we can see the heart rate in red and the respiratory rate in blue. The bottom axis is the days since January 0, 0000 - a bit hard to interpret but we can see that the data spans 1 day."
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "markdown",
139 |    "metadata": {},
140 |    "source": [
141 |     "# Extracting data\n",
142 |     "\n",
143 |     "Now we have plotted the data for a few patients and have an idea of what it looks like. We'd like to extract some data which is useable in our machine learning classifiers. That means we need to convert this *time-series* into a *design matrix*."
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": null,
149 |    "metadata": {
150 |     "collapsed": false
151 |    },
152 |    "outputs": [],
153 |    "source": [
154 |     "% We can use sorting to get the maximum and minimum value\n",
155 |     "% This is quite complicated syntax - we need to perform vectorized operations\n",
156 |     "% Note: this type of task is *much* easier in SQL!\n",
157 |     "\n",
158 |     "[id_unique, idxID] = unique(data(:,1)); % get a list of all unique ICUSTAY_IDs\n",
159 |     "X = nan(size(id_unique,1),4); % one row per ICUSTAY_ID: [HR min, HR max, RR min, RR max]\n",
160 |     "\n",
161 |     "idxHR = data(:,3) == 211 | data(:,3) == 220045;\n",
162 |     "idxRR = data(:,3) == 618 | data(:,3) == 220210;\n",
163 |     "idxOK = ~isnan(data(:,4)); % skip missing values: a descending sort would place NaN first\n",
164 |     "tic; % we time how long this process takes\n",
165 |     "\n",
166 |     "data_tmp = data(idxHR & idxOK,:);\n",
167 |     "data_tmp = sortrows(data_tmp, [1,4]); % minimum HR is the first row for each ICUSTAY_ID\n",
168 |     "\n",
169 |     "[id_tmp,idxA] = unique(data_tmp(:,1),'first'); % idxA: first row of each ICUSTAY_ID in data_tmp\n",
170 |     "[idxExist,idxMap] = ismember(id_unique, id_tmp); % idxExist: rows of X with data, idxMap: their position in id_tmp\n",
171 |     "X(idxExist,1) = data_tmp(idxA(idxMap(idxExist)),4); % write into the rows of the matching ICUSTAY_IDs only\n",
172 |     "\n",
173 |     "% Repeat for the *maximum* heart rate (idxA stays valid: each ICUSTAY_ID keeps the same block of rows)\n",
174 |     "data_tmp = sortrows(data_tmp, [1,-4]); % maximum HR is now the first row for each ICUSTAY_ID\n",
175 |     "X(idxExist,2) = data_tmp(idxA(idxMap(idxExist)),4);\n",
176 |     "\n",
177 |     "\n",
178 |     "% Repeat for respiratory rate\n",
179 |     "data_tmp = data(idxRR & idxOK,:);\n",
180 |     "data_tmp = sortrows(data_tmp, [1,4]); % minimum RR is the first row for each ICUSTAY_ID\n",
181 |     "\n",
182 |     "[id_tmp,idxA] = unique(data_tmp(:,1),'first');\n",
183 |     "[idxExist,idxMap] = ismember(id_unique, id_tmp);\n",
184 |     "X(idxExist,3) = data_tmp(idxA(idxMap(idxExist)),4);\n",
185 |     "\n",
186 |     "% Repeat for the *maximum* respiratory rate\n",
187 |     "data_tmp = sortrows(data_tmp, [1,-4]); % maximum RR is now the first row for each ICUSTAY_ID\n",
188 |     "X(idxExist,4) = data_tmp(idxA(idxMap(idxExist)),4);\n",
189 |     "\n",
190 |     "toc;\n",
191 |     "\n",
192 |     "% Clear variables so we don't accidentally use the wrong data in temp variables later on\n",
193 |     "clear data_tmp idxRR idxHR idxOK id_tmp idxA idxExist idxMap;\n",
194 |     "\n",
195 |     "% Preview of the data:\n",
196 |     "X(1:5,:)"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "code",
201 |    "execution_count": null,
202 |    "metadata": {
203 |     "collapsed": false
204 |    },
205 |    "outputs": [],
206 |    "source": [
207 |     "% This is equivalent to the above cell, but using for loops\n",
208 |     "% It takes ~5-10 minutes to run\n",
209 |     "\n",
210 |     "[id_unique,idxID] = unique(data(:,1)); % get a list of all unique ICUSTAY_IDs\n",
211 |     "X_slow = nan(size(id_unique,1),4); % one row per ICUSTAY_ID: [HR min, HR max, RR min, RR max]\n",
212 |     "\n",
213 |     "idxHR = data(:,3) == 211 | data(:,3) == 220045;\n",
214 |     "idxRR = data(:,3) == 618 | data(:,3) == 220210;\n",
215 |     "\n",
216 |     "tic; % we time how long this takes\n",
217 |     "\n",
218 |     "for n=1:size(id_unique,1)\n",
219 |     "    idxCurrentID = data(:,1) == id_unique(n); % rows of the n-th ICU stay\n",
220 |     "    \n",
221 |     "    idx = idxCurrentID & idxHR;\n",
222 |     "    if any(idx) % leave NaN in X_slow when the stay has no heart rate rows\n",
223 |     "        X_slow(n,1) = min(data(idx,4));\n",
224 |     "        X_slow(n,2) = max(data(idx,4));\n",
225 |     "    end\n",
226 |     "    \n",
227 |     "    idx = idxCurrentID & idxRR;\n",
228 |     "    if any(idx) % leave NaN in X_slow when the stay has no respiratory rate rows\n",
229 |     "        X_slow(n,3) = min(data(idx,4));\n",
230 |     "        X_slow(n,4) = max(data(idx,4));\n",
231 |     "    end\n",
232 |     "end\n",
233 |     "\n",
234 |     "toc;\n",
235 |     "\n",
236 |     "% Clear variables so we don't accidentally use the wrong data in temp variables later on\n",
237 |     "clear idxRR idxHR idxCurrentID idx;\n",
238 |     "\n",
239 |     "% let's show a preview of X_slow, the matrix computed in this cell:\n",
240 |     "X_slow(1:5,:)"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "code",
245 |    "execution_count": null,
246 |    "metadata": {
247 |     "collapsed": false
248 |    },
249 |    "outputs": [],
250 |    "source": [
251 |     "y = data(idxID,5); % HOSPITAL_EXPIRE_FLAG taken from one row per ICUSTAY_ID\n",
252 |     "died = (y == 1); lived = (y == 0);\n",
253 |     "\n",
254 |     "% scatter the lowest against the highest heart rate, one colour/marker per outcome\n",
255 |     "figure(1); clf; hold all;\n",
256 |     "plot(X(died,1), X(died,2), 'x', 'Linewidth',2, 'Color',[0.8906, 0.1016, 0.1094]);\n",
257 |     "plot(X(lived,1), X(lived,2), 'o', 'Linewidth',2, 'Color',[0.2148, 0.4922, 0.7188]);\n",
258 |     "\n",
259 |     "% label the axes\n",
260 |     "xlabel('Lowest heart rate');\n",
261 |     "ylabel('Highest heart rate');"
262 |    ]
263 |   },
264 |   {
265 |    "cell_type": "markdown",
266 |    "metadata": {},
267 |    "source": [
268 |     "Above we can see most people have heart rates between 0-200, except a few with heart rates around 1000 and one with a highest heart rate of 5500. Clearly these are not physiological - you'll find these \"outliers\" frequently in medical data - it's a consequence of the secondary nature of our analysis. It's obvious to any care provider that these are not possible, so they are ignored during routine care, and not sanitized in the database. We have to fix them ourselves! For now, we can ignore these and set the limits on our plot. Later, we will preprocess these data appropriately."
269 |    ]
270 |   },
271 |   {
272 |    "cell_type": "code",
273 |    "execution_count": null,
274 |    "metadata": {
275 |     "collapsed": false
276 |    },
277 |    "outputs": [],
278 |    "source": [
279 |     "y = data(idxID,5); % HOSPITAL_EXPIRE_FLAG taken from one row per ICUSTAY_ID\n",
280 |     "isDead = (y == 1);\n",
281 |     "isAlive = (y == 0);\n",
282 |     "\n",
283 |     "% lowest against highest heart rate, one colour/marker per outcome\n",
284 |     "figure(1); clf; hold all;\n",
285 |     "plot(X(isDead,1), X(isDead,2), 'x', 'Linewidth',2, 'Color',[0.8906, 0.1016, 0.1094]);\n",
286 |     "plot(X(isAlive,1), X(isAlive,2), 'o', 'Linewidth',2, 'Color',[0.2148, 0.4922, 0.7188]);\n",
287 |     "\n",
288 |     "xlabel('Lowest heart rate');\n",
289 |     "ylabel('Highest heart rate');\n",
290 |     "\n",
291 |     "% restrict both axes to 0-240 so the outliers no longer dominate the plot\n",
292 |     "xlim([0,240]); ylim([0,240]);"
293 |    ]
294 |   },
295 |   {
296 |    "cell_type": "markdown",
297 |    "metadata": {},
298 |    "source": [
299 |     "Now we have a wonderful blob of data. This is because there are so many data points! What a wonderful problem to have. We limit the plot to 200 data points (100 in each class, survived or died in hospital) - this will give us a better visualization. We pick these data points randomly."
300 |    ]
301 |   },
302 |   {
303 |    "cell_type": "code",
304 |    "execution_count": null,
305 |    "metadata": {
306 |     "collapsed": false
307 |    },
308 |    "outputs": [],
309 |    "source": [
310 |     "y = data(idxID,5); % get the outcome for each patient\n",
311 |     "\n",
312 |     "N_DATA_POINTS = 100; % Number of data points to plot for each class (capped below at the size of the class)\n",
313 |     "\n",
314 |     "rng(777,'twister'); % fix the random number seed so everyone's plots look identical\n",
315 |     "\n",
316 |     "idx0 = find(y==0);\n",
317 |     "[~,idxRand] = sort(rand(size(idx0,1),1),1); % random ordering of the survivors\n",
318 |     "idx0 = idx0(idxRand(1:min(N_DATA_POINTS,numel(idx0)))); % min() avoids indexing past the end of a small class\n",
319 |     "\n",
320 |     "idx1 = find(y==1);\n",
321 |     "[~,idxRand] = sort(rand(size(idx1,1),1),1); % random ordering of the in-hospital deaths\n",
322 |     "idx1 = idx1(idxRand(1:min(N_DATA_POINTS,numel(idx1))));\n",
323 |     "\n",
324 |     "% plot the variables against each other, coloured by their outcome\n",
325 |     "figure(1); clf; hold all;\n",
326 |     "plot(X(idx1,1), X(idx1,2),'x',...\n",
327 |     "    'Linewidth',2,'Color',[0.8906, 0.1016, 0.1094]);\n",
328 |     "plot(X(idx0,1), X(idx0,2),'o',...\n",
329 |     "    'Linewidth',2,'Color',[0.2148, 0.4922, 0.7188]);\n",
330 |     "\n",
331 |     "xlabel('Lowest heart rate');\n",
332 |     "ylabel('Highest heart rate');\n",
333 |     "\n",
334 |     "% change the axis to reasonable limits\n",
335 |     "set(gca,'XLim',[0,240],'YLim',[0,240]);\n"
336 |    ]
337 |   }
338 |  ],
339 |  "metadata": {
340 |   "kernelspec": {
341 |    "display_name": "Matlab",
342 |    "language": "matlab",
343 |    "name": "matlab_kernel"
344 |   },
345 |   "language_info": {
346 |    "codemirror_mode": "Octave",
347 |    "file_extension": ".m",
348 |    "help_links": [
349 |     {
350 |      "text": "MetaKernel Magics",
351 |      "url": "https://github.com/calysto/metakernel/blob/master/metakernel/magics/README.md"
352 |     }
353 |    ],
354 |    "mimetype": "text/x-matlab",
355 |    "name": "octave"
356 |   }
357 |  },
358 |  "nbformat": 4,
359 |  "nbformat_minor": 0
360 | }
361 | 


--------------------------------------------------------------------------------
/mlcc/lab1-data-extraction/mlcc1_introduction.m:
--------------------------------------------------------------------------------
 1 | %% Run the following to connect to the database
 2 | 
 3 | % STEP 1: Tell Matlab where the driver is
 4 | javaclasspath('sqlite-jdbc-3.8.11.2.jar') % use this for SQLite
 5 | 
 6 | % STEP 2: Connect to the Database
 7 | % The JDBC URL points at <current folder>/data/mimiciii_v1_3_mini.sqlite,
 8 | % so run this script from the folder which contains "data".
 9 | % NOTE(review): the repository tree lists data/mimicdata.sqlite - confirm the file name.
10 | conn = database('','','',...
11 |     'org.sqlite.JDBC',['jdbc:sqlite:' pwd filesep 'data' filesep 'mimiciii_v1_3_mini.sqlite']);
12 | 
13 | %% Take a look at "mlcc-query-1.sql" - this extracts a single value for each patient
14 | % More specifically, it extracts the highest heart rate
15 | query = makeQuery('mlcc-query-1.sql');
16 | data = fetch(conn,query);
17 | 
18 | %% Plot a histogram of the highest heart rate values
19 | 
20 | 
21 | %% Plot the highest heart rate against the highest respiratory rate
22 | 
23 | 
24 | %% See if you can add in the highest GCS
25 | % Then, run the query here to get the results.
26 | query = makeQuery(''); % put the filename here
27 | data = fetch(conn,query);
28 | 
29 | 
30 | %% Plot the highest heart rate against the highest respiratory rate, colouring by outcome
31 | % The patient outcome is stored in "HOSPITAL_EXPIRE_FLAG" - the 4th column
32 | 
33 | 
34 | %% Plot the highest heart rate against the highest GCS, colouring by outcome
35 | % Which variable do you feel discriminates mortality better?
36 | 
37 | 
38 | %% What other variables could you add which might help?
39 | 
40 | 
41 | %% Build a logistic regression to classify mortality
42 | % This is equivalent to drawing a line of separation
43 | 
44 | % See: glmfit
45 | % e.g. b = glmfit(X,y,'binomial')
46 | 
47 | 
48 | %% Close the database connection once you have finished querying
49 | close(conn);
50 | 


--------------------------------------------------------------------------------
/mlcc/lab2-intro-ml/README.md:
--------------------------------------------------------------------------------
1 | 
2 | # Important notes
3 | 
4 | * If we use LIBSVM, we should provide pre-compiled binaries for all the operating systems
5 |     * Linux - This was easy for me on Ubuntu, but I had to change the path in the Makefile and run it from command line (running make.m did *not* work, it compiled but didn't link the library correctly)
6 |     * Mac OS X - The lab laptop doesn't have a compiler; we still need to sort that out.
7 |     * Windows - It comes with pre-compiled binaries
8 | * Need to add `libsvm/matlab` to the path
9 | 


--------------------------------------------------------------------------------
/mlcc/lab2-intro-ml/mlcc2-query.sql:
--------------------------------------------------------------------------------
 1 | -- For each ICU stay: hospital mortality flag, age at ICU admission, lowest mean blood
 2 | -- pressure and highest respiratory rate charted in the first 24 hours (SQLite syntax).
 3 | with pvt as
 4 | (
 5 |   select ie.subject_id, ie.hadm_id, ie.icustay_id, ie.intime
 6 |   -- label each chart event with a small integer identifying the type of measurement
 7 |   , case
 8 |     when itemid in (456,52,6702,443,220052,220181,225312) then 1 -- MeanBP
 9 |     when itemid in (615,618) then 2 -- RespRate
10 | 
11 |     -- the GCS components are labelled here but are not aggregated in the final select
12 |     when itemid in (723,223900) then 10 -- GCSVerbal
13 |     when itemid in (454,223901) then 11 -- GCSMotor
14 |     when itemid in (184,220739) then 12 -- GCSEyes
15 |     else null end as VitalID
16 |   , valuenum
17 |   from icustays ie
18 |   -- NOTE: the WHERE filter on ce.itemid below discards ICU stays without any matching
19 |   -- chart event, so this left join effectively behaves like an inner join
20 |   left join chartevents ce
21 |   on ie.subject_id = ce.subject_id and ie.hadm_id = ce.hadm_id and ie.icustay_id = ce.icustay_id
22 |   -- datetime() keeps the time of day, giving a true 24 hour window after ICU admission;
23 |   -- date() would truncate the upper bound to the start of the following calendar day
24 |   -- (assumes intime/charttime are stored as 'YYYY-MM-DD HH:MM:SS' text - TODO confirm)
25 |   and ce.charttime >= ie.intime and ce.charttime <= datetime(ie.intime,'+1 day')
26 |   where ce.itemid in
27 |   (
28 |  723, -- GCSVerbal
29 |  454, -- GCSMotor
30 |  184, -- GCSEyes
31 | 
32 |  223900, -- GCS - Verbal Response
33 |  223901, -- GCS - Motor Response
34 |  220739, -- GCS - Eye Opening
35 |   618, -- Respiratory Rate
36 |   615, -- Resp Rate (Total)
37 |   456, --"NBP Mean"
38 |   52, --"Arterial BP Mean"
39 |   6702, -- Arterial BP Mean #2
40 |   443, -- Manual BP Mean(calc)
41 |   220052, --"Arterial Blood Pressure mean"
42 |   220181, --"Non Invasive Blood Pressure mean"
43 |   225312 --"ART BP mean"
44 |   )
45 | )
46 | SELECT pvt.icustay_id
47 | , adm.HOSPITAL_EXPIRE_FLAG -- whether the patient died within the hospital
48 | -- age in years: difference in days between ICU admission and date of birth, divided by 365.24
49 | , round( (julianday(pvt.intime) - julianday(pat.dob))/365.24, 4) as Age
50 | -- min()/max() ignore nulls, so each aggregate only sees the rows of its own VitalID
51 | , min(case when VitalID = 1 then valuenum else null end) as MeanBP_Min
52 | , max(case when VitalID = 2 then valuenum else null end) as RespRate_Max
53 | FROM pvt
54 | inner join patients pat
55 | on pvt.subject_id = pat.subject_id
56 | inner join admissions adm
57 | on pvt.hadm_id = adm.hadm_id
58 | group by pvt.icustay_id, pvt.hadm_id, adm.HOSPITAL_EXPIRE_FLAG, pvt.intime, pat.dob
59 | order by pvt.icustay_id;
60 | 


--------------------------------------------------------------------------------
/mlcc/lab4-mortality-prediction/matlab_postgres_connection.m:
--------------------------------------------------------------------------------
 1 | % Minimal example: connect to a PostgreSQL database, run one query, then disconnect
 2 | 
 3 | % Tell Matlab where the driver is
 4 | javaclasspath('postgresql-9.4.1207.jar') % note we are using a postgres driver
 5 | 
 6 | 
 7 | %% Initiate our database connection (the server configured below is localhost)
 8 | username = '';
 9 | password = '';
10 | 
11 | % Connect to the Database
12 | conn = database('mimic',username,password,...
13 |     'Vendor','PostgreSQL',...
14 |     'Server','localhost',...
15 |     'PortNumber',5432);
16 | 
17 | % an empty Message means the connection succeeded; otherwise stop here with the reason
18 | if ~isempty(conn.Message)
19 |     error('Could not connect to the database: %s', conn.Message);
20 | end
21 | 
22 | 
23 | %% create and run a query
24 | query = 'select * from patients limit 10';
25 | data = fetch(conn,query);
26 | 
27 | %% close the connection
28 | close(conn);


--------------------------------------------------------------------------------
/mlcc/lab4-mortality-prediction/mlcc-extract-data.sql:
--------------------------------------------------------------------------------
 1 | -- First-day heart rate and respiratory rate extremes for every ICU stay,
 2 | -- together with the in-hospital mortality outcome.
 3 | with ce as
 4 | (
 5 |   -- keep only the chartevents rows we are interested in
 6 |   select icustay_id, charttime, itemid, valuenum
 7 |   from chartevents
 8 |   where itemid in
 9 |   (
10 |       211 -- Heart Rate
11 |     , 618 -- Respiratory Rate
12 |     , 615 -- Resp Rate (Total)
13 |   )
14 |   -- how do we know heart rates use ITEMID 211? We looked it up in D_ITEMS!
15 |   -- Try it yourself: select * from d_items where lower(label) like '%heart rate%'
16 | )
17 | select
18 |   -- ICUSTAY_ID identifies one ICU stay: a person admitted to the ICU several times
19 |   -- has a *different* ICUSTAY_ID for each stay, but always the same SUBJECT_ID
20 |   ie.icustay_id
21 | 
22 |   -- the outcome of interest: in-hospital mortality
23 |   , max(adm.HOSPITAL_EXPIRE_FLAG) as OUTCOME
24 | 
25 |   -- "case" is essentially an "if, else" clause: it returns VALUENUM when the ITEMID
26 |   -- matches and null (the SQL standard for an empty value) otherwise - a case
27 |   -- expression without an "else" branch returns null.
28 |   -- min()/max() *ignore* nulls, so wrapping the case expression in them aggregates
29 |   -- only the rows of the chosen ITEMIDs, e.g. only heart rates for ITEMID 211.
30 |   , min(case when ce.itemid = 211 then ce.valuenum end) as HeartRate_Min
31 |   , max(case when ce.itemid = 211 then ce.valuenum end) as HeartRate_Max
32 |   , min(case when ce.itemid in (615,618) then ce.valuenum end) as RespRate_Min
33 |   , max(case when ce.itemid in (615,618) then ce.valuenum end) as RespRate_Max
34 | from icustays ie
35 | 
36 | -- the admissions table provides the hospital outcome
37 | inner join admissions adm
38 |   on adm.hadm_id = ie.hadm_id
39 | 
40 | -- the filtered chartevents rows provide the observations
41 | left join ce
42 |   -- match on the ICU stay identifier
43 |   on ce.icustay_id = ie.icustay_id
44 |   -- and only keep observations charted between ICU admission and admission + 1 day
45 |   -- (both ends inclusive), i.e. during the first day in the ICU
46 |   and ce.charttime between ie.intime and ie.intime + interval '1' day
47 | group by ie.icustay_id
48 | order by ie.icustay_id;
49 | 


--------------------------------------------------------------------------------
/mlcc/lab4-mortality-prediction/mlcc-group-by-tutorial.sql:
--------------------------------------------------------------------------------
 1 | -- In a lot of the code that you will use today, you will need to group values.
 2 | -- What does it mean to group values?
 3 | -- Say you want my highest heart rate for the day...
 4 | -- that's equivalent to saying "I want the max heart rate *group by* ICUSTAY_ID"
 5 | 
 6 | -- Imagine you have a table with 2 columns and 5 rows
 7 | --  ICUSTAY_ID | HEART RATE
 8 | --      1      |    90
 9 | --      1      |    73
10 | --      2      |    84
11 | --      2      |    82
12 | --      2      |    81
13 | 
14 | -- Here we have two ICU stays (#1 and #2). We'd like their highest (maximum) heart rate.
15 | 
16 | -- If we take the max() of the 2nd column, we now have:
17 | --  5 rows in the first column
18 | --  ?? how many rows in the second column
19 | --  ICUSTAY_ID | max(HEART RATE)
20 | --      1      |    ?
21 | --      1      |    ?
22 | --      2      |    ?
23 | --      2      |    ?
24 | --      2      |    ?
25 | 
26 | -- The logical answer is we'd want to collapse heart rate by ICUSTAY_ID.
27 | -- I'm interested in each ICU stay's highest heart rate - taking the maximum another way (e.g. across patients) doesn't make sense.
28 | -- To do this: we need to tell SQL how to *group* the max value
29 | -- If we say *group by* ICUSTAY_ID, then we tell SQL to group the heart rates according to ICUSTAY_ID
30 | 
31 | --  ICUSTAY_ID | max(HEART RATE)
32 | --      1      |    90
33 | --      2      |    84
34 | 
35 | -- In SQL, we specify this by adding in "group by" at the bottom of the query.
36 | 
37 | 
38 | -- Let's try it for something simple: let's find the first time a patient entered the ICU
39 | select
40 |   -- SUBJECT_ID identifies each unique patient
41 |   -- note that if the same person stays in the ICU more than once, each stay would have a *different* ICUSTAY_ID
42 |   -- however, since it's the same person, all those stays would have the same SUBJECT_ID - so we group by SUBJECT_ID
43 |   subject_id
44 | 
45 |   -- this is the lowest intime across all of the patient's ICU stays
46 |   -- since 'intime' is a date, the lowest intime is conceptually the same as the earliest intime
47 |   , min(intime) as MinimumINTIME
48 | 
49 | from icustays ie
50 | group by subject_id;
51 | 


--------------------------------------------------------------------------------
/mlcc/lab4-mortality-prediction/mlcc_mortality_prediction.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# Import libraries\n",
 12 |     "import numpy as np\n",
 13 |     "import pandas as pd\n",
 14 |     "import matplotlib.pyplot as plt\n",
 15 |     "import psycopg2\n",
 16 |     "from sklearn.pipeline import Pipeline\n",
 17 |     "\n",
 18 |     "# used for train/test splits\n",
 19 |     "from sklearn.cross_validation import train_test_split\n",
 20 |     "\n",
 21 |     "# used to impute mean for data\n",
 22 |     "from sklearn.preprocessing import Imputer\n",
 23 |     "\n",
 24 |     "# logistic regression is our model of choice\n",
 25 |     "from sklearn.linear_model import LogisticRegression\n",
 26 |     "from sklearn.linear_model import LogisticRegressionCV\n",
 27 |     "\n",
 28 |     "# used to calculate AUROC/accuracy\n",
 29 |     "from sklearn import metrics\n",
 30 |     "\n",
 31 |     "# used to create confusion matrix\n",
 32 |     "from sklearn.metrics import confusion_matrix\n",
 33 |     "\n",
 34 |     "from sklearn.cross_validation import cross_val_score\n",
 35 |     "\n",
 36 |     "\n",
 37 |     "%matplotlib inline"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 2,
 43 |    "metadata": {},
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "# Connect to MIMIC\n",
 47 |     "# be sure to add the password as appropriate!\n",
 48 |     "con = psycopg2.connect(dbname='MIMIC', user='workshop', password=''\n",
 49 |     "                       , host='<xxxxx>.amazonaws.com'\n",
 50 |     "                       , port=5432)\n",
 51 |     "cur = con.cursor()\n",
 52 |     "cur.execute('SET search_path to mimiciii_workshop')  # same statement as before, without relying on implicit string concatenation"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 3,
 58 |    "metadata": {},
 59 |    "outputs": [
 60 |     {
 61 |      "name": "stdout",
 62 |      "output_type": "stream",
 63 |      "text": [
 64 |       "   icustay_id  outcome  heartrate_min  heartrate_max  resprate_min  \\\n",
 65 |       "0      200006        0             62             84            14   \n",
 66 |       "1      200030        0             83            115            11   \n",
 67 |       "2      200068        0             67            112            20   \n",
 68 |       "3      200071        0            118            130            16   \n",
 69 |       "4      200102        1             71             87            13   \n",
 70 |       "\n",
 71 |       "   resprate_max  \n",
 72 |       "0            27  \n",
 73 |       "1            28  \n",
 74 |       "2            24  \n",
 75 |       "3            25  \n",
 76 |       "4            32  \n"
 77 |      ]
 78 |     }
 79 |    ],
 80 |    "source": [
 81 |     "query = \"\"\"\n",
 82 |     "with ce as\n",
 83 |     "(\n",
 84 |     "  select\n",
 85 |     "    icustay_id, charttime, itemid, valuenum\n",
 86 |     "  from chartevents\n",
 87 |     "  -- specify what data we want from chartevents\n",
 88 |     "  where itemid in\n",
 89 |     "  (\n",
 90 |     "  211, -- Heart Rate\n",
 91 |     "  618, --\tRespiratory Rate\n",
 92 |     "  615 --\tResp Rate (Total)\n",
 93 |     "  )\n",
 94 |     "  -- how did we know heart rates were stored using ITEMID 211? Simple, we looked in D_ITEMS!\n",
 95 |     "  -- Try it for yourself: select * from d_items where lower(label) like '%heart rate%'\n",
 96 |     ")\n",
 97 |     "select\n",
 98 |     "  -- ICUSTAY_ID identifies each unique patient ICU stay\n",
 99 |     "  -- note that if the same person stays in the ICU more than once, each stay would have a *different* ICUSTAY_ID\n",
100 |     "  -- however, since it's the same person, all those stays would have the same SUBJECT_ID\n",
101 |     "  ie.icustay_id\n",
102 |     "\n",
103 |     "  -- this is the outcome of interest: in-hospital mortality\n",
104 |     "  , max(adm.HOSPITAL_EXPIRE_FLAG) as OUTCOME\n",
105 |     "\n",
106 |     "  -- this is a case statement - essentially an \"if, else\" clause\n",
107 |     "  , min(\n",
108 |     "      case\n",
109 |     "        -- if the itemid is 211\n",
110 |     "        when itemid = 211\n",
111 |     "          -- then return the actual value stored in VALUENUM\n",
112 |     "          then valuenum\n",
113 |     "        -- otherwise, return 'null', which is SQL standard for an empty value\n",
114 |     "        else null\n",
115 |     "      -- end the case statement\n",
116 |     "      end\n",
117 |     "    ) as HeartRate_Min\n",
118 |     "\n",
119 |     "    -- note we wrapped the above in \"min()\"\n",
120 |     "    -- this takes the minimum of all values inside, and *ignores* nulls\n",
121 |     "    -- by calling this on our case statement, we are ignoring all values except those with ITEMID = 211\n",
122 |     "    -- since ITEMID 211 are heart rates, we take the minimum of only heart rates\n",
123 |     "\n",
124 |     "  , max(case when itemid = 211 then valuenum else null end) as HeartRate_Max\n",
125 |     "  , min(case when itemid in (615,618) then valuenum else null end) as RespRate_Min\n",
126 |     "  , max(case when itemid in (615,618) then valuenum else null end) as RespRate_Max\n",
127 |     "from icustays ie\n",
128 |     "\n",
129 |     "-- join to the admissions table to get hospital outcome\n",
130 |     "inner join admissions adm\n",
131 |     "  on ie.hadm_id = adm.hadm_id\n",
132 |     "\n",
133 |     "-- join to the chartevents table to get the observations\n",
134 |     "left join ce\n",
135 |     "  -- match the tables on the patient identifier\n",
136 |     "  on ie.icustay_id = ce.icustay_id\n",
137 |     "  -- and require that the observation be made after the patient is admitted to the ICU\n",
138 |     "  and ce.charttime >= ie.intime\n",
139 |     "  -- and *before* their admission time + 1 day, i.e. the observation must be made on their first day in the ICU\n",
140 |     "  and ce.charttime <= ie.intime + interval '1' day\n",
141 |     "group by ie.icustay_id\n",
142 |     "order by ie.icustay_id\n",
143 |     "\"\"\"\n",
144 |     "\n",
145 |     "data = pd.read_sql_query(query,con)\n",
146 |     "print(data.head())"
147 |    ]
148 |   },
149 |   {
150 |    "cell_type": "code",
151 |    "execution_count": 4,
152 |    "metadata": {
153 |     "collapsed": true
154 |    },
155 |    "outputs": [],
156 |    "source": [
157 |     "# close the connection as we are done loading data from server\n",
158 |     "cur.close()\n",
159 |     "con.close()"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": 5,
165 |    "metadata": {},
166 |    "outputs": [],
167 |    "source": [
168 |     "# convert the data frame into numpy arrays: outcome vector y and feature matrix X\n",
169 |     "values = data.values\n",
170 |     "y = values[:, 1]  # column 1 holds the outcome\n",
171 |     "\n",
172 |     "# the features are everything except the first 2 columns (the ID and the outcome),\n",
173 |     "# so both of those columns are removed in a single call\n",
174 |     "X = np.delete(values, [0, 1], axis=1)"
175 |    ]
176 |   },
177 |   {
178 |    "cell_type": "code",
179 |    "execution_count": 6,
180 |    "metadata": {},
181 |    "outputs": [
182 |     {
183 |      "name": "stdout",
184 |      "output_type": "stream",
185 |      "text": [
186 |       "Accuracy = 0.784267912773\n",
187 |       "AUROC = 0.642288212031\n",
188 |       "\n",
189 |       "Confusion matrix\n",
190 |       "[[977  17]\n",
191 |       " [260  30]]\n",
192 |       "\n",
193 |       "Classification report\n",
194 |       "             precision    recall  f1-score   support\n",
195 |       "\n",
196 |       "        0.0       0.79      0.98      0.88       994\n",
197 |       "        1.0       0.64      0.10      0.18       290\n",
198 |       "\n",
199 |       "avg / total       0.76      0.78      0.72      1284\n",
200 |       "\n"
201 |      ]
202 |     }
203 |    ],
204 |    "source": [
205 |     "# evaluate a logistic regression model using an 80%-20% training/test split\n",
206 |     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # fixed seed => reproducible metrics\n",
207 |     "\n",
208 |     "# impute mean for missing values (the means are learned from the training set only)\n",
209 |     "imp = Imputer(missing_values='NaN', strategy='mean', axis=0)\n",
210 |     "imp.fit(X_train)\n",
211 |     "\n",
212 |     "X_train = imp.transform(X_train)\n",
213 |     "X_test = imp.transform(X_test)\n",
214 |     "\n",
215 |     "model = LogisticRegression(fit_intercept=True)\n",
216 |     "model = model.fit(X_train, y_train)\n",
217 |     "\n",
218 |     "# predict class labels for the test set\n",
219 |     "y_pred = model.predict(X_test)\n",
220 |     "\n",
221 |     "# generate class probabilities (column 1 is the probability of class 1)\n",
222 |     "y_prob = model.predict_proba(X_test)\n",
223 |     "\n",
224 |     "# generate evaluation metrics\n",
225 |     "print('Accuracy = {}'.format(metrics.accuracy_score(y_test, y_pred)))\n",
226 |     "print('AUROC = {}'.format(metrics.roc_auc_score(y_test, y_prob[:, 1])))\n",
227 |     "\n",
228 |     "print('\\nConfusion matrix')\n",
229 |     "print(metrics.confusion_matrix(y_test, y_pred))\n",
230 |     "print('\\nClassification report')\n",
231 |     "print(metrics.classification_report(y_test, y_pred))"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "code",
236 |    "execution_count": 8,
237 |    "metadata": {},
238 |    "outputs": [
239 |     {
240 |      "name": "stdout",
241 |      "output_type": "stream",
242 |      "text": [
243 |       "AUROC for all folds:\n",
244 |       "[ 0.632241    0.66711432  0.65462583  0.63505984  0.64856111]\n",
245 |       "Average AUROC across folds:\n",
246 |       "0.647520418729\n"
247 |      ]
248 |     }
249 |    ],
250 |    "source": [
251 |     "# L1-regularized logistic regression, scored by AUROC with 5-fold cross-validation\n",
252 |     "# the available scoring parameters are listed at:\n",
253 |     "# http://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter\n",
254 |     "\n",
255 |     "# step 1: replace missing values by the column mean\n",
256 |     "mean_imputer = Imputer(missing_values='NaN', strategy=\"mean\", axis=0)\n",
257 |     "\n",
258 |     "# step 2: logistic regression with an L1 penalty and its own internal 5-fold CV\n",
259 |     "l1_logreg = LogisticRegressionCV(penalty='l1', cv=5,\n",
260 |     "                                 scoring='roc_auc', solver='liblinear')\n",
261 |     "\n",
262 |     "estimator = Pipeline([(\"imputer\", mean_imputer),\n",
263 |     "                      (\"regression\", l1_logreg)])\n",
264 |     "\n",
265 |     "# 5-fold cross-validation of the whole pipeline\n",
266 |     "scores = cross_val_score(estimator, X, y,\n",
267 |     "                         scoring='roc_auc', cv=5)\n",
268 |     "\n",
269 |     "# report the AUROC of every fold and their average\n",
270 |     "print('AUROC for all folds:')\n",
271 |     "print(scores)\n",
272 |     "print('Average AUROC across folds:')\n",
273 |     "print(scores.mean())"
274 |    ]
275 |   }
276 |  ],
277 |  "metadata": {
278 |   "kernelspec": {
279 |    "display_name": "Python 2",
280 |    "language": "python",
281 |    "name": "python2"
282 |   },
283 |   "language_info": {
284 |    "codemirror_mode": {
285 |     "name": "ipython",
286 |     "version": 2
287 |    },
288 |    "file_extension": ".py",
289 |    "mimetype": "text/x-python",
290 |    "name": "python",
291 |    "nbconvert_exporter": "python",
292 |    "pygments_lexer": "ipython2",
293 |    "version": "2.7.13"
294 |   }
295 |  },
296 |  "nbformat": 4,
297 |  "nbformat_minor": 1
298 | }
299 | 


--------------------------------------------------------------------------------
/mlcc/lab4-mortality-prediction/mlcc_mortality_prediction.m:
--------------------------------------------------------------------------------
  1 | %% Build a better mortality prediction model
  2 | 
  3 | password = ''; % obtain the password to the instance from a demonstrator
  4 | 
  5 | 
  6 | % Add the JDBC driver (a PostgreSQL driver) to MATLAB's Java class path
  7 | javaclasspath('postgresql-9.4.1207.jar')
  8 | %% Initiate our database connection with Amazon
  9 | % Open the connection to the database
 10 | conn = database('MIMIC', 'workshop', password, ...
 11 |     'Vendor', 'PostgreSQL', ...
 12 |     'Server', '<xxxxx>.amazonaws.com', ...
 13 |     'PortNumber', 5432);
 14 | 
 15 | % conn.Message is empty when nothing went wrong; otherwise report the failure
 16 | if ~isempty(conn.Message)
 17 |     if strcmp(conn.Message, 'Unable to find JDBC driver.')
 18 |         error('You do not have the JDBC driver installed. Please ensure MATLAB can find the .jar file.');
 19 |     elseif strcmp(conn.Message, 'The server requested password-based authentication, but no password was provided.')
 20 |         error('Please enter the password provided to you in the password variable at the top of the script.');
 21 |     else
 22 |         % any other failure: raise the connection's own message unchanged
 23 |         error(conn.Message)
 24 |     end
 25 | else
 26 |     fprintf('Connected to the database!\n');
 27 | end
 28 | 
 29 | % NOTE: the commented lines below show how we used to connect to the local sqlite file
 30 | %   javaclasspath('sqlite-jdbc-3.8.11.2.jar') % use this for SQLite
 31 | %   conn = database('','','',...
 32 | %     'org.sqlite.JDBC',['jdbc:sqlite:' pwd filesep 'data' filesep 'mimiciii_v1_3_demo.sqlite']);
 33 | 
 34 | % Have the database connection return query results as "dataset" data,
 35 | % because header information can be extracted from dataset outputs
 36 | setdbprefs('DataReturnFormat','dataset')
 37 | 
 38 | %% Extract the patient data using the query
 39 | % *Highly advised*: do not extract all of your data at once in one query.
 40 | % That way, if you find a typo, you only need to re-run a subcomponent,
 41 | % not the entire data extraction process!
 42 | 
 43 | % read the query text from the SQL file (via the makeQuery helper)
 44 | query = makeQuery('mlcc-extract-data.sql');
 45 | 
 46 | % run the query on the database connection; tic/toc report the elapsed time
 47 | tic;
 48 | data = fetch(conn,query);
 49 | toc;
 50 | 
 51 | %% (Optional) convert the data from a dataset to an X design matrix
 52 | % first convert data to a cell array
 53 | data = dataset2cell(data);
 54 | 
 55 | % the column names are held in the first row of the 'data' cell array
 56 | header = data(1,:);
 57 | header = regexprep(header,'_',''); % remove underscores
 58 | 
 59 | % remove the header row from the data cell
 60 | data = data(2:end,:);
 61 | 
 62 | % MATLAB sometimes reads the text 'null' instead of NaN (ischar replaces the not-recommended isstr)
 63 | data(cellfun(@(x) ischar(x) && strcmp(x,'null'), data)) = {NaN};
 64 | 
 65 | % MATLAB sometimes has blank cells which should be NaN
 66 | data(cellfun(@isempty, data)) = {NaN};
 67 | 
 68 | % Convert the data into a matrix of numbers
 69 | % This is a MATLAB data type thing - we can't do math with cell arrays
 70 | data = cell2mat(data);
 71 | 
 72 | % Split identifier / outcome / features; names are matched case-insensitively in case the driver changes column-name case
 73 | X_id = data(:, strcmpi(header,'ICUSTAYID'));
 74 | y = data(:, strcmpi(header,'OUTCOME'));
 75 | 
 76 | X = data(:, ~ismember( upper(header), {'ICUSTAYID','OUTCOME'}) );
 77 | X_header = header(~ismember( upper(header), {'ICUSTAYID','OUTCOME'}));
 78 | 
 79 | %% Print out the first 5 rows of the data
 80 | W = 5; % the maximum number of columns to print at one time
 81 | % can set this wider for wider monitors
 82 | % ceil (not floor), so that a final, partially filled group of columns is still printed
 83 | for o=1:ceil(size(X,2)/W)
 84 |     % columns in this group; the last group is clipped to the width of X
 85 |     idxColumn = (o-1)*W + 1 : min(o*W, size(X,2));
 86 | 
 87 |     % header line for this group of columns
 88 |     fprintf('%12s\t',X_header{idxColumn});
 89 |     fprintf('\n');
 90 |     for n=1:min(5, size(X,1)) % at most 5 rows; fewer if X has fewer rows
 91 |         for m=idxColumn
 92 |             fprintf('%12g\t',X(n, m));
 93 |         end
 94 |         fprintf('\n');
 95 |     end
 96 |     fprintf('\n');
 97 | end
 98 | 
 99 | 
100 | %% Inspect the data
101 | figure(1); clf; hold all; % select figure 1, clear it, and hold the axes
102 | 
103 | % Box-plots are very useful for quickly looking for outliers, etc.
104 | boxplot(X,'plotstyle','compact','labels',X_header); % boxes labelled with the feature names
105 | 
106 | %% Perform data preprocessing
107 | % correct ages, remove outliers, etc.
108 | 
109 | 
110 | %% Sub-sample the frequent class to balance the number in each class
111 | % This is not always needed - but some models do better with it
112 | % Alternatively, you could up-sample the infrequent class
113 | balanceData = false;
114 | 
115 | % optionally, we can balance the subsets (uses y: y_train is only created below)
116 | if balanceData == true
117 |     N0 = sum(y==0); % number of negative outcomes
118 |     N1 = sum(y==1); % number of positive outcomes
119 | 
120 |     [~,idxRandomize] = sort(rand(N0,1)); % random ordering of the negative outcomes
121 |     idxKeep = find(y==0); % find all the negative outcomes
122 |     idxKeep = idxKeep(idxRandomize(1:min(N0,N1))); % pick a random N1 negative outcomes (all of them if there are fewer)
123 |     idxKeep = [find(y==1);idxKeep]; % add in the positive outcomes
124 |     idxKeep = sort(idxKeep); % probably not needed but it's cleaner
125 | else
126 |     idxKeep = true(size(X,1),1); % keep every row
127 | end
128 | 
129 | X_train = X(idxKeep,:);
130 | y_train = y(idxKeep);
131 | 
132 | %% Create cross-fold validation indices
133 | K = 5; % how many folds
134 | 
135 | [~,idxK] = sort(rand(size(X_train,1),1)); % sort order of random numbers = random permutation of row numbers
136 | idxK = mod(idxK,K) + 1; % turn each permuted row number into a fold label in 1..K
137 | 
138 | %% Train a classifier
139 | % Here is an example using logistic regression
140 | 
141 | auroc = zeros(1,K); % one AUROC per fold
142 | 
143 | for k=1:K
144 |     idxDevelop  = idxK ~= k; % develop on every fold except k
145 |     idxValidate = idxK == k; % validate on fold k
146 | 
147 |     X_develop = X_train(idxDevelop,:);
148 |     y_develop = y_train(idxDevelop,:);
149 | 
150 |     X_validate = X_train(idxValidate,:);
151 |     y_validate = y_train(idxValidate,:);
152 | 
153 |     % Normalize and impute means for the data before training
154 |     % (the mean and standard deviation are computed on the development rows only)
155 |     mu = nanmean(X_develop, 1);
156 |     sigma = nanstd(X_develop, [], 1);
157 |     sigma(sigma == 0) = 1; % constant columns: avoid dividing by zero, which would give Inf/NaN
158 |     X_develop = bsxfun(@minus, X_develop, mu);
159 |     X_develop = bsxfun(@rdivide, X_develop, sigma);
160 | 
161 |     X_validate = bsxfun(@minus, X_validate, mu);
162 |     X_validate = bsxfun(@rdivide, X_validate, sigma);
163 | 
164 |     % Impute the mean (equal to 0 since we normalized the mean to be 0)
165 |     X_develop(isnan(X_develop)) = 0;
166 |     X_validate(isnan(X_validate)) = 0;
167 | 
168 | 
169 |     % (Option 1). A logistic regression
170 |     model = glmfit(X_develop, y_develop, 'binomial');
171 |     y_hat = glmval(model, X_validate, 'logit');
172 | 
173 |     % (Option 2). An SVM
174 |     % model = svmtrain(y_develop, X_develop, '-q -t 2');
175 |     % [pred,~,y_hat] = svmpredict(y_validate, X_validate, model);
176 | 
177 |     % if (pred(1) == 0 && y_hat(1) > 0) || (pred(1) == 1 && y_hat(1) < 0)
178 |     %     % flip the sign of dist to ensure that the AUROC is calculated properly
179 |     %     % the AUROC expects predictions of 1 to be assigned increasing distances
180 |     %     y_hat = -y_hat;
181 |     % end
182 | 
183 |     % Calculate our performance metric: the AUROC.
184 |     [~, ~, auroc(k)] = calcRoc(y_hat, y_validate);
185 | end
186 | 
187 | fprintf('Mean AUROC across %d folds: %4.4f.\n',K, mean(auroc));
188 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MIT-LCP/mimic-workshop/b27eee438a1f62d909dd30d1d458d3516f32b276/requirements.txt


--------------------------------------------------------------------------------
/temp/02-example-patient-sepsis.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Exploring the trajectory of a single patient"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "### Import Python libraries"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "We first need to import some tools for working with data in Python. \n",
 22 |     "- NumPy is for working with numbers\n",
 23 |     "- Pandas is for analysing data\n",
 24 |     "- MatPlotLib is for making plots\n",
 25 |     "- Sqlite3 to connect to the database"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": null,
 31 |    "metadata": {
 32 |     "collapsed": false
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "import numpy as np\n",
 37 |     "import pandas as pd\n",
 38 |     "import matplotlib.pyplot as plt\n",
 39 |     "import sqlite3\n",
 40 |     "%matplotlib inline"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "metadata": {},
 46 |    "source": [
 47 |     "### Connect to the database"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "markdown",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "- We can use the sqlite3 library to connect to the MIMIC database\n",
 55 |     "- Once the connection is established, we'll run a simple SQL query."
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": null,
 61 |    "metadata": {
 62 |     "collapsed": true
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "# Connect to the MIMIC database\n",
 67 |     "conn = sqlite3.connect('data/mimicdata.sqlite')"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": null,
 73 |    "metadata": {
 74 |     "collapsed": true
 75 |    },
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "# Create our test query\n",
 79 |     "test_query = \"\"\"\n",
 80 |     "SELECT subject_id, hadm_id, admittime, dischtime, admission_type, diagnosis\n",
 81 |     "FROM admissions\n",
 82 |     "LIMIT 10;\n",
 83 |     "\"\"\""
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": null,
 89 |    "metadata": {
 90 |     "collapsed": true
 91 |    },
 92 |    "outputs": [],
 93 |    "source": [
 94 |     "# Run the query and assign the results to a variable\n",
 95 |     "test = pd.read_sql_query(test_query,conn)"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": null,
101 |    "metadata": {
102 |     "collapsed": false
103 |    },
104 |    "outputs": [],
105 |    "source": [
106 |     "# Display the first few rows\n",
107 |     "test.head()"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "metadata": {},
113 |    "source": [
114 |     "### Load the chartevents data"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "markdown",
119 |    "metadata": {},
120 |    "source": [
121 |     "- The chartevents table contains data charted at the patient bedside. It includes variables such as heart rate, respiratory rate, temperature, and so on.\n",
122 |     "- We'll begin by loading the chartevents data for a single patient."
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": null,
128 |    "metadata": {
129 |     "collapsed": false
130 |    },
131 |    "outputs": [],
132 |    "source": [
133 |     "query = \"\"\"\n",
134 |     "SELECT de.icustay_id\n",
135 |     "  , (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS\n",
136 |     "  , di.label\n",
137 |     "  , de.value\n",
138 |     "  , de.valuenum\n",
139 |     "  , de.uom\n",
140 |     "FROM chartevents de\n",
141 |     "INNER join d_items di\n",
142 |     "ON de.itemid = di.itemid\n",
143 |     "INNER join icustays ie\n",
144 |     "ON de.icustay_id = ie.icustay_id\n",
145 |     "WHERE de.subject_id = 40036\n",
146 |     "ORDER BY charttime;\n",
147 |     "\"\"\"\n",
148 |     "\n",
149 |     "ce = pd.read_sql_query(query,conn)\n"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": null,
155 |    "metadata": {
156 |     "collapsed": false
157 |    },
158 |    "outputs": [],
159 |    "source": [
160 |     "# Preview the data\n",
161 |     "# Use 'head' to limit the number of rows returned\n",
162 |     "ce.head()"
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "markdown",
167 |    "metadata": {},
168 |    "source": [
169 |     "### Review the patient's heart rate"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "markdown",
174 |    "metadata": {},
175 |    "source": [
176 |     "- We can select individual columns using the column name. \n",
177 |     "- For example, if we want to select just the label column, we write **```ce.LABEL```** or alternatively **```ce['LABEL']```**"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": null,
183 |    "metadata": {
184 |     "collapsed": false
185 |    },
186 |    "outputs": [],
187 |    "source": [
188 |     "# Select a single column\n",
189 |     "ce['LABEL']"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "markdown",
194 |    "metadata": {},
195 |    "source": [
196 |     "- In a similar way, we can select rows from data using indexes. \n",
197 |     "- For example, to select rows where the label is equal to 'Heart Rate', we would create an index using **```[ce.LABEL=='Heart Rate']```** "
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "code",
202 |    "execution_count": null,
203 |    "metadata": {
204 |     "collapsed": false
205 |    },
206 |    "outputs": [],
207 |    "source": [
208 |     "# Select just the heart rate rows using an index\n",
209 |     "ce[ce.LABEL=='Heart Rate']"
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "markdown",
214 |    "metadata": {},
215 |    "source": [
216 |     "### Plot 1: How did the patient's heart rate change over time?"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "markdown",
221 |    "metadata": {},
222 |    "source": [
223 |     "- Using the methods described above to select our data of interest, we can create our x and y axis values to create a time series plot of heart rate."
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "code",
228 |    "execution_count": null,
229 |    "metadata": {
230 |     "collapsed": false
231 |    },
232 |    "outputs": [],
233 |    "source": [
234 |     "# Which time stamps have a corresponding heart rate measurement?\n",
235 |     "print ce.index[ce.LABEL=='Heart Rate']"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": null,
241 |    "metadata": {
242 |     "collapsed": false,
243 |     "scrolled": true
244 |    },
245 |    "outputs": [],
246 |    "source": [
247 |     "# Set x equal to the times\n",
248 |     "x_hr = ce.HOURS[ce.LABEL=='Heart Rate']\n",
249 |     "\n",
250 |     "# Set y equal to the heart rates\n",
251 |     "y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']\n",
252 |     "\n",
253 |     "# Plot time against heart rate\n",
254 |     "plt.figure(figsize=(14, 6))\n",
255 |     "plt.plot(x_hr,y_hr)\n",
256 |     "\n",
257 |     "\n",
258 |     "plt.xlabel('Time',fontsize=16)\n",
259 |     "plt.ylabel('Heart rate',fontsize=16)\n",
260 |     "plt.title('Heart rate over time from admission to the intensive care unit')"
261 |    ]
262 |   },
263 |   {
264 |    "cell_type": "markdown",
265 |    "metadata": {},
266 |    "source": [
267 |     "### Task 1\n",
268 |     "\n",
269 |     "* What is happening to this patient's heart rate?\n",
270 |     "* Plot respiratory rate over time for the patient.\n",
271 |     "* Is there anything unusual about the patient's respiratory rate?\n"
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "code",
276 |    "execution_count": null,
277 |    "metadata": {
278 |     "collapsed": true
279 |    },
280 |    "outputs": [],
281 |    "source": [
282 |     "# Exercise 1 here\n",
283 |     "\n"
284 |    ]
285 |   },
286 |   {
287 |    "cell_type": "markdown",
288 |    "metadata": {},
289 |    "source": [
290 |     "### Plot 2: Did the patient's vital signs breach any alarm thresholds?"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "markdown",
295 |    "metadata": {},
296 |    "source": [
297 |     "- Alarm systems in the intensive care unit are commonly based on high and low thresholds defined by the carer.\n",
298 |     "- False alarms are often a problem and so thresholds may be set arbitrarily to reduce alarms.\n",
299 |     "- As a result, alarm settings carry limited information."
300 |    ]
301 |   },
302 |   {
303 |    "cell_type": "code",
304 |    "execution_count": null,
305 |    "metadata": {
306 |     "collapsed": false
307 |    },
308 |    "outputs": [],
309 |    "source": [
310 |     "plt.figure(figsize=(14, 6))\n",
311 |     "\n",
312 |     "plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'], \n",
313 |     "         ce.VALUENUM[ce.LABEL=='Respiratory Rate'],\n",
314 |     "         'k+', markersize=10, linewidth=4)\n",
315 |     "\n",
316 |     "plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - High'], \n",
317 |     "         ce.VALUENUM[ce.LABEL=='Resp Alarm - High'],\n",
318 |     "         'm--')\n",
319 |     "\n",
320 |     "plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - Low'], \n",
321 |     "         ce.VALUENUM[ce.LABEL=='Resp Alarm - Low'],\n",
322 |     "         'm--')\n",
323 |     "\n",
324 |     "plt.xlabel('Time',fontsize=16)\n",
325 |     "plt.ylabel('Respiratory rate',fontsize=16)\n",
326 |     "plt.title('Respiratory rate over time from admission, with upper and lower alarm thresholds')\n",
327 |     "plt.ylim(0,55)\n"
328 |    ]
329 |   },
330 |   {
331 |    "cell_type": "markdown",
332 |    "metadata": {},
333 |    "source": [
334 |     "### Task 2\n",
335 |     "\n",
336 |     "- Based on the data, does it look like the alarms would have triggered for this patient?\n"
337 |    ]
338 |   },
339 |   {
340 |    "cell_type": "markdown",
341 |    "metadata": {},
342 |    "source": [
343 |     "### Plot 3: What is the patient's level of consciousness?"
344 |    ]
345 |   },
346 |   {
347 |    "cell_type": "markdown",
348 |    "metadata": {},
349 |    "source": [
350 |     "- Glasgow Coma Scale (GCS) is a measure of consciousness.\n",
351 |     "- It is commonly used for monitoring patients in the intensive care unit. \n",
352 |     "- It consists of three components: eye response; verbal response; motor response."
353 |    ]
354 |   },
355 |   {
356 |    "cell_type": "code",
357 |    "execution_count": null,
358 |    "metadata": {
359 |     "collapsed": false
360 |    },
361 |    "outputs": [],
362 |    "source": [
363 |     "# Display the first few rows of the GCS eye response data\n",
364 |     "ce[ce.LABEL=='GCS - Eye Opening'].head()"
365 |    ]
366 |   },
367 |   {
368 |    "cell_type": "code",
369 |    "execution_count": null,
370 |    "metadata": {
371 |     "collapsed": false
372 |    },
373 |    "outputs": [],
374 |    "source": [
375 |     "# Prepare the size of the figure\n",
376 |     "plt.figure(figsize=(14, 10))\n",
377 |     "\n",
378 |     "# Set x equal to the times\n",
379 |     "x_hr = ce.HOURS[ce.LABEL=='Heart Rate']\n",
380 |     "\n",
381 |     "# Set y equal to the heart rates\n",
382 |     "y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']\n",
383 |     "\n",
384 |     "\n",
385 |     "plt.plot(x_hr,y_hr)\n",
386 |     "\n",
387 |     "plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'], \n",
388 |     "         ce.VALUENUM[ce.LABEL=='Respiratory Rate'],\n",
389 |     "         'k', markersize=6)\n",
390 |     "\n",
391 |     "# Add a text label to the y-axis\n",
392 |     "plt.text(-4,155,'GCS - Eye Opening',fontsize=14)\n",
393 |     "plt.text(-4,150,'GCS - Motor Response',fontsize=14)\n",
394 |     "plt.text(-4,145,'GCS - Verbal Response',fontsize=14)   \n",
395 |     "\n",
396 |     "# Iterate over list of GCS labels, plotting every 6th label to avoid overlap\n",
397 |     "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Eye Opening'].values):\n",
398 |     "    if np.mod(i,6)==0 and i < 65:\n",
399 |     "        plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Eye Opening'].values[i],155),fontsize=14)\n",
400 |     "        \n",
401 |     "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Motor Response'].values):\n",
402 |     "    if np.mod(i,6)==0 and i < 65:\n",
403 |     "        plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Motor Response'].values[i],150),fontsize=14)\n",
404 |     "\n",
405 |     "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Verbal Response'].values):\n",
406 |     "    if np.mod(i,6)==0 and i < 65:\n",
407 |     "        plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Verbal Response'].values[i],145),fontsize=14)\n",
408 |     "\n",
409 |     "plt.title('Vital signs and Glasgow Coma Scale over time from admission',fontsize=16)\n",
410 |     "\n",
411 |     "plt.xlabel('Time (hours)',fontsize=16)\n",
412 |     "plt.ylabel('Heart rate or GCS',fontsize=16)\n",
413 |     "plt.ylim(10,165)\n"
414 |    ]
415 |   },
416 |   {
417 |    "cell_type": "markdown",
418 |    "metadata": {},
419 |    "source": [
420 |     "### Task 3\n",
421 |     "\n",
422 |     "- How is the patient's consciousness changing over time?"
423 |    ]
424 |   },
425 |   {
426 |    "cell_type": "markdown",
427 |    "metadata": {},
428 |    "source": [
429 |     "# Stop here..."
430 |    ]
431 |   },
432 |   {
433 |    "cell_type": "markdown",
434 |    "metadata": {},
435 |    "source": [
436 |     "### Plot 2: What other data do we have on the patient?"
437 |    ]
438 |   },
439 |   {
440 |    "cell_type": "markdown",
441 |    "metadata": {},
442 |    "source": [
443 |     "- Using Pandas' 'read_sql_query' function again, we'll now load the patient outputs data (for example, urine output, drains, dialysis). This data is contained in the outputevents data table."
444 |    ]
445 |   },
446 |   {
447 |    "cell_type": "code",
448 |    "execution_count": null,
449 |    "metadata": {
450 |     "collapsed": false
451 |    },
452 |    "outputs": [],
453 |    "source": [
454 |     "# OPTION 1: load outputs from the patient\n",
455 |     "query = \"\"\"\n",
456 |     "select de.icustay_id\n",
457 |     "  , (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS\n",
458 |     "  , di.label\n",
459 |     "  , de.value\n",
460 |     "  , de.valueuom\n",
461 |     "from outputevents de \n",
462 |     "inner join icustays ie\n",
463 |     "  on de.icustay_id = ie.icustay_id\n",
464 |     "inner join d_items di\n",
465 |     "  on de.itemid = di.itemid\n",
466 |     "where de.subject_id = 40036\n",
467 |     "order by charttime;\n",
468 |     "\"\"\"\n",
469 |     "\n",
470 |     "oe = pd.read_sql_query(query,conn)"
471 |    ]
472 |   },
473 |   {
474 |    "cell_type": "code",
475 |    "execution_count": null,
476 |    "metadata": {
477 |     "collapsed": false
478 |    },
479 |    "outputs": [],
480 |    "source": [
481 |     "oe.head()"
482 |    ]
483 |   },
484 |   {
485 |    "cell_type": "code",
486 |    "execution_count": null,
487 |    "metadata": {
488 |     "collapsed": false
489 |    },
490 |    "outputs": [],
491 |    "source": [
492 |     "# Prepare the size of the figure\n",
493 |     "plt.figure(figsize=(14, 10))\n",
494 |     "\n",
495 |     "plt.title('Fluid output over time')\n",
496 |     "\n",
497 |     "plt.plot(oe.HOURS, \n",
498 |     "         oe.VALUE.cumsum()/1000, \n",
499 |     "         'ro', markersize=8, label='Output volume, L')\n",
500 |     "\n",
501 |     "plt.xlim(0,20)\n",
502 |     "plt.ylim(0,2)\n",
503 |     "plt.legend()"
504 |    ]
505 |   },
506 |   {
507 |    "cell_type": "markdown",
508 |    "metadata": {},
509 |    "source": [
510 |     "To provide context for this plot, it would help to include patient input data. This helps to determine the patient's fluid balance, a key indicator in patient health."
511 |    ]
512 |   },
513 |   {
514 |    "cell_type": "code",
515 |    "execution_count": null,
516 |    "metadata": {
517 |     "collapsed": false
518 |    },
519 |    "outputs": [],
520 |    "source": [
521 |     "# Load inputs given to the patient (usually intravenously) using the database connection\n",
522 |     "query = \"\"\"\n",
523 |     "select de.icustay_id\n",
524 |     "  , (strftime('%s',de.starttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_START\n",
525 |     "  , (strftime('%s',de.endtime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_END\n",
526 |     "  , de.linkorderid\n",
527 |     "  , di.label\n",
528 |     "  , de.amount\n",
529 |     "  , de.amountuom\n",
530 |     "  , de.rate\n",
531 |     "  , de.rateuom\n",
532 |     "from inputevents_mv de \n",
533 |     "inner join icustays ie\n",
534 |     "  on de.icustay_id = ie.icustay_id\n",
535 |     "inner join d_items di\n",
536 |     "  on de.itemid = di.itemid\n",
537 |     "where de.subject_id = 40036\n",
538 |     "order by endtime;\n",
539 |     "\"\"\"\n",
540 |     "\n",
541 |     "ie = pd.read_sql_query(query,conn)\n"
542 |    ]
543 |   },
544 |   {
545 |    "cell_type": "code",
546 |    "execution_count": null,
547 |    "metadata": {
548 |     "collapsed": false
549 |    },
550 |    "outputs": [],
551 |    "source": [
552 |     "ie.head()"
553 |    ]
554 |   },
555 |   {
556 |    "cell_type": "markdown",
557 |    "metadata": {},
558 |    "source": [
559 |     "Note that the column headers are different: we have \"HOURS_START\" and \"HOURS_END\". This is because inputs are administered over a fixed period of time."
560 |    ]
561 |   },
562 |   {
563 |    "cell_type": "code",
564 |    "execution_count": null,
565 |    "metadata": {
566 |     "collapsed": false
567 |    },
568 |    "outputs": [],
569 |    "source": [
570 |     "ie['LABEL'].unique()"
571 |    ]
572 |   },
573 |   {
574 |    "cell_type": "code",
575 |    "execution_count": null,
576 |    "metadata": {
577 |     "collapsed": false
578 |    },
579 |    "outputs": [],
580 |    "source": [
581 |     "# Prepare the size of the figure\n",
582 |     "plt.figure(figsize=(14, 10))\n",
583 |     "\n",
584 |     "# Plot the cumulative input against the cumulative output\n",
585 |     "plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'], \n",
586 |     "         ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000, \n",
587 |     "         'go', markersize=8, label='Intake volume, L')\n",
588 |     "\n",
589 |     "plt.plot(oe.HOURS, \n",
590 |     "         oe.VALUE.cumsum()/1000, \n",
591 |     "         'ro', markersize=8, label='Output volume, L')\n",
592 |     "\n",
593 |     "plt.title('Fluid balance over time',fontsize=16)\n",
594 |     "plt.xlabel('Hours',fontsize=16)\n",
595 |     "plt.ylabel('Volume (litres)',fontsize=16)\n",
596 |     "# plt.ylim(0,38)\n",
597 |     "plt.legend()"
598 |    ]
599 |   },
600 |   {
601 |    "cell_type": "markdown",
602 |    "metadata": {
603 |     "collapsed": true
604 |    },
605 |    "source": [
606 |     "As the plot shows, the patient's intake tends to be above their output. There are however periods where input and output are almost one to one. One of the biggest challenges of working with ICU data is that context is everything, so let's look at a treatment (Furosemide/Lasix) which we know will affect this graph."
607 |    ]
608 |   },
609 |   {
610 |    "cell_type": "code",
611 |    "execution_count": null,
612 |    "metadata": {
613 |     "collapsed": false
614 |    },
615 |    "outputs": [],
616 |    "source": [
617 |     "plt.figure(figsize=(14, 10))\n",
618 |     "\n",
619 |     "# Plot the cumulative input against the cumulative output\n",
620 |     "plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'], \n",
621 |     "         ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000, \n",
622 |     "         'go', markersize=8, label='Intake volume, L')\n",
623 |     "\n",
624 |     "plt.plot(oe.HOURS, \n",
625 |     "         oe.VALUE.cumsum()/1000, \n",
626 |     "         'ro', markersize=8, label='Output volume, L')\n",
627 |     "\n",
628 |     "# example on getting two columns from a dataframe: ie[['HOURS_START','HOURS_END']].head()\n",
629 |     "\n",
630 |     "for i, idx in enumerate(ie.index[ie.LABEL=='Furosemide (Lasix)']):\n",
631 |     "    plt.plot([ie.HOURS_START[ie.LABEL=='Furosemide (Lasix)'][idx],\n",
632 |     "             ie.HOURS_END[ie.LABEL=='Furosemide (Lasix)'][idx]],\n",
633 |     "            [ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx],\n",
634 |     "             ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx]],\n",
635 |     "            'b-',linewidth=4)\n",
636 |     "    \n",
637 |     "\n",
638 |     "plt.title('Fluid balance over time',fontsize=16)\n",
639 |     "plt.xlabel('Hours',fontsize=16)\n",
640 |     "plt.ylabel('Volume (litres)',fontsize=16)\n",
641 |     "# plt.ylim(0,38)\n",
642 |     "plt.legend()\n"
643 |    ]
644 |   },
645 |   {
646 |    "cell_type": "code",
647 |    "execution_count": null,
648 |    "metadata": {
649 |     "collapsed": false
650 |    },
651 |    "outputs": [],
652 |    "source": [
653 |     "ie['LABEL'].unique()"
654 |    ]
655 |   },
656 |   {
657 |    "cell_type": "markdown",
658 |    "metadata": {},
659 |    "source": [
660 |     "### Exercise 2\n",
661 |     "\n",
662 |     "* Plot the alarms for the mean arterial pressure ('```Arterial Blood Pressure mean```')\n",
663 |     "* HINT: you can use ```ce.LABEL.unique()``` to find a list of variable names\n",
664 |     "* Were the alarm thresholds breached?"
665 |    ]
666 |   },
667 |   {
668 |    "cell_type": "code",
669 |    "execution_count": null,
670 |    "metadata": {
671 |     "collapsed": false
672 |    },
673 |    "outputs": [],
674 |    "source": [
675 |     "# Exercise 2 here\n",
676 |     "\n",
677 |     "\n"
678 |    ]
679 |   },
680 |   {
681 |    "cell_type": "markdown",
682 |    "metadata": {},
683 |    "source": [
684 |     "### Plot 3: Were the patient's other vital signs stable?"
685 |    ]
686 |   },
687 |   {
688 |    "cell_type": "code",
689 |    "execution_count": null,
690 |    "metadata": {
691 |     "collapsed": false
692 |    },
693 |    "outputs": [],
694 |    "source": [
695 |     "plt.figure(figsize=(14, 10))\n",
696 |     "\n",
697 |     "plt.plot(ce.index[ce.LABEL=='Heart Rate'], \n",
698 |     "         ce.VALUENUM[ce.LABEL=='Heart Rate'],\n",
699 |     "         'rx', markersize=8, label='HR')\n",
700 |     "\n",
701 |     "plt.plot(ce.index[ce.LABEL=='O2 saturation pulseoxymetry'], \n",
702 |     "         ce.VALUENUM[ce.LABEL=='O2 saturation pulseoxymetry'], \n",
703 |     "         'g.', markersize=8, label='O2')\n",
704 |     "\n",
705 |     "plt.plot(ce.index[ce.LABEL=='Arterial Blood Pressure mean'], \n",
706 |     "         ce.VALUENUM[ce.LABEL=='Arterial Blood Pressure mean'], \n",
707 |     "         'bv', markersize=8, label='MAP')\n",
708 |     "\n",
709 |     "plt.plot(ce.index[ce.LABEL=='Respiratory Rate'], \n",
710 |     "         ce.VALUENUM[ce.LABEL=='Respiratory Rate'], \n",
711 |     "         'k+', markersize=8, label='RR')\n",
712 |     "\n",
713 |     "plt.title('Vital signs over time from admission')\n",
714 |     "plt.ylim(0,130)\n",
715 |     "plt.legend()"
716 |    ]
717 |   },
718 |   {
719 |    "cell_type": "markdown",
720 |    "metadata": {},
721 |    "source": [
722 |     "### Plot 4: Laboratory measurements"
723 |    ]
724 |   },
725 |   {
726 |    "cell_type": "markdown",
727 |    "metadata": {},
728 |    "source": [
729 |     "Using the Pandas 'read_sql_query' function, we'll now load the labevents data.\n",
730 |     "This data corresponds to measurements made in a laboratory - usually on a sample of patient blood. "
731 |    ]
732 |   },
733 |   {
734 |    "cell_type": "code",
735 |    "execution_count": null,
736 |    "metadata": {
737 |     "collapsed": false
738 |    },
739 |    "outputs": [],
740 |    "source": [
741 |     "# OPTION 1: load labevents data using the database connection\n",
742 |     "query = \"\"\"\n",
743 |     "SELECT de.subject_id\n",
744 |     "  , de.charttime\n",
745 |     "  , di.label, de.value, de.valuenum\n",
746 |     "  , de.uom\n",
747 |     "FROM labevents de\n",
748 |     "INNER JOIN d_labitems di\n",
749 |     "  ON de.itemid = di.itemid\n",
750 |     "where de.subject_id = 40036\n",
751 |     "\"\"\"\n",
752 |     "\n",
753 |     "le = pd.read_sql_query(query,conn)"
754 |    ]
755 |   },
756 |   {
757 |    "cell_type": "code",
758 |    "execution_count": null,
759 |    "metadata": {
760 |     "collapsed": false
761 |    },
762 |    "outputs": [],
763 |    "source": [
764 |     "# preview the labevents data\n",
765 |     "le.head()"
766 |    ]
767 |   },
768 |   {
769 |    "cell_type": "code",
770 |    "execution_count": null,
771 |    "metadata": {
772 |     "collapsed": false
773 |    },
774 |    "outputs": [],
775 |    "source": [
776 |     "# show the hemoglobin rows of the labevents data\n",
777 |     "le[le.LABEL=='HEMOGLOBIN']"
778 |    ]
779 |   },
780 |   {
781 |    "cell_type": "code",
782 |    "execution_count": null,
783 |    "metadata": {
784 |     "collapsed": false
785 |    },
786 |    "outputs": [],
787 |    "source": [
788 |     "plt.figure(figsize=(14, 10))\n",
789 |     "\n",
790 |     "plt.plot(le.index[le.LABEL=='HEMATOCRIT'], \n",
791 |     "         le.VALUENUM[le.LABEL=='HEMATOCRIT'], \n",
792 |     "         'go', markersize=6, label='Haematocrit')\n",
793 |     "\n",
794 |     "plt.plot(le.index[le.LABEL=='HEMOGLOBIN'], \n",
795 |     "         le.VALUENUM[le.LABEL=='HEMOGLOBIN'], \n",
796 |     "         'bv', markersize=8, label='Hemoglobin')\n",
797 |     "\n",
798 |     "plt.title('Laboratory measurements over time from admission')\n",
799 |     "plt.ylim(0,38)\n",
800 |     "plt.legend()"
801 |    ]
802 |   },
803 |   {
804 |    "cell_type": "code",
805 |    "execution_count": null,
806 |    "metadata": {
807 |     "collapsed": true
808 |    },
809 |    "outputs": [],
810 |    "source": []
811 |   },
812 |   {
813 |    "cell_type": "markdown",
814 |    "metadata": {},
815 |    "source": [
816 |     "## Plot 5: intravenous medications"
817 |    ]
818 |   },
819 |   {
820 |    "cell_type": "markdown",
821 |    "metadata": {},
822 |    "source": [
823 |     "- Using the Pandas 'read_csv' function again, we'll now load the ioevents dataset"
824 |    ]
825 |   },
826 |   {
827 |    "cell_type": "code",
828 |    "execution_count": null,
829 |    "metadata": {
830 |     "collapsed": false
831 |    },
832 |    "outputs": [],
833 |    "source": [
834 |     "# load ioevents\n",
835 |     "ioe = pd.read_csv('data/example_ioevents.csv',index_col='HOURSSINCEADMISSION_START')"
836 |    ]
837 |   },
838 |   {
839 |    "cell_type": "code",
840 |    "execution_count": null,
841 |    "metadata": {
842 |     "collapsed": false
843 |    },
844 |    "outputs": [],
845 |    "source": [
846 |     "ioe.head()\n"
847 |    ]
848 |   },
849 |   {
850 |    "cell_type": "code",
851 |    "execution_count": null,
852 |    "metadata": {
853 |     "collapsed": false
854 |    },
855 |    "outputs": [],
856 |    "source": [
857 |     "plt.figure(figsize=(14, 10))\n",
858 |     "\n",
859 |     "plt.plot(ioe.index[ioe.LABEL=='Midazolam (Versed)'], \n",
860 |     "         ioe.RATE[ioe.LABEL=='Midazolam (Versed)'], \n",
861 |     "         'go', markersize=6, label='Midazolam (Versed)')\n",
862 |     "\n",
863 |     "plt.plot(ioe.index[ioe.LABEL=='Propofol'], \n",
864 |     "         ioe.RATE[ioe.LABEL=='Propofol'], \n",
865 |     "         'bv', markersize=8, label='Propofol')\n",
866 |     "\n",
867 |     "plt.plot(ioe.index[ioe.LABEL=='Fentanyl'], \n",
868 |     "         ioe.RATE[ioe.LABEL=='Fentanyl'], \n",
869 |     "         'k+', markersize=8, label='Fentanyl')\n",
870 |     "\n",
871 |     "plt.title('IOevents over time from admission')\n",
872 |     "plt.ylim(0,380)\n",
873 |     "plt.legend()"
874 |    ]
875 |   },
876 |   {
877 |    "cell_type": "code",
878 |    "execution_count": null,
879 |    "metadata": {
880 |     "collapsed": true
881 |    },
882 |    "outputs": [],
883 |    "source": []
884 |   },
885 |   {
886 |    "cell_type": "markdown",
887 |    "metadata": {},
888 |    "source": [
889 |     "## Plot 6: blood products"
890 |    ]
891 |   },
892 |   {
893 |    "cell_type": "markdown",
894 |    "metadata": {},
895 |    "source": [
896 |     "- The blood products are recorded in the ioevents dataset loaded above, so we can plot them directly"
897 |    ]
898 |   },
899 |   {
900 |    "cell_type": "code",
901 |    "execution_count": null,
902 |    "metadata": {
903 |     "collapsed": false
904 |    },
905 |    "outputs": [],
906 |    "source": [
907 |     "plt.figure(figsize=(14, 10))\n",
908 |     "\n",
909 |     "plt.plot(ioe.index[ioe.LABEL=='OR Cryoprecipitate Intake'], \n",
910 |     "         ioe.VALUENUM[ioe.LABEL=='OR Cryoprecipitate Intake'], \n",
911 |     "         'go', markersize=6, label='OR Cryoprecipitate Intake')\n",
912 |     "\n",
913 |     "plt.plot(ioe.index[ioe.LABEL=='OR Crystalloid Intake'], \n",
914 |     "         ioe.VALUENUM[ioe.LABEL=='OR Crystalloid Intake'], \n",
915 |     "         'bv', markersize=8, label='OR Crystalloid Intake')\n",
916 |     "\n",
917 |     "plt.plot(ioe.index[ioe.LABEL=='OR FFP Intake'], \n",
918 |     "         ioe.VALUENUM[ioe.LABEL=='OR FFP Intake'], \n",
919 |     "         'k+', markersize=8, label='OR FFP Intake')\n",
920 |     "\n",
921 |     "plt.plot(ioe.index[ioe.LABEL=='OR Packed RBC Intake'], \n",
922 |     "         ioe.VALUENUM[ioe.LABEL=='OR Packed RBC Intake'], \n",
923 |     "         'k+', markersize=8, label='OR Packed RBC Intake')\n",
924 |     "\n",
925 |     "plt.plot(ioe.index[ioe.LABEL=='OR Platelet Intake'], \n",
926 |     "         ioe.VALUENUM[ioe.LABEL=='OR Platelet Intake'], \n",
927 |     "         'k+', markersize=8, label='OR Platelet Intake')\n",
928 |     "\n",
929 |     "plt.title('Blood products administered over time from admission')\n",
930 |     "plt.legend()"
931 |    ]
932 |   },
933 |   {
934 |    "cell_type": "markdown",
935 |    "metadata": {},
936 |    "source": [
937 |     "### Discharge summary"
938 |    ]
939 |   },
940 |   {
941 |    "cell_type": "code",
942 |    "execution_count": null,
943 |    "metadata": {
944 |     "collapsed": false
945 |    },
946 |    "outputs": [],
947 |    "source": [
948 |     "# insert discharge summary here..."
949 |    ]
950 |   }
951 |  ],
952 |  "metadata": {
953 |   "kernelspec": {
954 |    "display_name": "Python 2",
955 |    "language": "python",
956 |    "name": "python2"
957 |   },
958 |   "language_info": {
959 |    "codemirror_mode": {
960 |     "name": "ipython",
961 |     "version": 2
962 |    },
963 |    "file_extension": ".py",
964 |    "mimetype": "text/x-python",
965 |    "name": "python",
966 |    "nbconvert_exporter": "python",
967 |    "pygments_lexer": "ipython2",
968 |    "version": "2.7.10"
969 |   }
970 |  },
971 |  "nbformat": 4,
972 |  "nbformat_minor": 0
973 | }
974 | 


--------------------------------------------------------------------------------
/temp/03-example-patient-ich.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Exploring the trajectory of a single patient"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "### Import Python libraries"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "We first need to import some tools for working with data in Python. \n",
 22 |     "- NumPy is for working with numbers\n",
 23 |     "- Pandas is for analysing data\n",
 24 |     "- MatPlotLib is for making plots\n",
 25 |     "- Sqlite3 to connect to the database"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": null,
 31 |    "metadata": {
 32 |     "collapsed": false
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "import numpy as np\n",
 37 |     "import pandas as pd\n",
 38 |     "import matplotlib.pyplot as plt\n",
 39 |     "import sqlite3\n",
 40 |     "%matplotlib inline"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "metadata": {},
 46 |    "source": [
 47 |     "### Connect to the database"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "markdown",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "- We can use the sqlite3 library to connect to the MIMIC database\n",
 55 |     "- Once the connection is established, we'll run a simple SQL query."
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": null,
 61 |    "metadata": {
 62 |     "collapsed": false
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "# Connect to the MIMIC database\n",
 67 |     "conn = sqlite3.connect('data/mimicdata.sqlite')"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": null,
 73 |    "metadata": {
 74 |     "collapsed": true
 75 |    },
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "# Create our test query\n",
 79 |     "test_query = \"\"\"\n",
 80 |     "SELECT subject_id, hadm_id, admittime, dischtime, admission_type, diagnosis\n",
 81 |     "FROM admissions\n",
 82 |     "LIMIT 10;\n",
 83 |     "\"\"\""
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": null,
 89 |    "metadata": {
 90 |     "collapsed": true
 91 |    },
 92 |    "outputs": [],
 93 |    "source": [
 94 |     "# Run the query and assign the results to a variable\n",
 95 |     "test = pd.read_sql_query(test_query,conn)"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": null,
101 |    "metadata": {
102 |     "collapsed": false
103 |    },
104 |    "outputs": [],
105 |    "source": [
106 |     "# Display the first few rows\n",
107 |     "test.head()"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "metadata": {},
113 |    "source": [
114 |     "### Load the chartevents data"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "markdown",
119 |    "metadata": {},
120 |    "source": [
121 |     "- The chartevents table contains data charted at the patient bedside. It includes variables such as heart rate, respiratory rate, temperature, and so on.\n",
122 |     "- We'll begin by loading the chartevents data for a single patient."
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": null,
128 |    "metadata": {
129 |     "collapsed": false
130 |    },
131 |    "outputs": [],
132 |    "source": [
133 |     "query = \"\"\"\n",
134 |     "SELECT de.icustay_id\n",
135 |     "  , (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS\n",
136 |     "  , di.label\n",
137 |     "  , de.value\n",
138 |     "  , de.valuenum\n",
139 |     "  , de.uom\n",
140 |     "FROM chartevents de\n",
141 |     "INNER join d_items di\n",
142 |     "ON de.itemid = di.itemid\n",
143 |     "INNER join icustays ie\n",
144 |     "ON de.icustay_id = ie.icustay_id\n",
145 |     "WHERE de.subject_id = 40084\n",
146 |     "ORDER BY charttime;\n",
147 |     "\"\"\"\n",
148 |     "\n",
149 |     "ce = pd.read_sql_query(query,conn)\n",
150 |     "\n",
151 |     "\n",
152 |     "# OPTION 2: load chartevents from a CSV file\n",
153 |     "# ce = pd.read_csv('data/example_chartevents.csv', index_col='HOURSSINCEADMISSION')"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": null,
159 |    "metadata": {
160 |     "collapsed": false
161 |    },
162 |    "outputs": [],
163 |    "source": [
164 |     "# Preview the data\n",
165 |     "# Use 'head' to limit the number of rows returned\n",
166 |     "ce.head()"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "markdown",
171 |    "metadata": {},
172 |    "source": [
173 |     "### Review the patient's heart rate"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "markdown",
178 |    "metadata": {},
179 |    "source": [
180 |     "- We can select individual columns using the column name. \n",
181 |     "- For example, if we want to select just the label column, we write **```ce.LABEL```** or alternatively **```ce['LABEL']```**"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "code",
186 |    "execution_count": null,
187 |    "metadata": {
188 |     "collapsed": false
189 |    },
190 |    "outputs": [],
191 |    "source": [
192 |     "# Select a single column\n",
193 |     "ce['LABEL'].head()"
194 |    ]
195 |   },
196 |   {
197 |    "cell_type": "markdown",
198 |    "metadata": {},
199 |    "source": [
200 |     "- In a similar way, we can select rows from data using indexes. \n",
201 |     "- For example, to select rows where the label is equal to 'Heart Rate', we would create an index using **```[ce.LABEL=='Heart Rate']```** "
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": null,
207 |    "metadata": {
208 |     "collapsed": false
209 |    },
210 |    "outputs": [],
211 |    "source": [
212 |     "# Select just the heart rate rows using an index\n",
213 |     "ce[ce.LABEL=='Heart Rate'].head()"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "markdown",
218 |    "metadata": {},
219 |    "source": [
220 |     "### Plot 1: How did the patient's heart rate change over time?"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "markdown",
225 |    "metadata": {},
226 |    "source": [
227 |     "- Using the methods described above to select our data of interest, we can create our x and y axis values to create a time series plot of heart rate."
228 |    ]
229 |   },
230 |   {
231 |    "cell_type": "code",
232 |    "execution_count": null,
233 |    "metadata": {
234 |     "collapsed": false
235 |    },
236 |    "outputs": [],
237 |    "source": [
238 |     "# Which rows (index values) have a corresponding heart rate measurement?\n",
239 |     "print ce.index[ce.LABEL=='Heart Rate']"
240 |    ]
241 |   },
242 |   {
243 |    "cell_type": "code",
244 |    "execution_count": null,
245 |    "metadata": {
246 |     "collapsed": false,
247 |     "scrolled": true
248 |    },
249 |    "outputs": [],
250 |    "source": [
251 |     "# Set x equal to the times\n",
252 |     "x_hr = ce.HOURS[ce.LABEL=='Heart Rate']\n",
253 |     "\n",
254 |     "# Set y equal to the heart rates\n",
255 |     "y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']\n",
256 |     "\n",
257 |     "# Plot time against heart rate\n",
258 |     "plt.figure(figsize=(14, 6))\n",
259 |     "plt.plot(x_hr,y_hr)\n",
260 |     "\n",
261 |     "\n",
262 |     "plt.xlabel('Time',fontsize=16)\n",
263 |     "plt.ylabel('Heart rate',fontsize=16)\n",
264 |     "plt.title('Heart rate over time from admission to the intensive care unit')"
265 |    ]
266 |   },
267 |   {
268 |    "cell_type": "code",
269 |    "execution_count": null,
270 |    "metadata": {
271 |     "collapsed": false
272 |    },
273 |    "outputs": [],
274 |    "source": [
275 |     "ce['LABEL'].unique()"
276 |    ]
277 |   },
278 |   {
279 |    "cell_type": "markdown",
280 |    "metadata": {},
281 |    "source": [
282 |     "### Task 1\n",
283 |     "\n",
284 |     "* What is happening to this patient's heart rate?\n",
285 |     "* Plot respiratory rate over time for the patient.\n",
286 |     "* Is there anything unusual about the patient's respiratory rate?\n"
287 |    ]
288 |   },
289 |   {
290 |    "cell_type": "code",
291 |    "execution_count": null,
292 |    "metadata": {
293 |     "collapsed": true
294 |    },
295 |    "outputs": [],
296 |    "source": [
297 |     "# Exercise 1 here\n",
298 |     "\n"
299 |    ]
300 |   },
301 |   {
302 |    "cell_type": "markdown",
303 |    "metadata": {},
304 |    "source": [
305 |     "# What is happening to this patient's heart rate?"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "code",
310 |    "execution_count": null,
311 |    "metadata": {
312 |     "collapsed": false
313 |    },
314 |    "outputs": [],
315 |    "source": [
316 |     "# Set x equal to the times\n",
317 |     "x_hr = ce.HOURS[ce.LABEL=='Heart Rate']\n",
318 |     "\n",
319 |     "# Set y equal to the heart rates\n",
320 |     "y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']\n",
321 |     "\n",
322 |     "# Plot time against heart rate\n",
323 |     "plt.figure(figsize=(14, 6))\n",
324 |     "plt.plot(x_hr,y_hr)\n",
325 |     "\n",
326 |     "# Get some information regarding arctic sun\n",
327 |     "plt.plot(ce.HOURS[ce.LABEL=='Arctic Sun/Alsius Set Temp'], \n",
328 |     "         ce.VALUENUM[ce.LABEL=='Arctic Sun/Alsius Set Temp'],\n",
329 |     "         'k+--',markersize=8)\n",
330 |     "plt.plot(ce.HOURS[ce.LABEL=='Arctic Sun Water Temp'], \n",
331 |     "         ce.VALUENUM[ce.LABEL=='Arctic Sun Water Temp'],\n",
332 |     "         'r+--',markersize=8)\n",
333 |     "plt.plot(ce.HOURS[ce.LABEL=='Arctic Sun/Alsius Temp #1 C'], \n",
334 |     "         ce.VALUENUM[ce.LABEL=='Arctic Sun/Alsius Temp #1 C'],\n",
335 |     "         'b+--',markersize=8)\n",
336 |     "plt.plot(ce.HOURS[ce.LABEL=='Arctic Sun/Alsius Temp #2 C'], \n",
337 |     "         ce.VALUENUM[ce.LABEL=='Arctic Sun/Alsius Temp #2 C'],\n",
338 |     "         'g+--',markersize=8)\n",
339 |     "\n",
340 |     "plt.xlabel('Time',fontsize=16)\n",
341 |     "plt.ylabel('Heart rate',fontsize=16)\n",
342 |     "\n",
343 |     "plt.xlabel('Time (hours)',fontsize=16)\n",
344 |     "plt.ylabel('Heart rate / temperature',fontsize=16)\n",
345 |     "plt.title('Heart rate over time')\n",
346 |     "plt.ylim(0,80)\n",
347 |     "plt.xlim(0,48)\n",
348 |     "plt.legend()"
349 |    ]
350 |   },
351 |   {
352 |    "cell_type": "markdown",
353 |    "metadata": {},
354 |    "source": [
355 |     "### Plot 2: Did the patient's vital signs breach any alarm thresholds?"
356 |    ]
357 |   },
358 |   {
359 |    "cell_type": "markdown",
360 |    "metadata": {},
361 |    "source": [
362 |     "- Alarm systems in the intensive care unit are commonly based on high and low thresholds defined by the carer.\n",
363 |     "- False alarms are often a problem and so thresholds may be set arbitrarily to reduce alarms.\n",
364 |     "- As a result, alarm settings carry limited information."
365 |    ]
366 |   },
367 |   {
368 |    "cell_type": "code",
369 |    "execution_count": null,
370 |    "metadata": {
371 |     "collapsed": false
372 |    },
373 |    "outputs": [],
374 |    "source": [
375 |     "plt.figure(figsize=(14, 6))\n",
376 |     "\n",
377 |     "plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'], \n",
378 |     "         ce.VALUENUM[ce.LABEL=='Respiratory Rate'],\n",
379 |     "         'k+', markersize=10, linewidth=4)\n",
380 |     "\n",
381 |     "plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - High'], \n",
382 |     "         ce.VALUENUM[ce.LABEL=='Resp Alarm - High'],\n",
383 |     "         'm--')\n",
384 |     "\n",
385 |     "plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - Low'], \n",
386 |     "         ce.VALUENUM[ce.LABEL=='Resp Alarm - Low'],\n",
387 |     "         'm--')\n",
388 |     "\n",
389 |     "plt.xlabel('Time',fontsize=16)\n",
390 |     "plt.ylabel('Respiratory rate',fontsize=16)\n",
391 |     "plt.title('Respiratory rate over time from admission, with upper and lower alarm thresholds')\n",
392 |     "plt.ylim(0,55)\n"
393 |    ]
394 |   },
395 |   {
396 |    "cell_type": "markdown",
397 |    "metadata": {},
398 |    "source": [
399 |     "### Task 2\n",
400 |     "\n",
401 |     "- Based on the data, does it look like the alarms would have triggered for this patient?\n"
402 |    ]
403 |   },
404 |   {
405 |    "cell_type": "markdown",
406 |    "metadata": {},
407 |    "source": [
408 |     "### Plot 3: What is the patient's level of consciousness?"
409 |    ]
410 |   },
411 |   {
412 |    "cell_type": "markdown",
413 |    "metadata": {},
414 |    "source": [
415 |     "- Glasgow Coma Scale (GCS) is a measure of consciousness.\n",
416 |     "- It is commonly used for monitoring patients in the intensive care unit. \n",
417 |     "- It consists of three components: eye response; verbal response; motor response."
418 |    ]
419 |   },
420 |   {
421 |    "cell_type": "code",
422 |    "execution_count": null,
423 |    "metadata": {
424 |     "collapsed": false
425 |    },
426 |    "outputs": [],
427 |    "source": [
428 |     "# Display the first few rows of the GCS eye response data\n",
429 |     "ce[ce.LABEL=='GCS - Eye Opening'].head()"
430 |    ]
431 |   },
432 |   {
433 |    "cell_type": "code",
434 |    "execution_count": null,
435 |    "metadata": {
436 |     "collapsed": false
437 |    },
438 |    "outputs": [],
439 |    "source": [
440 |     "# Prepare the size of the figure\n",
441 |     "plt.figure(figsize=(18, 10))\n",
442 |     "\n",
443 |     "# Set x equal to the times\n",
444 |     "x_hr = ce.HOURS[ce.LABEL=='Heart Rate']\n",
445 |     "\n",
446 |     "# Set y equal to the heart rates\n",
447 |     "y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']\n",
448 |     "\n",
449 |     "\n",
450 |     "plt.plot(x_hr,y_hr)\n",
451 |     "\n",
452 |     "plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'], \n",
453 |     "         ce.VALUENUM[ce.LABEL=='Respiratory Rate'],\n",
454 |     "         'k', markersize=6)\n",
455 |     "\n",
456 |     "# Add a text label to the y-axis\n",
457 |     "plt.text(-5,155,'GCS - Eye Opening',fontsize=14)\n",
458 |     "plt.text(-5,150,'GCS - Motor Response',fontsize=14)\n",
459 |     "plt.text(-5,145,'GCS - Verbal Response',fontsize=14)   \n",
460 |     "\n",
461 |     "# Iterate over list of GCS labels, plotting every 6th label (among the first 65) to avoid overlap\n",
462 |     "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Eye Opening'].values):\n",
463 |     "    if np.mod(i,6)==0 and i < 65:\n",
464 |     "        plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Eye Opening'].values[i],155),fontsize=14)\n",
465 |     "        \n",
466 |     "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Motor Response'].values):\n",
467 |     "    if np.mod(i,6)==0 and i < 65:\n",
468 |     "        plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Motor Response'].values[i],150),fontsize=14)\n",
469 |     "\n",
470 |     "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Verbal Response'].values):\n",
471 |     "    if np.mod(i,6)==0 and i < 65:\n",
472 |     "        plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Verbal Response'].values[i],145),fontsize=14)\n",
473 |     "\n",
474 |     "plt.title('Vital signs and Glasgow Coma Scale over time from admission',fontsize=16)\n",
475 |     "\n",
476 |     "plt.xlabel('Time (hours)',fontsize=16)\n",
477 |     "plt.ylabel('Heart rate or GCS',fontsize=16)\n",
478 |     "plt.ylim(10,165)\n"
479 |    ]
480 |   },
481 |   {
482 |    "cell_type": "markdown",
483 |    "metadata": {},
484 |    "source": [
485 |     "### Task 3\n",
486 |     "\n",
487 |     "- How is the patient's consciousness changing over time?"
488 |    ]
489 |   },
490 |   {
491 |    "cell_type": "markdown",
492 |    "metadata": {},
493 |    "source": [
494 |     "# Stop here..."
495 |    ]
496 |   },
497 |   {
498 |    "cell_type": "markdown",
499 |    "metadata": {},
500 |    "source": [
501 |     "### Plot 2: What other data do we have on the patient?"
502 |    ]
503 |   },
504 |   {
505 |    "cell_type": "markdown",
506 |    "metadata": {},
507 |    "source": [
508 |     "- Using the Pandas 'read_sql_query' function again, we'll now load the outputevents data - this table contains all information about patient outputs (urine output, drains, dialysis)."
509 |    ]
510 |   },
511 |   {
512 |    "cell_type": "code",
513 |    "execution_count": null,
514 |    "metadata": {
515 |     "collapsed": false
516 |    },
517 |    "outputs": [],
518 |    "source": [
519 |     "# OPTION 1: load outputs from the patient\n",
520 |     "query = \"\"\"\n",
521 |     "select de.icustay_id\n",
522 |     "  , (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS\n",
523 |     "  , di.label\n",
524 |     "  , de.value\n",
525 |     "  , de.valueuom\n",
526 |     "from outputevents de \n",
527 |     "inner join icustays ie\n",
528 |     "  on de.icustay_id = ie.icustay_id\n",
529 |     "inner join d_items di\n",
530 |     "  on de.itemid = di.itemid\n",
531 |     "where de.subject_id = 40084\n",
532 |     "order by charttime;\n",
533 |     "\"\"\"\n",
534 |     "\n",
535 |     "oe = pd.read_sql_query(query,conn)"
536 |    ]
537 |   },
538 |   {
539 |    "cell_type": "code",
540 |    "execution_count": null,
541 |    "metadata": {
542 |     "collapsed": false
543 |    },
544 |    "outputs": [],
545 |    "source": [
546 |     "oe.head()"
547 |    ]
548 |   },
549 |   {
550 |    "cell_type": "code",
551 |    "execution_count": null,
552 |    "metadata": {
553 |     "collapsed": false
554 |    },
555 |    "outputs": [],
556 |    "source": [
557 |     "plt.figure(figsize=(14, 10))\n",
558 |     "\n",
559 |     "plt.figure(figsize=(14, 6))\n",
560 |     "plt.title('Fluid output over time')\n",
561 |     "\n",
562 |     "plt.plot(oe.HOURS, \n",
563 |     "         oe.VALUE.cumsum()/1000, \n",
564 |     "         'ro', markersize=8, label='Output volume, L')\n",
565 |     "\n",
566 |     "plt.xlim(0,72)\n",
567 |     "plt.ylim(0,10)\n",
568 |     "plt.legend()"
569 |    ]
570 |   },
571 |   {
572 |    "cell_type": "markdown",
573 |    "metadata": {},
574 |    "source": [
575 |     "To put this plot in context, it helps to include the patient's input data. Together, inputs and outputs determine the patient's fluid balance - a key indicator of patient health."
576 |    ]
577 |   },
578 |   {
579 |    "cell_type": "code",
580 |    "execution_count": null,
581 |    "metadata": {
582 |     "collapsed": false
583 |    },
584 |    "outputs": [],
585 |    "source": [
586 |     "# OPTION 1: load inputs given to the patient (usually intravenously) using the database connection\n",
587 |     "query = \"\"\"\n",
588 |     "select de.icustay_id\n",
589 |     "  , (strftime('%s',de.starttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_START\n",
590 |     "  , (strftime('%s',de.endtime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_END\n",
591 |     "  , de.linkorderid\n",
592 |     "  , di.label\n",
593 |     "  , de.amount\n",
594 |     "  , de.amountuom\n",
595 |     "  , de.rate\n",
596 |     "  , de.rateuom\n",
597 |     "from inputevents_mv de \n",
598 |     "inner join icustays ie\n",
599 |     "  on de.icustay_id = ie.icustay_id\n",
600 |     "inner join d_items di\n",
601 |     "  on de.itemid = di.itemid\n",
602 |     "where de.subject_id = 40084\n",
603 |     "order by endtime;\n",
604 |     "\"\"\"\n",
605 |     "\n",
606 |     "ie = pd.read_sql_query(query,conn)\n",
607 |     "\n",
608 |     "# # OPTION 2: load ioevents using the CSV file with endtime as the index\n",
609 |     "# ioe = pd.read_csv('inputevents.csv'\n",
610 |     "#                   ,header=None\n",
611 |     "#                   ,names=['subject_id','itemid','label','starttime','endtime','amount','amountuom','rate','rateuom']\n",
612 |     "#                   ,parse_dates=True)"
613 |    ]
614 |   },
615 |   {
616 |    "cell_type": "code",
617 |    "execution_count": null,
618 |    "metadata": {
619 |     "collapsed": false
620 |    },
621 |    "outputs": [],
622 |    "source": [
623 |     "ie.head()"
624 |    ]
625 |   },
626 |   {
627 |    "cell_type": "markdown",
628 |    "metadata": {},
629 |    "source": [
630 |     "Note that the column headers are different: we have \"HOURS_START\" and \"HOURS_END\". This is because inputs are administered over a fixed period of time."
631 |    ]
632 |   },
633 |   {
634 |    "cell_type": "code",
635 |    "execution_count": null,
636 |    "metadata": {
637 |     "collapsed": false
638 |    },
639 |    "outputs": [],
640 |    "source": [
641 |     "ie['LABEL'].unique()"
642 |    ]
643 |   },
644 |   {
645 |    "cell_type": "code",
646 |    "execution_count": null,
647 |    "metadata": {
648 |     "collapsed": false
649 |    },
650 |    "outputs": [],
651 |    "source": [
652 |     "plt.figure(figsize=(14, 10))\n",
653 |     "\n",
654 |     "# Plot the cumulative input against the cumulative output\n",
655 |     "plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'], \n",
656 |     "         ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000, \n",
657 |     "         'go', markersize=8, label='Intake volume, L')\n",
658 |     "\n",
659 |     "plt.plot(oe.HOURS, \n",
660 |     "         oe.VALUE.cumsum()/1000, \n",
661 |     "         'ro', markersize=8, label='Output volume, L')\n",
662 |     "\n",
663 |     "plt.title('Fluid balance over time',fontsize=16)\n",
664 |     "plt.xlabel('Hours',fontsize=16)\n",
665 |     "plt.ylabel('Volume (litres)',fontsize=16)\n",
666 |     "# plt.ylim(0,38)\n",
667 |     "plt.legend()"
668 |    ]
669 |   },
670 |   {
671 |    "cell_type": "markdown",
672 |    "metadata": {
673 |     "collapsed": true
674 |    },
675 |    "source": [
676 |     "As the plot shows, the patient's intake tends to be above their output (as one would expect!) - but there are periods where they are almost one to one. One of the biggest challenges of working with ICU data is that context is everything - let's look at a treatment (lasix) that we know will affect this graph."
677 |    ]
678 |   },
679 |   {
680 |    "cell_type": "code",
681 |    "execution_count": null,
682 |    "metadata": {
683 |     "collapsed": false
684 |    },
685 |    "outputs": [],
686 |    "source": [
687 |     "plt.figure(figsize=(14, 10))\n",
688 |     "\n",
689 |     "# Plot the cumulative input against the cumulative output\n",
690 |     "plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'], \n",
691 |     "         ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000, \n",
692 |     "         'go', markersize=8, label='Intake volume, L')\n",
693 |     "\n",
694 |     "plt.plot(oe.HOURS, \n",
695 |     "         oe.VALUE.cumsum()/1000, \n",
696 |     "         'ro', markersize=8, label='Output volume, L')\n",
697 |     "\n",
698 |     "# example on getting two columns from a dataframe: ie[['HOURS_START','HOURS_END']].head()\n",
699 |     "\n",
700 |     "for i, idx in enumerate(ie.index[ie.LABEL=='Furosemide (Lasix)']):\n",
701 |     "    plt.plot([ie.HOURS_START[ie.LABEL=='Furosemide (Lasix)'][idx],\n",
702 |     "             ie.HOURS_END[ie.LABEL=='Furosemide (Lasix)'][idx]],\n",
703 |     "            [ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx],\n",
704 |     "             ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx]],\n",
705 |     "            'b-',linewidth=4)\n",
706 |     "    \n",
707 |     "\n",
708 |     "plt.title('Fluid balance over time',fontsize=16)\n",
709 |     "plt.xlabel('Hours',fontsize=16)\n",
710 |     "plt.ylabel('Volume (litres)',fontsize=16)\n",
711 |     "# plt.ylim(0,38)\n",
712 |     "plt.legend()\n"
713 |    ]
714 |   },
715 |   {
716 |    "cell_type": "code",
717 |    "execution_count": null,
718 |    "metadata": {
719 |     "collapsed": false
720 |    },
721 |    "outputs": [],
722 |    "source": [
723 |     "ie['LABEL'].unique()"
724 |    ]
725 |   },
726 |   {
727 |    "cell_type": "markdown",
728 |    "metadata": {},
729 |    "source": [
730 |     "### Exercise 2\n",
731 |     "\n",
732 |     "* Plot the alarms for the mean arterial pressure ('```Arterial Blood Pressure mean```')\n",
733 |     "* HINT: you can use ```ce.LABEL.unique()``` to find a list of variable names\n",
734 |     "* Were the alarm thresholds breached?"
735 |    ]
736 |   },
737 |   {
738 |    "cell_type": "code",
739 |    "execution_count": null,
740 |    "metadata": {
741 |     "collapsed": false
742 |    },
743 |    "outputs": [],
744 |    "source": [
745 |     "# Exercise 2 here\n",
746 |     "\n",
747 |     "\n"
748 |    ]
749 |   },
750 |   {
751 |    "cell_type": "markdown",
752 |    "metadata": {},
753 |    "source": [
754 |     "### Plot 3: Were the patient's other vital signs stable?"
755 |    ]
756 |   },
757 |   {
758 |    "cell_type": "code",
759 |    "execution_count": null,
760 |    "metadata": {
761 |     "collapsed": false
762 |    },
763 |    "outputs": [],
764 |    "source": [
765 |     "plt.figure(figsize=(14, 10))\n",
766 |     "\n",
767 |     "plt.plot(ce.index[ce.LABEL=='Heart Rate'], \n",
768 |     "         ce.VALUENUM[ce.LABEL=='Heart Rate'],\n",
769 |     "         'rx', markersize=8, label='HR')\n",
770 |     "\n",
771 |     "plt.plot(ce.index[ce.LABEL=='O2 saturation pulseoxymetry'], \n",
772 |     "         ce.VALUENUM[ce.LABEL=='O2 saturation pulseoxymetry'], \n",
773 |     "         'g.', markersize=8, label='O2')\n",
774 |     "\n",
775 |     "plt.plot(ce.index[ce.LABEL=='Arterial Blood Pressure mean'], \n",
776 |     "         ce.VALUENUM[ce.LABEL=='Arterial Blood Pressure mean'], \n",
777 |     "         'bv', markersize=8, label='MAP')\n",
778 |     "\n",
779 |     "plt.plot(ce.index[ce.LABEL=='Respiratory Rate'], \n",
780 |     "         ce.VALUENUM[ce.LABEL=='Respiratory Rate'], \n",
781 |     "         'k+', markersize=8, label='RR')\n",
782 |     "\n",
783 |     "plt.title('Vital signs over time from admission')\n",
784 |     "plt.ylim(0,130)\n",
785 |     "plt.legend()"
786 |    ]
787 |   },
788 |   {
789 |    "cell_type": "markdown",
790 |    "metadata": {},
791 |    "source": [
792 |     "### Plot 5: Laboratory measurements"
793 |    ]
794 |   },
795 |   {
796 |    "cell_type": "markdown",
797 |    "metadata": {},
798 |    "source": [
799 |     "Using Pandas' 'read_sql_query' function again (or 'read_csv' if loading from the CSV file), we'll now load the labevents data.\n",
800 |     "This data corresponds to measurements made in a laboratory - usually on a sample of patient blood. "
801 |    ]
802 |   },
803 |   {
804 |    "cell_type": "code",
805 |    "execution_count": null,
806 |    "metadata": {
807 |     "collapsed": false
808 |    },
809 |    "outputs": [],
810 |    "source": [
811 |     "# OPTION 1: load labevents data using the database connection\n",
812 |     "query = \"\"\"\n",
813 |     "SELECT de.subject_id\n",
814 |     "  , de.charttime\n",
815 |     "  , di.label, de.value, de.valuenum\n",
816 |     "  , de.uom\n",
817 |     "FROM labevents de\n",
818 |     "INNER JOIN d_labitems di\n",
819 |     "  ON de.itemid = di.itemid\n",
820 |     "where de.subject_id = 40084\n",
821 |     "\"\"\"\n",
822 |     "\n",
823 |     "le = pd.read_sql_query(query,conn)\n",
824 |     "\n",
825 |     "# OPTION 2: load labevents from the CSV file\n",
826 |     "# le = pd.read_csv('data/example_labevents.csv', index_col='HOURSSINCEADMISSION')"
827 |    ]
828 |   },
829 |   {
830 |    "cell_type": "code",
831 |    "execution_count": null,
832 |    "metadata": {
833 |     "collapsed": false
834 |    },
835 |    "outputs": [],
836 |    "source": [
837 |     "# preview the labevents data\n",
838 |     "le.head()"
839 |    ]
840 |   },
841 |   {
842 |    "cell_type": "code",
843 |    "execution_count": null,
844 |    "metadata": {
845 |     "collapsed": false
846 |    },
847 |    "outputs": [],
848 |    "source": [
849 |     "# preview the hemoglobin rows of the labevents data\n",
850 |     "le[le.LABEL=='HEMOGLOBIN']"
851 |    ]
852 |   },
853 |   {
854 |    "cell_type": "code",
855 |    "execution_count": null,
856 |    "metadata": {
857 |     "collapsed": false
858 |    },
859 |    "outputs": [],
860 |    "source": [
861 |     "plt.figure(figsize=(14, 10))\n",
862 |     "\n",
863 |     "plt.plot(le.index[le.LABEL=='HEMATOCRIT'], \n",
864 |     "         le.VALUENUM[le.LABEL=='HEMATOCRIT'], \n",
865 |     "         'go', markersize=6, label='Haematocrit')\n",
866 |     "\n",
867 |     "plt.plot(le.index[le.LABEL=='HEMOGLOBIN'], \n",
868 |     "         le.VALUENUM[le.LABEL=='HEMOGLOBIN'], \n",
869 |     "         'bv', markersize=8, label='Hemoglobin')\n",
870 |     "\n",
871 |     "plt.title('Laboratory measurements over time from admission')\n",
872 |     "plt.ylim(0,38)\n",
873 |     "plt.legend()"
874 |    ]
875 |   },
876 |   {
877 |    "cell_type": "markdown",
878 |    "metadata": {},
879 |    "source": [
880 |     "### Plot 6: Intravenous medications"
881 |    ]
882 |   },
883 |   {
884 |    "cell_type": "markdown",
885 |    "metadata": {},
886 |    "source": [
887 |     "- Using the Pandas 'read_csv' function again, we'll now load the ioevents dataset"
888 |    ]
889 |   },
890 |   {
891 |    "cell_type": "code",
892 |    "execution_count": null,
893 |    "metadata": {
894 |     "collapsed": false
895 |    },
896 |    "outputs": [],
897 |    "source": [
898 |     "# load ioevents\n",
899 |     "ioe = pd.read_csv('data/example_ioevents.csv',index_col='HOURSSINCEADMISSION_START')"
900 |    ]
901 |   },
902 |   {
903 |    "cell_type": "code",
904 |    "execution_count": null,
905 |    "metadata": {
906 |     "collapsed": false
907 |    },
908 |    "outputs": [],
909 |    "source": [
910 |     "ioe.head()\n"
911 |    ]
912 |   },
913 |   {
914 |    "cell_type": "code",
915 |    "execution_count": null,
916 |    "metadata": {
917 |     "collapsed": false
918 |    },
919 |    "outputs": [],
920 |    "source": [
921 |     "plt.figure(figsize=(14, 10))\n",
922 |     "\n",
923 |     "plt.plot(ie.CHARTTIME[ie.LABEL=='Midazolam (Versed)'], \n",
924 |     "         ie.RATE[ie.LABEL=='Midazolam (Versed)'], \n",
925 |     "         'go', markersize=6, label='Midazolam (Versed)')\n",
926 |     "\n",
927 |     "plt.plot(ie.CHARTTIME[ie.LABEL=='Propofol'], \n",
928 |     "         ie.RATE[ie.LABEL=='Propofol'], \n",
929 |     "         'bv', markersize=8, label='Propofol')\n",
930 |     "\n",
931 |     "plt.plot(ie.CHARTTIME[ie.LABEL=='Fentanyl'], \n",
932 |     "         ie.RATE[ie.LABEL=='Fentanyl'], \n",
933 |     "         'k+', markersize=8, label='Fentanyl')\n",
934 |     "\n",
935 |     "plt.title('Inputs over time from admission')\n",
936 |     "plt.ylim(0,380)\n",
937 |     "plt.legend()"
938 |    ]
939 |   }
940 |  ],
941 |  "metadata": {
942 |   "kernelspec": {
943 |    "display_name": "Python 2",
944 |    "language": "python",
945 |    "name": "python2"
946 |   },
947 |   "language_info": {
948 |    "codemirror_mode": {
949 |     "name": "ipython",
950 |     "version": 2
951 |    },
952 |    "file_extension": ".py",
953 |    "mimetype": "text/x-python",
954 |    "name": "python",
955 |    "nbconvert_exporter": "python",
956 |    "pygments_lexer": "ipython2",
957 |    "version": "2.7.10"
958 |   }
959 |  },
960 |  "nbformat": 4,
961 |  "nbformat_minor": 0
962 | }
963 | 


--------------------------------------------------------------------------------
/temp/04-example-multiplepatients.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd\n",
 13 |     "import matplotlib.pyplot as plt\n",
 14 |     "import sqlite3\n",
 15 |     "%matplotlib inline\n",
 16 |     "\n",
 17 |     "conn = sqlite3.connect('data/mimicdata.sqlite')\n"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": null,
 23 |    "metadata": {
 24 |     "collapsed": false,
 25 |     "scrolled": true
 26 |    },
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "\n",
 30 |     "\n",
 31 |     "data = []\n",
 32 |     "for subject_id in [40084, 40080, 40004]:\n",
 33 |     "    \n",
 34 |     "    query = \"\"\"\n",
 35 |     "    SELECT de.icustay_id\n",
 36 |     "      , (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS\n",
 37 |     "      , di.label\n",
 38 |     "      , de.value\n",
 39 |     "      , de.valuenum\n",
 40 |     "      , de.uom\n",
 41 |     "    FROM chartevents de\n",
 42 |     "    INNER join d_items di\n",
 43 |     "    ON de.itemid = di.itemid\n",
 44 |     "    INNER join icustays ie\n",
 45 |     "    ON de.icustay_id = ie.icustay_id\n",
 46 |     "    WHERE de.subject_id = \"\"\" + str(subject_id) + \"\"\"\n",
 47 |     "    ORDER BY charttime;\n",
 48 |     "    \"\"\"\n",
 49 |     "\n",
 50 |     "    ce = pd.read_sql_query(query,conn)\n",
 51 |     "\n",
 52 |     "    valueName = \"Respiratory Rate\";\n",
 53 |     "    \n",
 54 |     "    # Set x equal to the times\n",
 55 |     "    x_hr = ce.HOURS[ce.LABEL==valueName]\n",
 56 |     "\n",
 57 |     "    # Set y equal to the values of the selected measurement (valueName)\n",
 58 |     "    y_hr = ce.VALUENUM[ce.LABEL==valueName]\n",
 59 |     "\n",
 60 |     "    # Open a figure and store this patient's series; plotting happens after the loop\n",
 61 |     "    plt.figure(figsize=(14, 6))\n",
 62 |     "    data.append([x_hr,y_hr]);\n",
 63 |     "    \n",
 64 |     "for patient in data:\n",
 65 |     "    plt.plot(patient[0], patient[1]);\n",
 66 |     "\n",
 67 |     "plt.xlabel('Time',fontsize=16)\n",
 68 |     "plt.ylabel(valueName,fontsize=16)\n",
 69 |     "plt.title(valueName + ' over time from admission to the intensive care unit')\n",
 70 |     "\n"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "metadata": {
 77 |     "collapsed": true
 78 |    },
 79 |    "outputs": [],
 80 |    "source": []
 81 |   }
 82 |  ],
 83 |  "metadata": {
 84 |   "kernelspec": {
 85 |    "display_name": "Python 2",
 86 |    "language": "python",
 87 |    "name": "python2"
 88 |   },
 89 |   "language_info": {
 90 |    "codemirror_mode": {
 91 |     "name": "ipython",
 92 |     "version": 2
 93 |    },
 94 |    "file_extension": ".py",
 95 |    "mimetype": "text/x-python",
 96 |    "name": "python",
 97 |    "nbconvert_exporter": "python",
 98 |    "pygments_lexer": "ipython2",
 99 |    "version": "2.7.10"
100 |   }
101 |  },
102 |  "nbformat": 4,
103 |  "nbformat_minor": 0
104 | }
105 | 


--------------------------------------------------------------------------------
/temp/06-example-patient-psql.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Exploring the trajectory of a single patient"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "### Import Python libraries"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "We first need to import some tools for working with data in Python. \n",
 22 |     "- NumPy is for working with numbers\n",
 23 |     "- Pandas is for analysing data\n",
 24 |     "- MatPlotLib is for making plots\n",
 25 |     "- Psycopg2 is for connecting to the PostgreSQL database"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": null,
 31 |    "metadata": {
 32 |     "collapsed": false
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "import numpy as np\n",
 37 |     "import pandas as pd\n",
 38 |     "import matplotlib.pyplot as plt\n",
 39 |     "import psycopg2\n",
 40 |     "%matplotlib inline"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "markdown",
 45 |    "metadata": {},
 46 |    "source": [
 47 |     "### Connect to the database"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "markdown",
 52 |    "metadata": {},
 53 |    "source": [
 54 |     "- We can use the psycopg2 library to connect to the MIMIC database\n",
 55 |     "- Once the connection is established, we'll run a simple SQL query."
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": null,
 61 |    "metadata": {
 62 |     "collapsed": false
 63 |    },
 64 |    "outputs": [],
 65 |    "source": [
 66 |     "# Connect to the MIMIC database\n",
 67 |     "try: \n",
 68 |     "    conn = psycopg2.connect(\"dbname='mimic' user='tompollard' host='localhost' password='postgres'\")\n",
 69 |     "except: \n",
 70 |     "    print('meh')"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "metadata": {
 77 |     "collapsed": true
 78 |    },
 79 |    "outputs": [],
 80 |    "source": [
 81 |     "# Create our test query\n",
 82 |     "test_query = \"\"\"\n",
 83 |     "SELECT subject_id, hadm_id, admittime, dischtime, diagnosis, admission_type, deathtime, discharge_location\n",
 84 |     "FROM mimiciii.admissions;\n",
 85 |     "\"\"\""
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": null,
 91 |    "metadata": {
 92 |     "collapsed": false
 93 |    },
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "# Run the query and assign the results to a variable\n",
 97 |     "test = pd.read_sql_query(test_query,conn)"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": null,
103 |    "metadata": {
104 |     "collapsed": false
105 |    },
106 |    "outputs": [],
107 |    "source": [
108 |     "# Display the first few rows\n",
109 |     "test"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": null,
115 |    "metadata": {
116 |     "collapsed": true
117 |    },
118 |    "outputs": [],
119 |    "source": []
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "### Load the chartevents data"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "markdown",
130 |    "metadata": {},
131 |    "source": [
132 |     "- The chartevents table contains data charted at the patient bedside. It includes variables such as heart rate, respiratory rate, temperature, and so on.\n",
133 |     "- We'll begin by loading the chartevents data for a single patient."
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "code",
138 |    "execution_count": null,
139 |    "metadata": {
140 |     "collapsed": false
141 |    },
142 |    "outputs": [],
143 |    "source": [
144 |     "query = \"\"\"\n",
145 |     "SELECT de.icustay_id\n",
146 |     "  , EXTRACT(EPOCH FROM de.charttime-ie.intime)/3600 as HOURS\n",
147 |     "  , di.label\n",
148 |     "  , de.value\n",
149 |     "  , de.valuenum\n",
150 |     "  , de.uom\n",
151 |     "FROM mimiciii.chartevents de\n",
152 |     "INNER join mimiciii.d_items di\n",
153 |     "ON de.itemid = di.itemid\n",
154 |     "INNER join mimiciii.icustays ie\n",
155 |     "ON de.icustay_id = ie.icustay_id\n",
156 |     "WHERE de.icustay_id = 236942\n",
157 |     "ORDER BY charttime;\n",
158 |     "\"\"\"\n",
159 |     "\n",
160 |     "ce = pd.read_sql_query(query,conn)\n",
161 |     "\n",
162 |     "\n",
163 |     "# OPTION 2: load chartevents from a CSV file\n",
164 |     "# ce = pd.read_csv('data/example_chartevents.csv', index_col='HOURSSINCEADMISSION')"
165 |    ]
166 |   },
167 |   {
168 |    "cell_type": "code",
169 |    "execution_count": null,
170 |    "metadata": {
171 |     "collapsed": false
172 |    },
173 |    "outputs": [],
174 |    "source": [
175 |     "# Preview the data\n",
176 |     "# Use 'head' to limit the number of rows returned\n",
177 |     "ce.head()"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "markdown",
182 |    "metadata": {},
183 |    "source": [
184 |     "### Review the patient's heart rate"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "markdown",
189 |    "metadata": {},
190 |    "source": [
191 |     "- We can select individual columns using the column name. \n",
192 |     "- For example, if we want to select just the label column, we write **```ce.label```** or alternatively **```ce['label']```**"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "code",
197 |    "execution_count": null,
198 |    "metadata": {
199 |     "collapsed": false
200 |    },
201 |    "outputs": [],
202 |    "source": [
203 |     "# Select a single column\n",
204 |     "ce['label'].head()"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "markdown",
209 |    "metadata": {},
210 |    "source": [
211 |     "- In a similar way, we can select rows from data using indexes. \n",
212 |     "- For example, to select rows where the label is equal to 'Heart Rate', we would create an index using **```[ce.label=='Heart Rate']```** "
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "code",
217 |    "execution_count": null,
218 |    "metadata": {
219 |     "collapsed": false
220 |    },
221 |    "outputs": [],
222 |    "source": [
223 |     "# List the unique labels (variable names) recorded for this patient\n",
224 |     "ce.label.unique()"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "markdown",
229 |    "metadata": {},
230 |    "source": [
231 |     "### Plot 1: How did the patient's heart rate change over time?"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "markdown",
236 |    "metadata": {},
237 |    "source": [
238 |     "- Using the methods described above to select our data of interest, we can create our x and y axis values to create a time series plot of heart rate."
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": null,
244 |    "metadata": {
245 |     "collapsed": false
246 |    },
247 |    "outputs": [],
248 |    "source": [
249 |     "# Which rows (by index) have a corresponding heart rate measurement?\n",
250 |     "print ce.index[ce.label=='Heart Rate']"
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "code",
255 |    "execution_count": null,
256 |    "metadata": {
257 |     "collapsed": false,
258 |     "scrolled": true
259 |    },
260 |    "outputs": [],
261 |    "source": [
262 |     "# Set x equal to the times\n",
263 |     "x_hr = ce.hours[ce.label=='Heart Rate']\n",
264 |     "\n",
265 |     "# Set y equal to the heart rates\n",
266 |     "y_hr = ce.valuenum[ce.label=='Heart Rate']\n",
267 |     "\n",
268 |     "# Plot time against heart rate\n",
269 |     "plt.figure(figsize=(14, 6))\n",
270 |     "plt.plot(x_hr,y_hr)\n",
271 |     "\n",
272 |     "\n",
273 |     "plt.xlabel('Time since admission, hours',fontsize=16)\n",
274 |     "plt.ylabel('Heart rate',fontsize=16)\n",
275 |     "plt.title('Heart rate over time from admission to the intensive care unit',fontsize=16)"
276 |    ]
277 |   },
278 |   {
279 |    "cell_type": "markdown",
280 |    "metadata": {},
281 |    "source": [
282 |     "### Task 1\n",
283 |     "\n",
284 |     "* What is happening to this patient's heart rate?\n",
285 |     "* Plot respiratory rate over time for the patient.\n",
286 |     "* Is there anything unusual about the patient's respiratory rate?\n"
287 |    ]
288 |   },
289 |   {
290 |    "cell_type": "code",
291 |    "execution_count": null,
292 |    "metadata": {
293 |     "collapsed": true
294 |    },
295 |    "outputs": [],
296 |    "source": [
297 |     "# Exercise 1 here\n",
298 |     "\n"
299 |    ]
300 |   },
301 |   {
302 |    "cell_type": "markdown",
303 |    "metadata": {},
304 |    "source": [
305 |     "### Plot 2: Did the patient's vital signs breach any alarm thresholds?"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "markdown",
310 |    "metadata": {},
311 |    "source": [
312 |     "- Alarm systems in the intensive care unit are commonly based on high and low thresholds defined by the carer.\n",
313 |     "- False alarms are often a problem and so thresholds may be set arbitrarily to reduce alarms.\n",
314 |     "- As a result, alarm settings carry limited information."
315 |    ]
316 |   },
317 |   {
318 |    "cell_type": "code",
319 |    "execution_count": null,
320 |    "metadata": {
321 |     "collapsed": false
322 |    },
323 |    "outputs": [],
324 |    "source": [
325 |     "plt.figure(figsize=(14, 6))\n",
326 |     "\n",
327 |     "plt.plot(ce.hours[ce.label=='Respiratory Rate'], \n",
328 |     "         ce.valuenum[ce.label=='Respiratory Rate'],\n",
329 |     "         'k+-', markersize=2, linewidth=1)\n",
330 |     "\n",
331 |     "plt.plot(ce.hours[ce.label=='Resp Alarm - High'], \n",
332 |     "         ce.valuenum[ce.label=='Resp Alarm - High'],\n",
333 |     "         'm--')\n",
334 |     "\n",
335 |     "plt.plot(ce.hours[ce.label=='Resp Alarm - Low'], \n",
336 |     "         ce.valuenum[ce.label=='Resp Alarm - Low'],\n",
337 |     "         'm--')\n",
338 |     "\n",
339 |     "plt.xlabel('Time',fontsize=16)\n",
340 |     "plt.ylabel('Respiratory rate',fontsize=16)\n",
341 |     "plt.title('Respiratory rate since admission, with upper and lower alarm thresholds',fontsize=16)\n",
342 |     "plt.ylim(0,55)\n"
343 |    ]
344 |   },
345 |   {
346 |    "cell_type": "markdown",
347 |    "metadata": {},
348 |    "source": [
349 |     "### Task 2\n",
350 |     "\n",
351 |     "- Based on the data, does it look like the alarms would have triggered for this patient?\n"
352 |    ]
353 |   },
354 |   {
355 |    "cell_type": "markdown",
356 |    "metadata": {},
357 |    "source": [
358 |     "### Plot 3: What is the patient's level of consciousness?"
359 |    ]
360 |   },
361 |   {
362 |    "cell_type": "markdown",
363 |    "metadata": {},
364 |    "source": [
365 |     "- Glasgow Coma Scale (GCS) is a measure of consciousness.\n",
366 |     "- It is commonly used for monitoring patients in the intensive care unit. \n",
367 |     "- It consists of three components: eye response; verbal response; motor response."
368 |    ]
369 |   },
370 |   {
371 |    "cell_type": "code",
372 |    "execution_count": null,
373 |    "metadata": {
374 |     "collapsed": false
375 |    },
376 |    "outputs": [],
377 |    "source": [
378 |     "# Display the first few rows of the GCS eye response data\n",
379 |     "ce[ce.label=='GCS - Eye Opening'].head()"
380 |    ]
381 |   },
382 |   {
383 |    "cell_type": "code",
384 |    "execution_count": null,
385 |    "metadata": {
386 |     "collapsed": false
387 |    },
388 |    "outputs": [],
389 |    "source": [
390 |     "# Prepare the size of the figure\n",
391 |     "plt.figure(figsize=(18, 12))\n",
392 |     "\n",
393 |     "# Set x equal to the times\n",
394 |     "x_hr = ce.hours[ce.label=='Heart Rate']\n",
395 |     "\n",
396 |     "# Set y equal to the heart rates\n",
397 |     "y_hr = ce.valuenum[ce.label=='Heart Rate']\n",
398 |     "\n",
399 |     "plt.plot(x_hr,y_hr,'r-',label='Heart rate')\n",
400 |     "\n",
401 |     "plt.plot(ce.hours[ce.label=='Heart rate Alarm - High'], \n",
402 |     "         ce.valuenum[ce.label=='Heart rate Alarm - High'],\n",
403 |     "         'm--')\n",
404 |     "\n",
405 |     "plt.plot(ce.hours[ce.label=='Heart rate Alarm - Low'], \n",
406 |     "         ce.valuenum[ce.label=='Heart rate Alarm - Low'],\n",
407 |     "         'm--', label='Alarm threshold')\n",
408 |     "\n",
409 |     "plt.plot(ce.hours[ce.label=='Respiratory Rate'], \n",
410 |     "         ce.valuenum[ce.label=='Respiratory Rate'],\n",
411 |     "         'b-', markersize=6,label='Respiratory rate')\n",
412 |     "\n",
413 |     "plt.plot(ce.hours[ce.label=='Resp Alarm - High'], \n",
414 |     "         ce.valuenum[ce.label=='Resp Alarm - High'],\n",
415 |     "         'm--')\n",
416 |     "\n",
417 |     "plt.plot(ce.hours[ce.label=='Resp Alarm - Low'], \n",
418 |     "         ce.valuenum[ce.label=='Resp Alarm - Low'],\n",
419 |     "         'm--')\n",
420 |     "\n",
421 |     "# Add a text label to the y-axis\n",
422 |     "plt.text(-3,155,'GCS - Eye Opening',fontsize=14)\n",
423 |     "plt.text(-3,150,'GCS - Motor Response',fontsize=14)\n",
424 |     "plt.text(-3,145,'GCS - Verbal Response',fontsize=14)   \n",
425 |     "\n",
426 |     "# Iterate over list of GCS labels, plotting every 14th label (within the first 75) to avoid overlap\n",
427 |     "for i, txt in enumerate(ce.value[ce.label=='GCS - Eye Opening'].values):\n",
428 |     "    if np.mod(i,14)==0 and i < 75:\n",
429 |     "        plt.annotate(txt, (ce.hours[ce.label=='GCS - Eye Opening'].values[i],155),fontsize=14)\n",
430 |     "        \n",
431 |     "for i, txt in enumerate(ce.value[ce.label=='GCS - Motor Response'].values):\n",
432 |     "    if np.mod(i,14)==0 and i < 75:\n",
433 |     "        plt.annotate(txt, (ce.hours[ce.label=='GCS - Motor Response'].values[i],150),fontsize=14)\n",
434 |     "\n",
435 |     "for i, txt in enumerate(ce.value[ce.label=='GCS - Verbal Response'].values):\n",
436 |     "    if np.mod(i,14)==0 and i < 75:\n",
437 |     "        plt.annotate(txt, (ce.hours[ce.label=='GCS - Verbal Response'].values[i],145),fontsize=14)\n",
438 |     "\n",
439 |     "plt.title('Vital signs and Glasgow Coma Scale since admission',fontsize=18)\n",
440 |     "\n",
441 |     "plt.xlabel('Time (hours)',fontsize=18)\n",
442 |     "plt.ylabel('Vital signs',fontsize=18)\n",
443 |     "plt.legend(loc=1)\n",
444 |     "plt.ylim(10,180)\n"
445 |    ]
446 |   },
447 |   {
448 |    "cell_type": "markdown",
449 |    "metadata": {},
450 |    "source": [
451 |     "### Task 3\n",
452 |     "\n",
453 |     "- How is the patient's consciousness changing over time?"
454 |    ]
455 |   },
456 |   {
457 |    "cell_type": "markdown",
458 |    "metadata": {},
459 |    "source": [
460 |     "# Stop here..."
461 |    ]
462 |   },
463 |   {
464 |    "cell_type": "markdown",
465 |    "metadata": {},
466 |    "source": [
467 |     "### Plot 4: What other data do we have on the patient?"
468 |    ]
469 |   },
470 |   {
471 |    "cell_type": "markdown",
472 |    "metadata": {},
473 |    "source": [
474 |     "- Using Pandas' 'read_sql_query' function again, we'll now load the outputevents data - this table contains all information about patient outputs (urine output, drains, dialysis)."
475 |    ]
476 |   },
477 |   {
478 |    "cell_type": "code",
479 |    "execution_count": null,
480 |    "metadata": {
481 |     "collapsed": false
482 |    },
483 |    "outputs": [],
484 |    "source": [
485 |     "# OPTION 1: load outputs from the patient\n",
486 |     "query = \"\"\"\n",
487 |     "select de.icustay_id\n",
488 |     "  , EXTRACT(EPOCH FROM de.charttime-ie.intime)/3600 as HOURS\n",
489 |     "  , di.label\n",
490 |     "  , de.value\n",
491 |     "  , de.valueuom\n",
492 |     "from mimiciii.outputevents de \n",
493 |     "inner join mimiciii.icustays ie\n",
494 |     "  on de.icustay_id = ie.icustay_id\n",
495 |     "inner join mimiciii.d_items di\n",
496 |     "  on de.itemid = di.itemid\n",
497 |     "where de.subject_id = 49205\n",
498 |     "order by charttime;\n",
499 |     "\"\"\"\n",
500 |     "\n",
501 |     "oe = pd.read_sql_query(query,conn)"
502 |    ]
503 |   },
504 |   {
505 |    "cell_type": "code",
506 |    "execution_count": null,
507 |    "metadata": {
508 |     "collapsed": false
509 |    },
510 |    "outputs": [],
511 |    "source": [
512 |     "oe.head()"
513 |    ]
514 |   },
515 |   {
516 |    "cell_type": "code",
517 |    "execution_count": null,
518 |    "metadata": {
519 |     "collapsed": false
520 |    },
521 |    "outputs": [],
522 |    "source": [
523 |     "plt.figure(figsize=(14, 10))\n",
524 |     "\n",
525 |     "plt.figure(figsize=(14, 6))\n",
526 |     "plt.title('Fluid output over time')\n",
527 |     "\n",
528 |     "plt.plot(oe.hours, \n",
529 |     "         oe.value.cumsum()/1000, \n",
530 |     "         'ro', markersize=8, label='Output volume, L')\n",
531 |     "\n",
532 |     "# plt.xlim(0,72)\n",
533 |     "# plt.ylim(0,10)\n",
534 |     "plt.legend()"
535 |    ]
536 |   },
537 |   {
538 |    "cell_type": "markdown",
539 |    "metadata": {},
540 |    "source": [
541 |     "To provide necessary context to this plot, it would help to include patient input data. This provides the necessary context to determine a patient's fluid balance - a key indicator in patient health."
542 |    ]
543 |   },
544 |   {
545 |    "cell_type": "code",
546 |    "execution_count": null,
547 |    "metadata": {
548 |     "collapsed": false
549 |    },
550 |    "outputs": [],
551 |    "source": [
552 |     "# OPTION 1: load inputs given to the patient (usually intravenously) using the database connection\n",
553 |     "query = \"\"\"\n",
554 |     "select de.icustay_id\n",
555 |     "  , EXTRACT(EPOCH FROM de.starttime-ie.intime)/3600 as HOURS_START\n",
556 |     "  , EXTRACT(EPOCH FROM de.endtime-ie.intime)/3600 as HOURS_END\n",
557 |     "  , de.linkorderid\n",
558 |     "  , di.label\n",
559 |     "  , de.amount\n",
560 |     "  , de.amountuom\n",
561 |     "  , de.rate\n",
562 |     "  , de.rateuom\n",
563 |     "from mimiciii.inputevents_mv de \n",
564 |     "inner join mimiciii.icustays ie\n",
565 |     "  on de.icustay_id = ie.icustay_id\n",
566 |     "inner join mimiciii.d_items di\n",
567 |     "  on de.itemid = di.itemid\n",
568 |     "where de.subject_id = 49205\n",
569 |     "order by endtime;\n",
570 |     "\"\"\"\n",
571 |     "\n",
572 |     "ie = pd.read_sql_query(query,conn)\n",
573 |     "\n",
574 |     "# # OPTION 2: load ioevents using the CSV file with endtime as the index\n",
575 |     "# ioe = pd.read_csv('inputevents.csv'\n",
576 |     "#                   ,header=None\n",
577 |     "#                   ,names=['subject_id','itemid','label','starttime','endtime','amount','amountuom','rate','rateuom']\n",
578 |     "#                   ,parse_dates=True)"
579 |    ]
580 |   },
581 |   {
582 |    "cell_type": "code",
583 |    "execution_count": null,
584 |    "metadata": {
585 |     "collapsed": false
586 |    },
587 |    "outputs": [],
588 |    "source": [
589 |     "ie.head()"
590 |    ]
591 |   },
592 |   {
593 |    "cell_type": "markdown",
594 |    "metadata": {},
595 |    "source": [
596 |     "Note that the column headers are different: we have \"hours_start\" and \"hours_end\". This is because inputs are administered over a fixed period of time."
597 |    ]
598 |   },
599 |   {
600 |    "cell_type": "code",
601 |    "execution_count": null,
602 |    "metadata": {
603 |     "collapsed": false
604 |    },
605 |    "outputs": [],
606 |    "source": [
607 |     "ie['label'].unique()"
608 |    ]
609 |   },
610 |   {
611 |    "cell_type": "code",
612 |    "execution_count": null,
613 |    "metadata": {
614 |     "collapsed": false
615 |    },
616 |    "outputs": [],
617 |    "source": [
618 |     "plt.figure(figsize=(14, 10))\n",
619 |     "\n",
620 |     "# Plot the cumulative input against the cumulative output\n",
621 |     "plt.plot(ie.hours_end[ie.amountuom=='mL'], \n",
622 |     "         ie.amount[ie.amountuom=='mL'].cumsum()/1000, \n",
623 |     "         'go', markersize=8, label='Intake volume, L')\n",
624 |     "\n",
625 |     "plt.plot(oe.hours, \n",
626 |     "         oe.value.cumsum()/1000, \n",
627 |     "         'ro', markersize=8, label='Output volume, L')\n",
628 |     "\n",
629 |     "plt.title('Fluid balance over time',fontsize=16)\n",
630 |     "plt.xlabel('Hours',fontsize=16)\n",
631 |     "plt.ylabel('Volume (litres)',fontsize=16)\n",
632 |     "# plt.ylim(0,38)\n",
633 |     "plt.legend()"
634 |    ]
635 |   },
636 |   {
637 |    "cell_type": "markdown",
638 |    "metadata": {
639 |     "collapsed": true
640 |    },
641 |    "source": [
642 |     "As the plot shows, the patient's intake tends to be above their output (as one would expect!) - but there are periods where they are almost one to one. One of the biggest challenges of working with ICU data is that context is everything - let's look at a treatment (lasix) that we know will affect this graph."
643 |    ]
644 |   },
645 |   {
646 |    "cell_type": "code",
647 |    "execution_count": null,
648 |    "metadata": {
649 |     "collapsed": false
650 |    },
651 |    "outputs": [],
652 |    "source": [
653 |     "plt.figure(figsize=(14, 10))\n",
654 |     "\n",
655 |     "# Plot the cumulative input against the cumulative output\n",
656 |     "plt.plot(ie.hours_end[ie.amountuom=='mL'], \n",
657 |     "         ie.amount[ie.amountuom=='mL'].cumsum()/1000, \n",
658 |     "         'go', markersize=8, label='Intake volume, L')\n",
659 |     "\n",
660 |     "plt.plot(oe.hours, \n",
661 |     "         oe.value.cumsum()/1000, \n",
662 |     "         'ro', markersize=8, label='Output volume, L')\n",
663 |     "\n",
664 |     "# example of getting two columns from a dataframe: ie[['hours_start','hours_end']].head()\n",
665 |     "\n",
666 |     "for i, idx in enumerate(ie.index[ie.label=='Furosemide (Lasix)']):\n",
667 |     "    plt.plot([ie.hours_start[ie.label=='Furosemide (Lasix)'][idx],\n",
668 |     "             ie.hours_end[ie.label=='Furosemide (Lasix)'][idx]],\n",
669 |     "            [ie.rate[ie.label=='Furosemide (Lasix)'][idx],\n",
670 |     "             ie.rate[ie.label=='Furosemide (Lasix)'][idx]],\n",
671 |     "            'b-',linewidth=4)\n",
672 |     "    \n",
673 |     "\n",
674 |     "plt.title('Fluid balance over time',fontsize=16)\n",
675 |     "plt.xlabel('Hours',fontsize=16)\n",
676 |     "plt.ylabel('Volume (litres)',fontsize=16)\n",
677 |     "# plt.ylim(0,38)\n",
678 |     "plt.legend()\n"
679 |    ]
680 |   },
681 |   {
682 |    "cell_type": "code",
683 |    "execution_count": null,
684 |    "metadata": {
685 |     "collapsed": false
686 |    },
687 |    "outputs": [],
688 |    "source": [
689 |     "ie['label'].unique()"
690 |    ]
691 |   },
692 |   {
693 |    "cell_type": "markdown",
694 |    "metadata": {},
695 |    "source": [
696 |     "### Exercise 2\n",
697 |     "\n",
698 |     "* Plot the alarms for the mean arterial pressure ('```Arterial Blood Pressure mean```')\n",
699 |     "* HINT: you can use ```ce.label.unique()``` to find a list of variable names\n",
700 |     "* Were the alarm thresholds breached?"
701 |    ]
702 |   },
703 |   {
704 |    "cell_type": "code",
705 |    "execution_count": null,
706 |    "metadata": {
707 |     "collapsed": false
708 |    },
709 |    "outputs": [],
710 |    "source": [
711 |     "# Exercise 2 here\n",
712 |     "\n",
713 |     "\n"
714 |    ]
715 |   },
716 |   {
717 |    "cell_type": "markdown",
718 |    "metadata": {},
719 |    "source": [
720 |     "### Plot 3: Were the patient's other vital signs stable?"
721 |    ]
722 |   },
723 |   {
724 |    "cell_type": "code",
725 |    "execution_count": null,
726 |    "metadata": {
727 |     "collapsed": false
728 |    },
729 |    "outputs": [],
730 |    "source": [
731 |     "plt.figure(figsize=(14, 10))\n",
732 |     "\n",
733 |     "plt.plot(ce.index[ce.label=='Heart Rate'], \n",
734 |     "         ce.valuenum[ce.label=='Heart Rate'],\n",
735 |     "         'rx', markersize=8, label='HR')\n",
736 |     "\n",
737 |     "plt.plot(ce.index[ce.label=='O2 saturation pulseoxymetry'], \n",
738 |     "         ce.valuenum[ce.label=='O2 saturation pulseoxymetry'], \n",
739 |     "         'g.', markersize=8, label='O2')\n",
740 |     "\n",
741 |     "plt.plot(ce.index[ce.label=='Arterial Blood Pressure mean'], \n",
742 |     "         ce.valuenum[ce.label=='Arterial Blood Pressure mean'], \n",
743 |     "         'bv', markersize=8, label='MAP')\n",
744 |     "\n",
745 |     "plt.plot(ce.index[ce.label=='Respiratory Rate'], \n",
746 |     "         ce.valuenum[ce.label=='Respiratory Rate'], \n",
747 |     "         'k+', markersize=8, label='RR')\n",
748 |     "\n",
749 |     "plt.title('Vital signs over time from admission')\n",
750 |     "plt.ylim(0,130)\n",
751 |     "plt.legend()"
752 |    ]
753 |   },
754 |   {
755 |    "cell_type": "markdown",
756 |    "metadata": {},
757 |    "source": [
758 |     "### Plot 5: Laboratory measurements"
759 |    ]
760 |   },
761 |   {
762 |    "cell_type": "markdown",
763 |    "metadata": {},
764 |    "source": [
765 |     "Using the Pandas `read_sql_query` function (or `read_csv`, if working from the CSV file), we'll now load the labevents data.\n",
766 |     "This data corresponds to measurements made in a laboratory - usually on a sample of patient blood. "
767 |    ]
768 |   },
769 |   {
770 |    "cell_type": "code",
771 |    "execution_count": null,
772 |    "metadata": {
773 |     "collapsed": false
774 |    },
775 |    "outputs": [],
776 |    "source": [
777 |     "# OPTION 1: load labevents data using the database connection\n",
778 |     "query = \"\"\"\n",
779 |     "SELECT de.subject_id\n",
780 |     "  , de.charttime\n",
781 |     "  , di.label, de.value, de.valuenum\n",
782 |     "  , de.uom\n",
783 |     "FROM mimiciii.labevents de\n",
784 |     "INNER JOIN mimiciii.d_labitems di\n",
785 |     "  ON de.itemid = di.itemid\n",
786 |     "where de.subject_id = 49205\n",
787 |     "\"\"\"\n",
788 |     "\n",
789 |     "le = pd.read_sql_query(query,conn)\n",
790 |     "\n",
791 |     "# OPTION 2: load labevents from the CSV file\n",
792 |     "# le = pd.read_csv('data/example_labevents.csv', index_col='HOURSSINCEADMISSION')"
793 |    ]
794 |   },
795 |   {
796 |    "cell_type": "code",
797 |    "execution_count": null,
798 |    "metadata": {
799 |     "collapsed": false
800 |    },
801 |    "outputs": [],
802 |    "source": [
803 |     "# preview the labevents data\n",
804 |     "le.head()"
805 |    ]
806 |   },
807 |   {
808 |    "cell_type": "code",
809 |    "execution_count": null,
810 |    "metadata": {
811 |     "collapsed": false
812 |    },
813 |    "outputs": [],
814 |    "source": [
815 |     "# show the hemoglobin rows of the labevents data\n",
816 |     "le[le.label=='HEMOGLOBIN']"
817 |    ]
818 |   },
819 |   {
820 |    "cell_type": "code",
821 |    "execution_count": null,
822 |    "metadata": {
823 |     "collapsed": false
824 |    },
825 |    "outputs": [],
826 |    "source": [
827 |     "plt.figure(figsize=(14, 10))\n",
828 |     "\n",
829 |     "plt.plot(le.index[le.label=='HEMATOCRIT'], \n",
830 |     "         le.valuenum[le.label=='HEMATOCRIT'], \n",
831 |     "         'go', markersize=6, label='Haematocrit')\n",
832 |     "\n",
833 |     "plt.plot(le.index[le.label=='HEMOGLOBIN'], \n",
834 |     "         le.valuenum[le.label=='HEMOGLOBIN'], \n",
835 |     "         'bv', markersize=8, label='Hemoglobin')\n",
836 |     "\n",
837 |     "plt.title('Laboratory measurements over time from admission')\n",
838 |     "plt.ylim(0,38)\n",
839 |     "plt.legend()"
840 |    ]
841 |   }
842 |  ],
843 |  "metadata": {
844 |   "kernelspec": {
845 |    "display_name": "Python 2",
846 |    "language": "python",
847 |    "name": "python2"
848 |   },
849 |   "language_info": {
850 |    "codemirror_mode": {
851 |     "name": "ipython",
852 |     "version": 2
853 |    },
854 |    "file_extension": ".py",
855 |    "mimetype": "text/x-python",
856 |    "name": "python",
857 |    "nbconvert_exporter": "python",
858 |    "pygments_lexer": "ipython2",
859 |    "version": "2.7.10"
860 |   }
861 |  },
862 |  "nbformat": 4,
863 |  "nbformat_minor": 0
864 | }
865 | 


--------------------------------------------------------------------------------