├── .gitignore ├── LICENSE ├── README.md ├── data └── mimicdata.sqlite ├── example-patient ├── example-patient-matlab.ipynb ├── example_patient_matlab.m ├── example_patient_matlab_ce.m ├── example_patient_matlab_ie.m ├── example_patient_matlab_le.m ├── expt-query-1.sql ├── expt-query-2.sql ├── expt-query-3.sql ├── expt-query-4.sql ├── expt-query-to-csv.sql └── mlcc1_example_patient.m ├── installation └── sqlite-manager │ └── sqlite_manager-0.8.3-tb+sm+fx.xpi ├── intro_to_mimic ├── 00-query-mimic.md ├── 01-example-patient-heart-failure.ipynb └── MozFest2015.key ├── mlcc ├── etc │ ├── calcRoc.m │ └── makeQuery.m ├── lab1-data-extraction │ ├── mlcc-query-1.sql │ ├── mlcc1-problem-set-solutions-ICUSTAYID.sql │ ├── mlcc1-problem-set-solutions.sql │ ├── mlcc1_introduction.ipynb │ └── mlcc1_introduction.m ├── lab2-intro-ml │ ├── README.md │ ├── mlcc2-query.sql │ ├── mlcc2_svm_workshop.ipynb │ └── mlcc2_svm_workshop.m └── lab4-mortality-prediction │ ├── matlab_postgres_connection.m │ ├── mlcc-extract-data.sql │ ├── mlcc-group-by-tutorial.sql │ ├── mlcc_mortality_prediction.ipynb │ └── mlcc_mortality_prediction.m ├── requirements.txt └── temp ├── 02-example-patient-sepsis.ipynb ├── 03-example-patient-ich.ipynb ├── 04-example-multiplepatients.ipynb └── 06-example-patient-psql.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # matlab temp files 2 | *.m~ 3 | 4 | # CSV files with example data 5 | example-patient/*.csv 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | env/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject 
date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *,cover 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | 59 | # Sphinx documentation 60 | docs/_build/ 61 | 62 | # PyBuilder 63 | target/ 64 | 65 | # OSX .DS_Store 66 | .DS_Store 67 | 68 | # IPython notebook checkpoints 69 | .ipynb_checkpoints/ 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Tom Pollard 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MIMIC Critical Care Datathon 2 | 3 | These are training materials for the MIMIC Critical Care Database. The package includes: 4 | 5 | - a demo version of MIMIC which can be quickly installed in the Firefox web browser with the SQLite Plugin. 6 | - some sample SQL queries which can be used to query the MIMIC data 7 | - an IPython Notebook which connects to the demo MIMIC database and allows analysis to be carried out using Python. 8 | 9 | ## What is MIMIC-III? 10 | 11 | MIMIC-III is a widely-used, freely available dataset developed by the MIT Lab for Computational Physiology, comprising deidentified health data associated with >40,000 critical care patients. It includes demographics, vital signs, laboratory tests, medications, and more. Details are available on the MIMIC website: https://mimic.physionet.org/ 12 | 13 | ## Workshop overview 14 | 15 | During the workshop, you will: 16 | 17 | - Learn about MIMIC-III, the publicly accessible critical care database 18 | - Create a local version of MIMIC-III with a small sample of patients using the Firefox SQLite Plugin 19 | - Explore the patient data using SQL 20 | - Plot and analyse the data using Python 21 | - Get inspiration for future research projects 22 | 23 | ## Downloading the materials 24 | 25 | If you are familiar with git, please clone this repository. If not, click the 26 | 'Download ZIP' button on the right and then unzip the materials onto your 27 | computer. 28 | 29 | ## Installing a demo version of MIMIC-III with SQLite Manager 30 | 31 | To create the database on your computer, you will need the Firefox SQLite Manager Add-on. Open Firefox, select "Add-ons" from the Tools menu, and then install SQLite Manager. 
To create the demo database, select "connect to database" from the menu and choose the data/mimicdata.sqlite file. 32 | 33 | ## Analysing the data using IPython Notebook 34 | 35 | To analyse the data using IPython Notebook: 36 | 37 | - If you already have Python and the Pip package manager, run ```pip install ipython``` 38 | - If you are new to Python, we suggest installing the Anaconda package from https://www.continuum.io/downloads. Then run ```conda update ipython```. 39 | 40 | Once IPython is installed, run ```ipython notebook``` from the command line to open IPython Notebook, then open one of the notebook (.ipynb) files (for example, 01-example-patient-heart-failure.ipynb). 41 | 42 | ## Getting access to the full MIMIC-III dataset 43 | 44 | If after this workshop you would like to gain access to the full MIMIC-III dataset, which contains rich data for over 40,000 patients, please see: https://mimic.physionet.org/gettingstarted/access/ 45 | 46 | ## Help to improve the workshop 47 | 48 | We hope to improve the workshop contents over time and we welcome your contributions. Please raise an issue and/or submit a pull request! 
49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /data/mimicdata.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MIT-LCP/mimic-workshop/b27eee438a1f62d909dd30d1d458d3516f32b276/data/mimicdata.sqlite -------------------------------------------------------------------------------- /example-patient/example_patient_matlab.m: -------------------------------------------------------------------------------- 1 | 2 | %% Load chartevents for the patient 3 | % load the various files 4 | fp = fopen('example-patient-chartevents.csv'); 5 | header_ce = fgetl(fp); 6 | 7 | % convert header from a string to a cell array of strings 8 | header_ce = regexp(header_ce,',','split'); 9 | 10 | frmt = '%f%f%f%s%f%q%q'; 11 | data_ce = textscan(fp,frmt,'delimiter',','); 12 | fclose(fp); 13 | 14 | % Let's extract the numeric data only into data_ce - and put string data into data_ce_str 15 | idxNumeric = cellfun(@isnumeric, data_ce); 16 | data_ce_str = [data_ce{~idxNumeric}]; 17 | header_ce_str = header_ce(~idxNumeric); 18 | data_ce = [data_ce{idxNumeric}]; 19 | header_ce = header_ce(idxNumeric); 20 | 21 | % here's a preview of the string data 22 | header_ce_str 23 | data_ce_str(1:5,:) 24 | 25 | % here's a preview of the numeric data ('\t' is a tab) 26 | fprintf('%8s\t',header_ce{:}); 27 | fprintf('\n') 28 | 29 | frmt = '%8g\t%8.2f\t%8g\t%8.2f'; 30 | for n=1:5 31 | fprintf(frmt,data_ce(n,:)); 32 | fprintf('\n'); 33 | end 34 | 35 | %% Load the other events tables 36 | % Time to load in the rest of the data! 
37 | % LAB DATA 38 | frmt = '%f%f%f%s%f%q%q'; 39 | 40 | fp = fopen('example-patient-labevents.csv'); 41 | header_le = fgetl(fp); 42 | header_le = regexp(header_le,',','split'); 43 | data_le = textscan(fp,frmt,'delimiter',','); 44 | fclose(fp); 45 | idxNumeric = cellfun(@isnumeric, data_le); 46 | data_le_str = [data_le{~idxNumeric}]; 47 | header_le_str = header_le(~idxNumeric); 48 | data_le = [data_le{idxNumeric}]; 49 | header_le = header_le(idxNumeric); 50 | 51 | % INPUT DATA 52 | frmt = '%f%f%f%f%f%q%f%q%f%q'; 53 | 54 | fp = fopen('example-patient-inputevents.csv'); 55 | header_ie = fgetl(fp); 56 | header_ie = regexp(header_ie,',','split'); 57 | data_ie = textscan(fp,frmt,'delimiter',','); 58 | fclose(fp); 59 | idxNumeric = cellfun(@isnumeric, data_ie); 60 | data_ie_str = [data_ie{~idxNumeric}]; 61 | header_ie_str = header_ie(~idxNumeric); 62 | data_ie = [data_ie{idxNumeric}]; 63 | header_ie = header_ie(idxNumeric); 64 | 65 | 66 | % OUTPUT DATA 67 | frmt = '%f%f%f%f%q%q'; 68 | 69 | fp = fopen('example-patient-outputevents.csv'); 70 | header_oe = fgetl(fp); 71 | header_oe = regexp(header_oe,',','split'); 72 | data_oe = textscan(fp,frmt,'delimiter',','); 73 | fclose(fp); 74 | idxNumeric = cellfun(@isnumeric, data_oe); 75 | data_oe_str = [data_oe{~idxNumeric}]; 76 | header_oe_str = header_oe(~idxNumeric); 77 | data_oe = [data_oe{idxNumeric}]; 78 | header_oe = header_oe(idxNumeric); 79 | 80 | % PROCEDURE DATA 81 | frmt = '%f%f%f%f%f%q%f%q'; 82 | 83 | fp = fopen('example-patient-procedureevents.csv'); 84 | header_pe = fgetl(fp); 85 | header_pe = regexp(header_pe,',','split'); 86 | data_pe = textscan(fp,frmt,'delimiter',','); 87 | fclose(fp); 88 | idxNumeric = cellfun(@isnumeric, data_pe); 89 | data_pe_str = [data_pe{~idxNumeric}]; 90 | header_pe_str = header_pe(~idxNumeric); 91 | data_pe = [data_pe{idxNumeric}]; 92 | header_pe = header_pe(idxNumeric); 93 | 94 | %% Initialize some plotting variables 95 | % Some variables used to make pretty plots 96 | col = [0.9047 
0.1918 0.1988 97 | 0.2941 0.5447 0.7494 98 | 0.3718 0.7176 0.3612 99 | 1.0000 0.5482 0.1000 100 | 0.4550 0.4946 0.4722 101 | 0.6859 0.4035 0.2412 102 | 0.9718 0.5553 0.7741 103 | 0.5313 0.3359 0.6523]; 104 | marker = {'d','+','o','x','>','s','<','+','^'}; 105 | ms = repmat(8,1,numel(marker)); 106 | savefigflag=0; 107 | %% Plot the vital signs 108 | figure(1); clf; hold all; 109 | example_patient_matlab_ce; 110 | 111 | %% Plot the labs 112 | figure(1); clf; hold all; 113 | example_patient_matlab_le; 114 | 115 | %% add in IOEVENTS 116 | figure(1); clf; hold all; 117 | example_patient_matlab_ie; 118 | 119 | %% putting it all together 120 | figure(1); clf; 121 | 122 | subplot(3,1,1); hold all; 123 | example_patient_matlab_ce; 124 | subplot(3,1,2); hold all; 125 | example_patient_matlab_le; 126 | subplot(3,1,3); hold all; 127 | example_patient_matlab_ie; 128 | P_PrettyFigure(1); 129 | 130 | if savefigflag==1 131 | export_fig(1,'exampledata10.png','-transparent'); 132 | end 133 | -------------------------------------------------------------------------------- /example-patient/example_patient_matlab_ce.m: -------------------------------------------------------------------------------- 1 | 2 | lbl_plot = {'Arterial Blood Pressure mean','Heart Rate','O2 saturation pulseoxymetry','Respiratory Rate'}; 3 | % plot the values 4 | for k=1:numel(lbl_plot) 5 | idxPlot = ismember(data_ce_str(:,3), lbl_plot{k}); 6 | plot(data_ce(idxPlot,2), data_ce(idxPlot,4),marker{k},... 
7 | 'Color',col(k,:), 'MarkerFaceColor',col(k,:), 'markersize',ms(k), 'linewidth',2); 8 | end 9 | 10 | 11 | set(gca,'XLim',[0,72],'YLim',[0,150]); 12 | set(gca,'YTick',0:25:150); 13 | 14 | xlabel('Hours since admission','FontSize',16); 15 | ylabel('Value of measurement','FontSize',16); 16 | 17 | %=== add in the legend 18 | legend_str = {'Mean arterial blood pressure','Heart Rate','Peripheral oxygen saturation','Respiratory Rate'}; 19 | 20 | % dummy figure to provide the legend 21 | hleg=legend(legend_str,'Location','NorthEast'); 22 | set(gca,'FontSize',16); 23 | grid on; 24 | 25 | %% add in GCS 26 | lbl_keep = {'GCS - Eye Opening';'GCS - Motor Response';'GCS - Verbal Response'}; 27 | lbl_plot = (135:20:175)+2; 28 | 29 | % plot the values 30 | for k=1:numel(lbl_keep) 31 | idxPlot = ismember(data_ce_str(:,3), lbl_keep{k}); 32 | data_plot = data_ce_str(idxPlot,1); 33 | time_plot = data_ce(idxPlot,2); 34 | 35 | idxM = find(time_plot < 72); 36 | % idxM = idxM(1:4:end); 37 | idxM = idxM(:)'; 38 | for m=idxM 39 | text(time_plot(m),... 40 | lbl_plot(k),... % y-axis location, defined above 41 | data_plot{m},... 42 | 'FontName','Helvetica','FontSize',14); 43 | end 44 | end 45 | lbl_keep = strrep(lbl_keep,'GCS - ',''); 46 | 47 | % add the GCS stuff to the y-axis 48 | set(gca,'YLim',[0,200],'YTick',[0:50:100,135,155,175,200],... 49 | 'YTickLabel',{'0','50','100',lbl_keep{1},lbl_keep{2},lbl_keep{3},'200'}); 50 | if savefigflag==1 51 | export_fig(1,'exampledata3.png','-transparent'); 52 | end 53 | %% add in labs 54 | le_lbl = unique(data_le_str(:,3)); 55 | 56 | lbl_keep = {'CREATININE'; 57 | 'HEMOGLOBIN'}; 58 | 59 | 60 | % plot the values 61 | for k=1:numel(lbl_keep) 62 | idxPlot = ismember(data_le_str(:,3), lbl_keep{k}); 63 | plot(data_le(idxPlot,2), data_le(idxPlot,4),marker{k+4},... 64 | 'Color',[0,0,0], 'markerfacecolor',col(k+4,:),... 
65 | 'markersize',12,'linewidth',2); 66 | end 67 | 68 | legend_str = legend_str(:)'; 69 | legend_str = [legend_str,lbl_keep']; 70 | legend(legend_str,'Location','NorthEast'); 71 | 72 | if savefigflag==1 73 | export_fig(1,'exampledata4.png','-transparent'); 74 | end -------------------------------------------------------------------------------- /example-patient/example_patient_matlab_ie.m: -------------------------------------------------------------------------------- 1 | %% Plot pain/sedation medication 2 | lbl1 = {'Midazolam (Versed)','Propofol','Fentanyl'}; 3 | for k=1:numel(lbl1) 4 | idxPlot = ismember(data_ie_str(:,3), lbl1{k}); 5 | 6 | % time start/stop 7 | time_plot = data_ie(idxPlot,2:3); 8 | 9 | % rate start/stop 10 | data_plot = data_ie(idxPlot,5:6); 11 | 12 | idxPlot = find(time_plot(:,1) < 72); % only plot drug infusions whose start time is below 72 (matches the XLim of [0,72] hours) 13 | idxPlot = idxPlot(:)'; % ensure it is a row vector for "for" loop 14 | for m=idxPlot 15 | % starting marker 16 | plot(time_plot(m,1), data_plot(m,2), '<',... 17 | 'color',col(k,:), 'markerfacecolor',col(k,:),... 18 | 'linewidth',3,'markersize',8,... 19 | 'HandleVisibility', 'off'); 20 | 21 | % ending marker 22 | plot(time_plot(m,2), data_plot(m,2), '>',... 23 | 'color',col(k,:), 'markerfacecolor',col(k,:),... 24 | 'linewidth',3,'markersize',8,... 25 | 'HandleVisibility', 'off'); 26 | 27 | % ensure the plot line only appears in the legend once 28 | if m==idxPlot(end) 29 | visib='on'; 30 | else 31 | visib='off'; 32 | end 33 | 34 | % connecting line 35 | plot(time_plot(m,1:2), repmat(data_plot(m,2),1,2), '-',... 36 | 'color',col(k,:), 'markerfacecolor',col(k,:),... 37 | 'linewidth',3,'markersize',8,... 
38 | 'HandleVisibility', visib); 39 | end 40 | end 41 | 42 | legend_str = lbl1(:)'; 43 | legend(legend_str,'Location','NorthEast'); 44 | 45 | set(gca,'XLim',[0,72],'YLim',[0,200]); 46 | 47 | xlabel('Hours since admission','FontSize',16); 48 | ylabel('Value of measurement','FontSize',16); 49 | 50 | P_PrettyFigure(1); 51 | if savefigflag==1 52 | export_fig(1,'exampledata6.png','-transparent'); 53 | end 54 | 55 | % %% OR data 56 | % idxKeep = data_ie(:,2)<72; 57 | % ie_lbl = unique(data_ie_str(idxKeep,3)); 58 | % 59 | % lbl1 = {'OR Cryoprecipitate Intake'; 60 | % 'OR Crystalloid Intake';'OR FFP Intake'; 61 | % 'OR Packed RBC Intake';'OR Platelet Intake'}; 62 | % for k=1:numel(lbl1) 63 | % idxPlot = ismember(data_ie_str(:,3), lbl1{k}); 64 | % 65 | % % time start/stop 66 | % time_plot = data_ie(idxPlot,2:3); 67 | % 68 | % % rate start/stop 69 | % data_plot = data_ie(idxPlot,5:6); 70 | % 71 | % % for OR volumes, it's always a bolus over 1 minute 72 | % plot(time_plot(1,1), data_plot(1,1)/100, 's',... 73 | % 'color',[0,0,0], 'markerfacecolor',col(k+1,:),... 74 | % 'linewidth',3,'markersize',10); 75 | % end 76 | % 77 | % ylabel('OR blood (mL/100)'); 78 | % legend_str = [legend_str,lbl1(:)']; 79 | % legend(legend_str,'Location','NorthEast'); 80 | % 81 | % if savefigflag==1 82 | % export_fig(1,'exampledata7.png','-transparent'); 83 | % end 84 | %% 85 | lbl1 = {'LR'}; 86 | for k=1:numel(lbl1) 87 | idxPlot = ismember(data_ie_str(:,3), lbl1{k}); 88 | 89 | 90 | % time start/stop 91 | time_plot = data_ie(idxPlot,2:3); 92 | 93 | % rate start/stop 94 | data_plot = data_ie(idxPlot,5:6); 95 | 96 | M=3; 97 | for m=1:M 98 | % starting marker 99 | plot(time_plot(m,1), data_plot(m,2), '<',... 100 | 'color',col(k+7,:), 'markerfacecolor',col(k+7,:),... 101 | 'linewidth',3,'markersize',8,... 102 | 'HandleVisibility', 'off'); 103 | 104 | 105 | % ending marker 106 | plot(time_plot(m,2), data_plot(m,2), '>',... 107 | 'color',col(k+7,:), 'markerfacecolor',col(k+7,:),... 
108 | 'linewidth',3,'markersize',8,... 109 | 'HandleVisibility', 'off'); 110 | 111 | % ensure the plot line only appears in the legend once 112 | if m==M 113 | visib='on'; 114 | else 115 | visib='off'; 116 | end 117 | 118 | % connecting line 119 | plot(time_plot(m,1:2), repmat(data_plot(m,2),1,2), '-',... 120 | 'color',col(k+7,:), 'markerfacecolor',col(k+7,:),... 121 | 'linewidth',3,'markersize',8,... 122 | 'HandleVisibility', visib); 123 | end 124 | 125 | %=== plot bolus at M=4 126 | m=4; 127 | plot(time_plot(m,1), data_plot(m,1)/10, 's',... 128 | 'color',col(k+7,:), 'markerfacecolor',col(k+7,:),... 129 | 'linewidth',3,'markersize',10); 130 | end 131 | 132 | legend_str = [legend_str,lbl1(:)',strcat(lbl1(:)',' Bolus')]; 133 | legend(legend_str,'Location','NorthEast'); 134 | 135 | 136 | if savefigflag==1 137 | export_fig(1,'exampledata8.png','-transparent'); 138 | end -------------------------------------------------------------------------------- /example-patient/example_patient_matlab_le.m: -------------------------------------------------------------------------------- 1 | %% Plot the labs 2 | le_lbl = unique(data_le_str(:,3)); 3 | marker = {'d','+','o','x','>','d','<','+','^'}; 4 | lbl_keep = {'CREATININE'; 5 | 'HEMOGLOBIN'; 6 | 'PCO2'; 7 | 'PO2'; 8 | 'LACTATE'}; 9 | 10 | 11 | % plot the values 12 | for k=1:numel(lbl_keep) 13 | idxPlot = ismember(data_le_str(:,3), lbl_keep{k}); 14 | data_plot = data_le(idxPlot,4); 15 | if ismember(lbl_keep{k},{'PO2','PCO2'})==1 16 | % convert to kPa 17 | data_plot = data_plot / 7.500617; 18 | end 19 | plot(data_le(idxPlot,2), data_plot, ['--' marker{k}],... 20 | 'Color',col(k,:), 'markerfacecolor',col(k,:),... 
21 | 'markersize',12,'linewidth',2); 22 | end 23 | 24 | legend_str = lbl_keep(:)'; 25 | legend(lbl_keep,'Location','NorthEast'); 26 | 27 | set(gca,'XLim',[0,72],'YLim',[0,25]); 28 | 29 | xlabel('Hours since admission','FontSize',16); 30 | ylabel('Value of measurement','FontSize',16); 31 | 32 | P_PrettyFigure(1); 33 | if savefigflag==1 34 | 35 | legend(lbl_keep,'Location','NorthWest'); 36 | export_fig(1,'exampledata5.png','-transparent'); 37 | end -------------------------------------------------------------------------------- /example-patient/expt-query-1.sql: -------------------------------------------------------------------------------- 1 | select ie.icustay_id 2 | , di.label 3 | , round( (julianday(de.charttime) - julianday(ie.intime))*24, 4) as Hours 4 | , de.itemid 5 | , de.value 6 | , de.valuenum 7 | from icustays ie 8 | inner join chartevents de 9 | on ie.icustay_id = de.icustay_id 10 | inner join d_items di 11 | on de.itemid = di.itemid 12 | where ie.hadm_id = 103075 13 | order by charttime -------------------------------------------------------------------------------- /example-patient/expt-query-2.sql: -------------------------------------------------------------------------------- 1 | select ie.icustay_id 2 | , di.label 3 | , round( (julianday(de.charttime) - julianday(ie.intime))*24, 4) as Hours 4 | , de.itemid 5 | , de.value 6 | , de.valuenum 7 | from icustays ie 8 | inner join labevents de 9 | on de.hadm_id = ie.hadm_id 10 | inner join d_labitems di 11 | on de.itemid = di.itemid 12 | where de.hadm_id = 103075 13 | order by charttime -------------------------------------------------------------------------------- /example-patient/expt-query-3.sql: -------------------------------------------------------------------------------- 1 | select de.icustay_id 2 | , di.label 3 | , round( (julianday(de.charttime) - julianday(ie.intime))*24, 4) as HOURS 4 | , de.itemid 5 | , de.value 6 | , de.value as valuenum 7 | from icustays ie 8 | inner join outputevents de 
9 | on de.icustay_id = ie.icustay_id 10 | inner join d_items di 11 | on de.itemid = di.itemid 12 | where de.hadm_id = 103075 13 | order by charttime -------------------------------------------------------------------------------- /example-patient/expt-query-4.sql: -------------------------------------------------------------------------------- 1 | select de.icustay_id 2 | , di.label 3 | , round( (julianday(de.charttime) - julianday(ie.intime))*24, 4) as HOURS 4 | , de.itemid 5 | , de.amount 6 | , de.amountuom 7 | , de.rate 8 | , de.rateuom 9 | from icustays ie 10 | inner join inputevents_cv de 11 | on de.icustay_id = ie.icustay_id 12 | inner join d_items di 13 | on de.itemid = di.itemid 14 | where de.hadm_id = 103075 15 | order by charttime -------------------------------------------------------------------------------- /example-patient/expt-query-to-csv.sql: -------------------------------------------------------------------------------- 1 | -- This script exports data for a single patient from a PostgreSQL instance of MIMIC-III to CSV. 2 | -- You may need to change the paths to match your local system. 3 | -- You may also need to set the PostgreSQL search path to the schema with MIMIC-III. 4 | 5 | -- This version extracts data for the hospital admission with hadm_id = 103075. 6 | 7 | -- This script exports data for a single patient from a PostgreSQL instance of MIMIC-III to CSV. 8 | -- You may need to change the paths to match your local system. 9 | -- You may also need to set the PostgreSQL search path to the schema with MIMIC-III. 
10 | 11 | -- CHARTED DATA 12 | Copy ( 13 | select ie.icustay_id 14 | , di.label 15 | , round(extract(EPOCH from (de.charttime-ie.intime)) :: NUMERIC / 3600,4) as HOURS -- EPOCH is in seconds; 3600 s per hour 16 | , de.itemid 17 | , de.value 18 | , de.valuenum 19 | from icustays ie 20 | inner join chartevents de 21 | on ie.icustay_id = de.icustay_id 22 | inner join d_items di 23 | on de.itemid = di.itemid 24 | where ie.hadm_id = 103075 25 | order by charttime 26 | ) To '/data/mimic3/example-patient-chartevents.csv' With CSV HEADER; 27 | 28 | 29 | -- LAB DATA 30 | Copy ( 31 | select ie.icustay_id 32 | , di.label 33 | , round(extract(EPOCH from (de.charttime-ie.intime)) :: NUMERIC / 3600,4) as HOURS -- EPOCH is in seconds; 3600 s per hour 34 | , de.itemid 35 | , de.value 36 | , de.valuenum 37 | from icustays ie 38 | inner join labevents de 39 | on de.hadm_id = ie.hadm_id 40 | inner join d_labitems di 41 | on de.itemid = di.itemid 42 | where de.hadm_id = 103075 43 | order by charttime 44 | ) To '/data/mimic3/example-patient-labevents.csv' With CSV HEADER; 45 | 46 | 47 | -- OUTPUT DATA 48 | Copy ( 49 | select de.icustay_id 50 | , di.label 51 | , round(extract(EPOCH from (de.charttime-ie.intime)) :: NUMERIC / 3600,4) as HOURS -- EPOCH is in seconds; 3600 s per hour 52 | , de.itemid 53 | , de.value 54 | , de.value as valuenum 55 | from icustays ie 56 | inner join outputevents de 57 | on de.icustay_id = ie.icustay_id 58 | inner join d_items di 59 | on de.itemid = di.itemid 60 | where de.hadm_id = 103075 61 | order by charttime 62 | ) To '/data/mimic3/example-patient-outputevents.csv' With CSV HEADER; 63 | 64 | 65 | -- INPUT DATA 66 | Copy ( 67 | select de.icustay_id 68 | , di.label 69 | , round(extract(EPOCH from (de.charttime-ie.intime)) :: NUMERIC / 3600,4) as HOURS -- EPOCH is in seconds; 3600 s per hour 70 | , de.itemid 71 | , de.amount 72 | , de.amountuom 73 | , de.rate 74 | , de.rateuom 75 | from icustays ie 76 | inner join inputevents_cv de 77 | on de.icustay_id = ie.icustay_id 78 | inner join d_items di 79 | on de.itemid = di.itemid 80 | where de.hadm_id = 103075 81 | order by charttime 82 | ) To 
'/data/mimic3/example-patient-inputevents.csv' With CSV HEADER; 83 | -------------------------------------------------------------------------------- /example-patient/mlcc1_example_patient.m: -------------------------------------------------------------------------------- 1 | %% Plot data for an example patient 2 | 3 | %% 1 - Initialize some plotting variables 4 | % Some variables used to make pretty plots 5 | col = [0.9047 0.1918 0.1988 6 | 0.2941 0.5447 0.7494 7 | 0.3718 0.7176 0.3612 8 | 1.0000 0.5482 0.1000 9 | 0.4550 0.4946 0.4722 10 | 0.6859 0.4035 0.2412 11 | 0.9718 0.5553 0.7741 12 | 0.5313 0.3359 0.6523]; 13 | 14 | col = repmat(col,2,1); 15 | col_fill = col; 16 | col(9:end,:) = 0; % when plotting > 8 items, we make the outline black 17 | 18 | marker = {'d','+','o','x','>','s','<','+','^'}; 19 | marker = repmat(marker,1,2); 20 | ms = 12; 21 | savefigflag=0; 22 | 23 | %% 2 - SQLite instructions 24 | % STEP 1: Tell Matlab where the driver is 25 | javaclasspath('sqlite-jdbc-3.8.11.2.jar') % use this for SQLite 26 | 27 | % STEP 2: Connect to the Database 28 | conn = database('','','',... 29 | 'org.sqlite.JDBC',['jdbc:sqlite:' pwd filesep 'data' filesep 'mimiciii_v1_3_mini.sqlite']); 30 | 31 | 32 | % Note: Amazon RDS instructions - will be slower as it is the full database 33 | % % STEP 1: Tell Matlab where the driver is 34 | % javaclasspath('postgresql-9.4.1207.jre6.jar') % use this for Amazon 35 | % 36 | % % STEP 2: Connect to the Database 37 | % conn = database('MIMIC','testuser','mitmlcctu','Vendor','PostgreSQL',... 38 | % 'Server','.amazonaws.com',... 39 | % 'PortNumber',5432); 40 | 41 | 42 | %% 3 - Run the query to extract chartevents data 43 | query = makeQuery('expt-query-1.sql'); 44 | data_ce = fetch(conn,query); 45 | 46 | %% 4 - Plot patient vital signs 47 | figure(1); clf; hold all; 48 | 49 | lbl_plot = {'Arterial BP Mean',... 50 | 'Heart Rate',... 51 | 'SpO2',... 
52 | 'Respiratory Rate'}; 53 | 54 | % loop through the above list of labels 55 | for k=1:numel(lbl_plot) 56 | % create an index for only the label we are interested in 57 | idxPlot = ismember(data_ce(:,2), lbl_plot{k}); 58 | 59 | % the 3rd column is the time, and the 6th column is VALUENUM, the numeric value 60 | data_plot = cell2mat(data_ce(idxPlot,6)); 61 | time_plot = cell2mat(data_ce(idxPlot,3)); 62 | 63 | % plot the data for this label 64 | plot(time_plot, data_plot,... 65 | 'LineStyle','--', 'Marker',marker{k},... 66 | 'Color', col(k,:), 'MarkerFaceColor', col_fill(k,:),... 67 | 'markersize', ms, 'linewidth',2); 68 | end 69 | 70 | set(gca,'XLim',[0,72],'YLim',[0,150]); 71 | set(gca,'YTick',0:25:150); 72 | 73 | xlabel('Hours since ICU admission','FontSize',16); 74 | ylabel('Value of measurement','FontSize',16); 75 | 76 | % dummy figure to provide the legend 77 | hleg=legend(lbl_plot,'Location','NorthEast'); 78 | set(gca,'FontSize',16); 79 | grid on; 80 | 81 | %% 5 - What else could you add to the above plot? Add labels to lbl_plot. 82 | 83 | % here is a list of the available labels: 84 | unique(data_ce(:,2)) 85 | 86 | 87 | %% 6 - Extract lab values 88 | query = makeQuery('expt-query-2.sql'); 89 | data_le = fetch(conn,query); 90 | 91 | %% 7 - Plot lab values 92 | figure(1); clf; hold all; 93 | lbl_plot = {'CREATININE','HEMOGLOBIN','LACTATE'}; 94 | 95 | % plot the values 96 | for k=1:numel(lbl_plot) 97 | 98 | % create an index for only the label we are interested in 99 | idxPlot = ismember(data_le(:,2), lbl_plot{k}); 100 | 101 | % the 3rd column is the time, and the 6th column is VALUENUM, the numeric value 102 | data_plot = cell2mat(data_le(idxPlot,6)); 103 | time_plot = cell2mat(data_le(idxPlot,3)); 104 | 105 | % plot the data for this label 106 | plot(time_plot, data_plot,... 107 | 'LineStyle','--','Marker',marker{k},... 108 | 'Color',col(k,:), 'markerfacecolor',col_fill(k,:),... 
109 | 'markersize',ms,'linewidth',2); 110 | end 111 | legend(lbl_plot,'Location','NorthEast'); 112 | set(gca,'XLim',[0,72],'YLim',[0,25],'FontSize',14); 113 | grid on; 114 | 115 | xlabel('Hours since ICU admission','FontSize',16); 116 | ylabel('Value of measurement','FontSize',16); 117 | 118 | 119 | %% 8 - What else could you add to the above plot? Add labels to lbl_plot. 120 | 121 | % here is a list of the available labels: 122 | unique(data_le(:,2)) 123 | 124 | %% 9 - Extract output values 125 | query = makeQuery('expt-query-3.sql'); 126 | data_oe = fetch(conn,query); 127 | 128 | %% 10 - Plot the outputs 129 | figure(1); clf; hold all; 130 | lbl_plot = {'Urine Out Foley'}; 131 | for k=1:numel(lbl_plot) 132 | 133 | % create an index for only the label we are interested in 134 | idxPlot = ismember(data_oe(:,2), lbl_plot{k}); 135 | 136 | % the 3rd column is the time, and the 6th column is VALUENUM, the numeric value 137 | data_plot = cell2mat(data_oe(idxPlot,6)); 138 | time_plot = cell2mat(data_oe(idxPlot,3)); 139 | 140 | plot(time_plot, data_plot,... 141 | 'LineStyle','--','Marker',marker{k},... 142 | 'color',col(k,:), 'markerfacecolor',col_fill(k,:),... 143 | 'linewidth',2,'markersize',ms); 144 | end 145 | 146 | legend(lbl_plot,'Location','NorthEast'); 147 | set(gca,'XLim',[0,72],'YLim',[0,1000],'FontSize',14); 148 | 149 | xlabel('Hours since ICU admission','FontSize',16); 150 | ylabel('Value of measurement','FontSize',16); 151 | grid on; 152 | 153 | %% 11 - What else could you add to the above plot? Add labels to lbl_plot. 
154 | 155 | % here is a list of the available labels: 156 | unique(data_oe(:,2)) 157 | 158 | %% 12 - Extract input values 159 | query = makeQuery('expt-query-4.sql'); 160 | data_ie = fetch(conn,query); 161 | 162 | % this is a fix to replace empty cells with "NaN" 163 | % SQL represents missing values as empty, but MATLAB represents them as NaN 164 | data_ie(cellfun(@isempty, data_ie(:,5)),5) = {NaN}; 165 | data_ie(cellfun(@isempty, data_ie(:,7)),7) = {NaN}; 166 | %% 13 - Plot the inputs 167 | figure(1); clf; hold all; 168 | lbl_plot = {'Neosynephrine-k','Propofol'}; 169 | for k=1:numel(lbl_plot) 170 | 171 | % create an index for only the label we are interested in 172 | idxPlot = ismember(data_ie(:,2), lbl_plot{k}); 173 | 174 | % the 3rd column is the time 175 | % for inputs, the order is slightly different: 176 | % the 5th column is the VOLUME 177 | % the 7th column is the RATE 178 | data_plot = cell2mat(data_ie(idxPlot,7)); 179 | time_plot = cell2mat(data_ie(idxPlot,3)); 180 | 181 | plot(time_plot, data_plot,... 182 | 'LineStyle','--','Marker',marker{k},... 183 | 'color',col(k,:), 'markerfacecolor',col_fill(k,:),... 184 | 'linewidth',2,'markersize',ms); 185 | end 186 | 187 | legend(lbl_plot,'Location','NorthWest'); 188 | set(gca,'XLim',[0,72],'YLim',[0,100],'FontSize',14); 189 | 190 | grid on; 191 | 192 | xlabel('Hours since ICU admission','FontSize',16); 193 | ylabel('Value of measurement','FontSize',16); 194 | 195 | 196 | %% 14 - What else could you add to the above plot? Add labels to lbl_plot. 
197 | 198 | % here is a list of the available labels: 199 | unique(data_ie(:,2)) 200 | 201 | 202 | %% 15 - Bring it all together 203 | lbl_ce = {'Arterial BP Mean','Heart Rate','SpO2','Respiratory Rate'}; 204 | lbl_le = {'CREATININE','HEMOGLOBIN','LACTATE'}; 205 | lbl_oe = {'Urine Out Foley'}; 206 | lbl_ie = {'Neosynephrine-k','Propofol'}; 207 | 208 | figure(1); clf; hold all; 209 | 210 | k_offset = 0; 211 | % Plot the chart values 212 | for k=1:numel(lbl_ce) 213 | % create an index for only the label we are interested in 214 | idxPlot = ismember(data_ce(:,2), lbl_ce{k}); 215 | 216 | % the 3rd column is the time, and the 6th column is VALUENUM, the numeric value 217 | data_plot = cell2mat(data_ce(idxPlot,6)); 218 | time_plot = cell2mat(data_ce(idxPlot,3)); 219 | 220 | % plot the data for this label 221 | plot(time_plot, data_plot, marker{k},... 222 | 'Color', col(k+k_offset,:), 'MarkerFaceColor', col_fill(k+k_offset,:),... 223 | 'markersize', ms, 'linewidth',2); 224 | end 225 | k_offset=k_offset+k; 226 | 227 | % Plot the lab values 228 | for k=1:numel(lbl_le) 229 | 230 | % create an index for only the label we are interested in 231 | idxPlot = ismember(data_le(:,2), lbl_le{k}); 232 | 233 | % the 3rd column is the time, and the 6th column is VALUENUM, the numeric value 234 | data_plot = cell2mat(data_le(idxPlot,6)); 235 | time_plot = cell2mat(data_le(idxPlot,3)); 236 | 237 | % plot the data for this label 238 | plot(time_plot, data_plot,... 239 | 'LineStyle','--','Marker',marker{k+k_offset},... 240 | 'Color',col(k+k_offset,:), 'markerfacecolor',col_fill(k+k_offset,:),... 
241 | 'markersize',ms,'linewidth',2); 242 | end 243 | k_offset=k_offset+k; 244 | 245 | % Plot the outputs 246 | for k=1:numel(lbl_oe) 247 | 248 | % create an index for only the label we are interested in 249 | idxPlot = ismember(data_oe(:,2), lbl_oe{k}); 250 | 251 | % the 3rd column is the time, and the 6th column is VALUENUM, the numeric value 252 | data_plot = cell2mat(data_oe(idxPlot,6)); 253 | time_plot = cell2mat(data_oe(idxPlot,3)); 254 | 255 | plot(time_plot, data_plot,... 256 | 'LineStyle','--','Marker',marker{k+k_offset},... 257 | 'color',col(k+k_offset,:), 'markerfacecolor',col_fill(k+k_offset,:),... 258 | 'linewidth',2,'markersize',ms); 259 | end 260 | k_offset=k_offset+k; 261 | 262 | % Plot the inputs 263 | for k=1:numel(lbl_ie) 264 | % create an index for only the label we are interested in 265 | idxPlot = ismember(data_ie(:,2), lbl_ie{k}); 266 | 267 | % the 3rd column is the time 268 | % for inputs, the order is slightly different: 269 | % the 5th column is the VOLUME 270 | % the 7th column is the RATE 271 | data_plot = cell2mat(data_ie(idxPlot,7)); 272 | time_plot = cell2mat(data_ie(idxPlot,3)); 273 | 274 | plot(time_plot, data_plot,... 275 | 'LineStyle','--','Marker',marker{k+k_offset},... 276 | 'color',col(k+k_offset,:), 'markerfacecolor',col_fill(k+k_offset,:),... 
277 | 'linewidth',2,'markersize',ms); 278 | end 279 | k_offset=k_offset+k; 280 | 281 | 282 | 283 | legend([lbl_ce, lbl_le, lbl_oe, lbl_ie],'Location','Best'); 284 | xlabel('Hours since ICU admission','FontSize',16); 285 | ylabel('Value of measurement','FontSize',16); 286 | 287 | -------------------------------------------------------------------------------- /installation/sqlite-manager/sqlite_manager-0.8.3-tb+sm+fx.xpi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MIT-LCP/mimic-workshop/b27eee438a1f62d909dd30d1d458d3516f32b276/installation/sqlite-manager/sqlite_manager-0.8.3-tb+sm+fx.xpi -------------------------------------------------------------------------------- /intro_to_mimic/00-query-mimic.md: -------------------------------------------------------------------------------- 1 | 2 | # Introduction to the MIMIC database 3 | 4 | ## What is the MIMIC Critical Care Database? 5 | 6 | MIMIC-III is a freely available relational database developed by the MIT Lab for Computational Physiology, comprising deidentified health data associated with >40,000 critical care patients. It includes demographics, vital signs, laboratory tests, medications, and more. MIMIC-III is used widely around the world in academic research, education, and industry. For further information, see: https://mimic.physionet.org/ 7 | 8 | ## Workshop overview 9 | 10 | During the workshop, you will: 11 | 12 | * Learn about MIMIC-III, the publicly accessible critical care database 13 | * Create a local version of MIMIC-III with a small sample of patients using the Firefox SQLite Plugin 14 | * Explore the patient data using SQL 15 | * Plot and analyse the data using Python 16 | * Get inspiration for future research projects 17 | 18 | ## Set up a mini version of MIMIC-III on your computer 19 | 20 | * MIMIC-III contains over 40,000 patients, but for the workshop we will be working with a subset of patients. 
21 | * To create the database on your computer, you will need to install Firefox and the Firefox SQLite Manager Add-on. Open Firefox, select "Add-ons" from the Tools menu, and then install SQLite Manager. 22 | * After restarting Firefox, select "SQLite Manager" from the Tools menu. In SQLite Manager, click "Connect Database" in the menu, and select the "data/mimicdata.sqlite" database file. 23 | 24 | ## Start exploring the data with SQL 25 | 26 | SQL stands for "structured query language". It is the standard language used for querying relational databases, which are databases comprising several tables linked together by IDs. 27 | 28 | TIP: queries are generally constructed using the following syntax: 29 | 30 | ```sql 31 | SELECT <columns> 32 | FROM <table> 33 | WHERE <conditions>; 34 | ``` 35 | 36 | ### Select all of the columns ('\*') from the patients table 37 | 38 | ```sql 39 | SELECT * 40 | FROM patients; 41 | ``` 42 | 43 | ### Select all of the columns ('\*') from the patients table where the patient is female 44 | 45 | ```sql 46 | SELECT * 47 | FROM patients 48 | WHERE gender = 'F'; 49 | ``` 50 | 51 | ### Select all of the columns ('\*') from the patients table for a single patient 52 | 53 | ```sql 54 | SELECT * 55 | FROM patients 56 | WHERE subject_id = 40080; 57 | ``` 58 | 59 | ## More example queries 60 | 61 | ### Combine the admissions and patients table using their common link, `subject_id` 62 | 63 | ```sql 64 | SELECT * 65 | FROM patients 66 | INNER JOIN admissions 67 | ON patients.subject_id = admissions.subject_id; 68 | ``` 69 | 70 | ### Subselect rows using the where clause 71 | 72 | Here we select only the female ('F') patients. 73 | 74 | ```sql 75 | SELECT * 76 | FROM patients 77 | INNER JOIN admissions 78 | ON patients.subject_id = admissions.subject_id 79 | WHERE gender = 'F'; 80 | ``` 81 | 82 | ### Select a single patient by specifying their `subject_id` 83 | 84 | Note that we need to specify which table the `subject_id` is sourced from (`patients.subject_id`). 
85 | This is because there are two `subject_id` columns: one from patients and the other from admissions. 86 | SQL will not know which table to choose from, so you must specify it. 87 | 88 | ```sql 89 | SELECT * 90 | FROM patients 91 | INNER JOIN admissions 92 | ON patients.subject_id = admissions.subject_id 93 | WHERE gender = 'F' 94 | AND patients.subject_id = 40080; 95 | ``` 96 | 97 | ### Select only data from the patients table 98 | 99 | We can use the table name with a wild card (\*) to specify all columns from that table. 100 | 101 | ```sql 102 | SELECT patients.* 103 | FROM patients 104 | INNER JOIN admissions 105 | ON patients.subject_id = admissions.subject_id 106 | WHERE gender = 'F' 107 | AND patients.subject_id = 40080; 108 | ``` 109 | 110 | ### Select only data from the admissions table 111 | 112 | Similarly, we can select only the columns in the admissions table. 113 | 114 | ```sql 115 | SELECT admissions.* 116 | FROM patients 117 | INNER JOIN admissions 118 | ON patients.subject_id = admissions.subject_id 119 | WHERE gender = 'F' 120 | AND patients.subject_id = 40080; 121 | ``` 122 | 123 | ### Select single columns from a table 124 | 125 | Instead of using the wild card, we can specify the columns we would like (in this case, DOB). 126 | 127 | ```sql 128 | SELECT patients.DOB, admissions.* 129 | FROM patients 130 | INNER JOIN admissions 131 | ON patients.subject_id = admissions.subject_id 132 | WHERE gender = 'F' 133 | AND patients.subject_id = 40080; 134 | ``` 135 | 136 | ### Using aliases for convenience 137 | 138 | Typing out admissions and patients over and over can be tedious. SQL allows aliases to be defined. 139 | Aliases are simply short hand for the full table name. An alias is defined by writing a word after the table name appears in either the FROM or JOIN clause. 140 | For example, we have defined the alias 'pat' for patients, and the alias 'adm' for admissions. 
141 | Now, when we select from these tables, we use the alias name, *not* the table name. 142 | 143 | ```sql 144 | SELECT pat.DOB, adm.* 145 | FROM patients pat 146 | INNER JOIN admissions adm 147 | ON pat.subject_id = adm.subject_id 148 | WHERE gender = 'F' 149 | AND pat.subject_id = 40080; 150 | ``` 151 | 152 | ### Select data for the same patient from chartevents 153 | 154 | ```sql 155 | SELECT * 156 | FROM chartevents 157 | WHERE subject_id = 40080; 158 | ``` 159 | 160 | ### Define an alias for chartevents and select data from it 161 | 162 | ```sql 163 | SELECT ce.* 164 | FROM chartevents ce 165 | WHERE subject_id = 40080; 166 | ``` 167 | 168 | ### Join to the d_items table to get a description of what the observations in chartevents are 169 | 170 | ```sql 171 | SELECT ce.*, di.label 172 | FROM chartevents ce 173 | INNER JOIN d_items di 174 | ON ce.itemid = di.itemid 175 | WHERE subject_id = 40080; 176 | ``` 177 | -------------------------------------------------------------------------------- /intro_to_mimic/01-example-patient-heart-failure.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exploring the trajectory of a single patient" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Import Python libraries" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "We first need to import some tools for working with data in Python. 
\n", 22 | "- NumPy is for working with numbers\n", 23 | "- Pandas is for analysing data\n", 24 | "- MatPlotLib is for making plots\n", 25 | "- Sqlite3 to connect to the database" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "import numpy as np\n", 37 | "import pandas as pd\n", 38 | "import matplotlib.pyplot as plt\n", 39 | "import sqlite3\n", 40 | "%matplotlib inline" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "### Connect to the database" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "- We can use the sqlite3 library to connect to the MIMIC database\n", 55 | "- Once the connection is established, we'll run a simple SQL query." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "# Connect to the MIMIC database\n", 67 | "conn = sqlite3.connect('data/mimicdata.sqlite')" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "collapsed": true 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "# Create our test query\n", 79 | "test_query = \"\"\"\n", 80 | "SELECT subject_id, hadm_id, admittime, dischtime, admission_type, diagnosis\n", 81 | "FROM admissions\n", 82 | "\"\"\"" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": { 89 | "collapsed": true 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "# Run the query and assign the results to a variable\n", 94 | "test = pd.read_sql_query(test_query,conn)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": { 101 | "collapsed": false 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "# Display the first few rows\n", 106 | "test.head()" 107 | ] 108 
| }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "### Load the chartevents data" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "- The chartevents table contains data charted at the patient bedside. It includes variables such as heart rate, respiratory rate, temperature, and so on.\n", 121 | "- We'll begin by loading the chartevents data for a single patient." 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "query = \"\"\"\n", 133 | "SELECT de.icustay_id\n", 134 | " , (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS\n", 135 | " , di.label\n", 136 | " , de.value\n", 137 | " , de.valuenum\n", 138 | " , de.uom\n", 139 | "FROM chartevents de\n", 140 | "INNER join d_items di\n", 141 | "ON de.itemid = di.itemid\n", 142 | "INNER join icustays ie\n", 143 | "ON de.icustay_id = ie.icustay_id\n", 144 | "WHERE de.icustay_id = 252522\n", 145 | "ORDER BY charttime;\n", 146 | "\"\"\"\n", 147 | "\n", 148 | "ce = pd.read_sql_query(query,conn)\n", 149 | "\n", 150 | "\n", 151 | "# OPTION 2: load chartevents from a CSV file\n", 152 | "# ce = pd.read_csv('data/example_chartevents.csv', index_col='HOURSSINCEADMISSION')" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "# Preview the data\n", 164 | "# Use 'head' to limit the number of rows returned\n", 165 | "ce.head()" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "### Review the patient's heart rate" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "- We can select individual columns using the column name. 
\n", 180 | "- For example, if we want to select just the label column, we write **```ce.LABEL```** or alternatively **```ce['LABEL']```**" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": { 187 | "collapsed": false 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "# Select a single column\n", 192 | "ce['LABEL']" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "- In a similar way, we can select rows from data using indexes. \n", 200 | "- For example, to select rows where the label is equal to 'Heart Rate', we would create an index using **```[ce.LABEL=='Heart Rate']```** " 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "# Select just the heart rate rows using an index\n", 212 | "ce[ce.LABEL=='Heart Rate']" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "### Plot 1: How did the patient's heart rate change over time?" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "- Using the methods described above to select our data of interest, we can create our x and y axis values to create a time series plot of heart rate." 
227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": { 233 | "collapsed": false 234 | }, 235 | "outputs": [], 236 | "source": [ 237 | "# Which time stamps have a corresponding heart rate measurement?\n", 238 | "print ce.index[ce.LABEL=='Heart Rate']" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": { 245 | "collapsed": false, 246 | "scrolled": true 247 | }, 248 | "outputs": [], 249 | "source": [ 250 | "# Set x equal to the times\n", 251 | "x_hr = ce.HOURS[ce.LABEL=='Heart Rate']\n", 252 | "\n", 253 | "# Set y equal to the heart rates\n", 254 | "y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']\n", 255 | "\n", 256 | "# Plot time against heart rate\n", 257 | "plt.figure(figsize=(14, 6))\n", 258 | "plt.plot(x_hr,y_hr)\n", 259 | "\n", 260 | "\n", 261 | "plt.xlabel('Time',fontsize=16)\n", 262 | "plt.ylabel('Heart rate',fontsize=16)\n", 263 | "plt.title('Heart rate over time from admission to the intensive care unit')" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "### Task 1\n", 271 | "\n", 272 | "* What is happening to this patient's heart rate?\n", 273 | "* Plot respiratory rate over time for the patient.\n", 274 | "* Is there anything unusual about the patient's respiratory rate?\n" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": { 281 | "collapsed": true 282 | }, 283 | "outputs": [], 284 | "source": [ 285 | "# Exercise 1 here\n", 286 | "\n" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "metadata": {}, 292 | "source": [ 293 | "### Plot 2: Did the patient's vital signs breach any alarm thresholds?" 
294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "- Alarm systems in the intensive care unit are commonly based on high and low thresholds defined by the carer.\n", 301 | "- False alarms are often a problem and so thresholds may be set arbitrarily to reduce alarms.\n", 302 | "- As a result, alarm settings carry limited information." 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "collapsed": false 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "plt.figure(figsize=(14, 6))\n", 314 | "\n", 315 | "plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'], \n", 316 | " ce.VALUENUM[ce.LABEL=='Respiratory Rate'],\n", 317 | " 'k+', markersize=10, linewidth=4)\n", 318 | "\n", 319 | "plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - High'], \n", 320 | " ce.VALUENUM[ce.LABEL=='Resp Alarm - High'],\n", 321 | " 'm--')\n", 322 | "\n", 323 | "plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - Low'], \n", 324 | " ce.VALUENUM[ce.LABEL=='Resp Alarm - Low'],\n", 325 | " 'm--')\n", 326 | "\n", 327 | "plt.xlabel('Time',fontsize=16)\n", 328 | "plt.ylabel('Respiratory rate',fontsize=16)\n", 329 | "plt.title('Respiratory rate over time from admission, with upper and lower alarm thresholds')\n", 330 | "plt.ylim(0,55)\n" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": {}, 336 | "source": [ 337 | "### Task 2\n", 338 | "\n", 339 | "- Based on the data, does it look like the alarms would have triggered for this patient?\n" 340 | ] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": {}, 345 | "source": [ 346 | "### Plot 3: What is patient's level of consciousness?" 347 | ] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": {}, 352 | "source": [ 353 | "- Glasgow Coma Scale (GCS) is a measure of consciousness.\n", 354 | "- It is commonly used for monitoring patients in the intensive care unit. 
\n", 355 | "- It consists of three components: eye response; verbal response; motor response." 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": { 362 | "collapsed": false 363 | }, 364 | "outputs": [], 365 | "source": [ 366 | "# Display the first few rows of the GCS eye response data\n", 367 | "ce[ce.LABEL=='GCS - Eye Opening'].head()" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": { 374 | "collapsed": false 375 | }, 376 | "outputs": [], 377 | "source": [ 378 | "# Prepare the size of the figure\n", 379 | "plt.figure(figsize=(18, 10))\n", 380 | "\n", 381 | "# Set x equal to the times\n", 382 | "x_hr = ce.HOURS[ce.LABEL=='Heart Rate']\n", 383 | "\n", 384 | "# Set y equal to the heart rates\n", 385 | "y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']\n", 386 | "\n", 387 | "\n", 388 | "plt.plot(x_hr,y_hr)\n", 389 | "\n", 390 | "plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'], \n", 391 | " ce.VALUENUM[ce.LABEL=='Respiratory Rate'],\n", 392 | " 'k', markersize=6)\n", 393 | "\n", 394 | "# Add a text label to the y-axis\n", 395 | "plt.text(-20,155,'GCS - Eye Opening',fontsize=14)\n", 396 | "plt.text(-20,150,'GCS - Motor Response',fontsize=14)\n", 397 | "plt.text(-20,145,'GCS - Verbal Response',fontsize=14) \n", 398 | "\n", 399 | "# Iterate over list of GCS labels, plotting around 1 in 10 to avoid overlap\n", 400 | "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Eye Opening'].values):\n", 401 | " if np.mod(i,6)==0 and i < 65:\n", 402 | " plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Eye Opening'].values[i],155),fontsize=14)\n", 403 | " \n", 404 | "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Motor Response'].values):\n", 405 | " if np.mod(i,6)==0 and i < 65:\n", 406 | " plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Motor Response'].values[i],150),fontsize=14)\n", 407 | "\n", 408 | "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Verbal Response'].values):\n", 409 | " if 
np.mod(i,6)==0 and i < 65:\n", 410 | " plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Verbal Response'].values[i],145),fontsize=14)\n", 411 | "\n", 412 | "plt.title('Vital signs and Glasgow Coma Scale over time from admission',fontsize=16)\n", 413 | "\n", 414 | "plt.xlabel('Time (hours)',fontsize=16)\n", 415 | "plt.ylabel('Heart rate or GCS',fontsize=16)\n", 416 | "plt.ylim(10,165)\n" 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": {}, 422 | "source": [ 423 | "### Task 3\n", 424 | "\n", 425 | "- How is the patient's consciousness changing over time?" 426 | ] 427 | }, 428 | { 429 | "cell_type": "markdown", 430 | "metadata": {}, 431 | "source": [ 432 | "# Stop here..." 433 | ] 434 | }, 435 | { 436 | "cell_type": "markdown", 437 | "metadata": {}, 438 | "source": [ 439 | "### Plot 4: What other data do we have on the patient?" 440 | ] 441 | }, 442 | { 443 | "cell_type": "markdown", 444 | "metadata": {}, 445 | "source": [ 446 | "- Using the Pandas 'read_sql_query' function again, we'll now load the outputevents data - this table contains all information about patient outputs (urine output, drains, dialysis)." 
447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": null, 452 | "metadata": { 453 | "collapsed": false 454 | }, 455 | "outputs": [], 456 | "source": [ 457 | "# OPTION 1: load outputs from the patient\n", 458 | "query = \"\"\"\n", 459 | "select de.icustay_id\n", 460 | " , (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS\n", 461 | " , di.label\n", 462 | " , de.value\n", 463 | " , de.valueuom\n", 464 | "from outputevents de \n", 465 | "inner join icustays ie\n", 466 | " on de.icustay_id = ie.icustay_id\n", 467 | "inner join d_items di\n", 468 | " on de.itemid = di.itemid\n", 469 | "where de.subject_id = 40080\n", 470 | "order by charttime;\n", 471 | "\"\"\"\n", 472 | "\n", 473 | "oe = pd.read_sql_query(query,conn)" 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": null, 479 | "metadata": { 480 | "collapsed": false 481 | }, 482 | "outputs": [], 483 | "source": [ 484 | "oe.head()" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "execution_count": null, 490 | "metadata": { 491 | "collapsed": false 492 | }, 493 | "outputs": [], 494 | "source": [ 495 | "plt.figure(figsize=(14, 10))\n", 496 | "\n", 497 | "plt.figure(figsize=(14, 6))\n", 498 | "plt.title('Fluid output over time')\n", 499 | "\n", 500 | "plt.plot(oe.HOURS, \n", 501 | " oe.VALUE.cumsum()/1000, \n", 502 | " 'ro', markersize=8, label='Output volume, L')\n", 503 | "\n", 504 | "plt.xlim(0,72)\n", 505 | "plt.ylim(0,10)\n", 506 | "plt.legend()" 507 | ] 508 | }, 509 | { 510 | "cell_type": "markdown", 511 | "metadata": {}, 512 | "source": [ 513 | "To provide necessary context to this plot, it would help to include patient input data. This provides the necessary context to determine a patient's fluid balance - a key indicator in patient health." 
514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": null, 519 | "metadata": { 520 | "collapsed": false 521 | }, 522 | "outputs": [], 523 | "source": [ 524 | "# OPTION 1: load inputs given to the patient (usually intravenously) using the database connection\n", 525 | "query = \"\"\"\n", 526 | "select de.icustay_id\n", 527 | " , (strftime('%s',de.starttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_START\n", 528 | " , (strftime('%s',de.endtime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_END\n", 529 | " , de.linkorderid\n", 530 | " , di.label\n", 531 | " , de.amount\n", 532 | " , de.amountuom\n", 533 | " , de.rate\n", 534 | " , de.rateuom\n", 535 | "from inputevents_mv de \n", 536 | "inner join icustays ie\n", 537 | " on de.icustay_id = ie.icustay_id\n", 538 | "inner join d_items di\n", 539 | " on de.itemid = di.itemid\n", 540 | "where de.subject_id = 40080\n", 541 | "order by endtime;\n", 542 | "\"\"\"\n", 543 | "\n", 544 | "ie = pd.read_sql_query(query,conn)\n", 545 | "\n", 546 | "# # OPTION 2: load ioevents using the CSV file with endtime as the index\n", 547 | "# ioe = pd.read_csv('inputevents.csv'\n", 548 | "# ,header=None\n", 549 | "# ,names=['subject_id','itemid','label','starttime','endtime','amount','amountuom','rate','rateuom']\n", 550 | "# ,parse_dates=True)" 551 | ] 552 | }, 553 | { 554 | "cell_type": "code", 555 | "execution_count": null, 556 | "metadata": { 557 | "collapsed": false 558 | }, 559 | "outputs": [], 560 | "source": [ 561 | "ie.head()" 562 | ] 563 | }, 564 | { 565 | "cell_type": "markdown", 566 | "metadata": {}, 567 | "source": [ 568 | "Note that the column headers are different: we have \"HOURS_START\" and \"HOURS_END\". This is because inputs are administered over a fixed period of time." 
569 | ] 570 | }, 571 | { 572 | "cell_type": "code", 573 | "execution_count": null, 574 | "metadata": { 575 | "collapsed": false 576 | }, 577 | "outputs": [], 578 | "source": [ 579 | "ie['LABEL'].unique()" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": null, 585 | "metadata": { 586 | "collapsed": false 587 | }, 588 | "outputs": [], 589 | "source": [ 590 | "plt.figure(figsize=(14, 10))\n", 591 | "\n", 592 | "# Plot the cumulative input against the cumulative output\n", 593 | "plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'], \n", 594 | " ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000, \n", 595 | " 'go', markersize=8, label='Intake volume, L')\n", 596 | "\n", 597 | "plt.plot(oe.HOURS, \n", 598 | " oe.VALUE.cumsum()/1000, \n", 599 | " 'ro', markersize=8, label='Output volume, L')\n", 600 | "\n", 601 | "plt.title('Fluid balance over time',fontsize=16)\n", 602 | "plt.xlabel('Hours',fontsize=16)\n", 603 | "plt.ylabel('Volume (litres)',fontsize=16)\n", 604 | "# plt.ylim(0,38)\n", 605 | "plt.legend()" 606 | ] 607 | }, 608 | { 609 | "cell_type": "markdown", 610 | "metadata": { 611 | "collapsed": true 612 | }, 613 | "source": [ 614 | "As the plot shows, the patient's intake tends to be above their output (as one would expect!) - but there are periods where they are almost one to one. One of the biggest challenges of working with ICU data is that context is everything - let's look at a treatment (lasix) that we know will affect this graph." 
615 | ] 616 | }, 617 | { 618 | "cell_type": "code", 619 | "execution_count": null, 620 | "metadata": { 621 | "collapsed": false 622 | }, 623 | "outputs": [], 624 | "source": [ 625 | "plt.figure(figsize=(14, 10))\n", 626 | "\n", 627 | "# Plot the cumulative input against the cumulative output\n", 628 | "plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'], \n", 629 | " ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000, \n", 630 | " 'go', markersize=8, label='Intake volume, L')\n", 631 | "\n", 632 | "plt.plot(oe.HOURS, \n", 633 | " oe.VALUE.cumsum()/1000, \n", 634 | " 'ro', markersize=8, label='Output volume, L')\n", 635 | "\n", 636 | "# example on getting two columns from a dataframe: ie[['HOURS_START','HOURS_END']].head()\n", 637 | "\n", 638 | "for i, idx in enumerate(ie.index[ie.LABEL=='Furosemide (Lasix)']):\n", 639 | " plt.plot([ie.HOURS_START[ie.LABEL=='Furosemide (Lasix)'][idx],\n", 640 | " ie.HOURS_END[ie.LABEL=='Furosemide (Lasix)'][idx]],\n", 641 | " [ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx],\n", 642 | " ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx]],\n", 643 | " 'b-',linewidth=4)\n", 644 | " \n", 645 | "\n", 646 | "plt.title('Fluid balance over time',fontsize=16)\n", 647 | "plt.xlabel('Hours',fontsize=16)\n", 648 | "plt.ylabel('Volume (litres)',fontsize=16)\n", 649 | "# plt.ylim(0,38)\n", 650 | "plt.legend()\n" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "execution_count": null, 656 | "metadata": { 657 | "collapsed": false 658 | }, 659 | "outputs": [], 660 | "source": [ 661 | "ie['LABEL'].unique()" 662 | ] 663 | }, 664 | { 665 | "cell_type": "markdown", 666 | "metadata": {}, 667 | "source": [ 668 | "### Exercise 2\n", 669 | "\n", 670 | "* Plot the alarms for the mean arterial pressure ('```Arterial Blood Pressure mean```')\n", 671 | "* HINT: you can use ```ce.LABEL.unique()``` to find a list of variable names\n", 672 | "* Were the alarm thresholds breached?" 
673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": null, 678 | "metadata": { 679 | "collapsed": false 680 | }, 681 | "outputs": [], 682 | "source": [ 683 | "# Exercise 2 here\n", 684 | "\n", 685 | "\n" 686 | ] 687 | }, 688 | { 689 | "cell_type": "markdown", 690 | "metadata": {}, 691 | "source": [ 692 | "### Plot 3: Were the patient's other vital signs stable?" 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": null, 698 | "metadata": { 699 | "collapsed": false 700 | }, 701 | "outputs": [], 702 | "source": [ 703 | "plt.figure(figsize=(14, 10))\n", 704 | "\n", 705 | "plt.plot(ce.index[ce.LABEL=='Heart Rate'], \n", 706 | " ce.VALUENUM[ce.LABEL=='Heart Rate'],\n", 707 | " 'rx', markersize=8, label='HR')\n", 708 | "\n", 709 | "plt.plot(ce.index[ce.LABEL=='O2 saturation pulseoxymetry'], \n", 710 | " ce.VALUENUM[ce.LABEL=='O2 saturation pulseoxymetry'], \n", 711 | " 'g.', markersize=8, label='O2')\n", 712 | "\n", 713 | "plt.plot(ce.index[ce.LABEL=='Arterial Blood Pressure mean'], \n", 714 | " ce.VALUENUM[ce.LABEL=='Arterial Blood Pressure mean'], \n", 715 | " 'bv', markersize=8, label='MAP')\n", 716 | "\n", 717 | "plt.plot(ce.index[ce.LABEL=='Respiratory Rate'], \n", 718 | " ce.VALUENUM[ce.LABEL=='Respiratory Rate'], \n", 719 | " 'k+', markersize=8, label='RR')\n", 720 | "\n", 721 | "plt.title('Vital signs over time from admission')\n", 722 | "plt.ylim(0,130)\n", 723 | "plt.legend()" 724 | ] 725 | }, 726 | { 727 | "cell_type": "markdown", 728 | "metadata": {}, 729 | "source": [ 730 | "### Plot 5: Laboratory measurements" 731 | ] 732 | }, 733 | { 734 | "cell_type": "markdown", 735 | "metadata": {}, 736 | "source": [ 737 | "Using the Pandas 'read_sql_query' function again, we'll now load the labevents data.\n", 738 | "This data corresponds to measurements made in a laboratory - usually on a sample of patient blood. 
" 739 | ] 740 | }, 741 | { 742 | "cell_type": "code", 743 | "execution_count": null, 744 | "metadata": { 745 | "collapsed": false 746 | }, 747 | "outputs": [], 748 | "source": [ 749 | "# OPTION 1: load labevents data using the database connection\n", 750 | "query = \"\"\"\n", 751 | "SELECT de.subject_id\n", 752 | " , de.charttime\n", 753 | " , di.label, de.value, de.valuenum\n", 754 | " , de.uom\n", 755 | "FROM labevents de\n", 756 | "INNER JOIN d_labitems di\n", 757 | " ON de.itemid = di.itemid\n", 758 | "where de.subject_id = 40080\n", 759 | "\"\"\"\n", 760 | "\n", 761 | "le = pd.read_sql_query(query,conn)\n", 762 | "\n", 763 | "# OPTION 2: load labevents from the CSV file\n", 764 | "# le = pd.read_csv('data/example_labevents.csv', index_col='HOURSSINCEADMISSION')" 765 | ] 766 | }, 767 | { 768 | "cell_type": "code", 769 | "execution_count": null, 770 | "metadata": { 771 | "collapsed": false 772 | }, 773 | "outputs": [], 774 | "source": [ 775 | "# preview the labevents data\n", 776 | "le.head()" 777 | ] 778 | }, 779 | { 780 | "cell_type": "code", 781 | "execution_count": null, 782 | "metadata": { 783 | "collapsed": false 784 | }, 785 | "outputs": [], 786 | "source": [ 787 | "# preview the ioevents data\n", 788 | "le[le.LABEL=='HEMOGLOBIN']" 789 | ] 790 | }, 791 | { 792 | "cell_type": "code", 793 | "execution_count": null, 794 | "metadata": { 795 | "collapsed": false 796 | }, 797 | "outputs": [], 798 | "source": [ 799 | "plt.figure(figsize=(14, 10))\n", 800 | "\n", 801 | "plt.plot(le.index[le.LABEL=='HEMATOCRIT'], \n", 802 | " le.VALUENUM[le.LABEL=='HEMATOCRIT'], \n", 803 | " 'go', markersize=6, label='Haematocrit')\n", 804 | "\n", 805 | "plt.plot(le.index[le.LABEL=='HEMOGLOBIN'], \n", 806 | " le.VALUENUM[le.LABEL=='HEMOGLOBIN'], \n", 807 | " 'bv', markersize=8, label='Hemoglobin')\n", 808 | "\n", 809 | "plt.title('Laboratory measurements over time from admission')\n", 810 | "plt.ylim(0,38)\n", 811 | "plt.legend()" 812 | ] 813 | } 814 | ], 815 | "metadata": { 816 | 
"kernelspec": { 817 | "display_name": "Python 2", 818 | "language": "python", 819 | "name": "python2" 820 | }, 821 | "language_info": { 822 | "codemirror_mode": { 823 | "name": "ipython", 824 | "version": 2 825 | }, 826 | "file_extension": ".py", 827 | "mimetype": "text/x-python", 828 | "name": "python", 829 | "nbconvert_exporter": "python", 830 | "pygments_lexer": "ipython2", 831 | "version": "2.7.10" 832 | } 833 | }, 834 | "nbformat": 4, 835 | "nbformat_minor": 0 836 | } 837 | -------------------------------------------------------------------------------- /intro_to_mimic/MozFest2015.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MIT-LCP/mimic-workshop/b27eee438a1f62d909dd30d1d458d3516f32b276/intro_to_mimic/MozFest2015.key -------------------------------------------------------------------------------- /mlcc/etc/calcRoc.m: -------------------------------------------------------------------------------- 1 | function [ rocx, rocy, auc ] = calcRoc( pred, target ) 2 | %CALCROC This function outputs the sensitivity and 1-specificity at every 3 | %operating point in PRED. These values can be plotted to create a receiver 4 | %operator characteristic (ROC) curve. 
5 | % Detailed explanation goes here 6 | 7 | [pred,idxSort] = sort(pred,1,'ascend'); 8 | target=target(idxSort); 9 | 10 | TP = flipud(target); 11 | FP = cumsum(1-TP); 12 | FP = flipud(FP); 13 | TP = cumsum(TP); 14 | TP = flipud(TP); 15 | FN = cumsum(target)-target; 16 | TN = numel(target) - TP - FP - FN; 17 | 18 | %=== 1-Specificity (false positive rate) 19 | rocx = 1- (TN ./ (TN + FP)); 20 | 21 | %=== Sensitivity (true positive rate) 22 | rocy = TP ./ (TP + FN); 23 | 24 | % AUROC 25 | if nargout > 2 26 | idxNegative = target==0; 27 | % Count the number of negative targets below each element 28 | auc = cumsum(idxNegative,1); 29 | 30 | % Now only keep elements for positive cases 31 | % the result is a vector which counts, for each positive case, how many 32 | % negative cases are lower in predicted value 33 | auc = auc(~idxNegative); 34 | 35 | % sum the number of negative cases which are below a positive case 36 | auc = sum(auc,1); %=== count number who are negative 37 | 38 | % divide by the number of positive/negative pairs in the data 39 | auc = auc./(sum(target==1) * sum(target==0)); 40 | 41 | % the result is the probability a positive case prediction is higher than a 42 | % negative case prediction: the AUROC. 
43 | end 44 | 45 | end 46 | 47 | -------------------------------------------------------------------------------- /mlcc/etc/makeQuery.m: -------------------------------------------------------------------------------- 1 | function [ query ] = makeQuery( filename) 2 | %MAKEQUERY Read a SQL file and return its contents as one query string. 3 | % QUERY = MAKEQUERY(FILENAME) reads FILENAME line by line, deletes everything 4 | % from the first '--' on each line (SQL line comments) and joins the remaining 5 | % lines with single spaces. A '--' inside a quoted SQL string is also cut. 6 | fileID = fopen(filename); 7 | if fileID < 0 8 | error('makeQuery:cannotOpenFile','Could not open query file "%s".',filename); 9 | end 10 | text = textscan(fileID,'%s','delimiter','\n'); 11 | fclose(fileID); % the handle is not needed once the lines are in memory 12 | text = text{1}; 13 | idxRem = strfind(text,'--'); 14 | for m=1:size(text,1) 15 | if ~isempty(idxRem{m}) 16 | % strfind returns every match on the line; cut from the first one 17 | text{m}(idxRem{m}(1):end) = ''; 18 | end 19 | end 20 | query = strjoin(text'); 21 | 22 | end 23 | 24 | -------------------------------------------------------------------------------- /mlcc/lab1-data-extraction/mlcc-query-1.sql: -------------------------------------------------------------------------------- 1 | select 2 | -- ICUSTAY_ID identifies each unique patient ICU stay 3 | -- note that if the same person stays in the ICU more than once, each stay would have a *different* ICUSTAY_ID 4 | ie.icustay_id 5 | 6 | -- this is the outcome of interest: in-hospital mortality 7 | , max(adm.HOSPITAL_EXPIRE_FLAG) as OUTCOME 8 | 9 | -- let's read this statement inside out. first, the case statement says: 10 | -- if the ITEMID = 211, then output the numeric value 11 | -- otherwise, set it to NULL 12 | -- that means that there are *only* heart rate values within the brackets 13 | -- next, we take the minimum - min() - which ignores NULLs 14 | -- as a result, we get the minimum heart rate value, which we define "as HeartRate_Min" 15 | 16 | -- how did we know heart rates were stored using ITEMID 211? Simple, we looked in D_ITEMS! 
17 | -- Try it for yourself: select * from d_items where lower(label) like '%heart rate%' 18 | 19 | , min(case when itemid = 211 then valuenum else null end) as HeartRate_Min 20 | , max(case when itemid = 211 then valuenum else null end) as HeartRate_Max 21 | , min(case when itemid in (615,618) then valuenum else null end) as RespRate_Min 22 | , max(case when itemid in (615,618) then valuenum else null end) as RespRate_Max 23 | from icustays ie 24 | -- join to the admissions table to get hospital outcome 25 | inner join admissions adm 26 | on ie.hadm_id = adm.hadm_id 27 | 28 | -- join to the chartevents table to get the observations 29 | left join chartevents ce 30 | -- match the tables on the patient identifier 31 | on ie.icustay_id = ce.icustay_id 32 | -- and require that the observation be made after the patient is admitted to the ICU 33 | and ce.charttime >= ie.intime 34 | -- and *before* their admission time + 1 day, i.e. the observation must be made on their first day in the ICU 35 | and ce.charttime <= datetime(ie.intime,'+1 day') -- datetime() keeps the time of day; date() would truncate the cut-off to midnight 36 | 37 | -- finally, only look at heart rate/respiratory rate observations 38 | and ce.itemid in 39 | ( 40 | 211, -- Heart Rate 41 | 618, -- Respiratory Rate 42 | 615 -- Resp Rate (Total) 43 | ) 44 | 45 | -- Note above that we take the max() and min() of some columns 46 | -- Imagine you have a table with 2 columns and 10 rows 47 | -- If we take the max() of the 2nd column, we now have: 48 | -- 10 rows in the first column 49 | -- 1 row in the second column (the max value) 50 | -- How does the second column correspond to the first? 51 | -- Should we copy that 1 row to all 10 rows? 
52 | -- We need to tell SQL how to *group* the max value 53 | 54 | -- The below line states "group everything by icustay_id" 55 | -- That means that we take the max( HEART RATE ) grouped by ICUSTAY_ID 56 | -- or, in plain words, we take the maximum heart rate for each patient's ICU stay 57 | group by ie.icustay_id 58 | order by ie.icustay_id; 59 | -------------------------------------------------------------------------------- /mlcc/lab1-data-extraction/mlcc1-problem-set-solutions-ICUSTAYID.sql: -------------------------------------------------------------------------------- 1 | 2 | -- Staging table #1: CHARTEVENTS 3 | with ce_stg as 4 | ( 5 | select ie.icustay_id 6 | , case 7 | when itemid in (211,220045) then 1 -- HeartRate 8 | when itemid in (456,52,6702,443,220052,220181,225312) then 4 -- MeanBP 9 | when itemid in (615,618,220210,224690) then 5 -- RespRate 10 | else null end as VitalID 11 | , valuenum 12 | from icustays ie 13 | left join chartevents chart 14 | on ie.subject_id = chart.subject_id and ie.hadm_id = chart.hadm_id and ie.icustay_id = chart.icustay_id 15 | and chart.charttime >= ie.intime and chart.charttime <= datetime(ie.intime,'+1 day') -- first 24 hours; date() would truncate the cut-off to midnight 16 | and chart.itemid in 17 | ( 18 | -- HEART RATE 19 | 211, --"Heart Rate" 20 | 220045, --"Heart Rate" 21 | 22 | -- MEAN BLOOD PRESSURE 23 | 456, --"NBP Mean" 24 | 52, --"Arterial BP Mean" 25 | 6702, -- Arterial BP Mean #2 26 | 443, -- Manual BP Mean(calc) 27 | 220052, --"Arterial Blood Pressure mean" 28 | 220181, --"Non Invasive Blood Pressure mean" 29 | 225312, --"ART BP mean" 30 | 31 | -- RESPIRATORY RATE 32 | 618,-- Respiratory Rate 33 | 615,-- Resp Rate (Total) 34 | 220210,-- Respiratory Rate 35 | 224690 -- Respiratory Rate (Total) 36 | ) 37 | ) 38 | -- Aggregate table #1: CHARTEVENTS 39 | , ce as 40 | ( 41 | SELECT ce_stg.icustay_id 42 | , min(case when VitalID = 1 then valuenum else null end) as HeartRate_Min 43 | , max(case when VitalID = 1 then valuenum else null end) as HeartRate_Max 44 | , min(case when 
VitalID = 4 then valuenum else null end) as MeanBP_Min 45 | , max(case when VitalID = 4 then valuenum else null end) as MeanBP_Max 46 | , min(case when VitalID = 5 then valuenum else null end) as RespRate_Min 47 | , max(case when VitalID = 5 then valuenum else null end) as RespRate_Max 48 | FROM ce_stg 49 | group by ce_stg.icustay_id 50 | ) 51 | 52 | -- Staging table #2: GCS 53 | -- Because we need to add together GCS components, we do it separately from chartevents 54 | , gcs_stg as 55 | ( 56 | select ie.icustay_id, chart.charttime 57 | , max(case when itemid in (723,223900) then valuenum else null end) as GCSVerbal 58 | , max(case when itemid in (454,223901) then valuenum else null end) as GCSMotor 59 | , max(case when itemid in (184,220739) then valuenum else null end) as GCSEyes 60 | from icustays ie 61 | left join chartevents chart 62 | on ie.subject_id = chart.subject_id and ie.hadm_id = chart.hadm_id and ie.icustay_id = chart.icustay_id 63 | and chart.charttime >= ie.intime and chart.charttime <= datetime(ie.intime,'+1 day') -- first 24 hours; date() would truncate the cut-off to midnight 64 | and chart.itemid in 65 | ( 66 | 723, -- GCSVerbal 67 | 454, -- GCSMotor 68 | 184, -- GCSEyes 69 | 223900, -- GCS - Verbal Response 70 | 223901, -- GCS - Motor Response 71 | 220739 -- GCS - Eye Opening 72 | ) 73 | group by ie.icustay_id, chart.charttime 74 | ) 75 | -- Aggregate table #2: GCS 76 | , gcs as 77 | ( 78 | SELECT gcs_stg.icustay_id 79 | , min(GCSVerbal + GCSMotor + GCSEyes) as GCS_Min 80 | , max(GCSVerbal + GCSMotor + GCSEyes) as GCS_Max 81 | FROM gcs_stg 82 | group by gcs_stg.icustay_id 83 | ) 84 | -- Staging table #3: LABEVENTS 85 | , le_stg as 86 | ( 87 | select ie.icustay_id 88 | -- here we assign labels to ITEMIDs 89 | -- this also fuses together multiple ITEMIDs containing the same data 90 | , case 91 | when itemid = 50885 then 'BILIRUBIN' 92 | when itemid = 50912 then 'CREATININE' 93 | when itemid = 50809 then 'GLUCOSE' 94 | when itemid = 50931 then 'GLUCOSE' 95 | when itemid = 50811 then 'HEMOGLOBIN' 96 | when itemid = 
51222 then 'HEMOGLOBIN' 97 | when itemid = 50824 then 'SODIUM' 98 | when itemid = 50983 then 'SODIUM' 99 | when itemid = 51300 then 'WBC' 100 | when itemid = 51301 then 'WBC' 101 | else null 102 | end as label 103 | , valuenum 104 | 105 | from icustays ie 106 | 107 | left join labevents lab 108 | on ie.subject_id = lab.subject_id and ie.hadm_id = lab.hadm_id 109 | and lab.charttime >= datetime(ie.intime,'-6 hour') and lab.charttime <= datetime(ie.intime,'+1 day') -- datetime() keeps the time of day; date() would truncate both bounds to midnight 110 | and lab.ITEMID in 111 | ( 112 | -- comment is: LABEL | CATEGORY | FLUID | NUMBER OF ROWS IN LABEVENTS 113 | 50885, -- BILIRUBIN, TOTAL | CHEMISTRY | BLOOD | 238277 114 | 50912, -- CREATININE | CHEMISTRY | BLOOD | 797476 115 | 50931, -- GLUCOSE | CHEMISTRY | BLOOD | 748981 116 | 50809, -- GLUCOSE | BLOOD GAS | BLOOD | 196734 117 | 51222, -- HEMOGLOBIN | HEMATOLOGY | BLOOD | 752523 118 | 50811, -- HEMOGLOBIN | BLOOD GAS | BLOOD | 89712 119 | 50983, -- SODIUM | CHEMISTRY | BLOOD | 808489 120 | 50824, -- SODIUM, WHOLE BLOOD | BLOOD GAS | BLOOD | 71503 121 | 51301, -- WHITE BLOOD CELLS | HEMATOLOGY | BLOOD | 753301 122 | 51300 -- WBC COUNT | HEMATOLOGY | BLOOD | 2371 123 | ) 124 | and lab.valuenum is not null and lab.valuenum > 0 -- lab values cannot be 0 and cannot be negative 125 | ) 126 | 127 | -- Aggregate table #3: LABEVENTS 128 | , le as 129 | ( 130 | select 131 | le_stg.icustay_id 132 | 133 | , min(case when label = 'BILIRUBIN' then valuenum else null end) as BILIRUBIN_min 134 | , max(case when label = 'BILIRUBIN' then valuenum else null end) as BILIRUBIN_max 135 | , min(case when label = 'CREATININE' then valuenum else null end) as CREATININE_min 136 | , max(case when label = 'CREATININE' then valuenum else null end) as CREATININE_max 137 | , min(case when label = 'HEMOGLOBIN' then valuenum else null end) as HEMOGLOBIN_min 138 | , max(case when label = 'HEMOGLOBIN' then valuenum else null end) as HEMOGLOBIN_max 139 | , min(case when label = 'SODIUM' then valuenum else null end) as SODIUM_min 140 | , 
max(case when label = 'SODIUM' then valuenum else null end) as SODIUM_max 141 | , min(case when label = 'WBC' then valuenum else null end) as WBC_min 142 | , max(case when label = 'WBC' then valuenum else null end) as WBC_max 143 | 144 | from le_stg 145 | group by le_stg.icustay_id 146 | ) 147 | 148 | SELECT ie.icustay_id 149 | , adm.HOSPITAL_EXPIRE_FLAG -- whether the patient died within the hospital 150 | , round( (julianday(ie.intime) - julianday(pat.dob))/365.24, 4) as Age 151 | 152 | , HeartRate_Min 153 | , HeartRate_Max 154 | , MeanBP_Min 155 | , MeanBP_Max 156 | , RespRate_Min 157 | , RespRate_Max 158 | 159 | , GCS_Min 160 | , GCS_Max 161 | 162 | , BILIRUBIN_min 163 | , BILIRUBIN_max 164 | , CREATININE_min 165 | , CREATININE_max 166 | , HEMOGLOBIN_min 167 | , HEMOGLOBIN_max 168 | , SODIUM_min 169 | , SODIUM_max 170 | , WBC_min 171 | , WBC_max 172 | 173 | FROM icustays ie 174 | inner join admissions adm 175 | on ie.hadm_id = adm.hadm_id 176 | inner join patients pat 177 | on ie.subject_id = pat.subject_id 178 | left join ce 179 | on ie.icustay_id = ce.icustay_id 180 | left join gcs 181 | on ie.icustay_id = gcs.icustay_id 182 | left join le 183 | on ie.icustay_id = le.icustay_id 184 | -------------------------------------------------------------------------------- /mlcc/lab1-data-extraction/mlcc1-problem-set-solutions.sql: -------------------------------------------------------------------------------- 1 | 2 | -- Staging table #1: CHARTEVENTS 3 | with ce as 4 | ( 5 | select adm.hadm_id 6 | , min(case when itemid in (211,220045) then valuenum else null end) as HeartRate_Min 7 | , max(case when itemid in (211,220045) then valuenum else null end) as HeartRate_Max 8 | , min(case when itemid in (456,52,6702,443,220052,220181,225312) then valuenum else null end) as MeanBP_Min 9 | , max(case when itemid in (456,52,6702,443,220052,220181,225312) then valuenum else null end) as MeanBP_Max 10 | , min(case when itemid in (615,618,220210,224690) then valuenum else null 
end) as RespRate_Min 11 | , max(case when itemid in (615,618,220210,224690) then valuenum else null end) as RespRate_Max 12 | from admissions adm 13 | left join chartevents chart 14 | on adm.hadm_id = chart.hadm_id 15 | and chart.itemid in 16 | ( 17 | -- HEART RATE 18 | 211, --"Heart Rate" 19 | 220045, --"Heart Rate" 20 | 21 | -- MEAN BLOOD PRESSURE 22 | 456, --"NBP Mean" 23 | 52, --"Arterial BP Mean" 24 | 6702, -- Arterial BP Mean #2 25 | 443, -- Manual BP Mean(calc) 26 | 220052, --"Arterial Blood Pressure mean" 27 | 220181, --"Non Invasive Blood Pressure mean" 28 | 225312, --"ART BP mean" 29 | 30 | -- RESPIRATORY RATE 31 | 618,-- Respiratory Rate 32 | 615,-- Resp Rate (Total) 33 | 220210,-- Respiratory Rate 34 | 224690 -- Respiratory Rate (Total) 35 | ) 36 | group by adm.hadm_id 37 | ) 38 | -- Staging table #3: LABEVENTS 39 | , le as 40 | ( 41 | select adm.hadm_id 42 | , min(case when itemid = 50885 then valuenum else null end) as BILIRUBIN_min 43 | , max(case when itemid = 50885 then valuenum else null end) as BILIRUBIN_max 44 | , min(case when itemid = 50912 then valuenum else null end) as CREATININE_min 45 | , max(case when itemid = 50912 then valuenum else null end) as CREATININE_max 46 | , min(case when itemid in (50809,50931) then valuenum else null end) as GLUCOSE_min 47 | , max(case when itemid in (50809,50931) then valuenum else null end) as GLUCOSE_max 48 | , min(case when itemid in (50811,51222) then valuenum else null end) as HEMOGLOBIN_min 49 | , max(case when itemid in (50811,51222) then valuenum else null end) as HEMOGLOBIN_max 50 | , min(case when itemid in (50824,50983) then valuenum else null end) as SODIUM_min 51 | , max(case when itemid in (50824,50983) then valuenum else null end) as SODIUM_max 52 | , min(case when itemid in (51300,51301) then valuenum else null end) as WBC_min 53 | , max(case when itemid in (51300,51301) then valuenum else null end) as WBC_max 54 | 55 | from admissions adm 56 | 57 | left join labevents lab 58 | on 
adm.subject_id = lab.subject_id and adm.hadm_id = lab.hadm_id 59 | and lab.charttime >= adm.admittime and lab.charttime <= adm.dischtime 60 | and lab.ITEMID in 61 | ( 62 | -- comment is: LABEL | CATEGORY | FLUID | NUMBER OF ROWS IN LABEVENTS 63 | 50885, -- BILIRUBIN, TOTAL | CHEMISTRY | BLOOD | 238277 64 | 50912, -- CREATININE | CHEMISTRY | BLOOD | 797476 65 | 50931, -- GLUCOSE | CHEMISTRY | BLOOD | 748981 66 | 50809, -- GLUCOSE | BLOOD GAS | BLOOD | 196734 67 | 51222, -- HEMOGLOBIN | HEMATOLOGY | BLOOD | 752523 68 | 50811, -- HEMOGLOBIN | BLOOD GAS | BLOOD | 89712 69 | 50983, -- SODIUM | CHEMISTRY | BLOOD | 808489 70 | 50824, -- SODIUM, WHOLE BLOOD | BLOOD GAS | BLOOD | 71503 71 | 51301, -- WHITE BLOOD CELLS | HEMATOLOGY | BLOOD | 753301 72 | 51300 -- WBC COUNT | HEMATOLOGY | BLOOD | 2371 73 | ) 74 | and lab.valuenum is not null and lab.valuenum > 0 -- lab values cannot be 0 and cannot be negative 75 | group by adm.hadm_id 76 | ) 77 | 78 | -- Staging table #2: GCS 79 | -- Because we need to add together GCS components, we do it seperately from chartevents 80 | -- This lets us group together the components by their CHARTTIME 81 | -- Then we can add together components measured at the same time 82 | , gcs_stg as 83 | ( 84 | select adm.hadm_id, chart.charttime 85 | , max(case when itemid in (723,223900) then valuenum else null end) as GCSVerbal 86 | , max(case when itemid in (454,223901) then valuenum else null end) as GCSMotor 87 | , max(case when itemid in (184,220739) then valuenum else null end) as GCSEyes 88 | from admissions adm 89 | left join chartevents chart 90 | on adm.hadm_id = chart.hadm_id 91 | and chart.itemid in 92 | ( 93 | 723, -- GCSVerbal 94 | 454, -- GCSMotor 95 | 184, -- GCSEyes 96 | 223900, -- GCS - Verbal Response 97 | 223901, -- GCS - Motor Response 98 | 220739 -- GCS - Eye Opening 99 | ) 100 | group by adm.hadm_id, chart.charttime 101 | ) 102 | -- Aggregate table #2: GCS 103 | , gcs as 104 | ( 105 | SELECT gcs_stg.hadm_id 106 | , min(GCSVerbal 
+ GCSMotor + GCSEyes) as GCS_Min 107 | , max(GCSVerbal + GCSMotor + GCSEyes) as GCS_Max 108 | FROM gcs_stg 109 | group by gcs_stg.hadm_id 110 | ) 111 | 112 | SELECT adm.hadm_id 113 | , adm.HOSPITAL_EXPIRE_FLAG -- whether the patient died within the hospital 114 | , round( (julianday(adm.admittime) - julianday(pat.dob))/365.24, 4) as Age 115 | 116 | , HeartRate_Min 117 | , HeartRate_Max 118 | , MeanBP_Min 119 | , MeanBP_Max 120 | , RespRate_Min 121 | , RespRate_Max 122 | 123 | , GCS_Min 124 | , GCS_Max 125 | 126 | , BILIRUBIN_min 127 | , BILIRUBIN_max 128 | , CREATININE_min 129 | , CREATININE_max 130 | , GLUCOSE_min 131 | , GLUCOSE_max 132 | , HEMOGLOBIN_min 133 | , HEMOGLOBIN_max 134 | , SODIUM_min 135 | , SODIUM_max 136 | , WBC_min 137 | , WBC_max 138 | 139 | FROM admissions adm 140 | inner join patients pat 141 | on adm.subject_id = pat.subject_id 142 | left join ce 143 | on adm.hadm_id = ce.hadm_id 144 | left join gcs 145 | on adm.hadm_id = gcs.hadm_id 146 | left join le 147 | on adm.hadm_id = le.hadm_id 148 | -------------------------------------------------------------------------------- /mlcc/lab1-data-extraction/mlcc1_introduction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Extract data\n", 8 | "\n", 9 | "First, we extract the data to a csv (alternatively, we could use a database connection here and extract the data directly from the database).\n", 10 | "\n", 11 | "\n", 12 | "```sql\n", 13 | "Copy (\n", 14 | " select ce.icustay_id, charttime, itemid, valuenum, adm.hospital_expire_flag\n", 15 | " from mimiciii.chartevents ce\n", 16 | " inner join mimiciii.icustays ie\n", 17 | " on ce.icustay_id = ie.icustay_id\n", 18 | "\n", 19 | " inner join mimiciii.admissions adm\n", 20 | " on ce.hadm_id = adm.hadm_id\n", 21 | " inner join mimiciii.patients pat\n", 22 | " on ce.subject_id = pat.subject_id\n", 23 | "\n", 24 | " where 
ce.charttime between ie.intime and (ie.intime + interval '1 day')\n", 25 | " and extract(EPOCH from (ie.intime - pat.dob)) >= (60*60*24*365.242*15) -- older than 15 years (in seconds), i.e. an adult\n", 26 | " and itemid in\n", 27 | " (\n", 28 | " 618, --\tRespiratory Rate\n", 29 | " 220210, --\tRespiratory Rate\n", 30 | "\n", 31 | " 211, --\tHeart Rate\n", 32 | " 220045 --\tHeart Rate\n", 33 | " )\n", 34 | " order by icustay_id, charttime\n", 35 | ") To '/data/mimic3/mimic-hr-rr.csv' With CSV HEADER;\n", 36 | "```\n", 37 | "\n", 38 | "We now assume that the data is in a csv file called `mimic-hr-rr.csv` in the local directory." 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 1, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/plain": [ 51 | "icustay_id\t charttime\t itemid\t valuenum\thospital_expire_flag\t\n", 52 | " 200001\t 796924.80\t 220210\t 22\t0\n", 53 | " 200001\t 796924.80\t 220045\t 114\t0\n", 54 | " 200001\t 796924.80\t 220210\t 26\t0\n", 55 | " 200001\t 796924.83\t 220045\t 113\t0\n", 56 | " 200001\t 796924.83\t 220210\t 20\t0" 57 | ] 58 | }, 59 | "execution_count": 1, 60 | "metadata": {}, 61 | "output_type": "execute_result" 62 | } 63 | ], 64 | "source": [ 65 | "% load the data\n", 66 | "\n", 67 | "fp = fopen('mimic-hr-rr.csv');\n", 68 | "header = fgetl(fp);\n", 69 | "\n", 70 | "% convert header from a string to a cell array of strings\n", 71 | "header = regexp(header,',','split');\n", 72 | "\n", 73 | "frmt = '%f%s%f%f%f';\n", 74 | "data = textscan(fp,frmt,'delimiter',',');\n", 75 | "fclose(fp);\n", 76 | "\n", 77 | "% convert the date string into a MATLAB's numeric format\n", 78 | "data{2} = datenum(data{2},'yyyy-mm-dd HH:MM:SS');\n", 79 | "\n", 80 | "% now we can convert data from a cell array to a matrix\n", 81 | "data = [data{:}];\n", 82 | "\n", 83 | "% here's a preview of the data ('\\t' is a tab)\n", 84 | "fprintf('%12s\\t',header{:});\n", 85 | "fprintf('\\n')\n", 86 | "\n", 87 | "frmt = 
'%12g\\t%12.2f\\t%12g\\t%12g\\t%1d';\n", 88 | "for n=1:5\n", 89 | " fprintf(frmt,data(n,:));\n", 90 | " fprintf('\\n');\n", 91 | "end" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "In the above, you can see:\n", 99 | "\n", 100 | "* `ICUSTAY_ID` - This is the unique integer which identifies an ICU stay.\n", 101 | "* `CHARTTIME` - This is the time at which a measurement is recorded. It represents the number of days since January 0, 0000.\n", 102 | "* `ITEMID` - This is a unique integer which represents the type of data recorded. 220210 is respiratory rate, and 220045 is heart rate.\n", 103 | "* `VALUENUM` - This is the actual value of the measurement. So we can see that `ICUSTAY_ID` 200001 had a respiratory rate of 22 breaths per minute (we have not included the unit of measurement here, but it is in the database if you are interested in confirming this).\n", 104 | "* `HOSPITAL_EXPIRE_FLAG` - This indicates whether the patient died in the hospital (1 is death at hospital discharge).\n", 105 | "\n", 106 | "We can plot the first patient's data as follows:" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "id = 200001; % which icustay_id we'd like to plot\n", 118 | "\n", 119 | "idxID = data(:,1) == id; % only plot data for 1 patient\n", 120 | "idxHR = data(:,3) == 211 | data(:,3) == 220045;\n", 121 | "idxRR = data(:,3) == 618 | data(:,3) == 220210;\n", 122 | "\n", 123 | "figure(1); hold all;\n", 124 | "plot(data(idxID & idxHR,2),data(idxID & idxHR,4),'-',...\n", 125 | " 'Linewidth',2,'Color',[0.8906, 0.1016, 0.1094]);\n", 126 | "plot(data(idxID & idxRR,2),data(idxID & idxRR,4),'-',...\n", 127 | " 'Linewidth',2,'Color',[0.2148, 0.4922, 0.7188]);" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "Above we can see the heart rate in red and 
the respiratory rate in blue. The bottom axis is the days since January 0, 0000 - a bit hard to interpret but we can see that the data spans 1 day." 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "# Extracting data\n", 142 | "\n", 143 | "Now we have plotted the data for a few patients and have an idea of what it looks like. We'd like to extract some data which is useable in our machine learning classifiers. That means we need to convert this *time-series* into a *design matrix*." 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "% We can use sorting to get the maximum and minimum value\n", 155 | "% This is quite complicated syntax - we need to perform vectorized operations\n", 156 | "% Note: this type of task is *much* easier in SQL!\n", 157 | "\n", 158 | "[id_unique, idxID] = unique(data(:,1)); % get a list of all unique ICUSTAY_IDs\n", 159 | "X = nan(size(id_unique,1),4);\n", 160 | "\n", 161 | "idxHR = data(:,3) == 211 | data(:,3) == 220045;\n", 162 | "idxRR = data(:,3) == 618 | data(:,3) == 220210;\n", 163 | "\n", 164 | "tic; % we time how long this process takes\n", 165 | "\n", 166 | "data_tmp = data(idxHR,:);\n", 167 | "data_tmp = sortrows(data_tmp, [1,4]); % minimum HR is the first row for each ICUSTAY_ID\n", 168 | "\n", 169 | "[id_tmp,idxA] = unique(data_tmp(:,1),'first'); % idxA: first row of each ICUSTAY_ID\n", 170 | "idxExist = ismember(id_unique, id_tmp); % rows of X (i.e. of id_unique) which have HR data\n", 171 | "X(idxExist,1) = data_tmp(idxA,4);\n", 172 | "\n", 173 | "% Repeat for the *maximum* heart rate\n", 174 | "data_tmp = sortrows(data_tmp, [1,-4]); % maximum HR is now the first row for each ICUSTAY_ID\n", 175 | "X(idxExist,2) = data_tmp(idxA,4);\n", 176 | "\n", 177 | "\n", 178 | "% Repeat for respiratory rate\n", 179 | "data_tmp = data(idxRR,:);\n", 180 | "data_tmp = sortrows(data_tmp, [1,4]); % minimum RR is the first row for 
each ICUSTAY_ID\n", 181 | "\n", 182 | "[id_tmp,idxA] = unique(data_tmp(:,1),'first'); % idxA: first row of each ICUSTAY_ID\n", 183 | "idxExist = ismember(id_unique, id_tmp); % rows of X (i.e. of id_unique) which have RR data\n", 184 | "X(idxExist,3) = data_tmp(idxA,4);\n", 185 | "\n", 186 | "% Repeat for the *maximum* respiratory rate\n", 187 | "data_tmp = sortrows(data_tmp, [1,-4]); % maximum RR is now the first row for each ICUSTAY_ID\n", 188 | "X(idxExist,4) = data_tmp(idxA,4);\n", 189 | "\n", 190 | "toc;\n", 191 | "\n", 192 | "% Clear variables so we don't accidentally use the wrong data in temp variables later on\n", 193 | "clear data_tmp idxRR idxHR id_tmp idxA;\n", 194 | "\n", 195 | "% Preview of the data:\n", 196 | "X(1:5,:)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": { 203 | "collapsed": false 204 | }, 205 | "outputs": [], 206 | "source": [ 207 | "% This is equivalent to the above cell, but using for loops\n", 208 | "% It takes ~5-10 minutes to run\n", 209 | "\n", 210 | "[id_unique,idxID] = unique(data(:,1)); % get a list of all unique ICUSTAY_IDs\n", 211 | "X_slow = nan(size(id_unique,1),4);\n", 212 | "\n", 213 | "idxHR = data(:,3) == 211 | data(:,3) == 220045;\n", 214 | "idxRR = data(:,3) == 618 | data(:,3) == 220210;\n", 215 | "\n", 216 | "tic; % we time how long this takes\n", 217 | "\n", 218 | "for n=1:size(id_unique,1)\n", 219 | " idxCurrentID = data(:,1) == id_unique(n);\n", 220 | " \n", 221 | " idx = idxCurrentID & idxHR;\n", 222 | " if any(idx)\n", 223 | " X_slow(n,1) = min(data(idx,4));\n", 224 | " X_slow(n,2) = max(data(idx,4));\n", 225 | " end\n", 226 | " \n", 227 | " idx = idxCurrentID & idxRR;\n", 228 | " if any(idx)\n", 229 | " X_slow(n,3) = min(data(idx,4));\n", 230 | " X_slow(n,4) = max(data(idx,4));\n", 231 | " end\n", 232 | "end\n", 233 | "\n", 234 | "toc;\n", 235 | "\n", 236 | "% Clear variables so we don't accidentally use the wrong data in temp variables later on\n", 237 | "clear idxRR idxHR idxCurrentID idx;\n", 238 | "\n", 239 | "% let's show a 
preview of X:\n", 240 | "X(1:5,:)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": null, 246 | "metadata": { 247 | "collapsed": false 248 | }, 249 | "outputs": [], 250 | "source": [ 251 | "y = data(idxID,5); % get the outcome for each patient\n", 252 | "\n", 253 | "% plot the variables against each other, coloured by their outcome\n", 254 | "figure(1); clf; hold all;\n", 255 | "plot(X(y==1,1), X(y==1,2),'x',...\n", 256 | " 'Linewidth',2,'Color',[0.8906, 0.1016, 0.1094]);\n", 257 | "plot(X(y==0,1), X(y==0,2),'o',...\n", 258 | " 'Linewidth',2,'Color',[0.2148, 0.4922, 0.7188]);\n", 259 | "\n", 260 | "xlabel('Lowest heart rate');\n", 261 | "ylabel('Highest heart rate');" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "Above we can see most people have heart rates between 0-200, except a few with heart rates around 1000 and one with a highest heart rate of 5500. Clearly these are not physiological - you'll find these \"outliers\" frequently in medical data - it's a consequence of the secondary nature of our analysis. It's obvious to any care provider that these are not possible, so they are ignored during routine care, and not sanitized in the database. We have to fix them ourselves! For now, we can ignore these and set the limits on our plot. Later, we will preprocess these data appropriately." 
269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": null, 274 | "metadata": { 275 | "collapsed": false 276 | }, 277 | "outputs": [], 278 | "source": [ 279 | "y = data(idxID,5); % get the outcome for each patient\n", 280 | "\n", 281 | "% plot the variables against each other, coloured by their outcome\n", 282 | "figure(1); clf; hold all;\n", 283 | "plot(X(y==1,1), X(y==1,2),'x',...\n", 284 | " 'Linewidth',2,'Color',[0.8906, 0.1016, 0.1094]);\n", 285 | "plot(X(y==0,1), X(y==0,2),'o',...\n", 286 | " 'Linewidth',2,'Color',[0.2148, 0.4922, 0.7188]);\n", 287 | "\n", 288 | "xlabel('Lowest heart rate');\n", 289 | "ylabel('Highest heart rate');\n", 290 | "\n", 291 | "% change the axis to reasonable limits\n", 292 | "set(gca,'XLim',[0,240],'YLim',[0,240]);" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "Now we have a wonderful blob of data. This is because there are so many data points! What a wonderful problem to have. We limit the plot to 200 data points (100 in each class, survived or died in hospital) - this will give us a better visualization. We pick these data points randomly." 
300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": { 306 | "collapsed": false 307 | }, 308 | "outputs": [], 309 | "source": [ 310 | "y = data(idxID,5); % get the outcome for each patient\n", 311 | "\n", 312 | "N_DATA_POINTS = 100; % Number of data points to plot for each class - must be less than 6342, the number of deaths\n", 313 | "\n", 314 | "rng(777,'twister'); % fix the random number seed so everyone's plots look identical\n", 315 | "\n", 316 | "idx0 = find(y==0);\n", 317 | "[~,idxRand] = sort(rand(size(idx0,1),1),1);\n", 318 | "idx0 = idx0(idxRand(1:N_DATA_POINTS));\n", 319 | "\n", 320 | "idx1 = find(y==1);\n", 321 | "[~,idxRand] = sort(rand(size(idx1,1),1),1);\n", 322 | "idx1 = idx1(idxRand(1:N_DATA_POINTS));\n", 323 | "\n", 324 | "% plot the variables against each other, coloured by their outcome\n", 325 | "figure(1); clf; hold all;\n", 326 | "plot(X(idx1,1), X(idx1,2),'x',...\n", 327 | " 'Linewidth',2,'Color',[0.8906, 0.1016, 0.1094]);\n", 328 | "plot(X(idx0,1), X(idx0,2),'o',...\n", 329 | " 'Linewidth',2,'Color',[0.2148, 0.4922, 0.7188]);\n", 330 | "\n", 331 | "xlabel('Lowest heart rate');\n", 332 | "ylabel('Highest heart rate');\n", 333 | "\n", 334 | "% change the axis to reasonable limits\n", 335 | "set(gca,'XLim',[0,240],'YLim',[0,240]);\n" 336 | ] 337 | } 338 | ], 339 | "metadata": { 340 | "kernelspec": { 341 | "display_name": "Matlab", 342 | "language": "matlab", 343 | "name": "matlab_kernel" 344 | }, 345 | "language_info": { 346 | "codemirror_mode": "Octave", 347 | "file_extension": ".m", 348 | "help_links": [ 349 | { 350 | "text": "MetaKernel Magics", 351 | "url": "https://github.com/calysto/metakernel/blob/master/metakernel/magics/README.md" 352 | } 353 | ], 354 | "mimetype": "text/x-matlab", 355 | "name": "octave" 356 | } 357 | }, 358 | "nbformat": 4, 359 | "nbformat_minor": 0 360 | } 361 | -------------------------------------------------------------------------------- 
/mlcc/lab1-data-extraction/mlcc1_introduction.m: -------------------------------------------------------------------------------- 1 | %% Run the following to connect to the database 2 | 3 | % STEP 1: Tell Matlab where the driver is 4 | javaclasspath('sqlite-jdbc-3.8.11.2.jar') % use this for SQLite 5 | 6 | % STEP 2: Connect to the Database 7 | conn = database('','','',... 8 | 'org.sqlite.JDBC',['jdbc:sqlite:' pwd filesep 'data' filesep 'mimiciii_v1_3_mini.sqlite']); 9 | 10 | %% Take a look at "mlcc-query-1.sql" - this extracts a single value for each patient 11 | % More specifically, it extracts the highest heart rate 12 | query = makeQuery('mlcc-query-1.sql'); 13 | data = fetch(conn,query); 14 | 15 | %% Plot a histogram of the highest heart rate values 16 | 17 | 18 | %% Plot the highest heart rate against the highest respiratory rate 19 | 20 | 21 | %% See if you can add in the highest GCS 22 | % Then, run the query here to get the results. 23 | query = makeQuery(''); % put the filename here 24 | data = fetch(conn,query); 25 | 26 | 27 | %% Plot the highest heart rate against the highest respiratory rate, colouring by outcome 28 | % The patient outcome is stored in "HOSPITAL_EXPIRE_FLAG" - the 4th column 29 | 30 | 31 | %% Plot the highest heart rate against the highest GCS, colouring by outcome 32 | % Which variable do you feel discriminates mortality better? 33 | 34 | 35 | %% What other variables could you add which might help? 36 | 37 | 38 | %% Build a logistic regression to classify mortality 39 | % This is equivalent to drawing a line of separation 40 | 41 | % See: glmfit 42 | % e.g. 
b = glmfit(X,y,'binomial') 43 | 44 | -------------------------------------------------------------------------------- /mlcc/lab2-intro-ml/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Important notes 3 | 4 | * If we use LIBSVM, we should provide pre-compiled binaries for all the operating systems 5 | * Linux - This was easy for me on Ubuntu, but I had to change the path in the Makefile and run it from command line (running make.m did *not* work, it compiled but didn't link the library correctly) 6 | * Mac OSX - The lab laptop doesn't have a compiler, need to sort that out.. 7 | * Windows - It comes with pre-compiled binaries 8 | * Need to add `libsvm/matlab` to the path 9 | -------------------------------------------------------------------------------- /mlcc/lab2-intro-ml/mlcc2-query.sql: -------------------------------------------------------------------------------- 1 | with pvt as 2 | ( 3 | select ie.subject_id, ie.hadm_id, ie.icustay_id, ie.intime 4 | , case 5 | when itemid in (456,52,6702,443,220052,220181,225312) then 1 -- MeanBP 6 | when itemid in (615,618) then 2 -- RespRate 7 | 8 | when itemid in (723,223900) then 10 -- GCSVerbal 9 | when itemid in (454,223901) then 11 -- GCSMotor 10 | when itemid in (184,220739) then 12 -- GCSEyes 11 | else null end as VitalID 12 | , valuenum 13 | from icustays ie 14 | left join chartevents ce 15 | on ie.subject_id = ce.subject_id and ie.hadm_id = ce.hadm_id and ie.icustay_id = ce.icustay_id 16 | and ce.charttime >= ie.intime and ce.charttime <= datetime(ie.intime,'+1 day') -- datetime() keeps the time of day, so this is a full 24-hour window; date() would cut it off at midnight of the admission day 17 | where ce.itemid in 18 | ( 19 | 723, -- GCSVerbal 20 | 454, -- GCSMotor 21 | 184, -- GCSEyes 22 | 23 | 223900, -- GCS - Verbal Response 24 | 223901, -- GCS - Motor Response 25 | 220739, -- GCS - Eye Opening 26 | 618, -- Respiratory Rate 27 | 615, -- Resp Rate (Total) 28 | 456, --"NBP Mean" 29 | 52, --"Arterial BP Mean" 30 | 6702, -- Arterial BP Mean #2 31 | 443, -- Manual BP Mean(calc) 32 | 
220052, --"Arterial Blood Pressure mean" 33 | 220181, --"Non Invasive Blood Pressure mean" 34 | 225312 --"ART BP mean" 35 | ) 36 | ) 37 | SELECT pvt.icustay_id 38 | , adm.HOSPITAL_EXPIRE_FLAG -- whether the patient died within the hospital 39 | , round( (julianday(pvt.intime) - julianday(pat.dob))/365.24, 4) as Age -- days between dob and ICU intime, divided by 365.24 to give years 40 | , min(case when VitalID = 1 then valuenum else null end) as MeanBP_Min 41 | , max(case when VitalID = 2 then valuenum else null end) as RespRate_Max 42 | FROM pvt 43 | inner join patients pat 44 | on pvt.subject_id = pat.subject_id 45 | inner join admissions adm 46 | on pvt.hadm_id = adm.hadm_id 47 | group by pvt.icustay_id, pvt.hadm_id, adm.HOSPITAL_EXPIRE_FLAG, pvt.intime, pat.dob 48 | order by pvt.icustay_id; 49 | -------------------------------------------------------------------------------- /mlcc/lab4-mortality-prediction/matlab_postgres_connection.m: -------------------------------------------------------------------------------- 1 | % Tell Matlab where the driver is 2 | javaclasspath('postgresql-9.4.1207.jar') % note we are using a postgres driver 3 | 4 | 5 | %% Initiate our database connection (this example targets a PostgreSQL server on localhost) 6 | username = ''; 7 | password = ''; 8 | 9 | % Connect to the Database 10 | conn = database('mimic',username,password,... 11 | 'Vendor','PostgreSQL',... 12 | 'Server','localhost',... 
13 | 'PortNumber',5432); 14 | 15 | 16 | %% create and run a query 17 | query = 'select * from patients limit 10'; 18 | data = fetch(conn,query); 19 | 20 | %% close the connection 21 | close(conn); -------------------------------------------------------------------------------- /mlcc/lab4-mortality-prediction/mlcc-extract-data.sql: -------------------------------------------------------------------------------- 1 | with ce as 2 | ( 3 | select 4 | icustay_id, charttime, itemid, valuenum 5 | from chartevents 6 | -- specify what data we want from chartevents 7 | where itemid in 8 | ( 9 | 211, -- Heart Rate 10 | 618, -- Respiratory Rate 11 | 615 -- Resp Rate (Total) 12 | ) 13 | -- how did we know heart rates were stored using ITEMID 211? Simple, we looked in D_ITEMS! 14 | -- Try it for yourself: select * from d_items where lower(label) like '%heart rate%' 15 | ) 16 | select 17 | -- ICUSTAY_ID identifies each unique patient ICU stay 18 | -- note that if the same person stays in the ICU more than once, each stay would have a *different* ICUSTAY_ID 19 | -- however, since it's the same person, all those stays would have the same SUBJECT_ID 20 | ie.icustay_id 21 | 22 | -- this is the outcome of interest: in-hospital mortality 23 | , max(adm.HOSPITAL_EXPIRE_FLAG) as OUTCOME 24 | 25 | -- this is a case statement - essentially an "if, else" clause 26 | , min( 27 | case 28 | -- if the itemid is 211 29 | when itemid = 211 30 | -- then return the actual value stored in VALUENUM 31 | then valuenum 32 | -- otherwise, return 'null', which is SQL standard for an empty value 33 | else null 34 | -- end the case statement 35 | end 36 | ) as HeartRate_Min 37 | 38 | -- note we wrapped the above in "min()" 39 | -- this takes the minimum of all values inside, and *ignores* nulls 40 | -- by calling this on our case statement, we are ignoring all values except those with ITEMID = 211 41 | -- since ITEMID 211 are heart rates, we take the minimum of only heart rates 42 | 43 | , max(case when 
itemid = 211 then valuenum else null end) as HeartRate_Max 44 | , min(case when itemid in (615,618) then valuenum else null end) as RespRate_Min 45 | , max(case when itemid in (615,618) then valuenum else null end) as RespRate_Max 46 | from icustays ie 47 | 48 | -- join to the admissions table to get hospital outcome 49 | inner join admissions adm 50 | on ie.hadm_id = adm.hadm_id 51 | 52 | -- join to the filtered chartevents rows (the "ce" subquery at the top of this query) to get the observations 53 | left join ce 54 | -- match the tables on the ICU stay identifier (ICUSTAY_ID) 55 | on ie.icustay_id = ce.icustay_id 56 | -- and require that the observation be made at or after the time the patient is admitted to the ICU 57 | and ce.charttime >= ie.intime 58 | -- and no later than their ICU admission time + 1 day, i.e. the observation must be made in their first 24 hours in the ICU 59 | and ce.charttime <= ie.intime + interval '1' day 60 | group by ie.icustay_id 61 | order by ie.icustay_id; 62 | -------------------------------------------------------------------------------- /mlcc/lab4-mortality-prediction/mlcc-group-by-tutorial.sql: -------------------------------------------------------------------------------- 1 | -- In a lot of the code that you will use today, you will need to group values. 2 | -- What does it mean to group values? 3 | -- Say you want my highest heart rate for the day... 4 | -- that's equivalent to saying "I want the max heart rate *group by* ICUSTAY_ID" 5 | 6 | -- Imagine you have a table with 2 columns and 5 rows 7 | -- ICUSTAY_ID | HEART RATE 8 | -- 1 | 90 9 | -- 1 | 73 10 | -- 2 | 84 11 | -- 2 | 82 12 | -- 2 | 81 13 | 14 | -- Here we have two ICU stays (#1 and #2). We'd like their highest (maximum) heart rate. 15 | 16 | -- If we take the max() of the 2nd column, we now have: 17 | -- 5 rows in the first column 18 | -- ?? how many rows in the second column 19 | -- ICUSTAY_ID | max(HEART RATE) 20 | -- 1 | ? 21 | -- 1 | ? 22 | -- 2 | ? 23 | -- 2 | ? 24 | -- 2 | ? 
25 | 26 | -- The logical answer is we'd want to collapse heart rate by ICUSTAY_ID. 27 | -- I'm interested in each ICU stays highest heart rate - taking the maximum another way (e.g. across patients) doesn't make sense. 28 | -- To do this: we need to tell SQL how to *group* the max value 29 | -- If we say *group by* ICUSTAY_ID, then we tell SQL to group the heart rates according to ICUSTAY_ID 30 | 31 | -- ICUSTAY_ID | max(HEART RATE) 32 | -- 1 | 90 33 | -- 2 | 84 34 | 35 | -- In SQL, we specify this by adding in "group by" at the bottom of the query. 36 | 37 | 38 | -- Let's try it for something simple: let's find the first time a patient entered the ICU 39 | select 40 | -- ICUSTAY_ID identifies each unique patient ICU stay 41 | -- note that if the same person stays in the ICU more than once, each stay would have a *different* ICUSTAY_ID 42 | -- however, since it's the same person, all those stays would have the same SUBJECT_ID 43 | icustay_id 44 | 45 | -- this is the lowest intime 46 | -- since 'intime' is a date, the lowest intime is conceptually the same as the earliest intime 47 | , min(intime) as MinimumINTIME 48 | 49 | from icustays ie 50 | group by icustay_id; 51 | -------------------------------------------------------------------------------- /mlcc/lab4-mortality-prediction/mlcc_mortality_prediction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# Import libraries\n", 12 | "import numpy as np\n", 13 | "import pandas as pd\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import psycopg2\n", 16 | "from sklearn.pipeline import Pipeline\n", 17 | "\n", 18 | "# used for train/test splits\n", 19 | "from sklearn.cross_validation import train_test_split\n", 20 | "\n", 21 | "# used to impute mean for data\n", 22 | "from sklearn.preprocessing import Imputer\n", 
23 | "\n", 24 | "# logistic regression is our model of choice\n", 25 | "from sklearn.linear_model import LogisticRegression\n", 26 | "from sklearn.linear_model import LogisticRegressionCV\n", 27 | "\n", 28 | "# used to calculate AUROC/accuracy\n", 29 | "from sklearn import metrics\n", 30 | "\n", 31 | "# used to create confusion matrix\n", 32 | "from sklearn.metrics import confusion_matrix\n", 33 | "\n", 34 | "from sklearn.cross_validation import cross_val_score\n", 35 | "\n", 36 | "\n", 37 | "%matplotlib inline" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# Connect to MIMIC\n", 47 | "# be sure to add the password as appropriate!\n", 48 | "con = psycopg2.connect(dbname='MIMIC', user='workshop', password=''\n", 49 | " , host='.amazonaws.com'\n", 50 | " , port=5432)\n", 51 | "cur = con.cursor()\n", 52 | "cur.execute('SET search_path to ''mimiciii_workshop''')" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 3, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | " icustay_id outcome heartrate_min heartrate_max resprate_min \\\n", 65 | "0 200006 0 62 84 14 \n", 66 | "1 200030 0 83 115 11 \n", 67 | "2 200068 0 67 112 20 \n", 68 | "3 200071 0 118 130 16 \n", 69 | "4 200102 1 71 87 13 \n", 70 | "\n", 71 | " resprate_max \n", 72 | "0 27 \n", 73 | "1 28 \n", 74 | "2 24 \n", 75 | "3 25 \n", 76 | "4 32 \n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "query = \"\"\"\n", 82 | "with ce as\n", 83 | "(\n", 84 | " select\n", 85 | " icustay_id, charttime, itemid, valuenum\n", 86 | " from chartevents\n", 87 | " -- specify what data we want from chartevents\n", 88 | " where itemid in\n", 89 | " (\n", 90 | " 211, -- Heart Rate\n", 91 | " 618, --\tRespiratory Rate\n", 92 | " 615 --\tResp Rate (Total)\n", 93 | " )\n", 94 | " -- how did we know heart rates were stored using ITEMID 211? 
Simple, we looked in D_ITEMS!\n", 95 | " -- Try it for yourself: select * from d_items where lower(label) like '%heart rate%'\n", 96 | ")\n", 97 | "select\n", 98 | " -- ICUSTAY_ID identifies each unique patient ICU stay\n", 99 | " -- note that if the same person stays in the ICU more than once, each stay would have a *different* ICUSTAY_ID\n", 100 | " -- however, since it's the same person, all those stays would have the same SUBJECT_ID\n", 101 | " ie.icustay_id\n", 102 | "\n", 103 | " -- this is the outcome of interest: in-hospital mortality\n", 104 | " , max(adm.HOSPITAL_EXPIRE_FLAG) as OUTCOME\n", 105 | "\n", 106 | " -- this is a case statement - essentially an \"if, else\" clause\n", 107 | " , min(\n", 108 | " case\n", 109 | " -- if the itemid is 211\n", 110 | " when itemid = 211\n", 111 | " -- then return the actual value stored in VALUENUM\n", 112 | " then valuenum\n", 113 | " -- otherwise, return 'null', which is SQL standard for an empty value\n", 114 | " else null\n", 115 | " -- end the case statement\n", 116 | " end\n", 117 | " ) as HeartRate_Min\n", 118 | "\n", 119 | " -- note we wrapped the above in \"min()\"\n", 120 | " -- this takes the minimum of all values inside, and *ignores* nulls\n", 121 | " -- by calling this on our case statement, we are ignoring all values except those with ITEMID = 211\n", 122 | " -- since ITEMID 211 are heart rates, we take the minimum of only heart rates\n", 123 | "\n", 124 | " , max(case when itemid = 211 then valuenum else null end) as HeartRate_Max\n", 125 | " , min(case when itemid in (615,618) then valuenum else null end) as RespRate_Min\n", 126 | " , max(case when itemid in (615,618) then valuenum else null end) as RespRate_Max\n", 127 | "from icustays ie\n", 128 | "\n", 129 | "-- join to the admissions table to get hospital outcome\n", 130 | "inner join admissions adm\n", 131 | " on ie.hadm_id = adm.hadm_id\n", 132 | "\n", 133 | "-- join to the chartevents table to get the observations\n", 134 | "left join ce\n", 
135 | " -- match the tables on the patient identifier\n", 136 | " on ie.icustay_id = ce.icustay_id\n", 137 | " -- and require that the observation be made after the patient is admitted to the ICU\n", 138 | " and ce.charttime >= ie.intime\n", 139 | " -- and *before* their admission time + 1 day, i.e. the observation must be made on their first day in the ICU\n", 140 | " and ce.charttime <= ie.intime + interval '1' day\n", 141 | "group by ie.icustay_id\n", 142 | "order by ie.icustay_id\n", 143 | "\"\"\"\n", 144 | "\n", 145 | "data = pd.read_sql_query(query,con)\n", 146 | "print(data.head())" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 4, 152 | "metadata": { 153 | "collapsed": true 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "# close the connection as we are done loading data from server\n", 158 | "cur.close()\n", 159 | "con.close()" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 5, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# move from a data frame into a numpy array\n", 169 | "X = data.values\n", 170 | "y = X[:,1]\n", 171 | "\n", 172 | "# delete first 2 columns: the ID and the outcome\n", 173 | "X = np.delete(X,0,axis=1)\n", 174 | "X = np.delete(X,0,axis=1)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 6, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "name": "stdout", 184 | "output_type": "stream", 185 | "text": [ 186 | "Accuracy = 0.784267912773\n", 187 | "AUROC = 0.642288212031\n", 188 | "\n", 189 | "Confusion matrix\n", 190 | "[[977 17]\n", 191 | " [260 30]]\n", 192 | "\n", 193 | "Classification report\n", 194 | " precision recall f1-score support\n", 195 | "\n", 196 | " 0.0 0.79 0.98 0.88 994\n", 197 | " 1.0 0.64 0.10 0.18 290\n", 198 | "\n", 199 | "avg / total 0.76 0.78 0.72 1284\n", 200 | "\n" 201 | ] 202 | } 203 | ], 204 | "source": [ 205 | "# evaluate a logistic regression model using an 80%-20% training/test split\n", 
206 | "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n", 207 | "\n", 208 | "# impute mean for missing values\n", 209 | "imp = Imputer(missing_values='NaN', strategy='mean', axis=0)\n", 210 | "imp.fit(X_train)\n", 211 | "\n", 212 | "X_train = imp.transform(X_train)\n", 213 | "X_test = imp.transform(X_test)\n", 214 | "\n", 215 | "model = LogisticRegression(fit_intercept=True)\n", 216 | "model = model.fit(X_train, y_train)\n", 217 | "\n", 218 | "# predict class labels for the test set\n", 219 | "y_pred = model.predict(X_test)\n", 220 | "\n", 221 | "# generate class probabilities\n", 222 | "y_prob = model.predict_proba(X_test)\n", 223 | "\n", 224 | "# generate evaluation metrics\n", 225 | "print('Accuracy = {}'.format(metrics.accuracy_score(y_test, y_pred)))\n", 226 | "print('AUROC = {}'.format(metrics.roc_auc_score(y_test, y_prob[:, 1])))\n", 227 | "\n", 228 | "print('\\nConfusion matrix')\n", 229 | "print(metrics.confusion_matrix(y_test, y_pred))\n", 230 | "print('\\nClassification report')\n", 231 | "print(metrics.classification_report(y_test, y_pred))" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 8, 237 | "metadata": {}, 238 | "outputs": [ 239 | { 240 | "name": "stdout", 241 | "output_type": "stream", 242 | "text": [ 243 | "AUROC for all folds:\n", 244 | "[ 0.632241 0.66711432 0.65462583 0.63505984 0.64856111]\n", 245 | "Average AUROC across folds:\n", 246 | "0.647520418729\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "# evaluate a logistic regression with L1 regularization\n", 252 | "\n", 253 | "# evaluate the model using 5-fold cross-validation\n", 254 | "# see: http://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter\n", 255 | "# for list of scoring parameters\n", 256 | "\n", 257 | "estimator = Pipeline([(\"imputer\", Imputer(missing_values='NaN',\n", 258 | " strategy=\"mean\",\n", 259 | " axis=0)),\n", 260 | " (\"regression\", LogisticRegressionCV(penalty='l1',\n", 261 | " 
cv=5,\n", 262 | " scoring='roc_auc',\n", 263 | " solver='liblinear'))])\n", 264 | "\n", 265 | "scores = cross_val_score(estimator\n", 266 | " , X, y\n", 267 | " , scoring='roc_auc', cv=5)\n", 268 | "\n", 269 | "\n", 270 | "print('AUROC for all folds:')\n", 271 | "print(scores)\n", 272 | "print('Average AUROC across folds:')\n", 273 | "print(scores.mean())" 274 | ] 275 | } 276 | ], 277 | "metadata": { 278 | "kernelspec": { 279 | "display_name": "Python 2", 280 | "language": "python", 281 | "name": "python2" 282 | }, 283 | "language_info": { 284 | "codemirror_mode": { 285 | "name": "ipython", 286 | "version": 2 287 | }, 288 | "file_extension": ".py", 289 | "mimetype": "text/x-python", 290 | "name": "python", 291 | "nbconvert_exporter": "python", 292 | "pygments_lexer": "ipython2", 293 | "version": "2.7.13" 294 | } 295 | }, 296 | "nbformat": 4, 297 | "nbformat_minor": 1 298 | } 299 | -------------------------------------------------------------------------------- /mlcc/lab4-mortality-prediction/mlcc_mortality_prediction.m: -------------------------------------------------------------------------------- 1 | %% Build a better mortality prediction model 2 | 3 | password = ''; % ask a demonstrator for the password to the instance 4 | 5 | 6 | % Tell Matlab where the driver is 7 | javaclasspath('postgresql-9.4.1207.jar') % note we are using a postgres driver 8 | %% Initiate our database connection with Amazon 9 | % Connect to the Database 10 | conn = database('MIMIC','workshop',password,... 11 | 'Vendor','PostgreSQL',... 12 | 'Server','.amazonaws.com',... 13 | 'PortNumber',5432); 14 | 15 | if isempty(conn.Message) 16 | % nothing went wrong hurray 17 | fprintf('Connected to the database!\n'); 18 | else 19 | switch conn.Message 20 | case 'Unable to find JDBC driver.' 21 | error('You do not have the JDBC driver installed. Please ensure MATLAB can find the .jar file.'); 22 | case 'The server requested password-based authentication, but no password was provided.' 
23 | error('Please enter the password provided to you in the password variable at the top of the script.'); 24 | otherwise 25 | error(conn.Message) 26 | end 27 | end 28 | 29 | % NOTE: below is how we used to connect to the local sqlite file 30 | % javaclasspath('sqlite-jdbc-3.8.11.2.jar') % use this for SQLite 31 | % conn = database('','','',... 32 | % 'org.sqlite.JDBC',['jdbc:sqlite:' pwd filesep 'data' filesep 'mimiciii_v1_3_demo.sqlite']); 33 | 34 | % it's convenient to have our database connection return "dataset" data 35 | % we can extract header information from dataset outputs 36 | setdbprefs('DataReturnFormat','dataset') 37 | 38 | %% Extract the patient data using the query 39 | % *Highly advised* to not extract your data all at once in one query 40 | % That way if you find a typo, you only need to re-run a subcomponent, 41 | % not the entire data extraction process! 42 | 43 | % read the text from the file 44 | query = makeQuery('mlcc-extract-data.sql'); 45 | 46 | % run the query on the database connection 47 | tic; 48 | data = fetch(conn,query); 49 | toc; 50 | 51 | %% (Optional) convert the data from a dataset to an X design matrix 52 | % first convert data to a cell array 53 | data = dataset2cell(data); 54 | 55 | % we can get the column names from the first row of the 'data' variable 56 | header = data(1,:); 57 | header = regexprep(header,'_',''); % remove underscores 58 | 59 | % remove the header row from the data cell 60 | data = data(2:end,:); 61 | 62 | % MATLAB sometimes reads the string 'null' instead of NaN 63 | data(cellfun(@ischar, data) & cellfun(@(x) strcmp(x,'null'), data)) = {NaN}; 64 | 65 | % MATLAB sometimes has blank cells which should be NaN 66 | data(cellfun(@isempty, data)) = {NaN}; 67 | 68 | % Convert the data into a matrix of numbers 69 | % This is a MATLAB data type thing - we can't do math with cell arrays 70 | data = cell2mat(data); 71 | 72 | % match column names case-insensitively: the driver may return them in lower case 73 | X_id = data(:, strcmpi(header,'ICUSTAYID')); 74 | y = data(:, strcmpi(header,'OUTCOME')); 
75 | 76 | X = data(:, ~ismember( upper(header), {'ICUSTAYID','OUTCOME'}) ); 77 | X_header = header(~ismember( upper(header), {'ICUSTAYID','OUTCOME'})); 78 | 79 | %% Print out the first 5 rows of the data 80 | W = 5; % the maximum number of columns to print at one time 81 | % can set this wider for wider monitors 82 | for o=1:ceil(size(X,2)/W) % ceil, not floor: a final, partially-filled group of columns must still be printed 83 | idxColumn = (o-1)*W + 1 : o*W; 84 | if idxColumn(end) > size(X,2) 85 | idxColumn = idxColumn(1):size(X,2); 86 | end 87 | 88 | fprintf('%12s\t',X_header{idxColumn}); 89 | fprintf('\n'); 90 | for n=1:min(5,size(X,1)) % do not index past the last row when fewer than 5 rows were returned 91 | for m=idxColumn 92 | fprintf('%12g\t',X(n, m)); 93 | end 94 | fprintf('\n'); 95 | end 96 | fprintf('\n'); 97 | end 98 | 99 | 100 | %% Inspect the data 101 | figure(1); clf; hold all; 102 | 103 | % Box-plots are very useful for quickly looking for outliers, etc 104 | boxplot(X,'plotstyle','compact','labels',X_header); 105 | 106 | %% Perform data preprocessing 107 | % correct ages, remove outliers, etc. 108 | 109 | 110 | %% Sub-sample the frequent class to balance the number in each class 111 | % This is not always needed - but some models do better with it 112 | % Alternatively, you could up-sample the infrequent class 113 | balanceData = false; 114 | 115 | % optionally, we can balance the subsets (this works on y: y_train is only created after this block) 116 | if balanceData == true 117 | N0 = sum(y==0); 118 | N1 = sum(y==1); 119 | 120 | [~,idxRandomize] = sort(rand(N0,1)); 121 | idxKeep = find(y==0); % find all the negative outcomes 122 | idxKeep = idxKeep(idxRandomize(1:min(N0,N1))); % pick a random N1 negative outcomes (all of them if there are fewer than N1) 123 | idxKeep = [find(y==1);idxKeep]; % add in the positive outcomes 124 | idxKeep = sort(idxKeep); % probably not needed but it's cleaner 125 | else 126 | idxKeep = true(size(X,1),1); 127 | end 128 | 129 | X_train = X(idxKeep,:); 130 | y_train = y(idxKeep); 131 | 132 | %% Create cross-fold validation indices 133 | K = 5; % how many folds 134 | 135 | [~,idxK] = sort(rand(size(X_train,1),1)); 136 | idxK = mod(idxK,K) + 1; 137 | 138 | %% Train a classifier 139 
| % Here is an example using logistic regression 140 | 141 | auroc = zeros(1,K); 142 | 143 | for k=1:K 144 | idxDevelop = idxK ~= k; 145 | idxValidate = idxK == k; 146 | 147 | X_develop = X_train(idxDevelop,:); 148 | y_develop = y_train(idxDevelop,:); 149 | 150 | X_validate = X_train(idxValidate,:); 151 | y_validate = y_train(idxValidate,:); 152 | 153 | % Normalize and impute means for the data before training 154 | 155 | % Normalize the data 156 | mu = nanmean(X_develop, 1); 157 | sigma = nanstd(X_develop, [], 1); 158 | X_develop = bsxfun(@minus, X_develop, mu); 159 | X_develop = bsxfun(@rdivide, X_develop, sigma); 160 | 161 | X_validate = bsxfun(@minus, X_validate, mu); 162 | X_validate = bsxfun(@rdivide, X_validate, sigma); 163 | 164 | % Impute the mean (equal to 0 since we normalized the mean to be 0) 165 | X_develop(isnan(X_develop)) = 0; 166 | X_validate(isnan(X_validate)) = 0; 167 | 168 | 169 | % (Option 1). A logistic regression 170 | model = glmfit(X_develop, y_develop, 'binomial'); 171 | y_hat = glmval(model, X_validate, 'logit'); 172 | 173 | % (Option 2). An SVM 174 | % model = svmtrain(y_develop, X_develop, '-q -t 2'); 175 | % [pred,~,y_hat] = svmpredict(y_validate, X_validate, model); 176 | 177 | % if (pred(1) == 0 && y_hat(1) > 0) || (pred(1) == 1 && y_hat(1) < 0) 178 | % % flip the sign of dist to ensure that the AUROC is calculated properly 179 | % % the AUROC expects predictions of 1 to be assigned increasing distances 180 | % y_hat = -y_hat; 181 | % end 182 | 183 | % Calculate our performance metric: the AUROC. 
184 | [~, ~, auroc(k)] = calcRoc(y_hat, y_validate); 185 | end 186 | 187 | fprintf('Mean AUROC across %d folds: %4.4f.\n',K, mean(auroc)); 188 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MIT-LCP/mimic-workshop/b27eee438a1f62d909dd30d1d458d3516f32b276/requirements.txt -------------------------------------------------------------------------------- /temp/02-example-patient-sepsis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exploring the trajectory of a single patient" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Import Python libraries" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "We first need to import some tools for working with data in Python. \n", 22 | "- NumPy is for working with numbers\n", 23 | "- Pandas is for analysing data\n", 24 | "- MatPlotLib is for making plots\n", 25 | "- Sqlite3 to connect to the database" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "import numpy as np\n", 37 | "import pandas as pd\n", 38 | "import matplotlib.pyplot as plt\n", 39 | "import sqlite3\n", 40 | "%matplotlib inline" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "### Connect to the database" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "- We can use the sqlite3 library to connect to the MIMIC database\n", 55 | "- Once the connection is established, we'll run a simple SQL query." 
56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "# Connect to the MIMIC database\n", 67 | "conn = sqlite3.connect('data/mimicdata.sqlite')" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "collapsed": true 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "# Create our test query\n", 79 | "test_query = \"\"\"\n", 80 | "SELECT subject_id, hadm_id, admittime, dischtime, admission_type, diagnosis\n", 81 | "FROM admissions\n", 82 | "LIMIT 10;\n", 83 | "\"\"\"" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": { 90 | "collapsed": true 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "# Run the query and assign the results to a variable\n", 95 | "test = pd.read_sql_query(test_query,conn)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "# Display the first few rows\n", 107 | "test.head()" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "### Load the chartevents data" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "- The chartevents table contains data charted at the patient bedside. It includes variables such as heart rate, respiratory rate, temperature, and so on.\n", 122 | "- We'll begin by loading the chartevents data for a single patient." 
123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "collapsed": false 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "query = \"\"\"\n", 134 | "SELECT de.icustay_id\n", 135 | " , (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS\n", 136 | " , di.label\n", 137 | " , de.value\n", 138 | " , de.valuenum\n", 139 | " , de.uom\n", 140 | "FROM chartevents de\n", 141 | "INNER join d_items di\n", 142 | "ON de.itemid = di.itemid\n", 143 | "INNER join icustays ie\n", 144 | "ON de.icustay_id = ie.icustay_id\n", 145 | "WHERE de.subject_id = 40036\n", 146 | "ORDER BY charttime;\n", 147 | "\"\"\"\n", 148 | "\n", 149 | "ce = pd.read_sql_query(query,conn)\n" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": false 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "# Preview the data\n", 161 | "# Use 'head' to limit the number of rows returned\n", 162 | "ce.head()" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "### Review the patient's heart rate" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "- We can select individual columns using the column name. \n", 177 | "- For example, if we want to select just the label column, we write **```ce.LABEL```** or alternatively **```ce['LABEL']```**" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": { 184 | "collapsed": false 185 | }, 186 | "outputs": [], 187 | "source": [ 188 | "# Select a single column\n", 189 | "ce['LABEL']" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "- In a similar way, we can select rows from data using indexes. 
\n", 197 | "- For example, to select rows where the label is equal to 'Heart Rate', we would create an index using **```[ce.LABEL=='Heart Rate']```** " 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": { 204 | "collapsed": false 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "# Select just the heart rate rows using an index\n", 209 | "ce[ce.LABEL=='Heart Rate']" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "### Plot 1: How did the patient's heart rate change over time?" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "- Using the methods described above to select our data of interest, we can create our x and y axis values to create a time series plot of heart rate." 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": null, 229 | "metadata": { 230 | "collapsed": false 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "# Which time stamps have a corresponding heart rate measurement?\n", 235 | "print ce.index[ce.LABEL=='Heart Rate']" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "collapsed": false, 243 | "scrolled": true 244 | }, 245 | "outputs": [], 246 | "source": [ 247 | "# Set x equal to the times\n", 248 | "x_hr = ce.HOURS[ce.LABEL=='Heart Rate']\n", 249 | "\n", 250 | "# Set y equal to the heart rates\n", 251 | "y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']\n", 252 | "\n", 253 | "# Plot time against heart rate\n", 254 | "plt.figure(figsize=(14, 6))\n", 255 | "plt.plot(x_hr,y_hr)\n", 256 | "\n", 257 | "\n", 258 | "plt.xlabel('Time',fontsize=16)\n", 259 | "plt.ylabel('Heart rate',fontsize=16)\n", 260 | "plt.title('Heart rate over time from admission to the intensive care unit')" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "### Task 1\n", 268 | "\n", 269 | 
"* What is happening to this patient's heart rate?\n", 270 | "* Plot respiratory rate over time for the patient.\n", 271 | "* Is there anything unusual about the patient's respiratory rate?\n" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": { 278 | "collapsed": true 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | "# Exercise 1 here\n", 283 | "\n" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": [ 290 | "### Plot 2: Did the patient's vital signs breach any alarm thresholds?" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": {}, 296 | "source": [ 297 | "- Alarm systems in the intensive care unit are commonly based on high and low thresholds defined by the carer.\n", 298 | "- False alarms are often a problem and so thresholds may be set arbitrarily to reduce alarms.\n", 299 | "- As a result, alarm settings carry limited information." 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": { 306 | "collapsed": false 307 | }, 308 | "outputs": [], 309 | "source": [ 310 | "plt.figure(figsize=(14, 6))\n", 311 | "\n", 312 | "plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'], \n", 313 | " ce.VALUENUM[ce.LABEL=='Respiratory Rate'],\n", 314 | " 'k+', markersize=10, linewidth=4)\n", 315 | "\n", 316 | "plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - High'], \n", 317 | " ce.VALUENUM[ce.LABEL=='Resp Alarm - High'],\n", 318 | " 'm--')\n", 319 | "\n", 320 | "plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - Low'], \n", 321 | " ce.VALUENUM[ce.LABEL=='Resp Alarm - Low'],\n", 322 | " 'm--')\n", 323 | "\n", 324 | "plt.xlabel('Time',fontsize=16)\n", 325 | "plt.ylabel('Respiratory rate',fontsize=16)\n", 326 | "plt.title('Respiratory rate over time from admission, with upper and lower alarm thresholds')\n", 327 | "plt.ylim(0,55)\n" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "metadata": {}, 333 | "source": [ 334 | 
"### Task 2\n", 335 | "\n", 336 | "- Based on the data, does it look like the alarms would have triggered for this patient?\n" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "### Plot 3: What is patient's level of consciousness?" 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "- Glasgow Coma Scale (GCS) is a measure of consciousness.\n", 351 | "- It is commonly used for monitoring patients in the intensive care unit. \n", 352 | "- It consists of three components: eye response; verbal response; motor response." 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": { 359 | "collapsed": false 360 | }, 361 | "outputs": [], 362 | "source": [ 363 | "# Display the first few rows of the GCS eye response data\n", 364 | "ce[ce.LABEL=='GCS - Eye Opening'].head()" 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": { 371 | "collapsed": false 372 | }, 373 | "outputs": [], 374 | "source": [ 375 | "# Prepare the size of the figure\n", 376 | "plt.figure(figsize=(14, 10))\n", 377 | "\n", 378 | "# Set x equal to the times\n", 379 | "x_hr = ce.HOURS[ce.LABEL=='Heart Rate']\n", 380 | "\n", 381 | "# Set y equal to the heart rates\n", 382 | "y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']\n", 383 | "\n", 384 | "\n", 385 | "plt.plot(x_hr,y_hr)\n", 386 | "\n", 387 | "plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'], \n", 388 | " ce.VALUENUM[ce.LABEL=='Respiratory Rate'],\n", 389 | " 'k', markersize=6)\n", 390 | "\n", 391 | "# Add a text label to the y-axis\n", 392 | "plt.text(-4,155,'GCS - Eye Opening',fontsize=14)\n", 393 | "plt.text(-4,150,'GCS - Motor Response',fontsize=14)\n", 394 | "plt.text(-4,145,'GCS - Verbal Response',fontsize=14) \n", 395 | "\n", 396 | "# Iterate over list of GCS labels, plotting around 1 in 10 to avoid overlap\n", 397 | "for i, txt in 
enumerate(ce.VALUE[ce.LABEL=='GCS - Eye Opening'].values):\n", 398 | " if np.mod(i,6)==0 and i < 65:\n", 399 | " plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Eye Opening'].values[i],155),fontsize=14)\n", 400 | " \n", 401 | "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Motor Response'].values):\n", 402 | " if np.mod(i,6)==0 and i < 65:\n", 403 | " plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Motor Response'].values[i],150),fontsize=14)\n", 404 | "\n", 405 | "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Verbal Response'].values):\n", 406 | " if np.mod(i,6)==0 and i < 65:\n", 407 | " plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Verbal Response'].values[i],145),fontsize=14)\n", 408 | "\n", 409 | "plt.title('Vital signs and Glasgow Coma Scale over time from admission',fontsize=16)\n", 410 | "\n", 411 | "plt.xlabel('Time (hours)',fontsize=16)\n", 412 | "plt.ylabel('Heart rate or GCS',fontsize=16)\n", 413 | "plt.ylim(10,165)\n" 414 | ] 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "metadata": {}, 419 | "source": [ 420 | "### Task 3\n", 421 | "\n", 422 | "- How is the patient's consciousness changing over time?" 423 | ] 424 | }, 425 | { 426 | "cell_type": "markdown", 427 | "metadata": {}, 428 | "source": [ 429 | "# Stop here..." 430 | ] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "metadata": {}, 435 | "source": [ 436 | "### Plot 2: What other data do we have on the patient?" 437 | ] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "metadata": {}, 442 | "source": [ 443 | "- Using the Pandas 'read_sql_query' function again, we'll now load the patient outputs data (for example, urine output, drains, dialysis). This data is contained in the outputevents data table."
444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": null, 449 | "metadata": { 450 | "collapsed": false 451 | }, 452 | "outputs": [], 453 | "source": [ 454 | "# OPTION 1: load outputs from the patient\n", 455 | "query = \"\"\"\n", 456 | "select de.icustay_id\n", 457 | " , (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS\n", 458 | " , di.label\n", 459 | " , de.value\n", 460 | " , de.valueuom\n", 461 | "from outputevents de \n", 462 | "inner join icustays ie\n", 463 | " on de.icustay_id = ie.icustay_id\n", 464 | "inner join d_items di\n", 465 | " on de.itemid = di.itemid\n", 466 | "where de.subject_id = 40036\n", 467 | "order by charttime;\n", 468 | "\"\"\"\n", 469 | "\n", 470 | "oe = pd.read_sql_query(query,conn)" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "execution_count": null, 476 | "metadata": { 477 | "collapsed": false 478 | }, 479 | "outputs": [], 480 | "source": [ 481 | "oe.head()" 482 | ] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": null, 487 | "metadata": { 488 | "collapsed": false 489 | }, 490 | "outputs": [], 491 | "source": [ 492 | "# Prepare the size of the figure\n", 493 | "plt.figure(figsize=(14, 10))\n", 494 | "\n", 495 | "plt.title('Fluid output over time')\n", 496 | "\n", 497 | "plt.plot(oe.HOURS, \n", 498 | " oe.VALUE.cumsum()/1000, \n", 499 | " 'ro', markersize=8, label='Output volume, L')\n", 500 | "\n", 501 | "plt.xlim(0,20)\n", 502 | "plt.ylim(0,2)\n", 503 | "plt.legend()" 504 | ] 505 | }, 506 | { 507 | "cell_type": "markdown", 508 | "metadata": {}, 509 | "source": [ 510 | "To provide context for this plot, it would help to include patient input data. This helps to determine the patient's fluid balance, a key indicator in patient health." 
511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": null, 516 | "metadata": { 517 | "collapsed": false 518 | }, 519 | "outputs": [], 520 | "source": [ 521 | "# Load inputs given to the patient (usually intravenously) using the database connection\n", 522 | "query = \"\"\"\n", 523 | "select de.icustay_id\n", 524 | " , (strftime('%s',de.starttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_START\n", 525 | " , (strftime('%s',de.endtime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_END\n", 526 | " , de.linkorderid\n", 527 | " , di.label\n", 528 | " , de.amount\n", 529 | " , de.amountuom\n", 530 | " , de.rate\n", 531 | " , de.rateuom\n", 532 | "from inputevents_mv de \n", 533 | "inner join icustays ie\n", 534 | " on de.icustay_id = ie.icustay_id\n", 535 | "inner join d_items di\n", 536 | " on de.itemid = di.itemid\n", 537 | "where de.subject_id = 40036\n", 538 | "order by endtime;\n", 539 | "\"\"\"\n", 540 | "\n", 541 | "ie = pd.read_sql_query(query,conn)\n" 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": null, 547 | "metadata": { 548 | "collapsed": false 549 | }, 550 | "outputs": [], 551 | "source": [ 552 | "ie.head()" 553 | ] 554 | }, 555 | { 556 | "cell_type": "markdown", 557 | "metadata": {}, 558 | "source": [ 559 | "Note that the column headers are different: we have \"HOURS_START\" and \"HOURS_END\". This is because inputs are administered over a fixed period of time." 
560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": null, 565 | "metadata": { 566 | "collapsed": false 567 | }, 568 | "outputs": [], 569 | "source": [ 570 | "ie['LABEL'].unique()" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": null, 576 | "metadata": { 577 | "collapsed": false 578 | }, 579 | "outputs": [], 580 | "source": [ 581 | "# Prepare the size of the figure\n", 582 | "plt.figure(figsize=(14, 10))\n", 583 | "\n", 584 | "# Plot the cumulative input against the cumulative output\n", 585 | "plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'], \n", 586 | " ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000, \n", 587 | " 'go', markersize=8, label='Intake volume, L')\n", 588 | "\n", 589 | "plt.plot(oe.HOURS, \n", 590 | " oe.VALUE.cumsum()/1000, \n", 591 | " 'ro', markersize=8, label='Output volume, L')\n", 592 | "\n", 593 | "plt.title('Fluid balance over time',fontsize=16)\n", 594 | "plt.xlabel('Hours',fontsize=16)\n", 595 | "plt.ylabel('Volume (litres)',fontsize=16)\n", 596 | "# plt.ylim(0,38)\n", 597 | "plt.legend()" 598 | ] 599 | }, 600 | { 601 | "cell_type": "markdown", 602 | "metadata": { 603 | "collapsed": true 604 | }, 605 | "source": [ 606 | "As the plot shows, the patient's intake tends to be above their output. There are however periods where input and output are almost one to one. One of the biggest challenges of working with ICU data is that context is everything, so let's look at a treatment (Furosemide/Lasix) which we know will affect this graph." 
607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": null, 612 | "metadata": { 613 | "collapsed": false 614 | }, 615 | "outputs": [], 616 | "source": [ 617 | "plt.figure(figsize=(14, 10))\n", 618 | "\n", 619 | "# Plot the cumulative input against the cumulative output\n", 620 | "plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'], \n", 621 | " ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000, \n", 622 | " 'go', markersize=8, label='Intake volume, L')\n", 623 | "\n", 624 | "plt.plot(oe.HOURS, \n", 625 | " oe.VALUE.cumsum()/1000, \n", 626 | " 'ro', markersize=8, label='Output volume, L')\n", 627 | "\n", 628 | "# example on getting two columns from a dataframe: ie[['HOURS_START','HOURS_END']].head()\n", 629 | "\n", 630 | "for i, idx in enumerate(ie.index[ie.LABEL=='Furosemide (Lasix)']):\n", 631 | " plt.plot([ie.HOURS_START[ie.LABEL=='Furosemide (Lasix)'][idx],\n", 632 | " ie.HOURS_END[ie.LABEL=='Furosemide (Lasix)'][idx]],\n", 633 | " [ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx],\n", 634 | " ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx]],\n", 635 | " 'b-',linewidth=4)\n", 636 | " \n", 637 | "\n", 638 | "plt.title('Fluid balance over time',fontsize=16)\n", 639 | "plt.xlabel('Hours',fontsize=16)\n", 640 | "plt.ylabel('Volume (litres)',fontsize=16)\n", 641 | "# plt.ylim(0,38)\n", 642 | "plt.legend()\n" 643 | ] 644 | }, 645 | { 646 | "cell_type": "code", 647 | "execution_count": null, 648 | "metadata": { 649 | "collapsed": false 650 | }, 651 | "outputs": [], 652 | "source": [ 653 | "ie['LABEL'].unique()" 654 | ] 655 | }, 656 | { 657 | "cell_type": "markdown", 658 | "metadata": {}, 659 | "source": [ 660 | "### Exercise 2\n", 661 | "\n", 662 | "* Plot the alarms for the mean arterial pressure ('```Arterial Blood Pressure mean```')\n", 663 | "* HINT: you can use ```ce.LABEL.unique()``` to find a list of variable names\n", 664 | "* Were the alarm thresholds breached?" 
665 | ] 666 | }, 667 | { 668 | "cell_type": "code", 669 | "execution_count": null, 670 | "metadata": { 671 | "collapsed": false 672 | }, 673 | "outputs": [], 674 | "source": [ 675 | "# Exercise 2 here\n", 676 | "\n", 677 | "\n" 678 | ] 679 | }, 680 | { 681 | "cell_type": "markdown", 682 | "metadata": {}, 683 | "source": [ 684 | "### Plot 3: Were the patient's other vital signs stable?" 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": null, 690 | "metadata": { 691 | "collapsed": false 692 | }, 693 | "outputs": [], 694 | "source": [ 695 | "plt.figure(figsize=(14, 10))\n", 696 | "\n", 697 | "plt.plot(ce.index[ce.LABEL=='Heart Rate'], \n", 698 | " ce.VALUENUM[ce.LABEL=='Heart Rate'],\n", 699 | " 'rx', markersize=8, label='HR')\n", 700 | "\n", 701 | "plt.plot(ce.index[ce.LABEL=='O2 saturation pulseoxymetry'], \n", 702 | " ce.VALUENUM[ce.LABEL=='O2 saturation pulseoxymetry'], \n", 703 | " 'g.', markersize=8, label='O2')\n", 704 | "\n", 705 | "plt.plot(ce.index[ce.LABEL=='Arterial Blood Pressure mean'], \n", 706 | " ce.VALUENUM[ce.LABEL=='Arterial Blood Pressure mean'], \n", 707 | " 'bv', markersize=8, label='MAP')\n", 708 | "\n", 709 | "plt.plot(ce.index[ce.LABEL=='Respiratory Rate'], \n", 710 | " ce.VALUENUM[ce.LABEL=='Respiratory Rate'], \n", 711 | " 'k+', markersize=8, label='RR')\n", 712 | "\n", 713 | "plt.title('Vital signs over time from admission')\n", 714 | "plt.ylim(0,130)\n", 715 | "plt.legend()" 716 | ] 717 | }, 718 | { 719 | "cell_type": "markdown", 720 | "metadata": {}, 721 | "source": [ 722 | "### Plot 5: Laboratory measurements" 723 | ] 724 | }, 725 | { 726 | "cell_type": "markdown", 727 | "metadata": {}, 728 | "source": [ 729 | "Using the Pandas 'read_sql_query' function again, we'll now load the labevents data.\n", 730 | "This data corresponds to measurements made in a laboratory - usually on a sample of patient blood.
" 731 | ] 732 | }, 733 | { 734 | "cell_type": "code", 735 | "execution_count": null, 736 | "metadata": { 737 | "collapsed": false 738 | }, 739 | "outputs": [], 740 | "source": [ 741 | "# OPTION 1: load labevents data using the database connection\n", 742 | "query = \"\"\"\n", 743 | "SELECT de.subject_id\n", 744 | " , de.charttime\n", 745 | " , di.label, de.value, de.valuenum\n", 746 | " , de.uom\n", 747 | "FROM labevents de\n", 748 | "INNER JOIN d_labitems di\n", 749 | " ON de.itemid = di.itemid\n", 750 | "where de.subject_id = 40036\n", 751 | "\"\"\"\n", 752 | "\n", 753 | "le = pd.read_sql_query(query,conn)" 754 | ] 755 | }, 756 | { 757 | "cell_type": "code", 758 | "execution_count": null, 759 | "metadata": { 760 | "collapsed": false 761 | }, 762 | "outputs": [], 763 | "source": [ 764 | "# preview the labevents data\n", 765 | "le.head()" 766 | ] 767 | }, 768 | { 769 | "cell_type": "code", 770 | "execution_count": null, 771 | "metadata": { 772 | "collapsed": false 773 | }, 774 | "outputs": [], 775 | "source": [ 776 | "# preview the hemoglobin rows of the labevents data\n", 777 | "le[le.LABEL=='HEMOGLOBIN']" 778 | ] 779 | }, 780 | { 781 | "cell_type": "code", 782 | "execution_count": null, 783 | "metadata": { 784 | "collapsed": false 785 | }, 786 | "outputs": [], 787 | "source": [ 788 | "plt.figure(figsize=(14, 10))\n", 789 | "\n", 790 | "plt.plot(le.index[le.LABEL=='HEMATOCRIT'], \n", 791 | " le.VALUENUM[le.LABEL=='HEMATOCRIT'], \n", 792 | " 'go', markersize=6, label='Haematocrit')\n", 793 | "\n", 794 | "plt.plot(le.index[le.LABEL=='HEMOGLOBIN'], \n", 795 | " le.VALUENUM[le.LABEL=='HEMOGLOBIN'], \n", 796 | " 'bv', markersize=8, label='Hemoglobin')\n", 797 | "\n", 798 | "plt.title('Laboratory measurements over time from admission')\n", 799 | "plt.ylim(0,38)\n", 800 | "plt.legend()" 801 | ] 802 | }, 803 | { 804 | "cell_type": "code", 805 | "execution_count": null, 806 | "metadata": { 807 | "collapsed": true 808 | }, 809 | "outputs": [], 810 | "source": [] 811 | }, 812 | { 813 | "cell_type":
"markdown", 814 | "metadata": {}, 815 | "source": [ 816 | "## Plot 5: intravenous medications" 817 | ] 818 | }, 819 | { 820 | "cell_type": "markdown", 821 | "metadata": {}, 822 | "source": [ 823 | "- Using the Pandas 'read_csv function' again, we'll now load the ioevents dataset" 824 | ] 825 | }, 826 | { 827 | "cell_type": "code", 828 | "execution_count": null, 829 | "metadata": { 830 | "collapsed": false 831 | }, 832 | "outputs": [], 833 | "source": [ 834 | "# load ioevents\n", 835 | "ioe = pd.read_csv('data/example_ioevents.csv',index_col='HOURSSINCEADMISSION_START')" 836 | ] 837 | }, 838 | { 839 | "cell_type": "code", 840 | "execution_count": null, 841 | "metadata": { 842 | "collapsed": false 843 | }, 844 | "outputs": [], 845 | "source": [ 846 | "ioe.head()\n" 847 | ] 848 | }, 849 | { 850 | "cell_type": "code", 851 | "execution_count": null, 852 | "metadata": { 853 | "collapsed": false 854 | }, 855 | "outputs": [], 856 | "source": [ 857 | "plt.figure(figsize=(14, 10))\n", 858 | "\n", 859 | "plt.plot(ioe.index[ioe.LABEL=='Midazolam (Versed)'], \n", 860 | " ioe.RATE[ioe.LABEL=='Midazolam (Versed)'], \n", 861 | " 'go', markersize=6, label='Midazolam (Versed)')\n", 862 | "\n", 863 | "plt.plot(ioe.index[ioe.LABEL=='Propofol'], \n", 864 | " ioe.RATE[ioe.LABEL=='Propofol'], \n", 865 | " 'bv', markersize=8, label='Propofol')\n", 866 | "\n", 867 | "plt.plot(ioe.index[ioe.LABEL=='Fentanyl'], \n", 868 | " ioe.RATE[ioe.LABEL=='Fentanyl'], \n", 869 | " 'k+', markersize=8, label='Fentanyl')\n", 870 | "\n", 871 | "plt.title('IOevents over time from admission')\n", 872 | "plt.ylim(0,380)\n", 873 | "plt.legend()" 874 | ] 875 | }, 876 | { 877 | "cell_type": "code", 878 | "execution_count": null, 879 | "metadata": { 880 | "collapsed": true 881 | }, 882 | "outputs": [], 883 | "source": [] 884 | }, 885 | { 886 | "cell_type": "markdown", 887 | "metadata": {}, 888 | "source": [ 889 | "## Plot 6: blood products" 890 | ] 891 | }, 892 | { 893 | "cell_type": "markdown", 894 |
"metadata": {}, 895 | "source": [ 896 | "- Using Pandas 'read_csv function' again, we'll now load the blood products data" 897 | ] 898 | }, 899 | { 900 | "cell_type": "code", 901 | "execution_count": null, 902 | "metadata": { 903 | "collapsed": false 904 | }, 905 | "outputs": [], 906 | "source": [ 907 | "plt.figure(figsize=(14, 10))\n", 908 | "\n", 909 | "plt.plot(ioe.index[ioe.LABEL=='OR Cryoprecipitate Intake'], \n", 910 | " ioe.VALUENUM[ioe.LABEL=='OR Cryoprecipitate Intake'], \n", 911 | " 'go', markersize=6, label='OR Cryoprecipitate Intake')\n", 912 | "\n", 913 | "plt.plot(ioe.index[ioe.LABEL=='OR Crystalloid Intake'], \n", 914 | " ioe.VALUENUM[ioe.LABEL=='OR Crystalloid Intake'], \n", 915 | " 'bv', markersize=8, label='OR Crystalloid Intake')\n", 916 | "\n", 917 | "plt.plot(ioe.index[ioe.LABEL=='OR FFP Intake'], \n", 918 | " ioe.VALUENUM[ioe.LABEL=='OR FFP Intake'], \n", 919 | " 'k+', markersize=8, label='OR FFP Intake')\n", 920 | "\n", 921 | "plt.plot(ioe.index[ioe.LABEL=='OR Packed RBC Intake'], \n", 922 | " ioe.VALUENUM[ioe.LABEL=='OR Packed RBC Intake'], \n", 923 | " 'k+', markersize=8, label='OR Packed RBC Intake')\n", 924 | "\n", 925 | "plt.plot(ioe.index[ioe.LABEL=='OR Platelet Intake'], \n", 926 | " ioe.VALUENUM[ioe.LABEL=='OR Platelet Intake'], \n", 927 | " 'k+', markersize=8, label='OR Platelet Intake')\n", 928 | "\n", 929 | "plt.title('Blood products administered over time from admission')\n", 930 | "plt.legend()" 931 | ] 932 | }, 933 | { 934 | "cell_type": "markdown", 935 | "metadata": {}, 936 | "source": [ 937 | "### Discharge summary" 938 | ] 939 | }, 940 | { 941 | "cell_type": "code", 942 | "execution_count": null, 943 | "metadata": { 944 | "collapsed": false 945 | }, 946 | "outputs": [], 947 | "source": [ 948 | "# insert discharge summary here..." 
949 | ] 950 | } 951 | ], 952 | "metadata": { 953 | "kernelspec": { 954 | "display_name": "Python 2", 955 | "language": "python", 956 | "name": "python2" 957 | }, 958 | "language_info": { 959 | "codemirror_mode": { 960 | "name": "ipython", 961 | "version": 2 962 | }, 963 | "file_extension": ".py", 964 | "mimetype": "text/x-python", 965 | "name": "python", 966 | "nbconvert_exporter": "python", 967 | "pygments_lexer": "ipython2", 968 | "version": "2.7.10" 969 | } 970 | }, 971 | "nbformat": 4, 972 | "nbformat_minor": 0 973 | } 974 | -------------------------------------------------------------------------------- /temp/03-example-patient-ich.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exploring the trajectory of a single patient" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Import Python libraries" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "We first need to import some tools for working with data in Python. 
\n", 22 | "- NumPy is for working with numbers\n", 23 | "- Pandas is for analysing data\n", 24 | "- MatPlotLib is for making plots\n", 25 | "- Sqlite3 to connect to the database" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "import numpy as np\n", 37 | "import pandas as pd\n", 38 | "import matplotlib.pyplot as plt\n", 39 | "import sqlite3\n", 40 | "%matplotlib inline" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "### Connect to the database" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "- We can use the sqlite3 library to connect to the MIMIC database\n", 55 | "- Once the connection is established, we'll run a simple SQL query." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "# Connect to the MIMIC database\n", 67 | "conn = sqlite3.connect('data/mimicdata.sqlite')" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "collapsed": true 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "# Create our test query\n", 79 | "test_query = \"\"\"\n", 80 | "SELECT subject_id, hadm_id, admittime, dischtime, admission_type, diagnosis\n", 81 | "FROM admissions\n", 82 | "LIMIT 10;\n", 83 | "\"\"\"" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": { 90 | "collapsed": true 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "# Run the query and assign the results to a variable\n", 95 | "test = pd.read_sql_query(test_query,conn)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "# Display the first few rows\n", 107 | 
"test.head()" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "### Load the chartevents data" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "- The chartevents table contains data charted at the patient bedside. It includes variables such as heart rate, respiratory rate, temperature, and so on.\n", 122 | "- We'll begin by loading the chartevents data for a single patient." 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "collapsed": false 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "query = \"\"\"\n", 134 | "SELECT de.icustay_id\n", 135 | " , (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS\n", 136 | " , di.label\n", 137 | " , de.value\n", 138 | " , de.valuenum\n", 139 | " , de.uom\n", 140 | "FROM chartevents de\n", 141 | "INNER join d_items di\n", 142 | "ON de.itemid = di.itemid\n", 143 | "INNER join icustays ie\n", 144 | "ON de.icustay_id = ie.icustay_id\n", 145 | "WHERE de.subject_id = 40084\n", 146 | "ORDER BY charttime;\n", 147 | "\"\"\"\n", 148 | "\n", 149 | "ce = pd.read_sql_query(query,conn)\n", 150 | "\n", 151 | "\n", 152 | "# OPTION 2: load chartevents from a CSV file\n", 153 | "# ce = pd.read_csv('data/example_chartevents.csv', index_col='HOURSSINCEADMISSION')" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "# Preview the data\n", 165 | "# Use 'head' to limit the number of rows returned\n", 166 | "ce.head()" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "### Review the patient's heart rate" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "- We can select individual columns using the column name. 
\n", 181 | "- For example, if we want to select just the label column, we write **```ce.LABEL```** or alternatively **```ce['LABEL']```**" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": { 188 | "collapsed": false 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "# Select a single column\n", 193 | "ce['LABEL'].head()" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "- In a similar way, we can select rows from data using indexes. \n", 201 | "- For example, to select rows where the label is equal to 'Heart Rate', we would create an index using **```[ce.LABEL=='Heart Rate']```** " 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": false 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "# Select just the heart rate rows using an index\n", 213 | "ce[ce.LABEL=='Heart Rate'].head()" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "### Plot 1: How did the patient's heart rate change over time?" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "- Using the methods described above to select our data of interest, we can create our x and y axis values to create a time series plot of heart rate."
228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": { 234 | "collapsed": false 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "# Which time stamps have a corresponding heart rate measurement?\n", 239 | "print ce.index[ce.LABEL=='Heart Rate']" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": { 246 | "collapsed": false, 247 | "scrolled": true 248 | }, 249 | "outputs": [], 250 | "source": [ 251 | "# Set x equal to the times\n", 252 | "x_hr = ce.HOURS[ce.LABEL=='Heart Rate']\n", 253 | "\n", 254 | "# Set y equal to the heart rates\n", 255 | "y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']\n", 256 | "\n", 257 | "# Plot time against heart rate\n", 258 | "plt.figure(figsize=(14, 6))\n", 259 | "plt.plot(x_hr,y_hr)\n", 260 | "\n", 261 | "\n", 262 | "plt.xlabel('Time',fontsize=16)\n", 263 | "plt.ylabel('Heart rate',fontsize=16)\n", 264 | "plt.title('Heart rate over time from admission to the intensive care unit')" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": { 271 | "collapsed": false 272 | }, 273 | "outputs": [], 274 | "source": [ 275 | "ce['LABEL'].unique()" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "### Task 1\n", 283 | "\n", 284 | "* What is happening to this patient's heart rate?\n", 285 | "* Plot respiratory rate over time for the patient.\n", 286 | "* Is there anything unusual about the patient's respiratory rate?\n" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": { 293 | "collapsed": true 294 | }, 295 | "outputs": [], 296 | "source": [ 297 | "# Exercise 1 here\n", 298 | "\n" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "# What is happening to this patient's heart rate?" 
306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "metadata": { 312 | "collapsed": false 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "# Set x equal to the times\n", 317 | "x_hr = ce.HOURS[ce.LABEL=='Heart Rate']\n", 318 | "\n", 319 | "# Set y equal to the heart rates\n", 320 | "y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']\n", 321 | "\n", 322 | "# Plot time against heart rate\n", 323 | "plt.figure(figsize=(14, 6))\n", 324 | "plt.plot(x_hr,y_hr)\n", 325 | "\n", 326 | "# Get some information regarding arctic sun\n", 327 | "plt.plot(ce.HOURS[ce.LABEL=='Arctic Sun/Alsius Set Temp'], \n", 328 | " ce.VALUENUM[ce.LABEL=='Arctic Sun/Alsius Set Temp'],\n", 329 | " 'k+--',markersize=8)\n", 330 | "plt.plot(ce.HOURS[ce.LABEL=='Arctic Sun Water Temp'], \n", 331 | " ce.VALUENUM[ce.LABEL=='Arctic Sun Water Temp'],\n", 332 | " 'r+--',markersize=8)\n", 333 | "plt.plot(ce.HOURS[ce.LABEL=='Arctic Sun/Alsius Temp #1 C'], \n", 334 | " ce.VALUENUM[ce.LABEL=='Arctic Sun/Alsius Temp #1 C'],\n", 335 | " 'b+--',markersize=8)\n", 336 | "plt.plot(ce.HOURS[ce.LABEL=='Arctic Sun/Alsius Temp #2 C'], \n", 337 | " ce.VALUENUM[ce.LABEL=='Arctic Sun/Alsius Temp #2 C'],\n", 338 | " 'g+--',markersize=8)\n", 339 | "\n", 340 | "plt.xlabel('Time',fontsize=16)\n", 341 | "plt.ylabel('Heart rate',fontsize=16)\n", 342 | "\n", 343 | "plt.xlabel('Time (hours)',fontsize=16)\n", 344 | "plt.ylabel('Heart rate / temperature',fontsize=16)\n", 345 | "plt.title('Heart rate over time')\n", 346 | "plt.ylim(0,80)\n", 347 | "plt.xlim(0,48)\n", 348 | "plt.legend()" 349 | ] 350 | }, 351 | { 352 | "cell_type": "markdown", 353 | "metadata": {}, 354 | "source": [ 355 | "### Plot 2: Did the patient's vital signs breach any alarm thresholds?" 
356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "- Alarm systems in the intensive care unit are commonly based on high and low thresholds defined by the carer.\n", 363 | "- False alarms are often a problem and so thresholds may be set arbitrarily to reduce alarms.\n", 364 | "- As a result, alarm settings carry limited information." 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": null, 370 | "metadata": { 371 | "collapsed": false 372 | }, 373 | "outputs": [], 374 | "source": [ 375 | "plt.figure(figsize=(14, 6))\n", 376 | "\n", 377 | "plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'], \n", 378 | " ce.VALUENUM[ce.LABEL=='Respiratory Rate'],\n", 379 | " 'k+', markersize=10, linewidth=4)\n", 380 | "\n", 381 | "plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - High'], \n", 382 | " ce.VALUENUM[ce.LABEL=='Resp Alarm - High'],\n", 383 | " 'm--')\n", 384 | "\n", 385 | "plt.plot(ce.HOURS[ce.LABEL=='Resp Alarm - Low'], \n", 386 | " ce.VALUENUM[ce.LABEL=='Resp Alarm - Low'],\n", 387 | " 'm--')\n", 388 | "\n", 389 | "plt.xlabel('Time',fontsize=16)\n", 390 | "plt.ylabel('Respiratory rate',fontsize=16)\n", 391 | "plt.title('Respiratory rate over time from admission, with upper and lower alarm thresholds')\n", 392 | "plt.ylim(0,55)\n" 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "### Task 2\n", 400 | "\n", 401 | "- Based on the data, does it look like the alarms would have triggered for this patient?\n" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "### Plot 3: What is the patient's level of consciousness?" 409 | ] 410 | }, 411 | { 412 | "cell_type": "markdown", 413 | "metadata": {}, 414 | "source": [ 415 | "- Glasgow Coma Scale (GCS) is a measure of consciousness.\n", 416 | "- It is commonly used for monitoring patients in the intensive care unit.
\n", 417 | "- It consists of three components: eye response; verbal response; motor response." 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "metadata": { 424 | "collapsed": false 425 | }, 426 | "outputs": [], 427 | "source": [ 428 | "# Display the first few rows of the GCS eye response data\n", 429 | "ce[ce.LABEL=='GCS - Eye Opening'].head()" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": null, 435 | "metadata": { 436 | "collapsed": false 437 | }, 438 | "outputs": [], 439 | "source": [ 440 | "# Prepare the size of the figure\n", 441 | "plt.figure(figsize=(18, 10))\n", 442 | "\n", 443 | "# Set x equal to the times\n", 444 | "x_hr = ce.HOURS[ce.LABEL=='Heart Rate']\n", 445 | "\n", 446 | "# Set y equal to the heart rates\n", 447 | "y_hr = ce.VALUENUM[ce.LABEL=='Heart Rate']\n", 448 | "\n", 449 | "\n", 450 | "plt.plot(x_hr,y_hr)\n", 451 | "\n", 452 | "plt.plot(ce.HOURS[ce.LABEL=='Respiratory Rate'], \n", 453 | " ce.VALUENUM[ce.LABEL=='Respiratory Rate'],\n", 454 | " 'k', markersize=6)\n", 455 | "\n", 456 | "# Add a text label to the y-axis\n", 457 | "plt.text(-5,155,'GCS - Eye Opening',fontsize=14)\n", 458 | "plt.text(-5,150,'GCS - Motor Response',fontsize=14)\n", 459 | "plt.text(-5,145,'GCS - Verbal Response',fontsize=14) \n", 460 | "\n", 461 | "# Iterate over list of GCS labels, plotting around 1 in 10 to avoid overlap\n", 462 | "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Eye Opening'].values):\n", 463 | " if np.mod(i,6)==0 and i < 65:\n", 464 | " plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Eye Opening'].values[i],155),fontsize=14)\n", 465 | " \n", 466 | "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Motor Response'].values):\n", 467 | " if np.mod(i,6)==0 and i < 65:\n", 468 | " plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Motor Response'].values[i],150),fontsize=14)\n", 469 | "\n", 470 | "for i, txt in enumerate(ce.VALUE[ce.LABEL=='GCS - Verbal Response'].values):\n", 471 | " if 
np.mod(i,6)==0 and i < 65:\n", 472 | " plt.annotate(txt, (ce.HOURS[ce.LABEL=='GCS - Verbal Response'].values[i],145),fontsize=14)\n", 473 | "\n", 474 | "plt.title('Vital signs and Glasgow Coma Scale over time from admission',fontsize=16)\n", 475 | "\n", 476 | "plt.xlabel('Time (hours)',fontsize=16)\n", 477 | "plt.ylabel('Heart rate or GCS',fontsize=16)\n", 478 | "plt.ylim(10,165)\n" 479 | ] 480 | }, 481 | { 482 | "cell_type": "markdown", 483 | "metadata": {}, 484 | "source": [ 485 | "### Task 3\n", 486 | "\n", 487 | "- How is the patient's consciousness changing over time?" 488 | ] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "# Stop here..." 495 | ] 496 | }, 497 | { 498 | "cell_type": "markdown", 499 | "metadata": {}, 500 | "source": [ 501 | "### Plot 2: What other data do we have on the patient?" 502 | ] 503 | }, 504 | { 505 | "cell_type": "markdown", 506 | "metadata": {}, 507 | "source": [ 508 | "- Using the Pandas 'read_sql_query' function, we'll now load the outputevents data - this table contains all information about patient outputs (urine output, drains, dialysis)." 
509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": null, 514 | "metadata": { 515 | "collapsed": false 516 | }, 517 | "outputs": [], 518 | "source": [ 519 | "# OPTION 1: load outputs from the patient\n", 520 | "query = \"\"\"\n", 521 | "select de.icustay_id\n", 522 | " , (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS\n", 523 | " , di.label\n", 524 | " , de.value\n", 525 | " , de.valueuom\n", 526 | "from outputevents de \n", 527 | "inner join icustays ie\n", 528 | " on de.icustay_id = ie.icustay_id\n", 529 | "inner join d_items di\n", 530 | " on de.itemid = di.itemid\n", 531 | "where de.subject_id = 40084\n", 532 | "order by charttime;\n", 533 | "\"\"\"\n", 534 | "\n", 535 | "oe = pd.read_sql_query(query,conn)" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": null, 541 | "metadata": { 542 | "collapsed": false 543 | }, 544 | "outputs": [], 545 | "source": [ 546 | "oe.head()" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "metadata": { 553 | "collapsed": false 554 | }, 555 | "outputs": [], 556 | "source": [ 557 | "plt.figure(figsize=(14, 10))\n", 558 | "\n", 559 | "plt.figure(figsize=(14, 6))\n", 560 | "plt.title('Fluid output over time')\n", 561 | "\n", 562 | "plt.plot(oe.HOURS, \n", 563 | " oe.VALUE.cumsum()/1000, \n", 564 | " 'ro', markersize=8, label='Output volume, L')\n", 565 | "\n", 566 | "plt.xlim(0,72)\n", 567 | "plt.ylim(0,10)\n", 568 | "plt.legend()" 569 | ] 570 | }, 571 | { 572 | "cell_type": "markdown", 573 | "metadata": {}, 574 | "source": [ 575 | "To provide necessary context to this plot, it would help to include patient input data. This provides the necessary context to determine a patient's fluid balance - a key indicator in patient health." 
576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": null, 581 | "metadata": { 582 | "collapsed": false 583 | }, 584 | "outputs": [], 585 | "source": [ 586 | "# OPTION 1: load inputs given to the patient (usually intravenously) using the database connection\n", 587 | "query = \"\"\"\n", 588 | "select de.icustay_id\n", 589 | " , (strftime('%s',de.starttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_START\n", 590 | " , (strftime('%s',de.endtime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS_END\n", 591 | " , de.linkorderid\n", 592 | " , di.label\n", 593 | " , de.amount\n", 594 | " , de.amountuom\n", 595 | " , de.rate\n", 596 | " , de.rateuom\n", 597 | "from inputevents_mv de \n", 598 | "inner join icustays ie\n", 599 | " on de.icustay_id = ie.icustay_id\n", 600 | "inner join d_items di\n", 601 | " on de.itemid = di.itemid\n", 602 | "where de.subject_id = 40084\n", 603 | "order by endtime;\n", 604 | "\"\"\"\n", 605 | "\n", 606 | "ie = pd.read_sql_query(query,conn)\n", 607 | "\n", 608 | "# # OPTION 2: load ioevents using the CSV file with endtime as the index\n", 609 | "# ioe = pd.read_csv('inputevents.csv'\n", 610 | "# ,header=None\n", 611 | "# ,names=['subject_id','itemid','label','starttime','endtime','amount','amountuom','rate','rateuom']\n", 612 | "# ,parse_dates=True)" 613 | ] 614 | }, 615 | { 616 | "cell_type": "code", 617 | "execution_count": null, 618 | "metadata": { 619 | "collapsed": false 620 | }, 621 | "outputs": [], 622 | "source": [ 623 | "ie.head()" 624 | ] 625 | }, 626 | { 627 | "cell_type": "markdown", 628 | "metadata": {}, 629 | "source": [ 630 | "Note that the column headers are different: we have \"HOURS_START\" and \"HOURS_END\". This is because inputs are administered over a fixed period of time." 
631 | ] 632 | }, 633 | { 634 | "cell_type": "code", 635 | "execution_count": null, 636 | "metadata": { 637 | "collapsed": false 638 | }, 639 | "outputs": [], 640 | "source": [ 641 | "ie['LABEL'].unique()" 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": null, 647 | "metadata": { 648 | "collapsed": false 649 | }, 650 | "outputs": [], 651 | "source": [ 652 | "plt.figure(figsize=(14, 10))\n", 653 | "\n", 654 | "# Plot the cumulative input against the cumulative output\n", 655 | "plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'], \n", 656 | " ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000, \n", 657 | " 'go', markersize=8, label='Intake volume, L')\n", 658 | "\n", 659 | "plt.plot(oe.HOURS, \n", 660 | " oe.VALUE.cumsum()/1000, \n", 661 | " 'ro', markersize=8, label='Output volume, L')\n", 662 | "\n", 663 | "plt.title('Fluid balance over time',fontsize=16)\n", 664 | "plt.xlabel('Hours',fontsize=16)\n", 665 | "plt.ylabel('Volume (litres)',fontsize=16)\n", 666 | "# plt.ylim(0,38)\n", 667 | "plt.legend()" 668 | ] 669 | }, 670 | { 671 | "cell_type": "markdown", 672 | "metadata": { 673 | "collapsed": true 674 | }, 675 | "source": [ 676 | "As the plot shows, the patient's intake tends to be above their output (as one would expect!) - but there are periods where they are almost one to one. One of the biggest challenges of working with ICU data is that context is everything - let's look at a treatment (lasix) that we know will affect this graph." 
677 | ] 678 | }, 679 | { 680 | "cell_type": "code", 681 | "execution_count": null, 682 | "metadata": { 683 | "collapsed": false 684 | }, 685 | "outputs": [], 686 | "source": [ 687 | "plt.figure(figsize=(14, 10))\n", 688 | "\n", 689 | "# Plot the cumulative input against the cumulative output\n", 690 | "plt.plot(ie.HOURS_END[ie.AMOUNTUOM=='mL'], \n", 691 | " ie.AMOUNT[ie.AMOUNTUOM=='mL'].cumsum()/1000, \n", 692 | " 'go', markersize=8, label='Intake volume, L')\n", 693 | "\n", 694 | "plt.plot(oe.HOURS, \n", 695 | " oe.VALUE.cumsum()/1000, \n", 696 | " 'ro', markersize=8, label='Output volume, L')\n", 697 | "\n", 698 | "# example on getting two columns from a dataframe: ie[['HOURS_START','HOURS_END']].head()\n", 699 | "\n", 700 | "for i, idx in enumerate(ie.index[ie.LABEL=='Furosemide (Lasix)']):\n", 701 | " plt.plot([ie.HOURS_START[ie.LABEL=='Furosemide (Lasix)'][idx],\n", 702 | " ie.HOURS_END[ie.LABEL=='Furosemide (Lasix)'][idx]],\n", 703 | " [ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx],\n", 704 | " ie.RATE[ie.LABEL=='Furosemide (Lasix)'][idx]],\n", 705 | " 'b-',linewidth=4)\n", 706 | " \n", 707 | "\n", 708 | "plt.title('Fluid balance over time',fontsize=16)\n", 709 | "plt.xlabel('Hours',fontsize=16)\n", 710 | "plt.ylabel('Volume (litres)',fontsize=16)\n", 711 | "# plt.ylim(0,38)\n", 712 | "plt.legend()\n" 713 | ] 714 | }, 715 | { 716 | "cell_type": "code", 717 | "execution_count": null, 718 | "metadata": { 719 | "collapsed": false 720 | }, 721 | "outputs": [], 722 | "source": [ 723 | "ie['LABEL'].unique()" 724 | ] 725 | }, 726 | { 727 | "cell_type": "markdown", 728 | "metadata": {}, 729 | "source": [ 730 | "### Exercise 2\n", 731 | "\n", 732 | "* Plot the alarms for the mean arterial pressure ('```Arterial Blood Pressure mean```')\n", 733 | "* HINT: you can use ```ce.LABEL.unique()``` to find a list of variable names\n", 734 | "* Were the alarm thresholds breached?" 
735 | ] 736 | }, 737 | { 738 | "cell_type": "code", 739 | "execution_count": null, 740 | "metadata": { 741 | "collapsed": false 742 | }, 743 | "outputs": [], 744 | "source": [ 745 | "# Exercise 2 here\n", 746 | "\n", 747 | "\n" 748 | ] 749 | }, 750 | { 751 | "cell_type": "markdown", 752 | "metadata": {}, 753 | "source": [ 754 | "### Plot 3: Were the patient's other vital signs stable?" 755 | ] 756 | }, 757 | { 758 | "cell_type": "code", 759 | "execution_count": null, 760 | "metadata": { 761 | "collapsed": false 762 | }, 763 | "outputs": [], 764 | "source": [ 765 | "plt.figure(figsize=(14, 10))\n", 766 | "\n", 767 | "plt.plot(ce.index[ce.LABEL=='Heart Rate'], \n", 768 | " ce.VALUENUM[ce.LABEL=='Heart Rate'],\n", 769 | " 'rx', markersize=8, label='HR')\n", 770 | "\n", 771 | "plt.plot(ce.index[ce.LABEL=='O2 saturation pulseoxymetry'], \n", 772 | " ce.VALUENUM[ce.LABEL=='O2 saturation pulseoxymetry'], \n", 773 | " 'g.', markersize=8, label='O2')\n", 774 | "\n", 775 | "plt.plot(ce.index[ce.LABEL=='Arterial Blood Pressure mean'], \n", 776 | " ce.VALUENUM[ce.LABEL=='Arterial Blood Pressure mean'], \n", 777 | " 'bv', markersize=8, label='MAP')\n", 778 | "\n", 779 | "plt.plot(ce.index[ce.LABEL=='Respiratory Rate'], \n", 780 | " ce.VALUENUM[ce.LABEL=='Respiratory Rate'], \n", 781 | " 'k+', markersize=8, label='RR')\n", 782 | "\n", 783 | "plt.title('Vital signs over time from admission')\n", 784 | "plt.ylim(0,130)\n", 785 | "plt.legend()" 786 | ] 787 | }, 788 | { 789 | "cell_type": "markdown", 790 | "metadata": {}, 791 | "source": [ 792 | "### Plot 5: Laboratory measurements" 793 | ] 794 | }, 795 | { 796 | "cell_type": "markdown", 797 | "metadata": {}, 798 | "source": [ 799 | "Using Pandas 'read_csv function' again, we'll now load the labevents data.\n", 800 | "This data corresponds to measurements made in a laboratory - usually on a sample of patient blood. 
" 801 | ] 802 | }, 803 | { 804 | "cell_type": "code", 805 | "execution_count": null, 806 | "metadata": { 807 | "collapsed": false 808 | }, 809 | "outputs": [], 810 | "source": [ 811 | "# OPTION 1: load labevents data using the database connection\n", 812 | "query = \"\"\"\n", 813 | "SELECT de.subject_id\n", 814 | " , de.charttime\n", 815 | " , di.label, de.value, de.valuenum\n", 816 | " , de.uom\n", 817 | "FROM labevents de\n", 818 | "INNER JOIN d_labitems di\n", 819 | " ON de.itemid = di.itemid\n", 820 | "where de.subject_id = 40084\n", 821 | "\"\"\"\n", 822 | "\n", 823 | "le = pd.read_sql_query(query,conn)\n", 824 | "\n", 825 | "# OPTION 2: load labevents from the CSV file\n", 826 | "# le = pd.read_csv('data/example_labevents.csv', index_col='HOURSSINCEADMISSION')" 827 | ] 828 | }, 829 | { 830 | "cell_type": "code", 831 | "execution_count": null, 832 | "metadata": { 833 | "collapsed": false 834 | }, 835 | "outputs": [], 836 | "source": [ 837 | "# preview the labevents data\n", 838 | "le.head()" 839 | ] 840 | }, 841 | { 842 | "cell_type": "code", 843 | "execution_count": null, 844 | "metadata": { 845 | "collapsed": false 846 | }, 847 | "outputs": [], 848 | "source": [ 849 | "# preview the ioevents data\n", 850 | "le[le.LABEL=='HEMOGLOBIN']" 851 | ] 852 | }, 853 | { 854 | "cell_type": "code", 855 | "execution_count": null, 856 | "metadata": { 857 | "collapsed": false 858 | }, 859 | "outputs": [], 860 | "source": [ 861 | "plt.figure(figsize=(14, 10))\n", 862 | "\n", 863 | "plt.plot(le.index[le.LABEL=='HEMATOCRIT'], \n", 864 | " le.VALUENUM[le.LABEL=='HEMATOCRIT'], \n", 865 | " 'go', markersize=6, label='Haematocrit')\n", 866 | "\n", 867 | "plt.plot(le.index[le.LABEL=='HEMOGLOBIN'], \n", 868 | " le.VALUENUM[le.LABEL=='HEMOGLOBIN'], \n", 869 | " 'bv', markersize=8, label='Hemoglobin')\n", 870 | "\n", 871 | "plt.title('Laboratory measurements over time from admission')\n", 872 | "plt.ylim(0,38)\n", 873 | "plt.legend()" 874 | ] 875 | }, 876 | { 877 | "cell_type": 
"markdown", 878 | "metadata": {}, 879 | "source": [ 880 | "## Plot 5: intravenous medications" 881 | ] 882 | }, 883 | { 884 | "cell_type": "markdown", 885 | "metadata": {}, 886 | "source": [ 887 | "- Using the Pandas 'read_csv function' again, we'll now load the the ioevents dataset" 888 | ] 889 | }, 890 | { 891 | "cell_type": "code", 892 | "execution_count": null, 893 | "metadata": { 894 | "collapsed": false 895 | }, 896 | "outputs": [], 897 | "source": [ 898 | "# load ioevents\n", 899 | "ioe = pd.read_csv('data/example_ioevents.csv',index_col='HOURSSINCEADMISSION_START')" 900 | ] 901 | }, 902 | { 903 | "cell_type": "code", 904 | "execution_count": null, 905 | "metadata": { 906 | "collapsed": false 907 | }, 908 | "outputs": [], 909 | "source": [ 910 | "ioe.head()\n" 911 | ] 912 | }, 913 | { 914 | "cell_type": "code", 915 | "execution_count": null, 916 | "metadata": { 917 | "collapsed": false 918 | }, 919 | "outputs": [], 920 | "source": [ 921 | "plt.figure(figsize=(14, 10))\n", 922 | "\n", 923 | "plt.plot(ie.CHARTTIME[ie.LABEL=='Midazolam (Versed)'], \n", 924 | " ie.RATE[ie.LABEL=='Midazolam (Versed)'], \n", 925 | " 'go', markersize=6, label='Midazolam (Versed)')\n", 926 | "\n", 927 | "plt.plot(ie.CHARTTIME[ie.LABEL=='Propofol'], \n", 928 | " ie.RATE[ie.LABEL=='Propofol'], \n", 929 | " 'bv', markersize=8, label='Propofol')\n", 930 | "\n", 931 | "plt.plot(ie.CHARTTIME[ie.LABEL=='Fentanyl'], \n", 932 | " ie.RATE[ie.LABEL=='Fentanyl'], \n", 933 | " 'k+', markersize=8, label='Fentanyl')\n", 934 | "\n", 935 | "plt.title('Inputs over time from admission')\n", 936 | "plt.ylim(0,380)\n", 937 | "plt.legend()" 938 | ] 939 | } 940 | ], 941 | "metadata": { 942 | "kernelspec": { 943 | "display_name": "Python 2", 944 | "language": "python", 945 | "name": "python2" 946 | }, 947 | "language_info": { 948 | "codemirror_mode": { 949 | "name": "ipython", 950 | "version": 2 951 | }, 952 | "file_extension": ".py", 953 | "mimetype": "text/x-python", 954 | "name": "python", 955 | 
"nbconvert_exporter": "python", 956 | "pygments_lexer": "ipython2", 957 | "version": "2.7.10" 958 | } 959 | }, 960 | "nbformat": 4, 961 | "nbformat_minor": 0 962 | } 963 | -------------------------------------------------------------------------------- /temp/04-example-multiplepatients.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "import sqlite3\n", 15 | "%matplotlib inline\n", 16 | "\n", 17 | "conn = sqlite3.connect('data/mimicdata.sqlite')\n" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": { 24 | "collapsed": false, 25 | "scrolled": true 26 | }, 27 | "outputs": [], 28 | "source": [ 29 | "\n", 30 | "\n", 31 | "data = []\n", 32 | "for subject_id in [40084, 40080, 40004]:\n", 33 | " \n", 34 | " query = \"\"\"\n", 35 | " SELECT de.icustay_id\n", 36 | " , (strftime('%s',de.charttime)-strftime('%s',ie.intime))/60.0/60.0 as HOURS\n", 37 | " , di.label\n", 38 | " , de.value\n", 39 | " , de.valuenum\n", 40 | " , de.uom\n", 41 | " FROM chartevents de\n", 42 | " INNER join d_items di\n", 43 | " ON de.itemid = di.itemid\n", 44 | " INNER join icustays ie\n", 45 | " ON de.icustay_id = ie.icustay_id\n", 46 | " WHERE de.subject_id = \"\"\" + str(subject_id) + \"\"\"\n", 47 | " ORDER BY charttime;\n", 48 | " \"\"\"\n", 49 | "\n", 50 | " ce = pd.read_sql_query(query,conn)\n", 51 | "\n", 52 | " valueName = \"Respiratory Rate\";\n", 53 | " \n", 54 | " # Set x equal to the times\n", 55 | " x_hr = ce.HOURS[ce.LABEL==valueName]\n", 56 | "\n", 57 | " # Set y equal to the heart rates\n", 58 | " y_hr = ce.VALUENUM[ce.LABEL==valueName]\n", 59 | "\n", 60 | " # Plot time against heart rate\n", 61 | " plt.figure(figsize=(14, 6))\n", 62 | " 
data.append([x_hr,y_hr]);\n", 63 | " \n", 64 | "for patient in data:\n", 65 | " plt.plot(patient[0], patient[1]);\n", 66 | "\n", 67 | "plt.xlabel('Time',fontsize=16)\n", 68 | "plt.ylabel(valueName,fontsize=16)\n", 69 | "plt.title(valueName + ' over time from admission to the intensive care unit')\n", 70 | "\n" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": true 78 | }, 79 | "outputs": [], 80 | "source": [] 81 | } 82 | ], 83 | "metadata": { 84 | "kernelspec": { 85 | "display_name": "Python 2", 86 | "language": "python", 87 | "name": "python2" 88 | }, 89 | "language_info": { 90 | "codemirror_mode": { 91 | "name": "ipython", 92 | "version": 2 93 | }, 94 | "file_extension": ".py", 95 | "mimetype": "text/x-python", 96 | "name": "python", 97 | "nbconvert_exporter": "python", 98 | "pygments_lexer": "ipython2", 99 | "version": "2.7.10" 100 | } 101 | }, 102 | "nbformat": 4, 103 | "nbformat_minor": 0 104 | } 105 | -------------------------------------------------------------------------------- /temp/06-example-patient-psql.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Exploring the trajectory of a single patient" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "### Import Python libraries" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "We first need to import some tools for working with data in Python. 
\n", 22 | "- NumPy is for working with numbers\n", 23 | "- Pandas is for analysing data\n", 24 | "- Matplotlib is for making plots\n", 25 | "- Psycopg2 is for connecting to the PostgreSQL database" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "import numpy as np\n", 37 | "import pandas as pd\n", 38 | "import matplotlib.pyplot as plt\n", 39 | "import psycopg2\n", 40 | "%matplotlib inline" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "### Connect to the database" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "- We can use the psycopg2 library to connect to the MIMIC database\n", 55 | "- Once the connection is established, we'll run a simple SQL query." 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "# Connect to the MIMIC database\n", 67 | "try: \n", 68 | " conn = psycopg2.connect(\"dbname='mimic' user='tompollard' host='localhost' password='postgres'\")\n", 69 | "except: \n", 70 | " print('meh')" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "# Create our test query\n", 82 | "test_query = \"\"\"\n", 83 | "SELECT subject_id, hadm_id, admittime, dischtime, diagnosis, admission_type, deathtime, discharge_location\n", 84 | "FROM mimiciii.admissions;\n", 85 | "\"\"\"" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": { 92 | "collapsed": false 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "# Run the query and assign the results to a variable\n", 97 | "test = pd.read_sql_query(test_query,conn)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | 
"metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "# Display the first few rows\n", 109 | "test" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": { 116 | "collapsed": true 117 | }, 118 | "outputs": [], 119 | "source": [] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "### Load the chartevents data" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "- The chartevents table contains data charted at the patient bedside. It includes variables such as heart rate, respiratory rate, temperature, and so on.\n", 133 | "- We'll begin by loading the chartevents data for a single patient." 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": false 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "query = \"\"\"\n", 145 | "SELECT de.icustay_id\n", 146 | " , EXTRACT(EPOCH FROM de.charttime-ie.intime)/3600/24 as HOURS\n", 147 | " , di.label\n", 148 | " , de.value\n", 149 | " , de.valuenum\n", 150 | " , de.uom\n", 151 | "FROM mimiciii.chartevents de\n", 152 | "INNER join mimiciii.d_items di\n", 153 | "ON de.itemid = di.itemid\n", 154 | "INNER join mimiciii.icustays ie\n", 155 | "ON de.icustay_id = ie.icustay_id\n", 156 | "WHERE de.icustay_id = 236942\n", 157 | "ORDER BY charttime;\n", 158 | "\"\"\"\n", 159 | "\n", 160 | "ce = pd.read_sql_query(query,conn)\n", 161 | "\n", 162 | "\n", 163 | "# OPTION 2: load chartevents from a CSV file\n", 164 | "# ce = pd.read_csv('data/example_chartevents.csv', index_col='HOURSSINCEADMISSION')" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": { 171 | "collapsed": false 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "# Preview the data\n", 176 | "# Use 'head' to limit the number of rows returned\n", 177 | "ce.head()" 178 | ] 
179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "### Review the patient's heart rate" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "- We can select individual columns using the column name. \n", 192 | "- For example, if we want to select just the label column, we write **```ce.label```** or alternatively **```ce['label']```**" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": { 199 | "collapsed": false 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "# Select a single column\n", 204 | "ce['label'].head()" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "- In a similar way, we can select rows from data using indexes. \n", 212 | "- For example, to select rows where the label is equal to 'Heart Rate', we would create an index using **```[ce.label=='Heart Rate']```** " 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": { 219 | "collapsed": false 220 | }, 221 | "outputs": [], 222 | "source": [ 223 | "# List the unique labels (variable names) available in the data\n", 224 | "ce.label.unique()" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "### Plot 1: How did the patient's heart rate change over time?" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "- Using the methods described above to select our data of interest, we can create our x and y axis values to create a time series plot of heart rate." 
239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": { 245 | "collapsed": false 246 | }, 247 | "outputs": [], 248 | "source": [ 249 | "# Which time stamps have a corresponding heart rate measurement?\n", 250 | "print ce.index[ce.label=='Heart Rate']" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": { 257 | "collapsed": false, 258 | "scrolled": true 259 | }, 260 | "outputs": [], 261 | "source": [ 262 | "# Set x equal to the times\n", 263 | "x_hr = ce.hours[ce.label=='Heart Rate']\n", 264 | "\n", 265 | "# Set y equal to the heart rates\n", 266 | "y_hr = ce.valuenum[ce.label=='Heart Rate']\n", 267 | "\n", 268 | "# Plot time against heart rate\n", 269 | "plt.figure(figsize=(14, 6))\n", 270 | "plt.plot(x_hr,y_hr)\n", 271 | "\n", 272 | "\n", 273 | "plt.xlabel('Time since admission, hours',fontsize=16)\n", 274 | "plt.ylabel('Heart rate',fontsize=16)\n", 275 | "plt.title('Heart rate over time from admission to the intensive care unit',fontsize=16)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "metadata": {}, 281 | "source": [ 282 | "### Task 1\n", 283 | "\n", 284 | "* What is happening to this patient's heart rate?\n", 285 | "* Plot respiratory rate over time for the patient.\n", 286 | "* Is there anything unusual about the patient's respiratory rate?\n" 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "execution_count": null, 292 | "metadata": { 293 | "collapsed": true 294 | }, 295 | "outputs": [], 296 | "source": [ 297 | "# Exercise 1 here\n", 298 | "\n" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "### Plot 2: Did the patient's vital signs breach any alarm thresholds?" 
306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "metadata": {}, 311 | "source": [ 312 | "- Alarm systems in the intensive care unit are commonly based on high and low thresholds defined by the carer.\n", 313 | "- False alarms are often a problem and so thresholds may be set arbitrarily to reduce alarms.\n", 314 | "- As a result, alarm settings carry limited information." 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": null, 320 | "metadata": { 321 | "collapsed": false 322 | }, 323 | "outputs": [], 324 | "source": [ 325 | "plt.figure(figsize=(14, 6))\n", 326 | "\n", 327 | "plt.plot(ce.hours[ce.label=='Respiratory Rate'], \n", 328 | " ce.valuenum[ce.label=='Respiratory Rate'],\n", 329 | " 'k+-', markersize=2, linewidth=1)\n", 330 | "\n", 331 | "plt.plot(ce.hours[ce.label=='Resp Alarm - High'], \n", 332 | " ce.valuenum[ce.label=='Resp Alarm - High'],\n", 333 | " 'm--')\n", 334 | "\n", 335 | "plt.plot(ce.hours[ce.label=='Resp Alarm - Low'], \n", 336 | " ce.valuenum[ce.label=='Resp Alarm - Low'],\n", 337 | " 'm--')\n", 338 | "\n", 339 | "plt.xlabel('Time',fontsize=16)\n", 340 | "plt.ylabel('Respiratory rate',fontsize=16)\n", 341 | "plt.title('Respiratory rate since admission, with upper and lower alarm thresholds',fontsize=16)\n", 342 | "plt.ylim(0,55)\n" 343 | ] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "metadata": {}, 348 | "source": [ 349 | "### Task 2\n", 350 | "\n", 351 | "- Based on the data, does it look like the alarms would have triggered for this patient?\n" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "### Plot 3: What is the patient's level of consciousness?" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "- Glasgow Coma Scale (GCS) is a measure of consciousness.\n", 366 | "- It is commonly used for monitoring patients in the intensive care unit. 
\n", 367 | "- It consists of three components: eye response; verbal response; motor response." 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": { 374 | "collapsed": false 375 | }, 376 | "outputs": [], 377 | "source": [ 378 | "# Display the first few rows of the GCS eye response data\n", 379 | "ce[ce.label=='GCS - Eye Opening'].head()" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": null, 385 | "metadata": { 386 | "collapsed": false 387 | }, 388 | "outputs": [], 389 | "source": [ 390 | "# Prepare the size of the figure\n", 391 | "plt.figure(figsize=(18, 12))\n", 392 | "\n", 393 | "# Set x equal to the times\n", 394 | "x_hr = ce.hours[ce.label=='Heart Rate']\n", 395 | "\n", 396 | "# Set y equal to the heart rates\n", 397 | "y_hr = ce.valuenum[ce.label=='Heart Rate']\n", 398 | "\n", 399 | "plt.plot(x_hr,y_hr,'r-',label='Heart rate')\n", 400 | "\n", 401 | "plt.plot(ce.hours[ce.label=='Heart rate Alarm - High'], \n", 402 | " ce.valuenum[ce.label=='Heart rate Alarm - High'],\n", 403 | " 'm--')\n", 404 | "\n", 405 | "plt.plot(ce.hours[ce.label=='Heart rate Alarm - Low'], \n", 406 | " ce.valuenum[ce.label=='Heart rate Alarm - Low'],\n", 407 | " 'm--', label='Alarm threshold')\n", 408 | "\n", 409 | "plt.plot(ce.hours[ce.label=='Respiratory Rate'], \n", 410 | " ce.valuenum[ce.label=='Respiratory Rate'],\n", 411 | " 'b-', markersize=6,label='Respiratory rate')\n", 412 | "\n", 413 | "plt.plot(ce.hours[ce.label=='Resp Alarm - High'], \n", 414 | " ce.valuenum[ce.label=='Resp Alarm - High'],\n", 415 | " 'm--')\n", 416 | "\n", 417 | "plt.plot(ce.hours[ce.label=='Resp Alarm - Low'], \n", 418 | " ce.valuenum[ce.label=='Resp Alarm - Low'],\n", 419 | " 'm--')\n", 420 | "\n", 421 | "# Add a text label to the y-axis\n", 422 | "plt.text(-3,155,'GCS - Eye Opening',fontsize=14)\n", 423 | "plt.text(-3,150,'GCS - Motor Response',fontsize=14)\n", 424 | "plt.text(-3,145,'GCS - Verbal Response',fontsize=14) \n", 
425 | "\n", 426 | "# Iterate over the GCS labels, annotating only every 14th of the first 75 to avoid overlap\n", 427 | "for i, txt in enumerate(ce.value[ce.label=='GCS - Eye Opening'].values):\n", 428 | " if np.mod(i,14)==0 and i < 75:\n", 429 | " plt.annotate(txt, (ce.hours[ce.label=='GCS - Eye Opening'].values[i],155),fontsize=14)\n", 430 | " \n", 431 | "for i, txt in enumerate(ce.value[ce.label=='GCS - Motor Response'].values):\n", 432 | " if np.mod(i,14)==0 and i < 75:\n", 433 | " plt.annotate(txt, (ce.hours[ce.label=='GCS - Motor Response'].values[i],150),fontsize=14)\n", 434 | "\n", 435 | "for i, txt in enumerate(ce.value[ce.label=='GCS - Verbal Response'].values):\n", 436 | " if np.mod(i,14)==0 and i < 75:\n", 437 | " plt.annotate(txt, (ce.hours[ce.label=='GCS - Verbal Response'].values[i],145),fontsize=14)\n", 438 | "\n", 439 | "plt.title('Vital signs and Glasgow Coma Scale since admission',fontsize=18)\n", 440 | "\n", 441 | "plt.xlabel('Time (hours)',fontsize=18)\n", 442 | "plt.ylabel('Vital signs',fontsize=18)\n", 443 | "plt.legend(loc=1)\n", 444 | "plt.ylim(10,180)\n" 445 | ] 446 | }, 447 | { 448 | "cell_type": "markdown", 449 | "metadata": {}, 450 | "source": [ 451 | "### Task 3\n", 452 | "\n", 453 | "- How is the patient's consciousness changing over time?" 454 | ] 455 | }, 456 | { 457 | "cell_type": "markdown", 458 | "metadata": {}, 459 | "source": [ 460 | "# Stop here..." 461 | ] 462 | }, 463 | { 464 | "cell_type": "markdown", 465 | "metadata": {}, 466 | "source": [ 467 | "### Plot 4: What other data do we have on the patient?" 468 | ] 469 | }, 470 | { 471 | "cell_type": "markdown", 472 | "metadata": {}, 473 | "source": [ 474 | "- Using the Pandas 'read_sql_query' function again, we'll now load the outputevents data - this table contains all information about patient outputs (urine output, drains, dialysis)." 
475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": null, 480 | "metadata": { 481 | "collapsed": false 482 | }, 483 | "outputs": [], 484 | "source": [ 485 | "# OPTION 1: load outputs from the patient\n", 486 | "query = \"\"\"\n", 487 | "select de.icustay_id\n", 488 | " , EXTRACT(EPOCH FROM de.charttime-ie.intime)/3600 as HOURS\n", 489 | " , di.label\n", 490 | " , de.value\n", 491 | " , de.valueuom\n", 492 | "from mimiciii.outputevents de \n", 493 | "inner join mimiciii.icustays ie\n", 494 | " on de.icustay_id = ie.icustay_id\n", 495 | "inner join mimiciii.d_items di\n", 496 | " on de.itemid = di.itemid\n", 497 | "where de.subject_id = 49205\n", 498 | "order by charttime;\n", 499 | "\"\"\"\n", 500 | "\n", 501 | "oe = pd.read_sql_query(query,conn)" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": null, 507 | "metadata": { 508 | "collapsed": false 509 | }, 510 | "outputs": [], 511 | "source": [ 512 | "oe.head()" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": null, 518 | "metadata": { 519 | "collapsed": false 520 | }, 521 | "outputs": [], 522 | "source": [ 523 | "plt.figure(figsize=(14, 10))\n", 524 | "\n", 525 | "plt.figure(figsize=(14, 6))\n", 526 | "plt.title('Fluid output over time')\n", 527 | "\n", 528 | "plt.plot(oe.hours, \n", 529 | " oe.value.cumsum()/1000, \n", 530 | " 'ro', markersize=8, label='Output volume, L')\n", 531 | "\n", 532 | "# plt.xlim(0,72)\n", 533 | "# plt.ylim(0,10)\n", 534 | "plt.legend()" 535 | ] 536 | }, 537 | { 538 | "cell_type": "markdown", 539 | "metadata": {}, 540 | "source": [ 541 | "To put this plot in context, it helps to include the patient's input data. Together, inputs and outputs determine the patient's fluid balance - a key indicator of patient health." 
542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": null, 547 | "metadata": { 548 | "collapsed": false 549 | }, 550 | "outputs": [], 551 | "source": [ 552 | "# OPTION 1: load inputs given to the patient (usually intravenously) using the database connection\n", 553 | "query = \"\"\"\n", 554 | "select de.icustay_id\n", 555 | " , EXTRACT(EPOCH FROM de.starttime-ie.intime)/3600 as HOURS_START\n", 556 | " , EXTRACT(EPOCH FROM de.endtime-ie.intime)/3600 as HOURS_END\n", 557 | " , de.linkorderid\n", 558 | " , di.label\n", 559 | " , de.amount\n", 560 | " , de.amountuom\n", 561 | " , de.rate\n", 562 | " , de.rateuom\n", 563 | "from mimiciii.inputevents_mv de \n", 564 | "inner join mimiciii.icustays ie\n", 565 | " on de.icustay_id = ie.icustay_id\n", 566 | "inner join mimiciii.d_items di\n", 567 | " on de.itemid = di.itemid\n", 568 | "where de.subject_id = 49205\n", 569 | "order by endtime;\n", 570 | "\"\"\"\n", 571 | "\n", 572 | "ie = pd.read_sql_query(query,conn)\n", 573 | "\n", 574 | "# # OPTION 2: load ioevents using the CSV file with endtime as the index\n", 575 | "# ioe = pd.read_csv('inputevents.csv'\n", 576 | "# ,header=None\n", 577 | "# ,names=['subject_id','itemid','label','starttime','endtime','amount','amountuom','rate','rateuom']\n", 578 | "# ,parse_dates=True)" 579 | ] 580 | }, 581 | { 582 | "cell_type": "code", 583 | "execution_count": null, 584 | "metadata": { 585 | "collapsed": false 586 | }, 587 | "outputs": [], 588 | "source": [ 589 | "ie.head()" 590 | ] 591 | }, 592 | { 593 | "cell_type": "markdown", 594 | "metadata": {}, 595 | "source": [ 596 | "Note that the column headers are different: we have \"HOURS_START\" and \"HOURS_END\". This is because inputs are administered over a fixed period of time." 
597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": null, 602 | "metadata": { 603 | "collapsed": false 604 | }, 605 | "outputs": [], 606 | "source": [ 607 | "ie['label'].unique()" 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "execution_count": null, 613 | "metadata": { 614 | "collapsed": false 615 | }, 616 | "outputs": [], 617 | "source": [ 618 | "plt.figure(figsize=(14, 10))\n", 619 | "\n", 620 | "# Plot the cumulative input against the cumulative output\n", 621 | "plt.plot(ie.hours_end[ie.amountuom=='mL'], \n", 622 | " ie.amount[ie.amountuom=='mL'].cumsum()/1000, \n", 623 | " 'go', markersize=8, label='Intake volume, L')\n", 624 | "\n", 625 | "plt.plot(oe.hours, \n", 626 | " oe.value.cumsum()/1000, \n", 627 | " 'ro', markersize=8, label='Output volume, L')\n", 628 | "\n", 629 | "plt.title('Fluid balance over time',fontsize=16)\n", 630 | "plt.xlabel('Hours',fontsize=16)\n", 631 | "plt.ylabel('Volume (litres)',fontsize=16)\n", 632 | "# plt.ylim(0,38)\n", 633 | "plt.legend()" 634 | ] 635 | }, 636 | { 637 | "cell_type": "markdown", 638 | "metadata": { 639 | "collapsed": true 640 | }, 641 | "source": [ 642 | "As the plot shows, the patient's intake tends to be above their output (as one would expect!) - but there are periods where they are almost one to one. One of the biggest challenges of working with ICU data is that context is everything - let's look at a treatment (lasix) that we know will affect this graph." 
643 | ] 644 | }, 645 | { 646 | "cell_type": "code", 647 | "execution_count": null, 648 | "metadata": { 649 | "collapsed": false 650 | }, 651 | "outputs": [], 652 | "source": [ 653 | "plt.figure(figsize=(14, 10))\n", 654 | "\n", 655 | "# Plot the cumulative input against the cumulative output\n", 656 | "plt.plot(ie.hours_end[ie.amountuom=='mL'], \n", 657 | " ie.amount[ie.amountuom=='mL'].cumsum()/1000, \n", 658 | " 'go', markersize=8, label='Intake volume, L')\n", 659 | "\n", 660 | "plt.plot(oe.hours, \n", 661 | " oe.value.cumsum()/1000, \n", 662 | " 'ro', markersize=8, label='Output volume, L')\n", 663 | "\n", 664 | "# example of getting two columns from a dataframe: ie[['hours_start','hours_end']].head()\n", 665 | "\n", 666 | "for i, idx in enumerate(ie.index[ie.label=='Furosemide (Lasix)']):\n", 667 | " plt.plot([ie.hours_start[ie.label=='Furosemide (Lasix)'][idx],\n", 668 | " ie.hours_end[ie.label=='Furosemide (Lasix)'][idx]],\n", 669 | " [ie.rate[ie.label=='Furosemide (Lasix)'][idx],\n", 670 | " ie.rate[ie.label=='Furosemide (Lasix)'][idx]],\n", 671 | " 'b-',linewidth=4)\n", 672 | " \n", 673 | "\n", 674 | "plt.title('Fluid balance over time',fontsize=16)\n", 675 | "plt.xlabel('Hours',fontsize=16)\n", 676 | "plt.ylabel('Volume (litres)',fontsize=16)\n", 677 | "# plt.ylim(0,38)\n", 678 | "plt.legend()\n" 679 | ] 680 | }, 681 | { 682 | "cell_type": "code", 683 | "execution_count": null, 684 | "metadata": { 685 | "collapsed": false 686 | }, 687 | "outputs": [], 688 | "source": [ 689 | "ie['label'].unique()" 690 | ] 691 | }, 692 | { 693 | "cell_type": "markdown", 694 | "metadata": {}, 695 | "source": [ 696 | "### Exercise 2\n", 697 | "\n", 698 | "* Plot the alarms for the mean arterial pressure ('```Arterial Blood Pressure mean```')\n", 699 | "* HINT: you can use ```ce.label.unique()``` to find a list of variable names\n", 700 | "* Were the alarm thresholds breached?" 
701 | ] 702 | }, 703 | { 704 | "cell_type": "code", 705 | "execution_count": null, 706 | "metadata": { 707 | "collapsed": false 708 | }, 709 | "outputs": [], 710 | "source": [ 711 | "# Exercise 2 here\n", 712 | "\n", 713 | "\n" 714 | ] 715 | }, 716 | { 717 | "cell_type": "markdown", 718 | "metadata": {}, 719 | "source": [ 720 | "### Plot 3: Were the patient's other vital signs stable?" 721 | ] 722 | }, 723 | { 724 | "cell_type": "code", 725 | "execution_count": null, 726 | "metadata": { 727 | "collapsed": false 728 | }, 729 | "outputs": [], 730 | "source": [ 731 | "plt.figure(figsize=(14, 10))\n", 732 | "\n", 733 | "plt.plot(ce.index[ce.label=='Heart Rate'], \n", 734 | " ce.valuenum[ce.label=='Heart Rate'],\n", 735 | " 'rx', markersize=8, label='HR')\n", 736 | "\n", 737 | "plt.plot(ce.index[ce.label=='O2 saturation pulseoxymetry'], \n", 738 | " ce.valuenum[ce.label=='O2 saturation pulseoxymetry'], \n", 739 | " 'g.', markersize=8, label='O2')\n", 740 | "\n", 741 | "plt.plot(ce.index[ce.label=='Arterial Blood Pressure mean'], \n", 742 | " ce.valuenum[ce.label=='Arterial Blood Pressure mean'], \n", 743 | " 'bv', markersize=8, label='MAP')\n", 744 | "\n", 745 | "plt.plot(ce.index[ce.label=='Respiratory Rate'], \n", 746 | " ce.valuenum[ce.label=='Respiratory Rate'], \n", 747 | " 'k+', markersize=8, label='RR')\n", 748 | "\n", 749 | "plt.title('Vital signs over time from admission')\n", 750 | "plt.ylim(0,130)\n", 751 | "plt.legend()" 752 | ] 753 | }, 754 | { 755 | "cell_type": "markdown", 756 | "metadata": {}, 757 | "source": [ 758 | "### Plot 5: Laboratory measurements" 759 | ] 760 | }, 761 | { 762 | "cell_type": "markdown", 763 | "metadata": {}, 764 | "source": [ 765 | "Using Pandas again, we'll now load the labevents data (with 'read_sql_query' from the database, or 'read_csv' from the CSV file).\n", 766 | "This data corresponds to measurements made in a laboratory - usually on a sample of patient blood. 
" 767 | ] 768 | }, 769 | { 770 | "cell_type": "code", 771 | "execution_count": null, 772 | "metadata": { 773 | "collapsed": false 774 | }, 775 | "outputs": [], 776 | "source": [ 777 | "# OPTION 1: load labevents data using the database connection\n", 778 | "query = \"\"\"\n", 779 | "SELECT de.subject_id\n", 780 | " , de.charttime\n", 781 | " , di.label, de.value, de.valuenum\n", 782 | " , de.uom\n", 783 | "FROM mimiciii.labevents de\n", 784 | "INNER JOIN mimiciii.d_labitems di\n", 785 | " ON de.itemid = di.itemid\n", 786 | "where de.subject_id = 49205\n", 787 | "\"\"\"\n", 788 | "\n", 789 | "le = pd.read_sql_query(query,conn)\n", 790 | "\n", 791 | "# OPTION 2: load labevents from the CSV file\n", 792 | "# le = pd.read_csv('data/example_labevents.csv', index_col='HOURSSINCEADMISSION')" 793 | ] 794 | }, 795 | { 796 | "cell_type": "code", 797 | "execution_count": null, 798 | "metadata": { 799 | "collapsed": false 800 | }, 801 | "outputs": [], 802 | "source": [ 803 | "# preview the labevents data\n", 804 | "le.head()" 805 | ] 806 | }, 807 | { 808 | "cell_type": "code", 809 | "execution_count": null, 810 | "metadata": { 811 | "collapsed": false 812 | }, 813 | "outputs": [], 814 | "source": [ 815 | "# preview the hemoglobin measurements in the labevents data\n", 816 | "le[le.label=='HEMOGLOBIN']" 817 | ] 818 | }, 819 | { 820 | "cell_type": "code", 821 | "execution_count": null, 822 | "metadata": { 823 | "collapsed": false 824 | }, 825 | "outputs": [], 826 | "source": [ 827 | "plt.figure(figsize=(14, 10))\n", 828 | "\n", 829 | "plt.plot(le.index[le.label=='HEMATOCRIT'], \n", 830 | " le.valuenum[le.label=='HEMATOCRIT'], \n", 831 | " 'go', markersize=6, label='Haematocrit')\n", 832 | "\n", 833 | "plt.plot(le.index[le.label=='HEMOGLOBIN'], \n", 834 | " le.valuenum[le.label=='HEMOGLOBIN'], \n", 835 | " 'bv', markersize=8, label='Hemoglobin')\n", 836 | "\n", 837 | "plt.title('Laboratory measurements over time from admission')\n", 838 | "plt.ylim(0,38)\n", 839 | "plt.legend()" 840 | ] 
841 | } 842 | ], 843 | 
"metadata": { 844 | "kernelspec": { 845 | "display_name": "Python 2", 846 | "language": "python", 847 | "name": "python2" 848 | }, 849 | "language_info": { 850 | "codemirror_mode": { 851 | "name": "ipython", 852 | "version": 2 853 | }, 854 | "file_extension": ".py", 855 | "mimetype": "text/x-python", 856 | "name": "python", 857 | "nbconvert_exporter": "python", 858 | "pygments_lexer": "ipython2", 859 | "version": "2.7.10" 860 | } 861 | }, 862 | "nbformat": 4, 863 | "nbformat_minor": 0 864 | } 865 | --------------------------------------------------------------------------------