├── requirements.txt ├── MANUSCRIPT- Forecasting Ultra-early Intensive Care Strain from COVID-19 in England.pdf ├── data ├── model │ ├── ICU_beds_region.csv │ ├── CFR.csv │ ├── hospitalisation_and_fatalities.csv │ ├── uptodate_cases.csv │ └── DailyConfirmedCases.csv ├── demographics.py └── prob_hospitalisation.py ├── LICENSE.txt ├── README.md └── graphs.py /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | matplotlib 4 | statsmodels 5 | scikit-learn 6 | dash -------------------------------------------------------------------------------- /MANUSCRIPT- Forecasting Ultra-early Intensive Care Strain from COVID-19 in England.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ariercole/Cambridge_COVID-19_ICU/HEAD/MANUSCRIPT- Forecasting Ultra-early Intensive Care Strain from COVID-19 in England.pdf -------------------------------------------------------------------------------- /data/model/ICU_beds_region.csv: -------------------------------------------------------------------------------- 1 | GSS_CD,region,n_beds (2019) 2 | E40000007,East of England,337 3 | E40000003,London,1019 4 | E40000008,Midlands,680 5 | E40000009,North East and Yorkshire,622 6 | E40000010,North West,643 7 | E40000005,South East,523 8 | E40000006,South West,303 -------------------------------------------------------------------------------- /data/model/CFR.csv: -------------------------------------------------------------------------------- 1 | Area Codes ,region,CFR 2 | E40000007,East of England,0.021306 3 | E40000003,London,0.014310 4 | E40000008,Midlands,0.020544 5 | E40000009,North East and Yorkshire,0.020858 6 | E40000010,North West,0.020143 7 | E40000005,South East,0.021440 8 | E40000006,South West,0.023568 -------------------------------------------------------------------------------- /data/model/hospitalisation_and_fatalities.csv: -------------------------------------------------------------------------------- 1 | ,Area Codes,Region,Mortality Rate,Critical Care Needs Rate 2 | 3,E40000007,East of England,0.012657311153064711,0.026192393524316952 3 | 4,E40000003,London,0.008230275363884789,0.01702646101183463 4 | 2,E40000008,Midlands,0.012167724035519334,0.0251760993322999 5 | 1,E40000009,North East and Yorkshire,0.01236211867678823,0.02557846522973329 6 | 0,E40000010,North West,0.011912609704227637,0.024647103731820642 7 | 5,E40000005,South East,0.01273179334516114,0.026348095035954172 8 | 6,E40000006,South West,0.014087900705113744,0.029153111824457848 9 | -------------------------------------------------------------------------------- /data/model/uptodate_cases.csv: -------------------------------------------------------------------------------- 1 | GSS_CD,region,13/03/2020,14/03/2020,15/03/2020,16/03/2020,17/03/2020,18/03/2020,19/03/2020,20/03/2020,21/03/2020,22/03/2020,23/03/2020 2 | E40000007,East of England,39,43,71,81,93,128,147,183,221,274,351 3 | E40000003,London,167,313,407,480,621,953,1221,1588,1965,2189,2433 4 | E40000008,Midlands,59,75,94,129,140,234,282,389,491,624,808 5 | E40000009,North East and Yorkshire,49,67,91,86,74,168,194,233,298,368,446 6 | E40000010,North West,62,69,76,83,157,180,220,274,312,390,496 7 | E40000005,South East,110,144,175,173,241,285,340,410,492,536,590 8 | E40000006,South West,49,53,61,77,95,117,140,169,216,242,278 -------------------------------------------------------------------------------- /data/model/DailyConfirmedCases.csv: 
-------------------------------------------------------------------------------- 1 | DateVal,CMODateCount,CumCases 2 | 31/01/2020,2,2 3 | 01/02/2020,0,2 4 | 02/02/2020,0,2 5 | 03/02/2020,0,2 6 | 04/02/2020,0,2 7 | 05/02/2020,0,2 8 | 06/02/2020,1,3 9 | 07/02/2020,0,3 10 | 08/02/2020,0,3 11 | 09/02/2020,1,4 12 | 10/02/2020,4,8 13 | 11/02/2020,0,8 14 | 12/02/2020,0,8 15 | 13/02/2020,1,9 16 | 14/02/2020,0,9 17 | 15/02/2020,0,9 18 | 16/02/2020,0,9 19 | 17/02/2020,0,9 20 | 18/02/2020,0,9 21 | 19/02/2020,0,9 22 | 20/02/2020,0,9 23 | 21/02/2020,0,9 24 | 22/02/2020,0,9 25 | 23/02/2020,0,9 26 | 24/02/2020,4,13 27 | 25/02/2020,0,13 28 | 26/02/2020,0,13 29 | 27/02/2020,0,13 30 | 28/02/2020,6,19 31 | 29/02/2020,4,23 32 | 01/03/2020,12,35 33 | 02/03/2020,5,40 34 | 03/03/2020,11,51 35 | 04/03/2020,34,85 36 | 05/03/2020,29,114 37 | 06/03/2020,46,160 38 | 07/03/2020,46,206 39 | 08/03/2020,65,271 40 | 09/03/2020,50,321 41 | 10/03/2020,52,373 42 | 11/03/2020,83,456 43 | 12/03/2020,139,590 44 | 13/03/2020,207,797 45 | 14/03/2020,264,1061 46 | 15/03/2020,330,1391 -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Jacob Deasy, Emma Rocheteau, Katharina Kohler, Daniel J. Stubbs, Pietro Barbiero, Pietro Liò, Ari Ercole 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cambridge_COVID-19_ICU 2 | 3 | Please note that this work has **YET TO BE PEER REVIEWED** and is an early access pre-print. It has not been validated yet and therefore should not be used for clinical purposes. 4 | 5 | ## Authors 6 | Jacob Deasy, Emma Rocheteau, Katharina Kohler, Daniel J. Stubbs, Pietro Barbiero, Pietro Lio, Ari Ercole 7 | 8 | University of Cambridge 9 | 10 | ## Website 11 | Please see our website with live updates http://covid19icu.cl.cam.ac.uk/ 12 | 13 | ## Background 14 | Intensive care unit (ICU) utilisation (for respiratory failure due to viral pneumonitis) is significant for COVID-19 infection. This has the potential to exhaust ICU capacity as was seen in Italy which has been particularly badly hit. 15 | 16 | It is possible to create further ICU capacity however this requires time (e.g. 
by cancelling elective major surgery or by freeing up beds so that ICU patients can be discharged more quickly). At present we do not have a model that allows ultra-early ICU occupancy forecasting. Here we present code for an ultra-early forecast which aims to produce a 14-day forecast of COVID-19 ICU occupancy in the NHS commissioning regions of England, expressed as a percentage of the total number of available beds, using Public Health England dashboard COVID-19 case data.
17 |
18 | Modelling assumptions are described in "MANUSCRIPT- Forecasting Ultra-early Intensive Care Strain from COVID-19 in England.pdf".
19 |
20 | Important note: This is experimental and not yet validated (validation data are not yet available, since the model is being developed as the pandemic unfolds). The model is only as good as its assumptions and starting data, both of which are likely to have limitations. It should not be used for clinical decision making.
21 |
22 |
--------------------------------------------------------------------------------
/data/demographics.py:
--------------------------------------------------------------------------------
1 | import matplotlib as mpl
2 | import matplotlib.pyplot as plt
3 | import numpy as np
4 | import pandas as pd
5 | import os
6 | from prob_hospitalisation import age_cats, per_region, total_demographics
7 |
8 |
9 | mpl.rc('font', family = 'serif', size = 30)
10 | mpl.rcParams['xtick.labelsize'] = 20
11 | mpl.rcParams['ytick.labelsize'] = 30
12 |
13 |
14 | def plots(total_demographics, icu_beds, per_region):
15 |     # demographics plot
16 |     fig, (ax11, ax21) = plt.subplots(1, 2)
17 |     fig.set_size_inches(28, 10)
18 |     plt.subplots_adjust(left=0.05, wspace=0.3, right=0.95, top=0.92, bottom=0.05)
19 |     ax12 = ax11.twinx()
20 |     n = 7
21 |     ind1 = np.arange(start=0, stop=n*3, step=3)  # the x locations for the groups
22 |     ind2 = np.arange(start=1, stop=n*3+1, step=3)
23 |     title_ind = np.arange(0.5, stop=n*3+0.5, step=3)
24 |     width = 0.70  # the width of the bars: can also be len(x) sequence
25 |     cumulative_total = 0
26 |     total_demographics.sort_values('Region', inplace=True)
27 |     regions = ['EoE', 'London', 'Midlands', 'NE&Yorks.', 'NW', 'SE', 'SW']
28 |     colors = ['red', 'hotpink', 'orange', 'gold', 'limegreen', 'darkgreen', 'darkblue', 'dodgerblue', 'mediumorchid']
29 |     plots = []
30 |
31 |     for i in range(9):
32 |         millions = total_demographics.iloc[:, i] / 1000000  # convert to millions
33 |         plots.append(ax11.bar(ind1, millions, width, bottom=cumulative_total, color=colors[i]))
34 |         cumulative_total += millions
35 |
36 |     plots.append(ax12.bar(ind2, icu_beds['n_beds (2019)']*100000/total_demographics.sum(axis=1).values, width=width, color='dimgray'))
37 |     ax11.set_ylabel('Population (millions)')
38 |     ax12.set_ylabel('ICU beds per 100,000')
39 |     plt.title('Demographics', y=1.02)
40 |     plt.xticks(title_ind, regions, rotation='vertical')
41 |     ax11.set_yticks([0, 2, 4, 6, 8, 10, 12])
42 |     ax12.set_yticks([0, 2, 4, 6, 8, 10, 12])
43 |     ax11.set_ylim((0, 12.5))
44 |     ax12.set_ylim((0, 12.5))
45 |     ax11.legend(age_cats, loc='upper right', prop={'size': 22})
46 |
47 |     # critical care needs plot
48 |     ax22 = ax21.twinx()
49 |     plots.append(ax21.bar(ind1, per_region['Critical Care Needs Rate'] * 100, width=width, color='lightblue'))
50 |     plots.append(ax21.bar(ind1, per_region['Mortality Rate'] * 100, width=width, color='dodgerblue'))
51 |     plots.append(ax22.bar(ind2, icu_beds['n_beds (2019)'] * 100000 / total_demographics.sum(axis=1).values, width=width, color='dimgray'))
52 |     ax21.set_ylabel('Percentage of cases requiring ICU')
53 |     ax22.set_ylabel('ICU beds per 100,000')
54 |     plt.title('Critical Care Demand Per Case', y=1.02)
55 |     plt.xticks(title_ind, regions, rotation='vertical')
56 |     ax21.set_yticks([0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
57 |     ax22.set_yticks([0, 2, 4, 6, 8, 10, 12])
58 |     ax21.set_ylim((0, 3.63))
59 |     ax22.set_ylim((0, 12.5))
60 |     ax21.legend(['Survivors', 'Non-Survivors'], loc='upper right', prop={'size': 22})
61 |     plt.savefig(os.path.join(os.pardir, 'figs', 'demographics_cc_icu.pdf'))
62 |
63 |
64 | icu_beds = pd.read_csv(os.path.join('model', 'ICU_beds_region.csv'))
65 | plots(total_demographics, icu_beds, per_region)
66 |
--------------------------------------------------------------------------------
/data/prob_hospitalisation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pandas as pd
3 |
4 |
5 | def get_age():
6 |     age_cats = ['<10', '10-20', '20-30', '30-40', '40-50', '50-60', '60-70', '70-80', '80+']
7 |     # data taken from Table 1: https://www.imperial.ac.uk/media/imperial-college/medicine/sph/ide/gida-fellowships/Imperial-College-COVID19-NPI-modelling-16-03-2020.pdf
8 |     prob_hosp = [0.1, 0.3, 1.2, 3.2, 4.9, 10.2, 16.6, 24.3, 27.3]  # percentage requiring hospitalisation
9 |     prob_critical_care_if_hosp = [5.0, 5.0, 5.0, 5.0, 6.3, 12.2, 27.4, 43.2, 70.9]  # percentage needing critical care among those hospitalised
10 |     prob_death = [0.002, 0.006, 0.03, 0.08, 0.15, 0.6, 2.2, 5.1, 9.3]  # mortality rate
11 |     age = pd.DataFrame([prob_hosp, prob_critical_care_if_hosp, prob_death], columns=age_cats, index=['hosp', 'crit_care_if_hosp', 'death'])
12 |     age /= 100  # convert percentages to probs
13 |
14 |     return age, age_cats
15 |
16 |
17 | def get_gender():
18 |     # we need to take into account the extra risk of being male
19 |     # I am assuming the extra risk applies equally to hospitalisations and deaths
20 |     gender_cats = ['Male', 'Female']
21 |     prob_death_gender = [[2.8, 1.7]]  # data taken from Wuhan on gender difference in mortality
22 |     prob_death_total = 2.8*0.5117 + 1.7*0.4883  # males make up 51.17% of the population of China according to the 2017 Census (http://www.stats.gov.cn/tjsj/ndsj/2018/indexeh.htm)
23 |     gender = pd.DataFrame(prob_death_gender, columns=gender_cats)
24 |
25 |     return gender, prob_death_total
26 |
27 |
28 | def get_age_brackets(lower, upper):
29 |     cats = []
30 |     for i in range(lower, upper):
31 |         cats.append(str(i))
32 |     if lower == 80:
33 |         cats.append('90+')
34 |
35 |     return cats
36 |
37 |
38 | def get_age_cat_demographics(age_cats, df):
39 |     demographics_age = pd.DataFrame([], columns=age_cats)
40 |     demographics_age['Area Codes'] = df['Area Codes ']
41 |     demographics_age['Region'] = df['Unnamed: 1']
42 |     for i, age_cat in enumerate(age_cats):
43 |         demographics_age[age_cat] = df[get_age_brackets(i*10, i*10+10)].astype(int).sum(axis=1)
44 |
45 |     return demographics_age
46 |
47 |
48 | age, age_cats = get_age()
49 | gender, prob_death_total = get_gender()
50 | # prob critical care = prob hospitalisation x prob critical care if hospitalised
51 | prob_critical_care_by_age_gender = {}
52 | prob_critical_care_by_age_gender['Male'] = age.loc['hosp']*age.loc['crit_care_if_hosp']*(gender['Male'].values[0]/prob_death_total)
53 | prob_critical_care_by_age_gender['Female'] = age.loc['hosp']*age.loc['crit_care_if_hosp']*(gender['Female'].values[0]/prob_death_total)
54 | prob_death_by_age_gender = {}
55 | prob_death_by_age_gender['Male'] = age.loc['death']*(gender['Male'].values[0]/prob_death_total)
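# Dividing by prob_death_total turns the Wuhan male/female death rates into relative risks,
# so the population-weighted average risk is left unchanged (2.8/2.263 * 0.5117 + 1.7/2.263 * 0.4883 ≈ 1).
# Worked example (illustrative only, using the figures from get_age() and get_gender()):
# P(critical care | male, 70-80) ≈ 0.243 (hosp) * 0.432 (critical care if hosp) * 2.8/2.263 ≈ 0.13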
56 | prob_death_by_age_gender['Female'] = age.loc['death']*(gender['Female'].values[0]/prob_death_total)
57 |
58 | female = pd.read_csv(os.path.join('background', 'female_age_demographics.csv'), thousands=',')
59 | male = pd.read_csv(os.path.join('background', 'male_age_demographics.csv'), thousands=',')
60 |
61 | male_demographics = get_age_cat_demographics(age_cats, male)
62 | female_demographics = get_age_cat_demographics(age_cats, female)
63 |
64 | male_deaths = male_demographics[age_cats].mul(prob_death_by_age_gender['Male'].values, axis=1)
65 | female_deaths = female_demographics[age_cats].mul(prob_death_by_age_gender['Female'].values, axis=1)
66 |
67 | male_critical_care = male_demographics[age_cats].mul(prob_critical_care_by_age_gender['Male'].values, axis=1)
68 | female_critical_care = female_demographics[age_cats].mul(prob_critical_care_by_age_gender['Female'].values, axis=1)
69 |
70 | total_deaths = male_deaths + female_deaths
71 | total_critical_care = male_critical_care + female_critical_care
72 | total_demographics = male_demographics + female_demographics
73 |
74 | per_region = pd.DataFrame(total_deaths.sum(axis=1), columns=['Deaths'])
75 | per_region['Critical Beds Required'] = total_critical_care.sum(axis=1)
76 | per_region['Area Codes'] = male_demographics['Area Codes']
77 | per_region['Region'] = male_demographics['Region']
78 | per_region['Mortality Rate'] = per_region['Deaths']/(male['All Ages'] + female['All Ages'])
79 | per_region['Critical Care Needs Rate'] = per_region['Critical Beds Required']/(male['All Ages'] + female['All Ages'])
80 | per_region.drop(columns=['Deaths', 'Critical Beds Required'], inplace=True)
81 | per_region.sort_values('Region', inplace=True)
82 | per_region.to_csv(os.path.join('model', 'hospitalisation_and_fatalities.csv'), index=False)
--------------------------------------------------------------------------------
/graphs.py:
--------------------------------------------------------------------------------
1 | """Graphs file."""
2 |
3 | import dash_core_components as dcc
4 | import datetime
5 | import numpy as np
6 | import os
7 | import pandas as pd
8 | import pickle
9 | import plotly.express as px
10 | import plotly.graph_objs as go
11 | import numpy as np
12 | import os
13 | import shutil
14 | import statsmodels.api as sm
15 |
16 | from dateutil import parser
17 | from joblib import Parallel, delayed
18 | from numpy.random import gamma, lognormal, normal
19 | from statsmodels.sandbox.regression.predstd import wls_prediction_std
20 | from tqdm import tqdm
21 |
22 |
23 | # Global variables
24 | n_obs = 11  # UPDATE THIS
25 | n_future = 14
26 | n_total = n_obs + n_future
27 |
28 |
29 | def load_data():
30 |     daily_cases = pd.read_csv(os.path.join(os.pardir, 'data', 'model', 'uptodate_cases.csv'))
31 |
32 |     regions = daily_cases['region']
33 |     dates = [f'{13+i}/03' for i in range(n_total)]
34 |     dates = dates[:19] + [f'{i}/04' for i in range(1, n_total-18)]
35 |
36 |     X = sm.add_constant(np.arange(n_obs))
37 |     X_pred = sm.add_constant(np.arange(n_total))
38 |
39 |     cum_cases = np.array(daily_cases.iloc[:, 2:], dtype=np.float32)
40 |
41 |     return X, X_pred, cum_cases, regions, dates
42 |
43 |
44 | def _regional_prediction(X, X_pred, Y, i):
45 |     mod = sm.OLS(np.log(Y[i]), X)
46 |     res = mod.fit()
47 |
48 |     y_pred = res.predict(X_pred)
49 |     _, _, std_u = wls_prediction_std(res, exog=X_pred, alpha=1-0.6827)  # 1 s.d.
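    # alpha = 1 - 0.6827 gives a ~68.3% prediction interval, so (std_u - y_pred) approximates one
    # standard deviation of the fit on the log scale; regional_predictions() keeps that value for the
    # log-normal sampling in occupancy_arrays(), while the 95% band below is used for plotting.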
50 | _, ci_l, ci_u = wls_prediction_std(res, exog=X_pred, alpha=1-0.95) # 95% CI 51 | 52 | return y_pred, std_u, ci_l, ci_u, res.params[1] 53 | 54 | 55 | def regional_predictions(X, X_pred, Y): 56 | x_true_list = [] 57 | y_true_list = [] 58 | x_pred_list = [] 59 | y_pred_list = [] 60 | ci_l_list = [] 61 | ci_u_list = [] 62 | avgs_list = [] 63 | stds_list = [] 64 | exponent_list = [] 65 | 66 | # Parallelize model fitting 67 | parallel = Parallel(n_jobs=-1, prefer="threads") 68 | results = parallel(delayed(_regional_prediction)(X, X_pred, Y, i) for i in range(7)) 69 | 70 | for i, (y_pred, std_u, ci_l, ci_u, exponent) in enumerate(results): # 7 regions 71 | avgs_list += [y_pred] 72 | stds_list += [std_u - y_pred] 73 | 74 | # Log 75 | y_pred = np.exp(y_pred) 76 | ci_l = np.exp(ci_l) 77 | ci_u = np.exp(ci_u) 78 | 79 | x_true_list += [X[:, 1]] 80 | y_true_list += [Y[i]] 81 | x_pred_list += [X_pred[:, 1]] 82 | y_pred_list += [y_pred] 83 | ci_l_list += [ci_l] 84 | ci_u_list += [ci_u] 85 | exponent_list += [exponent] 86 | 87 | return x_true_list, y_true_list, x_pred_list, y_pred_list, ci_l_list, ci_u_list, avgs_list, stds_list, exponent_list 88 | 89 | 90 | def occupancy_arrays(means, stds, exponents, pct_need_icu, 91 | icu_delay_normal_loc=2.0, los_gamma_shape=8.0, log=True): 92 | means = np.stack(means) 93 | stds = np.stack(stds) 94 | n_regions = means.shape[0] 95 | n_days = means.shape[1] 96 | n_samples = 500 97 | arr = np.zeros((n_regions, n_days, n_samples)) 98 | 99 | for k in range(n_samples): 100 | if log: 101 | new_cases = exponents[:, np.newaxis] * lognormal(means, stds) 102 | else: 103 | new_cases = normal(means, stds) 104 | icu_cases = pct_need_icu[:, np.newaxis] * new_cases # ICU cases = new cases each day * icu_per_case 105 | icu_cases = np.maximum(icu_cases, 1).astype(np.int32) 106 | 107 | for i in range(n_regions): 108 | for j in range(n_days): 109 | # Start 110 | delay_2_icu = normal(loc=icu_delay_normal_loc, scale=3.5, size=icu_cases[i, j]) 111 | delay_2_icu = delay_2_icu.round().astype(np.int32) 112 | 113 | # End 114 | los = gamma(shape=los_gamma_shape, scale=1.0, size=icu_cases[i, j]) 115 | los = np.maximum(los, 1).astype(np.int32) 116 | 117 | # Indices 118 | start_inds = j + delay_2_icu 119 | end_inds = np.minimum(start_inds + los, n_days-1) 120 | start_inds = np.maximum(start_inds, 0) 121 | 122 | for start, end in zip(start_inds, end_inds): 123 | if start >= n_days: 124 | continue 125 | else: 126 | arr[i, start:end+1, k] += 1 127 | 128 | return arr.mean(axis=2), arr.std(axis=2) 129 | 130 | 131 | def daterange(start_date, end_date): 132 | for n in range(int((end_date - start_date).days)): 133 | yield start_date + datetime.timedelta(n) 134 | 135 | 136 | def get_new_dates(dates): 137 | min_date_str = dates[0].split("/")[1] + "/" + dates[0].split("/")[0] 138 | min_date = parser.parse(min_date_str) 139 | today = datetime.datetime.today() 140 | max_date = today + datetime.timedelta(days=n_future+1) 141 | new_dates = [] 142 | today_idx = None 143 | for i, date in enumerate(daterange(min_date, max_date)): 144 | if today.strftime("%d/%m") == date.strftime("%d/%m"): 145 | today_idx = i 146 | new_dates.append(date.strftime("%d/%m")) 147 | return new_dates, today_idx 148 | 149 | 150 | def _make_fig(x_true, y_true, x_pred, y_pred, ci_l, ci_u, 151 | title, ylabel, obs, dates): 152 | if "patients" in ylabel: 153 | y_max = max(y_pred) 154 | elif "occupancy" in ylabel: 155 | y_max = min(100, max(y_pred)) 156 | else: 157 | pass 158 | 159 | color = "blue" 160 | trace0 = go.Scatter( 161 | x=x_pred, 
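        # trace0 is the lower edge of the 95% band; trace1 below fills down to it via fill='tonexty'.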
162 | y=ci_l.round(2), 163 | line=dict(color=color), 164 | name="95% CI", 165 | showlegend=False 166 | ) 167 | trace1 = go.Scatter( 168 | x=x_pred, 169 | y=ci_u.round(2), 170 | fill='tonexty', 171 | name="95% CI", 172 | line=dict(color=color) 173 | ) 174 | if obs: 175 | trace2 = go.Scatter( 176 | x=x_true, 177 | y=y_true.round(2), 178 | mode='markers', 179 | name='Recorded', 180 | line=dict(color="red") 181 | ) 182 | trace3 = go.Scatter( 183 | x=x_pred, 184 | y=y_pred.round(2), 185 | mode='lines+markers', 186 | name='Mean', 187 | line=dict(color=color, width=2), 188 | ) 189 | if obs: 190 | traces = [trace0, trace1, trace3, trace2] 191 | else: 192 | traces = [trace0, trace1, trace3] 193 | tickvals = list(range(0, len(dates), 3)) 194 | # print_dates = [d if i % 3 == 0 else '' for i, d in enumerate(dates)] 195 | print_dates = [d for i, d in enumerate(dates) if i % 3 == 0] 196 | fig=dict( 197 | data=traces, 198 | layout=dict( 199 | showlegend=True, 200 | title=title, 201 | yaxis_title=ylabel, 202 | xaxis=dict( 203 | tickmode='array', 204 | tickvals=tickvals, 205 | ticktext=print_dates, 206 | showgrid=True, 207 | range=[min(x_true), max(x_pred)], 208 | ), 209 | yaxis=dict( 210 | title=ylabel, 211 | range=[0, max(y_pred)] 212 | ), 213 | ) 214 | ) 215 | 216 | return fig 217 | 218 | 219 | def _make_figs(x_true_list, y_true_list, x_pred_list, y_pred_list, ci_l_list, ci_u_list, 220 | dates, regions, title, ylabel, obs=True): 221 | new_dates, today_idx = get_new_dates(dates) 222 | 223 | # Parallelize figure making 224 | parallel = Parallel(n_jobs=-1, prefer="threads") 225 | fig_list = parallel( 226 | delayed(_make_fig)( 227 | x_true, y_true, x_pred, y_pred, ci_l, ci_u, title, ylabel, obs, new_dates) 228 | for x_true, y_true, x_pred, y_pred, ci_l, ci_u in zip( 229 | x_true_list, y_true_list, x_pred_list, y_pred_list, ci_l_list, ci_u_list)) 230 | 231 | return regions, fig_list, today_idx 232 | 233 | 234 | def patients_update(plot_dict): 235 | _, _, _, regions, dates = load_data() 236 | x_true, y_true, x_pred, y_pred, ci_l, ci_u = plot_dict["new_patients_loglinear_fit"] 237 | regions, fig_list_logged, today_idx = _make_figs(x_true, y_true, x_pred, y_pred, ci_l, ci_u, 238 | dates, regions, "Cumulative COVID-19 patients", "Number of patients") 239 | 240 | return regions, fig_list_logged, get_new_dates(dates) 241 | 242 | 243 | def occupancy_update(plot_dict, delay=10, los=8, obs=True): 244 | _, _, _, regions, dates = load_data() 245 | 246 | x_true, y_true, x_pred, y_pred, ci_l, ci_u = plot_dict["icu_patients"][delay][los] 247 | regions, icu_fig_list, today_idx = _make_figs(x_true, y_true, x_pred, y_pred, ci_l, ci_u, 248 | dates, regions, "ICU patients", "#patients", obs=False) 249 | 250 | x_true, y_true, x_pred, y_pred, ci_l, ci_u = plot_dict["icu_occupancy"][delay][los] 251 | regions, occ_fig_list, today_idx = _make_figs(x_true, y_true, x_pred, y_pred, ci_l, ci_u, 252 | dates, regions, "% of ICU Bed Occupancy", "% occupancy", obs=False) 253 | 254 | return regions, icu_fig_list, occ_fig_list, get_new_dates(dates) 255 | 256 | 257 | def save_dict_safely(dict_, f=os.path.join('data', 'plot_dict.pkl')): 258 | if os.path.exists(f): 259 | os.remove(f) 260 | with open(f, 'wb') as fp: 261 | pickle.dump(dict_, fp) 262 | 263 | 264 | def load_dict_safely(f=os.path.join('data', 'plot_dict.pkl')): 265 | with open(f, 'rb') as fp: 266 | dict_ = pickle.load(fp) 267 | return dict_ 268 | 269 | 270 | def update_backend(icu_delay_normal_locs=list(range(1, 11)), los_gamma_shapes=list(range(3, 12))): 271 | print('Updating 
backend dictionary, this may take a while...') 272 | print('Loading data...') 273 | X, X_pred, Y, _, _ = load_data() 274 | death_and_icu_info = pd.read_csv(os.path.join(os.pardir, 'data', 'model', 'hospitalisation_and_fatalities.csv')) 275 | pct_need_icu = death_and_icu_info['Critical Care Needs Rate'] 276 | beds_info = pd.read_csv(os.path.join(os.pardir, 'data', 'model', 'ICU_beds_region.csv')) 277 | beds = beds_info['n_beds (2019)'].values 278 | 279 | """ 280 | Construct large dictionary to be indexed by the web user. 281 | big_dict: 282 | new_patients_loglinear_fit: plot_tuple 283 | icu_patients: icu_patients_dict 284 | icu_occupancy: icu_occupancy_dict 285 | 286 | icu_patients_dict: 287 | delay: default = [3, 4, 5, 6, 7, 8, 9, 10, 11] 288 | los: default = [3, 4, 5, 6, 7, 8, 9, 10, 11] 289 | 290 | Example indexing: 291 | - big_dict['new_patients_loglinear_fit'] --> return plot_tuple 292 | - big_dict['icu_occupancy'][10][8] --> return plot_tuple 293 | """ 294 | big_dict = {} 295 | 296 | # Update new patient 297 | print('Updating estimated new patients (LOG-LINEAR)...') 298 | x_true, y_true, x_pred, y_pred, ci_l, ci_u, log_means, log_stds, exponents = regional_predictions(X, X_pred, Y) 299 | big_dict['new_patients_loglinear_fit'] = x_true, y_true, x_pred, y_pred, ci_l, ci_u 300 | 301 | # Update ICU patient 302 | print('\nUpdating ICU patients info...') 303 | delay_dict = {} 304 | for delay in icu_delay_normal_locs: 305 | los_dict = {} 306 | for los in tqdm(los_gamma_shapes): 307 | mu, sig = occupancy_arrays(log_means, log_stds, np.array(exponents), pct_need_icu, 308 | icu_delay_normal_loc=delay, los_gamma_shape=los) 309 | ci_l = [np.maximum(mu[i] - 1.96 * sig[i], 0) for i in range(7)] 310 | ci_u = [mu[i] + 1.96 * sig[i] for i in range(7)] 311 | 312 | los_dict[los] = (x_true, y_true, x_pred, mu, ci_l, ci_u) 313 | delay_dict[delay] = los_dict 314 | big_dict['icu_patients'] = delay_dict 315 | 316 | # Update ICU occupancy 317 | print('\nUpdating ICU occupancy info...') 318 | delay_dict = {} 319 | for delay in icu_delay_normal_locs: 320 | los_dict = {} 321 | for los in tqdm(los_gamma_shapes): 322 | _, _, _, mu, ci_l, ci_u = big_dict['icu_patients'][delay][los] 323 | 324 | avg_occ = [100 * mu[i] / beds[i] for i in range(7)] 325 | ci_l = [np.maximum(100 * ci_l[i] / beds[i], 0) for i in range(7)] 326 | ci_u = [100 * ci_u[i] / beds[i] for i in range(7)] 327 | 328 | los_dict[los] = (x_true, y_true, x_pred, avg_occ, ci_l, ci_u) 329 | delay_dict[delay] = los_dict 330 | big_dict['icu_occupancy'] = delay_dict 331 | 332 | # Save 333 | print('Saving big dictionary to file with pickle...') 334 | save_dict_safely(big_dict) 335 | 336 | 337 | def choroplet_plot(plot_dict, geo_data, geo_df, today=None, delay=2, los=8): 338 | _, _, _, y_pred, _, _ = plot_dict["icu_occupancy"][delay][los] 339 | if today is None: 340 | today = n_obs 341 | # Index regions `today` 342 | y_pred = np.array(y_pred) 343 | geo_df['% additional demand'] = y_pred[[3, 4, 3, 2, 1, 2, 0, 5, 6], today].round(2) 344 | 345 | fig = px.choropleth_mapbox( 346 | geo_df, 347 | geojson=geo_data, 348 | locations='ID', 349 | color='% additional demand', 350 | color_continuous_scale="Portland", 351 | featureidkey="properties.nuts118cd", 352 | range_color=(0, 100), 353 | mapbox_style="carto-positron", 354 | hover_data=["Region", "% additional demand"], 355 | zoom=4.7, 356 | center={"lat": 53, "lon": -2}, 357 | opacity=0.7 358 | ) 359 | fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0}) 360 | 361 | return fig 362 | 363 | 364 | if __name__ == 
"__main__": 365 | update_backend(icu_delay_normal_locs=list(range(1, 11)), los_gamma_shapes=list(range(3, 12))) 366 | --------------------------------------------------------------------------------