├── testmove.csv ├── updates and notes.txt ├── LICENSE ├── run_model_small.R ├── preprocess_small.R ├── preprocess_data_2015.R ├── README.md ├── run_model_2015.R └── bearmod_fx.R /testmove.csv: -------------------------------------------------------------------------------- 1 | date,from,to,movers,fr_pat,to_pat,fr_users,to_users 2 | 1,1,2,10,1,2,100,100 3 | 1,1,3,5,1,3,100,100 4 | 1,1,4,4,1,4,100,100 5 | 1,1,5,3,1,5,100,100 6 | 1,5,1,8,5,1,100,100 7 | -------------------------------------------------------------------------------- /updates and notes.txt: -------------------------------------------------------------------------------- 1 | BEARmod update schedule 2 | 3 | 0.81 - 10 March 2020 4 | - Major speed increase in infection 5 | - added capacity for multiple timesteps per day. TSinday defaults to 1, but defines the number of timesteps per day in the model (must be integer) 6 | - added capacity for a probability of moving per timestep (fitting to Vodafone data). Not accounted for if prob_move_per_TS = 0 7 | 8 | 0.7 - 8 March 2020 9 | - Added capability for a proportion of exposed people to be infectious--involves "exposed_pop_inf_prop" parameter added into runSim(). Defaults to zero. 10 | 11 | Features to be added: 12 | - Keep track of imported/local cases (This is kept track of on a daily basis already through nInfMovedToday) 13 | - Move transmission parameters to a Parameters list, for simplicity of input data -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Nick Ruktanonchai 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /run_model_small.R: -------------------------------------------------------------------------------- 1 | ####Model running code for BEARmod v.0.6 2 | rm(list=ls()) 3 | library(data.table) # fread - fastly reading data 4 | library(lubridate) 5 | 6 | source("bearmod_fx.R") 7 | source("preprocess_small.R") 8 | #Initial parameters 9 | NPat = length(patNames) 10 | patnInf = rep(0,NPat) 11 | patnExp = c(rep(0,NPat) ) 12 | 13 | pat_locator$pop = 100 14 | 15 | #start infection in Wuhan 16 | patnInf[which(patNames == 1)] = 50 17 | #recovery rate variable 18 | recover_df = data.frame(date = seq(from=min(movement_data$date),to=max(movement_data$date),by="days"),recrate = recrate) 19 | relative_move_data=data.frame(date = "2020-05-01",from = patIDs,relative_move = .1) 20 | #### Running the model #### 21 | 22 | 23 | 24 | HPop = InitiatePop(pat_locator,patnInf,patnExp) 25 | ###dates of simulation 26 | 27 | 28 | input_dates = rep("2020-05-01",50) 29 | results = list() 30 | 31 | for (run in 1:500){ 32 | 33 | HPop_update = runSim(HPop,pat_locator,relative_move_data,movement_data, input_dates,recover_df, exposerate,exposepd,exposed_pop_inf_prop = 0, TSinday = 1) 34 | print(paste0("Run # ",run)) 35 | results[[run]] = HPop_update$all_spread 36 | } 37 | save(results,file="results.RData") 38 | # -------------------------------------------------------------------------------- /preprocess_small.R: -------------------------------------------------------------------------------- 1 | 2 | library(lubridate) 3 | 4 | # ## 2013 - 2014 data 5 | # movement_data2 = read.csv("baidu/7 feb/Baidu_LBS_flow_201312-201404.csv") 6 | # cell_user_from_data = read.csv("baidu/7 feb/LBSusers_from_201312-201404.csv") 7 | # cell_user_from_data$date = date(cell_user_from_data$date) + days(1) 8 | # cell_user_to_data = read.csv("baidu/7 feb/LBSusers_to_201312-201404.csv") 9 | # cell_user_to_data$date = date(cell_user_to_data$date) + days(1) 10 | 11 | movement_data = read.table("testmove.csv",sep=",",header=T) 12 | 13 | patNames = unique(movement_data$to)[order(unique(movement_data$to))] 14 | patIDs = 1:length(patNames) 15 | pat_locator = data.frame(patNames,patIDs) 16 | 17 | #convert dates to format R can read 18 | movement_data$date = ymd("2020-05-01") 19 | 20 | # 21 | # missing_dates = c(date("2014-1-17"), date("2014-2-2"),date("2014-2-18"),date("2014-2-20"),date("2014-3-1"),date("2014-3-2")) 22 | # for (dates in 1:length(missing_dates)){ 23 | # replaceday = subset(movement_data,Date == missing_dates[dates] - days(1)) 24 | # replaceday$Date = replaceday$Date + days(1) 25 | # movement_data = rbind(movement_data,replaceday) 26 | # } 27 | 28 | recrate = 1/6 #daily probability of recovery 29 | exposerate = 2.68/6 # R0 of 2.68, 5.8 days till seeking treatment # How many people a single person potentially infects per day -- can be calculated from R0 estimate if you divide R0 by infectious period 30 | exposepd = 3 # incubation period -------------------------------------------------------------------------------- /preprocess_data_2015.R: -------------------------------------------------------------------------------- 1 | 2 | library(lubridate) 3 | 4 | # ## 2013 - 2014 data 5 | # movement_data2 = read.csv("baidu/7 feb/Baidu_LBS_flow_201312-201404.csv") 6 | # cell_user_from_data = read.csv("baidu/7 feb/LBSusers_from_201312-201404.csv") 7 | # cell_user_from_data$date = date(cell_user_from_data$date) + days(1) 8 | # cell_user_to_data = read.csv("baidu/7 feb/LBSusers_to_201312-201404.csv") 9 | # cell_user_to_data$date = date(cell_user_to_data$date) + days(1) 10 | pop_data = read.csv("baidu/2012SHP_2010census_2011_2016CDCPop_adm2.csv") 11 | 12 | relative_move_data = read.csv("baidu/7 feb/parameter_relative_movement_2014 8 feb.csv",stringsAsFactors=F) 13 | relative_move_data$date = date(relative_move_data$date) + years(1) + days(19) 14 | #### 2015 data 15 | 16 | movement_data = read.table("baidu/Baidu_IP_flow_201411_201505.txt",sep="\t",header=T) 17 | cell_user_data = read.table("baidu/IPusers_201411-201511.txt",sep="\t",header=T) 18 | pop_data = read.csv("baidu/2012SHP_2010census_2011_2016CDCPop_adm2.csv") 19 | ### 20 | 21 | patNames = unique(movement_data$to)[order(unique(movement_data$to))] 22 | patIDs = 1:length(patNames) 23 | pat_locator = data.frame(patNames,patIDs) 24 | 25 | #add patch names to the movement data 26 | movement_data = merge(movement_data,pat_locator,by.x = "from",by.y = "patNames") 27 | names(movement_data)[which(names(movement_data) == "patIDs")] = "fr_pat" 28 | movement_data = merge(movement_data,pat_locator,by.x = "to",by.y = "patNames") 29 | names(movement_data)[which(names(movement_data) == "patIDs")] = "to_pat" 30 | 31 | #add total numbers of users to movement data 32 | movement_data = merge(movement_data,cell_user_data,by.x = c("from","date"),by.y=c("SHP_CITY_CODE","date")) 33 | names(movement_data)[which(names(movement_data) == "users")] = "fr_users" 34 | 35 | movement_data = merge(movement_data,cell_user_data,by.x = c("to","date"),by.y=c("SHP_CITY_CODE","date")) 36 | names(movement_data)[which(names(movement_data) == "users")] = "to_users" 37 | 38 | #convert dates to format R can read 39 | movement_data$date = ymd(movement_data$date) 40 | 41 | 42 | names(movement_data)[which(names(movement_data) == "move")] = "movers" 43 | # 44 | # missing_dates = c(date("2014-1-17"), date("2014-2-2"),date("2014-2-18"),date("2014-2-20"),date("2014-3-1"),date("2014-3-2")) 45 | # for (dates in 1:length(missing_dates)){ 46 | # replaceday = subset(movement_data,Date == missing_dates[dates] - days(1)) 47 | # replaceday$Date = replaceday$Date + days(1) 48 | # movement_data = rbind(movement_data,replaceday) 49 | # } 50 | 51 | recrate = 1/6 #daily probability of recovery 52 | exposerate = 2.68/6 # R0 of 2.68, 5.8 days till seeking treatment # How many people a single person potentially infects per day -- can be calculated from R0 estimate if you divide R0 by infectious period 53 | exposepd = 3 # incubation period -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BEARmod 2 | Basic Epidemic, Activity, and Response COVID-19 model 3 | 4 | This model implements a basic SEIR simulation model, accounting for variable daily movement patterns, recovery rates, and contact rates. Demonstration of this model can be seen in a recent Nature paper [1] 5 | 6 | For a placeholder dummy dataset and example simulation run, please see "run_model_small.R", which uses a dummy movement dataset "testmove.csv" 7 | 8 | ## Overall model 9 | This model is a metapopulation model of COVID-19 transmission, based on an SEIR modeling framework. Within each patch, this model follows a fairly simple SEIR framework. The primary complexities this model is designed to describe are daily movement patterns, and spatially and temporally heterogeneous reductions in movement and contact rates. Specifically, this model is particularly suited for data that generally come from mobile phone companies. 10 | 11 | ### Baseline patch-level processes 12 | Within each patch, this model first calculates the number of infected people who recovered or were otherwise removed from the infectious population (ie. through self-isolatuion) at an average rate r, where r is equal to the inverse of the average infectious period. This is explicitly incorporated as a Bernoulli trial for each infected person with a probability of recovering 1-exp⁡(-r). 13 | Then, the model converts exposed people to infectious by similarly incorporating a Bernoulli trial for each exposed individual, where the daily probability of becoming infectious 1-exp⁡(-ε), where ε was the inverse of the average time spent exposed but not infectious. 14 | Finally, to end the exposure, infection, and recovery step of the model, newly exposed people are calculated for each city based on the number of infectious people in the city I_i, and the average number of daily contacts that lead to transmission that each infectious person has c. This model then simulates the number of newly exposed people through a random draw from a Poisson distribution for each infectious person where the mean number of new infections per person was c, which was then multiplied by the fraction of people in the patch who are susceptible. 15 | The infection processes within each patch therefore approximate the following deterministic, continuous-time model, where c and r varied through time: 16 | dS/dt=S-c SI/N 17 | dE/dt=c SI/N-εE 18 | dI/dt=εE-rI 19 | dR/dt=rI 20 | 21 | ### Movement between patches 22 | After completing the infection-related processes, the model moves infectious people between cities, using the proportion of people who went from each patch to each other patch measured in the input OD matrix. Infectious people are moved from their current location to each possible destination (including remaining in the same place) using Bernoulli trials for each infectied person, and each possible destination city. 23 | Through this model, stochasticity in the numbers and places where COVID-19 appears between simulation runs in this model through variance in numbers of people becoming exposed, infectious, and removed/recovered, as well as variance in numbers of people moving from one city to another. 24 | 25 | ## Input options and formats 26 | Note: These parameter specifications are relevant for v 0.92, denoted at the top of the bearmod_fx.R file. 27 | 28 | First, you will create an empty population list HPop, using InitiatePop(). This function takes as inputs: 29 | - pat_locator: A data frame with variables "patNames", "patIDs" (numeric; sequential from 1:number of patches), and "pop" (population per patch) 30 | - initialInf: A vector of initially infected people per patch, length equal to the number of patches 31 | - initialExp: A vector of initially exposed people per patch, length equal to the number of patches 32 | 33 | The initial HPop is then fed into the runSim function, which has the following inputs: 34 | - HPop 35 | - pat_info: This is the same as pat_locator 36 | - movement_reduction_df: a data frame with 3 variables, "date", "name", and "relative_movement". "name" corresponds to the patNames ID for the patch, and "relative_movement" indicates the relative proportion of movement for that day--.3 means all movement for that patch in that day (both incoming and outgoing) will be 30% of the baseline value (specified in mobmat later). This is specified on a per-day basis, and does not have to be complete--any missing day/patch pairs will have 100% of the baseline movement patterns 37 | - contact_reduction_df: a data frame with 3 variables, "date", "name", and "relative_contact". Same as movement_reduction_df except this refers to the relative contact rate within a patch for a given day--ie. .5 means half as many contacts per person 38 | - mobmat: A data frame with variables "date", "fr_pat", "to_pat", "move_prop". fr_pat and to_pat refer to the patch IDs of the origin and destination patches (see patIDs from pat_locator), and move_prop is the proportion of people who move from each origin to each destination on the given day in "date". If stayers are not denoted (origin = destination), then the model will designate this as 1 - sum(movement elsewhere) for a given patch. 39 | 40 | --more parameter definitions coming soon-- 41 | 42 | Contact: 43 | Nick W Ruktanonchai; 44 | nrukt00 at gmail.com 45 | 46 | [1] Lai, S., Ruktanonchai, N.W., Zhou, L. et al. Effect of non-pharmaceutical interventions to contain COVID-19 in China. Nature (2020). https://doi.org/10.1038/s41586-020-2293-x 47 | -------------------------------------------------------------------------------- /run_model_2015.R: -------------------------------------------------------------------------------- 1 | ####Model running code for BEARmod v.0.6 2 | rm(list=ls()) 3 | library(data.table) # fread - fastly reading data 4 | library(lubridate) 5 | 6 | # setwd('//worldpop.files.soton.ac.uk/Worldpop/Projects/WP519091_Seasonality') 7 | # setwd('D:/OneDrive - University of Southampton/Wuhan Coronavirus R0/Spread risk') 8 | #setwd('C:/Users/sl4m18/OneDrive - University of Southampton/Wuhan Coronavirus R0/Spread risk') 9 | 10 | 11 | source("bearmod/BEARmod_development/bearmod_fx_dev.R") 12 | # source("bearmod/bearmod_fx.R") 13 | source("bearmod/BEARmod_development/preprocess_data_2015_dev.R") 14 | #Initial parameters 15 | NPat = length(patNames) 16 | patnInf = rep(0,NPat) 17 | patnExp = c(rep(0,NPat) ) 18 | 19 | 20 | 21 | #start infection in Wuhan 22 | patnInf[which(patNames == 42010000)] = 10 23 | 24 | 25 | # pop2014 or pop2015 26 | pat_locator = merge(pat_locator,pop_data[,c("SHP_CITY_CODE","TOTAL_POP2015")],by.x="patNames",by.y="SHP_CITY_CODE") 27 | names(pat_locator)[which(names(pat_locator) == "TOTAL_POP2015")] = "pop" 28 | 29 | #recovery rate variable 30 | recover_df = data.frame(date = seq(from=min(movement_data$date),to=max(movement_data$date),by="days"),recrate = recrate) 31 | recover_df$recrate[which(recover_df$date > "2015-2-18")] = 1/3 32 | 33 | 34 | #### Running the model #### 35 | 36 | 37 | 38 | HPop = InitiatePop(pat_locator,patnInf,patnExp) 39 | ###dates of simulation 40 | 41 | # input_dates = seq(date("2015-1-5"),date("2015-3-25"),by="days") 42 | # input_dates = seq(date("2015-1-2"),date("2015-3-3"),by="days") # from 2020-12-08 to 2 wks after LNY's day 43 | # input_dates = seq(date("2015-1-2"),date("2015-3-17"),by="days") # coresponding to the period from 2020-12-08 to 4 wks after LNY's day 44 | #day_list = seq(date("2013-12-02"),date("2014-2-13"),by="days") 45 | 46 | input_dates = seq(date("2014-12-26"),date("2015-2-1"),by="days") 47 | # input_dates = seq(date("2013-12-02"),date("2014-2-13"),by="days") # coresponding to the period from 2020-12-08 to 2 wks after LNY's day 48 | # input_dates = seq(date("2013-12-02"),date("2014-2-27"),by="days") # coresponding to the period from 2020-12-08 to 4 wks after LNY's day 49 | results = list() 50 | 51 | for (run in 1:500){ 52 | 53 | HPop_update = runSim(HPop,pat_locator,relative_move_data,movement_data, input_dates,recover_df, exposerate,exposepd,exposed_pop_inf_prop = .25, TSinday = 1) 54 | print(paste0("Run # ",run)) 55 | results[[run]] = HPop_update$all_spread 56 | } 57 | save(results,file="results.RData") 58 | # 59 | # ###### Master function #### 60 | # HPop_update = runSim(HPop,pat_locator,relative_move_data,movement_data, input_dates,recover_df, exposerate,exposepd) 61 | # 62 | # 63 | # ##Plotting the results 64 | # newHPop = HPop_update$HPop 65 | # epidemic_curve = HPop_update$epidemic_curve 66 | # all_spread = HPop_update$all_spread 67 | # epidemic_curve$Date_2020 <- seq(as.Date('2020-12-08'),(as.Date('2020-12-08') + (max(input_dates) - min(input_dates))), by='days') 68 | # epidemic_curve$Date_2020_report <- epidemic_curve$Date_2020 + exposepd + 4 # 4 days - diagnosis, report delay 69 | # epidemic_curve$inf_accu <- epidemic_curve$inf 70 | # for(i in 2:nrow(epidemic_curve)){ 71 | # epidemic_curve$inf_accu[i] <- epidemic_curve$inf_accu[i-1] + epidemic_curve$inf[i] 72 | # } 73 | # 74 | # plot(epidemic_curve$Date_2020,epidemic_curve$inf) 75 | # plot(epidemic_curve$Date_2020_report,epidemic_curve$inf) 76 | # plot(epidemic_curve$Date_2020_report,epidemic_curve$inf_accu) 77 | # 78 | # 79 | # # 80 | # # ####Many iterations 81 | # # output_df=matrix(0,0,340) 82 | # # for (run in 1:1000){ 83 | # # print(run) 84 | # # HPop_update = runSim(HPop,pat_locator,movement_data, input_dates, recrate, exposerate,exposepd,relative_movement = 1) 85 | # # 86 | # # 87 | # # newHPop = HPop_update$HPop 88 | # # epidemic_curve = HPop_update$epidemic_curve 89 | # # all_spread = HPop_update$all_spread 90 | # # output_df = rbind(output_df,t(all_spread[,dim(all_spread)[2]])) 91 | # # } 92 | # # 93 | # # output_df2 = output_df > 0 94 | # # colnames(output_df2) = newHPop$names 95 | # # 96 | # # 97 | # # chn_shp$prob_CNY = 0 98 | # # for (i in 1:dim(chn_shp)[1]){ 99 | # # if (length(which(colnames(output_df2) == chn_shp$ZONECODE[i]))>0){ 100 | # # chn_shp$prob_CNY[i] = colSums(output_df2)[which(colnames(output_df2) == chn_shp$ZONECODE[i])]/1000 101 | # # }} 102 | # # ggplot() + 103 | # # geom_sf(chn_shp, mapping = aes(fill = prob_CNY) ) + scale_fill_distiller(palette="YlOrRd",direction=1) 104 | # # ####Animation code 105 | # # chn_shp 106 | # # 107 | # # ####Animation code 108 | # # 109 | # # #Numbers of people infected per patch, with IDs 110 | # # outputdf = data.frame(ID = newHPop$names) 111 | # # output_data = cbind(outputdf,all_spread) 112 | # # library(reshape2) 113 | # # melt_data = melt(output_data,id.vars="ID") 114 | # # 115 | # # library(ggplot2) 116 | # # library(ggmap) 117 | # # library(rgeos) 118 | # # library("plyr") 119 | # # library("ggplot2") 120 | # # library("maptools") 121 | # # library(raster) 122 | # # library(igraph) 123 | # # library(rgdal) 124 | # # library(MASS) 125 | # # library(fossil) 126 | # # library(McSpatial) 127 | # # library(geosphere) 128 | # # library(ggrepel) 129 | # # library(hexbin) 130 | # # library(gganimate) 131 | # # library(viridis) 132 | # # library(sf) 133 | # # chn_shp = read_sf(dsn="baidu/ChinaShapefile2012",layer="dishi") 134 | # # library(ggplot2) 135 | # # library(gganimate) 136 | # # library(ggmap) 137 | # # library(maps) 138 | # # library(gapminder) 139 | # # 140 | # # 141 | # # library(animation) 142 | # # 143 | # # 144 | # # dev.control('enable') 145 | # # 146 | # # oopts = ani.options(interval = 0.3) 147 | # # 148 | # # 149 | # # ani.options(oopts) 150 | # # ani.record(reset = TRUE) 151 | # # i=1 152 | # # 153 | # # plots=list() 154 | # # melt_data$variable = as.numeric(melt_data$variable) 155 | # # for (i in 1:(as.numeric(date("2015-1-30")-date("2014-12-01")))){ 156 | # # melt_data2= subset(melt_data,as.numeric(variable) ==i) 157 | # # chn_shp2 = merge(chn_shp,melt_data2,by.x="ZONECODE",by.y="ID",all.x=T) 158 | # # chn_shp2$value= round(chn_shp2$value) 159 | # # plots[[i]] = ggplot() + 160 | # # geom_sf(chn_shp2,mapping=aes(),fill="light grey") + 161 | # # geom_sf(subset(chn_shp2,value>0), mapping = aes(fill = value) ) + 162 | # # scale_fill_distiller(palette="YlOrRd",direction=1,limits = c(.1,max(melt_data$value)),trans="log10",name="# inf")+ 163 | # # ggtitle(i) 164 | # # #scale_fill_viridis(direction=-1,option="A",trans="log10")+ 165 | # # theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank(), 166 | # # panel.background = element_blank(), axis.line = element_line(colour = "white")) 167 | # # print(i) 168 | # # 169 | # # } 170 | # # trace.animate <- function(plotter) { for (i in 1:length(plotter)) { 171 | # # print(plotter[[i]]) 172 | # # }} 173 | # # saveGIF(trace.animate(plots),interval=.3,movie.name="cov_model.gif",ani.width=800,ani.height=800) 174 | -------------------------------------------------------------------------------- /bearmod_fx.R: -------------------------------------------------------------------------------- 1 | ##### BEARmod v.0.92 2 | # 3 | # Basic Epidemic, Activity, and Response simulation model 4 | # 5 | # 6 | # v0.92 7 | # #fixed rounding error bugs throughout 8 | # 9 | # 10 | # v0.91 11 | # - fixed issue with movement that could lead to more people moving into a patch than there are total. for now, this was fixed by making "room" for infected people by removing recovered people. 12 | # 13 | # 14 | # v0.9 15 | # - added functionality for time spent version of movement matrix (for now implemented in a new function, runSim_timespent()) 16 | # - fixed bug in how model tracks recovered people 17 | # 18 | # v0.81 19 | # - Major speed increase in infection 20 | # 21 | # v0.8 22 | # - added capacity for multiple timesteps per day. TSinday defaults to 1, but defines the number of timesteps per day in the model (must be integer) 23 | # - added capacity for a probability of moving per timestep (fitting to Vodafone data). Not accounted for if prob_move_per_TS = 0 24 | # 25 | # v0.7 updates: 26 | # - Add percentage of exposed people who are infectious 27 | # 28 | # v0.65 updates: 29 | # - Cleaned up inputs into model for easier pre-processing 30 | # - Added capacity for time-dependent contact rates 31 | # 32 | # v0.6 updates: 33 | # - fixed bug when recovery rate data are missing a patch for a day 34 | # - patched bug that could lead to negative nInf values (however, the actual solution needs revisiting!) 35 | # 36 | #v0.5 updates: 37 | # - Added functionality to input relative movement table 38 | # - added functionality for time-variable recovery rates 39 | # 40 | # v.0.4 Updates: 41 | # - Model calibrated using HKU studies. 42 | # - Fixed transmission term to Poisson distribution 43 | # - Added "date" versatility to use non-contiguous dates 44 | # 45 | # This model runs a basic SEIR model, with stochastic exposure, incubation, recovery, and movement 46 | # Disease spread occurs each day, based on the movement patterns from specific days from mobile phone-derived data. 47 | # 48 | # 49 | # 50 | # See run_model.R for working example. 51 | # 52 | # TO DO: 53 | # - Add in infectious period for part of the exposed period (new category: exposed and infectious) 54 | # 55 | # 56 | # 57 | ###### 58 | 59 | library(lubridate) 60 | 61 | #This function creates the starting population 62 | InitiatePop = function(pat_locator,initialInf,initialExp){ 63 | NPat = dim(pat_locator)[1] 64 | list( 65 | nInitialInf = initialInf, 66 | nInitialExp = initialExp, 67 | nInf = initialInf, 68 | nExp = initialExp, 69 | nRec = rep(0,NPat), 70 | nTotal = pat_locator$pop, 71 | names = pat_locator$patNames, 72 | IDs = pat_locator$patIDs, 73 | relativeInf = rep(1,NPat), 74 | nRecoveredToday = rep(0,NPat), 75 | nInfectedToday = rep(0,NPat), 76 | nExposedToday = rep(0,NPat), 77 | nInfMovedToday = rep(0,NPat), 78 | controlled = rep(0,NPat) 79 | ) 80 | } 81 | 82 | ##### Epidemic functions: exposure, infectivity, recovery #### 83 | recoveryTimeStep = function(HPop, recrate_values,current_day){ 84 | recrate = subset(recrate_values,date == current_day)$recrate 85 | HPop$nInf = round(HPop$nInf) 86 | #print(recrate)#print(paste0("Day ",current_day, " recovery rate: ", recrate)) 87 | for (i in 1:length(HPop$nInf)){ 88 | HPop$nRecoveredToday[i]= sum(rbinom(HPop$nInf[i],1,recrate)) 89 | 90 | 91 | HPop$nInf[i] = HPop$nInf[i] - HPop$nRecoveredToday[i] 92 | HPop$nRec[i] = HPop$nRec[i] + HPop$nRecoveredToday[i] 93 | 94 | } 95 | #print(paste0("Number of people recovering: ",sum(HPop$nRecoveredToday))) 96 | HPop 97 | } 98 | 99 | exposedtoinfTimeStep = function(HPop, exp_to_infrate){ 100 | #(exp_to_infrate) 101 | HPop$nExp = round(HPop$nExp) 102 | for (i in 1:length(HPop$nInf)){ 103 | #print(HPop$nExposedToday[i]) 104 | HPop$nInfectedToday[i]= sum(rbinom(HPop$nExp[i],1,exp_to_infrate)) 105 | #if (HPop$nInf[i] + HPop$nInfectedToday[i] < HPop$nTotal[i] - HPop$nExp[i] - HPop$nRec[i] ) { 106 | HPop$nInf[i] = HPop$nInf[i] + HPop$nInfectedToday[i] 107 | 108 | # } else { 109 | # HPop$nInfectedToday[i] = max(0,HPop$nTotal[i] - HPop$nInf[i] - HPop$nExp[i]- HPop$nRec[i]) 110 | 111 | # HPop$nInf[i] = max(0,HPop$nTotal[i] - HPop$nExp[i] - HPop$nRec[i]) 112 | 113 | # } 114 | HPop$nExp[i] = HPop$nExp[i] - HPop$nInfectedToday[i] 115 | 116 | } 117 | #print(paste0("Number of people newly infectious: ",sum(HPop$nInfectedToday))) 118 | HPop 119 | } 120 | 121 | exposedTimeStep = function(HPop, exposerate_df, current_day, exposed_pop_inf_prop){ 122 | 123 | if (is.numeric(exposerate_df)){ 124 | exposerate = exposerate_df 125 | } 126 | if (is.data.frame(exposerate_df)){ 127 | exposerate = subset(exposerate_df, date == current_day)$exposerate 128 | } 129 | for (i in 1:length(HPop$nInf)){ 130 | infectious_pop = HPop$nInf[i] + exposed_pop_inf_prop * HPop$nExp[i] 131 | infectious_pop = round(infectious_pop) 132 | #HPop$nExposedToday[i]= sum(rbinom(infectious_pop,1,exposerate)) * (1 - ( (HPop$nInf[i] + HPop$nExp[i]) / HPop$nTotal[i])) 133 | HPop$nExposedToday[i]= sum(rpois(infectious_pop,exposerate)) * (1 - min(1, ( (HPop$nInf[i] + HPop$nExp[i] + HPop$nRec[i]) / HPop$nTotal[i]) )) 134 | 135 | 136 | if (HPop$nExp[i] + HPop$nExposedToday[i] < HPop$nTotal[i] - HPop$nInf[i] - HPop$nRec[i] ) { 137 | HPop$nExp[i] = HPop$nExp[i] + HPop$nExposedToday[i] 138 | 139 | } else { 140 | HPop$nExposedToday[i] = max(0,HPop$nTotal[i] - HPop$nInf[i] - HPop$nExp[i]- HPop$nRec[i]) 141 | 142 | HPop$nExp[i] = max(0,HPop$nTotal[i] - HPop$nInf[i] - HPop$nRec[i]) 143 | 144 | } 145 | } 146 | #print(paste0("Number of people newly exposed: ",sum(HPop$nExposedToday))) 147 | HPop 148 | } 149 | 150 | 151 | exposedTimeStep_timespent = function(HPop, exposerate_df, current_day, exposed_pop_inf_prop,ts_data){ 152 | 153 | TS_matrix = matrix(0,NPat,NPat,dimnames=list(patIDs,patIDs)) 154 | daily_move = subset(ts_data,date == current_day) 155 | daily_move = subset(daily_move,!is.na(fr_pat) & !is.na(to_pat) & !is.na(fr_users) & !is.na(movers)) 156 | 157 | daily_move_mat = daily_move[,is.element(names(daily_move),c("fr_pat","to_pat","fr_users","movers"))] 158 | daily_move_mat = as.matrix(daily_move_mat) 159 | col1 = which(colnames(daily_move_mat) == "fr_pat") 160 | col2=which(colnames(daily_move_mat) == "to_pat") 161 | 162 | colmove = which(colnames(daily_move_mat) == "movers") 163 | colusers=which(colnames(daily_move_mat) == "fr_users") 164 | TS_matrix[daily_move_mat[,c(col1,col2)]] = daily_move_mat[,colmove]/daily_move_mat[,colusers] 165 | if (length(which(rowSums(TS_matrix)>1)) > 0){ 166 | print("Warning: row sums > 1 in movement matrix. Correcting...") 167 | correctingrows = which(rowSums(TS_matrix)>1) 168 | for (i in correctingrows){ 169 | TS_matrix[i,] = TS_matrix[i,] /sum(TS_matrix[i,] ) 170 | } 171 | } 172 | for (i in 1:length(patIDs)){ 173 | TS_matrix[i,i] = 1 - sum(TS_matrix[i,-i]) 174 | } 175 | 176 | if (is.numeric(exposerate_df)){ 177 | exposerate = exposerate_df 178 | } 179 | if (is.data.frame(exposerate_df)){ 180 | exposerate = subset(exposerate_df, date == current_day)$exposerate 181 | } 182 | movement_adjusted_infectious_prop = rep(0,length(HPop$nInf)) 183 | for (i in 1:length(HPop$nInf)){ 184 | movement_adjusted_infectious_prop[i] = sum(((HPop$nInf * TS_matrix[,i]) + exposed_pop_inf_prop * sum(( HPop$nExp * TS_matrix[,i])))) / sum(HPop$nTotal * TS_matrix[,i]) 185 | } 186 | susceptible_vec = HPop$nTotal - HPop$nInf - HPop$nExp - HPop$nRec 187 | 188 | probability_infection = 1-exp(-exposerate * movement_adjusted_infectious_prop) 189 | for (i in 1:length(HPop$nInf)){ 190 | susceptible_weighted_pop = round(susceptible_vec[i]*TS_matrix[i,]) 191 | HPop$nExposedToday[i] = sum(rbinom(length(susceptible_weighted_pop),size = susceptible_weighted_pop,prob=probability_infection)) 192 | if (HPop$nExp[i] + HPop$nExposedToday[i] < HPop$nTotal[i] - HPop$nInf[i] - HPop$nRec[i] ) { 193 | HPop$nExp[i] = HPop$nExp[i] + HPop$nExposedToday[i] 194 | 195 | } else { 196 | if (HPop$nExp[i]< 0){print(HPop$nExp[i])} 197 | 198 | HPop$nExposedToday[i] = HPop$nTotal[i] - HPop$nInf[i] - HPop$nExp[i]- HPop$nRec[i] 199 | HPop$nExp[i] = HPop$nTotal[i] - HPop$nInf[i] - HPop$nRec[i] 200 | if (HPop$nExp[i]< 0){print(HPop$nExp[i])} 201 | } 202 | } 203 | 204 | #print(paste0("Number of people newly exposed: ",sum(HPop$nExposedToday))) 205 | HPop 206 | } 207 | 208 | 209 | 210 | ####### Activity functions: Human movement #### 211 | movementTimeStep = function(HPop, mobmat,day,control_df,prob_move_per_TS){ 212 | movement_matrix = matrix(0,NPat,NPat,dimnames=list(patIDs,patIDs)) 213 | daily_move = subset(mobmat,date == day) 214 | 215 | daily_move = subset(daily_move,!is.na(fr_pat) & !is.na(to_pat) & !is.na(fr_users) & !is.na(movers)) 216 | 217 | daily_move_mat = daily_move[,is.element(names(daily_move),c("fr_pat","to_pat","fr_users","movers"))] 218 | daily_move_mat = as.matrix(daily_move_mat) 219 | col1 = which(colnames(daily_move_mat) == "fr_pat") 220 | col2=which(colnames(daily_move_mat) == "to_pat") 221 | 222 | colmove = which(colnames(daily_move_mat) == "movers") 223 | colusers=which(colnames(daily_move_mat) == "fr_users") 224 | movement_matrix[daily_move_mat[,c(col1,col2)]] = daily_move_mat[,colmove]/daily_move_mat[,colusers] 225 | if (length(which(rowSums(movement_matrix)>1)) > 0){ 226 | print("Warning: row sums > 1 in movement matrix. Correcting...") 227 | correctingrows = which(rowSums(movement_matrix)>1) 228 | for (i in correctingrows){ 229 | movement_matrix[i,] = movement_matrix[i,] /sum(movement_matrix[i,] ) 230 | } 231 | } 232 | if (prob_move_per_TS > 0){ 233 | movement_matrix = movement_matrix*prob_move_per_TS 234 | } 235 | for (i in 1:length(patIDs)){ 236 | movement_matrix[i,i] = 1 - sum(movement_matrix[i,-i]) 237 | } 238 | 239 | HPop$controlled = rep(0,length(HPop$names)) 240 | if (length(which(control_df$date == day)) > 0){ 241 | control_df_sub = subset(control_df,date == day) 242 | if (dim(control_df_sub)[1] > 0){ 243 | for (i in 1:dim(control_df_sub)[1]){ 244 | HPop$controlled[which(HPop$names == control_df_sub$from[i])] = control_df_sub$relative_move[i] 245 | 246 | } 247 | } 248 | } 249 | if (sum(HPop$controlled)>0){ 250 | movement_matrix = stopMovement(HPop,movement_matrix,day) 251 | } 252 | #deterministic version 253 | #HPop$nInfMovedToday = colSums(diag(HPop$nInf) %*% movement_matrix) - HPop$nInf 254 | #HPop$nInf = colSums(diag(HPop$nInf) %*% movement_matrix) 255 | HPop$nInf = round(HPop$nInf) 256 | # stochastic version 257 | z <- rbinom(n=NPat^2,size = rep(HPop$nInf,each=NPat),prob = t(movement_matrix)[]) 258 | moved_matrix = t(matrix(z,NPat,NPat,dimnames=list(patIDs,patIDs))) 259 | for (i in 1:dim(moved_matrix)[1]){ 260 | if (sum(moved_matrix[i,]) > 0){ 261 | moved_matrix[i,] = moved_matrix[i,]/sum(moved_matrix[i,]) * HPop$nInf[i] 262 | } 263 | } 264 | #print(sum(moved_matrix)) 265 | #print(sum(HPop$nInf)) 266 | diag(moved_matrix)=0 267 | HPop$nInfMovedToday = colSums(moved_matrix) 268 | 269 | HPop$nInf = HPop$nInf - rowSums(moved_matrix) + colSums(moved_matrix) 270 | #print(max((HPop$nInf + HPop$nRec + HPop$nExp)/HPop$nTotal)) 271 | #quick fix 272 | for (i in 1:length(HPop$nInf)){ 273 | if (HPop$nInf[i] > HPop$nTotal[i] - HPop$nExp[i] - HPop$nRec[i]){ 274 | HPop$nRec[i] = max(0, HPop$nTotal[i] - HPop$nExp[i]- HPop$nInf[i]) 275 | 276 | HPop$nInf[i] = HPop$nTotal[i] - HPop$nExp[i] - HPop$nRec[i] 277 | } 278 | if (HPop$nInf[i] <0 ){ 279 | 280 | HPop$nInf[i] = 0 281 | } 282 | } 283 | 284 | #(max((HPop$nInf + HPop$nRec + HPop$nExp)/HPop$nTotal)) 285 | #print(paste0("Number of infected people moving: ",sum(abs(HPop$nInfMovedToday))/2)) 286 | HPop 287 | } 288 | 289 | 290 | ###### Response functions: Control 291 | #relative_movement is the proportion of original movement out/in that we want to keep -- ie. .1 = 10% of original movement rate 292 | stopMovement = function(HPop,mobmat,current_date){ 293 | stopping = which(HPop$controlled > 0) 294 | if (length(stopping) > 0){ 295 | # print(paste("stopping movement in patches", HPop$names[stopping])) 296 | for (ctrl_pat in stopping){ 297 | control_patches = HPop$IDs[ctrl_pat] 298 | mobmat[control_patches,] = mobmat[control_patches,] * HPop$controlled[ctrl_pat] 299 | mobmat[,control_patches] = mobmat[,control_patches] * HPop$controlled[ctrl_pat] 300 | for (i in 1:length(HPop$IDs)){ 301 | mobmat[i,i] = 1 - sum(mobmat[i,-i]) 302 | } 303 | } 304 | } 305 | mobmat 306 | } 307 | 308 | 309 | 310 | 311 | ###### Master function #### 312 | runSim = function(HPop,pat_info,control_info,mobmat,day_list,recrate_values,exposerate_df,exposepd,exposed_pop_inf_prop = 0,TSinday = 1,prob_move_per_TS=0) { 313 | 314 | 315 | epidemic_curve <- data.frame(Date=as.Date(character()), 316 | inf=c(), 317 | stringsAsFactors=FALSE) 318 | 319 | if (TSinday > 1){ 320 | #recrate_values$recrate = 1-(1-recrate_values$recrate)^(1/TSinday) 321 | exposetoinfrate = 1/exposepd 322 | exposepd = 1/(1 - exp(log(1-exposetoinfrate) / TSinday)) 323 | #recrate_values$recrate = 1 - ((1 - recrate_values$recrate) ^ (1/TSinday)) 324 | recrate_values$recrate = 1 - exp(log(1-recrate_values$recrate) / TSinday) 325 | if (is.numeric(exposerate_df)){ 326 | # exposerate_df = 1-(1-exposerate_df)^(1/TSinday) 327 | exposerate_df = exposerate_df/TSinday 328 | # recrate_values$recrate = 1 - ((1 - recrate_values$recrate) ^ (1/TSinday)) 329 | } 330 | if (is.data.frame(exposerate_df)){ 331 | # exposerate_df$exposerate = 1-(1-exposerate_df$exposerate)^(1/TSinday) 332 | exposerate_df$exposerate = exposerate_df$exposerate/TSinday 333 | } 334 | } 335 | 336 | all_spread = matrix(0,length(day_list),length(HPop$nInf)) 337 | all_spread_today = matrix(0,length(day_list),length(HPop$nInf)) 338 | colnames(all_spread) = HPop$names 339 | #print(all_dates) 340 | for (current_day in 1:length(day_list)){ 341 | for (current_TS in 1:TSinday){ 342 | print(day_list[current_day]) 343 | 344 | HPop = recoveryTimeStep(HPop,recrate_values,day_list[current_day]) 345 | HPop = exposedtoinfTimeStep(HPop,1/exposepd) 346 | 347 | 348 | HPop = exposedTimeStep(HPop,exposerate_df, day_list[current_day], exposed_pop_inf_prop) 349 | 350 | HPop = movementTimeStep(HPop,mobmat,day_list[current_day],control_info,prob_move_per_TS) 351 | 352 | print(paste("inf: ",sum(HPop$nInf)," exp:",sum(HPop$nExp), "rec: ",sum(HPop$nRec))) 353 | 354 | } 355 | #save(HPop,file=paste(current_day,".RData")) 356 | epidemic_curve = rbind(epidemic_curve,data.frame(Date = day_list[current_day], inf = sum(HPop$nInf))) 357 | all_spread[current_day,] = HPop$nInf 358 | all_spread_today[current_day,] = HPop$nInfectedToday 359 | 360 | } 361 | all_spread_2 = data.frame(dates = day_list,runday = 1:length(day_list)) 362 | all_spread_2= cbind(all_spread_2,all_spread) 363 | all_spread_today_2 = data.frame(dates = day_list,runday = 1:length(day_list)) 364 | all_spread_today_2= cbind(all_spread_today_2,all_spread_today) 365 | list(HPop = HPop,epidemic_curve = epidemic_curve,all_spread=all_spread_2,all_spread_today = all_spread_today_2) 366 | } 367 | 368 | 369 | runSim_timespent = function(HPop,pat_info,control_info,TS_data,day_list,recrate_values,exposerate_df,exposepd,exposed_pop_inf_prop = 0,TSinday = 1) { 370 | 371 | 372 | epidemic_curve <- data.frame(Date=as.Date(character()), 373 | inf=c(), 374 | stringsAsFactors=FALSE) 375 | 376 | if (TSinday > 1){ 377 | #recrate_values$recrate = 1-(1-recrate_values$recrate)^(1/TSinday) 378 | exposetoinfrate = 1/exposepd 379 | exposepd = 1/(1 - exp(log(1-exposetoinfrate) / TSinday)) 380 | #recrate_values$recrate = 1 - ((1 - recrate_values$recrate) ^ (1/TSinday)) 381 | recrate_values$recrate = 1 - exp(log(1-recrate_values$recrate) / TSinday) 382 | if (is.numeric(exposerate_df)){ 383 | # exposerate_df = 1-(1-exposerate_df)^(1/TSinday) 384 | exposerate_df = exposerate_df/TSinday 385 | # recrate_values$recrate = 1 - ((1 - recrate_values$recrate) ^ (1/TSinday)) 386 | } 387 | if (is.data.frame(exposerate_df)){ 388 | # exposerate_df$exposerate = 1-(1-exposerate_df$exposerate)^(1/TSinday) 389 | exposerate_df$exposerate = exposerate_df$exposerate/TSinday 390 | } 391 | } 392 | 393 | all_spread = matrix(0,length(day_list),length(HPop$nInf)) 394 | colnames(all_spread) = HPop$names 395 | #print(all_dates) 396 | for (current_day in 1:length(day_list)){ 397 | for (current_TS in 1:TSinday){ 398 | print(day_list[current_day]) 399 | HPop = recoveryTimeStep(HPop,recrate_values,day_list[current_day]) 400 | HPop = exposedtoinfTimeStep(HPop,1/exposepd) 401 | HPop = exposedTimeStep_timespent(HPop,exposerate_df, day_list[current_day], exposed_pop_inf_prop,TS_data) 402 | } 403 | #save(HPop,file=paste(current_day,".RData")) 404 | epidemic_curve = rbind(epidemic_curve,data.frame(Date = day_list[current_day], inf = sum(HPop$nInf))) 405 | all_spread[current_day,] = HPop$nInf 406 | 407 | } 408 | all_spread_2 = data.frame(dates = day_list,runday = 1:length(day_list)) 409 | all_spread_2= cbind(all_spread_2,all_spread) 410 | list(HPop = HPop,epidemic_curve = epidemic_curve,all_spread=all_spread_2) 411 | } 412 | 413 | 414 | --------------------------------------------------------------------------------