├── .gitignore ├── attrdl.pdf ├── 2015_gasparrini_Lancet_Rcodedata.Rproj ├── README.md ├── 04.tables.R ├── 01.firststage.R ├── 00.prepdata.R ├── 05.plots.R ├── 02.secondstage.R ├── 03.attr.R └── attrdl.R /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /attrdl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gasparrini/2015_gasparrini_Lancet_Rcodedata/HEAD/attrdl.pdf -------------------------------------------------------------------------------- /2015_gasparrini_Lancet_Rcodedata.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: 03136a07-ea9f-4450-ac05-db1a3f164cc6 3 | 4 | RestoreWorkspace: Default 5 | SaveWorkspace: Default 6 | AlwaysSaveHistory: Default 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 2 11 | Encoding: UTF-8 12 | 13 | RnwWeave: Sweave 14 | LaTeX: pdfLaTeX 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Multi-city analysis of temperature-related excess mortality 2 | 3 | ------------------------------------------------------------------------ 4 | 5 | An application of two-stage design for a multi-city analysis in environmental epidemiology. The application partly reproduces the results of an analysis of the excess risk attributable to non-optimal outdoor temperature in a multi-country dataset, published in: 6 | 7 | Gasparrini A, Guo Y, Hashizume M, Lavigne E, Zanobetti A, Schwartz J, Tobias A, Tong S, Rocklöv J, Forsberg B, Leone M, De Sario M, Bell ML, Guo YLL, Wu CF, Kan H, Yi SM, de Sousa Zanotti Stagliorio Coelho M, Saldiva PH, Honda Y, Kim H, Armstrong B. Mortality risk attributable to high and low ambient temperature: a multicountry observational study. *The Lancet*. 2015;**386**(9991):369-375. DOI: 10.1016/S0140-6736(14)62114-0. PMID: 26003380. [[freely available here](http://www.ag-myresearch.com/2015_gasparrini_lancet.html)] 8 | 9 | ------------------------------------------------------------------------ 10 | 11 | The code: 12 | 13 | - *regEngWales* stores the daily time series data from 10 locations corresponding to regions of England and Wales in the period 1993--2006 14 | - *attrdl.R* creates the function for computing the attributable risk measures 15 | - *attrdl.pdf* is the help page for the function `attrdl()` 16 | - the numbered files from *00.prepdata.R* to *05.plots.R*, reproduce the results of an example with the subset of data 17 | 18 | Download as a ZIP file using the green button *Clone or download* above 19 | -------------------------------------------------------------------------------- /04.tables.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Updated version of the code for the analysis in: 3 | # 4 | # "Mortality risk attributable to high and low ambient temperature: 5 | # a multi-country study" 6 | # Antonio Gasparrini and collaborators 7 | # The Lancet - 2015 8 | # http://www.ag-myresearch.com/2015_gasparrini_lancet.html 9 | # 10 | # Update: 15 January 2017 11 | # * an updated version of this code, compatible with future versions of the 12 | # software, is available at: 13 | # https://github.com/gasparrini/2015_gasparrini_Lancet_Rcodedata 14 | ################################################################################ 15 | 16 | ################################################################################ 17 | # TABLES 18 | ################################################################################ 19 | 20 | ################################################################################ 21 | # RELATED PART OF TABLE 1 22 | 23 | tmeanuk <- sapply(dlist,function(city) mean(city$tmean,na.rm=T)) 24 | c(Country="UK", 25 | Period=paste(range(dlist[[1]]$year),collapse="-"),Deaths=totdeathtot, 26 | Temperature=paste0(formatC(mean(tmeanuk),dig=1, 27 | format="f")," (",paste(formatC(range(tmeanuk),dig=1,format="f"), 28 | collapse="-"),")")) 29 | 30 | ################################################################################ 31 | # RELATED PART OF TABLE 2 32 | 33 | # MMP 34 | minperccountry 35 | 36 | # ATTRIBUTABLE FRACTION 37 | t(cbind(aftot,aftotlow,aftothigh)) 38 | 39 | ################################################################################ 40 | # RELATED PART OF TABLE S4 41 | 42 | # DEATHS 43 | totdeath 44 | 45 | # MINIMUM MORTALITY TEMPERATURE PERCENTILE AND ABSOLUTE TEMPERATURE 46 | minperccity 47 | mintempcity 48 | 49 | # ATTRIBUTABLE FRACTION 50 | afcity 51 | 52 | # 53 | -------------------------------------------------------------------------------- /01.firststage.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Updated version of the code for the analysis in: 3 | # 4 | # "Mortality risk attributable to high and low ambient temperature: 5 | # a multi-country study" 6 | # Antonio Gasparrini and collaborators 7 | # The Lancet - 2015 8 | # http://www.ag-myresearch.com/2015_gasparrini_lancet.html 9 | # 10 | # Update: 15 January 2017 11 | # * an updated version of this code, compatible with future versions of the 12 | # software, is available at: 13 | # https://github.com/gasparrini/2015_gasparrini_Lancet_Rcodedata 14 | ################################################################################ 15 | 16 | ################################################################################ 17 | # FIRST-STAGE ANALYSIS: RUN THE MODEL IN EACH CITY, REDUCE AND SAVE 18 | ################################################################################ 19 | 20 | ################################################################################ 21 | # CREATE THE OBJECTS TO STORE THE RESULTS 22 | 23 | # COEFFICIENTS AND VCOV FOR OVERALL CUMULATIVE SUMMARY 24 | coef <- matrix(NA,nrow(cities),length(varper)+vardegree, 25 | dimnames=list(cities$city)) 26 | vcov <- vector("list",nrow(cities)) 27 | names(vcov) <- cities$city 28 | 29 | ################################################################################ 30 | # RUN THE LOOP 31 | 32 | # LOOP 33 | time <- proc.time()[3] 34 | for(i in seq(length(dlist))) { 35 | 36 | # PRINT 37 | cat(i,"") 38 | 39 | # EXTRACT THE DATA 40 | data <- dlist[[i]] 41 | 42 | # DEFINE THE CROSSBASIS 43 | argvar <- list(fun=varfun,knots=quantile(data$tmean,varper/100,na.rm=T), 44 | degree=vardegree) 45 | cb <- crossbasis(data$tmean,lag=lag,argvar=argvar, 46 | arglag=list(knots=logknots(lag,lagnk))) 47 | #summary(cb) 48 | 49 | # RUN THE MODEL AND OBTAIN PREDICTIONS 50 | # NB: NO CENTERING NEEDED HERE, AS THIS DOES NOT AFFECT COEF-VCOV 51 | model <- glm(formula,data,family=quasipoisson,na.action="na.exclude") 52 | cen <- mean(data$tmean,na.rm=T) 53 | pred <- crosspred(cb,model,cen=cen) 54 | 55 | # REDUCTION TO OVERALL CUMULATIVE 56 | red <- crossreduce(cb,model,cen=cen) 57 | coef[i,] <- coef(red) 58 | vcov[[i]] <- vcov(red) 59 | 60 | } 61 | proc.time()[3]-time 62 | 63 | # 64 | -------------------------------------------------------------------------------- /00.prepdata.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Updated version of the code for the analysis in: 3 | # 4 | # "Mortality risk attributable to high and low ambient temperature: 5 | # a multi-country study" 6 | # Antonio Gasparrini and collaborators 7 | # The Lancet - 2015 8 | # http://www.ag-myresearch.com/2015_gasparrini_lancet.html 9 | # 10 | # Update: 15 January 2017 11 | # * an updated version of this code, compatible with future versions of the 12 | # software, is available at: 13 | # https://github.com/gasparrini/2015_gasparrini_Lancet_Rcodedata 14 | ################################################################################ 15 | 16 | ################################################################################ 17 | # PREPARE THE DATA 18 | ################################################################################ 19 | 20 | # LOAD THE PACKAGES 21 | library(dlnm) ; library(mvmeta) ; library(splines) ; library(tsModel) 22 | 23 | # CHECK VERSION OF THE PACKAGE 24 | if(packageVersion("dlnm")<"2.2.0") 25 | stop("update dlnm package to version >= 2.2.0") 26 | 27 | # LOAD THE DATASET (INCLUDING THE 10 UK REGIONS ONLY) 28 | regEngWales <- read.csv("regEngWales.csv",row.names=1) 29 | regEngWales$date <- as.Date(regEngWales$date) 30 | 31 | # ARRANGE THE DATA AS A LIST OF DATA SETS 32 | regions <- as.character(unique(regEngWales$regnames)) 33 | dlist <- lapply(regions,function(x) regEngWales[regEngWales$regnames==x,]) 34 | names(dlist) <- regions 35 | 36 | # METADATA FOR LOCATIONS 37 | cities <- data.frame( 38 | city = regions, 39 | cityname = c("North East","North West","Yorkshire & Humber","East Midlands", 40 | "West Midlands","East","London","South East","South West","Wales") 41 | ) 42 | 43 | # ORDER 44 | ord <- order(cities$cityname) 45 | dlist <- dlist[ord] 46 | cities <- cities[ord,] 47 | 48 | # REMOVE ORIGINALS 49 | rm(regEngWales,regions,ord) 50 | 51 | ################################################################################ 52 | 53 | # SPECIFICATION OF THE EXPOSURE FUNCTION 54 | varfun = "bs" 55 | vardegree = 2 56 | varper <- c(10,75,90) 57 | 58 | # SPECIFICATION OF THE LAG FUNCTION 59 | lag <- 21 60 | lagnk <- 3 61 | 62 | # DEGREE OF FREEDOM FOR SEASONALITY 63 | dfseas <- 8 64 | 65 | # COMPUTE PERCENTILES 66 | per <- t(sapply(dlist,function(x) 67 | quantile(x$tmean,c(2.5,10,25,50,75,90,97.5)/100,na.rm=T))) 68 | 69 | # MODEL FORMULA 70 | formula <- death~cb+dow+ns(date,df=dfseas*length(unique(year))) 71 | 72 | # 73 | -------------------------------------------------------------------------------- /05.plots.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Updated version of the code for the analysis in: 3 | # 4 | # "Mortality risk attributable to high and low ambient temperature: 5 | # a multi-country study" 6 | # Antonio Gasparrini and collaborators 7 | # The Lancet - 2015 8 | # http://www.ag-myresearch.com/2015_gasparrini_lancet.html 9 | # 10 | # Update: 15 January 2017 11 | # * an updated version of this code, compatible with future versions of the 12 | # software, is available at: 13 | # https://github.com/gasparrini/2015_gasparrini_Lancet_Rcodedata 14 | ################################################################################ 15 | 16 | ################################################################################ 17 | # PLOTS 18 | ################################################################################ 19 | 20 | ################################################################################ 21 | # SIMILAR TO FIGURE 1 22 | 23 | xlab <- expression(paste("Temperature (",degree,"C)")) 24 | 25 | pdf("figure1.pdf",width=8,height=9) 26 | layout(matrix(c(0,1,1,2,2,0,rep(3:8,each=2),0,9,9,10,10,0),ncol=6,byrow=T)) 27 | par(mar=c(4,3.8,3,2.4),mgp=c(2.5,1,0),las=1) 28 | 29 | for(i in seq(length(dlist))) { 30 | data <- dlist[[i]] 31 | # NB: CENTERING POINT DIFFERENT THAN ORIGINAL CHOICE OF 75TH 32 | argvar <- list(x=data$tmean,fun=varfun,degree=vardegree, 33 | knots=quantile(data$tmean,varper/100,na.rm=T)) 34 | bvar <- do.call(onebasis,argvar) 35 | pred <- crosspred(bvar,coef=blup[[i]]$blup,vcov=blup[[i]]$vcov, 36 | model.link="log",by=0.1,cen=mintempcity[i]) 37 | plot(pred,type="n",ylim=c(0,2.5),yaxt="n",lab=c(6,5,7),xlab=xlab,ylab="RR", 38 | main=cities$cityname[i]) 39 | ind1 <- pred$predvar<=mintempcity[i] 40 | ind2 <- pred$predvar>=mintempcity[i] 41 | lines(pred$predvar[ind1],pred$allRRfit[ind1],col=4,lwd=1.5) 42 | lines(pred$predvar[ind2],pred$allRRfit[ind2],col=2,lwd=1.5) 43 | mtext(cities$countryname[i],cex=0.7,line=0) 44 | #axis(1,at=-8:8*5) 45 | axis(2,at=1:5*0.5) 46 | breaks <- c(min(data$tmean,na.rm=T)-1,seq(pred$predvar[1], 47 | pred$predvar[length(pred$predvar)],length=30),max(data$tmean,na.rm=T)+1) 48 | hist <- hist(data$tmean,breaks=breaks,plot=F) 49 | hist$density <- hist$density/max(hist$density)*0.7 50 | prop <- max(hist$density)/max(hist$counts) 51 | counts <- pretty(hist$count,3) 52 | plot(hist,ylim=c(0,max(hist$density)*3.5),axes=F,ann=F,col=grey(0.95), 53 | breaks=breaks,freq=F,add=T) 54 | axis(4,at=counts*prop,labels=counts,cex.axis=0.7) 55 | #mtext("N",4,line=-0.5,at=mean(counts*prop),cex=0.5) 56 | abline(v=mintempcity[i],lty=3) 57 | abline(v=c(per[i,c("2.5%","97.5%")]),lty=2) 58 | } 59 | 60 | dev.off() 61 | 62 | # 63 | -------------------------------------------------------------------------------- /02.secondstage.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Updated version of the code for the analysis in: 3 | # 4 | # "Mortality risk attributable to high and low ambient temperature: 5 | # a multi-country study" 6 | # Antonio Gasparrini and collaborators 7 | # The Lancet - 2015 8 | # http://www.ag-myresearch.com/2015_gasparrini_lancet.html 9 | # 10 | # Update: 15 January 2017 11 | # * an updated version of this code, compatible with future versions of the 12 | # software, is available at: 13 | # https://github.com/gasparrini/2015_gasparrini_Lancet_Rcodedata 14 | ################################################################################ 15 | 16 | ################################################################################ 17 | # MULTIVARIATE META-ANALYSIS OF THE REDUCED COEF AND THEN COMPUTATION OF BLUP 18 | ################################################################################ 19 | 20 | # CREATE AVERAGE TEMPERATURE AND RANGE AS META-PREDICTORS 21 | avgtmean <- sapply(dlist,function(x) mean(x$tmean,na.rm=T)) 22 | rangetmean <- sapply(dlist,function(x) diff(range(x$tmean,na.rm=T))) 23 | 24 | ################################################################################ 25 | # META-ANALYSIS 26 | # NB: COUNTRY EFFECT IS NOT INCLUDED IN THIS EXAMPLE 27 | 28 | mv <- mvmeta(coef~avgtmean+rangetmean,vcov,data=cities,control=list(showiter=T)) 29 | summary(mv) 30 | # NB: IN THIS EXAMPLE THE MV-META MODEL IS CLEARLY OVERPARAMETERIZED 31 | 32 | ################################################################################ 33 | 34 | # FUNCTION FOR COMPUTING THE P-VALUE OF A WALD TEST 35 | fwald <- function(model,var) { 36 | ind <- grep(var,names(coef(model))) 37 | coef <- coef(model)[ind] 38 | vcov <- vcov(model)[ind,ind] 39 | waldstat <- coef%*%solve(vcov)%*%coef 40 | df <- length(coef) 41 | return(1-pchisq(waldstat,df)) 42 | } 43 | 44 | # TEST THE EFFECTS 45 | fwald(mv,"avgtmean") 46 | fwald(mv,"rangetmean") 47 | 48 | ################################################################################ 49 | # OBTAIN BLUPS 50 | 51 | blup <- blup(mv,vcov=T) 52 | 53 | ################################################################################ 54 | # RE-CENTERING 55 | 56 | # GENERATE THE MATRIX FOR STORING THE RESULTS 57 | minperccity <- mintempcity <- rep(NA,length(dlist)) 58 | names(mintempcity) <- names(minperccity) <- cities$city 59 | 60 | # DEFINE MINIMUM MORTALITY VALUES: EXCLUDE LOW AND VERY HOT TEMPERATURE 61 | for(i in seq(length(dlist))) { 62 | data <- dlist[[i]] 63 | predvar <- quantile(data$tmean,1:99/100,na.rm=T) 64 | # REDEFINE THE FUNCTION USING ALL THE ARGUMENTS (BOUNDARY KNOTS INCLUDED) 65 | argvar <- list(x=predvar,fun=varfun, 66 | knots=quantile(data$tmean,varper/100,na.rm=T),degree=vardegree, 67 | Bound=range(data$tmean,na.rm=T)) 68 | bvar <- do.call(onebasis,argvar) 69 | minperccity[i] <- (1:99)[which.min((bvar%*%blup[[i]]$blup))] 70 | mintempcity[i] <- quantile(data$tmean,minperccity[i]/100,na.rm=T) 71 | } 72 | 73 | # COUNTRY-SPECIFIC POINTS OF MINIMUM MORTALITY 74 | (minperccountry <- median(minperccity)) 75 | 76 | # 77 | -------------------------------------------------------------------------------- /03.attr.R: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Updated version of the code for the analysis in: 3 | # 4 | # "Mortality risk attributable to high and low ambient temperature: 5 | # a multi-country study" 6 | # Antonio Gasparrini and collaborators 7 | # The Lancet - 2015 8 | # http://www.ag-myresearch.com/2015_gasparrini_lancet.html 9 | # 10 | # Update: 15 January 2017 11 | # * an updated version of this code, compatible with future versions of the 12 | # software, is available at: 13 | # https://github.com/gasparrini/2015_gasparrini_Lancet_Rcodedata 14 | ################################################################################ 15 | 16 | ################################################################################ 17 | # COMPUTE THE ATTRIBUTABLE DEATHS FOR EACH CITY, WITH EMPIRICAL CI 18 | # ESTIMATED USING THE RE-CENTERED BASES 19 | ################################################################################ 20 | 21 | # LOAD THE FUNCTION FOR COMPUTING THE ATTRIBUTABLE RISK MEASURES 22 | source("attrdl.R") 23 | 24 | # CREATE THE VECTORS TO STORE THE TOTAL MORTALITY (ACCOUNTING FOR MISSING) 25 | totdeath <- rep(NA,nrow(cities)) 26 | names(totdeath) <- cities$city 27 | 28 | # CREATE THE MATRIX TO STORE THE ATTRIBUTABLE DEATHS 29 | matsim <- matrix(NA,nrow(cities),3,dimnames=list(cities$city, 30 | c("glob","cold","heat"))) 31 | 32 | # NUMBER OF SIMULATION RUNS FOR COMPUTING EMPIRICAL CI 33 | nsim <- 1000 34 | 35 | # CREATE THE ARRAY TO STORE THE CI OF ATTRIBUTABLE DEATHS 36 | arraysim <- array(NA,dim=c(nrow(cities),3,nsim),dimnames=list(cities$city, 37 | c("glob","cold","heat"))) 38 | 39 | ################################################################################ 40 | 41 | # RUN THE LOOP 42 | for(i in seq(dlist)){ 43 | 44 | # PRINT 45 | cat(i,"") 46 | 47 | # EXTRACT THE DATA 48 | data <- dlist[[i]] 49 | 50 | # DERIVE THE CROSS-BASIS 51 | # NB: CENTERING POINT DIFFERENT THAN ORIGINAL CHOICE OF 75TH 52 | argvar <- list(x=data$tmean,fun=varfun,knots=quantile(data$tmean, 53 | varper/100,na.rm=T),degree=vardegree) 54 | cb <- crossbasis(data$tmean,lag=lag,argvar=argvar, 55 | arglag=list(knots=logknots(lag,lagnk))) 56 | 57 | # COMPUTE THE ATTRIBUTABLE DEATHS 58 | # NB: THE REDUCED COEFFICIENTS ARE USED HERE 59 | matsim[i,"glob"] <- attrdl(data$tmean,cb,data$death,coef=blup[[i]]$blup, 60 | vcov=blup[[i]]$vcov,type="an",dir="forw",cen=mintempcity[i]) 61 | matsim[i,"cold"] <- attrdl(data$tmean,cb,data$death,coef=blup[[i]]$blup, 62 | vcov=blup[[i]]$vcov,type="an",dir="forw",cen=mintempcity[i], 63 | range=c(-100,mintempcity[i])) 64 | matsim[i,"heat"] <- attrdl(data$tmean,cb,data$death,coef=blup[[i]]$blup, 65 | vcov=blup[[i]]$vcov,type="an",dir="forw",cen=mintempcity[i], 66 | range=c(mintempcity[i],100)) 67 | 68 | # COMPUTE EMPIRICAL OCCURRENCES OF THE ATTRIBUTABLE DEATHS 69 | # USED TO DERIVE CONFIDENCE INTERVALS 70 | arraysim[i,"glob",] <- attrdl(data$tmean,cb,data$death,coef=blup[[i]]$blup, 71 | vcov=blup[[i]]$vcov,type="an",dir="forw",cen=mintempcity[i],sim=T,nsim=nsim) 72 | arraysim[i,"cold",] <- attrdl(data$tmean,cb,data$death,coef=blup[[i]]$blup, 73 | vcov=blup[[i]]$vcov,type="an",dir="forw",cen=mintempcity[i], 74 | range=c(-100,mintempcity[i]),sim=T,nsim=nsim) 75 | arraysim[i,"heat",] <- attrdl(data$tmean,cb,data$death,coef=blup[[i]]$blup, 76 | vcov=blup[[i]]$vcov,type="an",dir="forw",cen=mintempcity[i], 77 | range=c(mintempcity[i],100),sim=T,nsim=nsim) 78 | 79 | # STORE THE DENOMINATOR OF ATTRIBUTABLE DEATHS, I.E. TOTAL OBSERVED MORTALITY 80 | # CORRECT DENOMINATOR TO COMPUTE THE ATTRIBUTABLE FRACTION LATER, AS IN attrdl 81 | totdeath[i] <- sum(data$death,na.rm=T) 82 | } 83 | 84 | ################################################################################ 85 | # ATTRIBUTABLE NUMBERS 86 | 87 | # CITY-SPECIFIC 88 | ancity <- matsim 89 | ancitylow <- apply(arraysim,c(1,2),quantile,0.025) 90 | ancityhigh <- apply(arraysim,c(1,2),quantile,0.975) 91 | rownames(ancity) <- rownames(ancitylow) <- rownames(ancityhigh) <- cities$cityname 92 | 93 | # TOTAL 94 | # NB: FIRST SUM THROUGH CITIES 95 | antot <- colSums(matsim) 96 | antotlow <- apply(apply(arraysim,c(2,3),sum),1,quantile,0.025) 97 | antothigh <- apply(apply(arraysim,c(2,3),sum),1,quantile,0.975) 98 | 99 | ################################################################################ 100 | # TOTAL MORTALITY 101 | 102 | # BY COUNTRY 103 | totdeathtot <- sum(totdeath) 104 | 105 | ################################################################################ 106 | # ATTRIBUTABLE FRACTIONS 107 | 108 | # CITY-SPECIFIC 109 | afcity <- ancity/totdeath*100 110 | afcitylow <- ancitylow/totdeath*100 111 | afcityhigh <- ancityhigh/totdeath*100 112 | 113 | # TOTAL 114 | aftot <- antot/totdeathtot*100 115 | aftotlow <- antotlow/totdeathtot*100 116 | aftothigh <- antothigh/totdeathtot*100 117 | 118 | # 119 | -------------------------------------------------------------------------------- /attrdl.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### (c) Antonio Gasparrini 2015-2017 3 | # 4 | ################################################################################ 5 | # FUNCTION FOR COMPUTING ATTRIBUTABLE MEASURES FROM DLNM 6 | # REQUIRES dlnm VERSION 2.2.0 AND ON 7 | ################################################################################ 8 | # 9 | # DISCLAIMER: 10 | # THE CODE COMPOSING THIS FUNCTION HAS NOT BEEN SYSTEMATICALLY TESTED. THE 11 | # PRESENCE OF BUGS CANNOT BE RULED OUT. ALSO, ALTHOUGH WRITTEN GENERICALLY 12 | # FOR WORKING IN DIFFERENT SCENARIOS AND DATA, THE FUNCTION HAS NOT BEEN 13 | # TESTED IN CONTEXTS DIFFERENT THAN THE EXAMPLE INCLUDED IN THE PAPER. 14 | # IT IS RESPONSIBILITY OF THE USER TO CHECK THE RELIABILITY OF THE RESULTS IN 15 | # DIFFERENT APPLICATIONS. 16 | # 17 | # Version: 25 January 2017 18 | # AN UPDATED VERSION CAN BE FOUND AT: 19 | # https://github.com/gasparrini/2014_gasparrini_BMCmrm_Rcodedata 20 | # 21 | ################################################################################ 22 | # SEE THE PDF WITH A DETAILED DOCUMENTATION AT www.ag-myresearch.com 23 | # 24 | # - x: AN EXPOSURE VECTOR OR (ONLY FOR dir="back") A MATRIX OF LAGGED EXPOSURES 25 | # - basis: THE CROSS-BASIS COMPUTED FROM x 26 | # - cases: THE CASES VECTOR OR (ONLY FOR dir="forw") THE MATRIX OF FUTURE CASES 27 | # - model: THE FITTED MODEL 28 | # - coef, vcov: COEF AND VCOV FOR basis IF model IS NOT PROVIDED 29 | # - model.link: LINK FUNCTION IF model IS NOT PROVIDED 30 | # - type: EITHER "an" OR "af" FOR ATTRIBUTABLE NUMBER OR FRACTION 31 | # - dir: EITHER "back" OR "forw" FOR BACKWARD OR FORWARD PERSPECTIVES 32 | # - tot: IF TRUE, THE TOTAL ATTRIBUTABLE RISK IS COMPUTED 33 | # - cen: THE REFERENCE VALUE USED AS COUNTERFACTUAL SCENARIO 34 | # - range: THE RANGE OF EXPOSURE. IF NULL, THE WHOLE RANGE IS USED 35 | # - sim: IF SIMULATION SAMPLES SHOULD BE RETURNED. ONLY FOR tot=TRUE 36 | # - nsim: NUMBER OF SIMULATION SAMPLES 37 | ################################################################################ 38 | attrdl <- function(x,basis,cases,model=NULL,coef=NULL,vcov=NULL,model.link=NULL, 39 | type="af",dir="back",tot=TRUE,cen,range=NULL,sim=FALSE,nsim=5000) { 40 | ################################################################################ 41 | # 42 | # CHECK VERSION OF THE DLNM PACKAGE 43 | if(packageVersion("dlnm")<"2.2.0") 44 | stop("update dlnm package to version >= 2.2.0") 45 | # 46 | # EXTRACT NAME AND CHECK type AND dir 47 | name <- deparse(substitute(basis)) 48 | type <- match.arg(type,c("an","af")) 49 | dir <- match.arg(dir,c("back","forw")) 50 | # 51 | # DEFINE CENTERING 52 | if(missing(cen) && is.null(cen <- attr(basis,"argvar")$cen)) 53 | stop("'cen' must be provided") 54 | if(!is.numeric(cen) && length(cen)>1L) stop("'cen' must be a numeric scalar") 55 | attributes(basis)$argvar$cen <- NULL 56 | # 57 | # SELECT RANGE (FORCE TO CENTERING VALUE OTHERWISE, MEANING NULL RISK) 58 | if(!is.null(range)) x[xrange[2]] <- cen 59 | # 60 | # COMPUTE THE MATRIX OF 61 | # - LAGGED EXPOSURES IF dir="back" 62 | # - CONSTANT EXPOSURES ALONG LAGS IF dir="forw" 63 | lag <- attr(basis,"lag") 64 | if(NCOL(x)==1L) { 65 | at <- if(dir=="back") tsModel:::Lag(x,seq(lag[1],lag[2])) else 66 | matrix(rep(x,diff(lag)+1),length(x)) 67 | } else { 68 | if(dir=="forw") stop("'x' must be a vector when dir='forw'") 69 | if(ncol(at <- x)!=diff(lag)+1) 70 | stop("dimension of 'x' not compatible with 'basis'") 71 | } 72 | # 73 | # NUMBER USED FOR THE CONTRIBUTION AT EACH TIME IN FORWARD TYPE 74 | # - IF cases PROVIDED AS A MATRIX, TAKE THE ROW AVERAGE 75 | # - IF PROVIDED AS A TIME SERIES, COMPUTE THE FORWARD MOVING AVERAGE 76 | # - THIS EXCLUDES MISSING ACCORDINGLY 77 | # ALSO COMPUTE THE DENOMINATOR TO BE USED BELOW 78 | if(NROW(cases)!=NROW(at)) stop("'x' and 'cases' not consistent") 79 | if(NCOL(cases)>1L) { 80 | if(dir=="back") stop("'cases' must be a vector if dir='back'") 81 | if(ncol(cases)!=diff(lag)+1) stop("dimension of 'cases' not compatible") 82 | den <- sum(rowMeans(cases,na.rm=TRUE),na.rm=TRUE) 83 | cases <- rowMeans(cases) 84 | } else { 85 | den <- sum(cases,na.rm=TRUE) 86 | if(dir=="forw") 87 | cases <- rowMeans(as.matrix(tsModel:::Lag(cases,-seq(lag[1],lag[2])))) 88 | } 89 | # 90 | ################################################################################ 91 | # 92 | # EXTRACT COEF AND VCOV IF MODEL IS PROVIDED 93 | if(!is.null(model)) { 94 | cond <- paste0(name,"[[:print:]]*v[0-9]{1,2}\\.l[0-9]{1,2}") 95 | if(ncol(basis)==1L) cond <- name 96 | model.class <- class(model) 97 | coef <- dlnm:::getcoef(model,model.class) 98 | ind <- grep(cond,names(coef)) 99 | coef <- coef[ind] 100 | vcov <- dlnm:::getvcov(model,model.class)[ind,ind,drop=FALSE] 101 | model.link <- dlnm:::getlink(model,model.class) 102 | if(!model.link %in% c("log","logit")) 103 | stop("'model' must have a log or logit link function") 104 | } 105 | # 106 | # IF REDUCED ESTIMATES ARE PROVIDED 107 | typebasis <- ifelse(length(coef)!=ncol(basis),"one","cb") 108 | # 109 | ################################################################################ 110 | # 111 | # PREPARE THE ARGUMENTS FOR TH BASIS TRANSFORMATION 112 | predvar <- if(typebasis=="one") x else seq(NROW(at)) 113 | predlag <- if(typebasis=="one") 0 else dlnm:::seqlag(lag) 114 | # 115 | # CREATE THE MATRIX OF TRANSFORMED CENTRED VARIABLES (DEPENDENT ON typebasis) 116 | if(typebasis=="cb") { 117 | Xpred <- dlnm:::mkXpred(typebasis,basis,at,predvar,predlag,cen) 118 | Xpredall <- 0 119 | for (i in seq(length(predlag))) { 120 | ind <- seq(length(predvar))+length(predvar)*(i-1) 121 | Xpredall <- Xpredall + Xpred[ind,,drop=FALSE] 122 | } 123 | } else { 124 | basis <- do.call(onebasis,c(list(x=x),attr(basis,"argvar"))) 125 | Xpredall <- dlnm:::mkXpred(typebasis,basis,x,predvar,predlag,cen) 126 | } 127 | # 128 | # CHECK DIMENSIONS 129 | if(length(coef)!=ncol(Xpredall)) 130 | stop("arguments 'basis' do not match 'model' or 'coef'-'vcov'") 131 | if(any(dim(vcov)!=c(length(coef),length(coef)))) 132 | stop("arguments 'coef' and 'vcov' do no match") 133 | if(typebasis=="one" && dir=="back") 134 | stop("only dir='forw' allowed for reduced estimates") 135 | # 136 | ################################################################################ 137 | # 138 | # COMPUTE AF AND AN 139 | af <- 1-exp(-drop(as.matrix(Xpredall%*%coef))) 140 | an <- af*cases 141 | # 142 | # TOTAL 143 | # - SELECT NON-MISSING OBS CONTRIBUTING TO COMPUTATION 144 | # - DERIVE TOTAL AF 145 | # - COMPUTE TOTAL AN WITH ADJUSTED DENOMINATOR (OBSERVED TOTAL NUMBER) 146 | if(tot) { 147 | isna <- is.na(an) 148 | af <- sum(an[!isna])/sum(cases[!isna]) 149 | an <- af*den 150 | } 151 | # 152 | ################################################################################ 153 | # 154 | # EMPIRICAL CONFIDENCE INTERVALS 155 | if(!tot && sim) { 156 | sim <- FALSE 157 | warning("simulation samples only returned for tot=T") 158 | } 159 | if(sim) { 160 | # SAMPLE COEF 161 | k <- length(coef) 162 | eigen <- eigen(vcov) 163 | X <- matrix(rnorm(length(coef)*nsim),nsim) 164 | coefsim <- coef + eigen$vectors %*% diag(sqrt(eigen$values),k) %*% t(X) 165 | # RUN THE LOOP 166 | # pre_afsim <- (1 - exp(- Xpredall %*% coefsim)) * cases # a matrix 167 | # afsim <- colSums(pre_afsim,na.rm=TRUE) / sum(cases[!isna],na.rm=TRUE) 168 | afsim <- apply(coefsim,2, function(coefi) { 169 | ani <- (1-exp(-drop(Xpredall%*%coefi)))*cases 170 | sum(ani[!is.na(ani)])/sum(cases[!is.na(ani)]) 171 | }) 172 | ansim <- afsim*den 173 | } 174 | # 175 | ################################################################################ 176 | # 177 | res <- if(sim) { 178 | if(type=="an") ansim else afsim 179 | } else { 180 | if(type=="an") an else af 181 | } 182 | # 183 | return(res) 184 | } 185 | 186 | # 187 | --------------------------------------------------------------------------------