├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── data.R ├── digitise.R ├── fit_models.R ├── global.R ├── make.ipd.R ├── make.surv.R ├── make.transition.probs.R ├── model.fit.plot.R ├── plot.survHE.R ├── plot_tranformed_km.R ├── print.survHE.R ├── psa.plot.R ├── summary.survHE.R ├── survHE-package.R ├── utils_fit_models.R ├── utils_make_surv.R ├── utils_plot_survHE.R ├── utils_print_survHE.R └── write.surv.R ├── README.md ├── data ├── TA174.RData ├── datalist ├── msmdata.RData └── survtrial.RData ├── man-roxygen └── refs.R ├── man ├── data.Rd ├── digitise.Rd ├── fit.models.Rd ├── make.ipd.Rd ├── make.surv.Rd ├── make.transition.probs.Rd ├── make_data_multi_state.Rd ├── make_newdata.Rd ├── markov_trace.Rd ├── model.fit.plot.Rd ├── msmdata.Rd ├── plot.survHE.Rd ├── plot_transformed_km.Rd ├── print.survHE.Rd ├── psa.plot.Rd ├── summary.survHE.Rd ├── survHE-package.Rd ├── ta174.Rd ├── theme_survHE.Rd ├── three_state_mm.Rd └── write.surv.Rd ├── survHE.Rproj └── tests ├── testthat.R └── testthat └── test-make.surv.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^renv$ 2 | ^renv\.lock$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | ^\.travis\.yml$ 6 | ^appveyor\.yml$ 7 | ^LICENSE\.md$ 8 | ^_pkgdown\.yml$ 9 | ^docs$ 10 | ^pkgdown$ 11 | ^\.httr-oauth$ 12 | ^CONDUCT\.md$ 13 | ^man-roxygen$ 14 | ^.git 15 | ^.gitignore 16 | ^cran-comments\.md$ 17 | ^README_dev\.md$ 18 | ^\.github$ 19 | ^paper\.bib$ 20 | ^paper\.md$ 21 | ^CONTRIBUTING\.md$ 22 | ^revdep$ 23 | ^CRAN-SUBMISSION$ 24 | ^CITATION.cff 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | 35 | # Large files generated by Stan 36 | src/*.so 37 | src/*.o 38 | ## src/stan_files/*.hpp 39 | ## src/stan_files/*.o 40 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: survHE 2 | Title: Survival Analysis in Health Economic Evaluation 3 | Version: 2.0.5 4 | Date: 2025-05-15 5 | Authors@R: c( 6 | person(given = "Gianluca",family = "Baio",role = c("aut", "cre"),email = "g.baio@ucl.ac.uk"), 7 | person("Andrea","Berardi",role="ctb",email="and.be.like@gmail.com"), 8 | person("Philip","Cooney",role="ctb",email="philip.cooney@hotmail.com"), 9 | person("Andrew","Jones",role="ctb",email="andrew.r.johnson@postgrad.curtin.edu.au"), 10 | person("Nathan","Green",role="ctb",email="n.green@ucl.ac.uk")) 11 | URL: https://github.com/giabaio/survHE, https://gianluca.statistica.it/software/survhe/ 12 | BugReports: https://github.com/giabaio/survHE/issues 13 | Description: Contains a suite of functions for survival analysis in health economics. 14 | These can be used to run survival models under a frequentist (based on maximum likelihood) 15 | or a Bayesian approach (both based on Integrated Nested Laplace Approximation or Hamiltonian 16 | Monte Carlo). To run the Bayesian models, the user needs to install additional modules 17 | (packages), i.e. 'survHEinla' and 'survHEhmc'. These can be installed from 18 | using 19 | 'install.packages("survHEhmc", repos = c("https://giabaio.r-universe.dev", "https://cloud.r-project.org"))' 20 | and 21 | 'install.packages("survHEinla", repos = c("https://giabaio.r-universe.dev", "https://cloud.r-project.org"))' 22 | respectively. 'survHEinla' is based on the package INLA, which is available for download at 23 | . The user can specify a set of parametric models 24 | using a common notation and select the preferred mode of inference. The results can also be 25 | post-processed to produce probabilistic sensitivity analysis and can be used to export the 26 | output to an Excel file (e.g. for a Markov model, as often done by modellers and 27 | practitioners). . 28 | License: GPL (>=3) 29 | Encoding: UTF-8 30 | LazyData: true 31 | Roxygen: list(markdown = TRUE) 32 | RoxygenNote: 7.3.2 33 | Biarch: true 34 | Depends: 35 | methods, 36 | R (>= 4.1.0), 37 | flexsurv, 38 | dplyr, 39 | ggplot2 40 | Imports: 41 | rms, 42 | xlsx, 43 | tools, 44 | tibble, 45 | tidyr 46 | Suggests: 47 | survHEinla, 48 | survHEhmc, 49 | INLA, 50 | rstan, 51 | testthat (>= 3.0.0) 52 | Config/testthat/edition: 3 53 | Additional_repositories: 54 | https://inla.r-inla-download.org/R/stable/, 55 | https://giabaio.r-universe.dev/ 56 | SystemRequirements: GNU make 57 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | S3method(plot, survHE) 2 | S3method(print, survHE) 3 | S3method(summary, survHE) 4 | export(digitise) 5 | export(fit.models) 6 | export(make.ipd) 7 | export(make.surv) 8 | export(model.fit.plot) 9 | export(make.transition.probs) 10 | export(plot.survHE) 11 | export(print.survHE) 12 | export(psa.plot) 13 | export(summary.survHE) 14 | export(write.surv) 15 | export(plot_transformed_km) 16 | export(three_state_mm) 17 | export(markov_trace) 18 | export(make_data_multi_state) 19 | export(make_newdata) 20 | export(make.transition.probs) 21 | export(theme_survHE) 22 | import(flexsurv) 23 | import(methods) 24 | import(dplyr) 25 | import(ggplot2) 26 | importFrom("graphics", "axis", "barplot", "hist", "legend", "par", "plot", 27 | "points", "polygon", "text", "title") 28 | importFrom("utils", "read.table","write.table","head","tail","modifyList") 29 | importFrom("stats", "terms","as.formula","model.frame","model.matrix","dexp", 30 | "pexp","dweibull","pweibull","dgamma","pgamma","median","var","time", 31 | "update","dt","profile","dlnorm","plnorm","dlogis","plogis","sd", 32 | "quantile","qnorm","model.matrix.lm") 33 | importFrom("grDevices", "colors","adjustcolor","colorRampPalette") 34 | importFrom("tools", "file_ext") 35 | importFrom("rms", "npsurv","survplot") 36 | importFrom("tibble", "rownames_to_column") 37 | importFrom("tidyr", "unite") 38 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # 2.0.5 2 | 3 | * Adds a `ggplot` theme `theme_survHE` to be used with combined plots (eg for a `survextrap` element) 4 | 5 | # 2.0.4 6 | 7 | * Fixes a slight mistake in the table for the Gen F. Now works properly on the beta coefficients 8 | * Updates DESCRIPTION with details of installatin for the add-ons `survHEinla` and `survHEhmc` 9 | 10 | # 2.0.3 11 | 12 | * Fixes dependency to R >= 4.1.0 because of the use of natural pipes 13 | * Fixes tests --- needs to use `select(!contains("(Intercept)"))` instead of `select(-matches("Intercept"),everything())` in several parts of the code. This was previously used to determine the names of the strata, used to plot the survival curves 14 | 15 | # 2.0.2 16 | 17 | * Added a `NEWS.md` file to track changes to the package (to replace the old `.Rd` version). 18 | 19 | _October 2024_ 20 | 21 | Patch code in support of plotting (`R/utils_fit_models.R`) 22 | 23 | * In `make_surv_curve_plot` replace 24 | ``` 25 | geom_step(data = datakm, aes(x = time, y = S, group=as.factor(strata)), 26 | color="darkgrey") + 27 | geom_ribbon(data = datakm, 28 | aes(x = time, y = S, ymin=lower, ymax=upper, group=as.factor(strata)), 29 | alpha = 0.2) 30 | ``` 31 | with 32 | ``` 33 | geom_step(data = datakm, aes(x = time, y = S, group=as.factor(strata:object_name)), 34 | color="darkgrey") + 35 | geom_ribbon(data = datakm, 36 | aes(x = time, y = S, ymin=lower, ymax=upper, group=as.factor(strata:object_name)), 37 | alpha = 0.2) 38 | ``` 39 | This means that when plotting two or more `survHE` objects, the KM is added and displayed correctly 40 | 41 | * Adds a utility function `make_newdata` that can be used to generate profiles of covariates, to then plot specific groups of individuals' survival or hazard curves. 42 | 43 | _September 2024_ 44 | 45 | # 2.0.1 46 | 47 | * Some refactoring (mainly thanks to @n8thangreen) with tidying up of the underlying code. One major change is that now 'make.surv' outputs an object named 'time' instead of 't', which was ambigious 48 | 49 | _November 2022_ 50 | 51 | # 2.0 52 | 53 | * @Philip-Cooney found a little mistake in how the print method works for hmc objects. The utility functions were accessing the data for the first model of the list of possible models (instead of the specific 'mod' one). Also, RPS would produce bizarre results if no covariates included. 54 | \item @Philip-Cooney also found that 'make_sim_hmc' would break in the case of a RPS model with no covariates, because the matrix of beta coefficients would be turned into a vector, essentially, but the code would try to still subset a column. This has now been fixed so it's OK to make simulations off an RPS with no intercept model and that propagates to plots too. 55 | 56 | _April 2022_ 57 | 58 | * This is a *major* change. In this version, the package is restructured to only perform, in its basic version, MLE estimates using flexsurv as 59 | inferential engine. All the backbone functionalities are unchanged and the user can also expand (to revert to the "full" survHE including Bayesian modelling), by simply also adding the new packages survHEinla and/or survHEhmc. These now only contain the INLA and rstan calls and functionalities. 60 | 61 | _January 2022_ 62 | 63 | # 1.1.4 64 | 65 | * Contribution by Andrew Jones to update compatibility with the newer version of stan. Changes on StanHeaders + stan models to avoid complaints by 66 | the compiler because of declared variables with the same name of a function that was being defined. None of these are directly "visible" to the final user, though... 67 | 68 | * Changes to '.Rbuildignore' to allow 'rstantool' to automatically configuring on package install. Also improves compatibility across versions of 69 | 'rstan'. See https://github.com/giabaio/survHE/pull/42 70 | 71 | _September 2021_ 72 | 73 | # 1.1.3 74 | 75 | * Adds an option 'what' to plot so that 'survival', 'hazard' and 'cumhazard' can be specified (and the plot is modified to the various different scales) 76 | 77 | * Updates the Gamma, GenGamma and GenF models in HMC to include for the possibility that the data contain no censoring. Also fix a small typo in the print method for Gamma/HMC models. 78 | 79 | * Updates in INLA means now the Gompertz model is also available for survival modelling. fit.models(), make.surv() and print() have now been updated so that the Gompertz model can be run under 'survHE'. In order to run the Gompertz model using INLA, the *testing* version (>=21.03.21) needs to be installed (see instructions here: https://www.r-inla.org/download-install). 80 | 81 | * Related to this, to improve computational stability, *all* the INLA models are now run by 'survHE' using the following trick: first the times are rescaled (on the fly) in the interval [0-1] (by simply recomputing 'time=time/max(time'). The resulting models are *not* directly comparable to other inferential engines (because they are fitted to different data), but 'survHE' automatically rescales the estimates and model fitting statistics (eg *ICs) so that the 'plot' and 'print' methods give the correct answers. make.surv() has also been updated to reflect this. 82 | 83 | * There are also changes to make.transition.probs(), which has been updated and streamlined to compute transition probabilities off the survival curves fitted in a 'survHE' object. The computation is quicker and now based on the more robust relationship between the cumulative hazard function 84 | and the transition probabilities. 85 | 86 | * A new function make_data_multi_state() to create a dataset in the format required to analyse data in a multi-state framework. 87 | 88 | * A new function three_state_mm() added to fit a standard 3-states Markov model, based on survival curves that are then mapped onto transition probabilities (this function is under testing, though). 89 | 90 | * Adds two new datasets (TA174 and msmdata), both from the MDM paper by Williams et al (2017) that can be used for analysis of multi-state data. 91 | 92 | _June 2021_ 93 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' A fictional survival trial. 2 | #' 3 | #' A dataset containing fictional data from a trial, where 4 | #' the main outcome is in terms of time-to-event and 5 | #' censoring indicator and with additional covariates. 6 | #' 7 | #' @format A data frame with 367 rows and 8 variables: 8 | #' \describe{ 9 | #' \item{ID_patient}{The individual level identifier} 10 | #' \item{time}{The observed time at which the event happens} 11 | #' \item{censored}{An indicator to describe whether the 12 | #' event is fully observed or censored} 13 | #' \item{arm}{An indicator for the treatment arm, with 14 | #' 0 = control and 1 = active treatment} 15 | #' \item{sex}{An indicator for the individual's sex, with 16 | #' 0 = male and 1 = female} 17 | #' \item{age}{A numeric variable with the individual's age} 18 | #' \item{imd}{A categorical variable representing a measure 19 | #' of area-level social deprivation} 20 | #' \item{ethnic}{A categorical variable representing the 21 | #' individual's ethnic group, as measured from a Census} 22 | #' } 23 | "data" 24 | 25 | 26 | #' NICE TA174 dataset. 27 | #' 28 | #' A dataset containing the data used for NICE TA174, as 29 | #' made publicly available as part of the supplementary 30 | #' material for Williams et al (2017). Medical Decision 31 | #' Making, 37;427-439. 32 | #' 33 | #' @format A tibble with 810 rows and 8 variables: 34 | #' \describe{ 35 | #' \item{patid}{A numeric patient identifier} 36 | #' \item{treat}{The treatment indicator. 1=rituximab 37 | #' in combination with fludarabine andcyclophosphamide 38 | #' (RFC); 0=fludarabine and cyclo-phosphamide alone (FC)} 39 | #' \item{prog}{An indicator to describe whether the 40 | #' patient has experience a progression} 41 | #' \item{death}{An indicator to describe whether the 42 | #' patient has experience death} 43 | #' \item{prog_t}{The observed time at progression, or 44 | #' the time at which the patient has been censored; 45 | #' measured in months} 46 | #' \item{death_t}{The observed time at death, or 47 | #' the time at which the patient has been censored; 48 | #' measured in months} 49 | #' \item{prog_ty}{The observed time at progression, or 50 | #' the time at which the patient has been censored; 51 | #' measured in years} 52 | #' \item{death_ty}{The observed time at death, or 53 | #' the time at which the patient has been censored; 54 | #' measured in years} 55 | #' } 56 | "ta174" 57 | 58 | 59 | #' NICE TA174 dataset in multi-state format. 60 | #' 61 | #' These are the same data contained in NICE TA174, as 62 | #' made publicly available as part of the supplementary 63 | #' material for Williams et al (2017). Medical Decision 64 | #' Making, 37;427-439. However, the data have been 65 | #' restructured (by using the function \code{make_data_multi_state()}) 66 | #' to be used for multi-state analysis 67 | #' 68 | #' @format A tibble with 1868 rows and 16 variables: 69 | #' \describe{ 70 | #' \item{id}{A numeric patient identifier} 71 | #' \item{from}{An indicator of the starting state. 72 | #' 1=Pre-progression; 2=Progression; 3=Death} 73 | #' \item{to}{An indicator for the arriving state} 74 | #' \item{trans}{A code for the actual transition considered. 75 | #' 1=Pre-progression -> Progression; 2=Pre-progression -> 76 | #' Death; 3=Progression -> Death} 77 | #' \item{Tstart}{The time of entry into the observation} 78 | #' \item{Tstop}{The time of exit from observation} 79 | #' \item{time}{The observed time until even (progression or 80 | #' death), or censoring occurs} 81 | #' \item{status}{The event indicator; takes value 1 if the 82 | #' underlying event (which varies depending on which 83 | #' transition is being considered) happens and 0 otherwise} 84 | #' \item{treat}{The treatment indicator. 1=rituximab 85 | #' in combination with fludarabine andcyclophosphamide 86 | #' (RFC); 0=fludarabine and cyclo-phosphamide alone (FC)} 87 | #' \item{patid}{The original numeric patient identifier} 88 | #' \item{prog}{The original indicator to describe whether 89 | #' the patient has experience a progression} 90 | #' \item{death}{The original indicator to describe whether 91 | #' the patient has experience death} 92 | #' \item{prog_t}{The original observed time at progression, 93 | #' or the time at which the patient has been censored; 94 | #' measured in months} 95 | #' \item{death_t}{The original observed time at death, or 96 | #' the time at which the patient has been censored; 97 | #' measured in months} 98 | #' \item{prog_ty}{The original observed time at progression, 99 | #' or the time at which the patient has been censored; 100 | #' measured in years} 101 | #' \item{death_ty}{The original observed time at death, or 102 | #' the time at which the patient has been censored; 103 | #' measured in years} 104 | #' } 105 | "msmdata" -------------------------------------------------------------------------------- /R/digitise.R: -------------------------------------------------------------------------------- 1 | #' Format digitised data for use in survival analysis 2 | #' 3 | #' Produces txt files with Kaplan Meier and individual level survival data from 4 | #' digitised Kaplan Meier curves obtained by DigitizeIT 5 | #' 6 | #' 7 | #' @param surv_inp a txt file obtained for example by DigitizeIT and containing 8 | #' the input survival times from graph reading. This file contains 3 columns 9 | #' 'ID' = the row-ID 10 | #' 'time' = the vector of times captured by the digitisation process 11 | #' 'survival' = the vector of survival probabilities captured by the digitisation 12 | #' process 13 | #' @param nrisk_inp a txt file obtained by DigitizeIT and containing the 14 | #' reported number at risk. This contains the following columns: 15 | #' 'Interval' = the ID of the various intervals included in the analysis ( 16 | #' eg 1, 2, 3, ...) 17 | #' 'Time' = the actual time shown on the x-axis in the digitsed graph 18 | #' 'Lower' = the row of the extracted co-ordinates that the time corresponds 19 | #' to 20 | #' 'Upper' = the row of the extracted co-ordinates for which the time is less 21 | #' than the following time at which we have a number at risk 22 | #' 'nrisk' = the actual number at risk as specified in the original data 23 | #' @param km_output the name of the file to which the KM data will be written 24 | #' @param ipd_output the name of the file to which the individual level data 25 | #' data will be written 26 | #' @author Patricia Guyot and Gianluca Baio 27 | #' @template refs 28 | #' @keywords Digitized Kaplan Meier curve 29 | #' @examples 30 | #' \dontrun{ 31 | #' # Defines the txt files to be used as inputs 32 | #' surv.inp <- system.file("extdata", "survival.txt", package = "survHE") 33 | #' nrisk.inp <- system.file("extdata", "nrisk.txt", package = "survHE") 34 | #' # Runs 'digitise' to create the relevant output files 35 | #' digitise(surv.inp, nrisk.inp) 36 | #' } 37 | #' @export digitise 38 | digitise <- function(surv_inp,nrisk_inp,km_output="KMdata.txt",ipd_output="IPDdata.txt") { 39 | # Post-process the data obtained by DigitizeIT to obtain the KM data and the individual level data 40 | # surv_inp = a txt file obtained by DigitizeIT and containing the input survival times from graph reading 41 | # nrisk_inp = a txt file obtained by DigitizeIT and containing the reported number at risk 42 | # km_output = the name of the file to which the KM data will be written 43 | # ipd_output = the name of the file to which the individual level data data will be written 44 | # Adapted from Patricia Guyot (2012) 45 | 46 | # Defines the working directory (same as the one where the DigitizeIT data are) 47 | working.dir <- dirname(surv_inp) 48 | #### setwd(working.dir); working.dir <- paste0(getwd(),"/") 49 | tot.events<-"NA" #tot.events = total no. of events reported. If not reported, then tot.events="NA" 50 | arm.id<-1 #arm indicator 51 | 52 | #Read in survival times read by digizeit 53 | digizeit <- read.table(surv_inp,header=TRUE,row.names=NULL) 54 | t.S<-digizeit[,2] # times recorded from DigitizeIT 55 | S<-digizeit[,3] # survival from DigitizeIT 56 | 57 | #Read in published numbers at risk, n.risk, at time, t.risk, lower and upper indexes for time interval 58 | pub.risk<-read.table(nrisk_inp,header=TRUE,row.names=NULL) 59 | ## Needs to get rid of possible time intervals with no digitised observations 60 | pub.risk <- pub.risk[pub.risk[,4]>0,] 61 | ## Needs to recode the first ever occurrence to 1?? 62 | if (!(pub.risk[1,3]==1)) {pub.risk[1,3] <- 1} 63 | 64 | # Defines the variables needed for the algorithm 65 | t.risk<-pub.risk[,2] 66 | lower<-pub.risk[,3] 67 | upper<-pub.risk[,4] 68 | n.risk<-pub.risk[,5] 69 | n.int<-length(n.risk) 70 | n.t<- upper[n.int] 71 | 72 | #Initialise vectors 73 | arm <- rep(arm.id,n.risk[1]) 74 | n.censor <- rep(0,(n.int-1)) 75 | n.hat <- rep(n.risk[1]+1,n.t) 76 | cen <- d <- rep(0,n.t) 77 | KM.hat <- rep(1,n.t) 78 | last.i <- rep(1,n.int) 79 | sumdL <- 0 80 | 81 | # Executes Patricia's algorithm to determine censoring 82 | if (n.int > 1){ 83 | #Time intervals 1,...,(n.int-1) 84 | for (i in 1:(n.int-1)){ 85 | #First approximation of no. censored on interval i 86 | n.censor[i]<- round(n.risk[i]*S[lower[i+1]]/S[lower[i]]- n.risk[i+1]) 87 | #Adjust tot. no. censored until n.hat = n.risk at start of interval (i+1) 88 | while((n.hat[lower[i+1]]>n.risk[i+1])||((n.hat[lower[i+1]]0))){ 89 | if (n.censor[i]<=0){ 90 | cen[lower[i]:upper[i]]<-0 91 | n.censor[i]<-0 92 | } 93 | if (n.censor[i]>0){ 94 | cen.t<-rep(0,n.censor[i]) 95 | for (j in 1:n.censor[i]){ 96 | cen.t[j]<- t.S[lower[i]] + 97 | j*(t.S[lower[(i+1)]]-t.S[lower[i]])/(n.censor[i]+1) 98 | } 99 | #Distribute censored observations evenly over time. Find no. censored on each time interval. 100 | cen[lower[i]:upper[i]]<-hist(cen.t,breaks=t.S[lower[i]:lower[(i+1)]],plot=F)$counts 101 | } 102 | #Find no. events and no. at risk on each interval to agree with K-M estimates read from curves 103 | n.hat[lower[i]]<-n.risk[i] 104 | last<-last.i[i] 105 | for (k in lower[i]:upper[i]){ 106 | if (i==1 & k==lower[i]){ 107 | d[k]<-0 108 | KM.hat[k]<-1 109 | } 110 | else { 111 | d[k]<-round(n.hat[k]*(1-(S[k]/KM.hat[last]))) 112 | KM.hat[k]<-KM.hat[last]*(1-(d[k]/n.hat[k])) 113 | } 114 | n.hat[k+1]<-n.hat[k]-d[k]-cen[k] 115 | if (d[k] != 0) last<-k 116 | } 117 | n.censor[i]<- n.censor[i]+(n.hat[lower[i+1]]-n.risk[i+1]) 118 | } 119 | if (n.hat[lower[i+1]]1){ 125 | #Assume same censor rate as average over previous time intervals. 126 | n.censor[n.int]<- min(round(sum(n.censor[1:(n.int-1)])*(t.S[upper[n.int]]- 127 | t.S[lower[n.int]])/(t.S[upper[(n.int-1)]]-t.S[lower[1]])), n.risk[n.int]) 128 | } 129 | if (n.int==1){n.censor[n.int]<-0} 130 | if (n.censor[n.int] <= 0){ 131 | cen[lower[n.int]:(upper[n.int]-1)]<-0 132 | n.censor[n.int]<-0 133 | } 134 | if (n.censor[n.int]>0){ 135 | cen.t<-rep(0,n.censor[n.int]) 136 | for (j in 1:n.censor[n.int]){ 137 | cen.t[j]<- t.S[lower[n.int]] + 138 | j*(t.S[upper[n.int]]-t.S[lower[n.int]])/(n.censor[n.int]+1) 139 | } 140 | cen[lower[n.int]:(upper[n.int]-1)]<-hist(cen.t,breaks=t.S[lower[n.int]:upper[n.int]],plot=F)$counts 141 | } 142 | #Find no. events and no. at risk on each interval to agree with K-M estimates read from curves 143 | n.hat[lower[n.int]]<-n.risk[n.int] 144 | last<-last.i[n.int] 145 | for (k in lower[n.int]:upper[n.int]){ 146 | if(KM.hat[last] !=0){ 147 | d[k]<-round(n.hat[k]*(1-(S[k]/KM.hat[last])))} else {d[k]<-0} 148 | KM.hat[k]<-KM.hat[last]*(1-(d[k]/n.hat[k])) 149 | n.hat[k+1]<-n.hat[k]-d[k]-cen[k] 150 | #No. at risk cannot be negative 151 | if (n.hat[k+1] < 0) { 152 | n.hat[k+1]<-0 153 | cen[k]<-n.hat[k] - d[k] 154 | } 155 | if (d[k] != 0) last<-k 156 | } 157 | #If total no. of events reported, adjust no. censored so that total no. of events agrees. 158 | if (tot.events != "NA"){ 159 | if (n.int>1){ 160 | sumdL<-sum(d[1:upper[(n.int-1)]]) 161 | #If total no. events already too big, then set events and censoring = 0 on all further time intervals 162 | if (sumdL >= tot.events){ 163 | d[lower[n.int]:upper[n.int]]<- rep(0,(upper[n.int]-lower[n.int]+1)) 164 | cen[lower[n.int]:(upper[n.int]-1)]<- rep(0,(upper[n.int]-lower[n.int])) 165 | n.hat[(lower[n.int]+1):(upper[n.int]+1)]<- rep(n.risk[n.int],(upper[n.int]+1-lower[n.int])) 166 | } 167 | } 168 | #Otherwise adjust no. censored to give correct total no. events 169 | if ((sumdL < tot.events)|| (n.int==1)){ 170 | sumd<-sum(d[1:upper[n.int]]) 171 | while ((sumd > tot.events)||((sumd< tot.events)&&(n.censor[n.int]>0))){ 172 | n.censor[n.int]<- n.censor[n.int] + (sumd - tot.events) 173 | if (n.censor[n.int]<=0){ 174 | cen[lower[n.int]:(upper[n.int]-1)]<-0 175 | n.censor[n.int]<-0 176 | } 177 | if (n.censor[n.int]>0){ 178 | cen.t<-rep(0,n.censor[n.int]) 179 | for (j in 1:n.censor[n.int]){ 180 | cen.t[j]<- t.S[lower[n.int]] + 181 | j*(t.S[upper[n.int]]-t.S[lower[n.int]])/(n.censor[n.int]+1) 182 | } 183 | cen[lower[n.int]:(upper[n.int]-1)]<-hist(cen.t,breaks=t.S[lower[n.int]:upper[n.int]],plot=F)$counts 184 | } 185 | n.hat[lower[n.int]]<-n.risk[n.int] 186 | last<-last.i[n.int] 187 | for (k in lower[n.int]:upper[n.int]){ 188 | d[k]<-round(n.hat[k]*(1-(S[k]/KM.hat[last]))) 189 | KM.hat[k]<-KM.hat[last]*(1-(d[k]/n.hat[k])) 190 | if (k != upper[n.int]){ 191 | n.hat[k+1]<-n.hat[k]-d[k]-cen[k] 192 | #No. at risk cannot be negative 193 | if (n.hat[k+1] < 0) { 194 | n.hat[k+1]<-0 195 | cen[k]<-n.hat[k] - d[k] 196 | } 197 | } 198 | if (d[k] != 0) last<-k 199 | } 200 | sumd <- sum(d[1:upper[n.int]]) 201 | } 202 | } 203 | } 204 | 205 | # Now writes the results to the output files 206 | KMdata <- data.frame(time=t.S,n.risk=n.hat[1:n.t],n.event=d,n.censored=cen) 207 | write.table(KMdata,km_output,sep="\t",row.names=FALSE,col.names=TRUE) 208 | 209 | # And forms IPD data 210 | #Initialise vectors 211 | t.IPD <- rep(t.S[n.t],n.risk[1]) 212 | event.IPD <- rep(0,n.risk[1]) 213 | #Write event time and event indicator (=1) for each event, as separate row in t.IPD and event.IPD 214 | k <- 1 215 | for (j in 1:n.t){ 216 | if(d[j]!=0){ 217 | t.IPD[k:(k+d[j]-1)]<- rep(t.S[j],d[j]) 218 | event.IPD[k:(k+d[j]-1)]<- rep(1,d[j]) 219 | k<-k+d[j] 220 | } 221 | } 222 | #Write censor time and event indicator (=0) for each censor, as separate row in t.IPD and event.IPD 223 | for (j in 1:(n.t-1)){ 224 | if(cen[j]!=0){ 225 | t.IPD[k:(k+cen[j]-1)]<- rep(((t.S[j]+t.S[j+1])/2),cen[j]) 226 | event.IPD[k:(k+cen[j]-1)]<- rep(0,cen[j]) 227 | k<-k+cen[j] 228 | } 229 | } 230 | #Output IPD 231 | IPD <- data.frame(time=t.IPD,event=event.IPD,arm) 232 | write.table(IPD,ipd_output,sep="\t",row.names=FALSE,col.names=TRUE) 233 | 234 | if (dirname(km_output)==".") { 235 | cat("\n") 236 | cat(paste0("Kaplan Meier data written to file: ",working.dir,km_output)) 237 | } else { 238 | cat("\n") 239 | cat(paste0("Kaplan Meier data written to file: ",km_output)) 240 | } 241 | if (dirname(ipd_output)==".") { 242 | cat("\n") 243 | cat(paste0("IPD data written to file: ",working.dir,ipd_output)) 244 | cat("\n") 245 | } else { 246 | cat("\n") 247 | cat(paste0("IPD data written to file: ",ipd_output)) 248 | cat("\n") 249 | } 250 | } 251 | -------------------------------------------------------------------------------- /R/fit_models.R: -------------------------------------------------------------------------------- 1 | ## SET OF UTILITY FUNCTIONS TO INCLUDE SURVIVAL ANALYSIS RESULTS INTO A HEALTH ECONOMIC MODEL 2 | ## Gianluca Baio + Will Browne + Peter Konings (10 Jan 2017) 3 | #' Fit parametric survival analysis for health economic evaluations 4 | #' 5 | #' Runs the survival analysis with several useful options, using either MLE 6 | #' (via flexsurv) or a Bayesian approach (via R-INLA or rstan) 7 | #' 8 | #' On object in the class \code{survHE} containing the following elements 9 | #' 10 | #' @param formula a formula specifying the model to be used, in the form 11 | #' \code{Surv(time,event)~treatment[+covariates]} for flexsurv, or 12 | #' \code{inla.surv(time,event)~treatment[+covariates]} for INLA 13 | #' @param data A data frame containing the data to be used for the analysis. 14 | #' This must contain data for the 'event' variable. In case there is no 15 | #' censoring, then \code{event} is a column of 1s. 16 | #' @param distr a (vector of) string(s) containing the name(s) of the model(s) 17 | #' to be fitted. Available options are: 18 | #' 19 | #' \code{flexsurv}: 20 | #' "exponential","gamma","genf","gengamma","gompertz","weibull", 21 | #' "weibullPH","loglogistic","lognormal" \code{INLA}: 22 | #' "exponential","weibull","lognormal","loglogistic" \code{hmc}: 23 | #' "exponential","gamma","genf","gengamma","gompertz","weibull","weibullPH", 24 | #' "loglogistic","lognormal" 25 | #' @param method A string specifying the inferential method (\code{'mle'}, 26 | #' \code{'inla'} or \code{'hmc'}). If \code{method} is set to \code{'hmc'}, 27 | #' then \code{survHE} will write suitable model code in the Stan language 28 | #' (according to the specified distribution), prepare data and initial values 29 | #' and then run the model. 30 | #' @param \dots Additional options (for INLA or HMC). 31 | #' 32 | #' **INLA** specific options \code{dz} = defines the step length for the grid 33 | #' search over the hyperparameters space (default = 0.1) \code{diff.logdens} = 34 | #' defines the difference in the log-density for the hyperparameters to stop 35 | #' integration (default = 5) \code{control.fixed} = defines the default for the 36 | #' priors, unless specified by the user. Default values are prior mean = 0 for 37 | #' *all* fixed effects prior var = 1000 for *all* fixed effects prior mean = 0 38 | #' for the intercept prior prec -> 0 for the intercept \code{control.family} = 39 | #' a list of options. If distr is a vector, then can be provided as a named 40 | #' list of options, for example something like this: 41 | #' \code{control.family=list(weibull=list(param=c(.1,.1)),lognormal=list(initial=2))} 42 | #' the names of the elements of the list need to be the same as those given in 43 | #' the vector \code{distr} 44 | #' 45 | #' **HMC** specific options \code{chains} = number of chains to run in the HMC 46 | #' (default = 2) \code{iter} = total number of iterations (default = 2000) 47 | #' \code{warmup} = number of warmup iterations (default = iter/2) \code{thin} = 48 | #' number of thinning (default = 1) \code{control} = a list specifying 49 | #' Stan-related options, eg \code{control=list(adapt_delta=0.85)} (default = 50 | #' NULL) \code{seed} = the random seed (to make things replicable) \code{pars} 51 | #' = a vector of parameters (string, default = NA) \code{include} = a logical 52 | #' indicator (if FALSE, then the pars are not saved; default = TRUE) 53 | #' \code{priors} = a list (of lists) specifying the values for the parameters 54 | #' of the prior distributions in the models \code{save.stan} = a logical 55 | #' indicator (default = FALSE). If TRUE, then saves the data list for Stan and 56 | #' the model file(s) 57 | #' @return \item{models}{ A list containing the fitted models. These contain 58 | #' the output from the original inference engine (\code{flexsurv}, \code{INLA} 59 | #' or \code{rstan}). Can be processed using the methods specific to the 60 | #' original packages, or via \code{survHE}-specific methods (such as 61 | #' \code{plot}, \code{print}) or other specialised functions (eg to extrapolate 62 | #' the survival curves, etc). } \item{model.fitting}{ A list containing the 63 | #' output of the model-fit statistics (AIC, BIC, DIC). The AIC and BIC are 64 | #' estimated for all methods, while the DIC is only estimated when using 65 | #' Bayesian inference. } \item{method}{ A string indicating the method used to 66 | #' fit the model, ie \code{'mle'}, \code{'inla'} or \code{'hmc'}. } 67 | #' \item{misc}{ A list containing the time needed to run the model(s) (in 68 | #' seconds), the formula used, the results of the Kaplan-Meier analysis (which 69 | #' is automatically performed using \code{npsurv}) and the original data frame. 70 | #' } 71 | #' @author Gianluca Baio 72 | #' @seealso \code{make.surv} 73 | #' @template refs 74 | #' @keywords Parametric survival models Bayesian inference via Hamiltonian 75 | #' Monte Carlo Bayesian inference via Integrated Nested Laplace Approximation 76 | #' @examples 77 | #' \dontrun{ 78 | #' # Loads an example dataset from 'flexsurv' 79 | #' data(bc) 80 | #' 81 | #' # Fits the same model using the 3 inference methods 82 | #' mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 83 | #' distr="exp",method="mle") 84 | #' inla = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 85 | #' distr="exp",method="inla") 86 | #' hmc = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 87 | #' distr="exp",method="hmc") 88 | #' 89 | #' # Prints the results in comparable fashion using the survHE method 90 | #' print(mle) 91 | #' print(inla) 92 | #' print(hmc) 93 | #' 94 | #' # Or visualises the results using the original packages methods 95 | #' print(mle,original=TRUE) 96 | #' print(inla,original=TRUE) 97 | #' print(hmc,original=TRUE) 98 | #' 99 | #' # Plots the survival curves and estimates 100 | #' plot(mle) 101 | #' plot(mle,inla,hmc,labs=c("MLE","INLA","HMC"),colors=c("black","red","blue")) 102 | #' } 103 | #' 104 | #' @export fit.models 105 | fit.models <- function(formula = NULL, data , distr = NULL, method = "mle", ...) { 106 | # Captures the call 107 | call=match.call() 108 | 109 | # Lists all the additional inputs 110 | exArgs <- list(...) 111 | # Adds the 'formula' to exArgs, so it can be used by 'runHMC' and 'runINLA' 112 | exArgs$formula <- formula 113 | # Adds the 'data' to exArgs so it can be used by 'runHMC', 'runMLE' and 'runINLA' 114 | exArgs$data=data 115 | # Adds the 'call' to exArgs 116 | exArgs$call=call 117 | 118 | # Avoids the 'no visible binding for global variable' error, when compiling 119 | #model <- NULL 120 | 121 | # Needs to specify either the formula or the list of variables! 122 | if(is.null(formula)) { 123 | stop("You need to specify a model 'formula', e.g. 'formula=Surv(time,event)~treat'") 124 | } 125 | # ensures method is lower case 126 | method <- tolower(method) 127 | # ensures method is one of "mle","inla", "mcmc" 128 | if(!method %in% c("hmc","inla","mle")) { 129 | stop("Methods available for use are 'mle', 'hmc' or 'inla'") 130 | } 131 | 132 | # Check whether the selected distribution(s) can be implemented with the selected method 133 | # (and if not, falls back to 'mle') 134 | method=check_distributions(method,distr) 135 | 136 | # MLE ----- 137 | # If method = MLE, then fits the model(s) using flexsurvreg 138 | if (method=="mle") { 139 | # Runs the models using the helper 'runMLE' and use the helper 'format_output_fit.models 140 | res <- format_output_fit.models(lapply(distr,function(x) runMLE(x,exArgs)),method,distr,formula,data) 141 | } 142 | 143 | # INLA ----- 144 | # If method = INLA, then fits model(s) using inla 145 | if (method=="inla") { 146 | if (!isTRUE(requireNamespace("survHEinla", quietly = TRUE))) { 147 | stop("You need to install the packages 'survHEinla'. Please run in your R terminal:\n remotes::install_github('giabaio/survHEinla')") 148 | } 149 | # If survHEinla is installed but not loaded then attach the Namespace (so that all the relevant functions are available) 150 | if (isTRUE(requireNamespace("survHEinla", quietly = TRUE))) { 151 | if (!is.element("survHEinla", (.packages()))) { 152 | attachNamespace("survHEinla") 153 | } 154 | res <- format_output_fit.models(lapply(distr,function(x) survHEinla::runINLA(x,exArgs)),method,distr,formula,data) 155 | } 156 | } 157 | 158 | # HMC ----- 159 | if (method == "hmc") { 160 | if (!isTRUE(requireNamespace("survHEhmc", quietly = TRUE))) { 161 | stop("You need to install the packages 'survHEhmc'. Please run in your R terminal:\n remotes::install_github('giabaio/survHEhmc')") 162 | } 163 | # If survHEhmc is installed but not loaded then attach the Namespace (so that all the relevant functions are available) 164 | if (isTRUE(requireNamespace("survHEhmc", quietly = TRUE))) { 165 | if (!is.element("survHEhmc", (.packages()))) { 166 | attachNamespace("survHEhmc") 167 | } 168 | res <- format_output_fit.models(lapply(distr,function(x) survHEhmc::runHMC(x,exArgs)),method,distr,formula,data) 169 | } 170 | } 171 | 172 | return(res) 173 | } 174 | -------------------------------------------------------------------------------- /R/global.R: -------------------------------------------------------------------------------- 1 | #' This defines variables, mainly used with 'dplyr' and pipes that 2 | #' throw a 'no visible binding for global variable' during 3 | #' 'R CMD check' (before CRAN submission) 4 | #' 5 | #' @noRd 6 | utils::globalVariables(c( 7 | ".", 8 | "low", 9 | "upp", 10 | "time", 11 | "event", 12 | "(Intercept)", 13 | "S", 14 | "model_name", 15 | "strata", 16 | "object_name", 17 | "lower", 18 | "upper", 19 | "value", 20 | "lab", 21 | "aic", 22 | "bic", 23 | "dic", 24 | "mods_id", 25 | "xmin", 26 | "xmax", 27 | "ymin", 28 | "ymax", 29 | "where", 30 | "Death", 31 | "Alive", 32 | "disc", 33 | "Tstop", 34 | "Tstart", 35 | "from", 36 | "to", 37 | "trans", 38 | "status", 39 | "treat", 40 | "Pre-progressed", 41 | "Progressed", 42 | "Death", 43 | "l_12", 44 | "l_13", 45 | "l_11", 46 | "check", 47 | "sim_idx", 48 | "npeople", 49 | "grp_lab", 50 | "Group" 51 | )) 52 | -------------------------------------------------------------------------------- /R/make.ipd.R: -------------------------------------------------------------------------------- 1 | #' Create an individual level dataset from digitised data 2 | #' 3 | #' Piles in the simulated IPD resulting from running digitise for more than one 4 | #' treatment arm 5 | #' 6 | #' 7 | #' @param ipd_files a list including the names of the IPD files created as 8 | #' output of digitise 9 | #' @param ctr the index of the file associated with the control arm (default, 10 | #' the first file). This will be coded as 0 11 | #' @param var.labs a vector of labels for the column of the resulting data 12 | #' matrix. NB these should match the arguments to the formula specified for 13 | #' fit.models. The user can specify values. These should be 4 elements (ID, 14 | #' TIME, EVENT, ARM) 15 | #' @author Gianluca Baio 16 | #' @seealso Something will go here 17 | #' @references Something will go here 18 | #' @keywords Digitized Kaplan Meier curve 19 | #' @examples 20 | #' \dontrun{ 21 | #' # Defines the txt files to be used as inputs 22 | #' surv.inp <- system.file("extdata", "survival.txt", package = "survHE") 23 | #' nrisk.inp <- system.file("extdata", "nrisk.txt", package = "survHE") 24 | #' # Runs 'digitise' to create the relevant output files 25 | #' digitise(surv.inp, nrisk.inp, ipd_output = "IPD.txt") 26 | #' # Now uses 'make.ipd' to create the pseudo-data 27 | #' make.ipd("IPD.txt", ctr = 1, var.labs = c("time", "event", "arm")) 28 | #' } 29 | #' @export make.ipd 30 | make.ipd <- function(ipd_files,ctr=1,var.labs=c("time","event","arm")) { 31 | ## Piles in the simulated IPD resulting from running digitise for more than one treatment arm 32 | ## ipd_files = a list including the names of the IPD files created as output of digitise 33 | ## ctr = the index of the file associated with the control arm (default, the first file). 34 | ## This will be coded as 0 35 | ## var.labs = a vector of labels for the column of the resulting data matrix. NB these 36 | ## should match the arguments to the formula specified for fit.models. The 37 | ## user can specify values. These should be 3 elements (TIME, EVENT, ARM) 38 | 39 | # Identifies the number of arms (= number of IPD files) 40 | n_arms <- length(ipd_files) 41 | index <- 1:n_arms 42 | active <- index[-ctr] 43 | data <- read.table(ipd_files[[ctr]],header=TRUE,row.names=NULL) 44 | data[,"arm"] <- 0 # sets the value of "arm" to 0, for the control group 45 | arm.ind <- 1 46 | for (i in active) { 47 | tmp <- read.table(ipd_files[[index[i]]],header=TRUE,row.names=NULL) 48 | tmp[,"arm"] <- arm.ind 49 | data <- rbind(data,tmp) 50 | arm.ind <- arm.ind+1 51 | } 52 | colnames(data) <- var.labs 53 | return(data) 54 | } 55 | -------------------------------------------------------------------------------- /R/make.surv.R: -------------------------------------------------------------------------------- 1 | #' Engine for Probabilistic Sensitivity Analysis on the survival curves 2 | #' 3 | #' Creates the survival curves for the fitted model(s) 4 | #' 5 | #' @param fit the result of the call to the \code{fit.models} function, 6 | #' containing the model fitting (and other relevant information) 7 | #' @param mod the index of the model. Default value is 1, but the user can 8 | #' choose which model fit to visualise, if the call to \code{fit.models} has a vector 9 | #' argument for \code{distr} (so many models are fitted & stored in the same object) 10 | #' @param t the time vector to be used for the estimation of the survival curve 11 | #' @param newdata a list (of lists), specifying the values of the covariates 12 | #' at which the computation is performed. For example 13 | #' \code{list(list(arm=0),list(arm=1))} will create two survival curves, one 14 | #' obtained by setting the covariate \code{arm} to the value 0 and the other by 15 | #' setting it to the value 1. In line with \code{flexsurv} notation, the user 16 | #' needs to either specify the value for *all* the covariates or for none (in 17 | #' which case, \code{newdata=NULL}, which is the default). If some value is 18 | #' specified and at least one of the covariates is continuous, then a single 19 | #' survival curve will be computed in correspondence of the average values of 20 | #' all the covariates (including the factors, which in this case are expanded 21 | #' into indicators). 22 | #' @param nsim The number of simulations from the distribution of the survival 23 | #' curves. Default at \code{nsim=1}, in which case uses the point estimate for 24 | #' the relevant distributional parameters and computes the resulting survival 25 | #' curve 26 | #' @param ... Additional options 27 | #' @author Gianluca Baio 28 | #' @seealso \code{\link{fit.models}}, \code{\link{psa.plot}}, \code{\link{write.surv}} 29 | #' @template refs 30 | #' @keywords Survival models Bootstrap Probabilistic sensitivity analysis 31 | #' 32 | #' @examples 33 | #' \dontrun{ 34 | #' # Loads an example dataset from 'flexsurv' 35 | #' data(bc) 36 | #' 37 | #' # Fits the same model using the 3 inference methods 38 | #' mle <- fit.models(formula=Surv(recyrs,censrec) ~ group, data=bc, 39 | #' distr="exp", method="mle") 40 | #' p.mle <- make.surv(mle) 41 | #' psa.plot(p.mle) 42 | #' 43 | #' # Can also use the main 'plot' function to visualise the survival curves 44 | #' # and include uncertainty by using a number 'nsim' of simulations 45 | #' plot(mle, nsim=10) 46 | #' } 47 | #' 48 | #' @export make.surv 49 | #' 50 | make.surv <- function(fit, mod=1, t=NULL, newdata=NULL, nsim=1,...) { 51 | ## Creates the survival curves for the fitted model(s) 52 | # fit = the result of the call to the fit.models function, containing the model fitting (and other relevant information) 53 | # mod = the index of the model. Default value is 1, but the user can choose which model fit to visualise, 54 | # if the call to fit.models has a vector argument for distr (so many models are fitted & stored in the same object) 55 | # t = the time framework to be used for the estimation of the survival curve 56 | # newdata = a list (of lists), specifying the values of the covariates at which the computation is performed. For example 57 | # 'list(list(arm=0),list(arm=1))' will create two survival curves, one obtained by setting the covariate 'arm' 58 | # to the value 0 and the other by setting it to the value 1. In line with 'flexsurv' notation, the user needs 59 | # to either specify the value for *all* the covariates or for none (in which case, 'newdata=NULL', which is the 60 | # default). If some value is specified and at least one of the covariates is continuous, then a single survival 61 | # curve will be computed in correspondence of the average values of all the covariates (including the factors, 62 | # which in this case are expanded into indicators). The order of the variables in the list *must* be the same 63 | # as in the formula used for the model 64 | # nsim = the number of simulations from the distribution of the survival curves. Default at nsim=1, in which case 65 | # uses the point estimate for the relevant distributional parameters and computes the resulting survival curve 66 | # ... = additional options 67 | 68 | # Defines list with optional parameters 69 | exArgs <- list(...) 70 | 71 | # The Poly-Weibull/HMC model is special and needs a custom function 72 | if (fit$misc$model_name[mod] == "pow") { 73 | pwstuff <- make_surv_pw(fit=fit,mod=mod,t=t,newdata=newdata,nsim=nsim,exArgs) 74 | sim <- pwstuff$sim 75 | mat <- pwstuff$mat 76 | X <- pwstuff$X 77 | t <- pwstuff$t 78 | } else { 79 | # Any other model/method has a streamlined process using helper functions 80 | # Extracts the model object and the data from the survHE output 81 | m <- fit$models[[mod]] 82 | data <- fit$misc$data 83 | 84 | # Create a vector of times, if the user hasn't provided one, based on the observed data 85 | if (is.null(t)) { 86 | t <- sort(unique(fit$misc$km$time)) 87 | } 88 | # By default uses the mean to compute summary statistics (eg for the case when nsim=1) 89 | # but the user can specify the median, which works better for very skewed parameters 90 | # which happens for example when trying to fit a model with too many parameters, which 91 | # results in a huge uncertainty, blowing up the estimates and making even the mean survival 92 | # curve impossible to compute 93 | if (exists("summary_stat",exArgs)){ 94 | summary_stat <- exArgs$summary_stat 95 | } else {summary_stat <- "mean"} 96 | 97 | # Makes sure the distribution name(s) vector is in a usable format 98 | dist <- fit$misc$model_name[mod] 99 | 100 | # Now creates the profile of covariates for which to compute the survival curves 101 | X <- make_profile_surv(fit$misc$formula, data, newdata) 102 | 103 | # This is needed to rescale correctly the INLA models (which are fitted 104 | # on a range [0-1] for numerical stability) 105 | time_max <- max(fit$misc$km$time) 106 | 107 | # Draws a sample of nsim simulations from the distribution of the model parameters 108 | sim <- do.call(paste0("make_sim_",fit$method), 109 | args=list(m=m,t=t,X=X,nsim=nsim,newdata=newdata,dist=dist,data=data, 110 | formula=fit$misc$formula,summary_stat=summary_stat,time_max=time_max) 111 | ) 112 | # Computes the survival curves - first in matrix form with all the simulations 113 | # Needs to add more inputs for the case of hmc/rps 114 | if (fit$method=="hmc" && dist=="rps") { 115 | exArgs$data.stan <- fit$misc$data.stan[[mod]] 116 | t[t==0] <- min(0.00001,min(t[t>0])) 117 | } 118 | if (fit$method=="mle" && dist=="rps") { 119 | exArgs$knots <- fit$models[[mod]]$knots 120 | } 121 | 122 | mat <- do.call(compute_surv_curve, 123 | args=list(sim=sim,exArgs=exArgs,nsim=nsim, 124 | dist=dist,t=t,method=fit$method,X=X) 125 | ) 126 | } 127 | 128 | # Finally computes the actual survival curves, in summary forms 129 | if (nsim == 1) { 130 | # If nsim=1 then only save the point estimates of the survival curve 131 | S <- lapply(mat, function(x) { 132 | rowwise(x, time) |> 133 | summarise(S = mean(c_across(contains("S")))) |> ungroup() 134 | }) 135 | } else { 136 | # If nsim>1 then also give the lower and upper quartile of the underlying distribution 137 | # avoids no visibile binding for global variable 138 | 139 | S <- lapply(mat, function(x) { 140 | rowwise(x, time) |> 141 | summarise(S = mean(c_across(contains("S"))), 142 | low = quantile(c_across(contains("S")), 0.025), 143 | upp = quantile(c_across(contains("S")), 0.975)) |> ungroup() 144 | }) 145 | } 146 | 147 | list( 148 | S = S, 149 | sim = sim, 150 | nsim = nsim, 151 | mat = mat, 152 | des.mat = X, 153 | times = t) 154 | } -------------------------------------------------------------------------------- /R/make.transition.probs.R: -------------------------------------------------------------------------------- 1 | #' make.transition.probs 2 | #' 3 | #' Computes the transition probabilities (to be passed to a Markov model) from 4 | #' the cumulative hazard curves obtained using \code{fit.models}, using the formula 5 | #' p(t)=1-exp(H(t-k)/H(t)), where k is the Markov model cycle length (or the 6 | #' difference across two consecutive times) and t is a generic time 7 | #' 8 | #' @aliases make.transition.probs 9 | #' @param fit an object obtained as output of the call to \code{fit.models} 10 | #' @param labs a vector with labels to identify the 'profiles' ie the 11 | #' combination of covariates that have been passed onto the model formula. 12 | #' If 'NULL' (default), then figures it out from the 'survHE' object. 13 | #' @param ... additional arguments. Includes the standard inputs to the 14 | #' call to \code{make.surv}, so \code{mod} (the index of the possibly many 15 | #' models stored in the 'survHE' object), \code{t} (the vector of times 16 | #' over which to compute the survival curves), \code{newdata} (a list that 17 | #' defines the profile of covariates) and \code{nsim} (the number of 18 | #' simulations to use - default is \code{nsim}=1) 19 | #' @return A tibble 'lambda' with an indicator for the treatment arm, 20 | #' the times at which the probabilities have been computed and \code{nsim} 21 | #' columns each with a simulation of the transition probabilities for 22 | #' all the times specified by the user 23 | #' @note Something will go here 24 | #' @author Gianluca Baio 25 | #' @seealso \code{\link{make.surv}} 26 | #' @references Something will go here 27 | #' @keywords Transition probabilities Markov models 28 | #' @examples 29 | #' \dontrun{ 30 | #' # Something will go here 31 | #' } 32 | #' 33 | #' @export make.transition.probs 34 | make.transition.probs <- function(fit,labs=NULL,...) { 35 | exArgs <- list(...) 36 | 37 | # Makes default for parameters to the call to 'make.surv' (which are overwritten if the user has specified them 38 | # separately) 39 | if(exists("mod",exArgs)) {mod <- exArgs$mod} else {mod <- 1} 40 | if(exists("t",exArgs)) {t <- exArgs$t} else {t <- NULL} 41 | if(is.null(t)) { 42 | t <- sort(unique(fit$misc$km$time)) 43 | # Add an extra time=0 at the beginning. This ensures the computation can be done for all the actual times 44 | # specified by the user (because the first one has no lag and so the ratio gives NA...) 45 | #if(t[1]>0) {t=c(0,t)} 46 | } 47 | # Add an extra time=0 at the beginning. This ensures the computation can be done for all the actual times 48 | # specified by the user (because the first one has no lag and so the ratio gives NA...) 49 | t <- c(0,t) 50 | if(exists("newdata",exArgs)) {newdata <- exArgs$newdata} else {newdata <- NULL} 51 | if(exists("nsim",exArgs)) {nsim <- exArgs$nsim} else {nsim <- 1} 52 | # Now computes the simulations using 'make.surv' 53 | s <- make.surv(fit,mod=mod,t=t,newdata=newdata,nsim=nsim) 54 | 55 | # Get labels of the 'strata' 56 | strata <- lapply(1:nrow(s$des.mat),function(x){ 57 | s$des.mat %>% 58 | as_tibble() %>% 59 | select(!contains("(Intercept)")) %>% 60 | slice(x) %>% 61 | round(digits=2) %>% 62 | mutate(strata=paste0(names(.),"=",.,collapse=",")) 63 | }) %>% 64 | bind_rows(.) %>% 65 | select(strata) %>% 66 | pull(strata) 67 | 68 | # Now retrieves the transition probabilities 'lambda' applying the formula 69 | # lambda(t)=1-S(t+k)/S(t) where k is the MM cycle length and t is a generic time 70 | lambda <- s$mat %>% 71 | # First stacks together all the matrices with the simulation(s) for the survival curves 72 | bind_rows() %>% 73 | # Then creates a treatment indicator (1,2,...) & place it as the first column 74 | mutate(profile=rep(strata,each=nrow(s$mat[[1]]))) %>% select(profile,everything()) %>% 75 | # Then group by treatment and computes the cumulative hazards 76 | group_by(profile) %>% mutate(across(starts_with("S"),~-log(.))) %>% 77 | # Then computes the probabilities using 1-exp(H(t-k)-H(t)), where k is the cycle length (or difference across times) 78 | ####mutate(across(starts_with("S"),~case_when(.==0~0,TRUE~1-exp(lag(.)-.)))) %>% 79 | mutate(across(starts_with("S"), ~ 1-exp(lag(.)-.))) %>% 80 | # Then removes the first row (in each treatment) - that was just artificially added anyway... 81 | slice(-1) %>% ungroup() 82 | 83 | # And now renames the columns from S(_1,S_2,...,S_nsim) to lambda(_1,lambda_2,...,lambda_nsim) 84 | if (nsim==1) { 85 | lambda <- lambda %>% 86 | rename_with(starts_with("S"), .fn=~"lambda") 87 | } else { 88 | lambda <- lambda %>% 89 | rename_with(starts_with("S"), .fn=~paste0("lambda_",1:nsim)) 90 | } 91 | 92 | lambda 93 | } 94 | 95 | 96 | #' make_data_multi_state 97 | #' 98 | #' Takes as input an individual-level dataset including data on both 99 | #' progression and death time (**jointly**) and manipulates it using 100 | #' \code{dplyr} functions to create a full "multi-state" dataset, in 101 | #' which all the transitions are tracked. This can then be used 102 | #' to fit survival models and compute all the estimates for the 103 | #' whole set of transition probabilities 104 | #' 105 | #' @aliases make_data_multi_state 106 | #' @param data dataset containing the full ILD with information on both 107 | #' progression and death. Can be a data.frame or a tibble 108 | #' @param id The column with the individual identifier. Can be NULL (in 109 | #' which case, it will be created from scratch) 110 | #' @param prog The progression indicator: takes value 1 if the individual 111 | #' has progressed and 0 otherwise. Defaults to the column named 'prog' in 112 | #' the dataset 113 | #' @param death The death indicator: takes value 1 if the individual 114 | #' has died and 0 otherwise. Defaults to the column named 'death' in 115 | #' the dataset 116 | #' @param prog_t The progression time. Defaults to the column named 117 | #' 'prog_t' in the dataset 118 | #' @param death_t The death time. Defaults to the column named 119 | #' 'death_t' in the dataset 120 | #' @param keep A vector of strings with the names of the additional 121 | #' variables from the original dataset to keep into the multistate 122 | #' dataset. If 'NULL' (default), then keeps all 123 | #' @param ... additional arguments. 124 | #' @return A tibble containing the event history for each individual 125 | #' and with the following variables: id = Patients ID; from = Initial 126 | #' state (1=Pre-progression, 2=Progression, 3=Death); to = End state 127 | #' (1=Pre-progression, 2=Progression, 3=Death); trans = Transition ID: 128 | #' 1=Pre-progression -> Progression; 2=Pre-Progression -> Death; 129 | #' 3=Progression -> Death; Tstart = Entry time (either entry or 130 | #' progression); Tstop = Exit time (time of event or censoring time); 131 | #' status = Event indicator (1=yes, 0=censored), **for the specific 132 | #' event under consideration**; treat = Treatment indicator 133 | #' All the other original variables are appended to these, but can be 134 | #' removed 135 | #' @note Something will go here 136 | #' @author Gianluca Baio 137 | #' @seealso Something will go here 138 | #' @references Something will go here 139 | #' @keywords Transition probabilities Markov models Multistate models 140 | #' @examples 141 | #' \dontrun{ 142 | #' # Something will go here 143 | #' } 144 | #' 145 | #' @export make_data_multi_state 146 | make_data_multi_state=function(data,id="id",prog="prog",death="death",prog_t="prog_t", 147 | death_t="death_t",keep=NULL,...) { 148 | 149 | # If ID is not passed (=NULL) then create it 150 | if(is.null(id)) { 151 | data=data %>% mutate(id=row_number()) 152 | id="id" 153 | } 154 | 155 | # Uses dplyr to manipulate the original dataset and create the mstate version 156 | # NB: uses the notation '!!sym(name_variable)' to address a specific column in the dataset 157 | msmdata= 158 | # Transition Pre to Post 159 | data %>% mutate( 160 | id=!!sym(id), # patient ID 161 | from=1, # starting state 162 | to=2, # arriving state 163 | trans=1, # transition code (1 = Pre -> Progression) 164 | Tstart=0, # entry time 165 | Tstop=!!sym(prog_t), # exit time 166 | time=Tstop-Tstart, # time-to-event = Tstop-Tstart 167 | status=case_when( # censoring indicator: 168 | !!sym(prog)==1~1, # 1 if progressed; 0 otherwise 169 | TRUE~0 170 | ) 171 | ) %>% select(id,from,to,trans,Tstart,Tstop,time,status,treat,everything()) %>% 172 | bind_rows( 173 | # Transition Pre to Death 174 | data %>% mutate( 175 | id=!!sym(id), # patient ID 176 | from=1, # starting state 177 | to=3, # arriving state 178 | trans=2, # transition code (2 = Pre -> Death) 179 | Tstart=0, # entry time 180 | Tstop=!!sym(death_t), # exit time 181 | time=Tstop-Tstart, # time-to-event = Tstop-Tstart 182 | status=case_when( # censoring indicator: 183 | # 1 if died at progression; 0 otherwise 184 | (!!sym(death)==1 & !!sym(prog_t)==!!sym(death_t))~1, 185 | TRUE~0 186 | ) 187 | ) %>% select(id,from,to,trans,Tstart,Tstop,time,status,treat,everything()) 188 | ) %>% 189 | bind_rows( 190 | # Transition Post to Death 191 | data %>% filter(!!sym(prog)==1) %>% mutate( 192 | id=!!sym(id), # patient ID 193 | from=2, # starting state 194 | to=3, # arriving state 195 | trans=3, # transition code (2 = Pre -> Death) 196 | Tstart=!!sym(prog_t), # entry time 197 | Tstop=!!sym(death_t), # exit time 198 | time=Tstop-Tstart, # time-to-event = Tstop-Tstart 199 | status=case_when( # censoring indicator: 200 | !!sym(death)==1~1, # 1 if died; 0 otherwise 201 | TRUE~0 202 | ) 203 | ) %>% select(id,from,to,trans,Tstart,Tstop,time,status,treat,everything()) 204 | ) %>% arrange(id,trans) 205 | 206 | if (!is.null(keep)) { 207 | msmdata=msmdata %>% select(id,from,to,trans,Tstart,Tstop,time,status,keep) 208 | } 209 | return(msmdata) 210 | } 211 | 212 | 213 | #' three_state_mm 214 | #' 215 | #' General purpose function to run a standard three-state Markov model 216 | #' (typically used in cancer modelling). The states are typically 217 | #' 'Pre-progression', 'Progressed' and 'Death'. No backward transition 218 | #' from 'Progressed' to 'Pre-progression' is allowed and 'Death' is 219 | #' obviously an absorbing state. All other transitions are possible. 220 | #' The crucial assumption is that *individual-level data* are available 221 | #' recording an indicator and the time of progression and death for each 222 | #' individual. The function returns the full transition matrix 223 | #' 224 | #' @aliases three_state_mm 225 | #' @param m_12 A 'survHE' object (output to a call to \code{fit.models}) 226 | #' estimating the parameters of a model for the transition from 227 | #' 'Pre-progression' (state 1) to 'Progressed' (state 2). Given the 228 | #' individual level data with the complete event history (in the object 229 | #' 'data'), can be done with a call like 'x=make_data_multi_state(data)' 230 | #' and then \code{fit.models(Surv(time,status)~...,data=x \%>\% filter(trans==1),...)} 231 | #' @param m_13 A 'survHE' object (output to a call to \code{fit.models}) 232 | #' estimating the parameters of a model for the transition from 233 | #' 'Pre-progression' (state 1) to 'Death' (state 3). Given the 234 | #' individual level data with the complete event history (in the object 235 | #' 'data'), can be done with a call like 'x=make_data_multi_state(data)' 236 | #' and then \code{fit.models(Surv(time,status)~...,data=x \%>\% filter(trans==2),...)} 237 | #' @param m_23 A 'survHE' object (output to a call to \code{fit.models}) 238 | #' estimating the parameters of a model for the transition from 239 | #' 'Progressed' (state 2) to 'Death' (state 3). Given the 240 | #' individual level data with the complete event history (in the object 241 | #' 'data'), can be done with a call like 'x=make_data_multi_state(data)' 242 | #' and then \code{fit.models(Surv(time,status)~...,data=x \%>\% filter(trans==3),...)} 243 | #' @param nsim The number of simulations for the model parameters that are 244 | #' used to compute the survival curves. Defaults to \code{nsim}=1, 245 | #' which simply creates one survival curve for each treatment arm. 246 | #' @param start A vector of initial state occupancy. By default assumes 1000 247 | #' individuals, all initially allocated to 'Pre-progression' 248 | #' @param basecase Should the base case be computed as well, based on the 249 | #' point estimate of the underlying model parameters? (Default=FALSE) 250 | #' @param ... additional arguments. 251 | #' @return A list including the state occupancy simulations in an object 'm'. 252 | #' This is a tibble with the number of individuals in each of the 3 states 253 | #' at each of the times specified by the user. If \code{nsim}>1, then the tibble 254 | #' also contains a simulation index to keep track of that. The list also 255 | #' includes the computation time to obtain the state occupancy tibble (in the 256 | #' object 'running_time'). If \code{basecase==TRUE}, then the function also 257 | #' computes the "base case scenario" (based on 1 simulation from of the 258 | #' underlying survival curves, i.e. the point estimate of the model parameters) 259 | #' and stores it in the object 'base_case' 260 | #' @note Something will go here 261 | #' @author Gianluca Baio 262 | #' @seealso make.transition.probs make_data_multi_state 263 | #' @references Something will go here 264 | #' @keywords Transition probabilities Markov models Three-state cancer model 265 | #' @examples 266 | #' \dontrun{ 267 | #' # Something will go here 268 | #' } 269 | #' 270 | #' @export three_state_mm 271 | three_state_mm = function(m_12,m_13,m_23,nsim=1,start=c(1000,0,0),basecase=FALSE,...){ 272 | 273 | exArgs <- list(...) 274 | 275 | # Initialises the base_case object 276 | base_case=NULL 277 | 278 | # Makes default for parameters to the call to 'make.surv' (which are overwritten if the user has specified them 279 | # separately) 280 | if(exists("mod",exArgs)) {mod=exArgs$mod} else {mod=1} 281 | if(exists("t",exArgs)) {t=exArgs$t} else {t=NULL} 282 | if(is.null(t)) { 283 | t <- sort(unique(m_12$misc$km$time)) 284 | } 285 | if(exists("newdata",exArgs)) {newdata=exArgs$newdata} else {newdata=NULL} 286 | 287 | # Computes the transition probabilities for the transitions that are directly 288 | # identifiable from the observed data 289 | lambda_12=make.transition.probs(m_12,mod=mod,t=t,newdata=newdata,nsim=nsim) 290 | lambda_13=make.transition.probs(m_13,mod=mod,t=t,newdata=newdata,nsim=nsim) 291 | lambda_23=make.transition.probs(m_23,mod=mod,t=t,newdata=newdata,nsim=nsim) 292 | 293 | # Derives lambda_11 by subtraction (as all transition probs out of state 1 must sum to 1) 294 | # NB: Without further checks, it is possible that (lambda_12+lambda_13)>1 and so lambda_11<0 295 | # This should be carefully checked! 296 | lambda_11=(lambda_12 %>% select(starts_with("lambda")) + lambda_13 %>% select(starts_with("lambda"))) %>% 297 | as_tibble() %>% bind_cols(lambda_12 %>% select(profile,time)) %>% select(profile,time,everything()) %>% 298 | mutate(across(starts_with("lambda"),~1-.)) 299 | # Derives lambda_22 by subtraction (as all transition probs out of state 2 must sum to 1) 300 | lambda_22=(1-lambda_23 %>% select(starts_with("lambda"))) %>% as_tibble() %>% 301 | bind_cols(lambda_23 %>% select(profile,time)) %>% select(profile,time,everything()) 302 | 303 | # Computes the state occupancy for all the simulations 304 | tic=Sys.time() 305 | m=make_state_occupancy(nsim=nsim,lambda_11,lambda_12,lambda_13,lambda_22,lambda_23,start) 306 | toc=Sys.time() 307 | running_time=toc-tic 308 | 309 | if(basecase) { 310 | # Makes base-case Markov model (by considering the point estimate of the model parameters) 311 | # Computes the transition probabilities for the transitions that are directly 312 | # identifiable from the observed data 313 | lambda_12=make.transition.probs(m_12,mod=mod,t=t,newdata=newdata,nsim=1) 314 | lambda_13=make.transition.probs(m_13,mod=mod,t=t,newdata=newdata,nsim=1) 315 | lambda_23=make.transition.probs(m_23,mod=mod,t=t,newdata=newdata,nsim=1) 316 | 317 | # Derives lambda_11 by subtraction (as all transition probs out of state 1 must sum to 1) 318 | lambda_11=(lambda_12 %>% select(starts_with("lambda"))+lambda_13 %>% select(starts_with("lambda"))) %>% 319 | as_tibble() %>% bind_cols(lambda_12 %>% select(profile,time)) %>% select(profile,time,everything()) %>% 320 | mutate(across(starts_with("lambda"),~1-.)) 321 | # Derives lambda_22 by subtraction (as all transition probs out of state 2 must sum to 1) 322 | lambda_22=(1-lambda_23 %>% select(starts_with("lambda"))) %>% as_tibble() %>% 323 | bind_cols(lambda_23 %>% select(profile,time)) %>% select(profile,time,everything()) 324 | 325 | base_case=make_state_occupancy(nsim=1,lambda_11,lambda_12,lambda_13,lambda_22,lambda_23,start) 326 | } 327 | 328 | 329 | # Outputs of the function 330 | list(m=m,running_time=running_time,base_case=base_case) 331 | } 332 | 333 | 334 | #' make_state_occupancy 335 | #' 336 | #' Utility function to compute the state occupancy in a three state MM 337 | #' 338 | #' @param nsim The number of simulations for the model parameters that are 339 | #' used to compute the survival curves. 340 | #' @param lambda_11 the tibble containing the transition probabilities for 341 | #' the transition "Pre-progression -> Pre-progression" 342 | #' @param lambda_12 the tibble containing the transition probabilities for 343 | #' the transition "Pre-progression -> Progression" 344 | #' @param lambda_13 the tibble containing the transition probabilities for 345 | #' the transition "Pre-progression -> Death" 346 | #' @param lambda_22 the tibble containing the transition probabilities for 347 | #' the transition "Progression -> Progression" 348 | #' @param lambda_23 the tibble containing the transition probabilities for 349 | #' the transition "Progression -> Death" 350 | #' @return A list including the state occupancy simulations in an object 'm'. 351 | #' This is a tibble with the number of individuals in each of the 3 states 352 | #' at each of the times specified by the user. If \code{nsim}>1, then the tibble 353 | #' also contains a simulation index to keep track of that. 354 | #' @note Something will go here 355 | #' @author Gianluca Baio 356 | #' @seealso make.transition.probs make_data_multi_state 357 | #' @references Something will go here 358 | #' @keywords Transition probabilities Markov models Three-state cancer model 359 | #' @examples 360 | #' \dontrun{ 361 | #' # Something will go here 362 | #' } 363 | #' 364 | #' @noRd 365 | make_state_occupancy=function(nsim,lambda_11,lambda_12,lambda_13,lambda_22,lambda_23,start) { 366 | 367 | m=list() 368 | # Initialises the lists 369 | m=lapply(1:nsim,function(i) { 370 | # Creates the tibbles (one per each of the nsim simulations) 371 | m[[i]]=tibble(profile=lambda_11$profile,time=lambda_11$time,`Pre-progressed`=NA,Progressed=NA,Death=NA) 372 | # Now adds in the relevant transition probabilities in the correct rows 373 | m[[i]]=m[[i]] %>% left_join(lambda_11 %>% select(profile,time,starts_with("lambda")[[i]]) %>% rename("lambda_11"=starts_with("lambda")), by=c("profile","time")) %>% 374 | left_join(lambda_12 %>% select(profile,time,starts_with("lambda")[[i]]) %>% rename("lambda_12"=starts_with("lambda")), by=c("profile","time")) %>% 375 | left_join(lambda_13 %>% select(profile,time,starts_with("lambda")[[i]]) %>% rename("lambda_13"=starts_with("lambda")), by=c("profile","time")) %>% 376 | left_join(lambda_22 %>% select(profile,time,starts_with("lambda")[[i]]) %>% rename("lambda_22"=starts_with("lambda")), by=c("profile","time")) %>% 377 | left_join(lambda_23 %>% select(profile,time,starts_with("lambda")[[i]]) %>% rename("lambda_23"=starts_with("lambda")), by=c("profile","time")) 378 | # Initialise the tibbles with the start values 379 | m[[i]]=m[[i]] %>% group_by(profile) %>% mutate( 380 | `Pre-progressed`=replace(`Pre-progressed`,row_number()==1,start[1]), 381 | Progressed=replace(Progressed,row_number()==1,start[2]), 382 | Death=replace(Death,row_number()==1,start[3]), 383 | ) %>% ungroup() 384 | 385 | # Re-compute the probabilities to *always* sum up to 1 (this gets rid of weird cases where two 386 | # probs sum to something bigger than 1, which generates negative values) 387 | m[[i]]=m[[i]] %>% mutate(check=(lambda_12+lambda_13)>1) %>% mutate( 388 | l_12=case_when( 389 | check==TRUE~lambda_12/(lambda_12+lambda_13), 390 | check==FALSE~lambda_12 391 | ), 392 | l_13=case_when( 393 | check==TRUE~lambda_13/(lambda_12+lambda_13), 394 | check==FALSE~lambda_13 395 | ) 396 | ) %>% mutate(l_11=1-(l_12+l_13)) %>% mutate(lambda_11=l_11,lambda_12=l_12,lambda_13=l_13) %>% 397 | select(-c(l_11,l_12,l_13,check)) 398 | }) 399 | 400 | # Convert the list into a massive tibble with a simulation index 401 | m=m %>% bind_rows() %>% mutate(sim_idx=rep(1:nsim,each=nrow(m[[1]]))) 402 | 403 | # Loops over times to compute the state occupancy 404 | for (j in 2:nrow(m)) { 405 | # This is a simple trick to restart the computation when the treatment indicator changes 406 | if (m$profile[j]==m$profile[j-1]) { 407 | m$`Pre-progressed`[j]=sum(c(m$`Pre-progressed`[j-1]*m$lambda_11[j]),na.rm=T) 408 | m$Progressed[j]=sum(c(m$`Pre-progressed`[j-1]*m$lambda_12[j], m$Progressed[j-1]*m$lambda_22[j]),na.rm=T) 409 | m$Death[j]=sum(c(m$`Pre-progressed`[j-1]*m$lambda_13[j], m$Progressed[j-1]*m$lambda_23[j], m$Death[j-1]),na.rm=T) 410 | } 411 | } 412 | m=m %>% select(profile,time,`Pre-progressed`,Progressed,Death,sim_idx,everything()) 413 | # Need to set to 0 all negative values! 414 | m=m %>% mutate( 415 | `Pre-progressed`=if_else(`Pre-progressed`<0,0,`Pre-progressed`), 416 | Progressed=if_else(Progressed<0,0,Progressed), 417 | Death=if_else(Death<0,0,Death) 418 | ) 419 | 420 | return(m) 421 | } 422 | 423 | 424 | #' Markov trace 425 | #' 426 | #' Plots the Markov Trace from an object generated using \code{three_state_mm} 427 | #' 428 | #' @aliases markov_trace 429 | #' @param mm The output of a call to \code{three_state_mm} 430 | #' @param interventions A vector of labels for the interventions 431 | #' @param ... additional arguments. 432 | #' @return Plot 433 | #' @note Something will go here 434 | #' @author Gianluca Baio 435 | #' @seealso make.surv, three_state_mm 436 | #' @references Something will go here 437 | #' @keywords Transition probabilities Markov models Markov trace 438 | #' @examples 439 | #' \dontrun{ 440 | #' # Something will go here 441 | #' } 442 | #' 443 | #' @export markov_trace 444 | markov_trace <- function(mm, interventions=NULL,...) { 445 | # First reshape the data 446 | if(!is.null(interventions)) { 447 | # Figures out how many observations there are in each treatment & replaces the values passed 448 | # as arguments in 'interventions' 449 | mm$m <- mm$m %>% 450 | mutate(profile = rep(interventions,each=mm$m %>% 451 | count(profile) %>% 452 | slice(1) %>% 453 | pull(n))) 454 | } 455 | 456 | pl=mm$m |> group_by(profile,time) |> 457 | summarise( 458 | `Pre-progressed`=mean(`Pre-progressed`),Progressed=mean(Progressed), 459 | Death=mean(Death) 460 | ) |> 461 | ungroup() 462 | pl <- pl |> 463 | select(profile,time,`Pre-progressed`) %>% 464 | rename(npeople=`Pre-progressed`) %>% 465 | mutate(group="Pre-progressed") %>% 466 | bind_rows(pl %>% select(profile,time,`Progressed`) %>% 467 | rename(npeople=Progressed) %>% 468 | mutate(group="Progressed")) %>% 469 | bind_rows(pl %>% select(profile,time,Death) %>% 470 | rename(npeople=Death) %>% 471 | mutate(group="Death")) %>% 472 | # Create a numeric/factor group label to help manage the appearance of the graph 473 | mutate( 474 | grp_lab=as.factor(case_when( 475 | group=="Pre-progressed"~3, 476 | group=="Progressed"~2, 477 | TRUE~1 478 | )) 479 | ) %>% 480 | ggplot(aes(x=time, y=npeople, fill=grp_lab)) + 481 | geom_bar(position="stack",stat="identity") + 482 | labs(x="Cycle",y="Number of people",title="Markov trace",fill="State") + 483 | facet_wrap(~profile) + 484 | theme_bw() + 485 | # Add control to the legend label 486 | scale_fill_discrete(breaks=c(3,2,1),labels=c("Pre-progressed","Progressed","Death")) 487 | 488 | return(pl) 489 | } 490 | 491 | 492 | ### TO DO --- THIS NICELY COMPUTES THE MEAN SURVIVAL TIME OVER THE MM SIMULATIONS BUT CURRENTLY ONLY 493 | ### DOES IT FOR WHEN nsim=1!!! 494 | compute_mean_time=function(x,disc.rate=0) { 495 | mean_time=x %>% mutate(Alive=(1000-Death)/1000,disc=1/(1+disc.rate)^time) %>% group_by(profile) %>% 496 | mutate(dt=time-lag(time)) %>% summarise(mean_survival_time=sum(Alive*dt*disc,na.rm=T)) %>% ungroup() 497 | 498 | return(mean_time) 499 | } 500 | 501 | -------------------------------------------------------------------------------- /R/model.fit.plot.R: -------------------------------------------------------------------------------- 1 | #' Graphical representation of the measures of model fitting based on 2 | #' Information Criteria 3 | #' 4 | #' Plots a summary of the model fit for all the models fitted 5 | #' 6 | #' Something will go here 7 | #' 8 | #' @param ... Optional inputs. Must include at least one \code{survHE} object. 9 | #' @param type should the AIC, the BIC or the DIC plotted? (values = \code{"aic"}, 10 | #' \code{"bic"} or \code{"dic"}) 11 | #' @param scale If \code{scale='absolute'} (default), then plot the absolute value 12 | #' of the *IC. If \code{scale='relative'} then plot a rescaled version taking 13 | #' the percentage increase in the *IC in comparison with the best-fitting model 14 | #' @param stacked Should the bars be stacked and grouped by survHE object? (default=F) 15 | #' @return A plot with the relevant model fitting statistics 16 | #' @author Gianluca Baio 17 | #' @seealso \code{fit.models} 18 | #' @template refs 19 | #' @keywords Model fitting Parametric survival models 20 | #' @examples 21 | #' \dontrun{ 22 | #' data(bc) 23 | #' 24 | #' mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 25 | #' distr=c("exp","wei","lno"),method="mle") 26 | #' model.fit.plot(mle) 27 | #' } 28 | #' 29 | #' @export model.fit.plot 30 | model.fit.plot <- function(...,type="aic",scale="absolute",stacked=FALSE) { 31 | ## Plots a summary of the model fit for all the models 32 | ## Can also combine several survHE objects each containing the fit for one model 33 | 34 | exArgs=list(...) 35 | 36 | # Finds out whether there are objects with no name (if so, they will be 'survHE' objects!) 37 | # If there are any, then needs to rename them to make the rest of the function work 38 | if(length(names(exArgs))==0) { 39 | # This is the case where the only argument(s) is/are unnamed 'survHE' object(s) 40 | names(exArgs)=paste0("Object",1:length(exArgs)) 41 | } 42 | if(length(which(names(exArgs)==""))>0){ 43 | names(exArgs)[which(names(exArgs)=="")] = paste0("Object",1:length(which(names(exArgs)==""))) 44 | } 45 | # Extracts the 'survHE' objects from the list 'exArgs'. If there are none, then stop with an error message! 46 | w <- which(unlist(lapply(1:length(exArgs),function(i) class(exArgs[[i]])))=="survHE") 47 | if(length(w)==0){ 48 | stop("Please give at least one 'survHE' object, generated by a call to 'fit.models(...)") 49 | } else { 50 | survHE_objs=lapply(1:length(w),function(i) exArgs[[w[i]]]) 51 | } 52 | names(survHE_objs)=names(exArgs)[w] 53 | 54 | # What model should be used from the 'survHE' objects? 55 | if (!exists("mods",exArgs)) { 56 | mods=1:sum(unlist(lapply(survHE_objs,function(x) length(x$models)))) 57 | } else {mods=exArgs$mods} 58 | 59 | # Maps the arguments for 'type' to relevant strings 60 | if(type %in% c("aic","AIC","a","A")){type="AIC"} 61 | if(type %in% c("bic","BIC","b","B")){type="BIC"} 62 | if(type %in% c("dic","DIC","d","D")){type="DIC"} 63 | 64 | # Creates the dataset with the model fitting statistics for all the selected models 65 | toplot=lapply(1:length(survHE_objs),function(x) survHE_objs[[x]]$model.fitting %>% bind_rows %>% 66 | mutate(object_name=as.factor(names(survHE_objs)[x]), 67 | model_name=names(survHE_objs[[x]]$models))) %>% 68 | bind_rows %>% mutate(lab=paste0(model_name,":",object_name)) %>% select(object_name,model_name,lab,everything()) %>% 69 | slice(mods) 70 | 71 | # Can make the bars stacked (looks actually nice...) 72 | if(stacked==TRUE) { 73 | if(type=="AIC") { 74 | if(exists("xlim",exArgs)){yl=exArgs$xlim} else {yl=range(pretty(range(toplot$aic)))} 75 | mfp=ggplot(data=toplot,aes(x=model_name,y=aic,fill=object_name)) + 76 | geom_bar(stat="identity",position=position_dodge()) + 77 | geom_text(aes(x=model_name,y=aic,label=aic %>% round(digits=1.5)), hjust=1.05, 78 | color="white", size=5.5,position = position_dodge(0.9)) + coord_flip(ylim=yl) 79 | } 80 | if(type=="BIC") { 81 | if(exists("xlim",exArgs)){yl=exArgs$xlim} else {yl=range(pretty(range(toplot$bic)))} 82 | mfp=ggplot(data=toplot,aes(x=model_name,y=bic,fill=object_name)) + 83 | geom_bar(stat="identity",position=position_dodge()) + 84 | geom_text(aes(x=model_name,y=bic,label=bic %>% round(digits=1.5)), hjust=1.05, 85 | color="white", size=5.5,position = position_dodge(0.9)) + coord_flip(ylim=yl) 86 | } 87 | if(type=="DIC") { 88 | if(exists("xlim",exArgs)){yl=exArgs$xlim} else {yl=range(pretty(range(toplot$dic,na.rm=TRUE)))} 89 | mfp=ggplot(data=toplot,aes(x=model_name,y=dic,fill=object_name)) + 90 | geom_bar(stat="identity",position=position_dodge()) + 91 | geom_text(aes(x=model_name,y=dic,label=dic %>% round(digits=1.5)), hjust=1.05, 92 | color="white", size=5.5,position = position_dodge(0.9)) + coord_flip(ylim=yl) 93 | } 94 | mfp=mfp+ 95 | theme_bw() + 96 | theme(axis.text.x = element_text(color="black",size=12,angle=0,hjust=.5,vjust=.5), 97 | axis.text.y = element_text(color="black",size=12,angle=0,hjust=.5,vjust=.5), 98 | axis.title.x = element_text(color="black",size=14,angle=0,hjust=.5,vjust=.5), 99 | axis.title.y = element_text(color="black",size=14,angle=90,hjust=.5,vjust=.5)) + 100 | theme(axis.line = element_line(colour = "black"), 101 | panel.background = element_blank(), 102 | panel.border = element_blank(), 103 | plot.title = element_text(size=18, face="bold")) + 104 | labs(y=toupper(type),x="",title=paste0("Model comparison based on ",toupper(type)), 105 | fill="survHE object") + 106 | scale_fill_brewer(palette="Paired") + 107 | theme(legend.position="bottom") 108 | 109 | # Optional arguments 110 | # Manual colours should be a vector with lenght equal to the number of objects 111 | if(exists("col",exArgs)){ 112 | mfp=mfp+scale_fill_manual(values=exArgs$col) 113 | } 114 | if(exists("colour",exArgs)){ 115 | mfp=mfp+scale_fill_manual(values=exArgs$colour) 116 | } 117 | if(exists("color",exArgs)){ 118 | mfp=mfp+scale_fill_manual(values=exArgs$color) 119 | } 120 | # Can modify the title of the legend 121 | if(exists("name_legend",exArgs)){ 122 | mfp=mfp+labs(fill=exArgs$name_legend) 123 | } 124 | # Can change the palette too 125 | # plot + scale_fill_brewer(palette=...) see 'help(scale_fill_brewer)' for possible options 126 | } 127 | 128 | if(stacked==FALSE) { 129 | # Can choose the colour with which to plot the bars. If nothing specified, then select default ('steelblue'). 130 | # Otherwise, the user can specify a vector of colour with as many as there are bars. Can use interchangeably 131 | # the strings 'colour', 'color' or 'col' 132 | if(exists("colour",exArgs)){col=exArgs$colour} else {col="steelblue"} 133 | if(exists("color",exArgs)){col=exArgs$color} else {col="steelblue"} 134 | if(exists("col",exArgs)){col=exArgs$col} else {col="steelblue"} 135 | 136 | # Finally plots the bar-chart 137 | if(nlevels(toplot$object_name)==1){x=toplot$model_name} else {x=toplot$lab} 138 | mfp=ggplot(data=toplot) 139 | if(type=="AIC") { 140 | if(scale=="absolute" | scale=="abs") { 141 | if(exists("xlim",exArgs)){yl=exArgs$xlim} else {yl=range(pretty(range(toplot$aic)))} 142 | mfp=mfp+geom_bar(mapping=aes(x=x,y=aic),stat="identity",fill=col) + 143 | geom_text(aes(x=x,y=aic,label=aic %>% round(digits=1.5)), hjust=1.05, color="white", size=5.5) + 144 | labs(y=toupper(type),x="",title=paste0("Model comparison based on ",toupper(type)), 145 | color=ifelse(length(mods)==1,"Model","Models") 146 | ) + coord_flip(ylim=yl) 147 | } 148 | if (scale=="rel" | scale=="relative") { 149 | mfp=mfp+geom_bar(mapping=aes(x=x,y=100*(aic-min(aic))/min(aic)),stat="identity",fill=col) + 150 | geom_text(aes(x=x,y=100*(aic-min(aic))/min(aic),label=(100*(aic-min(aic))/min(aic)) %>% round(digits=1.5)), 151 | hjust=-.05, color="black", size=5.5) + 152 | labs(y=paste0("Percentage increase in ",toupper(type)),x="",title=paste0("Model comparison based on ",toupper(type)), 153 | color=ifelse(length(mods)==1,"Model","Models") 154 | ) + coord_flip() 155 | } 156 | } 157 | if(type=="BIC") { 158 | if(scale=="absolute" | scale=="abs") { 159 | if(exists("xlim",exArgs)){yl=exArgs$xlim} else {yl=range(pretty(range(toplot$bic)))} 160 | mfp=mfp+geom_bar(mapping=aes(x=x,y=bic),stat="identity",fill=col) + 161 | geom_text(aes(x=x,y=bic,label=bic %>% round(digits=1.5)), hjust=1.05, color="white", size=5.5) + 162 | labs(y=toupper(type),x="",title=paste0("Model comparison based on ",toupper(type)), 163 | color=ifelse(length(mods)==1,"Model","Models") 164 | ) + coord_flip(ylim=yl) 165 | } 166 | if(scale=="rel" | scale=="relative") { 167 | mfp=mfp+geom_bar(mapping=aes(x=x,y=100*(bic-min(bic))/min(bic)),stat="identity",fill=col) + 168 | geom_text(aes(x=x,y=100*(bic-min(bic))/min(bic),label=(100*(bic-min(bic))/min(bic)) %>% round(digits=1.5)), 169 | hjust=-.05, color="black", size=5.5) + 170 | labs(y=paste0("Percentage increase in ",toupper(type)),x="",title=paste0("Model comparison based on ",toupper(type)), 171 | color=ifelse(length(mods)==1,"Model","Models") 172 | ) + coord_flip() 173 | } 174 | } 175 | if(type=="DIC") { 176 | if(scale=="absolute" | scale=="abs") { 177 | if(exists("xlim",exArgs)){yl=exArgs$xlim} else {yl=range(pretty(range(toplot$dic,na.rm=TRUE)))} 178 | mfp=mfp+geom_bar(mapping=aes(x=x,y=dic),stat="identity",fill=col) + 179 | geom_text(aes(x=x,y=dic,label=dic %>% round(digits=1.5)), hjust=1.05, color="white", size=5.5) + 180 | labs(y=toupper(type),x="",title=paste0("Model comparison based on ",toupper(type)), 181 | color=ifelse(length(mods)==1,"Model","Models") 182 | ) + coord_flip(ylim=yl) 183 | } 184 | if(scale=="rel" | scale=="relative") { 185 | mfp=mfp+geom_bar(mapping=aes(x=x,y=100*(dic-min(dic))/min(dic)),stat="identity",fill=col) + 186 | geom_text(aes(x=x,y=100*(dic-min(dic))/min(dic),label=(100*(dic-min(dic))/min(dic)) %>% round(digits=1.5)), 187 | hjust=-.05, color="black", size=5.5) + 188 | labs(y=paste0("Percentage increase in ",toupper(type)),x="",title=paste0("Model comparison based on ",toupper(type)), 189 | color=ifelse(length(mods)==1,"Model","Models") 190 | ) + coord_flip() 191 | } 192 | } 193 | mfp=mfp + 194 | theme_bw() + 195 | theme(axis.text.x = element_text(color="black",size=12,angle=0,hjust=.5,vjust=.5), 196 | axis.text.y = element_text(color="black",size=12,angle=0,hjust=.5,vjust=.5), 197 | axis.title.x = element_text(color="black",size=14,angle=0,hjust=.5,vjust=.5), 198 | axis.title.y = element_text(color="black",size=14,angle=90,hjust=.5,vjust=.5)) + 199 | theme(axis.line = element_line(colour = "black"), 200 | panel.background = element_blank(), 201 | panel.border = element_blank(), 202 | plot.title = element_text(size=18, face="bold")) 203 | 204 | # Optional arguments 205 | if(exists("main",exArgs)){ 206 | mfp=mfp+labs(title=exArgs$main) 207 | } 208 | if(exists("models",exArgs)){ 209 | mfp=mfp+scale_x_discrete(labels=exArgs$models) 210 | } 211 | } 212 | 213 | # Renders the graph 214 | mfp 215 | } -------------------------------------------------------------------------------- /R/plot.survHE.R: -------------------------------------------------------------------------------- 1 | #' Plot survival curves for the models fitted using \code{fit.models} 2 | #' 3 | #' Plots the results of model fit. 4 | #' 5 | #' @param ... Must include at least one result object saved as 6 | #' the call to the \code{fit.models} function. Nay include other 7 | #' optional parameters. These include whether the KM curve should be 8 | #' added \code{add.km} and whether the user specifies a profile of covariates 9 | #' (in the list \code{newdata}). Other possibilities are additional 10 | #' (mainly graphical) options. These are: 11 | #' \itemize{ 12 | #' \item \code{xlab} = a string with the label for the x-axis (default = "time") 13 | #' \item \code{ylab} = a string with the label for the y-axis (default = "Survival") 14 | #' \item \code{lab.profile} = a (vector of) string(s) indicating the labels associated with the strata defining the different 15 | #' survival curves to plot. Default to the value used by the Kaplan Meier 16 | #' estimate given in \code{fit.models}. 17 | #' \item \code{newdata} = a list (of lists) providing the values for the relevant covariates If NULL, then will use 18 | #' the mean values for the covariates if at least one is a continuous variable, 19 | #' or the combination of the categorical covariates. 20 | #' \item \code{xlim} = a vector determining the limits for the x-axis 21 | #' \item \code{colors} = a vector of characters defining the colours in which to plot the different survival curves 22 | #' \item \code{what} = a string indicating whether the survival, hazard or 23 | #' cumulative hazard curve should be plotted. Defaults to 'survival', but the 24 | #' other two options can be specified as 'hazard' or 'cumhazard' 25 | #' \item \code{lab.profile} = a vector of characters defining the names of the models fitted 26 | #' \item \code{add.km} = TRUE (whether to also add the Kaplan Meier estimates of the data) 27 | #' \item \code{annotate} = FALSE (whether to also add text to highlight the observed vs 28 | #' extrapolated data) 29 | #' \item \code{legend.position} = a vector of proportions to place the legend. Default 30 | #' to 'c(.75,.9)', which means 75% across the x-axis and 90% across the y-axis 31 | #' \item \code{legend.title} = suitable instructions to format the title of the legend; 32 | #' defaults to 'element_text(size=15,face="bold")' but there may be other 33 | #' arguments that can be added (using 'ggplot' facilities) 34 | #' \item \code{legend.text} = suitable instructions to format the text of the legend; 35 | #' defaults to 'element_text(colour="black", size=14, face="plain")' but there 36 | #' may be other arguments that can be added (using 'ggplot' facilities) 37 | #' } 38 | #' @author Gianluca Baio 39 | #' @seealso \code{\link{fit.models}}, \code{\link{write.surv}} 40 | #' @template refs 41 | #' @keywords Parametric survival models 42 | #' @examples 43 | #' \dontrun{ 44 | #' data(bc) 45 | #' 46 | #' mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 47 | #' distr="exp",method="mle") 48 | #' inla = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 49 | #' distr="exp",method="inla") 50 | #' plot(MLE=mle,INLA=inla) 51 | #' } 52 | #' 53 | #' @export plot.survHE 54 | plot.survHE <- function(...) { 55 | 56 | # Collects all the extra arguments 57 | exArgs <- list(...) 58 | 59 | # Finds out whether there are objects with no name (if so, they will be 'survHE' objects!) 60 | # If there are any, then needs to rename them to make the rest of the function work 61 | if(length(names(exArgs))==0) { 62 | # This is the case where the only argument(s) is/are unnamed 'survHE' object(s) 63 | names(exArgs) <- paste0("Object",1:length(exArgs)) 64 | } 65 | if(length(which(names(exArgs)==""))>0){ 66 | names(exArgs)[which(names(exArgs)=="")] <- paste0("Object",1:length(which(names(exArgs)==""))) 67 | } 68 | 69 | # The default is to go with the 'ggplot' version of the graph. 70 | if (exists("graph",exArgs)) { 71 | graph <- exArgs$graph 72 | } else { 73 | graph <- "ggplot"} 74 | 75 | # If so, then call the function 'plot_ggplot_survHE 76 | if(graph=="ggplot") { 77 | return(plot_ggplot_survHE(exArgs)) 78 | } 79 | 80 | # If the user selects 'base' (only for back-compatibility), then runs the old code 81 | ### NB: Do I want this? (probably not...) 82 | if(graph=="base") { 83 | do.call(plot_base_survHE,exArgs) 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /R/plot_tranformed_km.R: -------------------------------------------------------------------------------- 1 | 2 | #' Plot to assess suitability of parametric model 3 | #' 4 | #' Perform an exploratory investigation for linearity of 5 | #' transformed survival models. 6 | #' 7 | #' For the Weibull, twice taking logs of the survivor function 8 | #' 9 | #' \deqn{log(-log S(t)) = log \lambda + \gamma log t} 10 | #' 11 | #' A plot of \eqn{log(-log S(t))} against \eqn{log(t)} would give an approximately 12 | #' straight line if the Weibull assumption is reasonable. 13 | #' The plot could also be used to give a rough estimate of the parameters. 14 | #' 15 | #' Similarly, for the log-logistic distribution 16 | #' 17 | #' \deqn{logS(t)/(1 - S(t)) = \theta - \kappa log t} 18 | #' 19 | #' For the log-normal distribution 20 | #' 21 | #' \deqn{\Phi^{-1} (1 - S(t)) = (log t - \mu) / \sigma} 22 | #' 23 | #' We can also check the assumption made with using the Cox regression model 24 | #' of proportional hazards by inspecting the log-cumulative hazard plot. 25 | #' 26 | #' \deqn{log H_i(t) = \beta x_i + log H_0(t)} 27 | #' 28 | #' The transformed curves for different values of the explanatory variables 29 | #' will be parallel if PH holds. 30 | #' 31 | #' @param fit An object of class survHE. 32 | #' @param mod Index or name of a model in fit. Defaults to 1. 33 | #' @param add_legend If \code{TRUE}, labels assumptions. Defaults to \code{FALSE}. 34 | #' @param graph Type of plot: base or ggplot2. 35 | #' @param \dots Further arguments, passed on to plot. 36 | #' @return Diagnostic plot 37 | #' @author William Browne, Nathan Green 38 | #' @references Collett (2015) Modelling Survival Data in Medical Research, CRC Press 39 | #' @keywords survival hplot 40 | #' @export 41 | #' @examples 42 | #' 43 | #' data(bc) 44 | #' form <- formula("Surv(recyrs, censrec) ~ group") 45 | #' 46 | #' # exponential distribution 47 | #' fit_exp <- fit.models(form, data = bc, 48 | #' distr = "exp", method = "mle") 49 | #' plot_transformed_km(fit_exp) 50 | #' plot_transformed_km(fit_exp, graph = "ggplot2") 51 | #' 52 | #' # weibull distribution 53 | #' fit_wei <- fit.models(form, data = bc, 54 | #' distr = "weibull", method = "mle") 55 | #' plot_transformed_km(fit_wei) 56 | #' plot_transformed_km(fit_wei, graph = "ggplot2") 57 | #' 58 | #' # loglogistic distribution 59 | #' fit_llog <- fit.models(form, data = bc, 60 | #' distr = "loglogistic", method = "mle") 61 | #' plot_transformed_km(fit_llog) 62 | #' plot_transformed_km(fit_llog, graph = "ggplot2") 63 | #' 64 | #' # lognormal distribution 65 | #' fit_lnorm <- fit.models(form, data = bc, 66 | #' distr = "lognormal", method = "mle") 67 | #' plot_transformed_km(fit_lnorm) 68 | #' plot_transformed_km(fit_lnorm, graph = "ggplot2") 69 | #' 70 | #' ## for only one group 71 | #' form <- formula("Surv(recyrs, censrec) ~ 1") 72 | #' 73 | #' fit_exp <- fit.models(form, data = bc, 74 | #' distr = "exp", method = "mle") 75 | #' plot_transformed_km(fit_exp) 76 | #' plot_transformed_km(fit_exp, graph = "ggplot2") 77 | #' 78 | plot_transformed_km <- function(fit, mod = 1, add_legend = FALSE, 79 | graph = "base", ...) { 80 | 81 | dots <- list(...) 82 | 83 | graph <- match.arg(graph, c("base", "ggplot2")) 84 | 85 | if (length(mod) != 1) 86 | stop("Please provide at most one model index.") 87 | 88 | if (is.numeric(mod) && !mod <= length(fit$models)) 89 | stop("More model names provided than available in list of model fits provided.") 90 | 91 | if (is.character(mod) && !mod %in% names(fit$models)) 92 | stop("Model name not available in list of model fits provided.") 93 | 94 | dist <- get_distribution(fit, mod) 95 | 96 | distn_names <- list( 97 | "exp" = c("exp", "exponential"), 98 | "weibull" = c("weibull", "weibull.quiet", "weibullaf", "weibullph"), 99 | "loglogistic" = c("llogis", "loglogistic"), 100 | "lognormal" = c("lognormal", "lnorm"), 101 | "gompertz" = "gompertz") 102 | 103 | if (!dist %in% unname(unlist(distn_names))) 104 | stop("Distribution not available.") 105 | 106 | fit_km <- fit$misc$km 107 | 108 | n_strata <- length(fit_km$strata) 109 | 110 | if (n_strata == 0 || n_strata == 1) { 111 | fit_km$strata <- c("group" = length(fit_km$time)) 112 | } 113 | 114 | model_strata <- rep(x = names(fit_km$strata), 115 | times = fit_km$strata) 116 | 117 | times <- split(fit_km$time, model_strata) 118 | survs <- split(fit_km$surv, model_strata) 119 | 120 | params <- list() 121 | 122 | if (dist %in% distn_names[["exp"]]) { 123 | params <- list( 124 | FUN = "lines", 125 | xlab = "time", 126 | ylab = "-log(S(t))", 127 | main = "Exponential distributional assumption", 128 | x = times, 129 | y = lapply(survs, function(x) -log(x)), 130 | lty = 1:n_strata, 131 | col = 1:n_strata, 132 | type = "l") 133 | } 134 | 135 | if (dist %in% distn_names[["weibull"]]) { 136 | params <- list( 137 | FUN = "lines", 138 | xlab = "log(time)", 139 | ylab = "log(-log(S(t))) i.e. log cumulative hazard", 140 | main = "Weibull distributional assumption", 141 | x = lapply(times, log), 142 | y = lapply(survs, function(x) log(-log(x))), 143 | lty = 1:n_strata, 144 | col = 1:n_strata, 145 | type = "l") 146 | } 147 | 148 | if (dist %in% distn_names[["loglogistic"]]) { 149 | params <- list( 150 | FUN = "lines", 151 | xlab = "time", 152 | ylab = "log(S(t)/(1-S(t)))", 153 | main = "log-Logistic distributional assumption", 154 | x = lapply(times, log), 155 | y = lapply(survs, function(x) log(x/(1 - x))), 156 | lty = 1:n_strata, 157 | col = 1:n_strata, 158 | type = "l") 159 | } 160 | 161 | if (dist %in% distn_names[["lognormal"]]) { 162 | params <- list( 163 | FUN = "lines", 164 | xlab = "log(time)", 165 | ylab = expression(Phi^-1 ~ (1 - S(t))), 166 | main = "Log-normal distributional assumption", 167 | x = lapply(times, log), 168 | y = lapply(survs, function(x) qnorm(1 - x)), 169 | lty = 1:n_strata, 170 | col = 1:n_strata, 171 | type = "l") 172 | } 173 | 174 | if (dist %in% distn_names[["gompertz"]]) { 175 | stop("Gompertz not yet implemented.") 176 | 177 | # estimate.h <- function(s, t) { 178 | # denom <- t - c(t[-1], max(t) + 1) 179 | # numerator <- log(s) - log(c(s[-1], 0)) 180 | # -numerator/denom 181 | # } 182 | 183 | # params <- list( 184 | # x = lapply(times, log), 185 | # y = estimate.h(survs, times), 186 | # xlab = "log(time)", 187 | # ylab = "h(t)", 188 | # main = "Gompertz distributional assumption", 189 | # lty = 1:n_strata, 190 | # col = 1:n_strata, 191 | # type = "l") 192 | } 193 | 194 | default_pars <- list( 195 | x = NULL, 196 | type = "n", 197 | axes = FALSE, 198 | xlab = params$xlab, 199 | ylab = params$ylab, 200 | main = params$main, 201 | xlim = range(pretty(unlist(params$x))), 202 | ylim = range(pretty(unlist(params$y)))) 203 | 204 | setup_pars <- modifyList( 205 | default_pars, dots[names(default_pars)]) 206 | 207 | if (graph == "base") { 208 | 209 | # empty plot 210 | do.call(plot, setup_pars) 211 | 212 | axis(1); axis(2) 213 | 214 | # plot lines 215 | do.call(mapply, modifyList(params, dots)) 216 | 217 | if (isTRUE(add_legend)) { 218 | legend("topright", names(survs), col = params$col, 219 | lty = params$lty, bty = "n") 220 | } 221 | } 222 | 223 | if (graph == "ggplot2") { 224 | 225 | if (!add_legend) { 226 | pos.legend <- "none" 227 | } else { 228 | pos.legend <- "right"} 229 | 230 | ggdata <- 231 | data.frame(time = unlist(params$x), 232 | y = unlist(params$y)) |> 233 | tibble::rownames_to_column("Group") |> 234 | mutate(Group = gsub("\\d+", "", Group)) 235 | 236 | p <- 237 | ggplot(ggdata, aes(x = .data$time, y = .data$y, 238 | group = .data$Group, col = .data$Group)) + 239 | geom_line() + 240 | do.call(labs, 241 | list(title = setup_pars$main, 242 | x = setup_pars$xlab, 243 | y = setup_pars$ylab)) + 244 | theme_bw() + 245 | theme(legend.position = pos.legend) 246 | 247 | print(p) 248 | } 249 | 250 | invisible(params) 251 | } 252 | 253 | 254 | #' 255 | get_distribution <- function(fit, mod) { 256 | m <- fit$models[[mod]] 257 | tolower(ifelse(fit$method == "hmc", m@model_name, m$dlist$name)) 258 | } 259 | 260 | -------------------------------------------------------------------------------- /R/print.survHE.R: -------------------------------------------------------------------------------- 1 | #' Print a summary of the survival model(s) fitted by \code{fit.models} 2 | #' 3 | #' Prints the summary table for the model(s) fitted, with the estimate of the 4 | #' parameters 5 | #' 6 | #' 7 | #' @param x the \code{survHE} object (the output of the call to 8 | #' \code{fit.models}) 9 | #' @param mod is the index of the model. Default value is 1, but the user can 10 | #' choose which model fit to visualise, if the call to fit.models has a vector 11 | #' argument for distr (so many models are fitted & stored in the same object) 12 | #' @param \dots additional options, including: \code{digits} = number of 13 | #' significant digits to be shown in the summary table (default = 6) 14 | #' \code{original} = a flag to say whether the *original* table 15 | #' from either \code{flexsurv} or \code{INLA} or \code{rstan} should be printed 16 | #' @author Gianluca Baio 17 | #' @template refs 18 | #' @keywords Parametric survival models 19 | #' @examples 20 | #' \dontrun{ 21 | #' data(bc) 22 | #' 23 | #' mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 24 | #' distr="exp",method="mle") 25 | #' print(mle) 26 | #' } 27 | #' 28 | #' @export print.survHE 29 | print.survHE <- function(x,mod=1,...) { 30 | # Creates a print method for the objects in the class survHE 31 | # x is the survHE object (the output of the call to fit.models) 32 | # mod is the index of the model. Default value is 1, but the user can choose which model fit to visualise, 33 | # if the call to fit.models has a vector argument for distr (so many models are fitted & stored in the same object) 34 | # ... optional arguments 35 | # digits = number of *significant* digits to be shown in the summary table (default = 6) 36 | # original = a flag to say whether the *original* table from either INLA or MCMC should be printed 37 | 38 | exArgs <- list(...) 39 | 40 | # Loads available models 41 | availables <- load_availables() 42 | 43 | # Can select the number of digits to be printed in the output table 44 | if(!exists("digits",where=exArgs)){digits=6} else {digits=exArgs$digits} 45 | if(!exists("original",where=exArgs)){original=FALSE} else {original=exArgs$original} 46 | # Aliases for 'original' 47 | if(exists("orig",exArgs)){original=exArgs$orig} 48 | 49 | # Now computes the stats, using different helpers depending on the underlying method 50 | # Can ask for the original output from either 'flexsurv', 'inla' or 'rstan' 51 | if(original==TRUE) { 52 | do.call( 53 | paste0("original_table_",x$method), 54 | args=list(x,mod,digits) 55 | ) 56 | } # If not, go with the default formatting using the standardised 'survHE' output 57 | else { 58 | # First make the results table using the helper functions 59 | res=do.call( 60 | paste0("get_stats_",x$method), 61 | args=list(x,mod) 62 | ) 63 | # Now formats the table 64 | format_table(x,mod,res,digits) 65 | } 66 | } 67 | 68 | -------------------------------------------------------------------------------- /R/psa.plot.R: -------------------------------------------------------------------------------- 1 | #' Graphical depiction of the probabilistic sensitivity analysis for the 2 | #' survival curves 3 | #' 4 | #' Plots the survival curves for all the PSA simulations. The function is 5 | #' actually deprecated - similar graphs can be obtained directly using 6 | #' the \code{plot} method (with options), which allows a finer depiction 7 | #' of the results. 8 | #' 9 | #' @param psa the result of the call to the function \code{make.surv} 10 | #' @param ... Optional graphical parameters, such as: 11 | #' \itemize{ 12 | #' \item \code{xlab} = label for the x-axis 13 | #' \item \code{ylab} = label for the y-axis 14 | #' \item \code{col} = (vector) of colours for the lines to be plotted 15 | #' \item \code{alpha} = the level of transparency for the curves (default = 0.2) 16 | #' } 17 | #' 18 | #' @author Gianluca Baio 19 | #' @template refs 20 | #' @seealso \code{\link{make.surv}}, \code{\link{write.surv}} 21 | #' @keywords Survival models Bootstrap Probabilistic sensitivity analysis 22 | #' 23 | #' @examples 24 | #' \dontrun{ 25 | #' data(bc) 26 | #' 27 | #' # Fits the same model using the 3 inference methods 28 | #' mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 29 | #' distr="exp",method="mle") 30 | #' p.mle = make.surv(mle,nsim=100) 31 | #' psa.plot(p.mle) 32 | #' } 33 | #' 34 | #' @export psa.plot 35 | psa.plot <- function(psa,...) { 36 | # Plots the survival curves for all the PSA simulations 37 | # psa = the result of the call to the function make.surv 38 | # ... = additional arguments 39 | # xlab = label for the x-axis 40 | # ylab = label for the y-axis 41 | # col = vector of colours with which to plot the curves 42 | # alpha = parameter to determine the transparency (default = 0.2) 43 | # main = a string to write the title 44 | # labs = a vector with non-standard names for the legend values 45 | # name_labs = the non-standard title for the legend 46 | # xlim = a vector of limits for the times 47 | 48 | exArgs <- list(...) 49 | 50 | # Creates the dataset to plot with the survival curves for all profiles 51 | strata <- lapply(1:nrow(psa$des.mat), function(x) { 52 | psa$des.mat %>% 53 | as_tibble() %>% 54 | select(!contains("(Intercept)")) %>% 55 | slice(x) %>% 56 | round(digits=2) %>% 57 | mutate(strata=paste0(names(.),"=",.,collapse=","))}) %>% 58 | bind_rows(.) %>% 59 | select(strata) 60 | 61 | toplot <- lapply(1:length(psa$S),function(i) { 62 | psa$S[[i]] %>% bind_cols(strata=as.factor(as.character(strata[i,]))) 63 | }) %>% bind_rows(.) 64 | 65 | if(exists("alpha",where=exArgs)){alpha=exArgs$alpha} else {alpha=0.2} 66 | if(exists("name_labs",where=exArgs)){name_labs=exArgs$name_labs} else {name_labs="Profile"} 67 | 68 | psa.plot <- ggplot(data=toplot,aes(x=time, y=S, colour=strata)) + 69 | geom_line(linewidth=.9) + 70 | theme_bw() + 71 | theme(axis.text.x = element_text(color="black",size=12,angle=0,hjust=.5,vjust=.5), 72 | axis.text.y = element_text(color="black",size=12,angle=0,hjust=.5,vjust=.5), 73 | axis.title.x = element_text(color="black",size=14,angle=0,hjust=.5,vjust=.5), 74 | axis.title.y = element_text(color="black",size=14,angle=90,hjust=.5,vjust=.5)) + 75 | theme(axis.line = element_line(colour = "black"), 76 | #panel.grid.major = element_blank(), 77 | #panel.grid.minor = element_blank(), 78 | #panel.border = element_blank(), 79 | panel.background = element_blank(), 80 | panel.border = element_blank(), 81 | plot.title = element_text(size=18, face="bold")) + 82 | theme(legend.position="inside", legend.position.inside=c(.75,.9), 83 | legend.title=element_text(size=15,face="bold"), 84 | #legend.title = element_blank(), 85 | legend.text = element_text(colour="black", size=14, face="plain"), 86 | legend.background=element_blank()) + 87 | labs(y="Survival",x="Time",title=NULL, color=name_labs) 88 | 89 | # If there are more than 1 simulation, then there also are the low and upp extremes and plots them too 90 | if(any(grepl("low",names(toplot)))) { 91 | psa.plot <- psa.plot + 92 | geom_ribbon(data=toplot, 93 | aes(x=time, y=S, ymin=low,ymax=upp,fill=strata), 94 | alpha=alpha,show.legend=FALSE) 95 | } 96 | 97 | # Optional arguments 98 | if(exists("col",where=exArgs)) { 99 | psa.plot <- psa.plot + scale_color_manual(values=exArgs$col) + 100 | scale_fill_manual(values=exArgs$col) 101 | } 102 | if(exists("xlab",where=exArgs)){ 103 | psa.plot <- psa.plot + labs(x=exArgs$xlab) 104 | } 105 | if(exists("ylab",where=exArgs)){ 106 | psa.plot <- psa.plot + labs(y=exArgs$ylab) 107 | } 108 | if(exists("main",where=exArgs)) { 109 | psa.plot <- psa.plot + labs(title=exArgs$main) + 110 | theme(plot.title = element_text(size=18,face="bold")) 111 | } 112 | if(exists("labs",where=exArgs)) { 113 | psa.plot <- psa.plot + scale_color_discrete(labels=exArgs$labs) 114 | } 115 | if(exists("xlim",where=exArgs)){ 116 | psa.plot <- psa.plot + xlim(exArgs$xlim) 117 | } 118 | 119 | psa.plot 120 | } 121 | -------------------------------------------------------------------------------- /R/summary.survHE.R: -------------------------------------------------------------------------------- 1 | #' Prints a summary table for the distribution the mean survival time for a 2 | #' given model and data 3 | #' 4 | #' Calculates the mean survival time as the area under the survival curve 5 | #' 6 | #' A list comprising of the following elements 7 | #' 8 | #' @param object a \code{survHE} object (resulting from the call to 9 | #' \code{fit.models} 10 | #' @param mod the model to be analysed (default = 1) 11 | #' @param t the vector of times to be used in the computation. Default = NULL, 12 | #' which means the observed times will be used. NB: the vector of times should 13 | #' be: i) long enough so that S(t) goes to 0; and ii) dense enough so that the 14 | #' approximation to the AUC is sufficiently precise 15 | #' @param nsim the number of simulations from the survival curve distributions 16 | #' to be used (to compute interval estimates) 17 | #' @param \dots Additional options 18 | #' @return \item{mean.surv}{ A matrix with the simulated values for the mean 19 | #' survival times } \item{tab}{ A summary table } 20 | #' @author Gianluca Baio 21 | #' @seealso \code{fit.models}, \code{make.surv} 22 | #' @template refs 23 | #' @keywords Parametric survival models Mean survival time 24 | #' @examples 25 | #' \dontrun{ 26 | #' data(bc) 27 | #' 28 | #' mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 29 | #' distr="exp",method="mle") 30 | #' summary(mle,nsim=100) 31 | #' } 32 | #' 33 | #' @export summary.survHE 34 | summary.survHE <- function(object,mod=1,t=NULL,nsim=1000,...) { 35 | # Computes the estimated mean survival as the area under the survival curve 36 | # This is obtained using the trapezoidal method by taking the average of the "left" and "right" y-values. 37 | # object: is the output from a fit.models call 38 | # mod: the model to be analysed (default = 1) 39 | # t: the vector of times to be used in the computation. Default = NULL, which means the observed times will be used. 40 | # NB: the vector of times should be: i) long enough so that S(t) goes to 0; and ii) dense enough so that 41 | # the approximation to the AUC is sufficiently precise 42 | # nsim: number of simulations from the survival curve distributions to be used (to compute interval estimates) 43 | # stats: a logical value. If TRUE, also shows a table 44 | # ...: optional arguments 45 | # newdata = a list (of lists), specifiying the values of the covariates at which the computation is performed. For example 46 | # 'list(list(arm=0),list(arm=1))' will create two survival curves, one obtained by setting the covariate 'arm' 47 | # to the value 0 and the other by setting it to the value 1. In line with 'flexsurv' notation, the user needs 48 | # to either specify the value for *all* the covariates or for none (in which case, 'newdata=NULL', which is the 49 | # default). If some value is specified and at least one of the covariates is continuous, then a single survival 50 | # curve will be computed in correspondence of the average values of all the covariates (including the factors, 51 | # which in this case are expanded into indicators). The order of the variables in the list *must* be the same 52 | # as in the formula used for the model 53 | # labs: a vector of strings giving the names of the "profile" of covariates for which the mean survival times are computed 54 | # 55 | # NB: NEED TO FIX THIS FOR THE POLY-WEIBULL 56 | # 57 | # Defines the utility function to compute the stats table 58 | make.stats <- function(x, dim = 2) { 59 | bugs.stats <- function(x) { 60 | c(mean(x), sd(x), quantile(x, 0.025), median(x), quantile(x, 0.975)) 61 | } 62 | if (is.null(dim(x)) == TRUE) { 63 | tab <- bugs.stats(x) 64 | names(tab) <- c("mean", "sd", "2.5%", "median", "97.5%") 65 | } 66 | if (is.null(dim(x)) == FALSE) { 67 | tab <- t(apply(x, dim, function(x) bugs.stats(x))) 68 | colnames(tab) <- c("mean", "sd", "2.5%", "median", "97.5%") 69 | } 70 | return(tab) 71 | } 72 | 73 | exArgs <- list(...) 74 | if (!exists("newdata",where=exArgs)) {newdata <- NULL} else {newdata <- exArgs$newdata} 75 | if (!exists("labs",where=exArgs)) {labs <- NULL} else {labs <- exArgs$labs} 76 | if(is.null(t)) { 77 | if(object$misc$model_name[mod]=="pow") { 78 | t <- sort(unique(object$misc$km[[mod]]$time)) 79 | } else { 80 | t <- sort(unique(object$misc$km$time)) 81 | } 82 | } 83 | 84 | psa <- make.surv(object,mod=mod,t=t,nsim=nsim,newdata=newdata) 85 | 86 | rlabs <- rownames(psa$des.mat) 87 | if (!is.null(rlabs)) { 88 | rlabs <- gsub("^1,","",rlabs) 89 | } else { 90 | rlabs <- rep("",length(psa$sim)) 91 | } 92 | if(!is.null(labs) & length(labs)==length(rlabs)) {rlabs <- labs} 93 | 94 | mean.surv=matrix(unlist( 95 | lapply(psa$mat,function(i) { 96 | lapply(1:psa$nsim,function(j) { 97 | xvar=i$time 98 | yvar=i[,(j+1)] 99 | sum(diff(xvar) * (head(yvar,-1)+tail(yvar,-1)), na.rm=TRUE)/2 100 | }) 101 | }) 102 | ),nrow=psa$nsim,byrow=FALSE) 103 | 104 | if (ncol(mean.surv)==length(names(object$misc$km$strata))) { 105 | colnames(mean.surv) <- names(object$misc$km$strata) 106 | } 107 | 108 | tab <- NULL 109 | if(psa$nsim>1) { 110 | tab <- make.stats(mean.surv) 111 | rownames(tab) <- rlabs 112 | if(!is.null(names(object$misc$km$strata))) { 113 | if (ncol(mean.surv)==length(names(object$misc$km$strata))) { 114 | rownames(tab) <- names(object$misc$km$strata) 115 | } else { 116 | rownames(tab) <- rlabs 117 | } 118 | } else { 119 | rownames(tab) <- rlabs 120 | } 121 | cat("\nEstimated average survival time distribution* \n") 122 | print(tab) 123 | cat(paste0("\n*Computed over the range: [", 124 | paste(format(range(t),digits=4,nsmall=3),collapse="-"),"] using ",psa$nsim, 125 | " simulations.\nNB: Check that the survival curves tend to 0 over this range!\n")) 126 | } 127 | 128 | invisible( 129 | list(mean.surv=mean.surv, 130 | tab=tab)) 131 | } 132 | -------------------------------------------------------------------------------- /R/survHE-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | "_PACKAGE" 3 | 4 | ## usethis namespace: start 5 | ## usethis namespace: end 6 | NULL 7 | -------------------------------------------------------------------------------- /R/utils_fit_models.R: -------------------------------------------------------------------------------- 1 | #' Helper function to run the survival models using MLE and flexsurv 2 | #' 3 | #' @param x a string containing the name of the model 4 | #' to be fitted 5 | #' #' @param exArgs a list of extra arguments passed from the main 'fit.models' 6 | #' function 7 | #' @note Something will go here 8 | #' @author Gianluca Baio 9 | #' @seealso fit.models 10 | #' @references Baio (2020). survHE 11 | #' @keywords Parametric survival models Maximum likelihood estimation 12 | #' @noRd 13 | runMLE <- function(x,exArgs) { 14 | ##### PROBABLY CAN REMOVE THIS ###### 15 | # Checks that 'flexsurv' is loaded up. NB: ***Probably*** not needed, as 'flexsurv' is a primary dependency??? 16 | #if(!isTRUE(requireNamespace("flexsurv",quietly=TRUE))) { 17 | # stop("You need to install the R package 'flexsurv'. Please run in your R terminal:\n install.packages('flexsurv')") 18 | #} 19 | # Loads the model formula & data 20 | formula <- exArgs$formula 21 | data=exArgs$data 22 | 23 | # Loads in the available models in each method 24 | availables <- load_availables() 25 | # Uses the helper 'manipulated_distributions' to create the vectors distr, distr3 and labs 26 | d3 <- manipulate_distributions(x)$distr3 27 | x <- manipulate_distributions(x)$distr 28 | 29 | tic <- proc.time() 30 | # If user selects RPS model, then could also provide some optional arguments - uses flexsurv defaults 31 | if(x=="survspline") { 32 | if(exists("bhazard",where=exArgs)) {bhazard <- exArgs$bhazard} else {bhazard <-NULL} 33 | if(exists("weights",where=exArgs)) {weights <- exArgs$weights} else {weights <- NULL} 34 | if(exists("subset",where=exArgs)) {subset <- exArgs$subset} else {subset <- NULL} 35 | if(exists("knots",where=exArgs)) {knots <- exArgs$knots} else {knots <- NULL} 36 | if(exists("k",where=exArgs)) {k <- exArgs$k} else {k <- 0} 37 | if(exists("bknots",where=exArgs)) {bknots <- exArgs$bknots} else {bknots <- NULL} 38 | if(exists("scale",where=exArgs)) {scale <- exArgs$scale} else {scale <- "hazard"} 39 | if(exists("timescale",where=exArgs)) {timescale <- exArgs$scale} else {timescale <- "log"} 40 | model <- do.call( 41 | flexsurv::flexsurvspline, 42 | args=list( 43 | formula=formula, 44 | data=quote(data), 45 | k=k, 46 | knots=knots, 47 | bknots=bknots, 48 | scale=scale, 49 | timescale=timescale, 50 | weights=weights, 51 | subset=subset 52 | ) 53 | ) 54 | ###model <- flexsurv::flexsurvspline(formula=formula,data=data,k=k,knots=knots,bknots=bknots,scale=scale, 55 | ### timescale=timescale) 56 | if(!is.null(weights)) {model$call$weights=exArgs$call$weights} else {model$call$weights=NULL} 57 | if(!is.null(subset)) {model$call$subset=exArgs$call$subset} else {model$call$subset=NULL} 58 | } else { 59 | # If it's one of the other available models under MLE, then simply runs flexsurv::flexsurvreg 60 | # But allows to use weight and subset 61 | if(exists("weights",where=exArgs)) {weights <- exArgs$weights} else {weights=NULL} 62 | if(exists("subset",where=exArgs)) {subset <- exArgs$subset} else {subset <- NULL} 63 | 64 | ###model <- flexsurv::flexsurvreg(formula=formula,data=data,dist=x,weights=weights) 65 | model <- do.call("flexsurvreg",args=list( 66 | formula=formula, 67 | data=quote(data), 68 | dist=x, 69 | weights=weights, 70 | subset=subset 71 | )) 72 | # Fix the 'call' by using parts of the original call to fit.models 73 | if(!is.null(weights)) {model$call$weights=exArgs$call$weights} else {model$call$weights=NULL} 74 | if(!is.null(subset)) {model$call$subset=exArgs$call$subset} else {model$call$subset=NULL} 75 | } 76 | toc <- proc.time()-tic 77 | 78 | # Replaces a field used in 'make.surv' to indicate the model used (standardised across models) 79 | model_name <- d3 80 | 81 | # Finally returns the output 82 | list( 83 | model=model, 84 | aic=model$AIC, 85 | bic=-2*model$loglik+model$npars*log(model$N), 86 | dic=NULL, 87 | time2run=toc[3], 88 | model_name=model_name 89 | ) 90 | } 91 | 92 | 93 | 94 | #' Helper function to make the Kaplan-Meier analysis of the underlying data 95 | #' for a given formula and dataset 96 | #' 97 | #' @param formula a formula specifying the model to be used, in the form 98 | #' \code{Surv(time,event)~treatment[+covariates]} in flexsurv terms, or 99 | #' \code{inla.surv(time,event)~treatment[+covariates]} in INLA terms. 100 | #' @param method A string specifying the inferential method (\code{'mle'}, 101 | #' \code{'inla'} or \code{'hmc'}). If \code{method} is set to \code{'hmc'}, 102 | #' then \code{survHE} will write suitable model code in the Stan language 103 | #' (according to the specified distribution), prepare data and initial values 104 | #' and then run the model. 105 | #' @param data A data frame containing the data to be used for the analysis. 106 | #' This must contain data for the 'event' variable. In case there is no 107 | #' censoring, then \code{event} is a column of 1s. 108 | #' @return \item{ObjSurvfit}{A 'rms::npsurv' estimate of the KM curves}. 109 | #' @note Something will go here 110 | #' @author Gianluca Baio 111 | #' @seealso fit.models 112 | #' @references Baio (2020). survHE 113 | #' @keywords Kaplan-Meier estimate 114 | #' @noRd 115 | make_KM <- function(formula,data) { 116 | km.formula <- as.formula(gsub("inla.surv","Surv",deparse(formula))) 117 | # Computes the Kaplan Meier curve using the package "rms" 118 | ObjSurvfit <- rms::npsurv( # Uses the function "npsurv" from the package "rms" 119 | formula = km.formula, # to fit the model specified in the "formula" object 120 | data = data # to the dataset named "data" 121 | ) 122 | return(ObjSurvfit) 123 | } 124 | 125 | 126 | 127 | 128 | #' Helper function to format the output of the modelling (produced either 129 | #' by running 'runMLE', or 'runINLA', 'runHMC'), in a way that is consistent 130 | #' with the architecture of 'survHE' 131 | #' 132 | #' @param output The output of one of the helper functions used to run the 133 | #' models. 134 | #' @param method The method used to do the estimation 135 | #' @param distr The abbreviated name for the distribution to be used 136 | #' @param formula The model formula 137 | #' @param data The dataset used 138 | #' @return \item{res}{A 'survHE' object containing all the relevant output 139 | #' conveniently formatted}. 140 | #' @note Something will go here 141 | #' @author Gianluca Baio 142 | #' @seealso fit.models 143 | #' @references Baio (2020). survHE 144 | #' @keywords Parametric survival models Bayesian inference via Hamiltonian 145 | #' Monte Carlo Bayesian inference via Integrated Nested Laplace Approximation 146 | #' @noRd 147 | format_output_fit.models <- function(output,method,distr,formula,data) { 148 | 149 | # Uses the helper 'manipulated_distributions' to create the vector labs 150 | labs <- manipulate_distributions(distr)$labs 151 | 152 | # Model output 153 | models <- lapply(output, function(x) x$model) 154 | # Model fitting statistics 155 | model.fitting <- list( 156 | aic=unlist(lapply(output,function(x) x$aic)), 157 | bic=unlist(lapply(output,function(x) x$bic)), 158 | dic=unlist(lapply(output,function(x) x$dic)) 159 | ) 160 | # Miscellanea 161 | misc <- list( 162 | time2run= unlist(lapply(output, function(x) x$time2run)), 163 | formula=formula, 164 | data=data, 165 | model_name=unlist(lapply(output,function(x) x$model_name)) 166 | ) 167 | if(any(distr=="polyweibull")) { 168 | misc$km=lapply(formula,function(f) make_KM(f,data)) 169 | } else { 170 | misc$km=make_KM(formula,data) 171 | } 172 | 173 | # HMC-specific extra output 174 | if(method=="hmc"){ 175 | # Completes the 'misc' and 'model.fitting' lists with additional output 176 | misc$data.stan <- lapply(output,function(x) x$data.stan) 177 | model.fitting$dic2 <- unlist(lapply(output,function(x) x$dic2)) 178 | } 179 | if(method=="inla") { 180 | model.fitting$dic2 = unlist(lapply(output,function(x) x$dic2)) 181 | } 182 | 183 | # Names the elements of the list 184 | names(models) <- labs 185 | 186 | # Formats all output in a list 187 | res <- list(models=models,model.fitting=model.fitting,method=method,misc=misc) 188 | # And sets its class attribute to "survHE" 189 | class(res) <- "survHE" 190 | return(res) 191 | } 192 | 193 | 194 | 195 | #' Helper function to provide a list of models available in each method 196 | #' 197 | #' @return \item{availables}{A list of models available in each method}. 198 | #' @note Something will go here 199 | #' @author Gianluca Baio 200 | #' @seealso fit.models 201 | #' @references Baio (2020). survHE 202 | #' @keywords Parametric survival models Bayesian inference via Hamiltonian 203 | #' Monte Carlo Bayesian inference via Integrated Nested Laplace Approximation 204 | #' @noRd 205 | load_availables <- function() { 206 | # INLA can only do a limited set of models (for now) so if user has selected 207 | # one that is not available, then falls back on MLE analysis 208 | availables=list( 209 | mle=c("genf" = "gef", 210 | "genf.orig" = "gof", 211 | "gengamma" = "gga", 212 | "gengamma.orig" = "ggo", 213 | "exp" = "exp", 214 | "weibull" = "wei", 215 | "weibullPH" = "wph", 216 | "lnorm" = "lno", 217 | "gamma" = "gam", 218 | "gompertz" = "gom", 219 | "llogis" = "llo", 220 | "lognormal" = "lno", 221 | "rps" = "rps" 222 | ), 223 | inla=c("exponential" = "exp", 224 | "weibull" = "wei", 225 | "weibullPH" = "wph", 226 | "lognormal" = "lno", 227 | "loglogistic" = "llo", 228 | "rps" = "rps", 229 | "gompertz" = "gom" # added Mar 19, 2021 230 | ), 231 | hmc=c("Exponential" = "exp", 232 | "Gamma" = "gam", 233 | "GenF" = "gef", 234 | "GenGamma" = "gga", 235 | "Gompertz" = "gom", 236 | "PolyWeibull" = "pow", 237 | "RP" = "rps", 238 | "WeibullAF" = "wei", 239 | "WeibullPH" = "wph", 240 | "logLogistic" = "llo", 241 | "logNormal" = "lno" 242 | ) 243 | ) 244 | return(availables) 245 | } 246 | 247 | 248 | 249 | #' Helper function to manipulate the strings of text defining the 250 | #' distributions selected by the user so they are consistent with the 251 | #' various methods 252 | #' 253 | #' @param x A string with the distribution name selected by the user. 254 | #' @return \item{list}{A list containing the modified name of the 255 | #' distribution, the acronym (3-letters abbreviation), or the 256 | #' labels (humane-readable name)}. 257 | #' @note Something will go here 258 | #' @author Gianluca Baio 259 | #' @seealso fit.models 260 | #' @references Baio (2020). survHE 261 | #' @keywords Parametric survival models Bayesian inference via Hamiltonian 262 | #' Monte Carlo Bayesian inference via Integrated Nested Laplace Approximation 263 | #' @noRd 264 | manipulate_distributions <- function(x){ 265 | # selected model checks ----- 266 | matchTable = list( 267 | "exp" = c("exponential", "exp"), 268 | "wei" = c("weibull", "weibullaft", "weiaft", "waft", "weibullaf", "weiaf", "waf", "wei"), 269 | "wph" = c("weibullph", "weiph", "wph"), 270 | "gam" = c("gamma", "gam", "gma"), 271 | "lno" = c("lognormal", "lnormal", "lnorm", "lognorm", "lno"), 272 | "llo" = c("loglogistic", "loglog", "llogistic", "llogis", "llo", "llogist"), 273 | "gga" = c("generalisedgamma", "generalizedgamma", "ggamma", "gengamma", "gga", "ggam"), 274 | "ggo" = c("gengamma.orig", "ggo"), 275 | "gef" = c("generalisedf", "generalizedf", "genf", "gef"), 276 | "gof" = c("genf.orig", "gof"), 277 | "gom" = c("gompertz", "gpz", "gomp", "gompz", "gom"), 278 | "rps" = c("roystonparmar", "roystonparmarsplines", "roystonparmarspline", "spline", "splines", "rps"), 279 | "pow" = c("polyweibull","pow","PolyWeibull") 280 | ) 281 | # Human readable label 282 | labelTable = c( 283 | "exp" = "Exponential", 284 | "wei" = "Weibull (AFT)", 285 | "wph" = "Weibull (PH)", 286 | "gam" = "Gamma", 287 | "lno" = "log-Normal", 288 | "llo" = "log-Logistic", 289 | "gga" = "Gen. Gamma", "ggo" = "Gen. Gamma (orig parametrisation)", 290 | "gef" = "Gen. F", "gof" = "Gen. F (orig parametrisation)", 291 | "gom" = "Gompertz", 292 | "rps" = "Royston-Parmar", 293 | "pow" = "Poly-Weibull") 294 | # Labels used by R to define p..., r... and d... commands 295 | labelR = c( 296 | "exp" = "exp", 297 | "wei" = "weibull", 298 | "wph" = "weibullPH", 299 | "gam" = "gamma", 300 | "lno" = "lnorm", 301 | "llo" = "llogis", 302 | "gga" = "gengamma", 303 | "ggo" = "gengamma.orig", 304 | "gef" = "genf", 305 | "gof" = "genf.orig", 306 | "gom" = "gompertz", 307 | "rps" = "survspline", 308 | "pow" = "polyweibull" 309 | ) 310 | 311 | distr = gsub("[ ]*[-]*", "", tolower(x)) 312 | isDistrUnmatched = which(!sapply( 313 | 1:length(distr), 314 | '%in%', 315 | unname(unlist(sapply(matchTable, match, distr))))) 316 | if (length(isDistrUnmatched) > 0) { 317 | stop(paste0("Distribution ", paste(distr[isDistrUnmatched], collapse = ", "), " could not be matched.")) 318 | } 319 | 320 | distr3 <- numeric() 321 | for (i in 1:length(distr)) { 322 | distr3[i] <- names(which(unlist(lapply(matchTable,function(x) distr[i]%in%x)))) 323 | } 324 | labs <- unname(labelTable[distr3]) 325 | distr <- unname(labelR[distr3]) 326 | 327 | list(distr=distr,distr3=distr3,labs=labs) 328 | } 329 | 330 | 331 | #' Helper function to check that the distribution(s) provided by the user are 332 | #' consistent with the method chosen for inference. 333 | #' 334 | #' \code{'inla'} or \code{'hmc'}). If \code{method} is set to \code{'hmc'}, 335 | #' then \code{survHE} will write suitable model code in the Stan language 336 | #' (according to the specified distribution), prepare data and initial values 337 | #' and then run the model. 338 | #' @param distr3 A vector of distribution labels (as created by 'fit.models'). 339 | #' It's a 3-letters label to identify the distributions 340 | #' @param availables A list with the distributions available for each method. 341 | #' @note Something will go here 342 | #' @author Gianluca Baio 343 | #' @seealso fit.models 344 | #' @references Baio (2020). survHE 345 | #' @keywords Parametric survival models 346 | #' @noRd 347 | check_distributions <- function(method,distr) { 348 | # Loads in the available models in each method 349 | availables <- load_availables() 350 | # Uses the helper 'manipulated_distributions' to create the vectors distr, distr3 and labs 351 | distr3 <- manipulate_distributions(distr)$distr3 352 | 353 | # If 'method' is either 'inla' or 'hmc but we're trying to run a model that is not available, then 354 | # falls back to 'mle' 355 | if(method %in% c("inla","hmc")) { 356 | if(!all(distr3 %in% availables[[method]])) { 357 | ####modelsString <- unname(labelTable[availables[[method]]]) 358 | modelsString <- unname(manipulate_distributions(availables[[method]])$labs) 359 | modelsString[length(modelsString)] = paste0("or ", modelsString[length(modelsString)]) 360 | message(paste0( 361 | "NB: ",toupper(method)," can only fit ", 362 | paste(modelsString, collapse = ", "), 363 | " parametric survival models. Falling back on MLE analysis") 364 | ) 365 | method <- "mle" 366 | } 367 | } 368 | 369 | # 'mle' can implement all the possible models, except the PolyWeibull 370 | # In this case, I choose to *stop* execution, rather than falling back to 'hmc'! 371 | if (method == "mle") { 372 | if(!all(distr3 %in% availables[[method]])) { 373 | stop(paste0("The Poly-Weibull model is only implemented under method='hmc'. 374 | Please set this option in your call to 'fit.models'")) 375 | } 376 | } 377 | return(method) 378 | } 379 | -------------------------------------------------------------------------------- /R/utils_print_survHE.R: -------------------------------------------------------------------------------- 1 | #' Helper function to get the relevant stats to print the summary table 2 | #' 3 | #' @param x The 'survHE' object with the fitted model 4 | #' @param mod A number identifying which of the models is to be used 5 | #' @return \item{res}{The resulting stats} 6 | #' @author Gianluca Baio 7 | #' @seealso print.survHE 8 | #' @references Baio (2020). survHE 9 | #' @keywords MLE 10 | #' @noRd 11 | get_stats_mle <- function(x,mod) { 12 | # Can use directly 'flexsurv' output to make the results table 13 | res=x$models[[mod]]$res[,c(1,4,2,3),drop=FALSE] 14 | colnames(res)=c("mean","se","L95%","U95%") 15 | return(res) 16 | } 17 | 18 | 19 | #' Helper function to get the relevant stats to print the summary table 20 | #' 21 | #' @param x The 'survHE' object with the fitted model 22 | #' @param mod A number identifying which of the models is to be used 23 | #' @return \item{res}{The resulting stats} 24 | #' @author Gianluca Baio 25 | #' @seealso print.survHE 26 | #' @references Baio (2020). survHE 27 | #' @keywords INLA 28 | #' @noRd 29 | get_stats_inla <- function(x,mod) { 30 | # Calls the helper functions to make the results table 31 | res=do.call(paste0("rescale_stats_inla_",x$misc$model_name[mod]), 32 | args=list(x,mod)) 33 | return(res) 34 | } 35 | 36 | 37 | #' Helper function to get the relevant stats to print the summary table 38 | #' 39 | #' @param x The 'survHE' object with the fitted model 40 | #' @param mod A number identifying which of the models is to be used 41 | #' @return \item{res}{The resulting stats} 42 | #' @author Gianluca Baio 43 | #' @seealso print.survHE 44 | #' @references Baio (2020). survHE 45 | #' @keywords HMC 46 | #' @noRd 47 | get_stats_hmc <- function(x,mod) { 48 | ######quiet(print(x$models[[mod]])) 49 | # Gets the original summary stats from the 'rstan' run 50 | table = rstan::summary(x$models[[mod]])$summary[,c("mean","sd","2.5%","97.5%")] 51 | ###table <- cbind(x$models[[mod]]@.MISC$summary$msd,x$models[[mod]]@.MISC$summary$quan[,c("2.5%","97.5%")]) 52 | # Removes the node 'lp___' 53 | table=table[-grep("lp__",rownames(table)),] 54 | # If the model is intercept only, removes the unnecessary covariates created to suit 'stan' format 55 | if("X_obs" %in% names(x$misc$data.stan[[mod]])) { 56 | if(any(apply(x$misc$data.stan[[mod]]$X_obs,2,function(x) all(x==0)))) { 57 | table=table[-grep("beta\\[2\\]",rownames(table)),] 58 | } 59 | } else { 60 | if(any(apply(x$misc$data.stan[[mod]]$X,2,function(x) all(x==0)))) { 61 | if(x$misc$model_name[mod]=="rps") { 62 | table=table[-grep("beta",rownames(table)),] 63 | } else { 64 | table=table[-grep("beta\\[2\\]",rownames(table)),] 65 | } 66 | } 67 | } 68 | # Now calls the helper functions to make the results table 69 | res=do.call(paste0("rescale_stats_hmc_",x$misc$model_name[mod]), 70 | args=list(table=table,x=x)) 71 | return(res) 72 | } 73 | 74 | #' Helper function to rescale the stats for the Exponential model 75 | #' 76 | #' @param table The table with the relevant values for the model 77 | #' parameters 78 | #' @param x The original 'survHE' object 79 | #' @return \item{res}{The resulting stats} 80 | #' @author Gianluca Baio 81 | #' @seealso print.survHE 82 | #' @references Baio (2020). survHE 83 | #' @keywords HMC Exponential 84 | #' @noRd 85 | rescale_stats_hmc_exp <- function(table,x) { 86 | rate <- matrix(table[grep("rate",rownames(table)),],ncol=4) 87 | rownames(rate) <- "rate" 88 | effects=add_effects_hmc(table,x) 89 | res <- rbind(rate,effects) 90 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")} 91 | return(res) 92 | } 93 | 94 | #' Helper function to rescale the stats for the Weibull AFT model 95 | #' 96 | #' @param table The table with the relevant values for the model 97 | #' parameters 98 | #' @param x The original 'survHE' object 99 | #' @return \item{res}{The resulting stats} 100 | #' @author Gianluca Baio 101 | #' @seealso print.survHE 102 | #' @references Baio (2020). survHE 103 | #' @keywords HMC WeibullAFT 104 | #' @noRd 105 | rescale_stats_hmc_wei <- function(table,x) { 106 | scale <- matrix(table[grep("scale",rownames(table)),],ncol=4) 107 | rownames(scale) <- "scale" 108 | shape <- matrix(table[grep("alpha",rownames(table)),],ncol=4) 109 | rownames(shape) <- "shape" 110 | effects=add_effects_hmc(table,x) 111 | res <- rbind(shape,scale,effects) 112 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")} 113 | return(res) 114 | } 115 | 116 | #' Helper function to rescale the stats for the Weibull PH model 117 | #' 118 | #' @param table The table with the relevant values for the model 119 | #' parameters 120 | #' @param x The original 'survHE' object 121 | #' @return \item{res}{The resulting stats} 122 | #' @author Gianluca Baio 123 | #' @seealso print.survHE 124 | #' @references Baio (2020). survHE 125 | #' @keywords HMC WeibullPH 126 | #' @noRd 127 | rescale_stats_hmc_wph <- function(table,x) { 128 | scale <- matrix(table[grep("scale",rownames(table)),],ncol=4) 129 | rownames(scale) <- "scale" 130 | shape <- matrix(table[grep("alpha",rownames(table)),],ncol=4) 131 | rownames(shape) <- "shape" 132 | effects=add_effects_hmc(table,x) 133 | res <- rbind(shape,scale,effects) 134 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")} 135 | return(res) 136 | } 137 | 138 | #' Helper function to rescale the stats for the Gompertz model 139 | #' 140 | #' @param table The table with the relevant values for the model 141 | #' parameters 142 | #' @param x The original 'survHE' object 143 | #' @return \item{res}{The resulting stats} 144 | #' @author Gianluca Baio 145 | #' @seealso print.survHE 146 | #' @references Baio (2020). survHE 147 | #' @keywords HMC Gompertz 148 | #' @noRd 149 | rescale_stats_hmc_gom <- function(table,x) { 150 | rate <- matrix(table[grep("rate",rownames(table)),],ncol=4) 151 | rownames(rate) <- "rate" 152 | shape <- matrix(table[grep("alpha",rownames(table)),],ncol=4) 153 | rownames(shape) <- "shape" 154 | effects=add_effects_hmc(table,x) 155 | res <- rbind(shape,rate,effects) 156 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")} 157 | return(res) 158 | } 159 | 160 | #' Helper function to rescale the stats for the logNormal model 161 | #' 162 | #' @param table The table with the relevant values for the model 163 | #' parameters 164 | #' @param x The original 'survHE' object 165 | #' @return \item{res}{The resulting stats} 166 | #' @author Gianluca Baio 167 | #' @seealso print.survHE 168 | #' @references Baio (2020). survHE 169 | #' @keywords HMC logNormal 170 | #' @noRd 171 | rescale_stats_hmc_lno <- function(table,x) { 172 | meanlog <- matrix(table[grep("meanlog",rownames(table)),],ncol=4) 173 | rownames(meanlog) <- "meanlog" 174 | sdlog <- matrix(table[grep("alpha",rownames(table)),],ncol=4) 175 | rownames(sdlog) <- "sdlog" 176 | effects=add_effects_hmc(table,x) 177 | res <- rbind(meanlog,sdlog,effects) 178 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")} 179 | return(res) 180 | } 181 | 182 | #' Helper function to rescale the stats for the Gamma model 183 | #' 184 | #' @param table The table with the relevant values for the model 185 | #' parameters 186 | #' @param x The original 'survHE' object 187 | #' @return \item{res}{The resulting stats} 188 | #' @author Gianluca Baio 189 | #' @seealso print.survHE 190 | #' @references Baio (2020). survHE 191 | #' @keywords HMC Gamma 192 | #' @noRd 193 | rescale_stats_hmc_gam <- function(table,x) { 194 | rate <- matrix(table[grep("rate",rownames(table)),],ncol=4) 195 | rownames(rate) <- "rate" 196 | shape <- matrix(table[grep("alpha",rownames(table)),],ncol=4) 197 | rownames(shape) <- "shape" 198 | effects=add_effects_hmc(table,x) 199 | res <- rbind(shape,rate,effects) 200 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")} 201 | return(res) 202 | } 203 | 204 | #' Helper function to rescale the stats for the logLogistic model 205 | #' 206 | #' @param table The table with the relevant values for the model 207 | #' parameters 208 | #' @param x The original 'survHE' object 209 | #' @return \item{res}{The resulting stats} 210 | #' @author Gianluca Baio 211 | #' @seealso print.survHE 212 | #' @references Baio (2020). survHE 213 | #' @keywords HMC logLogistic 214 | #' @noRd 215 | rescale_stats_hmc_llo <- function(table,x) { 216 | rate <- matrix(table[grep("rate",rownames(table)),],ncol=4) 217 | rownames(rate) <- "scale" 218 | shape <- matrix(table[grep("alpha",rownames(table)),],ncol=4) 219 | rownames(shape) <- "shape" 220 | effects=add_effects_hmc(table,x) 221 | res <- rbind(shape,rate,effects) 222 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")} 223 | return(res) 224 | } 225 | 226 | #' Helper function to rescale the stats for the Gen F model 227 | #' 228 | #' @param table The table with the relevant values for the model 229 | #' parameters 230 | #' @param x The original 'survHE' object 231 | #' @return \item{res}{The resulting stats} 232 | #' @author Gianluca Baio 233 | #' @seealso print.survHE 234 | #' @references Baio (2020). survHE 235 | #' @keywords HMC GenF 236 | #' @noRd 237 | rescale_stats_hmc_gef <- function(table,x) { 238 | mu <- matrix(table[grep("beta",rownames(table)),,drop=FALSE][1,],ncol=4,nrow=1) 239 | rownames(mu) <- "mu" 240 | sigma <- matrix(table[grep("sigma",rownames(table)),],ncol=4) 241 | rownames(sigma) <- "sigma" 242 | Q <- matrix(table[grep("Q",rownames(table)),],ncol=4) 243 | rownames(Q) <- "Q" 244 | P <- matrix(table[match("P",rownames(table)),],ncol=4) 245 | rownames(P) <- "P" 246 | effects=add_effects_hmc(table,x) 247 | res <- rbind(mu,sigma,Q,P,effects) 248 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")} 249 | return(res) 250 | } 251 | 252 | #' Helper function to rescale the stats for the Gen Gamma model 253 | #' 254 | #' @param table The table with the relevant values for the model 255 | #' parameters 256 | #' @param x The original 'survHE' object 257 | #' @return \item{res}{The resulting stats} 258 | #' @author Gianluca Baio 259 | #' @seealso print.survHE 260 | #' @references Baio (2020). survHE 261 | #' @keywords HMC GenGamma 262 | #' @noRd 263 | rescale_stats_hmc_gga <- function(table,x) { 264 | mu <- matrix(table[grep("beta",rownames(table)),,drop=FALSE][1,],ncol=4,nrow=1) 265 | rownames(mu) <- "mu" 266 | sigma <- matrix(table[grep("sigma",rownames(table)),],ncol=4) 267 | rownames(sigma) <- "sigma" 268 | Q <- matrix(table[grep("Q",rownames(table)),],ncol=4) 269 | rownames(Q) <- "Q" 270 | effects=add_effects_hmc(table,x) 271 | res <- rbind(mu,sigma,Q,effects) 272 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")} 273 | return(res) 274 | } 275 | 276 | #' Helper function to rescale the stats for the RPS model 277 | #' 278 | #' @param table The table with the relevant values for the model 279 | #' parameters 280 | #' @param x The original 'survHE' object 281 | #' @return \item{res}{The resulting stats} 282 | #' @author Gianluca Baio 283 | #' @seealso print.survHE 284 | #' @references Baio (2020). survHE 285 | #' @keywords HMC Royston-Parmar splines 286 | #' @noRd 287 | rescale_stats_hmc_rps <- function(table,x) { 288 | gamma <- matrix(table[grep("gamma",rownames(table)),],ncol=4) 289 | rownames(gamma) <- paste0("gamma",0:(nrow(gamma)-1)) 290 | # If there covariates adds their effects 291 | if(length(grep("beta",rownames(table)))>0) { 292 | effects <- matrix(table[grep("beta",rownames(table)),],ncol=4) 293 | cn=colnames(model.matrix(x$misc$formula,x$misc$data)) 294 | rownames(effects) <- cn[-grep("Intercept",cn),drop=FALSE] 295 | } else { 296 | effects <- matrix(NA,nrow=0,ncol=4) 297 | } 298 | res <- rbind(gamma,effects) 299 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")} 300 | return(res) 301 | } 302 | 303 | #' Helper function to rescale the stats for the Poly-Weibull model 304 | #' 305 | #' @param table The table with the relevant values for the model 306 | #' parameters 307 | #' @param x The original 'survHE' object 308 | #' @return \item{res}{The resulting stats} 309 | #' @author Gianluca Baio 310 | #' @seealso print.survHE 311 | #' @references Baio (2020). survHE 312 | #' @keywords HMC Poly-Weibull 313 | #' @noRd 314 | rescale_stats_hmc_pow <- function(table,x) { 315 | rownames(table)[grep("alpha",rownames(table))]=paste0("shape_",1:length(grep("alpha",rownames(table)))) 316 | 317 | # Figures out which beta coefficients should be removed (because they are multiplied by a covariate that is constantly 0) 318 | to.rm=matrix(unlist(lapply(1:length(x$misc$formula),function(m) apply(x$misc$data.stan[[1]]$X[m,,],2,function(x) all(x==0)))), 319 | nrow=length(x$misc$formula),byrow=T) 320 | nmatch <- length(which(to.rm==T)) 321 | if(nmatch>0){ 322 | idx <- matrix(unlist(lapply(1:nmatch,function(i) { 323 | paste0(which(to.rm==TRUE,arr.ind=T)[i,],collapse=",") 324 | }))) 325 | } else {idx=NULL} 326 | if (!is.null(nrow(idx))) { 327 | take.out <- match(paste0("beta[",idx,"]"),rownames(table)) 328 | } else {take.out=NULL} 329 | if(all(!is.null(take.out))) {table=table[-take.out,]} 330 | effects=table[-grep("shape",rownames(table)),] 331 | rownames(effects) <- unlist(lapply(1:x$misc$data.stan[[1]]$M,function(m) { 332 | paste0(colnames(model.matrix(x$misc$formula[[m]],x$misc$data)),"_",m) 333 | })) 334 | res <- rbind(table[grep("shape",rownames(table)),],effects) 335 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")} 336 | return(res) 337 | } 338 | 339 | #' Helper function to rescale the stats for the Weibull AFT model 340 | #' 341 | #' @param table The table with the relevant values for the model 342 | #' parameters 343 | #' @return \item{res}{The resulting stats} 344 | #' @author Gianluca Baio 345 | #' @seealso print.survHE 346 | #' @references Baio (2020). survHE 347 | #' @keywords INLA WeibullAFT 348 | #' @noRd 349 | rescale_stats_inla_wei <- function(x,mod,nsim=1000) { 350 | # The scale and effects are computed as a *non linear* function of the AFT effects and the shape 351 | # But for simplicity can approximate this using 'inla.rmarginal' 352 | shape_sim=INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.hyperpar[[1]]) 353 | fixeff_sim=lapply(1:nrow(x$models[[mod]]$summary.fixed),function(i) { 354 | INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.fixed[[i]]) 355 | }) 356 | shape=shape_sim %>% make_stats %>% matrix(.,ncol=4) 357 | ## NB: INLA has a weird parameterisation and with Weibull AFT, the coefficients have the wrong sign 358 | if(attributes(terms(x$misc$formula))$intercept==1) { 359 | scale=exp(-fixeff_sim[[1]]+log(max(x$misc$km$time))) %>% make_stats%>% matrix(.,ncol=4) 360 | } 361 | rownames(scale) <- "scale" 362 | rownames(shape) <- "shape" 363 | res=rbind(shape,scale) 364 | # If there are covariates then add them too 365 | if(length(fixeff_sim)>1) { 366 | effects=lapply(2:nrow(x$models[[mod]]$summary.fixed),function(i) { 367 | -fixeff_sim[[i]] 368 | }) 369 | effects=matrix(unlist(lapply(effects,function(i) i %>% make_stats)), 370 | nrow=length(fixeff_sim)-1,ncol=4,byrow=T) 371 | rownames(effects) <- x$models[[mod]]$names.fixed[-1] 372 | res=rbind(res,effects) 373 | } 374 | colnames(res)=c("mean","se","L95%","U95%") 375 | return(res) 376 | } 377 | 378 | #' Helper function to rescale the stats for the Weibull PH model 379 | #' 380 | #' @param table The table with the relevant values for the model 381 | #' parameters 382 | #' @return \item{res}{The resulting stats} 383 | #' @author Gianluca Baio 384 | #' @seealso print.survHE 385 | #' @references Baio (2020). survHE 386 | #' @keywords INLA WeibullPH 387 | #' @noRd 388 | rescale_stats_inla_wph <- function(x,mod,nsim=1000) { 389 | # The scale and effects are computed as a *non linear* function of the AFT effects and the shape 390 | # But for simplicity can approximate this using 'inla.rmarginal' 391 | shape_sim=INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.hyperpar[[1]]) 392 | fixeff_sim=lapply(1:nrow(x$models[[mod]]$summary.fixed),function(i) { 393 | INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.fixed[[i]]) 394 | }) 395 | shape=shape_sim %>% make_stats %>% matrix(.,ncol=4) 396 | if(attributes(terms(x$misc$formula))$intercept==1) { 397 | scale=exp(fixeff_sim[[1]]+log(max(x$misc$km$time)))^(-shape_sim) %>% make_stats%>% matrix(.,ncol=4) 398 | } 399 | rownames(scale) <- "scale" 400 | rownames(shape) <- "shape" 401 | res=rbind(shape,scale) 402 | # If there are covariates then add them too 403 | if(length(fixeff_sim)>1) { 404 | effects=lapply(2:nrow(x$models[[mod]]$summary.fixed),function(i) { 405 | fixeff_sim[[i]] 406 | }) 407 | effects=matrix(unlist(lapply(effects,function(i) i %>% make_stats)), 408 | nrow=length(fixeff_sim)-1,ncol=4,byrow=T) 409 | rownames(effects) <- x$models[[mod]]$names.fixed[-1] 410 | res=rbind(res,effects) 411 | } 412 | colnames(res)=c("mean","se","L95%","U95%") 413 | return(res) 414 | } 415 | 416 | #' Helper function to rescale the stats for the Exponential model 417 | #' 418 | #' @param table The table with the relevant values for the model 419 | #' parameters 420 | #' @return \item{res}{The resulting stats} 421 | #' @author Gianluca Baio 422 | #' @seealso print.survHE 423 | #' @references Baio (2020). survHE 424 | #' @keywords INLA Exponential 425 | #' @noRd 426 | rescale_stats_inla_exp <- function(x,mod,nsim=1000) { 427 | fixeff_sim=lapply(1:nrow(x$models[[mod]]$summary.fixed),function(i) { 428 | INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.fixed[[i]]) 429 | }) 430 | if(attributes(terms(x$misc$formula))$intercept==1) { 431 | rate=exp(fixeff_sim[[1]]-log(max(x$misc$km$time))) %>% make_stats %>% matrix(.,ncol=4) 432 | } 433 | rownames(rate)="rate" 434 | res=rate 435 | if(length(fixeff_sim)>1) { 436 | effects=lapply(2:nrow(x$models[[mod]]$summary.fixed),function(i) { 437 | fixeff_sim[[i]] 438 | }) 439 | effects=matrix(unlist(lapply(effects,function(i) i %>% make_stats)), 440 | nrow=length(fixeff_sim)-1,ncol=4,byrow=T) 441 | rownames(effects) <- x$models[[mod]]$names.fixed[-1] 442 | res=rbind(res,effects) 443 | } 444 | colnames(res)=c("mean","se","L95%","U95%") 445 | return(res) 446 | } 447 | 448 | #' Helper function to rescale the stats for the logNormal model 449 | #' 450 | #' @param table The table with the relevant values for the model 451 | #' parameters 452 | #' @return \item{res}{The resulting stats} 453 | #' @author Gianluca Baio 454 | #' @seealso print.survHE 455 | #' @references Baio (2020). survHE 456 | #' @keywords INLA logNormal 457 | #' @noRd 458 | rescale_stats_inla_lno <- function(x,mod,nsim=1000) { 459 | prec_sim=INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.hyperpar[[1]]) 460 | fixeff_sim=lapply(1:nrow(x$models[[mod]]$summary.fixed),function(i) { 461 | INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.fixed[[i]]) 462 | }) 463 | if(attributes(terms(x$misc$formula))$intercept==1) { 464 | meanlog=(fixeff_sim[[1]]+log(max(x$misc$km$time))) %>% make_stats %>% matrix(.,ncol=4) 465 | } 466 | sdlog=sqrt(1/prec_sim) %>% make_stats %>% matrix(.,ncol=4) 467 | rownames(meanlog)="meanlog" 468 | rownames(sdlog)="sdlog" 469 | res=rbind(meanlog,sdlog) 470 | if(length(fixeff_sim)>1) { 471 | effects=lapply(2:nrow(x$models[[mod]]$summary.fixed),function(i) { 472 | fixeff_sim[[i]] 473 | }) 474 | effects=matrix(unlist(lapply(effects,function(i) i %>% make_stats)), 475 | nrow=length(fixeff_sim)-1,ncol=4,byrow=T) 476 | rownames(effects) <- x$models[[mod]]$names.fixed[-1] 477 | res=rbind(res,effects) 478 | } 479 | colnames(res)=c("mean","se","L95%","U95%") 480 | return(res) 481 | } 482 | 483 | #' Helper function to rescale the stats for the logLogistic model 484 | #' 485 | #' @param table The table with the relevant values for the model 486 | #' parameters 487 | #' @return \item{res}{The resulting stats} 488 | #' @author Gianluca Baio 489 | #' @seealso print.survHE 490 | #' @references Baio (2020). survHE 491 | #' @keywords INLA logLogistic 492 | #' @noRd 493 | rescale_stats_inla_llo <- function(x,mod,nsim=1000) { 494 | # Uses 'variant=1' in INLA 495 | shape_sim=INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.hyperpar[[1]]) 496 | fixeff_sim=lapply(1:nrow(x$models[[mod]]$summary.fixed),function(i) { 497 | INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.fixed[[i]]) 498 | }) 499 | shape=shape_sim %>% make_stats %>% matrix(.,ncol=4) 500 | if(attributes(terms(x$misc$formula))$intercept==1) { 501 | scale=exp(-fixeff_sim[[1]]+log(max(x$misc$km$time))) %>% make_stats %>% matrix(.,ncol=4) 502 | } 503 | rownames(shape)="shape" 504 | rownames(scale)="scale" 505 | res=rbind(shape,scale) 506 | if(length(fixeff_sim)>1) { 507 | effects=lapply(2:nrow(x$models[[mod]]$summary.fixed),function(i) { 508 | -fixeff_sim[[i]] 509 | }) 510 | effects=matrix(unlist(lapply(effects,function(i) i %>% make_stats)), 511 | nrow=length(fixeff_sim)-1,ncol=4,byrow=T) 512 | rownames(effects) <- x$models[[mod]]$names.fixed[-1] 513 | res=rbind(res,effects) 514 | } 515 | colnames(res)=c("mean","se","L95%","U95%") 516 | return(res) 517 | } 518 | 519 | #' Helper function to rescale the stats for the Gompertz model 520 | #' 521 | #' @param table The table with the relevant values for the model 522 | #' parameters 523 | #' @return \item{res}{The resulting stats} 524 | #' @author Gianluca Baio 525 | #' @seealso print.survHE 526 | #' @references Baio (2020). survHE 527 | #' @keywords INLA Gompertz 528 | #' @noRd 529 | rescale_stats_inla_gom <- function(x,mod,nsim=1000) { 530 | shape_sim=INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.hyperpar[[1]])/max(x$misc$km$time) 531 | fixeff_sim=lapply(1:nrow(x$models[[mod]]$summary.fixed),function(i) { 532 | INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.fixed[[i]]) 533 | }) 534 | shape=shape_sim %>% make_stats %>% matrix(.,ncol=4) 535 | # Need to rescale only if there is an intercept 536 | if(attributes(terms(x$misc$formula))$intercept==1) { 537 | rate=exp(fixeff_sim[[1]]-log(max(x$misc$km$time))) %>% make_stats %>% matrix(.,ncol=4) 538 | } 539 | rownames(shape)="shape" 540 | rownames(rate)="rate" 541 | res=rbind(shape,rate) 542 | if(length(fixeff_sim)>1) { 543 | effects=lapply(2:nrow(x$models[[mod]]$summary.fixed),function(i) { 544 | fixeff_sim[[i]] 545 | }) 546 | effects=matrix(unlist(lapply(effects,function(i) i %>% make_stats)), 547 | nrow=length(fixeff_sim)-1,ncol=4,byrow=T) 548 | rownames(effects) <- x$models[[mod]]$names.fixed[-1] 549 | res=rbind(res,effects) 550 | } 551 | colnames(res)=c("mean","se","L95%","U95%") 552 | return(res) 553 | } 554 | 555 | #' Helper function to create summary stats 556 | #' 557 | #' @param x A vector of simulations 558 | #' @return \item{tab}{The resulting stats} 559 | #' @author Gianluca Baio 560 | #' @seealso print.survHE 561 | #' @references Baio (2020). survHE 562 | #' @keywords Summaries Print 563 | #' @noRd 564 | make_stats <- function(x) { 565 | tab=c(mean(x), sd(x), quantile(x, 0.025), quantile(x,0.975)) 566 | return(tab) 567 | } 568 | 569 | #' Helper function to for Stan, which needs to first print the output of 570 | #' the model before you can access the elements in the object 571 | #' '@.MISC$summary', so can use this function to print quietly... 572 | #' 573 | #' @param x A 'survHE' object 574 | #' @author Gianluca Baio 575 | #' @seealso print.survHE 576 | #' @references Baio (2020). survHE 577 | #' @keywords HMC Stan 578 | #' @noRd 579 | quiet <- function(x) { 580 | sink(tempfile()) 581 | on.exit(sink()) 582 | invisible(force(x)) 583 | } 584 | 585 | #' Helper function to checks whether covariates effects should be 586 | #' included in the 'res' table for HMC 587 | #' 588 | #' @param table The table with the summary statistics 589 | #' @param x The original 'survHE' object 590 | #' @return \item{effects}{The effects} 591 | #' @author Gianluca Baio 592 | #' @seealso print.survHE 593 | #' @references Baio (2020). survHE 594 | #' @keywords HMC Stan 595 | #' @noRd 596 | add_effects_hmc <- function(table,x) { 597 | # If there's more than one beta, then there are "effects" (otherwise it's only intercept) 598 | if(length(grep("beta",rownames(table)))>1) { 599 | effects <- matrix(table[grep("beta",rownames(table)),],ncol=4) 600 | rownames(effects) <- colnames(model.matrix(x$misc$formula,x$misc$data)) 601 | # Now removes the line with the intercept (which is already rescaled to the shape/rate/mean parameter) 602 | effects=effects[-grep("Intercept",rownames(effects)),,drop=FALSE] 603 | } else { 604 | effects <- matrix(NA,nrow=0,ncol=4) 605 | } 606 | return(effects) 607 | } 608 | 609 | #' Helper function to create the original summary table 610 | #' 611 | #' @param x The 'survHE' model 612 | #' @param mod Which of the models to be used 613 | #' @param digits The number of digits to print 614 | #' @author Gianluca Baio 615 | #' @seealso print.survHE 616 | #' @references Baio (2020). survHE 617 | #' @keywords MLE 618 | #' @noRd 619 | original_table_mle <- function(x,mod,digits) { 620 | print(x$models[[mod]],digits=digits) 621 | } 622 | 623 | #' Helper function to create the original summary table 624 | #' 625 | #' @param x The 'survHE' model 626 | #' @param mod Which of the models to be used 627 | #' @param digits The number of digits to print 628 | #' @author Gianluca Baio 629 | #' @seealso print.survHE 630 | #' @references Baio (2020). survHE 631 | #' @keywords INLA 632 | #' @noRd 633 | original_table_inla <- function(x,mod,digits) { 634 | print(summary(x$models[[mod]]),digits=digits) 635 | cat("\n") 636 | cat("NB: notice that INLA models are fitted to data rescaled in [0-1] for computational stability.") 637 | cat("\nThe estimates are rescaled on the original scale, applying a suitable back-transformation.") 638 | cat("\nThe numbers shown when 'original=TRUE' will be different than those shown in the 'survHE' format.") 639 | } 640 | 641 | #' Helper function to create the original summary table 642 | #' 643 | #' @param x The 'survHE' model 644 | #' @param mod Which of the models to be used 645 | #' @param digits The number of digits to print 646 | #' @author Gianluca Baio 647 | #' @seealso print.survHE 648 | #' @references Baio (2020). survHE 649 | #' @keywords HMC 650 | #' @noRd 651 | original_table_hmc <- function(x,mod,digits) { 652 | print(x$models[[mod]],digits=digits) 653 | } 654 | 655 | 656 | #' Helper function to format the summary table with the model parameters 657 | #' 658 | #' @param x The 'survHE' model 659 | #' @param mod Which of the models to be used 660 | #' @param res The output table 661 | #' @param digits The number of digits to print 662 | #' @author Gianluca Baio 663 | #' @seealso print.survHE 664 | #' @references Baio (2020). survHE 665 | #' @keywords Table formatting Print 666 | #' @noRd 667 | format_table <- function(x,mod,res,digits){ 668 | # First re-format some of the labels (eg model names) 669 | if(x$misc$model_name[mod]=="exp") {label <- "Exponential"} 670 | if(x$misc$model_name[mod]=="gam") {label <- "Gamma"} 671 | if(x$misc$model_name[mod]=="lno") {label <- "log-Normal"} 672 | if(x$misc$model_name[mod]=="llo") {label <-"log-Logistic"} 673 | if(x$misc$model_name[mod]=="gga") {label <- "Generalised Gamma"} 674 | if(x$misc$model_name[mod]=="wei") {label <- "Weibull AF"} 675 | if(x$misc$model_name[mod]=="wph") {label <- "Weibull PH"} 676 | if(x$misc$model_name[mod]=="gef") {label <- "Generalised F"} 677 | if(x$misc$model_name[mod]=="gom") {label <- "Gompertz"} 678 | if(x$misc$model_name[mod]=="rps") {label <- "Royston & Parmar splines"} 679 | if(x$misc$model_name[mod]=="pow") {label <- "Poly-Weibull"} 680 | 681 | # Creates label of the method used 682 | label.met <- ifelse( 683 | x$method=="mle","Flexsurvreg \n(Maximum Likelihood Estimate)", 684 | ifelse(x$method=="inla","INLA (Bayesian inference via \nIntegrated Nested Laplace Approximation)", 685 | "Stan (Bayesian inference via \nHamiltonian Monte Carlo)") 686 | ) 687 | 688 | # Finally prints the formatted table 689 | cat("\n") 690 | cat(paste0("Model fit for the ",label," model, obtained using ",label.met,". Running time: ", 691 | format(x$misc$time2run[[mod]],digits=5,nsmall=3)," seconds")) 692 | cat("\n\n") 693 | print(res,quote=F,digits=digits,justify="center") 694 | cat("\n") 695 | cat("Model fitting summaries\n") 696 | cat(paste0("Akaike Information Criterion (AIC)....: ",format(x$model.fitting$aic[[mod]],digits=6,nsmall=3))) 697 | cat("\n") 698 | cat(paste0("Bayesian Information Criterion (BIC)..: ",format(x$model.fitting$bic[[mod]],digits=6,nsmall=3))) 699 | if(x$method=="inla" | x$method=="hmc") { 700 | cat("\n") 701 | cat(paste0("Deviance Information Criterion (DIC)..: ",format(x$model.fitting$dic[[mod]],digits=6,nsmall=3))) 702 | } 703 | cat("\n\n") 704 | } -------------------------------------------------------------------------------- /R/write.surv.R: -------------------------------------------------------------------------------- 1 | #' write.surv 2 | #' 3 | #' Writes the survival summary to an excel file (helpful to then call the 4 | #' values in the Markov model) 5 | #' 6 | #' Something will go here 7 | #' 8 | #' @param object a summary.flexsurvreg object containing the survival curves 9 | #' (with times, estimates and interval limits) 10 | #' @param file a string with the full path to the file name to be saved 11 | #' @param sheet a string with the name of the sheet to be created 12 | #' @param what a string to describe what to be exported. Can either be 13 | #' 'surv' (default), which outputs the simulation(s) for the survival curves 14 | #' or 'sim', which outputs the simulation(s) for the underlying model 15 | #' parameters. If there are several 'profiles', they get written in 16 | #' separate spreadsheets and a clear indication is given as the name of the 17 | #' spreadsheet 18 | #' @return A spreadsheet file with the simulation(s) of the relevant quantity 19 | #' @author Gianluca Baio 20 | #' @seealso \code{make.surv} 21 | #' @template refs 22 | #' @keywords Excel PSA 23 | #' @examples 24 | #' \dontrun{ 25 | #' # Loads an example dataset from 'flexsurv' 26 | #' data(bc) 27 | #' 28 | #' # Fits the same model using the 3 inference methods 29 | #' mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 30 | #' distr="exp",method="mle") 31 | #' p.mle = make.surv(mle) 32 | #' write.surv(p.mle,file="test.xlsx") 33 | #' } 34 | #' @export write.surv 35 | write.surv <- function(object,file,sheet=NULL,what="surv") { 36 | # Writes the survival summary to an excel file (helpful to then call the values in the Markov model) 37 | # object = a summary.flexsurvreg object containing the survival curves (with times, estimates and interval limits) - that's actually 38 | # the result of the PSA from 'make.surv' 39 | # file = a string with the full path to the file name to be saved 40 | # sheet = a string with the name of the sheet to be created 41 | # what = the object to be exported. Possible values are: 42 | # 'surv' = a matrix with nsim rows and ntimes columns with the survival curve (one such matix for each configuration of the covariates) 43 | # 'sim' = a matrix with nsim rows and simulations for the survival parameters (scale, shape, rate, etc) 44 | 45 | # If xlsx is not installed, then request installation 46 | if(!isTRUE(requireNamespace("xlsx",quietly=TRUE))) { 47 | stop("You need to install the R package 'xlsx'. Please run in your R terminal:\n install.packages('xlsx')") 48 | } 49 | # But if it is installed, check if it's loaded and if not make its Namespace available so that all its functions are available 50 | if (isTRUE(requireNamespace("xlsx",quietly=TRUE))) { 51 | if (!is.element("xlsx", (.packages()))) { 52 | attachNamespace("xlsx") 53 | } 54 | # Extracts the relevant component of the make.surv output 55 | if(what=="surv") { 56 | export <- object$mat 57 | export.lab <- paste0(" ",object$nsim," simulation(s) for the survival curve") 58 | } else { 59 | export=object$sim 60 | export.lab <- paste0(" ",object$nsim," simulation(s) for the model parameters") 61 | } 62 | nobjs <- length(export) 63 | profile.lab=lapply(1:nrow(object$des.mat),function(x){ 64 | object$des.mat %>% as_tibble() %>% select(!contains("(Intercept)")) %>% slice(x) %>% 65 | round(digits=2) %>% mutate(strata=paste0(names(.),"=",.,collapse=",")) 66 | }) %>% bind_rows(.) %>% pull(strata) 67 | dims <- dim(export[[1]]) 68 | # Finds the total number of rows necessary to write the simulations to the output file 69 | tot.rows <- dims[1]*nobjs + nobjs 70 | 71 | if(is.null(sheet)) { 72 | sheet = profile.lab 73 | } 74 | 75 | # If it already exists, we need to append the data to a different sheet 76 | if (file.exists(file)) { 77 | wb <- xlsx::loadWorkbook(file) 78 | # If worksheet already exists needs to replace it & overwrite it 79 | for (i in 1:length(sheet)){ 80 | if(sheet[i] %in% names(xlsx::getSheets(wb))) {xlsx::removeSheet(wb,sheetName=sheet[i])} 81 | } 82 | sheet <- lapply(sheet,function(i) xlsx::createSheet(wb,i)) 83 | ex <- lapply(1:nobjs,function(i) xlsx::addDataFrame(export[[i]],sheet=sheet[[i]],startRow=1,startColumn=1,row.names=T,col.names=T)) 84 | xlsx::saveWorkbook(wb,file) 85 | } 86 | 87 | # But if file does not exist, then create it 88 | if (!file.exists(file)) { 89 | exts <- tools::file_ext(file) 90 | ## Should put some restriction as to what file extensions we want? 91 | wb <- xlsx::createWorkbook(type=exts) 92 | sheet <- lapply(sheet,function(i) xlsx::createSheet(wb,i)) 93 | ex <- lapply(1:nobjs,function(i) xlsx::addDataFrame(export[[i]],sheet=sheet[[i]],startRow=1,startColumn=1,row.names=T,col.names=T)) 94 | xlsx::saveWorkbook(wb,file) 95 | } 96 | 97 | msg <- paste0("written to file: '",file,"'") 98 | cat(export.lab,msg,"\n","Profile(s):",paste("\n ",profile.lab),"\n") 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # survHE 2 | 3 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/survHE)](https://cran.r-project.org/package=survHE) 4 | [![CRAN_Download_Badge](https://cranlogs.r-pkg.org/badges/survHE)](https://cran.r-project.org/package=survHE) 5 | [![CRAN_Download_Badge](https://cranlogs.r-pkg.org:443/badges/grand-total/survHE?color=orange)](https://cranlogs.r-pkg.org:443/badges/grand-total/survHE?color=orange) 6 | 7 | ## Survival analysis in health economic evaluation 8 | 9 | Contains a suite of functions to systematise the workflow involving survival analysis in health economic evaluation. survHE can fit a large range of survival models using both a frequentist approach (by calling the R package [flexsurv](https://CRAN.R-project.org/package=flexsurv)) and a Bayesian perspective. For a selected range of models, both Integrated Nested Laplace Integration (via the R package [INLA](https://www.r-inla.org/)) and Hamiltonian Monte Carlo (via the R package [rstan](https://CRAN.R-project.org/package=rstan)) are possible. HMC models are pre-compiled so that they can run in a very efficient and fast way. In addition to model fitting, survHE provides a set of specialised functions, for example to perform Probabilistic Sensitivity Analysis, export the results of the modelling to a spreadsheet, plotting survival curves and uncertainty around the mean estimates. 10 | 11 | **NB**: To run the Bayesian models, as of version 2.0 of `survHE`, it is necessary to install the additional packages [`survHEinla`](https://github.com/giabaio/survHEinla) and/or [`survHEhmc`](https://github.com/giabaio/survHEhmc), which are available from this GitHub repository. The reason for this structural change is that in this way, the basic backbone of `survHE` (available from this `main` branch of the repo) becomes a very lean package, whose installation is very quick. More details [here](https://gianluca.statistica.it/blog/2022-01-18-survhe-light/). All the functionalities are in place for `survHE` to easily extend to the Bayesian versions, once one or both of the additional "modules" is also installed. 12 | 13 | ## Installation 14 | The most updated version can be installed using the following code. 15 | ```R 16 | install.packages( 17 | "survHE", 18 | repos = c("https://giabaio.r-universe.dev", "https://cloud.r-project.org") 19 | ) 20 | ``` 21 | 22 | To run the Bayesian versions of the models, you also need to install the ancillary packages 23 | ```R 24 | # Bayesian models using HMC/Stan 25 | install.packages( 26 | "survHEhmc", 27 | repos = c("https://giabaio.r-universe.dev", "https://cloud.r-project.org"), 28 | dependencies=TRUE 29 | ) 30 | 31 | # Bayesian models using INLA 32 | install.packages( 33 | "survHEinla", 34 | repos = c( 35 | "https://giabaio.r-universe.dev", 36 | "https://cloud.r-project.org", 37 | "https://inla.r-inla-download.org/R/stable" 38 | ), 39 | dependencies=TRUE 40 | ) 41 | ``` 42 | (these two are optional, in some sense, so you don't *have* to, unless you want to do the right thing and be Bayesian about it... :wink:) 43 | -------------------------------------------------------------------------------- /data/TA174.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/giabaio/survHE/578ee3f602ebc0896b70585614b9e8b574e6a30f/data/TA174.RData -------------------------------------------------------------------------------- /data/datalist: -------------------------------------------------------------------------------- 1 | data: ID_patient time censored arm sex age imd ethnic 2 | ta174: patid treat prog death prog_t death_t prog_ty death_ty 3 | msmdata: id from to trans Tstart Tstop time status treat patid prog death prog_t death_t prog_ty death_ty 4 | -------------------------------------------------------------------------------- /data/msmdata.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/giabaio/survHE/578ee3f602ebc0896b70585614b9e8b574e6a30f/data/msmdata.RData -------------------------------------------------------------------------------- /data/survtrial.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/giabaio/survHE/578ee3f602ebc0896b70585614b9e8b574e6a30f/data/survtrial.RData -------------------------------------------------------------------------------- /man-roxygen/refs.R: -------------------------------------------------------------------------------- 1 | #' @references G Baio (2019). survHE: Survival analysis for health economic evaluation 2 | #' and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95, 3 | #' 14, 1-47. 4 | 5 | -------------------------------------------------------------------------------- /man/data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{data} 5 | \alias{data} 6 | \title{A fictional survival trial.} 7 | \format{ 8 | A data frame with 367 rows and 8 variables: 9 | \describe{ 10 | \item{ID_patient}{The individual level identifier} 11 | \item{time}{The observed time at which the event happens} 12 | \item{censored}{An indicator to describe whether the 13 | event is fully observed or censored} 14 | \item{arm}{An indicator for the treatment arm, with 15 | 0 = control and 1 = active treatment} 16 | \item{sex}{An indicator for the individual's sex, with 17 | 0 = male and 1 = female} 18 | \item{age}{A numeric variable with the individual's age} 19 | \item{imd}{A categorical variable representing a measure 20 | of area-level social deprivation} 21 | \item{ethnic}{A categorical variable representing the 22 | individual's ethnic group, as measured from a Census} 23 | } 24 | } 25 | \usage{ 26 | data 27 | } 28 | \description{ 29 | A dataset containing fictional data from a trial, where 30 | the main outcome is in terms of time-to-event and 31 | censoring indicator and with additional covariates. 32 | } 33 | \keyword{datasets} 34 | -------------------------------------------------------------------------------- /man/digitise.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/digitise.R 3 | \name{digitise} 4 | \alias{digitise} 5 | \title{Format digitised data for use in survival analysis} 6 | \usage{ 7 | digitise( 8 | surv_inp, 9 | nrisk_inp, 10 | km_output = "KMdata.txt", 11 | ipd_output = "IPDdata.txt" 12 | ) 13 | } 14 | \arguments{ 15 | \item{surv_inp}{a txt file obtained for example by DigitizeIT and containing 16 | the input survival times from graph reading. This file contains 3 columns 17 | 'ID' = the row-ID 18 | 'time' = the vector of times captured by the digitisation process 19 | 'survival' = the vector of survival probabilities captured by the digitisation 20 | process} 21 | 22 | \item{nrisk_inp}{a txt file obtained by DigitizeIT and containing the 23 | reported number at risk. This contains the following columns: 24 | 'Interval' = the ID of the various intervals included in the analysis ( 25 | eg 1, 2, 3, ...) 26 | 'Time' = the actual time shown on the x-axis in the digitsed graph 27 | 'Lower' = the row of the extracted co-ordinates that the time corresponds 28 | to 29 | 'Upper' = the row of the extracted co-ordinates for which the time is less 30 | than the following time at which we have a number at risk 31 | 'nrisk' = the actual number at risk as specified in the original data} 32 | 33 | \item{km_output}{the name of the file to which the KM data will be written} 34 | 35 | \item{ipd_output}{the name of the file to which the individual level data 36 | data will be written} 37 | } 38 | \description{ 39 | Produces txt files with Kaplan Meier and individual level survival data from 40 | digitised Kaplan Meier curves obtained by DigitizeIT 41 | } 42 | \examples{ 43 | \dontrun{ 44 | # Defines the txt files to be used as inputs 45 | surv.inp <- system.file("extdata", "survival.txt", package = "survHE") 46 | nrisk.inp <- system.file("extdata", "nrisk.txt", package = "survHE") 47 | # Runs 'digitise' to create the relevant output files 48 | digitise(surv.inp, nrisk.inp) 49 | } 50 | } 51 | \references{ 52 | G Baio (2019). survHE: Survival analysis for health economic evaluation 53 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95, 54 | 14, 1-47. \url{doi:10.18637/jss.v095.i14} 55 | } 56 | \author{ 57 | Patricia Guyot and Gianluca Baio 58 | } 59 | \keyword{Digitized} 60 | \keyword{Kaplan} 61 | \keyword{Meier} 62 | \keyword{curve} 63 | -------------------------------------------------------------------------------- /man/fit.models.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fit_models.R 3 | \name{fit.models} 4 | \alias{fit.models} 5 | \title{Fit parametric survival analysis for health economic evaluations} 6 | \usage{ 7 | fit.models(formula = NULL, data, distr = NULL, method = "mle", ...) 8 | } 9 | \arguments{ 10 | \item{formula}{a formula specifying the model to be used, in the form 11 | \code{Surv(time,event)~treatment[+covariates]} for flexsurv, or 12 | \code{inla.surv(time,event)~treatment[+covariates]} for INLA} 13 | 14 | \item{data}{A data frame containing the data to be used for the analysis. 15 | This must contain data for the 'event' variable. In case there is no 16 | censoring, then \code{event} is a column of 1s.} 17 | 18 | \item{distr}{a (vector of) string(s) containing the name(s) of the model(s) 19 | to be fitted. Available options are: 20 | 21 | \code{flexsurv}: 22 | "exponential","gamma","genf","gengamma","gompertz","weibull", 23 | "weibullPH","loglogistic","lognormal" \code{INLA}: 24 | "exponential","weibull","lognormal","loglogistic" \code{hmc}: 25 | "exponential","gamma","genf","gengamma","gompertz","weibull","weibullPH", 26 | "loglogistic","lognormal"} 27 | 28 | \item{method}{A string specifying the inferential method (\code{'mle'}, 29 | \code{'inla'} or \code{'hmc'}). If \code{method} is set to \code{'hmc'}, 30 | then \code{survHE} will write suitable model code in the Stan language 31 | (according to the specified distribution), prepare data and initial values 32 | and then run the model.} 33 | 34 | \item{\dots}{Additional options (for INLA or HMC). 35 | 36 | \strong{INLA} specific options \code{dz} = defines the step length for the grid 37 | search over the hyperparameters space (default = 0.1) \code{diff.logdens} = 38 | defines the difference in the log-density for the hyperparameters to stop 39 | integration (default = 5) \code{control.fixed} = defines the default for the 40 | priors, unless specified by the user. Default values are prior mean = 0 for 41 | \emph{all} fixed effects prior var = 1000 for \emph{all} fixed effects prior mean = 0 42 | for the intercept prior prec -> 0 for the intercept \code{control.family} = 43 | a list of options. If distr is a vector, then can be provided as a named 44 | list of options, for example something like this: 45 | \code{control.family=list(weibull=list(param=c(.1,.1)),lognormal=list(initial=2))} 46 | the names of the elements of the list need to be the same as those given in 47 | the vector \code{distr} 48 | 49 | \strong{HMC} specific options \code{chains} = number of chains to run in the HMC 50 | (default = 2) \code{iter} = total number of iterations (default = 2000) 51 | \code{warmup} = number of warmup iterations (default = iter/2) \code{thin} = 52 | number of thinning (default = 1) \code{control} = a list specifying 53 | Stan-related options, eg \code{control=list(adapt_delta=0.85)} (default = 54 | NULL) \code{seed} = the random seed (to make things replicable) \code{pars} 55 | = a vector of parameters (string, default = NA) \code{include} = a logical 56 | indicator (if FALSE, then the pars are not saved; default = TRUE) 57 | \code{priors} = a list (of lists) specifying the values for the parameters 58 | of the prior distributions in the models \code{save.stan} = a logical 59 | indicator (default = FALSE). If TRUE, then saves the data list for Stan and 60 | the model file(s)} 61 | } 62 | \value{ 63 | \item{models}{ A list containing the fitted models. These contain 64 | the output from the original inference engine (\code{flexsurv}, \code{INLA} 65 | or \code{rstan}). Can be processed using the methods specific to the 66 | original packages, or via \code{survHE}-specific methods (such as 67 | \code{plot}, \code{print}) or other specialised functions (eg to extrapolate 68 | the survival curves, etc). } \item{model.fitting}{ A list containing the 69 | output of the model-fit statistics (AIC, BIC, DIC). The AIC and BIC are 70 | estimated for all methods, while the DIC is only estimated when using 71 | Bayesian inference. } \item{method}{ A string indicating the method used to 72 | fit the model, ie \code{'mle'}, \code{'inla'} or \code{'hmc'}. } 73 | \item{misc}{ A list containing the time needed to run the model(s) (in 74 | seconds), the formula used, the results of the Kaplan-Meier analysis (which 75 | is automatically performed using \code{npsurv}) and the original data frame. 76 | } 77 | } 78 | \description{ 79 | Runs the survival analysis with several useful options, using either MLE 80 | (via flexsurv) or a Bayesian approach (via R-INLA or rstan) 81 | } 82 | \details{ 83 | On object in the class \code{survHE} containing the following elements 84 | } 85 | \examples{ 86 | \dontrun{ 87 | # Loads an example dataset from 'flexsurv' 88 | data(bc) 89 | 90 | # Fits the same model using the 3 inference methods 91 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 92 | distr="exp",method="mle") 93 | inla = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 94 | distr="exp",method="inla") 95 | hmc = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 96 | distr="exp",method="hmc") 97 | 98 | # Prints the results in comparable fashion using the survHE method 99 | print(mle) 100 | print(inla) 101 | print(hmc) 102 | 103 | # Or visualises the results using the original packages methods 104 | print(mle,original=TRUE) 105 | print(inla,original=TRUE) 106 | print(hmc,original=TRUE) 107 | 108 | # Plots the survival curves and estimates 109 | plot(mle) 110 | plot(mle,inla,hmc,labs=c("MLE","INLA","HMC"),colors=c("black","red","blue")) 111 | } 112 | 113 | } 114 | \references{ 115 | G Baio (2019). survHE: Survival analysis for health economic evaluation 116 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95, 117 | 14, 1-47. \url{doi:10.18637/jss.v095.i14} 118 | } 119 | \seealso{ 120 | \code{make.surv} 121 | } 122 | \author{ 123 | Gianluca Baio 124 | } 125 | \keyword{Approximation} 126 | \keyword{Bayesian} 127 | \keyword{Carlo} 128 | \keyword{Hamiltonian} 129 | \keyword{Integrated} 130 | \keyword{Laplace} 131 | \keyword{Monte} 132 | \keyword{Nested} 133 | \keyword{Parametric} 134 | \keyword{inference} 135 | \keyword{models} 136 | \keyword{survival} 137 | \keyword{via} 138 | -------------------------------------------------------------------------------- /man/make.ipd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/make.ipd.R 3 | \name{make.ipd} 4 | \alias{make.ipd} 5 | \title{Create an individual level dataset from digitised data} 6 | \usage{ 7 | make.ipd(ipd_files, ctr = 1, var.labs = c("time", "event", "arm")) 8 | } 9 | \arguments{ 10 | \item{ipd_files}{a list including the names of the IPD files created as 11 | output of digitise} 12 | 13 | \item{ctr}{the index of the file associated with the control arm (default, 14 | the first file). This will be coded as 0} 15 | 16 | \item{var.labs}{a vector of labels for the column of the resulting data 17 | matrix. NB these should match the arguments to the formula specified for 18 | fit.models. The user can specify values. These should be 4 elements (ID, 19 | TIME, EVENT, ARM)} 20 | } 21 | \description{ 22 | Piles in the simulated IPD resulting from running digitise for more than one 23 | treatment arm 24 | } 25 | \examples{ 26 | \dontrun{ 27 | # Defines the txt files to be used as inputs 28 | surv.inp <- system.file("extdata", "survival.txt", package = "survHE") 29 | nrisk.inp <- system.file("extdata", "nrisk.txt", package = "survHE") 30 | # Runs 'digitise' to create the relevant output files 31 | digitise(surv.inp, nrisk.inp, ipd_output = "IPD.txt") 32 | # Now uses 'make.ipd' to create the pseudo-data 33 | make.ipd("IPD.txt", ctr = 1, var.labs = c("time", "event", "arm")) 34 | } 35 | } 36 | \references{ 37 | Something will go here 38 | } 39 | \seealso{ 40 | Something will go here 41 | } 42 | \author{ 43 | Gianluca Baio 44 | } 45 | \keyword{Digitized} 46 | \keyword{Kaplan} 47 | \keyword{Meier} 48 | \keyword{curve} 49 | -------------------------------------------------------------------------------- /man/make.surv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/make.surv.R 3 | \name{make.surv} 4 | \alias{make.surv} 5 | \title{Engine for Probabilistic Sensitivity Analysis on the survival curves} 6 | \usage{ 7 | make.surv(fit, mod = 1, t = NULL, newdata = NULL, nsim = 1, ...) 8 | } 9 | \arguments{ 10 | \item{fit}{the result of the call to the \code{fit.models} function, 11 | containing the model fitting (and other relevant information)} 12 | 13 | \item{mod}{the index of the model. Default value is 1, but the user can 14 | choose which model fit to visualise, if the call to \code{fit.models} has a vector 15 | argument for \code{distr} (so many models are fitted & stored in the same object)} 16 | 17 | \item{t}{the time vector to be used for the estimation of the survival curve} 18 | 19 | \item{newdata}{a list (of lists), specifying the values of the covariates 20 | at which the computation is performed. For example 21 | \code{list(list(arm=0),list(arm=1))} will create two survival curves, one 22 | obtained by setting the covariate \code{arm} to the value 0 and the other by 23 | setting it to the value 1. In line with \code{flexsurv} notation, the user 24 | needs to either specify the value for \emph{all} the covariates or for none (in 25 | which case, \code{newdata=NULL}, which is the default). If some value is 26 | specified and at least one of the covariates is continuous, then a single 27 | survival curve will be computed in correspondence of the average values of 28 | all the covariates (including the factors, which in this case are expanded 29 | into indicators).} 30 | 31 | \item{nsim}{The number of simulations from the distribution of the survival 32 | curves. Default at \code{nsim=1}, in which case uses the point estimate for 33 | the relevant distributional parameters and computes the resulting survival 34 | curve} 35 | 36 | \item{...}{Additional options} 37 | } 38 | \description{ 39 | Creates the survival curves for the fitted model(s) 40 | } 41 | \examples{ 42 | \dontrun{ 43 | # Loads an example dataset from 'flexsurv' 44 | data(bc) 45 | 46 | # Fits the same model using the 3 inference methods 47 | mle <- fit.models(formula=Surv(recyrs,censrec) ~ group, data=bc, 48 | distr="exp", method="mle") 49 | p.mle <- make.surv(mle) 50 | psa.plot(p.mle) 51 | 52 | # Can also use the main 'plot' function to visualise the survival curves 53 | # and include uncertainty by using a number 'nsim' of simulations 54 | plot(mle, nsim=10) 55 | } 56 | 57 | } 58 | \references{ 59 | G Baio (2019). survHE: Survival analysis for health economic evaluation 60 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95, 61 | 14, 1-47. \url{doi:10.18637/jss.v095.i14} 62 | } 63 | \seealso{ 64 | \code{\link{fit.models}}, \code{\link{psa.plot}}, \code{\link{write.surv}} 65 | } 66 | \author{ 67 | Gianluca Baio 68 | } 69 | \keyword{Bootstrap} 70 | \keyword{Probabilistic} 71 | \keyword{Survival} 72 | \keyword{analysis} 73 | \keyword{models} 74 | \keyword{sensitivity} 75 | -------------------------------------------------------------------------------- /man/make.transition.probs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/make.transition.probs.R 3 | \name{make.transition.probs} 4 | \alias{make.transition.probs} 5 | \title{make.transition.probs} 6 | \usage{ 7 | make.transition.probs(fit, labs = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{fit}{an object obtained as output of the call to \code{fit.models}} 11 | 12 | \item{labs}{a vector with labels to identify the 'profiles' ie the 13 | combination of covariates that have been passed onto the model formula. 14 | If 'NULL' (default), then figures it out from the 'survHE' object.} 15 | 16 | \item{...}{additional arguments. Includes the standard inputs to the 17 | call to \code{make.surv}, so \code{mod} (the index of the possibly many 18 | models stored in the 'survHE' object), \code{t} (the vector of times 19 | over which to compute the survival curves), \code{newdata} (a list that 20 | defines the profile of covariates) and \code{nsim} (the number of 21 | simulations to use - default is \code{nsim}=1)} 22 | } 23 | \value{ 24 | A tibble 'lambda' with an indicator for the treatment arm, 25 | the times at which the probabilities have been computed and \code{nsim} 26 | columns each with a simulation of the transition probabilities for 27 | all the times specified by the user 28 | } 29 | \description{ 30 | Computes the transition probabilities (to be passed to a Markov model) from 31 | the cumulative hazard curves obtained using \code{fit.models}, using the formula 32 | p(t)=1-exp(H(t-k)/H(t)), where k is the Markov model cycle length (or the 33 | difference across two consecutive times) and t is a generic time 34 | } 35 | \note{ 36 | Something will go here 37 | } 38 | \examples{ 39 | \dontrun{ 40 | # Something will go here 41 | } 42 | 43 | } 44 | \references{ 45 | Something will go here 46 | } 47 | \seealso{ 48 | \code{\link{make.surv}} 49 | } 50 | \author{ 51 | Gianluca Baio 52 | } 53 | \keyword{Markov} 54 | \keyword{Transition} 55 | \keyword{models} 56 | \keyword{probabilities} 57 | -------------------------------------------------------------------------------- /man/make_data_multi_state.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/make.transition.probs.R 3 | \name{make_data_multi_state} 4 | \alias{make_data_multi_state} 5 | \title{make_data_multi_state} 6 | \usage{ 7 | make_data_multi_state( 8 | data, 9 | id = "id", 10 | prog = "prog", 11 | death = "death", 12 | prog_t = "prog_t", 13 | death_t = "death_t", 14 | keep = NULL, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{data}{dataset containing the full ILD with information on both 20 | progression and death. Can be a data.frame or a tibble} 21 | 22 | \item{id}{The column with the individual identifier. Can be NULL (in 23 | which case, it will be created from scratch)} 24 | 25 | \item{prog}{The progression indicator: takes value 1 if the individual 26 | has progressed and 0 otherwise. Defaults to the column named 'prog' in 27 | the dataset} 28 | 29 | \item{death}{The death indicator: takes value 1 if the individual 30 | has died and 0 otherwise. Defaults to the column named 'death' in 31 | the dataset} 32 | 33 | \item{prog_t}{The progression time. Defaults to the column named 34 | 'prog_t' in the dataset} 35 | 36 | \item{death_t}{The death time. Defaults to the column named 37 | 'death_t' in the dataset} 38 | 39 | \item{keep}{A vector of strings with the names of the additional 40 | variables from the original dataset to keep into the multistate 41 | dataset. If 'NULL' (default), then keeps all} 42 | 43 | \item{...}{additional arguments.} 44 | } 45 | \value{ 46 | A tibble containing the event history for each individual 47 | and with the following variables: id = Patients ID; from = Initial 48 | state (1=Pre-progression, 2=Progression, 3=Death); to = End state 49 | (1=Pre-progression, 2=Progression, 3=Death); trans = Transition ID: 50 | 1=Pre-progression -> Progression; 2=Pre-Progression -> Death; 51 | 3=Progression -> Death; Tstart = Entry time (either entry or 52 | progression); Tstop = Exit time (time of event or censoring time); 53 | status = Event indicator (1=yes, 0=censored), \strong{for the specific 54 | event under consideration}; treat = Treatment indicator 55 | All the other original variables are appended to these, but can be 56 | removed 57 | } 58 | \description{ 59 | Takes as input an individual-level dataset including data on both 60 | progression and death time (\strong{jointly}) and manipulates it using 61 | \code{dplyr} functions to create a full "multi-state" dataset, in 62 | which all the transitions are tracked. This can then be used 63 | to fit survival models and compute all the estimates for the 64 | whole set of transition probabilities 65 | } 66 | \note{ 67 | Something will go here 68 | } 69 | \examples{ 70 | \dontrun{ 71 | # Something will go here 72 | } 73 | 74 | } 75 | \references{ 76 | Something will go here 77 | } 78 | \seealso{ 79 | Something will go here 80 | } 81 | \author{ 82 | Gianluca Baio 83 | } 84 | \keyword{Markov} 85 | \keyword{Multistate} 86 | \keyword{Transition} 87 | \keyword{models} 88 | \keyword{probabilities} 89 | -------------------------------------------------------------------------------- /man/make_newdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils_plot_survHE.R 3 | \name{make_newdata} 4 | \alias{make_newdata} 5 | \title{Creates a 'newdata' list to modify the plots for specific individual 6 | profiles (with respect to the covariates)} 7 | \usage{ 8 | make_newdata(data, vars, conts = NULL) 9 | } 10 | \arguments{ 11 | \item{data}{The original dataset that has been used as input to the call 12 | to 'fit.models'} 13 | 14 | \item{vars}{A vector of strings, including the names of the variables that 15 | are to be used to construct specific profiles of individual covariates} 16 | 17 | \item{conts}{A subset of 'vars', which include the named covariates that 18 | are continuous. These will be averaged over, while for the remaining 19 | covariates (assumed to be factors), the specific profiles will be listed. 20 | Defaults to NULL} 21 | } 22 | \value{ 23 | \item{newdata}{The list 'newdata' to be passed as optional argument 24 | to a call to the 'plot' method} 25 | 26 | \item{labs}{A vector of labels (say to use in the plot, for each 27 | profile)} 28 | } 29 | \description{ 30 | Creates a 'newdata' list to modify the plots for specific individual 31 | profiles (with respect to the covariates) 32 | } 33 | \note{ 34 | Something will go here 35 | } 36 | \examples{ 37 | \dontrun{ 38 | data(bc) 39 | 40 | # Fits a model using the 'bc' data 41 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 42 | distr="exp",method="mle") 43 | # Now makes the default plot 44 | plot(mle) 45 | # Now creates a 'newdata' list to modify the plot for selected profiles 46 | newdata=make_newdata(data=bc,vars="group") 47 | # And can plot, say, only two of the three treatment arms 48 | plot(mle,newdata=newdata$newdata[c(1,3)],lab.profile=newdata$labs[c(1,3)]) 49 | } 50 | 51 | } 52 | \author{ 53 | Gianluca Baio 54 | } 55 | \keyword{Parametric} 56 | \keyword{models} 57 | \keyword{survival} 58 | -------------------------------------------------------------------------------- /man/markov_trace.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/make.transition.probs.R 3 | \name{markov_trace} 4 | \alias{markov_trace} 5 | \title{Markov trace} 6 | \usage{ 7 | markov_trace(mm, interventions = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{mm}{The output of a call to \code{three_state_mm}} 11 | 12 | \item{interventions}{A vector of labels for the interventions} 13 | 14 | \item{...}{additional arguments.} 15 | } 16 | \value{ 17 | Plot 18 | } 19 | \description{ 20 | Plots the Markov Trace from an object generated using \code{three_state_mm} 21 | } 22 | \note{ 23 | Something will go here 24 | } 25 | \examples{ 26 | \dontrun{ 27 | # Something will go here 28 | } 29 | 30 | } 31 | \references{ 32 | Something will go here 33 | } 34 | \seealso{ 35 | make.surv, three_state_mm 36 | } 37 | \author{ 38 | Gianluca Baio 39 | } 40 | \keyword{Markov} 41 | \keyword{Transition} 42 | \keyword{models} 43 | \keyword{probabilities} 44 | \keyword{trace} 45 | -------------------------------------------------------------------------------- /man/model.fit.plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model.fit.plot.R 3 | \name{model.fit.plot} 4 | \alias{model.fit.plot} 5 | \title{Graphical representation of the measures of model fitting based on 6 | Information Criteria} 7 | \usage{ 8 | model.fit.plot(..., type = "aic", scale = "absolute", stacked = FALSE) 9 | } 10 | \arguments{ 11 | \item{...}{Optional inputs. Must include at least one \code{survHE} object.} 12 | 13 | \item{type}{should the AIC, the BIC or the DIC plotted? (values = \code{"aic"}, 14 | \code{"bic"} or \code{"dic"})} 15 | 16 | \item{scale}{If \code{scale='absolute'} (default), then plot the absolute value 17 | of the *IC. If \code{scale='relative'} then plot a rescaled version taking 18 | the percentage increase in the *IC in comparison with the best-fitting model} 19 | 20 | \item{stacked}{Should the bars be stacked and grouped by survHE object? (default=F)} 21 | } 22 | \value{ 23 | A plot with the relevant model fitting statistics 24 | } 25 | \description{ 26 | Plots a summary of the model fit for all the models fitted 27 | } 28 | \details{ 29 | Something will go here 30 | } 31 | \examples{ 32 | \dontrun{ 33 | data(bc) 34 | 35 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 36 | distr=c("exp","wei","lno"),method="mle") 37 | model.fit.plot(mle) 38 | } 39 | 40 | } 41 | \references{ 42 | G Baio (2019). survHE: Survival analysis for health economic evaluation 43 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95, 44 | 14, 1-47. \url{doi:10.18637/jss.v095.i14} 45 | } 46 | \seealso{ 47 | \code{fit.models} 48 | } 49 | \author{ 50 | Gianluca Baio 51 | } 52 | \keyword{Model} 53 | \keyword{Parametric} 54 | \keyword{fitting} 55 | \keyword{models} 56 | \keyword{survival} 57 | -------------------------------------------------------------------------------- /man/msmdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{msmdata} 5 | \alias{msmdata} 6 | \title{NICE TA174 dataset in multi-state format.} 7 | \format{ 8 | A tibble with 1868 rows and 16 variables: 9 | \describe{ 10 | \item{id}{A numeric patient identifier} 11 | \item{from}{An indicator of the starting state. 12 | 1=Pre-progression; 2=Progression; 3=Death} 13 | \item{to}{An indicator for the arriving state} 14 | \item{trans}{A code for the actual transition considered. 15 | 1=Pre-progression -> Progression; 2=Pre-progression -> 16 | Death; 3=Progression -> Death} 17 | \item{Tstart}{The time of entry into the observation} 18 | \item{Tstop}{The time of exit from observation} 19 | \item{time}{The observed time until even (progression or 20 | death), or censoring occurs} 21 | \item{status}{The event indicator; takes value 1 if the 22 | underlying event (which varies depending on which 23 | transition is being considered) happens and 0 otherwise} 24 | \item{treat}{The treatment indicator. 1=rituximab 25 | in combination with fludarabine andcyclophosphamide 26 | (RFC); 0=fludarabine and cyclo-phosphamide alone (FC)} 27 | \item{patid}{The original numeric patient identifier} 28 | \item{prog}{The original indicator to describe whether 29 | the patient has experience a progression} 30 | \item{death}{The original indicator to describe whether 31 | the patient has experience death} 32 | \item{prog_t}{The original observed time at progression, 33 | or the time at which the patient has been censored; 34 | measured in months} 35 | \item{death_t}{The original observed time at death, or 36 | the time at which the patient has been censored; 37 | measured in months} 38 | \item{prog_ty}{The original observed time at progression, 39 | or the time at which the patient has been censored; 40 | measured in years} 41 | \item{death_ty}{The original observed time at death, or 42 | the time at which the patient has been censored; 43 | measured in years} 44 | } 45 | } 46 | \usage{ 47 | msmdata 48 | } 49 | \description{ 50 | These are the same data contained in NICE TA174, as 51 | made publicly available as part of the supplementary 52 | material for Williams et al (2017). Medical Decision 53 | Making, 37;427-439. However, the data have been 54 | restructured (by using the function \code{make_data_multi_state()}) 55 | to be used for multi-state analysis 56 | } 57 | \keyword{datasets} 58 | -------------------------------------------------------------------------------- /man/plot.survHE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.survHE.R 3 | \name{plot.survHE} 4 | \alias{plot.survHE} 5 | \title{Plot survival curves for the models fitted using \code{fit.models}} 6 | \usage{ 7 | \method{plot}{survHE}(...) 8 | } 9 | \arguments{ 10 | \item{...}{Must include at least one result object saved as 11 | the call to the \code{fit.models} function. Nay include other 12 | optional parameters. These include whether the KM curve should be 13 | added \code{add.km} and whether the user specifies a profile of covariates 14 | (in the list \code{newdata}). Other possibilities are additional 15 | (mainly graphical) options. These are: 16 | \itemize{ 17 | \item \code{xlab} = a string with the label for the x-axis (default = "time") 18 | \item \code{ylab} = a string with the label for the y-axis (default = "Survival") 19 | \item \code{lab.profile} = a (vector of) string(s) indicating the labels associated with the strata defining the different 20 | survival curves to plot. Default to the value used by the Kaplan Meier 21 | estimate given in \code{fit.models}. 22 | \item \code{newdata} = a list (of lists) providing the values for the relevant covariates If NULL, then will use 23 | the mean values for the covariates if at least one is a continuous variable, 24 | or the combination of the categorical covariates. 25 | \item \code{xlim} = a vector determining the limits for the x-axis 26 | \item \code{colors} = a vector of characters defining the colours in which to plot the different survival curves 27 | \item \code{what} = a string indicating whether the survival, hazard or 28 | cumulative hazard curve should be plotted. Defaults to 'survival', but the 29 | other two options can be specified as 'hazard' or 'cumhazard' 30 | \item \code{lab.profile} = a vector of characters defining the names of the models fitted 31 | \item \code{add.km} = TRUE (whether to also add the Kaplan Meier estimates of the data) 32 | \item \code{annotate} = FALSE (whether to also add text to highlight the observed vs 33 | extrapolated data) 34 | \item \code{legend.position} = a vector of proportions to place the legend. Default 35 | to 'c(.75,.9)', which means 75\% across the x-axis and 90\% across the y-axis 36 | \item \code{legend.title} = suitable instructions to format the title of the legend; 37 | defaults to 'element_text(size=15,face="bold")' but there may be other 38 | arguments that can be added (using 'ggplot' facilities) 39 | \item \code{legend.text} = suitable instructions to format the text of the legend; 40 | defaults to 'element_text(colour="black", size=14, face="plain")' but there 41 | may be other arguments that can be added (using 'ggplot' facilities) 42 | }} 43 | } 44 | \description{ 45 | Plots the results of model fit. 46 | } 47 | \examples{ 48 | \dontrun{ 49 | data(bc) 50 | 51 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 52 | distr="exp",method="mle") 53 | inla = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 54 | distr="exp",method="inla") 55 | plot(MLE=mle,INLA=inla) 56 | } 57 | 58 | } 59 | \references{ 60 | G Baio (2019). survHE: Survival analysis for health economic evaluation 61 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95, 62 | 14, 1-47. \url{doi:10.18637/jss.v095.i14} 63 | } 64 | \seealso{ 65 | \code{\link{fit.models}}, \code{\link{write.surv}} 66 | } 67 | \author{ 68 | Gianluca Baio 69 | } 70 | \keyword{Parametric} 71 | \keyword{models} 72 | \keyword{survival} 73 | -------------------------------------------------------------------------------- /man/plot_transformed_km.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_tranformed_km.R 3 | \name{plot_transformed_km} 4 | \alias{plot_transformed_km} 5 | \title{Plot to assess suitability of parametric model} 6 | \usage{ 7 | plot_transformed_km(fit, mod = 1, add_legend = FALSE, graph = "base", ...) 8 | } 9 | \arguments{ 10 | \item{fit}{An object of class survHE.} 11 | 12 | \item{mod}{Index or name of a model in fit. Defaults to 1.} 13 | 14 | \item{add_legend}{If \code{TRUE}, labels assumptions. Defaults to \code{FALSE}.} 15 | 16 | \item{graph}{Type of plot: base or ggplot2.} 17 | 18 | \item{\dots}{Further arguments, passed on to plot.} 19 | } 20 | \value{ 21 | Diagnostic plot 22 | } 23 | \description{ 24 | Perform an exploratory investigation for linearity of 25 | transformed survival models. 26 | } 27 | \details{ 28 | For the Weibull, twice taking logs of the survivor function 29 | 30 | \deqn{log(-log S(t)) = log \lambda + \gamma log t} 31 | 32 | A plot of \eqn{log(-log S(t))} against \eqn{log(t)} would give an approximately 33 | straight line if the Weibull assumption is reasonable. 34 | The plot could also be used to give a rough estimate of the parameters. 35 | 36 | Similarly, for the log-logistic distribution 37 | 38 | \deqn{logS(t)/(1 - S(t)) = \theta - \kappa log t} 39 | 40 | For the log-normal distribution 41 | 42 | \deqn{\Phi^{-1} (1 - S(t)) = (log t - \mu) / \sigma} 43 | 44 | We can also check the assumption made with using the Cox regression model 45 | of proportional hazards by inspecting the log-cumulative hazard plot. 46 | 47 | \deqn{log H_i(t) = \beta x_i + log H_0(t)} 48 | 49 | The transformed curves for different values of the explanatory variables 50 | will be parallel if PH holds. 51 | } 52 | \examples{ 53 | 54 | data(bc) 55 | form <- formula("Surv(recyrs, censrec) ~ group") 56 | 57 | # exponential distribution 58 | fit_exp <- fit.models(form, data = bc, 59 | distr = "exp", method = "mle") 60 | plot_transformed_km(fit_exp) 61 | plot_transformed_km(fit_exp, graph = "ggplot2") 62 | 63 | # weibull distribution 64 | fit_wei <- fit.models(form, data = bc, 65 | distr = "weibull", method = "mle") 66 | plot_transformed_km(fit_wei) 67 | plot_transformed_km(fit_wei, graph = "ggplot2") 68 | 69 | # loglogistic distribution 70 | fit_llog <- fit.models(form, data = bc, 71 | distr = "loglogistic", method = "mle") 72 | plot_transformed_km(fit_llog) 73 | plot_transformed_km(fit_llog, graph = "ggplot2") 74 | 75 | # lognormal distribution 76 | fit_lnorm <- fit.models(form, data = bc, 77 | distr = "lognormal", method = "mle") 78 | plot_transformed_km(fit_lnorm) 79 | plot_transformed_km(fit_lnorm, graph = "ggplot2") 80 | 81 | ## for only one group 82 | form <- formula("Surv(recyrs, censrec) ~ 1") 83 | 84 | fit_exp <- fit.models(form, data = bc, 85 | distr = "exp", method = "mle") 86 | plot_transformed_km(fit_exp) 87 | plot_transformed_km(fit_exp, graph = "ggplot2") 88 | 89 | } 90 | \references{ 91 | Collett (2015) Modelling Survival Data in Medical Research, CRC Press 92 | } 93 | \author{ 94 | William Browne, Nathan Green 95 | } 96 | \keyword{hplot} 97 | \keyword{survival} 98 | -------------------------------------------------------------------------------- /man/print.survHE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/print.survHE.R 3 | \name{print.survHE} 4 | \alias{print.survHE} 5 | \title{Print a summary of the survival model(s) fitted by \code{fit.models}} 6 | \usage{ 7 | \method{print}{survHE}(x, mod = 1, ...) 8 | } 9 | \arguments{ 10 | \item{x}{the \code{survHE} object (the output of the call to 11 | \code{fit.models})} 12 | 13 | \item{mod}{is the index of the model. Default value is 1, but the user can 14 | choose which model fit to visualise, if the call to fit.models has a vector 15 | argument for distr (so many models are fitted & stored in the same object)} 16 | 17 | \item{\dots}{additional options, including: \code{digits} = number of 18 | significant digits to be shown in the summary table (default = 6) 19 | \code{original} = a flag to say whether the \emph{original} table 20 | from either \code{flexsurv} or \code{INLA} or \code{rstan} should be printed} 21 | } 22 | \description{ 23 | Prints the summary table for the model(s) fitted, with the estimate of the 24 | parameters 25 | } 26 | \examples{ 27 | \dontrun{ 28 | data(bc) 29 | 30 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 31 | distr="exp",method="mle") 32 | print(mle) 33 | } 34 | 35 | } 36 | \references{ 37 | G Baio (2019). survHE: Survival analysis for health economic evaluation 38 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95, 39 | 14, 1-47. \url{doi:10.18637/jss.v095.i14} 40 | } 41 | \author{ 42 | Gianluca Baio 43 | } 44 | \keyword{Parametric} 45 | \keyword{models} 46 | \keyword{survival} 47 | -------------------------------------------------------------------------------- /man/psa.plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/psa.plot.R 3 | \name{psa.plot} 4 | \alias{psa.plot} 5 | \title{Graphical depiction of the probabilistic sensitivity analysis for the 6 | survival curves} 7 | \usage{ 8 | psa.plot(psa, ...) 9 | } 10 | \arguments{ 11 | \item{psa}{the result of the call to the function \code{make.surv}} 12 | 13 | \item{...}{Optional graphical parameters, such as: 14 | \itemize{ 15 | \item \code{xlab} = label for the x-axis 16 | \item \code{ylab} = label for the y-axis 17 | \item \code{col} = (vector) of colours for the lines to be plotted 18 | \item \code{alpha} = the level of transparency for the curves (default = 0.2) 19 | }} 20 | } 21 | \description{ 22 | Plots the survival curves for all the PSA simulations. The function is 23 | actually deprecated - similar graphs can be obtained directly using 24 | the \code{plot} method (with options), which allows a finer depiction 25 | of the results. 26 | } 27 | \examples{ 28 | \dontrun{ 29 | data(bc) 30 | 31 | # Fits the same model using the 3 inference methods 32 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 33 | distr="exp",method="mle") 34 | p.mle = make.surv(mle,nsim=100) 35 | psa.plot(p.mle) 36 | } 37 | 38 | } 39 | \references{ 40 | G Baio (2019). survHE: Survival analysis for health economic evaluation 41 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95, 42 | 14, 1-47. \url{doi:10.18637/jss.v095.i14} 43 | } 44 | \seealso{ 45 | \code{\link{make.surv}}, \code{\link{write.surv}} 46 | } 47 | \author{ 48 | Gianluca Baio 49 | } 50 | \keyword{Bootstrap} 51 | \keyword{Probabilistic} 52 | \keyword{Survival} 53 | \keyword{analysis} 54 | \keyword{models} 55 | \keyword{sensitivity} 56 | -------------------------------------------------------------------------------- /man/summary.survHE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary.survHE.R 3 | \name{summary.survHE} 4 | \alias{summary.survHE} 5 | \title{Prints a summary table for the distribution the mean survival time for a 6 | given model and data} 7 | \usage{ 8 | \method{summary}{survHE}(object, mod = 1, t = NULL, nsim = 1000, ...) 9 | } 10 | \arguments{ 11 | \item{object}{a \code{survHE} object (resulting from the call to 12 | \code{fit.models}} 13 | 14 | \item{mod}{the model to be analysed (default = 1)} 15 | 16 | \item{t}{the vector of times to be used in the computation. Default = NULL, 17 | which means the observed times will be used. NB: the vector of times should 18 | be: i) long enough so that S(t) goes to 0; and ii) dense enough so that the 19 | approximation to the AUC is sufficiently precise} 20 | 21 | \item{nsim}{the number of simulations from the survival curve distributions 22 | to be used (to compute interval estimates)} 23 | 24 | \item{\dots}{Additional options} 25 | } 26 | \value{ 27 | \item{mean.surv}{ A matrix with the simulated values for the mean 28 | survival times } \item{tab}{ A summary table } 29 | } 30 | \description{ 31 | Calculates the mean survival time as the area under the survival curve 32 | } 33 | \details{ 34 | A list comprising of the following elements 35 | } 36 | \examples{ 37 | \dontrun{ 38 | data(bc) 39 | 40 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 41 | distr="exp",method="mle") 42 | summary(mle,nsim=100) 43 | } 44 | 45 | } 46 | \references{ 47 | G Baio (2019). survHE: Survival analysis for health economic evaluation 48 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95, 49 | 14, 1-47. \url{doi:10.18637/jss.v095.i14} 50 | } 51 | \seealso{ 52 | \code{fit.models}, \code{make.surv} 53 | } 54 | \author{ 55 | Gianluca Baio 56 | } 57 | \keyword{Mean} 58 | \keyword{Parametric} 59 | \keyword{models} 60 | \keyword{survival} 61 | \keyword{time} 62 | -------------------------------------------------------------------------------- /man/survHE-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/survHE-package.R 3 | \docType{package} 4 | \name{survHE-package} 5 | \alias{survHE} 6 | \alias{survHE-package} 7 | \title{survHE: Survival Analysis in Health Economic Evaluation} 8 | \description{ 9 | Contains a suite of functions for survival analysis in health economics. These can be used to run survival models under a frequentist (based on maximum likelihood) or a Bayesian approach (both based on Integrated Nested Laplace Approximation or Hamiltonian Monte Carlo). To run the Bayesian models, the user needs to install additional modules (packages), i.e. 'survHEinla' and 'survHEhmc'. These can be installed from \url{https://giabaio.r-universe.dev/} using 'install.packages("survHEhmc", repos = c("https://giabaio.r-universe.dev", "https://cloud.r-project.org"))' and 'install.packages("survHEinla", repos = c("https://giabaio.r-universe.dev", "https://cloud.r-project.org"))' respectively. 'survHEinla' is based on the package INLA, which is available for download at \url{https://inla.r-inla-download.org/R/stable/}. The user can specify a set of parametric models using a common notation and select the preferred mode of inference. The results can also be post-processed to produce probabilistic sensitivity analysis and can be used to export the output to an Excel file (e.g. for a Markov model, as often done by modellers and practitioners). \doi{10.18637/jss.v095.i14}. 10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://github.com/giabaio/survHE} 15 | \item \url{https://gianluca.statistica.it/software/survhe/} 16 | \item Report bugs at \url{https://github.com/giabaio/survHE/issues} 17 | } 18 | 19 | } 20 | \author{ 21 | \strong{Maintainer}: Gianluca Baio \email{g.baio@ucl.ac.uk} 22 | 23 | Other contributors: 24 | \itemize{ 25 | \item Andrea Berardi \email{and.be.like@gmail.com} [contributor] 26 | \item Philip Cooney \email{philip.cooney@hotmail.com} [contributor] 27 | \item Andrew Jones \email{andrew.r.johnson@postgrad.curtin.edu.au} [contributor] 28 | \item Nathan Green \email{n.green@ucl.ac.uk} [contributor] 29 | } 30 | 31 | } 32 | \keyword{internal} 33 | -------------------------------------------------------------------------------- /man/ta174.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{ta174} 5 | \alias{ta174} 6 | \title{NICE TA174 dataset.} 7 | \format{ 8 | A tibble with 810 rows and 8 variables: 9 | \describe{ 10 | \item{patid}{A numeric patient identifier} 11 | \item{treat}{The treatment indicator. 1=rituximab 12 | in combination with fludarabine andcyclophosphamide 13 | (RFC); 0=fludarabine and cyclo-phosphamide alone (FC)} 14 | \item{prog}{An indicator to describe whether the 15 | patient has experience a progression} 16 | \item{death}{An indicator to describe whether the 17 | patient has experience death} 18 | \item{prog_t}{The observed time at progression, or 19 | the time at which the patient has been censored; 20 | measured in months} 21 | \item{death_t}{The observed time at death, or 22 | the time at which the patient has been censored; 23 | measured in months} 24 | \item{prog_ty}{The observed time at progression, or 25 | the time at which the patient has been censored; 26 | measured in years} 27 | \item{death_ty}{The observed time at death, or 28 | the time at which the patient has been censored; 29 | measured in years} 30 | } 31 | } 32 | \usage{ 33 | ta174 34 | } 35 | \description{ 36 | A dataset containing the data used for NICE TA174, as 37 | made publicly available as part of the supplementary 38 | material for Williams et al (2017). Medical Decision 39 | Making, 37;427-439. 40 | } 41 | \keyword{datasets} 42 | -------------------------------------------------------------------------------- /man/theme_survHE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils_plot_survHE.R 3 | \name{theme_survHE} 4 | \alias{theme_survHE} 5 | \title{A Custom ggplot2 Theme for Survival Plots} 6 | \usage{ 7 | theme_survHE() 8 | } 9 | \value{ 10 | A ggplot2 theme object that can be added to a ggplot. 11 | } 12 | \description{ 13 | This theme is designed for use with survival analysis plots, particularly 14 | those created using the \code{survHE} package. It builds on \code{theme_bw()} and 15 | customizes axis text, titles, plot background, and legend styling. 16 | } 17 | \details{ 18 | Note: To position the legend inside the plot, use an additional call to 19 | \code{theme(legend.position = c(x, y), legend.justification = c("left", "top"))}. 20 | } 21 | \examples{ 22 | library(ggplot2) 23 | library(survHE) 24 | ggplot(mtcars, aes(wt, mpg)) + 25 | geom_point() + 26 | theme_survHE() + 27 | theme(legend.position = c(0.6, 0.8), legend.justification = c("left", "top")) 28 | } 29 | -------------------------------------------------------------------------------- /man/three_state_mm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/make.transition.probs.R 3 | \name{three_state_mm} 4 | \alias{three_state_mm} 5 | \title{three_state_mm} 6 | \usage{ 7 | three_state_mm( 8 | m_12, 9 | m_13, 10 | m_23, 11 | nsim = 1, 12 | start = c(1000, 0, 0), 13 | basecase = FALSE, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{m_12}{A 'survHE' object (output to a call to \code{fit.models}) 19 | estimating the parameters of a model for the transition from 20 | 'Pre-progression' (state 1) to 'Progressed' (state 2). Given the 21 | individual level data with the complete event history (in the object 22 | 'data'), can be done with a call like 'x=make_data_multi_state(data)' 23 | and then \code{fit.models(Surv(time,status)~...,data=x \%>\% filter(trans==1),...)}} 24 | 25 | \item{m_13}{A 'survHE' object (output to a call to \code{fit.models}) 26 | estimating the parameters of a model for the transition from 27 | 'Pre-progression' (state 1) to 'Death' (state 3). Given the 28 | individual level data with the complete event history (in the object 29 | 'data'), can be done with a call like 'x=make_data_multi_state(data)' 30 | and then \code{fit.models(Surv(time,status)~...,data=x \%>\% filter(trans==2),...)}} 31 | 32 | \item{m_23}{A 'survHE' object (output to a call to \code{fit.models}) 33 | estimating the parameters of a model for the transition from 34 | 'Progressed' (state 2) to 'Death' (state 3). Given the 35 | individual level data with the complete event history (in the object 36 | 'data'), can be done with a call like 'x=make_data_multi_state(data)' 37 | and then \code{fit.models(Surv(time,status)~...,data=x \%>\% filter(trans==3),...)}} 38 | 39 | \item{nsim}{The number of simulations for the model parameters that are 40 | used to compute the survival curves. Defaults to \code{nsim}=1, 41 | which simply creates one survival curve for each treatment arm.} 42 | 43 | \item{start}{A vector of initial state occupancy. By default assumes 1000 44 | individuals, all initially allocated to 'Pre-progression'} 45 | 46 | \item{basecase}{Should the base case be computed as well, based on the 47 | point estimate of the underlying model parameters? (Default=FALSE)} 48 | 49 | \item{...}{additional arguments.} 50 | } 51 | \value{ 52 | A list including the state occupancy simulations in an object 'm'. 53 | This is a tibble with the number of individuals in each of the 3 states 54 | at each of the times specified by the user. If \code{nsim}>1, then the tibble 55 | also contains a simulation index to keep track of that. The list also 56 | includes the computation time to obtain the state occupancy tibble (in the 57 | object 'running_time'). If \code{basecase==TRUE}, then the function also 58 | computes the "base case scenario" (based on 1 simulation from of the 59 | underlying survival curves, i.e. the point estimate of the model parameters) 60 | and stores it in the object 'base_case' 61 | } 62 | \description{ 63 | General purpose function to run a standard three-state Markov model 64 | (typically used in cancer modelling). The states are typically 65 | 'Pre-progression', 'Progressed' and 'Death'. No backward transition 66 | from 'Progressed' to 'Pre-progression' is allowed and 'Death' is 67 | obviously an absorbing state. All other transitions are possible. 68 | The crucial assumption is that \emph{individual-level data} are available 69 | recording an indicator and the time of progression and death for each 70 | individual. The function returns the full transition matrix 71 | } 72 | \note{ 73 | Something will go here 74 | } 75 | \examples{ 76 | \dontrun{ 77 | # Something will go here 78 | } 79 | 80 | } 81 | \references{ 82 | Something will go here 83 | } 84 | \seealso{ 85 | make.transition.probs make_data_multi_state 86 | } 87 | \author{ 88 | Gianluca Baio 89 | } 90 | \keyword{Markov} 91 | \keyword{Three-state} 92 | \keyword{Transition} 93 | \keyword{cancer} 94 | \keyword{model} 95 | \keyword{models} 96 | \keyword{probabilities} 97 | -------------------------------------------------------------------------------- /man/write.surv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/write.surv.R 3 | \name{write.surv} 4 | \alias{write.surv} 5 | \title{write.surv} 6 | \usage{ 7 | write.surv(object, file, sheet = NULL, what = "surv") 8 | } 9 | \arguments{ 10 | \item{object}{a summary.flexsurvreg object containing the survival curves 11 | (with times, estimates and interval limits)} 12 | 13 | \item{file}{a string with the full path to the file name to be saved} 14 | 15 | \item{sheet}{a string with the name of the sheet to be created} 16 | 17 | \item{what}{a string to describe what to be exported. Can either be 18 | 'surv' (default), which outputs the simulation(s) for the survival curves 19 | or 'sim', which outputs the simulation(s) for the underlying model 20 | parameters. If there are several 'profiles', they get written in 21 | separate spreadsheets and a clear indication is given as the name of the 22 | spreadsheet} 23 | } 24 | \value{ 25 | A spreadsheet file with the simulation(s) of the relevant quantity 26 | } 27 | \description{ 28 | Writes the survival summary to an excel file (helpful to then call the 29 | values in the Markov model) 30 | } 31 | \details{ 32 | Something will go here 33 | } 34 | \examples{ 35 | \dontrun{ 36 | # Loads an example dataset from 'flexsurv' 37 | data(bc) 38 | 39 | # Fits the same model using the 3 inference methods 40 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc, 41 | distr="exp",method="mle") 42 | p.mle = make.surv(mle) 43 | write.surv(p.mle,file="test.xlsx") 44 | } 45 | } 46 | \references{ 47 | G Baio (2019). survHE: Survival analysis for health economic evaluation 48 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95, 49 | 14, 1-47. \url{doi:10.18637/jss.v095.i14} 50 | } 51 | \seealso{ 52 | \code{make.surv} 53 | } 54 | \author{ 55 | Gianluca Baio 56 | } 57 | \keyword{Excel} 58 | \keyword{PSA} 59 | -------------------------------------------------------------------------------- /survHE.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace 19 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/tests.html 7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files 8 | 9 | library(testthat) 10 | library(survHE) 11 | 12 | test_check("survHE") 13 | -------------------------------------------------------------------------------- /tests/testthat/test-make.surv.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("output plots don't throw an error and return ggplot2 list", { 3 | 4 | data(bc) 5 | 6 | mle <- fit.models(formula=Surv(recyrs,censrec) ~ group, data=bc, 7 | distr=c("exponential", "weibull"), method="mle") 8 | 9 | p.mle <- make.surv(mle) 10 | p.mle2 <- make.surv(mle, nsim = 2) 11 | p.mle3 <- make.surv(mle, nsim = 2, mod = 2) 12 | 13 | expect_type(plot(mle, nsim=10), "list") 14 | 15 | expect_type(psa.plot(p.mle), "list") 16 | expect_type(psa.plot(p.mle), "list") 17 | 18 | expect_type(psa.plot(p.mle2), "list") 19 | 20 | expect_type(psa.plot(p.mle3), "list") 21 | 22 | expect_type(plot(mle, add.km=TRUE), "list") 23 | expect_type(plot(mle, add.km=TRUE, sim = 10), "list") 24 | }) 25 | --------------------------------------------------------------------------------