├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
├── data.R
├── digitise.R
├── fit_models.R
├── global.R
├── make.ipd.R
├── make.surv.R
├── make.transition.probs.R
├── model.fit.plot.R
├── plot.survHE.R
├── plot_tranformed_km.R
├── print.survHE.R
├── psa.plot.R
├── summary.survHE.R
├── survHE-package.R
├── utils_fit_models.R
├── utils_make_surv.R
├── utils_plot_survHE.R
├── utils_print_survHE.R
└── write.surv.R
├── README.md
├── data
├── TA174.RData
├── datalist
├── msmdata.RData
└── survtrial.RData
├── man-roxygen
└── refs.R
├── man
├── data.Rd
├── digitise.Rd
├── fit.models.Rd
├── make.ipd.Rd
├── make.surv.Rd
├── make.transition.probs.Rd
├── make_data_multi_state.Rd
├── make_newdata.Rd
├── markov_trace.Rd
├── model.fit.plot.Rd
├── msmdata.Rd
├── plot.survHE.Rd
├── plot_transformed_km.Rd
├── print.survHE.Rd
├── psa.plot.Rd
├── summary.survHE.Rd
├── survHE-package.Rd
├── ta174.Rd
├── theme_survHE.Rd
├── three_state_mm.Rd
└── write.surv.Rd
├── survHE.Rproj
└── tests
├── testthat.R
└── testthat
└── test-make.surv.R
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^renv$
2 | ^renv\.lock$
3 | ^.*\.Rproj$
4 | ^\.Rproj\.user$
5 | ^\.travis\.yml$
6 | ^appveyor\.yml$
7 | ^LICENSE\.md$
8 | ^_pkgdown\.yml$
9 | ^docs$
10 | ^pkgdown$
11 | ^\.httr-oauth$
12 | ^CONDUCT\.md$
13 | ^man-roxygen$
14 | ^.git
15 | ^.gitignore
16 | ^cran-comments\.md$
17 | ^README_dev\.md$
18 | ^\.github$
19 | ^paper\.bib$
20 | ^paper\.md$
21 | ^CONTRIBUTING\.md$
22 | ^revdep$
23 | ^CRAN-SUBMISSION$
24 | ^CITATION.cff
25 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # History files
2 | .Rhistory
3 | .Rapp.history
4 |
5 | # Session Data files
6 | .RData
7 |
8 | # Example code in package build process
9 | *-Ex.R
10 |
11 | # Output files from R CMD build
12 | /*.tar.gz
13 |
14 | # Output files from R CMD check
15 | /*.Rcheck/
16 |
17 | # RStudio files
18 | .Rproj.user/
19 |
20 | # produced vignettes
21 | vignettes/*.html
22 | vignettes/*.pdf
23 |
24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
25 | .httr-oauth
26 |
27 | # knitr and R markdown default cache directories
28 | /*_cache/
29 | /cache/
30 |
31 | # Temporary files created by R markdown
32 | *.utf8.md
33 | *.knit.md
34 |
35 | # Large files generated by Stan
36 | src/*.so
37 | src/*.o
38 | ## src/stan_files/*.hpp
39 | ## src/stan_files/*.o
40 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: survHE
2 | Title: Survival Analysis in Health Economic Evaluation
3 | Version: 2.0.5
4 | Date: 2025-05-15
5 | Authors@R: c(
6 | person(given = "Gianluca",family = "Baio",role = c("aut", "cre"),email = "g.baio@ucl.ac.uk"),
7 | person("Andrea","Berardi",role="ctb",email="and.be.like@gmail.com"),
8 | person("Philip","Cooney",role="ctb",email="philip.cooney@hotmail.com"),
9 | person("Andrew","Jones",role="ctb",email="andrew.r.johnson@postgrad.curtin.edu.au"),
10 | person("Nathan","Green",role="ctb",email="n.green@ucl.ac.uk"))
11 | URL: https://github.com/giabaio/survHE, https://gianluca.statistica.it/software/survhe/
12 | BugReports: https://github.com/giabaio/survHE/issues
13 | Description: Contains a suite of functions for survival analysis in health economics.
14 | These can be used to run survival models under a frequentist (based on maximum likelihood)
15 | or a Bayesian approach (both based on Integrated Nested Laplace Approximation or Hamiltonian
16 | Monte Carlo). To run the Bayesian models, the user needs to install additional modules
17 | (packages), i.e. 'survHEinla' and 'survHEhmc'. These can be installed from
18 | using
19 | 'install.packages("survHEhmc", repos = c("https://giabaio.r-universe.dev", "https://cloud.r-project.org"))'
20 | and
21 | 'install.packages("survHEinla", repos = c("https://giabaio.r-universe.dev", "https://cloud.r-project.org"))'
22 | respectively. 'survHEinla' is based on the package INLA, which is available for download at
23 | . The user can specify a set of parametric models
24 | using a common notation and select the preferred mode of inference. The results can also be
25 | post-processed to produce probabilistic sensitivity analysis and can be used to export the
26 | output to an Excel file (e.g. for a Markov model, as often done by modellers and
27 | practitioners). .
28 | License: GPL (>=3)
29 | Encoding: UTF-8
30 | LazyData: true
31 | Roxygen: list(markdown = TRUE)
32 | RoxygenNote: 7.3.2
33 | Biarch: true
34 | Depends:
35 | methods,
36 | R (>= 4.1.0),
37 | flexsurv,
38 | dplyr,
39 | ggplot2
40 | Imports:
41 | rms,
42 | xlsx,
43 | tools,
44 | tibble,
45 | tidyr
46 | Suggests:
47 | survHEinla,
48 | survHEhmc,
49 | INLA,
50 | rstan,
51 | testthat (>= 3.0.0)
52 | Config/testthat/edition: 3
53 | Additional_repositories:
54 | https://inla.r-inla-download.org/R/stable/,
55 | https://giabaio.r-universe.dev/
56 | SystemRequirements: GNU make
57 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | S3method(plot, survHE)
2 | S3method(print, survHE)
3 | S3method(summary, survHE)
4 | export(digitise)
5 | export(fit.models)
6 | export(make.ipd)
7 | export(make.surv)
8 | export(model.fit.plot)
9 | export(make.transition.probs)
10 | export(plot.survHE)
11 | export(print.survHE)
12 | export(psa.plot)
13 | export(summary.survHE)
14 | export(write.surv)
15 | export(plot_transformed_km)
16 | export(three_state_mm)
17 | export(markov_trace)
18 | export(make_data_multi_state)
19 | export(make_newdata)
20 | export(make.transition.probs)
21 | export(theme_survHE)
22 | import(flexsurv)
23 | import(methods)
24 | import(dplyr)
25 | import(ggplot2)
26 | importFrom("graphics", "axis", "barplot", "hist", "legend", "par", "plot",
27 | "points", "polygon", "text", "title")
28 | importFrom("utils", "read.table","write.table","head","tail","modifyList")
29 | importFrom("stats", "terms","as.formula","model.frame","model.matrix","dexp",
30 | "pexp","dweibull","pweibull","dgamma","pgamma","median","var","time",
31 | "update","dt","profile","dlnorm","plnorm","dlogis","plogis","sd",
32 | "quantile","qnorm","model.matrix.lm")
33 | importFrom("grDevices", "colors","adjustcolor","colorRampPalette")
34 | importFrom("tools", "file_ext")
35 | importFrom("rms", "npsurv","survplot")
36 | importFrom("tibble", "rownames_to_column")
37 | importFrom("tidyr", "unite")
38 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # 2.0.5
2 |
3 | * Adds a `ggplot` theme `theme_survHE` to be used with combined plots (eg for a `survextrap` element)
4 |
5 | # 2.0.4
6 |
7 | * Fixes a slight mistake in the table for the Gen F. Now works properly on the beta coefficients
8 | * Updates DESCRIPTION with details of installatin for the add-ons `survHEinla` and `survHEhmc`
9 |
10 | # 2.0.3
11 |
12 | * Fixes dependency to R >= 4.1.0 because of the use of natural pipes
13 | * Fixes tests --- needs to use `select(!contains("(Intercept)"))` instead of `select(-matches("Intercept"),everything())` in several parts of the code. This was previously used to determine the names of the strata, used to plot the survival curves
14 |
15 | # 2.0.2
16 |
17 | * Added a `NEWS.md` file to track changes to the package (to replace the old `.Rd` version).
18 |
19 | _October 2024_
20 |
21 | Patch code in support of plotting (`R/utils_fit_models.R`)
22 |
23 | * In `make_surv_curve_plot` replace
24 | ```
25 | geom_step(data = datakm, aes(x = time, y = S, group=as.factor(strata)),
26 | color="darkgrey") +
27 | geom_ribbon(data = datakm,
28 | aes(x = time, y = S, ymin=lower, ymax=upper, group=as.factor(strata)),
29 | alpha = 0.2)
30 | ```
31 | with
32 | ```
33 | geom_step(data = datakm, aes(x = time, y = S, group=as.factor(strata:object_name)),
34 | color="darkgrey") +
35 | geom_ribbon(data = datakm,
36 | aes(x = time, y = S, ymin=lower, ymax=upper, group=as.factor(strata:object_name)),
37 | alpha = 0.2)
38 | ```
39 | This means that when plotting two or more `survHE` objects, the KM is added and displayed correctly
40 |
41 | * Adds a utility function `make_newdata` that can be used to generate profiles of covariates, to then plot specific groups of individuals' survival or hazard curves.
42 |
43 | _September 2024_
44 |
45 | # 2.0.1
46 |
47 | * Some refactoring (mainly thanks to @n8thangreen) with tidying up of the underlying code. One major change is that now 'make.surv' outputs an object named 'time' instead of 't', which was ambigious
48 |
49 | _November 2022_
50 |
51 | # 2.0
52 |
53 | * @Philip-Cooney found a little mistake in how the print method works for hmc objects. The utility functions were accessing the data for the first model of the list of possible models (instead of the specific 'mod' one). Also, RPS would produce bizarre results if no covariates included.
54 | \item @Philip-Cooney also found that 'make_sim_hmc' would break in the case of a RPS model with no covariates, because the matrix of beta coefficients would be turned into a vector, essentially, but the code would try to still subset a column. This has now been fixed so it's OK to make simulations off an RPS with no intercept model and that propagates to plots too.
55 |
56 | _April 2022_
57 |
58 | * This is a *major* change. In this version, the package is restructured to only perform, in its basic version, MLE estimates using flexsurv as
59 | inferential engine. All the backbone functionalities are unchanged and the user can also expand (to revert to the "full" survHE including Bayesian modelling), by simply also adding the new packages survHEinla and/or survHEhmc. These now only contain the INLA and rstan calls and functionalities.
60 |
61 | _January 2022_
62 |
63 | # 1.1.4
64 |
65 | * Contribution by Andrew Jones to update compatibility with the newer version of stan. Changes on StanHeaders + stan models to avoid complaints by
66 | the compiler because of declared variables with the same name of a function that was being defined. None of these are directly "visible" to the final user, though...
67 |
68 | * Changes to '.Rbuildignore' to allow 'rstantool' to automatically configuring on package install. Also improves compatibility across versions of
69 | 'rstan'. See https://github.com/giabaio/survHE/pull/42
70 |
71 | _September 2021_
72 |
73 | # 1.1.3
74 |
75 | * Adds an option 'what' to plot so that 'survival', 'hazard' and 'cumhazard' can be specified (and the plot is modified to the various different scales)
76 |
77 | * Updates the Gamma, GenGamma and GenF models in HMC to include for the possibility that the data contain no censoring. Also fix a small typo in the print method for Gamma/HMC models.
78 |
79 | * Updates in INLA means now the Gompertz model is also available for survival modelling. fit.models(), make.surv() and print() have now been updated so that the Gompertz model can be run under 'survHE'. In order to run the Gompertz model using INLA, the *testing* version (>=21.03.21) needs to be installed (see instructions here: https://www.r-inla.org/download-install).
80 |
81 | * Related to this, to improve computational stability, *all* the INLA models are now run by 'survHE' using the following trick: first the times are rescaled (on the fly) in the interval [0-1] (by simply recomputing 'time=time/max(time'). The resulting models are *not* directly comparable to other inferential engines (because they are fitted to different data), but 'survHE' automatically rescales the estimates and model fitting statistics (eg *ICs) so that the 'plot' and 'print' methods give the correct answers. make.surv() has also been updated to reflect this.
82 |
83 | * There are also changes to make.transition.probs(), which has been updated and streamlined to compute transition probabilities off the survival curves fitted in a 'survHE' object. The computation is quicker and now based on the more robust relationship between the cumulative hazard function
84 | and the transition probabilities.
85 |
86 | * A new function make_data_multi_state() to create a dataset in the format required to analyse data in a multi-state framework.
87 |
88 | * A new function three_state_mm() added to fit a standard 3-states Markov model, based on survival curves that are then mapped onto transition probabilities (this function is under testing, though).
89 |
90 | * Adds two new datasets (TA174 and msmdata), both from the MDM paper by Williams et al (2017) that can be used for analysis of multi-state data.
91 |
92 | _June 2021_
93 |
--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
1 | #' A fictional survival trial.
2 | #'
3 | #' A dataset containing fictional data from a trial, where
4 | #' the main outcome is in terms of time-to-event and
5 | #' censoring indicator and with additional covariates.
6 | #'
7 | #' @format A data frame with 367 rows and 8 variables:
8 | #' \describe{
9 | #' \item{ID_patient}{The individual level identifier}
10 | #' \item{time}{The observed time at which the event happens}
11 | #' \item{censored}{An indicator to describe whether the
12 | #' event is fully observed or censored}
13 | #' \item{arm}{An indicator for the treatment arm, with
14 | #' 0 = control and 1 = active treatment}
15 | #' \item{sex}{An indicator for the individual's sex, with
16 | #' 0 = male and 1 = female}
17 | #' \item{age}{A numeric variable with the individual's age}
18 | #' \item{imd}{A categorical variable representing a measure
19 | #' of area-level social deprivation}
20 | #' \item{ethnic}{A categorical variable representing the
21 | #' individual's ethnic group, as measured from a Census}
22 | #' }
23 | "data"
24 |
25 |
26 | #' NICE TA174 dataset.
27 | #'
28 | #' A dataset containing the data used for NICE TA174, as
29 | #' made publicly available as part of the supplementary
30 | #' material for Williams et al (2017). Medical Decision
31 | #' Making, 37;427-439.
32 | #'
33 | #' @format A tibble with 810 rows and 8 variables:
34 | #' \describe{
35 | #' \item{patid}{A numeric patient identifier}
36 | #' \item{treat}{The treatment indicator. 1=rituximab
37 | #' in combination with fludarabine andcyclophosphamide
38 | #' (RFC); 0=fludarabine and cyclo-phosphamide alone (FC)}
39 | #' \item{prog}{An indicator to describe whether the
40 | #' patient has experience a progression}
41 | #' \item{death}{An indicator to describe whether the
42 | #' patient has experience death}
43 | #' \item{prog_t}{The observed time at progression, or
44 | #' the time at which the patient has been censored;
45 | #' measured in months}
46 | #' \item{death_t}{The observed time at death, or
47 | #' the time at which the patient has been censored;
48 | #' measured in months}
49 | #' \item{prog_ty}{The observed time at progression, or
50 | #' the time at which the patient has been censored;
51 | #' measured in years}
52 | #' \item{death_ty}{The observed time at death, or
53 | #' the time at which the patient has been censored;
54 | #' measured in years}
55 | #' }
56 | "ta174"
57 |
58 |
59 | #' NICE TA174 dataset in multi-state format.
60 | #'
61 | #' These are the same data contained in NICE TA174, as
62 | #' made publicly available as part of the supplementary
63 | #' material for Williams et al (2017). Medical Decision
64 | #' Making, 37;427-439. However, the data have been
65 | #' restructured (by using the function \code{make_data_multi_state()})
66 | #' to be used for multi-state analysis
67 | #'
68 | #' @format A tibble with 1868 rows and 16 variables:
69 | #' \describe{
70 | #' \item{id}{A numeric patient identifier}
71 | #' \item{from}{An indicator of the starting state.
72 | #' 1=Pre-progression; 2=Progression; 3=Death}
73 | #' \item{to}{An indicator for the arriving state}
74 | #' \item{trans}{A code for the actual transition considered.
75 | #' 1=Pre-progression -> Progression; 2=Pre-progression ->
76 | #' Death; 3=Progression -> Death}
77 | #' \item{Tstart}{The time of entry into the observation}
78 | #' \item{Tstop}{The time of exit from observation}
79 | #' \item{time}{The observed time until even (progression or
80 | #' death), or censoring occurs}
81 | #' \item{status}{The event indicator; takes value 1 if the
82 | #' underlying event (which varies depending on which
83 | #' transition is being considered) happens and 0 otherwise}
84 | #' \item{treat}{The treatment indicator. 1=rituximab
85 | #' in combination with fludarabine andcyclophosphamide
86 | #' (RFC); 0=fludarabine and cyclo-phosphamide alone (FC)}
87 | #' \item{patid}{The original numeric patient identifier}
88 | #' \item{prog}{The original indicator to describe whether
89 | #' the patient has experience a progression}
90 | #' \item{death}{The original indicator to describe whether
91 | #' the patient has experience death}
92 | #' \item{prog_t}{The original observed time at progression,
93 | #' or the time at which the patient has been censored;
94 | #' measured in months}
95 | #' \item{death_t}{The original observed time at death, or
96 | #' the time at which the patient has been censored;
97 | #' measured in months}
98 | #' \item{prog_ty}{The original observed time at progression,
99 | #' or the time at which the patient has been censored;
100 | #' measured in years}
101 | #' \item{death_ty}{The original observed time at death, or
102 | #' the time at which the patient has been censored;
103 | #' measured in years}
104 | #' }
105 | "msmdata"
--------------------------------------------------------------------------------
/R/digitise.R:
--------------------------------------------------------------------------------
1 | #' Format digitised data for use in survival analysis
2 | #'
3 | #' Produces txt files with Kaplan Meier and individual level survival data from
4 | #' digitised Kaplan Meier curves obtained by DigitizeIT
5 | #'
6 | #'
7 | #' @param surv_inp a txt file obtained for example by DigitizeIT and containing
8 | #' the input survival times from graph reading. This file contains 3 columns
9 | #' 'ID' = the row-ID
10 | #' 'time' = the vector of times captured by the digitisation process
11 | #' 'survival' = the vector of survival probabilities captured by the digitisation
12 | #' process
13 | #' @param nrisk_inp a txt file obtained by DigitizeIT and containing the
14 | #' reported number at risk. This contains the following columns:
15 | #' 'Interval' = the ID of the various intervals included in the analysis (
16 | #' eg 1, 2, 3, ...)
17 | #' 'Time' = the actual time shown on the x-axis in the digitsed graph
18 | #' 'Lower' = the row of the extracted co-ordinates that the time corresponds
19 | #' to
20 | #' 'Upper' = the row of the extracted co-ordinates for which the time is less
21 | #' than the following time at which we have a number at risk
22 | #' 'nrisk' = the actual number at risk as specified in the original data
23 | #' @param km_output the name of the file to which the KM data will be written
24 | #' @param ipd_output the name of the file to which the individual level data
25 | #' data will be written
26 | #' @author Patricia Guyot and Gianluca Baio
27 | #' @template refs
28 | #' @keywords Digitized Kaplan Meier curve
29 | #' @examples
30 | #' \dontrun{
31 | #' # Defines the txt files to be used as inputs
32 | #' surv.inp <- system.file("extdata", "survival.txt", package = "survHE")
33 | #' nrisk.inp <- system.file("extdata", "nrisk.txt", package = "survHE")
34 | #' # Runs 'digitise' to create the relevant output files
35 | #' digitise(surv.inp, nrisk.inp)
36 | #' }
37 | #' @export digitise
38 | digitise <- function(surv_inp,nrisk_inp,km_output="KMdata.txt",ipd_output="IPDdata.txt") {
39 | # Post-process the data obtained by DigitizeIT to obtain the KM data and the individual level data
40 | # surv_inp = a txt file obtained by DigitizeIT and containing the input survival times from graph reading
41 | # nrisk_inp = a txt file obtained by DigitizeIT and containing the reported number at risk
42 | # km_output = the name of the file to which the KM data will be written
43 | # ipd_output = the name of the file to which the individual level data data will be written
44 | # Adapted from Patricia Guyot (2012)
45 |
46 | # Defines the working directory (same as the one where the DigitizeIT data are)
47 | working.dir <- dirname(surv_inp)
48 | #### setwd(working.dir); working.dir <- paste0(getwd(),"/")
49 | tot.events<-"NA" #tot.events = total no. of events reported. If not reported, then tot.events="NA"
50 | arm.id<-1 #arm indicator
51 |
52 | #Read in survival times read by digizeit
53 | digizeit <- read.table(surv_inp,header=TRUE,row.names=NULL)
54 | t.S<-digizeit[,2] # times recorded from DigitizeIT
55 | S<-digizeit[,3] # survival from DigitizeIT
56 |
57 | #Read in published numbers at risk, n.risk, at time, t.risk, lower and upper indexes for time interval
58 | pub.risk<-read.table(nrisk_inp,header=TRUE,row.names=NULL)
59 | ## Needs to get rid of possible time intervals with no digitised observations
60 | pub.risk <- pub.risk[pub.risk[,4]>0,]
61 | ## Needs to recode the first ever occurrence to 1??
62 | if (!(pub.risk[1,3]==1)) {pub.risk[1,3] <- 1}
63 |
64 | # Defines the variables needed for the algorithm
65 | t.risk<-pub.risk[,2]
66 | lower<-pub.risk[,3]
67 | upper<-pub.risk[,4]
68 | n.risk<-pub.risk[,5]
69 | n.int<-length(n.risk)
70 | n.t<- upper[n.int]
71 |
72 | #Initialise vectors
73 | arm <- rep(arm.id,n.risk[1])
74 | n.censor <- rep(0,(n.int-1))
75 | n.hat <- rep(n.risk[1]+1,n.t)
76 | cen <- d <- rep(0,n.t)
77 | KM.hat <- rep(1,n.t)
78 | last.i <- rep(1,n.int)
79 | sumdL <- 0
80 |
81 | # Executes Patricia's algorithm to determine censoring
82 | if (n.int > 1){
83 | #Time intervals 1,...,(n.int-1)
84 | for (i in 1:(n.int-1)){
85 | #First approximation of no. censored on interval i
86 | n.censor[i]<- round(n.risk[i]*S[lower[i+1]]/S[lower[i]]- n.risk[i+1])
87 | #Adjust tot. no. censored until n.hat = n.risk at start of interval (i+1)
88 | while((n.hat[lower[i+1]]>n.risk[i+1])||((n.hat[lower[i+1]]0))){
89 | if (n.censor[i]<=0){
90 | cen[lower[i]:upper[i]]<-0
91 | n.censor[i]<-0
92 | }
93 | if (n.censor[i]>0){
94 | cen.t<-rep(0,n.censor[i])
95 | for (j in 1:n.censor[i]){
96 | cen.t[j]<- t.S[lower[i]] +
97 | j*(t.S[lower[(i+1)]]-t.S[lower[i]])/(n.censor[i]+1)
98 | }
99 | #Distribute censored observations evenly over time. Find no. censored on each time interval.
100 | cen[lower[i]:upper[i]]<-hist(cen.t,breaks=t.S[lower[i]:lower[(i+1)]],plot=F)$counts
101 | }
102 | #Find no. events and no. at risk on each interval to agree with K-M estimates read from curves
103 | n.hat[lower[i]]<-n.risk[i]
104 | last<-last.i[i]
105 | for (k in lower[i]:upper[i]){
106 | if (i==1 & k==lower[i]){
107 | d[k]<-0
108 | KM.hat[k]<-1
109 | }
110 | else {
111 | d[k]<-round(n.hat[k]*(1-(S[k]/KM.hat[last])))
112 | KM.hat[k]<-KM.hat[last]*(1-(d[k]/n.hat[k]))
113 | }
114 | n.hat[k+1]<-n.hat[k]-d[k]-cen[k]
115 | if (d[k] != 0) last<-k
116 | }
117 | n.censor[i]<- n.censor[i]+(n.hat[lower[i+1]]-n.risk[i+1])
118 | }
119 | if (n.hat[lower[i+1]]1){
125 | #Assume same censor rate as average over previous time intervals.
126 | n.censor[n.int]<- min(round(sum(n.censor[1:(n.int-1)])*(t.S[upper[n.int]]-
127 | t.S[lower[n.int]])/(t.S[upper[(n.int-1)]]-t.S[lower[1]])), n.risk[n.int])
128 | }
129 | if (n.int==1){n.censor[n.int]<-0}
130 | if (n.censor[n.int] <= 0){
131 | cen[lower[n.int]:(upper[n.int]-1)]<-0
132 | n.censor[n.int]<-0
133 | }
134 | if (n.censor[n.int]>0){
135 | cen.t<-rep(0,n.censor[n.int])
136 | for (j in 1:n.censor[n.int]){
137 | cen.t[j]<- t.S[lower[n.int]] +
138 | j*(t.S[upper[n.int]]-t.S[lower[n.int]])/(n.censor[n.int]+1)
139 | }
140 | cen[lower[n.int]:(upper[n.int]-1)]<-hist(cen.t,breaks=t.S[lower[n.int]:upper[n.int]],plot=F)$counts
141 | }
142 | #Find no. events and no. at risk on each interval to agree with K-M estimates read from curves
143 | n.hat[lower[n.int]]<-n.risk[n.int]
144 | last<-last.i[n.int]
145 | for (k in lower[n.int]:upper[n.int]){
146 | if(KM.hat[last] !=0){
147 | d[k]<-round(n.hat[k]*(1-(S[k]/KM.hat[last])))} else {d[k]<-0}
148 | KM.hat[k]<-KM.hat[last]*(1-(d[k]/n.hat[k]))
149 | n.hat[k+1]<-n.hat[k]-d[k]-cen[k]
150 | #No. at risk cannot be negative
151 | if (n.hat[k+1] < 0) {
152 | n.hat[k+1]<-0
153 | cen[k]<-n.hat[k] - d[k]
154 | }
155 | if (d[k] != 0) last<-k
156 | }
157 | #If total no. of events reported, adjust no. censored so that total no. of events agrees.
158 | if (tot.events != "NA"){
159 | if (n.int>1){
160 | sumdL<-sum(d[1:upper[(n.int-1)]])
161 | #If total no. events already too big, then set events and censoring = 0 on all further time intervals
162 | if (sumdL >= tot.events){
163 | d[lower[n.int]:upper[n.int]]<- rep(0,(upper[n.int]-lower[n.int]+1))
164 | cen[lower[n.int]:(upper[n.int]-1)]<- rep(0,(upper[n.int]-lower[n.int]))
165 | n.hat[(lower[n.int]+1):(upper[n.int]+1)]<- rep(n.risk[n.int],(upper[n.int]+1-lower[n.int]))
166 | }
167 | }
168 | #Otherwise adjust no. censored to give correct total no. events
169 | if ((sumdL < tot.events)|| (n.int==1)){
170 | sumd<-sum(d[1:upper[n.int]])
171 | while ((sumd > tot.events)||((sumd< tot.events)&&(n.censor[n.int]>0))){
172 | n.censor[n.int]<- n.censor[n.int] + (sumd - tot.events)
173 | if (n.censor[n.int]<=0){
174 | cen[lower[n.int]:(upper[n.int]-1)]<-0
175 | n.censor[n.int]<-0
176 | }
177 | if (n.censor[n.int]>0){
178 | cen.t<-rep(0,n.censor[n.int])
179 | for (j in 1:n.censor[n.int]){
180 | cen.t[j]<- t.S[lower[n.int]] +
181 | j*(t.S[upper[n.int]]-t.S[lower[n.int]])/(n.censor[n.int]+1)
182 | }
183 | cen[lower[n.int]:(upper[n.int]-1)]<-hist(cen.t,breaks=t.S[lower[n.int]:upper[n.int]],plot=F)$counts
184 | }
185 | n.hat[lower[n.int]]<-n.risk[n.int]
186 | last<-last.i[n.int]
187 | for (k in lower[n.int]:upper[n.int]){
188 | d[k]<-round(n.hat[k]*(1-(S[k]/KM.hat[last])))
189 | KM.hat[k]<-KM.hat[last]*(1-(d[k]/n.hat[k]))
190 | if (k != upper[n.int]){
191 | n.hat[k+1]<-n.hat[k]-d[k]-cen[k]
192 | #No. at risk cannot be negative
193 | if (n.hat[k+1] < 0) {
194 | n.hat[k+1]<-0
195 | cen[k]<-n.hat[k] - d[k]
196 | }
197 | }
198 | if (d[k] != 0) last<-k
199 | }
200 | sumd <- sum(d[1:upper[n.int]])
201 | }
202 | }
203 | }
204 |
205 | # Now writes the results to the output files
206 | KMdata <- data.frame(time=t.S,n.risk=n.hat[1:n.t],n.event=d,n.censored=cen)
207 | write.table(KMdata,km_output,sep="\t",row.names=FALSE,col.names=TRUE)
208 |
209 | # And forms IPD data
210 | #Initialise vectors
211 | t.IPD <- rep(t.S[n.t],n.risk[1])
212 | event.IPD <- rep(0,n.risk[1])
213 | #Write event time and event indicator (=1) for each event, as separate row in t.IPD and event.IPD
214 | k <- 1
215 | for (j in 1:n.t){
216 | if(d[j]!=0){
217 | t.IPD[k:(k+d[j]-1)]<- rep(t.S[j],d[j])
218 | event.IPD[k:(k+d[j]-1)]<- rep(1,d[j])
219 | k<-k+d[j]
220 | }
221 | }
222 | #Write censor time and event indicator (=0) for each censor, as separate row in t.IPD and event.IPD
223 | for (j in 1:(n.t-1)){
224 | if(cen[j]!=0){
225 | t.IPD[k:(k+cen[j]-1)]<- rep(((t.S[j]+t.S[j+1])/2),cen[j])
226 | event.IPD[k:(k+cen[j]-1)]<- rep(0,cen[j])
227 | k<-k+cen[j]
228 | }
229 | }
230 | #Output IPD
231 | IPD <- data.frame(time=t.IPD,event=event.IPD,arm)
232 | write.table(IPD,ipd_output,sep="\t",row.names=FALSE,col.names=TRUE)
233 |
234 | if (dirname(km_output)==".") {
235 | cat("\n")
236 | cat(paste0("Kaplan Meier data written to file: ",working.dir,km_output))
237 | } else {
238 | cat("\n")
239 | cat(paste0("Kaplan Meier data written to file: ",km_output))
240 | }
241 | if (dirname(ipd_output)==".") {
242 | cat("\n")
243 | cat(paste0("IPD data written to file: ",working.dir,ipd_output))
244 | cat("\n")
245 | } else {
246 | cat("\n")
247 | cat(paste0("IPD data written to file: ",ipd_output))
248 | cat("\n")
249 | }
250 | }
251 |
--------------------------------------------------------------------------------
/R/fit_models.R:
--------------------------------------------------------------------------------
1 | ## SET OF UTILITY FUNCTIONS TO INCLUDE SURVIVAL ANALYSIS RESULTS INTO A HEALTH ECONOMIC MODEL
2 | ## Gianluca Baio + Will Browne + Peter Konings (10 Jan 2017)
3 | #' Fit parametric survival analysis for health economic evaluations
4 | #'
5 | #' Runs the survival analysis with several useful options, using either MLE
6 | #' (via flexsurv) or a Bayesian approach (via R-INLA or rstan)
7 | #'
8 | #' On object in the class \code{survHE} containing the following elements
9 | #'
10 | #' @param formula a formula specifying the model to be used, in the form
11 | #' \code{Surv(time,event)~treatment[+covariates]} for flexsurv, or
12 | #' \code{inla.surv(time,event)~treatment[+covariates]} for INLA
13 | #' @param data A data frame containing the data to be used for the analysis.
14 | #' This must contain data for the 'event' variable. In case there is no
15 | #' censoring, then \code{event} is a column of 1s.
16 | #' @param distr a (vector of) string(s) containing the name(s) of the model(s)
17 | #' to be fitted. Available options are:
18 | #'
19 | #' \code{flexsurv}:
20 | #' "exponential","gamma","genf","gengamma","gompertz","weibull",
21 | #' "weibullPH","loglogistic","lognormal" \code{INLA}:
22 | #' "exponential","weibull","lognormal","loglogistic" \code{hmc}:
23 | #' "exponential","gamma","genf","gengamma","gompertz","weibull","weibullPH",
24 | #' "loglogistic","lognormal"
25 | #' @param method A string specifying the inferential method (\code{'mle'},
26 | #' \code{'inla'} or \code{'hmc'}). If \code{method} is set to \code{'hmc'},
27 | #' then \code{survHE} will write suitable model code in the Stan language
28 | #' (according to the specified distribution), prepare data and initial values
29 | #' and then run the model.
30 | #' @param \dots Additional options (for INLA or HMC).
31 | #'
32 | #' **INLA** specific options \code{dz} = defines the step length for the grid
33 | #' search over the hyperparameters space (default = 0.1) \code{diff.logdens} =
34 | #' defines the difference in the log-density for the hyperparameters to stop
35 | #' integration (default = 5) \code{control.fixed} = defines the default for the
36 | #' priors, unless specified by the user. Default values are prior mean = 0 for
37 | #' *all* fixed effects prior var = 1000 for *all* fixed effects prior mean = 0
38 | #' for the intercept prior prec -> 0 for the intercept \code{control.family} =
39 | #' a list of options. If distr is a vector, then can be provided as a named
40 | #' list of options, for example something like this:
41 | #' \code{control.family=list(weibull=list(param=c(.1,.1)),lognormal=list(initial=2))}
42 | #' the names of the elements of the list need to be the same as those given in
43 | #' the vector \code{distr}
44 | #'
45 | #' **HMC** specific options \code{chains} = number of chains to run in the HMC
46 | #' (default = 2) \code{iter} = total number of iterations (default = 2000)
47 | #' \code{warmup} = number of warmup iterations (default = iter/2) \code{thin} =
48 | #' number of thinning (default = 1) \code{control} = a list specifying
49 | #' Stan-related options, eg \code{control=list(adapt_delta=0.85)} (default =
50 | #' NULL) \code{seed} = the random seed (to make things replicable) \code{pars}
51 | #' = a vector of parameters (string, default = NA) \code{include} = a logical
52 | #' indicator (if FALSE, then the pars are not saved; default = TRUE)
53 | #' \code{priors} = a list (of lists) specifying the values for the parameters
54 | #' of the prior distributions in the models \code{save.stan} = a logical
55 | #' indicator (default = FALSE). If TRUE, then saves the data list for Stan and
56 | #' the model file(s)
57 | #' @return \item{models}{ A list containing the fitted models. These contain
58 | #' the output from the original inference engine (\code{flexsurv}, \code{INLA}
59 | #' or \code{rstan}). Can be processed using the methods specific to the
60 | #' original packages, or via \code{survHE}-specific methods (such as
61 | #' \code{plot}, \code{print}) or other specialised functions (eg to extrapolate
62 | #' the survival curves, etc). } \item{model.fitting}{ A list containing the
63 | #' output of the model-fit statistics (AIC, BIC, DIC). The AIC and BIC are
64 | #' estimated for all methods, while the DIC is only estimated when using
65 | #' Bayesian inference. } \item{method}{ A string indicating the method used to
66 | #' fit the model, ie \code{'mle'}, \code{'inla'} or \code{'hmc'}. }
67 | #' \item{misc}{ A list containing the time needed to run the model(s) (in
68 | #' seconds), the formula used, the results of the Kaplan-Meier analysis (which
69 | #' is automatically performed using \code{npsurv}) and the original data frame.
70 | #' }
71 | #' @author Gianluca Baio
72 | #' @seealso \code{make.surv}
73 | #' @template refs
74 | #' @keywords Parametric survival models Bayesian inference via Hamiltonian
75 | #' Monte Carlo Bayesian inference via Integrated Nested Laplace Approximation
76 | #' @examples
77 | #' \dontrun{
78 | #' # Loads an example dataset from 'flexsurv'
79 | #' data(bc)
80 | #'
81 | #' # Fits the same model using the 3 inference methods
82 | #' mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
83 | #' distr="exp",method="mle")
84 | #' inla = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
85 | #' distr="exp",method="inla")
86 | #' hmc = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
87 | #' distr="exp",method="hmc")
88 | #'
89 | #' # Prints the results in comparable fashion using the survHE method
90 | #' print(mle)
91 | #' print(inla)
92 | #' print(hmc)
93 | #'
94 | #' # Or visualises the results using the original packages methods
95 | #' print(mle,original=TRUE)
96 | #' print(inla,original=TRUE)
97 | #' print(hmc,original=TRUE)
98 | #'
99 | #' # Plots the survival curves and estimates
100 | #' plot(mle)
101 | #' plot(mle,inla,hmc,labs=c("MLE","INLA","HMC"),colors=c("black","red","blue"))
102 | #' }
103 | #'
104 | #' @export fit.models
105 | fit.models <- function(formula = NULL, data , distr = NULL, method = "mle", ...) {
106 | # Captures the call
107 | call=match.call()
108 |
109 | # Lists all the additional inputs
110 | exArgs <- list(...)
111 | # Adds the 'formula' to exArgs, so it can be used by 'runHMC' and 'runINLA'
112 | exArgs$formula <- formula
113 | # Adds the 'data' to exArgs so it can be used by 'runHMC', 'runMLE' and 'runINLA'
114 | exArgs$data=data
115 | # Adds the 'call' to exArgs
116 | exArgs$call=call
117 |
118 | # Avoids the 'no visible binding for global variable' error, when compiling
119 | #model <- NULL
120 |
121 | # Needs to specify either the formula or the list of variables!
122 | if(is.null(formula)) {
123 | stop("You need to specify a model 'formula', e.g. 'formula=Surv(time,event)~treat'")
124 | }
125 | # ensures method is lower case
126 | method <- tolower(method)
127 | # ensures method is one of "mle","inla", "mcmc"
128 | if(!method %in% c("hmc","inla","mle")) {
129 | stop("Methods available for use are 'mle', 'hmc' or 'inla'")
130 | }
131 |
132 | # Check whether the selected distribution(s) can be implemented with the selected method
133 | # (and if not, falls back to 'mle')
134 | method=check_distributions(method,distr)
135 |
136 | # MLE -----
137 | # If method = MLE, then fits the model(s) using flexsurvreg
138 | if (method=="mle") {
139 | # Runs the models using the helper 'runMLE' and use the helper 'format_output_fit.models
140 | res <- format_output_fit.models(lapply(distr,function(x) runMLE(x,exArgs)),method,distr,formula,data)
141 | }
142 |
143 | # INLA -----
144 | # If method = INLA, then fits model(s) using inla
145 | if (method=="inla") {
146 | if (!isTRUE(requireNamespace("survHEinla", quietly = TRUE))) {
147 | stop("You need to install the packages 'survHEinla'. Please run in your R terminal:\n remotes::install_github('giabaio/survHEinla')")
148 | }
149 | # If survHEinla is installed but not loaded then attach the Namespace (so that all the relevant functions are available)
150 | if (isTRUE(requireNamespace("survHEinla", quietly = TRUE))) {
151 | if (!is.element("survHEinla", (.packages()))) {
152 | attachNamespace("survHEinla")
153 | }
154 | res <- format_output_fit.models(lapply(distr,function(x) survHEinla::runINLA(x,exArgs)),method,distr,formula,data)
155 | }
156 | }
157 |
158 | # HMC -----
159 | if (method == "hmc") {
160 | if (!isTRUE(requireNamespace("survHEhmc", quietly = TRUE))) {
161 | stop("You need to install the packages 'survHEhmc'. Please run in your R terminal:\n remotes::install_github('giabaio/survHEhmc')")
162 | }
163 | # If survHEhmc is installed but not loaded then attach the Namespace (so that all the relevant functions are available)
164 | if (isTRUE(requireNamespace("survHEhmc", quietly = TRUE))) {
165 | if (!is.element("survHEhmc", (.packages()))) {
166 | attachNamespace("survHEhmc")
167 | }
168 | res <- format_output_fit.models(lapply(distr,function(x) survHEhmc::runHMC(x,exArgs)),method,distr,formula,data)
169 | }
170 | }
171 |
172 | return(res)
173 | }
174 |
--------------------------------------------------------------------------------
/R/global.R:
--------------------------------------------------------------------------------
1 | #' This defines variables, mainly used with 'dplyr' and pipes that
2 | #' throw a 'no visible binding for global variable' during
3 | #' 'R CMD check' (before CRAN submission)
4 | #'
5 | #' @noRd
6 | utils::globalVariables(c(
7 | ".",
8 | "low",
9 | "upp",
10 | "time",
11 | "event",
12 | "(Intercept)",
13 | "S",
14 | "model_name",
15 | "strata",
16 | "object_name",
17 | "lower",
18 | "upper",
19 | "value",
20 | "lab",
21 | "aic",
22 | "bic",
23 | "dic",
24 | "mods_id",
25 | "xmin",
26 | "xmax",
27 | "ymin",
28 | "ymax",
29 | "where",
30 | "Death",
31 | "Alive",
32 | "disc",
33 | "Tstop",
34 | "Tstart",
35 | "from",
36 | "to",
37 | "trans",
38 | "status",
39 | "treat",
40 | "Pre-progressed",
41 | "Progressed",
42 | "Death",
43 | "l_12",
44 | "l_13",
45 | "l_11",
46 | "check",
47 | "sim_idx",
48 | "npeople",
49 | "grp_lab",
50 | "Group"
51 | ))
52 |
--------------------------------------------------------------------------------
/R/make.ipd.R:
--------------------------------------------------------------------------------
1 | #' Create an individual level dataset from digitised data
2 | #'
3 | #' Piles in the simulated IPD resulting from running digitise for more than one
4 | #' treatment arm
5 | #'
6 | #'
7 | #' @param ipd_files a list including the names of the IPD files created as
8 | #' output of digitise
9 | #' @param ctr the index of the file associated with the control arm (default,
10 | #' the first file). This will be coded as 0
11 | #' @param var.labs a vector of labels for the column of the resulting data
12 | #' matrix. NB these should match the arguments to the formula specified for
13 | #' fit.models. The user can specify values. These should be 4 elements (ID,
14 | #' TIME, EVENT, ARM)
15 | #' @author Gianluca Baio
16 | #' @seealso Something will go here
17 | #' @references Something will go here
18 | #' @keywords Digitized Kaplan Meier curve
19 | #' @examples
20 | #' \dontrun{
21 | #' # Defines the txt files to be used as inputs
22 | #' surv.inp <- system.file("extdata", "survival.txt", package = "survHE")
23 | #' nrisk.inp <- system.file("extdata", "nrisk.txt", package = "survHE")
24 | #' # Runs 'digitise' to create the relevant output files
25 | #' digitise(surv.inp, nrisk.inp, ipd_output = "IPD.txt")
26 | #' # Now uses 'make.ipd' to create the pseudo-data
27 | #' make.ipd("IPD.txt", ctr = 1, var.labs = c("time", "event", "arm"))
28 | #' }
29 | #' @export make.ipd
30 | make.ipd <- function(ipd_files,ctr=1,var.labs=c("time","event","arm")) {
31 | ## Piles in the simulated IPD resulting from running digitise for more than one treatment arm
32 | ## ipd_files = a list including the names of the IPD files created as output of digitise
33 | ## ctr = the index of the file associated with the control arm (default, the first file).
34 | ## This will be coded as 0
35 | ## var.labs = a vector of labels for the column of the resulting data matrix. NB these
36 | ## should match the arguments to the formula specified for fit.models. The
37 | ## user can specify values. These should be 3 elements (TIME, EVENT, ARM)
38 |
39 | # Identifies the number of arms (= number of IPD files)
40 | n_arms <- length(ipd_files)
41 | index <- 1:n_arms
42 | active <- index[-ctr]
43 | data <- read.table(ipd_files[[ctr]],header=TRUE,row.names=NULL)
44 | data[,"arm"] <- 0 # sets the value of "arm" to 0, for the control group
45 | arm.ind <- 1
46 | for (i in active) {
47 | tmp <- read.table(ipd_files[[index[i]]],header=TRUE,row.names=NULL)
48 | tmp[,"arm"] <- arm.ind
49 | data <- rbind(data,tmp)
50 | arm.ind <- arm.ind+1
51 | }
52 | colnames(data) <- var.labs
53 | return(data)
54 | }
55 |
--------------------------------------------------------------------------------
/R/make.surv.R:
--------------------------------------------------------------------------------
1 | #' Engine for Probabilistic Sensitivity Analysis on the survival curves
2 | #'
3 | #' Creates the survival curves for the fitted model(s)
4 | #'
5 | #' @param fit the result of the call to the \code{fit.models} function,
6 | #' containing the model fitting (and other relevant information)
7 | #' @param mod the index of the model. Default value is 1, but the user can
8 | #' choose which model fit to visualise, if the call to \code{fit.models} has a vector
9 | #' argument for \code{distr} (so many models are fitted & stored in the same object)
10 | #' @param t the time vector to be used for the estimation of the survival curve
11 | #' @param newdata a list (of lists), specifying the values of the covariates
12 | #' at which the computation is performed. For example
13 | #' \code{list(list(arm=0),list(arm=1))} will create two survival curves, one
14 | #' obtained by setting the covariate \code{arm} to the value 0 and the other by
15 | #' setting it to the value 1. In line with \code{flexsurv} notation, the user
16 | #' needs to either specify the value for *all* the covariates or for none (in
17 | #' which case, \code{newdata=NULL}, which is the default). If some value is
18 | #' specified and at least one of the covariates is continuous, then a single
19 | #' survival curve will be computed in correspondence of the average values of
20 | #' all the covariates (including the factors, which in this case are expanded
21 | #' into indicators).
22 | #' @param nsim The number of simulations from the distribution of the survival
23 | #' curves. Default at \code{nsim=1}, in which case uses the point estimate for
24 | #' the relevant distributional parameters and computes the resulting survival
25 | #' curve
26 | #' @param ... Additional options
27 | #' @author Gianluca Baio
28 | #' @seealso \code{\link{fit.models}}, \code{\link{psa.plot}}, \code{\link{write.surv}}
29 | #' @template refs
30 | #' @keywords Survival models Bootstrap Probabilistic sensitivity analysis
31 | #'
32 | #' @examples
33 | #' \dontrun{
34 | #' # Loads an example dataset from 'flexsurv'
35 | #' data(bc)
36 | #'
37 | #' # Fits the same model using the 3 inference methods
38 | #' mle <- fit.models(formula=Surv(recyrs,censrec) ~ group, data=bc,
39 | #' distr="exp", method="mle")
40 | #' p.mle <- make.surv(mle)
41 | #' psa.plot(p.mle)
42 | #'
43 | #' # Can also use the main 'plot' function to visualise the survival curves
44 | #' # and include uncertainty by using a number 'nsim' of simulations
45 | #' plot(mle, nsim=10)
46 | #' }
47 | #'
48 | #' @export make.surv
49 | #'
50 | make.surv <- function(fit, mod=1, t=NULL, newdata=NULL, nsim=1,...) {
51 | ## Creates the survival curves for the fitted model(s)
52 | # fit = the result of the call to the fit.models function, containing the model fitting (and other relevant information)
53 | # mod = the index of the model. Default value is 1, but the user can choose which model fit to visualise,
54 | # if the call to fit.models has a vector argument for distr (so many models are fitted & stored in the same object)
55 | # t = the time framework to be used for the estimation of the survival curve
56 | # newdata = a list (of lists), specifying the values of the covariates at which the computation is performed. For example
57 | # 'list(list(arm=0),list(arm=1))' will create two survival curves, one obtained by setting the covariate 'arm'
58 | # to the value 0 and the other by setting it to the value 1. In line with 'flexsurv' notation, the user needs
59 | # to either specify the value for *all* the covariates or for none (in which case, 'newdata=NULL', which is the
60 | # default). If some value is specified and at least one of the covariates is continuous, then a single survival
61 | # curve will be computed in correspondence of the average values of all the covariates (including the factors,
62 | # which in this case are expanded into indicators). The order of the variables in the list *must* be the same
63 | # as in the formula used for the model
64 | # nsim = the number of simulations from the distribution of the survival curves. Default at nsim=1, in which case
65 | # uses the point estimate for the relevant distributional parameters and computes the resulting survival curve
66 | # ... = additional options
67 |
68 | # Defines list with optional parameters
69 | exArgs <- list(...)
70 |
71 | # The Poly-Weibull/HMC model is special and needs a custom function
72 | if (fit$misc$model_name[mod] == "pow") {
73 | pwstuff <- make_surv_pw(fit=fit,mod=mod,t=t,newdata=newdata,nsim=nsim,exArgs)
74 | sim <- pwstuff$sim
75 | mat <- pwstuff$mat
76 | X <- pwstuff$X
77 | t <- pwstuff$t
78 | } else {
79 | # Any other model/method has a streamlined process using helper functions
80 | # Extracts the model object and the data from the survHE output
81 | m <- fit$models[[mod]]
82 | data <- fit$misc$data
83 |
84 | # Create a vector of times, if the user hasn't provided one, based on the observed data
85 | if (is.null(t)) {
86 | t <- sort(unique(fit$misc$km$time))
87 | }
88 | # By default uses the mean to compute summary statistics (eg for the case when nsim=1)
89 | # but the user can specify the median, which works better for very skewed parameters
90 | # which happens for example when trying to fit a model with too many parameters, which
91 | # results in a huge uncertainty, blowing up the estimates and making even the mean survival
92 | # curve impossible to compute
93 | if (exists("summary_stat",exArgs)){
94 | summary_stat <- exArgs$summary_stat
95 | } else {summary_stat <- "mean"}
96 |
97 | # Makes sure the distribution name(s) vector is in a usable format
98 | dist <- fit$misc$model_name[mod]
99 |
100 | # Now creates the profile of covariates for which to compute the survival curves
101 | X <- make_profile_surv(fit$misc$formula, data, newdata)
102 |
103 | # This is needed to rescale correctly the INLA models (which are fitted
104 | # on a range [0-1] for numerical stability)
105 | time_max <- max(fit$misc$km$time)
106 |
107 | # Draws a sample of nsim simulations from the distribution of the model parameters
108 | sim <- do.call(paste0("make_sim_",fit$method),
109 | args=list(m=m,t=t,X=X,nsim=nsim,newdata=newdata,dist=dist,data=data,
110 | formula=fit$misc$formula,summary_stat=summary_stat,time_max=time_max)
111 | )
112 | # Computes the survival curves - first in matrix form with all the simulations
113 | # Needs to add more inputs for the case of hmc/rps
114 | if (fit$method=="hmc" && dist=="rps") {
115 | exArgs$data.stan <- fit$misc$data.stan[[mod]]
116 | t[t==0] <- min(0.00001,min(t[t>0]))
117 | }
118 | if (fit$method=="mle" && dist=="rps") {
119 | exArgs$knots <- fit$models[[mod]]$knots
120 | }
121 |
122 | mat <- do.call(compute_surv_curve,
123 | args=list(sim=sim,exArgs=exArgs,nsim=nsim,
124 | dist=dist,t=t,method=fit$method,X=X)
125 | )
126 | }
127 |
128 | # Finally computes the actual survival curves, in summary forms
129 | if (nsim == 1) {
130 | # If nsim=1 then only save the point estimates of the survival curve
131 | S <- lapply(mat, function(x) {
132 | rowwise(x, time) |>
133 | summarise(S = mean(c_across(contains("S")))) |> ungroup()
134 | })
135 | } else {
136 | # If nsim>1 then also give the lower and upper quartile of the underlying distribution
137 | # avoids no visibile binding for global variable
138 |
139 | S <- lapply(mat, function(x) {
140 | rowwise(x, time) |>
141 | summarise(S = mean(c_across(contains("S"))),
142 | low = quantile(c_across(contains("S")), 0.025),
143 | upp = quantile(c_across(contains("S")), 0.975)) |> ungroup()
144 | })
145 | }
146 |
147 | list(
148 | S = S,
149 | sim = sim,
150 | nsim = nsim,
151 | mat = mat,
152 | des.mat = X,
153 | times = t)
154 | }
--------------------------------------------------------------------------------
/R/make.transition.probs.R:
--------------------------------------------------------------------------------
1 | #' make.transition.probs
2 | #'
3 | #' Computes the transition probabilities (to be passed to a Markov model) from
4 | #' the cumulative hazard curves obtained using \code{fit.models}, using the formula
5 | #' p(t)=1-exp(H(t-k)/H(t)), where k is the Markov model cycle length (or the
6 | #' difference across two consecutive times) and t is a generic time
7 | #'
8 | #' @aliases make.transition.probs
9 | #' @param fit an object obtained as output of the call to \code{fit.models}
10 | #' @param labs a vector with labels to identify the 'profiles' ie the
11 | #' combination of covariates that have been passed onto the model formula.
12 | #' If 'NULL' (default), then figures it out from the 'survHE' object.
13 | #' @param ... additional arguments. Includes the standard inputs to the
14 | #' call to \code{make.surv}, so \code{mod} (the index of the possibly many
15 | #' models stored in the 'survHE' object), \code{t} (the vector of times
16 | #' over which to compute the survival curves), \code{newdata} (a list that
17 | #' defines the profile of covariates) and \code{nsim} (the number of
18 | #' simulations to use - default is \code{nsim}=1)
19 | #' @return A tibble 'lambda' with an indicator for the treatment arm,
20 | #' the times at which the probabilities have been computed and \code{nsim}
21 | #' columns each with a simulation of the transition probabilities for
22 | #' all the times specified by the user
23 | #' @note Something will go here
24 | #' @author Gianluca Baio
25 | #' @seealso \code{\link{make.surv}}
26 | #' @references Something will go here
27 | #' @keywords Transition probabilities Markov models
28 | #' @examples
29 | #' \dontrun{
30 | #' # Something will go here
31 | #' }
32 | #'
33 | #' @export make.transition.probs
34 | make.transition.probs <- function(fit,labs=NULL,...) {
35 | exArgs <- list(...)
36 |
37 | # Makes default for parameters to the call to 'make.surv' (which are overwritten if the user has specified them
38 | # separately)
39 | if(exists("mod",exArgs)) {mod <- exArgs$mod} else {mod <- 1}
40 | if(exists("t",exArgs)) {t <- exArgs$t} else {t <- NULL}
41 | if(is.null(t)) {
42 | t <- sort(unique(fit$misc$km$time))
43 | # Add an extra time=0 at the beginning. This ensures the computation can be done for all the actual times
44 | # specified by the user (because the first one has no lag and so the ratio gives NA...)
45 | #if(t[1]>0) {t=c(0,t)}
46 | }
47 | # Add an extra time=0 at the beginning. This ensures the computation can be done for all the actual times
48 | # specified by the user (because the first one has no lag and so the ratio gives NA...)
49 | t <- c(0,t)
50 | if(exists("newdata",exArgs)) {newdata <- exArgs$newdata} else {newdata <- NULL}
51 | if(exists("nsim",exArgs)) {nsim <- exArgs$nsim} else {nsim <- 1}
52 | # Now computes the simulations using 'make.surv'
53 | s <- make.surv(fit,mod=mod,t=t,newdata=newdata,nsim=nsim)
54 |
55 | # Get labels of the 'strata'
56 | strata <- lapply(1:nrow(s$des.mat),function(x){
57 | s$des.mat %>%
58 | as_tibble() %>%
59 | select(!contains("(Intercept)")) %>%
60 | slice(x) %>%
61 | round(digits=2) %>%
62 | mutate(strata=paste0(names(.),"=",.,collapse=","))
63 | }) %>%
64 | bind_rows(.) %>%
65 | select(strata) %>%
66 | pull(strata)
67 |
68 | # Now retrieves the transition probabilities 'lambda' applying the formula
69 | # lambda(t)=1-S(t+k)/S(t) where k is the MM cycle length and t is a generic time
70 | lambda <- s$mat %>%
71 | # First stacks together all the matrices with the simulation(s) for the survival curves
72 | bind_rows() %>%
73 | # Then creates a treatment indicator (1,2,...) & place it as the first column
74 | mutate(profile=rep(strata,each=nrow(s$mat[[1]]))) %>% select(profile,everything()) %>%
75 | # Then group by treatment and computes the cumulative hazards
76 | group_by(profile) %>% mutate(across(starts_with("S"),~-log(.))) %>%
77 | # Then computes the probabilities using 1-exp(H(t-k)-H(t)), where k is the cycle length (or difference across times)
78 | ####mutate(across(starts_with("S"),~case_when(.==0~0,TRUE~1-exp(lag(.)-.)))) %>%
79 | mutate(across(starts_with("S"), ~ 1-exp(lag(.)-.))) %>%
80 | # Then removes the first row (in each treatment) - that was just artificially added anyway...
81 | slice(-1) %>% ungroup()
82 |
83 | # And now renames the columns from S(_1,S_2,...,S_nsim) to lambda(_1,lambda_2,...,lambda_nsim)
84 | if (nsim==1) {
85 | lambda <- lambda %>%
86 | rename_with(starts_with("S"), .fn=~"lambda")
87 | } else {
88 | lambda <- lambda %>%
89 | rename_with(starts_with("S"), .fn=~paste0("lambda_",1:nsim))
90 | }
91 |
92 | lambda
93 | }
94 |
95 |
96 | #' make_data_multi_state
97 | #'
98 | #' Takes as input an individual-level dataset including data on both
99 | #' progression and death time (**jointly**) and manipulates it using
100 | #' \code{dplyr} functions to create a full "multi-state" dataset, in
101 | #' which all the transitions are tracked. This can then be used
102 | #' to fit survival models and compute all the estimates for the
103 | #' whole set of transition probabilities
104 | #'
105 | #' @aliases make_data_multi_state
106 | #' @param data dataset containing the full ILD with information on both
107 | #' progression and death. Can be a data.frame or a tibble
108 | #' @param id The column with the individual identifier. Can be NULL (in
109 | #' which case, it will be created from scratch)
110 | #' @param prog The progression indicator: takes value 1 if the individual
111 | #' has progressed and 0 otherwise. Defaults to the column named 'prog' in
112 | #' the dataset
113 | #' @param death The death indicator: takes value 1 if the individual
114 | #' has died and 0 otherwise. Defaults to the column named 'death' in
115 | #' the dataset
116 | #' @param prog_t The progression time. Defaults to the column named
117 | #' 'prog_t' in the dataset
118 | #' @param death_t The death time. Defaults to the column named
119 | #' 'death_t' in the dataset
120 | #' @param keep A vector of strings with the names of the additional
121 | #' variables from the original dataset to keep into the multistate
122 | #' dataset. If 'NULL' (default), then keeps all
123 | #' @param ... additional arguments.
124 | #' @return A tibble containing the event history for each individual
125 | #' and with the following variables: id = Patients ID; from = Initial
126 | #' state (1=Pre-progression, 2=Progression, 3=Death); to = End state
127 | #' (1=Pre-progression, 2=Progression, 3=Death); trans = Transition ID:
128 | #' 1=Pre-progression -> Progression; 2=Pre-Progression -> Death;
129 | #' 3=Progression -> Death; Tstart = Entry time (either entry or
130 | #' progression); Tstop = Exit time (time of event or censoring time);
131 | #' status = Event indicator (1=yes, 0=censored), **for the specific
132 | #' event under consideration**; treat = Treatment indicator
133 | #' All the other original variables are appended to these, but can be
134 | #' removed
135 | #' @note Something will go here
136 | #' @author Gianluca Baio
137 | #' @seealso Something will go here
138 | #' @references Something will go here
139 | #' @keywords Transition probabilities Markov models Multistate models
140 | #' @examples
141 | #' \dontrun{
142 | #' # Something will go here
143 | #' }
144 | #'
145 | #' @export make_data_multi_state
146 | make_data_multi_state=function(data,id="id",prog="prog",death="death",prog_t="prog_t",
147 | death_t="death_t",keep=NULL,...) {
148 |
149 | # If ID is not passed (=NULL) then create it
150 | if(is.null(id)) {
151 | data=data %>% mutate(id=row_number())
152 | id="id"
153 | }
154 |
155 | # Uses dplyr to manipulate the original dataset and create the mstate version
156 | # NB: uses the notation '!!sym(name_variable)' to address a specific column in the dataset
157 | msmdata=
158 | # Transition Pre to Post
159 | data %>% mutate(
160 | id=!!sym(id), # patient ID
161 | from=1, # starting state
162 | to=2, # arriving state
163 | trans=1, # transition code (1 = Pre -> Progression)
164 | Tstart=0, # entry time
165 | Tstop=!!sym(prog_t), # exit time
166 | time=Tstop-Tstart, # time-to-event = Tstop-Tstart
167 | status=case_when( # censoring indicator:
168 | !!sym(prog)==1~1, # 1 if progressed; 0 otherwise
169 | TRUE~0
170 | )
171 | ) %>% select(id,from,to,trans,Tstart,Tstop,time,status,treat,everything()) %>%
172 | bind_rows(
173 | # Transition Pre to Death
174 | data %>% mutate(
175 | id=!!sym(id), # patient ID
176 | from=1, # starting state
177 | to=3, # arriving state
178 | trans=2, # transition code (2 = Pre -> Death)
179 | Tstart=0, # entry time
180 | Tstop=!!sym(death_t), # exit time
181 | time=Tstop-Tstart, # time-to-event = Tstop-Tstart
182 | status=case_when( # censoring indicator:
183 | # 1 if died at progression; 0 otherwise
184 | (!!sym(death)==1 & !!sym(prog_t)==!!sym(death_t))~1,
185 | TRUE~0
186 | )
187 | ) %>% select(id,from,to,trans,Tstart,Tstop,time,status,treat,everything())
188 | ) %>%
189 | bind_rows(
190 | # Transition Post to Death
191 | data %>% filter(!!sym(prog)==1) %>% mutate(
192 | id=!!sym(id), # patient ID
193 | from=2, # starting state
194 | to=3, # arriving state
195 | trans=3, # transition code (2 = Pre -> Death)
196 | Tstart=!!sym(prog_t), # entry time
197 | Tstop=!!sym(death_t), # exit time
198 | time=Tstop-Tstart, # time-to-event = Tstop-Tstart
199 | status=case_when( # censoring indicator:
200 | !!sym(death)==1~1, # 1 if died; 0 otherwise
201 | TRUE~0
202 | )
203 | ) %>% select(id,from,to,trans,Tstart,Tstop,time,status,treat,everything())
204 | ) %>% arrange(id,trans)
205 |
206 | if (!is.null(keep)) {
207 | msmdata=msmdata %>% select(id,from,to,trans,Tstart,Tstop,time,status,keep)
208 | }
209 | return(msmdata)
210 | }
211 |
212 |
213 | #' three_state_mm
214 | #'
215 | #' General purpose function to run a standard three-state Markov model
216 | #' (typically used in cancer modelling). The states are typically
217 | #' 'Pre-progression', 'Progressed' and 'Death'. No backward transition
218 | #' from 'Progressed' to 'Pre-progression' is allowed and 'Death' is
219 | #' obviously an absorbing state. All other transitions are possible.
220 | #' The crucial assumption is that *individual-level data* are available
221 | #' recording an indicator and the time of progression and death for each
222 | #' individual. The function returns the full transition matrix
223 | #'
224 | #' @aliases three_state_mm
225 | #' @param m_12 A 'survHE' object (output to a call to \code{fit.models})
226 | #' estimating the parameters of a model for the transition from
227 | #' 'Pre-progression' (state 1) to 'Progressed' (state 2). Given the
228 | #' individual level data with the complete event history (in the object
229 | #' 'data'), can be done with a call like 'x=make_data_multi_state(data)'
230 | #' and then \code{fit.models(Surv(time,status)~...,data=x \%>\% filter(trans==1),...)}
231 | #' @param m_13 A 'survHE' object (output to a call to \code{fit.models})
232 | #' estimating the parameters of a model for the transition from
233 | #' 'Pre-progression' (state 1) to 'Death' (state 3). Given the
234 | #' individual level data with the complete event history (in the object
235 | #' 'data'), can be done with a call like 'x=make_data_multi_state(data)'
236 | #' and then \code{fit.models(Surv(time,status)~...,data=x \%>\% filter(trans==2),...)}
237 | #' @param m_23 A 'survHE' object (output to a call to \code{fit.models})
238 | #' estimating the parameters of a model for the transition from
239 | #' 'Progressed' (state 2) to 'Death' (state 3). Given the
240 | #' individual level data with the complete event history (in the object
241 | #' 'data'), can be done with a call like 'x=make_data_multi_state(data)'
242 | #' and then \code{fit.models(Surv(time,status)~...,data=x \%>\% filter(trans==3),...)}
243 | #' @param nsim The number of simulations for the model parameters that are
244 | #' used to compute the survival curves. Defaults to \code{nsim}=1,
245 | #' which simply creates one survival curve for each treatment arm.
246 | #' @param start A vector of initial state occupancy. By default assumes 1000
247 | #' individuals, all initially allocated to 'Pre-progression'
248 | #' @param basecase Should the base case be computed as well, based on the
249 | #' point estimate of the underlying model parameters? (Default=FALSE)
250 | #' @param ... additional arguments.
251 | #' @return A list including the state occupancy simulations in an object 'm'.
252 | #' This is a tibble with the number of individuals in each of the 3 states
253 | #' at each of the times specified by the user. If \code{nsim}>1, then the tibble
254 | #' also contains a simulation index to keep track of that. The list also
255 | #' includes the computation time to obtain the state occupancy tibble (in the
256 | #' object 'running_time'). If \code{basecase==TRUE}, then the function also
257 | #' computes the "base case scenario" (based on 1 simulation from of the
258 | #' underlying survival curves, i.e. the point estimate of the model parameters)
259 | #' and stores it in the object 'base_case'
260 | #' @note Something will go here
261 | #' @author Gianluca Baio
262 | #' @seealso make.transition.probs make_data_multi_state
263 | #' @references Something will go here
264 | #' @keywords Transition probabilities Markov models Three-state cancer model
265 | #' @examples
266 | #' \dontrun{
267 | #' # Something will go here
268 | #' }
269 | #'
270 | #' @export three_state_mm
271 | three_state_mm = function(m_12,m_13,m_23,nsim=1,start=c(1000,0,0),basecase=FALSE,...){
272 |
273 | exArgs <- list(...)
274 |
275 | # Initialises the base_case object
276 | base_case=NULL
277 |
278 | # Makes default for parameters to the call to 'make.surv' (which are overwritten if the user has specified them
279 | # separately)
280 | if(exists("mod",exArgs)) {mod=exArgs$mod} else {mod=1}
281 | if(exists("t",exArgs)) {t=exArgs$t} else {t=NULL}
282 | if(is.null(t)) {
283 | t <- sort(unique(m_12$misc$km$time))
284 | }
285 | if(exists("newdata",exArgs)) {newdata=exArgs$newdata} else {newdata=NULL}
286 |
287 | # Computes the transition probabilities for the transitions that are directly
288 | # identifiable from the observed data
289 | lambda_12=make.transition.probs(m_12,mod=mod,t=t,newdata=newdata,nsim=nsim)
290 | lambda_13=make.transition.probs(m_13,mod=mod,t=t,newdata=newdata,nsim=nsim)
291 | lambda_23=make.transition.probs(m_23,mod=mod,t=t,newdata=newdata,nsim=nsim)
292 |
293 | # Derives lambda_11 by subtraction (as all transition probs out of state 1 must sum to 1)
294 | # NB: Without further checks, it is possible that (lambda_12+lambda_13)>1 and so lambda_11<0
295 | # This should be carefully checked!
296 | lambda_11=(lambda_12 %>% select(starts_with("lambda")) + lambda_13 %>% select(starts_with("lambda"))) %>%
297 | as_tibble() %>% bind_cols(lambda_12 %>% select(profile,time)) %>% select(profile,time,everything()) %>%
298 | mutate(across(starts_with("lambda"),~1-.))
299 | # Derives lambda_22 by subtraction (as all transition probs out of state 2 must sum to 1)
300 | lambda_22=(1-lambda_23 %>% select(starts_with("lambda"))) %>% as_tibble() %>%
301 | bind_cols(lambda_23 %>% select(profile,time)) %>% select(profile,time,everything())
302 |
303 | # Computes the state occupancy for all the simulations
304 | tic=Sys.time()
305 | m=make_state_occupancy(nsim=nsim,lambda_11,lambda_12,lambda_13,lambda_22,lambda_23,start)
306 | toc=Sys.time()
307 | running_time=toc-tic
308 |
309 | if(basecase) {
310 | # Makes base-case Markov model (by considering the point estimate of the model parameters)
311 | # Computes the transition probabilities for the transitions that are directly
312 | # identifiable from the observed data
313 | lambda_12=make.transition.probs(m_12,mod=mod,t=t,newdata=newdata,nsim=1)
314 | lambda_13=make.transition.probs(m_13,mod=mod,t=t,newdata=newdata,nsim=1)
315 | lambda_23=make.transition.probs(m_23,mod=mod,t=t,newdata=newdata,nsim=1)
316 |
317 | # Derives lambda_11 by subtraction (as all transition probs out of state 1 must sum to 1)
318 | lambda_11=(lambda_12 %>% select(starts_with("lambda"))+lambda_13 %>% select(starts_with("lambda"))) %>%
319 | as_tibble() %>% bind_cols(lambda_12 %>% select(profile,time)) %>% select(profile,time,everything()) %>%
320 | mutate(across(starts_with("lambda"),~1-.))
321 | # Derives lambda_22 by subtraction (as all transition probs out of state 2 must sum to 1)
322 | lambda_22=(1-lambda_23 %>% select(starts_with("lambda"))) %>% as_tibble() %>%
323 | bind_cols(lambda_23 %>% select(profile,time)) %>% select(profile,time,everything())
324 |
325 | base_case=make_state_occupancy(nsim=1,lambda_11,lambda_12,lambda_13,lambda_22,lambda_23,start)
326 | }
327 |
328 |
329 | # Outputs of the function
330 | list(m=m,running_time=running_time,base_case=base_case)
331 | }
332 |
333 |
334 | #' make_state_occupancy
335 | #'
336 | #' Utility function to compute the state occupancy in a three state MM
337 | #'
338 | #' @param nsim The number of simulations for the model parameters that are
339 | #' used to compute the survival curves.
340 | #' @param lambda_11 the tibble containing the transition probabilities for
341 | #' the transition "Pre-progression -> Pre-progression"
342 | #' @param lambda_12 the tibble containing the transition probabilities for
343 | #' the transition "Pre-progression -> Progression"
344 | #' @param lambda_13 the tibble containing the transition probabilities for
345 | #' the transition "Pre-progression -> Death"
346 | #' @param lambda_22 the tibble containing the transition probabilities for
347 | #' the transition "Progression -> Progression"
348 | #' @param lambda_23 the tibble containing the transition probabilities for
349 | #' the transition "Progression -> Death"
350 | #' @return A list including the state occupancy simulations in an object 'm'.
351 | #' This is a tibble with the number of individuals in each of the 3 states
352 | #' at each of the times specified by the user. If \code{nsim}>1, then the tibble
353 | #' also contains a simulation index to keep track of that.
354 | #' @note Something will go here
355 | #' @author Gianluca Baio
356 | #' @seealso make.transition.probs make_data_multi_state
357 | #' @references Something will go here
358 | #' @keywords Transition probabilities Markov models Three-state cancer model
359 | #' @examples
360 | #' \dontrun{
361 | #' # Something will go here
362 | #' }
363 | #'
364 | #' @noRd
365 | make_state_occupancy=function(nsim,lambda_11,lambda_12,lambda_13,lambda_22,lambda_23,start) {
366 |
367 | m=list()
368 | # Initialises the lists
369 | m=lapply(1:nsim,function(i) {
370 | # Creates the tibbles (one per each of the nsim simulations)
371 | m[[i]]=tibble(profile=lambda_11$profile,time=lambda_11$time,`Pre-progressed`=NA,Progressed=NA,Death=NA)
372 | # Now adds in the relevant transition probabilities in the correct rows
373 | m[[i]]=m[[i]] %>% left_join(lambda_11 %>% select(profile,time,starts_with("lambda")[[i]]) %>% rename("lambda_11"=starts_with("lambda")), by=c("profile","time")) %>%
374 | left_join(lambda_12 %>% select(profile,time,starts_with("lambda")[[i]]) %>% rename("lambda_12"=starts_with("lambda")), by=c("profile","time")) %>%
375 | left_join(lambda_13 %>% select(profile,time,starts_with("lambda")[[i]]) %>% rename("lambda_13"=starts_with("lambda")), by=c("profile","time")) %>%
376 | left_join(lambda_22 %>% select(profile,time,starts_with("lambda")[[i]]) %>% rename("lambda_22"=starts_with("lambda")), by=c("profile","time")) %>%
377 | left_join(lambda_23 %>% select(profile,time,starts_with("lambda")[[i]]) %>% rename("lambda_23"=starts_with("lambda")), by=c("profile","time"))
378 | # Initialise the tibbles with the start values
379 | m[[i]]=m[[i]] %>% group_by(profile) %>% mutate(
380 | `Pre-progressed`=replace(`Pre-progressed`,row_number()==1,start[1]),
381 | Progressed=replace(Progressed,row_number()==1,start[2]),
382 | Death=replace(Death,row_number()==1,start[3]),
383 | ) %>% ungroup()
384 |
385 | # Re-compute the probabilities to *always* sum up to 1 (this gets rid of weird cases where two
386 | # probs sum to something bigger than 1, which generates negative values)
387 | m[[i]]=m[[i]] %>% mutate(check=(lambda_12+lambda_13)>1) %>% mutate(
388 | l_12=case_when(
389 | check==TRUE~lambda_12/(lambda_12+lambda_13),
390 | check==FALSE~lambda_12
391 | ),
392 | l_13=case_when(
393 | check==TRUE~lambda_13/(lambda_12+lambda_13),
394 | check==FALSE~lambda_13
395 | )
396 | ) %>% mutate(l_11=1-(l_12+l_13)) %>% mutate(lambda_11=l_11,lambda_12=l_12,lambda_13=l_13) %>%
397 | select(-c(l_11,l_12,l_13,check))
398 | })
399 |
400 | # Convert the list into a massive tibble with a simulation index
401 | m=m %>% bind_rows() %>% mutate(sim_idx=rep(1:nsim,each=nrow(m[[1]])))
402 |
403 | # Loops over times to compute the state occupancy
404 | for (j in 2:nrow(m)) {
405 | # This is a simple trick to restart the computation when the treatment indicator changes
406 | if (m$profile[j]==m$profile[j-1]) {
407 | m$`Pre-progressed`[j]=sum(c(m$`Pre-progressed`[j-1]*m$lambda_11[j]),na.rm=T)
408 | m$Progressed[j]=sum(c(m$`Pre-progressed`[j-1]*m$lambda_12[j], m$Progressed[j-1]*m$lambda_22[j]),na.rm=T)
409 | m$Death[j]=sum(c(m$`Pre-progressed`[j-1]*m$lambda_13[j], m$Progressed[j-1]*m$lambda_23[j], m$Death[j-1]),na.rm=T)
410 | }
411 | }
412 | m=m %>% select(profile,time,`Pre-progressed`,Progressed,Death,sim_idx,everything())
413 | # Need to set to 0 all negative values!
414 | m=m %>% mutate(
415 | `Pre-progressed`=if_else(`Pre-progressed`<0,0,`Pre-progressed`),
416 | Progressed=if_else(Progressed<0,0,Progressed),
417 | Death=if_else(Death<0,0,Death)
418 | )
419 |
420 | return(m)
421 | }
422 |
423 |
424 | #' Markov trace
425 | #'
426 | #' Plots the Markov Trace from an object generated using \code{three_state_mm}
427 | #'
428 | #' @aliases markov_trace
429 | #' @param mm The output of a call to \code{three_state_mm}
430 | #' @param interventions A vector of labels for the interventions
431 | #' @param ... additional arguments.
432 | #' @return Plot
433 | #' @note Something will go here
434 | #' @author Gianluca Baio
435 | #' @seealso make.surv, three_state_mm
436 | #' @references Something will go here
437 | #' @keywords Transition probabilities Markov models Markov trace
438 | #' @examples
439 | #' \dontrun{
440 | #' # Something will go here
441 | #' }
442 | #'
443 | #' @export markov_trace
444 | markov_trace <- function(mm, interventions=NULL,...) {
445 | # First reshape the data
446 | if(!is.null(interventions)) {
447 | # Figures out how many observations there are in each treatment & replaces the values passed
448 | # as arguments in 'interventions'
449 | mm$m <- mm$m %>%
450 | mutate(profile = rep(interventions,each=mm$m %>%
451 | count(profile) %>%
452 | slice(1) %>%
453 | pull(n)))
454 | }
455 |
456 | pl=mm$m |> group_by(profile,time) |>
457 | summarise(
458 | `Pre-progressed`=mean(`Pre-progressed`),Progressed=mean(Progressed),
459 | Death=mean(Death)
460 | ) |>
461 | ungroup()
462 | pl <- pl |>
463 | select(profile,time,`Pre-progressed`) %>%
464 | rename(npeople=`Pre-progressed`) %>%
465 | mutate(group="Pre-progressed") %>%
466 | bind_rows(pl %>% select(profile,time,`Progressed`) %>%
467 | rename(npeople=Progressed) %>%
468 | mutate(group="Progressed")) %>%
469 | bind_rows(pl %>% select(profile,time,Death) %>%
470 | rename(npeople=Death) %>%
471 | mutate(group="Death")) %>%
472 | # Create a numeric/factor group label to help manage the appearance of the graph
473 | mutate(
474 | grp_lab=as.factor(case_when(
475 | group=="Pre-progressed"~3,
476 | group=="Progressed"~2,
477 | TRUE~1
478 | ))
479 | ) %>%
480 | ggplot(aes(x=time, y=npeople, fill=grp_lab)) +
481 | geom_bar(position="stack",stat="identity") +
482 | labs(x="Cycle",y="Number of people",title="Markov trace",fill="State") +
483 | facet_wrap(~profile) +
484 | theme_bw() +
485 | # Add control to the legend label
486 | scale_fill_discrete(breaks=c(3,2,1),labels=c("Pre-progressed","Progressed","Death"))
487 |
488 | return(pl)
489 | }
490 |
491 |
492 | ### TO DO --- THIS NICELY COMPUTES THE MEAN SURVIVAL TIME OVER THE MM SIMULATIONS BUT CURRENTLY ONLY
493 | ### DOES IT FOR WHEN nsim=1!!!
494 | compute_mean_time=function(x,disc.rate=0) {
495 | mean_time=x %>% mutate(Alive=(1000-Death)/1000,disc=1/(1+disc.rate)^time) %>% group_by(profile) %>%
496 | mutate(dt=time-lag(time)) %>% summarise(mean_survival_time=sum(Alive*dt*disc,na.rm=T)) %>% ungroup()
497 |
498 | return(mean_time)
499 | }
500 |
501 |
--------------------------------------------------------------------------------
/R/model.fit.plot.R:
--------------------------------------------------------------------------------
1 | #' Graphical representation of the measures of model fitting based on
2 | #' Information Criteria
3 | #'
4 | #' Plots a summary of the model fit for all the models fitted
5 | #'
6 | #' Something will go here
7 | #'
8 | #' @param ... Optional inputs. Must include at least one \code{survHE} object.
9 | #' @param type should the AIC, the BIC or the DIC plotted? (values = \code{"aic"},
10 | #' \code{"bic"} or \code{"dic"})
11 | #' @param scale If \code{scale='absolute'} (default), then plot the absolute value
12 | #' of the *IC. If \code{scale='relative'} then plot a rescaled version taking
13 | #' the percentage increase in the *IC in comparison with the best-fitting model
14 | #' @param stacked Should the bars be stacked and grouped by survHE object? (default=F)
15 | #' @return A plot with the relevant model fitting statistics
16 | #' @author Gianluca Baio
17 | #' @seealso \code{fit.models}
18 | #' @template refs
19 | #' @keywords Model fitting Parametric survival models
20 | #' @examples
21 | #' \dontrun{
22 | #' data(bc)
23 | #'
24 | #' mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
25 | #' distr=c("exp","wei","lno"),method="mle")
26 | #' model.fit.plot(mle)
27 | #' }
28 | #'
29 | #' @export model.fit.plot
30 | model.fit.plot <- function(...,type="aic",scale="absolute",stacked=FALSE) {
31 | ## Plots a summary of the model fit for all the models
32 | ## Can also combine several survHE objects each containing the fit for one model
33 |
34 | exArgs=list(...)
35 |
36 | # Finds out whether there are objects with no name (if so, they will be 'survHE' objects!)
37 | # If there are any, then needs to rename them to make the rest of the function work
38 | if(length(names(exArgs))==0) {
39 | # This is the case where the only argument(s) is/are unnamed 'survHE' object(s)
40 | names(exArgs)=paste0("Object",1:length(exArgs))
41 | }
42 | if(length(which(names(exArgs)==""))>0){
43 | names(exArgs)[which(names(exArgs)=="")] = paste0("Object",1:length(which(names(exArgs)=="")))
44 | }
45 | # Extracts the 'survHE' objects from the list 'exArgs'. If there are none, then stop with an error message!
46 | w <- which(unlist(lapply(1:length(exArgs),function(i) class(exArgs[[i]])))=="survHE")
47 | if(length(w)==0){
48 | stop("Please give at least one 'survHE' object, generated by a call to 'fit.models(...)")
49 | } else {
50 | survHE_objs=lapply(1:length(w),function(i) exArgs[[w[i]]])
51 | }
52 | names(survHE_objs)=names(exArgs)[w]
53 |
54 | # What model should be used from the 'survHE' objects?
55 | if (!exists("mods",exArgs)) {
56 | mods=1:sum(unlist(lapply(survHE_objs,function(x) length(x$models))))
57 | } else {mods=exArgs$mods}
58 |
59 | # Maps the arguments for 'type' to relevant strings
60 | if(type %in% c("aic","AIC","a","A")){type="AIC"}
61 | if(type %in% c("bic","BIC","b","B")){type="BIC"}
62 | if(type %in% c("dic","DIC","d","D")){type="DIC"}
63 |
64 | # Creates the dataset with the model fitting statistics for all the selected models
65 | toplot=lapply(1:length(survHE_objs),function(x) survHE_objs[[x]]$model.fitting %>% bind_rows %>%
66 | mutate(object_name=as.factor(names(survHE_objs)[x]),
67 | model_name=names(survHE_objs[[x]]$models))) %>%
68 | bind_rows %>% mutate(lab=paste0(model_name,":",object_name)) %>% select(object_name,model_name,lab,everything()) %>%
69 | slice(mods)
70 |
71 | # Can make the bars stacked (looks actually nice...)
72 | if(stacked==TRUE) {
73 | if(type=="AIC") {
74 | if(exists("xlim",exArgs)){yl=exArgs$xlim} else {yl=range(pretty(range(toplot$aic)))}
75 | mfp=ggplot(data=toplot,aes(x=model_name,y=aic,fill=object_name)) +
76 | geom_bar(stat="identity",position=position_dodge()) +
77 | geom_text(aes(x=model_name,y=aic,label=aic %>% round(digits=1.5)), hjust=1.05,
78 | color="white", size=5.5,position = position_dodge(0.9)) + coord_flip(ylim=yl)
79 | }
80 | if(type=="BIC") {
81 | if(exists("xlim",exArgs)){yl=exArgs$xlim} else {yl=range(pretty(range(toplot$bic)))}
82 | mfp=ggplot(data=toplot,aes(x=model_name,y=bic,fill=object_name)) +
83 | geom_bar(stat="identity",position=position_dodge()) +
84 | geom_text(aes(x=model_name,y=bic,label=bic %>% round(digits=1.5)), hjust=1.05,
85 | color="white", size=5.5,position = position_dodge(0.9)) + coord_flip(ylim=yl)
86 | }
87 | if(type=="DIC") {
88 | if(exists("xlim",exArgs)){yl=exArgs$xlim} else {yl=range(pretty(range(toplot$dic,na.rm=TRUE)))}
89 | mfp=ggplot(data=toplot,aes(x=model_name,y=dic,fill=object_name)) +
90 | geom_bar(stat="identity",position=position_dodge()) +
91 | geom_text(aes(x=model_name,y=dic,label=dic %>% round(digits=1.5)), hjust=1.05,
92 | color="white", size=5.5,position = position_dodge(0.9)) + coord_flip(ylim=yl)
93 | }
94 | mfp=mfp+
95 | theme_bw() +
96 | theme(axis.text.x = element_text(color="black",size=12,angle=0,hjust=.5,vjust=.5),
97 | axis.text.y = element_text(color="black",size=12,angle=0,hjust=.5,vjust=.5),
98 | axis.title.x = element_text(color="black",size=14,angle=0,hjust=.5,vjust=.5),
99 | axis.title.y = element_text(color="black",size=14,angle=90,hjust=.5,vjust=.5)) +
100 | theme(axis.line = element_line(colour = "black"),
101 | panel.background = element_blank(),
102 | panel.border = element_blank(),
103 | plot.title = element_text(size=18, face="bold")) +
104 | labs(y=toupper(type),x="",title=paste0("Model comparison based on ",toupper(type)),
105 | fill="survHE object") +
106 | scale_fill_brewer(palette="Paired") +
107 | theme(legend.position="bottom")
108 |
109 | # Optional arguments
110 | # Manual colours should be a vector with lenght equal to the number of objects
111 | if(exists("col",exArgs)){
112 | mfp=mfp+scale_fill_manual(values=exArgs$col)
113 | }
114 | if(exists("colour",exArgs)){
115 | mfp=mfp+scale_fill_manual(values=exArgs$colour)
116 | }
117 | if(exists("color",exArgs)){
118 | mfp=mfp+scale_fill_manual(values=exArgs$color)
119 | }
120 | # Can modify the title of the legend
121 | if(exists("name_legend",exArgs)){
122 | mfp=mfp+labs(fill=exArgs$name_legend)
123 | }
124 | # Can change the palette too
125 | # plot + scale_fill_brewer(palette=...) see 'help(scale_fill_brewer)' for possible options
126 | }
127 |
128 | if(stacked==FALSE) {
129 | # Can choose the colour with which to plot the bars. If nothing specified, then select default ('steelblue').
130 | # Otherwise, the user can specify a vector of colour with as many as there are bars. Can use interchangeably
131 | # the strings 'colour', 'color' or 'col'
132 | if(exists("colour",exArgs)){col=exArgs$colour} else {col="steelblue"}
133 | if(exists("color",exArgs)){col=exArgs$color} else {col="steelblue"}
134 | if(exists("col",exArgs)){col=exArgs$col} else {col="steelblue"}
135 |
136 | # Finally plots the bar-chart
137 | if(nlevels(toplot$object_name)==1){x=toplot$model_name} else {x=toplot$lab}
138 | mfp=ggplot(data=toplot)
139 | if(type=="AIC") {
140 | if(scale=="absolute" | scale=="abs") {
141 | if(exists("xlim",exArgs)){yl=exArgs$xlim} else {yl=range(pretty(range(toplot$aic)))}
142 | mfp=mfp+geom_bar(mapping=aes(x=x,y=aic),stat="identity",fill=col) +
143 | geom_text(aes(x=x,y=aic,label=aic %>% round(digits=1.5)), hjust=1.05, color="white", size=5.5) +
144 | labs(y=toupper(type),x="",title=paste0("Model comparison based on ",toupper(type)),
145 | color=ifelse(length(mods)==1,"Model","Models")
146 | ) + coord_flip(ylim=yl)
147 | }
148 | if (scale=="rel" | scale=="relative") {
149 | mfp=mfp+geom_bar(mapping=aes(x=x,y=100*(aic-min(aic))/min(aic)),stat="identity",fill=col) +
150 | geom_text(aes(x=x,y=100*(aic-min(aic))/min(aic),label=(100*(aic-min(aic))/min(aic)) %>% round(digits=1.5)),
151 | hjust=-.05, color="black", size=5.5) +
152 | labs(y=paste0("Percentage increase in ",toupper(type)),x="",title=paste0("Model comparison based on ",toupper(type)),
153 | color=ifelse(length(mods)==1,"Model","Models")
154 | ) + coord_flip()
155 | }
156 | }
157 | if(type=="BIC") {
158 | if(scale=="absolute" | scale=="abs") {
159 | if(exists("xlim",exArgs)){yl=exArgs$xlim} else {yl=range(pretty(range(toplot$bic)))}
160 | mfp=mfp+geom_bar(mapping=aes(x=x,y=bic),stat="identity",fill=col) +
161 | geom_text(aes(x=x,y=bic,label=bic %>% round(digits=1.5)), hjust=1.05, color="white", size=5.5) +
162 | labs(y=toupper(type),x="",title=paste0("Model comparison based on ",toupper(type)),
163 | color=ifelse(length(mods)==1,"Model","Models")
164 | ) + coord_flip(ylim=yl)
165 | }
166 | if(scale=="rel" | scale=="relative") {
167 | mfp=mfp+geom_bar(mapping=aes(x=x,y=100*(bic-min(bic))/min(bic)),stat="identity",fill=col) +
168 | geom_text(aes(x=x,y=100*(bic-min(bic))/min(bic),label=(100*(bic-min(bic))/min(bic)) %>% round(digits=1.5)),
169 | hjust=-.05, color="black", size=5.5) +
170 | labs(y=paste0("Percentage increase in ",toupper(type)),x="",title=paste0("Model comparison based on ",toupper(type)),
171 | color=ifelse(length(mods)==1,"Model","Models")
172 | ) + coord_flip()
173 | }
174 | }
175 | if(type=="DIC") {
176 | if(scale=="absolute" | scale=="abs") {
177 | if(exists("xlim",exArgs)){yl=exArgs$xlim} else {yl=range(pretty(range(toplot$dic,na.rm=TRUE)))}
178 | mfp=mfp+geom_bar(mapping=aes(x=x,y=dic),stat="identity",fill=col) +
179 | geom_text(aes(x=x,y=dic,label=dic %>% round(digits=1.5)), hjust=1.05, color="white", size=5.5) +
180 | labs(y=toupper(type),x="",title=paste0("Model comparison based on ",toupper(type)),
181 | color=ifelse(length(mods)==1,"Model","Models")
182 | ) + coord_flip(ylim=yl)
183 | }
184 | if(scale=="rel" | scale=="relative") {
185 | mfp=mfp+geom_bar(mapping=aes(x=x,y=100*(dic-min(dic))/min(dic)),stat="identity",fill=col) +
186 | geom_text(aes(x=x,y=100*(dic-min(dic))/min(dic),label=(100*(dic-min(dic))/min(dic)) %>% round(digits=1.5)),
187 | hjust=-.05, color="black", size=5.5) +
188 | labs(y=paste0("Percentage increase in ",toupper(type)),x="",title=paste0("Model comparison based on ",toupper(type)),
189 | color=ifelse(length(mods)==1,"Model","Models")
190 | ) + coord_flip()
191 | }
192 | }
193 | mfp=mfp +
194 | theme_bw() +
195 | theme(axis.text.x = element_text(color="black",size=12,angle=0,hjust=.5,vjust=.5),
196 | axis.text.y = element_text(color="black",size=12,angle=0,hjust=.5,vjust=.5),
197 | axis.title.x = element_text(color="black",size=14,angle=0,hjust=.5,vjust=.5),
198 | axis.title.y = element_text(color="black",size=14,angle=90,hjust=.5,vjust=.5)) +
199 | theme(axis.line = element_line(colour = "black"),
200 | panel.background = element_blank(),
201 | panel.border = element_blank(),
202 | plot.title = element_text(size=18, face="bold"))
203 |
204 | # Optional arguments
205 | if(exists("main",exArgs)){
206 | mfp=mfp+labs(title=exArgs$main)
207 | }
208 | if(exists("models",exArgs)){
209 | mfp=mfp+scale_x_discrete(labels=exArgs$models)
210 | }
211 | }
212 |
213 | # Renders the graph
214 | mfp
215 | }
--------------------------------------------------------------------------------
/R/plot.survHE.R:
--------------------------------------------------------------------------------
1 | #' Plot survival curves for the models fitted using \code{fit.models}
2 | #'
3 | #' Plots the results of model fit.
4 | #'
5 | #' @param ... Must include at least one result object saved as
6 | #' the call to the \code{fit.models} function. Nay include other
7 | #' optional parameters. These include whether the KM curve should be
8 | #' added \code{add.km} and whether the user specifies a profile of covariates
9 | #' (in the list \code{newdata}). Other possibilities are additional
10 | #' (mainly graphical) options. These are:
11 | #' \itemize{
12 | #' \item \code{xlab} = a string with the label for the x-axis (default = "time")
13 | #' \item \code{ylab} = a string with the label for the y-axis (default = "Survival")
14 | #' \item \code{lab.profile} = a (vector of) string(s) indicating the labels associated with the strata defining the different
15 | #' survival curves to plot. Default to the value used by the Kaplan Meier
16 | #' estimate given in \code{fit.models}.
17 | #' \item \code{newdata} = a list (of lists) providing the values for the relevant covariates If NULL, then will use
18 | #' the mean values for the covariates if at least one is a continuous variable,
19 | #' or the combination of the categorical covariates.
20 | #' \item \code{xlim} = a vector determining the limits for the x-axis
21 | #' \item \code{colors} = a vector of characters defining the colours in which to plot the different survival curves
22 | #' \item \code{what} = a string indicating whether the survival, hazard or
23 | #' cumulative hazard curve should be plotted. Defaults to 'survival', but the
24 | #' other two options can be specified as 'hazard' or 'cumhazard'
25 | #' \item \code{lab.profile} = a vector of characters defining the names of the models fitted
26 | #' \item \code{add.km} = TRUE (whether to also add the Kaplan Meier estimates of the data)
27 | #' \item \code{annotate} = FALSE (whether to also add text to highlight the observed vs
28 | #' extrapolated data)
29 | #' \item \code{legend.position} = a vector of proportions to place the legend. Default
30 | #' to 'c(.75,.9)', which means 75% across the x-axis and 90% across the y-axis
31 | #' \item \code{legend.title} = suitable instructions to format the title of the legend;
32 | #' defaults to 'element_text(size=15,face="bold")' but there may be other
33 | #' arguments that can be added (using 'ggplot' facilities)
34 | #' \item \code{legend.text} = suitable instructions to format the text of the legend;
35 | #' defaults to 'element_text(colour="black", size=14, face="plain")' but there
36 | #' may be other arguments that can be added (using 'ggplot' facilities)
37 | #' }
38 | #' @author Gianluca Baio
39 | #' @seealso \code{\link{fit.models}}, \code{\link{write.surv}}
40 | #' @template refs
41 | #' @keywords Parametric survival models
42 | #' @examples
43 | #' \dontrun{
44 | #' data(bc)
45 | #'
46 | #' mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
47 | #' distr="exp",method="mle")
48 | #' inla = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
49 | #' distr="exp",method="inla")
50 | #' plot(MLE=mle,INLA=inla)
51 | #' }
52 | #'
53 | #' @export plot.survHE
54 | plot.survHE <- function(...) {
55 |
56 | # Collects all the extra arguments
57 | exArgs <- list(...)
58 |
59 | # Finds out whether there are objects with no name (if so, they will be 'survHE' objects!)
60 | # If there are any, then needs to rename them to make the rest of the function work
61 | if(length(names(exArgs))==0) {
62 | # This is the case where the only argument(s) is/are unnamed 'survHE' object(s)
63 | names(exArgs) <- paste0("Object",1:length(exArgs))
64 | }
65 | if(length(which(names(exArgs)==""))>0){
66 | names(exArgs)[which(names(exArgs)=="")] <- paste0("Object",1:length(which(names(exArgs)=="")))
67 | }
68 |
69 | # The default is to go with the 'ggplot' version of the graph.
70 | if (exists("graph",exArgs)) {
71 | graph <- exArgs$graph
72 | } else {
73 | graph <- "ggplot"}
74 |
75 | # If so, then call the function 'plot_ggplot_survHE
76 | if(graph=="ggplot") {
77 | return(plot_ggplot_survHE(exArgs))
78 | }
79 |
80 | # If the user selects 'base' (only for back-compatibility), then runs the old code
81 | ### NB: Do I want this? (probably not...)
82 | if(graph=="base") {
83 | do.call(plot_base_survHE,exArgs)
84 | }
85 | }
86 |
--------------------------------------------------------------------------------
/R/plot_tranformed_km.R:
--------------------------------------------------------------------------------
1 |
2 | #' Plot to assess suitability of parametric model
3 | #'
4 | #' Perform an exploratory investigation for linearity of
5 | #' transformed survival models.
6 | #'
7 | #' For the Weibull, twice taking logs of the survivor function
8 | #'
9 | #' \deqn{log(-log S(t)) = log \lambda + \gamma log t}
10 | #'
11 | #' A plot of \eqn{log(-log S(t))} against \eqn{log(t)} would give an approximately
12 | #' straight line if the Weibull assumption is reasonable.
13 | #' The plot could also be used to give a rough estimate of the parameters.
14 | #'
15 | #' Similarly, for the log-logistic distribution
16 | #'
17 | #' \deqn{logS(t)/(1 - S(t)) = \theta - \kappa log t}
18 | #'
19 | #' For the log-normal distribution
20 | #'
21 | #' \deqn{\Phi^{-1} (1 - S(t)) = (log t - \mu) / \sigma}
22 | #'
23 | #' We can also check the assumption made with using the Cox regression model
24 | #' of proportional hazards by inspecting the log-cumulative hazard plot.
25 | #'
26 | #' \deqn{log H_i(t) = \beta x_i + log H_0(t)}
27 | #'
28 | #' The transformed curves for different values of the explanatory variables
29 | #' will be parallel if PH holds.
30 | #'
31 | #' @param fit An object of class survHE.
32 | #' @param mod Index or name of a model in fit. Defaults to 1.
33 | #' @param add_legend If \code{TRUE}, labels assumptions. Defaults to \code{FALSE}.
34 | #' @param graph Type of plot: base or ggplot2.
35 | #' @param \dots Further arguments, passed on to plot.
36 | #' @return Diagnostic plot
37 | #' @author William Browne, Nathan Green
38 | #' @references Collett (2015) Modelling Survival Data in Medical Research, CRC Press
39 | #' @keywords survival hplot
40 | #' @export
41 | #' @examples
42 | #'
43 | #' data(bc)
44 | #' form <- formula("Surv(recyrs, censrec) ~ group")
45 | #'
46 | #' # exponential distribution
47 | #' fit_exp <- fit.models(form, data = bc,
48 | #' distr = "exp", method = "mle")
49 | #' plot_transformed_km(fit_exp)
50 | #' plot_transformed_km(fit_exp, graph = "ggplot2")
51 | #'
52 | #' # weibull distribution
53 | #' fit_wei <- fit.models(form, data = bc,
54 | #' distr = "weibull", method = "mle")
55 | #' plot_transformed_km(fit_wei)
56 | #' plot_transformed_km(fit_wei, graph = "ggplot2")
57 | #'
58 | #' # loglogistic distribution
59 | #' fit_llog <- fit.models(form, data = bc,
60 | #' distr = "loglogistic", method = "mle")
61 | #' plot_transformed_km(fit_llog)
62 | #' plot_transformed_km(fit_llog, graph = "ggplot2")
63 | #'
64 | #' # lognormal distribution
65 | #' fit_lnorm <- fit.models(form, data = bc,
66 | #' distr = "lognormal", method = "mle")
67 | #' plot_transformed_km(fit_lnorm)
68 | #' plot_transformed_km(fit_lnorm, graph = "ggplot2")
69 | #'
70 | #' ## for only one group
71 | #' form <- formula("Surv(recyrs, censrec) ~ 1")
72 | #'
73 | #' fit_exp <- fit.models(form, data = bc,
74 | #' distr = "exp", method = "mle")
75 | #' plot_transformed_km(fit_exp)
76 | #' plot_transformed_km(fit_exp, graph = "ggplot2")
77 | #'
78 | plot_transformed_km <- function(fit, mod = 1, add_legend = FALSE,
79 | graph = "base", ...) {
80 |
81 | dots <- list(...)
82 |
83 | graph <- match.arg(graph, c("base", "ggplot2"))
84 |
85 | if (length(mod) != 1)
86 | stop("Please provide at most one model index.")
87 |
88 | if (is.numeric(mod) && !mod <= length(fit$models))
89 | stop("More model names provided than available in list of model fits provided.")
90 |
91 | if (is.character(mod) && !mod %in% names(fit$models))
92 | stop("Model name not available in list of model fits provided.")
93 |
94 | dist <- get_distribution(fit, mod)
95 |
96 | distn_names <- list(
97 | "exp" = c("exp", "exponential"),
98 | "weibull" = c("weibull", "weibull.quiet", "weibullaf", "weibullph"),
99 | "loglogistic" = c("llogis", "loglogistic"),
100 | "lognormal" = c("lognormal", "lnorm"),
101 | "gompertz" = "gompertz")
102 |
103 | if (!dist %in% unname(unlist(distn_names)))
104 | stop("Distribution not available.")
105 |
106 | fit_km <- fit$misc$km
107 |
108 | n_strata <- length(fit_km$strata)
109 |
110 | if (n_strata == 0 || n_strata == 1) {
111 | fit_km$strata <- c("group" = length(fit_km$time))
112 | }
113 |
114 | model_strata <- rep(x = names(fit_km$strata),
115 | times = fit_km$strata)
116 |
117 | times <- split(fit_km$time, model_strata)
118 | survs <- split(fit_km$surv, model_strata)
119 |
120 | params <- list()
121 |
122 | if (dist %in% distn_names[["exp"]]) {
123 | params <- list(
124 | FUN = "lines",
125 | xlab = "time",
126 | ylab = "-log(S(t))",
127 | main = "Exponential distributional assumption",
128 | x = times,
129 | y = lapply(survs, function(x) -log(x)),
130 | lty = 1:n_strata,
131 | col = 1:n_strata,
132 | type = "l")
133 | }
134 |
135 | if (dist %in% distn_names[["weibull"]]) {
136 | params <- list(
137 | FUN = "lines",
138 | xlab = "log(time)",
139 | ylab = "log(-log(S(t))) i.e. log cumulative hazard",
140 | main = "Weibull distributional assumption",
141 | x = lapply(times, log),
142 | y = lapply(survs, function(x) log(-log(x))),
143 | lty = 1:n_strata,
144 | col = 1:n_strata,
145 | type = "l")
146 | }
147 |
148 | if (dist %in% distn_names[["loglogistic"]]) {
149 | params <- list(
150 | FUN = "lines",
151 | xlab = "time",
152 | ylab = "log(S(t)/(1-S(t)))",
153 | main = "log-Logistic distributional assumption",
154 | x = lapply(times, log),
155 | y = lapply(survs, function(x) log(x/(1 - x))),
156 | lty = 1:n_strata,
157 | col = 1:n_strata,
158 | type = "l")
159 | }
160 |
161 | if (dist %in% distn_names[["lognormal"]]) {
162 | params <- list(
163 | FUN = "lines",
164 | xlab = "log(time)",
165 | ylab = expression(Phi^-1 ~ (1 - S(t))),
166 | main = "Log-normal distributional assumption",
167 | x = lapply(times, log),
168 | y = lapply(survs, function(x) qnorm(1 - x)),
169 | lty = 1:n_strata,
170 | col = 1:n_strata,
171 | type = "l")
172 | }
173 |
174 | if (dist %in% distn_names[["gompertz"]]) {
175 | stop("Gompertz not yet implemented.")
176 |
177 | # estimate.h <- function(s, t) {
178 | # denom <- t - c(t[-1], max(t) + 1)
179 | # numerator <- log(s) - log(c(s[-1], 0))
180 | # -numerator/denom
181 | # }
182 |
183 | # params <- list(
184 | # x = lapply(times, log),
185 | # y = estimate.h(survs, times),
186 | # xlab = "log(time)",
187 | # ylab = "h(t)",
188 | # main = "Gompertz distributional assumption",
189 | # lty = 1:n_strata,
190 | # col = 1:n_strata,
191 | # type = "l")
192 | }
193 |
194 | default_pars <- list(
195 | x = NULL,
196 | type = "n",
197 | axes = FALSE,
198 | xlab = params$xlab,
199 | ylab = params$ylab,
200 | main = params$main,
201 | xlim = range(pretty(unlist(params$x))),
202 | ylim = range(pretty(unlist(params$y))))
203 |
204 | setup_pars <- modifyList(
205 | default_pars, dots[names(default_pars)])
206 |
207 | if (graph == "base") {
208 |
209 | # empty plot
210 | do.call(plot, setup_pars)
211 |
212 | axis(1); axis(2)
213 |
214 | # plot lines
215 | do.call(mapply, modifyList(params, dots))
216 |
217 | if (isTRUE(add_legend)) {
218 | legend("topright", names(survs), col = params$col,
219 | lty = params$lty, bty = "n")
220 | }
221 | }
222 |
223 | if (graph == "ggplot2") {
224 |
225 | if (!add_legend) {
226 | pos.legend <- "none"
227 | } else {
228 | pos.legend <- "right"}
229 |
230 | ggdata <-
231 | data.frame(time = unlist(params$x),
232 | y = unlist(params$y)) |>
233 | tibble::rownames_to_column("Group") |>
234 | mutate(Group = gsub("\\d+", "", Group))
235 |
236 | p <-
237 | ggplot(ggdata, aes(x = .data$time, y = .data$y,
238 | group = .data$Group, col = .data$Group)) +
239 | geom_line() +
240 | do.call(labs,
241 | list(title = setup_pars$main,
242 | x = setup_pars$xlab,
243 | y = setup_pars$ylab)) +
244 | theme_bw() +
245 | theme(legend.position = pos.legend)
246 |
247 | print(p)
248 | }
249 |
250 | invisible(params)
251 | }
252 |
253 |
254 | #'
255 | get_distribution <- function(fit, mod) {
256 | m <- fit$models[[mod]]
257 | tolower(ifelse(fit$method == "hmc", m@model_name, m$dlist$name))
258 | }
259 |
260 |
--------------------------------------------------------------------------------
/R/print.survHE.R:
--------------------------------------------------------------------------------
1 | #' Print a summary of the survival model(s) fitted by \code{fit.models}
2 | #'
3 | #' Prints the summary table for the model(s) fitted, with the estimate of the
4 | #' parameters
5 | #'
6 | #'
7 | #' @param x the \code{survHE} object (the output of the call to
8 | #' \code{fit.models})
9 | #' @param mod is the index of the model. Default value is 1, but the user can
10 | #' choose which model fit to visualise, if the call to fit.models has a vector
11 | #' argument for distr (so many models are fitted & stored in the same object)
12 | #' @param \dots additional options, including: \code{digits} = number of
13 | #' significant digits to be shown in the summary table (default = 6)
14 | #' \code{original} = a flag to say whether the *original* table
15 | #' from either \code{flexsurv} or \code{INLA} or \code{rstan} should be printed
16 | #' @author Gianluca Baio
17 | #' @template refs
18 | #' @keywords Parametric survival models
19 | #' @examples
20 | #' \dontrun{
21 | #' data(bc)
22 | #'
23 | #' mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
24 | #' distr="exp",method="mle")
25 | #' print(mle)
26 | #' }
27 | #'
28 | #' @export print.survHE
29 | print.survHE <- function(x,mod=1,...) {
30 | # Creates a print method for the objects in the class survHE
31 | # x is the survHE object (the output of the call to fit.models)
32 | # mod is the index of the model. Default value is 1, but the user can choose which model fit to visualise,
33 | # if the call to fit.models has a vector argument for distr (so many models are fitted & stored in the same object)
34 | # ... optional arguments
35 | # digits = number of *significant* digits to be shown in the summary table (default = 6)
36 | # original = a flag to say whether the *original* table from either INLA or MCMC should be printed
37 |
38 | exArgs <- list(...)
39 |
40 | # Loads available models
41 | availables <- load_availables()
42 |
43 | # Can select the number of digits to be printed in the output table
44 | if(!exists("digits",where=exArgs)){digits=6} else {digits=exArgs$digits}
45 | if(!exists("original",where=exArgs)){original=FALSE} else {original=exArgs$original}
46 | # Aliases for 'original'
47 | if(exists("orig",exArgs)){original=exArgs$orig}
48 |
49 | # Now computes the stats, using different helpers depending on the underlying method
50 | # Can ask for the original output from either 'flexsurv', 'inla' or 'rstan'
51 | if(original==TRUE) {
52 | do.call(
53 | paste0("original_table_",x$method),
54 | args=list(x,mod,digits)
55 | )
56 | } # If not, go with the default formatting using the standardised 'survHE' output
57 | else {
58 | # First make the results table using the helper functions
59 | res=do.call(
60 | paste0("get_stats_",x$method),
61 | args=list(x,mod)
62 | )
63 | # Now formats the table
64 | format_table(x,mod,res,digits)
65 | }
66 | }
67 |
68 |
--------------------------------------------------------------------------------
/R/psa.plot.R:
--------------------------------------------------------------------------------
1 | #' Graphical depiction of the probabilistic sensitivity analysis for the
2 | #' survival curves
3 | #'
4 | #' Plots the survival curves for all the PSA simulations. The function is
5 | #' actually deprecated - similar graphs can be obtained directly using
6 | #' the \code{plot} method (with options), which allows a finer depiction
7 | #' of the results.
8 | #'
9 | #' @param psa the result of the call to the function \code{make.surv}
10 | #' @param ... Optional graphical parameters, such as:
11 | #' \itemize{
12 | #' \item \code{xlab} = label for the x-axis
13 | #' \item \code{ylab} = label for the y-axis
14 | #' \item \code{col} = (vector) of colours for the lines to be plotted
15 | #' \item \code{alpha} = the level of transparency for the curves (default = 0.2)
16 | #' }
17 | #'
18 | #' @author Gianluca Baio
19 | #' @template refs
20 | #' @seealso \code{\link{make.surv}}, \code{\link{write.surv}}
21 | #' @keywords Survival models Bootstrap Probabilistic sensitivity analysis
22 | #'
23 | #' @examples
24 | #' \dontrun{
25 | #' data(bc)
26 | #'
27 | #' # Fits the same model using the 3 inference methods
28 | #' mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
29 | #' distr="exp",method="mle")
30 | #' p.mle = make.surv(mle,nsim=100)
31 | #' psa.plot(p.mle)
32 | #' }
33 | #'
34 | #' @export psa.plot
35 | psa.plot <- function(psa,...) {
36 | # Plots the survival curves for all the PSA simulations
37 | # psa = the result of the call to the function make.surv
38 | # ... = additional arguments
39 | # xlab = label for the x-axis
40 | # ylab = label for the y-axis
41 | # col = vector of colours with which to plot the curves
42 | # alpha = parameter to determine the transparency (default = 0.2)
43 | # main = a string to write the title
44 | # labs = a vector with non-standard names for the legend values
45 | # name_labs = the non-standard title for the legend
46 | # xlim = a vector of limits for the times
47 |
48 | exArgs <- list(...)
49 |
50 | # Creates the dataset to plot with the survival curves for all profiles
51 | strata <- lapply(1:nrow(psa$des.mat), function(x) {
52 | psa$des.mat %>%
53 | as_tibble() %>%
54 | select(!contains("(Intercept)")) %>%
55 | slice(x) %>%
56 | round(digits=2) %>%
57 | mutate(strata=paste0(names(.),"=",.,collapse=","))}) %>%
58 | bind_rows(.) %>%
59 | select(strata)
60 |
61 | toplot <- lapply(1:length(psa$S),function(i) {
62 | psa$S[[i]] %>% bind_cols(strata=as.factor(as.character(strata[i,])))
63 | }) %>% bind_rows(.)
64 |
65 | if(exists("alpha",where=exArgs)){alpha=exArgs$alpha} else {alpha=0.2}
66 | if(exists("name_labs",where=exArgs)){name_labs=exArgs$name_labs} else {name_labs="Profile"}
67 |
68 | psa.plot <- ggplot(data=toplot,aes(x=time, y=S, colour=strata)) +
69 | geom_line(linewidth=.9) +
70 | theme_bw() +
71 | theme(axis.text.x = element_text(color="black",size=12,angle=0,hjust=.5,vjust=.5),
72 | axis.text.y = element_text(color="black",size=12,angle=0,hjust=.5,vjust=.5),
73 | axis.title.x = element_text(color="black",size=14,angle=0,hjust=.5,vjust=.5),
74 | axis.title.y = element_text(color="black",size=14,angle=90,hjust=.5,vjust=.5)) +
75 | theme(axis.line = element_line(colour = "black"),
76 | #panel.grid.major = element_blank(),
77 | #panel.grid.minor = element_blank(),
78 | #panel.border = element_blank(),
79 | panel.background = element_blank(),
80 | panel.border = element_blank(),
81 | plot.title = element_text(size=18, face="bold")) +
82 | theme(legend.position="inside", legend.position.inside=c(.75,.9),
83 | legend.title=element_text(size=15,face="bold"),
84 | #legend.title = element_blank(),
85 | legend.text = element_text(colour="black", size=14, face="plain"),
86 | legend.background=element_blank()) +
87 | labs(y="Survival",x="Time",title=NULL, color=name_labs)
88 |
89 | # If there are more than 1 simulation, then there also are the low and upp extremes and plots them too
90 | if(any(grepl("low",names(toplot)))) {
91 | psa.plot <- psa.plot +
92 | geom_ribbon(data=toplot,
93 | aes(x=time, y=S, ymin=low,ymax=upp,fill=strata),
94 | alpha=alpha,show.legend=FALSE)
95 | }
96 |
97 | # Optional arguments
98 | if(exists("col",where=exArgs)) {
99 | psa.plot <- psa.plot + scale_color_manual(values=exArgs$col) +
100 | scale_fill_manual(values=exArgs$col)
101 | }
102 | if(exists("xlab",where=exArgs)){
103 | psa.plot <- psa.plot + labs(x=exArgs$xlab)
104 | }
105 | if(exists("ylab",where=exArgs)){
106 | psa.plot <- psa.plot + labs(y=exArgs$ylab)
107 | }
108 | if(exists("main",where=exArgs)) {
109 | psa.plot <- psa.plot + labs(title=exArgs$main) +
110 | theme(plot.title = element_text(size=18,face="bold"))
111 | }
112 | if(exists("labs",where=exArgs)) {
113 | psa.plot <- psa.plot + scale_color_discrete(labels=exArgs$labs)
114 | }
115 | if(exists("xlim",where=exArgs)){
116 | psa.plot <- psa.plot + xlim(exArgs$xlim)
117 | }
118 |
119 | psa.plot
120 | }
121 |
--------------------------------------------------------------------------------
/R/summary.survHE.R:
--------------------------------------------------------------------------------
1 | #' Prints a summary table for the distribution the mean survival time for a
2 | #' given model and data
3 | #'
4 | #' Calculates the mean survival time as the area under the survival curve
5 | #'
6 | #' A list comprising of the following elements
7 | #'
8 | #' @param object a \code{survHE} object (resulting from the call to
9 | #' \code{fit.models}
10 | #' @param mod the model to be analysed (default = 1)
11 | #' @param t the vector of times to be used in the computation. Default = NULL,
12 | #' which means the observed times will be used. NB: the vector of times should
13 | #' be: i) long enough so that S(t) goes to 0; and ii) dense enough so that the
14 | #' approximation to the AUC is sufficiently precise
15 | #' @param nsim the number of simulations from the survival curve distributions
16 | #' to be used (to compute interval estimates)
17 | #' @param \dots Additional options
18 | #' @return \item{mean.surv}{ A matrix with the simulated values for the mean
19 | #' survival times } \item{tab}{ A summary table }
20 | #' @author Gianluca Baio
21 | #' @seealso \code{fit.models}, \code{make.surv}
22 | #' @template refs
23 | #' @keywords Parametric survival models Mean survival time
24 | #' @examples
25 | #' \dontrun{
26 | #' data(bc)
27 | #'
28 | #' mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
29 | #' distr="exp",method="mle")
30 | #' summary(mle,nsim=100)
31 | #' }
32 | #'
33 | #' @export summary.survHE
34 | summary.survHE <- function(object,mod=1,t=NULL,nsim=1000,...) {
35 | # Computes the estimated mean survival as the area under the survival curve
36 | # This is obtained using the trapezoidal method by taking the average of the "left" and "right" y-values.
37 | # object: is the output from a fit.models call
38 | # mod: the model to be analysed (default = 1)
39 | # t: the vector of times to be used in the computation. Default = NULL, which means the observed times will be used.
40 | # NB: the vector of times should be: i) long enough so that S(t) goes to 0; and ii) dense enough so that
41 | # the approximation to the AUC is sufficiently precise
42 | # nsim: number of simulations from the survival curve distributions to be used (to compute interval estimates)
43 | # stats: a logical value. If TRUE, also shows a table
44 | # ...: optional arguments
45 | # newdata = a list (of lists), specifiying the values of the covariates at which the computation is performed. For example
46 | # 'list(list(arm=0),list(arm=1))' will create two survival curves, one obtained by setting the covariate 'arm'
47 | # to the value 0 and the other by setting it to the value 1. In line with 'flexsurv' notation, the user needs
48 | # to either specify the value for *all* the covariates or for none (in which case, 'newdata=NULL', which is the
49 | # default). If some value is specified and at least one of the covariates is continuous, then a single survival
50 | # curve will be computed in correspondence of the average values of all the covariates (including the factors,
51 | # which in this case are expanded into indicators). The order of the variables in the list *must* be the same
52 | # as in the formula used for the model
53 | # labs: a vector of strings giving the names of the "profile" of covariates for which the mean survival times are computed
54 | #
55 | # NB: NEED TO FIX THIS FOR THE POLY-WEIBULL
56 | #
57 | # Defines the utility function to compute the stats table
58 | make.stats <- function(x, dim = 2) {
59 | bugs.stats <- function(x) {
60 | c(mean(x), sd(x), quantile(x, 0.025), median(x), quantile(x, 0.975))
61 | }
62 | if (is.null(dim(x)) == TRUE) {
63 | tab <- bugs.stats(x)
64 | names(tab) <- c("mean", "sd", "2.5%", "median", "97.5%")
65 | }
66 | if (is.null(dim(x)) == FALSE) {
67 | tab <- t(apply(x, dim, function(x) bugs.stats(x)))
68 | colnames(tab) <- c("mean", "sd", "2.5%", "median", "97.5%")
69 | }
70 | return(tab)
71 | }
72 |
73 | exArgs <- list(...)
74 | if (!exists("newdata",where=exArgs)) {newdata <- NULL} else {newdata <- exArgs$newdata}
75 | if (!exists("labs",where=exArgs)) {labs <- NULL} else {labs <- exArgs$labs}
76 | if(is.null(t)) {
77 | if(object$misc$model_name[mod]=="pow") {
78 | t <- sort(unique(object$misc$km[[mod]]$time))
79 | } else {
80 | t <- sort(unique(object$misc$km$time))
81 | }
82 | }
83 |
84 | psa <- make.surv(object,mod=mod,t=t,nsim=nsim,newdata=newdata)
85 |
86 | rlabs <- rownames(psa$des.mat)
87 | if (!is.null(rlabs)) {
88 | rlabs <- gsub("^1,","",rlabs)
89 | } else {
90 | rlabs <- rep("",length(psa$sim))
91 | }
92 | if(!is.null(labs) & length(labs)==length(rlabs)) {rlabs <- labs}
93 |
94 | mean.surv=matrix(unlist(
95 | lapply(psa$mat,function(i) {
96 | lapply(1:psa$nsim,function(j) {
97 | xvar=i$time
98 | yvar=i[,(j+1)]
99 | sum(diff(xvar) * (head(yvar,-1)+tail(yvar,-1)), na.rm=TRUE)/2
100 | })
101 | })
102 | ),nrow=psa$nsim,byrow=FALSE)
103 |
104 | if (ncol(mean.surv)==length(names(object$misc$km$strata))) {
105 | colnames(mean.surv) <- names(object$misc$km$strata)
106 | }
107 |
108 | tab <- NULL
109 | if(psa$nsim>1) {
110 | tab <- make.stats(mean.surv)
111 | rownames(tab) <- rlabs
112 | if(!is.null(names(object$misc$km$strata))) {
113 | if (ncol(mean.surv)==length(names(object$misc$km$strata))) {
114 | rownames(tab) <- names(object$misc$km$strata)
115 | } else {
116 | rownames(tab) <- rlabs
117 | }
118 | } else {
119 | rownames(tab) <- rlabs
120 | }
121 | cat("\nEstimated average survival time distribution* \n")
122 | print(tab)
123 | cat(paste0("\n*Computed over the range: [",
124 | paste(format(range(t),digits=4,nsmall=3),collapse="-"),"] using ",psa$nsim,
125 | " simulations.\nNB: Check that the survival curves tend to 0 over this range!\n"))
126 | }
127 |
128 | invisible(
129 | list(mean.surv=mean.surv,
130 | tab=tab))
131 | }
132 |
--------------------------------------------------------------------------------
/R/survHE-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | "_PACKAGE"
3 |
4 | ## usethis namespace: start
5 | ## usethis namespace: end
6 | NULL
7 |
--------------------------------------------------------------------------------
/R/utils_fit_models.R:
--------------------------------------------------------------------------------
1 | #' Helper function to run the survival models using MLE and flexsurv
2 | #'
3 | #' @param x a string containing the name of the model
4 | #' to be fitted
5 | #' #' @param exArgs a list of extra arguments passed from the main 'fit.models'
6 | #' function
7 | #' @note Something will go here
8 | #' @author Gianluca Baio
9 | #' @seealso fit.models
10 | #' @references Baio (2020). survHE
11 | #' @keywords Parametric survival models Maximum likelihood estimation
12 | #' @noRd
13 | runMLE <- function(x,exArgs) {
14 | ##### PROBABLY CAN REMOVE THIS ######
15 | # Checks that 'flexsurv' is loaded up. NB: ***Probably*** not needed, as 'flexsurv' is a primary dependency???
16 | #if(!isTRUE(requireNamespace("flexsurv",quietly=TRUE))) {
17 | # stop("You need to install the R package 'flexsurv'. Please run in your R terminal:\n install.packages('flexsurv')")
18 | #}
19 | # Loads the model formula & data
20 | formula <- exArgs$formula
21 | data=exArgs$data
22 |
23 | # Loads in the available models in each method
24 | availables <- load_availables()
25 | # Uses the helper 'manipulated_distributions' to create the vectors distr, distr3 and labs
26 | d3 <- manipulate_distributions(x)$distr3
27 | x <- manipulate_distributions(x)$distr
28 |
29 | tic <- proc.time()
30 | # If user selects RPS model, then could also provide some optional arguments - uses flexsurv defaults
31 | if(x=="survspline") {
32 | if(exists("bhazard",where=exArgs)) {bhazard <- exArgs$bhazard} else {bhazard <-NULL}
33 | if(exists("weights",where=exArgs)) {weights <- exArgs$weights} else {weights <- NULL}
34 | if(exists("subset",where=exArgs)) {subset <- exArgs$subset} else {subset <- NULL}
35 | if(exists("knots",where=exArgs)) {knots <- exArgs$knots} else {knots <- NULL}
36 | if(exists("k",where=exArgs)) {k <- exArgs$k} else {k <- 0}
37 | if(exists("bknots",where=exArgs)) {bknots <- exArgs$bknots} else {bknots <- NULL}
38 | if(exists("scale",where=exArgs)) {scale <- exArgs$scale} else {scale <- "hazard"}
39 | if(exists("timescale",where=exArgs)) {timescale <- exArgs$scale} else {timescale <- "log"}
40 | model <- do.call(
41 | flexsurv::flexsurvspline,
42 | args=list(
43 | formula=formula,
44 | data=quote(data),
45 | k=k,
46 | knots=knots,
47 | bknots=bknots,
48 | scale=scale,
49 | timescale=timescale,
50 | weights=weights,
51 | subset=subset
52 | )
53 | )
54 | ###model <- flexsurv::flexsurvspline(formula=formula,data=data,k=k,knots=knots,bknots=bknots,scale=scale,
55 | ### timescale=timescale)
56 | if(!is.null(weights)) {model$call$weights=exArgs$call$weights} else {model$call$weights=NULL}
57 | if(!is.null(subset)) {model$call$subset=exArgs$call$subset} else {model$call$subset=NULL}
58 | } else {
59 | # If it's one of the other available models under MLE, then simply runs flexsurv::flexsurvreg
60 | # But allows to use weight and subset
61 | if(exists("weights",where=exArgs)) {weights <- exArgs$weights} else {weights=NULL}
62 | if(exists("subset",where=exArgs)) {subset <- exArgs$subset} else {subset <- NULL}
63 |
64 | ###model <- flexsurv::flexsurvreg(formula=formula,data=data,dist=x,weights=weights)
65 | model <- do.call("flexsurvreg",args=list(
66 | formula=formula,
67 | data=quote(data),
68 | dist=x,
69 | weights=weights,
70 | subset=subset
71 | ))
72 | # Fix the 'call' by using parts of the original call to fit.models
73 | if(!is.null(weights)) {model$call$weights=exArgs$call$weights} else {model$call$weights=NULL}
74 | if(!is.null(subset)) {model$call$subset=exArgs$call$subset} else {model$call$subset=NULL}
75 | }
76 | toc <- proc.time()-tic
77 |
78 | # Replaces a field used in 'make.surv' to indicate the model used (standardised across models)
79 | model_name <- d3
80 |
81 | # Finally returns the output
82 | list(
83 | model=model,
84 | aic=model$AIC,
85 | bic=-2*model$loglik+model$npars*log(model$N),
86 | dic=NULL,
87 | time2run=toc[3],
88 | model_name=model_name
89 | )
90 | }
91 |
92 |
93 |
94 | #' Helper function to make the Kaplan-Meier analysis of the underlying data
95 | #' for a given formula and dataset
96 | #'
97 | #' @param formula a formula specifying the model to be used, in the form
98 | #' \code{Surv(time,event)~treatment[+covariates]} in flexsurv terms, or
99 | #' \code{inla.surv(time,event)~treatment[+covariates]} in INLA terms.
100 | #' @param method A string specifying the inferential method (\code{'mle'},
101 | #' \code{'inla'} or \code{'hmc'}). If \code{method} is set to \code{'hmc'},
102 | #' then \code{survHE} will write suitable model code in the Stan language
103 | #' (according to the specified distribution), prepare data and initial values
104 | #' and then run the model.
105 | #' @param data A data frame containing the data to be used for the analysis.
106 | #' This must contain data for the 'event' variable. In case there is no
107 | #' censoring, then \code{event} is a column of 1s.
108 | #' @return \item{ObjSurvfit}{A 'rms::npsurv' estimate of the KM curves}.
109 | #' @note Something will go here
110 | #' @author Gianluca Baio
111 | #' @seealso fit.models
112 | #' @references Baio (2020). survHE
113 | #' @keywords Kaplan-Meier estimate
114 | #' @noRd
115 | make_KM <- function(formula,data) {
116 | km.formula <- as.formula(gsub("inla.surv","Surv",deparse(formula)))
117 | # Computes the Kaplan Meier curve using the package "rms"
118 | ObjSurvfit <- rms::npsurv( # Uses the function "npsurv" from the package "rms"
119 | formula = km.formula, # to fit the model specified in the "formula" object
120 | data = data # to the dataset named "data"
121 | )
122 | return(ObjSurvfit)
123 | }
124 |
125 |
126 |
127 |
128 | #' Helper function to format the output of the modelling (produced either
129 | #' by running 'runMLE', or 'runINLA', 'runHMC'), in a way that is consistent
130 | #' with the architecture of 'survHE'
131 | #'
132 | #' @param output The output of one of the helper functions used to run the
133 | #' models.
134 | #' @param method The method used to do the estimation
135 | #' @param distr The abbreviated name for the distribution to be used
136 | #' @param formula The model formula
137 | #' @param data The dataset used
138 | #' @return \item{res}{A 'survHE' object containing all the relevant output
139 | #' conveniently formatted}.
140 | #' @note Something will go here
141 | #' @author Gianluca Baio
142 | #' @seealso fit.models
143 | #' @references Baio (2020). survHE
144 | #' @keywords Parametric survival models Bayesian inference via Hamiltonian
145 | #' Monte Carlo Bayesian inference via Integrated Nested Laplace Approximation
146 | #' @noRd
147 | format_output_fit.models <- function(output,method,distr,formula,data) {
148 |
149 | # Uses the helper 'manipulated_distributions' to create the vector labs
150 | labs <- manipulate_distributions(distr)$labs
151 |
152 | # Model output
153 | models <- lapply(output, function(x) x$model)
154 | # Model fitting statistics
155 | model.fitting <- list(
156 | aic=unlist(lapply(output,function(x) x$aic)),
157 | bic=unlist(lapply(output,function(x) x$bic)),
158 | dic=unlist(lapply(output,function(x) x$dic))
159 | )
160 | # Miscellanea
161 | misc <- list(
162 | time2run= unlist(lapply(output, function(x) x$time2run)),
163 | formula=formula,
164 | data=data,
165 | model_name=unlist(lapply(output,function(x) x$model_name))
166 | )
167 | if(any(distr=="polyweibull")) {
168 | misc$km=lapply(formula,function(f) make_KM(f,data))
169 | } else {
170 | misc$km=make_KM(formula,data)
171 | }
172 |
173 | # HMC-specific extra output
174 | if(method=="hmc"){
175 | # Completes the 'misc' and 'model.fitting' lists with additional output
176 | misc$data.stan <- lapply(output,function(x) x$data.stan)
177 | model.fitting$dic2 <- unlist(lapply(output,function(x) x$dic2))
178 | }
179 | if(method=="inla") {
180 | model.fitting$dic2 = unlist(lapply(output,function(x) x$dic2))
181 | }
182 |
183 | # Names the elements of the list
184 | names(models) <- labs
185 |
186 | # Formats all output in a list
187 | res <- list(models=models,model.fitting=model.fitting,method=method,misc=misc)
188 | # And sets its class attribute to "survHE"
189 | class(res) <- "survHE"
190 | return(res)
191 | }
192 |
193 |
194 |
195 | #' Helper function to provide a list of models available in each method
196 | #'
197 | #' @return \item{availables}{A list of models available in each method}.
198 | #' @note Something will go here
199 | #' @author Gianluca Baio
200 | #' @seealso fit.models
201 | #' @references Baio (2020). survHE
202 | #' @keywords Parametric survival models Bayesian inference via Hamiltonian
203 | #' Monte Carlo Bayesian inference via Integrated Nested Laplace Approximation
204 | #' @noRd
205 | load_availables <- function() {
206 | # INLA can only do a limited set of models (for now) so if user has selected
207 | # one that is not available, then falls back on MLE analysis
208 | availables=list(
209 | mle=c("genf" = "gef",
210 | "genf.orig" = "gof",
211 | "gengamma" = "gga",
212 | "gengamma.orig" = "ggo",
213 | "exp" = "exp",
214 | "weibull" = "wei",
215 | "weibullPH" = "wph",
216 | "lnorm" = "lno",
217 | "gamma" = "gam",
218 | "gompertz" = "gom",
219 | "llogis" = "llo",
220 | "lognormal" = "lno",
221 | "rps" = "rps"
222 | ),
223 | inla=c("exponential" = "exp",
224 | "weibull" = "wei",
225 | "weibullPH" = "wph",
226 | "lognormal" = "lno",
227 | "loglogistic" = "llo",
228 | "rps" = "rps",
229 | "gompertz" = "gom" # added Mar 19, 2021
230 | ),
231 | hmc=c("Exponential" = "exp",
232 | "Gamma" = "gam",
233 | "GenF" = "gef",
234 | "GenGamma" = "gga",
235 | "Gompertz" = "gom",
236 | "PolyWeibull" = "pow",
237 | "RP" = "rps",
238 | "WeibullAF" = "wei",
239 | "WeibullPH" = "wph",
240 | "logLogistic" = "llo",
241 | "logNormal" = "lno"
242 | )
243 | )
244 | return(availables)
245 | }
246 |
247 |
248 |
249 | #' Helper function to manipulate the strings of text defining the
250 | #' distributions selected by the user so they are consistent with the
251 | #' various methods
252 | #'
253 | #' @param x A string with the distribution name selected by the user.
254 | #' @return \item{list}{A list containing the modified name of the
255 | #' distribution, the acronym (3-letters abbreviation), or the
256 | #' labels (humane-readable name)}.
257 | #' @note Something will go here
258 | #' @author Gianluca Baio
259 | #' @seealso fit.models
260 | #' @references Baio (2020). survHE
261 | #' @keywords Parametric survival models Bayesian inference via Hamiltonian
262 | #' Monte Carlo Bayesian inference via Integrated Nested Laplace Approximation
263 | #' @noRd
264 | manipulate_distributions <- function(x){
265 | # selected model checks -----
266 | matchTable = list(
267 | "exp" = c("exponential", "exp"),
268 | "wei" = c("weibull", "weibullaft", "weiaft", "waft", "weibullaf", "weiaf", "waf", "wei"),
269 | "wph" = c("weibullph", "weiph", "wph"),
270 | "gam" = c("gamma", "gam", "gma"),
271 | "lno" = c("lognormal", "lnormal", "lnorm", "lognorm", "lno"),
272 | "llo" = c("loglogistic", "loglog", "llogistic", "llogis", "llo", "llogist"),
273 | "gga" = c("generalisedgamma", "generalizedgamma", "ggamma", "gengamma", "gga", "ggam"),
274 | "ggo" = c("gengamma.orig", "ggo"),
275 | "gef" = c("generalisedf", "generalizedf", "genf", "gef"),
276 | "gof" = c("genf.orig", "gof"),
277 | "gom" = c("gompertz", "gpz", "gomp", "gompz", "gom"),
278 | "rps" = c("roystonparmar", "roystonparmarsplines", "roystonparmarspline", "spline", "splines", "rps"),
279 | "pow" = c("polyweibull","pow","PolyWeibull")
280 | )
281 | # Human readable label
282 | labelTable = c(
283 | "exp" = "Exponential",
284 | "wei" = "Weibull (AFT)",
285 | "wph" = "Weibull (PH)",
286 | "gam" = "Gamma",
287 | "lno" = "log-Normal",
288 | "llo" = "log-Logistic",
289 | "gga" = "Gen. Gamma", "ggo" = "Gen. Gamma (orig parametrisation)",
290 | "gef" = "Gen. F", "gof" = "Gen. F (orig parametrisation)",
291 | "gom" = "Gompertz",
292 | "rps" = "Royston-Parmar",
293 | "pow" = "Poly-Weibull")
294 | # Labels used by R to define p..., r... and d... commands
295 | labelR = c(
296 | "exp" = "exp",
297 | "wei" = "weibull",
298 | "wph" = "weibullPH",
299 | "gam" = "gamma",
300 | "lno" = "lnorm",
301 | "llo" = "llogis",
302 | "gga" = "gengamma",
303 | "ggo" = "gengamma.orig",
304 | "gef" = "genf",
305 | "gof" = "genf.orig",
306 | "gom" = "gompertz",
307 | "rps" = "survspline",
308 | "pow" = "polyweibull"
309 | )
310 |
311 | distr = gsub("[ ]*[-]*", "", tolower(x))
312 | isDistrUnmatched = which(!sapply(
313 | 1:length(distr),
314 | '%in%',
315 | unname(unlist(sapply(matchTable, match, distr)))))
316 | if (length(isDistrUnmatched) > 0) {
317 | stop(paste0("Distribution ", paste(distr[isDistrUnmatched], collapse = ", "), " could not be matched."))
318 | }
319 |
320 | distr3 <- numeric()
321 | for (i in 1:length(distr)) {
322 | distr3[i] <- names(which(unlist(lapply(matchTable,function(x) distr[i]%in%x))))
323 | }
324 | labs <- unname(labelTable[distr3])
325 | distr <- unname(labelR[distr3])
326 |
327 | list(distr=distr,distr3=distr3,labs=labs)
328 | }
329 |
330 |
331 | #' Helper function to check that the distribution(s) provided by the user are
332 | #' consistent with the method chosen for inference.
333 | #'
334 | #' \code{'inla'} or \code{'hmc'}). If \code{method} is set to \code{'hmc'},
335 | #' then \code{survHE} will write suitable model code in the Stan language
336 | #' (according to the specified distribution), prepare data and initial values
337 | #' and then run the model.
338 | #' @param distr3 A vector of distribution labels (as created by 'fit.models').
339 | #' It's a 3-letters label to identify the distributions
340 | #' @param availables A list with the distributions available for each method.
341 | #' @note Something will go here
342 | #' @author Gianluca Baio
343 | #' @seealso fit.models
344 | #' @references Baio (2020). survHE
345 | #' @keywords Parametric survival models
346 | #' @noRd
347 | check_distributions <- function(method,distr) {
348 | # Loads in the available models in each method
349 | availables <- load_availables()
350 | # Uses the helper 'manipulated_distributions' to create the vectors distr, distr3 and labs
351 | distr3 <- manipulate_distributions(distr)$distr3
352 |
353 | # If 'method' is either 'inla' or 'hmc but we're trying to run a model that is not available, then
354 | # falls back to 'mle'
355 | if(method %in% c("inla","hmc")) {
356 | if(!all(distr3 %in% availables[[method]])) {
357 | ####modelsString <- unname(labelTable[availables[[method]]])
358 | modelsString <- unname(manipulate_distributions(availables[[method]])$labs)
359 | modelsString[length(modelsString)] = paste0("or ", modelsString[length(modelsString)])
360 | message(paste0(
361 | "NB: ",toupper(method)," can only fit ",
362 | paste(modelsString, collapse = ", "),
363 | " parametric survival models. Falling back on MLE analysis")
364 | )
365 | method <- "mle"
366 | }
367 | }
368 |
369 | # 'mle' can implement all the possible models, except the PolyWeibull
370 | # In this case, I choose to *stop* execution, rather than falling back to 'hmc'!
371 | if (method == "mle") {
372 | if(!all(distr3 %in% availables[[method]])) {
373 | stop(paste0("The Poly-Weibull model is only implemented under method='hmc'.
374 | Please set this option in your call to 'fit.models'"))
375 | }
376 | }
377 | return(method)
378 | }
379 |
--------------------------------------------------------------------------------
/R/utils_print_survHE.R:
--------------------------------------------------------------------------------
1 | #' Helper function to get the relevant stats to print the summary table
2 | #'
3 | #' @param x The 'survHE' object with the fitted model
4 | #' @param mod A number identifying which of the models is to be used
5 | #' @return \item{res}{The resulting stats}
6 | #' @author Gianluca Baio
7 | #' @seealso print.survHE
8 | #' @references Baio (2020). survHE
9 | #' @keywords MLE
10 | #' @noRd
11 | get_stats_mle <- function(x,mod) {
12 | # Can use directly 'flexsurv' output to make the results table
13 | res=x$models[[mod]]$res[,c(1,4,2,3),drop=FALSE]
14 | colnames(res)=c("mean","se","L95%","U95%")
15 | return(res)
16 | }
17 |
18 |
19 | #' Helper function to get the relevant stats to print the summary table
20 | #'
21 | #' @param x The 'survHE' object with the fitted model
22 | #' @param mod A number identifying which of the models is to be used
23 | #' @return \item{res}{The resulting stats}
24 | #' @author Gianluca Baio
25 | #' @seealso print.survHE
26 | #' @references Baio (2020). survHE
27 | #' @keywords INLA
28 | #' @noRd
29 | get_stats_inla <- function(x,mod) {
30 | # Calls the helper functions to make the results table
31 | res=do.call(paste0("rescale_stats_inla_",x$misc$model_name[mod]),
32 | args=list(x,mod))
33 | return(res)
34 | }
35 |
36 |
37 | #' Helper function to get the relevant stats to print the summary table
38 | #'
39 | #' @param x The 'survHE' object with the fitted model
40 | #' @param mod A number identifying which of the models is to be used
41 | #' @return \item{res}{The resulting stats}
42 | #' @author Gianluca Baio
43 | #' @seealso print.survHE
44 | #' @references Baio (2020). survHE
45 | #' @keywords HMC
46 | #' @noRd
47 | get_stats_hmc <- function(x,mod) {
48 | ######quiet(print(x$models[[mod]]))
49 | # Gets the original summary stats from the 'rstan' run
50 | table = rstan::summary(x$models[[mod]])$summary[,c("mean","sd","2.5%","97.5%")]
51 | ###table <- cbind(x$models[[mod]]@.MISC$summary$msd,x$models[[mod]]@.MISC$summary$quan[,c("2.5%","97.5%")])
52 | # Removes the node 'lp___'
53 | table=table[-grep("lp__",rownames(table)),]
54 | # If the model is intercept only, removes the unnecessary covariates created to suit 'stan' format
55 | if("X_obs" %in% names(x$misc$data.stan[[mod]])) {
56 | if(any(apply(x$misc$data.stan[[mod]]$X_obs,2,function(x) all(x==0)))) {
57 | table=table[-grep("beta\\[2\\]",rownames(table)),]
58 | }
59 | } else {
60 | if(any(apply(x$misc$data.stan[[mod]]$X,2,function(x) all(x==0)))) {
61 | if(x$misc$model_name[mod]=="rps") {
62 | table=table[-grep("beta",rownames(table)),]
63 | } else {
64 | table=table[-grep("beta\\[2\\]",rownames(table)),]
65 | }
66 | }
67 | }
68 | # Now calls the helper functions to make the results table
69 | res=do.call(paste0("rescale_stats_hmc_",x$misc$model_name[mod]),
70 | args=list(table=table,x=x))
71 | return(res)
72 | }
73 |
74 | #' Helper function to rescale the stats for the Exponential model
75 | #'
76 | #' @param table The table with the relevant values for the model
77 | #' parameters
78 | #' @param x The original 'survHE' object
79 | #' @return \item{res}{The resulting stats}
80 | #' @author Gianluca Baio
81 | #' @seealso print.survHE
82 | #' @references Baio (2020). survHE
83 | #' @keywords HMC Exponential
84 | #' @noRd
85 | rescale_stats_hmc_exp <- function(table,x) {
86 | rate <- matrix(table[grep("rate",rownames(table)),],ncol=4)
87 | rownames(rate) <- "rate"
88 | effects=add_effects_hmc(table,x)
89 | res <- rbind(rate,effects)
90 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")}
91 | return(res)
92 | }
93 |
94 | #' Helper function to rescale the stats for the Weibull AFT model
95 | #'
96 | #' @param table The table with the relevant values for the model
97 | #' parameters
98 | #' @param x The original 'survHE' object
99 | #' @return \item{res}{The resulting stats}
100 | #' @author Gianluca Baio
101 | #' @seealso print.survHE
102 | #' @references Baio (2020). survHE
103 | #' @keywords HMC WeibullAFT
104 | #' @noRd
105 | rescale_stats_hmc_wei <- function(table,x) {
106 | scale <- matrix(table[grep("scale",rownames(table)),],ncol=4)
107 | rownames(scale) <- "scale"
108 | shape <- matrix(table[grep("alpha",rownames(table)),],ncol=4)
109 | rownames(shape) <- "shape"
110 | effects=add_effects_hmc(table,x)
111 | res <- rbind(shape,scale,effects)
112 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")}
113 | return(res)
114 | }
115 |
116 | #' Helper function to rescale the stats for the Weibull PH model
117 | #'
118 | #' @param table The table with the relevant values for the model
119 | #' parameters
120 | #' @param x The original 'survHE' object
121 | #' @return \item{res}{The resulting stats}
122 | #' @author Gianluca Baio
123 | #' @seealso print.survHE
124 | #' @references Baio (2020). survHE
125 | #' @keywords HMC WeibullPH
126 | #' @noRd
127 | rescale_stats_hmc_wph <- function(table,x) {
128 | scale <- matrix(table[grep("scale",rownames(table)),],ncol=4)
129 | rownames(scale) <- "scale"
130 | shape <- matrix(table[grep("alpha",rownames(table)),],ncol=4)
131 | rownames(shape) <- "shape"
132 | effects=add_effects_hmc(table,x)
133 | res <- rbind(shape,scale,effects)
134 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")}
135 | return(res)
136 | }
137 |
138 | #' Helper function to rescale the stats for the Gompertz model
139 | #'
140 | #' @param table The table with the relevant values for the model
141 | #' parameters
142 | #' @param x The original 'survHE' object
143 | #' @return \item{res}{The resulting stats}
144 | #' @author Gianluca Baio
145 | #' @seealso print.survHE
146 | #' @references Baio (2020). survHE
147 | #' @keywords HMC Gompertz
148 | #' @noRd
149 | rescale_stats_hmc_gom <- function(table,x) {
150 | rate <- matrix(table[grep("rate",rownames(table)),],ncol=4)
151 | rownames(rate) <- "rate"
152 | shape <- matrix(table[grep("alpha",rownames(table)),],ncol=4)
153 | rownames(shape) <- "shape"
154 | effects=add_effects_hmc(table,x)
155 | res <- rbind(shape,rate,effects)
156 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")}
157 | return(res)
158 | }
159 |
160 | #' Helper function to rescale the stats for the logNormal model
161 | #'
162 | #' @param table The table with the relevant values for the model
163 | #' parameters
164 | #' @param x The original 'survHE' object
165 | #' @return \item{res}{The resulting stats}
166 | #' @author Gianluca Baio
167 | #' @seealso print.survHE
168 | #' @references Baio (2020). survHE
169 | #' @keywords HMC logNormal
170 | #' @noRd
171 | rescale_stats_hmc_lno <- function(table,x) {
172 | meanlog <- matrix(table[grep("meanlog",rownames(table)),],ncol=4)
173 | rownames(meanlog) <- "meanlog"
174 | sdlog <- matrix(table[grep("alpha",rownames(table)),],ncol=4)
175 | rownames(sdlog) <- "sdlog"
176 | effects=add_effects_hmc(table,x)
177 | res <- rbind(meanlog,sdlog,effects)
178 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")}
179 | return(res)
180 | }
181 |
182 | #' Helper function to rescale the stats for the Gamma model
183 | #'
184 | #' @param table The table with the relevant values for the model
185 | #' parameters
186 | #' @param x The original 'survHE' object
187 | #' @return \item{res}{The resulting stats}
188 | #' @author Gianluca Baio
189 | #' @seealso print.survHE
190 | #' @references Baio (2020). survHE
191 | #' @keywords HMC Gamma
192 | #' @noRd
193 | rescale_stats_hmc_gam <- function(table,x) {
194 | rate <- matrix(table[grep("rate",rownames(table)),],ncol=4)
195 | rownames(rate) <- "rate"
196 | shape <- matrix(table[grep("alpha",rownames(table)),],ncol=4)
197 | rownames(shape) <- "shape"
198 | effects=add_effects_hmc(table,x)
199 | res <- rbind(shape,rate,effects)
200 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")}
201 | return(res)
202 | }
203 |
204 | #' Helper function to rescale the stats for the logLogistic model
205 | #'
206 | #' @param table The table with the relevant values for the model
207 | #' parameters
208 | #' @param x The original 'survHE' object
209 | #' @return \item{res}{The resulting stats}
210 | #' @author Gianluca Baio
211 | #' @seealso print.survHE
212 | #' @references Baio (2020). survHE
213 | #' @keywords HMC logLogistic
214 | #' @noRd
215 | rescale_stats_hmc_llo <- function(table,x) {
216 | rate <- matrix(table[grep("rate",rownames(table)),],ncol=4)
217 | rownames(rate) <- "scale"
218 | shape <- matrix(table[grep("alpha",rownames(table)),],ncol=4)
219 | rownames(shape) <- "shape"
220 | effects=add_effects_hmc(table,x)
221 | res <- rbind(shape,rate,effects)
222 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")}
223 | return(res)
224 | }
225 |
226 | #' Helper function to rescale the stats for the Gen F model
227 | #'
228 | #' @param table The table with the relevant values for the model
229 | #' parameters
230 | #' @param x The original 'survHE' object
231 | #' @return \item{res}{The resulting stats}
232 | #' @author Gianluca Baio
233 | #' @seealso print.survHE
234 | #' @references Baio (2020). survHE
235 | #' @keywords HMC GenF
236 | #' @noRd
237 | rescale_stats_hmc_gef <- function(table,x) {
238 | mu <- matrix(table[grep("beta",rownames(table)),,drop=FALSE][1,],ncol=4,nrow=1)
239 | rownames(mu) <- "mu"
240 | sigma <- matrix(table[grep("sigma",rownames(table)),],ncol=4)
241 | rownames(sigma) <- "sigma"
242 | Q <- matrix(table[grep("Q",rownames(table)),],ncol=4)
243 | rownames(Q) <- "Q"
244 | P <- matrix(table[match("P",rownames(table)),],ncol=4)
245 | rownames(P) <- "P"
246 | effects=add_effects_hmc(table,x)
247 | res <- rbind(mu,sigma,Q,P,effects)
248 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")}
249 | return(res)
250 | }
251 |
252 | #' Helper function to rescale the stats for the Gen Gamma model
253 | #'
254 | #' @param table The table with the relevant values for the model
255 | #' parameters
256 | #' @param x The original 'survHE' object
257 | #' @return \item{res}{The resulting stats}
258 | #' @author Gianluca Baio
259 | #' @seealso print.survHE
260 | #' @references Baio (2020). survHE
261 | #' @keywords HMC GenGamma
262 | #' @noRd
263 | rescale_stats_hmc_gga <- function(table,x) {
264 | mu <- matrix(table[grep("beta",rownames(table)),,drop=FALSE][1,],ncol=4,nrow=1)
265 | rownames(mu) <- "mu"
266 | sigma <- matrix(table[grep("sigma",rownames(table)),],ncol=4)
267 | rownames(sigma) <- "sigma"
268 | Q <- matrix(table[grep("Q",rownames(table)),],ncol=4)
269 | rownames(Q) <- "Q"
270 | effects=add_effects_hmc(table,x)
271 | res <- rbind(mu,sigma,Q,effects)
272 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")}
273 | return(res)
274 | }
275 |
276 | #' Helper function to rescale the stats for the RPS model
277 | #'
278 | #' @param table The table with the relevant values for the model
279 | #' parameters
280 | #' @param x The original 'survHE' object
281 | #' @return \item{res}{The resulting stats}
282 | #' @author Gianluca Baio
283 | #' @seealso print.survHE
284 | #' @references Baio (2020). survHE
285 | #' @keywords HMC Royston-Parmar splines
286 | #' @noRd
287 | rescale_stats_hmc_rps <- function(table,x) {
288 | gamma <- matrix(table[grep("gamma",rownames(table)),],ncol=4)
289 | rownames(gamma) <- paste0("gamma",0:(nrow(gamma)-1))
290 | # If there covariates adds their effects
291 | if(length(grep("beta",rownames(table)))>0) {
292 | effects <- matrix(table[grep("beta",rownames(table)),],ncol=4)
293 | cn=colnames(model.matrix(x$misc$formula,x$misc$data))
294 | rownames(effects) <- cn[-grep("Intercept",cn),drop=FALSE]
295 | } else {
296 | effects <- matrix(NA,nrow=0,ncol=4)
297 | }
298 | res <- rbind(gamma,effects)
299 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")}
300 | return(res)
301 | }
302 |
303 | #' Helper function to rescale the stats for the Poly-Weibull model
304 | #'
305 | #' @param table The table with the relevant values for the model
306 | #' parameters
307 | #' @param x The original 'survHE' object
308 | #' @return \item{res}{The resulting stats}
309 | #' @author Gianluca Baio
310 | #' @seealso print.survHE
311 | #' @references Baio (2020). survHE
312 | #' @keywords HMC Poly-Weibull
313 | #' @noRd
314 | rescale_stats_hmc_pow <- function(table,x) {
315 | rownames(table)[grep("alpha",rownames(table))]=paste0("shape_",1:length(grep("alpha",rownames(table))))
316 |
317 | # Figures out which beta coefficients should be removed (because they are multiplied by a covariate that is constantly 0)
318 | to.rm=matrix(unlist(lapply(1:length(x$misc$formula),function(m) apply(x$misc$data.stan[[1]]$X[m,,],2,function(x) all(x==0)))),
319 | nrow=length(x$misc$formula),byrow=T)
320 | nmatch <- length(which(to.rm==T))
321 | if(nmatch>0){
322 | idx <- matrix(unlist(lapply(1:nmatch,function(i) {
323 | paste0(which(to.rm==TRUE,arr.ind=T)[i,],collapse=",")
324 | })))
325 | } else {idx=NULL}
326 | if (!is.null(nrow(idx))) {
327 | take.out <- match(paste0("beta[",idx,"]"),rownames(table))
328 | } else {take.out=NULL}
329 | if(all(!is.null(take.out))) {table=table[-take.out,]}
330 | effects=table[-grep("shape",rownames(table)),]
331 | rownames(effects) <- unlist(lapply(1:x$misc$data.stan[[1]]$M,function(m) {
332 | paste0(colnames(model.matrix(x$misc$formula[[m]],x$misc$data)),"_",m)
333 | }))
334 | res <- rbind(table[grep("shape",rownames(table)),],effects)
335 | if (is.null(dim(res))) {names(res) <- c("mean","se","L95%","U95%")} else {colnames(res) <- c("mean","se","L95%","U95%")}
336 | return(res)
337 | }
338 |
339 | #' Helper function to rescale the stats for the Weibull AFT model
340 | #'
341 | #' @param table The table with the relevant values for the model
342 | #' parameters
343 | #' @return \item{res}{The resulting stats}
344 | #' @author Gianluca Baio
345 | #' @seealso print.survHE
346 | #' @references Baio (2020). survHE
347 | #' @keywords INLA WeibullAFT
348 | #' @noRd
349 | rescale_stats_inla_wei <- function(x,mod,nsim=1000) {
350 | # The scale and effects are computed as a *non linear* function of the AFT effects and the shape
351 | # But for simplicity can approximate this using 'inla.rmarginal'
352 | shape_sim=INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.hyperpar[[1]])
353 | fixeff_sim=lapply(1:nrow(x$models[[mod]]$summary.fixed),function(i) {
354 | INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.fixed[[i]])
355 | })
356 | shape=shape_sim %>% make_stats %>% matrix(.,ncol=4)
357 | ## NB: INLA has a weird parameterisation and with Weibull AFT, the coefficients have the wrong sign
358 | if(attributes(terms(x$misc$formula))$intercept==1) {
359 | scale=exp(-fixeff_sim[[1]]+log(max(x$misc$km$time))) %>% make_stats%>% matrix(.,ncol=4)
360 | }
361 | rownames(scale) <- "scale"
362 | rownames(shape) <- "shape"
363 | res=rbind(shape,scale)
364 | # If there are covariates then add them too
365 | if(length(fixeff_sim)>1) {
366 | effects=lapply(2:nrow(x$models[[mod]]$summary.fixed),function(i) {
367 | -fixeff_sim[[i]]
368 | })
369 | effects=matrix(unlist(lapply(effects,function(i) i %>% make_stats)),
370 | nrow=length(fixeff_sim)-1,ncol=4,byrow=T)
371 | rownames(effects) <- x$models[[mod]]$names.fixed[-1]
372 | res=rbind(res,effects)
373 | }
374 | colnames(res)=c("mean","se","L95%","U95%")
375 | return(res)
376 | }
377 |
378 | #' Helper function to rescale the stats for the Weibull PH model
379 | #'
380 | #' @param table The table with the relevant values for the model
381 | #' parameters
382 | #' @return \item{res}{The resulting stats}
383 | #' @author Gianluca Baio
384 | #' @seealso print.survHE
385 | #' @references Baio (2020). survHE
386 | #' @keywords INLA WeibullPH
387 | #' @noRd
388 | rescale_stats_inla_wph <- function(x,mod,nsim=1000) {
389 | # The scale and effects are computed as a *non linear* function of the AFT effects and the shape
390 | # But for simplicity can approximate this using 'inla.rmarginal'
391 | shape_sim=INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.hyperpar[[1]])
392 | fixeff_sim=lapply(1:nrow(x$models[[mod]]$summary.fixed),function(i) {
393 | INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.fixed[[i]])
394 | })
395 | shape=shape_sim %>% make_stats %>% matrix(.,ncol=4)
396 | if(attributes(terms(x$misc$formula))$intercept==1) {
397 | scale=exp(fixeff_sim[[1]]+log(max(x$misc$km$time)))^(-shape_sim) %>% make_stats%>% matrix(.,ncol=4)
398 | }
399 | rownames(scale) <- "scale"
400 | rownames(shape) <- "shape"
401 | res=rbind(shape,scale)
402 | # If there are covariates then add them too
403 | if(length(fixeff_sim)>1) {
404 | effects=lapply(2:nrow(x$models[[mod]]$summary.fixed),function(i) {
405 | fixeff_sim[[i]]
406 | })
407 | effects=matrix(unlist(lapply(effects,function(i) i %>% make_stats)),
408 | nrow=length(fixeff_sim)-1,ncol=4,byrow=T)
409 | rownames(effects) <- x$models[[mod]]$names.fixed[-1]
410 | res=rbind(res,effects)
411 | }
412 | colnames(res)=c("mean","se","L95%","U95%")
413 | return(res)
414 | }
415 |
416 | #' Helper function to rescale the stats for the Exponential model
417 | #'
418 | #' @param table The table with the relevant values for the model
419 | #' parameters
420 | #' @return \item{res}{The resulting stats}
421 | #' @author Gianluca Baio
422 | #' @seealso print.survHE
423 | #' @references Baio (2020). survHE
424 | #' @keywords INLA Exponential
425 | #' @noRd
426 | rescale_stats_inla_exp <- function(x,mod,nsim=1000) {
427 | fixeff_sim=lapply(1:nrow(x$models[[mod]]$summary.fixed),function(i) {
428 | INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.fixed[[i]])
429 | })
430 | if(attributes(terms(x$misc$formula))$intercept==1) {
431 | rate=exp(fixeff_sim[[1]]-log(max(x$misc$km$time))) %>% make_stats %>% matrix(.,ncol=4)
432 | }
433 | rownames(rate)="rate"
434 | res=rate
435 | if(length(fixeff_sim)>1) {
436 | effects=lapply(2:nrow(x$models[[mod]]$summary.fixed),function(i) {
437 | fixeff_sim[[i]]
438 | })
439 | effects=matrix(unlist(lapply(effects,function(i) i %>% make_stats)),
440 | nrow=length(fixeff_sim)-1,ncol=4,byrow=T)
441 | rownames(effects) <- x$models[[mod]]$names.fixed[-1]
442 | res=rbind(res,effects)
443 | }
444 | colnames(res)=c("mean","se","L95%","U95%")
445 | return(res)
446 | }
447 |
448 | #' Helper function to rescale the stats for the logNormal model
449 | #'
450 | #' @param table The table with the relevant values for the model
451 | #' parameters
452 | #' @return \item{res}{The resulting stats}
453 | #' @author Gianluca Baio
454 | #' @seealso print.survHE
455 | #' @references Baio (2020). survHE
456 | #' @keywords INLA logNormal
457 | #' @noRd
458 | rescale_stats_inla_lno <- function(x,mod,nsim=1000) {
459 | prec_sim=INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.hyperpar[[1]])
460 | fixeff_sim=lapply(1:nrow(x$models[[mod]]$summary.fixed),function(i) {
461 | INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.fixed[[i]])
462 | })
463 | if(attributes(terms(x$misc$formula))$intercept==1) {
464 | meanlog=(fixeff_sim[[1]]+log(max(x$misc$km$time))) %>% make_stats %>% matrix(.,ncol=4)
465 | }
466 | sdlog=sqrt(1/prec_sim) %>% make_stats %>% matrix(.,ncol=4)
467 | rownames(meanlog)="meanlog"
468 | rownames(sdlog)="sdlog"
469 | res=rbind(meanlog,sdlog)
470 | if(length(fixeff_sim)>1) {
471 | effects=lapply(2:nrow(x$models[[mod]]$summary.fixed),function(i) {
472 | fixeff_sim[[i]]
473 | })
474 | effects=matrix(unlist(lapply(effects,function(i) i %>% make_stats)),
475 | nrow=length(fixeff_sim)-1,ncol=4,byrow=T)
476 | rownames(effects) <- x$models[[mod]]$names.fixed[-1]
477 | res=rbind(res,effects)
478 | }
479 | colnames(res)=c("mean","se","L95%","U95%")
480 | return(res)
481 | }
482 |
483 | #' Helper function to rescale the stats for the logLogistic model
484 | #'
485 | #' @param table The table with the relevant values for the model
486 | #' parameters
487 | #' @return \item{res}{The resulting stats}
488 | #' @author Gianluca Baio
489 | #' @seealso print.survHE
490 | #' @references Baio (2020). survHE
491 | #' @keywords INLA logLogistic
492 | #' @noRd
493 | rescale_stats_inla_llo <- function(x,mod,nsim=1000) {
494 | # Uses 'variant=1' in INLA
495 | shape_sim=INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.hyperpar[[1]])
496 | fixeff_sim=lapply(1:nrow(x$models[[mod]]$summary.fixed),function(i) {
497 | INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.fixed[[i]])
498 | })
499 | shape=shape_sim %>% make_stats %>% matrix(.,ncol=4)
500 | if(attributes(terms(x$misc$formula))$intercept==1) {
501 | scale=exp(-fixeff_sim[[1]]+log(max(x$misc$km$time))) %>% make_stats %>% matrix(.,ncol=4)
502 | }
503 | rownames(shape)="shape"
504 | rownames(scale)="scale"
505 | res=rbind(shape,scale)
506 | if(length(fixeff_sim)>1) {
507 | effects=lapply(2:nrow(x$models[[mod]]$summary.fixed),function(i) {
508 | -fixeff_sim[[i]]
509 | })
510 | effects=matrix(unlist(lapply(effects,function(i) i %>% make_stats)),
511 | nrow=length(fixeff_sim)-1,ncol=4,byrow=T)
512 | rownames(effects) <- x$models[[mod]]$names.fixed[-1]
513 | res=rbind(res,effects)
514 | }
515 | colnames(res)=c("mean","se","L95%","U95%")
516 | return(res)
517 | }
518 |
519 | #' Helper function to rescale the stats for the Gompertz model
520 | #'
521 | #' @param table The table with the relevant values for the model
522 | #' parameters
523 | #' @return \item{res}{The resulting stats}
524 | #' @author Gianluca Baio
525 | #' @seealso print.survHE
526 | #' @references Baio (2020). survHE
527 | #' @keywords INLA Gompertz
528 | #' @noRd
529 | rescale_stats_inla_gom <- function(x,mod,nsim=1000) {
530 | shape_sim=INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.hyperpar[[1]])/max(x$misc$km$time)
531 | fixeff_sim=lapply(1:nrow(x$models[[mod]]$summary.fixed),function(i) {
532 | INLA::inla.rmarginal(nsim,x$models[[mod]]$marginals.fixed[[i]])
533 | })
534 | shape=shape_sim %>% make_stats %>% matrix(.,ncol=4)
535 | # Need to rescale only if there is an intercept
536 | if(attributes(terms(x$misc$formula))$intercept==1) {
537 | rate=exp(fixeff_sim[[1]]-log(max(x$misc$km$time))) %>% make_stats %>% matrix(.,ncol=4)
538 | }
539 | rownames(shape)="shape"
540 | rownames(rate)="rate"
541 | res=rbind(shape,rate)
542 | if(length(fixeff_sim)>1) {
543 | effects=lapply(2:nrow(x$models[[mod]]$summary.fixed),function(i) {
544 | fixeff_sim[[i]]
545 | })
546 | effects=matrix(unlist(lapply(effects,function(i) i %>% make_stats)),
547 | nrow=length(fixeff_sim)-1,ncol=4,byrow=T)
548 | rownames(effects) <- x$models[[mod]]$names.fixed[-1]
549 | res=rbind(res,effects)
550 | }
551 | colnames(res)=c("mean","se","L95%","U95%")
552 | return(res)
553 | }
554 |
555 | #' Helper function to create summary stats
556 | #'
557 | #' @param x A vector of simulations
558 | #' @return \item{tab}{The resulting stats}
559 | #' @author Gianluca Baio
560 | #' @seealso print.survHE
561 | #' @references Baio (2020). survHE
562 | #' @keywords Summaries Print
563 | #' @noRd
564 | make_stats <- function(x) {
565 | tab=c(mean(x), sd(x), quantile(x, 0.025), quantile(x,0.975))
566 | return(tab)
567 | }
568 |
569 | #' Helper function to for Stan, which needs to first print the output of
570 | #' the model before you can access the elements in the object
571 | #' '@.MISC$summary', so can use this function to print quietly...
572 | #'
573 | #' @param x A 'survHE' object
574 | #' @author Gianluca Baio
575 | #' @seealso print.survHE
576 | #' @references Baio (2020). survHE
577 | #' @keywords HMC Stan
578 | #' @noRd
579 | quiet <- function(x) {
580 | sink(tempfile())
581 | on.exit(sink())
582 | invisible(force(x))
583 | }
584 |
585 | #' Helper function to checks whether covariates effects should be
586 | #' included in the 'res' table for HMC
587 | #'
588 | #' @param table The table with the summary statistics
589 | #' @param x The original 'survHE' object
590 | #' @return \item{effects}{The effects}
591 | #' @author Gianluca Baio
592 | #' @seealso print.survHE
593 | #' @references Baio (2020). survHE
594 | #' @keywords HMC Stan
595 | #' @noRd
596 | add_effects_hmc <- function(table,x) {
597 | # If there's more than one beta, then there are "effects" (otherwise it's only intercept)
598 | if(length(grep("beta",rownames(table)))>1) {
599 | effects <- matrix(table[grep("beta",rownames(table)),],ncol=4)
600 | rownames(effects) <- colnames(model.matrix(x$misc$formula,x$misc$data))
601 | # Now removes the line with the intercept (which is already rescaled to the shape/rate/mean parameter)
602 | effects=effects[-grep("Intercept",rownames(effects)),,drop=FALSE]
603 | } else {
604 | effects <- matrix(NA,nrow=0,ncol=4)
605 | }
606 | return(effects)
607 | }
608 |
609 | #' Helper function to create the original summary table
610 | #'
611 | #' @param x The 'survHE' model
612 | #' @param mod Which of the models to be used
613 | #' @param digits The number of digits to print
614 | #' @author Gianluca Baio
615 | #' @seealso print.survHE
616 | #' @references Baio (2020). survHE
617 | #' @keywords MLE
618 | #' @noRd
619 | original_table_mle <- function(x,mod,digits) {
620 | print(x$models[[mod]],digits=digits)
621 | }
622 |
623 | #' Helper function to create the original summary table
624 | #'
625 | #' @param x The 'survHE' model
626 | #' @param mod Which of the models to be used
627 | #' @param digits The number of digits to print
628 | #' @author Gianluca Baio
629 | #' @seealso print.survHE
630 | #' @references Baio (2020). survHE
631 | #' @keywords INLA
632 | #' @noRd
633 | original_table_inla <- function(x,mod,digits) {
634 | print(summary(x$models[[mod]]),digits=digits)
635 | cat("\n")
636 | cat("NB: notice that INLA models are fitted to data rescaled in [0-1] for computational stability.")
637 | cat("\nThe estimates are rescaled on the original scale, applying a suitable back-transformation.")
638 | cat("\nThe numbers shown when 'original=TRUE' will be different than those shown in the 'survHE' format.")
639 | }
640 |
641 | #' Helper function to create the original summary table
642 | #'
643 | #' @param x The 'survHE' model
644 | #' @param mod Which of the models to be used
645 | #' @param digits The number of digits to print
646 | #' @author Gianluca Baio
647 | #' @seealso print.survHE
648 | #' @references Baio (2020). survHE
649 | #' @keywords HMC
650 | #' @noRd
651 | original_table_hmc <- function(x,mod,digits) {
652 | print(x$models[[mod]],digits=digits)
653 | }
654 |
655 |
656 | #' Helper function to format the summary table with the model parameters
657 | #'
658 | #' @param x The 'survHE' model
659 | #' @param mod Which of the models to be used
660 | #' @param res The output table
661 | #' @param digits The number of digits to print
662 | #' @author Gianluca Baio
663 | #' @seealso print.survHE
664 | #' @references Baio (2020). survHE
665 | #' @keywords Table formatting Print
666 | #' @noRd
667 | format_table <- function(x,mod,res,digits){
668 | # First re-format some of the labels (eg model names)
669 | if(x$misc$model_name[mod]=="exp") {label <- "Exponential"}
670 | if(x$misc$model_name[mod]=="gam") {label <- "Gamma"}
671 | if(x$misc$model_name[mod]=="lno") {label <- "log-Normal"}
672 | if(x$misc$model_name[mod]=="llo") {label <-"log-Logistic"}
673 | if(x$misc$model_name[mod]=="gga") {label <- "Generalised Gamma"}
674 | if(x$misc$model_name[mod]=="wei") {label <- "Weibull AF"}
675 | if(x$misc$model_name[mod]=="wph") {label <- "Weibull PH"}
676 | if(x$misc$model_name[mod]=="gef") {label <- "Generalised F"}
677 | if(x$misc$model_name[mod]=="gom") {label <- "Gompertz"}
678 | if(x$misc$model_name[mod]=="rps") {label <- "Royston & Parmar splines"}
679 | if(x$misc$model_name[mod]=="pow") {label <- "Poly-Weibull"}
680 |
681 | # Creates label of the method used
682 | label.met <- ifelse(
683 | x$method=="mle","Flexsurvreg \n(Maximum Likelihood Estimate)",
684 | ifelse(x$method=="inla","INLA (Bayesian inference via \nIntegrated Nested Laplace Approximation)",
685 | "Stan (Bayesian inference via \nHamiltonian Monte Carlo)")
686 | )
687 |
688 | # Finally prints the formatted table
689 | cat("\n")
690 | cat(paste0("Model fit for the ",label," model, obtained using ",label.met,". Running time: ",
691 | format(x$misc$time2run[[mod]],digits=5,nsmall=3)," seconds"))
692 | cat("\n\n")
693 | print(res,quote=F,digits=digits,justify="center")
694 | cat("\n")
695 | cat("Model fitting summaries\n")
696 | cat(paste0("Akaike Information Criterion (AIC)....: ",format(x$model.fitting$aic[[mod]],digits=6,nsmall=3)))
697 | cat("\n")
698 | cat(paste0("Bayesian Information Criterion (BIC)..: ",format(x$model.fitting$bic[[mod]],digits=6,nsmall=3)))
699 | if(x$method=="inla" | x$method=="hmc") {
700 | cat("\n")
701 | cat(paste0("Deviance Information Criterion (DIC)..: ",format(x$model.fitting$dic[[mod]],digits=6,nsmall=3)))
702 | }
703 | cat("\n\n")
704 | }
--------------------------------------------------------------------------------
/R/write.surv.R:
--------------------------------------------------------------------------------
1 | #' write.surv
2 | #'
3 | #' Writes the survival summary to an excel file (helpful to then call the
4 | #' values in the Markov model)
5 | #'
6 | #' Something will go here
7 | #'
8 | #' @param object a summary.flexsurvreg object containing the survival curves
9 | #' (with times, estimates and interval limits)
10 | #' @param file a string with the full path to the file name to be saved
11 | #' @param sheet a string with the name of the sheet to be created
12 | #' @param what a string to describe what to be exported. Can either be
13 | #' 'surv' (default), which outputs the simulation(s) for the survival curves
14 | #' or 'sim', which outputs the simulation(s) for the underlying model
15 | #' parameters. If there are several 'profiles', they get written in
16 | #' separate spreadsheets and a clear indication is given as the name of the
17 | #' spreadsheet
18 | #' @return A spreadsheet file with the simulation(s) of the relevant quantity
19 | #' @author Gianluca Baio
20 | #' @seealso \code{make.surv}
21 | #' @template refs
22 | #' @keywords Excel PSA
23 | #' @examples
24 | #' \dontrun{
25 | #' # Loads an example dataset from 'flexsurv'
26 | #' data(bc)
27 | #'
28 | #' # Fits the same model using the 3 inference methods
29 | #' mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
30 | #' distr="exp",method="mle")
31 | #' p.mle = make.surv(mle)
32 | #' write.surv(p.mle,file="test.xlsx")
33 | #' }
34 | #' @export write.surv
35 | write.surv <- function(object,file,sheet=NULL,what="surv") {
36 | # Writes the survival summary to an excel file (helpful to then call the values in the Markov model)
37 | # object = a summary.flexsurvreg object containing the survival curves (with times, estimates and interval limits) - that's actually
38 | # the result of the PSA from 'make.surv'
39 | # file = a string with the full path to the file name to be saved
40 | # sheet = a string with the name of the sheet to be created
41 | # what = the object to be exported. Possible values are:
42 | # 'surv' = a matrix with nsim rows and ntimes columns with the survival curve (one such matix for each configuration of the covariates)
43 | # 'sim' = a matrix with nsim rows and simulations for the survival parameters (scale, shape, rate, etc)
44 |
45 | # If xlsx is not installed, then request installation
46 | if(!isTRUE(requireNamespace("xlsx",quietly=TRUE))) {
47 | stop("You need to install the R package 'xlsx'. Please run in your R terminal:\n install.packages('xlsx')")
48 | }
49 | # But if it is installed, check if it's loaded and if not make its Namespace available so that all its functions are available
50 | if (isTRUE(requireNamespace("xlsx",quietly=TRUE))) {
51 | if (!is.element("xlsx", (.packages()))) {
52 | attachNamespace("xlsx")
53 | }
54 | # Extracts the relevant component of the make.surv output
55 | if(what=="surv") {
56 | export <- object$mat
57 | export.lab <- paste0(" ",object$nsim," simulation(s) for the survival curve")
58 | } else {
59 | export=object$sim
60 | export.lab <- paste0(" ",object$nsim," simulation(s) for the model parameters")
61 | }
62 | nobjs <- length(export)
63 | profile.lab=lapply(1:nrow(object$des.mat),function(x){
64 | object$des.mat %>% as_tibble() %>% select(!contains("(Intercept)")) %>% slice(x) %>%
65 | round(digits=2) %>% mutate(strata=paste0(names(.),"=",.,collapse=","))
66 | }) %>% bind_rows(.) %>% pull(strata)
67 | dims <- dim(export[[1]])
68 | # Finds the total number of rows necessary to write the simulations to the output file
69 | tot.rows <- dims[1]*nobjs + nobjs
70 |
71 | if(is.null(sheet)) {
72 | sheet = profile.lab
73 | }
74 |
75 | # If it already exists, we need to append the data to a different sheet
76 | if (file.exists(file)) {
77 | wb <- xlsx::loadWorkbook(file)
78 | # If worksheet already exists needs to replace it & overwrite it
79 | for (i in 1:length(sheet)){
80 | if(sheet[i] %in% names(xlsx::getSheets(wb))) {xlsx::removeSheet(wb,sheetName=sheet[i])}
81 | }
82 | sheet <- lapply(sheet,function(i) xlsx::createSheet(wb,i))
83 | ex <- lapply(1:nobjs,function(i) xlsx::addDataFrame(export[[i]],sheet=sheet[[i]],startRow=1,startColumn=1,row.names=T,col.names=T))
84 | xlsx::saveWorkbook(wb,file)
85 | }
86 |
87 | # But if file does not exist, then create it
88 | if (!file.exists(file)) {
89 | exts <- tools::file_ext(file)
90 | ## Should put some restriction as to what file extensions we want?
91 | wb <- xlsx::createWorkbook(type=exts)
92 | sheet <- lapply(sheet,function(i) xlsx::createSheet(wb,i))
93 | ex <- lapply(1:nobjs,function(i) xlsx::addDataFrame(export[[i]],sheet=sheet[[i]],startRow=1,startColumn=1,row.names=T,col.names=T))
94 | xlsx::saveWorkbook(wb,file)
95 | }
96 |
97 | msg <- paste0("written to file: '",file,"'")
98 | cat(export.lab,msg,"\n","Profile(s):",paste("\n ",profile.lab),"\n")
99 | }
100 | }
101 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # survHE
2 |
3 | [](https://cran.r-project.org/package=survHE)
4 | [](https://cran.r-project.org/package=survHE)
5 | [](https://cranlogs.r-pkg.org:443/badges/grand-total/survHE?color=orange)
6 |
7 | ## Survival analysis in health economic evaluation
8 |
9 | Contains a suite of functions to systematise the workflow involving survival analysis in health economic evaluation. survHE can fit a large range of survival models using both a frequentist approach (by calling the R package [flexsurv](https://CRAN.R-project.org/package=flexsurv)) and a Bayesian perspective. For a selected range of models, both Integrated Nested Laplace Integration (via the R package [INLA](https://www.r-inla.org/)) and Hamiltonian Monte Carlo (via the R package [rstan](https://CRAN.R-project.org/package=rstan)) are possible. HMC models are pre-compiled so that they can run in a very efficient and fast way. In addition to model fitting, survHE provides a set of specialised functions, for example to perform Probabilistic Sensitivity Analysis, export the results of the modelling to a spreadsheet, plotting survival curves and uncertainty around the mean estimates.
10 |
11 | **NB**: To run the Bayesian models, as of version 2.0 of `survHE`, it is necessary to install the additional packages [`survHEinla`](https://github.com/giabaio/survHEinla) and/or [`survHEhmc`](https://github.com/giabaio/survHEhmc), which are available from this GitHub repository. The reason for this structural change is that in this way, the basic backbone of `survHE` (available from this `main` branch of the repo) becomes a very lean package, whose installation is very quick. More details [here](https://gianluca.statistica.it/blog/2022-01-18-survhe-light/). All the functionalities are in place for `survHE` to easily extend to the Bayesian versions, once one or both of the additional "modules" is also installed.
12 |
13 | ## Installation
14 | The most updated version can be installed using the following code.
15 | ```R
16 | install.packages(
17 | "survHE",
18 | repos = c("https://giabaio.r-universe.dev", "https://cloud.r-project.org")
19 | )
20 | ```
21 |
22 | To run the Bayesian versions of the models, you also need to install the ancillary packages
23 | ```R
24 | # Bayesian models using HMC/Stan
25 | install.packages(
26 | "survHEhmc",
27 | repos = c("https://giabaio.r-universe.dev", "https://cloud.r-project.org"),
28 | dependencies=TRUE
29 | )
30 |
31 | # Bayesian models using INLA
32 | install.packages(
33 | "survHEinla",
34 | repos = c(
35 | "https://giabaio.r-universe.dev",
36 | "https://cloud.r-project.org",
37 | "https://inla.r-inla-download.org/R/stable"
38 | ),
39 | dependencies=TRUE
40 | )
41 | ```
42 | (these two are optional, in some sense, so you don't *have* to, unless you want to do the right thing and be Bayesian about it... :wink:)
43 |
--------------------------------------------------------------------------------
/data/TA174.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/giabaio/survHE/578ee3f602ebc0896b70585614b9e8b574e6a30f/data/TA174.RData
--------------------------------------------------------------------------------
/data/datalist:
--------------------------------------------------------------------------------
1 | data: ID_patient time censored arm sex age imd ethnic
2 | ta174: patid treat prog death prog_t death_t prog_ty death_ty
3 | msmdata: id from to trans Tstart Tstop time status treat patid prog death prog_t death_t prog_ty death_ty
4 |
--------------------------------------------------------------------------------
/data/msmdata.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/giabaio/survHE/578ee3f602ebc0896b70585614b9e8b574e6a30f/data/msmdata.RData
--------------------------------------------------------------------------------
/data/survtrial.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/giabaio/survHE/578ee3f602ebc0896b70585614b9e8b574e6a30f/data/survtrial.RData
--------------------------------------------------------------------------------
/man-roxygen/refs.R:
--------------------------------------------------------------------------------
1 | #' @references G Baio (2019). survHE: Survival analysis for health economic evaluation
2 | #' and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95,
3 | #' 14, 1-47.
4 |
5 |
--------------------------------------------------------------------------------
/man/data.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{data}
5 | \alias{data}
6 | \title{A fictional survival trial.}
7 | \format{
8 | A data frame with 367 rows and 8 variables:
9 | \describe{
10 | \item{ID_patient}{The individual level identifier}
11 | \item{time}{The observed time at which the event happens}
12 | \item{censored}{An indicator to describe whether the
13 | event is fully observed or censored}
14 | \item{arm}{An indicator for the treatment arm, with
15 | 0 = control and 1 = active treatment}
16 | \item{sex}{An indicator for the individual's sex, with
17 | 0 = male and 1 = female}
18 | \item{age}{A numeric variable with the individual's age}
19 | \item{imd}{A categorical variable representing a measure
20 | of area-level social deprivation}
21 | \item{ethnic}{A categorical variable representing the
22 | individual's ethnic group, as measured from a Census}
23 | }
24 | }
25 | \usage{
26 | data
27 | }
28 | \description{
29 | A dataset containing fictional data from a trial, where
30 | the main outcome is in terms of time-to-event and
31 | censoring indicator and with additional covariates.
32 | }
33 | \keyword{datasets}
34 |
--------------------------------------------------------------------------------
/man/digitise.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/digitise.R
3 | \name{digitise}
4 | \alias{digitise}
5 | \title{Format digitised data for use in survival analysis}
6 | \usage{
7 | digitise(
8 | surv_inp,
9 | nrisk_inp,
10 | km_output = "KMdata.txt",
11 | ipd_output = "IPDdata.txt"
12 | )
13 | }
14 | \arguments{
15 | \item{surv_inp}{a txt file obtained for example by DigitizeIT and containing
16 | the input survival times from graph reading. This file contains 3 columns
17 | 'ID' = the row-ID
18 | 'time' = the vector of times captured by the digitisation process
19 | 'survival' = the vector of survival probabilities captured by the digitisation
20 | process}
21 |
22 | \item{nrisk_inp}{a txt file obtained by DigitizeIT and containing the
23 | reported number at risk. This contains the following columns:
24 | 'Interval' = the ID of the various intervals included in the analysis (
25 | eg 1, 2, 3, ...)
26 | 'Time' = the actual time shown on the x-axis in the digitsed graph
27 | 'Lower' = the row of the extracted co-ordinates that the time corresponds
28 | to
29 | 'Upper' = the row of the extracted co-ordinates for which the time is less
30 | than the following time at which we have a number at risk
31 | 'nrisk' = the actual number at risk as specified in the original data}
32 |
33 | \item{km_output}{the name of the file to which the KM data will be written}
34 |
35 | \item{ipd_output}{the name of the file to which the individual level data
36 | data will be written}
37 | }
38 | \description{
39 | Produces txt files with Kaplan Meier and individual level survival data from
40 | digitised Kaplan Meier curves obtained by DigitizeIT
41 | }
42 | \examples{
43 | \dontrun{
44 | # Defines the txt files to be used as inputs
45 | surv.inp <- system.file("extdata", "survival.txt", package = "survHE")
46 | nrisk.inp <- system.file("extdata", "nrisk.txt", package = "survHE")
47 | # Runs 'digitise' to create the relevant output files
48 | digitise(surv.inp, nrisk.inp)
49 | }
50 | }
51 | \references{
52 | G Baio (2019). survHE: Survival analysis for health economic evaluation
53 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95,
54 | 14, 1-47. \url{doi:10.18637/jss.v095.i14}
55 | }
56 | \author{
57 | Patricia Guyot and Gianluca Baio
58 | }
59 | \keyword{Digitized}
60 | \keyword{Kaplan}
61 | \keyword{Meier}
62 | \keyword{curve}
63 |
--------------------------------------------------------------------------------
/man/fit.models.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/fit_models.R
3 | \name{fit.models}
4 | \alias{fit.models}
5 | \title{Fit parametric survival analysis for health economic evaluations}
6 | \usage{
7 | fit.models(formula = NULL, data, distr = NULL, method = "mle", ...)
8 | }
9 | \arguments{
10 | \item{formula}{a formula specifying the model to be used, in the form
11 | \code{Surv(time,event)~treatment[+covariates]} for flexsurv, or
12 | \code{inla.surv(time,event)~treatment[+covariates]} for INLA}
13 |
14 | \item{data}{A data frame containing the data to be used for the analysis.
15 | This must contain data for the 'event' variable. In case there is no
16 | censoring, then \code{event} is a column of 1s.}
17 |
18 | \item{distr}{a (vector of) string(s) containing the name(s) of the model(s)
19 | to be fitted. Available options are:
20 |
21 | \code{flexsurv}:
22 | "exponential","gamma","genf","gengamma","gompertz","weibull",
23 | "weibullPH","loglogistic","lognormal" \code{INLA}:
24 | "exponential","weibull","lognormal","loglogistic" \code{hmc}:
25 | "exponential","gamma","genf","gengamma","gompertz","weibull","weibullPH",
26 | "loglogistic","lognormal"}
27 |
28 | \item{method}{A string specifying the inferential method (\code{'mle'},
29 | \code{'inla'} or \code{'hmc'}). If \code{method} is set to \code{'hmc'},
30 | then \code{survHE} will write suitable model code in the Stan language
31 | (according to the specified distribution), prepare data and initial values
32 | and then run the model.}
33 |
34 | \item{\dots}{Additional options (for INLA or HMC).
35 |
36 | \strong{INLA} specific options \code{dz} = defines the step length for the grid
37 | search over the hyperparameters space (default = 0.1) \code{diff.logdens} =
38 | defines the difference in the log-density for the hyperparameters to stop
39 | integration (default = 5) \code{control.fixed} = defines the default for the
40 | priors, unless specified by the user. Default values are prior mean = 0 for
41 | \emph{all} fixed effects prior var = 1000 for \emph{all} fixed effects prior mean = 0
42 | for the intercept prior prec -> 0 for the intercept \code{control.family} =
43 | a list of options. If distr is a vector, then can be provided as a named
44 | list of options, for example something like this:
45 | \code{control.family=list(weibull=list(param=c(.1,.1)),lognormal=list(initial=2))}
46 | the names of the elements of the list need to be the same as those given in
47 | the vector \code{distr}
48 |
49 | \strong{HMC} specific options \code{chains} = number of chains to run in the HMC
50 | (default = 2) \code{iter} = total number of iterations (default = 2000)
51 | \code{warmup} = number of warmup iterations (default = iter/2) \code{thin} =
52 | number of thinning (default = 1) \code{control} = a list specifying
53 | Stan-related options, eg \code{control=list(adapt_delta=0.85)} (default =
54 | NULL) \code{seed} = the random seed (to make things replicable) \code{pars}
55 | = a vector of parameters (string, default = NA) \code{include} = a logical
56 | indicator (if FALSE, then the pars are not saved; default = TRUE)
57 | \code{priors} = a list (of lists) specifying the values for the parameters
58 | of the prior distributions in the models \code{save.stan} = a logical
59 | indicator (default = FALSE). If TRUE, then saves the data list for Stan and
60 | the model file(s)}
61 | }
62 | \value{
63 | \item{models}{ A list containing the fitted models. These contain
64 | the output from the original inference engine (\code{flexsurv}, \code{INLA}
65 | or \code{rstan}). Can be processed using the methods specific to the
66 | original packages, or via \code{survHE}-specific methods (such as
67 | \code{plot}, \code{print}) or other specialised functions (eg to extrapolate
68 | the survival curves, etc). } \item{model.fitting}{ A list containing the
69 | output of the model-fit statistics (AIC, BIC, DIC). The AIC and BIC are
70 | estimated for all methods, while the DIC is only estimated when using
71 | Bayesian inference. } \item{method}{ A string indicating the method used to
72 | fit the model, ie \code{'mle'}, \code{'inla'} or \code{'hmc'}. }
73 | \item{misc}{ A list containing the time needed to run the model(s) (in
74 | seconds), the formula used, the results of the Kaplan-Meier analysis (which
75 | is automatically performed using \code{npsurv}) and the original data frame.
76 | }
77 | }
78 | \description{
79 | Runs the survival analysis with several useful options, using either MLE
80 | (via flexsurv) or a Bayesian approach (via R-INLA or rstan)
81 | }
82 | \details{
83 | On object in the class \code{survHE} containing the following elements
84 | }
85 | \examples{
86 | \dontrun{
87 | # Loads an example dataset from 'flexsurv'
88 | data(bc)
89 |
90 | # Fits the same model using the 3 inference methods
91 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
92 | distr="exp",method="mle")
93 | inla = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
94 | distr="exp",method="inla")
95 | hmc = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
96 | distr="exp",method="hmc")
97 |
98 | # Prints the results in comparable fashion using the survHE method
99 | print(mle)
100 | print(inla)
101 | print(hmc)
102 |
103 | # Or visualises the results using the original packages methods
104 | print(mle,original=TRUE)
105 | print(inla,original=TRUE)
106 | print(hmc,original=TRUE)
107 |
108 | # Plots the survival curves and estimates
109 | plot(mle)
110 | plot(mle,inla,hmc,labs=c("MLE","INLA","HMC"),colors=c("black","red","blue"))
111 | }
112 |
113 | }
114 | \references{
115 | G Baio (2019). survHE: Survival analysis for health economic evaluation
116 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95,
117 | 14, 1-47. \url{doi:10.18637/jss.v095.i14}
118 | }
119 | \seealso{
120 | \code{make.surv}
121 | }
122 | \author{
123 | Gianluca Baio
124 | }
125 | \keyword{Approximation}
126 | \keyword{Bayesian}
127 | \keyword{Carlo}
128 | \keyword{Hamiltonian}
129 | \keyword{Integrated}
130 | \keyword{Laplace}
131 | \keyword{Monte}
132 | \keyword{Nested}
133 | \keyword{Parametric}
134 | \keyword{inference}
135 | \keyword{models}
136 | \keyword{survival}
137 | \keyword{via}
138 |
--------------------------------------------------------------------------------
/man/make.ipd.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/make.ipd.R
3 | \name{make.ipd}
4 | \alias{make.ipd}
5 | \title{Create an individual level dataset from digitised data}
6 | \usage{
7 | make.ipd(ipd_files, ctr = 1, var.labs = c("time", "event", "arm"))
8 | }
9 | \arguments{
10 | \item{ipd_files}{a list including the names of the IPD files created as
11 | output of digitise}
12 |
13 | \item{ctr}{the index of the file associated with the control arm (default,
14 | the first file). This will be coded as 0}
15 |
16 | \item{var.labs}{a vector of labels for the column of the resulting data
17 | matrix. NB these should match the arguments to the formula specified for
18 | fit.models. The user can specify values. These should be 4 elements (ID,
19 | TIME, EVENT, ARM)}
20 | }
21 | \description{
22 | Piles in the simulated IPD resulting from running digitise for more than one
23 | treatment arm
24 | }
25 | \examples{
26 | \dontrun{
27 | # Defines the txt files to be used as inputs
28 | surv.inp <- system.file("extdata", "survival.txt", package = "survHE")
29 | nrisk.inp <- system.file("extdata", "nrisk.txt", package = "survHE")
30 | # Runs 'digitise' to create the relevant output files
31 | digitise(surv.inp, nrisk.inp, ipd_output = "IPD.txt")
32 | # Now uses 'make.ipd' to create the pseudo-data
33 | make.ipd("IPD.txt", ctr = 1, var.labs = c("time", "event", "arm"))
34 | }
35 | }
36 | \references{
37 | Something will go here
38 | }
39 | \seealso{
40 | Something will go here
41 | }
42 | \author{
43 | Gianluca Baio
44 | }
45 | \keyword{Digitized}
46 | \keyword{Kaplan}
47 | \keyword{Meier}
48 | \keyword{curve}
49 |
--------------------------------------------------------------------------------
/man/make.surv.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/make.surv.R
3 | \name{make.surv}
4 | \alias{make.surv}
5 | \title{Engine for Probabilistic Sensitivity Analysis on the survival curves}
6 | \usage{
7 | make.surv(fit, mod = 1, t = NULL, newdata = NULL, nsim = 1, ...)
8 | }
9 | \arguments{
10 | \item{fit}{the result of the call to the \code{fit.models} function,
11 | containing the model fitting (and other relevant information)}
12 |
13 | \item{mod}{the index of the model. Default value is 1, but the user can
14 | choose which model fit to visualise, if the call to \code{fit.models} has a vector
15 | argument for \code{distr} (so many models are fitted & stored in the same object)}
16 |
17 | \item{t}{the time vector to be used for the estimation of the survival curve}
18 |
19 | \item{newdata}{a list (of lists), specifying the values of the covariates
20 | at which the computation is performed. For example
21 | \code{list(list(arm=0),list(arm=1))} will create two survival curves, one
22 | obtained by setting the covariate \code{arm} to the value 0 and the other by
23 | setting it to the value 1. In line with \code{flexsurv} notation, the user
24 | needs to either specify the value for \emph{all} the covariates or for none (in
25 | which case, \code{newdata=NULL}, which is the default). If some value is
26 | specified and at least one of the covariates is continuous, then a single
27 | survival curve will be computed in correspondence of the average values of
28 | all the covariates (including the factors, which in this case are expanded
29 | into indicators).}
30 |
31 | \item{nsim}{The number of simulations from the distribution of the survival
32 | curves. Default at \code{nsim=1}, in which case uses the point estimate for
33 | the relevant distributional parameters and computes the resulting survival
34 | curve}
35 |
36 | \item{...}{Additional options}
37 | }
38 | \description{
39 | Creates the survival curves for the fitted model(s)
40 | }
41 | \examples{
42 | \dontrun{
43 | # Loads an example dataset from 'flexsurv'
44 | data(bc)
45 |
46 | # Fits the same model using the 3 inference methods
47 | mle <- fit.models(formula=Surv(recyrs,censrec) ~ group, data=bc,
48 | distr="exp", method="mle")
49 | p.mle <- make.surv(mle)
50 | psa.plot(p.mle)
51 |
52 | # Can also use the main 'plot' function to visualise the survival curves
53 | # and include uncertainty by using a number 'nsim' of simulations
54 | plot(mle, nsim=10)
55 | }
56 |
57 | }
58 | \references{
59 | G Baio (2019). survHE: Survival analysis for health economic evaluation
60 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95,
61 | 14, 1-47. \url{doi:10.18637/jss.v095.i14}
62 | }
63 | \seealso{
64 | \code{\link{fit.models}}, \code{\link{psa.plot}}, \code{\link{write.surv}}
65 | }
66 | \author{
67 | Gianluca Baio
68 | }
69 | \keyword{Bootstrap}
70 | \keyword{Probabilistic}
71 | \keyword{Survival}
72 | \keyword{analysis}
73 | \keyword{models}
74 | \keyword{sensitivity}
75 |
--------------------------------------------------------------------------------
/man/make.transition.probs.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/make.transition.probs.R
3 | \name{make.transition.probs}
4 | \alias{make.transition.probs}
5 | \title{make.transition.probs}
6 | \usage{
7 | make.transition.probs(fit, labs = NULL, ...)
8 | }
9 | \arguments{
10 | \item{fit}{an object obtained as output of the call to \code{fit.models}}
11 |
12 | \item{labs}{a vector with labels to identify the 'profiles' ie the
13 | combination of covariates that have been passed onto the model formula.
14 | If 'NULL' (default), then figures it out from the 'survHE' object.}
15 |
16 | \item{...}{additional arguments. Includes the standard inputs to the
17 | call to \code{make.surv}, so \code{mod} (the index of the possibly many
18 | models stored in the 'survHE' object), \code{t} (the vector of times
19 | over which to compute the survival curves), \code{newdata} (a list that
20 | defines the profile of covariates) and \code{nsim} (the number of
21 | simulations to use - default is \code{nsim}=1)}
22 | }
23 | \value{
24 | A tibble 'lambda' with an indicator for the treatment arm,
25 | the times at which the probabilities have been computed and \code{nsim}
26 | columns each with a simulation of the transition probabilities for
27 | all the times specified by the user
28 | }
29 | \description{
30 | Computes the transition probabilities (to be passed to a Markov model) from
31 | the cumulative hazard curves obtained using \code{fit.models}, using the formula
32 | p(t)=1-exp(H(t-k)/H(t)), where k is the Markov model cycle length (or the
33 | difference across two consecutive times) and t is a generic time
34 | }
35 | \note{
36 | Something will go here
37 | }
38 | \examples{
39 | \dontrun{
40 | # Something will go here
41 | }
42 |
43 | }
44 | \references{
45 | Something will go here
46 | }
47 | \seealso{
48 | \code{\link{make.surv}}
49 | }
50 | \author{
51 | Gianluca Baio
52 | }
53 | \keyword{Markov}
54 | \keyword{Transition}
55 | \keyword{models}
56 | \keyword{probabilities}
57 |
--------------------------------------------------------------------------------
/man/make_data_multi_state.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/make.transition.probs.R
3 | \name{make_data_multi_state}
4 | \alias{make_data_multi_state}
5 | \title{make_data_multi_state}
6 | \usage{
7 | make_data_multi_state(
8 | data,
9 | id = "id",
10 | prog = "prog",
11 | death = "death",
12 | prog_t = "prog_t",
13 | death_t = "death_t",
14 | keep = NULL,
15 | ...
16 | )
17 | }
18 | \arguments{
19 | \item{data}{dataset containing the full ILD with information on both
20 | progression and death. Can be a data.frame or a tibble}
21 |
22 | \item{id}{The column with the individual identifier. Can be NULL (in
23 | which case, it will be created from scratch)}
24 |
25 | \item{prog}{The progression indicator: takes value 1 if the individual
26 | has progressed and 0 otherwise. Defaults to the column named 'prog' in
27 | the dataset}
28 |
29 | \item{death}{The death indicator: takes value 1 if the individual
30 | has died and 0 otherwise. Defaults to the column named 'death' in
31 | the dataset}
32 |
33 | \item{prog_t}{The progression time. Defaults to the column named
34 | 'prog_t' in the dataset}
35 |
36 | \item{death_t}{The death time. Defaults to the column named
37 | 'death_t' in the dataset}
38 |
39 | \item{keep}{A vector of strings with the names of the additional
40 | variables from the original dataset to keep into the multistate
41 | dataset. If 'NULL' (default), then keeps all}
42 |
43 | \item{...}{additional arguments.}
44 | }
45 | \value{
46 | A tibble containing the event history for each individual
47 | and with the following variables: id = Patients ID; from = Initial
48 | state (1=Pre-progression, 2=Progression, 3=Death); to = End state
49 | (1=Pre-progression, 2=Progression, 3=Death); trans = Transition ID:
50 | 1=Pre-progression -> Progression; 2=Pre-Progression -> Death;
51 | 3=Progression -> Death; Tstart = Entry time (either entry or
52 | progression); Tstop = Exit time (time of event or censoring time);
53 | status = Event indicator (1=yes, 0=censored), \strong{for the specific
54 | event under consideration}; treat = Treatment indicator
55 | All the other original variables are appended to these, but can be
56 | removed
57 | }
58 | \description{
59 | Takes as input an individual-level dataset including data on both
60 | progression and death time (\strong{jointly}) and manipulates it using
61 | \code{dplyr} functions to create a full "multi-state" dataset, in
62 | which all the transitions are tracked. This can then be used
63 | to fit survival models and compute all the estimates for the
64 | whole set of transition probabilities
65 | }
66 | \note{
67 | Something will go here
68 | }
69 | \examples{
70 | \dontrun{
71 | # Something will go here
72 | }
73 |
74 | }
75 | \references{
76 | Something will go here
77 | }
78 | \seealso{
79 | Something will go here
80 | }
81 | \author{
82 | Gianluca Baio
83 | }
84 | \keyword{Markov}
85 | \keyword{Multistate}
86 | \keyword{Transition}
87 | \keyword{models}
88 | \keyword{probabilities}
89 |
--------------------------------------------------------------------------------
/man/make_newdata.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils_plot_survHE.R
3 | \name{make_newdata}
4 | \alias{make_newdata}
5 | \title{Creates a 'newdata' list to modify the plots for specific individual
6 | profiles (with respect to the covariates)}
7 | \usage{
8 | make_newdata(data, vars, conts = NULL)
9 | }
10 | \arguments{
11 | \item{data}{The original dataset that has been used as input to the call
12 | to 'fit.models'}
13 |
14 | \item{vars}{A vector of strings, including the names of the variables that
15 | are to be used to construct specific profiles of individual covariates}
16 |
17 | \item{conts}{A subset of 'vars', which include the named covariates that
18 | are continuous. These will be averaged over, while for the remaining
19 | covariates (assumed to be factors), the specific profiles will be listed.
20 | Defaults to NULL}
21 | }
22 | \value{
23 | \item{newdata}{The list 'newdata' to be passed as optional argument
24 | to a call to the 'plot' method}
25 |
26 | \item{labs}{A vector of labels (say to use in the plot, for each
27 | profile)}
28 | }
29 | \description{
30 | Creates a 'newdata' list to modify the plots for specific individual
31 | profiles (with respect to the covariates)
32 | }
33 | \note{
34 | Something will go here
35 | }
36 | \examples{
37 | \dontrun{
38 | data(bc)
39 |
40 | # Fits a model using the 'bc' data
41 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
42 | distr="exp",method="mle")
43 | # Now makes the default plot
44 | plot(mle)
45 | # Now creates a 'newdata' list to modify the plot for selected profiles
46 | newdata=make_newdata(data=bc,vars="group")
47 | # And can plot, say, only two of the three treatment arms
48 | plot(mle,newdata=newdata$newdata[c(1,3)],lab.profile=newdata$labs[c(1,3)])
49 | }
50 |
51 | }
52 | \author{
53 | Gianluca Baio
54 | }
55 | \keyword{Parametric}
56 | \keyword{models}
57 | \keyword{survival}
58 |
--------------------------------------------------------------------------------
/man/markov_trace.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/make.transition.probs.R
3 | \name{markov_trace}
4 | \alias{markov_trace}
5 | \title{Markov trace}
6 | \usage{
7 | markov_trace(mm, interventions = NULL, ...)
8 | }
9 | \arguments{
10 | \item{mm}{The output of a call to \code{three_state_mm}}
11 |
12 | \item{interventions}{A vector of labels for the interventions}
13 |
14 | \item{...}{additional arguments.}
15 | }
16 | \value{
17 | Plot
18 | }
19 | \description{
20 | Plots the Markov Trace from an object generated using \code{three_state_mm}
21 | }
22 | \note{
23 | Something will go here
24 | }
25 | \examples{
26 | \dontrun{
27 | # Something will go here
28 | }
29 |
30 | }
31 | \references{
32 | Something will go here
33 | }
34 | \seealso{
35 | make.surv, three_state_mm
36 | }
37 | \author{
38 | Gianluca Baio
39 | }
40 | \keyword{Markov}
41 | \keyword{Transition}
42 | \keyword{models}
43 | \keyword{probabilities}
44 | \keyword{trace}
45 |
--------------------------------------------------------------------------------
/man/model.fit.plot.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/model.fit.plot.R
3 | \name{model.fit.plot}
4 | \alias{model.fit.plot}
5 | \title{Graphical representation of the measures of model fitting based on
6 | Information Criteria}
7 | \usage{
8 | model.fit.plot(..., type = "aic", scale = "absolute", stacked = FALSE)
9 | }
10 | \arguments{
11 | \item{...}{Optional inputs. Must include at least one \code{survHE} object.}
12 |
13 | \item{type}{should the AIC, the BIC or the DIC plotted? (values = \code{"aic"},
14 | \code{"bic"} or \code{"dic"})}
15 |
16 | \item{scale}{If \code{scale='absolute'} (default), then plot the absolute value
17 | of the *IC. If \code{scale='relative'} then plot a rescaled version taking
18 | the percentage increase in the *IC in comparison with the best-fitting model}
19 |
20 | \item{stacked}{Should the bars be stacked and grouped by survHE object? (default=F)}
21 | }
22 | \value{
23 | A plot with the relevant model fitting statistics
24 | }
25 | \description{
26 | Plots a summary of the model fit for all the models fitted
27 | }
28 | \details{
29 | Something will go here
30 | }
31 | \examples{
32 | \dontrun{
33 | data(bc)
34 |
35 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
36 | distr=c("exp","wei","lno"),method="mle")
37 | model.fit.plot(mle)
38 | }
39 |
40 | }
41 | \references{
42 | G Baio (2019). survHE: Survival analysis for health economic evaluation
43 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95,
44 | 14, 1-47. \url{doi:10.18637/jss.v095.i14}
45 | }
46 | \seealso{
47 | \code{fit.models}
48 | }
49 | \author{
50 | Gianluca Baio
51 | }
52 | \keyword{Model}
53 | \keyword{Parametric}
54 | \keyword{fitting}
55 | \keyword{models}
56 | \keyword{survival}
57 |
--------------------------------------------------------------------------------
/man/msmdata.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{msmdata}
5 | \alias{msmdata}
6 | \title{NICE TA174 dataset in multi-state format.}
7 | \format{
8 | A tibble with 1868 rows and 16 variables:
9 | \describe{
10 | \item{id}{A numeric patient identifier}
11 | \item{from}{An indicator of the starting state.
12 | 1=Pre-progression; 2=Progression; 3=Death}
13 | \item{to}{An indicator for the arriving state}
14 | \item{trans}{A code for the actual transition considered.
15 | 1=Pre-progression -> Progression; 2=Pre-progression ->
16 | Death; 3=Progression -> Death}
17 | \item{Tstart}{The time of entry into the observation}
18 | \item{Tstop}{The time of exit from observation}
19 | \item{time}{The observed time until even (progression or
20 | death), or censoring occurs}
21 | \item{status}{The event indicator; takes value 1 if the
22 | underlying event (which varies depending on which
23 | transition is being considered) happens and 0 otherwise}
24 | \item{treat}{The treatment indicator. 1=rituximab
25 | in combination with fludarabine andcyclophosphamide
26 | (RFC); 0=fludarabine and cyclo-phosphamide alone (FC)}
27 | \item{patid}{The original numeric patient identifier}
28 | \item{prog}{The original indicator to describe whether
29 | the patient has experience a progression}
30 | \item{death}{The original indicator to describe whether
31 | the patient has experience death}
32 | \item{prog_t}{The original observed time at progression,
33 | or the time at which the patient has been censored;
34 | measured in months}
35 | \item{death_t}{The original observed time at death, or
36 | the time at which the patient has been censored;
37 | measured in months}
38 | \item{prog_ty}{The original observed time at progression,
39 | or the time at which the patient has been censored;
40 | measured in years}
41 | \item{death_ty}{The original observed time at death, or
42 | the time at which the patient has been censored;
43 | measured in years}
44 | }
45 | }
46 | \usage{
47 | msmdata
48 | }
49 | \description{
50 | These are the same data contained in NICE TA174, as
51 | made publicly available as part of the supplementary
52 | material for Williams et al (2017). Medical Decision
53 | Making, 37;427-439. However, the data have been
54 | restructured (by using the function \code{make_data_multi_state()})
55 | to be used for multi-state analysis
56 | }
57 | \keyword{datasets}
58 |
--------------------------------------------------------------------------------
/man/plot.survHE.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plot.survHE.R
3 | \name{plot.survHE}
4 | \alias{plot.survHE}
5 | \title{Plot survival curves for the models fitted using \code{fit.models}}
6 | \usage{
7 | \method{plot}{survHE}(...)
8 | }
9 | \arguments{
10 | \item{...}{Must include at least one result object saved as
11 | the call to the \code{fit.models} function. Nay include other
12 | optional parameters. These include whether the KM curve should be
13 | added \code{add.km} and whether the user specifies a profile of covariates
14 | (in the list \code{newdata}). Other possibilities are additional
15 | (mainly graphical) options. These are:
16 | \itemize{
17 | \item \code{xlab} = a string with the label for the x-axis (default = "time")
18 | \item \code{ylab} = a string with the label for the y-axis (default = "Survival")
19 | \item \code{lab.profile} = a (vector of) string(s) indicating the labels associated with the strata defining the different
20 | survival curves to plot. Default to the value used by the Kaplan Meier
21 | estimate given in \code{fit.models}.
22 | \item \code{newdata} = a list (of lists) providing the values for the relevant covariates If NULL, then will use
23 | the mean values for the covariates if at least one is a continuous variable,
24 | or the combination of the categorical covariates.
25 | \item \code{xlim} = a vector determining the limits for the x-axis
26 | \item \code{colors} = a vector of characters defining the colours in which to plot the different survival curves
27 | \item \code{what} = a string indicating whether the survival, hazard or
28 | cumulative hazard curve should be plotted. Defaults to 'survival', but the
29 | other two options can be specified as 'hazard' or 'cumhazard'
30 | \item \code{lab.profile} = a vector of characters defining the names of the models fitted
31 | \item \code{add.km} = TRUE (whether to also add the Kaplan Meier estimates of the data)
32 | \item \code{annotate} = FALSE (whether to also add text to highlight the observed vs
33 | extrapolated data)
34 | \item \code{legend.position} = a vector of proportions to place the legend. Default
35 | to 'c(.75,.9)', which means 75\% across the x-axis and 90\% across the y-axis
36 | \item \code{legend.title} = suitable instructions to format the title of the legend;
37 | defaults to 'element_text(size=15,face="bold")' but there may be other
38 | arguments that can be added (using 'ggplot' facilities)
39 | \item \code{legend.text} = suitable instructions to format the text of the legend;
40 | defaults to 'element_text(colour="black", size=14, face="plain")' but there
41 | may be other arguments that can be added (using 'ggplot' facilities)
42 | }}
43 | }
44 | \description{
45 | Plots the results of model fit.
46 | }
47 | \examples{
48 | \dontrun{
49 | data(bc)
50 |
51 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
52 | distr="exp",method="mle")
53 | inla = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
54 | distr="exp",method="inla")
55 | plot(MLE=mle,INLA=inla)
56 | }
57 |
58 | }
59 | \references{
60 | G Baio (2019). survHE: Survival analysis for health economic evaluation
61 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95,
62 | 14, 1-47. \url{doi:10.18637/jss.v095.i14}
63 | }
64 | \seealso{
65 | \code{\link{fit.models}}, \code{\link{write.surv}}
66 | }
67 | \author{
68 | Gianluca Baio
69 | }
70 | \keyword{Parametric}
71 | \keyword{models}
72 | \keyword{survival}
73 |
--------------------------------------------------------------------------------
/man/plot_transformed_km.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plot_tranformed_km.R
3 | \name{plot_transformed_km}
4 | \alias{plot_transformed_km}
5 | \title{Plot to assess suitability of parametric model}
6 | \usage{
7 | plot_transformed_km(fit, mod = 1, add_legend = FALSE, graph = "base", ...)
8 | }
9 | \arguments{
10 | \item{fit}{An object of class survHE.}
11 |
12 | \item{mod}{Index or name of a model in fit. Defaults to 1.}
13 |
14 | \item{add_legend}{If \code{TRUE}, labels assumptions. Defaults to \code{FALSE}.}
15 |
16 | \item{graph}{Type of plot: base or ggplot2.}
17 |
18 | \item{\dots}{Further arguments, passed on to plot.}
19 | }
20 | \value{
21 | Diagnostic plot
22 | }
23 | \description{
24 | Perform an exploratory investigation for linearity of
25 | transformed survival models.
26 | }
27 | \details{
28 | For the Weibull, twice taking logs of the survivor function
29 |
30 | \deqn{log(-log S(t)) = log \lambda + \gamma log t}
31 |
32 | A plot of \eqn{log(-log S(t))} against \eqn{log(t)} would give an approximately
33 | straight line if the Weibull assumption is reasonable.
34 | The plot could also be used to give a rough estimate of the parameters.
35 |
36 | Similarly, for the log-logistic distribution
37 |
38 | \deqn{logS(t)/(1 - S(t)) = \theta - \kappa log t}
39 |
40 | For the log-normal distribution
41 |
42 | \deqn{\Phi^{-1} (1 - S(t)) = (log t - \mu) / \sigma}
43 |
44 | We can also check the assumption made with using the Cox regression model
45 | of proportional hazards by inspecting the log-cumulative hazard plot.
46 |
47 | \deqn{log H_i(t) = \beta x_i + log H_0(t)}
48 |
49 | The transformed curves for different values of the explanatory variables
50 | will be parallel if PH holds.
51 | }
52 | \examples{
53 |
54 | data(bc)
55 | form <- formula("Surv(recyrs, censrec) ~ group")
56 |
57 | # exponential distribution
58 | fit_exp <- fit.models(form, data = bc,
59 | distr = "exp", method = "mle")
60 | plot_transformed_km(fit_exp)
61 | plot_transformed_km(fit_exp, graph = "ggplot2")
62 |
63 | # weibull distribution
64 | fit_wei <- fit.models(form, data = bc,
65 | distr = "weibull", method = "mle")
66 | plot_transformed_km(fit_wei)
67 | plot_transformed_km(fit_wei, graph = "ggplot2")
68 |
69 | # loglogistic distribution
70 | fit_llog <- fit.models(form, data = bc,
71 | distr = "loglogistic", method = "mle")
72 | plot_transformed_km(fit_llog)
73 | plot_transformed_km(fit_llog, graph = "ggplot2")
74 |
75 | # lognormal distribution
76 | fit_lnorm <- fit.models(form, data = bc,
77 | distr = "lognormal", method = "mle")
78 | plot_transformed_km(fit_lnorm)
79 | plot_transformed_km(fit_lnorm, graph = "ggplot2")
80 |
81 | ## for only one group
82 | form <- formula("Surv(recyrs, censrec) ~ 1")
83 |
84 | fit_exp <- fit.models(form, data = bc,
85 | distr = "exp", method = "mle")
86 | plot_transformed_km(fit_exp)
87 | plot_transformed_km(fit_exp, graph = "ggplot2")
88 |
89 | }
90 | \references{
91 | Collett (2015) Modelling Survival Data in Medical Research, CRC Press
92 | }
93 | \author{
94 | William Browne, Nathan Green
95 | }
96 | \keyword{hplot}
97 | \keyword{survival}
98 |
--------------------------------------------------------------------------------
/man/print.survHE.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/print.survHE.R
3 | \name{print.survHE}
4 | \alias{print.survHE}
5 | \title{Print a summary of the survival model(s) fitted by \code{fit.models}}
6 | \usage{
7 | \method{print}{survHE}(x, mod = 1, ...)
8 | }
9 | \arguments{
10 | \item{x}{the \code{survHE} object (the output of the call to
11 | \code{fit.models})}
12 |
13 | \item{mod}{is the index of the model. Default value is 1, but the user can
14 | choose which model fit to visualise, if the call to fit.models has a vector
15 | argument for distr (so many models are fitted & stored in the same object)}
16 |
17 | \item{\dots}{additional options, including: \code{digits} = number of
18 | significant digits to be shown in the summary table (default = 6)
19 | \code{original} = a flag to say whether the \emph{original} table
20 | from either \code{flexsurv} or \code{INLA} or \code{rstan} should be printed}
21 | }
22 | \description{
23 | Prints the summary table for the model(s) fitted, with the estimate of the
24 | parameters
25 | }
26 | \examples{
27 | \dontrun{
28 | data(bc)
29 |
30 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
31 | distr="exp",method="mle")
32 | print(mle)
33 | }
34 |
35 | }
36 | \references{
37 | G Baio (2019). survHE: Survival analysis for health economic evaluation
38 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95,
39 | 14, 1-47. \url{doi:10.18637/jss.v095.i14}
40 | }
41 | \author{
42 | Gianluca Baio
43 | }
44 | \keyword{Parametric}
45 | \keyword{models}
46 | \keyword{survival}
47 |
--------------------------------------------------------------------------------
/man/psa.plot.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/psa.plot.R
3 | \name{psa.plot}
4 | \alias{psa.plot}
5 | \title{Graphical depiction of the probabilistic sensitivity analysis for the
6 | survival curves}
7 | \usage{
8 | psa.plot(psa, ...)
9 | }
10 | \arguments{
11 | \item{psa}{the result of the call to the function \code{make.surv}}
12 |
13 | \item{...}{Optional graphical parameters, such as:
14 | \itemize{
15 | \item \code{xlab} = label for the x-axis
16 | \item \code{ylab} = label for the y-axis
17 | \item \code{col} = (vector) of colours for the lines to be plotted
18 | \item \code{alpha} = the level of transparency for the curves (default = 0.2)
19 | }}
20 | }
21 | \description{
22 | Plots the survival curves for all the PSA simulations. The function is
23 | actually deprecated - similar graphs can be obtained directly using
24 | the \code{plot} method (with options), which allows a finer depiction
25 | of the results.
26 | }
27 | \examples{
28 | \dontrun{
29 | data(bc)
30 |
31 | # Fits the same model using the 3 inference methods
32 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
33 | distr="exp",method="mle")
34 | p.mle = make.surv(mle,nsim=100)
35 | psa.plot(p.mle)
36 | }
37 |
38 | }
39 | \references{
40 | G Baio (2019). survHE: Survival analysis for health economic evaluation
41 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95,
42 | 14, 1-47. \url{doi:10.18637/jss.v095.i14}
43 | }
44 | \seealso{
45 | \code{\link{make.surv}}, \code{\link{write.surv}}
46 | }
47 | \author{
48 | Gianluca Baio
49 | }
50 | \keyword{Bootstrap}
51 | \keyword{Probabilistic}
52 | \keyword{Survival}
53 | \keyword{analysis}
54 | \keyword{models}
55 | \keyword{sensitivity}
56 |
--------------------------------------------------------------------------------
/man/summary.survHE.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/summary.survHE.R
3 | \name{summary.survHE}
4 | \alias{summary.survHE}
5 | \title{Prints a summary table for the distribution the mean survival time for a
6 | given model and data}
7 | \usage{
8 | \method{summary}{survHE}(object, mod = 1, t = NULL, nsim = 1000, ...)
9 | }
10 | \arguments{
11 | \item{object}{a \code{survHE} object (resulting from the call to
12 | \code{fit.models}}
13 |
14 | \item{mod}{the model to be analysed (default = 1)}
15 |
16 | \item{t}{the vector of times to be used in the computation. Default = NULL,
17 | which means the observed times will be used. NB: the vector of times should
18 | be: i) long enough so that S(t) goes to 0; and ii) dense enough so that the
19 | approximation to the AUC is sufficiently precise}
20 |
21 | \item{nsim}{the number of simulations from the survival curve distributions
22 | to be used (to compute interval estimates)}
23 |
24 | \item{\dots}{Additional options}
25 | }
26 | \value{
27 | \item{mean.surv}{ A matrix with the simulated values for the mean
28 | survival times } \item{tab}{ A summary table }
29 | }
30 | \description{
31 | Calculates the mean survival time as the area under the survival curve
32 | }
33 | \details{
34 | A list comprising of the following elements
35 | }
36 | \examples{
37 | \dontrun{
38 | data(bc)
39 |
40 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
41 | distr="exp",method="mle")
42 | summary(mle,nsim=100)
43 | }
44 |
45 | }
46 | \references{
47 | G Baio (2019). survHE: Survival analysis for health economic evaluation
48 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95,
49 | 14, 1-47. \url{doi:10.18637/jss.v095.i14}
50 | }
51 | \seealso{
52 | \code{fit.models}, \code{make.surv}
53 | }
54 | \author{
55 | Gianluca Baio
56 | }
57 | \keyword{Mean}
58 | \keyword{Parametric}
59 | \keyword{models}
60 | \keyword{survival}
61 | \keyword{time}
62 |
--------------------------------------------------------------------------------
/man/survHE-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/survHE-package.R
3 | \docType{package}
4 | \name{survHE-package}
5 | \alias{survHE}
6 | \alias{survHE-package}
7 | \title{survHE: Survival Analysis in Health Economic Evaluation}
8 | \description{
9 | Contains a suite of functions for survival analysis in health economics. These can be used to run survival models under a frequentist (based on maximum likelihood) or a Bayesian approach (both based on Integrated Nested Laplace Approximation or Hamiltonian Monte Carlo). To run the Bayesian models, the user needs to install additional modules (packages), i.e. 'survHEinla' and 'survHEhmc'. These can be installed from \url{https://giabaio.r-universe.dev/} using 'install.packages("survHEhmc", repos = c("https://giabaio.r-universe.dev", "https://cloud.r-project.org"))' and 'install.packages("survHEinla", repos = c("https://giabaio.r-universe.dev", "https://cloud.r-project.org"))' respectively. 'survHEinla' is based on the package INLA, which is available for download at \url{https://inla.r-inla-download.org/R/stable/}. The user can specify a set of parametric models using a common notation and select the preferred mode of inference. The results can also be post-processed to produce probabilistic sensitivity analysis and can be used to export the output to an Excel file (e.g. for a Markov model, as often done by modellers and practitioners). \doi{10.18637/jss.v095.i14}.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 | \item \url{https://github.com/giabaio/survHE}
15 | \item \url{https://gianluca.statistica.it/software/survhe/}
16 | \item Report bugs at \url{https://github.com/giabaio/survHE/issues}
17 | }
18 |
19 | }
20 | \author{
21 | \strong{Maintainer}: Gianluca Baio \email{g.baio@ucl.ac.uk}
22 |
23 | Other contributors:
24 | \itemize{
25 | \item Andrea Berardi \email{and.be.like@gmail.com} [contributor]
26 | \item Philip Cooney \email{philip.cooney@hotmail.com} [contributor]
27 | \item Andrew Jones \email{andrew.r.johnson@postgrad.curtin.edu.au} [contributor]
28 | \item Nathan Green \email{n.green@ucl.ac.uk} [contributor]
29 | }
30 |
31 | }
32 | \keyword{internal}
33 |
--------------------------------------------------------------------------------
/man/ta174.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{ta174}
5 | \alias{ta174}
6 | \title{NICE TA174 dataset.}
7 | \format{
8 | A tibble with 810 rows and 8 variables:
9 | \describe{
10 | \item{patid}{A numeric patient identifier}
11 | \item{treat}{The treatment indicator. 1=rituximab
12 | in combination with fludarabine andcyclophosphamide
13 | (RFC); 0=fludarabine and cyclo-phosphamide alone (FC)}
14 | \item{prog}{An indicator to describe whether the
15 | patient has experience a progression}
16 | \item{death}{An indicator to describe whether the
17 | patient has experience death}
18 | \item{prog_t}{The observed time at progression, or
19 | the time at which the patient has been censored;
20 | measured in months}
21 | \item{death_t}{The observed time at death, or
22 | the time at which the patient has been censored;
23 | measured in months}
24 | \item{prog_ty}{The observed time at progression, or
25 | the time at which the patient has been censored;
26 | measured in years}
27 | \item{death_ty}{The observed time at death, or
28 | the time at which the patient has been censored;
29 | measured in years}
30 | }
31 | }
32 | \usage{
33 | ta174
34 | }
35 | \description{
36 | A dataset containing the data used for NICE TA174, as
37 | made publicly available as part of the supplementary
38 | material for Williams et al (2017). Medical Decision
39 | Making, 37;427-439.
40 | }
41 | \keyword{datasets}
42 |
--------------------------------------------------------------------------------
/man/theme_survHE.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils_plot_survHE.R
3 | \name{theme_survHE}
4 | \alias{theme_survHE}
5 | \title{A Custom ggplot2 Theme for Survival Plots}
6 | \usage{
7 | theme_survHE()
8 | }
9 | \value{
10 | A ggplot2 theme object that can be added to a ggplot.
11 | }
12 | \description{
13 | This theme is designed for use with survival analysis plots, particularly
14 | those created using the \code{survHE} package. It builds on \code{theme_bw()} and
15 | customizes axis text, titles, plot background, and legend styling.
16 | }
17 | \details{
18 | Note: To position the legend inside the plot, use an additional call to
19 | \code{theme(legend.position = c(x, y), legend.justification = c("left", "top"))}.
20 | }
21 | \examples{
22 | library(ggplot2)
23 | library(survHE)
24 | ggplot(mtcars, aes(wt, mpg)) +
25 | geom_point() +
26 | theme_survHE() +
27 | theme(legend.position = c(0.6, 0.8), legend.justification = c("left", "top"))
28 | }
29 |
--------------------------------------------------------------------------------
/man/three_state_mm.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/make.transition.probs.R
3 | \name{three_state_mm}
4 | \alias{three_state_mm}
5 | \title{three_state_mm}
6 | \usage{
7 | three_state_mm(
8 | m_12,
9 | m_13,
10 | m_23,
11 | nsim = 1,
12 | start = c(1000, 0, 0),
13 | basecase = FALSE,
14 | ...
15 | )
16 | }
17 | \arguments{
18 | \item{m_12}{A 'survHE' object (output to a call to \code{fit.models})
19 | estimating the parameters of a model for the transition from
20 | 'Pre-progression' (state 1) to 'Progressed' (state 2). Given the
21 | individual level data with the complete event history (in the object
22 | 'data'), can be done with a call like 'x=make_data_multi_state(data)'
23 | and then \code{fit.models(Surv(time,status)~...,data=x \%>\% filter(trans==1),...)}}
24 |
25 | \item{m_13}{A 'survHE' object (output to a call to \code{fit.models})
26 | estimating the parameters of a model for the transition from
27 | 'Pre-progression' (state 1) to 'Death' (state 3). Given the
28 | individual level data with the complete event history (in the object
29 | 'data'), can be done with a call like 'x=make_data_multi_state(data)'
30 | and then \code{fit.models(Surv(time,status)~...,data=x \%>\% filter(trans==2),...)}}
31 |
32 | \item{m_23}{A 'survHE' object (output to a call to \code{fit.models})
33 | estimating the parameters of a model for the transition from
34 | 'Progressed' (state 2) to 'Death' (state 3). Given the
35 | individual level data with the complete event history (in the object
36 | 'data'), can be done with a call like 'x=make_data_multi_state(data)'
37 | and then \code{fit.models(Surv(time,status)~...,data=x \%>\% filter(trans==3),...)}}
38 |
39 | \item{nsim}{The number of simulations for the model parameters that are
40 | used to compute the survival curves. Defaults to \code{nsim}=1,
41 | which simply creates one survival curve for each treatment arm.}
42 |
43 | \item{start}{A vector of initial state occupancy. By default assumes 1000
44 | individuals, all initially allocated to 'Pre-progression'}
45 |
46 | \item{basecase}{Should the base case be computed as well, based on the
47 | point estimate of the underlying model parameters? (Default=FALSE)}
48 |
49 | \item{...}{additional arguments.}
50 | }
51 | \value{
52 | A list including the state occupancy simulations in an object 'm'.
53 | This is a tibble with the number of individuals in each of the 3 states
54 | at each of the times specified by the user. If \code{nsim}>1, then the tibble
55 | also contains a simulation index to keep track of that. The list also
56 | includes the computation time to obtain the state occupancy tibble (in the
57 | object 'running_time'). If \code{basecase==TRUE}, then the function also
58 | computes the "base case scenario" (based on 1 simulation from of the
59 | underlying survival curves, i.e. the point estimate of the model parameters)
60 | and stores it in the object 'base_case'
61 | }
62 | \description{
63 | General purpose function to run a standard three-state Markov model
64 | (typically used in cancer modelling). The states are typically
65 | 'Pre-progression', 'Progressed' and 'Death'. No backward transition
66 | from 'Progressed' to 'Pre-progression' is allowed and 'Death' is
67 | obviously an absorbing state. All other transitions are possible.
68 | The crucial assumption is that \emph{individual-level data} are available
69 | recording an indicator and the time of progression and death for each
70 | individual. The function returns the full transition matrix
71 | }
72 | \note{
73 | Something will go here
74 | }
75 | \examples{
76 | \dontrun{
77 | # Something will go here
78 | }
79 |
80 | }
81 | \references{
82 | Something will go here
83 | }
84 | \seealso{
85 | make.transition.probs make_data_multi_state
86 | }
87 | \author{
88 | Gianluca Baio
89 | }
90 | \keyword{Markov}
91 | \keyword{Three-state}
92 | \keyword{Transition}
93 | \keyword{cancer}
94 | \keyword{model}
95 | \keyword{models}
96 | \keyword{probabilities}
97 |
--------------------------------------------------------------------------------
/man/write.surv.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/write.surv.R
3 | \name{write.surv}
4 | \alias{write.surv}
5 | \title{write.surv}
6 | \usage{
7 | write.surv(object, file, sheet = NULL, what = "surv")
8 | }
9 | \arguments{
10 | \item{object}{a summary.flexsurvreg object containing the survival curves
11 | (with times, estimates and interval limits)}
12 |
13 | \item{file}{a string with the full path to the file name to be saved}
14 |
15 | \item{sheet}{a string with the name of the sheet to be created}
16 |
17 | \item{what}{a string to describe what to be exported. Can either be
18 | 'surv' (default), which outputs the simulation(s) for the survival curves
19 | or 'sim', which outputs the simulation(s) for the underlying model
20 | parameters. If there are several 'profiles', they get written in
21 | separate spreadsheets and a clear indication is given as the name of the
22 | spreadsheet}
23 | }
24 | \value{
25 | A spreadsheet file with the simulation(s) of the relevant quantity
26 | }
27 | \description{
28 | Writes the survival summary to an excel file (helpful to then call the
29 | values in the Markov model)
30 | }
31 | \details{
32 | Something will go here
33 | }
34 | \examples{
35 | \dontrun{
36 | # Loads an example dataset from 'flexsurv'
37 | data(bc)
38 |
39 | # Fits the same model using the 3 inference methods
40 | mle = fit.models(formula=Surv(recyrs,censrec)~group,data=bc,
41 | distr="exp",method="mle")
42 | p.mle = make.surv(mle)
43 | write.surv(p.mle,file="test.xlsx")
44 | }
45 | }
46 | \references{
47 | G Baio (2019). survHE: Survival analysis for health economic evaluation
48 | and cost-effectiveness modelling. Journal of Statistical Software (2020). vol 95,
49 | 14, 1-47. \url{doi:10.18637/jss.v095.i14}
50 | }
51 | \seealso{
52 | \code{make.surv}
53 | }
54 | \author{
55 | Gianluca Baio
56 | }
57 | \keyword{Excel}
58 | \keyword{PSA}
59 |
--------------------------------------------------------------------------------
/survHE.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: knitr
13 | LaTeX: pdfLaTeX
14 |
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | PackageRoxygenize: rd,collate,namespace
19 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | # This file is part of the standard setup for testthat.
2 | # It is recommended that you do not modify it.
3 | #
4 | # Where should you do additional test configuration?
5 | # Learn more about the roles of various files in:
6 | # * https://r-pkgs.org/tests.html
7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files
8 |
9 | library(testthat)
10 | library(survHE)
11 |
12 | test_check("survHE")
13 |
--------------------------------------------------------------------------------
/tests/testthat/test-make.surv.R:
--------------------------------------------------------------------------------
1 |
2 | test_that("output plots don't throw an error and return ggplot2 list", {
3 |
4 | data(bc)
5 |
6 | mle <- fit.models(formula=Surv(recyrs,censrec) ~ group, data=bc,
7 | distr=c("exponential", "weibull"), method="mle")
8 |
9 | p.mle <- make.surv(mle)
10 | p.mle2 <- make.surv(mle, nsim = 2)
11 | p.mle3 <- make.surv(mle, nsim = 2, mod = 2)
12 |
13 | expect_type(plot(mle, nsim=10), "list")
14 |
15 | expect_type(psa.plot(p.mle), "list")
16 | expect_type(psa.plot(p.mle), "list")
17 |
18 | expect_type(psa.plot(p.mle2), "list")
19 |
20 | expect_type(psa.plot(p.mle3), "list")
21 |
22 | expect_type(plot(mle, add.km=TRUE), "list")
23 | expect_type(plot(mle, add.km=TRUE, sim = 10), "list")
24 | })
25 |
--------------------------------------------------------------------------------