├── .gitattributes ├── .Rbuildignore ├── tests ├── testthat.R └── testthat │ ├── gamma_test.rda │ ├── generate_test_data_gamma.R │ ├── test-scoreExtract.R │ ├── test-options.R │ ├── parallel.R │ ├── test-scoreStatSet.R │ ├── test-scoreStat.R │ ├── test-internalGamma.R │ ├── test-internalScore.R │ ├── test-timedependent.R │ ├── test-gammaStat.R │ ├── test-gammaImputedata.R │ ├── test-validationGamma.R │ ├── test-scoreSystem.R │ └── test-validation.R ├── data ├── ScoreInd.rda └── ScoreTimeDep.rda ├── inst └── validation │ ├── jackson_simulation.pdf │ ├── score.R │ └── jackson_simulation.Rmd ├── R ├── InformativeCensoring-package.R ├── scoreStat.R ├── scoreImputedSet.R ├── tests.R ├── dataDescription.R ├── parallel.R ├── validationGamma.R ├── options.R ├── timedependent.R ├── gammaStat.R ├── scoreImputedData.R ├── generics.R ├── internalGamma.R ├── scoreStatSet.R ├── validation.R └── gammaImputeData.R ├── .travis.yml ├── man ├── ScoreStatSet.Rd ├── GammaStat.object.Rd ├── MakeTimeDepScore.Rd ├── ScoreTD.object.Rd ├── GammaImputedData.object.Rd ├── ScoreStatList.object.Rd ├── ScoreStatSet.object.Rd ├── GammaStatList.object.Rd ├── InformativeCensoring-package.Rd ├── ScoreTimeDep.Rd ├── ScoreImputedData.object.Rd ├── ExtractSingle.Rd ├── cox.zph.Rd ├── NN.options.Rd ├── ScoreStat.object.Rd ├── summary.ScoreStatSet.Rd ├── ScoreImputedSet.object.Rd ├── GammaImputedSet.object.Rd ├── ScoreInd.Rd ├── col.headings.Rd ├── ImputeStat.Rd ├── ScoreImpute.Rd └── gammaImpute.Rd ├── appveyor.yml ├── NAMESPACE ├── DESCRIPTION ├── vignettes └── bibliography.bib └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | data/* binary 3 | src/* text=lf 4 | R/* text=lf 5 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | .travis.yml 2 | ^appveyor\.yml$ 3 | ^.*\.Rproj$ 4 | 
^\.Rproj\.user$ 5 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library("InformativeCensoring") 3 | 4 | test_check("InformativeCensoring") -------------------------------------------------------------------------------- /data/ScoreInd.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientific-computing-solutions/InformativeCensoring/HEAD/data/ScoreInd.rda -------------------------------------------------------------------------------- /data/ScoreTimeDep.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientific-computing-solutions/InformativeCensoring/HEAD/data/ScoreTimeDep.rda -------------------------------------------------------------------------------- /tests/testthat/gamma_test.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientific-computing-solutions/InformativeCensoring/HEAD/tests/testthat/gamma_test.rda -------------------------------------------------------------------------------- /inst/validation/jackson_simulation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scientific-computing-solutions/InformativeCensoring/HEAD/inst/validation/jackson_simulation.pdf -------------------------------------------------------------------------------- /R/InformativeCensoring-package.R: -------------------------------------------------------------------------------- 1 | #' @importFrom survival Surv 2 | #' @import parallel 3 | #' @importFrom stats formula model.frame na.fail pf predict pt 4 | #' @importFrom stats qt quantile runif setNames terms update var vcov 5 | #' @importFrom utils tail 6 | NULL 7 | 
-------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | cache: packages 3 | 4 | os: 5 | - linux 6 | - osx 7 | 8 | r_github_packages: 9 | - jimhester/covr 10 | 11 | before_install: 12 | - if [ "${TRAVIS_OS_NAME}" == "osx" ]; then sudo tlmgr install preprint url; fi 13 | 14 | after_success: 15 | - if [ "${TRAVIS_OS_NAME}" == "linux" ]; then echo 'covr::coveralls(quiet=FALSE)' | R --vanilla; fi 16 | -------------------------------------------------------------------------------- /man/ScoreStatSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoreStatSet.R 3 | \name{ScoreStatSet} 4 | \alias{ScoreStatSet} 5 | \title{S3 generic to create a \code{ScoreStatSet} object} 6 | \usage{ 7 | ScoreStatSet(x) 8 | } 9 | \arguments{ 10 | \item{x}{The object to convert into a \code{ScoreStatSet} object} 11 | } 12 | \value{ 13 | A ScoreStatSet object 14 | } 15 | \description{ 16 | S3 generic to create a \code{ScoreStatSet} object 17 | } 18 | \seealso{ 19 | \code{\link{ScoreStatSet.object}} 20 | } 21 | 22 | -------------------------------------------------------------------------------- /man/GammaStat.object.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gammaStat.R 3 | \name{GammaStat.object} 4 | \alias{GammaStat.object} 5 | \title{\code{GammaStat} object} 6 | \description{ 7 | An S3 object which contains the point estimate 8 | and test statistic after fitting a model to 9 | a \code{GammaImputedData} object. 
10 | } 11 | \details{ 12 | The function \code{print.GammaStat} has been implemented 13 | 14 | The object contains the following: 15 | } 16 | \section{Slots}{ 17 | 18 | \describe{ 19 | \item{\code{model}}{The model used to create the fit} 20 | 21 | \item{\code{method}}{The model used for the fit} 22 | 23 | \item{\code{estimate}}{A point estimate of the test statistic} 24 | 25 | \item{\code{var}}{The estimate for the variance of the test statistic} 26 | }} 27 | 28 | -------------------------------------------------------------------------------- /man/MakeTimeDepScore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/timedependent.R 3 | \name{MakeTimeDepScore} 4 | \alias{MakeTimeDepScore} 5 | \title{Create a valid \code{ScoreTD} object} 6 | \usage{ 7 | MakeTimeDepScore(data, Id, time.start, time.end) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame of time dependent covariates} 11 | 12 | \item{Id}{The column name of the subject Id} 13 | 14 | \item{time.start}{The covariates are valid for the time [time.start,time.end] where 15 | time.start is the column name of time.start} 16 | 17 | \item{time.end}{The covariates are valid for the time [time.start,time.end] where 18 | time.end is the column name of time.end} 19 | } 20 | \value{ 21 | A \code{ScoreTD} object 22 | } 23 | \description{ 24 | Create a valid \code{ScoreTD} object 25 | } 26 | 27 | -------------------------------------------------------------------------------- /man/ScoreTD.object.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/timedependent.R 3 | \name{ScoreTD.object} 4 | \alias{ScoreTD.object} 5 | \title{A \code{ScoreTD} object} 6 | \description{ 7 | This data frame holds time dependent covariates for 8 | use with risk score imputation 9 | } 10 | \details{ 11 | The 
data frame contains the following columns: 12 | 'Id' for subject ID \cr 13 | 'time.start' and 'time.end' the range of time for which 14 | the covariate values are valid - i.e. [time.start,time.end] \cr 15 | Additional columns are the time dependent covariates 16 | 17 | All data for a single subject should be stored in consecutive rows, sorted 18 | by time and the starting time of a row should match the ending time of the previous row 19 | } 20 | \seealso{ 21 | \code{\link{MakeTimeDepScore}} 22 | } 23 | 24 | -------------------------------------------------------------------------------- /man/GammaImputedData.object.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gammaImputeData.R 3 | \name{GammaImputedData.object} 4 | \alias{GammaImputedData.object} 5 | \title{\code{GammaImputedData} object} 6 | \description{ 7 | An object which contains 8 | } 9 | \section{Slots}{ 10 | 11 | \describe{ 12 | \item{\code{data}}{A data frame containing the time to event data 13 | with 3 new columns impute.time and impute.event, the imputed event/censoring times and event indicators 14 | (for subjects whose data is not imputed these columns contain the unchanged event/censoring time and 15 | event indicator) and internal_gamma_val which is the value of gamma used for each subject in this data set} 16 | 17 | \item{\code{default.formula}}{The default model formula which will be used when fitting the imputed data} 18 | }} 19 | 20 | -------------------------------------------------------------------------------- /man/ScoreStatList.object.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoreStatSet.R 3 | \name{ScoreStatList.object} 4 | \alias{ScoreStatList.object} 5 | \title{ScoreStatList} 6 | \description{ 7 | The object containing the results of 
fitting models to 8 | a \code{ScoreImputedSet} object. 9 | } 10 | \details{ 11 | A \code{summary.ScoreStatList} has been implemented. 12 | 13 | The object contains the following 14 | } 15 | \section{Slots}{ 16 | 17 | \describe{ 18 | \item{\code{fits}}{A list of \code{ScoreStat} objects containing the model fits for 19 | the imputed data sets} 20 | 21 | \item{\code{statistics}}{A \code{ScoreStatSet} object containing the statistics} 22 | 23 | \item{\code{m}}{The number of model fits} 24 | }} 25 | \seealso{ 26 | \code{\link{ScoreStatSet.object}} \code{\link{ScoreStat.object}} 27 | } 28 | 29 | -------------------------------------------------------------------------------- /man/ScoreStatSet.object.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoreStatSet.R 3 | \name{ScoreStatSet.object} 4 | \alias{ScoreStatSet.object} 5 | \title{An object which contains the test statistic and estimators for 6 | a set of model fits to imputed data using risk score imputation} 7 | \description{ 8 | The object is a Mx3 matrix, one row per imputed data set 9 | and columns: estimate (the point estimates), var (their variances) 10 | and Z (the test statistic). 
M must be > 4 11 | } 12 | \details{ 13 | Note the Z should be ~ standard normal (so we do not use the chi_squared 14 | test statistic see \code{\link{ScoreStat.object}} for further details) 15 | 16 | The summary.ScoreStatSet function will apply the MI averaging procedures 17 | and estimates of the test statistic and p-value 18 | } 19 | \seealso{ 20 | \code{\link{summary.ScoreStatSet}} 21 | } 22 | 23 | -------------------------------------------------------------------------------- /man/GammaStatList.object.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gammaStat.R 3 | \name{GammaStatList.object} 4 | \alias{GammaStatList.object} 5 | \title{\code{GammaStatList} object} 6 | \description{ 7 | The object containing the results of fitting models to 8 | a \code{GammaImputedSet} object. 9 | } 10 | \details{ 11 | A \code{summary.GammaStatList} has been implemented which performs 12 | Rubin's multiple imputation rules. 
13 | 14 | The object contains the following 15 | } 16 | \section{Slots}{ 17 | 18 | \describe{ 19 | \item{\code{fits}}{A list of \code{GammaStat} objects containing the model fits for 20 | the imputed data sets} 21 | 22 | \item{\code{statistics}}{A list with two elements: estimates and vars which contain the coefficient 23 | estimates and their variances one column per covariate one row per imputed data set} 24 | 25 | \item{\code{m}}{The number of model fits} 26 | }} 27 | 28 | -------------------------------------------------------------------------------- /man/InformativeCensoring-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dataDescription.R 3 | \docType{package} 4 | \name{InformativeCensoring-package} 5 | \alias{InformativeCensoring} 6 | \alias{InformativeCensoring-package} 7 | \title{Perform methods of multiple imputation for 8 | time to event data} 9 | \description{ 10 | Perform methods of multiple imputation for 11 | time to event data 12 | } 13 | \details{ 14 | See Nonparametric comparison of two survival functions with 15 | dependent censoring via nonparametric multiple imputation. Hsu and Taylor 16 | Statistics in Medicine (2009) 28:462-475 for Hsu's method 17 | 18 | See Relaxing the independent censoring assumption in the Cox proportional 19 | hazards model using multiple imputation. 
Jackson et al., Statistics in Medicine 20 | (2014) 33:4681-4694 for Jackson's method 21 | } 22 | \author{ 23 | \email{David.Ruau@astrazeneca.com} 24 | } 25 | 26 | -------------------------------------------------------------------------------- /man/ScoreTimeDep.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dataDescription.R 3 | \docType{data} 4 | \name{ScoreTimeDep} 5 | \alias{ScoreTimeDep} 6 | \title{Simulated time dependent variables for time to event data} 7 | \format{A data.frame containing 1 row per subject-visit} 8 | \description{ 9 | This data set contains time dependent covariates for the 10 | \code{\link{ScoreInd}} time to event data. 11 | } 12 | \section{Fields}{ 13 | 14 | \describe{ 15 | \item{\code{Id}}{The Subject Id} 16 | 17 | \item{\code{start}}{The covariate given in each row are for a given subject from time 'start'...} 18 | 19 | \item{\code{end}}{... 
until time end} 20 | 21 | \item{\code{W1}}{The value of a (binary) time dependent variable 22 | for the subject with the given 'Id' between times 'start' and 'end'} 23 | 24 | \item{\code{W2}}{The value of a (continuous) time dependent variable 25 | for the subject with the given 'Id' between times 'start' and 'end'} 26 | }} 27 | 28 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # DO NOT CHANGE the "init" and "install" sections below 2 | 3 | # Download script file from GitHub 4 | init: 5 | ps: | 6 | $ErrorActionPreference = "Stop" 7 | Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" 8 | Import-Module '..\appveyor-tool.ps1' 9 | 10 | install: 11 | ps: Bootstrap 12 | 13 | # Adapt as necessary starting from here 14 | 15 | build_script: 16 | - travis-tool.sh install_deps 17 | 18 | test_script: 19 | - travis-tool.sh run_tests 20 | 21 | on_failure: 22 | - 7z a failure.zip *.Rcheck\* 23 | - appveyor PushArtifact failure.zip 24 | 25 | artifacts: 26 | - path: '*.Rcheck\**\*.log' 27 | name: Logs 28 | 29 | - path: '*.Rcheck\**\*.out' 30 | name: Logs 31 | 32 | - path: '*.Rcheck\**\*.fail' 33 | name: Logs 34 | 35 | - path: '*.Rcheck\**\*.Rout' 36 | name: Logs 37 | 38 | - path: '\*_*.tar.gz' 39 | name: Bits 40 | 41 | - path: '\*_*.zip' 42 | name: Bits 43 | -------------------------------------------------------------------------------- /man/ScoreImputedData.object.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoreImputedData.R 3 | \name{ScoreImputedData.object} 4 | \alias{ScoreImputedData.object} 5 | \title{\code{ScoreImputedData} object} 6 | \description{ 7 | An object which contains 8 | } 9 | \section{Slots}{ 10 | 11 | \describe{ 12 | 
\item{\code{data}}{A data frame containing the time to event data 13 | with 2 new columns impute.time and impute.event, the imputed event/censoring times and event indicators 14 | (for subjects whose data is not imputed these columns contain the unchanged event/censoring time and 15 | event indicator )} 16 | 17 | \item{\code{col.control}}{The list of column names the risk score imputation method requires see \code{\link{col.headings}} 18 | for further details. If censor.type was not used then \code{col.control$censor.type="using_has.event_col"}} 19 | 20 | \item{\code{default.formula}}{The default model formula which will be used when fitting the imputed data using a Cox model} 21 | }} 22 | 23 | -------------------------------------------------------------------------------- /man/ExtractSingle.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gammaImputeData.R, R/gammaStat.R, R/generics.R, R/scoreImputedSet.R, R/scoreStatSet.R 3 | \name{ExtractSingle} 4 | \alias{ExtractSingle} 5 | \alias{ExtractSingle.GammaImputedSet} 6 | \alias{ExtractSingle.GammaStatList} 7 | \alias{ExtractSingle.ScoreImputedSet} 8 | \alias{ExtractSingle.ScoreStatList} 9 | \title{Extract a single risk score/gamma imputed data set/model fit} 10 | \usage{ 11 | \method{ExtractSingle}{GammaImputedSet}(x, index) 12 | 13 | \method{ExtractSingle}{GammaStatList}(x, index) 14 | 15 | ExtractSingle(x, index) 16 | 17 | \method{ExtractSingle}{ScoreImputedSet}(x, index) 18 | 19 | \method{ExtractSingle}{ScoreStatList}(x, index) 20 | } 21 | \arguments{ 22 | \item{x}{The multiple imputed object} 23 | 24 | \item{index}{Integer, which imputed data set/model fit should be returned} 25 | } 26 | \value{ 27 | The individual data set/model fit 28 | } 29 | \description{ 30 | Extract a single risk score/gamma imputed data set/model fit 31 | } 32 | 33 | 
-------------------------------------------------------------------------------- /man/cox.zph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generics.R 3 | \name{cox.zph} 4 | \alias{cox.zph} 5 | \title{Test Cox proportional hazards assumption} 6 | \usage{ 7 | cox.zph(fit, transform = "km", global = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{fit}{ 11 | the result of fitting a Cox regression model, using the \code{coxph} function. 12 | } 13 | 14 | \item{transform}{ 15 | a character string specifying how the survival times should be transformed 16 | before the test is performed. 17 | Possible values are \code{"km"}, \code{"rank"}, \code{"identity"} or a 18 | function of one argument. 19 | } 20 | 21 | \item{global}{ 22 | should a global chi-square test be done, in addition to the 23 | per-variable tests. 24 | } 25 | 26 | \item{...}{Additional arguments to cox.zph, for example \code{index} if 27 | fit is a \code{GammaStatList} object} 28 | } 29 | \description{ 30 | See cox.zph function in the survival package 31 | } 32 | \seealso{ 33 | \code{\link[survival]{cox.zph}} 34 | } 35 | 36 | -------------------------------------------------------------------------------- /tests/testthat/generate_test_data_gamma.R: -------------------------------------------------------------------------------- 1 | #code for generating gamma imputation test set, this is not exported 2 | #for users but is used in the testthat tests 3 | 4 | set.seed(6110) 5 | 6 | N <- 1000 7 | 8 | Z <- sample(x=0:2,size = N,replace = TRUE,prob = c(0.5,0.3,0.2)) 9 | 10 | Ci <- rexp(n = N,rate = 0.3) 11 | Ti <- vapply(Z,function(x){ 12 | rate <- 0.03 13 | if(x == 1) rate <- 0.05 14 | if(x == 2) rate <- 0.09 15 | rexp(1,rate=rate) 16 | } ,FUN.VALUE = numeric(1)) 17 | 18 | Yi <- pmin(Ti,Ci,3) 19 | delta <- (Ti < Ci & Ti <3) 20 | 21 | W1 <- rbinom(n = N,size=1,p=0.5) 22 | W2 <- rbinom(n = 
N,size=1,p=0.5) 23 | 24 | gamma.dataset <- data.frame(Id=1:N, 25 | Yi=Yi, 26 | delta=delta, 27 | Z=factor(Z), 28 | W1=W1, 29 | W2=W2, 30 | to.impute=(delta==0 & Yi <3), 31 | DCO.time=rep(3,N), 32 | gamma=rep(1,N)) 33 | 34 | #save(gamma.dataset,file="gamma_test.rda") 35 | #must move file into testthat directory 36 | -------------------------------------------------------------------------------- /man/NN.options.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/options.R 3 | \name{NN.options} 4 | \alias{NN.options} 5 | \title{Create a list of options which control the nearest neighbour algorithm 6 | for risk score imputation} 7 | \usage{ 8 | NN.options(NN = 5, w.censoring = 0.2, min.subjects = 20) 9 | } 10 | \arguments{ 11 | \item{NN}{The (maximum) number of subjects to be included in the 12 | risk set} 13 | 14 | \item{w.censoring}{The weighting on the censoring risk score when 15 | calculating distances for the nearest neighbour calculation 16 | A weighting of \code{(1-w.censoring)} is used for the event risk score} 17 | 18 | \item{min.subjects}{If using time dependent score imputation include at least 19 | this number of subjects when fitting the Cox model (i.e. 
include some subjects who were censored/had event 20 | earlier than the censored observation if necessary)} 21 | } 22 | \value{ 23 | A list of options used within the ScoreImputedData 24 | function 25 | } 26 | \description{ 27 | Create a list of options which control the nearest neighbour algorithm 28 | for risk score imputation 29 | } 30 | 31 | -------------------------------------------------------------------------------- /man/ScoreStat.object.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoreStat.R 3 | \name{ScoreStat.object} 4 | \alias{ScoreStat.object} 5 | \title{ScoreStat object} 6 | \description{ 7 | An S3 object which contains the point estimate 8 | and test statistic after fitting a model to 9 | a \code{ScoreImputedData} object. 10 | } 11 | \details{ 12 | The functions \code{print.ScoreStat} and \code{as.vector.ScoreStat} 13 | have been included 14 | 15 | The object contains the following: 16 | 17 | The test statistic should be normally distributed and hence for 18 | the logrank test Z = (O_2 - E_2)/sqrt(V_2), i.e. 
the square root of the standard 19 | Chi squared statistic (with the appropriate sign) 20 | } 21 | \section{Slots}{ 22 | 23 | \describe{ 24 | \item{\code{model}}{The model used to create the fit} 25 | 26 | \item{\code{method}}{The method used for the fit} 27 | 28 | \item{\code{estimate}}{A point estimate of the test statistic} 29 | 30 | \item{\code{var}}{The estimate for the variance of the test statistic} 31 | 32 | \item{\code{statistic}}{The test statistic given by \code{estimate/sqrt(var)}} 33 | }} 34 | 35 | -------------------------------------------------------------------------------- /man/summary.ScoreStatSet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoreStatSet.R 3 | \name{summary.ScoreStatSet} 4 | \alias{summary.ScoreStatSet} 5 | \alias{summary.ScoreStatSet.object} 6 | \title{Summary object of \code{ScoreStatSet} object} 7 | \description{ 8 | This object contains the multiple imputed 9 | averages/p-values of a set of estimates from 10 | risk score imputed data sets. 11 | } 12 | \details{ 13 | A \code{print.summary.ScoreStatSet} function has been implemented 14 | 15 | This object contains three lists meth1 and meth2 and methRubin 16 | meth1 averages the point estimates to produce an F test statistic, 17 | meth2 averages the test statistics and produces a t test statistic 18 | and methRubin follows Rubin's standard rules and is used for calculating 19 | confidence intervals 20 | 21 | See the vignette for further details. 22 | 23 | meth1, meth2 and methRubin are lists with the following elements: 24 | estimate: average estimator for meth1, NOTE: for meth2 this is the average test statistic, \cr 25 | var: estimate of variance of "estimate" field \cr 26 | test.stat: test statistic \cr 27 | distribution: distribution of statistical test (i.e. 
F or t) \cr 28 | p.value: p-value of test \cr 29 | } 30 | 31 | -------------------------------------------------------------------------------- /man/ScoreImputedSet.object.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoreImputedSet.R 3 | \name{ScoreImputedSet.object} 4 | \alias{ScoreImputedSet.object} 5 | \title{\code{ScoreImputedSet} object} 6 | \description{ 7 | An object which contains the set of score imputed data frames. 8 | Use the \code{ExtractSingle} function to extract a single 9 | \code{ScoreImputedData} object. Use the \code{ScoreStat} function to fit models 10 | to the entire set of imputed data frames 11 | } 12 | \details{ 13 | It contains the following: 14 | } 15 | \section{Slots}{ 16 | 17 | \describe{ 18 | \item{\code{data}}{A data frame containing the unimputed time to event data} 19 | 20 | \item{\code{col.control}}{The list of column names the score imputation method requires see \code{\link{col.headings}} 21 | for further details} 22 | 23 | \item{\code{m}}{The number of imputed data sets} 24 | 25 | \item{\code{impute.time}}{A matrix (1 column per imputed data set) containing the imputed times} 26 | 27 | \item{\code{impute.event}}{A matrix (1 column per imputed data set) containing the imputed event indicators} 28 | 29 | \item{\code{default.formula}}{The default model formula which will be used when fitting the imputed data using a Cox model} 30 | }} 31 | \seealso{ 32 | \code{\link{ScoreImputedData.object}} 33 | } 34 | 35 | -------------------------------------------------------------------------------- /man/GammaImputedSet.object.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gammaImputeData.R 3 | \name{GammaImputedSet.object} 4 | \alias{GammaImputedSet.object} 5 | \title{\code{GammaImputedSet} 
object} 6 | \description{ 7 | An object which contains the set of gamma imputed data frames. 8 | Use the \code{ExtractSingle} function to extract a single 9 | \code{GammaImputedData} objects. Use the ImputeStat function to fit models 10 | to the entire set of imputed data frames 11 | } 12 | \details{ 13 | It contains the following: 14 | } 15 | \section{Slots}{ 16 | 17 | \describe{ 18 | \item{\code{data}}{A data frame containing the unimputed time to event data (along with a column internal_gamma_val 19 | which is the value of gamma used for each subject in this data set)} 20 | 21 | \item{\code{m}}{The number of imputed data sets} 22 | 23 | \item{\code{gamma.factor}}{The value of gamma.factor used with the imputation} 24 | 25 | \item{\code{impute.time}}{A matrix (1 column per imputed data set) containing the imputed times} 26 | 27 | \item{\code{impute.event}}{A matrix (1 column per imputed data set) containing the imputed event indicators} 28 | 29 | \item{\code{default.formula}}{The default model formula which will be used when fitting the imputed data} 30 | }} 31 | \seealso{ 32 | \code{\link{GammaImputedData.object}} 33 | } 34 | 35 | -------------------------------------------------------------------------------- /man/ScoreInd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dataDescription.R 3 | \docType{data} 4 | \name{ScoreInd} 5 | \alias{ScoreInd} 6 | \title{Simulated time to event data with 5 time independent covariates} 7 | \format{A data.frame containing a row per subject with eleven columns:} 8 | \description{ 9 | This dataset is inspired by the simulation described in Hsu and Taylor, 10 | Statistics in Medicine (2009) 28:462-475 with an additional DCO.time column 11 | } 12 | \section{Fields}{ 13 | 14 | \describe{ 15 | \item{\code{Id}}{subject identifier} 16 | 17 | \item{\code{arm}}{factor for treatment group control=0, active=1} 18 | 19 
| \item{\code{Z1}}{binary time independent covariate} 20 | 21 | \item{\code{Z2}}{continuous time independent covariate} 22 | 23 | \item{\code{Z3}}{binary time independent covariate} 24 | 25 | \item{\code{Z4}}{continuous time independent covariate} 26 | 27 | \item{\code{Z5}}{binary time independent covariate} 28 | 29 | \item{\code{event}}{event indicator (1 yes, 0 no)} 30 | 31 | \item{\code{time}}{subject censoring/event time (in years)} 32 | 33 | \item{\code{to.impute}}{logical, should an event time be imputed for this subject? 34 | (this is ignored if subject has event time)} 35 | 36 | \item{\code{DCO.time}}{The time the subject would have been censored if they had not 37 | had an event or been censored before the data cut off date} 38 | }} 39 | 40 | -------------------------------------------------------------------------------- /man/col.headings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/options.R 3 | \name{col.headings} 4 | \alias{col.headings} 5 | \title{Specify the columns of the data frame required by score imputation method} 6 | \usage{ 7 | col.headings(arm, has.event, time, Id, DCO.time, to.impute, 8 | censor.type = NULL) 9 | } 10 | \arguments{ 11 | \item{arm}{column name which will contain the subject's treatment group} 12 | 13 | \item{has.event}{column name which will contain whether 14 | the subject has an event (1) or not(0)} 15 | 16 | \item{time}{column name of censoring/event time} 17 | 18 | \item{Id}{column name of subject Id} 19 | 20 | \item{DCO.time}{column name of the time at which the subject would have been 21 | censored had they not had an event before data cut off} 22 | 23 | \item{to.impute}{column name of the logical column as to whether events should 24 | be imputed} 25 | 26 | \item{censor.type}{column name of the column containing the reason for censoring, 27 | 0=had event, 1=non-administrative censoring 
2=administrative censoring -- only subjects 28 | with 1 in this column count as having an `event' in the Cox model for censoring 29 | (optionally used -- if not used then all subjects who are censored are used)} 30 | } 31 | \value{ 32 | A list contain the given arguments 33 | } 34 | \description{ 35 | Specify the columns of the data frame required by score imputation method 36 | } 37 | 38 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(ExtractSingle,GammaImputedSet) 4 | S3method(ExtractSingle,GammaStatList) 5 | S3method(ExtractSingle,ScoreImputedSet) 6 | S3method(ExtractSingle,ScoreStatList) 7 | S3method(ExtractSingle,default) 8 | S3method(ImputeStat,GammaImputedData) 9 | S3method(ImputeStat,GammaImputedSet) 10 | S3method(ImputeStat,ScoreImputedData) 11 | S3method(ImputeStat,ScoreImputedSet) 12 | S3method(ImputeStat,default) 13 | S3method(ScoreStatSet,default) 14 | S3method(ScoreStatSet,matrix) 15 | S3method(as.vector,ScoreStat) 16 | S3method(confint,summary.ScoreStatSet) 17 | S3method(cox.zph,GammaStat) 18 | S3method(cox.zph,GammaStatList) 19 | S3method(cox.zph,default) 20 | S3method(print,GammaStat) 21 | S3method(print,ScoreStat) 22 | S3method(print,summary.ScoreStatSet) 23 | S3method(summary,GammaStatList) 24 | S3method(summary,ScoreStatList) 25 | S3method(summary,ScoreStatSet) 26 | export(ExtractSingle) 27 | export(ImputeStat) 28 | export(MakeTimeDepScore) 29 | export(NN.options) 30 | export(ScoreImpute) 31 | export(ScoreStatSet) 32 | export(col.headings) 33 | export(cox.zph) 34 | export(gammaImpute) 35 | import(parallel) 36 | import(survival) 37 | importFrom(boot,boot) 38 | importFrom(dplyr,inner_join) 39 | importFrom(stats,formula) 40 | importFrom(stats,model.frame) 41 | importFrom(stats,na.fail) 42 | importFrom(stats,pf) 43 | importFrom(stats,predict) 44 | 
importFrom(stats,pt) 45 | importFrom(stats,qt) 46 | importFrom(stats,quantile) 47 | importFrom(stats,runif) 48 | importFrom(stats,setNames) 49 | importFrom(stats,terms) 50 | importFrom(stats,update) 51 | importFrom(stats,var) 52 | importFrom(stats,vcov) 53 | importFrom(survival,Surv) 54 | importFrom(utils,tail) 55 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: InformativeCensoring 2 | Type: Package 3 | Title: Multiple Imputation for Informative Censoring 4 | Version: 0.3.4 5 | Authors@R: c( 6 | person("David", "Ruau", 7 | email='david.ruau@astrazeneca.com', 8 | role=c("aut")), 9 | person("Nikolas", "Burkoff", 10 | role=c("aut")), 11 | person("Jonathan", "Bartlett", 12 | email='jonathan.bartlett1@astrazeneca.com', 13 | role=c("aut","cre")), 14 | person("Dan", "Jackson", 15 | email='dan.jackson@mrc-bsu.cam.ac.uk', 16 | role=c("aut")), 17 | person("Edmund", "Jones", 18 | email='epaj2@medschl.cam.ac.uk', 19 | role=c("aut")), 20 | person("Martin", "Law", 21 | email='martin.law@mrc-bsu.cam.ac.uk', 22 | role=c("aut")), 23 | person("Paul", "Metcalfe", 24 | email='paul.metcalfe@astrazeneca.com', 25 | role=c("aut")) 26 | ) 27 | Maintainer: Jonathan Bartlett 28 | Author: David Ruau [aut], 29 | Nikolas Burkoff [aut], 30 | Jonathan Bartlett [aut, cre], 31 | Dan Jackson [aut], 32 | Edmund Jones [aut], 33 | Martin Law [aut], 34 | Paul Metcalfe [aut] 35 | Description: Multiple Imputation for Informative Censoring. 36 | This package implements two methods. Gamma Imputation 37 | from Jackson et al. (2014) and Risk Score Imputation 38 | from Hsu et al. (2009) . 
39 | License: GPL (>= 2) | file LICENSE 40 | LazyLoad: yes 41 | Depends: 42 | R (>= 3.1.2), 43 | survival (>= 2.36-1) 44 | Imports: 45 | boot, 46 | dplyr (>= 0.4.3), 47 | parallel 48 | Suggests: 49 | knitr, 50 | testthat 51 | VignetteBuilder: knitr 52 | URL: https://github.com/scientific-computing-solutions/InformativeCensoring 53 | RoxygenNote: 5.0.1 54 | -------------------------------------------------------------------------------- /vignettes/bibliography.bib: -------------------------------------------------------------------------------- 1 | @article{Hsu:2009, 2 | title={Nonparametric comparison of two survival functions with dependent censoring via nonparametric multiple imputation}, 3 | author={Hsu, Chiu-Hsieh and Taylor, Jeremy MG}, 4 | journal={Statistics in Medicine}, 5 | volume={28}, 6 | number={3}, 7 | pages={462--475}, 8 | year={2009}, 9 | publisher={Wiley Online Library} 10 | } 11 | 12 | @article{Jackson:2014, 13 | title={{Relaxing the independent censoring assumption in the Cox proportional hazards model using multiple imputation}}, 14 | author={Jackson, Dan and White, Ian and Seaman, Shaun and Evans, Hannah and Baisley, Kathy and Carpenter, James}, 15 | journal={Statistics in Medicine}, 16 | volume={33}, 17 | number={27}, 18 | pages={4681--4694}, 19 | year={2014}, 20 | publisher={Wiley Online Library} 21 | } 22 | 23 | @article{Meng:1994, 24 | title={{Multiple-Imputation Inferences with Uncongenial Sources of Input}}, 25 | author={Meng, Xiao-Li}, 26 | journal={Statistical Science}, 27 | volume={9}, 28 | number={4}, 29 | pages={538--558}, 30 | year={1994} 31 | } 32 | 33 | @article{Rubin:1987, 34 | title={{Multiple Imputation for Nonresponse in Surveys (Wiley Series in Probability and Statistics)}}, 35 | author={Rubin, Donald B}, 36 | year={1987}, 37 | publisher={Wiley} 38 | } 39 | 40 | @article{Li:1991, 41 | title={Significance levels from repeated p-values with multiply-imputed data}, 42 | author={Li, Kim-Hung and Meng, Xiao-Li and Raghunathan, 
Trivellore E and Rubin, Donald B}, 43 | journal={Statistica Sinica}, 44 | pages={65--92}, 45 | year={1991}, 46 | publisher={JSTOR} 47 | } 48 | 49 | @article{Rubin:1991, 50 | title={Multiple imputation in health-are databases: An overview and some applications}, 51 | author={Rubin, Donald B and Schenker, Nathaniel}, 52 | journal={Statistics in medicine}, 53 | volume={10}, 54 | number={4}, 55 | pages={585--598}, 56 | year={1991}, 57 | publisher={Wiley Online Library} 58 | } -------------------------------------------------------------------------------- /R/scoreStat.R: -------------------------------------------------------------------------------- 1 | #This file contains the functions associated with a model fit 2 | #of a single risk score imputed data set 3 | 4 | ##' ScoreStat object 5 | ##' 6 | ##' An S3 object which contains the point estimate 7 | ##' and test statistic after fitting a model to 8 | ##' a \code{ScoreImputedData} object. 9 | ##' 10 | ##' The functions \code{print.ScoreStat} and \code{as.vector.ScoreStat} 11 | ##' have been included 12 | ##' 13 | ##' The object contains the following: 14 | ##' @slot model The model used to create the fit 15 | ##' @slot method The method used for the fit 16 | ##' @slot estimate A point estimate of the test statistic 17 | ##' @slot var The estimate for the variance of the test statistic 18 | ##' @slot statistic The test statistic given by \code{estimate/sqrt(var)} 19 | ##' 20 | ##' @details The test statistic should be normally distributed and hence for 21 | ##' the logrank test Z = (O_2 - E_2)/sqrt(V_2), i.e. 
the square root of the standard
##' Chi squared statistic (with the appropriate sign)
##'
##' @name ScoreStat.object
NULL

##' @export
ImputeStat.ScoreImputedData <- function(object,method=c("logrank","Wilcoxon","Cox")[1],formula=NULL,...){
    if(!method %in% c("logrank","Wilcoxon","Cox")){
        stop("Invalid method for risk score imputation it must be logrank, Wilcoxon or Cox")
    }

    #Fit the requested model; the helper returns a list with model, method,
    #estimate and var components (see R/tests.R)
    test.stats <- .imputeStat.internal(object,method,formula,...)
    #only take the first covariate: the treatment arm
    test.stats$estimate <- test.stats$estimate[1]
    test.stats$var <- test.stats$var[1]
    #Z statistic, treated as approximately normal (see ScoreStat.object docs above)
    test.stats$statistic <- test.stats$estimate/sqrt(test.stats$var)

    class(test.stats) <- "ScoreStat"
    test.stats
}


#Coerce a ScoreStat object into a named numeric vector
#c(estimate, var, statistic) -- used when stacking fits into a ScoreStatSet
##' @method as.vector ScoreStat
##' @export
as.vector.ScoreStat <- function(x,mode="any"){
    ans <- c(x$estimate,x$var,x$statistic)
    names(ans) <- c("estimate","var","statistic")
    ans
}


#Pretty-printer for a single ScoreStat fit
##' @export
print.ScoreStat <- function(x,...){
    cat("Method used:",x$method,fill = TRUE)
    cat("Point Estimate:",x$estimate,fill=TRUE)
    cat("Variance estimate:",x$var,fill=TRUE)
    #BUGFIX: output label previously read "Z stastistic"
    cat("Z statistic:",x$statistic,fill=TRUE)
    cat("Use x$model to view the model fit")
}
--------------------------------------------------------------------------------
/inst/validation/score.R:
--------------------------------------------------------------------------------
#Code used to generate time independent score imputation data set
#code for time dependent dataset is below


#Generate the five baseline covariates for N subjects:
#Z1, Z3, Z5 binary (p=0.5); Z2, Z4 uniform on [0,1]
Gen.Cova <- function(N){
    data.frame(Z1=rbinom(n=N,size=1,prob=0.5),
               Z2=runif(n=N, min = 0,max=1),
               Z3=rbinom(n=N,size=1,prob=0.5),
               Z4=runif(n=N, min = 0,max=1),
               Z5=rbinom(n=N,size=1,prob=0.5))
}

#Inverse-transform sampling of random times: 'values' scales each subject's
#rate and 'degree' the polynomial power of time (one draw per element of values)
Get.times <- function(degree,values){
    ((degree+1)*(-log(runif(length(values)))/values))^(1/(degree+1))
}

Gen.Event
<- function(df){
    #flip the 0/1 arm coding so TRT is 1 for the arm coded 0
    TRT <- 1-df$arm
    #subject-specific rate: log-linear in treatment and covariates
    val <- exp(0.75*TRT-2*df$Z1+0.5*df$Z2-2*df$Z3+2*df$Z4+2*df$Z5)
    Get.times(4,val)
}

#Censoring times also depend on treatment and covariates,
#i.e. censoring is informative by construction
Gen.Censor <- function(df){
    TRT <- 1-df$arm
    val <- exp(-3*(TRT+0.1)*df$Z1+0.5*df$Z2-2*(TRT+0.1)*df$Z3+1.5*df$Z4+ 2*(TRT+0.1)*df$Z5 )
    Get.times(3,val)
}



#Fixed seed so the shipped ScoreInd/ScoreTimeDep data sets are reproducible
set.seed(1120)

N.placebo <- 200
N.active <- 200
N <- N.placebo+N.active
df <- data.frame(Id=1:N,
                 arm=c(rep(0,N.placebo),rep(1,N.active)))

df <- cbind(df,Gen.Cova(N))

event.times <- Gen.Event(df)
censor.times <- Gen.Censor(df)

#observed time is the earlier of event and censoring; event=1 if the event came first
df$event <- ifelse(event.times < censor.times,1,0)
df$time <- pmin(event.times,censor.times)
df$arm <- as.factor(df$arm)
#impute an event time for every censored subject
df$to.impute <- df$event==0
#data cut-off time, strictly after the observed time
df$DCO.time <- df$time*runif(N,min = 1,max=2)

ScoreInd <- df

#####################################################

#Generate time dependent covariate data frame

#Binary time dependent covariate: 0 until a Poisson(3)-distributed visit
#index U, then 1 from visit U onwards (all 0 if U>num, all 1 if U==0)
getW1 <- function(num){
    U <- rpois(n=1,lambda = 3)
    if(U>num){
        return(rep(0,num))
    }
    if(U==0){
        return(rep(1,num))
    }
    c(rep(0,U-1),rep(1,num-U+1))

}

#Continuous, monotonically increasing time dependent covariate
getW2 <- function(num){
    start <- 1+rgamma(n = 1,shape=0.5,scale=1)
    start*(1:num)^1.2
}



visit <- 0.2 #in years

#One row per subject-visit interval [start,end); the final interval is
#truncated at the subject's observed censoring/event time
ans <- lapply(1:N,function(x){
    num <- 1 + floor(df[x,"time"]/visit)
    start <- visit*(0:(num-1))
    end <- start + visit
    end[num] <- df[x,"time"]

    #the value of this assignment is what lapply collects for subject x
    my.df <- data.frame(Id=rep(x,num),
                        start=start,
                        end=end,
                        W1=getW1(num),
                        W2=getW2(num))
})

ScoreTimeDep <- do.call("rbind", ans)
rownames(ScoreTimeDep) <- NULL

#NOTE(review): devtools::use_data has been superseded by usethis::use_data
#in recent devtools releases -- confirm before re-running this script
devtools::use_data(ScoreInd,overwrite = TRUE)
devtools::use_data(ScoreTimeDep,overwrite = TRUE)
--------------------------------------------------------------------------------
/R/scoreImputedSet.R:
--------------------------------------------------------------------------------
#This file contains code and documentation
#associated with the ScoreImputedSet object (a set of risk score imputed
#datasets)

##' \code{ScoreImputedSet} object
##'
##' An object which contains the set of score imputed data frames.
##' Use the \code{ExtractSingle} function to extract a single
##' \code{ScoreImputedData} object. Use the \code{ScoreStat} function to fit models
##' to the entire set of imputed data frames
##'
##' It contains the following:
##' @slot data A data frame containing the unimputed time to event data
##' @slot col.control The list of column names the score imputation method requires see \code{\link{col.headings}}
##' for further details
##' @slot m The number of imputed data sets
##' @slot impute.time A matrix (1 column per imputed data set) containing the imputed times
##' @slot impute.event A matrix (1 column per imputed data set) containing the imputed event indicators
##' @slot default.formula The default model formula which will be used when fitting the imputed data using a Cox model
##' @name ScoreImputedSet.object
##' @seealso \code{\link{ScoreImputedData.object}}
NULL


#Extract the index-th imputed data set from a ScoreImputedSet
##' @name ExtractSingle
##' @export
ExtractSingle.ScoreImputedSet <- function(x,index){
    #delegate extraction (and index validation) to the shared internal helper;
    #fit=FALSE -- presumably "extract a data set, not a model fit"; see .internalExtract
    retVal <- .internalExtract(x,index,fit=FALSE)
    class(retVal) <- "ScoreImputedData"
    retVal
}



##' @param parallel The type of parallel operation to be used (if any), can be used for \code{GammaImputedSet} and \code{ScoreImputedSet}
##' @param ncpus integer: number of processes to be used in parallel operation: typically one would choose this to be
##' the number of available CPUs, can be used for \code{GammaImputedSet} and \code{ScoreImputedSet}.
##' @param cl An optional parallel or snow cluster for use if \code{parallel="snow"}.
If not supplied, a
##' cluster on the local machine is created for the duration of the call, can be used for \code{GammaImputedSet} and \code{ScoreImputedSet}.
##' @name ImputeStat
##' @export
ImputeStat.ScoreImputedSet <- function(object,method=c("logrank","Wilcoxon","Cox")[1],formula=NULL,...,
                                       parallel = c("no", "multicore", "snow")[1], ncpus = 1L, cl = NULL){

    #Fit the chosen statistic to every imputed data set (possibly in parallel)
    fits <- .internalImputeStatset(object,method,formula,...,parallel=parallel,ncpus=ncpus,cl=cl)
    #Each fit coerces to c(estimate,var,statistic); stack into an m x 3 matrix
    statistics <- ScoreStatSet(t(vapply(fits,function(x){as.vector(x)},FUN.VALUE=numeric(3))))

    retVal <- list(fits=fits,
                   statistics=statistics,
                   m=object$m)

    class(retVal) <- "ScoreStatList"
    return(retVal)
}

--------------------------------------------------------------------------------
/R/tests.R:
--------------------------------------------------------------------------------
#The test statistics for fitting models to the imputed data
#they should return a list with 4 arguments which will
#be set to be a ScoreStat.object or GammaStat.object at a higher level (and the
#test statistic(s) will be calculated as estimate/sqrt(var) if needed)

.Testlogrank <- function(object,formula,...){
    .Testsurvdiff(object,formula,rho=0,method="logrank",...)
}

#Note this is the Peto & Peto modification of the Gehan-Wilcoxon test
.Testwilcoxon <- function(object,formula,...){
    .Testsurvdiff(object,formula,rho=1,method="Wilcoxon",...)
}


#Run survival::survdiff and extract an O-E estimate (second group, i.e. the
#treatment arm) and its variance
.Testsurvdiff <- function(object,formula,rho,method,...){

    model <- survdiff(formula=formula,data=object$data,rho=rho,...)

    #BUGFIX: model$obs/model$exp are matrices for stratified tests.
    #Testing class(model$obs)=="matrix" breaks under R >= 4.0, where matrices
    #have class c("matrix","array"): the comparison is a length-2 logical and
    #if() on it errors from R 4.2. is.matrix() is the robust idiom.
    list(model=model,
         method=paste(method,"(estimator for O-E)"),
         estimate=if(is.matrix(model$obs)) sum(model$obs[2,]-model$exp[2,]) else (model$obs[2]-model$exp[2]),
         var=model$var[2,2])
}


.TestWeibull <- function(object,formula,...){
    .Testsurv("weibull",survreg,object,formula,dist="weibull",...)
}

.TestExponential <- function(object,formula,...){
    .Testsurv("exponential",survreg,object,formula,dist="exponential",...)
}

.Testcox <- function(object,formula,...){
    .Testsurv("Cox",coxph,object,formula,...)
}

#Fit a Cox or parametric survival model and pull out the coefficient
#estimates together with their variances
.Testsurv <- function(method,model.function,object,formula,...){
    model <- model.function(formula,data=object$data,model=TRUE,...)
    #we need to use [1:length(...)] below as the variances of the scale variable(s)
    #are included in the vcov matrix if the Weibull method is used
    list(model=model,
         method=method,
         estimate=model$coefficients,
         var=diag(vcov(model))[1:length(model$coefficients)])
}


#if no formula argument is given then we are using the default from the object also checking if a formula
#argument is given then it is valid see .validRHSformula function for details
.getFormula <- function(formula,arm,method){

    #check the formula is valid
    .validRHSFormula(formula,arm)

    if(method %in% c("logrank","Wilcoxon")){
        #checking all but treatment arm are strata if using logrank/Wilcoxon
        tms<-terms(formula,specials=c("strata"))
        if(length(untangle.specials(tms,special = "strata")$vars) != length(attr(terms(formula),"term.labels")) - 1){
            stop("Cannot include non-stratified covariates in logrank or Wilcoxon method")
        }
    }

    #Add the appropriate left hand side of the formula
    update(formula,paste("Surv(impute.time,impute.event)~."))
}


--------------------------------------------------------------------------------
/R/dataDescription.R: -------------------------------------------------------------------------------- 1 | #This file contains the roxygen comments for the package and 2 | #internal data frames 3 | 4 | 5 | ##' Perform methods of multiple imputation for 6 | ##' time to event data 7 | ##' 8 | ##' See Nonparametric comparison of two survival functions with 9 | ##' dependent censoring via nonparametric multiple imputation. Hsu and Taylor 10 | ##' Statistics in Medicine (2009) 28:462-475 for Hsu's method 11 | ##' 12 | ##' See Relaxing the independent censoring assumption in the Cox proportional 13 | ##' hazards model using multiple imputation. Jackson et al., Statistics in Medicine 14 | ##' (2014) 33:4681-4694 for Jackson's method 15 | ##' 16 | ##' @name InformativeCensoring-package 17 | ##' @aliases InformativeCensoring 18 | ##' @docType package 19 | ##' @title Perform methods of multiple imputation for 20 | ##' time to event data 21 | ##' @author \email{David.Ruau@@astrazeneca.com} 22 | NULL 23 | 24 | 25 | ##' Simulated time to event data with 5 time independent covariates 26 | ##' 27 | ##' This dataset is inspired by the simulation described in Hsu and Taylor, 28 | ##' Statistics in Medicine (2009) 28:462-475 with an additional DCO.time column 29 | ##' 30 | ##' @name ScoreInd 31 | ##' @docType data 32 | ##' @format A data.frame containing a row per subject with eleven columns: 33 | ##' @field Id subject identifier 34 | ##' @field arm factor for treatment group control=0, active=1 35 | ##' @field Z1 binary time independent covariate 36 | ##' @field Z2 continuous time independent covariate 37 | ##' @field Z3 binary time independent covariate 38 | ##' @field Z4 continuous time independent covariate 39 | ##' @field Z5 binary time independent covariate 40 | ##' @field event event indicator (1 yes, 0 no) 41 | ##' @field time subject censoring/event time (in years) 42 | ##' @field to.impute logical, should an event time be imputed for this subject? 
43 | ##' (this is ignored if subject has event time) 44 | ##' @field DCO.time The time the subject would have been censored if they had not 45 | ##' had an event or been censored before the data cut off date 46 | NULL 47 | 48 | ##' Simulated time dependent variables for time to event data 49 | ##' 50 | ##' This data set contains time dependent covariates for the 51 | ##' \code{\link{ScoreInd}} time to event data. 52 | ##' 53 | ##' @name ScoreTimeDep 54 | ##' @docType data 55 | ##' @format A data.frame containing 1 row per subject-visit 56 | ##' @field Id The Subject Id 57 | ##' @field start The covariate given in each row are for a given subject from time 'start'... 58 | ##' @field end ... until time end 59 | ##' @field W1 The value of a (binary) time dependent variable 60 | ##' for the subject with the given 'Id' between times 'start' and 'end' 61 | ##' @field W2 The value of a (continuous) time dependent variable 62 | ##' for the subject with the given 'Id' between times 'start' and 'end' 63 | NULL 64 | -------------------------------------------------------------------------------- /tests/testthat/test-scoreExtract.R: -------------------------------------------------------------------------------- 1 | context("ScoreExtract") 2 | 3 | 4 | test_that("ScoreImputeSet_extract",{ 5 | set.seed(25) 6 | data(ScoreInd) 7 | 8 | ans <- ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5, 9 | col.control=col.headings(has.event="event", 10 | time="time", 11 | Id="Id", 12 | arm="arm", 13 | DCO.time="DCO.time", 14 | to.impute="to.impute"), 15 | NN.control=NN.options(NN=5,w.censoring = 0.2), 16 | m=6,bootstrap.strata=ScoreInd$arm) 17 | 18 | expect_equal("ScoreImputedSet",class(ans)) 19 | 20 | expect_equal(6,ans$m) 21 | 22 | expect_equal(col.headings(has.event="event", 23 | time="time", 24 | Id="Id", 25 | arm="arm", 26 | DCO.time="DCO.time", 27 | to.impute="to.impute", 28 | censor.type="using_has.event_col"),ans$col.control) 29 | 30 | expect_equal(ScoreInd,ans$data) 31 | 32 | 
expect_true(is.matrix(ans$impute.time))  #BUGFIX: expect_equal("matrix",class(x)) fails
expect_true(is.matrix(ans$impute.event)) #under R >= 4.0 where class is c("matrix","array")
    expect_equal(400,nrow(ans$impute.time))
    expect_equal(6,ncol(ans$impute.event))


    #out-of-range / non-integer indices and wrong object types must error
    expect_error(ExtractSingle(ScoreInd,index=5))
    expect_error(ExtractSingle(ans,index=4.5))
    expect_error(ExtractSingle(ans,index=7))
    expect_error(ExtractSingle(ans,index=0))

    #the extracted single data set matches the corresponding matrix columns
    my.data <- ExtractSingle(ans,index=5)
    expect_equal("ScoreImputedData",class(my.data))
    expect_equal(my.data$data$impute.time,ans$impute.time[,5])
    expect_equal(my.data$data$impute.event,ans$impute.event[,5])

    my.data <- ExtractSingle(ans,index=3)
    expect_equal(my.data$data$impute.time,ans$impute.time[,3])
    expect_equal(my.data$data$impute.event,ans$impute.event[,3])
})


#Extracting a single fit from a set of fits must agree with fitting the
#corresponding single extracted data set
test_that("ExtractSingle.Stat",{
    set.seed(25)
    data(ScoreInd)

    ans <- ScoreImpute(data=ScoreInd,
                       event.model=~Z1+Z2+Z3+Z4+Z5,
                       col.control=col.headings(has.event="event",
                                                time="time",
                                                Id="Id",
                                                arm="arm",
                                                DCO.time="DCO.time",
                                                to.impute="to.impute"),
                       NN.control=NN.options(NN=5,w.censoring = 0.2),
                       m=5,bootstrap.strata=ScoreInd$arm)
    fits <- ImputeStat(ans,method="Cox")

    onefit <- ExtractSingle(fits,index=3)

    onedata <- ExtractSingle(ans,index=3)
    expect_equal(onefit,ImputeStat(onedata,method="Cox"))


})
--------------------------------------------------------------------------------
/R/parallel.R:
--------------------------------------------------------------------------------
#This function contains a wrapper for parallelizing code
#inspired by the boot package (see boot package and parallel
#package vignette for further details)
#Also the validation routines of the parallel arguments

#Run functions in parallel
#@inheritparam gammaImpute
#@param lapply.list the list or vector to be iterated over when using parallel apply
#@param FUN the function to be called inside parallel apply
#@param ... Additional arguments to be passed to FUN
#@return list of answers
#See boot::boot and parallel package vignette for further details
parallelRun <- function(parallel,ncpus,cl,lapply.list,FUN,...){

    #code from boot package
    have_mc <- have_snow <- FALSE
    #detecting local machine parameters
    if (parallel == "multicore")
        have_mc <- .Platform$OS.type != "windows"
    else if (parallel == "snow")
        have_snow <- TRUE

    if(!have_mc && !have_snow)
        stop("Invalid parallel option")

    loadNamespace("parallel")

    #ensure a seed exists so parallel RNG streams can be initialized
    if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
        runif(1)

    if(RNGkind()[1] != "L'Ecuyer-CMRG"){
        warning("The L'Ecuyer-CMRG random number generator has not been used so",
                " reproducibility cannot be guaranteed. Use the command: RNGkind(\"L'Ecuyer-CMRG\")",
                " for reproducibility and check the parallel package documentation for further details.")
    }

    if (have_mc) {
        mclapply(lapply.list, FUN, ...,
                 mc.preschedule = TRUE, mc.set.seed = TRUE, mc.cores = ncpus)
    }
    else if (have_snow) {
        list(...) #force evaluation of args
        if (is.null(cl)) {
            cl <- makePSOCKcluster(rep("localhost",ncpus))
            #BUGFIX: register the cleanup before running the jobs so the
            #worker processes are also stopped if parLapply throws an error
            #(previously a failing job leaked the cluster)
            on.exit(stopCluster(cl), add = TRUE)
            if (RNGkind()[1L] == "L'Ecuyer-CMRG")
                clusterSetRNGStream(cl)
            #This cluster call is needed as snow doesn't seem
            #to pass libraries which a package depends on onto
            #the cluster
            #clusterCall(cl, function() library("survival"))
            clusterCall(cl, "library", "survival", character.only=TRUE)
            parLapply(cl, lapply.list, FUN, ...)
        }
        else{
            #BUGFIX: forward ... to FUN here too; previously any additional
            #arguments were silently dropped when a cluster was supplied
            parLapply(cl, lapply.list, FUN, ...)
        }
    }

}


#validate function arguments for parallelization
validate.parallel.arguments <- function(parallel, ncpus, cl){
    #inherits() rather than class() comparison: robust to multi-class objects
    if(!is.null(cl) && !inherits(cl, "cluster")){
        stop("cl is not a cluster")
    }

    if(!parallel %in% c("no", "multicore", "snow")){
        stop("Invalid argument for parallel")
    }

    if(!.internal.is.finite.number(ncpus) ||!.internal.is.wholenumber(ncpus) || ncpus < 1){
        stop("ncpus must be a positive integer")
    }

    if(parallel=="no" && ncpus != 1) stop("Cannot have ncpus > 1 if parallel == no")
}
--------------------------------------------------------------------------------
/tests/testthat/test-options.R:
--------------------------------------------------------------------------------
context("options")

test_that(".internal.is.finite.number",{
    expect_true(.internal.is.finite.number(10))
    expect_true(.internal.is.finite.number(0))
    expect_true(.internal.is.finite.number(-8))
    expect_true(.internal.is.finite.number(4.6764))
    expect_false(.internal.is.finite.number("w"))
    expect_false(.internal.is.finite.number(c(4,5,6)))
    expect_false(.internal.is.finite.number(TRUE))
    expect_false(.internal.is.finite.number(Inf))
    expect_false(.internal.is.finite.number(NA))
    expect_false(.internal.is.finite.number(as.numeric(NA)))
})

test_that("NN.options_invalid",{
    expect_error(NN.options(NN=0))
    expect_error(NN.options(NN=-1))
    expect_error(NN.options(NN=3.5))
    expect_error(NN.options(NN=c(5,6)))
    expect_error(NN.options(NN="fr"))
    expect_error(NN.options(NN=5,w.censoring = -1.2))
    expect_error(NN.options(NN=5,w.censoring = 1.4))
    expect_error(NN.options(NN=5,w.censoring = c(0.5,0.3)))
    expect_error(NN.options(NN=5,w.censoring = FALSE))
expect_error(NN.options(NN=5,w.censoring = 0.2,min.subjects=c(2,10))) 27 | expect_error(NN.options(NN=5,w.censoring = 0.2,min.subjects="hello")) 28 | expect_error(NN.options(NN=5,w.censoring = 0.2,min.subjects=0)) 29 | expect_error(NN.options(NN=5,w.censoring = 0.2,min.subjects=4.5)) 30 | }) 31 | 32 | test_that("NN.options",{ 33 | #default 34 | n <- NN.options() 35 | expect_equal(5,n$NN) 36 | expect_equal(0.2,n$w.censoring) 37 | expect_equal(20,n$min.subjects) 38 | expect_equal(c("NN","w.censoring","min.subjects"),names(n)) 39 | 40 | #non default 41 | n <- NN.options(NN=6,w.censoring = 0.5,min.subjects=15) 42 | expect_equal(6,n$NN) 43 | expect_equal(0.5,n$w.censoring) 44 | expect_equal(15,n$min.subjects) 45 | }) 46 | 47 | test_that("dummy.col.headings",{ 48 | expect_that(.dummy.col.headings(),not(throws_error())) 49 | }) 50 | 51 | 52 | test_that("col.headings",{ 53 | #note most tests require a data set with col.headings list 54 | #and so are tested elsewhere 55 | expect_error(col.headings(arm="a",has.event="",time="t",Id="I",DCO.time="D",to.impute ="ti")) 56 | expect_error(col.headings(arm="a",has.event="h",time="t",Id=c("I","J"),DCO.time="D",to.impute ="ti")) 57 | col.control <- col.headings(arm="a",has.event="h",time="t",Id="I",DCO.time="D",to.impute ="ti") 58 | expect_equal(c("arm","has.event","time","Id","DCO.time","to.impute","censor.type"),names(col.control)) 59 | expect_equal("a",col.control$arm) 60 | expect_equal("h",col.control$has.event) 61 | expect_equal("t",col.control$time) 62 | expect_equal("I",col.control$Id) 63 | expect_equal("D",col.control$DCO.time) 64 | expect_equal("ti",col.control$to.impute) 65 | 66 | #No arm 67 | expect_error(col.headings(has.event="event", 68 | time="time", Id="Id", 69 | DCO.time="DCO.time", 70 | to.impute="to.impute")) 71 | 72 | }) 73 | 74 | 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 
InformativeCensoring 2 | 3 | [![Build Status](https://travis-ci.org/scientific-computing-solutions/InformativeCensoring.svg?branch=master)](https://travis-ci.org/scientific-computing-solutions/InformativeCensoring) 4 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/InformativeCensoring)](https://cran.r-project.org/package=InformativeCensoring) 5 | [![Coverage Status](https://coveralls.io/repos/scientific-computing-solutions/InformativeCensoring/badge.svg?branch=master&service=github)](https://coveralls.io/github/scientific-computing-solutions/InformativeCensoring?branch=master) 6 | [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/scientific-computing-solutions/InformativeCensoring?branch=master&svg=true)](https://ci.appveyor.com/project/scientific-computing-solutions/InformativeCensoring) 7 | [![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/InformativeCensoring)](https://cran.r-project.org/package=InformativeCensoring) 8 | [![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/grand-total/InformativeCensoring)](https://cran.r-project.org/package=InformativeCensoring) 9 | 10 | Multiple Imputation for Informative Censoring 11 | 12 | This R package implement two methods for multiple imputation of survival data. 13 | * Gamma imputation from Jackson et al. [1] 14 | * Risk score imputation from Hsu et al. [2] 15 | 16 | ## Contributor (alphabetical order) 17 | Bartlett, Jonathan (maintainer); Burkoff, Nikolas; Jackson, Dan; Jones, Edmund; 18 | Law, Martin; Metcalfe, Paul; Ruau, David; 19 | 20 | ## Installation 21 | 22 | To install the development version from GitHub: 23 | ```R 24 | install.packages("devtools") 25 | # We spent a lot of time developing the vignettes. 
We recommend reading them, but
# building them from source takes some time
devtools::install_github("scientific-computing-solutions/InformativeCensoring",
                         build_vignettes = TRUE)
```

## Gamma imputation (Jackson 2014)
The Gamma imputation method implementation was developed in collaboration
between AstraZeneca, the MRC Biostatistics Unit and the University of Cambridge.

This implementation was validated to the best of our ability, following good coding
practice and thorough user testing.

## Risk Score Imputation (Hsu 2009)
We implemented the method described in Chiu-Hsieh Hsu and Jeremy Taylor (2009),
following the publication.

This implementation was validated to the best of our ability, following good coding
practice and thorough user testing.

[1] Dan Jackson, Ian White, Shaun Seaman, Hannah Evans, Kathy Baisley, and James Carpenter. Relaxing the independent censoring assumption in the Cox proportional hazards model using multiple imputation. Statistics in Medicine, 33(27):4681–4694, 2014.

[2] Chiu-Hsieh Hsu and Jeremy MG Taylor. Nonparametric comparison of two survival functions with dependent censoring via nonparametric multiple imputation. Statistics in Medicine, 28(3):462–475, 2009.
48 | -------------------------------------------------------------------------------- /man/ImputeStat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gammaStat.R, R/generics.R, R/scoreImputedSet.R 3 | \name{ImputeStat} 4 | \alias{ImputeStat} 5 | \alias{ImputeStat.GammaImputedData} 6 | \alias{ImputeStat.GammaImputedSet} 7 | \alias{ImputeStat.ScoreImputedSet} 8 | \title{S3 generic to fit model(s) to risk score/gamma Imputed objects} 9 | \usage{ 10 | \method{ImputeStat}{GammaImputedData}(object, method = c("Cox", "weibull", 11 | "exponential")[1], formula = NULL, ...) 12 | 13 | \method{ImputeStat}{GammaImputedSet}(object, method = c("Cox", "weibull", 14 | "exponential")[1], formula = NULL, ..., parallel = c("no", "multicore", 15 | "snow")[1], ncpus = 1L, cl = NULL) 16 | 17 | ImputeStat(object, method = c("logrank", "Wilcoxon", "Cox", "weibull", 18 | "exponential")[1], formula, ...) 19 | 20 | \method{ImputeStat}{ScoreImputedSet}(object, method = c("logrank", "Wilcoxon", 21 | "Cox")[1], formula = NULL, ..., parallel = c("no", "multicore", 22 | "snow")[1], ncpus = 1L, cl = NULL) 23 | } 24 | \arguments{ 25 | \item{object}{A \code{ScoreImputedData}, \code{ScoreImputedSet}, \code{GammaImputedData} or \code{GammaImputedSet} object 26 | to fit the model to} 27 | 28 | \item{method}{The type of statistical model to fit. 
There are three methods which can be performed when using
Risk Score imputation \cr
"logrank": a logrank test using \code{survival::survdiff} \cr
"Wilcoxon": Peto & Peto modification of the Gehan-Wilcoxon test using \code{survival::survdiff}
with \code{rho=1} \cr
"Cox": Fit a Cox model using \code{survival::coxph} \cr

For gamma imputation the model can be "Cox" (using \code{survival::coxph}),
"weibull" or "exponential" both using \code{survival::survreg}}

\item{formula}{The model formula to fit.
If no formula argument is used, then object$default.formula will be used.
For risk score imputation this is \code{~ treatment.group} and for gamma imputation
this is the formula used when fitting the Cox model

For \code{method="Cox"}, additional covariates can be included by explicitly giving a
formula argument. For logrank/Wilcoxon only additional strata terms can be
included.

In all cases only the right hand side of the formula is required.
The survival object on the left hand side is created automatically
E.g. for a Cox model could use formula=~arm + covar1. The cluster and tt options cannot be used
See the vignettes for further details}

\item{...}{Additional arguments which are passed into the model fit function}

\item{parallel}{The type of parallel operation to be used (if any), can be used for \code{GammaImputedSet} and \code{ScoreImputedSet}}

\item{ncpus}{integer: number of processes to be used in parallel operation: typically one would choose this to be
the number of available CPUs, can be used for \code{GammaImputedSet} and \code{ScoreImputedSet}.}

\item{cl}{An optional parallel or snow cluster for use if \code{parallel="snow"}.
If not supplied, a 60 | cluster on the local machine is created for the duration of the call, can be used for \code{GammaImputedSet} and \code{ScoreImputedSet}.} 61 | } 62 | \description{ 63 | S3 generic to fit model(s) to risk score/gamma Imputed objects 64 | } 65 | \seealso{ 66 | \code{\link{ScoreStat.object}} \code{\link{ScoreImputedData.object}} 67 | } 68 | 69 | -------------------------------------------------------------------------------- /R/validationGamma.R: -------------------------------------------------------------------------------- 1 | #This fle contains the functions used to validate the gammaimpute arguments 2 | #(see validation.R for more some subfunctions (e.g. is.valid.call)) and 3 | #parallel.R for validation of arguments associated with parallelism 4 | 5 | 6 | validate.Gamma.arguments <- function(data,surv.times,m,gamma,strata,gamma.factor,DCO.time,Call){ 7 | 8 | is.valid.call(Call) 9 | 10 | if(nrow(data)==0){ 11 | stop("Empty data frame!") 12 | } 13 | 14 | if(any(c("impute.time","impute.event","internal_gamma_val","internalDCO.time") %in% colnames(data))){ 15 | stop("Cannot use a data frame with columns impute.time, impute.event", 16 | " internalDCO.time or internal_gamma_val") 17 | } 18 | 19 | if(!.internal.is.finite.number(m) ||!.internal.is.wholenumber(m) || m < 2){ 20 | stop("m must be an integer and at least 2") 21 | } 22 | 23 | if(!is.numeric(gamma.factor) || is.na(gamma.factor) || length(gamma.factor)>1){ 24 | stop("gamma.factor must be a single number that is multiplied by the values", 25 | " of the gamma argument in order to create subject specific jumps in the hazard rate.", 26 | " see help(gammaImpute) for examples") 27 | } 28 | 29 | #other errors on bootstrap.strata argument will be caught by boot 30 | if(length(strata) != nrow(data)){ 31 | stop("Invalid strata argument it must be a vector the same length as the number of rows in the dataset") 32 | } 33 | 34 | validateDCO.time(DCO.time=DCO.time,data=data,times=surv.times[,1]) 35 | 
36 | if(!is.null(gamma)){ 37 | validateGammaVal(gamma,data) 38 | } 39 | 40 | #check time is positive 41 | if(any(surv.times[,1]<= 0)){ 42 | stop("Time on study must be positive") 43 | } 44 | 45 | } 46 | 47 | 48 | validateDCO.time <- function(DCO.time,data,times){ 49 | 50 | #attempt to get DCO.time to be a vector of DCO.times 51 | #even if single number or column name is input 52 | if(is.character(DCO.time)){ 53 | if(length(DCO.time) != 1 || !DCO.time %in% colnames(data)){ 54 | stop("Invalid DCO.time argument") 55 | } 56 | DCO.time <- data[,DCO.time] 57 | } 58 | else if(length(DCO.time)==1){ 59 | DCO.time <- rep(DCO.time,nrow(data)) 60 | } 61 | else if(length(DCO.time)!=nrow(data)){ 62 | stop("Invalid DCO.time length") 63 | } 64 | 65 | #Now validate the DCO.time 66 | if(any(!is.numeric(DCO.time) | is.infinite(DCO.time))){ 67 | stop("DCO times must be numeric and finite") 68 | } 69 | 70 | #DCO.time is <= time 71 | if(any(DCO.time < times)){ 72 | stop("DCO.time must be >= time for all subjects ") 73 | } 74 | } 75 | 76 | validateGammaVal <- function(gamma,data){ 77 | #first attempt to get gamma a vector of gamma values 78 | #whatever format gamma was input as 79 | if(is.character(gamma)){ 80 | if(length(gamma)!= 1 || !gamma %in% colnames(data)){ 81 | stop("Invalid gamma column name") 82 | } 83 | gamma <- data[,gamma] 84 | } 85 | 86 | #then check gamma is the right length and numeric 87 | if(length(gamma)!= nrow(data)){ 88 | stop("Invalid length of gamma its length must be the number of subjects in the", 89 | " the dataset. use the gamma.factor argument if you want to use a single number", 90 | " for each subject's jump in hazard. 
See help(gammaImpute) for futher details") 91 | } 92 | 93 | if(any(!is.na(gamma) & !is.numeric(gamma))){ 94 | stop("gamma must be numeric") 95 | } 96 | } -------------------------------------------------------------------------------- /man/ScoreImpute.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scoreImputedData.R 3 | \name{ScoreImpute} 4 | \alias{ScoreImpute} 5 | \title{Perform risk score multiple imputation method} 6 | \usage{ 7 | ScoreImpute(data, event.model, censor.model = event.model, col.control, 8 | NN.control = NN.options(), time.dep = NULL, m, bootstrap.strata = rep(1, 9 | nrow(data)), ..., parallel = c("no", "multicore", "snow")[1], ncpus = 1L, 10 | cl = NULL) 11 | } 12 | \arguments{ 13 | \item{data}{The data set for which imputation is required} 14 | 15 | \item{event.model}{The right hand side of the formula to be used for fitting the Cox model for calculating the time to 16 | event score e.g. ~Z1+Z2+Z3.} 17 | 18 | \item{censor.model}{The right hand side of the formula to be used for fitting the Cox model for calculating the time to 19 | censoring score if not included then \code{event.model} will be used} 20 | 21 | \item{col.control}{A list of the columns names of \code{data} which are used by the imputation algorithm 22 | See example below and for further details of these columns and their purpose see \code{\link{col.headings}}} 23 | 24 | \item{NN.control}{Parameters which control the nearest neighbour algorithm. See \code{\link{NN.options}}} 25 | 26 | \item{time.dep}{A ScoreTD object, to be included if the time dependent score imputation method is to be used, otherwise it should be NULL} 27 | 28 | \item{m}{The number of data sets to impute} 29 | 30 | \item{bootstrap.strata}{When performing the bootstrap procedure for fitting the models, 31 | how should the data be stratified (see strata argument to \code{boot::boot}). 
if argument 32 | is not used then standard sampling with replacement is used to generate the bootstrap data} 33 | 34 | \item{...}{Additional arguments passed into the Cox model Note the subset and na.action arguments should not be used 35 | (na.fail will be used when fitting the Cox model)} 36 | 37 | \item{parallel}{The type of parallel operation to be used (if any).} 38 | 39 | \item{ncpus}{integer: number of processes to be used in parallel operation: typically one would chose this to be 40 | the number of available CPUs} 41 | 42 | \item{cl}{An optional parallel or snow cluster for use if \code{parallel="snow"}. If not supplied, a 43 | cluster on the local machine is created for the duration of the call.} 44 | } 45 | \value{ 46 | A \code{ScoreImputedSet} object 47 | } 48 | \description{ 49 | Perform risk score multiple imputation method 50 | } 51 | \details{ 52 | Note that coxph may fail to converge and the following output 53 | Warning in fitter(X, Y, strats, offset, init, control, weights = weights, : 54 | Ran out of iterations and did not converge 55 | 56 | It is possible to use ridge regression by including a ridge term in the model formula 57 | (e.g. \code{~Z1+ridge(Z2,theta=1)}). 
See \code{\link[survival]{ridge}} for further details 58 | } 59 | \examples{ 60 | 61 | data(ScoreInd) 62 | 63 | col.control <- col.headings(has.event="event", time="time", 64 | Id="Id",arm="arm", 65 | DCO.time="DCO.time", 66 | to.impute="to.impute") 67 | 68 | \dontrun{ 69 | ans <- ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5, 70 | col.control=col.control, m=5, 71 | bootstrap.strata=ScoreInd$arm, 72 | NN.control=NN.options(NN=5,w.censoring = 0.2)) 73 | } 74 | 75 | } 76 | \seealso{ 77 | \code{\link{ScoreImputedSet.object}} 78 | } 79 | 80 | -------------------------------------------------------------------------------- /inst/validation/jackson_simulation.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Gamma imputation - simulation study" 3 | author: "Jonathan Bartlett" 4 | date: "4/25/2016" 5 | output: pdf_document 6 | --- 7 | 8 | ```{r} 9 | #replication of simulation study performed by Jackson et al DOI: 10.1002/sim.6274 10 | #we perform 100 simulations per value of gamma, whereas Jackson et al used 1000 11 | 12 | #load informative censoring package 13 | library(InformativeCensoring) 14 | 15 | runSim <- function(nSim=100,n=100,gamma=0) { 16 | 17 | ICEst <- array(0, dim=c(nSim,2)) 18 | ICCI <- array(0, dim=c(nSim,4)) 19 | miEst <- array(0, dim=c(nSim,2)) 20 | miCI <- array(0, dim=c(nSim,4)) 21 | trueEst <- array(0, dim=c(nSim,2)) 22 | for (sim in 1:nSim) { 23 | u <- runif(n) 24 | z <- rep(0, n) 25 | z[(u>0.5) & (u<0.8)] <- 1 26 | z[u>0.8] <- 2 27 | 28 | #generate censoring time 29 | c <- rexp(n, rate=0.3) 30 | lambda <- 0.03+0.02*(z==1)+0.06*(z==2) 31 | t <- rexp(n, rate=lambda) 32 | 33 | y <- t 34 | y[c<t] <- c[c<t] 35 | y[y>3] <- 3 36 | 37 | delta <- 1*(y==t) 38 | 39 | #note that thus far T and C are independent 40 | ICmod <- coxph(Surv(y,delta)~factor(z)) 41 | ICEst[sim,] <- coef(ICmod) 42 | ICCI[sim,1:2] <- log(summary(ICmod)$conf.int[1,3:4]) 43 | ICCI[sim,3:4] <- log(summary(ICmod)$conf.int[2,3:4]) 44 | 45 | #now we 
apply the gamma imputation approach 46 | data <- data.frame(y,delta,z=factor(z)) 47 | imputed <- gammaImpute(formula=Surv(y,delta)~z, data=data, m=10, gamma=rep(gamma,n), DCO.time = 3) 48 | fits <- ImputeStat(imputed) 49 | s <- summary(fits) 50 | miEst[sim,] <- s[,1] 51 | miCI[sim,1:2] <- s[1,6:7] 52 | miCI[sim,3:4] <- s[2,6:7] 53 | 54 | #now we simulate what would have been observed in the absence of censoring 55 | tstar <- t 56 | a <- rexp(n, rate=lambda*exp(gamma)) 57 | tstar[t>c] <- c[t>c]+a[t>c] 58 | #in their paper, Jackson et al again censor tstar at 3 59 | delta <- 1*(tstar<3) 60 | tstar[delta==0] <- 3 61 | 62 | truemod <- coxph(Surv(tstar,delta)~factor(z)) 63 | trueEst[sim,] <- coef(truemod) 64 | #in the way Jackson et al have done it, these now become the 'true' log hazard ratios 65 | } 66 | 67 | list(ICEst=ICEst, ICCI=ICCI, miEst=miEst, miCI=miCI, trueEst=trueEst) 68 | } 69 | 70 | gammaseq <- seq(-2,5,1) 71 | resultsTable <- array(0, dim=c(length(gammaseq), 9)) 72 | resultsTable[,1] <- gammaseq 73 | 74 | for (i in 1:length(gammaseq)) { 75 | gammaval <- gammaseq[i] 76 | print(paste("Gamma = ", gammaval, sep="")) 77 | results <- runSim(nSim=100,n=1000,gammaval) 78 | #calculate bias, as defined in Jackson paper 79 | truth <- colMeans(results$trueEst) 80 | 81 | ICbias <- colMeans(results$ICEst)-colMeans(results$trueEst) 82 | MIbias <- colMeans(results$miEst)-colMeans(results$trueEst) 83 | 84 | ICCI1 <- ((results$ICCI[,1]<truth[1]) & (results$ICCI[,2]>truth[1])) 85 | ICCI2 <- ((results$ICCI[,3]<truth[2]) & (results$ICCI[,4]>truth[2])) 86 | miCI1 <- ((results$miCI[,1]<truth[1]) & (results$miCI[,2]>truth[1])) 87 | miCI2 <- ((results$miCI[,3]<truth[2]) & (results$miCI[,4]>truth[2])) 88 | 89 | #save to results table, mirroring formatting in Jackson paper 90 | resultsTable[i,2:9] <- c(MIbias[1], ICbias[1], mean(miCI1), mean(ICCI1), MIbias[2], ICbias[2], mean(miCI2), mean(ICCI2)) 91 | } 92 | 93 | colnames(resultsTable) <- c("Gamma", "MI bias 1", "IC bias 1", "MI CI 1", "IC CI 1", "MI bias 2", "IC bias 2", "MI CI 2", "IC CI 2") 94 | 95 | format(round(resultsTable, 2), nsmall=2) 96 | ``` 
97 | -------------------------------------------------------------------------------- /tests/testthat/parallel.R: -------------------------------------------------------------------------------- 1 | #These tests don't work on R cmd check but do in devtools::test so they are 2 | #not run gnerally (they are testing the parallelism) 3 | context("parallel") 4 | 5 | test_that("valid_parallel_args",{ 6 | 7 | expect_error(validate.parallel.arguments(parallel="no",ncpus=-5,cl=NULL)) 8 | expect_error(validate.parallel.arguments(parallel="no",ncpus=34.5,cl=NULL)) 9 | expect_error(validate.parallel.arguments(parallel="no",ncpus=c(2,6,10),cl=NULL)) 10 | expect_error(validate.parallel.arguments(parallel="no",ncpus=5,cl="hello")) 11 | 12 | expect_error(validate.parallel.arguments(parallel="no",ncpus=5,cl=NULL)) 13 | expect_error(validate.parallel.arguments(parallel="mue",ncpus=5,cl=NULL)) 14 | 15 | }) 16 | 17 | test_that("parallel",{ 18 | 19 | f <- function(x,y){x*y} 20 | 21 | RNGkind("L'Ecuyer-CMRG") 22 | 23 | ans.1 <- parallelRun(parallel="multicore",ncpus=2,cl=NULL,lapply.list=1:10,FUN=f,y=5) 24 | expect_equal(unlist(ans.1),seq(5,50,5)) 25 | 26 | ans.2 <- parallelRun(parallel="snow",ncpus=2,cl=NULL,lapply.list=1:10,FUN=f,y=5) 27 | expect_equal(unlist(ans.2),seq(5,50,5)) 28 | 29 | RNGkind("Mersenne-Twister") 30 | expect_warning(parallelRun(parallel="multicore",ncpus=2,cl=NULL,lapply.list=1:10,FUN=f,y=5)) 31 | 32 | }) 33 | 34 | 35 | 36 | test_that("parallel_score",{ 37 | RNGkind("L'Ecuyer-CMRG") 38 | set.seed(104) 39 | data(ScoreInd) 40 | 41 | col.control <- col.headings(has.event="event", time="time",Id="Id",arm="arm", 42 | DCO.time="DCO.time", to.impute="to.impute") 43 | 44 | d1 <- ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5, 45 | col.control=col.control, m=5, 46 | bootstrap.strata=ScoreInd$arm, 47 | NN.control=NN.options(NN=5,w.censoring = 0.2), 48 | parallel="snow",ncpus=2) 49 | 50 | expect_that(ImputeStat(d1,parallel="multicore",ncpus=2),not(throws_error())) 51 | 
}) 52 | 53 | 54 | test_that("parallel_gamma",{ 55 | RNGkind("L'Ecuyer-CMRG") 56 | set.seed(10) 57 | load("gamma_test.rda") 58 | d1 <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 59 | data = gamma.dataset, 60 | m=5, DCO.time="DCO.time", 61 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 62 | gamma.factor=1,gamma="gamma", 63 | parallel="multicore",ncpus=2) 64 | 65 | expect_that(ImputeStat(d1,method="weibull",parallel="snow",ncpus=2),not(throws_error())) 66 | 67 | 68 | }) 69 | 70 | test_that("parallel_seed",{ 71 | 72 | RNGkind("L'Ecuyer-CMRG") 73 | 74 | load("gamma_test.rda") 75 | #snow is reproducible 76 | set.seed(10) 77 | d1 <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 78 | data = gamma.dataset, 79 | m=9, DCO.time="DCO.time", 80 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 81 | gamma.factor=1,gamma="gamma", 82 | parallel="snow",ncpus=2) 83 | 84 | set.seed(10) 85 | d2 <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 86 | data = gamma.dataset, 87 | m=9, DCO.time="DCO.time", 88 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 89 | gamma.factor=1,gamma="gamma", 90 | parallel="snow",ncpus=2) 91 | 92 | expect_equal(d1,d2) 93 | 94 | #multicore is reproducible 95 | set.seed(10) 96 | d1 <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 97 | data = gamma.dataset, 98 | m=5, DCO.time="DCO.time", 99 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 100 | gamma.factor=1,gamma="gamma", 101 | parallel="multicore",ncpus=2) 102 | 103 | set.seed(10) 104 | d2 <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 105 | data = gamma.dataset, 106 | m=5, DCO.time="DCO.time", 107 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 108 | gamma.factor=1,gamma="gamma", 109 | parallel="multicore",ncpus=2) 110 | 111 | expect_equal(d1,d2) 112 | 113 | 114 | }) -------------------------------------------------------------------------------- /R/options.R: 
-------------------------------------------------------------------------------- 1 | #This file contains functions associated with col.control and NN.options arguments 2 | #for risk score imputation 3 | 4 | ##' Specify the columns of the data frame required by score imputation method 5 | ##' @param arm column name which will contain the subject's treatment group 6 | ##' @param has.event column name which will contain whether 7 | ##' the subject has an event (1) or not(0) 8 | ##' @param time column name of censoring/event time 9 | ##' @param Id column name of subject Id 10 | ##' @param DCO.time column name of the time at which the subject would have been 11 | ##' censored had they not had an event before data cut off 12 | ##' @param to.impute column name of the logical column as to whether events should 13 | ##' be imputed 14 | ##' @param censor.type column name of the column containing the reason for censoring, 15 | ##' 0=had event, 1=non-administrative censoring 2=administrative censoring -- only subjects 16 | ##' with 1 in this column count as having an `event' in the Cox model for censoring 17 | ##' (optionally used -- if not used then all subjects who are censored are used) 18 | ##' @return A list containing the given arguments 19 | ##' @export 20 | col.headings <- function(arm,has.event,time,Id,DCO.time,to.impute,censor.type=NULL){ 21 | 22 | retVal <- list(arm=arm, 23 | has.event=has.event, 24 | time=time, 25 | Id=Id, 26 | DCO.time=DCO.time, 27 | to.impute=to.impute, 28 | censor.type=censor.type) 29 | 30 | .validcoloption(retVal) 31 | retVal 32 | } 33 | 34 | .validcoloption <- function(retVal){ 35 | 36 | if(is.null(retVal) || !is.list(retVal)){ 37 | stop("Invalid column control") 38 | } 39 | 40 | which.cols <- c("arm","has.event","time","Id","DCO.time","to.impute","censor.type") 41 | 42 | if(any(!which.cols %in% names(retVal))){ 43 | stop("Invalid column control") 44 | } 45 | 46 | lapply(which.cols,function(name){ 47 | if(name!="censor.type" && 
is.null(retVal[[name]])){ 48 | stop("Invalid argument to col.headings only censor.type can be NULL") 49 | } 50 | if(!is.null(retVal[[name]]) && 51 | (length(retVal[[name]])!=1|| !is.character(retVal[[name]]) || retVal[[name]]=="")){ 52 | stop("Invalid argument to col.headings") 53 | } 54 | }) 55 | } 56 | 57 | 58 | .dummy.col.headings <- function(){ 59 | col.headings(arm=" ",has.event=" ",time=" ",Id=" ",DCO.time=" ",to.impute=" ",censor.type = " ") 60 | } 61 | 62 | 63 | ##' Create a list of options which control the nearest neighbour algorithm 64 | ##' for risk score imputation 65 | ##' @param NN The (maximum) number of subjects to be included in the 66 | ##' risk set 67 | ##' @param w.censoring The weighting on the censoring risk score when 68 | ##' calculating distances for the nearest neighbour calculation 69 | ##' A weighting of \code{(1-w.censoring)} is used for the event risk score 70 | ##' @param min.subjects If using time dependent score imputation include at least 71 | ##' this number of subjects when fitting the Cox model (i.e. include some subjects who were censored/had event 72 | ##' earlier than the cenosred observation if neccessary) 73 | ##' @return A list of options used within the ScoreImputedData 74 | ##' function 75 | ##' @export 76 | NN.options <- function(NN=5,w.censoring=0.2,min.subjects=20){ 77 | retVal <- list(NN=NN, 78 | w.censoring=w.censoring, 79 | min.subjects=min.subjects) 80 | 81 | .validNNoption(retVal) 82 | retVal 83 | } 84 | 85 | .validNNoption <- function(val){ 86 | if(is.null(val) || !is.list(val)){ 87 | stop("Invalid NNoption") 88 | } 89 | 90 | if(any(! 
c("NN","w.censoring","min.subjects") %in% names(val))){ 91 | stop("NNoption must contain the entries NN, w.censoring and min.subjects") 92 | } 93 | 94 | if(!.internal.is.finite.number(val$w.censoring) || val$w.censoring < 0 || val$w.censoring >1){ 95 | stop("Invalid argument w.censoring must be in [0,1]") 96 | } 97 | 98 | if(!.internal.is.finite.number(val$NN) || !.internal.is.wholenumber(val$NN) || val$NN <=0){ 99 | stop("Invalid argument NN must be positive integer") 100 | } 101 | 102 | 103 | if(!.internal.is.finite.number(val$min.subjects) || !.internal.is.wholenumber(val$min.subjects) || val$min.subjects <=0){ 104 | stop("Invalid argument min.subjects must be positive integer") 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /R/timedependent.R: -------------------------------------------------------------------------------- 1 | #This file contains code associated with the time dependent risk score imputation 2 | #method, the ScoreTD object (data frame containing the time dependent covariates) 3 | # 4 | 5 | ##' A \code{ScoreTD} object 6 | ##' 7 | ##' This data frame holds time dependent covariates for 8 | ##' use with risk score imputation 9 | ##' 10 | ##' The data frame contains the following columns: 11 | ##' 'Id' for subject ID \cr 12 | ##' 'time.start' and 'time.end' the range of time for which 13 | ##' the covariate values are valid - i.e. 
[time.start,time.end] \cr 14 | ##' Additional columns are the time dependent covariates 15 | ##' 16 | ##' All data for a single subject should be stored in consecutive rows, sorted 17 | ##' by time and the starting time of a row should match the ending time of the previous row 18 | ##' 19 | ##' @name ScoreTD.object 20 | ##' @seealso \code{\link{MakeTimeDepScore}} 21 | NULL 22 | 23 | 24 | ##' Create a valid \code{ScoreTD} object 25 | ##' @param data A data frame of time dependent covariates 26 | ##' @param Id The column name of the subject Id 27 | ##' @param time.start The covariates are valid for the time [time.start,time.end] where 28 | ##' time.start is the column name of time.start 29 | ##' @param time.end The covariates are valid for the time [time.start,time.end] where 30 | ##' time.end is the column name of time.end 31 | ##' @return A \code{ScoreTD} object 32 | ##' @export 33 | MakeTimeDepScore <- function(data,Id,time.start,time.end){ 34 | if(any(!c(Id,time.start,time.end) %in% colnames(data))){ 35 | stop("Invalid arguments, Id, time.start and time.end should be column names in the data frame ") 36 | } 37 | 38 | sort.cols <- function(data,new.name,given.name){ 39 | if(given.name != new.name){ 40 | if(new.name %in% colnames(data)) stop("The column",new.name,"already exists!") 41 | data[,new.name] <- data[,given.name] 42 | data[,given.name] <- NULL 43 | } 44 | data 45 | } 46 | 47 | data <- sort.cols(data,"Id",Id) 48 | data <- sort.cols(data,"time.start",time.start) 49 | data <- sort.cols(data,"time.end",time.end) 50 | 51 | data$Id <- factor(data$Id) 52 | data$time.start <- as.numeric(data$time.start) 53 | data$time.end <- as.numeric(data$time.end) 54 | 55 | checkContiguous(data$Id) 56 | by(data,data$Id,checkPanelling) 57 | 58 | class(data) <- c("ScoreTD",class(data)) 59 | data 60 | } 61 | 62 | 63 | #For a give data frame data and ScoreTD object, time.dep, 64 | #output a merged data frame containing data and the time dependent covariate 65 | #values at time 
my.time if my.time is NULL then output the time dependent covariates 66 | #at the time of subject having an event/being censored 67 | .getTimeDepDataSet <- function(data,time.dep,dataID,datatime,my.time){ 68 | 69 | #if have no subjects (as current subject has max time, then output empty data frame) 70 | #we only fit a model if the number of rows of data frame > NN, so 71 | #this edge case behaves as expected 72 | if(nrow(data)==0){ 73 | return(data) 74 | } 75 | 76 | #when merging and matching need to be careful about factor IDs not being matched correctly 77 | time.dep$Id <- as.character(time.dep$Id) 78 | data$internal_char_IDXX <-as.character(data[,dataID]) 79 | 80 | 81 | time.dep <- time.dep[time.dep$Id %in% data$internal_char_IDXX,] 82 | time.dep$internal.Score.time <- if(is.null(my.time)) vapply(time.dep$Id,function(x){ 83 | data[which(data$internal_char_IDXX==x),datatime] 84 | },numeric(1)) else my.time 85 | 86 | retVal <- time.dep[time.dep$time.start < time.dep$internal.Score.time 87 | & time.dep$time.end >= time.dep$internal.Score.time,] 88 | 89 | s <- sort(unique(retVal$Id)) 90 | s2 <- sort(unique(data$internal_char_IDXX)) 91 | 92 | if(length(s) != length(s2) || any(s != s2)){ 93 | msg <- if(is.null(my.time)) " the value of a covariate for a subject at the time of event/censoring" 94 | else paste(" the value of a covariate for an at risk subject at time",my.time) 95 | stop("Error in creating data set with time dependent covariates. 
Cannot determine", msg) 96 | } 97 | retVal$internal.Score.time <- NULL 98 | retVal$time.start <- NULL 99 | retVal$time.end <- NULL 100 | 101 | 102 | data <- inner_join(data,retVal,by=c("internal_char_IDXX"="Id")) 103 | data$internal_char_IDXX <- NULL 104 | data 105 | } 106 | 107 | -------------------------------------------------------------------------------- /R/gammaStat.R: -------------------------------------------------------------------------------- 1 | #This file contains the code used to fit an analyze gamma imputed 2 | #data sets. Most of these functions call functions inside generic.R 3 | #and the Roxygen comments for the generics are found there 4 | 5 | ##' @name ImputeStat 6 | ##' @export 7 | ImputeStat.GammaImputedData <- function(object,method=c("Cox","weibull","exponential")[1],formula=NULL,...){ 8 | if(!method %in% c("Cox","weibull","exponential")){ 9 | stop("Invalid method for gamma imputation must be one of Cox, weibull or exponential") 10 | } 11 | 12 | test.stats <- .imputeStat.internal(object,method,formula,...) 
13 | class(test.stats) <- "GammaStat" 14 | test.stats 15 | } 16 | 17 | 18 | ##' @name ImputeStat 19 | ##' @export 20 | ImputeStat.GammaImputedSet <- function(object,method=c("Cox","weibull","exponential")[1],formula=NULL,..., 21 | parallel = c("no", "multicore", "snow")[1], ncpus = 1L, cl = NULL){ 22 | 23 | fits <- .internalImputeStatset(object,method,formula,...,parallel=parallel,ncpus=ncpus,cl=cl) 24 | 25 | #function to extract the estimates and variances from the fit 26 | .setupStats <- function(val){ 27 | retVal <- do.call(rbind,lapply(fits,function(x){matrix(x[[val]],nrow=1)})) 28 | colnames(retVal) <- names(fits[[1]]$estimate) 29 | retVal 30 | } 31 | 32 | retVal <- list(fits=fits, 33 | statistics=list(estimates=.setupStats("estimate"), 34 | vars=.setupStats("var") ), 35 | m=object$m) 36 | class(retVal) <- "GammaStatList" 37 | return(retVal) 38 | } 39 | 40 | ##' @export 41 | summary.GammaStatList <- function(object,...){ 42 | 43 | M <- object$m 44 | estimates <- colMeans(object$statistics$estimates) 45 | mean.variances <- colMeans(object$statistics$vars) 46 | var.of.estimates <- apply(object$statistics$estimates,2,var) 47 | 48 | se <- sqrt(mean.variances + (1 + (1/M)) * var.of.estimates) 49 | t <- estimates/se 50 | df <- (M-1)*(1+(mean.variances/((1+(1/M))*var.of.estimates)))^2 51 | 52 | t.val <- qt(1-(1-0.95)/2,df=df) 53 | 54 | retVal <- data.frame(est=estimates, 55 | se=se, 56 | t=t, 57 | df=df, 58 | "Pr(>|t|)"=2*(1-pt(abs(t),df=df)), 59 | "lo 95"=estimates-se*t.val, 60 | "hi 95"=estimates+se*t.val, 61 | check.names=FALSE) 62 | as.matrix(retVal,ncol=ncol(retVal)) 63 | } 64 | 65 | 66 | ##' \code{GammaStat} object 67 | ##' 68 | ##' An S3 object which contains the point estimate 69 | ##' and test statistic after fitting a model to 70 | ##' a \code{GammaImputedData} object. 
71 | ##' 72 | ##' The function \code{print.GammaStat} has been implemented 73 | ##' 74 | ##' The object contains the following: 75 | ##' @slot model The model used to create the fit 76 | ##' @slot method The model used for the fit 77 | ##' @slot estimate A point estimate of the test statistic 78 | ##' @slot var The estimate for the variance of the test statistic 79 | ##' @name GammaStat.object 80 | NULL 81 | 82 | ##' \code{GammaStatList} object 83 | ##' 84 | ##' The object containing the results of fitting models to 85 | ##' a \code{GammaImputedSet} object. 86 | ##' 87 | ##' A \code{summary.GammaStatList} has been implemented which performs 88 | ##' Rubin's multiple imputation rules. 89 | ##' 90 | ##' The object contains the following 91 | ##' @slot fits A list of \code{GammaStat} objects containing the model fits for 92 | ##' the imputed data sets 93 | ##' @slot statistics A list with two elements: estimates and vars which contain the coefficient 94 | ##' estimates and their variances one column per covariate one row per imputed data set 95 | ##' @slot m The number of model fits 96 | ##' @name GammaStatList.object 97 | NULL 98 | 99 | ##' @export 100 | print.GammaStat <- function(x,...){ 101 | print(x$model) 102 | } 103 | 104 | ##' @name ExtractSingle 105 | ##' @export 106 | ExtractSingle.GammaStatList <- function(x,index){ 107 | .internalExtract(x,index,fit=TRUE) 108 | } 109 | 110 | ##' @export 111 | cox.zph.GammaStat <- function(fit,transform="km",global=TRUE,...){ 112 | if(!inherits(fit$model, "coxph")) stop("The model fit is not Cox!") 113 | cox.zph(fit$model,transform=transform,global=global) 114 | } 115 | 116 | ##' @export 117 | cox.zph.GammaStatList <- function(fit,transform="km",global=TRUE,index,...){ 118 | cox.zph(ExtractSingle(fit,index),transform=transform,global=global,...) 
119 | } 120 | 121 | -------------------------------------------------------------------------------- /man/gammaImpute.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gammaImputeData.R 3 | \name{gammaImpute} 4 | \alias{gammaImpute} 5 | \title{Perform gamma-Imputation for a given data set} 6 | \usage{ 7 | gammaImpute(formula, data, m, gamma, gamma.factor, bootstrap.strata = rep(1, 8 | nrow(data)), DCO.time, ..., parallel = c("no", "multicore", "snow")[1], 9 | ncpus = 1L, cl = NULL) 10 | } 11 | \arguments{ 12 | \item{formula}{The model formula to be used when fitting the models to calculate 13 | the cumulative hazard. A formula for coxph can include strata terms but not 14 | cluster or tt and only right-censored \code{Surv} objects can be used. 15 | Note the function does not allow multiple strata to be written as \code{strata(W1)+strata(W2)}, 16 | use \code{strata(W1,W2)} instead} 17 | 18 | \item{data}{A time to event data set for which event times are to be imputed} 19 | 20 | \item{m}{The number of imputations to be created} 21 | 22 | \item{gamma}{Either column name containing the value of gamma or a vector of values giving the subject specific 23 | size of the step change in the log hazard at censoring. If a subject has NA in this column then no imputation is performed 24 | for this subject (i.e. the subject's censored time remains unchanged after imputation). If a subject has already had an 25 | event then the value of gamma is ignored. If \code{gamma.factor} is also used then the subject specific gamma 26 | are all multipled by \code{gamma.factor}. At least one of \code{gamma} and \code{gamma.factor} must be included.} 27 | 28 | \item{gamma.factor}{If used, a single numeric value. If no \code{gamma} then the step change in log 29 | hazard for all subjects at censoring is given by \code{gamma.factor}. 
If \code{gamma} is used 30 | then for each subject, the step change in log hazard is given by \code{gamma.factor} multiplied by the subject specific gamma. 31 | At least one of \code{gamma} and \code{gamma.factor} must be included.} 32 | 33 | \item{bootstrap.strata}{The strata argument for stratified bootstrap sampling, see argument \code{strata} 34 | for the function \code{boot::boot} for further details. If argument is not used then standard sampling with 35 | replacement will be used} 36 | 37 | \item{DCO.time}{Either column name containing the subject's data cutoff time or a vector 38 | of DCO.times for the subjects or a single number to be used as the DCO.time for all subjects 39 | (if imputed events are > this DCO.time then subjects are censored at DCO.time in imputed data sets)} 40 | 41 | \item{...}{Additional parameters to be passed into the model fit function} 42 | 43 | \item{parallel}{The type of parallel operation to be used (if any).} 44 | 45 | \item{ncpus}{integer: number of processes to be used in parallel operation: typically one would chose this to be 46 | the number of available CPUs} 47 | 48 | \item{cl}{An optional parallel or snow cluster for use if \code{parallel="snow"}. If not supplied, a 49 | cluster on the local machine is created for the duration of the call.} 50 | } 51 | \value{ 52 | A \code{GammaImputedSet.object} containing the imputed data sets 53 | } 54 | \description{ 55 | This function performs the Imputation described in 56 | Relaxing the independent censoring assumptions in the Cox proportional 57 | hazards model using multiple imputation. (2014) D. Jackson et al. Statist. Med. 
(33) 58 | 4681-4694 59 | } 60 | \details{ 61 | See the Gamma Imputation vignette for further details 62 | } 63 | \examples{ 64 | 65 | \dontrun{ 66 | data(nwtco) 67 | nwtco <- nwtco[1:500,] 68 | 69 | #creating 2 imputed data sets (m=2) for speed, would normally create more 70 | ans <- gammaImpute(formula=Surv(edrel,rel)~histol + instit, 71 | data = nwtco, m=2, gamma.factor=1, DCO.time=6209) 72 | 73 | #subject specific gamma (multiplied by gamma.factor to give the jump) 74 | #NA for subjects that are not to be imputed 75 | jumps <- c(rep(NA,10),rep(1,490)) 76 | DCO.values <- rep(6209,500) 77 | 78 | ans.2 <- gammaImpute(formula=Surv(edrel,rel)~histol + instit + strata(stage), 79 | data = nwtco, m=2, bootstrap.strata=strata(nwtco$stage), 80 | gamma=jumps, gamma.factor=1, DCO.time=DCO.values) 81 | 82 | #can also use column names 83 | nwtco$gamma <- jumps 84 | nwtco$DCO.time <- DCO.values 85 | ans.3 <- gammaImpute(formula=Surv(edrel,rel)~histol + instit + strata(stage), 86 | data = nwtco, m=2, bootstrap.strata=strata(nwtco$stage), 87 | gamma="gamma", DCO.time="DCO.time") 88 | } 89 | 90 | } 91 | \seealso{ 92 | \code{\link{GammaImputedSet.object}} \code{\link{GammaImputedData.object}} 93 | } 94 | 95 | -------------------------------------------------------------------------------- /tests/testthat/test-scoreStatSet.R: -------------------------------------------------------------------------------- 1 | context("ScoreStatSet") 2 | 3 | test_that("from_matrix_invalid",{ 4 | #wrong number of columns 5 | expect_error(ScoreStatSet(matrix(rep(1,20),ncol = 2))) 6 | expect_error(ScoreStatSet(matrix(rep(1,20),ncol = 4))) 7 | 8 | #fewer than 5 rows 9 | expect_error(ScoreStatSet(matrix(rep(1,12),ncol = 3))) 10 | 11 | #negative variances 12 | expect_error(ScoreStatSet(matrix(c(rep(1,9),-1,rep(1,5)),ncol = 3))) 13 | 14 | #Z must equal estimate/sqrt(var) 15 | m <- matrix(rep(1,15),ncol=3) 16 | m[1,3] <- 2 17 | expect_error(ScoreStatSet(m)) 18 | }) 19 | 20 | .get.test.matrix <- function(){ 
  #5x3 test matrix of (estimate, var, Z) rows; column 3 must equal
  #estimate/sqrt(var) for ScoreStatSet to accept it
  matrix(c(0.5,-0.6,0.7,0.8,0.9,
           0.1^2,0.11^2,0.12^2,0.13^2,0.14^2,
           0.5/0.1,-0.6/0.11,0.7/0.12,0.8/0.13,0.9/0.14),ncol = 3)}


#a valid matrix is accepted and its columns are exposed unchanged
test_that("from_matrix",{
  m <- .get.test.matrix()
  sss <- ScoreStatSet(m)

  expect_equal("ScoreStatSet",class(sss))
  expect_equal(3,ncol(sss))
  expect_equal(c(0.5,-0.6,0.7,0.8,0.9),sss[,"estimate"])
  expect_equal(m[,2],sss[,"var"])
  expect_equal(m[,3],sss[,"Z"])
})


#check the summary.ScoreStatSet creation works
#(three combination methods, each with the same named fields)
test_that("summary_creation",{
  m <- .get.test.matrix()
  s <- summary(ScoreStatSet(m))
  expect_equal("summary.ScoreStatSet",class(s))
  expect_equal(3,length(s))

  expect_equal(c("meth1","meth2","methRubin"),names(s))

  meth1 <- s$meth1
  meth2 <- s$meth2
  methRubin <- s$methRubin

  expect_equal(c("estimate","var","test.stat","df","distribution","p.value"),names(meth1))
  expect_equal(names(meth1),names(meth2))
  expect_equal(names(meth1),names(methRubin))

  #meth1 is an F test; meth2 and methRubin are t tests
  expect_equal("F",meth1$distribution)
  expect_equal("t",meth2$distribution)
  expect_equal("t",methRubin$distribution)
})

#degenerate case: zero between-imputation variance
test_that("summary_all_rows_same",{
  m <- matrix(rep(c(1,0.25,2),5),ncol=3,byrow = TRUE)
  #get a warning here as v1 is undefined (yes, this is caught...)
  expect_warning(s5 <- summary(ScoreStatSet(m)))

  m <- matrix(rep(c(1,0.25,2),6),ncol=3,byrow = TRUE)
  s6 <- summary(ScoreStatSet(m))
  m <- matrix(rep(c(1,0.25,2),10),ncol=3,byrow = TRUE)
  s10 <- summary(ScoreStatSet(m))

  #with zero between-imputation variance the number of imputations
  #should not change the summary
  expect_equal(s6,s10) #these should be the same
  expect_equal(s5$meth2,s6$meth2) #s5[[1]] has NAN df -> this is correct 0/0 = NAN

  expect_equal(1,s6$meth1$estimate)
  expect_equal(0.25,s6$meth1$var)
  expect_equal(4,s6$meth1$test.stat)
  expect_equal(c(1,Inf),s6$meth1$df)
  expect_equal(2*(1-pnorm(abs(2))),s6$meth1$p.value)


  expect_equal(2,s6$meth2$estimate)
  expect_equal(1,s6$meth2$var)
  expect_equal(2,s6$meth2$test.stat)
  expect_true(is.infinite(s6$meth2$df))
  expect_equal(2*(1-pnorm(abs(2))),s6$meth2$p.value)

  expect_equal(1,s6$methRubin$estimate)
  expect_equal(0.25,s6$methRubin$var)
  expect_equal(2,s6$methRubin$test.stat)
  expect_true(is.infinite(s6$methRubin$df))
  expect_equal(2*(1-pnorm(abs(2))),s6$methRubin$p.value)


})

#hand-computed values for the .get.test.matrix fixture
#(Rubin's rules: total var = within + (1+1/M)*between)
test_that("summary_system_test",{
  m <- .get.test.matrix()
  s <- summary(ScoreStatSet(m))

  expect_equal(0.46,s$meth1$estimate)
  #0.0146 + 6*var(c(0.5,-0.6,0.7,0.8,0.9))/5
  expect_equal(0.4622,s$meth1$var)

  #0.46*0.46/0.4622
  expect_equal(0.457810471657,s$meth1$test.stat)
  #as (t-4) = 0
  expect_equal(c(1,4),s$meth1$df)
  expect_equal(1-pf(0.457810471657,1,4),s$meth1$p.value)

  me <- mean(c(0.5/0.1,-0.6/0.11,0.7/0.12,0.8/0.13,0.9/0.14))
  expect_equal(me,s$meth2$estimate)

  va <- var(c(0.5/0.1,-0.6/0.11,0.7/0.12,0.8/0.13,0.9/0.14))
  expect_equal(1+(6*va/5),s$meth2$var)

  expect_equal( me/sqrt(1+(6*va/5)),s$meth2$test.stat)

  expect_equal( 4*(1+5/(va*6))^2,s$meth2$df)
  expect_equal( 2*(1-pt(me/sqrt(1+(6*va/5)),4*(1+5/(va*6))^2)),s$meth2$p.value )
})

#methRubin shares estimate/var with meth1; its t statistic squared
#is meth1's F statistic
test_that("methRubin",{
  m <- .get.test.matrix()
  s <- summary(ScoreStatSet(m))

  expect_equal(s$methRubin$estimate,s$meth1$estimate)
  expect_equal(s$methRubin$var,s$meth1$var)
  expect_equal(s$methRubin$test.stat^2,s$meth1$test.stat)

  #4*(1+0.0146/(6*0.373/5))^2
  expect_true(abs(4.265203-s$methRubin$df)<1e-6)
})

#denominator degrees of freedom formula for meth1
test_that("meth1.df",{

  m <- matrix(c(5:12,seq(0.1,0.8,0.1),rep(1,8)),ncol=3)
  m[,3] <- m[,1]/sqrt(m[,2])
  s <- summary(ScoreStatSet(m))

  r <- (9*6)/(0.45*8)

  expect_equal(4+3*(1+5/(7*r))^2, s$meth1$df[2])

})

#confint: argument validation, coverage and symmetry about the estimate
test_that("confint",{
  m <- matrix(c(5:12,seq(0.1,0.8,0.1),rep(1,8)),ncol=3)
  m[,3] <- m[,1]/sqrt(m[,2])
  s <- summary(ScoreStatSet(m))

  expect_error(confint(s,level=Inf))
  expect_error(confint(s,level=0))
  expect_error(confint(s,level=1))
  expect_error(confint(s,level="Hello"))

  ans <- confint(s,level=0.001)
  expect_true(s$methRubin$estimate > ans[1])
  expect_true(s$methRubin$estimate < ans[2])

  ans.95 <- confint(s)
  expect_equal(c("2.5%","97.5%"),names(ans.95))
  #symmetric
  expect_true(all.equal(s$methRubin$estimate,mean(ans.95)))

})
-------------------------------------------------------------------------------- /R/scoreImputedData.R: --------------------------------------------------------------------------------
#This file contains the main function to perform the risk
#score imputation and the roxygen comments for the objects
#associated with generating the risk score imputation
#See internalScore.R for subfunctions and validation.R for
#many validation routines

##' \code{ScoreImputedData} object
##'
##' An object which contains
##'
##' @slot data A data frame containing the time to event data
##' with 2 new columns impute.time and impute.event, the imputed
event/censoring times and event indicators
##' (for subjects whose data is not imputed these columns contain the unchanged event/censoring time and
##' event indicator)
##' @slot col.control The list of column names the risk score imputation method requires; see \code{\link{col.headings}}
##' for further details. If censor.type was not used then \code{col.control$censor.type="using_has.event_col"}
##' @slot default.formula The default model formula which will be used when fitting the imputed data using a Cox model
##' @name ScoreImputedData.object
NULL


##' Perform risk score multiple imputation method
##' @param data The data set for which imputation is required
##' @param event.model The right hand side of the formula to be used for fitting the Cox model for calculating the time to
##' event score e.g. ~Z1+Z2+Z3.
##' @param censor.model The right hand side of the formula to be used for fitting the Cox model for calculating the time to
##' censoring score; if not included then \code{event.model} will be used
##' @param col.control A list of the column names of \code{data} which are used by the imputation algorithm.
##' See example below and for further details of these columns and their purpose see \code{\link{col.headings}}
##' @param NN.control Parameters which control the nearest neighbour algorithm. See \code{\link{NN.options}}
##' @param time.dep A ScoreTD object, to be included if the time dependent score imputation method is to be used, otherwise it should be NULL
##' @param m The number of data sets to impute
##' @param bootstrap.strata When performing the bootstrap procedure for fitting the models,
##' how should the data be stratified (see strata argument to \code{boot::boot}). If this argument
##' is not used then standard sampling with replacement is used to generate the bootstrap data
##' @param ... Additional arguments passed into the Cox model. Note the subset and na.action arguments should not be used
##' (na.fail will be used when fitting the Cox model)
##' @param parallel The type of parallel operation to be used (if any).
##' @param ncpus integer: number of processes to be used in parallel operation: typically one would choose this to be
##' the number of available CPUs
##' @param cl An optional parallel or snow cluster for use if \code{parallel="snow"}. If not supplied, a
##' cluster on the local machine is created for the duration of the call.
##' @return A \code{ScoreImputedSet} object
##' @seealso \code{\link{ScoreImputedSet.object}}
##' @details Note that coxph may fail to converge and produce the following output:
##' Warning in fitter(X, Y, strats, offset, init, control, weights = weights, :
##' Ran out of iterations and did not converge
##'
##' It is possible to use ridge regression by including a ridge term in the model formula
##' (e.g. \code{~Z1+ridge(Z2,theta=1)}).
See \code{\link[survival]{ridge}} for further details
##' @examples
##'
##' data(ScoreInd)
##'
##' col.control <- col.headings(has.event="event", time="time",
##'                             Id="Id",arm="arm",
##'                             DCO.time="DCO.time",
##'                             to.impute="to.impute")
##'
##' \dontrun{
##' ans <- ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5,
##'                    col.control=col.control, m=5,
##'                    bootstrap.strata=ScoreInd$arm,
##'                    NN.control=NN.options(NN=5,w.censoring = 0.2))
##' }
##'
##' @export
ScoreImpute <- function(data,event.model,censor.model=event.model,
                        col.control, NN.control=NN.options(),time.dep=NULL,m,
                        bootstrap.strata=rep(1,nrow(data)),...,
                        parallel = c("no", "multicore", "snow")[1], ncpus = 1L, cl = NULL){

  #validate arguments
  validate.Score.Arguments(data,col.control,NN.control,time.dep,match.call(expand.dots = TRUE),m)
  validate.parallel.arguments(parallel,ncpus,cl)

  #setup censor model event type if not given: a censoring indicator
  #column (1 - event indicator) is added and removed again before returning
  if(is.null(col.control$censor.type)){
    col.control$censor.type <- "using_has.event_col"
    data[,col.control$censor.type] <- 1-data[,col.control$has.event]
  }

  #indices for bootstrap sample: one row-index vector per imputation,
  #sampled by boot::boot so that bootstrap.strata is respected
  boot.indices <- boot::boot(data=data,R=m,statistic=function(data,indices){return(indices)},strata=bootstrap.strata)
  boot.indices <- lapply(seq_len(m),function(x){boot.indices$t[x,]})

  #perform imputation (Sfn does the work for a single bootstrap sample)
  if(parallel=="no"){
    imputes <- lapply(boot.indices,Sfn,data,event.model,
                      censor.model,col.control,
                      NN.control,time.dep,...)
  }
  else{
    imputes <- parallelRun(parallel,ncpus,cl,lapply.list=boot.indices,
                           FUN=Sfn,data,event.model,censor.model,col.control,
                           NN.control,time.dep,...)
  }

  #remove censor.type column if created earlier
  if(col.control$censor.type=="using_has.event_col"){
    data[,col.control$censor.type] <- NULL
  }

  #package up result
  .createImputedSet(m,col.control,data,imputes,"Score",formula(paste("~",col.control$arm)))
}



-------------------------------------------------------------------------------- /tests/testthat/test-scoreStat.R: --------------------------------------------------------------------------------
context("ScoreStat")

#build a fake ScoreImputedData object from the ScoreInd data set:
#imputed time/event are just copies of the observed ones and the
#treatment arm column is renamed to exercise col.control$arm handling
mockImputed <- function(){
  data(ScoreInd)

  df <- ScoreInd

  df$impute.event <- df$event
  df$impute.time <- df$time

  df$my.arm <- df$arm
  df$arm <- NULL

  col.control <- list(has.event="event",
                      time="time",
                      Id="Id",
                      arm="my.arm",
                      DCO.time="DCO.time",
                      to.impute="to.impute")

  retVal <- list(data=df,
                 col.control=col.control,
                 default.formula=formula("~ my.arm"))

  class(retVal) <- "ScoreImputedData"
  retVal
}


#invalid objects, methods and formulae are all rejected
test_that("creation_invalid",{

  scoreData <- mockImputed()
  expect_error(ImputeStat(scoreData$data,method="logrank"))
  expect_error(ImputeStat(scoreData,method="log"))
  expect_error(ImputeStat(scoreData,method="exponential"))
  expect_error(ImputeStat(scoreData,method="weibull"))

  #No formula apart from stratified for Wilcoxon/logrank
  expect_error(ImputeStat(scoreData,method="logrank",formula=~my.arm+Z1))
  expect_error(ImputeStat(scoreData,method="Wilcoxon",formula=~my.arm+Z1))
  expect_error(ImputeStat(scoreData,method="Wilcoxon",formula=~my.arm+strata(Z1)+Z2))
  expect_error(ImputeStat(scoreData,method="Wilcoxon",formula=~strata(Z1)+my.arm))

  #Invalid Cox formulae
  expect_error(ImputeStat(scoreData,method="Cox",formula=~Z1))
  expect_error(ImputeStat(scoreData,method="Cox",formula=~my.arm+Z1*my.arm))
  #only the right hand side of the formula may be supplied
  expect_error(ImputeStat(scoreData,method="Cox",formula=Surv(impute.time,impute.event)~my.arm))
})

#logrank fit matches survival::survdiff on the imputed columns
test_that("logrank",{
  scoreData <- mockImputed()

  a <- ImputeStat(scoreData,method="logrank",formula=~my.arm)
  expect_equal("ScoreStat",class(a))
  expect_equal(c("model","method","estimate","var","statistic"),names(a))

  expect_equal("logrank (estimator for O-E)",a$method)

  my.mod <- survdiff(Surv(impute.time,impute.event)~my.arm,data=scoreData$data)

  #hack call to then show model is same
  my.mod$call <- a$model$call
  expect_equal(my.mod,a$model)

  #Z^2 is correct
  expect_equal(my.mod$chisq,a$statistic^2)

  expect_equal(a$estimate/sqrt(a$var),a$statistic )
  expect_equal(a$var,my.mod$var[2,2])
  expect_equal(a$estimate,my.mod$obs[2]-my.mod$exp[2])

  #default test is logrank and default formula is ~my.arm
  a1 <- ImputeStat(scoreData)
  expect_equal(a,a1)

})

#as.vector extracts the three numeric fields, named
test_that("asvector",{

  #mock object
  a <- list(model=NULL,
            estimate=6,
            var=9,
            statistic=2,
            test=NULL)

  class(a) <- "ScoreStat"
  ans <- as.vector(a)

  expects <- c(6,9,2)
  names(expects) <- c("estimate","var","statistic")

  expect_equal(expects,ans)

})

#Wilcoxon fit matches survival::survdiff with rho=1
test_that("Wilcoxon",{
  scoreData <- mockImputed()

  #remove time and event cols as they should not be being used
  scoreData$data$time <- NULL
  scoreData$data$event <- NULL

  a <- ImputeStat(scoreData,method="Wilcoxon")
  expect_equal("ScoreStat",class(a))
  expect_equal(c("model","method","estimate","var","statistic"),names(a))

  expect_equal("Wilcoxon (estimator for O-E)",a$method)

  my.mod <- survdiff(Surv(impute.time,impute.event)~my.arm,data=scoreData$data,rho = 1)

  #hack call to then show model is same
  my.mod$call <- a$model$call
  expect_equal(my.mod,a$model)

  #Z^2 is correct
  expect_equal(my.mod$chisq,a$statistic^2)

  expect_equal(a$estimate/sqrt(a$var),a$statistic )
  expect_equal(a$var,my.mod$var[2,2])
  expect_equal(a$estimate,my.mod$obs[2]-my.mod$exp[2])
})

#helper: two coxph fits agree on the parts we care about
samePH <- function(x,y){
  expect_equal(x$coefficients,y$coefficients)
  expect_equal(x$var,y$var)
  expect_equal(x$residuals,y$residuals)
}

test_that("Cox_defaultformula",{
  scoreData <- mockImputed()
  a <- ImputeStat(scoreData,method="Cox",ties="breslow")
  expect_equal("ScoreStat",class(a))

  expect_equal("Cox",a$method)

  my.mod <- coxph(Surv(impute.time,impute.event)~my.arm,data=scoreData$data,ties="breslow")

  expect_equal("breslow",a$model$method)

  samePH(my.mod,a$model)
  expect_equal(a$statistic,a$estimate/sqrt(a$var))
  expect_equal(a$estimate,my.mod$coefficients["my.arm1"])
  expect_true(abs(a$var-my.mod$var[1,1])<1e-12)
})


test_that("Cox_usersuppliedformula",{
  scoreData <- mockImputed()
  a <- ImputeStat(scoreData,method="Cox",formula=~my.arm+Z1+Z4)
  expect_equal("ScoreStat",class(a))

  expect_equal("Cox",a$method)

  my.mod <- coxph(Surv(impute.time,impute.event)~my.arm+Z1+Z4,data=scoreData$data)
  expect_equal("efron",a$model$method)

  samePH(my.mod,a$model)
  expect_equal(a$statistic,a$estimate/sqrt(a$var))
  expect_equal(a$estimate,my.mod$coefficients["my.arm1"])
  expect_true(abs(a$var-my.mod$var[1,1])<1e-12)
})


#strata terms are allowed (and order-independent) for logrank/Wilcoxon
test_that("Logrank_Wilcoxon_usersupplied_formula",{
  scoreData <- mockImputed()
  a <- ImputeStat(scoreData,method="logrank",formula=~my.arm+strata(Z1))
  expect_equal(a$statistic,sum(a$model$obs[2,]-a$model$exp[2,])/sqrt(a$model$var[2,2]))

  scoreData <- mockImputed()
  a <- ImputeStat(scoreData,method="logrank",formula=~my.arm+strata(Z1,Z3))
  a2 <-
ImputeStat(scoreData,method="logrank",formula=~my.arm+strata(Z3) + strata(Z1))
  expect_equal(as.vector(a),as.vector(a2))

})
-------------------------------------------------------------------------------- /R/generics.R: --------------------------------------------------------------------------------
#This file contains the functions which are common to both imputation
#methods - these are mainly S3 generics and their internals

##' @import survival
NULL

##' @importFrom boot boot
NULL

##' @importFrom dplyr inner_join
NULL

#TRUE (elementwise) if x is a whole number to within floating point tolerance
.internal.is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) abs(x - round(x)) < tol

#TRUE if and only if x is a single, non-missing, finite numeric value
.internal.is.finite.number <- function(x){
  #check the length before is.na/is.infinite: on a zero-length vector
  #those return logical(0), which breaks the || chain
  if(!is.numeric(x) || length(x)!=1 || is.na(x) || is.infinite(x)){
    return(FALSE)
  }
  return(TRUE)
}

##' Extract a single risk score/gamma imputed data set/model fit
##' @param x The multiple imputed object
##' @param index Integer, which imputed data set/model fit should be returned
##' @return The individual data set/model fit
##' @export
ExtractSingle <- function(x,index){
  UseMethod("ExtractSingle")
}

##' @export
ExtractSingle.default <- function(x,index){
  #error message previously referred to a nonexistent "ExtractImputedData"
  stop("No method ExtractSingle for this object")
}

#performs the ExtractSingle generic for various x
#@param x the S3 object for which ExtractSingle was used
#@param index - which individual object should be output
#@param fit - logical, if TRUE outputting a *Stat object, if not
#@ a *ImputedData object
.internalExtract <- function(x,index,fit){

  if(!.internal.is.finite.number(index) || !.internal.is.wholenumber(index) || index <= 0 || index > x$m){
    stop("invalid index")
  }

  if(fit){
    return(x$fits[[index]])
  }

  retVal <- list(data=x$data,
                 col.control=x$col.control,
                 default.formula=x$default.formula)

  #overwrite the observed columns with the index-th imputed columns
  retVal$data$impute.time <- x$impute.time[,index]
  retVal$data$impute.event <- x$impute.event[,index]
  retVal
}

##' S3 generic to fit model(s) to risk score/gamma Imputed objects
##'
##' @param object A \code{ScoreImputedData}, \code{ScoreImputedSet}, \code{GammaImputedData} or \code{GammaImputedSet} object
##' to fit the model to
##' @param method The type of statistical model to fit. There are three methods which can be performed when using
##' Risk Score imputation \cr
##' "logrank": a logrank test using \code{survival::survdiff} \cr
##' "Wilcoxon": Peto & Peto modification of the Gehan-Wilcoxon test using \code{survival::survdiff}
##' with \code{rho=1} \cr
##' "Cox": Fit a cox model using \code{survival::coxph} \cr
##'
##' For gamma imputation the model can be "Cox" (using \code{survival::coxph}),
##' "weibull" or "exponential" both using \code{survival::coxph}
##'
##'
##' @param formula The model formula to fit.
##' If no formula argument is used, then object$default.formula will be used.
##' For risk score imputation this is \code{~ treatment.group} and for gamma imputation
##' this is the formula used when fitting the Cox model
##'
##' For \code{method="Cox"}, additional covariates can be included by explicitly giving a
##' formula argument. For logrank/Wilcoxon only additional strata terms can be
##' included.
##'
##' In all cases only the right hand side of the formula is required
##' The survival object on the left hand side is created automatically
##' E.g. for a Cox model could use formula=~arm + covar1. The cluster and tt options cannot be used
##' See the vignettes for further details
##' @param ...
Additional arguments which are passed into the model fit function
##' @seealso \code{\link{ScoreStat.object}} \code{\link{ScoreImputedData.object}}
##' @export
ImputeStat <- function(object,method=c("logrank","Wilcoxon","Cox","weibull","exponential")[1],formula,...){
  UseMethod("ImputeStat")
}

##' @export
ImputeStat.default <- function(object,method=c("logrank","Wilcoxon","Cox","weibull","exponential")[1],formula,...){
  stop("No method ImputeStat for this object")
}

#Internal function for fitting the imputed data sets
#see ImputeStat function documentation
#dispatches to the .Test* helper matching the requested method
.imputeStat.internal <- function(object,method,formula,...){
  formula <- .getFormula(if(is.null(formula)) object$default.formula else formula,
                         object$col.control$arm,method)
  switch(method,
         logrank=.Testlogrank ,
         Wilcoxon=.Testwilcoxon ,
         Cox=.Testcox,
         exponential=.TestExponential,
         weibull=.TestWeibull,
         function(object,formula,...){stop("Unknown test type")})(object,formula,...)
}


#extract the imputed data and perform the fit
#@return the list of m fitted *Stat objects, one per imputed data set
.internalImputeStatset <- function(object,method,formula,...,parallel,ncpus,cl){
  validate.parallel.arguments(parallel,ncpus,cl)

  extract_stat <- function(x,method,formula){
    ImputeStat(ExtractSingle(object,x),method=method,formula=formula,...)}

  if(parallel=="no"){
    fits <- lapply(seq_len(object$m),extract_stat,method,formula)
  }
  else{
    #NOTE(review): '...' is already captured by the extract_stat closure;
    #passing it again here assumes parallelRun tolerates/ignores the extra
    #arguments - confirm against parallel.R
    fits <- parallelRun(parallel,ncpus,cl,lapply.list=seq_len(object$m),
                        FUN=extract_stat,method,formula,...)
  }
  #return the fits explicitly (previously the function ended on the
  #assignment inside if/else and relied on its invisible value)
  fits
}


#constructor function for the Score/Gamma ImputedSet objects
#@param imputes list of data frames with impute.time/impute.event columns
#@return an object of class <classprefix>ImputedSet
.createImputedSet <- function(m,col.control,data,imputes,classprefix,default.formula){
  retVal <- list(m=m,
                 col.control=col.control,
                 default.formula=default.formula,
                 data=data,
                 impute.time=vapply(imputes,"[[","impute.time",FUN.VALUE=numeric(nrow(data))),
                 impute.event=vapply(imputes,"[[","impute.event",FUN.VALUE=numeric(nrow(data))))
  class(retVal) <- paste(classprefix,"ImputedSet",sep="")
  retVal
}

##' Test Cox proportional hazards assumption
##'
##' See cox.zph function in the survival package
##' @inheritParams survival::cox.zph
##' @param ... Additional arguments to cox.zph, for example \code{index} if
##' fit is a \code{GammaStatList} object
##' @seealso \code{\link[survival]{cox.zph}}
##' @export
cox.zph <- function(fit,transform="km",global=TRUE,...){
  UseMethod("cox.zph")
}

##' @export
cox.zph.default <- function(fit,transform="km",global=TRUE,...){
  #'...' is deliberately not forwarded: survival::cox.zph does not take
  #the extra arguments documented for the other methods
  survival::cox.zph(fit,transform=transform,global=global)
}

-------------------------------------------------------------------------------- /R/internalGamma.R: --------------------------------------------------------------------------------
#The code contains the functions called by gammaImpute
#(not including the validation routines)

# Main function which performs gamma imputation to generate imputed times
# for a single Imputation
# @inheritParams gammaImpute
# @param indices The indices of the rows of the data set used to get the bootstrapped data for fitting the Cox model
# @param surv.times, matrix representation of the Surv object of the original event times
# and indicator
# @param DCO.time the column of data cutoff times of the data frame
# @return A data frame with the imputed times and events for
this imputed dataset
# (subjects who are not imputed return unchanged values)
# @details This function performs the following steps:
# 1) fit a cox model to the bootstrapped data frame
# 2) calculates the baseline hazards by calling .SetupStrataBaseHaz
# 3) calculates the linear predictors (lp)
# 4) For each subject calculate an imputed time and event indicator (using .CoxMImpute)
# 5) wrap up the event times into a dataframe
.singleGammaImpute <- function(indices,data,surv.times,DCO.time,formula,...){

  #fit the Cox model to the bootstrapped rows (na.fail: missing data is an error)
  boot.data <- data[indices,]
  model <- coxph(formula=formula,data=boot.data,model=TRUE,na.action=na.fail,...)

  stratas <- untangle.specials(model$terms,"strata")$vars
  if(length(stratas) > 1){
    stop("Cannot have multiple strata arguments. Use strata(x,y) instead of strata(x)+strata(y)")
  }

  basehaz.details <- .SetupStrataBaseHaz(data,formula,stratas,basehaz(model))
  #linear predictor plus the subject-specific gamma jump
  #(internal_gamma_val is NA for subjects that are not to be imputed)
  lp <- predict(model, data, type="lp",reference="sample")+data$internal_gamma_val

  #perform imputation for one subject at a time
  imputes <- lapply(seq_len(nrow(data)),function(x){

    my.time <- surv.times[x,1]

    #if have event or not imputing then do not impute
    if(surv.times[x,2]==1 || is.na(data[x,"internal_gamma_val"])){
      return(list(event=surv.times[x,2],time=my.time))
    }

    #(basehaz.details$index is a vector of which basehaz.details$base.hazard data frame
    #to be used for each subject)
    imputed.time <- .CoxMImpute(my.time,lp[x],
                                basehaz.details$base.hazard[[basehaz.details$index[x]]])
    DCO.time <- data[x,DCO.time]

    #censor the imputed time at the subject's data cutoff
    return(list(event=(imputed.time < DCO.time),
                time=min(DCO.time,imputed.time)))
  })

  data.frame(impute.event=vapply(imputes,"[[","event",FUN.VALUE = numeric(1)),
             impute.time=vapply(imputes,"[[","time",FUN.VALUE = numeric(1)))
}

#function which creates the baseline hazard data frames, one per
strata
#and which data frame is required by each data point
# @param data The initial data frame for which imputation is required
# @param formula The formula used for the model fit
# @param stratas The stratas of the model (untangle.specials(terms,"strata"))
# @param base.hazard The output of basehaz(model)
# @return a list with two elements, base.hazard: a list of stratified baseline hazard
# data frames, named by their strata values and index: A nrow(data) length
# vector where index[i] is the strata value required for subject in row i. If the
# baseline hazard function is not stratified then base.hazard is a list containing one data frame
# named "all" and index is a vector with every element = "all"
.SetupStrataBaseHaz <- function(data,formula,stratas,base.hazard){
  #unstratified model: a single shared baseline hazard
  if(!"strata" %in% colnames(base.hazard)){
    return(list(index=rep("all",nrow(data)),
                base.hazard=list(all=base.hazard)))
  }


  #one baseline hazard data frame per strata level
  base.hazard <- split(base.hazard,f=base.hazard$strata)

  #which basehaz data frame should each subject use?
  #cannot find a nicer way to do this...
  mf <- model.frame(formula = formula,data = data)

  #rebuild each subject's strata label in the same "a, b" format that
  #basehaz/split produced above so it can be used as a list name
  index <- vapply(seq_len(nrow(data)),
                  function(i) paste(unlist(lapply(stratas,
                                                  function(x){as.character(mf[i,x])})),collapse=", "),
                  FUN.VALUE = character(1))

  if(any(!index %in% names(base.hazard))){
    stop("Cannot find baseline hazard function for a given strata ",
         "check your bootstrap.strata argument as you may be producing invalid ",
         "stratified bootstrapped samples")
  }

  list(index=index,
       base.hazard=base.hazard)
}


# Function which performs the imputation for a single
# subject (we already assume a subject is to have time imputed)
# @param my.time The current time of censoring
# @param beta The lp prediction from the model fit including
# the step change gamma (= \hat{beta_j}Z_i+ gamma_i) see equation (4) in paper
# @param basehaz A data frame containing the baseline hazard function (time, hazard, [possibly strata])
# for the strata of this subject's strata
# @return A proposed imputed event time (higher up this could be replaced
# by censoring at DCO.time)
.CoxMImpute <- function(my.time,beta,basehaz){

  #shift the time axis so that "now" (the censoring time) is zero and
  #subtract the cumulative hazard already accrued by my.time
  basehaz$strata <- NULL
  basehaz$time <- basehaz$time-my.time

  if(any(basehaz$time<=0)){
    basehaz$hazard <- basehaz$hazard - max(basehaz$hazard[basehaz$time<=0])
  }

  #The 0,0 is inserted because all cumulative hazards pass through the origin.
  #This also has the effect of making gamma=Inf give instant failure
  basehaz <- rbind(data.frame(time=0,hazard=0),
                   basehaz[basehaz$time>0,],
                   data.frame(time=Inf,hazard=Inf))

  #invert the cumulative hazard at an exponential quantile to draw the
  #imputed residual time
  quant <- -log(runif(1))*exp(-beta)
  my.time + min(basehaz$time[basehaz$hazard>= quant])

}

#simple input function which calculates subject specific jumps in
#hazard given (already validated arguments)
#gamma may be NULL (constant jump), a column name, or a numeric vector
getGamma <- function(data,gamma,gamma.factor){
  if(is.null(gamma)){
    return(rep(gamma.factor,nrow(data)))
  }
  if(is.character(gamma)){
    return(data[,gamma]*gamma.factor)
  }
  gamma*gamma.factor
}


#from nlme, output the rhs of a formula as a formula object
getCovarFormula <- function(form){
  if (!(inherits(form, "formula"))) {
    stop("invalid formula")
  }
  #take the right hand side; strip a conditioning bar if present
  form <- form[[length(form)]]
  if (length(form) == 3 && form[[1]] == as.name("|")) {
    form <- form[[2]]
  }
  eval(substitute(~form))
}
-------------------------------------------------------------------------------- /tests/testthat/test-internalGamma.R: --------------------------------------------------------------------------------
context("internalGamma")


test_that(".CoxMImpute",{
  #Here we check we get the same result as the example code given by Jackson

  Jackson.snippet <- function(mytime,lp,hazard_boot,U){
    hazard_boot$time=hazard_boot$time-mytime
    haz_to_sub=subset(hazard_boot, time<=0)$hazard
    hazard_boot=subset(hazard_boot, time>0)
    if(length(haz_to_sub)>0) hazard_boot$hazard=hazard_boot$hazard-max(haz_to_sub)
    hazard_boot=rbind(c(0,0), hazard_boot, c(Inf, Inf))
    hazard_boot=data.frame(hazard=hazard_boot[,1], time=hazard_boot[,2])
    quant=-log(U)*exp(-lp)
    answer=min(subset(hazard_boot, hazard>=quant)$time)
    mytime+answer
  }

  #run reference snippet and .CoxMImpute with the same RNG seed and compare
  check.same <- function(my.time,beta,basehaz,seed){
    set.seed(seed)
    Jans <- Jackson.snippet(my.time,beta,basehaz,runif(1))

    set.seed(seed)
    myans <- .CoxMImpute(my.time,beta,basehaz)

    expect_equal(Jans,myans)
  }


  basehaz <- data.frame(hazard=c(0.1,0.5,1.0,1.5,2.5,4),
                        time=c(1,2,3,4,5,6))

  check.same(my.time=3,beta=0.4,basehaz,seed=10)
  check.same(my.time=8,beta=0.8,basehaz,seed=23)
  check.same(my.time=5,beta=-0.8,basehaz,seed=123)
  check.same(my.time=5.3,beta=1,basehaz,seed=1323)
  check.same(my.time=0.5,beta=1,basehaz,seed=1323)


})

#unstratified model: everyone shares the single "all" baseline hazard
test_that(".SetupStrataBaseHaz.no.strata",{

  data <- data.frame(id=1:5,
                     time=1:5,
                     event=c(0,1,1,0,1),
                     x=c(1,0,1,1,0),
                     y=c(1,1,1,0,0))

  formula <- formula(Surv(time,event)~x)

  basehaz <- data.frame(hazard=c(1,2.2,4.5,8),
                        time=c(1:4))

  stratas <- character(0)
  ans <- .SetupStrataBaseHaz(data,formula,stratas,basehaz)

  expect_equal(rep("all",5),ans$index)
  expect_equal("all",names(ans$base.hazard))

})


#strata labels that do not match any subject must raise an error
test_that(".SetupStrataBaseHaz.strata.error",{

  data <- data.frame(id=1:5,
                     time=1:5,
                     event=c(0,1,1,0,1),
                     x=c(1,0,1,1,0),
                     y=c(1,1,1,0,0))

  formula <- formula(Surv(time,event)~strata(x))

  basehaz <- data.frame(hazard=c(1,2.2,4.5,2,4,7,11),
                        time=c(1:3,1:4),
                        strata=c(rep("x=1",3),rep("x=2",4)))

  stratas <- "strata(x)"

  expect_error(.SetupStrataBaseHaz(data,formula,stratas,basehaz))

})


test_that(".SetupStrataBaseHaz.one.strata",{

  data <- data.frame(id=1:5,
                     time=1:5,
                     event=c(0,1,1,0,1),
                     x=c(1,0,1,1,0),
                     y=c(1,1,1,0,0))

  formula <- formula(Surv(time,event)~strata(x))
  basehaz <- data.frame(hazard=c(1,2.2,4.5,2,4,7,11),
                        time=c(1:3,1:4),
                        strata=c(rep("x=0",3),rep("x=1",4)))

  stratas <- "strata(x)"

  ans <- .SetupStrataBaseHaz(data,formula,stratas,basehaz)
  expect_equal(c("x=1","x=0","x=1","x=1","x=0"),ans$index)
  expect_equal(c("x=0","x=1"),names(ans$base.hazard))

  expect_equal(ans$base.hazard$`x=0`,basehaz[basehaz$strata=="x=0",])


  #Now redo with x as factor
  data$x <- factor(data$x)
  basehaz$strata <- c(rep(0,3),rep(1,4))
  ans <- .SetupStrataBaseHaz(data,formula,stratas,basehaz)
  expect_equal(c("1","0","1","1","0"),ans$index)
  expect_equal(c("0","1"),names(ans$base.hazard))

  expect_equal(ans$base.hazard$`1`,basehaz[basehaz$strata=="1",])

  #Now check strata(x*y) works
  data$x <- c(1,0,1,1,0)
  formula <- formula(Surv(time,event)~strata(x*y))
  stratas <- "strata(x * y)"
  basehaz$strata <- c(rep("x * y=0",3),rep("x * y=1",4))
  ans <- .SetupStrataBaseHaz(data,formula,stratas,basehaz)
  expect_equal(c("x * y=1","x * y=0","x * y=1","x * y=0","x * y=0"),ans$index)
  expect_equal(c("x * y=0","x * y=1"),names(ans$base.hazard))
})

test_that(".SetupStrataBaseHaz.two.strata",{
  data <- data.frame(id=1:5,
                     time=1:5,
                     event=c(0,1,1,0,1),
                     x=c(1,0,1,1,0),
                     y=c(1,1,1,0,0))

  data$x <- factor(data$x)
  formula <- formula(Surv(time,event)~strata(x)+strata(y))
  stratas <- c("strata(x)","strata(y)")

  basehaz <- data.frame(hazard=c(1,3,2,1),
                        time=1:4,
                        strata=c("0, y=0","0, y=1","1, y=0","1, y=1"))

  ans <- .SetupStrataBaseHaz(data,formula,stratas,basehaz)
  expect_equal(c("1, y=1","0, y=1","1, y=1","1, y=0","0, y=0"),ans$index)
  expect_equal(c("0, y=0","0, y=1","1, y=0","1, y=1"),names(ans$base.hazard))
  expect_equal(ans$base.hazard$`0, y=1`,basehaz[basehaz$strata=="0, y=1",])
})

#document the exact strata-label strings survival's untangle.specials
#produces, since .SetupStrataBaseHaz depends on them
test_that("untangle.specials.behaves.as.expected",{

  data <- data.frame(id=1:9,
                     time=1:9,
                     event=c(0,1,1,0,1,1,0,1,1),
                     x=c(1,0,1,1,0,1,1,0,1),
                     y=c(1,1,1,0,0,1,1,0,1))

  formula <- formula(Surv(time,event)~x)
  m <- coxph(formula,data)
  expect_equal(character(0),untangle.specials(m$terms,"strata")$var)


  formula <- formula(Surv(time,event)~x+strata(y))
  expect_warning(m <- coxph(formula,data)) #not interested in lack of convergence
  expect_equal("strata(y)",untangle.specials(m$terms,"strata")$var)

  formula <- formula(Surv(time,event)~x*strata(y))
  expect_warning(m <- coxph(formula,data)) #not interested in lack of convergence
  expect_equal("strata(y)",untangle.specials(m$terms,"strata")$var)

  formula <- formula(Surv(time,event)~strata(x)*strata(y))
  suppressWarnings(m <- coxph(formula,data)) #not interested in lack of convergence
  expect_equal(c("strata(x)","strata(y)"),untangle.specials(m$terms,"strata")$var)

  formula <- formula(Surv(time,event)~strata(x*y))
  m <- coxph(formula,data)
  expect_equal("strata(x * y)",untangle.specials(m$terms,"strata")$var)

  formula <- formula(Surv(time,event)~strata(x, y))
  m <- coxph(formula,data)
  expect_equal("strata(x, y)",untangle.specials(m$terms,"strata")$var)

  formula <- formula(Surv(time,event)~strata(y) + strata( x))
  m <- coxph(formula,data)
  expect_equal(c("strata(y)","strata(x)"),untangle.specials(m$terms,"strata")$var)

})
-------------------------------------------------------------------------------- /tests/testthat/test-internalScore.R: --------------------------------------------------------------------------------
context("internalScore")

test_that("getLatestTimeCutoff",{

  expect_equal(0,.getLatestTimeCutoff(c(9,1.5,7,10,12),5))
  expect_equal(10,.getLatestTimeCutoff(c(9,1.5,7,10,12),1))
  expect_equal(7,.getLatestTimeCutoff(c(9,9,9,11,1.5,7,10,12),4))
  expect_equal(7,.getLatestTimeCutoff(c(9,9,9,11,1.5,7,10,12),5))
expect_equal(7,.getLatestTimeCutoff(c(9,9,9,11,1.5,7,10,12),6)) 10 | expect_equal(1.5,.getLatestTimeCutoff(c(9,9,9,11,1.5,7,10,12),7)) 11 | }) 12 | 13 | test_that(".kmi_deterministiccases",{ 14 | #empty risk set 15 | df <- data.frame(times=numeric(0),has.event=numeric(0)) 16 | 17 | ans <- .kmi(df,10,20) 18 | expect_equal(list(event=0,time=10),ans) 19 | 20 | #risk set with no events 21 | df <- data.frame(times=c(20,30,10),has.event=c(0,0,0)) 22 | ans <- .kmi(df,5,40) 23 | expect_equal(list(event=0,time=30),ans) 24 | ans <- .kmi(df,5,15) 25 | expect_equal(list(event=0,time=15),ans) 26 | 27 | #risk set with 1 subject with event 28 | df <- data.frame(times=20,has.event=1) 29 | ans <- .kmi(df,5,15) 30 | expect_equal(list(event=0,time=15),ans) 31 | ans <- .kmi(df,5,30) 32 | expect_equal(list(event=1,time=20),ans) 33 | 34 | #risk set with 1 subject with no events 35 | df <- data.frame(times=40,has.event=0) 36 | ans <- .kmi(df,5,15) 37 | expect_equal(list(event=0,time=15),ans) 38 | ans <- .kmi(df,5,50) 39 | expect_equal(list(event=0,time=40),ans) 40 | 41 | #risk set 1 event, last one 42 | df <- data.frame(times=c(40,20,30),has.event=c(1,0,0)) 43 | ans <- .kmi(df,5,15) 44 | expect_equal(list(event=0,time=15),ans) 45 | ans <- .kmi(df,5,50) 46 | expect_equal(list(event=1,time=40),ans) 47 | }) 48 | 49 | 50 | test_that("kmi_stochastic",{ 51 | df <- data.frame(times=c(10,20,30,40,50),has.event=c(1,1,0,1,1)) 52 | 53 | set.seed(10) 54 | #get the 20 random U which will be used below 55 | Us <- runif(n=20) 56 | expect_vals <- vapply(Us,function(x){ 57 | if(x<0.2) return(10) 58 | if(x<0.4) return(20) 59 | if(x<0.7) return(40) 60 | return(50) 61 | },FUN.VALUE = numeric(1)) 62 | 63 | #reset the seed back 64 | set.seed(10) 65 | ans <- vapply(1:20,function(x){.kmi(df,5,60)$time},FUN.VALUE = numeric(1)) 66 | 67 | expect_equal(expect_vals,ans) 68 | 69 | }) 70 | 71 | 72 | test_that("getriskset",{ 73 | 74 | distances <- c(10,5,12,1.5,5,10,7) 75 | times <- c(10,20,30,40,50,60,5) 76 | 
has.event <- c(1,0,0,1,1,0,1) 77 | 78 | #test if NN is large to include everything 79 | rs <- .getRiskSet(distances,times,my.time=1,NN=10,has.event) 80 | expect_equal(data.frame(times=times,has.event=has.event),rs) 81 | 82 | #Only include subjects who have time strictly > my.time 83 | rs <- .getRiskSet(distances,times,my.time=5,NN=10,has.event) 84 | expect_equal(data.frame(times=c(10,20,30,40,50,60), 85 | has.event=c(1,0,0,1,1,0)),rs) 86 | #Empty data set 87 | rs <- .getRiskSet(distances,times,my.time=60,NN=10,has.event) 88 | expect_equal(data.frame(times=numeric(0), 89 | has.event=numeric(0)),rs) 90 | 91 | #NN are output if possible 92 | rs <- .getRiskSet(distances,times,my.time=5,NN=5,has.event) 93 | expect_equal(data.frame(times=c(10,20,40,50,60), 94 | has.event=c(1,0,1,1,0)),rs) 95 | 96 | #handling ties as described in vignette 97 | rs <- .getRiskSet(distances,times,my.time=5,NN=4,has.event) 98 | expect_equal(data.frame(times=c(10,20,40,50,60), 99 | has.event=c(1,0,1,1,0)),rs) 100 | 101 | #extreme NN (=1) 102 | rs <- .getRiskSet(distances,times,my.time=40,NN=1,has.event) 103 | expect_equal(data.frame(times=c(50), 104 | has.event=c(1)),rs) 105 | }) 106 | 107 | test_that(".fitPHmodel",{ 108 | my.data <- data.frame( 109 | my.time=c(10,20,80,40,50,50,70,90), 110 | my.event=c(1,1,1,1,0,0,0,0), 111 | my.censoring=c(0,0,0,0,1,1,1,1), 112 | c1=c(0,1,1,0,1,1,0,0) 113 | ) 114 | 115 | #error if any NA 116 | my.data$c1[1] <- NA 117 | expect_error(.fitPHmodel(formula(~c1),event=FALSE,my.data,time="my.time",has.event="my.event",has.censoring="my.censoring")) 118 | 119 | my.data$c1[1] <- 0 120 | #return NULL if cannot fit event 121 | expect_true(is.null(.fitPHmodel(formula(~c1),event=FALSE,my.data[1:4,],time="my.time",has.event="my.event",has.censoring="my.censoring"))) 122 | #or censoring 123 | expect_true(is.null(.fitPHmodel(formula(~c1),event=TRUE,my.data[5:8,],time="my.time",has.event="my.event",has.censoring="my.censoring"))) 124 | #but ok in reverse case 125 | 
expect_false(is.null(.fitPHmodel(formula(~c1),event=TRUE,my.data[1:4,],time="my.time",has.event="my.event",has.censoring="my.censoring"))) 126 | 127 | samePH <- function(x,y){ 128 | expect_equal(x$coefficients,y$coefficients) 129 | expect_equal(x$var,y$var) 130 | expect_equal(x$residuals,y$residuals) 131 | } 132 | 133 | #check model fit works 134 | my.mod <- .fitPHmodel(formula(~c1),event=TRUE,my.data,time="my.time",has.event="my.event",has.censoring="my.censoring") 135 | surv.mod <- coxph(Surv(my.time,my.event)~c1,data=my.data) 136 | samePH(surv.mod,my.mod) 137 | 138 | #and that it works for censored model 139 | my.mod <- .fitPHmodel(formula(~c1),event=FALSE,my.data,time="my.time",has.event="my.event",has.censoring="my.censoring",ties="breslow") 140 | surv.mod <- coxph(Surv(my.time,my.censoring==1)~c1,data=my.data,ties="breslow") 141 | samePH(surv.mod,my.mod) 142 | expect_equal("breslow",my.mod$method) 143 | 144 | #and censored model works with my.censoring including administrative censoring 145 | my.data$my.censoring[8] <- 2 146 | my.mod <- .fitPHmodel(formula(~c1),event=FALSE,my.data,time="my.time",has.event="my.event",has.censoring="my.censoring") 147 | surv.mod <- coxph(Surv(my.time,my.censoring==1)~c1,data=my.data) 148 | samePH(surv.mod,my.mod) 149 | 150 | }) 151 | 152 | #note there will always be at least one row in the raw.scores data frame 153 | test_that("normalize.scores",{ 154 | 155 | my.df <- data.frame(Rs.f=c(1,2,3,4,5), 156 | Rs.c=c(5,5,5,5,5)) 157 | 158 | norm.df <- normalize.scores(my.df) 159 | expect_equal(rep(0,5),norm.df$Rs.c) 160 | expect_equal(((1:5)-2.5)/sqrt(5/3),norm.df$Rs.f) 161 | 162 | norm.df <- normalize.scores(my.df[1,]) 163 | expect_equal(0,norm.df$Rs.f) 164 | expect_equal(0,norm.df$Rs.c) 165 | }) 166 | 167 | test_that(".internalDistances",{ 168 | 169 | my.df <- data.frame(Rs.f=c(1,2,3,4,5), 170 | Rs.c=c(5,5,5,5,5)) 171 | 172 | s <- 1/sqrt(5/3) 173 | 174 | expect_equal(rep(0,4),.internalDistances(my.df,w.censoring = 1)) 175 | 
expect_equal((4:1)*s,.internalDistances(my.df,w.censoring = 0)) 176 | expect_equal((4:1)*s*sqrt(0.5),.internalDistances(my.df,w.censoring = 0.5)) 177 | 178 | my.df$Rs.c <- my.df$Rs.f 179 | expect_equal((4:1)*s,.internalDistances(my.df,w.censoring = 0.5)) 180 | 181 | my.df$Rs.c <- 5:1 182 | expect_equal((4:1)*s,.internalDistances(my.df,w.censoring = 1)) 183 | 184 | }) 185 | -------------------------------------------------------------------------------- /tests/testthat/test-timedependent.R: -------------------------------------------------------------------------------- 1 | context("timedependent") 2 | 3 | #not testing that data is contiguous or 4 | #that the pannelling is valid - 5 | #these are tested in the test-validation.R 6 | #file 7 | test_that("MakeTimeDepScore_invalid",{ 8 | df <- data.frame(Subject=c(1,1,2,2), 9 | c1=c(10,20,20,30), 10 | my.start=c(0,10,0,10), 11 | my.end=c(10,12,10,15)) 12 | 13 | #invalid Id 14 | expect_error(MakeTimeDepScore(data=df,Id="Id",time.start="my.start",time.end="my.end")) 15 | 16 | #invalid time.start 17 | expect_error(MakeTimeDepScore(data=df,Id="Subject",time.start="c",time.end="my.end")) 18 | 19 | df$Id <- df$my.start 20 | 21 | #cannot have "Id" column and use it for non "Id" column 22 | expect_error(MakeTimeDepScore(data=df,Id="Subject",time.start="my.start",time.end="my.end")) 23 | 24 | df$Id <- df$Subject 25 | df$time.start <- df$my.start 26 | 27 | #cannot have "time.start" column if not used as time.start 28 | expect_error(MakeTimeDepScore(data=df,Id="Id",time.start="my.start",time.end="my.end")) 29 | 30 | #test if not numeric time.start/time.end 31 | df$time.start <- NULL 32 | 33 | df$my.start[1] <- "r" 34 | expect_warning(expect_error(MakeTimeDepScore(data=df,Id="Id",time.start="my.start",time.end="my.end"))) 35 | }) 36 | 37 | test_that("MakeTimeDepScore",{ 38 | df <- data.frame(Subject=c(1,1,2,2), 39 | c1=c(10,20,20,30), 40 | my.start=c(0,10,0,10), 41 | my.end=c(10,12,10,15), 42 | W1=c(0,1,1,0)) 43 | 44 | df$W1 <- 
factor(df$W1) 45 | 46 | td <- MakeTimeDepScore(data=df,Id="Subject",time.start="my.start",time.end="my.end") 47 | expect_equal(c("ScoreTD","data.frame"),class(td)) 48 | 49 | expect_equal(c("c1","W1","Id","time.start","time.end"),colnames(td)) 50 | expect_equal(factor(c(1,1,2,2)),td$Id) #ID has become a factor if it wasn't already 51 | expect_equal(c(0,10,0,10),td$time.start) 52 | expect_equal(c(10,20,20,30),td$c1) 53 | expect_equal(factor(c(0,1,1,0)),td$W1) 54 | 55 | df <- data.frame(Id=c(1,1,2,2), 56 | c1=c(10,20,20,30), 57 | time.start=c(0,10,0,10), 58 | my.end=c(10,12,10,15), 59 | W1=c(0,1,1,0)) 60 | 61 | df$W1 <- factor(df$W1) 62 | td <- MakeTimeDepScore(data=df,Id="Id",time.start="time.start",time.end="my.end") 63 | 64 | expect_equal(c("Id","c1","time.start","W1","time.end"),colnames(td)) 65 | expect_equal(df$my.end,td$time.end) 66 | expect_equal(df$Id,as.numeric(td$Id)) 67 | 68 | }) 69 | 70 | 71 | test_that(".getTimeDepDataSet_invalid",{ 72 | #validation such as ensuring data and time.dep do not have same 73 | #colnames etc. has already taken place before .getTimeDepDataSet is 74 | #called. 
my.time should also be valid for the given data set 75 | 76 | df <- data.frame(Id=c(1,1,2,2), 77 | c1=c(10,20,20,30), 78 | time.start=c(0,10,0,10), 79 | my.end=c(10,12,10,15), 80 | W1=c(0,1,1,0)) 81 | 82 | baseline.df <- data.frame(Sub=c(1,2,3), 83 | Z1=c(0,1,1), 84 | time=c(12,15,20), 85 | DCO.time=c(20,25,40), 86 | has.event=c(1,0,0), 87 | arm=c(1,1,1)) 88 | td <- MakeTimeDepScore(data=df,Id="Id",time.start="time.start",time.end="my.end") 89 | 90 | #missing Id 91 | expect_error(.getTimeDepDataSet(baseline.df,td,"Sub","time",my.time=NULL)) 92 | 93 | expect_that(.getTimeDepDataSet(baseline.df[1:2,],td,"Sub","time",my.time=NULL),not(throws_error())) 94 | 95 | #missing someone 96 | expect_error(.getTimeDepDataSet(baseline.df[1:2,],td,"Sub","time",my.time=14)) 97 | 98 | 99 | }) 100 | 101 | test_that(".getTimeDepDataSet_nullmy.time",{ 102 | df <- data.frame(Id=c(1,1,2,2), 103 | c1=c(10,20,20,30), 104 | time.start=c(0,10,0,10), 105 | my.end=c(10,12,10,15), 106 | W1=c(0,1,1,0)) 107 | df$W1 <- factor(df$W1) 108 | 109 | td <- MakeTimeDepScore(data=df,Id="Id",time.start="time.start",time.end="my.end") 110 | 111 | baseline.df <- data.frame(Sub=c(1,2), 112 | Z1=c(0,1), 113 | time=c(12,15), 114 | DCO.time=c(20,25), 115 | has.event=c(1,0), 116 | arm=c(1,1)) 117 | 118 | baseline.df$Sub <- factor(baseline.df$Sub) 119 | ans <- .getTimeDepDataSet(baseline.df,td,"Sub","time",my.time=NULL) 120 | 121 | expect_equal(2,nrow(ans)) 122 | expect_equal(c("Sub","Z1","time","DCO.time", "has.event","arm","c1","W1"),colnames(ans)) 123 | 124 | expect_equal(c(20,30),ans$c1) 125 | expect_equal(factor(c(1,0)),ans$W1) 126 | expect_equal(c(0,1),ans$Z1) 127 | 128 | #now reorder Sub the merge does not sort the data set 129 | baseline.df <- baseline.df[2:1,] 130 | ans <- .getTimeDepDataSet(baseline.df,td,"Sub","time",my.time=NULL) 131 | 132 | expect_equal(factor(2:1),ans$Sub) 133 | 134 | }) 135 | 136 | test_that(".getTimeDepDataSet_my.time",{ 137 | df <- data.frame(Id=c(1,1,2,2,3,3,4,4,4), 138 | 
c1=c(10,20,20,30,45,10,1,4,3), 139 | time.start=c(0,10,0,10,0,5,0,7,12), 140 | my.end=c(10,12,10,15,5,15,7,12,15), 141 | W1=c(0,1,1,0,1,0,0,1,1)) 142 | 143 | baseline.df <- data.frame(Sub=c(1,2,3,4), 144 | Z1=c(0,1,1,0), 145 | time=c(12,15,15,15), 146 | DCO.time=c(20,25,25,25), 147 | has.event=c(1,0,0,0), 148 | arm=c(1,1,1,1)) 149 | 150 | td <- MakeTimeDepScore(data=df,Id="Id",time.start="time.start",time.end="my.end") 151 | ans <- .getTimeDepDataSet(baseline.df,td,"Sub","time",my.time=4) 152 | 153 | expect_equal(c(10,20,45,1),ans$c1) 154 | expect_equal(c(0,1,1,0),ans$W1) 155 | 156 | #also OK with Id in both columns 157 | baseline.df$Id <- baseline.df$Sub 158 | baseline.df$Sub <- NULL 159 | ans <- .getTimeDepDataSet(baseline.df,td,"Id","time",my.time=12) 160 | expect_equal(1:4,ans$Id) 161 | 162 | expect_equal(8,ncol(ans)) 163 | 164 | expect_equal(c(20,30,10,4),ans$c1) 165 | expect_equal(c(1,0,0,1),ans$W1) 166 | 167 | }) 168 | 169 | test_that("bootstrap_df_OK_with_timedep",{ 170 | df <- data.frame(Id=c(1,1,2,2,3,3,4,4,4), 171 | c1=c(10,20,20,30,45,10,1,4,3), 172 | time.start=c(0,10,0,10,0,5,0,7,12), 173 | my.end=c(10,12,10,15,5,15,7,12,15), 174 | W1=c(0,1,1,0,1,0,0,1,1)) 175 | 176 | baseline.df <- data.frame(Sub=c(1,2,3,4), 177 | Z1=c(0,1,1,0), 178 | time=c(12,15,15,15), 179 | DCO.time=c(20,25,25,25), 180 | has.event=c(1,0,0,0), 181 | arm=c(1,1,1,1)) 182 | 183 | baseline.df <- baseline.df[rep(1:4,2),] 184 | 185 | td <- MakeTimeDepScore(data=df,Id="Id",time.start="time.start",time.end="my.end") 186 | ans <- .getTimeDepDataSet(baseline.df,td,"Sub","time",my.time=4) 187 | 188 | expect_equal(rep(1:4,2),ans$Sub) 189 | expect_equal(c(0,1,1,0,0,1,1,0),ans$Z1) 190 | expect_equal(c(10,20,45,1,10,20,45,1),ans$c1) 191 | expect_equal(c(0,1,1,0,0,1,1,0),ans$W1) 192 | }) 193 | -------------------------------------------------------------------------------- /R/scoreStatSet.R: -------------------------------------------------------------------------------- 1 | #This file contains 
the functions associated with 2 | #the ScoreStatList object - the statistics from a set of 3 | #analyzed risk score imputed data sets. The summary.ScoreStatList object 4 | #code and object documentation are found here (it 5 | #implements the meth1, meth2 averaging methods in the Hsu and Taylor paper) 6 | 7 | ##' ScoreStatList 8 | ##' 9 | ##' The object containing the results of fitting models to 10 | ##' a \code{ScoreImputedSet} object. 11 | ##' 12 | ##' A \code{summary.ScoreStatList} has been implemented. 13 | ##' 14 | ##' The object contains the following 15 | ##' @slot fits A list of \code{ScoreStat} objects containing the model fits for 16 | ##' the imputed data sets 17 | ##' @slot statistics A \code{ScoreStatSet} object containing the statistics 18 | ##' @slot m The number of model fits 19 | ##' @name ScoreStatList.object 20 | ##' @seealso \code{\link{ScoreStatSet.object}} \code{\link{ScoreStat.object}} 21 | NULL 22 | 23 | ##' @export 24 | summary.ScoreStatList <- function(object,...){ 25 | summary(object$statistics) 26 | } 27 | 28 | ##' @name ExtractSingle 29 | ##' @export 30 | ExtractSingle.ScoreStatList <- function(x,index){ 31 | .internalExtract(x,index,fit=TRUE) 32 | } 33 | 34 | ##' An object which contains the test statistic and estimators for 35 | ##' a set of model fits to imputed data using risk score imputation 36 | ##' 37 | ##' The object is a Mx3 matrix, one row per imputed data set 38 | ##' and columns: estimate (the point estimates), var (their variances) 39 | ##' and Z (the test statistic). 
M must be > 4 40 | ##' 41 | ##' Note the Z should be ~ standard normal (so we do not use the chi_squared 42 | ##' test statistic see \code{\link{ScoreStat.object}} for further details) 43 | ##' 44 | ##' The summary.ScoreStatSet function will apply the MI averaging procedures 45 | ##' and estimates of the test statistic and p-value 46 | ##' @seealso \code{\link{summary.ScoreStatSet}} 47 | ##' @name ScoreStatSet.object 48 | NULL 49 | 50 | ##' S3 generic to create a \code{ScoreStatSet} object 51 | ##' @param x The object to convert into a \code{ScoreStatSet} object 52 | ##' @return A ScoreStatSet object 53 | ##' @seealso \code{\link{ScoreStatSet.object}} 54 | ##' @export 55 | ScoreStatSet <- function(x){ 56 | UseMethod("ScoreStatSet") 57 | } 58 | 59 | ##' @export 60 | ScoreStatSet.default <- function(x){ 61 | stop("No method ScoreStatSet for this object") 62 | } 63 | 64 | 65 | ##' @export 66 | ScoreStatSet.matrix <- function(x){ 67 | if(ncol(x)!=3){ 68 | stop("Matrix must have 3 columns") 69 | } 70 | 71 | if(any(x[,2]<0)){ 72 | stop("The second column of the matrix is a column of variances, they must be positive") 73 | } 74 | 75 | if(nrow(x)<5){ 76 | stop("Results for at least five data sets are required for the MI averaging calculation") 77 | } 78 | #NOTE(review): exact floating-point equality check below; Z values carrying any rounding error will be rejected - a tolerance comparison (e.g. isTRUE(all.equal(...))) may be intended, confirm with callers before changing 79 | if(any(x[,1]/sqrt(x[,2])!= x[,3])){ 80 | stop("The Z statistic [third col] should be the estimate [first col]/ sqrt(var [2nd col] )") 81 | } 82 | 83 | colnames(x) <- c("estimate","var","Z") 84 | class(x) <- "ScoreStatSet" 85 | x 86 | } 87 | 88 | 89 | ##' Summary object of \code{ScoreStatSet} object 90 | ##' 91 | ##' This object contains the multiple imputed 92 | ##' averages/p-values of a set of estimates from 93 | ##' risk score imputed data sets. 
94 | ##' 95 | ##' A \code{print.summary.ScoreStatSet} function has been implemented 96 | ##' 97 | ##' This object contains three lists meth1 and meth2 and methRubin 98 | ##' meth1 averages the point estimates to produce an F test statistic, 99 | ##' meth2 averages the test statistics and produces a t test statistic 100 | ##' and methRubin follows Rubin's standard rules and is used for calculating 101 | ##' confidence intervals 102 | ##' 103 | ##' See the vignette for further details. 104 | ##' 105 | ##' 106 | ##' meth1, meth2 and methRubin are lists with the following elements: 107 | ##' estimate: average estimator for meth1, NOTE: for meth2 this is the average test statistic, \cr 108 | ##' var: estimate of variance of "estimate" field \cr 109 | ##' test.stat: test statistic \cr 110 | ##' df: degrees of freedom of test statistic \cr 111 | ##' distribution: distribution of statistical test (i.e. F or t) \cr 112 | ##' p.value: p-value of test \cr 113 | ##' 114 | ##' 115 | ##' @name summary.ScoreStatSet 116 | ##' @aliases summary.ScoreStatSet.object 117 | NULL 118 | 119 | ##' @export 120 | summary.ScoreStatSet <- function(object,...){ 121 | 122 | retVal <- list() 123 | 124 | #the same names are used as in Hsu's paper 125 | M <- nrow(object) 126 | 127 | theta.bar <- mean(object[,"estimate"]) 128 | U1 <- mean(object[,"var"]) 129 | B1 <- var(object[,"estimate"]) 130 | V1 <- U1 + (1+1/M)*B1 131 | D <- theta.bar^2/V1 132 | t <- M-1 133 | r <- (1+1/M)*B1/U1 134 | v1 <- 4+(t-4)*(1+(1-2/t)/r)^2 135 | 136 | if(M==5 && r==0){ 137 | warning("Cannot calculate degrees of freedom for meth1 when r=0 and M=5") 138 | } 139 | 140 | retVal[["meth1"]] <- list(estimate=theta.bar, 141 | var=V1, 142 | test.stat=D, 143 | df=c(1,v1), 144 | distribution="F", 145 | p.value=1-pf(D,df1=1,df2=v1)) 146 | 147 | Z.bar <- mean(object[,"Z"]) 148 | B2 <- var(object[,"Z"]) 149 | V2 <- 1 + (1+1/M)*B2 150 | v2 <- (M-1)*(1 + (M/(M+1))*(1/B2) )^2 151 | 152 | retVal[["meth2"]] <-list(estimate=Z.bar, 153 | 
var=V2, 154 | test.stat=Z.bar/sqrt(V2), 155 | df=v2, 156 | distribution="t", 157 | p.value=2*(1-pt(abs(Z.bar/sqrt(V2)),v2))) 158 | 159 | rubin.df <- (M-1)*(1+ U1/((1+1/M)*B1))^2 160 | 161 | retVal[["methRubin"]] <- list(estimate=theta.bar, 162 | var=V1, 163 | test.stat=theta.bar/sqrt(V1), 164 | df=rubin.df, 165 | distribution="t", 166 | p.value=2*(1-pt(abs(theta.bar/sqrt(V1)),rubin.df))) 167 | 168 | class(retVal) <- "summary.ScoreStatSet" 169 | retVal 170 | } 171 | 172 | 173 | ##' @export 174 | print.summary.ScoreStatSet <- function(x,...){ 175 | cat("Summary statistics of Imputed Data Sets\n") 176 | cat("Method 1 (averaging of point estimates):\n") 177 | cat("Estimator:",x$meth1$estimate,"\n") 178 | cat("Var of Estimator:",x$meth1$var,"\n") 179 | cat("Test statistic:",x$meth1$test.stat,"\n") 180 | cat("Distribution:",x$meth1$distribution,"\n") 181 | cat("with ",x$meth1$df[1],", ",x$meth1$df[2]," degrees of freedom\n",sep="") 182 | cat("giving a p-value of",x$meth1$p.value,"\n\n") 183 | 184 | cat("Method 2 (averaging of test statistics):\n") 185 | cat("Test statistic:",x$meth2$test.stat,"\n") 186 | cat("Distribution:",x$meth2$distribution,"\n") 187 | cat("with",x$meth2$df,"degrees of freedom\n") 188 | cat("giving a p-value of",x$meth2$p.value,"\n\n") 189 | } 190 | 191 | ##' @export 192 | confint.summary.ScoreStatSet <- function(object, parm, level = 0.95, ...){ 193 | if(!.internal.is.finite.number(level) || level <= 0 || level >= 1){ 194 | stop("Invalid level argument") 195 | } 196 | 197 | standard.error <- sqrt(object$methRubin$var) 198 | t_val <- qt(1-(1-level)/2,df=object$methRubin$df) 199 | retVal <- object$methRubin$estimate + c(-1,1)*t_val*standard.error 200 | names(retVal) <- c(paste(100*(1-level)/2,"%",sep=""),paste(100*(1 - (1-level)/2) ,"%",sep="")) 201 | retVal 202 | } 203 | -------------------------------------------------------------------------------- R/validation.R: -------------------------------------------------------------------------------- 1 | 
#This file contains the code to validate the riskscore imputation arguments and 2 | #the formula arguments and various subfunctions 3 | 4 | #check the formula is valid (specifically there is no left hand side, should not be tt or cluster 5 | #and if arm is not NULL then the 'arm' is the first term on the right hand side and there are no 6 | #interactions between arm and other covariates) 7 | .validRHSFormula <- function(formula,arm=NULL){ 8 | 9 | if(class(formula)!="formula") stop("Invalid formula is not of type formula") 10 | 11 | if(length(.getResponse(formula))!=0){ 12 | stop("formula cannot have any variables on the left hand side.") 13 | } 14 | 15 | tms<-terms(formula,specials=c("strata","cluster","tt")) 16 | 17 | if(length(untangle.specials(tms,"tt")$vars)>0 || 18 | length(untangle.specials(tms,"cluster")$vars)>0){ 19 | stop("Cannot use tt or cluster in formula for gamma/score imputation") 20 | } 21 | 22 | 23 | if(is.null(arm)) return(NULL) 24 | 25 | #validate the right hand side of the formula 26 | k <- attr(terms(formula),"term.labels") 27 | if(length(k)==0){ 28 | stop("Empty formula!") 29 | } 30 | 31 | first_term <- k[[1]] 32 | if(first_term!=arm){ 33 | stop("The first term of the formula must be the treatment group") 34 | } 35 | 36 | if(length(k)>1){ 37 | covariates <- tail(k,-1) 38 | 39 | ans <- unlist(lapply(covariates,function(x){ 40 | first_term %in% unlist(strsplit(x,split=c(":"))) 41 | })) 42 | 43 | if(any(ans)){ 44 | stop(error=paste("Model formula cannot include interactions between",arm, "and covariates.", 45 | "The", arm, "must be the first model variable")) 46 | } 47 | } 48 | 49 | } 50 | 51 | 52 | #from http://stackoverflow.com/questions/13217322/how-to-reliably-get-dependent-variable-name-from-formula-object 53 | .getResponse <- function(formula) { 54 | tt <- terms(formula) 55 | vars <- as.character(attr(tt, "variables"))[-1] ## [1] is the list call 56 | response <- attr(tt, "response") # index of response var 57 | vars[response] 58 | } 59 | 
60 | #check that the columns [time.start,time.end] for a single 61 | #subject are ordered correctly, the initial time.start = 0, 62 | #time.end > time.start and there are no gaps (i.e. [0,1], [2,10]) 63 | #@param data a data frame with the time dep. covariates for a 64 | #single subject 65 | checkPanelling <- function(data){ 66 | if(data$time.start[1] != 0){ 67 | stop("The time.start for the first row for subject",data$Id[1],"must be 0") 68 | } 69 | if(nrow(data)>1 && !all.equal(data$time.start[2:nrow(data)],data$time.end[1:(nrow(data)-1)])){ 70 | stop("There cannot be gaps in the [time.start,time.end) intervals, check subject",data$Id[1]) 71 | } 72 | if(any(data$time.end <= data$time.start)){ 73 | stop("The time.end cannot be <= time.start, check subject",data$Id[1]) 74 | } 75 | } 76 | 77 | 78 | # Check duplicated elements of a vector are contiguous 79 | # 80 | # for example c(1,1,1,2,2,4,4,5,5) is contiguous 81 | # but c(1,1,2,2,1,4,5,4,5) is not 82 | # @param vec a vector of values 83 | checkContiguous <- function(vec){ 84 | if(length(vec)<2) return(TRUE) 85 | known.vals <- vec[1] 86 | for(i in 2:length(vec)){ 87 | if(vec[i] != vec[i-1]){ 88 | if(!vec[i] %in% known.vals){ 89 | known.vals <- c(known.vals,vec[i]) 90 | } 91 | else{ 92 | stop(paste("The order of the data frame is incorrect. 
All rows with the same", 93 | "Subject ID must be contiguous")) 94 | } 95 | } 96 | } 97 | } 98 | 99 | is.valid.call <- function(Call){ 100 | 101 | indx <- match(c("subset", "na.action"), names(Call),nomatch=0L) 102 | if(any(indx[1:2]!=0)){ 103 | stop("Cannot use na.action or subset argument with this imputation ", 104 | "function (na.fail is used when fitting the Cox models)") 105 | } 106 | } 107 | 108 | 109 | .additionalScore.validate <- function(data,col.control,Call){ 110 | 111 | is.valid.call(Call) 112 | 113 | if(any(c("impute.time","impute.event") %in% colnames(data))){ 114 | stop("Cannot use a data frame with columns impute.time or impute.event") 115 | } 116 | 117 | #validate col.control (note this is needed here in case people do not use col.headings function) 118 | .validcoloption(col.control) 119 | 120 | #validate col.control with data 121 | lapply(col.control,function(x){ 122 | if(!is.null(x) && !x %in% colnames(data))stop("Invalid column name '",x,"'not found in data frame")}) 123 | 124 | if(nrow(data)==0){ 125 | stop("Empty data frame!") 126 | } 127 | 128 | if(any(!is.numeric(data[,col.control$time]))){ 129 | stop("time on study must be numeric") 130 | } 131 | 132 | if(any(!is.numeric(data[,col.control$DCO.time]))){ 133 | stop("DCO time must be numeric") 134 | } 135 | 136 | #check time is positive 137 | if(any(data[,col.control$time]<= 0)){ 138 | stop("Time on study must be positive") 139 | } 140 | 141 | #DCO.time is <= time 142 | if(any(data[,col.control$DCO.time] < data[,col.control$time])){ 143 | stop("DCO.time must be >= time for all subjects ") 144 | } 145 | 146 | #unique IDs 147 | if(nrow(data)!=length(unique(data[,col.control$Id]))){ 148 | stop("Subject Ids must be unique") 149 | } 150 | 151 | if(any(!data[,col.control$has.event] %in% c(0,1))){ 152 | stop("Event indicator column can only contain 0s and 1s") 153 | } 154 | 155 | if(any(!is.logical(data[,col.control$to.impute]))){ 156 | stop("To impute column can only contain TRUE and FALSE") 157 | 
} 158 | 159 | } 160 | 161 | 162 | validate.Score.Arguments <- function(data,col.control,NN.control,time.dep,Call,m){ 163 | 164 | #validation routines additional 165 | .additionalScore.validate(data,col.control,Call=Call) 166 | 167 | #check m 168 | if(!.internal.is.finite.number(m) ||!.internal.is.wholenumber(m) || m < 5){ 169 | stop("m must be at least 5") 170 | } 171 | 172 | #check have event.model 173 | indx <- match(c("event.model"), names(Call),nomatch=0L) 174 | if(indx[1]==0){ 175 | stop("Missing event.model argument") 176 | } 177 | 178 | #event.model and censor.model are validated later 179 | #validate NN.control (note this is needed here in case people do not use NN.options function) 180 | .validNNoption(NN.control) 181 | 182 | #check arm is two level factor 183 | if(!is.factor(data[,col.control$arm])|| length(levels(data[,col.control$arm]))!=2){ 184 | stop("Treatment group must be a two level factor variable") 185 | } 186 | 187 | #censor.type validation 188 | if("using_has.event_col" %in% colnames(data)){ 189 | stop("Cannot use a data frame with column name using_has.event_col") 190 | } 191 | 192 | if(!is.null(col.control$censor.type)){ 193 | if(any(!data[,col.control$censor.type] %in% 0:2)){ 194 | stop("Censor type must be 0, 1 or 2") 195 | } 196 | if(any(data[data[,col.control$has.event]==1,col.control$censor.type]!=0)){ 197 | stop("Censor type for subjects who have event must=0") 198 | } 199 | if(any(data[data[,col.control$has.event]==0,col.control$censor.type]==0)){ 200 | stop("Censor type for subjects who do not have event cannot be=0") 201 | } 202 | } 203 | 204 | 205 | if(!is.null(time.dep)){ 206 | #timedep class 207 | if(!"ScoreTD" %in% class(time.dep)){ 208 | stop("time.dep must be of type ScoreTD") 209 | } 210 | 211 | ans <- intersect(colnames(time.dep),colnames(data)) 212 | if(length(ans)>1 || (length(ans)==1 && ans[1] != "Id") ){ 213 | stop("Cannot have columns with the same name in data and time.dep data frames") 214 | } 215 | 216 | } 217 | 218 
| } 219 | 220 | -------------------------------------------------------------------------------- /tests/testthat/test-gammaStat.R: -------------------------------------------------------------------------------- 1 | context("gammaStat") 2 | 3 | test_that("GammaStat",{ 4 | load("gamma_test.rda") 5 | 6 | set.seed(10) 7 | imputed <- gammaImpute(formula=Surv(Yi,delta)~Z+W1, 8 | gamma.dataset,m=2,DCO.time="DCO.time", 9 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 10 | gamma.factor=1,gamma="gamma") 11 | 12 | df <- ExtractSingle(imputed,1) 13 | 14 | expect_error(ImputeStat(df,method="Wilcoxon")) 15 | expect_error(ImputeStat(df,method="logrank")) 16 | expect_that(ImputeStat(df,method="Cox"),not(throws_error())) #no error uses default formula 17 | 18 | expect_error(ImputeStat(df,method="Cox",formula=~cluster(W1))) 19 | expect_error(ImputeStat(df,method="Cox",formula=~Z+tt(W1))) 20 | expect_error(ImputeStat(df,method="Cox",formula=Surv(impute.time,impute.event)~Z)) 21 | 22 | 23 | ans <- ImputeStat(df,method="Cox",formula=~Z+W1) 24 | expect_equal("GammaStat",class(ans)) 25 | expect_equal("Cox",ans$method) 26 | 27 | mod <- coxph(Surv(impute.time,impute.event)~Z+W1,data=df$data,model=TRUE) 28 | 29 | #change Call so that mod should equal ans$model 30 | mod$call <- ans$model$call 31 | expect_equal(mod,ans$model) 32 | 33 | vars <- ans$var 34 | expect_equal(c("Z1","Z2","W1"),names(vars)) 35 | names(vars) <- NULL 36 | expect_true(abs(vcov(mod)[1,1]-vars[1])<1e-8) 37 | expect_true(abs(vcov(mod)[3,3]-vars[3])<1e-8) 38 | 39 | expect_equal(coefficients(mod),ans$estimate) 40 | 41 | }) 42 | 43 | 44 | test_that("weibull_and_exponential",{ 45 | load("gamma_test.rda") 46 | 47 | set.seed(10) 48 | imputed <- gammaImpute(formula=Surv(Yi,delta)~Z+W1, 49 | gamma.dataset,m=2,gamma="gamma", 50 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 51 | gamma.factor=1,DCO.time="DCO.time") 52 | 53 | df <- ExtractSingle(imputed,1) 54 | ans <- ImputeStat(df,method="weibull") 55 | 
56 | expect_equal("weibull",ans$method) 57 | mod <- survreg(Surv(impute.time,impute.event)~Z+W1,data=df$data,dist="weibull") 58 | expect_equal(mod$coefficients,ans$estimate) 59 | 60 | #Do not pull out scales at least for now 61 | expect_equal(ans$var,diag(vcov(mod))[1:4]) 62 | 63 | #Test strata 64 | ans <- ImputeStat(df,method="weibull",formula=~Z+strata(W1)) 65 | mod <- survreg(Surv(impute.time,impute.event)~Z+strata(W1),data=df$data,dist="weibull") 66 | expect_equal(mod$coefficients,ans$estimate) 67 | expect_equal(ans$var,diag(vcov(mod))[1:3]) 68 | 69 | #Test exponential 70 | ans <- ImputeStat(df,method="exponential") 71 | expect_equal("exponential",ans$method) 72 | mod <- survreg(Surv(impute.time,impute.event)~Z+W1,data=df$data,dist="exponential") 73 | expect_equal(mod$coefficients,ans$estimate) 74 | expect_equal(ans$var,diag(vcov(mod))) 75 | }) 76 | 77 | 78 | test_that("default_formula_behaves_as_expected",{ 79 | load("gamma_test.rda") 80 | 81 | set.seed(10) 82 | imputed <- gammaImpute(formula=Surv(Yi,delta)~Z, 83 | gamma.dataset,m=2,gamma="gamma", 84 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 85 | gamma.factor=1,DCO.time="DCO.time") 86 | 87 | df <- ExtractSingle(imputed,1) 88 | ans <- ImputeStat(df,method="Cox",formula=~Z) 89 | 90 | expect_equal(formula("Surv(impute.time,impute.event)~Z"),ans$model$formula) 91 | 92 | ans2 <- ImputeStat(df,method="Cox",formula=~Z+W1) 93 | expect_equal(formula("Surv(impute.time,impute.event)~Z+W1"),ans2$model$formula) 94 | }) 95 | 96 | 97 | test_that("GammaStatSet",{ 98 | load("gamma_test.rda") 99 | set.seed(10) 100 | imputed <- gammaImpute(formula=Surv(Yi,delta)~Z+W1, 101 | gamma.dataset,m=2,gamma="gamma", 102 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 103 | gamma.factor=1,DCO.time="DCO.time") 104 | 105 | ans <- ImputeStat(imputed,method="Cox",formula=~W1+strata(Z)) 106 | 107 | expect_equal("GammaStatList",class(ans)) 108 | expect_equal(2,ans$m) 109 | 110 | df <- ExtractSingle(imputed,1) 111 
| expect_equal(ExtractSingle(ans,1),ImputeStat(df,method="Cox",formula=~W1+strata(Z))) 112 | 113 | expect_equal(2,length(ans$fits)) 114 | expect_equal(c("estimates","vars"),names(ans$statistics)) 115 | expect_equal("matrix",class(ans$statistics$estimates)) 116 | expect_equal("matrix",class(ans$statistics$vars)) 117 | expect_equal(2,nrow(ans$statistics$estimates)) 118 | expect_equal(1,ncol(ans$statistics$vars)) 119 | expect_equal("W1",colnames(ans$statistics$vars)) 120 | 121 | expect_equal(ans$fits[[1]]$estimate,ans$statistics$estimates[1,1]) 122 | expect_equal(ans$fits[[2]]$estimate,ans$statistics$estimates[2,1]) 123 | expect_equal(ans$fits[[1]]$var,ans$statistics$vars[1,1]) 124 | expect_equal(ans$fits[[2]]$var,ans$statistics$vars[2,1]) 125 | 126 | }) 127 | 128 | test_that("GammaStatSet_multiplecovar",{ 129 | load("gamma_test.rda") 130 | set.seed(10) 131 | imputed <- gammaImpute(formula=Surv(Yi,delta)~Z+W1, 132 | gamma.dataset,m=3,gamma="gamma", 133 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 134 | gamma.factor=1,DCO.time="DCO.time") 135 | 136 | 137 | ans <- ImputeStat(imputed,method="Cox",formula=~Z+strata(W1)) 138 | 139 | expect_equal(3,length(ans$fits)) 140 | expect_equal("matrix",class(ans$statistics$estimates)) 141 | expect_equal("matrix",class(ans$statistics$vars)) 142 | expect_equal(3,nrow(ans$statistics$estimates)) 143 | expect_equal(2,ncol(ans$statistics$vars)) 144 | expect_equal(c("Z1","Z2"),colnames(ans$statistics$vars)) 145 | 146 | expect_equal(ans$fits[[1]]$estimate[1],ans$statistics$estimates[1,1]) 147 | expect_equal(ans$fits[[2]]$estimate[1],ans$statistics$estimates[2,1]) 148 | expect_equal(ans$fits[[3]]$estimate[2],ans$statistics$estimates[3,2]) 149 | expect_equal(ans$fits[[1]]$var[2],ans$statistics$vars[1,2]) 150 | expect_equal(ans$fits[[2]]$var[1],ans$statistics$vars[2,1]) 151 | expect_equal(ans$fits[[3]]$var[2],ans$statistics$vars[3,2]) 152 | 153 | }) 154 | 155 | test_that("summary_1_covar",{ 156 | #A mock GammaStatList object 
157 | 158 | estimates <- matrix(c(2,4),ncol=1) 159 | vars <- matrix(c(0.5,0.8),ncol=1) 160 | 161 | colnames(estimates) <- "W1" 162 | colnames(vars) <- "W1" 163 | 164 | 165 | fits <- list(m=2, 166 | statistics=list(estimates=estimates, 167 | vars=vars)) 168 | class(fits) <- "GammaStatList" 169 | 170 | ans <- summary(fits) 171 | expect_equal("matrix",class(ans)) 172 | 173 | expect_equal("W1",rownames(ans)) 174 | expect_equal(c("est","se","t","df","Pr(>|t|)","lo 95","hi 95"),colnames(ans)) 175 | 176 | expect_equal(3,ans[1,1]) 177 | expect_equal(sqrt(3.65),ans[1,2]) 178 | expect_equal(3/sqrt(3.65),ans[1,3]) 179 | expect_equal((1+0.65/3)^2,ans[1,4]) 180 | expect_equal(2*(1-pt(3/sqrt(3.65),df=(1+0.65/3)^2)),ans[1,5]) 181 | tval <- qt(0.975,df=(1+0.65/3)^2) 182 | expect_equal(3-tval*sqrt(3.65),ans[1,6]) 183 | expect_equal(3+tval*sqrt(3.65),ans[1,7]) 184 | 185 | }) 186 | 187 | test_that("summary_3_covar",{ 188 | 189 | #A mock GammaStatList object 190 | 191 | estimates <- matrix(1:9,ncol=3) 192 | vars <- matrix(seq(0.1,0.9,0.1),ncol=3) 193 | 194 | colnames(estimates) <- c("W1","R4","Q") 195 | colnames(vars) <- c("W1","R4","Q") 196 | fits <- list(m=3, 197 | statistics=list(estimates=estimates, 198 | vars=vars)) 199 | class(fits) <- "GammaStatList" 200 | 201 | ans <- summary(fits) 202 | expect_equal(c("W1","R4","Q"),rownames(ans)) 203 | expect_equal(5,ans[2,1]) 204 | expect_equal(sqrt(0.8+4/3),ans[3,2]) 205 | expect_equal(2/sqrt(0.2+4/3),ans[1,3]) 206 | expect_equal(2*(1+0.5*3/4)^2,ans[2,4]) 207 | }) -------------------------------------------------------------------------------- /R/gammaImputeData.R: -------------------------------------------------------------------------------- 1 | #This file contains the code and object documentation associated with 2 | #generating the imputed data sets usign gamma imputation see validationGamma.R 3 | #for validation routines, and internalGamma.R for subfunctions 4 | 5 | ##' \code{GammaImputedData} object 6 | ##' 7 | ##' An object which 
contains a gamma-imputed data set
(33) 42 | ##' 4681-4694 43 | ##' 44 | ##' @details See the Gamma Imputation vignette for further details 45 | ##' 46 | ##' @param formula The model formula to be used when fitting the models to calculate 47 | ##' the cumulative hazard. A formula for coxph can include strata terms but not 48 | ##' cluster or tt and only right-censored \code{Surv} objects can be used. 49 | ##' Note the function does not allow multiple strata to be written as \code{strata(W1)+strata(W2)}, 50 | ##' use \code{strata(W1,W2)} instead 51 | ##' @param data A time to event data set for which event times are to be imputed 52 | ##' @param m The number of imputations to be created 53 | ##' @param gamma Either column name containing the value of gamma or a vector of values giving the subject specific 54 | ##' size of the step change in the log hazard at censoring. If a subject has NA in this column then no imputation is performed 55 | ##' for this subject (i.e. the subject's censored time remains unchanged after imputation). If a subject has already had an 56 | ##' event then the value of gamma is ignored. If \code{gamma.factor} is also used then the subject specific gamma 57 | ##' are all multipled by \code{gamma.factor}. At least one of \code{gamma} and \code{gamma.factor} must be included. 58 | ##' @param gamma.factor If used, a single numeric value. If no \code{gamma} then the step change in log 59 | ##' hazard for all subjects at censoring is given by \code{gamma.factor}. If \code{gamma} is used 60 | ##' then for each subject, the step change in log hazard is given by \code{gamma.factor} multiplied by the subject specific gamma. 61 | ##' At least one of \code{gamma} and \code{gamma.factor} must be included. 62 | ##' @param bootstrap.strata The strata argument for stratified bootstrap sampling, see argument \code{strata} 63 | ##' for the function \code{boot::boot} for further details. 
If argument is not used then standard sampling with 64 | ##' replacement will be used 65 | ##' @param DCO.time Either column name containing the subject's data cutoff time or a vector 66 | ##' of DCO.times for the subjects or a single number to be used as the DCO.time for all subjects 67 | ##' (if imputed events are > this DCO.time then subjects are censored at DCO.time in imputed data sets) 68 | ##' @param ... Additional parameters to be passed into the model fit function 69 | ##' @param parallel The type of parallel operation to be used (if any). 70 | ##' @param ncpus integer: number of processes to be used in parallel operation: typically one would chose this to be 71 | ##' the number of available CPUs 72 | ##' @param cl An optional parallel or snow cluster for use if \code{parallel="snow"}. If not supplied, a 73 | ##' cluster on the local machine is created for the duration of the call. 74 | ##' @return A \code{GammaImputedSet.object} containing the imputed data sets 75 | ##' 76 | ##' @seealso \code{\link{GammaImputedSet.object}} \code{\link{GammaImputedData.object}} 77 | ##' @examples 78 | ##' 79 | ##' \dontrun{ 80 | ##' data(nwtco) 81 | ##' nwtco <- nwtco[1:500,] 82 | ##' 83 | ##' #creating 2 imputed data sets (m=2) for speed, would normally create more 84 | ##' ans <- gammaImpute(formula=Surv(edrel,rel)~histol + instit, 85 | ##' data = nwtco, m=2, gamma.factor=1, DCO.time=6209) 86 | ##' 87 | ##' #subject specific gamma (multiplied by gamma.factor to give the jump) 88 | ##' #NA for subjects that are not to be imputed 89 | ##' jumps <- c(rep(NA,10),rep(1,490)) 90 | ##' DCO.values <- rep(6209,500) 91 | ##' 92 | ##' ans.2 <- gammaImpute(formula=Surv(edrel,rel)~histol + instit + strata(stage), 93 | ##' data = nwtco, m=2, bootstrap.strata=strata(nwtco$stage), 94 | ##' gamma=jumps, gamma.factor=1, DCO.time=DCO.values) 95 | ##' 96 | ##' #can also use column names 97 | ##' nwtco$gamma <- jumps 98 | ##' nwtco$DCO.time <- DCO.values 99 | ##' ans.3 <- 
gammaImpute(formula=Surv(edrel,rel)~histol + instit + strata(stage), 100 | ##' data = nwtco, m=2, bootstrap.strata=strata(nwtco$stage), 101 | ##' gamma="gamma", DCO.time="DCO.time") 102 | ##' } 103 | ##' 104 | ##' @export 105 | gammaImpute <- function(formula, data, m, gamma, gamma.factor, 106 | bootstrap.strata=rep(1,nrow(data)), DCO.time,..., 107 | parallel = c("no", "multicore", "snow")[1], ncpus = 1L, cl = NULL){ 108 | 109 | #First sort out missing arguments for gamma 110 | if(missing(gamma) && missing(gamma.factor)){ 111 | stop("At least one of gamma and gamma.factor must be included") 112 | } 113 | if(missing(gamma.factor)) gamma.factor <- 1 114 | if(!missing(gamma) && is.null(gamma)) stop("gamma cannot be NULL") 115 | if(missing(gamma)) gamma <- NULL 116 | 117 | #validate RHS of formula 118 | .validRHSFormula(getCovarFormula(formula)) 119 | 120 | #Get the Surv object 121 | surv.object <- model.frame(formula,data)[,1] 122 | surv.times <- as.matrix(surv.object) 123 | if(class(surv.object) != "Surv" || attr(surv.object,"type")!="right" || ncol(surv.times)!=2){ 124 | stop("Can only use a right censored Surv object on the left hand side of the formula") 125 | } 126 | 127 | #perform main validation of input arguments 128 | validate.Gamma.arguments(data,surv.times,m,gamma,bootstrap.strata, 129 | gamma.factor,DCO.time,match.call(expand.dots = TRUE)) 130 | validate.parallel.arguments(parallel,ncpus,cl) 131 | 132 | #setup DCO.time column if needed 133 | if(!is.character(DCO.time)){ 134 | data[,"internalDCO.time"] <- if(length(DCO.time)>1) DCO.time else rep(DCO.time,nrow(data)) 135 | DCO.time <- "internalDCO.time" 136 | } 137 | 138 | #create the list of bootstrapped data indices 139 | boot.indices <- boot::boot(data=data,R=m,statistic=function(data,indices){return(indices)},strata=bootstrap.strata) 140 | boot.indices <- lapply(seq_len(m),function(x){boot.indices$t[x,]}) 141 | 142 | #set up gamma 143 | data$internal_gamma_val <- getGamma(data,gamma,gamma.factor) 144 | 
145 | #perform imputation 146 | if(parallel=="no"){ 147 | imputes <- lapply(boot.indices,.singleGammaImpute,data=data, 148 | surv.times=surv.times,DCO.time=DCO.time,formula=formula,...) 149 | } 150 | else{ 151 | imputes <- parallelRun(parallel,ncpus,cl,lapply.list=boot.indices, 152 | FUN=.singleGammaImpute,data,surv.times,DCO.time,formula,...) 153 | } 154 | 155 | #package up results 156 | retVal <- .createImputedSet(m,col.control=NULL,data,imputes,"Gamma",getCovarFormula(formula)) 157 | retVal$gamma.factor <- gamma.factor 158 | retVal 159 | } 160 | 161 | ##' @name ExtractSingle 162 | ##' @export 163 | ExtractSingle.GammaImputedSet <- function(x,index){ 164 | retVal <- .internalExtract(x,index,fit=FALSE) 165 | class(retVal) <- "GammaImputedData" 166 | retVal 167 | } 168 | -------------------------------------------------------------------------------- /tests/testthat/test-gammaImputedata.R: -------------------------------------------------------------------------------- 1 | context("gammaImputeData") 2 | 3 | #see validate_gamma_arguments in test-validation for 4 | #input parameter validation 5 | 6 | 7 | test_that("GammaImputeSet_object",{ 8 | 9 | set.seed(25) 10 | load("gamma_test.rda") 11 | 12 | imputed.data.sets <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 13 | data = gamma.dataset, 14 | m=2, DCO.time="DCO.time", 15 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 16 | gamma.factor=1,gamma="gamma") 17 | 18 | expect_equal("GammaImputedSet",class(imputed.data.sets)) 19 | expect_equal(2,imputed.data.sets$m) 20 | 21 | ans <- imputed.data.sets$data 22 | ans$internal_gamma_val <- NULL 23 | 24 | expect_equal(gamma.dataset,ans) 25 | expect_equal("matrix",class(imputed.data.sets$impute.time)) 26 | expect_equal("matrix",class(imputed.data.sets$impute.event)) 27 | 28 | expect_equal(2,ncol(imputed.data.sets$impute.time)) 29 | expect_equal(nrow(gamma.dataset),nrow(imputed.data.sets$impute.event)) 30 | 31 | expect_equal(1,imputed.data.sets$gamma.factor) 32 | 
expect_equal(formula(~Z+strata(W1)),imputed.data.sets$default.formula) 33 | 34 | }) 35 | 36 | test_that("gamma.factor",{ 37 | load("gamma_test.rda") 38 | 39 | set.seed(410) 40 | gamma.dataset$gamma <- runif(nrow(gamma.dataset)) 41 | 42 | set.seed(10) 43 | imputed.data.sets <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 44 | data = gamma.dataset, 45 | m=2,gamma="gamma", 46 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 47 | gamma.factor=-1.765,DCO.time="DCO.time") 48 | 49 | expect_equal(imputed.data.sets$data$internal_gamma_val,-1.765*gamma.dataset$gamma) 50 | 51 | gamma.dataset$gamma <- gamma.dataset$gamma*-1.765 52 | set.seed(10) 53 | imputed.data.sets2 <-gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 54 | data = gamma.dataset, 55 | m=2,gamma="gamma", 56 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 57 | gamma.factor=1,DCO.time="DCO.time") 58 | 59 | 60 | expect_equal(imputed.data.sets$impute.time,imputed.data.sets2$impute.time) 61 | expect_equal(imputed.data.sets$impute.event,imputed.data.sets2$impute.event) 62 | }) 63 | 64 | test_that("inf.gamma.factor",{ 65 | load("gamma_test.rda") 66 | 67 | set.seed(10) 68 | inf.gamma <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 69 | data = gamma.dataset, 70 | m=2,gamma="gamma",DCO.time="DCO.time", 71 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 72 | gamma.factor=Inf) 73 | set.seed(10) 74 | neg.inf.gamma <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 75 | data = gamma.dataset, 76 | m=2,gamma="gamma",DCO.time="DCO.time", 77 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 78 | gamma.factor=-Inf) 79 | 80 | inf.gamma.df <- ExtractSingle(inf.gamma,1) 81 | neg.inf.gamma.df <- ExtractSingle(neg.inf.gamma,2) 82 | 83 | index <- which(inf.gamma.df$data$to.impute==1) 84 | 85 | #+Inf gamma implies have event at censoring 86 | expect_equal(inf.gamma.df$data$impute.time[index],inf.gamma.df$data$Yi[index]) 87 | 88 | #-Inf gamma implies have no event 89 | 
expect_equal(neg.inf.gamma.df$data$impute.time[index],neg.inf.gamma.df$data$DCO.time[index]) 90 | }) 91 | 92 | test_that("impute.times.non_stochastic_tests",{ 93 | load("gamma_test.rda") 94 | 95 | df <- gamma.dataset 96 | df$gamma[1:49] <- as.numeric(NA) 97 | df$gamma[50] <- NA 98 | df$gamma[50:100] <- 1 99 | 100 | #check data set is suitable for tests below 101 | expect_false(all(is.na(df$gamma[df$delta]))) 102 | expect_true(any(is.na(df$gamma[df$delta]))) 103 | expect_false(all(is.na(df$gamma[!df$delta]))) 104 | expect_true(any(is.na(df$gamma[!df$delta]))) 105 | 106 | df$DCO.time <- df$Yi + runif(nrow(df),1,5) 107 | 108 | ans <- ExtractSingle(gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1),df, 109 | m=2,gamma="gamma",DCO.time="DCO.time", 110 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 111 | gamma.factor=1),index=1) 112 | 113 | expect_equal("GammaImputedData",class(ans)) 114 | expect_equal(formula("~Z+strata(W1)"),ans$default.formula) 115 | 116 | #gamma = NA implies no imputation 117 | index <- which(is.na(df$gamma)) 118 | expect_equal(ans$data$Yi[index],ans$data$impute.time[index]) 119 | expect_equal(as.numeric(ans$data$delta[index]),ans$data$impute.event[index]) 120 | 121 | expect_true(all(ans$data$DCO.time>=ans$data$impute.time)) 122 | expect_true(all(ans$data$Yi<=ans$data$impute.time)) 123 | 124 | #All DCO.time are censored 125 | expect_true(all(!ans$data$impute.event[ans$data$DCO.time==ans$data$impute.time])) 126 | 127 | #gamma=NA, with already having event remains unchanged 128 | index <- which(ans$data$delta) 129 | expect_true(all(as.logical(ans$data$impute.event[index]))) 130 | expect_true(all(ans$data$Yi[index]==ans$data$impute.time[index])) 131 | }) 132 | 133 | test_that("internal_gamma_val_same_with_colname_or_vect",{ 134 | load("gamma_test.rda") 135 | 136 | set.seed(2323) 137 | gamma.dataset$G <- runif(n=nrow(gamma.dataset)) 138 | 139 | set.seed(2323) 140 | use.gamma.col <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 141 | data = 
gamma.dataset, 142 | m=2,DCO.time="DCO.time",gamma="G", 143 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1)) 144 | 145 | gamma.vec <- gamma.dataset$G 146 | 147 | set.seed(2323) 148 | use.gamma.vec <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 149 | data = gamma.dataset, 150 | m=2,DCO.time="DCO.time",gamma=gamma.vec, 151 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1)) 152 | 153 | expect_equal(use.gamma.col,use.gamma.vec) 154 | expect_equal(use.gamma.col$data$internal_gamma_val,gamma.vec) 155 | 156 | }) 157 | 158 | 159 | test_that("gamma_and_gamma_factor",{ 160 | load("gamma_test.rda") 161 | 162 | gamma.dataset$G <- gamma.dataset$gamma 163 | 164 | set.seed(10) 165 | use.gamma.factor <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 166 | data = gamma.dataset, 167 | m=2,DCO.time="DCO.time", 168 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 169 | gamma.factor=5) 170 | 171 | set.seed(10) 172 | gamma.dataset$G <- 5 173 | use.gamma <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 174 | data = gamma.dataset, 175 | m=2,DCO.time="DCO.time",gamma="G", 176 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1)) 177 | 178 | gvec <- gamma.dataset$G 179 | set.seed(10) 180 | use.gamma.vector <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 181 | data = gamma.dataset, 182 | m=2,DCO.time="DCO.time",gamma=gvec, 183 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1)) 184 | 185 | expect_equal(use.gamma.vector,use.gamma) 186 | 187 | #everything except gamma.factor and gamma column are the same 188 | use.gamma$gamma.factor <- NULL 189 | use.gamma.factor$gamma.factor <- NULL 190 | use.gamma$data$G <- use.gamma.factor$data$G 191 | expect_equal(use.gamma,use.gamma.factor) 192 | 193 | }) 194 | 195 | 196 | test_that("DCO.time_vector",{ 197 | load("gamma_test.rda") 198 | 199 | set.seed(10) 200 | 201 | dco.time <- 3+runif(n = nrow(gamma.dataset)) 202 | 203 | DCO.ans <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 204 | data 
= gamma.dataset, 205 | m=2,DCO.time=dco.time, 206 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 207 | gamma.factor=-Inf) 208 | 209 | expect_equal(DCO.ans$data$internalDCO.time,dco.time) 210 | dco.one <- ExtractSingle(DCO.ans,1) 211 | expect_equal(dco.time[gamma.dataset$to.impute], 212 | dco.one$data$impute.time[gamma.dataset$to.impute]) 213 | }) 214 | 215 | 216 | 217 | test_that("DCO.time_different_inputs_agree",{ 218 | load("gamma_test.rda") 219 | 220 | set.seed(10) 221 | DCO.col <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 222 | data = gamma.dataset, 223 | m=2,DCO.time="DCO.time", 224 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 225 | gamma.factor=1) 226 | set.seed(10) 227 | DCO.single <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 228 | data = gamma.dataset, 229 | m=2,DCO.time=3, 230 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 231 | gamma.factor=1) 232 | 233 | set.seed(10) 234 | DCO.all <- gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 235 | data = gamma.dataset, 236 | m=2,DCO.time=rep(3,nrow(gamma.dataset)), 237 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 238 | gamma.factor=1) 239 | 240 | expect_equal(DCO.single,DCO.all) 241 | #remove internal_DCO from DCO.single should equal DCO.col 242 | DCO.single$data$internalDCO.time <- NULL 243 | expect_equal(DCO.col,DCO.single) 244 | }) 245 | 246 | -------------------------------------------------------------------------------- /tests/testthat/test-validationGamma.R: -------------------------------------------------------------------------------- 1 | context("validationGamma") 2 | 3 | test_that("invalid_rhs_formula",{ 4 | load("gamma_test.rda") 5 | 6 | 7 | expect_error(gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1)+strata(W2), 8 | gamma.dataset,m=2,gamma="gamma",DCO.time="DCO.time", 9 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 10 | gamma.factor=1)) 11 | 12 | 
expect_error(gammaImpute(formula=Surv(Yi,delta)~Z+cluster(W1)+strata(W2), 13 | gamma.dataset,m=2,gamma="gamma",DCO.time="DCO.time", 14 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 15 | gamma.factor=1)) 16 | 17 | expect_error(gammaImpute(formula=Surv(Yi,delta)~~Z+tt(W1)+strata(W2), 18 | gamma.dataset,m=2,gamma="gamma",DCO.time="DCO.time", 19 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 20 | gamma.factor=1)) 21 | 22 | 23 | expect_that(gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1,W2), 24 | gamma.dataset,DCO.time="DCO.time",m=2,gamma="gamma", 25 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 26 | gamma.factor=1),not(throws_error())) 27 | 28 | 29 | }) 30 | 31 | 32 | test_that("validate_gamma_arguments_lhs_formula",{ 33 | load("gamma_test.rda") 34 | 35 | #ok to have expression in Surv 36 | expect_that(gammaImpute(formula=Surv(Yi,delta==W2)~Z+strata(W1), 37 | gamma.dataset,DCO.time="DCO.time",m=2,gamma="gamma", 38 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 39 | gamma.factor=1),not(throws_error())) 40 | 41 | #no lhs 42 | expect_error(gammaImpute(formula=~Z+strata(W1), 43 | gamma.dataset,DCO.time="DCO.time",m=2,gamma="gamma", 44 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 45 | gamma.factor=1)) 46 | 47 | #lhs not right censored 48 | expect_error(gammaImpute(formula=Surv(Yi-5,Yi,delta)~Z+strata(W1), 49 | gamma.dataset,DCO.time="DCO.time",m=2,gamma="gamma", 50 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 51 | gamma.factor=1)) 52 | #lhs not Surv 53 | expect_error(gammaImpute(formula=Yi~Z+strata(W1), 54 | gamma.dataset,DCO.time="DCO.time",m=2,gamma="gamma", 55 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 56 | gamma.factor=1)) 57 | }) 58 | 59 | test_that("validate_gamma_arguments_m_gammafactor_and_strata_call",{ 60 | load("gamma_test.rda") 61 | 62 | surv.times <- as.matrix(model.frame(formula(Surv(Yi,delta)~1),data=gamma.dataset)) 63 | 64 | Call <- 
call("mycall",event.model=~Z1+Z2) 65 | 66 | 67 | expect_that(validate.Gamma.arguments(data=gamma.dataset,surv.times=surv.times, 68 | m=2,gamma="gamma", 69 | strata=rep(1,nrow(gamma.dataset)), 70 | gamma.factor=1,DCO.time="DCO.time",Call=Call), not(throws_error())) 71 | 72 | 73 | #m 74 | expect_error(validate.Gamma.arguments(data=gamma.dataset,surv.times=surv.times, 75 | m=-4,gamma="gamma", 76 | strata=rep(1,nrow(gamma.dataset)), 77 | gamma.factor=1,DCO.time="DCO.time",Call=Call)) 78 | 79 | expect_error(validate.Gamma.arguments(data=gamma.dataset,surv.times=surv.times, 80 | m=1,gamma="gamma", 81 | strata=rep(1,nrow(gamma.dataset)), 82 | gamma.factor=1,DCO.time="DCO.time",Call=Call)) 83 | 84 | expect_error(validate.Gamma.arguments(data=gamma.dataset,surv.times=surv.times, 85 | m=1.6,gamma="gamma", 86 | strata=rep(1,nrow(gamma.dataset)), 87 | gamma.factor=1,DCO.time="DCO.time",Call=Call)) 88 | 89 | #gamma.factor 90 | expect_error(validate.Gamma.arguments(data=gamma.dataset,col.control=col.control, 91 | m=4,gamma="gamma", 92 | strata=rep(1,nrow(gamma.dataset)), 93 | gamma.factor=c(3,4,5),Call=Call)) 94 | 95 | expect_error(validate.Gamma.arguments(data=gamma.dataset,col.control=col.control, 96 | m=4,gamma="gamma", 97 | strata=rep(1,nrow(gamma.dataset)), 98 | gamma.factor="hello",Call=Call)) 99 | 100 | 101 | #Call 102 | Call <- call("mycall",event.model=~Z1+Z2,subset="x==8") 103 | expect_error(validate.Gamma.arguments(data=gamma.dataset,surv.times=surv.times, 104 | m=2,gamma="gamma", 105 | strata=rep(1,nrow(gamma.dataset)), 106 | gamma.factor=1,DCO.time="DCO.time",Call=Call)) 107 | 108 | Call <- call("mycall",event.model=~Z1+Z2,na.action="boo") 109 | expect_error(validate.Gamma.arguments(data=gamma.dataset,surv.times=surv.times, 110 | m=2,gamma="gamma", 111 | strata=rep(1,nrow(gamma.dataset)), 112 | gamma.factor=1,DCO.time="DCO.time",Call=Call)) 113 | 114 | 115 | #strata (errors will be caught by boot if not caught here) 116 | df <- gamma.dataset 117 | 
expect_error(validate.Gamma.arguments(data=df,surv.times=surv.times, 118 | m=2,gamma="gamma", 119 | strata=rep(1,nrow(gamma.dataset)+6), 120 | gamma.factor=1,DCO.time="DCO.time",Call=Call)) 121 | 122 | }) 123 | 124 | 125 | test_that("negative_time",{ 126 | load("gamma_test.rda") 127 | 128 | surv.times <- as.matrix(model.frame(formula(Surv(Yi,delta)~1),data=gamma.dataset)) 129 | Call <- call("mycall",event.model=~Z1+Z2) 130 | 131 | surv.times[1,1] <- -8 132 | 133 | df <- gamma.dataset 134 | df$internal_gamma_val <- rep(1,nrow(df)) 135 | expect_error(validate.Gamma.arguments(data=df,surv.times=surv.times, 136 | m=2,gamma="gamma", 137 | strata=rep(1,nrow(gamma.dataset)), 138 | gamma.factor=1,DCO.time="DCO.time",Call=Call)) 139 | }) 140 | 141 | test_that("validate_gamma_arguments_data",{ 142 | load("gamma_test.rda") 143 | 144 | surv.times <- as.matrix(model.frame(formula(Surv(Yi,delta)~1),data=gamma.dataset)) 145 | 146 | Call <- call("mycall",event.model=~Z1+Z2) 147 | 148 | #data 149 | expect_error(validate.Gamma.arguments(data=gamma.dataset[numeric(0),],surv.times=surv.times, 150 | m=2,gamma="gamma", 151 | strata=rep(1,nrow(gamma.dataset)), 152 | gamma.factor=1,DCO.time="DCO.time",Call=Call)) 153 | 154 | 155 | df <- gamma.dataset 156 | df$internal_gamma_val <- rep(1,nrow(df)) 157 | expect_error(validate.Gamma.arguments(data=df,surv.times=surv.times, 158 | m=2,gamma="gamma", 159 | strata=rep(1,nrow(gamma.dataset)), 160 | gamma.factor=1,DCO.time="DCO.time",Call=Call)) 161 | 162 | df <- gamma.dataset 163 | df$impute.time <- rep(1,nrow(df)) 164 | expect_error(validate.Gamma.arguments(data=df,surv.times=surv.times, 165 | m=2,gamma="gamma", 166 | strata=rep(1,nrow(gamma.dataset)), 167 | gamma.factor=1,DCO.time="DCO.time",Call=Call)) 168 | 169 | 170 | df <- gamma.dataset 171 | df$gamma <- rep("HELLO",nrow(df)) 172 | expect_error(validate.Gamma.arguments(data=df,surv.times=surv.times, 173 | m=2,gamma="gamma", 174 | strata=rep(1,nrow(gamma.dataset)), 175 | 
gamma.factor=1,DCO.time="DCO.time",Call=Call)) 176 | 177 | 178 | }) 179 | 180 | test_that("validate_DCO.time",{ 181 | load("gamma_test.rda") 182 | 183 | surv.times <- as.matrix(model.frame(formula(Surv(Yi,delta)~1),data=gamma.dataset)) 184 | 185 | Call <- call("mycall",event.model=~Z1+Z2) 186 | 187 | #No DCO time column 188 | df <- gamma.dataset 189 | expect_error(validate.Gamma.arguments(data=df,surv.times=surv.times, 190 | m=2,gamma="gamma", 191 | strata=rep(1,nrow(gamma.dataset)), 192 | gamma.factor=1,DCO.time="O.time",Call=Call)) 193 | 194 | #Inf DCO time 195 | df$DCO.time[1] <- Inf 196 | expect_error(validate.Gamma.arguments(data=df,surv.times=surv.times, 197 | m=2,gamma="gamma", 198 | strata=rep(1,nrow(gamma.dataset)), 199 | gamma.factor=1,DCO.time="DCO.time",Call=Call)) 200 | 201 | #DCO less than time 202 | df <- gamma.dataset 203 | df$DCO.time[5] <- 0.5*df$Yi[5] 204 | expect_error(validate.Gamma.arguments(data=df,surv.times=surv.times, 205 | m=2,gamma="gamma", 206 | strata=rep(1,nrow(gamma.dataset)), 207 | gamma.factor=1,DCO.time="DCO.time",Call=Call)) 208 | 209 | #invalid length of DCO.time 210 | expect_error(validate.Gamma.arguments(data=df,surv.times=surv.times, 211 | m=2,gamma="gamma", 212 | strata=rep(1,nrow(gamma.dataset)), 213 | gamma.factor=1,DCO.time=c(1,2),Call=Call)) 214 | 215 | 216 | }) 217 | 218 | 219 | test_that("Validate_gamma",{ 220 | 221 | load("gamma_test.rda") 222 | 223 | #gamma length incorrect for character string 224 | expect_error(gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 225 | gamma.dataset,DCO.time="DCO.time",m=2,gamma=rep("gamma",nrow(gamma.dataset)), 226 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 227 | gamma.factor=1)) 228 | 229 | #gamma length incorrect for vector 230 | expect_error(gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 231 | gamma.dataset,DCO.time="DCO.time",m=2,gamma=1, 232 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 233 | gamma.factor=1)) 234 | 235 | #cannot be single number 
236 | expect_error(gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 237 | gamma.dataset,DCO.time="DCO.time",m=2,gamma=1, 238 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 239 | gamma.factor=1)) 240 | 241 | expect_error(gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 242 | gamma.dataset,DCO.time="DCO.time",m=2,gamma=c(1,6), 243 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 244 | gamma.factor=1)) 245 | 246 | #missing col 247 | expect_error(gammaImpute(formula=Surv(Yi,delta)~Z+strata(W1), 248 | gamma.dataset,DCO.time="DCO.time",m=2,gamma="missing", 249 | bootstrap.strata=strata(gamma.dataset$Z,gamma.dataset$W1), 250 | gamma.factor=1)) 251 | 252 | }) 253 | -------------------------------------------------------------------------------- /tests/testthat/test-scoreSystem.R: -------------------------------------------------------------------------------- 1 | context("ScoreSystem") 2 | 3 | test_that("arguments_to_ScoreImpute",{ 4 | data(ScoreInd) 5 | 6 | #first testing the arm and gamma arguments are valid in col.control for score imputation 7 | 8 | #invalid arm 9 | expect_error(ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5, 10 | col.control=col.headings(has.event="event", 11 | time="time", 12 | Id="Id", 13 | arm=23, 14 | DCO.time="DCO.time", 15 | to.impute="to.impute"), 16 | NN.control=NN.options(NN=5,w.censoring = 0.2), 17 | bootstrap.strata=ScoreInd$arm,m=5)) 18 | 19 | #using gamma 20 | expect_error(ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5, 21 | col.control=col.headings(has.event="event", 22 | time="time", 23 | Id="Id", 24 | arm="arm", 25 | gamma="arm", 26 | DCO.time="DCO.time", 27 | to.impute="to.impute"), 28 | NN.control=NN.options(NN=5,w.censoring = 0.2), 29 | bootstrap.strata=ScoreInd$arm,m=5)) 30 | 31 | #missing arm 32 | expect_error(ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5, 33 | col.control=col.headings(has.event="event", 34 | time="time", 35 | Id="Id", 36 | DCO.time="DCO.time", 37 | 
                                                      to.impute="to.impute"),
                             NN.control=NN.options(NN=5,w.censoring = 0.2),
                             bootstrap.strata=ScoreInd$arm,m=5))

    #Also testing:

    col.control <- col.headings(has.event="event", time="time",
                                Id="Id",arm="arm", DCO.time="DCO.time",
                                to.impute="to.impute")

    #use subset (argument is rejected)
    expect_error(ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5,
                             col.control=col.control,NN.control=NN.options(NN=5,w.censoring = 0.2),
                             subset=Z1==1,bootstrap.strata=ScoreInd$arm,m=5))

    #use NA.action (argument is rejected)
    expect_error(ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5,
                             col.control=col.control,NN.control=NN.options(NN=5,w.censoring = 0.2),
                             bootstrap.strata=ScoreInd$arm,m=5,na.action=na.exclude))

    #missing event model
    expect_error(ScoreImpute(data=ScoreInd,col.control=col.control,
                             NN.control=NN.options(NN=5,w.censoring = 0.2),
                             bootstrap.strata=ScoreInd$arm,m=5))

})

# Structure and invariants of a single imputed data set extracted from a
# ScoreImpute result: class/names, col.control round-trip, added columns,
# and the relationships between observed, imputed and cut-off times.
test_that("ScoreImputedDataOutput",{
    data(ScoreInd)

    ScoreInd$to.impute[1:40] <- FALSE
    set.seed(25)

    ans <- ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5,
                       col.control=col.headings(has.event="event", time="time",
                                                Id="Id",arm="arm", DCO.time="DCO.time",to.impute="to.impute"),
                       NN.control=NN.options(NN=5,w.censoring = 0.2),m=5,bootstrap.strata=ScoreInd$arm)

    ans <- ExtractSingle(ans,index=4)

    expect_equal("ScoreImputedData",class(ans))
    expect_equal(c("data","col.control","default.formula"),names(ans))

    # censor.type defaults to the sentinel "using_has.event_col" when not given
    expect_equal(col.headings(has.event="event",
                              time="time",
                              Id="Id",
                              arm="arm",
                              DCO.time="DCO.time",
                              to.impute="to.impute",
                              censor.type="using_has.event_col"),ans$col.control)

    # imputation appends exactly two columns and preserves the row count
    expect_equal(c(colnames(ScoreInd),"impute.time","impute.event"),colnames(ans$data))
    expect_equal(nrow(ScoreInd),nrow(ans$data))

    # dropping the two imputation columns must recover the input exactly
    df <- ans$data
    df$impute.event <- NULL
    df$impute.time <- NULL

    expect_equal(ScoreInd,df)

    df <- ans$data
    # observed events stay events; rows not flagged for imputation are unchanged
    expect_true(all(df$impute.event[df$event==1]==1))
    expect_true(all(df$impute.event[!df$to.impute]==df$event[!df$to.impute]))
    expect_true(all(df$time[!df$to.impute]==df$impute.time[!df$to.impute]))

    # imputed times lie between the observed time and the data cut-off
    expect_true(all(df$time <= df$impute.time & df$impute.time <= df$DCO.time))
    #if impute to DCO.time then don't have event
    expect_true(all(df$impute.event[df$time!=df$DCO.time & df$impute.time==df$DCO.time]==0 ))
})


# Two different RNG seeds should yield different imputed times.
test_that("algorithm_is_stochastic",{
    data(ScoreInd)
    set.seed(25)

    col.control <-col.headings(has.event="event",
                               time="time",Id="Id",
                               arm="arm", DCO.time="DCO.time", to.impute="to.impute")

    ans <- ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5,
                       col.control = col.control,m=5,bootstrap.strata = ScoreInd$arm,
                       NN.control=NN.options(NN=5,w.censoring = 0.2))

    set.seed(26)
    ans2 <- ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5,
                        col.control = col.control,m=5,bootstrap.strata = ScoreInd$arm,
                        NN.control=NN.options(NN=5,w.censoring = 0.2))

    expect_false(all(ExtractSingle(ans,index=1)$data$impute.time==ExtractSingle(ans2,index=1)$data$impute.time))

})


# The type of the Id column (numeric, character, factor) must not change the
# imputation result when the seed is held fixed.
test_that("factor_numeric_character_Id_same_answer",{
    data(ScoreInd)
    set.seed(25)

    col.control <-col.headings(has.event="event",
                               time="time",Id="Id",
                               arm="arm", DCO.time="DCO.time", to.impute="to.impute")

    ans <- ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5,
                       col.control=col.control,m=5,bootstrap.strata=ScoreInd$arm,
                       NN.control=NN.options(NN=5,w.censoring = 0.2))

    # same seed, Id coerced to character (with one non-numeric value)
    set.seed(25)
    ScoreInd$Id <- as.character(ScoreInd$Id)
    ScoreInd$Id[1] <- "a"

    ans2 <- ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5,
                        col.control=col.control,m=5,bootstrap.strata=ScoreInd$arm,
                        NN.control=NN.options(NN=5,w.censoring = 0.2))

    expect_equal(ExtractSingle(ans,3)$data$impute.time,ExtractSingle(ans2,3)$data$impute.time)


    # same seed again, Id as a factor
    set.seed(25)
    ScoreInd$Id <- factor(ScoreInd$Id)
    ans3 <- ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5,
                        col.control=col.control,m=5,bootstrap.strata=ScoreInd$arm,
                        NN.control=NN.options(NN=5,w.censoring = 0.2))

    expect_equal(ExtractSingle(ans,3)$data$impute.time,ExtractSingle(ans3,3)$data$impute.time)

})

# When there is no administrative censoring (ctype is exactly 1-event), an
# explicit censor.type column must reproduce the default behaviour exactly,
# the only difference being the recorded censor.type name.
test_that("using_censor_type_gives_same_result_as_not_if_no_administrative_censoring",{
    data(ScoreInd)
    set.seed(125)

    ScoreInd$ctype <- 1 - ScoreInd$event

    col.control <- col.headings(has.event="event",
                                time="time", Id="Id",
                                arm="arm", DCO.time="DCO.time",
                                to.impute="to.impute")

    ans <- ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5,m=5,
                       bootstrap.strata=ScoreInd$arm,
                       col.control=col.control,
                       NN.control=NN.options(NN=5,w.censoring = 0.2))

    set.seed(125)
    col.control$censor.type <- "ctype"
    ans2 <- ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5,m=5,
                        bootstrap.strata=ScoreInd$arm,
                        col.control=col.control,
                        NN.control=NN.options(NN=5,w.censoring = 0.2))

    #only difference should be col.control$censor.type
    expect_equal("ctype",ans2$col.control$censor.type)
    expect_equal("using_has.event_col",ans$col.control$censor.type)
    ans2$col.control$censor.type <- ans$col.control$censor.type
    expect_equal(ans,ans2)

})


# Time-dependent covariates: imputation must give the same answer whether the
# Id column of the main data is numeric or a factor (time.dep Ids are always
# stored as a factor).
test_that("Sfn_time_dep",{
    data(ScoreInd)
    data(ScoreTimeDep)
    set.seed(25)

    time.dep <- MakeTimeDepScore(ScoreTimeDep,Id="Id",
                                 time.start="start",
                                 time.end="end")

    #ok if time dep not used
    #note do not get same answer without timedep as still use separate
    #model fits for each censored observation if timedep is not NULL
    expect_warning(ans <- ScoreImpute(data=ScoreInd,event.model=~Z1+Z3+Z5,
                                      col.control=col.headings(has.event="event",
                                                               time="time",
                                                               Id="Id",
                                                               arm="arm",
                                                               DCO.time="DCO.time",
                                                               to.impute="to.impute"),
                                      NN.control=NN.options(NN=5,w.censoring = 0.2),
                                      time.dep = time.dep,m=5,
                                      bootstrap.strata=ScoreInd$arm))

    #same answer if ID is factor (matching time.dep$ID which is always a factor)
    set.seed(25)

    ScoreInd$Id <- factor(ScoreInd$Id)

    expect_warning(ans2 <- ScoreImpute(data=ScoreInd,event.model=~Z1+Z3+Z5,
                                       col.control=col.headings(has.event="event",
                                                                time="time",
                                                                Id="Id",
                                                                arm="arm",
                                                                DCO.time="DCO.time",
                                                                to.impute="to.impute"),
                                       NN.control=NN.options(NN=5,w.censoring = 0.2),
                                       time.dep = time.dep,m=5,
                                       bootstrap.strata=ScoreInd$arm))
    # NOTE(review): unlike the other tests, ans/ans2 are imputed *sets* (m=5)
    # that were never passed through ExtractSingle, so ans$data may be NULL
    # and these comparisons could be vacuous (NULL == NULL) -- confirm against
    # the ScoreImputedSet structure and extract an index first if so.
    expect_equal(ans$data$impute.time,ans2$data$impute.time)
    expect_equal(ans$data$impute.event,ans2$data$impute.event)
})

# Row order of the input data must be preserved in the imputed output, and the
# time invariants must hold even after shuffling rows and perturbing DCOs.
test_that("ordering_timeindep",{

    data(ScoreInd)
    set.seed(250)

    col.control <- col.headings(has.event="event",
                                time="time", Id="Id",
                                arm="arm", DCO.time="DCO.time",
                                to.impute="to.impute")

    #random reordering
    ScoreInd <- ScoreInd[sample(nrow(ScoreInd)),]

    #randomly set DCOs
    ScoreInd$DCO.time <- ScoreInd$DCO.time + runif(n=nrow(ScoreInd),min = 0,max=10)

    ans <- ScoreImpute(data=ScoreInd,event.model=~Z1+Z2+Z3+Z4+Z5,m=5,
                       bootstrap.strata=ScoreInd$arm,
                       col.control=col.control,
                       NN.control=NN.options(NN=5,w.censoring = 0.2))

    ans <- ExtractSingle(ans,index=2)

    expect_equal(ans$data$Id,ScoreInd$Id)
    expect_equal(ans$data$arm,ScoreInd$arm)

    expect_true(all(ans$data$impute.time >= ans$data$time))
    expect_true(all(ans$data$DCO.time>= ans$data$impute.time))

})
# -------------------------------------------------------------------------
# tests/testthat/test-validation.R
# -------------------------------------------------------------------------

context("validation")

# checkContiguous must error unless every distinct value occupies a single
# contiguous run of the vector.
test_that("checkContiguous",{
    # a value re-appearing after a different value is an error
    expect_error(checkContiguous(c(1,2,1)))
    expect_error(checkContiguous(c(2,2,1,1,1,3,3,3,2)))
    expect_error(checkContiguous(c("A","","A")))
    expect_error(checkContiguous(c(1,1,1,2,2,2,3,3,3,2,3,3,3)))
    # contiguous runs (including a single element) are accepted
    expect_that(checkContiguous(c(1,2,3,4)), not(throws_error()))
    expect_that(checkContiguous(c(41)), not(throws_error()))
    expect_that(checkContiguous(c("A","A","C","C","C","B")), not(throws_error()))
})

# checkPanelling must reject interval sets that do not form one subject's
# ordered, contiguous panelling starting at time zero.
test_that("checkPanelling_invalid",{
    panel <- function(starts,ends){
        data.frame(time.start=starts,time.end=ends)
    }

    #negative start
    expect_error(checkPanelling(panel(c(-5),c(4))))

    #not start at zero
    expect_error(checkPanelling(panel(c(1),c(4))))

    #incorrect order
    expect_error(checkPanelling(panel(c(0,2,1),c(1,3,2))))

    #more than one subject
    expect_error(checkPanelling(panel(c(0,0),c(2,5))))

    #non-contiguous
    expect_error(checkPanelling(panel(c(0,2,5),c(2,4,6))))

    #start and end mismatch
    expect_error(checkPanelling(panel(c(0,10,20),c(10,15,30))))

    #invalid last end
    expect_error(checkPanelling(panel(c(0,10,20,30),c(10,20,30,25))))

    #invalid interval of 0
    expect_error(checkPanelling(panel(c(0,10,20,20),c(10,20,20,25))))

})

# Well-formed panellings (single interval, or chained intervals) must pass.
test_that("checkPanelling_valid",{
    panel <- function(starts,ends){
        data.frame(time.start=starts,time.end=ends)
    }

    expect_that(checkPanelling(panel(c(0),c(4))), not(throws_error()))
    expect_that(checkPanelling(panel(c(0,2.5,5,7),c(2.5,5,7,10))), not(throws_error()))
    expect_that(checkPanelling(panel(c(0,2.5,5,7),c(2.5,5,7,7.01))), not(throws_error()))
})
# .getResponse must return the deparsed left-hand side of a formula, or a
# zero-length result when the formula has no response.
test_that(".getResponse",{
    expect_equal("y",.getResponse(formula(y~x)))
    expect_equal("Surv(x, y)",.getResponse(formula(Surv(x,y)~x+y+r*y)))
    expect_equal("w + y * z",.getResponse(formula(w+y*z~x)))
    # one-sided formula: no response at all
    expect_equal(0,length(.getResponse(formula(~x))))
})

# .validRHSFormula: without an arm the LHS must be empty and cluster()/tt()
# terms are rejected; with an arm it must be first on the RHS with no
# interactions involving it.
test_that(".validRHSFormula",{
    #first without arm argument LHS must be empty
    expect_error(.validRHSFormula(formula(y~x)))
    expect_that(.validRHSFormula(formula(~a+b+c)),not(throws_error()))
    expect_error(.validRHSFormula(formula(~x+cluster(y))))
    expect_error(.validRHSFormula(formula(~tt(x)+y)))
    # strata() terms are allowed
    expect_that(.validRHSFormula(formula(~a+strata(b)+c)),not(throws_error()))
    #if do have arm argument then it must be the first on the rhs
    #and no interaction terms with it
    expect_error(.validRHSFormula(formula(y~a),arm="a"))
    expect_that(.validRHSFormula(formula(~a+b+c),arm="a"),not(throws_error()))
    expect_error(.validRHSFormula(formula(~b+arm),arm="arm"))
    expect_error(.validRHSFormula(formula(~b),arm="arm"))
    expect_error(.validRHSFormula(formula(~arm+b*arm),arm="arm"))
    expect_error(.validRHSFormula(formula(~b+b*arm),arm="b"))
})


# .additionalScore.validate must reject a col.control whose entries do not
# match the columns of the data frame.
test_that(".additionalScore.validate_control",{
    data(ScoreInd)

    score.cols <- col.headings(has.event="event", time="time", Id="Id",
                               arm="arm", DCO.time="DCO.time",
                               to.impute="to.impute")

    validation.call <- call("mycall",event.model=~Z1+Z2)

    #col.control not matching columns in data frame
    score.cols$time <- "my.time"
    expect_error(.additionalScore.validate(ScoreInd,col.control=score.cols,validation.call))

    # gamma entry naming a non-existent column
    score.cols$time <- "time"
    score.cols$gamma <- "BOO"
    expect_error(.additionalScore.validate(ScoreInd,col.control=score.cols,validation.call))

    # to.impute entry missing entirely
    score.cols$to.impute <- NULL
    expect_error(.additionalScore.validate(ScoreInd,col.control=score.cols,validation.call))

})
# .additionalScore.validate must reject invalid data: duplicate Ids,
# non-positive times, DCO before the observed time, non-logical to.impute,
# invalid event indicators, and a subset argument in the captured call.
test_that(".additionalScore.validate",{
    col.control <- col.headings(has.event="event",
                                time="time",
                                Id="Id",
                                arm="arm",
                                DCO.time="DCO.time",
                                to.impute="to.impute")


    Call <- call("mycall",event.model=~Z1+Z2)
    #non-unique Id
    df <- data.frame(Id=c(1,6,9,1),event=c(0,0,1,1),time=c(4,5,6,7),
                     arm=factor(c(0,1,0,1)),DCO.time=c(5,6,7,8),to.impute=c(TRUE,TRUE,FALSE,FALSE))

    expect_error(.additionalScore.validate(df,col.control=col.control,Call))

    #negative time
    df <- data.frame(Id=c(1,6,9,21),event=c(0,0,1,1),time=c(-4,5,6,7),
                     arm=factor(c(0,1,0,1)),DCO.time=c(5,6,7,8),to.impute=c(TRUE,TRUE,FALSE,FALSE))

    expect_error(.additionalScore.validate(df,col.control=col.control,Call))

    #zero time
    df <- data.frame(Id=c(1,6,9,21),event=c(0,0,1,1),time=c(4,5,0,7),
                     arm=factor(c(0,1,0,1)),DCO.time=c(5,6,7,8),to.impute=c(TRUE,TRUE,FALSE,FALSE))

    expect_error(.additionalScore.validate(df,col.control=col.control,Call))

    #DCO.time < time
    df <- data.frame(Id=c(1,6,9,21),event=c(0,0,1,1),time=c(4,5,6,7),
                     arm=factor(c("A","B","A","B")),DCO.time=c(5,3,7,8),to.impute=c(TRUE,TRUE,FALSE,FALSE))

    expect_error(.additionalScore.validate(df,col.control=col.control,Call))

    #ok if DCO.time = time
    df <- data.frame(Id=c(1,6,9,21),event=c(0,0,1,1),time=c(4,5,6,7),
                     arm=factor(c("A","B","A","B")),DCO.time=c(4,5,6,7),to.impute=c(TRUE,TRUE,FALSE,FALSE))

    expect_that(.additionalScore.validate(df,col.control=col.control,Call),not(throws_error()))


    # (re-assigns arm to the value it already has; retained for behaviour parity)
    df$arm <- factor(c("A","B","A","B"))

    #call has subset (not supported, must be rejected)
    expect_error(.additionalScore.validate(df,col.control=col.control,
                                           Call=call("my.func",event.model=~Z1,subset="a")))

    #toimpute invalid (c(7,TRUE,...) coerces to numeric, not logical)
    df <- data.frame(Id=c(1,6,9,21),event=c(0,0,1,1),time=c(4,5,6,7),
                     arm=factor(c(1,0,0,1)),DCO.time=c(4,5,6,7),to.impute=c(7,TRUE,FALSE,FALSE))
    expect_error(.additionalScore.validate(df,col.control=col.control,Call))

    #event indicator incorrect (5 is not a 0/1 event flag)
    df <- data.frame(Id=c(1,6,9,21),event=c(0,5,1,1),time=c(4,5,6,7),
                     arm=factor(c(1,0,0,1)),DCO.time=c(4,5,6,7),to.impute=c(TRUE,TRUE,FALSE,FALSE))
    expect_error(.additionalScore.validate(df,col.control=col.control,Call))
})


# validate.Score.Arguments must reject a missing or malformed NN.control and
# accept a proper NN.options() object.
test_that("validate_Score_arguments_control",{
    data(ScoreInd)

    col.control <- col.headings(has.event="event",
                                time="time",
                                Id="Id",
                                arm="arm",
                                DCO.time="DCO.time",
                                to.impute="to.impute")

    Call <- call("mycall",event.model=~Z1+Z2)

    # NULL NN.control
    expect_error(validate.Score.Arguments(ScoreInd,col.control=col.control,NN.control=NULL,NULL,Call,m=5))

    # NN.control of the wrong type / incomplete list
    NN.control <- c(10,20)
    expect_error(validate.Score.Arguments(ScoreInd,col.control=col.control,NN.control=NN.control,NULL,Call,m=5))
    NN.control <- list(NN=10)
    expect_error(validate.Score.Arguments(ScoreInd,col.control=col.control,NN.control=NN.control,NULL,Call,m=5))

    NN.control <- NN.options()
    expect_that(validate.Score.Arguments(ScoreInd,col.control=col.control,NN.control=NN.control,NULL,Call,m=5),not(throws_error()))

})


# validate.Score.Arguments: m must be a single integer > 4, the call must
# contain an event.model, and arm must be a two-level factor.
test_that("validate_Score_arguments_data_and_Call",{
    Call <- call("mycall",event.model=~Z1+Z2)
    col.control <- col.headings(has.event="event",
                                time="time",
                                Id="Id",
                                arm="arm",
                                DCO.time="DCO.time",
                                to.impute="to.impute")
    NN.control <- NN.options()


    df <- data.frame(Id=c(1,6,9,21),event=c(0,0,1,1),time=c(4,5,6,7),
                     arm=factor(c("A","B","A","B")),DCO.time=c(4,5,6,7),to.impute=c(TRUE,TRUE,FALSE,FALSE))

    expect_that(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,NULL,
                                         Call=Call,m=5),not(throws_error()))

    #invalid m
    expect_error(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,NULL,
                                          Call=Call,m=-5))

    expect_error(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,NULL,
                                          Call=Call,m=c(3,4,5)))

    #m must be > 4
    expect_error(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,NULL,
                                          Call=Call,m=4))



    #no event model in call
    expect_error(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,NULL,
                                          Call=call("my.func"),m=5))

    #arm not a factor (and non-numeric DCO.time)
    df <- data.frame(Id=c(1,6,9,21),event=c(0,0,1,1),time=c(4,5,6,7),
                     arm=c(1,0,1,1),DCO.time=c(4,5,"a6",7),to.impute=c(TRUE,TRUE,FALSE,FALSE))
    expect_error(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,NULL,Call,m=5))

    #arm not two level factor
    df <- data.frame(Id=c(1,6,9,21),event=c(0,0,1,1),time=c(4,5,6,7),
                     arm=factor(c(1,1,1,1)),DCO.time=c(4,5,6,7),to.impute=c(TRUE,TRUE,FALSE,FALSE))
    expect_error(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,NULL,Call,m=5))
    df <- data.frame(Id=c(1,6,9,21),event=c(0,0,1,1),time=c(4,5,6,7),
                     arm=factor(c(1,4,7,1)),DCO.time=c(4,5,6,7),to.impute=c(TRUE,TRUE,FALSE,FALSE))
    expect_error(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,NULL,Call,m=5))
})


# Validation of an explicit censor.type column: the reserved default column
# name must not appear in the data, values must be 0/1/2, and they must be
# consistent with the event indicator.
test_that("Invalid_censortype",{
    Call <- call("mycall",event.model=~Z1+Z2)
    col.control <- col.headings(has.event="event",
                                time="time",
                                Id="Id",
                                arm="arm",
                                DCO.time="DCO.time",
                                to.impute="to.impute",
                                censor.type="ctype")
    NN.control <- NN.options()


    df <- data.frame(Id=c(1,6,9,21),event=c(0,0,1,1),time=c(4,5,6,7),
                     arm=factor(c("A","B","A","B")),DCO.time=c(4,5,6,7),to.impute=c(TRUE,TRUE,FALSE,FALSE))


    #invalid column name
    # data may not contain the reserved default column name
    df2 <- df
    df2$using_has.event_col <- rep(1,nrow(df))
    expect_error(validate.Score.Arguments(df2,col.control=col.control,NN.control=NN.control,NULL,Call=Call,m=5))

    #column contains something other than 0,1 or 2
    df$ctype <- c(4,1,0,1)
    expect_error(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,NULL,Call=Call,m=5))

    df$ctype <- c(-1,"hello",1,1)
    expect_error(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,NULL,Call=Call,m=5))

    #error if have event and censor type != 0
    df$ctype <- c(1,0,2,1)
    expect_error(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,NULL,Call=Call,m=5))

    #and error if do not have event and censor type = 0
    df$ctype <- c(0,0,0,1)
    expect_error(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,NULL,Call=Call,m=5))

})

# validate.Score.Arguments with time-dependent data: time.dep must be a
# MakeTimeDepScore object and must not share column names with the main data.
test_that("validate_Score_arguments_timedep",{
    Call <- call("mycall",event.model=~Z1+Z2)
    col.control <- col.headings(has.event="event",
                                time="time",
                                Id="Id",
                                arm="arm",
                                DCO.time="DCO.time",
                                to.impute="to.impute")
    NN.control <- NN.options()

    df <- data.frame(Id=c(1,6,9,21),event=c(0,0,1,1),time=c(14,15,16,17),
                     arm=factor(c("A","B","A","B")),DCO.time=c(24,25,26,27),to.impute=c(TRUE,TRUE,FALSE,FALSE))


    time.dep.df <- data.frame(Id=c(1,1,6,6,6,21,21,9),
                              time=c(0,5,0,2,8,0,10,0),
                              end=c(5,14,2,8,15,10,17,16),
                              W1=c(1,2,3,4,1,2,2,1))

    time.dep <- MakeTimeDepScore(time.dep.df,Id="Id",time.start="time",time.end="end")

    # raw data frame (not passed through MakeTimeDepScore) is rejected
    expect_error(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,time.dep=time.dep.df,Call,m=5))

    #ok if both columns have Id
    expect_that(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,time.dep=time.dep,Call,m=5),
                not(throws_error()))

    #invalid if both have same column names
    df$W1 <- c(2,3,4,5)
    expect_error(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,time.dep=time.dep,Call,m=5))

    # a (non-Id) column of time.dep clashing with a main-data column
    df$W1 <- NULL
    time.dep$arm <- c(1,1,1,1,0,0,0,0)
    expect_error(validate.Score.Arguments(df,col.control=col.control,NN.control=NN.control,time.dep=time.dep,Call,m=5))

})

# ---- end of tests/testthat/test-validation.R ----