├── .gitignore ├── LICENSE ├── R ├── ansim.R ├── estimates.rds ├── examplesim.Rproj ├── readme.md ├── runsim.R └── states.rds ├── README.md └── Stata ├── README.md ├── ansim-pointplots.do ├── ansim-simsum.do ├── ansim-zipplot.do ├── estimates.dta ├── estimateslabels.dta ├── simstudy.do └── states.dta /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Tim Morris 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /R/ansim.R: -------------------------------------------------------------------------------- 1 | # Analyses the estimates data produced by runsim.R (plots and performance measures) 2 | # See example in https://onlinelibrary.wiley.com/doi/10.1002/sim.8086 3 | # Tim Morris @tmorris_mrc | 30apr2019 4 | # (based partly on code of Alessandro Gasparini; also on some pestering of Matteo Quartagno) 5 | 6 | if (!requireNamespace("tidyverse")) install.packages("tidyverse") 7 | if (!requireNamespace("rsimsum")) install.packages("rsimsum") 8 | 9 | library(tidyverse) 10 | library(rsimsum) 11 | 12 | # no vomit grid in ggplot 13 | theme_set(theme_bw(base_size = 12)) 14 | theme_update(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) 15 | 16 | # Read estimates produced by runsim.R 17 | estimates <- readRDS("estimates.rds") 18 | 19 | # Medium-long estimates data; label method var 20 | estmlong <- estimates %>% 21 | gather(key = "meththing", value = "est", -rep, -dgmgamma) %>% 22 | separate(meththing, into = c("thetase", "method"), sep = "_" ) %>% 23 | spread(thetase,est) 24 | estmlong$dgmgamma <- factor(estmlong$dgmgamma, levels = c(1,1.5), labels = c("gamma = 1", "gamma = 1.5")) 25 | estmlong$method <- factor(estmlong$method, levels = c(1,2,3), labels = c("Exponential", "Weibull", "Cox")) 26 | head(estmlong) 27 | 28 | 29 | # Alternative way to get estmlong 30 | #estlong <- reshape(estimates, 31 | # direction = "long", idvar = c("rep","dgmgamma"), 32 | # timevar = c("method"), times = c("exp", "wei", "cox"), v.names=c("theta", "se"), 33 | # varying = list(c("thetaexp", "thetawei", "thetacox"), c("seexp", "sewei", "secox")) 34 | #) 35 | 36 | 37 | # Swarm plot of theta (separated vertically by rep); y-axis title is hidden via theme() 38 | meantheta <- estmlong %>% 39 | group_by(dgmgamma, method) %>% 40 | summarise(Mean.Theta = mean(theta)) 41 | thetaswarm <- ggplot(estmlong, 42 | aes(x = theta, y = rep) 43 | 
) + geom_point(color = rgb(.129,.404,.494) , alpha = .3) + geom_vline(data = meantheta, aes(xintercept = Mean.Theta), colour = rgb(1,0.859,0)) + facet_wrap(facets = c("dgmgamma","method"), ncol = 1, strip.position = "left") + theme(axis.title.y=element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank()) 44 | # Output pdf 45 | thetaswarm 46 | ggsave(filename = "thetaswarmR.pdf", plot = last_plot(), device = "pdf", 47 | scale = 1, width = 4, height = 6, units = "in") 48 | 49 | 50 | # Swarm plot of se (separated vertically by rep); aes() carries only the x/y mappings 51 | meanse <- estmlong %>% 52 | group_by(dgmgamma, method) %>% 53 | summarise(Mean.SE = mean(se)) 54 | seswarm <- ggplot(estmlong, 55 | aes(x = se, y = rep) 56 | ) + geom_point(color = rgb(.129,.404,.494) , alpha = .3) + geom_vline(data = meanse, aes(xintercept = Mean.SE), colour = rgb(1,0.859,0)) + facet_wrap(facets = c("dgmgamma","method"), ncol = 1, strip.position = "left") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) 57 | # Output pdf 58 | seswarm 59 | ggsave(filename = "seswarmR.pdf", plot = last_plot(), device = "pdf", 60 | scale = 1, width = 4, height = 6, units = "in") 61 | 62 | 63 | # Run simsum to estimate performances 64 | ssres <- simsum( 65 | data = estmlong, estvarname = "theta", true = -.5 , se = "se", 66 | methodvar = "method", ref = "Weibull", by = "dgmgamma", x = TRUE 67 | ) 68 | performance <- ssres["summ"][["summ"]] 69 | head(performance) 70 | 71 | # Comparison of theta according to method 72 | autoplot(ssres, type = "est") 73 | autoplot(ssres, type = "est_ba") 74 | # Comparison of SE according to method 75 | autoplot(ssres, type = "se") 76 | autoplot(ssres, type = "se_ba") 77 | # Ridge plots comparing theta 78 | autoplot(ssres, type = "est_ridge") 79 | autoplot(ssres, type = "se_ridge") 80 | # Zip plot 81 | zip <- autoplot(ssres, type = "zip") 82 | zip 83 | ggsave(filename = "zipR.pdf", plot = last_plot(), device = "pdf", 84 | scale = 1, width = 4, 
height = 6, units = "in") 85 | 86 | # Lollipop plot for whatever performance measure you favour 87 | autoplot(summary(ssres), type = "lolly", stats = "bias") 88 | -------------------------------------------------------------------------------- /R/estimates.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpmorris/simtutorial/f47ff33e6dbdf2d5ce120a6089087394c5e3df0e/R/estimates.rds -------------------------------------------------------------------------------- /R/examplesim.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /R/readme.md: -------------------------------------------------------------------------------- 1 | # R code for running a simple simulation study 2 | ## Taken from the example in https://onlinelibrary.wiley.com/doi/10.1002/sim.8086 3 | The files contained in this repository are provided for readers of [Morris, White and Crowther's tutorial paper](https://onlinelibrary.wiley.com/doi/10.1002/sim.8086) to run the example simulation study. 4 | 5 | ## Disclaimer 6 | I am not an R programmer and this represents my first full simulation study in R. I am aware of several alternative ways in which I could have coded this. My code attempts to be clear rather than clever or beautiful, though I may also have failed at clarity. 7 | 8 | ## Running the `.R` files 9 | For those running the files (rather than just reading them), note that `runsim.R` should be run first, since this is the core of the simulation. It produces the files `estimates.rds` and `states.rds` (these files are also provided in this repo). 
The file `ansim.R` can then be run. 10 | 11 | ## R versions 12 | This was run in R version 3.6.0. I don't know if it would work on earlier versions (sorry). 13 | 14 | ## Additional libraries 15 | To run `runsim.R`, the `simsurv` and `eha` packages are required. 16 | 17 | To run `ansim.R`, the `tidyverse` and `rsimsum` packages are required. 18 | 19 | ## Reproducing data/results of a single repetition 20 | At the end of `runsim.R` there is some code to reproduce the results of a specific repetition and data-generating mechanism. When running the repetitions, I output the current state (`.Random.seed`) of the random-number generator at the beginning of each repetition for each data-generating mechanism. This can then be used to later set `.Random.seed` to the desired value and repeat what was done. 21 | 22 | Note that this is not general: it works for the default random-number generator in R (Mersenne twister) and I have not checked how the current state is represented for other generators. 23 | 24 | ## Bugs, issues and improvements 25 | Please do let me know of any issues you discover in these files, and I will endeavor to acknowledge you here. I am not certain to respond to pull requests that say 'here's how you *should* do it', but I will respond to requests that say 'I found an error here'. It's not that I think I've done it the best way, it's just that I don't know enough about R to judge whether a different approach is better in a worthwhile way; by all means release and publicise your own better version! 
26 | -------------------------------------------------------------------------------- /R/runsim.R: -------------------------------------------------------------------------------- 1 | # Runs simulation study to produce estimates data and states data 2 | # See example in https://onlinelibrary.wiley.com/doi/10.1002/sim.8086 3 | # Tim Morris @tmorris_mrc | 30apr2019 4 | # (based partly on code of Alessandro Gasparini; also on some pestering of Matteo Quartagno) 5 | 6 | if (!requireNamespace("simsurv")) install.packages("simsurv") 7 | if (!requireNamespace("eha")) install.packages("eha") 8 | library(simsurv) 9 | library(eha) 10 | 11 | 12 | # Function to generate one-repetition worth of data 13 | # Generate survival times s from a Weibull dist. with single binary covariate trt 14 | # and administrative censoring at time s = 5 15 | # Then analyse using exponential, Weibull and Cox 16 | onerep <- function(rep, nobs = 300, prob = 0.5, lambda = 0.1, gamma = 1, beta = -0.5) { 17 | df <- data.frame( 18 | id = 1:nobs, 19 | trt = rbinom(n = nobs, size = 1, prob = prob) 20 | ) 21 | # Generate survival times and merge into df 22 | s <- simsurv(lambdas = lambda, gammas = gamma, betas = c(trt = beta), x = df, maxt = 5) 23 | df <- merge(df, s) 24 | # Exponential model 25 | fitexp <- phreg(Surv(eventtime, status) ~ trt, data = df, dist = "weibull", shape = 1) 26 | thetaexp <- coef(fitexp)[["trt"]] 27 | se_thetaexp <- sqrt(fitexp[["var"]]["trt", "trt"]) 28 | # Weibull model 29 | fitwei <- phreg(Surv(eventtime, status) ~ trt, data = df, dist = "weibull") 30 | thetawei <- coef(fitwei)[["trt"]] 31 | se_thetawei <- sqrt(fitwei[["var"]]["trt", "trt"]) 32 | # Cox model 33 | fitcox <- coxph(Surv(eventtime, status) ~ trt, df) 34 | thetacox <- coef(fitcox)[["trt"]] 35 | se_thetacox <- sqrt(fitcox[["var"]]) 36 | # Output coeffs and SEs 37 | out <- data.frame( 38 | rep = rep, 39 | dgmgamma = gamma, 40 | theta_1 = thetaexp, 41 | se_1 = se_thetaexp, 42 | theta_2 = thetawei, 43 | se_2 = 
se_thetawei, 44 | theta_3 = thetacox, 45 | se_3 = se_thetacox 46 | ) 47 | return(out) 48 | } 49 | 50 | # Uncomment the following line to run once with large nobs. 51 | #onerep(rep = 1, nobs = 100000) 52 | 53 | # Preparation to run nsim repetitions 54 | set.seed(65416) 55 | nsim <- 1600 56 | # Empty estimates data frame to fill up. 57 | # Note - requires nsim*2 rows because 2 data-generating mechanisms 58 | estimates <- data.frame(matrix(ncol = 8, nrow = (nsim*2))) 59 | x <- c("rep", "dgmgamma", "theta_1", "se_1", "theta_2", "se_2", "theta_3", "se_3") 60 | colnames(estimates) <- x 61 | states <- matrix(ncol = 626, nrow = (nsim*2)) 62 | 63 | # Run all nsim reps 64 | for (r in 1:nsim) { 65 | # 1st data-generating mechanism 66 | states[r, ] <- .Random.seed 67 | estimates[r, ] <- onerep(rep = r, gamma=1) 68 | # 2nd data-generating mechanism 69 | states[(nsim+r), ] <- .Random.seed 70 | estimates[(nsim+r), ] <- onerep(rep = r, gamma=1.5) 71 | } 72 | 73 | # Save data frame for analysis 74 | head(estimates) 75 | saveRDS(estimates, file = "estimates.rds") 76 | head(states) 77 | saveRDS(states, file = "states.rds") 78 | 79 | # Want to reproduce data from a particular rep? 
This is why we produced (and saved) states 80 | # Here is repetition 3, gamma = 1 81 | .Random.seed <- states[3,] 82 | onerep(rep = 3, gamma=1) 83 | # Now for repetition 211, gamma = 1.5 84 | .Random.seed <- states[(nsim+211),] # For the second data-generating mechanism, we stored state r in row nsim+r 85 | onerep(rep = 211, gamma=1.5) 86 | -------------------------------------------------------------------------------- /R/states.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpmorris/simtutorial/f47ff33e6dbdf2d5ce120a6089087394c5e3df0e/R/states.rds -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Stata and R code for running a simple simulation study 2 | The files contained in this repository are provided for readers of [Morris, White and Crowther's tutorial paper](https://onlinelibrary.wiley.com/doi/10.1002/sim.8086) to run the example simulation study described in section 7. 3 | 4 | There is one folder containing Stata code and another folder containing R code. Each folder contains the relevant readme. I would be delighted to see code for running this simulation study in other languages, but do not plan to write it myself. 5 | -------------------------------------------------------------------------------- /Stata/README.md: -------------------------------------------------------------------------------- 1 | # Stata code for running a simple simulation study 2 | The files contained in this repository are provided for readers of [Morris, White and Crowther's tutorial paper](https://onlinelibrary.wiley.com/doi/10.1002/sim.8086) to run the example simulation study. 3 | 4 | ## Running the `.do` files 5 | For those running the files (rather than just reading them), note that `simstudy.do` should be run first, since this is the core of the simulation. 
It produces the data files `estimates.dta`, `estimateslabels.dta` (a cleanly labelled version of estimates) and `states.dta`. However, these data files are also provided here. 6 | 7 | ## Stata versions 8 | Relatively recent versions of Stata are needed: 9 | * `simstudy.do` requires version 14 (Stata's random number generator changed from using KISS to Mersenne twister, and the form of `c(rngstate)` also changed to be more complex; This file shows how to handle the resulting >5000 character strings defining the random number generator state). 10 | * `ansim-simsum.do` requires version 14 (due to the `ci proportions` command). 11 | * `ansim-pointplots.do` requires version 15 (the graphs use the translucency features introduced at version 15) 12 | * `ansim-zipplot.do` requires version 15 (the graphs use the translucency features introduced at version 15) 13 | The guts of these files would work in older versions (down to 11.2), and could be adapted by users. 14 | 15 | ## User-written packages 16 | To run `simstudy.do`, the user-written package `survsim` is required. This can be installed with: 17 | `. ssc install survsim` 18 | See: [Crowther MJ and Lambert PC. Simulating complex survival data. The Stata Journal 2012;12(4):674-687.](http://www.stata-journal.com/article.html?article=st0275) 19 | 20 | Similarly, to run `ansim-simsum.do`, submit: 21 | `. ssc install simsum` 22 | See: [White IR. simsum: Analyses of simulation studies including Monte Carlo error. The Stata Journal 2010;10(3):369-385](http://www.stata-journal.com/article.html?article=st0200) 23 | 24 | Note that the graphs presented in the [tutorial](https://onlinelibrary.wiley.com/doi/10.1002/sim.8086) used the MRC graph scheme, which can be downloaded using: 25 | `. ssc install scheme-mrc` 26 | and invoked with 27 | `. set scheme mrc` 28 | 29 | ## Bugs, issues and improvements 30 | Please do let me know of any issues you discover in these files, and we will endeavor to acknowledge you here. 
31 | -------------------------------------------------------------------------------- /Stata/ansim-pointplots.do: -------------------------------------------------------------------------------- 1 | *! Tim P Morris 29nov2017 2 | version 15 3 | 4 | use estimateslabels, clear 5 | 6 | * Scatter of theta_i against repetition id, by method and DGM 7 | * Note - I space the different methods out by adding 2500 to rep number 8 | * of method 2 and 5000 to rep number of method 1, then label the methods 9 | replace idrep = idrep + 2500 if method==2 10 | replace idrep = idrep + 5000 if method==1 11 | lab def idreplab 750 "Cox" 3250 "Weibull" 5750 "Exponential" 12 | lab val idrep idreplab 13 | twoway scatter idrep theta, /// 14 | msymbol(o) msize(small) mcolor(%30) mlc(%0) /// 15 | by(dgm, cols(1) note("") xrescale) /// 16 | ytitle("") ylabel(750 3250 5750, nogrid) /// 17 | ytick(-450 2050 4550, noticks grid) /// 18 | xline(-.5, lc(gs8)) name(thetai, replace) 19 | 20 | * As above but for modelse 21 | twoway scatter idrep se, /// 22 | msymbol(o) msize(small) mcolor(%30) mlc(%0) /// 23 | by(dgm, cols(1) note("") xrescale) /// 24 | ytitle("") ylabel(750 3250 5750, nogrid) /// 25 | ytick(-450 2050 4550, noticks grid) name(sei, replace) 26 | 27 | graph combine thetai sei, xsize(7) iscale(*1.5) 28 | 29 | graph export thetaisei.pdf, replace 30 | graph export thetaisei.svg, replace 31 | 32 | 33 | * theta_i vs. se_i 34 | twoway scatter se theta, msym(o) msize(small) mcol(%30) mlc(%0) by(method dgm, cols(2) yrescale xrescale) 35 | 36 | 37 | * Comparing each method vs. each other method 38 | use estimateslabels, clear 39 | drop conv error // all reps converged, no errors 40 | reshape wide theta se, i(idrep dgm) j(method) 41 | 42 | label var theta1 "θ, Exponential" 43 | label var se1 "SE(θ), Exponential" 44 | label var theta2 "θ, Weibull" 45 | label var se2 "SE(θ), Weibull" 46 | label var theta3 "θ, Cox" 47 | label var se3 "SE(θ), Cox" 48 | 49 | * Standard matrix plot of theta(method) vs. 
theta(!method) 50 | * Waste of space 51 | foreach s in theta se { 52 | graph matrix `s'1 `s'2 `s'3, by(dgm, note("")) msym(p) name(`s', replace) xsize(8) 53 | } 54 | 55 | 56 | * This plot takes more effort but is better: pairwise method comparisons (1=Exponential, 2=Weibull, 3=Cox) 57 | local opts yscale(range(-1.5 0)) xscale(range(-1.5 0)) msym(i) mlabs(vlarge) mlabc(black) aspect(1) graphregion(margin(zero)) plotregion(margin(zero)) xtit("") ytit("") legend(off) nodraw 58 | twoway scatteri 0 0 (0) "Exponential" .5 .7 (0) "θᵢ " -.5 0 (0) "SE(θᵢ)", `opts' xlab(none) ylab(none) name(Exponential, replace) 59 | twoway scatteri 0 0 (0) "Weibull" .5 .5 (0) "θᵢ" -.5 -.5 (0) "SE(θᵢ)", `opts' xlab(none) ylab(none) name(Weibull, replace) 60 | twoway scatteri 0 0 (0) "Cox" .5 0 (0) "θᵢ" -.5 -.5 (0) "SE(θᵢ)", `opts' xlab(none) ylab(none) name(Cox, replace) 61 | forval dgm = 2/2 { 62 | if `dgm'==1 { 63 | local frtheta -1 0 64 | local frse .18 .25 65 | } 66 | else if `dgm'==2 { 67 | local frtheta -1 .1 68 | local frse .14 .17 69 | } 70 | twoway (function x, range(`frtheta') lcolor(gs10)) (scatter theta1 theta2 if dgm==`dgm', mc(%50) msize(vsmall)), `opts' name(theta12dgm`dgm', replace) 71 | twoway (function x, range(`frtheta') lcolor(gs10)) (scatter theta1 theta3 if dgm==`dgm', mc(%50) msize(vsmall)), `opts' name(theta13dgm`dgm', replace) 72 | twoway (function x, range(`frtheta') lcolor(gs10)) (scatter theta2 theta3 if dgm==`dgm', mc(%50) msize(vsmall)), `opts' name(theta23dgm`dgm', replace) // Weibull vs Cox 73 | twoway (function x, range(`frse')) (scatter se1 se2 if dgm==`dgm', mc(%50) msize(vsmall)), `opts' name(se12dgm`dgm', replace) 74 | twoway (function x, range(`frse')) (scatter se1 se3 if dgm==`dgm', mc(%50) msize(vsmall)), `opts' name(se13dgm`dgm', replace) 75 | twoway (function x, range(`frse')) (scatter se2 se3 if dgm==`dgm', mc(%50) msize(vsmall)), `opts' name(se23dgm`dgm', replace) // Weibull vs Cox 76 | graph combine Exponential theta12dgm`dgm' theta13dgm`dgm' /// 77 | se12dgm`dgm' Weibull theta23dgm`dgm' /// 78 | se13dgm`dgm' se23dgm`dgm' Cox /// 79 | , 
cols(3) /// 80 | xsize(4) /// 81 | name(dgm`dgm', replace) 82 | //graph export dgm`dgm'.pdf, replace 83 | } 84 | -------------------------------------------------------------------------------- /Stata/ansim-simsum.do: -------------------------------------------------------------------------------- 1 | *! Tim P Morris 29nov2017 2 | version 14 // the -ci proportions- syntax used below requires Stata 14 3 | 4 | use estimateslabels, clear 5 | 6 | * Run simsum 7 | simsum theta, true(-.5) se(se) by(dgm) methodvar(method) id(idrep) ref(Weibull) mcse format(%6.3fc) 8 | 9 | gen byte bccovers = 0 10 | * bias-corrected coverage: does each rep's 95% interval cover the mean of theta for its dgm/method? 11 | forval dgm = 1/2 { 12 | forval method = 1/3 { 13 | summ theta if dgm==`dgm' & method==`method', meanonly 14 | local thetahat = r(mean) 15 | replace bccovers = 1 if theta-(1.96*se)<`thetahat' & theta+(1.96*se)>`thetahat' & dgm==`dgm' & method==`method' 16 | } 17 | } 18 | 19 | bysort dgm method: ci proportions bccovers 20 | -------------------------------------------------------------------------------- /Stata/ansim-zipplot.do: -------------------------------------------------------------------------------- 1 | *! Tim P Morris 29nov2017 2 | * File to produce the zip plot 3 | version 15 4 | 5 | * Zip plot of conf int 6 | use estimateslabels, clear 7 | gen float cilow = theta + (se*invnorm(.025)) 8 | gen float ciupp = theta + (se*invnorm(.975)) 9 | 10 | local trteff -0.5 // name true value of theta `trteff' 11 | 12 | * For coverage (or type I error), use true θ for null value 13 | * so p<=.05 is a non-covering interval 14 | gen float ptheta = 1-normal(abs(theta-`trteff')/se) // if sim outputs df, use ttail and remove '1-' 15 | gen byte covers = ptheta > .025 // binary indicator of whether ci covers true theta 16 | 17 | sort dgm method ptheta 18 | by dgm method: gen double pthetarank = 100 - (_n/16) // scale from 0-100. This will be vertical axis. 19 | 20 | * Create MC conf. int. for coverage 21 | gen float covlb = . 22 | gen float covub = . 
23 | forval dgm = 1/2 { 24 | forval method = 1/3 { 25 | di as text "DGM = " as result `dgm' as text ", method = " as result `method' 26 | qui ci proportions covers if dgm==`dgm' & method==`method' 27 | qui replace covlb = 100*(r(lb)) if dgm==`dgm' & method==`method' 28 | qui replace covub = 100*(r(ub)) if dgm==`dgm' & method==`method' 29 | } 30 | } 31 | bysort dgm method: replace covlb = . if _n>1 32 | bysort dgm method: replace covub = . if _n>1 33 | qui gen float lpoint = -1.5 if !missing(covlb) 34 | qui gen float rpoint = 1.5 if !missing(covlb) 35 | 36 | 37 | * Plot of confidence interval coverage: 38 | * First two rspike plots: Monte Carlo confidence interval for percent coverage 39 | * second two rspike plots: confidence intervals for individual reps 40 | * blue intervals cover, purple do not 41 | * scatter plot (white dots) are point estimates - probably unnecessary 42 | #delimit ; 43 | twoway (rspike lpoint rpoint covlb, hor lw(thin) pstyle(p5)) // MC 44 | (rspike lpoint rpoint covub, hor lw(thin) pstyle(p5)) 45 | (rspike cil ciu pthetarank if !covers, hor lw(medium) pstyle(p2) lcol(%30)) 46 | (rspike cil ciu pthetarank if covers, hor lw(medium) pstyle(p1) lcol(%30)) 47 | (scatter pthetarank theta, msym(p) mcol(white%30)) // plots point estimates in white 48 | (pci 0 -.5 100 -.5, pstyle(p5) lw(thin)) 49 | , 50 | name(coverage, replace) 51 | xtit("95% confidence intervals") 52 | ytit("Centile of ranked p-values for null: θ=–0.5") 53 | ylab(5 50 95) 54 | by(dgm method, cols(3) note("") noxrescale iscale(*.8)) scale(.8) 55 | legend(order(4 "Coverer" 3 "Non-coverer") rows(1)) 56 | xsize(4) scheme(economist) 57 | ; 58 | #delimit cr 59 | graph export zipplot.pdf, replace 60 | -------------------------------------------------------------------------------- /Stata/estimates.dta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tpmorris/simtutorial/f47ff33e6dbdf2d5ce120a6089087394c5e3df0e/Stata/estimates.dta -------------------------------------------------------------------------------- /Stata/estimateslabels.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpmorris/simtutorial/f47ff33e6dbdf2d5ce120a6089087394c5e3df0e/Stata/estimateslabels.dta -------------------------------------------------------------------------------- /Stata/simstudy.do: -------------------------------------------------------------------------------- 1 | *! Michael J Crowther, Tim P Morris | 29nov2017 2 | version 14 3 | * Runs simulation study to produce 4 | * 1. estimates data 5 | * 2. states data at start of a rep 6 | /* 7 | Note 8 | -> add quietly to suppress output 9 | -> add noisily after 'capture' to show the output 10 | */ 11 | quietly { 12 | set seed 72789 // set your seed 13 | local nsim 1600 // number of simulated data sets required for each parameter setting 14 | local nobs 300 // number of observations in each simulated data set 15 | local gamma1 1 // for Weibull and exponential DGM 16 | local gamma2 1.5 // for Weibull not exponential DGM 17 | 18 | * Create temporary objects: 'post' is the name used to refer to estimates and 'postseed' to states 19 | tempname estimates states 20 | 21 | /* 22 | declare your post file containing: 23 | -> i = the simulation iteration 24 | -> dgm = the data generating model 25 | -> method = a string variable with (maximum) 7 characters, which refers to the survival model being fitted, e.g. 
"weibull" 26 | -> b = estimated log hazard ratio 27 | -> se = standard error of the estimated log hazard ratio 28 | -> conv = model converged (0=no, 1=yes) 29 | -> error = (0=no, 1=yes) 30 | */ 31 | 32 | postfile `estimates' int(idrep) byte(dgm method) float(theta se) byte(conv error) using estimates, replace 33 | * seed file 34 | postfile `states' int(idrep) str2000 s1 str2000 s2 str1100 s3 using states.dta, replace 35 | set coeftabresults off // runs faster 36 | timer on 1 // if you want to time the whole sim 37 | 38 | * loop over iterations, conducting 1000 repetitions 39 | noi _dots 0, title("Simulation running...") 40 | forvalues i = 1/`nsim' { 41 | 42 | * store the rngstate 43 | post `states' (`i') (substr(c(rngstate),1,2000)) (substr(c(rngstate),2001,2000)) (substr(c(rngstate),4001,.)) 44 | 45 | * at the beginning of each iteration, clear the dataset 46 | clear 47 | * declare your sample size 48 | set obs `nobs' 49 | * generate a binary treatment group (0/1), with Prob(0.5) of being in each arm 50 | gen trt = rbinomial(1,0.5) 51 | 52 | * DGM 53 | forvalues j=1/2 { 54 | * Simulate survival times from Weibull, under proportional hazards, with administrative censoring at 5 years 55 | capture: survsim stime`j' event`j', dist(weibull) lambda(0.1) gamma(`gamma`j'') cov(trt -0.5) maxt(5) 56 | if _rc > 0 display as error "You do not have the survsim command installed" _n as text "To install it, type:" _n "ssc install survsim" 57 | * Declare the data to be survival data 58 | stset stime`j', failure(event`j'=1) 59 | 60 | * Fit an exponential proportional hazards model, adjusting for treatment 61 | capture streg trt, dist(exp) nohr 62 | if (_rc>0) local error = 1 63 | else local error = 0 64 | * Post the iteration, DGM, model, estimated log hazard ratio, and s.e. 
of estimated log hazard ratio 65 | post `estimates' (`i') (`j') (1) (_b[trt]) (_se[trt]) (e(converged)) (`error') 66 | 67 | * Fit a Weibull proportional hazards model, adjusting for treatment 68 | capture streg trt, dist(weibull) nohr 69 | if (_rc>0) local error = 1 70 | else local error = 0 71 | * Post the iteration, DGM, model, estimated log hazard ratio, and s.e. of estimated log hazard ratio 72 | post `estimates' (`i') (`j') (2) (_b[trt]) (_se[trt]) (e(converged)) (`error') 73 | 74 | * Fit a Cox proportional hazards model, adjusting for treatment 75 | capture stcox trt, estimate 76 | if (_rc>0) local error = 1 77 | else local error = 0 78 | * Post the iteration, DGM, model, estimated log hazard ratio, and s.e. of estimated log hazard ratio 79 | post `estimates' (`i') (`j') (3) (_b[trt]) (_se[trt]) (e(converged)) (`error') 80 | } 81 | noi _dots `i' 0 82 | } 83 | timer off 1 // if you want to time the whole sim 84 | timer list // display run time 85 | * close the postfiles 86 | postclose `estimates' 87 | postclose `states' 88 | } 89 | 90 | * Label estimates data and re-save 91 | use estimates, clear 92 | label variable idrep "Rep num" 93 | label variable dgm "Data-generating mechanism" 94 | label variable method "Method" 95 | label variable theta "θᵢ" 96 | label variable se "SE(θᵢ)" 97 | label variable conv "Converged" 98 | label define nylab 0 "No" 1 "Yes" 99 | label values conv error nylab 100 | label define dgmlab 1 "DGM: γ=1" 2 "DGM: γ=1.5" 101 | label values dgm dgmlab 102 | label define methodlab 1 "Exponential" 2 "Weibull" 3 "Cox" 103 | label values method methodlab 104 | sort idrep dgm method 105 | save estimateslabels, replace 106 | 107 | * to load your dataset of random number states (clear discards the estimates data in memory) 108 | use states, clear 109 | * to rebuild the stored state from its three string pieces and reset the rngstate for repetition i 110 | local i 23 111 | local statei = s1[`i']+s2[`i']+s3[`i'] 112 | set rngstate `statei' 113 | 
-------------------------------------------------------------------------------- /Stata/states.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpmorris/simtutorial/f47ff33e6dbdf2d5ce120a6089087394c5e3df0e/Stata/states.dta --------------------------------------------------------------------------------