├── .gitignore
├── LICENSE
├── R
    ├── ansim.R
    ├── estimates.rds
    ├── examplesim.Rproj
    ├── readme.md
    ├── runsim.R
    └── states.rds
├── README.md
└── Stata
    ├── README.md
    ├── ansim-pointplots.do
    ├── ansim-simsum.do
    ├── ansim-zipplot.do
    ├── estimates.dta
    ├── estimateslabels.dta
    ├── simstudy.do
    └── states.dta


/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Tim Morris
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/R/ansim.R:
--------------------------------------------------------------------------------
 1 | # Analyses the estimates data produced by runsim.R: plots and performance measures
 2 | # See example in https://onlinelibrary.wiley.com/doi/10.1002/sim.8086
 3 | # Tim Morris @tmorris_mrc | 30apr2019
 4 | # (based partly on code of Alessandro Gasparini; also  on some pestering of Matteo Quartagno)
 5 | 
 6 | if (!requireNamespace("tidyverse")) install.packages("tidyverse")
 7 | if (!requireNamespace("rsimsum")) install.packages("rsimsum")
 8 | 
 9 | library(tidyverse)
10 | library(rsimsum)
11 | 
12 | # Plain black-and-white theme with the panel grid lines removed
13 | theme_set(theme_bw(base_size = 12))
14 | theme_update(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
15 | 
16 | # Load the estimates saved by runsim.R
17 | estimates <- readRDS("estimates.rds")
18 | 
19 | # One row per rep, dgm and method with columns theta and se; dgm and method become labelled factors
20 | estmlong <- estimates %>%
21 |   gather(key = "meththing", value = "est", -rep, -dgmgamma) %>%
22 |   separate(meththing, into = c("thetase", "method"), sep = "_") %>%
23 |   spread(thetase, est) %>%
24 |   mutate(dgmgamma = factor(dgmgamma, levels = c(1, 1.5), labels = c("gamma = 1", "gamma = 1.5")),
25 |          method = factor(method, levels = c(1, 2, 3), labels = c("Exponential", "Weibull", "Cox")))
26 | head(estmlong)
27 | 
28 | 
29 | # Alternative way to get estmlong
30 | #estlong <- reshape(estimates,
31 | #  direction = "long", idvar = c("rep","dgmgamma"),
32 | #  timevar = c("method"), times = c("exp", "wei", "cox"), v.names=c("theta", "se"),
33 | #  varying = list(c("thetaexp", "thetawei", "thetacox"), c("seexp", "sewei", "secox"))
34 | #)
35 | 
36 | 
37 | # Swarm plot of theta (points separated vertically by rep); the vertical line marks the mean per panel
38 | meantheta <- estmlong %>%
39 |   group_by(dgmgamma, method) %>%
40 |   summarise(Mean.Theta = mean(theta))
41 | thetaswarm <- ggplot(estmlong, aes(x = theta, y = rep)) +  # labs() is not an aesthetic, so it is no longer passed to aes()
42 |   geom_point(color = rgb(.129, .404, .494), alpha = .3) + geom_vline(data = meantheta, aes(xintercept = Mean.Theta), colour = rgb(1, 0.859, 0)) +
43 |   facet_wrap(facets = c("dgmgamma", "method"), ncol = 1, strip.position = "left") + theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank())
44 | # Display, then save this plot object explicitly rather than relying on last_plot()
45 | thetaswarm
46 | ggsave(filename = "thetaswarmR.pdf", plot = thetaswarm, device = "pdf",
47 |   scale = 1, width = 4, height = 6, units = "in")
48 | 
49 | 
50 | # Swarm plot of se (points separated vertically by rep); the vertical line marks the mean per panel
51 | meanse <- estmlong %>%
52 |   group_by(dgmgamma, method) %>%
53 |   summarise(Mean.SE = mean(se))
54 | seswarm <- ggplot(estmlong, aes(x = se, y = rep)) +  # labs() is not an aesthetic, so it is no longer passed to aes()
55 |   geom_point(color = rgb(.129, .404, .494), alpha = .3) + geom_vline(data = meanse, aes(xintercept = Mean.SE), colour = rgb(1, 0.859, 0)) +
56 |   facet_wrap(facets = c("dgmgamma", "method"), ncol = 1, strip.position = "left") + theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank())
57 | # y-axis title is blanked as in thetaswarm; display, then save this plot object explicitly
58 | seswarm
59 | ggsave(filename = "seswarmR.pdf", plot = seswarm, device = "pdf",
60 |   scale = 1, width = 4, height = 6, units = "in")
61 | 
62 | 
63 | # Run simsum to estimate performance measures (true value -0.5, Weibull as the reference method)
64 | ssres <- simsum(
65 |   data = estmlong, estvarname = "theta", true = -.5, se = "se",
66 |   methodvar = "method", ref = "Weibull", by = "dgmgamma", x = TRUE
67 | )
68 | performance <- ssres[["summ"]]
69 | head(performance)
70 | 
71 | # Comparison of theta according to method
72 | autoplot(ssres, type = "est")
73 | autoplot(ssres, type = "est_ba")
74 | # Comparison of SE according to method
75 | autoplot(ssres, type = "se")
76 | autoplot(ssres, type = "se_ba")
77 | # Ridge plots comparing theta, then SE
78 | autoplot(ssres, type = "est_ridge")
79 | autoplot(ssres, type = "se_ridge")
80 | # Zip plot: display, then save this plot object explicitly rather than relying on last_plot()
81 | zip <- autoplot(ssres, type = "zip")
82 | zip
83 | ggsave(filename = "zipR.pdf", plot = zip, device = "pdf",
84 |   scale = 1, width = 4, height = 6, units = "in")
85 | 
86 | # Lollipop plot for whatever performance measure you favour
87 | autoplot(summary(ssres), type = "lolly", stats = "bias")
88 | 


--------------------------------------------------------------------------------
/R/estimates.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tpmorris/simtutorial/f47ff33e6dbdf2d5ce120a6089087394c5e3df0e/R/estimates.rds


--------------------------------------------------------------------------------
/R/examplesim.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 


--------------------------------------------------------------------------------
/R/readme.md:
--------------------------------------------------------------------------------
 1 | # R code for running a simple simulation study
 2 | ## Taken from the example in https://onlinelibrary.wiley.com/doi/10.1002/sim.8086
 3 | The files contained in this repository are provided for readers of [Morris, White and Crowther's tutorial paper](https://onlinelibrary.wiley.com/doi/10.1002/sim.8086) to run the example simulation study.
 4 | 
 5 | ## Disclaimer
 6 | I am not an R programmer and this represents my first full simulation study in R. I am aware of several alternative ways in which I could have coded this. My code attempts to be clear rather than clever or beautiful, though I may also have failed at clarity.
 7 | 
 8 | ## Running the `.R` files
 9 | For those running the files (rather than just reading them), note that `runsim.R` should be run first, since this is the core of the simulation. It produces the files `estimates.rds` and `states.rds` (these files are also provided in this repo). The file `ansim.R` can then be run.
10 | 
11 | ## R versions
12 | This was run in R version 3.6.0. I don't know if it would work on earlier versions (sorry).
13 | 
14 | ## Additional libraries
15 | To run `runsim.R`, the `simsurv` and `eha` packages are required.
16 | 
17 | To run `ansim.R`, the `tidyverse` and `rsimsum` packages are required.
18 | 
19 | ## Reproducing data/results of a single repetition
20 | At the end of `runsim.R` there is some code to reproduce the results of a specific repetition and data-generating mechanism. When running the repetitions, I output the current state (`.Random.seed`) of the random-number generator at the beginning of each repetition for each data-generating mechanism. This can then be used to later set `.Random.seed` to the desired value and repeat what was done.
21 | 
22 | Note that this is not general: it works for the default random-number generator in R (Mersenne twister) and I have not checked how the current state is represented for other generators.
23 | 
24 | ## Bugs, issues and improvements
25 | Please do let me know of any issues you discover in these files, and I will endeavor to acknowledge you here. I am not certain to respond to pull requests that say 'here's how you *should* do it', but I will respond to requests that say 'I found an error here'. It's not that I think I've done it the best way, it's just that I don't know enough about R to judge whether a different approach is better in a worthwhile way; by all means release and publicise your own better version!
26 | 


--------------------------------------------------------------------------------
/R/runsim.R:
--------------------------------------------------------------------------------
 1 | # Runs simulation study to produce estimates data and states data
 2 | # See example in https://onlinelibrary.wiley.com/doi/10.1002/sim.8086
 3 | # Tim Morris @tmorris_mrc | 30apr2019
 4 | # (based partly on code of Alessandro Gasparini; also on some pestering of Matteo Quartagno)
 5 | 
 6 | if (!requireNamespace("simsurv")) install.packages("simsurv")
 7 | if (!requireNamespace("eha")) install.packages("eha")
 8 | library(simsurv)
 9 | library(eha)
10 | 
11 | 
12 | # onerep(): simulate and analyse one repetition; returns a one-row data frame of estimates.
13 | # Data: nobs subjects with a binary covariate trt ~ Bernoulli(prob); survival times come from
14 | # simsurv() with lambdas = lambda, gammas = gamma, log hazard ratio beta for trt and maxt = 5.
15 | # Analysis: exponential (suffix _1), Weibull (_2) and Cox (_3) models; rep only labels the row.
16 | onerep <- function(rep, nobs = 300, prob = 0.5, lambda = 0.1, gamma = 1, beta = -0.5) {
17 |   df <- data.frame(
18 |     id = seq_len(nobs),
19 |     trt = rbinom(n = nobs, size = 1, prob = prob)
20 |   )
21 |   # Generate survival times and merge into df (merge() joins on the columns the two share)
22 |   s <- simsurv(lambdas = lambda, gammas = gamma, betas = c(trt = beta), x = df, maxt = 5)
23 |   df <- merge(df, s)
24 |   # Exponential model (Weibull fit with shape = 1)
25 |   fitexp <- phreg(Surv(eventtime, status) ~ trt, data = df, dist = "weibull", shape = 1)
26 |   thetaexp <- coef(fitexp)[["trt"]]
27 |   se_thetaexp <- sqrt(fitexp[["var"]]["trt", "trt"])
28 |   # Weibull model
29 |   fitwei <- phreg(Surv(eventtime, status) ~ trt, data = df, dist = "weibull")
30 |   thetawei <- coef(fitwei)[["trt"]]
31 |   se_thetawei <- sqrt(fitwei[["var"]]["trt", "trt"])
32 |   # Cox model
33 |   fitcox <- coxph(Surv(eventtime, status) ~ trt, data = df)
34 |   thetacox <- coef(fitcox)[["trt"]]
35 |   se_thetacox <- sqrt(fitcox[["var"]][1, 1])  # single element, so the SE is a number rather than a 1 x 1 matrix
36 |   # Output coeffs and SEs
37 |   out <- data.frame(
38 |     rep = rep,
39 |     dgmgamma = gamma,
40 |     theta_1 = thetaexp,
41 |     se_1 = se_thetaexp,
42 |     theta_2 = thetawei,
43 |     se_2 = se_thetawei,
44 |     theta_3 = thetacox,
45 |     se_3 = se_thetacox
46 |   )
47 |   return(out)
48 | }
49 | 
50 | # Uncomment the following line to run once with large n_obs.
51 | #onerep(rep = 1, nobs = 100000)
52 | 
53 | # Preparation to run nsim repetitions
54 | set.seed(65416)
55 | nsim <- 1600
56 | # Empty estimates data frame to fill up.
57 | # Note - requires nsim*2 rows because 2 data-generating mechanisms
58 | estimates <- data.frame(matrix(ncol = 8, nrow = (nsim * 2)))
59 | x <- c("rep", "dgmgamma", "theta_1", "se_1", "theta_2", "se_2", "theta_3", "se_3")
60 | colnames(estimates) <- x
61 | states <- matrix(ncol = length(.Random.seed), nrow = (nsim * 2))  # one RNG state per row; width follows the generator in use
62 | 
63 | # Run all nsim reps: gamma = 1 fills rows 1..nsim, gamma = 1.5 fills rows nsim+1..2*nsim
64 | for (r in seq_len(nsim)) {
65 |   # 1st data-generating mechanism (the RNG state is stored just before the repetition runs)
66 |   states[r, ] <- .Random.seed
67 |   estimates[r, ] <- onerep(rep = r, gamma = 1)
68 |   # 2nd data-generating mechanism
69 |   states[(nsim + r), ] <- .Random.seed
70 |   estimates[(nsim + r), ] <- onerep(rep = r, gamma = 1.5)
71 | }
72 | 
73 | # Save estimates and RNG states for analysis and for reproducing single repetitions
74 | head(estimates)
75 | saveRDS(estimates, file = "estimates.rds")
76 | head(states)
77 | saveRDS(states, file = "states.rds")
78 | 
79 | # Want to reproduce data from a particular rep? Restore the state saved just before that rep, then rerun it
80 | # Here is repetition 3, gamma = 1 (stored in row 3 of states)
81 | .Random.seed <- states[3,]
82 | onerep(rep = 3, gamma=1)
83 | # Now for repetition 211, gamma = 1.5
84 | .Random.seed <- states[(nsim+211),] # For the second data-generating mechanism, the state for rep r was stored in row nsim+r
85 | onerep(rep = 211, gamma=1.5)
86 | 


--------------------------------------------------------------------------------
/R/states.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tpmorris/simtutorial/f47ff33e6dbdf2d5ce120a6089087394c5e3df0e/R/states.rds


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Stata and R code for running a simple simulation study
2 | The files contained in this repository are provided for readers of [Morris, White and Crowther's tutorial paper](https://onlinelibrary.wiley.com/doi/10.1002/sim.8086) to run the example simulation study described in section 7.
3 | 
4 | There is one folder containing Stata code and another folder containing R code. Each folder contains the relevant readme. I would be delighted to see code for running this simulation study in other languages, but do not plan to write it myself.
5 | 


--------------------------------------------------------------------------------
/Stata/README.md:
--------------------------------------------------------------------------------
 1 | # Stata code for running a simple simulation study
 2 | The files contained in this repository are provided for readers of [Morris, White and Crowther's tutorial paper](https://onlinelibrary.wiley.com/doi/10.1002/sim.8086) to run the example simulation study.
 3 | 
 4 | ## Running the `.do` files
 5 | For those running the files (rather than just reading them), note that `simstudy.do` should be run first, since this is the core of the simulation. It produces the data files `estimates.dta`, `estimateslabels.dta` (a cleanly labelled version of estimates) and `states.dta`. However, these data files are also provided here.
 6 | 
 7 | ## Stata versions
 8 | Relatively recent versions of Stata are needed:   
 9 | * `simstudy.do` requires version 14 (Stata's random number generator changed from using KISS to Mersenne twister, and the form of `c(rngstate)` also changed to be more complex; This file shows how to handle the resulting >5000 character strings defining the random number generator state).
10 | * `ansim-simsum.do` requires version 14 (due to the `ci proportions` command).
11 | * `ansim-pointplots.do` requires version 15 (the graphs use the translucency features introduced at version 15)
12 | * `ansim-zipplot.do` requires version 15 (the graphs use the translucency features introduced at version 15)
13 | The guts of these files would work in older versions (down to 11.2), and could be adapted by users.
14 | 
15 | ## User-written packages
16 | To run `simstudy.do`, the user-written package `survsim` is required. This can be installed with:   
17 | `. ssc install survsim`   
18 | See: [Crowther MJ and Lambert PC. Simulating complex survival data. The Stata Journal 2012;12(4):674-687.](http://www.stata-journal.com/article.html?article=st0275)   
19 | 
20 | Similarly, to run `ansim-simsum.do`, submit:   
21 | `. ssc install simsum`   
22 | See: [White IR. simsum: Analyses of simulation studies including Monte Carlo error. The Stata Journal 2010;10(3):369-385](http://www.stata-journal.com/article.html?article=st0200)   
23 | 
24 | Note that the graphs presented in the [tutorial](https://onlinelibrary.wiley.com/doi/10.1002/sim.8086) used the MRC graph scheme, which can be downloaded using:
25 | `. ssc install scheme-mrc`
26 | and invoked with
27 | `. set scheme mrc`
28 | 
29 | ## Bugs, issues and improvements
30 | Please do let me know of any issues you discover in these files, and we will endeavor to acknowledge you here.
31 | 


--------------------------------------------------------------------------------
/Stata/ansim-pointplots.do:
--------------------------------------------------------------------------------
 1 | *! Tim P Morris 29nov2017
 2 | version 15
 3 | 
 4 | use estimateslabels, clear
 5 | 
 6 | * Scatter of theta_i against repetition id, by method and DGM
 7 | * Note - I space the different methods out by adding 2500 to rep number
 8 | * of method 2 and 5000 to rep number of method 1, then label the methods
 9 | replace idrep = idrep + 2500 if method==2
10 | replace idrep = idrep + 5000 if method==1
11 | lab def idreplab 750 "Cox" 3250 "Weibull" 5750 "Exponential"
12 | 	lab val idrep idreplab
13 | twoway scatter idrep theta, ///
14 | 	msymbol(o) msize(small) mcolor(%30) mlc(%0)	///
15 | 	by(dgm, cols(1) note("") xrescale)	///
16 | 	ytitle("") ylabel(750 3250 5750, nogrid)	///
17 | 	ytick(-450 2050 4550, noticks grid)	///
18 | 	xline(-.5, lc(gs8)) name(thetai, replace)
19 | 	
20 | * As above but for modelse (no reference line is drawn)
21 | twoway scatter idrep se, ///
22 | 	msymbol(o) msize(small) mcolor(%30) mlc(%0)	///
23 | 	by(dgm, cols(1) note("") xrescale)	///
24 | 	ytitle("") ylabel(750 3250 5750, nogrid)	///
25 | 	ytick(-450 2050 4550, noticks grid) name(sei, replace)
26 | 
27 | graph combine thetai sei, xsize(7) iscale(*1.5)
28 | 
29 | graph export thetaisei.pdf, replace
30 | graph export thetaisei.svg, replace
31 | 
32 | 
33 | * theta_i vs. se_i, one panel per method and DGM
34 | twoway scatter se theta, msym(o) msize(small) mcol(%30) mlc(%0) by(method dgm, cols(2) yrescale xrescale)
35 | 
36 | 
37 | * Comparing each method vs. each other method: reshape to one row per rep and DGM with theta and se of methods 1-3 side by side
38 | use estimateslabels, clear
39 | drop conv error // all reps converged, no errors
40 | reshape wide theta se, i(idrep dgm) j(method)
41 | 
42 | label var theta1 "θ, Exponential"
43 | label var se1 "SE(θ), Exponential"
44 | label var theta2 "θ, Weibull"
45 | label var se2 "SE(θ), Weibull"
46 | label var theta3 "θ, Cox"
47 | label var se3 "SE(θ), Cox"
48 | 
49 | * Standard matrix plot of theta(method) vs. theta(!method), then the same for se
50 | * Waste of space
51 | foreach s in theta se {
52 | 	graph matrix `s'1 `s'2 `s'3, by(dgm, note("")) msym(p) name(`s', replace) xsize(8)
53 | }
54 | 
55 | 
56 | * This plot takes more effort but is better: method names on the diagonal, theta comparisons above it, SE comparisons below it. Note the loop currently draws DGM 2 only (forval dgm = 2/2)
57 | local opts yscale(range(-1.5 0)) xscale(range(-1.5 0)) msym(i) mlabs(vlarge) mlabc(black) aspect(1) graphregion(margin(zero)) plotregion(margin(zero)) xtit("") ytit("") legend(off) nodraw
58 | twoway scatteri 0 0 (0) "Exponential" .5 .7 (0) "θᵢ " -.5 0 (0) "SE(θᵢ)", `opts' xlab(none) ylab(none) name(Exponential, replace)
59 | twoway scatteri 0 0 (0) "Weibull" .5 .5 (0) "θᵢ" -.5 -.5 (0) "SE(θᵢ)", `opts' xlab(none) ylab(none) name(Weibull, replace)
60 | twoway scatteri 0 0 (0) "Cox" .5 0 (0) "θᵢ" -.5 -.5 (0) "SE(θᵢ)", `opts' xlab(none) ylab(none) name(Cox, replace)
61 | forval dgm = 2/2 {
62 | 	if `dgm'==1 {
63 | 		local frtheta -1 0
64 | 		local frse .18 .25
65 | 	}
66 | 	else if `dgm'==2 {
67 | 		local frtheta -1 .1
68 | 		local frse .14 .17
69 | 	}
70 | 	twoway (function x, range(`frtheta') lcolor(gs10)) (scatter theta1 theta2 if dgm==`dgm', mc(%50) msize(vsmall)), `opts' name(theta12dgm`dgm', replace)
71 | 	twoway (function x, range(`frtheta') lcolor(gs10)) (scatter theta1 theta3 if dgm==`dgm', mc(%50) msize(vsmall)), `opts' name(theta13dgm`dgm', replace)
72 | 	twoway (function x, range(`frtheta') lcolor(gs10)) (scatter theta2 theta3 if dgm==`dgm', mc(%50) msize(vsmall)), `opts' name(theta23dgm`dgm', replace)
73 | 	twoway (function x, range(`frse')) (scatter se1 se2 if dgm==`dgm', mc(%50) msize(vsmall)), `opts' name(se12dgm`dgm', replace)
74 | 	twoway (function x, range(`frse')) (scatter se1 se3 if dgm==`dgm', mc(%50) msize(vsmall)), `opts' name(se13dgm`dgm', replace)
75 | 	twoway (function x, range(`frse')) (scatter se2 se3 if dgm==`dgm', mc(%50) msize(vsmall)), `opts' name(se23dgm`dgm', replace)
76 | 	graph combine Exponential theta12dgm`dgm' theta13dgm`dgm'	///
77 | 		se12dgm`dgm'	Weibull theta23dgm`dgm'	///
78 | 		se13dgm`dgm' se23dgm`dgm' Cox	///
79 | 		, cols(3)	///
80 | 		xsize(4)	///
81 | 		name(dgm`dgm', replace)
82 | 	//graph export dgm`dgm'.pdf, replace
83 | }
84 | 


--------------------------------------------------------------------------------
/Stata/ansim-simsum.do:
--------------------------------------------------------------------------------
 1 | *! Tim P Morris 29nov2017
 2 | version 14.1 // ci proportions below uses the ci syntax that is not available under version 10
 3 | 
 4 | use estimateslabels, clear
 5 | 
 6 | * Run simsum: performance of theta against the true value -.5, by DGM, comparing methods with Weibull as reference
 7 | simsum theta, true(-.5) se(se) by(dgm) methodvar(method) id(idrep) ref(Weibull) mcse format(%6.3fc)
 8 | 
 9 | gen byte bccovers = 0
10 | * bias-corrected coverage: does each 95% interval cover the mean of theta for its DGM and method?
11 | forval dgm = 1/2 {
12 | 	forval method = 1/3 {
13 | 		summ theta if dgm==`dgm' & method==`method', meanonly
14 | 		local thetahat = r(mean)
15 | 		replace bccovers = 1 if theta-(1.96*se)<`thetahat' & theta+(1.96*se)>`thetahat' & dgm==`dgm' & method==`method'
16 | 	}
17 | }
18 | 
19 | bysort dgm method: ci proportions bccovers
20 | 


--------------------------------------------------------------------------------
/Stata/ansim-zipplot.do:
--------------------------------------------------------------------------------
 1 | *! Tim P Morris 29nov2017
 2 | * File to produce the zip plot
 3 | version 15
 4 | 
 5 | * Zip plot of conf int
 6 | use estimateslabels, clear
 7 | gen float cilow = theta + (se*invnorm(.025))
 8 | gen float ciupp = theta + (se*invnorm(.975))
 9 | 
10 | local trteff -0.5 // name true value of theta `trteff'
11 | 
12 | * For coverage (or type I error), use true θ for null value
13 | * ptheta is one-sided, so ptheta<=.025 (two-sided p<=.05) is a non-covering interval
14 | gen float ptheta = 1-normal(abs(theta-`trteff')/se) // if sim outputs df, use ttail and remove '1-'
15 | gen byte covers = ptheta > .025  // binary indicator of whether ci covers true theta
16 | 
17 | sort dgm method ptheta
18 | by dgm method: gen double pthetarank = 100 - (100*_n/_N) // scale from 0-100 whatever the number of reps. This will be vertical axis.
19 | 
20 | * Create MC conf. int. for coverage
21 | gen float covlb = .
22 | gen float covub = .
23 | forval dgm = 1/2 {
24 | 	forval method = 1/3 {
25 | 		di as text "DGM = " as result `dgm' as text ", method = " as result `method'
26 | 		qui ci proportions covers if dgm==`dgm' & method==`method'
27 | 			qui replace covlb = 100*(r(lb)) if dgm==`dgm' & method==`method'
28 | 			qui replace covub = 100*(r(ub)) if dgm==`dgm' & method==`method'
29 | 	}
30 | }
31 | bysort dgm method: replace covlb = . if _n>1
32 | bysort dgm method: replace covub = . if _n>1
33 | qui gen float lpoint = -1.5 if !missing(covlb)
34 | qui gen float rpoint =  1.5 if !missing(covlb)
35 | 
36 | 
37 | * Plot of confidence interval coverage:
38 | * First two rspike plots: Monte Carlo confidence interval for percent coverage
39 | * second two rspike plots: confidence intervals for individual reps (non-covering first, then covering)
40 | * blue intervals cover, purple do not
41 | * scatter plot (white dots) are point estimates - probably unnecessary; pci draws a vertical line at x = -.5
42 | #delimit ;
43 | twoway (rspike lpoint rpoint covlb, hor lw(thin) pstyle(p5)) // MC 
44 | 	(rspike lpoint rpoint covub, hor lw(thin) pstyle(p5))
45 | 	(rspike cil ciu pthetarank if !covers, hor lw(medium) pstyle(p2) lcol(%30))
46 | 	(rspike cil ciu pthetarank if covers, hor lw(medium) pstyle(p1) lcol(%30))
47 | 	(scatter pthetarank theta, msym(p) mcol(white%30)) // plots point estimates in white
48 | 	(pci 0 -.5 100 -.5, pstyle(p5) lw(thin))
49 | 	,
50 | 	name(coverage, replace)
51 | 	xtit("95% confidence intervals")
52 | 	ytit("Centile of ranked p-values for null: θ=–0.5")
53 | 	ylab(5 50 95)
54 | 	by(dgm method, cols(3) note("") noxrescale iscale(*.8)) scale(.8)
55 | 	legend(order(4 "Coverer" 3 "Non-coverer") rows(1))
56 | 	xsize(4) scheme(economist)
57 | 	;
58 | #delimit cr
59 | graph export zipplot.pdf, replace
60 | 


--------------------------------------------------------------------------------
/Stata/estimates.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tpmorris/simtutorial/f47ff33e6dbdf2d5ce120a6089087394c5e3df0e/Stata/estimates.dta


--------------------------------------------------------------------------------
/Stata/estimateslabels.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tpmorris/simtutorial/f47ff33e6dbdf2d5ce120a6089087394c5e3df0e/Stata/estimateslabels.dta


--------------------------------------------------------------------------------
/Stata/simstudy.do:
--------------------------------------------------------------------------------
  1 | *! Michael J Crowther, Tim P Morris | 29nov2017
  2 | version 14
  3 | * Runs simulation study to produce 
  4 | * 1. estimates data
  5 | * 2. states data at start of a rep
  6 | /*
  7 | Note
  8 | -> add quietly to suppress output
  9 | -> add noisily after 'capture' to show the output
 10 | */
 11 | quietly {
 12 |   set seed 72789 		// set your seed
 13 |   local nsim 1600		// number of simulated data sets required for each parameter setting
 14 |   local nobs 300		// number of observations in each simulated data set
 15 |   local gamma1 1		// for Weibull and exponential DGM
 16 |   local gamma2 1.5	// for Weibull not exponential DGM
 17 | 
 18 |   * Create temporary names: estimates is the handle for the estimates postfile and states the handle for the states postfile
 19 |   tempname estimates states
 20 | 
 21 |   /*
 22 | 	declare your post file containing:
 23 |   -> idrep = the simulation iteration (repetition number)
 24 |   -> dgm = the data generating model (1 or 2)
 25 |   -> method = a byte code for the survival model being fitted (1 = exponential, 2 = Weibull, 3 = Cox)
 26 |   -> theta = estimated log hazard ratio
 27 |   -> se = standard error of the estimated log hazard ratio
 28 |   -> conv = model converged (0=no, 1=yes)
 29 |   -> error = (0=no, 1=yes)
 30 | 	*/
 31 | 
 32 |   postfile `estimates' int(idrep) byte(dgm method) float(theta se) byte(conv error) using estimates, replace
 33 |   * seed file: the rngstate string is stored in three pieces (s1, s2, s3)
 34 |   postfile `states' int(idrep) str2000 s1 str2000 s2 str1100 s3 using states.dta, replace
 35 | 	set coeftabresults off //  runs faster
 36 | 	timer on 1 // if you want to time the whole sim
 37 | 
 38 |   * loop over iterations, conducting nsim repetitions
 39 | 	noi _dots 0, title("Simulation running...")
 40 |   forvalues i = 1/`nsim' {
 41 | 
 42 |     * store the rngstate
 43 |     post `states' (`i') (substr(c(rngstate),1,2000)) (substr(c(rngstate),2001,2000)) (substr(c(rngstate),4001,.))
 44 | 
 45 |     * at the beginning of each iteration, clear the dataset
 46 |     clear			
 47 |     * declare your sample size
 48 |     set obs `nobs'
 49 |     * generate a binary treatment group (0/1), with Prob(0.5) of being in each arm 
 50 |     gen trt = rbinomial(1,0.5)
 51 | 
 52 |     * DGM 
 53 |     forvalues j=1/2 {
 54 |     	* Simulate survival times from Weibull, under proportional hazards, with administrative censoring at 5 years
 55 |     	capture: survsim stime`j' event`j', dist(weibull) lambda(0.1) gamma(`gamma`j'') cov(trt -0.5) maxt(5)
 56 | 			if _rc > 0 display as error "You do not have the survsim command installed" _n as text "To install it, type:" _n "ssc install survsim"
 57 |     	* Declare the data to be survival data
 58 |     	stset stime`j', failure(event`j'=1)
 59 | 
 60 |     	* Fit an exponential proportional hazards model, adjusting for treatment
 61 |    		capture streg trt, dist(exp) nohr
 62 |    		if (_rc>0) local error = 1
 63 |    		else local error = 0
 64 |    		* Post the iteration, DGM, model, estimated log hazard ratio, and s.e. of estimated log hazard ratio
 65 |    		post `estimates' (`i') (`j') (1) (_b[trt]) (_se[trt]) (e(converged)) (`error')
 66 | 
 67 |    		* Fit a Weibull proportional hazards model, adjusting for treatment
 68 |    		capture streg trt, dist(weibull) nohr
 69 |    		if (_rc>0) local error = 1
 70 |    		else local error = 0
 71 |    		* Post the iteration, DGM, model, estimated log hazard ratio, and s.e. of estimated log hazard ratio
 72 |    		post `estimates' (`i') (`j') (2) (_b[trt]) (_se[trt]) (e(converged)) (`error')
 73 | 
 74 |    		* Fit a Cox proportional hazards model, adjusting for treatment
 75 |    		capture stcox trt, estimate
 76 |    		if (_rc>0) local error = 1
 77 |    		else local error = 0
 78 |    		* Post the iteration, DGM, model, estimated log hazard ratio, and s.e. of estimated log hazard ratio
 79 |    		post `estimates' (`i') (`j') (3) (_b[trt]) (_se[trt]) (e(converged)) (`error')
 80 |    	}
 81 | 	noi _dots `i' 0
 82 |   }
 83 | 	timer off 1 // if you want to time the whole sim
 84 | 	timer list // display run time
 85 |   * close the postfiles
 86 |   postclose `estimates'
 87 |   postclose `states'
 88 | }
 89 | 
 90 | * Label estimates data and re-save it as estimateslabels
 91 | use estimates, clear
 92 | 	label variable idrep "Rep num"
 93 | 	label variable dgm "Data-generating mechanism"
 94 | 	label variable method "Method"
 95 | 	label variable theta "θᵢ"
 96 | 	label variable se "SE(θᵢ)"
 97 | 	label variable conv "Converged"
 98 | 	label define nylab 0 "No" 1 "Yes"
 99 | 		label values conv error nylab
100 | 	label define dgmlab 1 "DGM: γ=1" 2 "DGM: γ=1.5"
101 | 		label values dgm dgmlab
102 | 	label define methodlab 1 "Exponential" 2 "Weibull" 3 "Cox"
103 | 		label values method methodlab
104 | 	sort idrep dgm method
105 | save estimateslabels, replace
106 | 
107 | * to load your dataset of random number states (use takes the clear option, not replace)
108 | use states, clear
109 | * to rebuild the stored state from its three pieces and reset the rngstate for repetition i
110 | local i 23
111 | local statei = s1[`i']+s2[`i']+s3[`i']
112 | set rngstate `statei'
113 | 


--------------------------------------------------------------------------------
/Stata/states.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tpmorris/simtutorial/f47ff33e6dbdf2d5ce120a6089087394c5e3df0e/Stata/states.dta


--------------------------------------------------------------------------------