├── vignettes ├── psi.n.mat.rda ├── vignette_dat │ ├── trueMSMreps.sc1.Rdata │ ├── trueMSMreps.sc3.Rdata │ ├── repstudy1_psi0.t0.MSM.Rdata │ ├── repstudy1_psi0.t0.NP.Rdata │ ├── restabSc1_all_1Ksims.Rdata │ ├── restabSc3_all_1Ksims.Rdata │ ├── restabwLefSc1_all_1Ksims.Rdata │ ├── restabwLefSc3_all_1Ksims.Rdata │ ├── sim50K.stratQg.notrunc.g.Rdata │ └── sim50K.stratQg.notrunc.missg.Rdata ├── mystyles.sty ├── R-Pckgs.bib └── SimCausal_2014.bib ├── .gitignore ├── .Rbuildignore ├── man ├── DAG.empty.Rd ├── distr.list.Rd ├── net.list.Rd ├── print.DAG.Rd ├── vecfun.print.Rd ├── vecfun.reset.Rd ├── print.DAG.action.Rd ├── print.DAG.node.Rd ├── vecfun.all.print.Rd ├── vecfun.remove.Rd ├── Define_sVar.Rd ├── N.Rd ├── rnet.gnp.Rd ├── rnet.gnm.Rd ├── rbern.Rd ├── igraph.to.sparseAdjMat.Rd ├── parents.Rd ├── vecfun.add.Rd ├── add.nodes.Rd ├── A.Rd ├── NetInd.to.sparseAdjMat.Rd ├── DF.to.long.Rd ├── plotSurvEst.Rd ├── sparseAdjMat.to.igraph.Rd ├── rnet.SmWorld.Rd ├── DF.to.longDT.Rd ├── rconst.Rd ├── rdistr.template.Rd ├── rcat.b1.Rd ├── sparseAdjMat.to.NetInd.Rd ├── NetIndClass.Rd ├── eval.target.Rd ├── plotDAG.Rd ├── simobs.Rd ├── rcat.factor.Rd ├── simfull.Rd ├── doLTCF.Rd ├── sim.Rd ├── set.targetMSM.Rd ├── simcausal.Rd ├── set.targetE.Rd └── add.action.Rd ├── inst └── CITATION ├── tests ├── RUnit │ ├── RUnit_tests_00_wide_to_long.R │ ├── RUnit_tests_03_NSE_bug.R │ └── RUnit_tests_02_MV.R ├── examples │ ├── sim.impute.examples12.R │ ├── MSM.examples.R │ ├── add.action.R │ ├── set.targetE.examples.R │ ├── example.simnets.R │ └── set.DAG.R └── test-all.R ├── R ├── zzz.R ├── DAG_node_R6.R ├── network_distributions.R ├── action_node_indexing.R ├── simcausal-package.r ├── distributions.R └── interface.r ├── DESCRIPTION ├── NAMESPACE ├── .travis.yml ├── NEWS ├── cran-comments.md └── README.md /vignettes/psi.n.mat.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osofr/simcausal/HEAD/vignettes/psi.n.mat.rda 
-------------------------------------------------------------------------------- /vignettes/vignette_dat/trueMSMreps.sc1.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osofr/simcausal/HEAD/vignettes/vignette_dat/trueMSMreps.sc1.Rdata -------------------------------------------------------------------------------- /vignettes/vignette_dat/trueMSMreps.sc3.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osofr/simcausal/HEAD/vignettes/vignette_dat/trueMSMreps.sc3.Rdata -------------------------------------------------------------------------------- /vignettes/vignette_dat/repstudy1_psi0.t0.MSM.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osofr/simcausal/HEAD/vignettes/vignette_dat/repstudy1_psi0.t0.MSM.Rdata -------------------------------------------------------------------------------- /vignettes/vignette_dat/repstudy1_psi0.t0.NP.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osofr/simcausal/HEAD/vignettes/vignette_dat/repstudy1_psi0.t0.NP.Rdata -------------------------------------------------------------------------------- /vignettes/vignette_dat/restabSc1_all_1Ksims.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osofr/simcausal/HEAD/vignettes/vignette_dat/restabSc1_all_1Ksims.Rdata -------------------------------------------------------------------------------- /vignettes/vignette_dat/restabSc3_all_1Ksims.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osofr/simcausal/HEAD/vignettes/vignette_dat/restabSc3_all_1Ksims.Rdata -------------------------------------------------------------------------------- 
/vignettes/vignette_dat/restabwLefSc1_all_1Ksims.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osofr/simcausal/HEAD/vignettes/vignette_dat/restabwLefSc1_all_1Ksims.Rdata -------------------------------------------------------------------------------- /vignettes/vignette_dat/restabwLefSc3_all_1Ksims.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osofr/simcausal/HEAD/vignettes/vignette_dat/restabwLefSc3_all_1Ksims.Rdata -------------------------------------------------------------------------------- /vignettes/vignette_dat/sim50K.stratQg.notrunc.g.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osofr/simcausal/HEAD/vignettes/vignette_dat/sim50K.stratQg.notrunc.g.Rdata -------------------------------------------------------------------------------- /vignettes/vignette_dat/sim50K.stratQg.notrunc.missg.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/osofr/simcausal/HEAD/vignettes/vignette_dat/sim50K.stratQg.notrunc.missg.Rdata -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .DS_Store 5 | inst/doc 6 | README.html 7 | tests/*.pdf 8 | tests/RUnit/*.html 9 | tests/RUnit/*.txt 10 | tests/RUnit/*.pdf 11 | *.sublime-project 12 | *.sublime-workspace -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.DS_Store$ 3 | ^\.Rproj\.user$ 4 | ^cran-comments\.md$ 5 | ^README\.html$ 6 | ^.Rprofile$ 7 | ^inst/db$ 8 | ^tests/$ 9 | ^tests/RUnit/$ 10 | ^\.travis\.yml$ 11 | 
^\.sublime-project$ 12 | ^\.sublime-workspace$ 13 | -------------------------------------------------------------------------------- /man/DAG.empty.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/interface.r 3 | \name{DAG.empty} 4 | \alias{DAG.empty} 5 | \title{Initialize an empty DAG object} 6 | \usage{ 7 | DAG.empty() 8 | } 9 | \description{ 10 | Initialize an empty DAG object 11 | } 12 | -------------------------------------------------------------------------------- /man/distr.list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distributions.R 3 | \name{distr.list} 4 | \alias{distr.list} 5 | \title{List All Custom Distribution Functions in \code{simcausal}.} 6 | \usage{ 7 | distr.list() 8 | } 9 | \description{ 10 | List All Custom Distribution Functions in \code{simcausal}. 11 | } 12 | -------------------------------------------------------------------------------- /man/net.list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/network_distributions.R 3 | \name{net.list} 4 | \alias{net.list} 5 | \title{List All Custom Network Generator Functions in \code{simcausal}.} 6 | \usage{ 7 | net.list() 8 | } 9 | \description{ 10 | List All Custom Network Generator Functions in \code{simcausal}. 11 | } 12 | -------------------------------------------------------------------------------- /man/print.DAG.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simcausal.r 3 | \name{print.DAG} 4 | \alias{print.DAG} 5 | \title{Print DAG Object} 6 | \usage{ 7 | \method{print}{DAG}(x, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{A DAG object.} 11 | 12 | \item{...}{Other arguments to generic print.} 13 | } 14 | \description{ 15 | Print DAG Object 16 | } 17 | -------------------------------------------------------------------------------- /man/vecfun.print.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simcausal.r 3 | \name{vecfun.print} 4 | \alias{vecfun.print} 5 | \title{Print Names of Custom Vectorized Functions} 6 | \usage{ 7 | vecfun.print() 8 | } 9 | \value{ 10 | A vector of vectorized function names 11 | } 12 | \description{ 13 | Print current user-defined vectorized function names. 14 | } 15 | -------------------------------------------------------------------------------- /man/vecfun.reset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simcausal.r 3 | \name{vecfun.reset} 4 | \alias{vecfun.reset} 5 | \title{Reset Custom Vectorized Function List} 6 | \usage{ 7 | vecfun.reset() 8 | } 9 | \value{ 10 | An old vector of user-defined vectorized function names 11 | } 12 | \description{ 13 | Reset a listing of user-defined vectorized functions. 14 | } 15 | -------------------------------------------------------------------------------- /man/print.DAG.action.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simcausal.r 3 | \name{print.DAG.action} 4 | \alias{print.DAG.action} 5 | \title{Print Action Object} 6 | \usage{ 7 | \method{print}{DAG.action}(x, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{An object.} 11 | 12 | \item{...}{Other arguments to generic print.} 13 | } 14 | \description{ 15 | Print Action Object 16 | } 17 | -------------------------------------------------------------------------------- /man/print.DAG.node.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simcausal.r 3 | \name{print.DAG.node} 4 | \alias{print.DAG.node} 5 | \title{Print DAG.node Object} 6 | \usage{ 7 | \method{print}{DAG.node}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A Node object.} 11 | 12 | \item{...}{Other arguments to generic print.} 13 | } 14 | \description{ 15 | Print DAG.node Object 16 | } 17 | -------------------------------------------------------------------------------- /man/vecfun.all.print.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simcausal.r 3 | \name{vecfun.all.print} 4 | \alias{vecfun.all.print} 5 | \title{Print Names of All Vectorized Functions} 6 | \usage{ 7 | vecfun.all.print() 8 | } 9 | \value{ 10 | A vector of build-in and user-defined vectorized function names 11 | } 12 | \description{ 13 | Print all vectorized function names (build-in and user-defined). 
14 | } 15 | -------------------------------------------------------------------------------- /vignettes/mystyles.sty: -------------------------------------------------------------------------------- 1 | % \usepackage[utf8]{inputenc} 2 | \usepackage{amsmath} 3 | \usepackage{graphicx} 4 | \usepackage{float} 5 | \usepackage{booktabs} 6 | \usepackage{ctable} 7 | % \usepackage[sort,numbers]{natbib} 8 | %\usepackage[sort,authoryear]{natbib} 9 | \usepackage[english]{babel} 10 | \let\code=\texttt 11 | \let\proglang=\textsf 12 | \newcommand{\pkg}[1]{{\fontseries{b}\selectfont #1}} 13 | \newcommand{\email}[1]{\href{mailto:#1}{\normalfont\texttt{#1}}} 14 | -------------------------------------------------------------------------------- /man/vecfun.remove.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simcausal.r 3 | \name{vecfun.remove} 4 | \alias{vecfun.remove} 5 | \title{Remove Custom Vectorized Functions} 6 | \usage{ 7 | vecfun.remove(vecfun_names) 8 | } 9 | \arguments{ 10 | \item{vecfun_names}{A character vector of function names that will be removed from the custom list} 11 | } 12 | \value{ 13 | An old vector of user-defined vectorized function names 14 | } 15 | \description{ 16 | Remove user-defined function names from a global list of custom vectorized functions. See vignette for more details. 
17 | } 18 | -------------------------------------------------------------------------------- /man/Define_sVar.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parserfunctions_R6.r 3 | \docType{class} 4 | \name{Define_sVar} 5 | \alias{Define_sVar} 6 | \title{Class for defining and evaluating user-specified summary measures (exprs_list)} 7 | \format{An R6 class object.} 8 | \usage{ 9 | Define_sVar 10 | } 11 | \description{ 12 | Evaluates and and stores arbitrary summary measure expressions. The expressions (exprs_list) are evaluated in the environment of the input data.frame. 13 | } 14 | \details{ 15 | Following fields are created during initialization 16 | \itemize{ 17 | \item{nodes} ... 18 | \item{subset_regs} ... 19 | \item{sA_nms} ... 20 | \item{sW_nms} ... 21 | \item{Kmax} ... 22 | } 23 | } 24 | \keyword{datasets} 25 | -------------------------------------------------------------------------------- /man/N.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/action_node_indexing.R 3 | \name{N} 4 | \alias{N} 5 | \title{Subsetting/Indexing \code{DAG} Nodes} 6 | \usage{ 7 | N(DAG) 8 | } 9 | \arguments{ 10 | \item{DAG}{A DAG object that was defined using functions \code{\link{node}} and \code{\link{set.DAG}}.} 11 | } 12 | \value{ 13 | returns a list of nodes that can be indexed as a typical named list "[[]]". 
14 | } 15 | \description{ 16 | Subsetting/Indexing \code{DAG} Nodes 17 | } 18 | \examples{ 19 | 20 | D <- DAG.empty() 21 | D <- D + node(name="W1", distr="rbern", prob=plogis(-0.5)) 22 | D <- D + node(name="W2", distr="rbern", prob=plogis(-0.5 + 0.5*W1)) 23 | D <- set.DAG(D) 24 | #Returns all nodes from DAG D 25 | N(D) 26 | #Returns node W1 from DAG D 27 | N(D)["W1"] 28 | } 29 | -------------------------------------------------------------------------------- /man/rnet.gnp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/network_distributions.R 3 | \name{rnet.gnp} 4 | \alias{rnet.gnp} 5 | \title{Call \code{igraph::sample_gnp} to Generate Random Graph Object According to the G(n,p) Erdos-Renyi Model} 6 | \usage{ 7 | rnet.gnp(n, p) 8 | } 9 | \arguments{ 10 | \item{n}{Size of the network graph (number of nodes).} 11 | 12 | \item{p}{Same as \code{igraph::sample_gnp}: The probability for drawing an edge between two arbitrary vertices (G(n,p) graph).} 13 | } 14 | \value{ 15 | A matrix with n rows, each row lists the indices of friends connected to that particular observation. 16 | } 17 | \description{ 18 | Call \code{igraph::sample_gnp} and convert the output to \code{simcausal} network matrix. 
19 | } 20 | \seealso{ 21 | \code{\link{rnet.gnm}} 22 | } 23 | -------------------------------------------------------------------------------- /man/rnet.gnm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/network_distributions.R 3 | \name{rnet.gnm} 4 | \alias{rnet.gnm} 5 | \title{Call \code{igraph::sample_gnm} to Generate Random Graph Object According to the G(n,m) Erdos-Renyi Model} 6 | \usage{ 7 | rnet.gnm(n, m_pn) 8 | } 9 | \arguments{ 10 | \item{n}{Size of the network graph (number of nodes).} 11 | 12 | \item{m_pn}{The total number of edges as a fraction of the sample size \code{n}.} 13 | } 14 | \value{ 15 | A matrix with n rows, each row lists the indices of friends connected to that particular observation. 16 | } 17 | \description{ 18 | Call \code{igraph::sample_gnm} and convert the output to \code{simcausal} network matrix. 19 | The parameter \code{m} of \code{igraph::sample_gnm} is derived from \code{n} and \code{m_pn} as \code{as.integer(m_pn*n)} 20 | } 21 | \seealso{ 22 | \code{\link{rnet.gnp}} 23 | } 24 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry(bibtype = "Article", 2 | title = "{simcausal} {R} Package: Conducting Transparent and Reproducible Simulation Studies of Causal Effect Estimation with Complex Longitudinal Data", 3 | author = c(person(given = "Oleg", 4 | family = "Sofrygin", 5 | email = "oleg.sofrygin@gmail.com"), 6 | person(given = c("Mark", "J."), 7 | family = "van der Laan", 8 | email = "laan@berkeley.edu"), 9 | person(given = "Romain", 10 | family = "Neugebauer", 11 | email = "Romain.S.Neugebauer@kp.org")), 12 | journal = "Journal of Statistical Software", 13 | year = "2017", 14 | volume = "81", 15 | number = "2", 16 | pages = "1--47", 17 | doi = "10.18637/jss.v081.i02", 18 
| 19 | header = "To cite simcausal in publications use:" 20 | ) 21 | 22 | -------------------------------------------------------------------------------- /man/rbern.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distributions.R 3 | \name{rbern} 4 | \alias{rbern} 5 | \title{Random Sample from Bernoulli Distribution} 6 | \usage{ 7 | rbern(n, prob) 8 | } 9 | \arguments{ 10 | \item{n}{Sample size.} 11 | 12 | \item{prob}{A vector of success probabilities.} 13 | } 14 | \value{ 15 | Binary vector of length \code{n}. 16 | } 17 | \description{ 18 | Wrapper for Bernoulli node distribution. 19 | } 20 | \examples{ 21 | 22 | #--------------------------------------------------------------------------------------- 23 | # Specifying and simulating from a DAG with 3 Bernoulli nodes 24 | #--------------------------------------------------------------------------------------- 25 | D <- DAG.empty() 26 | D <- D + node("W1", distr="rbern", prob=0.05) 27 | D <- D + node("W2", distr="rbern", prob=ifelse(W1==1,0.5,0.1)) 28 | D <- D + node("W3", distr="rbern", prob=ifelse(W1==1,0.5,0.1)) 29 | Dset <- set.DAG(D) 30 | simdat <- sim(Dset, n=200, rndseed=1) 31 | } 32 | -------------------------------------------------------------------------------- /man/igraph.to.sparseAdjMat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/network.R 3 | \name{igraph.to.sparseAdjMat} 4 | \alias{igraph.to.sparseAdjMat} 5 | \title{Convert igraph Network Object into Sparse Adjacency Matrix} 6 | \usage{ 7 | igraph.to.sparseAdjMat(igraph_network) 8 | } 9 | \arguments{ 10 | \item{igraph_network}{Network as an \code{igraph} object} 11 | } 12 | \value{ 13 | Sparase adjacency matrix returned by \code{igraph::as_adjacency_matrix} function. 
14 | NOTE: for directed graphs the friend IDs pointing into vertex \code{i} are assumed to be listed in the column \code{i} 15 | (i.e, \code{which(adjmat[,i])} are friends of \code{i}). 16 | } 17 | \description{ 18 | Convert igraph network object into its sparse adjacency matrix representation using \code{as_adjacency_matrix} function from the \code{igraph} package. 19 | } 20 | \seealso{ 21 | \code{\link{network}}; \code{\link{sparseAdjMat.to.NetInd}}; \code{\link{NetInd.to.sparseAdjMat}}; \code{\link{sparseAdjMat.to.igraph}}; 22 | } 23 | -------------------------------------------------------------------------------- /man/parents.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simcausal.r 3 | \name{parents} 4 | \alias{parents} 5 | \title{Show Node Parents Given DAG Object} 6 | \usage{ 7 | parents(DAG, nodesChr) 8 | } 9 | \arguments{ 10 | \item{DAG}{A DAG object that was specified by calling \code{\link{set.DAG}}} 11 | 12 | \item{nodesChr}{A vector of node names that are already defined in DAG} 13 | } 14 | \value{ 15 | A list with parent names for each node name in nodesChr 16 | } 17 | \description{ 18 | Given a vector of node names, this function provides the name(s) of node parents that were obtained by parsing the node formulas. 
19 | } 20 | \examples{ 21 | 22 | D <- DAG.empty() 23 | D <- D + node(name="W1", distr="rbern", prob=plogis(-0.5)) 24 | D <- D + node(name="W2", distr="rbern", prob=plogis(-0.5 + 0.5*W1)) 25 | D <- D + node(name="A", distr="rbern", prob=plogis(-0.5 - 0.3*W1 - 0.3*W2)) 26 | D <- D + node(name="Y", distr="rbern", prob=plogis(-0.1 + 1.2*A + 0.3*W1 + 0.3*W2), EFU=TRUE) 27 | D <- set.DAG(D) 28 | parents(D, c("W2", "A", "Y")) 29 | } 30 | -------------------------------------------------------------------------------- /man/vecfun.add.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simcausal.r 3 | \name{vecfun.add} 4 | \alias{vecfun.add} 5 | \title{Add Custom Vectorized Functions} 6 | \usage{ 7 | vecfun.add(vecfun_names) 8 | } 9 | \arguments{ 10 | \item{vecfun_names}{A character vector of function names that will be treated as "vectorized" by the node formula R parser} 11 | } 12 | \value{ 13 | An old vector of user-defined vectorized function names 14 | } 15 | \description{ 16 | Add user-defined function names to a global list of custom vectorized functions. 17 | The functions in \code{vecfun_names} are intended for use inside the node formulas. 18 | Adding functions to this list will generally greatly expedite the simulation run time. 19 | Any node formula calling a function on this list will be evaluated "as is", the function should 20 | be written to accept arguments as either vectors of length \code{n} or as matrices with \code{n} rows. 21 | Adding function to this list will effects simulation from all DAG objects that call this function. See vignette for more details. 
22 | } 23 | -------------------------------------------------------------------------------- /man/add.nodes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/interface.r 3 | \name{add.nodes} 4 | \alias{add.nodes} 5 | \alias{+.DAG} 6 | \title{Adding Node(s) to DAG} 7 | \usage{ 8 | add.nodes(DAG, nodes) 9 | 10 | \method{+}{DAG}(obj1, obj2) 11 | } 12 | \arguments{ 13 | \item{DAG}{DAG object} 14 | 15 | \item{nodes}{A node or several nodes returned from a call to \code{node} function. If the node(s) under same name(s) already exist, the old node(s) get overwritten.} 16 | 17 | \item{obj1}{Object that belongs to either classes: \code{DAG}, \code{DAG.action} or \code{DAG.nodelist}.} 18 | 19 | \item{obj2}{Object that belongs to either classes: \code{DAG}, \code{DAG.action} or \code{DAG.nodelist}.} 20 | } 21 | \value{ 22 | An updated DAG object with new nodes 23 | } 24 | \description{ 25 | Adding nodes to a growing DAG object, as in \code{DAG + node()}. Use either syntax \code{DAG + node()} or \code{add.nodes(DAG = , nodes = node())}. Both give identical results, see the examples in the vignette and below for details. 26 | } 27 | \seealso{ 28 | \code{\link{node}} 29 | } 30 | -------------------------------------------------------------------------------- /man/A.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/action_node_indexing.R 3 | \name{A} 4 | \alias{A} 5 | \title{Subsetting/Indexing Actions Defined for \code{DAG} Object} 6 | \usage{ 7 | A(DAG) 8 | } 9 | \arguments{ 10 | \item{DAG}{A DAG object that was defined using functions \code{\link{node}}, \code{\link{set.DAG}} and \code{\link{action}}.} 11 | } 12 | \value{ 13 | returns a list of actions, which are intervened versions of the original observed data DAG. 
14 | } 15 | \description{ 16 | Subsetting/Indexing Actions Defined for \code{DAG} Object 17 | } 18 | \examples{ 19 | 20 | D <- DAG.empty() 21 | D <- D + node(name="W1", distr="rbern", prob=plogis(-0.5)) 22 | D <- D + node(name="W2", distr="rbern", prob=plogis(-0.5 + 0.5*W1)) 23 | D <- D + node(name="A", distr="rbern", prob=plogis(-0.5 + 0.5*W1+ 0.5*W2)) 24 | D <- set.DAG(D) 25 | # Define two actions, acting on node "A" 26 | D <- D + action("A0", nodes=node("A", distr="rbern", prob=0)) 27 | D <- D + action("A1", nodes=node("A", distr="rbern", prob=1)) 28 | # Select both actions 29 | A(D) 30 | # Select action "A1" only 31 | A(D)["A1"] 32 | } 33 | -------------------------------------------------------------------------------- /tests/RUnit/RUnit_tests_00_wide_to_long.R: -------------------------------------------------------------------------------- 1 | 2 | test.long.wide.simobs <- function() { 3 | library("simcausal"); options(simcausal.verbose=FALSE) 4 | t_end <- 5 5 | D <- DAG.empty() + 6 | node("W", distr="rbern", prob=0.05) + 7 | node("L1", t=0:t_end, distr="rbern", prob=0.25) + 8 | node("L2", t=0:t_end, distr="rconst", const=L1[t] + W) + 9 | node("Y", t=0:t_end, distr="rbern", prob=plogis(-6.5 + L1[t] + 2*L2[t] + 0.05*sum(I(L2[0:t]==rep(0,(t+1))))), EFU=TRUE) 10 | Dset <- set.DAG(D) 11 | 12 | Odat1 <- sim(Dset, n=500, wide = TRUE, rndseed = 123) 13 | checkTrue(all.equal( 14 | names(Odat1), 15 | c("ID", "W", "L1_0", "L2_0", "Y_0", "L1_1", "L2_1", "Y_1", "L1_2", "L2_2", "Y_2", "L1_3", "L2_3", "Y_3", "L1_4", "L2_4", "Y_4", "L1_5", "L2_5", "Y_5") 16 | )) 17 | 18 | Odat1b <- simobs(Dset, n=500, wide = TRUE, rndseed = 123) 19 | checkTrue(all.equal(Odat1, Odat1b)) 20 | 21 | Odat1 <- sim(Dset, n=500, wide = FALSE, rndseed = 123) 22 | checkTrue(all.equal( 23 | names(Odat1), 24 | c("ID", "W", "t", "L1", "L2", "Y") 25 | )) 26 | 27 | Odat1b <- simobs(Dset, n=500, wide = FALSE, rndseed = 123) 28 | checkTrue(all.equal(Odat1, Odat1b)) 29 | } 
-------------------------------------------------------------------------------- /man/NetInd.to.sparseAdjMat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/network.R 3 | \name{NetInd.to.sparseAdjMat} 4 | \alias{NetInd.to.sparseAdjMat} 5 | \title{Convert Network IDs Matrix into Sparse Adjacency Matrix} 6 | \usage{ 7 | NetInd.to.sparseAdjMat(NetInd_k, nF) 8 | } 9 | \arguments{ 10 | \item{NetInd_k}{Matrix of network IDs of dimension \code{(n=nrow(sparseAdjMat),Kmax)}, 11 | where each row \code{i} consists of the network IDs (row number of friends) of observation \code{i}. Remainders are filled with \code{NA}s.} 12 | 13 | \item{nF}{Integer vector of length \code{n} specifying the number of friends for each observation.} 14 | } 15 | \value{ 16 | Network represented as a sparse adjacency matrix (S4 class object \code{dgCMatrix} from package \code{Matrix}). 17 | NOTE: The friend IDs for observation \code{i} will be listed in column \code{i} 18 | (i.e, \code{which(sparseAdjMat[,i])} are friends of \code{i}). 19 | } 20 | \description{ 21 | Convert \code{simcausal} network ID matrix (\code{NetInd_k}) into a network represented by a sparse adjacency matrix. 
22 | } 23 | \seealso{ 24 | \code{\link{network}}; \code{\link{sparseAdjMat.to.igraph}}; \code{\link{igraph.to.sparseAdjMat}}; \code{\link{sparseAdjMat.to.NetInd}}; 25 | } 26 | -------------------------------------------------------------------------------- /man/DF.to.long.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulation.r 3 | \name{DF.to.long} 4 | \alias{DF.to.long} 5 | \title{Convert Data from Wide to Long Format Using \code{reshape}} 6 | \usage{ 7 | DF.to.long(df_wide) 8 | } 9 | \arguments{ 10 | \item{df_wide}{A \code{data.frame} in wide format} 11 | } 12 | \value{ 13 | A \code{data.frame} object in long format 14 | } 15 | \description{ 16 | This utility function takes a simulated data.frame in wide format as an input and converts it into a long format (slower compared to \code{\link{DF.to.longDT}}). 17 | } 18 | \details{ 19 | Keeps all covariates that appear only once and at the first time-point constant (carry-forward). 20 | 21 | All covariates that appear fewer than range(t) times are imputed with NA for missing time-points. 22 | 23 | Observations with all NA's for all time-varying covariates are removed. 24 | 25 | When removing NA's the time-varying covariates that are attributes (attnames) are not considered. 
26 | } 27 | \seealso{ 28 | \code{\link{DF.to.longDT}} - a faster version of \code{DF.to.long} that uses \code{data.table} package 29 | 30 | Other data manipulation functions: \code{\link{DF.to.longDT}}, 31 | \code{\link{doLTCF}} 32 | } 33 | \concept{data manipulation functions} 34 | -------------------------------------------------------------------------------- /man/plotSurvEst.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simcausal.r 3 | \name{plotSurvEst} 4 | \alias{plotSurvEst} 5 | \title{(EXPERIMENTAL) Plot Discrete Survival Function(s)} 6 | \usage{ 7 | plotSurvEst(surv = list(), xindx = NULL, ylab = "", xlab = "t", 8 | ylim = c(0, 1), legend.xyloc = "topright", ...) 9 | } 10 | \arguments{ 11 | \item{surv}{A list of vectors, each containing action-specific discrete survival probabilities over time.} 12 | 13 | \item{xindx}{A vector of indices for subsetting the survival vectors in \code{surv}, if omitted all survival probabilities in each \code{surv[[i]]} are plotted.} 14 | 15 | \item{ylab}{An optional title for y axis, passed to \code{\link{plot}}.} 16 | 17 | \item{xlab}{An optional title for x axis, passed to \code{\link{plot}}.} 18 | 19 | \item{ylim}{Optional y limits for the plot, passed to \code{\link{plot}}.} 20 | 21 | \item{legend.xyloc}{Optional x and y co-ordinates to be used to position the legend. 22 | Can be specified by keyword or as a named list with (x,y), uses the same convention as in \code{graphics::xy.coords}.} 23 | 24 | \item{...}{Additional arguments passed to \code{\link{plot}}.} 25 | } 26 | \description{ 27 | Plot discrete survival curves from a list of discrete survival probabilities by calling \code{\link{plot}} with \code{type='b'}. 
28 | } 29 | -------------------------------------------------------------------------------- /man/sparseAdjMat.to.igraph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/network.R 3 | \name{sparseAdjMat.to.igraph} 4 | \alias{sparseAdjMat.to.igraph} 5 | \title{Convert Network from Sparse Adjacency Matrix into igraph Object} 6 | \usage{ 7 | sparseAdjMat.to.igraph(sparseAdjMat, mode = "directed") 8 | } 9 | \arguments{ 10 | \item{sparseAdjMat}{Network represented as a sparse adjacency matrix (S4 class object \code{dgCMatrix} from package \code{Matrix}). 11 | NOTE: for directed graphs the friend IDs pointing into vertex \code{i} are assumed to be listed in the column \code{i} 12 | (i.e, \code{which(sparseAdjMat[,i])} are friends of \code{i}).} 13 | 14 | \item{mode}{Character scalar, passed on to \code{igraph::graph_from_adjacency_matrix}, specifies how igraph should interpret the supplied matrix. 15 | See \code{?igraph::graph_from_adjacency_matrix} for details.} 16 | } 17 | \value{ 18 | A list containing the network object(s) of type \code{DAG.net}. 19 | } 20 | \description{ 21 | Uses \code{graph_from_adjacency_matrix} function from the \code{igraph} package to convert the network in sparse adjacency matrix format into \code{igraph} network object. 
22 | } 23 | \seealso{ 24 | \code{\link{network}}; \code{\link{igraph.to.sparseAdjMat}}; \code{\link{sparseAdjMat.to.NetInd}}; \code{\link{NetInd.to.sparseAdjMat}}; 25 | } 26 | -------------------------------------------------------------------------------- /man/rnet.SmWorld.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/network_distributions.R 3 | \name{rnet.SmWorld} 4 | \alias{rnet.SmWorld} 5 | \title{Call \code{igraph::sample_smallworld} to Generate Random Graph Object from the Watts-Strogatz Small-World Model} 6 | \usage{ 7 | rnet.SmWorld(n, dim, nei, p) 8 | } 9 | \arguments{ 10 | \item{n}{Size of the network graph (the number of nodes).} 11 | 12 | \item{dim}{Same as in \code{igraph::sample_smallworld}: Integer constant, the dimension of the starting lattice.} 13 | 14 | \item{nei}{Same as in \code{igraph::sample_smallworld}: Integer constant, the neighborhood within which the vertices of the lattice will be connected.} 15 | 16 | \item{p}{Same as in \code{igraph::sample_smallworld}: Real constant between zero and one, the rewiring probability.} 17 | } 18 | \value{ 19 | A matrix with n rows, each row lists the indices of friends connected to that particular observation. 20 | } 21 | \description{ 22 | Call \code{igraph::sample_smallworld} and convert the output to \code{simcausal} network matrix. 23 | The parameters are the same as those of \code{igraph::sample_smallworld}. 24 | The loop edges aren't allowed (\code{loops = FALSE}) and the multiple edges aren't allowed either \code{multiple = FALSE}. 
25 | } 26 | \seealso{ 27 | \code{\link{rnet.gnp}}, \code{\link{rnet.gnm}} 28 | } 29 | -------------------------------------------------------------------------------- /man/DF.to.longDT.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulation.r 3 | \name{DF.to.longDT} 4 | \alias{DF.to.longDT} 5 | \title{Faster Conversion of Data from Wide to Long Format Using \code{dcast.data.table}} 6 | \usage{ 7 | DF.to.longDT(df_wide, return_DF = TRUE) 8 | } 9 | \arguments{ 10 | \item{df_wide}{A \code{data.frame} or \code{data.table} in wide format} 11 | 12 | \item{return_DF}{\code{TRUE} (default) to return a \code{data.frame}, \code{FALSE} returns a \code{data.table}} 13 | } 14 | \value{ 15 | A \code{data.frame} in long format 16 | } 17 | \description{ 18 | Faster utility function for converting wide-format \code{data.frame} into a long format. 19 | Internally uses \pkg{data.table} package functions \code{melt.data.table} and \code{dcast.data.table}. 20 | } 21 | \details{ 22 | Keeps all covariates that appear only once and at the first time-point constant (carry-forward). 23 | 24 | All covariates that appear fewer than range(t) times are imputed with NA for missing time-points. 25 | 26 | Observations with all NA's for all time-varying covariates are removed. 27 | 28 | When removing NA's the time-varying covariates that are attributes (attnames) are not considered. 29 | } 30 | \seealso{ 31 | Other data manipulation functions: \code{\link{DF.to.long}}, 32 | \code{\link{doLTCF}} 33 | } 34 | \concept{data manipulation functions} 35 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | # Runs when attached to search() path such as by library() or require() 2 | .onAttach <- function(...) 
{ 3 | if (interactive()) { 4 | packageStartupMessage('simcausal') 5 | packageStartupMessage('Version: ', utils::packageDescription('simcausal')$Version) 6 | packageStartupMessage('Package created on ', utils::packageDescription('simcausal')$Date, '\n') 7 | packageStartupMessage('Please note this package is still in its early stages of development. Check for updates and report bugs at http://github.com/osofr/simcausal.', '\n') 8 | packageStartupMessage('To see the vignette use vignette("simcausal_vignette", package="simcausal"). To see all available package documentation use help(package = "simcausal") and ?simcausal.', '\n') 9 | packageStartupMessage('To see the latest updates for this version, use news(package = "simcausal").', '\n') 10 | } 11 | } 12 | 13 | # Runs when loaded but not attached to search() path; e.g., when a package just Imports (not Depends on) simcausal 14 | .onLoad <- function(libname, pkgname) { 15 | # Set simcausal package options, # simcausal. 16 | opts = c("simcausal.verbose"="TRUE" 17 | ) 18 | for (i in setdiff(names(opts),names(options()))) { 19 | eval(parse(text=paste("options(",i,"=",opts[i],")",sep=""))) 20 | } 21 | invisible() 22 | } 23 | 24 | # .onUnload <- function(libpath) { 25 | # } -------------------------------------------------------------------------------- /tests/RUnit/RUnit_tests_03_NSE_bug.R: -------------------------------------------------------------------------------- 1 | # library("simcausal") 2 | 3 | test.NSEbug <- function() { 4 | 5 | lookup_parameter <- function(pp, node, s, t, variable) { 6 | print("s"); print(s) 7 | print("t"); print(t) 8 | print("variable"); print(variable) 9 | pp["t" == t & "s" == s,] 10 | pp[pp[,"t"] %in% 0 & pp[,"s"] %in% 1,"param"] 11 | } 12 | 13 | tFUN <- function(s, t, SS) { 14 | s*t 15 | } 16 | 17 | pp <- 'name' 18 | t_start <- 0 19 | TT <- 2 20 | s_start <- 1 21 | SS <- 3 22 | 23 | pp <- data.frame(t = t_start:TT, s = s_start:SS, param = runif(3)) 24 | 25 | # vecfun.add("lookup_parameter") 
26 | 27 | D <- DAG.empty() 28 | 29 | for(t in t_start:TT ){ # Per month 30 | for(s in s_start:SS ){ # Per location 31 | 32 | tpos <- tFUN(s, t, SS) 33 | ## Initial month values ## 34 | 35 | # if(s == 1 & t == 0){ 36 | D <- D + node('L1', 37 | distr = 'rnorm', 38 | mean = .(lookup_parameter(pp, node = 'L1', s = s, t = t, variable = '(Intercept)') ), 39 | # mean = lookup_parameter(pp, node = 'L1', s = .(s), t = t, variable = '(Intercept)'), 40 | sd = .(lookup_parameter(pp, node = 'L1', s = s, t = t, variable = 'mse') ), 41 | # sd = lookup_parameter(pp, node = 'L1', s = .(s), t = t, variable = 'mse'), 42 | t = tpos) 43 | # } 44 | } 45 | } 46 | D <- set.DAG(D) 47 | 48 | } 49 | -------------------------------------------------------------------------------- /man/rconst.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distributions.R 3 | \name{rconst} 4 | \alias{rconst} 5 | \title{Constant (Degenerate) Distribution (Returns its Own Argument \code{const})} 6 | \usage{ 7 | rconst(n, const) 8 | } 9 | \arguments{ 10 | \item{n}{Sample size.} 11 | 12 | \item{const}{Either a vector with one constant value (replicated \code{n} times) 13 | or a vector of length \code{n} or a matrix with \code{n} rows (for a multivariate node).} 14 | } 15 | \value{ 16 | A vector of constants of length \code{n}. 17 | } 18 | \description{ 19 | Wrapper for constant value (degenerate) distribution. 
20 | } 21 | \examples{ 22 | 23 | #--------------------------------------------------------------------------------------- 24 | # Specifying and simulating from a DAG with 1 Bernoulli and 2 constant nodes 25 | #--------------------------------------------------------------------------------------- 26 | D <- DAG.empty() 27 | D <- D + node("W1", distr = "rbern", prob = 0.45) 28 | D <- D + node("W2", distr = "rconst", const = 1) 29 | D <- D + node("W3", distr = "rconst", const = ifelse(W1 == 1, 5, 10)) 30 | 31 | # TWO equivalent ways of creating a multivariate node (just repeating W1 and W2): 32 | create_mat <- function(W1, W2) cbind(W1, W2) 33 | vecfun.add("create_mat") 34 | 35 | D <- D + node(c("W1.copy1", "W2.copy1"), distr = "rconst", const = c(W1, W2)) 36 | D <- D + node(c("W1.copy2", "W2.copy2"), distr = "rconst", const = create_mat(W1, W2)) 37 | Dset <- set.DAG(D) 38 | sim(Dset, n=10, rndseed=1) 39 | } 40 | -------------------------------------------------------------------------------- /man/rdistr.template.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distributions.R 3 | \name{rdistr.template} 4 | \alias{rdistr.template} 5 | \title{Template for Writing Custom Distribution Functions} 6 | \usage{ 7 | rdistr.template(n, arg1, arg2, ...) 8 | } 9 | \arguments{ 10 | \item{n}{Sample size that needs to be generated} 11 | 12 | \item{arg1}{Argument 1} 13 | 14 | \item{arg2}{Argument 2} 15 | 16 | \item{...}{Additional optional parameters} 17 | } 18 | \value{ 19 | A vector of length \code{n} 20 | } 21 | \description{ 22 | Template function for writing \code{simcausal} custom distribution wrappers. 23 | } 24 | \details{ 25 | One of the named arguments must be 'n', this argument is passed on to the function automatically by 26 | the package and is assigned to the number of samples that needs to be generated from this distribution.
27 | Other arguments (in this example arg1 and arg2) must be declared by the user as 28 | arguments inside the node() function that uses this distribution, 29 | e.g., \code{node("Node1"}, \code{distr="rdistr.template"}, \code{arg1 = ...}, \code{arg2 = ...)}. 30 | Both arg1 and arg2 can be either numeric constants or formulas involving past node names. 31 | The constants get passed on to the distribution function unchanged. 32 | The formulas are evaluated inside the environment of the simulated data and are passed on to the 33 | distribution functions as vectors. 34 | The output of the distribution function is expected to be a vector of length n of the sampled covariates. 35 | } 36 | -------------------------------------------------------------------------------- /tests/examples/sim.impute.examples12.R: -------------------------------------------------------------------------------- 1 | t_end <- 10 2 | lDAG <- DAG.empty() 3 | lDAG <- lDAG + 4 | node(name = "L2", t = 0, distr = "rconst", const = 0) + 5 | node(name = "A1", t = 0, distr = "rconst", const = 0) + 6 | node(name = "L2", t = 1:t_end, distr = "rbern", 7 | prob = ifelse(A1[t - 1] == 1, 0.1, 8 | ifelse(L2[t-1] == 1, 0.9, 9 | min(1,0.1 + t/t_end)))) + 10 | node(name = "A1", t = 1:t_end, distr = "rbern", 11 | prob = ifelse(A1[t - 1] == 1, 1, 12 | ifelse(L2[0] == 0, 0.3, 13 | ifelse(L2[0] == 0, 0.1, 14 | ifelse(L2[0] == 1, 0.7, 0.5))))) + 15 | node(name = "Y", t = 1:t_end, distr = "rbern", 16 | prob = plogis(-6.5 + 4 * L2[t] + 0.05 * sum(I(L2[0:t] == rep(0,(t + 1))))), 17 | EFU = TRUE) 18 | lDAG <- set.DAG(lDAG) 19 | #--------------------------------------------------------------------------------------- 20 | # EXAMPLE 1. No forward imputation.
21 | #--------------------------------------------------------------------------------------- 22 | Odat.wide <- sim(DAG = lDAG, n = 1000, rndseed = 123) 23 | Odat.wide[c(21,47), 1:18] 24 | Odat.wideLTCF <- sim(DAG = lDAG, n = 1000, LTCF = "Y", rndseed = 123) 25 | Odat.wideLTCF[c(21,47), 1:18] 26 | #--------------------------------------------------------------------------------------- 27 | # EXAMPLE 2. With forward imputation. 28 | #--------------------------------------------------------------------------------------- 29 | Odat.wideLTCF2 <- doLTCF(data = Odat.wide, LTCF = "Y") 30 | Odat.wideLTCF2[c(21,47), 1:18] 31 | # all.equal(Odat.wideLTCF, Odat.wideLTCF2) 32 | -------------------------------------------------------------------------------- /man/rcat.b1.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distributions.R 3 | \name{rcategor.int} 4 | \alias{rcategor.int} 5 | \alias{rcat.b1} 6 | \alias{rcat.b0} 7 | \title{Random Sample from Base 1 (rcat.b1) or Base 0 (rcat.b0) Categorical (Integer) Distribution} 8 | \usage{ 9 | rcategor.int(n, probs) 10 | 11 | rcat.b1(n, probs) 12 | 13 | rcat.b0(n, probs) 14 | } 15 | \arguments{ 16 | \item{n}{Sample size.} 17 | 18 | \item{probs}{Either a vector or a matrix of success probabilities. 19 | When probs is a vector, \code{n} identically distributed random categorical variables are 20 | generated. 21 | When \code{probs} is a matrix, the categorical probabilities of the \code{k}th 22 | sample are determined by the \code{k}th row of probs matrix, i.e., \code{probs[k,]}.} 23 | } 24 | \value{ 25 | An integer vector of length \code{n} with range either in \code{0,...,ncol(probs)} or in \code{1,...,ncol(probs)+1}. 
26 | } 27 | \description{ 28 | Same as \code{\link{rcat.factor}}, but returning a vector of sampled integers with range 1, 2, ..., \code{ncol(probs)+1} for \code{rcat.b1} 29 | or range 0, 1, ..., \code{ncol(probs)} for \code{rcat.b0}. For sampling categorical factors see \link{rcat.factor}. 30 | } 31 | \section{Functions}{ 32 | \itemize{ 33 | \item \code{rcategor.int}: (Deprecated) Random Sample from Base 1 Categorical (Integer) Distribution 34 | 35 | \item \code{rcat.b1}: Random Sample from Base 1 Categorical (Integer) Distribution 36 | 37 | \item \code{rcat.b0}: Random Sample from Base 0 Categorical (Integer) Distribution 38 | }} 39 | 40 | \seealso{ 41 | \code{\link{rcat.factor}} 42 | } 43 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: simcausal 2 | Type: Package 3 | Version: 0.5.5 4 | Title: Simulating Longitudinal Data with Causal Inference Applications 5 | Authors@R: c( 6 | person("Oleg", "Sofrygin", role=c("aut", "cre"), email="oleg.sofrygin@gmail.com"), 7 | person(c("Mark", "J."), "van der Laan", role="aut", email="laan@berkeley.edu"), 8 | person("Romain", "Neugebauer", role="aut", email="Romain.S.Neugebauer@kp.org")) 9 | Description: A flexible tool for simulating complex longitudinal data using 10 | structural equations, with emphasis on problems in causal inference. 11 | Specify interventions and simulate from intervened data generating 12 | distributions. Define and evaluate treatment-specific means, the average 13 | treatment effects and coefficients from working marginal structural models. 14 | User interface designed to facilitate the conduct of transparent and 15 | reproducible simulation studies, and allows concise expression of complex 16 | functional dependencies for a large number of time-varying nodes. See the 17 | package vignette for more information, documentation and examples.
18 | URL: https://github.com/osofr/simcausal 19 | BugReports: https://github.com/osofr/simcausal/issues 20 | Depends: 21 | R (>= 3.2.0) 22 | Imports: 23 | data.table, 24 | igraph, 25 | stringr, 26 | R6, 27 | assertthat, 28 | Matrix, 29 | methods 30 | Suggests: 31 | copula, 32 | RUnit, 33 | ltmle, 34 | knitr, 35 | ggplot2, 36 | Hmisc, 37 | mvtnorm, 38 | bindata 39 | VignetteBuilder: knitr 40 | License: GPL-2 41 | RoxygenNote: 6.1.1 42 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method("+",DAG) 4 | S3method(print,DAG) 5 | S3method(print,DAG.action) 6 | S3method(print,DAG.node) 7 | export(A) 8 | export(DAG.empty) 9 | export(DF.to.long) 10 | export(DF.to.longDT) 11 | export(N) 12 | export(NetInd.to.sparseAdjMat) 13 | export(NetIndClass) 14 | export(action) 15 | export(add.action) 16 | export(add.nodes) 17 | export(distr.list) 18 | export(doLTCF) 19 | export(eval.target) 20 | export(igraph.to.sparseAdjMat) 21 | export(net.list) 22 | export(network) 23 | export(node) 24 | export(parents) 25 | export(plotDAG) 26 | export(plotSurvEst) 27 | export(rbern) 28 | export(rcat.b0) 29 | export(rcat.b1) 30 | export(rcat.factor) 31 | export(rcategor) 32 | export(rcategor.int) 33 | export(rconst) 34 | export(rdistr.template) 35 | export(rnet.SmWorld) 36 | export(rnet.gnm) 37 | export(rnet.gnp) 38 | export(set.DAG) 39 | export(set.targetE) 40 | export(set.targetMSM) 41 | export(sim) 42 | export(simfull) 43 | export(simobs) 44 | export(sparseAdjMat.to.NetInd) 45 | export(sparseAdjMat.to.igraph) 46 | export(vecfun.add) 47 | export(vecfun.all.print) 48 | export(vecfun.print) 49 | export(vecfun.remove) 50 | export(vecfun.reset) 51 | import(data.table) 52 | import(igraph) 53 | importFrom(R6,R6Class) 54 | importFrom(assertthat,assert_that) 55 | importFrom(assertthat,is.count) 56 | 
importFrom(assertthat,is.string) 57 | importFrom(graphics,legend) 58 | importFrom(graphics,par) 59 | importFrom(graphics,plot) 60 | importFrom(methods,is) 61 | importFrom(stats,as.formula) 62 | importFrom(stats,glm) 63 | importFrom(stats,na.exclude) 64 | importFrom(stats,rbinom) 65 | importFrom(stats,reshape) 66 | importFrom(stats,rnorm) 67 | importFrom(stats,runif) 68 | importFrom(stats,setNames) 69 | importFrom(stats,terms.formula) 70 | importFrom(utils,getFromNamespace) 71 | importFrom(utils,head) 72 | importFrom(utils,str) 73 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------- 2 | # Travis-CI configuration for R packages 3 | # 4 | # REFERENCES: 5 | # * Travis CI: https://travis-ci.org/ 6 | # * native R support: https://github.com/craigcitro/r-travis 7 | # * examples: https://github.com/craigcitro/r-travis/wiki/Porting-to-native-R-support-in-Travis 8 | # * r-builder: https://github.com/metacran/r-builder 9 | # * covr: https://github.com/jimhester/covr 10 | # * Coveralls: https://coveralls.io/ 11 | # 12 | # Validate your .travis.yml file at http://lint.travis-ci.org/ 13 | #---------------------------------------------------------------- 14 | # sudo: required 15 | # r_binary_packages: 16 | # - knitr 17 | # - data.table 18 | # - RUnit 19 | # - igraph 20 | # - ggplot2 21 | # - Hmisc 22 | # - mvtnorm 23 | # - copula 24 | # - bindata 25 | # r_packages: 26 | # - copula 27 | # - libgsl0-dev 28 | # - gsl-bin 29 | 30 | language: r 31 | # sudo: false 32 | sudo: required 33 | cache: packages 34 | warnings_are_errors: true 35 | # r_build_args: "--compact-vignettes" 36 | # r_check_args: "--as-cran" 37 | # r_build_args: "--no-build-vignettes --no-manual" 38 | r_check_args: "--as-cran" 39 | 40 | before_install: 41 | - Rscript -e 'update.packages(ask = FALSE)' 42 | - export 
RGL_USE_NULL=TRUE 43 | 44 | r_packages: 45 | - covr 46 | - copula 47 | 48 | r: 49 | - oldrel 50 | - release 51 | - devel 52 | 53 | r_github_packages: 54 | - jimhester/covr 55 | 56 | addons: 57 | apt: 58 | packages: 59 | - libgsl0-dev 60 | - libicu-dev 61 | - pandoc 62 | 63 | env: 64 | global: 65 | - CRAN: http://cran.rstudio.com 66 | - RGL_USE_NULL=TRUE 67 | 68 | after_success: 69 | - Rscript -e 'covr::coveralls()' 70 | 71 | on_failure: 72 | - ./travis-tool.sh dump_logs 73 | 74 | notifications: 75 | email: 76 | on_success: change 77 | on_failure: change 78 | 79 | -------------------------------------------------------------------------------- /man/sparseAdjMat.to.NetInd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/network.R 3 | \name{sparseAdjMat.to.NetInd} 4 | \alias{sparseAdjMat.to.NetInd} 5 | \title{Convert Network from Sparse Adjacency Matrix into Network IDs Matrix} 6 | \usage{ 7 | sparseAdjMat.to.NetInd(sparseAdjMat, trimKmax) 8 | } 9 | \arguments{ 10 | \item{sparseAdjMat}{Network represented as a sparse adjacency matrix (S4 class object \code{dgCMatrix} from package \code{Matrix}). 11 | NOTE: The friends (row numbers) of observation \code{i} are assumed to be listed in column \code{i} 12 | (i.e, \code{which(sparseAdjMat[,i])} are friends of \code{i}).} 13 | 14 | \item{trimKmax}{Trim the maximum number of friends to this integer value. If this argument is not missing, 15 | the conversion network matrix obtained from \code{sparseAdjMat} will be trimmed, so that each observation has at most \code{trimKmax} friends. 16 | The trimming initiates from the last column of the network ID matrix, removing columns until only \code{trimKmax} columns are left.} 17 | } 18 | \value{ 19 | A named list with 3 items: 1) \code{NetInd_k}; 2) \code{nF}; and 3) \code{Kmax}. 
20 | 1) \code{NetInd_k} - matrix of network IDs of dimension \code{(n=nrow(sparseAdjMat),Kmax)}, where each row \code{i} consists of the network IDs (friends) for observation \code{i}. 21 | Remainders are filled with NAs. 22 | 2) \code{nF} - integer vector of length \code{n} specifying the number of friends for each observation. 23 | 3) \code{Kmax} - integer constant specifying the maximum observed number of friends in input \code{sparseAdjMat} (this is the column dimension for the output matrix \code{NetInd_k}). 24 | } 25 | \description{ 26 | Convert network represented by a sparse adjacency matrix into \code{simcausal} network IDs matrix (\code{NetInd_k}). 27 | } 28 | \seealso{ 29 | \code{\link{network}}; \code{\link{NetInd.to.sparseAdjMat}}; \code{\link{sparseAdjMat.to.igraph}}; \code{\link{igraph.to.sparseAdjMat}}; 30 | } 31 | -------------------------------------------------------------------------------- /R/DAG_node_R6.R: -------------------------------------------------------------------------------- 1 | #' @importFrom R6 R6Class 2 | #' @importFrom assertthat assert_that 3 | Node_Class <- R6Class("Node_Class", 4 | class = TRUE, 5 | portable = TRUE, 6 | public = list( 7 | name = character(), 8 | t = NULL, 9 | distr = character(), 10 | dist_params = list(), 11 | Kmax = NULL, 12 | order = NULL, 13 | EFU = NULL, 14 | user.env = NULL, netind_cl = NULL, # declared so that initialize() can assign them; R6 objects are locked against adding new members 15 | # capture the user environment; user.env is used when eval'ing sVar exprs (enclos = user.env); Kmax is copied from netind_cl$Kmax 16 | initialize = function(user.env, netind_cl) { 17 | self$user.env <- user.env 18 | self$netind_cl <- netind_cl 19 | self$Kmax <- self$netind_cl$Kmax 20 | invisible(self) 21 | }, 22 | 23 | newfun = function(data.df) { # list of variable names from data.df with special var name (ANCHOR_ALLVARNMS_VECTOR_0) 24 | return(list(ANCHOR_ALLVARNMS_VECTOR_0 = colnames(data.df))) 25 | } 26 | ), 27 | 28 | active = list( 29 | placeholder = function() {} 30 | ), 31 | 32 | private = list( 33 | privplaceholder = function() {} 34 | ) 35 | ) 36 | 37 | 38 | #' 
@importFrom R6 R6Class 39 | #' @importFrom assertthat assert_that 40 | DAG_Class <- R6Class("DAG_Class", 41 | class = TRUE, 42 | portable = TRUE, 43 | public = list( 44 | user.env = emptyenv(), # user environment to be used as enclos arg to eval(sVar) 45 | Kmax = NULL, netind_cl = NULL, # declared so that initialize() can assign them; R6 objects are locked against adding new members 46 | Nsamp = NULL, # sample size (nrows) of the simulation dataset 47 | # capture the user environment; user.env is used when eval'ing sVar exprs (enclos = user.env); Kmax is copied from netind_cl$Kmax 48 | initialize = function(user.env, netind_cl) { 49 | self$user.env <- user.env 50 | self$netind_cl <- netind_cl 51 | self$Kmax <- self$netind_cl$Kmax 52 | invisible(self) 53 | }, 54 | 55 | 56 | newfun = function(data.df) { # list of variable names from data.df with special var name (ANCHOR_ALLVARNMS_VECTOR_0) 57 | return(list(ANCHOR_ALLVARNMS_VECTOR_0 = colnames(data.df))) 58 | } 59 | ), 60 | 61 | active = list( 62 | placeholder = function() {} 63 | ), 64 | 65 | private = list( 66 | privplaceholder = function() {} 67 | ) 68 | ) 69 | 70 | -------------------------------------------------------------------------------- /man/NetIndClass.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/network.R 3 | \docType{class} 4 | \name{NetIndClass} 5 | \alias{NetIndClass} 6 | \title{R6 class for creating and storing a friend matrix (network IDs) for network data} 7 | \format{An \code{\link{R6Class}} generator object} 8 | \usage{ 9 | NetIndClass 10 | } 11 | \description{ 12 | This R6 class defines fields and methods for creating and storing \code{NetInd_k}, 13 | a matrix of friend indices (network IDs) of \code{dim = (nobs x Kmax)}. 14 | } 15 | \details{ 16 | \itemize{ 17 | \item{NetInd} - Matrix of friend indices (network IDs) of \code{dim = (nobs x Kmax)} (Active Binding). 18 | \item{nF} - Vector of integers, where \code{nF[i]} is the integer number of friends (0 to \code{Kmax}) for observation \code{i}.
19 | \item{nobs} - Number of observations 20 | \item{Kmax} - Maximum number of friends for any observation. 21 | } 22 | } 23 | \section{Methods}{ 24 | 25 | \describe{ 26 | \item{\code{new(nobs, Kmax = 1)}}{Uses \code{nobs} and \code{Kmax} to instantiate an object of R6 class and pre-allocate memory 27 | for the future network ID matrix.} 28 | \item{\code{makeNetInd.fromIDs(Net_str, IDs_str = NULL, sep = ' ')}}{Build the matrix of network IDs (\code{NetInd_k}) from IDs string vector, 29 | all friends of one observation \code{i} are located in a string Net_str[i], with two distinct friend IDs of \code{i} 30 | separated by character \code{sep}. If \code{IDs_str} is NULL it is assumed that the friends in Net_str are 31 | actual row numbers in \code{1:nobs}, otherwise IDs from Net_str will be used for looking up the observation row numbers in \code{IDs_str}.} 32 | \item{\code{make.nF(NetInd_k = self$NetInd_k, nobs = self$nobs, Kmax = self$Kmax)}}{This method calculates the integer number of 33 | friends for each row of the network ID matrix (\code{self$NetInd_k}). 
The result is assigned to a field \code{self$nF} and 34 | is returned invisibly.} 35 | \item{\code{mat.nF(nFnode)}}{\code{nFnode} - the character name for the number of friends variable that is assigned as a column 36 | name to a single column matrix in \code{self$nF}.} 37 | } 38 | } 39 | 40 | \keyword{R6} 41 | \keyword{class} 42 | -------------------------------------------------------------------------------- /man/eval.target.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/targetparam.r 3 | \name{eval.target} 4 | \alias{eval.target} 5 | \title{Evaluate the True Value of the Causal Target Parameter} 6 | \usage{ 7 | eval.target(DAG, n, data, actions, rndseed = NULL, 8 | verbose = getOption("simcausal.verbose")) 9 | } 10 | \arguments{ 11 | \item{DAG}{DAG object with target parameter set via \code{set.targetE} or \code{set.targetMSM} functions} 12 | 13 | \item{n}{Number of observations to simulate (if simulating full data), this is overwritten by the number of observations in each data} 14 | 15 | \item{data}{List of action-specific \code{data.frames} generated with \code{sim} or \code{simfull}} 16 | 17 | \item{actions}{Character vector of action names which play the role of the data generating mechanism for simulated data when argument \code{data} is missing. Alternatively, \code{actions} can be a list of action DAGs pre-selected with \code{A(DAG)} function. When this argument is missing, full data is automatically sampled from all available actions in the \code{DAG} argument.} 18 | 19 | \item{rndseed}{Seed for the random number generator.} 20 | 21 | \item{verbose}{Set to \code{TRUE} to print messages on status and information to the console. 
22 | Turn this off by default using options(simcausal.verbose=FALSE).} 23 | } 24 | \value{ 25 | For targetE returns a vector of counterfactual means, ATE or ATR; for targetMSM returns a named list with the MSM model fit (\code{"msm"}), 26 | MSM model coefficients (\code{"coef"}), the mapping of the MSM summary terms \code{S()} to the actual variable names used in the data, (\code{"S.msm.map"}), 27 | and the long format full data that was used for fitting this MSM \code{"df_long"}. 28 | } 29 | \description{ 30 | This function estimates the true value of the previously set target parameter (\code{set.targetE} or \code{set.targetMSM}) using the DAG object and either 1) \code{data}: list of action-specific simulated \code{data.frames}; or 2) \code{actions}; or 3) when \code{data} and \code{actions} are missing, using all distinct actions previously defined on the \code{DAG} object. 31 | } 32 | \details{ 33 | For examples and additional details see documentation for \code{\link{set.targetE}} or \code{\link{set.targetMSM}} 34 | } 35 | -------------------------------------------------------------------------------- /man/plotDAG.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simcausal.r 3 | \name{plotDAG} 4 | \alias{plotDAG} 5 | \title{Plot DAG} 6 | \usage{ 7 | plotDAG(DAG, tmax = NULL, xjitter, yjitter, node.action.color, 8 | vertex_attrs = list(), edge_attrs = list(), excludeattrs, 9 | customvlabs, verbose = getOption("simcausal.verbose")) 10 | } 11 | \arguments{ 12 | \item{DAG}{A DAG object that was specified by calling \code{\link{set.DAG}}} 13 | 14 | \item{tmax}{Maximum time-point to plot for time-varying DAG objects} 15 | 16 | \item{xjitter}{Amount of random jitter for node x-axis plotting coordinates} 17 | 18 | \item{yjitter}{Amount of random jitter for node y-axis plotting coordinates} 19 | 20 | \item{node.action.color}{Color of the action 
node labels (only for action DAG of class DAG.action). If missing, defaults to red.} 21 | 22 | \item{vertex_attrs}{A named list of \code{igraph} graphical parameters for plotting DAG vertices. These parameters are passed on to \code{add.vertices} \code{igraph} function.} 23 | 24 | \item{edge_attrs}{A named list of \code{igraph} graphical parameters for plotting DAG edges. These parameters are passed on to \code{add.edges} \code{igraph} function.} 25 | 26 | \item{excludeattrs}{A character vector for DAG nodes that should be excluded from the plot} 27 | 28 | \item{customvlabs}{A named vector of custom DAG node labels (replaces node names from the DAG object).} 29 | 30 | \item{verbose}{Set to \code{TRUE} to print messages on status and information to the console. 31 | Turn this off by default using options(simcausal.verbose=FALSE).} 32 | } 33 | \description{ 34 | Plot DAG object using functions from \code{igraph} package. 35 | The default setting is to keep the regular (observed) DAG nodes with \code{shape} set to "none", which can be over-ridden by the user. 36 | For latent (hidden) DAG nodes the default is to: 37 | 1) set the node color as grey; 38 | 2) enclose the node by a circle; and 39 | 3) all directed edges coming out of the latent node are plotted as dashed. 40 | } 41 | \references{ 42 | Sofrygin O, van der Laan MJ, Neugebauer R (2017). 43 | "simcausal R Package: Conducting Transparent and Reproducible Simulation Studies of Causal Effect Estimation with Complex Longitudinal Data." 44 | Journal of Statistical Software, 81(2), 1-47. doi: 10.18637/jss.v081.i02. 
45 | } 46 | -------------------------------------------------------------------------------- /man/simobs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulation.r 3 | \name{simobs} 4 | \alias{simobs} 5 | \title{Simulate Observed Data} 6 | \usage{ 7 | simobs(DAG, n, wide = TRUE, LTCF = NULL, rndseed = NULL, 8 | rndseed.reset.node = NULL, verbose = getOption("simcausal.verbose")) 9 | } 10 | \arguments{ 11 | \item{DAG}{A DAG object that has been locked with set.DAG(DAG). Observed data from this DAG will be simulated.} 12 | 13 | \item{n}{Number of observations to sample.} 14 | 15 | \item{wide}{A logical, if TRUE the output data is generated in wide format, if FALSE, the output longitudinal data is generated in long format} 16 | 17 | \item{LTCF}{If forward imputation is desired for the missing variable values, this argument should be set to the name of the node that indicates the end of follow-up event. See the vignette, \code{\link{sim}} and \code{\link{doLTCF}} for additional details.} 18 | 19 | \item{rndseed}{Seed for the random number generator.} 20 | 21 | \item{rndseed.reset.node}{When \code{rndseed} is specified, use this argument to specify the name of the \code{DAG} node at which the random number generator seed is reset back to \code{NULL} (simulation function will call \code{set.seed(NULL)}). 22 | Can be useful if one wishes to simulate data using the set seed \code{rndseed} only for the first K nodes of the DAG and use an entirely random sample when simulating the rest of the nodes starting at K+1 and on. 23 | The name of such (K+1)th order \code{DAG} node should be then specified with this argument.} 24 | 25 | \item{verbose}{Set to \code{TRUE} to print messages on status and information to the console.
26 | Turn this off by default using options(simcausal.verbose=FALSE).} 27 | } 28 | \value{ 29 | A \code{data.frame} where each column is sampled from the conditional distribution specified by the corresponding \code{DAG} object node. 30 | } 31 | \description{ 32 | This function simulates observed data from a DAG object. 33 | } 34 | \seealso{ 35 | \code{\link{simfull}} - a wrapper function for simulating full data only; \code{\link{sim}} - a wrapper function for simulating both types of data; \code{\link{doLTCF}} for forward imputation of the missing values in already simulated data; \code{\link{DF.to.long}}, \code{\link{DF.to.longDT}} - converting longitudinal data from wide to long formats. 36 | 37 | Other simulation functions: \code{\link{simfull}}, 38 | \code{\link{sim}} 39 | } 40 | \concept{simulation functions} 41 | -------------------------------------------------------------------------------- /man/rcat.factor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distributions.R 3 | \name{rcat.factor} 4 | \alias{rcat.factor} 5 | \alias{rcategor} 6 | \title{Random Sample for a Categorical Factor} 7 | \usage{ 8 | rcat.factor(n, probs) 9 | 10 | rcategor(n, probs) 11 | } 12 | \arguments{ 13 | \item{n}{Sample size.} 14 | 15 | \item{probs}{Either a vector or a matrix of success probabilities. 16 | When \code{probs} is a vector, \code{n} identically distributed random categorical variables 17 | are generated with categories: 1, 2, ..., length(probs)+1. 18 | When \code{probs} is a matrix, the categorical probabilities of the \code{k}th sample are determined by the 19 | \code{k}th row of \code{probs} matrix, i.e., \code{probs[k,]}.} 20 | } 21 | \value{ 22 | A factor of length \code{n} with levels: \code{1,2, ...,ncol(probs)+1}. 23 | } 24 | \description{ 25 | Matrix version of the categorical distribution.
The argument \code{probs} can be a matrix of n rows, 26 | specifying individual (varying in sample) categorical probabilities. 27 | The number of categories generated is equal to \code{ncol(probs)+1}, the levels labeled as: \code{1,...,ncol(probs)+1}. 28 | } 29 | \section{Functions}{ 30 | \itemize{ 31 | \item \code{rcategor}: (Deprecated) Random Sample of a Categorical Factor 32 | }} 33 | 34 | \examples{ 35 | 36 | #--------------------------------------------------------------------------------------- 37 | # Specifying and simulating from a DAG with one categorical node with constant 38 | # probabilities 39 | #--------------------------------------------------------------------------------------- 40 | D <- DAG.empty() 41 | D <- D + node("race",t=0,distr="rcat.factor",probs=c(0.2,0.1,0.4,0.15,0.05,0.1)) 42 | Dset <- set.DAG(D) 43 | simdat <- sim(Dset, n=200, rndseed=1) 44 | 45 | #--------------------------------------------------------------------------------------- 46 | # Specifying and simulating from a DAG with a categorical node with varying 47 | # probabilities (probabilities are determined by values sampled for nodes L0 and L1) 48 | #--------------------------------------------------------------------------------------- 49 | D <- DAG.empty() 50 | D <- D + node("L0", distr="rnorm", mean=10, sd=5) 51 | D <- D + node("L1", distr="rnorm", mean=10, sd=5) 52 | D <- D + node("L2", distr="rcat.factor", probs=c(abs(1/L0), abs(1/L1))) 53 | Dset <- set.DAG(D) 54 | simdat <- sim(Dset, n=200, rndseed=1) 55 | } 56 | \seealso{ 57 | \code{\link{rcat.b1}}, \code{\link{rcat.b0}} 58 | } 59 | -------------------------------------------------------------------------------- /man/simfull.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulation.r 3 | \name{simfull} 4 | \alias{simfull} 5 | \title{Simulate Full Data (From Action DAG(s))} 6 | \usage{ 7 |
simfull(actions, n, wide = TRUE, LTCF = NULL, rndseed = NULL, 8 | rndseed.reset.node = NULL, verbose = getOption("simcausal.verbose")) 9 | } 10 | \arguments{ 11 | \item{actions}{Actions specifying the counterfactual DAG. This argument must be either an object of class DAG.action or a list of DAG.action objects.} 12 | 13 | \item{n}{Number of observations to sample.} 14 | 15 | \item{wide}{A logical, if TRUE the output data is generated in wide format, if FALSE, the output longitudinal data is generated in long format} 16 | 17 | \item{LTCF}{If forward imputation is desired for the missing variable values, this argument should be set to the name of the node that indicates the end of follow-up event. See the vignette, \code{\link{sim}} and \code{\link{doLTCF}} for additional details.} 18 | 19 | \item{rndseed}{Seed for the random number generator.} 20 | 21 | \item{rndseed.reset.node}{When \code{rndseed} is specified, use this argument to specify the name of the \code{DAG} node at which the random number generator seed is reset back to \code{NULL} (simulation function will call \code{set.seed(NULL)}). 22 | Can be useful if one wishes to simulate data using the set seed \code{rndseed} only for the first K nodes of the DAG and use an entirely random sample when simulating the rest of the nodes starting at K+1 and on. 23 | The name of such (K+1)th order \code{DAG} node should be then specified with this argument.} 24 | 25 | \item{verbose}{Set to \code{TRUE} to print messages on status and information to the console. 26 | Turn this off by default using options(simcausal.verbose=FALSE).} 27 | } 28 | \value{ 29 | A named list, each item is a \code{data.frame} corresponding to an action specified by the actions argument, action names are used for naming these list items. 30 | } 31 | \description{ 32 | This function simulates full data based on a list of intervention DAGs, returning a list of \code{data.frame}s.
33 | } 34 | \seealso{ 35 | \code{\link{simobs}} - a wrapper function for simulating observed data only; \code{\link{sim}} - a wrapper function for simulating both types of data; \code{\link{doLTCF}} for forward imputation of the missing values in already simulated data; \code{\link{DF.to.long}}, \code{\link{DF.to.longDT}} - converting longitudinal data from wide to long formats. 36 | 37 | Other simulation functions: \code{\link{simobs}}, 38 | \code{\link{sim}} 39 | } 40 | \concept{simulation functions} 41 | -------------------------------------------------------------------------------- /tests/test-all.R: -------------------------------------------------------------------------------- 1 | ## unit tests will not be done if RUnit is not available 2 | # setwd("..") 3 | # getwd() 4 | # library(RUnit) 5 | if(require("RUnit", quietly=TRUE)) { 6 | ## --- Setup --- 7 | 8 | pkg <- "simcausal" # <-- Tested package name 9 | 10 | if(Sys.getenv("RCMDCHECK") == "FALSE") { 11 | ## Path to unit tests for standalone running under Makefile (not R CMD check) 12 | ## PKG/tests/../inst/unitTests 13 | # path <- file.path(getwd(), "..", "inst", "unitTests") 14 | } else { 15 | ## Path to unit tests for R CMD check 16 | ## PKG.Rcheck/tests/../PKG/unitTests 17 | # path <- system.file(package=pkg, "RUnit") 18 | 19 | # REPLACED WITH: 20 | path <- file.path(getwd(), "RUnit") 21 | } 22 | 23 | cat("\nRunning unit tests\n") 24 | print(list(pkg=pkg, getwd=getwd(), pathToUnitTests=path)) 25 | 26 | library(package=pkg, character.only=TRUE) 27 | 28 | ## If desired, load the name space to allow testing of private functions 29 | ## if (is.element(pkg, loadedNamespaces())) 30 | ## attach(loadNamespace(pkg), name=paste("namespace", pkg, sep=":"), pos=3) 31 | ## 32 | ## or simply call PKG:::myPrivateFunction() in tests 33 | 34 | ## --- Testing --- 35 | 36 | ## Define tests 37 | test.suite <- defineTestSuite(name=paste(pkg, "unit testing"), 38 | # dirs="./RUnit", 39 | dirs=path, 40 | testFileRegexp =
"^RUnit_tests_+", 41 | testFuncRegexp = "^test.+", 42 | rngKind = "Marsaglia-Multicarry", 43 | rngNormalKind = "Kinderman-Ramage") 44 | ## Run 45 | tests <- runTestSuite(test.suite) 46 | 47 | ## Default report name 48 | pathReport <- file.path(path, "report") 49 | 50 | ## Report to stdout and text files 51 | cat("------------------- UNIT TEST SUMMARY ---------------------\n\n") 52 | printTextProtocol(tests, showDetails=FALSE) 53 | printTextProtocol(tests, showDetails=FALSE, 54 | fileName=paste0(pathReport, "Summary.txt")) 55 | printTextProtocol(tests, showDetails=TRUE, 56 | fileName=paste0(pathReport, ".txt")) 57 | ## Report to HTML file 58 | printHTMLProtocol(tests, fileName=paste0(pathReport, ".html")) 59 | 60 | ## Return stop() to cause R CMD check stop in case of 61 | ## - failures i.e. FALSE to unit tests or 62 | ## - errors i.e. R errors 63 | tmp <- getErrors(tests) 64 | if(tmp$nFail > 0 | tmp$nErr > 0) { 65 | stop(paste("\n\nunit testing failed (#test failures: ", tmp$nFail, 66 | ", #R errors: ", tmp$nErr, ")\n\n", sep="")) 67 | } 68 | } else { 69 | warning("cannot run unit tests -- package RUnit is not available") 70 | } -------------------------------------------------------------------------------- /tests/examples/MSM.examples.R: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------------------------------------------- 2 | # DAG with time-varying outcomes (survival outcome) 3 | #--------------------------------------------------------------------------------------- 4 | # Define longitudinal data structure over 6 time-points t=(0:5) 5 | t_end <- 5 6 | D <- DAG.empty() 7 | D <- D + node("L2", t=0, distr="rbern", prob=0.05) 8 | D <- D + node("L1", t=0, distr="rbern", prob=ifelse(L2[0]==1,0.5,0.1)) 9 | D <- D + node("A1", t=0, distr="rbern", prob=ifelse(L1[0]==1 & L2[0]==0, 0.5, 10 | ifelse(L1[0]==0 & L2[0]==0, 0.1, 11 | ifelse(L1[0]==1 & L2[0]==1, 0.9, 0.5)))) 12 | D <- D + 
node("A2", t=0, distr="rbern", prob=0, order=4, EFU=TRUE) 13 | D <- D + node("Y", t=0, distr="rbern", 14 | prob=plogis(-6.5 + L1[0] + 4*L2[0] + 0.05*I(L2[0]==0)), 15 | EFU=TRUE) 16 | D <- D + node("L2", t=1:t_end, distr="rbern", prob=ifelse(A1[t-1]==1, 0.1, 17 | ifelse(L2[t-1]==1, 0.9, 18 | min(1,0.1 + t/16)))) 19 | D <- D + node("A1", t=1:t_end, distr="rbern", prob=ifelse(A1[t-1]==1, 1, 20 | ifelse(L1[0]==1 & L2[0]==0, 0.3, 21 | ifelse(L1[0]==0 & L2[0]==0, 0.1, 22 | ifelse(L1[0]==1 & L2[0]==1, 0.7, 23 | 0.5))))) 24 | D <- D + node("A2", t=1:t_end, distr="rbern", prob=0, EFU=TRUE) 25 | D <- D + node( "Y", t=1:t_end, distr="rbern", 26 | prob=plogis(-6.5 + L1[0] + 4*L2[t] + 0.05*sum(I(L2[0:t]==rep(0,(t+1))))), 27 | EFU=TRUE) 28 | D <- set.DAG(D) 29 | 30 | # Add two dynamic actions (indexed by values of the parameter theta={0,1}) 31 | # Define intervention nodes 32 | act_t0_theta <- node("A1",t=0, distr="rbern", prob=ifelse(L2[0] >= theta,1,0)) 33 | act_tp_theta <- node("A1",t=1:t_end, distr="rbern", 34 | prob=ifelse(A1[t-1]==1,1,ifelse(L2[t] >= theta,1,0))) 35 | # Add two actions to current DAG object 36 | D <- D + action("A1_th0", nodes=c(act_t0_theta, act_tp_theta), theta=0) 37 | D <- D + action("A1_th1", nodes=c(act_t0_theta, act_tp_theta), theta=1) 38 | 39 | #--------------------------------------------------------------------------------------- 40 | # MSM EXAMPLE 1: Modeling survival over time 41 | #--------------------------------------------------------------------------------------- 42 | # Modeling pooled survival Y_t over time as a projection on the following working 43 | # logistic model: 44 | msm.form <- "Y ~ theta + t + I(theta*t)" 45 | D <- set.targetMSM(D, outcome="Y", t=0:5, formula=msm.form, family="binomial", 46 | hazard=FALSE) 47 | MSMres <- eval.target(D, n=1000) 48 | MSMres$coef 49 | 50 | #--------------------------------------------------------------------------------------- 51 | # MSM EXAMPLE 2: Modeling survival over time with exposure-based 
summary measures 52 | #--------------------------------------------------------------------------------------- 53 | # Now we want to model Y_t by adding a summary measure covariate defined as mean 54 | # exposure A1 from time 0 to t; 55 | # Enclosing any term inside S() forces its evaluation in the environment 56 | # of the full (counterfactual) data. 57 | msm.form_sum <- "Y ~ theta + t + I(theta*t) + S(mean(A1[0:t]))" 58 | D <- set.targetMSM(D, outcome="Y", t=0:5, formula=msm.form_sum, family="binomial", 59 | hazard=FALSE) 60 | MSMres <- eval.target(D, n=1000) 61 | MSMres$coef 62 | -------------------------------------------------------------------------------- /tests/RUnit/RUnit_tests_02_MV.R: -------------------------------------------------------------------------------- 1 | `%+%` <- function(a, b) paste0(a, b) 2 | as.numeric.factor <- function(x) {as.numeric(levels(x))[x]} 3 | allNA = function(x) all(is.na(x)) 4 | 5 | # Adding test for latent vars 6 | test.MV <- function() { 7 | D <- DAG.empty() 8 | mvName <- c("X1", "X2", "X3") 9 | # multivar node with no t: 10 | test.node_not <- node(mvName, distr = "rconst", const = 1) 11 | # multivar node with with t: 12 | test.node_wt <- node(mvName, t = 0:20, distr = "rconst", const = 1) 13 | 14 | if (requireNamespace("mvtnorm", quietly = TRUE)) { 15 | require("mvtnorm") 16 | D <- DAG.empty() 17 | # 3 dimensional normal (uncorrelated) using rmvnorm function from rmvnorm package: 18 | D <- D + node(c("X1","X2","X3"), distr = "rmvnorm", 19 | asis.params = list(mean = "c(0,1,2)")) 20 | # Bivariate normal using same function (correlation coef 0.75): 21 | D <- D + node(c("Y1","Y2"), distr = "rmvnorm", 22 | asis.params = list(mean = "c(0,1)", sigma = "matrix(c(1,0.75,0.75,1), ncol=2)")) 23 | D <- D + node("A", distr = "rconst", const = 1-X1) 24 | Dset1 <- set.DAG(D, verbose = TRUE) 25 | plotDAG(Dset1) 26 | dat1 <- sim(Dset1, n = 200) 27 | } 28 | 29 | if (requireNamespace("copula", quietly = TRUE)) { 30 | # Bivariate uniform 
copula using rCopula function from copula package (correlation coef 0.75), with a warning: 31 | require("copula") 32 | D <- DAG.empty() 33 | D <- D + node(c("Y1","Y2"), distr = "rCopula", copula = eval(normalCopula(0.75, dim = 2))) 34 | Dset2a <- set.DAG(D) 35 | dat2a <- sim(Dset2a, n = 200) 36 | # Same with no warning: 37 | D <- DAG.empty() 38 | D <- D + node(c("Y1","Y2"), distr = "rCopula", 39 | asis.params = list(copula = "normalCopula(0.75, dim = 2)")) 40 | Dset2b <- set.DAG(D) 41 | dat2b <- sim(Dset2b, n = 200) 42 | # Bivariate binomial from previous copula, with same correlation: 43 | vecfun.add("qbinom") 44 | D <- D + 45 | node("A.Bin1", distr = "rconst", const = qbinom(Y1, 10, 0.5))+ 46 | node("A.Bin2", distr = "rconst", const = qbinom(Y2, 15, 0.7))+ 47 | node(c("A.Bin1.2","A.Bin2.2"), distr = "rconst", const = c(qbinom(Y1, 10, 0.5),qbinom(Y2, 15, 0.7))) 48 | Dset3 <- set.DAG(D) 49 | dat3 <- sim(Dset3, n = 200) 50 | plotDAG(Dset3) 51 | } 52 | 53 | if (requireNamespace("bindata", quietly = TRUE)) { 54 | # Same as "A.Bin1" and "A.Bin2", but directly using rmvbin function in bindata package: 55 | require("bindata") 56 | D <- DAG.empty() 57 | D <- D + node(c("B.Bin1","B.Bin2"), distr = "rmvbin", 58 | asis.params = list( 59 | margprob = "c(0.5, 0.5)", 60 | bincorr = "matrix(c(1,0.75,0.75,1), ncol=2)")) 61 | Dset4 <- set.DAG(D) 62 | dat4 <- sim(Dset4, n = 200) 63 | plotDAG(Dset4) 64 | } 65 | 66 | if (requireNamespace("mvtnorm", quietly = TRUE)) { 67 | # time-varying multivar node (3 time-points, 3 dimensional normal): 68 | D <- DAG.empty() 69 | D <- D + node(c("X1", "X2", "X3"), t = 0:2, distr = "rmvnorm", 70 | asis.params = list( 71 | mean = "c(0,1,2)", 72 | sigma = "matrix(rep(0.75,9), ncol=3)")) 73 | Dset5 <- set.DAG(D) 74 | dat5 <- sim(Dset5, n = 200) 75 | } 76 | } 77 | 78 | -------------------------------------------------------------------------------- /R/network_distributions.R: 
-------------------------------------------------------------------------------- 1 | #' Call \code{igraph::sample_gnp} to Generate Random Graph Object According to the G(n,p) Erdos-Renyi Model 2 | #' 3 | #' Call \code{igraph::sample_gnp} and convert the output to \code{simcausal} network matrix. 4 | #' @param n Size of the network graph (number of nodes). 5 | #' @param p Same as \code{igraph::sample_gnp}: The probability for drawing an edge between two arbitrary vertices (G(n,p) graph). 6 | #' @return A matrix with n rows, each row lists the indices of friends connected to that particular observation. 7 | #' @seealso \code{\link{rnet.gnm}} 8 | #' @export 9 | rnet.gnp <- function(n, p) { 10 | igraph.gnm <- igraph::sample_gnp(n = n, p = p, directed = TRUE) 11 | sparse_AdjMat <- simcausal::igraph.to.sparseAdjMat(igraph.gnm) 12 | NetInd_out <- simcausal::sparseAdjMat.to.NetInd(sparse_AdjMat) 13 | return(NetInd_out$NetInd_k) 14 | } 15 | 16 | #' Call \code{igraph::sample_gnm} to Generate Random Graph Object According to the G(n,m) Erdos-Renyi Model 17 | #' 18 | #' Call \code{igraph::sample_gnm} and convert the output to \code{simcausal} network matrix. 19 | #' The parameter \code{m} of \code{igraph::sample_gnm} is derived from \code{n} and \code{m_pn} as \code{as.integer(m_pn*n)} 20 | #' @param n Size of the network graph (number of nodes). 21 | #' @param m_pn The total number of edges as a fraction of the sample size \code{n}. 22 | #' @return A matrix with n rows, each row lists the indices of friends connected to that particular observation. 
23 | #' @seealso \code{\link{rnet.gnp}} 24 | #' @export 25 | rnet.gnm <- function(n, m_pn) { 26 | m <- as.integer(m_pn*n) 27 | if (n <= 10) m <- 20 28 | igraph.gnm <- igraph::sample_gnm(n = n, m = m, directed = TRUE) 29 | sparse_AdjMat <- simcausal::igraph.to.sparseAdjMat(igraph.gnm) 30 | NetInd_out <- simcausal::sparseAdjMat.to.NetInd(sparse_AdjMat) 31 | return(NetInd_out$NetInd_k) 32 | } 33 | 34 | #' Call \code{igraph::sample_smallworld} to Generate Random Graph Object from the Watts-Strogatz Small-World Model 35 | #' 36 | #' Call \code{igraph::sample_smallworld} and convert the output to \code{simcausal} network matrix. 37 | #' The parameters are the same as those of \code{igraph::sample_smallworld}. 38 | #' The loop edges aren't allowed (\code{loops = FALSE}) and the multiple edges aren't allowed either \code{multiple = FALSE}. 39 | #' @param n Size of the network graph (the number of nodes). 40 | #' @param dim Same as in \code{igraph::sample_smallworld}: Integer constant, the dimension of the starting lattice. 41 | #' @param nei Same as in \code{igraph::sample_smallworld}: Integer constant, the neighborhood within which the vertices of the lattice will be connected. 42 | #' @param p Same as in \code{igraph::sample_smallworld}: Real constant between zero and one, the rewiring probability. 43 | #' @return A matrix with n rows, each row lists the indices of friends connected to that particular observation. 44 | #' @seealso \code{\link{rnet.gnp}}, \code{\link{rnet.gnm}} 45 | #' @export 46 | #' 47 | rnet.SmWorld <- function(n, dim, nei, p) { 48 | g <- igraph::sample_smallworld(dim = dim, size = n, nei = nei, p = p, loops = FALSE, multiple = FALSE) 49 | g <- igraph::as.directed(g, mode = c("mutual")) 50 | sparse_AdjMat <- simcausal::igraph.to.sparseAdjMat(g) 51 | NetInd_out <- simcausal::sparseAdjMat.to.NetInd(sparse_AdjMat) 52 | return(NetInd_out[["NetInd_k"]]) 53 | } 54 | 55 | #' List All Custom Network Generator Functions in \code{simcausal}. 
56 | #' 57 | #' @export 58 | net.list <- function() { 59 | message("All custom network generators defined in SimCausal:\n") 60 | print(ls("package:simcausal", pattern="^(rnet)")) 61 | invisible(ls("package:simcausal")) 62 | } -------------------------------------------------------------------------------- /man/doLTCF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulation.r 3 | \name{doLTCF} 4 | \alias{doLTCF} 5 | \title{Missing Variable Imputation with Last Time Point Value Carried Forward (LTCF)} 6 | \usage{ 7 | doLTCF(data, LTCF) 8 | } 9 | \arguments{ 10 | \item{data}{Simulated \code{data.frame} in wide format} 11 | 12 | \item{LTCF}{Character string specifying the outcome node that is the indicator of the end of follow-up (observations with value of the outcome variable being 1 indicate that the end of follow-up has been reached). The outcome variable must be a binary node that was declared with \code{EFU=TRUE}.} 13 | } 14 | \value{ 15 | Modified \code{data.frame}, all time-varying missing variables after the \code{EFU} outcome specified in \code{LTCF} are forward imputed with their last available non-missing value. 16 | } 17 | \description{ 18 | Forward imputation for missing variable values in simulated data after a particular end of the follow-up event. The end of follow-up event is defined by the node of type \code{EFU=TRUE} being equal to 1. 19 | } 20 | \section{Details}{ 21 | 22 | The default behavior of the \code{sim} function consists in setting all nodes that temporally follow an \code{EFU} node whose simulated value is 1 to missing (i.e., \code{NA}). 23 | The argument \code{LTCF} of the \code{sim} function can however be used to change this default behavior and impute some of these missing values with \emph{last time point value carried forward} (LTCF).
24 | More specifically, only the missing values of time-varying nodes (i.e., those with non-missing \code{t} argument) that follow the end of follow-up event encoded by the \code{EFU} node specified by the \code{LTCF} argument will be imputed. 25 | One can use the function \code{doLTCF} to apply the \emph{last time point value carried forward} (LTCF) imputation to an existing simulated dataset obtained from the function \code{sim} that was called with its default imputation setting (i.e., with no \code{LTCF} argument). 26 | Illustration of the use of the LTCF imputation functionality are provided in the package vignette. 27 | 28 | The first example below shows the default data format of the \code{sim} function after an end of the follow-up event and how this behavior can be modified to generate data with LTCF imputation by either using the \code{LTCF} argument of the 29 | \code{sim} function or by calling the \code{doLTCF} function. The second example demonstrates how to use the \code{doLTCF} function to perform LTCF imputation on already existing data simulated with the \code{sim} function based on its default non-imputation behavior. 30 | } 31 | 32 | \examples{ 33 | t_end <- 10 34 | lDAG <- DAG.empty() 35 | lDAG <- lDAG + 36 | node(name = "L2", t = 0, distr = "rconst", const = 0) + 37 | node(name = "A1", t = 0, distr = "rconst", const = 0) + 38 | node(name = "L2", t = 1:t_end, distr = "rbern", 39 | prob = ifelse(A1[t - 1] == 1, 0.1, 40 | ifelse(L2[t-1] == 1, 0.9, 41 | min(1,0.1 + t/t_end)))) + 42 | node(name = "A1", t = 1:t_end, distr = "rbern", 43 | prob = ifelse(A1[t - 1] == 1, 1, 44 | ifelse(L2[0] == 0, 0.3, 45 | ifelse(L2[0] == 0, 0.1, 46 | ifelse(L2[0] == 1, 0.7, 0.5))))) + 47 | node(name = "Y", t = 1:t_end, distr = "rbern", 48 | prob = plogis(-6.5 + 4 * L2[t] + 0.05 * sum(I(L2[0:t] == rep(0,(t + 1))))), 49 | EFU = TRUE) 50 | lDAG <- set.DAG(lDAG) 51 | #--------------------------------------------------------------------------------------- 52 | # EXAMPLE 1. 
No forward imputation. 53 | #--------------------------------------------------------------------------------------- 54 | Odat.wide <- sim(DAG = lDAG, n = 1000, rndseed = 123) 55 | Odat.wide[c(21,47), 1:18] 56 | Odat.wideLTCF <- sim(DAG = lDAG, n = 1000, LTCF = "Y", rndseed = 123) 57 | Odat.wideLTCF[c(21,47), 1:18] 58 | #--------------------------------------------------------------------------------------- 59 | # EXAMPLE 2. With forward imputation. 60 | #--------------------------------------------------------------------------------------- 61 | Odat.wideLTCF2 <- doLTCF(data = Odat.wide, LTCF = "Y") 62 | Odat.wideLTCF2[c(21,47), 1:18] 63 | # all.equal(Odat.wideLTCF, Odat.wideLTCF2) 64 | } 65 | \seealso{ 66 | \code{\link{sim}}, \code{\link{simobs}} and \code{\link{simfull}} for simulating data with and without carry forward imputation. 67 | 68 | Other data manipulation functions: \code{\link{DF.to.longDT}}, 69 | \code{\link{DF.to.long}} 70 | } 71 | \concept{data manipulation functions} 72 | -------------------------------------------------------------------------------- /R/action_node_indexing.R: -------------------------------------------------------------------------------- 1 | ################################################################### 2 | 3 | ################################################################### 4 | # Node and Action Constructors 5 | # Indexing/Subsetting DAG and actions will be here 6 | ################################################################### 7 | 8 | #' Subsetting/Indexing \code{DAG} Nodes 9 | #' @param DAG A DAG object that was defined using functions \code{\link{node}} and \code{\link{set.DAG}}. 10 | #' @return returns a list of nodes that can be indexed as a typical named list "[[]]". 
11 | #' @examples 12 | #' 13 | #'D <- DAG.empty() 14 | #'D <- D + node(name="W1", distr="rbern", prob=plogis(-0.5)) 15 | #'D <- D + node(name="W2", distr="rbern", prob=plogis(-0.5 + 0.5*W1)) 16 | #'D <- set.DAG(D) 17 | #' #Returns all nodes from DAG D 18 | #'N(D) 19 | #' #Returns node W1 from DAG D 20 | #'N(D)["W1"] 21 | #' @export 22 | N <- function(DAG) { 23 | if (!is.DAG(DAG) && !is.DAGnodelist(DAG)) { 24 | stop("Not a DAG object") 25 | } 26 | nodecount <- length(DAG) 27 | res <- seq_len(nodecount) 28 | class(res) <- "DAG.nodelist" 29 | ne <- new.env() 30 | assign("DAG", DAG, envir=ne) 31 | attr(res, "env") <- ne 32 | # the idea is to return the environment variable to avoid copying the DAG while subsetting 33 | # res 34 | # for now returning just the DAG itself 35 | attr(res, "env")$DAG 36 | } 37 | # select DAG nodes by t vector attribute 38 | Ntvec <- function(DAG, tvec) { 39 | node_nms <- sapply(N(DAG), '[[', "name") 40 | # get actual t for each node and return only nodes that pass 41 | N_t_idx <- sapply(N(DAG), function(node) is.null(node[["t"]]) || (node[["t"]]%in%tvec)) 42 | N_t <- N(DAG)[N_t_idx] 43 | class(N_t) <- "DAG.nodelist" 44 | N_t 45 | } 46 | # return a list of attribute values for a given attr name and list of nodes (DAG) 47 | Nattr <- function(DAG, attr) { 48 | lapply(N(DAG), '[[', attr) 49 | } 50 | 51 | #' Subsetting/Indexing Actions Defined for \code{DAG} Object 52 | #' @param DAG A DAG object that was defined using functions \code{\link{node}}, \code{\link{set.DAG}} and \code{\link{action}}. 53 | #' @return returns a list of actions, which are intervened versions of the original observed data DAG. 
54 | #' @examples 55 | #' 56 | #'D <- DAG.empty() 57 | #'D <- D + node(name="W1", distr="rbern", prob=plogis(-0.5)) 58 | #'D <- D + node(name="W2", distr="rbern", prob=plogis(-0.5 + 0.5*W1)) 59 | #'D <- D + node(name="A", distr="rbern", prob=plogis(-0.5 + 0.5*W1+ 0.5*W2)) 60 | #'D <- set.DAG(D) 61 | #' # Define two actions, acting on node "A" 62 | #'D <- D + action("A0", nodes=node("A", distr="rbern", prob=0)) 63 | #'D <- D + action("A1", nodes=node("A", distr="rbern", prob=1)) 64 | #' # Select both actions 65 | #'A(D) 66 | #' # Select action "A1" only 67 | #'A(D)["A1"] 68 | #' @export 69 | A <- function(DAG) { 70 | if (!is.DAG(DAG)) { 71 | stop("Not a DAG object") 72 | } 73 | res <- attributes(DAG)$actions 74 | if (is.null(res)) { 75 | NULL 76 | } else { 77 | # class(res) <- "DAG.action" 78 | res 79 | } 80 | } 81 | 82 | # # @export 83 | # "[.DAG.nodelist" <- function(x, i) { 84 | # i <- substitute(i) 85 | # if (is.numeric(i) || is.integer(i)) { 86 | # # simple indexing by node ids 87 | # res <- i[ i %in% x ] 88 | # attributes(res) <- attributes(x) 89 | # } else if (is.logical(i)) { 90 | # # simple indexing by logical vector 91 | # res <- as.numeric(x) [ i ] 92 | # attributes(res) <- attributes(x) 93 | # } else if (is.character(i)) { 94 | # res <- as.DAG.nodes(get("DAG", attr(x, "env")), i) 95 | # attributes(res) <- attributes(x) 96 | # } else { 97 | # # language expression, can also be an attribute based indexing 98 | # DAG <- get("DAG", attr(x, "env")) 99 | # i <- eval.... 
100 | # if (is.numeric(i) || is.integer(i)) { 101 | # i <- as.numeric(i) 102 | # res <- i[ i %in% x ] 103 | # attributes(res) <- attributes(x) 104 | # } else if (is.logical(i)) { 105 | # res <- as.numeric(x) [ i ] 106 | # attributes(res) <- attributes(x) 107 | # } else if (is.character(i)) { 108 | # res <- as.DAG.nodes(get("DAG", attr(x, "env")), i) 109 | # attributes(res) <- attributes(x) 110 | # } else { 111 | # stop("invalid indexing of the node") 112 | # } 113 | # } 114 | # res 115 | # } 116 | # as.DAG.nodes <- function(DAG, node) { 117 | # .... 118 | # } -------------------------------------------------------------------------------- /tests/examples/add.action.R: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------------------------------------------- 2 | # EXAMPLE 1: Showing two equivalent ways of defining an action for a simple DAG 3 | #--------------------------------------------------------------------------------------- 4 | 5 | D <- DAG.empty() 6 | D <- D + node(name="W1", distr="rbern", prob=plogis(-0.5)) 7 | D <- D + node(name="W2", distr="rbern", prob=plogis(-0.5 + 0.5*W1)) 8 | D <- D + node(name="A", distr="rbern", prob=plogis(-0.5 + 0.5*W1+ 0.5*W2)) 9 | Dset <- set.DAG(D) 10 | 11 | # Syntax '+ action': define two actions, intervening on node "A", imputing order 12 | Dset <- Dset + action("A0", nodes=node("A", distr="rbern", prob=0)) 13 | Dset <- Dset + action("A1", nodes=node("A", distr="rbern", prob=1)) 14 | 15 | # Equivalent syntax 'add.action': define two actions, intervening on node "A" 16 | Dset <- add.action(Dset, "A0", nodes=node("A", distr="rbern", prob=0)) 17 | Dset <- add.action(Dset, "A1", nodes=node("A", distr="rbern", prob=1)) 18 | 19 | #--------------------------------------------------------------------------------------- 20 | # EXAMPLE 2: Adding named attributes that define (index) the action. 
21 | # Define intervention on A that is conditional on W1 crossing some threshold theta 22 | #--------------------------------------------------------------------------------------- 23 | 24 | # Redefining node W1 as uniform [0,1] 25 | D <- DAG.empty() 26 | D <- D + node(name="W1", distr="runif", min=0, max=1) 27 | D <- D + node(name="W2", distr="rbern", prob=plogis(-0.5 + 0.5*W1)) 28 | D <- D + node(name="A", distr="rbern", prob=plogis(-0.5 + 0.5*W1+ 0.5*W2)) 29 | Dset <- set.DAG(D) 30 | 31 | # Define a node that is indexed by unknown variable theta 32 | actN <- node("A",distr="rbern",prob=ifelse(W1 >= theta,1,0)) 33 | # Define 3 actions for theta=0.1, 0.5, 0.9 34 | Dset <- Dset + action("A1th0.1", nodes = actN, theta = 0.1) 35 | Dset <- Dset + action("A1th0.5", nodes = actN, theta = 0.5) 36 | Dset <- Dset + action("A1th0.9", nodes = actN, theta = 0.9) 37 | 38 | # Simulate 50 observations per each action above 39 | simfull(A(Dset), n=50) 40 | 41 | #--------------------------------------------------------------------------------------- 42 | # EXAMPLE 3: Time-varying action attributes for longitudinal DAG 43 | #--------------------------------------------------------------------------------------- 44 | # Define longitudinal data structure over 6 time-points t=(0:5) with survival outcome "Y" 45 | t_end <- 5 46 | D <- DAG.empty() 47 | D <- D + node("L2", t=0, distr="rbern", prob=0.05) 48 | D <- D + node("L1", t=0, distr="rbern", prob=ifelse(L2[0]==1,0.5,0.1)) 49 | D <- D + node("A1", t=0, distr="rbern", prob=ifelse(L1[0]==1, 0.5, 0.1)) 50 | D <- D + node("Y", t=0, distr="rbern", 51 | prob=plogis(-6.5 + L1[0] + 4*L2[0] + 0.05*I(L2[0]==0)), EFU=TRUE) 52 | D <- D + node("L2", t=1:t_end, distr="rbern", prob=ifelse(A1[t-1]==1, 0.1, 0.9)) 53 | D <- D + node("A1", t=1:t_end, distr="rbern", 54 | prob=ifelse(A1[t-1]==1, 1, ifelse(L1[0]==1 & L2[0]==0, 0.3, 0.5))) 55 | D <- D + node("Y", t=1:t_end, distr="rbern", prob=plogis(-6.5+L1[0]+4*L2[t]), EFU=TRUE) 56 | D <- set.DAG(D) 57 
| 58 | #--------------------------------------------------------------------------------------- 59 | # Dynamic actions indexed by constant value of parameter theta={0,1} 60 | #--------------------------------------------------------------------------------------- 61 | # Define time-varying node A1: sets A1 to 1 if L2 at t is >= theta 62 | actN_A1 <- node("A1",t=0:t_end, distr="rbern", prob=ifelse(L2[t] >= theta,1,0)) 63 | 64 | # Define two actions, indexed by fixed values of theta={0,1} 65 | D_act <- D + action("A1_th0", nodes=actN_A1, theta=0) 66 | D_act <- D_act + action("A1_th1", nodes=actN_A1, theta=1) 67 | 68 | # Simulate 50 observations for each action above 69 | simfull(simcausal::A(D_act), n=50) 70 | 71 | #--------------------------------------------------------------------------------------- 72 | # Dynamic actions indexed by time-varying parameter theta[t] 73 | #--------------------------------------------------------------------------------------- 74 | # This defines an action node with threshold theta varying in time (note syntax theta[t]) 75 | actN_A1 <- node("A1",t=0:t_end, distr="rbern", prob=ifelse(L2[t] >= theta[t],1,0)) 76 | 77 | # Now define 3 actions that are indexed by various values of theta over time 78 | D_act <- D + action("A1_th_const0", nodes=actN_A1, theta=rep(0,(t_end+1))) 79 | D_act <- D_act + action("A1_th_var1", nodes=actN_A1, theta=c(0,0,0,1,1,1)) 80 | D_act <- D_act + action("A1_th_var2", nodes=actN_A1, theta=c(0,1,1,1,1,1)) 81 | 82 | # Simulate 50 observations for each action above 83 | simfull(simcausal::A(D_act), n=50) 84 | -------------------------------------------------------------------------------- /tests/examples/set.targetE.examples.R: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------------------------------------------- 2 | # EXAMPLE 1: DAG with single point treatment 3 | 
#--------------------------------------------------------------------------------------- 4 | # Define a DAG with single-point treatment ("Anode") 5 | D <- DAG.empty() 6 | D <- D + node("W1", distr="rbern", prob=plogis(-0.5)) 7 | D <- D + node("W2", distr="rbern", prob=plogis(-0.5 + 0.5*W1)) 8 | D <- D + node("Anode", distr="rbern", prob=plogis(-0.5 - 0.3*W1 - 0.3*W2)) 9 | D <- D + node("Y", distr="rbern", prob=plogis(-0.1 + 1.2*Anode + 0.3*W1 + 0.3*W2), 10 | EFU=TRUE) 11 | D_WAY <- set.DAG(D) 12 | 13 | # Defining interventions (actions) 14 | # define action "A1" that sets the treatment node to constant 1 15 | D_WAY <- D_WAY + action("A1", nodes=node("Anode",distr="rbern", prob=1)) 16 | # define another action "A0" that sets the treatment node to constant 0 17 | D_WAY <- D_WAY + action("A0", nodes=node("Anode",distr="rbern", prob=0)) 18 | #--------------------------------------------------------------------------------------- 19 | # Defining and calculating causal parameters: 20 | #--------------------------------------------------------------------------------------- 21 | # Counterfactual mean of node "Y" under action "A1" 22 | D_WAY <- set.targetE(D_WAY, outcome="Y", param="A1") 23 | eval.target(D_WAY, n=10000) 24 | 25 | # Contrasts of means of "Y" under action "A1" minus action "A0" 26 | D_WAY <- set.targetE(D_WAY, outcome="Y", param="A1-A0") 27 | eval.target(D_WAY, n=10000) 28 | 29 | # Ratios of "Y" under action "A1" over action "A0" 30 | D_WAY <- set.targetE(D_WAY, outcome="Y", param="A1/A0") 31 | eval.target(D_WAY, n=10000) 32 | 33 | # Alternative parameter evaluation by passing already simulated full data to 34 | # \code{eval.target} 35 | X_dat1 <- simfull(A(D_WAY), n=10000) 36 | D_WAY <- set.targetE(D_WAY, outcome="Y", param="A1/A0") 37 | eval.target(D_WAY, data=X_dat1) 38 | 39 | #--------------------------------------------------------------------------------------- 40 | # EXAMPLE 2: DAG with time-varying outcomes (survival outcome) 41 | 
#--------------------------------------------------------------------------------------- 42 | # Define longitudinal data structure over 6 time-points t=(0:5) 43 | t_end <- 5 44 | D <- DAG.empty() 45 | D <- D + node("L2", t=0, distr="rbern", prob=0.05) 46 | D <- D + node("L1", t=0, distr="rbern", prob=ifelse(L2[0]==1,0.5,0.1)) 47 | D <- D + node("A1", t=0, distr="rbern", prob=ifelse(L1[0]==1 & L2[0]==0, 0.5, 48 | ifelse(L1[0]==0 & L2[0]==0, 0.1, 49 | ifelse(L1[0]==1 & L2[0]==1, 0.9, 0.5)))) 50 | D <- D + node("A2", t=0, distr="rbern", prob=0, order=4, EFU=TRUE) 51 | D <- D + node("Y", t=0, distr="rbern", 52 | prob=plogis(-6.5 + L1[0] + 4*L2[0] + 0.05*I(L2[0]==0)), 53 | EFU=TRUE) 54 | D <- D + node("L2", t=1:t_end, distr="rbern", prob=ifelse(A1[t-1]==1, 0.1, 55 | ifelse(L2[t-1]==1, 0.9, 56 | min(1,0.1 + t/16)))) 57 | D <- D + node("A1", t=1:t_end, distr="rbern", prob=ifelse(A1[t-1]==1, 1, 58 | ifelse(L1[0]==1 & L2[0]==0, 0.3, 59 | ifelse(L1[0]==0 & L2[0]==0, 0.1, 60 | ifelse(L1[0]==1 & L2[0]==1, 0.7, 61 | 0.5))))) 62 | D <- D + node("A2", t=1:t_end, distr="rbern", prob=0, EFU=TRUE) 63 | D <- D + node("Y", t=1:t_end, distr="rbern", 64 | prob=plogis(-6.5 + L1[0] + 4*L2[t] + 0.05*sum(I(L2[0:t]==rep(0,(t+1))))), 65 | EFU=TRUE) 66 | D <- set.DAG(D) 67 | 68 | # Add two dynamic actions (indexed by values of the parameter theta={0,1}) 69 | # Define intervention nodes 70 | act_t0_theta <- node("A1",t=0, distr="rbern", prob=ifelse(L2[0] >= theta,1,0)) 71 | act_tp_theta <- node("A1",t=1:t_end, distr="rbern", 72 | prob=ifelse(A1[t-1]==1,1,ifelse(L2[t] >= theta,1,0))) 73 | # Add two actions to current DAG object 74 | D <- D + action("A1_th0", nodes=c(act_t0_theta, act_tp_theta), theta=0) 75 | D <- D + action("A1_th1", nodes=c(act_t0_theta, act_tp_theta), theta=1) 76 | #--------------------------------------------------------------------------------------- 77 | # Defining and calculating the target parameter 78 | 
#--------------------------------------------------------------------------------------- 79 | # Counterfactual mean of node "Y" at time-point t=4 under action "A1_th0" 80 | D <- set.targetE(D, outcome="Y", t=4, param="A1_th0") 81 | eval.target(D, n=5000) 82 | 83 | # Vector of counterfactual means of "Y" over all time points under action "A1_th1" 84 | D <- set.targetE(D, outcome="Y", t=0:5, param="A1_th1") 85 | eval.target(D, n=5000) 86 | 87 | # Vector of counterfactual contrasts of "Y" over all time points 88 | # for action "A1_th1" minus action "A1_th0" 89 | D <- set.targetE(D, outcome="Y", t=0:5, param="A1_th1 - A1_th0") 90 | eval.target(D, n=5000) 91 | 92 | # Vector of counterfactual ratios of "Y" over all time points 93 | # for action "A1_th0" over action "A1_th1" 94 | D <- set.targetE(D, outcome="Y", t=0:5, param="A1_th0 / A1_th1") 95 | eval.target(D, n=5000) 96 | -------------------------------------------------------------------------------- /man/sim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulation.r 3 | \name{sim} 4 | \alias{sim} 5 | \title{Simulate Observed or Full Data from \code{DAG} Object} 6 | \usage{ 7 | sim(DAG, actions, n, wide = TRUE, LTCF = NULL, rndseed = NULL, 8 | rndseed.reset.node = NULL, verbose = getOption("simcausal.verbose")) 9 | } 10 | \arguments{ 11 | \item{DAG}{A DAG object that has been locked with set.DAG(DAG). Observed data from this DAG will be simulated if the actions argument is omitted.} 12 | 13 | \item{actions}{Character vector of action names which will be extracted from the DAG object. Alternatively, this can be a list of action DAGs selected with \code{A(DAG)} function, in which case the argument \code{DAG} is unused. 
When \code{actions} is omitted, the function returns simulated observed data (see \code{simobs}).} 14 | 15 | \item{n}{Number of observations to sample.} 16 | 17 | \item{wide}{A logical, if TRUE the output data is generated in wide format, if FALSE, the output longitudinal data is generated in long format} 18 | 19 | \item{LTCF}{If forward imputation is desired for the missing variable values, this argument should be set to the name of the node that indicates the end of follow-up event.} 20 | 21 | \item{rndseed}{Seed for the random number generator.} 22 | 23 | \item{rndseed.reset.node}{When \code{rndseed} is specified, use this argument to specify the name of the \code{DAG} node at which the random number generator seed is reset back to \code{NULL} (simulation function will call \code{set.seed(NULL)}). 24 | Can be useful if one wishes to simulate data using the set seed \code{rndseed} only for the first K nodes of the DAG and use an entirely random sample when simulating the rest of the nodes starting at K+1 and on. 25 | The name of such (K+1)th order \code{DAG} node should be then specified with this argument.} 26 | 27 | \item{verbose}{Set to \code{TRUE} to print messages on status and information to the console. 28 | Turn this off by default using options(simcausal.verbose=FALSE).} 29 | } 30 | \value{ 31 | If the actions argument is missing, a simulated data.frame is returned, otherwise the function returns a named list of action-specific simulated data.frames with action names giving names to corresponding list items. 32 | } 33 | \description{ 34 | This function simulates full data based on a list of intervention DAGs, returning a list of \code{data.frame}s. See the vignette for examples and detailed description. 35 | } 36 | \section{Forward Imputation}{ 37 | 38 | By default, when LTCF is left unspecified, all variables that follow after any end of follow-up (EFU) event are set to missing (NA). 
39 | The end of follow-up event occurs when a binary node of type \code{EFU=TRUE} is equal to 1, indicating a failing or right-censoring event. 40 | To forward impute the values of the time-varying nodes after the occurrence of the \code{EFU} event, set the LTCF argument to a name of the EFU node representing this event. 41 | For additional details and examples see the vignette and \code{\link{doLTCF}} function. 42 | } 43 | 44 | \examples{ 45 | t_end <- 10 46 | lDAG <- DAG.empty() 47 | lDAG <- lDAG + 48 | node(name = "L2", t = 0, distr = "rconst", const = 0) + 49 | node(name = "A1", t = 0, distr = "rconst", const = 0) + 50 | node(name = "L2", t = 1:t_end, distr = "rbern", 51 | prob = ifelse(A1[t - 1] == 1, 0.1, 52 | ifelse(L2[t-1] == 1, 0.9, 53 | min(1,0.1 + t/t_end)))) + 54 | node(name = "A1", t = 1:t_end, distr = "rbern", 55 | prob = ifelse(A1[t - 1] == 1, 1, 56 | ifelse(L2[0] == 0, 0.3, 57 | ifelse(L2[0] == 0, 0.1, 58 | ifelse(L2[0] == 1, 0.7, 0.5))))) + 59 | node(name = "Y", t = 1:t_end, distr = "rbern", 60 | prob = plogis(-6.5 + 4 * L2[t] + 0.05 * sum(I(L2[0:t] == rep(0,(t + 1))))), 61 | EFU = TRUE) 62 | lDAG <- set.DAG(lDAG) 63 | #--------------------------------------------------------------------------------------- 64 | # EXAMPLE 1. No forward imputation. 65 | #--------------------------------------------------------------------------------------- 66 | Odat.wide <- sim(DAG = lDAG, n = 1000, rndseed = 123) 67 | Odat.wide[c(21,47), 1:18] 68 | Odat.wideLTCF <- sim(DAG = lDAG, n = 1000, LTCF = "Y", rndseed = 123) 69 | Odat.wideLTCF[c(21,47), 1:18] 70 | #--------------------------------------------------------------------------------------- 71 | # EXAMPLE 2. With forward imputation. 
72 | #--------------------------------------------------------------------------------------- 73 | Odat.wideLTCF2 <- doLTCF(data = Odat.wide, LTCF = "Y") 74 | Odat.wideLTCF2[c(21,47), 1:18] 75 | # all.equal(Odat.wideLTCF, Odat.wideLTCF2) 76 | } 77 | \references{ 78 | Sofrygin O, van der Laan MJ, Neugebauer R (2017). 79 | "simcausal R Package: Conducting Transparent and Reproducible Simulation Studies of Causal Effect Estimation with Complex Longitudinal Data." 80 | Journal of Statistical Software, 81(2), 1-47. doi: 10.18637/jss.v081.i02. 81 | } 82 | \seealso{ 83 | \code{\link{simobs}} - a wrapper function for simulating observed data only; \code{\link{simfull}} - a wrapper function for simulating full data only; \code{\link{doLTCF}} - forward imputation of the missing values in already simulated data; \code{\link{DF.to.long}}, \code{\link{DF.to.longDT}} - converting longitudinal data from wide to long formats. 84 | 85 | Other simulation functions: \code{\link{simfull}}, 86 | \code{\link{simobs}} 87 | } 88 | \concept{simulation functions} 89 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | simcausal 0.1 2 | ============== 3 | 4 | * Initial version release to CRAN 5 | 6 | simcausal 0.1.9 7 | ============== 8 | 9 | * Development version update. 10 | 11 | * Added vignette; the node.depr function is no longer supported; updated the documentation for ?simcausal, doLTCF, sim, simobs and simfull, add.action, add.nodes functions; changed examples for node and set.DAG; added .onAttach message; 12 | 13 | simcausal 0.2.0 14 | ============== 15 | 16 | * New CRAN version release. 17 | 18 | * Adding vignette; improved documentation for ?simcausal; changing the documentation for sim, simobs and simfull functions; updated documentation for add.nodes/+node and add.action/+action syntax; node.depr is removed from user access. 
19 | 20 | simcausal 0.3.0 21 | ============== 22 | 23 | * New CRAN version release. 24 | 25 | * Major updates: 26 | * Addition of network() function that allows simulating networks; 27 | * Support for network-based sampling of dependent data. New syntax Variable[[netindx]] allows one to index Variable values of friends (connections) (assuming a network has been already generated with the network() function); 28 | * User calling environment is now captured by each call to node() function; this is used as an enclosing environment when evaluating node() function arguments; 29 | * Moving to R6 class node call parser; 30 | 31 | * Minor updates: 32 | * Adding global option options(simcausal.verbose) that can be set to FALSE to suppress message printing; default is TRUE; 33 | * Adding network simulation example to /tests/Runit/gen.net.example.R, simulating networks using igraph package; Examples for simulating networks based on igraph ER model; 34 | * Adding conversion utility function to/from sparse adjacency network matrix to simcausal internal network storage; 35 | * Switching from warning to message when replacing/modifying existing DAG node object; 36 | 37 | simcausal 0.4.0 38 | ============== 39 | 40 | * New CRAN version release. 41 | 42 | * Fixing a warning on v.0.3.0 for generic "melt" being exported by data.table and reshape2; 43 | 44 | * Minor updates to the vignette; 45 | 46 | * Fixing an issue with NetIndClass not calling self$make.nF() when outside network matrix is assigned; 47 | 48 | * Making nF (vector of the number of friends across observations) available as a special variable that can be used inside node expressions (as distributional parameters); 49 | 50 | simcausal 0.5.0 51 | ============== 52 | 53 | * Major changes since last version 0.4.0. 54 | 55 | * Added support for time-varying nodes over network: can index friend nodes with syntactic sugar expressions like Node[[F_indx]], even when this Node is a time-varying random variable. 
For such nodes, start by using the syntactic indexing by t, i.e., Node[t], then add friend indexing with expressions like Node[t][[F_indx]]. See the forthcoming vignette on networks for examples. 56 | 57 | * Added support for latent variables, set.DAG has a new argument v.latent, any node names specified to this argument will be hidden from the simulated data. This allows one to explicitly define errors (U’s) for each node. This will be also plotted differently by plotDAG. 58 | 59 | * set.DAG has a new argument n.test, used for changing the default sample size of the simulation test performed by set.DAG (default n.test=100). Set n.test=0 to skip the DAG object test completely. 60 | 61 | * Node argument EFU can be a logical expression (any function of previously defined node names), which must evaluate to TRUE/FALSE. Right-censoring for a specific observation then occurs only if: 1) The node value evaluated to 1 & 2) EFU evaluates to TRUE 62 | 63 | * Added support for multivariate nodes, such as multivariate normal, copulas, etc. See a separate section on multivariate distributions in ?node. 64 | 65 | * A specially reserved variable name "Nsamp" can be used in any node expression, always evaluates to the currently simulated sample size (a column of constant values). Works similarly to the network-specific reserved variables "Kmax" and "nF" (see ?network). 66 | 67 | * Changed the format of plotDAG output. The internal (endogenous) nodes are no longer surrounded by circles. All latent nodes are surrounded by circles, all children of latent nodes are shown with dashed arrow. 68 | 69 | * Switched internal storage of simulated data to data.table. The output is always a data.frame. 70 | 71 | simcausal 0.5.1 72 | ============== 73 | 74 | * More consistent control over non-standard evaluation. 
Can wrap any R expression supplied as the node argument in .() or eval() to skip all of simcausal non-standard evaluation and checks; this will evaluate the expression in the environment of the data + user calling environment. The default R subsetting operators for '[...]' and '[[...]]' will be applied to all expressions wrapped in .() and hence the simcausal operators for time-varying nodes and networks will no longer be applied. See a separate section in ?node and examples 7,8 for details. One can apply the operator .() to only part of the expression, to avoid simcausal parsing on that specific part as shown in Example 8 with `.(coefAi[t]) * A[t-1]`. 75 | 76 | * Switching to new naming convention for categorical distributions (rcat.b0, rcat.b1 and rcat.factor). Old rcategor.int and rcategor are still available and are deprecated. 77 | 78 | 79 | simcausal 0.5.3 80 | ============== 81 | 82 | * Fixing a bug that was identified due to changes to data.table (eval.MSM() was incorrectly checking for NA outcomes) 83 | * Fixing a CRAN error in test on R devel. 84 | 85 | simcausal 0.5.4 86 | ============== 87 | 88 | * Updating the package for new CRAN release 89 | 90 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## First submission of the new version 2 | simcausal 0.5.5 3 | ============== 4 | This update tries to put simcausal back on CRAN. 5 | 6 | ## Test environments: 7 | * local OS X install, R 3.5.2 8 | * ubuntu 12.04 (on travis-ci), R 3.5.2 9 | * R-hub 10 | * win-builder 11 | 12 | ## R CMD check --as-cran results: 13 | * no ERRORs or WARNINGs. 14 | 15 | ## Resubmission 16 | simcausal 0.5.4 17 | ============== 18 | This resubmission fixes one note. The other note is a FALSE POSITIVE. 19 | Note from JSS editor: "The DOI in the CITATION is for a new JSS publication that will be registered after publication on CRAN." 
20 | 21 | ## Test environments: 22 | * local OS X install, R 3.3.1 23 | * ubuntu 12.04 (on travis-ci), R 3.3.1 24 | * win-builder 25 | 26 | ## R CMD check --as-cran results: 27 | * no ERRORs or WARNINGs. 28 | * one NOTE: Found the following (possibly) invalid DOIs: 29 | DOI: 10.18637/jss.v081.i02 30 | From: inst/CITATION 31 | Status: Not Found 32 | Message: 404 33 | "The DOI in the CITATION is for a new JSS publication that will be registered after publication on CRAN." 34 | 35 | ## First submission of the new version 36 | simcausal 0.5.4 37 | ============== 38 | This update includes CITATION for the forthcoming JSS simcausal publication. Note from JSS editor: "The DOI in the CITATION is for a new JSS publication that will be registered after publication on CRAN." 39 | 40 | ## Test environments: 41 | * local OS X install, R 3.3.1 42 | * ubuntu 12.04 (on travis-ci), R 3.3.1 43 | * win-builder 44 | 45 | ## R CMD check --as-cran results: 46 | * no ERRORs or WARNINGs. 47 | * one NOTE: "The DOI in the CITATION is for a new JSS publication that will be registered after publication on CRAN." 48 | 49 | ## First submission of the new version 50 | simcausal 0.5.3 51 | ============== 52 | This version fixes a CRAN test error on R devel and a bug identified due to the upcoming version of the data.table dependency. 53 | ## Test environments: 54 | 55 | ## Test environments: 56 | * local OS X install, R 3.3.1 57 | * ubuntu 12.04 (on travis-ci), R 3.3.1 58 | * win-builder 59 | 60 | ## R CMD check --as-cran results: 61 | * no ERRORs or WARNINGs. 62 | * no NOTEs. 63 | 64 | ## Resubmission 65 | simcausal 0.5.1 66 | ============== 67 | This is a resubmission. This should fix the reverse dependency failure with the 'tmlenet' package. 
68 | 69 | ## First submission of the new version 70 | simcausal 0.5.1 71 | ============== 72 | ## Test environments: 73 | 74 | ## Test environments: 75 | * local OS X install, R 3.2.4 76 | * ubuntu 12.04 (on travis-ci), R 3.2.4 77 | * win-builder (devel and release) 78 | 79 | ## R CMD check --as-cran results: 80 | * no ERRORs or WARNINGs. 81 | * no NOTEs. 82 | 83 | ## Resubmission 84 | simcausal 0.5.0 85 | ============== 86 | This is a resubmission. In this version I have: 87 | 88 | * Fixed the reverse dependency error for 'tmlenet' package 89 | 90 | simcausal 0.5.0 91 | ============== 92 | ## Test environments: 93 | 94 | ## Test environments: 95 | * local OS X install, R 3.2.3 96 | * ubuntu 12.04 (on travis-ci), R 3.2.3 97 | * win-builder (devel and release) 98 | 99 | ## R CMD check --as-cran results: 100 | * no ERRORs or WARNINGs. 101 | * no NOTEs. 102 | 103 | simcausal 0.4.0 104 | ============== 105 | 106 | ## Test environments: 107 | * local OS X install, R 3.2.2 108 | * ubuntu 12.04 (on travis-ci), R 3.2.2 109 | * win-builder (devel and release) 110 | 111 | ## R CMD check --as-cran results: 112 | * no ERRORs or WARNINGs. 113 | * no NOTEs. 114 | 115 | * Adding importFrom("methods", "is") and adding methods to Imports field; 116 | * Fixing a warning on v.0.3.0 for generic "melt" being exported twice by data.table and reshape2; 117 | 118 | simcausal 0.3.0 119 | ============== 120 | 121 | ## Test environments: 122 | * local OS X install, R 3.2.0 123 | * ubuntu 12.04 (on travis-ci), R 3.2.2 124 | * win-builder (devel and release) 125 | 126 | ## R CMD check --as-cran results: 127 | * no ERRORs or WARNINGs. 128 | * no NOTEs. 129 | 130 | * Please note that the R code in the vignette requires a substantial amount of time to run. 
131 | 132 | simcausal 0.2.0 133 | ============== 134 | 135 | ## Test environments: 136 | * local OS X install, R 3.2.0 137 | * ubuntu 12.04 (on travis-ci), R 3.2.0 138 | * win-builder (devel and release) 139 | 140 | ## R CMD check --as-cran results: 141 | * no ERRORs or WARNINGs. 142 | * no NOTEs. 143 | 144 | * Please note that the R code in the vignette requires a substantial amount of time to run. 145 | 146 | simcausal 0.1 147 | ============== 148 | 149 | ## Resubmission 3: 150 | This is a third resubmission. In this version I have: 151 | 152 | * Removed \donttest{} sections from two examples in the help manual 153 | * Removed last sentence of the "Description" that referred to the vignette 154 | * Added skeleton NEWS and README.md files 155 | 156 | ## Resubmission 2: 157 | This is a second resubmission. In this version I have: 158 | 159 | * Reformatted all the R code in .Rd examples to be under 100 line width 160 | * Made sure all the checks are passed when running R CMD check --as-cran 161 | 162 | ## Resubmission: 163 | This is a resubmission. In this version I have: 164 | 165 | * Converted the DESCRIPTION title to title case 166 | * Removed . from the title end 167 | * Removed VignetteBuilder line from the DESCRIPTION 168 | * Removed 'simcausal.Rproj' file 169 | * Reformatted all the R code in .Rd examples to be under 100 line width 170 | 171 | ## Initial submission to CRAN: 172 | 173 | ## Test environments: 174 | * local OS X install, R 3.1.3 175 | * win-builder (devel and release) 176 | 177 | ## R CMD check results 178 | There were no ERRORs or WARNINGs. 
179 | 180 | 181 | 182 | -------------------------------------------------------------------------------- /man/set.targetMSM.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/targetparam.r 3 | \name{set.targetMSM} 4 | \alias{set.targetMSM} 5 | \title{Define Causal Parameters with a Working Marginal Structural Model (MSM)} 6 | \usage{ 7 | set.targetMSM(DAG, outcome, t, formula, family = "quasibinomial", hazard, 8 | ..., attr = list()) 9 | } 10 | \arguments{ 11 | \item{DAG}{Object specifying the directed acyclic graph (DAG) for the observed data} 12 | 13 | \item{outcome}{Name of the outcome node} 14 | 15 | \item{t}{Vector of time points which are used for pooling the \code{outcome}} 16 | 17 | \item{formula}{MSM formula for modeling pooled outcome on the full data with glm regression. Left hand side should be equal to the \code{outcome}, right hand side can include baseline covariates, action-specific attribute names and time-dependent treatment summary measures. See Details.} 18 | 19 | \item{family}{Model family to use in the \code{glm} regression} 20 | 21 | \item{hazard}{When TRUE, MSM fits the discrete hazard function for survival \code{outcome} (if outcome node had \code{EFU=TRUE} attribute)} 22 | 23 | \item{...}{Additional attributes (to be used in future versions)} 24 | 25 | \item{attr}{Additional attributes (to be used in future versions)} 26 | } 27 | \value{ 28 | A modified DAG object with well-defined target parameter saved as part of the DAG, this DAG can now be passed as an argument to \code{eval.target} function for actual Monte-Carlo evaluation of the target parameter. See Examples. 29 | } 30 | \description{ 31 | Set up the MSM causal target parameter for the current DAG object. These settings can be later used to evaluate the true value of the MSM parameter on the full (counterfactual) data by calling \code{eval.target} function. 
32 | } 33 | \details{ 34 | Enclosing an MSM formula term inside S(), e.g., S(mean(A[0:t])), forces this term to be evaluated as a summary measure of time-indexed nodes in the full data environment. All such MSM terms are parsed and then evaluated inside the previously simulated full data environment, each S() term is then replaced with a vector name 'XMSMterms.i' that is a result of this evaluation. 35 | } 36 | \examples{ 37 | #--------------------------------------------------------------------------------------- 38 | # DAG with time-varying outcomes (survival outcome) 39 | #--------------------------------------------------------------------------------------- 40 | # Define longitudinal data structure over 6 time-points t=(0:5) 41 | t_end <- 5 42 | D <- DAG.empty() 43 | D <- D + node("L2", t=0, distr="rbern", prob=0.05) 44 | D <- D + node("L1", t=0, distr="rbern", prob=ifelse(L2[0]==1,0.5,0.1)) 45 | D <- D + node("A1", t=0, distr="rbern", prob=ifelse(L1[0]==1 & L2[0]==0, 0.5, 46 | ifelse(L1[0]==0 & L2[0]==0, 0.1, 47 | ifelse(L1[0]==1 & L2[0]==1, 0.9, 0.5)))) 48 | D <- D + node("A2", t=0, distr="rbern", prob=0, order=4, EFU=TRUE) 49 | D <- D + node("Y", t=0, distr="rbern", 50 | prob=plogis(-6.5 + L1[0] + 4*L2[0] + 0.05*I(L2[0]==0)), 51 | EFU=TRUE) 52 | D <- D + node("L2", t=1:t_end, distr="rbern", prob=ifelse(A1[t-1]==1, 0.1, 53 | ifelse(L2[t-1]==1, 0.9, 54 | min(1,0.1 + t/16)))) 55 | D <- D + node("A1", t=1:t_end, distr="rbern", prob=ifelse(A1[t-1]==1, 1, 56 | ifelse(L1[0]==1 & L2[0]==0, 0.3, 57 | ifelse(L1[0]==0 & L2[0]==0, 0.1, 58 | ifelse(L1[0]==1 & L2[0]==1, 0.7, 59 | 0.5))))) 60 | D <- D + node("A2", t=1:t_end, distr="rbern", prob=0, EFU=TRUE) 61 | D <- D + node( "Y", t=1:t_end, distr="rbern", 62 | prob=plogis(-6.5 + L1[0] + 4*L2[t] + 0.05*sum(I(L2[0:t]==rep(0,(t+1))))), 63 | EFU=TRUE) 64 | D <- set.DAG(D) 65 | 66 | # Add two dynamic actions (indexed by values of the parameter theta={0,1}) 67 | # Define intervention nodes 68 | act_t0_theta <- node("A1",t=0, 
distr="rbern", prob=ifelse(L2[0] >= theta,1,0)) 69 | act_tp_theta <- node("A1",t=1:t_end, distr="rbern", 70 | prob=ifelse(A1[t-1]==1,1,ifelse(L2[t] >= theta,1,0))) 71 | # Add two actions to current DAG object 72 | D <- D + action("A1_th0", nodes=c(act_t0_theta, act_tp_theta), theta=0) 73 | D <- D + action("A1_th1", nodes=c(act_t0_theta, act_tp_theta), theta=1) 74 | 75 | #--------------------------------------------------------------------------------------- 76 | # MSM EXAMPLE 1: Modeling survival over time 77 | #--------------------------------------------------------------------------------------- 78 | # Modeling pooled survival Y_t over time as a projection on the following working 79 | # logistic model: 80 | msm.form <- "Y ~ theta + t + I(theta*t)" 81 | D <- set.targetMSM(D, outcome="Y", t=0:5, formula=msm.form, family="binomial", 82 | hazard=FALSE) 83 | MSMres <- eval.target(D, n=1000) 84 | MSMres$coef 85 | 86 | #--------------------------------------------------------------------------------------- 87 | # MSM EXAMPLE 2: Modeling survival over time with exposure-based summary measures 88 | #--------------------------------------------------------------------------------------- 89 | # Now we want to model Y_t by adding a summary measure covariate defined as mean 90 | # exposure A1 from time 0 to t; 91 | # Enclosing any term inside S() forces its evaluation in the environment 92 | # of the full (counterfactual) data. 93 | msm.form_sum <- "Y ~ theta + t + I(theta*t) + S(mean(A1[0:t]))" 94 | D <- set.targetMSM(D, outcome="Y", t=0:5, formula=msm.form_sum, family="binomial", 95 | hazard=FALSE) 96 | MSMres <- eval.target(D, n=1000) 97 | MSMres$coef 98 | } 99 | \references{ 100 | Sofrygin O, van der Laan MJ, Neugebauer R (2017). 101 | "simcausal R Package: Conducting Transparent and Reproducible Simulation Studies of Causal Effect Estimation with Complex Longitudinal Data." 102 | Journal of Statistical Software, 81(2), 1-47. doi: 10.18637/jss.v081.i02. 
103 | } 104 | -------------------------------------------------------------------------------- /man/simcausal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simcausal-package.r 3 | \docType{package} 4 | \name{simcausal} 5 | \alias{simcausal} 6 | \alias{simcausal-package} 7 | \title{Simulating Longitudinal Data with Causal Inference Applications} 8 | \description{ 9 | The \pkg{simcausal} R package is a tool for specification and simulation of complex longitudinal data structures that are 10 | based on structural equation models. The package provides a flexible tool for conducting transparent and reproducible 11 | simulation studies, with a particular emphasis on the types of data and interventions frequently encountered in typical 12 | causal inference problems, such as, observational data with time-dependent confounding, selection bias, and random monitoring processes. 13 | The package interface allows for concise expression of complex functional dependencies between a large number of nodes, 14 | where each node may represent a time-varying random variable. 15 | The package allows for specification and simulation of counterfactual data under various user-specified interventions 16 | (e.g., static, dynamic, deterministic, or stochastic). 17 | In particular, the interventions may represent exposures to treatment regimens, the occurrence or non-occurrence of right-censoring 18 | events, or of clinical monitoring events. \pkg{simcausal} enables the computation of a selected set of user-specified 19 | features of the distribution of the counterfactual data that represent common causal quantities of interest, 20 | such as, treatment-specific means, the average treatment effects and coefficients from working marginal structural models. 21 | For additional details and examples please see the package vignette and the function-specific documentation. 
22 | } 23 | \section{Documentation}{ 24 | 25 | \itemize{ 26 | \item To see the package vignette use: \code{vignette("simcausal_vignette", package="simcausal")} 27 | \item To see all available package documentation use: \code{help(package = 'simcausal')} 28 | } 29 | } 30 | 31 | \section{Routines}{ 32 | 33 | The following routines will be generally invoked by a user, in the same order as presented below. 34 | \describe{ 35 | \item{\code{\link{DAG.empty}}}{Initiates an empty \code{DAG} object that contains no nodes.} 36 | \item{\code{\link{node}}}{Defines node(s) in the structural equation model and its conditional distribution(s) using a language of vector-like R expressions. A call to \code{node} can specify either a single node or multiple nodes at once.} 37 | \item{\code{\link{add.nodes}} or \code{+\link{node}}}{Provide two equivalent ways of growing the structural equation model by adding new nodes and their conditional distributions. 38 | Sequentially define nodes in the \code{DAG} object, with each node representing the outcomes of one or more structural equation(s), altogether making-up the causal model of interest.} 39 | \item{\code{\link{set.DAG}}}{Performs consistency checks and locks the \code{DAG} object so that no additional nodes can be subsequently added to the structural equation model.} 40 | \item{\code{\link{sim}} or \code{\link{simobs}}}{Simulates iid observations of the complete node sequence defined by the \code{DAG} object. The output dataset is stored as a \code{data.frame} and is referred to as the \emph{observed data}.} 41 | \item{\code{\link{add.action}} or \code{+\link{action}}}{Provide two equivalent ways to define one or more actions. 42 | Each action modifies the conditional distribution for a subset of nodes in the original \code{DAG} object. The resulting data generating distribution is referred to as the post-intervention distribution. 
43 | It is saved in the \code{DAG} object alongside the original structural equation model (\code{DAG} object).} 44 | \item{\code{\link{sim}} or \code{\link{simfull}}}{Simulates independent observations from one or more post-intervention distribution(s). 45 | Produces a named list of \code{data.frame}s, collectively referred to as the \emph{full data}. 46 | The number of output \code{data.frame}s is equal to the number of post-intervention distributions specified in the \code{actions} argument, where each \code{data.frame} object is an iid sample from a particular post-intervention distribution.} 47 | \item{\code{\link{set.targetE}} and \code{\link{set.targetMSM}}}{Define two distinct types of target causal parameters. 48 | The function \code{set.targetE} defines causal parameters as the expected value(s) of \code{DAG} node(s) under one post-intervention distribution or the contrast of such expected value(s) from two post-intervention distributions. 49 | The function \code{set.targetMSM} defines causal parameters based on a user-specified \bold{working} marginal structural model.} 50 | \item{\code{\link{eval.target}}}{Evaluates the previously defined causal parameter using simulated full data} 51 | } 52 | } 53 | 54 | \section{Data structures}{ 55 | 56 | The following most common types of output are produced by the package: 57 | \itemize{ 58 | \item \emph{parameterized causal \code{DAG} model} - object that specifies the structural equation model, along with interventions and the causal target parameter of interest. 59 | \item \emph{observed data} - data simulated from the (pre-intervention) distribution specified by the structural equation model. 60 | \item \emph{full data} - data simulated from one or more post-intervention distributions defined by actions on the structural equation model. 61 | \item \emph{causal target parameter} - the true value of the causal target parameter evaluated with full data. 
62 | } 63 | } 64 | 65 | \section{Updates}{ 66 | 67 | Check for updates and report bugs at \url{http://github.com/osofr/simcausal}. 68 | } 69 | 70 | \references{ 71 | Sofrygin O, van der Laan MJ, Neugebauer R (2017). 72 | "simcausal R Package: Conducting Transparent and Reproducible Simulation Studies of Causal Effect Estimation with Complex Longitudinal Data." 73 | Journal of Statistical Software, 81(2), 1-47. doi: 10.18637/jss.v081.i02. 74 | } 75 | -------------------------------------------------------------------------------- /man/set.targetE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/targetparam.r 3 | \name{set.targetE} 4 | \alias{set.targetE} 5 | \title{Define Non-Parametric Causal Parameters} 6 | \usage{ 7 | set.targetE(DAG, outcome, t, param, ..., attr = list()) 8 | } 9 | \arguments{ 10 | \item{DAG}{Object specifying the directed acyclic graph (DAG) for the observed data} 11 | 12 | \item{outcome}{Name of the outcome node} 13 | 14 | \item{t}{Integer vector of time points to use for expectations, has to be omitted or NULL for non-time-varying DAGs.} 15 | 16 | \item{param}{A character vector \code{"ActionName1"}, specifying the action name for the expectation target parameter; 17 | \code{"ActionName1 / ActionName2"}, for the ratio of expectations of \code{outcome} nodes for actions \code{"ActionName1"} 18 | and \code{"ActionName2"}; \code{"ActionName1 - ActionName2"} for the contrast of expectations of \code{outcome} for actions \code{"ActionName1"} and \code{"ActionName2"}} 19 | 20 | \item{...}{Additional attributes (to be used in future versions)} 21 | 22 | \item{attr}{Additional attributes (to be used in future versions)} 23 | } 24 | \value{ 25 | A modified DAG object with the target parameter saved as part of the DAG, 26 | this DAG can now be passed as an argument to \code{\link{eval.target}} function for actual Monte-Carlo 
evaluation of the target parameter. See Examples. 27 | } 28 | \description{ 29 | Set up the causal target parameter as a vector of expectations, ratio of expectations or contrast of expectations (average treatment effect) over the nodes of specified actions. 30 | These settings are then used to evaluate the true value of the causal target parameter by calling \code{\link{eval.target}} function. 31 | } 32 | \examples{ 33 | #--------------------------------------------------------------------------------------- 34 | # EXAMPLE 1: DAG with single point treatment 35 | #--------------------------------------------------------------------------------------- 36 | # Define a DAG with single-point treatment ("Anode") 37 | D <- DAG.empty() 38 | D <- D + node("W1", distr="rbern", prob=plogis(-0.5)) 39 | D <- D + node("W2", distr="rbern", prob=plogis(-0.5 + 0.5*W1)) 40 | D <- D + node("Anode", distr="rbern", prob=plogis(-0.5 - 0.3*W1 - 0.3*W2)) 41 | D <- D + node("Y", distr="rbern", prob=plogis(-0.1 + 1.2*Anode + 0.3*W1 + 0.3*W2), 42 | EFU=TRUE) 43 | D_WAY <- set.DAG(D) 44 | 45 | # Defining interventions (actions) 46 | # define action "A1" that sets the treatment node to constant 1 47 | D_WAY <- D_WAY + action("A1", nodes=node("Anode",distr="rbern", prob=1)) 48 | # define another action "A0" that sets the treatment node to constant 0 49 | D_WAY <- D_WAY + action("A0", nodes=node("Anode",distr="rbern", prob=0)) 50 | #--------------------------------------------------------------------------------------- 51 | # Defining and calculating causal parameters: 52 | #--------------------------------------------------------------------------------------- 53 | # Counterfactual mean of node "Y" under action "A1" 54 | D_WAY <- set.targetE(D_WAY, outcome="Y", param="A1") 55 | eval.target(D_WAY, n=10000) 56 | 57 | # Contrasts of means of "Y" under action "A1" minus action "A0" 58 | D_WAY <- set.targetE(D_WAY, outcome="Y", param="A1-A0") 59 | eval.target(D_WAY, n=10000) 60 | 61 | # Ratios of 
"Y" under action "A1" over action "A0" 62 | D_WAY <- set.targetE(D_WAY, outcome="Y", param="A1/A0") 63 | eval.target(D_WAY, n=10000) 64 | 65 | # Alternative parameter evaluation by passing already simulated full data to 66 | # \\code{eval.target} 67 | X_dat1 <- simfull(A(D_WAY), n=10000) 68 | D_WAY <- set.targetE(D_WAY, outcome="Y", param="A1/A0") 69 | eval.target(D_WAY, data=X_dat1) 70 | 71 | #--------------------------------------------------------------------------------------- 72 | # EXAMPLE 2: DAG with time-varying outcomes (survival outcome) 73 | #--------------------------------------------------------------------------------------- 74 | # Define longitudinal data structure over 6 time-points t=(0:5) 75 | t_end <- 5 76 | D <- DAG.empty() 77 | D <- D + node("L2", t=0, distr="rbern", prob=0.05) 78 | D <- D + node("L1", t=0, distr="rbern", prob=ifelse(L2[0]==1,0.5,0.1)) 79 | D <- D + node("A1", t=0, distr="rbern", prob=ifelse(L1[0]==1 & L2[0]==0, 0.5, 80 | ifelse(L1[0]==0 & L2[0]==0, 0.1, 81 | ifelse(L1[0]==1 & L2[0]==1, 0.9, 0.5)))) 82 | D <- D + node("A2", t=0, distr="rbern", prob=0, order=4, EFU=TRUE) 83 | D <- D + node("Y", t=0, distr="rbern", 84 | prob=plogis(-6.5 + L1[0] + 4*L2[0] + 0.05*I(L2[0]==0)), 85 | EFU=TRUE) 86 | D <- D + node("L2", t=1:t_end, distr="rbern", prob=ifelse(A1[t-1]==1, 0.1, 87 | ifelse(L2[t-1]==1, 0.9, 88 | min(1,0.1 + t/16)))) 89 | D <- D + node("A1", t=1:t_end, distr="rbern", prob=ifelse(A1[t-1]==1, 1, 90 | ifelse(L1[0]==1 & L2[0]==0, 0.3, 91 | ifelse(L1[0]==0 & L2[0]==0, 0.1, 92 | ifelse(L1[0]==1 & L2[0]==1, 0.7, 93 | 0.5))))) 94 | D <- D + node("A2", t=1:t_end, distr="rbern", prob=0, EFU=TRUE) 95 | D <- D + node("Y", t=1:t_end, distr="rbern", 96 | prob=plogis(-6.5 + L1[0] + 4*L2[t] + 0.05*sum(I(L2[0:t]==rep(0,(t+1))))), 97 | EFU=TRUE) 98 | D <- set.DAG(D) 99 | 100 | # Add two dynamic actions (indexed by values of the parameter theta={0,1}) 101 | # Define intervention nodes 102 | act_t0_theta <- node("A1",t=0, distr="rbern", 
prob=ifelse(L2[0] >= theta,1,0)) 103 | act_tp_theta <- node("A1",t=1:t_end, distr="rbern", 104 | prob=ifelse(A1[t-1]==1,1,ifelse(L2[t] >= theta,1,0))) 105 | # Add two actions to current DAG object 106 | D <- D + action("A1_th0", nodes=c(act_t0_theta, act_tp_theta), theta=0) 107 | D <- D + action("A1_th1", nodes=c(act_t0_theta, act_tp_theta), theta=1) 108 | #--------------------------------------------------------------------------------------- 109 | # Defining and calculating the target parameter 110 | #--------------------------------------------------------------------------------------- 111 | # Counterfactual mean of node "Y" at time-point t=4 under action "A1_th0" 112 | D <- set.targetE(D, outcome="Y", t=4, param="A1_th0") 113 | eval.target(D, n=5000) 114 | 115 | # Vector of counterfactual means of"Y" over all time points under action "A1_th1" 116 | D <- set.targetE(D, outcome="Y", t=0:5, param="A1_th1") 117 | eval.target(D, n=5000) 118 | 119 | # Vector of counterfactual contrasts of "Y" over all time points 120 | # for action "A1_th1" minus action "A1_th0" 121 | D <- set.targetE(D, outcome="Y", t=0:5, param="A1_th1 - A1_th0") 122 | eval.target(D, n=5000) 123 | 124 | # Vector of counterfactual ratios of "Y" over all time points 125 | # for action "A1_th0" over action "A1_th1" 126 | D <- set.targetE(D, outcome="Y", t=0:5, param="A1_th0 / A1_th1") 127 | eval.target(D, n=5000) 128 | } 129 | \references{ 130 | Sofrygin O, van der Laan MJ, Neugebauer R (2017). 131 | "simcausal R Package: Conducting Transparent and Reproducible Simulation Studies of Causal Effect Estimation with Complex Longitudinal Data." 132 | Journal of Statistical Software, 81(2), 1-47. doi: 10.18637/jss.v081.i02. 
133 | } 134 | -------------------------------------------------------------------------------- /man/add.action.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/interface.r 3 | \name{add.action} 4 | \alias{add.action} 5 | \alias{action} 6 | \title{Define and Add Actions (Interventions)} 7 | \usage{ 8 | add.action(DAG, name, nodes, ..., attr = list()) 9 | 10 | action(...) 11 | } 12 | \arguments{ 13 | \item{DAG}{DAG object} 14 | 15 | \item{name}{Unique name of the action} 16 | 17 | \item{nodes}{A list of node objects that defines the action on the DAG (replaces the distributions of the corresponding nodes in DAG)} 18 | 19 | \item{...}{Additional named attributes defining / indexing the action} 20 | 21 | \item{attr}{Additional named attributes defining / indexing the action} 22 | } 23 | \value{ 24 | A modified \code{DAG} object with the added action 25 | } 26 | \description{ 27 | Define and add new action (intervention) to the existing DAG object. Use either syntax \code{DAG +} \code{action(name = ,nodes = )} or \code{add.action(DAG = ,name = ,nodes = )}. Both give identical results, see the examples in the vignette and below for details. 28 | } 29 | \details{ 30 | In addition to the action name and list of action nodes, both of these functions accept arbitrary named attributes (as additional arguments which must be given a name). 31 | These additional attributes can be used to simplify specification of dynamic regimes (actions that depend on the past observed covariates). 32 | 33 | The formula of the intervention node is allowed to contain undefined variables, as long as those are later defined as a named argument to \code{action}. 
34 | 35 | In Example 2 below, \code{node("A",..., mean = ifelse(W1 >= theta, 1, 0))}, 36 | defines the mean of the node "A" as a function of some undefined variable \code{theta}, setting \code{A} to 1 if the baseline node \code{W1} is above or equal to \code{theta} and to 0 otherwise. 37 | One specifies actual values of \code{theta} while defining a new action, possibly creating a series of actions, each indexed by a different value of \code{theta}. 38 | A new action can be defined with \code{D<-D+action("A1th0.1", nodes=actN, theta=0.1)}. 39 | 40 | Note that any name can be used in place of \code{theta}. This attribute variable can appear anywhere inside the node distribution formula. 41 | Finally, the attribute variable can also be time varying and, just like with DAG nodes, can be indexed by square bracket notation, \code{theta[t]}. See Example 3 for defining time-varying attributes. 42 | } 43 | \examples{ 44 | #--------------------------------------------------------------------------------------- 45 | # EXAMPLE 1: Showing two equivalent ways of defining an action for a simple DAG 46 | #--------------------------------------------------------------------------------------- 47 | 48 | D <- DAG.empty() 49 | D <- D + node(name="W1", distr="rbern", prob=plogis(-0.5)) 50 | D <- D + node(name="W2", distr="rbern", prob=plogis(-0.5 + 0.5*W1)) 51 | D <- D + node(name="A", distr="rbern", prob=plogis(-0.5 + 0.5*W1+ 0.5*W2)) 52 | Dset <- set.DAG(D) 53 | 54 | # Syntax '+ action': define two actions, intervening on node "A", imputing order 55 | Dset <- Dset + action("A0", nodes=node("A", distr="rbern", prob=0)) 56 | Dset <- Dset + action("A1", nodes=node("A", distr="rbern", prob=1)) 57 | 58 | # Equivalent syntax 'add.action': define two actions, intervening on node "A" 59 | Dset <- add.action(Dset, "A0", nodes=node("A", distr="rbern", prob=0)) 60 | Dset <- add.action(Dset, "A1", nodes=node("A", distr="rbern", prob=1)) 61 | 62 | 
#--------------------------------------------------------------------------------------- 63 | # EXAMPLE 2: Adding named attributes that define (index) the action. 64 | # Define intervention on A that is conditional on W1 crossing some threshold theta 65 | #--------------------------------------------------------------------------------------- 66 | 67 | # Redefining node W1 as uniform [0,1] 68 | D <- DAG.empty() 69 | D <- D + node(name="W1", distr="runif", min=0, max=1) 70 | D <- D + node(name="W2", distr="rbern", prob=plogis(-0.5 + 0.5*W1)) 71 | D <- D + node(name="A", distr="rbern", prob=plogis(-0.5 + 0.5*W1+ 0.5*W2)) 72 | Dset <- set.DAG(D) 73 | 74 | # Define a node that is indexed by unknown variable theta 75 | actN <- node("A",distr="rbern",prob=ifelse(W1 >= theta,1,0)) 76 | # Define 3 actions for theta=0.1, 0.5, 0.9 77 | Dset <- Dset + action("A1th0.1", nodes = actN, theta = 0.1) 78 | Dset <- Dset + action("A1th0.5", nodes = actN, theta = 0.5) 79 | Dset <- Dset + action("A1th0.9", nodes = actN, theta = 0.9) 80 | 81 | # Simulate 50 observations per each action above 82 | simfull(A(Dset), n=50) 83 | 84 | #--------------------------------------------------------------------------------------- 85 | # EXAMPLE 3: Time-varying action attributes for longitudinal DAG 86 | #--------------------------------------------------------------------------------------- 87 | # Define longitudinal data structure over 6 time-points t=(0:5) with survival outcome "Y" 88 | t_end <- 5 89 | D <- DAG.empty() 90 | D <- D + node("L2", t=0, distr="rbern", prob=0.05) 91 | D <- D + node("L1", t=0, distr="rbern", prob=ifelse(L2[0]==1,0.5,0.1)) 92 | D <- D + node("A1", t=0, distr="rbern", prob=ifelse(L1[0]==1, 0.5, 0.1)) 93 | D <- D + node("Y", t=0, distr="rbern", 94 | prob=plogis(-6.5 + L1[0] + 4*L2[0] + 0.05*I(L2[0]==0)), EFU=TRUE) 95 | D <- D + node("L2", t=1:t_end, distr="rbern", prob=ifelse(A1[t-1]==1, 0.1, 0.9)) 96 | D <- D + node("A1", t=1:t_end, distr="rbern", 97 | 
prob=ifelse(A1[t-1]==1, 1, ifelse(L1[0]==1 & L2[0]==0, 0.3, 0.5))) 98 | D <- D + node("Y", t=1:t_end, distr="rbern", prob=plogis(-6.5+L1[0]+4*L2[t]), EFU=TRUE) 99 | D <- set.DAG(D) 100 | 101 | #--------------------------------------------------------------------------------------- 102 | # Dynamic actions indexed by constant value of parameter theta={0,1}) 103 | #--------------------------------------------------------------------------------------- 104 | # Define time-varying node A1: sets A1 to 1 if L2 at t is >= theta 105 | actN_A1 <- node("A1",t=0:t_end, distr="rbern", prob=ifelse(L2[t] >= theta,1,0)) 106 | 107 | # Define two actions, indexed by fixed values of theta={0,1} 108 | D_act <- D + action("A1_th0", nodes=actN_A1, theta=0) 109 | D_act <- D_act + action("A1_th1", nodes=actN_A1, theta=1) 110 | 111 | # Simulate 50 observations for per each action above 112 | simfull(simcausal::A(D_act), n=50) 113 | 114 | #--------------------------------------------------------------------------------------- 115 | # Dynamic actions indexed by time-varying parameter theta[t] 116 | #--------------------------------------------------------------------------------------- 117 | # This defines an action node with threshold theta varying in time (note syntax theta[t]) 118 | actN_A1 <- node("A1",t=0:t_end, distr="rbern", prob=ifelse(L2[t] >= theta[t],1,0)) 119 | 120 | # Now define 3 actions that are indexed by various values of theta over time 121 | D_act <- D + action("A1_th_const0", nodes=actN_A1, theta=rep(0,(t_end+1))) 122 | D_act <- D_act + action("A1_th_var1", nodes=actN_A1, theta=c(0,0,0,1,1,1)) 123 | D_act <- D_act + action("A1_th_var2", nodes=actN_A1, theta=c(0,1,1,1,1,1)) 124 | 125 | # Simulate 50 observations for per each action above 126 | simfull(simcausal::A(D_act), n=50) 127 | } 128 | -------------------------------------------------------------------------------- /tests/examples/example.simnets.R: 
-------------------------------------------------------------------------------- 1 | #-------------------------------------------------------------------------------------------------- 2 | # EXAMPLE 1. USING igraph R PACKAGE TO SIMULATE NETWORKS 3 | #-------------------------------------------------------------------------------------------------- 4 | 5 | #-------------------------------------------------------------------------------------------------- 6 | # Example of a network sampler, will be provided as "netfun" argument to network(, netfun=); 7 | # Generates a random graph according to the G(n,m) Erdos-Renyi model using the igraph package; 8 | # Returns (n,Kmax) matrix of net IDs (friends) by row; 9 | # Row i contains the IDs (row numbers) of i's friends; 10 | # i's friends are assumed connected to i and can influence i in equations defined by node()) 11 | # When i has less than Kmax friends, the remaining i row entries are filled with NAs; 12 | # Argument m_pn: > 0 13 | # a total number of edges in the network as a fraction (or multiplier) of n (sample size) 14 | #-------------------------------------------------------------------------------------------------- 15 | gen.ER <- function(n, m_pn, ...) 
{ 16 | m <- as.integer(m_pn*n) 17 | if (n<=10) m <- 20 18 | igraph.ER <- igraph::sample_gnm(n = n, m = m, directed = TRUE) 19 | sparse_AdjMat <- igraph.to.sparseAdjMat(igraph.ER) 20 | NetInd_out <- sparseAdjMat.to.NetInd(sparse_AdjMat) 21 | return(NetInd_out$NetInd_k) 22 | } 23 | 24 | D <- DAG.empty() 25 | # Sample ER model network using igraph::sample_gnm with m_pn argument: 26 | D <- D + network("ER.net", netfun = "gen.ER", m_pn = 50) 27 | # W1 - categorical (6 categories, 1-6): 28 | D <- D + 29 | node("W1", distr = "rcat.b1", 30 | probs = c(0.0494, 0.1823, 0.2806, 0.2680, 0.1651, 0.0546)) + 31 | # W2 - binary infection status, positively correlated with W1: 32 | node("W2", distr = "rbern", prob = plogis(-0.2 + W1/3)) + 33 | # W3 - binary confounder: 34 | node("W3", distr = "rbern", prob = 0.6) 35 | # A[i] is a function W1[i] and the total of i's friends values W1, W2 and W3: 36 | D <- D + node("A", distr = "rbern", 37 | prob = plogis(2 + -0.5 * W1 + 38 | -0.1 * sum(W1[[1:Kmax]]) + 39 | -0.4 * sum(W2[[1:Kmax]]) + 40 | -0.7 * sum(W3[[1:Kmax]])), 41 | replaceNAw0 = TRUE) 42 | # Y[i] is a function of netW3 (friends of i W3 values) and the total N of i's friends 43 | # who are infected AND untreated: 44 | D <- D + node("Y", distr = "rbern", 45 | prob = plogis(-1 + 2 * sum(W2[[1:Kmax]] * (1 - A[[1:Kmax]])) + 46 | -2 * sum(W3[[1:Kmax]]) 47 | ), 48 | replaceNAw0 = TRUE) 49 | # Can add N untreated friends to the above outcome Y equation: sum(1 - A[[1:Kmax]]): 50 | D <- D + node("Y", distr = "rbern", 51 | prob = plogis(-1 + 1.5 * sum(W2[[1:Kmax]] * (1 - A[[1:Kmax]])) + 52 | -2 * sum(W3[[1:Kmax]]) + 53 | 0.25 * sum(1 - A[[1:Kmax]]) 54 | ), 55 | replaceNAw0 = TRUE) 56 | # Can add N infected friends at baseline to the above outcome Y equation: sum(W2[[1:Kmax]]): 57 | D <- D + node("Y", distr = "rbern", 58 | prob = plogis(-1 + 1 * sum(W2[[1:Kmax]] * (1 - A[[1:Kmax]])) + 59 | -2 * sum(W3[[1:Kmax]]) + 60 | 0.25 * sum(1 - A[[1:Kmax]]) + 61 | 0.25 * sum(W2[[1:Kmax]]) 62 | ), 63 | 
replaceNAw0 = TRUE) 64 | Dset <- set.DAG(D, n.test = 100) 65 | # Simulating data from the above sem: 66 | datnet <- sim(Dset, n = 1000, rndseed = 543) 67 | head(datnet) 68 | # Obtaining the network object from simulated data: 69 | net_object <- attributes(datnet)$netind_cl 70 | # Max number of friends: 71 | net_object$Kmax 72 | # Network matrix 73 | head(attributes(datnet)$netind_cl$NetInd) 74 | 75 | #-------------------------------------------------------------------------------------------------- 76 | # EXAMPLE 2. USING CUSTOM NETWORK GENERATING FUNCTION 77 | #-------------------------------------------------------------------------------------------------- 78 | 79 | #-------------------------------------------------------------------------------------------------- 80 | # Example of a user-defined network sampler(s) function 81 | # Arguments K, bslVar[i] (W1) & nF are evaluated in the environment of the simulated data then 82 | # passed to genNET() function 83 | # - K: maximum number of friends for any unit 84 | # - bslVar[i]: used for constructing weights for the probability of selecting i as 85 | # someone else's friend (weighted sampling), when missing the sampling goes to uniform 86 | # - nF[i]: total number of friends that need to be sampled for observation i 87 | #-------------------------------------------------------------------------------------------------- 88 | genNET <- function(n, K, bslVar, nF, ...) 
{ 89 | prob_F <- plogis(-4.5 + 2.5*c(1:K)/2) / sum(plogis(-4.5 + 2.5*c(1:K)/2)) 90 | NetInd_k <- matrix(NA_integer_, nrow = n, ncol = K) 91 | nFriendTot <- rep(0L, n) 92 | for (index in (1:n)) { 93 | FriendSampSet <- setdiff(c(1:n), index) 94 | nFriendSamp <- max(nF[index] - nFriendTot[index], 0L) 95 | if (nFriendSamp > 0) { 96 | if (length(FriendSampSet) == 1) { 97 | friends_i <- FriendSampSet 98 | } else { 99 | friends_i <- sort(sample(FriendSampSet, size = nFriendSamp, 100 | prob = prob_F[bslVar[FriendSampSet] + 1])) 101 | } 102 | NetInd_k[index, ] <- c(as.integer(friends_i), 103 | rep_len(NA_integer_, K - length(friends_i))) 104 | nFriendTot[index] <- nFriendTot[index] + nFriendSamp 105 | } 106 | } 107 | return(NetInd_k) 108 | } 109 | 110 | D <- DAG.empty() 111 | D <- D + 112 | # W1 - categorical or continuous confounder (6 categories, 0-5): 113 | node("W1", distr = "rcat.b0", 114 | probs = c(0.0494, 0.1823, 0.2806, 0.2680, 0.1651, 0.0546)) + 115 | # W2 - binary infection status at t=0, positively correlated with W1: 116 | node("W2", distr = "rbern", prob = plogis(-0.2 + W1/3)) + 117 | # W3 - binary confounder: 118 | node("W3", distr = "rbern", prob = 0.6) 119 | 120 | # def.nF: total number of friends for each i (0-K), each def.nF[i] is influenced by categorical W1 121 | K <- 10 122 | set.seed(12345) 123 | normprob <- function(x) x / sum(x) 124 | p_nF_W1_mat <- apply(matrix(runif((K+1)*6), ncol = 6, nrow = (K+1)), 2, normprob) 125 | colnames(p_nF_W1_mat) <- paste0("p_nF_W1_", c(0:5)) 126 | create_probs_nF <- function(W1) t(p_nF_W1_mat[,W1+1]) 127 | vecfun.add("create_probs_nF") 128 | D <- D + node("def.nF", distr = "rcat.b0", probs = create_probs_nF(W1)) 129 | 130 | # Adding the network generator that depends on nF and categorical W1: 131 | D <- D + network(name="net.custom", netfun = "genNET", K = K, bslVar = W1, nF = def.nF) 132 | # Define A[i] as a function of W1[i] as well as the total sum of i's friends values for W1, W2 and W3: 133 | D <- D + node("A", distr 
= "rbern", 134 | prob = plogis(2 + -0.5 * W1 + 135 | -0.1 * sum(W1[[1:Kmax]]) + 136 | -0.4 * sum(W2[[1:Kmax]]) + 137 | -0.7 * sum(W3[[1:Kmax]])), 138 | replaceNAw0 = TRUE) 139 | # Y[i] is a the total N of i's friends who are infected AND untreated 140 | # + a function of friends W3 values 141 | D <- D + node("pYRisk", distr = "rconst", 142 | const = plogis(-1 + 2 * sum(W2[[1:Kmax]] * (1 - A[[1:Kmax]])) + 143 | -1.5 * sum(W3[[1:Kmax]])), 144 | replaceNAw0 = TRUE) 145 | 146 | D <- D + node("Y", distr = "rbern", prob = pYRisk) 147 | Dset <- set.DAG(D, n.test = 100) 148 | 149 | # Simulating data from the above sem: 150 | datnet <- sim(Dset, n = 1000, rndseed = 543) 151 | head(datnet, 10) 152 | # Obtaining the network object from simulated data: 153 | net_object <- attributes(datnet)$netind_cl 154 | # Max number of friends: 155 | net_object$Kmax 156 | # Network matrix 157 | head(attributes(datnet)$netind_cl$NetInd) 158 | plotDAG(Dset) 159 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | simcausal 2 | ========== 3 | 4 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/simcausal)](https://CRAN.R-project.org/package=simcausal) 5 | [![](http://cranlogs.r-pkg.org/badges/simcausal)](https://CRAN.R-project.org/package=simcausal) [![](http://cranlogs.r-pkg.org/badges/grand-total/simcausal)](https://CRAN.R-project.org/package=simcausal) 6 | [![Travis-CI Build Status](https://travis-ci.org/osofr/simcausal.svg?branch=master)](https://travis-ci.org/osofr/simcausal) 7 | [![Coverage Status](https://coveralls.io/repos/osofr/simcausal/badge.svg?branch=master&service=github)](https://coveralls.io/github/osofr/simcausal?branch=master) 8 | 9 | The `simcausal` R package is a tool for specification and simulation of complex longitudinal data structures that are based on structural equation models (SEMs). 
The emphasis is on the types of simulations frequently encountered in causal inference problems, such as, observational data with time-dependent confounding, selection bias, and random monitoring processes. The interface allows for quick expression of dependencies between a large number of time-varying nodes. 10 | 11 | ### Installation 12 | 13 | To install the CRAN release version of `simcausal`: 14 | 15 | ```R 16 | install.packages('simcausal') 17 | ``` 18 | 19 | To install the development version (requires the `devtools` package): 20 | 21 | ```R 22 | devtools::install_github('osofr/simcausal', build_vignettes = FALSE) 23 | ``` 24 | 25 | ### Documentation 26 | 27 | Once the package is installed, see the [vignette](https://CRAN.R-project.org/package=simcausal/vignettes/simcausal_vignette.pdf), consult the internal package documentation and examples. 28 | 29 | * To see the vignette in R: 30 | 31 | ```R 32 | vignette("simcausal_vignette", package="simcausal") 33 | ``` 34 | 35 | * To see all available package documentation: 36 | 37 | ```R 38 | ?simcausal 39 | help(package = 'simcausal') 40 | ``` 41 | 42 | * To see the latest updates for the currently installed version of the package: 43 | 44 | ```r 45 | news(package = "simcausal") 46 | ``` 47 | 48 | ### Brief overview 49 | 50 | Below is an example simulating data with 4 covariates specified by 4 structural equations (nodes). 
New equations are added by using successive calls to `+ node()` function and data are simulated by calling `sim` function: 51 | 52 | ```R 53 | library("simcausal") 54 | D <- DAG.empty() + 55 | node("CVD", distr="rcat.b1", probs = c(0.5, 0.25, 0.25)) + 56 | node("A1C", distr="rnorm", mean = 5 + (CVD > 1)*10 + (CVD > 2)*5) + 57 | node("TI", distr="rbern", prob = plogis(-0.5 - 0.3*CVD + 0.2*A1C)) + 58 | node("Y", distr="rbern", prob = plogis(-3 + 1.2*TI + 0.1*CVD + 0.3*A1C)) 59 | D <- set.DAG(D) 60 | dat <- sim(D,n=200) 61 | ``` 62 | 63 | To display the above SEM object as a directed acyclic graph: 64 | 65 | ```R 66 | plotDAG(D) 67 | ``` 68 | 69 | To allow the above nodes `A1C`, `TI` and `Y` to change over time, for time points t = 0,...,7, and keeping `CVD` the same, simply add `t` argument to `node` function and use the square bracket `[...]` vector indexing to reference time-varying nodes inside the `node` function expressions: 70 | 71 | ```R 72 | library("simcausal") 73 | D <- DAG.empty() + 74 | node("CVD", distr="rcat.b1", probs = c(0.5, 0.25, 0.25)) + 75 | node("A1C", t=0, distr="rnorm", mean=5 + (CVD > 1)*10 + (CVD > 2)*5) + 76 | node("TI", t=0, distr="rbern", prob=plogis(-5 - 0.3*CVD + 0.5*A1C[t])) + 77 | 78 | node("A1C", t=1:7, distr="rnorm", mean=-TI[t-1]*10 + 5 + (CVD > 1)*10 + (CVD > 2)*5) + 79 | node("TI", t=1:7, distr="rbern", prob=plogis(-5 - 0.3*CVD + 0.5*A1C[t] + 1.5*TI[t-1])) + 80 | node("Y", t=0:7, distr="rbern", prob=plogis(-6 - 1.2*TI[t] + 0.1*CVD + 0.3*A1C[t]), EFU=TRUE) 81 | D <- set.DAG(D) 82 | dat.long <- sim(D,n=200) 83 | ``` 84 | 85 | The `+ action` function allows defining counterfactual data under various interventions (e.g., static, dynamic, deterministic, or stochastic), which can be then simulated by calling `sim` function. In particular, the interventions may represent exposures to treatment regimens, the occurrence or non-occurrence of right-censoring events, or of clinical monitoring events. 
86 | 87 | In addition, the functions `set.targetE`, `set.targetMSM` and `eval.target` provide tools for defining and computing a few selected features of the distribution of the counterfactual data that represent common causal quantities of interest, such as, treatment-specific means, the average treatment effects and coefficients from working marginal structural models. 88 | 89 | 90 | ### Using networks in SEMs 91 | 92 | Function `network` provides support for network simulations, in particular it enables defining and simulating SEM for dependent data. For example, a network sampling function like `rnet.gnm` (provided by the package, see `?rnet.gnm`) can be used to specify and simulate dependent data from a network-based SEM. Start defining a SEM that uses this network, with a `+network` syntax and providing "`rnet.gnm`" as a "`netfun`" argument to `network` function: 93 | 94 | ```R 95 | library("simcausal") 96 | library("magrittr") 97 | D <- DAG.empty() + network("ER.net", netfun = "rnet.gnm", m_pn = 50) 98 | ``` 99 | 100 | First define two IID nodes `W1` (categorical) and `W2` (Bernoulli): 101 | 102 | ```R 103 | D <- D + 104 | node("W1", distr = "rcat.b1", probs = c(0.0494, 0.1823, 0.2806, 0.2680, 0.1651, 0.0546)) + 105 | node("W2", distr = "rbern", prob = plogis(-0.2 + W1/3)) 106 | ``` 107 | 108 | New nodes (structural equations) can now be specified conditional on the past node values of observations connected to each unit `i` (*friends* of `i`). The friends are defined by the network matrix that is returned by the above network generator `rnet.gnm`. Double square bracket syntax "`[[...]]`" allows referencing the node values of connected friends. Two special variables, "`Kmax`" and "`nF`" can be used alongside indexing "`[[...]]`". `Kmax` defines the maximal number of friends (maximal friend index) for all observations. When `kth` friend referenced in "`Var[[k]]`" doesn't exist, the default is to set that value to "`NA`". 
Adding the argument "`replaceNAw0=TRUE`" to `node` function changes such values from `NA` to `0`. `nF` is another special variable, which is a vector of length `n` and each `nF[i]` is equal to the current number of friends for unit `i`. Any kind of summary function that can be applied to multiple time-varying nodes can be similarly applied to network-indexed nodes. For additional details, see the package documentation for the network function (`?network`) and the package vignette on conducting network simulations. 109 | 110 | Define network variable "`netW1`" as the `W1` values of the first friend and define binary exposure "`A`" so that probability of success for each unit 'i' for `A` is a logit-linear function of: 111 | 1. `W1[i]`, 112 | 2. Sum of `W1` values among all friends of `i`, 113 | 3. Mean value of `W2` among all friends of `i`. 114 | 115 | ```R 116 | dat.net <- { 117 | D + node("netW1.F1", distr = "rconst", const = W1[[1]]) + 118 | node("A", distr = "rbern", 119 | prob = plogis(2 + -0.5 * W1 + 120 | -0.1 * sum(W1[[1:Kmax]]) + 121 | -0.7 * ifelse(nF > 0, sum(W2[[1:Kmax]])/nF, 0)), 122 | replaceNAw0 = TRUE)} %>% 123 | set.DAG() %>% 124 | sim(n=1000) 125 | ``` 126 | 127 | The simulated data frame returned by `sim()` also contains the simulated network object, saved as a separate attribute. The network is saved as an `R6` object of class `NetIndClass`, under attribute called "`netind_cl`". The field "`NetInd`" contains the network matrix, the field "`Kmax`" contains the maximum number of friends (number of columns in `NetInd`) and the field "`nF`" contains the vector for total number of friends for each observation (see `?NetIndClass` for more information). 
128 | 129 | ```{r} 130 | (Kmax <- attributes(dat.net)$netind_cl$Kmax) 131 | NetInd_mat <- attributes(dat.net)$netind_cl$NetInd 132 | head(NetInd_mat) 133 | nF <- attributes(dat.net)$netind_cl$nF 134 | head(nF) 135 | ``` 136 | 137 | ### Citation 138 | To cite `simcausal` in publications, please use: 139 | > Sofrygin O, van der Laan MJ, Neugebauer R (2015). *simcausal: Simulating Longitudinal Data with Causal Inference Applications.* R package version 0.5. 140 | 141 | ### Funding 142 | The development of this package was partially funded through internal operational funds provided by the Kaiser Permanente Center for Effectiveness & Safety Research (CESR). This work was also partially supported through a Patient-Centered Outcomes Research Institute (PCORI) Award (ME-1403-12506) and an NIH grant (R01 AI074345-07). 143 | 144 | ### Copyright 145 | This software is distributed under the GPL-2 license. 146 | -------------------------------------------------------------------------------- /R/simcausal-package.r: -------------------------------------------------------------------------------- 1 | #' Simulating Longitudinal Data with Causal Inference Applications 2 | #' 3 | #' The \pkg{simcausal} R package is a tool for specification and simulation of complex longitudinal data structures that are 4 | #' based on structural equation models. The package provides a flexible tool for conducting transparent and reproducible 5 | #' simulation studies, with a particular emphasis on the types of data and interventions frequently encountered in typical 6 | #' causal inference problems, such as, observational data with time-dependent confounding, selection bias, and random monitoring processes. 7 | #' The package interface allows for concise expression of complex functional dependencies between a large number of nodes, 8 | #' where each node may represent a time-varying random variable. 
9 | #' The package allows for specification and simulation of counterfactual data under various user-specified interventions 10 | #' (e.g., static, dynamic, deterministic, or stochastic). 11 | #' In particular, the interventions may represent exposures to treatment regimens, the occurrence or non-occurrence of right-censoring 12 | #' events, or of clinical monitoring events. \pkg{simcausal} enables the computation of a selected set of user-specified 13 | #' features of the distribution of the counterfactual data that represent common causal quantities of interest, 14 | #' such as, treatment-specific means, the average treatment effects and coefficients from working marginal structural models. 15 | #' For additional details and examples please see the package vignette and the function-specific documentation. 16 | #' 17 | #' @section Documentation: 18 | #' \itemize{ 19 | #' \item To see the package vignette use: \code{vignette("simcausal_vignette", package="simcausal")} 20 | #' \item To see all available package documentation use: \code{help(package = 'simcausal')} 21 | #' } 22 | #' 23 | #' @section Routines: 24 | #' The following routines will be generally invoked by a user, in the same order as presented below. 25 | #' \describe{ 26 | #' \item{\code{\link{DAG.empty}}}{Initiates an empty \code{DAG} object that contains no nodes.} 27 | #' \item{\code{\link{node}}}{Defines node(s) in the structural equation model and its conditional distribution(s) using a language of vector-like R expressions. A call to \code{node} can specify either a single node or multiple nodes at once.} 28 | #' \item{\code{\link{add.nodes}} or \code{+\link{node}}}{Provide two equivalent ways of growing the structural equation model by adding new nodes and their conditional distributions. 
29 | #' Sequentially define nodes in the \code{DAG} object, with each node representing the outcomes of one or more structural equation(s), altogether making-up the causal model of interest.} 30 | #' \item{\code{\link{set.DAG}}}{Performs consistency checks and locks the \code{DAG} object so that no additional nodes can be subsequently added to the structural equation model.} 31 | #' \item{\code{\link{sim}} or \code{\link{simobs}}}{Simulates iid observations of the complete node sequence defined by the \code{DAG} object. The output dataset is stored as a \code{data.frame} and is referred to as the \emph{observed data}.} 32 | #' \item{\code{\link{add.action}} or \code{+\link{action}}}{Provide two equivalent ways to define one or more actions. 33 | #' Each action modifies the conditional distribution for a subset of nodes in the original \code{DAG} object. The resulting data generating distribution is referred to as the post-intervention distribution. 34 | #' It is saved in the \code{DAG} object alongside the original structural equation model (\code{DAG} object).} 35 | #' \item{\code{\link{sim}} or \code{\link{simfull}}}{Simulates independent observations from one or more post-intervention distribution(s). 36 | #' Produces a named list of \code{data.frame}s, collectively referred to as the \emph{full data}. 37 | #' The number of output \code{data.frame}s is equal to the number of post-intervention distributions specified in the \code{actions} argument, where each \code{data.frame} object is an iid sample from a particular post-intervention distribution.} 38 | #' \item{\code{\link{set.targetE}} and \code{\link{set.targetMSM}}}{Define two distinct types of target causal parameters. 39 | #' The function \code{set.targetE} defines causal parameters as the expected value(s) of \code{DAG} node(s) under one post-intervention distribution or the contrast of such expected value(s) from two post-intervention distributions. 
40 | #' The function \code{set.targetMSM} defines causal parameters based on a user-specified \bold{working} marginal structural model.} 41 | #' \item{\code{\link{eval.target}}}{Evaluates the previously defined causal parameter using simulated full data} 42 | #' } 43 | #' 44 | #' @section Data structures: 45 | #' The following most common types of output are produced by the package: 46 | #' \itemize{ 47 | #' \item \emph{parameterized causal \code{DAG} model} - object that specifies the structural equation model, along with interventions and the causal target parameter of interest. 48 | #' \item \emph{observed data} - data simulated from the (pre-intervention) distribution specified by the structural equation model. 49 | #' \item \emph{full data} - data simulated from one or more post-intervention distributions defined by actions on the structural equation model. 50 | #' \item \emph{causal target parameter} - the true value of the causal target parameter evaluated with full data. 51 | #' } 52 | #' 53 | #' @section Updates: 54 | #' Check for updates and report bugs at \url{http://github.com/osofr/simcausal}. 55 | #' 56 | #' @references Sofrygin O, van der Laan MJ, Neugebauer R (2017). 57 | #' "simcausal R Package: Conducting Transparent and Reproducible Simulation Studies of Causal Effect Estimation with Complex Longitudinal Data." 58 | #' Journal of Statistical Software, 81(2), 1-47. doi: 10.18637/jss.v081.i02. 
59 | #' @docType package 60 | #' @name simcausal 61 | #' 62 | NULL 63 | 64 | #' @importFrom graphics legend par plot 65 | #' @importFrom stats as.formula glm na.exclude rbinom reshape rnorm runif setNames terms.formula 66 | #' @importFrom utils head str 67 | #' @importFrom utils getFromNamespace 68 | #' @importFrom methods is 69 | NULL 70 | 71 | 72 | # \item {\code{node}} - defines a node in the structural equation model and 73 | # its conditional distribution, i.e., the outcome of one equation in the 74 | # structural equation model and the formula that links the outcome value 75 | # to that of earlier covariates, referred to as parent nodes. A call to \code{node} 76 | # can specify either a single node or multiple nodes at once, with \code{name} 77 | # and \code{distr} being the only required arguments. To specify multiple 78 | # nodes with a single \code{node} call, one must also provide an indexing 79 | # vector of integers as an argument \code{t}. In this case, each node shares 80 | # the same name, but is indexed by distinct values in \code{t}. The simultaneous 81 | # specification of multiple nodes is particularly relevant for providing 82 | # a shorthand syntax for defining a time-varying covariate, i.e., for defining 83 | # repeated measurements over time of the same subject-matter attribute. 84 | 85 | # \item {\code{add.nodes} or \code{D + node}} - provide two equivalent ways of growing the structural equation model by adding new nodes and their conditional distributions. Informally, these routines are intended to be used to sequentially populate a \code{DAG} object with all the structural equations that make up the causal model of interest. 86 | 87 | # \item [{\code{set.DAG}}] performs consistency checks and locks the \code{DAG} object so that no additional nodes can be subsequently added to the structural equation model. 88 | # In addition, this routine performs several consistency checks of 89 | # the user-populated \code{DAG} object. 
In particular, the routine attempts 90 | # to simulate observations to verify that all conditional distributions in 91 | # the \code{DAG} object are well-defined. 92 | 93 | # \item [{\code{sim}}] simulates iid observations of the complete node sequence defined by a \code{DAG} object. The output dataset is stored as a \code{data.frame} and is referred to as the \emph{observed data}. 94 | # The output data can be structured in either long or wide formats. 95 | 96 | # \item [{\code{add.action} or \code{D + action}}] provides two equivalent ways to define one or more actions. 97 | # An action modifies the conditional distribution of one or more nodes of the structural equation model. 98 | # The resulting data generating distribution is referred to as the post-intervention distribution. It is saved in the \code{DAG} object alongside the original structural equation model. 99 | 100 | # \item [{\code{sim}}] can also be used for simulating independent observations from one or more post-intervention distributions, as specified by the \code{actions} argument. 101 | # The output is a named list of \code{data.frame} objects, collectively referred to as the \emph{full data}. 102 | # The number of \code{data.frame} objects in this list is equal to the number of post-intervention distributions specified in the \code{actions} 103 | # argument, where each \code{data.frame} object is an iid sample from a particular post-intervention distribution. 104 | 105 | # \item [{\code{set.targetE} and \code{set.targetMSM}}] 106 | # The function \code{set.targetE} defines causal parameters as the expected value(s) of \code{DAG} node(s) under one post-intervention distribution or the contrast of such expected value(s) from two post-intervention distributions. 107 | # The function \code{set.targetMSM} defines causal parameters based on a user-specified \textbf{working} marginal structural model. 
108 | # The true value of the causal parameter is defined by one or several post-intervention distributions and can thus be approximated using full data. 109 | # The output is the modified \code{DAG} object with the definition of the target causal parameter saved alongside the interventions. 110 | 111 | # \item [{\code{eval.target}}] evaluates the causal parameter of interest using simulated full data. As input, it can take previously simulated full data (i.e., the output of a call to the \code{simfull} function) or, alternatively, the user can specify the sample size \code{n}, based on which full data will be simulated first. 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /R/distributions.R: -------------------------------------------------------------------------------- 1 | 2 | #' Random Sample from Bernoulli Distribution 3 | #' 4 | #' Wrapper for Bernoulli node distribution (a single-trial \code{rbinom} draw, i.e., \code{size = 1}). 5 | #' 6 | #' @param n Sample size. 7 | #' @param prob A vector of success probabilities. 8 | #' @return Binary vector of length \code{n}. 9 | #' @examples 10 | #' 11 | #'#--------------------------------------------------------------------------------------- 12 | #'# Specifying and simulating from a DAG with 3 Bernoulli nodes 13 | #'#--------------------------------------------------------------------------------------- 14 | #'D <- DAG.empty() 15 | #'D <- D + node("W1", distr="rbern", prob=0.05) 16 | #'D <- D + node("W2", distr="rbern", prob=ifelse(W1==1,0.5,0.1)) 17 | #'D <- D + node("W3", distr="rbern", prob=ifelse(W1==1,0.5,0.1)) 18 | #'Dset <- set.DAG(D) 19 | #'simdat <- sim(Dset, n=200, rndseed=1) 20 | #' @export 21 | rbern <- function(n, prob) { 22 | rbinom(n=n, prob=prob, size=1) 23 | } 24 | 25 | #' Constant (Degenerate) Distribution (Returns its Own Argument \code{const}) 26 | #' 27 | #' Wrapper for constant value (degenerate) distribution. 28 | #' 29 | #' @param n Sample size.
30 | #' @param const Either a vector with one constant value (replicated \code{n} times) 31 | #' or a vector of length \code{n} or a matrix with \code{n} rows (for a multivariate node). 32 | #' @return A vector of constants of length \code{n}; when \code{const} is a matrix it is returned unchanged. A \code{const} longer than \code{n} is truncated to length \code{n} with a warning, and one shorter than \code{n} (but not of length 1) is an error. 33 | #' @examples 34 | #' 35 | #'#--------------------------------------------------------------------------------------- 36 | #'# Specifying and simulating from a DAG with 1 Bernoulli and 2 constant nodes 37 | #'#--------------------------------------------------------------------------------------- 38 | #'D <- DAG.empty() 39 | #'D <- D + node("W1", distr = "rbern", prob = 0.45) 40 | #'D <- D + node("W2", distr = "rconst", const = 1) 41 | #'D <- D + node("W3", distr = "rconst", const = ifelse(W1 == 1, 5, 10)) 42 | #' 43 | #'# TWO equivalent ways of creating a multivariate node (just repeating W1 and W2): 44 | #'create_mat <- function(W1, W2) cbind(W1, W2) 45 | #'vecfun.add("create_mat") 46 | #' 47 | #'D <- D + node(c("W1.copy1", "W2.copy1"), distr = "rconst", const = c(W1, W2)) 48 | #'D <- D + node(c("W1.copy2", "W2.copy2"), distr = "rconst", const = create_mat(W1, W2)) 49 | #'Dset <- set.DAG(D) 50 | #'sim(Dset, n=10, rndseed=1) 51 | #' @export 52 | rconst <- function(n, const) { 53 | if (n==0) { 54 | return(vector(length = n)) 55 | } else if (is.matrix(const)) { 56 | return(const) 57 | } else if (length(const) == 1) { 58 | return(rep.int(const, n)) 59 | } else if (length(const) < n) { 60 | stop("the length of const arg is not 1 and is less than n; it needs to be one or the other") 61 | } else if (length(const) > n) { 62 | warning("the length of const arg is more than n; const was truncated to length n") 63 | return(const[1:n]) 64 | } else { 65 | return(const) 66 | } 67 | } 68 | 69 | #' Random Sample for a Categorical Factor 70 | #' 71 | #' Matrix version of the categorical distribution. The argument \code{probs} can be a matrix of n rows, 72 | #' specifying individual (varying in sample) categorical probabilities.
73 | #' The number of categories generated is equal to \code{ncol(probs)+1}, the levels labeled as: \code{1,...,ncol(probs)+1}. 74 | #' 75 | #' @param n Sample size. 76 | #' @param probs Either a vector or a matrix of success probabilities. 77 | #' When \code{probs} is a vector, \code{n} identically distributed random categorical variables 78 | #' are generated with categories: 1, 2, ..., length(probs)+1. 79 | #' When \code{probs} is a matrix, the categorical probabilities of the \code{k}th sample are determined by the 80 | #' \code{k}th row of \code{probs} matrix, i.e., \code{probs[k,]}. 81 | #' @return A factor of length \code{n} with levels: \code{1,2, ...,ncol(probs)+1}. 82 | #' @examples 83 | #' 84 | #'#--------------------------------------------------------------------------------------- 85 | #'# Specifying and simulating from a DAG with one categorical node with constant 86 | #'# probabilities 87 | #'#--------------------------------------------------------------------------------------- 88 | #'D <- DAG.empty() 89 | #'D <- D + node("race",t=0,distr="rcat.factor",probs=c(0.2,0.1,0.4,0.15,0.05,0.1)) 90 | #'Dset <- set.DAG(D) 91 | #'simdat <- sim(Dset, n=200, rndseed=1) 92 | #' 93 | #'#--------------------------------------------------------------------------------------- 94 | #'# Specifying and simulating from a DAG with a categorical node with varying 95 | #'# probabilities (probabilities are determined by values sampled for nodes L0 and L1) 96 | #'#--------------------------------------------------------------------------------------- 97 | #'D <- DAG.empty() 98 | #'D <- D + node("L0", distr="rnorm", mean=10, sd=5) 99 | #'D <- D + node("L1", distr="rnorm", mean=10, sd=5) 100 | #'D <- D + node("L2", distr="rcat.factor", probs=c(abs(1/L0), abs(1/L1))) 101 | #'Dset <- set.DAG(D) 102 | #'simdat <- sim(Dset, n=200, rndseed=1) 103 | #' @seealso \code{\link{rcat.b1}}, \code{\link{rcat.b0}} 104 | #' @export 105 | rcat.factor <- function(n, probs) { 106 |
as.factor(rcat.b1(n = n, probs = probs)) 107 | } 108 | 109 | #' @describeIn rcat.factor (Deprecated) Random Sample of a Categorical Factor 110 | #' @export 111 | rcategor <- function(n, probs) { 112 | warning("This function is deprecated, please use rcat.factor() instead.") 113 | as.factor(rcategor.int(n = n, probs = probs)) 114 | } 115 | 116 | #' @describeIn rcat.b1 (Deprecated) Random Sample from Base 1 Categorical (Integer) Distribution 117 | #' @export 118 | rcategor.int <- function(n, probs) { 119 | warning("This function is deprecated, please use rcat.b1() instead.") 120 | rcat.b1(n, probs) 121 | } 122 | 123 | #' Random Sample from Base 1 (rcat.b1) or Base 0 (rcat.b0) Categorical (Integer) Distribution 124 | #' 125 | #' Same as \code{\link{rcat.factor}}, but returning a vector of sampled integers with range 1, 2, ..., \code{ncol(probs)+1} for \code{rcat.b1} 126 | #' or range 0, 1, ..., \code{ncol(probs)} for \code{rcat.b0}. For sampling categorical factors see \link{rcat.factor}. 127 | #' @param n Sample size. 128 | #' @param probs Either a vector or a matrix of success probabilities. 129 | #' When probs is a vector, \code{n} identically distributed random categorical variables are 130 | #' generated. 131 | #' When \code{probs} is a matrix, the categorical probabilities of the \code{k}th 132 | #' sample are determined by the \code{k}th row of probs matrix, i.e., \code{probs[k,]}. 133 | #' @return An integer vector of length \code{n} with range either in \code{0,...,ncol(probs)} or in \code{1,...,ncol(probs)+1}.
134 | #' @seealso \code{\link{rcat.factor}} 135 | #' @describeIn rcat.b1 Random Sample from Base 1 Categorical (Integer) Distribution 136 | #' @export 137 | rcat.b1 <- function(n, probs) { 138 | if (n==0) { 139 | probs <- matrix(nrow = n, ncol = length(probs), byrow = TRUE) 140 | } 141 | if (is.vector(probs) && n>0) { 142 | probs <- matrix(data = probs, nrow = n, ncol = length(probs), byrow = TRUE) 143 | } 144 | probs <- cbind(probs, 1 - rowSums(probs)) # append the implied last-category probability (1 - row sum) as an extra column 145 | pover1_idx <- which(probs[,ncol(probs)] < -(10^-6)) # rows whose probabilities add up to more than 1 (beyond a 1e-6 tolerance) and need to be normalized 146 | # reset last category to zero if it is non-zero but within numeric epsilon (1e-6) of 0: 147 | restto0 <- which(abs(probs[,ncol(probs)]) <= 10^-6 & abs(probs[,ncol(probs)]) > 0L) 148 | if (length(restto0)>0) { 149 | probs[restto0, ncol(probs)] <- 0L 150 | } 151 | if (length(pover1_idx)>0) { 152 | warning("some categorical probabilities add up to more than 1, normalizing to add to 1") 153 | probs[pover1_idx, ncol(probs)] <- 0 154 | probs[pover1_idx, ] <- probs[pover1_idx, ,drop = FALSE] / rowSums(probs[pover1_idx, ,drop = FALSE]) # normalize each offending row by its own sum 155 | } 156 | probs_cum <- matrix(nrow = nrow(probs), ncol = ncol(probs)) 157 | probs_cum[,1] <- probs[,1] 158 | for (i in seq(ncol(probs))[-1]) { 159 | probs_cum[,i] <- rowSums(probs[,c(1:i),drop = FALSE]) 160 | } 161 | cat_sample <- probs_cum - runif(nrow(probs_cum)) < 0 162 | if (is.matrix(cat_sample)) { 163 | samples <- rowSums(cat_sample) + 1 164 | } else { 165 | samples <- cat_sample 166 | } 167 | as.integer(samples) 168 | } 169 | 170 | #' @describeIn rcat.b1 Random Sample from Base 0 Categorical (Integer) Distribution 171 | #' @export 172 | rcat.b0 <- function(n, probs) rcat.b1(n, probs) - 1 173 | 174 | #' List All Custom Distribution Functions in \code{simcausal}.
175 | #' 176 | #' @export 177 | distr.list <- function() { 178 | message("All custom distributions defined in SimCausal:\n") 179 | print(ls("package:simcausal")[grep(pattern = "^r(?!net)", x = ls("package:simcausal"), perl=TRUE)]) 180 | # print(ls("package:simcausal", pattern="^[r]")) 181 | # print(ls("package:simcausal", pattern="^[S]L")) 182 | invisible(ls("package:simcausal")) 183 | } 184 | 185 | #' Template for Writing Custom Distribution Functions 186 | #' 187 | #' Template function for writing \code{SimCausal} custom distribution wrappers. 188 | #' 189 | #' One of the named arguments must be 'n', this argument is passed on to the function automatically by 190 | #' the package and is assigned to the number of samples that needs to be generated from this distribution. 191 | #' Other arguments (in this example arg1 and arg2) must be declared by the user as 192 | #' arguments inside the node() function that uses this distribution, 193 | #' e.g., \code{node("Node1"}, \code{distr="rdistr.template"}, \code{arg1 = ...}, \code{arg2 = ...)}. 194 | #' Both, arg1 and arg2, can be either numeric constants or formulas involving past node names. 195 | #' The constants get passed on to the distribution function unchanged. 196 | #' The formulas are evaluated inside the environment of the simulated data and are passed on to the 197 | #' distribution functions as vectors. 198 | #' The output of the distribution function is expected to be a vector of length n of the sampled covariates. 199 | #' 200 | #' @param n Sample size that needs to be generated 201 | #' @param arg1 Argument 1 (a single value, which is replicated \code{n} times, or a vector of length \code{n}) 202 | #' @param arg2 Argument 2 (a single value, which is replicated \code{n} times, or a vector of length \code{n}) 203 | #' @return A numeric vector of length \code{n} (in this template, \code{arg1 + arg2}) 204 | #' @param ... Additional optional parameters 205 | #' @export 206 | rdistr.template <- function(n, arg1, arg2, ...)
{ 207 | if (length(arg1) == 1L) arg1 <- rep.int(arg1, n) 208 | if (length(arg2) == 1L) arg2 <- rep.int(arg2, n) 209 | if (length(arg1) != n || length(arg2) != n) stop("inputs arguments should all have the same length") 210 | out <- as.numeric(arg1 + arg2) 211 | stopifnot(length(out)==n) 212 | # length(out) <- n 213 | out 214 | } -------------------------------------------------------------------------------- /R/interface.r: -------------------------------------------------------------------------------- 1 | 2 | ################################################################### 3 | # Node constructor 4 | # 1) If some of the nodes in "nodes" already exist in DAG then replace them instead of adding 5 | # 2) If order is not defined in "nodes" then infer where to add the node and calculate the orders at set.DAG() 6 | # if DAG.nodelist object nodes consists of just one node, increment the order by 1 7 | ################################################################### 8 | #' Adding Node(s) to DAG 9 | #' 10 | #' Adding nodes to a growing DAG object, as in \code{DAG + node()}. Use either syntax \code{DAG + node()} or \code{add.nodes(DAG = , nodes = node())}. Both give identical results, see the examples in the vignette and below for details. 11 | #' 12 | #' @param DAG DAG object 13 | #' @param nodes A node or several nodes returned from a call to \code{node} function. If the node(s) under same name(s) already exist, the old node(s) get overwritten.
14 | #' @return An updated DAG object with new nodes 15 | #' @seealso \code{\link{node}} 16 | #' 17 | #' @export 18 | add.nodes <- function(DAG, nodes) { 19 | if (!is.DAG(DAG)) { 20 | stop("Not a DAG object") 21 | } 22 | if (is.DAGlocked(DAG)) { 23 | stop("DAG object is locked: nodes in this DAG cannot be modified or added after set.DAG()") 24 | } 25 | 26 | # DAG_names <- unlist(Nattr(DAG, "name")) # get DAG node names 27 | modDAG <- DAG 28 | # print("modDAG"); print(modDAG) 29 | # print("nodes"); print(nodes) 30 | for (node_idx in seq(nodes)) { # loop over each node in DAG.nodelist and add it or overwrite existing node 31 | DAG_names <- unlist(Nattr(modDAG, "name")) # get DAG node names 32 | node_nm <- nodes[[node_idx]]$name 33 | gnode_nm <- as.character(unlist(strsplit(node_nm, "_"))[1]) # generic node name (without t) 34 | 35 | checkexist <- (node_nm%in%DAG_names) # check if the node under the same name already exists 36 | checkexistgen <- (gnode_nm%in%DAG_names) # check if the generic node under the same name already exists 37 | 38 | if ((!checkexist) & checkexistgen) { # the TV node doesn`t exist yet but the generic (nonTV) already does 39 | # give warning and delete old node, add new ones 40 | gnode_idx <- which(DAG_names%in%gnode_nm) 41 | modDAG <- modDAG[-gnode_idx] 42 | class(modDAG) <- "DAG" 43 | warning("existing non-time-varying node "%+% gnode_nm %+% " was overwritten with a time-varying node") 44 | } 45 | 46 | if (!checkexist) { # this node doesn`t exist yet in the DAG, new node has to be added 47 | node_insert <- nodes[node_idx] 48 | t_insert <- nodes[[node_idx]]$t 49 | t_idx_all <- Nattr(modDAG, "t") # a list of t values from current DAG (including NULLs) 50 | t_idx_miss <- sapply(t_idx_all, is.null) # finding all nodes where t is undefined (null) 51 | t_idx_miss <- which(t_idx_miss%in%TRUE) 52 | if (is.null(t_insert)) { 53 | # t is undefined in the new node, hence insert it after last position where is.null(t) 54 | t_idx_poslast <-
t_idx_miss[length(t_idx_miss)] 55 | # IMPORTANT: FORCE ALL NODES TO HAVE t defined (!is.null(node$t)) AFTER t has been defined for ANY prior node 56 | t_idx_nomiss <- sapply(t_idx_all, function(tnode) !is.null(tnode)) 57 | n_tnomiss <- sum(t_idx_nomiss%in%TRUE) 58 | if (n_tnomiss>0) stop("cannot define nodes with missing t after nodes with t non-missing were already defined") 59 | } else { # find the last occurrence of a node with t=t_insert in modDAG and insert node after 60 | t_idx_poslast <- which(t_idx_all%in%t_insert) 61 | t_idx_poslast <- t_idx_poslast[length(t_idx_poslast)] 62 | t_idx_more <- sapply(t_idx_all, function(t) !is.null(t)&&(t > t_insert)) 63 | if (length(t_idx_more)>0 && length(t_idx_poslast)<1) { 64 | t_idx_poslast <- which(t_idx_more)[1]-1 65 | } 66 | } 67 | if ((length(t_idx_poslast)!=1) || (t_idx_poslast < 0) || (is.na(t_idx_poslast))) { 68 | t_idx_poslast <- length(t_idx_all) 69 | } 70 | modDAG <- append(modDAG, node_insert, t_idx_poslast) 71 | class(modDAG) <- "DAG" 72 | } else { # this node name already exists in the DAG, existing node is overwritten 73 | modDAG <- modDAGnode(modDAG, nodes[[node_idx]]) 74 | message("existing node "%+% nodes[[node_idx]]$name %+% " was modified") 75 | } 76 | } 77 | return(modDAG) 78 | } 79 | 80 | ################################################################### 81 | # Action constructor 82 | ################################################################### 83 | #' Define and Add Actions (Interventions) 84 | #' 85 | #' Define and add new action (intervention) to the existing DAG object. Use either syntax \code{DAG +} \code{action(name = ,nodes = )} or \code{add.action(DAG = ,name = ,nodes = )}. Both give identical results, see the examples in the vignette and below for details. 86 | #' 87 | #' In addition to the action name and list of action nodes, both of these functions accept arbitrary named attributes (as additional arguments which must be given a name).
88 | #' These additional attributes can be used to simplify specification of dynamic regimes (actions that depend on the past observed covariates). 89 | #' 90 | #' The formula of the intervention node is allowed to contain undefined variables, as long as those are later defined as a named argument to \code{action}. 91 | #' 92 | #' In Example 2 below, \code{node("A",..., mean = ifelse(W1 >= theta, 1, 0))}, 93 | #' defines the mean of the node "A" as a function of some undefined variable \code{theta}, setting \code{A} to 1 if the baseline node \code{W1} is above or equal to \code{theta} and to 0 otherwise. 94 | #' One specifies actual values of \code{theta} while defining a new action, possibly creating a series of actions, each indexed by a different value of \code{theta}. 95 | #' A new action can be defined with \code{D<-D+action("A1th0.1", nodes=actN, theta=0.1)}. 96 | #' 97 | #' Note that any name can be used in place of \code{theta}. This attribute variable can appear anywhere inside the node distribution formula. 98 | #' Finally, the attribute variable can also be time varying and, just like with DAG nodes, can be indexed by square bracket notation, \code{theta[t]}. See Example 3 for defining time-varying attributes. 99 | #' 100 | #' @param DAG DAG object 101 | #' @param name Unique name of the action 102 | #' @param nodes A list of node objects that defines the action on the DAG (replaces the distributions of the corresponding nodes in DAG) 103 | #' @param ... Additional named attributes defining / indexing the action 104 | #' @param attr A named list of additional attributes defining / indexing the action (appended to the attributes passed via \code{...}) 105 | #' @return A modified \code{DAG} object with the added action 106 | #' @example tests/examples/add.action.R 107 | #' @export 108 | add.action <- function(DAG, name, nodes, ..., attr=list()) { 109 | if (!is.DAG(DAG)) { 110 | stop("Not a DAG object") 111 | } 112 | # collect all attributes (must be named) 113 | attrs <- list(...)
114 | attrs <- append(attrs, attr) 115 | attnames <- names(attrs) 116 | if (length(attrs) != 0 && (is.null(attnames) || any(attnames==""))) { 117 | stop("please specify name for each attribute") 118 | } 119 | if (missing(name)) stop("please specify an action name") 120 | if (!is.character(name) || name=="") stop("action name must be a non-empty string of characters") 121 | if (missing(nodes)) stop("please specify action node(s)") 122 | curr.names <- names(attr(DAG, "actions")) 123 | if (name %in% curr.names) { # an action under this name already exists: re-define it, using the existing action as the input DAG 124 | attr(DAG, "actions")[[name]] <- setAction(actname = name, inputDAG = attr(DAG, "actions")[[name]], actnodes = nodes, attr = attrs) 125 | } else { # create a new action 126 | new.action <- list(setAction(actname = name, inputDAG = DAG, actnodes = nodes, attr = attrs)) 127 | names(new.action) <- name 128 | attr(DAG, "actions") <- c(attr(DAG, "actions"), new.action) 129 | } 130 | return(DAG) 131 | } 132 | ## Action wrapper 133 | ## 134 | ## Constructor for a new action object: captures its arguments in a list of class "DAG.action" 135 | ## @param ... Arguments defining action 136 | #' @rdname add.action 137 | #' @export 138 | action <- function(...) { 139 | structure(list(...), class="DAG.action") 140 | } 141 | 142 | 143 | ################################################################### 144 | # DAG Constructors 145 | ################################################################### 146 | #' Initialize an empty DAG object 147 | #' @export 148 | DAG.empty <- function() { 149 | emptyDAG <- list() 150 | class(emptyDAG) <- "DAG" 151 | emptyDAG 152 | } 153 | 154 | #' @rdname add.nodes 155 | #' @param obj1 Object that belongs to one of the classes: \code{DAG}, \code{DAG.action}, \code{DAG.nodelist} or \code{DAG.netlist}. 156 | #' @param obj2 Object that belongs to one of the classes: \code{DAG}, \code{DAG.action}, \code{DAG.nodelist} or \code{DAG.netlist}.
#' @export
`+.DAG` <- function(obj1, obj2) {
  # `+` dispatches here when either operand is a DAG; make sure obj1 holds the DAG
  if (!is.DAG(obj1) && is.DAG(obj2)) {
    tmp <- obj1
    obj1 <- obj2
    obj2 <- tmp
  }
  if ("DAG.action" %in% class(obj2)) {
    ## Adding action, possibly with attributes.
    ## The action name is either the single unnamed argument of action() or the argument called "name";
    ## the action nodes are the argument called "nodes"; every other named argument is an attribute.
    argnames <- names(obj2)
    # names() is NULL when none of the action() arguments was named: treat them all as unnamed
    if (is.null(argnames)) argnames <- rep("", length(obj2))
    name <- unlist(obj2[argnames == ""], recursive=FALSE)
    if (length(name)>1) stop("only one unnamed argument can be specified")
    if (length(name)==0) {
      # only index with `[[` when a "name" argument exists; an empty index would fail with a
      # low-level subscript error before the informative check below could be reached
      name.idx <- which(argnames %in% "name")
      if (length(name.idx) > 0) name <- obj2[[name.idx[1]]]
    }
    if (length(name)==0) stop("name argument for action must be specified")
    nodes.idx <- which(argnames %in% "nodes")
    # same message as add.action() uses for missing nodes
    if (length(nodes.idx) == 0) stop("please specify action node(s)")
    nodes <- obj2[[nodes.idx[1]]]
    attr <- obj2[(argnames != "") & (!(argnames %in% c("name", "nodes")))]
    dprint("name"); dprint(name)
    dprint("nodes"); dprint(nodes)
    dprint("attr"); dprint(attr)
    res <- add.action(DAG = obj1, name = name, nodes = nodes, attr = attr)
  } else if ("DAG.nodelist" %in% class(obj2)) {
    res <- add.nodes(DAG = obj1, nodes = obj2)
  } else if ("DAG.netlist" %in% class(obj2)) {
    # if (!is.null(attr(obj1, "DAG.net"))) message("overwriting previously defined network object")
    res <- add.nodes(DAG = obj1, nodes = obj2)
    # keep the network definition itself as an attribute of the resulting DAG
    attr(res, "DAG.net") <- obj2
  } else {
    stop("Cannot add unknown type to DAG")
  }
  res
}

# @export
# `+.DAG.nodelist` <- function(obj1, obj2) {
#   if (!is.DAG(obj1) && is.DAG(obj2)) {
#     return(`+.DAG`(obj2,obj1))
#   }
#   if (("DAG.nodelist" %in% class(obj1)) && ("DAG.nodelist" %in% class(obj2))) {
205 | # return(append(obj1, obj2)) 206 | # } else { 207 | # stop("Cannot add unknown type to DAG") 208 | # } 209 | # } 210 | -------------------------------------------------------------------------------- /vignettes/R-Pckgs.bib: -------------------------------------------------------------------------------- 1 | @Manual{R-tmlenet, 2 | title = {\pkg{tmlenet}: Targeted Maximum Likelihood Estimation for Network Data}, 3 | author = {Oleg Sofrygin and Mark J. {van der Laan}}, 4 | year = {2015}, 5 | note = {\proglang{R}~package version~0.1.0}, 6 | url = {http://CRAN.R-project.org/package=tmlenet}, 7 | } 8 | @Manual{R-simcausal, 9 | title = {\pkg{simcausal}: Simulating Longitudinal Data with Causal Inference Applications}, 10 | author = {Oleg Sofrygin and Mark J. van der Laan and Romain Neugebauer}, 11 | year = {2015}, 12 | note = {\proglang{R}~package version~0.4}, 13 | url = {http://CRAN.R-project.org/package=simcausal}, 14 | } 15 | @Manual{r, 16 | title = {\proglang{R}: A Language and Environment for Statistical Computing}, 17 | author = {{\proglang{R} Core Team}}, 18 | organization = {\proglang{R} Foundation for Statistical Computing}, 19 | address = {Vienna, Austria}, 20 | year = {2015}, 21 | url = {http://www.R-project.org/}, 22 | } 23 | @Manual{R-aftgee.old, 24 | title = {\pkg{aftgee}: Accelerated Failure Time Model with Generalized Estimating Equations}, 25 | author = {Sy Han Chiou and Sangwook Kang and Jun Yan}, 26 | year = {2014}, 27 | note = {\proglang{R}~package version~1.0-0}, 28 | url = {http://CRAN.R-project.org/package=aftgee}, 29 | } 30 | @Article{R-aftgee, 31 | title = {Fitting Accelerated Failure Time Models in Routine Survival Analysis with \proglang{R} Package \pkg{aftgee}}, 32 | author = {Sy Han Chiou and Sangwook Kang and Jun Yan}, 33 | journal = {Journal of Statistical Software}, 34 | year = {2014}, 35 | volume = {61}, 36 | number = {11}, 37 | pages = {1--23}, 38 | url = {http://www.jstatsoft.org/v61/i11/}, 39 | } 40 | @Manual{R-data.table, 41 | 
title = {\pkg{data.table}: Extension of data.frame}, 42 | author = {M Dowle and T Short and S Lianoglou and A Srinivasan with contributions from R Saporta and E Antonyan}, 43 | year = {2014}, 44 | note = {\proglang{R}~package version~1.9.4}, 45 | url = {http://CRAN.R-project.org/package=data.table}, 46 | } 47 | @Manual{R-eha, 48 | title = {\pkg{eha}: Event History Analysis}, 49 | author = {Göran Broström}, 50 | year = {2015}, 51 | note = {\proglang{R}~package version~2.4-3}, 52 | url = {http://CRAN.R-project.org/package=eha}, 53 | } 54 | @Manual{R-gems.old, 55 | title = {\pkg{gems}: Generalized Multistate Simulation Model}, 56 | author = {Luisa Salazar Vizcaya and Nello Blaser and Thomas Gsponer}, 57 | year = {2015}, 58 | note = {\proglang{R}~package version~1.0.0}, 59 | url = {http://CRAN.R-project.org/package=gems}, 60 | } 61 | @Article{R-gems, 62 | title = {\pkg{gems}: An \proglang{R} Package for Simulating from Disease Progression Models}, 63 | author = {Nello Blaser and Luisa {Salazar Vizcaya} and Janne Estill 64 | and Cindy Zahnd and Bindu Kalesan and Matthias Egger and Olivia 65 | Keiser and Thomas Gsponer}, 66 | journal = {Journal of Statistical Software}, 67 | year = {2015}, 68 | volume = {64}, 69 | number = {10}, 70 | pages = {1--22}, 71 | url = {http://www.jstatsoft.org/v64/i10/}, 72 | } 73 | @article{blaser2015gems, 74 | title={\pkg{gems}: an \proglang{R} package for Simulating from Disease Progression Models}, 75 | author={Blaser, Nello and Salazar Vizcaya, L and Estill, Janne and Zahnd, Cindy and Kalesan, Bindu and Egger, Matthias and Keiser, O and Gsponer, T}, 76 | journal={Journal of Statistical Software}, 77 | volume={64}, 78 | number={10}, 79 | pages={1--22}, 80 | year={2015} 81 | } 82 | @Manual{R-ggplot2, 83 | title = {\pkg{ggplot2}: An Implementation of the Grammar of Graphics}, 84 | author = {Hadley Wickham and Winston Chang}, 85 | year = {2015}, 86 | note = {\proglang{R}~package version~1.0.1}, 87 | url = 
{http://CRAN.R-project.org/package=ggplot2}, 88 | } 89 | @Manual{R-igraph, 90 | title = {\pkg{igraph}: Network Analysis and Visualization}, 91 | author = {See AUTHORS file.}, 92 | year = {2014}, 93 | note = {\proglang{R}~package version~0.7.1}, 94 | url = {http://CRAN.R-project.org/package=igraph}, 95 | } 96 | @article{igraph, 97 | title = {{The \pkg{igraph} Software Package for Complex Network Research}}, 98 | author = {Csardi, G and Nepusz, T}, 99 | doi = {10.1109/ICCSN.2010.34}, 100 | isbn = {978-1-4244-5726-7}, 101 | journal = {InterJournal}, 102 | pages = {1695}, 103 | url = {http://igraph.org}, 104 | volume = {Complex Systems}, 105 | year = {2006} 106 | } 107 | @Manual{R-knitr, 108 | title = {\pkg{knitr}: A General-Purpose Package for Dynamic Report Generation in \proglang{R}}, 109 | author = {Yihui Xie}, 110 | year = {2015}, 111 | note = {\proglang{R}~package version~1.11}, 112 | url = {http://CRAN.R-project.org/package=knitr}, 113 | } 114 | @Manual{R-lattice, 115 | title = {\pkg{lattice}: Lattice Graphics}, 116 | author = {Deepayan Sarkar}, 117 | year = {2015}, 118 | note = {\proglang{R}~package version~0.20-31}, 119 | url = {http://CRAN.R-project.org/package=lattice}, 120 | } 121 | @Manual{R-lavaan.old, 122 | title = {\pkg{lavaan}: Latent Variable Analysis}, 123 | author = {Yves Rosseel}, 124 | year = {2015}, 125 | note = {\proglang{R}~package version~0.5-18}, 126 | url = {http://CRAN.R-project.org/package=lavaan}, 127 | } 128 | @Article{R-lavaan, 129 | title = {\pkg{lavaan}: An \proglang{R} Package for Structural Equation Modeling}, 130 | author = {Yves Rosseel}, 131 | journal = {Journal of Statistical Software}, 132 | year = {2012}, 133 | volume = {48}, 134 | number = {2}, 135 | pages = {1--36}, 136 | url = {http://www.jstatsoft.org/v48/i02/}, 137 | } 138 | @Manual{R-lavaan.survey.old, 139 | title = {\pkg{lavaan.survey}: Complex Survey Structural Equation Modeling (SEM)}, 140 | author = {Daniel Oberski}, 141 | year = {2014}, 142 | note = 
{\proglang{R}~package version~1.1}, 143 | url = {http://CRAN.R-project.org/package=lavaan.survey}, 144 | } 145 | @Article{R-lavaan.survey, 146 | title = {\pkg{lavaan.survey}: An \proglang{R} Package for Complex Survey Analysis of Structural Equation Models}, 147 | author = {Daniel Oberski}, 148 | journal = {Journal of Statistical Software}, 149 | year = {2014}, 150 | volume = {57}, 151 | number = {1}, 152 | pages = {1--27}, 153 | url = {http://www.jstatsoft.org/v57/i01/}, 154 | } 155 | @Manual{R-ltmle, 156 | title = {\pkg{ltmle}: Longitudinal Targeted Maximum Likelihood Estimation}, 157 | author = {Joshua Schwab and Samuel Lendle and Maya Petersen and Mark {van der Laan}}, 158 | year = {2014}, 159 | note = {\proglang{R}~package version~0.9.3}, 160 | url = {http://CRAN.R-project.org/package=ltmle}, 161 | } 162 | @article{openmx2011, 163 | title = {{\pkg{OpenMx}: An Open Source Extended Structural Equation Modeling Framework}}, 164 | author = {Boker, Steven and Neale, Michael and Maes, Hermine and Wilde, Michael and Spiegel, Michael and Brick, Timothy and Spies, Jeffrey and Estabrook, Ryne and Kenny, Sarah and Bates, Timothy and Others}, 165 | journal = {Psychometrika}, 166 | number = {2}, 167 | pages = {306--317}, 168 | publisher = {Springer}, 169 | volume = {76}, 170 | year = {2011} 171 | } 172 | @Manual{R-OpenMx, 173 | title = {\pkg{OpenMx}: Multipurpose Software for Statistical Modeling}, 174 | author = {Steven M. Boker and Michael C. Neale and Hermine H. Maes and Michael Spiegel and Timothy R. Brick and Ryne Estabrook and Timothy C. Bates and Ross J. Gore and Michael D. Hunter and Joshua N. Pritikin and Mahsa Zahery and Robert M. 
Kirkpatrick}, 175 | year = {2014}, 176 | note = {\proglang{R}~package version~2.0.1}, 177 | url = {http://openmx.psyc.virginia.edu}, 178 | } 179 | @article{fox2006teacher, 180 | title={Teacher's Corner: Structural Equation Modeling with the \pkg{sem} Package in \proglang{R}}, 181 | author={Fox, John}, 182 | journal={Structural equation modeling}, 183 | volume={13}, 184 | number={3}, 185 | pages={465--486}, 186 | year={2006}, 187 | publisher={Taylor \& Francis} 188 | } 189 | @Manual{R-sem, 190 | title = {\pkg{sem}: Structural Equation Models}, 191 | author = {John Fox and Zhenghua Nie and Jarrett Byrnes}, 192 | year = {2014}, 193 | note = {\proglang{R}~package version~3.1}, 194 | url = {http://CRAN.R-project.org/package=sem}, 195 | } 196 | @Article{R-semPLS, 197 | title = {\pkg{semPLS}: Structural Equation Modeling Using Partial Least Squares}, 198 | author = {Armin Monecke and Friedrich Leisch}, 199 | journal = {Journal of Statistical Software}, 200 | year = {2012}, 201 | volume = {48}, 202 | number = {3}, 203 | pages = {1--32}, 204 | url = {http://www.jstatsoft.org/v48/i03/}, 205 | } 206 | @Manual{R-simsem, 207 | title = {\pkg{simsem}: SIMulated Structural Equation Modeling}, 208 | author = {Sunthud Pornprasertmanit and Patrick Miller and Alexander Schoemann}, 209 | year = {2015}, 210 | note = {\proglang{R}~package version~0.5}, 211 | url = {http://CRAN.R-project.org/package=simsem}, 212 | } 213 | @Manual{R-statmod, 214 | title = {\pkg{statmod}: Statistical Modeling}, 215 | author = {Gordon Smyth and Yifang Hu and Peter Dunn and Belinda Phipson and Yunshun Chen}, 216 | year = {2015}, 217 | note = {\proglang{R}~package version~1.4.21}, 218 | url = {http://CRAN.R-project.org/package=statmod}, 219 | } 220 | @Manual{R-survey, 221 | title = {\pkg{survey}: Analysis of Complex Survey Samples}, 222 | author = {Thomas Lumley}, 223 | year = {2014}, 224 | note = {\proglang{R}~package version~3.30-3}, 225 | url = {http://CRAN.R-project.org/package=survey}, 226 | } 227 | 
@Article{R-survey2, 228 | title = {Analysis of Complex Survey Samples}, 229 | author = {Thomas Lumley}, 230 | year = {2004}, 231 | journal = {Journal of Statistical Software}, 232 | volume = {9}, 233 | number = {1}, 234 | pages = {1--19}, 235 | note = {\proglang{R}~package version 2.2}, 236 | } 237 | @Manual{R-survival, 238 | title = {A Package for Survival Analysis in \proglang{S}}, 239 | author = {Terry M Therneau}, 240 | year = {2015}, 241 | note = {version~2.38}, 242 | url = {http://CRAN.R-project.org/package=survival}, 243 | } 244 | @article{R-survsim, 245 | author = {David Moriña and Albert Navarro}, 246 | title = {The \proglang{R} Package \pkg{survsim} for the Simulation of Simple and Complex Survival Data}, 247 | journal = {Journal of Statistical Software}, 248 | volume = {59}, 249 | number = {2}, 250 | year = {2014}, 251 | keywords = {}, 252 | issn = {1548-7660}, 253 | pages = {1--20}, 254 | doi = {10.18637/jss.v059.i02}, 255 | url = {http://www.jstatsoft.org/index.php/jss/article/view/v059i02}, 256 | abstract = {We present an \proglang{R}~package for the simulation of simple and complex survival data. It covers different situations, including recurrent events and multiple events.
The main simulation routine allows the user to introduce an arbitrary number of distributions, each corresponding to a new event or episode, with its parameters, choosing between the Weibull (and exponential as a particular case), log-logistic and log-normal distributions.} 257 | } 258 | @Article{R-simecol, 259 | title = {\pkg{simecol}: An Object-Oriented Framework for Ecological Modeling in \proglang{R}}, 260 | author = {Thomas Petzoldt and Karsten Rinke}, 261 | journal = {Journal of Statistical Software}, 262 | volume = {22}, 263 | number = {9}, 264 | pages = {1--31}, 265 | year = {2007}, 266 | coden = {JSSOBK}, 267 | issn = {1548-7660}, 268 | url = {http://www.jstatsoft.org/v22/i09}, 269 | keywords = {ecological modeling, individual-based model, object-oriented programming (OOP), code-sharing, R.}, 270 | } 271 | @Article{R-simFrame, 272 | title = {An Object-Oriented Framework for Statistical Simulation: The \proglang{R} Package \pkg{simFrame}}, 273 | author = {Andreas Alfons and Matthias Templ and Peter Filzmoser}, 274 | journal = {Journal of Statistical Software}, 275 | year = {2010}, 276 | volume = {37}, 277 | number = {3}, 278 | pages = {1--36}, 279 | url = {http://www.jstatsoft.org/v37/i03/}, 280 | } -------------------------------------------------------------------------------- /tests/examples/set.DAG.R: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------------------------------------------- 2 | # EXAMPLE 1A: Define some Bernoulli nodes, survival outcome Y and put it together in a 3 | # DAG object 4 | #--------------------------------------------------------------------------------------- 5 | W1node <- node(name = "W1", distr = "rbern", 6 | prob = plogis(-0.5), order = 1) 7 | W2node <- node(name = "W2", distr = "rbern", 8 | prob = plogis(-0.5 + 0.5 * W1), order = 2) 9 | Anode <- node(name = "A", distr = "rbern", 10 | prob = plogis(-0.5 - 0.3 * W1 - 0.3 * W2), order = 3) 11 
| Ynode <- node(name = "Y", distr = "rbern", 12 | prob = plogis(-0.1 + 1.2 * A + 0.3 * W1 + 0.3 * W2), order = 4) 13 | D1Aset <- set.DAG(c(W1node,W2node,Anode,Ynode)) 14 | 15 | #--------------------------------------------------------------------------------------- 16 | # EXAMPLE 1B: Same as 1A, using the "+ node()" interface and no order argument 17 | #--------------------------------------------------------------------------------------- 18 | D1B <- DAG.empty() 19 | D1B <- D1B + 20 | node(name = "W1", distr = "rbern", prob = plogis(-0.5)) + 21 | node(name = "W2", distr = "rbern", prob = plogis(-0.5 + 0.5 * W1)) + 22 | node(name = "A", distr = "rbern", prob = plogis(-0.5 - 0.3 * W1 - 0.3 * W2)) + 23 | node(name = "Y", distr = "rbern", prob = plogis(-0.1 + 1.2 * A + 0.3 * W1 + 0.3 * W2)) 24 | D1Bset <- set.DAG(D1B) 25 | 26 | #--------------------------------------------------------------------------------------- 27 | # EXAMPLE 1C: Same as 1A and 1B, using the add.nodes() interface and no order argument 28 | #--------------------------------------------------------------------------------------- 29 | D1C <- DAG.empty() 30 | D1C <- add.nodes(D1C, node(name = "W1", distr = "rbern", prob = plogis(-0.5))) 31 | D1C <- add.nodes(D1C, node(name = "W2", distr = "rbern", 32 | prob = plogis(-0.5 + 0.5 * W1))) 33 | D1C <- add.nodes(D1C, node(name = "A", distr = "rbern", 34 | prob = plogis(-0.5 - 0.3 * W1 - 0.3 * W2))) 35 | D1C <- add.nodes(D1C, node(name = "Y", distr = "rbern", 36 | prob = plogis(-0.1 + 1.2 * A + 0.3 * W1 + 0.3 * W2))) 37 | D1C <- set.DAG(D1C) 38 | 39 | #--------------------------------------------------------------------------------------- 40 | # EXAMPLE 1D: Add a uniformly distributed node and redefine outcome Y as categorical 41 | #--------------------------------------------------------------------------------------- 42 | D_unif <- DAG.empty() 43 | D_unif <- D_unif + 44 | node("W1", distr = "rbern", prob = plogis(-0.5)) + 45 | node("W2", distr = "rbern", prob = plogis(-0.5
+ 0.5 * W1)) + 46 | node("W3", distr = "runif", min = plogis(-0.5 + 0.7 * W1 + 0.3 * W2), max = 10) + 47 | node("An", distr = "rbern", prob = plogis(-0.5 - 0.3 * W1 - 0.3 * W2 - 0.2 * sin(W3))) 48 | # Categorical syntax 1 (probabilities given as constant values): 49 | D_cat_1 <- D_unif + node("Y", distr = "rcat.b1", probs = {0.3; 0.4}) 50 | D_cat_1 <- set.DAG(D_cat_1) 51 | # Categorical syntax 2 (probabilities given as formulas of other nodes): 52 | D_cat_2 <- D_unif + 53 | node("Y", distr = "rcat.b1", 54 | probs={plogis(-0.1 + 1.2 * An + 0.3 * W1 + 0.3 * W2 + 0.2 * cos(W3)); 55 | plogis(-0.5 + 0.7 * W1)}) 56 | D_cat_2 <- set.DAG(D_cat_2) 57 | 58 | #--------------------------------------------------------------------------------------- 59 | # EXAMPLE 2A: Define Bernoulli nodes using the R rbinom() function, defining the prob 60 | # argument for L2 as a function of node L1 61 | #--------------------------------------------------------------------------------------- 62 | D <- DAG.empty() 63 | D <- D + 64 | node("L1", t = 0, distr = "rbinom", prob = 0.05, size = 1) + 65 | node("L2", t = 0, distr = "rbinom", prob = ifelse(L1[0] == 1, 0.5, 0.1), size = 1) 66 | Dset <- set.DAG(D) 67 | 68 | #--------------------------------------------------------------------------------------- 69 | # EXAMPLE 2B: Equivalent to 2A, passing the argument size to rbinom inside a named list 70 | # params 71 | #--------------------------------------------------------------------------------------- 72 | D <- DAG.empty() 73 | D <- D + 74 | node("L1", t = 0, distr = "rbinom", prob = 0.05, params = list(size = 1)) + 75 | node("L2", t = 0, distr = "rbinom", 76 | prob = ifelse(L1[0] == 1,0.5,0.1), params = list(size = 1)) 77 | Dset <- set.DAG(D) 78 | 79 | #--------------------------------------------------------------------------------------- 80 | # EXAMPLE 2C: Equivalent to 2A and 2B, defining Bernoulli nodes using the wrapper "rbern" 81 | #--------------------------------------------------------------------------------------- 82 | D <- DAG.empty()
83 | D <- D + 84 | node("L1", t = 0, distr = "rbern", prob = 0.05) + 85 | node("L2", t = 0, distr = "rbern", prob = ifelse(L1[0] == 1, 0.5, 0.1)) 86 | Dset <- set.DAG(D) 87 | 88 | #--------------------------------------------------------------------------------------- 89 | # EXAMPLE 3: Define a node with normal distribution using the R rnorm() function 90 | #--------------------------------------------------------------------------------------- 91 | D <- DAG.empty() 92 | D <- D + node("L2", t = 0, distr = "rnorm", mean = 10, sd = 5) 93 | Dset <- set.DAG(D) 94 | 95 | #--------------------------------------------------------------------------------------- 96 | # EXAMPLE 4: Define 34 Bernoulli nodes: 2 nodes (L2, L1) over 17 time points (t = 0,...,16) 97 | #--------------------------------------------------------------------------------------- 98 | t_end <- 16 99 | D <- DAG.empty() 100 | D <- D + 101 | node("L2", t = 0:t_end, distr = "rbinom", 102 | prob = ifelse(t == t_end, 0.5, 0.1), size = 1) + 103 | node("L1", t = 0:t_end, distr = "rbinom", 104 | prob = ifelse(L2[0] == 1, 0.5, 0.1), size = 1) 105 | Dset <- set.DAG(D) 106 | sim(Dset, n=10) 107 | 108 | #--------------------------------------------------------------------------------------- 109 | # EXAMPLE 5: Defining a new distribution function 'rbern', then defining and passing a 110 | # custom vectorized node function 'customfun' (declared via the vecfun argument) 111 | #--------------------------------------------------------------------------------------- 112 | rbern <- function(n, prob) { # a Bernoulli wrapper around the R rbinom() function (size = 1) 113 | rbinom(n = n, prob = prob, size = 1) 114 | } 115 | customfun <- function(arg, lambda) { 116 | res <- ifelse(arg == 1, lambda, 0.1) 117 | res 118 | } 119 | D <- DAG.empty() 120 | D <- D + 121 | node("W1", distr = "rbern", prob = 0.05) + 122 | node("W2", distr = "rbern", prob = customfun(W1, 0.5)) + 123 | node("W3", distr = "rbern", prob = ifelse(W1 == 1, 0.5, 0.1)) 124 | D1d <- set.DAG(D, vecfun = c("customfun")) 125 |
sim(D1d, n = 10, rndseed = 1) 126 | 127 | #--------------------------------------------------------------------------------------- 128 | # EXAMPLE 6: Defining latent variables I and U.Y (will be hidden from simulated data) 129 | #--------------------------------------------------------------------------------------- 130 | D <- DAG.empty() 131 | D <- D + 132 | node("I", 133 | distr = "rcat.b1", 134 | probs = c(0.1, 0.2, 0.2, 0.2, 0.1, 0.1, 0.1)) + 135 | node("W1", 136 | distr = "rnorm", 137 | mean = ifelse(I == 1, 0, ifelse(I == 2, 3, 10)) + 0.6 * I, 138 | sd = 1) + 139 | node("W2", 140 | distr = "runif", 141 | min = 0.025*I, max = 0.7*I) + 142 | node("W3", 143 | distr = "rbern", 144 | prob = plogis(-0.5 + 0.7*W1 + 0.3*W2 - 0.2*I)) + 145 | node("A", 146 | distr = "rbern", 147 | prob = plogis(+4.2 - 0.5*W1 + 0.2*W2/2 + 0.2*W3)) + 148 | node("U.Y", distr = "rnorm", mean = 0, sd = 1) + 149 | node("Y", 150 | distr = "rconst", 151 | const = -0.5 + 1.2*A + 0.1*W1 + 0.3*W2 + 0.2*W3 + 0.2*I + U.Y) 152 | Dset1 <- set.DAG(D, latent.v = c("I", "U.Y")) 153 | sim(Dset1, n = 10, rndseed = 1) 154 | 155 | #--------------------------------------------------------------------------------------- 156 | # EXAMPLE 7: Multivariate random variables 157 | #--------------------------------------------------------------------------------------- 158 | if (requireNamespace("mvtnorm", quietly = TRUE)) { 159 | D <- DAG.empty() 160 | # 2 dimensional normal (uncorrelated), using the rmvnorm function from the mvtnorm package: 161 | D <- D + 162 | node(c("X1","X2"), distr = "mvtnorm::rmvnorm", 163 | asis.params = list(mean = "c(0,1)")) + 164 | # Can define a degenerate (rconst) multivariate node: 165 | node(c("X1.copy", "X2.copy"), distr = "rconst", const = c(X1, X2)) 166 | Dset1 <- set.DAG(D, verbose = TRUE) 167 | sim(Dset1, n = 10) 168 | } 169 | 170 | # On the other hand, this syntax won't work, 171 | # since simcausal will parse c(0,1) into a two column matrix: 172 | \dontrun{ 173 | D <- DAG.empty() 174 | D
<- D + node(c("X1","X2"), distr = "mvtnorm::rmvnorm", mean = c(0,1)) 175 | Dset1 <- set.DAG(D, verbose = TRUE) 176 | } 177 | 178 | if (requireNamespace("mvtnorm", quietly = TRUE)) { 179 | D <- DAG.empty() 180 | # Bivariate normal (correlation coef 0.75): 181 | D <- D + 182 | node(c("X1","X2"), distr = "mvtnorm::rmvnorm", 183 | asis.params = list(mean = "c(0,1)", 184 | sigma = "matrix(c(1,0.75,0.75,1), ncol=2)")) + 185 | # Can use any component of such multivariate nodes when defining new nodes: 186 | node("A", distr = "rconst", const = 1 - X1) 187 | Dset2 <- set.DAG(D, verbose = TRUE) 188 | sim(Dset2, n = 10) 189 | } 190 | 191 | # Time-varying multivariate node (3 time-points, 2 dimensional normal) 192 | # plus changing the mean over time, as a function of t: 193 | if (requireNamespace("mvtnorm", quietly = TRUE)) { 194 | D <- DAG.empty() 195 | D <- D + 196 | node(c("X1", "X2"), t = 0:2, distr = "mvtnorm::rmvnorm", 197 | asis.params = list( 198 | mean = "c(0,1) + t", 199 | sigma = "matrix(rep(0.75,4), ncol=2)")) 200 | Dset5b <- set.DAG(D) 201 | sim(Dset5b, n = 10) 202 | } 203 | 204 | # Two ways to define the same bivariate uniform copula: 205 | if (requireNamespace("copula", quietly = TRUE)) { 206 | D <- DAG.empty() 207 | D <- D + 208 | # with a warning since normalCopula() returns an object unknown to simcausal: 209 | node(c("X1","X2"), distr = "copula::rCopula", 210 | copula = eval(copula::normalCopula(0.75, dim = 2))) + 211 | # same as above, passing the copula as an as-is (character) parameter: 212 | node(c("X3","X4"), distr = "copula::rCopula", 213 | asis.params = list(copula = "copula::normalCopula(0.75, dim = 2)")) 214 | vecfun.add("qbinom") 215 | # Bivariate binomial derived from previous copula, with same correlation: 216 | D <- D + 217 | node(c("A.Bin1", "A.Bin2"), distr = "rconst", 218 | const = c(qbinom(X1, 10, 0.5), qbinom(X2, 15, 0.7))) 219 | Dset3 <- set.DAG(D) 220 | sim(Dset3, n = 10) 221 | } 222 | 223 | # Same as "A.Bin1" and "A.Bin2", but directly using the rmvbin function in the bindata package: 224 | if
(requireNamespace("bindata", quietly = TRUE)) { 225 | D <- DAG.empty() 226 | D <- D + 227 | node(c("B.Bin1","B.Bin2"), distr = "bindata::rmvbin", 228 | asis.params = list( 229 | margprob = "c(0.5, 0.5)", 230 | bincorr = "matrix(c(1,0.75,0.75,1), ncol=2)")) 231 | Dset4b <- set.DAG(D) 232 | sim(Dset4b, n = 10) 233 | } 234 | 235 | #--------------------------------------------------------------------------------------- 236 | # EXAMPLE 8: Combining simcausal non-standard evaluation with eval() forced evaluation 237 | #--------------------------------------------------------------------------------------- 238 | coefAi <- 1:10 239 | D <- DAG.empty() 240 | D <- D + 241 | node("A", t = 1, distr = "rbern", prob = 0.7) + 242 | node("A", t = 2:10, distr = "rconst", const = eval(coefAi[t]) * A[t-1]) 243 | Dset8 <- set.DAG(D) 244 | sim(Dset8, n = 10) 245 | 246 | #--------------------------------------------------------------------------------------- 247 | # TWO equivalent ways of creating a multivariate node (combining nodes W1 and W2): 248 | #--------------------------------------------------------------------------------------- 249 | D <- DAG.empty() 250 | D <- D + node("W1", distr = "rbern", prob = 0.45) 251 | D <- D + node("W2", distr = "rconst", const = 1) 252 | 253 | # option 1: 254 | D <- D + node(c("W1.copy1", "W2.copy1"), distr = "rconst", const = c(W1, W2)) 255 | 256 | # equivalent option 2: 257 | create_mat <- function(W1, W2) cbind(W1, W2) 258 | vecfun.add("create_mat") 259 | D <- D + node(c("W1.copy2", "W2.copy2"), distr = "rconst", const = create_mat(W1, W2)) 260 | 261 | Dset <- set.DAG(D) 262 | sim(Dset, n=10, rndseed=1) 263 | -------------------------------------------------------------------------------- /vignettes/SimCausal_2014.bib: -------------------------------------------------------------------------------- 1 | @article{watts1998collective, 2 | title={Collective Dynamics of ‘Small-World’ Networks}, 3 | author={Watts, Duncan J and Strogatz, Steven H}, 4 | 
journal={Nature}, 5 | volume={393}, 6 | number={6684}, 7 | pages={440--442}, 8 | year={1998}, 9 | publisher={Nature Publishing Group} 10 | } 11 | @article{barabasi1999emergence, 12 | title={Emergence of Scaling in Random Networks}, 13 | author={Barab{\'a}si, Albert-L{\'a}szl{\'o} and Albert, R{\'e}ka}, 14 | journal={Science}, 15 | volume={286}, 16 | number={5439}, 17 | pages={509--512}, 18 | year={1999}, 19 | publisher={American Association for the Advancement of Science} 20 | } 21 | @article{renyi1959random, 22 | title={On Random Graphs}, 23 | author={Renyi, A and Erdos, P}, 24 | journal={Publicationes Mathematicae}, 25 | volume={6}, 26 | number={290-297}, 27 | pages={5}, 28 | year={1959} 29 | } 30 | @article{bollen1989structural, 31 | title={Structural Equations with Latent Variables}, 32 | author={Bollen Kenneth, A}, 33 | journal={New York Wiley}, 34 | year={1989} 35 | } 36 | @article{robins1998marginal, 37 | title = {{Marginal Structural Models}}, 38 | author = {Robins, J M}, 39 | journal = {1997 proceedings of the American Statistical Association, section on Bayesian statistical science}, 40 | pages = {1--10}, 41 | url = {http://www.biostat.harvard.edu/~robins/research.html}, 42 | year = {1998} 43 | } 44 | @article{neugebauer2015, 45 | title = {{High-Dimensional Propensity Score Algorithm in Comparative Effectiveness Research with Time-Varying Interventions}}, 46 | author = {Neugebauer, Romain and Schmittdiel, Julie A and Zhu, Zheng and Rassen, Jeremy A and Seeger, John D and Schneeweiss, Sebastian}, 47 | doi = {10.1002/sim.6377}, 48 | issn = {1097-0258}, 49 | journal = {Statistics in Medicine}, 50 | keywords = {Super Learning,comparative effectiveness,diabetes,high-dimensional propensity score,inverse probability weighting,marginal structural model}, 51 | number = {5}, 52 | pages = {753--781}, 53 | url = {http://dx.doi.org/10.1002/sim.6377}, 54 | volume = {34}, 55 | year = {2015} 56 | } 57 | @article{pearl1995, 58 | title = {{Causal Diagrams for Empirical 
Research}}, 59 | author = {Pearl, Judea}, 60 | journal = {Biometrika}, 61 | number = {4}, 62 | pages = {669--688}, 63 | publisher = {Biometrika Trust}, 64 | volume = {82}, 65 | year = {1995} 66 | } 67 | @article{imbens2004, 68 | title = {{Nonparametric Estimation of Average Treatment Effects under Exogeneity: A Review}}, 69 | author = {Imbens, Guido W}, 70 | journal = {Review of Economics and Statistics}, 71 | number = {1}, 72 | pages = {4--29}, 73 | publisher = {MIT Press}, 74 | volume = {86}, 75 | year = {2004} 76 | } 77 | @incollection{ATETshpitser, 78 | title = {{Effects of Treatment on the Treated: Identification and Generalization}}, 79 | address = {Montreal, Quebec}, 80 | author = {Shpitser, I and Pearl, J}, 81 | booktitle = {Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence}, 82 | pages = {514--521}, 83 | publisher = {AUAI Press}, 84 | year = {2009} 85 | } 86 | @article{pearl2010, 87 | title = {{An Introduction to Causal Inference}}, 88 | author = {Pearl, Judea}, 89 | journal = {The International Journal of Biostatistics}, 90 | number = {2}, 91 | volume = {6}, 92 | year = {2010} 93 | } 94 | @article{petersen2006, 95 | title = {{Estimation of Direct Causal Effects}}, 96 | author = {Petersen, Maya L and Sinisi, Sandra E and van der Laan, Mark J}, 97 | journal = {Epidemiology}, 98 | number = {3}, 99 | pages = {276--284}, 100 | publisher = {LWW}, 101 | volume = {17}, 102 | year = {2006} 103 | } 104 | @article{hafeman2011, 105 | title = {{Alternative Assumptions for the Identification of Direct and Indirect Effects}}, 106 | author = {Hafeman, Danella M and VanderWeele, Tyler J}, 107 | journal = {Epidemiology}, 108 | number = {6}, 109 | pages = {753--764}, 110 | publisher = {LWW}, 111 | volume = {22}, 112 | year = {2011} 113 | } 114 | @article{ltmleMSMs2014, 115 | title = {{Targeted Maximum Likelihood Estimation for Dynamic and Static Longitudinal Marginal Structural Working Models}}, 116 | abstract = {This paper describes a 
targeted maximum likelihood estimator (TMLE) for the parameters of longitudinal static and dynamic marginal structural models. We consider a longitudinal data structure consisting of baseline covariates, time-dependent intervention nodes, intermediate time-dependent covariates, and a possibly time-dependent outcome. The intervention nodes at each time point can include a binary treatment as well as a right-censoring indicator. Given a class of dynamic or static interventions, a marginal structural model is used to model the mean of the intervention-specific counterfactual outcome as a function of the intervention, time point, and possibly a subset of baseline covariates. Because the true shape of this function is rarely known, the marginal structural model is used as a working model. The causal quantity of interest is defined as the projection of the true function onto this working model. Iterated conditional expectation double robust estimators for marginal structural model parameters were previously proposed by Robins (2000, 2002) and Bang and Robins (2005). Here we build on this work and present a pooled TMLE for the parameters of marginal structural working models. We compare this pooled estimator to a stratified TMLE (Schnitzer et al. 2014) that is based on estimating the intervention-specific mean separately for each intervention of interest. The performance of the pooled TMLE is compared to the performance of the stratified TMLE and the performance of inverse probability weighted (IPW) estimators using simulations. Concepts are illustrated using an example in which the aim is to estimate the causal effect of delayed switch following immunological failure of first line antiretroviral therapy among HIV-infected patients. Data from the International Epidemiological Databases to Evaluate AIDS, Southern Africa are analyzed to investigate this question using both TML and IPW estimators. 
Our results demonstrate practical advantages of the pool}, 117 | author = {Petersen, Maya and Schwab, Joshua and van der Laan, Mark and Gruber, Susan and Blaser, Nello and Schomaker, Michael}, 118 | journal = {Journal of Causal Inference}, 119 | month = sep, 120 | number = {2}, 121 | pages = {39}, 122 | url = {http://ideas.repec.org/a/bpj/causin/v2y2014i2p39n1.html}, 123 | volume = {2}, 124 | year = {2014} 125 | } 126 | @article{holland1986, 127 | title = {{Statistics and Causal Inference}}, 128 | author = {Holland, Paul W}, 129 | doi = {10.1080/01621459.1986.10478354}, 130 | journal = {Journal of the American Statistical Association}, 131 | number = {396}, 132 | pages = {945--960}, 133 | volume = {81}, 134 | year = {1986} 135 | } 136 | @article{joffe2007defining, 137 | title = {{Defining and Estimating Intervention Effects for Groups That Will Develop an Auxiliary Outcome}}, 138 | author = {Joffe, Marshall M and Small, Dylan and Hsu, Chi-Yuan and Others}, 139 | journal = {Statistical Science}, 140 | number = {1}, 141 | pages = {74--97}, 142 | publisher = {Institute of Mathematical Statistics}, 143 | volume = {22}, 144 | year = {2007} 145 | } 146 | @article{greene2013balanced, 147 | title = {{The Balanced Survivor Average Causal Effect}}, 148 | author = {Greene, Tom and Joffe, Marshall and Hu, Bo and Li, Liang and Boucher, Ken}, 149 | journal = {The International Journal of Biostatistics}, 150 | number = {2}, 151 | pages = {291--306}, 152 | volume = {9}, 153 | year = {2013} 154 | } 155 | @article{VanderWeele2014, 156 | title = {{Mediation Analysis with Multiple Mediators}}, 157 | abstract = {Recent advances in the causal inference literature on mediation have extended traditional approaches to direct and indirect effects to settings that allow for interactions and non-linearities. In this paper, these approaches from causal inference are further extended to settings in which multiple mediators may be of interest. 
Two analytic approaches, one based on regression and one based on weighting are proposed to estimate the effect mediated through multiple mediators and the effects through other pathways. The approaches proposed here accommodate exposure-mediator interactions and, to a certain extent, mediator-mediator interactions as well. The methods handle binary or continuous mediators and binary, continuous or count outcomes. When the mediators affect one another, the strategy of trying to assess direct and indirect effects one mediator at a time will in general fail; the approach given in this paper can still be used. A characterization is moreover given as to when the sum of the mediated effects for multiple mediators considered separately will be equal to the mediated effect of all of the mediators considered jointly. The approach proposed in this paper is robust to unmeasured common causes of two or more mediators.}, 158 | author = {VanderWeele, T J and Vansteelandt, S}, 159 | doi = {10.1515/em-2012-0010}, 160 | isbn = {2194-9263; 2161-962X}, 161 | journal = {Epidemiologic methods}, 162 | number = {1}, 163 | pages = {95--115}, 164 | url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4287269/}, 165 | volume = {2}, 166 | year = {2014} 167 | } 168 | @article{neugebauer2014, 169 | title = {{Targeted Learning in Real-World Comparative Effectiveness Research with Time-Varying Interventions}}, 170 | author = {Neugebauer, Romain and Schmittdiel, Julie A. 
and van der Laan, Mark J.}, 171 | journal = {Statistics in Medicine}, 172 | number = {14}, 173 | pages = {2480--2520}, 174 | volume = {33}, 175 | year = {2014}, 176 | issn = {1097-0258}, 177 | url = {http://dx.doi.org/10.1002/sim.6099}, 178 | doi = {10.1002/sim.6099} 179 | } 180 | @article{vdL2012a, 181 | title = {{Targeted Minimum Loss Based Estimation of Causal Effects of Multiple Time Point Interventions}}, 182 | abstract = {We consider estimation of the effect of a multiple time point intervention on an outcome of interest, where the intervention nodes are subject to time-dependent confounding by intermediate covariates. In previous work van der Laan (2010) and Stitelman and van der Laan (2011a) developed and implemented a closed form targeted maximum likelihood estimator (TMLE) relying on the log-likelihood loss function, and demonstrated important gains relative to inverse probability of treatment weighted estimators and estimating equation based estimators. This TMLE relies on an initial estimator of the entire probability distribution of the longitudinal data structure. To enhance the finite sample performance of the TMLE of the target parameter it is of interest to select the smallest possible relevant part of the data generating distribution, which is estimated and updated by TMLE. Inspired by this goal, we develop a new closed form TMLE of an intervention specific mean outcome based on general longitudinal data structures. The target parameter is represented as an iterative sequence of conditional expectations of the outcome of interest. This collection of conditional means represents the relevant part, which is estimated and updated using the general TMLE algorithm. We also develop this new TMLE for other causal parameters, such as parameters defined by working marginal structural models. 
The theoretical properties of the TMLE are also practically demonstrated with a small scale simulation study. The proposed TMLE is building upon a previously proposed estimator Bang and Robins (2005) by integrating some of its key and innovative ideas into the TMLE framework.}, 183 | author = {van der Laan, Mark J. and Gruber, Susan}, 184 | doi = {10.1515/1557-4679.1370}, 185 | issn = {1557-4679}, 186 | journal = {The International Journal of Biostatistics}, 187 | number = {1}, 188 | pmid = {22611591}, 189 | volume = {8}, 190 | year = {2012} 191 | } 192 | @article{eckles2014design, 193 | title = {{Design and Analysis of Experiments in Networks: Reducing Bias from Interference}}, 194 | author = {Eckles, Dean and Karrer, Brian and Ugander, Johan}, 195 | journal = {arXiv preprint arXiv:1404.7530}, 196 | year = {2014} 197 | } 198 | @inproceedings{PearlDirInd, 199 | title = {{Direct and Indirect Effects}}, 200 | address = {San Francisco, CA, USA}, 201 | author = {Pearl, Judea}, 202 | booktitle = {Proceedings of the Seventeenth Conference on Uncertainty in Artificial Intelligence}, 203 | isbn = {1-55860-800-1}, 204 | pages = {411--420}, 205 | publisher = {Morgan Kaufmann Publishers Inc.}, 206 | series = {UAI'01}, 207 | url = {http://dl.acm.org/citation.cfm?id=2074022.2074073}, 208 | year = {2001} 209 | } 210 | @book{Pearl2009, 211 | title = {{Causality: Models, Reasoning and Inference}}, 212 | address = {New York, NY, USA}, 213 | author = {Pearl, Judea}, 214 | edition = {2nd}, 215 | isbn = {052189560X, 9780521895606}, 216 | publisher = {Cambridge University Press}, 217 | year = {2009} 218 | } 219 | @book{sarndal2003model, 220 | title = {{Model Assisted Survey Sampling}}, 221 | author = {S\"{a}rndal, Carl-Erik and Swensson, Bengt and Wretman, Jan}, 222 | publisher = {Springer Science \& Business Media}, 223 | year = {2003} 224 | } 225 | @article{neugebauer2007, 226 | title = {{Nonparametric Causal Effects Based on Marginal Structural Models}}, 227 | author = {Neugebauer, 
Romain and van der Laan, Mark}, 228 | journal = {Journal of Statistical Planning and Inference}, 229 | number = {2}, 230 | pages = {419--434}, 231 | publisher = {Elsevier}, 232 | volume = {137}, 233 | year = {2007} 234 | } 235 | @article{vanderweele2009, 236 | title = {{Marginal Structural Models for the Estimation of Direct and Indirect Effects}}, 237 | author = {VanderWeele, Tyler J}, 238 | journal = {Epidemiology}, 239 | number = {1}, 240 | pages = {18--26}, 241 | publisher = {LWW}, 242 | volume = {20}, 243 | year = {2009} 244 | } 245 | @article{lefebvre2008, 246 | title = {{Impact of Mis-Specification of the Treatment Model on Estimates from a Marginal Structural Model}}, 247 | author = {Lefebvre, G and Delaney, J A and Platt, R W}, 248 | journal = {Statistics in Medicine}, 249 | publisher = {John Wiley \& Sons, Ltd.}, 250 | number = {18}, 251 | pages = {3629--3642}, 252 | volume = {27}, 253 | year = {2008}, 254 | issn = {1097-0258}, 255 | url = {http://dx.doi.org/10.1002/sim.3200}, 256 | doi = {10.1002/sim.3200} 257 | } 258 | --------------------------------------------------------------------------------