├── .Rbuildignore ├── .github ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ └── issue_template.md ├── SUPPORT.md └── workflows │ ├── R-CMD-check.yaml │ ├── cancel.yaml │ ├── covr.yaml │ ├── pkgdown.yaml │ ├── pr-commands.yml │ ├── readme.yaml │ ├── release-checks-manual.yml │ ├── touchstone-comment.yaml │ ├── touchstone-receive.yaml │ └── update-citation-cff.yaml ├── .gitignore ├── .lintr ├── CITATION.cff ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── RcppExports.R ├── add_correlated_data.R ├── add_data.R ├── asserts.R ├── conditions.R ├── define_data.R ├── generate_correlated_data.R ├── generate_data.R ├── generate_dist.R ├── glue.R ├── group_data.R ├── int_rmult.R ├── internal_utility.R ├── missing_data.R ├── simstudy-package.R ├── utility.R └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── bench └── define.R ├── codecov.yml ├── codemeta.json ├── cran-comments.md ├── inst └── CITATION ├── man ├── addColumns.Rd ├── addCompRisk.Rd ├── addCondition.Rd ├── addCorData.Rd ├── addCorFlex.Rd ├── addCorGen.Rd ├── addDataDensity.Rd ├── addMarkov.Rd ├── addMultiFac.Rd ├── addPeriods.Rd ├── addSynthetic.Rd ├── betaGetShapes.Rd ├── blockDecayMat.Rd ├── blockExchangeMat.Rd ├── catProbs.Rd ├── defCondition.Rd ├── defData.Rd ├── defDataAdd.Rd ├── defMiss.Rd ├── defRead.Rd ├── defReadAdd.Rd ├── defReadCond.Rd ├── defRepeat.Rd ├── defRepeatAdd.Rd ├── defSurv.Rd ├── delColumns.Rd ├── distributions.Rd ├── gammaGetShapeRate.Rd ├── genCatFormula.Rd ├── genCluster.Rd ├── genCorData.Rd ├── genCorFlex.Rd ├── genCorGen.Rd ├── genCorMat.Rd ├── genCorOrdCat.Rd ├── genData.Rd ├── genDataDensity.Rd ├── genDummy.Rd ├── genFactor.Rd ├── genFormula.Rd ├── genMarkov.Rd ├── genMiss.Rd ├── genMixFormula.Rd ├── genMultiFac.Rd ├── genNthEvent.Rd ├── genObs.Rd ├── genOrdCat.Rd ├── genSpline.Rd ├── genSurv.Rd ├── genSynthetic.Rd ├── iccRE.Rd ├── logisticCoefs.Rd ├── mergeData.Rd ├── negbinomGetSizeProb.Rd ├── simstudy-deprecated.Rd ├── 
simstudy-package.Rd ├── survGetParams.Rd ├── survParamPlot.Rd ├── trimData.Rd ├── trtAssign.Rd ├── trtObserve.Rd ├── trtStepWedge.Rd ├── updateDef.Rd ├── updateDefAdd.Rd ├── viewBasis.Rd └── viewSplines.Rd ├── paper ├── paper.md └── simstudy.bib ├── src ├── RcppExports.cpp └── srcRcpp.cpp ├── tests ├── .lintr ├── testthat.R └── testthat │ ├── helper-gen_def.R │ ├── setup-general.R │ ├── teardown-general.R │ ├── test-actual-distributions.R │ ├── test-add_data.R │ ├── test-asserts.R │ ├── test-conditions.R │ ├── test-define_data.R │ ├── test-generate_correlated_data.R │ ├── test-generate_data.R │ ├── test-generate_dist.R │ ├── test-glue.R │ ├── test-group_data.R │ ├── test-internal_utility.R │ ├── test-missing_data.R │ ├── test-survival.R │ └── test-utility.R ├── touchstone ├── .gitignore ├── config.json ├── footer.R ├── header.R └── script.R └── vignettes ├── clustered.Rmd ├── corelationmat.Rmd ├── correlated.Rmd ├── customdist.Rmd ├── double_dot_extension.Rmd ├── logisticCoefs.Rmd ├── longitudinal.Rmd ├── missing.Rmd ├── ordinal.Rmd ├── simstudy.Rmd ├── spline.Rmd ├── survival.Rmd └── treat_and_exposure.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE$ 4 | ^cran-comments\.md$ 5 | ^README\.Rmd$ 6 | ^README-.*\.png$ 7 | ^Working-code$ 8 | ^Demo$ 9 | ^CRAN-RELEASE$ 10 | ^doc$ 11 | ^docs$ 12 | ^Meta$ 13 | ^\.github$ 14 | ^codecov\.yml$ 15 | ^_pkgdown\.yml$ 16 | ^pkgdown$ 17 | ^\.lintr$ 18 | ^tests/\.lintr$ 19 | ^File_management$ 20 | ^simstudy\.code-workspace$ 21 | ^codemeta\.json$ 22 | ^paper$ 23 | ^touchstone$ 24 | ^bench$ 25 | ^CITATION\.cff$ 26 | ^README\.md$ 27 | ^NEWS\.md$ 28 | ^CRAN-SUBMISSION$ 29 | ^revdep$ 30 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | 
-------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity and 10 | orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our 
standards 42 | of acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies 54 | when an individual is officially representing the community in public spaces. 55 | 56 | ## Enforcement 57 | 58 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 59 | reported to the community leaders responsible for enforcement at 60 | keith.goldfeld@nyumc.org. All complaints will be reviewed and investigated 61 | promptly and fairly. 62 | 63 | All community leaders are obligated to respect the privacy and security of the 64 | reporter of any incident. 65 | 66 | ## Enforcement Guidelines 67 | 68 | Community leaders will follow these Community Impact Guidelines in determining 69 | the consequences for any action they deem in violation of this Code of Conduct: 70 | 71 | ### 1. Correction 72 | 73 | **Community Impact**: Use of inappropriate language or other behavior deemed 74 | unprofessional or unwelcome in the community. 75 | 76 | **Consequence**: A private, written warning from community leaders, providing 77 | clarity around the nature of the violation and an explanation of why the 78 | behavior was inappropriate. A public apology may be requested. 79 | 80 | ### 2. Warning 81 | 82 | **Community Impact**: A violation through a single incident or series of 83 | actions. 84 | 85 | **Consequence**: A warning with consequences for continued behavior. 
No 86 | interaction with the people involved, including unsolicited interaction with 87 | those enforcing the Code of Conduct, for a specified period of time. This 88 | includes avoiding interactions in community spaces as well as external channels 89 | like social media. Violating these terms may lead to a temporary or permanent 90 | ban. 91 | 92 | ### 3. Temporary Ban 93 | 94 | **Community Impact**: A serious violation of community standards, including 95 | sustained inappropriate behavior. 96 | 97 | **Consequence**: A temporary ban from any sort of interaction or public 98 | communication with the community for a specified period of time. No public or 99 | private interaction with the people involved, including unsolicited interaction 100 | with those enforcing the Code of Conduct, is allowed during this period. 101 | Violating these terms may lead to a permanent ban. 102 | 103 | ### 4. Permanent Ban 104 | 105 | **Community Impact**: Demonstrating a pattern of violation of community 106 | standards, including sustained inappropriate behavior, harassment of an 107 | individual, or aggression toward or disparagement of classes of individuals. 108 | 109 | **Consequence**: A permanent ban from any sort of public interaction within the 110 | community. 111 | 112 | ## Attribution 113 | 114 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 115 | version 2.0, 116 | available at https://www.contributor-covenant.org/version/2/0/ 117 | code_of_conduct.html. 118 | 119 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 120 | enforcement ladder](https://github.com/mozilla/diversity). 121 | 122 | [homepage]: https://www.contributor-covenant.org 123 | 124 | For answers to common questions about this code of conduct, see the FAQ at 125 | https://www.contributor-covenant.org/faq. Translations are available at https:// 126 | www.contributor-covenant.org/translations. 
127 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to simstudy 2 | 3 | This outlines how to propose a change to simstudy. 4 | For more detailed info about contributing to this package please see the well-written 5 | [**tidyverse development contributing guide**](https://rstd.io/tidy-contrib). 6 | The only difference is that simstudy is not developed in bursts and we should 7 | be able to answer your issues relatively quickly! Please do keep in mind that this 8 | is a volunteer project though. 9 | 10 | ## Fixing typos 11 | 12 | You can fix typos, spelling mistakes, or grammatical errors in the documentation directly using the GitHub web interface, as long as the changes are made in the _source_ file. 13 | This generally means you'll need to edit [roxygen2 comments](https://roxygen2.r-lib.org/articles/roxygen2.html) in an `.R`, not a `.Rd` file. 14 | You can find the `.R` file that generates the `.Rd` by reading the comment in the first line. 15 | 16 | ## Bigger changes 17 | 18 | If you want to make a bigger change, it's a good idea to first file an issue and make sure someone from the team agrees that it’s needed. 19 | If you’ve found a bug, please file an issue that illustrates the bug with a minimal 20 | [reprex](https://www.tidyverse.org/help/#reprex) (this will also help you write a unit test, if needed). 21 | 22 | ### Pull request process 23 | 24 | * Fork the package and clone it onto your computer. If you haven't done this before, we recommend using `usethis::create_from_github("kgoldfeld/simstudy", fork = TRUE)`. 25 | 26 | * Install all development dependencies with `devtools::install_dev_deps()`, and then make sure the package passes R CMD check by running `devtools::check()`. 27 | If R CMD check doesn't pass cleanly, it's a good idea to ask for help before continuing.
28 | * Create a Git branch for your pull request (PR). We recommend using `usethis::pr_init("brief-description-of-change")`. 29 | 30 | * Make your changes, commit to git, and then create a PR by running `usethis::pr_push()`, and following the prompts in your browser. 31 | The title of your PR should briefly describe the change. 32 | The body of your PR should contain `Fixes #issue-number`. 33 | 34 | * For user-facing changes, add a bullet to the top of `NEWS.md` (i.e. just below the first header). Follow the style of the existing entries. 35 | 36 | ### Code style 37 | 38 | * New code should follow the tidyverse [style 39 | guide](https://style.tidyverse.org) but with camelCase instead of snake_case. 40 | You can use the [styler](https://CRAN.R-project.org/package=styler) package to apply these styles, but please don't restyle code that has nothing to do with your PR. 41 | 42 | * We use [roxygen2](https://cran.r-project.org/package=roxygen2), with [Markdown syntax](https://cran.r-project.org/web/packages/roxygen2/vignettes/rd-formatting.html), for documentation. 43 | 44 | * We use [testthat](https://cran.r-project.org/package=testthat) for unit tests. 45 | Contributions with test cases included are easier to accept. 46 | 47 | ## Code of Conduct 48 | 49 | Please note that the simstudy project is released with a 50 | [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this 51 | project you agree to abide by its terms. 52 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/issue_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report or feature request 3 | about: Describe a bug you've seen or make a case for a new feature 4 | --- 5 | 6 | Please briefly describe your problem and what output you expect. 7 | 8 | Please include a minimal reproducible example (AKA a reprex).
If you've never heard of a [reprex](http://reprex.tidyverse.org/) before, start by reading . 9 | 10 | Brief description of the problem 11 | 12 | ```r 13 | # insert reprex here 14 | ``` 15 | -------------------------------------------------------------------------------- /.github/SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Getting help with simstudy 2 | 3 | Thanks for using simstudy! 4 | Before filing an issue, there are a few places to explore and pieces to put together to make the process as smooth as possible. 5 | 6 | ## Make a reprex 7 | 8 | Start by making a minimal **repr**oducible **ex**ample using the [reprex](https://reprex.tidyverse.org/) package. 9 | If you haven't heard of or used reprex before, you're in for a treat! 10 | Seriously, reprex will make all of your R-question-asking endeavors easier (which is a pretty insane ROI for the five to ten minutes it'll take you to learn what it's all about). 11 | For additional reprex pointers, check out the [Get help!](https://www.tidyverse.org/help/) section of the tidyverse site. 12 | 13 | ## Where to ask? 14 | 15 | Armed with your reprex, the next step is to [search issues and pull requests](https://github.com/kgoldfeld/simstudy/issues) to make sure the question/bug hasn't been asked/reported and/or already fixed in the development version. 16 | By default, the search will be pre-populated with `is:issue is:open`. 17 | You can [edit the qualifiers](https://help.github.com/articles/searching-issues-and-pull-requests/) (e.g. `is:pr`, `is:closed`) as needed. 18 | For example, you'd simply remove `is:open` to search _all_ issues in the repo, open or closed. 19 | 20 | ## What happens next? 21 | 22 | We will try to respond to all issues in a reasonable time but please keep in 23 | mind that this is a volunteer effort. 24 | If we can’t reproduce the bug, we can’t fix it! 
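The reprex workflow described in SUPPORT.md above can be sketched roughly as follows. This is an illustrative, hypothetical snippet (the `defData()`/`genData()` calls are from simstudy's exported API, but the specific variable definition is made up for the example); it assumes the reprex and simstudy packages are installed:

```r
# Hypothetical sketch: wrap the code that triggers your question or bug
# in reprex::reprex() so the code and its output render together.
library(reprex)

reprex({
  library(simstudy)
  # define one normally distributed variable and generate a few rows
  def <- defData(varname = "x", formula = 10, variance = 2, dist = "normal")
  genData(5, def)
})
```

Running this renders the code along with its output as a self-contained, clipboard-ready snippet that can be pasted directly into a GitHub issue.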
25 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | name: R-CMD-check 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | workflow_dispatch: 11 | inputs: 12 | git-ref: 13 | description: Git Ref (Optional) 14 | required: false 15 | 16 | jobs: 17 | R-CMD-check: 18 | runs-on: ${{ matrix.config.os }} 19 | 20 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 21 | 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | config: 26 | - {os: windows-latest, r: 'release'} 27 | - {os: windows-latest, r: 'oldrel'} 28 | - {os: macos-latest, r: 'release'} 29 | - {os: macos-latest, r: 'oldrel'} 30 | - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 31 | - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 32 | 33 | env: 34 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 35 | RSPM: ${{ matrix.config.rspm }} 36 | 37 | steps: 38 | - name: Clone Repository 39 | uses: actions/checkout@v3 40 | with: 41 | ref: ${{ github.event.inputs.git-ref || github.sha }} 42 | 43 | - uses: r-lib/actions/setup-r@v2 44 | with: 45 | r-version: ${{ matrix.config.r }} 46 | - uses: r-lib/actions/setup-pandoc@v2 47 | - uses: r-lib/actions/setup-r-dependencies@v2 48 | with: 49 | extra-packages: | 50 | rcmdcheck 51 | hedgehogqa/r-hedgehog 52 | - uses: r-lib/actions/check-r-package@v2 -------------------------------------------------------------------------------- /.github/workflows/cancel.yaml: -------------------------------------------------------------------------------- 1 | # https://github.com/styfle/cancel-workflow-action 2 | name: Cancel 3 | on: 4 | workflow_run: 5 | workflows: ["Continuous Benchmarks (Comment)", "Continuous Benchmarks (Receive)", "code-coverage", "pkgdown", "R-CMD-check"] # list by 
name as in `name:` of the workflow file 6 | types: 7 | - requested 8 | jobs: 9 | cancel: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: styfle/cancel-workflow-action@0.9.1 13 | with: 14 | workflow_id: ${{ github.event.workflow.id }} 15 | -------------------------------------------------------------------------------- /.github/workflows/covr.yaml: -------------------------------------------------------------------------------- 1 | name: code-coverage 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: macos-latest 14 | steps: 15 | - uses: actions/checkout@v2 16 | - uses: r-lib/actions/setup-r@v2 17 | with: 18 | r-version: 'release' 19 | - uses: r-lib/actions/setup-pandoc@v2 20 | - uses: r-lib/actions/setup-r-dependencies@v2 21 | with: 22 | extra-packages: covr 23 | - name: Test coverage 24 | env: 25 | NOT_CRAN: true 26 | run: covr::codecov() 27 | shell: Rscript {0} 28 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - release 6 | workflow_dispatch: 7 | inputs: 8 | git-ref: 9 | description: Git Ref (Optional) 10 | required: false 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: macos-latest 17 | env: 18 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 19 | steps: 20 | - name: Clone Repository 21 | uses: actions/checkout@v3 22 | with: 23 | ref: ${{ github.event.inputs.git-ref || github.sha }} 24 | 25 | - uses: r-lib/actions/setup-r@v2 26 | - uses: r-lib/actions/setup-pandoc@v2 27 | - uses: r-lib/actions/setup-r-dependencies@v2 28 | with: 29 | extra-packages: | 30 | github::r-lib/pkgdown 31 | local::. 
32 | - name: Deploy package 33 | run: | 34 | git config --local user.email "actions@github.com" 35 | git config --local user.name "GitHub Actions" 36 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' 37 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yml: -------------------------------------------------------------------------------- 1 | name: Commands 2 | on: 3 | issue_comment: 4 | types: [created] 5 | 6 | jobs: 7 | document: 8 | if: startsWith(github.event.comment.body, '/document') 9 | name: document 10 | runs-on: macOS-latest 11 | env: 12 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: r-lib/actions/pr-fetch@v2 16 | with: 17 | repo-token: ${{ secrets.GITHUB_TOKEN }} 18 | - uses: r-lib/actions/setup-r@v2 19 | - name: Install dependencies 20 | run: Rscript -e 'install.packages(c("remotes", "roxygen2"))' -e 'remotes::install_deps(dependencies = TRUE)' 21 | - name: Document 22 | run: Rscript -e 'roxygen2::roxygenise()' 23 | - name: commit 24 | run: | 25 | git config --local user.email "actions@github.com" 26 | git config --local user.name "GitHub Actions" 27 | git add man/\* NAMESPACE 28 | git commit -m 'Document' 29 | - uses: r-lib/actions/pr-push@v2 30 | with: 31 | repo-token: ${{ secrets.GITHUB_TOKEN }} 32 | style: 33 | if: startsWith(github.event.comment.body, '/style') 34 | name: style 35 | runs-on: macOS-latest 36 | env: 37 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 38 | steps: 39 | - uses: actions/checkout@v3 40 | - uses: r-lib/actions/pr-fetch@v2 41 | with: 42 | repo-token: ${{ secrets.GITHUB_TOKEN }} 43 | - uses: r-lib/actions/setup-r@v2 44 | - name: Install dependencies 45 | run: Rscript -e 'install.packages("styler")' 46 | - name: Style 47 | run: Rscript -e 'styler::style_pkg()' 48 | - name: commit 49 | run: | 50 | git config --local user.email "actions@github.com" 51 | git config --local user.name "GitHub Actions" 52 | git 
add \*.R 53 | git commit -m 'Style' 54 | - uses: r-lib/actions/pr-push@v2 55 | with: 56 | repo-token: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.github/workflows/readme.yaml: -------------------------------------------------------------------------------- 1 | name: Render README 2 | 3 | on: 4 | push: 5 | paths: 6 | - README.Rmd 7 | 8 | jobs: 9 | render: 10 | name: Render README 11 | runs-on: macos-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - uses: r-lib/actions/setup-r@v2 15 | - uses: r-lib/actions/setup-pandoc@v2 16 | - uses: r-lib/actions/setup-r-dependencies@v2 17 | with: 18 | extra-packages: rmarkdown 19 | - name: Render README 20 | run: Rscript -e 'rmarkdown::render("README.Rmd")' 21 | - name: Commit results 22 | run: | 23 | git config --local user.email "actions@github.com" 24 | git config --local user.name "GitHub Actions" 25 | git commit README.md -m 'Re-build README.Rmd' || echo "No changes to commit" 26 | git push origin || echo "No changes to commit" -------------------------------------------------------------------------------- /.github/workflows/release-checks-manual.yml: -------------------------------------------------------------------------------- 1 | name: release checks 2 | # only works when this file is in the default branch 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | git-ref: 7 | description: Git Ref (Optional) 8 | required: false 9 | email: 10 | description: email 11 | required: true 12 | default: "keith.goldfeld@nyulangone.org" 13 | token: 14 | description: email validation token 15 | required: true 16 | platforms: 17 | description: platforms to check on 18 | required: true 19 | default: "c('windows-x86_64-devel','fedora-clang-devel','linux-x86_64-rocker-gcc-san','debian-gcc-devel-nold')" 20 | jobs: 21 | rhub: 22 | runs-on: macos-latest 23 | env: 24 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 25 | steps: 26 | - name: Clone Repository (Latest) 27 | uses: 
actions/checkout@v3 28 | if: github.event.inputs.git-ref == '' 29 | 30 | - name: Clone Repository (Custom Ref) 31 | uses: actions/checkout@v3 32 | if: github.event.inputs.git-ref != '' 33 | with: 34 | ref: ${{ github.event.inputs.git-ref }} 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | - uses: r-lib/actions/setup-pandoc@v2 38 | - uses: r-lib/actions/setup-r-dependencies@v2 39 | with: 40 | extra-packages: rhub 41 | - name: Run rhub checks 42 | run: | 43 | rhub::validate_email(email = ${{ github.event.inputs.email }}, token = ${{ github.event.inputs.token }}) 44 | rhub::check(platform = ${{ github.event.inputs.platforms }}, email = ${{ github.event.inputs.email }}) 45 | shell: Rscript {0} 46 | 47 | -------------------------------------------------------------------------------- /.github/workflows/touchstone-comment.yaml: -------------------------------------------------------------------------------- 1 | name: Continuous Benchmarks (Comment) 2 | 3 | # read-write repo token 4 | # access to secrets 5 | on: 6 | workflow_run: 7 | workflows: ["Continuous Benchmarks (Receive)"] 8 | types: 9 | - completed 10 | 11 | jobs: 12 | upload: 13 | runs-on: ubuntu-latest 14 | if: > 15 | ${{ github.event.workflow_run.event == 'pull_request' && 16 | github.event.workflow_run.conclusion == 'success' }} 17 | steps: 18 | - name: 'Download artifact' 19 | uses: actions/github-script@v3.1.0 20 | with: 21 | script: | 22 | var artifacts = await github.actions.listWorkflowRunArtifacts({ 23 | owner: context.repo.owner, 24 | repo: context.repo.repo, 25 | run_id: ${{github.event.workflow_run.id }}, 26 | }); 27 | var matchArtifact = artifacts.data.artifacts.filter((artifact) => { 28 | return artifact.name == "pr" 29 | })[0]; 30 | var download = await github.actions.downloadArtifact({ 31 | owner: context.repo.owner, 32 | repo: context.repo.repo, 33 | artifact_id: matchArtifact.id, 34 | archive_format: 'zip', 35 | }); 36 | var fs = require('fs'); 37 | 
fs.writeFileSync('${{github.workspace}}/pr.zip', Buffer.from(download.data)); 38 | - run: unzip pr.zip 39 | - name: 'Comment on PR' 40 | uses: actions/github-script@v3 41 | with: 42 | github-token: ${{ secrets.GITHUB_TOKEN }} 43 | script: | 44 | var fs = require('fs'); 45 | var issue_number = Number(fs.readFileSync('./NR')); 46 | var body = fs.readFileSync('./info.txt').toString(); 47 | await github.issues.createComment({ 48 | owner: context.repo.owner, 49 | repo: context.repo.repo, 50 | issue_number: issue_number, 51 | body: body 52 | }); 53 | -------------------------------------------------------------------------------- /.github/workflows/touchstone-receive.yaml: -------------------------------------------------------------------------------- 1 | name: Continuous Benchmarks (Receive) 2 | on: pull_request 3 | jobs: 4 | prepare: 5 | runs-on: ubuntu-latest 6 | outputs: 7 | config: ${{ steps.read_touchstone_config.outputs.config }} 8 | steps: 9 | - name: Checkout repo 10 | uses: actions/checkout@v2 11 | with: 12 | fetch-depth: 0 13 | 14 | - id: read_touchstone_config 15 | run: | 16 | content=`cat ./touchstone/config.json` 17 | # the following lines are only required for multi line json 18 | content="${content//'%'/'%25'}" 19 | content="${content//$'\n'/'%0A'}" 20 | content="${content//$'\r'/'%0D'}" 21 | # end of optional handling for multi line json 22 | echo "::set-output name=config::$content" 23 | build: 24 | needs: prepare 25 | runs-on: ${{ matrix.config.os }} 26 | strategy: 27 | fail-fast: false 28 | matrix: 29 | config: 30 | - ${{ fromJson(needs.prepare.outputs.config) }} 31 | env: 32 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 33 | RSPM: ${{ matrix.config.rspm }} 34 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 35 | steps: 36 | - name: Checkout repo 37 | uses: actions/checkout@v2 38 | with: 39 | fetch-depth: 0 40 | - name: Set up git user 41 | run: | 42 | git config --local user.name "GitHub Actions" 43 | git config --local user.email "actions@github.com" 44 | - 
name: Ensure base branch is fetched 45 | run: | 46 | git checkout -b $GITHUB_HEAD_REF # this is current ref. This is required for naming. 47 | git branch $GITHUB_BASE_REF remotes/origin/$GITHUB_BASE_REF 48 | - name: Setup R 49 | uses: r-lib/actions/setup-r@v2 50 | - name: Setup dependencies 51 | uses: r-lib/actions/setup-r-dependencies@v2 52 | with: 53 | cache-version: 1 54 | extra-packages: | 55 | lorenzwalthert/touchstone 56 | ggplot2 57 | dplyr 58 | gert 59 | - name: Remove global installation 60 | run: | 61 | pkg <- basename(getwd()) 62 | if (pkg %in% rownames(installed.packages())) { 63 | remove.packages(pkg) 64 | cat('removed package ', pkg, '.', sep = "") 65 | } 66 | shell: Rscript {0} 67 | - name: Checkout benchmarking repo 68 | if: ${{ matrix.config.benchmarking_repo != ''}} 69 | uses: actions/checkout@v2 70 | with: 71 | repository: ${{ matrix.config.benchmarking_repo }} 72 | ref: ${{ matrix.config.benchmarking_ref }} 73 | path: ${{ matrix.config.benchmarking_path }} 74 | - name: Run benchmarks 75 | run: Rscript -e 'touchstone::run_script("touchstone/script.R")' 76 | - name: Save PR number 77 | run: | 78 | echo ${{ github.event.number }} > ./touchstone/pr-comment/NR 79 | - uses: actions/upload-artifact@v2 80 | with: 81 | name: visual-benchmarks 82 | path: touchstone/plots/ 83 | - uses: actions/upload-artifact@v1 84 | with: 85 | name: results 86 | path: touchstone/pr-comment 87 | - uses: actions/download-artifact@v1 88 | with: 89 | name: results 90 | - name: comment PR 91 | run: cat touchstone/pr-comment/info.txt 92 | - uses: actions/upload-artifact@v2 93 | with: 94 | name: pr 95 | path: touchstone/pr-comment/ 96 | -------------------------------------------------------------------------------- /.github/workflows/update-citation-cff.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # The action runs when: 3 | # - A new release is 
published 4 | # - The DESCRIPTION or inst/CITATION are modified 5 | # - Can be run manually 6 | # For customizing the triggers, visit https://docs.github.com/en/actions/learn-github-actions/events-that-trigger-workflows 7 | on: 8 | release: 9 | types: [published] 10 | push: 11 | paths: 12 | - DESCRIPTION 13 | - inst/CITATION 14 | workflow_dispatch: 15 | 16 | name: Update CITATION.cff 17 | 18 | jobs: 19 | update-citation-cff: 20 | runs-on: macos-latest 21 | env: 22 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 23 | steps: 24 | - uses: actions/checkout@v3 25 | 26 | - uses: r-lib/actions/setup-r@v2 27 | with: 28 | use-public-rspm: true 29 | 30 | - uses: r-lib/actions/setup-r-dependencies@v2 31 | with: 32 | extra-packages: | 33 | cffr 34 | V8 35 | 36 | - name: Update CITATION.cff 37 | run: | 38 | 39 | library(cffr) 40 | 41 | # Customize with your own code 42 | # See https://docs.ropensci.org/cffr/articles/cffr.html 43 | 44 | # Write your own keys 45 | mykeys <- list() 46 | 47 | # Create your CITATION.cff file 48 | cff_write(keys = mykeys) 49 | 50 | shell: Rscript {0} 51 | 52 | - name: Commit results 53 | run: | 54 | git config --local user.name "$GITHUB_ACTOR" 55 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 56 | git add CITATION.cff 57 | git commit -m 'Update CITATION.cff' || echo "No changes to commit" 58 | git push origin || echo "No changes to commit" 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .DS_Store 5 | Working-code/ 6 | Demo/ 7 | simstudy.Rproj 8 | inst/doc 9 | cran-comments.md 10 | src/*.o 11 | src/*.so 12 | src/*.dll 13 | src-i386/ 14 | src-x64/ 15 | doc 16 | Meta 17 | simstudy.code-workspace 18 | .vscode/ 19 | docs 20 | /doc/ 21 | /Meta/ 22 | revdep/ 23 | -------------------------------------------------------------------------------- 
/.lintr: -------------------------------------------------------------------------------- 1 | linters: with_defaults(object_name_linter("camelCase"), object_usage_linter = NULL) 2 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Type: Package 2 | Package: simstudy 3 | Title: Simulation of Study Data 4 | Version: 0.8.1.9000 5 | Date: 2024-07-29 6 | Authors@R: 7 | c(person(given = "Keith", 8 | family = "Goldfeld", 9 | role = c("aut", "cre"), 10 | email = "keith.goldfeld@nyulangone.org", 11 | comment = c(ORCID = "0000-0002-0292-8780")), 12 | person(given = "Jacob", 13 | family = "Wujciak-Jens", 14 | role = "aut", 15 | email = "jacob@wujciak.de", 16 | comment = c(ORCID = "0000-0002-7281-3989"))) 17 | Description: Simulates data sets in order to explore modeling 18 | techniques or better understand data generating processes. The user 19 | specifies a set of relationships between covariates, and generates 20 | data based on these specifications. The final data sets can represent 21 | data from randomized control trials, repeated measure (longitudinal) 22 | designs, and cluster randomized trials. Missingness can be generated 23 | using various mechanisms (MCAR, MAR, NMAR). 
24 | License: GPL-3 25 | URL: https://github.com/kgoldfeld/simstudy, 26 | https://kgoldfeld.github.io/simstudy/, 27 | https://kgoldfeld.github.io/simstudy/dev/ 28 | BugReports: https://github.com/kgoldfeld/simstudy/issues 29 | Depends: 30 | R (>= 3.3.0) 31 | Imports: 32 | data.table, 33 | glue, 34 | methods, 35 | mvnfast, 36 | Rcpp, 37 | backports, 38 | fastglm 39 | Suggests: 40 | covr, 41 | dplyr, 42 | formatR, 43 | gee, 44 | ggplot2, 45 | grid, 46 | gridExtra, 47 | hedgehog, 48 | knitr, 49 | magrittr, 50 | Matrix, 51 | mgcv, 52 | ordinal, 53 | pracma, 54 | rmarkdown, 55 | scales, 56 | splines, 57 | survival, 58 | testthat, 59 | gtsummary, 60 | broom.helpers, 61 | survminer, 62 | katex, 63 | dirmult, 64 | rms 65 | LinkingTo: 66 | Rcpp, 67 | pbv (>= 0.4-22), 68 | fastglm 69 | VignetteBuilder: 70 | knitr 71 | Encoding: UTF-8 72 | RoxygenNote: 7.3.2 73 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(addColumns) 4 | export(addCompRisk) 5 | export(addCondition) 6 | export(addCorData) 7 | export(addCorFlex) 8 | export(addCorGen) 9 | export(addDataDensity) 10 | export(addMarkov) 11 | export(addMultiFac) 12 | export(addPeriods) 13 | export(addSynthetic) 14 | export(betaGetShapes) 15 | export(blockDecayMat) 16 | export(blockExchangeMat) 17 | export(catProbs) 18 | export(defCondition) 19 | export(defData) 20 | export(defDataAdd) 21 | export(defMiss) 22 | export(defRead) 23 | export(defReadAdd) 24 | export(defReadCond) 25 | export(defRepeat) 26 | export(defRepeatAdd) 27 | export(defSurv) 28 | export(delColumns) 29 | export(gammaGetShapeRate) 30 | export(genCatFormula) 31 | export(genCluster) 32 | export(genCorData) 33 | export(genCorFlex) 34 | export(genCorGen) 35 | export(genCorMat) 36 | export(genCorOrdCat) 37 | export(genData) 38 | export(genDataDensity) 39 | export(genDummy) 40 | 
export(genFactor) 41 | export(genFormula) 42 | export(genMarkov) 43 | export(genMiss) 44 | export(genMixFormula) 45 | export(genMultiFac) 46 | export(genNthEvent) 47 | export(genObs) 48 | export(genOrdCat) 49 | export(genSpline) 50 | export(genSurv) 51 | export(genSynthetic) 52 | export(iccRE) 53 | export(logisticCoefs) 54 | export(mergeData) 55 | export(negbinomGetSizeProb) 56 | export(survGetParams) 57 | export(survParamPlot) 58 | export(trimData) 59 | export(trtAssign) 60 | export(trtObserve) 61 | export(trtStepWedge) 62 | export(updateDef) 63 | export(updateDefAdd) 64 | export(viewBasis) 65 | export(viewSplines) 66 | import(data.table) 67 | import(glue) 68 | importFrom(Rcpp,evalCpp) 69 | importFrom(Rcpp,sourceCpp) 70 | importFrom(fastglm,fastglm) 71 | importFrom(methods,is) 72 | useDynLib(simstudy) 73 | useDynLib(simstudy, .registration = TRUE) 74 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | matMultinom <- function(probmatrix) { 5 | .Call(`_simstudy_matMultinom`, probmatrix) 6 | } 7 | 8 | markovChains <- function(nchains, P, chainLen, state0) { 9 | .Call(`_simstudy_markovChains`, nchains, P, chainLen, state0) 10 | } 11 | 12 | clipVec <- function(id, seq, event) { 13 | .Call(`_simstudy_clipVec`, id, seq, event) 14 | } 15 | 16 | chkNonIncreasing <- function(adjmatrix) { 17 | .Call(`_simstudy_chkNonIncreasing`, adjmatrix) 18 | } 19 | 20 | checkBoundsBin <- function(p1, p2, d) { 21 | invisible(.Call(`_simstudy_checkBoundsBin`, p1, p2, d)) 22 | } 23 | 24 | findRhoBin <- function(p1, p2, d) { 25 | .Call(`_simstudy_findRhoBin`, p1, p2, d) 26 | } 27 | 28 | getRhoMat <- function(N, P, TCORR) { 29 | .Call(`_simstudy_getRhoMat`, N, P, TCORR) 30 | } 31 | 32 | getBeta0 <- function(lvec, popPrev, 
tolerance) { 33 | .Call(`_simstudy_getBeta0`, lvec, popPrev, tolerance) 34 | } 35 | 36 | estAUC <- function(dmatrix, y) { 37 | .Call(`_simstudy_estAUC`, dmatrix, y) 38 | } 39 | 40 | getBeta_auc <- function(covmat, coefs, auc, popPrev, tolerance) { 41 | .Call(`_simstudy_getBeta_auc`, covmat, coefs, auc, popPrev, tolerance) 42 | } 43 | 44 | -------------------------------------------------------------------------------- /R/glue.R: -------------------------------------------------------------------------------- 1 | #' Collapse Transformer 2 | #' 3 | #' @description Transformer for use with glue(). Collapses content of glue block 4 | #' ending with regex. 5 | #' @param regex Regex to mark blocks to collapse. 6 | #' @param ... Arguments passed by the calling glue function (text, envir) 7 | #' and params to pass through to glue_collapse like sep, last. 8 | #' @return The collapsed text, or identity when no marker found. 9 | #' @noRd 10 | collapseTransformer <- function(regex = "[*]$", ...) { 11 | function(text, envir) { 12 | collapse <- grepl(regex, text) 13 | if (collapse) { 14 | text <- sub(regex, "", text) 15 | } 16 | res <- identity_transformer(text, envir) 17 | if (collapse) { 18 | glue_collapse(res, ...) 19 | } else { 20 | res 21 | } 22 | } 23 | } 24 | 25 | #' Sprintf Transformer 26 | #' 27 | #' @description Transformer for use with glue(). Formats numbers 28 | #' similar to sprintf. Use like: var:02d 29 | #' @param text Text to format. 30 | #' @param envir Environment the expression is 31 | #'   evaluated in. 32 | #' @return The formatted text.
33 | #' @noRd 34 | sprintfTransformer <- function(text, envir) { 35 | m <- regexpr(":.+$", text) 36 | if (m != -1) { 37 | format <- substring(regmatches(text, m), 2) 38 | regmatches(text, m) <- "" 39 | res <- eval(parse(text = text, keep.source = FALSE), envir) 40 | do.call(sprintf, list(glue("%{format}"), res)) 41 | } else { 42 | eval(parse(text = text, keep.source = FALSE), envir) 43 | } 44 | } 45 | 46 | #' Sprintf Collapse Transformer 47 | #' 48 | #' @description Transformer for use with glue(). Formats numbers 49 | #' similar to sprintf. Collapses vectors/lists. 50 | #' @param ... Arguments passed by the calling glue function (text, envir) 51 | #' @param sep Characters used to separate items. 52 | #' @param last Characters used to separate the last items. 53 | #' @return The formatted text. 54 | #' @noRd 55 | sprintfCTransformer <- function(sep = ", ", last = " and ", ...) { 56 | function(text, envir) { 57 | m <- regexpr(":.+$", text) 58 | if (m != -1) { 59 | format <- substring(regmatches(text, m), 2) 60 | regmatches(text, m) <- "" 61 | expr <- parse(text = text, keep.source = FALSE) 62 | var <- all.vars(expr) 63 | fmtString <- glue("%{format}") 64 | varL <- ifelse(length(var) != 0, length(get(var, envir = envir)), 1) 65 | res <- eval(expr, envir) 66 | 67 | if (varL > 1) { 68 | do.call( 69 | sprintf, 70 | c( 71 | glue_collapse( 72 | rep(fmtString, varL), 73 | sep = sep, last = last 74 | ), 75 | as.list(res) 76 | ) 77 | ) 78 | } else { 79 | do.call(sprintf, list(fmtString, res)) 80 | } 81 | } else { 82 | eval(parse(text = text, keep.source = FALSE), envir) 83 | } 84 | } 85 | } 86 | 87 | #' Collapse and glue text 88 | #' 89 | #' @inheritParams collapseTransformer 90 | #' @inheritDotParams glue 91 | #' @return The collapsed text.
92 | #' @details Mark blocks to collapse with * 93 | #' @noRd 94 | glueCollapse <- function(..., sep = ", ", last = " and ", 95 | .envir = parent.frame()) { 96 | glue(..., 97 | .transformer = collapseTransformer(sep = sep, last = last), 98 | .envir = .envir 99 | ) 100 | } 101 | 102 | #' Format numeric vars and glue text 103 | #' 104 | #' @inheritParams sprintfTransformer 105 | #' @inheritDotParams glue 106 | #' @return The formatted text. 107 | #' @details var:.2 = %.2f 108 | #' @noRd 109 | glueFmt <- function(..., .envir = parent.frame()) { 110 | glue(..., 111 | .transformer = sprintfTransformer, 112 | .envir = .envir 113 | ) 114 | } 115 | 116 | #' Format and collapse numeric vars 117 | #' 118 | #' @inheritParams sprintfCTransformer 119 | #' @inheritDotParams glue 120 | #' @return The formatted and collapsed text. 121 | #' @details var:.2 = %.2f 122 | #' @noRd 123 | glueFmtC <- function(..., .envir = parent.frame(), sep = ", ", last = " and ") { 124 | glue(..., 125 | .transformer = sprintfCTransformer(sep = sep, last = last), 126 | .envir = .envir 127 | ) 128 | } 129 | -------------------------------------------------------------------------------- /R/int_rmult.R: -------------------------------------------------------------------------------- 1 | #### Multinomial data generation #### 2 | 3 | # Internal function genExposure - returns categorical data 4 | # 5 | # @param p a vector of probabilities 6 | # @return An integer (group) ranging from 1 to length of the probability vector 7 | 8 | .rmult <- function(p) { 9 | nums <- length(p) 10 | t(stats::rmultinom(n = 1, size = 1, p = p)) %*% c(1:nums) 11 | } 12 | -------------------------------------------------------------------------------- /R/simstudy-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | #' @import glue 3 | "_PACKAGE" 4 | 5 | # The following block is used by usethis to automatically manage 6 | # roxygen namespace tags. Modify with care!
7 | ## usethis namespace: start 8 | ## usethis namespace: end 9 | NULL 10 | 11 | # nolint start 12 | 13 | #' Distributions for Data Definitions 14 | #' 15 | #' This help file describes the distributions used for data creation in 16 | #' `simstudy`. 17 | #' 18 | #' @param formula Desired mean as a Number or an R expression for mean as a 19 | #' String. Variables defined via [defData()] and variables within the 20 | #' parent environment (prefixed with `..`) can be used within the formula. 21 | #' Functions from the parent environment can be used without a prefix. 22 | #' @param variance Number. Default is `0`. 23 | #' @param link String identifying the link function to be used. Default is 24 | #' `identity`. 25 | #' @details For details about the statistical distributions please see 26 | #' [stats::distributions], any non-statistical distributions will be 27 | #' explained below. Required variables and expected pattern for each 28 | #' distribution can be found in this table: 29 | #' 30 | #' | **name** | **formula** | **format** | **variance** | **link** | 31 | #' |-----------------|------------------------|------------------------------------------|------------------|-------------------| 32 | #' | beta | mean | String or Number | dispersion value | identity or logit | 33 | #' | binary | probability for 1 | String or Number | NA | identity, log, or logit | 34 | #' | binomial | probability of success | String or Number | number of trials | identity, log, or logit | 35 | #' | categorical | probabilities | `p_1;p_2;..;p_n` | category labels: `a;b;c` , `50;130;20`| identity or logit | 36 | #' | custom | name of function | String | arguments | identity | 37 | #' | exponential | mean (lambda) | String or Number | NA | identity or log | 38 | #' | gamma | mean | String or Number | dispersion value | identity or log | 39 | #' | mixture | formula | `x_1 `\|` p_1 + x_2 `\|` p_2 ... 
x_n `\|` p_n` | NA | NA | 40 | #' | negBinomial | mean | String or Number | dispersion value | identity or log | 41 | #' | nonrandom | formula | String or Number | NA | NA | 42 | #' | normal | mean | String or Number | variance | NA | 43 | #' | noZeroPoisson | mean | String or Number | NA | identity or log | 44 | #' | poisson | mean | String or Number | NA | identity or log | 45 | #' | trtAssign | ratio | `r_1;r_2;..;r_n` | stratification | identity or nonbalanced | 46 | #' | uniform | range | `from;to` | NA | NA | 47 | #' | uniformInt | range | `from;to` | NA | NA | 48 | #' 49 | #' 50 | #' @section Mixture: The mixture distribution makes it possible to mix two or more 51 | #' previously defined distributions/variables. Each variable that should be 52 | #' part of the new distribution `x_1,...,x_n` is assigned a probability 53 | #' `p_1,...,p_n`. For more information see 54 | #' [rdatagen.net](https://www.rdatagen.net/post/adding-mixture-distributions-to-simstudy/). 55 | #' @examples 56 | #' ext_var <- 2.9 57 | #' def <- defData(varname = "external", formula = "3 + log(..ext_var)", variance = .5) 58 | #' def 59 | #' genData(5, def) 60 | #' @name distributions 61 | #' @aliases normal poisson noZeroPoisson binary binomial uniform 62 | #' categorical gamma beta negBinomial nonrandom exponential mixture 63 | #' @md 64 | NULL 65 | 66 | # nolint end 67 | 68 | #' Deprecated functions in simstudy 69 | #' 70 | #' These functions are provided for compatibility with older versions 71 | #' of simstudy only, and will be defunct in the future. 72 | #' 73 | #' * [genCorOrdCat]: This function is deprecated, and will 74 | #' be removed in the future. Use [genOrdCat] with `asFactor = FALSE` instead. 75 | #' * [catProbs]: This function is deprecated, and will be removed in the future. 76 | #' Use [genCatFormula] with the same functionality instead.
77 | #' @md 78 | #' @name simstudy-deprecated 79 | NULL 80 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onLoad <- function(libname, pkgname) { 2 | backports::import(pkgname) 3 | } 4 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "simstudy" 3 | output: github_document 4 | --- 5 | 6 | ```{r, include = FALSE} 7 | knitr::opts_chunk$set( 8 | collapse = TRUE, 9 | comment = "#>", 10 | fig.path = "man/figures/README-", 11 | out.width = "100%" 12 | ) 13 | ``` 14 | 15 | 16 | 17 | [![R build status](https://github.com/kgoldfeld/simstudy/workflows/R-CMD-check/badge.svg?branch=main)](https://github.com/kgoldfeld/simstudy/actions){target="_blank"} 18 | [![CRAN status](https://www.r-pkg.org/badges/version/simstudy)](https://CRAN.R-project.org/package=simstudy){target="_blank"} 19 | [![status](https://joss.theoj.org/papers/10.21105/joss.02763/status.svg)](https://joss.theoj.org/papers/10.21105/joss.02763){target="_blank"} 20 | [![CRAN downloads](https://cranlogs.r-pkg.org/badges/grand-total/simstudy)](https://CRAN.R-project.org/package=simstudy){target="_blank"} 21 | [![codecov](https://app.codecov.io/gh/kgoldfeld/simstudy/branch/main/graph/badge.svg)](https://app.codecov.io/gh/kgoldfeld/simstudy){target="_blank"} 22 | [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html){target="_blank"} 23 | 24 | 25 | The `simstudy` package is a collection of functions that allow users to generate simulated data sets in order to explore modeling techniques or better understand data generating processes. 
The user defines the distributions of individual variables, specifies relationships between covariates and outcomes, and generates data based on these specifications. The final data sets can represent randomized control trials, repeated measure designs, cluster randomized trials, or naturally observed data processes. Other complexities that can be added include survival data, correlated data, factorial study designs, step wedge designs, and missing data processes. 26 | 27 | Simulation using `simstudy` has two fundamental steps. The user (1) **defines** the data elements of a data set and (2) **generates** the data based on these definitions. Additional functionality exists to simulate observed or randomized **treatment assignment/exposures**, to create **longitudinal/panel** data, to create **multi-level/hierarchical** data, to create datasets with **correlated variables** based on a specified covariance structure, to **merge** datasets, to create data sets with **missing** data, and to create non-linear relationships with underlying **spline** curves. 28 | 29 | The overarching philosophy of `simstudy` is to create data generating processes that mimic the typical models used to fit those types of data. So, the parameterization of some of the data generating processes may not follow the standard parameterizations for the specific distributions. For example, in `simstudy` *gamma*-distributed data are generated based on the specification of a mean μ (or log(μ)) and a dispersion $d$, rather than shape α and rate β parameters that more typically characterize the *gamma* distribution. When we estimate the parameters, we are modeling μ (or some function of μ), so we should explicitly recover the `simstudy` parameters used to generate the model, thus illuminating the relationship between the underlying data generating processes and the models. 
For more details on the 30 | package, use cases, examples, and function reference see the [documentation page](https://kgoldfeld.github.io/simstudy/articles/simstudy.html). 31 | 32 | 33 | ## Installation 34 | 35 | You can install the released version of simstudy from [CRAN](https://CRAN.R-project.org){target="_blank"} with: 36 | 37 | ``` r 38 | install.packages("simstudy") 39 | ``` 40 | 41 | And the development version from [GitHub](https://github.com/){target="_blank"} with: 42 | 43 | ``` r 44 | # install.packages("devtools") 45 | devtools::install_github("kgoldfeld/simstudy") 46 | ``` 47 | ## Example 48 | 49 | Here is some simple sample code, much more in the vignettes: 50 | 51 | ```{r, echo = TRUE} 52 | library(simstudy) 53 | set.seed(1965) 54 | 55 | def <- defData(varname="x", formula = 10, variance = 2, dist = "normal") 56 | def <- defData(def, varname="y", formula = "3 + 0.5 * x", variance = 1, dist = "normal") 57 | dd <- genData(250, def) 58 | 59 | dd <- trtAssign(dd, nTrt = 4, grpName = "grp", balanced = TRUE) 60 | 61 | dd 62 | ``` 63 | 64 | ## Contributing & Support 65 | 66 | If you find a bug or need help, please file an issue with a [reprex](https://www.tidyverse.org/help/){target="_blank"} on [Github](https://github.com/kgoldfeld/simstudy/issues){target="_blank"}. We are happy to accept contributions to simstudy. More information on how to propose changes or fix bugs can be found [here](https://kgoldfeld.github.io/simstudy/CONTRIBUTING.html){target="_blank"}. 67 | 68 | ## Code of Conduct 69 | 70 | Please note that the simstudy project is released with a [Contributor Code of Conduct](https://kgoldfeld.github.io/simstudy/CODE_OF_CONDUCT.html){target="_blank"}. By contributing to this project, you agree to abide by its terms. 
71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | simstudy 2 | ================ 3 | 4 | 5 | 6 | 7 | R build status 11 | CRAN status 14 | status 18 | CRAN downloads 22 | codecov 26 | Lifecycle: stable 30 | 31 | The `simstudy` package is a collection of functions that allow users to 32 | generate simulated data sets in order to explore modeling techniques or 33 | better understand data generating processes. The user defines the 34 | distributions of individual variables, specifies relationships between 35 | covariates and outcomes, and generates data based on these 36 | specifications. The final data sets can represent randomized control 37 | trials, repeated measure designs, cluster randomized trials, or 38 | naturally observed data processes. Other complexities that can be added 39 | include survival data, correlated data, factorial study designs, step 40 | wedge designs, and missing data processes. 41 | 42 | Simulation using `simstudy` has two fundamental steps. The user (1) 43 | **defines** the data elements of a data set and (2) **generates** the 44 | data based on these definitions. Additional functionality exists to 45 | simulate observed or randomized **treatment assignment/exposures**, to 46 | create **longitudinal/panel** data, to create 47 | **multi-level/hierarchical** data, to create datasets with **correlated 48 | variables** based on a specified covariance structure, to **merge** 49 | datasets, to create data sets with **missing** data, and to create 50 | non-linear relationships with underlying **spline** curves. 51 | 52 | The overarching philosophy of `simstudy` is to create data generating 53 | processes that mimic the typical models used to fit those types of data. 
54 | So, the parameterization of some of the data generating processes may 55 | not follow the standard parameterizations for the specific 56 | distributions. For example, in `simstudy` *gamma*-distributed data are 57 | generated based on the specification of a mean μ (or log(μ)) and a 58 | dispersion $d$, rather than shape α and rate β parameters that more 59 | typically characterize the *gamma* distribution. When we estimate the 60 | parameters, we are modeling μ (or some function of μ), so we should 61 | explicitly recover the `simstudy` parameters used to generate the model, 62 | thus illuminating the relationship between the underlying data 63 | generating processes and the models. For more details on the package, 64 | use cases, examples, and function reference see the [documentation 65 | page](https://kgoldfeld.github.io/simstudy/articles/simstudy.html). 66 | 67 | ## Installation 68 | 69 | You can install the released version of simstudy from 70 | CRAN with: 71 | 72 | ``` r 73 | install.packages("simstudy") 74 | ``` 75 | 76 | And the development version from 77 | GitHub with: 78 | 79 | ``` r 80 | # install.packages("devtools") 81 | devtools::install_github("kgoldfeld/simstudy") 82 | ``` 83 | 84 | ## Example 85 | 86 | Here is some simple sample code, much more in the vignettes: 87 | 88 | ``` r 89 | library(simstudy) 90 | set.seed(1965) 91 | 92 | def <- defData(varname="x", formula = 10, variance = 2, dist = "normal") 93 | def <- defData(def, varname="y", formula = "3 + 0.5 * x", variance = 1, dist = "normal") 94 | dd <- genData(250, def) 95 | 96 | dd <- trtAssign(dd, nTrt = 4, grpName = "grp", balanced = TRUE) 97 | 98 | dd 99 | #> Key: 100 | #> id x y grp 101 | #> 102 | #> 1: 1 11.191960 8.949389 4 103 | #> 2: 2 10.418375 7.372060 4 104 | #> 3: 3 8.512109 6.925844 3 105 | #> 4: 4 11.361632 9.850340 4 106 | #> 5: 5 9.928811 6.515463 4 107 | #> --- 108 | #> 246: 246 8.220609 7.898416 2 109 | #> 247: 247 8.531483 8.681783 2 110 | #> 248: 248 10.507370 8.552350 3 
111 | #> 249: 249 8.621339 6.652300 1 112 | #> 250: 250 9.508164 7.083845 3 113 | ``` 114 | 115 | ## Contributing & Support 116 | 117 | If you find a bug or need help, please file an issue with a 118 | reprex on 119 | Github. We are happy to accept contributions to 121 | simstudy. More information on how to propose changes or fix bugs can be 122 | found here. 124 | 125 | ## Code of Conduct 126 | 127 | Please note that the simstudy project is released with a 128 | Contributor Code of Conduct. By contributing to this 130 | project, you agree to abide by its terms. 131 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | destination: docs/ 2 | useurl: https://kgoldfeld.github.io/simstudy 3 | authors: 4 | Keith Goldfeld: 5 | href: "https://www.rdatagen.net/" 6 | development: 7 | mode: auto 8 | reference: 9 | - title: Define Data 10 | - contents: 11 | - distributions 12 | - has_concept("define_data") 13 | 14 | - title: Generate Data 15 | - contents: 16 | - has_concept("generate_data") 17 | 18 | - title: Missing Data 19 | desc: Define and generate different kinds of missingness. 20 | - contents: 21 | - has_concept("missing") 22 | 23 | - title: Group Data 24 | desc: Assign treatment groups and create longitudinal data. 25 | - contents: 26 | - has_concept("group_data") 27 | 28 | - title: Correlated Data 29 | - contents: 30 | - has_concept("correlated") 31 | 32 | - title: Splines 33 | desc: Use splines to define and generate data. 
34 | - contents: 35 | - has_concept("splines") 36 | 37 | - title: Utility 38 | - contents: 39 | - has_concept("utility") 40 | 41 | - title: Deprecated & Defunct 42 | - contents: 43 | - simstudy-deprecated 44 | -------------------------------------------------------------------------------- /bench/define.R: -------------------------------------------------------------------------------- 1 | def_all_dists <- function() { 2 | def <- defData(varname = "age", dist = "uniformInt", formula = "22;75") 3 | def <- defData(def, varname = "rating", dist = "uniform", formula = "0;10") 4 | def <- defData(def, 5 | varname = "female", dist = "binary", 6 | formula = "-2 + age * 0.1", link = "logit" 7 | ) 8 | def <- defData(def, 9 | varname = "baseDBP", dist = "normal", 10 | formula = 70, variance = 40 11 | ) 12 | def <- defData(def, 13 | varname = "nClasses", dist = "noZeroPoisson", formula = 3 14 | ) 15 | def <- defData(def, 16 | varname = "visits", dist = "poisson", 17 | formula = "1.5 - 0.2 * age + 0.5 * female", link = "log" 18 | ) 19 | def <- defData(def, 20 | varname = "Y0", dist = "normal", formula = 10, variance = 1 21 | ) 22 | def <- defData(def, 23 | varname = "Y1", dist = "normal", formula = "Y0 + 5 + 5", 24 | variance = 1 25 | ) 26 | def <- defData(def, 27 | varname = "deterministic", dist = "nonrandom", formula = "25 + age" 28 | ) 29 | def <- defData(def, 30 | varname = "binom", dist = "binomial", formula = .4, 31 | variance = 10 32 | ) 33 | def <- defData(def, 34 | varname = "cat", dist = "categorical", formula = genCatFormula(n = 5), 35 | variance = "a;b;c;d;e" 36 | ) 37 | def <- defData(def, 38 | varname = "exp", dist = "exponential", formula = "42" 39 | ) 40 | 41 | def <- defData(def, 42 | varname = "gamma", dist = "gamma", formula = "exp/age", 43 | variance = 1 44 | ) 45 | 46 | def <- defData(def, 47 | varname = "mix", dist = "mixture", 48 | formula = genMixFormula( 49 | c("age", "exp", "Y1", "baseDBP"), 50 | c(0.3, .2, .4, .1) 51 | ) 52 | ) 53 | 54 | def <- 
defData(def, 55 | varname = "negBin", dist = "negBinomial", formula = "Y0 + 10 + 5", 56 | variance = 1 57 | ) 58 | def <- defData(def, 59 | varname = "beta", dist = "beta", formula = .6, 60 | variance = .5 61 | ) 62 | 63 | def 64 | } 65 | 66 | def_short <- function() { 67 | def <- defData(varname = "age", dist = "uniformInt", formula = "22;75") 68 | def <- defData(def, varname = "rating", dist = "uniform", formula = "0;10") 69 | def <- defData(def, 70 | varname = "female", dist = "binary", 71 | formula = "-2 + age * 0.1", link = "logit" 72 | ) 73 | def <- defData(def, 74 | varname = "baseDBP", dist = "normal", 75 | formula = 70, variance = 40 76 | ) 77 | def <- defData(def, 78 | varname = "deterministic", dist = "nonrandom", formula = "25 + age" 79 | ) 80 | def <- defData(def, 81 | varname = "cat", dist = "categorical", formula = genCatFormula(n = 5), 82 | variance = "a;b;c;d;e" 83 | ) 84 | 85 | 86 | def 87 | } 88 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Submission 20240729 2 | 3 | This is a submission of version 0.8.1 4 | 5 | Developed with R Version 4.4.1 6 | 7 | I did not get any notes from check_win_devel. 
8 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite simstudy in publications, please use:") 2 | 3 | bibentry( 4 | bibtype = "Article", 5 | title = "simstudy: Illuminating research methods through data generation", 6 | author = "Keith Goldfeld and Jacob Wujciak-Jens", 7 | publisher = "The Open Journal", 8 | journal = "Journal of Open Source Software", 9 | year = 2020, 10 | volume = 5, 11 | number = 54, 12 | pages = 2763, 13 | url = "https://doi.org/10.21105/joss.02763", 14 | doi = "10.21105/joss.02763", 15 | textVersion = paste0('Goldfeld K, Wujciak-Jens J (2020).', 16 | ' "simstudy: Illuminating research methods through data generation."', 17 | ' Journal of Open Source Software, 5(54), 2763. doi:10.21105/joss.02763', 18 | ' (URL: https://doi.org/10.21105/joss.02763).') 19 | ) 20 | -------------------------------------------------------------------------------- /man/addColumns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/add_data.R 3 | \name{addColumns} 4 | \alias{addColumns} 5 | \title{Add columns to existing data set} 6 | \usage{ 7 | addColumns(dtDefs, dtOld, envir = parent.frame()) 8 | } 9 | \arguments{ 10 | \item{dtDefs}{Name of definitions for added columns} 11 | 12 | \item{dtOld}{Name of data table that is to be updated} 13 | 14 | \item{envir}{Environment the data definitions are evaluated in. 
15 | Defaults to \link[base:sys.parent]{base::parent.frame}.} 16 | } 17 | \value{ 18 | an updated data.table that contains the added simulated data 19 | } 20 | \description{ 21 | Add columns to existing data set 22 | } 23 | \examples{ 24 | # New data set 25 | 26 | def <- defData(varname = "xNr", dist = "nonrandom", formula = 7, id = "idnum") 27 | def <- defData(def, varname = "xUni", dist = "uniform", formula = "10;20") 28 | 29 | dt <- genData(10, def) 30 | 31 | # Add columns to dt 32 | 33 | def2 <- defDataAdd(varname = "y1", formula = 10, variance = 3) 34 | def2 <- defDataAdd(def2, varname = "y2", formula = .5, dist = "binary") 35 | def2 36 | 37 | dt <- addColumns(def2, dt) 38 | dt 39 | } 40 | \concept{generate_data} 41 | -------------------------------------------------------------------------------- /man/addCompRisk.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{addCompRisk} 4 | \alias{addCompRisk} 5 | \title{Generating single competing risk survival variable} 6 | \usage{ 7 | addCompRisk( 8 | dtName, 9 | events, 10 | timeName, 11 | censorName = NULL, 12 | eventName = "event", 13 | typeName = "type", 14 | keepEvents = FALSE, 15 | idName = "id" 16 | ) 17 | } 18 | \arguments{ 19 | \item{dtName}{Name of complete data set to be updated} 20 | 21 | \item{events}{Vector of column names that include 22 | time-to-event outcome measures} 23 | 24 | \item{timeName}{A string to indicate the name of the combined competing risk 25 | time-to-event outcome that reflects the minimum observed value of all 26 | time-to-event outcomes.} 27 | 28 | \item{censorName}{The name of a time-to-event variable that is the censoring 29 | variable. Must be one of the "events" names. Defaults to NULL.} 30 | 31 | \item{eventName}{The name of the new numeric/integer column representing the 32 | competing event outcomes. 
If censorName is specified, the integer value for 33 | that event will be 0. Defaults to "event", but will be ignored 34 | if timeName is NULL.} 35 | 36 | \item{typeName}{The name of the new character column that will indicate the 37 | event type. The type will be the unique variable names in survDefs. Defaults 38 | to "type", but will be ignored if timeName is NULL.} 39 | 40 | \item{keepEvents}{Indicator to retain original "events" columns. Defaults 41 | to FALSE.} 42 | 43 | \item{idName}{Name of id field in existing data set.} 44 | } 45 | \value{ 46 | An updated data table 47 | } 48 | \description{ 49 | Generating single competing risk survival variable 50 | } 51 | \examples{ 52 | d1 <- defData(varname = "x1", formula = .5, dist = "binary") 53 | d1 <- defData(d1, "x2", .5, dist = "binary") 54 | 55 | dS <- defSurv(varname = "reinc", formula = "-10 - 0.6*x1 + 0.4*x2", shape = 0.3) 56 | dS <- defSurv(dS, "death", "-6.5 + 0.3*x1 - 0.5*x2", shape = 0.5) 57 | dS <- defSurv(dS, "censor", "-7", shape = 0.55) 58 | 59 | dd <- genData(10, d1) 60 | dd <- genSurv(dd, dS) 61 | 62 | addCompRisk(dd, c("reinc","death", "censor"), timeName = "time", 63 | censorName = "censor", keepEvents = FALSE) 64 | 65 | } 66 | \concept{utility} 67 | -------------------------------------------------------------------------------- /man/addCondition.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/add_data.R 3 | \name{addCondition} 4 | \alias{addCondition} 5 | \title{Add a single column to existing data set based on a condition} 6 | \usage{ 7 | addCondition(condDefs, dtOld, newvar, envir = parent.frame()) 8 | } 9 | \arguments{ 10 | \item{condDefs}{Name of definitions for added column} 11 | 12 | \item{dtOld}{Name of data table that is to be updated} 13 | 14 | \item{newvar}{Name of new column to add} 15 | 16 | \item{envir}{Environment the data definitions are evaluated in. 
17 | Defaults to \link[base:sys.parent]{base::parent.frame}.} 18 | } 19 | \value{ 20 | An updated data.table that contains the added simulated data 21 | } 22 | \description{ 23 | Add a single column to existing data set based on a condition 24 | } 25 | \examples{ 26 | 27 | # New data set 28 | 29 | def <- defData(varname = "x", dist = "categorical", formula = ".33;.33") 30 | def <- defData(def, varname = "y", dist = "uniform", formula = "-5;5") 31 | 32 | dt <- genData(1000, def) 33 | 34 | # Define conditions 35 | 36 | defC <- defCondition( 37 | condition = "x == 1", formula = "5 + 2*y-.5*y^2", 38 | variance = 1, dist = "normal" 39 | ) 40 | defC <- defCondition(defC, 41 | condition = "x == 2", 42 | formula = "3 - 3*y + y^2", variance = 2, dist = "normal" 43 | ) 44 | defC <- defCondition(defC, 45 | condition = "x == 3", 46 | formula = "abs(y)", dist = "poisson" 47 | ) 48 | 49 | # Add column 50 | 51 | dt <- addCondition(defC, dt, "NewVar") 52 | 53 | # Plot data 54 | 55 | library(ggplot2) 56 | 57 | ggplot(data = dt, aes(x = y, y = NewVar, group = x)) + 58 | geom_point(aes(color = factor(x))) 59 | } 60 | \concept{condition} 61 | \concept{generate_data} 62 | -------------------------------------------------------------------------------- /man/addCorData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/add_correlated_data.R 3 | \name{addCorData} 4 | \alias{addCorData} 5 | \title{Add correlated data to existing data.table} 6 | \usage{ 7 | addCorData( 8 | dtOld, 9 | idname, 10 | mu, 11 | sigma, 12 | corMatrix = NULL, 13 | rho, 14 | corstr = "ind", 15 | cnames = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{dtOld}{Data table that the new columns will be appended to.} 20 | 21 | \item{idname}{Character name of id field, defaults to "id".} 22 | 23 | \item{mu}{A vector of means.
The length of mu must be nvars.} 24 | 25 | \item{sigma}{Standard deviation of variables. If the standard deviation differs 26 | for each variable, enter as a vector with the same length as the mean vector 27 | mu. If the standard deviation is constant across variables, a single value 28 | can be entered.} 29 | 30 | \item{corMatrix}{Correlation matrix can be entered directly. It must be 31 | symmetrical and positive semi-definite. It is not a required field; if a 32 | matrix is not provided, then a structure and correlation coefficient rho must 33 | be specified.} 34 | 35 | \item{rho}{Correlation coefficient, -1 <= rho <= 1. Use if corMatrix is not 36 | provided.} 37 | 38 | \item{corstr}{Correlation structure of the variance-covariance matrix 39 | defined by sigma and rho. Options include "ind" for an independence 40 | structure, "cs" for a compound symmetry structure, and "ar1" for an 41 | autoregressive structure.} 42 | 43 | \item{cnames}{Explicit column names. A single string with names separated 44 | by commas.
If no string is provided, the default names will be V#, where # 45 | represents the column.} 46 | } 47 | \value{ 48 | The original data table with the additional correlated columns 49 | } 50 | \description{ 51 | Add correlated data to existing data.table 52 | } 53 | \examples{ 54 | def <- defData(varname = "xUni", dist = "uniform", formula = "10;20", id = "myID") 55 | def <- defData(def, 56 | varname = "xNorm", formula = "xUni * 2", dist = "normal", 57 | variance = 8 58 | ) 59 | 60 | dt <- genData(250, def) 61 | 62 | mu <- c(3, 8, 15) 63 | sigma <- c(1, 2, 3) 64 | 65 | dtAdd <- addCorData(dt, "myID", 66 | mu = mu, sigma = sigma, 67 | rho = .7, corstr = "cs" 68 | ) 69 | dtAdd 70 | 71 | round(var(dtAdd[, .(V1, V2, V3)]), 3) 72 | round(cor(dtAdd[, .(V1, V2, V3)]), 2) 73 | 74 | dtAdd <- addCorData(dt, "myID", 75 | mu = mu, sigma = sigma, 76 | rho = .7, corstr = "ar1" 77 | ) 78 | round(cor(dtAdd[, .(V1, V2, V3)]), 2) 79 | 80 | corMat <- matrix(c(1, .2, .8, .2, 1, .6, .8, .6, 1), nrow = 3) 81 | 82 | dtAdd <- addCorData(dt, "myID", 83 | mu = mu, sigma = sigma, 84 | corMatrix = corMat 85 | ) 86 | round(cor(dtAdd[, .(V1, V2, V3)]), 2) 87 | } 88 | \concept{correlated} 89 | -------------------------------------------------------------------------------- /man/addCorFlex.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/add_correlated_data.R 3 | \name{addCorFlex} 4 | \alias{addCorFlex} 5 | \title{Create multivariate (correlated) data - for general distributions} 6 | \usage{ 7 | addCorFlex( 8 | dt, 9 | defs, 10 | rho = 0, 11 | tau = NULL, 12 | corstr = "cs", 13 | corMatrix = NULL, 14 | envir = parent.frame() 15 | ) 16 | } 17 | \arguments{ 18 | \item{dt}{Data table that will be updated.} 19 | 20 | \item{defs}{Field definition table created by function \code{defDataAdd}.} 21 | 22 | \item{rho}{Correlation coefficient, -1 <= rho <= 1. 
Use if corMatrix is not 23 | provided.} 24 | 25 | \item{tau}{Correlation based on Kendall's tau. If tau is specified, then it 26 | is used as the correlation even if rho is specified. If tau is NULL, then the 27 | specified value of rho is used, or rho defaults to 0.} 28 | 29 | \item{corstr}{Correlation structure of the variance-covariance matrix defined 30 | by sigma and rho. Options include "cs" for a compound symmetry structure 31 | and "ar1" for an autoregressive structure. Defaults to "cs".} 32 | 33 | \item{corMatrix}{Correlation matrix can be entered directly. It must be 34 | symmetrical and positive semi-definite. It is not a required field; if a 35 | matrix is not provided, then a structure and correlation coefficient rho must 36 | be specified.} 37 | 38 | \item{envir}{Environment the data definitions are evaluated in. 39 | Defaults to \link[base:sys.parent]{base::parent.frame}.} 40 | } 41 | \value{ 42 | data.table with added column(s) of correlated data 43 | } 44 | \description{ 45 | Create multivariate (correlated) data - for general distributions 46 | } 47 | \examples{ 48 | defC <- defData( 49 | varname = "nInds", formula = 50, dist = "noZeroPoisson", 50 | id = "idClust" 51 | ) 52 | 53 | dc <- genData(10, defC) 54 | #### Normal only 55 | 56 | dc <- addCorData(dc, 57 | mu = c(0, 0, 0, 0), sigma = c(2, 2, 2, 2), rho = .2, 58 | corstr = "cs", cnames = c("a", "b", "c", "d"), 59 | idname = "idClust" 60 | ) 61 | 62 | di <- genCluster(dc, "idClust", "nInds", "id") 63 | 64 | defI <- defDataAdd( 65 | varname = "A", formula = "-1 + a", variance = 3, 66 | dist = "normal" 67 | ) 68 | defI <- defDataAdd(defI, 69 | varname = "B", formula = "4.5 + b", variance = .5, 70 | dist = "normal" 71 | ) 72 | defI <- defDataAdd(defI, 73 | varname = "C", formula = "5*c", variance = 3, 74 | dist = "normal" 75 | ) 76 | defI <- defDataAdd(defI, 77 | varname = "D", formula = "1.6 + d", variance = 1, 78 | dist = "normal" 79 | ) 80 | 81 | #### Generate new data 82 | 83 | di <- 
addCorFlex(di, defI, rho = 0.4, corstr = "cs") 84 | 85 | # Check correlations by cluster 86 | 87 | for (i in 1:nrow(dc)) { 88 | print(cor(di[idClust == i, list(A, B, C, D)])) 89 | } 90 | 91 | # Check global correlations - should not be as correlated 92 | cor(di[, list(A, B, C, D)]) 93 | } 94 | \concept{correlated} 95 | -------------------------------------------------------------------------------- /man/addDataDensity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/add_data.R 3 | \name{addDataDensity} 4 | \alias{addDataDensity} 5 | \title{Add data from a density defined by a vector of integers} 6 | \usage{ 7 | addDataDensity(dtOld, dataDist, varname, uselimits = FALSE) 8 | } 9 | \arguments{ 10 | \item{dtOld}{Name of data table that is to be updated.} 11 | 12 | \item{dataDist}{Vector that defines the desired density.} 13 | 14 | \item{varname}{Name of the new variable.} 15 | 16 | \item{uselimits}{Indicator to use minimum and maximum of input data vector as 17 | limits for sampling. Defaults to FALSE, in which case a smoothed density that 18 | extends beyond the limits is used.} 19 | } 20 | \value{ 21 | A data table with the generated data. 22 | } 23 | \description{ 24 | Data are generated from a density defined by a vector of integers.
25 | } 26 | \examples{ 27 | def <- defData(varname = "x1", formula = 5, dist = "poisson") 28 | 29 | data_dist <- c(1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 7, 7, 7, 8, 9, 10, 10) 30 | 31 | dd <- genData(500, def) 32 | dd <- addDataDensity(dd, data_dist, varname = "x2") 33 | dd <- addDataDensity(dd, data_dist, varname = "x3", uselimits = TRUE) 34 | } 35 | \concept{generate_data} 36 | -------------------------------------------------------------------------------- /man/addMarkov.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/add_data.R 3 | \name{addMarkov} 4 | \alias{addMarkov} 5 | \title{Add Markov chain} 6 | \usage{ 7 | addMarkov( 8 | dd, 9 | transMat, 10 | chainLen, 11 | wide = FALSE, 12 | id = "id", 13 | pername = "period", 14 | varname = "state", 15 | widePrefix = "S", 16 | start0lab = NULL, 17 | trimvalue = NULL 18 | ) 19 | } 20 | \arguments{ 21 | \item{dd}{data.table with a unique identifier} 22 | 23 | \item{transMat}{Square transition matrix where the sum of each row 24 | must equal 1. The dimensions of the matrix equal the number of possible 25 | states.} 26 | 27 | \item{chainLen}{Length of the chain that will be generated for each 28 | individual or unit; minimum chain length is 2.} 29 | 30 | \item{wide}{Logical variable (TRUE or FALSE) indicating whether the 31 | resulting data table should be returned in wide or long format. The 32 | wide format includes all elements of a chain on a single row; the long 33 | format includes each element of a chain in its own row. The default is 34 | wide = FALSE, so the long format is returned by default.} 35 | 36 | \item{id}{Character string that represents the name of the "id" field. 37 | Defaults to "id".} 38 | 39 | \item{pername}{Character string that represents the variable name of the 40 | chain sequence in the long format.
Defaults to "period".} 41 | 42 | \item{varname}{Character string that represents the variable name of the 43 | state in the long format. Defaults to "state".} 44 | 45 | \item{widePrefix}{Character string that represents the variable name 46 | prefix for the state fields in the wide format. Defaults to "S".} 47 | 48 | \item{start0lab}{Character string that represents the name of the integer 49 | field containing the starting state (State 0) of the chain for each individual. 50 | If it is NULL, the starting state defaults to 1. Default is NULL.} 51 | 52 | \item{trimvalue}{Integer value indicating the end state. If trimvalue is not NULL, 53 | all records after the first instance of state = trimvalue will be deleted.} 54 | } 55 | \value{ 56 | A data table with n rows if in wide format, or n by chainLen rows 57 | if in long format. 58 | } 59 | \description{ 60 | Generate a Markov chain for n individuals or units by 61 | specifying a transition matrix. 62 | } 63 | \examples{ 64 | def1 <- defData(varname = "x1", formula = 0, variance = 1) 65 | def1 <- defData(def1, varname = "x2", formula = 0, variance = 1) 66 | def1 <- defData(def1, 67 | varname = "S0", formula = ".6;.3;.1", 68 | dist = "categorical" 69 | ) 70 | 71 | dd <- genData(20, def1) 72 | 73 | # Transition matrix P 74 | 75 | P <- t(matrix(c( 76 | 0.7, 0.2, 0.1, 77 | 0.5, 0.3, 0.2, 78 | 0.0, 0.7, 0.3 79 | ), 80 | nrow = 3 81 | )) 82 | 83 | d1 <- addMarkov(dd, P, chainLen = 3) 84 | d2 <- addMarkov(dd, P, chainLen = 5, wide = TRUE) 85 | d3 <- addMarkov(dd, P, chainLen = 5, wide = TRUE, start0lab = "S0") 86 | d4 <- addMarkov(dd, P, chainLen = 5, start0lab = "S0", trimvalue = 3) 87 | } 88 | \concept{generate_data} 89 | -------------------------------------------------------------------------------- /man/addMultiFac.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/add_data.R 3 | \name{addMultiFac} 4 |
\alias{addMultiFac} 5 | \title{Add multi-factorial data} 6 | \usage{ 7 | addMultiFac(dtOld, nFactors, levels = 2, coding = "dummy", colNames = NULL) 8 | } 9 | \arguments{ 10 | \item{dtOld}{data.table that is to be modified} 11 | 12 | \item{nFactors}{Number of factors (columns) to generate.} 13 | 14 | \item{levels}{Vector or scalar. If a vector is specified, it must be 15 | the same length as nFactors. Each value of the vector represents the 16 | number of levels of each corresponding factor. If a scalar is specified, 17 | each factor will have the same number of levels. The default is 2 levels 18 | for each factor.} 19 | 20 | \item{coding}{String value to specify whether "dummy" or "effect" coding is used. 21 | Defaults to "dummy".} 22 | 23 | \item{colNames}{A vector of strings, with a length of nFactors. The strings 24 | represent the name for each factor.} 25 | } 26 | \value{ 27 | A data.table that contains the added simulated data. Each new column contains 28 | an integer. 29 | } 30 | \description{ 31 | Add multi-factorial data 32 | } 33 | \examples{ 34 | defD <- defData(varname = "x", formula = 0, variance = 1) 35 | 36 | DT <- genData(360, defD) 37 | DT <- addMultiFac(DT, nFactors = 3, levels = c(2, 3, 3), colNames = c("A", "B", "C")) 38 | DT 39 | DT[, .N, keyby = .(A, B, C)] 40 | 41 | DT <- genData(300, defD) 42 | DT <- addMultiFac(DT, nFactors = 3, levels = 2) 43 | DT[, .N, keyby = .(Var1, Var2, Var3)] 44 | } 45 | \concept{generate_data} 46 | -------------------------------------------------------------------------------- /man/addPeriods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/group_data.R 3 | \name{addPeriods} 4 | \alias{addPeriods} 5 | \title{Create longitudinal/panel data} 6 | \usage{ 7 | addPeriods( 8 | dtName, 9 | nPeriods = NULL, 10 | idvars = "id", 11 | timevars = NULL, 12 | timevarName = "timevar", 13 | timeid = "timeID", 14 |
perName = "period", 15 | periodVec = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{dtName}{Name of existing data table} 20 | 21 | \item{nPeriods}{Number of time periods for each record} 22 | 23 | \item{idvars}{Names of index variables (in a string vector) that will be 24 | repeated during each time period} 25 | 26 | \item{timevars}{Names of time dependent variables. Defaults to NULL.} 27 | 28 | \item{timevarName}{Name of new time dependent variable} 29 | 30 | \item{timeid}{Variable name for new index field. Defaults to "timeID"} 31 | 32 | \item{perName}{Variable name for period field. Defaults to "period"} 33 | 34 | \item{periodVec}{Vector of period times. Defaults to NULL} 35 | } 36 | \value{ 37 | An updated data.table that has multiple rows 38 | per observation in dtName 39 | } 40 | \description{ 41 | Create longitudinal/panel data 42 | } 43 | \details{ 44 | It is possible to generate longitudinal data with varying 45 | numbers of measurement periods as well as varying time intervals between 46 | each measurement period. This is done by defining specific variables \emph{in} the 47 | data set that define the number of observations per subject and the average 48 | interval time between each observation. \bold{\emph{nCount}} defines the number of 49 | measurements for an individual; \bold{\emph{mInterval}} specifies the average time between 50 | intervals for a subject; and \bold{\emph{vInterval}} specifies the variance of those 51 | interval times. If \bold{\emph{mInterval}} is not defined, no intervals are used. If \bold{\emph{vInterval}} is set to 0 or is not defined, the interval for 52 | a subject is determined entirely by the mean interval. If \bold{\emph{vInterval}} is 53 | greater than 0, time intervals are generated using a gamma distribution 54 | with specified mean and dispersion.
If either \bold{\emph{nPeriods}} or \bold{\emph{timevars}} 55 | is specified, that will override any \bold{\emph{nCount}}, \bold{\emph{mInterval}}, and 56 | \bold{\emph{vInterval}} data. 57 | 58 | \bold{\emph{periodVec}} is used to specify measurement periods that differ from 59 | the default counting variables. If \bold{\emph{periodVec}} is not specified, 60 | the periods default to \emph{0, 1, ... n-1}, with \emph{n} periods. If 61 | \bold{\emph{periodVec}} is specified as \emph{c(x_1, x_2, ... x_n)}, then 62 | \emph{x_1, x_2, ... x_n} represent the measurement periods. 63 | } 64 | \examples{ 65 | tdef <- defData(varname = "T", dist = "binary", formula = 0.5) 66 | tdef <- defData(tdef, varname = "Y0", dist = "normal", formula = 10, variance = 1) 67 | tdef <- defData(tdef, varname = "Y1", dist = "normal", formula = "Y0 + 5 + 5 * T", variance = 1) 68 | tdef <- defData(tdef, varname = "Y2", dist = "normal", formula = "Y0 + 10 + 5 * T", variance = 1) 69 | 70 | dtTrial <- genData(5, tdef) 71 | dtTrial 72 | 73 | dtTime <- addPeriods(dtTrial, 74 | nPeriods = 3, idvars = "id", 75 | timevars = c("Y0", "Y1", "Y2"), timevarName = "Y" 76 | ) 77 | dtTime 78 | 79 | # Varying # of periods and intervals - need to have variables 80 | # called nCount and mInterval 81 | 82 | def <- defData(varname = "xbase", dist = "normal", formula = 20, variance = 3) 83 | def <- defData(def, varname = "nCount", dist = "noZeroPoisson", formula = 6) 84 | def <- defData(def, varname = "mInterval", dist = "gamma", formula = 30, variance = .01) 85 | def <- defData(def, varname = "vInterval", dist = "nonrandom", formula = .07) 86 | 87 | dt <- genData(200, def) 88 | dt[id \%in\% c(8, 121)] 89 | 90 | dtPeriod <- addPeriods(dt) 91 | dtPeriod[id \%in\% c(8, 121)] # View individuals 8 and 121 only 92 | } 93 | \concept{group_data} 94 | -------------------------------------------------------------------------------- /man/addSynthetic.Rd:
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/add_data.R 3 | \name{addSynthetic} 4 | \alias{addSynthetic} 5 | \title{Add synthetic data to existing data set} 6 | \usage{ 7 | addSynthetic(dtOld, dtFrom, vars = NULL, id = "id") 8 | } 9 | \arguments{ 10 | \item{dtOld}{data.table that is to be modified} 11 | 12 | \item{dtFrom}{Data table that contains the source data} 13 | 14 | \item{vars}{A vector of string names specifying the fields that will be 15 | sampled. The default is that all variables will be selected.} 16 | 17 | \item{id}{A string specifying the field that serves as the record id. The 18 | default field is "id".} 19 | } 20 | \value{ 21 | A data.table that contains the added synthetic data. 22 | } 23 | \description{ 24 | This function generates synthetic data from an existing 25 | data.table and adds it to another (simstudy) data.table. 26 | } 27 | \details{ 28 | Add synthetic data 29 | } 30 | \examples{ 31 | ### Create fake "real" data set - this is the source of the synthetic data 32 | 33 | d <- defData(varname = "a", formula = 3, variance = 1, dist = "normal") 34 | d <- defData(d, varname = "b", formula = 5, dist = "poisson") 35 | d <- defData(d, varname = "c", formula = 0.3, dist = "binary") 36 | d <- defData(d, varname = "d", formula = "a + b + 3*c", variance = 2, dist = "normal") 37 | 38 | ### Create synthetic data set from "observed" data set A (normally this 39 | ### would be an actual external data set): 40 | 41 | A <- genData(1000, d) 42 | 43 | ### Generate new simstudy data set (using 'def') 44 | 45 | def <- defData(varname = "x", formula = 0, variance = 5) 46 | S <- genData(120, def) 47 | 48 | ### Create synthetic data from 'A' and add to simulated data in 'S' 49 | 50 | S <- addSynthetic(dtOld = S, dtFrom = A, vars = c("b", "d")) 51 | } 52 | \concept{generate_data} 53 | 
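
The row-sampling idea behind addSynthetic can be sketched in a few lines of base R: rows are drawn with replacement from the selected columns of the source data set and bound to the target data. This is only an illustrative approximation of the documented behavior, not the package implementation; the data sets and seed here are hypothetical stand-ins for the `A` and `S` objects used in the example above.

```r
# Illustrative sketch (not the simstudy implementation): synthesize columns
# "b" and "d" for a target data set by resampling rows from a source data set.
set.seed(123)

A <- data.frame(id = 1:1000, b = rpois(1000, 5), d = rnorm(1000, 10, 2)) # source
S <- data.frame(id = 1:120, x = rnorm(120, 0, sqrt(5)))                  # target

synth_rows <- sample(nrow(A), nrow(S), replace = TRUE) # sample source rows
S2 <- cbind(S, A[synth_rows, c("b", "d")])             # append sampled columns
row.names(S2) <- NULL

head(S2)
```

Because rows are sampled jointly rather than column by column, the empirical association between the sampled fields in the source data is approximately preserved in the synthetic columns.
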
-------------------------------------------------------------------------------- /man/betaGetShapes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{betaGetShapes} 4 | \alias{betaGetShapes} 5 | \title{Convert beta mean and precision parameters to two shape parameters} 6 | \usage{ 7 | betaGetShapes(mean, precision) 8 | } 9 | \arguments{ 10 | \item{mean}{The mean of a beta distribution} 11 | 12 | \item{precision}{The precision parameter (phi) of a beta distribution} 13 | } 14 | \value{ 15 | A list that includes the shape parameters of the beta distribution 16 | } 17 | \description{ 18 | Convert beta mean and precision parameters to two shape parameters 19 | } 20 | \details{ 21 | In simstudy, users specify the beta distribution as a function of 22 | two parameters - a mean and precision, where 0 < mean < 1 and precision > 0. 23 | In this case, the variance of the specified distribution is 24 | (mean)*(1-mean)/(1+precision). The base R function rbeta uses the two shape 25 | parameters to specify the beta distribution. This function converts the mean 26 | and precision into the shape1 and shape2 parameters. 
27 | } 28 | \examples{ 29 | set.seed(12345) 30 | mean <- 0.3 31 | precision <- 1.6 32 | rs <- betaGetShapes(mean, precision) 33 | c(rs$shape1, rs$shape2) 34 | vec <- rbeta(1000, shape1 = rs$shape1, shape2 = rs$shape2) 35 | (estMoments <- c(mean(vec), var(vec))) 36 | (theoryMoments <- c(mean, mean * (1 - mean) / (1 + precision))) 37 | (theoryMoments <- with(rs, c( 38 | shape1 / (shape1 + shape2), 39 | (shape1 * shape2) / ((shape1 + shape2)^2 * (1 + shape1 + shape2)) 40 | ))) 41 | } 42 | \concept{utility} 43 | -------------------------------------------------------------------------------- /man/blockDecayMat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_correlated_data.R 3 | \name{blockDecayMat} 4 | \alias{blockDecayMat} 5 | \title{Create a block correlation matrix} 6 | \usage{ 7 | blockDecayMat(ninds, nperiods, rho_w, r, pattern = "xsection", nclusters = 1) 8 | } 9 | \arguments{ 10 | \item{ninds}{The number of units (individuals) in each cluster in each period.} 11 | 12 | \item{nperiods}{The number of periods that data are observed.} 13 | 14 | \item{r}{The decay parameter if correlation declines over time, and can have values of 15 | "exp" or "prop". See details.} 16 | 17 | \item{rho_w}{The within-period/between-individual correlation coefficient between -1 and 1.} 18 | 19 | \item{pattern}{A string argument with options "xsection" (default) or "cohort".} 20 | 21 | \item{nclusters}{An integer that indicates the number of matrices that will be generated.} 22 | } 23 | \value{ 24 | A single correlation matrix of size \code{nvars x nvars}, or a list of matrices of potentially 25 | different sizes with length indicated by \code{nclusters}.
29 | } 30 | \description{ 31 | The function blockDecayMat() generates correlation matrices that 32 | can accommodate clustered observations over time where the within-cluster 33 | between-individual correlation in the same time period can be different from the 34 | within-cluster between-individual correlation across time periods. The matrix 35 | generated here can be used in function addCorGen(). 36 | } 37 | \details{ 38 | Two general decay correlation structures are currently supported: a *cross-sectional* 39 | structure and a *closed cohort* structure. In the *cross-sectional* 40 | case, individuals or units in each time period are distinct. In the *closed cohort* structure, 41 | individuals or units are repeated in each time period. The desired structure is specified 42 | using \code{pattern}, which defaults to "xsection" if not specified. 43 | 44 | This function can generate correlation matrices of different sizes, depending on the 45 | combination of arguments provided. A single matrix will be generated when 46 | \code{nclusters == 1} (the default), and a list of matrices will be generated when 47 | \code{nclusters > 1}. 48 | 49 | If \code{nclusters > 1}, the length of \code{ninds} will depend on whether sample sizes vary by cluster 50 | and/or period.
There are three scenarios, and the function evaluates the length of \code{ninds} to 51 | determine which approach to take: 52 | 53 | \itemize{ 54 | 55 | \item{if the sample size is the same for all clusters in all periods, \code{ninds} will be 56 | a single value (i.e., length = 1).} 57 | 58 | \item{if the sample size differs by cluster but is the same for each period within each cluster, 59 | then \code{ninds} will have a value for each cluster (i.e., length = \code{nclusters}).} 60 | 61 | \item{if the sample size differs across clusters and across periods within clusters, \code{ninds} will have a 62 | value for each cluster-period combination (i.e., length = \code{nclusters x nperiods}). This option is 63 | only valid when \code{pattern = "xsection"}.} 64 | 65 | } 66 | 67 | In addition, \code{rho_w} and \code{r} can be specified as a single value (in which case they are consistent 68 | across all clusters) or as a vector of length \code{nclusters}, in which case either one or 69 | both of these parameters can vary by cluster. 70 | 71 | See vignettes for more details. 72 | } 73 | \examples{ 74 | blockDecayMat(ninds = 4, nperiods = 3, rho_w = .8, r = .9) 75 | blockDecayMat(ninds = 4, nperiods = 3, rho_w = .8, r = .9, pattern = "cohort") 76 | 77 | blockDecayMat(ninds = 2, nperiods = 3, rho_w = .8, r = .9, pattern = "cohort", nclusters = 2) 78 | blockDecayMat(ninds = c(2, 3), nperiods = 3, rho_w = c(.8, .7), r = c(.9, .8), 79 | pattern = "cohort", nclusters = 2) 80 | blockDecayMat(ninds = c(2, 3, 4, 4, 2, 1), nperiods = 3, rho_w = .8, r = .9, nclusters = 2) 81 | 82 | } 83 | \references{ 84 | Li et al. Mixed-effects models for the design and analysis of stepped wedge 85 | cluster randomized trials: An overview. Statistical Methods in Medical Research. 86 | 2021;30(2):612-639.
doi:10.1177/0962280220932962 87 | } 88 | \seealso{ 89 | \code{\link{blockExchangeMat}} and \code{\link{addCorGen}} 90 | } 91 | \concept{correlated} 92 | -------------------------------------------------------------------------------- /man/blockExchangeMat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_correlated_data.R 3 | \name{blockExchangeMat} 4 | \alias{blockExchangeMat} 5 | \title{Create a block correlation matrix with exchangeable structure} 6 | \usage{ 7 | blockExchangeMat( 8 | ninds, 9 | nperiods, 10 | rho_w, 11 | rho_b = 0, 12 | rho_a = NULL, 13 | pattern = "xsection", 14 | nclusters = 1 15 | ) 16 | } 17 | \arguments{ 18 | \item{ninds}{The number of units (individuals) in each cluster in each period.} 19 | 20 | \item{nperiods}{The number of periods that data are observed.} 21 | 22 | \item{rho_w}{The within-period/between-individual correlation coefficient between -1 and 1.} 23 | 24 | \item{rho_b}{The between-period/between-individual correlation coefficient between -1 and 1.} 25 | 26 | \item{rho_a}{The between-period/within-individual auto-correlation coefficient 27 | between -1 and 1.} 28 | 29 | \item{pattern}{A string argument with options "xsection" (default) or "cohort".} 30 | 31 | \item{nclusters}{An integer that indicates the number of matrices that will be generated.} 32 | } 33 | \value{ 34 | A single correlation matrix or a list of matrices of potentially 35 | different sizes with length indicated by \code{nclusters}. 36 | } 37 | \description{ 38 | The function \code{blockExchangeMat} generates exchangeable correlation matrices that 39 | can accommodate clustered observations over time where the within-cluster 40 | between-individual correlation in the same time period can be different from the 41 | within-cluster between-individual correlation across time periods.
The matrix 42 | generated here can be used in function \code{addCorGen}. 43 | } 44 | \details{ 45 | Two general exchangeable correlation structures are currently supported: a *cross-sectional* exchangeable 46 | structure and a *closed cohort* exchangeable structure. In the *cross-sectional* case, individuals or units in each time period are distinct. 47 | In the *closed cohort* structure, individuals or units are repeated in each time period. 48 | The desired structure is specified using \code{pattern}, which defaults to "xsection" if not specified. \code{rho_a} is the within-individual/unit 49 | exchangeable correlation over time, and can only be used when \code{pattern = "cohort"}. 50 | 51 | This function can generate correlation matrices of different sizes, depending on the combination of arguments provided. 52 | A single matrix will be generated when \code{nclusters == 1} (the default), and a list of matrices will be generated when 53 | \code{nclusters > 1}. 54 | 55 | If \code{nclusters > 1}, the length of \code{ninds} will depend on whether sample sizes vary by cluster 56 | and/or period. There are three scenarios, and the function evaluates the length of \code{ninds} to determine which approach 57 | to take: 58 | 59 | \itemize{ 60 | 61 | \item{if the sample size is the same for all clusters in all periods, \code{ninds} will be 62 | a single value (i.e., length = 1).} 63 | 64 | \item{if the sample size differs by cluster but is the same for each period within each cluster, 65 | then \code{ninds} will have a value for each cluster (i.e., length = \code{nclusters}).} 66 | 67 | \item{if the sample size differs across clusters and across periods within clusters, \code{ninds} will have a 68 | value for each cluster-period combination (i.e., length = \code{nclusters x nperiods}). This option is 69 | only valid when \code{pattern = "xsection"}.}
70 | 71 | } 72 | 73 | In addition, \code{rho_w}, \code{rho_b}, and \code{rho_a} can be specified as a single value (in which case they are consistent 74 | across all clusters) or as a vector of length \code{nclusters}, in which case any or all of these parameters can vary by cluster. 75 | 76 | See vignettes for more details. 77 | } 78 | \examples{ 79 | blockExchangeMat(ninds = 4, nperiods = 3, rho_w = .8) 80 | blockExchangeMat(ninds = 4, nperiods = 3, rho_w = .8, rho_b = 0.5) 81 | blockExchangeMat(ninds = 4, nperiods = 3, rho_w = .8, rho_b = 0.5, rho_a = 0.7, 82 | pattern = "cohort") 83 | blockExchangeMat(ninds = 2, nperiods = 3, rho_w = .8, rho_b = 0.5, rho_a = 0.7, 84 | nclusters = 3, pattern = "cohort") 85 | blockExchangeMat(ninds = c(2, 3), nperiods = 3, rho_w = .8, rho_b = 0.5, rho_a = 0.7, 86 | nclusters = 2, pattern="cohort") 87 | blockExchangeMat(ninds = c(2, 3, 4, 4, 2, 1), nperiods = 3, rho_w = .8, rho_b = 0.5, 88 | nclusters = 2) 89 | } 90 | \references{ 91 | Li et al. Mixed-effects models for the design and analysis of stepped wedge cluster randomized trials: An overview. 92 | Statistical Methods in Medical Research. 2021;30(2):612-639. doi:10.1177/0962280220932962 93 | } 94 | \seealso{ 95 | \code{\link{blockDecayMat}} and \code{\link{addCorGen}} 96 | } 97 | \concept{correlated} 98 | -------------------------------------------------------------------------------- /man/catProbs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{catProbs} 4 | \alias{catProbs} 5 | \title{Generate Categorical Formula} 6 | \usage{ 7 | catProbs(..., n = 0) 8 | } 9 | \description{ 10 | This function is deprecated, please use \link{genCatFormula} instead. 
11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/defCondition.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/define_data.R 3 | \name{defCondition} 4 | \alias{defCondition} 5 | \title{Add single row to definitions table of conditions that will be used to add data to an 6 | existing definitions table} 7 | \usage{ 8 | defCondition( 9 | dtDefs = NULL, 10 | condition, 11 | formula, 12 | variance = 0, 13 | dist = "normal", 14 | link = "identity" 15 | ) 16 | } 17 | \arguments{ 18 | \item{dtDefs}{Name of definition table to be modified. Null if this is a new definition.} 19 | 20 | \item{condition}{Formula specifying condition to be checked} 21 | 22 | \item{formula}{An R expression for mean (string)} 23 | 24 | \item{variance}{Number} 25 | 26 | \item{dist}{Distribution. For possibilities, see details} 27 | 28 | \item{link}{The link function for the mean, see details} 29 | } 30 | \value{ 31 | A data.table named dtName that is an updated data definitions table 32 | } 33 | \description{ 34 | Add single row to definitions table of conditions that will be used to add data to an 35 | existing definitions table 36 | } 37 | \examples{ 38 | # New data set 39 | 40 | def <- defData(varname = "x", dist = "noZeroPoisson", formula = 5) 41 | def <- defData(def, varname = "y", dist = "normal", formula = 0, variance = 9) 42 | 43 | dt <- genData(10, def) 44 | 45 | # Add columns to dt 46 | 47 | defC <- defCondition( 48 | condition = "x == 1", formula = "5 + 2*y", 49 | variance = 1, dist = "normal" 50 | ) 51 | 52 | defC <- defCondition(defC, 53 | condition = "x <= 5 & x >= 2", formula = "3 - 2*y", 54 | variance = 1, dist = "normal" 55 | ) 56 | 57 | defC <- defCondition(defC, 58 | condition = "x >= 6", formula = 1, 59 | variance = 1, dist = "normal" 60 | ) 61 | 62 | defC 63 | 64 | # Add conditional 
column with field name "z" 65 | 66 | dt <- addCondition(defC, dt, "z") 67 | dt 68 | } 69 | \seealso{ 70 | \link{distributions} 71 | } 72 | \concept{condition} 73 | \concept{define_data} 74 | -------------------------------------------------------------------------------- /man/defData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/define_data.R 3 | \name{defData} 4 | \alias{defData} 5 | \title{Add single row to definitions table} 6 | \usage{ 7 | defData( 8 | dtDefs = NULL, 9 | varname, 10 | formula, 11 | variance = 0, 12 | dist = "normal", 13 | link = "identity", 14 | id = "id" 15 | ) 16 | } 17 | \arguments{ 18 | \item{dtDefs}{Definition data.table to be modified} 19 | 20 | \item{varname}{Name (string) of new variable} 21 | 22 | \item{formula}{An R expression for mean (string)} 23 | 24 | \item{variance}{Number} 25 | 26 | \item{dist}{Distribution. For possibilities, see details} 27 | 28 | \item{link}{The link function for the mean, see details} 29 | 30 | \item{id}{A string indicating the field name for the unique record identifier} 31 | } 32 | \value{ 33 | A data.table named dtName that is an updated data definitions table 34 | } 35 | \description{ 36 | Add single row to definitions table 37 | } 38 | \details{ 39 | The possible data distributions are: normal, binary, binomial, poisson, noZeroPoisson, uniform, categorical, gamma, beta, nonrandom, uniformInt, negBinomial, exponential, mixture, trtAssign, clusterSize, custom. 
40 | } 41 | \examples{ 42 | extVar <- 2.3 43 | def <- defData(varname = "xNr", dist = "nonrandom", formula = 7, id = "idnum") 44 | def <- defData(def, varname = "xUni", dist = "uniform", formula = "10;20") 45 | def <- defData(def, 46 | varname = "xNorm", formula = "xNr + xUni * 2", dist = "normal", 47 | variance = 8 48 | ) 49 | def <- defData(def, 50 | varname = "xPois", dist = "poisson", formula = "xNr - 0.2 * xUni", 51 | link = "log" 52 | ) 53 | def <- defData(def, varname = "xCat", formula = "0.3;0.2;0.5", dist = "categorical") 54 | def <- defData(def, 55 | varname = "xGamma", dist = "gamma", formula = "5+xCat", 56 | variance = 1, link = "log" 57 | ) 58 | def <- defData(def, 59 | varname = "xBin", dist = "binary", formula = "-3 + xCat", 60 | link = "logit" 61 | ) 62 | def <- defData(def, 63 | varname = "external", dist = "nonrandom", 64 | formula = "xBin * log(..extVar)" 65 | ) 66 | def 67 | } 68 | \seealso{ 69 | \link{distributions} 70 | } 71 | \concept{define_data} 72 | -------------------------------------------------------------------------------- /man/defDataAdd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/define_data.R 3 | \name{defDataAdd} 4 | \alias{defDataAdd} 5 | \title{Add single row to definitions table that will be used to add data to an 6 | existing data.table} 7 | \usage{ 8 | defDataAdd( 9 | dtDefs = NULL, 10 | varname, 11 | formula, 12 | variance = 0, 13 | dist = "normal", 14 | link = "identity" 15 | ) 16 | } 17 | \arguments{ 18 | \item{dtDefs}{Name of definition table to be modified. Null if this is a new definition.} 19 | 20 | \item{varname}{Name (string) of new variable} 21 | 22 | \item{formula}{An R expression for mean (string)} 23 | 24 | \item{variance}{Number} 25 | 26 | \item{dist}{Distribution. 
For possibilities, see details} 27 | 28 | \item{link}{The link function for the mean, see details} 29 | } 30 | \value{ 31 | A data.table named dtName that is an updated data definitions table 32 | } 33 | \description{ 34 | Add single row to definitions table that will be used to add data to an 35 | existing data.table 36 | } 37 | \examples{ 38 | # New data set 39 | 40 | def <- defData(varname = "xNr", dist = "nonrandom", formula = 7, id = "idnum") 41 | def <- defData(def, varname = "xUni", dist = "uniform", formula = "10;20") 42 | 43 | dt <- genData(10, def) 44 | 45 | # Add columns to dt 46 | 47 | def2 <- defDataAdd(varname = "y1", formula = 10, variance = 3) 48 | def2 <- defDataAdd(def2, varname = "y2", formula = .5, dist = "binary") 49 | def2 50 | 51 | dt <- addColumns(def2, dt) 52 | dt 53 | } 54 | \seealso{ 55 | [distributions] 56 | } 57 | \concept{define_data} 58 | -------------------------------------------------------------------------------- /man/defMiss.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/missing_data.R 3 | \name{defMiss} 4 | \alias{defMiss} 5 | \title{Definitions for missing data} 6 | \usage{ 7 | defMiss( 8 | dtDefs = NULL, 9 | varname, 10 | formula, 11 | logit.link = FALSE, 12 | baseline = FALSE, 13 | monotonic = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{dtDefs}{Definition data.table to be modified} 18 | 19 | \item{varname}{Name of variable with missingness} 20 | 21 | \item{formula}{Formula to describe pattern of missingness} 22 | 23 | \item{logit.link}{Indicator set to TRUE when the probability of missingness 24 | is based on a logit model.} 25 | 26 | \item{baseline}{Indicator is set to TRUE if the variable is a baseline 27 | measure and should be missing throughout an entire observation period. 
This 28 | is applicable to repeated measures/longitudinal data.} 29 | 30 | \item{monotonic}{Indicator set to TRUE if missingness at time t is followed 31 | by missingness at all follow-up times > t.} 32 | } 33 | \value{ 34 | A data.table named dtName that is an updated data definitions table 35 | } 36 | \description{ 37 | Add single row to definitions table for missing data 38 | } 39 | \examples{ 40 | def1 <- defData(varname = "m", dist = "binary", formula = .5) 41 | def1 <- defData(def1, "u", dist = "binary", formula = .5) 42 | def1 <- defData(def1, "x1", dist = "normal", formula = "20*m + 20*u", variance = 2) 43 | def1 <- defData(def1, "x2", dist = "normal", formula = "20*m + 20*u", variance = 2) 44 | def1 <- defData(def1, "x3", dist = "normal", formula = "20*m + 20*u", variance = 2) 45 | 46 | dtAct <- genData(1000, def1) 47 | 48 | defM <- defMiss(varname = "x1", formula = .15, logit.link = FALSE) 49 | defM <- defMiss(defM, varname = "x2", formula = ".05 + m * 0.25", logit.link = FALSE) 50 | defM <- defMiss(defM, varname = "x3", formula = ".05 + u * 0.25", logit.link = FALSE) 51 | defM <- defMiss(defM, varname = "u", formula = 1, logit.link = FALSE) # not observed 52 | defM 53 | 54 | # Generate missing data matrix 55 | 56 | missMat <- genMiss(dtName = dtAct, missDefs = defM, idvars = "id") 57 | missMat 58 | 59 | # Generate observed data from actual data and missing data matrix 60 | 61 | dtObs <- genObs(dtAct, missMat, idvars = "id") 62 | dtObs 63 | } 64 | \seealso{ 65 | \code{\link{genMiss}}, \code{\link{genObs}} 66 | } 67 | \concept{missing} 68 | -------------------------------------------------------------------------------- /man/defRead.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/define_data.R 3 | \name{defRead} 4 | \alias{defRead} 5 | \title{Read external csv data set definitions} 6 | \usage{ 7 | defRead(filen, id = "id") 8 | } 9 | 
\arguments{ 10 | \item{filen}{String file name, including full path. Must be a csv file.} 11 | 12 | \item{id}{string that includes name of id field. Defaults to "id"} 13 | } 14 | \value{ 15 | A data.table with data set definitions 16 | } 17 | \description{ 18 | Read external csv data set definitions 19 | } 20 | \examples{ 21 | # Create temporary external "csv" file 22 | 23 | test1 <- c( 24 | "varname,formula,variance,dist,link", 25 | "nr,7, 0,nonrandom,identity", 26 | "x1,.4, 0,binary,identity", 27 | "y1,nr + x1 * 2,8,normal,identity", 28 | "y2,nr - 0.2 * x1,0,poisson, log" 29 | ) 30 | 31 | tfcsv <- tempfile() 32 | writeLines(test1, tfcsv) 33 | 34 | # Read external csv file stored in file "tfcsv" 35 | 36 | defs <- defRead(tfcsv, id = "myID") 37 | defs 38 | 39 | unlink(tfcsv) 40 | 41 | # Generate data based on external definition 42 | 43 | genData(5, defs) 44 | } 45 | \seealso{ 46 | [distributions] 47 | } 48 | \concept{condition} 49 | \concept{define_data} 50 | -------------------------------------------------------------------------------- /man/defReadAdd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/define_data.R 3 | \name{defReadAdd} 4 | \alias{defReadAdd} 5 | \title{Read external csv data set definitions for adding columns} 6 | \usage{ 7 | defReadAdd(filen) 8 | } 9 | \arguments{ 10 | \item{filen}{String file name, including full path. 
Must be a csv file.} 11 | } 12 | \value{ 13 | A data.table with data set definitions 14 | } 15 | \description{ 16 | Read external csv data set definitions for adding columns 17 | } 18 | \examples{ 19 | # Create temporary external "csv" files 20 | 21 | test1 <- c( 22 | "varname,formula,variance,dist,link", 23 | "nr,7, 0,nonrandom,identity" 24 | ) 25 | 26 | tfcsv1 <- tempfile() 27 | writeLines(test1, tfcsv1) 28 | 29 | test2 <- c( 30 | "varname,formula,variance,dist,link", 31 | "x1,.4, 0,binary,identity", 32 | "y1,nr + x1 * 2,8,normal,identity", 33 | "y2,nr - 0.2 * x1,0,poisson, log" 34 | ) 35 | 36 | tfcsv2 <- tempfile() 37 | writeLines(test2, tfcsv2) 38 | 39 | # Generate data based on external definitions 40 | 41 | defs <- defRead(tfcsv1) 42 | dt <- genData(5, defs) 43 | dt 44 | 45 | # Add additional data based on external definitions 46 | 47 | defs2 <- defReadAdd(tfcsv2) 48 | dt <- addColumns(defs2, dt) 49 | dt 50 | 51 | unlink(tfcsv1) 52 | unlink(tfcsv2) 53 | } 54 | \seealso{ 55 | [distributions] 56 | } 57 | \concept{define_data} 58 | -------------------------------------------------------------------------------- /man/defReadCond.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/define_data.R 3 | \name{defReadCond} 4 | \alias{defReadCond} 5 | \title{Read external csv data set definitions for adding columns} 6 | \usage{ 7 | defReadCond(filen) 8 | } 9 | \arguments{ 10 | \item{filen}{String file name, including full path. 
Must be a csv file.} 11 | } 12 | \value{ 13 | A data.table with data set definitions 14 | } 15 | \description{ 16 | Read external csv data set definitions for adding columns 17 | } 18 | \examples{ 19 | # Create temporary external "csv" files 20 | 21 | test1 <- c( 22 | "varname,formula,variance,dist,link", 23 | "x,0.3;0.4;0.3,0,categorical,identity" 24 | ) 25 | 26 | tfcsv1 <- tempfile() 27 | writeLines(test1, tfcsv1) 28 | 29 | test2 <- c( 30 | "condition,formula,variance,dist,link", 31 | "x == 1, 0.4,0,binary,identity", 32 | "x == 2, 0.6,0,binary,identity", 33 | "x >= 3, 0.8,0,binary,identity" 34 | ) 35 | 36 | tfcsv2 <- tempfile() 37 | writeLines(test2, tfcsv2) 38 | 39 | # Generate data based on external definitions 40 | 41 | defs <- defRead(tfcsv1) 42 | dt <- genData(2000, defs) 43 | dt 44 | 45 | # Add column based on 46 | 47 | defsCond <- defReadCond(tfcsv2) 48 | dt <- addCondition(defsCond, dt, "y") 49 | dt 50 | 51 | dt[, mean(y), keyby = x] 52 | 53 | unlink(tfcsv1) 54 | unlink(tfcsv2) 55 | } 56 | \seealso{ 57 | [distributions] 58 | } 59 | \concept{define_data} 60 | -------------------------------------------------------------------------------- /man/defRepeat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/define_data.R 3 | \name{defRepeat} 4 | \alias{defRepeat} 5 | \title{Add multiple (similar) rows to definitions table} 6 | \usage{ 7 | defRepeat( 8 | dtDefs = NULL, 9 | nVars, 10 | prefix, 11 | formula, 12 | variance = 0, 13 | dist = "normal", 14 | link = "identity", 15 | id = "id" 16 | ) 17 | } 18 | \arguments{ 19 | \item{dtDefs}{Definition data.table to be modified} 20 | 21 | \item{nVars}{Number of new variables to define} 22 | 23 | \item{prefix}{Prefix (character) for new variables} 24 | 25 | \item{formula}{An R expression for mean (string)} 26 | 27 | \item{variance}{Number or formula} 28 | 29 | \item{dist}{Distribution. 
For possibilities, see details} 30 | 31 | \item{link}{The link function for the mean, see details} 32 | 33 | \item{id}{A string indicating the field name for the unique record identifier} 34 | } 35 | \value{ 36 | A data.table named dtName that is an updated data definitions table 37 | } 38 | \description{ 39 | Add multiple (similar) rows to definitions table 40 | } 41 | \details{ 42 | The possible data distributions are: normal, binary, binomial, poisson, noZeroPoisson, uniform, categorical, gamma, beta, nonrandom, uniformInt, negBinomial, exponential, mixture, trtAssign, clusterSize, custom. 43 | } 44 | \examples{ 45 | def <- defRepeat( 46 | nVars = 4, prefix = "g", formula = "1/3;1/3;1/3", 47 | variance = 0, dist = "categorical" 48 | ) 49 | def <- defData(def, varname = "a", formula = "1;1", dist = "trtAssign") 50 | def <- defRepeat(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") 51 | def <- defData(def, "y", formula = "0.10", dist = "binary") 52 | 53 | def 54 | } 55 | \seealso{ 56 | \link{distributions} 57 | } 58 | \concept{define_data} 59 | -------------------------------------------------------------------------------- /man/defRepeatAdd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/define_data.R 3 | \name{defRepeatAdd} 4 | \alias{defRepeatAdd} 5 | \title{Add multiple (similar) rows to definitions table that will be used to add data to an 6 | existing data.table} 7 | \usage{ 8 | defRepeatAdd( 9 | dtDefs = NULL, 10 | nVars, 11 | prefix, 12 | formula, 13 | variance = 0, 14 | dist = "normal", 15 | link = "identity", 16 | id = "id" 17 | ) 18 | } 19 | \arguments{ 20 | \item{dtDefs}{Definition data.table to be modified} 21 | 22 | \item{nVars}{Number of new variables to define} 23 | 24 | \item{prefix}{Prefix (character) for new variables} 25 | 26 | \item{formula}{An R expression for mean (string)} 27 | 28 | \item{variance}{Number or formula} 29 | 30 | \item{dist}{Distribution.
For possibilities, see details} 31 | 32 | \item{link}{The link function for the mean, see details} 33 | 34 | \item{id}{A string indicating the field name for the unique record identifier} 35 | } 36 | \value{ 37 | A data.table named dtName that is an updated data definitions table 38 | } 39 | \description{ 40 | Add multiple (similar) rows to definitions table that will be used to add data to an 41 | existing data.table 42 | } 43 | \details{ 44 | The possible data distributions are: normal, binary, binomial, poisson, noZeroPoisson, uniform, categorical, gamma, beta, nonrandom, uniformInt, negBinomial, exponential, mixture, trtAssign, clusterSize, custom. 45 | } 46 | \examples{ 47 | def <- defRepeatAdd( 48 | nVars = 4, prefix = "g", formula = "1/3;1/3;1/3", 49 | variance = 0, dist = "categorical" 50 | ) 51 | def <- defDataAdd(def, varname = "a", formula = "1;1", dist = "trtAssign") 52 | def <- defRepeatAdd(def, 8, "b", formula = "5 + a", variance = 3, dist = "normal") 53 | def <- defDataAdd(def, "y", formula = "0.10", dist = "binary") 54 | 55 | def 56 | } 57 | \seealso{ 58 | \link{distributions} 59 | } 60 | \concept{define_data} 61 | -------------------------------------------------------------------------------- /man/defSurv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/define_data.R 3 | \name{defSurv} 4 | \alias{defSurv} 5 | \title{Add single row to survival definitions} 6 | \usage{ 7 | defSurv( 8 | dtDefs = NULL, 9 | varname, 10 | formula = 0, 11 | scale = 1, 12 | shape = 1, 13 | transition = 0 14 | ) 15 | } 16 | \arguments{ 17 | \item{dtDefs}{Definition data.table to be modified} 18 | 19 | \item{varname}{Variable name} 20 | 21 | \item{formula}{Covariates predicting survival} 22 | 23 | \item{scale}{Scale parameter for the Weibull distribution.} 24 | 25 | \item{shape}{The shape of the Weibull distribution. Shape = 1 for 26 | an exponential distribution} 27 | 28 | \item{transition}{An integer value indicating the starting point for a new 29 | specification of the hazard function.
It will default to 0 (and must be 0) 30 | for the first instance of a "varname".} 31 | } 32 | \value{ 33 | A data.table named dtName that is an updated data definitions table 34 | } 35 | \description{ 36 | Add single row to survival definitions 37 | } 38 | \examples{ 39 | # Baseline data definitions 40 | 41 | def <- defData(varname = "x1", formula = .5, dist = "binary") 42 | def <- defData(def, varname = "x2", formula = .5, dist = "binary") 43 | def <- defData(def, varname = "grp", formula = .5, dist = "binary") 44 | 45 | # Survival data definitions 46 | 47 | sdef <- defSurv( 48 | varname = "survTime", formula = "1.5*x1", 49 | scale = "grp*50 + (1-grp)*25", shape = "grp*1 + (1-grp)*1.5" 50 | ) 51 | 52 | sdef <- defSurv(sdef, varname = "censorTime", scale = 80, shape = 1) 53 | 54 | sdef 55 | 56 | # Baseline data definitions 57 | 58 | dtSurv <- genData(300, def) 59 | 60 | # Add survival times 61 | 62 | dtSurv <- genSurv(dtSurv, sdef) 63 | 64 | head(dtSurv) 65 | } 66 | \concept{define_data} 67 | -------------------------------------------------------------------------------- /man/delColumns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{delColumns} 4 | \alias{delColumns} 5 | \title{Delete columns from existing data set} 6 | \usage{ 7 | delColumns(dtOld, vars) 8 | } 9 | \arguments{ 10 | \item{dtOld}{Name of data table that is to be updated.} 11 | 12 | \item{vars}{Vector of column names (as strings).} 13 | } 14 | \value{ 15 | An updated data.table without \code{vars}. 
16 | } 17 | \description{ 18 | Delete columns from existing data set 19 | } 20 | \examples{ 21 | # New data set 22 | 23 | def <- defData(varname = "x", dist = "noZeroPoisson", formula = 7, id = "idnum") 24 | def <- defData(def, varname = "xUni", dist = "uniformInt", formula = "x-3;x+3") 25 | 26 | dt <- genData(10, def) 27 | dt 28 | 29 | # Delete column 30 | 31 | dt <- delColumns(dt, "x") 32 | dt 33 | } 34 | \concept{utility} 35 | -------------------------------------------------------------------------------- /man/distributions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simstudy-package.R 3 | \name{distributions} 4 | \alias{distributions} 5 | \alias{normal} 6 | \alias{poisson} 7 | \alias{noZeroPoisson} 8 | \alias{binary} 9 | \alias{binomial} 10 | \alias{uniform} 11 | \alias{categorical} 12 | \alias{gamma} 13 | \alias{beta} 14 | \alias{negBinomial} 15 | \alias{nonrandom} 16 | \alias{exponential} 17 | \alias{mixture} 18 | \title{Distributions for Data Definitions} 19 | \arguments{ 20 | \item{formula}{Desired mean as a Number or an R expression for mean as a 21 | String. Variables defined via \code{\link[=defData]{defData()}} and variables within the 22 | parent environment (prefixed with \code{..}) can be used within the formula. 23 | Functions from the parent environment can be used without a prefix.} 24 | 25 | \item{variance}{Number. Default is \code{0}.} 26 | 27 | \item{link}{String identifying the link function to be used. Default is 28 | \code{identity}.} 29 | } 30 | \description{ 31 | This help file describes the distributions used for data creation in 32 | \code{simstudy}. 33 | } 34 | \details{ 35 | For details about the statistical distributions please see 36 | \link[stats:Distributions]{stats::distributions}, any non-statistical distributions will be 37 | explained below. 
Required variables and expected pattern for each 38 | distribution can be found in this table:\tabular{lllll}{ 39 | \strong{name} \tab \strong{formula} \tab \strong{format} \tab \strong{variance} \tab \strong{link} \cr 40 | beta \tab mean \tab String or Number \tab dispersion value \tab identity or logit \cr 41 | binary \tab probability for 1 \tab String or Number \tab NA \tab identity, log, or logit \cr 42 | binomial \tab probability of success \tab String or Number \tab number of trials \tab identity, log, or logit \cr 43 | categorical \tab probabilities \tab \verb{p_1;p_2;..;p_n} \tab category labels: \verb{a;b;c} , \verb{50;130;20} \tab identity or logit \cr 44 | custom \tab name of function \tab String \tab arguments \tab identity \cr 45 | exponential \tab mean (lambda) \tab String or Number \tab NA \tab identity or log \cr 46 | gamma \tab mean \tab String or Number \tab dispersion value \tab identity or log \cr 47 | mixture \tab formula \tab \code{x_1 }|\code{p_1 + x_2}|\verb{p_2 ... x_n}|\code{ p_n} \tab NA \tab NA \cr 48 | negBinomial \tab mean \tab String or Number \tab dispersion value \tab identity or log \cr 49 | nonrandom \tab formula \tab String or Number \tab NA \tab NA \cr 50 | normal \tab mean \tab String or Number \tab variance \tab NA \cr 51 | noZeroPoisson \tab mean \tab String or Number \tab NA \tab identity or log \cr 52 | poisson \tab mean \tab String or Number \tab NA \tab identity or log \cr 53 | trtAssign \tab ratio \tab \verb{r_1;r_2;..;r_n} \tab stratification \tab identity or nonbalanced \cr 54 | uniform \tab range \tab \verb{from;to} \tab NA \tab NA \cr 55 | uniformInt \tab range \tab \verb{from;to} \tab NA \tab NA \cr 56 | } 57 | } 58 | \section{Mixture}{ 59 | The mixture distribution makes it possible to mix two 60 | previously defined distributions/variables. Each variable that should be 61 | part of the new distribution \verb{x_1,...,x_n} is assigned a probability 62 | \verb{p_1,...,p_n}.
For more information see 63 | \href{https://www.rdatagen.net/post/adding-mixture-distributions-to-simstudy/}{rdatagen.net}. 64 | } 65 | 66 | \examples{ 67 | ext_var <- 2.9 68 | def <- defData(varname = "external", formula = "3 + log(..ext_var)", variance = .5) 69 | def 70 | genData(5, def) 71 | } 72 | -------------------------------------------------------------------------------- /man/gammaGetShapeRate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{gammaGetShapeRate} 4 | \alias{gammaGetShapeRate} 5 | \title{Convert gamma mean and dispersion parameters to shape and rate parameters} 6 | \usage{ 7 | gammaGetShapeRate(mean, dispersion) 8 | } 9 | \arguments{ 10 | \item{mean}{The mean of a gamma distribution} 11 | 12 | \item{dispersion}{The dispersion parameter of a gamma distribution} 13 | } 14 | \value{ 15 | A list that includes the shape and rate parameters of the gamma distribution 16 | } 17 | \description{ 18 | Convert gamma mean and dispersion parameters to shape and rate parameters 19 | } 20 | \details{ 21 | In simstudy, users specify the gamma distribution as a function of two parameters - a mean 22 | and dispersion. In this case, the variance of the specified distribution is (mean^2)*dispersion. 23 | The base R function rgamma uses the shape and rate parameters to specify the gamma distribution. 24 | This function converts the mean and dispersion into the shape and rate. 
25 | } 26 | \examples{ 27 | set.seed(12345) 28 | mean <- 5 29 | dispersion <- 1.5 30 | rs <- gammaGetShapeRate(mean, dispersion) 31 | c(rs$shape, rs$rate) 32 | vec <- rgamma(1000, shape = rs$shape, rate = rs$rate) 33 | (estMoments <- c(mean(vec), var(vec))) 34 | (theoryMoments <- c(mean, mean^2 * dispersion)) 35 | (theoryMoments <- c(rs$shape / rs$rate, rs$shape / rs$rate^2)) 36 | } 37 | \concept{utility} 38 | -------------------------------------------------------------------------------- /man/genCatFormula.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{genCatFormula} 4 | \alias{genCatFormula} 5 | \title{Generate Categorical Formula} 6 | \usage{ 7 | genCatFormula(..., n = 0) 8 | } 9 | \arguments{ 10 | \item{...}{one or more numeric values to be concatenated, delimited by ";".} 11 | 12 | \item{n}{Number of probabilities (categories) to be generated - all with 13 | equal probability.} 14 | } 15 | \value{ 16 | string with multinomial probabilities. 17 | } 18 | \description{ 19 | Create a semi-colon delimited string of probabilities to be used 20 | to define categorical data. 21 | } 22 | \details{ 23 | The function accepts a number of probabilities or a value of n, but 24 | not both. 25 | 26 | If probabilities are passed, the string that is returned depends on the 27 | nature of those probabilities. If the sum of the probabilities is less than 28 | 1, an additional category is created with the probability 1 - sum(provided 29 | probabilities). If the sum of the probabilities is equal to 1, then the 30 | number of categories is set to the number of probabilities provided. If the 31 | sum of the probabilities exceeds one (and there is more than one 32 | probability), the probabilities are standardized by dividing by the sum of 33 | the probabilities provided. 
34 | 35 | If n is provided, n probabilities are included in the string, each with a probability equal to 1/n. 36 | } 37 | \examples{ 38 | genCatFormula(0.25, 0.25, 0.50) 39 | genCatFormula(1 / 3, 1 / 2) 40 | genCatFormula(1, 2, 3) 41 | genCatFormula(n = 5) 42 | } 43 | \concept{utility} 44 | -------------------------------------------------------------------------------- /man/genCluster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/group_data.R 3 | \name{genCluster} 4 | \alias{genCluster} 5 | \title{Simulate clustered data} 6 | \usage{ 7 | genCluster(dtClust, cLevelVar, numIndsVar, level1ID, allLevel2 = TRUE) 8 | } 9 | \arguments{ 10 | \item{dtClust}{Name of existing data set that contains the level "2" data} 11 | 12 | \item{cLevelVar}{Variable name (string) of cluster id in dtClust} 13 | 14 | \item{numIndsVar}{Variable name (string) of number of observations 15 | per cluster in dtClust. Can also be a single integer value that will 16 | be used for all clusters.} 17 | 18 | \item{level1ID}{Name of id field in new level "1" data set} 19 | 20 | \item{allLevel2}{Indicator: if set to TRUE (default), the returned data set 21 | includes all of the Level 2 data columns. If FALSE, the returned data set 22 | only includes the Levels 1 and 2 ids.} 23 | } 24 | \value{ 25 | A simulated data table with level "1" data 26 | } 27 | \description{ 28 | Simulate data set that is one level down in a multilevel data context. The 29 | level "2" data set must contain a field that specifies the number of 30 | individual records in a particular cluster. 
31 | } 32 | \examples{ 33 | gen.school <- defData( 34 | varname = "s0", dist = "normal", 35 | formula = 0, variance = 3, id = "idSchool" 36 | ) 37 | gen.school <- defData(gen.school, 38 | varname = "nClasses", 39 | dist = "noZeroPoisson", formula = 3 40 | ) 41 | 42 | dtSchool <- genData(3, gen.school) 43 | dtSchool 44 | 45 | dtClass <- genCluster(dtSchool, 46 | cLevelVar = "idSchool", 47 | numIndsVar = "nClasses", level1ID = "idClass" 48 | ) 49 | dtClass 50 | 51 | dtClass <- genCluster(dtSchool, 52 | cLevelVar = "idSchool", 53 | numIndsVar = 3, level1ID = "idClass" 54 | ) 55 | dtClass 56 | } 57 | \concept{group_data} 58 | -------------------------------------------------------------------------------- /man/genCorData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_correlated_data.R 3 | \name{genCorData} 4 | \alias{genCorData} 5 | \title{Create correlated data} 6 | \usage{ 7 | genCorData( 8 | n, 9 | mu, 10 | sigma, 11 | corMatrix = NULL, 12 | rho, 13 | corstr = "ind", 14 | cnames = NULL, 15 | idname = "id" 16 | ) 17 | } 18 | \arguments{ 19 | \item{n}{Number of observations} 20 | 21 | \item{mu}{A vector of means. The length of mu must be nvars.} 22 | 23 | \item{sigma}{Standard deviation of variables. If standard deviation differs for 24 | each variable, enter as a vector with the same length as the mean vector mu. If 25 | the standard deviation is constant across variables, a single value can be entered.} 26 | 27 | \item{corMatrix}{Correlation matrix can be entered directly. It must be symmetrical and 28 | positive semi-definite. It is not a required field; if a matrix is not provided, then a 29 | structure and correlation coefficient rho must be specified.} 30 | 31 | \item{rho}{Correlation coefficient, -1 <= rho <= 1.
Use if corMatrix is not provided.} 32 | 33 | \item{corstr}{Correlation structure of the variance-covariance matrix 34 | defined by sigma and rho. Options include "ind" for an independence 35 | structure, "cs" for a compound symmetry structure, and "ar1" for an 36 | autoregressive structure.} 37 | 38 | \item{cnames}{Explicit column names. A single string with names separated 39 | by commas. If no string is provided, the default names will be V#, where # 40 | represents the column.} 41 | 42 | \item{idname}{The name of the index id name. Defaults to "id."} 43 | } 44 | \value{ 45 | A data.table with n rows and the k + 1 columns, where k is the number of 46 | means in the vector mu. 47 | } 48 | \description{ 49 | Create correlated data 50 | } 51 | \examples{ 52 | mu <- c(3, 8, 15) 53 | sigma <- c(1, 2, 3) 54 | 55 | corMat <- matrix(c(1, .2, .8, .2, 1, .6, .8, .6, 1), nrow = 3) 56 | 57 | dtcor1 <- genCorData(1000, mu = mu, sigma = sigma, rho = .7, corstr = "cs") 58 | dtcor2 <- genCorData(1000, mu = mu, sigma = sigma, corMatrix = corMat) 59 | 60 | dtcor1 61 | dtcor2 62 | 63 | round(var(dtcor1[, .(V1, V2, V3)]), 3) 64 | round(cor(dtcor1[, .(V1, V2, V3)]), 2) 65 | 66 | round(var(dtcor2[, .(V1, V2, V3)]), 3) 67 | round(cor(dtcor2[, .(V1, V2, V3)]), 2) 68 | } 69 | \concept{correlated} 70 | -------------------------------------------------------------------------------- /man/genCorFlex.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_correlated_data.R 3 | \name{genCorFlex} 4 | \alias{genCorFlex} 5 | \title{Create multivariate (correlated) data - for general distributions} 6 | \usage{ 7 | genCorFlex(n, defs, rho = 0, tau = NULL, corstr = "cs", corMatrix = NULL) 8 | } 9 | \arguments{ 10 | \item{n}{Number of observations} 11 | 12 | \item{defs}{Field definition table created by function `defData`. All definitions 13 | must be scalar. 
Definition specifies distribution, mean, and variance, with all 14 | caveats for each of the distributions. (See defData).} 15 | 16 | \item{rho}{Correlation coefficient, -1 <= rho <= 1. Use if corMatrix is not provided.} 17 | 18 | \item{tau}{Correlation based on Kendall's tau. If tau is specified, then it is 19 | used as the correlation even if rho is specified. If tau is NULL, then the specified 20 | value of rho is used, or rho defaults to 0.} 21 | 22 | \item{corstr}{Correlation structure of the variance-covariance matrix 23 | defined by sigma and rho. Options include "cs" for a compound symmetry structure 24 | and "ar1" for an autoregressive structure. Defaults to "cs".} 25 | 26 | \item{corMatrix}{Correlation matrix can be entered directly. It must be symmetrical and 27 | positive semi-definite. It is not a required field; if a matrix is not provided, then a 28 | structure and correlation coefficient rho must be specified. This is only used if tau 29 | is not specified.} 30 | } 31 | \value{ 32 | data.table with added column(s) of correlated data 33 | } 34 | \description{ 35 | Create multivariate (correlated) data - for general distributions 36 | } 37 | \examples{ 38 | \dontrun{ 39 | def <- defData(varname = "xNorm", formula = 0, variance = 4, dist = "normal") 40 | def <- defData(def, varname = "xGamma1", formula = 15, variance = 2, dist = "gamma") 41 | def <- defData(def, varname = "xBin", formula = 0.5, dist = "binary") 42 | def <- defData(def, varname = "xUnif1", formula = "0;10", dist = "uniform") 43 | def <- defData(def, varname = "xPois", formula = 15, dist = "poisson") 44 | def <- defData(def, varname = "xUnif2", formula = "23;28", dist = "uniform") 45 | def <- defData(def, varname = "xUnif3", formula = "100;150", dist = "uniform") 46 | def <- defData(def, varname = "xGamma2", formula = 150, variance = 0.003, dist = "gamma") 47 | def <- defData(def, varname = "xNegBin", formula = 5, variance = .8, dist = "negBinomial") 48 | 49 | dt <- genCorFlex(1000, def, 
tau = 0.3, corstr = "cs") 50 | 51 | cor(dt[, -"id"]) 52 | cor(dt[, -"id"], method = "kendall") 53 | var(dt[, -"id"]) 54 | apply(dt[, -"id"], 2, mean) 55 | } 56 | } 57 | \concept{correlated} 58 | -------------------------------------------------------------------------------- /man/genCorGen.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_correlated_data.R 3 | \name{genCorGen} 4 | \alias{genCorGen} 5 | \title{Create multivariate (correlated) data - for general distributions} 6 | \usage{ 7 | genCorGen( 8 | n, 9 | nvars, 10 | params1, 11 | params2 = NULL, 12 | dist, 13 | rho, 14 | corstr, 15 | corMatrix = NULL, 16 | wide = FALSE, 17 | cnames = NULL, 18 | method = "copula", 19 | idname = "id" 20 | ) 21 | } 22 | \arguments{ 23 | \item{n}{Number of observations} 24 | 25 | \item{nvars}{Number of variables} 26 | 27 | \item{params1}{A single vector specifying the mean of the distribution. The vector is of 28 | length 1 if the mean is the same across all observations, otherwise the vector is of length 29 | nvars. In the case of the uniform distribution the vector specifies the minimum.} 30 | 31 | \item{params2}{A single vector specifying a possible second parameter for the distribution. 32 | For the normal distribution, this will be the variance; for the gamma distribution, this 33 | will be the dispersion; and for the uniform distribution, this will be the maximum. The 34 | vector is of length 1 if the parameter is the same across all observations, otherwise the vector 35 | is of length nvars.} 36 | 37 | \item{dist}{A string indicating "binary", "poisson", "gamma", "normal", or "uniform".} 38 | 39 | \item{rho}{Correlation coefficient, -1 <= rho <= 1. Use if corMatrix is not provided.} 40 | 41 | \item{corstr}{Correlation structure of the variance-covariance matrix 42 | defined by sigma and rho. 
Options include "cs" for a compound symmetry structure 43 | and "ar1" for an autoregressive structure.} 44 | 45 | \item{corMatrix}{Correlation matrix can be entered directly. It must be symmetrical and 46 | positive semi-definite. It is not a required field; if a matrix is not provided, then a 47 | structure and correlation coefficient rho must be specified.} 48 | 49 | \item{wide}{The layout of the returned file - if wide = TRUE, all new correlated 50 | variables will be returned in a single record; if wide = FALSE, each new variable 51 | will be its own record (i.e. the data will be in long form). Defaults to FALSE.} 52 | 53 | \item{cnames}{Explicit column names. A single string with names separated 54 | by commas. If no string is provided, the default names will be V#, where # 55 | represents the column.} 56 | 57 | \item{method}{Two methods are available to generate correlated data. (1) "copula" uses 58 | the multivariate Gaussian copula method, which 59 | applies to all available distributions. 
(2) "ep" uses an algorithm developed by 60 | Emrich and Piedmonte (1991).} 61 | 62 | \item{idname}{Character value that specifies the name of the id variable.} 63 | } 64 | \value{ 65 | data.table with added column(s) of correlated data 66 | } 67 | \description{ 68 | Create multivariate (correlated) data - for general distributions 69 | } 70 | \examples{ 71 | set.seed(23432) 72 | lambda <- c(8, 10, 12) 73 | 74 | genCorGen(100, nvars = 3, params1 = lambda, dist = "poisson", rho = .7, corstr = "cs") 75 | genCorGen(100, nvars = 3, params1 = 5, dist = "poisson", rho = .7, corstr = "cs") 76 | genCorGen(100, nvars = 3, params1 = lambda, dist = "poisson", rho = .7, corstr = "cs", wide = TRUE) 77 | genCorGen(100, nvars = 3, params1 = 5, dist = "poisson", rho = .7, corstr = "cs", wide = TRUE) 78 | 79 | genCorGen(100, 80 | nvars = 3, params1 = lambda, dist = "poisson", rho = .7, corstr = "cs", 81 | cnames = "new_var" 82 | ) 83 | genCorGen(100, 84 | nvars = 3, params1 = lambda, dist = "poisson", rho = .7, corstr = "cs", 85 | wide = TRUE, cnames = "a, b, c" 86 | ) 87 | } 88 | \references{ 89 | Emrich LJ, Piedmonte MR. A Method for Generating High-Dimensional 90 | Multivariate Binary Variates. The American Statistician 1991;45:302-4. 91 | } 92 | \concept{correlated} 93 | -------------------------------------------------------------------------------- /man/genCorMat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_correlated_data.R 3 | \name{genCorMat} 4 | \alias{genCorMat} 5 | \title{Create a correlation matrix} 6 | \usage{ 7 | genCorMat(nvars, cors = NULL, rho = NULL, corstr = "cs", nclusters = 1) 8 | } 9 | \arguments{ 10 | \item{nvars}{number of rows and columns (i.e. number of variables) for correlation matrix. 
It can be 11 | a scalar or vector (see details).} 12 | 13 | \item{cors}{vector of correlations.} 14 | 15 | \item{rho}{Correlation coefficient, \code{-1 <= rho <= 1}. Use if corMatrix is not provided. It can 16 | be a scalar or vector (see details).} 17 | 18 | \item{corstr}{Correlation structure. Options include "cs" for a compound symmetry structure, "ar1" 19 | for an autoregressive structure of order 1, "arx" for an autoregressive structure 20 | that has a general decay pattern, and "structured" that imposes a prescribed 21 | pattern between observations based on distance (see details).} 22 | 23 | \item{nclusters}{An integer that indicates the number of matrices that will be generated.} 24 | } 25 | \value{ 26 | A single correlation matrix of size \code{nvars x nvars}, or a list of matrices of potentially 27 | different sizes with length indicated by \code{nclusters}. 28 | } 29 | \description{ 30 | Create a correlation matrix 31 | } 32 | \details{ 33 | This function can generate correlation matrices randomly or deterministically, 34 | depending on the combination of arguments provided. A single matrix will be 35 | generated when \code{nclusters == 1} (the default), and a list of matrices will be generated when 36 | \code{nclusters > 1}. 37 | 38 | If the vector `cors` is specified with length `nvars - 1` then `corstr` must be "structured". If 39 | `cors` is specified with length `choose(nvars, 2)` then `corstr` should not be specified as 40 | "structured". In this case the `cors` vector should be interpreted as the lower triangle of the correlation 41 | matrix, and is specified by reading down the columns. For example, if \bold{CM} is the correlation matrix and 42 | \code{nvars = 3}, then \code{CM[2,1] = CM[1,2] = cors[1]}, \code{CM[3,1] = CM[1,3] = cors[2]}, 43 | and \code{CM[3,2] = CM[2,3] = cors[3]}. 44 | 45 | If neither the vector \code{cors} nor \code{rho} is specified, random correlation matrices are generated 46 | based on the specified \code{corstr}. 
If the structure is "arx", then a random vector of 47 | length \code{nvars - 1} is generated and sorted in descending order; the correlation matrix 48 | will be generated based on this set of structured correlations. If the structure is \emph{not} specified 49 | as "arx" then a random positive definite matrix of dimension nvars x nvars with no structural 50 | assumptions is generated. 51 | 52 | If \code{cors} is not specified but \code{rho} is specified, then a matrix with either a "cs" or "ar1" 53 | structure is generated. 54 | 55 | If \code{nclusters > 1}, \code{nvars} can be of length 1 or \code{nclusters}. If it is of length 1, 56 | each cluster will have correlation matrices with the same dimension. Likewise, if \code{nclusters > 1}, 57 | \code{rho} can be of length 1 or \code{nclusters}. If the length of \code{rho} is 1, 58 | each cluster will have correlation matrices with the same autocorrelation. 59 | } 60 | \examples{ 61 | genCorMat(nvars = 3, cors = c(.3, -.2, .1)) 62 | genCorMat(nvars = 3) 63 | 64 | genCorMat(nvars = 4, c(.3, -.2, .1, .2, .5, .2)) 65 | genCorMat(4) 66 | 67 | genCorMat(nvars = 4, cors = c(.3, .2, .1), corstr = "structured") 68 | genCorMat(nvars = 4, corstr = "arx") 69 | 70 | genCorMat(nvars = 4, rho = .4, corstr = "cs") 71 | genCorMat(nvars = 4, rho = .4, corstr = "ar1") 72 | 73 | genCorMat(nvars = c(3, 2, 5), rho = c(.4, .8, .7), corstr = "ar1", nclusters = 3) 74 | 75 | } 76 | \concept{correlated} 77 | -------------------------------------------------------------------------------- /man/genCorOrdCat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_correlated_data.R 3 | \name{genCorOrdCat} 4 | \alias{genCorOrdCat} 5 | \title{Generate correlated ordinal categorical data} 6 | \usage{ 7 | genCorOrdCat( 8 | dtName, 9 | idname = "id", 10 | adjVar = NULL, 11 | baseprobs, 12 | prefix = "grp", 13 | rho, 14 | corstr, 15 
| corMatrix = NULL 16 | ) 17 | } 18 | \description{ 19 | This function is deprecated, please use \link{genOrdCat} instead. 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/genData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_data.R 3 | \name{genData} 4 | \alias{genData} 5 | \title{Calling function to simulate data} 6 | \usage{ 7 | genData(n, dtDefs = NULL, id = "id", envir = parent.frame()) 8 | } 9 | \arguments{ 10 | \item{n}{the number of observations required in the data set.} 11 | 12 | \item{dtDefs}{name of definitions data.table/data.frame. If no definitions 13 | are provided 14 | a data set with ids only is generated.} 15 | 16 | \item{id}{The string defining the id of the record. Will override previously 17 | set id name with a warning (unless the old value is 'id'). If the 18 | id attribute in dtDefs is NULL will default to 'id'.} 19 | 20 | \item{envir}{Environment the data definitions are evaluated in. 21 | Defaults to \link[base:sys.parent]{base::parent.frame}.} 22 | } 23 | \value{ 24 | A data.table that contains the simulated data. 
25 | } 26 | \description{ 27 | Calling function to simulate data 28 | } 29 | \examples{ 30 | genData(5) 31 | genData(5, id = "grpID") 32 | 33 | def <- defData( 34 | varname = "xNr", dist = "nonrandom", formula = 7, 35 | id = "idnum" 36 | ) 37 | def <- defData(def, 38 | varname = "xUni", dist = "uniform", 39 | formula = "10;20" 40 | ) 41 | def <- defData(def, 42 | varname = "xNorm", formula = "xNr + xUni * 2", 43 | dist = "normal", variance = 8 44 | ) 45 | def <- defData(def, 46 | varname = "xPois", dist = "poisson", 47 | formula = "xNr - 0.2 * xUni", link = "log" 48 | ) 49 | def <- defData(def, 50 | varname = "xCat", formula = "0.3;0.2;0.5", 51 | dist = "categorical" 52 | ) 53 | def <- defData(def, 54 | varname = "xGamma", dist = "gamma", formula = "5+xCat", 55 | variance = 1, link = "log" 56 | ) 57 | def <- defData(def, 58 | varname = "xBin", dist = "binary", formula = "-3 + xCat", 59 | link = "logit" 60 | ) 61 | def 62 | 63 | genData(5, def) 64 | } 65 | \concept{generate_data} 66 | -------------------------------------------------------------------------------- /man/genDataDensity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_data.R 3 | \name{genDataDensity} 4 | \alias{genDataDensity} 5 | \title{Generate data from a density defined by a vector of integers} 6 | \usage{ 7 | genDataDensity(n, dataDist, varname, uselimits = FALSE, id = "id") 8 | } 9 | \arguments{ 10 | \item{n}{Number of samples to draw from the density.} 11 | 12 | \item{dataDist}{Vector that defines the desired density} 13 | 14 | \item{varname}{Name of the variable} 15 | 16 | \item{uselimits}{Indicator to use minimum and maximum of input data vector as 17 | limits for sampling. Defaults to FALSE, in which case a smoothed density that 18 | extends beyond the limits is used.} 19 | 20 | \item{id}{A string specifying the field that serves as the record id. 
The 21 | default field is "id".} 22 | } 23 | \value{ 24 | A data table with the generated data 25 | } 26 | \description{ 27 | Data are generated from a density defined by a vector of integers 28 | } 29 | \examples{ 30 | data_dist <- c(1, 2, 2, 3, 4, 4, 4, 5, 6, 6, 7, 7, 7, 8, 9, 10, 10) 31 | 32 | genDataDensity(500, data_dist, varname = "x1", id = "id") 33 | genDataDensity(500, data_dist, varname = "x1", uselimits = TRUE, id = "id") 34 | } 35 | \concept{generate_data} 36 | -------------------------------------------------------------------------------- /man/genDummy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_data.R 3 | \name{genDummy} 4 | \alias{genDummy} 5 | \title{Create dummy variables from a factor or integer variable} 6 | \usage{ 7 | genDummy(dtName, varname, sep = ".", replace = FALSE) 8 | } 9 | \arguments{ 10 | \item{dtName}{Data table with columns} 11 | 12 | \item{varname}{Name of factor} 13 | 14 | \item{sep}{Character to be used in creating new name for dummy fields. 15 | Valid characters include all letters and "_". Will default to ".". 
If 16 | an invalid character is provided, it will be replaced by default.} 17 | 18 | \item{replace}{If replace is set to TRUE (defaults to FALSE) the field 19 | referenced by varname will be removed.} 20 | } 21 | \description{ 22 | Create dummy variables from a factor or integer variable 23 | } 24 | \examples{ 25 | 26 | # First example: 27 | 28 | def <- defData(varname = "cat", formula = ".2;.3;.5", dist = "categorical") 29 | def <- defData(def, varname = "x", formula = 5, variance = 2) 30 | 31 | dx <- genData(200, def) 32 | dx 33 | 34 | dx <- genFactor(dx, "cat", labels = c("one", "two", "three"), replace = TRUE) 35 | dx <- genDummy(dx, varname = "fcat", sep = "_") 36 | 37 | dx 38 | 39 | # Second example: 40 | 41 | dx <- genData(15) 42 | dx <- trtAssign(dtName = dx, 3, grpName = "arm") 43 | dx <- genDummy(dx, varname = "arm") 44 | dx 45 | } 46 | \concept{generate_data} 47 | -------------------------------------------------------------------------------- /man/genFactor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_data.R 3 | \name{genFactor} 4 | \alias{genFactor} 5 | \title{Create factor variable from an existing (non-double) variable} 6 | \usage{ 7 | genFactor(dtName, varname, labels = NULL, prefix = "f", replace = FALSE) 8 | } 9 | \arguments{ 10 | \item{dtName}{Data table with columns.} 11 | 12 | \item{varname}{Name of field(s) to be converted.} 13 | 14 | \item{labels}{Factor level labels. If not provided, the generated factor 15 | levels will be used as the labels. Can be a vector (if only one new factor or 16 | all factors have the same labels) or a list of character vectors of the same 17 | length as varname.} 18 | 19 | \item{prefix}{By default, the new field name will be a concatenation of "f" 20 | and the old field name. 
A prefix string can be provided.} 21 | 22 | \item{replace}{If replace is set to TRUE (defaults to FALSE) the field 23 | referenced by varname will be removed.} 24 | } 25 | \description{ 26 | Create factor variable from an existing (non-double) variable 27 | } 28 | \examples{ 29 | 30 | # First example: 31 | 32 | def <- defData(varname = "cat", formula = ".2;.3;.5", dist = "categorical") 33 | def <- defData(def, varname = "x", formula = 5, variance = 2) 34 | 35 | dx <- genData(200, def) 36 | dx 37 | 38 | dx <- genFactor(dx, "cat", labels = c("one", "two", "three")) 39 | dx 40 | 41 | # Second example: 42 | 43 | dx <- genData(10) 44 | dx <- trtAssign(dtName = dx, 2, grpName = "studyArm") 45 | dx <- genFactor(dx, varname = "studyArm", labels = c("control", "treatment"), prefix = "t_") 46 | dx 47 | } 48 | \concept{generate_data} 49 | -------------------------------------------------------------------------------- /man/genFormula.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_data.R 3 | \name{genFormula} 4 | \alias{genFormula} 5 | \title{Generate a linear formula} 6 | \usage{ 7 | genFormula(coefs, vars) 8 | } 9 | \arguments{ 10 | \item{coefs}{A vector that contains the values of the 11 | coefficients. Coefficients can also be defined as character for use with 12 | double dot notation. If length(coefs) == length(vars), then no intercept 13 | is assumed. Otherwise, an intercept is assumed.} 14 | 15 | \item{vars}{A vector of strings that specify the names of the 16 | explanatory variables in the equation.} 17 | } 18 | \value{ 19 | A string that represents the desired formula 20 | } 21 | \description{ 22 | Formulas for additive linear models can be generated 23 | with specified coefficient values and variable names. 
24 | } 25 | \examples{ 26 | 27 | genFormula(c(.5, 2, 4), c("A", "B", "C")) 28 | genFormula(c(.5, 2, 4), c("A", "B")) 29 | 30 | genFormula(c(.5, "..x", 4), c("A", "B", "C")) 31 | genFormula(c(.5, 2, "..z"), c("A", "B")) 32 | 33 | changeX <- c(7, 10) 34 | genFormula(c(.5, 2, changeX[1]), c("A", "B")) 35 | genFormula(c(.5, 2, changeX[2]), c("A", "B")) 36 | genFormula(c(.5, 2, changeX[2]), c("A", "B", "C")) 37 | 38 | newForm <- genFormula(c(-2, 1), c("A")) 39 | 40 | def1 <- defData(varname = "A", formula = 0, variance = 3, dist = "normal") 41 | def1 <- defData(def1, varname = "B", formula = newForm, dist = "binary", link = "logit") 42 | 43 | set.seed(2001) 44 | dt <- genData(500, def1) 45 | summary(glm(B ~ A, data = dt, family = binomial)) 46 | } 47 | \concept{generate_data} 48 | -------------------------------------------------------------------------------- /man/genMarkov.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_data.R 3 | \name{genMarkov} 4 | \alias{genMarkov} 5 | \title{Generate Markov chain} 6 | \usage{ 7 | genMarkov( 8 | n, 9 | transMat, 10 | chainLen, 11 | wide = FALSE, 12 | id = "id", 13 | pername = "period", 14 | varname = "state", 15 | widePrefix = "S", 16 | trimvalue = NULL, 17 | startProb = NULL 18 | ) 19 | } 20 | \arguments{ 21 | \item{n}{number of individual chains to generate} 22 | 23 | \item{transMat}{Square transition matrix where the sum of each row 24 | must equal 1. The dimensions of the matrix equal the number of possible 25 | states.} 26 | 27 | \item{chainLen}{Length of each chain that will be generated; minimum 28 | chain length is 2.} 29 | 30 | \item{wide}{Logical variable (TRUE or FALSE) indicating whether the 31 | resulting data table should be returned in wide or long format. 
The 32 | wide format includes all elements of a chain on a single row; the long 33 | format includes each element of a chain in its own row. The default is 34 | wide = FALSE, so the long format is returned.} 35 | 36 | \item{id}{Character string that represents the name of the "id" field. 37 | Defaults to "id".} 38 | 39 | \item{pername}{Character string that represents the variable name of the 40 | chain sequence in the long format. Defaults to "period".} 41 | 42 | \item{varname}{Character string that represents the variable name of the 43 | state in the long format. Defaults to "state".} 44 | 45 | \item{widePrefix}{Character string that represents the variable name 46 | prefix for the state fields in the wide format. Defaults to "S".} 47 | 48 | \item{trimvalue}{Integer value indicating end state. If trimvalue is not NULL, 49 | all records after the first instance of state = trimvalue will be deleted.} 50 | 51 | \item{startProb}{A string that contains the probability distribution of the 52 | starting state, separated by a ";". Length of start probabilities must match 53 | the number of rows of the transition matrix.} 54 | } 55 | \value{ 56 | A data table with n rows if in wide format, or n by chainLen rows 57 | if in long format. 58 | } 59 | \description{ 60 | Generate a Markov chain for n individuals or units by 61 | specifying a transition matrix. 
62 | } 63 | \examples{ 64 | 65 | # Transition matrix P 66 | 67 | P <- t(matrix(c( 68 | 0.7, 0.2, 0.1, 69 | 0.5, 0.3, 0.2, 70 | 0.0, 0.1, 0.9 71 | ), nrow = 3, ncol = 3)) 72 | 73 | d1 <- genMarkov(n = 10, transMat = P, chainLen = 5) 74 | d2 <- genMarkov(n = 10, transMat = P, chainLen = 5, wide = TRUE) 75 | d3 <- genMarkov( 76 | n = 10, transMat = P, chainLen = 5, 77 | pername = "seq", varname = "health", 78 | trimvalue = 3 79 | ) 80 | } 81 | \concept{generate_data} 82 | -------------------------------------------------------------------------------- /man/genMiss.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/missing_data.R 3 | \name{genMiss} 4 | \alias{genMiss} 5 | \title{Generate missing data} 6 | \usage{ 7 | genMiss( 8 | dtName, 9 | missDefs, 10 | idvars, 11 | repeated = FALSE, 12 | periodvar = "period", 13 | envir = parent.frame() 14 | ) 15 | } 16 | \arguments{ 17 | \item{dtName}{Name of complete data set} 18 | 19 | \item{missDefs}{Definitions of missingness} 20 | 21 | \item{idvars}{Index variables} 22 | 23 | \item{repeated}{Indicator for longitudinal data} 24 | 25 | \item{periodvar}{Name of variable that contains period} 26 | 27 | \item{envir}{parent.frame() by default, allows functionality with double-dot 28 | notation} 29 | } 30 | \value{ 31 | Missing data matrix indexed by idvars (and period if relevant) 32 | } 33 | \description{ 34 | Generate missing data 35 | } 36 | \examples{ 37 | def1 <- defData(varname = "m", dist = "binary", formula = .5) 38 | def1 <- defData(def1, "u", dist = "binary", formula = .5) 39 | def1 <- defData(def1, "x1", dist = "normal", formula = "20*m + 20*u", variance = 2) 40 | def1 <- defData(def1, "x2", dist = "normal", formula = "20*m + 20*u", variance = 2) 41 | def1 <- defData(def1, "x3", dist = "normal", formula = "20*m + 20*u", variance = 2) 42 | 43 | dtAct <- genData(1000, def1) 44 | 45 | defM <- 
defMiss(varname = "x1", formula = .15, logit.link = FALSE) 46 | defM <- defMiss(defM, varname = "x2", formula = ".05 + m * 0.25", logit.link = FALSE) 47 | defM <- defMiss(defM, varname = "x3", formula = ".05 + u * 0.25", logit.link = FALSE) 48 | defM <- defMiss(defM, varname = "u", formula = 1, logit.link = FALSE) # not observed 49 | defM 50 | 51 | # Generate missing data matrix 52 | 53 | missMat <- genMiss(dtAct, defM, idvars = "id") 54 | missMat 55 | 56 | # Generate observed data from actual data and missing data matrix 57 | 58 | dtObs <- genObs(dtAct, missMat, idvars = "id") 59 | dtObs 60 | } 61 | \seealso{ 62 | \code{\link{defMiss}}, \code{\link{genObs}} 63 | } 64 | \concept{missing} 65 | -------------------------------------------------------------------------------- /man/genMixFormula.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{genMixFormula} 4 | \alias{genMixFormula} 5 | \title{Generate Mixture Formula} 6 | \usage{ 7 | genMixFormula(vars, probs = NULL, varLength = NULL) 8 | } 9 | \arguments{ 10 | \item{vars}{Character vector/list of variable names.} 11 | 12 | \item{probs}{Numeric vector/list of probabilities. Has to be the same length as 13 | vars, or NULL. Probabilities will be normalized if they sum to > 1.} 14 | 15 | \item{varLength}{If \code{vars} is of length one and varLength is set to any 16 | integer > 0, \code{vars} will be interpreted as an array of length \code{varLength} and 17 | all elements will be used in sequence.} 18 | } 19 | \value{ 20 | The mixture formula as a string. 21 | } 22 | \description{ 23 | Generates a mixture formula from a vector of variable names and 24 | an optional vector of probabilities. 
25 | } 26 | \examples{ 27 | genMixFormula(c("a", "..b[..i]", "c")) 28 | genMixFormula(c("a", "..b", "c"), c(.2, .5, .3)) 29 | 30 | # Shorthand to use external vectors/lists 31 | genMixFormula("..arr", varLength = 5) 32 | } 33 | \concept{utility} 34 | -------------------------------------------------------------------------------- /man/genMultiFac.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_data.R 3 | \name{genMultiFac} 4 | \alias{genMultiFac} 5 | \title{Generate multi-factorial data} 6 | \usage{ 7 | genMultiFac( 8 | nFactors, 9 | each, 10 | levels = 2, 11 | coding = "dummy", 12 | colNames = NULL, 13 | idName = "id" 14 | ) 15 | } 16 | \arguments{ 17 | \item{nFactors}{Number of factors (columns) to generate.} 18 | 19 | \item{each}{Number of replications for each combination of factors. Must be specified.} 20 | 21 | \item{levels}{Vector or scalar. If a vector is specified, it must be 22 | the same length as nFactors. Each value of the vector represents the 23 | number of levels of each corresponding factor. If a scalar is specified, 24 | each factor will have the same number of levels. The default is 2 levels 25 | for each factor.} 26 | 27 | \item{coding}{String value to specify if "dummy" or "effect" coding is used. 28 | Defaults to "dummy".} 29 | 30 | \item{colNames}{A vector of strings, with a length of nFactors. The strings 31 | represent the name for each factor.} 32 | 33 | \item{idName}{A string that specifies the id of the record. Defaults to "id".} 34 | } 35 | \value{ 36 | A data.table that contains the added simulated data. Each column contains 37 | an integer. 
38 | } 39 | \description{ 40 | Generate multi-factorial data 41 | } 42 | \examples{ 43 | genMultiFac(nFactors = 2, each = 5) 44 | genMultiFac(nFactors = 2, each = 4, levels = c(2, 3)) 45 | genMultiFac( 46 | nFactors = 3, each = 1, coding = "effect", 47 | colNames = c("Fac1", "Fac2", "Fac3"), id = "block" 48 | ) 49 | } 50 | \concept{generate_data} 51 | -------------------------------------------------------------------------------- /man/genNthEvent.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/group_data.R 3 | \name{genNthEvent} 4 | \alias{genNthEvent} 5 | \title{Generate event data using longitudinal data, and restrict output to time 6 | until the nth event.} 7 | \usage{ 8 | genNthEvent(dtName, defEvent, nEvents = 1, perName = "period", id = "id") 9 | } 10 | \arguments{ 11 | \item{dtName}{name of existing data table} 12 | 13 | \item{defEvent}{data definition table (created with defDataAdd) that 14 | determines the event generating process.} 15 | 16 | \item{nEvents}{maximum number of events that will be generated (the nth 17 | event).} 18 | 19 | \item{perName}{variable name for period field. Defaults to "period"} 20 | 21 | \item{id}{string representing name of the id 22 | field in table specified by dtName} 23 | } 24 | \value{ 25 | data.table that stops after "nEvents" are reached. 26 | } 27 | \description{ 28 | Generate event data using longitudinal data, and restrict output to time 29 | until the nth event. 
30 | } 31 | \examples{ 32 | defD <- defData( 33 | varname = "effect", formula = 0, variance = 1, 34 | dist = "normal" 35 | ) 36 | defE <- defDataAdd( 37 | varname = "died", formula = "-2.5 + 0.3*period + effect", 38 | dist = "binary", link = "logit" 39 | ) 40 | 41 | d <- genData(1000, defD) 42 | d <- addPeriods(d, 10) 43 | dx <- genNthEvent(d, defEvent = defE, nEvents = 3) 44 | } 45 | \concept{group_data} 46 | -------------------------------------------------------------------------------- /man/genObs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/missing_data.R 3 | \name{genObs} 4 | \alias{genObs} 5 | \title{Create an observed data set that includes missing data} 6 | \usage{ 7 | genObs(dtName, dtMiss, idvars) 8 | } 9 | \arguments{ 10 | \item{dtName}{Name of complete data set} 11 | 12 | \item{dtMiss}{Name of missing data matrix} 13 | 14 | \item{idvars}{Index variables that cannot be missing} 15 | } 16 | \value{ 17 | A data table that represents observed data, including 18 | missing data 19 | } 20 | \description{ 21 | Create an observed data set that includes missing data 22 | } 23 | \examples{ 24 | def1 <- defData(varname = "m", dist = "binary", formula = .5) 25 | def1 <- defData(def1, "u", dist = "binary", formula = .5) 26 | def1 <- defData(def1, "x1", dist = "normal", formula = "20*m + 20*u", variance = 2) 27 | def1 <- defData(def1, "x2", dist = "normal", formula = "20*m + 20*u", variance = 2) 28 | def1 <- defData(def1, "x3", dist = "normal", formula = "20*m + 20*u", variance = 2) 29 | 30 | dtAct <- genData(1000, def1) 31 | 32 | defM <- defMiss(varname = "x1", formula = .15, logit.link = FALSE) 33 | defM <- defMiss(defM, varname = "x2", formula = ".05 + m * 0.25", logit.link = FALSE) 34 | defM <- defMiss(defM, varname = "x3", formula = ".05 + u * 0.25", logit.link = FALSE) 35 | defM <- defMiss(defM, varname = "u", formula = 1, logit.link = 
FALSE) # not observed 36 | defM 37 | 38 | # Generate missing data matrix 39 | 40 | missMat <- genMiss(dtAct, defM, idvars = "id") 41 | missMat 42 | 43 | # Generate observed data from actual data and missing data matrix 44 | 45 | dtObs <- genObs(dtAct, missMat, idvars = "id") 46 | dtObs 47 | } 48 | \seealso{ 49 | \code{\link{defMiss}}, \code{\link{genMiss}} 50 | } 51 | \concept{missing} 52 | -------------------------------------------------------------------------------- /man/genOrdCat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_data.R 3 | \name{genOrdCat} 4 | \alias{genOrdCat} 5 | \title{Generate ordinal categorical data} 6 | \usage{ 7 | genOrdCat( 8 | dtName, 9 | adjVar = NULL, 10 | baseprobs, 11 | catVar = "cat", 12 | asFactor = TRUE, 13 | idname = "id", 14 | prefix = "grp", 15 | rho = 0, 16 | corstr = "ind", 17 | corMatrix = NULL, 18 | npVar = NULL, 19 | npAdj = NULL 20 | ) 21 | } 22 | \arguments{ 23 | \item{dtName}{Name of complete data set} 24 | 25 | \item{adjVar}{Adjustment variable name in dtName - determines 26 | logistic shift. This is specified assuming a cumulative logit 27 | link.} 28 | 29 | \item{baseprobs}{Baseline probability expressed as a vector or matrix of 30 | probabilities. The values (per row) must sum to <= 1. If \code{rowSums(baseprobs) < 1}, an additional category is added with probability \code{1 - rowSums(baseprobs)}. The number of rows represents the number of new 31 | categorical variables. The number of columns represents the number of 32 | possible responses - if a particular category has fewer possible responses, 33 | assign zero probability to non-relevant columns.} 34 | 35 | \item{catVar}{Name of the new categorical field. Defaults to "cat". Can be a 36 | character vector with a name for each new variable defined via \code{baseprobs}. 
37 | Will be overridden by \code{prefix} if more than one variable is defined and 38 | \code{length(catVar) == 1}.} 39 | 40 | \item{asFactor}{If \code{asFactor == TRUE} (default), new field is returned 41 | as a factor. If \code{asFactor == FALSE}, new field is returned as an integer.} 42 | 43 | \item{idname}{Name of the id column in \code{dtName}.} 44 | 45 | \item{prefix}{A string. The names of the new variables will be a 46 | concatenation of the prefix and a sequence of integers indicating the 47 | variable number.} 48 | 49 | \item{rho}{Correlation coefficient, -1 < rho < 1. Use if corMatrix is not 50 | provided.} 51 | 52 | \item{corstr}{Correlation structure of the variance-covariance matrix defined 53 | by sigma and rho. Options include "ind" for an independence structure, "cs" 54 | for a compound symmetry structure, and "ar1" for an autoregressive structure.} 55 | 56 | \item{corMatrix}{Correlation matrix can be entered directly. It must be 57 | symmetrical and positive definite. It is not a required field; if a matrix is 58 | not provided, then a structure and correlation coefficient rho must be 59 | specified. (The matrix created via \code{rho} and \code{corstr} must also be positive 60 | definite.)} 61 | 62 | \item{npVar}{Vector of variable names that indicate which variables are to 63 | violate the proportionality assumption.} 64 | 65 | \item{npAdj}{Matrix with a row for each npVar and a column for each category. 66 | Each value represents the deviation from the proportional odds assumption on 67 | the logistic scale.} 68 | } 69 | \value{ 70 | Original data.table with added categorical field. 71 | } 72 | \description{ 73 | Ordinal categorical data is added to an existing data set. 74 | Correlations can be added via correlation matrix or \code{rho} and \code{corstr}. 
75 | } 76 | \examples{ 77 | # Ordinal Categorical Data ---- 78 | 79 | def1 <- defData( 80 | varname = "male", 81 | formula = 0.45, dist = "binary", id = "idG" 82 | ) 83 | def1 <- defData(def1, 84 | varname = "z", 85 | formula = "1.2*male", dist = "nonrandom" 86 | ) 87 | def1 88 | 89 | ## Generate data 90 | 91 | set.seed(20) 92 | 93 | dx <- genData(1000, def1) 94 | 95 | probs <- c(0.40, 0.25, 0.15) 96 | 97 | dx <- genOrdCat(dx, 98 | adjVar = "z", idname = "idG", baseprobs = probs, 99 | catVar = "grp" 100 | ) 101 | dx 102 | 103 | # Correlated Ordinal Categorical Data ---- 104 | 105 | baseprobs <- matrix(c( 106 | 0.2, 0.1, 0.1, 0.6, 107 | 0.7, 0.2, 0.1, 0, 108 | 0.5, 0.2, 0.3, 0, 109 | 0.4, 0.2, 0.4, 0, 110 | 0.6, 0.2, 0.2, 0 111 | ), 112 | nrow = 5, byrow = TRUE 113 | ) 114 | 115 | set.seed(333) 116 | dT <- genData(1000) 117 | 118 | dX <- genOrdCat(dT, 119 | adjVar = NULL, baseprobs = baseprobs, 120 | prefix = "q", rho = .125, corstr = "cs", asFactor = FALSE 121 | ) 122 | dX 123 | 124 | dM <- data.table::melt(dX, id.vars = "id") 125 | dProp <- dM[, prop.table(table(value)), by = variable] 126 | dProp[, response := c(1:4, 1:3, 1:3, 1:3, 1:3)] 127 | 128 | data.table::dcast(dProp, variable ~ response, 129 | value.var = "V1", fill = 0 130 | ) 131 | 132 | # proportional odds assumption violated 133 | 134 | d1 <- defData(varname = "rx", formula = "1;1", dist = "trtAssign") 135 | d1 <- defData(d1, varname = "z", formula = "0 - 1.2*rx", dist = "nonrandom") 136 | 137 | dd <- genData(1000, d1) 138 | 139 | baseprobs <- c(.4, .3, .2, .1) 140 | npAdj <- c(0, 1, 0, 0) 141 | 142 | dn <- genOrdCat( 143 | dtName = dd, adjVar = "z", 144 | baseprobs = baseprobs, 145 | npVar = "rx", npAdj = npAdj 146 | ) 147 | 148 | } 149 | \concept{categorical} 150 | \concept{correlated} 151 | \concept{generate_data} 152 | -------------------------------------------------------------------------------- /man/genSpline.Rd: -------------------------------------------------------------------------------- 1 
| % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_data.R 3 | \name{genSpline} 4 | \alias{genSpline} 5 | \title{Generate spline curves} 6 | \usage{ 7 | genSpline( 8 | dt, 9 | newvar, 10 | predictor, 11 | theta, 12 | knots = c(0.25, 0.5, 0.75), 13 | degree = 3, 14 | newrange = NULL, 15 | noise.var = 0 16 | ) 17 | } 18 | \arguments{ 19 | \item{dt}{data.table that will be modified} 20 | 21 | \item{newvar}{Name of new variable to be created} 22 | 23 | \item{predictor}{Name of the field in the existing data.table that predicts the new value} 24 | 25 | \item{theta}{A vector or matrix of values between 0 and 1. Each column of the matrix 26 | represents the weights/coefficients that will be applied to the basis functions 27 | determined by the knots and degree. Each column of theta represents a separate 28 | spline curve.} 29 | 30 | \item{knots}{A vector of values between 0 and 1, specifying quantile 31 | cut-points for splines. Defaults to c(0.25, 0.50, 0.75).} 32 | 33 | \item{degree}{Integer specifying polynomial degree of curvature.} 34 | 35 | \item{newrange}{Range of the spline function, specified as a string 36 | with two values separated by a semi-colon. The first value represents the 37 | minimum, and the second value represents the maximum. Defaults to NULL, which 38 | sets the range to be between 0 and 1.} 39 | 40 | \item{noise.var}{Variance of normally distributed noise added to each observation, where the mean 41 | is the value of the spline curve.} 42 | } 43 | \value{ 44 | A modified data.table with an added column named newvar. 
45 | } 46 | \description{ 47 | Generate spline curves 48 | } 49 | \examples{ 50 | ddef <- defData(varname = "age", formula = "0;1", dist = "uniform") 51 | 52 | theta1 <- c(0.1, 0.8, 0.6, 0.4, 0.6, 0.9, 0.9) 53 | knots <- c(0.25, 0.5, 0.75) 54 | 55 | viewSplines(knots = knots, theta = theta1, degree = 3) 56 | 57 | set.seed(234) 58 | dt <- genData(1000, ddef) 59 | 60 | dt <- genSpline( 61 | dt = dt, newvar = "weight", 62 | predictor = "age", theta = theta1, 63 | knots = knots, degree = 3, 64 | noise.var = .025 65 | ) 66 | 67 | dt 68 | } 69 | \concept{generate_data} 70 | \concept{splines} 71 | -------------------------------------------------------------------------------- /man/genSurv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_data.R 3 | \name{genSurv} 4 | \alias{genSurv} 5 | \title{Generate survival data} 6 | \usage{ 7 | genSurv( 8 | dtName, 9 | survDefs, 10 | digits = 3, 11 | timeName = NULL, 12 | censorName = NULL, 13 | eventName = "event", 14 | typeName = "type", 15 | keepEvents = FALSE, 16 | idName = "id", 17 | envir = parent.frame() 18 | ) 19 | } 20 | \arguments{ 21 | \item{dtName}{Name of data set} 22 | 23 | \item{survDefs}{Definitions of survival} 24 | 25 | \item{digits}{Number of digits for rounding} 26 | 27 | \item{timeName}{A string to indicate the name of a combined competing risk 28 | time-to-event outcome that reflects the minimum observed value of all 29 | time-to-event outcomes. Defaults to NULL, indicating that each time-to-event 30 | outcome will be included in dataset.} 31 | 32 | \item{censorName}{The name of a time to event variable that is the censoring 33 | variable. Will be ignored if timeName is NULL.} 34 | 35 | \item{eventName}{The name of the new numeric/integer column representing the 36 | competing event outcomes. If censorName is specified, the integer value for 37 | that event will be 0. 
Defaults to "event", but will be ignored 38 | if timeName is NULL.} 39 | 40 | \item{typeName}{The name of the new character column that will indicate the 41 | event type. The type will be the unique variable names in survDefs. Defaults 42 | to "type", but will be ignored if timeName is NULL.} 43 | 44 | \item{keepEvents}{Indicator to retain original "events" columns. Defaults 45 | to FALSE.} 46 | 47 | \item{idName}{Name of id field in existing data set.} 48 | 49 | \item{envir}{Optional environment, defaults to current calling environment.} 50 | } 51 | \value{ 52 | Original data table with survival time 53 | } 54 | \description{ 55 | Survival data is added to an existing data set. 56 | } 57 | \examples{ 58 | # Baseline data definitions 59 | 60 | def <- defData(varname = "x1", formula = .5, dist = "binary") 61 | def <- defData(def, varname = "x2", formula = .5, dist = "binary") 62 | def <- defData(def, varname = "grp", formula = .5, dist = "binary") 63 | 64 | # Survival data definitions 65 | 66 | sdef <- defSurv( 67 | varname = "survTime", formula = "1.5*x1", 68 | scale = "grp*50 + (1-grp)*25", shape = "grp*1 + (1-grp)*1.5" 69 | ) 70 | 71 | sdef <- defSurv(sdef, varname = "censorTime", scale = 80, shape = 1) 72 | 73 | sdef 74 | 75 | # Baseline data definitions 76 | 77 | dtSurv <- genData(300, def) 78 | 79 | # Add survival times 80 | 81 | dtSurv <- genSurv(dtSurv, sdef) 82 | 83 | head(dtSurv) 84 | } 85 | \concept{generate_data} 86 | -------------------------------------------------------------------------------- /man/genSynthetic.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generate_data.R 3 | \name{genSynthetic} 4 | \alias{genSynthetic} 5 | \title{Generate synthetic data} 6 | \usage{ 7 | genSynthetic(dtFrom, n = nrow(dtFrom), vars = NULL, id = "id") 8 | } 9 | \arguments{ 10 | \item{dtFrom}{Data table that contains the source data} 11 | 12 | 
\item{n}{Number of samples to draw from the source data. The default 13 | is the number of records in the source data.} 14 | 15 | \item{vars}{A vector of string names specifying the fields that will be 16 | sampled. The default is that all variables will be selected.} 17 | 18 | \item{id}{A string specifying the field that serves as the record id. The 19 | default field is "id".} 20 | } 21 | \value{ 22 | A data table with the generated data 23 | } 24 | \description{ 25 | Synthetic data is generated from an existing data set 26 | } 27 | \examples{ 28 | ### Create fake "real" data set 29 | 30 | d <- defData(varname = "a", formula = 3, variance = 1, dist = "normal") 31 | d <- defData(d, varname = "b", formula = 5, dist = "poisson") 32 | d <- defData(d, varname = "c", formula = 0.3, dist = "binary") 33 | d <- defData(d, varname = "d", formula = "a + b + 3*c", variance = 2, dist = "normal") 34 | 35 | A <- genData(100, d, id = "index") 36 | 37 | ### Create synthetic data set from "observed" data set A: 38 | 39 | def <- defDataAdd(varname = "x", formula = "2*b + 2*d", variance = 2) 40 | 41 | S <- genSynthetic(dtFrom = A, n = 120, vars = c("b", "d"), id = "index") 42 | S <- addColumns(def, S) 43 | 44 | } 45 | \concept{generate_data} 46 | -------------------------------------------------------------------------------- /man/iccRE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{iccRE} 4 | \alias{iccRE} 5 | \title{Generate variance for random effects that produce desired intra-class 6 | correlation coefficients (ICCs) for clustered data.} 7 | \usage{ 8 | iccRE(ICC, dist, varTotal = NULL, varWithin = NULL, lambda = NULL, disp = NULL) 9 | } 10 | \arguments{ 11 | \item{ICC}{Vector of values between 0 and 1 that represent the 12 | target ICC levels} 13 | 14 | \item{dist}{The distribution that describes the outcome data at the 15 | individual 
level. Possible distributions include "normal", "binary", 16 | "poisson", "gamma", or "negBinomial"} 17 | 18 | \item{varTotal}{Numeric value that represents the total variation for a 19 | normally distributed model. If "normal" distribution is specified, either 20 | varTotal or varWithin must be specified, but not both.} 21 | 22 | \item{varWithin}{Numeric value that represents the variation within a 23 | cluster for a normally distributed model. If "normal" distribution is 24 | specified, either varTotal or varWithin must be specified, but not both.} 25 | 26 | \item{lambda}{Numeric value that represents the grand mean. Must be specified 27 | when distribution is "poisson" or "negative binomial".} 28 | 29 | \item{disp}{Numeric value that represents the dispersion parameter that is used 30 | to define a gamma or negative binomial distribution with a log link. Must be 31 | specified when distribution is "gamma" or "negBinomial".} 32 | } 33 | \value{ 34 | A vector of values that represents the variances of random effects 35 | at the cluster level that correspond to the ICC vector. 36 | } 37 | \description{ 38 | Generate variance for random effects that produce desired intra-class 39 | correlation coefficients (ICCs) for clustered data. 40 | } 41 | \examples{ 42 | targetICC <- seq(0.05, 0.20, by = .01) 43 | 44 | iccRE(targetICC, "poisson", lambda = 30) 45 | 46 | iccRE(targetICC, "binary") 47 | 48 | iccRE(targetICC, "normal", varTotal = 100) 49 | iccRE(targetICC, "normal", varWithin = 100) 50 | 51 | iccRE(targetICC, "gamma", disp = .5) 52 | 53 | iccRE(targetICC, "negBinomial", lambda = 40, disp = .5) 54 | } 55 | \references{ 56 | Nakagawa, Shinichi, and Holger Schielzeth. "A general and simple 57 | method for obtaining R2 from generalized linear mixed-effects models." 58 | Methods in Ecology and Evolution 4, no. 2 (2013): 133-142. 
59 | } 60 | \concept{utility} 61 | -------------------------------------------------------------------------------- /man/logisticCoefs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{logisticCoefs} 4 | \alias{logisticCoefs} 5 | \title{Determine intercept, treatment/exposure and covariate coefficients that can 6 | be used for binary data generation with a logit link and a set of covariates} 7 | \usage{ 8 | logisticCoefs( 9 | defCovar, 10 | coefs, 11 | popPrev, 12 | rr = NULL, 13 | rd = NULL, 14 | auc = NULL, 15 | tolerance = 0.001, 16 | sampleSize = 1e+05, 17 | trtName = "A" 18 | ) 19 | } 20 | \arguments{ 21 | \item{defCovar}{A definition table for the covariates in the underlying 22 | population. This table specifies the distribution of the covariates.} 23 | 24 | \item{coefs}{A vector of coefficients that reflect the relationship between 25 | each of the covariates and the log-odds of the outcome.} 26 | 27 | \item{popPrev}{The target population prevalence of the outcome. 28 | A value between 0 and 1.} 29 | 30 | \item{rr}{The target risk ratio, which must be a value between 0 and 31 | 1/popPrev. Defaults to NULL.} 32 | 33 | \item{rd}{The target risk difference, which must be between 34 | -(popPrev) and (1 - popPrev). Defaults to NULL.} 35 | 36 | \item{auc}{The target AUC, which must be a value between 0.5 and 1.0. 37 | Defaults to NULL.} 38 | 39 | \item{tolerance}{The minimum stopping distance between the adjusted low and high 40 | endpoints. Defaults to 0.001.} 41 | 42 | \item{sampleSize}{The number of units to generate for the bisection algorithm. 43 | The default is 1e+05. 
To get a reliable estimate, the value 44 | should be no smaller than the default; larger values can be used, though 45 | computing time will increase.} 46 | 47 | \item{trtName}{If either a risk ratio or risk difference is the target statistic, 48 | a treatment/exposure variable name can be provided. Defaults to "A".} 49 | } 50 | \value{ 51 | A vector of parameters including the intercept and covariate 52 | coefficients for the logistic model data generating process. 53 | } 54 | \description{ 55 | This is an implementation of an iterative bisection procedure 56 | that can be used to determine coefficient values for a target population 57 | prevalence as well as a target risk ratio, risk difference, or AUC. These 58 | coefficients can be used in a subsequent data generation process to simulate 59 | data with these desired characteristics. 60 | } 61 | \details{ 62 | If no specific target statistic is specified, then only the intercept 63 | is returned along with the original coefficients. Only one target statistic (risk ratio, risk 64 | difference, or AUC) can be specified with a single function call; in all three cases, a target 65 | prevalence is still required. 66 | } 67 | \examples{ 68 | \dontrun{ 69 | d1 <- defData(varname = "x1", formula = 0, variance = 1) 70 | d1 <- defData(d1, varname = "b1", formula = 0.5, dist = "binary") 71 | 72 | coefs <- log(c(1.2, 0.8)) 73 | 74 | logisticCoefs(d1, coefs, popPrev = 0.20) 75 | logisticCoefs(d1, coefs, popPrev = 0.20, rr = 1.50, trtName = "rx") 76 | logisticCoefs(d1, coefs, popPrev = 0.20, rd = 0.30, trtName = "rx") 77 | logisticCoefs(d1, coefs, popPrev = 0.20, auc = 0.80) 78 | } 79 | } 80 | \references{ 81 | Austin, Peter C. "The iterative bisection procedure: a useful 82 | tool for determining parameter values in data-generating processes in 83 | Monte Carlo simulations." BMC Medical Research Methodology 23, 84 | no. 1 (2023): 1-10. 
85 | } 86 | \concept{utility} 87 | -------------------------------------------------------------------------------- /man/mergeData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{mergeData} 4 | \alias{mergeData} 5 | \title{Merge two data tables} 6 | \usage{ 7 | mergeData(dt1, dt2, idvars) 8 | } 9 | \arguments{ 10 | \item{dt1}{Name of first data.table} 11 | 12 | \item{dt2}{Name of second data.table} 13 | 14 | \item{idvars}{Vector of string names to merge on} 15 | } 16 | \value{ 17 | A new data table that merges dt2 with dt1 18 | } 19 | \description{ 20 | Merge two data tables 21 | } 22 | \examples{ 23 | def1 <- defData(varname = "x", formula = 0, variance = 1) 24 | def1 <- defData(def1, varname = "xcat", formula = ".3;.2", dist = "categorical") 25 | 26 | def2 <- defData(varname = "yBin", formula = 0.5, dist = "binary", id = "xcat") 27 | def2 <- defData(def2, varname = "yNorm", formula = 5, variance = 2) 28 | 29 | dt1 <- genData(20, def1) 30 | dt2 <- genData(3, def2) 31 | 32 | dtMerge <- mergeData(dt1, dt2, "xcat") 33 | dtMerge 34 | } 35 | \concept{utility} 36 | -------------------------------------------------------------------------------- /man/negbinomGetSizeProb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{negbinomGetSizeProb} 4 | \alias{negbinomGetSizeProb} 5 | \title{Convert negative binomial mean and dispersion parameters to size and prob parameters} 6 | \usage{ 7 | negbinomGetSizeProb(mean, dispersion) 8 | } 9 | \arguments{ 10 | \item{mean}{The mean of a negative binomial distribution} 11 | 12 | \item{dispersion}{The dispersion parameter of a negative binomial distribution} 13 | } 14 | \value{ 15 | A list that includes the size and prob parameters of the negative binomial 16 | distribution 17 | } 18 | 
\description{ 19 | Convert negative binomial mean and dispersion parameters to size and prob parameters 20 | } 21 | \details{ 22 | In simstudy, users specify the negative binomial distribution as a 23 | function of two parameters - a mean and dispersion. In this case, the 24 | variance of the specified distribution is mean + (mean^2)*dispersion. The 25 | base R function rnbinom uses the size and prob parameters to specify the 26 | negative binomial distribution. This function converts the mean and 27 | dispersion into the size and probability parameters. 28 | } 29 | \examples{ 30 | set.seed(12345) 31 | mean <- 5 32 | dispersion <- 0.5 33 | sp <- negbinomGetSizeProb(mean, dispersion) 34 | c(sp$size, sp$prob) 35 | vec <- rnbinom(1000, size = sp$size, prob = sp$prob) 36 | (estMoments <- c(mean(vec), var(vec))) 37 | (theoryMoments <- c(mean, mean + mean^2 * dispersion)) 38 | (theoryMoments <- c(sp$size * (1 - sp$prob) / sp$prob, sp$size * (1 - sp$prob) / sp$prob^2)) 39 | } 40 | \concept{utility} 41 | -------------------------------------------------------------------------------- /man/simstudy-deprecated.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simstudy-package.R 3 | \name{simstudy-deprecated} 4 | \alias{simstudy-deprecated} 5 | \title{Deprecated functions in simstudy} 6 | \description{ 7 | These functions are provided for compatibility with older versions 8 | of simstudy only, and will be defunct in the future. 9 | } 10 | \details{ 11 | \itemize{ 12 | \item \link{genCorOrdCat}: This function is deprecated, and will 13 | be removed in the future. Use \link{genOrdCat} with \code{asFactor = FALSE} instead. 14 | \item \link{catProbs}: This function is deprecated, and will be removed in the future. 15 | Use \link{genCatFormula} with the same functionality instead. 
16 | } 17 | } 18 | -------------------------------------------------------------------------------- /man/simstudy-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simstudy-package.R 3 | \docType{package} 4 | \name{simstudy-package} 5 | \alias{simstudy} 6 | \alias{simstudy-package} 7 | \title{simstudy: Simulation of Study Data} 8 | \description{ 9 | Simulates data sets in order to explore modeling techniques or better understand data generating processes. The user specifies a set of relationships between covariates, and generates data based on these specifications. The final data sets can represent data from randomized control trials, repeated measure (longitudinal) designs, and cluster randomized trials. Missingness can be generated using various mechanisms (MCAR, MAR, NMAR). 10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://github.com/kgoldfeld/simstudy} 15 | \item \url{https://kgoldfeld.github.io/simstudy/} 16 | \item \url{https://kgoldfeld.github.io/simstudy/dev/} 17 | \item Report bugs at \url{https://github.com/kgoldfeld/simstudy/issues} 18 | } 19 | 20 | } 21 | \author{ 22 | \strong{Maintainer}: Keith Goldfeld \email{keith.goldfeld@nyulangone.org} (\href{https://orcid.org/0000-0002-0292-8780}{ORCID}) 23 | 24 | Authors: 25 | \itemize{ 26 | \item Jacob Wujciak-Jens \email{jacob@wujciak.de} (\href{https://orcid.org/0000-0002-7281-3989}{ORCID}) 27 | } 28 | 29 | } 30 | \keyword{internal} 31 | -------------------------------------------------------------------------------- /man/survGetParams.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{survGetParams} 4 | \alias{survGetParams} 5 | \title{Get survival curve parameters} 6 | \usage{ 7 | survGetParams(points) 8 | } 9 | 
\arguments{ 10 | \item{points}{A list of two-element vectors specifying the desired time and 11 | probability pairs that define the desired survival curve} 12 | } 13 | \value{ 14 | A vector of parameters that define the survival curve optimized for 15 | the target points. The first element of the vector represents the "formula" 16 | parameter and the second element represents the "shape" parameter. 17 | } 18 | \description{ 19 | Get survival curve parameters 20 | } 21 | \examples{ 22 | points <- list(c(60, 0.90), c(100, .75), c(200, .25), c(250, .10)) 23 | survGetParams(points) 24 | } 25 | \concept{utility} 26 | -------------------------------------------------------------------------------- /man/survParamPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{survParamPlot} 4 | \alias{survParamPlot} 5 | \title{Plot survival curves} 6 | \usage{ 7 | survParamPlot(formula, shape, points = NULL, n = 100, scale = 1, limits = NULL) 8 | } 9 | \arguments{ 10 | \item{formula}{This is the "formula" parameter of the Weibull-based survival curve 11 | that can be used to define the scale of the distribution.} 12 | 13 | \item{shape}{The parameter that defines the shape of the distribution.} 14 | 15 | \item{points}{An optional list of two-element vectors specifying the desired 16 | time and probability pairs that define the desired survival curve. If no list 17 | is specified then the plot will not include any points.} 18 | 19 | \item{n}{The number of points along the curve that will be used to 20 | define the line. Defaults to 100.} 21 | 22 | \item{scale}{An optional scale parameter that defaults to 1. If the value is 23 | 1, the scale of the distribution is determined entirely by the argument "formula".} 24 | 25 | \item{limits}{A vector of length 2 that specifies x-axis limits for the plot. 
26 | The default is NULL, in which case no limits are imposed.} 27 | } 28 | \value{ 29 | A ggplot of the survival curve defined by the specified parameters. 30 | If the argument points is specified, the plot will include them. 31 | } 32 | \description{ 33 | Plot survival curves 34 | } 35 | \examples{ 36 | points <- list(c(60, 0.90), c(100, .75), c(200, .25), c(250, .10)) 37 | r <- survGetParams(points) 38 | survParamPlot(r[1], r[2]) 39 | survParamPlot(r[1], r[2], points = points) 40 | survParamPlot(r[1], r[2], points = points, limits = c(0, 100)) 41 | } 42 | \concept{utility} 43 | -------------------------------------------------------------------------------- /man/trimData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{trimData} 4 | \alias{trimData} 5 | \title{Trim longitudinal data file once an event has occurred} 6 | \usage{ 7 | trimData(dtOld, seqvar, eventvar, idvar = "id") 8 | } 9 | \arguments{ 10 | \item{dtOld}{name of data table to be trimmed} 11 | 12 | \item{seqvar}{string referencing column that indexes the sequence or period} 13 | 14 | \item{eventvar}{string referencing event data column} 15 | 16 | \item{idvar}{string referencing id column} 17 | } 18 | \value{ 19 | an updated data.table that removes all rows following the first event for each 20 | individual 21 | } 22 | \description{ 23 | Trim longitudinal data file once an event has occurred 24 | } 25 | \examples{ 26 | eDef <- defDataAdd(varname = "e", formula = "u==4", dist = "nonrandom") 27 | 28 | P <- t(matrix(c( 29 | 0.4, 0.3, 0.2, 0.1, 30 | 0.0, 0.4, 0.3, 0.3, 31 | 0.0, 0.0, 0.5, 0.5, 32 | 0.0, 0.0, 0.0, 1.0 33 | ), 34 | nrow = 4 35 | )) 36 | 37 | dp <- genMarkov( 38 | n = 100, transMat = P, 39 | chainLen = 8, id = "id", 40 | pername = "period", 41 | varname = "u" 42 | ) 43 | 44 | dp <- addColumns(eDef, dp) 45 | dp <- trimData(dp, seqvar = "period", eventvar = 
"e", idvar = "id") 46 | 47 | dp 48 | } 49 | \concept{utility} 50 | -------------------------------------------------------------------------------- /man/trtAssign.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/group_data.R 3 | \name{trtAssign} 4 | \alias{trtAssign} 5 | \title{Assign treatment} 6 | \usage{ 7 | trtAssign( 8 | dtName, 9 | nTrt = 2, 10 | balanced = TRUE, 11 | strata = NULL, 12 | grpName = "trtGrp", 13 | ratio = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{dtName}{data table} 18 | 19 | \item{nTrt}{number of treatment groups} 20 | 21 | \item{balanced}{indicator for treatment assignment process} 22 | 23 | \item{strata}{vector of strings representing stratifying variables} 24 | 25 | \item{grpName}{string representing variable name for treatment or 26 | exposure group} 27 | 28 | \item{ratio}{vector of values indicating relative proportion of group 29 | assignment} 30 | } 31 | \value{ 32 | An integer (group) ranging from 1 to length of the 33 | probability vector 34 | } 35 | \description{ 36 | Assign treatment 37 | } 38 | \examples{ 39 | dt <- genData(15) 40 | 41 | dt1 <- trtAssign(dt, nTrt = 3, balanced = TRUE) 42 | dt1[, .N, keyby = trtGrp] 43 | 44 | dt2 <- trtAssign(dt, nTrt = 3, balanced = FALSE) 45 | dt2[, .N, keyby = trtGrp] 46 | 47 | def <- defData(varname = "male", formula = .4, dist = "binary") 48 | dt <- genData(1000, def) 49 | dt 50 | 51 | dt3 <- trtAssign(dt, nTrt = 5, strata = "male", balanced = TRUE, grpName = "Group") 52 | dt3 53 | dt3[, .N, keyby = .(male, Group)] 54 | dt3[, .N, keyby = .(Group)] 55 | 56 | dt4 <- trtAssign(dt, nTrt = 5, strata = "male", balanced = FALSE, grpName = "Group") 57 | dt4[, .N, keyby = .(male, Group)] 58 | dt4[, .N, keyby = .(Group)] 59 | 60 | dt5 <- trtAssign(dt, nTrt = 5, balanced = TRUE, grpName = "Group") 61 | dt5[, .N, keyby = .(male, Group)] 62 | dt5[, .N, keyby = .(Group)] 63 | 64 | 
dt6 <- trtAssign(dt, nTrt = 3, ratio = c(1, 2, 2), grpName = "Group") 65 | dt6[, .N, keyby = .(Group)] 66 | } 67 | \seealso{ 68 | \code{\link{trtObserve}} 69 | } 70 | \concept{group_data} 71 | -------------------------------------------------------------------------------- /man/trtObserve.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/group_data.R 3 | \name{trtObserve} 4 | \alias{trtObserve} 5 | \title{Observed exposure or treatment} 6 | \usage{ 7 | trtObserve(dt, formulas, logit.link = FALSE, grpName = "trtGrp") 8 | } 9 | \arguments{ 10 | \item{dt}{data table} 11 | 12 | \item{formulas}{collection of formulas that determine probabilities} 13 | 14 | \item{logit.link}{indicator that specifies link. If TRUE, then logit link 15 | is used. If FALSE, the identity link is used.} 16 | 17 | \item{grpName}{character string representing name of treatment/exposure group 18 | variable} 19 | } 20 | \value{ 21 | An integer (group) ranging from 1 to length of the probability vector 22 | } 23 | \description{ 24 | Observed exposure or treatment 25 | } 26 | \examples{ 27 | def <- defData(varname = "male", dist = "binary", formula = .5, id = "cid") 28 | def <- defData(def, varname = "over65", dist = "binary", formula = "-1.7 + .8*male", link = "logit") 29 | def <- defData(def, varname = "baseDBP", dist = "normal", formula = 70, variance = 40) 30 | 31 | dtstudy <- genData(1000, def) 32 | dtstudy 33 | 34 | formula1 <- c("-2 + 2*male - .5*over65", "-1 + 2*male + .5*over65") 35 | dtObs <- trtObserve(dtstudy, formulas = formula1, logit.link = TRUE, grpName = "exposure") 36 | dtObs 37 | 38 | # Check actual distributions 39 | 40 | dtObs[, .(pctMale = round(mean(male), 2)), keyby = exposure] 41 | dtObs[, .(pctMale = round(mean(over65), 2)), keyby = exposure] 42 | 43 | dtSum <- dtObs[, .N, keyby = .(male, over65, exposure)] 44 | dtSum[, grpPct := round(N / sum(N), 2), 
keyby = .(male, over65)] 45 | dtSum 46 | } 47 | \seealso{ 48 | \code{\link{trtAssign}} 49 | } 50 | \concept{group_data} 51 | -------------------------------------------------------------------------------- /man/trtStepWedge.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/group_data.R 3 | \name{trtStepWedge} 4 | \alias{trtStepWedge} 5 | \title{Assign treatment for stepped-wedge design} 6 | \usage{ 7 | trtStepWedge( 8 | dtName, 9 | clustID, 10 | nWaves, 11 | lenWaves, 12 | startPer, 13 | perName = "period", 14 | grpName = "rx", 15 | lag = 0, 16 | xrName = "xr" 17 | ) 18 | } 19 | \arguments{ 20 | \item{dtName}{data table} 21 | 22 | \item{clustID}{string representing name of column of cluster level ids} 23 | 24 | \item{nWaves}{number of treatment waves} 25 | 26 | \item{lenWaves}{the number of periods between waves} 27 | 28 | \item{startPer}{the starting period of the first wave} 29 | 30 | \item{perName}{string representing name of column of time periods} 31 | 32 | \item{grpName}{string representing variable name for treatment or 33 | exposure group} 34 | 35 | \item{lag}{integer representing length of transition period} 36 | 37 | \item{xrName}{string representing name of the field that 38 | indicates whether the cluster status is in transition status} 39 | } 40 | \value{ 41 | A data.table with the added treatment assignment 42 | } 43 | \description{ 44 | Assign treatment for stepped-wedge design 45 | } 46 | \examples{ 47 | defc <- defData( 48 | varname = "ceffect", formula = 0, variance = 0.10, 49 | dist = "normal", id = "cluster" 50 | ) 51 | defc <- defData(defc, "m", formula = 10, dist = "nonrandom") 52 | 53 | # Will generate 3 waves of 4 clusters each - starting 2, 5, and 8 54 | 55 | dc <- genData(12, defc) 56 | dp <- addPeriods(dc, 12, "cluster") 57 | dp <- trtStepWedge(dp, "cluster", 58 | nWaves = 3, 59 | lenWaves = 3, startPer = 2 60 | ) 61 | 
dp 62 | 63 | dp <- addPeriods(dc, 12, "cluster") 64 | dp <- trtStepWedge(dp, "cluster", 65 | nWaves = 2, 66 | lenWaves = 1, startPer = 4, lag = 3 67 | ) 68 | dp 69 | } 70 | \seealso{ 71 | \code{\link{trtObserve}}, \code{\link{trtAssign}} 72 | } 73 | \concept{group_data} 74 | -------------------------------------------------------------------------------- /man/updateDef.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{updateDef} 4 | \alias{updateDef} 5 | \title{Update definition table} 6 | \usage{ 7 | updateDef( 8 | dtDefs, 9 | changevar, 10 | newformula = NULL, 11 | newvariance = NULL, 12 | newdist = NULL, 13 | newlink = NULL, 14 | remove = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{dtDefs}{Definition table that will be modified} 19 | 20 | \item{changevar}{Name of field definition that will be changed} 21 | 22 | \item{newformula}{New formula definition (defaults to NULL)} 23 | 24 | \item{newvariance}{New variance specification (defaults to NULL)} 25 | 26 | \item{newdist}{New distribution definition (defaults to NULL)} 27 | 28 | \item{newlink}{New link specification (defaults to NULL)} 29 | 30 | \item{remove}{If set to TRUE, remove `changevar` from 31 | definition (defaults to FALSE).} 32 | } 33 | \value{ 34 | The updated data definition table. 35 | } 36 | \description{ 37 | Updates a row of a definition table created by 38 | defData or defRead. (For tables created using defDataAdd 39 | and defReadAdd use updateDefAdd.) Does not modify in-place. 
40 | } 41 | \examples{ 42 | 43 | # Example 1 44 | 45 | defs <- defData(varname = "x", formula = 0, variance = 3, dist = "normal") 46 | defs <- defData(defs, varname = "y", formula = "2 + 3*x", variance = 1, dist = "normal") 47 | defs <- defData(defs, varname = "z", formula = "4 + 3*x - 2*y", variance = 1, dist = "normal") 48 | 49 | defs 50 | 51 | updateDef(dtDefs = defs, changevar = "y", newformula = "x + 5", newvariance = 2) 52 | updateDef(dtDefs = defs, changevar = "z", newdist = "poisson", newlink = "log") 53 | 54 | # Example 2 55 | 56 | defs <- defData(varname = "w", formula = 0, variance = 3, dist = "normal") 57 | defs <- defData(defs, varname = "x", formula = "1 + w", variance = 1, dist = "normal") 58 | defs <- defData(defs, varname = "z", formula = 4, variance = 1, dist = "normal") 59 | 60 | defs 61 | 62 | updateDef(dtDefs = defs, changevar = "x", remove = TRUE) 63 | updateDef(dtDefs = defs, changevar = "z", remove = TRUE) 64 | 65 | # No changes to original definition: 66 | defs 67 | } 68 | \concept{define_data} 69 | \concept{utility} 70 | -------------------------------------------------------------------------------- /man/updateDefAdd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{updateDefAdd} 4 | \alias{updateDefAdd} 5 | \title{Update definition table} 6 | \usage{ 7 | updateDefAdd( 8 | dtDefs, 9 | changevar, 10 | newformula = NULL, 11 | newvariance = NULL, 12 | newdist = NULL, 13 | newlink = NULL, 14 | remove = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{dtDefs}{Definition table that will be modified} 19 | 20 | \item{changevar}{Name of field definition that will be changed} 21 | 22 | \item{newformula}{New formula definition (defaults to NULL)} 23 | 24 | \item{newvariance}{New variance specification (defaults to NULL)} 25 | 26 | \item{newdist}{New distribution definition (defaults to NULL)} 27 | 28 | 
\item{newlink}{New link specification (defaults to NULL)} 29 | 30 | \item{remove}{If set to TRUE, remove the `changevar` definition (defaults to FALSE)} 31 | } 32 | \value{ 33 | The updated data definition table. 34 | } 35 | \description{ 36 | Updates a row of a definition table created by 37 | defDataAdd or defReadAdd. (For tables created using defData 38 | or defRead, use updateDef.) 39 | } 40 | \examples{ 41 | 42 | # Define original data 43 | 44 | defs <- defData(varname = "w", formula = 0, variance = 3, dist = "normal") 45 | defs <- defData(defs, varname = "x", formula = "1 + w", variance = 1, dist = "normal") 46 | defs <- defData(defs, varname = "z", formula = 4, variance = 1, dist = "normal") 47 | 48 | # Define additional columns 49 | 50 | defsA <- defDataAdd(varname = "a", formula = "w + x + z", variance = 2, dist = "normal") 51 | 52 | set.seed(2001) 53 | dt <- genData(10, defs) 54 | dt <- addColumns(defsA, dt) 55 | dt 56 | 57 | # Modify definition of additional column 58 | 59 | defsA <- updateDefAdd(dtDefs = defsA, changevar = "a", newformula = "w+z", newvariance = 1) 60 | 61 | set.seed(2001) 62 | dt <- genData(10, defs) 63 | dt <- addColumns(defsA, dt) 64 | dt 65 | } 66 | \concept{define_data} 67 | \concept{utility} 68 | -------------------------------------------------------------------------------- /man/viewBasis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{viewBasis} 4 | \alias{viewBasis} 5 | \title{Plot basis spline functions} 6 | \usage{ 7 | viewBasis(knots, degree) 8 | } 9 | \arguments{ 10 | \item{knots}{A vector of values between 0 and 1, specifying cut-points for splines} 11 | 12 | \item{degree}{Integer specifying degree of curvature.} 13 | } 14 | \value{ 15 | A ggplot object that contains a plot of the basis functions. In total, there 16 | will be length(knots) + degree + 1 functions plotted. 
17 | } 18 | \description{ 19 | Plot basis spline functions 20 | } 21 | \examples{ 22 | knots <- c(0.25, 0.50, 0.75) 23 | viewBasis(knots, degree = 1) 24 | 25 | knots <- c(0.25, 0.50, 0.75) 26 | viewBasis(knots, degree = 2) 27 | 28 | knots <- c(0.25, 0.50, 0.75) 29 | viewBasis(knots, degree = 3) 30 | } 31 | \concept{splines} 32 | \concept{utility} 33 | -------------------------------------------------------------------------------- /man/viewSplines.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility.R 3 | \name{viewSplines} 4 | \alias{viewSplines} 5 | \title{Plot spline curves} 6 | \usage{ 7 | viewSplines(knots, degree, theta) 8 | } 9 | \arguments{ 10 | \item{knots}{A vector of values between 0 and 1, specifying cut-points for splines} 11 | 12 | \item{degree}{Integer specifying degree of curvature.} 13 | 14 | \item{theta}{A vector or matrix of values between 0 and 1. Each column of the matrix 15 | represents the weights/coefficients that will be applied to the basis functions 16 | determined by the knots and degree. Each column of theta represents a separate 17 | spline curve.} 18 | } 19 | \value{ 20 | A ggplot object that contains a plot of the spline curves. The number of 21 | spline curves in the plot will equal the number of columns in the matrix (or it 22 | will equal 1 if theta is a vector). 
23 | } 24 | \description{ 25 | Plot spline curves 26 | } 27 | \examples{ 28 | knots <- c(0.25, 0.5, 0.75) 29 | theta1 <- c(0.1, 0.8, 0.4, 0.9, 0.2, 1.0) 30 | 31 | viewSplines(knots, degree = 2, theta1) 32 | 33 | theta2 <- matrix(c( 34 | 0.1, 0.2, 0.4, 0.9, 0.2, 0.3, 35 | 0.1, 0.3, 0.3, 0.8, 1.0, 0.9, 36 | 0.1, 0.4, 0.3, 0.8, 0.7, 0.5, 37 | 0.1, 0.9, 0.8, 0.2, 0.1, 0.6 38 | ), 39 | ncol = 4 40 | ) 41 | 42 | viewSplines(knots, degree = 2, theta2) 43 | } 44 | \concept{splines} 45 | \concept{utility} 46 | -------------------------------------------------------------------------------- /paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'simstudy: Illuminating research methods through data generation' 3 | tags: 4 | - R 5 | - statistics 6 | - data-simulation 7 | - statistical-models 8 | - data-generation 9 | authors: 10 | - name: Keith Goldfeld 11 | orcid: 0000-0002-0292-8780 12 | affiliation: 1 13 | - name: Jacob Wujciak-Jens 14 | orcid: 0000-0002-7281-3989 15 | affiliation: 2 16 | affiliations: 17 | - name: NYU Grossman School of Medicine 18 | index: 1 19 | - name: Independent Researcher 20 | index: 2 21 | date: 18 October 2020 22 | bibliography: simstudy.bib 23 | --- 24 | 25 | # Summary 26 | 27 | The `simstudy` package is a collection of functions for R [@rcoreteam2020] that 28 | allow users to generate simulated data sets in order to explore modeling 29 | techniques or better understand data generating processes. The user defines the 30 | distributions of individual variables, specifies relationships between 31 | covariates and outcomes, and generates data based on these specifications. The 32 | final data sets can represent randomized controlled trials, repeated measures 33 | designs, cluster-randomized trials, or naturally observed data processes. 
Many other 34 | complexities can be added, including survival data, correlated data, factorial 35 | study designs, stepped-wedge designs, and missing data processes. 36 | 37 | Simulation using `simstudy` has two fundamental steps. The user (1) **defines** 38 | the data elements of a data set and (2) **generates** the data based on these 39 | definitions. Additional functionality exists to simulate observed or randomized 40 | **treatment assignment/exposures**, to create **longitudinal/panel** data, to 41 | create **multi-level/hierarchical** data, to create datasets with **correlated 42 | variables** based on a specified covariance structure, to **merge** datasets, to 43 | create data sets with **missing** data, and to create non-linear relationships 44 | with underlying **spline** curves. 45 | 46 | The overarching philosophy of `simstudy` is to create data generating processes 47 | that mimic the typical models used to fit those types of data. So, the 48 | parameterization of some of the data generating processes may not follow the 49 | standard parameterizations for the specific distributions. For example, in 50 | `simstudy` *gamma*-distributed data are generated based on the specification of 51 | a mean $\mu$ (or $\log(\mu)$) and a dispersion $d$, rather than shape $\alpha$ 52 | and rate $\beta$ parameters that more typically characterize the *gamma* 53 | distribution. When we estimate the parameters, we are modeling $\mu$ (or some 54 | function of $\mu$), so we should explicitly recover the `simstudy` parameters 55 | used to generate the data - illuminating the relationship between the 56 | underlying data generating processes and the models. For more details on the 57 | package, use cases, examples, and function reference, see the [documentation page](https://kgoldfeld.github.io/simstudy/articles/simstudy.html). 
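To make this parameterization concrete, here is a brief sketch (our own illustration, not taken from the paper; variable names and parameter values are chosen arbitrarily). It defines a *gamma* variable by its mean and dispersion, generates data, and then recovers the conventional shape and rate parameters with `gammaGetShapeRate`:

``` r
library(simstudy)
set.seed(123)

# define a gamma variable by mean (formula = 10) and dispersion (variance = 0.5)
def <- defData(varname = "g", dist = "gamma", formula = 10, variance = 0.5)
dd <- genData(10000, def)

# empirical mean should be close to 10; the variance is driven by the dispersion
dd[, .(empMean = mean(g), empVar = var(g))]

# recover the conventional shape/rate parameterization if needed
gammaGetShapeRate(mean = 10, dispersion = 0.5)
```

Fitting a gamma regression to `dd` would estimate the mean directly, which is exactly the parameter specified in `defData` - the point of the mean/dispersion parameterization.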
58 | 59 | `simstudy` is available on [CRAN](https://cran.r-project.org/package=simstudy) 60 | and can be installed with: 61 | 62 | ``` r 63 | install.packages("simstudy") 64 | ``` 65 | 66 | Alternatively, the newest development version can be installed from [GitHub](https://github.com/) with: 67 | 68 | ``` r 69 | # install.packages("devtools") 70 | devtools::install_github("kgoldfeld/simstudy") 71 | ``` 72 | 73 | # Statement of need 74 | 75 | Empiricism and statistical analysis are cornerstones of scientific research, 76 | but they can lead us astray if used incorrectly. Choosing the right methodology for the 77 | hypothesis and expected data is crucial for useful, valid results. Data 78 | simulated with `simstudy` under the assumptions derived from a hypothesis 79 | enables researchers to test and refine their analysis methodologies without the 80 | need for time-intensive, expensive pre-tests or collection of actual data. Additionally, data generated with `simstudy` can be used in generalized, theoretical simulation studies to further the field of methodology. 81 | 82 | There are several `R` packages that allow for data generation under different 83 | assumptions. Most of these packages have a narrower scope that focuses on 84 | a specific class of data, like `ICCbin` [@hossain2017], `BinNonNor` 85 | [@inan2020] and `genSurv` [@meira-machado2014]. Some do not seem to be actively 86 | maintained [@hofert2016;@chan2014;@alfons2010;@bien2016], which can cause 87 | compatibility issues. Some target specific fields of study and their needs, like the 88 | psychology-focused `psych` package [@revelle2020] or the `conjurer` package 89 | [@macherla2020] that provides methods to generate synthetic customer data for 90 | industry use. `simstudy` is unique in its philosophy of data generating 91 | processes that mimic the models used in analysis, and in allowing a wide range of complex data to be generated through these processes. 
The `SimDesign` package 92 | [@chalmers2020] and the related `MonteCarlo` package [@leschinski2019] follow a 93 | similar line of thought, but focus on easy replication of the analyses and providing summaries of simulated data. 94 | 95 | `simstudy` has been used in a variety of fields for theoretical exploration of 96 | research methodology 97 | [@anderson2019;@kirasich2018;@krzykalla2020;@liu2019;@nickodem2020;@thoya2018;@wang2020;@elalili2020], 98 | power calculation for trials [@wei2019], and other simulation tasks supporting 99 | researchers 100 | [@forthun2020;@horry2020;@renson2017;@chukwu2019]. 101 | 102 | # Acknowledgements 103 | 104 | We acknowledge contributions from James Balamuta, Michael Bradley, and Gertjan 105 | Verhoeven. For the generation of multivariate binary data, the algorithm by 106 | @emrich1991 is used. 107 | 108 | # References -------------------------------------------------------------------------------- /tests/.lintr: -------------------------------------------------------------------------------- 1 | linters: with_defaults(line_length_linter(120), object_name_linter = NULL) 2 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(hedgehog) 3 | library(simstudy) 4 | 5 | data.table::setDTthreads(2) # added to solve CRAN issue 6 | test_check("simstudy") 7 | -------------------------------------------------------------------------------- /tests/testthat/setup-general.R: -------------------------------------------------------------------------------- 1 | library(hedgehog) 2 | freeze <- names(.GlobalEnv) 3 | -------------------------------------------------------------------------------- /tests/testthat/teardown-general.R: -------------------------------------------------------------------------------- 1 | rm(list = setdiff(names(.GlobalEnv), freeze), pos = .GlobalEnv) 2 | 
-------------------------------------------------------------------------------- /tests/testthat/test-actual-distributions.R: -------------------------------------------------------------------------------- 1 | test_that("beta distributed data are generated correctly", { 2 | skip_on_cran() 3 | 4 | p1 <- runif(1, .1, .9) 5 | v1 <- p1*(1-p1)/2 6 | 7 | p2 <- runif(1, .1, .9) 8 | v2 <- p2*(1-p2)/2 9 | 10 | logitp2 <- log(p2 / (1-p2)) 11 | def <- defData(varname = "b1", formula = "..p1", variance = 1, dist = "beta") 12 | def <- defData(def, varname = "b2", formula = "..logitp2", variance = 1, 13 | dist = "beta", link="logit") 14 | 15 | dd <- genData(n = 1000, dtDefs = def) 16 | diff <- dd[, .(abs(mean(b1) - p1), abs(mean(b2) - p2)) ] 17 | expect_true(all(diff < 0.05)) 18 | 19 | diffv <- dd[, .(abs(var(b1) - v1), abs(var(b2) - v2)) ] 20 | expect_true(all(diffv < 0.02)) 21 | }) 22 | 23 | test_that("gamma distributed data are generated correctly", { 24 | skip_on_cran() 25 | 26 | u1 <- runif(1, 5, 15) 27 | u2 <- runif(1, 5, 15) 28 | logu2 <- log(u2) 29 | 30 | def <- defData(varname = "g1", formula = "..u1", variance = 2, dist = "gamma") 31 | def <- defData(def, varname = "g2", formula = "..logu2", 32 | variance = 2, dist = "gamma", link = "log") 33 | dd <- genData(n = 5000, dtDefs = def) 34 | 35 | diff <- dd[, .(abs(mean(g1) - u1), abs(mean(g2) - u2)) ] 36 | expect_true(all(diff < 1.5)) 37 | 38 | diffv <- dd[, .(abs(var(g1)/ (u1^2*2)), var(g2)/(u2^2*2)) ] 39 | expect_true(all(abs(diffv - 1) < 0.25)) 40 | }) 41 | 42 | -------------------------------------------------------------------------------- /tests/testthat/test-conditions.R: -------------------------------------------------------------------------------- 1 | test_that("conditions have correct class.", { 2 | skip_on_cran() 3 | expect_error(stop(condition(c("error", "custom_Error"), "This is a custom error")), 4 | class = c("error", "custom_Error") 5 | ) 6 | expect_warning(warning(condition(c("warning", 
"custom_warning"), "This is a custom warning")), 7 | class = c("warning", "custom_warning") 8 | ) 9 | expect_message(message(condition(c("message", "custom_message"), "This is a custom message")), 10 | class = c("message", "custom_message") 11 | ) 12 | }) 13 | 14 | test_that("pluralization works.", { 15 | skip_on_cran() 16 | expect_error(argMissingError("arg1"), "argument is missing", 17 | class = "simstudy::missingArgument" 18 | ) 19 | expect_error(argMissingError(c("arg1", "arg2")), "arguments are missing", 20 | class = "simstudy::missingArgument" 21 | ) 22 | }) 23 | -------------------------------------------------------------------------------- /tests/testthat/test-glue.R: -------------------------------------------------------------------------------- 1 | test_that("Blocks are collapsed as expected.", { 2 | skip_on_cran() 3 | nums <- 1:3 4 | num <- 23 5 | expect_equal( 6 | glueCollapse("Collapse block one: { nums *} but not { num }."), 7 | "Collapse block one: 1, 2 and 3 but not 23." 8 | ) 9 | expect_equal( 10 | glueCollapse("Collapse block one: { nums *} but not { num }.", last = " und "), 11 | "Collapse block one: 1, 2 und 3 but not 23." 
12 | ) 13 | expect_length(glueCollapse("Collapse block one: { nums *} but not { num }."), 1) 14 | expect_length(glueCollapse("Collapse block one: { nums } but not { num }."), 3) 15 | }) 16 | 17 | test_that("numbers are formatted as expected.", { 18 | skip_on_cran() 19 | nums <- c(1.23, 0.556, 1 / 3) 20 | ints <- c(1, 2, 3) 21 | expect_equal(glueFmt("{nums:.2f}"), as.character(round(nums, 2))) 22 | expect_equal(glueFmt("{ints:.2f}"), c("1.00", "2.00", "3.00")) 23 | expect_equal(glueFmt("{5}"), "5") 24 | }) 25 | 26 | test_that("numbers are collapsed and formatted correctly.", { 27 | skip_on_cran() 28 | ints <- c(1, 2, 3) 29 | expect_equal(glueFmtC("{ints:02d}"), "01, 02 and 03") 30 | expect_equal(glueFmtC("{2:.1f}"), "2.0") 31 | expect_equal(glueFmtC("{2}"), "2") 32 | }) 33 | -------------------------------------------------------------------------------- /tests/testthat/test-group_data.R: -------------------------------------------------------------------------------- 1 | # addPeriods 2 | test_that("addPeriods works", { 3 | skip_on_cran() 4 | 5 | tdef <- defData(varname = "T", dist = "binary", formula = 0.5) 6 | tdef <- defData(tdef, varname = "Y0", dist = "normal", formula = 10, variance = 1) 7 | tdef <- defData(tdef, varname = "Y1", dist = "normal", formula = "Y0 + 5 + 5 * T", variance = 1) 8 | tdef <- defData(tdef, varname = "Y2", dist = "normal", formula = "Y0 + 10 + 5 * T", variance = 1) 9 | 10 | n <- ceiling(runif(1, 10, 20)) 11 | dtTrial <- genData(n, tdef) 12 | 13 | p <- ceiling(runif(1, 3, 8)) 14 | dtTime <- addPeriods( 15 | dtTrial, 16 | nPeriods = p, idvars = "id" 17 | ) 18 | 19 | expect_equal(nrow(dtTime), n*p) 20 | 21 | expect_silent( 22 | addPeriods(dtTrial, 23 | nPeriods = 3, idvars = "id", 24 | timevars = c("Y0", "Y1", "Y2"), timevarName = "Y", 25 | periodVec = c(0, 3, 5) 26 | ) 27 | ) 28 | 29 | expect_warning( 30 | addPeriods(dtTrial, 31 | nPeriods = 2, idvars = "id", 32 | timevars = c("Y0", "Y1", "Y2"), timevarName = "Y" 33 | ) 34 | ) 35 | 36 | 
testthat::expect_silent( 37 | addPeriods(dtTrial, 38 | nPeriods = 3, idvars = "id", 39 | timevars = c("Y0", "Y1", "Y2"), 40 | timevarName = "Y" 41 | ) 42 | ) 43 | 44 | def <- defData(varname = "xbase", dist = "normal", formula = 20, variance = 3) 45 | def <- defData(def, varname = "nCount", dist = "noZeroPoisson", formula = 6) 46 | def <- defData(def, varname = "mInterval", dist = "gamma", formula = 30, variance = .01) 47 | def <- defData(def, varname = "vInterval", dist = "nonrandom", formula = .07) 48 | 49 | dt <- genData(50, def) 50 | expect_silent(addPeriods(dt)) 51 | 52 | 53 | def <- defData(varname = "xbase", dist = "normal", formula = 20, variance = 3) 54 | def <- defData(def, varname = "nCount", dist = "noZeroPoisson", formula = 6) 55 | def <- defData(def, varname = "mInterval", dist = "gamma", formula = 30, variance = .01) 56 | 57 | dt <- genData(50, def) 58 | expect_silent(addPeriods(dt)) 59 | 60 | def <- defData(varname = "xbase", dist = "normal", formula = 20, variance = 3) 61 | 62 | dt <- genData(50, def) 63 | expect_error(addPeriods(dt)) 64 | 65 | }) 66 | 67 | # .addStrataCode ---- 68 | test_that("strata codes are added as expected.", { 69 | skip_on_cran() 70 | def <- defData(varname = "male", dist = "binary", formula = .5, id = "cid") 71 | def <- defData(def, varname = "over65", dist = "binary", formula = "-1.7 + .8*male", link = "logit") 72 | def <- defData(def, varname = "baseDBP", dist = "normal", formula = 70, variance = 40) 73 | 74 | data <- genData(330, def) 75 | 76 | expect_equal(range(.addStrataCode(data, "male")$.stratum), c(1, 2)) 77 | expect_equal(range(.addStrataCode(data, c("male", "over65"))$.stratum), c(1, 4)) 78 | expect_equal(.addStrataCode(data, "male")[, .SD, .SDcols = !".stratum"], data) 79 | expect_error(.addStrataCode(data, "")) 80 | }) 81 | 82 | # .stratSamp ---- 83 | test_that("stratified samples are drawn correctly.", { 84 | skip_on_cran() 85 | expect_length(.stratSamp(1, 2), 1) 86 | expect_length(.stratSamp(2, 4), 2) 87 | 
expect_length(.stratSamp(50, 3), 50) 88 | expect_gte(table(.stratSamp(148, 2, c(1, 2)))[1], 49) 89 | expect_gte(table(.stratSamp(148, 2, c(1, 2)))[2], 98) 90 | expect_true(all(table(.stratSamp(150, 2, c(1, 2))) == c(50, 100))) 91 | expect_equal(range(.stratSamp(50, 3)), c(1, 3)) 92 | }) 93 | -------------------------------------------------------------------------------- /tests/testthat/test-internal_utility.R: -------------------------------------------------------------------------------- 1 | # .parseDotVars ---- 2 | test_that("dotVars are parsed correctly.", { 3 | skip_on_cran() 4 | extVar1 <- 23 5 | extVar2 <- 42 6 | res <- list(..extVar1 = 23, ..extVar2 = 42) 7 | 8 | expect_equal(.parseDotVars("a + ..extVar1 | b + ..extVar2"), res) 9 | expect_equal(.parseDotVars(c("a + ..extVar1", "b + ..extVar2")), res) 10 | expect_equal(length(.parseDotVars("a + b")), 0) 11 | 12 | expect_error(.parseDotVars("..extVar12")) 13 | }) 14 | 15 | test_that("variables from different environments are parsed correctly.", { 16 | skip_on_cran() 17 | extVar3 <- 7 18 | env1 <- new.env() 19 | env2 <- new.env(parent = env1) 20 | env1$extVar1 <- 23 21 | env2$extVar2 <- 42 22 | res <- list(..extVar1 = 23, ..extVar2 = 42, ..extVar3 = 7) 23 | 24 | with(env2, { 25 | expect_equal(.parseDotVars("a + ..extVar1 | b + ..extVar2 * ..extVar3"), res) 26 | expect_equal(.parseDotVars(c("a + ..extVar1 * ..extVar2", "b + ..extVar3")), res) 27 | }) 28 | }) 29 | 30 | # .evalWith ---- 31 | test_that("evalWith throws errors.", { 32 | skip_on_cran() 33 | df <- data.frame() 34 | ext <- list(formula2parse = 2) 35 | 36 | expect_error(.evalWith("", ext), "reserved variable") 37 | expect_error(.evalWith("", list(), df, 10), "different length") 38 | }) 39 | 40 | test_that("evalWith output length is correct.", { 41 | skip_on_cran() 42 | df <- data.frame(a = rep.int(5, 5)) 43 | ext <- list(..ev = 2) 44 | 45 | expect_equal(length(.evalWith("a + ..ev", ext, df, 5)), 5) 46 | expect_equal(length(.evalWith("a + ..ev", ext, 
dtSim = df)), 5) 47 | }) 48 | 49 | test_that("evalWith output is Matrix.", { 50 | skip_on_cran() 51 | df <- data.frame(a = rep.int(5, 5)) 52 | ext <- list(..ev = 2) 53 | 54 | expect_is(.evalWith("a + ..ev", ext, df, 5), "matrix") 55 | expect_is(.evalWith("a + ..ev", ext, df), "matrix") 56 | expect_is(.evalWith(c("a + ..ev", "..ev * 2"), ext, df), "matrix") 57 | expect_is(.evalWith("..ev * 2", ext), "matrix") 58 | }) 59 | 60 | # .adjustProbs ---- 61 | test_that("probabilities (matrix) are adjusted as documented.", { 62 | skip_on_cran() 63 | forall(gen.and_then(gen.c(gen.element(2:6), of = 2), function(n) { 64 | gen.with(gen.list(gen_n_norm_Probs(n[2]), of = n[1]), function(ps) { 65 | do.call("rbind", ps) 66 | }) 67 | }), function(p) { 68 | over <- p / .9 69 | under <- p / 1.1 70 | expect_warning(.adjustProbs(over), class = "simstudy::valueWarning") 71 | expect_warning(.adjustProbs(under), class = "simstudy::valueWarning") 72 | expect_error(.adjustProbs(under * -1), class = "simstudy::valueError") 73 | expect_equal(mean(rowSums(.adjustProbs(under))), 1) 74 | expect_equal(mean(rowSums(.adjustProbs(over))), 1) 75 | expect_equal(dim(.adjustProbs(over)), dim(over)) 76 | expect_equal(dim(.adjustProbs(under)), dim(under) + c(0, 1)) 77 | }) 78 | }) 79 | 80 | # .getDists ---- 81 | test_that("number of Dists is up to date.", { 82 | skip_on_cran() 83 | expect_length(.getDists(), 17) 84 | }) 85 | 86 | # .isFormulaScalar ---- 87 | test_that("isFormularScalar works correctly.", { 88 | skip_on_cran() 89 | expect_true(.isFormulaScalar("5 + 3")) 90 | expect_true(.isFormulaScalar(5 + 3)) 91 | 92 | expect_false(.isFormulaScalar("a + 3")) 93 | expect_false(.isFormulaScalar("a;3")) 94 | expect_false(.isFormulaScalar(data.frame(a = "asd"))) 95 | }) 96 | 97 | # .isValidVarName ---- 98 | test_that("var names are validated correctly.", { 99 | skip_on_cran() 100 | validNames <- c("var1", "name", "name2", "var1") 101 | wrongNames <- c("...", "..1", "..5") 102 | 103 | 
expect_true(all(.isValidVarName(validNames))) 104 | expect_true(all(.isValidVarName(validNames[1:3], unique = TRUE))) 105 | expect_true(all(.isValidVarName(wrongNames, allowReserved = TRUE, unique = TRUE))) 106 | 107 | expect_false(all(.isValidVarName(wrongNames))) 108 | expect_false(all(.isValidVarName(c(validNames, wrongNames)))) 109 | expect_false(all(.isValidVarName(validNames, unique = TRUE))) 110 | }) 111 | 112 | # .isError ---- 113 | test_that("errors are detected correctly.", { 114 | skip_on_cran() 115 | err <- try(nonVar + 4, silent = TRUE) 116 | noErr <- try(3 + 5, silent = TRUE) 117 | 118 | expect_true(.isError(err)) 119 | expect_false(.isError(noErr)) 120 | expect_false(.isError(5)) 121 | expect_false(.isError("ab")) 122 | }) 123 | 124 | # .hasValue ---- 125 | test_that("hasValue works.", { 126 | skip_on_cran() 127 | expect_true(.hasValue("value")) 128 | expect_true((function(x) .hasValue(x))(5)) 129 | expect_true((function(x) .hasValue(x))(NA)) 130 | expect_false(.hasValue()) 131 | expect_false((function(x) .hasValue(x))()) 132 | expect_false((function(x) .hasValue(x))(NULL)) 133 | expect_false(.hasValue(NULL)) 134 | }) 135 | 136 | # .log2Prob ---- 137 | test_that("log odds are converted correctly.", { 138 | skip_on_cran() 139 | prob <- 0.2 140 | logOdds <- log(0.25) 141 | 142 | expect_equal(.log2Prob(logOdds), prob) 143 | expect_equal(.log2Prob(rep(logOdds, 5)), rep(prob, 5)) 144 | }) 145 | -------------------------------------------------------------------------------- /tests/testthat/test-survival.R: -------------------------------------------------------------------------------- 1 | test_that("defSurv kicks out transition error", { 2 | skip_on_cran() 3 | expect_error(defSurv(varname = "censor", formula = "-7", shape = 0.55, transition = 150)) 4 | }) 5 | 6 | test_that("genSurv runs OK", { 7 | skip_on_cran() 8 | dS <- defSurv(varname = "event_1", formula = "-10", shape = 0.3) 9 | dS <- defSurv(dS, "event_2", "-6.5", shape = 0.4) 10 | dS <- 
defSurv(dS, "event_3", "-7", shape = 0.5) 11 | 12 | dd <- genData(1000) 13 | 14 | expect_equal(genSurv(dd, dS)[, .(c( 15 | median(event_1), 16 | median(event_2), 17 | median(event_3) 18 | ))][, V1], 19 | c(18, 11.6, 27.5), 20 | tolerance = .1 21 | ) 22 | 23 | d1 <- defData(varname = "x", formula = .5, dist = "binary") 24 | 25 | dS <- defSurv(varname = "time", formula = "-14.6 - .5*x", shape = .35, transition = 0) 26 | dS <- defSurv(dS, varname = "time", formula = "-14.6 - 1.5*x", shape = .35, transition = 150) 27 | 28 | dd <- genData(1000, d1) 29 | expect_equal(genSurv(dd, dS)[x == 1, mean(time)], 213, tolerance = .1) 30 | }) 31 | 32 | test_that("genSurv throws errors", { 33 | skip_on_cran() 34 | dS <- defSurv(varname = "event_1", formula = "-10", shape = 0.3) 35 | dS <- defSurv(dS, "event_2", "-6.5", shape = 0.4) 36 | dS <- defSurv(dS, "event_3", "-7", shape = 0.5) 37 | 38 | dd <- genData(5) 39 | expect_error(genSurv(dd, dS, timeName = "event_1", keepEvents = TRUE)) 40 | expect_error(genSurv(dd, dS, timeName = "event_1", censorName = "censor")) 41 | }) 42 | 43 | test_that("addCmpRisk works", { 44 | skip_on_cran() 45 | dS <- defSurv(varname = "event_1", formula = "-10", shape = 0.3) 46 | dS <- defSurv(dS, "event_2", "-6.5", shape = 0.4) 47 | 48 | dd <- genData(5000) 49 | dd <- genSurv(dd, dS) 50 | 51 | expect_equal(addCompRisk(dd, c("event_1", "event_2"), "time")[, mean(event)], 52 | 1.8, 53 | tolerance = 0.1 54 | ) 55 | 56 | expect_equivalent( 57 | names(addCompRisk(dd, c("event_1", "event_2"), "time")[, table(event)]), 58 | c("1", "2") 59 | ) 60 | 61 | expect_equivalent( 62 | names(addCompRisk(dd, c("event_1", "event_2"), "time", "event_2")[, table(event)]), 63 | c("0", "1") 64 | ) 65 | }) 66 | -------------------------------------------------------------------------------- /touchstone/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !script.R 3 | !config.json 4 | !.gitignore 5 | !header.R 6 | !footer.R 7 | 
-------------------------------------------------------------------------------- /touchstone/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "os": "ubuntu-20.04", 3 | "r": "4.1.1", 4 | "rspm": "https://packagemanager.rstudio.com/all/__linux__/focal/2912022-01-07+MTo3NDQwNTcyLDI6NDUyNjIxNTs0QzU3NUZBRQ" 5 | } -------------------------------------------------------------------------------- /touchstone/footer.R: -------------------------------------------------------------------------------- 1 | # You can modify the PR comment footer here. You can use github markdown e.g. 2 | # emojis like :tada:. 3 | # This file will be parsed and evaluate within the context of 4 | # `benchmarks_analyze` and should return the comment text as the last value. 5 | # See `?touchstone::pr_comment` 6 | link <- "https://lorenzwalthert.github.io/touchstone/articles/inference.html" 7 | glue::glue( 8 | "\nFurther explanation regarding interpretation and", 9 | " methodology can be found in the [documentation]({link})." 10 | ) 11 | -------------------------------------------------------------------------------- /touchstone/header.R: -------------------------------------------------------------------------------- 1 | # You can modify the PR comment header here. You can use github markdown e.g. 2 | # emojis like :tada:. 3 | # This file will be parsed and evaluate within the context of 4 | # `benchmarks_analyze` and should return the comment text as the last value. 5 | # Available variables for glue substitution: 6 | # * ci: confidence interval 7 | # * refs: BASE and HEAD refs benchmarked against each other. 
8 | # See `?touchstone::pr_comment` 9 | glue::glue( 10 | "This is how benchmark results would change (along with a", 11 | " {100 * ci}% confidence interval in relative change) if ", 12 | "{system2('git', c('rev-parse', 'HEAD'), stdout = TRUE)} is merged into {branches[1]}:\n" 13 | ) 14 | -------------------------------------------------------------------------------- /touchstone/script.R: -------------------------------------------------------------------------------- 1 | # see `help(run_script, package = 'touchstone')` on how to run this 2 | # interactively 3 | 4 | # TODO OPTIONAL Add directories you want to be available in this file or during the 5 | # benchmarks. 6 | touchstone::pin_assets("bench") 7 | 8 | # installs branches to benchmark 9 | touchstone::branch_install() 10 | 11 | 12 | seed <- 282721 13 | n <- 5000 14 | reps <- 5 15 | 16 | # setup <- rlang::expr({ 17 | # library(simstudy) 18 | # set.seed(!!seed) 19 | # }) 20 | 21 | source(touchstone::path_pinned_asset("bench/define.R")) 22 | 23 | # touchstone runs its benchmarks in a callr subprocess, so we have to do the setup 24 | # within each benchmark 25 | touchstone::benchmark_run( 26 | expr_before_benchmark = { 27 | library(simstudy) 28 | set.seed(!!seed) 29 | def_all_dists <- !!def_all_dists 30 | }, 31 | define_data = def_all_dists(), 32 | n = reps 33 | ) 34 | 35 | 36 | 37 | touchstone::benchmark_run( 38 | expr_before_benchmark = { 39 | library(simstudy) 40 | set.seed(!!seed) 41 | def_all_dists <- !!def_all_dists 42 | def <- def_all_dists() 43 | }, 44 | gen_all_dists = genData(!!n, def), 45 | n = reps 46 | ) 47 | 48 | n <- 100000 49 | 50 | touchstone::benchmark_run( 51 | expr_before_benchmark = { 52 | library(simstudy) 53 | set.seed(!!seed) 54 | def <- defData(varname = "x", formula = 0.5, variance = 2, dist = "beta") 55 | }, 56 | dist_beta = genData(!!n, def), 57 | n = reps 58 | ) 59 | 60 | touchstone::benchmark_run( 61 | expr_before_benchmark = { 62 | library(simstudy) 63 | set.seed(!!seed) 64 | def 
<- defData(varname = "x", formula = 0.3, dist = "binary") 65 | }, 66 | dist_binary = genData(!!n, def), 67 | n = reps 68 | ) 69 | 70 | touchstone::benchmark_run( 71 | expr_before_benchmark = { 72 | library(simstudy) 73 | set.seed(!!seed) 74 | def <- defData(varname = "x", formula = 0.3, variance = 42, dist = "binomial") 75 | }, 76 | dist_binomial = genData(!!n, def), 77 | n = reps 78 | ) 79 | 80 | touchstone::benchmark_run( 81 | expr_before_benchmark = { 82 | library(simstudy) 83 | set.seed(!!seed) 84 | def <- defData(varname = "x", formula = genCatFormula(0.2, 0.3, 0.1, 0.4), variance = "1;2;3;4", dist = "categorical") 85 | }, 86 | dist_categorical = genData(!!n, def), 87 | n = reps 88 | ) 89 | 90 | touchstone::benchmark_run( 91 | expr_before_benchmark = { 92 | library(simstudy) 93 | set.seed(!!seed) 94 | def <- defData(varname = "x", dist = "exponential", formula = 42) 95 | }, 96 | dist_exponential = genData(!!n, def), 97 | n = reps 98 | ) 99 | 100 | touchstone::benchmark_run( 101 | expr_before_benchmark = { 102 | library(simstudy) 103 | set.seed(!!seed) 104 | def <- defData(varname = "x", dist = "gamma", formula = 42, 105 | variance = 1) 106 | }, 107 | dist_gamma = genData(!!n, def), 108 | n = reps 109 | ) 110 | 111 | touchstone::benchmark_run( 112 | expr_before_benchmark = { 113 | library(simstudy) 114 | set.seed(!!seed) 115 | def <- defData(varname = "x1", dist = "exponential", formula = 42) 116 | def <- defData(def, varname = "x2", formula = 0.5, variance = 2, dist = "beta") 117 | def <- defData(def,varname = "x", dist = "mixture", formula = genMixFormula(c("x1","x2"), c(.65, .35))) 118 | }, 119 | dist_mixture = genData(!!n, def), 120 | n = reps 121 | ) 122 | 123 | touchstone::benchmark_run( 124 | expr_before_benchmark = { 125 | library(simstudy) 126 | set.seed(!!seed) 127 | def <- defData(varname = "x", formula = 2, variance = 1.5, dist = "normal") 128 | }, 129 | dist_normal = genData(!!n, def), 130 | n = reps 131 | ) 132 | 133 | 134 | # create artifacts 
used downstream in the GitHub Action 135 | touchstone::benchmark_analyze() 136 | -------------------------------------------------------------------------------- /vignettes/longitudinal.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Longitudinal Data" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Longitudinal Data} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | \usepackage[utf8]{inputenc} 8 | --- 9 | 10 | ```{r chunkname, echo=-1} 11 | data.table::setDTthreads(2) 12 | ``` 13 | 14 | ```{r, echo = FALSE, message = FALSE} 15 | library(simstudy) 16 | library(ggplot2) 17 | library(scales) 18 | library(grid) 19 | library(gridExtra) 20 | library(survival) 21 | library(gee) 22 | 23 | plotcolors <- c("#B84226", "#1B8445", "#1C5974") 24 | 25 | cbbPalette <- c("#B84226","#B88F26", "#A5B435", "#1B8446", 26 | "#B87326","#B8A526", "#6CA723", "#1C5974") 27 | 28 | ggtheme <- function(panelback = "white") { 29 | 30 | ggplot2::theme( 31 | panel.background = element_rect(fill = panelback), 32 | panel.grid = element_blank(), 33 | axis.ticks = element_line(colour = "black"), 34 | panel.spacing =unit(0.25, "lines"), # requires package grid 35 | panel.border = element_rect(fill = NA, colour="gray90"), 36 | plot.title = element_text(size = 8,vjust=.5,hjust=0), 37 | axis.text = element_text(size=8), 38 | axis.title = element_text(size = 8) 39 | ) 40 | 41 | } 42 | 43 | ``` 44 | 45 | To simulate longitudinal data, we start with a 'cross-sectional' data set and convert it to a time-dependent data set. The original cross-sectional data set may or may not include time-dependent data in the columns. 
In the next example, we measure outcome `Y` once before and twice after intervention `T` in a randomized trial: 46 | 47 | ```{r, tidy = TRUE} 48 | tdef <- defData(varname = "T", dist = "binary", formula = 0.5) 49 | tdef <- defData(tdef, varname = "Y0", dist = "normal", formula = 10, variance = 1) 50 | tdef <- defData(tdef, varname = "Y1", dist = "normal", formula = "Y0 + 5 + 5 * T", variance = 1) 51 | tdef <- defData(tdef, varname = "Y2", dist = "normal", formula = "Y0 + 10 + 5 * T", variance = 1) 52 | 53 | set.seed(483726) 54 | 55 | dtTrial <- genData(500, tdef) 56 | dtTrial 57 | ``` 58 | 59 | Longitudinal data are created with a call to **`addPeriods`**. If the cross-sectional data includes time-dependent data, then the number of periods `nPeriods` must be the same as the number of time-dependent columns. If a variable is not declared as one of the `timevars`, it will be repeated in each time period. In this example, the treatment indicator `T` is not specified as a time-dependent variable. (Note: if there are two time-dependent variables, it is best to create two data sets and merge them. This will be shown later in the vignette.) 
60 | 61 | ```{r, tidy = TRUE} 62 | dtTime <- addPeriods(dtTrial, nPeriods = 3, idvars = "id", timevars = c("Y0", "Y1", "Y2"), timevarName = "Y") 63 | dtTime 64 | ``` 65 | 66 | This is what the longitudinal data look like: 67 | 68 | ```{r, tidy = TRUE, echo = FALSE, fig.width = 6, fig.height = 3} 69 | 70 | avg <- dtTime[,.(Y=mean(Y)), keyby = .(T, period)] 71 | 72 | ggplot(data = dtTime, aes(x = factor(period), y = Y)) + 73 | geom_jitter(aes(color=factor(T)), size = .5, alpha = .8, width = .25) + 74 | geom_line(data=avg, aes(x = factor(period), y = Y, group = T, color= factor(T)), size=1) + 75 | xlab("Period") + 76 | scale_color_manual(values = plotcolors[c(3,1)], 77 | labels = c("Ctrl", "Trt")) + 78 | theme(legend.title=element_blank()) + 79 | ggtheme("grey90") + 80 | theme(legend.key=element_rect(fill=NA)) 81 | ``` 82 | 83 | ## Longitudinal data with varying observation and interval times 84 | 85 | It is also possible to generate longitudinal data with varying numbers of measurement periods as well as varying time intervals between each measurement period. This is done by defining specific variables in the data set that define the number of observations per subject and the average interval time between each observation. `nCount` defines the number of measurements for an individual; `mInterval` specifies the average time between intervals for a subject; and `vInterval` specifies the variance of those interval times. If `vInterval` is set to 0 or is not defined, the interval for a subject is determined entirely by the mean interval. If `vInterval` is greater than 0, time intervals are generated using a gamma distribution with mean and dispersion specified. 86 | 87 | In this simple example, the cross-sectional data generates individuals with a different number of measurement observations and different times between each observation. 
Data for two of these individuals are printed: 88 | 89 | ```{r, tidy = TRUE} 90 | def <- defData(varname = "xbase", dist = "normal", formula = 20, variance = 3) 91 | def <- defData(def, varname = "nCount", dist = "noZeroPoisson", formula = 6) 92 | def <- defData(def, varname = "mInterval", dist = "gamma", formula = 30, variance = .01) 93 | def <- defData(def, varname = "vInterval", dist = "nonrandom", formula = .07) 94 | 95 | dt <- genData(200, def) 96 | dt[id %in% c(8, 121)] # View individuals 8 and 121 97 | ``` 98 | 99 | The resulting longitudinal data for these two subjects can be inspected after a call to `addPeriods`. Notice that no parameters need to be set since all information resides in the data set itself: 100 | 101 | ```{r, tidy = TRUE} 102 | dtPeriod <- addPeriods(dt) 103 | dtPeriod[id %in% c(8, 121)] # View individuals 8 and 121 only 104 | ``` 105 | 106 | If a time-sensitive measurement is added to the data set ... 107 | 108 | ```{r, tidy = TRUE} 109 | def2 <- defDataAdd(varname = "Y", dist = "normal", formula = "15 + .1 * time", variance = 5) 110 | dtPeriod <- addColumns(def2, dtPeriod) 111 | ``` 112 | 113 | ... a plot of five randomly selected individuals looks like this: 114 | 115 | ```{r, tidy = TRUE, echo = FALSE, fig.width = 6, fig.height = 3} 116 | 117 | sampledID <- sample(1:nrow(dt), 5) 118 | dtSample <- dtPeriod[id %in% sampledID] 119 | 120 | ggplot(data = dtSample, aes(x = time, y = Y, group = id)) + 121 | geom_point(aes(color = factor(id))) + 122 | geom_line(aes(color = factor(id))) + 123 | xlab("Day") + 124 | scale_color_manual(values = cbbPalette) + 125 | theme(legend.position = "none") + 126 | ggtheme("grey90") 127 | ``` 128 | --------------------------------------------------------------------------------
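The gamma mechanism behind `mInterval` and `vInterval` in the vignette above can be illustrated directly in base R. This is a minimal sketch, assuming simstudy's mean/dispersion parametrization of the gamma distribution (shape = 1/dispersion, rate = 1/(mean * dispersion), the mapping computed by `gammaGetShapeRate`); the values mirror the mean interval of 30 and dispersion of .07 used in the example:

```r
# Sketch only: draw interval times from a gamma distribution specified by a
# mean and a dispersion, mirroring mInterval = 30 and vInterval = .07 above.
# Assumes the shape = 1 / dispersion, rate = 1 / (mean * dispersion) mapping.
mInterval <- 30
vInterval <- 0.07

set.seed(282721)
gaps <- rgamma(10000, shape = 1 / vInterval, rate = 1 / (mInterval * vInterval))

mean(gaps)  # approximately the mean interval, 30
var(gaps)   # approximately dispersion * mean^2 = 0.07 * 30^2 = 63
```

With a dispersion of 0, there would be nothing to draw: every gap collapses to the mean interval, which matches the behavior described for `vInterval = 0`.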