├── .Rbuildignore
├── .github
└── .gitignore
├── .gitignore
├── DESCRIPTION
├── LICENSE.md
├── NAMESPACE
├── R
└── exercises.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── classes
├── 2021_grenoble
│ ├── .gitignore
│ ├── data
│ │ └── sim-data-ind-samples.csv
│ ├── faux.Rmd
│ ├── fixed-effects.Rmd
│ ├── mixed-effects.Rmd
│ └── sim1.csv
└── mixed2.Rmd
├── data-sim-workshops.Rproj
├── docs
├── 404.html
├── LICENSE.html
├── apple-touch-icon-120x120.png
├── apple-touch-icon-152x152.png
├── apple-touch-icon-180x180.png
├── apple-touch-icon-60x60.png
├── apple-touch-icon-76x76.png
├── apple-touch-icon.png
├── articles
│ ├── calories.html
│ ├── faux.html
│ ├── faux_files
│ │ └── figure-html
│ │ │ ├── unnamed-chunk-13-1.png
│ │ │ ├── unnamed-chunk-14-1.png
│ │ │ ├── unnamed-chunk-15-1.png
│ │ │ ├── unnamed-chunk-16-1.png
│ │ │ ├── unnamed-chunk-17-1.png
│ │ │ ├── unnamed-chunk-20-1.png
│ │ │ ├── unnamed-chunk-21-1.png
│ │ │ ├── unnamed-chunk-22-1.png
│ │ │ ├── unnamed-chunk-23-1.png
│ │ │ ├── unnamed-chunk-24-1.png
│ │ │ ├── unnamed-chunk-28-1.png
│ │ │ ├── unnamed-chunk-37-1.png
│ │ │ ├── unnamed-chunk-4-1.png
│ │ │ ├── unnamed-chunk-40-1.png
│ │ │ ├── unnamed-chunk-41-1.png
│ │ │ ├── unnamed-chunk-42-1.png
│ │ │ ├── unnamed-chunk-43-1.png
│ │ │ ├── unnamed-chunk-44-1.png
│ │ │ └── unnamed-chunk-45-1.png
│ ├── fixed.html
│ ├── fixed_files
│ │ └── figure-html
│ │ │ ├── ind-sim-fig-1.png
│ │ │ ├── pair-sim-fig-1.png
│ │ │ ├── rnorm-plot-1.png
│ │ │ ├── runif-hist-1.png
│ │ │ ├── sample-prob-1.png
│ │ │ ├── sample-replace-1.png
│ │ │ └── sim-p-fig-1.png
│ ├── index.html
│ ├── mixed.html
│ └── mixed_files
│ │ └── figure-html
│ │ ├── ex2-1.png
│ │ ├── plot-dv-1.png
│ │ ├── plot-ixn-1.png
│ │ ├── plot-stim-ranef-1.png
│ │ ├── plot-sub-ranef-1.png
│ │ ├── rslope-plot-dv-1.png
│ │ ├── unnamed-chunk-11-1.png
│ │ └── unnamed-chunk-7-1.png
├── authors.html
├── deps
│ ├── bootstrap-5.3.1
│ │ ├── bootstrap.bundle.min.js
│ │ ├── bootstrap.bundle.min.js.map
│ │ └── bootstrap.min.css
│ ├── data-deps.txt
│ └── jquery-3.6.0
│ │ ├── jquery-3.6.0.js
│ │ ├── jquery-3.6.0.min.js
│ │ └── jquery-3.6.0.min.map
├── favicon-16x16.png
├── favicon-32x32.png
├── favicon.ico
├── index.html
├── link.svg
├── logo.png
├── pkgdown.js
├── pkgdown.yml
├── reference
│ ├── Rplot001.png
│ ├── exercise.html
│ ├── figures
│ │ └── logo.png
│ └── index.html
├── search.json
└── sitemap.xml
├── inst
└── stubs
│ ├── calories-stub.Rmd
│ ├── faux-stub.Rmd
│ ├── fixed-stub.Rmd
│ └── mixed-stub.Rmd
├── man
├── exercise.Rd
└── figures
│ └── logo.png
├── pkgdown
└── favicon
│ ├── apple-touch-icon-120x120.png
│ ├── apple-touch-icon-152x152.png
│ ├── apple-touch-icon-180x180.png
│ ├── apple-touch-icon-60x60.png
│ ├── apple-touch-icon-76x76.png
│ ├── apple-touch-icon.png
│ ├── favicon-16x16.png
│ ├── favicon-32x32.png
│ └── favicon.ico
└── vignettes
├── .gitignore
├── calories.Rmd
├── data
└── sim-data-ind-samples.csv
├── faux.Rmd
├── fixed.Rmd
└── mixed.Rmd
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^LICENSE\.md$
2 | ^.*\.Rproj$
3 | ^\.Rproj\.user$
4 | ^_pkgdown\.yml$
5 | ^docs$
6 | ^pkgdown$
7 | ^classes$
8 | ^README\.Rmd$
9 | ^\.github$
10 |
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | inst/doc
6 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: dsw
2 | Title: Data Simulation Workshops
3 | Version: 0.0.0.9007
4 | Date: 2024-02-16
5 | Authors@R: c(
6 | person(
7 | given = "Lisa",
8 | family = "DeBruine",
9 | role = c("aut", "cre"),
10 | email = "debruine@gmail.com",
11 | comment = c(ORCID = "0000-0002-7523-5539")
12 | ))
13 | Description: Materials for data simulation workshops.
14 | Depends:
15 | R (>= 4.1.0)
16 | Imports:
17 | lme4,
18 | dplyr,
19 | tidyr,
20 | ggplot2 (>= 3.3.0),
21 | faux (>= 1.1.0),
22 | lmerTest,
23 | afex,
24 | broom,
25 | broom.mixed,
26 | MASS,
27 | emmeans,
28 | patchwork
29 | Suggests:
30 | testthat (>= 2.1.0),
31 | knitr,
32 | rmarkdown
33 | RoxygenNote: 7.1.1
34 | Encoding: UTF-8
35 | LazyData: true
36 | URL: https://github.com/debruine/data-sim-workshops,
37 | https://debruine.github.io/data-sim-workshops/
38 | BugReports: https://github.com/debruine/data-sim-workshops/issues
39 | License: CC BY 4.0
40 | VignetteBuilder: knitr
41 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Attribution 4.0 International
2 |
3 | =======================================================================
4 |
5 | Creative Commons Corporation ("Creative Commons") is not a law firm and
6 | does not provide legal services or legal advice. Distribution of
7 | Creative Commons public licenses does not create a lawyer-client or
8 | other relationship. Creative Commons makes its licenses and related
9 | information available on an "as-is" basis. Creative Commons gives no
10 | warranties regarding its licenses, any material licensed under their
11 | terms and conditions, or any related information. Creative Commons
12 | disclaims all liability for damages resulting from their use to the
13 | fullest extent possible.
14 |
15 | Using Creative Commons Public Licenses
16 |
17 | Creative Commons public licenses provide a standard set of terms and
18 | conditions that creators and other rights holders may use to share
19 | original works of authorship and other material subject to copyright
20 | and certain other rights specified in the public license below. The
21 | following considerations are for informational purposes only, are not
22 | exhaustive, and do not form part of our licenses.
23 |
24 | Considerations for licensors: Our public licenses are
25 | intended for use by those authorized to give the public
26 | permission to use material in ways otherwise restricted by
27 | copyright and certain other rights. Our licenses are
28 | irrevocable. Licensors should read and understand the terms
29 | and conditions of the license they choose before applying it.
30 | Licensors should also secure all rights necessary before
31 | applying our licenses so that the public can reuse the
32 | material as expected. Licensors should clearly mark any
33 | material not subject to the license. This includes other CC-
34 | licensed material, or material used under an exception or
35 | limitation to copyright. More considerations for licensors:
36 | wiki.creativecommons.org/Considerations_for_licensors
37 |
38 | Considerations for the public: By using one of our public
39 | licenses, a licensor grants the public permission to use the
40 | licensed material under specified terms and conditions. If
41 | the licensor's permission is not necessary for any reason--for
42 | example, because of any applicable exception or limitation to
43 | copyright--then that use is not regulated by the license. Our
44 | licenses grant only permissions under copyright and certain
45 | other rights that a licensor has authority to grant. Use of
46 | the licensed material may still be restricted for other
47 | reasons, including because others have copyright or other
48 | rights in the material. A licensor may make special requests,
49 | such as asking that all changes be marked or described.
50 | Although not required by our licenses, you are encouraged to
51 | respect those requests where reasonable. More considerations
52 | for the public:
53 | wiki.creativecommons.org/Considerations_for_licensees
54 |
55 | =======================================================================
56 |
57 | Creative Commons Attribution 4.0 International Public License
58 |
59 | By exercising the Licensed Rights (defined below), You accept and agree
60 | to be bound by the terms and conditions of this Creative Commons
61 | Attribution 4.0 International Public License ("Public License"). To the
62 | extent this Public License may be interpreted as a contract, You are
63 | granted the Licensed Rights in consideration of Your acceptance of
64 | these terms and conditions, and the Licensor grants You such rights in
65 | consideration of benefits the Licensor receives from making the
66 | Licensed Material available under these terms and conditions.
67 |
68 |
69 | Section 1 -- Definitions.
70 |
71 | a. Adapted Material means material subject to Copyright and Similar
72 | Rights that is derived from or based upon the Licensed Material
73 | and in which the Licensed Material is translated, altered,
74 | arranged, transformed, or otherwise modified in a manner requiring
75 | permission under the Copyright and Similar Rights held by the
76 | Licensor. For purposes of this Public License, where the Licensed
77 | Material is a musical work, performance, or sound recording,
78 | Adapted Material is always produced where the Licensed Material is
79 | synched in timed relation with a moving image.
80 |
81 | b. Adapter's License means the license You apply to Your Copyright
82 | and Similar Rights in Your contributions to Adapted Material in
83 | accordance with the terms and conditions of this Public License.
84 |
85 | c. Copyright and Similar Rights means copyright and/or similar rights
86 | closely related to copyright including, without limitation,
87 | performance, broadcast, sound recording, and Sui Generis Database
88 | Rights, without regard to how the rights are labeled or
89 | categorized. For purposes of this Public License, the rights
90 | specified in Section 2(b)(1)-(2) are not Copyright and Similar
91 | Rights.
92 |
93 | d. Effective Technological Measures means those measures that, in the
94 | absence of proper authority, may not be circumvented under laws
95 | fulfilling obligations under Article 11 of the WIPO Copyright
96 | Treaty adopted on December 20, 1996, and/or similar international
97 | agreements.
98 |
99 | e. Exceptions and Limitations means fair use, fair dealing, and/or
100 | any other exception or limitation to Copyright and Similar Rights
101 | that applies to Your use of the Licensed Material.
102 |
103 | f. Licensed Material means the artistic or literary work, database,
104 | or other material to which the Licensor applied this Public
105 | License.
106 |
107 | g. Licensed Rights means the rights granted to You subject to the
108 | terms and conditions of this Public License, which are limited to
109 | all Copyright and Similar Rights that apply to Your use of the
110 | Licensed Material and that the Licensor has authority to license.
111 |
112 | h. Licensor means the individual(s) or entity(ies) granting rights
113 | under this Public License.
114 |
115 | i. Share means to provide material to the public by any means or
116 | process that requires permission under the Licensed Rights, such
117 | as reproduction, public display, public performance, distribution,
118 | dissemination, communication, or importation, and to make material
119 | available to the public including in ways that members of the
120 | public may access the material from a place and at a time
121 | individually chosen by them.
122 |
123 | j. Sui Generis Database Rights means rights other than copyright
124 | resulting from Directive 96/9/EC of the European Parliament and of
125 | the Council of 11 March 1996 on the legal protection of databases,
126 | as amended and/or succeeded, as well as other essentially
127 | equivalent rights anywhere in the world.
128 |
129 | k. You means the individual or entity exercising the Licensed Rights
130 | under this Public License. Your has a corresponding meaning.
131 |
132 |
133 | Section 2 -- Scope.
134 |
135 | a. License grant.
136 |
137 | 1. Subject to the terms and conditions of this Public License,
138 | the Licensor hereby grants You a worldwide, royalty-free,
139 | non-sublicensable, non-exclusive, irrevocable license to
140 | exercise the Licensed Rights in the Licensed Material to:
141 |
142 | a. reproduce and Share the Licensed Material, in whole or
143 | in part; and
144 |
145 | b. produce, reproduce, and Share Adapted Material.
146 |
147 | 2. Exceptions and Limitations. For the avoidance of doubt, where
148 | Exceptions and Limitations apply to Your use, this Public
149 | License does not apply, and You do not need to comply with
150 | its terms and conditions.
151 |
152 | 3. Term. The term of this Public License is specified in Section
153 | 6(a).
154 |
155 | 4. Media and formats; technical modifications allowed. The
156 | Licensor authorizes You to exercise the Licensed Rights in
157 | all media and formats whether now known or hereafter created,
158 | and to make technical modifications necessary to do so. The
159 | Licensor waives and/or agrees not to assert any right or
160 | authority to forbid You from making technical modifications
161 | necessary to exercise the Licensed Rights, including
162 | technical modifications necessary to circumvent Effective
163 | Technological Measures. For purposes of this Public License,
164 | simply making modifications authorized by this Section 2(a)
165 | (4) never produces Adapted Material.
166 |
167 | 5. Downstream recipients.
168 |
169 | a. Offer from the Licensor -- Licensed Material. Every
170 | recipient of the Licensed Material automatically
171 | receives an offer from the Licensor to exercise the
172 | Licensed Rights under the terms and conditions of this
173 | Public License.
174 |
175 | b. No downstream restrictions. You may not offer or impose
176 | any additional or different terms or conditions on, or
177 | apply any Effective Technological Measures to, the
178 | Licensed Material if doing so restricts exercise of the
179 | Licensed Rights by any recipient of the Licensed
180 | Material.
181 |
182 | 6. No endorsement. Nothing in this Public License constitutes or
183 | may be construed as permission to assert or imply that You
184 | are, or that Your use of the Licensed Material is, connected
185 | with, or sponsored, endorsed, or granted official status by,
186 | the Licensor or others designated to receive attribution as
187 | provided in Section 3(a)(1)(A)(i).
188 |
189 | b. Other rights.
190 |
191 | 1. Moral rights, such as the right of integrity, are not
192 | licensed under this Public License, nor are publicity,
193 | privacy, and/or other similar personality rights; however, to
194 | the extent possible, the Licensor waives and/or agrees not to
195 | assert any such rights held by the Licensor to the limited
196 | extent necessary to allow You to exercise the Licensed
197 | Rights, but not otherwise.
198 |
199 | 2. Patent and trademark rights are not licensed under this
200 | Public License.
201 |
202 | 3. To the extent possible, the Licensor waives any right to
203 | collect royalties from You for the exercise of the Licensed
204 | Rights, whether directly or through a collecting society
205 | under any voluntary or waivable statutory or compulsory
206 | licensing scheme. In all other cases the Licensor expressly
207 | reserves any right to collect such royalties.
208 |
209 |
210 | Section 3 -- License Conditions.
211 |
212 | Your exercise of the Licensed Rights is expressly made subject to the
213 | following conditions.
214 |
215 | a. Attribution.
216 |
217 | 1. If You Share the Licensed Material (including in modified
218 | form), You must:
219 |
220 | a. retain the following if it is supplied by the Licensor
221 | with the Licensed Material:
222 |
223 | i. identification of the creator(s) of the Licensed
224 | Material and any others designated to receive
225 | attribution, in any reasonable manner requested by
226 | the Licensor (including by pseudonym if
227 | designated);
228 |
229 | ii. a copyright notice;
230 |
231 | iii. a notice that refers to this Public License;
232 |
233 | iv. a notice that refers to the disclaimer of
234 | warranties;
235 |
236 | v. a URI or hyperlink to the Licensed Material to the
237 | extent reasonably practicable;
238 |
239 | b. indicate if You modified the Licensed Material and
240 | retain an indication of any previous modifications; and
241 |
242 | c. indicate the Licensed Material is licensed under this
243 | Public License, and include the text of, or the URI or
244 | hyperlink to, this Public License.
245 |
246 | 2. You may satisfy the conditions in Section 3(a)(1) in any
247 | reasonable manner based on the medium, means, and context in
248 | which You Share the Licensed Material. For example, it may be
249 | reasonable to satisfy the conditions by providing a URI or
250 | hyperlink to a resource that includes the required
251 | information.
252 |
253 | 3. If requested by the Licensor, You must remove any of the
254 | information required by Section 3(a)(1)(A) to the extent
255 | reasonably practicable.
256 |
257 | 4. If You Share Adapted Material You produce, the Adapter's
258 | License You apply must not prevent recipients of the Adapted
259 | Material from complying with this Public License.
260 |
261 |
262 | Section 4 -- Sui Generis Database Rights.
263 |
264 | Where the Licensed Rights include Sui Generis Database Rights that
265 | apply to Your use of the Licensed Material:
266 |
267 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right
268 | to extract, reuse, reproduce, and Share all or a substantial
269 | portion of the contents of the database;
270 |
271 | b. if You include all or a substantial portion of the database
272 | contents in a database in which You have Sui Generis Database
273 | Rights, then the database in which You have Sui Generis Database
274 | Rights (but not its individual contents) is Adapted Material; and
275 |
276 | c. You must comply with the conditions in Section 3(a) if You Share
277 | all or a substantial portion of the contents of the database.
278 |
279 | For the avoidance of doubt, this Section 4 supplements and does not
280 | replace Your obligations under this Public License where the Licensed
281 | Rights include other Copyright and Similar Rights.
282 |
283 |
284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
285 |
286 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
287 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
288 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
289 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
290 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
291 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
292 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
293 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
294 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
295 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
296 |
297 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
298 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
299 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
300 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
301 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
302 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
303 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
304 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
305 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
306 |
307 | c. The disclaimer of warranties and limitation of liability provided
308 | above shall be interpreted in a manner that, to the extent
309 | possible, most closely approximates an absolute disclaimer and
310 | waiver of all liability.
311 |
312 |
313 | Section 6 -- Term and Termination.
314 |
315 | a. This Public License applies for the term of the Copyright and
316 | Similar Rights licensed here. However, if You fail to comply with
317 | this Public License, then Your rights under this Public License
318 | terminate automatically.
319 |
320 | b. Where Your right to use the Licensed Material has terminated under
321 | Section 6(a), it reinstates:
322 |
323 | 1. automatically as of the date the violation is cured, provided
324 | it is cured within 30 days of Your discovery of the
325 | violation; or
326 |
327 | 2. upon express reinstatement by the Licensor.
328 |
329 | For the avoidance of doubt, this Section 6(b) does not affect any
330 | right the Licensor may have to seek remedies for Your violations
331 | of this Public License.
332 |
333 | c. For the avoidance of doubt, the Licensor may also offer the
334 | Licensed Material under separate terms or conditions or stop
335 | distributing the Licensed Material at any time; however, doing so
336 | will not terminate this Public License.
337 |
338 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
339 | License.
340 |
341 |
342 | Section 7 -- Other Terms and Conditions.
343 |
344 | a. The Licensor shall not be bound by any additional or different
345 | terms or conditions communicated by You unless expressly agreed.
346 |
347 | b. Any arrangements, understandings, or agreements regarding the
348 | Licensed Material not stated herein are separate from and
349 | independent of the terms and conditions of this Public License.
350 |
351 |
352 | Section 8 -- Interpretation.
353 |
354 | a. For the avoidance of doubt, this Public License does not, and
355 | shall not be interpreted to, reduce, limit, restrict, or impose
356 | conditions on any use of the Licensed Material that could lawfully
357 | be made without permission under this Public License.
358 |
359 | b. To the extent possible, if any provision of this Public License is
360 | deemed unenforceable, it shall be automatically reformed to the
361 | minimum extent necessary to make it enforceable. If the provision
362 | cannot be reformed, it shall be severed from this Public License
363 | without affecting the enforceability of the remaining terms and
364 | conditions.
365 |
366 | c. No term or condition of this Public License will be waived and no
367 | failure to comply consented to unless expressly agreed to by the
368 | Licensor.
369 |
370 | d. Nothing in this Public License constitutes or may be interpreted
371 | as a limitation upon, or waiver of, any privileges and immunities
372 | that apply to the Licensor or You, including from the legal
373 | processes of any jurisdiction or authority.
374 |
375 |
376 | =======================================================================
377 |
378 | Creative Commons is not a party to its public
379 | licenses. Notwithstanding, Creative Commons may elect to apply one of
380 | its public licenses to material it publishes and in those instances
381 | will be considered the “Licensor.” The text of the Creative Commons
382 | public licenses is dedicated to the public domain under the CC0 Public
383 | Domain Dedication. Except for the limited purpose of indicating that
384 | material is shared under a Creative Commons public license or as
385 | otherwise permitted by the Creative Commons policies published at
386 | creativecommons.org/policies, Creative Commons does not authorize the
387 | use of the trademark "Creative Commons" or any other trademark or logo
388 | of Creative Commons without its prior written consent including,
389 | without limitation, in connection with any unauthorized modifications
390 | to any of its public licenses or any other arrangements,
391 | understandings, or agreements concerning use of licensed material. For
392 | the avoidance of doubt, this paragraph does not form part of the
393 | public licenses.
394 |
395 | Creative Commons may be contacted at creativecommons.org.
396 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(exercise)
4 |
--------------------------------------------------------------------------------
/R/exercises.R:
--------------------------------------------------------------------------------
#' Get an exercise
#'
#' Copy a workshop exercise stub (.Rmd) from the installed package into the
#' working directory (or a specified path) and open it for editing.
#'
#' @param name The name of the exercise; one of "faux", "fixed", "mixed", or
#'   "calories" (partial matching via `match.arg()` is allowed)
#' @param filename What filename you want to save (defaults to the name of the exercise in the working directory)
#'
#' @return Saves a file to the working directory (or path from filename)
#' @export
#'
#' @examples
#' \dontrun{
#' exercise("faux") # get exercise for the faux workshop
#' exercise("fixed", "exercises/fixed.Rmd") # save into exercises directory
#' }
exercise <- function(name = c("faux", "fixed", "mixed", "calories"), filename = NULL) {
  # Resolve partial matches and validate; keep the matched value so error
  # messages below report a single name, not the whole default vector
  name <- match.arg(name)
  fname <- sprintf("stubs/%s-stub.Rmd", name)
  f <- system.file(fname, package = "dsw")

  # system.file() returns "" when the file is not found in the installed package
  if (f == "") stop("Exercise ", name, " doesn't exist")

  if (is.null(filename)) {
    filename <- gsub("^stubs/", "", fname)
  }

  # file.copy() signals failure by returning FALSE (no error), e.g. when the
  # target already exists (overwrite defaults to FALSE); don't silently open
  # a stale or missing file in that case
  copied <- file.copy(f, filename)
  if (!copied) {
    stop("Could not copy to ", filename,
         "; the file may already exist (delete it first to re-copy)")
  }

  utils::browseURL(filename)
  invisible(copied)
}
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 | ```{r, include = FALSE}
6 | knitr::opts_chunk$set(
7 | collapse = TRUE,
8 | warning = FALSE,
9 | message = FALSE,
10 | comment = "#>",
11 | fig.path = "man/figures/README-",
12 | out.width = "100%"
13 | )
14 | set.seed(8675309)
15 | ```
16 |
17 | # Data Simulation Workshop Materials {style="float:right; width:200px;"}
18 |
19 | Being able to simulate data allows you to:
20 |
21 | * prep analysis scripts for pre-registration
22 | * calculate power and sensitivity for analyses that don't have empirical methods
23 | * create reproducible examples when your data are too big or confidential to share
24 | * enhance your understanding of statistical concepts
25 | * create demo data for teaching and tutorials
26 |
27 | ## Installation
28 |
29 | You can install the packages used in these tutorials and get a function that makes it easy to access the workshop .Rmd files by running the following code:
30 |
31 | ```{r, eval = FALSE}
32 | devtools::install_github("debruine/data-sim-workshops")
33 | ```
34 |
35 | Then you can load exercises with the following code:
36 |
37 | ```{r, eval = FALSE}
38 | dsw::exercise("faux")
39 | dsw::exercise("calories")
40 | dsw::exercise("fixed")
41 | dsw::exercise("mixed")
42 | ```
43 |
44 | Alternatively, download the stub files and install the specific packages for your workshop.
45 |
46 | * [faux-stub.Rmd](https://raw.githubusercontent.com/debruine/data-sim-workshops/master/inst/stubs/faux-stub.Rmd)
47 | * [calories-stub.Rmd](https://raw.githubusercontent.com/debruine/data-sim-workshops/master/inst/stubs/calories-stub.Rmd)
48 | * [fixed-stub.Rmd](https://raw.githubusercontent.com/debruine/data-sim-workshops/master/inst/stubs/fixed-stub.Rmd)
49 | * [mixed-stub.Rmd](https://raw.githubusercontent.com/debruine/data-sim-workshops/master/inst/stubs/mixed-stub.Rmd)
50 |
51 | ## Upcoming Workshops
52 |
53 |
93 |
94 | When: 2024 February 1-2
95 | Where: [Data Simulation Workshop 2024](https://kogpsy.github.io/datasimulationcourse_24/), Institute of Psychology, Bern, Switzerland
96 |
97 | ### Data Simulation with {faux}
98 |
99 | This session will cover the basics of simulation using {faux}. We will simulate data with factorial designs by specifying the within and between-subjects factor structure, each cell mean and standard deviation, and correlations between cells where appropriate. This can be used to create simulated data sets to be used in preparing the analysis code for pre-registrations or registered reports. We will also create data sets for simulation-based power analyses. Students will need to have very basic knowledge of R and R Markdown, and have installed {faux}, {afex}, {broom} and {tidyverse}.
100 |
101 | #### Prep
102 |
103 | * Install R packages from CRAN: `tidyverse`, `afex`, `faux`, and `broom`
104 | * Download files: [faux-stub.Rmd](https://raw.githubusercontent.com/debruine/data-sim-workshops/master/inst/stubs/faux-stub.Rmd)
105 |
106 |
107 | ### Data simulation for mixed designs
108 |
109 | This session will cover simulating data for a mixed design, where trials are crossed with subjects. We will learn how to analyse this using {lme4}, with a focus on understanding how the simulation parameters correspond to the output. Finally, we will learn how to use simulation to calculate power. Students will need to have basic knowledge of R and R Markdown, some familiarity with mixed designs (even if they don't currently analyse them with mixed models) and have installed {faux}, {afex}, {tidyverse}, and {lme4}.
110 |
111 | #### Prep
112 |
113 | * Install R packages from CRAN: `tidyverse`, `afex`, `lme4`, `broom`, `broom.mixed`, `faux`
114 | * Download files: [mixed-stub.Rmd](https://raw.githubusercontent.com/debruine/data-sim-workshops/master/inst/stubs/mixed-stub.Rmd)
115 |
116 |
117 |
118 | ## Resources
119 |
120 | * [Faux Shiny App](https://rstudio-connect.psy.gla.ac.uk/faux/)
121 | * [Data Skills for Reproducible Research](https://psyteachr.github.io/reprores/) open source textbook introducing tidyverse for psychologists
122 | * [Understanding mixed effects models through data simulation](https://osf.io/3cz2e/) (preprint, code, and shiny apps)
123 | * [Simulate Basic Distributions](https://rstudio-connect.psy.gla.ac.uk/simulate/)
124 |
125 | ## Past Workshops
126 |
127 | * Vrije Universiteit Amsterdam, NL
128 | Fake It Until You Make It: How and why to simulate research data
129 | 2023 September 20
130 |
131 | * Max Planck Institute for Evolutionary Anthropology, Leipzig, Germany
132 | Simulating data with {faux}
133 | 2023 July 27 9:00 - 12:00 (CET)
134 |
135 | * [European Evolutionary Biology Conference](https://www.empseb28.com/workshops), Millport, Scotland
136 | Fake It Until You Make It: How and why to simulate research data
137 | 2023 June 1 14:30 - 16:30 (GMT)
138 |
139 | * University of Glasgow Institute of Neuroscience & Psychology
140 | Data Simulation with {faux}
141 | 2023 January 18 12:00 - 13:00 (GMT)
142 |
143 | * Netherlands Institute for the Study of Crime and Law Enforcement
144 | Data Simulation with {faux}
145 | 2022 December 6 13:00 - 14:00 (CET)
146 |
147 | * Polish Association of Social Psychology Conference, Gdańsk
148 | Data simulation for fixed effects
149 | Data simulation for mixed designs
150 | Practical Session
151 | 2022 September 14 09:00 - 16:00 (CET)
152 |
153 | * [RLadies Glasgow](https://www.meetup.com/rladies-glasgow/events/285942871/)
154 | Data simulation using faux
155 | 2022 May 24 15:00-17:00 (BST)
156 |
157 | * University of York
158 | Data simulation for factorial designs
159 | Data simulation for mixed designs
160 | 2022 April 27 09:00-17:00 (BST)
161 |
162 | * [From Proposal to Publication: Pathways to Open Science](https://www.dropbox.com/s/aydsuk6eahxumzu/OSW-Jul21.pdf?dl=0)
163 | Data simulation for factorial designs
164 | Data simulation for mixed designs
165 | 2022 July 13 13:30-17:00
166 |
167 | * University of Glasgow
168 | Institute of Neuroscience and Psychology
169 | 2020 Jan 28 13:00-15:00 and Feb 5 14:00-16:00
170 |
171 | * University of Grenoble
172 | Understanding Mixed-Effects Models through Data Simulation
173 | 2021 February 5 13:00-15:00
174 |
175 | * [PsyPAG Data Simulation Summer School](https://simsummerschool.github.io/)
176 | Simulation for factorial designs with faux
177 | 2021 June 4 13:00-15:00
178 |
179 |
180 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Data Simulation Workshop Materials
3 |
4 | Being able to simulate data allows you to:
5 |
6 | - prep analysis scripts for pre-registration
7 | - calculate power and sensitivity for analyses that don’t have empirical
8 | methods
9 | - create reproducible examples when your data are too big or
10 | confidential to share
11 | - enhance your understanding of statistical concepts
12 | - create demo data for teaching and tutorials
13 |
14 | ## Installation
15 |
16 | You can install the packages used in these tutorials and get a function
17 | that makes it easy to access the workshop .Rmd files by running the
18 | following code:
19 |
20 | ``` r
21 | devtools::install_github("debruine/data-sim-workshops")
22 | ```
23 |
24 | Then you can load exercises with the following code:
25 |
26 | ``` r
27 | dsw::exercise("faux")
28 | dsw::exercise("calories")
29 | dsw::exercise("fixed")
30 | dsw::exercise("mixed")
31 | ```
32 |
33 | Alternatively, download the stub files and install the specific packages
34 | for your workshop.
35 |
36 | - [faux-stub.Rmd](https://raw.githubusercontent.com/debruine/data-sim-workshops/master/inst/stubs/faux-stub.Rmd)
37 | - [calories-stub.Rmd](https://raw.githubusercontent.com/debruine/data-sim-workshops/master/inst/stubs/calories-stub.Rmd)
38 | - [fixed-stub.Rmd](https://raw.githubusercontent.com/debruine/data-sim-workshops/master/inst/stubs/fixed-stub.Rmd)
39 | - [mixed-stub.Rmd](https://raw.githubusercontent.com/debruine/data-sim-workshops/master/inst/stubs/mixed-stub.Rmd)
40 |
41 | ## Upcoming Workshops
42 |
43 |
71 |
72 | When: 2024 February 1-2 Where: [Data Simulation Workshop
73 | 2024](https://kogpsy.github.io/datasimulationcourse_24/), Institute of
74 | Psychology, Bern, Switzerland
75 |
76 | ### Data Simulation with {faux}
77 |
78 | This session will cover the basics of simulation using {faux}. We will
79 | simulate data with factorial designs by specifying the within and
80 | between-subjects factor structure, each cell mean and standard
81 | deviation, and correlations between cells where appropriate. This can be
82 | used to create simulated data sets to be used in preparing the analysis
83 | code for pre-registrations or registered reports. We will also create
84 | data sets for simulation-based power analyses. Students will need to
85 | have very basic knowledge of R and R Markdown, and have installed
86 | {faux}, {afex}, {broom} and {tidyverse}.
87 |
88 | #### Prep
89 |
90 | - Install R packages from CRAN: `tidyverse`, `afex`, `faux`, and `broom`
91 | - Download files:
92 | [faux-stub.Rmd](https://raw.githubusercontent.com/debruine/data-sim-workshops/master/inst/stubs/faux-stub.Rmd)
93 |
94 | ### Data simulation for mixed designs
95 |
96 | This session will cover simulating data for a mixed design, where trials
97 | are crossed with subjects. We will learn how to analyse this using
98 | {lme4}, with a focus on understanding how the simulation parameters
99 | correspond to the output. Finally, we will learn how to use simulation
100 | to calculate power. Students will need to have basic knowledge of R and
101 | R Markdown, some familiarity with mixed designs (even if they don’t
102 | currently analyse them with mixed models) and have installed {faux},
103 | {afex}, {tidyverse}, and {lme4}.
104 |
105 | #### Prep
106 |
107 | - Install R packages from CRAN: `tidyverse`, `afex`, `lme4`, `broom`,
108 | `broom.mixed`, `faux`
109 | - Download files:
110 | [mixed-stub.Rmd](https://raw.githubusercontent.com/debruine/data-sim-workshops/master/inst/stubs/mixed-stub.Rmd)
111 |
112 | ## Resources
113 |
114 | - [Faux Shiny App](https://rstudio-connect.psy.gla.ac.uk/faux/)
115 | - [Data Skills for Reproducible
116 | Research](https://psyteachr.github.io/reprores/) open source textbook
117 | introducing tidyverse for psychologists
118 | - [Understanding mixed effects models through data
119 | simulation](https://osf.io/3cz2e/) (preprint, code, and shiny apps)
120 | - [Simulate Basic
121 | Distributions](https://rstudio-connect.psy.gla.ac.uk/simulate/)
122 |
123 | ## Past Workshops
124 |
125 | - Vrije Universiteit Amsterdam, NL Fake It Until You Make It: How and
126 | why to simulate research data 2023 September 20
127 |
128 | - Max Planck Institute for Evolutionary Anthropology, Leipzig, Germany
129 | Simulating data with {faux}
130 | 2023 July 27 9:00 - 12:00 (CET)
131 |
132 | - [European Evolutionary Biology
133 | Conference](https://www.empseb28.com/workshops), Millport, Scotland
134 | Fake It Until You Make It: How and why to simulate research data
135 | 2023 June 1 14:30 - 16:30 (GMT)
136 |
137 | - University of Glasgow Institute of Neuroscience & Psychology
138 | Data Simulation with {faux}
139 | 2023 January 18 12:00 - 13:00 (GMT)
140 |
141 | - Netherlands Institute for the Study of Crime and Law Enforcement
142 | Data Simulation with {faux}
143 | 2022 December 6 13:00 - 14:00 (CET)
144 |
145 | - Polish Association of Social Psychology Conference, Gdańsk
146 | Data simulation for fixed effects
147 | Data simulation for mixed designs
148 | Practical Session
149 | 2022 September 14 09:00 - 16:00 (CET)
150 |
151 | - [RLadies
152 | Glasgow](https://www.meetup.com/rladies-glasgow/events/285942871/)
153 | Data simulation using faux
154 | 2022 May 24 15:00-17:00 (BST)
155 |
156 | - University of York Data simulation for factorial designs
157 | Data simulation for mixed designs
158 | 2022 April 27 09:00-17:00 (BST)
159 |
160 | - [From Proposal to Publication: Pathways to Open
161 | Science](https://www.dropbox.com/s/aydsuk6eahxumzu/OSW-Jul21.pdf?dl=0)
162 | Data simulation for factorial designs
163 | Data simulation for mixed designs
164 | 2022 July 13 13:30-17:00
165 |
166 | - University of Glasgow
167 | Institute of Neuroscience and Psychology
168 | 2020 Jan 28 13:00-15:00 and Feb 5 14:00-16:00
169 |
170 | - University of Grenoble
171 | Understanding Mixed-Effects Models through Data Simulation
172 | 2021 February 5 13:00-15:00
173 |
174 | - [PsyPAG Data Simulation Summer
175 | School](https://simsummerschool.github.io/)
176 | Simulation for factorial designs with faux
177 | 2021 June 4 13:00-15:00
178 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://debruine.github.io/data-sim-workshops/
2 | template:
3 | bootstrap: 5
4 |
5 |
--------------------------------------------------------------------------------
/classes/2021_grenoble/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 |
--------------------------------------------------------------------------------
/classes/2021_grenoble/data/sim-data-ind-samples.csv:
--------------------------------------------------------------------------------
1 | sub_condition,score
2 | A,6.246779887685582
3 | A,8.012698250843949
4 | A,4.867842392941125
5 | A,9.088501533767825
6 | A,11.191954616879531
7 | A,9.03881103436473
8 | A,13.364185172917518
9 | A,8.573021978401277
10 | A,9.926466129326911
11 | A,12.465760458310296
12 | A,11.006386775570943
13 | A,12.070183895116143
14 | A,10.033619762103571
15 | A,10.995384277495292
16 | A,11.151025482671098
17 | A,11.737529418597154
18 | A,10.107403368529154
19 | A,10.08743955380821
20 | A,11.046650784553353
21 | A,13.324813380199299
22 | A,6.083382763357414
23 | A,7.939830725112717
24 | A,13.491990583570459
25 | A,9.929307036417796
26 | A,13.992512133778424
27 | A,12.974132881190506
28 | A,11.308656447373941
29 | A,4.681767465282951
30 | A,6.83174563061967
31 | A,13.55373559327446
32 | A,9.861460075291635
33 | A,12.482567836867604
34 | A,9.026286027170872
35 | A,8.890472969372913
36 | A,15.303984786835697
37 | A,6.997081253818567
38 | A,8.679393631873973
39 | A,12.022863706260424
40 | A,11.716011616642925
41 | A,8.070539893966673
42 | A,6.96557749307061
43 | A,11.375007340204416
44 | A,3.147117628404277
45 | A,13.330582222085273
46 | A,13.758980277949128
47 | A,10.789485906485362
48 | A,13.519313935692331
49 | A,9.864440483873304
50 | A,13.905192248849703
51 | A,11.397705283579329
52 | B,9.987060833609098
53 | B,12.674322116332078
54 | B,8.650535157300826
55 | B,10.319448936971005
56 | B,9.752828070738742
57 | B,5.1498404825835875
58 | B,10.043538459404154
59 | B,11.65041283679112
60 | B,9.058075464476607
61 | B,15.666221948680269
62 | B,7.023959326665469
63 | B,10.847992748971976
64 | B,8.794097493656123
65 | B,6.858495841865998
66 | B,8.738308394071947
67 | B,12.793683395855911
68 | B,12.135903818770199
69 | B,5.545096598403308
70 | B,10.862029942597166
71 | B,12.108632505565287
72 | B,13.495334631089289
73 | B,9.69541423462096
74 | B,11.943259358441965
75 | B,14.305599422199359
76 | B,11.700269821286627
77 | B,13.284868705790277
78 | B,15.565431929707096
79 | B,10.342210578825032
80 | B,14.111747249830643
81 | B,8.85930297529751
82 | B,5.989949207346347
83 | B,13.572615727642773
84 | B,9.89731802756733
85 | B,15.234728382886065
86 | B,13.22383240275949
87 | B,12.241645039637488
88 | B,13.08037623715171
89 | B,8.708664350861953
90 | B,10.300344672580954
91 | B,10.774925155744045
92 | B,11.136595827526197
93 | B,9.749899247259583
94 | B,7.480331644780475
95 | B,12.411943890011894
96 | B,12.08533760621049
97 | B,12.777919437631445
98 | B,14.986425355932196
99 | B,12.264388568164854
100 | B,7.348951770289929
101 | B,10.713960442867121
102 |
--------------------------------------------------------------------------------
/classes/2021_grenoble/faux.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Intro to Faux"
3 | author: "Lisa DeBruine"
4 | date: 2021-02-05
5 | output:
6 | html_document:
7 | df_print: kable
8 | ---
9 |
10 | ```{r, include = FALSE}
11 | knitr::opts_chunk$set(
12 | collapse = TRUE,
13 | out.width = "100%",
14 | fig.width = 5,
15 | fig.height = 3,
16 | dpi = 144
17 | )
18 | set.seed(8675309) # Jenny, I've got your number
19 | ```
20 |
21 | ```{r libs, message=FALSE}
22 | library(tidyverse)
23 | library(faux)
24 | library(broom)
25 | library(afex)
26 | ```
27 |
28 | In this tutorial, we'll learn how to simulate data for factorial designs using {faux}. There are more extensive examples at <https://debruine.github.io/faux/>.
29 |
30 | ## Multivariate normal
31 |
32 | You can create sets of correlated normally distributed values using `rnorm_multi()`.
33 |
34 | ```{r}
35 | dat3 <- rnorm_multi(
36 | n = 50,
37 | vars = 3,
38 | mu = c(1, 2, 3),
39 | sd = c(0.5, 1, 1.5),
40 | r = c(0, .25, .5),
41 | varnames = c("A", "B", "C")
42 | )
43 | ```
44 |
45 | The function `get_params()` gives you a quick way to see the means, SDs and correlations in the simulated data set to make sure you set the parameters correctly.
46 |
47 | ```{r}
48 | get_params(dat3)
49 | ```
50 |
51 | If you set `empirical` to `TRUE`, the values you set will be the **sample** parameters, not the **population** parameters. This isn't usually what you want for a simulation, but can be useful to check you set the simulation parameters correctly.
52 |
53 | ```{r}
54 | dat3 <- rnorm_multi(
55 | n = 50,
56 | vars = 3,
57 | mu = c(1, 2, 3),
58 | sd = c(0.5, 1, 1.5),
59 | r = c(0, .25, .5),
60 | varnames = c("A", "B", "C"),
61 | empirical = TRUE
62 | )
63 |
64 | get_params(dat3)
65 | ```
66 |
67 | ### Shortcuts
68 |
69 | There are a few shortcuts you can use. Run the following and see if you can guess how they work.
70 |
71 | ```{r}
72 | guess1 <- rnorm_multi(50, mu = c(x = 1, y = 2, z = 3), empirical = TRUE)
73 |
74 | get_params(guess1)
75 | ```
76 |
77 | ```{r}
78 | guess2 <- rnorm_multi(50, vars = 4, r = 0.5, empirical = TRUE)
79 |
80 | get_params(guess2)
81 | ```
82 |
83 | ```{r}
84 | iris_r <- cor(iris[, 1:4])
85 | iris_mu <- summarise_all(iris[, 1:4], mean) %>% t()
86 | iris_sd <- summarise_all(iris[, 1:4], sd) %>% t()
87 |
88 | guess3 <- rnorm_multi(50,
89 | mu = iris_mu,
90 | sd = iris_sd,
91 | r = iris_r)
92 |
93 | get_params(guess3)
94 | ```
95 |
96 | You can set the r for correlations in a few different ways.
97 |
98 | ```{r}
99 | # all correlations the same value
100 | rho_same <- rnorm_multi(50, 4, r = .5, empirical = TRUE)
101 | get_params(rho_same)
102 | ```
103 |
104 | ```{r}
105 | # upper right triangle
106 | rho_urt <- rnorm_multi(50, 4,
107 | # X2 X3 X4
108 | r = c(0.5, 0.4, 0.3, # X1
109 | 0.2, 0.1, # X2
110 | 0.0), # X3
111 | empirical = TRUE)
112 | get_params(rho_urt)
113 | ```
114 |
115 | ```{r}
116 | # full correlation matrix
117 | rho_cormat <- rnorm_multi(50, 4,
118 | # X1 X2 X3 X4
119 | r = c(1.0, 0.5, 0.4, 0.3, # X1
120 | 0.5, 1.0, 0.2, 0.1, # X2
121 | 0.4, 0.2, 1.0, 0.0, # X3
122 | 0.3, 0.1, 0.0, 1.0), # X4
123 | empirical = TRUE)
124 | get_params(rho_cormat)
125 | ```
126 |
127 |
128 | ```{r}
129 | rnorm_multi(10, 3, r = c(.9, .9, -.9))
130 | ```
131 |
132 |
133 | ## Factorial Designs
134 |
135 | You can just use `rnorm_multi()` to simulate data for each between-subjects cell of a factorial design and manually combine the tables, but faux has a function that better maps onto how we usually think and teach about factorial designs.
136 |
137 | The default design is 100 observations of one variable (named `y`) with a mean of 0 and SD of 1. Unless you set `plot = FALSE` or run `faux_options(plot = FALSE)`, this function will show you a plot of your design so you can check that it looks like you expect.
138 |
139 | ```{r}
140 | simdat1 <- sim_design()
141 | ```
142 |
143 |
144 | ### Factors
145 |
146 | Use lists to set the names and levels of within- and between-subject factors.
147 |
148 | ```{r}
149 | pettime <- sim_design(
150 | within = list(time = c("pre", "post"),
151 | condition = c("A", "B")),
152 | between = list(pet = c("cat", "dog", "ferret"))
153 | )
154 | ```
155 |
156 | You can set mu and sd with unnamed vectors, but getting the order right can be tricky.
157 |
158 | ```{r}
159 | pettime <- sim_design(
160 | within = list(time = c("pre", "post")),
161 | between = list(pet = c("cat", "dog", "ferret")),
162 | mu = 1:6
163 | )
164 | ```
165 |
166 | You can set values with a named vector for a single type of factor. The values do not have to be in the right order if they're named.
167 |
168 | ```{r}
169 | pettime <- sim_design(
170 | within = list(time = c("pre", "post")),
171 | between = list(pet = c("cat", "dog", "ferret")),
172 | mu = c(cat = 1, ferret = 5, dog = 3),
173 | sd = c(pre = 1, post = 2)
174 | )
175 | ```
176 |
177 | Or use a data frame for within- and between-subject factors.
178 |
179 | ```{r}
180 |
181 | mu <- data.frame(
182 | pre_A = c(1, 3, 5),
183 | post_A = c(2, 4, 6),
184 | pre_B = c(10, 30, 50),
185 | post_B = c(20, 40, 60),
186 | row.names = c("cat", "dog", "ferret")
187 | )
188 |
189 | pettime <- sim_design(
190 | within = list(time = c("pre", "post"),
191 | condition = c("A","B")),
192 | between = list(pet = c("cat", "dog", "ferret")),
193 | mu = mu
194 | )
195 | ```
196 |
197 | If you have within-subject factors, set the correlations for each between-subject cell like this. You need to tell `get_params()` if you have any between-subject columns.
198 |
199 | ```{r}
200 | pettime <- sim_design(
201 | within = list(time = c("pre", "post")),
202 | between = list(pet = c("cat", "dog", "ferret")),
203 | r = list(cat = 0.5,
204 | dog = 0.25,
205 | ferret = 0),
206 | empirical = TRUE,
207 | plot = FALSE
208 | )
209 |
210 | get_params(pettime, between = "pet")
211 | ```
212 |
213 | You can also change the name of the `dv` and `id` columns and output the data in long format. If you do this, you also need to tell `get_params()` what columns contain the between- and within-subject factors, the dv, and the id.
214 |
215 | ```{r}
216 | dat_long <- sim_design(
217 | within = list(time = c("pre", "post")),
218 | between = list(pet = c("cat", "dog", "ferret")),
219 | id = "subj_id",
220 | dv = "score",
221 | long = TRUE,
222 | plot = FALSE
223 | )
224 |
225 | get_params(dat_long,
226 | between = "pet",
227 | within = "time",
228 | id = "subj_id",
229 | dv = "score",
230 | digits = 3)
231 | ```
232 |
233 | ### Anonymous Factors
234 |
235 | If you need to make a quick demo, you can set factors anonymously with integer vectors.
236 |
237 | ```{r}
238 | dat_anon <- sim_design(
239 | n = 50,
240 | between = list(pet = c(dog = "Doggies", cat = "Kittens")),
241 | dv = c(score = "Happiness Score")
242 | )
243 |
244 | x <- attr(dat_anon, "design")
245 | ```
246 |
247 | Faux has a quick plotting function for visualising data made with sim_design.
248 |
249 | ```{r}
250 | plot(dat_anon)
251 | ```
252 | You can change the order of plotting and the types of geoms plotted. This takes a little trial and error, so this function will probably be refined in later versions.
253 |
254 | ```{r}
255 | plot(dat_anon, "B", "A", "C", geoms = c("jitter"))
256 | ```
257 |
258 | ### Replications
259 |
260 | You often want to simulate data repeatedly to do things like calculate power. The `sim_design()` function has a lot of overhead for checking if a design makes sense and if the correlation matrix is possible, so you can speed up the creation of multiple datasets with the same design using the `rep` argument. This will give you a nested data frame with each dataset in the `data` column.
261 |
262 | ```{r}
263 | dat_rep <- sim_design(
264 | within = 2,
265 | n = 20,
266 | mu = c(0, 0.25),
267 | rep = 10,
268 | plot = FALSE
269 | )
270 | ```
271 |
272 | You can run analyses on the nested data like this:
273 |
274 | ```{r}
275 | map_df(dat_rep$data, ~{
276 | t.test(.x$A1, .x$A2, paired = TRUE) %>% broom::tidy()
277 | })
278 | ```
279 |
280 |
281 | ## Exercises
282 |
283 | ### Multivariate normal
284 |
285 | Sample 40 values of three variables named `J`, `K` and `L` from a population with means of 10, 20 and 30, and SDs of 5. `J` and `K` are correlated 0.5, `J` and `L` are correlated 0.25, and `K` and `L` are not correlated.
286 |
287 | ```{r, include=FALSE}
288 | ex1 <- rnorm_multi(n = 40, mu = c(J = 10, K = 20, L = 30),
289 | sd = 5, r = c(0.5, 0.25, 0))
290 |
291 | get_params(ex1)
292 | ```
293 |
294 | ### From existing data
295 |
296 | Using the data from the built-in dataset `attitude`, simulate a new set of 20 observations drawn from a population with the same means, SDs and correlations for each column as the original data.
297 |
298 | ```{r, include=FALSE}
299 | dat_r <- cor(attitude)
300 | dat_mu <- summarise_all(attitude, mean) %>% t()
301 | dat_sd <- summarise_all(attitude, sd) %>% t()
302 |
303 | ex2 <- rnorm_multi(20, mu = dat_mu, sd = dat_sd,r = dat_r)
304 |
305 | get_params(ex2)
306 | ```
307 |
308 |
309 | ### 2b
310 |
311 | Create a dataset with a between-subject factor of "pet" having two levels, "cat", and "dog". The DV is "happiness" score. There are 20 cat-owners with a mean happiness score of 10 (SD = 3) and there are 30 dog-owners with a mean happiness score of 11 (SD = 3).
312 |
313 | ```{r, include=FALSE}
314 | dat2b <- sim_design(
315 | between = list(pet = c("cat", "dog")),
316 | dv = "happiness",
317 | n = list(cat = 20, dog = 30),
318 | mu = list(cat = 10, dog = 11),
319 | sd = 3
320 | )
321 |
322 | get_params(dat2b, between = "pet")
323 | ```
324 |
325 | ### 3w
326 |
327 | Create a dataset of 20 observations with 1 within-subject variable ("condition") having 3 levels ("A", "B", "C") with means of 10, 20 and 30 and SD of 5. The correlations between each level have r = 0.4. The dataset should look like this:
328 |
329 | | id | condition | score |
330 | |:---|:----------|------:|
331 | |S01 | A | 9.17 |
332 | |... | ... | ... |
333 | |S20 | A | 11.57 |
334 | |S01 | B | 18.44 |
335 | |... | ... | ... |
336 | |S20 | B | 20.04 |
337 | |S01 | C | 35.11 |
338 | |... | ... | ... |
339 | |S20 | C | 29.16 |
340 |
341 | ```{r, include=FALSE}
342 |
343 | dat3w <- sim_design(
344 | within = list(condition = c("A", "B", "C")),
345 | n = 20,
346 | mu = c(10, 20, 30),
347 | sd = 5,
348 | r = .4,
349 | dv = "score",
350 | long = TRUE
351 | )
352 |
353 | get_params(dat3w)
354 |
355 | ```
356 |
357 | ### 2w*2w
358 |
359 | Create a dataset of 50 subjects with 2 within-subject variables ("A" and "B") each having 2 levels. The mean for all cells is 10 and the SD is 2. The correlations look like this:
360 |
361 | | | A1_B1 | A1_B2 | A2_B1 | A2_B2 |
362 | |:------|------:|------:|------:|------:|
363 | | A1_B1 | 1.0 | 0.5 | 0.5 | 0.2 |
364 | | A1_B2 | 0.5 | 1.0 | 0.2 | 0.5 |
365 | | A2_B1 | 0.5 | 0.2 | 1.0 | 0.5 |
366 | | A2_B2 | 0.2 | 0.5 | 0.5 | 1.0 |
367 |
368 |
369 | ```{r, include=FALSE}
370 | dat2w2w <- sim_design(
371 | within = c(2,2),
372 | n = 50,
373 | mu = 10,
374 | sd = 2,
375 | r = c(.5, .5, .2,
376 | .2, .5,
377 | .5)
378 | )
379 |
380 | get_params(dat2w2w)
381 | ```
382 |
383 | ### 2w*3b
384 |
385 | Create a dataset with a between-subject factor of "pet" having 3 levels ("cat", "dog", and "ferret") and a within-subject factor of "time" having 2 levels ("pre" and "post"). The N in each group should be 10. Means are:
386 |
387 | * cats: pre = 10, post = 12
388 | * dogs: pre = 14, post = 16
389 | * ferrets: pre = 18, post = 20
390 |
391 | SDs are all 5 and within-cell correlations are all 0.25.
392 |
393 | ```{r, include=FALSE}
394 |
395 | mu <- data.frame(
396 | cat = c(10, 12),
397 | dog = c(14, 16),
398 | ferret = c(18, 20)
399 | )
400 |
401 | dat2w3b <- sim_design(
402 | within = list(time = c("pre", "post")),
403 | between = list(pet = c("cat", "dog", "ferret")),
404 | n = 10,
405 | mu = mu,
406 | sd = 5,
407 | r = 0.25
408 | )
409 |
410 | get_params(dat2w3b)
411 |
412 | ```
413 |
414 | ### Replications
415 |
416 | Create 5 datasets with a 2b*2b design, 30 participants in each cell. Each cell's mean should be 0, except A1_B1, which should be 0.5. The SD should be 1. Make the resulting data in long format.
417 |
418 | ```{r, include=FALSE}
419 | dat2b2b <- sim_design(
420 | between = c(2,2),
421 | n = 30,
422 | mu = c(0.5, 0, 0, 0),
423 | rep = 5,
424 | long = TRUE
425 | )
426 | ```
427 |
428 | ### Power
429 |
430 | Simulate 100 datasets like the one above and use `lm()` or `afex::aov_ez()` to look at the interaction between A and B. What is the power of this design?
431 |
432 | ```{r, include=FALSE}
433 | dat2b2b_100 <- sim_design(
434 | between = c(2, 2),
435 | n = 30,
436 | mu = c(0.5, 0, 0, 0),
437 | rep = 100,
438 | long = TRUE
439 | )
440 |
441 | ana_lm <- map_df(dat2b2b_100$data, ~{
442 | lm(y ~ A*B, data = .x) %>% broom::tidy()
443 | })
444 |
445 | afex::set_sum_contrasts() # avoids annoying afex message
446 | ana_aov <- map_df(dat2b2b_100$data, ~{
447 | afex::aov_ez(id = "id",
448 | dv = "y",
449 | between = c("A", "B"),
450 | data = .x,
451 | return = "aov") %>% broom::tidy()
452 | })
453 |
454 | ana_aov %>%
455 | group_by(term) %>%
456 | summarise(power = mean(p.value < .05),
457 | .groups = "drop")
458 | ```
459 |
460 |
--------------------------------------------------------------------------------
/classes/2021_grenoble/sim1.csv:
--------------------------------------------------------------------------------
1 | effect,group,term,estimate,std.error,statistic,df,p.value
2 | fixed,NA,(Intercept),372.8236962436822,10.08365913366393,36.97305623898212,165.65881763290963,6.07720610062092e-82
3 | fixed,NA,sub_cond.e,68.73580843544242,15.076692205038604,4.559077515190702,222.03595696791052,8.487839790510339e-6
4 | fixed,NA,stim_version.e,50.07083246061155,2.09919721932919,23.852371754099387,154.6466162980542,1.0965014966349348e-53
5 | fixed,NA,sub_cond.e:stim_version.e,4.309073973261419,3.757369398397111,1.1468326683822103,198.16657034852403,0.25283358257310784
6 | ran_pars,sub_id,sd__(Intercept),103.06082058775809,NA,NA,NA,NA
7 | ran_pars,sub_id.1,sd__stim_version.e,21.054200010708943,NA,NA,NA,NA
8 | ran_pars,stim_id,sd__(Intercept),49.26529727552844,NA,NA,NA,NA
9 | ran_pars,stim_id.1,sd__stim_version.e,10.162977497391143,NA,NA,NA,NA
10 | ran_pars,stim_id.2,sd__sub_cond.e,27.158316402506028,NA,NA,NA,NA
11 | ran_pars,stim_id.3,sd__stim_version.e:sub_cond.e,15.417843335648813,NA,NA,NA,NA
12 | ran_pars,Residual,sd__Observation,24.9509561875767,NA,NA,NA,NA
13 |
--------------------------------------------------------------------------------
/data-sim-workshops.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 |
--------------------------------------------------------------------------------
/docs/404.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
calculate power and sensitivity for analyses that don’t have empirical methods
85 |
create reproducible examples when your data are too big or confidential to share
86 |
enhance your understanding of statistical concepts
87 |
create demo data for teaching and tutorials
88 |
89 |
90 |
Installation
91 |
92 |
You can install the packages used in these tutorials and get a function that makes it easy to access the workshop .Rmd files by running the following code:
This session will cover the basics of simulation using {faux}. We will simulate data with factorial designs by specifying the within and between-subjects factor structure, each cell mean and standard deviation, and correlations between cells where appropriate. This can be used to create simulated data sets to be used in preparing the analysis code for pre-registrations or registered reports. We will also create data sets for simulation-based power analyses. Students will need to have very basic knowledge of R and R Markdown, and have installed {faux}, {afex}, {broom} and {tidyverse}.
145 |
146 |
Prep
147 |
148 |
149 |
Install R packages from CRAN: tidyverse, afex, faux, and broom
150 |
This session will cover simulating data for a mixed design, where trials are crossed with subjects. We will learn how to analyse this using {lme4}, with a focus on understanding how the simulation parameters correspond to the output. Finally, we will learn how to use simulation to calculate power. Students will need to have basic knowledge of R and R Markdown, some familiarity with mixed designs (even if they don’t currently analyse them with mixed models) and have installed {faux}, {afex}, {tidyverse}, and {lme4}.
160 |
161 |
Prep
162 |
163 |
164 |
Install R packages from CRAN: tidyverse, afex, lme4, broom, broom.mixed, faux
165 |
Vrije Universiteit Amsterdam, NL Fake It Until You Make It: How and why to simulate research data 2023 September 20
189 |
Max Planck Institute for Evolutionary Anthropology, Leipzig, Germany
190 | Simulating data with {faux}
191 | 2023 July 27 9:00 - 12:00 (CET)
192 |
European Evolutionary Biology Conference, Millport, Scotland
193 | Fake It Until You Make It: How and why to simulate research data
194 | 2023 June 1 14:30 - 16:30 (GMT)
195 |
University of Glasgow Institute of Neuroscience & Psychology
196 | Data Simulation with {faux}
197 | 2023 January 18 12:00 - 13:00 (GMT)
198 |
Netherlands Institute for the Study of Crime and Law Enforcement
199 | Data Simulation with {faux}
200 | 2022 December 6 13:00 - 14:00 (CET)
201 |
Polish Association of Social Psychology Conference, Gdánsk
202 | Data simulation for fixed effects
203 | Data simulation for mixed designs
204 | Practical Session
205 | 2022 September 14 09:00 - 16:00 (CET)
206 |
RLadies Glasgow
207 | Data simulation using faux
208 | 2022 May 24 15:00-17:00 (BST)
209 |
University of York Data simulation for factorial designs
210 | Data simulation for mixed designs
211 | 2022 April 27 09:00-17:00 (BST)
What filename you want to save (defaults to the name of the exercise in the working directory)
71 |
72 |
73 |
74 |
Value
75 |
76 |
77 |
Saves a file to the working directory (or path from filename)
78 |
79 |
80 |
81 |
Examples
82 |
if(FALSE){
83 | exercise("faux")# get exercise for the faux workshop
84 | exercise("fixed", "exercises/fixed.Rmd")# save into exercises directory
85 | }
86 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
--------------------------------------------------------------------------------
/docs/sitemap.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | https://debruine.github.io/data-sim-workshops/404.html
5 |
6 |
7 | https://debruine.github.io/data-sim-workshops/LICENSE.html
8 |
9 |
10 | https://debruine.github.io/data-sim-workshops/articles/calories.html
11 |
12 |
13 | https://debruine.github.io/data-sim-workshops/articles/faux.html
14 |
15 |
16 | https://debruine.github.io/data-sim-workshops/articles/fixed.html
17 |
18 |
19 | https://debruine.github.io/data-sim-workshops/articles/index.html
20 |
21 |
22 | https://debruine.github.io/data-sim-workshops/articles/mixed.html
23 |
24 |
25 | https://debruine.github.io/data-sim-workshops/authors.html
26 |
27 |
28 | https://debruine.github.io/data-sim-workshops/index.html
29 |
30 |
31 | https://debruine.github.io/data-sim-workshops/reference/exercise.html
32 |
33 |
34 | https://debruine.github.io/data-sim-workshops/reference/index.html
35 |
36 |
37 |
--------------------------------------------------------------------------------
/inst/stubs/calories-stub.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Calorie Placement Re-Simulation"
3 | output:
4 | html_document:
5 | df_print: kable
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(error = TRUE)
10 | library(tidyverse)
11 | library(faux)
12 | library(afex)
13 | library(emmeans)
14 | faux_options(plot = FALSE)
15 |
16 | set.seed(8675309)
17 | ```
18 |
19 | ## Data Source
20 |
21 | We will be replicating some of the re-analyses in Francis & Thunell's (2020) Meta-Psychology paper: Excess success in "Don't count calorie labeling out: Calorie counts on the left side of menu items lead to lower calorie food choices".
22 |
23 | They ran power analyses for all 6 studies in Dallas, Liu, and Ubel's (2019) study showing that people order food with significantly fewer calories when the calorie count was placed to the left of the item than to the right (or having no calorie label). They then used these power estimates to calculate the probability of all 6 out of 6 studies being significant, given the observed power of each study.
24 |
25 | * [Re-analysis](https://doi.org/10.15626/MP.2019.2266)
26 | * [Re-analysis code](https://osf.io/xrdhj/)
27 | * [Original paper](https://doi.org/10.1002/jcpy.1053)
28 |
29 | Table 1 of the re-analysis paper provides all of the parameters we will need.
30 |
31 | ## Reanalyses
32 |
33 | ### Study 2
34 |
35 | We'll start with S2 because the analysis is very straightforward. It's a between-subjects design, where 143 subjects saw calorie placement on the left and their mean calories ordered were 1249.83 (SD = 449.07), while 132 subjects saw calorie placement on the right and their mean calories ordered were 1362.31 (SD = 447.35).
36 |
37 | Let's first simulate a single data table with these parameters and set up our analysis.
38 |
39 | ```{r}
40 | data <- NULL
41 | ```
42 |
43 | Wrap the analysis in a function using the `tidy()` function from {broom} to get the results in a tidy table. Check that it works by running it on the single data set above.
44 |
45 | ```{r}
46 | s2_analyse <- function(data) {
47 | }
48 |
49 | s2_analyse(data)
50 | ```
51 |
52 |
53 | Now, simulate the data 500 times.
54 |
55 | ```{r}
56 | s2 <- NULL
57 | ```
58 |
59 | Run the analysis on each data set.
60 |
61 | ```{r}
62 | s2_sim <- NULL
63 |
64 | head(s2_sim)
65 | ```
66 |
67 | Summarise the `p.value` column to get power.
68 |
69 | ```{r}
70 | s2_power <- NULL
71 | ```
72 |
73 | Compare this value (`r s2_power`) with the value in the paper (0.5426).
74 |
75 | ### Study 1
76 |
77 | Study 1 is a little more complicated because the design includes a "no label" condition, so the decision rule for supporting the hypothesis is more complicated.
78 |
79 | The data simulation is relatively straightforward, though.
80 |
81 | ```{r}
82 | mu = c(left = 654.53, right = 865.41, none = 914.34)
83 | sd = c(left = 390.45, right = 517.26, none = 560.94)
84 | n = c(left = 45, right = 54, none = 50)
85 |
86 | data <- NULL
87 | ```
88 |
89 | Set up the analysis. Here, we really just care about three p-values, so we'll just return those. We can use a function from the {emmeans} package to check the two pairwise comparisons.
90 |
91 | ```{r}
92 | afex::set_sum_contrasts() # avoids annoying afex message on each run
93 | afex_options(include_aov = TRUE) # we need aov for emmeans
94 |
95 | s1_analyse <- function(data) { # returns a 1-row data frame of the three p-values needed for the S1 decision rule
96 | # main effect of placement: between-subjects ANOVA of calories by placement
97 | a <- afex::aov_ez(
98 | id = "id",
99 | dv = "calories",
100 | between = "placement",
101 | data = data
102 | )
103 |
104 | # pairwise contrasts: lr = left vs right, ln = left vs none
105 | e <- emmeans(a, "placement") # estimated marginal means, one per placement level
106 | c1 <- list(lr = c(-0.5, 0.5, 0), # NOTE(review): weights assume level order (left, right, none) -- confirm the factor order in the simulated data
107 | ln = c(-0.5, 0, 0.5))
108 | b <- contrast(e, c1, adjust = "holm") |> # Holm correction across the two contrasts
109 | broom::tidy()
110 |
111 | data.frame( # p_all = omnibus F-test; p_1/p_2 = adjusted contrast p-values, in the order listed in c1
112 | p_all = a$anova_table$`Pr(>F)`[[1]],
113 | p_1 = b$adj.p.value[[1]],
114 | p_2 = b$adj.p.value[[2]]
115 | )
116 | }
117 |
118 | s1_analyse(data)
119 | ```
120 |
121 | Let's just replicate this 100 times so the simulation doesn't take too long to run at first. We can always increase it later after we've run some sense checks.
122 |
123 | ```{r}
124 | s1 <- NULL
125 | ```
126 |
127 | Run the analysis on each data set.
128 |
129 | ```{r}
130 | s1_sim <- NULL
131 | ```
132 |
133 | Calculating power is a little trickier here, as all three p-values need to be significant to support the hypothesis.
134 |
135 | ```{r}
136 | s1_power <- NULL
137 | ```
138 |
139 | Compare this value (`r s1_power`) with the value in the paper (0.4582).
140 |
141 | ### Study 3
142 |
143 | Now you can use the pattern from Study 1 to analyse the data for Study 3. We'll start with the repeated data set.
144 |
145 | ```{r}
146 | mu = c(left = 1428.24, right = 1308.66, none = 1436.79)
147 | sd = c(left = 377.02, right = 420.14, none = 378.47)
148 | n = c(left = 85, right = 86, none = 81)
149 |
150 | s3 <- NULL
151 | ```
152 |
153 | These data were collected in the Hebrew language, which reads right to left, so the paired contrasts will be different.
154 |
155 | ```{r}
156 | s3_analyse <- function(data) {
157 |
158 | }
159 | ```
160 |
161 | Run the analysis on each data set.
162 |
163 | ```{r}
164 | s3_sim <- NULL
165 | ```
166 |
167 | ```{r}
168 | s3_power <- NULL
169 | ```
170 |
171 | Compare this value (`r s3_power`) with the value in the paper (0.3626).
172 |
173 |
174 | ### Study S1
175 |
176 | Now you can use the pattern from Study 2 to analyse the data for Study S1. You can even reuse the analysis function `s2_analyse()`!
177 |
178 | ```{r}
179 | mu = c(left = 185.94, right = 215.73)
180 | sd = c(left = 93.92, right = 95.33)
181 | n = c(left = 99, right = 77)
182 |
183 | ss1 <- NULL
184 | ```
185 |
186 | ```{r}
187 | ss1_sim <- NULL
188 | ```
189 |
190 |
191 | ```{r}
192 | ss1_power <- NULL
193 | ```
194 |
195 |
196 | ### Study S2
197 |
198 | Now you can use the pattern from Study 1 to analyse the data for Study S2. You can even reuse the analysis function `s1_analyse()`!
199 |
200 | ```{r}
201 | mu = c(left = 1182.15, right = 1302.23, none = 1373.74)
202 | sd = c(left = 477.60, right = 434.41, none = 475.77)
203 | n = c(left = 139, right = 141, none = 151)
204 |
205 | ss2 <- NULL
206 | ```
207 |
208 | ```{r}
209 | ss2_sim <- NULL
210 | ```
211 |
212 | ```{r}
213 | ss2_power <- NULL
214 | ```
215 |
216 | ### Study S3
217 |
218 | Now you can use the pattern from Study 1 to analyse the data for Study S3.
219 |
220 | ```{r}
221 | mu = c(left = 1302.03, right = 1373.15, none = 1404.35)
222 | sd = c(left = 480.02, right = 442.49, none = 422.03)
223 | n = c(left = 336, right = 337, none = 333)
224 |
225 | ss3 <- NULL
226 | ```
227 |
228 | ```{r}
229 | ss3_sim <- NULL
230 | ```
231 |
232 | ```{r}
233 | ss3_power <- NULL
234 | ```
235 |
236 | ## Conclusion
237 |
238 | Now that you've calculated power for each of the 6 studies, just multiply the 6 power values together to get the probability that all 6 studies will be significant.
239 |
240 |
241 | ```{r}
242 | power_table <- tribble(
243 | ~study, ~power_ft, ~ power_my,
244 | "1", 0.4582, s1_power,
245 | "2", 0.5426, s2_power,
246 | "3", 0.3626, s3_power,
247 | "S1", 0.5358, ss1_power,
248 | "S2", 0.5667, ss2_power,
249 | "S3", 0.4953, ss3_power
250 | )
251 |
252 | power_table
253 | ```
254 |
255 | The `reduce()` function from {purrr} applies a function sequentially over a vector, so it can give us the product of all the values in the power columns.
256 |
257 | ```{r}
258 | prob_ft <- purrr::reduce(power_table$power_ft, `*`)
259 | prob_my <- purrr::reduce(power_table$power_my, `*`)
260 | ```
261 |
262 | The Francis & Thunell paper showed a `r prob_ft` probability of getting 6 of 6 studies significant. Our re-simulation showed a `r prob_my` probability.
263 |
264 |
--------------------------------------------------------------------------------
/inst/stubs/faux-stub.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Intro to Faux"
3 | output:
4 | html_document:
5 | df_print: paged
6 | toc: true
7 | toc_float: true
8 | ---
9 |
10 |
11 | ```{r, include = FALSE}
12 | # control the appearance of the knitted result
13 | knitr::opts_chunk$set(
14 | collapse = TRUE,
15 | out.width = "100%",
16 | fig.width = 5,
17 | fig.height = 3,
18 | dpi = 144
19 | )
20 | ```
21 |
22 |
23 | In this tutorial, we'll learn how to simulate data for factorial designs using {faux}. There are more extensive examples at <https://debruine.github.io/faux/>.
24 |
25 | ## Setup
26 |
27 | We'll be using 4 packages in this tutorial.
28 |
29 | ```{r libs, message=FALSE}
30 | library(tidyverse) # for data wrangling
31 | library(faux) # for simulation
32 | library(broom) # for tidy analysis results
33 | library(afex) # for ANOVA
34 |
35 | set.seed(8675309) # Jenny, I've got your number
36 | ```
37 |
38 | A seed makes randomness reproducible. Run the following code several times. Change the seed to your favourite integer. If the seed is the same, the random numbers after it will be the same, as long as the code is always executed in the same order.
39 |
40 | ```{r}
41 | set.seed(0)
42 | rnorm(1)
43 | ```
44 |
45 | ## Normal
46 |
47 | Let's start with a normal distribution using the base R function `rnorm()`, which returns `n` values from a normal distribution with a mean of 0 and a standard deviation of 1.
48 |
49 | ```{r}
50 | rnorm(n = 10)
51 | ```
52 |
53 | You can change the mean and SD. Simulate a lot of values (1e5 == 100,000), save them in a variable, and visualise them with `hist()`.
54 |
55 | ```{r}
56 | x <- rnorm(1e5, mean = 30, sd = 5)
57 |
58 | hist(x)
59 | ```
60 |
61 | ## Multivariate normal
62 |
63 | But how do you create correlated values? You can do this with `MASS::mvrnorm()`, but you need to construct the `Sigma` argument yourself from the correlation matrix and the standard deviations of the populations, and then you need to turn the resulting matrix into a data frame for many use cases. This isn't very difficult, but can be tedious with larger numbers of variables.
64 |
65 | ```{r}
66 | n = 1e5 # this is a large number to demonstrate that the result is as expected
67 | mu = c(A = 1, B = 2, C = 3)
68 | sd = c(0.5, 1, 1.5)
69 | r = c(0, .25, .5)
70 |
71 | cor_mat <- matrix(c(1, r[1], r[2],
72 | r[1], 1, r[3],
73 | r[2], r[3], 1),
74 | nrow = 3)
75 | Sigma <- (sd %*% t(sd)) * cor_mat
76 | vars <- MASS::mvrnorm(n, mu, Sigma) |> as.data.frame()
77 |
78 | cor(vars) |> round(2)
79 | ```
80 |
81 | ### rnorm_multi
82 |
83 | In faux, you can create sets of correlated normally distributed values using `rnorm_multi()`.
84 |
85 | ```{r}
86 | dat3 <- rnorm_multi(
87 | n = 50,
88 | mu = c(A = 1, B = 2, C = 3),
89 | sd = c(0.5, 1, 1.5),
90 | r = c(0, .25, .5)
91 | )
92 | ```
93 |
94 | The function `get_params()` gives you a quick way to see the means, SDs and correlations in the simulated data set to make sure you set the parameters correctly.
95 |
96 | ```{r}
97 | get_params(dat3)
98 | ```
99 |
100 | If you set `empirical` to `TRUE`, the values you set will be the **sample** parameters, not the **population** parameters. This isn't usually what you want for a simulation, but can be useful to check you set the parameters correctly.
101 |
102 | ```{r}
103 | dat3 <- rnorm_multi(
104 | n = 50,
105 | mu = c(A = 1, B = 2, C = 3),
106 | sd = c(0.5, 1, 1.5),
107 | r = c(0, .25, .5),
108 | empirical = TRUE
109 | )
110 |
111 | get_params(dat3)
112 | ```
113 |
114 |
115 | ### Setting r
116 |
117 | You can set the `r` argument for correlations in a few different ways.
118 |
119 | If all correlations have the same value, just set r equal to a single number.
120 |
121 | ```{r}
122 | # all correlations the same value
123 | rho_same <- rnorm_multi(50, 4, r = .5, empirical = TRUE)
124 | get_params(rho_same)
125 | ```
126 |
127 | You can set `r` to a vector or matrix of the full correlation matrix. This is convenient when you're getting the values from an existing dataset, where you can just use the output of the `cor()` function.
128 |
129 | ```{r}
130 | rho <- cor(iris[1:4])
131 | round(rho, 2)
132 | ```
133 |
134 | Notice how, since we didn't specify the names of the 4 variables anywhere else, `rnorm_multi()` will take them from the named correlation matrix.
135 |
136 | ```{r}
137 | rho_cormat <- rnorm_multi(50, 4, r = rho, empirical = TRUE)
138 | get_params(rho_cormat)
139 | ```
140 |
141 | Alternatively, you can just specify the values from the upper right triangle of a correlation matrix. This might be easier if you're reading the values out of a paper.
142 |
143 | ```{r}
144 | # upper right triangle
145 | # X2 X3 X4
146 | rho <- c(0.5, 0.4, 0.3, # X1
147 | 0.2, 0.1, # X2
148 | 0.0) # X3
149 |
150 | rho_urt <- rnorm_multi(50, 4, r = rho, empirical = TRUE)
151 | get_params(rho_urt)
152 | ```
153 |
154 |
155 | ## Factorial Designs
156 |
157 | You can use `rnorm_multi()` to simulate data for each between-subjects cell of a factorial design and manually combine the tables, but faux has a function that better maps onto how we usually think and teach about factorial designs.
158 |
159 | The default design is 100 observations of one variable (named `y`) with a mean of 0 and SD of 1. Unless you set `plot = FALSE` or run `faux_options(plot = FALSE)`, this function will show you a plot of your design so you can check that it looks like you expect.
160 |
161 | ```{r}
162 | simdat1 <- sim_design()
163 | ```
164 |
165 |
166 | ### Factors
167 |
168 | Use named lists to set the names and levels of `within` and `between` subject factors.
169 |
170 | ```{r}
171 | pettime <- sim_design(
172 | between = list(pet = c("cat", "dog", "ferret")),
173 | within = list(time = c("pre", "post"))
174 | )
175 | ```
176 |
177 | You can set `mu` and `sd` with unnamed vectors, but getting the order right can take some trial and error.
178 |
179 | ```{r}
180 | pettime <- sim_design(
181 | between = list(pet = c("cat", "dog", "ferret")),
182 | within = list(time = c("pre", "post")),
183 | mu = 1:6
184 | )
185 | ```
186 |
187 | You can set values with a named vector for a single type of factor. The values do not have to be in the right order if they're named.
188 |
189 | ```{r}
190 | pettime <- sim_design(
191 | between = list(pet = c("cat", "dog", "ferret")),
192 | within = list(time = c("pre", "post")),
193 | mu = c(cat = 1, ferret = 5, dog = 3),
194 | sd = c(pre = 1, post = 2)
195 | )
196 | ```
197 |
198 | Or use a data frame for within- and between-subject factors.
199 |
200 | ```{r}
201 | pettime <- sim_design(
202 | between = list(pet = c("cat", "dog", "ferret")),
203 | within = list(time = c("pre", "post")),
204 | mu = data.frame(
205 | pre = c(1, 3, 5),
206 | post = c(2, 4, 6),
207 | row.names = c("cat", "dog", "ferret")
208 | )
209 | )
210 | ```
211 |
212 | If you have within-subject factors, set the correlations for each between-subject cell like this.
213 |
214 | ```{r}
215 | pettime <- sim_design(
216 | between = list(pet = c("cat", "dog", "ferret")),
217 | within = list(time = c("pre", "post")),
218 | r = list(cat = 0.5,
219 | dog = 0.25,
220 | ferret = 0),
221 | empirical = TRUE,
222 | plot = FALSE
223 | )
224 |
225 | get_params(pettime)
226 | ```
227 |
228 | You can also change the name of the `dv` and `id` columns and output the data in long format. If you do this, you also need to tell `get_params()` what columns contain the between- and within-subject factors, the dv, and the id.
229 |
230 | ```{r}
231 | dat_long <- sim_design(
232 | between = list(pet = c("cat", "dog", "ferret")),
233 | within = list(time = c("pre", "post")),
234 | id = "subj_id",
235 | dv = "score",
236 | long = TRUE,
237 | plot = FALSE
238 | )
239 |
240 | get_params(dat_long, digits = 3)
241 | ```
242 |
243 | ### Multiple Factors
244 |
245 | Set more than one within- or between-subject factor like this:
246 |
247 | ```{r}
248 | dat_multi <- sim_design(
249 | between = list(pet = c("cat", "dog", "ferret"),
250 | country = c("UK", "NL")),
251 | within = list(time = c("pre", "post"),
252 | condition = c("ctl", "exp")),
253 | mu = data.frame(
254 | cat_UK = 1:4,
255 | cat_NL = 5:8,
256 | dog_UK = 9:12,
257 | dog_NL = 13:16,
258 | ferret_UK = 17:20,
259 | ferret_NL = 21:24,
260 | row.names = c("pre_ctl", "pre_exp", "post_ctl", "post_exp")
261 | )
262 | )
263 | ```
264 |
265 |
266 | Because faux uses an underscore for the separator, you have to set the `sep` argument to something different if you want to use underscores in your variable names (or set the separator globally with `faux_options`).
267 |
268 | ```{r}
269 | # faux_options(sep = ".")
270 |
271 | dat_multi <- sim_design(
272 | between = list(pet = c("cat", "dog", "ferret"),
273 | country = c("Glasgow_UK", "Rotterdam_NL")),
274 | within = list(time = c("pre", "post"),
275 | condition = c("ctl", "exp")),
276 | mu = data.frame(
277 | cat.Glasgow_UK = 1:4,
278 | cat.Rotterdam_NL = 5:8,
279 | dog.Glasgow_UK = 9:12,
280 | dog.Rotterdam_NL = 13:16,
281 | ferret.Glasgow_UK = 17:20,
282 | ferret.Rotterdam_NL = 21:24,
283 | row.names = c("pre.ctl", "pre.exp", "post.ctl", "post.exp")
284 | ),
285 | sep = "."
286 | )
287 | ```
288 |
289 | ### Anonymous Factors
290 |
291 | If you need to make a quick demo, you can set factors anonymously with integer vectors. For example, the following code makes a 3B\*2B\*2W mixed design.
292 |
293 | ```{r}
294 | dat_anon <- sim_design(
295 | n = 50,
296 | between = c(3, 2),
297 | within = 2,
298 | mu = 1:12
299 | )
300 | ```
301 |
302 | Faux has a quick plotting function for visualising data made with faux. The plot created by `sim_design()` shows the *design*, while this function shows the simulated *data*.
303 |
304 | ```{r}
305 | plot(dat_anon)
306 | ```
307 |
308 | You can change the order of plotting and the types of geoms plotted. This takes a little trial and error, so this function will probably be refined in later versions.
309 |
310 | ```{r}
311 | plot(dat_anon, "B1", "B2", "W1", geoms = c("violin", "pointrangeSD"))
312 | ```
313 |
314 |
315 |
316 | ## Replications
317 |
318 | You often want to simulate data repeatedly to do things like calculate power. The `sim_design()` function has a lot of overhead for checking if a design makes sense and if the correlation matrix is possible, so you can speed up the creation of multiple datasets with the same design using the `rep` argument. This will give you a nested data frame with each dataset in the `data` column.
319 |
320 | ```{r}
321 | dat_rep <- sim_design(
322 | within = 2,
323 | n = 20,
324 | mu = c(0, 0.25),
325 | rep = 5,
326 | plot = FALSE
327 | )
328 | ```
329 |
330 | ### Analyse each replicate
331 |
332 | You can run analyses on the nested data by wrapping your analysis code in a function then using `map()` to run the analysis on each data set and `unnest()` to expand the results into a data table.
333 |
334 | ```{r}
335 | # define function: paired t-test comparing the two within-subject levels (W1a vs W1b)
336 | analyse <- function(data) {
337 | t.test(data$W1a, data$W1b, paired = TRUE) %>% broom::tidy() # tidy() returns the test results as a one-row data frame
338 | }
339 |
340 | # get one test data set
341 | data <- dat_rep$data[[1]]
342 |
343 | # check function returns what you want
344 | analyse(data)
345 | ```
346 |
347 |
348 | ```{r}
349 | # run the function on each data set
350 | dat_rep |>
351 | mutate(analysis = map(data, analyse)) |>
352 | select(-data) |>
353 | unnest(analysis)
354 | ```
355 |
356 | ### ANOVA
357 |
358 | Use the same pattern to run an ANOVA on a version of the `pettime` dataset.
359 |
360 | First, simulate 100 datasets in long format. These data will have small main effects of pet and time, but no interaction.
361 |
362 | ```{r}
363 | pettime100 <- sim_design(
364 | between = list(pet = c("cat", "dog")),
365 | within = list(time = c("pre", "post")),
366 | n = c(cat = 50, dog = 40),
367 | mu = data.frame(
368 | pre = c(1, 1.2),
369 | post = c(1.2, 1.4),
370 | row.names = c("cat", "dog")
371 | ),
372 | sd = 1,
373 | id = "pet_id",
374 | dv = "score",
375 | r = 0.5,
376 | long = TRUE,
377 | rep = 100
378 | )
379 | ```
380 |
381 | Then set up your analysis. We'll use the `aov_ez()` function from the {afex} package because its arguments match those of `sim_design()`. There's a little setup to run first to get rid of annoying messages and make this run faster by omitting calculations we won't need.
382 |
383 | ```{r}
384 | afex::set_sum_contrasts() # avoids annoying afex message
385 | afex_options(include_aov = FALSE) # runs faster
386 | afex_options(es_aov = "pes") # changes effect size measure to partial eta squared
387 | ```
388 |
389 | This custom function takes the data frame as input and runs our ANOVA on it. The code at the end just cleans up the resulting table a bit.
390 |
391 | ```{r}
392 | analyse <- function(data) { # mixed ANOVA on one simulated data set; returns a tibble with one row per effect
393 | a <- afex::aov_ez( # between = pet, within = time; column names match the sim_design() arguments
394 | id = "pet_id",
395 | dv = "score",
396 | between = "pet",
397 | within = "time",
398 | data = data
399 | )
400 | # return anova_table for GG-corrected DF
401 | as_tibble(a$anova_table, rownames = "term") |> # effect names (pet, time, pet:time) become the `term` column
402 | mutate(term = factor(term, levels = term)) |> # keeps terms in order
403 | rename(p.value = `Pr(>F)`) # fixes annoying p.value name
404 | }
405 | ```
406 |
407 | Test the analysis code on the first simulated data frame.
408 |
409 | ```{r}
410 | analyse( pettime100$data[[1]] )
411 | ```
412 |
413 |
414 | Use the same code we used in the first example to make a table of the results of each analysis:
415 |
416 | ```{r}
417 | pettime_sim <- pettime100 |>
418 | mutate(analysis = map(data, analyse)) |>
419 | select(-data) |>
420 | unnest(analysis)
421 | ```
422 |
423 | ```{r, echo = FALSE}
424 | # show the first 6 rows
425 | head(pettime_sim) |>
426 | mutate(across(5:8, \(x) round(x, 3)))
427 | ```
428 |
429 | Then you can summarise the data to calculate things like power for each effect or mean effect size.
430 |
431 | ```{r}
432 | pettime_sim |>
433 | group_by(term) |>
434 | summarise(power = mean(p.value < 0.05),
435 | mean_pes = mean(pes) |> round(3),
436 | .groups = "drop")
437 | ```
438 |
439 | The power for the between-subjects effect of pet is smaller than for the within-subjects effect of time. What happens if you reduce the correlation between pre and post?
440 |
441 | ## Non-normal Distributions
442 |
443 | The newest version of faux has a new function for simulating non-normal distributions using the NORTA method (NORmal To Anything). The `dist` argument lists the variables with their distribution names (e.g., "norm", "pois", "unif", "truncnorm", or anything that has an "rdist" function). The `params` argument lists the distribution function argument values for each variable (e.g., arguments to `rnorm`, `rpois`, `runif`, `rtruncnorm`).
444 |
445 | This function simulates multivariate non-normal distributions by using simulation to work out the correlations for a multivariate normal distribution that will produce the desired correlations after the normal distributions are converted to the desired distributions. This simulation can take a while if you have several variables and should warn you if you're requesting an impossible combination (but is still an experimental function, so let Lisa know if you have any problems).
446 |
447 | ```{r}
448 | dat_norta <- rmulti(
449 | n = 1000,
450 | dist = c(U = "unif",
451 | T = "truncnorm",
452 | L = "likert"),
453 | params = list(
454 | U = list(min = 0, max = 10),
455 | T = list(a = 1, b = 7, mean = 3.5, sd = 2.1),
456 | L = list(prob = c(`much less` = .10,
457 | `less` = .20,
458 | `equal` = .35,
459 | `more` = .25,
460 | `much more` = .10))
461 | ),
462 | r = c(-0.5, 0, 0.5)
463 | )
464 | ```
465 |
466 | The "likert" type is a set of distribution functions provided by faux to make creating Likert scale variables easier (see `?rlikert`). You may need to convert Likert-scale variables to numbers before analysis or calculating descriptives.
467 |
468 | ```{r}
469 | # convert likert-scale variable to integer
470 | dat_norta$L <- as.integer(dat_norta$L)
471 |
472 | get_params(dat_norta)
473 | ```
474 |
475 |
476 |
477 | ## Exercises
478 |
479 | ### Multivariate normal
480 |
481 | Sample 40 values of three variables named `J`, `K` and `L` from a population with means of 10, 20 and 30, and SDs of 5. `J` and `K` are correlated 0.5, `J` and `L` are correlated 0.25, and `K` and `L` are not correlated.
482 |
483 | ```{r}
484 |
485 | ```
486 |
487 | ### From existing data
488 |
489 | Using the data from the built-in dataset `attitude`, simulate a new set of 20 observations drawn from a population with the same means, SDs and correlations for each column as the original data.
490 |
491 | ```{r}
492 |
493 | ```
494 |
495 |
496 | ### 2b
497 |
498 | Create a dataset with a between-subject factor of "pet" having two levels, "cat", and "dog". The DV is "happiness" score. There are 20 cat-owners with a mean happiness score of 10 (SD = 3) and there are 30 dog-owners with a mean happiness score of 11 (SD = 3).
499 |
500 | ```{r}
501 |
502 | ```
503 |
504 | ### 3w
505 |
506 | Create a dataset of 20 observations with 1 within-subject variable ("condition") having 3 levels ("A", "B", "C") with means of 10, 20 and 30 and SD of 5. The correlations between each level have r = 0.4. The dataset should look like this:
507 |
508 | | id | condition | score |
509 | |:---|:----------|------:|
510 | |S01 | A | 9.17 |
511 | |... | ... | ... |
512 | |S20 | A | 11.57 |
513 | |S01 | B | 18.44 |
514 | |... | ... | ... |
515 | |S20 | B | 20.04 |
516 | |S01 | C | 35.11 |
517 | |... | ... | ... |
518 | |S20 | C | 29.16 |
519 |
520 | ```{r}
521 |
522 | ```
523 |
524 | ### 2w*2w
525 |
526 | Create a dataset with 50 subjects and 2 within-subject variables ("W1" and "W2") each having 2 levels. The mean for all cells is 10 and the SD is 2. The correlations look like this:
527 |
528 | | | W1a_W2a | W1a_W2b | W1b_W2a | W1b_W2b |
529 | |:--------|------:|------:|------:|------:|
530 | | W1a_W2a | 1.0 | 0.5 | 0.5 | 0.2 |
531 | | W1a_W2b | 0.5 | 1.0 | 0.2 | 0.5 |
532 | | W1b_W2a | 0.5 | 0.2 | 1.0 | 0.5 |
533 | | W1b_W2b | 0.2 | 0.5 | 0.5 | 1.0 |
534 |
535 |
536 | ```{r}
537 |
538 | ```
539 |
540 | ### 2w*3b
541 |
542 | Create a dataset with a between-subject factor of "pet" having 3 levels ("cat", "dog", and "ferret") and a within-subject factor of "time" having 2 levels ("pre" and "post"). The N in each group should be 10. Means are:
543 |
544 | * cats: pre = 10, post = 12
545 | * dogs: pre = 14, post = 16
546 | * ferrets: pre = 18, post = 20
547 |
548 | SDs are all 5 and within-cell correlations are all 0.25.
549 |
550 | ```{r}
551 |
552 | ```
553 |
554 | ### Replications
555 |
556 | Create 5 datasets with a 2b*2b design, 30 participants in each cell. Each cell's mean should be 0, except B1a:B2a, which should be 0.5. The SD should be 1. Make the resulting data in long format.
557 |
558 | ```{r}
559 |
560 | ```
561 |
562 | ### Power
563 |
564 | Simulate 100 datasets like the one above and use `lm()` or `afex::aov_ez()` to look at the interaction between B1 and B2. What is the power of this design?
565 |
566 | ```{r}
567 |
568 | ```
569 |
570 |
--------------------------------------------------------------------------------
/inst/stubs/fixed-stub.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Fixed Effects"
3 | output:
4 | html_document:
5 | df_print: kable
6 | ---
7 |
8 | ```{r, include = FALSE}
9 | knitr::opts_chunk$set(
10 | collapse = TRUE,
11 | out.width = "100%",
12 | fig.width = 5,
13 | fig.height = 3,
14 | dpi = 144
15 | )
16 | ```
17 |
18 | ```{r libs, message=FALSE}
19 | library(tidyverse)
20 | library(faux)
21 | library(afex) # for anova and lmer
22 | library(broom)
23 | library(broom.mixed) # to make tidy tables of lmer output
24 |
25 | theme_set(theme_minimal(base_size = 14))
26 | ```
27 |
28 |
29 | ## Simulation functions
30 |
31 | The functions below are commonly used when you're setting up a simulated dataset.
32 |
33 | ### Repeating
34 |
35 | The function `rep()` lets you repeat the first argument a number of times.
36 |
37 | Use `rep()` to create a vector of alternating `"A"` and `"B"` values of length 24.
38 |
39 | ```{r rep1-times}
40 | rep(c("A", "B"), times = 12)
41 | ```
42 |
43 | If the second argument is a vector that is the same length as the first argument, each element in the first vector is repeated that many times. Use `rep()` to create a vector of 11 `"A"` values followed by 3 `"B"` values.
44 |
45 | ```{r rep-vector}
46 | rep(c("A", "B"), c(11, 3))
47 | ```
48 |
49 | You can repeat each element of the vector a specified number of times using the `each` argument. Use `rep()` to create a vector of 12 `"A"` values followed by 12 `"B"` values.
50 |
51 | ```{r rep-each}
52 | rep(c("A", "B"), each = 12)
53 | ```
54 |
55 | What do you think will happen if you set `times` to 3 and `each` to 2?
56 |
57 | ```{r rep-times-each}
58 | rep(c("A", "B"), times = 3, each = 2)
59 | ```
60 |
61 |
62 | ### Sequences
63 |
64 | The function `seq()` is useful for generating a sequence of numbers with some pattern.
65 |
66 | Use `seq()` to create a vector of the integers 0 to 10.
67 |
68 | ```{r seq1-10}
69 | seq(0, 10)
70 | ```
71 |
72 | You can set the `by` argument to count by numbers other than 1 (the default). Use `seq()` to create a vector of the numbers 0 to 100 by 10s.
73 |
74 | ```{r seq-by}
75 | seq(0, 100, by = 10)
76 | ```
77 |
78 | The argument `length.out` is useful if you know how many steps you want to divide something into. Use `seq()` to create a vector that starts with 0, ends with 100, and has 12 equally spaced steps (hint: how many numbers would be in a vector with 2 *steps*?).
79 |
80 | ```{r seq-length-out}
81 | seq(0, 100, length.out = 13)
82 | ```
83 |
84 | ### Uniform Distribution
85 |
86 | The uniform distribution is the simplest distribution. All numbers in the range have an equal probability of being sampled. Use `runif()` to sample from a continuous uniform distribution.
87 |
88 | ```{r runif}
89 | runif(n = 10, min = 0, max = 1)
90 | ```
91 |
92 |
93 | Pipe the result to `hist()` to make a quick histogram of your simulated data.
94 |
95 | ```{r runif-hist}
96 | runif(100000, min = 0, max = 1) %>% hist()
97 | ```
98 |
99 | ### Discrete Distribution
100 |
101 | You can use `sample()` to simulate events like rolling dice or choosing from a deck of cards. The code below simulates rolling a 6-sided die 10000 times. We set `replace` to `TRUE` so that each event is independent. See what happens if you set `replace` to `FALSE`.
102 |
103 | ```{r sample-replace, fig.cap = "Distribution of dice rolls."}
104 | rolls <- sample(1:6, 10000, replace = TRUE)
105 |
106 | # plot the results
107 | as.factor(rolls) %>% plot()
108 | ```
109 |
110 | You can also use sample to sample from a list of named outcomes.
111 |
112 | ```{r sample-list}
113 | pet_types <- c("cat", "dog", "ferret", "bird", "fish")
114 | sample(pet_types, 10, replace = TRUE)
115 | ```
116 |
117 | Ferrets, while the best pet, are a much less common pet than cats and dogs, so our sample isn't very realistic. You can set the probabilities of each item in the list with the `prob` argument.
118 |
119 | ```{r sample-prob}
120 | pet_types <- c("cat", "dog", "ferret", "bird", "fish")
121 | pet_prob <- c(0.3, 0.4, 0.1, 0.1, 0.1)
122 | pet_data <- sample(pet_types, 100, replace = TRUE, prob = pet_prob)
123 |
124 | as.factor(pet_data) %>% plot()
125 | ```
126 |
127 |
128 | ### Binomial Distribution
129 |
130 | The `rbinom` function will generate a random binomial distribution.
131 |
132 | * `n` = number of observations
133 | * `size` = number of trials
134 | * `prob` = probability of success on each trial
135 |
136 | Coin flips are a typical example of a binomial distribution, where we can assign heads to 1 and tails to 0.
137 |
138 | ```{r rbinom-fair}
139 | # 20 individual coin flips of a fair coin
140 | rbinom(20, 1, 0.5)
141 | ```
142 |
143 |
144 | ```{r rbinom-bias}
145 | # 20 individual coin flips of a biased (0.75) coin
146 | rbinom(20, 1, 0.75)
147 | ```
148 |
149 | You can generate the total number of heads in 1 set of 20 coin flips by setting `size` to 20 and `n` to 1.
150 |
151 | ```{r rbinom-size}
152 | # 1 set of 20 biased (0.75) coin flips
153 | rbinom(1, 20, 0.75)
154 | ```
155 |
156 | You can generate more sets of 20 coin flips by increasing the `n`.
157 |
158 | ```{r rbinom-n}
159 | # 10 sets of 20 fair coin flips
160 | rbinom(10, 20, 0.5)
161 | ```
162 |
163 | ### Normal Distribution
164 |
165 | We can simulate a normal distribution of size `n` if we know the `mean` and standard deviation (`sd`).
166 |
167 | ```{r rnorm}
168 | # 10 samples from a normal distribution with a mean of 0 and SD of 1
169 | rnorm(10, 0, 1)
170 | ```
171 |
172 | A density plot is usually the best way to visualise this type of data.
173 |
174 | ```{r rnorm-plot}
175 | # 100 samples from a normal distribution with a mean of 10 and SD of 2
176 | dv <- rnorm(100, 10, 2)
177 |
178 | # use sample to get a random colour
179 | fill_colour <- sample(colours(), 1)
180 |
181 | ggplot() +
182 | geom_density(aes(dv), fill = fill_colour) +
183 | scale_x_continuous(
184 | limits = c(0,20),
185 | breaks = seq(0,20)
186 | )
187 | ```
188 |
189 | Run the simulation above several times, noting how the density plot changes. Try changing the values of `n`, `mean`, and `sd`.
190 |
191 | ## Independent samples
192 |
193 | Now we're ready to start simulating some data. Let's start with a simple independent-samples design where the variables are from a normal distribution. Each subject produces one score (in condition A or B). What we need to know about these scores is:
194 |
195 | * How many subjects are in each condition?
196 | * What are the score means?
197 | * What are the score variances (or SDs)?
198 |
199 | ### Parameters
200 |
201 | First, set parameters for these values. This way, you can use these variables wherever you need them in the rest of the code and you can easily change them.
202 |
203 | ```{r ind-vars}
204 |
205 | A_sub_n <- 50
206 | B_sub_n <- 50
207 | A_mean <- 10
208 | B_mean <- 11
209 | A_sd <- 2.5
210 | B_sd <- 2.5
211 |
212 | ```
213 |
214 | ### Scores
215 |
216 | We can then generate the scores using the `rnorm()` function.
217 |
218 | ```{r ind-dat}
219 | A_scores <- rnorm(A_sub_n, A_mean, A_sd)
220 | B_scores <- rnorm(B_sub_n, B_mean, B_sd)
221 | ```
222 |
223 | You can stop here and just analyse your simulated data with `t.test(A_scores, B_scores)`, but usually you want to get your simulated data into a data table that looks like what you might eventually import from a CSV file with your actual experimental data.
224 |
225 | ```{r ind-tibble}
226 | dat <- tibble(
227 | sub_condition = rep( c("A", "B"), c(A_sub_n, B_sub_n) ),
228 | score = c(A_scores, B_scores)
229 | )
230 | ```
231 |
232 | If you're simulating data for a script where you will eventually import data from a csv file, you can save these data to a csv file and then re-read them in, so when you get your real data, all you need to do is comment out the simulation steps.
233 |
234 | ```{r}
235 | # make a data directory if there isn't one already
236 | if (!dir.exists("data")) dir.create("data")
237 |
238 | # save your simulated data
239 | write_csv(dat, "data/sim-data-ind-samples.csv")
240 |
241 | # start your analysis here
242 | dat <- read_csv("data/sim-data-ind-samples.csv")
243 |
244 | ```
245 |
246 |
247 | ### Check your data
248 |
249 | Always examine your simulated data after you generate it to make sure it looks like you want.
250 |
251 | ```{r ind-check}
252 | dat %>%
253 | group_by(sub_condition) %>%
254 | summarise(n = n() ,
255 | mean = mean(score),
256 | sd = sd(score),
257 | .groups = "drop")
258 | ```
259 |
260 |
261 | ### Analysis
262 |
263 | ```{r ind-test}
264 | t.test(score~sub_condition, dat)
265 | ```
266 |
267 | ### Function
268 |
269 | You can wrap all this in a function so you can run it many times to do a power calculation. Put all your parameters as arguments to the function.
270 |
271 | ```{r ind-func}
272 |
273 | ind_sim <- function(A_sub_n, B_sub_n,
274 | A_mean, B_mean,
275 | A_sd, B_sd) {
276 | # simulate data for groups A and B
277 | A_scores <- rnorm(A_sub_n, A_mean, A_sd)
278 | B_scores <- rnorm(B_sub_n, B_mean, B_sd)
279 |
280 | # put the data into a table
281 | dat <- tibble(
282 | sub_condition = rep( c("A", "B"), c(A_sub_n, B_sub_n) ),
283 | score = c(A_scores, B_scores)
284 | )
285 |
286 | # analyse the data
287 | t <- t.test(score~sub_condition, dat)
288 |
289 | # return a list of the values you care about
290 | # the double brackets ([[]]) get rid of the name of named numbers
291 | list(
292 | t = t$statistic[[1]],
293 | ci_lower = t$conf.int[[1]],
294 | ci_upper = t$conf.int[[2]],
295 | p = t$p.value[[1]],
296 | estimate = t$estimate[[1]] - t$estimate[[2]]
297 | )
298 | }
299 |
300 | ```
301 |
302 | Now run your new function with the values you used above.
303 |
304 | ```{r}
305 | # str() prints the resulting list in a shorter format
306 | ind_sim(50, 50, 10, 11, 2.5, 2.5) %>% str()
307 | ```
308 |
309 | Now you can use this function to run many simulations. The function `map_df` from the `purrr` package (loaded with `tidyverse`) is one of many ways to run a function many times and organise the results into a table.
310 |
311 | ```{r}
312 | mysim <- map_df(1:1000, ~ind_sim(50, 50, 10, 11, 2.5, 2.5))
313 | ```
314 |
315 | Now you can graph the data from your simulations.
316 |
317 | ```{r sim-p-fig}
318 | # set boundary = 0 when plotting p-values
319 | ggplot(mysim, aes(p)) +
320 | geom_histogram(binwidth = 0.05, boundary = 0,
321 | fill = "white", colour = "black")
322 | ```
323 |
324 |
325 | ```{r ind-sim-fig, fig.cap = "Distribution of results from simulated independent samples data"}
326 | mysim %>%
327 | gather(stat, value, t:estimate) %>%
328 | ggplot() +
329 | geom_density(aes(value, color = stat), show.legend = FALSE) +
330 | facet_wrap(~stat, scales = "free")
331 | ```
332 |
333 | You can calculate power as the proportion of simulations on which the p-value was less than your alpha.
334 |
335 | ```{r}
336 | alpha <- 0.05
337 | power <- mean(mysim$p < alpha)
338 | power
339 | ```
340 |
341 |
342 |
343 | ## Paired samples
344 |
345 | Now let's try a paired-samples design where the variables are from a normal distribution. Each subject produces two scores (in conditions A and B). What we need to know about these two scores is:
346 |
347 | * How many subjects?
348 | * What are the score means?
349 | * What are the score variances (or SDs)?
350 | * What is the correlation between the scores?
351 |
352 | ### Parameters {#paired-params}
353 |
354 | ```{r paired-vars}
355 |
356 | sub_n <- 100
357 | A_mean <- 10
358 | B_mean <- 11
359 | A_sd <- 2.5
360 | B_sd <- 2.5
361 | AB_r <- 0.5
362 |
363 | ```
364 |
365 |
366 | ### Correlated Scores
367 |
368 | You can then use `rnorm_multi()` to generate a data table with simulated values for correlated scores:
369 |
370 | ```{r sim-design}
371 | dat <- faux::rnorm_multi(
372 | n = sub_n,
373 | vars = 2,
374 | r = AB_r,
375 | mu = c(A_mean, B_mean),
376 | sd = c(A_sd, B_sd),
377 | varnames = c("A", "B")
378 | )
379 | ```
380 |
381 | You can also do this using the `MASS::mvrnorm` function, but `faux::rnorm_multi` is easier when you have more variables to simulate.
382 |
383 | ```{r}
384 | # make the correlation matrix
385 | cormat <- matrix(c( 1, AB_r,
386 | AB_r, 1),
387 | nrow = 2, byrow = TRUE)
388 |
389 | # make a corresponding matrix of the variance
390 | # (multiply the SDs for each cell)
391 | varmat <- matrix(c(A_sd * A_sd, A_sd * B_sd,
392 | A_sd * B_sd, B_sd * B_sd),
393 | nrow = 2, byrow = TRUE)
394 |
395 | # create correlated variables with the specified parameters
396 | S <- MASS::mvrnorm(n = sub_n,
397 | mu = c(A_mean, B_mean),
398 | Sigma = cormat * varmat)
399 | dat <- data.frame(
400 | A = S[, 1],
401 | B = S[, 2]
402 | )
403 |
404 | ```
405 |
406 |
407 | ### Check your data
408 |
409 | Now check your data; `faux` has a function `get_params()` that gives you the correlation table, means, and SDs for each numeric column in a data table.
410 |
411 | ```{r paired-check}
412 | faux::get_params(dat)
413 | ```
414 |
415 | ### Analysis
416 |
417 | ```{r paired-test}
418 | # paired-samples t-test
419 | t.test(dat$A, dat$B, paired = TRUE)
420 | ```
421 |
422 | ### Function
423 |
424 | ```{r paired-func}
425 |
426 | paired_sim <- function(sub_n, A_mean, B_mean, A_sd, B_sd, AB_r) {
427 |
428 | dat <- faux::rnorm_multi(
429 | n = sub_n,
430 | vars = 2,
431 | r = AB_r,
432 | mu = c(A_mean, B_mean),
433 | sd = c(A_sd, B_sd),
434 | varnames = c("A", "B")
435 | )
436 | t <- t.test(dat$A, dat$B, paired = TRUE)
437 |
438 | # return just the values you care about
439 | list(
440 | t = t$statistic[[1]],
441 | ci_lower = t$conf.int[[1]],
442 | ci_upper = t$conf.int[[2]],
443 | p = t$p.value[[1]],
444 | estimate = t$estimate[[1]]
445 | )
446 | }
447 |
448 | ```
449 |
450 | Run 1000 simulations and graph the results.
451 |
452 | ```{r}
453 | mysim_p <- map_df(1:1000, ~paired_sim(100, 10, 11, 2.5, 2.5, .5))
454 | ```
455 |
456 | ```{r pair-sim-fig, fig.cap = "Distribution of results from simulated paired samples data"}
457 | mysim_p %>%
458 | gather(stat, value, t:estimate) %>%
459 | ggplot() +
460 | geom_density(aes(value, color = stat), show.legend = FALSE) +
461 | facet_wrap(~stat, scales = "free")
462 | ```
463 |
464 | ```{r}
465 | alpha <- 0.05
466 | power <- mean(mysim_p$p < alpha)
467 | power
468 | ```
469 |
470 |
471 | ## Intercept model
472 |
473 | Now I'm going to show you a different way to simulate the same design. This might seem excessively complicated, but you will need this pattern when you start simulating data for mixed effects models.
474 |
475 | ### Parameters
476 |
477 | Remember, we used the following parameters to set up our simulation above:
478 |
479 | ```{r paired-vars2}
480 | sub_n <- 100
481 | A_mean <- 10
482 | B_mean <- 11
483 | A_sd <- 2.5
484 | B_sd <- 2.5
485 | AB_r <- 0.5
486 | ```
487 |
488 | From these, we can calculate the grand intercept (the overall mean regardless of condition), and the effect of condition (the mean of B minus A).
489 |
490 | ```{r}
491 | grand_i <- (A_mean + B_mean)/2
492 | AB_effect <- B_mean - A_mean
493 | ```
494 |
495 | We also need to think about variance a little differently. First, calculate the pooled variance as the mean of the variances for A and B (remember, variance is SD squared).
496 |
497 | ```{r}
498 | pooled_var <- (A_sd^2 + B_sd^2)/2
499 | ```
500 |
501 | The variance of the subject intercepts is `AB_r` times this pooled variance and the error variance is what is left over. We take the square root (`sqrt()`) to set the subject intercept and error SDs for simulation later.
502 |
503 | ```{r}
504 | sub_sd <- sqrt(pooled_var * AB_r)
505 | error_sd <- sqrt(pooled_var * (1-AB_r))
506 | ```
507 |
508 |
509 | ### Subject intercepts
510 |
511 | Now we use these variables to create a data table for our subjects. Each subject gets an ID and a **random intercept** (`sub_i`). The intercept is simulated from a random normal distribution with a mean of 0 and an SD of `sub_sd`. This represents how much higher or lower than the average score each subject tends to be (regardless of condition).
512 |
513 | ```{r}
514 | sub <- tibble(
515 | sub_id = 1:sub_n,
516 | sub_i = rnorm(sub_n, 0, sub_sd)
517 | )
518 | ```
519 |
520 | ### Observations
521 |
522 | Next, set up a table where each row represents one observation. We'll use one of my favourite functions for simulation: `crossing()`. This creates every possible combination of the listed factors (it works the same as `expand.grid()`, but the results are in a more intuitive order). Here, we're using it to create a row for each subject in each condition, since this is a fully within-subjects design.
523 |
524 | ```{r}
525 | obs <- crossing(
526 | sub_id = 1:sub_n,
527 | condition = c("A", "B")
528 | )
529 | ```
530 |
531 | ### Calculate the score
532 |
533 | Next, we join the subject table so each row has the information about the subject's random intercept and then calculate the score. I've done it in a few steps below for clarity. The score is just the sum of:
534 |
535 | * the overall mean (`grand_i`)
536 | * the subject-specific intercept (`sub_i`)
537 | * the effect (`effect`): the numeric code for condition (`condition.e`) multiplied by the effect of condition (`AB_effect`)
538 | * the error term (simulated from a normal distribution with mean of 0 and SD of `error_sd`)
539 |
540 | ```{r im-data}
541 | dat <- obs %>%
542 | left_join(sub, by = "sub_id") %>%
543 | mutate(
544 | condition.e = recode(condition, "A" = -0.5, "B" = 0.5),
545 | effect = AB_effect * condition.e,
546 | error = rnorm(nrow(.), 0, error_sd),
547 | score = grand_i + sub_i + effect + error
548 | )
549 | ```
550 |
551 | Use `get_params` to check the data. With data in long format, you need to specify the columns that contain the id, dv, and within-id variables.
552 |
553 | ```{r im-get-params}
554 | # check the data
555 | faux::get_params(dat,
556 | id = "sub_id",
557 | dv = "score",
558 | within = "condition")
559 | ```
560 |
561 | You can use the following code to put the data table into a more familiar "wide" format.
562 |
563 | ```{r im-wide}
564 | dat_wide <- dat %>%
565 | select(sub_id, condition, score) %>%
566 | spread(condition, score)
567 | ```
568 |
569 | ### Analyses
570 |
571 | You can analyse the data with a paired-samples t-test from the wide format:
572 |
573 | ```{r im-wide-t}
574 | # paired-samples t-test from dat_wide
575 | t.test(dat_wide$A, dat_wide$B, paired = TRUE)
576 | ```
577 |
578 | Or in the long format (note: the `paired` argument of the formula method for `t.test()` is deprecated in recent versions of R and defunct from R 4.4.0, so prefer the wide format or the `t.test(Pair(A, B) ~ 1)` syntax):
579 |
580 | ```{r im-long-t}
581 | # paired-samples t-test from dat (long)
582 | t.test(score ~ condition, dat, paired = TRUE)
583 | ```
584 |
585 | You can analyse the data with ANOVA using the `aov_4()` function from `afex`. (Notice how the F-value is the square of the t-value above.)
586 |
587 | ```{r im-afex}
588 | # anova using afex::aov_4
589 | aov <- afex::aov_4(score ~ (condition | sub_id), data = dat)
590 |
591 | aov$anova_table
592 | ```
593 |
594 |
595 | You can even analyse the data with a mixed effects model using the `lmer` function (the `afex` version gives you p-values, but the `lme4` version does not).
596 |
597 | ```{r im-lmer}
598 | # mixed effect model using afex::lmer
599 | lmem <- afex::lmer(score ~ condition.e + (1 | sub_id), data = dat)
600 |
601 | # displays a tidy table of the fixed effects
602 | broom.mixed::tidy(lmem, effects = "fixed")
603 | ```
604 |
605 | ## Simulate a dataset from an analysis
606 |
607 | Simulate a dataset from the parameters of an analysis. We'll use the built-in dataset `mtcars` to predict miles per gallon (`mpg`) from transmission type (`am`) and engine type (`vs`).
608 |
609 | ```{r}
610 | model <- lm(mpg ~ am * vs, data = mtcars)
611 | broom::tidy(model)
612 | ```
613 |
614 | ### Simulate
615 |
616 | We can now simulate a dataset with 50 observations from each transmission type (`am`) and engine type (`vs`) combination, then use the model parameters to generate predicted values for `mpg`.
617 |
618 | ```{r}
619 | err_sd <- sigma(model) # SD of the error term from the model
620 | fx <- coefficients(model) # fixed effect coefficients
621 |
622 | sim_mtcars <- tibble(
623 | am = rep(c(0, 0, 1, 1), each = 50),
624 | vs = rep(c(0, 1, 0, 1), each = 50)
625 | ) %>%
626 | mutate(err = rnorm(200, 0, err_sd),
627 | mpg = fx[1] +
628 | fx["am"]*am +
629 | fx["vs"]*vs +
630 | fx["am:vs"]*am*vs + err)
631 |
632 | ```
633 |
634 | Analyse the simulated data with `lm()` and output the results as a table using `broom::tidy()`
635 |
636 | ```{r}
637 | sim_model <- lm(mpg ~ am * vs, data = sim_mtcars)
638 | broom::tidy(sim_model)
639 | ```
640 |
641 | ### Function
642 |
643 | ```{r}
644 | carsim <- function(n, b0, b_am, b_vs, b_am_vs, err_sd) {
645 | sim_mtcars <- tibble(
646 | am = rep(c(0, 0, 1, 1), each = n),
647 | vs = rep(c(0, 1, 0, 1), each = n)
648 | ) %>%
649 | mutate(err = rnorm(n*4, 0, err_sd),
650 | mpg = b0 + b_am*am + b_vs*vs + b_am_vs*am*vs + err)
651 |
652 | sim_model <- lm(mpg ~ am * vs, data = sim_mtcars)
653 | broom::tidy(sim_model)
654 | }
655 | ```
656 |
657 |
658 | Run the function with the values from the original model, but cut the fixed effect sizes in half.
659 |
660 | ```{r}
661 | err_sd <- sigma(model)
662 | fx2 <- coefficients(model)/2
663 |
664 | carsim(50, fx2[1], fx2[2], fx2[3], fx2[4], err_sd)
665 | ```
666 |
667 | Repeat this 100 times and calculate power for each effect.
668 |
669 | ```{r}
670 | simstats <- map_df(1:100, ~carsim(50, fx2[1], fx2[2], fx2[3], fx2[4], err_sd))
671 |
672 | simstats %>%
673 | group_by(term) %>%
674 | summarise(power = mean(p.value < .05), .groups = "drop")
675 | ```
676 |
677 | ## Exercises
678 |
679 | Using the dataset below, predict `moral` disgust from the interaction between `pathogen` and `sexual` disgust using `lm()`.
680 |
681 | ```{r}
682 | disgust <- read_csv("https://psyteachr.github.io/msc-data-skills/data/disgust_scores.csv")
683 | ```
684 |
685 |
686 | ```{r}
687 |
688 | ```
689 |
690 | Simulate a new dataset of 100 people with a similar pathogen and sexual disgust distribution to the original dataset. Remember that these are likely to be correlated and that scores can only range from 0 to 6. (Hint: look at the help for `norm2trunc`)
691 |
692 | ```{r}
693 |
694 | ```
695 |
696 | Write a function to simulate data, analyse it, and return a table of results. Make sure you can vary the important parameters using arguments.
697 |
698 | ```{r}
699 |
700 | ```
701 |
702 |
703 | Calculate power for the same fixed effects as in the original analysis. Adjust the N until the design has around .80 power to detect a main effect of pathogen disgust.
704 |
705 | ```{r}
706 |
707 | ```
708 |
709 |
--------------------------------------------------------------------------------
/man/exercise.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/exercises.R
3 | \name{exercise}
4 | \alias{exercise}
5 | \title{Get an exercise}
6 | \usage{
7 | exercise(name = c("faux", "fixed", "mixed", "calories"), filename = NULL)
8 | }
9 | \arguments{
10 | \item{name}{The name of the exercise}
11 |
12 | \item{filename}{What filename you want to save (defaults to the name of the exercise in the working directory)}
13 | }
14 | \value{
15 | Saves a file to the working directory (or path from filename)
16 | }
17 | \description{
18 | Get an exercise
19 | }
20 | \examples{
21 | \dontrun{
22 | exercise("faux") # get exercise for the faux workshop
23 | exercise("fixed", "exercises/fixed.Rmd") # save into exercises directory
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/debruine/data-sim-workshops/9bd222cd3e1c988e30d005a2d80cf57f50aa8994/man/figures/logo.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/debruine/data-sim-workshops/9bd222cd3e1c988e30d005a2d80cf57f50aa8994/pkgdown/favicon/apple-touch-icon-120x120.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/debruine/data-sim-workshops/9bd222cd3e1c988e30d005a2d80cf57f50aa8994/pkgdown/favicon/apple-touch-icon-152x152.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/debruine/data-sim-workshops/9bd222cd3e1c988e30d005a2d80cf57f50aa8994/pkgdown/favicon/apple-touch-icon-180x180.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/debruine/data-sim-workshops/9bd222cd3e1c988e30d005a2d80cf57f50aa8994/pkgdown/favicon/apple-touch-icon-60x60.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/debruine/data-sim-workshops/9bd222cd3e1c988e30d005a2d80cf57f50aa8994/pkgdown/favicon/apple-touch-icon-76x76.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/debruine/data-sim-workshops/9bd222cd3e1c988e30d005a2d80cf57f50aa8994/pkgdown/favicon/apple-touch-icon.png
--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/debruine/data-sim-workshops/9bd222cd3e1c988e30d005a2d80cf57f50aa8994/pkgdown/favicon/favicon-16x16.png
--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/debruine/data-sim-workshops/9bd222cd3e1c988e30d005a2d80cf57f50aa8994/pkgdown/favicon/favicon-32x32.png
--------------------------------------------------------------------------------
/pkgdown/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/debruine/data-sim-workshops/9bd222cd3e1c988e30d005a2d80cf57f50aa8994/pkgdown/favicon/favicon.ico
--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 |
--------------------------------------------------------------------------------
/vignettes/calories.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Calorie Placement Re-Simulation"
3 | author: "Lisa DeBruine"
4 | output:
5 | rmarkdown::html_document:
6 | df_print: paged
7 | vignette: >
8 | %\VignetteIndexEntry{Calorie Placement Re-Simulation}
9 | %\VignetteEngine{knitr::rmarkdown}
10 | %\VignetteEncoding{UTF-8}
11 | ---
12 |
13 | ```{r setup, include=FALSE}
14 | knitr::opts_chunk$set(echo = TRUE)
15 |
16 | library(tidyverse)
17 | library(faux)
18 | library(afex)
19 | library(emmeans)
20 | faux_options(plot = FALSE)
21 |
22 | set.seed(8675309)
23 | ```
24 |
25 | ## Data Source
26 |
27 | We will be replicating some of the re-analyses in Francis & Thunell's (2020) Meta-Psychology paper: Excess success in "Don't count calorie labeling out: Calorie counts on the left side of menu items lead to lower calorie food choices".
28 |
29 | They ran power analyses for all 6 studies in Dallas, Liu, and Ubel's (2019) study showing that people order food with significantly fewer calories when the calorie count was placed to the left of the item than to the right (or having no calorie label). They then used these power estimates to calculate the probability of all 6 out of 6 studies being significant, given the observed power of each study.
30 |
31 | * [Re-analysis](https://doi.org/10.15626/MP.2019.2266)
32 | * [Re-analysis code](https://osf.io/xrdhj/)
33 | * [Original paper](https://doi.org/10.1002/jcpy.1053)
34 |
35 | Table 1 of the re-analysis paper provides all of the parameters we will need.
36 |
37 | ## Reanalyses
38 |
39 | ### Study 2
40 |
41 | We'll start with S2 because the analysis is very straightforward. It's a between-subjects design, where 143 subjects saw calorie placement on the left and their mean calories ordered were 1249.83 (SD = 449.07), while 132 subjects saw calorie placement on the right and their mean calories ordered were 1362.31 (SD = 447.35).
42 |
43 | Let's first simulate a single data table with these parameters and set up our analysis.
44 |
45 | ```{r}
46 | data <- sim_design(
47 | between = list(placement = c("left", "right")),
48 | mu = c(left = 1249.83, right = 1362.31),
49 | sd = c(left = 449.07, right = 447.35),
50 | n = c(left = 143, right = 132),
51 | dv = "calories"
52 | )
53 | ```
54 |
55 | Wrap the analysis in a function using the `tidy()` function from {broom} to get the results in a tidy table. Check that it works by running it on the single data set above.
56 |
57 | ```{r}
58 | s2_analyse <- function(data) {
59 | t.test(calories ~ placement, data = data) |>
60 | broom::tidy()
61 | }
62 |
63 | s2_analyse(data)
64 | ```
65 |
66 |
67 | Now, simulate the data 500 times.
68 |
69 | ```{r}
70 | s2 <- sim_design(
71 | between = list(placement = c("left", "right")),
72 | mu = c(left = 1249.83, right = 1362.31),
73 | sd = c(left = 449.07, right = 447.35),
74 | n = c(left = 143, right = 132),
75 | dv = "calories",
76 | rep = 500
77 | )
78 | ```
79 |
80 | Run the analysis on each data set.
81 |
82 | ```{r}
83 | s2_sim <- s2 |>
84 | mutate(analysis = map(data, s2_analyse)) |>
85 | select(-data) |>
86 | unnest(analysis)
87 |
88 | head(s2_sim)
89 | ```
90 |
91 | Summarise the `p.value` column to get power.
92 |
93 | ```{r}
94 | s2_power <- s2_sim |>
95 | mutate(sig = p.value < .05) |>
96 | summarise(power = mean(sig)) |>
97 | pull(power)
98 | ```
99 |
100 | Compare this value (`r s2_power`) with the value in the paper (0.5426).
101 |
102 | ### Study 1
103 |
104 | Study 1 is a little more complicated because the design includes a "no label" condition, so the decision rule for supporting the hypothesis is more complicated.
105 |
106 | The data simulation is relatively straightforward, though.
107 |
108 | ```{r}
109 | data <- sim_design(
110 | between = list(placement = c("left", "right", "none")),
111 | mu = c(654.53, 865.41, 914.34),
112 | sd = c(390.45, 517.26, 560.94),
113 | n = c(45, 54, 50),
114 | dv = "calories"
115 | )
116 | ```
117 |
118 | Set up the analysis. Here, we really just care about three p-values, so we'll just return those. We can use a function from the {emmeans} package to check the two pairwise comparisons.
119 |
120 | ```{r}
121 | afex::set_sum_contrasts() # avoids annoying afex message on each run
122 | afex_options(include_aov = TRUE) # we need aov for lsmeans
123 |
124 | s1_analyse <- function(data) {
125 | # main effect of placement
126 | a <- afex::aov_ez(
127 | id = "id",
128 | dv = "calories",
129 | between = "placement",
130 | data = data
131 | )
132 |
133 | # contrasts
134 | e <- emmeans(a, "placement")
135 | c1 <- list(lr = c(-0.5, 0.5, 0),
136 | ln = c(-0.5, 0, 0.5))
137 | b <- contrast(e, c1, adjust = "holm") |>
138 | broom::tidy()
139 |
140 | data.frame(
141 | p_all = a$anova_table$`Pr(>F)`[[1]],
142 | p_1 = b$adj.p.value[[1]],
143 | p_2 = b$adj.p.value[[2]]
144 | )
145 | }
146 |
147 | s1_analyse(data)
148 | ```
149 |
150 | Let's just replicate this 100 times so the simulation doesn't take too long to run at first. We can always increase it later after we've run some sense checks.
151 |
152 | ```{r}
153 | s1 <- sim_design(
154 | between = list(placement = c("left", "right", "none")),
155 | mu = c(654.53, 865.41, 914.34),
156 | sd = c(390.45, 517.26, 560.94),
157 | n = c(45, 54, 50),
158 | dv = "calories",
159 | rep = 100
160 | )
161 | ```
162 |
163 | Run the analysis on each data set.
164 |
165 | ```{r}
166 | s1_sim <- s1 |>
167 | mutate(analysis = map(data, s1_analyse)) |>
168 | select(-data) |>
169 | unnest(analysis)
170 |
171 | head(s1_sim)
172 | ```
173 |
174 | Calculating power is a little trickier here, as all three p-values need to be significant to support the hypothesis.
175 |
176 | ```{r}
177 | s1_power <- s1_sim |>
178 | mutate(sig = (p_all < .05) &
179 | (p_1 < .05) &
180 | (p_2 < .05) ) |>
181 | summarise(power = mean(sig)) |>
182 | pull(power)
183 | ```
184 |
185 | Compare this value (`r s1_power`) with the value in the paper (0.4582).
186 |
187 | ### Study 3
188 |
189 | Now you can use the pattern from Study 1 to analyse the data for Study 3. We'll start with the repeated data set.
190 |
191 | ```{r}
192 | s3 <- sim_design(
193 | between = list(placement = c("left", "right", "none")),
194 | mu = c(1428.24, 1308.66, 1436.79),
195 | sd = c(377.02, 420.14, 378.47),
196 | n = c(85, 86, 81),
197 | dv = "calories",
198 | rep = 100
199 | )
200 | ```
201 |
202 | These data were collected in the Hebrew language, which reads right to left, so the paired contrasts will be different.
203 |
204 | ```{r}
205 | s3_analyse <- function(data) {
206 | # main effect of placement
207 | a <- afex::aov_ez(
208 | id = "id",
209 | dv = "calories",
210 | between = "placement",
211 | data = data
212 | )
213 |
214 | # contrasts (reversed)
215 | e <- emmeans(a, "placement")
216 | c1 <- list(rl = c(0.5, -0.5, 0),
217 | ln = c(0, -0.5, 0.5))
218 | b <- contrast(e, c1, adjust = "holm") |>
219 | broom::tidy()
220 |
221 | data.frame(
222 | p_all = a$anova_table$`Pr(>F)`[[1]],
223 | p_1 = b$adj.p.value[[1]],
224 | p_2 = b$adj.p.value[[2]]
225 | )
226 | }
227 | ```
228 |
229 |
230 | Run the analysis on each data set.
231 |
232 | ```{r}
233 | s3_sim <- s3 |>
234 | mutate(analysis = map(data, s3_analyse)) |>
235 | select(-data) |>
236 | unnest(analysis)
237 |
238 | head(s3_sim)
239 | ```
240 |
241 | ```{r}
242 | s3_power <- s3_sim |>
243 | mutate(sig = (p_all < .05) &
244 | (p_1 < .05) &
245 | (p_2 < .05) ) |>
246 | summarise(power = mean(sig)) |>
247 | pull(power)
248 | ```
249 |
250 | Compare this value (`r s3_power`) with the value in the paper (0.3626).
251 |
252 |
253 | ### Study S1
254 |
255 | Now you can use the pattern from Study 2 to analyse the data for Study S1. You can even reuse the analysis function `s2_analyse()`!
256 |
257 | ```{r}
258 | ss1 <- sim_design(
259 | between = list(placement = c("left", "right")),
260 | mu = c(left = 185.94, right = 215.73),
261 | sd = c(left = 93.92, right = 95.33),
262 | n = c(left = 99, right = 77),
263 | dv = "calories",
264 | rep = 1000
265 | )
266 | ```
267 |
268 | ```{r}
269 | ss1_sim <- ss1 |>
270 | mutate(analysis = map(data, s2_analyse)) |>
271 | select(-data) |>
272 | unnest(analysis)
273 | ```
274 |
275 |
276 | ```{r}
277 | ss1_power <- ss1_sim |>
278 | mutate(sig = p.value < .05) |>
279 | summarise(power = mean(sig)) |>
280 | pull(power)
281 | ```
282 |
283 |
284 | ### Study S2
285 |
286 | Now you can use the pattern from Study 1 to analyse the data for Study S2. You can even reuse the analysis function `s1_analyse()`!
287 |
288 | ```{r}
289 | ss2 <- sim_design(
290 | between = list(placement = c("left", "right", "none")),
291 | mu = c(1182.15, 1302.23, 1373.74),
292 | sd = c(477.60, 434.41, 475.77),
293 | n = c(139, 141, 151),
294 | dv = "calories",
295 | rep = 100
296 | )
297 | ```
298 |
299 | ```{r}
300 | ss2_sim <- ss2 |>
301 | mutate(analysis = map(data, s1_analyse)) |>
302 | select(-data) |>
303 | unnest(analysis)
304 | ```
305 |
306 | ```{r}
307 | ss2_power <- ss2_sim |>
308 | mutate(sig = (p_all < .05) &
309 | (p_1 < .05) &
310 | (p_2 < .05) ) |>
311 | summarise(power = mean(sig)) |>
312 | pull(power)
313 | ```
314 |
315 | ### Study S3
316 |
317 | Now you can use the pattern from Study 1 to analyse the data for Study S3.
318 |
319 | ```{r}
320 | ss3 <- sim_design(
321 | between = list(placement = c("left", "right", "none")),
322 | mu = c(1302.03, 1373.15, 1404.35),
323 | sd = c(480.02, 442.49, 422.03),
324 | n = c(336, 337, 333),
325 | dv = "calories",
326 | rep = 100
327 | )
328 | ```
329 |
330 | ```{r}
331 | ss3_sim <- ss3 |>
332 | mutate(analysis = map(data, s1_analyse)) |>
333 | select(-data) |>
334 | unnest(analysis)
335 | ```
336 |
337 | ```{r}
338 | ss3_power <- ss3_sim |>
339 | mutate(sig = (p_all < .05) &
340 | (p_1 < .05) &
341 | (p_2 < .05) ) |>
342 | summarise(power = mean(sig)) |>
343 | pull(power)
344 | ```
345 |
346 | ## Conclusion
347 |
348 | Now that you've calculated power for each of the 6 studies, just multiply the 6 power values together to get the probability that all 6 studies will be significant.
349 |
350 |
351 | ```{r}
352 | power_table <- tribble(
353 | ~study, ~power_ft, ~ power_my,
354 | "1", 0.4582, s1_power,
355 | "2", 0.5426, s2_power,
356 | "3", 0.3626, s3_power,
357 | "S1", 0.5358, ss1_power,
358 | "S2", 0.5667, ss2_power,
359 | "S3", 0.4953, ss3_power
360 | )
361 |
362 | power_table
363 | ```
364 |
365 | The `reduce()` function from {purrr} applies a function sequentially over a vector, so it can give us the product of all the values in the power columns.
366 |
367 | ```{r}
368 | prob_ft <- purrr::reduce(power_table$power_ft, `*`)
369 | prob_my <- purrr::reduce(power_table$power_my, `*`)
370 | ```
371 |
372 | The Francis & Thunell paper showed a `r prob_ft` probability of getting 6 of 6 studies significant. Our re-simulation showed a `r prob_my` probability.
373 |
374 |
--------------------------------------------------------------------------------
/vignettes/data/sim-data-ind-samples.csv:
--------------------------------------------------------------------------------
1 | sub_condition,score
2 | A,8.659143827311953
3 | A,14.306681811819699
4 | A,10.713040492669082
5 | A,10.414204142174725
6 | A,10.613585812486479
7 | A,7.953210054952468
8 | A,6.52177813719714
9 | A,8.365117743095091
10 | A,11.092587459730957
11 | A,14.062281356126192
12 | A,12.051544481443418
13 | A,14.500260485544738
14 | A,11.327941790503406
15 | A,4.849147729673105
16 | A,7.994833894625758
17 | A,11.496559868710417
18 | A,11.12338443739307
19 | A,9.41681818225328
20 | A,8.378648100425668
21 | A,12.25368069014773
22 | A,12.208135307589908
23 | A,12.833950353709977
24 | A,11.822052862134656
25 | A,10.061984283518894
26 | A,10.119329581809868
27 | A,8.034391626028608
28 | A,10.238392796236885
29 | A,6.914536464377451
30 | A,7.886804263927875
31 | A,12.683818897306685
32 | A,13.50246310263745
33 | A,12.48201122505167
34 | A,9.835467681159713
35 | A,14.427085525719825
36 | A,6.965452117476058
37 | A,7.558862658433231
38 | A,8.068466141423594
39 | A,10.806290949776782
40 | A,10.658575820446046
41 | A,12.830161666637538
42 | A,7.544107890339511
43 | A,10.15804909878836
44 | A,7.574301063484666
45 | A,7.157640215571564
46 | A,10.763093438116673
47 | A,6.8443775356767595
48 | A,11.406947180834656
49 | A,8.565681068680025
50 | A,16.47173345179801
51 | A,13.280822099741172
52 | B,7.290556337684006
53 | B,7.560913283694237
54 | B,9.601957010873326
55 | B,14.672506746437755
56 | B,14.160166038360277
57 | B,11.784889131576811
58 | B,14.34143310885541
59 | B,12.55246678752024
60 | B,10.840201103843262
61 | B,14.977746875421019
62 | B,10.11201265244846
63 | B,8.116960727589733
64 | B,8.345164636310528
65 | B,11.580494569521154
66 | B,8.991330584941077
67 | B,8.570957515146812
68 | B,11.035994512923782
69 | B,13.05100239532186
70 | B,8.163754096912456
71 | B,12.920561183356474
72 | B,10.499891656247017
73 | B,7.965685472608644
74 | B,13.936347873516183
75 | B,8.949386887496775
76 | B,11.217647958524797
77 | B,10.781408041597441
78 | B,8.632934410323612
79 | B,14.889816106792939
80 | B,9.764900268001272
81 | B,12.65100663200029
82 | B,12.603346488280355
83 | B,9.333126007262221
84 | B,10.874738393461369
85 | B,11.540604067658668
86 | B,13.589746212707297
87 | B,11.552525767214489
88 | B,12.144489376016997
89 | B,10.939591720847423
90 | B,9.496188379394045
91 | B,7.380340991356158
92 | B,8.388628883391284
93 | B,16.916674965222786
94 | B,10.204337529817664
95 | B,9.368852181073988
96 | B,6.157268812910302
97 | B,8.561531899970976
98 | B,11.802891862870466
99 | B,11.58933949228355
100 | B,10.56113629759848
101 | B,9.240614581122362
102 |
--------------------------------------------------------------------------------