├── .Rprofile
├── .github
    └── workflows
    │   └── publish.yaml
├── .gitignore
├── CITATION.cff
├── Hallgren2013
    ├── ReadingSheet.md
    ├── bootstrapping.r
    ├── mediation_raw_data.csv
    ├── novel question.R
    ├── novel_question_output.csv
    └── power analysis.R
├── Ihle2020
    └── glm_Freq_vs_YN.R
├── Introduction-Simulations-in-R.Rproj
├── LICENSE-CODE.md
├── LICENSE.md
├── README.md
├── _quarto.yml
├── assets
    ├── 1000hist10N01.png
    ├── 24hist1000N01.png
    ├── 24hist10N01.png
    ├── LMU-OSC_favicon.jpg
    ├── LMU-OSC_logo.jpg
    ├── conclude.png
    ├── define.png
    ├── download.PNG
    ├── existing-directory.PNG
    ├── explore.png
    ├── files-list.PNG
    ├── find-directory.PNG
    ├── generate.png
    ├── hist-power.png
    ├── hist10N01.png
    ├── logreg-results.png
    ├── musd-24-10-N01.png
    ├── musd-24-1000-N01.png
    ├── replicate1.png
    ├── replicate2.png
    ├── replicate3.png
    ├── simulated-data.png
    ├── test.png
    ├── ttest-changing-n.png
    └── ttest-changing-nrep.png
├── custom.scss
├── exercise_script_with_solutions.R
├── exercise_script_without_solutions.R
├── index.qmd
├── matomo-analytics.html
├── renv.lock
├── renv
    ├── .gitignore
    ├── activate.R
    └── settings.json
├── styles.css
└── tutorial_pages
    ├── basic-principles.qmd
    ├── check-alpha.qmd
    ├── check-power.qmd
    ├── definition.qmd
    ├── download-repo.qmd
    ├── dry-rule.qmd
    ├── general-structure.qmd
    ├── limitations.qmd
    ├── number-of-simulations-nrep.qmd
    ├── purpose.qmd
    ├── random-numbers-generators.qmd
    ├── real-life-example.qmd
    ├── repeat.qmd
    ├── resources.qmd
    ├── sample-size-n.qmd
    ├── seed.qmd
    └── simulate-for-preregistration.qmd


/.Rprofile:
--------------------------------------------------------------------------------
1 | source("renv/activate.R")
2 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   workflow_dispatch:  # allows the workflow to be started manually
 3 |   push:
 4 |     branches: main  # also runs on every push to the main branch
 5 | 
 6 | name: Quarto Publish
 7 | 
 8 | jobs:
 9 |   build-deploy:
10 |     runs-on: ubuntu-latest
11 |     permissions:
12 |       contents: write  # NOTE(review): presumably required so the publish step can push to the gh-pages branch - confirm
13 |     steps:
14 |       - name: Check out repository
15 |         uses: actions/checkout@v4
16 | 
17 |       - name: Set up Quarto
18 |         uses: quarto-dev/quarto-actions/setup@v2
19 | 
20 |       - name: Install R
21 |         uses: r-lib/actions/setup-r@v2
22 |         with:
23 |           r-version: '4.4.0'  # pinned R version; NOTE(review): presumably should match the version recorded in renv.lock - confirm
24 | 
25 |       - name: Install R Dependencies
26 |         uses: r-lib/actions/setup-renv@v2  # restores the packages listed in renv.lock
27 |         with:
28 |           cache-version: 1  # NOTE(review): presumably bumped to invalidate the cached package library - confirm
29 | 
30 |       - name: Render and Publish
31 |         uses: quarto-dev/quarto-actions/publish@v2
32 |         with:
33 |           target: gh-pages  # publish target of the rendered site
34 |         env:
35 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}  # token provided by GitHub Actions for this run
36 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .Rapp.history
 4 | 
 5 | # Session Data files
 6 | .RData
 7 | .RDataTmp
 8 | 
 9 | # User-specific files
10 | .Ruserdata
11 | 
12 | # Example code in package build process
13 | *-Ex.R
14 | 
15 | # Output files from R CMD build
16 | /*.tar.gz
17 | 
18 | # Output files from R CMD check
19 | /*.Rcheck/
20 | 
21 | # RStudio files
22 | .Rproj.user/
23 | 
24 | # produced vignettes
25 | vignettes/*.html
26 | vignettes/*.pdf
27 | 
28 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
29 | .httr-oauth
30 | 
31 | # knitr and R markdown default cache directories
32 | *_cache/
33 | /cache/
34 | 
35 | # Temporary files created by R markdown
36 | *.utf8.md
37 | *.knit.md
38 | 
39 | # R Environment Variables
40 | .Renviron
41 | 
42 | # pkgdown site
43 | docs/
44 | 
45 | # translation temp files
46 | po/*~
47 | 
48 | # RStudio Connect folder
49 | rsconnect/
50 | 
51 | # Quarto
52 | /.quarto/
53 | 
54 | # Future-proofing: ignore commonly included files that should not be tracked
55 | # Inspired by usethis::git_vaccinate() options
56 | .DS_Store
57 | .Rproj.user
58 | .httr-oauth
59 | .quarto
60 | _site/
61 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | message: "If you use this software, please cite it as below."
 3 | authors:
 4 | - family-names: "Ihle"
 5 |   given-names: "Malika"
 6 |   orcid: "https://orcid.org/0000-0002-3242-5981"
 7 |   affiliation:
 8 |   - name: "LMU Open Science Center"
 9 |     ror: "https://ror.org/029e6qe04"
10 | title: "Introduction to Simulations in R"
11 | version: 0.1.0
12 | doi: "DOI_HERE_AFTER_GENERATED"
13 | date-released: 2021-11-22
14 | repository-code: "https://github.com/lmu-osc/Introduction-Simulations-in-R"
15 | url: "https://lmu-osc.github.io/Introduction-Simulations-in-R"
16 | preferred-citation:
17 |   authors:
18 |     - family-names: "Ihle"
19 |       given-names: "Malika"
20 |       orcid: "https://orcid.org/0000-0002-3242-5981"
21 |   title: "Introduction to Simulations in R"
22 |   year: 2021
23 |   doi: "DOI_HERE_AFTER_GENERATED"
24 |   publisher:
25 |     name: "LMU Open Science Center"
26 |     ror: "https://ror.org/029e6qe04"
27 |   repository-code: "https://github.com/lmu-osc/Introduction-Simulations-in-R"
28 |   url: "https://lmu-osc.github.io/Introduction-Simulations-in-R"
29 |   type: "book"
30 | 


--------------------------------------------------------------------------------
/Hallgren2013/ReadingSheet.md:
--------------------------------------------------------------------------------
 1 | # Conducting Simulation Studies in the R Programming Environment - Reading Sheet
 2 | 
 3 | ***
 4 | [Hallgren K. A. 2013. Conducting simulation studies in the R programming environment. Tutor Quant Methods Psychol. 9(2): 43–60.](https://doi.org/10.20982/tqmp.09.2.p043) 
 5 | 
 6 | ***
 7 | 
 8 | Hallgren (2013) describes the benefits of using simulations and provides examples of applications.
 9 | 
10 | 1)	Please list all the words that you do not know/concepts you do not understand in this paper (except mediation, branching, zero-inflation, ceiling effect, structural equation models, social network exponential random graph).  
11 |     *  
12 |     *  
13 |     *  
14 | 
15 | 2) In your own words, describe which steps are common to all sorts of simulations. Illustrate with simple examples rather than, or in addition to, using terms such as assumptions and parameters.    
16 | **1)**   
17 | **2)**  
18 | **3)**  
19 | **4)**  
20 | **5)**  
21 | **6)**  
22 | 
23 | 
24 | 
25 | 3)	In your own words, describe the 3 types of applications for simulations covered by the author.  
26 | **1)**   
27 | **2)**  
28 | **3)** 
29 | 
30 | 4)	Describe, with your own words and/or examples, the 4 limitations to simulations mentioned by the author.  
31 | **1)**   
32 | **2)**  
33 | **3)**  
34 | **4)** 


--------------------------------------------------------------------------------
/Hallgren2013/bootstrapping.r:
--------------------------------------------------------------------------------
 1 | #Bootstrapping a 95% CI for mediation test
 2 | 
 3 | #Bootstrap a percentile 95% CI for the indirect effect (a*b) of X on Y through M
 4 | mediation_bootstrap <- function(X, M, Y, reps){
 5 | 	#X, M, Y: equal-length numeric vectors (one element per case); reps: number of bootstrap resamples (>= 1)
 6 | 	#Returns the 2.5% and 97.5% quantiles of the bootstrapped a*b values; draws a histogram as a side effect
 7 | 	stopifnot(length(X) > 0, length(M) == length(X), length(Y) == length(X), reps >= 1) #mismatched lengths would otherwise be silently mis-resampled
 8 | 	ab_vector = numeric(reps) 					#preallocate instead of growing the vector on every iteration
 9 | 	for (i in seq_len(reps)){ 					#loop the number of times specified by the reps parameter
10 | 		s = sample.int(length(X), replace=TRUE) 	#sample N case indices WITH replacement
11 | 		Xs = X[s] 								#extract X variable for the sampled cases indexed by s
12 | 		Ys = Y[s] 								#extract Y variable for the sampled cases indexed by s
13 | 		Ms = M[s] 								#extract M variable for the sampled cases indexed by s
14 | 		a = lm(Ms ~ Xs)$coefficients[2] 		#beta coefficient for magnitude of X->M relationship
15 | 		b = lm(Ys ~ Xs + Ms)$coefficients[3] 	#beta coefficient for magnitude of M->Y relationship (with X covaried)
16 | 		ab_vector[i] = a*b 						#store the product of coefficients for this resample
17 | 	}
18 | 	bootlim = quantile(ab_vector, c(0.025, 0.975)) #ab values at 2.5 and 97.5 percentile, representing 95% CI
19 | 	h = hist(ab_vector) 						#keep the histogram object so the markers can be scaled to it
20 | 	segments(bootlim, y0=0, y1=max(h$counts), lty=2) #marker height follows the tallest bar instead of a fixed 1000
21 | 	text(bootlim, y=max(h$counts), labels=c("2.5 %ile", "97.5 %ile"), pos=c(2, 4)) #labels sit beside the markers, inside the plot region
22 | 	return(bootlim) #return the 95% CI
23 | }
24 | 
25 | #Fix the seed of the random number generator (optional) so that the bootstrap
26 | #resamples, and therefore the reported interval, are identical on every run.
27 | set.seed(seed = 192)
28 | 
29 | #Load the raw mediation data to be resampled
30 | #MUST REPLACE ... WITH APPROPRIATE FILE PATH
31 | d_raw <- read.csv(file = "C:\\...\\mediation_raw_data.csv", sep = ",", header = TRUE)
32 | 
33 | #95% bootstrap confidence interval for the indirect effect in the X-M-Y mediation model
34 | mediation_bootstrap(X = d_raw[["X"]], M = d_raw[["M"]], Y = d_raw[["Y"]], reps = 1000)
35 | 
36 | 


--------------------------------------------------------------------------------
/Hallgren2013/mediation_raw_data.csv:
--------------------------------------------------------------------------------
  1 | "X","M","Y"
  2 | -0.459422592229592,0.375619484669753,0.272959846539394
  3 | 0.545463582895455,1.70238291495158,0.779458587258395
  4 | -0.764667622547426,1.31145668566568,1.97433191856361
  5 | 0.940701276617902,-0.479281526586582,-0.56419241981757
  6 | 0.632187241133992,0.0395836411069699,-0.362511994558092
  7 | 0.532701760922955,2.64574838110305,0.14545399872269
  8 | 0.865085435904299,-0.0988285541271401,1.64140181276746
  9 | -1.64766444870884,-0.429544714376683,-0.0292141828560529
 10 | 0.51497317285928,-0.277791339756469,-0.320530246116004
 11 | -1.02611859450608,0.672581489783584,0.329954160085683
 12 | -0.463132033529028,0.190621173338574,-1.70712251876041
 13 | 1.00826496341162,0.0492571352502494,0.859641610103991
 14 | -0.690844714971464,0.0188770817369391,0.474249203167462
 15 | 0.167557747899954,0.166965689098577,0.548628675172202
 16 | 0.601566518904583,1.63076620803095,2.83272913732156
 17 | 0.0889074060164363,-0.76857289092241,-0.147586135670116
 18 | 0.428803196602837,0.209436130228748,-0.16335812158001
 19 | -1.29751648242467,-0.80240113656635,-2.27646195148323
 20 | -0.034062267664108,-0.90754646541644,-1.67408774914988
 21 | -0.341377414409222,-0.341788858023884,-1.32022471988158
 22 | 0.157073644521899,-0.339462423609617,1.23425380307358
 23 | -0.917293173339187,-0.105963838395548,0.351970030387375
 24 | -0.862692430674762,-2.33008939492282,-0.552730574568588
 25 | -0.332737428688238,-0.0771199808434716,0.354156043326907
 26 | 0.960829214713514,-0.393441390804781,2.15616461674781
 27 | 0.664058247829198,0.0819135952122542,0.418389278032388
 28 | -0.900433517739271,0.326310931984312,-0.780038331735161
 29 | -0.609606909880633,-0.134194257459534,1.97136988688382
 30 | -1.90173312554445,-0.138183736509094,-0.385042147481615
 31 | -0.134885416640231,-0.0589933490076972,0.239722530569949
 32 | 1.79293144575763,1.16857300293463,1.15263104808182
 33 | -1.46430694916678,-1.75503464315195,0.0175360749628484
 34 | 0.581177169174161,0.474813998082792,-0.581530519535664
 35 | -2.0526985013853,0.336597612105158,0.672479304543152
 36 | -0.332583034605221,-0.338069825281579,0.230466009935019
 37 | 0.0475103829812363,-0.0562065056847318,1.23506615677897
 38 | -0.269520918748873,-1.28513140425891,0.6031668148111
 39 | 1.45739923586218,-0.440189619960848,1.38678494824235
 40 | -1.00351399571707,-0.580758693893209,0.200888851037415
 41 | 0.777977816693309,1.64002507600881,0.942868045451794
 42 | 0.253203171543432,1.10557064173729,-0.100408203709445
 43 | -0.104232512920019,-1.60632210793143,-0.0974069833967142
 44 | -0.263489324159973,0.0573082362673224,-0.769242566434724
 45 | 0.311518851168968,0.577620173327382,-1.1405982559827
 46 | -0.391805459679531,-0.354518321884484,0.292982672287753
 47 | -0.171261490855296,0.0672829421201184,0.799234659651466
 48 | 1.9693116113272,0.689625683643709,1.26895745433345
 49 | -0.726416291271273,-0.276495065064203,0.0557111350214908
 50 | -1.46349970058361,0.695765703639326,2.00084805181987
 51 | -0.0269950751087659,-0.100428799891376,-0.953672544839969
 52 | -0.467088311213228,0.683871839226205,0.889503300649842
 53 | 0.196123881071676,0.711232467027268,-0.709598756899273
 54 | 1.48339942503247,1.52372018990832,0.7192220552732
 55 | -0.855386570239837,-0.581476928649075,-0.427207002046976
 56 | -0.54867454387952,-0.434813711441015,0.085355811248403
 57 | -1.13627342003011,-0.926000895659814,0.515040617166703
 58 | -1.41289402529198,0.600620663407046,-0.444462145925673
 59 | 0.513862834697331,-0.210562491133487,-1.06860663333026
 60 | 0.411280449718891,1.11154616725845,1.33592042794437
 61 | 0.23570186451873,-0.562718450611253,0.680296774540272
 62 | -0.181395566964554,0.704389295825081,0.0735790385235073
 63 | 0.532647678564621,-0.108851980011845,-0.952179772619551
 64 | -1.44740583352871,0.237351615348754,2.06061200556376
 65 | -2.30123583485102,-1.1775289345317,-1.85130626116058
 66 | -0.721574368895957,0.778537631403667,0.76948197402045
 67 | -0.152514187457018,-0.385714404551426,0.705345603866239
 68 | 0.729437861652527,2.20062518746485,-0.0706137888876649
 69 | 1.9506609896396,1.40729833053713,0.216502481166413
 70 | 0.308668401060737,-1.81863765442643,0.730132141120486
 71 | -2.01260465100557,-0.501334450666778,1.23103223673996
 72 | -0.284998353949408,1.19248315162128,0.00836929967864347
 73 | -0.988993637785728,-0.0213864228809247,0.655516518252573
 74 | -1.46289595218494,-0.22893986974449,2.4154226479429
 75 | -0.164584837906147,-0.253317813571704,1.7851588099309
 76 | 0.429090486670404,1.62883059594514,0.314891868289074
 77 | 0.489957618553033,-1.99750325883836,-0.278942387335838
 78 | -0.645030506845469,0.532484555848244,1.51914393629537
 79 | -0.725926361617648,1.30974246808284,1.03605072135945
 80 | 0.663296103132908,-0.802537063321646,0.768398026673031
 81 | 1.90389816862278,0.202891215723652,-0.569218517192488
 82 | -0.528536477067039,-1.15643266838751,0.37378381414304
 83 | 0.264537362660355,-0.0665590566770107,-1.03189524483381
 84 | 0.611063638405652,0.0260020629512783,-0.057365949793099
 85 | 0.355137211450492,-0.0955588271675045,-0.771922951143557
 86 | 1.00967500763017,1.15188675944681,1.54272657784551
 87 | -0.569198695246773,-1.20652471154877,1.78035879189381
 88 | -2.15334635108806,-1.58809816312251,-2.11285069615804
 89 | 0.184165774607566,0.131835418646949,3.40846020100392
 90 | -0.906147000502656,-1.6539608011468,1.03694447521102
 91 | 0.0536675932085977,0.872578159993932,-1.49504894859961
 92 | -0.578747723218303,0.36077214086072,1.07021522498704
 93 | 0.195747353116327,-1.67602682494811,-1.49016756119381
 94 | -1.25267875894061,-1.64660997210579,-0.169854089062683
 95 | -0.991767369309597,0.72979077361273,0.79410387106139
 96 | 0.332543330728203,-0.507491746867643,1.14818752893418
 97 | -0.945353208030204,0.29898911247977,0.214632367897235
 98 | 0.0423864173372419,-0.534538365129471,-0.739718332803947
 99 | 2.53963924364862,2.6261370549989,0.413366758339587
100 | -0.646393481210255,1.23200974636563,3.62709736901236
101 | -0.716586865034395,0.030978688625401,0.194807045790192
102 | -0.841719050868952,0.197483913273921,0.497517975041885
103 | -2.20773474569365,-1.78735069711425,-1.48778941510646
104 | -1.09825767030364,-0.478603022028319,-1.04591370471429
105 | 0.252103283375761,-1.10182124587493,-0.453535744618212
106 | 0.445152515342206,0.526187079368617,0.411625977802066
107 | -0.416960974750646,0.264691030574312,2.85592184380344
108 | 1.18074432908401,0.170293047151767,2.15364593170897
109 | -1.63459393020916,1.38377066228512,-0.173171799439492
110 | 0.663442941417185,0.591197809299972,-0.421420570528089
111 | -0.0887119595170727,-1.17364421609167,-1.46482257466387
112 | 0.506582405502905,0.188429531808663,-2.55588408884243
113 | -0.915706697204815,0.10479700914193,0.488889684647269
114 | -1.52536648106261,-1.61972756034475,-0.418571322061722
115 | -0.998733348288408,-1.75368490514685,-1.31309456203748
116 | -0.70931290858911,1.30915358217351,-0.367142594458864
117 | 0.640659852533815,0.4012477870433,1.10425629640205
118 | 0.0274925185629446,1.37241465354307,-1.63762154748311
119 | 0.21312749813679,-0.795198760883919,-0.513507445927222
120 | -0.902883476160632,0.152013405517755,0.640761089347153
121 | -0.288587240624669,-1.13140857866613,0.16515350430926
122 | 0.579500520250118,-1.33510407052177,-1.2118255706886
123 | -0.925995949524758,0.00364661972967395,-0.222924675135602
124 | 1.8037034884987,-1.52018335710772,-0.390994221463726
125 | 0.323733901690187,0.177464985338958,-0.259771393162853
126 | 0.0124998774663881,0.357181400716592,1.21780236000067
127 | -1.00667446092467,1.21743419618339,0.694637041606442
128 | -0.506938856217441,-1.55453572316585,-0.00173310251444414
129 | -0.028155734334309,-0.8144204087383,-1.29143075218377
130 | -0.128238442886287,-0.634116075619659,-0.63723960294427
131 | -0.861822651835747,-0.991479837265085,-2.10240914288263
132 | 0.491478600899291,2.13079950546573,2.14883350013865
133 | -2.04521015889511,-1.1369503890559,1.51784087174491
134 | -1.39841086393197,-2.56967579282579,-1.67419853295323
135 | -1.11275691203204,0.123890985939722,-0.757173091387086
136 | 0.0674921035117296,0.915629906173329,-0.40060936334127
137 | -2.33315971030088,-1.13152910791751,0.00294917954646756
138 | 1.13729641756132,0.836754732384624,1.16458575294981
139 | 0.468320313599477,0.711308358114731,0.921611241517056
140 | -1.27752469042358,-1.79406284130134,-0.57961458907725
141 | -0.344679199683488,1.76791368146474,3.26723390903491
142 | -0.76301752161441,-0.256893073169257,-1.04107647067082
143 | 0.304666372021712,0.965753996896422,0.539443619174959
144 | 0.271036378052135,0.117196153393934,0.0657794639515704
145 | -1.26404348274365,-1.83942022197946,-0.462002342266325
146 | 1.78478766916897,0.994858180698821,1.48152256191986
147 | 0.55296041302079,-0.345594670495304,-0.494754842451903
148 | 0.212341521293348,0.457572791745412,0.599608763750521
149 | 0.759730857778848,0.261539443461453,1.73760587711731
150 | -1.06903289563896,-0.965283479588678,0.851229594615949
151 | 0.824190414926679,1.42016752648631,0.699562155148275
152 | 0.359867802482143,-0.951111178280633,1.04862501374179
153 | 0.218633140708038,-0.130704041287248,-0.561335545200651
154 | -1.1466670982342,0.67669869569944,-0.81712432571474
155 | 2.95112218706533,0.418138757683956,0.365832899807551
156 | 1.56085567656968,0.620445363834492,0.718050823511652
157 | 0.326364998897937,-0.804098479060875,-1.75923362755744
158 | -1.23634936895527,-0.970807422789759,-2.16686704442382
159 | 2.23636117354172,0.131718872402842,1.18563700101253
160 | -0.730248514123208,0.170690765862369,-0.628573972990158
161 | 0.929198912796676,1.11327387802652,1.21900420115995
162 | -0.147055325708318,4.31651260417537,2.90360903298406
163 | 0.302370749301339,-0.563150381744145,-1.05602493460818
164 | 0.494539884317742,0.107392053687842,-1.51575094118821
165 | 1.29441975092435,-0.365393612078586,0.282897470651488
166 | -0.0103919919182019,2.82757042889834,0.239933579740327
167 | -1.43343888069419,-0.312256615157305,0.520243019865827
168 | -0.60188551757209,-0.29239154074291,0.674834114521821
169 | 1.29471805052204,-1.16234014261655,0.22613721184565
170 | 1.58019838887206,0.546097629073341,-0.157391040798399
171 | 0.802742683160719,0.452436037432193,0.965541693635794
172 | 0.950142149761672,-0.135443555740156,-0.761251012414718
173 | -0.43871950142524,-1.77575204602221,-2.77997334786374
174 | 0.618531455131127,1.65323935376108,0.736657532266359
175 | 0.221088511212154,0.359351850144774,-0.873776035835875
176 | 1.36792011717771,1.62449600149662,1.52025168617369
177 | 0.474637329673526,0.161406884739177,-0.281358207159691
178 | -1.00084823837256,0.0502919375958081,-0.417343895111818
179 | -0.345670428318186,1.9047012837651,0.27546881378142
180 | 0.657852540188372,-0.663259159685556,0.371756063852222
181 | 1.56279890225929,-0.512030955184687,2.35811503621137
182 | -1.08003309584932,-0.719579004693836,-0.353922668551684
183 | 1.70190264948874,-0.846273708225303,0.442267627833966
184 | -2.42041290828576,-1.62083204197538,0.0932095022117396
185 | -0.511667399573525,-0.446559875418834,-0.343068001344917
186 | -0.200454931014717,1.13785041511186,-0.33607963197219
187 | 0.12734931626455,0.704322863950136,0.875441089200776
188 | 0.644944509946664,0.136892811837818,0.0855623408315791
189 | -0.669077824518047,0.338587036731898,-1.49211529676404
190 | 0.628713312933516,0.691104013447248,0.350123778310327
191 | -1.2490238767003,-1.03631971289998,-0.769267786249409
192 | 0.689284655421753,0.0178918547734429,0.770299910681795
193 | 2.21929851488746,-0.68490558215836,1.54210283542253
194 | -0.616805658226838,-1.54353507971068,-0.653986732679104
195 | 1.79056128184299,0.512206763016197,1.04910672692131
196 | -1.06506937346027,0.497446452826084,-1.7099728183207
197 | 0.419565769482607,0.397130881390085,-1.08973047892049
198 | 0.663262044943389,-0.528702104745502,0.861928974774556
199 | 0.6420564886772,-0.729911338737075,-0.951485367169108
200 | -1.45356238091858,-0.659073493228766,1.30965719503448
201 | 1.05331492584788,0.677286391519462,1.03509611084608
202 | -0.225782916188212,0.153483633963033,-0.0674763430823723
203 | 0.340456589031501,0.697533939540248,0.750877599488548
204 | -0.169626435016236,1.65188699609178,-0.979891610662947
205 | 1.22081215213303,0.832160717152855,-0.402484055470123
206 | 0.561156596238595,1.61216422597788,0.469103478980116
207 | 0.170923239637072,0.874191712109122,0.633805754902373
208 | 0.112462846428017,-0.678671574828038,0.415058412822189
209 | 1.40693749633883,0.642670591466989,-0.131877738341268
210 | 1.43240315961845,0.788127551949071,1.64423309708272
211 | -0.483955269709877,-0.0895296747462856,-1.10822062215117
212 | 0.144681192629838,0.406295184500294,0.733005461600374
213 | 0.673577895578023,0.365884345329642,0.60232072576863
214 | -0.0309545369175297,0.684275607241005,-0.0552824148680601
215 | 0.139498654877596,1.87145887608243,1.04287997963827
216 | -0.332335571720686,-0.527311696100852,-0.748522942632576
217 | -1.24515499785961,-0.840713035463655,-0.868326790847419
218 | 1.07594357008928,-1.80591255439194,-0.544369743826404
219 | 0.654818046127231,1.70226414802034,-0.367898594431855
220 | -0.507353147110731,-0.60185561755896,0.0262327498177425
221 | -0.969992840971464,-1.02901181690143,0.915807191841815
222 | 0.787763502873627,0.626378597161787,1.23905747084146
223 | -0.249203039170496,-2.95667226154319,0.436833134345055
224 | -0.93778299609646,-1.38927793205971,-0.78273339960339
225 | 2.63342561458283,0.639838798388022,0.780936854633035
226 | 2.52706576408998,1.25416649625158,0.266613913255988
227 | 0.0448868912353245,-0.483104860646336,1.75311440669654
228 | 1.78402375192211,0.302228154023323,1.63699509606989
229 | 0.486029047163539,0.386429365487011,1.55585370225362
230 | 1.180731222242,0.921715782491086,-0.439839409113051
231 | 0.0804194469231378,1.77183934239586,1.88533890503129
232 | -0.300485645671156,0.0614595769184675,0.177200079449562
233 | -0.335828907742673,-0.739891520414093,-0.0507509117864409
234 | -0.42698719037845,-1.27013147987761,-0.155187169570225
235 | 0.226256798386137,1.58722361986589,0.690055577417061
236 | 0.0264663165981116,1.02183871771642,0.805433286310166
237 | 1.31707670466319,0.988649479296496,-0.0747860211967527
238 | 0.565930920385734,1.7793638224481,-0.76655949719908
239 | 0.0815379280496882,-1.6241202380207,-0.180594251266354
240 | -2.21333603667155,0.369884459346723,1.48868913189256
241 | 1.70689944175351,0.675819022451083,0.582256453197378
242 | -0.168244916214625,-0.467803119639771,1.71209146579919
243 | 0.145610001825848,0.492184760225567,1.60605564539753
244 | 1.20216611140053,0.865225630837673,-0.0910769647315459
245 | -2.9635545437848,-0.188871449951315,1.08577843208684
246 | 1.42428955207894,0.319878478470103,3.82711820670353
247 | -1.44781769131388,-0.479361133252284,0.207183271273409
248 | 0.218813683990411,0.950980034066862,-0.296089355281943
249 | 0.765485866128378,0.228569969537456,0.383403713760275
250 | -0.64776334216636,-0.302706010150254,0.148500549407663
251 | -0.393455903815341,-2.14438597760135,-1.84708576953698
252 | -0.0380191969641422,-2.0327360730753,-1.45265366508969
253 | -0.214491694182517,0.270710871625492,0.658704017789759
254 | -0.809710876325302,0.798999886891447,-0.0384162219861765
255 | -0.681586905288937,1.33881853943639,-0.20467692640177
256 | -0.589356132834343,0.578434944982357,0.946076257146052
257 | -0.259014911918159,-0.597534514294742,-1.02519501172007
258 | 1.49533969881379,2.11991452917452,0.828269773035721
259 | 0.931151536880709,0.630419186267908,1.62440031815158
260 | -0.396800542218102,0.0797767679547174,0.942156311720712
261 | 0.77344830703741,-0.996845525334123,0.22844372568324
262 | 0.478482378831286,-0.734385269706719,-1.06231435130593
263 | 0.671609289395807,-0.270062181009413,-0.0336217014583608
264 | -0.846111839677523,0.233013787616568,2.55843685747731
265 | 0.433803837428308,0.378700182928257,1.30905415073867
266 | -0.844599781425057,-0.956717303753552,-1.49686692787136
267 | 0.134754161756247,1.3565314535696,0.828382450863055
268 | -1.23129728279216,0.197789562875601,0.13301450044897
269 | -1.2184377515354,-1.00524717319334,-0.0794960283840722
270 | 0.895674496986596,1.39134877100233,1.19508170827559
271 | -0.464694449262212,-0.401782486626228,-0.184118065699338
272 | 1.27066784474497,0.92085774023182,1.20596567731797
273 | -1.5149718703103,-2.63602873524695,-1.10128665241538
274 | 1.07359464347295,0.409235894108222,1.06719846204811
275 | -0.178499275394487,0.501071259193609,0.500911220318338
276 | -0.728382041374663,0.279860682480301,-0.30928242970681
277 | -1.12278608599971,0.594670501293693,-1.92220065050573
278 | -1.49843709969433,-0.0794705098748185,-1.20844718583332
279 | 0.631481578932726,2.05607144025737,0.934287769407225
280 | 1.6197149767577,0.350164750764048,2.16851874673841
281 | -0.790813748812147,0.302103505256826,0.179971856146052
282 | 0.929732361772266,-1.0417984497627,-0.579781702988971
283 | -0.0829013164596387,0.0741759162134728,0.856291277741287
284 | -0.625942739552874,-1.02709631286512,0.989839005127848
285 | -0.193989910914329,1.68946302805717,0.700151606464307
286 | 0.873227257482941,-0.131827408880574,-0.445507814979066
287 | 1.4613650903671,1.18599867007674,-0.282374096492097
288 | 0.299173279336333,0.158006913588438,1.206724840906
289 | 0.546687820312104,2.43098980807801,0.0516632412971151
290 | 0.565090879252294,0.0678344872612057,0.68954269376158
291 | -1.53387607785561,0.783629245945927,0.892852015173458
292 | -1.36088471230853,-1.71318221143444,-0.346266859780386
293 | 0.770283263592743,0.103318852799744,0.987103809635518
294 | -1.16450116947292,1.0317392275943,-0.906410224666777
295 | 1.80045635025028,1.53663857312275,-0.12079494043064
296 | 0.935325998990073,1.14311074600394,-0.23621849815402
297 | 0.325799031291421,0.288181443142562,-0.500272513193658
298 | -0.0833561308382381,2.15445515572068,1.35278522086772
299 | -2.12284481942416,-1.42725485861395,-1.50638016129252
300 | -1.88153152321829,-1.01544741123315,-2.09236059286633
301 | 0.844476405269658,-1.58689032564961,-1.01107589052544
302 | 


--------------------------------------------------------------------------------
/Hallgren2013/novel question.R:
--------------------------------------------------------------------------------
 1 | #Answering a novel question about mediation analysis
 2 | 
 3 | #This simulation will generate mediational datasets with three variables, 
 4 | #X, M, and Y, and will calculate Sobel z-tests for two competing mediation
 5 | #models in each dataset: the first mediation model proposing that M mediates the 
 6 | #relationship between X and Y (X-M-Y mediation model), the second mediation 
 7 | #model proposes that Y mediates the relationship between X and M (X-Y-M 
 8 | #mediation model).
 9 | 
10 | #Note this simulation may take 10-45 minutes to complete, depending on computer speed
11 | 
12 | #Simulation design chosen by the researcher -- edit these as needed
13 | N_list <- c(100, 300)			#sample sizes N (number of participants per simulated dataset)
14 | a_list <- c(-0.3, 0, 0.3)		#"a" effects (regression coefficient for the X->M path)
15 | b_list <- c(-0.3, 0, 0.3)		#"b" effects (regression coefficient for the M->Y path after X is controlled)
16 | cp_list <- c(-0.2, 0, 0.2)		#"c-prime" effects (regression coefficient for X->Y after M is controlled)
17 | reps <- 1000					#number of datasets generated in each condition
18 | 
19 | #Fix the seed of the random number generator (optional) so that the simulated
20 | #datasets, and therefore all results, are identical on every run.
21 | set.seed(seed = 192)
22 | 
23 | #Sobel z-test for the indirect effect of X on Y through the mediator M
24 | sobel_test <- function(X, M, Y){
25 | 	fit_m = lm(M ~ X) 									#path "a": M regressed on X
26 | 	fit_y = lm(Y ~ X + M) 								#path "b": Y regressed on X and M
27 | 	est_a = coef(fit_m)[2] 								#estimated "a" effect (slope of X in the M model)
28 | 	est_b = coef(fit_y)[3] 								#estimated "b" effect (slope of M in the Y model)
29 | 	se_a = coef(summary(fit_m))[2, "Std. Error"] 		#standard error of the "a" effect
30 | 	se_b = coef(summary(fit_y))[3, "Std. Error"] 		#standard error of the "b" effect
31 | 	se_ab = sqrt(est_b^2 * se_a^2 + est_a^2 * se_b^2) 	#Sobel standard error of the product a*b
32 | 	return(est_a * est_b / se_ab) 						#z = indirect effect divided by its standard error
33 | }
34 | 
35 | #run simulation: two rows (one per mediation model) for every replicate of every condition; "d" is preallocated because growing it with rbind on each iteration copies the whole matrix every time
36 | n_rows = 2 * reps * length(N_list) * length(a_list) * length(b_list) * length(cp_list)
37 | d = matrix(NA_real_, nrow=n_rows, ncol=7, dimnames=list(NULL, c("iteration", "a", "b", "cp", "N", "model", "Sobel_z")))
38 | n_filled = 0 						#index of the last row of "d" that has been written
39 | for (N in N_list){					#loop through all of the "N" sizes specified above
40 | 	for (a in a_list){ 				#loop through all of the "a" effects specified above
41 | 		for (b in b_list){ 			#loop through all of the "b" effects specified above
42 | 			for (cp in cp_list){ 	#loop through all of the "c-prime" effects specified above
43 | 				for (i in seq_len(reps)){ 			#loop to replicate simulated datasets within each condition
44 | 					#Generate mediation data based on MacKinnon, Fairchild, & Fritz (2007) equations for mediation, so that X, M, and Y follow the X-M-Y mediation model exactly
45 | 					X = rnorm(N, 0, 1) 				#generate random variable X that has N observations, mean = 0, sd = 1
46 | 					M = a*X + rnorm(N, 0, 1) 		#generate random variable M that includes the "a" effect due to X and random error with mean = 0, sd = 1
47 | 					Y = cp*X + b*M + rnorm(N, 0, 1) #generate random variable Y that includes "b" and "c-prime" effects and random error with mean = 0, sd = 1
48 | 					d[n_filled + 1, ] = c(i, a, b, cp, N, 1, sobel_test(X, M, Y))	#model 1: Sobel z-test statistic for X-M-Y mediation, saved with the condition parameters
49 | 					d[n_filled + 2, ] = c(i, a, b, cp, N, 2, sobel_test(X, Y, M))	#model 2: Sobel z-test statistic for X-Y-M mediation, saved with the condition parameters
50 | 					n_filled = n_filled + 2
51 | 				}
52 | 			}
53 | 		}
54 | 	}
55 | }
56 | d = data.frame(d) #convert the filled matrix "d" (column names were set at allocation) to a data.frame
57 | 
58 | #save data frame "d" as a CSV file (this is the file that "power analysis.R" reads back)
59 | #MUST REPLACE ... WITH APPROPRIATE FILE PATH
60 | write.table(d, "C:\\...\\novel_question_output.csv", sep=",", row.names=FALSE)
61 | 
62 | #save raw data from the last simulated dataset (X, M and Y still hold the final iteration) to illustrate the bootstrapping example
63 | d_raw = cbind(X, M, Y)
64 | #MUST REPLACE ... WITH APPROPRIATE FILE PATH
65 | write.table(d_raw, "C:\\...\\mediation_raw_data.csv", sep=",", row.names=FALSE)
66 | 
67 | #Make a boxplot of X-M-Y and X-Y-M models when a=0.3, b=0.3, c' = 0.2, N = 300, and when a=0.3, b=0.3, c'=0, N = 300.
68 | boxplot(d$Sobel_z[d$a == 0.3 & d$b == 0.3 & d$cp == 0.2 & d$model == 1 & d$N == 300], 
69 | 	d$Sobel_z[d$a == 0.3 & d$b == 0.3 & d$cp == 0.2 & d$model == 2 & d$N == 300],
70 | 	d$Sobel_z[d$a == 0.3 & d$b == 0.3 & d$cp == 0 & d$model == 1 & d$N == 300], 
71 | 	d$Sobel_z[d$a == 0.3 & d$b == 0.3 & d$cp == 0 & d$model == 2 & d$N == 300],
72 | 	ylab="Sobel z-statistic",
73 | 	xaxt = 'n',
74 | 	tick=FALSE)	#suppress x-axis labels
75 | #Add labels to x-axis
76 | axis(1,at=c(1:4),labels=c("X-M-Y model\n(a=0.3, b=0.3, c'=0.2)", "X-Y-M model\n(a=0.3, b=0.3, c'=0.2)", "X-M-Y model\n(a=0.3, b=0.3, c'=0)", "X-Y-M model\n(a=0.3, b=0.3, c'=0)"),tick=FALSE)
77 | 


--------------------------------------------------------------------------------
/Hallgren2013/power analysis.R:
--------------------------------------------------------------------------------
 1 | #Estimating power of a mediation test
 2 | 
 3 | #read simulated dataset
 4 | #MUST REPLACE ... WITH APPROPRIATE FILE PATH
 5 | d = read.csv("C:\\...\\novel_question_output.csv", header=TRUE, sep=",")
 6 | 
 7 | #Proportion of simulated datasets of one condition whose Sobel z-statistic is
 8 | #significant (two-sided test).
 9 | #  d         data frame with the columns a, b, cp, N, model and Sobel_z
10 | #  a, b, cp  effect sizes identifying the condition of interest
11 | #  N         sample size identifying the condition of interest
12 | #  model     model code identifying the condition of interest (default 1)
13 | #  z_crit    critical value for |z|; the default 1.96 gives p-value < 0.05
14 | #Returns a table with the proportion of FALSE (not significant) and TRUE
15 | #(significant) z-values.  The proportion of TRUE values is the proportion of
16 | #times the null hypothesis of no indirect effect is rejected.  It is equivalent
17 | #to power when an indirect effect was simulated (a and b both non-zero) and to
18 | #the Type I error rate when no indirect effect was simulated (a or b equal to 0).
19 | rejection_rate = function(d, a, b, cp, N, model=1, z_crit=1.96){
20 | 	#extract Sobel z-statistic from the condition of interest
21 | 	z_dist = d$Sobel_z[d$a==a & d$b==b & d$cp==cp & d$N==N & d$model==model]
22 | 	#identify which z-values are large enough to be significant
23 | 	significant = abs(z_dist) > z_crit
24 | 	#proportion of FALSE and TRUE values
25 | 	table(significant)/length(significant)
26 | }
27 | 
28 | #Example where a = 0.3, b=0.3, c'=0.2, N = 300
29 | #(an indirect effect was simulated: the proportion of TRUE values is power)
30 | rejection_rate(d, a=0.3, b=0.3, cp=0.2, N=300)
31 | 
32 | #Example where a = 0.3, b=0.3, c'=0.2, N = 100
33 | #(an indirect effect was simulated: the proportion of TRUE values is power)
34 | rejection_rate(d, a=0.3, b=0.3, cp=0.2, N=100)
35 | 
36 | #Other example where a = 0.3, b=0, c'=0.2, N = 300
37 | #(b = 0, so no indirect effect was simulated: the proportion of TRUE values is
38 | #the Type I error rate, not power)
39 | rejection_rate(d, a=0.3, b=0, cp=0.2, N=300)
40 | 


--------------------------------------------------------------------------------
/Ihle2020/glm_Freq_vs_YN.R:
--------------------------------------------------------------------------------
  1 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  2 | #	 Malika IHLE      malika_ihle@hotmail.fr
  3 | #	 Preregistration manipulation color and unpalatability 
  4 | #     https://osf.io/f8uk9/?view_only=3943e7bb9c5f4effbf119ca5b062fe80
  5 | #     click to read about the design of this study
  6 | #  simulation of data to see whether planned analyses code works
  7 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  8 | 
  9 | 
 10 | # purpose
 11 | #### here we want to see whether running a glm on a contingency table of counts (Poisson distribution)
 12 | #### provides the same results as running a glm with one line per observation and the response coded as Yes/No (binomial distribution)
 13 | 
 14 | 
 15 | 
 16 | rm(list = ls(all = TRUE)) # start from an empty workspace: removes every object of the current environment, including those whose name starts with a dot
 17 | 
 18 | # packages
 19 | library(pbapply) # provides pbreplicate(), used below instead of replicate() to repeat the simulation multiple times with a progress bar
 20 | 
 21 | 
 22 | 
 23 | # 1. sample sizes & 5. parameter values to vary between runs
 24 | nF <- 100 # total number of females tested
 25 | pbrep <- 1000 # how many times the simulation is replicated
 26 | probsnaive <- 0.5 # probability that a female never exposed to the bitter compound attacks the bitter prey - needs to be 0.25 to always detect the effect
 27 | probswhenexposed <- 0.5 # probability that a female trained on the bitter compound attacks the bitter prey - needs to be 0.05 to always detect the interaction (if previous is 0.25)
 28 | 
 29 | 
 30 | # two-by-two factorial design - 'fixed' data structure (8 rows: 4 for exposed females, then 4 for naive females)
 31 | FPriorExposure <- rep(c(1, 0), each = 4)
 32 | FColorGroup <- rep(rep(c('Green', 'Beige'), each = 2), times = 2) # the color that will contain DB, the other color will contain water
 33 | TermiteEatenPalatability <- rep(c('Water', 'DB'), times = 4) # in one test, either the DB termite or the water termite has to be attacked for the test to end
 34 | TermiteEatenColor <- ifelse(TermiteEatenPalatability == 'DB', FColorGroup, ifelse(FColorGroup == 'Green', 'Beige', 'Green')) # the DB termite has the color of the group, the water termite has the other color
 35 | 
 36 | 
 37 | 
 38 | 
 39 | # 2. simulation of an effect of the bitter compound (say smell) onto that attack, 
 40 | # if the termite has the bitter compound, prob of attack is = probs defined in parameters
 41 | 
 42 | 
 43 | ## Function simulating one experiment, analysing it with both glm and returning the p values of both models
 44 | Simulate_and_analyse <-function(){  # DO NOT RUN IF WANT TO CREATE ONE EXAMPLE TABLE
 45 |   
 46 |   ### helper: simulate the nF/4 spiders of one group and count those attacking the DB termite,
 47 |   ### p being the probability that a spider of that group attacks the DB termite
 48 |   CountDBAttacks <- function(p){sum(sample(c(1,0), nF/4, prob = c(p, 1-p), replace=TRUE))}
 49 |   
 50 |   
 51 |   ## create a contingency table summarizing frequencies
 52 |   
 53 |   ### number of spiders attacking the DB termite in each group (the two naive groups are drawn first)
 54 |   GreenDBNoExp <- CountDBAttacks(probsnaive)
 55 |   BeigeDBNoExp <- CountDBAttacks(probsnaive)
 56 |   GreenDBExp <- CountDBAttacks(probswhenexposed)
 57 |   BeigeDBExp <- CountDBAttacks(probswhenexposed)
 58 |   
 59 |   ### the spiders of a group that did not attack the DB termite attacked the water termite;
 60 |   ### counts follow the row order of the fixed design vectors (exposed groups first)
 61 |   nPerGroup <- nF/4
 62 |   Freq <- c(nPerGroup - GreenDBExp, GreenDBExp, nPerGroup - BeigeDBExp, BeigeDBExp,
 63 |             nPerGroup - GreenDBNoExp, GreenDBNoExp, nPerGroup - BeigeDBNoExp, BeigeDBNoExp)
 64 |   
 65 |   ### cross-tabulate the counts, then flatten the table to one row per cell with its Freq
 66 |   contingencytable <- xtabs(Freq~TermiteEatenColor+TermiteEatenPalatability+FPriorExposure)
 67 |   FreqTable <- as.data.frame.table(contingencytable)
 68 |   
 69 |   
 70 |   ## create a table with one line per termite group (i.e. two lines per test: the DB termite and the water termite, of opposite colors)
 71 |   
 72 |   ### first line of each test, describing the attacked prey: every cell of FreqTable is repeated Freq times
 73 |   CellOfEachTest <- rep(1:nrow(FreqTable), times = FreqTable$Freq)
 74 |   AttackedPreyTable <- FreqTable[CellOfEachTest, c('TermiteEatenColor', 'TermiteEatenPalatability', 'FPriorExposure')]
 75 |   nrow(AttackedPreyTable) # nF
 76 |   AttackedPreyTable$AttackedYN <- 1
 77 |   AttackedPreyTable$FID <- 1:nF
 78 |   
 79 |   ### second line of each test, describing the prey that was not attacked:
 80 |   ### same test, but color and palatability are reversed
 81 |   SecondLinePerTestTable <- AttackedPreyTable
 82 |   SecondLinePerTestTable$AttackedYN <- 0
 83 |   WasBeige <- SecondLinePerTestTable$TermiteEatenColor == 'Beige'
 84 |   SecondLinePerTestTable$TermiteEatenColor[WasBeige] <- 'Green'
 85 |   SecondLinePerTestTable$TermiteEatenColor[!WasBeige] <- 'Beige'
 86 |   WasDB <- SecondLinePerTestTable$TermiteEatenPalatability == 'DB'
 87 |   SecondLinePerTestTable$TermiteEatenPalatability[WasDB] <- 'Water'
 88 |   SecondLinePerTestTable$TermiteEatenPalatability[!WasDB] <- 'DB'
 89 |   
 90 |   ### stack both lines and sort by female so that the two lines of a test follow each other
 91 |   TwoLinePerTestTable <- rbind(AttackedPreyTable,SecondLinePerTestTable )
 92 |   TwoLinePerTestTable <- TwoLinePerTestTable[order(TwoLinePerTestTable$FID),]
 93 |   
 94 |   ### pick one line at random for each female
 95 |   ### (knowing whether she attacked one termite tells whether she attacked the other one)
 96 |   LinesPerFemale <- split(TwoLinePerTestTable, TwoLinePerTestTable$FID)
 97 |   FocalLines <- lapply(LinesPerFemale, function(TestLines){TestLines[sample(nrow(TestLines), 1), ]})
 98 |   FocalAttackTable <- do.call(rbind, FocalLines)
 99 |       ####columns are deliberately not renamed, so that both model outputs are easy to combine later on
100 |       ####colnames(FocalAttackTable) <- c('FocalTermiteColor', 'FocalTermitePalatability', 'FPriorExposure', 'FocalTermiteAttackedYN', 'FID')
101 |   
102 |   
103 |   ## 3. run statistical test: Poisson Model on contingency table (without, then with, the palatability x prior exposure interaction)
104 |   modFreq0 <- glm(Freq ~ TermiteEatenColor+TermiteEatenPalatability+FPriorExposure, family = 'poisson', data = FreqTable)
105 |   modFreq1 <- glm(Freq ~ TermiteEatenColor+TermiteEatenPalatability*FPriorExposure, family = 'poisson', data = FreqTable)
106 |   summary(modFreq1)
107 |   
108 |   anova(modFreq0,modFreq1,test='Chi')
109 |   
110 |   
111 |   ## 3. run statistical test: Binomial model on 'long table'
112 |       ####should be named: glm (FocalTermiteAttackedYN ~ FocalTermiteColor+FocalTermitePalatability*FPriorExposure 
113 |       ####but the names are kept as above so that both outputs are combined more easily
114 |   modBinom <- glm (AttackedYN ~ TermiteEatenColor +TermiteEatenPalatability*FPriorExposure, family = 'binomial', data = FocalAttackTable)
115 |   summary(modBinom)
116 |   
117 |   
118 |   ## 3. save parameter estimates for each iteration: p value of every model term except the intercept
119 |   modFreq1p <-  coef(summary(modFreq1))[-1, 'Pr(>|z|)']
120 |   modBinomp <- coef(summary(modBinom))[-1, 'Pr(>|z|)']
121 |   
122 |   ### one row of p values per model; the row names (modFreq1p, modBinomp) come from the variable names
123 |   ### and are used after the replication to tell the two models apart
124 |   pees <- rbind(modFreq1p,modBinomp)
125 |   
126 |   
127 |   
128 |   return(list(pees))  # DO NOT RUN IF WANT TO CREATE ONE EXAMPLE TABLE
129 | }  
130 | 
131 | 
132 | 
133 | # 4. replicate the simulation pbrep times
134 | OutputSimulation <- do.call(rbind, pbreplicate(pbrep,Simulate_and_analyse())) # stack the p values of all replicates: two rows per replicate (modFreq1p and modBinomp), one column per model term
135 | 
136 | OutputSimulation <- OutputSimulation<0.05 # determine whether or not they are significant (TRUE when p < 0.05)
137 | 
138 | OutputSimulationFreq <- OutputSimulation[rownames(OutputSimulation) == "modFreq1p",] # rows coming from the Poisson model
139 | OutputSimulationBinom <- OutputSimulation[rownames(OutputSimulation) == "modBinomp",] # rows coming from the binomial model
140 | 
141 | # 6. analyse and interpret the results of simulations
142 | ## factors where no effect was simulated should have a percentage of false positives of about 5% or less
143 | ## factors with a simulated effect should be detected in more than 5% of the cases
144 | data.frame(colSums(OutputSimulationFreq)/pbrep) # proportion of significant p values among the simulation replicates, per term of the Poisson model
145 | data.frame(colSums(OutputSimulationBinom)/pbrep) # proportion of significant p values among the simulation replicates, per term of the binomial model
146 | 
147 | 
148 | 
149 | # CONCLUSION: 
150 | # a Poisson glm on the three-way contingency table performs as well as 
151 | # a binomial glm with one line per test holding the data on a focal termite
152 | 
153 | 


--------------------------------------------------------------------------------
/Introduction-Simulations-in-R.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: knitr
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 


--------------------------------------------------------------------------------
/LICENSE-CODE.md:
--------------------------------------------------------------------------------
 1 | # Creative Commons Zero v1.0 Universal
 2 | 
 3 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER.
 4 | 
 5 | ### Statement of Purpose
 6 | 
 7 | The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work").
 8 | 
 9 | Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others.
10 | 
11 | For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights.
12 | 
13 | 1. __Copyright and Related Rights.__ A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following:
14 | 
15 |     i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work;
16 | 
17 |     ii. moral rights retained by the original author(s) and/or performer(s);
18 | 
19 |     iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work;
20 | 
21 |     iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below;
22 | 
23 |     v. rights protecting the extraction, dissemination, use and reuse of data in a Work;
24 | 
25 |     vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and
26 | 
27 |     vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof.
28 | 
29 | 2. __Waiver.__ To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose.
30 | 
31 | 3. __Public License Fallback.__ Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose.
32 | 
33 | 4. __Limitations and Disclaimers.__
34 | 
35 |     a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document.
36 | 
37 |     b. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.
38 | 
39 |     c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work.
40 | 
41 |     d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.
42 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
  1 | # Creative Commons Attribution-ShareAlike 4.0 International
  2 | 
  3 | Creative Commons Corporation (“Creative Commons”) is not a law firm and does not provide legal services or legal advice. Distribution of Creative Commons public licenses does not create a lawyer-client or other relationship. Creative Commons makes its licenses and related information available on an “as-is” basis. Creative Commons gives no warranties regarding its licenses, any material licensed under their terms and conditions, or any related information. Creative Commons disclaims all liability for damages resulting from their use to the fullest extent possible.
  4 | 
  5 | **Using Creative Commons Public Licenses**
  6 | 
  7 | Creative Commons public licenses provide a standard set of terms and conditions that creators and other rights holders may use to share original works of authorship and other material subject to copyright and certain other rights specified in the public license below. The following considerations are for informational purposes only, are not exhaustive, and do not form part of our licenses.
  8 | 
  9 | * __Considerations for licensors:__ Our public licenses are intended for use by those authorized to give the public permission to use material in ways otherwise restricted by copyright and certain other rights. Our licenses are irrevocable. Licensors should read and understand the terms and conditions of the license they choose before applying it. Licensors should also secure all rights necessary before applying our licenses so that the public can reuse the material as expected. Licensors should clearly mark any material not subject to the license. This includes other CC-licensed material, or material used under an exception or limitation to copyright. [More considerations for licensors](http://wiki.creativecommons.org/Considerations_for_licensors_and_licensees#Considerations_for_licensors).
 10 | 
 11 | * __Considerations for the public:__ By using one of our public licenses, a licensor grants the public permission to use the licensed material under specified terms and conditions. If the licensor’s permission is not necessary for any reason–for example, because of any applicable exception or limitation to copyright–then that use is not regulated by the license. Our licenses grant only permissions under copyright and certain other rights that a licensor has authority to grant. Use of the licensed material may still be restricted for other reasons, including because others have copyright or other rights in the material. A licensor may make special requests, such as asking that all changes be marked or described. Although not required by our licenses, you are encouraged to respect those requests where reasonable. [More considerations for the public](http://wiki.creativecommons.org/Considerations_for_licensors_and_licensees#Considerations_for_licensees).
 12 | 
 13 | ## Creative Commons Attribution-ShareAlike 4.0 International Public License
 14 | 
 15 | By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-ShareAlike 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions.
 16 | 
 17 | ### Section 1 – Definitions.
 18 | 
 19 | a. __Adapted Material__ means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image.
 20 | 
 21 | b. __Adapter's License__ means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License.
 22 | 
 23 | c. __BY-SA Compatible License__ means a license listed at [creativecommons.org/compatiblelicenses](http://creativecommons.org/compatiblelicenses), approved by Creative Commons as essentially the equivalent of this Public License.
 24 | 
 25 | d. __Copyright and Similar Rights__ means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights.
 26 | 
 27 | e. __Effective Technological Measures__ means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements.
 28 | 
 29 | f. __Exceptions and Limitations__ means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material.
 30 | 
 31 | g. __License Elements__ means the license attributes listed in the name of a Creative Commons Public License. The License Elements of this Public License are Attribution and ShareAlike.
 32 | 
 33 | h. __Licensed Material__ means the artistic or literary work, database, or other material to which the Licensor applied this Public License.
 34 | 
 35 | i. __Licensed Rights__ means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license.
 36 | 
 37 | j. __Licensor__ means the individual(s) or entity(ies) granting rights under this Public License.
 38 | 
 39 | k. __Share__ means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them.
 40 | 
 41 | l. __Sui Generis Database Rights__ means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world.
 42 | 
 43 | m. __You__ means the individual or entity exercising the Licensed Rights under this Public License. __Your__ has a corresponding meaning.
 44 | 
 45 | ### Section 2 – Scope.
 46 | 
 47 | a. ___License grant.___
 48 | 
 49 |    1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to:
 50 | 
 51 |        A. reproduce and Share the Licensed Material, in whole or in part; and
 52 | 
 53 |        B. produce, reproduce, and Share Adapted Material.
 54 | 
 55 |    2. __Exceptions and Limitations.__ For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions.
 56 | 
 57 |    3. __Term.__ The term of this Public License is specified in Section 6(a).
 58 | 
 59 |    4. __Media and formats; technical modifications allowed.__ The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material.
 60 | 
 61 |    5. __Downstream recipients.__
 62 | 
 63 |        A. __Offer from the Licensor – Licensed Material.__ Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License.
 64 | 
 65 |        B. __Additional offer from the Licensor – Adapted Material.__ Every recipient of Adapted Material from You automatically receives an offer from the Licensor to exercise the Licensed Rights in the Adapted Material under the conditions of the Adapter’s License You apply.
 66 | 
 67 |        C. __No downstream restrictions.__ You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material.
 68 | 
 69 |    6. __No endorsement.__ Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i).
 70 | 
 71 | b. ___Other rights.___
 72 | 
 73 |    1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise.
 74 | 
 75 |    2. Patent and trademark rights are not licensed under this Public License.
 76 | 
 77 |    3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties.
 78 | 
 79 | ### Section 3 – License Conditions.
 80 | 
 81 | Your exercise of the Licensed Rights is expressly made subject to the following conditions.
 82 | 
 83 | a. ___Attribution.___
 84 | 
 85 |    1. If You Share the Licensed Material (including in modified form), You must:
 86 | 
 87 |        A. retain the following if it is supplied by the Licensor with the Licensed Material:
 88 | 
 89 |          i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated);
 90 | 
 91 |          ii. a copyright notice;
 92 | 
 93 |          iii. a notice that refers to this Public License;
 94 | 
 95 |          iv. a notice that refers to the disclaimer of warranties;
 96 | 
 97 |          v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable;
 98 | 
 99 |        B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and
100 | 
101 |        C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License.
102 | 
103 |    2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information.
104 | 
105 |    3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable.
106 | 
107 | b. ___ShareAlike.___
108 | 
109 | In addition to the conditions in Section 3(a), if You Share Adapted Material You produce, the following conditions also apply.
110 | 
111 | 1. The Adapter’s License You apply must be a Creative Commons license with the same License Elements, this version or later, or a BY-SA Compatible License.
112 | 
113 | 2. You must include the text of, or the URI or hyperlink to, the Adapter's License You apply. You may satisfy this condition in any reasonable manner based on the medium, means, and context in which You Share Adapted Material.
114 | 
115 | 3. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, Adapted Material that restrict exercise of the rights granted under the Adapter's License You apply.
116 | 
117 | ### Section 4 – Sui Generis Database Rights.
118 | 
119 | Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material:
120 | 
121 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database;
122 | 
123 | b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material, including for purposes of Section 3(b); and
124 | 
125 | c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database.
126 | 
127 | For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights.
128 | 
129 | ### Section 5 – Disclaimer of Warranties and Limitation of Liability.
130 | 
131 | a. __Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You.__
132 | 
133 | b. __To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You.__
134 | 
135 | c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability.
136 | 
137 | ### Section 6 – Term and Termination.
138 | 
139 | a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically.
140 | 
141 | b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates:
142 | 
143 |    1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or
144 | 
145 |    2. upon express reinstatement by the Licensor.
146 | 
147 |    For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License.
148 | 
149 | c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License.
150 | 
151 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License.
152 | 
153 | ### Section 7 – Other Terms and Conditions.
154 | 
155 | a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed.
156 | 
157 | b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License.
158 | 
159 | ### Section 8 – Interpretation.
160 | 
161 | a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License.
162 | 
163 | b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions.
164 | 
165 | c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor.
166 | 
167 | d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority.
168 | 
169 | > Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the “Licensor.” The text of the Creative Commons public licenses is dedicated to the public domain under the [CC0 Public Domain Dedication](https://creativecommons.org/publicdomain/zero/1.0/legalcode). Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at [creativecommons.org/policies](http://creativecommons.org/policies), Creative Commons does not authorize the use of the trademark “Creative Commons” or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses.
170 | >
171 | > Creative Commons may be contacted at creativecommons.org.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Introduction to Simulations in R
 2 | 
 3 | ## About this work
 4 | This tutorial was created by [Malika Ihle](https://www.osc.uni-muenchen.de/about_us/coordinator/index.html) based on materials from [Joel Pick](https://joelpick.github.io/), [Hadley Wickham](https://www.yumpu.com/en/document/view/19077330/simulation-hadley-wickham), and [Kevin Hallgren](https://doi.org/10.20982/tqmp.09.2.p043), with contributions from [James Smith](https://github.com/worcjamessmith).   
 5 | It is licensed under a [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-sa/4.0/).
 6 | 
 7 | ## Prerequisites
 8 | 
 9 | * Have R and RStudio installed. If you don't, follow [these instructions](https://lmu-osc.github.io/Introduction-RStudio-Git-GitHub/installing_software.html).  
10 | * Know some R basics (e.g. how to select a value in a data frame, how to create a vector). If you don't, visit the following tutorial: <a href="https://lmu-osc.github.io/introduction-to-R/" target ="_blank">https://lmu-osc.github.io/introduction-to-R/</a>.  
11 | 
12 | ## Prior to the session: optional preparation to get familiarised with the subject
13 | 1) Watch this [30-minute introduction to credible research](https://osf.io/xtmek/), which contextualises the importance of simulations for reliable research. 
14 | 
15 | 2) Read [Hallgren, K. A. (2013). Conducting simulation studies in the R programming environment. *Tutorials in Quantitative Methods for Psychology*, *9*(2), 43–60](https://doi.org/10.20982/tqmp.09.2.p043).
16 | 
17 | ## Self-paced workshop
18 | ### How it works
19 | The self-paced tutorial (pages linked below) will alternate presentation of concepts and simple exercises for you to try to apply them in R. Each time you see written **YOUR TURN**, switch to your local copy of the exercise script (you can choose between a file <a href="https://github.com/lmu-osc/Introduction-Simulations-in-R/blob/main/exercise_script_with_solutions.R" target ="_blank">with</a> or <a href="https://github.com/lmu-osc/Introduction-Simulations-in-R/blob/main/exercise_script_without_solutions.R" target ="_blank">without</a> the solutions depending on e.g. your level of familiarity with R), review the examples if needed, complete the exercise, and check out the proposed answer (which often contains additional tips). Come back to the online tutorial and after finishing one page, you can navigate to the next page linked at the bottom to continue. The exercise script contains code for all the exercises and code that generates the plots that appear in the online tutorial, all in order of appearance in the tutorial.  
20 | 
21 | It is necessary that you work through the sections of the tutorial in order. Please read the blurbs of each section below to get an overview of this workshop. Then click on the first page 'Download the material' and follow along by navigating to the next page linked at the bottom of each page! You can get back to this overview at any time by clicking on the title 'Introduction to Simulations in R' at the top of each page.
22 |  
23 | 
24 | ### Tutorial
25 | * [Download the material](./tutorial_pages/download-repo.qmd) – Get this tutorial onto your machine.
26 | * [Definition](./tutorial_pages/definition.qmd) – What are simulations?
27 | * [Purpose](./tutorial_pages/purpose.qmd) – What can we use simulations for?
28 | * [Basic principles](./tutorial_pages/basic-principles.qmd) – What do we need to create a simulation?
29 | * [Random number generators](./tutorial_pages/random-numbers-generators.qmd) – How to generate random numbers in R?
30 | * [Repeat](./tutorial_pages/repeat.qmd) – How to repeat the generation of random numbers multiple times?
31 | * [Setting the seed](./tutorial_pages/seed.qmd) – How can you generate the same random numbers?
32 | * [Sample size `n`](./tutorial_pages/sample-size-n.qmd) – How many values should you generate within a simulation?
33 | * [Number of repetitions `nrep`](./tutorial_pages/number-of-simulations-nrep.qmd) – How many repeats of a simulation should you run?
34 | * [DRY rule](./tutorial_pages/dry-rule.qmd) – How to write your own functions?
35 | * [Simulate to check alpha](./tutorial_pages/check-alpha.qmd) – Write your first simulation and check the rate of false-positive findings.  
36 | * [Simulate to check power](./tutorial_pages/check-power.qmd) – Simulate data to perform a power analysis.  
37 | * [Simulate to prepare a preregistration](./tutorial_pages/simulate-for-preregistration.qmd) – Simulate data to test statistical analyses before preregistering them.  
38 | * [General structure](./tutorial_pages/general-structure.qmd) – What is the general structure of a simulation?
39 | * [Limitations](./tutorial_pages/limitations.qmd) – What are the limitations of simulations?
40 | * [Real-life example](./tutorial_pages/real-life-example.qmd) – What are real-life examples of simulations?
41 | * [Additional resources](./tutorial_pages/resources.qmd) – What resources can help you write your own simulation?
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/_quarto.yml:
--------------------------------------------------------------------------------
 1 | project:
 2 |   type: website
 3 |   render:
 4 |     - "*.qmd"
 5 |     - "!Hallgren2013/"  # a leading "!" excludes the path; the name must match the Hallgren2013/ directory exactly
 6 |     - "!Ihle2020/"
 7 | 
 8 | website:
 9 |   title: "Introduction to Simulations in R"
10 |   page-footer:
11 |     center: "Copyright, 2024 Open Science Center at LMU Munich"
12 |     border: false
13 |   search:
14 |     location: sidebar
15 |   repo-url: https://github.com/lmu-osc/Introduction-Simulations-in-R
16 |   repo-actions: [edit, issue]  # page links that point back to the repository given in repo-url
17 |   back-to-top-navigation: true
18 |   page-navigation: true  # the README tells readers to use the next-page link at the bottom of each page
19 |   favicon: assets/LMU-OSC_favicon.jpg
20 |   margin-header: |
21 |     ![](/assets/LMU-OSC_logo.jpg){width="175"}
22 | 
23 |   sidebar:
24 |     style: docked
25 |     contents:  # page order of the tutorial; same order as the tutorial list in README.md
26 |       - text: "Home"
27 |         href: index.qmd
28 |       - section: "Tutorial"
29 |         contents:
30 |           - href: ./tutorial_pages/download-repo.qmd
31 |             text: Download the material
32 |           - href: ./tutorial_pages/definition.qmd
33 |             text: Definition
34 |           - href: ./tutorial_pages/purpose.qmd
35 |             text: Purpose
36 |           - href: ./tutorial_pages/basic-principles.qmd
37 |             text: Basic principles
38 |           - href: ./tutorial_pages/random-numbers-generators.qmd
39 |             text: Random number generators
40 |           - href: ./tutorial_pages/repeat.qmd
41 |             text: Repeat
42 |           - href: ./tutorial_pages/seed.qmd
43 |             text: Setting the seed
44 |           - href: ./tutorial_pages/sample-size-n.qmd
45 |             text: Sample size `n`
46 |           - href: ./tutorial_pages/number-of-simulations-nrep.qmd
47 |             text: Number of repetitions `nrep`
48 |           - href: ./tutorial_pages/dry-rule.qmd
49 |             text: DRY rule
50 |           - href: ./tutorial_pages/check-alpha.qmd
51 |             text: Simulate to check alpha
52 |           - href: ./tutorial_pages/check-power.qmd
53 |             text: Simulate to check power
54 |           - href: ./tutorial_pages/simulate-for-preregistration.qmd
55 |             text: Simulate to prepare a preregistration
56 |           - href: ./tutorial_pages/general-structure.qmd
57 |             text: General structure
58 |           - href: ./tutorial_pages/limitations.qmd
59 |             text: Limitations
60 |           - href: ./tutorial_pages/real-life-example.qmd
61 |             text: Real-life example
62 |           - href: ./tutorial_pages/resources.qmd
63 |             text: Additional resources
64 | 
65 | format:
66 |   html:
67 |     theme:
68 |       - cosmo
69 |       - custom.scss  # project colour overrides, listed after the base theme
70 |     css: styles.css
71 |     toc: true
72 |     include-in-header:
73 |       - file: matomo-analytics.html  # analytics snippet added to the header of the rendered pages
74 | 
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/assets/1000hist10N01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/1000hist10N01.png


--------------------------------------------------------------------------------
/assets/24hist1000N01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/24hist1000N01.png


--------------------------------------------------------------------------------
/assets/24hist10N01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/24hist10N01.png


--------------------------------------------------------------------------------
/assets/LMU-OSC_favicon.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/LMU-OSC_favicon.jpg


--------------------------------------------------------------------------------
/assets/LMU-OSC_logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/LMU-OSC_logo.jpg


--------------------------------------------------------------------------------
/assets/conclude.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/conclude.png


--------------------------------------------------------------------------------
/assets/define.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/define.png


--------------------------------------------------------------------------------
/assets/download.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/download.PNG


--------------------------------------------------------------------------------
/assets/existing-directory.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/existing-directory.PNG


--------------------------------------------------------------------------------
/assets/explore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/explore.png


--------------------------------------------------------------------------------
/assets/files-list.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/files-list.PNG


--------------------------------------------------------------------------------
/assets/find-directory.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/find-directory.PNG


--------------------------------------------------------------------------------
/assets/generate.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/generate.png


--------------------------------------------------------------------------------
/assets/hist-power.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/hist-power.png


--------------------------------------------------------------------------------
/assets/hist10N01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/hist10N01.png


--------------------------------------------------------------------------------
/assets/logreg-results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/logreg-results.png


--------------------------------------------------------------------------------
/assets/musd-24-10-N01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/musd-24-10-N01.png


--------------------------------------------------------------------------------
/assets/musd-24-1000-N01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/musd-24-1000-N01.png


--------------------------------------------------------------------------------
/assets/replicate1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/replicate1.png


--------------------------------------------------------------------------------
/assets/replicate2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/replicate2.png


--------------------------------------------------------------------------------
/assets/replicate3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/replicate3.png


--------------------------------------------------------------------------------
/assets/simulated-data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/simulated-data.png


--------------------------------------------------------------------------------
/assets/test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/test.png


--------------------------------------------------------------------------------
/assets/ttest-changing-n.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/ttest-changing-n.png


--------------------------------------------------------------------------------
/assets/ttest-changing-nrep.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lmu-osc/Introduction-Simulations-in-R/b01aa7e5f44e7194e879b2abb5d10c4415c457ee/assets/ttest-changing-nrep.png


--------------------------------------------------------------------------------
/custom.scss:
--------------------------------------------------------------------------------
1 | /*-- scss:defaults --*/
2 | // Base document colors (this file is listed under `theme` in _quarto.yml)
3 | $navbar-bg: #009933;
4 | $link-color: #006426;
5 | $sidebar-hl: #006426; // same value as $link-color
6 | 


--------------------------------------------------------------------------------
/exercise_script_with_solutions.R:
--------------------------------------------------------------------------------
  1 | ###############################################################################################################
  2 | # This R script contains                                                                                      #
  3 | #  - the examples shown in the step-by-step workshop pages                                                    #
  4 | #    (for you to run line-by-line to observe the outcome, or to modify and play with)                         #
  5 | #  - space called 'YOUR TURN' for you to write your own code to answer the exercises from the workshop pages  #
  6 | #  - a possible solution to those exercises                                                                   #
  7 | ###############################################################################################################
  8 | 
  9 | #~~~~~~~~~ Random Number Generators and sampling theory -----
 10 | 
 11 | # sample: draw values at random from a given vector
 12 | 
 13 | ## x is a sequence
 14 | x <- 1:10
 15 | x # print x to check its content
 16 | ?sample # default: replace = FALSE
 17 | sample(x) # no size given: returns all the values of x in a random order
 18 | sample(x, replace = TRUE) # still length(x) values, but a value can now be drawn more than once
 19 | sample(letters, size = 10) # 10 different letters ('letters' is the built-in vector of lower-case letters)
 20 | sample(x, size = 100, replace = TRUE) # drawing more values than x contains is only possible with replace = TRUE
 21 | 
 22 | ## x is a vector of combined values
 23 | x <- c(1,5,8)
 24 | x
 25 | sample(x, size = 6, replace = TRUE) # 6 draws among the values 1, 5 and 8
 26 | 
 27 | 
 28 | 
 29 | 
 30 | # YOUR TURN: generate random numbers
 31 | ## Sample 100 values between 3 and 103 with replacement
 32 | 
 33 | 
 34 | 
 35 | 
 36 | 
 37 | 
 38 | 
 39 | ### possible solutions ###
 40 | x <- 3:103 # all integers from 3 to 103
 41 | sample(x, 100, replace = TRUE) # the second argument (given by position here) is 'size'
 42 | ##########################
 43 | 
 44 | 
 45 | # random number generators drawing from specific distributions
 46 | ?runif  # runif(n, min, max): uniform distribution
 47 | ?rpois  # rpois(n, lambda): Poisson distribution
 48 | ?rnorm  # rnorm(n, mean, sd): normal distribution
 49 | ?rbinom # rbinom(n, size, prob): binomial distribution (size = number of trials)
 50 | 
 51 | 
 52 | 
 53 | # YOUR TURN: generate random numbers
 54 | ## Draw 100 values from a normal distribution with a mean of 0 and a sd of 1
 55 | 
 56 | 
 57 | 
 58 | 
 59 | 
 60 | 
 61 | 
 62 | 
 63 | 
 64 | ### possible solutions ###
 65 | rnorm(n = 100, mean = 0, sd = 1) # every argument is named explicitly
 66 | rnorm(100) # if you sample from a normal distribution with a mean of 0 and a sd of 1, you do not need to provide them, they are the defaults
 67 | rnorm(100,0,1) # you do not need to label the arguments if you provide them in their default order (n, mean, sd), but you should provide names if they overwrite default values
 68 | ##########################
 69 | 
 70 | ## Draw 50 values from a normal distribution with a mean of 10 and sd of 5
 71 | 
 72 | 
 73 | 
 74 | 
 75 | 
 76 | 
 77 | 
 78 | 
 79 | 
 80 | ### possible solutions ###
 81 | rnorm(sd = 5, mean = 10, n = 50) # if you label your arguments you can put them in whatever order you want!
 82 | rnorm(50, mean = 10, sd = 5) # using the rules above: n given by position (it is the first argument), then the arguments that overwrite default values given by name
 83 | ##########################
 84 | 
 85 | ## Draw 1000 values from a poisson distribution with a lambda of 50
 86 | 
 87 | 
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | ### possible solutions ###
 96 | rpois(n = 1000, lambda = 50) # arguments named explicitly
 97 | rpois(1000,50) # same call with the arguments given by position (n first, then lambda)
 98 | ##########################
 99 | 
100 | ## Draw 30 values from a uniform distribution between 0 and 10
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 
108 | 
109 | ### possible solutions ###
110 | runif(n = 30, min = 0, max = 10) # arguments named explicitly
111 | runif(30, max = 10) # we can omit min = 0 because that's the default
112 | ##########################
113 | 
114 | 
115 | 
116 | # repeat: run the same expression several times
117 | ?replicate # replicate(n, expression)
118 | rnorm(10) # one sample of 10 values
119 | mean(rnorm(10)) # the mean of a newly drawn sample of 10 values
120 | replicate(10,rnorm(10)) # a matrix with one column of 10 values per repetition
121 | replicate(10, mean(rnorm(100))) # a vector of 10 means, each computed on a new sample of 100 values
122 | hist(replicate(10, mean(rnorm(100)))) # histogram of 10 newly simulated means (not the ones printed above)
123 | 
124 | # YOUR TURN: generate random numbers, repeat, and plot
125 | ## Replicate 1000 times the mean of 10 values drawn from a uniform distribution between 0 and 10  
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 
132 | 
133 | 
134 | 
135 | ### possible solutions ###
136 | replicate(1000, mean(runif(10, max = 10))) # prints the 1000 simulated means
137 | hist(replicate(1000, mean(runif(10, max = 10)))) # runs the simulation again, so the histogram shows 1000 new means
138 | ##########################
139 | 
140 | ## Replicate 100 times the mean of 50 values drawn from a normal distribution of mean 10 and standard deviation 5  
141 | 
142 | 
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 
150 | 
151 | ### possible solutions ###
152 | replicate(100, mean(rnorm(50, mean = 10, sd = 5))) # prints the 100 simulated means
153 | hist(replicate(100, mean(rnorm(50, mean = 10, sd = 5)))) # runs the simulation again, so the histogram shows 100 new means
154 | ##########################
155 | 
156 | 
157 | # set seed: without a seed, the three identical calls below draw different numbers each time
158 | hist(replicate(100, mean(rnorm(10))))
159 | hist(replicate(100, mean(rnorm(10))))
160 | hist(replicate(100, mean(rnorm(10))))
161 | 
162 | set.seed(10) # fix the starting point of the random number generator
163 | hist(replicate(100, mean(rnorm(10))))
164 | 
165 | set.seed(10) # same seed again: the call below reproduces the previous histogram
166 | hist(replicate(100, mean(rnorm(10))))
167 | 
168 | 
169 | # defining sample size within a replication (n) and the number of simulations/repeats/replications (nrep)
170 | ## single random sample of normal distribution N(0,1) with n = 10
171 | set.seed(10)
172 | x <- rnorm(10) # mean = 0, sd = 1 are the defaults
173 | hist(x, breaks = 10, col = "grey", xlim = c(-4,4))
174 | abline(v = 0, col = "red", lty = 2, lwd = 2) # dashed red line at 0, the mean of the distribution we sampled from
175 | abline(v = mean(x), col = "blue", lwd = 2) # solid blue line at the mean of this particular sample
176 | par(xpd = TRUE) # turn off clipping of legend
177 | # NOTE(review): par() settings stay active for the plots drawn further down, until they are changed again - confirm this is intended
178 | # where a function has a long list of arguments, we can put them on a new line each
179 | legend(
180 |   0.9, # x coordinate of the legend
181 |   y = 1.5, # y coordinate of the legend
182 |   legend = c("mean(x)", "0"), # labels, in the same order as lty and col below
183 |   lty = c(1, 2), # solid line for mean(x), dashed line for 0
184 |   col = c("blue","red")
185 | )
185 | 
186 | ## 24 sims of same distribution N(0,1) with n = 10
187 | set.seed(10)
188 | x24 <- replicate(24, rnorm(10)) # matrix with one column of 10 values per simulation
189 | par(mfrow = c(3,8), mar = c(0,0,0,0)) # 3 rows x 8 columns of panels, without margins
190 | # apply is complicated because it takes a function as one of its arguments
191 | x24Plot <- apply( # the plots are drawn as a side effect; assigning the result keeps it from being printed
192 |   x24,
193 |   2, # 2 = apply the function to each column, i.e. to each simulated sample
194 |   function(x) {
195 |     # for fairly simple functions with lots of arguments, we sometimes just cram them all on the same line.
196 |     # it's not great practice but it stops the script getting super long when dealing with graphical objects
197 |     hist(x, col = "grey", xlim = c(-5,5), ylim = c(0,7), breaks = c(-5:5), # identical axes and breaks in every panel
198 |          main = "", ylab = "", xlab = "", xaxt = "n", yaxt = "n") # no titles, labels or axes
199 |     abline(v = mean(x), col = "blue", lwd = 2) # mean of this sample
200 |     abline(v = 0, col = "red", lty = 2, lwd = 2) # mean of the distribution we sampled from
201 |   }
202 | )
203 | 
204 | ## distribution of means and SDs from 24 sims N(0,1) with n = 10
205 | par(mfrow = c(1,2), mar = c(5,5,1,1)) # two panels side by side, with margins again
206 | hist(apply(x24, 2, mean), main = "Mean", col = "grey", xlim = c(-1,1)) # one mean per column, i.e. per simulation
207 | abline(v = 0, col = "red", lty = 2, lwd = 2) # mean used to simulate the data
208 | hist(apply(x24, 2, sd), main = "SD",col = "grey", xlim = c(0.6,1.4)) # one sd per simulation
209 | abline(v = 1, col = "red", lty = 2, lwd = 2) # sd used to simulate the data
210 | 
211 | ## 24 sims of same distribution N(0,1) with n = 1000
212 | set.seed(10)
213 | x24b <- replicate(24, rnorm(1000)) # matrix with one column of 1000 values per simulation
214 | par(mfrow = c(3,8), mar = c(0,0,0,0)) # 3 rows x 8 columns of panels, without margins
215 | x24bPlot <- apply( # the plots are drawn as a side effect; assigning the result keeps it from being printed
216 |   x24b,
217 |   2, # 2 = apply the function to each column, i.e. to each simulated sample
218 |   function(x){
219 |     hist(x, col = "grey", xlim = c(-5,5), ylim = c(0,500), breaks = c(-5:5), # ylim is larger than for n = 10 because each bar holds more values
220 |          main = "", ylab = "", xlab = "", xaxt = "n", yaxt = "n") # no titles, labels or axes
221 |     abline(v = mean(x), col = "blue", lwd = 2) # mean of this sample
222 |     abline(v = 0, col = "red", lty = 2, lwd = 2) # mean of the distribution we sampled from
223 |   }
224 | )
225 | 
226 | ## distribution of means and SDs from 24 sims N(0,1) with n = 1000
227 | par(mfrow = c(1,2), mar = c(5,5,1,1)) # two panels side by side, with margins again
228 | hist(apply(x24b, 2, mean), main = "Mean",col = "grey", xlim = c(-1,1)) # same xlim as for n = 10, so the two figures can be compared
229 | abline(v = 0, col = "red", lty = 2, lwd = 2) # mean used to simulate the data
230 | hist(apply(x24b, 2, sd), main = "SD",col = "grey", xlim = c(0.6,1.4)) # same xlim as for n = 10
231 | abline(v = 1, col = "red", lty = 2, lwd = 2) # sd used to simulate the data
232 | 
233 | ## distribution of means and SDs from 1000 sims N(0,1) with n = 10
234 | set.seed(10)
235 | x1000 <- replicate(1000, rnorm(10)) # matrix with one column of 10 values per simulation (1000 columns)
236 | par(mfrow = c(1,2), mar = c(5,5,1,1)) # two panels side by side
237 | hist(apply(x1000, 2, mean), main = "Mean",col = "grey") # one mean per simulation; no xlim set here, hist() picks the range
238 | abline(v = 0, col = "red", lty = 2, lwd = 2) # mean used to simulate the data
239 | hist(apply(x1000, 2, sd), main = "SD",col = "grey") # one sd per simulation
240 | abline(v = 1, col = "red", lty = 2, lwd = 2) # sd used to simulate the data
241 | 
242 | 
243 | #~~~~~~~~~ Functions -----
244 | 
245 | # writing a function
246 | ## function syntax:
247 | ## AwesomeFunctionName <- function(argument1, argument2,…){
248 | ##                                                         do stuff here
249 | ##                                                        }
250 | ## The last thing that appears in the 'do stuff here' section is the function's 
251 | ## "return value"
252 | 
253 | # YOUR TURN: write a function that takes input "nrep", replicates '(mean(rnorm(100)))'
254 | # nrep times, and draws a histogram of the results
255 | 
256 | 
257 | 
258 | 
259 | 
260 | 
261 | 
262 | 
263 | 
264 | 
265 | 
266 | 
267 | ### possible solutions ###
268 | #### step 1: the action
269 | mean(rnorm(100))
270 | #### step 2: replicate the action 1000 times
271 | replicate(1000, mean(rnorm(100)))
272 | #### step 3: plot the outcome of those simulations
273 | hist(replicate(1000, mean(rnorm(100))))
274 | #### step 4: replicate the action nrep times, with nrep defined outside the function
275 | nrep <- 1000
276 | replicate(nrep, mean(rnorm(100)))
277 | #### step 5: wrap it in a function (input nrep = number of means to simulate, each from 100 draws of rnorm):
278 | histrnorm100 <- function(nrep){
279 |   hist(replicate(nrep, mean(rnorm(100)))) # last (and only) expression, so this is also the function's return value
280 | }
281 | #### step 6: check that the function works
282 | histrnorm100(9)
283 | histrnorm100(1000)
284 | ##########################
285 | 
286 | # YOUR TURN: modify your function
287 | ## to draw a histogram of nrep mean(rnorm(n)), where n is another input
288 | 
289 | 
290 | 
291 | 
292 | 
293 | 
294 | 
295 | 
296 | 
297 | 
298 | 
299 | 
300 | 
301 | 
302 | 
303 | 
304 | ### possible solutions ###
305 | #### step 4: define parameters outside the function
306 | nrep <- 100
307 | n <- 10
308 | replicate(nrep, mean(rnorm(n)))
309 | #### step 5: wrap the action in a function (nrep = number of replicated means, n = number of values per mean):
310 | histrnorm_n <- function(nrep, n){
311 |   hist(replicate(nrep, mean(rnorm(n)))) # last (and only) expression, so this is also the function's return value
312 | }
313 | #### step 6: check that the function works
314 | histrnorm_n(10,10)
315 | histrnorm_n(10,100)
316 | histrnorm_n(100,100)
317 | histrnorm_n(1000,100)
318 | ##########################
319 | 
320 | 
321 | #~~~~~~~~~ Simulating no effect and check alpha -----
322 | 
323 | # YOUR TURN: draw from the same normal distribution twice 
324 | ## and see if the samples differ from each other
325 | ## will they differ significantly in 5% of the nrep?
326 | ### Figure out how to do a t.test in R  
327 | ### Generate two vectors of 10 values drawn from N(0,1) and compare them with a t test  
328 | ### Figure out how to extract the p-value from that object (HINT use `str` or `names`)
329 | ### Write a function simT that generates two vectors of n random normals, compare them with a t test and return the p-value  
330 | ### Repeat with nrep = 20 and draw a histogram for n = 10
331 | ### Repeat with nrep = 100 and draw a histogram for n = 10    
332 | 
333 | 
334 | 
335 | 
336 | 
337 | 
338 | 
339 | 
340 | 
341 | 
342 | 
343 | 
344 | 
345 | 
346 | 
347 | 
348 | ### possible solutions ###
349 | #### Figure out how to do a t.test in R
350 | ?t.test
351 | #### Generate two vectors of 10  N(0,1) 
352 | x1 <- rnorm(10,0,1)
353 | x2 <- rnorm(10,0,1)
354 | #### Compare them with a t test
355 | t.test(x1,x2)
356 | #### extract p value
357 | str(t.test(x1,x2))
358 | t.test(x1,x2)$p.value
359 | #### write function: p-value of a t test comparing two samples of n values drawn from N(0,1)
360 | simT <- function(n) {
361 |   group1 <- rnorm(n = n, mean = 0, sd = 1)
362 |   group2 <- rnorm(n = n, mean = 0, sd = 1)
363 |   t.test(x = group1, y = group2)[["p.value"]]
364 | }
365 | #### test function
366 | simT(50)
367 | 
368 | #### repeat function for n = 10 and for different nrep and plot (bins fixed at 0, 0.05, ..., 1 so the red bar is always p <= 0.05)
369 | par(mfrow = c(1,2))
370 | simTRep <- replicate(20, simT(10))
371 | hist(simTRep, breaks = seq(0, 1, by = 0.05), col = c('red',rep('grey',19)),
372 |      main = "nrep = 20, n = 10", xlab = "pvalue")
373 | 
374 | simTRep2 <- replicate(100, simT(10))
375 | hist(simTRep2, breaks = seq(0, 1, by = 0.05), col = c('red',rep('grey',19)),
376 |      main = "nrep = 100, n = 10", xlab = "pvalue")
377 | ##########################
378 | 
379 | #### repeat function for nrep = 1000 and various n (bins fixed at 0, 0.05, ..., 1 so the red bar is always p <= 0.05)
380 | par(mfrow = c(1,2))
381 | simTRep <- replicate(1000, simT(10))
382 | hist(simTRep, breaks = seq(0, 1, by = 0.05), col = c('red',rep('grey',19)),
383 |      main = "nrep = 1000, n = 10", xlab = "pvalue")
384 | 
385 | simTRep2 <- replicate(1000, simT(100))
386 | hist(simTRep2, breaks = seq(0, 1, by = 0.05), col = c('red',rep('grey',19)),
387 |      main = "nrep = 1000, n = 100", xlab = "pvalue")
388 | 
389 | 
390 | #~~~~~~~~~ Simulating an effect and check power -----
391 | 
392 | # we can calculate the sample size a t.test needs to reach a given power (here 0.8) using:
393 | power.t.test(n = NULL, delta = 0.5, sd = 1, sig.level = 0.05, power = 0.8)
394 | # the required sample size is 64 per group.
395 | 
396 | # YOUR TURN: Use your simulation skills to work out the power of a t-test for a given sample size through simulation.
397 | ## Write a function which:
398 | ### 1. Draws n values from a normal distribution with mean m1, and another n values from a normal distribution with mean m2 (n, m1 and m2 being inputs of the function)
399 | ### 2. Compares the means of these two samples with a t.test and extracts the p.value
400 | ## Then, use that function to replicate the function 1000 times using the parameters used in the power calculation above (that used the power.t.test function).
401 | ## Calculate the proportion of p-values that are <0.05
402 | 
403 | 
404 | 
405 | 
406 | 
407 | 
408 | 
409 | 
410 | 
411 | 
412 | 
413 | ### possible solution ###
414 | #### write new function (sd is an optional input so the simulation can match the "sd" used in power.t.test)
415 | simT2 <- function(n, m1, m2, sd = 1) {
416 |   ##### n is sample size per group, m1 is mean of group 1, m2 is mean of group 2, sd is the SD of both groups (default 1)
417 |   x1 <- rnorm(n, mean = m1, sd = sd)
418 |   x2 <- rnorm(n, mean = m2, sd = sd)
419 |   t.test(x1, x2)$p.value # return value: the p-value of the two-sample t test
420 | }
421 | 
422 | ##### repeat the function 1000 times 
423 | ##### note that we are using a difference of 0.5 between means to match the "delta" 
424 | ##### used in the power calculation 
425 | set.seed(100)
426 | p <- replicate(1000, simT2(n = 64, m1 = 0, m2 = 0.5))
427 | 
428 | #### plot the results (bins fixed at 0, 0.05, ..., 1 so the red bar holds exactly the p-values <= 0.05)
429 | par(mfrow = c(1,1))
430 | hist(p, breaks = seq(0, 1, by = 0.05), col = c('red',rep('grey',19)),
431 |      main = "nrep = 1000, n = 64, delta = 0.5", xlab = 'pvalue')
432 | 
433 | #### calculate the proportion "significant" 
434 | prop.table(table(p < 0.05))
435 | 
436 | ##########################
437 | 
438 | #### power is the probability that the test correctly rejects the null. Since we
439 | #### know the population parameters (as we set them in our simulation), we know
440 | #### that there really is a difference, and the null should be rejected. The power
441 | #### is therefore the proportion of p.values <0.05
442 | 
443 | # compare that to calculating the "power" parameter using the function below with all the other parameters provided (including n)
444 | power.t.test(n = 64, delta = 0.5, sd = 1) # the results are similar
445 | 
446 | 
447 | #~~~~~~~~~ Simulating for a preregistration  -----
448 | 
449 | # YOUR TURN: 
450 | ## Try to make a dataset that looks like this, using the 
451 | ## functions `data.frame()`, `sample()`, and `rnorm()`    
452 | 
453 | # smoking_status lung_cancer sex      age
454 | # 1            Yes          No   M 12.67918
455 | # 2            Yes         Yes   F 23.71397
456 | # 3             No          No   M 28.87786
457 | # 4            Yes          No   F 28.99327
458 | # 5            Yes         Yes   F 30.41415
459 | # 6             No          No   M 44.60615
460 | 
461 | 
462 | 
463 | 
464 | 
465 | 
466 | 
467 | 
468 | 
469 | 
470 | 
471 | 
472 | #### possible solution ###
473 | set.seed(1234)
474 | N <- 10 # size of your dataset
475 | yes_no <- c("Yes", "No") # some options for the outcomes in the data
476 | 
477 | # the data.frame() function lets us create a data frame
478 | # before the " = " is the column name. After " = " is the contents of the column
479 | df <- data.frame(
480 |     smoking_status = sample(yes_no, size = N, replace = TRUE), # TRUE rather than T: T is an ordinary variable that can be overwritten
481 |     lung_cancer = sample(yes_no, size = N, replace = TRUE),
482 |     sex = sample(c("M", "F"), size = N, replace = TRUE),
483 |     age = rnorm(N, 30, sd = 10) # ages drawn from a normal distribution with mean 30 and SD 10
484 | )
485 | # run the parts after the " = " alone to make sure you understand what's going on
486 | sample(c("M", "F"), size = N, replace = TRUE)
487 | 
488 | head(df)
489 | ##########################
490 | 
491 | # YOUR TURN:
492 | ## Run a logistic regression on the data with lung cancer as the outcome and
493 | ## adjusting for the other variables. 
494 | ## You could try something like:
495 | ## glm(lung_cancer ~ smoking_status + sex + age, family = binomial(link = "logit"), data = df)
496 | ## Why doesn't it work? Try to trouble shoot and get the code to work!
497 | 
498 | ## HINT: are the variables the correct data type?
499 | ## HINT: once the model works, use summary() to look at the results
500 | 
501 | 
502 | 
503 | 
504 | 
505 | 
506 | 
507 | 
508 | 
509 | 
510 | 
511 | #### possible solution ###
512 | 
513 | # This mysterious error message appears because the categorical variables
514 | # need to be factors for the model to run. So we are already learning
515 | # something about how the data need to be that you might not have known
516 | # before trying to run the code on a simulated dataset.
517 | # NOTE(review): presumably it is the character outcome (lung_cancer) that triggers the error - confirm
518 | # You can convert the relevant variables (the first three columns of df) to factors
519 | df[1:3] <- lapply(df[1:3], as.factor)
520 | 
521 | # You could also do this one variable at a time:
522 | # df$smoking_status <- as.factor(df$smoking_status)
523 | 
524 | # Now we can rerun the model (logistic regression: binomial family with logit link):
525 | 
526 | m1 <- glm(
527 |   lung_cancer ~ smoking_status + sex + age, 
528 |   family = binomial(link = "logit"), 
529 |   data = df
530 | )
531 | 
532 | # and look at the results
533 | summary(m1)
534 | ##########################
535 | 
536 | 
537 | 
538 | 
539 | 
540 | 
541 | 
542 | 
543 | 
544 | 
545 | 
546 | 
547 | 
548 | 
549 | 
550 | 


--------------------------------------------------------------------------------
/exercise_script_without_solutions.R:
--------------------------------------------------------------------------------
  1 | ###############################################################################################################
  2 | # This R script contains                                                                                      #
  3 | #  - the examples shown in the step-by-step workshop pages                                                    #
  4 | #    (for you to run line-by-line to observe the outcome, or to modify and play with)                         #
  5 | #  - space called 'YOUR TURN' for you to write your own code to answer the exercises from the workshop pages  #
  6 | ###############################################################################################################
  7 | 
  8 | #~~~~~~~~~ Random Numbers Generators and sampling theory -----
  9 | 
 10 | # sample 
 11 | 
 12 | ## x is a sequence
 13 | x <- 1:10
 14 | x
 15 | ?sample # default: replace = FALSE
 16 | sample(x)
 17 | sample(x, replace = TRUE)
 18 | sample(letters, size = 10)
 19 | sample(x, size = 100, replace = TRUE)
 20 | 
 21 | ## x is a vector of combined values
 22 | x <- c(1,5,8)
 23 | x
 24 | sample(x, size = 6, replace = TRUE)
 25 | 
 26 | 
 27 | # YOUR TURN: generate random numbers
 28 | ## Sample 100 values between 3 and 103 with replacement
 29 | 
 30 | 
 31 | 
 32 | # random number generator drawing from specific distributions
 33 | 
 34 | ?runif  # runif(n, min, max)
 35 | ?rpois  # rpois(n, lambda)
 36 | ?rnorm  # rnorm(n, mean, sd)
 37 | ?rbinom # rbinom(n, size, prob)
 38 | 
 39 | # YOUR TURN: generate random numbers
 40 | ## Draw 100 values from a normal distribution with a mean of 0 and a sd of 1
 41 | 
 42 | 
 43 | ## Draw 50 values from a normal distribution with a mean of 10 and sd of 5
 44 | 
 45 | 
 46 | ## Draw 1000 values from a poisson distribution with a lambda of 50
 47 | 
 48 | 
 49 | ## Draw 30 values from a uniform distribution between 0 and 10
 50 | 
 51 | 
 52 | 
 53 | 
 54 | # repeat 
 55 | 
 56 | ?replicate # replicate(n, expression)
 57 | rnorm(10)
 58 | mean(rnorm(10))
 59 | replicate(10,rnorm(10))
 60 | replicate(10, mean(rnorm(100)))
 61 | hist(replicate(10, mean(rnorm(100))))
 62 | 
 63 | # YOUR TURN: generate random numbers, repeat, and plot
 64 | ## Replicate 1000 times the mean of 10 values drawn from a uniform distribution between 0 and 10  
 65 | 
 66 | 
 67 | ## Replicate 100 times the mean of 50 values drawn from a normal distribution of mean 10 and standard deviation 5  
 68 | 
 69 | 
 70 | 
 71 | 
 72 | # set seed
 73 | 
 74 | hist(replicate(100, mean(rnorm(10))))
 75 | hist(replicate(100, mean(rnorm(10))))
 76 | hist(replicate(100, mean(rnorm(10))))
 77 | 
 78 | set.seed(10)
 79 | hist(replicate(100, mean(rnorm(10))))
 80 | 
 81 | set.seed(10)
 82 | hist(replicate(100, mean(rnorm(10))))
 83 | 
 84 | 
 85 | # defining sample size within a replication (n) and the number of simulation/repeats/replication (nrep)
 86 | ## single random sample of normal distribution N(0,1) with n = 10
 87 | set.seed(10)
 88 | x <- rnorm(10) # mean = 0, sd = 1 are the defaults
 89 | hist(x, breaks = 10, col = "grey", xlim = c(-4,4))
 90 | abline(v = 0, col = "red", lty = 2, lwd = 2)
 91 | abline(v = mean(x), col = "blue", lwd = 2)
 92 | par(xpd = TRUE) # turn off clipping of legend
 93 | # where a function has a long list of arguments, we can put them on a new line each
 94 | legend(
 95 |   0.9, 
 96 |   y = 1.5, 
 97 |   legend = c("mean(x)", "0"), 
 98 |   lty = c(1, 2), 
 99 |   col = c("blue","red")
100 | )
101 | 
102 | ## 24 independent samples of size n = 10 from N(0,1), one small histogram per sample
103 | set.seed(10)
104 | x24 <- replicate(n = 24, expr = rnorm(n = 10))
105 | par(mfrow = c(3, 8), mar = rep(0, 4)) # 3 x 8 grid of panels without margins
106 | # apply() takes a function as its third argument and calls it once per column (MARGIN = 2)
107 | x24Plot <- apply(
108 |   X = x24,
109 |   MARGIN = 2,
110 |   FUN = function(draws) {
111 |     # same breaks and axis limits in every panel so the 24 histograms are comparable;
112 |     # titles, labels and axes are switched off to keep the small panels readable
113 |     hist(draws, breaks = -5:5, xlim = c(-5, 5), ylim = c(0, 7), col = "grey",
114 |          main = "", xlab = "", ylab = "", xaxt = "n", yaxt = "n")
115 |     abline(v = mean(draws), lwd = 2, col = "blue")  # observed sample mean
116 |     abline(v = 0, lwd = 2, lty = 2, col = "red")    # reference line at 0
117 |   }
118 | )
119 | 
120 | ## distribution of means and SDs from 24 sims N(0,1) with n = 10
121 | par(mfrow = c(1,2), mar = c(5,5,1,1))
122 | hist(apply(x24, 2, mean), main = "Mean", col = "grey", xlim = c(-1,1))
123 | abline(v = 0, col = "red", lty = 2, lwd = 2)
124 | hist(apply(x24, 2, sd), main = "SD",col = "grey", xlim = c(0.6,1.4))
125 | abline(v = 1, col = "red", lty = 2, lwd = 2)
126 | 
127 | ## 24 independent samples of size n = 1000 from N(0,1), one small histogram per sample
128 | set.seed(10)
129 | x24b <- replicate(n = 24, expr = rnorm(n = 1000))
130 | par(mfrow = c(3, 8), mar = rep(0, 4)) # 3 x 8 grid of panels without margins
131 | x24bPlot <- apply(
132 |   X = x24b,
133 |   MARGIN = 2, # work column by column, i.e. one simulated sample at a time
134 |   FUN = function(draws) {
135 |     hist(draws, breaks = -5:5, xlim = c(-5, 5), ylim = c(0, 500), col = "grey",
136 |          main = "", xlab = "", ylab = "", xaxt = "n", yaxt = "n")
137 |     abline(v = mean(draws), lwd = 2, col = "blue")  # observed sample mean
138 |     abline(v = 0, lwd = 2, lty = 2, col = "red")    # reference line at 0
139 |   }
140 | )
141 | 
142 | ## distribution of means and SDs from 24 sims N(0,1) with n = 1000
143 | par(mfrow = c(1,2), mar = c(5,5,1,1))
144 | hist(apply(x24b, 2, mean), main = "Mean",col = "grey", xlim = c(-1,1))
145 | abline(v = 0, col = "red", lty = 2, lwd = 2)
146 | hist(apply(x24b, 2, sd), main = "SD",col = "grey", xlim = c(0.6,1.4))
147 | abline(v = 1, col = "red", lty = 2, lwd = 2)
148 | 
149 | ## distribution of means and SDs from 1000 sims N(0,1) with n = 10
150 | set.seed(10)
151 | x1000 <- replicate(1000, rnorm(10))
152 | par(mfrow = c(1,2), mar = c(5,5,1,1))
153 | hist(apply(x1000, 2, mean), main = "Mean",col = "grey")
154 | abline(v = 0, col = "red", lty = 2, lwd = 2)
155 | hist(apply(x1000, 2, sd), main = "SD",col = "grey")
156 | abline(v = 1, col = "red", lty = 2, lwd = 2)
157 | 
158 | 
159 | #~~~~~~~~~ Functions -----
160 | 
161 | # writing a function
162 | ## function syntax:
163 | ## AwesomeFunctionName <- function(argument1, argument2,…){
164 | ##                                                         do stuff here
165 | ##                                                        }
166 | ## The last thing that appears in the 'do stuff here' section is the function's 
167 | ## "return value"
168 | 
169 | # YOUR TURN: write a function that takes input "nrep", replicates '(mean(rnorm(100)))'
170 | # nrep times, and draws a histogram of the results
171 | 
172 | 
173 | 
174 | # YOUR TURN: modify your function
175 | ## to draw a histogram of nrep mean(rnorm(n)), where n is another input
176 | 
177 | 
178 | 
179 | 
180 | #~~~~~~~~~ Simulating no effect and check alpha -----
181 | 
182 | # YOUR TURN: draw from the same normal distribution twice 
183 | ## and see if the samples differ from each other
184 | ## will they differ significantly in 5% of the nrep?
185 | ### Figure out how to do a t.test in R  
186 | ### Generate two vectors of 10 values drawn from N(0,1) and compare them with a t test  
187 | ### Figure out how to extract the p-value from that object (HINT use `str` or `names`)
188 | ### Write a function simT that generates two vectors of n random normals, compare them with a t test and return the p-value  
189 | ### Repeat with nrep = 20 and draw a histogram for n = 10
190 | ### Repeat with nrep = 100 and draw a histogram for n = 10    
191 | 
192 | 
193 | 
194 | 
195 | 
196 | 
197 | ##########################
198 | 
199 | #### repeat function for nrep = 1000 and various n (e.g. 10 and 100)
200 | 
201 | 
202 | 
203 | 
204 | 
205 | 
206 | 
207 | #~~~~~~~~~ Simulating an effect and check power -----
208 | 
209 | # we can calculate the sample size a t.test needs to reach a given power (here 0.8) using:
210 | power.t.test(n = NULL, delta = 0.5, sd = 1, sig.level = 0.05, power = 0.8)
211 | # the required sample size is 64 per group.
212 | 
213 | # YOUR TURN: Use your simulation skills to work out the power of a t-test for a given sample size through simulation.
214 | ## Write a function which:
215 | ### 1. Draws n values from a normal distribution with mean m1, and another n values from a normal distribution with mean m2 (n, m1 and m2 being inputs of the function)
216 | ### 2. Compares the means of these two samples with a t.test and extracts the p.value
217 | ## Then, use that function to replicate the function 1000 times using the parameters used in the power calculation above (that used the power.t.test function).
218 | ## Calculate the proportion of p-values that are <0.05
219 | 
220 | 
221 | 
222 | 
223 | #~~~~~~~~~ Simulating for a preregistration  -----
224 | 
225 | # YOUR TURN: 
226 | ## Try to make a dataset that looks like this, using the 
227 | ## functions `data.frame()`, `sample()`, and `rnorm()`    
228 | 
229 | # smoking_status lung_cancer sex      age
230 | # 1            Yes          No   M 12.67918
231 | # 2            Yes         Yes   F 23.71397
232 | # 3             No          No   M 28.87786
233 | # 4            Yes          No   F 28.99327
234 | # 5            Yes         Yes   F 30.41415
235 | # 6             No          No   M 44.60615
236 | 
237 | 
238 | 
239 | 
240 | 
241 | # YOUR TURN:
242 | ## Run a logistic regression on the data with lung cancer as the outcome and
243 | ## adjusting for the other variables. 
244 | ## You could try something like:
245 | ## glm(lung_cancer ~ smoking_status + sex + age, family = binomial(link = "logit"), data = df)
246 | ## Why doesn't it work? Try to trouble shoot and get the code to work!
247 | 
248 | ## HINT: are the variables the correct data type?
249 | ## HINT: once the model works, use summary() to look at the results
250 | 
251 | 
252 | 
253 | 


--------------------------------------------------------------------------------
/index.qmd:
--------------------------------------------------------------------------------
 1 | # Introduction to Simulations in R
 2 | 
 3 | ## About this work
 4 | This tutorial was created by [Malika Ihle](https://www.osc.uni-muenchen.de/about_us/coordinator/index.html) based on materials from [Joel Pick](https://joelpick.github.io/), [Hadley Wickham](https://www.yumpu.com/en/document/view/19077330/simulation-hadley-wickham), and [Kevin Hallgren](https://doi.org/10.20982/tqmp.09.2.p043), with contributions from [James Smith](https://github.com/worcjamessmith).   
 5 | It is licensed under a [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-sa/4.0/).
 6 | 
 7 | ## Prerequisites
 8 | 
 9 | * Have R and RStudio installed. If you don't, follow [these instructions](https://lmu-osc.github.io/Introduction-RStudio-Git-GitHub/installing_software.html).  
10 | * Know some R basics (e.g. how to select a value in a data frame, how to create a vector). If you don't, visit the following tutorial: <a href="https://lmu-osc.github.io/introduction-to-R/" target ="_blank">https://lmu-osc.github.io/introduction-to-R/</a>.  
11 | 
12 | ## Prior to the session: optional preparation to get familiarised with the subject
13 | 1) Watch this [30-minute introduction to credible research](https://osf.io/xtmek/), which contextualises the importance of simulations for reliable research. 
14 | 
15 | 2) Read [Hallgren, K. A. (2013). Conducting simulation studies in the R programming environment. *Tutorials in Quantitative Methods for Psychology*, *9*(2), 43–60](https://doi.org/10.20982/tqmp.09.2.p043).
16 | 
17 | ## Self-paced workshop
18 | ### How it works
19 | The self-paced tutorial (pages linked below) alternates between presenting concepts and simple exercises in which you apply them in R. Each time you see **YOUR TURN**, switch to your local copy of the exercise script (you can choose between a file <a href="https://github.com/lmu-osc/Introduction-Simulations-in-R/blob/main/exercise_script_with_solutions.R" target ="_blank">with</a> or <a href="https://github.com/lmu-osc/Introduction-Simulations-in-R/blob/main/exercise_script_without_solutions.R" target ="_blank">without</a> the solutions depending on e.g. your level of familiarity with R), review the examples if needed, complete the exercise, and check out the proposed answer (which often contains additional tips). Then come back to the online tutorial; after finishing a page, navigate to the next page linked at the bottom to continue. The exercise script contains code for all the exercises and code that generates the plots that appear in the online tutorial, all in order of appearance in the tutorial.  
20 | 
21 | It is necessary that you work through the sections of the tutorial in order. Please read the blurbs of each section below to get an overview of this workshop. Then click on the first page 'Download the material' and follow along by navigating to the next page linked at the bottom of each page! You can get back to this overview at any time by clicking on the title 'Introduction-Simulations-in-R' at the top of each page.
22 |  
23 | 
24 | ### Tutorial
25 | * [Download the material](./tutorial_pages/download-repo.qmd) – Get this tutorial onto your machine.
26 | * [Definition](./tutorial_pages/definition.qmd) – What are simulations?
27 | * [Purpose](./tutorial_pages/purpose.qmd) – What can we use simulations for?
28 | * [Basic principles](./tutorial_pages/basic-principles.qmd) – What do we need to create a simulation?
29 | * [Random number generators](./tutorial_pages/random-numbers-generators.qmd) – How to generate random numbers in R?
30 | * [Repeat](./tutorial_pages/repeat.qmd) – How to repeat the generation of random numbers multiple times?
31 | * [Setting the seed](./tutorial_pages/seed.qmd) – How can you generate the same random numbers?
32 | * [Sample size `n`](./tutorial_pages/sample-size-n.qmd) – How many values should you generate within a simulation?
33 | * [Number of repetitions `nrep`](./tutorial_pages/number-of-simulations-nrep.qmd) – How many repeats of a simulation should you run?
34 | * [DRY rule](./tutorial_pages/dry-rule.qmd) – How to write your own functions?
35 | * [Simulate to check alpha](./tutorial_pages/check-alpha.qmd) – Write your first simulation and check the rate of false-positive findings.  
36 | * [Simulate to check power](./tutorial_pages/check-power.qmd) – Simulate data to perform a power analysis.  
37 | * [Simulate to prepare a preregistration](./tutorial_pages/simulate-for-preregistration.qmd) – Simulate data to test statistical analyses before preregistering them.  
38 | * [General structure](./tutorial_pages/general-structure.qmd) – What is the general structure of a simulation?
39 | * [Limitations](./tutorial_pages/limitations.qmd) – What are the limitations of simulations?
40 | * [Real-life example](./tutorial_pages/real-life-example.qmd) – What are real-life examples of simulations?
41 | * [Additional resources](./tutorial_pages/resources.qmd) – What resources can help you write your own simulation?
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/matomo-analytics.html:
--------------------------------------------------------------------------------
 1 | <!-- Matomo -->
 2 | <script>
 3 |   var _paq = window._paq = window._paq || []; // reuse an existing _paq command queue or start an empty one
 4 |   /* tracker methods like "setCustomDimension" should be called before "trackPageView" */
 5 |   _paq.push(['trackPageView']);
 6 |   _paq.push(['enableLinkTracking']);
 7 |   (function() {
 8 |     var u="https://www.analytics.osc.lmu.de/"; // base URL used for both matomo.php and matomo.js below
 9 |     _paq.push(['setTrackerUrl', u+'matomo.php']);
10 |     _paq.push(['setSiteId', '9']);
11 |     var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
12 |     g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s); // insert the async matomo.js loader before the first script element
13 |   })();
14 | </script>
15 | <!-- End Matomo Code -->
16 | 


--------------------------------------------------------------------------------
/renv.lock:
--------------------------------------------------------------------------------
 1 | {
 2 |   "R": {
 3 |     "Version": "4.4.0",
 4 |     "Repositories": [
 5 |       {
 6 |         "Name": "CRAN",
 7 |         "URL": "https://packagemanager.posit.co/cran/latest"
 8 |       }
 9 |     ]
10 |   },
11 |   "Packages": {
12 |     "pbapply": {
13 |       "Package": "pbapply",
14 |       "Version": "1.7-2",
15 |       "Source": "Repository",
16 |       "Repository": "CRAN",
17 |       "Requirements": [
18 |         "R",
19 |         "parallel"
20 |       ],
21 |       "Hash": "68a2d681e10cf72f0afa1d84d45380e5"
22 |     },
23 |     "renv": {
24 |       "Package": "renv",
25 |       "Version": "1.0.7",
26 |       "Source": "Repository",
27 |       "Repository": "CRAN",
28 |       "Requirements": [
29 |         "utils"
30 |       ],
31 |       "Hash": "397b7b2a265bc5a7a06852524dabae20"
32 |     },
33 |     "yaml": {
34 |       "Package": "yaml",
35 |       "Version": "2.3.8",
36 |       "Source": "Repository"
37 |     }
38 |   }
39 | }
40 | 


--------------------------------------------------------------------------------
/renv/.gitignore:
--------------------------------------------------------------------------------
1 | library/
2 | local/
3 | cellar/
4 | lock/
5 | python/
6 | sandbox/
7 | staging/
8 | 


--------------------------------------------------------------------------------
/renv/activate.R:
--------------------------------------------------------------------------------
   1 | 
   2 | local({
   3 | 
   4 |   # the requested version of renv
   5 |   version <- "1.0.7"
   6 |   attr(version, "sha") <- NULL
   7 | 
   8 |   # the project directory
   9 |   project <- Sys.getenv("RENV_PROJECT")
  10 |   if (!nzchar(project))
  11 |     project <- getwd()
  12 | 
  13 |   # use start-up diagnostics if enabled
  14 |   diagnostics <- Sys.getenv("RENV_STARTUP_DIAGNOSTICS", unset = "FALSE")
  15 |   if (diagnostics) {
  16 |     start <- Sys.time()
  17 |     profile <- tempfile("renv-startup-", fileext = ".Rprof")
  18 |     utils::Rprof(profile)
  19 |     on.exit({
  20 |       utils::Rprof(NULL)
  21 |       elapsed <- signif(difftime(Sys.time(), start, units = "auto"), digits = 2L)
  22 |       writeLines(sprintf("- renv took %s to run the autoloader.", format(elapsed)))
  23 |       writeLines(sprintf("- Profile: %s", profile))
  24 |       print(utils::summaryRprof(profile))
  25 |     }, add = TRUE)
  26 |   }
  27 | 
  28 |   # figure out whether the autoloader is enabled
  29 |   enabled <- local({
  30 | 
  31 |     # first, check config option
  32 |     override <- getOption("renv.config.autoloader.enabled")
  33 |     if (!is.null(override))
  34 |       return(override)
  35 | 
  36 |     # if we're being run in a context where R_LIBS is already set,
  37 |     # don't load -- presumably we're being run as a sub-process and
  38 |     # the parent process has already set up library paths for us
  39 |     rcmd <- Sys.getenv("R_CMD", unset = NA)
  40 |     rlibs <- Sys.getenv("R_LIBS", unset = NA)
  41 |     if (!is.na(rlibs) && !is.na(rcmd))
  42 |       return(FALSE)
  43 | 
  44 |     # next, check environment variables
  45 |     # TODO: prefer using the configuration one in the future
  46 |     envvars <- c(
  47 |       "RENV_CONFIG_AUTOLOADER_ENABLED",
  48 |       "RENV_AUTOLOADER_ENABLED",
  49 |       "RENV_ACTIVATE_PROJECT"
  50 |     )
  51 | 
  52 |     for (envvar in envvars) {
  53 |       envval <- Sys.getenv(envvar, unset = NA)
  54 |       if (!is.na(envval))
  55 |         return(tolower(envval) %in% c("true", "t", "1"))
  56 |     }
  57 | 
  58 |     # enable by default
  59 |     TRUE
  60 | 
  61 |   })
  62 | 
  63 |   # bail if we're not enabled
  64 |   if (!enabled) {
  65 | 
  66 |     # if we're not enabled, we might still need to manually load
  67 |     # the user profile here
  68 |     profile <- Sys.getenv("R_PROFILE_USER", unset = "~/.Rprofile")
  69 |     if (file.exists(profile)) {
  70 |       cfg <- Sys.getenv("RENV_CONFIG_USER_PROFILE", unset = "TRUE")
  71 |       if (tolower(cfg) %in% c("true", "t", "1"))
  72 |         sys.source(profile, envir = globalenv())
  73 |     }
  74 | 
  75 |     return(FALSE)
  76 | 
  77 |   }
  78 | 
  79 |   # avoid recursion
  80 |   if (identical(getOption("renv.autoloader.running"), TRUE)) {
  81 |     warning("ignoring recursive attempt to run renv autoloader")
  82 |     return(invisible(TRUE))
  83 |   }
  84 | 
  85 |   # signal that we're loading renv during R startup
  86 |   options(renv.autoloader.running = TRUE)
  87 |   on.exit(options(renv.autoloader.running = NULL), add = TRUE)
  88 | 
  89 |   # signal that we've consented to use renv
  90 |   options(renv.consent = TRUE)
  91 | 
  92 |   # load the 'utils' package eagerly -- this ensures that renv shims, which
  93 |   # mask 'utils' packages, will come first on the search path
  94 |   library(utils, lib.loc = .Library)
  95 | 
  96 |   # unload renv if it's already been loaded
  97 |   if ("renv" %in% loadedNamespaces())
  98 |     unloadNamespace("renv")
  99 | 
 100 |   # load bootstrap tools   
 101 |   `%||%` <- function(x, y) {
 102 |     if (!is.null(x)) x else y  # fall back to y only when x is NULL
 103 |   }
 104 |   
 105 |   catf <- function(fmt, ..., appendLF = TRUE) {
 106 |   
 107 |     # print nothing when the "renv.bootstrap.quiet" option is set
 108 |     if (getOption("renv.bootstrap.quiet", default = FALSE))
 109 |       return(invisible())
 110 |   
 111 |     # format as sprintf() does and write to stdout; appendLF decides whether a newline follows
 112 |     text <- sprintf(fmt, ...)
 113 |     cat(text, file = stdout(), sep = if (appendLF) "\n" else "")
 114 |   
 115 |     invisible(text)
 116 |   }
 117 |   
 118 |   header <- function(label,
 119 |                      ...,
 120 |                      prefix = "#",
 121 |                      suffix = "-",
 122 |                      n = min(getOption("width"), 78))
 123 |   {
 124 |     label <- sprintf(label, ...)  # label is a sprintf() format filled in from ...
 125 |     n <- max(n - nchar(label) - nchar(prefix) - 2L, 8L)  # width left for the suffix run, never below 8
 126 |     if (n <= 0)  # n is at least 8 after the max() above, so this guard does not fire for non-NA n
 127 |       return(paste(prefix, label))
 128 |   
 129 |     tail <- paste(rep.int(suffix, n), collapse = "")  # suffix repeated n times
 130 |     paste0(prefix, " ", label, " ", tail)  # returns "<prefix> <label> <suffix run>"
 131 |   
 132 |   }
 133 |   
 134 |   heredoc <- function(text, leave = 0) {
 135 |   
 136 |     # remove leading, trailing whitespace
 137 |     trimmed <- gsub("^\\s*\\n|\\n\\s*$", "", text)
 138 |   
 139 |     # split into lines
 140 |     lines <- strsplit(trimmed, "\n", fixed = TRUE)[[1L]]
 141 |   
 142 |     # compute common indent
 143 |     indent <- regexpr("[^[:space:]]", lines)
 144 |     common <- min(setdiff(indent, -1L)) - leave
 145 |     paste(substring(lines, common), collapse = "\n")
 146 |   
 147 |   }
 148 |   
 149 |   startswith <- function(string, prefix) {
 150 |     substring(string, 1, nchar(prefix)) == prefix
 151 |   }
 152 |   
 153 |   bootstrap <- function(version, library) {
 154 |   
 155 |     friendly <- renv_bootstrap_version_friendly(version)
 156 |     section <- header(sprintf("Bootstrapping renv %s", friendly))
 157 |     catf(section)
 158 |   
 159 |     # attempt to download renv
 160 |     catf("- Downloading renv ... ", appendLF = FALSE)
 161 |     withCallingHandlers(
 162 |       tarball <- renv_bootstrap_download(version),
 163 |       error = function(err) {
 164 |         catf("FAILED")
 165 |         stop("failed to download:\n", conditionMessage(err))
 166 |       }
 167 |     )
 168 |     catf("OK")
 169 |     on.exit(unlink(tarball), add = TRUE)
 170 |   
 171 |     # now attempt to install
 172 |     catf("- Installing renv  ... ", appendLF = FALSE)
 173 |     withCallingHandlers(
 174 |       status <- renv_bootstrap_install(version, tarball, library),
 175 |       error = function(err) {
 176 |         catf("FAILED")
 177 |         stop("failed to install:\n", conditionMessage(err))
 178 |       }
 179 |     )
 180 |     catf("OK")
 181 |   
 182 |     # add empty line to break up bootstrapping from normal output
 183 |     catf("")
 184 |   
 185 |     return(invisible())
 186 |   }
 187 |   
 188 |   renv_bootstrap_tests_running <- function() {
 189 |     getOption("renv.tests.running", default = FALSE)
 190 |   }
 191 |   
 192 |   renv_bootstrap_repos <- function() {
 193 |   
 194 |     # get CRAN repository
 195 |     cran <- getOption("renv.repos.cran", "https://cloud.r-project.org")
 196 |   
 197 |     # check for repos override
 198 |     repos <- Sys.getenv("RENV_CONFIG_REPOS_OVERRIDE", unset = NA)
 199 |     if (!is.na(repos)) {
 200 |   
 201 |       # check for RSPM; if set, use a fallback repository for renv
 202 |       rspm <- Sys.getenv("RSPM", unset = NA)
 203 |       if (identical(rspm, repos))
 204 |         repos <- c(RSPM = rspm, CRAN = cran)
 205 |   
 206 |       return(repos)
 207 |   
 208 |     }
 209 |   
 210 |     # check for lockfile repositories
 211 |     repos <- tryCatch(renv_bootstrap_repos_lockfile(), error = identity)
 212 |     if (!inherits(repos, "error") && length(repos))
 213 |       return(repos)
 214 |   
 215 |     # retrieve current repos
 216 |     repos <- getOption("repos")
 217 |   
 218 |     # ensure @CRAN@ entries are resolved
 219 |     repos[repos == "@CRAN@"] <- cran
 220 |   
 221 |     # add in renv.bootstrap.repos if set
 222 |     default <- c(FALLBACK = "https://cloud.r-project.org")
 223 |     extra <- getOption("renv.bootstrap.repos", default = default)
 224 |     repos <- c(repos, extra)
 225 |   
 226 |     # remove duplicates that might've snuck in
 227 |     dupes <- duplicated(repos) | duplicated(names(repos))
 228 |     repos[!dupes]
 229 |   
 230 |   }
 231 |   
 232 |   renv_bootstrap_repos_lockfile <- function() {
 233 |   
 234 |     lockpath <- Sys.getenv("RENV_PATHS_LOCKFILE", unset = "renv.lock")
 235 |     if (!file.exists(lockpath))
 236 |       return(NULL)
 237 |   
 238 |     lockfile <- tryCatch(renv_json_read(lockpath), error = identity)
 239 |     if (inherits(lockfile, "error")) {
 240 |       warning(lockfile)
 241 |       return(NULL)
 242 |     }
 243 |   
 244 |     repos <- lockfile$R$Repositories
 245 |     if (length(repos) == 0)
 246 |       return(NULL)
 247 |   
 248 |     keys <- vapply(repos, `[[`, "Name", FUN.VALUE = character(1))
 249 |     vals <- vapply(repos, `[[`, "URL", FUN.VALUE = character(1))
 250 |     names(vals) <- keys
 251 |   
 252 |     return(vals)
 253 |   
 254 |   }
 255 |   
 256 |   renv_bootstrap_download <- function(version) {
 257 |   
 258 |     sha <- attr(version, "sha", exact = TRUE)
 259 |   
 260 |     methods <- if (!is.null(sha)) {
 261 |   
 262 |       # attempting to bootstrap a development version of renv
 263 |       c(
 264 |         function() renv_bootstrap_download_tarball(sha),
 265 |         function() renv_bootstrap_download_github(sha)
 266 |       )
 267 |   
 268 |     } else {
 269 |   
 270 |       # attempting to bootstrap a release version of renv
 271 |       c(
 272 |         function() renv_bootstrap_download_tarball(version),
 273 |         function() renv_bootstrap_download_cran_latest(version),
 274 |         function() renv_bootstrap_download_cran_archive(version)
 275 |       )
 276 |   
 277 |     }
 278 |   
 279 |     for (method in methods) {
 280 |       path <- tryCatch(method(), error = identity)
 281 |       if (is.character(path) && file.exists(path))
 282 |         return(path)
 283 |     }
 284 |   
 285 |     stop("All download methods failed")
 286 |   
 287 |   }
 288 |   
 289 |   renv_bootstrap_download_impl <- function(url, destfile) {
 290 |   
 291 |     mode <- "wb"
 292 |   
 293 |     # https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17715
 294 |     fixup <-
 295 |       Sys.info()[["sysname"]] == "Windows" &&
 296 |       substring(url, 1L, 5L) == "file:"
 297 |   
 298 |     if (fixup)
 299 |       mode <- "w+b"
 300 |   
 301 |     args <- list(
 302 |       url      = url,
 303 |       destfile = destfile,
 304 |       mode     = mode,
 305 |       quiet    = TRUE
 306 |     )
 307 |   
 308 |     if ("headers" %in% names(formals(utils::download.file)))
 309 |       args$headers <- renv_bootstrap_download_custom_headers(url)
 310 |   
 311 |     do.call(utils::download.file, args)
 312 |   
 313 |   }
 314 |   
 315 |   renv_bootstrap_download_custom_headers <- function(url) {
 316 |   
 317 |     headers <- getOption("renv.download.headers")
 318 |     if (is.null(headers))
 319 |       return(character())
 320 |   
 321 |     if (!is.function(headers))
 322 |       stopf("'renv.download.headers' is not a function")
 323 |   
 324 |     headers <- headers(url)
 325 |     if (length(headers) == 0L)
 326 |       return(character())
 327 |   
 328 |     if (is.list(headers))
 329 |       headers <- unlist(headers, recursive = FALSE, use.names = TRUE)
 330 |   
 331 |     ok <-
 332 |       is.character(headers) &&
 333 |       is.character(names(headers)) &&
 334 |       all(nzchar(names(headers)))
 335 |   
 336 |     if (!ok)
 337 |       stop("invocation of 'renv.download.headers' did not return a named character vector")
 338 |   
 339 |     headers
 340 |   
 341 |   }
 342 |   
 343 |   renv_bootstrap_download_cran_latest <- function(version) {
 344 |   
 345 |     spec <- renv_bootstrap_download_cran_latest_find(version)
 346 |     type  <- spec$type
 347 |     repos <- spec$repos
 348 |   
 349 |     baseurl <- utils::contrib.url(repos = repos, type = type)
 350 |     ext <- if (identical(type, "source"))
 351 |       ".tar.gz"
 352 |     else if (Sys.info()[["sysname"]] == "Windows")
 353 |       ".zip"
 354 |     else
 355 |       ".tgz"
 356 |     name <- sprintf("renv_%s%s", version, ext)
 357 |     url <- paste(baseurl, name, sep = "/")
 358 |   
 359 |     destfile <- file.path(tempdir(), name)
 360 |     status <- tryCatch(
 361 |       renv_bootstrap_download_impl(url, destfile),
 362 |       condition = identity
 363 |     )
 364 |   
 365 |     if (inherits(status, "condition"))
 366 |       return(FALSE)
 367 |   
 368 |     # report success and return
 369 |     destfile
 370 |   
 371 |   }
 372 |   
 373 |   renv_bootstrap_download_cran_latest_find <- function(version) {
 374 |   
 375 |     # check whether binaries are supported on this system
 376 |     binary <-
 377 |       getOption("renv.bootstrap.binary", default = TRUE) &&
 378 |       !identical(.Platform$pkgType, "source") &&
 379 |       !identical(getOption("pkgType"), "source") &&
 380 |       Sys.info()[["sysname"]] %in% c("Darwin", "Windows")
 381 |   
 382 |     types <- c(if (binary) "binary", "source")
 383 |   
 384 |     # iterate over types + repositories
 385 |     for (type in types) {
 386 |       for (repos in renv_bootstrap_repos()) {
 387 |   
 388 |         # retrieve package database
 389 |         db <- tryCatch(
 390 |           as.data.frame(
 391 |             utils::available.packages(type = type, repos = repos),
 392 |             stringsAsFactors = FALSE
 393 |           ),
 394 |           error = identity
 395 |         )
 396 |   
 397 |         if (inherits(db, "error"))
 398 |           next
 399 |   
 400 |         # check for compatible entry
 401 |         entry <- db[db$Package %in% "renv" & db$Version %in% version, ]
 402 |         if (nrow(entry) == 0)
 403 |           next
 404 |   
 405 |         # found it; return spec to caller
 406 |         spec <- list(entry = entry, type = type, repos = repos)
 407 |         return(spec)
 408 |   
 409 |       }
 410 |     }
 411 |   
 412 |     # if we got here, we failed to find renv
 413 |     fmt <- "renv %s is not available from your declared package repositories"
 414 |     stop(sprintf(fmt, version))
 415 |   
 416 |   }
 417 |   
 418 |   renv_bootstrap_download_cran_archive <- function(version) {
 419 |   
 420 |     name <- sprintf("renv_%s.tar.gz", version)
 421 |     repos <- renv_bootstrap_repos()
 422 |     urls <- file.path(repos, "src/contrib/Archive/renv", name)
 423 |     destfile <- file.path(tempdir(), name)
 424 |   
 425 |     for (url in urls) {
 426 |   
 427 |       status <- tryCatch(
 428 |         renv_bootstrap_download_impl(url, destfile),
 429 |         condition = identity
 430 |       )
 431 |   
 432 |       if (identical(status, 0L))
 433 |         return(destfile)
 434 |   
 435 |     }
 436 |   
 437 |     return(FALSE)
 438 |   
 439 |   }
 440 |   
 441 |   renv_bootstrap_download_tarball <- function(version) {
 442 |   
 443 |     # if the user has provided the path to a tarball via
 444 |     # an environment variable, then use it
 445 |     tarball <- Sys.getenv("RENV_BOOTSTRAP_TARBALL", unset = NA)
 446 |     if (is.na(tarball))
 447 |       return()
 448 |   
 449 |     # allow directories
 450 |     if (dir.exists(tarball)) {
 451 |       name <- sprintf("renv_%s.tar.gz", version)
 452 |       tarball <- file.path(tarball, name)
 453 |     }
 454 |   
 455 |     # bail if it doesn't exist
 456 |     if (!file.exists(tarball)) {
 457 |   
 458 |       # let the user know we weren't able to honour their request
 459 |       fmt <- "- RENV_BOOTSTRAP_TARBALL is set (%s) but does not exist."
 460 |       msg <- sprintf(fmt, tarball)
 461 |       warning(msg)
 462 |   
 463 |       # bail
 464 |       return()
 465 |   
 466 |     }
 467 |   
 468 |     catf("- Using local tarball '%s'.", tarball)
 469 |     tarball
 470 |   
 471 |   }
 472 |   
 473 |   renv_bootstrap_download_github <- function(version) {
 474 |   
 475 |     enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE")
 476 |     if (!identical(enabled, "TRUE"))
 477 |       return(FALSE)
 478 |   
 479 |     # prepare download options
 480 |     pat <- Sys.getenv("GITHUB_PAT")
 481 |     if (nzchar(Sys.which("curl")) && nzchar(pat)) {
 482 |       fmt <- "--location --fail --header \"Authorization: token %s\""
 483 |       extra <- sprintf(fmt, pat)
 484 |       saved <- options("download.file.method", "download.file.extra")
 485 |       options(download.file.method = "curl", download.file.extra = extra)
 486 |       on.exit(do.call(base::options, saved), add = TRUE)
 487 |     } else if (nzchar(Sys.which("wget")) && nzchar(pat)) {
 488 |       fmt <- "--header=\"Authorization: token %s\""
 489 |       extra <- sprintf(fmt, pat)
 490 |       saved <- options("download.file.method", "download.file.extra")
 491 |       options(download.file.method = "wget", download.file.extra = extra)
 492 |       on.exit(do.call(base::options, saved), add = TRUE)
 493 |     }
 494 |   
 495 |     url <- file.path("https://api.github.com/repos/rstudio/renv/tarball", version)
 496 |     name <- sprintf("renv_%s.tar.gz", version)
 497 |     destfile <- file.path(tempdir(), name)
 498 |   
 499 |     status <- tryCatch(
 500 |       renv_bootstrap_download_impl(url, destfile),
 501 |       condition = identity
 502 |     )
 503 |   
 504 |     if (!identical(status, 0L))
 505 |       return(FALSE)
 506 |   
 507 |     renv_bootstrap_download_augment(destfile)
 508 |   
 509 |     return(destfile)
 510 |   
 511 |   }
 512 |   
 513 |   # Add Sha to DESCRIPTION. This is stop gap until #890, after which we
 514 |   # can use renv::install() to fully capture metadata.
 515 |   renv_bootstrap_download_augment <- function(destfile) {
 516 |     sha <- renv_bootstrap_git_extract_sha1_tar(destfile)
 517 |     if (is.null(sha)) {
 518 |       return()
 519 |     }
 520 |   
 521 |     # Untar
 522 |     tempdir <- tempfile("renv-github-")
 523 |     on.exit(unlink(tempdir, recursive = TRUE), add = TRUE)
 524 |     untar(destfile, exdir = tempdir)
 525 |     pkgdir <- dir(tempdir, full.names = TRUE)[[1]]
 526 |   
 527 |     # Modify description
 528 |     desc_path <- file.path(pkgdir, "DESCRIPTION")
 529 |     desc_lines <- readLines(desc_path)
 530 |     remotes_fields <- c(
 531 |       "RemoteType: github",
 532 |       "RemoteHost: api.github.com",
 533 |       "RemoteRepo: renv",
 534 |       "RemoteUsername: rstudio",
 535 |       "RemotePkgRef: rstudio/renv",
 536 |       paste("RemoteRef: ", sha),
 537 |       paste("RemoteSha: ", sha)
 538 |     )
 539 |     writeLines(c(desc_lines[desc_lines != ""], remotes_fields), con = desc_path)
 540 |   
 541 |     # Re-tar
 542 |     local({
 543 |       old <- setwd(tempdir)
 544 |       on.exit(setwd(old), add = TRUE)
 545 |   
 546 |       tar(destfile, compression = "gzip")
 547 |     })
 548 |     invisible()
 549 |   }
 550 |   
 551 |   # Extract the commit hash from a git archive. Git archives include the SHA1
 552 |   # hash as the comment field of the tarball pax extended header
 553 |   # (see https://www.kernel.org/pub/software/scm/git/docs/git-archive.html)
 554 |   # For GitHub archives this should be the first header after the default one
 555 |   # (512 byte) header.
 556 |   renv_bootstrap_git_extract_sha1_tar <- function(bundle) {
 557 |   
 558 |     # open the bundle for reading
 559 |     # We use gzcon for everything because (from ?gzcon)
 560 |     # > Reading from a connection which does not supply a 'gzip' magic
 561 |     # > header is equivalent to reading from the original connection
 562 |     conn <- gzcon(file(bundle, open = "rb", raw = TRUE))
 563 |     on.exit(close(conn))
 564 |   
 565 |     # The default pax header is 512 bytes long and the first pax extended header
 566 |     # with the comment should be 51 bytes long
 567 |     # `52 comment=` (11 chars) + 40 byte SHA1 hash
 568 |     len <- 0x200 + 0x33
 569 |     res <- rawToChar(readBin(conn, "raw", n = len)[0x201:len])
 570 |   
 571 |     if (grepl("^52 comment=", res)) {
 572 |       sub("52 comment=", "", res)
 573 |     } else {
 574 |       NULL
 575 |     }
 576 |   }
 577 |   
 578 |   renv_bootstrap_install <- function(version, tarball, library) {
 579 |   
 580 |     # attempt to install it into project library
 581 |     dir.create(library, showWarnings = FALSE, recursive = TRUE)
 582 |     output <- renv_bootstrap_install_impl(library, tarball)
 583 |   
 584 |     # check for successful install
 585 |     status <- attr(output, "status")
 586 |     if (is.null(status) || identical(status, 0L))
 587 |       return(status)
 588 |   
 589 |     # an error occurred; report it
 590 |     header <- "installation of renv failed"
 591 |     lines <- paste(rep.int("=", nchar(header)), collapse = "")
 592 |     text <- paste(c(header, lines, output), collapse = "\n")
 593 |     stop(text)
 594 |   
 595 |   }
 596 |   
 597 |   renv_bootstrap_install_impl <- function(library, tarball) {
 598 |   
 599 |     # invoke using system2 so we can capture and report output
 600 |     bin <- R.home("bin")
 601 |     exe <- if (Sys.info()[["sysname"]] == "Windows") "R.exe" else "R"
 602 |     R <- file.path(bin, exe)
 603 |   
 604 |     args <- c(
 605 |       "--vanilla", "CMD", "INSTALL", "--no-multiarch",
 606 |       "-l", shQuote(path.expand(library)),
 607 |       shQuote(path.expand(tarball))
 608 |     )
 609 |   
 610 |     system2(R, args, stdout = TRUE, stderr = TRUE)
 611 |   
 612 |   }
 613 |   
 614 |   renv_bootstrap_platform_prefix <- function() {
 615 |   
 616 |     # construct version prefix
 617 |     version <- paste(R.version$major, R.version$minor, sep = ".")
 618 |     prefix <- paste("R", numeric_version(version)[1, 1:2], sep = "-")
 619 |   
 620 |     # include SVN revision for development versions of R
 621 |     # (to avoid sharing platform-specific artefacts with released versions of R)
 622 |     devel <-
 623 |       identical(R.version[["status"]],   "Under development (unstable)") ||
 624 |       identical(R.version[["nickname"]], "Unsuffered Consequences")
 625 |   
 626 |     if (devel)
 627 |       prefix <- paste(prefix, R.version[["svn rev"]], sep = "-r")
 628 |   
 629 |     # build list of path components
 630 |     components <- c(prefix, R.version$platform)
 631 |   
 632 |     # include prefix if provided by user
 633 |     prefix <- renv_bootstrap_platform_prefix_impl()
 634 |     if (!is.na(prefix) && nzchar(prefix))
 635 |       components <- c(prefix, components)
 636 |   
 637 |     # build prefix
 638 |     paste(components, collapse = "/")
 639 |   
 640 |   }
 641 |   
 642 |   renv_bootstrap_platform_prefix_impl <- function() {
 643 |   
 644 |     # if an explicit prefix has been supplied, use it
 645 |     prefix <- Sys.getenv("RENV_PATHS_PREFIX", unset = NA)
 646 |     if (!is.na(prefix))
 647 |       return(prefix)
 648 |   
 649 |     # if the user has requested an automatic prefix, generate it
 650 |     auto <- Sys.getenv("RENV_PATHS_PREFIX_AUTO", unset = NA)
 651 |     if (is.na(auto) && getRversion() >= "4.4.0")
 652 |       auto <- "TRUE"
 653 |   
 654 |     if (auto %in% c("TRUE", "True", "true", "1"))
 655 |       return(renv_bootstrap_platform_prefix_auto())
 656 |   
 657 |     # empty string on failure
 658 |     ""
 659 |   
 660 |   }
 661 |   
 662 |   renv_bootstrap_platform_prefix_auto <- function() {
 663 |   
 664 |     prefix <- tryCatch(renv_bootstrap_platform_os(), error = identity)
 665 |     if (inherits(prefix, "error") || prefix %in% "unknown") {
 666 |   
 667 |       msg <- paste(
 668 |         "failed to infer current operating system",
 669 |         "please file a bug report at https://github.com/rstudio/renv/issues",
 670 |         sep = "; "
 671 |       )
 672 |   
 673 |       warning(msg)
 674 |   
 675 |     }
 676 |   
 677 |     prefix
 678 |   
 679 |   }
 680 |   
 681 |   renv_bootstrap_platform_os <- function() {
 682 |   
 683 |     sysinfo <- Sys.info()
 684 |     sysname <- sysinfo[["sysname"]]
 685 |   
 686 |     # handle Windows + macOS up front
 687 |     if (sysname == "Windows")
 688 |       return("windows")
 689 |     else if (sysname == "Darwin")
 690 |       return("macos")
 691 |   
 692 |     # check for os-release files
 693 |     for (file in c("/etc/os-release", "/usr/lib/os-release"))
 694 |       if (file.exists(file))
 695 |         return(renv_bootstrap_platform_os_via_os_release(file, sysinfo))
 696 |   
 697 |     # check for redhat-release files
 698 |     if (file.exists("/etc/redhat-release"))
 699 |       return(renv_bootstrap_platform_os_via_redhat_release())
 700 |   
 701 |     "unknown"
 702 |   
 703 |   }
 704 |   
 705 |   renv_bootstrap_platform_os_via_os_release <- function(file, sysinfo) {
 706 |   
 707 |     # read /etc/os-release
 708 |     release <- utils::read.table(
 709 |       file             = file,
 710 |       sep              = "=",
 711 |       quote            = c("\"", "'"),
 712 |       col.names        = c("Key", "Value"),
 713 |       comment.char     = "#",
 714 |       stringsAsFactors = FALSE
 715 |     )
 716 |   
 717 |     vars <- as.list(release$Value)
 718 |     names(vars) <- release$Key
 719 |   
 720 |     # get os name
 721 |     os <- tolower(sysinfo[["sysname"]])
 722 |   
 723 |     # read id
 724 |     id <- "unknown"
 725 |     for (field in c("ID", "ID_LIKE")) {
 726 |       if (field %in% names(vars) && nzchar(vars[[field]])) {
 727 |         id <- vars[[field]]
 728 |         break
 729 |       }
 730 |     }
 731 |   
 732 |     # read version
 733 |     version <- "unknown"
 734 |     for (field in c("UBUNTU_CODENAME", "VERSION_CODENAME", "VERSION_ID", "BUILD_ID")) {
 735 |       if (field %in% names(vars) && nzchar(vars[[field]])) {
 736 |         version <- vars[[field]]
 737 |         break
 738 |       }
 739 |     }
 740 |   
 741 |     # join together
 742 |     paste(c(os, id, version), collapse = "-")
 743 |   
 744 |   }
 745 |   
 746 |   renv_bootstrap_platform_os_via_redhat_release <- function() {
 747 |   
 748 |     # read /etc/redhat-release
 749 |     contents <- readLines("/etc/redhat-release", warn = FALSE)
 750 |   
 751 |     # infer id
 752 |     id <- if (grepl("centos", contents, ignore.case = TRUE))
 753 |       "centos"
 754 |     else if (grepl("redhat", contents, ignore.case = TRUE))
 755 |       "redhat"
 756 |     else
 757 |       "unknown"
 758 |   
 759 |     # try to find a version component (very hacky)
 760 |     version <- "unknown"
 761 |   
 762 |     parts <- strsplit(contents, "[[:space:]]")[[1L]]
 763 |     for (part in parts) {
 764 |   
 765 |       nv <- tryCatch(numeric_version(part), error = identity)
 766 |       if (inherits(nv, "error"))
 767 |         next
 768 |   
 769 |       version <- nv[1, 1]
 770 |       break
 771 |   
 772 |     }
 773 |   
 774 |     paste(c("linux", id, version), collapse = "-")
 775 |   
 776 |   }
 777 |   
 778 |   renv_bootstrap_library_root_name <- function(project) {
 779 |   
 780 |     # use project name as-is if requested
 781 |     asis <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT_ASIS", unset = "FALSE")
 782 |     if (asis)
 783 |       return(basename(project))
 784 |   
 785 |     # otherwise, disambiguate based on project's path
 786 |     id <- substring(renv_bootstrap_hash_text(project), 1L, 8L)
 787 |     paste(basename(project), id, sep = "-")
 788 |   
 789 |   }
 790 |   
 791 |   renv_bootstrap_library_root <- function(project) {
 792 |   
 793 |     prefix <- renv_bootstrap_profile_prefix()
 794 |   
 795 |     path <- Sys.getenv("RENV_PATHS_LIBRARY", unset = NA)
 796 |     if (!is.na(path))
 797 |       return(paste(c(path, prefix), collapse = "/"))
 798 |   
 799 |     path <- renv_bootstrap_library_root_impl(project)
 800 |     if (!is.null(path)) {
 801 |       name <- renv_bootstrap_library_root_name(project)
 802 |       return(paste(c(path, prefix, name), collapse = "/"))
 803 |     }
 804 |   
 805 |     renv_bootstrap_paths_renv("library", project = project)
 806 |   
 807 |   }
 808 |   
 809 |   renv_bootstrap_library_root_impl <- function(project) {
 810 |   
 811 |     root <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT", unset = NA)
 812 |     if (!is.na(root))
 813 |       return(root)
 814 |   
 815 |     type <- renv_bootstrap_project_type(project)
 816 |     if (identical(type, "package")) {
 817 |       userdir <- renv_bootstrap_user_dir()
 818 |       return(file.path(userdir, "library"))
 819 |     }
 820 |   
 821 |   }
 822 |   
 823 |   renv_bootstrap_validate_version <- function(version, description = NULL) {
 824 |   
 825 |     # resolve description file
 826 |     #
 827 |     # avoid passing lib.loc to `packageDescription()` below, since R will
 828 |     # use the loaded version of the package by default anyhow. note that
 829 |     # this function should only be called after 'renv' is loaded
 830 |     # https://github.com/rstudio/renv/issues/1625
 831 |     description <- description %||% packageDescription("renv")
 832 |   
 833 |     # check whether requested version 'version' matches loaded version of renv
 834 |     sha <- attr(version, "sha", exact = TRUE)
 835 |     valid <- if (!is.null(sha))
 836 |       renv_bootstrap_validate_version_dev(sha, description)
 837 |     else
 838 |       renv_bootstrap_validate_version_release(version, description)
 839 |   
 840 |     if (valid)
 841 |       return(TRUE)
 842 |   
 843 |     # the loaded version of renv doesn't match the requested version;
 844 |     # give the user instructions on how to proceed
 845 |     dev <- identical(description[["RemoteType"]], "github")
 846 |     remote <- if (dev)
 847 |       paste("rstudio/renv", description[["RemoteSha"]], sep = "@")
 848 |     else
 849 |       paste("renv", description[["Version"]], sep = "@")
 850 |   
 851 |     # display both loaded version + sha if available
 852 |     friendly <- renv_bootstrap_version_friendly(
 853 |       version = description[["Version"]],
 854 |       sha     = if (dev) description[["RemoteSha"]]
 855 |     )
 856 |   
 857 |     fmt <- heredoc("
 858 |       renv %1$s was loaded from project library, but this project is configured to use renv %2$s.
 859 |       - Use `renv::record(\"%3$s\")` to record renv %1$s in the lockfile.
 860 |       - Use `renv::restore(packages = \"renv\")` to install renv %2$s into the project library.
 861 |     ")
 862 |     catf(fmt, friendly, renv_bootstrap_version_friendly(version), remote)
 863 |   
 864 |     FALSE
 865 |   
 866 |   }
 867 |   
 868 |   renv_bootstrap_validate_version_dev <- function(version, description) {
 869 |     expected <- description[["RemoteSha"]]
 870 |     is.character(expected) && startswith(expected, version)
 871 |   }
 872 |   
 873 |   renv_bootstrap_validate_version_release <- function(version, description) {
 874 |     expected <- description[["Version"]]
 875 |     is.character(expected) && identical(expected, version)
 876 |   }
 877 |   
 878 |   renv_bootstrap_hash_text <- function(text) {
 879 |   
 880 |     hashfile <- tempfile("renv-hash-")
 881 |     on.exit(unlink(hashfile), add = TRUE)
 882 |   
 883 |     writeLines(text, con = hashfile)
 884 |     tools::md5sum(hashfile)
 885 |   
 886 |   }
 887 |   
 888 |   renv_bootstrap_load <- function(project, libpath, version) {
 889 |   
 890 |     # try to load renv from the project library
 891 |     if (!requireNamespace("renv", lib.loc = libpath, quietly = TRUE))
 892 |       return(FALSE)
 893 |   
 894 |     # warn if the version of renv loaded does not match
 895 |     renv_bootstrap_validate_version(version)
 896 |   
 897 |     # execute renv load hooks, if any
 898 |     hooks <- getHook("renv::autoload")
 899 |     for (hook in hooks)
 900 |       if (is.function(hook))
 901 |         tryCatch(hook(), error = warnify)
 902 |   
 903 |     # load the project
 904 |     renv::load(project)
 905 |   
 906 |     TRUE
 907 |   
 908 |   }
 909 |   
 910 |   renv_bootstrap_profile_load <- function(project) {
 911 |   
 912 |     # if RENV_PROFILE is already set, just use that
 913 |     profile <- Sys.getenv("RENV_PROFILE", unset = NA)
 914 |     if (!is.na(profile) && nzchar(profile))
 915 |       return(profile)
 916 |   
 917 |     # check for a profile file (nothing to do if it doesn't exist)
 918 |     path <- renv_bootstrap_paths_renv("profile", profile = FALSE, project = project)
 919 |     if (!file.exists(path))
 920 |       return(NULL)
 921 |   
 922 |     # read the profile, and set it if it exists
 923 |     contents <- readLines(path, warn = FALSE)
 924 |     if (length(contents) == 0L)
 925 |       return(NULL)
 926 |   
 927 |     # set RENV_PROFILE
 928 |     profile <- contents[[1L]]
 929 |     if (!profile %in% c("", "default"))
 930 |       Sys.setenv(RENV_PROFILE = profile)
 931 |   
 932 |     profile
 933 |   
 934 |   }
 935 |   
 936 |   renv_bootstrap_profile_prefix <- function() {
 937 |     profile <- renv_bootstrap_profile_get()
 938 |     if (!is.null(profile))
 939 |       return(file.path("profiles", profile, "renv"))
 940 |   }
 941 |   
 942 |   renv_bootstrap_profile_get <- function() {
 943 |     profile <- Sys.getenv("RENV_PROFILE", unset = "")
 944 |     renv_bootstrap_profile_normalize(profile)
 945 |   }
 946 |   
 947 |   renv_bootstrap_profile_set <- function(profile) {
 948 |     profile <- renv_bootstrap_profile_normalize(profile)
 949 |     if (is.null(profile))
 950 |       Sys.unsetenv("RENV_PROFILE")
 951 |     else
 952 |       Sys.setenv(RENV_PROFILE = profile)
 953 |   }
 954 |   
 955 |   renv_bootstrap_profile_normalize <- function(profile) {
 956 |   
 957 |     if (is.null(profile) || profile %in% c("", "default"))
 958 |       return(NULL)
 959 |   
 960 |     profile
 961 |   
 962 |   }
 963 |   
 964 |   renv_bootstrap_path_absolute <- function(path) {
 965 |   
 966 |     substr(path, 1L, 1L) %in% c("~", "/", "\\") || (
 967 |       substr(path, 1L, 1L) %in% c(letters, LETTERS) &&
 968 |       substr(path, 2L, 3L) %in% c(":/", ":\\")
 969 |     )
 970 |   
 971 |   }
 972 |   
 973 |   renv_bootstrap_paths_renv <- function(..., profile = TRUE, project = NULL) {
 974 |     renv <- Sys.getenv("RENV_PATHS_RENV", unset = "renv")
 975 |     root <- if (renv_bootstrap_path_absolute(renv)) NULL else project
 976 |     prefix <- if (profile) renv_bootstrap_profile_prefix()
 977 |     components <- c(root, renv, prefix, ...)
 978 |     paste(components, collapse = "/")
 979 |   }
 980 |   
 981 |   renv_bootstrap_project_type <- function(path) {
 982 |   
 983 |     descpath <- file.path(path, "DESCRIPTION")
 984 |     if (!file.exists(descpath))
 985 |       return("unknown")
 986 |   
 987 |     desc <- tryCatch(
 988 |       read.dcf(descpath, all = TRUE),
 989 |       error = identity
 990 |     )
 991 |   
 992 |     if (inherits(desc, "error"))
 993 |       return("unknown")
 994 |   
 995 |     type <- desc$Type
 996 |     if (!is.null(type))
 997 |       return(tolower(type))
 998 |   
 999 |     package <- desc$Package
1000 |     if (!is.null(package))
1001 |       return("package")
1002 |   
1003 |     "unknown"
1004 |   
1005 |   }
1006 |   
1007 |   renv_bootstrap_user_dir <- function() {
1008 |     dir <- renv_bootstrap_user_dir_impl()
1009 |     path.expand(chartr("\\", "/", dir))
1010 |   }
1011 |   
1012 |   renv_bootstrap_user_dir_impl <- function() {
1013 |   
1014 |     # 1. an explicit override option always wins
1015 |     userdir <- getOption("renv.userdir.override")
1016 |     if (!is.null(userdir))
1017 |       return(userdir)
1018 |   
1019 |     # 2. defer to R_user_dir() when the tools namespace provides it
1020 |     r_user_dir <- asNamespace("tools")$R_user_dir
1021 |     if (is.function(r_user_dir))
1022 |       return(r_user_dir("renv", "cache"))
1023 |   
1024 |     # 3. backfill for older R: use the first cache envvar that is set
1025 |     roots <- Sys.getenv(c("R_USER_CACHE_DIR", "XDG_CACHE_HOME"), unset = NA)
1026 |     roots <- roots[!is.na(roots)]
1027 |     if (length(roots))
1028 |       return(file.path(roots[[1L]], "R/renv"))
1029 |   
1030 |     # 4. otherwise fall back to a per-platform default location
1031 |     sysname <- Sys.info()[["sysname"]]
1032 |     if (sysname == "Windows")
1033 |       return(file.path(Sys.getenv("LOCALAPPDATA"), "R/cache/R/renv"))
1034 |   
1035 |     if (sysname == "Darwin")
1036 |       return("~/Library/Caches/org.R-project.R/R/renv")
1037 |   
1038 |     "~/.cache/R/renv"
1039 |   
1040 |   }
1041 |   
1042 |   renv_bootstrap_version_friendly <- function(version, shafmt = NULL, sha = NULL) {
1043 |     hash <- substring(sha %||% attr(version, "sha", exact = TRUE), 1L, 7L)  # first 7 characters
1044 |     suffix <- sprintf(shafmt %||% " [sha: %s]", hash)
1045 |     paste(c(version, suffix), collapse = "")
1046 |   }
1047 |   
1048 |   renv_bootstrap_exec <- function(project, libpath, version) {
1049 |     loaded <- renv_bootstrap_load(project, libpath, version)
1050 |     if (!loaded) renv_bootstrap_run(version, libpath)  # fall back to bootstrapping
1051 |   }
1052 |   
1053 |   renv_bootstrap_run <- function(version, libpath) {
1054 |   
1055 |     # run the bootstrap step for this version and library path
1056 |     bootstrap(version, libpath)
1057 |   
1058 |     # when RENV_BOOTSTRAP_INSTALL_ONLY is set (to any value), stop here;
1059 |     # this is the path taken when only the bootstrap itself is being tested
1060 |     install_only <- Sys.getenv("RENV_BOOTSTRAP_INSTALL_ONLY", unset = NA)
1061 |     if (!is.na(install_only))
1062 |       return(TRUE)
1063 |   
1064 |     # try once more to find renv in 'libpath'; on success, load the
1065 |     # project located in the current working directory
1066 |     available <- requireNamespace("renv", lib.loc = libpath, quietly = TRUE)
1067 |     if (available)
1068 |       return(renv::load(project = getwd()))
1069 |   
1070 |     # renv could not be found or loaded; warn instead of failing
1071 |     first <- "Failed to find an renv installation: the project will not be loaded."
1072 |     second <- "Use `renv::activate()` to re-initialize the project."
1073 |     warning(paste(first, second, sep = "\n"), call. = FALSE)
1074 |   
1075 |   }
1076 |   
1077 |   renv_json_read <- function(file = NULL, text = NULL) {
1078 |   
1079 |     # Read JSON from 'file' or 'text'. jsonlite is tried first, but only
1080 |     # if its namespace is already loaded; the built-in reader is the
1081 |     # fallback. If both fail, the jsonlite error is the one re-thrown.
1082 |     attempt <- function(reader)
1083 |       tryCatch(reader(file, text), error = identity)
1084 |   
1085 |     failure <- NULL
1086 |   
1087 |     if (is.element("jsonlite", loadedNamespaces())) {
1088 |       result <- attempt(renv_json_read_jsonlite)
1089 |       if (!inherits(result, "error"))
1090 |         return(result)
1091 |       failure <- result
1092 |     }
1093 |   
1094 |     result <- attempt(renv_json_read_default)
1095 |     if (!inherits(result, "error"))
1096 |       return(result)
1097 |   
1098 |     # neither reader succeeded: prefer reporting the jsonlite failure
1099 |     if (is.null(failure))
1100 |       failure <- result
1101 |     stop(failure)
1102 |   
1103 |   }
1104 |   
1105 |   renv_json_read_jsonlite <- function(file = NULL, text = NULL) {
1106 |     contents <- text %||% readLines(file, warn = FALSE)  # joined into one string below
1107 |     jsonlite::fromJSON(txt = paste(contents, collapse = "\n"), simplifyVector = FALSE)
1108 |   }
1109 |   
1110 |   renv_json_read_default <- function(file = NULL, text = NULL) {
1111 |     # Fallback JSON reader: rewrite the JSON text as R list() code, parse it, then evaluate it.
1112 |     # locate every double-quoted string literal (backslash escapes allowed) in the JSON
1113 |     text <- paste(text %||% readLines(file, warn = FALSE), collapse = "\n")
1114 |     pattern <- '["](?:(?:\\\\.)|(?:[^"\\\\]))*?["]'
1115 |     locs <- gregexpr(pattern, text, perl = TRUE)[[1]]
1116 |   
1117 |     # if any are found, replace them with placeholders (gregexpr yields -1 when nothing matches)
1118 |     replaced <- text
1119 |     strings <- character()
1120 |     replacements <- character()
1121 |   
1122 |     if (!identical(c(locs), -1L)) {
1123 |   
1124 |       # extract each matched string, quotes included
1125 |       starts <- locs
1126 |       ends <- locs + attr(locs, "match.length") - 1L
1127 |       strings <- substring(text, starts, ends)
1128 |   
1129 |       # only keep strings containing one of [ ] { } : (the characters rewritten further down)
1130 |       strings <- grep("[[\\]{}:]", strings, perl = TRUE, value = TRUE)
1131 |   
1132 |       # placeholders are quoted, numbered tokens delimited by \032 characters
1133 |       replacements <- sprintf('"\032%i\032"', seq_along(strings))
1134 |   
1135 |       # swap the first literal occurrence of each string for its placeholder; <<- updates 'replaced' above
1136 |       mapply(function(string, replacement) {
1137 |         replaced <<- sub(string, replacement, replaced, fixed = TRUE)
1138 |       }, strings, replacements)
1139 |   
1140 |     }
1141 |   
1142 |     # rewrite JSON syntax as R: {} -> empty named list, [ and { -> list(, ] and } -> ), : -> =
1143 |     transformed <- replaced
1144 |     transformed <- gsub("{}", "`names<-`(list(), character())", transformed, fixed = TRUE)
1145 |     transformed <- gsub("[[{]", "list(", transformed, perl = TRUE)
1146 |     transformed <- gsub("[]}]", ")", transformed, perl = TRUE)
1147 |     transformed <- gsub(":", "=", transformed, fixed = TRUE)
1148 |     text <- paste(transformed, collapse = "\n")
1149 |   
1150 |     # parse the rewritten text as R code and keep its first expression
1151 |     json <- parse(text = text, keep.source = FALSE, srcfile = NULL)[[1L]]
1152 |   
1153 |     # build a lookup from placeholder (name) to original string (value); parse() unquotes both
1154 |     map <- as.character(parse(text = strings))
1155 |     names(map) <- as.character(parse(text = replacements))
1156 |   
1157 |     # convert to list
1158 |     map <- as.list(map)
1159 |   
1160 |     # put the original strings back into the parsed expression
1161 |     remapped <- renv_json_read_remap(json, map)
1162 |   
1163 |     # evaluate the resulting expression in the base environment to build the R object
1164 |     eval(remapped, envir = baseenv())
1165 |   
1166 |   }
1167 |   
1168 |   renv_json_read_remap <- function(json, map) {
1169 |     # Recursively post-process a parsed JSON expression using 'map' (placeholder -> original string).
1170 |     # fix names: element names that are keys of 'map' get the mapped string back
1171 |     if (!is.null(names(json))) {
1172 |       lhs <- match(names(json), names(map), nomatch = 0L)
1173 |       rhs <- match(names(map), names(json), nomatch = 0L)
1174 |       names(json)[rhs] <- map[lhs]
1175 |     }
1176 |   
1177 |     # fix values: a string found in 'map' is replaced by its mapped value, otherwise kept as-is
1178 |     if (is.character(json))
1179 |       return(map[[json]] %||% json)
1180 |   
1181 |     # the JSON literals true, false and null arrive here as R symbols; translate them
1182 |     if (is.name(json)) {
1183 |       text <- as.character(json)
1184 |       if (text == "true")
1185 |         return(TRUE)
1186 |       else if (text == "false")
1187 |         return(FALSE)
1188 |       else if (text == "null")
1189 |         return(NULL)
1190 |     }
1191 |   
1192 |     # recurse into each element; list(...) presumably keeps a NULL result from deleting the element
1193 |     if (is.recursive(json)) {
1194 |       for (i in seq_along(json)) {
1195 |         json[i] <- list(renv_json_read_remap(json[[i]], map))
1196 |       }
1197 |     }
1198 |   
1199 |     json
1200 |   
1201 |   }
1202 | 
1203 |   # load the renv profile, if any
1204 |   renv_bootstrap_profile_load(project)
1205 | 
1206 |   # construct path to library root
1207 |   root <- renv_bootstrap_library_root(project)
1208 | 
1209 |   # construct library prefix for platform
1210 |   prefix <- renv_bootstrap_platform_prefix()
1211 | 
1212 |   # construct full libpath: <library root>/<platform prefix>
1213 |   libpath <- file.path(root, prefix)
1214 | 
1215 |   # try to load renv from libpath; if that fails, bootstrap it and try again
1216 |   renv_bootstrap_exec(project, libpath, version)
1217 |   # finish with an invisible NULL so the enclosing block prints nothing
1218 |   invisible()
1219 | 
1220 | })
1221 | 


--------------------------------------------------------------------------------
/renv/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "bioconductor.version": null,
 3 |   "external.libraries": [],
 4 |   "ignored.packages": [],
 5 |   "package.dependency.fields": [
 6 |     "Imports",
 7 |     "Depends",
 8 |     "LinkingTo"
 9 |   ],
10 |   "ppm.enabled": null,
11 |   "ppm.ignored.urls": [],
12 |   "r.version": null,
13 |   "snapshot.type": "implicit",
14 |   "use.cache": true,
15 |   "vcs.ignore.cellar": true,
16 |   "vcs.ignore.library": true,
17 |   "vcs.ignore.local": true,
18 |   "vcs.manage.ignores": true
19 | }
20 | 


--------------------------------------------------------------------------------
/styles.css:
--------------------------------------------------------------------------------
1 | /* Custom CSS overrides for the site. */
2 | /* Text inside .nav-page elements (presumably the previous/next page links; confirm) is set to 15pt. */
3 | .nav-page .nav-page-text {
4 |   font-size: 15pt;
5 | }
6 | 


--------------------------------------------------------------------------------
/tutorial_pages/basic-principles.qmd:
--------------------------------------------------------------------------------
 1 | # Basic principles
 2 | 
 3 | Basically, a simulation consists of:  
 4 | **1) Generating `n` random numbers from a known distribution.**    
 5 | **2) Repeating this `nrep` times.**  
 6 | 
 7 | Once you know how to do this, the questions we will explore are:   
 8 | **1) What sample size `n` should we use within a simulation?**  
 9 | **2) How many simulation repetitions `nrep` should we run?**  
10 | 
11 |  ***
12 | 
13 | 


--------------------------------------------------------------------------------
/tutorial_pages/check-alpha.qmd:
--------------------------------------------------------------------------------
 1 | # Using simulations to check alpha
 2 | 
 3 | In most quantitative sciences, we accept a type I error rate of 0.05, which is often called the `alpha` or significance level. This value tells us the probability of rejecting the null hypothesis (i.e. of finding an effect) given that the null hypothesis is true. 
 4 | 
 5 | In other words, if there is no true effect (e.g. no difference between two groups), we would expect our null hypothesis of no effect to be rejected (incorrectly) (`alpha` * 100)% of the time.
 6 | 
 7 | If you draw from the same normal distribution twice, will the means of the two samples differ significantly in 5% of the cases? 
 8 | 
 9 | 
10 | ***
11 | 
12 | **YOUR TURN:**  
13 | 1. Figure out how to do a *t*-test in R.  
14 | 2. Generate two vectors of 10 values drawn from N(0,1) and compare them with a *t*-test.  
15 | 3. Figure out how to extract the *p*-value from that object (explore your R object with the functions `str()` or `names()`).    
16 | 4. Write a function `simT()` that generates two vectors of `n` values drawn from a standard normal distribution (N(0,1)), compares them with a *t*-test, and returns the *p*-value.  
17 | 5. Test your function by calling it for `n = 50`.  
18 | 6. For a sample size of `n = 10`, generate `nrep = 20` repetitions and draw a histogram.  
19 | 7. Repeat the previous task with `nrep = 100` repetitions.
20 | 
21 | ***
22 | 
23 | ***p*-values of *t*-tests comparing means from 20 or 100 repetitions simulating N(0,1) with n = 10:**   
24 | <br/>
25 | <img src="../assets/ttest-changing-nrep.png" width="500">  
26 | <br/>
27 | 
28 | 
29 | In the first case, where `nrep = 20`, we expect 1 out of the 20 tests to be significant (5%). In my case, that is what I got! How many did you get?    
30 | In the second case, where `nrep = 100`, we expect 5 out of the 100 tests to be significant. In my case, I got 6. How many did you get?      
31 | Are those deviations meaningful? Are they significant? 
32 | 
33 | ***
34 | 
35 | **YOUR TURN:**  
36 | 1. Plot a histogram of `nrep = 1000` repetitions of the function `simT` with `n = 10`.  
37 | 2. Plot a histogram of `nrep = 1000` repetitions of the function `simT` with `n = 100`.
38 | 
39 | ***
40 | 
41 | ***p*-values of *t*-tests comparing means from 1000 repetitions simulating N(0,1) with n=10 or n=100:**   
42 | <br/>
43 | <img src="../assets/ttest-changing-n.png" width="500">  
44 | <br/>
45 | 
46 | In both cases, we expect 50 out of the 1000 tests to be significant by chance (i.e. with a *p*-value under 0.05). In my simulation repetitions, I get 40 and 45 false positive results for `n = 10` and `n = 100`, respectively. How many did you get?  
47 | 
48 | These proportions are not significantly different from 5%. 
49 | 
50 | ```r
51 | prop.test(45, 1000, p = 0.05, alternative = "two.sided", correct = TRUE)
52 | ```
53 | 
54 | > 1-sample proportions test with continuity correction  
55 | > data:  45 out of 1000, null probability 0.05  
56 | > X-squared = 0.42632, df = 1, p-value = 0.5138  
57 | 
58 | It is important to note that, although `alpha = 0.05` is commonly used, this is an arbitrary choice and you should consider what is an appropriate type I error rate for your particular investigation.  
59 | 
60 | Although it isn't necessary to check that a statistical analysis as simple as a *t*-test does not yield more than 5% false-positive results, in situations where the structure of the data is complex and analysed with more advanced models (e.g. when explanatory variables are mathematically linked to each other or are combined in a mixed-effect model), this check allows you to compare different modelling approaches and select one that does not produce more than 5% false-positive results.  
61 | 
62 | Such a complex example, where simulation is the only viable approach to constructing a statistical model that does not lead to spurious effects, can be found in this paper:
63 | 
64 | * Ihle, M., Pick, J. L., Winney, I. S., Nakagawa, S., & Burke, T. (2019). Measuring Up to Reality: Null Models and Analysis Simulations to Study Parental Coordination Over Provisioning Offspring. *Frontiers in Ecology and Evolution*, *7*, 142. <a href="https://doi.org/10.3389/fevo.2019.00142" target="_blank">https://doi.org/10.3389/fevo.2019.00142</a>
65 | 
66 | ***
67 | 
68 | 
69 | 
70 | 


--------------------------------------------------------------------------------
/tutorial_pages/check-power.qmd:
--------------------------------------------------------------------------------
 1 | # Checking power through simulations
 2 | 
 3 | The power of a statistical test tells us the probability that the test correctly rejects the null hypothesis. In other words, if we only examine true effects, the power is the proportion of tests that will (correctly) reject the null hypothesis. Often, the power is set to 80%, though, as with `alpha = 0.05`, this is an arbitrary choice. 
 4 | 
 5 | Generally, we want to do power analysis before collecting data, to work out the sample size we need to detect some effect. If we are calculating a required sample size, the power analysis can also be called a sample size calculation. 
 6 | 
 7 | Taking the example of a *t*-test, we need to understand a few parameters:
 8 | 
 9 | * `n`, the sample size.
10 | * `delta`, the difference in means that you want to be able to detect. Deciding what this value should be is tricky. You might rely on estimates from the literature (though bear in mind they are likely to be inflated), or you can use a minimally important difference, which is the threshold below which you do not consider a difference interesting enough to be worth detecting. In a clinical trial, for example, this might be the smallest difference that a patient would care about. 
11 | * `sd`, the standard deviation. Usually, this needs to be estimated from the literature or pilot studies. 
12 | * `sig.level`, the alpha, as discussed previously. 
13 | * `power`, the power as defined above.
14 | 
15 | You can calculate any one of these parameters, given all of the others. We usually want to specify `delta`, `sd`, `sig.level` and `power`, and calculate the required sample size.
16 | 
17 | We can calculate the required sample size for a *t*-test using:
18 | 
19 | ```r
20 | power.t.test(n = NULL, delta = 0.5, sd = 1, sig.level = 0.05, power = 0.8)
21 | ```
22 | 
23 | Notice that `n = NULL`, so this parameter is calculated. 
24 | 
25 | The sample size `n` we need, given this set of parameters, is 64 per group.
26 | 
27 | 
28 | Just as we can check the alpha of our test by sampling from the same distribution (i.e. simulating data without an effect), we can check the power by sampling from different distributions (i.e. simulating data with an effect).
29 | 
30 | If we sample values from two normal distributions with different means (e.g. N(0,1) and N(0.5,1)), what is the minimum sample size we need to detect a significant difference in means with a *t*-test 80% of the time?
31 | 
32 | ***
33 | 
34 | **YOUR TURN:**
35 | 
36 | 1. Use your simulation skills to work out the power through simulation. Write a function that does the following: 
37 | 
38 |     i. Draws `n` values from a normal distribution with mean `mean1` and another `n` values from a normal distribution with mean `mean2`.
39 |     ii. Compares the means of these two samples with a *t*-test and extracts the *p*-value.
40 | 
41 | 2. Repeat the function 1000 times using the parameters used in the power calculation above (that used the `power.t.test()` function).
42 | 3. Calculate the proportion of *p*-values that are smaller than 0.05.
43 | 
44 | ***
45 | 
46 | ***p*-values of *t*-tests comparing means from 1000 repetitions simulating N(0,1) and N(0.5,1) with n = 64:**  
47 | 
48 | <br/>
49 | <img src="../assets/hist-power.png" width="500">  
50 | <br/>
51 | 
52 | The proportion of correctly rejected null hypotheses in the simulation is close to 0.8, which is what we would expect. 
53 | 
54 | Using simulations for power analysis is not really necessary for simple examples like a *t*-test, though it is useful to check your understanding. 
55 | 
56 | When analyses become complex and it is hard or impossible to determine a sample size analytically (i.e. you can't calculate it, or there's no suitable function to use), then simulations are an indispensable tool.
57 | 
58 | A simple example of a power analysis like the one you've just done can be found in the "Power analysis" section of this paper:
59 | 
60 | * Blanco, D., Schroter, S., Aldcroft, A., Moher, D., Boutron, I., Kirkham, J. J., & Cobo, E. (2020). Effect of an editorial intervention to improve the completeness of reporting of randomised trials: a randomised controlled trial. *BMJ Open*, *10*(5), e036799. <a href="https://doi.org/10.1136/bmjopen-2020-036799" target="_blank">https://doi.org/10.1136/bmjopen-2020-036799</a>
61 | 
62 | A complete self-paced tutorial to simulate data for power analysis of complex statistical designs can be found here:
63 | 
64 | * <a href="https://lmu-osc.github.io/Simulations-for-Advanced-Power-Analyses/" target="_blank">https://lmu-osc.github.io/Simulations-for-Advanced-Power-Analyses/</a> 
65 | 
66 | ***
67 | 
68 | 
69 | 
70 | 


--------------------------------------------------------------------------------
/tutorial_pages/definition.qmd:
--------------------------------------------------------------------------------
 1 | # Definition
 2 | 
 3 | **"A computer simulation (or 'sim') is an attempt to model a real-life or hypothetical situation on a computer so that it can be studied to see how the system works. By changing variables in the simulation, predictions may be made about the behavior of the system. It is a tool to virtually investigate the behavior of the system under study."**  
 4 | &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*Wikipedia*
 5 |     
 6 | 
 7 | **"A computer simulation is the generation of random data to build up an understanding of the real data and the statistical models we use to analyze them."**  
 8 | &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;*Malika*
 9 | 
10 | ***
11 | 
12 | 


--------------------------------------------------------------------------------
/tutorial_pages/download-repo.qmd:
--------------------------------------------------------------------------------
 1 | # Make this repository a local RStudio project
 2 | 
 3 | You have two options to fetch this material:
 4 | 
 5 | ## A. You know version control with Git and GitHub
 6 | Fork and clone [this repository](https://github.com/lmu-osc/Introduction-Simulations-in-R) ([here](https://lmu-osc.github.io/Collaborative-RStudio-GitHub/) is a reminder on how to do this).
 7 | 
 8 | ## B. You do not know version control, and/or simply want to download a copy of this material
 9 | 
10 | **1) Download the repository**  
11 | Please download the GitHub repository that we are using today: <a href="https://github.com/lmu-osc/Introduction-Simulations-in-R" target="_blank">https://github.com/lmu-osc/Introduction-Simulations-in-R</a>.
12 | 
13 | <br/>
14 | <img src="../assets/download.PNG" width="750">  
15 | <br/>
16 | 
17 | Once the .zip file is downloaded, extract it and place the folder in the desired directory (e.g. Documents).  
18 | 
19 | **2) Turn it into an RStudio project**  
20 | 
21 | If you do not have R or RStudio installed, please follow [these instructions](https://lmu-osc.github.io/Introduction-RStudio-Git-GitHub/installing_software.html) first.  
22 | 
23 | Open RStudio, go to 'File', choose 'New Project...', and select 'Existing Directory'.  
24 | <br/>
25 | <img src="../assets/existing-directory.PNG" width="400">  
26 | <br/>
27 | 
28 | Select the downloaded (and extracted) folder by clicking on 'Browse', then select 'Create Project'.   
29 | <br/>
30 | <img src="../assets/find-directory.PNG" width="400">  
31 | <br/>
32 | 
33 | In the panel containing the 'Files' tab, find the exercise sheet and open it by double clicking on it.  
34 | <br/>
35 | <img src="../assets/files-list.PNG" width="600">  
36 | <br/>
37 | 
38 |  ***
39 | 
40 | 


--------------------------------------------------------------------------------
/tutorial_pages/dry-rule.qmd:
--------------------------------------------------------------------------------
 1 | # **D**o not **R**epeat **Y**ourself – DRY rule
 2 | 
 3 | ## *vs.* **W**rite **E**verything **T**wice – WET rule
 4 | 
 5 | Following the WET rule:  
 6 | 
 7 | * Makes changes more difficult and/or time consuming.  
 8 | * Decreases code clarity.  
 9 | * Increases the likelihood of inconsistencies.  
10 | 
11 | To prevent duplication and follow the DRY rule, we can write custom functions.  
12 | 
13 | Functions are 'self-contained' sets of commands that accomplish a specific task.   
14 | Functions usually 'take in' data or parameter values (these inputs are called 'function arguments'), process them, and 'return' a result. 
15 | You've already used several functions in this tutorial; for example `rnorm(n, mean, sd)`, where `n`, `mean`, and `sd` are inputs and the result is a random sample from the normal distribution. 
16 | The only difference here is that you are writing the function yourself. 
17 | Once a function is written, it can be used over and over again by calling its name, just like other functions such as `rnorm()`.  
18 | To write your own function, use the function `function`:
19 | 
20 | ```r
21 | AwesomeFunctionName <- function(argument1, argument2,…) {  
22 |   do stuff here  
23 | }
24 | ```
25 | 
26 | To build up a function, start by writing the "`stuff`" outside the function to test that it works.
27 | 
28 | ***
29 | 
30 | **YOUR TURN:**  
31 | 1. Create a function that repeats the calculation of the mean of 100 values drawn from a standard normal distribution (use `mean(rnorm(n = 100))` for this calculation) `nrep` times and returns a histogram of the `nrep` means.
32 | 2. Modify your function such that, in addition to the number of repetitions `nrep`, the number of drawn values `n` (i.e. argument `n` of the `rnorm()` function) can also be varied when calling your function. 
33 | 
34 | 
35 | ***
36 | 
37 | Note that it is useful to define the number of repetitions `nrep` outside of the function, so users of your script can more easily change that value, e.g. from a low number (to verify the script runs without error) to a large number (to obtain reliable results).  
38 | 
39 | 


--------------------------------------------------------------------------------
/tutorial_pages/general-structure.qmd:
--------------------------------------------------------------------------------
 1 | # General structure of a simulation
 2 | 
 3 | 1. **Define** what type of data and variables need to be simulated, i.e. their **distribution**, their class (e.g. factor vs. numerical values), **sample sizes** (within a dataset and number of repetitions), what will need to vary (e.g. the strength of relationship), etc.  
 4 | 
 5 | 2. **Generate data**, random data or data including an effect (e.g. an imposed correlation between two variables).  
 6 | 
 7 | 3. **Run the statistical test** you think is appropriate and record the relevant statistic (e.g. *p*-value). 
 8 | 
 9 | 4. **Repeat** steps 2 and 3 to get the distribution of the statistic of interest.  
10 | 
11 | 5. Try out different parameter sets (**explore the parameter space** for which results are similar).  
12 | 
13 | 6. **Analyse and interpret the combined results of many simulation repetitions** within each set of parameters. For instance, check that you only get a significant result in 5% of the repetitions (if `alpha = 0.05`) when you simulated no effect and that you get a significant result in 80% of the repetitions (if you targeted a power of 80%) when you simulated an effect.  
14 | 
15 | ***
16 | 
17 | 


--------------------------------------------------------------------------------
/tutorial_pages/limitations.qmd:
--------------------------------------------------------------------------------
 1 | # Limitations to simulations
 2 | 
 3 | 1. **Assumptions** made regarding variables might not be true, e.g. the distribution of supposedly normally distributed data may not be quite normal. (Have a back-up analysis plan!)  
 4 | 
 5 | 2. **Parameter space may be unknown**. (Explore it a bit and use previous observations to be at least in a relevant range.)  
 6 | 
 7 | 3. **Computational resources and time**. (Use research software engineer staff on campus to optimize code, use parallel core processing, use server services on campus, etc.)  
 8 | 
 9 | 4. **Simulations might be redundant with mathematical demonstrations**. (I don't mind, still useful for me!)  
10 | 
11 | ***
12 | 
13 | 


--------------------------------------------------------------------------------
/tutorial_pages/number-of-simulations-nrep.qmd:
--------------------------------------------------------------------------------
 1 | # Number of repetitions `nrep`
 2 | 
 3 | Sampling theory applies to the number of repetitions `nrep` (also referred to as the number of *replications*) just as much as it does to the sample size `n` within a simulation.
 4 | 
 5 | **Means and SDs from 24 repetitions simulating N(0,1) with n = 10:**  
 6 | <br/>
 7 | <img src="../assets/musd-24-10-N01.png" width="500">  
 8 | <br/>
 9 | 
10 | Now, let's do the same with a number of repetitions `nrep` of 1000.  
11 | 
12 | **Means and SDs from 1000 repetitions simulating N(0,1) with n = 10:**   
13 | <br/>
14 | <img src="../assets/1000hist10N01.png" width="500">  
15 | <br/>
16 | 
17 | 
18 | ### Conclusion  
19 | 
20 | The number of repetitions needs to be a large enough number to obtain a good representation of the distribution of the simulation results, e.g. 1000. 
21 | 
22 | ***
23 | 
24 | **YOUR TURN:**  
25 | The code generating the data and plots presented above is included in your exercise script. Feel free to modify the parameters of the functions that simulate data and plot the results to better understand the principles presented in these two pages, but, at this stage, there is no need to fully understand the code that generates the plots.
26 | 
27 | ***
28 | 
29 | 


--------------------------------------------------------------------------------
/tutorial_pages/purpose.qmd:
--------------------------------------------------------------------------------
 1 | # Purpose
 2 | 
 3 | You can use computer simulations to: 
 4 | 
 5 | * **Test your statistical intuition or demonstrate mathematical properties you cannot easily anticipate.**  
 6 |   * *Example: Check whether there are more than 5% significant effects for a variable in a model when supposedly random data are generated.*  
 7 | <br/>
 8 | 
 9 | * **Understand sampling theory and probability distributions or test whether you understand the underlying processes of your system.**  
10 |   * *Example: See whether simulated data drawn from specific distributions is comparable to real data.*  
11 | <br/>
12 | 
13 | * **Perform power analyses.**
14 |   * *Example: Assess whether the sample size (within a simulation repetition) is high enough to detect a simulated effect in more than 80% of the cases.*  
15 | <br/>
16 | 
17 | * **Perform bootstrapping to get a confidence interval around a parameter estimate.** 
18 |   * *Bootstrapping means to sample with replacement (i.e. all the original options to draw from are available at each draw) in an observed dataset. Doing this generates new 'simulated' datasets. With each of them, one can run the statistical analysis made on the observed dataset, saving each time the parameter estimate of interest. After doing this multiple times, one can obtain a confidence interval for the parameter of interest.*   
19 | <br/>
20 | 
21 | * **Prepare a pre-analysis plan.** 
22 |   * *To be confident about the (confirmatory) statistical analyses you may wish to commit to before data collection (e.g. through a preregistration or registered report), practising the analyses on a simulated dataset is very helpful! If you are still unsure about the most appropriate statistical test to apply to your data, providing a simulated dataset to a statistician or mentor will allow them to provide concrete suggestions! The code containing the analyses of simulated data can be submitted along with your preregistration or registered report for reviewers to exactly understand what analyses you intend to perform. Once you get your real data, you may simply plug them into this code and get the results of your confirmatory analyses immediately!*   
23 | 
24 |  
25 |  ***
26 | 
27 | 


--------------------------------------------------------------------------------
/tutorial_pages/random-numbers-generators.qmd:
--------------------------------------------------------------------------------
 1 | # Random number generators
 2 | 
 3 | R contains several functions to generate random numbers.  
 4 | Type `?`*`function`* in your console to get information on the function's arguments (i.e. the values that must be provided to obtain the function's result).  
 5 | 
 6 | The function `sample(x, size, replace = FALSE)` draws `size` values from a given vector `x` without replacement (by default). 
 7 | 
 8 | Sampling without replacement means that when you repeatedly draw e.g. one item at a time from a pool of items, any item selected during the first draw is not available for selection during the second draw, and the first and second selected items are not in the pool to select from during the third draw, etc. Sampling with replacement means that all the original options are available at each draw.  
 9 | 
10 | ***
11 | **YOUR TURN:**  
12 | Sample 100 values between 3 and 103 with replacement. For this, open the R script(s) with the exercises (`./exercise_script_with_solutions.R` and/or `./exercise_script_without_solutions.R`) from the root of your local repository, review the examples if needed, complete the exercise, and check out the proposed answer.
13 | 
14 | ***
15 | 
16 | The following functions draw `n` values from distributions with the specified parameters:  
17 | 
18 | * `runif(n, min, max)` draws `n` values from a *uniform* distribution with the specified `min` and `max`.  
19 | * `rpois(n, lambda)` draws `n` values from a *Poisson* distribution with the specified `lambda`.  
20 | * `rnorm(n, mean, sd)` draws `n` values from a *normal* distribution with the specified `mean` and standard deviation `sd`.  
21 | * `rbinom(n, size, prob)` draws `n` values from a *binomial* distribution with the specified number of trials `size` and probability of success `prob`.  
22 | 
23 | ***
24 | **YOUR TURN:**    
25 | 1. Draw 100 values from a normal distribution with a mean of 0 and a standard deviation of 1.  
26 | 2. Draw 50 values from a normal distribution with a mean of 10 and a standard deviation of 5.  
27 | 3. Draw 1000 values from a Poisson distribution with a lambda of 50.  
28 | 4. Draw 30 values from a uniform distribution between 0 and 10.  
29 | 
30 | Try it out in your local exercise script.
31 | 
32 | ***
33 | 
34 | 
35 |  
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/tutorial_pages/real-life-example.qmd:
--------------------------------------------------------------------------------
 1 | # Real-life example
 2 | 
 3 | This is a walkthrough of one relatively simple simulation written to check whether the following two models would provide the same results: 
 4 | 
 5 | * A generalized linear model based on a contingency table of counts (Poisson distribution).  
 6 | * A generalized linear model with one line per observation and the occurrence of the variable of interest coded as 'Yes' or 'No' (binomial distribution).  
 7 | 
 8 | I created this code while preparing my preregistration for a simple behavioural ecology experiment about methods for independently manipulating palatability and colour in small insect prey ([article](https://doi.org/10.1371/journal.pone.0231205), [OSF preregistration](https://osf.io/f8uk9?view_only=3943e7bb9c5f4effbf119ca5b062fe80)).  
 9 | 
10 | The R script screenshot below, `glm_Freq_vs_YN.R`, can be found in the folder [Ihle2020](https://github.com/lmu-osc/Introduction-Simulations-in-R/tree/main/Ihle2020).  
11 | 
12 | This walkthrough will use the steps as defined on the page '[General structure](./general-structure.qmd)'.
13 | 
14 | 
15 | 1. **Define sample sizes** (within a dataset and number of repetitions), **experimental design** (fixed dataset structure, e.g. treatment groups, factors), and **parameters** that will need to vary (here, the strength of the effect).  
16 | 
17 |     <img src="../assets/define.png" width="1000">  
18 |     <br/>
19 | 
20 | 2. **Generate data** (here, using `sample()` and the probabilities defined in step 1) and format it in two different ways to accommodate the two statistical tests to be compared. 
21 | 
22 |     <img src="../assets/generate.png" width="1000">  
23 |     <br/>
24 | 
25 | 3. **Run the statistical test and save the parameter estimate of interest for that iteration**. Here, this is done for both statistical tests to be compared.    
26 | 
27 |     <img src="../assets/test.png" width="1000">  
28 |     <br/>
29 | 
30 | 
31 | 4. **Repeat** steps 2 (data simulation) and 3 (data analyses) to get the distribution of the parameter estimates by wrapping these steps in a function.  
32 | 
33 |     Definition of the function at the beginning: 
34 |     <br/>
35 |     <img src="../assets/replicate1.png" width="800">    
36 |     <br/>
37 |     Output returned from the function at the end: 
38 |     <br/>
39 |     <img src="../assets/replicate2.png" width="1000">  
40 |     <br/>
41 |     Repeat the function `nrep` times. Here, `pbreplicate()` is used instead of `replicate()` so that a progress bar is displayed while R runs this command. 
42 |     <br/>
43 |     <img src="../assets/replicate3.png" width="1000">  
44 |     <br/>
45 | 
46 | 5. **Explore the parameter space**. Here, vary the probabilities of sampling between 0 and 1 depending on the treatment group category.
47 | 
48 |     <img src="../assets/explore.png" width="1000">  
49 |     <br/>
50 | 
51 | 6. **Analyse and interpret the combined results of many simulation repetitions**. In this case, the results of the two models were qualitatively the same (comparison of results for a few different parameter values), and both models gave the same expected 5% false positive results when no effect was simulated. Varying the effect (the probability of sampling 0 or 1 depending on the experimental treatment) allowed us to find the minimum effect size for which the number of positive results of the tests is over 80%. 
52 | 
53 |     <img src="../assets/conclude.png" width="1000">  
54 |     <br/>
55 | 
56 | 
57 | 
58 | ***
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/tutorial_pages/repeat.qmd:
--------------------------------------------------------------------------------
 1 | # Repetition
 2 | 
 3 | The function `replicate(nrep, expression)` repeats the `expression` provided `nrep` times.
 4 | 
 5 | For example, `replicate(10, mean(rnorm(100)))` reads: 'Draw 100 values from a normal distribution with a mean of 0 and a standard deviation of 1 (the default values of `rnorm(n, mean, sd)`), calculate the mean of these 100 values, and do all that 10 times.'
 6 |   
 7 | ***
 8 | 
 9 | **YOUR TURN:**  
10 | In your local exercise script:  
11 | 1. Repeat 1000 times the calculation of the mean of 10 values drawn from a uniform distribution between 0 and 10.  
12 | 2. Repeat 100 times the calculation of the mean of 50 values drawn from a normal distribution with a mean of 10 and a standard deviation of 5.  
13 | 3. Make a histogram of your results for each task. Are the distributions looking as expected?  
14 | 
15 | ***
16 | 
17 | 


--------------------------------------------------------------------------------
/tutorial_pages/resources.qmd:
--------------------------------------------------------------------------------
 1 | # Resources
 2 | ## Follow-up self-paced tutorial on simulation of data analyses for advanced power analyses
 3 | * <a href="https://lmu-osc.github.io/Simulations-for-Advanced-Power-Analyses/" target="_blank">https://lmu-osc.github.io/Simulations-for-Advanced-Power-Analyses/</a>
 4 | 
 5 | ## Hallgren 2013
 6 | 
 7 | The article suggested for getting familiarised with the topic prior to the session, i.e. [Hallgren, K. A. (2013). Conducting simulation studies in the R programming environment. *Tutorials in Quantitative Methods for Psychology*, *9*(2), 43–60](https://doi.org/10.20982/tqmp.09.2.p043), contains accompanying R scripts and CSV data files which you can peruse in the [Hallgren2013](https://github.com/lmu-osc/Introduction-Simulations-in-R/tree/main/Hallgren2013) folder of this repository. It contains:  
 8 | 
 9 | * Annotated R syntax file for Example 1: `novel question.R`.  
10 | * Annotated R syntax file for Example 2: `power analysis.R`.  
11 | * Annotated R syntax file for Example 3: `bootstrapping.r`.  
12 | * CSV dataset generated in Example 1, which is also used later in Example 2: `novel_question_output.csv`.  
13 | * CSV dataset used in Example 3: `mediation_raw_data.csv`.
14 | 
15 | ## Other articles
16 | 
17 | Depending on the type of simulation that would be useful for you, these articles may be of interest:  
18 | 
19 | * Johnson, P. C. D., Barry, S. J. E., Ferguson, H. M., & Müller, P. (2015). Power analysis for **generalized linear mixed models** in ecology and evolution. *Methods in Ecology and Evolution*, *6*(2), 133–142. <a href="https://doi.org/10.1111/2041-210X.12306" target="_blank">https://doi.org/10.1111/2041-210X.12306</a>
20 | 
21 | * Blanco, D., Schroter, S., Aldcroft, A., Moher, D., Boutron, I., Kirkham, J. J., & Cobo, E. (2020). Effect of an editorial intervention to improve the completeness of reporting of randomised trials: a randomised controlled trial. *BMJ Open*, *10*(5), e036799. <a href="https://doi.org/10.1136/bmjopen-2020-036799" target="_blank">https://doi.org/10.1136/bmjopen-2020-036799</a>
22 |   * In the "Power analysis" section, there is a **simple example of a power simulation**. R code is provided in the supplementary material. 
23 | 
24 | * Muldoon, A. (2018). Getting started simulating data in R: some helpful functions and how to use them. <a href="https://aosmith.rbind.io/2018/08/29/getting-started-simulating-data/" target="_blank">https://aosmith.rbind.io/2018/08/29/getting-started-simulating-data/</a> 
25 |   * This blog gives a great overview of how to start **simulating more complex datasets**, including step-by-step explanations of relevant R functions. 
26 | 
27 | * Privé, F., Aschard, H., Ziyatdinov, A., & Blum, M. G. B. (2018). Efficient analysis of **large-scale genome-wide data** with two R packages: bigstatsr and bigsnpr. *Bioinformatics*, *34*(16), 2781–2787. <a href="https://doi.org/10.1093/bioinformatics/bty185" target="_blank">https://doi.org/10.1093/bioinformatics/bty185</a>  
28 | 
29 | * Rönnegård, L., McFarlane, S. E., Husby, A., Kawakami, T., Ellegren, H., & Qvarnström, A. (2016). Increasing the **power of genome wide association** studies in natural populations using repeated measures – evaluation and implementation. *Methods in Ecology and Evolution*, *7*(7), 792–799. <a href="https://doi.org/10.1111/2041-210X.12535" target="_blank">https://doi.org/10.1111/2041-210X.12535</a>
30 | 
31 | * Dalpiaz, D. (2020). *Applied Statistics with R*, section "Simulating SLR" in the chapter "**Simple Linear Regression**". <a href="https://daviddalpiaz.github.io/appliedstats/simple-linear-regression.html#simulating-slr" target="_blank">https://daviddalpiaz.github.io/appliedstats/simple-linear-regression.html#simulating-slr</a>
32 | 
33 | 
34 | ## Use of R packages to run simulations  
35 | 
36 | * `lme4`: Bolker, B. Simulation-based power analysis for mixed models in `lme4`. <a href="https://rpubs.com/bbolker/simpower" target="_blank">https://rpubs.com/bbolker/simpower</a>
37 | 
38 | * `simstudy`: Goldfeld, K., & Wujciak-Jens, J. Simulating Study Data. <a href="https://cran.r-project.org/web/packages/simstudy/vignettes/simstudy.html" target="_blank">https://cran.r-project.org/web/packages/simstudy/vignettes/simstudy.html</a>
39 | 
40 | * `faux`: DeBruine, L. (2023). faux: Simulation for Factorial Designs. <a href="https://debruine.github.io/faux/" target="_blank">https://debruine.github.io/faux/</a>  
41 | 
42 | * `simsem` (SIMulated Structural Equation Modeling): Pornprasertmanit, S., Miller, P., Schoemann, A., & Jorgensen, T. Vignette. <a href="https://github.com/simsem/simsem/wiki/Vignette" target="_blank">https://github.com/simsem/simsem/wiki/Vignette</a> 
43 | 
44 | * `simglm`: LeBeau, B. Tidy Simulation with `simglm`. <a href="https://cran.r-project.org/web/packages/simglm/vignettes/tidy_simulation.html" target="_blank">https://cran.r-project.org/web/packages/simglm/vignettes/tidy_simulation.html</a>   
45 | 
46 | * `powerlmm`: Magnusson, K. (2018). New paper: The consequences of ignoring therapist effects in longitudinal data analysis. <a href="https://rpsychologist.com/therapists-effects-longitudinal" target="_blank">https://rpsychologist.com/therapists-effects-longitudinal</a>  
47 | 
48 | 
49 | ***
50 | 
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/tutorial_pages/sample-size-n.qmd:
--------------------------------------------------------------------------------
 1 | # Sample size `n`
 2 | 
 3 | How many values should you generate within a simulation? Let's explore.  
 4 | 
 5 | If I draw 10 data points from a normal distribution with a mean of 0 and a standard deviation of 1 (i.e. N(0,1)), after setting the seed to 10 (for no specific reason), here is the distribution of the values I get:
 6 | 
 7 | **1 repetition simulating N(0,1) with n = 10:**  
 8 | <img src="../assets/hist10N01.png" width="300">  
 9 | <br/>
10 | 
11 | If I repeat this simulation 24 times, here are the distributions of the 10 values pseudo-randomly sampled from N(0,1):  
12 | 
13 | **24 repetitions simulating N(0,1) with n = 10:**  
14 | <br/>
15 | <img src="../assets/24hist10N01.png" width="600">  
16 | <br/>
17 | 
18 | 
19 | Note that because we are drawing from N(0,1), we expect the mean of the values drawn (mean(x), blue lines) to be very close to 0, i.e. the mean of the normal distribution we sample from (red dashed lines).  
20 | <br/>
21 | 
22 | How are the means and standard deviations of the 24 repetitions distributed?  
23 | 
24 | **Distributions of the means and SDs from 24 repetitions simulating N(0,1) with n = 10:**  
25 | <br/>
26 | <img src="../assets/musd-24-10-N01.png" width="500">  
27 | <br/>
28 | 
29 | Now, let's do the same with a sample size `n` of 1000.  
30 | 
31 | **24 repetitions simulating the same distribution, i.e. N(0,1), with n = 1000:**  
32 | <br/>
33 | <img src="../assets/24hist1000N01.png" width="600">  
34 | <br/>
35 | 
36 | **Distributions of the means and SDs from 24 repetitions simulating N(0,1) with n = 1000:**  
37 | <br/>
38 | <img src="../assets/musd-24-1000-N01.png" width="500">  
39 | <br/>
40 | 
41 | 
42 | ### Conclusion  
43 | The sample size within a simulation affects the **precision** with which the parameters of that distribution can be estimated.  
44 | 
45 | What should determine the sample size within your simulation?  
46 | 
47 | Choose a sample size that is relevant to the context of the simulation, e.g. the sample size you will be able to reach in your study or the minimum sample size that would allow you to detect the smallest effect of interest (as determined by a power analysis, which we will cover in a moment).
48 | 
49 | ***
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/tutorial_pages/seed.qmd:
--------------------------------------------------------------------------------
 1 | # Setting the seed
 2 | 
 3 | * `set.seed()`
 4 | 
 5 | Computers in general, and R specifically, can, in fact, only provide *pseudo*random number generators.  
 6 | A pseudorandom number generator's number sequence is completely determined by its seed, i.e. a number used to initialize that sequence.
 7 | 
 8 | Thus, if a pseudorandom number generator is reinitialized with the same seed, it will produce the same sequence of numbers.
 9 | You can set the seed (with any arbitrary number) at the beginning of a script, and, if commands drawing random numbers are run in the exact same order, they will provide the same output in subsequent runs.
10 | 
11 | This is useful for sharing code and reproducing simulations, as well as for debugging code.
12 | 
13 | ***
14 | 
15 | **YOUR TURN:**  
16 | In your local exercise script, play around with the function `set.seed()` by running and modifying the examples provided (e.g. compare outputs with and without seeds, change the seed number).
17 |  
18 | ***
19 | 
20 | 


--------------------------------------------------------------------------------
/tutorial_pages/simulate-for-preregistration.qmd:
--------------------------------------------------------------------------------
 1 | # Simulating data to check or preregister code
 2 | 
 3 | 
 4 | One of the simplest uses of simulations is to make a dataset on which you can run and therefore pre-specify your analytic code. 
 5 | 
 6 | For example, let's say you plan to collect an observational dataset and want to look at the effect of smoking on lung cancer. 
 7 | A typical protocol might state that the data will be analysed by logistic regression, adjusting for confounders. 
 8 | But which confounders exactly? How will each variable be coded? What type of logistic regression will you use? 
 9 | 
10 | Making a dataset that has the variables you expect your real dataset will have allows you to exactly state (in **code** rather than potentially ambiguous words) what you will do. 
11 | 
12 | This is really useful if you want input, e.g. from a statistician -- they can look at your code and more clearly see what you are trying to do. 
13 | It has the added benefit of forcing you to really think about what your dataset will look like! I've found this to be very useful in the past.
14 | 
15 | It's very easy to make a dataset. Let's take the simple example about smoking and lung cancer. We are interested in only 4 variables:
16 | 
17 | * `smoking_status`, a binary variable indicating whether an individual smokes (Yes/No). 
18 | * `lung_cancer`, a binary variable indicating whether an individual has lung cancer (Yes/No).
19 | * `sex`, a binary variable indicating an individual's sex (M/F).
20 | * `age`, a numeric variable containing an individual's age (in years).
21 | 
22 | Here is the top of the simulated dataset. 
23 | 
24 | <br/>
25 | <img src="../assets/simulated-data.png" width="750">  
26 | <br/>
27 | 
28 | ***
29 | 
30 | **YOUR TURN:**
31 | 
32 | 1. Can you recreate it? Try it yourself! 
33 | 
34 |     HINT: use the `data.frame()`, `sample()`, and `rnorm()` functions.
35 | 
36 | 2. Now that you've made a dataset, try to run a logistic regression on your data with `lung_cancer` as the outcome.
37 | 3. Do you get any error messages? Does the data need to be in a specific format for statistical models to run?
38 | 
39 | ***
40 | 
41 | 
42 | Once we have a working model, we can look at the summary and make sure it behaves as we expected, and we can share the code with collaborators or reviewers.
43 | 
44 | You could combine this approach with the sampling approaches you learned on the previous two pages to look at power for more complex analyses like this.
45 | 
46 | <br/>
47 | <img src="../assets/logreg-results.png" width="750">  
48 | <br/>
49 | 
50 | 
51 | And that's it! We have written down our code in a way that is totally unambiguous. This practice is useful for any project: you can include the exact code that you plan to use in a preregistration, and reviewers will be able to verify that you did what you planned.
52 | 
53 | A real example of a simple simulation like this, used in the submission of a registered report, can be found in the R Markdown file [here](https://osf.io/5jf9p/).
54 | 
55 | *****
56 | 
57 | 
58 | 
59 | 


--------------------------------------------------------------------------------