├── .gitignore ├── LICENSE ├── README.md ├── phackR ├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R │ ├── combinedStrategies_regression.R │ ├── combinedStrategies_ttest.R │ ├── compositeScores.R │ ├── exploitCovariates.R │ ├── exploitCutoffs.R │ ├── favorableImputation.R │ ├── helpers.R │ ├── incorrectRounding.R │ ├── optionalStopping.R │ ├── outlierExclusion.R │ ├── plotsShiny.R │ ├── runShinyPHack.R │ ├── selectiveReportingDV.R │ ├── selectiveReportingIV.R │ ├── statAnalysis.R │ ├── subgroupAnalysis.R │ └── variableTransformation.R ├── doc │ ├── phackR_vignette.R │ ├── phackR_vignette.Rmd │ └── phackR_vignette.html ├── inst │ ├── shiny-phack │ │ └── ShinyPHack │ │ │ ├── data │ │ │ └── startplots.rds │ │ │ ├── mddoc │ │ │ ├── 01_CompScores.md │ │ │ ├── 02_ExploitCovariates.md │ │ │ ├── 03_ExploitCutoffs.md │ │ │ ├── 04_FavorableImputation.md │ │ │ ├── 05_IncorrectRounding.md │ │ │ ├── 06_OptionalStopping.md │ │ │ ├── 07_OutlierExclusion.md │ │ │ ├── 09_SelectiveReportingDV.md │ │ │ ├── 10_SelectiveReportingIV.md │ │ │ ├── 11_StatAnalysis.md │ │ │ ├── 12_SubgroupAnalysis.md │ │ │ ├── 13_VariableTransformation.md │ │ │ └── landingPage.md │ │ │ ├── server.R │ │ │ └── ui.R │ └── sim_startplots_Shiny.R ├── man │ ├── dot-compCohensD.Rd │ ├── dot-compR2t.Rd │ ├── dot-compscoreHack.Rd │ ├── dot-covhack.Rd │ ├── dot-cutoffHack.Rd │ ├── dot-easyimpute.Rd │ ├── dot-estimate_mode.Rd │ ├── dot-extractoutlier.Rd │ ├── dot-impHack.Rd │ ├── dot-multDVhack.Rd │ ├── dot-multIVhack_reg.Rd │ ├── dot-multIVhack_ttest.Rd │ ├── dot-optstop.Rd │ ├── dot-out.boxplot.Rd │ ├── dot-out.cook.Rd │ ├── dot-out.covratio.Rd │ ├── dot-out.dfbeta.Rd │ ├── dot-out.dffits.Rd │ ├── dot-out.leverage.Rd │ ├── dot-out.mahalanobis.Rd │ ├── dot-out.percentrule.Rd │ ├── dot-out.residual.Rd │ ├── dot-out.sdrule.Rd │ ├── dot-out.stemleaf.Rd │ ├── dot-outHack.Rd │ ├── dot-roundhack.Rd │ ├── dot-selectpvalue.Rd │ ├── dot-sim.compscore.Rd │ ├── dot-sim.covariates.Rd │ ├── 
dot-sim.data.Rd │ ├── dot-sim.multDV.Rd │ ├── dot-sim.multIV.Rd │ ├── dot-sim.multcor.Rd │ ├── dot-sim.subgroup.Rd │ ├── dot-statAnalysisHack.Rd │ ├── dot-subgroupHack.Rd │ ├── dot-varTransHack.Rd │ ├── esplots.Rd │ ├── pplots.Rd │ ├── runShinyPHack.Rd │ ├── sim.compscoreHack.Rd │ ├── sim.covhack.Rd │ ├── sim.cutoffHack.Rd │ ├── sim.impHack.Rd │ ├── sim.multDVhack.Rd │ ├── sim.multIVhack.Rd │ ├── sim.optstop.Rd │ ├── sim.outHack.Rd │ ├── sim.roundhack.Rd │ ├── sim.statAnalysisHack.Rd │ ├── sim.subgroupHack.Rd │ └── sim.varTransHack.Rd ├── phackR.Rproj ├── tests │ ├── testthat.R │ └── testthat │ │ └── test-simfunctions.R └── vignettes │ ├── .gitignore │ └── phackR_vignette.Rmd ├── phacking_compendium.Rproj └── simulations ├── 00_simulation_helpers.R ├── combinedStrategies_simulation.R ├── compscoreHack_simulation.R ├── covhack_simulation.R ├── cutoffHack_simulation.R ├── exploreNormality.R ├── impHack_simulation.R ├── multDVhack_simulation.R ├── multIVHack_simulation.R ├── optstop_simulation.R ├── outHack_simulation.R ├── plot_BFdist.R ├── plot_ESDist.R ├── plot_FPcombined.R ├── plot_pCurve.R ├── plot_redefineSig.R ├── plots_FPrate.R ├── roundHack_simulation.R ├── statAnalysisHack_simulation.R ├── subgroupHack_simulation.R └── varTransHack_simulation.R /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | *.RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 21 | .httr-oauth 22 | 23 | # knitr and R markdown default cache directories 24 | /*_cache/ 25 | /cache/ 26 | 27 | # Temporary files created by R markdown 28 | *.utf8.md 29 | *.knit.md 30 | 31 | # Shiny token, see 
https://shiny.rstudio.com/articles/shinyapps.html 32 | rsconnect/ 33 | .Rproj.user 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Angelika Stefan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The p-Hacking Compendium: Simulating Different p-Hacking Strategies 2 | 3 | ## Project Description 4 | This project contains an R-package with code to simulate and investigate the effects of different p-hacking strategies. 
It has the following components: 5 | * Functions to simulate 12 different p-hacking strategies 6 | * A Shiny app to investigate the effects of p-hacking on the distribution of p-values, the rate of false positive results, and the distribution of effect sizes 7 | * Code to reproduce simulation results conducted in our upcoming preprint, as well as plots 8 | 9 | ## Installation 10 | The phackR package is not on CRAN, but you can install it from GitHub: 11 | 12 | ``` 13 | library(devtools) 14 | install_github("astefan1/phacking_compendium/phackR", build_vignettes = TRUE) 15 | ``` 16 | 17 | ## Package Description 18 | To get an overview of the structure of the code and the simulation functions in the package, read the package vignette: 19 | 20 | ``` 21 | library(phackR) 22 | utils::vignette("phackR_vignette", "phackR") 23 | ``` 24 | 25 | ## Shiny App 26 | You can start the Shiny app directly from the package by using the following code: 27 | 28 | ``` 29 | phackR::runShinyPHack() 30 | ``` 31 | 32 | Alternatively, you can directly access the Shiny app online via [https://shiny.psy.lmu.de/felix/ShinyPHack/](https://shiny.psy.lmu.de/felix/ShinyPHack/) 33 | 34 | ## Simulation Results 35 | All simulation results can be reproduced using the code in the [_/simulations_ folder of this Github project](https://github.com/astefan1/phacking_compendium/tree/master/simulations). First, follow the steps above to install the phackR package. Then, run the script "00_simulation_helpers.R", followed by all R scripts with the "\_simulation.R" suffix. Results can be visualized using the scripts with the prefix "plot\_". 
36 | 37 | 38 | ``` 39 | > sessionInfo() 40 | R version 4.2.1 (2022-06-23) 41 | Platform: x86_64-apple-darwin17.0 (64-bit) 42 | Running under: macOS Big Sur 11.6.8 43 | 44 | Matrix products: default 45 | LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib 46 | 47 | locale: 48 | [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 49 | 50 | attached base packages: 51 | [1] stats graphics grDevices utils datasets methods base 52 | 53 | other attached packages: 54 | [1] BayesFactor_0.9.12-4.4 Matrix_1.5-1 coda_0.19-4 55 | [4] phackR_0.0.0.9000 dplyr_1.0.10 ggforce_0.4.1 56 | [7] R.devices_2.17.1 wesanderson_0.3.6 ggplot2_3.3.6 57 | [10] testthat_3.1.5 58 | 59 | loaded via a namespace (and not attached): 60 | [1] fs_1.5.2 usethis_2.1.6 devtools_2.4.5 insight_0.18.6 61 | [5] rprojroot_2.0.3 tools_4.2.1 profvis_0.3.7 backports_1.4.1 62 | [9] utf8_1.2.2 R6_2.5.1 colorspace_2.0-3 urlchecker_1.0.1 63 | [13] withr_2.5.0 tidyselect_1.1.2 prettyunits_1.1.1 processx_3.7.0 64 | [17] compiler_4.2.1 sgeostat_1.0-27 performance_0.10.0 cli_3.4.1 65 | [21] mice_3.14.0 desc_1.4.2 labeling_0.4.2 scales_1.2.1 66 | [25] DEoptimR_1.0-11 mvtnorm_1.1-3 robustbase_0.95-0 mc2d_0.1-21 67 | [29] callr_3.7.2 pbapply_1.5-0 stringr_1.4.1 digest_0.6.29 68 | [33] rmarkdown_2.17 R.utils_2.12.0 base64enc_0.1-3 WRS2_1.1-4 69 | [37] pkgconfig_2.0.3 htmltools_0.5.3 sessioninfo_1.2.2 fastmap_1.1.0 70 | [41] htmlwidgets_1.5.4 rlang_1.0.6 rstudioapi_0.14 shiny_1.7.2 71 | [45] generics_0.1.3 farver_2.1.1 car_3.1-1 R.oo_1.25.0 72 | [49] magrittr_2.0.3 Rcpp_1.0.9 munsell_0.5.0 fansi_1.0.3 73 | [53] abind_1.4-5 lifecycle_1.0.3 R.methodsS3_1.8.2 yaml_2.3.5 74 | [57] stringi_1.7.8 carData_3.0-5 MASS_7.3-57 brio_1.1.3 75 | [61] pkgbuild_1.3.1 plyr_1.8.7 grid_4.2.1 parallel_4.2.1 76 | [65] promises_1.2.0.1 shinydashboard_0.7.2 forcats_0.5.2 crayon_1.5.2 77 | [69] miniUI_0.1.1.1 lattice_0.20-45 knitr_1.40 aplpack_1.3.5 78 | [73] ps_1.7.1 pillar_1.8.1 tcltk_4.2.1 
pkgload_1.3.1 79 | [77] glue_1.6.2 evaluate_0.17 remotes_2.4.2 vctrs_0.4.2 80 | [81] tweenr_2.0.2 httpuv_1.6.6 MatrixModels_0.5-1 gtable_0.3.1 81 | [85] purrr_0.3.5 polyclip_1.10-4 tidyr_1.2.1 reshape_0.8.9 82 | [89] cachem_1.0.6 xfun_0.33 mime_0.12 xtable_1.8-4 83 | [93] broom_1.0.1 later_1.3.0 tibble_3.1.8 memoise_2.0.1 84 | [97] mvoutlier_2.1.1 ellipsis_0.3.2 85 | ``` 86 | 87 | 88 | -------------------------------------------------------------------------------- /phackR/.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^doc$ 4 | ^Meta$ 5 | -------------------------------------------------------------------------------- /phackR/.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | Meta 5 | /doc/ 6 | /Meta/ 7 | -------------------------------------------------------------------------------- /phackR/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: phackR 2 | Title: Simulate p-Hacking 3 | Version: 0.0.0.9000 4 | Authors@R: person("Angelika M.", "Stefan", email = "a.m.stefan@uva.nl", role = c("aut", "cre")) 5 | Description: Many different questionable research practices have been described in the literature. This package contains functions to simulate different sorts p-hacking and allows to analyze their impact on the distributions of p-values. 
6 | Depends: R (>= 3.6.0) 7 | Imports: aplpack, R.devices, car, mvoutlier, dplyr, performance, WRS2, mice, rlang, shiny, pbapply, tidyr, ggplot2, shinydashboard, magrittr, grid, Matrix 8 | License: MIT + file LICENSE 9 | Encoding: UTF-8 10 | LazyData: true 11 | Suggests: 12 | testthat, 13 | knitr, 14 | rmarkdown 15 | RoxygenNote: 7.2.1 16 | VignetteBuilder: knitr 17 | -------------------------------------------------------------------------------- /phackR/LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2019 2 | COPYRIGHT HOLDER: Angelika Stefan, Felix Schönbrodt -------------------------------------------------------------------------------- /phackR/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(runShinyPHack) 4 | export(sim.compscoreHack) 5 | export(sim.covhack) 6 | export(sim.cutoffHack) 7 | export(sim.impHack) 8 | export(sim.multDVhack) 9 | export(sim.multIVhack) 10 | export(sim.optstop) 11 | export(sim.outHack) 12 | export(sim.roundhack) 13 | export(sim.statAnalysisHack) 14 | export(sim.subgroupHack) 15 | export(sim.varTransHack) 16 | importFrom(R.devices,suppressGraphics) 17 | importFrom(WRS2,yuen) 18 | importFrom(aplpack,stem.leaf) 19 | importFrom(car,Anova) 20 | importFrom(dplyr,"%>%") 21 | importFrom(dplyr,all_of) 22 | importFrom(dplyr,do) 23 | importFrom(dplyr,group_by_at) 24 | importFrom(dplyr,mutate) 25 | importFrom(ggplot2,aes) 26 | importFrom(ggplot2,annotation_custom) 27 | importFrom(ggplot2,coord_cartesian) 28 | importFrom(ggplot2,element_text) 29 | importFrom(ggplot2,geom_col) 30 | importFrom(ggplot2,geom_histogram) 31 | importFrom(ggplot2,geom_segment) 32 | importFrom(ggplot2,geom_vline) 33 | importFrom(ggplot2,ggplot) 34 | importFrom(ggplot2,ggtitle) 35 | importFrom(ggplot2,layer_scales) 36 | importFrom(ggplot2,scale_fill_manual) 37 | importFrom(ggplot2,scale_x_continuous) 38 | 
importFrom(ggplot2,scale_y_continuous)
importFrom(ggplot2,theme)
importFrom(ggplot2,theme_light)
importFrom(ggplot2,waiver)
importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(graphics,boxplot)
importFrom(grid,gpar)
importFrom(grid,grobTree)
importFrom(grid,textGrob)
importFrom(magrittr,"%$%")
importFrom(mice,complete)
importFrom(mvoutlier,uni.plot)
importFrom(pbapply,pblapply)
importFrom(performance,item_reliability)
importFrom(rlang,.data)
importFrom(shiny,incProgress)
importFrom(shiny,withProgress)
importFrom(stats,aov)
importFrom(stats,as.formula)
importFrom(stats,cooks.distance)
importFrom(stats,cor)
importFrom(stats,density)
importFrom(stats,dfbeta)
importFrom(stats,dffits)
importFrom(stats,hatvalues)
importFrom(stats,influence.measures)
importFrom(stats,lm)
importFrom(stats,median)
importFrom(stats,qf)
importFrom(stats,quantile)
importFrom(stats,rnorm)
importFrom(stats,rstandard)
importFrom(stats,rstudent)
importFrom(stats,sd)
importFrom(stats,t.test)
importFrom(stats,wilcox.test)
importFrom(utils,capture.output)
importFrom(utils,tail)
-------------------------------------------------------------------------------- /phackR/R/combinedStrategies_regression.R: --------------------------------------------------------------------------------
# ==============================================================================
# Impact of combined p-hacking strategies: Regression Example
# NOTE(review): header previously said "t-test Example"; this file implements
# the regression variant (the t-test variant lives in
# combinedStrategies_ttest.R).
# ==============================================================================

#' Simulate data: combined strategies (regression example)
#' @description Simulates a standard-normal dependent variable, ncompv
#'   correlated score items (via .sim.multcor) plus their row-mean composite
#'   score, then introduces missing values: into the composite score
#'   (proportion \code{missing}), into the DV (same number of NAs at permuted
#'   positions), and into one randomly chosen score item for each missing
#'   score value.
#' @param nobs Integer giving number of observations
#' @param missing Proportion of missing values in the composite score
#' @param ncompv Integer giving number of items that build the composite score
#' @param rcomp Correlation between the composite score items
#' @return Matrix with columns: DV (1), score items (2:(ncompv+1)),
#'   composite score (ncompv+2)

.sim.combined.reg <- function(nobs = 100, missing = 0.1, ncompv = 5, rcomp = 0.75){

  # Sample dependent variable, variables in the score, compute score
  DV <- rnorm(nobs, 0, 1)

  SCOREVAR <- .sim.multcor(nobs = nobs,
                           nvar = ncompv,
                           r = rcomp)
  SCORE <- rowMeans(SCOREVAR)

  # Introduce missing values in score
  missingSCORE <- sample(c(TRUE, FALSE),
                         size = nobs,
                         prob = c(missing, 1-missing),
                         replace = TRUE)
  SCORE[missingSCORE] <- NA

  # Introduce missing values in DV
  # (a permutation of missingSCORE: same number of NAs, generally at
  # different row positions)
  missingDV <- sample(missingSCORE,
                      length(missingSCORE))
  DV[missingDV] <- NA

  # Create a missing value on a score variable if value in score is missing
  # (matrix indexing: column 1 = row index, column 2 = randomly drawn item)
  whichmissing <- matrix(c(which(missingSCORE == TRUE),
                           sample(1:ncompv,
                                  size = sum(missingSCORE),
                                  replace = TRUE)),
                         ncol=2,
                         byrow=FALSE)
  SCOREVAR[whichmissing] <- NA

  # Bind them all together
  DAT.FULL <- cbind(DV, SCOREVAR, SCORE)

  return(DAT.FULL)

}

#' P-hacking function: combined strategies on a regression test
#' @description Runs through p-hacking stages in a fixed order and stops as
#'   soon as a sufficiently small p-value is found. Stages (reported in the
#'   returned \code{stage} element):
#'   1   = original regression DV ~ SCORE already significant (p <= alpha),
#'   1.5 = incorrect rounding (alpha < p < roundinglevel, report alpha),
#'   2   = favorable imputation (.impHack),
#'   3   = variable transformation (.varTransHack),
#'   4   = scale redefinition (.compscoreHack),
#'   5   = outlier exclusion (.outHack),
#'   6   = nothing worked; original result is reported.
#' @param df Data frame as produced by .sim.combined.reg: DV in column 1,
#'   score items in columns 2:6, composite score in column 7
#'   (NOTE(review): x = 7, y = 1 and compv = c(2:6) below hard-code the
#'   default ncompv = 5 layout — confirm if ncompv is changed upstream)
#' @param roundinglevel P-values below this threshold count as "round-able"
#'   successes in stages 2-5
#' @param nImpMethods Number of imputation methods drawn at random (out of 10)
#' @param transvar Which variable(s) to transform ("xy")
#' @param ndelete Maximum number of items deleted from the composite scale
#' @param nOutMethods Number of outlier-exclusion methods drawn at random
#'   (out of 12)
#' @param strategy String value: One out of "firstsig", "smallest",
#'   "smallest.sig"
#' @param alpha Significance level (default: 0.05)
#' @return List with p.final, p.orig, r2.final, r2.orig, stage

.combined.reg.hack <- function(df, roundinglevel = 0.051, nImpMethods = 5, transvar = "xy", ndelete = 3, nOutMethods = 3, strategy = "firstsig", alpha = 0.05){

  ####################### (1) Original p-value ###################

  modres <- summary(lm(df$DV ~ df$SCORE))
  p.orig <- modres$coefficients[2, 4]
  r2.orig <- modres$r.squared

  # If original p-value is significant stop and return original p-value
  if(p.orig <= alpha) return(list(p.final = p.orig,
                                  p.orig = p.orig,
                                  r2.final = r2.orig,
                                  r2.orig = r2.orig,
                                  stage = 1))

  # If original p-value is smaller than rounding level stop and return alpha as p
  # (incorrect rounding: alpha < p.orig < roundinglevel at this point)
  if(p.orig < roundinglevel) return(list(p.final = alpha,
                                         p.orig = p.orig,
                                         r2.final = r2.orig,
                                         r2.orig = r2.orig,
                                         stage = 1.5))

  ####################### (2) Favorable Imputation #####################

  # Apply imputation methods (random selection)
  impMethods <- sample(c(1:10), nImpMethods)
  impres <- .impHack(df,
                     x = 7,
                     y = 1,
                     which = impMethods,
                     strategy = strategy,
                     alpha = roundinglevel)

  # If p-value is significant stop and return
  if(impres$p.final < roundinglevel) return(list(p.final = impres$p.final,
                                                 p.orig = p.orig,
                                                 r2.final = impres$r2.final,
                                                 r2.orig = r2.orig,
                                                 stage = 2))

  ###################### (3) Variable transformation #################

  # Apply variable transformation (omit NA)
  # NOTE(review): transvar and strategy are hard-coded as "xy"/"firstsig"
  # here; the function arguments of the same name are ignored for this
  # stage — confirm whether this is intended.
  transres <- .varTransHack(df[-which(is.na(df$DV) | is.na(df$SCORE)), ],
                            x = 7,
                            y = 1,
                            transvar = "xy",
                            strategy = "firstsig",
                            alpha = roundinglevel)

  # If p-value is significant, stop and return
  if(transres$p.final < roundinglevel) return(list(p.final = transres$p.final,
                                                   p.orig = p.orig,
                                                   r2.final = transres$r2.final,
                                                   r2.orig = r2.orig,
                                                   stage = 3))

  ##################### (4) Scale redefinition ##########################

  # Scale redefinition
  rescaleRes <- .compscoreHack(df[-which(is.na(df$DV) | is.na(df$SCORE)), ],
                               dv = 1,
                               compv = c(2:6),
                               ndelete = ndelete,
                               strategy = strategy,
                               alpha = roundinglevel)

  # If p-value is significant, stop and return
  if(rescaleRes$p.final < roundinglevel) return(list(p.final = rescaleRes$p.final,
                                                     p.orig = p.orig,
                                                     r2.final = rescaleRes$r2.final,
                                                     r2.orig = r2.orig,
                                                     stage = 4))

  ##################### (5) Outlier exclusion #############################

  # Exclude outliers (random selection of methods)
  outMethods <- sample(c(1:12), nOutMethods)
  outlierRes <- .outHack(df[-which(is.na(df$DV) | is.na(df$SCORE)), ],
                         x = 7,
                         y = 1,
                         which = outMethods,
                         strategy = strategy,
                         alpha = roundinglevel)

  # If p-value is significant, stop and return, else return original p-value
  if(outlierRes$p.final < roundinglevel){
    return(list(p.final = outlierRes$p.final,
                p.orig = p.orig,
                r2.final = outlierRes$r2.final,
                r2.orig = r2.orig,
                stage = 5))
  } else {
    return(list(p.final = p.orig,
                p.orig = p.orig,
                r2.final = r2.orig,
                r2.orig = r2.orig,
                stage = 6))
  }

}

#' Simulate p-hacking with combined strategies (regression example)
#' @description Simulates \code{iter} datasets with .sim.combined.reg and
#'   applies .combined.reg.hack to each.
#' @param nobs Integer giving number of observations
#' @param missing Proportion of missing values in the composite score
#' @param ncompv Integer giving number of items that build the composite score
#' @param rcomp Correlation between the composite score items
#' @param roundinglevel See .combined.reg.hack
#' @param nImpMethods Number of imputation methods drawn at random
#' @param transvar Which variable(s) to transform
#' @param ndelete Maximum number of items deleted from the composite scale
#' @param nOutMethods Number of outlier-exclusion methods drawn at random
#' @param strategy String value: One out of "firstsig", "smallest",
#'   "smallest.sig"
#' @param alpha Significance level (default: 0.05)
#' @param iter Number of simulation iterations
#' @return Matrix with columns ps.hack, ps.orig, r2s.hack, r2s.orig, stage

sim.combined.reg <- function(nobs = 100, missing = 0.1, ncompv = 5, rcomp = 0.75, roundinglevel = 0.051, nImpMethods = 5, transvar = "xy", ndelete = 3, nOutMethods = 3, strategy = "firstsig", alpha = 0.05, iter = 1000){

  # Simulate as many datasets as desired iterations
  dat <- list()
  for(i in 1:iter){
    dat[[i]] <- .sim.combined.reg(nobs = nobs,
                                  missing = missing,
                                  ncompv = ncompv,
                                  rcomp = rcomp)
  }

  # Wrapper fixing the hacking parameters for this simulation run
  .combined.reg.hack.list <- function(x){
    .combined.reg.hack(df = x,
                       roundinglevel = roundinglevel,
                       nImpMethods = nImpMethods,
                       transvar = transvar,
                       ndelete = ndelete,
                       nOutMethods = nOutMethods,
                       strategy = strategy,
                       alpha = alpha)
  }

  # Apply p-hacking procedure to each dataset
  res <- pbapply::pblapply(dat, .combined.reg.hack.list)

  # Collect per-iteration results into flat vectors
  ps.hack <- NULL
  ps.orig <- NULL
  r2s.hack <- NULL
  r2s.orig <- NULL
  stage <- NULL

  for(i in 1:iter){
    ps.hack[i] <- res[[i]][["p.final"]]
    ps.orig[i] <- res[[i]][["p.orig"]]
    r2s.hack[i] <- res[[i]][["r2.final"]]
    r2s.orig[i] <- res[[i]][["r2.orig"]]
    stage[i] <- res[[i]][["stage"]]
  }

  res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig, stage)

  return(res)

}
-------------------------------------------------------------------------------- /phackR/R/compositeScores.R: --------------------------------------------------------------------------------
# ==============================================================================
# Scale Redefinition / Composite Scores
# ==============================================================================

#' Simulate data: Correlated composite score raw variables and one non-correlated dependent variable
#' @param nobs Integer giving number of
#' @param nobs Integer giving number of observations
#' @param ncompv Integer giving number of variables to build the composite score
#' @param rcomp Correlation between the composite score variables
#' @return Matrix with the (null-effect) dependent variable in column 1 and
#'   the correlated score items in the remaining columns

.sim.compscore <- function(nobs, ncompv, rcomp){

  # DV is drawn independently of the score items (true null effect)
  dv <- rnorm(nobs, 0, 1)

  iv <- .sim.multcor(nobs = nobs, nvar = ncompv, r = rcomp)

  res <- cbind(dv, iv)

  return(res)

}

#' P-Hacking function for scale redefinition / Composite Scores
#' @param df Data frame containing dependent variable and composite score items as columns
#' @param dv Location of dependent variable in the data frame
#' @param compv Location of composite score variables in the data frame
#' @param ndelete How many items should be deleted from the scale at maximum?
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test (default: 0.05)
#' @return List with p.final, ps (all p-values, original first), r2.final, r2s
#' @importFrom stats lm
#' @importFrom performance item_reliability

.compscoreHack <- function(df, dv, compv, ndelete, strategy = "firstsig", alpha = 0.05){

  # At least two items must remain in the scale after deletion
  stopifnot(length(compv)-ndelete >= 2)

  # Compute original p-value and R^2 with full scale
  modres <- summary(lm(df[, dv] ~ rowMeans(df[, compv])))
  p.orig <- modres$coefficients[2, 4]
  r2.orig <- modres$r.squared

  # Prepare and initialize variables for p-hacking
  ps <- list()
  r2s <- list()
  compscale <- df[, compv]      # full item set (never shrinks)
  changescale <- df[, compv]    # current item set (shrinks each round)
  out <- NULL                   # indices (into compscale) of deleted items

  # Strategically delete items from the composite scale and re-calculate the p-value
  for(i in 1:ndelete){

    pval <- rep(NA, 2)
    r2val <- rep(NA, 2)

    # Define new item to delete from the scale
    # NOTE(review): picks the item maximizing column 2 of
    # performance::item_reliability(changescale) — presumably the
    # "alpha if item is deleted" column; confirm against the package docs.
    out[i] <- which(colnames(compscale) %in% colnames(changescale)[which.max(performance::item_reliability(changescale)[,2])])

    # Compute p-value for the new composite score
    newscore <- rowMeans(compscale[, -out])
    newmodres <- summary(lm(df[, dv] ~ newscore))
    pval[1] <- newmodres$coefficients[2, 4]
    r2val[1] <- newmodres$r.squared

    # Compute p-value for the item deleted from the score
    itemscore <- compscale[, out[i]]
    newmodres2 <- summary(lm(df[, dv] ~ itemscore))
    pval[2] <- newmodres2$coefficients[2, 4]
    r2val[2] <- newmodres2$r.squared

    # Compute p-value for a scale of all items deleted so far
    #nonscore <- rowMeans(cbind(compscale[, out]))
    #newmodres3 <- summary(lm(df[, dv] ~ nonscore))
    #pval[3] <- newmodres3$coefficients[2, 4]
    #r2val[3] <- newmodres3$r.squared

    changescale <- compscale[, -out]
    ps[[i]] <- pval
    r2s[[i]] <- r2val
  }

  ps <- c(p.orig, unique(unlist(ps)))
  r2s <- c(r2.orig, unique(unlist(r2s)))

  # Select final p-hacked p-value based on strategy
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  # NOTE(review): exact floating-point match against p.final; duplicate
  # p-values with differing R^2 would make r2.final longer than 1.
  r2.final <- unique(r2s[ps == p.final])

  return(list(p.final = p.final,
              ps = ps,
              r2.final = r2.final,
              r2s = r2s))

}

#' Simulate p-hacking with composite scores
#' Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations
#' @param nobs Integer giving number of observations
#' @param ncompv Integer giving number of variables to build the composite score
#' @param rcomp Correlation between the composite score variables
#' @param ndelete How many items should be deleted from the scale at maximum?
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test (default: 0.05)
#' @param iter Number of simulation iterations
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @importFrom pbapply pblapply
#' @importFrom shiny withProgress incProgress
#' @export

sim.compscoreHack <- function(nobs, ncompv, rcomp, ndelete, strategy = "firstsig", alpha = 0.05, iter = 1000, shinyEnv=FALSE){

  # Simulate as many datasets as desired iterations
  dat <- list()
  for(i in 1:iter){
    dat[[i]] <- .sim.compscore(nobs = nobs, ncompv = ncompv, rcomp = rcomp)
  }

  # Apply p-hacking procedure to each dataset (with progress bar within or outside Shiny)
  if(!shinyEnv){
    .compscoreHackList <- function(x){
      .compscoreHack(df = x, dv = 1, compv = c(2:(ncompv+1)), ndelete = ndelete,
                     strategy = strategy, alpha = alpha)
    }

    res <- pbapply::pblapply(dat, .compscoreHackList)
  }

  if(shinyEnv){
    percentage <- 0
    withProgress(message = "Running simulation", value=0, {
      res=lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ",round(percentage,2), "%"))
        .compscoreHack(df = x, dv = 1, compv = c(2:(ncompv+1)), ndelete = ndelete,
                       strategy = strategy, alpha = alpha)
      })
    })
  }

  # Collect per-iteration results into flat vectors
  ps.hack <- NULL
  ps.orig <- NULL
  r2s.orig <- NULL
  r2s.hack <- NULL

  for(i in 1:iter){
    ps.hack[i] <- res[[i]][["p.final"]]
    ps.orig[i] <- res[[i]][["ps"]][1]
    r2s.hack[i] <- res[[i]][["r2.final"]]
    r2s.orig[i] <- res[[i]][["r2s"]][1]
  }

  res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig)

  return(res)

}
-------------------------------------------------------------------------------- /phackR/R/exploitCovariates.R: --------------------------------------------------------------------------------
# ==============================================================================
# Exploiting Covariates
#
============================================================================== 4 | 5 | #' Simulate data with (correlated) covariates 6 | #' @description Simulates a dependent variable that correlates with multiple (correlated) covariates as well as an independent IV 7 | #' @param nobs.group Vector with number of observations per group 8 | #' @param ncov Number of continuous covariates in the simulated data frame 9 | #' @param rcov Correlation between the covariates 10 | #' @param rcovdv Correlation between covariates and dependent variable 11 | #' @param mu Mean of the random data 12 | #' @param sd Standard deviation of the random data 13 | #' @param missing Proportion of missing values per variable (e.g., 0.2 = 20 percent) 14 | #' @importFrom stats rnorm 15 | 16 | .sim.covariates <- function(nobs.group, ncov, rcov, rcovdv, mu = 0, sd = 1, missing = 0){ 17 | 18 | # Observations per group and total observations 19 | if(length(nobs.group) == 1) nobs.group <- rep(nobs.group, 2) 20 | nobs <- sum(nobs.group) 21 | 22 | # Generate group vector 23 | group <- rep(1:length(nobs.group), nobs.group) 24 | 25 | # Set up correlation matrix 26 | nvar <- ncov + 1 27 | R <- matrix(rep(rcov, nvar**2), nrow = nvar) 28 | R[,1] <- rep(rcovdv, nvar) 29 | R[1,] <- R[,1] 30 | diag(R) <- rep(1, nvar) 31 | 32 | # transposed Cholesky decomposition of correlation matrix 33 | U <- t(chol(R)) 34 | 35 | # create random noise matrix 36 | random.normal <- matrix(stats::rnorm(nvar*nobs, mu, sd), nrow=nvar, ncol=nobs) 37 | 38 | # create raw data from matrix multiplication of U and random noise 39 | X <- as.data.frame(t(U %*% random.normal)) 40 | 41 | # create final simulated data matrix 42 | Xfull <- cbind(group, X) 43 | 44 | # add missing values 45 | if(missing > 0){ 46 | navalues <- as.data.frame(replicate(nvar+1, sample(1:nobs, missing*nobs))) 47 | for(i in 1:nvar){ 48 | X[unlist(navalues[,i]),i] <- NA 49 | } 50 | } 51 | 52 | return(Xfull) 53 | 54 | } 55 | 56 | #' P-Hacking function for multiple 
covariates 57 | #' @description Outputs a p-hacked p-value and a vector of all p-values that were computed in the process 58 | #' @param df Data frame with one group variable, one dependent variable, and one or more covariates 59 | #' @param dv Integer defining the location of the dependent variable column 60 | #' @param group Integer defining the location of the group variable column 61 | #' @param covs Numeric vector defining the location of the covariate(s). 62 | #' @param interactions Should interaction terms be added to the ANCOVA models? TRUE/FALSE 63 | #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" 64 | #' @param alpha Significance level of the t-test 65 | #' @importFrom car Anova 66 | #' @importFrom stats cor aov as.formula 67 | 68 | .covhack <- function(df, dv, group, covs, interactions = FALSE, strategy = "firstsig", alpha = 0.05){ 69 | 70 | # Prepare data frame 71 | colnames(df)[group] <- "group" 72 | colnames(df)[dv] <- "dv" 73 | colnames(df)[covs] <- paste0("CV", 1:length(covs)) 74 | df <- df[, c(dv, group, covs)] 75 | 76 | ps <- NULL 77 | eta2s <- NULL # partial eta^2 78 | 79 | # Compute correlations between covariates and dependent variable and order covariates accordingly 80 | dvcors <- apply(X = df[,-group], MARGIN = 2, FUN = function(x) stats::cor(x, df$dv))[-1] 81 | covorder <- order(dvcors, decreasing = TRUE) 82 | 83 | # Define ANCOVA models (add covariates in decreasing correlation with dependent variable) 84 | 85 | interactions <- ifelse(interactions, " * ", " + ") 86 | addmodels <- c("dv ~ group", rep(NA, length(covs))) 87 | singmodels <- c("dv ~ group", rep(NA, length(covs))) 88 | 89 | for(i in 1:length(covs)){ 90 | mdl <- paste("dv ~ group", paste0("CV", covorder[i]), sep = interactions) 91 | singmodels[i + 1] <- mdl 92 | } 93 | 94 | for(i in 1:length(covs)){ 95 | mdl <- paste(paste0("CV", covorder[1:i]), collapse = interactions) 96 | mdl <- paste("dv ~ group", mdl, sep = interactions) 97 | addmodels[i+1] 
<- mdl 98 | } 99 | 100 | models <- unique(c(singmodels, addmodels)) 101 | 102 | # Compute ANCOVAs 103 | 104 | for(i in 1:length(models)){ 105 | 106 | res <- stats::aov(stats::as.formula(models[i]), data = df) 107 | resanc <- car::Anova(res, type = 2) 108 | ps[i] <- resanc["group", "Pr(>F)"] 109 | eta2s[i] <- resanc["group", "Sum Sq"]/(resanc["group", "Sum Sq"] + resanc["Residuals", "Sum Sq"]) 110 | 111 | } 112 | 113 | # Select final p-hacked p-value based on strategy 114 | p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha) 115 | eta2.final <- unique(eta2s[ps == p.final]) 116 | 117 | return(list(p.final = p.final, 118 | ps = ps, 119 | eta2.final = eta2.final, 120 | eta2s = eta2s)) 121 | 122 | 123 | } 124 | 125 | #' Simulate p-Hacking with multiple covariates 126 | #' Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 127 | #' @param nobs.group Vector with number of observations per group 128 | #' @param ncov Number of continuous covariates in the simulated data frame 129 | #' @param rcov Correlation between the covariates 130 | #' @param rcovdv Correlation between covariates and dependent variable 131 | #' @param interactions Should interaction terms be added to the ANCOVA models? TRUE/FALSE 132 | #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" 133 | #' @param alpha Significance level of the t-test 134 | #' @param iter Number of simulation iterations 135 | #' @param shinyEnv Is the function run in a Shiny session? 
#'   TRUE/FALSE
#' @export

sim.covhack <- function(nobs.group, ncov, rcov, rcovdv, interactions = FALSE,
                        strategy = "firstsig", alpha = 0.05, iter = 1000,
                        shinyEnv = FALSE) {

  # One simulated dataset per iteration
  dat <- lapply(seq_len(iter), function(i) {
    .sim.covariates(nobs.group = nobs.group, ncov = ncov,
                    rcov = rcov, rcovdv = rcovdv)
  })

  # p-hack one dataset (dv in col 2, group in col 1, covariates after)
  hackone <- function(x) {
    .covhack(df = x, dv = 2, group = 1, covs = c(3:(2 + ncov)),
             interactions = interactions, strategy = strategy, alpha = alpha)
  }

  if (!shinyEnv) {
    res <- pbapply::pblapply(dat, hackone)
  }

  if (shinyEnv) {
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res = lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        hackone(x)
      })
    })
  }

  # Collect hacked and original p-values / effect sizes per iteration
  ps.hack <- vapply(res, function(r) r[["p.final"]], numeric(1))
  ps.orig <- vapply(res, function(r) r[["ps"]][1], numeric(1))
  eta2s.hack <- vapply(res, function(r) r[["eta2.final"]], numeric(1))
  eta2s.orig <- vapply(res, function(r) r[["eta2s"]][1], numeric(1))

  cbind(ps.hack, ps.orig, eta2s.hack, eta2s.orig)
}

# ==============================================================================
# Exploiting arbitrary cutoff values
#
# ==============================================================================

# Simulation can be done with .sim.multcor where the correlation is zero

#' P-Hacking function for exploiting cutoff values
#' @param df Data frame with one continuous independent variable and one continuous dependent variable
#' @param iv Location of the independent variable in the data frame
#' @param dv Location of the dependent variable in the data frame
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test
#' @importFrom stats t.test aov median quantile

.cutoffHack <- function(df, iv, dv, strategy = "firstsig", alpha = 0.05) {

  iv <- df[, iv]
  dv <- df[, dv]

  # Honest analysis: simple linear regression of dv on the continuous iv
  fit.orig <- summary(stats::lm(dv ~ iv))
  p.orig <- fit.orig$coefficients[2, 4]
  r2.orig <- fit.orig$r.squared

  # Variant 1: median split on iv, then a two-sample t-test
  splitgrp <- as.numeric(iv > stats::median(iv)) + 1
  lower <- dv[splitgrp == 1]
  upper <- dv[splitgrp == 2]
  p.mediansplit <- stats::t.test(lower, upper, var.equal = TRUE,
                                 alternative = "two.sided")$p.value
  r2.mediansplit <- .compR2t(lower, upper)

  # Variant 2: drop the middle tertile, compare the outer tertiles
  tertiles <- as.numeric(stats::quantile(iv, probs = c(1/3, 2/3)))
  threecut <- cut(iv, breaks = c(-Inf, tertiles, Inf), labels = c(1, 0, 2))
  dv2 <- dv[threecut %in% c(1, 2)]
  threecut2 <- threecut[threecut %in% c(1, 2)]
  p.cutmiddle <- stats::t.test(dv2[threecut2 == 2], dv2[threecut2 == 1],
                               var.equal = TRUE,
                               alternative = "two.sided")$p.value
  r2.cutmiddle <- .compR2t(dv2[threecut2 == 2], dv2[threecut2 == 1])

  # Variant 3: keep all three iv categories and run an omnibus ANOVA
  fit.threecat <- summary(stats::aov(dv ~ threecut))
  p.threecat <- fit.threecat[[1]][[5]][1]
  r2.threecat <- fit.threecat[[1]][1, 2] / sum(fit.threecat[[1]][, 2])

  ps <- c(p.orig, p.mediansplit, p.cutmiddle, p.threecat)
  r2s <- c(r2.orig, r2.mediansplit, r2.cutmiddle, r2.threecat)

  # Select final p-hacked p-value based on strategy
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  r2.final <- unique(r2s[ps == p.final])

  list(p.final = p.final,
       ps = ps,
       r2.final = r2.final,
       r2s = r2s)
}

#' Simulate p-Hacking for exploiting cutoff values
#' Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations
#' @param nobs Number of observations
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test
#' @param iter Number of simulation iterations
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @export

sim.cutoffHack <- function(nobs, strategy = "firstsig", alpha = 0.05,
                           iter = 1000, shinyEnv = FALSE) {

  # Two uncorrelated variables per dataset, one dataset per iteration
  dat <- lapply(seq_len(iter), function(i) {
    .sim.multcor(nobs = nobs, nvar = 2, r = 0)
  })

  hackone <- function(x) {
    .cutoffHack(df = x, iv = 1, dv = 2, strategy = strategy, alpha = alpha)
  }

  if (!shinyEnv) {
    res <- pbapply::pblapply(dat, hackone)
  }

  if (shinyEnv) {
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res = lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        hackone(x)
      })
    })
  }

  ps.hack <- vapply(res, function(r) r[["p.final"]], numeric(1))
  ps.orig <- vapply(res, function(r) r[["ps"]][1], numeric(1))
  r2s.hack <- vapply(res, function(r) r[["r2.final"]], numeric(1))
  r2s.orig <- vapply(res, function(r) r[["r2s"]][1], numeric(1))

  cbind(ps.hack, ps.orig, r2s.hack, r2s.orig)
}

# ==============================================================================
# p-Hacking through Favorable Imputation
# ==============================================================================

# ------------------------------------------------------------------------------
# Some local helper functions
# ------------------------------------------------------------------------------

#' Single value imputation function
#' Imputes NA values in a single variable using the function specified in fun
#' @param x The target variable of the imputation
#' @param fun The function used to replace missing values that takes x as an argument (e.g., mean)
#' @param ... Additional arguments to fun

.easyimpute <- function(x, fun, ...) {
  # Compute the replacement once from the full vector, then fill every NA
  fill <- fun(x, ...)
  x[is.na(x)] <- fill
  x
}

#' Estimate mode of continuous variables
#' Estimates mode of continuous variables using the density() function
#' @param x The target variable for which the mode should be searched
#' @importFrom stats density

.estimate_mode <- function(x) {
  # The mode is taken as the location of the kernel density estimate's peak
  dens <- stats::density(x, na.rm = TRUE)
  dens$x[which.max(dens$y)]
}

# ------------------------------------------------------------------------------
# P-Hacking functions
# ------------------------------------------------------------------------------

# Simulation function: Data can be simulated using .sim.multcor with r = 0.
# Proportion of missing values can be controlled through the argument "missing"

#' P-Hacking function favorable imputation in univariate linear regression
#' @description Outputs a p-hacked p-value and a vector of all p-values that were computed in the process
#' @param df Data frame containing x and y variables as columns
#' @param x Location of x variable (predictor) in the data frame
#' @param y Location of y variable (criterion) in the data frame
#' @param which Which missing value handling method? 1: delete missing, 2: mean imputation, 3: median imputation, 4: mode imputation, 5: predictive mean matching, 6: weighted predictive mean matching, 7: sample from observed values, 8: Bayesian linear regression, 9: linear regression ignoring model error, 10: linear regression predicted values
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test (default: 0.05)
#' @importFrom stats median lm
#' @importFrom mice complete

.impHack <- function(df, x, y, which = c(1:10), strategy = "firstsig", alpha = 0.05){

  x <- df[, x]
  y <- df[, y]

  # Stop if imputation methods are not defined
  stopifnot(any(c(1:10) %in% which))

  # One slot per candidate method; slots of unused methods stay NA and are
  # dropped before the p-value selection below
  ps <- rep(NA, 10)
  r2s <- rep(NA, 10)

  # Method 1: complete-case analysis (missing values deleted)
  if(1 %in% which){
    mod1 <- summary(stats::lm(y ~ x, na.action = "na.omit"))
    ps[1] <- mod1$coefficients[2, 4]
    r2s[1] <- mod1$r.squared
  }

  # Method 2: mean imputation
  if(2 %in% which){
    newx <- .easyimpute(x, mean, na.rm = TRUE)
    newy <- .easyimpute(y, mean, na.rm = TRUE)
    mod2 <- summary(stats::lm(newy ~ newx))
    ps[2] <- mod2$coefficients[2, 4]
    r2s[2] <- mod2$r.squared
  }

  # Method 3: median imputation
  # BUGFIX: this branch previously imputed with mean(), silently duplicating
  # method 2; median imputation must use stats::median()
  if(3 %in% which){
    newx <- .easyimpute(x, stats::median, na.rm = TRUE)
    newy <- .easyimpute(y, stats::median, na.rm = TRUE)
    mod3 <- summary(stats::lm(newy ~ newx))
    ps[3] <- mod3$coefficients[2, 4]
    r2s[3] <- mod3$r.squared
  }

  # Method 4: mode imputation (kernel-density mode)
  if(4 %in% which){
    newx <- .easyimpute(x, .estimate_mode)
    newy <- .easyimpute(y, .estimate_mode)
    mod4 <- summary(stats::lm(newy ~ newx))
    ps[4] <- mod4$coefficients[2, 4]
    r2s[4] <- mod4$r.squared
  }

  # Methods 5-10: multivariate imputation by chained equations ("mice").
  # Method slot k maps to mice method micemethods[k - 4]:
  #   5 pmm, 6 midastouch, 7 sample, 8 norm, 9 norm.nob, 10 norm.predict
  dfnew <- as.data.frame(cbind(x, y))
  micemethods <- c("pmm", "midastouch", "sample", "norm", "norm.nob", "norm.predict")
  for(k in 5:10){
    if(k %in% which){
      imp <- mice::mice(dfnew, m = 1, method = micemethods[k - 4], silent = TRUE, print = FALSE)
      modk <- summary(stats::lm(y ~ x, data = mice::complete(imp, 1)))
      ps[k] <- modk$coefficients[2, 4]
      r2s[k] <- modk$r.squared
    }
  }

  # Drop slots of methods that were not requested
  ps <- ps[!is.na(ps)]
  r2s <- r2s[!is.na(r2s)]

  # Select final p-hacked p-value based on strategy
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  r2.final <- unique(r2s[ps == p.final])

  return(list(p.final = p.final,
              ps = ps,
              r2.final = r2.final,
              r2s = r2s))
}

#' Simulate p-Hacking with different sorts of missing value imputation
#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations
#' @param nobs Integer giving number of observations
#' @param missing Percentage of missing values (e.g., 0.1 for 10 percent)
#' @param which Which imputation methods?
#'   Either 5 random methods are chosen ("random") or a numeric vector containing the chosen methods (1: delete missing, 2: mean imputation, 3: median imputation, 4: mode imputation, 5: predictive mean matching, 6: weighted predictive mean matching, 7: sample from observed values, 8: Bayesian linear regression, 9: linear regression ignoring model error, 10: linear regression predicted values)
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test (default: 0.05)
#' @param iter Number of simulation iterations
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @export

sim.impHack <- function(nobs, missing, which = c(1:10), strategy = "firstsig",
                        alpha = 0.05, iter = 1000, shinyEnv = FALSE){

  # One dataset with the requested share of missing values per iteration
  dat <- lapply(seq_len(iter), function(i) {
    .sim.multcor(nobs = nobs, nvar = 2, r = 0, missing = missing)
  })

  # "random" picks 5 of the 10 imputation methods at random
  if(any(which == "random")) which <- sample(c(1:10), 5)

  hackone <- function(d) {
    .impHack(df = d, x = 1, y = 2,
             which = which, strategy = strategy, alpha = alpha)
  }

  if(!shinyEnv){
    res <- pbapply::pblapply(dat, hackone)
  }

  if(shinyEnv){
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res = lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        hackone(x)
      })
    })
  }

  ps.hack <- vapply(res, function(r) r[["p.final"]], numeric(1))
  ps.orig <- vapply(res, function(r) r[["ps"]][1], numeric(1))
  r2s.hack <- vapply(res, function(r) r[["r2.final"]], numeric(1))
  r2s.orig <- vapply(res, function(r) r[["r2s"]][1], numeric(1))

  cbind(ps.hack, ps.orig, r2s.hack, r2s.orig)
}

# ==============================================================================
# Helpers
# ==============================================================================

#' Simulate multivariate correlated data for continuous variables
#' @description Outputs a data frame with correlated variables of defined length
#' @param nobs Number of observations (rows) in the simulated data frame
#' @param nvar Number of variables (columns) in the data frame
#' @param r Desired correlation between the variables (integer)
#' @param mu Mean of the random data
#' @param sd Standard deviation of the random data
#' @param missing Proportion of missing values per variable (e.g., 0.2 = 20 percent)
#' @importFrom stats rnorm

.sim.multcor <- function(nobs, nvar, r, mu = 0, sd = 1, missing = 0){

  # Compound-symmetric correlation matrix: r off-diagonal, 1 on the diagonal
  R <- matrix(r, nrow = nvar, ncol = nvar)
  diag(R) <- 1

  # Transposed Cholesky factor induces the desired correlation
  U <- t(chol(R))

  # Uncorrelated standard draws, one row per variable
  noise <- matrix(stats::rnorm(nvar * nobs, mu, sd), nrow = nvar, ncol = nobs)

  # Correlated data; transpose so rows become observations
  X <- as.data.frame(t(U %*% noise))

  # Optionally blank out a share of each column
  if(missing > 0){
    # With fewer than 2 missing cells per column replicate() simplifies to a
    # plain vector, hence the transpose in the first branch
    if(missing * nobs < 2){
      navalues <- as.data.frame(t(replicate(nvar, sample(1:nobs, missing * nobs))))
    } else {
      navalues <- as.data.frame(replicate(nvar, sample(1:nobs, missing * nobs)))
    }
    for(i in seq_len(nvar)){
      X[unlist(navalues[, i]), i] <- NA
    }
  }

  X
}

#' Generic sampling function
#' @description Outputs a data frame with two columns
#' @param nobs.group Number of observations per group. Either a scalar or a vector with two elements.
#' @importFrom stats rnorm

.sim.data <- function(nobs.group){

  # A scalar group size applies to both groups
  if(length(nobs.group) == 1) nobs.group <- rep(nobs.group, 2)

  # Standard-normal scores per group plus a numeric group indicator
  V1 <- stats::rnorm(nobs.group[1], 0, 1)
  V2 <- stats::rnorm(nobs.group[2], 0, 1)
  group <- rep(c(1, 2), nobs.group)

  cbind(group, c(V1, V2))
}

#' Create data frames without outliers
#' @description Inputs data frame and two sets of outlier values, outputs list with three data frames
#' @param x Original vector of x values
#' @param y Original vector of y values
#' @param outsx Outlier values to be removed from x
#' @param outsy Outlier values to be removed from y

.extractoutlier <- function(x, y, outsx, outsy){

  # Row masks after removing x outliers / y outliers; an empty outlier set
  # keeps every row (all-TRUE mask)
  keepx <- !x %in% outsx
  keepy <- !y %in% outsy

  # Variant 1: x outliers removed from both vectors
  xy1 <- unname(cbind(x[keepx], y[keepx]))

  # Variant 2: y outliers removed from both vectors
  xy2 <- unname(cbind(x[keepy], y[keepy]))

  # Variant 3: both removed — but only when both outlier sets are non-empty;
  # otherwise the full data are kept (mirrors the pass-through above)
  if(length(outsx) > 0 && length(outsy) > 0){
    keepboth <- keepx & keepy
  } else {
    keepboth <- rep(TRUE, length(x))
  }
  xy3 <- unname(cbind(x[keepboth], y[keepboth]))

  # Drop duplicate variants
  unique(unname(list(xy1, xy2, xy3)))
}

#' Select a p-value from a vector of p-hacked p-values
#' @description
#'   Takes a vector of p-values and selects the smallest, first significant, or smallest significant p-value.
#' @param ps Vector of p values
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level (default: 0.05)

.selectpvalue <- function(ps, strategy, alpha){

  # The first entry is, by convention, the un-hacked (original) p-value;
  # it is the fallback whenever nothing reaches significance.
  p.orig <- ps[1]

  switch(strategy,
         # smallest significant p-value, else the original
         "smallest.sig" = if(min(ps) < alpha) min(ps) else p.orig,
         # first p-value below alpha in order, else the original
         "firstsig"     = if(min(ps) < alpha) ps[which(ps < alpha)[1]] else p.orig,
         # smallest p-value regardless of significance
         "smallest"     = min(ps),
         # unknown strategy: NA (matches the original initialization)
         NA)
}

#' Compute R squared for the t-test
#' @param x values of group 1
#' @param y values of group 2

.compR2t <- function(x, y){
  # 1 - SS_within / SS_total over the pooled sample
  pooled <- c(x, y)
  sst <- sum((pooled - mean(pooled))^2)
  sse <- sum((x - mean(x))^2) + sum((y - mean(y))^2)
  1 - sse / sst
}

#' Compute Cohen's d
#' @description Compute Cohen's d from t-value with equal sized groups of size n
#' @param t t-value
#' @param n sample size per group

.compCohensD <- function(t, n){
  sqrt(2 / n) * t
}

# ==============================================================================
# Incorrect Rounding
# ==============================================================================

# Generic sampling function .sim.data() can be used

#'
#' P-Hacking function for incorrect rounding
#' @description Outputs a p-hacked p-value and the non-p-hacked-p-value
#' @param df Data frame
#' @param group Scalar defining location of the group vector in the data frame
#' @param dv Scalar defining location of dependent variable in the data frame
#' @param roundinglevel Highest p-value that is rounded down to 0.05
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test (default: 0.05)
#' @importFrom stats t.test

.roundhack <- function(df, group, dv, roundinglevel, alternative = "two.sided", alpha = 0.05){

  grp <- df[, group]
  outcome <- df[, dv]
  lvls <- unique(grp)

  # Honest analysis: two-sample t-test plus its R^2 effect size
  pval <- stats::t.test(outcome ~ grp,
                        var.equal = TRUE, alternative = alternative)$p.value
  r2val <- .compR2t(outcome[grp == lvls[1]],
                    outcome[grp == lvls[2]])

  # Incorrect rounding: a just-non-significant p-value below roundinglevel
  # is reported as exactly alpha
  p.final <- if(pval > alpha && pval < roundinglevel) alpha else pval

  list(p.final = p.final,
       ps = c(pval, p.final),
       r2.final = r2val,
       r2s = rep(r2val, 2))
}

#' Simulate p-hacking with incorrect rounding
#' @param roundinglevel Highest p-value that is rounded down to alpha
#' @param iter Number of iterations
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test (default: 0.05)
#' @param shinyEnv Is the function run in a Shiny session?
#'   TRUE/FALSE
#' @export

sim.roundhack <- function(roundinglevel, iter = 1000, alternative = "two.sided",
                          alpha = 0.05, shinyEnv = FALSE){

  # One two-group dataset (30 observations per group) per iteration
  dat <- lapply(seq_len(iter), function(i) .sim.data(nobs.group = 30))

  if(!shinyEnv){
    res <- pbapply::pblapply(dat, .roundhack, group = 1, dv = 2,
                             roundinglevel = roundinglevel,
                             alternative = alternative, alpha = alpha)
  }

  if(shinyEnv){
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res = lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        .roundhack(df = x, group = 1, dv = 2, roundinglevel = roundinglevel,
                   alternative = alternative, alpha = alpha)
      })
    })
  }

  ps.hack <- vapply(res, function(r) r[["p.final"]], numeric(1))
  ps.orig <- vapply(res, function(r) r[["ps"]][1], numeric(1))
  r2s.hack <- vapply(res, function(r) r[["r2.final"]], numeric(1))
  r2s.orig <- vapply(res, function(r) r[["r2s"]][1], numeric(1))

  cbind(ps.hack, ps.orig, r2s.hack, r2s.orig)
}

# ==============================================================================
# Optional Stopping Based on Significance
# ==============================================================================

# Generic sampling function .sim.data() can be used

#' Optional Stopping based on existing dataset
#' @description Returns a p-hacked p-value and a non-p-hacked p-value based on the maximum sample size
#' @param df Data frame
| #' @param group group Scalar defining grouping column 11 | #' @param dv Scalar defining location of dependent variable in the data frame 12 | #' @param n.min Minimum sample size 13 | #' @param n.max Maximum sample size 14 | #' @param step Step size of the optional stopping (default is 1) 15 | #' @param peek Determines how often one peeks at the data. Overrides step argument if not NULL. 16 | #' @param alternative Direction of the t-test ("two.sided", "less", "greater") 17 | #' @param alpha Significance level of the t-test (default: 0.05) 18 | #' @importFrom stats t.test 19 | #' @importFrom utils tail 20 | 21 | .optstop <- function(df, group, dv, n.min, n.max, step = 1, peek = NULL, alternative = "two.sided", alpha = 0.05){ 22 | 23 | # Extract group variables 24 | g1 <- df[df[,group] == unique(df[,group])[1], dv] 25 | g2 <- df[df[,group] == unique(df[,group])[2], dv] 26 | 27 | # Sanity check: Enough data? 28 | stopifnot(length(g1) >= n.max && length(g2) >= n.max) 29 | 30 | # Determine places of peeks 31 | if(is.null(peek)){ 32 | peeks <- seq(n.min, n.max, by=step) 33 | if(step > (n.max-n.min)) peeks <- c(n.min, n.max) 34 | } else { 35 | peeks <- round(seq(n.min, n.max, length.out = peek)) 36 | } 37 | 38 | # Compute t-tests 39 | mod <- sapply(peeks, FUN = function(x) {stats::t.test(g1[1:x], g2[1:x], var.equal = TRUE, alternative = alternative)}) 40 | ps <- simplify2array(mod["p.value",]) 41 | r2s <- sapply(peeks, FUN = function(x) {.compR2t(g1[1:x], g2[1:x])}) 42 | ds <- .compCohensD(simplify2array(mod["statistic",]), peeks) 43 | 44 | # Do the p-hacking 45 | if(any(ps < alpha) == FALSE){ 46 | p.final <- utils::tail(ps, 1) 47 | r2.final <- utils::tail(r2s, 1) 48 | d.final <- utils::tail(ds, 1) 49 | } else if (any(ps < alpha) == TRUE) { 50 | p.final <- ps[which(ps < alpha)][1] 51 | r2.final <- unique(r2s[ps == p.final]) 52 | d.final <- unique(ds[ps == p.final]) 53 | } 54 | 55 | return(list(p.final = p.final, 56 | ps = ps, 57 | r2.final = r2.final, 58 | r2s = r2s, 59 
| d.final = d.final, 60 | ds = ds)) 61 | } 62 | 63 | #' Simulate p-hacking with incorrect rounding 64 | #' @param n.min Minimum sample size 65 | #' @param n.max Maximum sample size 66 | #' @param step Step size of the optional stopping (default is 1) 67 | #' @param peek Determines how often one peeks at the data. Overrides step argument if not NULL. 68 | #' @param alternative Direction of the t-test ("two.sided", "less", "greater") 69 | #' @param iter Number of iterations 70 | #' @param alpha Significance level of the t-test (default: 0.05) 71 | #' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE 72 | #' @importFrom utils tail 73 | #' @export 74 | #' 75 | 76 | sim.optstop <- function(n.min, n.max, step = 1, peek = NULL, alternative = "two.sided", iter = 1000, alpha = 0.05, shinyEnv = FALSE){ 77 | 78 | # Simulate as many datasets as desired iterations 79 | dat <- list() 80 | for(i in 1:iter){ 81 | dat[[i]] <- .sim.data(nobs.group = n.max) 82 | } 83 | 84 | # Apply p-hacking procedure to each dataset 85 | if(!shinyEnv){ 86 | res <- pbapply::pblapply(dat, .optstop, group = 1, dv = 2, 87 | n.min = n.min, n.max = n.max, step = step, peek = peek, 88 | alternative = alternative, alpha = alpha) 89 | } 90 | 91 | if(shinyEnv){ 92 | percentage <- 0 93 | withProgress(message = "Running simulation", value = 0, { 94 | res = lapply(dat, function(x){ 95 | percentage <<- percentage + 1/length(dat)*100 96 | incProgress(1/length(dat), detail = paste0("Progress: ",round(percentage,2), "%")) 97 | .optstop(df=x, group = 1, dv = 2, 98 | n.min = n.min, n.max = n.max, step = step, 99 | alternative = alternative, alpha = alpha) 100 | }) 101 | }) 102 | } 103 | 104 | ps.hack <- NULL 105 | ps.orig <- NULL 106 | r2s.hack <- NULL 107 | r2s.orig <- NULL 108 | ds.hack <- NULL 109 | ds.orig <- NULL 110 | 111 | for(i in 1:iter){ 112 | ps.hack[i] <- res[[i]][["p.final"]] 113 | ps.orig[i] <- utils::tail(res[[i]][["ps"]], 1) 114 | r2s.hack[i] <- res[[i]][["r2.final"]] 115 | r2s.orig[i] 
<- utils::tail(res[[i]][["r2s"]], 1) 116 | ds.hack[i] <- res[[i]][["d.final"]] 117 | ds.orig[i] <- utils::tail(res[[i]][["ds"]], 1) 118 | } 119 | 120 | res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig, ds.hack, ds.orig) 121 | 122 | return(res) 123 | 124 | } 125 | -------------------------------------------------------------------------------- /phackR/R/plotsShiny.R: -------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # Figures: p-value and effect size distributions 3 | # ============================================================================== 4 | 5 | #' Plot p-value distributions 6 | #' @param simdat Simulated data from one of the p-hacking simulation functions 7 | #' @param alpha Alpha level 8 | #' @importFrom ggplot2 ggplot geom_histogram aes theme_light xlab ggtitle theme element_text geom_vline scale_fill_manual layer_scales ylab geom_segment geom_col scale_x_continuous scale_y_continuous waiver 9 | #' @importFrom rlang .data 10 | #' @importFrom dplyr all_of mutate 11 | #' @importFrom magrittr "%$%" 12 | 13 | pplots <- function(simdat, alpha){ 14 | 15 | simdat <- as.data.frame(simdat) 16 | 17 | simdat <- as.data.frame(simdat) 18 | 19 | simdat_long <- tidyr::gather(simdat, "condition", "pval", all_of("ps.hack"):all_of("ps.orig")) 20 | 21 | bin <- condition <- Freq <- binInt <- pval <- plotVal <- NULL 22 | 23 | plotdata <- 24 | simdat_long %>% 25 | mutate(bin = cut(pval, seq(0, 1, by=0.025))) %$% 26 | table(bin, condition) %>% 27 | as.data.frame() %>% 28 | mutate(plotVal = ifelse(condition == "ps.orig", 29 | -1*Freq, 30 | Freq)) %>% 31 | mutate(binInt = as.integer(bin)) 32 | 33 | 34 | pcomp <- ggplot(plotdata, 35 | aes(x = binInt, 36 | y = plotVal, 37 | fill = condition)) + 38 | geom_segment(x = 0.5, xend = 40.5, y = nrow(simdat)/40, yend = nrow(simdat)/40, color = "#C27516") + 39 | geom_segment(x = 0.5, xend = 40.5, y = -nrow(simdat)/40, 
yend = -nrow(simdat)/40, color = "#024B7A") + 40 | geom_col() + 41 | scale_x_continuous(breaks = c(c(0, 10, 20, 30, 40)+0.5, alpha*40+0.5), 42 | labels = c("0", "0.25", "0.5", "0.75", "1", expression(alpha))) + 43 | scale_y_continuous(breaks = waiver(), 44 | labels = abs) + 45 | xlab("p-value") + 46 | ylab("count") + 47 | ggtitle("Distribution of p-values") + 48 | scale_fill_manual(values=c("#FFAE4A", "#43B7C2"), 49 | labels=c("p-hacked", "original")) + 50 | theme_light() + 51 | theme(axis.title = element_text(size=14), 52 | axis.text = element_text(size=12), 53 | plot.title = element_text(size=18)) + 54 | geom_vline(xintercept = alpha*40+0.5, linetype = "dashed") 55 | 56 | return(list(pcomp=pcomp)) 57 | } 58 | 59 | #' Plot effect size distributions 60 | #' @param simdat Simulated data from one of the p-hacking simulation functions 61 | #' @param EScolumn.hack Column number of hacked effect sizes 62 | #' @param EScolumn.orig Column number of original effect sizes 63 | #' @param titles Title of effect size plots 64 | #' @importFrom grid grobTree textGrob gpar 65 | #' @importFrom ggplot2 annotation_custom coord_cartesian 66 | 67 | esplots <- function(simdat, EScolumn.hack, EScolumn.orig, titles = c(expression("Distribution of p-hacked effect sizes R"^2), 68 | expression("Distribution of original effect sizes R"^2))){ 69 | 70 | simdat <- as.data.frame(simdat) 71 | es.hack <- colnames(simdat)[EScolumn.hack] 72 | es.orig <- colnames(simdat)[EScolumn.orig] 73 | 74 | meanES.hack <- grobTree(textGrob(paste0("Mean: ", round(mean(simdat[,es.hack]), 3)), x = 0.95, y=0.95, hjust=1, gp=gpar(fontsize=14))) 75 | meanES.orig <- grobTree(textGrob(paste0("Mean: ", round(mean(simdat[,es.orig]), 3)), x = 0.95, y=0.95, hjust=1, gp=gpar(fontsize=14))) 76 | 77 | eshack <- ggplot(simdat, aes(x=simdat[,es.hack])) + 78 | geom_histogram(fill="#FFAE4A", color="#C27516", bins=30, na.rm=FALSE) + 79 | theme_light() + 80 | xlab("Effect Size") + 81 | ggtitle(titles[1]) + 82 | theme(axis.title = 
#' Run Shiny app for p-hacking simulation
#' @description Launches the interactive Shiny application shipped with the
#' package (in \code{inst/shiny-phack/ShinyPHack}). Errors if the app
#' directory cannot be located, which indicates a broken installation.
#' @export

runShinyPHack <- function() {
  # Locate the installed app directory; system.file() returns "" on failure
  app_dir <- system.file("shiny-phack", "ShinyPHack", package = "phackR")
  if (!nzchar(app_dir)) {
    stop("Could not find example directory. Try re-installing `phackR`.", call. = FALSE)
  }

  shiny::runApp(app_dir, display.mode = "normal")
}
#' Simulate dataset with multiple dependent variables
#' @description Outputs a matrix with a grouping variable in the first
#' column followed by \code{nvar} correlated dependent variables.
#' @param nobs.group Vector giving number of observations per group
#' @param nvar Number of dependent variables in the data frame
#' @param r Desired correlation between the dependent variables (scalar)

.sim.multDV <- function(nobs.group, nvar, r){

  # A scalar group size means two equally sized groups
  if (length(nobs.group) == 1) nobs.group <- rep(nobs.group, 2)

  # Group labels 1..k, each repeated according to its group size
  group <- rep(seq_along(nobs.group), nobs.group)

  # Correlated DVs for all observations (helper from helpers.R)
  dvs <- .sim.multcor(nobs = sum(nobs.group), nvar = nvar, r = r)

  cbind(group, dvs)
}
#' P-Hacking function for multiple dependent variables
#' @description Computes one t-test per dependent variable and returns the
#' p-hacked p-value (selected according to \code{strategy}) together with
#' the vectors of all p-values, R^2 values and Cohen's d values computed in
#' the process.
#' @param df Data frame with one group variable and multiple dependent variables
#' @param dvs Vector defining the DV columns (will be checked in given order)
#' @param group Scalar defining grouping column
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test
#' @importFrom stats t.test

.multDVhack <- function(df, dvs, group, strategy = "firstsig", alternative = "two.sided", alpha = 0.05){

  # Prepare data: outcome matrix and grouping vector
  dvs <- as.matrix(df[, dvs], ncol = length(dvs))
  group <- df[, group]
  mod <- list()
  r2s <- NULL

  # One t-test (and R^2) per dependent variable
  for(i in 1:ncol(dvs)){
    mod[[i]] <- stats::t.test(dvs[, i] ~ group,
                              var.equal = TRUE, alternative = alternative)
    r2s[i] <- .compR2t(dvs[group == unique(group)[1], i],
                       dvs[group == unique(group)[2], i])
  }

  ps <- unlist(simplify2array(mod)["p.value", ])
  # BUGFIX: per-group n is length(group)/2 (assumes two equally sized
  # groups). The previous code used length(df[, group]) after `group` had
  # been overwritten with the group-label vector, so df was re-indexed by
  # those labels; for matrix input this inflated n to nobs^2/2. For
  # data.frame input the two expressions coincide.
  ds <- .compCohensD(unlist(simplify2array(mod)["statistic", ]), length(group)/2)

  # Select final p-hacked p-value based on strategy; unique() collapses
  # duplicates in the (theoretical) case of tied p-values
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  r2.final <- unique(r2s[ps == p.final])
  d.final <- unique(ds[ps == p.final])

  return(list(p.final = p.final,
              ps = ps,
              r2.final = r2.final,
              r2s = r2s,
              d.final = d.final,
              ds = ds))
}
#' Simulate p-Hacking with multiple dependent variables
#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations, together with the corresponding R^2 and Cohen's d values
#' @param nobs.group Vector giving number of observations per group
#' @param nvar Number of dependent variables (columns) in the data frame
#' @param r Desired correlation between the dependent variables (scalar)
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param iter Number of simulation iterations
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test (default: 0.05)
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @export

sim.multDVhack <- function(nobs.group, nvar, r, strategy = "firstsig", iter = 1000, alternative = "two.sided", alpha = 0.05, shinyEnv = FALSE){

  # Simulate as many datasets as desired iterations
  dat <- lapply(seq_len(iter), function(i){
    .sim.multDV(nobs.group = nobs.group, nvar = nvar, r = r)
  })

  # Apply the p-hacking procedure to each dataset, with a console progress
  # bar (pbapply) or a Shiny progress indicator
  if(!shinyEnv){
    res <- pbapply::pblapply(dat, .multDVhack, dvs = c(2:(nvar+1)), group = 1,
                             strategy = strategy, alternative = alternative, alpha = alpha)
  } else {
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res <- lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        .multDVhack(df = x, dvs = c(2:(nvar+1)), group = 1,
                    strategy = strategy, alternative = alternative, alpha = alpha)
      })
    })
  }

  # Extract results with type-checked vapply instead of growing NULL
  # vectors element-by-element in a loop (anti-pattern: O(n^2) copies).
  # Index 1 of ps/r2s/ds always holds the non-hacked (first DV) result.
  ps.hack  <- vapply(res, function(z) z[["p.final"]], numeric(1))
  ps.orig  <- vapply(res, function(z) z[["ps"]][1], numeric(1))
  r2s.hack <- vapply(res, function(z) z[["r2.final"]], numeric(1))
  r2s.orig <- vapply(res, function(z) z[["r2s"]][1], numeric(1))
  ds.hack  <- vapply(res, function(z) z[["d.final"]], numeric(1))
  ds.orig  <- vapply(res, function(z) z[["ds"]][1], numeric(1))

  return(cbind(ps.hack, ps.orig, r2s.hack, r2s.orig, ds.hack, ds.orig))
}
# ==============================================================================
# Selective Reporting of the Independent Variable
# ==============================================================================

#' Simulate dataset with multiple independent variables
#' @description Outputs data frame with multiple independent variables
#' @param nobs.group Scalar defining number of observations per group (or number of observations in predictors in regression)
#' @param nvar Number of independent variables in the data frame
#' @param r Desired correlation between the independent variables (scalar)
#' @param regression Should the simulation be conducted for a regression analysis (TRUE) or a t-test? (FALSE)

.sim.multIV <- function(nobs.group, nvar, r, regression = FALSE){

  # Observations per group (a scalar means two equally sized groups)
  if(length(nobs.group) == 1) nobs.group <- rep(nobs.group, 2)

  # Simulate the control group; under regression the same standard normal
  # vector plays the role of the criterion (independent of the predictors
  # by design, i.e. the null hypothesis holds).
  # FIX: removed the dead assignment `criterion <- control` — the variable
  # was never used; only the column rename below distinguishes both cases.
  control <- stats::rnorm(nobs.group[1])

  # Simulate multiple experimental groups / predictor variables
  ivs <- .sim.multcor(nobs = nobs.group[2], nvar = nvar, r = r)

  # Generate data frame
  res <- cbind(control, ivs)
  if(regression) colnames(res)[1] <- "criterion"

  return(res)
}
#' P-Hacking function for multiple independent variables in a t-test
#' @description Runs one t-test per treatment group against the shared
#' control group and returns the p-hacked p-value (selected according to
#' \code{strategy}) plus all p-values, R^2 and Cohen's d values computed in
#' the process.
#' @param df Data frame (wide format) containing a control group variable and multiple treatment group variables
#' @param ivs Location of the independent variables (treatment groups) in the (wide) data frame
#' @param control Location of the control group in the (wide) data frame
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test (default: 0.05)
#' @importFrom stats t.test

.multIVhack_ttest <- function(df, ivs, control, strategy = "firstsig", alternative = "two.sided", alpha = 0.05){

  treatm <- df[, ivs]
  control <- df[, control]

  # Prepare result containers
  mod <- list()
  r2s <- rep(NA, length(ivs))

  # One t-test (control vs. i-th treatment group) per IV
  for(i in seq_along(ivs)){
    mod[[i]] <- stats::t.test(control, treatm[, i], var.equal = TRUE, alternative = alternative)
    r2s[i] <- .compR2t(control, treatm[, i])
  }

  ps <- unlist(simplify2array(mod)["p.value", ])
  # n is taken as the control group size — assumes equal group sizes;
  # TODO(review): confirm for unequal nobs.group
  ds <- .compCohensD(unlist(simplify2array(mod)["statistic", ]), length(control))

  # Select final p-hacked p-value based on strategy. CONSISTENCY: wrap in
  # unique() like .multDVhack so tied p-values cannot yield a vector result.
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  r2.final <- unique(r2s[ps == p.final])
  d.final <- unique(ds[ps == p.final])

  return(list(p.final = p.final,
              ps = ps,
              r2.final = r2.final,
              r2s = r2s,
              d.final = d.final,
              ds = ds))
}

#' P-Hacking function for multiple predictors in a regression
#' @description Runs one univariate regression per predictor and returns
#' the p-hacked p-value (selected according to \code{strategy}) plus all
#' p-values and R^2 values computed in the process.
#' @param df Data frame containing a criterion variable and multiple predictor variables
#' @param ivs Location of the independent variables (predictors) in the data frame
#' @param control Location of the criterion in the data frame
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alternative Accepted for interface symmetry with
#'   \code{.multIVhack_ttest}. NOTE(review): it is not used here — the slope
#'   test reported by \code{summary(lm())} is always two-sided.
#' @param alpha Significance level of the test (default: 0.05)

.multIVhack_reg <- function(df, ivs, control, strategy = "firstsig", alternative="two.sided", alpha = 0.05){

  predictors <- df[, ivs]
  criterion <- df[, control]

  # Prepare result containers
  ps <- rep(NA, length(ivs))
  r2s <- rep(NA, length(ivs))

  # One univariate regression per predictor: slope p-value and R^2
  for(i in seq_along(ivs)){
    mod <- summary(stats::lm(criterion ~ predictors[, i]))
    ps[i] <- mod$coefficients[2, 4]
    r2s[i] <- mod$r.squared
  }

  # Select final p-hacked p-value based on strategy; unique() added for
  # consistency with .multDVhack (guards against tied p-values)
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  r2.final <- unique(r2s[ps == p.final])

  return(list(p.final = p.final,
              ps = ps,
              r2.final = r2.final,
              r2s = r2s))
}
#' Simulate p-Hacking with multiple independent variables
#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations, plus R^2 values and (for the t-test variant) Cohen's d values
#' @param nobs.group Vector giving number of observations per group
#' @param nvar Number of independent variables (columns) in the data frame
#' @param r Desired correlation between the independent variables (scalar)
#' @param regression Should the simulation be conducted for a regression analysis (TRUE) or a t-test? (FALSE)
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param iter Number of simulation iterations
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test (default: 0.05)
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @export

sim.multIVhack <- function(nobs.group, nvar, r, regression=FALSE, strategy = "firstsig", iter = 1000, alternative = "two.sided", alpha = 0.05, shinyEnv = FALSE){

  # Simulate as many datasets as desired iterations
  dat <- lapply(seq_len(iter), function(i){
    .sim.multIV(nobs.group = nobs.group, nvar = nvar, r = r, regression = regression)
  })

  # BUGFIX: selecting a function with ifelse() fails at run time
  # ("attempt to replicate an object of type 'closure'") because ifelse()
  # replicates its branches with rep(); a plain scalar if/else returns the
  # closure itself and is the idiomatic form.
  .multIVhack <- if (regression) .multIVhack_reg else .multIVhack_ttest

  # Wrapper fixing all hacking parameters except the data
  .multIVhacklist <- function(x){
    .multIVhack(df = x, ivs = c(2:(nvar+1)), control = 1,
                strategy = strategy, alternative = alternative, alpha = alpha)
  }

  if(!shinyEnv){
    res <- pbapply::pblapply(dat, .multIVhacklist)
  } else {
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res <- lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        .multIVhacklist(x)
      })
    })
  }

  # Extract results (index 1 of ps/r2s/ds is the non-hacked first IV);
  # vapply replaces the previous grow-in-a-loop extraction
  ps.hack  <- vapply(res, function(z) z[["p.final"]], numeric(1))
  ps.orig  <- vapply(res, function(z) z[["ps"]][1], numeric(1))
  r2s.hack <- vapply(res, function(z) z[["r2.final"]], numeric(1))
  r2s.orig <- vapply(res, function(z) z[["r2s"]][1], numeric(1))

  out <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig)

  # Cohen's d is only defined for the t-test variant
  if(!regression){
    ds.hack <- vapply(res, function(z) z[["d.final"]], numeric(1))
    ds.orig <- vapply(res, function(z) z[["ds"]][1], numeric(1))
    out <- cbind(out, ds.hack, ds.orig)
  }

  return(out)
}
# ==============================================================================
# Exploiting statistical analysis options
# ==============================================================================

# Data simulation can be done with .sim.data

#' P-Hacking function for exploiting different statistical analysis options
#' @description Runs the same two-group comparison with several analysis
#' options (Student t-test, Welch test, Wilcoxon test, Yuen tests with four
#' trimming levels) and returns the p-hacked p-value plus all p-values.
#' @param df Data frame with one grouping variable and one continuous dependent variable
#' @param group Location of the grouping variable in the data frame
#' @param dv Location of the dependent variable in the data frame
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test
#' @importFrom stats t.test wilcox.test
#' @importFrom WRS2 yuen

.statAnalysisHack <- function(df, group, dv, strategy = "firstsig", alternative = "two.sided", alpha = 0.05){

  # Build a proper data frame once. The previous cbind() produced a matrix
  # that every formula interface had to coerce (WRS2::yuen even needed an
  # explicit as.data.frame() at the call site).
  dftest <- data.frame(group = df[, group], dv = df[, dv])

  # "Normal" t-test (this is the non-hacked reference result)
  p.orig <- stats::t.test(dv ~ group, var.equal = TRUE, alternative = alternative,
                          data = dftest)$p.value

  # Welch test (unequal variances)
  p.welch <- stats::t.test(dv ~ group, var.equal = FALSE,
                           alternative = alternative, data = dftest)$p.value

  # Mann-Whitney / Wilcoxon test
  p.wilcox <- stats::wilcox.test(dv ~ group, alternative = alternative,
                                 data = dftest)$p.value

  # Yuen test with different levels of trimming
  trim <- c(0.1, 0.15, 0.2, 0.25)
  p.yuen <- rep(NA, length(trim))
  for(i in seq_along(trim)) {
    p.yuen[i] <- WRS2::yuen(dv ~ group, tr = trim[i], data = dftest)$p.value
  }

  # First element is the non-hacked (Student t-test) p-value
  ps <- c(p.orig, p.welch, p.wilcox, p.yuen)

  # Select final p-hacked p-value based on strategy
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)

  return(list(p.final = p.final,
              ps = ps))
}
#' Simulate p-Hacking for exploiting different statistical analysis options
#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations
#' @param nobs.group Number of observations per group. Either a scalar or a vector with 2 elements.
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test
#' @param iter Number of simulation iterations
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @export

sim.statAnalysisHack <- function(nobs.group, strategy = "firstsig", alternative = "two.sided", alpha = 0.05, iter = 1000, shinyEnv = FALSE){

  # One simulated dataset per iteration
  dat <- lapply(seq_len(iter), function(i) .sim.data(nobs.group = nobs.group))

  # Wrapper fixing all hacking parameters except the data
  .statAnalysisHackList <- function(x){
    .statAnalysisHack(df = x, group = 1, dv = 2, strategy = strategy,
                      alternative = alternative, alpha = alpha)
  }

  # Console progress bar (pbapply) or Shiny progress indicator
  if(!shinyEnv){
    res <- pbapply::pblapply(dat, .statAnalysisHackList)
  } else {
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res <- lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        .statAnalysisHackList(x)
      })
    })
  }

  # Index 1 of ps is always the non-hacked (Student t-test) p-value
  ps.hack <- vapply(res, function(z) z[["p.final"]], numeric(1))
  ps.orig <- vapply(res, function(z) z[["ps"]][1], numeric(1))

  return(cbind(ps.hack, ps.orig))
}
# ==============================================================================
# Subgroup Analyses
# ==============================================================================

#' Simulate data with subgroups
#' @description Outputs data frame with multiple binary variables from which subgroups can be extracted
#' @param nobs.group Vector giving number of observations per group
#' @param nsubvars Integer specifying number of variables for potential subgroups

.sim.subgroup <- function(nobs.group, nsubvars){

  # Base dataset: grouping variable + dependent variable (helpers.R)
  dat <- .sim.data(nobs.group)

  # A scalar group size means two equally sized groups
  if (length(nobs.group) == 1) nobs.group <- rep(nobs.group, 2)
  nobs <- sum(nobs.group)

  # One random Bernoulli(0.5) column per potential subgroup variable
  subvars <- matrix(NA, nrow = nobs, ncol = nsubvars)
  for (j in seq_len(nsubvars)) {
    subvars[, j] <- sample(c(0, 1), size = nobs, replace = TRUE)
  }

  cbind(dat, subvars)
}
#' P-Hacking function for multiple subgroups analysis
#' @description Computes the full-sample t-test plus one t-test per level of
#' each subgroup variable, and returns the p-hacked p-value together with
#' all p-values, R^2 and Cohen's d values computed in the process.
#' @param df A matrix or data frame containing all relevant data
#' @param iv Integer specifying the location of the binary independent variable in the data frame
#' @param dv Integer specifying the location of the dependent variable in the data frame
#' @param subvars Vector specifying the location of the subgroup variables in the data frame
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test
#' @importFrom dplyr group_by_at do
#' @importFrom stats t.test
#' @importFrom dplyr "%>%"
#' @importFrom rlang .data

.subgroupHack <- function(df, iv, dv, subvars, alternative = "two.sided", strategy = "firstsig", alpha = 0.05){

  # Assemble working data: after as.data.frame() the columns are named
  # V1 (group), V2 (outcome), followed by the subgroup variables
  ttest.df <- cbind(df[, iv], df[, dv])
  subvars.df <- cbind(df[, subvars])
  dfnew <- as.data.frame(cbind(ttest.df, subvars.df))

  # --- Not p-hacked: t-test, R^2 and Cohen's d on the full sample ---
  mod.orig <- stats::t.test(ttest.df[, 2] ~ ttest.df[, 1], var.equal = TRUE, alternative = alternative)
  p.orig <- mod.orig$p.value
  r2.orig <- .compR2t(ttest.df[ttest.df[, 1] == unique(ttest.df[, 1])[1], 2],
                      ttest.df[ttest.df[, 1] == unique(ttest.df[, 1])[2], 2])
  d.orig <- .compCohensD(unname(mod.orig$statistic), nrow(ttest.df)/2)

  # --- p-hacked: re-run the t-test within each subgroup split ---
  ps <- list()
  ds <- list()
  r2s <- list()

  for(i in seq_along(subvars)){

    # p-value and t-statistic per level of the i-th subgroup variable
    stats.by <- dplyr::group_by_at(dfnew, subvars[i]) %>%
      dplyr::do(as.data.frame(stats::t.test(.data$V2 ~ .data$V1, var.equal = TRUE, alternative = alternative)[c("p.value", "statistic")]))
    # group sizes per level (needed to convert t to Cohen's d)
    sizes.by <- dplyr::group_by_at(dfnew, subvars[i]) %>%
      dplyr::do(as.data.frame(table(.data$V1)))
    # R^2 per level
    r2.by <- dplyr::group_by_at(dfnew, subvars[i]) %>%
      do(as.data.frame(.compR2t(.data$V2[.data$V1 == unique(.data$V1)[1]], .data$V2[.data$V1 == unique(.data$V1)[2]])))

    ps[[i]] <- stats.by[[2]]
    # d = t * sqrt(1/n1 + 1/n2), computed separately for both levels
    ds[[i]] <- c(stats.by[[3]][1]*sqrt(sum(1/sizes.by[[3]][1:2])), stats.by[[3]][2]*sqrt(sum(1/sizes.by[[3]][3:4])))
    r2s[[i]] <- r2.by[[2]]
  }

  # Non-hacked result goes first so callers can find it at index 1
  ps <- c(p.orig, unlist(ps))
  r2s <- c(r2.orig, unlist(r2s))
  ds <- c(d.orig, unlist(ds))

  # Select final p-hacked p-value based on strategy; unique() guards
  # against tied p-values producing vector-valued results
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  r2.final <- unique(r2s[ps == p.final])
  d.final <- unique(ds[ps == p.final])

  return(list(p.final = p.final,
              ps = ps,
              r2.final = r2.final,
              r2s = r2s,
              d.final = d.final,
              ds = ds))
}
#' Simulate p-hacking with multiple subgroups
#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations, together with the corresponding R^2 and Cohen's d values
#' @param nobs.group Vector giving number of observations per group
#' @param nsubvars Integer specifying number of variables for potential subgroups
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test
#' @param iter Number of simulation iterations
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @export

sim.subgroupHack <- function(nobs.group, nsubvars, alternative = "two.sided", strategy = "firstsig", alpha = 0.05, iter = 1000, shinyEnv = FALSE){

  # One simulated dataset per iteration
  dat <- lapply(seq_len(iter), function(i){
    .sim.subgroup(nobs.group = nobs.group, nsubvars = nsubvars)
  })

  # Wrapper fixing all hacking parameters except the data
  .subgroupHackList <- function(x){
    .subgroupHack(df = x, iv = 1, dv = 2, subvars = c(3:(2 + nsubvars)),
                  alternative = alternative, strategy = strategy, alpha = alpha)
  }

  # Console progress bar (pbapply) or Shiny progress indicator
  if(!shinyEnv){
    res <- pbapply::pblapply(dat, .subgroupHackList)
  } else {
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res <- lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        .subgroupHackList(x)
      })
    })
  }

  # Index 1 of ps/r2s/ds always holds the non-hacked full-sample result
  ps.hack  <- vapply(res, function(z) z[["p.final"]], numeric(1))
  ps.orig  <- vapply(res, function(z) z[["ps"]][1], numeric(1))
  r2s.hack <- vapply(res, function(z) z[["r2.final"]], numeric(1))
  r2s.orig <- vapply(res, function(z) z[["r2s"]][1], numeric(1))
  ds.hack  <- vapply(res, function(z) z[["d.final"]], numeric(1))
  ds.orig  <- vapply(res, function(z) z[["ds"]][1], numeric(1))

  return(cbind(ps.hack, ps.orig, r2s.hack, r2s.orig, ds.hack, ds.orig))
}
#' P-Hacking function variable transformation in univariate linear regression
#' @description Regresses y on x for every combination of (un)transformed x
#' and y and returns the p-hacked p-value plus all p-values and R^2 values
#' computed in the process.
#' @param df Data frame containing x and y variables as columns
#' @param x Location of x variable (predictor) in the data frame
#' @param y Location of y variable (criterion) in the data frame
#' @param transvar Which variables should be transformed? Either "x" (for x variable), "y" (for y variable), or "xy" (for both)
#' @param testnorm Should variables only be transformed after a significant test for normality of residuals?
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test (default: 0.05)

.varTransHack <- function(df, x, y, transvar, testnorm = FALSE, strategy = "firstsig", alpha = 0.05){

  x <- df[, x]
  y <- df[, y]

  # Optional gate: when the Shapiro-Wilk test on the residuals of the
  # untransformed model is non-significant (residuals look normal), no
  # transformations are attempted at all.
  normality <- FALSE
  if(testnorm){
    mod <- stats::lm(y ~ x)
    normality <- stats::shapiro.test(stats::residuals(mod))$p.value > alpha
  }

  # Column 1 of each matrix holds the untransformed variable; transformed
  # versions are appended as additional columns below.
  Xtrans <- matrix(NA, nrow = nrow(df))
  Xtrans[, 1] <- x
  Ytrans <- matrix(NA, nrow = nrow(df))
  Ytrans[, 1] <- y

  # The "+ abs(min) + 1e-10" shift makes log/sqrt arguments strictly
  # positive. NOTE(review): 1/x yields Inf if a value is exactly 0
  # (probability zero for continuous data) — confirm acceptable.
  if(transvar != "y" && normality == FALSE){
    Xtrans <- cbind(Xtrans,
                    log(x + abs(min(x)) + 1e-10),  # log transformation
                    sqrt(x + abs(min(x)) + 1e-10), # square root transformation
                    1/x                            # inverse
    )
  }

  if(transvar != "x" && normality == FALSE){
    Ytrans <- cbind(Ytrans,
                    log(y + abs(min(y)) + 1e-10),  # log transformation
                    sqrt(y + abs(min(y)) + 1e-10), # square root transformation
                    1/y                            # inverse
    )
  }

  # Regress every version of y on every version of x
  ps <- matrix(NA, nrow = ncol(Xtrans), ncol = ncol(Ytrans))
  r2s <- matrix(NA, nrow = ncol(Xtrans), ncol = ncol(Ytrans))

  for(i in seq_len(ncol(Xtrans))){
    for(j in seq_len(ncol(Ytrans))){
      fit <- summary(stats::lm(Ytrans[, j] ~ Xtrans[, i]))
      ps[i, j] <- fit$coefficients[2, 4]
      r2s[i, j] <- fit$r.squared
    }
  }

  ps <- as.vector(ps)
  r2s <- as.vector(r2s)

  # Select final p-hacked p-value based on strategy
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  r2.final <- unique(r2s[ps == p.final])

  return(list(p.final = p.final,
              ps = ps,
              r2.final = r2.final,
              r2s = r2s))
}
#' Simulate p-hacking with variable transformations
#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations, together with the corresponding R^2 values
#' @param nobs Integer giving number of observations
#' @param transvar Which variables should be transformed? Either "x" (for x variable), "y" (for y variable), or "xy" (for both)
#' @param testnorm Should variables only be transformed after a significant test for normality of residuals?
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test (default: 0.05)
#' @param iter Number of simulation iterations
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @export

sim.varTransHack <- function(nobs, transvar, testnorm = FALSE, strategy = "firstsig", alpha = 0.05, iter = 1000, shinyEnv = FALSE){

  # Simulate as many datasets as desired iterations (x and y uncorrelated,
  # i.e. the null hypothesis is true)
  dat <- lapply(seq_len(iter), function(i){
    .sim.multcor(nobs = nobs, nvar = 2, r = 0)
  })

  # Wrapper fixing all hacking parameters except the data
  .varTransHackList <- function(arg){
    .varTransHack(df = arg, x = 1, y = 2, testnorm = testnorm, transvar = transvar,
                  strategy = strategy, alpha = alpha)
  }

  if(!shinyEnv){
    res <- pbapply::pblapply(dat, .varTransHackList)
  } else {
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res <- lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        # BUGFIX: the Shiny code path previously called .varTransHack
        # without the testnorm argument, silently falling back to
        # testnorm = FALSE; it now uses the same wrapper as the
        # non-Shiny path so both behave identically.
        .varTransHackList(x)
      })
    })
  }

  # Index 1 of ps/r2s is the untransformed (non-hacked) regression result
  ps.hack  <- vapply(res, function(z) z[["p.final"]], numeric(1))
  ps.orig  <- vapply(res, function(z) z[["ps"]][1], numeric(1))
  r2s.hack <- vapply(res, function(z) z[["r2.final"]], numeric(1))
  r2s.orig <- vapply(res, function(z) z[["r2s"]][1], numeric(1))

  return(cbind(ps.hack, ps.orig, r2s.hack, r2s.orig))
}
39 | interactions = FALSE, strategy = "smallest", 40 | alpha = 0.05, iter = 10) 41 | 42 | ## ----subgroupAnalysis--------------------------------------------------------- 43 | set.seed(1234) 44 | sim.subgroupHack(nobs.group = 30, nsubvars = 3, alternative = "two.sided", 45 | strategy = "smallest", alpha = 0.05, iter = 10) 46 | 47 | ## ----compositeScores---------------------------------------------------------- 48 | set.seed(1234) 49 | sim.compscoreHack(nobs = 30, ncompv = 5, rcomp = 0.7, ndelete = 3, 50 | strategy = "smallest", alpha = 0.05, iter = 10) 51 | 52 | ## ----variableTransformation--------------------------------------------------- 53 | set.seed(1234) 54 | sim.varTransHack(nobs = 30, transvar = "xy", strategy = "smallest", 55 | alpha = 0.05, iter = 10) 56 | 57 | ## ----exploitCutoffs----------------------------------------------------------- 58 | set.seed(1234) 59 | sim.cutoffHack(nobs = 30, strategy = "smallest", alpha = 0.05, iter = 10) 60 | 61 | ## ----statAnalysis------------------------------------------------------------- 62 | set.seed(1234) 63 | sim.statAnalysisHack(nobs.group = 30, strategy = "smallest", 64 | alternative = "two.sided", alpha = 0.05, iter = 10) 65 | 66 | ## ----favorableImputation------------------------------------------------------ 67 | set.seed(1234) 68 | sim.impHack(nobs = 30, missing = 0.2, which = c(1:10), strategy = "smallest", 69 | alpha = 0.05, iter = 10) 70 | 71 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/data/startplots.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astefan1/phacking_compendium/b663bf8701c3af19159e1d01d5986b2db580901b/phackR/inst/shiny-phack/ShinyPHack/data/startplots.rds -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/01_CompScores.md: 
-------------------------------------------------------------------------------- 1 | ### Scale Redefinition 2 | 3 | The *scale redefinition* strategy assumes that one of the variables in the hypothesis test in question is a composite score (e.g., the mean of items in a personality inventory), and that a researcher manipulates which items are included in the composite score to obtain a significant result. 4 | 5 | Here, we assume that the focal hypothesis test is a univariate linear regression, and that items are excluded based on the reliability coefficient Cronbach's α in an iterative fashion. The underlying idea is to delete the item that contributes least to a reliable score, i.e., the item leading to the highest Cronbach's α when deleted. After a candidate item for deletion has been found, the regression is recomputed with (1) the reduced score as a predictor, (2) the deleted item as a predictor, and (3) the score of all deleted items as a predictor, and the p-values are recorded. 6 | 7 | The simulation function in this Shiny app allows the specification of the total number of items in the score, as well as their correlation. Users can also specify the maximum number of items deleted from the score. Naturally, this number should be smaller than the total number of items. Other options users can specify are the number of observations, the p-value selection method, the significance level α, and the number of simulation iterations. 8 | 9 | 10 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/02_ExploitCovariates.md: -------------------------------------------------------------------------------- 1 | ### Controlling for Covariates 2 | 3 | This p-hacking strategy exploits the common practice of controlling for covariates in statistical analyses. Here, we assume that a researcher is interested in an independent samples t-test. 
If this test does not yield a significant result, the researcher introduces a number of continuous covariates into the analysis (which will then be computed as an ANCOVA). We assume that all covariates are first entered into the analysis individually, and if this does not yield a significant result, they are added sequentially as y ~ x + cov1, y ~ x + cov1 + cov2, ... (in decreasing order of correlation with the dependent variable). 4 | 5 | The simulation function in this Shiny app allows the specification of the number of covariates, as well as their correlation. Users can also specify whether the ANCOVA models should include interaction terms. Note that the inclusion of interaction terms will slow down the computation considerably. Other options users can specify are the number of observations per group, the p-value selection method, the significance level α, and the number of simulation iterations. 6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/03_ExploitCutoffs.md: -------------------------------------------------------------------------------- 1 | ### Discretizing variables 2 | 3 | This p-hacking strategy is based on splitting a continuous variable into categories with regard to two or more arbitrary cutoff values. Here, we assume that at the start a researcher plans to conduct a univariate linear regression. If this analysis does not yield a significant result, the researcher discretizes the independent variable and compares the means of the resulting groups in the dependent variable. We simulate three approaches: (1) Compare high-scorers and low-scorers based on a median split; (2) conduct a three-way split of the independent variable and compare the two extreme groups; (3) conduct a three-way split of the independent variables and compare all three groups using an ANOVA. 
4 | 5 | The simulation function in this Shiny app allows the specification of the sample size, as well as of the p-value selection method, the significance level α, and the number of iterations in the simulation. 6 | 7 | 8 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/04_FavorableImputation.md: -------------------------------------------------------------------------------- 1 | ### Favorable Imputation of Missing Values 2 | 3 | This p-hacking strategy assumes that the original dataset a researcher is confronted with contains missing values. A researcher engaging in p-hacking can now try out different imputation methods to replace the missing values, until (possibly) a significant result is obtained. Here, we simulate this p-hacking strategy based on a univariate linear regression, because many imputation methods assume a regression context. 4 | 5 | The simulation function in this Shiny app allows the specification of the total number of observations (observations with missing values are included in this number), the percentage of missing values, and the imputation methods that are used. The percentage of missing values defined is the same for the predictor and the outcome variable (e.g., if the percentage is set to 10%, there will be ten percent missing values in both the predictor and the outcome variable). Additionally, users can specify the p-value selection method, the significance level α, and the number of simulation iterations. 6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/05_IncorrectRounding.md: -------------------------------------------------------------------------------- 1 | ### Incorrect Rounding 2 | 3 | This p-hacking strategy is not based on tinkering with the data or the analyses, but on misreporting the analysis outcome. Usually, the result of a hypothesis test is significant if p ≤ α. 
However, as has been shown (e.g., Hartgerink, van Aert, van Nuijten, Wicherts, & van Assen, 2016), sometimes p-values that are slightly larger than the significance level are reported as significant, that is, p-values are incorrectly rounded down to p = α. 4 | 5 | In the simulation function in this Shiny app, the user can specify the margin in which p-values should be rounded down, as well as the significance level. For example, if the significance level is specified as α = 0.05, and the margin is specified as 0.001, then all p-values below 0.05+0.001=0.051 will be reported as significant and rounded down to p = 0.05. Additionally, users can specify the direction of the test, and the number of simulation iterations. 6 | 7 | Note that type I error rates of this p-hacking strategy can also be determined analytically. The theoretical α-level after p-hacking is equivalent to the sum of the original alpha level and the rounding margin. 8 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/06_OptionalStopping.md: -------------------------------------------------------------------------------- 1 | ### Optional Stopping 2 | 3 | Researchers engaging in optional stopping repeatedly inspect the results of the statistical tests during data collection. They stop data collection as soon as a significant result has been obtained or a maximum sample size is reached. Here, we assume that the underlying statistical test is an independent-samples t-test. 4 | 5 | In the simulation function provided in this Shiny app, the user can specify the minimum sample size (per group), the maximum sample size (per group), and the number of observations that are collected at each step of the sampling process (*step size*). For example, if the minimum sample size is specified to be 10, the maximum sample size 30, and the step size 5, then interim analyses will be conducted at N = 10, N = 15, N = 20, N = 25, and N = 30. 
Additionally, users can define the direction of the hypothesis test, the significance level α, and the number of simulation iterations. 6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/07_OutlierExclusion.md: -------------------------------------------------------------------------------- 1 | ### Outlier Exclusion 2 | 3 | In this p-hacking strategy, a researcher applies different outlier exclusion criteria to their data with the goal of obtaining a significant result in a focal hypothesis test. Here, we assume that the hypothesis test in question is a univariate linear regression. Further, we assume that the researcher first checks for potential outliers in the predictor variable (x) and in the outcome variable (y), and then reruns the analysis (1) without the xy pairs where x is an outlier, (2) without the xy pairs where y is an outlier, (3) without the xy pairs where x *and* y are outliers. We assume that this is done for each outlier exclusion method. 4 | 5 | In the simulation function provided in this Shiny app, users can define the outlier exclusion methods that are applied, as well as the sample size, the p-value selection method, the significance level α, and the number of simulation iterations. 6 | 7 | 8 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/09_SelectiveReportingDV.md: -------------------------------------------------------------------------------- 1 | ### Selective Reporting of the Dependent Variable 2 | 3 | This p-hacking strategy assumes that the dataset contains multiple candidate dependent variables. For example, in a clinical trial, the treatment and control group could be compared on different outcome variables, such as mental and physical well-being. 
A researcher engaging in p-hacking would conduct one hypothesis test for each dependent variable, and selectively report the significant results. Here, we assume that the hypothesis test in question is an independent-samples t-test. 4 | 5 | The simulation function in this Shiny app allows the specification of the number of dependent variables as well as their correlation. Additionally, users can define the number of observations per group, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations. 6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/10_SelectiveReportingIV.md: -------------------------------------------------------------------------------- 1 | ### Selective Reporting of the Independent Variable 2 | 3 | This p-hacking strategy assumes that an experiment or clinical trial contains multiple experimental groups and one control group. A researcher engaging in p-hacking statistically compares all experimental groups to the control group, and only reports the significant results. Here, we assume that all conducted hypothesis tests are t-tests. 4 | 5 | The simulation function in this Shiny app allows the specification of the number of experimental groups (independent variables), and their correlation. Additionally, users can set the number of observations per group, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations. 6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/11_StatAnalysis.md: -------------------------------------------------------------------------------- 1 | ### Exploiting Alternative Hypothesis Tests 2 | 3 | Often, different statistical analysis techniques can be used to answer the same research question.
This p-hacking strategy assumes that a researcher tries out different statistical analysis options and decides for the one yielding a significant result. Here, we assume that the hypothesis tests in question are an independent-samples t-test, a Welch test, a Wilcoxon test, and a Yuen test (with different levels of trimming). 4 | 5 | The simulation function in this Shiny app allows users to specify the number of observations per group, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations. 6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/12_SubgroupAnalysis.md: -------------------------------------------------------------------------------- 1 | ### Subgroup Analyses 2 | 3 | This p-hacking strategy assumes that if an initial hypothesis test does not yield a significant result, a researcher would repeat the same hypothesis test on subgroups of the sample (e.g., right-handed and left-handed participants). Here, we assume that all subgroup variables have two levels, and that the hypothesis test is conducted on each level of the subgroup variables. Additionally, we assume that the hypothesis test in question is a t-test (e.g., between an experimental and a control condition). Note that we do not assume that the experimental and control condition are balanced within the subgroups. Therefore, within a subgroup, the number of participants in the experimental and control group can differ. 4 | 5 | In the simulation function in this Shiny app, users can specify the number of observations per group in the original t-test, the number of subgroup variables, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations. 
6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/13_VariableTransformation.md: -------------------------------------------------------------------------------- 1 | ### Variable Transformation 2 | 3 | This p-hacking strategy assumes that if an initial hypothesis test does not yield significant results, a researcher would apply transformations to the variables involved in the test. Here, we assume that the test in question is a univariate linear regression, and that the transformations are a natural log transformation (ln(x)), a square root transformation (√x), and an inverse transformation (1/x). Transformations can be applied to the predictor variable, to the outcome variable, or both. 4 | 5 | In the simulation function in this Shiny app, users can specify which of the variables should be transformed. Additionally, they can specify the number of observations, the p-value selection method, the significance level α, and the number of simulation iterations. 6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/landingPage.md: -------------------------------------------------------------------------------- 1 | # Gazing into the Abyss of p-Hacking: A Shiny App for p-Hacking Simulation 2 | 3 | ## What is p-Hacking? 4 | The p-value is a core component of null hypothesis significance testing (NHST), a statistical framework that has found ubiquitous use across many scientific disciplines. A p-value is defined as the probability to obtain a result at least as extreme as the observed one if the null hypothesis is true (i.e., if there is no effect). If the p-value is smaller than a certain threshold called alpha level, then the test result is labeled "significant" and the null hypothesis is rejected. 
Researchers who are interested in showing an effect in their data (e.g., that a new medicine improved the health of patients) are therefore eager to obtain small p-values that allow them to reject the null hypothesis and claim the existence of an effect. 5 | 6 | In recent years, failed attempts to replicate experiments have instigated investigations into how researchers use NHST in practice. Studies found that many researchers apply questionable research practices to render previously non-significant results significant. We summarize these practices under the term of *p-hacking*. 7 | 8 | ## How Does p-Hacking Work? 9 | All p-hacking strategies are based on the principle of alpha error accumulation. Basically, alpha error accumulation means that as more and more hypothesis tests are conducted, the probability of making at least one false decision increases. Therefore, even if there is no effect in the population, the probability is very high that at least one hypothesis test will (erroneously) show a significant result, if a sufficiently large number of tests are conducted. Researchers then report this significant result, and claim to have found an effect. 10 | 11 | ## Obvious Warning: Thou Shalt Not p-Hack! 12 | Given the explanation above, it almost seems needless to say that p-hacking is detrimental and you should not do it. P-hacking slows down scientific progress by increasing the amount of false positive results in the literature. Additionally, p-hacking leads to an inflation of effect sizes that are published in the literature because only "extreme" results are reported. This means that p-hacking increases the number of cases where research wrongly claims an effect, and even if an effect exists, the reported effect size is likely to be larger than the true effect size. 13 | 14 | Sounds bad? It actually is. What makes it even worse is that it is difficult to discover p-hacking in the literature. How can we tell whether a reported effect is real or p-hacked?
How can we tell that a p-hacked significant result (i.e., a significant finding that a researcher found after running many hypothesis tests) is not actually a true effect that was discovered? The truth is, for a single finding, it is impossible to know. However, if we know what p-hacking strategies researchers employ, it is possible to predict what distributions of p-values and effect sizes will look like, and how the rate of false positive results will be changed compared to a situation without p-hacking. The purpose of this app is to showcase these scenarios using simulated data. 15 | 16 | ## A Compendium of p-Hacking Strategies 17 | In the literature, p-hacking has typically been described as being comprised of different strategies that researchers can use to tinker with their statistical results to achieve statistical significance. In order to learn more about the effects of p-hacking, it is important to understand all strategies and their effects on the reported scientific results. However, a comprehensive description of these strategies has been missing so far. 18 | 19 | Here, we provide an overview of different p-hacking strategies that have been mentioned in the literature, together with a Shiny app that lets users explore the effects of p-hacking on the distribution of hypothesis testing results. 20 | 21 | ## Exploring the Effects of P-Hacking 22 | Each tab of this Shiny app lets the user explore the effects of a different p-hacking strategy. All tabs have the same structure: First, we describe the p-hacking strategy, and how we applied it in our simulations. Below, we present simulation results, specifically the distribution of p-values, the distribution of effect sizes (if applicable), and the rate of false positive results. On a panel on the right side, the user can adjust the settings of the simulation, including the severity of the p-hacking. 
23 | 24 | ### Common Settings 25 | Several settings are common to the simulation of (almost) all p-hacking strategies. To avoid unnecessary repetition, we will describe these settings here. 26 | 27 | #### p-Value selection method 28 | In all simulation functions, it is necessary to specify how the final p-value is determined. There are three options: *first significant* simulates a situation where the researcher conducts a series of hypothesis tests, and stops as soon as the result is significant, that is, at the first significant p-value. In a comment on Simonsohn et al. (2014), Ulrich and Miller (2015) argued that researchers might instead engage in "ambitious" p-hacking, where the researcher conducts a series of hypothesis tests and selects the smallest significant p-value from the set. This strategy is implemented in the *smallest significant* option. Simonsohn (private comm.) argues that there might exist a third p-hacking strategy where the researcher tries a number of different analysis options, and selects the smallest p-value, no matter if it is significant or not. This strategy is implemented in the option *smallest*. The default strategy is *first significant*. 29 | 30 | #### True effect size 31 | The true effect size in all simulations is equal to zero. 32 | 33 | #### Significance level 34 | The significance level α determines the significance level for each hypothesis test. For example, if the significance level is set to α = 0.05 (the default), the simulation assumes that a researcher would call the result of a hypothesis test significant if p < 0.05. 35 | 36 | #### Iterations 37 | The *iterations* option determines the number of iterations in the simulation. The default setting is 1000. 38 | 39 | #### Alternative 40 | Whenever the simulations are based on t-tests, the option *alternative* can be specified. This option relates to the sidedness of the alternative hypothesis in the t-test. It can either be *two-sided* or *greater*. 
The default setting is *two-sided*. 41 | 42 | #### Number of observations 43 | The number of observations determines the sample size in the test. In the case of a t-test, the specified number refers to the observations *per group*. In the case of a linear regression, the specified number refers to the overall sample size. 44 | 45 | #### Start simulation 46 | A new simulation will be started when you click the *Start simulation* button on the bottom of the options panel in each tab. The progress of the simulation will be displayed in a small progress bar in the bottom right corner of the screen. 47 | 48 | ## Resources 49 | The code for this Shiny app as well as for the simulations can be found on [https://github.com/nicebread/phacking_compendium](https://github.com/nicebread/phacking_compendium). 50 | 51 | ## About 52 | This Shiny app and the underlying R-package were created by Angelika Stefan and Felix Schönbrodt. If you have questions or feature requests, submit a GitHub issue on [https://github.com/nicebread/phacking_compendium](https://github.com/nicebread/phacking_compendium) or write an e-mail to a.m.stefan[at]uva.nl. 
53 | -------------------------------------------------------------------------------- /phackR/inst/sim_startplots_Shiny.R: --------------------------------------------------------------------------------
# ==============================================================================
# Shiny App: Simulation for Plots at Start
#
# Pre-computes one simulation per p-hacking strategy and stores the resulting
# plots, false-positive rates, and raw results in data/startplots.rds, which
# the Shiny app loads at startup.
# ==============================================================================
library(phackR)
startplots <- list()

# Format a false-positive rate as a percentage string, e.g. "5.2 %".
# Note: <= is required so that p-values rounded down to exactly alpha by the
# incorrect-rounding strategy count as significant; for strategies producing
# continuous p-values, <= and < are equivalent in practice.
.fprate <- function(res, column, alpha = 0.05) {
  paste0(round(sum(res[, column] <= alpha) / nrow(res) * 100, 2), " %")
}

# 1: Composite Scores
res1 <- sim.compscoreHack(nobs = 30, ncompv = 5, rcomp = 0.8, ndelete = 2, strategy = "firstsig", alpha = 0.05, iter = 1000)
startplots$compscorePlot <- phackR:::pplots(simdat = res1, alpha = 0.05)
startplots$compscorePlotES <- phackR:::esplots(simdat = res1, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$compscore.fprate.p <- .fprate(res1, "ps.hack")
startplots$compscore.fprate.o <- .fprate(res1, "ps.orig")
startplots$res1 <- res1

# 2: Exploit Covariates
res2 <- sim.covhack(nobs.group = 30, ncov = 3, rcov = 0.3, rcovdv = 0.5, interactions = FALSE, strategy = "firstsig", alpha = 0.05, iter = 1000)
startplots$expCovPlot <- phackR:::pplots(simdat = res2, alpha = 0.05)
startplots$expCovES <- phackR:::esplots(simdat = res2, EScolumn.hack = 3, EScolumn.orig = 4,
                                        titles = c(expression("Distribution of p-hacked effect sizes "*eta^2),
                                                   expression("Distribution of original effect sizes "*eta^2)))
startplots$expcov.fprate.p <- .fprate(res2, "ps.hack")
startplots$expcov.fprate.o <- .fprate(res2, "ps.orig")
startplots$res2 <- res2

# 3: Exploit Cutoffs
res3 <- sim.cutoffHack(nobs = 30, strategy = "firstsig", alpha = 0.05, iter = 1000)
startplots$expCutPlot <- phackR:::pplots(simdat = res3, alpha = 0.05)
startplots$expCutES <- phackR:::esplots(simdat = res3, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$expcut.fprate.p <- .fprate(res3, "ps.hack")
startplots$expcut.fprate.o <- .fprate(res3, "ps.orig")
startplots$res3 <- res3

# 4: Favorable Imputation
res4 <- sim.impHack(nobs = 30, missing = 0.1, which = c(1:3), strategy = "firstsig", alpha = 0.05, iter = 1000)
startplots$favImpPlot <- phackR:::pplots(simdat = res4, alpha = 0.05)
startplots$favImpES <- phackR:::esplots(simdat = res4, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$favimp.fprate.p <- .fprate(res4, "ps.hack")
startplots$favimp.fprate.o <- .fprate(res4, "ps.orig")
startplots$res4 <- res4

# 5: Incorrect Rounding
res5 <- sim.roundhack(roundinglevel = 0.051, iter = 1000, alternative = "two.sided", alpha = 0.05)
startplots$roundingPlot <- phackR:::pplots(simdat = res5, alpha = 0.05)
startplots$roundingES <- phackR:::esplots(simdat = res5, EScolumn.hack = 3, EScolumn.orig = 4)
# BUG FIX: the original lines read paste0(sum(round(ps <= 0.05)/1000*100, 2), " %"):
# round() wrapped the logical vector and the digits argument 2 was summed into
# the total, inflating the displayed rate by 2 percentage points.
startplots$rounding.fprate.p <- .fprate(res5, "ps.hack")
startplots$rounding.fprate.o <- .fprate(res5, "ps.orig")
startplots$res5 <- res5

# 6: Optional Stopping
res6 <- sim.optstop(n.min = 10, n.max = 100, step = 1, alternative = "two.sided", iter = 1000, alpha = 0.05)
startplots$optstopPlot <- phackR:::pplots(simdat = res6, alpha = 0.05)
startplots$optstopESr2 <- phackR:::esplots(simdat = res6, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$optstopESd <- phackR:::esplots(simdat = res6, EScolumn.hack = 5, EScolumn.orig = 6,
                                          titles = c(expression("Distribution of p-hacked effect sizes "*delta),
                                                     expression("Distribution of original effect sizes "*delta)))
startplots$optstop.fprate.p <- .fprate(res6, "ps.hack")
startplots$optstop.fprate.o <- .fprate(res6, "ps.orig")
startplots$res6 <- res6

# 7: Outlier Exclusion
res7 <- sim.outHack(nobs = 30, which = c(1:2), strategy = "firstsig", alpha = 0.05, iter = 1000)
startplots$outExclPlot <- phackR:::pplots(simdat = res7, alpha = 0.05)
startplots$outExclES <- phackR:::esplots(simdat = res7, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$outExcl.fprate.p <- .fprate(res7, "ps.hack")
startplots$outExcl.fprate.o <- .fprate(res7, "ps.orig")
startplots$res7 <- res7

# 9: Selective Reporting DV
res9 <- sim.multDVhack(nobs.group = 30, nvar = 5, r = 0.5, strategy = "firstsig", iter = 1000, alternative = "two.sided", alpha = 0.05)
startplots$SRDVPlot <- phackR:::pplots(simdat = res9, alpha = 0.05)
startplots$SRDVESr2 <- phackR:::esplots(simdat = res9, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$SRDVESd <- phackR:::esplots(simdat = res9, EScolumn.hack = 5, EScolumn.orig = 6,
                                       titles = c(expression("Distribution of p-hacked effect sizes "*delta),
                                                  expression("Distribution of original effect sizes "*delta)))
startplots$SRDV.fprate.p <- .fprate(res9, "ps.hack")
startplots$SRDV.fprate.o <- .fprate(res9, "ps.orig")
startplots$res9 <- res9

# 10: Selective Reporting IV
# BUG FIX: the original script re-ran sim.multDVhack() here (copy-paste error),
# so the IV panel showed DV-hacking results. It must use sim.multIVhack().
res10 <- sim.multIVhack(nobs.group = 30, nvar = 5, r = 0.5, strategy = "firstsig", regression = FALSE, iter = 1000, alternative = "two.sided", alpha = 0.05)
startplots$SRIVPlot <- phackR:::pplots(simdat = res10, alpha = 0.05)
startplots$SRIVESr2 <- phackR:::esplots(simdat = res10, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$SRIVESd <- phackR:::esplots(simdat = res10, EScolumn.hack = 5, EScolumn.orig = 6,
                                       titles = c(expression("Distribution of p-hacked effect sizes "*delta),
                                                  expression("Distribution of original effect sizes "*delta)))
startplots$SRIV.fprate.p <- .fprate(res10, "ps.hack")
startplots$SRIV.fprate.o <- .fprate(res10, "ps.orig")
startplots$res10 <- res10

# 11: Statistical Analyses
res11 <- sim.statAnalysisHack(nobs.group = 30, strategy = "firstsig", alternative = "two.sided", alpha = 0.05, iter = 1000)
startplots$statAnalysisPlot <- phackR:::pplots(simdat = res11, alpha = 0.05)
startplots$statAnalysis.fprate.p <- .fprate(res11, "ps.hack")
startplots$statAnalysis.fprate.o <- .fprate(res11, "ps.orig")
startplots$res11 <- res11

# 12: Subgroup Analyses
res12 <- sim.subgroupHack(nobs.group = 30, nsubvars = 5, alternative = "two.sided", strategy = "firstsig", alpha = 0.05, iter = 1000)
startplots$subgroupPlot <- phackR:::pplots(simdat = res12, alpha = 0.05)
startplots$subgroupESr2 <- phackR:::esplots(simdat = res12, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$subgroupESd <- phackR:::esplots(simdat = res12, EScolumn.hack = 5, EScolumn.orig = 6,
                                           titles = c(expression("Distribution of p-hacked effect sizes "*delta),
                                                      expression("Distribution of original effect sizes "*delta)))
startplots$subgroup.fprate.p <- .fprate(res12, "ps.hack")
startplots$subgroup.fprate.o <- .fprate(res12, "ps.orig")
startplots$res12 <- res12

# 13: Variable Transformations
res13 <- sim.varTransHack(nobs = 30, transvar = "x", strategy = "firstsig", alpha = 0.05, iter = 1000)
startplots$varTransPlot <- phackR:::pplots(simdat = res13, alpha = 0.05)
startplots$varTransES <- phackR:::esplots(simdat = res13, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$varTrans.fprate.p <- .fprate(res13, "ps.hack")
startplots$varTrans.fprate.o <- .fprate(res13, "ps.orig")
startplots$res13 <- res13

saveRDS(startplots, file = "./inst/shiny-phack/ShinyPHack/data/startplots.rds")

-------------------------------------------------------------------------------- /phackR/man/dot-compCohensD.Rd: --------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.R
\name{.compCohensD}
\alias{.compCohensD}
\title{Compute Cohen's d}
\usage{
.compCohensD(t, n)
}
\arguments{
\item{t}{t-value}

\item{n}{sample size per group}
}
\description{
Compute Cohen's d from t-value with equal sized groups of size n
}
-------------------------------------------------------------------------------- /phackR/man/dot-compR2t.Rd: --------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.R
\name{.compR2t}
\alias{.compR2t}
\title{Compute R squared for the t-test}
\usage{
.compR2t(x, y)
}
\arguments{
\item{x}{values of group 1}

\item{y}{values of group 2}
}
\description{
Compute R squared for the t-test
}
-------------------------------------------------------------------------------- /phackR/man/dot-compscoreHack.Rd: --------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/compositeScores.R
\name{.compscoreHack}
\alias{.compscoreHack}
\title{P-Hacking function for scale redefinition / Composite Scores}
\usage{
.compscoreHack(df, dv, compv, ndelete, strategy = "firstsig", alpha = 0.05)
}
\arguments{
\item{df}{Data frame containing dependent
variable and composite score items as columns} 11 | 12 | \item{dv}{Location of dependent variable in the data frame} 13 | 14 | \item{compv}{Location of composite score variables in the data frame} 15 | 16 | \item{ndelete}{How many items should be deleted from the scale at maximum?} 17 | 18 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 19 | 20 | \item{alpha}{Significance level of the t-test (default: 0.05)} 21 | } 22 | \description{ 23 | P-Hacking function for scale redefinition / Composite Scores 24 | } 25 | -------------------------------------------------------------------------------- /phackR/man/dot-covhack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exploitCovariates.R 3 | \name{.covhack} 4 | \alias{.covhack} 5 | \title{P-Hacking function for multiple covariates} 6 | \usage{ 7 | .covhack( 8 | df, 9 | dv, 10 | group, 11 | covs, 12 | interactions = FALSE, 13 | strategy = "firstsig", 14 | alpha = 0.05 15 | ) 16 | } 17 | \arguments{ 18 | \item{df}{Data frame with one group variable, one dependent variable, and one or more covariates} 19 | 20 | \item{dv}{Integer defining the location of the dependent variable column} 21 | 22 | \item{group}{Integer defining the location of the group variable column} 23 | 24 | \item{covs}{Numeric vector defining the location of the covariate(s).} 25 | 26 | \item{interactions}{Should interaction terms be added to the ANCOVA models? 
TRUE/FALSE} 27 | 28 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 29 | 30 | \item{alpha}{Significance level of the t-test} 31 | } 32 | \description{ 33 | Outputs a p-hacked p-value and a vector of all p-values that were computed in the process 34 | } 35 | -------------------------------------------------------------------------------- /phackR/man/dot-cutoffHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exploitCutoffs.R 3 | \name{.cutoffHack} 4 | \alias{.cutoffHack} 5 | \title{P-Hacking function for exploiting cutoff values} 6 | \usage{ 7 | .cutoffHack(df, iv, dv, strategy = "firstsig", alpha = 0.05) 8 | } 9 | \arguments{ 10 | \item{df}{Data frame with one continuous independent variable and one continuous dependent variable} 11 | 12 | \item{iv}{Location of the independent variable in the data frame} 13 | 14 | \item{dv}{Location of the dependent variable in the data frame} 15 | 16 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 17 | 18 | \item{alpha}{Significance level of the t-test} 19 | } 20 | \description{ 21 | P-Hacking function for exploiting cutoff values 22 | } 23 | -------------------------------------------------------------------------------- /phackR/man/dot-easyimpute.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/favorableImputation.R 3 | \name{.easyimpute} 4 | \alias{.easyimpute} 5 | \title{Single value imputation function 6 | Imputes NA values in a single variable using the function specified in fun} 7 | \usage{ 8 | .easyimpute(x, fun, ...) 
9 | } 10 | \arguments{ 11 | \item{x}{The target variable of the imputation} 12 | 13 | \item{fun}{The function used to replace missing values that takes x as an argument (e.g., mean)} 14 | 15 | \item{...}{Additional arguments to fun} 16 | } 17 | \description{ 18 | Single value imputation function 19 | Imputes NA values in a single variable using the function specified in fun 20 | } 21 | -------------------------------------------------------------------------------- /phackR/man/dot-estimate_mode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/favorableImputation.R 3 | \name{.estimate_mode} 4 | \alias{.estimate_mode} 5 | \title{Estimate mode of continuous variables 6 | Estimates mode of continuous variables using the density() function} 7 | \usage{ 8 | .estimate_mode(x) 9 | } 10 | \arguments{ 11 | \item{x}{The target variable for which the mode should be searched} 12 | } 13 | \description{ 14 | Estimate mode of continuous variables 15 | Estimates mode of continuous variables using the density() function 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-extractoutlier.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{.extractoutlier} 4 | \alias{.extractoutlier} 5 | \title{Create data frames without outliers} 6 | \usage{ 7 | .extractoutlier(x, y, outsx, outsy) 8 | } 9 | \arguments{ 10 | \item{x}{Original vector of x values} 11 | 12 | \item{y}{Original vector of y values} 13 | 14 | \item{outsx}{Outlier values to be removed from x} 15 | 16 | \item{outsy}{Outlier values to be removed from y} 17 | } 18 | \description{ 19 | Inputs data frame and two sets of outlier values, outputs list with three data frames 20 | } 21 | 
-------------------------------------------------------------------------------- /phackR/man/dot-impHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/favorableImputation.R 3 | \name{.impHack} 4 | \alias{.impHack} 5 | \title{P-Hacking function favorable imputation in univariate linear regression} 6 | \usage{ 7 | .impHack(df, x, y, which = c(1:10), strategy = "firstsig", alpha = 0.05) 8 | } 9 | \arguments{ 10 | \item{df}{Data frame containing x and y variables as columns} 11 | 12 | \item{x}{Location of x variable (predictor) in the data frame} 13 | 14 | \item{y}{Location of y variable (criterion) in the data frame} 15 | 16 | \item{which}{Which missing value handling method? 1: delete missing, 2: mean imputation, 3: median imputation, 4: mode imputation, 5: predictive mean matching, 6: weighted predictive mean matching, 7: sample from observed values, 8: Bayesian linear regression, 9: linear regression ignoring model error, 10: linear regression predicted values} 17 | 18 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 19 | 20 | \item{alpha}{Significance level of the t-test (default: 0.05)} 21 | } 22 | \description{ 23 | Outputs a p-hacked p-value and a vector of all p-values that were computed in the process 24 | } 25 | -------------------------------------------------------------------------------- /phackR/man/dot-multDVhack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/selectiveReportingDV.R 3 | \name{.multDVhack} 4 | \alias{.multDVhack} 5 | \title{P-Hacking function for multiple dependent variables} 6 | \usage{ 7 | .multDVhack( 8 | df, 9 | dvs, 10 | group, 11 | strategy = "firstsig", 12 | alternative = "two.sided", 13 | alpha = 0.05 14 | ) 15 | } 16 | \arguments{ 17 | \item{df}{Data 
frame with one group variable and multiple dependent variables} 18 | 19 | \item{dvs}{Vector defining the DV columns (will be checked in given order)} 20 | 21 | \item{group}{Scalar defining grouping column} 22 | 23 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 24 | 25 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 26 | 27 | \item{alpha}{Significance level of the t-test} 28 | } 29 | \description{ 30 | Outputs a p-hacked p-value and a vector of all p-values that were computed in the process 31 | } 32 | -------------------------------------------------------------------------------- /phackR/man/dot-multIVhack_reg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/selectiveReportingIV.R 3 | \name{.multIVhack_reg} 4 | \alias{.multIVhack_reg} 5 | \title{P-Hacking function for multiple predictors in a regression} 6 | \usage{ 7 | .multIVhack_reg( 8 | df, 9 | ivs, 10 | control, 11 | strategy = "firstsig", 12 | alternative = "two.sided", 13 | alpha = 0.05 14 | ) 15 | } 16 | \arguments{ 17 | \item{df}{Data frame containing a criterion variable and multiple predictor variables} 18 | 19 | \item{ivs}{Location of the independent variables (predictors) in the data frame} 20 | 21 | \item{control}{Location of the criterion in the data frame} 22 | 23 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 24 | 25 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 26 | 27 | \item{alpha}{Significance level of the t-test (default: 0.05)} 28 | } 29 | \description{ 30 | Returns a p-hacked p-value and a vector of all p-values that were computed in the process 31 | } 32 | -------------------------------------------------------------------------------- /phackR/man/dot-multIVhack_ttest.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/selectiveReportingIV.R 3 | \name{.multIVhack_ttest} 4 | \alias{.multIVhack_ttest} 5 | \title{P-Hacking function for multiple independent variables in a t-test} 6 | \usage{ 7 | .multIVhack_ttest( 8 | df, 9 | ivs, 10 | control, 11 | strategy = "firstsig", 12 | alternative = "two.sided", 13 | alpha = 0.05 14 | ) 15 | } 16 | \arguments{ 17 | \item{df}{Data frame (wide format) containing a control group variable and multiple treatment group variables} 18 | 19 | \item{ivs}{Location of the independent variables (treatment groups) in the (wide) data frame} 20 | 21 | \item{control}{Location of the control group in the (wide) data frame} 22 | 23 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 24 | 25 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 26 | 27 | \item{alpha}{Significance level of the t-test (default: 0.05)} 28 | } 29 | \description{ 30 | Returns a p-hacked p-value and a vector of all p-values that were computed in the process 31 | } 32 | -------------------------------------------------------------------------------- /phackR/man/dot-optstop.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/optionalStopping.R 3 | \name{.optstop} 4 | \alias{.optstop} 5 | \title{Optional Stopping based on existing dataset} 6 | \usage{ 7 | .optstop( 8 | df, 9 | group, 10 | dv, 11 | n.min, 12 | n.max, 13 | step = 1, 14 | peek = NULL, 15 | alternative = "two.sided", 16 | alpha = 0.05 17 | ) 18 | } 19 | \arguments{ 20 | \item{df}{Data frame} 21 | 22 | \item{group}{Scalar defining grouping column} 23 | 24 | \item{dv}{Scalar defining location of dependent variable in the data frame} 25 | 26 | \item{n.min}{Minimum sample size} 27 | 28 | 
\item{n.max}{Maximum sample size} 29 | 30 | \item{step}{Step size of the optional stopping (default is 1)} 31 | 32 | \item{peek}{Determines how often one peeks at the data. Overrides step argument if not NULL.} 33 | 34 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 35 | 36 | \item{alpha}{Significance level of the t-test (default: 0.05)} 37 | } 38 | \description{ 39 | Returns a p-hacked p-value and a non-p-hacked p-value based on the maximum sample size 40 | } 41 | -------------------------------------------------------------------------------- /phackR/man/dot-out.boxplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.boxplot} 4 | \alias{.out.boxplot} 5 | \title{Box plot outlier definition} 6 | \usage{ 7 | .out.boxplot(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of values from which outliers should be excluded} 11 | 12 | \item{y}{Vector of values from which outliers should be excluded} 13 | } 14 | \description{ 15 | Box plot outlier definition function 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.cook.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.cook} 4 | \alias{.out.cook} 5 | \title{Cook's Distance outlier definition} 6 | \usage{ 7 | .out.cook(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of x values (predictor in linear regression)} 11 | 12 | \item{y}{Vector of y values (criterion in linear regression)} 13 | } 14 | \description{ 15 | Excludes values that have a Cook's distance larger than the median of an F distribution with p and n-p degrees of freedom or larger than 1 (see Wikipedia for Cook's distance for the cutoff) 16 | } 17 | 
-------------------------------------------------------------------------------- /phackR/man/dot-out.covratio.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.covratio} 4 | \alias{.out.covratio} 5 | \title{Covariance ratio outlier definition} 6 | \usage{ 7 | .out.covratio(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of x values (predictor in linear regression)} 11 | 12 | \item{y}{Vector of y values (criterion in linear regression)} 13 | } 14 | \description{ 15 | Excludes values that have a covariance ratio differing from 1 (cutoff: influence.measures function internal) 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.dfbeta.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.dfbeta} 4 | \alias{.out.dfbeta} 5 | \title{DFBETAS outlier definition} 6 | \usage{ 7 | .out.dfbeta(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of x values (predictor in linear regression)} 11 | 12 | \item{y}{Vector of y values (criterion in linear regression)} 13 | } 14 | \description{ 15 | Excludes the 1-3 values that have the highest influence on the regression slope 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.dffits.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.dffits} 4 | \alias{.out.dffits} 5 | \title{DFFITS outlier definition} 6 | \usage{ 7 | .out.dffits(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of x values (predictor in linear regression)} 11 | 12 | \item{y}{Vector of y values 
(criterion in linear regression)} 13 | } 14 | \description{ 15 | Excludes values that have absolute DFFIT values larger than 2*sqrt(2/n) (see Wikipedia page for DFFITS for the cutoff) 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.leverage.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.leverage} 4 | \alias{.out.leverage} 5 | \title{Leverage values outlier definition} 6 | \usage{ 7 | .out.leverage(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of x values (predictor in linear regression)} 11 | 12 | \item{y}{Vector of y values (criterion in linear regression)} 13 | } 14 | \description{ 15 | Excludes values that have high leverage values (3 times larger than the mean leverage value 3*(p/n), see 'https://newonlinecourses.science.psu.edu/stat501/node/338/' for the cutoff) 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.mahalanobis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.mahalanobis} 4 | \alias{.out.mahalanobis} 5 | \title{Robust Mahalanobis Distance outlier definition} 6 | \usage{ 7 | .out.mahalanobis(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of x values (predictor in linear regression)} 11 | 12 | \item{y}{Vector of y values (criterion in linear regression)} 13 | } 14 | \description{ 15 | Excludes values that have a high robust Mahalanobis Distance (cutoff: squared MD > qchisq(0.98, 2), see Filzmoser et al. 
(2005)) 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.percentrule.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.percentrule} 4 | \alias{.out.percentrule} 5 | \title{Percentage outlier definition} 6 | \usage{ 7 | .out.percentrule(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of values from which outliers should be excluded} 11 | 12 | \item{y}{Vector of values from which outliers should be excluded} 13 | } 14 | \description{ 15 | Percentage outlier definition function 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.residual.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.residual} 4 | \alias{.out.residual} 5 | \title{Residuals outlier definition} 6 | \usage{ 7 | .out.residual(x, y, type) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of x values (predictor in linear regression)} 11 | 12 | \item{y}{Vector of y values (criterion in linear regression)} 13 | 14 | \item{type}{What type of residuals \code{"stan"}, \code{"stud"}} 15 | } 16 | \description{ 17 | Excludes values with high standardized / studentized residuals. If the largest residual > 2, values with residuals larger than 2, 2.5, 3, ... are excluded. 
If the largest residual < 2, values with 1:3 largest residuals are excluded (largest 3 standardized residuals is equivalent to the q-q plot definition of outliers in the regression diagnostics in the lm package) 18 | } 19 | -------------------------------------------------------------------------------- /phackR/man/dot-out.sdrule.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.sdrule} 4 | \alias{.out.sdrule} 5 | \title{Standard deviation outlier definition} 6 | \usage{ 7 | .out.sdrule(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of values from which outliers should be excluded} 11 | 12 | \item{y}{Vector of values from which outliers should be excluded} 13 | } 14 | \description{ 15 | Standard deviation outlier definition function: Takes a vector x, tries different standard deviation outlier rules (x > 2, 2.5, 3, ...) and returns all differing result vectors without the outliers. 
16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.stemleaf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.stemleaf} 4 | \alias{.out.stemleaf} 5 | \title{Stem and Leaf plot outlier definition} 6 | \usage{ 7 | .out.stemleaf(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of values from which outliers should be excluded} 11 | 12 | \item{y}{Vector of values from which outliers should be excluded} 13 | } 14 | \description{ 15 | Stem and Leaf plot outlier definition function 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-outHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.outHack} 4 | \alias{.outHack} 5 | \title{P-Hacking function for outlier exclusion in univariate linear regression} 6 | \usage{ 7 | .outHack(df, x, y, which = c(1:12), strategy = "firstsig", alpha = 0.05) 8 | } 9 | \arguments{ 10 | \item{df}{Data frame containing x and y variables as columns} 11 | 12 | \item{x}{Location of x variable (predictor) in the data frame} 13 | 14 | \item{y}{Location of y variable (criterion) in the data frame} 15 | 16 | \item{which}{Which outlier definition methods? 
A numeric vector containing the chosen methods (1: boxplot, 2: stem&leaf, 3: standard deviation, 4: percentile, 5: studentized residuals, 6: standardized residuals, 7: DFBETA, 8: DFFITS, 9: Cook's D, 10: Mahalanobis distance, 11: Leverage values, 12: Covariance ratio)} 17 | 18 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 19 | 20 | \item{alpha}{Significance level of the t-test (default: 0.05)} 21 | } 22 | \description{ 23 | Outputs a p-hacked p-value and a vector of all p-values that were computed in the process 24 | } 25 | -------------------------------------------------------------------------------- /phackR/man/dot-roundhack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/incorrectRounding.R 3 | \name{.roundhack} 4 | \alias{.roundhack} 5 | \title{P-Hacking function for incorrect rounding} 6 | \usage{ 7 | .roundhack( 8 | df, 9 | group, 10 | dv, 11 | roundinglevel, 12 | alternative = "two.sided", 13 | alpha = 0.05 14 | ) 15 | } 16 | \arguments{ 17 | \item{df}{Data frame} 18 | 19 | \item{group}{Scalar defining location of the group vector in the data frame} 20 | 21 | \item{dv}{Scalar defining location of dependent variable in the data frame} 22 | 23 | \item{roundinglevel}{Highest p-value that is rounded down to 0.05} 24 | 25 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 26 | 27 | \item{alpha}{Significance level of the t-test (default: 0.05)} 28 | } 29 | \description{ 30 | Outputs a p-hacked p-value and the non-p-hacked-p-value 31 | } 32 | -------------------------------------------------------------------------------- /phackR/man/dot-selectpvalue.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{.selectpvalue} 4 | \alias{.selectpvalue} 
5 | \title{Select a p-value from a vector of p-hacked p-values} 6 | \usage{ 7 | .selectpvalue(ps, strategy, alpha) 8 | } 9 | \arguments{ 10 | \item{ps}{Vector of p values} 11 | 12 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 13 | 14 | \item{alpha}{Significance level (default: 0.05)} 15 | } 16 | \description{ 17 | Takes a vector of p-values and selects the smallest, first significant, or smallest significant p-value. 18 | } 19 | -------------------------------------------------------------------------------- /phackR/man/dot-sim.compscore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compositeScores.R 3 | \name{.sim.compscore} 4 | \alias{.sim.compscore} 5 | \title{Simulate data: Correlated composite score raw variables and one non-correlated dependent variable} 6 | \usage{ 7 | .sim.compscore(nobs, ncompv, rcomp) 8 | } 9 | \arguments{ 10 | \item{nobs}{Integer giving number of observations} 11 | 12 | \item{ncompv}{Integer giving number of variables to build the composite score} 13 | 14 | \item{rcomp}{Correlation between the composite score variables} 15 | } 16 | \description{ 17 | Simulate data: Correlated composite score raw variables and one non-correlated dependent variable 18 | } 19 | -------------------------------------------------------------------------------- /phackR/man/dot-sim.covariates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exploitCovariates.R 3 | \name{.sim.covariates} 4 | \alias{.sim.covariates} 5 | \title{Simulate data with (correlated) covariates} 6 | \usage{ 7 | .sim.covariates(nobs.group, ncov, rcov, rcovdv, mu = 0, sd = 1, missing = 0) 8 | } 9 | \arguments{ 10 | \item{nobs.group}{Vector with number of observations per group} 11 | 12 | \item{ncov}{Number of 
continuous covariates in the simulated data frame} 13 | 14 | \item{rcov}{Correlation between the covariates} 15 | 16 | \item{rcovdv}{Correlation between covariates and dependent variable} 17 | 18 | \item{mu}{Mean of the random data} 19 | 20 | \item{sd}{Standard deviation of the random data} 21 | 22 | \item{missing}{Proportion of missing values per variable (e.g., 0.2 = 20 percent)} 23 | } 24 | \description{ 25 | Simulates a dependent variable that correlates with multiple (correlated) covariates as well as an independent IV 26 | } 27 | -------------------------------------------------------------------------------- /phackR/man/dot-sim.data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{.sim.data} 4 | \alias{.sim.data} 5 | \title{Generic sampling function} 6 | \usage{ 7 | .sim.data(nobs.group) 8 | } 9 | \arguments{ 10 | \item{nobs.group}{Number of observations per group. 
Either a scalar or a vector with two elements.} 11 | } 12 | \description{ 13 | Outputs a data frame with two columns 14 | } 15 | -------------------------------------------------------------------------------- /phackR/man/dot-sim.multDV.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/selectiveReportingDV.R 3 | \name{.sim.multDV} 4 | \alias{.sim.multDV} 5 | \title{Simulate dataset with multiple dependent variables} 6 | \usage{ 7 | .sim.multDV(nobs.group, nvar, r) 8 | } 9 | \arguments{ 10 | \item{nobs.group}{Vector giving number of observations per group} 11 | 12 | \item{nvar}{Number of dependent variables in the data frame} 13 | 14 | \item{r}{Desired correlation between the dependent variables (scalar)} 15 | } 16 | \description{ 17 | Outputs data frame with a grouping variable and multiple correlated dependent variables 18 | } 19 | -------------------------------------------------------------------------------- /phackR/man/dot-sim.multIV.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/selectiveReportingIV.R 3 | \name{.sim.multIV} 4 | \alias{.sim.multIV} 5 | \title{Simulate dataset with multiple independent variables} 6 | \usage{ 7 | .sim.multIV(nobs.group, nvar, r, regression = FALSE) 8 | } 9 | \arguments{ 10 | \item{nobs.group}{Scalar defining number of observations per group (or number of observations in predictors in regression)} 11 | 12 | \item{nvar}{Number of independent variables in the data frame} 13 | 14 | \item{r}{Desired correlation between the independent variables (scalar)} 15 | 16 | \item{regression}{Should the simulation be conducted for a regression analysis (TRUE) or a t-test? 
(FALSE)} 17 | } 18 | \description{ 19 | Outputs data frame with multiple independent variables 20 | } 21 | -------------------------------------------------------------------------------- /phackR/man/dot-sim.multcor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{.sim.multcor} 4 | \alias{.sim.multcor} 5 | \title{Simulate multivariate correlated data for continuous variables} 6 | \usage{ 7 | .sim.multcor(nobs, nvar, r, mu = 0, sd = 1, missing = 0) 8 | } 9 | \arguments{ 10 | \item{nobs}{Number of observations (rows) in the simulated data frame} 11 | 12 | \item{nvar}{Number of variables (columns) in the data frame} 13 | 14 | \item{r}{Desired correlation between the variables (integer)} 15 | 16 | \item{mu}{Mean of the random data} 17 | 18 | \item{sd}{Standard deviation of the random data} 19 | 20 | \item{missing}{Proportion of missing values per variable (e.g., 0.2 = 20 percent)} 21 | } 22 | \description{ 23 | Outputs a data frame with correlated variables of defined length 24 | } 25 | -------------------------------------------------------------------------------- /phackR/man/dot-sim.subgroup.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/subgroupAnalysis.R 3 | \name{.sim.subgroup} 4 | \alias{.sim.subgroup} 5 | \title{Simulate data with subgroups} 6 | \usage{ 7 | .sim.subgroup(nobs.group, nsubvars) 8 | } 9 | \arguments{ 10 | \item{nobs.group}{Vector giving number of observations per group} 11 | 12 | \item{nsubvars}{Integer specifying number of variables for potential subgroups} 13 | } 14 | \description{ 15 | Outputs data frame with multiple binary variables from which subgroups can be extracted 16 | } 17 | -------------------------------------------------------------------------------- 
/phackR/man/dot-statAnalysisHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statAnalysis.R 3 | \name{.statAnalysisHack} 4 | \alias{.statAnalysisHack} 5 | \title{P-Hacking function for exploiting different statistical analysis options} 6 | \usage{ 7 | .statAnalysisHack( 8 | df, 9 | group, 10 | dv, 11 | strategy = "firstsig", 12 | alternative = "two.sided", 13 | alpha = 0.05 14 | ) 15 | } 16 | \arguments{ 17 | \item{df}{Data frame with one continuous independent variable and one continuous dependent variable} 18 | 19 | \item{group}{Location of the grouping variable in the data frame} 20 | 21 | \item{dv}{Location of the dependent variable in the data frame} 22 | 23 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 24 | 25 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 26 | 27 | \item{alpha}{Significance level of the t-test} 28 | } 29 | \description{ 30 | P-Hacking function for exploiting different statistical analysis options 31 | } 32 | -------------------------------------------------------------------------------- /phackR/man/dot-subgroupHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/subgroupAnalysis.R 3 | \name{.subgroupHack} 4 | \alias{.subgroupHack} 5 | \title{P-Hacking function for multiple subgroups analysis} 6 | \usage{ 7 | .subgroupHack( 8 | df, 9 | iv, 10 | dv, 11 | subvars, 12 | alternative = "two.sided", 13 | strategy = "firstsig", 14 | alpha = 0.05 15 | ) 16 | } 17 | \arguments{ 18 | \item{df}{A matrix or data frame containing all relevant data} 19 | 20 | \item{iv}{Integer specifying the location of the binary independent variable in the data frame} 21 | 22 | \item{dv}{Integer specifying the location of the dependent variable in the data
frame} 23 | 24 | \item{subvars}{Vector specifying the location of the subgroup variables in the data frame} 25 | 26 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 27 | 28 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 29 | 30 | \item{alpha}{Significance level of the t-test} 31 | } 32 | \description{ 33 | Outputs a p-hacked p-value and a vector of all p-values that were computed in the process 34 | } 35 | -------------------------------------------------------------------------------- /phackR/man/dot-varTransHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/variableTransformation.R 3 | \name{.varTransHack} 4 | \alias{.varTransHack} 5 | \title{P-Hacking function variable transformation in univariate linear regression} 6 | \usage{ 7 | .varTransHack( 8 | df, 9 | x, 10 | y, 11 | transvar, 12 | testnorm = FALSE, 13 | strategy = "firstsig", 14 | alpha = 0.05 15 | ) 16 | } 17 | \arguments{ 18 | \item{df}{Data frame containing x and y variables as columns} 19 | 20 | \item{x}{Location of x variable (predictor) in the data frame} 21 | 22 | \item{y}{Location of y variable (criterion) in the data frame} 23 | 24 | \item{transvar}{Which variables should be transformed? 
Either "x" (for x variable), "y" (for y variable), or "xy" (for both)} 25 | 26 | \item{testnorm}{Should variables only be transformed after a significant test for normality of residuals?} 27 | 28 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 29 | 30 | \item{alpha}{Significance level of the t-test (default: 0.05)} 31 | } 32 | \description{ 33 | Outputs a p-hacked p-value and a vector of all p-values that were computed in the process 34 | } 35 | -------------------------------------------------------------------------------- /phackR/man/esplots.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotsShiny.R 3 | \name{esplots} 4 | \alias{esplots} 5 | \title{Plot effect size distributions} 6 | \usage{ 7 | esplots( 8 | simdat, 9 | EScolumn.hack, 10 | EScolumn.orig, 11 | titles = c(expression("Distribution of p-hacked effect sizes R"^2), 12 | expression("Distribution of original effect sizes R"^2)) 13 | ) 14 | } 15 | \arguments{ 16 | \item{simdat}{Simulated data from one of the p-hacking simulation functions} 17 | 18 | \item{EScolumn.hack}{Column number of hacked effect sizes} 19 | 20 | \item{EScolumn.orig}{Column number of original effect sizes} 21 | 22 | \item{titles}{Title of effect size plots} 23 | } 24 | \description{ 25 | Plot effect size distributions 26 | } 27 | -------------------------------------------------------------------------------- /phackR/man/pplots.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotsShiny.R 3 | \name{pplots} 4 | \alias{pplots} 5 | \title{Plot p-value distributions} 6 | \usage{ 7 | pplots(simdat, alpha) 8 | } 9 | \arguments{ 10 | \item{simdat}{Simulated data from one of the p-hacking simulation functions} 11 | 12 | \item{alpha}{Alpha level} 13 | } 14 | 
\description{ 15 | Plot p-value distributions 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/runShinyPHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runShinyPHack.R 3 | \name{runShinyPHack} 4 | \alias{runShinyPHack} 5 | \title{Run Shiny app for p-hacking simulation} 6 | \usage{ 7 | runShinyPHack() 8 | } 9 | \description{ 10 | Run Shiny app for p-hacking simulation 11 | } 12 | -------------------------------------------------------------------------------- /phackR/man/sim.compscoreHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compositeScores.R 3 | \name{sim.compscoreHack} 4 | \alias{sim.compscoreHack} 5 | \title{Simulate p-hacking with composite scores 6 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations} 7 | \usage{ 8 | sim.compscoreHack( 9 | nobs, 10 | ncompv, 11 | rcomp, 12 | ndelete, 13 | strategy = "firstsig", 14 | alpha = 0.05, 15 | iter = 1000, 16 | shinyEnv = FALSE 17 | ) 18 | } 19 | \arguments{ 20 | \item{nobs}{Integer giving number of observations} 21 | 22 | \item{ncompv}{Integer giving number of variables to build the composite score} 23 | 24 | \item{rcomp}{Correlation between the composite score variables} 25 | 26 | \item{ndelete}{How many items should be deleted from the scale at maximum?} 27 | 28 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 29 | 30 | \item{alpha}{Significance level of the t-test (default: 0.05)} 31 | 32 | \item{iter}{Number of simulation iterations} 33 | 34 | \item{shinyEnv}{Is the function run in a Shiny session?
TRUE/FALSE} 35 | } 36 | \description{ 37 | Simulate p-hacking with composite scores 38 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 39 | } 40 | -------------------------------------------------------------------------------- /phackR/man/sim.covhack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exploitCovariates.R 3 | \name{sim.covhack} 4 | \alias{sim.covhack} 5 | \title{Simulate p-Hacking with multiple covariates 6 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations} 7 | \usage{ 8 | sim.covhack( 9 | nobs.group, 10 | ncov, 11 | rcov, 12 | rcovdv, 13 | interactions = FALSE, 14 | strategy = "firstsig", 15 | alpha = 0.05, 16 | iter = 1000, 17 | shinyEnv = FALSE 18 | ) 19 | } 20 | \arguments{ 21 | \item{nobs.group}{Vector with number of observations per group} 22 | 23 | \item{ncov}{Number of continuous covariates in the simulated data frame} 24 | 25 | \item{rcov}{Correlation between the covariates} 26 | 27 | \item{rcovdv}{Correlation between covariates and dependent variable} 28 | 29 | \item{interactions}{Should interaction terms be added to the ANCOVA models? TRUE/FALSE} 30 | 31 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 32 | 33 | \item{alpha}{Significance level of the t-test} 34 | 35 | \item{iter}{Number of simulation iterations} 36 | 37 | \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} 38 | } 39 | \description{ 40 | Simulate p-Hacking with multiple covariates 41 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 42 | } 43 | -------------------------------------------------------------------------------- /phackR/man/sim.cutoffHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exploitCutoffs.R 3 | \name{sim.cutoffHack} 4 | \alias{sim.cutoffHack} 5 | \title{Simulate p-Hacking for exploiting cutoff values 6 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations} 7 | \usage{ 8 | sim.cutoffHack( 9 | nobs, 10 | strategy = "firstsig", 11 | alpha = 0.05, 12 | iter = 1000, 13 | shinyEnv = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{nobs}{Number of observations} 18 | 19 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 20 | 21 | \item{alpha}{Significance level of the t-test} 22 | 23 | \item{iter}{Number of simulation iterations} 24 | 25 | \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} 26 | } 27 | \description{ 28 | Simulate p-Hacking for exploiting cutoff values 29 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 30 | } 31 | -------------------------------------------------------------------------------- /phackR/man/sim.impHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/favorableImputation.R 3 | \name{sim.impHack} 4 | \alias{sim.impHack} 5 | \title{Simulate p-Hacking with different sorts of missing value imputation} 6 | \usage{ 7 | sim.impHack( 8 | nobs, 9 | missing, 10 | which = c(1:10), 11 | strategy = "firstsig", 12 | alpha = 0.05, 13 | iter = 1000, 14 | shinyEnv = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{nobs}{Integer giving number of observations} 19 | 20 | \item{missing}{Percentage of missing values (e.g., 0.1 for 10 percent)} 21 | 22 | \item{which}{Which imputation methods? Either 5 random methods are chosen ("random") or a numeric vector containing the chosen methods (1: delete missing, 2: mean imputation, 3: median imputation, 4: mode imputation, 5: predictive mean matching, 6: weighted predictive mean matching, 7: sample from observed values, 8: Bayesian linear regression, 9: linear regression ignoring model error, 10: linear regression predicted values)} 23 | 24 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 25 | 26 | \item{alpha}{Significance level of the t-test (default: 0.05)} 27 | 28 | \item{iter}{Number of simulation iterations} 29 | 30 | \item{shinyEnv}{Is the function run in a Shiny session?
TRUE/FALSE} 31 | } 32 | \description{ 33 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 34 | } 35 | -------------------------------------------------------------------------------- /phackR/man/sim.multDVhack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/selectiveReportingDV.R 3 | \name{sim.multDVhack} 4 | \alias{sim.multDVhack} 5 | \title{Simulate p-Hacking with multiple dependent variables} 6 | \usage{ 7 | sim.multDVhack( 8 | nobs.group, 9 | nvar, 10 | r, 11 | strategy = "firstsig", 12 | iter = 1000, 13 | alternative = "two.sided", 14 | alpha = 0.05, 15 | shinyEnv = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{nobs.group}{Vector giving number of observations per group} 20 | 21 | \item{nvar}{Number of dependent variables (columns) in the data frame} 22 | 23 | \item{r}{Desired correlation between the dependent variables (scalar)} 24 | 25 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 26 | 27 | \item{iter}{Number of simulation iterations} 28 | 29 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 30 | 31 | \item{alpha}{Significance level of the t-test (default: 0.05)} 32 | 33 | \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} 34 | } 35 | \description{ 36 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 37 | } 38 | -------------------------------------------------------------------------------- /phackR/man/sim.multIVhack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/selectiveReportingIV.R 3 | \name{sim.multIVhack} 4 | \alias{sim.multIVhack} 5 | \title{Simulate p-Hacking with multiple independent variables} 6 | \usage{ 7 | sim.multIVhack( 8 | nobs.group, 9 | nvar, 10 | r, 11 | regression = FALSE, 12 | strategy = "firstsig", 13 | iter = 1000, 14 | alternative = "two.sided", 15 | alpha = 0.05, 16 | shinyEnv = FALSE 17 | ) 18 | } 19 | \arguments{ 20 | \item{nobs.group}{Vector giving number of observations per group} 21 | 22 | \item{nvar}{Number of independent variables (columns) in the data frame} 23 | 24 | \item{r}{Desired correlation between the dependent variables (scalar)} 25 | 26 | \item{regression}{Should the simulation be conducted for a regression analysis (TRUE) or a t-test? (FALSE)} 27 | 28 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 29 | 30 | \item{iter}{Number of simulation iterations} 31 | 32 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 33 | 34 | \item{alpha}{Significance level of the t-test (default: 0.05)} 35 | 36 | \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} 37 | } 38 | \description{ 39 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 40 | } 41 | -------------------------------------------------------------------------------- /phackR/man/sim.optstop.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/optionalStopping.R 3 | \name{sim.optstop} 4 | \alias{sim.optstop} 5 | \title{Simulate p-hacking with optional stopping} 6 | \usage{ 7 | sim.optstop( 8 | n.min, 9 | n.max, 10 | step = 1, 11 | peek = NULL, 12 | alternative = "two.sided", 13 | iter = 1000, 14 | alpha = 0.05, 15 | shinyEnv = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{n.min}{Minimum sample size} 20 | 21 | \item{n.max}{Maximum sample size} 22 | 23 | \item{step}{Step size of the optional stopping (default is 1)} 24 | 25 | \item{peek}{Determines how often one peeks at the data. Overrides step argument if not NULL.} 26 | 27 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 28 | 29 | \item{iter}{Number of iterations} 30 | 31 | \item{alpha}{Significance level of the t-test (default: 0.05)} 32 | 33 | \item{shinyEnv}{Is the function run in a Shiny session?
TRUE/FALSE} 34 | } 35 | \description{ 36 | Simulate p-hacking with optional stopping 37 | } 38 | -------------------------------------------------------------------------------- /phackR/man/sim.outHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{sim.outHack} 4 | \alias{sim.outHack} 5 | \title{Simulate p-Hacking with different sorts of outlier definition} 6 | \usage{ 7 | sim.outHack( 8 | nobs, 9 | which = c(1:12), 10 | strategy = "firstsig", 11 | alpha = 0.05, 12 | iter = 1000, 13 | shinyEnv = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{nobs}{Integer giving number of observations} 18 | 19 | \item{which}{Which outlier detection methods? Either 5 random methods are chosen ("random") or a numeric vector containing the chosen methods (1: boxplot, 2: stem&leaf, 3: standard deviation, 4: percentile, 5: studentized residuals, 6: standardized residuals, 7: DFBETA, 8: DFFITS, 9: Cook's D, 10: Mahalanobis distance, 11: Leverage values, 12: Covariance ratio)} 20 | 21 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 22 | 23 | \item{alpha}{Significance level of the t-test (default: 0.05)} 24 | 25 | \item{iter}{Number of simulation iterations} 26 | 27 | \item{shinyEnv}{Is the function run in a Shiny session?
TRUE/FALSE} 28 | } 29 | \description{ 30 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 31 | } 32 | -------------------------------------------------------------------------------- /phackR/man/sim.roundhack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/incorrectRounding.R 3 | \name{sim.roundhack} 4 | \alias{sim.roundhack} 5 | \title{Simulate p-hacking with incorrect rounding} 6 | \usage{ 7 | sim.roundhack( 8 | roundinglevel, 9 | iter = 1000, 10 | alternative = "two.sided", 11 | alpha = 0.05, 12 | shinyEnv = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{roundinglevel}{Highest p-value that is rounded down to alpha} 17 | 18 | \item{iter}{Number of iterations} 19 | 20 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 21 | 22 | \item{alpha}{Significance level of the t-test (default: 0.05)} 23 | 24 | \item{shinyEnv}{Is the function run in a Shiny session? TRUE/FALSE} 25 | } 26 | \description{ 27 | Simulate p-hacking with incorrect rounding 28 | } 29 | -------------------------------------------------------------------------------- /phackR/man/sim.statAnalysisHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statAnalysis.R 3 | \name{sim.statAnalysisHack} 4 | \alias{sim.statAnalysisHack} 5 | \title{Simulate p-Hacking for exploiting different statistical analysis options} 6 | \usage{ 7 | sim.statAnalysisHack( 8 | nobs.group, 9 | strategy = "firstsig", 10 | alternative = "two.sided", 11 | alpha = 0.05, 12 | iter = 1000, 13 | shinyEnv = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{nobs.group}{Number of observations per group. 
Either a scalar or a vector with 2 elements.} 18 | 19 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 20 | 21 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 22 | 23 | \item{alpha}{Significance level of the t-test} 24 | 25 | \item{iter}{Number of simulation iterations} 26 | 27 | \item{shinyEnv}{Is the function run in a Shiny session? TRUE/FALSE} 28 | } 29 | \description{ 30 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 31 | } 32 | -------------------------------------------------------------------------------- /phackR/man/sim.subgroupHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/subgroupAnalysis.R 3 | \name{sim.subgroupHack} 4 | \alias{sim.subgroupHack} 5 | \title{Simulate p-hacking with multiple subgroups 6 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations} 7 | \usage{ 8 | sim.subgroupHack( 9 | nobs.group, 10 | nsubvars, 11 | alternative = "two.sided", 12 | strategy = "firstsig", 13 | alpha = 0.05, 14 | iter = 1000, 15 | shinyEnv = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{nobs.group}{Vector giving number of observations per group} 20 | 21 | \item{nsubvars}{Integer specifying number of variables for potential subgroups} 22 | 23 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 24 | 25 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 26 | 27 | \item{alpha}{Significance level of the t-test} 28 | 29 | \item{iter}{Number of simulation iterations} 30 | 31 | \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} 32 | } 33 | \description{ 34 | Simulate p-hacking with multiple subgroups 35 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 36 | } 37 | -------------------------------------------------------------------------------- /phackR/man/sim.varTransHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/variableTransformation.R 3 | \name{sim.varTransHack} 4 | \alias{sim.varTransHack} 5 | \title{Simulate p-hacking with variable transformations 6 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations} 7 | \usage{ 8 | sim.varTransHack( 9 | nobs, 10 | transvar, 11 | testnorm = FALSE, 12 | strategy = "firstsig", 13 | alpha = 0.05, 14 | iter = 1000, 15 | shinyEnv = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{nobs}{Integer giving number of observations} 20 | 21 | \item{transvar}{Which variables should be transformed? Either "x" (for x variable), "y" (for y variable), or "xy" (for both)} 22 | 23 | \item{testnorm}{Should variables only be transformed after a significant test for normality of residuals?} 24 | 25 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 26 | 27 | \item{alpha}{Significance level of the t-test (default: 0.05)} 28 | 29 | \item{iter}{Number of simulation iterations} 30 | 31 | \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} 32 | } 33 | \description{ 34 | Simulate p-hacking with variable transformations 35 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 36 | } 37 | -------------------------------------------------------------------------------- /phackR/phackR.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /phackR/tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(phackR) 3 | 4 | test_check("phackR") 5 | -------------------------------------------------------------------------------- /phackR/vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /phacking_compendium.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /simulations/00_simulation_helpers.R: 
-------------------------------------------------------------------------------- 1 | findFPrate <- function(simresult, alpha = 0.05, hack = TRUE){ 2 | 3 | if(hack){ 4 | FP.firstsig <- sapply(simresult$firstsig, 5 | function(x) {sum(x$ps.hack < alpha) / nrow(x)}) 6 | FP.smallest <- sapply(simresult$smallest, 7 | function(x) {sum(x$ps.hack < alpha) / nrow(x)}) 8 | FP.smallestsig <- sapply(simresult$smallestsig, 9 | function(x) {sum(x$ps.hack < alpha) / nrow(x)}) 10 | } else { 11 | FP.firstsig <- sapply(simresult$firstsig, 12 | function(x) {sum(x$ps.orig < alpha) / nrow(x)}) 13 | FP.smallest <- sapply(simresult$smallest, 14 | function(x) {sum(x$ps.orig < alpha) / nrow(x)}) 15 | FP.smallestsig <- sapply(simresult$smallestsig, 16 | function(x) {sum(x$ps.orig < alpha) / nrow(x)}) 17 | } 18 | 19 | FP.rates <- rowMeans(cbind(FP.firstsig, FP.smallest, FP.smallestsig)) 20 | 21 | return(FP.rates) 22 | } 23 | 24 | -------------------------------------------------------------------------------- /simulations/combinedStrategies_simulation.R: -------------------------------------------------------------------------------- 1 | # Combined p-Hacking Strategies: Simulations 2 | 3 | SIM_combinedHack_t <- sim.combined.t(nobs.group = 100, 4 | nDV = 5, 5 | rDV = 0.6, 6 | nCOV = 3, 7 | rCOV = 0.3, 8 | rcovdv = 0.3, 9 | nSUB = 3, 10 | roundinglevel = 0.051, 11 | alternative = "two.sided", 12 | strategy = "firstsig", 13 | alpha = 0.05, 14 | iter = 10000) 15 | 16 | save(SIM_combinedHack_t, file = "simulations/SIM_combinedHack_t.RData") 17 | 18 | SIM_combinedHack_reg <- sim.combined.reg(nobs = 100, 19 | missing = 0.1, 20 | ncompv = 5, 21 | rcomp = 0.75, 22 | roundinglevel = 0.051, 23 | nImpMethods = 5, 24 | transvar = "xy", 25 | ndelete = 3, 26 | nOutMethods = 3, 27 | strategy = "firstsig", 28 | alpha = 0.05, 29 | iter = 10000) 30 | 31 | save(SIM_combinedHack_reg, file = "simulations/SIM_combinedHack_reg.RData") 32 | 33 | 34 | # Combined p-Hacking Strategies: Simulations with alpha = 0.005 35 
| 36 | SIM_combinedHack_t_005 <- sim.combined.t(nobs.group = 100, 37 | nDV = 5, 38 | rDV = 0.6, 39 | nCOV = 3, 40 | rCOV = 0.3, 41 | rcovdv = 0.3, 42 | nSUB = 3, 43 | roundinglevel = 0.0051, 44 | alternative = "two.sided", 45 | strategy = "firstsig", 46 | alpha = 0.005, 47 | iter = 10000) 48 | 49 | save(SIM_combinedHack_t_005, file = "simulations/SIM_combinedHack_t_005.RData") 50 | 51 | SIM_combinedHack_reg_005 <- sim.combined.reg(nobs = 100, 52 | missing = 0.1, 53 | ncompv = 5, 54 | rcomp = 0.75, 55 | roundinglevel = 0.0051, 56 | nImpMethods = 5, 57 | transvar = "xy", 58 | ndelete = 3, 59 | nOutMethods = 3, 60 | strategy = "firstsig", 61 | alpha = 0.005, 62 | iter = 10000) 63 | 64 | save(SIM_combinedHack_reg_005, file = "simulations/SIM_combinedHack_reg_005.RData") 65 | -------------------------------------------------------------------------------- /simulations/compscoreHack_simulation.R: -------------------------------------------------------------------------------- 1 | # Scale Redefinition / Composite Scores: Simulation 2 | 3 | #### Conditions #### 4 | nobs <- c(30, 50, 100, 300) 5 | ncompv <- c(5, 10) 6 | rcomp <- c(0.3, 0.7) 7 | ndelete <- c(1, 3, 7) 8 | strategy <- c("firstsig", "smallest", "smallest.sig") 9 | 10 | cond.compscoreHack <- expand.grid(nobs, ncompv, rcomp, ndelete) 11 | cond.compscoreHack <- cond.compscoreHack[cond.compscoreHack$Var4 < cond.compscoreHack$Var2, ] 12 | 13 | #### Simulation #### 14 | 15 | simresults.compscoreHack <- list() 16 | 17 | simmultiple.compscoreHack <- function(par, strategy){ 18 | data.frame(sim.compscoreHack(nobs = par[1], 19 | ncompv = par[2], 20 | rcomp = par[3], 21 | ndelete = par[4], 22 | strategy = strategy, 23 | iter = 10000, 24 | alpha = 0.05)) 25 | } 26 | 27 | simresults.compscoreHack$firstsig <- apply(cond.compscoreHack, 1, function(x) { 28 | simmultiple.compscoreHack(x, strategy = "firstsig") 29 | }) 30 | 31 | simresults.compscoreHack$smallest <- apply(cond.compscoreHack, 1, function(x) { 32 | 
simmultiple.compscoreHack(x, strategy = "smallest") 33 | }) 34 | 35 | simresults.compscoreHack$smallestsig <- apply(cond.compscoreHack, 1, function(x) { 36 | simmultiple.compscoreHack(x, strategy = "smallest.sig") 37 | }) 38 | 39 | save(simresults.compscoreHack, file = "simulations/SIM_compscoreHack.RData") 40 | -------------------------------------------------------------------------------- /simulations/covhack_simulation.R: -------------------------------------------------------------------------------- 1 | # Exploiting Covariates: Simulation 2 | 3 | #### Conditions #### 4 | nobs.group <- c(30, 50, 100, 300) 5 | ncov <- c(3, 5, 10) 6 | rcov <- c(0, 0.3, 0.8) 7 | rcovdv <- c(0, 0.3) 8 | strategy <- c("firstsig", "smallest", "smallest.sig") 9 | 10 | cond.covhack <- expand.grid(nobs.group, ncov, rcov, rcovdv) 11 | 12 | #### Simulation #### 13 | 14 | simresults.covhack <- list() 15 | 16 | simmultiple.covhack <- function(par, strategy){ 17 | data.frame(sim.covhack(nobs.group = par[1], 18 | ncov = par[2], 19 | rcov = par[3], 20 | rcovdv = par[4], 21 | strategy = strategy, 22 | interactions = FALSE, 23 | iter = 10000, 24 | alpha = 0.05)) 25 | } 26 | 27 | simresults.covhack$firstsig <- apply(cond.covhack, 1, function(x) { 28 | simmultiple.covhack(x, strategy = "firstsig") 29 | }) 30 | 31 | simresults.covhack$smallest <- apply(cond.covhack, 1, function(x) { 32 | simmultiple.covhack(x, strategy = "smallest") 33 | }) 34 | 35 | simresults.covhack$smallestsig <- apply(cond.covhack, 1, function(x) { 36 | simmultiple.covhack(x, strategy = "smallest.sig") 37 | }) 38 | 39 | save(simresults.covhack, file = "simulations/SIM_covhack.RData") 40 | 41 | 42 | -------------------------------------------------------------------------------- /simulations/cutoffHack_simulation.R: -------------------------------------------------------------------------------- 1 | # Exploiting different Cut-Off Values: Simulation 2 | 3 | #### Conditions #### 4 | 5 | nobs <- c(30, 50, 100, 300) 6 | strategy 
<- c("firstsig", "smallest", "smallest.sig") 7 | 8 | cond.cutoffHack <- expand.grid(nobs) 9 | 10 | #### Simulation #### 11 | 12 | simresults.cutoffHack <- list() 13 | 14 | simmultiple.cutoffHack <- function(par, strategy){ 15 | data.frame(sim.cutoffHack(nobs = par[1], 16 | strategy = strategy, 17 | iter = 10000, 18 | alpha = 0.05)) 19 | 20 | } 21 | 22 | simresults.cutoffHack$firstsig <- apply(cond.cutoffHack, 1, function(x) { 23 | simmultiple.cutoffHack(x, strategy = "firstsig") 24 | }) 25 | 26 | simresults.cutoffHack$smallest <- apply(cond.cutoffHack, 1, function(x) { 27 | simmultiple.cutoffHack(x, strategy = "smallest") 28 | }) 29 | 30 | simresults.cutoffHack$smallestsig <- apply(cond.cutoffHack, 1, function(x) { 31 | simmultiple.cutoffHack(x, strategy = "smallest.sig") 32 | }) 33 | 34 | save(simresults.cutoffHack, file = "simulations/SIM_cutoffHack.RData") 35 | 36 | -------------------------------------------------------------------------------- /simulations/exploreNormality.R: -------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # SIMULATION TO INVESTIGATE NORMALITY OF RESIDUALS UNDER TRANSFORMATIONS 3 | # ============================================================================== 4 | 5 | source("./simulations/00_simulation_helpers.R") 6 | 7 | # Compute p-values of lm() and p-values and test statistics from normality tests 8 | # under different transformations 9 | 10 | VarTransExploration <- function(nobs, iter){ 11 | 12 | final <- array(dim=c(iter, 5, 4, 4)) 13 | 14 | # Simulate as many datasets as desired iterations 15 | dat <- list() 16 | for(i in 1:iter){ 17 | dat[[i]] <- .sim.multcor(nobs = nobs, nvar = 2, r = 0) 18 | } 19 | 20 | # Apply transformation and test to each dataset 21 | for(i in 1:iter){ 22 | 23 | df <- dat[[i]] 24 | x <- df[,1] 25 | y <- df[,2] 26 | 27 | Xtrans <- matrix(NA, nrow = nrow(df)) 28 | Xtrans[,1] <- x 29 | Ytrans 
<- matrix(NA, nrow = nrow(df)) 30 | Ytrans[,1] <- y 31 | 32 | Xtrans <- cbind(Xtrans, 33 | log(x+abs(min(x))+1e-10), # log transformation 34 | sqrt(x+abs(min(x))+1e-10), # square root transformation 35 | 1/x # inverse 36 | ) 37 | 38 | Ytrans <- cbind(Ytrans, 39 | log(y+abs(min(y))+1e-10), # log transformation 40 | sqrt(y+abs(min(y))+1e-10), # square root transformation 41 | 1/y # inverse 42 | ) 43 | 44 | ps.lm <- matrix(NA, nrow = 4, ncol = 4) 45 | ps.ks <- matrix(NA, nrow = 4, ncol = 4) 46 | ps.sw <- matrix(NA, nrow = 4, ncol = 4) 47 | stat.ks <- matrix(NA, nrow = 4, ncol = 4) 48 | stat.sw <- matrix(NA, nrow = 4, ncol = 4) 49 | 50 | for(j in 1:ncol(Xtrans)){ 51 | for(k in 1:ncol(Ytrans)){ 52 | mod <- summary(stats::lm(Ytrans[,k] ~ Xtrans[,j])) 53 | ps.lm[j,k] <- mod$coefficients[2, 4] 54 | ks <- ks.test(mod$residuals, "pnorm") 55 | ps.ks[j,k] <- ks$p.value 56 | stat.ks[j,k] <- ks$statistic 57 | sw <- shapiro.test(mod$residuals) 58 | ps.sw[j,k] <- sw$p.value 59 | stat.sw[j,k] <- sw$statistic 60 | } 61 | } 62 | 63 | res <- array(dim=c(5,4,4)) 64 | res[1,,] <- ps.lm 65 | res[2,,] <- ps.ks 66 | res[3,,] <- stat.ks 67 | res[4,,] <- ps.sw 68 | res[5,,] <- stat.sw 69 | 70 | final[i, , , ] <- res 71 | } 72 | 73 | return(final) 74 | } 75 | 76 | ############################ FOR N = 30 ######################################## 77 | 78 | explore30 <- VarTransExploration(nobs=30, iter=1000) 79 | 80 | mainsX <- matrix(c("X", "log(X)", "sqrt(X)", "1/X", rep("", 12)), nrow = 4, byrow=FALSE) 81 | mainsY <- matrix(c("Y", "log(Y)", "sqrt(Y)", "1/Y", rep("", 12)), nrow = 4, byrow=TRUE) 82 | 83 | # plot p values from lm 84 | par(mfrow=c(4,4), oma=c(0,0,2,0)) 85 | for(i in 1:4){ 86 | for(j in 1:4){ 87 | hist(explore30[, 1, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 88 | } 89 | } 90 | mtext("p-Value regression", side=3, line=0, outer=TRUE, cex=2) 91 | 92 | # plot p values from ks test 93 | for(i in 1:4){ 94 | for(j in 1:4){ 95 | hist(explore30[, 2, i, j], 
main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 96 | } 97 | } 98 | mtext("p-Value Kolmogorov-Smirnov test (residuals)", side=3, line=0, outer=TRUE, cex=2) 99 | 100 | # plot ks test statistic 101 | for(i in 1:4){ 102 | for(j in 1:4){ 103 | hist(explore30[, 3, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 104 | } 105 | } 106 | mtext("Test statistic Kolmogorov-Smirnov test (residuals)", side=3, line=0, outer=TRUE, cex=2) 107 | 108 | # plot p values from shapiro wilk 109 | for(i in 1:4){ 110 | for(j in 1:4){ 111 | hist(explore30[, 4, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 112 | } 113 | } 114 | mtext("p-Value Shapiro-Wilk test (residuals)", side=3, line=0, outer=TRUE, cex=2) 115 | 116 | 117 | # plot shapiro wilk test statistic 118 | for(i in 1:4){ 119 | for(j in 1:4){ 120 | hist(explore30[, 5, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 121 | } 122 | } 123 | mtext("Test statistic Shapiro-Wilk test (residuals)", side=3, line=0, outer=TRUE, cex=2) 124 | 125 | ############################ FOR N = 300 ######################################## 126 | 127 | explore300 <- VarTransExploration(nobs=300, iter=1000) 128 | 129 | mainsX <- matrix(c("X", "log(X)", "sqrt(X)", "1/X", rep("", 12)), nrow = 4, byrow=FALSE) 130 | mainsY <- matrix(c("Y", "log(Y)", "sqrt(Y)", "1/Y", rep("", 12)), nrow = 4, byrow=TRUE) 131 | 132 | # plot p values from lm 133 | par(mfrow=c(4,4), oma=c(0,0,2,0)) 134 | for(i in 1:4){ 135 | for(j in 1:4){ 136 | hist(explore300[, 1, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 137 | } 138 | } 139 | mtext("p-Value regression", side=3, line=0, outer=TRUE, cex=2) 140 | 141 | # plot p values from ks test 142 | for(i in 1:4){ 143 | for(j in 1:4){ 144 | hist(explore300[, 2, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 145 | } 146 | } 147 | mtext("p-Value Kolmogorov-Smirnov test (residuals)", side=3, line=0, outer=TRUE, cex=2) 148 | 149 | # plot ks test 
statistic 150 | for(i in 1:4){ 151 | for(j in 1:4){ 152 | hist(explore300[, 3, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 153 | } 154 | } 155 | mtext("Test statistic Kolmogorov-Smirnov test (residuals)", side=3, line=0, outer=TRUE, cex=2) 156 | 157 | # plot p values from shapiro wilk 158 | for(i in 1:4){ 159 | for(j in 1:4){ 160 | hist(explore300[, 4, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 161 | } 162 | } 163 | mtext("p-Value Shapiro-Wilk test (residuals)", side=3, line=0, outer=TRUE, cex=2) 164 | 165 | 166 | # plot shapiro wilk test statistic 167 | for(i in 1:4){ 168 | for(j in 1:4){ 169 | hist(explore300[, 5, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 170 | } 171 | } 172 | mtext("Test statistic Shapiro-Wilk test (residuals)", side=3, line=0, outer=TRUE, cex=2) 173 | 174 | -------------------------------------------------------------------------------- /simulations/impHack_simulation.R: -------------------------------------------------------------------------------- 1 | # Favorable Imputation: Simulation 2 | 3 | #### Conditions #### 4 | nobs <- c(30, 50, 100, 300) 5 | missing <- c(0.05, 0.2) 6 | howmany = c(3, 5, 10) 7 | 8 | cond.impHack <- expand.grid(nobs, missing, howmany) 9 | 10 | #### Simulation #### 11 | simresults.impHack <- list() 12 | 13 | simmultiple.impHack <- function(par, strategy){ 14 | data.frame(sim.impHack(nobs = par[1], 15 | missing = par[2], 16 | which = sample(1:10, size = par[3]), 17 | strategy = strategy, 18 | iter = 10000, 19 | alpha = 0.05)) 20 | 21 | 22 | } 23 | 24 | simresults.impHack$firstsig <- apply(cond.impHack, 1, function(x) { 25 | simmultiple.impHack(x, strategy = "firstsig") 26 | }) 27 | 28 | simresults.impHack$smallest <- apply(cond.impHack, 1, function(x) { 29 | simmultiple.impHack(x, strategy = "smallest") 30 | }) 31 | 32 | simresults.impHack$smallestsig <- apply(cond.impHack, 1, function(x) { 33 | simmultiple.impHack(x, strategy = "smallest.sig") 34 | }) 35 
save(simresults.impHack, file = "simulations/SIM_impHack.RData")

# ==============================================================================
# simulations/multDVhack_simulation.R
# ==============================================================================
# Selective Reporting of the Dependent Variable: Simulation

#### Conditions ####

nobs.group <- c(30, 50, 100, 300)  # number of observations per group
nvar <- c(3, 5, 10)                # number of dependent variables
r <- c(0, 0.3, 0.8)                # correlation between dependent variables

cond.multDVhack <- expand.grid(nobs.group, nvar, r)

#### Simulation ####

simresults.multDVhack <- list()

# One condition row -> one sim.multDVhack() run under the given strategy.
simmultiple.multDVhack <- function(par, strategy) {
  data.frame(sim.multDVhack(nobs.group = par[1],
                            nvar = par[2],
                            r = par[3],
                            strategy = strategy,
                            iter = 10000,
                            alternative = "two.sided",
                            alpha = 0.05))
}

simresults.multDVhack$firstsig <- apply(cond.multDVhack, 1, function(x) {
  simmultiple.multDVhack(x, strategy = "firstsig")
})

simresults.multDVhack$smallest <- apply(cond.multDVhack, 1, function(x) {
  simmultiple.multDVhack(x, strategy = "smallest")
})

simresults.multDVhack$smallestsig <- apply(cond.multDVhack, 1, function(x) {
  simmultiple.multDVhack(x, strategy = "smallest.sig")
})

save(simresults.multDVhack, file = "simulations/SIM_multDVhack.RData")

# ==============================================================================
# simulations/multIVHack_simulation.R
# ==============================================================================
# Selective Reporting of the Independent Variable: Simulation

#### Conditions ####

nobs.group <- c(30, 50, 100, 300)
nvar <- c(3, 5, 10)
r <- c(0, 0.3, 0.8)

cond.multIVhack <- expand.grid(nobs.group, nvar, r)

#### Simulation t-Test ####

simresults.multIVhack_ttest <- list()

# t-test variant: regression = FALSE.
simmultiple.multIVhack_ttest <- function(par, strategy) {
  data.frame(sim.multIVhack(nobs.group = par[1],
                            nvar = par[2],
                            r = par[3],
                            regression = FALSE,
                            strategy = strategy,
                            iter = 10000,
                            alternative = "two.sided",
                            alpha = 0.05))
}

simresults.multIVhack_ttest$firstsig <- apply(cond.multIVhack, 1, function(x) {
  simmultiple.multIVhack_ttest(x, strategy = "firstsig")
})

simresults.multIVhack_ttest$smallest <- apply(cond.multIVhack, 1, function(x) {
  simmultiple.multIVhack_ttest(x, strategy = "smallest")
})

simresults.multIVhack_ttest$smallestsig <- apply(cond.multIVhack, 1, function(x) {
  simmultiple.multIVhack_ttest(x, strategy = "smallest.sig")
})

save(simresults.multIVhack_ttest, file = "simulations/SIM_multIVhack_ttest.RData")

### Simulation regression ####

simresults.multIVhack_reg <- list()

# Regression variant: regression = TRUE; otherwise identical to the t-test run.
simmultiple.multIVhack_reg <- function(par, strategy) {
  data.frame(sim.multIVhack(nobs.group = par[1],
                            nvar = par[2],
                            r = par[3],
                            regression = TRUE,
                            strategy = strategy,
                            iter = 10000,
                            alternative = "two.sided",
                            alpha = 0.05))
}

simresults.multIVhack_reg$firstsig <- apply(cond.multIVhack, 1, function(x) {
  simmultiple.multIVhack_reg(x, strategy = "firstsig")
})

simresults.multIVhack_reg$smallest <- apply(cond.multIVhack, 1, function(x) {
  simmultiple.multIVhack_reg(x, strategy = "smallest")
})

simresults.multIVhack_reg$smallestsig <- apply(cond.multIVhack, 1, function(x) {
  simmultiple.multIVhack_reg(x, strategy = "smallest.sig")
})

save(simresults.multIVhack_reg, file = "simulations/SIM_multIVhack_reg.RData")

# ==============================================================================
# simulations/optstop_simulation.R
# ==============================================================================
# Optional Stopping: Simulation

#### Change n.max ####

# Conditions
n.min <- 5
n.max <- c(30, 50, 100, 300)
step <- c(1, 5, 10, 50)

cond.optstop_nmax <- expand.grid(n.max, step)

simresults.optstop_nmax <- list()

# Vary n.max and step while holding n.min fixed at 5.
simmultiple.optstop_nmax <- function(par) {
  data.frame(sim.optstop(n.min = 5,
                         n.max = par[1],
                         step = par[2],
                         alternative = "two.sided",
                         iter = 10000,
                         alpha = 0.05))
}

simresults.optstop_nmax <- apply(cond.optstop_nmax, 1, function(x) {
  simmultiple.optstop_nmax(x)
})

save(simresults.optstop_nmax, file = "simulations/SIM_optstop_nmax.RData")

#### Change n.min ####
n.min <- c(5, 30, 50, 100)
n.max <- 300
step <- c(1, 5, 10, 50)

cond.optstop_nmin <- expand.grid(n.min, step)

#### Simulation ####

simresults.optstop_nmin <- list()

# Vary n.min and step while holding n.max fixed at 300.
simmultiple.optstop_nmin <- function(par) {
  data.frame(sim.optstop(n.min = par[1],
                         n.max = 300,
                         step = par[2],
                         alternative = "two.sided",
                         iter = 10000,
                         alpha = 0.05))
}

simresults.optstop_nmin <- apply(cond.optstop_nmin, 1, function(x) {
  simmultiple.optstop_nmin(x)
})

save(simresults.optstop_nmin, file = "simulations/SIM_optstop_nmin.RData")

# ==============================================================================
# simulations/outHack_simulation.R
# ==============================================================================
# Outlier Exclusion: Simulation

#### Conditions ####
nobs <- c(30, 50, 100, 300)
howmany <- c(3, 5, 12)  # number of outlier-detection methods tried

cond.outHack <- expand.grid(nobs, howmany)

#### Simulation ####
simresults.outHack <- list()

# One condition row -> one sim.outHack() run; the method subset is drawn
# once per condition row.
simmultiple.outHack <- function(par, strategy) {
  data.frame(sim.outHack(nobs = par[1],
                         which = sample(1:12,
size = par[2]), 15 | strategy = strategy, 16 | iter = 10000, 17 | alpha = 0.05)) 18 | } 19 | 20 | simresults.outHack$firstsig <- apply(cond.outHack, 1, function(x) { 21 | simmultiple.outHack(x, strategy = "firstsig") 22 | }) 23 | 24 | simresults.outHack$smallest <- apply(cond.outHack, 1, function(x) { 25 | simmultiple.outHack(x, strategy = "smallest") 26 | }) 27 | 28 | simresults.outHack$smallestsig <- apply(cond.outHack, 1, function(x) { 29 | simmultiple.outHack(x, strategy = "smallest.sig") 30 | }) 31 | 32 | save(simresults.outHack, file = "simulations/SIM_outHack.RData") 33 | 34 | -------------------------------------------------------------------------------- /simulations/plot_BFdist.R: -------------------------------------------------------------------------------- 1 | # Compute Bayes factors from p-values 2 | 3 | # Get simulation results 4 | 5 | source("simulations/00_simulation_helpers.R") 6 | lapply(paste0("simulations/", 7 | dir("simulations")[grepl(dir("simulations"), pattern="SIM*")]), 8 | load, 9 | .GlobalEnv) 10 | 11 | library(ggplot2) 12 | library(dplyr) 13 | library(BayesFactor) 14 | 15 | # Conditions for multiple dependent variables 16 | 17 | nobs.group <- c(30, 50, 100, 300) # number of observations per group 18 | nvar <- c(3, 5, 10) # number of dependent variables 19 | r <- c(0, 0.3, 0.8) # correlation between dependent variables 20 | 21 | cond.multDVhack <- expand.grid(nobs.group, nvar, r) 22 | 23 | # ------------------------------------------------------------------------------ 24 | # Extract p-values and t-values and compute BFs #### 25 | # ------------------------------------------------------------------------------ 26 | 27 | getBFs <- function(simdat, conddat, nobs, r, strategy){ 28 | 29 | cond.plot <- which(conddat$Var1 == nobs & conddat$Var3 == r) 30 | 31 | # extract p-values and t-values 32 | ps.hack <- unlist(lapply(simdat[[strategy]][cond.plot], function(x) x$ps.hack)) 33 | ts.hack <- qt(ps.hack/2, df=nobs-2) 34 | ps.nohack <- 
unlist(lapply(simdat[[strategy]][cond.plot], function(x) x$ps.orig)) 35 | ts.nohack <- qt(ps.nohack/2, df=nobs-2) 36 | 37 | # compute BFs 38 | BFs.hack <- lapply(ts.hack, function(x) ttest.tstat(x, n1=nobs, n2=nobs, simple=TRUE)) 39 | BFs.hack <- unname(simplify2array(BFs.hack)) 40 | BFs.nohack <- lapply(ts.nohack, function(x) ttest.tstat(x, n1=nobs, n2=nobs, simple=TRUE)) 41 | BFs.nohack <- unname(simplify2array(BFs.nohack)) 42 | 43 | return(cbind(BFs.hack, BFs.nohack)) 44 | } 45 | 46 | # Compute all BFs 47 | 48 | BF50_firstsig <- getBFs(simdat=simresults.multDVhack, conddat=cond.multDVhack, nobs=50, r=0, strategy="firstsig") 49 | BF50_smallestsig <- getBFs(simdat=simresults.multDVhack, conddat=cond.multDVhack, nobs=50, r=0, strategy="smallestsig") 50 | BF50_smallest <- getBFs(simdat=simresults.multDVhack, conddat=cond.multDVhack, nobs=50, r=0, strategy="smallest") 51 | 52 | BF300_firstsig <- getBFs(simdat=simresults.multDVhack, conddat=cond.multDVhack, nobs=300, r=0, strategy="firstsig") 53 | BF300_smallestsig <- getBFs(simdat=simresults.multDVhack, conddat=cond.multDVhack, nobs=300, r=0, strategy="smallestsig") 54 | BF300_smallest <- getBFs(simdat=simresults.multDVhack, conddat=cond.multDVhack, nobs=300, r=0, strategy="smallest") 55 | 56 | 57 | # ------------------------------------------------------------------------------ 58 | # Plot the legend #### 59 | # ------------------------------------------------------------------------------ 60 | 61 | plot.new() 62 | legend(x = "topleft", legend = c("p-hacked", "original"), fill = c("#FFAE4A", "#5AB4BD")) 63 | 64 | # ------------------------------------------------------------------------------ 65 | # BF plots #### 66 | # ------------------------------------------------------------------------------ 67 | 68 | plotBFDist <- function(BFobj, conddat, nobs, r){ 69 | 70 | cond.plot <- which(conddat$Var1 == nobs & conddat$Var3 == r) 71 | iter <- nrow(BFobj) 72 | nDV <- rep(conddat[cond.plot,]$Var2, each=iter) 73 | BFs.hack 
<- BFobj[,1] 74 | BFs.nohack <- BFobj[,2] 75 | 76 | plotdat <- data.frame(lbfs.hack=log(BFs.hack), 77 | lbfs.nohack=log(BFs.nohack), 78 | nDV=as.factor(nDV)) 79 | 80 | newplotdat <- plotdat %>% # compute densities from ES 81 | group_by(nDV) %>% 82 | do(data.frame(loc.bfs = density(.$lbfs.hack)$x, 83 | dens.bfs = density(.$lbfs.hack)$y / (2.2*max(density(.$lbfs.hack)$y)), 84 | loc.bfs.nohack = density(.$lbfs.nohack)$x, 85 | dens.bfs.nohack = -1*density(.$lbfs.nohack)$y/(2*max(density(.$lbfs.nohack)$y)))) 86 | newplotdat$dens.bfs <- newplotdat$dens.bfs + as.numeric(newplotdat$nDV) # y-offset for different number of DVs 87 | newplotdat$dens.bfs.nohack <- newplotdat$dens.bfs.nohack + as.numeric(newplotdat$nDV) 88 | 89 | newplotdat$title <- paste0("N = ", nobs) 90 | 91 | ggplot(data=newplotdat, aes(group = nDV)) + 92 | geom_polygon(aes(y=dens.bfs, x=loc.bfs), fill = "#FFAE4A") + 93 | geom_polygon(aes(y=dens.bfs.nohack, x=loc.bfs.nohack), fill = "#5AB4BD") + 94 | labs(x = "Bayes factor", 95 | y = "Number of dependent variables") + 96 | theme_bw() + 97 | theme(text = element_text(size=35), 98 | axis.title = element_text(size=25), 99 | axis.text = element_text(size=25)) + 100 | scale_y_continuous(breaks = c(1,2,3), labels = c("3", "5", "10")) + 101 | scale_x_continuous(breaks = log(c(1/10, 1/3, 1, 3, 10)), labels = c("1/10", "1/3", "1", "3", "10")) + 102 | coord_cartesian(xlim = log(c(1/20, 100))) + 103 | facet_grid(. 
~ title) + 104 | geom_vline(xintercept = 0, linetype = "dashed", col = "grey", lwd = 1.5) 105 | 106 | 107 | } 108 | 109 | plotBFDist(BFobj=BF50_firstsig, conddat=cond.multDVhack, nobs=50, r=0) 110 | plotBFDist(BFobj=BF50_smallestsig, conddat=cond.multDVhack, nobs=50, r=0) 111 | plotBFDist(BFobj=BF50_smallest, conddat=cond.multDVhack, nobs=50, r=0) 112 | 113 | plotBFDist(BFobj=BF300_firstsig, conddat=cond.multDVhack, nobs=300, r=0) 114 | plotBFDist(BFobj=BF300_smallestsig, conddat=cond.multDVhack, nobs=300, r=0) 115 | plotBFDist(BFobj=BF300_smallest, conddat=cond.multDVhack, nobs=300, r=0) 116 | -------------------------------------------------------------------------------- /simulations/plot_redefineSig.R: -------------------------------------------------------------------------------- 1 | # Impact of redefining statistical significance 2 | 3 | # Get simulation results 4 | 5 | source("simulations/00_simulation_helpers.R") 6 | lapply(paste0("simulations/", 7 | dir("simulations")[grepl(dir("simulations"), pattern="SIM*")]), 8 | load, 9 | .GlobalEnv) 10 | 11 | library(ggplot2) 12 | 13 | # Function to calculate FP-rate only from smallest strategy 14 | 15 | findFPrateR <- function(simresult, alpha = 0.005){ 16 | sapply(simresult$smallestsig, 17 | function(x) {sum(x$ps.hack < alpha) / nrow(x)}) 18 | } 19 | 20 | # Find FP-rates 21 | 22 | FP.multDV <- max(unname(findFPrate(simresults.multDVhack))) 23 | FPR.multDV <- max(unname(findFPrateR(simresults.multDVhack))) 24 | 25 | FP.multIV <- max(unname(findFPrate(simresults.multIVhack_reg))) 26 | FPR.multIV <- max(unname(findFPrateR(simresults.multIVhack_reg))) 27 | 28 | FP.optstop <- max(sapply(simresults.optstop_nmin, 29 | function(x) {sum(x$ps.hack < 0.05) / nrow(x)})) 30 | 31 | # optional stopping requires re-simulating with alpha=0.005 because stopping 32 | # was determined on reaching 0.05 33 | # ---- 34 | n.min <- 5 35 | n.max <- c(300) #Var1 36 | step = c(1) #Var2 37 | 38 | cond.optstop <- expand.grid(n.max, step) 39 | 
simresults.optstopR <- list()

# Optional stopping re-run with the redefined threshold alpha = 0.005.
simmultiple.optstop <- function(par) {
  data.frame(sim.optstop(n.min = 5,
                         n.max = par[1],
                         step = par[2],
                         alternative = "two.sided",
                         iter = 10000,
                         alpha = 0.005))
}

simresults.optstopR <- apply(cond.optstop, 1, function(x) {
  simmultiple.optstop(x)
})
save(simresults.optstopR, file = "simulations/SIM_optstop_Redefine.RData")

# ------

FPR.optstop <- max(sapply(simresults.optstopR,
                          function(x) {sum(x$ps.hack < 0.005) / nrow(x)}))

FP.outHack <- max(unname(findFPrate(simresults.outHack)))
FPR.outHack <- max(unname(findFPrateR(simresults.outHack)))

FP.covHack <- max(unname(findFPrate(simresults.covhack)))
FPR.covHack <- max(unname(findFPrateR(simresults.covhack)))

FP.compscoreHack <- max(unname(findFPrate(simresults.compscoreHack)))
FPR.compscoreHack <- max(unname(findFPrateR(simresults.compscoreHack)))

FP.varTransHack <- max(unname(findFPrate(simresults.varTransHack_nonormtest)))
FPR.varTransHack <- max(unname(findFPrateR(simresults.varTransHack_nonormtest)))

FP.cutoffHack <- max(unname(findFPrate(simresults.cutoffHack)))
FPR.cutoffHack <- max(unname(findFPrateR(simresults.cutoffHack)))

FP.statAnalysisHack <- max(unname(findFPrate(simresults.statAnalysisHack)))
FPR.statAnalysisHack <- max(unname(findFPrateR(simresults.statAnalysisHack)))

FP.impHack <- max(unname(findFPrate(simresults.impHack)))
FPR.impHack <- max(unname(findFPrateR(simresults.impHack)))

FP.subgroupHack <- max(unname(findFPrate(simresults.subgroupHack)))
FPR.subgroupHack <- max(unname(findFPrateR(simresults.subgroupHack)))

FPregular <- c(FP.multDV, FP.multIV, FP.optstop, FP.outHack, FP.covHack,
               FP.compscoreHack, FP.varTransHack, FP.cutoffHack,
               FP.statAnalysisHack, FP.impHack, FP.subgroupHack)

FPredefined <- c(FPR.multDV, FPR.multIV, FPR.optstop, FPR.outHack, FPR.covHack,
                 FPR.compscoreHack, FPR.varTransHack, FPR.cutoffHack,
                 FPR.statAnalysisHack, FPR.impHack, FPR.subgroupHack)

FPregularByTen <- FPregular/10

plotdat <- data.frame(FP.rate = c(FPregular, FPredefined, FPregularByTen),
                      whichFP = rep(c("0.05", "0.005", "byTen"), each = length(FPregular)),
                      strategy = rep(1:length(FPregular), 3),
                      linetype = rep(c(1, 1, 2), each = length(FPregular)),
                      linecolor = rep(c(1, 2, 1), each = length(FPregular)))

ggplot(plotdat, aes(x = strategy,
                    y = FP.rate,
                    group = as.factor(whichFP),
                    linetype = as.factor(linetype),
                    colour = as.factor(whichFP))) +
  geom_hline(yintercept = 0.05, col = "grey") +
  geom_hline(yintercept = 0.005, col = "grey") +
  geom_line(size = 1) +
  scale_x_continuous(breaks = c(1:11),
                     labels = c("Selective reporting DV", "Selective reporting IV",
                                "Optional Stopping", "Outlier exclusion",
                                "Controlling covariates", "Scale redefinition",
                                "Variable transformation", "Discretizing variables",
                                "Alt. hypothesis tests", "Favorable imputation",
                                "Inclusion criteria")) +
  labs(x = "",
       y = "Highest false positive rate") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        text = element_text(size = 20),
        axis.title = element_text(size = 20, colour = "grey30"),
        legend.position = "none",
        plot.margin = unit(c(10, 0, 0, 40), unit = "pt")) +
  scale_color_manual(values = c("#009975", "black", "grey"))

plot.new()
legend(x = "topleft", legend = c("p-hacked: p < 0.05", "p-hacked: p < 0.005"), col = c("black", "#009975"), lty = "solid", lwd = 3)

# ==============================================================================
# simulations/roundHack_simulation.R
# ==============================================================================
# Incorrect Rounding: Simulation

#### Conditions ####
roundinglevel <- c(0.051, 0.055, 0.1)

cond.roundHack <- expand.grid(roundinglevel)

#### Simulation ####

# `strategy` is accepted for signature consistency with the other sim wrappers
# but is unused: sim.roundhack has no strategy argument.
simmultiple.roundHack <- function(par, strategy) {
  data.frame(sim.roundhack(roundinglevel = par[1],
                           iter = 10000,
                           alpha = 0.05))
}

simresults.roundHack <- apply(cond.roundHack, 1, function(x) {
  simmultiple.roundHack(x)
})

# FIX: every other simulation script persists its results to
# simulations/SIM_*.RData; previously this script's results were silently lost
# when the R session ended.
save(simresults.roundHack, file = "simulations/SIM_roundHack.RData")

# ==============================================================================
# simulations/statAnalysisHack_simulation.R
# ==============================================================================
# Exploit Statistical Analysis Options: Simulation

#### Conditions ####
nobs.group <- c(30, 50, 100, 300)

cond.statAnalysisHack <- expand.grid(nobs.group)

#### Simulation ####
simresults.statAnalysisHack <- list()

simmultiple.statAnalysisHack <- function(par, strategy) {
  data.frame(sim.statAnalysisHack(nobs.group = par[1],
                                  strategy = strategy,
                                  iter = 10000,
                                  alternative = "two.sided",
                                  alpha = 0.05))
}

simresults.statAnalysisHack$firstsig <- apply(cond.statAnalysisHack, 1, function(x) {
  simmultiple.statAnalysisHack(x, strategy = "firstsig")
})

simresults.statAnalysisHack$smallest <- apply(cond.statAnalysisHack, 1, function(x) {
  simmultiple.statAnalysisHack(x, strategy = "smallest")
})

simresults.statAnalysisHack$smallestsig <- apply(cond.statAnalysisHack, 1, function(x) {
  simmultiple.statAnalysisHack(x, strategy = "smallest.sig")
})

save(simresults.statAnalysisHack, file = "simulations/SIM_statAnalysisHack.RData")

# ==============================================================================
# simulations/subgroupHack_simulation.R
# ==============================================================================
# Subgroup Analysis: Simulation

#### Conditions ####
nobs.group <- c(30, 50, 100, 300)
nsubvars <- c(1, 3, 5)  # number of subgroup variables

cond.subgroupHack <- expand.grid(nobs.group, nsubvars)

#### Simulation ####
simresults.subgroupHack <- list()

simmultiple.subgroupHack <- function(par, strategy) {
  data.frame(sim.subgroupHack(nobs.group = par[1],
                              nsubvars = par[2],
                              strategy = strategy,
                              iter = 10000,
                              alternative = "two.sided",
                              alpha = 0.05))
}

simresults.subgroupHack$firstsig <- apply(cond.subgroupHack, 1, function(x) {
  simmultiple.subgroupHack(x, strategy = "firstsig")
})

simresults.subgroupHack$smallest <- apply(cond.subgroupHack, 1, function(x) {
  simmultiple.subgroupHack(x, strategy = "smallest")
})

simresults.subgroupHack$smallestsig <- apply(cond.subgroupHack, 1, function(x) {
  simmultiple.subgroupHack(x, strategy = "smallest.sig")
})

save(simresults.subgroupHack, file = "simulations/SIM_subgroupHack.RData")

# ==============================================================================
# simulations/varTransHack_simulation.R
# ==============================================================================
# Variable Transformation: Simulation

#### Conditions ####
nobs <- c(30, 50, 100, 300)
transvar <- c(1:3)  # 1 = transform x, 2 = y, 3 = both

cond.varTransHack <- expand.grid(nobs, transvar)

#### Simulation without tests of normality of residuals ####
simresults.varTransHack_nonormtest <- list()

# NOTE(review): par[2] is numeric, so switch() returns the par[2]-th alternative
# (1, 2, or 3) — the "x"/"y"/"xy" names are never matched. The net effect is
# transvar = par[2]; kept as-is to preserve the original call exactly.
simmultiple.varTransHack <- function(par, strategy) {
  data.frame(sim.varTransHack(nobs = par[1],
                              transvar = switch(par[2],
                                                "x" = 1,
                                                "y" = 2,
                                                "xy" = 3),
                              testnorm = FALSE,
                              strategy = strategy,
                              iter = 10000,
                              alpha = 0.05))
}

simresults.varTransHack_nonormtest$firstsig <- apply(cond.varTransHack, 1, function(x) {
  simmultiple.varTransHack(x, strategy = "firstsig")
})

simresults.varTransHack_nonormtest$smallest <- apply(cond.varTransHack, 1, function(x) {
  simmultiple.varTransHack(x, strategy = "smallest")
})

simresults.varTransHack_nonormtest$smallestsig <- apply(cond.varTransHack, 1, function(x) {
  simmultiple.varTransHack(x, strategy = "smallest.sig")
})

save(simresults.varTransHack_nonormtest, file = "simulations/SIM_varTransHack_nonormtest.RData")

#### Simulation with tests of normality of residuals ####

simresults.varTransHack_normtest <- list()

# Redefines simmultiple.varTransHack (as in the original script) with
# testnorm = TRUE; the nonormtest runs above have already completed.
simmultiple.varTransHack <- function(par, strategy) {
  data.frame(sim.varTransHack(nobs = par[1],
                              transvar = switch(par[2],
                                                "x" = 1,
                                                "y" = 2,
                                                "xy" = 3),
                              testnorm = TRUE,
                              strategy = strategy,
                              iter = 10000,
                              alpha = 0.05))
}

simresults.varTransHack_normtest$firstsig <- apply(cond.varTransHack, 1, function(x) {
  simmultiple.varTransHack(x, strategy = "firstsig")
})

simresults.varTransHack_normtest$smallest <- apply(cond.varTransHack, 1, function(x) {
  simmultiple.varTransHack(x, strategy = "smallest")
})

simresults.varTransHack_normtest$smallestsig <- apply(cond.varTransHack, 1, function(x) {
  simmultiple.varTransHack(x, strategy = "smallest.sig")
})

save(simresults.varTransHack_normtest, file = "simulations/SIM_varTransHack_normtest.RData")