├── .gitignore ├── LICENSE ├── README.md ├── phackR ├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R │ ├── combinedStrategies_regression.R │ ├── combinedStrategies_ttest.R │ ├── compositeScores.R │ ├── exploitCovariates.R │ ├── exploitCutoffs.R │ ├── favorableImputation.R │ ├── helpers.R │ ├── incorrectRounding.R │ ├── optionalStopping.R │ ├── outlierExclusion.R │ ├── plotsShiny.R │ ├── runShinyPHack.R │ ├── selectiveReportingDV.R │ ├── selectiveReportingIV.R │ ├── statAnalysis.R │ ├── subgroupAnalysis.R │ └── variableTransformation.R ├── doc │ ├── phackR_vignette.R │ ├── phackR_vignette.Rmd │ └── phackR_vignette.html ├── inst │ ├── shiny-phack │ │ └── ShinyPHack │ │ │ ├── data │ │ │ └── startplots.rds │ │ │ ├── mddoc │ │ │ ├── 01_CompScores.md │ │ │ ├── 02_ExploitCovariates.md │ │ │ ├── 03_ExploitCutoffs.md │ │ │ ├── 04_FavorableImputation.md │ │ │ ├── 05_IncorrectRounding.md │ │ │ ├── 06_OptionalStopping.md │ │ │ ├── 07_OutlierExclusion.md │ │ │ ├── 09_SelectiveReportingDV.md │ │ │ ├── 10_SelectiveReportingIV.md │ │ │ ├── 11_StatAnalysis.md │ │ │ ├── 12_SubgroupAnalysis.md │ │ │ ├── 13_VariableTransformation.md │ │ │ └── landingPage.md │ │ │ ├── server.R │ │ │ └── ui.R │ └── sim_startplots_Shiny.R ├── man │ ├── dot-compCohensD.Rd │ ├── dot-compR2t.Rd │ ├── dot-compscoreHack.Rd │ ├── dot-covhack.Rd │ ├── dot-cutoffHack.Rd │ ├── dot-easyimpute.Rd │ ├── dot-estimate_mode.Rd │ ├── dot-extractoutlier.Rd │ ├── dot-impHack.Rd │ ├── dot-multDVhack.Rd │ ├── dot-multIVhack_reg.Rd │ ├── dot-multIVhack_ttest.Rd │ ├── dot-optstop.Rd │ ├── dot-out.boxplot.Rd │ ├── dot-out.cook.Rd │ ├── dot-out.covratio.Rd │ ├── dot-out.dfbeta.Rd │ ├── dot-out.dffits.Rd │ ├── dot-out.leverage.Rd │ ├── dot-out.mahalanobis.Rd │ ├── dot-out.percentrule.Rd │ ├── dot-out.residual.Rd │ ├── dot-out.sdrule.Rd │ ├── dot-out.stemleaf.Rd │ ├── dot-outHack.Rd │ ├── dot-roundhack.Rd │ ├── dot-selectpvalue.Rd │ ├── dot-sim.compscore.Rd │ ├── dot-sim.covariates.Rd │ ├── 
dot-sim.data.Rd │ ├── dot-sim.multDV.Rd │ ├── dot-sim.multIV.Rd │ ├── dot-sim.multcor.Rd │ ├── dot-sim.subgroup.Rd │ ├── dot-statAnalysisHack.Rd │ ├── dot-subgroupHack.Rd │ ├── dot-varTransHack.Rd │ ├── esplots.Rd │ ├── pplots.Rd │ ├── runShinyPHack.Rd │ ├── sim.compscoreHack.Rd │ ├── sim.covhack.Rd │ ├── sim.cutoffHack.Rd │ ├── sim.impHack.Rd │ ├── sim.multDVhack.Rd │ ├── sim.multIVhack.Rd │ ├── sim.optstop.Rd │ ├── sim.outHack.Rd │ ├── sim.roundhack.Rd │ ├── sim.statAnalysisHack.Rd │ ├── sim.subgroupHack.Rd │ └── sim.varTransHack.Rd ├── phackR.Rproj ├── tests │ ├── testthat.R │ └── testthat │ │ └── test-simfunctions.R └── vignettes │ ├── .gitignore │ └── phackR_vignette.Rmd ├── phacking_compendium.Rproj └── simulations ├── 00_simulation_helpers.R ├── combinedStrategies_simulation.R ├── compscoreHack_simulation.R ├── covhack_simulation.R ├── cutoffHack_simulation.R ├── exploreNormality.R ├── impHack_simulation.R ├── multDVhack_simulation.R ├── multIVHack_simulation.R ├── optstop_simulation.R ├── outHack_simulation.R ├── plot_BFdist.R ├── plot_ESDist.R ├── plot_FPcombined.R ├── plot_pCurve.R ├── plot_redefineSig.R ├── plots_FPrate.R ├── roundHack_simulation.R ├── statAnalysisHack_simulation.R ├── subgroupHack_simulation.R └── varTransHack_simulation.R /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | *.RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 21 | .httr-oauth 22 | 23 | # knitr and R markdown default cache directories 24 | /*_cache/ 25 | /cache/ 26 | 27 | # Temporary files created by R markdown 28 | *.utf8.md 29 | *.knit.md 30 | 31 | # Shiny token, see 
https://shiny.rstudio.com/articles/shinyapps.html 32 | rsconnect/ 33 | .Rproj.user 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Angelika Stefan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The p-Hacking Compendium: Simulating Different p-Hacking Strategies 2 | 3 | ## Project Description 4 | This project contains an R-package with code to simulate and investigate the effects of different p-hacking strategies. 
It has the following components: 5 | * Functions to simulate 12 different p-hacking strategies 6 | * A Shiny app to investigate the effects of p-hacking on the distribution of p-values, the rate of false positive results, and the distribution of effect sizes 7 | * Code to reproduce simulation results conducted in our upcoming preprint, as well as plots 8 | 9 | ## Installation 10 | The phackR package is not on CRAN, but you can install it from GitHub: 11 | 12 | ``` 13 | library(devtools) 14 | install_github("astefan1/phacking_compendium/phackR", build_vignettes = TRUE) 15 | ``` 16 | 17 | ## Package Description 18 | To get an overview of the structure of the code and the simulation functions in the package, read the package vignette: 19 | 20 | ``` 21 | library(phackR) 22 | utils::vignette("phackR_vignette", "phackR") 23 | ``` 24 | 25 | ## Shiny App 26 | You can start the Shiny app directly from the package by using the following code: 27 | 28 | ``` 29 | phackR::runShinyPHack() 30 | ``` 31 | 32 | Alternatively, you can directly access the Shiny app online via [https://shiny.psy.lmu.de/felix/ShinyPHack/](https://shiny.psy.lmu.de/felix/ShinyPHack/) 33 | 34 | ## Simulation Results 35 | All simulation results can be reproduced using the code in the [_/simulations_ folder of this Github project](https://github.com/astefan1/phacking_compendium/tree/master/simulations). First, follow the steps above to install the phackR package. Then, run the script "00_simulation_helpers.R", followed by all R scripts with the "\_simulation.R" suffix. Results can be visualized using the scripts with the prefix "plot\_". 
36 | 37 | 38 | ``` 39 | > sessionInfo() 40 | R version 4.2.1 (2022-06-23) 41 | Platform: x86_64-apple-darwin17.0 (64-bit) 42 | Running under: macOS Big Sur 11.6.8 43 | 44 | Matrix products: default 45 | LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib 46 | 47 | locale: 48 | [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 49 | 50 | attached base packages: 51 | [1] stats graphics grDevices utils datasets methods base 52 | 53 | other attached packages: 54 | [1] BayesFactor_0.9.12-4.4 Matrix_1.5-1 coda_0.19-4 55 | [4] phackR_0.0.0.9000 dplyr_1.0.10 ggforce_0.4.1 56 | [7] R.devices_2.17.1 wesanderson_0.3.6 ggplot2_3.3.6 57 | [10] testthat_3.1.5 58 | 59 | loaded via a namespace (and not attached): 60 | [1] fs_1.5.2 usethis_2.1.6 devtools_2.4.5 insight_0.18.6 61 | [5] rprojroot_2.0.3 tools_4.2.1 profvis_0.3.7 backports_1.4.1 62 | [9] utf8_1.2.2 R6_2.5.1 colorspace_2.0-3 urlchecker_1.0.1 63 | [13] withr_2.5.0 tidyselect_1.1.2 prettyunits_1.1.1 processx_3.7.0 64 | [17] compiler_4.2.1 sgeostat_1.0-27 performance_0.10.0 cli_3.4.1 65 | [21] mice_3.14.0 desc_1.4.2 labeling_0.4.2 scales_1.2.1 66 | [25] DEoptimR_1.0-11 mvtnorm_1.1-3 robustbase_0.95-0 mc2d_0.1-21 67 | [29] callr_3.7.2 pbapply_1.5-0 stringr_1.4.1 digest_0.6.29 68 | [33] rmarkdown_2.17 R.utils_2.12.0 base64enc_0.1-3 WRS2_1.1-4 69 | [37] pkgconfig_2.0.3 htmltools_0.5.3 sessioninfo_1.2.2 fastmap_1.1.0 70 | [41] htmlwidgets_1.5.4 rlang_1.0.6 rstudioapi_0.14 shiny_1.7.2 71 | [45] generics_0.1.3 farver_2.1.1 car_3.1-1 R.oo_1.25.0 72 | [49] magrittr_2.0.3 Rcpp_1.0.9 munsell_0.5.0 fansi_1.0.3 73 | [53] abind_1.4-5 lifecycle_1.0.3 R.methodsS3_1.8.2 yaml_2.3.5 74 | [57] stringi_1.7.8 carData_3.0-5 MASS_7.3-57 brio_1.1.3 75 | [61] pkgbuild_1.3.1 plyr_1.8.7 grid_4.2.1 parallel_4.2.1 76 | [65] promises_1.2.0.1 shinydashboard_0.7.2 forcats_0.5.2 crayon_1.5.2 77 | [69] miniUI_0.1.1.1 lattice_0.20-45 knitr_1.40 aplpack_1.3.5 78 | [73] ps_1.7.1 pillar_1.8.1 tcltk_4.2.1 
pkgload_1.3.1 79 | [77] glue_1.6.2 evaluate_0.17 remotes_2.4.2 vctrs_0.4.2 80 | [81] tweenr_2.0.2 httpuv_1.6.6 MatrixModels_0.5-1 gtable_0.3.1 81 | [85] purrr_0.3.5 polyclip_1.10-4 tidyr_1.2.1 reshape_0.8.9 82 | [89] cachem_1.0.6 xfun_0.33 mime_0.12 xtable_1.8-4 83 | [93] broom_1.0.1 later_1.3.0 tibble_3.1.8 memoise_2.0.1 84 | [97] mvoutlier_2.1.1 ellipsis_0.3.2 85 | ``` 86 | 87 | 88 | -------------------------------------------------------------------------------- /phackR/.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^doc$ 4 | ^Meta$ 5 | -------------------------------------------------------------------------------- /phackR/.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | Meta 5 | /doc/ 6 | /Meta/ 7 | -------------------------------------------------------------------------------- /phackR/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: phackR 2 | Title: Simulate p-Hacking 3 | Version: 0.0.0.9000 4 | Authors@R: person("Angelika M.", "Stefan", email = "a.m.stefan@uva.nl", role = c("aut", "cre")) 5 | Description: Many different questionable research practices have been described in the literature. This package contains functions to simulate different sorts p-hacking and allows to analyze their impact on the distributions of p-values. 
6 | Depends: R (>= 3.6.0) 7 | Imports: aplpack, R.devices, car, mvoutlier, dplyr, performance, WRS2, mice, rlang, shiny, pbapply, tidyr, ggplot2, shinydashboard, magrittr, grid, Matrix 8 | License: MIT + file LICENSE 9 | Encoding: UTF-8 10 | LazyData: true 11 | Suggests: 12 | testthat, 13 | knitr, 14 | rmarkdown 15 | RoxygenNote: 7.2.1 16 | VignetteBuilder: knitr 17 | -------------------------------------------------------------------------------- /phackR/LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2019 2 | COPYRIGHT HOLDER: Angelika Stefan, Felix Schönbrodt -------------------------------------------------------------------------------- /phackR/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(runShinyPHack) 4 | export(sim.compscoreHack) 5 | export(sim.covhack) 6 | export(sim.cutoffHack) 7 | export(sim.impHack) 8 | export(sim.multDVhack) 9 | export(sim.multIVhack) 10 | export(sim.optstop) 11 | export(sim.outHack) 12 | export(sim.roundhack) 13 | export(sim.statAnalysisHack) 14 | export(sim.subgroupHack) 15 | export(sim.varTransHack) 16 | importFrom(R.devices,suppressGraphics) 17 | importFrom(WRS2,yuen) 18 | importFrom(aplpack,stem.leaf) 19 | importFrom(car,Anova) 20 | importFrom(dplyr,"%>%") 21 | importFrom(dplyr,all_of) 22 | importFrom(dplyr,do) 23 | importFrom(dplyr,group_by_at) 24 | importFrom(dplyr,mutate) 25 | importFrom(ggplot2,aes) 26 | importFrom(ggplot2,annotation_custom) 27 | importFrom(ggplot2,coord_cartesian) 28 | importFrom(ggplot2,element_text) 29 | importFrom(ggplot2,geom_col) 30 | importFrom(ggplot2,geom_histogram) 31 | importFrom(ggplot2,geom_segment) 32 | importFrom(ggplot2,geom_vline) 33 | importFrom(ggplot2,ggplot) 34 | importFrom(ggplot2,ggtitle) 35 | importFrom(ggplot2,layer_scales) 36 | importFrom(ggplot2,scale_fill_manual) 37 | importFrom(ggplot2,scale_x_continuous) 38 | 
importFrom(ggplot2,scale_y_continuous)
importFrom(ggplot2,theme)
importFrom(ggplot2,theme_light)
importFrom(ggplot2,waiver)
importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(graphics,boxplot)
importFrom(grid,gpar)
importFrom(grid,grobTree)
importFrom(grid,textGrob)
importFrom(magrittr,"%$%")
importFrom(mice,complete)
importFrom(mvoutlier,uni.plot)
importFrom(pbapply,pblapply)
importFrom(performance,item_reliability)
importFrom(rlang,.data)
importFrom(shiny,incProgress)
importFrom(shiny,withProgress)
importFrom(stats,aov)
importFrom(stats,as.formula)
importFrom(stats,cooks.distance)
importFrom(stats,cor)
importFrom(stats,density)
importFrom(stats,dfbeta)
importFrom(stats,dffits)
importFrom(stats,hatvalues)
importFrom(stats,influence.measures)
importFrom(stats,lm)
importFrom(stats,median)
importFrom(stats,qf)
importFrom(stats,quantile)
importFrom(stats,rnorm)
importFrom(stats,rstandard)
importFrom(stats,rstudent)
importFrom(stats,sd)
importFrom(stats,t.test)
importFrom(stats,wilcox.test)
importFrom(utils,capture.output)
importFrom(utils,tail)
-------------------------------------------------------------------------------- /phackR/R/combinedStrategies_regression.R: --------------------------------------------------------------------------------
# ==============================================================================
# Impact of combined p-hacking strategies: Regression Example
# NOTE(review): header previously said "t-test Example"; this file implements
# the regression variant (the t-test variant lives in
# combinedStrategies_ttest.R).
# ==============================================================================

#' Simulate data: combined strategies (regression example)
#' @description Simulates a standard-normal dependent variable, ncompv
#'   correlated score items (via .sim.multcor) plus their row-mean composite
#'   score, then introduces missing values: into the composite score
#'   (proportion \code{missing}), into the DV (same number of NAs at permuted
#'   positions), and into one randomly chosen score item for each missing
#'   score value.
#' @param nobs Integer giving number of observations
#' @param missing Proportion of missing values in the composite score
#' @param ncompv Integer giving number of items that build the composite score
#' @param rcomp Correlation between the composite score items
#' @return Matrix with columns: DV (1), score items (2:(ncompv+1)),
#'   composite score (ncompv+2)

.sim.combined.reg <- function(nobs = 100, missing = 0.1, ncompv = 5, rcomp = 0.75){

  # Sample dependent variable, variables in the score, compute score
  DV <- rnorm(nobs, 0, 1)

  SCOREVAR <- .sim.multcor(nobs = nobs,
                           nvar = ncompv,
                           r = rcomp)
  SCORE <- rowMeans(SCOREVAR)

  # Introduce missing values in score
  missingSCORE <- sample(c(TRUE, FALSE),
                         size = nobs,
                         prob = c(missing, 1-missing),
                         replace = TRUE)
  SCORE[missingSCORE] <- NA

  # Introduce missing values in DV
  # (a permutation of missingSCORE: same number of NAs, generally at
  # different row positions)
  missingDV <- sample(missingSCORE,
                      length(missingSCORE))
  DV[missingDV] <- NA

  # Create a missing value on a score variable if value in score is missing
  # (matrix indexing: column 1 = row index, column 2 = randomly drawn item)
  whichmissing <- matrix(c(which(missingSCORE == TRUE),
                           sample(1:ncompv,
                                  size = sum(missingSCORE),
                                  replace = TRUE)),
                         ncol=2,
                         byrow=FALSE)
  SCOREVAR[whichmissing] <- NA

  # Bind them all together
  DAT.FULL <- cbind(DV, SCOREVAR, SCORE)

  return(DAT.FULL)

}

#' P-hacking function: combined strategies on a regression test
#' @description Runs through p-hacking stages in a fixed order and stops as
#'   soon as a sufficiently small p-value is found. Stages (reported in the
#'   returned \code{stage} element):
#'   1   = original regression DV ~ SCORE already significant (p <= alpha),
#'   1.5 = incorrect rounding (alpha < p < roundinglevel, report alpha),
#'   2   = favorable imputation (.impHack),
#'   3   = variable transformation (.varTransHack),
#'   4   = scale redefinition (.compscoreHack),
#'   5   = outlier exclusion (.outHack),
#'   6   = nothing worked; original result is reported.
#' @param df Data frame as produced by .sim.combined.reg: DV in column 1,
#'   score items in columns 2:6, composite score in column 7
#'   (NOTE(review): x = 7, y = 1 and compv = c(2:6) below hard-code the
#'   default ncompv = 5 layout — confirm if ncompv is changed upstream)
#' @param roundinglevel P-values below this threshold count as "round-able"
#'   successes in stages 2-5
#' @param nImpMethods Number of imputation methods drawn at random (out of 10)
#' @param transvar Which variable(s) to transform ("xy")
#' @param ndelete Maximum number of items deleted from the composite scale
#' @param nOutMethods Number of outlier-exclusion methods drawn at random
#'   (out of 12)
#' @param strategy String value: One out of "firstsig", "smallest",
#'   "smallest.sig"
#' @param alpha Significance level (default: 0.05)
#' @return List with p.final, p.orig, r2.final, r2.orig, stage

.combined.reg.hack <- function(df, roundinglevel = 0.051, nImpMethods = 5, transvar = "xy", ndelete = 3, nOutMethods = 3, strategy = "firstsig", alpha = 0.05){

  ####################### (1) Original p-value ###################

  modres <- summary(lm(df$DV ~ df$SCORE))
  p.orig <- modres$coefficients[2, 4]
  r2.orig <- modres$r.squared

  # If original p-value is significant stop and return original p-value
  if(p.orig <= alpha) return(list(p.final = p.orig,
                                  p.orig = p.orig,
                                  r2.final = r2.orig,
                                  r2.orig = r2.orig,
                                  stage = 1))

  # If original p-value is smaller than rounding level stop and return alpha as p
  # (incorrect rounding: alpha < p.orig < roundinglevel at this point)
  if(p.orig < roundinglevel) return(list(p.final = alpha,
                                         p.orig = p.orig,
                                         r2.final = r2.orig,
                                         r2.orig = r2.orig,
                                         stage = 1.5))

  ####################### (2) Favorable Imputation #####################

  # Apply imputation methods (random selection)
  impMethods <- sample(c(1:10), nImpMethods)
  impres <- .impHack(df,
                     x = 7,
                     y = 1,
                     which = impMethods,
                     strategy = strategy,
                     alpha = roundinglevel)

  # If p-value is significant stop and return
  if(impres$p.final < roundinglevel) return(list(p.final = impres$p.final,
                                                 p.orig = p.orig,
                                                 r2.final = impres$r2.final,
                                                 r2.orig = r2.orig,
                                                 stage = 2))

  ###################### (3) Variable transformation #################

  # Apply variable transformation (omit NA)
  # NOTE(review): transvar and strategy are hard-coded as "xy"/"firstsig"
  # here; the function arguments of the same name are ignored for this
  # stage — confirm whether this is intended.
  transres <- .varTransHack(df[-which(is.na(df$DV) | is.na(df$SCORE)), ],
                            x = 7,
                            y = 1,
                            transvar = "xy",
                            strategy = "firstsig",
                            alpha = roundinglevel)

  # If p-value is significant, stop and return
  if(transres$p.final < roundinglevel) return(list(p.final = transres$p.final,
                                                   p.orig = p.orig,
                                                   r2.final = transres$r2.final,
                                                   r2.orig = r2.orig,
                                                   stage = 3))

  ##################### (4) Scale redefinition ##########################

  # Scale redefinition
  rescaleRes <- .compscoreHack(df[-which(is.na(df$DV) | is.na(df$SCORE)), ],
                               dv = 1,
                               compv = c(2:6),
                               ndelete = ndelete,
                               strategy = strategy,
                               alpha = roundinglevel)

  # If p-value is significant, stop and return
  if(rescaleRes$p.final < roundinglevel) return(list(p.final = rescaleRes$p.final,
                                                     p.orig = p.orig,
                                                     r2.final = rescaleRes$r2.final,
                                                     r2.orig = r2.orig,
                                                     stage = 4))

  ##################### (5) Outlier exclusion #############################

  # Exclude outliers (random selection of methods)
  outMethods <- sample(c(1:12), nOutMethods)
  outlierRes <- .outHack(df[-which(is.na(df$DV) | is.na(df$SCORE)), ],
                         x = 7,
                         y = 1,
                         which = outMethods,
                         strategy = strategy,
                         alpha = roundinglevel)

  # If p-value is significant, stop and return, else return original p-value
  if(outlierRes$p.final < roundinglevel){
    return(list(p.final = outlierRes$p.final,
                p.orig = p.orig,
                r2.final = outlierRes$r2.final,
                r2.orig = r2.orig,
                stage = 5))
  } else {
    return(list(p.final = p.orig,
                p.orig = p.orig,
                r2.final = r2.orig,
                r2.orig = r2.orig,
                stage = 6))
  }

}

#' Simulate p-hacking with combined strategies (regression example)
#' @description Simulates \code{iter} datasets with .sim.combined.reg and
#'   applies .combined.reg.hack to each.
#' @param nobs Integer giving number of observations
#' @param missing Proportion of missing values in the composite score
#' @param ncompv Integer giving number of items that build the composite score
#' @param rcomp Correlation between the composite score items
#' @param roundinglevel See .combined.reg.hack
#' @param nImpMethods Number of imputation methods drawn at random
#' @param transvar Which variable(s) to transform
#' @param ndelete Maximum number of items deleted from the composite scale
#' @param nOutMethods Number of outlier-exclusion methods drawn at random
#' @param strategy String value: One out of "firstsig", "smallest",
#'   "smallest.sig"
#' @param alpha Significance level (default: 0.05)
#' @param iter Number of simulation iterations
#' @return Matrix with columns ps.hack, ps.orig, r2s.hack, r2s.orig, stage

sim.combined.reg <- function(nobs = 100, missing = 0.1, ncompv = 5, rcomp = 0.75, roundinglevel = 0.051, nImpMethods = 5, transvar = "xy", ndelete = 3, nOutMethods = 3, strategy = "firstsig", alpha = 0.05, iter = 1000){

  # Simulate as many datasets as desired iterations
  dat <- list()
  for(i in 1:iter){
    dat[[i]] <- .sim.combined.reg(nobs = nobs,
                                  missing = missing,
                                  ncompv = ncompv,
                                  rcomp = rcomp)
  }

  # Wrapper fixing the hacking parameters for this simulation run
  .combined.reg.hack.list <- function(x){
    .combined.reg.hack(df = x,
                       roundinglevel = roundinglevel,
                       nImpMethods = nImpMethods,
                       transvar = transvar,
                       ndelete = ndelete,
                       nOutMethods = nOutMethods,
                       strategy = strategy,
                       alpha = alpha)
  }

  # Apply p-hacking procedure to each dataset
  res <- pbapply::pblapply(dat, .combined.reg.hack.list)

  # Collect per-iteration results into flat vectors
  ps.hack <- NULL
  ps.orig <- NULL
  r2s.hack <- NULL
  r2s.orig <- NULL
  stage <- NULL

  for(i in 1:iter){
    ps.hack[i] <- res[[i]][["p.final"]]
    ps.orig[i] <- res[[i]][["p.orig"]]
    r2s.hack[i] <- res[[i]][["r2.final"]]
    r2s.orig[i] <- res[[i]][["r2.orig"]]
    stage[i] <- res[[i]][["stage"]]
  }

  res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig, stage)

  return(res)

}
-------------------------------------------------------------------------------- /phackR/R/compositeScores.R: --------------------------------------------------------------------------------
# ==============================================================================
# Scale Redefinition / Composite Scores
# ==============================================================================

#' Simulate data: Correlated composite score raw variables and one non-correlated dependent variable
#' @param nobs Integer giving number of
#' @param nobs Integer giving number of observations
#' @param ncompv Integer giving number of variables to build the composite score
#' @param rcomp Correlation between the composite score variables
#' @return Matrix with the (null-effect) dependent variable in column 1 and
#'   the correlated score items in the remaining columns

.sim.compscore <- function(nobs, ncompv, rcomp){

  # DV is drawn independently of the score items (true null effect)
  dv <- rnorm(nobs, 0, 1)

  iv <- .sim.multcor(nobs = nobs, nvar = ncompv, r = rcomp)

  res <- cbind(dv, iv)

  return(res)

}

#' P-Hacking function for scale redefinition / Composite Scores
#' @param df Data frame containing dependent variable and composite score items as columns
#' @param dv Location of dependent variable in the data frame
#' @param compv Location of composite score variables in the data frame
#' @param ndelete How many items should be deleted from the scale at maximum?
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test (default: 0.05)
#' @return List with p.final, ps (all p-values, original first), r2.final, r2s
#' @importFrom stats lm
#' @importFrom performance item_reliability

.compscoreHack <- function(df, dv, compv, ndelete, strategy = "firstsig", alpha = 0.05){

  # At least two items must remain in the scale after deletion
  stopifnot(length(compv)-ndelete >= 2)

  # Compute original p-value and R^2 with full scale
  modres <- summary(lm(df[, dv] ~ rowMeans(df[, compv])))
  p.orig <- modres$coefficients[2, 4]
  r2.orig <- modres$r.squared

  # Prepare and initialize variables for p-hacking
  ps <- list()
  r2s <- list()
  compscale <- df[, compv]      # full item set (never shrinks)
  changescale <- df[, compv]    # current item set (shrinks each round)
  out <- NULL                   # indices (into compscale) of deleted items

  # Strategically delete items from the composite scale and re-calculate the p-value
  for(i in 1:ndelete){

    pval <- rep(NA, 2)
    r2val <- rep(NA, 2)

    # Define new item to delete from the scale
    # NOTE(review): picks the item maximizing column 2 of
    # performance::item_reliability(changescale) — presumably the
    # "alpha if item is deleted" column; confirm against the package docs.
    out[i] <- which(colnames(compscale) %in% colnames(changescale)[which.max(performance::item_reliability(changescale)[,2])])

    # Compute p-value for the new composite score
    newscore <- rowMeans(compscale[, -out])
    newmodres <- summary(lm(df[, dv] ~ newscore))
    pval[1] <- newmodres$coefficients[2, 4]
    r2val[1] <- newmodres$r.squared

    # Compute p-value for the item deleted from the score
    itemscore <- compscale[, out[i]]
    newmodres2 <- summary(lm(df[, dv] ~ itemscore))
    pval[2] <- newmodres2$coefficients[2, 4]
    r2val[2] <- newmodres2$r.squared

    # Compute p-value for a scale of all items deleted so far
    #nonscore <- rowMeans(cbind(compscale[, out]))
    #newmodres3 <- summary(lm(df[, dv] ~ nonscore))
    #pval[3] <- newmodres3$coefficients[2, 4]
    #r2val[3] <- newmodres3$r.squared

    changescale <- compscale[, -out]
    ps[[i]] <- pval
    r2s[[i]] <- r2val
  }

  ps <- c(p.orig, unique(unlist(ps)))
  r2s <- c(r2.orig, unique(unlist(r2s)))

  # Select final p-hacked p-value based on strategy
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  # NOTE(review): exact floating-point match against p.final; duplicate
  # p-values with differing R^2 would make r2.final longer than 1.
  r2.final <- unique(r2s[ps == p.final])

  return(list(p.final = p.final,
              ps = ps,
              r2.final = r2.final,
              r2s = r2s))

}

#' Simulate p-hacking with composite scores
#' Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations
#' @param nobs Integer giving number of observations
#' @param ncompv Integer giving number of variables to build the composite score
#' @param rcomp Correlation between the composite score variables
#' @param ndelete How many items should be deleted from the scale at maximum?
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test (default: 0.05)
#' @param iter Number of simulation iterations
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @importFrom pbapply pblapply
#' @importFrom shiny withProgress incProgress
#' @export

sim.compscoreHack <- function(nobs, ncompv, rcomp, ndelete, strategy = "firstsig", alpha = 0.05, iter = 1000, shinyEnv=FALSE){

  # Simulate as many datasets as desired iterations
  dat <- list()
  for(i in 1:iter){
    dat[[i]] <- .sim.compscore(nobs = nobs, ncompv = ncompv, rcomp = rcomp)
  }

  # Apply p-hacking procedure to each dataset (with progress bar within or outside Shiny)
  if(!shinyEnv){
    .compscoreHackList <- function(x){
      .compscoreHack(df = x, dv = 1, compv = c(2:(ncompv+1)), ndelete = ndelete,
                     strategy = strategy, alpha = alpha)
    }

    res <- pbapply::pblapply(dat, .compscoreHackList)
  }

  if(shinyEnv){
    percentage <- 0
    withProgress(message = "Running simulation", value=0, {
      res=lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ",round(percentage,2), "%"))
        .compscoreHack(df = x, dv = 1, compv = c(2:(ncompv+1)), ndelete = ndelete,
                       strategy = strategy, alpha = alpha)
      })
    })
  }

  # Collect per-iteration results into flat vectors
  ps.hack <- NULL
  ps.orig <- NULL
  r2s.orig <- NULL
  r2s.hack <- NULL

  for(i in 1:iter){
    ps.hack[i] <- res[[i]][["p.final"]]
    ps.orig[i] <- res[[i]][["ps"]][1]
    r2s.hack[i] <- res[[i]][["r2.final"]]
    r2s.orig[i] <- res[[i]][["r2s"]][1]
  }

  res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig)

  return(res)

}
-------------------------------------------------------------------------------- /phackR/R/exploitCovariates.R: --------------------------------------------------------------------------------
# ==============================================================================
# Exploiting Covariates
#
============================================================================== 4 | 5 | #' Simulate data with (correlated) covariates 6 | #' @description Simulates a dependent variable that correlates with multiple (correlated) covariates as well as an independent IV 7 | #' @param nobs.group Vector with number of observations per group 8 | #' @param ncov Number of continuous covariates in the simulated data frame 9 | #' @param rcov Correlation between the covariates 10 | #' @param rcovdv Correlation between covariates and dependent variable 11 | #' @param mu Mean of the random data 12 | #' @param sd Standard deviation of the random data 13 | #' @param missing Proportion of missing values per variable (e.g., 0.2 = 20 percent) 14 | #' @importFrom stats rnorm 15 | 16 | .sim.covariates <- function(nobs.group, ncov, rcov, rcovdv, mu = 0, sd = 1, missing = 0){ 17 | 18 | # Observations per group and total observations 19 | if(length(nobs.group) == 1) nobs.group <- rep(nobs.group, 2) 20 | nobs <- sum(nobs.group) 21 | 22 | # Generate group vector 23 | group <- rep(1:length(nobs.group), nobs.group) 24 | 25 | # Set up correlation matrix 26 | nvar <- ncov + 1 27 | R <- matrix(rep(rcov, nvar**2), nrow = nvar) 28 | R[,1] <- rep(rcovdv, nvar) 29 | R[1,] <- R[,1] 30 | diag(R) <- rep(1, nvar) 31 | 32 | # transposed Cholesky decomposition of correlation matrix 33 | U <- t(chol(R)) 34 | 35 | # create random noise matrix 36 | random.normal <- matrix(stats::rnorm(nvar*nobs, mu, sd), nrow=nvar, ncol=nobs) 37 | 38 | # create raw data from matrix multiplication of U and random noise 39 | X <- as.data.frame(t(U %*% random.normal)) 40 | 41 | # create final simulated data matrix 42 | Xfull <- cbind(group, X) 43 | 44 | # add missing values 45 | if(missing > 0){ 46 | navalues <- as.data.frame(replicate(nvar+1, sample(1:nobs, missing*nobs))) 47 | for(i in 1:nvar){ 48 | X[unlist(navalues[,i]),i] <- NA 49 | } 50 | } 51 | 52 | return(Xfull) 53 | 54 | } 55 | 56 | #' P-Hacking function for multiple 
covariates 57 | #' @description Outputs a p-hacked p-value and a vector of all p-values that were computed in the process 58 | #' @param df Data frame with one group variable, one dependent variable, and one or more covariates 59 | #' @param dv Integer defining the location of the dependent variable column 60 | #' @param group Integer defining the location of the group variable column 61 | #' @param covs Numeric vector defining the location of the covariate(s). 62 | #' @param interactions Should interaction terms be added to the ANCOVA models? TRUE/FALSE 63 | #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" 64 | #' @param alpha Significance level of the t-test 65 | #' @importFrom car Anova 66 | #' @importFrom stats cor aov as.formula 67 | 68 | .covhack <- function(df, dv, group, covs, interactions = FALSE, strategy = "firstsig", alpha = 0.05){ 69 | 70 | # Prepare data frame 71 | colnames(df)[group] <- "group" 72 | colnames(df)[dv] <- "dv" 73 | colnames(df)[covs] <- paste0("CV", 1:length(covs)) 74 | df <- df[, c(dv, group, covs)] 75 | 76 | ps <- NULL 77 | eta2s <- NULL # partial eta^2 78 | 79 | # Compute correlations between covariates and dependent variable and order covariates accordingly 80 | dvcors <- apply(X = df[,-group], MARGIN = 2, FUN = function(x) stats::cor(x, df$dv))[-1] 81 | covorder <- order(dvcors, decreasing = TRUE) 82 | 83 | # Define ANCOVA models (add covariates in decreasing correlation with dependent variable) 84 | 85 | interactions <- ifelse(interactions, " * ", " + ") 86 | addmodels <- c("dv ~ group", rep(NA, length(covs))) 87 | singmodels <- c("dv ~ group", rep(NA, length(covs))) 88 | 89 | for(i in 1:length(covs)){ 90 | mdl <- paste("dv ~ group", paste0("CV", covorder[i]), sep = interactions) 91 | singmodels[i + 1] <- mdl 92 | } 93 | 94 | for(i in 1:length(covs)){ 95 | mdl <- paste(paste0("CV", covorder[1:i]), collapse = interactions) 96 | mdl <- paste("dv ~ group", mdl, sep = interactions) 97 | addmodels[i+1] 
<- mdl 98 | } 99 | 100 | models <- unique(c(singmodels, addmodels)) 101 | 102 | # Compute ANCOVAs 103 | 104 | for(i in 1:length(models)){ 105 | 106 | res <- stats::aov(stats::as.formula(models[i]), data = df) 107 | resanc <- car::Anova(res, type = 2) 108 | ps[i] <- resanc["group", "Pr(>F)"] 109 | eta2s[i] <- resanc["group", "Sum Sq"]/(resanc["group", "Sum Sq"] + resanc["Residuals", "Sum Sq"]) 110 | 111 | } 112 | 113 | # Select final p-hacked p-value based on strategy 114 | p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha) 115 | eta2.final <- unique(eta2s[ps == p.final]) 116 | 117 | return(list(p.final = p.final, 118 | ps = ps, 119 | eta2.final = eta2.final, 120 | eta2s = eta2s)) 121 | 122 | 123 | } 124 | 125 | #' Simulate p-Hacking with multiple covariates 126 | #' Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 127 | #' @param nobs.group Vector with number of observations per group 128 | #' @param ncov Number of continuous covariates in the simulated data frame 129 | #' @param rcov Correlation between the covariates 130 | #' @param rcovdv Correlation between covariates and dependent variable 131 | #' @param interactions Should interaction terms be added to the ANCOVA models? TRUE/FALSE 132 | #' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig" 133 | #' @param alpha Significance level of the t-test 134 | #' @param iter Number of simulation iterations 135 | #' @param shinyEnv Is the function run in a Shiny session? 
#'   TRUE/FALSE
#' @export

sim.covhack <- function(nobs.group, ncov, rcov, rcovdv, interactions = FALSE,
                        strategy = "firstsig", alpha = 0.05, iter = 1000,
                        shinyEnv = FALSE) {

  # One simulated dataset per iteration
  dat <- lapply(seq_len(iter), function(i) {
    .sim.covariates(nobs.group = nobs.group, ncov = ncov,
                    rcov = rcov, rcovdv = rcovdv)
  })

  # p-hack one dataset (dv in col 2, group in col 1, covariates after)
  hackone <- function(x) {
    .covhack(df = x, dv = 2, group = 1, covs = c(3:(2 + ncov)),
             interactions = interactions, strategy = strategy, alpha = alpha)
  }

  if (!shinyEnv) {
    res <- pbapply::pblapply(dat, hackone)
  }

  if (shinyEnv) {
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res = lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        hackone(x)
      })
    })
  }

  # Collect hacked and original p-values / effect sizes per iteration
  ps.hack <- vapply(res, function(r) r[["p.final"]], numeric(1))
  ps.orig <- vapply(res, function(r) r[["ps"]][1], numeric(1))
  eta2s.hack <- vapply(res, function(r) r[["eta2.final"]], numeric(1))
  eta2s.orig <- vapply(res, function(r) r[["eta2s"]][1], numeric(1))

  cbind(ps.hack, ps.orig, eta2s.hack, eta2s.orig)
}

# ==============================================================================
# Exploiting arbitrary cutoff values
#
# ==============================================================================

# Simulation can be done with .sim.multcor where the correlation is zero

#' P-Hacking function for exploiting cutoff values
#' @param df Data frame with one continuous independent variable and one continuous dependent variable
#' @param iv Location of the independent variable in the data frame
#' @param dv Location of the dependent variable in the data frame
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test
#' @importFrom stats t.test aov median quantile

.cutoffHack <- function(df, iv, dv, strategy = "firstsig", alpha = 0.05) {

  iv <- df[, iv]
  dv <- df[, dv]

  # Honest analysis: simple linear regression of dv on the continuous iv
  fit.orig <- summary(stats::lm(dv ~ iv))
  p.orig <- fit.orig$coefficients[2, 4]
  r2.orig <- fit.orig$r.squared

  # Variant 1: median split on iv, then a two-sample t-test
  splitgrp <- as.numeric(iv > stats::median(iv)) + 1
  lower <- dv[splitgrp == 1]
  upper <- dv[splitgrp == 2]
  p.mediansplit <- stats::t.test(lower, upper, var.equal = TRUE,
                                 alternative = "two.sided")$p.value
  r2.mediansplit <- .compR2t(lower, upper)

  # Variant 2: drop the middle tertile, compare the outer tertiles
  tertiles <- as.numeric(stats::quantile(iv, probs = c(1/3, 2/3)))
  threecut <- cut(iv, breaks = c(-Inf, tertiles, Inf), labels = c(1, 0, 2))
  dv2 <- dv[threecut %in% c(1, 2)]
  threecut2 <- threecut[threecut %in% c(1, 2)]
  p.cutmiddle <- stats::t.test(dv2[threecut2 == 2], dv2[threecut2 == 1],
                               var.equal = TRUE,
                               alternative = "two.sided")$p.value
  r2.cutmiddle <- .compR2t(dv2[threecut2 == 2], dv2[threecut2 == 1])

  # Variant 3: keep all three iv categories and run an omnibus ANOVA
  fit.threecat <- summary(stats::aov(dv ~ threecut))
  p.threecat <- fit.threecat[[1]][[5]][1]
  r2.threecat <- fit.threecat[[1]][1, 2] / sum(fit.threecat[[1]][, 2])

  ps <- c(p.orig, p.mediansplit, p.cutmiddle, p.threecat)
  r2s <- c(r2.orig, r2.mediansplit, r2.cutmiddle, r2.threecat)

  # Select final p-hacked p-value based on strategy
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  r2.final <- unique(r2s[ps == p.final])

  list(p.final = p.final,
       ps = ps,
       r2.final = r2.final,
       r2s = r2s)
}

#' Simulate p-Hacking for exploiting cutoff values
#' Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations
#' @param nobs Number of observations
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test
#' @param iter Number of simulation iterations
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @export

sim.cutoffHack <- function(nobs, strategy = "firstsig", alpha = 0.05,
                           iter = 1000, shinyEnv = FALSE) {

  # Two uncorrelated variables per dataset, one dataset per iteration
  dat <- lapply(seq_len(iter), function(i) {
    .sim.multcor(nobs = nobs, nvar = 2, r = 0)
  })

  hackone <- function(x) {
    .cutoffHack(df = x, iv = 1, dv = 2, strategy = strategy, alpha = alpha)
  }

  if (!shinyEnv) {
    res <- pbapply::pblapply(dat, hackone)
  }

  if (shinyEnv) {
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res = lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        hackone(x)
      })
    })
  }

  ps.hack <- vapply(res, function(r) r[["p.final"]], numeric(1))
  ps.orig <- vapply(res, function(r) r[["ps"]][1], numeric(1))
  r2s.hack <- vapply(res, function(r) r[["r2.final"]], numeric(1))
  r2s.orig <- vapply(res, function(r) r[["r2s"]][1], numeric(1))

  cbind(ps.hack, ps.orig, r2s.hack, r2s.orig)
}

# ==============================================================================
# p-Hacking through Favorable Imputation
# ==============================================================================

# ------------------------------------------------------------------------------
# Some local helper functions
# ------------------------------------------------------------------------------

#' Single value imputation function
#' Imputes NA values in a single variable using the function specified in fun
#' @param x The target variable of the imputation
#' @param fun The function used to replace missing values that takes x as an argument (e.g., mean)
#' @param ... Additional arguments to fun

.easyimpute <- function(x, fun, ...) {
  # Compute the replacement once from the full vector, then fill every NA
  fill <- fun(x, ...)
  x[is.na(x)] <- fill
  x
}

#' Estimate mode of continuous variables
#' Estimates mode of continuous variables using the density() function
#' @param x The target variable for which the mode should be searched
#' @importFrom stats density

.estimate_mode <- function(x) {
  # The mode is taken as the location of the kernel density estimate's peak
  dens <- stats::density(x, na.rm = TRUE)
  dens$x[which.max(dens$y)]
}

# ------------------------------------------------------------------------------
# P-Hacking functions
# ------------------------------------------------------------------------------

# Simulation function: Data can be simulated using .sim.multcor with r = 0.
# Proportion of missing values can be controlled through the argument "missing"

#' P-Hacking function favorable imputation in univariate linear regression
#' @description Outputs a p-hacked p-value and a vector of all p-values that were computed in the process
#' @param df Data frame containing x and y variables as columns
#' @param x Location of x variable (predictor) in the data frame
#' @param y Location of y variable (criterion) in the data frame
#' @param which Which missing value handling method? 1: delete missing, 2: mean imputation, 3: median imputation, 4: mode imputation, 5: predictive mean matching, 6: weighted predictive mean matching, 7: sample from observed values, 8: Bayesian linear regression, 9: linear regression ignoring model error, 10: linear regression predicted values
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test (default: 0.05)
#' @importFrom stats median lm
#' @importFrom mice complete

.impHack <- function(df, x, y, which = c(1:10), strategy = "firstsig", alpha = 0.05){

  x <- df[, x]
  y <- df[, y]

  # Stop if imputation methods are not defined
  stopifnot(any(c(1:10) %in% which))

  # One slot per candidate method; slots of unused methods stay NA and are
  # dropped before the p-value selection below
  ps <- rep(NA, 10)
  r2s <- rep(NA, 10)

  # Method 1: complete-case analysis (missing values deleted)
  if(1 %in% which){
    mod1 <- summary(stats::lm(y ~ x, na.action = "na.omit"))
    ps[1] <- mod1$coefficients[2, 4]
    r2s[1] <- mod1$r.squared
  }

  # Method 2: mean imputation
  if(2 %in% which){
    newx <- .easyimpute(x, mean, na.rm = TRUE)
    newy <- .easyimpute(y, mean, na.rm = TRUE)
    mod2 <- summary(stats::lm(newy ~ newx))
    ps[2] <- mod2$coefficients[2, 4]
    r2s[2] <- mod2$r.squared
  }

  # Method 3: median imputation
  # BUGFIX: this branch previously imputed with mean(), silently duplicating
  # method 2; median imputation must use stats::median()
  if(3 %in% which){
    newx <- .easyimpute(x, stats::median, na.rm = TRUE)
    newy <- .easyimpute(y, stats::median, na.rm = TRUE)
    mod3 <- summary(stats::lm(newy ~ newx))
    ps[3] <- mod3$coefficients[2, 4]
    r2s[3] <- mod3$r.squared
  }

  # Method 4: mode imputation (kernel-density mode)
  if(4 %in% which){
    newx <- .easyimpute(x, .estimate_mode)
    newy <- .easyimpute(y, .estimate_mode)
    mod4 <- summary(stats::lm(newy ~ newx))
    ps[4] <- mod4$coefficients[2, 4]
    r2s[4] <- mod4$r.squared
  }

  # Methods 5-10: multivariate imputation by chained equations ("mice").
  # Method slot k maps to mice method micemethods[k - 4]:
  #   5 pmm, 6 midastouch, 7 sample, 8 norm, 9 norm.nob, 10 norm.predict
  dfnew <- as.data.frame(cbind(x, y))
  micemethods <- c("pmm", "midastouch", "sample", "norm", "norm.nob", "norm.predict")
  for(k in 5:10){
    if(k %in% which){
      imp <- mice::mice(dfnew, m = 1, method = micemethods[k - 4], silent = TRUE, print = FALSE)
      modk <- summary(stats::lm(y ~ x, data = mice::complete(imp, 1)))
      ps[k] <- modk$coefficients[2, 4]
      r2s[k] <- modk$r.squared
    }
  }

  # Drop slots of methods that were not requested
  ps <- ps[!is.na(ps)]
  r2s <- r2s[!is.na(r2s)]

  # Select final p-hacked p-value based on strategy
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  r2.final <- unique(r2s[ps == p.final])

  return(list(p.final = p.final,
              ps = ps,
              r2.final = r2.final,
              r2s = r2s))
}

#' Simulate p-Hacking with different sorts of missing value imputation
#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations
#' @param nobs Integer giving number of observations
#' @param missing Percentage of missing values (e.g., 0.1 for 10 percent)
#' @param which Which imputation methods?
#'   Either 5 random methods are chosen ("random") or a numeric vector containing the chosen methods (1: delete missing, 2: mean imputation, 3: median imputation, 4: mode imputation, 5: predictive mean matching, 6: weighted predictive mean matching, 7: sample from observed values, 8: Bayesian linear regression, 9: linear regression ignoring model error, 10: linear regression predicted values)
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test (default: 0.05)
#' @param iter Number of simulation iterations
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @export

sim.impHack <- function(nobs, missing, which = c(1:10), strategy = "firstsig",
                        alpha = 0.05, iter = 1000, shinyEnv = FALSE){

  # One dataset with the requested share of missing values per iteration
  dat <- lapply(seq_len(iter), function(i) {
    .sim.multcor(nobs = nobs, nvar = 2, r = 0, missing = missing)
  })

  # "random" picks 5 of the 10 imputation methods at random
  if(any(which == "random")) which <- sample(c(1:10), 5)

  hackone <- function(d) {
    .impHack(df = d, x = 1, y = 2,
             which = which, strategy = strategy, alpha = alpha)
  }

  if(!shinyEnv){
    res <- pbapply::pblapply(dat, hackone)
  }

  if(shinyEnv){
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res = lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        hackone(x)
      })
    })
  }

  ps.hack <- vapply(res, function(r) r[["p.final"]], numeric(1))
  ps.orig <- vapply(res, function(r) r[["ps"]][1], numeric(1))
  r2s.hack <- vapply(res, function(r) r[["r2.final"]], numeric(1))
  r2s.orig <- vapply(res, function(r) r[["r2s"]][1], numeric(1))

  cbind(ps.hack, ps.orig, r2s.hack, r2s.orig)
}

# ==============================================================================
# Helpers
# ==============================================================================

#' Simulate multivariate correlated data for continuous variables
#' @description Outputs a data frame with correlated variables of defined length
#' @param nobs Number of observations (rows) in the simulated data frame
#' @param nvar Number of variables (columns) in the data frame
#' @param r Desired correlation between the variables (integer)
#' @param mu Mean of the random data
#' @param sd Standard deviation of the random data
#' @param missing Proportion of missing values per variable (e.g., 0.2 = 20 percent)
#' @importFrom stats rnorm

.sim.multcor <- function(nobs, nvar, r, mu = 0, sd = 1, missing = 0){

  # Compound-symmetric correlation matrix: r off-diagonal, 1 on the diagonal
  R <- matrix(r, nrow = nvar, ncol = nvar)
  diag(R) <- 1

  # Transposed Cholesky factor induces the desired correlation
  U <- t(chol(R))

  # Uncorrelated standard draws, one row per variable
  noise <- matrix(stats::rnorm(nvar * nobs, mu, sd), nrow = nvar, ncol = nobs)

  # Correlated data; transpose so rows become observations
  X <- as.data.frame(t(U %*% noise))

  # Optionally blank out a share of each column
  if(missing > 0){
    # With fewer than 2 missing cells per column replicate() simplifies to a
    # plain vector, hence the transpose in the first branch
    if(missing * nobs < 2){
      navalues <- as.data.frame(t(replicate(nvar, sample(1:nobs, missing * nobs))))
    } else {
      navalues <- as.data.frame(replicate(nvar, sample(1:nobs, missing * nobs)))
    }
    for(i in seq_len(nvar)){
      X[unlist(navalues[, i]), i] <- NA
    }
  }

  X
}

#' Generic sampling function
#' @description Outputs a data frame with two columns
#' @param nobs.group Number of observations per group. Either a scalar or a vector with two elements.
#' @importFrom stats rnorm

.sim.data <- function(nobs.group){

  # A scalar group size applies to both groups
  if(length(nobs.group) == 1) nobs.group <- rep(nobs.group, 2)

  # Standard-normal scores per group plus a numeric group indicator
  V1 <- stats::rnorm(nobs.group[1], 0, 1)
  V2 <- stats::rnorm(nobs.group[2], 0, 1)
  group <- rep(c(1, 2), nobs.group)

  cbind(group, c(V1, V2))
}

#' Create data frames without outliers
#' @description Inputs data frame and two sets of outlier values, outputs list with three data frames
#' @param x Original vector of x values
#' @param y Original vector of y values
#' @param outsx Outlier values to be removed from x
#' @param outsy Outlier values to be removed from y

.extractoutlier <- function(x, y, outsx, outsy){

  # Row masks after removing x outliers / y outliers; an empty outlier set
  # keeps every row (all-TRUE mask)
  keepx <- !x %in% outsx
  keepy <- !y %in% outsy

  # Variant 1: x outliers removed from both vectors
  xy1 <- unname(cbind(x[keepx], y[keepx]))

  # Variant 2: y outliers removed from both vectors
  xy2 <- unname(cbind(x[keepy], y[keepy]))

  # Variant 3: both removed — but only when both outlier sets are non-empty;
  # otherwise the full data are kept (mirrors the pass-through above)
  if(length(outsx) > 0 && length(outsy) > 0){
    keepboth <- keepx & keepy
  } else {
    keepboth <- rep(TRUE, length(x))
  }
  xy3 <- unname(cbind(x[keepboth], y[keepboth]))

  # Drop duplicate variants
  unique(unname(list(xy1, xy2, xy3)))
}

#' Select a p-value from a vector of p-hacked p-values
#' @description
#'   Takes a vector of p-values and selects the smallest, first significant, or smallest significant p-value.
#' @param ps Vector of p values
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level (default: 0.05)

.selectpvalue <- function(ps, strategy, alpha){

  # The first entry is, by convention, the un-hacked (original) p-value;
  # it is the fallback whenever nothing reaches significance.
  p.orig <- ps[1]

  switch(strategy,
         # smallest significant p-value, else the original
         "smallest.sig" = if(min(ps) < alpha) min(ps) else p.orig,
         # first p-value below alpha in order, else the original
         "firstsig"     = if(min(ps) < alpha) ps[which(ps < alpha)[1]] else p.orig,
         # smallest p-value regardless of significance
         "smallest"     = min(ps),
         # unknown strategy: NA (matches the original initialization)
         NA)
}

#' Compute R squared for the t-test
#' @param x values of group 1
#' @param y values of group 2

.compR2t <- function(x, y){
  # 1 - SS_within / SS_total over the pooled sample
  pooled <- c(x, y)
  sst <- sum((pooled - mean(pooled))^2)
  sse <- sum((x - mean(x))^2) + sum((y - mean(y))^2)
  1 - sse / sst
}

#' Compute Cohen's d
#' @description Compute Cohen's d from t-value with equal sized groups of size n
#' @param t t-value
#' @param n sample size per group

.compCohensD <- function(t, n){
  sqrt(2 / n) * t
}

# ==============================================================================
# Incorrect Rounding
# ==============================================================================

# Generic sampling function .sim.data() can be used

#'
#' P-Hacking function for incorrect rounding
#' @description Outputs a p-hacked p-value and the non-p-hacked-p-value
#' @param df Data frame
#' @param group Scalar defining location of the group vector in the data frame
#' @param dv Scalar defining location of dependent variable in the data frame
#' @param roundinglevel Highest p-value that is rounded down to 0.05
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test (default: 0.05)
#' @importFrom stats t.test

.roundhack <- function(df, group, dv, roundinglevel, alternative = "two.sided", alpha = 0.05){

  grp <- df[, group]
  outcome <- df[, dv]
  lvls <- unique(grp)

  # Honest analysis: two-sample t-test plus its R^2 effect size
  pval <- stats::t.test(outcome ~ grp,
                        var.equal = TRUE, alternative = alternative)$p.value
  r2val <- .compR2t(outcome[grp == lvls[1]],
                    outcome[grp == lvls[2]])

  # Incorrect rounding: a just-non-significant p-value below roundinglevel
  # is reported as exactly alpha
  p.final <- if(pval > alpha && pval < roundinglevel) alpha else pval

  list(p.final = p.final,
       ps = c(pval, p.final),
       r2.final = r2val,
       r2s = rep(r2val, 2))
}

#' Simulate p-hacking with incorrect rounding
#' @param roundinglevel Highest p-value that is rounded down to alpha
#' @param iter Number of iterations
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test (default: 0.05)
#' @param shinyEnv Is the function run in a Shiny session?
#'   TRUE/FALSE
#' @export

sim.roundhack <- function(roundinglevel, iter = 1000, alternative = "two.sided",
                          alpha = 0.05, shinyEnv = FALSE){

  # One two-group dataset (30 observations per group) per iteration
  dat <- lapply(seq_len(iter), function(i) .sim.data(nobs.group = 30))

  if(!shinyEnv){
    res <- pbapply::pblapply(dat, .roundhack, group = 1, dv = 2,
                             roundinglevel = roundinglevel,
                             alternative = alternative, alpha = alpha)
  }

  if(shinyEnv){
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res = lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        .roundhack(df = x, group = 1, dv = 2, roundinglevel = roundinglevel,
                   alternative = alternative, alpha = alpha)
      })
    })
  }

  ps.hack <- vapply(res, function(r) r[["p.final"]], numeric(1))
  ps.orig <- vapply(res, function(r) r[["ps"]][1], numeric(1))
  r2s.hack <- vapply(res, function(r) r[["r2.final"]], numeric(1))
  r2s.orig <- vapply(res, function(r) r[["r2s"]][1], numeric(1))

  cbind(ps.hack, ps.orig, r2s.hack, r2s.orig)
}

# ==============================================================================
# Optional Stopping Based on Significance
# ==============================================================================

# Generic sampling function .sim.data() can be used

#' Optional Stopping based on existing dataset
#' @description Returns a p-hacked p-value and a non-p-hacked p-value based on the maximum sample size
#' @param df Data frame
| #' @param group group Scalar defining grouping column 11 | #' @param dv Scalar defining location of dependent variable in the data frame 12 | #' @param n.min Minimum sample size 13 | #' @param n.max Maximum sample size 14 | #' @param step Step size of the optional stopping (default is 1) 15 | #' @param peek Determines how often one peeks at the data. Overrides step argument if not NULL. 16 | #' @param alternative Direction of the t-test ("two.sided", "less", "greater") 17 | #' @param alpha Significance level of the t-test (default: 0.05) 18 | #' @importFrom stats t.test 19 | #' @importFrom utils tail 20 | 21 | .optstop <- function(df, group, dv, n.min, n.max, step = 1, peek = NULL, alternative = "two.sided", alpha = 0.05){ 22 | 23 | # Extract group variables 24 | g1 <- df[df[,group] == unique(df[,group])[1], dv] 25 | g2 <- df[df[,group] == unique(df[,group])[2], dv] 26 | 27 | # Sanity check: Enough data? 28 | stopifnot(length(g1) >= n.max && length(g2) >= n.max) 29 | 30 | # Determine places of peeks 31 | if(is.null(peek)){ 32 | peeks <- seq(n.min, n.max, by=step) 33 | if(step > (n.max-n.min)) peeks <- c(n.min, n.max) 34 | } else { 35 | peeks <- round(seq(n.min, n.max, length.out = peek)) 36 | } 37 | 38 | # Compute t-tests 39 | mod <- sapply(peeks, FUN = function(x) {stats::t.test(g1[1:x], g2[1:x], var.equal = TRUE, alternative = alternative)}) 40 | ps <- simplify2array(mod["p.value",]) 41 | r2s <- sapply(peeks, FUN = function(x) {.compR2t(g1[1:x], g2[1:x])}) 42 | ds <- .compCohensD(simplify2array(mod["statistic",]), peeks) 43 | 44 | # Do the p-hacking 45 | if(any(ps < alpha) == FALSE){ 46 | p.final <- utils::tail(ps, 1) 47 | r2.final <- utils::tail(r2s, 1) 48 | d.final <- utils::tail(ds, 1) 49 | } else if (any(ps < alpha) == TRUE) { 50 | p.final <- ps[which(ps < alpha)][1] 51 | r2.final <- unique(r2s[ps == p.final]) 52 | d.final <- unique(ds[ps == p.final]) 53 | } 54 | 55 | return(list(p.final = p.final, 56 | ps = ps, 57 | r2.final = r2.final, 58 | r2s = r2s, 59 
| d.final = d.final, 60 | ds = ds)) 61 | } 62 | 63 | #' Simulate p-hacking with incorrect rounding 64 | #' @param n.min Minimum sample size 65 | #' @param n.max Maximum sample size 66 | #' @param step Step size of the optional stopping (default is 1) 67 | #' @param peek Determines how often one peeks at the data. Overrides step argument if not NULL. 68 | #' @param alternative Direction of the t-test ("two.sided", "less", "greater") 69 | #' @param iter Number of iterations 70 | #' @param alpha Significance level of the t-test (default: 0.05) 71 | #' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE 72 | #' @importFrom utils tail 73 | #' @export 74 | #' 75 | 76 | sim.optstop <- function(n.min, n.max, step = 1, peek = NULL, alternative = "two.sided", iter = 1000, alpha = 0.05, shinyEnv = FALSE){ 77 | 78 | # Simulate as many datasets as desired iterations 79 | dat <- list() 80 | for(i in 1:iter){ 81 | dat[[i]] <- .sim.data(nobs.group = n.max) 82 | } 83 | 84 | # Apply p-hacking procedure to each dataset 85 | if(!shinyEnv){ 86 | res <- pbapply::pblapply(dat, .optstop, group = 1, dv = 2, 87 | n.min = n.min, n.max = n.max, step = step, peek = peek, 88 | alternative = alternative, alpha = alpha) 89 | } 90 | 91 | if(shinyEnv){ 92 | percentage <- 0 93 | withProgress(message = "Running simulation", value = 0, { 94 | res = lapply(dat, function(x){ 95 | percentage <<- percentage + 1/length(dat)*100 96 | incProgress(1/length(dat), detail = paste0("Progress: ",round(percentage,2), "%")) 97 | .optstop(df=x, group = 1, dv = 2, 98 | n.min = n.min, n.max = n.max, step = step, 99 | alternative = alternative, alpha = alpha) 100 | }) 101 | }) 102 | } 103 | 104 | ps.hack <- NULL 105 | ps.orig <- NULL 106 | r2s.hack <- NULL 107 | r2s.orig <- NULL 108 | ds.hack <- NULL 109 | ds.orig <- NULL 110 | 111 | for(i in 1:iter){ 112 | ps.hack[i] <- res[[i]][["p.final"]] 113 | ps.orig[i] <- utils::tail(res[[i]][["ps"]], 1) 114 | r2s.hack[i] <- res[[i]][["r2.final"]] 115 | r2s.orig[i] 
<- utils::tail(res[[i]][["r2s"]], 1) 116 | ds.hack[i] <- res[[i]][["d.final"]] 117 | ds.orig[i] <- utils::tail(res[[i]][["ds"]], 1) 118 | } 119 | 120 | res <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig, ds.hack, ds.orig) 121 | 122 | return(res) 123 | 124 | } 125 | -------------------------------------------------------------------------------- /phackR/R/plotsShiny.R: -------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # Figures: p-value and effect size distributions 3 | # ============================================================================== 4 | 5 | #' Plot p-value distributions 6 | #' @param simdat Simulated data from one of the p-hacking simulation functions 7 | #' @param alpha Alpha level 8 | #' @importFrom ggplot2 ggplot geom_histogram aes theme_light xlab ggtitle theme element_text geom_vline scale_fill_manual layer_scales ylab geom_segment geom_col scale_x_continuous scale_y_continuous waiver 9 | #' @importFrom rlang .data 10 | #' @importFrom dplyr all_of mutate 11 | #' @importFrom magrittr "%$%" 12 | 13 | pplots <- function(simdat, alpha){ 14 | 15 | simdat <- as.data.frame(simdat) 16 | 17 | simdat <- as.data.frame(simdat) 18 | 19 | simdat_long <- tidyr::gather(simdat, "condition", "pval", all_of("ps.hack"):all_of("ps.orig")) 20 | 21 | bin <- condition <- Freq <- binInt <- pval <- plotVal <- NULL 22 | 23 | plotdata <- 24 | simdat_long %>% 25 | mutate(bin = cut(pval, seq(0, 1, by=0.025))) %$% 26 | table(bin, condition) %>% 27 | as.data.frame() %>% 28 | mutate(plotVal = ifelse(condition == "ps.orig", 29 | -1*Freq, 30 | Freq)) %>% 31 | mutate(binInt = as.integer(bin)) 32 | 33 | 34 | pcomp <- ggplot(plotdata, 35 | aes(x = binInt, 36 | y = plotVal, 37 | fill = condition)) + 38 | geom_segment(x = 0.5, xend = 40.5, y = nrow(simdat)/40, yend = nrow(simdat)/40, color = "#C27516") + 39 | geom_segment(x = 0.5, xend = 40.5, y = -nrow(simdat)/40, 
yend = -nrow(simdat)/40, color = "#024B7A") + 40 | geom_col() + 41 | scale_x_continuous(breaks = c(c(0, 10, 20, 30, 40)+0.5, alpha*40+0.5), 42 | labels = c("0", "0.25", "0.5", "0.75", "1", expression(alpha))) + 43 | scale_y_continuous(breaks = waiver(), 44 | labels = abs) + 45 | xlab("p-value") + 46 | ylab("count") + 47 | ggtitle("Distribution of p-values") + 48 | scale_fill_manual(values=c("#FFAE4A", "#43B7C2"), 49 | labels=c("p-hacked", "original")) + 50 | theme_light() + 51 | theme(axis.title = element_text(size=14), 52 | axis.text = element_text(size=12), 53 | plot.title = element_text(size=18)) + 54 | geom_vline(xintercept = alpha*40+0.5, linetype = "dashed") 55 | 56 | return(list(pcomp=pcomp)) 57 | } 58 | 59 | #' Plot effect size distributions 60 | #' @param simdat Simulated data from one of the p-hacking simulation functions 61 | #' @param EScolumn.hack Column number of hacked effect sizes 62 | #' @param EScolumn.orig Column number of original effect sizes 63 | #' @param titles Title of effect size plots 64 | #' @importFrom grid grobTree textGrob gpar 65 | #' @importFrom ggplot2 annotation_custom coord_cartesian 66 | 67 | esplots <- function(simdat, EScolumn.hack, EScolumn.orig, titles = c(expression("Distribution of p-hacked effect sizes R"^2), 68 | expression("Distribution of original effect sizes R"^2))){ 69 | 70 | simdat <- as.data.frame(simdat) 71 | es.hack <- colnames(simdat)[EScolumn.hack] 72 | es.orig <- colnames(simdat)[EScolumn.orig] 73 | 74 | meanES.hack <- grobTree(textGrob(paste0("Mean: ", round(mean(simdat[,es.hack]), 3)), x = 0.95, y=0.95, hjust=1, gp=gpar(fontsize=14))) 75 | meanES.orig <- grobTree(textGrob(paste0("Mean: ", round(mean(simdat[,es.orig]), 3)), x = 0.95, y=0.95, hjust=1, gp=gpar(fontsize=14))) 76 | 77 | eshack <- ggplot(simdat, aes(x=simdat[,es.hack])) + 78 | geom_histogram(fill="#FFAE4A", color="#C27516", bins=30, na.rm=FALSE) + 79 | theme_light() + 80 | xlab("Effect Size") + 81 | ggtitle(titles[1]) + 82 | theme(axis.title = 
#' Run Shiny app for p-hacking simulation
#' @description Launches the interactive Shiny application shipped with the
#' package (in \code{inst/shiny-phack/ShinyPHack}). Errors if the app
#' directory cannot be located, which indicates a broken installation.
#' @export

runShinyPHack <- function() {
  # Locate the installed app directory; system.file() returns "" on failure
  app_dir <- system.file("shiny-phack", "ShinyPHack", package = "phackR")
  if (!nzchar(app_dir)) {
    stop("Could not find example directory. Try re-installing `phackR`.", call. = FALSE)
  }

  shiny::runApp(app_dir, display.mode = "normal")
}
#' Simulate dataset with multiple dependent variables
#' @description Outputs a matrix with a grouping variable in the first
#' column followed by \code{nvar} correlated dependent variables.
#' @param nobs.group Vector giving number of observations per group
#' @param nvar Number of dependent variables in the data frame
#' @param r Desired correlation between the dependent variables (scalar)

.sim.multDV <- function(nobs.group, nvar, r){

  # A scalar group size means two equally sized groups
  if (length(nobs.group) == 1) nobs.group <- rep(nobs.group, 2)

  # Group labels 1..k, each repeated according to its group size
  group <- rep(seq_along(nobs.group), nobs.group)

  # Correlated DVs for all observations (helper from helpers.R)
  dvs <- .sim.multcor(nobs = sum(nobs.group), nvar = nvar, r = r)

  cbind(group, dvs)
}
#' P-Hacking function for multiple dependent variables
#' @description Computes one t-test per dependent variable and returns the
#' p-hacked p-value (selected according to \code{strategy}) together with
#' the vectors of all p-values, R^2 values and Cohen's d values computed in
#' the process.
#' @param df Data frame with one group variable and multiple dependent variables
#' @param dvs Vector defining the DV columns (will be checked in given order)
#' @param group Scalar defining grouping column
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test
#' @importFrom stats t.test

.multDVhack <- function(df, dvs, group, strategy = "firstsig", alternative = "two.sided", alpha = 0.05){

  # Prepare data: outcome matrix and grouping vector
  dvs <- as.matrix(df[, dvs], ncol = length(dvs))
  group <- df[, group]
  mod <- list()
  r2s <- NULL

  # One t-test (and R^2) per dependent variable
  for(i in 1:ncol(dvs)){
    mod[[i]] <- stats::t.test(dvs[, i] ~ group,
                              var.equal = TRUE, alternative = alternative)
    r2s[i] <- .compR2t(dvs[group == unique(group)[1], i],
                       dvs[group == unique(group)[2], i])
  }

  ps <- unlist(simplify2array(mod)["p.value", ])
  # BUGFIX: per-group n is length(group)/2 (assumes two equally sized
  # groups). The previous code used length(df[, group]) after `group` had
  # been overwritten with the group-label vector, so df was re-indexed by
  # those labels; for matrix input this inflated n to nobs^2/2. For
  # data.frame input the two expressions coincide.
  ds <- .compCohensD(unlist(simplify2array(mod)["statistic", ]), length(group)/2)

  # Select final p-hacked p-value based on strategy; unique() collapses
  # duplicates in the (theoretical) case of tied p-values
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  r2.final <- unique(r2s[ps == p.final])
  d.final <- unique(ds[ps == p.final])

  return(list(p.final = p.final,
              ps = ps,
              r2.final = r2.final,
              r2s = r2s,
              d.final = d.final,
              ds = ds))
}
#' Simulate p-Hacking with multiple dependent variables
#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations, together with the corresponding R^2 and Cohen's d values
#' @param nobs.group Vector giving number of observations per group
#' @param nvar Number of dependent variables (columns) in the data frame
#' @param r Desired correlation between the dependent variables (scalar)
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param iter Number of simulation iterations
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test (default: 0.05)
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @export

sim.multDVhack <- function(nobs.group, nvar, r, strategy = "firstsig", iter = 1000, alternative = "two.sided", alpha = 0.05, shinyEnv = FALSE){

  # Simulate as many datasets as desired iterations
  dat <- lapply(seq_len(iter), function(i){
    .sim.multDV(nobs.group = nobs.group, nvar = nvar, r = r)
  })

  # Apply the p-hacking procedure to each dataset, with a console progress
  # bar (pbapply) or a Shiny progress indicator
  if(!shinyEnv){
    res <- pbapply::pblapply(dat, .multDVhack, dvs = c(2:(nvar+1)), group = 1,
                             strategy = strategy, alternative = alternative, alpha = alpha)
  } else {
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res <- lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        .multDVhack(df = x, dvs = c(2:(nvar+1)), group = 1,
                    strategy = strategy, alternative = alternative, alpha = alpha)
      })
    })
  }

  # Extract results with type-checked vapply instead of growing NULL
  # vectors element-by-element in a loop (anti-pattern: O(n^2) copies).
  # Index 1 of ps/r2s/ds always holds the non-hacked (first DV) result.
  ps.hack  <- vapply(res, function(z) z[["p.final"]], numeric(1))
  ps.orig  <- vapply(res, function(z) z[["ps"]][1], numeric(1))
  r2s.hack <- vapply(res, function(z) z[["r2.final"]], numeric(1))
  r2s.orig <- vapply(res, function(z) z[["r2s"]][1], numeric(1))
  ds.hack  <- vapply(res, function(z) z[["d.final"]], numeric(1))
  ds.orig  <- vapply(res, function(z) z[["ds"]][1], numeric(1))

  return(cbind(ps.hack, ps.orig, r2s.hack, r2s.orig, ds.hack, ds.orig))
}
# ==============================================================================
# Selective Reporting of the Independent Variable
# ==============================================================================

#' Simulate dataset with multiple independent variables
#' @description Outputs data frame with multiple independent variables
#' @param nobs.group Scalar defining number of observations per group (or number of observations in predictors in regression)
#' @param nvar Number of independent variables in the data frame
#' @param r Desired correlation between the independent variables (scalar)
#' @param regression Should the simulation be conducted for a regression analysis (TRUE) or a t-test? (FALSE)

.sim.multIV <- function(nobs.group, nvar, r, regression = FALSE){

  # Observations per group (a scalar means two equally sized groups)
  if(length(nobs.group) == 1) nobs.group <- rep(nobs.group, 2)

  # Simulate the control group; under regression the same standard normal
  # vector plays the role of the criterion (independent of the predictors
  # by design, i.e. the null hypothesis holds).
  # FIX: removed the dead assignment `criterion <- control` — the variable
  # was never used; only the column rename below distinguishes both cases.
  control <- stats::rnorm(nobs.group[1])

  # Simulate multiple experimental groups / predictor variables
  ivs <- .sim.multcor(nobs = nobs.group[2], nvar = nvar, r = r)

  # Generate data frame
  res <- cbind(control, ivs)
  if(regression) colnames(res)[1] <- "criterion"

  return(res)
}
#' P-Hacking function for multiple independent variables in a t-test
#' @description Runs one t-test per treatment group against the shared
#' control group and returns the p-hacked p-value (selected according to
#' \code{strategy}) plus all p-values, R^2 and Cohen's d values computed in
#' the process.
#' @param df Data frame (wide format) containing a control group variable and multiple treatment group variables
#' @param ivs Location of the independent variables (treatment groups) in the (wide) data frame
#' @param control Location of the control group in the (wide) data frame
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test (default: 0.05)
#' @importFrom stats t.test

.multIVhack_ttest <- function(df, ivs, control, strategy = "firstsig", alternative = "two.sided", alpha = 0.05){

  treatm <- df[, ivs]
  control <- df[, control]

  # Prepare result containers
  mod <- list()
  r2s <- rep(NA, length(ivs))

  # One t-test (control vs. i-th treatment group) per IV
  for(i in seq_along(ivs)){
    mod[[i]] <- stats::t.test(control, treatm[, i], var.equal = TRUE, alternative = alternative)
    r2s[i] <- .compR2t(control, treatm[, i])
  }

  ps <- unlist(simplify2array(mod)["p.value", ])
  # n is taken as the control group size — assumes equal group sizes;
  # TODO(review): confirm for unequal nobs.group
  ds <- .compCohensD(unlist(simplify2array(mod)["statistic", ]), length(control))

  # Select final p-hacked p-value based on strategy. CONSISTENCY: wrap in
  # unique() like .multDVhack so tied p-values cannot yield a vector result.
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  r2.final <- unique(r2s[ps == p.final])
  d.final <- unique(ds[ps == p.final])

  return(list(p.final = p.final,
              ps = ps,
              r2.final = r2.final,
              r2s = r2s,
              d.final = d.final,
              ds = ds))
}

#' P-Hacking function for multiple predictors in a regression
#' @description Runs one univariate regression per predictor and returns
#' the p-hacked p-value (selected according to \code{strategy}) plus all
#' p-values and R^2 values computed in the process.
#' @param df Data frame containing a criterion variable and multiple predictor variables
#' @param ivs Location of the independent variables (predictors) in the data frame
#' @param control Location of the criterion in the data frame
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alternative Accepted for interface symmetry with
#'   \code{.multIVhack_ttest}. NOTE(review): it is not used here — the slope
#'   test reported by \code{summary(lm())} is always two-sided.
#' @param alpha Significance level of the test (default: 0.05)

.multIVhack_reg <- function(df, ivs, control, strategy = "firstsig", alternative="two.sided", alpha = 0.05){

  predictors <- df[, ivs]
  criterion <- df[, control]

  # Prepare result containers
  ps <- rep(NA, length(ivs))
  r2s <- rep(NA, length(ivs))

  # One univariate regression per predictor: slope p-value and R^2
  for(i in seq_along(ivs)){
    mod <- summary(stats::lm(criterion ~ predictors[, i]))
    ps[i] <- mod$coefficients[2, 4]
    r2s[i] <- mod$r.squared
  }

  # Select final p-hacked p-value based on strategy; unique() added for
  # consistency with .multDVhack (guards against tied p-values)
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  r2.final <- unique(r2s[ps == p.final])

  return(list(p.final = p.final,
              ps = ps,
              r2.final = r2.final,
              r2s = r2s))
}
#' Simulate p-Hacking with multiple independent variables
#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations, plus R^2 values and (for the t-test variant) Cohen's d values
#' @param nobs.group Vector giving number of observations per group
#' @param nvar Number of independent variables (columns) in the data frame
#' @param r Desired correlation between the independent variables (scalar)
#' @param regression Should the simulation be conducted for a regression analysis (TRUE) or a t-test? (FALSE)
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param iter Number of simulation iterations
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test (default: 0.05)
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @export

sim.multIVhack <- function(nobs.group, nvar, r, regression=FALSE, strategy = "firstsig", iter = 1000, alternative = "two.sided", alpha = 0.05, shinyEnv = FALSE){

  # Simulate as many datasets as desired iterations
  dat <- lapply(seq_len(iter), function(i){
    .sim.multIV(nobs.group = nobs.group, nvar = nvar, r = r, regression = regression)
  })

  # BUGFIX: selecting a function with ifelse() fails at run time
  # ("attempt to replicate an object of type 'closure'") because ifelse()
  # replicates its branches with rep(); a plain scalar if/else returns the
  # closure itself and is the idiomatic form.
  .multIVhack <- if (regression) .multIVhack_reg else .multIVhack_ttest

  # Wrapper fixing all hacking parameters except the data
  .multIVhacklist <- function(x){
    .multIVhack(df = x, ivs = c(2:(nvar+1)), control = 1,
                strategy = strategy, alternative = alternative, alpha = alpha)
  }

  if(!shinyEnv){
    res <- pbapply::pblapply(dat, .multIVhacklist)
  } else {
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res <- lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        .multIVhacklist(x)
      })
    })
  }

  # Extract results (index 1 of ps/r2s/ds is the non-hacked first IV);
  # vapply replaces the previous grow-in-a-loop extraction
  ps.hack  <- vapply(res, function(z) z[["p.final"]], numeric(1))
  ps.orig  <- vapply(res, function(z) z[["ps"]][1], numeric(1))
  r2s.hack <- vapply(res, function(z) z[["r2.final"]], numeric(1))
  r2s.orig <- vapply(res, function(z) z[["r2s"]][1], numeric(1))

  out <- cbind(ps.hack, ps.orig, r2s.hack, r2s.orig)

  # Cohen's d is only defined for the t-test variant
  if(!regression){
    ds.hack <- vapply(res, function(z) z[["d.final"]], numeric(1))
    ds.orig <- vapply(res, function(z) z[["ds"]][1], numeric(1))
    out <- cbind(out, ds.hack, ds.orig)
  }

  return(out)
}
# ==============================================================================
# Exploiting statistical analysis options
# ==============================================================================

# Data simulation can be done with .sim.data

#' P-Hacking function for exploiting different statistical analysis options
#' @description Runs the same two-group comparison with several analysis
#' options (Student t-test, Welch test, Wilcoxon test, Yuen tests with four
#' trimming levels) and returns the p-hacked p-value plus all p-values.
#' @param df Data frame with one grouping variable and one continuous dependent variable
#' @param group Location of the grouping variable in the data frame
#' @param dv Location of the dependent variable in the data frame
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test
#' @importFrom stats t.test wilcox.test
#' @importFrom WRS2 yuen

.statAnalysisHack <- function(df, group, dv, strategy = "firstsig", alternative = "two.sided", alpha = 0.05){

  # Build a proper data frame once. The previous cbind() produced a matrix
  # that every formula interface had to coerce (WRS2::yuen even needed an
  # explicit as.data.frame() at the call site).
  dftest <- data.frame(group = df[, group], dv = df[, dv])

  # "Normal" t-test (this is the non-hacked reference result)
  p.orig <- stats::t.test(dv ~ group, var.equal = TRUE, alternative = alternative,
                          data = dftest)$p.value

  # Welch test (unequal variances)
  p.welch <- stats::t.test(dv ~ group, var.equal = FALSE,
                           alternative = alternative, data = dftest)$p.value

  # Mann-Whitney / Wilcoxon test
  p.wilcox <- stats::wilcox.test(dv ~ group, alternative = alternative,
                                 data = dftest)$p.value

  # Yuen test with different levels of trimming
  trim <- c(0.1, 0.15, 0.2, 0.25)
  p.yuen <- rep(NA, length(trim))
  for(i in seq_along(trim)) {
    p.yuen[i] <- WRS2::yuen(dv ~ group, tr = trim[i], data = dftest)$p.value
  }

  # First element is the non-hacked (Student t-test) p-value
  ps <- c(p.orig, p.welch, p.wilcox, p.yuen)

  # Select final p-hacked p-value based on strategy
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)

  return(list(p.final = p.final,
              ps = ps))
}
#' Simulate p-Hacking for exploiting different statistical analysis options
#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations
#' @param nobs.group Number of observations per group. Either a scalar or a vector with 2 elements.
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param alpha Significance level of the t-test
#' @param iter Number of simulation iterations
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @export

sim.statAnalysisHack <- function(nobs.group, strategy = "firstsig", alternative = "two.sided", alpha = 0.05, iter = 1000, shinyEnv = FALSE){

  # One simulated dataset per iteration
  dat <- lapply(seq_len(iter), function(i) .sim.data(nobs.group = nobs.group))

  # Wrapper fixing all hacking parameters except the data
  .statAnalysisHackList <- function(x){
    .statAnalysisHack(df = x, group = 1, dv = 2, strategy = strategy,
                      alternative = alternative, alpha = alpha)
  }

  # Console progress bar (pbapply) or Shiny progress indicator
  if(!shinyEnv){
    res <- pbapply::pblapply(dat, .statAnalysisHackList)
  } else {
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res <- lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        .statAnalysisHackList(x)
      })
    })
  }

  # Index 1 of ps is always the non-hacked (Student t-test) p-value
  ps.hack <- vapply(res, function(z) z[["p.final"]], numeric(1))
  ps.orig <- vapply(res, function(z) z[["ps"]][1], numeric(1))

  return(cbind(ps.hack, ps.orig))
}
# ==============================================================================
# Subgroup Analyses
# ==============================================================================

#' Simulate data with subgroups
#' @description Outputs data frame with multiple binary variables from which subgroups can be extracted
#' @param nobs.group Vector giving number of observations per group
#' @param nsubvars Integer specifying number of variables for potential subgroups

.sim.subgroup <- function(nobs.group, nsubvars){

  # Base dataset: grouping variable + dependent variable (helpers.R)
  dat <- .sim.data(nobs.group)

  # A scalar group size means two equally sized groups
  if (length(nobs.group) == 1) nobs.group <- rep(nobs.group, 2)
  nobs <- sum(nobs.group)

  # One random Bernoulli(0.5) column per potential subgroup variable
  subvars <- matrix(NA, nrow = nobs, ncol = nsubvars)
  for (j in seq_len(nsubvars)) {
    subvars[, j] <- sample(c(0, 1), size = nobs, replace = TRUE)
  }

  cbind(dat, subvars)
}
#' P-Hacking function for multiple subgroups analysis
#' @description Computes the full-sample t-test plus one t-test per level of
#' each subgroup variable, and returns the p-hacked p-value together with
#' all p-values, R^2 and Cohen's d values computed in the process.
#' @param df A matrix or data frame containing all relevant data
#' @param iv Integer specifying the location of the binary independent variable in the data frame
#' @param dv Integer specifying the location of the dependent variable in the data frame
#' @param subvars Vector specifying the location of the subgroup variables in the data frame
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test
#' @importFrom dplyr group_by_at do
#' @importFrom stats t.test
#' @importFrom dplyr "%>%"
#' @importFrom rlang .data

.subgroupHack <- function(df, iv, dv, subvars, alternative = "two.sided", strategy = "firstsig", alpha = 0.05){

  # Assemble working data: after as.data.frame() the columns are named
  # V1 (group), V2 (outcome), followed by the subgroup variables
  ttest.df <- cbind(df[, iv], df[, dv])
  subvars.df <- cbind(df[, subvars])
  dfnew <- as.data.frame(cbind(ttest.df, subvars.df))

  # --- Not p-hacked: t-test, R^2 and Cohen's d on the full sample ---
  mod.orig <- stats::t.test(ttest.df[, 2] ~ ttest.df[, 1], var.equal = TRUE, alternative = alternative)
  p.orig <- mod.orig$p.value
  r2.orig <- .compR2t(ttest.df[ttest.df[, 1] == unique(ttest.df[, 1])[1], 2],
                      ttest.df[ttest.df[, 1] == unique(ttest.df[, 1])[2], 2])
  d.orig <- .compCohensD(unname(mod.orig$statistic), nrow(ttest.df)/2)

  # --- p-hacked: re-run the t-test within each subgroup split ---
  ps <- list()
  ds <- list()
  r2s <- list()

  for(i in seq_along(subvars)){

    # p-value and t-statistic per level of the i-th subgroup variable
    stats.by <- dplyr::group_by_at(dfnew, subvars[i]) %>%
      dplyr::do(as.data.frame(stats::t.test(.data$V2 ~ .data$V1, var.equal = TRUE, alternative = alternative)[c("p.value", "statistic")]))
    # group sizes per level (needed to convert t to Cohen's d)
    sizes.by <- dplyr::group_by_at(dfnew, subvars[i]) %>%
      dplyr::do(as.data.frame(table(.data$V1)))
    # R^2 per level
    r2.by <- dplyr::group_by_at(dfnew, subvars[i]) %>%
      do(as.data.frame(.compR2t(.data$V2[.data$V1 == unique(.data$V1)[1]], .data$V2[.data$V1 == unique(.data$V1)[2]])))

    ps[[i]] <- stats.by[[2]]
    # d = t * sqrt(1/n1 + 1/n2), computed separately for both levels
    ds[[i]] <- c(stats.by[[3]][1]*sqrt(sum(1/sizes.by[[3]][1:2])), stats.by[[3]][2]*sqrt(sum(1/sizes.by[[3]][3:4])))
    r2s[[i]] <- r2.by[[2]]
  }

  # Non-hacked result goes first so callers can find it at index 1
  ps <- c(p.orig, unlist(ps))
  r2s <- c(r2.orig, unlist(r2s))
  ds <- c(d.orig, unlist(ds))

  # Select final p-hacked p-value based on strategy; unique() guards
  # against tied p-values producing vector-valued results
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  r2.final <- unique(r2s[ps == p.final])
  d.final <- unique(ds[ps == p.final])

  return(list(p.final = p.final,
              ps = ps,
              r2.final = r2.final,
              r2s = r2s,
              d.final = d.final,
              ds = ds))
}
#' Simulate p-hacking with multiple subgroups
#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations, together with the corresponding R^2 and Cohen's d values
#' @param nobs.group Vector giving number of observations per group
#' @param nsubvars Integer specifying number of variables for potential subgroups
#' @param alternative Direction of the t-test ("two.sided", "less", "greater")
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test
#' @param iter Number of simulation iterations
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @export

sim.subgroupHack <- function(nobs.group, nsubvars, alternative = "two.sided", strategy = "firstsig", alpha = 0.05, iter = 1000, shinyEnv = FALSE){

  # One simulated dataset per iteration
  dat <- lapply(seq_len(iter), function(i){
    .sim.subgroup(nobs.group = nobs.group, nsubvars = nsubvars)
  })

  # Wrapper fixing all hacking parameters except the data
  .subgroupHackList <- function(x){
    .subgroupHack(df = x, iv = 1, dv = 2, subvars = c(3:(2 + nsubvars)),
                  alternative = alternative, strategy = strategy, alpha = alpha)
  }

  # Console progress bar (pbapply) or Shiny progress indicator
  if(!shinyEnv){
    res <- pbapply::pblapply(dat, .subgroupHackList)
  } else {
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res <- lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        .subgroupHackList(x)
      })
    })
  }

  # Index 1 of ps/r2s/ds always holds the non-hacked full-sample result
  ps.hack  <- vapply(res, function(z) z[["p.final"]], numeric(1))
  ps.orig  <- vapply(res, function(z) z[["ps"]][1], numeric(1))
  r2s.hack <- vapply(res, function(z) z[["r2.final"]], numeric(1))
  r2s.orig <- vapply(res, function(z) z[["r2s"]][1], numeric(1))
  ds.hack  <- vapply(res, function(z) z[["d.final"]], numeric(1))
  ds.orig  <- vapply(res, function(z) z[["ds"]][1], numeric(1))

  return(cbind(ps.hack, ps.orig, r2s.hack, r2s.orig, ds.hack, ds.orig))
}
#' P-Hacking function variable transformation in univariate linear regression
#' @description Regresses y on x for every combination of (un)transformed x
#' and y and returns the p-hacked p-value plus all p-values and R^2 values
#' computed in the process.
#' @param df Data frame containing x and y variables as columns
#' @param x Location of x variable (predictor) in the data frame
#' @param y Location of y variable (criterion) in the data frame
#' @param transvar Which variables should be transformed? Either "x" (for x variable), "y" (for y variable), or "xy" (for both)
#' @param testnorm Should variables only be transformed after a significant test for normality of residuals?
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test (default: 0.05)

.varTransHack <- function(df, x, y, transvar, testnorm = FALSE, strategy = "firstsig", alpha = 0.05){

  x <- df[, x]
  y <- df[, y]

  # Optional gate: when the Shapiro-Wilk test on the residuals of the
  # untransformed model is non-significant (residuals look normal), no
  # transformations are attempted at all.
  normality <- FALSE
  if(testnorm){
    mod <- stats::lm(y ~ x)
    normality <- stats::shapiro.test(stats::residuals(mod))$p.value > alpha
  }

  # Column 1 of each matrix holds the untransformed variable; transformed
  # versions are appended as additional columns below.
  Xtrans <- matrix(NA, nrow = nrow(df))
  Xtrans[, 1] <- x
  Ytrans <- matrix(NA, nrow = nrow(df))
  Ytrans[, 1] <- y

  # The "+ abs(min) + 1e-10" shift makes log/sqrt arguments strictly
  # positive. NOTE(review): 1/x yields Inf if a value is exactly 0
  # (probability zero for continuous data) — confirm acceptable.
  if(transvar != "y" && normality == FALSE){
    Xtrans <- cbind(Xtrans,
                    log(x + abs(min(x)) + 1e-10),  # log transformation
                    sqrt(x + abs(min(x)) + 1e-10), # square root transformation
                    1/x                            # inverse
    )
  }

  if(transvar != "x" && normality == FALSE){
    Ytrans <- cbind(Ytrans,
                    log(y + abs(min(y)) + 1e-10),  # log transformation
                    sqrt(y + abs(min(y)) + 1e-10), # square root transformation
                    1/y                            # inverse
    )
  }

  # Regress every version of y on every version of x
  ps <- matrix(NA, nrow = ncol(Xtrans), ncol = ncol(Ytrans))
  r2s <- matrix(NA, nrow = ncol(Xtrans), ncol = ncol(Ytrans))

  for(i in seq_len(ncol(Xtrans))){
    for(j in seq_len(ncol(Ytrans))){
      fit <- summary(stats::lm(Ytrans[, j] ~ Xtrans[, i]))
      ps[i, j] <- fit$coefficients[2, 4]
      r2s[i, j] <- fit$r.squared
    }
  }

  ps <- as.vector(ps)
  r2s <- as.vector(r2s)

  # Select final p-hacked p-value based on strategy
  p.final <- .selectpvalue(ps = ps, strategy = strategy, alpha = alpha)
  r2.final <- unique(r2s[ps == p.final])

  return(list(p.final = p.final,
              ps = ps,
              r2.final = r2.final,
              r2s = r2s))
}
#' Simulate p-hacking with variable transformations
#' @description Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations, together with the corresponding R^2 values
#' @param nobs Integer giving number of observations
#' @param transvar Which variables should be transformed? Either "x" (for x variable), "y" (for y variable), or "xy" (for both)
#' @param testnorm Should variables only be transformed after a significant test for normality of residuals?
#' @param strategy String value: One out of "firstsig", "smallest", "smallest.sig"
#' @param alpha Significance level of the t-test (default: 0.05)
#' @param iter Number of simulation iterations
#' @param shinyEnv Is the function run in a Shiny session? TRUE/FALSE
#' @export

sim.varTransHack <- function(nobs, transvar, testnorm = FALSE, strategy = "firstsig", alpha = 0.05, iter = 1000, shinyEnv = FALSE){

  # Simulate as many datasets as desired iterations (x and y uncorrelated,
  # i.e. the null hypothesis is true)
  dat <- lapply(seq_len(iter), function(i){
    .sim.multcor(nobs = nobs, nvar = 2, r = 0)
  })

  # Wrapper fixing all hacking parameters except the data
  .varTransHackList <- function(arg){
    .varTransHack(df = arg, x = 1, y = 2, testnorm = testnorm, transvar = transvar,
                  strategy = strategy, alpha = alpha)
  }

  if(!shinyEnv){
    res <- pbapply::pblapply(dat, .varTransHackList)
  } else {
    percentage <- 0
    withProgress(message = "Running simulation", value = 0, {
      res <- lapply(dat, function(x){
        percentage <<- percentage + 1/length(dat)*100
        incProgress(1/length(dat), detail = paste0("Progress: ", round(percentage, 2), "%"))
        # BUGFIX: the Shiny code path previously called .varTransHack
        # without the testnorm argument, silently falling back to
        # testnorm = FALSE; it now uses the same wrapper as the
        # non-Shiny path so both behave identically.
        .varTransHackList(x)
      })
    })
  }

  # Index 1 of ps/r2s is the untransformed (non-hacked) regression result
  ps.hack  <- vapply(res, function(z) z[["p.final"]], numeric(1))
  ps.orig  <- vapply(res, function(z) z[["ps"]][1], numeric(1))
  r2s.hack <- vapply(res, function(z) z[["r2.final"]], numeric(1))
  r2s.orig <- vapply(res, function(z) z[["r2s"]][1], numeric(1))

  return(cbind(ps.hack, ps.orig, r2s.hack, r2s.orig))
}
39 | interactions = FALSE, strategy = "smallest", 40 | alpha = 0.05, iter = 10) 41 | 42 | ## ----subgroupAnalysis--------------------------------------------------------- 43 | set.seed(1234) 44 | sim.subgroupHack(nobs.group = 30, nsubvars = 3, alternative = "two.sided", 45 | strategy = "smallest", alpha = 0.05, iter = 10) 46 | 47 | ## ----compositeScores---------------------------------------------------------- 48 | set.seed(1234) 49 | sim.compscoreHack(nobs = 30, ncompv = 5, rcomp = 0.7, ndelete = 3, 50 | strategy = "smallest", alpha = 0.05, iter = 10) 51 | 52 | ## ----variableTransformation--------------------------------------------------- 53 | set.seed(1234) 54 | sim.varTransHack(nobs = 30, transvar = "xy", strategy = "smallest", 55 | alpha = 0.05, iter = 10) 56 | 57 | ## ----exploitCutoffs----------------------------------------------------------- 58 | set.seed(1234) 59 | sim.cutoffHack(nobs = 30, strategy = "smallest", alpha = 0.05, iter = 10) 60 | 61 | ## ----statAnalysis------------------------------------------------------------- 62 | set.seed(1234) 63 | sim.statAnalysisHack(nobs.group = 30, strategy = "smallest", 64 | alternative = "two.sided", alpha = 0.05, iter = 10) 65 | 66 | ## ----favorableImputation------------------------------------------------------ 67 | set.seed(1234) 68 | sim.impHack(nobs = 30, missing = 0.2, which = c(1:10), strategy = "smallest", 69 | alpha = 0.05, iter = 10) 70 | 71 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/data/startplots.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astefan1/phacking_compendium/b663bf8701c3af19159e1d01d5986b2db580901b/phackR/inst/shiny-phack/ShinyPHack/data/startplots.rds -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/01_CompScores.md: 
-------------------------------------------------------------------------------- 1 | ### Scale Redefinition 2 | 3 | The *scale redefinition* strategy assumes that one of the variables in the hypothesis test in question is a composite score (e.g., the mean of items in a personality inventory), and that a researcher manipulates which items are included in the composite score to obtain a significant result. 4 | 5 | Here, we assume that the focal hypothesis test is a univariate linear regression, and that items are excluded based on the reliability coefficient Cronbach's α in an iterative fashion. The underlying idea is to delete the item that contributes least to a reliable score, i.e., the item leading to the highest Cronbach's α when deleted. After a candidate item for deletion has been found, the regression is recomputed with (1) the reduced score as a predictor, (2) the deleted item as a predictor, and (3) the score of all deleted items as a predictor, and the p-values are recorded. 6 | 7 | The simulation function in this Shiny app allows the specification of the total number of items in the score, as well as their correlation. Users can also specify the maximum number of items deleted from the score. Naturally, this number should be smaller than the total number of items. Other options users can specify are the number of observations, the p-value selection method, the significance level α, and the number of simulation iterations. 8 | 9 | 10 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/02_ExploitCovariates.md: -------------------------------------------------------------------------------- 1 | ### Controlling for Covariates 2 | 3 | This p-hacking strategy exploits the common practice of controlling for covariates in statistical analyses. Here, we assume that a researcher is interested in an independent samples t-test. 
If this test does not yield a significant result, the researcher introduces a number of continuous covariates into the analysis (which will then be computed as an ANCOVA). We assume that all covariates are first entered into the analysis individually, and if this does not yield a significant result, they are added sequentially as y ~ x + cov1, y ~ x + cov1 + cov2, ... (in decreasing order of correlation with the dependent variable). 4 | 5 | The simulation function in this Shiny app allows the specification of the number of covariates, as well as their correlation. Users can also specify whether the ANCOVA models should include interaction terms. Note that the inclusion of interaction terms will slow down the computation considerably. Other options users can specify are the number of observations per group, the p-value selection method, the significance level α, and the number of simulation iterations. 6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/03_ExploitCutoffs.md: -------------------------------------------------------------------------------- 1 | ### Discretizing variables 2 | 3 | This p-hacking strategy is based on splitting a continuous variable into categories with regard to two or more arbitrary cutoff values. Here, we assume that at the start a researcher plans to conduct a univariate linear regression. If this analysis does not yield a significant result, the researcher discretizes the independent variable and compares the means of the resulting groups in the dependent variable. We simulate three approaches: (1) Compare high-scorers and low-scorers based on a median split; (2) conduct a three-way split of the independent variable and compare the two extreme groups; (3) conduct a three-way split of the independent variables and compare all three groups using an ANOVA. 
4 | 5 | The simulation function in this Shiny app allows the specification of the sample size, as well as of the p-value selection method, the significance level α, and the number of iterations in the simulation. 6 | 7 | 8 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/04_FavorableImputation.md: -------------------------------------------------------------------------------- 1 | ### Favorable Imputation of Missing Values 2 | 3 | This p-hacking strategy assumes that the original dataset a researcher is confronted with contains missing values. A researcher engaging in p-hacking can now try out different imputation methods to replace the missing values, until (possibly) a significant result is obtained. Here, we simulate this p-hacking strategy based on a univariate linear regression, because many imputation methods assume a regression context. 4 | 5 | The simulation function in this Shiny app allows the specification of the total number of observations (observations with missing values are included in this number), the percentage of missing values, and the imputation methods that are used. The percentage of missing values defined is the same for the predictor and the outcome variable (e.g., if the percentage is set to 10%, there will be ten percent missing values in both the predictor and the outcome variable). Additionally, users can specify the p-value selection method, the significance level α, and the number of simulation iterations. 6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/05_IncorrectRounding.md: -------------------------------------------------------------------------------- 1 | ### Incorrect Rounding 2 | 3 | This p-hacking strategy is not based on tinkering with the data or the analyses, but on misreporting the analysis outcome. Usually, the result of a hypothesis test is significant if p ≤ α. 
However, as has been shown (e.g., Hartgerink, van Aert, van Nuijten, Wicherts, & van Assen, 2016), sometimes p-values that are slightly larger than the significance level are reported as significant, that is, p-values are incorrectly rounded down to p = α. 4 | 5 | In the simulation function in this Shiny app, the user can specify the margin in which p-values should be rounded down, as well as the significance level. For example, if the significance level is specified as α = 0.05, and the margin is specified as 0.001, then all p-values below 0.05+0.001=0.051 will be reported as significant and rounded down to p = 0.05. Additionally, users can specify the direction of the test, and the number of simulation iterations. 6 | 7 | Note that type I error rates of this p-hacking strategy can also be determined analytically. The theoretical α-level after p-hacking is equivalent to the sum of the original alpha level and the rounding margin. 8 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/06_OptionalStopping.md: -------------------------------------------------------------------------------- 1 | ### Optional Stopping 2 | 3 | Researchers engaging in optional stopping repeatedly inspect the results of the statistical tests during data collection. They stop data collection as soon as a significant result has been obtained or a maximum sample size is reached. Here, we assume that the underlying statistical test is an independent-samples t-test. 4 | 5 | In the simulation function provided in this Shiny app, the user can specify the minimum sample size (per group), the maximum sample size (per group), and the number of observations that are collected at each step of the sampling process (*step size*). For example, if the minimum sample size is specified to be 10, the maximum sample size 30, and the step size 5, then interim analyses will be conducted at N = 10, N = 15, N = 20, N = 25, and N = 30. 
Additionally, users can define the direction of the hypothesis test, the significance level α, and the number of simulation iterations. 6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/07_OutlierExclusion.md: -------------------------------------------------------------------------------- 1 | ### Outlier Exclusion 2 | 3 | In this p-hacking strategy, a researcher applies different outlier exclusion criteria to their data with the goal of obtaining a significant result in a focal hypothesis test. Here, we assume that the hypothesis test in question is a univariate linear regression. Further, we assume that the researcher first checks for potential outliers in the predictor variable (x) and in the outcome variable (y), and then reruns the analysis (1) without the xy pairs where x is an outlier, (2) without the xy pairs where y is an outlier, (3) without the xy pairs where x *and* y are outliers. We assume that this is done for each outlier exclusion method. 4 | 5 | In the simulation function provided in this Shiny app, users can define the outlier exclusion methods that are applied, as well as the sample size, the p-value selection method, the significance level α, and the number of simulation iterations. 6 | 7 | 8 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/09_SelectiveReportingDV.md: -------------------------------------------------------------------------------- 1 | ### Selective Reporting of the Dependent Variable 2 | 3 | This p-hacking strategy assumes that the dataset contains multiple candidate dependent variables. For example, in a clinical trial, the treatment and control group could be compared on different outcome variables, such as mental and physical well-being. 
A researcher engaging in p-hacking would conduct one hypothesis test for each dependent variable, and selectively report the significant results. Here, we assume that the hypothesis test in question is an independent-samples t-test. 4 | 5 | The simulation function in this Shiny app allows the specification of the number of dependent variables as well as their correlation. Additionally, users can define the number of observations per group, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations. 6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/10_SelectiveReportingIV.md: -------------------------------------------------------------------------------- 1 | ### Selective Reporting of the Independent Variable 2 | 3 | This p-hacking strategy assumes that an experiment or clinical trial contains multiple experimental groups and one control group. A researcher engaging in p-hacking statistically compares all experimental groups to the control group, and only reports the significant results. Here, we assume that all conducted hypothesis tests are t-tests. 4 | 5 | The simulation function in this Shiny app allows the specification of the number of experimental groups (independent variables), and their correlation. Additionally, users can set the number of observations per group, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations. 6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/11_StatAnalysis.md: -------------------------------------------------------------------------------- 1 | ### Exploiting Alternative Hypothesis Tests 2 | 3 | Often, different statistical analysis techniques can be used to answer the same research question.
This p-hacking strategy assumes that a researcher tries out different statistical analysis options and decides for the one yielding a significant result. Here, we assume that the hypothesis tests in question are an independent-samples t-test, a Welch test, a Wilcoxon test, and a Yuen test (with different levels of trimming). 4 | 5 | The simulation function in this Shiny app allows users to specify the number of observations per group, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations. 6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/12_SubgroupAnalysis.md: -------------------------------------------------------------------------------- 1 | ### Subgroup Analyses 2 | 3 | This p-hacking strategy assumes that if an initial hypothesis test does not yield a significant result, a researcher would repeat the same hypothesis test on subgroups of the sample (e.g., right-handed and left-handed participants). Here, we assume that all subgroup variables have two levels, and that the hypothesis test is conducted on each level of the subgroup variables. Additionally, we assume that the hypothesis test in question is a t-test (e.g., between an experimental and a control condition). Note that we do not assume that the experimental and control condition are balanced within the subgroups. Therefore, within a subgroup, the number of participants in the experimental and control group can differ. 4 | 5 | In the simulation function in this Shiny app, users can specify the number of observations per group in the original t-test, the number of subgroup variables, the direction of the test, the p-value selection method, the significance level α, and the number of simulation iterations. 
6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/13_VariableTransformation.md: -------------------------------------------------------------------------------- 1 | ### Variable Transformation 2 | 3 | This p-hacking strategy assumes that if an initial hypothesis test does not yield significant results, a researcher would apply transformations to the variables involved in the test. Here, we assume that the test in question is a univariate linear regression, and that the transformations are a natural log transformation (ln(x)), a square root transformation (√x), and an inverse transformation (1/x). Transformations can be applied to the predictor variable, to the outcome variable, or both. 4 | 5 | In the simulation function in this Shiny app, users can specify which of the variables should be transformed. Additionally, they can specify the number of observations, the p-value selection method, the significance level α, and the number of simulation iterations. 6 | -------------------------------------------------------------------------------- /phackR/inst/shiny-phack/ShinyPHack/mddoc/landingPage.md: -------------------------------------------------------------------------------- 1 | # Gazing into the Abyss of p-Hacking: A Shiny App for p-Hacking Simulation 2 | 3 | ## What is p-Hacking? 4 | The p-value is a core component of null hypothesis significance testing (NHST), a statistical framework that has found ubiquitous use across many scientific disciplines. A p-value is defined as the probability to obtain a result at least as extreme as the observed one if the null hypothesis is true (i.e., if there is no effect). If the p-value is smaller than a certain threshold called alpha level, then the test result is labeled "significant" and the null hypothesis is rejected. 
Researchers who are interested in showing an effect in their data (e.g., that a new medicine improved the health of patients) are therefore eager to obtain small p-values that allow them to reject the null hypothesis and claim the existence of an effect. 5 | 6 | In recent years, failed attempts to replicate experiments have instigated investigations into how researchers use NHST in practice. Studies found that many researchers apply questionable research practices to render previously non-significant results significant. We summarize these practices under the term of *p-hacking*. 7 | 8 | ## How Does p-Hacking Work? 9 | All p-hacking strategies are based on the principle of alpha error accumulation. Basically, alpha error accumulation means that as more and more hypothesis tests are conducted, the probability of making at least one false decision increases. Therefore, even if there is no effect in the population, the probability is very high that at least one hypothesis test will (erroneously) show a significant result, if a sufficiently large number of tests are conducted. Researchers then report this significant result, and claim to have found an effect. 10 | 11 | ## Obvious Warning: Thou Shalt Not p-Hack! 12 | Given the explanation above, it almost seems needless to say that p-hacking is detrimental and you should not do it. P-hacking slows down scientific progress by increasing the amount of false positive results in the literature. Additionally, p-hacking leads to an inflation of effect sizes that are published in the literature because only "extreme" results are reported. This means that p-hacking increases the number of cases where research wrongly claims an effect, and even if an effect exists, the reported effect size is likely to be larger than the true effect size. 13 | 14 | Sounds bad? It actually is. What makes it even worse is that it is difficult to discover p-hacking in the literature. How can we tell whether a reported effect is real or p-hacked?
How can we tell that a p-hacked significant result (i.e., a significant finding that a researcher found after running many hypothesis tests) is not actually a true effect that was discovered? The truth is, for a single finding, it is impossible to know. However, if we know what p-hacking strategies researchers employ, it is possible to predict what distributions of p-values and effect sizes will look like, and how the rate of false positive results will be changed compared to a situation without p-hacking. The purpose of this app is to showcase these scenarios using simulated data. 15 | 16 | ## A Compendium of p-Hacking Strategies 17 | In the literature, p-hacking has typically been described as being comprised of different strategies that researchers can use to tinker with their statistical results to achieve statistical significance. In order to learn more about the effects of p-hacking, it is important to understand all strategies and their effects on the reported scientific results. However, a comprehensive description of these strategies has been missing so far. 18 | 19 | Here, we provide an overview of different p-hacking strategies that have been mentioned in the literature, together with a Shiny app that lets users explore the effects of p-hacking on the distribution of hypothesis testing results. 20 | 21 | ## Exploring the Effects of P-Hacking 22 | Each tab of this Shiny app lets the user explore the effects of a different p-hacking strategy. All tabs have the same structure: First, we describe the p-hacking strategy, and how we applied it in our simulations. Below, we present simulation results, specifically the distribution of p-values, the distribution of effect sizes (if applicable), and the rate of false positive results. On a panel on the right side, the user can adjust the settings of the simulation, including the severity of the p-hacking. 
23 | 24 | ### Common Settings 25 | Several settings are common to the simulation of (almost) all p-hacking strategies. To avoid unnecessary repetition, we will describe these settings here. 26 | 27 | #### p-Value selection method 28 | In all simulation functions, it is necessary to specify how the final p-value is determined. There are three options: *first significant* simulates a situation where the researcher conducts a series of hypothesis tests, and stops as soon as the result is significant, that is, at the first significant p-value. In a comment on Simonsohn et al. (2014), Ulrich and Miller (2015) argued that researchers might instead engage in "ambitious" p-hacking, where the researcher conducts a series of hypothesis tests and selects the smallest significant p-value from the set. This strategy is implemented in the *smallest significant* option. Simonsohn (private comm.) argues that there might exist a third p-hacking strategy where the researcher tries a number of different analysis options, and selects the smallest p-value, no matter if it is significant or not. This strategy is implemented in the option *smallest*. The default strategy is *first significant*. 29 | 30 | #### True effect size 31 | The true effect size in all simulations is equal to zero. 32 | 33 | #### Significance level 34 | The significance level α determines the significance level for each hypothesis test. For example, if the significance level is set to α = 0.05 (the default), the simulation assumes that a researcher would call the result of a hypothesis test significant if p < 0.05. 35 | 36 | #### Iterations 37 | The *iterations* option determines the number of iterations in the simulation. The default setting is 1000. 38 | 39 | #### Alternative 40 | Whenever the simulations are based on t-tests, the option *alternative* can be specified. This option relates to the sidedness of the alternative hypothesis in the t-test. It can either be *two-sided* or *greater*. 
The default setting is *two-sided*. 41 | 42 | #### Number of observations 43 | The number of observations determines the sample size in the test. In the case of a t-test, the specified number refers to the observations *per group*. In the case of a linear regression, the specified number refers to the overall sample size. 44 | 45 | #### Start simulation 46 | A new simulation will be started when you click the *Start simulation* button on the bottom of the options panel in each tab. The progress of the simulation will be displayed in a small progress bar in the bottom right corner of the screen. 47 | 48 | ## Resources 49 | The code for this Shiny app as well as for the simulations can be found on [https://github.com/nicebread/phacking_compendium](https://github.com/nicebread/phacking_compendium). 50 | 51 | ## About 52 | This Shiny app and the underlying R-package were created by Angelika Stefan and Felix Schönbrodt. If you have questions or feature requests, submit a GitHub issue on [https://github.com/nicebread/phacking_compendium](https://github.com/nicebread/phacking_compendium) or write an e-mail to a.m.stefan[at]uva.nl. 
53 | -------------------------------------------------------------------------------- /phackR/inst/sim_startplots_Shiny.R: --------------------------------------------------------------------------------
# ==============================================================================
# Shiny App: Simulation for Plots at Start
#
# Pre-computes one simulation per p-hacking strategy and stores the resulting
# plots, false-positive rates, and raw results in data/startplots.rds, which
# the Shiny app loads at startup.
# ==============================================================================
library(phackR)
startplots <- list()

# Format a false-positive rate as a percentage string, e.g. "5.2 %".
# Note: <= is required so that p-values rounded down to exactly alpha by the
# incorrect-rounding strategy count as significant; for strategies producing
# continuous p-values, <= and < are equivalent in practice.
.fprate <- function(res, column, alpha = 0.05) {
  paste0(round(sum(res[, column] <= alpha) / nrow(res) * 100, 2), " %")
}

# 1: Composite Scores
res1 <- sim.compscoreHack(nobs = 30, ncompv = 5, rcomp = 0.8, ndelete = 2, strategy = "firstsig", alpha = 0.05, iter = 1000)
startplots$compscorePlot <- phackR:::pplots(simdat = res1, alpha = 0.05)
startplots$compscorePlotES <- phackR:::esplots(simdat = res1, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$compscore.fprate.p <- .fprate(res1, "ps.hack")
startplots$compscore.fprate.o <- .fprate(res1, "ps.orig")
startplots$res1 <- res1

# 2: Exploit Covariates
res2 <- sim.covhack(nobs.group = 30, ncov = 3, rcov = 0.3, rcovdv = 0.5, interactions = FALSE, strategy = "firstsig", alpha = 0.05, iter = 1000)
startplots$expCovPlot <- phackR:::pplots(simdat = res2, alpha = 0.05)
startplots$expCovES <- phackR:::esplots(simdat = res2, EScolumn.hack = 3, EScolumn.orig = 4,
                                        titles = c(expression("Distribution of p-hacked effect sizes "*eta^2),
                                                   expression("Distribution of original effect sizes "*eta^2)))
startplots$expcov.fprate.p <- .fprate(res2, "ps.hack")
startplots$expcov.fprate.o <- .fprate(res2, "ps.orig")
startplots$res2 <- res2

# 3: Exploit Cutoffs
res3 <- sim.cutoffHack(nobs = 30, strategy = "firstsig", alpha = 0.05, iter = 1000)
startplots$expCutPlot <- phackR:::pplots(simdat = res3, alpha = 0.05)
startplots$expCutES <- phackR:::esplots(simdat = res3, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$expcut.fprate.p <- .fprate(res3, "ps.hack")
startplots$expcut.fprate.o <- .fprate(res3, "ps.orig")
startplots$res3 <- res3

# 4: Favorable Imputation
res4 <- sim.impHack(nobs = 30, missing = 0.1, which = c(1:3), strategy = "firstsig", alpha = 0.05, iter = 1000)
startplots$favImpPlot <- phackR:::pplots(simdat = res4, alpha = 0.05)
startplots$favImpES <- phackR:::esplots(simdat = res4, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$favimp.fprate.p <- .fprate(res4, "ps.hack")
startplots$favimp.fprate.o <- .fprate(res4, "ps.orig")
startplots$res4 <- res4

# 5: Incorrect Rounding
res5 <- sim.roundhack(roundinglevel = 0.051, iter = 1000, alternative = "two.sided", alpha = 0.05)
startplots$roundingPlot <- phackR:::pplots(simdat = res5, alpha = 0.05)
startplots$roundingES <- phackR:::esplots(simdat = res5, EScolumn.hack = 3, EScolumn.orig = 4)
# BUG FIX: the original lines read paste0(sum(round(ps <= 0.05)/1000*100, 2), " %"):
# round() wrapped the logical vector and the digits argument 2 was summed into
# the total, inflating the displayed rate by 2 percentage points.
startplots$rounding.fprate.p <- .fprate(res5, "ps.hack")
startplots$rounding.fprate.o <- .fprate(res5, "ps.orig")
startplots$res5 <- res5

# 6: Optional Stopping
res6 <- sim.optstop(n.min = 10, n.max = 100, step = 1, alternative = "two.sided", iter = 1000, alpha = 0.05)
startplots$optstopPlot <- phackR:::pplots(simdat = res6, alpha = 0.05)
startplots$optstopESr2 <- phackR:::esplots(simdat = res6, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$optstopESd <- phackR:::esplots(simdat = res6, EScolumn.hack = 5, EScolumn.orig = 6,
                                          titles = c(expression("Distribution of p-hacked effect sizes "*delta),
                                                     expression("Distribution of original effect sizes "*delta)))
startplots$optstop.fprate.p <- .fprate(res6, "ps.hack")
startplots$optstop.fprate.o <- .fprate(res6, "ps.orig")
startplots$res6 <- res6

# 7: Outlier Exclusion
res7 <- sim.outHack(nobs = 30, which = c(1:2), strategy = "firstsig", alpha = 0.05, iter = 1000)
startplots$outExclPlot <- phackR:::pplots(simdat = res7, alpha = 0.05)
startplots$outExclES <- phackR:::esplots(simdat = res7, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$outExcl.fprate.p <- .fprate(res7, "ps.hack")
startplots$outExcl.fprate.o <- .fprate(res7, "ps.orig")
startplots$res7 <- res7

# 9: Selective Reporting DV
res9 <- sim.multDVhack(nobs.group = 30, nvar = 5, r = 0.5, strategy = "firstsig", iter = 1000, alternative = "two.sided", alpha = 0.05)
startplots$SRDVPlot <- phackR:::pplots(simdat = res9, alpha = 0.05)
startplots$SRDVESr2 <- phackR:::esplots(simdat = res9, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$SRDVESd <- phackR:::esplots(simdat = res9, EScolumn.hack = 5, EScolumn.orig = 6,
                                       titles = c(expression("Distribution of p-hacked effect sizes "*delta),
                                                  expression("Distribution of original effect sizes "*delta)))
startplots$SRDV.fprate.p <- .fprate(res9, "ps.hack")
startplots$SRDV.fprate.o <- .fprate(res9, "ps.orig")
startplots$res9 <- res9

# 10: Selective Reporting IV
# BUG FIX: the original script re-ran sim.multDVhack() here (copy-paste error),
# so the IV panel showed DV-hacking results. It must use sim.multIVhack().
res10 <- sim.multIVhack(nobs.group = 30, nvar = 5, r = 0.5, strategy = "firstsig", regression = FALSE, iter = 1000, alternative = "two.sided", alpha = 0.05)
startplots$SRIVPlot <- phackR:::pplots(simdat = res10, alpha = 0.05)
startplots$SRIVESr2 <- phackR:::esplots(simdat = res10, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$SRIVESd <- phackR:::esplots(simdat = res10, EScolumn.hack = 5, EScolumn.orig = 6,
                                       titles = c(expression("Distribution of p-hacked effect sizes "*delta),
                                                  expression("Distribution of original effect sizes "*delta)))
startplots$SRIV.fprate.p <- .fprate(res10, "ps.hack")
startplots$SRIV.fprate.o <- .fprate(res10, "ps.orig")
startplots$res10 <- res10

# 11: Statistical Analyses
res11 <- sim.statAnalysisHack(nobs.group = 30, strategy = "firstsig", alternative = "two.sided", alpha = 0.05, iter = 1000)
startplots$statAnalysisPlot <- phackR:::pplots(simdat = res11, alpha = 0.05)
startplots$statAnalysis.fprate.p <- .fprate(res11, "ps.hack")
startplots$statAnalysis.fprate.o <- .fprate(res11, "ps.orig")
startplots$res11 <- res11

# 12: Subgroup Analyses
res12 <- sim.subgroupHack(nobs.group = 30, nsubvars = 5, alternative = "two.sided", strategy = "firstsig", alpha = 0.05, iter = 1000)
startplots$subgroupPlot <- phackR:::pplots(simdat = res12, alpha = 0.05)
startplots$subgroupESr2 <- phackR:::esplots(simdat = res12, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$subgroupESd <- phackR:::esplots(simdat = res12, EScolumn.hack = 5, EScolumn.orig = 6,
                                           titles = c(expression("Distribution of p-hacked effect sizes "*delta),
                                                      expression("Distribution of original effect sizes "*delta)))
startplots$subgroup.fprate.p <- .fprate(res12, "ps.hack")
startplots$subgroup.fprate.o <- .fprate(res12, "ps.orig")
startplots$res12 <- res12

# 13: Variable Transformations
res13 <- sim.varTransHack(nobs = 30, transvar = "x", strategy = "firstsig", alpha = 0.05, iter = 1000)
startplots$varTransPlot <- phackR:::pplots(simdat = res13, alpha = 0.05)
startplots$varTransES <- phackR:::esplots(simdat = res13, EScolumn.hack = 3, EScolumn.orig = 4)
startplots$varTrans.fprate.p <- .fprate(res13, "ps.hack")
startplots$varTrans.fprate.o <- .fprate(res13, "ps.orig")
startplots$res13 <- res13

saveRDS(startplots, file = "./inst/shiny-phack/ShinyPHack/data/startplots.rds")

-------------------------------------------------------------------------------- /phackR/man/dot-compCohensD.Rd: --------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.R
\name{.compCohensD}
\alias{.compCohensD}
\title{Compute Cohen's d}
\usage{
.compCohensD(t, n)
}
\arguments{
\item{t}{t-value}

\item{n}{sample size per group}
}
\description{
Compute Cohen's d from t-value with equal sized groups of size n
}
-------------------------------------------------------------------------------- /phackR/man/dot-compR2t.Rd: --------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helpers.R
\name{.compR2t}
\alias{.compR2t}
\title{Compute R squared for the t-test}
\usage{
.compR2t(x, y)
}
\arguments{
\item{x}{values of group 1}

\item{y}{values of group 2}
}
\description{
Compute R squared for the t-test
}
-------------------------------------------------------------------------------- /phackR/man/dot-compscoreHack.Rd: --------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/compositeScores.R
\name{.compscoreHack}
\alias{.compscoreHack}
\title{P-Hacking function for scale redefinition / Composite Scores}
\usage{
.compscoreHack(df, dv, compv, ndelete, strategy = "firstsig", alpha = 0.05)
}
\arguments{
\item{df}{Data frame containing dependent
variable and composite score items as columns} 11 | 12 | \item{dv}{Location of dependent variable in the data frame} 13 | 14 | \item{compv}{Location of composite score variables in the data frame} 15 | 16 | \item{ndelete}{How many items should be deleted from the scale at maximum?} 17 | 18 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 19 | 20 | \item{alpha}{Significance level of the t-test (default: 0.05)} 21 | } 22 | \description{ 23 | P-Hacking function for scale redefinition / Composite Scores 24 | } 25 | -------------------------------------------------------------------------------- /phackR/man/dot-covhack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exploitCovariates.R 3 | \name{.covhack} 4 | \alias{.covhack} 5 | \title{P-Hacking function for multiple covariates} 6 | \usage{ 7 | .covhack( 8 | df, 9 | dv, 10 | group, 11 | covs, 12 | interactions = FALSE, 13 | strategy = "firstsig", 14 | alpha = 0.05 15 | ) 16 | } 17 | \arguments{ 18 | \item{df}{Data frame with one group variable, one dependent variable, and one or more covariates} 19 | 20 | \item{dv}{Integer defining the location of the dependent variable column} 21 | 22 | \item{group}{Integer defining the location of the group variable column} 23 | 24 | \item{covs}{Numeric vector defining the location of the covariate(s).} 25 | 26 | \item{interactions}{Should interaction terms be added to the ANCOVA models? 
TRUE/FALSE} 27 | 28 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 29 | 30 | \item{alpha}{Significance level of the t-test} 31 | } 32 | \description{ 33 | Outputs a p-hacked p-value and a vector of all p-values that were computed in the process 34 | } 35 | -------------------------------------------------------------------------------- /phackR/man/dot-cutoffHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exploitCutoffs.R 3 | \name{.cutoffHack} 4 | \alias{.cutoffHack} 5 | \title{P-Hacking function for exploiting cutoff values} 6 | \usage{ 7 | .cutoffHack(df, iv, dv, strategy = "firstsig", alpha = 0.05) 8 | } 9 | \arguments{ 10 | \item{df}{Data frame with one continuous independent variable and one continuous dependent variable} 11 | 12 | \item{iv}{Location of the independent variable in the data frame} 13 | 14 | \item{dv}{Location of the dependent variable in the data frame} 15 | 16 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 17 | 18 | \item{alpha}{Significance level of the t-test} 19 | } 20 | \description{ 21 | P-Hacking function for exploiting cutoff values 22 | } 23 | -------------------------------------------------------------------------------- /phackR/man/dot-easyimpute.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/favorableImputation.R 3 | \name{.easyimpute} 4 | \alias{.easyimpute} 5 | \title{Single value imputation function 6 | Imputes NA values in a single variable using the function specified in fun} 7 | \usage{ 8 | .easyimpute(x, fun, ...) 
9 | } 10 | \arguments{ 11 | \item{x}{The target variable of the imputation} 12 | 13 | \item{fun}{The function used to replace missing values that takes x as an argument (e.g., mean)} 14 | 15 | \item{...}{Additional arguments to fun} 16 | } 17 | \description{ 18 | Single value imputation function 19 | Imputes NA values in a single variable using the function specified in fun 20 | } 21 | -------------------------------------------------------------------------------- /phackR/man/dot-estimate_mode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/favorableImputation.R 3 | \name{.estimate_mode} 4 | \alias{.estimate_mode} 5 | \title{Estimate mode of continuous variables 6 | Estimates mode of continuous variables using the density() function} 7 | \usage{ 8 | .estimate_mode(x) 9 | } 10 | \arguments{ 11 | \item{x}{The target variable for which the mode should be searched} 12 | } 13 | \description{ 14 | Estimate mode of continuous variables 15 | Estimates mode of continuous variables using the density() function 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-extractoutlier.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{.extractoutlier} 4 | \alias{.extractoutlier} 5 | \title{Create data frames without outliers} 6 | \usage{ 7 | .extractoutlier(x, y, outsx, outsy) 8 | } 9 | \arguments{ 10 | \item{x}{Original vector of x values} 11 | 12 | \item{y}{Original vector of y values} 13 | 14 | \item{outsx}{Outlier values to be removed from x} 15 | 16 | \item{outsy}{Outlier values to be removed from y} 17 | } 18 | \description{ 19 | Inputs data frame and two sets of outlier values, outputs list with three data frames 20 | } 21 | 
-------------------------------------------------------------------------------- /phackR/man/dot-impHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/favorableImputation.R 3 | \name{.impHack} 4 | \alias{.impHack} 5 | \title{P-Hacking function favorable imputation in univariate linear regression} 6 | \usage{ 7 | .impHack(df, x, y, which = c(1:10), strategy = "firstsig", alpha = 0.05) 8 | } 9 | \arguments{ 10 | \item{df}{Data frame containing x and y variables as columns} 11 | 12 | \item{x}{Location of x variable (predictor) in the data frame} 13 | 14 | \item{y}{Location of y variable (criterion) in the data frame} 15 | 16 | \item{which}{Which missing value handling method? 1: delete missing, 2: mean imputation, 3: median imputation, 4: mode imputation, 5: predictive mean matching, 6: weighted predictive mean matching, 7: sample from observed values, 8: Bayesian linear regression, 9: linear regression ignoring model error, 10: linear regression predicted values} 17 | 18 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 19 | 20 | \item{alpha}{Significance level of the t-test (default: 0.05)} 21 | } 22 | \description{ 23 | Outputs a p-hacked p-value and a vector of all p-values that were computed in the process 24 | } 25 | -------------------------------------------------------------------------------- /phackR/man/dot-multDVhack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/selectiveReportingDV.R 3 | \name{.multDVhack} 4 | \alias{.multDVhack} 5 | \title{P-Hacking function for multiple dependent variables} 6 | \usage{ 7 | .multDVhack( 8 | df, 9 | dvs, 10 | group, 11 | strategy = "firstsig", 12 | alternative = "two.sided", 13 | alpha = 0.05 14 | ) 15 | } 16 | \arguments{ 17 | \item{df}{Data 
frame with one group variable and multiple dependent variables} 18 | 19 | \item{dvs}{Vector defining the DV columns (will be checked in given order)} 20 | 21 | \item{group}{Scalar defining grouping column} 22 | 23 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 24 | 25 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 26 | 27 | \item{alpha}{Significance level of the t-test} 28 | } 29 | \description{ 30 | Outputs a p-hacked p-value and a vector of all p-values that were computed in the process 31 | } 32 | -------------------------------------------------------------------------------- /phackR/man/dot-multIVhack_reg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/selectiveReportingIV.R 3 | \name{.multIVhack_reg} 4 | \alias{.multIVhack_reg} 5 | \title{P-Hacking function for multiple predictors in a regression} 6 | \usage{ 7 | .multIVhack_reg( 8 | df, 9 | ivs, 10 | control, 11 | strategy = "firstsig", 12 | alternative = "two.sided", 13 | alpha = 0.05 14 | ) 15 | } 16 | \arguments{ 17 | \item{df}{Data frame containing a criterion variable and multiple predictor variables} 18 | 19 | \item{ivs}{Location of the independent variables (predictors) in the data frame} 20 | 21 | \item{control}{Location of the criterion in the data frame} 22 | 23 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 24 | 25 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 26 | 27 | \item{alpha}{Significance level of the t-test (default: 0.05)} 28 | } 29 | \description{ 30 | Returns a p-hacked p-value and a vector of all p-values that were computed in the process 31 | } 32 | -------------------------------------------------------------------------------- /phackR/man/dot-multIVhack_ttest.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/selectiveReportingIV.R 3 | \name{.multIVhack_ttest} 4 | \alias{.multIVhack_ttest} 5 | \title{P-Hacking function for multiple independent variables in a t-test} 6 | \usage{ 7 | .multIVhack_ttest( 8 | df, 9 | ivs, 10 | control, 11 | strategy = "firstsig", 12 | alternative = "two.sided", 13 | alpha = 0.05 14 | ) 15 | } 16 | \arguments{ 17 | \item{df}{Data frame (wide format) containing a control group variable and multiple treatment group variables} 18 | 19 | \item{ivs}{Location of the independent variables (treatment groups) in the (wide) data frame} 20 | 21 | \item{control}{Location of the control group in the (wide) data frame} 22 | 23 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 24 | 25 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 26 | 27 | \item{alpha}{Significance level of the t-test (default: 0.05)} 28 | } 29 | \description{ 30 | Returns a p-hacked p-value and a vector of all p-values that were computed in the process 31 | } 32 | -------------------------------------------------------------------------------- /phackR/man/dot-optstop.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/optionalStopping.R 3 | \name{.optstop} 4 | \alias{.optstop} 5 | \title{Optional Stopping based on existing dataset} 6 | \usage{ 7 | .optstop( 8 | df, 9 | group, 10 | dv, 11 | n.min, 12 | n.max, 13 | step = 1, 14 | peek = NULL, 15 | alternative = "two.sided", 16 | alpha = 0.05 17 | ) 18 | } 19 | \arguments{ 20 | \item{df}{Data frame} 21 | 22 | \item{group}{Scalar defining grouping column} 23 | 24 | \item{dv}{Scalar defining location of dependent variable in the data frame} 25 | 26 | \item{n.min}{Minimum sample size} 27 | 28 | 
\item{n.max}{Maximum sample size} 29 | 30 | \item{step}{Step size of the optional stopping (default is 1)} 31 | 32 | \item{peek}{Determines how often one peeks at the data. Overrides step argument if not NULL.} 33 | 34 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 35 | 36 | \item{alpha}{Significance level of the t-test (default: 0.05)} 37 | } 38 | \description{ 39 | Returns a p-hacked p-value and a non-p-hacked p-value based on the maximum sample size 40 | } 41 | -------------------------------------------------------------------------------- /phackR/man/dot-out.boxplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.boxplot} 4 | \alias{.out.boxplot} 5 | \title{Box plot outlier definition} 6 | \usage{ 7 | .out.boxplot(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of values from which outliers should be excluded} 11 | 12 | \item{y}{Vector of values from which outliers should be excluded} 13 | } 14 | \description{ 15 | Box plot outlier definition function 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.cook.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.cook} 4 | \alias{.out.cook} 5 | \title{Cook's Distance outlier definition} 6 | \usage{ 7 | .out.cook(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of x values (predictor in linear regression)} 11 | 12 | \item{y}{Vector of y values (criterion in linear regression)} 13 | } 14 | \description{ 15 | Excludes values that have a Cook's distance larger than the median of an F distribution with p and n-p degrees of freedom or larger than 1 (see Wikipedia for Cook's distance for the cutoff) 16 | } 17 | 
-------------------------------------------------------------------------------- /phackR/man/dot-out.covratio.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.covratio} 4 | \alias{.out.covratio} 5 | \title{Covariance ratio outlier definition} 6 | \usage{ 7 | .out.covratio(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of x values (predictor in linear regression)} 11 | 12 | \item{y}{Vector of y values (criterion in linear regression)} 13 | } 14 | \description{ 15 | Excludes values that have a covariance ratio differing from 1 (cutoff: influence.measures function internal) 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.dfbeta.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.dfbeta} 4 | \alias{.out.dfbeta} 5 | \title{DFBETAS outlier definition} 6 | \usage{ 7 | .out.dfbeta(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of x values (predictor in linear regression)} 11 | 12 | \item{y}{Vector of y values (criterion in linear regression)} 13 | } 14 | \description{ 15 | Excludes the 1-3 values that have the highest influence on the regression slope 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.dffits.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.dffits} 4 | \alias{.out.dffits} 5 | \title{DFFITS outlier definition} 6 | \usage{ 7 | .out.dffits(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of x values (predictor in linear regression)} 11 | 12 | \item{y}{Vector of y values 
(criterion in linear regression)} 13 | } 14 | \description{ 15 | Excludes values that have absolute DFFIT values larger than 2*sqrt(2/n) (see Wikipedia page for DFFITS for the cutoff) 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.leverage.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.leverage} 4 | \alias{.out.leverage} 5 | \title{Leverage values outlier definition} 6 | \usage{ 7 | .out.leverage(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of x values (predictor in linear regression)} 11 | 12 | \item{y}{Vector of y values (criterion in linear regression)} 13 | } 14 | \description{ 15 | Excludes values that have high leverage values (3 times larger than the mean leverage value 3*(p/n), see 'https://newonlinecourses.science.psu.edu/stat501/node/338/' for the cutoff) 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.mahalanobis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.mahalanobis} 4 | \alias{.out.mahalanobis} 5 | \title{Robust Mahalanobis Distance outlier definition} 6 | \usage{ 7 | .out.mahalanobis(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of x values (predictor in linear regression)} 11 | 12 | \item{y}{Vector of y values (criterion in linear regression)} 13 | } 14 | \description{ 15 | Excludes values that have a high robust Mahalanobis Distance (cutoff: squared MD > qchisq(0.98, 2), see Filzmoser et al. 
(2005)) 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.percentrule.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.percentrule} 4 | \alias{.out.percentrule} 5 | \title{Percentage outlier definition} 6 | \usage{ 7 | .out.percentrule(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of values from which outliers should be excluded} 11 | 12 | \item{y}{Vector of values from which outliers should be excluded} 13 | } 14 | \description{ 15 | Percentage outlier definition function 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.residual.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.residual} 4 | \alias{.out.residual} 5 | \title{Residuals outlier definition} 6 | \usage{ 7 | .out.residual(x, y, type) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of x values (predictor in linear regression)} 11 | 12 | \item{y}{Vector of y values (criterion in linear regression)} 13 | 14 | \item{type}{What type of residuals \code{"stan"}, \code{"stud"}} 15 | } 16 | \description{ 17 | Excludes values with high standardized / studentized residuals. If the largest residual > 2, values with residuals larger than 2, 2.5, 3, ... are excluded. 
If the largest residual < 2, values with 1:3 largest residuals are excluded (largest 3 standardized residuals is equivalent to the q-q plot definition of outliers in the regression diagnostics in the lm package) 18 | } 19 | -------------------------------------------------------------------------------- /phackR/man/dot-out.sdrule.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.sdrule} 4 | \alias{.out.sdrule} 5 | \title{Standard deviation outlier definition} 6 | \usage{ 7 | .out.sdrule(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of values from which outliers should be excluded} 11 | 12 | \item{y}{Vector of values from which outliers should be excluded} 13 | } 14 | \description{ 15 | Standard deviation outlier definition function: Takes a vector x, tries different standard deviation outlier rules (x > 2, 2.5, 3, ...) and returns all differing result vectors without the outliers. 
16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-out.stemleaf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.out.stemleaf} 4 | \alias{.out.stemleaf} 5 | \title{Stem and Leaf plot outlier definition} 6 | \usage{ 7 | .out.stemleaf(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{Vector of values from which outliers should be excluded} 11 | 12 | \item{y}{Vector of values from which outliers should be excluded} 13 | } 14 | \description{ 15 | Stem and Leaf plot outlier definition function 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/dot-outHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{.outHack} 4 | \alias{.outHack} 5 | \title{P-Hacking function for outlier exclusion in univariate linear regression} 6 | \usage{ 7 | .outHack(df, x, y, which = c(1:12), strategy = "firstsig", alpha = 0.05) 8 | } 9 | \arguments{ 10 | \item{df}{Data frame containing x and y variables as columns} 11 | 12 | \item{x}{Location of x variable (predictor) in the data frame} 13 | 14 | \item{y}{Location of y variable (criterion) in the data frame} 15 | 16 | \item{which}{Which outlier definition methods? 
A numeric vector containing the chosen methods (1: boxplot, 2: stem&leaf, 3: standard deviation, 4: percentile, 5: studentized residuals, 6: standardized residuals, 7: DFBETA, 8: DFFITS, 9: Cook's D, 10: Mahalanobis distance, 11: Leverage values, 12: Covariance ratio)} 17 | 18 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 19 | 20 | \item{alpha}{Significance level of the t-test (default: 0.05)} 21 | } 22 | \description{ 23 | Outputs a p-hacked p-value and a vector of all p-values that were computed in the process 24 | } 25 | -------------------------------------------------------------------------------- /phackR/man/dot-roundhack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/incorrectRounding.R 3 | \name{.roundhack} 4 | \alias{.roundhack} 5 | \title{P-Hacking function for incorrect rounding} 6 | \usage{ 7 | .roundhack( 8 | df, 9 | group, 10 | dv, 11 | roundinglevel, 12 | alternative = "two.sided", 13 | alpha = 0.05 14 | ) 15 | } 16 | \arguments{ 17 | \item{df}{Data frame} 18 | 19 | \item{group}{Scalar defining location of the group vector in the data frame} 20 | 21 | \item{dv}{Scalar defining location of dependent variable in the data frame} 22 | 23 | \item{roundinglevel}{Highest p-value that is rounded down to 0.05} 24 | 25 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 26 | 27 | \item{alpha}{Significance level of the t-test (default: 0.05)} 28 | } 29 | \description{ 30 | Outputs a p-hacked p-value and the non-p-hacked-p-value 31 | } 32 | -------------------------------------------------------------------------------- /phackR/man/dot-selectpvalue.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{.selectpvalue} 4 | \alias{.selectpvalue} 
5 | \title{Select a p-value from a vector of p-hacked p-values} 6 | \usage{ 7 | .selectpvalue(ps, strategy, alpha) 8 | } 9 | \arguments{ 10 | \item{ps}{Vector of p values} 11 | 12 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 13 | 14 | \item{alpha}{Significance level (default: 0.05)} 15 | } 16 | \description{ 17 | Takes a vector of p-values and selects the smallest, first significant, or smallest significant p-value. 18 | } 19 | -------------------------------------------------------------------------------- /phackR/man/dot-sim.compscore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compositeScores.R 3 | \name{.sim.compscore} 4 | \alias{.sim.compscore} 5 | \title{Simulate data: Correlated composite score raw variables and one non-correlated dependent variable} 6 | \usage{ 7 | .sim.compscore(nobs, ncompv, rcomp) 8 | } 9 | \arguments{ 10 | \item{nobs}{Integer giving number of observations} 11 | 12 | \item{ncompv}{Integer giving number of variables to build the composite score} 13 | 14 | \item{rcomp}{Correlation between the composite score variables} 15 | } 16 | \description{ 17 | Simulate data: Correlated composite score raw variables and one non-correlated dependent variable 18 | } 19 | -------------------------------------------------------------------------------- /phackR/man/dot-sim.covariates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exploitCovariates.R 3 | \name{.sim.covariates} 4 | \alias{.sim.covariates} 5 | \title{Simulate data with (correlated) covariates} 6 | \usage{ 7 | .sim.covariates(nobs.group, ncov, rcov, rcovdv, mu = 0, sd = 1, missing = 0) 8 | } 9 | \arguments{ 10 | \item{nobs.group}{Vector with number of observations per group} 11 | 12 | \item{ncov}{Number of 
continuous covariates in the simulated data frame} 13 | 14 | \item{rcov}{Correlation between the covariates} 15 | 16 | \item{rcovdv}{Correlation between covariates and dependent variable} 17 | 18 | \item{mu}{Mean of the random data} 19 | 20 | \item{sd}{Standard deviation of the random data} 21 | 22 | \item{missing}{Proportion of missing values per variable (e.g., 0.2 = 20 percent)} 23 | } 24 | \description{ 25 | Simulates a dependent variable that correlates with multiple (correlated) covariates as well as an independent IV 26 | } 27 | -------------------------------------------------------------------------------- /phackR/man/dot-sim.data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{.sim.data} 4 | \alias{.sim.data} 5 | \title{Generic sampling function} 6 | \usage{ 7 | .sim.data(nobs.group) 8 | } 9 | \arguments{ 10 | \item{nobs.group}{Number of observations per group. 
Either a scalar or a vector with two elements.} 11 | } 12 | \description{ 13 | Outputs a data frame with two columns 14 | } 15 | -------------------------------------------------------------------------------- /phackR/man/dot-sim.multDV.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/selectiveReportingDV.R 3 | \name{.sim.multDV} 4 | \alias{.sim.multDV} 5 | \title{Simulate dataset with multiple dependent variables} 6 | \usage{ 7 | .sim.multDV(nobs.group, nvar, r) 8 | } 9 | \arguments{ 10 | \item{nobs.group}{Vector giving number of observations per group} 11 | 12 | \item{nvar}{Number of dependent variables in the data frame} 13 | 14 | \item{r}{Desired correlation between the dependent variables (scalar)} 15 | } 16 | \description{ 17 | Outputs data frame with a grouping variable and multiple correlated dependent variables 18 | } 19 | -------------------------------------------------------------------------------- /phackR/man/dot-sim.multIV.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/selectiveReportingIV.R 3 | \name{.sim.multIV} 4 | \alias{.sim.multIV} 5 | \title{Simulate dataset with multiple independent variables} 6 | \usage{ 7 | .sim.multIV(nobs.group, nvar, r, regression = FALSE) 8 | } 9 | \arguments{ 10 | \item{nobs.group}{Scalar defining number of observations per group (or number of observations in predictors in regression)} 11 | 12 | \item{nvar}{Number of independent variables in the data frame} 13 | 14 | \item{r}{Desired correlation between the independent variables (scalar)} 15 | 16 | \item{regression}{Should the simulation be conducted for a regression analysis (TRUE) or a t-test? 
(FALSE)} 17 | } 18 | \description{ 19 | Outputs data frame with multiple independent variables 20 | } 21 | -------------------------------------------------------------------------------- /phackR/man/dot-sim.multcor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helpers.R 3 | \name{.sim.multcor} 4 | \alias{.sim.multcor} 5 | \title{Simulate multivariate correlated data for continuous variables} 6 | \usage{ 7 | .sim.multcor(nobs, nvar, r, mu = 0, sd = 1, missing = 0) 8 | } 9 | \arguments{ 10 | \item{nobs}{Number of observations (rows) in the simulated data frame} 11 | 12 | \item{nvar}{Number of variables (columns) in the data frame} 13 | 14 | \item{r}{Desired correlation between the variables (integer)} 15 | 16 | \item{mu}{Mean of the random data} 17 | 18 | \item{sd}{Standard deviation of the random data} 19 | 20 | \item{missing}{Proportion of missing values per variable (e.g., 0.2 = 20 percent)} 21 | } 22 | \description{ 23 | Outputs a data frame with correlated variables of defined length 24 | } 25 | -------------------------------------------------------------------------------- /phackR/man/dot-sim.subgroup.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/subgroupAnalysis.R 3 | \name{.sim.subgroup} 4 | \alias{.sim.subgroup} 5 | \title{Simulate data with subgroups} 6 | \usage{ 7 | .sim.subgroup(nobs.group, nsubvars) 8 | } 9 | \arguments{ 10 | \item{nobs.group}{Vector giving number of observations per group} 11 | 12 | \item{nsubvars}{Integer specifying number of variables for potential subgroups} 13 | } 14 | \description{ 15 | Outputs data frame with multiple binary variables from which subgroups can be extracted 16 | } 17 | -------------------------------------------------------------------------------- 
/phackR/man/dot-statAnalysisHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statAnalysis.R 3 | \name{.statAnalysisHack} 4 | \alias{.statAnalysisHack} 5 | \title{P-Hacking function for exploiting different statistical analysis options} 6 | \usage{ 7 | .statAnalysisHack( 8 | df, 9 | group, 10 | dv, 11 | strategy = "firstsig", 12 | alternative = "two.sided", 13 | alpha = 0.05 14 | ) 15 | } 16 | \arguments{ 17 | \item{df}{Data frame with one continuous independent variable and one continuous dependent variable} 18 | 19 | \item{group}{Location of the grouping variable in the data frame} 20 | 21 | \item{dv}{Location of the dependent variable in the data frame} 22 | 23 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 24 | 25 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 26 | 27 | \item{alpha}{Significance level of the t-test} 28 | } 29 | \description{ 30 | P-Hacking function for exploiting different statistical analysis options 31 | } 32 | -------------------------------------------------------------------------------- /phackR/man/dot-subgroupHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/subgroupAnalysis.R 3 | \name{.subgroupHack} 4 | \alias{.subgroupHack} 5 | \title{P-Hacking function for multiple subgroups analysis} 6 | \usage{ 7 | .subgroupHack( 8 | df, 9 | iv, 10 | dv, 11 | subvars, 12 | alternative = "two.sided", 13 | strategy = "firstsig", 14 | alpha = 0.05 15 | ) 16 | } 17 | \arguments{ 18 | \item{df}{A matrix or data frame containing all relevant data} 19 | 20 | \item{iv}{Integer specifying the location of the binary independent variable in the data frame} 21 | 22 | \item{dv}{Integer specifying the location of the dependent variable in the data
frame} 23 | 24 | \item{subvars}{Vector specifying the location of the subgroup variables in the data frame} 25 | 26 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 27 | 28 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 29 | 30 | \item{alpha}{Significance level of the t-test} 31 | } 32 | \description{ 33 | Outputs a p-hacked p-value and a vector of all p-values that were computed in the process 34 | } 35 | -------------------------------------------------------------------------------- /phackR/man/dot-varTransHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/variableTransformation.R 3 | \name{.varTransHack} 4 | \alias{.varTransHack} 5 | \title{P-Hacking function variable transformation in univariate linear regression} 6 | \usage{ 7 | .varTransHack( 8 | df, 9 | x, 10 | y, 11 | transvar, 12 | testnorm = FALSE, 13 | strategy = "firstsig", 14 | alpha = 0.05 15 | ) 16 | } 17 | \arguments{ 18 | \item{df}{Data frame containing x and y variables as columns} 19 | 20 | \item{x}{Location of x variable (predictor) in the data frame} 21 | 22 | \item{y}{Location of y variable (criterion) in the data frame} 23 | 24 | \item{transvar}{Which variables should be transformed? 
Either "x" (for x variable), "y" (for y variable), or "xy" (for both)} 25 | 26 | \item{testnorm}{Should variables only be transformed after a significant test for normality of residuals?} 27 | 28 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 29 | 30 | \item{alpha}{Significance level of the t-test (default: 0.05)} 31 | } 32 | \description{ 33 | Outputs a p-hacked p-value and a vector of all p-values that were computed in the process 34 | } 35 | -------------------------------------------------------------------------------- /phackR/man/esplots.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotsShiny.R 3 | \name{esplots} 4 | \alias{esplots} 5 | \title{Plot effect size distributions} 6 | \usage{ 7 | esplots( 8 | simdat, 9 | EScolumn.hack, 10 | EScolumn.orig, 11 | titles = c(expression("Distribution of p-hacked effect sizes R"^2), 12 | expression("Distribution of original effect sizes R"^2)) 13 | ) 14 | } 15 | \arguments{ 16 | \item{simdat}{Simulated data from one of the p-hacking simulation functions} 17 | 18 | \item{EScolumn.hack}{Column number of hacked effect sizes} 19 | 20 | \item{EScolumn.orig}{Column number of original effect sizes} 21 | 22 | \item{titles}{Title of effect size plots} 23 | } 24 | \description{ 25 | Plot effect size distributions 26 | } 27 | -------------------------------------------------------------------------------- /phackR/man/pplots.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotsShiny.R 3 | \name{pplots} 4 | \alias{pplots} 5 | \title{Plot p-value distributions} 6 | \usage{ 7 | pplots(simdat, alpha) 8 | } 9 | \arguments{ 10 | \item{simdat}{Simulated data from one of the p-hacking simulation functions} 11 | 12 | \item{alpha}{Alpha level} 13 | } 14 | 
\description{ 15 | Plot p-value distributions 16 | } 17 | -------------------------------------------------------------------------------- /phackR/man/runShinyPHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runShinyPHack.R 3 | \name{runShinyPHack} 4 | \alias{runShinyPHack} 5 | \title{Run Shiny app for p-hacking simulation} 6 | \usage{ 7 | runShinyPHack() 8 | } 9 | \description{ 10 | Run Shiny app for p-hacking simulation 11 | } 12 | -------------------------------------------------------------------------------- /phackR/man/sim.compscoreHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compositeScores.R 3 | \name{sim.compscoreHack} 4 | \alias{sim.compscoreHack} 5 | \title{Simulate p-hacking with composite scores 6 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations} 7 | \usage{ 8 | sim.compscoreHack( 9 | nobs, 10 | ncompv, 11 | rcomp, 12 | ndelete, 13 | strategy = "firstsig", 14 | alpha = 0.05, 15 | iter = 1000, 16 | shinyEnv = FALSE 17 | ) 18 | } 19 | \arguments{ 20 | \item{nobs}{Integer giving number of observations} 21 | 22 | \item{ncompv}{Integer giving number of variables to build the composite score} 23 | 24 | \item{rcomp}{Correlation between the composite score variables} 25 | 26 | \item{ndelete}{How many items should be deleted from the scale at maximum?} 27 | 28 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 29 | 30 | \item{alpha}{Significance level of the t-test (default: 0.05)} 31 | 32 | \item{iter}{Number of simulation iterations} 33 | 34 | \item{shinyEnv}{Is the function run in a Shiny session?
TRUE/FALSE} 35 | } 36 | \description{ 37 | Simulate p-hacking with composite scores 38 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 39 | } 40 | -------------------------------------------------------------------------------- /phackR/man/sim.covhack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exploitCovariates.R 3 | \name{sim.covhack} 4 | \alias{sim.covhack} 5 | \title{Simulate p-Hacking with multiple covariates 6 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations} 7 | \usage{ 8 | sim.covhack( 9 | nobs.group, 10 | ncov, 11 | rcov, 12 | rcovdv, 13 | interactions = FALSE, 14 | strategy = "firstsig", 15 | alpha = 0.05, 16 | iter = 1000, 17 | shinyEnv = FALSE 18 | ) 19 | } 20 | \arguments{ 21 | \item{nobs.group}{Vector with number of observations per group} 22 | 23 | \item{ncov}{Number of continuous covariates in the simulated data frame} 24 | 25 | \item{rcov}{Correlation between the covariates} 26 | 27 | \item{rcovdv}{Correlation between covariates and dependent variable} 28 | 29 | \item{interactions}{Should interaction terms be added to the ANCOVA models? TRUE/FALSE} 30 | 31 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 32 | 33 | \item{alpha}{Significance level of the t-test} 34 | 35 | \item{iter}{Number of simulation iterations} 36 | 37 | \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} 38 | } 39 | \description{ 40 | Simulate p-Hacking with multiple covariates 41 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 42 | } 43 | -------------------------------------------------------------------------------- /phackR/man/sim.cutoffHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exploitCutoffs.R 3 | \name{sim.cutoffHack} 4 | \alias{sim.cutoffHack} 5 | \title{Simulate p-Hacking for exploiting cutoff values 6 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations} 7 | \usage{ 8 | sim.cutoffHack( 9 | nobs, 10 | strategy = "firstsig", 11 | alpha = 0.05, 12 | iter = 1000, 13 | shinyEnv = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{nobs}{Number of observations} 18 | 19 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 20 | 21 | \item{alpha}{Significance level of the t-test} 22 | 23 | \item{iter}{Number of simulation iterations} 24 | 25 | \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} 26 | } 27 | \description{ 28 | Simulate p-Hacking for exploiting cutoff values 29 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 30 | } 31 | -------------------------------------------------------------------------------- /phackR/man/sim.impHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/favorableImputation.R 3 | \name{sim.impHack} 4 | \alias{sim.impHack} 5 | \title{Simulate p-Hacking with different sorts of missing value imputation} 6 | \usage{ 7 | sim.impHack( 8 | nobs, 9 | missing, 10 | which = c(1:10), 11 | strategy = "firstsig", 12 | alpha = 0.05, 13 | iter = 1000, 14 | shinyEnv = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{nobs}{Integer giving number of observations} 19 | 20 | \item{missing}{Percentage of missing values (e.g., 0.1 for 10 percent)} 21 | 22 | \item{which}{Which imputation methods? Either 5 random methods are chosen ("random") or a numeric vector containing the chosen methods (1: delete missing, 2: mean imputation, 3: median imputation, 4: mode imputation, 5: predictive mean matching, 6: weighted predictive mean matching, 7: sample from observed values, 8: Bayesian linear regression, 9: linear regression ignoring model error, 10: linear regression predicted values)} 23 | 24 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 25 | 26 | \item{alpha}{Significance level of the t-test (default: 0.05)} 27 | 28 | \item{iter}{Number of simulation iterations} 29 | 30 | \item{shinyEnv}{Is the function run in a Shiny session?
TRUE/FALSE} 31 | } 32 | \description{ 33 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 34 | } 35 | -------------------------------------------------------------------------------- /phackR/man/sim.multDVhack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/selectiveReportingDV.R 3 | \name{sim.multDVhack} 4 | \alias{sim.multDVhack} 5 | \title{Simulate p-Hacking with multiple dependent variables} 6 | \usage{ 7 | sim.multDVhack( 8 | nobs.group, 9 | nvar, 10 | r, 11 | strategy = "firstsig", 12 | iter = 1000, 13 | alternative = "two.sided", 14 | alpha = 0.05, 15 | shinyEnv = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{nobs.group}{Vector giving number of observations per group} 20 | 21 | \item{nvar}{Number of dependent variables (columns) in the data frame} 22 | 23 | \item{r}{Desired correlation between the dependent variables (scalar)} 24 | 25 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 26 | 27 | \item{iter}{Number of simulation iterations} 28 | 29 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 30 | 31 | \item{alpha}{Significance level of the t-test (default: 0.05)} 32 | 33 | \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} 34 | } 35 | \description{ 36 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 37 | } 38 | -------------------------------------------------------------------------------- /phackR/man/sim.multIVhack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/selectiveReportingIV.R 3 | \name{sim.multIVhack} 4 | \alias{sim.multIVhack} 5 | \title{Simulate p-Hacking with multiple independent variables} 6 | \usage{ 7 | sim.multIVhack( 8 | nobs.group, 9 | nvar, 10 | r, 11 | regression = FALSE, 12 | strategy = "firstsig", 13 | iter = 1000, 14 | alternative = "two.sided", 15 | alpha = 0.05, 16 | shinyEnv = FALSE 17 | ) 18 | } 19 | \arguments{ 20 | \item{nobs.group}{Vector giving number of observations per group} 21 | 22 | \item{nvar}{Number of independent variables (columns) in the data frame} 23 | 24 | \item{r}{Desired correlation between the dependent variables (scalar)} 25 | 26 | \item{regression}{Should the simulation be conducted for a regression analysis (TRUE) or a t-test? (FALSE)} 27 | 28 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 29 | 30 | \item{iter}{Number of simulation iterations} 31 | 32 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 33 | 34 | \item{alpha}{Significance level of the t-test (default: 0.05)} 35 | 36 | \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} 37 | } 38 | \description{ 39 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 40 | } 41 | -------------------------------------------------------------------------------- /phackR/man/sim.optstop.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/optionalStopping.R 3 | \name{sim.optstop} 4 | \alias{sim.optstop} 5 | \title{Simulate p-hacking with optional stopping} 6 | \usage{ 7 | sim.optstop( 8 | n.min, 9 | n.max, 10 | step = 1, 11 | peek = NULL, 12 | alternative = "two.sided", 13 | iter = 1000, 14 | alpha = 0.05, 15 | shinyEnv = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{n.min}{Minimum sample size} 20 | 21 | \item{n.max}{Maximum sample size} 22 | 23 | \item{step}{Step size of the optional stopping (default is 1)} 24 | 25 | \item{peek}{Determines how often one peeks at the data. Overrides step argument if not NULL.} 26 | 27 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 28 | 29 | \item{iter}{Number of iterations} 30 | 31 | \item{alpha}{Significance level of the t-test (default: 0.05)} 32 | 33 | \item{shinyEnv}{Is the function run in a Shiny session?
TRUE/FALSE} 34 | } 35 | \description{ 36 | Simulate p-hacking with optional stopping 37 | } 38 | -------------------------------------------------------------------------------- /phackR/man/sim.outHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outlierExclusion.R 3 | \name{sim.outHack} 4 | \alias{sim.outHack} 5 | \title{Simulate p-Hacking with different sorts of outlier definition} 6 | \usage{ 7 | sim.outHack( 8 | nobs, 9 | which = c(1:12), 10 | strategy = "firstsig", 11 | alpha = 0.05, 12 | iter = 1000, 13 | shinyEnv = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{nobs}{Integer giving number of observations} 18 | 19 | \item{which}{Which outlier detection methods? Either 5 random methods are chosen ("random") or a numeric vector containing the chosen methods (1: boxplot, 2: stem&leaf, 3: standard deviation, 4: percentile, 5: studentized residuals, 6: standardized residuals, 7: DFBETA, 8: DFFITS, 9: Cook's D, 10: Mahalanobis distance, 11: Leverage values, 12: Covariance ratio)} 20 | 21 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 22 | 23 | \item{alpha}{Significance level of the t-test (default: 0.05)} 24 | 25 | \item{iter}{Number of simulation iterations} 26 | 27 | \item{shinyEnv}{Is the function run in a Shiny session?
TRUE/FALSE} 28 | } 29 | \description{ 30 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 31 | } 32 | -------------------------------------------------------------------------------- /phackR/man/sim.roundhack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/incorrectRounding.R 3 | \name{sim.roundhack} 4 | \alias{sim.roundhack} 5 | \title{Simulate p-hacking with incorrect rounding} 6 | \usage{ 7 | sim.roundhack( 8 | roundinglevel, 9 | iter = 1000, 10 | alternative = "two.sided", 11 | alpha = 0.05, 12 | shinyEnv = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{roundinglevel}{Highest p-value that is rounded down to alpha} 17 | 18 | \item{iter}{Number of iterations} 19 | 20 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 21 | 22 | \item{alpha}{Significance level of the t-test (default: 0.05)} 23 | 24 | \item{shinyEnv}{Is the function run in a Shiny session? TRUE/FALSE} 25 | } 26 | \description{ 27 | Simulate p-hacking with incorrect rounding 28 | } 29 | -------------------------------------------------------------------------------- /phackR/man/sim.statAnalysisHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statAnalysis.R 3 | \name{sim.statAnalysisHack} 4 | \alias{sim.statAnalysisHack} 5 | \title{Simulate p-Hacking for exploiting different statistical analysis options} 6 | \usage{ 7 | sim.statAnalysisHack( 8 | nobs.group, 9 | strategy = "firstsig", 10 | alternative = "two.sided", 11 | alpha = 0.05, 12 | iter = 1000, 13 | shinyEnv = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{nobs.group}{Number of observations per group. 
Either a scalar or a vector with 2 elements.} 18 | 19 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 20 | 21 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 22 | 23 | \item{alpha}{Significance level of the t-test} 24 | 25 | \item{iter}{Number of simulation iterations} 26 | 27 | \item{shinyEnv}{Is the function run in a Shiny session? TRUE/FALSE} 28 | } 29 | \description{ 30 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 31 | } 32 | -------------------------------------------------------------------------------- /phackR/man/sim.subgroupHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/subgroupAnalysis.R 3 | \name{sim.subgroupHack} 4 | \alias{sim.subgroupHack} 5 | \title{Simulate p-hacking with multiple subgroups 6 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations} 7 | \usage{ 8 | sim.subgroupHack( 9 | nobs.group, 10 | nsubvars, 11 | alternative = "two.sided", 12 | strategy = "firstsig", 13 | alpha = 0.05, 14 | iter = 1000, 15 | shinyEnv = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{nobs.group}{Vector giving number of observations per group} 20 | 21 | \item{nsubvars}{Integer specifying number of variables for potential subgroups} 22 | 23 | \item{alternative}{Direction of the t-test ("two.sided", "less", "greater")} 24 | 25 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 26 | 27 | \item{alpha}{Significance level of the t-test} 28 | 29 | \item{iter}{Number of simulation iterations} 30 | 31 | \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} 32 | } 33 | \description{ 34 | Simulate p-hacking with multiple subgroups 35 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 36 | } 37 | -------------------------------------------------------------------------------- /phackR/man/sim.varTransHack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/variableTransformation.R 3 | \name{sim.varTransHack} 4 | \alias{sim.varTransHack} 5 | \title{Simulate p-hacking with variable transformations 6 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations} 7 | \usage{ 8 | sim.varTransHack( 9 | nobs, 10 | transvar, 11 | testnorm = FALSE, 12 | strategy = "firstsig", 13 | alpha = 0.05, 14 | iter = 1000, 15 | shinyEnv = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{nobs}{Integer giving number of observations} 20 | 21 | \item{transvar}{Which variables should be transformed? Either "x" (for x variable), "y" (for y variable), or "xy" (for both)} 22 | 23 | \item{testnorm}{Should variables only be transformed after a significant test for normality of residuals?} 24 | 25 | \item{strategy}{String value: One out of "firstsig", "smallest", "smallest.sig"} 26 | 27 | \item{alpha}{Significance level of the t-test (default: 0.05)} 28 | 29 | \item{iter}{Number of simulation iterations} 30 | 31 | \item{shinyEnv}{Is the function run in a Shiny session? 
TRUE/FALSE} 32 | } 33 | \description{ 34 | Simulate p-hacking with variable transformations 35 | Outputs a matrix containing the p-hacked p-values (\code{ps.hack}) and the original p-values (\code{ps.orig}) from all iterations 36 | } 37 | -------------------------------------------------------------------------------- /phackR/phackR.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /phackR/tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(phackR) 3 | 4 | test_check("phackR") 5 | -------------------------------------------------------------------------------- /phackR/vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /phacking_compendium.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /simulations/00_simulation_helpers.R: 
-------------------------------------------------------------------------------- 1 | findFPrate <- function(simresult, alpha = 0.05, hack = TRUE){ 2 | 3 | if(hack){ 4 | FP.firstsig <- sapply(simresult$firstsig, 5 | function(x) {sum(x$ps.hack < alpha) / nrow(x)}) 6 | FP.smallest <- sapply(simresult$smallest, 7 | function(x) {sum(x$ps.hack < alpha) / nrow(x)}) 8 | FP.smallestsig <- sapply(simresult$smallestsig, 9 | function(x) {sum(x$ps.hack < alpha) / nrow(x)}) 10 | } else { 11 | FP.firstsig <- sapply(simresult$firstsig, 12 | function(x) {sum(x$ps.orig < alpha) / nrow(x)}) 13 | FP.smallest <- sapply(simresult$smallest, 14 | function(x) {sum(x$ps.orig < alpha) / nrow(x)}) 15 | FP.smallestsig <- sapply(simresult$smallestsig, 16 | function(x) {sum(x$ps.orig < alpha) / nrow(x)}) 17 | } 18 | 19 | FP.rates <- rowMeans(cbind(FP.firstsig, FP.smallest, FP.smallestsig)) 20 | 21 | return(FP.rates) 22 | } 23 | 24 | -------------------------------------------------------------------------------- /simulations/combinedStrategies_simulation.R: -------------------------------------------------------------------------------- 1 | # Combined p-Hacking Strategies: Simulations 2 | 3 | SIM_combinedHack_t <- sim.combined.t(nobs.group = 100, 4 | nDV = 5, 5 | rDV = 0.6, 6 | nCOV = 3, 7 | rCOV = 0.3, 8 | rcovdv = 0.3, 9 | nSUB = 3, 10 | roundinglevel = 0.051, 11 | alternative = "two.sided", 12 | strategy = "firstsig", 13 | alpha = 0.05, 14 | iter = 10000) 15 | 16 | save(SIM_combinedHack_t, file = "simulations/SIM_combinedHack_t.RData") 17 | 18 | SIM_combinedHack_reg <- sim.combined.reg(nobs = 100, 19 | missing = 0.1, 20 | ncompv = 5, 21 | rcomp = 0.75, 22 | roundinglevel = 0.051, 23 | nImpMethods = 5, 24 | transvar = "xy", 25 | ndelete = 3, 26 | nOutMethods = 3, 27 | strategy = "firstsig", 28 | alpha = 0.05, 29 | iter = 10000) 30 | 31 | save(SIM_combinedHack_reg, file = "simulations/SIM_combinedHack_reg.RData") 32 | 33 | 34 | # Combined p-Hacking Strategies: Simulations with alpha = 0.005 35 
| 36 | SIM_combinedHack_t_005 <- sim.combined.t(nobs.group = 100, 37 | nDV = 5, 38 | rDV = 0.6, 39 | nCOV = 3, 40 | rCOV = 0.3, 41 | rcovdv = 0.3, 42 | nSUB = 3, 43 | roundinglevel = 0.0051, 44 | alternative = "two.sided", 45 | strategy = "firstsig", 46 | alpha = 0.005, 47 | iter = 10000) 48 | 49 | save(SIM_combinedHack_t_005, file = "simulations/SIM_combinedHack_t_005.RData") 50 | 51 | SIM_combinedHack_reg_005 <- sim.combined.reg(nobs = 100, 52 | missing = 0.1, 53 | ncompv = 5, 54 | rcomp = 0.75, 55 | roundinglevel = 0.0051, 56 | nImpMethods = 5, 57 | transvar = "xy", 58 | ndelete = 3, 59 | nOutMethods = 3, 60 | strategy = "firstsig", 61 | alpha = 0.005, 62 | iter = 10000) 63 | 64 | save(SIM_combinedHack_reg_005, file = "simulations/SIM_combinedHack_reg_005.RData") 65 | -------------------------------------------------------------------------------- /simulations/compscoreHack_simulation.R: -------------------------------------------------------------------------------- 1 | # Scale Redefinition / Composite Scores: Simulation 2 | 3 | #### Conditions #### 4 | nobs <- c(30, 50, 100, 300) 5 | ncompv <- c(5, 10) 6 | rcomp <- c(0.3, 0.7) 7 | ndelete <- c(1, 3, 7) 8 | strategy <- c("firstsig", "smallest", "smallest.sig") 9 | 10 | cond.compscoreHack <- expand.grid(nobs, ncompv, rcomp, ndelete) 11 | cond.compscoreHack <- cond.compscoreHack[cond.compscoreHack$Var4 < cond.compscoreHack$Var2, ] 12 | 13 | #### Simulation #### 14 | 15 | simresults.compscoreHack <- list() 16 | 17 | simmultiple.compscoreHack <- function(par, strategy){ 18 | data.frame(sim.compscoreHack(nobs = par[1], 19 | ncompv = par[2], 20 | rcomp = par[3], 21 | ndelete = par[4], 22 | strategy = strategy, 23 | iter = 10000, 24 | alpha = 0.05)) 25 | } 26 | 27 | simresults.compscoreHack$firstsig <- apply(cond.compscoreHack, 1, function(x) { 28 | simmultiple.compscoreHack(x, strategy = "firstsig") 29 | }) 30 | 31 | simresults.compscoreHack$smallest <- apply(cond.compscoreHack, 1, function(x) { 32 | 
simmultiple.compscoreHack(x, strategy = "smallest") 33 | }) 34 | 35 | simresults.compscoreHack$smallestsig <- apply(cond.compscoreHack, 1, function(x) { 36 | simmultiple.compscoreHack(x, strategy = "smallest.sig") 37 | }) 38 | 39 | save(simresults.compscoreHack, file = "simulations/SIM_compscoreHack.RData") 40 | -------------------------------------------------------------------------------- /simulations/covhack_simulation.R: -------------------------------------------------------------------------------- 1 | # Exploiting Covariates: Simulation 2 | 3 | #### Conditions #### 4 | nobs.group <- c(30, 50, 100, 300) 5 | ncov <- c(3, 5, 10) 6 | rcov <- c(0, 0.3, 0.8) 7 | rcovdv <- c(0, 0.3) 8 | strategy <- c("firstsig", "smallest", "smallest.sig") 9 | 10 | cond.covhack <- expand.grid(nobs.group, ncov, rcov, rcovdv) 11 | 12 | #### Simulation #### 13 | 14 | simresults.covhack <- list() 15 | 16 | simmultiple.covhack <- function(par, strategy){ 17 | data.frame(sim.covhack(nobs.group = par[1], 18 | ncov = par[2], 19 | rcov = par[3], 20 | rcovdv = par[4], 21 | strategy = strategy, 22 | interactions = FALSE, 23 | iter = 10000, 24 | alpha = 0.05)) 25 | } 26 | 27 | simresults.covhack$firstsig <- apply(cond.covhack, 1, function(x) { 28 | simmultiple.covhack(x, strategy = "firstsig") 29 | }) 30 | 31 | simresults.covhack$smallest <- apply(cond.covhack, 1, function(x) { 32 | simmultiple.covhack(x, strategy = "smallest") 33 | }) 34 | 35 | simresults.covhack$smallestsig <- apply(cond.covhack, 1, function(x) { 36 | simmultiple.covhack(x, strategy = "smallest.sig") 37 | }) 38 | 39 | save(simresults.covhack, file = "simulations/SIM_covhack.RData") 40 | 41 | 42 | -------------------------------------------------------------------------------- /simulations/cutoffHack_simulation.R: -------------------------------------------------------------------------------- 1 | # Exploiting different Cut-Off Values: Simulation 2 | 3 | #### Conditions #### 4 | 5 | nobs <- c(30, 50, 100, 300) 6 | strategy 
<- c("firstsig", "smallest", "smallest.sig") 7 | 8 | cond.cutoffHack <- expand.grid(nobs) 9 | 10 | #### Simulation #### 11 | 12 | simresults.cutoffHack <- list() 13 | 14 | simmultiple.cutoffHack <- function(par, strategy){ 15 | data.frame(sim.cutoffHack(nobs = par[1], 16 | strategy = strategy, 17 | iter = 10000, 18 | alpha = 0.05)) 19 | 20 | } 21 | 22 | simresults.cutoffHack$firstsig <- apply(cond.cutoffHack, 1, function(x) { 23 | simmultiple.cutoffHack(x, strategy = "firstsig") 24 | }) 25 | 26 | simresults.cutoffHack$smallest <- apply(cond.cutoffHack, 1, function(x) { 27 | simmultiple.cutoffHack(x, strategy = "smallest") 28 | }) 29 | 30 | simresults.cutoffHack$smallestsig <- apply(cond.cutoffHack, 1, function(x) { 31 | simmultiple.cutoffHack(x, strategy = "smallest.sig") 32 | }) 33 | 34 | save(simresults.cutoffHack, file = "simulations/SIM_cutoffHack.RData") 35 | 36 | -------------------------------------------------------------------------------- /simulations/exploreNormality.R: -------------------------------------------------------------------------------- 1 | # ============================================================================== 2 | # SIMULATION TO INVESTIGATE NORMALITY OF RESIDUALS UNDER TRANSFORMATIONS 3 | # ============================================================================== 4 | 5 | source("./simulations/00_simulation_helpers.R") 6 | 7 | # Compute p-values of lm() and p-values and test statistics from normality tests 8 | # under different transformations 9 | 10 | VarTransExploration <- function(nobs, iter){ 11 | 12 | final <- array(dim=c(iter, 5, 4, 4)) 13 | 14 | # Simulate as many datasets as desired iterations 15 | dat <- list() 16 | for(i in 1:iter){ 17 | dat[[i]] <- .sim.multcor(nobs = nobs, nvar = 2, r = 0) 18 | } 19 | 20 | # Apply transformation and test to each dataset 21 | for(i in 1:iter){ 22 | 23 | df <- dat[[i]] 24 | x <- df[,1] 25 | y <- df[,2] 26 | 27 | Xtrans <- matrix(NA, nrow = nrow(df)) 28 | Xtrans[,1] <- x 29 | Ytrans 
<- matrix(NA, nrow = nrow(df)) 30 | Ytrans[,1] <- y 31 | 32 | Xtrans <- cbind(Xtrans, 33 | log(x+abs(min(x))+1e-10), # log transformation 34 | sqrt(x+abs(min(x))+1e-10), # square root transformation 35 | 1/x # inverse 36 | ) 37 | 38 | Ytrans <- cbind(Ytrans, 39 | log(y+abs(min(y))+1e-10), # log transformation 40 | sqrt(y+abs(min(y))+1e-10), # square root transformation 41 | 1/y # inverse 42 | ) 43 | 44 | ps.lm <- matrix(NA, nrow = 4, ncol = 4) 45 | ps.ks <- matrix(NA, nrow = 4, ncol = 4) 46 | ps.sw <- matrix(NA, nrow = 4, ncol = 4) 47 | stat.ks <- matrix(NA, nrow = 4, ncol = 4) 48 | stat.sw <- matrix(NA, nrow = 4, ncol = 4) 49 | 50 | for(j in 1:ncol(Xtrans)){ 51 | for(k in 1:ncol(Ytrans)){ 52 | mod <- summary(stats::lm(Ytrans[,k] ~ Xtrans[,j])) 53 | ps.lm[j,k] <- mod$coefficients[2, 4] 54 | ks <- ks.test(mod$residuals, "pnorm") 55 | ps.ks[j,k] <- ks$p.value 56 | stat.ks[j,k] <- ks$statistic 57 | sw <- shapiro.test(mod$residuals) 58 | ps.sw[j,k] <- sw$p.value 59 | stat.sw[j,k] <- sw$statistic 60 | } 61 | } 62 | 63 | res <- array(dim=c(5,4,4)) 64 | res[1,,] <- ps.lm 65 | res[2,,] <- ps.ks 66 | res[3,,] <- stat.ks 67 | res[4,,] <- ps.sw 68 | res[5,,] <- stat.sw 69 | 70 | final[i, , , ] <- res 71 | } 72 | 73 | return(final) 74 | } 75 | 76 | ############################ FOR N = 30 ######################################## 77 | 78 | explore30 <- VarTransExploration(nobs=30, iter=1000) 79 | 80 | mainsX <- matrix(c("X", "log(X)", "sqrt(X)", "1/X", rep("", 12)), nrow = 4, byrow=FALSE) 81 | mainsY <- matrix(c("Y", "log(Y)", "sqrt(Y)", "1/Y", rep("", 12)), nrow = 4, byrow=TRUE) 82 | 83 | # plot p values from lm 84 | par(mfrow=c(4,4), oma=c(0,0,2,0)) 85 | for(i in 1:4){ 86 | for(j in 1:4){ 87 | hist(explore30[, 1, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 88 | } 89 | } 90 | mtext("p-Value regression", side=3, line=0, outer=TRUE, cex=2) 91 | 92 | # plot p values from ks test 93 | for(i in 1:4){ 94 | for(j in 1:4){ 95 | hist(explore30[, 2, i, j], 
main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 96 | } 97 | } 98 | mtext("p-Value Kolmogorov-Smirnov test (residuals)", side=3, line=0, outer=TRUE, cex=2) 99 | 100 | # plot ks test statistic 101 | for(i in 1:4){ 102 | for(j in 1:4){ 103 | hist(explore30[, 3, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 104 | } 105 | } 106 | mtext("Test statistic Kolmogorov-Smirnov test (residuals)", side=3, line=0, outer=TRUE, cex=2) 107 | 108 | # plot p values from shapiro wilk 109 | for(i in 1:4){ 110 | for(j in 1:4){ 111 | hist(explore30[, 4, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 112 | } 113 | } 114 | mtext("p-Value Shapiro-Wilk test (residuals)", side=3, line=0, outer=TRUE, cex=2) 115 | 116 | 117 | # plot shapiro wilk test statistic 118 | for(i in 1:4){ 119 | for(j in 1:4){ 120 | hist(explore30[, 5, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 121 | } 122 | } 123 | mtext("Test statistic Shapiro-Wilk test (residuals)", side=3, line=0, outer=TRUE, cex=2) 124 | 125 | ############################ FOR N = 300 ######################################## 126 | 127 | explore300 <- VarTransExploration(nobs=300, iter=1000) 128 | 129 | mainsX <- matrix(c("X", "log(X)", "sqrt(X)", "1/X", rep("", 12)), nrow = 4, byrow=FALSE) 130 | mainsY <- matrix(c("Y", "log(Y)", "sqrt(Y)", "1/Y", rep("", 12)), nrow = 4, byrow=TRUE) 131 | 132 | # plot p values from lm 133 | par(mfrow=c(4,4), oma=c(0,0,2,0)) 134 | for(i in 1:4){ 135 | for(j in 1:4){ 136 | hist(explore300[, 1, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 137 | } 138 | } 139 | mtext("p-Value regression", side=3, line=0, outer=TRUE, cex=2) 140 | 141 | # plot p values from ks test 142 | for(i in 1:4){ 143 | for(j in 1:4){ 144 | hist(explore300[, 2, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 145 | } 146 | } 147 | mtext("p-Value Kolmogorov-Smirnov test (residuals)", side=3, line=0, outer=TRUE, cex=2) 148 | 149 | # plot ks test 
statistic 150 | for(i in 1:4){ 151 | for(j in 1:4){ 152 | hist(explore300[, 3, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 153 | } 154 | } 155 | mtext("Test statistic Kolmogorov-Smirnov test (residuals)", side=3, line=0, outer=TRUE, cex=2) 156 | 157 | # plot p values from shapiro wilk 158 | for(i in 1:4){ 159 | for(j in 1:4){ 160 | hist(explore300[, 4, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 161 | } 162 | } 163 | mtext("p-Value Shapiro-Wilk test (residuals)", side=3, line=0, outer=TRUE, cex=2) 164 | 165 | 166 | # plot shapiro wilk test statistic 167 | for(i in 1:4){ 168 | for(j in 1:4){ 169 | hist(explore300[, 5, i, j], main=mainsY[i,j], ylab=mainsX[i,j], xlab="", cex.lab=1.5) 170 | } 171 | } 172 | mtext("Test statistic Shapiro-Wilk test (residuals)", side=3, line=0, outer=TRUE, cex=2) 173 | 174 | -------------------------------------------------------------------------------- /simulations/impHack_simulation.R: -------------------------------------------------------------------------------- 1 | # Favorable Imputation: Simulation 2 | 3 | #### Conditions #### 4 | nobs <- c(30, 50, 100, 300) 5 | missing <- c(0.05, 0.2) 6 | howmany = c(3, 5, 10) 7 | 8 | cond.impHack <- expand.grid(nobs, missing, howmany) 9 | 10 | #### Simulation #### 11 | simresults.impHack <- list() 12 | 13 | simmultiple.impHack <- function(par, strategy){ 14 | data.frame(sim.impHack(nobs = par[1], 15 | missing = par[2], 16 | which = sample(1:10, size = par[3]), 17 | strategy = strategy, 18 | iter = 10000, 19 | alpha = 0.05)) 20 | 21 | 22 | } 23 | 24 | simresults.impHack$firstsig <- apply(cond.impHack, 1, function(x) { 25 | simmultiple.impHack(x, strategy = "firstsig") 26 | }) 27 | 28 | simresults.impHack$smallest <- apply(cond.impHack, 1, function(x) { 29 | simmultiple.impHack(x, strategy = "smallest") 30 | }) 31 | 32 | simresults.impHack$smallestsig <- apply(cond.impHack, 1, function(x) { 33 | simmultiple.impHack(x, strategy = "smallest.sig") 34 | }) 35 
save(simresults.impHack, file = "simulations/SIM_impHack.RData")

# ==============================================================================
# simulations/multDVhack_simulation.R
# ==============================================================================
# Selective Reporting of the Dependent Variable: Simulation

#### Conditions ####

nobs.group <- c(30, 50, 100, 300)  # number of observations per group
nvar <- c(3, 5, 10)                # number of dependent variables
r <- c(0, 0.3, 0.8)                # correlation between dependent variables

cond.multDVhack <- expand.grid(nobs.group, nvar, r)

#### Simulation ####

simresults.multDVhack <- list()

# One condition row -> one sim.multDVhack() run under the given strategy.
simmultiple.multDVhack <- function(par, strategy) {
  data.frame(sim.multDVhack(nobs.group = par[1],
                            nvar = par[2],
                            r = par[3],
                            strategy = strategy,
                            iter = 10000,
                            alternative = "two.sided",
                            alpha = 0.05))
}

simresults.multDVhack$firstsig <- apply(cond.multDVhack, 1, function(x) {
  simmultiple.multDVhack(x, strategy = "firstsig")
})

simresults.multDVhack$smallest <- apply(cond.multDVhack, 1, function(x) {
  simmultiple.multDVhack(x, strategy = "smallest")
})

simresults.multDVhack$smallestsig <- apply(cond.multDVhack, 1, function(x) {
  simmultiple.multDVhack(x, strategy = "smallest.sig")
})

save(simresults.multDVhack, file = "simulations/SIM_multDVhack.RData")

# ==============================================================================
# simulations/multIVHack_simulation.R
# ==============================================================================
# Selective Reporting of the Independent Variable: Simulation

#### Conditions ####

nobs.group <- c(30, 50, 100, 300)
nvar <- c(3, 5, 10)
r <- c(0, 0.3, 0.8)

cond.multIVhack <- expand.grid(nobs.group, nvar, r)

#### Simulation t-Test ####

simresults.multIVhack_ttest <- list()

# t-test variant: regression = FALSE.
simmultiple.multIVhack_ttest <- function(par, strategy) {
  data.frame(sim.multIVhack(nobs.group = par[1],
                            nvar = par[2],
                            r = par[3],
                            regression = FALSE,
                            strategy = strategy,
                            iter = 10000,
                            alternative = "two.sided",
                            alpha = 0.05))
}

simresults.multIVhack_ttest$firstsig <- apply(cond.multIVhack, 1, function(x) {
  simmultiple.multIVhack_ttest(x, strategy = "firstsig")
})

simresults.multIVhack_ttest$smallest <- apply(cond.multIVhack, 1, function(x) {
  simmultiple.multIVhack_ttest(x, strategy = "smallest")
})

simresults.multIVhack_ttest$smallestsig <- apply(cond.multIVhack, 1, function(x) {
  simmultiple.multIVhack_ttest(x, strategy = "smallest.sig")
})

save(simresults.multIVhack_ttest, file = "simulations/SIM_multIVhack_ttest.RData")

### Simulation regression ####

simresults.multIVhack_reg <- list()

# Regression variant: regression = TRUE; otherwise identical to the t-test run.
simmultiple.multIVhack_reg <- function(par, strategy) {
  data.frame(sim.multIVhack(nobs.group = par[1],
                            nvar = par[2],
                            r = par[3],
                            regression = TRUE,
                            strategy = strategy,
                            iter = 10000,
                            alternative = "two.sided",
                            alpha = 0.05))
}

simresults.multIVhack_reg$firstsig <- apply(cond.multIVhack, 1, function(x) {
  simmultiple.multIVhack_reg(x, strategy = "firstsig")
})

simresults.multIVhack_reg$smallest <- apply(cond.multIVhack, 1, function(x) {
  simmultiple.multIVhack_reg(x, strategy = "smallest")
})

simresults.multIVhack_reg$smallestsig <- apply(cond.multIVhack, 1, function(x) {
  simmultiple.multIVhack_reg(x, strategy = "smallest.sig")
})

save(simresults.multIVhack_reg, file = "simulations/SIM_multIVhack_reg.RData")

# ==============================================================================
# simulations/optstop_simulation.R
# ==============================================================================
# Optional Stopping: Simulation

#### Change n.max ####

# Conditions
n.min <- 5
n.max <- c(30, 50, 100, 300)
step <- c(1, 5, 10, 50)

cond.optstop_nmax <- expand.grid(n.max, step)

simresults.optstop_nmax <- list()

# Vary n.max and step while holding n.min fixed at 5.
simmultiple.optstop_nmax <- function(par) {
  data.frame(sim.optstop(n.min = 5,
                         n.max = par[1],
                         step = par[2],
                         alternative = "two.sided",
                         iter = 10000,
                         alpha = 0.05))
}

simresults.optstop_nmax <- apply(cond.optstop_nmax, 1, function(x) {
  simmultiple.optstop_nmax(x)
})

save(simresults.optstop_nmax, file = "simulations/SIM_optstop_nmax.RData")

#### Change n.min ####
n.min <- c(5, 30, 50, 100)
n.max <- 300
step <- c(1, 5, 10, 50)

cond.optstop_nmin <- expand.grid(n.min, step)

#### Simulation ####

simresults.optstop_nmin <- list()

# Vary n.min and step while holding n.max fixed at 300.
simmultiple.optstop_nmin <- function(par) {
  data.frame(sim.optstop(n.min = par[1],
                         n.max = 300,
                         step = par[2],
                         alternative = "two.sided",
                         iter = 10000,
                         alpha = 0.05))
}

simresults.optstop_nmin <- apply(cond.optstop_nmin, 1, function(x) {
  simmultiple.optstop_nmin(x)
})

save(simresults.optstop_nmin, file = "simulations/SIM_optstop_nmin.RData")

# ==============================================================================
# simulations/outHack_simulation.R
# ==============================================================================
# Outlier Exclusion: Simulation

#### Conditions ####
nobs <- c(30, 50, 100, 300)
howmany <- c(3, 5, 12)  # number of outlier-detection methods tried

cond.outHack <- expand.grid(nobs, howmany)

#### Simulation ####
simresults.outHack <- list()

# One condition row -> one sim.outHack() run; the method subset is drawn
# once per condition row.
simmultiple.outHack <- function(par, strategy) {
  data.frame(sim.outHack(nobs = par[1],
                         which = sample(1:12,
size = par[2]), 15 | strategy = strategy, 16 | iter = 10000, 17 | alpha = 0.05)) 18 | } 19 | 20 | simresults.outHack$firstsig <- apply(cond.outHack, 1, function(x) { 21 | simmultiple.outHack(x, strategy = "firstsig") 22 | }) 23 | 24 | simresults.outHack$smallest <- apply(cond.outHack, 1, function(x) { 25 | simmultiple.outHack(x, strategy = "smallest") 26 | }) 27 | 28 | simresults.outHack$smallestsig <- apply(cond.outHack, 1, function(x) { 29 | simmultiple.outHack(x, strategy = "smallest.sig") 30 | }) 31 | 32 | save(simresults.outHack, file = "simulations/SIM_outHack.RData") 33 | 34 | -------------------------------------------------------------------------------- /simulations/plot_BFdist.R: -------------------------------------------------------------------------------- 1 | # Compute Bayes factors from p-values 2 | 3 | # Get simulation results 4 | 5 | source("simulations/00_simulation_helpers.R") 6 | lapply(paste0("simulations/", 7 | dir("simulations")[grepl(dir("simulations"), pattern="SIM*")]), 8 | load, 9 | .GlobalEnv) 10 | 11 | library(ggplot2) 12 | library(dplyr) 13 | library(BayesFactor) 14 | 15 | # Conditions for multiple dependent variables 16 | 17 | nobs.group <- c(30, 50, 100, 300) # number of observations per group 18 | nvar <- c(3, 5, 10) # number of dependent variables 19 | r <- c(0, 0.3, 0.8) # correlation between dependent variables 20 | 21 | cond.multDVhack <- expand.grid(nobs.group, nvar, r) 22 | 23 | # ------------------------------------------------------------------------------ 24 | # Extract p-values and t-values and compute BFs #### 25 | # ------------------------------------------------------------------------------ 26 | 27 | getBFs <- function(simdat, conddat, nobs, r, strategy){ 28 | 29 | cond.plot <- which(conddat$Var1 == nobs & conddat$Var3 == r) 30 | 31 | # extract p-values and t-values 32 | ps.hack <- unlist(lapply(simdat[[strategy]][cond.plot], function(x) x$ps.hack)) 33 | ts.hack <- qt(ps.hack/2, df=nobs-2) 34 | ps.nohack <- 
unlist(lapply(simdat[[strategy]][cond.plot], function(x) x$ps.orig)) 35 | ts.nohack <- qt(ps.nohack/2, df=nobs-2) 36 | 37 | # compute BFs 38 | BFs.hack <- lapply(ts.hack, function(x) ttest.tstat(x, n1=nobs, n2=nobs, simple=TRUE)) 39 | BFs.hack <- unname(simplify2array(BFs.hack)) 40 | BFs.nohack <- lapply(ts.nohack, function(x) ttest.tstat(x, n1=nobs, n2=nobs, simple=TRUE)) 41 | BFs.nohack <- unname(simplify2array(BFs.nohack)) 42 | 43 | return(cbind(BFs.hack, BFs.nohack)) 44 | } 45 | 46 | # Compute all BFs 47 | 48 | BF50_firstsig <- getBFs(simdat=simresults.multDVhack, conddat=cond.multDVhack, nobs=50, r=0, strategy="firstsig") 49 | BF50_smallestsig <- getBFs(simdat=simresults.multDVhack, conddat=cond.multDVhack, nobs=50, r=0, strategy="smallestsig") 50 | BF50_smallest <- getBFs(simdat=simresults.multDVhack, conddat=cond.multDVhack, nobs=50, r=0, strategy="smallest") 51 | 52 | BF300_firstsig <- getBFs(simdat=simresults.multDVhack, conddat=cond.multDVhack, nobs=300, r=0, strategy="firstsig") 53 | BF300_smallestsig <- getBFs(simdat=simresults.multDVhack, conddat=cond.multDVhack, nobs=300, r=0, strategy="smallestsig") 54 | BF300_smallest <- getBFs(simdat=simresults.multDVhack, conddat=cond.multDVhack, nobs=300, r=0, strategy="smallest") 55 | 56 | 57 | # ------------------------------------------------------------------------------ 58 | # Plot the legend #### 59 | # ------------------------------------------------------------------------------ 60 | 61 | plot.new() 62 | legend(x = "topleft", legend = c("p-hacked", "original"), fill = c("#FFAE4A", "#5AB4BD")) 63 | 64 | # ------------------------------------------------------------------------------ 65 | # BF plots #### 66 | # ------------------------------------------------------------------------------ 67 | 68 | plotBFDist <- function(BFobj, conddat, nobs, r){ 69 | 70 | cond.plot <- which(conddat$Var1 == nobs & conddat$Var3 == r) 71 | iter <- nrow(BFobj) 72 | nDV <- rep(conddat[cond.plot,]$Var2, each=iter) 73 | BFs.hack 
<- BFobj[,1] 74 | BFs.nohack <- BFobj[,2] 75 | 76 | plotdat <- data.frame(lbfs.hack=log(BFs.hack), 77 | lbfs.nohack=log(BFs.nohack), 78 | nDV=as.factor(nDV)) 79 | 80 | newplotdat <- plotdat %>% # compute densities from ES 81 | group_by(nDV) %>% 82 | do(data.frame(loc.bfs = density(.$lbfs.hack)$x, 83 | dens.bfs = density(.$lbfs.hack)$y / (2.2*max(density(.$lbfs.hack)$y)), 84 | loc.bfs.nohack = density(.$lbfs.nohack)$x, 85 | dens.bfs.nohack = -1*density(.$lbfs.nohack)$y/(2*max(density(.$lbfs.nohack)$y)))) 86 | newplotdat$dens.bfs <- newplotdat$dens.bfs + as.numeric(newplotdat$nDV) # y-offset for different number of DVs 87 | newplotdat$dens.bfs.nohack <- newplotdat$dens.bfs.nohack + as.numeric(newplotdat$nDV) 88 | 89 | newplotdat$title <- paste0("N = ", nobs) 90 | 91 | ggplot(data=newplotdat, aes(group = nDV)) + 92 | geom_polygon(aes(y=dens.bfs, x=loc.bfs), fill = "#FFAE4A") + 93 | geom_polygon(aes(y=dens.bfs.nohack, x=loc.bfs.nohack), fill = "#5AB4BD") + 94 | labs(x = "Bayes factor", 95 | y = "Number of dependent variables") + 96 | theme_bw() + 97 | theme(text = element_text(size=35), 98 | axis.title = element_text(size=25), 99 | axis.text = element_text(size=25)) + 100 | scale_y_continuous(breaks = c(1,2,3), labels = c("3", "5", "10")) + 101 | scale_x_continuous(breaks = log(c(1/10, 1/3, 1, 3, 10)), labels = c("1/10", "1/3", "1", "3", "10")) + 102 | coord_cartesian(xlim = log(c(1/20, 100))) + 103 | facet_grid(. 
~ title) + 104 | geom_vline(xintercept = 0, linetype = "dashed", col = "grey", lwd = 1.5) 105 | 106 | 107 | } 108 | 109 | plotBFDist(BFobj=BF50_firstsig, conddat=cond.multDVhack, nobs=50, r=0) 110 | plotBFDist(BFobj=BF50_smallestsig, conddat=cond.multDVhack, nobs=50, r=0) 111 | plotBFDist(BFobj=BF50_smallest, conddat=cond.multDVhack, nobs=50, r=0) 112 | 113 | plotBFDist(BFobj=BF300_firstsig, conddat=cond.multDVhack, nobs=300, r=0) 114 | plotBFDist(BFobj=BF300_smallestsig, conddat=cond.multDVhack, nobs=300, r=0) 115 | plotBFDist(BFobj=BF300_smallest, conddat=cond.multDVhack, nobs=300, r=0) 116 | -------------------------------------------------------------------------------- /simulations/plot_redefineSig.R: -------------------------------------------------------------------------------- 1 | # Impact of redefining statistical significance 2 | 3 | # Get simulation results 4 | 5 | source("simulations/00_simulation_helpers.R") 6 | lapply(paste0("simulations/", 7 | dir("simulations")[grepl(dir("simulations"), pattern="SIM*")]), 8 | load, 9 | .GlobalEnv) 10 | 11 | library(ggplot2) 12 | 13 | # Function to calculate FP-rate only from smallest strategy 14 | 15 | findFPrateR <- function(simresult, alpha = 0.005){ 16 | sapply(simresult$smallestsig, 17 | function(x) {sum(x$ps.hack < alpha) / nrow(x)}) 18 | } 19 | 20 | # Find FP-rates 21 | 22 | FP.multDV <- max(unname(findFPrate(simresults.multDVhack))) 23 | FPR.multDV <- max(unname(findFPrateR(simresults.multDVhack))) 24 | 25 | FP.multIV <- max(unname(findFPrate(simresults.multIVhack_reg))) 26 | FPR.multIV <- max(unname(findFPrateR(simresults.multIVhack_reg))) 27 | 28 | FP.optstop <- max(sapply(simresults.optstop_nmin, 29 | function(x) {sum(x$ps.hack < 0.05) / nrow(x)})) 30 | 31 | # optional stopping requires re-simulating with alpha=0.005 because stopping 32 | # was determined on reaching 0.05 33 | # ---- 34 | n.min <- 5 35 | n.max <- c(300) #Var1 36 | step = c(1) #Var2 37 | 38 | cond.optstop <- expand.grid(n.max, step) 39 | 
simresults.optstopR <- list()

# Optional stopping re-run with the redefined threshold alpha = 0.005.
simmultiple.optstop <- function(par) {
  data.frame(sim.optstop(n.min = 5,
                         n.max = par[1],
                         step = par[2],
                         alternative = "two.sided",
                         iter = 10000,
                         alpha = 0.005))
}

simresults.optstopR <- apply(cond.optstop, 1, function(x) {
  simmultiple.optstop(x)
})
save(simresults.optstopR, file = "simulations/SIM_optstop_Redefine.RData")

# ------

FPR.optstop <- max(sapply(simresults.optstopR,
                          function(x) {sum(x$ps.hack < 0.005) / nrow(x)}))

FP.outHack <- max(unname(findFPrate(simresults.outHack)))
FPR.outHack <- max(unname(findFPrateR(simresults.outHack)))

FP.covHack <- max(unname(findFPrate(simresults.covhack)))
FPR.covHack <- max(unname(findFPrateR(simresults.covhack)))

FP.compscoreHack <- max(unname(findFPrate(simresults.compscoreHack)))
FPR.compscoreHack <- max(unname(findFPrateR(simresults.compscoreHack)))

FP.varTransHack <- max(unname(findFPrate(simresults.varTransHack_nonormtest)))
FPR.varTransHack <- max(unname(findFPrateR(simresults.varTransHack_nonormtest)))

FP.cutoffHack <- max(unname(findFPrate(simresults.cutoffHack)))
FPR.cutoffHack <- max(unname(findFPrateR(simresults.cutoffHack)))

FP.statAnalysisHack <- max(unname(findFPrate(simresults.statAnalysisHack)))
FPR.statAnalysisHack <- max(unname(findFPrateR(simresults.statAnalysisHack)))

FP.impHack <- max(unname(findFPrate(simresults.impHack)))
FPR.impHack <- max(unname(findFPrateR(simresults.impHack)))

FP.subgroupHack <- max(unname(findFPrate(simresults.subgroupHack)))
FPR.subgroupHack <- max(unname(findFPrateR(simresults.subgroupHack)))

FPregular <- c(FP.multDV, FP.multIV, FP.optstop, FP.outHack, FP.covHack,
               FP.compscoreHack, FP.varTransHack, FP.cutoffHack,
               FP.statAnalysisHack, FP.impHack, FP.subgroupHack)

FPredefined <- c(FPR.multDV, FPR.multIV, FPR.optstop, FPR.outHack, FPR.covHack,
                 FPR.compscoreHack, FPR.varTransHack, FPR.cutoffHack,
                 FPR.statAnalysisHack, FPR.impHack, FPR.subgroupHack)

FPregularByTen <- FPregular/10

plotdat <- data.frame(FP.rate = c(FPregular, FPredefined, FPregularByTen),
                      whichFP = rep(c("0.05", "0.005", "byTen"), each = length(FPregular)),
                      strategy = rep(1:length(FPregular), 3),
                      linetype = rep(c(1, 1, 2), each = length(FPregular)),
                      linecolor = rep(c(1, 2, 1), each = length(FPregular)))

ggplot(plotdat, aes(x = strategy,
                    y = FP.rate,
                    group = as.factor(whichFP),
                    linetype = as.factor(linetype),
                    colour = as.factor(whichFP))) +
  geom_hline(yintercept = 0.05, col = "grey") +
  geom_hline(yintercept = 0.005, col = "grey") +
  geom_line(size = 1) +
  scale_x_continuous(breaks = c(1:11),
                     labels = c("Selective reporting DV", "Selective reporting IV",
                                "Optional Stopping", "Outlier exclusion",
                                "Controlling covariates", "Scale redefinition",
                                "Variable transformation", "Discretizing variables",
                                "Alt. hypothesis tests", "Favorable imputation",
                                "Inclusion criteria")) +
  labs(x = "",
       y = "Highest false positive rate") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        text = element_text(size = 20),
        axis.title = element_text(size = 20, colour = "grey30"),
        legend.position = "none",
        plot.margin = unit(c(10, 0, 0, 40), unit = "pt")) +
  scale_color_manual(values = c("#009975", "black", "grey"))

plot.new()
legend(x = "topleft", legend = c("p-hacked: p < 0.05", "p-hacked: p < 0.005"), col = c("black", "#009975"), lty = "solid", lwd = 3)

# ==============================================================================
# simulations/roundHack_simulation.R
# ==============================================================================
# Incorrect Rounding: Simulation

#### Conditions ####
roundinglevel <- c(0.051, 0.055, 0.1)

cond.roundHack <- expand.grid(roundinglevel)

#### Simulation ####

# `strategy` is accepted for signature consistency with the other sim wrappers
# but is unused: sim.roundhack has no strategy argument.
simmultiple.roundHack <- function(par, strategy) {
  data.frame(sim.roundhack(roundinglevel = par[1],
                           iter = 10000,
                           alpha = 0.05))
}

simresults.roundHack <- apply(cond.roundHack, 1, function(x) {
  simmultiple.roundHack(x)
})

# FIX: every other simulation script persists its results to
# simulations/SIM_*.RData; previously this script's results were silently lost
# when the R session ended.
save(simresults.roundHack, file = "simulations/SIM_roundHack.RData")

# ==============================================================================
# simulations/statAnalysisHack_simulation.R
# ==============================================================================
# Exploit Statistical Analysis Options: Simulation

#### Conditions ####
nobs.group <- c(30, 50, 100, 300)

cond.statAnalysisHack <- expand.grid(nobs.group)

#### Simulation ####
simresults.statAnalysisHack <- list()

simmultiple.statAnalysisHack <- function(par, strategy) {
  data.frame(sim.statAnalysisHack(nobs.group = par[1],
                                  strategy = strategy,
                                  iter = 10000,
                                  alternative = "two.sided",
                                  alpha = 0.05))
}

simresults.statAnalysisHack$firstsig <- apply(cond.statAnalysisHack, 1, function(x) {
  simmultiple.statAnalysisHack(x, strategy = "firstsig")
})

simresults.statAnalysisHack$smallest <- apply(cond.statAnalysisHack, 1, function(x) {
  simmultiple.statAnalysisHack(x, strategy = "smallest")
})

simresults.statAnalysisHack$smallestsig <- apply(cond.statAnalysisHack, 1, function(x) {
  simmultiple.statAnalysisHack(x, strategy = "smallest.sig")
})

save(simresults.statAnalysisHack, file = "simulations/SIM_statAnalysisHack.RData")

# ==============================================================================
# simulations/subgroupHack_simulation.R
# ==============================================================================
# Subgroup Analysis: Simulation

#### Conditions ####
nobs.group <- c(30, 50, 100, 300)
nsubvars <- c(1, 3, 5)  # number of subgroup variables

cond.subgroupHack <- expand.grid(nobs.group, nsubvars)

#### Simulation ####
simresults.subgroupHack <- list()

simmultiple.subgroupHack <- function(par, strategy) {
  data.frame(sim.subgroupHack(nobs.group = par[1],
                              nsubvars = par[2],
                              strategy = strategy,
                              iter = 10000,
                              alternative = "two.sided",
                              alpha = 0.05))
}

simresults.subgroupHack$firstsig <- apply(cond.subgroupHack, 1, function(x) {
  simmultiple.subgroupHack(x, strategy = "firstsig")
})

simresults.subgroupHack$smallest <- apply(cond.subgroupHack, 1, function(x) {
  simmultiple.subgroupHack(x, strategy = "smallest")
})

simresults.subgroupHack$smallestsig <- apply(cond.subgroupHack, 1, function(x) {
  simmultiple.subgroupHack(x, strategy = "smallest.sig")
})

save(simresults.subgroupHack, file = "simulations/SIM_subgroupHack.RData")

# ==============================================================================
# simulations/varTransHack_simulation.R
# ==============================================================================
# Variable Transformation: Simulation

#### Conditions ####
nobs <- c(30, 50, 100, 300)
transvar <- c(1:3)  # 1 = transform x, 2 = y, 3 = both

cond.varTransHack <- expand.grid(nobs, transvar)

#### Simulation without tests of normality of residuals ####
simresults.varTransHack_nonormtest <- list()

# NOTE(review): par[2] is numeric, so switch() returns the par[2]-th alternative
# (1, 2, or 3) — the "x"/"y"/"xy" names are never matched. The net effect is
# transvar = par[2]; kept as-is to preserve the original call exactly.
simmultiple.varTransHack <- function(par, strategy) {
  data.frame(sim.varTransHack(nobs = par[1],
                              transvar = switch(par[2],
                                                "x" = 1,
                                                "y" = 2,
                                                "xy" = 3),
                              testnorm = FALSE,
                              strategy = strategy,
                              iter = 10000,
                              alpha = 0.05))
}

simresults.varTransHack_nonormtest$firstsig <- apply(cond.varTransHack, 1, function(x) {
  simmultiple.varTransHack(x, strategy = "firstsig")
})

simresults.varTransHack_nonormtest$smallest <- apply(cond.varTransHack, 1, function(x) {
  simmultiple.varTransHack(x, strategy = "smallest")
})

simresults.varTransHack_nonormtest$smallestsig <- apply(cond.varTransHack, 1, function(x) {
  simmultiple.varTransHack(x, strategy = "smallest.sig")
})

save(simresults.varTransHack_nonormtest, file = "simulations/SIM_varTransHack_nonormtest.RData")

#### Simulation with tests of normality of residuals ####

simresults.varTransHack_normtest <- list()

# Redefines simmultiple.varTransHack (as in the original script) with
# testnorm = TRUE; the nonormtest runs above have already completed.
simmultiple.varTransHack <- function(par, strategy) {
  data.frame(sim.varTransHack(nobs = par[1],
                              transvar = switch(par[2],
                                                "x" = 1,
                                                "y" = 2,
                                                "xy" = 3),
                              testnorm = TRUE,
                              strategy = strategy,
                              iter = 10000,
                              alpha = 0.05))
}

simresults.varTransHack_normtest$firstsig <- apply(cond.varTransHack, 1, function(x) {
  simmultiple.varTransHack(x, strategy = "firstsig")
})

simresults.varTransHack_normtest$smallest <- apply(cond.varTransHack, 1, function(x) {
  simmultiple.varTransHack(x, strategy = "smallest")
})

simresults.varTransHack_normtest$smallestsig <- apply(cond.varTransHack, 1, function(x) {
  simmultiple.varTransHack(x, strategy = "smallest.sig")
})

save(simresults.varTransHack_normtest, file = "simulations/SIM_varTransHack_normtest.RData")