├── .DS_Store ├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── R ├── .DS_Store ├── RcppExports.R ├── calcTau.R ├── mSPRT.R ├── mSPRT.default.R ├── plot.mSPRT.R └── print.mSPRT.R ├── README.Rmd ├── README.md ├── README_files ├── figure-gfm │ ├── unnamed-chunk-3-1.png │ └── unnamed-chunk-5-1.png └── figure-markdown_github │ ├── unnamed-chunk-3-1.png │ └── unnamed-chunk-5-1.png ├── figure └── unnamed-chunk-3-1.png ├── man ├── .DS_Store ├── calcTau.Rd ├── cppmSPRT.Rd ├── mSPRT.Rd ├── mSPRT.default.Rd ├── mixtureSPRT-package.Rd ├── plot.mSPRT.Rd └── print.mSPRT.Rd ├── mixtureSPRT.Rproj ├── src ├── .DS_Store ├── .gitignore ├── RcppExports.cpp └── test.cpp └── vignettes ├── .gitignore └── my-vignette.Rmd /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erik-stenberg/mixtureSPRT/65eb0762b671ace3258d2a20a503fe6913cb8e42/.DS_Store -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | inst/doc 2 | .Rproj.user 3 | .Rhistory 4 | .RData 5 | .Ruserdata 6 | src/*.o 7 | src/*.so 8 | src/*.dll 9 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: mixtureSPRT 2 | Type: Package 3 | Title: Mixture Sequential Probability Ratio Test 4 | Version: 1.0 5 | Date: 2019-04-02 6 | Author: Erik Stenberg 7 | Maintainer: Erik Stenberg 8 | Description: Performs mixture Sequential Probability Ratio Test for normally and Bernoulli distributed data. 
9 | License: GPL (>= 2) 10 | Imports: 11 | Rcpp, 12 | ggplot2 13 | LinkingTo: Rcpp 14 | RoxygenNote: 6.1.1 15 | Suggests: 16 | knitr, 17 | rmarkdown 18 | VignetteBuilder: knitr 19 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(plot,mSPRT) 4 | S3method(print,mSPRT) 5 | export(calcTau) 6 | export(cppmSPRT) 7 | export(mSPRT) 8 | export(mSPRT.default) 9 | importFrom(Rcpp,sourceCpp) 10 | importFrom(ggplot2,aes) 11 | importFrom(ggplot2,geom_hline) 12 | importFrom(ggplot2,geom_line) 13 | importFrom(ggplot2,ggplot) 14 | importFrom(ggplot2,labs) 15 | importFrom(ggplot2,theme_minimal) 16 | importFrom(ggplot2,xlab) 17 | importFrom(ggplot2,ylab) 18 | importFrom(ggplot2,ylim) 19 | importFrom(graphics,plot) 20 | useDynLib(mixtureSPRT) 21 | -------------------------------------------------------------------------------- /R/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erik-stenberg/mixtureSPRT/65eb0762b671ace3258d2a20a503fe6913cb8e42/R/.DS_Store -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #' Calculate mixture Sequential Probability Ratio Test in C++ 5 | #' 6 | #' @param x,y Numeric vectors 7 | #' @param xpre,ypre Numeric vectors of pre-experiment data 8 | #' @param sigma Population standard deviation 9 | #' @param tau Mixture variance 10 | #' @param theta Hypothesised difference between \code{x} and \code{y} 11 | #' @param distribution The desired distribution. 
#' Calculate Mixture Variance
#'
#' Evaluates the closed-form mixture variance shown in Details for a given
#' significance level, population standard deviation and truncation time.
#'
#' @param alpha Significance level; a single number strictly between 0 and 1
#' @param sigma Population standard deviation; a single positive number
#' @param truncation Desired truncation time for mSPRT; a single positive number
#' @return The value of the Details formula, rounded to two decimals.
#' @references Johari, R., Koomen, P., Pekelis, L. & Walsh, D. 2017, "Peeking at A/B Tests: Why it matters, and what to do about it", ACM, pp. 1517
#' @section Details:
#' Mixture variance \deqn{\tau^2 = \sigma^2 \frac{\Phi(-b)}{\frac{1}{b}\phi(b)-\Phi(-b)}.}
#' @export
calcTau <- function(alpha, sigma, truncation) {
  # Scalar guard: `&&`/`||` short-circuit, so a non-numeric or vector argument
  # is rejected with a clear message instead of a cryptic length error.
  is_scalar_number <- function(v) {
    is.numeric(v) && length(v) == 1L && !is.na(v)
  }

  if (!is_scalar_number(alpha) || alpha <= 0 || alpha >= 1) {
    stop("Alpha must be between 0 and 1")
  }
  if (!is_scalar_number(sigma) || sigma <= 0) {
    stop("sigma must be a single positive number")
  }
  if (!is_scalar_number(truncation) || truncation <= 0) {
    # A negative truncation previously produced NaN silently (sqrt of a
    # negative number); fail loudly instead.
    stop("truncation must be a single positive number")
  }

  # NOTE(review): by operator precedence the square root below applies to the
  # denominator only, i.e. b = 2 * log(1 / alpha) / sqrt(truncation * sigma^2).
  # This may have been meant as sqrt(2 * log(1 / alpha) / (truncation * sigma^2));
  # confirm against the referenced paper before changing, since it alters every
  # returned value.
  b <- (2 * log(alpha^(-1))) / (truncation * sigma^2)^(1 / 2)

  tail_mass <- stats::pnorm(-b)
  round(sigma^2 * (tail_mass / ((1 / b) * stats::dnorm(b) - tail_mass)), 2)
}
#' Calculate mixture Sequential Probability Ratio Test
#'
#' S3 generic; dispatches on \code{x}.
#'
#' @param x,y Numeric vectors
#' @param xpre,ypre Numeric vectors of pre-experiment data
#' @param sigma Population standard deviation
#' @param tau Mixture variance
#' @param theta Hypothesised difference between \code{x} and \code{y}
#' @param distribution The desired distribution.
#' @param alpha Significance level
#' @param useCpp Boolean. Use C++ for calculations? Useful for running many tests as it reduces runtime substantially
#' @return The value returned by the dispatched method.
#' @references Johari, R., Koomen, P., Pekelis, L. & Walsh, D. 2017, 'Peeking at A/B Tests: Why it matters, and what to do about it', ACM, pp. 1517
#' @section Details:
#' With normal data and normal prior, the closed form solution of the probability ratio after \eqn{n} observations have been collected is:
#' \deqn{\tilde{\Lambda}_n = \sqrt{\frac{2\sigma^2}{2\sigma^2 + n\tau^2}}\exp{\left(\frac{n^2\tau^2(\bar{X}_n - \bar{Y}_n-\theta_0)^2}{4\sigma^2(2\sigma^2+n\tau^2)}\right)}.}
#' With Bernoulli data, the closed form solution is:
#' \deqn{\tilde{\Lambda}_n = \sqrt{\frac{V_n}{V_n + n\tau^2}}\exp{\left(\frac{n^2\tau^2(\bar{X}_n - \bar{Y}_n-\theta_0)^2}{2V_n(V_n+n\tau^2)}\right)}.}
#' @export
mSPRT <- function(x, y, xpre = NULL, ypre = NULL, sigma, tau, theta = 0,
                  distribution = "normal", alpha = 0.05, useCpp = FALSE) {
  # `FALSE` rather than `F`: `F` is an ordinary variable that can be reassigned.
  UseMethod("mSPRT")
}
#' Perform mixture Sequential Probability Ratio Test
#'
#' @param x,y Numeric vectors of equal, non-zero length
#' @param xpre,ypre Numeric vectors of pre-experiment data. They are used as
#'   control variates only when both are supplied.
#' @param sigma Population standard deviation (validated for the normal case)
#' @param tau Mixture variance
#' @param theta Hypothesised difference between \code{x} and \code{y}
#' @param distribution The desired distribution, \code{"normal"} or
#'   \code{"bernoulli"} (matched case-insensitively).
#' @param alpha Significance level
#' @param useCpp Boolean. Use C++ for calculations?
#' @return An object of class \code{mSPRT}: a list with elements
#'   \code{distribution}, \code{n}, \code{spr} (the probability ratio after
#'   each observation), \code{n.rejection}, \code{decision}, \code{text} and
#'   \code{alpha}.
#' @references Johari, R., Koomen, P., Pekelis, L. & Walsh, D. 2017, "Peeking at A/B Tests: Why it matters, and what to do about it", ACM, pp. 1517
#' @name mSPRT.default
#' @export
#' @useDynLib mixtureSPRT
#' @importFrom Rcpp sourceCpp
NULL
# The roxygen block is deliberately attached to NULL (as in the original), so
# the generated NAMESPACE entries stay exactly as they are today.

mSPRT.default <- function(x, y, xpre = NULL, ypre = NULL, sigma, tau, theta = 0,
                          distribution = "normal", alpha = 0.05, useCpp = FALSE) {
  # Validations ------------
  if (is.null(x) || is.null(y) || length(x) == 0L || length(y) == 0L) {
    stop("x and y cannot be empty")
  }
  if (length(x) != length(y)) {
    stop("x and y must be of same length")
  }

  # Coerce BOTH samples; a coercion warning (e.g. "a" -> NA) is a hard error.
  as_numeric_or_stop <- function(v) {
    bail <- function(cond) {
      stop("x and y must be numerical vectors", call. = FALSE)
    }
    tryCatch(as.numeric(v), warning = bail, error = bail)
  }
  x <- as_numeric_or_stop(x)
  y <- as_numeric_or_stop(y)

  is_scalar_number <- function(v) {
    is.numeric(v) && length(v) == 1L && !is.na(v)
  }

  # distribution: normalise once so validation and branching agree on case.
  distribution <- tolower(as.character(distribution))
  if (length(distribution) != 1L ||
      !distribution %in% c("normal", "bernoulli")) {
    stop("Distribution should be either 'normal' or 'bernoulli'")
  }

  # sigma is only checked for the normal case, as before.
  if (distribution == "normal" && !is_scalar_number(sigma)) {
    stop("sigma must be numeric")
  }
  if (!is_scalar_number(theta)) {
    stop("theta must be numeric")
  }
  if (!is_scalar_number(tau) || tau <= 0) {
    stop("tau must be numeric and positive")
  }
  if (!is_scalar_number(alpha) || alpha <= 0 || alpha >= 1) {
    stop("Significance level must be between 0 and 1")
  }

  useCpp <- as.logical(useCpp)
  if (length(useCpp) != 1L || is.na(useCpp)) {
    stop("useCpp must be TRUE or FALSE")
  }

  n <- length(x)
  burnIn <- 100
  threshold <- alpha^(-1)

  # Control variates are used only when both pre-experiment vectors are given.
  have_pre <- !is.null(xpre) && !is.null(ypre)
  if (have_pre && (length(xpre) < n || length(ypre) < n)) {
    stop("xpre and ypre must be at least as long as x and y")
  }

  if (useCpp) {
    # CALC IN C++ ------------
    # One call serves both distributions; the C++ routine receives the name.
    out <- mixtureSPRT::cppmSPRT(x = x,
                                 y = y,
                                 xpre = xpre,
                                 ypre = ypre,
                                 sigma = sigma,
                                 tau = tau,
                                 theta = theta,
                                 distribution = distribution)
  } else {
    # CALC IN R ------------
    idx <- seq_len(n)
    # Observations after the burn-in window; empty when n <= burnIn, so short
    # inputs no longer trigger a descending `burnIn:n` sequence.
    late <- idx[idx > burnIn]
    # Entries inside the burn-in window stay 0 wherever burn-in applies.
    out <- numeric(n)

    # Average regression coefficient of the outcome on the pre-experiment
    # covariate across both groups, on the first i observations.
    control_coef <- function(s) {
      0.5 * ((stats::cov(xpre[s], x[s]) / stats::var(xpre[s])) +
               (stats::cov(ypre[s], y[s]) / stats::var(ypre[s])))
    }

    if (distribution == "normal") {
      if (have_pre) {
        for (i in late) {
          s <- seq_len(i)
          k <- control_coef(s)
          xn <- x[s] - k * xpre[s]
          yn <- y[s] - k * ypre[s]
          rho <- 0.5 * (stats::cor(x[s], xpre[s]) + stats::cor(y[s], ypre[s]))
          shrunk <- 2 * sigma^2 * (1 - rho^2)

          # NOTE(review): the last factor uses (1 - k^2) while every other
          # term uses (1 - rho^2); kept as in the original -- confirm which
          # one is intended.
          out[i] <- sqrt(shrunk / (shrunk + i * tau^2)) *
            exp((i^2 * tau^2 * (mean(xn) - mean(yn) - theta)^2) /
                  (4 * sigma^2 * (1 - rho^2) *
                     (2 * sigma^2 * (1 - k^2) + i * tau^2)))
        }
      } else {
        # No burn-in on this path. Running means via cumsum replace the
        # O(n^2) recomputation of mean(x[1:i]) at every step.
        gap <- cumsum(x) / idx - cumsum(y) / idx - theta
        spread <- 2 * sigma^2 + idx * tau^2
        out <- sqrt((2 * sigma^2) / spread) *
          exp((idx^2 * tau^2 * gap^2) / (4 * sigma^2 * spread))
      }
    } else {
      if (have_pre) {
        for (i in late) {
          s <- seq_len(i)
          k <- control_coef(s)
          xn <- x[s] - k * xpre[s]
          yn <- y[s] - k * ypre[s]
          Vn <- mean(xn) * (1 - mean(xn)) + mean(yn) * (1 - mean(yn))

          out[i] <- sqrt(Vn / (Vn + i * tau^2)) *
            exp((i^2 * tau^2 * (mean(xn) - mean(yn) - theta)^2) /
                  (2 * Vn * (Vn + i * tau^2)))
        }
      } else {
        mean_x <- (cumsum(x) / idx)[late]
        mean_y <- (cumsum(y) / idx)[late]
        mean_z <- (cumsum(x - y) / idx)[late]
        Vn <- mean_x * (1 - mean_x) + mean_y * (1 - mean_y)

        out[late] <- sqrt(Vn / (Vn + late * tau^2)) *
          exp((late^2 * tau^2 * (mean_z - theta)^2) /
                (2 * Vn * (Vn + late * tau^2)))
      }
    }
  }
  out <- as.vector(out)

  # Output ------------
  # H0 is rejected the first time the ratio exceeds 1 / alpha. `which()` skips
  # NA, and a crossing at the very last observation still counts as rejection.
  crossed <- which(out > threshold)
  rejected <- length(crossed) > 0L
  n.rejection <- if (rejected) crossed[1L] else n

  decision <- if (rejected) "Accept H1" else "Accept H0"
  text <- paste0("Decision made after ", n.rejection,
                 " observations were collected")

  output <- list(
    distribution = distribution,
    n = n,
    spr = out,
    n.rejection = n.rejection,
    decision = decision,
    text = text,
    alpha = alpha
  )
  class(output) <- "mSPRT"
  output
}
#' plot.mSPRT
#'
#' Draws the probability ratio against the number of observations collected,
#' with a horizontal line at the rejection threshold \code{1 / alpha}.
#'
#' @importFrom ggplot2 ggplot geom_line geom_hline aes labs theme_minimal xlab ylab ylim
#' @importFrom graphics plot
#' @param x An object of class \code{mSPRT}
#' @param ... Further arguments
#' @return A \code{ggplot} object, or \code{NULL} (invisibly) when ggplot2 is
#'   not installed.
#' @export
plot.mSPRT <- function(x, ...) {
  if (!requireNamespace("ggplot2", quietly = TRUE)) {
    message("ggplot2 required for plot")
    return(invisible(NULL))
  }

  spr <- as.vector(x$spr)
  threshold <- x$alpha^(-1)
  xp <- data.frame(obs = seq_along(spr), spr = spr)

  # Decide from the ratios themselves so that a crossing at the very last
  # observation is still reported as a rejection.
  rejected <- any(spr > threshold, na.rm = TRUE)
  subtitle <- if (rejected) {
    paste0("Null Hypothesis Rejected After ", x$n.rejection, " Observations")
  } else {
    paste0("Null Hypothesis Accepted")
  }

  # NA/Inf ratios would make the axis limit invalid; use finite values only.
  upper <- max(threshold + 2, spr[is.finite(spr)])

  ggplot2::ggplot(xp, ggplot2::aes(y = spr, x = obs)) +
    ggplot2::geom_line() +
    ggplot2::ylab("Probability Ratio") +
    ggplot2::xlab("Observations Collected") +
    ggplot2::geom_hline(yintercept = threshold) +
    ggplot2::ylim(c(0, upper)) +
    ggplot2::theme_minimal() +
    ggplot2::labs(title = "Mixture Sequential Probability Ratio Test",
                  subtitle = subtitle)
}

#' print.mSPRT
#'
#' Prints the decision, the distribution and the significance level. When the
#' ratio never reached the rejection threshold, it also prints the ratio after
#' the last observation and the rejection region.
#'
#' @param x An object of class \code{mSPRT}
#' @param ... Further arguments
#' @return \code{x}, invisibly.
#' @export
print.mSPRT <- function(x, ...) {
  threshold <- x$alpha^(-1)
  # Drop NA ratios so the comparison below never evaluates `if (NA)`.
  observed <- x$spr[!is.na(x$spr)]

  cat(" Decision: ", x$decision, "\n",
      x$text)
  cat(if (x$decision == "Accept H0") {" (truncated)"},
      "\n", "Distribution:", x$distribution, "\n",
      "Significance level:", x$alpha, "\n")

  if (length(observed) > 0L && max(observed) < threshold) {
    # Report the ratio at the last observation, as the label says, rather than
    # the maximum over the whole run.
    cat(" Probability Ratio after last observation:",
        round(observed[length(observed)], 2), "\n",
        "Rejection Region: ", "Prob. Ratio > ", threshold, "\n")
  }
  invisible(x)
}
70 | 71 | ```{r,message=F} 72 | library(MASS) 73 | set.seed(1337) 74 | rho=0.6 # Correlation between pre-experiment data and post-treatment data 75 | sigma = 1 76 | Sigma <- matrix(c(sigma^2,rho*sqrt(sigma^2*sigma^2),rho*sqrt(sigma^2*sigma^2),sigma^2),2,2) # covar.matrix to make sure correlation = rho 77 | n <- 1000 # Truncation point 78 | 79 | x <- mvrnorm(n = n, c(0,0.1), Sigma, empirical = T) %>% as.data.frame() # Treatment group 80 | y <- mvrnorm(n = n, c(0,0), Sigma, empirical = T) %>% as.data.frame() # Control group 81 | cor(x[,1],x[,2]) 82 | cor(y[,1],y[,2]) 83 | 84 | 85 | m1 <- mSPRT(x = x[,2], 86 | y = y[,2], 87 | xpre = x[,1], 88 | ypre = y[,1], 89 | sigma = sigma, 90 | tau = calcTau(alpha = 0.05, truncation = n, sigma = 1), 91 | theta = 0, 92 | distribution = "normal", 93 | alpha = 0.05, 94 | useCpp = T) 95 | 96 | m2 <- mSPRT(x = x[,2], 97 | y = y[,2], 98 | sigma = sigma, 99 | tau = calcTau(alpha = 0.05, truncation = n, sigma = 1), 100 | theta = 0, 101 | distribution = "normal", 102 | alpha = 0.05, 103 | useCpp = T) 104 | 105 | gridExtra::grid.arrange(plot(m1),plot(m2)) 106 | 107 | ``` 108 | 109 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mixtureSPRT 2 | 3 | mixtureSPRT is a package for performing mixture Sequential Probability 4 | Ratio tests. It includes functions for calculating mixing variance and 5 | test statistic, as well as methods for plotting and printing. It also 6 | contains an option carry out the calculations in C++ as it reduced 7 | runtime substantially. This is particularly useful when many tests are 8 | performed to see for example sampling distributions or compare the mSPRT 9 | to other tests. 
10 | 11 | - `calcTau()` 12 | - `mSPRT()` 13 | 14 | ## Installation 15 | 16 | ``` r 17 | devtools::install_github("erik-stenberg/mixtureSPRT") 18 | ``` 19 | 20 | ## Usage 21 | 22 | ``` r 23 | set.seed(1337) 24 | n <- 10000 25 | m <- mSPRT(x = rnorm(n), 26 | y = rnorm(n, mean = 0.05), 27 | sigma = 1, 28 | tau = calcTau(alpha = 0.05, sigma = 1, truncation = n), 29 | theta = 0, 30 | distribution = "normal", 31 | alpha = 0.05) 32 | 33 | plot(m) 34 | ``` 35 | 36 | ![](README_files/figure-gfm/unnamed-chunk-3-1.png) 37 | 38 | ### C++ 39 | 40 | ``` r 41 | library(mixtureSPRT) 42 | library(microbenchmark) 43 | 44 | y <- rnorm(100) 45 | x <- rnorm(100) 46 | sigma = 1 47 | tau = calcTau(0.05,1,100) 48 | theta = 0 49 | distribution="normal" 50 | alpha=0.05 51 | 52 | microbenchmark( 53 | m <- mSPRT(x,y,sigma=sigma,tau=tau, 54 | useCpp = F), 55 | mcpp <- mSPRT(x,y,sigma=sigma,tau=tau, 56 | useCpp = T) 57 | ) 58 | ``` 59 | 60 | ## Unit: microseconds 61 | ## expr min 62 | ## m <- mSPRT(x, y, sigma = sigma, tau = tau, useCpp = F) 1025.595 63 | ## mcpp <- mSPRT(x, y, sigma = sigma, tau = tau, useCpp = T) 298.758 64 | ## lq mean median uq max neval 65 | ## 1281.853 1668.998 1373.2345 1610.736 13006.415 100 66 | ## 383.537 475.625 413.7475 524.851 1263.475 100 67 | 68 | In case pre-experiment data is available, those can be included too as 69 | control variates in order to reduce the variance in the variable tested. 
70 | 71 | ``` r 72 | library(MASS) 73 | set.seed(1337) 74 | rho=0.6 # Correlation between pre-experiment data and post-treatment data 75 | sigma = 1 76 | Sigma <- matrix(c(sigma^2,rho*sqrt(sigma^2*sigma^2),rho*sqrt(sigma^2*sigma^2),sigma^2),2,2) # covar.matrix to make sure correlation = rho 77 | n <- 1000 # Truncation point 78 | 79 | x <- mvrnorm(n = n, c(0,0.1), Sigma, empirical = T) %>% as.data.frame() # Treatment group 80 | y <- mvrnorm(n = n, c(0,0), Sigma, empirical = T) %>% as.data.frame() # Control group 81 | cor(x[,1],x[,2]) 82 | ``` 83 | 84 | ## [1] 0.6 85 | 86 | ``` r 87 | cor(y[,1],y[,2]) 88 | ``` 89 | 90 | ## [1] 0.6 91 | 92 | ``` r 93 | m1 <- mSPRT(x = x[,2], 94 | y = y[,2], 95 | xpre = x[,1], 96 | ypre = y[,1], 97 | sigma = sigma, 98 | tau = calcTau(alpha = 0.05, truncation = n, sigma = 1), 99 | theta = 0, 100 | distribution = "normal", 101 | alpha = 0.05, 102 | useCpp = T) 103 | 104 | m2 <- mSPRT(x = x[,2], 105 | y = y[,2], 106 | sigma = sigma, 107 | tau = calcTau(alpha = 0.05, truncation = n, sigma = 1), 108 | theta = 0, 109 | distribution = "normal", 110 | alpha = 0.05, 111 | useCpp = T) 112 | 113 | gridExtra::grid.arrange(plot(m1),plot(m2)) 114 | ``` 115 | 116 | ![](README_files/figure-gfm/unnamed-chunk-5-1.png) 117 | -------------------------------------------------------------------------------- /README_files/figure-gfm/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erik-stenberg/mixtureSPRT/65eb0762b671ace3258d2a20a503fe6913cb8e42/README_files/figure-gfm/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /README_files/figure-gfm/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erik-stenberg/mixtureSPRT/65eb0762b671ace3258d2a20a503fe6913cb8e42/README_files/figure-gfm/unnamed-chunk-5-1.png 
-------------------------------------------------------------------------------- /README_files/figure-markdown_github/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erik-stenberg/mixtureSPRT/65eb0762b671ace3258d2a20a503fe6913cb8e42/README_files/figure-markdown_github/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /README_files/figure-markdown_github/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erik-stenberg/mixtureSPRT/65eb0762b671ace3258d2a20a503fe6913cb8e42/README_files/figure-markdown_github/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /figure/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erik-stenberg/mixtureSPRT/65eb0762b671ace3258d2a20a503fe6913cb8e42/figure/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /man/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erik-stenberg/mixtureSPRT/65eb0762b671ace3258d2a20a503fe6913cb8e42/man/.DS_Store -------------------------------------------------------------------------------- /man/calcTau.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calcTau.R 3 | \name{calcTau} 4 | \alias{calcTau} 5 | \title{Calculate Mixture Variance} 6 | \usage{ 7 | calcTau(alpha, sigma, truncation) 8 | } 9 | \arguments{ 10 | \item{alpha}{Significance level} 11 | 12 | \item{sigma}{Population standard deviation} 13 | 14 | \item{truncation}{Desired truncation time for mSPRT} 15 | } 16 | \value{ 17 | tau Optimal 
mixture variance \eqn{\tau} for mSPRT. 18 | } 19 | \description{ 20 | Calculate Mixture Variance 21 | } 22 | \section{Details}{ 23 | 24 | Mixture variance \deqn{\tau^2 = \sigma^2 \frac{\Phi(-b)}{\frac{1}{b}\phi(b)-\Phi(-b)}.} 25 | } 26 | 27 | \references{ 28 | Johari, R., Koomen, P., Pekelis, L. & Walsh, D. 2017, "Peeking at A/B Tests: Why it matters, and what to do about it", ACM, , pp. 1517 29 | } 30 | -------------------------------------------------------------------------------- /man/cppmSPRT.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{cppmSPRT} 4 | \alias{cppmSPRT} 5 | \title{Calculate mixture Sequential Probability Ratio Test in C++} 6 | \arguments{ 7 | \item{x, y}{Numeric vectors} 8 | 9 | \item{xpre, ypre}{Numeric vectors of pre-experiment data} 10 | 11 | \item{sigma}{Population standard deviation} 12 | 13 | \item{tau}{Mixture variance} 14 | 15 | \item{theta}{Hypothesised difference between \code{x} and \code{y}} 16 | 17 | \item{distribution}{The desired distribution.} 18 | 19 | \item{alpha}{Significance level} 20 | } 21 | \value{ 22 | The likelihood ratio 23 | } 24 | \description{ 25 | Calculate mixture Sequential Probability Ratio Test in C++ 26 | } 27 | -------------------------------------------------------------------------------- /man/mSPRT.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mSPRT.R 3 | \name{mSPRT} 4 | \alias{mSPRT} 5 | \title{Calculate mixture Sequential Probability Ratio Test} 6 | \usage{ 7 | mSPRT(x, y, xpre = NULL, ypre = NULL, sigma, tau, theta = 0, 8 | distribution = "normal", alpha = 0.05, useCpp = F) 9 | } 10 | \arguments{ 11 | \item{x, y}{Numeric vectors} 12 | 13 | \item{xpre, ypre}{Numeric vectors of pre-experiment data} 14 | 15 | \item{sigma}{Population 
standard deviation} 16 | 17 | \item{tau}{Mixture variance} 18 | 19 | \item{theta}{Hypothesised difference between \code{x} and \code{y}} 20 | 21 | \item{distribution}{The desired distribution.} 22 | 23 | \item{alpha}{Significance level} 24 | 25 | \item{useCpp}{Boolean. Use C++ for calculations? Useful for running many tests as it reduces runtime substantially} 26 | } 27 | \value{ 28 | The likelihood ratio 29 | } 30 | \description{ 31 | Calculate mixture Sequential Probability Ratio Test 32 | } 33 | \section{Details}{ 34 | 35 | With normal data and normal prior, the closed form solution of the probability ratio after \eqn{n} observations have been collected is: 36 | \deqn{\tilde{\Lambda}_n = \sqrt{\frac{2\sigma^2}{V_n + n\tau^2}}\exp{\left(\frac{n^2\tau^2(\bar{Y}_n - \bar{X}_n-\theta_0)^2}{4\sigma2(2\sigma^2+n\tau^2)}\right)}.} 37 | With Bernoulli data, the closed form solution is: 38 | \deqn{\tilde{\Lambda}_n = \sqrt{\frac{V_n}{V_n + n\tau^2}}\exp{\left(\frac{n^2\tau^2(\bar{Y}_n - \bar{X}_n-\theta_0)^2}{2V_n(V_n+n\tau^2)}\right)}.} 39 | } 40 | 41 | \references{ 42 | Johari, R., Koomen, P., Pekelis, L. & Walsh, D. 2017, 'Peeking at A/B Tests: Why it matters, and what to do about it', ACM, , pp. 1517 43 | } 44 | -------------------------------------------------------------------------------- /man/mSPRT.default.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mSPRT.default.R 3 | \name{mSPRT.default} 4 | \alias{mSPRT.default} 5 | \title{Perform mixture Sequential Probability Ratio Test} 6 | \arguments{ 7 | \item{x, y}{Numeric vectors} 8 | 9 | \item{xpre, ypre}{Numeric vectors of pre-experiment data} 10 | 11 | \item{sigma}{Population standard deviation} 12 | 13 | \item{tau}{Mixture variance} 14 | 15 | \item{theta}{Hypothesised difference between \code{x} and \code{y}} 16 | 17 | \item{distribution}{The desired distribution. 
Either \code{"normal"} or \code{"bernoulli"}.} 18 | 19 | \item{alpha}{Significance level} 20 | } 21 | \value{ 22 | The likelihood ratio 23 | } 24 | \description{ 25 | Perform mixture Sequential Probability Ratio Test 26 | } 27 | \references{ 28 | Johari, R., Koomen, P., Pekelis, L. & Walsh, D. 2017, "Peeking at A/B Tests: Why it matters, and what to do about it", ACM, pp. 1517 29 | } 30 | -------------------------------------------------------------------------------- /man/mixtureSPRT-package.Rd: -------------------------------------------------------------------------------- 1 | \name{mixtureSPRT-package} 2 | \alias{mixtureSPRT-package} 3 | \alias{mixtureSPRT} 4 | \docType{package} 5 | \title{ 6 | mixture Sequential Probability Ratio test 7 | } 8 | \description{ 9 | Perform mixture Sequential Probability Ratio Test (mSPRT). 10 | } 11 | \details{ 12 | This package implements the mixture Sequential Probability Ratio Test (mSPRT) with normally and Bernoulli distributed data and normal mixture distribution, as described in \emph{"Peeking at A/B Tests: Why it matters, and what to do about it"}. It contains a function for calculating the test statistic and the resulting decision, as well as plot and print methods. It also contains a function to calculate the optimal mixture variance given desired significance level, population variance and truncation time of the test. 13 | } 14 | \author{ 15 | Erik Stenberg 16 | 17 | Maintainer: Erik Stenberg 18 | } 19 | \references{ 20 | Johari, R., Koomen, P., Pekelis, L. & Walsh, D. 2017, "Peeking at A/B Tests: Why it matters, and what to do about it", ACM, pp. 
1517 21 | } 22 | \keyword{mSPRT, Sequential Analysis, Wald's SPRT} 23 | \examples{ 24 | \dontrun{ 25 | library(mixtureSPRT) 26 | set.seed(12345) 27 | n <- 10000 28 | m <- mSPRT(x = rnorm(n), 29 | y = rnorm(n, mean = 0.06), 30 | sigma = 1, 31 | tau = calcTau(alpha = 0.05, sigma = 1, truncation = n), 32 | theta = 0, 33 | distribution = "normal", 34 | alpha = 0.05) 35 | 36 | plot(m) 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /man/plot.mSPRT.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.mSPRT.R 3 | \name{plot.mSPRT} 4 | \alias{plot.mSPRT} 5 | \title{plot.mSPRT} 6 | \usage{ 7 | \method{plot}{mSPRT}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{An object of class \code{mSPRT}} 11 | 12 | \item{...}{Further arguments} 13 | } 14 | \description{ 15 | plot.mSPRT 16 | } 17 | -------------------------------------------------------------------------------- /man/print.mSPRT.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/print.mSPRT.R 3 | \name{print.mSPRT} 4 | \alias{print.mSPRT} 5 | \title{print.mSPRT} 6 | \usage{ 7 | \method{print}{mSPRT}(x, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{An object of class \code{mSPRT}} 11 | 12 | \item{...}{Further arguments} 13 | } 14 | \description{ 15 | print.mSPRT 16 | } 17 | -------------------------------------------------------------------------------- /mixtureSPRT.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace 19 | -------------------------------------------------------------------------------- /src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/erik-stenberg/mixtureSPRT/65eb0762b671ace3258d2a20a503fe6913cb8e42/src/.DS_Store -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | // cppmSPRT 9 | NumericVector cppmSPRT(Rcpp::NumericVector x, Rcpp::NumericVector y, Nullable xpre, Nullable ypre, double sigma, double tau, double theta, Rcpp::CharacterVector distribution); 10 | /* .Call entry point for cppmSPRT: takes the eight arguments as SEXPs, converts each one through Rcpp::traits::input_parameter, calls cppmSPRT and returns the Rcpp::wrap'ed result; the body sits between BEGIN_RCPP and END_RCPP. */ RcppExport SEXP _mixtureSPRT_cppmSPRT(SEXP xSEXP, SEXP ySEXP, SEXP xpreSEXP, SEXP ypreSEXP, SEXP sigmaSEXP, SEXP tauSEXP, SEXP thetaSEXP, SEXP 
distributionSEXP) { 11 | BEGIN_RCPP 12 | Rcpp::RObject rcpp_result_gen; 13 | Rcpp::RNGScope rcpp_rngScope_gen; 14 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type x(xSEXP); 15 | Rcpp::traits::input_parameter< Rcpp::NumericVector >::type y(ySEXP); 16 | Rcpp::traits::input_parameter< Nullable >::type xpre(xpreSEXP); 17 | Rcpp::traits::input_parameter< Nullable >::type ypre(ypreSEXP); 18 | Rcpp::traits::input_parameter< double >::type sigma(sigmaSEXP); 19 | Rcpp::traits::input_parameter< double >::type tau(tauSEXP); 20 | Rcpp::traits::input_parameter< double >::type theta(thetaSEXP); 21 | Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type distribution(distributionSEXP); 22 | rcpp_result_gen = Rcpp::wrap(cppmSPRT(x, y, xpre, ypre, sigma, tau, theta, distribution)); 23 | return rcpp_result_gen; 24 | END_RCPP 25 | } 26 | 27 | /* Routine table handed to R: a single .Call entry for _mixtureSPRT_cppmSPRT declared with 8 arguments, followed by the NULL terminator entry. */ static const R_CallMethodDef CallEntries[] = { 28 | {"_mixtureSPRT_cppmSPRT", (DL_FUNC) &_mixtureSPRT_cppmSPRT, 8}, 29 | {NULL, NULL, 0} 30 | }; 31 | 32 | /* Package initialiser: registers CallEntries as the .Call routines of this DLL via R_registerRoutines, then calls R_useDynamicSymbols(dll, FALSE). */ RcppExport void R_init_mixtureSPRT(DllInfo *dll) { 33 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 34 | R_useDynamicSymbols(dll, FALSE); 35 | } 36 | -------------------------------------------------------------------------------- /src/test.cpp: -------------------------------------------------------------------------------- 1 | //' Calculate mixture Sequential Probability Ratio Test in C++ 2 | //' 3 | //' @param x,y Numeric vectors 4 | //' @param xpre,ypre Numeric vectors of pre-experiment data 5 | //' @param sigma Population standard deviation 6 | //' @param tau Mixture variance 7 | //' @param theta Hypothesised difference between \code{x} and \code{y} 8 | //' @param distribution The desired distribution. 
9 | //' @param alpha Significance level 10 | //' @return The likelihood ratio 11 | //' @name cppmSPRT 12 | //' @export 13 | 14 | #include 15 | #include 16 | #include 17 | using namespace Rcpp; 18 | using namespace std; 19 | 20 | double meanC(NumericVector x) 21 | { 22 | int n = x.size(); 23 | double total = 0; 24 | 25 | for (int i = 0; i < n; ++i) 26 | { 27 | total += x[i]; 28 | } 29 | return total / n; 30 | } 31 | 32 | double covC(NumericVector x, NumericVector y) 33 | { 34 | int n = x.size(); 35 | double meany = mean(y); 36 | double meanx = mean(x); 37 | NumericVector summa(n); 38 | for (int i = 0; i < n; i++) 39 | { 40 | summa[i] = (x[i] - meanx) * 41 | (y[i] - meany); 42 | } 43 | return sum(summa) / (n - 1); 44 | } 45 | 46 | bool checkNull(Nullable x) 47 | { 48 | if (x.isNull()) 49 | { 50 | return true; 51 | } 52 | else 53 | { 54 | return false; 55 | } 56 | } 57 | 58 | // [[Rcpp::export]] 59 | NumericVector cppmSPRT(Rcpp::NumericVector x, Rcpp::NumericVector y, Nullable xpre, Nullable ypre, double sigma, double tau, double theta = 0, Rcpp::CharacterVector distribution = "normal") 60 | { 61 | int n = x.size(); 62 | NumericVector out(n); 63 | double burnIn = 100; 64 | CharacterVector norm = "normal"; 65 | CharacterVector bern = "bernoulli"; 66 | 67 | if (checkNull(xpre) == false) 68 | { 69 | NumericVector xpre_n = NumericVector(xpre); 70 | NumericVector ypre_n = NumericVector(ypre); 71 | 72 | if (distribution[0] == bern[0]) 73 | { 74 | for (int i = burnIn; i < n; ++i) 75 | { 76 | double k = 0.5 * ((covC(xpre_n[Rcpp::Range(0, (i))], x[Rcpp::Range(0, (i))]) / var(xpre_n[Rcpp::Range(0, (i))])) + 77 | (covC(ypre_n[Rcpp::Range(0, (i))], y[Rcpp::Range(0, (i))]) / var(ypre_n[Rcpp::Range(0, (i))]))); 78 | 79 | NumericVector xn = x[Range(0, i)] - k * xpre_n[Range(0, i)]; 80 | NumericVector yn = y[Range(0, i)] - k * ypre_n[Range(0, i)]; 81 | 82 | double Vn = mean(yn) * (1 - mean(yn)) + mean(xn) * (1 - mean(xn)); 83 | 84 | out[i] = sqrt((Vn) / (Vn + (i + 1) * pow(tau, 2))) 
* exp((pow((i + 1), 2) * pow(tau, 2) * pow((mean(xn) - mean(yn) - theta), 2)) / (2 * Vn * (Vn + (i + 1) * pow(tau, 2)))); 85 | } 86 | } 87 | 88 | else if (distribution[0] == norm[0]) 89 | { 90 | for (int i = burnIn; i < n; ++i) 91 | { 92 | 93 | double k = 0.5 * ((covC(xpre_n[Rcpp::Range(0, (i))], x[Rcpp::Range(0, (i))]) / var(xpre_n[Rcpp::Range(0, (i))])) + 94 | (covC(ypre_n[Rcpp::Range(0, (i))], y[Rcpp::Range(0, (i))]) / var(ypre_n[Rcpp::Range(0, (i))]))); 95 | 96 | NumericVector xn = x[Range(0, i)] - k * xpre_n[Range(0, i)]; 97 | NumericVector yn = y[Range(0, i)] - k * ypre_n[Range(0, i)]; 98 | 99 | double rho = 0.5 * (covC(ypre_n[Rcpp::Range(0, (i))], y[Rcpp::Range(0, (i))])/sqrt(var(ypre_n[Rcpp::Range(0, (i))]) * var(y[Rcpp::Range(0, (i))])) + 100 | covC(xpre_n[Rcpp::Range(0, (i))], x[Rcpp::Range(0, (i))])/sqrt(var(xpre_n[Rcpp::Range(0, (i))]) * var(x[Rcpp::Range(0, (i))]))); 101 | 102 | 103 | out[i] = sqrt((2 * pow(sigma,2)*(1-pow(rho,2))) / (2 * pow(sigma,2)*(1-pow(rho,2)) + (i + 1) * pow(tau, 2))) * exp((pow((i + 1), 2) * pow(tau, 2) * pow((mean(xn) - mean(yn) - theta), 2)) / (4 * pow(sigma,2)*(1-pow(rho,2)) * (2 * pow(sigma,2)*(1-pow(rho,2)) + (i + 1) * pow(tau, 2)))); 104 | } 105 | } 106 | 107 | return out; 108 | } 109 | else 110 | { 111 | 112 | if (distribution[0] == norm[0]) 113 | { 114 | for (int i = 0; i < n; ++i) 115 | { 116 | out[i] = sqrt((2 * pow(sigma, 2) / (2 * pow(sigma, 2) + (i+1) * pow(tau, 2)))) * exp((pow((i+1 ), 2) * pow(tau, 2) * pow((mean(x[Rcpp::Range(0, (i))]) - mean(y[Rcpp::Range(0, (i))]) - theta), 2)) / (4 * pow(sigma, 2) * (2 * pow(sigma, 2) + (i+1 ) * pow(tau, 2)))); 117 | } 118 | } 119 | 120 | else if (distribution[0] == bern[0]) 121 | { 122 | for (int i = 0; i < n; ++i) 123 | { 124 | double Vn = mean(x[Range(0, i)]) * (1 - mean(x[Range(0, i)])) + mean(y[Range(0, i)]) * (1 - mean(y[Range(0, i)])); 125 | out[i] = sqrt((Vn) / (Vn + (i + 1) * pow(tau, 2))) * exp((pow((i + 1), 2) * pow(tau, 2) * pow((mean(x[Rcpp::Range(0, (i))]) - 
mean(y[Rcpp::Range(0, (i))]) - theta), 2)) / (2 * Vn * (Vn + (i + 1) * pow(tau, 2)))); 126 | } 127 | } 128 | return out; 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/my-vignette.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "mixtureSPRT" 3 | author: "Erik Stenberg" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Vignette Title} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r setup, include = FALSE} 13 | knitr::opts_chunk$set( 14 | collapse = TRUE, 15 | comment = "#>" 16 | ) 17 | ``` 18 | 19 | --------------------------------------------------------------------------------