├── vignettes
    ├── .gitignore
    └── intro.Rmd
├── LICENSE
├── R
    ├── .DS_Store
    ├── sysdata.rda
    ├── grizbayr-package.R
    ├── sysdata.R
    ├── calculate_total_cm.R
    ├── is_winner_max.R
    ├── validate_data_values.R
    ├── calculate_multi_rev_per_session.R
    ├── validate_posterior_samples.R
    ├── validate_wrt_option.R
    ├── is_prior_valid.R
    ├── validate_priors.R
    ├── impute_missing_options.R
    ├── find_best_option.R
    ├── estimate_win_prob.R
    ├── rdirichlet.R
    ├── sample_response_rate.R
    ├── update_beta.R
    ├── estimate_win_prob_vs_baseline.R
    ├── estimate_win_prob_vs_baseline_given_posterior.R
    ├── sample_ctr.R
    ├── validate_input_column.R
    ├── sample_cpc.R
    ├── validate_input_df.R
    ├── sample_conv_rate.R
    ├── estimate_win_prob_given_posterior.R
    ├── estimate_lift_vs_baseline.R
    ├── sample_session_duration.R
    ├── update_gamma.R
    ├── update_dirichlet.R
    ├── estimate_value_remaining.R
    ├── sample_page_views_per_session.R
    ├── estimate_loss.R
    ├── estimate_lift.R
    ├── sample_from_posterior.R
    ├── sample_cpa.R
    ├── sample_rev_per_session.R
    ├── sample_cm_per_click.R
    ├── estimate_all_values.R
    ├── sample_multi_rev_per_session.R
    └── sample_total_cm.R
├── tests
    ├── testthat.R
    ├── spelling.R
    └── testthat
    │   ├── test-validate_posterior_samples.R
    │   ├── test-is_winner_max.R
    │   ├── test-validate_wrt_option.R
    │   ├── test-sample_cpc.R
    │   ├── test-calculate_total_cm.R
    │   ├── test-validate_priors.R
    │   ├── test-sample_conv_rate.R
    │   ├── test-sample_session_duration.R
    │   ├── test-calculate_multi_rev_per_session.R
    │   ├── test-sample_cpa.R
    │   ├── test-sample_ctr.R
    │   ├── test-sample_page_views_per_session.R
    │   ├── test-sample_response_rate.R
    │   ├── test-estimate_all_values.R
    │   ├── test-sample_rev_per_session.R
    │   ├── test-sample_cm_per_click.R
    │   ├── test-sample_total_cm.R
    │   ├── test-estimate_value_remaining.R
    │   ├── test-sample_multi_rev_per_session.R
    │   ├── test-update_beta.R
    │   ├── test-update_dirichlet.R
    │   ├── test-validate_data_values.R
    │   ├── test-estimate_lift_vs_baseline.R
    │   ├── test-estimate_win_prob_vs_baseline.R
    │   ├── test-rdirichlet.R
    │   ├── test-validate_input_df.R
    │   ├── test-estimate_win_prob.R
    │   ├── test-find_best_option.R
    │   ├── test-impute_missing_options.R
    │   ├── test-estimate_win_prob_vs_baseline_given_posterior.R
    │   ├── test-is_prior_valid.R
    │   ├── test-update_gamma.R
    │   ├── test-validate_input_column.R
    │   ├── test-estimate_win_prob_given_posterior.R
    │   ├── test-sample_from_posterior.R
    │   ├── test-estimate_loss.R
    │   └── test-estimate_lift.R
├── CRAN-SUBMISSION
├── .Rbuildignore
├── inst
    └── WORDLIST
├── man
    ├── validate_data_values.Rd
    ├── is_winner_max.Rd
    ├── validate_posterior_samples.Rd
    ├── is_prior_valid.Rd
    ├── calculate_total_cm.Rd
    ├── impute_missing_options.Rd
    ├── validate_wrt_option.Rd
    ├── validate_priors.Rd
    ├── calculate_multi_rev_per_session.Rd
    ├── rdirichlet.Rd
    ├── validate_input_column.Rd
    ├── update_beta.Rd
    ├── find_best_option.Rd
    ├── grizbayr-package.Rd
    ├── validate_input_df.Rd
    ├── estimate_win_prob.Rd
    ├── sample_cpc.Rd
    ├── sample_conv_rate.Rd
    ├── estimate_win_prob_given_posterior.Rd
    ├── sample_from_posterior.Rd
    ├── estimate_loss.Rd
    ├── update_gamma.Rd
    ├── sample_response_rate.Rd
    ├── estimate_win_prob_vs_baseline_given_posterior.Rd
    ├── sample_ctr.Rd
    ├── estimate_lift.Rd
    ├── estimate_win_prob_vs_baseline.Rd
    ├── update_dirichlet.Rd
    ├── sample_session_duration.Rd
    ├── sample_cm_per_click.Rd
    ├── sample_multi_rev_per_session.Rd
    ├── sample_page_views_per_session.Rd
    ├── estimate_lift_vs_baseline.Rd
    ├── sample_cpa.Rd
    ├── estimate_value_remaining.Rd
    ├── sample_total_cm.Rd
    ├── sample_rev_per_session.Rd
    └── estimate_all_values.Rd
├── grizbayr.Rproj
├── .gitignore
├── LICENSE.md
├── DESCRIPTION
├── NEWS.md
├── NAMESPACE
└── README.md


/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2020
2 | COPYRIGHT HOLDER: Ryan Angi
3 | 


--------------------------------------------------------------------------------
/R/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-angi/grizbayr/HEAD/R/.DS_Store


--------------------------------------------------------------------------------
/R/sysdata.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-angi/grizbayr/HEAD/R/sysdata.rda


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(grizbayr)
3 | 
4 | test_check("grizbayr")
5 | 


--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 1.3.3
2 | Date: 2022-12-22 19:55:22 UTC
3 | SHA: f4e8ecd3b054d1ef13bff84d0e4752527f11057e
4 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^\.git$
 4 | ^LICENSE\.md$
 5 | ^doc$
 6 | ^Meta$
 7 | ^CRAN-RELEASE$
 8 | ^cran-comments\.md$
 9 | ^CRAN-SUBMISSION$
10 | 


--------------------------------------------------------------------------------
/tests/spelling.R:
--------------------------------------------------------------------------------
1 | if(requireNamespace('spelling', quietly = TRUE))
2 |   spelling::spell_check_test(vignettes = TRUE, error = FALSE,
3 |                              skip_on_cran = TRUE)
4 | 


--------------------------------------------------------------------------------
/R/grizbayr-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | "_PACKAGE"
3 | 
4 | ## usethis namespace: start
5 | #' @importFrom dplyr %>%
6 | #' @importFrom rlang .data
7 | ## usethis namespace: end
8 | NULL
9 | 


--------------------------------------------------------------------------------
/inst/WORDLIST:
--------------------------------------------------------------------------------
 1 | Bool
 2 | Bugfix
 3 | CPC
 4 | DataFrame
 5 | KaTeX
 6 | README
 7 | Rmd
 8 | Stucchio
 9 | Tibble
10 | VWO
11 | bayesr
12 | cdot
13 | chr
14 | coercian
15 | conv
16 | cpc
17 | df
18 | dirichlet
19 | dplyr
20 | dplyr's
21 | griz
22 | params
23 | roxygen
24 | str
25 | tibble
26 | tidyr
27 | wp
28 | 


--------------------------------------------------------------------------------
/tests/testthat/test-validate_posterior_samples.R:
--------------------------------------------------------------------------------
1 | context("Validate Posterior Samples")
2 | 
3 | test_that("validate_posterior_samples fails if the input is not a dataframe", {
4 |   input_list <- list(option_name = c("A", "B", "C"), samples = c(0,1,2))
5 |   expect_error({
6 |     validate_posterior_samples(posterior_samples = input_list)
7 |   })
8 | })
9 | 


--------------------------------------------------------------------------------
/R/sysdata.R:
--------------------------------------------------------------------------------
 1 | #' A Mapping from distribution names (inputs) to columns. This allows the
 2 | #' package to dynamically select column names for different distribution types.
 3 | #'
 4 | #' @docType data
 5 | #' @name distribution_column_mapping
 6 | #' @usage distribution_column_mapping
 7 | #' @format A data frame with 11 rows and 12 columns
 8 | #'
 9 | #' "distribution_column_mapping"
10 | 
11 | 


--------------------------------------------------------------------------------
/tests/testthat/test-is_winner_max.R:
--------------------------------------------------------------------------------
 1 | context("Is Winner Max")
 2 | 
 3 | test_that("is_winner_max returns FALSE when CPC or CPA is input", {
 4 |   expect_false(is_winner_max("cpc"))
 5 |   expect_false(is_winner_max("cpa"))
 6 | })
 7 | 
 8 | test_that("is_winner_max returns TRUE when anything else is input", {
 9 |   expect_true(is_winner_max("conversion_rate"))
10 |   expect_true(is_winner_max("something_random"))
11 | })
12 | 


--------------------------------------------------------------------------------
/man/validate_data_values.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validate_data_values.R
 3 | \name{validate_data_values}
 4 | \alias{validate_data_values}
 5 | \title{Validate Data Values}
 6 | \usage{
 7 | validate_data_values(data_values)
 8 | }
 9 | \arguments{
10 | \item{data_values}{List of named data values}
11 | }
12 | \value{
13 | None
14 | }
15 | \description{
16 | Validates data values are all greater than 0.
17 | }
18 | 


--------------------------------------------------------------------------------
/R/calculate_total_cm.R:
--------------------------------------------------------------------------------
 1 | #' Calculate Total CM
 2 | #'
 3 | #' @param rev_per_click vector of rev per click samples
 4 | #' @param cost_per_click vector of cost per click (cpc) samples
 5 | #' @param expected_clicks vector of expected clicks (expected CTR * fixed impressions)
 6 | #'
 7 | #' @return vector of CM estimates (dbl)
 8 | #'
 9 | calculate_total_cm <- function(rev_per_click, cost_per_click, expected_clicks){
10 |   (rev_per_click - cost_per_click) * expected_clicks
11 | }
12 | 


--------------------------------------------------------------------------------
/R/is_winner_max.R:
--------------------------------------------------------------------------------
 1 | #' Is Winner Max
 2 | #'
 3 | #' Determines if the max or min function should be used for win probability.
 4 | #' If CPA or CPC distribution, lower is better, else higher number is better.
 5 | #'
 6 | #' @param distribution String: the name of the distribution
 7 | #'
 8 | #' @return Boolean TRUE/FALSE
 9 | #'
10 | is_winner_max <- function(distribution){
11 |   switch(distribution,
12 |          "cpc" = ,
13 |          "cpa" = FALSE,
14 |          TRUE)
15 | }
16 | 


--------------------------------------------------------------------------------
/grizbayr.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageCheckArgs: --as-cran
22 | 


--------------------------------------------------------------------------------
/man/is_winner_max.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/is_winner_max.R
 3 | \name{is_winner_max}
 4 | \alias{is_winner_max}
 5 | \title{Is Winner Max}
 6 | \usage{
 7 | is_winner_max(distribution)
 8 | }
 9 | \arguments{
10 | \item{distribution}{String: the name of the distribution}
11 | }
12 | \value{
13 | Boolean TRUE/FALSE
14 | }
15 | \description{
16 | Determines if the max or min function should be used for win probability.
17 | If CPA or CPC distribution, lower is better, else higher number is better.
18 | }
19 | 


--------------------------------------------------------------------------------
/R/validate_data_values.R:
--------------------------------------------------------------------------------
 1 | #' Validate Data Values
 2 | #'
 3 | #' Validates data values are all greater than 0.
 4 | #'
 5 | #' @param data_values List of named data values
 6 | #'
 7 | #' @return None
 8 | #'
 9 | #' @importFrom purrr walk
10 | #'
11 | validate_data_values <- function(data_values){
12 |   if (length(data_values) == 0) {
13 |     stop("No Data Values available to validate.")
14 |   }
15 |   purrr::walk(data_values, ~ if (.x < 0) {
16 |     stop(paste(names(which(data_values == .x)),
17 |                "is less than 0. Cannot update distribution."))
18 |   })
19 | }
20 | 


--------------------------------------------------------------------------------
/man/validate_posterior_samples.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validate_posterior_samples.R
 3 | \name{validate_posterior_samples}
 4 | \alias{validate_posterior_samples}
 5 | \title{Validate Posterior Samples Dataframe}
 6 | \usage{
 7 | validate_posterior_samples(posterior_samples)
 8 | }
 9 | \arguments{
10 | \item{posterior_samples}{Tibble of data in long form with 2 columns
11 | `option_name` and `samples`}
12 | }
13 | \value{
14 | None
15 | }
16 | \description{
17 | Function fails if posterior is not shaped correctly.
18 | }
19 | 


--------------------------------------------------------------------------------
/R/calculate_multi_rev_per_session.R:
--------------------------------------------------------------------------------
 1 | #' Calculate Multi Rev Per Session
 2 | #'
 3 | #' @param conv_rates Dirichlet samples containing a tibble with columns alpha_1, alpha_2, and alpha_0
 4 | #' @param inverse_rev_A Vector of inverse revenue samples from A conversion type
 5 | #' @param inverse_rev_B Vector of inverse revenue samples from B conversion type
 6 | #'
 7 | #' @return Vector of samples (dbl)
 8 | #'
 9 | calculate_multi_rev_per_session <- function(conv_rates, inverse_rev_A, inverse_rev_B){
10 |   (conv_rates$alpha_1 / inverse_rev_A) + (conv_rates$alpha_2 / inverse_rev_B)
11 | }
12 | 


--------------------------------------------------------------------------------
/man/is_prior_valid.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/is_prior_valid.R
 3 | \name{is_prior_valid}
 4 | \alias{is_prior_valid}
 5 | \title{Is Prior Valid}
 6 | \usage{
 7 | is_prior_valid(priors_list, valid_prior)
 8 | }
 9 | \arguments{
10 | \item{priors_list}{A list of valid priors}
11 | 
12 | \item{valid_prior}{A character string}
13 | }
14 | \value{
15 | Boolean (TRUE/FALSE)
16 | }
17 | \description{
18 | Checks if a single valid prior name is in the list of prior values and if
19 |     that prior value from the list is greater than 0.
20 | }
21 | 


--------------------------------------------------------------------------------
/tests/testthat/test-validate_wrt_option.R:
--------------------------------------------------------------------------------
 1 | context("Validate With Respect To Option")
 2 | 
 3 | test_that("validate_wrt_option fails when not in option name", {
 4 |   post_samples_example <- tibble::tibble(option_name = c("X", "Y", "Z"))
 5 |   expect_error({
 6 |     validate_wrt_option(wrt_option = "B", posterior_samples = post_samples_example)
 7 |   })
 8 | })
 9 | 
10 | 
11 | test_that("validate_wrt_option passes when in option name", {
12 |   post_samples_example <- tibble::tibble(option_name = c("A", "B", "C"))
13 |   expect_invisible({
14 |     validate_wrt_option(wrt_option = "B", posterior_samples = post_samples_example)
15 |   })
16 | })
17 | 


--------------------------------------------------------------------------------
/R/validate_posterior_samples.R:
--------------------------------------------------------------------------------
 1 | #' Validate Posterior Samples Dataframe
 2 | #'
 3 | #' Function fails if posterior is not shaped correctly.
 4 | #'
 5 | #' @param posterior_samples Tibble of data in long form with 2 columns
 6 | #' `option_name` and `samples`
 7 | #'
 8 | #' @return None
 9 | #'
10 | validate_posterior_samples <- function(posterior_samples){
11 |   if(!is.data.frame(posterior_samples)){
12 |     stop("Posterior samples input is not a dataframe object.")
13 |   }
14 |   required_columns <- c("option_name", "samples", "sample_id")
15 |   purrr::walk(required_columns, ~validate_input_column(.x, posterior_samples, greater_than_zero = FALSE))
16 | }
17 | 


--------------------------------------------------------------------------------
/man/calculate_total_cm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/calculate_total_cm.R
 3 | \name{calculate_total_cm}
 4 | \alias{calculate_total_cm}
 5 | \title{Calculate Total CM}
 6 | \usage{
 7 | calculate_total_cm(rev_per_click, cost_per_click, expected_clicks)
 8 | }
 9 | \arguments{
10 | \item{rev_per_click}{vector of rev per click samples}
11 | 
12 | \item{cost_per_click}{vector of cost per click (cpc) samples}
13 | 
14 | \item{expected_clicks}{vector of expected clicks (expected CTR * fixed impressions)}
15 | }
16 | \value{
17 | vector of CM estimates (dbl)
18 | }
19 | \description{
20 | Calculate Total CM
21 | }
22 | 


--------------------------------------------------------------------------------
/R/validate_wrt_option.R:
--------------------------------------------------------------------------------
 1 | #' Validate With Respect To Option
 2 | #'
 3 | #' Verify that the option provided is in the poster_samples dataframe `option_name` column.
 4 | #' Raises error if not TRUE
 5 | #'
 6 | #' @param wrt_option string name of the option
 7 | #' @param posterior_samples Tibble returned from sample_from_posterior with 3 columns
 8 | #'   `option_name`, `samples`, and `sample_id`.
 9 | #'
10 | #' @return None
11 | validate_wrt_option <- function(wrt_option, posterior_samples){
12 |   if(!(wrt_option %in% posterior_samples$option_name)){
13 |     stop(paste(wrt_option, "is an invalid wrt_option. Not one of in the posterior_samples options."))
14 |   }
15 | }
16 | 


--------------------------------------------------------------------------------
/man/impute_missing_options.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/impute_missing_options.R
 3 | \name{impute_missing_options}
 4 | \alias{impute_missing_options}
 5 | \title{Impute Missing Options}
 6 | \usage{
 7 | impute_missing_options(posterior_samples, wp_raw)
 8 | }
 9 | \arguments{
10 | \item{posterior_samples}{Tibble of data in long form with 2 columns
11 | `option_name` and `samples`}
12 | 
13 | \item{wp_raw}{Tibble of win probabilities with the columns:
14 | `option_name` and `win_prob_raw`}
15 | }
16 | \value{
17 | wp_raw table with new rows if option names were missing.
18 | }
19 | \description{
20 | When win probability is calculated
21 | }
22 | 


--------------------------------------------------------------------------------
/man/validate_wrt_option.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validate_wrt_option.R
 3 | \name{validate_wrt_option}
 4 | \alias{validate_wrt_option}
 5 | \title{Validate With Respect To Option}
 6 | \usage{
 7 | validate_wrt_option(wrt_option, posterior_samples)
 8 | }
 9 | \arguments{
10 | \item{wrt_option}{string name of the option}
11 | 
12 | \item{posterior_samples}{Tibble returned from sample_from_posterior with 3 columns
13 | `option_name`, `samples`, and `sample_id`.}
14 | }
15 | \value{
16 | None
17 | }
18 | \description{
19 | Verify that the option provided is in the poster_samples dataframe `option_name` column.
20 | Raises error if not TRUE
21 | }
22 | 


--------------------------------------------------------------------------------
/man/validate_priors.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validate_priors.R
 3 | \name{validate_priors}
 4 | \alias{validate_priors}
 5 | \title{Validate Priors}
 6 | \usage{
 7 | validate_priors(priors, valid_priors, default_priors)
 8 | }
 9 | \arguments{
10 | \item{priors}{List of named priors with double values.}
11 | 
12 | \item{valid_priors}{A character vector of valid prior names.}
13 | 
14 | \item{default_priors}{A list of default priors for the distribution.}
15 | }
16 | \value{
17 | A named list of valid priors for the distribution.
18 | }
19 | \description{
20 | Validates list of priors against a vector of valid priors and if
21 |    the values are not valid, default priors are returned.
22 | }
23 | 


--------------------------------------------------------------------------------
/R/is_prior_valid.R:
--------------------------------------------------------------------------------
 1 | #' Is Prior Valid
 2 | #'
 3 | #' Checks if a single valid prior name is in the list of prior values and if
 4 | #'     that prior value from the list is greater than 0.
 5 | #'
 6 | #' @param priors_list A list of valid priors
 7 | #' @param valid_prior A character string
 8 | #'
 9 | #' @return Boolean (TRUE/FALSE)
10 | #'
11 | 
12 | is_prior_valid <- function(priors_list, valid_prior) {
13 |   if (length(priors_list) == 0) {
14 |     return(FALSE)
15 |   }
16 |   if (!valid_prior %in% names(priors_list)) {
17 |     warning(paste(valid_prior, "is not in priors list."))
18 |     return(FALSE)
19 |   }
20 |   if (priors_list[[valid_prior]] <= 0) {
21 |     warning(paste(valid_prior, "prior is not greater than 0."))
22 |     return(FALSE)
23 |   }
24 |   TRUE
25 | }
26 | 


--------------------------------------------------------------------------------
/man/calculate_multi_rev_per_session.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/calculate_multi_rev_per_session.R
 3 | \name{calculate_multi_rev_per_session}
 4 | \alias{calculate_multi_rev_per_session}
 5 | \title{Calculate Multi Rev Per Session}
 6 | \usage{
 7 | calculate_multi_rev_per_session(conv_rates, inverse_rev_A, inverse_rev_B)
 8 | }
 9 | \arguments{
10 | \item{conv_rates}{Dirichlet samples containing a tibble with columns alpha_1, alpha_2, and alpha_0}
11 | 
12 | \item{inverse_rev_A}{Vector of inverse revenue samples from A conversion type}
13 | 
14 | \item{inverse_rev_B}{Vector of inverse revenue samples from B conversion type}
15 | }
16 | \value{
17 | Vector of samples (dbl)
18 | }
19 | \description{
20 | Calculate Multi Rev Per Session
21 | }
22 | 


--------------------------------------------------------------------------------
/tests/testthat/test-sample_cpc.R:
--------------------------------------------------------------------------------
 1 | context("Sample CPC")
 2 | 
 3 | test_that("sample_cpc returns correct shape", {
 4 |   input_df <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_clicks = c(1000, 1000, 1000),
 7 |     sum_cost = c(10, 50, 30),
 8 |   )
 9 |   n_options <- length(unique(input_df$option_name))
10 |   n_samples <- 150
11 |   expected_col_names <- c(colnames(input_df), "gamma_params", "samples")
12 |   output <- sample_cpc(input_df, priors = list(), n_samples = n_samples)
13 |   expect_true(is.data.frame(output))
14 |   expect_true(all(c("option_name", "samples") %in% colnames(output)))
15 |   expect_length(output$samples, n_options)
16 |   purrr::walk(output$samples, ~ expect_length(.x, n_samples))
17 |   expect_equal(colnames(output), expected_col_names)
18 | })
19 | 


--------------------------------------------------------------------------------
/tests/testthat/test-calculate_total_cm.R:
--------------------------------------------------------------------------------
 1 | context("Calculate Total CM")
 2 | 
 3 | test_that("calculate_total_cm returns correct cm", {
 4 |   expected_ouput <- 15
 5 |   output <- calculate_total_cm(rev_per_click = 10, cost_per_click = 5, expected_clicks = 3)
 6 |   expect_equal(output, expected_ouput)
 7 | })
 8 | 
 9 | test_that("calculate_total_cm returns negative value", {
10 |   expected_ouput <- -30
11 |   output <- calculate_total_cm(rev_per_click = 10, cost_per_click = 20, expected_clicks = 3)
12 |   expect_equal(output, expected_ouput)
13 | })
14 | 
15 | test_that("calculate_total_cm returns 0 when there are no clicks", {
16 |   expected_ouput <- 0
17 |   output <- calculate_total_cm(rev_per_click = 10, cost_per_click = 5, expected_clicks = 0)
18 |   expect_equal(output, expected_ouput)
19 | })
20 | 


--------------------------------------------------------------------------------
/tests/testthat/test-validate_priors.R:
--------------------------------------------------------------------------------
 1 | context("Validate Priors")
 2 | 
 3 | test_that("validate_priors selects subset of valid priors", {
 4 |   expected_output <- list(b = 2, c = 3)
 5 |   priors <- list(a = 1, b = 2, c = 3)
 6 |   valid_priors <- c("b", "c")
 7 |   default_priors <- list(b = 1, c = 1)
 8 |   output <- validate_priors(priors, valid_priors, default_priors)
 9 |   testthat::expect_equal(output, expected_output)
10 | })
11 | 
12 | test_that("validate_priors returns default priors when a single parameter is invalid", {
13 |   priors <- list(a = 1, b = 2, c = -4)
14 |   valid_priors <- c("b", "c")
15 |   default_priors <- list(b = 1, c = 1)
16 |   testthat::expect_warning({
17 |     output <- validate_priors(priors, valid_priors, default_priors)
18 |   })
19 |   testthat::expect_equal(output, default_priors)
20 | })
21 | 


--------------------------------------------------------------------------------
/man/rdirichlet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rdirichlet.R
 3 | \name{rdirichlet}
 4 | \alias{rdirichlet}
 5 | \title{Random Dirichlet}
 6 | \usage{
 7 | rdirichlet(n, alphas_list)
 8 | }
 9 | \arguments{
10 | \item{n}{integer, the number of samples}
11 | 
12 | \item{alphas_list}{Named List of Integers: parameters of the dirichlet,
13 | interpreted as the number of success of each outcome}
14 | }
15 | \value{
16 | n x length(alphas) named tibble representing the probability of observing each outcome
17 | }
18 | \description{
19 | Randomly samples a vector of length n from a dirichlet distribution parameterized by a vector of alphas
20 | PDF of Gamma with scale = 1 : f(x)= 1/(Gamma(a)) x^(a-1) e^-(x)
21 | }
22 | \examples{
23 | rdirichlet(100, list(a = 20, b = 15, c = 60))
24 | 
25 | }
26 | 


--------------------------------------------------------------------------------
/tests/testthat/test-sample_conv_rate.R:
--------------------------------------------------------------------------------
 1 | context("Sample Conversion Rate")
 2 | 
 3 | test_that("sample_conv_rate returns correct shape", {
 4 |   input_df <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_clicks = c(1000, 1000, 1000),
 7 |     sum_conversions = c(100, 120, 110)
 8 |   )
 9 |   n_options <- length(unique(input_df$option_name))
10 |   n_samples <- 150
11 |   expected_col_names <- c(colnames(input_df), "beta_params", "samples")
12 |   output <- sample_conv_rate(input_df, priors = list(), n_samples = n_samples)
13 |   expect_true(is.data.frame(output))
14 |   expect_true(all(c("option_name", "samples") %in% colnames(output)))
15 |   expect_length(output$samples, n_options)
16 |   purrr::walk(output$samples, ~ expect_length(.x, n_samples))
17 |   expect_equal(colnames(output), expected_col_names)
18 | })
19 | 


--------------------------------------------------------------------------------
/tests/testthat/test-sample_session_duration.R:
--------------------------------------------------------------------------------
 1 | context("Sample Session Duration")
 2 | 
 3 | test_that("sample_session_duration returns correct shape", {
 4 |   input_df <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_sessions = c(1000, 500, 1000),
 7 |     sum_duration = c(50000, 60000, 35000),
 8 |   )
 9 |   n_options <- length(unique(input_df$option_name))
10 |   n_samples <- 150
11 |   expected_col_names <- c(colnames(input_df), "gamma_params", "samples")
12 |   output <- sample_session_duration(input_df, priors = list(), n_samples = n_samples)
13 |   expect_true(is.data.frame(output))
14 |   expect_true(all(c("option_name", "samples") %in% colnames(output)))
15 |   expect_length(output$samples, n_options)
16 |   purrr::walk(output$samples, ~ expect_length(.x, n_samples))
17 |   expect_equal(colnames(output), expected_col_names)
18 | })
19 | 


--------------------------------------------------------------------------------
/man/validate_input_column.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validate_input_column.R
 3 | \name{validate_input_column}
 4 | \alias{validate_input_column}
 5 | \title{Validate Input Column}
 6 | \usage{
 7 | validate_input_column(column_name, input_df, greater_than_zero = TRUE)
 8 | }
 9 | \arguments{
10 | \item{column_name}{String value of the column name}
11 | 
12 | \item{input_df}{Dataframe containing option_name (str) and various other columns
13 | depending on the distribution type. See vignette for more details.}
14 | 
15 | \item{greater_than_zero}{Boolean: Do all values in the column have to be greater than zero?}
16 | }
17 | \value{
18 | None
19 | }
20 | \description{
21 | Validates the input column exists in the dataframe, is of the correct type,
22 | and that all values are greater than or equal to 0.
23 | }
24 | 


--------------------------------------------------------------------------------
/tests/testthat/test-calculate_multi_rev_per_session.R:
--------------------------------------------------------------------------------
 1 | context("Calculate Mult Rev Per Session")
 2 | 
 3 | test_that("calculate_multi_rev_per_session returns correct calculations", {
 4 |   expected_ouput <- 45
 5 |   output <- calculate_multi_rev_per_session(conv_rates = list(alpha_1 = 0.5, alpha_2 = 0.2),
 6 |                                             inverse_rev_A = 0.1,
 7 |                                             inverse_rev_B = 0.005)
 8 |   expect_equal(output, expected_ouput)
 9 | })
10 | 
11 | test_that("calculate_multi_rev_per_session returns 0 when rates are 0", {
12 |   expected_ouput <- 0
13 |   output <- calculate_multi_rev_per_session(conv_rates = list(alpha_1 = 0, alpha_2 = 0),
14 |                                             inverse_rev_A = 0.1,
15 |                                             inverse_rev_B = 0.005)
16 |   expect_equal(output, expected_ouput)
17 | })
18 | 


--------------------------------------------------------------------------------
/tests/testthat/test-sample_cpa.R:
--------------------------------------------------------------------------------
 1 | context("Sample CPA")
 2 | 
 3 | test_that("sample_cpa returns correct shape", {
 4 |   input_df <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_clicks = c(1000, 1000, 1000),
 7 |     sum_conversions = c(100, 120, 110),
 8 |     sum_cost = c(10, 50, 30),
 9 |   )
10 |   n_options <- length(unique(input_df$option_name))
11 |   n_samples <- 150
12 |   expected_col_names <- c(colnames(input_df),
13 |                           "beta_params", "gamma_params", "samples")
14 |   output <- sample_cpa(input_df, priors = list(), n_samples = n_samples)
15 |   expect_true(is.data.frame(output))
16 |   expect_true(all(c("option_name", "samples") %in% colnames(output)))
17 |   expect_length(output$samples, n_options)
18 |   purrr::walk(output$samples, ~ expect_length(.x, n_samples))
19 |   expect_equal(colnames(output), expected_col_names)
20 | })
21 | 


--------------------------------------------------------------------------------
/tests/testthat/test-sample_ctr.R:
--------------------------------------------------------------------------------
 1 | context("Sample CTR")
 2 | 
 3 | test_that("sample_ctr returns correct shape", {
 4 |   input_df <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_impressions = c(10000, 10000, 10000),
 7 |     sum_clicks = c(1000, 950, 1050)
 8 |   )
 9 |   n_options <- length(unique(input_df$option_name))
10 |   n_samples <- 150
11 |   expected_col_names <- c("option_name", "sum_impressions", "sum_clicks",
12 |                           "sum_conversions", "beta_params", "samples")
13 |   output <- sample_ctr(input_df, priors = list(), n_samples = n_samples)
14 |   expect_true(is.data.frame(output))
15 |   expect_true(all(c("option_name", "samples") %in% colnames(output)))
16 |   expect_length(output$samples, n_options)
17 |   purrr::walk(output$samples, ~ expect_length(.x, n_samples))
18 |   expect_equal(colnames(output), expected_col_names)
19 | })
20 | 


--------------------------------------------------------------------------------
/tests/testthat/test-sample_page_views_per_session.R:
--------------------------------------------------------------------------------
 1 | context("Sample Page Views Per Session")
 2 | 
 3 | test_that("sample_page_views_per_session returns correct shape", {
 4 |   input_df <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_sessions = c(1000, 500, 1000),
 7 |     sum_page_views = c(2000, 1500, 3465),
 8 |   )
 9 |   n_options <- length(unique(input_df$option_name))
10 |   n_samples <- 150
11 |   expected_col_names <- c(colnames(input_df), "gamma_params", "samples")
12 |   output <- sample_page_views_per_session(input_df, priors = list(), n_samples = n_samples)
13 |   expect_true(is.data.frame(output))
14 |   expect_true(all(c("option_name", "samples") %in% colnames(output)))
15 |   expect_length(output$samples, n_options)
16 |   purrr::walk(output$samples, ~ expect_length(.x, n_samples))
17 |   expect_equal(colnames(output), expected_col_names)
18 | })
19 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | # History files
 3 | .Rhistory
 4 | .Rapp.history
 5 | 
 6 | # Session Data files
 7 | .RData
 8 | 
 9 | # User-specific files
10 | .Ruserdata
11 | 
12 | # Example code in package build process
13 | *-Ex.R
14 | 
15 | # Output files from R CMD build
16 | /*.tar.gz
17 | 
18 | # Output files from R CMD check
19 | /*.Rcheck/
20 | 
21 | # RStudio files
22 | .Rproj.user/
23 | 
24 | # produced vignettes
25 | vignettes/*.html
26 | vignettes/*.pdf
27 | 
28 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
29 | .httr-oauth
30 | 
31 | # knitr and R markdown default cache directories
32 | *_cache/
33 | /cache/
34 | 
35 | # Temporary files created by R markdown
36 | *.utf8.md
37 | *.knit.md
38 | 
39 | # R Environment Variables
40 | .Renviron
41 | 
42 | # pkgdown site
43 | docs/
44 | inst/doc
45 | doc
46 | Meta
47 | 
48 | # CRAN comments
49 | cran-comments.md
50 | 


--------------------------------------------------------------------------------
/tests/testthat/test-sample_response_rate.R:
--------------------------------------------------------------------------------
 1 | context("Sample Response Rate")
 2 | 
 3 | test_that("sample_response_rate returns correct shape", {
 4 |   input_df <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_clicks = c(1000, 1000, 1000),
 7 |     sum_sessions = c(1000, 1000, 1000),
 8 |     sum_conversions = c(100, 120, 110)
 9 |   )
10 |   n_options <- length(unique(input_df$option_name))
11 |   n_samples <- 150
12 |   expected_col_names <- c(colnames(input_df), "beta_params", "samples")
13 |   output <- sample_response_rate(input_df, priors = list(), n_samples = n_samples)
14 |   expect_true(is.data.frame(output))
15 |   expect_true(all(c("option_name", "samples") %in% colnames(output)))
16 |   expect_length(output$samples, n_options)
17 |   purrr::walk(output$samples, ~ expect_length(.x, n_samples))
18 |   expect_equal(colnames(output), expected_col_names)
19 | })
20 | 


--------------------------------------------------------------------------------
/R/validate_priors.R:
--------------------------------------------------------------------------------
 1 | #' Validate Priors
 2 | #'
 3 | #' Validates list of priors against a vector of valid priors and if
 4 | #'    the values are not valid, default priors are returned.
 5 | #'
 6 | #' @param priors List of named priors with double values.
 7 | #' @param valid_priors A character vector of valid prior names.
 8 | #' @param default_priors A list of default priors for the distribution.
 9 | #'
10 | #' @return A named list of valid priors for the distribution.
11 | #'
12 | #' @importFrom purrr map
13 | #' @importFrom magrittr %>%
14 | #'
15 | #'
16 | validate_priors <- function(priors, valid_priors, default_priors) {
17 |   are_priors_valid <- purrr::map(valid_priors, ~ is_prior_valid(priors, .x)) %>%
18 |     unlist()
19 |   if (all(are_priors_valid)) {
20 |     priors[valid_priors] %>% as.list()
21 |   } else{
22 |     message("Using default priors.")
23 |     default_priors
24 |   }
25 | }
26 | 


--------------------------------------------------------------------------------
/tests/testthat/test-estimate_all_values.R:
--------------------------------------------------------------------------------
 1 | context("Estimate All Values")
 2 | 
 3 | test_that("Estimate All Values returns correct types", {
 4 |   expected_output <-
 5 |   input_df <- tibble::tibble(
 6 |     option_name = c("A", "B", "C"),
 7 |     sum_clicks = c(1000, 1000, 1000),
 8 |     sum_conversions = c(100, 120, 110)
 9 |   )
10 |   output <- estimate_all_values(input_df, distribution = "conversion_rate", wrt_option_lift = "A")
11 |   expect_length(output, 4)
12 | 
13 |   is_wp_output_tibble <- is.data.frame(output[["Win Probability"]])
14 |   expect_true(is_wp_output_tibble)
15 | 
16 |   is_wpb_output_tibble <- is.data.frame(output[["Win Probability vs Baseline"]])
17 |   expect_true(is_wpb_output_tibble)
18 | 
19 |   is_vr_double <- is.double(output[["Value Remaining"]])
20 |   expect_true(is_vr_double)
21 | 
22 |   is_lift_double <- is.double(output[["Lift vs Baseline"]])
23 |   expect_true(is_lift_double)
24 | })
25 | 


--------------------------------------------------------------------------------
/man/update_beta.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/update_beta.R
 3 | \name{update_beta}
 4 | \alias{update_beta}
 5 | \title{Update Beta}
 6 | \usage{
 7 | update_beta(alpha, beta, priors = list())
 8 | }
 9 | \arguments{
10 | \item{alpha}{Double value for alpha (count of successes). Must be 0 or greater.}
11 | 
12 | \item{beta}{Double value for beta (count of failures). Must be 0 or greater.}
13 | 
14 | \item{priors}{An optional list object that contains alpha0 and
15 | beta0. Otherwise the function with use Beta(1,1) as the prior distribution.}
16 | }
17 | \value{
18 | A tibble object that contains `alpha` and `beta`
19 | }
20 | \description{
21 | Updates Beta Distribution with the Beta-Bernoulli
22 | conjugate prior update rule
23 | }
24 | \examples{
25 | update_beta(alpha = 1, beta = 5, priors = list(alpha0 = 2, beta0 = 2))
26 | update_beta(alpha = 20000, beta = 50000)
27 | 
28 | }
29 | 


--------------------------------------------------------------------------------
/R/impute_missing_options.R:
--------------------------------------------------------------------------------
 1 | #' Impute Missing Options
 2 | #'
 3 | #' When win probability is calculated
 4 | #'
 5 | #' @param posterior_samples Tibble of data in long form with 2 columns
 6 | #' `option_name` and `samples`
 7 | #' @param wp_raw Tibble of win probabilities with the columns:
 8 | #'     `option_name` and `win_prob_raw`
 9 | #'
10 | #' @return wp_raw table with new rows if option names were missing.
11 | #'
12 | #' @importFrom dplyr bind_rows
13 | #' @importFrom tibble tibble
14 | 
15 | impute_missing_options <- function(posterior_samples, wp_raw) {
16 |   all_option_names <- unique(posterior_samples$option_name)
17 |   missing_option_names <- all_option_names[!all_option_names %in% wp_raw$option_name]
18 |   if(length(missing_option_names > 0)){
19 |     wp_raw %>%
20 |       dplyr::bind_rows(
21 |         tibble::tibble(option_name = missing_option_names, win_prob_raw = 0)
22 |       )
23 |   }else{
24 |     wp_raw
25 |   }
26 | }
27 | 


--------------------------------------------------------------------------------
/man/find_best_option.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/find_best_option.R
 3 | \name{find_best_option}
 4 | \alias{find_best_option}
 5 | \title{Find Best Option}
 6 | \usage{
 7 | find_best_option(posterior_samples, distribution)
 8 | }
 9 | \arguments{
10 | \item{posterior_samples}{Tibble returned from sample_from_posterior with 3 columns
11 | `option_name`, `samples`, and `sample_id`.}
12 | 
13 | \item{distribution}{String: name of the distribution}
14 | }
15 | \value{
16 | String: the best option name
17 | }
18 | \description{
19 | Samples from posterior, calculates win probability, and selects the best option.
20 | Note: this can be inefficient if you already have the win probability dataframe.
21 | Only use this if that has not already been calculated.
22 | }
23 | \examples{
24 | # Requires posterior distribution
25 | \dontrun{
26 | find_best_option(posterior_samples = posterior_samples, distribution = "conversion_rate")
27 | }
28 | }
29 | 


--------------------------------------------------------------------------------
/tests/testthat/test-sample_rev_per_session.R:
--------------------------------------------------------------------------------
 1 | context("Sample Rev Per Session")
 2 | 
 3 | test_that("sample_rev_per_session returns correct shape", {
 4 |   input_df_cm_per_click <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_sessions = c(1000, 1000, 1000),
 7 |     sum_conversions = c(100, 120, 110),
 8 |     sum_revenue = c(900, 1200, 1150)
 9 |   )
10 |   n_options <- length(unique(input_df_cm_per_click$option_name))
11 |   n_samples <- 150
12 |   expected_col_names <- c(colnames(input_df_cm_per_click),
13 |                           "beta_params", "gamma_params", "samples")
14 |   output <- sample_rev_per_session(input_df_cm_per_click, priors = list(), n_samples = n_samples)
15 |   expect_true(is.data.frame(output))
16 |   expect_true(all(c("option_name", "samples") %in% colnames(output)))
17 |   expect_length(output$samples, n_options)
18 |   purrr::walk(output$samples, ~ expect_length(.x, n_samples))
19 |   expect_equal(colnames(output), expected_col_names)
20 | })
21 | 


--------------------------------------------------------------------------------
/man/grizbayr-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/grizbayr-package.R
 3 | \docType{package}
 4 | \name{grizbayr-package}
 5 | \alias{grizbayr}
 6 | \alias{grizbayr-package}
 7 | \title{grizbayr: Bayesian Inference for A|B and Bandit Marketing Tests}
 8 | \description{
 9 | Uses simple Bayesian conjugate prior update rules to calculate the win probability of each option, value remaining in the test, and percent lift over the baseline for various marketing objectives. References: Fink, Daniel (1997) "A Compendium of Conjugate Priors" \url{https://www.johndcook.com/CompendiumOfConjugatePriors.pdf}. Stucchio, Chris (2015) "Bayesian A/B Testing at VWO" \url{https://vwo.com/downloads/VWO_SmartStats_technical_whitepaper.pdf}.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 |   \item \url{https://github.com/rangi513/grizbayr}
15 |   \item Report bugs at \url{https://github.com/rangi513/grizbayr/issues}
16 | }
17 | 
18 | }
19 | \keyword{internal}
20 | 


--------------------------------------------------------------------------------
/tests/testthat/test-sample_cm_per_click.R:
--------------------------------------------------------------------------------
 1 | context("Sample CM Per Click")
 2 | 
 3 | test_that("sample_cm_per_click returns correct shape", {
 4 |   input_df_cm_per_click <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_clicks = c(1000, 1000, 1000),
 7 |     sum_conversions = c(100, 120, 110),
 8 |     sum_revenue = c(900, 1200, 1150),
 9 |     sum_cost = c(10, 50, 30),
10 |   )
11 |   n_options <- length(unique(input_df_cm_per_click$option_name))
12 |   n_samples <- 150
13 |   expected_col_names <- c(colnames(input_df_cm_per_click),
14 |                           "beta_params", "gamma_params_rev", "gamma_params_cost", "samples")
15 |   output <- sample_cm_per_click(input_df_cm_per_click, priors = list(), n_samples = n_samples)
16 |   expect_true(is.data.frame(output))
17 |   expect_true(all(c("option_name", "samples") %in% colnames(output)))
18 |   expect_length(output$samples, n_options)
19 |   purrr::walk(output$samples, ~ expect_length(.x, n_samples))
20 |   expect_equal(colnames(output), expected_col_names)
21 | })
22 | 


--------------------------------------------------------------------------------
/tests/testthat/test-sample_total_cm.R:
--------------------------------------------------------------------------------
 1 | context("Sample Total CM")
 2 | 
 3 | test_that("sample_total_cm returns correct shape", {
 4 |   input_df_cm <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_impressions = c(10000, 10000, 10000),
 7 |     sum_clicks = c(1000, 1000, 1000),
 8 |     sum_conversions = c(100, 120, 110),
 9 |     sum_revenue = c(900, 1200, 1150),
10 |     sum_cost = c(10, 50, 30),
11 |   )
12 |   n_options <- length(unique(input_df_cm$option_name))
13 |   n_samples <- 150
14 |   expected_col_names <- c(colnames(input_df_cm), "beta_params_conv", "beta_params_ctr",
15 |                           "gamma_params_rev", "gamma_params_cost", "samples")
16 |   output <- sample_total_cm(input_df_cm, priors = list(), n_samples = n_samples)
17 |   expect_true(is.data.frame(output))
18 |   expect_true(all(c("option_name", "samples") %in% colnames(output)))
19 |   expect_length(output$samples, n_options)
20 |   purrr::walk(output$samples, ~ expect_length(.x, n_samples))
21 |   expect_equal(colnames(output), expected_col_names)
22 | })
23 | 


--------------------------------------------------------------------------------
/man/validate_input_df.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validate_input_df.R
 3 | \name{validate_input_df}
 4 | \alias{validate_input_df}
 5 | \title{Validate Input DataFrame}
 6 | \usage{
 7 | validate_input_df(input_df, distribution)
 8 | }
 9 | \arguments{
10 | \item{input_df}{Dataframe containing option_name (str) and various other columns
11 | depending on the distribution type. See vignette for more details.}
12 | 
13 | \item{distribution}{String of the distribution name}
14 | }
15 | \value{
16 | Bool TRUE if all checks pass.
17 | }
18 | \description{
19 | Validates the input dataframe has the correct type, correct required column names,
20 | that the distribution is valid, that the column types are correct, and that the
21 | column values are greater than or equal to 0 when they are numeric.
22 | }
23 | \examples{
24 | input_df <- tibble::tibble(
25 |    option_name = c("A", "B"),
26 |    sum_clicks = c(1000, 1000),
27 |    sum_conversions = c(100, 120)
28 | )
29 | validate_input_df(input_df, "conversion_rate")
30 | 
31 | }
32 | 


--------------------------------------------------------------------------------
/tests/testthat/test-estimate_value_remaining.R:
--------------------------------------------------------------------------------
 1 | context("Estimate Value Remaining")
 2 | 
 3 | 
 4 | test_that("estimate_value_remaining returns single value around 0.13", {
 5 |   input_df <- tibble::tibble(
 6 |     option_name = c("A", "B", "C"),
 7 |     sum_clicks = c(1000, 1000, 1000),
 8 |     sum_conversions = c(100, 120, 110)
 9 |   )
10 |   output <- estimate_value_remaining(input_df, distribution = "conversion_rate")
11 |   expect_length(output, 1)
12 |   expect_true(is.double(output))
13 |   expect_lt(output, 0.18)
14 |   expect_gt(output, 0.10)
15 | })
16 | 
17 | test_that("estimate_lift_vs_baseline returns single value around .22 when threshold is 0.99", {
18 |   input_df <- tibble::tibble(
19 |     option_name = c("A", "B", "C"),
20 |     sum_clicks = c(1000, 1000, 1000),
21 |     sum_conversions = c(100, 120, 110)
22 |   )
23 |   output <- estimate_value_remaining(input_df, distribution = "conversion_rate", threshold = 0.99)
24 |   expect_length(output, 1)
25 |   expect_true(is.double(output))
26 |   expect_lt(output, 0.26)
27 |   expect_gt(output, 0.15)
28 | })
29 | 


--------------------------------------------------------------------------------
/man/estimate_win_prob.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/estimate_win_prob.R
 3 | \name{estimate_win_prob}
 4 | \alias{estimate_win_prob}
 5 | \title{Estimate Win Probability}
 6 | \usage{
 7 | estimate_win_prob(input_df, distribution, priors = list())
 8 | }
 9 | \arguments{
10 | \item{input_df}{Dataframe containing option_name (str) and various other columns
11 | depending on the distribution type. See vignette for more details.}
12 | 
13 | \item{distribution}{String of the distribution name}
14 | 
15 | \item{priors}{Optional list of priors. Defaults will be use otherwise.}
16 | }
17 | \value{
18 | tibble object with 2 columns: `option_name`
19 |     and `win_probability` formatted as a percent
20 | }
21 | \description{
22 | Creates a tibble of win probabilities for each option based on the data observed.
23 | }
24 | \examples{
25 | input_df <- tibble::tibble(
26 |    option_name = c("A", "B"),
27 |    sum_clicks = c(1000, 1000),
28 |    sum_conversions = c(100, 120)
29 | )
30 | estimate_win_prob(input_df, "conversion_rate")
31 | 
32 | }
33 | 


--------------------------------------------------------------------------------
/tests/testthat/test-sample_multi_rev_per_session.R:
--------------------------------------------------------------------------------
 1 | context("Sample Multi Rev Per Session")
 2 | 
 3 | test_that("sample_multi_rev_per_session returns correct shape", {
 4 |   input_df <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_sessions = c(1000, 1000, 1000),
 7 |     sum_conversions = c(100, 120, 110),
 8 |     sum_revenue = c(900, 1200, 1150),
 9 |     sum_conversions_2 = c(10, 8, 20),
10 |     sum_revenue_2 = c(10, 16, 15)
11 |   )
12 |   n_options <- length(unique(input_df$option_name))
13 |   n_samples <- 150
14 |   expected_col_names <- c(colnames(input_df), "dirichlet_params",
15 |                            "gamma_params_A", "gamma_params_B", "samples")
16 |   output <- sample_multi_rev_per_session(input_df, priors = list(), n_samples = n_samples)
17 |   expect_true(is.data.frame(output))
18 |   expect_true(all(c("option_name", "samples") %in% colnames(output)))
19 |   expect_length(output$samples, n_options)
20 |   purrr::walk(output$samples, ~ expect_length(.x, n_samples))
21 |   expect_equal(sort(colnames(output)), sort(expected_col_names))
22 | })
23 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2020 Ryan Angi
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: grizbayr
 2 | Type: Package
 3 | Title: Bayesian Inference for A|B and Bandit Marketing Tests
 4 | Version: 1.3.5
 5 | Author: Ryan Angi
 6 | Maintainer: Ryan Angi <angi.ryan@gmail.com>
 7 | Description: Uses simple Bayesian conjugate prior update rules to calculate 
 8 |     the win probability of each option, value remaining in the test, and 
 9 |     percent lift over the baseline for various marketing objectives.
10 |     References: 
11 |     Fink, Daniel (1997) "A Compendium of Conjugate Priors" <https://www.johndcook.com/CompendiumOfConjugatePriors.pdf>.
12 |     Stucchio, Chris (2015) "Bayesian A/B Testing at VWO" <https://vwo.com/downloads/VWO_SmartStats_technical_whitepaper.pdf>.
13 | Depends: R (>= 2.10)
14 | License: MIT + file LICENSE
15 | Encoding: UTF-8
16 | RoxygenNote: 7.2.3
17 | Imports: 
18 |     purrr,
19 |     dplyr,
20 |     tidyr (>= 1.0.0),
21 |     magrittr,
22 |     tibble,
23 |     rlang
24 | Suggests: 
25 |     spelling,
26 |     knitr,
27 |     testthat (>= 2.1.0),
28 |     rmarkdown
29 | Language: en-US
30 | VignetteBuilder: knitr
31 | URL: https://github.com/rangi513/grizbayr
32 | BugReports: https://github.com/rangi513/grizbayr/issues
33 | 


--------------------------------------------------------------------------------
/tests/testthat/test-update_beta.R:
--------------------------------------------------------------------------------
 1 | context("Update Beta")
 2 | 
 3 | test_that("update_beta updates with priors", {
 4 |   expected_output <- tibble::tibble(alpha = 3, beta = 7)
 5 |   output <- update_beta(alpha = 1, beta = 5, priors = list(alpha0 = 2, beta0 = 2))
 6 |   testthat::expect_equal(output, expected_output)
 7 | })
 8 | 
 9 | test_that("update_beta updates without priors", {
10 |   expected_output <- tibble::tibble(alpha = 2, beta = 6)
11 |   output <- update_beta(alpha = 1, beta = 5)
12 |   testthat::expect_equal(output, expected_output)
13 | })
14 | 
15 | test_that("update_beta updates but warns with invalid priors", {
16 |   expected_output <- tibble::tibble(alpha = 2, beta = 6)
17 |   invalid_priors <- list(alpha0 = -1, beta0 = 3)
18 |   testthat::expect_warning({
19 |     output <- update_beta(alpha = 1, beta = 5, priors = invalid_priors)
20 |   })
21 |   testthat::expect_equal(output, expected_output)
22 | })
23 | 
24 | test_that("update_beta updates without priors large numbers", {
25 |   expected_output <- tibble::tibble(alpha = 20001, beta = 50001)
26 |   output <- update_beta(alpha = 20000, beta = 50000)
27 |   testthat::expect_equal(output, expected_output)
28 | })
29 | 


--------------------------------------------------------------------------------
/R/find_best_option.R:
--------------------------------------------------------------------------------
 1 | #' Find Best Option
 2 | #'
 3 | #' Samples from posterior, calculates win probability, and selects the best option.
 4 | #' Note: this can be inefficient if you already have the win probability dataframe.
 5 | #' Only use this if that has not already been calculated.
 6 | #'
 7 | #' @param posterior_samples Tibble returned from sample_from_posterior with 3 columns
 8 | #'   `option_name`, `samples`, and `sample_id`.
 9 | #' @param distribution String: name of the distribution
10 | #'
11 | #' @return String: the best option name
12 | #'
13 | #' @importFrom dplyr filter
14 | #' @importFrom magrittr extract2 %>%
15 | #' @importFrom rlang .data
16 | #' @export
17 | #'
18 | #' @examples
19 | #' # Requires posterior distribution
20 | #' \dontrun{
21 | #' find_best_option(posterior_samples = posterior_samples, distribution = "conversion_rate")
22 | #' }
23 | find_best_option <- function(posterior_samples, distribution){
24 |   estimate_win_prob_given_posterior(posterior_samples = posterior_samples,
25 |                                     winner_is_max = is_winner_max(distribution)) %>%
26 |     dplyr::filter(.data$win_prob_raw == max(.data$win_prob_raw)) %>%
27 |     magrittr::extract2("option_name")
28 | }
29 | 


--------------------------------------------------------------------------------
/man/sample_cpc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample_cpc.R
 3 | \name{sample_cpc}
 4 | \alias{sample_cpc}
 5 | \title{Sample Cost Per Click}
 6 | \usage{
 7 | sample_cpc(input_df, priors, n_samples = 50000)
 8 | }
 9 | \arguments{
10 | \item{input_df}{Dataframe containing option_name (str), sum_clicks (dbl), sum_cost (dbl).}
11 | 
12 | \item{priors}{Optional list of priors {k0, theta0} for Gamma.
13 | Default \eqn{Gamma(1, 250)} will be use otherwise.}
14 | 
15 | \item{n_samples}{Optional integer value. Defaults to 50,000 samples.}
16 | }
17 | \value{
18 | input_df with 2 new nested columns `gamma_params` and `samples`
19 | }
20 | \description{
21 | Adds 2 new nested columns to the input_df: `gamma_params` and `samples`
22 |      `gamma_params` in each row should be a tibble of length 2 (\eqn{k}
23 |         and \eqn{\theta} parameters)
24 |     `samples` in each row should be a tibble of length `n_samples`
25 | }
26 | \details{
27 | See update_rules vignette for a mathematical representation.
28 | \deqn{cpc_i ~ Exponential(\lambda)}
29 | \deqn{\lambda ~ Gamma(k, \theta)}
30 | Average CPC is sampled from a Gamma distribution with an Exponential likelihood
31 | of an individual cost.
32 | }
33 | 


--------------------------------------------------------------------------------
/tests/testthat/test-update_dirichlet.R:
--------------------------------------------------------------------------------
 1 | context("Update Dirichlet")
 2 | 
 3 | test_that("update_dirichlet adds 1 when default priors are used", {
 4 |   expected_output <- tibble::tibble(alpha_0 = 21, alpha_1 = 6, alpha_2 = 3)
 5 |   output <- update_dirichlet(alpha_0 = 20, alpha_1 = 5, alpha_2 = 2)
 6 |   expect_true(is.data.frame(output))
 7 |   expect_equal(output, expected_output)
 8 | })
 9 | 
10 | test_that("update_dirichlet adds non default priors", {
11 |   sample_priors_list <- list(alpha00 = 2, alpha01 = 3, alpha02 = 5)
12 |   expected_output <- tibble::tibble(alpha_0 = 22, alpha_1 = 8, alpha_2 = 7)
13 |   output <- update_dirichlet(alpha_0 = 20, alpha_1 = 5, alpha_2 = 2, priors = sample_priors_list)
14 |   expect_true(is.data.frame(output))
15 |   expect_equal(output, expected_output)
16 | })
17 | 
18 | test_that("update_dirichlet adds 1 when default incorrect priors are used", {
19 |   incorrect_priors_list <- list(beta00 = 2, beta01 = 3, beta02 = 5)
20 |   expected_output <- tibble::tibble(alpha_0 = 21, alpha_1 = 6, alpha_2 = 3)
21 |   expect_warning({
22 |     output <- update_dirichlet(alpha_0 = 20, alpha_1 = 5, alpha_2 = 2, priors = incorrect_priors_list)
23 |   })
24 |   expect_true(is.data.frame(output))
25 |   expect_equal(output, expected_output)
26 | })
27 | 


--------------------------------------------------------------------------------
/man/sample_conv_rate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample_conv_rate.R
 3 | \name{sample_conv_rate}
 4 | \alias{sample_conv_rate}
 5 | \title{Sample Conversion Rate}
 6 | \usage{
 7 | sample_conv_rate(input_df, priors, n_samples = 50000)
 8 | }
 9 | \arguments{
10 | \item{input_df}{Dataframe containing option_name (str),
11 | sum_conversions (dbl), and sum_clicks (dbl).}
12 | 
13 | \item{priors}{Optional list of priors alpha0 and beta0.
14 | Default \eqn{Beta(1,1)} will be use otherwise.}
15 | 
16 | \item{n_samples}{Optional integer value. Defaults to 50,000 samples.}
17 | }
18 | \value{
19 | input_df with 2 new nested columns `beta_params` and `samples`
20 | }
21 | \description{
22 | Adds 2 new nested columns to the input_df: `beta_params` and `samples`
23 |     `beta_params` in each row should be a tibble of length 2 (\eqn{\alpha}
24 |         and \eqn{\beta} parameters)
25 |     `samples` in each row should be a tibble of length `n_samples`
26 | }
27 | \details{
28 | See update_rules vignette for a mathematical representation.
29 | \deqn{conversion_i ~ Bernoulli(\phi)}
30 | \deqn{\phi ~ Beta(\alpha, \beta)}
31 | Conversion Rate is sampled from a Beta distribution with a Binomial likelihood
32 | of an individual converting.
33 | }
34 | 


--------------------------------------------------------------------------------
/R/estimate_win_prob.R:
--------------------------------------------------------------------------------
 1 | #' Estimate Win Probability
 2 | #'
 3 | #' Creates a tibble of win probabilities for each option based on the data observed.
 4 | #'
 5 | #' @param input_df Dataframe containing option_name (str) and various other columns
 6 | #'     depending on the distribution type. See vignette for more details.
 7 | #' @param distribution String of the distribution name
 8 | #' @param priors Optional list of priors. Defaults will be use otherwise.
 9 | #'
10 | #' @return tibble object with 2 columns: `option_name`
11 | #'     and `win_probability` formatted as a percent
12 | #' @export
13 | #'
14 | #' @examples
15 | #' input_df <- tibble::tibble(
16 | #'    option_name = c("A", "B"),
17 | #'    sum_clicks = c(1000, 1000),
18 | #'    sum_conversions = c(100, 120)
19 | #' )
20 | #' estimate_win_prob(input_df, "conversion_rate")
21 | #'
22 | estimate_win_prob <- function(input_df, distribution, priors = list()){
23 |   validate_input_df(input_df, distribution)
24 | 
25 |   # Sample from posterior distribution
26 |   posterior_samples <- sample_from_posterior(input_df, distribution, priors)
27 | 
28 |   # Calculate Win Probability
29 |   estimate_win_prob_given_posterior(posterior_samples = posterior_samples,
30 |                                     winner_is_max = is_winner_max(distribution))
31 | }
32 | 


--------------------------------------------------------------------------------
/tests/testthat/test-validate_data_values.R:
--------------------------------------------------------------------------------
 1 | context("Validate Data Values")
 2 | 
 3 | test_that("validate_data_values returns same list of 2 when values are valid.", {
 4 |   data_values <-  list(successes = 22, failures = 100)
 5 |   output <- validate_data_values(data_values)
 6 |   testthat::expect_equal(output, expected = data_values)
 7 | })
 8 | 
 9 | test_that("validate_data_values returns same list of 4 when values are valid.", {
10 |   data_values <-  list(successes = 22, failures = 100, revenue = 5678, cost = 1234)
11 |   output <- validate_data_values(data_values)
12 |   testthat::expect_equal(output, expected = data_values)
13 | })
14 | 
15 | test_that("validate_data_values fails when list is empty.", {
16 |   empty_data_values <-  list()
17 |   testthat::expect_error(validate_data_values(empty_data_values))
18 | })
19 | 
20 | test_that("validate_data_values fails when one value is less than 0.", {
21 |   data_values <-  list(successes = -10, failures = 100)
22 |   testthat::expect_error(validate_data_values(data_values))
23 | })
24 | 
25 | test_that("validate_data_values returns the same list when one value equals 0.", {
26 |   data_values <-  list(successes = 0, failures = 100)
27 |   output <- validate_data_values(data_values)
28 |   testthat::expect_equal(output, expected = data_values)
29 | })
30 | 


--------------------------------------------------------------------------------
/R/rdirichlet.R:
--------------------------------------------------------------------------------
 1 | #' Random Dirichlet
 2 | #'
 3 | #' Randomly samples a vector of length n from a dirichlet distribution parameterized by a vector of alphas
 4 | #' PDF of Gamma with scale = 1 : f(x)= 1/(Gamma(a)) x^(a-1) e^-(x)
 5 | #'
 6 | #' @param alphas_list Named List of Integers: parameters of the dirichlet,
 7 | #'     interpreted as the number of success of each outcome
 8 | #' @param n integer, the number of samples
 9 | #'
10 | #' @importFrom magrittr set_colnames %>%
11 | #' @importFrom tibble as_tibble
12 | #' @importFrom stats rgamma
13 | #' @export
14 | #'
15 | #' @return n x length(alphas) named tibble representing the probability of observing each outcome
16 | #'
17 | #' @examples
18 | #' rdirichlet(100, list(a = 20, b = 15, c = 60))
19 | #'
20 | rdirichlet <- function(n, alphas_list) {
21 |   alphas <- unlist(alphas_list)
22 |   dimensions <- length(alphas)
23 | 
24 |   # generate a n x length_alphas matrix of samples from a gamma with shape = alpha_i & scale = 1
25 |   gamma_samples <- matrix(stats::rgamma(n * dimensions, alphas), ncol = dimensions, byrow = TRUE) %>%
26 |     magrittr::set_colnames(names(alphas))
27 | 
28 |   # standardize each sample, so that each of n samples is a simplex
29 |   scaled_samples <- gamma_samples / apply(gamma_samples, 1, sum)
30 |   tibble::as_tibble(scaled_samples)
31 | }
32 | 


--------------------------------------------------------------------------------
/man/estimate_win_prob_given_posterior.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/estimate_win_prob_given_posterior.R
 3 | \name{estimate_win_prob_given_posterior}
 4 | \alias{estimate_win_prob_given_posterior}
 5 | \title{Estimate Win Probability Given Posterior Distribution}
 6 | \usage{
 7 | estimate_win_prob_given_posterior(posterior_samples, winner_is_max = TRUE)
 8 | }
 9 | \arguments{
10 | \item{posterior_samples}{Tibble of data in long form with 2 columns
11 | `option_name` and `samples`}
12 | 
13 | \item{winner_is_max}{Boolean. This should almost always be TRUE. If a larger number is better
14 | then this should be TRUE. This should be FALSE for metrics such as CPA or CPC where a higher cost
15 | is not necessarily better.}
16 | }
17 | \value{
18 | Tibble of each option_name and the win probability expressed as a percentage and a decimal `raw`
19 | }
20 | \description{
21 | Estimate Win Probability Given Posterior Distribution
22 | }
23 | \examples{
24 | # Requires posterior_samples dataframe. See `sample_from_posterior()`
25 | # for an example.
26 | \dontrun{
27 | estimate_win_prob_given_posterior(posterior_samples = posterior_samples)
28 | estimate_win_prob_given_posterior(
29 |     posterior_samples = posterior_samples,
30 |     winner_is_max = TRUE
31 | )
32 | }
33 | 
34 | }
35 | 


--------------------------------------------------------------------------------
/man/sample_from_posterior.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample_from_posterior.R
 3 | \name{sample_from_posterior}
 4 | \alias{sample_from_posterior}
 5 | \title{Sample From Posterior}
 6 | \usage{
 7 | sample_from_posterior(
 8 |   input_df,
 9 |   distribution,
10 |   priors = list(),
11 |   n_samples = 50000
12 | )
13 | }
14 | \arguments{
15 | \item{input_df}{Dataframe containing option_name (str) and various other columns
16 | depending on the distribution type. See vignette for more details.}
17 | 
18 | \item{distribution}{String of the distribution name}
19 | 
20 | \item{priors}{Optional list of priors. Defaults will be use otherwise.}
21 | 
22 | \item{n_samples}{Optional integer value. Defaults to 50,000 samples.}
23 | }
24 | \value{
25 | A tibble with 2 columns: option_name (chr) and samples (dbl) [long form data].
26 | }
27 | \description{
28 | Selects which function to use to sample from the posterior distribution
29 | }
30 | \examples{
31 | input_df <- tibble::tibble(
32 |    option_name = c("A", "B"),
33 |    sum_clicks = c(1000, 1000),
34 |    sum_conversions = c(100, 120),
35 |    sum_sessions = c(1000, 1000),
36 |    sum_revenue = c(1000, 1500)
37 | )
38 | sample_from_posterior(input_df, "conversion_rate")
39 | sample_from_posterior(input_df, "rev_per_session")
40 | 
41 | }
42 | 


--------------------------------------------------------------------------------
/man/estimate_loss.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/estimate_loss.R
 3 | \name{estimate_loss}
 4 | \alias{estimate_loss}
 5 | \title{Estimate Loss}
 6 | \usage{
 7 | estimate_loss(
 8 |   posterior_samples,
 9 |   distribution,
10 |   wrt_option = NULL,
11 |   metric = c("absolute", "lift", "relative_risk")
12 | )
13 | }
14 | \arguments{
15 | \item{posterior_samples}{Tibble: returned from sample_from_posterior with 3 columns
16 | `option_name`, `samples`, and `sample_id`.}
17 | 
18 | \item{distribution}{String: the name of the distribution}
19 | 
20 | \item{wrt_option}{String: the option loss is calculated with respect to (wrt). If NULL, the best option will be chosen.}
21 | 
22 | \item{metric}{String: the type of loss.
23 | absolute will be the difference, on the outcome scale. 0 when best = wrt_option
24 | lift will be the (best - wrt_option) / wrt_option, 0 when best = wrt_option
25 | relative_risk will be the ratio best/wrt_option, 1 when best = wrt_option}
26 | }
27 | \value{
28 | numeric, the loss distribution
29 | }
30 | \description{
31 | Estimate Loss
32 | }
33 | \examples{
34 | # Requires posterior_samples dataframe. See `sample_from_posterior()`
35 | # for an example.
36 | 
37 | \dontrun{
38 | estimate_loss(posterior_samples = posterior_samples, distribution = "conversion_rate")
39 | }
40 | }
41 | 


--------------------------------------------------------------------------------
/man/update_gamma.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/update_gamma.R
 3 | \name{update_gamma}
 4 | \alias{update_gamma}
 5 | \title{Update Gamma}
 6 | \usage{
 7 | update_gamma(k, theta, priors = list(), alternate_priors = FALSE)
 8 | }
 9 | \arguments{
10 | \item{k}{Double value for \eqn{k} (total revenue generating events). Must be 0 or greater.}
11 | 
12 | \item{theta}{Double value for \eqn{\theta} (sum of revenue). Must be 0 or greater.}
13 | 
14 | \item{priors}{An optional list object that contains k0 and
15 | theta0. Otherwise the function will use \eqn{Gamma(1,250)} as the prior distribution.
16 | If a second gamma distribution is used k01 and theta01 can be defined as separate priors
17 | when alternate_priors is set to TRUE.}
18 | 
19 | \item{alternate_priors}{Boolean Defaults to FALSE. Allows a user to specify alternate
20 | prior names so the same prior isn't required when multiple gamma distributions are used.}
21 | }
22 | \value{
23 | A list object that contains `k` and `theta`
24 | }
25 | \description{
26 | Updates Gamma Distribution with the Gamma-Exponential
27 | conjugate prior update rule. Parameterized by \eqn{k} and \eqn{\theta} (not \eqn{\alpha, \beta})
28 | }
29 | \examples{
30 | update_gamma(k = 1, theta = 100, priors = list(k0 = 2, theta0 = 1000))
31 | update_gamma(k = 10, theta = 200)
32 | 
33 | }
34 | 


--------------------------------------------------------------------------------
/man/sample_response_rate.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample_response_rate.R
 3 | \name{sample_response_rate}
 4 | \alias{sample_response_rate}
 5 | \title{Sample Response Rate}
 6 | \usage{
 7 | sample_response_rate(input_df, priors, n_samples = 50000)
 8 | }
 9 | \arguments{
10 | \item{input_df}{Dataframe containing option_name (str),
11 | sum_conversions (dbl), and sum_sessions (dbl).}
12 | 
13 | \item{priors}{Optional list of priors alpha0 and beta0.
14 | Default \eqn{Beta(1,1)} will be use otherwise.}
15 | 
16 | \item{n_samples}{Optional integer value. Defaults to 50,000 samples.}
17 | }
18 | \value{
19 | input_df with 2 new nested columns `beta_params` and `samples`
20 | }
21 | \description{
22 | This is an alias for sample_conv_rate with a different input column.
23 | Adds 2 new nested columns to the input_df: `beta_params` and `samples`
24 |     `beta_params` in each row should be a tibble of length 2 (\eqn{\alpha}
25 |         and \eqn{\beta} parameters)
26 |     `samples` in each row should be a tibble of length `n_samples`
27 | }
28 | \details{
29 | See update_rules vignette for a mathematical representation.
30 | \deqn{conversion_i ~ Bernoulli(\phi)}
31 | \deqn{\phi ~ Beta(\alpha, \beta)}
32 | Response Rate is sampled from a Beta distribution with a Binomial likelihood
33 | of an individual converting.
34 | }
35 | 


--------------------------------------------------------------------------------
/man/estimate_win_prob_vs_baseline_given_posterior.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/estimate_win_prob_vs_baseline_given_posterior.R
 3 | \name{estimate_win_prob_vs_baseline_given_posterior}
 4 | \alias{estimate_win_prob_vs_baseline_given_posterior}
 5 | \title{Estimate Win Probability vs. Baseline Given Posterior}
 6 | \usage{
 7 | estimate_win_prob_vs_baseline_given_posterior(
 8 |   posterior_samples,
 9 |   distribution,
10 |   wrt_option
11 | )
12 | }
13 | \arguments{
14 | \item{posterior_samples}{Tibble returned from sample_from_posterior with 3 columns
15 | `option_name`, `samples`, and `sample_id`.}
16 | 
17 | \item{distribution}{String: the distribution name}
18 | 
19 | \item{wrt_option}{String: the option to compare against the best option.}
20 | }
21 | \value{
22 | Tibble of each option_name and the win probability expressed as a percentage and a decimal `raw`
23 | }
24 | \description{
25 | Calculates the win probability of the best option compared to a single other option
26 | given a posterior distribution.
27 | }
28 | \examples{
29 | # Requires posterior_samples dataframe. See `sample_from_posterior()`
30 | # for an example.
31 | \dontrun{
32 | estimate_win_prob_vs_baseline_given_posterior(
33 |     posterior_samples = posterior_samples,
34 |     distribution = "conversion_rate",
35 |     wrt_option = "A")
36 | }
37 | 
38 | }
39 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # grizbayr 1.3.5
 2 | 
 3 | ## 1.3.5
 4 | 
 5 | - Update package documentation due to roxygen2 breaking change
 6 | 
 7 | ## 1.3.3
 8 | 
 9 | - Update maintainer email
10 | - Update url in DESCRIPTION to referenced paper
11 | - Fix test in `test-sample_from_posterior.R`
12 | - Add CRAN badges to README
13 | 
14 | ## 1.3.2
15 | 
16 | - Fix backslash in KaTeX in sample_session_duration.R roxygen2 documentation
17 | - Added roxygen2 documentation for lazy loaded `sysdata.rda` 
18 | 
19 | ## 1.3.1
20 | 
21 | - Bugfix in sample_total_cm distribution where the CM distribution is no longer incorrectly bimodal.
22 | 
23 | ## 1.3.0
24 | 
25 | - Add 2 new distributions: **Page Views Per Session** and **Session Duration**
26 | 
27 | ## 1.2.3
28 | 
29 | - Remove `add = FALSE` argument to a group_by since default is already FALSE and dplyr 1.0.0 throws deprecation warning.
30 | - Add hard requirement for tidyr >= 1.0.0 to use pivot_wider and pivot_longer functions.
31 | 
32 | ## 1.2.2
33 | 
34 | - Bugfix - Fixed Win Probability Vs. Baseline to divide by samples per option, not total samples.
35 | - Fixed documentation example on `estimate_all_values()` to make sure option_name column returned a string not a factor.
36 | - Changed `estimate_lift()` metric argument to default to `"lift"` and provided clearer error messaging when there is an invalid argument.
37 | 
38 | ## 1.2.1 
39 | 
40 | - Initial release.
41 | 


--------------------------------------------------------------------------------
/man/sample_ctr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample_ctr.R
 3 | \name{sample_ctr}
 4 | \alias{sample_ctr}
 5 | \title{Sample Click Through Rate}
 6 | \usage{
 7 | sample_ctr(input_df, priors, n_samples = 50000)
 8 | }
 9 | \arguments{
10 | \item{input_df}{Dataframe containing option_name (str),
11 | sum_clicks (dbl), and sum_impressions (dbl).}
12 | 
13 | \item{priors}{Optional list of priors alpha0 and beta0.
14 | Default \eqn{Beta(1,1)} will be use otherwise.}
15 | 
16 | \item{n_samples}{Optional integer value. Defaults to 50,000 samples.}
17 | }
18 | \value{
19 | input_df with 2 new nested columns `beta_params` and `samples`
20 | }
21 | \description{
22 | This is an alias for sample_conv_rate with 2 different input
23 | columns. This function calculates posterior samples of
24 | \eqn{CTR = clicks/impressions}. Adds 2 new nested columns to
25 | the input_df: `beta_params` and `samples`.
26 | `beta_params` in each row should be a tibble of length 2 (\eqn{\alpha}
27 | and \eqn{\beta} parameters)
28 | `samples` in each row should be a tibble of length `n_samples`
29 | }
30 | \details{
31 | See update_rules vignette for a mathematical representation.
32 | \deqn{click_i ~ Bernoulli(\phi)}
33 | \deqn{\phi ~ Beta(\alpha, \beta)}
34 | Click Through Rate is sampled from a Beta distribution with a Binomial
35 | likelihood of an individual Clicking
36 | }
37 | 


--------------------------------------------------------------------------------
/R/sample_response_rate.R:
--------------------------------------------------------------------------------
 1 | #' Sample Response Rate
 2 | #'
 3 | #' This is an alias for sample_conv_rate with a different input column.
 4 | #' Adds 2 new nested columns to the input_df: `beta_params` and `samples`
 5 | #'     `beta_params` in each row should be a tibble of length 2 (\eqn{\alpha}
 6 | #'         and \eqn{\beta} parameters)
 7 | #'     `samples` in each row should be a tibble of length `n_samples`
 8 | #'
 9 | #' See update_rules vignette for a mathematical representation.
10 | #' \deqn{conversion_i ~ Bernoulli(\phi)}
11 | #' \deqn{\phi ~ Beta(\alpha, \beta)}
12 | #' Response Rate is sampled from a Beta distribution with a Binomial likelihood
13 | #' of an individual converting.
14 | #'
15 | #' @param input_df Dataframe containing option_name (str),
16 | #'     sum_conversions (dbl), and sum_sessions (dbl).
17 | #' @param priors Optional list of priors alpha0 and beta0.
18 | #'     Default \eqn{Beta(1,1)} will be use otherwise.
19 | #' @param n_samples Optional integer value. Defaults to 50,000 samples.
20 | #'
21 | #' @importFrom dplyr rename
22 | #' @importFrom rlang .data
23 | #'
24 | #' @return input_df with 2 new nested columns `beta_params` and `samples`
25 | #'
26 | sample_response_rate <- function(input_df, priors, n_samples = 5e4){
27 |   renamed_input_df <- dplyr::mutate(input_df, sum_clicks = .data$sum_sessions)
28 |   sample_conv_rate(renamed_input_df, priors, n_samples)
29 | }
30 | 
31 | 


--------------------------------------------------------------------------------
/man/estimate_lift.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/estimate_lift.R
 3 | \name{estimate_lift}
 4 | \alias{estimate_lift}
 5 | \title{Estimate Lift Distribution}
 6 | \usage{
 7 | estimate_lift(posterior_samples, distribution, wrt_option, metric = "lift")
 8 | }
 9 | \arguments{
10 | \item{posterior_samples}{Tibble returned from sample_from_posterior with 3 columns
11 | `option_name`, `samples`, and `sample_id`.}
12 | 
13 | \item{distribution}{String of the distribution name}
14 | 
15 | \item{wrt_option}{string the option lift is calculated with respect to (wrt). Required.}
16 | 
17 | \item{metric}{string the type of lift.
18 | `absolute`` will be the difference, on the outcome scale. 0 when best = wrt_option
19 | `lift`` will be the (best - wrt_option) / wrt_option, 0 when best = wrt_option
20 | `relative_risk`` will be the ratio best/wrt_option, 1 when best = wrt_option}
21 | }
22 | \value{
23 | numeric, the lift distribution
24 | }
25 | \description{
26 | Estimates lift distribution vector from posterior samples.
27 | }
28 | \examples{
29 | # Requires posterior_samples dataframe. See `sample_from_posterior()`
30 | # for an example.
31 | 
32 | \dontrun{
33 | estimate_lift(posterior_samples = posterior_samples,
34 |               distribution = "conversion_rate",
35 |               wrt_option = "A",
36 |               metric = "lift")
37 | }
38 | 
39 | }
40 | 


--------------------------------------------------------------------------------
/man/estimate_win_prob_vs_baseline.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/estimate_win_prob_vs_baseline.R
 3 | \name{estimate_win_prob_vs_baseline}
 4 | \alias{estimate_win_prob_vs_baseline}
 5 | \title{Estimate Win Probability vs. Baseline}
 6 | \usage{
 7 | estimate_win_prob_vs_baseline(
 8 |   input_df,
 9 |   distribution,
10 |   priors = list(),
11 |   wrt_option
12 | )
13 | }
14 | \arguments{
15 | \item{input_df}{Dataframe containing option_name (str) and various other columns
16 | depending on the distribution type. See vignette for more details.}
17 | 
18 | \item{distribution}{String of the distribution name}
19 | 
20 | \item{priors}{Optional list of priors. Defaults will be use otherwise.}
21 | 
22 | \item{wrt_option}{string the option win prob is calculated with respect to (wrt). Required.}
23 | }
24 | \value{
25 | Tibble of each option_name and the win probability expressed as a percentage and a decimal `raw`
26 | }
27 | \description{
28 | Calculates the win probability of the best option compared to a single other option
29 | given an input_df
30 | }
31 | \examples{
32 | input_df <- tibble::tibble(
33 |     option_name = c("A", "B", "C"),
34 |     sum_clicks = c(1000, 1000, 1000),
35 |     sum_conversions = c(100, 120, 110)
36 | )
37 | estimate_win_prob_vs_baseline(input_df = input_df,
38 |     distribution = "conversion_rate",
39 |     wrt_option = "B")
40 | 
41 | }
42 | 


--------------------------------------------------------------------------------
/tests/testthat/test-estimate_lift_vs_baseline.R:
--------------------------------------------------------------------------------
 1 | context("Estimate Lift Vs Baseline")
 2 | 
 3 | test_that("estimate_lift_vs_baseline returns single value around .12", {
 4 |   input_df <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_clicks = c(1000, 1000, 1000),
 7 |     sum_conversions = c(100, 120, 110)
 8 |   )
 9 |   output <- estimate_lift_vs_baseline(input_df,
10 |                                       distribution = "conversion_rate",
11 |                                       wrt_option = "A",
12 |                                       metric = "lift",
13 |                                       threshold = 0.7)
14 |   expect_length(output, 1)
15 |   expect_lt(output, 0.18)
16 |   expect_gt(output, 0.06)
17 | })
18 | 
19 | test_that("estimate_lift_vs_baseline returns single value around .20 when threshold is 0.5", {
20 |   input_df <- tibble::tibble(
21 |     option_name = c("A", "B", "C"),
22 |     sum_clicks = c(1000, 1000, 1000),
23 |     sum_conversions = c(100, 120, 110)
24 |   )
25 |   output <- estimate_lift_vs_baseline(input_df,
26 |                                       distribution = "conversion_rate",
27 |                                       wrt_option = "A",
28 |                                       metric = "lift",
29 |                                       threshold = 0.5)
30 |   expect_length(output, 1)
31 |   expect_lt(output, 0.25)
32 |   expect_gt(output, 0.15)
33 | })
34 | 


--------------------------------------------------------------------------------
/R/update_beta.R:
--------------------------------------------------------------------------------
 1 | #' Update Beta
 2 | #'
 3 | #' Updates Beta Distribution with the Beta-Bernoulli
 4 | #' conjugate prior update rule
 5 | #'
 6 | #' @param alpha Double value for alpha (count of successes). Must be 0 or greater.
 7 | #' @param beta Double value for beta (count of failures). Must be 0 or greater.
 8 | #' @param priors An optional list object that contains alpha0 and
 9 | #'     beta0. Otherwise the function with use Beta(1,1) as the prior distribution.
10 | #'
11 | #'
12 | #' @return A tibble object that contains `alpha` and `beta`
13 | #' @export
14 | #' @importFrom tibble tibble
15 | #'
16 | #' @examples
17 | #' update_beta(alpha = 1, beta = 5, priors = list(alpha0 = 2, beta0 = 2))
18 | #' update_beta(alpha = 20000, beta = 50000)
19 | #'
20 | update_beta <- function(alpha, beta, priors = list()) {
21 |   validate_data_values(data_values = list(alpha = alpha, beta = beta))
22 | 
23 |   # Set Attributes
24 |   valid_beta_params <- c("alpha0", "beta0")
25 |   default_beta_priors <- list(alpha0 = 1, beta0 = 1)
26 | 
27 |   # Validate Priors
28 |   validated_priors <- validate_priors(priors = priors,
29 |                                       valid_priors = valid_beta_params,
30 |                                       default_priors = default_beta_priors)
31 |   alpha0 <- validated_priors$alpha0
32 |   beta0 <- validated_priors$beta0
33 | 
34 |   tibble::tibble(alpha = alpha0 + alpha, beta = beta0 + beta)
35 | }
36 | 


--------------------------------------------------------------------------------
/man/update_dirichlet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/update_dirichlet.R
 3 | \name{update_dirichlet}
 4 | \alias{update_dirichlet}
 5 | \title{Update Dirichlet Distribution}
 6 | \usage{
 7 | update_dirichlet(alpha_0, alpha_1, alpha_2, priors = list())
 8 | }
 9 | \arguments{
10 | \item{alpha_0}{Double value for alpha_0 (count of failures). Must be 0 or greater.}
11 | 
12 | \item{alpha_1}{Double value for alpha_1 (count of successes side 1). Must be 0 or greater.}
13 | 
14 | \item{alpha_2}{Double value for alpha_2 (count of successes side 2). Must be 0 or greater.}
15 | 
16 | \item{priors}{An optional list object that contains alpha00, alpha01, and alpha02.
17 | Otherwise the function with use \eqn{Dirichlet(1,1,1)} as the prior distribution.}
18 | }
19 | \value{
20 | tibble with columns alpha_0, alpha_1, and alpha_2
21 | }
22 | \description{
23 | This function updates the Dirichlet distribution with the
24 | Dirichlet-Multinomial conjugate prior update rule.
25 | }
26 | \details{
27 | TODO: This function currently only works in 3 dimensions.
28 | Should be extended into N dimensions in the future. Can use ... notation.
29 | }
30 | \examples{
31 | update_dirichlet(alpha_0 = 20, alpha_1 = 5, alpha_2 = 2)
32 | sample_priors_list <- list(alpha00 = 2, alpha01 = 3, alpha02 = 5)
33 | update_dirichlet(alpha_0 = 20, alpha_1 = 5, alpha_2 = 2, priors = sample_priors_list)
34 | 
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/testthat/test-estimate_win_prob_vs_baseline.R:
--------------------------------------------------------------------------------
 1 | context("Estimate Win Probability vs Baseline")
 2 | 
 3 | test_that("estimate_win_prob_vs_baseline returns only 2 options", {
 4 |   input_df <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_clicks = c(1000, 1000, 1000),
 7 |     sum_conversions = c(100, 120, 110)
 8 |   )
 9 |   all_option_names <- unique(input_df$option_name)
10 |   output <- estimate_win_prob_vs_baseline(input_df = input_df,
11 |                                           distribution = "conversion_rate",
12 |                                           wrt_option = "A")
13 |   # Subset exists
14 |   expect_true(all(output$option_name %in% all_option_names))
15 |   expect_true(nrow(output) == 2)
16 | })
17 | 
18 | 
19 | test_that("estimate_win_prob_vs_baseline_given_posterior handles when wrt_option is the best", {
20 |   # Can't be better than yourself
21 |   input_df <- tibble::tibble(
22 |     option_name = c("A", "B", "C"),
23 |     sum_clicks = c(1000, 1000, 1000),
24 |     sum_conversions = c(100, 120, 110)
25 |   )
26 |   all_option_names <- unique(input_df$option_name)
27 |   output <- estimate_win_prob_vs_baseline(input_df = input_df,
28 |                                           distribution = "conversion_rate",
29 |                                           wrt_option = "B")
30 |   expect_true(all(output$option_name %in% all_option_names))
31 |   expect_true(nrow(output) == 1)
32 | })
33 | 


--------------------------------------------------------------------------------
/man/sample_session_duration.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample_session_duration.R
 3 | \name{sample_session_duration}
 4 | \alias{sample_session_duration}
 5 | \title{Sample Session Duration}
 6 | \usage{
 7 | sample_session_duration(input_df, priors, n_samples = 50000)
 8 | }
 9 | \arguments{
10 | \item{input_df}{Dataframe containing option_name (str),
11 | sum_sessions (dbl), and sum_duration (dbl).}
12 | 
13 | \item{priors}{Optional list of priors k0 and theta0.
14 | Default \eqn{Gamma(1, 250)} will be use otherwise.}
15 | 
16 | \item{n_samples}{Optional integer value. Defaults to 50,000 samples.}
17 | }
18 | \value{
19 | input_df with 2 new nested columns `gamma_params` and `samples`
20 | }
21 | \description{
22 | Adds 2 new nested columns to the input_df: `gamma_params` and `samples`
23 |     `gamma_params` in each row should be a tibble of length 2 (\eqn{k}
24 |         and \eqn{\theta} parameters)
25 |     `samples` in each row should be a tibble of length `n_samples`
26 | }
27 | \details{
28 | See update_rules vignette for a mathematical representation.
29 | \deqn{duration_i ~ Exponential(\lambda)}
30 | \deqn{\lambda ~ Gamma(k, \theta)}
31 | Session Duration is sampled from a Gamma distribution with a Exponential likelihood
32 | of an individual leaving the site or ending a session at time t.
33 | 
34 | This is not always the case, so verify your data follows the shape of
35 | an exponential distribution before using this.
36 | }
37 | 


--------------------------------------------------------------------------------
/tests/testthat/test-rdirichlet.R:
--------------------------------------------------------------------------------
 1 | context("Random Dirichlet")
 2 | 
 3 | test_that("rdirichlet returns correct size df", {
 4 |   n_samples <- 100
 5 |   output <- rdirichlet(n_samples, list(a = 20, b = 15, c = 60))
 6 |   expect_true(is.data.frame(output))
 7 |   expect_true(nrow(output) == n_samples)
 8 | })
 9 | 
10 | test_that("rdirichlet returns empty df when 0 samples are requested", {
11 |   n_samples <- 0
12 |   output <- rdirichlet(n_samples, list(a = 20, b = 15, c = 60))
13 |   expect_true(is.data.frame(output))
14 |   expect_true(nrow(output) == n_samples)
15 | })
16 | 
17 | test_that("rdirichlet returns simplex for each row", {
18 |   n_samples <- 100
19 |   output <- rdirichlet(n_samples, list(a = 20, b = 15, c = 60))
20 |   expect_true({
21 |     purrr::pmap(output, function(...) round(sum(...), 2)) %>%
22 |       purrr::every( ~ .x == 1)
23 |   })
24 | })
25 | 
26 | test_that("rdirichlet returns simplex for each row long list", {
27 |   n_samples <- 100
28 |   output <- rdirichlet(n_samples, list(a = 20, b = 15, c = 60, d = 30, f = 60, g = 22, h = 1, i = 0, k = 6))
29 |   expect_true({
30 |     purrr::pmap(output, function(...) round(sum(...), 2)) %>%
31 |       purrr::every( ~ .x == 1)
32 |   })
33 | })
34 | 
35 | test_that("rdirichlet names match input", {
36 |   n_samples <- 100
37 |   input_list <- list(a = 20, b = 15, c = 60, d = 30, f = 60, g = 22, h = 1, i = 0, k = 6)
38 |   output <- rdirichlet(n_samples, input_list)
39 |   expect_true(all(names(output) == names(input_list)))
40 | })
41 | 


--------------------------------------------------------------------------------
/R/estimate_win_prob_vs_baseline.R:
--------------------------------------------------------------------------------
 1 | #' Estimate Win Probability vs. Baseline
 2 | #'
 3 | #' Calculates the win probability of the best option compared to a single other option
 4 | #' given an input_df
 5 | #'
 6 | #' @param input_df Dataframe containing option_name (str) and various other columns
 7 | #'     depending on the distribution type. See vignette for more details.
 8 | #' @param distribution String of the distribution name
 9 | #' @param priors Optional list of priors. Defaults will be use otherwise.
10 | #' @param wrt_option string the option win prob is calculated with respect to (wrt). Required.
11 | #'
12 | #' @return Tibble of each option_name and the win probability expressed as a percentage and a decimal `raw`
13 | #' @export
14 | #'
15 | #' @examples
16 | #' input_df <- tibble::tibble(
17 | #'     option_name = c("A", "B", "C"),
18 | #'     sum_clicks = c(1000, 1000, 1000),
19 | #'     sum_conversions = c(100, 120, 110)
20 | #' )
21 | #' estimate_win_prob_vs_baseline(input_df = input_df,
22 | #'     distribution = "conversion_rate",
23 | #'     wrt_option = "B")
24 | #'
25 | estimate_win_prob_vs_baseline <- function(input_df, distribution, priors = list(), wrt_option){
26 |   validate_input_df(input_df, distribution)
27 | 
28 |   # Sample from posterior distribution
29 |   posterior_samples <- sample_from_posterior(input_df, distribution, priors)
30 | 
31 |   # Calculate Win Prob vs Baseline
32 |   estimate_win_prob_vs_baseline_given_posterior(posterior_samples, distribution, wrt_option)
33 | }
34 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(estimate_all_values)
 4 | export(estimate_lift)
 5 | export(estimate_lift_vs_baseline)
 6 | export(estimate_loss)
 7 | export(estimate_value_remaining)
 8 | export(estimate_win_prob)
 9 | export(estimate_win_prob_given_posterior)
10 | export(estimate_win_prob_vs_baseline)
11 | export(estimate_win_prob_vs_baseline_given_posterior)
12 | export(find_best_option)
13 | export(rdirichlet)
14 | export(sample_from_posterior)
15 | export(update_beta)
16 | export(update_dirichlet)
17 | export(update_gamma)
18 | export(validate_input_df)
19 | importFrom(dplyr,"%>%")
20 | importFrom(dplyr,arrange)
21 | importFrom(dplyr,bind_rows)
22 | importFrom(dplyr,desc)
23 | importFrom(dplyr,filter)
24 | importFrom(dplyr,group_by)
25 | importFrom(dplyr,mutate)
26 | importFrom(dplyr,rename)
27 | importFrom(dplyr,row_number)
28 | importFrom(dplyr,select)
29 | importFrom(dplyr,select_if)
30 | importFrom(dplyr,summarise)
31 | importFrom(magrittr,"%>%")
32 | importFrom(magrittr,extract2)
33 | importFrom(magrittr,set_colnames)
34 | importFrom(magrittr,set_names)
35 | importFrom(magrittr,use_series)
36 | importFrom(purrr,map)
37 | importFrom(purrr,map2)
38 | importFrom(purrr,pmap)
39 | importFrom(purrr,some)
40 | importFrom(purrr,walk)
41 | importFrom(rlang,.data)
42 | importFrom(stats,quantile)
43 | importFrom(stats,rbeta)
44 | importFrom(stats,rgamma)
45 | importFrom(tibble,as_tibble)
46 | importFrom(tibble,tibble)
47 | importFrom(tidyr,pivot_wider)
48 | importFrom(tidyr,unnest)
49 | 


--------------------------------------------------------------------------------
/man/sample_cm_per_click.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample_cm_per_click.R
 3 | \name{sample_cm_per_click}
 4 | \alias{sample_cm_per_click}
 5 | \title{Sample CM Per Click}
 6 | \usage{
 7 | sample_cm_per_click(input_df, priors, n_samples = 50000)
 8 | }
 9 | \arguments{
10 | \item{input_df}{Dataframe containing option_name (str), sum_conversions (dbl), sum_revenue (dbl),
11 | and sum_clicks (dbl).}
12 | 
13 | \item{priors}{Optional list of priors {alpha0, beta0} for Beta, {k0, theta0} for Gamma Inverse Revenue,
14 | and {k01, theta01} for Gamma Cost (uses alternate priors so they can be different from Revenue).
15 | Default \eqn{Beta(1,1)} and \eqn{Gamma(1, 250)} will be use otherwise.}
16 | 
17 | \item{n_samples}{Optional integer value. Defaults to 50,000 samples.}
18 | }
19 | \value{
20 | input_df with 4 new nested columns `beta_params`, `gamma_params_rev`,
21 |     `gamma_params_cost`, and `samples`
22 | }
23 | \description{
24 | Adds 4 new nested columns to the input_df: `beta_params`,
25 |     `gamma_params_rev`, `gamma_params_cost`and `samples`
26 | }
27 | \details{
28 | `beta_params` and `gamma_params_rev` in each row should be a
29 | tibble of length 2 (\eqn{\alpha} and \eqn{\beta} parameters
30 | and \eqn{k} and \eqn{\theta} parameters)
31 | `samples` in each row should be a tibble of length `n_samples`
32 | 
33 | See update_rules vignette for a mathematical representation.
34 | \deqn{CMPerClick = ConversionsPerClick * RevPerConversion - CostPerClick}
35 | }
36 | 


--------------------------------------------------------------------------------
/R/estimate_win_prob_vs_baseline_given_posterior.R:
--------------------------------------------------------------------------------
 1 | #' Estimate Win Probability vs. Baseline Given Posterior
 2 | #'
 3 | #' Calculates the win probability of the best option compared to a single other option
 4 | #' given a posterior distribution.
 5 | #'
 6 | #' @param posterior_samples Tibble returned from sample_from_posterior with 3 columns
 7 | #'   `option_name`, `samples`, and `sample_id`.
 8 | #' @param distribution String: the distribution name
 9 | #' @param wrt_option String: the option to compare against the best option.
10 | #'
11 | #' @return Tibble of each option_name and the win probability expressed as a percentage and a decimal `raw`
12 | #' @export
13 | #' @importFrom rlang .data
14 | #' @importFrom dplyr filter
15 | #'
16 | #' @examples
17 | #' # Requires posterior_samples dataframe. See `sample_from_posterior()`
18 | #' # for an example.
19 | #' \dontrun{
20 | #' estimate_win_prob_vs_baseline_given_posterior(
21 | #'     posterior_samples = posterior_samples,
22 | #'     distribution = "conversion_rate",
23 | #'     wrt_option = "A")
24 | #'}
25 | #'
26 | estimate_win_prob_vs_baseline_given_posterior <- function(posterior_samples, distribution, wrt_option){
27 |   validate_wrt_option(wrt_option, posterior_samples)
28 |   best_option <- find_best_option(posterior_samples, distribution)
29 | 
30 |   posterior_samples_subset <- posterior_samples %>%
31 |     dplyr::filter(.data$option_name %in% c(wrt_option, best_option))
32 | 
33 |   estimate_win_prob_given_posterior(posterior_samples_subset, is_winner_max(distribution))
34 | }
35 | 
36 | 


--------------------------------------------------------------------------------
/man/sample_multi_rev_per_session.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample_multi_rev_per_session.R
 3 | \name{sample_multi_rev_per_session}
 4 | \alias{sample_multi_rev_per_session}
 5 | \title{Sample Multiple Revenue Per Session}
 6 | \usage{
 7 | sample_multi_rev_per_session(input_df, priors, n_samples = 50000)
 8 | }
 9 | \arguments{
10 | \item{input_df}{Dataframe containing option_name (str),
11 | sum_conversions (dbl), sum_sessions (dbl), sum_revenue (dbl),
12 | sum_conversion_2 (dbl), sum_sessions_2 (dbl), sum_revenue_2 (dbl).}
13 | 
14 | \item{priors}{Optional list of priors alpha0 and beta0.
15 | Default \eqn{Beta(1,1)} will be use otherwise.}
16 | 
17 | \item{n_samples}{Optional integer value. Defaults to 50,000 samples.}
18 | }
19 | \value{
20 | input_df with 4 new nested columns `dirichlet_params`,
21 |     `gamma_params_A`, `gamma_params_B`, and `samples`.
22 |     `samples` in each row should be a tibble of length `n_samples`.
23 | }
24 | \description{
25 | Adds 5 new nested columns to the input_df: `dirichlet_params`,
26 |     `gamma_params_A`, `gamma_params_B`, and `samples`.
27 |     This samples from multiple revenue per session distributions
28 |     at once.
29 | }
30 | \details{
31 | See update_rules vignette for a mathematical representation.
32 | 
33 | \deqn{conversion_i ~ MultiNomial(\phi_1, \phi_2, ..., \phi_k)}
34 | \deqn{\phi_k ~ Dirichlet(\alpha, \beta)}
35 | Conversion Rate is sampled from a Dirichlet distribution with a Multinomial likelihood
36 | of an individual converting.
37 | }
38 | 


--------------------------------------------------------------------------------
/R/sample_ctr.R:
--------------------------------------------------------------------------------
 1 | #' Sample Click Through Rate
 2 | #'
 3 | #' This is an alias for sample_conv_rate with 2 different input
 4 | #' columns. This function calculates posterior samples of
 5 | #' \eqn{CTR = clicks/impressions}. Adds 2 new nested columns to
 6 | #' the input_df: `beta_params` and `samples`.
 7 | #' `beta_params` in each row should be a tibble of length 2 (\eqn{\alpha}
 8 | #' and \eqn{\beta} parameters)
 9 | #' `samples` in each row should be a tibble of length `n_samples`
10 | #'
11 | #' See update_rules vignette for a mathematical representation.
12 | #' \deqn{click_i ~ Bernoulli(\phi)}
13 | #' \deqn{\phi ~ Beta(\alpha, \beta)}
14 | #' Click Through Rate is sampled from a Beta distribution with a Binomial
15 | #' likelihood of an individual Clicking
16 | #'
17 | #' @param input_df Dataframe containing option_name (str),
18 | #'     sum_clicks (dbl), and sum_impressions (dbl).
19 | #' @param priors Optional list of priors alpha0 and beta0.
20 | #'     Default \eqn{Beta(1,1)} will be use otherwise.
21 | #' @param n_samples Optional integer value. Defaults to 50,000 samples.
22 | #'
23 | #' @importFrom dplyr mutate
24 | #' @importFrom rlang .data
25 | #'
26 | #' @return input_df with 2 new nested columns `beta_params` and `samples`
27 | #'
28 | sample_ctr <- function(input_df, priors, n_samples = 5e4){
29 |   renamed_input_df <- dplyr::mutate(input_df,
30 |                                     sum_conversions = .data$sum_clicks,
31 |                                     sum_clicks = .data$sum_impressions)
32 |   sample_conv_rate(renamed_input_df, priors, n_samples)
33 | }
34 | 
35 | 


--------------------------------------------------------------------------------
/R/validate_input_column.R:
--------------------------------------------------------------------------------
 1 | #' Validate Input Column
 2 | #'
 3 | #' Validates the input column exists in the dataframe, is of the correct type,
 4 | #' and that all values are greater than or equal to 0.
 5 | #'
 6 | #' @param column_name String value of the column name
 7 | #' @param input_df Dataframe containing option_name (str) and various other columns
 8 | #'     depending on the distribution type. See vignette for more details.
 9 | #' @param greater_than_zero Boolean: Do all values in the column have to be greater than zero?
10 | #'
11 | #' @return None
12 | #'
13 | #' @importFrom purrr some
14 | #'
15 | validate_input_column <- function(column_name, input_df, greater_than_zero = TRUE){
16 |   # Ensure All Columns Exist
17 |   if(!column_name %in% colnames(input_df)){
18 |     stop(paste(column_name, "is a required column for this distribution type and is not found in the input_df."))
19 |   }
20 | 
21 |   # Ensure Column Types are correct
22 |   if(column_name == "option_name"){
23 |     if(!is.character(input_df[["option_name"]])){
24 |       stop("option_name column is not a character string")
25 |     }
26 |   }else{
27 |     if(!is.numeric(input_df[[column_name]])){
28 |       stop(paste(column_name, "is not a numeric column."))
29 |     }
30 |   }
31 | 
32 |   if(greater_than_zero){
33 |     # Ensure all values are greater than or equal to 0.
34 |     if(purrr::some(input_df[[column_name]], ~ .x < 0)){
35 |       stop(paste("All values in column `",
36 |                  column_name,
37 |                  "` must be greater than or equal to zero."))
38 |     }
39 |   }
40 | }
41 | 


--------------------------------------------------------------------------------
/tests/testthat/test-validate_input_df.R:
--------------------------------------------------------------------------------
 1 | test_that("validate_input_df returns TRUE when df is valid for conversion rate", {
 2 |   input_df <- tibble::tibble(
 3 |     option_name = c("A", "B"),
 4 |     sum_clicks = c(1000, 1000),
 5 |     sum_conversions = c(100, 120)
 6 |   )
 7 |   expect_true(validate_input_df(input_df, "conversion_rate"))
 8 | })
 9 | 
10 | test_that("validate_input_df fails when df is not valid for rev per session", {
11 |   input_df <- tibble::tibble(
12 |     option_name = c("A", "B"),
13 |     sum_clicks = c(1000, 1000),
14 |     sum_conversions = c(100, 120)
15 |   )
16 |   expect_error(validate_input_df(input_df, "rev_per_session"))
17 | })
18 | 
19 | test_that("validate_input_df fails when df is not valid for rev per session", {
20 |   input_df <- tibble::tibble(
21 |     option_name = c("A", "B"),
22 |     sum_clicks = c(1000, 1000),
23 |     sum_conversions = c(100, 120)
24 |   )
25 |   expect_error(validate_input_df(input_df, "rev_per_session"))
26 | })
27 | 
28 | test_that("validate_input_df fails when input is not a dataframe", {
29 |   input_df <- list(
30 |     option_name = c("A", "B"),
31 |     sum_clicks = c(1000, 1000),
32 |     sum_conversions = c(100, 120)
33 |   )
34 |   expect_error(validate_input_df(input_df, "conversion_rate"))
35 | })
36 | 
37 | test_that("validate_input_df fails if distribution name is invalid", {
38 |   input_df <- tibble::tibble(
39 |     option_name = c("A", "B"),
40 |     sum_clicks = c(1000, 1000),
41 |     sum_conversions = c(100, 120)
42 |   )
43 |   expect_error(validate_input_df(input_df, "invalid_distribution"))
44 | })
45 | 


--------------------------------------------------------------------------------
/tests/testthat/test-estimate_win_prob.R:
--------------------------------------------------------------------------------
 1 | context("Estimate Win Probability")
 2 | 
 3 | test_that("estimate_win_prob returns tibble with correct values", {
 4 |   input_df <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_clicks = c(1000, 1000, 1000),
 7 |     sum_conversions = c(100, 120, 110)
 8 |   )
 9 |   output <- estimate_win_prob(input_df, "conversion_rate")
10 |   all_option_names <- input_df$option_name
11 |   expected_column_names <- c("option_name", "win_prob_raw", "win_prob")
12 |   expect_true(is.data.frame(output))
13 |   expect_true(all(expected_column_names %in% colnames(output)))
14 |   expect_true(all(output$option_name %in% all_option_names))
15 |   expect_true(is.double(output$win_prob_raw))
16 |   expect_true(is.character(output$win_prob))
17 |   expect_equal(nrow(output), length(all_option_names))
18 | })
19 | 
20 | test_that("estimate_win_prob returns tibble with correct values when win prob is 0", {
21 |   input_df <- tibble::tibble(
22 |     option_name = c("A", "B", "C"),
23 |     sum_clicks = c(1000, 1000, 1000),
24 |     sum_conversions = c(1, 120, 2)
25 |   )
26 |   output <- estimate_win_prob(input_df, "conversion_rate")
27 |   all_option_names <- input_df$option_name
28 |   expected_column_names <- c("option_name", "win_prob_raw", "win_prob")
29 |   expect_true(is.data.frame(output))
30 |   expect_true(all(expected_column_names %in% colnames(output)))
31 |   expect_true(all(output$option_name %in% all_option_names))
32 |   expect_true(is.double(output$win_prob_raw))
33 |   expect_true(is.character(output$win_prob))
34 |   expect_equal(nrow(output), length(all_option_names))
35 | })
36 | 


--------------------------------------------------------------------------------
/man/sample_page_views_per_session.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample_page_views_per_session.R
 3 | \name{sample_page_views_per_session}
 4 | \alias{sample_page_views_per_session}
 5 | \title{Sample Page Views Per Session (Visit)}
 6 | \usage{
 7 | sample_page_views_per_session(input_df, priors, n_samples = 50000)
 8 | }
 9 | \arguments{
10 | \item{input_df}{Dataframe containing option_name (str),
11 | sum_sessions (dbl), and sum_page_views_per_session (dbl).}
12 | 
13 | \item{priors}{Optional list of priors k0 and theta0.
14 | Default \eqn{Gamma(1, 250)} will be use otherwise.
15 | \eqn{Gamma(1, 1)} might also be a good choice for this distribution
16 | if you only have a few page views per session.}
17 | 
18 | \item{n_samples}{Optional integer value. Defaults to 50,000 samples.}
19 | }
20 | \value{
21 | input_df with 2 new nested columns `gamma_params` and `samples`
22 | }
23 | \description{
24 | Adds 2 new nested columns to the input_df: `gamma_params` and `samples`
25 |     `gamma_params` in each row should be a tibble of length 2 (\eqn{k}
26 |         and \eqn{\theta} parameters)
27 |     `samples` in each row should be a tibble of length `n_samples`
28 | }
29 | \details{
30 | See update_rules vignette for a mathematical representation.
31 | \deqn{page_views_i ~ Poisson(\lambda)}
32 | \deqn{\lambda ~ Gamma(k, \theta)}
33 | Page Views Per Visit is sampled from a Gamma distribution with a Poisson likelihood
34 | of an individual having n page views by the end of their session.
35 | 
36 | This is not always the case, so verify your data follows the shape of
37 | an Poisson distribution before using this.
38 | }
39 | 


--------------------------------------------------------------------------------
/man/estimate_lift_vs_baseline.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/estimate_lift_vs_baseline.R
 3 | \name{estimate_lift_vs_baseline}
 4 | \alias{estimate_lift_vs_baseline}
 5 | \title{Estimate Lift vs Baseline}
 6 | \usage{
 7 | estimate_lift_vs_baseline(
 8 |   input_df,
 9 |   distribution,
10 |   priors = list(),
11 |   wrt_option,
12 |   metric = "lift",
13 |   threshold = 0.7
14 | )
15 | }
16 | \arguments{
17 | \item{input_df}{Dataframe containing option_name (str) and various other columns
18 | depending on the distribution type. See vignette for more details.}
19 | 
20 | \item{distribution}{String of the distribution name}
21 | 
22 | \item{priors}{Optional list of priors. Defaults will be use otherwise.}
23 | 
24 | \item{wrt_option}{string the option loss is calculated with respect to (wrt). Required.}
25 | 
26 | \item{metric}{string the type of loss.
27 | absolute will be the difference, on the outcome scale. 0 when best = wrt_option
28 | lift will be the (best - wrt_option) / wrt_option, 0 when best = wrt_option
29 | relative_risk will be the ratio best/wrt_option, 1 when best = wrt_option}
30 | 
31 | \item{threshold}{Lift percentage threshold between 0 and 1. (0.7
32 | threshold is "at least 70\% lift"). Defaults to 0.7.}
33 | }
34 | \value{
35 | numeric value remaining at the specified threshold
36 | }
37 | \description{
38 | Estimate Lift vs Baseline
39 | }
40 | \examples{
41 | input_df <- tibble::tibble(option_name = c("A", "B", "C"),
42 |     sum_clicks = c(1000, 1000, 1000),
43 |     sum_conversions = c(100, 120, 110))
44 | estimate_lift_vs_baseline(input_df, distribution = "conversion_rate", wrt_option = "A")
45 | 
46 | }
47 | 


--------------------------------------------------------------------------------
/tests/testthat/test-find_best_option.R:
--------------------------------------------------------------------------------
 1 | context("Find Best Option")
 2 | 
 3 | test_that("find_best_option returns the correct best option when max is best", {
 4 |   expected_output <- "B"
 5 |   input_df <- tibble::tibble(
 6 |     option_name = c("A", "B", "C"),
 7 |     sum_clicks = c(1000, 1000, 1000),
 8 |     sum_conversions = c(100, 120, 110)
 9 |   )
10 |   posterior_samples <- sample_from_posterior(input_df, "conversion_rate")
11 |   output <- find_best_option(posterior_samples = posterior_samples,
12 |                              distribution = "conversion_rate")
13 |   expect_equal(output, expected_output)
14 | })
15 | 
16 | test_that("find_best_option returns the correct best option when min is best", {
17 |   expected_output <- "C"
18 |   input_df <- tibble::tibble(
19 |     option_name = c("A", "B", "C"),
20 |     sum_clicks = c(1000, 1000, 1000),
21 |     sum_conversions = c(100, 120, 110),
22 |     sum_cost = c(150, 200, 100),
23 |   )
24 |   posterior_samples <- sample_from_posterior(input_df, "cpa")
25 |   output <- find_best_option(posterior_samples = posterior_samples,
26 |                              distribution = "cpa")
27 |   expect_equal(output, expected_output)
28 | })
29 | 
30 | 
31 | test_that("find_best_option returns when 2 options are equal", {
32 |   expected_output <- c("B","C")
33 |   input_df <- tibble::tibble(
34 |     option_name = c("A", "B", "C"),
35 |     sum_clicks = c(1000, 1000, 1000),
36 |     sum_conversions = c(100, 120, 120),
37 |   )
38 |   posterior_samples <- sample_from_posterior(input_df, "conversion_rate")
39 |   output <- find_best_option(posterior_samples = posterior_samples,
40 |                              distribution = "conversion_rate")
41 |   expect_true(all(output %in% expected_output))
42 | })
43 | 


--------------------------------------------------------------------------------
/tests/testthat/test-impute_missing_options.R:
--------------------------------------------------------------------------------
 1 | context("Impute Missing Options")
 2 | 
 3 | test_that("impute_missing_options adds a row if it doesn't exist", {
 4 |   expected_output <- tibble::tibble(option_name = c("B", "C", "A"), win_prob_raw = c(0.4, 0.6, 0.0))
 5 |   post_sample_example <- tibble::tibble(option_name = c("A", "B", "C"))
 6 |   wp_raw <- tibble::tibble(option_name = c("B", "C"),
 7 |                            win_prob_raw = c(0.4, 0.6))
 8 |   output <- impute_missing_options(posterior_samples = post_sample_example,
 9 |                                    wp_raw = wp_raw)
10 |   expect_true(is.data.frame(output))
11 |   expect_equal(output, expected_output)
12 | })
13 | 
14 | test_that("impute_missing_options adds multiple rows if they don't exist", {
15 |   expected_output <- tibble::tibble(option_name = c("C", "A", "B"), win_prob_raw = c(1.0, 0.0, 0.0))
16 |   post_sample_example <- tibble::tibble(option_name = c("A", "B", "C"))
17 |   wp_raw <- tibble::tibble(option_name = c("C"),
18 |                            win_prob_raw = c(1.0))
19 |   output <- impute_missing_options(posterior_samples = post_sample_example,
20 |                                    wp_raw = wp_raw)
21 |   expect_true(is.data.frame(output))
22 |   expect_equal(output, expected_output)
23 | })
24 | 
25 | test_that("impute_missing_options doesn't add anything if all exist", {
26 |   expected_output <- tibble::tibble(option_name = c("A", "B", "C"), win_prob_raw = c(0.3, 0.4, 0.3))
27 |   post_sample_example <- tibble::tibble(option_name = c("A", "B", "C"))
28 |   output <- impute_missing_options(posterior_samples = post_sample_example,
29 |                                    wp_raw = expected_output)
30 |   expect_true(is.data.frame(output))
31 |   expect_equal(output, expected_output)
32 | })
33 | 
34 | 


--------------------------------------------------------------------------------
/tests/testthat/test-estimate_win_prob_vs_baseline_given_posterior.R:
--------------------------------------------------------------------------------
 1 | context("Estimate Win Probability vs Baseline Given Posterior")
 2 | 
 3 | test_that("estimate_win_prob_vs_baseline_given_posterior returns only 2 options", {
 4 |   input_df <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_clicks = c(1000, 1000, 1000),
 7 |     sum_conversions = c(100, 120, 110)
 8 |   )
 9 |   posterior_samples <- sample_from_posterior(input_df, "conversion_rate", priors = list())
10 |   all_option_names <- unique(posterior_samples$option_name)
11 |   output <- estimate_win_prob_vs_baseline_given_posterior(posterior_samples = posterior_samples,
12 |                                                           "conversion_rate",
13 |                                                           "A")
14 |   # Subset exists
15 |   expect_true(all(output$option_name %in% all_option_names))
16 |   expect_true(nrow(output) == 2)
17 | })
18 | 
19 | 
20 | test_that("estimate_win_prob_vs_baseline_given_posterior handles when wrt_option is the best", {
21 |   # Can't be better than yourself
22 |   input_df <- tibble::tibble(
23 |     option_name = c("A", "B", "C"),
24 |     sum_clicks = c(1000, 1000, 1000),
25 |     sum_conversions = c(100, 120, 110)
26 |   )
27 |   posterior_samples <- sample_from_posterior(input_df, "conversion_rate", priors = list())
28 |   all_option_names <- unique(posterior_samples$option_name)
29 |   output <- estimate_win_prob_vs_baseline_given_posterior(posterior_samples = posterior_samples,
30 |                                                           distribution = "conversion_rate",
31 |                                                           wrt_option = "B")
32 |   expect_true(all(output$option_name %in% all_option_names))
33 |   expect_true(nrow(output) == 1)
34 | })
35 | 


--------------------------------------------------------------------------------
/man/sample_cpa.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample_cpa.R
 3 | \name{sample_cpa}
 4 | \alias{sample_cpa}
 5 | \title{Sample Cost Per Activation (CPA)}
 6 | \usage{
 7 | sample_cpa(input_df, priors, n_samples = 50000)
 8 | }
 9 | \arguments{
10 | \item{input_df}{Dataframe containing option_name (str), sum_conversions (dbl),
11 | sum_cost (dbl), and sum_clicks (dbl).}
12 | 
13 | \item{priors}{Optional list of priors {alpha0, beta0} for Beta and {k0, theta0}
14 | for Gamma.
15 | Default \eqn{Beta(1,1)} and \eqn{Gamma(1, 250)} will be use otherwise.}
16 | 
17 | \item{n_samples}{Optional integer value. Defaults to 50,000 samples.}
18 | }
19 | \value{
20 | input_df with 3 new nested columns `beta_params`, `gamma_params`, and `samples`
21 | }
22 | \description{
23 | Adds 3 new nested columns to the input_df: `beta_params`, `gamma_params`, and `samples`
24 |     `beta_params` and `gamma_params` in each row should be a tibble of length 2 (\eqn{\alpha}
25 |         and \eqn{\beta} parameters and \eqn{k} and \eqn{\theta} parameters)
26 |     `samples` in each row should be a tibble of length `n_samples`
27 | }
28 | \details{
29 | See update_rules vignette for a mathematical representation.
30 | This is a combination of a Beta-Bernoulli update and a Gamma-Exponential update.
31 | 
32 | \deqn{conversion_i ~ Bernoulli(\phi)}
33 | \deqn{cpc_i ~ Exponential(\lambda)}
34 | \deqn{\phi ~ Beta(\alpha, \beta)}
35 | \deqn{\lambda ~ Gamma(k, \theta)}
36 | 
37 | \deqn{cpa_i ~ 1/ (Bernoulli(\phi) * Exponential(\lambda))}
38 | \deqn{averageCPA ~ 1/(\phi\lambda)}
39 | 
40 | Conversion Rate is sampled from a Beta distribution with a Binomial likelihood
41 | of an individual converting.
42 | 
43 | Average CPC is sampled from a Gamma distribution with an Exponential likelihood
44 | of an individual cost.
45 | }
46 | 


--------------------------------------------------------------------------------
/tests/testthat/test-is_prior_valid.R:
--------------------------------------------------------------------------------
 1 | context("Is Prior Valid")
 2 | 
 3 | test_that("is_prior_valid returns TRUE when valid value exists.", {
 4 |   example_prior_list <- list(x = 1, y = 10, z = 15)
 5 |   output <- is_prior_valid(priors_list = example_prior_list, valid_prior = "y")
 6 |   testthat::expect_equal(output, expected = TRUE)
 7 | })
 8 | 
 9 | test_that("is_prior_valid returns TRUE when valid value exists and list is length 1.", {
10 |   example_prior_list <- list(y = 10)
11 |   output <- is_prior_valid(priors_list = example_prior_list, valid_prior = "y")
12 |   testthat::expect_equal(output, expected = TRUE)
13 | })
14 | 
15 | test_that("is_prior_valid returns FALSE when empty list is passed in.", {
16 |   example_prior_list <- list()
17 |   output <- is_prior_valid(priors_list = example_prior_list, valid_prior = "y")
18 |   testthat::expect_equal(output, expected = FALSE)
19 | })
20 | 
21 | test_that("is_prior_valid returns FALSE when valid value is not in list.", {
22 |   example_prior_list <- list(x = 1, y = 10, z = 15)
23 |   expect_warning({
24 |     output <- is_prior_valid(priors_list = example_prior_list, valid_prior = "a")
25 |   })
26 |   testthat::expect_equal(output, expected = FALSE)
27 | })
28 | 
29 | test_that("is_prior_valid returns FALSE when negative value is in list.", {
30 |   example_prior_list <- list(x = 1, y = -8, z = 15)
31 |   expect_warning({
32 |     output <- is_prior_valid(priors_list = example_prior_list, valid_prior = "y")
33 |   })
34 |   testthat::expect_equal(output, expected = FALSE)
35 | })
36 | 
37 | test_that("is_prior_valid returns FALSE value in list is 0.", {
38 |   example_prior_list <- list(x = 1, y = 0, z = 15)
39 |   expect_warning({
40 |     output <- is_prior_valid(priors_list = example_prior_list, valid_prior = "y")
41 |   })
42 |   testthat::expect_equal(output, expected = FALSE)
43 | })
44 | 


--------------------------------------------------------------------------------
/R/sample_cpc.R:
--------------------------------------------------------------------------------
 1 | #' Sample Cost Per Click
 2 | #'
 3 | #' Adds 2 new nested columns to the input_df: `gamma_params` and `samples`
 4 | #'      `gamma_params` in each row should be a tibble of length 2 (\eqn{k}
 5 | #'         and \eqn{\theta} parameters)
 6 | #'     `samples` in each row should be a tibble of length `n_samples`
 7 | #'
 8 | #' See update_rules vignette for a mathematical representation.
 9 | #' \deqn{cpc_i ~ Exponential(\lambda)}
10 | #' \deqn{\lambda ~ Gamma(k, \theta)}
11 | #' Average CPC is sampled from a Gamma distribution with an Exponential likelihood
12 | #' of an individual cost.
13 | #'
14 | #' @param input_df Dataframe containing option_name (str), sum_clicks (dbl), sum_cost (dbl).
15 | #' @param priors Optional list of priors {k0, theta0} for Gamma.
16 | #'     Default \eqn{Gamma(1, 250)} will be use otherwise.
17 | #' @param n_samples Optional integer value. Defaults to 50,000 samples.
18 | #'
19 | #' @importFrom purrr map map2
20 | #' @importFrom dplyr mutate %>%
21 | #' @importFrom stats rgamma
22 | #' @importFrom rlang .data
23 | #'
24 | #' @return input_df with 2 new nested columns `gamma_params` and `samples`
25 | #'
26 | sample_cpc <- function(input_df, priors, n_samples = 5e4){
27 |   input_df %>%
28 |     dplyr::mutate(
29 |       gamma_params = purrr::map2(.x = .data$sum_clicks,
30 |                                  .y = .data$sum_cost,
31 |                                  ~ update_gamma(k = .x,
32 |                                                 theta = .y,
33 |                                                 priors = priors)
34 |       ),
35 |       samples = purrr::map(.x = .data$gamma_params,
36 |                            ~ 1 / stats::rgamma(n_samples,
37 |                                                shape = .x$k,
38 |                                                scale = .x$theta)
39 |       )
40 |     )
41 | }
42 | 


--------------------------------------------------------------------------------
/R/validate_input_df.R:
--------------------------------------------------------------------------------
 1 | #' Validate Input DataFrame
 2 | #'
 3 | #' Validates the input dataframe has the correct type, correct required column names,
 4 | #' that the distribution is valid, that the column types are correct, and that the
 5 | #' column values are greater than or equal to 0 when they are numeric.
 6 | #'
 7 | #' @param input_df Dataframe containing option_name (str) and various other columns
 8 | #'     depending on the distribution type. See vignette for more details.
 9 | #' @param distribution String of the distribution name
10 | #'
11 | #' @return Bool TRUE if all checks pass.
12 | #'
13 | #' @importFrom dplyr select select_if filter %>%
14 | #' @importFrom purrr walk
15 | #' @importFrom rlang .data
16 | #' @export
17 | #'
18 | #' @examples
19 | #' input_df <- tibble::tibble(
20 | #'    option_name = c("A", "B"),
21 | #'    sum_clicks = c(1000, 1000),
22 | #'    sum_conversions = c(100, 120)
23 | #' )
24 | #' validate_input_df(input_df, "conversion_rate")
25 | #'
26 | validate_input_df <- function(input_df, distribution){
27 |   if(!is.data.frame(input_df)){
28 |     stop("input_df is not of type data frame.")
29 |   }
30 | 
31 |   valid_distribution_names <- unique(distribution_column_mapping$distribution_type)
32 |   if(!distribution %in% valid_distribution_names){
33 |     comma_sep_names<- paste(valid_distribution_names, collapse = ", ")
34 |     stop(paste(distribution),
35 |          "is an invalid distribution. Select a distribution from the following:",
36 |          comma_sep_names)
37 |   }
38 | 
39 |   required_column_names <- distribution_column_mapping %>%
40 |     dplyr::filter(.data$distribution_type == distribution) %>%
41 |     dplyr::select(-"distribution_type") %>%
42 |     dplyr::select_if(~ sum(.) == 1) %>%
43 |     colnames()
44 | 
45 |   purrr::walk(required_column_names, ~validate_input_column(.x, input_df))
46 |   TRUE
47 | }
48 | 
49 | 


--------------------------------------------------------------------------------
/tests/testthat/test-update_gamma.R:
--------------------------------------------------------------------------------
 1 | context("Update Gamma")
 2 | 
 3 | test_that("update_gamma updates with priors", {
 4 |   expected_output <- tibble::tibble(k = 3, theta = 1000/(1 + 1000 * 100))
 5 |   output <- update_gamma(k = 1, theta = 100, priors = list(k0 = 2, theta0 = 1000))
 6 |   testthat::expect_equal(output, expected_output)
 7 | })
 8 | 
 9 | test_that("update_gamma updates without priors", {
10 |   expected_output <- tibble::tibble(k = 11, theta = 250/(1 + 250 * 200))
11 |   output <- update_gamma(k = 10, theta = 200)
12 |   testthat::expect_equal(output, expected_output)
13 | })
14 | 
15 | test_that("update_gamma updates but warns with invalid priors", {
16 |   expected_output <- tibble::tibble(k = 11, theta = 250/(1 + 250 * 200))
17 |   invalid_priors <- list(k0 = -2, theta0 = 1000)
18 |   testthat::expect_warning({
19 |     output <- update_gamma(k = 10, theta = 200, priors = invalid_priors)
20 |   })
21 |   testthat::expect_equal(output, expected_output)
22 | })
23 | 
24 | test_that("update_gamma updates with alternate priors", {
25 |   expected_output <- tibble::tibble(k = 3, theta = 1000/(1 + 1000 * 100))
26 |   output <- update_gamma(k = 1, theta = 100, priors = list(k01 = 2, theta01 = 1000), alternate_priors = TRUE)
27 |   testthat::expect_equal(output, expected_output)
28 | })
29 | 
30 | test_that("update_gamma uses default priors with only alternate priors when set to FALSE", {
31 |   expected_output <- tibble::tibble(k = 2, theta = 250/(1 + 250 * 100))
32 |   testthat::expect_warning(output <- update_gamma(k = 1,
33 |                                                   theta = 100,
34 |                                                   priors = list(k01 = 2, theta01 = 1000),
35 |                                                   alternate_priors = FALSE)
36 |                            )
37 |   testthat::expect_equal(output, expected_output)
38 | })
39 | 


--------------------------------------------------------------------------------
/R/sample_conv_rate.R:
--------------------------------------------------------------------------------
 1 | #' Sample Conversion Rate
 2 | #'
 3 | #' Adds 2 new nested columns to the input_df: `beta_params` and `samples`
 4 | #'     `beta_params` in each row should be a tibble of length 2 (\eqn{\alpha}
 5 | #'         and \eqn{\beta} parameters)
 6 | #'     `samples` in each row should be a tibble of length `n_samples`
 7 | #'
 8 | #' See update_rules vignette for a mathematical representation.
 9 | #' \deqn{conversion_i ~ Bernoulli(\phi)}
10 | #' \deqn{\phi ~ Beta(\alpha, \beta)}
11 | #' Conversion Rate is sampled from a Beta distribution with a Binomial likelihood
12 | #' of an individual converting.
13 | #'
14 | #' @param input_df Dataframe containing option_name (str),
15 | #'     sum_conversions (dbl), and sum_clicks (dbl).
16 | #' @param priors Optional list of priors alpha0 and beta0.
17 | #'     Default \eqn{Beta(1,1)} will be use otherwise.
18 | #' @param n_samples Optional integer value. Defaults to 50,000 samples.
19 | #'
20 | #' @importFrom purrr map map2
21 | #' @importFrom dplyr mutate %>%
22 | #' @importFrom rlang .data
23 | #'
24 | #' @return input_df with 2 new nested columns `beta_params` and `samples`
25 | #'
26 | sample_conv_rate <- function(input_df, priors, n_samples = 5e4){
27 |   input_df %>%
28 |     dplyr::mutate(
29 |       beta_params = purrr::map2(.x = .data$sum_conversions,
30 |                                 .y = .data$sum_clicks,
31 |                                 ~ update_beta(alpha = .x,
32 |                                               beta = .y - .x,
33 |                                               priors = priors)
34 |                                 ),
35 |       samples = purrr::map(.x = .data$beta_params,
36 |                            ~ rbeta(n_samples,
37 |                                    shape1 = .x$alpha,
38 |                                    shape2 = .x$beta)
39 |                            )
40 |     )
41 | }
42 | 


--------------------------------------------------------------------------------
/R/estimate_win_prob_given_posterior.R:
--------------------------------------------------------------------------------
 1 | #' Estimate Win Probability Given Posterior Distribution
 2 | #'
 3 | #' @param posterior_samples Tibble of data in long form with 2 columns
 4 | #' `option_name` and `samples`
 5 | #' @param winner_is_max Boolean. This should almost always be TRUE. If a larger number is better
 6 | #' then this should be TRUE. This should be FALSE for metrics such as CPA or CPC where a higher cost
 7 | #' is not necessarily better.
 8 | #'
 9 | #' @return Tibble of each option_name and the win probability expressed as a percentage and a decimal `raw`
10 | #' @export
11 | #' @importFrom dplyr %>% group_by filter summarise mutate arrange desc
12 | #'
13 | #' @examples
14 | #' # Requires posterior_samples dataframe. See `sample_from_posterior()`
15 | #' # for an example.
16 | #' \dontrun{
17 | #' estimate_win_prob_given_posterior(posterior_samples = posterior_samples)
18 | #' estimate_win_prob_given_posterior(
19 | #'     posterior_samples = posterior_samples,
20 | #'     winner_is_max = TRUE
21 | #' )
22 | #' }
23 | #'
24 | estimate_win_prob_given_posterior <- function(posterior_samples, winner_is_max = TRUE){
25 |   validate_posterior_samples(posterior_samples)
26 |   n_unique_options <- length(unique(posterior_samples$option_name))
27 |   samples_per_option <- nrow(posterior_samples)/n_unique_options
28 |   wp_raw <- posterior_samples %>%
29 |     dplyr::group_by(.data$sample_id) %>%
30 |     dplyr::filter(.data$samples == if(winner_is_max) max(.data$samples) else min(.data$samples)) %>%
31 |     dplyr::group_by(.data$option_name) %>%
32 |     dplyr::summarise(win_prob_raw = dplyr::n()/samples_per_option)
33 | 
34 |   wp_raw_imputed <- impute_missing_options(posterior_samples, wp_raw)
35 | 
36 |   wp_raw_imputed %>%
37 |     dplyr::mutate(win_prob = paste0(round(.data$win_prob_raw * 100, 2), "%")) %>%
38 |     dplyr::arrange(dplyr::desc(.data$win_prob_raw))
39 | }
40 | 


--------------------------------------------------------------------------------
/R/estimate_lift_vs_baseline.R:
--------------------------------------------------------------------------------
 1 | #' Estimate Lift vs Baseline
 2 | #'
 3 | #' @param input_df Dataframe containing option_name (str) and various other columns
 4 | #'     depending on the distribution type. See vignette for more details.
 5 | #' @param distribution String of the distribution name
 6 | #' @param priors Optional list of priors. Defaults will be use otherwise.
 7 | #' @param wrt_option string the option loss is calculated with respect to (wrt). Required.
 8 | #' @param metric string the type of loss.
 9 | #'   absolute will be the difference, on the outcome scale. 0 when best = wrt_option
10 | #'   lift will be the (best - wrt_option) / wrt_option, 0 when best = wrt_option
11 | #'   relative_risk will be the ratio best/wrt_option, 1 when best = wrt_option
12 | #' @param threshold Lift percentage threshold between 0 and 1. (0.7
13 | #'   threshold is "at least 70\% lift"). Defaults to 0.7.
14 | #'
15 | #' @return numeric value remaining at the specified threshold
16 | #' @export
17 | #'
18 | #' @importFrom stats quantile
19 | #'
20 | #' @examples
21 | #' input_df <- tibble::tibble(option_name = c("A", "B", "C"),
22 | #'     sum_clicks = c(1000, 1000, 1000),
23 | #'     sum_conversions = c(100, 120, 110))
24 | #' estimate_lift_vs_baseline(input_df, distribution = "conversion_rate", wrt_option = "A")
25 | #'
26 | estimate_lift_vs_baseline <- function(input_df, distribution, priors = list(),
27 |                                       wrt_option, metric = "lift", threshold = 0.7){
28 |   validate_input_df(input_df, distribution)
29 | 
30 |   # Sample from posterior distribution
31 |   posterior_samples <- sample_from_posterior(input_df, distribution, priors)
32 | 
33 |   # Calculate Lift Distribution
34 |   estimate_lift(posterior_samples = posterior_samples,
35 |                 distribution = distribution,
36 |                 wrt_option = wrt_option,
37 |                 metric = metric) %>%
38 |     # Select a single point
39 |     stats::quantile(probs = 1 - threshold, na.rm = TRUE)
40 | }
41 | 


--------------------------------------------------------------------------------
/man/estimate_value_remaining.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/estimate_value_remaining.R
 3 | \name{estimate_value_remaining}
 4 | \alias{estimate_value_remaining}
 5 | \title{Estimate Value Remaining}
 6 | \usage{
 7 | estimate_value_remaining(
 8 |   input_df,
 9 |   distribution,
10 |   priors = list(),
11 |   wrt_option = NULL,
12 |   metric = "lift",
13 |   threshold = 0.95
14 | )
15 | }
16 | \arguments{
17 | \item{input_df}{Dataframe containing option_name (str) and various other columns
18 | depending on the distribution type. See vignette for more details.}
19 | 
20 | \item{distribution}{String of the distribution name}
21 | 
22 | \item{priors}{Optional list of priors. Defaults will be use otherwise.}
23 | 
24 | \item{wrt_option}{string the option loss is calculated with respect to (wrt). If NULL, the best option will be chosen.}
25 | 
26 | \item{metric}{string the type of loss.
27 | absolute will be the difference, on the outcome scale. 0 when best = wrt_option
28 | lift will be the (best - wrt_option) / wrt_option, 0 when best = wrt_option
29 | relative_risk will be the ratio best/wrt_option, 1 when best = wrt_option}
30 | 
31 | \item{threshold}{The confidence interval specifying what the "worst case scenario should be.
32 | Defaults to 95\%. (optional)}
33 | }
34 | \value{
35 | numeric value remaining at the specified threshold
36 | }
37 | \description{
38 | Estimates value remaining or loss (in terms of percent lift, absolute, or relative).
39 | }
40 | \examples{
41 | input_df <- tibble::tibble(option_name = c("A", "B", "C"),
42 |     sum_clicks = c(1000, 1000, 1000),
43 |     sum_conversions = c(100, 120, 110))
44 | estimate_value_remaining(input_df, distribution = "conversion_rate")
45 | estimate_value_remaining(input_df,
46 |     distribution = "conversion_rate",
47 |     threshold = 0.99)
48 | estimate_value_remaining(input_df,
49 |     distribution = "conversion_rate",
50 |     wrt_option = "A",
51 |     metric = "absolute")
52 | 
53 | }
54 | 


--------------------------------------------------------------------------------
/man/sample_total_cm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample_total_cm.R
 3 | \name{sample_total_cm}
 4 | \alias{sample_total_cm}
 5 | \title{Sample Total CM (Given Impression Count)}
 6 | \usage{
 7 | sample_total_cm(input_df, priors, n_samples = 50000)
 8 | }
 9 | \arguments{
10 | \item{input_df}{Dataframe containing option_name (str),
11 | sum_conversions (dbl), sum_revenue (dbl), and sum_clicks (dbl).}
12 | 
13 | \item{priors}{Optional list of priors {alpha0, beta0} for Beta,
14 | {k0, theta0} for Gamma Inverse Revenue, and {k01, theta01} for
15 | Gamma Cost (uses alternate priors so they can be different from Revenue).
16 | Default \eqn{Beta(1,1)} and \eqn{Gamma(1, 250)} will be use otherwise.}
17 | 
18 | \item{n_samples}{Optional integer value. Defaults to 50,000 samples.}
19 | }
20 | \value{
21 | input_df with 5 new nested columns `beta_params_conv`,
22 |     `beta_params_ctr`, `gamma_params_rev`,`gamma_params_cost`,
23 |     and `samples`
24 | }
25 | \description{
26 | Adds 4 new nested columns to the input_df: `beta_params_ctr`,
27 | `beta_params_conv`,`gamma_params_rev`, `gamma_params_cost`
28 | and `samples`.
29 | }
30 | \details{
31 | `beta_params` and `gamma_params` in each row should be a tibble of length 2
32 | (\eqn{\alpha} and \eqn{\beta} params and \eqn{k} and \eqn{\theta} params).
33 | `samples` in each row should be a tibble of length `n_samples`.
34 | 
35 | One assumption in this model is that sum_impressions is not stochastic.
36 | This assumes that Clicks are stochastically generated from a set number
37 | of Impressions. It does not require that the number of impressions are
38 | equal on either side. Generally this assumption holds true in marketing
39 | tests where traffic is split 50/50 and very little variance is observed
40 | in the number of impressions on either side.
41 | 
42 | 
43 | See update_rules vignette for a mathematical representation.
44 | 
45 | \deqn{TotalCM = Impr * ExpectedCTR * (RevPerOrder * OrdersPerClick - ExpectedCPC)}
46 | }
47 | 


--------------------------------------------------------------------------------
/R/sample_session_duration.R:
--------------------------------------------------------------------------------
 1 | #' Sample Session Duration
 2 | #'
 3 | #' Adds 2 new nested columns to the input_df: `gamma_params` and `samples`
 4 | #'     `gamma_params` in each row should be a tibble of length 2 (\eqn{k}
 5 | #'         and \eqn{\theta} parameters)
 6 | #'     `samples` in each row should be a tibble of length `n_samples`
 7 | #'
 8 | #' See update_rules vignette for a mathematical representation.
 9 | #' \deqn{duration_i ~ Exponential(\lambda)}
10 | #' \deqn{\lambda ~ Gamma(k, \theta)}
11 | #' Session Duration is sampled from a Gamma distribution with a Exponential likelihood
12 | #' of an individual leaving the site or ending a session at time t.
13 | #'
14 | #' This is not always the case, so verify your data follows the shape of
15 | #' an exponential distribution before using this.
16 | #'
17 | #' @param input_df Dataframe containing option_name (str),
18 | #'     sum_sessions (dbl), and sum_duration (dbl).
19 | #' @param priors Optional list of priors k0 and theta0.
20 | #'     Default \eqn{Gamma(1, 250)} will be use otherwise.
21 | #' @param n_samples Optional integer value. Defaults to 50,000 samples.
22 | #'
23 | #' @importFrom purrr map map2
24 | #' @importFrom dplyr mutate %>%
25 | #' @importFrom rlang .data
26 | #'
27 | #' @return input_df with 2 new nested columns `gamma_params` and `samples`
28 | #'
29 | sample_session_duration <- function(input_df, priors, n_samples = 5e4){
30 |   input_df %>%
31 |     dplyr::mutate(
32 |       gamma_params = purrr::map2(.x = .data$sum_sessions,
33 |                                  .y = .data$sum_duration,
34 |                                  ~ update_gamma(k = .x,
35 |                                                 theta = .y,
36 |                                                 priors = priors)
37 |       ),
38 |       samples = purrr::map(.x = .data$gamma_params,
39 |                            ~ 1 / stats::rgamma(n_samples,
40 |                                                shape = .x$k,
41 |                                                scale = .x$theta)
42 |       )
43 |     )
44 | }
45 | 


--------------------------------------------------------------------------------
/R/update_gamma.R:
--------------------------------------------------------------------------------
 1 | #' Update Gamma
 2 | #'
 3 | #' Updates Gamma Distribution with the Gamma-Exponential
 4 | #' conjugate prior update rule. Parameterized by \eqn{k} and \eqn{\theta} (not \eqn{\alpha, \beta})
 5 | #'
 6 | #' @param k Double value for \eqn{k} (total revenue generating events). Must be 0 or greater.
 7 | #' @param theta Double value for \eqn{\theta} (sum of revenue). Must be 0 or greater.
 8 | #' @param priors An optional list object that contains k0 and
 9 | #'     theta0. Otherwise the function will use \eqn{Gamma(1,250)} as the prior distribution.
10 | #'     If a second gamma distribution is used k01 and theta01 can be defined as separate priors
11 | #'     when alternate_priors is set to TRUE.
12 | #' @param alternate_priors Boolean Defaults to FALSE. Allows a user to specify alternate
13 | #'     prior names so the same prior isn't required when multiple gamma distributions are used.
14 | #'
15 | #' @return A list object that contains `k` and `theta`
16 | #' @export
17 | #' @importFrom tibble tibble
18 | #' @importFrom magrittr set_names
19 | #'
20 | #' @examples
21 | #' update_gamma(k = 1, theta = 100, priors = list(k0 = 2, theta0 = 1000))
22 | #' update_gamma(k = 10, theta = 200)
23 | #'
24 | update_gamma <- function(k, theta, priors = list(), alternate_priors = FALSE) {
25 |   validate_data_values(data_values = list(k = k, theta = theta))
26 | 
27 |   # Set Attributes
28 |   valid_gamma_params <- if(alternate_priors) c("k01", "theta01") else c("k0", "theta0")
29 |   default_gamma_priors <- list(k0 = 1, theta0 = 250) %>%
30 |     magrittr::set_names(valid_gamma_params)
31 | 
32 |   # Validate Priors
33 |   validated_priors <- validate_priors(priors = priors,
34 |                                       valid_priors = valid_gamma_params,
35 |                                       default_priors = default_gamma_priors)
36 | 
37 |   k0 <- if(alternate_priors) validated_priors$k01 else validated_priors$k0
38 |   theta0 <- if(alternate_priors) validated_priors$theta01 else validated_priors$theta0
39 | 
40 |   tibble::tibble(k = k0 + k, theta = theta0/(1 + theta0 * theta))
41 | }
42 | 


--------------------------------------------------------------------------------
/man/sample_rev_per_session.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sample_rev_per_session.R
 3 | \name{sample_rev_per_session}
 4 | \alias{sample_rev_per_session}
 5 | \title{Sample Rev Per Session}
 6 | \usage{
 7 | sample_rev_per_session(input_df, priors, n_samples = 50000)
 8 | }
 9 | \arguments{
10 | \item{input_df}{Dataframe containing option_name (str),
11 | sum_conversions (dbl), sum_revenue (dbl), and sum_clicks (dbl).}
12 | 
13 | \item{priors}{Optional list of priors {alpha0, beta0} for Beta
14 | and {k0, theta0} for Gamma. Default \eqn{Beta(1,1)}
15 | and \eqn{Gamma(1, 250)} will be use otherwise.}
16 | 
17 | \item{n_samples}{Optional integer value. Defaults to 50,000 samples.}
18 | }
19 | \value{
20 | input_df with 3 new nested columns `beta_params`, `gamma_params`, and `samples`
21 | }
22 | \description{
23 | Adds 3 new nested columns to the input_df: `beta_params`, `gamma_params`, and `samples`
24 |     `beta_params` and `gamma_params` in each row should be a tibble of length 2 (\eqn{\alpha}
25 |         and \eqn{\beta} parameters and \eqn{k} and \eqn{\theta} parameters)
26 |     `samples` in each row should be a tibble of length `n_samples`
27 | }
28 | \details{
29 | See update_rules vignette for a mathematical representation.
30 | 
31 | \deqn{RevPerSession = RevPerOrder * OrdersPerClick}
32 | This is a combination of a Beta-Bernoulli update and a Gamma-Exponential update.
33 | 
34 | \deqn{conversion_i ~ Bernoulli(\phi)}
35 | \deqn{revenue_i ~ Exponential(\lambda)}
36 | \deqn{\phi ~ Beta(\alpha, \beta)}
37 | \deqn{\lambda ~ Gamma(k, \theta)}
38 | 
39 | \deqn{revenue_i ~ Bernoulli(\phi) * Exponential(\lambda)^-1)}
40 | \deqn{Rev Per Session ~ \phi / \lambda}
41 | 
42 | Conversion Rate is sampled from a Beta distribution with a Binomial likelihood
43 | of an individual converting.
44 | 
45 | Average Rev Per Order is sampled from a Gamma distribution with an Exponential likelihood
46 | of Revenue from an individual order.
47 | This function makes sense to use if there is a distribution of possible revenue values
48 | that can be produced from a single order or conversion.
49 | }
50 | 


--------------------------------------------------------------------------------
/tests/testthat/test-validate_input_column.R:
--------------------------------------------------------------------------------
 1 | context("Validate Input Column")
 2 | 
 3 | test_that("validate_input_column returns nothing with valid data", {
 4 |   input_df <- tibble::tibble(
 5 |     option_name = c("A", "B"),
 6 |     sum_clicks = c(1000, 1000),
 7 |     sum_conversions = c(100, 120)
 8 |   )
 9 |   expect_invisible(validate_input_column("option_name", input_df))
10 |   expect_invisible(validate_input_column("sum_clicks", input_df))
11 |   expect_invisible(validate_input_column("sum_conversions", input_df))
12 | })
13 | 
14 | test_that("validate_input_column fails when required column is not in the df", {
15 |   input_df <- tibble::tibble(
16 |     option_name = c("A", "B"),
17 |     sum_clicks = c(1000, 1000),
18 |     sum_conversions = c(100, 120)
19 |   )
20 |   expect_error(validate_input_column("sum_revenue", input_df))
21 | })
22 | 
23 | test_that("validate_input_column fails when option_name is not a character string column", {
24 |   input_df <- tibble::tibble(
25 |     option_name = as.double(c(0, 2)),
26 |     sum_clicks = c(1000, 1000),
27 |     sum_conversions = c(100, 120)
28 |   )
29 |   expect_error(validate_input_column("option_name", input_df))
30 | })
31 | 
32 | test_that("validate_input_column fails when sum_clicks is not a double column", {
33 |   input_df <- tibble::tibble(
34 |     option_name = c("A", "B"),
35 |     sum_clicks = c("something wrong", "another wrong thing"),
36 |     sum_conversions = c(100, 120)
37 |   )
38 |   expect_error(validate_input_column("sum_clicks", input_df))
39 | })
40 | 
41 | test_that("validate_input_column fails when a value is less than 0", {
42 |   input_df <- tibble::tibble(
43 |     option_name = as.double(c(0, 2)),
44 |     sum_clicks = c(1000, 1000),
45 |     sum_conversions = c(-2, 120)
46 |   )
47 |   expect_error(validate_input_column("sum_conversions", input_df))
48 | })
49 | 
50 | test_that("validate_input_column returns nothing when a value is equal to 0", {
51 |   input_df <- tibble::tibble(
52 |     option_name = as.double(c(0, 2)),
53 |     sum_clicks = c(1000, 0),
54 |     sum_conversions = c(0, 120)
55 |   )
56 |   expect_invisible(validate_input_column("sum_conversions", input_df))
57 | })
58 | 


--------------------------------------------------------------------------------
/tests/testthat/test-estimate_win_prob_given_posterior.R:
--------------------------------------------------------------------------------
 1 | context("Estimate Win Probability Given Posterior")
 2 | 
 3 | test_that("estimate_win_prob_given_posterior returns correct results", {
 4 |   input_df <- tibble::tibble(
 5 |     option_name = c("A", "B", "C"),
 6 |     sum_clicks = c(1000, 1000, 1000),
 7 |     sum_conversions = c(100, 120, 110)
 8 |   )
 9 |   posterior_samples <- sample_from_posterior(input_df, "conversion_rate", priors = list())
10 |   all_option_names <- unique(posterior_samples$option_name)
11 | 
12 |   output <- estimate_win_prob_given_posterior(posterior_samples = posterior_samples, winner_is_max = TRUE)
13 |   expected_column_names <- c("option_name", "win_prob_raw", "win_prob")
14 | 
15 |   expect_true(is.data.frame(output))
16 |   expect_true(all(expected_column_names %in% colnames(output)))
17 |   expect_true(all(output$option_name %in% all_option_names))
18 |   expect_true(is.double(output$win_prob_raw))
19 |   expect_true(is.character(output$win_prob))
20 |   expect_equal(nrow(output), length(all_option_names))
21 | })
22 | 
23 | test_that("estimate_win_prob_given_posterior returns correct results when winner_is_max = FALSE", {
24 |   input_df <- tibble::tibble(
25 |     option_name = c("A", "B", "C"),
26 |     sum_clicks = c(1000, 1000, 1000),
27 |     sum_conversions = c(100, 120, 110)
28 |   )
29 |   posterior_samples <- sample_from_posterior(input_df, "conversion_rate", priors = list())
30 |   all_option_names <- unique(posterior_samples$option_name)
31 | 
32 |   output <- estimate_win_prob_given_posterior(posterior_samples = posterior_samples, winner_is_max = FALSE)
33 |   expected_column_names <- c("option_name", "win_prob_raw", "win_prob")
34 |   win_prob_of_lowest <- as.double(output[output$option_name == "A","win_prob_raw"][1])
35 | 
36 |   expect_equal(win_prob_of_lowest, max(output$win_prob_raw))
37 | 
38 |   expect_true(is.data.frame(output))
39 |   expect_true(all(expected_column_names %in% colnames(output)))
40 |   expect_true(all(output$option_name %in% all_option_names))
41 |   expect_true(is.double(output$win_prob_raw))
42 |   expect_true(is.character(output$win_prob))
43 |   expect_equal(nrow(output), length(all_option_names))
44 | })
45 | 


--------------------------------------------------------------------------------
/R/update_dirichlet.R:
--------------------------------------------------------------------------------
 1 | #' Update Dirichlet Distribution
 2 | #'
 3 | #' This function updates the Dirichlet distribution with the
 4 | #' Dirichlet-Multinomial conjugate prior update rule.
 5 | #'
 6 | #' TODO: This function currently only works in 3 dimensions.
 7 | #' Should be extended into N dimensions in the future. Can use ... notation.
 8 | #'
 9 | #' @param alpha_0 Double value for alpha_0 (count of failures). Must be 0 or greater.
10 | #' @param alpha_1 Double value for alpha_1 (count of successes side 1). Must be 0 or greater.
11 | #' @param alpha_2 Double value for alpha_2 (count of successes side 2). Must be 0 or greater.
12 | #' @param priors An optional list object that contains alpha00, alpha01, and alpha02.
13 | #'     Otherwise the function with use \eqn{Dirichlet(1,1,1)} as the prior distribution.
14 | #'
15 | #' @return tibble with columns alpha_0, alpha_1, and alpha_2
16 | #' @export
17 | #' @importFrom tibble tibble
18 | #'
19 | #' @examples
20 | #' update_dirichlet(alpha_0 = 20, alpha_1 = 5, alpha_2 = 2)
21 | #' sample_priors_list <- list(alpha00 = 2, alpha01 = 3, alpha02 = 5)
22 | #' update_dirichlet(alpha_0 = 20, alpha_1 = 5, alpha_2 = 2, priors = sample_priors_list)
23 | #'
24 | update_dirichlet <- function(alpha_0, alpha_1, alpha_2, priors = list()) {
25 |   validate_data_values(data_values = list(alpha_0 = alpha_0, # None
26 |                                           alpha_1 = alpha_1, # sum_conversions
27 |                                           alpha_2 = alpha_2)) # sum_conversions_2
28 | 
29 |   # Set Attributes
30 |   valid_dirichlet_params <- c("alpha00", "alpha01", "alpha02")
31 |   default_dirichlet_priors <- list(alpha00 = 1, alpha01 = 1, alpha02 = 1)
32 | 
33 |   # Validate Priors
34 |   validated_priors <- validate_priors(priors = priors,
35 |                                       valid_priors = valid_dirichlet_params,
36 |                                       default_priors = default_dirichlet_priors)
37 |   alpha00 <- validated_priors$alpha00
38 |   alpha01 <- validated_priors$alpha01
39 |   alpha02 <- validated_priors$alpha02
40 | 
41 |   tibble::tibble(alpha_0 = alpha_0 + alpha00,
42 |                  alpha_1 = alpha_1 + alpha01,
43 |                  alpha_2 = alpha_2 + alpha02)
44 | }
45 | 


--------------------------------------------------------------------------------
/R/estimate_value_remaining.R:
--------------------------------------------------------------------------------
 1 | #' Estimate Value Remaining
 2 | #'
 3 | #' Estimates value remaining or loss (in terms of percent lift, absolute, or relative).
 4 | #'
 5 | #' @param input_df Dataframe containing option_name (str) and various other columns
 6 | #'     depending on the distribution type. See vignette for more details.
 7 | #' @param distribution String of the distribution name
 8 | #' @param priors Optional list of priors. Defaults will be use otherwise.
 9 | #' @param wrt_option string the option loss is calculated with respect to (wrt). If NULL, the best option will be chosen.
10 | #' @param metric string the type of loss.
11 | #'   absolute will be the difference, on the outcome scale. 0 when best = wrt_option
12 | #'   lift will be the (best - wrt_option) / wrt_option, 0 when best = wrt_option
13 | #'   relative_risk will be the ratio best/wrt_option, 1 when best = wrt_option
14 | #' @param threshold The confidence interval specifying what the "worst case scenario should be.
15 | #'     Defaults to 95\%. (optional)
16 | #'
17 | #' @return numeric value remaining at the specified threshold
18 | #' @export
19 | #' @importFrom stats quantile
20 | #'
21 | #' @examples
22 | #' input_df <- tibble::tibble(option_name = c("A", "B", "C"),
23 | #'     sum_clicks = c(1000, 1000, 1000),
24 | #'     sum_conversions = c(100, 120, 110))
25 | #' estimate_value_remaining(input_df, distribution = "conversion_rate")
26 | #' estimate_value_remaining(input_df,
27 | #'     distribution = "conversion_rate",
28 | #'     threshold = 0.99)
29 | #' estimate_value_remaining(input_df,
30 | #'     distribution = "conversion_rate",
31 | #'     wrt_option = "A",
32 | #'     metric = "absolute")
33 | #'
34 | estimate_value_remaining <- function(input_df, distribution, priors = list(),
35 |                                      wrt_option = NULL, metric = "lift", threshold = 0.95){
36 |   validate_input_df(input_df, distribution)
37 | 
38 |   # Sample from posterior distribution
39 |   posterior_samples <- sample_from_posterior(input_df, distribution, priors)
40 | 
41 |   # Calculate Loss Distribution
42 |   estimate_loss(posterior_samples = posterior_samples,
43 |                 distribution = distribution,
44 |                 wrt_option = wrt_option,
45 |                 metric = metric) %>%
46 |     stats::quantile(probs = threshold, na.rm = TRUE)
47 | }
48 | 


--------------------------------------------------------------------------------
/R/sample_page_views_per_session.R:
--------------------------------------------------------------------------------
 1 | #' Sample Page Views Per Session (Visit)
 2 | #'
 3 | #' Adds 2 new nested columns to the input_df: `gamma_params` and `samples`
 4 | #'     `gamma_params` in each row should be a tibble of length 2 (\eqn{k}
 5 | #'         and \eqn{\theta} parameters)
 6 | #'     `samples` in each row should be a tibble of length `n_samples`
 7 | #'
 8 | #' See update_rules vignette for a mathematical representation.
 9 | #' \deqn{page_views_i ~ Poisson(\lambda)}
10 | #' \deqn{\lambda ~ Gamma(k, \theta)}
11 | #' Page Views Per Visit is sampled from a Gamma distribution with a Poisson likelihood
12 | #' of an individual having n page views by the end of their session.
13 | #'
14 | #' This is not always the case, so verify your data follows the shape of
15 | #' an Poisson distribution before using this.
16 | #'
17 | #' @param input_df Dataframe containing option_name (str),
18 | #'     sum_sessions (dbl), and sum_page_views_per_session (dbl).
19 | #' @param priors Optional list of priors k0 and theta0.
20 | #'     Default \eqn{Gamma(1, 250)} will be use otherwise.
21 | #'     \eqn{Gamma(1, 1)} might also be a good choice for this distribution
22 | #'     if you only have a few page views per session.
23 | #' @param n_samples Optional integer value. Defaults to 50,000 samples.
24 | #'
25 | #' @importFrom purrr map map2
26 | #' @importFrom dplyr mutate %>%
27 | #' @importFrom rlang .data
28 | #'
29 | #' @return input_df with 2 new nested columns `gamma_params` and `samples`
30 | #'
31 | sample_page_views_per_session <- function(input_df, priors, n_samples = 5e4){
32 |   input_df %>%
33 |     dplyr::mutate(
34 |       gamma_params = purrr::map2(.x = .data$sum_page_views,
35 |                                  .y = .data$sum_sessions,
36 |                                  ~ update_gamma(k = .x - .y, # Page Views offset since every session starts with 1 PV. Add 1 to likelihood observations
37 |                                                 theta = .y,
38 |                                                 priors = priors)
39 |       ),
40 |       samples = purrr::map(.x = .data$gamma_params,
41 |                            ~ stats::rgamma(n_samples,
42 |                                            shape = .x$k,
43 |                                            scale = .x$theta) + 1 # Adding the offset from above back in
44 |       )
45 |     )
46 | }
47 | 


--------------------------------------------------------------------------------
/tests/testthat/test-sample_from_posterior.R:
--------------------------------------------------------------------------------
 1 | context("Sample From Posterior")
 2 | 
 3 | test_that("sample_from_posterior returns correct dataframe shape: conversion_rate", {
 4 |   input_df <- tibble::tibble(
 5 |      option_name = c("A", "B", "C"),
 6 |      sum_clicks = c(1000, 1000, 1000),
 7 |      sum_conversions = c(100, 120, 110)
 8 |   )
 9 |   n_samples <- 150
10 |   n_unique_options <- length(unique(input_df$option_name))
11 |   expected_col_names <- c("option_name", "samples", "sample_id")
12 |   output <- sample_from_posterior(input_df, "conversion_rate", n_samples = n_samples)
13 |   expect_true(is.data.frame(output))
14 |   expect_length(output, n_unique_options)
15 |   expect_equal(nrow(output), n_unique_options * n_samples)
16 |   expect_true(all(colnames(output) == expected_col_names))
17 | })
18 | 
19 | test_that("sample_from_posterior returns correct dataframe shape for all types", {
20 |   input_df_all <- tibble::tibble(
21 |     option_name = c("A", "B", "C"),
22 |     sum_impressions = c(10000, 9000, 11000),
23 |     sum_sessions = c(1000, 1000, 1000),
24 |     sum_conversions = c(100, 120, 110),
25 |     sum_revenue = c(900, 1200, 1150),
26 |     sum_cost = c(10, 50, 30),
27 |     sum_conversions_2 = c(10, 8, 20),
28 |     sum_revenue_2 = c(10, 16, 15),
29 |     sum_duration = c(5000, 3000, 4000),
30 |     sum_page_views = c(3000, 2000, 1345)
31 |   ) %>%
32 |     dplyr::mutate(sum_clicks = sum_sessions)
33 |   n_samples <- 150
34 |   n_unique_options <- length(unique(input_df_all$option_name))
35 |   expected_col_names <- c("option_name", "samples", "sample_id")
36 | 
37 |   output <- purrr::map(distribution_column_mapping$distribution_type,
38 |                        ~ sample_from_posterior(input_df_all,
39 |                                                .x,
40 |                                                n_samples = n_samples))
41 | 
42 |   expect_true(purrr::every(output, ~ is.data.frame(.x)))
43 |   purrr::walk(output, ~ expect_length(.x, n_unique_options))
44 |   purrr::walk(output, ~ expect_equal(nrow(.x), n_unique_options * n_samples))
45 |   expect_true(purrr::every(output, ~ all(colnames(.x) == expected_col_names)))
46 | })
47 | 
48 | test_that("sample_from_posterior fails when incorrect distribution is input", {
49 |   input_df <- tibble::tibble(
50 |     option_name = c("A", "B", "C"),
51 |     sum_clicks = c(1000, 1000, 1000),
52 |     sum_conversions = c(100, 120, 110)
53 |   )
54 |   expect_error(sample_from_posterior(input_df, "bad_input", n_samples = n_samples))
55 | })
56 | 


--------------------------------------------------------------------------------
/man/estimate_all_values.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/estimate_all_values.R
 3 | \name{estimate_all_values}
 4 | \alias{estimate_all_values}
 5 | \title{Estimate All Values}
 6 | \usage{
 7 | estimate_all_values(
 8 |   input_df,
 9 |   distribution,
10 |   wrt_option_lift,
11 |   priors = list(),
12 |   wrt_option_vr = NULL,
13 |   loss_threshold = 0.95,
14 |   lift_threshold = 0.7,
15 |   metric = "lift"
16 | )
17 | }
18 | \arguments{
19 | \item{input_df}{Dataframe containing option_name (str) and various other columns
20 | depending on the distribution type. See vignette for more details.}
21 | 
22 | \item{distribution}{String of the distribution name}
23 | 
24 | \item{wrt_option_lift}{String: the option lift and win probability is calculated
25 | with respect to (wrt). Required.}
26 | 
27 | \item{priors}{Optional list of priors. Defaults will be use otherwise.}
28 | 
29 | \item{wrt_option_vr}{String: the option against which loss (value remaining)
30 | is calculated. If NULL the best option will be used. (optional)}
31 | 
32 | \item{loss_threshold}{The confidence interval specifying what the "worst case scenario" should be.
33 | Defaults to 95\%. (optional)}
34 | 
35 | \item{lift_threshold}{The confidence interval specifying how likely the lift is to be true.
36 | Defaults to 70\%. (optional)}
37 | 
38 | \item{metric}{string the type of loss.
39 | absolute will be the difference, on the outcome scale. 0 when best = wrt_option
40 | lift will be the (best - wrt_option) / wrt_option, 0 when best = wrt_option
41 | relative_risk will be the ratio best/wrt_option, 1 when best = wrt_option}
42 | }
43 | \value{
44 | A list with 4 named items: Win Probability, Value Remaining,
45 |     Lift vs Baseline, and Win Probability vs Baseline.
46 | }
47 | \description{
48 | Efficiently estimates all values at once so the posterior only need to be
49 | sampled one time. This function will return as a list win probability,
50 | value remaining, estimated percent lift with respect to the provided option,
51 | and the win probability of the best option vs the provided option.
52 | }
53 | \details{
54 | TODO: Add high density credible intervals to this output for each option.
55 | }
56 | \examples{
57 | \dontrun{
58 | input_df <- data.frame(option_name = c("A", "B", "C"),
59 |     sum_clicks = c(1000, 1000, 1000),
60 |     sum_conversions = c(100, 120, 110), stringsAsFactors = FALSE)
61 | estimate_all_values(input_df, distribution = "conversion_rate", wrt_option_lift = "A")
62 | }
63 | 
64 | }
65 | 


--------------------------------------------------------------------------------
/tests/testthat/test-estimate_loss.R:
--------------------------------------------------------------------------------
 1 | context("Estimate Loss Distribution")
 2 | 
 3 | test_that("estimate_loss returns double vector the same length as the rows in posterior_samples input", {
 4 |   distribution_type <- "conversion_rate"
 5 |   input_df <- tibble::tibble(
 6 |     option_name = c("A", "B", "C"),
 7 |     sum_clicks = c(1000, 1000, 1000),
 8 |     sum_conversions = c(100, 120, 110)
 9 |   )
10 |   posterior_samples <- sample_from_posterior(input_df, distribution_type, priors = list())
11 |   count_unique_options <- length(unique(posterior_samples$option_name))
12 |   posterior_samples_rows <- nrow(posterior_samples)/count_unique_options
13 | 
14 |   output <- estimate_loss(posterior_samples = posterior_samples,
15 |                           distribution = distribution_type)
16 |   expect_length(output, posterior_samples_rows)
17 |   expect_true(is.double(output))
18 | })
19 | 
20 | 
21 | test_that("estimate_loss returns double vector when wrt_option is set", {
22 |   distribution_type <- "conversion_rate"
23 |   input_df <- tibble::tibble(
24 |     option_name = c("A", "B", "C"),
25 |     sum_clicks = c(1000, 1000, 1000),
26 |     sum_conversions = c(100, 120, 110)
27 |   )
28 |   posterior_samples <- sample_from_posterior(input_df, distribution_type, priors = list())
29 |   count_unique_options <- length(unique(posterior_samples$option_name))
30 |   posterior_samples_rows <- nrow(posterior_samples)/count_unique_options
31 | 
32 |   output <- estimate_loss(posterior_samples = posterior_samples,
33 |                           distribution = distribution_type,
34 |                           wrt_option = "C")
35 |   expect_length(output, posterior_samples_rows)
36 |   expect_true(is.double(output))
37 | })
38 | 
39 | test_that("estimate_loss returns double vector the same length
40 |           as the rows in posterior_samples input
41 |           when winner_is_min", {
42 |   distribution_type <- "cpa"
43 |   input_df <- tibble::tibble(
44 |     option_name = c("A", "B", "C"),
45 |     sum_clicks = c(1000, 1000, 1000),
46 |     sum_conversions = c(100, 120, 110),
47 |     sum_cost = c(50, 100, 150),
48 |   )
49 |   posterior_samples <- sample_from_posterior(input_df, distribution_type, priors = list())
50 |   count_unique_options <- length(unique(posterior_samples$option_name))
51 |   posterior_samples_rows <- nrow(posterior_samples)/count_unique_options
52 | 
53 |   output <- estimate_loss(posterior_samples = posterior_samples,
54 |                           distribution = distribution_type)
55 |   expect_length(output, posterior_samples_rows)
56 |   expect_true(is.double(output))
57 | })
58 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # grizbayr
 2 | 
 3 | <!-- badges: start -->
 4 | [![CRAN status](https://www.r-pkg.org/badges/version/grizbayr)](https://CRAN.R-project.org/package=grizbayr)
 5 | [![](https://cranlogs.r-pkg.org/badges/grizbayr)](https://cran.r-project.org/package=grizbayr)
 6 | <!-- badges: end -->
 7 | 
 8 | ## A Bayesian Inference Package for A|B and Bandit Marketing Tests
 9 | 
10 | ### Description:
11 | 
12 | Uses simple Bayesian conjugate prior update rules to calculate the following metrics for various marketing objectives:
13 | 
14 |   1. Win Probability of each option
15 |   2. Value Remaining in the Test
16 |   3. Percent Lift Over the Baseline
17 | 
18 | This allows a user to implement Bayesian Inference methods when analyzing the results of a split test or Bandit experiment.
19 | 
20 | ## Examples
21 | 
22 | See the `intro` vignette for examples to get started.
23 | 
24 | ## Marketing objectives supported:
25 | 
26 |  - Conversion Rate
27 |  - Response Rate
28 |  - Click Through Rate (CTR)
29 |  - Revenue Per Session
30 |  - Multi Revenue Per Session
31 |  - Cost Per Activation (CPA)
32 |  - Total Contribution Margin (CM)
33 |  - CM Per Click
34 |  - Cost Per Click (CPC)
35 |  - Session Duration (seconds)
36 |  - Page Views Per Session
37 | 
38 | 
39 | ## Contributing
40 | 
41 | ### New Posterior Distributions
42 | 
43 | To add a new posterior distribution you must complete the following:
44 | 
45 | 1. Create a new function called `sample_...(input_df, priors, n_samples)`. Use the internal helper functions update_gamma, update_beta, etc. included in this package or you can create a new one.
46 | 1. This function (and the name) must be added to the switch statement in `sample_from_posterior()`
47 | 1. A new row must be added to the internal data object `distribution_column_mapping`. 
48 |     - Select this object from the package
49 |     - Add a new row with a 1 for every column that is required for this distribution (this is for data validation and clear alerting for the end user)
50 |     - Save the updated tibble object using `use_data(new_tibble, internal = TRUE, overwrite = TRUE)` and it will be saved as `sysdata.rda` in the package for internal use.
51 |     - Update the intro.Rmd markdown table to include which columns are required for your function.
52 |   
53 | 1. Create a PR for review.  
54 | 
55 | ### New Features Ideas (TODO)
56 | 
57 | - High Density Credible Intervals with each option
58 | - Conjugate Prior Update Rules vignette deriving each marketing objective `update_rules`
59 | 
60 | ---
61 | 
62 | #### Package Name
63 | 
64 | The name is a play on Bayes with an added r (bayesr). The added griz (or Grizzly Bear) creates a unique name that is searchable due to too many similarly named packages.
65 | 


--------------------------------------------------------------------------------
/R/estimate_loss.R:
--------------------------------------------------------------------------------
 1 | #' Estimate Loss
 2 | #'
 3 | #' @param posterior_samples Tibble: returned from sample_from_posterior with 3 columns
 4 | #'   `option_name`, `samples`, and `sample_id`.
 5 | #' @param distribution String: the name of the distribution
 6 | #' @param wrt_option String: the option loss is calculated with respect to (wrt). If NULL, the best option will be chosen.
 7 | #' @param metric String: the type of loss.
 8 | #'   absolute will be the difference, on the outcome scale. 0 when best = wrt_option
 9 | #'   lift will be the (best - wrt_option) / wrt_option, 0 when best = wrt_option
10 | #'   relative_risk will be the ratio best/wrt_option, 1 when best = wrt_option
11 | #'
12 | #' @return numeric, the loss distribution
13 | #'
14 | #' @importFrom dplyr filter select
15 | #' @importFrom magrittr use_series %>%
16 | #' @importFrom tidyr pivot_wider
17 | #' @importFrom rlang .data
18 | #'
19 | #' @export
20 | #'
21 | #' @examples
22 | #' # Requires posterior_samples dataframe. See `sample_from_posterior()`
23 | #' # for an example.
24 | #'
25 | #' \dontrun{
26 | #' estimate_loss(posterior_samples = posterior_samples, distribution = "conversion_rate")
27 | #' }
28 | estimate_loss <- function(posterior_samples, distribution, wrt_option = NULL, metric = c("absolute", "lift", "relative_risk")) {
29 |   metric <- match.arg(metric)
30 | 
31 |   # estimate 'best' option if no wrt option is provided
32 |   if (is.null(wrt_option)) {
33 |     wrt_option <- find_best_option(posterior_samples, distribution)
34 |   } else {
35 |     validate_wrt_option(wrt_option, posterior_samples)
36 |   }
37 | 
38 |   posterior_samples_wide <- posterior_samples %>%
39 |     tidyr::pivot_wider(names_from = "option_name", values_from = "samples") %>%
40 |     dplyr::select(-"sample_id")
41 | 
42 |   theta_star <- posterior_samples_wide[[wrt_option]]
43 | 
44 |   # Need to Flip Loss Distributions if Lower is Better
45 |   if(is_winner_max(distribution)){
46 |     theta_max <- as.matrix(posterior_samples_wide) %>%
47 |       apply(1, max)
48 | 
49 |     loss_distribution <- switch(metric,
50 |                                 absolute = theta_max - theta_star,
51 |                                 lift = (theta_max - theta_star) / theta_star,
52 |                                 relative_risk = theta_max / theta_star
53 |     )
54 |   }else{
55 |     theta_min <- as.matrix(posterior_samples_wide) %>%
56 |       apply(1, min)
57 | 
58 |     loss_distribution <- switch(metric,
59 |                                 absolute = theta_star - theta_min,
60 |                                 lift = ( theta_star - theta_min ) / theta_star,
61 |                                 relative_risk = theta_min / theta_star
62 |     )
63 |   }
64 |   loss_distribution
65 | }
66 | 


--------------------------------------------------------------------------------
/R/estimate_lift.R:
--------------------------------------------------------------------------------
 1 | #' Estimate Lift Distribution
 2 | #'
 3 | #' Estimates lift distribution vector from posterior samples.
 4 | #'
 5 | #' @param posterior_samples Tibble returned from sample_from_posterior with 3 columns
 6 | #'   `option_name`, `samples`, and `sample_id`.
 7 | #' @param distribution String of the distribution name
 8 | #' @param wrt_option string the option lift is calculated with respect to (wrt). Required.
 9 | #' @param metric string the type of lift.
10 | #'   `absolute`` will be the difference, on the outcome scale. 0 when best = wrt_option
11 | #'   `lift`` will be the (best - wrt_option) / wrt_option, 0 when best = wrt_option
12 | #'   `relative_risk`` will be the ratio best/wrt_option, 1 when best = wrt_option
13 | #'
14 | #' @return numeric, the lift distribution
15 | #' @export
16 | #' @importFrom rlang .data
17 | #' @importFrom tidyr pivot_wider
18 | #' @importFrom dplyr select
19 | #'
20 | #' @examples
21 | #' # Requires posterior_samples dataframe. See `sample_from_posterior()`
22 | #' # for an example.
23 | #'
24 | #' \dontrun{
25 | #' estimate_lift(posterior_samples = posterior_samples,
26 | #'               distribution = "conversion_rate",
27 | #'               wrt_option = "A",
28 | #'               metric = "lift")
29 | #'}
30 | #'
31 | estimate_lift <- function(posterior_samples, distribution, wrt_option, metric = "lift"){
32 |   if(!metric %in% c("lift", "relative_risk", "absolute")){
33 |     stop("Invalid argument. `metric` must be one of `lift`, `absolute`, or `relative_risk`.")
34 |   }
35 |   validate_wrt_option(wrt_option, posterior_samples)
36 | 
37 |   best_option <- find_best_option(posterior_samples, distribution)
38 | 
39 |   # Format samples wider so matrices can be used
40 |   posterior_samples_wide <- posterior_samples %>%
41 |     tidyr::pivot_wider(names_from = "option_name", values_from = "samples") %>%
42 |     dplyr::select(-"sample_id")
43 | 
44 |   theta_best <- posterior_samples_wide[[best_option]]
45 |   theta_control <- posterior_samples_wide[[wrt_option]]
46 | 
47 |   # Need to Flip Loss Distributions if Lower is Better
48 |   if(is_winner_max(distribution)){
49 |     lift_distribution <- switch(metric,
50 |                                 absolute = theta_best - theta_control,
51 |                                 lift = (theta_best - theta_control) / theta_control,
52 |                                 relative_risk = theta_best / theta_control
53 |     )
54 |   }else{
55 |     lift_distribution <- switch(metric,
56 |                                 absolute = theta_control - theta_best,
57 |                                 lift = ( theta_control - theta_best ) / theta_control,
58 |                                 relative_risk = theta_best / theta_control
59 |     )
60 |   }
61 |   lift_distribution
62 | }
63 | 


--------------------------------------------------------------------------------
/tests/testthat/test-estimate_lift.R:
--------------------------------------------------------------------------------
 1 | context("Estimated Lift Distribution")
 2 | 
 3 | test_that("estimate_lift returns double vector the same length as the rows in posterior_samples input", {
 4 |   distribution_type <- "conversion_rate"
 5 |   input_df <- tibble::tibble(
 6 |     option_name = c("A", "B", "C"),
 7 |     sum_clicks = c(1000, 1000, 1000),
 8 |     sum_conversions = c(100, 120, 110)
 9 |   )
10 |   posterior_samples <- sample_from_posterior(input_df, distribution_type, priors = list())
11 |   count_unique_options <- length(unique(posterior_samples$option_name))
12 |   posterior_samples_rows <- nrow(posterior_samples)/count_unique_options
13 | 
14 |   output <- estimate_lift(posterior_samples = posterior_samples,
15 |                           distribution = distribution_type,
16 |                           wrt_option = "A",
17 |                           metric = "lift")
18 |   expect_length(output, posterior_samples_rows)
19 |   expect_true(is.double(output))
20 | })
21 | 
22 | 
23 | test_that("estimate_lift returns double vector the same length
24 |           as the rows in posterior_samples input when lower is better", {
25 |   distribution_type <- "cpa"
26 |   input_df <- tibble::tibble(
27 |     option_name = c("A", "B", "C"),
28 |     sum_clicks = c(1000, 1000, 1000),
29 |     sum_conversions = c(100, 120, 110),
30 |     sum_cost = c(50, 100, 150),
31 |   )
32 |   posterior_samples <- sample_from_posterior(input_df, distribution_type, priors = list())
33 |   count_unique_options <- length(unique(posterior_samples$option_name))
34 |   posterior_samples_rows <- nrow(posterior_samples)/count_unique_options
35 | 
36 |   output <- estimate_lift(posterior_samples = posterior_samples,
37 |                           distribution = distribution_type,
38 |                           wrt_option = "A",
39 |                           metric = "lift")
40 |   expect_length(output, posterior_samples_rows)
41 |   expect_true(is.double(output))
42 | })
43 | 
44 | 
45 | test_that("estimate_lift errors when invalid metric is passed", {
46 |             distribution_type <- "cpa"
47 |             input_df <- tibble::tibble(
48 |               option_name = c("A", "B", "C"),
49 |               sum_clicks = c(1000, 1000, 1000),
50 |               sum_conversions = c(100, 120, 110),
51 |               sum_cost = c(50, 100, 150),
52 |             )
53 |             posterior_samples <- sample_from_posterior(input_df, distribution_type, priors = list())
54 |             count_unique_options <- length(unique(posterior_samples$option_name))
55 |             posterior_samples_rows <- nrow(posterior_samples)/count_unique_options
56 | 
57 |             testthat::expect_error({
58 |               estimate_lift(posterior_samples = posterior_samples,
59 |                             distribution = distribution_type,
60 |                             wrt_option = "A",
61 |                             metric = "invalid")
62 |             })
63 |           })
64 | 


--------------------------------------------------------------------------------
/R/sample_from_posterior.R:
--------------------------------------------------------------------------------
 1 | #' Sample From Posterior
 2 | #'
 3 | #' Selects which function to use to sample from the posterior distribution
 4 | #'
 5 | #' @param input_df Dataframe containing option_name (str) and various other columns
 6 | #'     depending on the distribution type. See vignette for more details.
 7 | #' @param distribution String of the distribution name
 8 | #' @param priors Optional list of priors. Defaults will be use otherwise.
 9 | #' @param n_samples Optional integer value. Defaults to 50,000 samples.
10 | #'
11 | #' @return A tibble with 2 columns: option_name (chr) and samples (dbl) [long form data].
12 | #' @export
13 | #' @importFrom dplyr select mutate row_number %>%
14 | #' @importFrom tidyr unnest
15 | #' @importFrom purrr map
16 | #' @importFrom tibble tibble
17 | #' @importFrom rlang .data
18 | #'
19 | #' @examples
20 | #' input_df <- tibble::tibble(
21 | #'    option_name = c("A", "B"),
22 | #'    sum_clicks = c(1000, 1000),
23 | #'    sum_conversions = c(100, 120),
24 | #'    sum_sessions = c(1000, 1000),
25 | #'    sum_revenue = c(1000, 1500)
26 | #' )
27 | #' sample_from_posterior(input_df, "conversion_rate")
28 | #' sample_from_posterior(input_df, "rev_per_session")
29 | #'
30 | sample_from_posterior <- function(input_df, distribution, priors = list(), n_samples = 5e4){
31 |   samples_tibble <- switch(distribution,
32 |    "conversion_rate" =        sample_conv_rate(input_df, priors, n_samples),
33 |    "response_rate" =          sample_response_rate(input_df, priors, n_samples),
34 |    "ctr" =                    sample_ctr(input_df, priors, n_samples),
35 |    "rev_per_session" =        sample_rev_per_session(input_df, priors, n_samples),
36 |    "multi_rev_per_session" =  sample_multi_rev_per_session(input_df, priors, n_samples),
37 |    "cpa" =                    sample_cpa(input_df, priors, n_samples),
38 |    "total_cm" =               sample_total_cm(input_df, priors, n_samples),
39 |    "cm_per_click" =           sample_cm_per_click(input_df, priors, n_samples),
40 |    "cpc" =                    sample_cpc(input_df, priors, n_samples),
41 |    "session_duration" =       sample_session_duration(input_df, priors, n_samples),
42 |    "page_views_per_session" = sample_page_views_per_session(input_df, priors, n_samples),
43 |    stop(
44 |      paste(distribution,
45 |            "is an invalid distribution type. Select from one of the following:",
46 |            paste(distribution_column_mapping$distribution_type, collapse = ", "))
47 |      )
48 |   )
49 |   # Clean tibble into expected 2 dimensional output with added sample_id
50 |   samples_tibble %>%
51 |      dplyr::mutate(samples = purrr::map(.x = .data$samples,
52 |                                         ~ tibble::tibble(samples = .x) %>%
53 |                                            dplyr::mutate(sample_id = dplyr::row_number())
54 |                                         )
55 |                    ) %>%
56 |      dplyr::select("option_name", "samples") %>%
57 |      tidyr::unnest(cols = "samples")
58 | }
59 | 


--------------------------------------------------------------------------------
/R/sample_cpa.R:
--------------------------------------------------------------------------------
 1 | #' Sample Cost Per Activation (CPA)
 2 | #'
 3 | #' Adds 3 new nested columns to the input_df: `beta_params`, `gamma_params`, and `samples`
 4 | #'     `beta_params` and `gamma_params` in each row should be a tibble of length 2 (\eqn{\alpha}
 5 | #'         and \eqn{\beta} parameters and \eqn{k} and \eqn{\theta} parameters)
 6 | #'     `samples` in each row should be a tibble of length `n_samples`
 7 | #'
 8 | #'
 9 | #' See update_rules vignette for a mathematical representation.
10 | #' This is a combination of a Beta-Bernoulli update and a Gamma-Exponential update.
11 | #'
12 | #' \deqn{conversion_i ~ Bernoulli(\phi)}
13 | #' \deqn{cpc_i ~ Exponential(\lambda)}
14 | #' \deqn{\phi ~ Beta(\alpha, \beta)}
15 | #' \deqn{\lambda ~ Gamma(k, \theta)}
16 | #'
17 | #' \deqn{cpa_i ~ 1/ (Bernoulli(\phi) * Exponential(\lambda))}
18 | #' \deqn{averageCPA ~ 1/(\phi\lambda)}
19 | #'
20 | #' Conversion Rate is sampled from a Beta distribution with a Binomial likelihood
21 | #' of an individual converting.
22 | #'
23 | #' Average CPC is sampled from a Gamma distribution with an Exponential likelihood
24 | #' of an individual cost.
25 | #'
26 | #'
27 | #' @param input_df Dataframe containing option_name (str), sum_conversions (dbl),
28 | #'     sum_cost (dbl), and sum_clicks (dbl).
29 | #' @param priors Optional list of priors {alpha0, beta0} for Beta and {k0, theta0}
30 | #'     for Gamma.
31 | #'     Default \eqn{Beta(1,1)} and \eqn{Gamma(1, 250)} will be use otherwise.
32 | #' @param n_samples Optional integer value. Defaults to 50,000 samples.
33 | #'
34 | #' @importFrom purrr map2
35 | #' @importFrom dplyr mutate %>%
36 | #' @importFrom stats rgamma rbeta
37 | #' @importFrom rlang .data
38 | #'
39 | #' @return input_df with 3 new nested columns `beta_params`, `gamma_params`, and `samples`
40 | #'
41 | sample_cpa <- function(input_df, priors, n_samples = 5e4){
42 |   input_df %>%
43 |     dplyr::mutate(
44 |       beta_params = purrr::map2(.x = .data$sum_conversions,
45 |                                 .y = .data$sum_clicks,
46 |                                 ~ update_beta(alpha = .x,
47 |                                               beta = .y - .x,
48 |                                               priors = priors)
49 |       ),
50 |       gamma_params = purrr::map2(.x = .data$sum_clicks,
51 |                                  .y = .data$sum_cost,
52 |                                  ~ update_gamma(k = .x,
53 |                                                 theta = .y,
54 |                                                 priors = priors)
55 |       ),
56 |       samples = purrr::map2(.x = .data$beta_params,
57 |                             .y = .data$gamma_params,
58 |                             ~ 1 /( stats::rgamma(n_samples,
59 |                                                  shape = .y$k,
60 |                                                  scale = .y$theta) *
61 |                                      stats::rbeta(n_samples,
62 |                                                   shape1 = .x$alpha,
63 |                                                   shape2 = .x$beta) )
64 |       )
65 |     )
66 | }
67 | 


--------------------------------------------------------------------------------
/R/sample_rev_per_session.R:
--------------------------------------------------------------------------------
 1 | #' Sample Rev Per Session
 2 | #'
 3 | #' Adds 3 new nested columns to the input_df: `beta_params`, `gamma_params`, and `samples`
 4 | #'     `beta_params` and `gamma_params` in each row should be a tibble of length 2 (\eqn{\alpha}
 5 | #'         and \eqn{\beta} parameters and \eqn{k} and \eqn{\theta} parameters)
 6 | #'     `samples` in each row should be a tibble of length `n_samples`
 7 | #'
 8 | #'
 9 | #' See update_rules vignette for a mathematical representation.
10 | #'
11 | #' \deqn{RevPerSession = RevPerOrder * OrdersPerClick}
12 | #' This is a combination of a Beta-Bernoulli update and a Gamma-Exponential update.
13 | #'
14 | #' \deqn{conversion_i ~ Bernoulli(\phi)}
15 | #' \deqn{revenue_i ~ Exponential(\lambda)}
16 | #' \deqn{\phi ~ Beta(\alpha, \beta)}
17 | #' \deqn{\lambda ~ Gamma(k, \theta)}
18 | #'
19 | #' \deqn{revenue_i ~ Bernoulli(\phi) * Exponential(\lambda)^-1)}
20 | #' \deqn{Rev Per Session ~ \phi / \lambda}
21 | #'
22 | #' Conversion Rate is sampled from a Beta distribution with a Binomial likelihood
23 | #' of an individual converting.
24 | #'
25 | #' Average Rev Per Order is sampled from a Gamma distribution with an Exponential likelihood
26 | #' of Revenue from an individual order.
27 | #' This function makes sense to use if there is a distribution of possible revenue values
28 | #' that can be produced from a single order or conversion.
29 | #'
30 | #' @param input_df Dataframe containing option_name (str),
31 | #'     sum_conversions (dbl), sum_revenue (dbl), and sum_clicks (dbl).
32 | #' @param priors Optional list of priors {alpha0, beta0} for Beta
33 | #'     and {k0, theta0} for Gamma. Default \eqn{Beta(1,1)}
34 | #'     and \eqn{Gamma(1, 250)} will be use otherwise.
35 | #' @param n_samples Optional integer value. Defaults to 50,000 samples.
36 | #'
37 | #' @importFrom purrr map2
38 | #' @importFrom dplyr mutate %>%
39 | #' @importFrom stats rgamma rbeta
40 | #' @importFrom rlang .data
41 | #'
42 | #' @return input_df with 3 new nested columns `beta_params`, `gamma_params`, and `samples`
43 | #'
44 | sample_rev_per_session <- function(input_df, priors, n_samples = 5e4){
45 |   input_df %>%
46 |     dplyr::mutate(
47 |       beta_params = purrr::map2(.x = .data$sum_conversions,
48 |                                 .y = .data$sum_sessions,
49 |                                 ~ update_beta(alpha = .x,
50 |                                               beta = .y - .x,
51 |                                               priors = priors)
52 |       ),
53 |       gamma_params = purrr::map2(.x = .data$sum_conversions,
54 |                                  .y = .data$sum_revenue,
55 |                                  ~ update_gamma(k = .x,
56 |                                                 theta = .y,
57 |                                                 priors = priors)
58 |       ),
59 |       samples = purrr::map2(.x = .data$beta_params,
60 |                             .y = .data$gamma_params,
61 |                             ~ stats::rbeta(n_samples,
62 |                                            shape1 = .x$alpha,
63 |                                            shape2 = .x$beta) /
64 |                               stats::rgamma(n_samples,
65 |                                             shape = .y$k,
66 |                                             scale = .y$theta)
67 |       )
68 |     )
69 | }
70 | 


--------------------------------------------------------------------------------
/R/sample_cm_per_click.R:
--------------------------------------------------------------------------------
 1 | #' Sample CM Per Click
 2 | #'
 3 | #' @description
 4 | #' Adds 4 new nested columns to the input_df: `beta_params`,
 5 | #'     `gamma_params_rev`, `gamma_params_cost`and `samples`
 6 | #'
 7 | #' @details
 8 | #'`beta_params` and `gamma_params_rev` in each row should be a
 9 | #' tibble of length 2 (\eqn{\alpha} and \eqn{\beta} parameters
10 | #' and \eqn{k} and \eqn{\theta} parameters)
11 | #'`samples` in each row should be a tibble of length `n_samples`
12 | #'
13 | #' See update_rules vignette for a mathematical representation.
14 | #' \deqn{CMPerClick = ConversionsPerClick * RevPerConversion - CostPerClick}
15 | #'
16 | #' @param input_df Dataframe containing option_name (str), sum_conversions (dbl), sum_revenue (dbl),
17 | #'     and sum_clicks (dbl).
18 | #' @param priors Optional list of priors {alpha0, beta0} for Beta, {k0, theta0} for Gamma Inverse Revenue,
19 | #'     and {k01, theta01} for Gamma Cost (uses alternate priors so they can be different from Revenue).
20 | #'     Default \eqn{Beta(1,1)} and \eqn{Gamma(1, 250)} will be use otherwise.
21 | #' @param n_samples Optional integer value. Defaults to 50,000 samples.
22 | #'
23 | #' @importFrom purrr pmap map2
24 | #' @importFrom dplyr mutate %>%
25 | #' @importFrom stats rbeta rgamma
26 | #' @importFrom rlang .data
27 | #'
28 | #' @return input_df with 4 new nested columns `beta_params`, `gamma_params_rev`,
29 | #'     `gamma_params_cost`, and `samples`
30 | #'
31 | sample_cm_per_click <- function(input_df, priors, n_samples = 5e4){
32 |   input_df %>%
33 |     dplyr::mutate(
34 |       beta_params = purrr::map2(.x = .data$sum_conversions,
35 |                                 .y = .data$sum_clicks,
36 |                                 ~ update_beta(alpha = .x,
37 |                                               beta = .y - .x,
38 |                                               priors = priors)
39 |       ),
40 |       gamma_params_rev = purrr::map2(.x = .data$sum_conversions,
41 |                                      .y = .data$sum_revenue,
42 |                                      ~ update_gamma(k = .x,
43 |                                                     theta = .y,
44 |                                                     priors = priors)
45 |       ),
46 |       gamma_params_cost = purrr::map2(.x = .data$sum_clicks,
47 |                                       .y = .data$sum_cost,
48 |                                       ~ update_gamma(k = .x,
49 |                                                      theta = .y,
50 |                                                      priors = priors,
51 |                                                      alternate_priors = TRUE)
52 |       ),
53 |       samples = purrr::pmap(list(.data$beta_params,
54 |                                  .data$gamma_params_rev,
55 |                                  .data$gamma_params_cost),
56 |                             ~ ( # Rev Per Click
57 |                               stats::rbeta(n_samples,
58 |                                            shape1 = ..1$alpha,
59 |                                            shape2 = ..1$beta) /
60 |                                 stats::rgamma(n_samples,
61 |                                               shape = ..2$k,
62 |                                               scale = ..2$theta) ) -
63 |                               # Minus Variable Cost Per Click
64 |                               1 / stats::rgamma(n_samples,
65 |                                                 shape = ..3$k,
66 |                                                 scale = ..3$theta)
67 |       )
68 |     )
69 | }
70 | 


--------------------------------------------------------------------------------
/R/estimate_all_values.R:
--------------------------------------------------------------------------------
 1 | #' Estimate All Values
 2 | #'
 3 | #' Efficiently estimates all values at once so the posterior only need to be
 4 | #' sampled one time. This function will return as a list win probability,
 5 | #' value remaining, estimated percent lift with respect to the provided option,
 6 | #' and the win probability of the best option vs the provided option.
 7 | #'
 8 | #' TODO: Add high density credible intervals to this output for each option.
 9 | #'
10 | #' @param input_df Dataframe containing option_name (str) and various other columns
11 | #'     depending on the distribution type. See vignette for more details.
12 | #' @param distribution String of the distribution name
13 | #' @param wrt_option_lift String: the option lift and win probability is calculated
14 | #'     with respect to (wrt). Required.
15 | #' @param priors Optional list of priors. Defaults will be use otherwise.
16 | #' @param wrt_option_vr String: the option against which loss (value remaining)
17 | #'   is calculated. If NULL the best option will be used. (optional)
18 | #' @param loss_threshold The confidence interval specifying what the "worst case scenario" should be.
19 | #'     Defaults to 95\%. (optional)
20 | #' @param lift_threshold The confidence interval specifying how likely the lift is to be true.
21 | #'     Defaults to 70\%. (optional)
22 | #' @param metric string the type of loss.
23 | #'   absolute will be the difference, on the outcome scale. 0 when best = wrt_option
24 | #'   lift will be the (best - wrt_option) / wrt_option, 0 when best = wrt_option
25 | #'   relative_risk will be the ratio best/wrt_option, 1 when best = wrt_option
26 | #'
27 | #'
28 | #' @return A list with 4 named items: Win Probability, Value Remaining,
29 | #'     Lift vs Baseline, and Win Probability vs Baseline.
30 | #' @export
31 | #'
32 | #' @importFrom stats quantile
33 | #'
34 | #' @examples
35 | #' \dontrun{
36 | #' input_df <- data.frame(option_name = c("A", "B", "C"),
37 | #'     sum_clicks = c(1000, 1000, 1000),
38 | #'     sum_conversions = c(100, 120, 110), stringsAsFactors = FALSE)
39 | #' estimate_all_values(input_df, distribution = "conversion_rate", wrt_option_lift = "A")
40 | #' }
41 | #'
42 | estimate_all_values <- function(input_df, distribution, wrt_option_lift, priors = list(),
43 |                                 wrt_option_vr = NULL, loss_threshold = 0.95, lift_threshold = 0.7,
44 |                                 metric = "lift"){
45 |   validate_input_df(input_df, distribution)
46 | 
47 |   # Sample from posterior distribution
48 |   posterior_samples <- sample_from_posterior(input_df, distribution, priors)
49 | 
50 |   # Calculate Win Probability
51 |   win_prob <- estimate_win_prob_given_posterior(posterior_samples = posterior_samples,
52 |                                                 winner_is_max = is_winner_max(distribution))
53 | 
54 |   # Calculate Value Remaining
55 |   vr <- estimate_loss(posterior_samples = posterior_samples,
56 |                       distribution = distribution,
57 |                       wrt_option = wrt_option_vr,
58 |                       metric = metric) %>%
59 |     stats::quantile(probs = loss_threshold, na.rm = TRUE)
60 | 
61 |   # Calculate Lift Relative to Baseline
62 |   lift <- estimate_lift(posterior_samples = posterior_samples,
63 |                         distribution = distribution,
64 |                         wrt_option = wrt_option_lift,
65 |                         metric = metric) %>%
66 |     stats::quantile(probs = 1 - lift_threshold, na.rm = TRUE)
67 | 
68 |   # Calculate Win Prob vs Baseline
69 |   win_prob_vs_base <- estimate_win_prob_vs_baseline_given_posterior(posterior_samples,
70 |                                                                     distribution,
71 |                                                                     wrt_option_lift)
72 |   list(`Win Probability` = win_prob,
73 |        `Value Remaining` = vr,
74 |        `Lift vs Baseline` = lift,
75 |        `Win Probability vs Baseline` = win_prob_vs_base)
76 | }
77 | 


--------------------------------------------------------------------------------
/R/sample_multi_rev_per_session.R:
--------------------------------------------------------------------------------
 1 | #' Sample Multiple Revenue Per Session
 2 | #'
 3 | #' Adds 5 new nested columns to the input_df: `dirichlet_params`,
 4 | #'     `gamma_params_A`, `gamma_params_B`, and `samples`.
 5 | #'     This samples from multiple revenue per session distributions
 6 | #'     at once.
 7 | #'
 8 | #'
 9 | #' See update_rules vignette for a mathematical representation.
10 | #'
11 | #' \deqn{conversion_i ~ MultiNomial(\phi_1, \phi_2, ..., \phi_k)}
12 | #' \deqn{\phi_k ~ Dirichlet(\alpha, \beta)}
13 | #' Conversion Rate is sampled from a Dirichlet distribution with a Multinomial likelihood
14 | #' of an individual converting.
15 | #'
16 | #' @param input_df Dataframe containing option_name (str),
17 | #'     sum_conversions (dbl), sum_sessions (dbl), sum_revenue (dbl),
18 | #'     sum_conversion_2 (dbl), sum_sessions_2 (dbl), sum_revenue_2 (dbl).
19 | #' @param priors Optional list of priors alpha0 and beta0.
20 | #'     Default \eqn{Beta(1,1)} will be use otherwise.
21 | #' @param n_samples Optional integer value. Defaults to 50,000 samples.
22 | #'
23 | #' @importFrom purrr map map2 pmap
24 | #' @importFrom dplyr mutate select %>%
25 | #' @importFrom stats rgamma
26 | #' @importFrom rlang .data
27 | #'
28 | #' @return input_df with 4 new nested columns `dirichlet_params`,
29 | #'     `gamma_params_A`, `gamma_params_B`, and `samples`.
30 | #'     `samples` in each row should be a tibble of length `n_samples`.
31 | #'
32 | sample_multi_rev_per_session <- function(input_df, priors, n_samples = 5e4){
33 |   input_df %>%
34 |     dplyr::mutate(
35 |       no_clicks = .data$sum_sessions - .data$sum_conversions - .data$sum_conversions_2,
36 |       dirichlet_params = purrr::pmap(.l = list(alpha_0 = .data$no_clicks,
37 |                                                alpha_1 = .data$sum_conversions,
38 |                                                alpha_2 = .data$sum_conversions_2),
39 |                                   ~ update_dirichlet(...,
40 |                                                      priors = priors)
41 |       ),
42 |       gamma_params_A = purrr::map2(.x = .data$sum_conversions,
43 |                                    .y = .data$sum_revenue,
44 |                                    ~ update_gamma(k = .x,
45 |                                                   theta = .y,
46 |                                                   priors = priors)
47 |       ),
48 |       gamma_params_B = purrr::map2(.x = .data$sum_conversions_2,
49 |                                    .y = .data$sum_revenue_2,
50 |                                    ~ update_gamma(k = .x,
51 |                                                   theta = .y,
52 |                                                   priors = priors)
53 |       ),
54 |       dirichlet_samples = purrr::map(.data$dirichlet_params,
55 |                                      ~ rdirichlet(n_samples, alphas_list = .x)
56 |       ),
57 |       gamma_samples_A = purrr::map(.data$gamma_params_A,
58 |                                    ~ stats::rgamma(n_samples,
59 |                                             shape = .x$k,
60 |                                             scale = .x$theta)
61 |       ),
62 |       gamma_samples_B = purrr::map(.data$gamma_params_B,
63 |                                    ~ stats::rgamma(n_samples,
64 |                                             shape = .x$k,
65 |                                             scale = .x$theta)
66 |       ),
67 |       samples = purrr::pmap(.l = list(conv_rates = .data$dirichlet_samples,
68 |                                       inverse_rev_A = .data$gamma_samples_A,
69 |                                       inverse_rev_B = .data$gamma_samples_B),
70 |                             ~ calculate_multi_rev_per_session(...)
71 |       )
72 |     ) %>%
73 |     select(
74 |       "option_name",
75 |       "sum_sessions",
76 |       "sum_conversions",
77 |       "sum_conversions_2",
78 |       "sum_revenue",
79 |       "sum_revenue_2",
80 |       "dirichlet_params",
81 |       "gamma_params_A",
82 |       "gamma_params_B",
83 |       "samples",
84 |     )
85 | }
86 | 


--------------------------------------------------------------------------------
/R/sample_total_cm.R:
--------------------------------------------------------------------------------
  1 | #' Sample Total CM (Given Impression Count)
  2 | #'
  3 | #' @description
  4 | #' Adds 4 new nested columns to the input_df: `beta_params_ctr`,
  5 | #' `beta_params_conv`,`gamma_params_rev`, `gamma_params_cost`
  6 | #' and `samples`.
  7 | #'
  8 | #' @details
  9 | #'`beta_params` and `gamma_params` in each row should be a tibble of length 2
 10 | #' (\eqn{\alpha} and \eqn{\beta} params and \eqn{k} and \eqn{\theta} params).
 11 | #'`samples` in each row should be a tibble of length `n_samples`.
 12 | #'
 13 | #' One assumption in this model is that sum_impressions is not stochastic.
 14 | #' This assumes that Clicks are stochastically generated from a set number
 15 | #' of Impressions. It does not require that the number of impressions are
 16 | #' equal on either side. Generally this assumption holds true in marketing
 17 | #' tests where traffic is split 50/50 and very little variance is observed
 18 | #' in the number of impressions on either side.
 19 | #'
 20 | #'
 21 | #' See update_rules vignette for a mathematical representation.
 22 | #'
 23 | #' \deqn{TotalCM = Impr * ExpectedCTR * (RevPerOrder * OrdersPerClick - ExpectedCPC)}
 24 | #'
 25 | #'
 26 | #' @param input_df Dataframe containing option_name (str),
 27 | #'     sum_conversions (dbl), sum_revenue (dbl), and sum_clicks (dbl).
 28 | #' @param priors Optional list of priors {alpha0, beta0} for Beta,
 29 | #'     {k0, theta0} for Gamma Inverse Revenue, and {k01, theta01} for
 30 | #'     Gamma Cost (uses alternate priors so they can be different from Revenue).
 31 | #'     Default \eqn{Beta(1,1)} and \eqn{Gamma(1, 250)} will be use otherwise.
 32 | #' @param n_samples Optional integer value. Defaults to 50,000 samples.
 33 | #'
 34 | #' @importFrom purrr pmap map2
 35 | #' @importFrom dplyr mutate select %>%
 36 | #' @importFrom stats rgamma rbeta
 37 | #' @importFrom rlang .data
 38 | #'
 39 | #' @return input_df with 5 new nested columns `beta_params_conv`,
 40 | #'     `beta_params_ctr`, `gamma_params_rev`,`gamma_params_cost`,
 41 | #'     and `samples`
 42 | #'
 43 | sample_total_cm <- function(input_df, priors, n_samples = 5e4){
 44 |   input_df %>%
 45 |     dplyr::mutate(
 46 |       beta_params_conv = purrr::map2(.x = .data$sum_conversions,
 47 |                                      .y = .data$sum_clicks,
 48 |                                      ~ update_beta(alpha = .x,
 49 |                                                    beta = .y - .x,
 50 |                                                    priors = priors)
 51 |       ),
 52 |       beta_params_ctr = purrr::map2(.x = .data$sum_clicks,
 53 |                                     .y = .data$sum_impressions,
 54 |                                     ~ update_beta(alpha = .x,
 55 |                                                   beta = .y - .x,
 56 |                                                   priors = priors)
 57 |       ),
 58 |       gamma_params_rev = purrr::map2(.x = .data$sum_conversions,
 59 |                                      .y = .data$sum_revenue,
 60 |                                      ~ update_gamma(k = .x,
 61 |                                                     theta = .y,
 62 |                                                     priors = priors)
 63 |       ),
 64 |       gamma_params_cost = purrr::map2(.x = .data$sum_clicks,
 65 |                                       .y = .data$sum_cost,
 66 |                                       ~ update_gamma(k = .x,
 67 |                                                      theta = .y,
 68 |                                                      priors = priors,
 69 |                                                      alternate_priors = TRUE)
 70 |       ),
 71 |       rev_per_click_samples = purrr::map2(.x = .data$beta_params_conv,
 72 |                                           .y = .data$gamma_params_rev,
 73 |                                           ~ stats::rbeta(n_samples,
 74 |                                                          shape1 = .x$alpha,
 75 |                                                          shape2 = .x$beta) /
 76 |                                             stats::rgamma(n_samples,
 77 |                                                           shape = .y$k,
 78 |                                                           scale = .y$theta)
 79 |       ),
 80 |       cost_per_click_samples = purrr::map(.x = .data$gamma_params_cost,
 81 |                                           ~ 1 / stats::rgamma(n_samples,
 82 |                                                               shape = .x$k,
 83 |                                                               scale = .x$theta)
 84 |       ),
 85 |       expected_clicks_rates = purrr::map(.x = .data$beta_params_ctr,
 86 |                                             ~ stats::rbeta(n_samples,
 87 |                                                            shape1 = .x$alpha,
 88 |                                                            shape2 = .x$beta)
 89 |       ),
 90 |       # Expected CTR samples Times Fixed Impressions
 91 |       expected_clicks_samples = purrr::map2(.x = .data$expected_clicks_rates,
 92 |                                             .y = .data$sum_impressions,
 93 |                                              ~ .x * .y),
 94 |       samples = purrr::pmap(list(rev_per_click = .data$rev_per_click_samples,
 95 |                                  cost_per_click = .data$cost_per_click_samples,
 96 |                                  expected_clicks = .data$expected_clicks_samples),
 97 |                             ~ calculate_total_cm(...)
 98 |       )
 99 |     ) %>%
100 |     dplyr::select(
101 |       -"rev_per_click_samples",
102 |       -"cost_per_click_samples",
103 |       -"expected_clicks_rates",
104 |       -"expected_clicks_samples",
105 |       )
106 | }
107 | 
108 | 
109 | 


--------------------------------------------------------------------------------
/vignettes/intro.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Intro Examples to grizbayr"
  3 | output: rmarkdown::html_vignette
  4 | vignette: >
  5 |   %\VignetteIndexEntry{start}
  6 |   %\VignetteEngine{knitr::rmarkdown}
  7 |   \usepackage[utf8]{inputenc}
  8 | ---
  9 | 
 10 | ```{r, include = FALSE}
 11 | knitr::opts_chunk$set(
 12 |   collapse = TRUE,
 13 |   comment = "#>"
 14 | )
 15 | ```
 16 | 
 17 | ```{r setup, warning=FALSE, message=FALSE}
 18 | library(grizbayr)
 19 | library(dplyr)
 20 | ```
 21 | 
 22 | ## About the Package
 23 | 
 24 | Bayesian Inference is a method of statistical inference that can be used in the analysis of observed data from marketing tests. Bayesian updates start with a prior distribution (prior probable information about the environment) and a likelihood function (an expected distribution from which the samples are drawn). Then, given some observed data, the prior can be multiplied by the likelihood of the data to produce a posterior distribution of probabilities. At the core of all of this is Bayes' Rule. 
 25 | 
 26 | $$ P(A\ |\ Data) \sim P(Data\ |\ A) \cdot P(A)$$
 27 | This package is intended to abstract the math of the conjugate prior update rules to provide 3 pieces of information for a user:
 28 | 
 29 | 1. Win Probability (overall and vs baseline)
 30 | 1. Value Remaining
 31 | 1. Lift vs. Control
 32 | 
 33 | ## Usage
 34 | 
 35 | Select which piece of information you would like to calculate.
 36 | 
 37 | | Metric                       | Function Call                     |
 38 | |------------------------------|-----------------------------------|
 39 | | All Below Metrics            | `calculate_all_metrics()`         |
 40 | | Win Probability              | `estimate_win_prob()`             |
 41 | | Value Remaining              | `estimate_value_remaining()`      |
 42 | | Lift vs. Control             | `estimate_lift_vs_baseline()`     |
 43 | | Win Probability vs. Baseline | `estimate_win_prob_vs_baseline()` |
 44 | 
 45 | If you would like to calculate all the metrics then use `calculate_all_metrics()`. This is a slightly more efficient implementation since it only needs to sample from the posterior once for all 4 calculations instead of once for each metric.
 46 | 
 47 | ### Create an Input Dataframe or Tibble
 48 | 
 49 | All of these functions require a very specific tibble format. However, the same tibble can be used in all metric calculations. A tibble is used here because it has the additional check that all column lengths are the same. A tibble of this format can also conveniently be created using dplyr's `group_by() %>% summarise()` sequence of functions.
 50 | 
 51 | The columns in the following table are required if there is an `X` in the box for the distribution. (Int columns can also be dbl due to R coercian)
 52 | 
 53 | | Distribution Type         | option_name (char) | sum_impressions (int) | sum_clicks (int) | sum_sessions (int) | sum_conversions (dbl) | sum_revenue (dbl) | sum_cost (dbl) | sum_conversions_2 (dbl) | sum_revenue_2 (dbl) | sum_duration (dbl) | sum_page_views (int) |
 54 | |---------------------------|:------------------:|:---------------------:|:----------------:|:------------------:|:---------------------:|:-----------------:|:--------------:|:-----------------------:|:-------------------:|:------------------:|:--------------------:|
 55 | | Conversion Rate           |          X         |                       |         X        |                    |           X           |                   |                |                         |                     |                    |                      |
 56 | | Response Rate             |          X         |                       |                  |          X         |           X           |                   |                |                         |                     |                    |                      |
 57 | | Click Through Rate (CTR)  |          X         |           X           |         X        |                    |                       |                   |                |                         |                     |                    |                      |
 58 | | Revenue Per Session       |          X         |                       |                  |          X         |           X           |         X         |                |                         |                     |                    |                      |
 59 | | Multi Revenue Per Session |          X         |                       |                  |          X         |           X           |         X         |                |             X           |           X         |                    |                      |
 60 | | Cost Per Activation (CPA) |          X         |                       |         X        |                    |           X           |                   |        X       |                         |                     |                    |                      |
 61 | | Total CM                  |          X         |           X           |         X        |                    |           X           |         X         |        X       |                         |                     |                    |                      |
 62 | | CM Per Click              |          X         |                       |         X        |                    |           X           |         X         |        X       |                         |                     |                    |                      |
 63 | | Cost Per Click (CPC)      |          X         |                       |         X        |                    |                       |                   |        X       |                         |                     |                    |                      |
 64 | | Session Duration          |          X         |                       |                  |          X         |                       |                   |                |                         |                     |           X        |                      |
 65 | | Page Views Per Session    |          X         |                       |                  |          X         |                       |                   |                |                         |                     |                    |            X         |
 66 | 
 67 | #### Example:
 68 | We will use the Conversion Rate distribution for this example so we need the columns **option_name**, **sum_clicks**, and **sum_conversions**.
 69 | 
 70 | ```{r}
 71 | raw_data_long_format <- tibble::tribble(
 72 |    ~option_name, ~clicks, ~conversions,
 73 |             "A",       6,           3,
 74 |             "A",       1,           0,
 75 |             "B",       2,           1,
 76 |             "A",       2,           0,
 77 |             "A",       1,           0,
 78 |             "B",       5,           2,
 79 |             "A",       1,           0,
 80 |             "B",       1,           1,
 81 |             "B",       1,           0,
 82 |             "A",       3,           1,
 83 |             "B",       1,           0,
 84 |             "A",       1,           1
 85 | )
 86 | 
 87 | raw_data_long_format %>% 
 88 |   dplyr::group_by(option_name) %>% 
 89 |   dplyr::summarise(sum_clicks = sum(clicks), 
 90 |                    sum_conversions = sum(conversions))
 91 | ```
 92 | 
 93 | This input dataframe can also be created manually if the aggregations are already done in an external program.
 94 | 
 95 | ```{r}
 96 | # Since this is a stochastic process with a random number generator,
 97 | # it is worth setting the seed to get consistent results.
 98 | set.seed(1776)
 99 | 
100 | input_df <- tibble::tibble(
101 |   option_name = c("A", "B", "C"),
102 |   sum_clicks = c(1000, 1000, 1000),
103 |   sum_conversions = c(100, 120, 110)
104 | )
105 | input_df
106 | ```
107 | 
108 | One note: clicks or sessions must be greater than or equal to the number of conversions (this is a rate bound between 0 and 1).
109 | 
110 | `input_df` is used in the following examples. 
111 | 
112 | ### Estimate All Metrics
113 | 
114 | This function wraps all the below functions into one call.
115 | 
116 | ```{r}
117 | estimate_all_values(input_df, distribution = "conversion_rate", wrt_option_lift = "A")
118 | ```
119 | 
120 | 
121 | ### Win Probability
122 | 
123 | This produces a tibble with all the option names, the `win_prob_raw` so this can be used as a double, and a cleaned string `win_prob` where the decimal is represented as a percent. 
124 | 
125 | ```{r}
126 | estimate_win_prob(input_df, distribution = "conversion_rate")
127 | ```
128 | 
129 | ### Value Remaining (Loss)
130 | 
131 | Value Remaining is a measure of loss. If B is selected as the current best option, we can estimate with 95% confidence (default), that an alternative option is not more than X% worse than the current expected best option. 
132 | 
133 | ```{r}
134 | estimate_value_remaining(input_df, distribution = "conversion_rate")
135 | ```
136 | 
137 | This number can also be framed in absolute dollar terms (or percentage points in the case of a rate metric).
138 | 
139 | ```{r}
140 | estimate_value_remaining(input_df, distribution = "conversion_rate", metric = "absolute")
141 | ```
142 | 
143 | ### Estimate Lift
144 | 
145 | The `metric` argument defaults to `lift` which produces a percent lift vs the baseline. Sometimes we may want to understand this lift in absolute terms (especially when samples from the posteriors could be negative, such as Contribution Margin (CM).)
146 | 
147 | ```{r}
148 | estimate_lift_vs_baseline(input_df, distribution = "conversion_rate", wrt_option = "A")
149 | ```
150 | 
151 | ```{r}
152 | estimate_lift_vs_baseline(input_df, distribution = "conversion_rate", wrt_option = "A", metric = "absolute")
153 | ```
154 | 
155 | ### Win Probability vs. Baseline
156 | 
157 | This function is used to compare an individual option to the best option as opposed to the win probability of each option overall.
158 | 
159 | ```{r}
160 | estimate_win_prob_vs_baseline(input_df, distribution = "conversion_rate", wrt_option = "A")
161 | ```
162 | 
163 | ### Sample From the Posterior
164 | 
165 | Samples can be directly collected from the posterior with the following function.
166 | 
167 | ```{r}
168 | sample_from_posterior(input_df, distribution = "conversion_rate")
169 | ```
170 | 
171 | ## Alternate Distribution Type (Rev Per Session)
172 | ```{r}
173 | (input_df_rps <- tibble::tibble(
174 |    option_name = c("A", "B", "C"),
175 |    sum_sessions = c(1000, 1000, 1000),
176 |    sum_conversions = c(100, 120, 110),
177 |    sum_revenue = c(900, 1200, 1150)
178 | ))
179 | 
180 | estimate_all_values(input_df_rps, distribution = "rev_per_session", wrt_option_lift = "A")
181 | ```
182 | 
183 | 
184 | 
185 | ## Valid Posteriors
186 | 
187 | You may want to pass alternate priors to a distribution. 
188 | Only do this if you are making an informed decision.
189 | 
190 | ```
191 | Beta - alpha0, beta0
192 | Gamma - k0, theta0 (k01, theta01 if alternate Gamma priors are required)
193 | Dirichlet - alpha_00 (none), alpha_01 (first conversion type), alpha_02 (alternate conversion type)
194 | ```
195 | 
196 | ```{r}
197 | # You can also pass priors for just the Beta distribution and not the Gamma distribution.
198 | new_priors <- list(alpha0 = 2, beta0 = 10, k0 = 3, theta0 = 10000)
199 | estimate_all_values(input_df_rps, distribution = "rev_per_session", wrt_option_lift = "A", priors = new_priors)
200 | ```
201 | 
202 | ## Looping Over All Distributions
203 | 
204 | You may want to evaluate the results of a test in multiple different distributions.
205 | 
206 | ```{r}
207 | (input_df_all <- tibble::tibble(
208 |    option_name = c("A", "B", "C"),
209 |    sum_impressions = c(10000, 9000, 11000),
210 |    sum_sessions = c(1000, 1000, 1000),
211 |    sum_conversions = c(100, 120, 110),
212 |    sum_revenue = c(900, 1200, 1150),
213 |    sum_cost = c(10, 50, 30),
214 |    sum_conversions_2 = c(10, 8, 20),
215 |    sum_revenue_2 = c(10, 16, 15)
216 | ) %>% 
217 |   dplyr::mutate(sum_clicks = sum_sessions)) # Clicks are the same as Sessions
218 | 
219 | distributions <- c("conversion_rate", "response_rate", "ctr", "rev_per_session", "multi_rev_per_session", "cpa", "total_cm", "cm_per_click", "cpc")
220 | 
221 | # Purrr map allows us to apply a function to each element of a list. (Similar to a for loop)
222 | purrr::map(distributions,
223 |            ~ estimate_all_values(input_df_all,
224 |                                  distribution = .x,
225 |                                  wrt_option_lift = "A",
226 |                                  metric = "absolute")
227 | )
228 | ```
229 | 
230 | 


--------------------------------------------------------------------------------