├── .github └── workflows │ ├── coverage.yml │ ├── release.yml │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── bayesian_testing ├── __init__.py ├── experiments │ ├── __init__.py │ ├── base.py │ ├── binary.py │ ├── delta_lognormal.py │ ├── delta_normal.py │ ├── discrete.py │ ├── exponential.py │ ├── normal.py │ └── poisson.py ├── metrics │ ├── __init__.py │ ├── evaluation.py │ └── posteriors.py └── utilities │ ├── __init__.py │ ├── common.py │ ├── logging.conf │ └── logging.py ├── codecov.yml ├── examples ├── README.md ├── data │ └── session_data.csv ├── dice_rolls_ab_testing.ipynb ├── goals_scored_ab_testing.ipynb ├── session_data_ab_testing.ipynb ├── session_data_manual_pbbs.ipynb └── waiting_time_ab_testing.ipynb ├── poetry.lock ├── pyproject.toml └── tests ├── README.md ├── test_binary.py ├── test_delta_lognormal.py ├── test_delta_normal.py ├── test_discrete.py ├── test_evaluation.py ├── test_exponential.py ├── test_normal.py ├── test_poisson.py ├── test_posteriors.py └── test_validators.py /.github/workflows/coverage.yml: -------------------------------------------------------------------------------- 1 | name: Coverage 2 | on: push 3 | 4 | jobs: 5 | coverage: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - name: Check out repository code 9 | uses: actions/checkout@v2 10 | - name: Set up Python 11 | uses: actions/setup-python@v1 12 | with: 13 | python-version: '3.10' 14 | - name: Install dependencies 15 | run: | 16 | python -m pip install --upgrade pip 17 | pip install poetry==2.* 18 | poetry install 19 | - name: Test with pytest 20 | run: | 21 | poetry run coverage run -m pytest 22 | poetry run coverage report -i 23 | poetry run coverage xml -i 24 | - name: Upload coverage to Codecov 25 | uses: codecov/codecov-action@v2 26 | with: 27 | fail_ci_if_error: true 28 | token: ${{ secrets.CODECOV_TOKEN }} 29 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | release: 4 | types: [published] 5 | jobs: 6 | release: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v2 10 | - uses: actions/setup-python@v1 11 | with: 12 | python-version: '3.10' 13 | architecture: x64 14 | - run: python -m pip install --upgrade pip 15 | - run: pip install poetry==2.* 16 | - run: poetry install 17 | - run: poetry run coverage run -m pytest 18 | - run: poetry build 19 | - run: poetry publish --username=__token__ --password=${{ secrets.PYPI_TOKEN }} 20 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: 3 | pull_request: 4 | branches: 5 | - main 6 | 7 | jobs: 8 | tests: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Check out repository code 12 | uses: actions/checkout@v2 13 | - name: Set up Python 14 | uses: actions/setup-python@v1 15 | with: 16 | python-version: '3.10' 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install poetry==2.* 21 | poetry install 22 | - name: Test with pytest 23 | run: | 24 | poetry run coverage run -m pytest 25 | poetry run coverage report -i 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized 
/ DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # other 132 | .DS_Store 133 | .idea 134 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - id: trailing-whitespace 8 | - repo: https://github.com/psf/black 9 | rev: 22.3.0 10 | hooks: 11 | - id: black 12 | args: [--line-length=100] 13 | - repo: https://github.com/pycqa/flake8 14 | rev: 6.1.0 15 | hooks: 16 | - id: flake8 17 | args: [--max-line-length=100] 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Matus Baniar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Tests](https://github.com/Matt52/bayesian-testing/workflows/Tests/badge.svg)](https://github.com/Matt52/bayesian-testing/actions?workflow=Tests) 2 | [![Codecov](https://codecov.io/gh/Matt52/bayesian-testing/branch/main/graph/badge.svg)](https://codecov.io/gh/Matt52/bayesian-testing) 3 | [![PyPI](https://img.shields.io/pypi/v/bayesian-testing.svg)](https://pypi.org/project/bayesian-testing/) 4 | # Bayesian A/B testing 5 | `bayesian_testing` is a small package for a quick evaluation of A/B (or A/B/C/...) tests using 6 | Bayesian approach. 7 | 8 | **Implemented tests:** 9 | - [BinaryDataTest](bayesian_testing/experiments/binary.py) 10 | - **_Input data_** - binary data (`[0, 1, 0, ...]`) 11 | - Designed for conversion-like data A/B testing. 
12 | - [NormalDataTest](bayesian_testing/experiments/normal.py)
13 |   - **_Input data_** - normal data with unknown variance
14 |   - Designed for normal data A/B testing.
15 | - [DeltaLognormalDataTest](bayesian_testing/experiments/delta_lognormal.py)
16 |   - **_Input data_** - lognormal data with zeros
17 |   - Designed for revenue-like data A/B testing.
18 | - [DeltaNormalDataTest](bayesian_testing/experiments/delta_normal.py)
19 |   - **_Input data_** - normal data with zeros
20 |   - Designed for profit-like data A/B testing.
21 | - [DiscreteDataTest](bayesian_testing/experiments/discrete.py)
22 |   - **_Input data_** - categorical data with numerical categories
23 |   - Designed for discrete data A/B testing (e.g. dice rolls, star ratings, 1-10 ratings, etc.).
24 | - [PoissonDataTest](bayesian_testing/experiments/poisson.py)
25 |   - **_Input data_** - non-negative integers (`[1, 0, 3, ...]`)
26 |   - Designed for Poisson data A/B testing.
27 | - [ExponentialDataTest](bayesian_testing/experiments/exponential.py)
28 |   - **_Input data_** - exponential data (non-negative real numbers)
29 |   - Designed for exponential data A/B testing (e.g. session/waiting time, time between events,
30 | etc.).
31 | 
32 | **Implemented evaluation metrics:**
33 | - `Posterior Mean`
34 |   - Expected value from the posterior distribution for a given variant.
35 | - `Credible Interval`
36 |   - Quantile-based credible intervals based on simulations from posterior distributions (i.e.
37 | empirical).
38 |   - Interval probability (`interval_alpha`) can be set during the evaluation (default value is 95%).
39 | - `Probability of Being Best`
40 |   - Probability that a given variant is best among all variants.
41 |   - By default, `the best` is equivalent to `the greatest` (from a data/metric point of view);
42 | however, it is possible to change this by using `min_is_best=True` in the evaluation method
43 | (this can be useful when looking for the variant with the smallest tested measure).
44 | - `Expected Loss`
45 |   - "Risk" of choosing a particular variant over the other variants in the test.
46 |   - Measured in the same units as the tested measure (e.g. positive rate or average value).
47 | 
48 | `Credible Interval`, `Probability of Being Best` and `Expected Loss` are calculated using
49 | simulations from posterior distributions (considering the given data).
50 | 
51 | 
52 | ## Installation
53 | `bayesian_testing` can be installed using pip:
54 | ```console
55 | pip install bayesian_testing
56 | ```
57 | Alternatively, you can clone the repository and use `poetry` manually:
58 | ```console
59 | cd bayesian_testing
60 | pip install poetry
61 | poetry install
62 | poetry shell
63 | ```
64 | 
65 | ## Basic Usage
66 | The primary features are the classes:
67 | - `BinaryDataTest`
68 | - `NormalDataTest`
69 | - `DeltaLognormalDataTest`
70 | - `DeltaNormalDataTest`
71 | - `DiscreteDataTest`
72 | - `PoissonDataTest`
73 | - `ExponentialDataTest`
74 | 
75 | All test classes support two methods to insert the data:
76 | - `add_variant_data` - Adding raw data for a variant as a list of observations (or a numpy 1-D array).
77 | - `add_variant_data_agg` - Adding aggregated variant data (this can be practical for large data,
78 | as the aggregation can already be done at the database level).
79 | 
80 | Both methods for adding data allow specification of prior distributions
81 | (see details in the respective docstrings). The default prior setup should be sufficient for most
82 | cases (e.g. cases with unknown priors or large amounts of data).
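For illustration, a minimal sketch of overriding the default prior for one variant of a `BinaryDataTest` (the parameter names match the docstrings; the chosen numbers are purely illustrative):

```python
from bayesian_testing.experiments import BinaryDataTest

test = BinaryDataTest()
# a Beta(10, 190) prior encodes a prior belief of a ~5% positive rate
# with an effective prior sample size of 200 observations:
test.add_variant_data_agg("A", totals=1000, positives=50, a_prior=10, b_prior=190)
```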
83 | 
84 | To get the results of the test, simply call the `evaluate` method.
85 | 
86 | Probability of being best, expected loss and credible intervals are approximated using simulations,
87 | hence the `evaluate` method can return slightly different values for different runs. To stabilize
88 | the results, you can set the `sim_count` parameter of `evaluate` to a higher value (the default is
89 | 20K), or even use the `seed` parameter to fix them completely.
90 | 
91 | ### BinaryDataTest
92 | Class for a Bayesian A/B test for binary-like data (e.g. conversions, successes, etc.).
93 | 
94 | **Example:**
95 | ```python
96 | import numpy as np
97 | from bayesian_testing.experiments import BinaryDataTest
98 | 
99 | # generating some random data
100 | rng = np.random.default_rng(52)
101 | # random 1x1500 array of 0/1 data with 5.2% probability for 1:
102 | data_a = rng.binomial(n=1, p=0.052, size=1500)
103 | # random 1x1200 array of 0/1 data with 6.7% probability for 1:
104 | data_b = rng.binomial(n=1, p=0.067, size=1200)
105 | 
106 | # initialize a test:
107 | test = BinaryDataTest()
108 | 
109 | # add variants using raw data (arrays of zeros and ones):
110 | test.add_variant_data("A", data_a)
111 | test.add_variant_data("B", data_b)
112 | # priors can be specified like this (the default for this test is a=b=1/2):
113 | # test.add_variant_data("B", data_b, a_prior=1, b_prior=20)
114 | 
115 | # add a variant using aggregated data (same as raw data with 950 zeros and 50 ones):
116 | test.add_variant_data_agg("C", totals=1000, positives=50)
117 | 
118 | # evaluate test:
119 | results = test.evaluate()
120 | results
121 | # print(pd.DataFrame(results).set_index('variant').T.to_markdown(tablefmt="grid"))
122 | ```
123 | 
124 | +-------------------+-----------+-------------+-------------+
125 | |                   | A         | B           | C           |
126 | +===================+===========+=============+=============+
127 | | totals            | 1500      | 1200        | 1000        |
128 | +-------------------+-----------+-------------+-------------+
129 | | positives         | 80        | 80          | 50          |
130 | +-------------------+-----------+-------------+-------------+
131 | | positive_rate     | 0.05333   | 0.06667     | 0.05        |
132 | +-------------------+-----------+-------------+-------------+
133 | | posterior_mean    | 0.05363   | 0.06703     | 0.05045     |
134 | +-------------------+-----------+-------------+-------------+
135 | | credible_interval | [0.04284, | [0.0535309, | [0.0379814, |
136 | |                   | 0.065501] | 0.0816476]  | 0.0648625]  |
137 | +-------------------+-----------+-------------+-------------+
138 | | prob_being_best   | 0.06485   | 0.89295     | 0.0422      |
139 | +-------------------+-----------+-------------+-------------+
140 | | expected_loss     | 0.0139248 | 0.0004693   | 0.0170767   |
141 | +-------------------+-----------+-------------+-------------+
142 | 
143 | ### NormalDataTest
144 | Class for a Bayesian A/B test for normal data.
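The aggregated input for this test consists of simple sufficient statistics - the observation count, the sum of values, and the sum of squared values (see `add_variant_data_agg` in the example below). A small sketch of precomputing them, e.g. before shipping data out of a database or worker (the helper name is ours, not part of the package):

```python
import numpy as np

def normal_sufficient_stats(values):
    # count, sum and sum of squares - the aggregated inputs
    # expected by NormalDataTest.add_variant_data_agg
    arr = np.asarray(values, dtype=float)
    return len(arr), float(arr.sum()), float(np.square(arr).sum())
```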
145 | 
146 | **Example:**
147 | ```python
148 | import numpy as np
149 | from bayesian_testing.experiments import NormalDataTest
150 | 
151 | # generating some random data
152 | rng = np.random.default_rng(21)
153 | data_a = rng.normal(7.2, 2, 1000)
154 | data_b = rng.normal(7.1, 2, 800)
155 | data_c = rng.normal(7.0, 4, 500)
156 | 
157 | # initialize a test:
158 | test = NormalDataTest()
159 | 
160 | # add variants using raw data:
161 | test.add_variant_data("A", data_a)
162 | test.add_variant_data("B", data_b)
163 | # test.add_variant_data("C", data_c)
164 | 
165 | # add a variant using aggregated data:
166 | test.add_variant_data_agg("C", len(data_c), sum(data_c), sum(np.square(data_c)))
167 | 
168 | # evaluate test:
169 | results = test.evaluate(sim_count=20000, seed=52, min_is_best=False, interval_alpha=0.99)
170 | results
171 | # print(pd.DataFrame(results).set_index('variant').T.to_markdown(tablefmt="grid"))
172 | ```
173 | 
174 | +-------------------+-------------+-------------+-------------+
175 | |                   | A           | B           | C           |
176 | +===================+=============+=============+=============+
177 | | totals            | 1000        | 800         | 500         |
178 | +-------------------+-------------+-------------+-------------+
179 | | sum_values        | 7294.67901  | 5685.86168  | 3736.91581  |
180 | +-------------------+-------------+-------------+-------------+
181 | | avg_values        | 7.29468     | 7.10733     | 7.47383     |
182 | +-------------------+-------------+-------------+-------------+
183 | | posterior_mean    | 7.29462     | 7.10725     | 7.4737      |
184 | +-------------------+-------------+-------------+-------------+
185 | | credible_interval | [7.1359436, | [6.9324733, | [7.0240102, |
186 | |                   | 7.4528369]  | 7.2779293]  | 7.9379341]  |
187 | +-------------------+-------------+-------------+-------------+
188 | | prob_being_best   | 0.1707      | 0.00125     | 0.82805     |
189 | +-------------------+-------------+-------------+-------------+
190 | | expected_loss     | 0.1968735   | 0.385112    | 0.0169998   |
191 | +-------------------+-------------+-------------+-------------+
192 | 
193 | ### DeltaLognormalDataTest
194 | Class for a Bayesian A/B test for delta-lognormal data (log-normal with zeros).
195 | Delta-lognormal data is a typical case of revenue-per-session data, where many sessions have 0
196 | revenue and the non-zero values are positive, following an approximately log-normal distribution.
197 | To handle such data, the calculation combines a binary Bayes model for zero vs. non-zero
198 | "conversions" with a log-normal model for the non-zero values.
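Conceptually, each posterior draw of the average value multiplies a conversion-probability draw by a draw of the log-normal mean. Below is a deliberately simplified sketch of a single such draw - it plugs point estimates in for the log-normal parameters, whereas the package also samples those from their (Normal-inverse-gamma) posterior:

```python
import numpy as np

rng = np.random.default_rng(52)

def simplified_posterior_draw(totals, positives, sum_logs, sum_logs_2, a=0.5, b=0.5):
    # binary part: Beta posterior draw for P(non-zero value)
    conv = rng.beta(a + positives, b + totals - positives)
    # log-normal part: point estimates of the log-mean and log-variance
    mu = sum_logs / positives
    var = sum_logs_2 / positives - mu ** 2
    # mean of a lognormal(mu, var), weighted by the conversion probability
    return conv * np.exp(mu + var / 2)
```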
199 | 
200 | **Example:**
201 | ```python
202 | import numpy as np
203 | from bayesian_testing.experiments import DeltaLognormalDataTest
204 | 
205 | test = DeltaLognormalDataTest()
206 | 
207 | data_a = [7.1, 0.3, 5.9, 0, 1.3, 0.3, 0, 1.2, 0, 3.6, 0, 1.5,
208 |           2.2, 0, 4.9, 0, 0, 1.1, 0, 0, 7.1, 0, 6.9, 0]
209 | data_b = [4.0, 0, 3.3, 19.3, 18.5, 0, 0, 0, 12.9, 0, 0, 0, 10.2,
210 |           0, 0, 23.1, 0, 3.7, 0, 0, 11.3, 10.0, 0, 18.3, 12.1]
211 | 
212 | # adding a variant using raw data:
213 | test.add_variant_data("A", data_a)
214 | # test.add_variant_data("B", data_b)
215 | 
216 | # alternatively, a variant can also be added using aggregated data
217 | # (it looks more complicated, but it can be quite handy for large data):
218 | test.add_variant_data_agg(
219 |     name="B",
220 |     totals=len(data_b),
221 |     positives=sum(x > 0 for x in data_b),
222 |     sum_values=sum(data_b),
223 |     sum_logs=sum([np.log(x) for x in data_b if x > 0]),
224 |     sum_logs_2=sum([np.square(np.log(x)) for x in data_b if x > 0])
225 | )
226 | 
227 | # evaluate test:
228 | results = test.evaluate(seed=21)
229 | results
230 | # print(pd.DataFrame(results).set_index('variant').T.to_markdown(tablefmt="grid"))
231 | ```
232 | 
233 | +---------------------+-------------+-------------+
234 | |                     | A           | B           |
235 | +=====================+=============+=============+
236 | | totals              | 24          | 25          |
237 | +---------------------+-------------+-------------+
238 | | positives           | 13          | 12          |
239 | +---------------------+-------------+-------------+
240 | | sum_values          | 43.4        | 146.7       |
241 | +---------------------+-------------+-------------+
242 | | avg_values          | 1.80833     | 5.868       |
243 | +---------------------+-------------+-------------+
244 | | avg_positive_values | 3.33846     | 12.225      |
245 | +---------------------+-------------+-------------+
246 | | posterior_mean      | 2.09766     | 6.19017     |
247 | +---------------------+-------------+-------------+
248 | | credible_interval   | [0.9884509, | [3.3746212, |
249 | |                     | 6.9054963]  | 11.7349253] |
250 | +---------------------+-------------+-------------+
251 | | prob_being_best     | 0.04815     | 0.95185     |
252 | +---------------------+-------------+-------------+
253 | | expected_loss       | 4.0941101   | 0.1588627   |
254 | +---------------------+-------------+-------------+
255 | 
256 | ***Note**: Alternatively, `DeltaNormalDataTest` can be used for cases where conversions are not
257 | necessarily positive values.*
258 | 
259 | ### DiscreteDataTest
260 | Class for a Bayesian A/B test for discrete data with a finite number of numerical categories
261 | (states), each representing some value.
262 | This test can be used, for instance, for dice roll data (when looking for the "best" of multiple
263 | dice) or rating data (e.g. 1-5 stars or a 1-10 scale).
264 | 
265 | **Example:**
266 | ```python
267 | from bayesian_testing.experiments import DiscreteDataTest
268 | 
269 | # dice rolls data for 3 dice - A, B, C
270 | data_a = [2, 5, 1, 4, 6, 2, 2, 6, 3, 2, 6, 3, 4, 6, 3, 1, 6, 3, 5, 6]
271 | data_b = [1, 2, 2, 2, 2, 3, 2, 3, 4, 2]
272 | data_c = [1, 3, 6, 5, 4]
273 | 
274 | # initialize a test with all possible states (i.e. numerical categories):
275 | test = DiscreteDataTest(states=[1, 2, 3, 4, 5, 6])
276 | 
277 | # add variants using raw data:
278 | test.add_variant_data("A", data_a)
279 | test.add_variant_data("B", data_b)
280 | test.add_variant_data("C", data_c)
281 | 
282 | # add a variant using aggregated data:
283 | # test.add_variant_data_agg("C", [1, 0, 1, 1, 1, 1])  # equivalent to rolls in data_c
284 | 
285 | # evaluate test:
286 | results = test.evaluate(sim_count=20000, seed=52, min_is_best=False, interval_alpha=0.95)
287 | results
288 | # print(pd.DataFrame(results).set_index('variant').T.to_markdown(tablefmt="grid"))
289 | ```
290 | 
291 | +-------------------+------------------+------------------+------------------+
292 | |                   | A                | B                | C                |
293 | +===================+==================+==================+==================+
294 | | concentration     | {1: 2.0, 2: 4.0, | {1: 1.0, 2: 6.0, | {1: 1.0, 2: 0.0, |
295 | |                   | 3: 4.0, 4: 2.0,  | 3: 2.0, 4: 1.0,  | 3: 1.0, 4: 1.0,  |
296 | |                   | 5: 2.0, 6: 6.0}  | 5: 0.0, 6: 0.0}  | 5: 1.0, 6: 1.0}  |
297 | +-------------------+------------------+------------------+------------------+
298 | | average_value     | 3.8              | 2.3              | 3.8              |
299 | +-------------------+------------------+------------------+------------------+
300 | | posterior_mean    | 3.73077          | 2.75             | 3.63636          |
301 | +-------------------+------------------+------------------+------------------+
302 | | credible_interval | [3.0710797,      | [2.1791584,      | [2.6556465,      |
303 | |                   | 4.3888021]       | 3.4589178]       | 4.5784839]       |
304 | +-------------------+------------------+------------------+------------------+
305 | | prob_being_best   | 0.54685          | 0.008            | 0.44515          |
306 | +-------------------+------------------+------------------+------------------+
307 | | expected_loss     | 0.199953         | 1.1826766        | 0.2870247        |
308 | +-------------------+------------------+------------------+------------------+
309 | 
310 | ### PoissonDataTest
311 | Class for a Bayesian A/B test for Poisson data.
312 | 
313 | **Example:**
314 | ```python
315 | from bayesian_testing.experiments import PoissonDataTest
316 | 
317 | # goals received - so less is better (duh...)
318 | psg_goals_against = [0, 2, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 3, 1, 0]
319 | city_goals_against = [0, 0, 3, 2, 0, 1, 0, 3, 0, 1, 1, 0, 1, 2]
320 | bayern_goals_against = [1, 0, 0, 1, 1, 2, 1, 0, 2, 0, 0, 2, 2, 1, 0]
321 | 
322 | # initialize a test:
323 | test = PoissonDataTest()
324 | 
325 | # add a variant using raw data:
326 | test.add_variant_data('psg', psg_goals_against)
327 | 
328 | # example with specific priors
329 | # ("b_prior" as an effective sample size, and "a_prior/b_prior" as a prior mean):
330 | test.add_variant_data('city', city_goals_against, a_prior=3, b_prior=1)
331 | # test.add_variant_data('bayern', bayern_goals_against)
332 | 
333 | # add a variant using aggregated data:
334 | test.add_variant_data_agg("bayern", len(bayern_goals_against), sum(bayern_goals_against))
335 | 
336 | # evaluate test (since fewer goals is better, we explicitly set min_is_best to True)
337 | results = test.evaluate(sim_count=20000, seed=52, min_is_best=True)
338 | results
339 | # print(pd.DataFrame(results).set_index('variant').T.to_markdown(tablefmt="grid"))
340 | ```
341 | 
342 | +-------------------+-------------+-------------+------------+
343 | |                   | psg         | city        | bayern     |
344 | +===================+=============+=============+============+
345 | | totals            | 15          | 14          | 15         |
346 | +-------------------+-------------+-------------+------------+
347 | | sum_values        | 9           | 14          | 13         |
348 | +-------------------+-------------+-------------+------------+
349 | | observed_average  | 0.6         | 1.0         | 0.86667    |
350 | +-------------------+-------------+-------------+------------+
351 | | posterior_mean    | 0.60265     | 1.13333     | 0.86755    |
352 | +-------------------+-------------+-------------+------------+
353 | | credible_interval | [0.2800848, | [0.6562029, | [0.465913, |
354 | |                   | 1.0570327]  | 1.7265045]  | 1.3964389] |
355 | +-------------------+-------------+-------------+------------+
356 | | prob_being_best   | 0.78175     | 0.0344      | 0.18385    |
357 | +-------------------+-------------+-------------+------------+
358 | | expected_loss     | 0.0369998   | 0.5620553   | 0.3003345  |
359 | +-------------------+-------------+-------------+------------+
360 | 
361 | _Note: Since we set `min_is_best=True` (because received goals are "bad"), the probability and
362 | loss are in favor of variants with lower posterior means._
363 | 
364 | ### ExponentialDataTest
365 | Class for a Bayesian A/B test for exponential data.
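For reference, the exponential likelihood has a conjugate Gamma prior on its rate, so the posterior update reduces to simple counting. A sketch of that update (the prior values here are illustrative and not necessarily the package defaults):

```python
import numpy as np

def exponential_rate_posterior(data, a_prior=0.5, b_prior=0.5):
    # rate ~ Gamma(a, b) prior; after n observations with sum s,
    # the posterior is Gamma(a + n, b + s) (shape/rate parametrization)
    n, s = len(data), float(np.sum(data))
    return a_prior + n, b_prior + s

a_post, b_post = exponential_rate_posterior([9.1, 12.3, 7.8])
mean_rate = a_post / b_post          # posterior mean of the rate
mean_scale = b_post / (a_post - 1)   # posterior mean of the scale (needs a_post > 1)
```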
366 | 367 | **Example:** 368 | ```python 369 | import numpy as np 370 | from bayesian_testing.experiments import ExponentialDataTest 371 | 372 | # waiting times for 3 different variants, each with many observations, 373 | # generated using exponential distributions with defined scales (expected values) 374 | waiting_times_a = np.random.exponential(scale=10, size=200) 375 | waiting_times_b = np.random.exponential(scale=11, size=210) 376 | waiting_times_c = np.random.exponential(scale=11, size=220) 377 | 378 | # initialize a test: 379 | test = ExponentialDataTest() 380 | # adding variants using the observation data: 381 | test.add_variant_data('A', waiting_times_a) 382 | test.add_variant_data('B', waiting_times_b) 383 | test.add_variant_data('C', waiting_times_c) 384 | 385 | # alternatively, add variants using aggregated data: 386 | # test.add_variant_data_agg('A', len(waiting_times_a), sum(waiting_times_a)) 387 | 388 | # evaluate test (since a lower waiting time is better, we set the min_is_best to True) 389 | results = test.evaluate(sim_count=20000, min_is_best=True) 390 | results 391 | # print(pd.DataFrame(results).set_index('variant').T.to_markdown(tablefmt="grid")) 392 | ``` 393 | 394 | +-------------------+-------------+-------------+-------------+ 395 | | | A | B | C | 396 | +===================+=============+=============+=============+ 397 | | totals | 200 | 210 | 220 | 398 | +-------------------+-------------+-------------+-------------+ 399 | | sum_values | 1827.81709 | 2217.46016 | 2160.73134 | 400 | +-------------------+-------------+-------------+-------------+ 401 | | observed_average | 9.13909 | 10.55933 | 9.82151 | 402 | +-------------------+-------------+-------------+-------------+ 403 | | posterior_mean | 9.13502 | 10.55478 | 9.8175 | 404 | +-------------------+-------------+-------------+-------------+ 405 | | credible_interval | [7.994178, | [9.2543372, | [8.6184821, | 406 | | | 10.5410967] | 12.1527256] | 11.2566538] | 407 | +-------------------+-------------+-------------+-------------+ 408 | | prob_being_best | 0.7456 | 0.0405 | 0.2139 | 409 | +-------------------+-------------+-------------+-------------+ 410 | | expected_loss | 0.1428729 | 1.5674747 | 0.8230728 | 411 | +-------------------+-------------+-------------+-------------+ 412 | 413 | ## Development 414 | To set up a development environment, use [Poetry](https://python-poetry.org/) 415 | and [pre-commit](https://pre-commit.com): 416 | ```console 417 | pip install poetry 418 | poetry install 419 | poetry run pre-commit install 420 | ``` 421 | 422 | ## To be implemented 423 | 424 | Additional metrics: 425 | - `Potential Value Remaining` 426 | 427 | ## References 428 | - `bayesian_testing` package itself depends only on [numpy](https://numpy.org) package. 429 | - Work on this package (including default priors selection) was inspired mainly by a Coursera 430 | course [Bayesian Statistics: From Concept to Data Analysis](https://www.coursera.org/learn/bayesian-statistics). 
431 | -------------------------------------------------------------------------------- /bayesian_testing/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | from importlib.metadata import version, PackageNotFoundError # type: ignore 3 | except ImportError: # pragma: no cover 4 | from importlib_metadata import version, PackageNotFoundError # type: ignore 5 | 6 | try: 7 | __version__ = version(__name__) 8 | except PackageNotFoundError: # pragma: no cover 9 | __version__ = "unknown" 10 | -------------------------------------------------------------------------------- /bayesian_testing/experiments/__init__.py: -------------------------------------------------------------------------------- 1 | from .binary import BinaryDataTest 2 | from .normal import NormalDataTest 3 | from .delta_lognormal import DeltaLognormalDataTest 4 | from .discrete import DiscreteDataTest 5 | from .poisson import PoissonDataTest 6 | from .delta_normal import DeltaNormalDataTest 7 | from .exponential import ExponentialDataTest 8 | 9 | __all__ = [ 10 | "BinaryDataTest", 11 | "NormalDataTest", 12 | "DeltaLognormalDataTest", 13 | "DeltaNormalDataTest", 14 | "DiscreteDataTest", 15 | "PoissonDataTest", 16 | "ExponentialDataTest", 17 | ] 18 | -------------------------------------------------------------------------------- /bayesian_testing/experiments/base.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | import warnings 3 | 4 | 5 | class BaseDataTest: 6 | """ 7 | Base class for Bayesian A/B test. 8 | """ 9 | 10 | def __init__(self) -> None: 11 | """ 12 | Initialize BaseDataTest class. 13 | """ 14 | self.data = {} 15 | 16 | @property 17 | def variant_names(self): 18 | return [k for k in self.data] 19 | 20 | def eval_simulation( 21 | self, 22 | sim_count: int = 20000, 23 | seed: int = None, 24 | min_is_best: bool = False, 25 | interval_alpha: float = 0.95, 26 | ) -> Tuple[dict, dict, dict]: 27 | """ 28 | Should be implemented in each individual experiment. 29 | """ 30 | raise NotImplementedError 31 | 32 | def probabs_of_being_best( 33 | self, 34 | sim_count: int = 20000, 35 | seed: int = None, 36 | min_is_best: bool = False, 37 | interval_alpha: float = 0.95, 38 | ) -> dict: 39 | """ 40 | Calculate probabilities of being best for a current class state. 41 | 42 | Parameters 43 | ---------- 44 | sim_count : Number of simulations to be used for probability estimation. 45 | seed : Random seed. 46 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 47 | interval_alpha : Credible interval probability (value between 0 and 1). 48 | 49 | Returns 50 | ------- 51 | pbbs : Dictionary with probabilities of being best for all variants in experiment. 52 | """ 53 | pbbs, loss, intervals = self.eval_simulation(sim_count, seed, min_is_best, interval_alpha) 54 | 55 | return pbbs 56 | 57 | def expected_loss( 58 | self, 59 | sim_count: int = 20000, 60 | seed: int = None, 61 | min_is_best: bool = False, 62 | interval_alpha: float = 0.95, 63 | ) -> dict: 64 | """ 65 | Calculate expected loss for a current class state. 66 | 67 | Parameters 68 | ---------- 69 | sim_count : Number of simulations to be used for probability estimation. 70 | seed : Random seed. 71 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 72 | interval_alpha : Credible interval probability (value between 0 and 1). 
73 | 74 | Returns 75 | ------- 76 | loss : Dictionary with expected loss for all variants in experiment. 77 | """ 78 | pbbs, loss, intervals = self.eval_simulation(sim_count, seed, min_is_best, interval_alpha) 79 | 80 | return loss 81 | 82 | def credible_intervals( 83 | self, 84 | sim_count: int = 20000, 85 | seed: int = None, 86 | min_is_best: bool = False, 87 | interval_alpha: float = 0.95, 88 | ) -> dict: 89 | """ 90 | Calculate quantile-based credible intervals for a current class state. 91 | 92 | Parameters 93 | ---------- 94 | sim_count : Number of simulations to be used for probability estimation. 95 | seed : Random seed. 96 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 97 | interval_alpha : Credible interval probability (value between 0 and 1). 98 | 99 | Returns 100 | ------- 101 | intervals : Dictionary with quantile-based credible intervals for all variants. 102 | """ 103 | pbbs, loss, intervals = self.eval_simulation(sim_count, seed, min_is_best, interval_alpha) 104 | 105 | return intervals 106 | 107 | def delete_variant(self, name: str) -> None: 108 | """ 109 | Delete variant and all its data from experiment. 110 | 111 | Parameters 112 | ---------- 113 | name : Variant name. 114 | """ 115 | if not isinstance(name, str): 116 | raise ValueError("Variant name has to be a string.") 117 | if name not in self.variant_names: 118 | warnings.warn(f"Nothing to be deleted. Variant {name} is not in experiment.") 119 | else: 120 | del self.data[name] 121 | -------------------------------------------------------------------------------- /bayesian_testing/experiments/binary.py: -------------------------------------------------------------------------------- 1 | from numbers import Number 2 | from typing import List, Tuple 3 | 4 | from bayesian_testing.experiments.base import BaseDataTest 5 | from bayesian_testing.metrics import eval_bernoulli_agg 6 | from bayesian_testing.utilities import get_logger 7 | 8 | logger = get_logger("bayesian_testing") 9 | 10 | 11 | class BinaryDataTest(BaseDataTest): 12 | """ 13 | Class for Bayesian A/B test for binary-like data (conversions, successes, etc.). 14 | 15 | After class initialization, use add_variant methods to insert variant data. 16 | Then to get results of the test, use for instance `evaluate` method. 17 | """ 18 | 19 | def __init__(self) -> None: 20 | """ 21 | Initialize BinaryDataTest class. 22 | """ 23 | super().__init__() 24 | 25 | @property 26 | def totals(self): 27 | return [self.data[k]["totals"] for k in self.data] 28 | 29 | @property 30 | def positives(self): 31 | return [self.data[k]["positives"] for k in self.data] 32 | 33 | @property 34 | def a_priors(self): 35 | return [self.data[k]["a_prior"] for k in self.data] 36 | 37 | @property 38 | def b_priors(self): 39 | return [self.data[k]["b_prior"] for k in self.data] 40 | 41 | def eval_simulation( 42 | self, 43 | sim_count: int = 20000, 44 | seed: int = None, 45 | min_is_best: bool = False, 46 | interval_alpha: float = 0.95, 47 | ) -> Tuple[dict, dict, dict]: 48 | """ 49 | Calculate probabilities of being best, expected loss and credible intervals for a current 50 | class state. 51 | 52 | Parameters 53 | ---------- 54 | sim_count : Number of simulations to be used for probability estimation. 55 | seed : Random seed. 56 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 57 | interval_alpha : Credible interval probability (value between 0 and 1). 
58 | 59 | Returns 60 | ------- 61 | res_pbbs : Dictionary with probabilities of being best for all variants in experiment. 62 | res_loss : Dictionary with expected loss for all variants in experiment. 63 | res_intervals : Dictionary with quantile-based credible intervals for all variants. 64 | """ 65 | pbbs, loss, intervals = eval_bernoulli_agg( 66 | self.totals, 67 | self.positives, 68 | self.a_priors, 69 | self.b_priors, 70 | sim_count, 71 | seed, 72 | min_is_best, 73 | interval_alpha, 74 | ) 75 | res_pbbs = dict(zip(self.variant_names, pbbs)) 76 | res_loss = dict(zip(self.variant_names, loss)) 77 | res_intervals = dict(zip(self.variant_names, intervals)) 78 | 79 | return res_pbbs, res_loss, res_intervals 80 | 81 | def evaluate( 82 | self, 83 | sim_count: int = 20000, 84 | seed: int = None, 85 | min_is_best: bool = False, 86 | interval_alpha: float = 0.95, 87 | ) -> List[dict]: 88 | """ 89 | Evaluation of experiment. 90 | 91 | Parameters 92 | ---------- 93 | sim_count : Number of simulations to be used for probability estimation. 94 | seed : Random seed. 95 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 96 | interval_alpha : Credible interval probability (value between 0 and 1). 97 | 98 | Returns 99 | ------- 100 | res : List of dictionaries with results per variant. 101 | """ 102 | keys = [ 103 | "variant", 104 | "totals", 105 | "positives", 106 | "positive_rate", 107 | "posterior_mean", 108 | "credible_interval", 109 | "prob_being_best", 110 | "expected_loss", 111 | ] 112 | positive_rate = [round(i[0] / i[1], 5) for i in zip(self.positives, self.totals)] 113 | posterior_mean = [ 114 | round((i[2] + i[0]) / (i[2] + i[3] + i[1]), 5) 115 | for i in zip(self.positives, self.totals, self.a_priors, self.b_priors) 116 | ] 117 | eval_pbbs, eval_loss, eval_intervals = self.eval_simulation( 118 | sim_count, seed, min_is_best, interval_alpha 119 | ) 120 | pbbs = list(eval_pbbs.values()) 121 | loss = list(eval_loss.values()) 122 | intervals = list(eval_intervals.values()) 123 | data = [ 124 | self.variant_names, 125 | self.totals, 126 | self.positives, 127 | positive_rate, 128 | posterior_mean, 129 | intervals, 130 | pbbs, 131 | loss, 132 | ] 133 | res = [dict(zip(keys, item)) for item in zip(*data)] 134 | 135 | return res 136 | 137 | def add_variant_data_agg( 138 | self, 139 | name: str, 140 | totals: int, 141 | positives: int, 142 | a_prior: Number = 0.5, 143 | b_prior: Number = 0.5, 144 | replace: bool = True, 145 | ) -> None: 146 | """ 147 | Add variant data to test class using aggregated binary data. 148 | This can be convenient as aggregation can be done on database level. 149 | 150 | Default prior setup is set for Beta(1/2, 1/2) which is non-information prior. 151 | 152 | Parameters 153 | ---------- 154 | name : Variant name. 155 | totals : Total number of experiment observations (e.g. number of sessions). 156 | positives : Total number of 1s for a given variant (e.g. number of conversions). 157 | a_prior : Prior alpha parameter of a Beta distribution (conjugate prior). 158 | Default value 0.5 is based on non-information prior Beta(0.5, 0.5). 159 | b_prior : Prior beta parameter of a Beta distribution (conjugate prior). 160 | Default value 0.5 is based on non-information prior Beta(0.5, 0.5). 161 | replace : Replace data if variant already exists. 162 | If set to False, data of existing variant will be appended to existing data. 
163 | """ 164 | if not isinstance(name, str): 165 | raise ValueError("Variant name has to be a string.") 166 | if a_prior <= 0 or b_prior <= 0: 167 | raise ValueError("Both [a_prior, b_prior] have to be positive numbers.") 168 | if totals <= 0: 169 | raise ValueError("Input variable 'totals' is expected to be positive integer.") 170 | if positives < 0: 171 | raise ValueError("Input variable 'positives' is expected to be non-negative integer.") 172 | if totals < positives: 173 | raise ValueError("Not possible to have more positives that totals!") 174 | 175 | if name not in self.variant_names: 176 | self.data[name] = { 177 | "totals": totals, 178 | "positives": positives, 179 | "a_prior": a_prior, 180 | "b_prior": b_prior, 181 | } 182 | elif name in self.variant_names and replace: 183 | msg = ( 184 | f"Variant {name} already exists - new data is replacing it. " 185 | "If you wish to append instead, use replace=False." 186 | ) 187 | logger.info(msg) 188 | self.data[name] = { 189 | "totals": totals, 190 | "positives": positives, 191 | "a_prior": a_prior, 192 | "b_prior": b_prior, 193 | } 194 | elif name in self.variant_names and not replace: 195 | msg = ( 196 | f"Variant {name} already exists - new data is appended to variant, " 197 | "keeping its original prior setup. " 198 | "If you wish to replace data instead, use replace=True." 199 | ) 200 | logger.info(msg) 201 | self.data[name]["totals"] += totals 202 | self.data[name]["positives"] += positives 203 | 204 | def add_variant_data( 205 | self, 206 | name: str, 207 | data: List[int], 208 | a_prior: Number = 0.5, 209 | b_prior: Number = 0.5, 210 | replace: bool = True, 211 | ) -> None: 212 | """ 213 | Add variant data to test class using raw binary data. 214 | 215 | Default prior setup is set for Beta(1/2, 1/2) which is non-information prior. 216 | 217 | Parameters 218 | ---------- 219 | name : Variant name. 220 | data : List of binary data containing zeros (non-conversion) and ones (conversions). 221 | a_prior : Prior alpha parameter of a Beta distribution (conjugate prior). 222 | Default value 0.5 is based on non-information prior Beta(0.5, 0.5). 223 | b_prior : Prior beta parameter of a Beta distribution (conjugate prior). 224 | Default value 0.5 is based on non-information prior Beta(0.5, 0.5). 225 | replace : Replace data if variant already exists. 226 | If set to False, data of existing variant will be appended to existing data. 227 | """ 228 | if len(data) == 0: 229 | raise ValueError("Data of added variant needs to have some observations.") 230 | if not min([i in [0, 1] for i in data]): 231 | raise ValueError("Input data needs to be a list of zeros and ones.") 232 | 233 | totals = len(data) 234 | positives = sum(data) 235 | 236 | self.add_variant_data_agg(name, totals, positives, a_prior, b_prior, replace) 237 | -------------------------------------------------------------------------------- /bayesian_testing/experiments/delta_lognormal.py: -------------------------------------------------------------------------------- 1 | from numbers import Number 2 | from typing import List, Tuple 3 | 4 | import numpy as np 5 | 6 | from bayesian_testing.experiments.base import BaseDataTest 7 | from bayesian_testing.metrics import eval_delta_lognormal_agg 8 | from bayesian_testing.utilities import get_logger 9 | 10 | logger = get_logger("bayesian_testing") 11 | 12 | 13 | class DeltaLognormalDataTest(BaseDataTest): 14 | """ 15 | Class for Bayesian A/B test for Delta-LogNormal data (Log-Normal with possible zeros). 
16 | Delta-lognormal data is typical case of revenue/session data where many 17 | sessions are with 0 revenue (meaning non-conversions). 18 | To handle this data, the evaluation methods are combining binary bayes model for 19 | zero vs non-zero "conversion" and log-normal model for non-zero values. 20 | 21 | After class initialization, use add_variant methods to insert variant data. 22 | Then to get results of the test, use for instance `evaluate` method. 23 | """ 24 | 25 | def __init__(self) -> None: 26 | """ 27 | Initialize DeltaLognormalDataTest class. 28 | """ 29 | super().__init__() 30 | 31 | @property 32 | def totals(self): 33 | return [self.data[k]["totals"] for k in self.data] 34 | 35 | @property 36 | def positives(self): 37 | return [self.data[k]["positives"] for k in self.data] 38 | 39 | @property 40 | def sum_values(self): 41 | return [self.data[k]["sum_values"] for k in self.data] 42 | 43 | @property 44 | def sum_logs(self): 45 | return [self.data[k]["sum_logs"] for k in self.data] 46 | 47 | @property 48 | def sum_logs_2(self): 49 | return [self.data[k]["sum_logs_2"] for k in self.data] 50 | 51 | @property 52 | def a_priors_beta(self): 53 | return [self.data[k]["a_prior_beta"] for k in self.data] 54 | 55 | @property 56 | def b_priors_beta(self): 57 | return [self.data[k]["b_prior_beta"] for k in self.data] 58 | 59 | @property 60 | def m_priors(self): 61 | return [self.data[k]["m_prior"] for k in self.data] 62 | 63 | @property 64 | def a_priors_ig(self): 65 | return [self.data[k]["a_prior_ig"] for k in self.data] 66 | 67 | @property 68 | def b_priors_ig(self): 69 | return [self.data[k]["b_prior_ig"] for k in self.data] 70 | 71 | @property 72 | def w_priors(self): 73 | return [self.data[k]["w_prior"] for k in self.data] 74 | 75 | def eval_simulation( 76 | self, 77 | sim_count: int = 20000, 78 | seed: int = None, 79 | min_is_best: bool = False, 80 | interval_alpha: float = 0.95, 81 | ) -> Tuple[dict, dict, dict]: 82 | """ 83 | Calculate probabilities of being best, expected loss and credible intervals for a current 84 | class state. 85 | 86 | Parameters 87 | ---------- 88 | sim_count : Number of simulations to be used for probability estimation. 89 | seed : Random seed. 90 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 91 | interval_alpha : Credible interval probability (value between 0 and 1). 92 | 93 | Returns 94 | ------- 95 | res_pbbs : Dictionary with probabilities of being best for all variants in experiment. 96 | res_loss : Dictionary with expected loss for all variants in experiment. 97 | res_intervals : Dictionary with quantile-based credible intervals for all variants. 
98 | """ 99 | pbbs, loss, intervals = eval_delta_lognormal_agg( 100 | self.totals, 101 | self.positives, 102 | self.sum_logs, 103 | self.sum_logs_2, 104 | sim_count=sim_count, 105 | a_priors_beta=self.a_priors_beta, 106 | b_priors_beta=self.b_priors_beta, 107 | m_priors=self.m_priors, 108 | a_priors_ig=self.a_priors_ig, 109 | b_priors_ig=self.b_priors_ig, 110 | w_priors=self.w_priors, 111 | seed=seed, 112 | min_is_best=min_is_best, 113 | interval_alpha=interval_alpha, 114 | ) 115 | res_pbbs = dict(zip(self.variant_names, pbbs)) 116 | res_loss = dict(zip(self.variant_names, loss)) 117 | res_intervals = dict(zip(self.variant_names, intervals)) 118 | 119 | return res_pbbs, res_loss, res_intervals 120 | 121 | def evaluate( 122 | self, 123 | sim_count: int = 20000, 124 | seed: int = None, 125 | min_is_best: bool = False, 126 | interval_alpha: float = 0.95, 127 | ) -> List[dict]: 128 | """ 129 | Evaluation of experiment. 130 | 131 | Parameters 132 | ---------- 133 | sim_count : Number of simulations to be used for probability estimation. 134 | seed : Random seed. 135 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 136 | interval_alpha : Credible interval probability (value between 0 and 1). 137 | 138 | Returns 139 | ------- 140 | res : List of dictionaries with results per variant. 141 | """ 142 | keys = [ 143 | "variant", 144 | "totals", 145 | "positives", 146 | "sum_values", 147 | "avg_values", 148 | "avg_positive_values", 149 | "posterior_mean", 150 | "credible_interval", 151 | "prob_being_best", 152 | "expected_loss", 153 | ] 154 | avg_values = [round(i[0] / i[1], 5) for i in zip(self.sum_values, self.totals)] 155 | avg_pos_values = [round(i[0] / i[1], 5) for i in zip(self.sum_values, self.positives)] 156 | a_posterior_ig = [i[0] + (i[1] / 2) for i in zip(self.a_priors_ig, self.positives)] 157 | x_ig = [i[0] / i[1] for i in zip(self.sum_logs, self.positives)] 158 | b_posterior_ig = [ 159 | ( 160 | i[6] 161 | + (1 / 2) * (i[1] - 2 * i[0] * i[3] + i[2] * (i[3] ** 2)) 162 | + ((i[2] * i[5]) / (2 * (i[2] + i[5]))) * ((i[3] - i[4]) ** 2) 163 | ) 164 | for i in zip( 165 | self.sum_logs, 166 | self.sum_logs_2, 167 | self.positives, 168 | x_ig, 169 | self.m_priors, 170 | self.w_priors, 171 | self.b_priors_ig, 172 | ) 173 | ] 174 | posterior_mean = [ 175 | round( 176 | np.exp(((i[0] + i[3] * i[4]) / (i[1] + i[4])) + i[8] / (2 * i[7])) 177 | * ((i[5] + i[1]) / (i[6] + i[2])), 178 | 5, 179 | ) 180 | for i in zip( 181 | self.sum_logs, 182 | self.positives, 183 | self.totals, 184 | self.m_priors, 185 | self.w_priors, 186 | self.a_priors_beta, 187 | self.b_priors_beta, 188 | a_posterior_ig, 189 | b_posterior_ig, 190 | ) 191 | ] 192 | eval_pbbs, eval_loss, eval_intervals = self.eval_simulation( 193 | sim_count, seed, min_is_best, interval_alpha 194 | ) 195 | pbbs = list(eval_pbbs.values()) 196 | loss = list(eval_loss.values()) 197 | intervals = list(eval_intervals.values()) 198 | data = [ 199 | self.variant_names, 200 | self.totals, 201 | self.positives, 202 | [round(i, 5) for i in self.sum_values], 203 | avg_values, 204 | avg_pos_values, 205 | posterior_mean, 206 | intervals, 207 | pbbs, 208 | loss, 209 | ] 210 | res = [dict(zip(keys, item)) for item in zip(*data)] 211 | 212 | return res 213 | 214 | def add_variant_data_agg( 215 | self, 216 | name: str, 217 | totals: int, 218 | positives: int, 219 | sum_values: float, 220 | sum_logs: float, 221 | sum_logs_2: float, 222 | a_prior_beta: Number = 0.5, 223 | b_prior_beta: Number = 0.5, 224 | m_prior: Number = 1, 225 | 
a_prior_ig: Number = 0, 226 | b_prior_ig: Number = 0, 227 | w_prior: Number = 0.01, 228 | replace: bool = True, 229 | ) -> None: 230 | """ 231 | Add variant data to test class using aggregated Delta-LogNormal data. 232 | This can be convenient as aggregation can be done on database level. 233 | 234 | The goal of default prior setup is to be low information. 235 | It should be tuned with caution. 236 | 237 | Parameters 238 | ---------- 239 | name : Variant name. 240 | totals : Total number of experiment observations (e.g. number of sessions). 241 | positives : Total number of non-zero values for a given variant. 242 | sum_values : Sum of non-zero values for a given variant. 243 | sum_logs : Sum of logarithms of non-zero data values for a given variant. 244 | sum_logs_2 : Sum of logarithms squrared of non-zero data values for a given variant. 245 | a_prior_beta : Prior alpha parameter from Beta distribution for conversion part. 246 | b_prior_beta : Prior beta parameter from Beta distribution for conversion part. 247 | m_prior : Prior normal mean for logarithms of non-zero data. 248 | a_prior_ig : Prior alpha from inverse gamma dist. for unknown variance of logarithms. 249 | In theory a > 0, but as we always have at least one observation, we can start at 0. 250 | b_prior_ig : Prior beta from inverse gamma dist. for unknown variance of logarithms. 251 | In theory b > 0, but as we always have at least one observation, we can start at 0. 252 | w_prior : Prior effective sample sizes for normal distribution of logarithms of data. 253 | replace : Replace data if variant already exists. 254 | If set to False, data of existing variant will be appended to existing data. 255 | """ 256 | if not isinstance(name, str): 257 | raise ValueError("Variant name has to be a string.") 258 | if a_prior_beta <= 0 or b_prior_beta <= 0: 259 | raise ValueError("Both [a_prior_beta, b_prior_beta] have to be positive numbers.") 260 | if m_prior < 0 or a_prior_ig < 0 or b_prior_ig < 0 or w_prior < 0: 261 | raise ValueError("All priors of [m, a_ig, b_ig, w] have to be non-negative numbers.") 262 | if positives == 0: 263 | raise ValueError("Variant has to have some non-zero (positive) values.") 264 | if positives < 0: 265 | raise ValueError("Input variable 'positives' is expected to be a positive integer.") 266 | if totals < positives: 267 | raise ValueError("Not possible to have more positives that totals!") 268 | 269 | if name not in self.variant_names: 270 | self.data[name] = { 271 | "totals": totals, 272 | "positives": positives, 273 | "sum_values": sum_values, 274 | "sum_logs": sum_logs, 275 | "sum_logs_2": sum_logs_2, 276 | "a_prior_beta": a_prior_beta, 277 | "b_prior_beta": b_prior_beta, 278 | "m_prior": m_prior, 279 | "a_prior_ig": a_prior_ig, 280 | "b_prior_ig": b_prior_ig, 281 | "w_prior": w_prior, 282 | } 283 | elif name in self.variant_names and replace: 284 | msg = ( 285 | f"Variant {name} already exists - new data is replacing it. " 286 | "If you wish to append instead, use replace=False." 
287 | ) 288 | logger.info(msg) 289 | self.data[name] = { 290 | "totals": totals, 291 | "positives": positives, 292 | "sum_values": sum_values, 293 | "sum_logs": sum_logs, 294 | "sum_logs_2": sum_logs_2, 295 | "a_prior_beta": a_prior_beta, 296 | "b_prior_beta": b_prior_beta, 297 | "m_prior": m_prior, 298 | "a_prior_ig": a_prior_ig, 299 | "b_prior_ig": b_prior_ig, 300 | "w_prior": w_prior, 301 | } 302 | elif name in self.variant_names and not replace: 303 | msg = ( 304 | f"Variant {name} already exists - new data is appended to variant, " 305 | "keeping its original prior setup. " 306 | "If you wish to replace data instead, use replace=True." 307 | ) 308 | logger.info(msg) 309 | self.data[name]["totals"] += totals 310 | self.data[name]["positives"] += positives 311 | self.data[name]["sum_values"] += sum_values 312 | self.data[name]["sum_logs"] += sum_logs 313 | self.data[name]["sum_logs_2"] += sum_logs_2 314 | 315 | def add_variant_data( 316 | self, 317 | name: str, 318 | data: List[Number], 319 | a_prior_beta: Number = 0.5, 320 | b_prior_beta: Number = 0.5, 321 | m_prior: Number = 1, 322 | a_prior_ig: Number = 0, 323 | b_prior_ig: Number = 0, 324 | w_prior: Number = 0.01, 325 | replace: bool = True, 326 | ) -> None: 327 | """ 328 | Add variant data to test class using raw Delta-LogNormal data. 329 | 330 | The goal of default prior setup is to be low information. It should be tuned with caution. 331 | 332 | Parameters 333 | ---------- 334 | name : Variant name. 335 | data : List of delta-lognormal data (e.g. revenues in sessions). 336 | a_prior_beta : Prior alpha parameter from Beta distribution for conversion part. 337 | b_prior_beta : Prior beta parameter from Beta distribution for conversion part. 338 | m_prior : Prior mean for logarithms of non-zero data. 339 | a_prior_ig : Prior alpha from inverse gamma dist. for unknown variance of logarithms. 340 | In theory a > 0, but as we always have at least one observation, we can start at 0. 341 | b_prior_ig : Prior beta from inverse gamma dist. for unknown variance of logarithms. 342 | In theory b > 0, but as we always have at least one observation, we can start at 0. 343 | w_prior : Prior effective sample sizes for normal distribution of logarithms of data. 344 | replace : Replace data if variant already exists. 345 | If set to False, data of existing variant will be appended to existing data. 
346 | """ 347 | if len(data) == 0: 348 | raise ValueError("Data of added variant needs to have some observations.") 349 | if min(data) < 0: 350 | raise ValueError("Input data needs to be a list of non-negative numbers.") 351 | 352 | totals = len(data) 353 | positives = sum(x > 0 for x in data) 354 | sum_values = sum(data) 355 | sum_logs = sum([np.log(x) for x in data if x > 0]) 356 | sum_logs_2 = sum([np.square(np.log(x)) for x in data if x > 0]) 357 | 358 | self.add_variant_data_agg( 359 | name, 360 | totals, 361 | positives, 362 | sum_values, 363 | sum_logs, 364 | sum_logs_2, 365 | a_prior_beta, 366 | b_prior_beta, 367 | m_prior, 368 | a_prior_ig, 369 | b_prior_ig, 370 | w_prior, 371 | replace, 372 | ) 373 | -------------------------------------------------------------------------------- /bayesian_testing/experiments/delta_normal.py: -------------------------------------------------------------------------------- 1 | from numbers import Number 2 | from typing import List, Tuple 3 | import numpy as np 4 | from bayesian_testing.experiments.base import BaseDataTest 5 | from bayesian_testing.metrics import eval_delta_normal_agg 6 | from bayesian_testing.utilities import get_logger 7 | 8 | logger = get_logger("bayesian_testing") 9 | 10 | 11 | class DeltaNormalDataTest(BaseDataTest): 12 | """ 13 | Class for Bayesian A/B test for Delta-Normal data (Normally distributed conversions). 14 | Delta-normal data is typical case of net profit data where many sessions have 0 values 15 | (meaning non-conversions), and the remaining revenue data is normally distributed. 16 | To handle this data, the evaluation methods are combining binary bayes model 17 | for zero vs non-zero “conversion” and normal model. 18 | 19 | After class initialization, use add_variant methods to insert variant data. 20 | Then to get results of the test, use for instance `evaluate` method. 21 | """ 22 | 23 | def __init__(self) -> None: 24 | """ 25 | Initialize DeltaNormalDataTest class. 26 | """ 27 | super().__init__() 28 | 29 | @property 30 | def totals(self): 31 | return [self.data[k]["totals"] for k in self.data] 32 | 33 | @property 34 | def non_zeros(self): 35 | return [self.data[k]["non_zeros"] for k in self.data] 36 | 37 | @property 38 | def sum_values(self): 39 | return [self.data[k]["sum_values"] for k in self.data] 40 | 41 | @property 42 | def sum_values_2(self): 43 | return [self.data[k]["sum_values_2"] for k in self.data] 44 | 45 | @property 46 | def a_priors_beta(self): 47 | return [self.data[k]["a_prior_beta"] for k in self.data] 48 | 49 | @property 50 | def b_priors_beta(self): 51 | return [self.data[k]["b_prior_beta"] for k in self.data] 52 | 53 | @property 54 | def m_priors(self): 55 | return [self.data[k]["m_prior"] for k in self.data] 56 | 57 | @property 58 | def a_priors_ig(self): 59 | return [self.data[k]["a_prior_ig"] for k in self.data] 60 | 61 | @property 62 | def b_priors_ig(self): 63 | return [self.data[k]["b_prior_ig"] for k in self.data] 64 | 65 | @property 66 | def w_priors(self): 67 | return [self.data[k]["w_prior"] for k in self.data] 68 | 69 | def eval_simulation( 70 | self, 71 | sim_count: int = 20000, 72 | seed: int = None, 73 | min_is_best: bool = False, 74 | interval_alpha: float = 0.95, 75 | ) -> Tuple[dict, dict, dict]: 76 | """ 77 | Calculate probabilities of being best, expected loss and credible intervals for a current 78 | class state. 79 | 80 | Parameters 81 | ---------- 82 | sim_count : Number of simulations to be used for probability estimation. 83 | seed : Random seed. 
84 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 85 | interval_alpha : Credible interval probability (value between 0 and 1). 86 | 87 | Returns 88 | ------- 89 | res_pbbs : Dictionary with probabilities of being best for all variants in experiment. 90 | res_loss : Dictionary with expected loss for all variants in experiment. 91 | res_intervals : Dictionary with quantile-based credible intervals for all variants. 92 | """ 93 | pbbs, loss, intervals = eval_delta_normal_agg( 94 | self.totals, 95 | self.non_zeros, 96 | self.sum_values, 97 | self.sum_values_2, 98 | sim_count=sim_count, 99 | a_priors_beta=self.a_priors_beta, 100 | b_priors_beta=self.b_priors_beta, 101 | m_priors=self.m_priors, 102 | a_priors_ig=self.a_priors_ig, 103 | b_priors_ig=self.b_priors_ig, 104 | w_priors=self.w_priors, 105 | seed=seed, 106 | min_is_best=min_is_best, 107 | interval_alpha=interval_alpha, 108 | ) 109 | res_pbbs = dict(zip(self.variant_names, pbbs)) 110 | res_loss = dict(zip(self.variant_names, loss)) 111 | res_intervals = dict(zip(self.variant_names, intervals)) 112 | 113 | return res_pbbs, res_loss, res_intervals 114 | 115 | def evaluate( 116 | self, 117 | sim_count: int = 20000, 118 | seed: int = None, 119 | min_is_best: bool = False, 120 | interval_alpha: float = 0.95, 121 | ) -> List[dict]: 122 | """ 123 | Evaluation of experiment. 124 | 125 | Parameters 126 | ---------- 127 | sim_count : Number of simulations to be used for probability estimation. 128 | seed : Random seed. 129 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 130 | interval_alpha : Credible interval probability (value between 0 and 1). 131 | 132 | Returns 133 | ------- 134 | res : List of dictionaries with results per variant. 135 | """ 136 | keys = [ 137 | "variant", 138 | "totals", 139 | "non_zeros", 140 | "sum_values", 141 | "avg_values", 142 | "avg_non_zero_values", 143 | "posterior_mean", 144 | "credible_interval", 145 | "prob_being_best", 146 | "expected_loss", 147 | ] 148 | avg_values = [round(i[0] / i[1], 5) for i in zip(self.sum_values, self.totals)] 149 | avg_pos_values = [round(i[0] / i[1], 5) for i in zip(self.sum_values, self.non_zeros)] 150 | posterior_mean = [ 151 | round(((i[0] + i[3] * i[4]) / (i[1] + i[4])) * ((i[5] + i[1]) / (i[6] + i[2])), 5) 152 | for i in zip( 153 | self.sum_values, 154 | self.non_zeros, 155 | self.totals, 156 | self.m_priors, 157 | self.w_priors, 158 | self.a_priors_beta, 159 | self.b_priors_beta, 160 | ) 161 | ] 162 | eval_pbbs, eval_loss, eval_intervals = self.eval_simulation( 163 | sim_count, seed, min_is_best, interval_alpha 164 | ) 165 | pbbs = list(eval_pbbs.values()) 166 | loss = list(eval_loss.values()) 167 | intervals = list(eval_intervals.values()) 168 | data = [ 169 | self.variant_names, 170 | self.totals, 171 | self.non_zeros, 172 | [round(i, 5) for i in self.sum_values], 173 | avg_values, 174 | avg_pos_values, 175 | posterior_mean, 176 | intervals, 177 | pbbs, 178 | loss, 179 | ] 180 | res = [dict(zip(keys, item)) for item in zip(*data)] 181 | 182 | return res 183 | 184 | def add_variant_data_agg( 185 | self, 186 | name: str, 187 | totals: int, 188 | non_zeros: int, 189 | sum_values: float, 190 | sum_values_2: float, 191 | a_prior_beta: Number = 0.5, 192 | b_prior_beta: Number = 0.5, 193 | m_prior: Number = 1, 194 | a_prior_ig: Number = 0, 195 | b_prior_ig: Number = 0, 196 | w_prior: Number = 0.01, 197 | replace: bool = True, 198 | ) -> None: 199 | """ 200 | Add variant data to test class using aggregated Delta-Normal 
data.
201 |         This can be convenient as aggregation can be done on database level.
202 | 
203 |         The goal of default prior setup is to be low information.
204 |         It should be tuned with caution.
205 | 
206 |         Parameters
207 |         ----------
208 |         name : Variant name.
209 |         totals : Total number of experiment observations (e.g. number of sessions).
210 |         non_zeros : Total number of non-zero values for a given variant.
211 |         sum_values : Sum of non-zero values for a given variant.
212 |         sum_values_2 : Sum of values squared for a given variant.
213 |         a_prior_beta : Prior alpha parameter from Beta distribution for conversion part.
214 |         b_prior_beta : Prior beta parameter from Beta distribution for conversion part.
215 |         m_prior : Prior normal mean.
216 |         a_prior_ig : Prior alpha from inverse gamma dist. for unknown variance.
217 |             In theory a > 0, but as we always have at least one observation, we can start at 0.
218 |         b_prior_ig : Prior beta from inverse gamma dist. for unknown variance.
219 |             In theory b > 0, but as we always have at least one observation, we can start at 0.
220 |         w_prior : Prior effective sample sizes.
221 |         replace : Replace data if variant already exists.
222 |             If set to False, data of existing variant will be appended to existing data.
223 |         """
224 |         if not isinstance(name, str):
225 |             raise ValueError("Variant name has to be a string.")
226 |         if a_prior_beta <= 0 or b_prior_beta <= 0:
227 |             raise ValueError("Both [a_prior_beta, b_prior_beta] have to be positive numbers.")
228 |         if m_prior < 0 or a_prior_ig < 0 or b_prior_ig < 0 or w_prior < 0:
229 |             raise ValueError("All priors of [m, a_ig, b_ig, w] have to be non-negative numbers.")
230 |         if non_zeros == 0:
231 |             raise ValueError("Variant has to have some non-zero values.")
232 |         if non_zeros < 0:
233 |             raise ValueError("Input variable 'non_zeros' is expected to be positive integer.")
234 |         if totals < non_zeros:
235 |             raise ValueError("Not possible to have more non_zero numbers than totals!")
236 | 
237 |         if name not in self.variant_names:
238 |             self.data[name] = {
239 |                 "totals": totals,
240 |                 "non_zeros": non_zeros,
241 |                 "sum_values": sum_values,
242 |                 "sum_values_2": sum_values_2,
243 |                 "a_prior_beta": a_prior_beta,
244 |                 "b_prior_beta": b_prior_beta,
245 |                 "m_prior": m_prior,
246 |                 "a_prior_ig": a_prior_ig,
247 |                 "b_prior_ig": b_prior_ig,
248 |                 "w_prior": w_prior,
249 |             }
250 |         elif name in self.variant_names and replace:
251 |             msg = (
252 |                 f"Variant {name} already exists - new data is replacing it. "
253 |                 "If you wish to append instead, use replace=False."
254 |             )
255 |             logger.info(msg)
256 |             self.data[name] = {
257 |                 "totals": totals,
258 |                 "non_zeros": non_zeros,
259 |                 "sum_values": sum_values,
260 |                 "sum_values_2": sum_values_2,
261 |                 "a_prior_beta": a_prior_beta,
262 |                 "b_prior_beta": b_prior_beta,
263 |                 "m_prior": m_prior,
264 |                 "a_prior_ig": a_prior_ig,
265 |                 "b_prior_ig": b_prior_ig,
266 |                 "w_prior": w_prior,
267 |             }
268 |         elif name in self.variant_names and not replace:
269 |             msg = (
270 |                 f"Variant {name} already exists - new data is appended to variant, "
271 |                 "keeping its original prior setup. "
272 |                 "If you wish to replace data instead, use replace=True."
273 | ) 274 | logger.info(msg) 275 | self.data[name]["totals"] += totals 276 | self.data[name]["non_zeros"] += non_zeros 277 | self.data[name]["sum_values"] += sum_values 278 | self.data[name]["sum_values_2"] += sum_values_2 279 | 280 | def add_variant_data( 281 | self, 282 | name: str, 283 | data: List[Number], 284 | a_prior_beta: Number = 0.5, 285 | b_prior_beta: Number = 0.5, 286 | m_prior: Number = 1, 287 | a_prior_ig: Number = 0, 288 | b_prior_ig: Number = 0, 289 | w_prior: Number = 0.01, 290 | replace: bool = True, 291 | ) -> None: 292 | """ 293 | Add variant data to test class using raw Delta-Normal data. 294 | 295 | The goal of default prior setup is to be low information. It should be tuned with caution. 296 | 297 | Parameters 298 | ---------- 299 | name : Variant name. 300 | data : List of delta-normal data (e.g. revenues in sessions). 301 | a_prior_beta : Prior alpha parameter from Beta distribution for conversion part. 302 | b_prior_beta : Prior beta parameter from Beta distribution for conversion part. 303 | m_prior : Prior normal mean. 304 | a_prior_ig : Prior alpha from inverse gamma dist. for unknown variance. 305 | In theory a > 0, but as we always have at least one observation, we can start at 0. 306 | b_prior_ig : Prior beta from inverse gamma dist. for unknown variance. 307 | In theory b > 0, but as we always have at least one observation, we can start at 0. 308 | w_prior : Prior effective sample sizes. 309 | replace : Replace data if variant already exists. 310 | If set to False, data of existing variant will be appended to existing data. 311 | """ 312 | if len(data) == 0: 313 | raise ValueError("Data of added variant needs to have some observations.") 314 | 315 | totals = len(data) 316 | non_zeros = sum(x != 0 for x in data) 317 | sum_values = sum(data) 318 | sum_values_2 = sum(np.square(data)) 319 | 320 | self.add_variant_data_agg( 321 | name, 322 | totals, 323 | non_zeros, 324 | sum_values, 325 | sum_values_2, 326 | a_prior_beta, 327 | b_prior_beta, 328 | m_prior, 329 | a_prior_ig, 330 | b_prior_ig, 331 | w_prior, 332 | replace, 333 | ) 334 | -------------------------------------------------------------------------------- /bayesian_testing/experiments/discrete.py: -------------------------------------------------------------------------------- 1 | from numbers import Number 2 | from typing import List, Tuple, Union 3 | import numpy as np 4 | 5 | from bayesian_testing.experiments.base import BaseDataTest 6 | from bayesian_testing.metrics import eval_numerical_dirichlet_agg 7 | from bayesian_testing.utilities import get_logger 8 | 9 | logger = get_logger("bayesian_testing") 10 | 11 | 12 | class DiscreteDataTest(BaseDataTest): 13 | """ 14 | Class for Bayesian A/B test for data with finite discrete states (i.e. categorical data 15 | with numerical categories). As a real world examples we can think of dice rolls, 16 | 1-5 star ratings, 1-10 ratings, etc. 17 | 18 | After class initialization, use add_variant methods to insert variant data. 19 | Then to get results of the test, use for instance `evaluate` method. 20 | """ 21 | 22 | def __init__(self, states: List[Union[float, int]]) -> None: 23 | """ 24 | Initialize DiscreteDataTest class. 25 | 26 | Parameters 27 | ---------- 28 | states : List of all possible states for a given discrete variable. 
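(Usage aside: with delta_normal.py complete above, a minimal end-to-end run of `DeltaNormalDataTest` could look as follows. The package-level export from `bayesian_testing.experiments` is an assumption here, inferred from the import style used in the example notebooks.)

```python
import numpy as np
from bayesian_testing.experiments import DeltaNormalDataTest  # assumed export

rng = np.random.default_rng(21)
# net-profit-like data: roughly 80% zeros, non-zero values approximately normal
data_a = [0 if rng.random() < 0.80 else rng.normal(50, 10) for _ in range(1000)]
data_b = [0 if rng.random() < 0.75 else rng.normal(48, 10) for _ in range(1000)]

test = DeltaNormalDataTest()
test.add_variant_data("A", data_a)
test.add_variant_data("B", data_b)
results = test.evaluate(sim_count=20000, seed=52)  # list of per-variant result dicts
```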
29 | """ 30 | super().__init__() 31 | if not self.check_if_numerical(states): 32 | raise ValueError("States in the test have to be numbers (int or float).") 33 | self.states = states 34 | 35 | @property 36 | def concentrations(self): 37 | return [self.data[k]["concentration"] for k in self.data] 38 | 39 | @property 40 | def prior_alphas(self): 41 | return [self.data[k]["prior"] for k in self.data] 42 | 43 | @staticmethod 44 | def check_if_numerical(values): 45 | res = True 46 | for v in values: 47 | if not isinstance(v, Number): 48 | res = False 49 | return res 50 | 51 | def eval_simulation( 52 | self, 53 | sim_count: int = 20000, 54 | seed: int = None, 55 | min_is_best: bool = False, 56 | interval_alpha: float = 0.95, 57 | ) -> Tuple[dict, dict, dict]: 58 | """ 59 | Calculate probabilities of being best, expected loss and credible intervals for a current 60 | class state. 61 | 62 | Parameters 63 | ---------- 64 | sim_count : Number of simulations to be used for probability estimation. 65 | seed : Random seed. 66 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 67 | interval_alpha : Credible interval probability (value between 0 and 1). 68 | 69 | Returns 70 | ------- 71 | res_pbbs : Dictionary with probabilities of being best for all variants in experiment. 72 | res_loss : Dictionary with expected loss for all variants in experiment. 73 | res_intervals : Dictionary with quantile-based credible intervals for all variants. 74 | """ 75 | pbbs, loss, intervals = eval_numerical_dirichlet_agg( 76 | self.states, 77 | self.concentrations, 78 | self.prior_alphas, 79 | sim_count, 80 | seed, 81 | min_is_best, 82 | interval_alpha, 83 | ) 84 | res_pbbs = dict(zip(self.variant_names, pbbs)) 85 | res_loss = dict(zip(self.variant_names, loss)) 86 | res_intervals = dict(zip(self.variant_names, intervals)) 87 | 88 | return res_pbbs, res_loss, res_intervals 89 | 90 | def evaluate( 91 | self, 92 | sim_count: int = 20000, 93 | seed: int = None, 94 | min_is_best: bool = False, 95 | interval_alpha: float = 0.95, 96 | ) -> List[dict]: 97 | """ 98 | Evaluation of experiment. 99 | 100 | Parameters 101 | ---------- 102 | sim_count : Number of simulations to be used for probability estimation. 103 | seed : Random seed. 104 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 105 | interval_alpha : Credible interval probability (value between 0 and 1). 106 | 107 | Returns 108 | ------- 109 | res : List of dictionaries with results per variant. 
110 | """ 111 | keys = [ 112 | "variant", 113 | "concentration", 114 | "average_value", 115 | "posterior_mean", 116 | "credible_interval", 117 | "prob_being_best", 118 | "expected_loss", 119 | ] 120 | posterior_alphas = [ 121 | list(np.array(i[0]) + np.array(i[1])) 122 | for i in zip(self.concentrations, self.prior_alphas) 123 | ] 124 | posterior_mean = [ 125 | round(sum(np.multiply(np.array(self.states), np.array(i[0]) / sum(np.array(i[0])))), 5) 126 | for i in zip(posterior_alphas) 127 | ] 128 | eval_pbbs, eval_loss, eval_intervals = self.eval_simulation( 129 | sim_count, seed, min_is_best, interval_alpha 130 | ) 131 | pbbs = list(eval_pbbs.values()) 132 | loss = list(eval_loss.values()) 133 | intervals = list(eval_intervals.values()) 134 | average_values = [ 135 | np.sum(np.multiply(i, self.states)) / np.sum(i) for i in self.concentrations 136 | ] 137 | data = [ 138 | self.variant_names, 139 | [dict(zip(self.states, i)) for i in self.concentrations], 140 | average_values, 141 | posterior_mean, 142 | intervals, 143 | pbbs, 144 | loss, 145 | ] 146 | res = [dict(zip(keys, item)) for item in zip(*data)] 147 | 148 | return res 149 | 150 | def add_variant_data_agg( 151 | self, 152 | name: str, 153 | concentration: List[int], 154 | prior: List[Union[float, int]] = None, 155 | replace: bool = True, 156 | ) -> None: 157 | """ 158 | Add variant data to test class using aggregated discrete data. 159 | This can be convenient as aggregation can be done on database level. 160 | 161 | Default prior setup is Dirichlet(1,...,1) which is low information prior 162 | (we can interpret it as prior 1 observation of each state). 163 | 164 | Parameters 165 | ---------- 166 | name : Variant name. 167 | concentration : Total number of experiment observations for each state 168 | (e.g. number of rolls for each side in a die roll data). 169 | prior : Prior alpha parameters of a Dirichlet distribution (conjugate prior). 170 | replace : Replace data if variant already exists. 171 | If set to False, data of existing variant will be appended to existing data. 172 | """ 173 | if not isinstance(name, str): 174 | raise ValueError("Variant name has to be a string.") 175 | if not len(self.states) == len(concentration): 176 | msg = ( 177 | f"Concentration list has to have same size as number of states in a test " 178 | f"{len(concentration)} != {len(self.states)}." 179 | ) 180 | raise ValueError(msg) 181 | if not self.check_if_numerical(concentration): 182 | raise ValueError("Concentration parameter has to be a list of integer values.") 183 | 184 | if not prior: 185 | prior = [1] * len(self.states) 186 | 187 | if name not in self.variant_names: 188 | self.data[name] = {"concentration": concentration, "prior": prior} 189 | elif name in self.variant_names and replace: 190 | msg = ( 191 | f"Variant {name} already exists - new data is replacing it. " 192 | "If you wish to append instead, use replace=False." 193 | ) 194 | logger.info(msg) 195 | self.data[name] = {"concentration": concentration, "prior": prior} 196 | elif name in self.variant_names and not replace: 197 | msg = ( 198 | f"Variant {name} already exists - new data is appended to variant, " 199 | "keeping its original prior setup. " 200 | "If you wish to replace data instead, use replace=True." 
201 |             )
202 |             logger.info(msg)
203 |             self.data[name]["concentration"] = [
204 |                 sum(x) for x in zip(self.data[name]["concentration"], concentration)
205 |             ]
206 | 
207 |     def add_variant_data(
208 |         self,
209 |         name: str,
210 |         data: List[int],
211 |         prior: List[Union[float, int]] = None,
212 |         replace: bool = True,
213 |     ) -> None:
214 |         """
215 |         Add variant data to test class using raw discrete data.
216 | 
217 |         Default prior setup is Dirichlet(1,...,1) which is a low-information prior
218 |         (we can interpret it as prior 1 observation of each state).
219 | 
220 |         Parameters
221 |         ----------
222 |         name : Variant name.
223 |         data : List of numerical data observations from possible states.
224 |         prior : Prior alpha parameters of a Dirichlet distribution (conjugate prior).
225 |         replace : Replace data if variant already exists.
226 |             If set to False, data of existing variant will be appended to existing data.
227 |         """
228 |         if len(data) == 0:
229 |             raise ValueError("Data of added variant needs to have some observations.")
230 |         if not all(i in self.states for i in data):
231 |             msg = (
232 |                 f"Input data needs to be a list of numbers from possible states: {self.states}."
233 |             )
234 |             raise ValueError(msg)
235 | 
236 |         counter_dict = dict(zip(self.states, np.zeros(len(self.states))))
237 |         for i in data:
238 |             counter_dict[i] += 1
239 |         concentration = [counter_dict[i] for i in self.states]
240 | 
241 |         self.add_variant_data_agg(name, concentration, prior, replace)
242 | 
--------------------------------------------------------------------------------
/bayesian_testing/experiments/exponential.py:
--------------------------------------------------------------------------------
1 | from numbers import Number
2 | from typing import List, Tuple, Union
3 | 
4 | from bayesian_testing.experiments.base import BaseDataTest
5 | from bayesian_testing.metrics import eval_exponential_agg
6 | from bayesian_testing.utilities import get_logger
7 | 
8 | logger = get_logger("bayesian_testing")
9 | 
10 | 
11 | class ExponentialDataTest(BaseDataTest):
12 |     """
13 |     Class for Bayesian A/B test for Exponential data (e.g. session time, waiting time, etc.).
14 | 
15 |     After class initialization, use add_variant methods to insert variant data.
16 |     Then to get results of the test, use for instance `evaluate` method.
17 |     """
18 | 
19 |     def __init__(self) -> None:
20 |         """
21 |         Initialize ExponentialDataTest class.
22 |         """
23 |         super().__init__()
24 | 
25 |     @property
26 |     def totals(self):
27 |         return [self.data[k]["totals"] for k in self.data]
28 | 
29 |     @property
30 |     def sum_values(self):
31 |         return [self.data[k]["sum_values"] for k in self.data]
32 | 
33 |     @property
34 |     def a_priors(self):
35 |         return [self.data[k]["a_prior"] for k in self.data]
36 | 
37 |     @property
38 |     def b_priors(self):
39 |         return [self.data[k]["b_prior"] for k in self.data]
40 | 
41 |     def eval_simulation(
42 |         self,
43 |         sim_count: int = 20000,
44 |         seed: int = None,
45 |         min_is_best: bool = False,
46 |         interval_alpha: float = 0.95,
47 |     ) -> Tuple[dict, dict, dict]:
48 |         """
49 |         Calculate probabilities of being best, expected loss and credible intervals for a current
50 |         class state.
51 | 
52 |         Parameters
53 |         ----------
54 |         sim_count : Number of simulations to be used for probability estimation.
55 |         seed : Random seed.
56 |         min_is_best : Option to change "being best" to a minimum. Default is maximum.
57 |         interval_alpha : Credible interval probability (value between 0 and 1).
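(Aside on the model behind this class, a standard conjugacy result stated here for reference: for exponential observations with rate λ and a Gamma(a, b) prior on λ, the posterior after n observations with sum Σx is Gamma(a + n, b + Σx). A minimal sketch:)

```python
import numpy as np

a_prior, b_prior = 0.1, 0.1
data = [1.2, 0.4, 2.8, 0.9]   # e.g. waiting times in minutes
a_post = a_prior + len(data)  # a + n = 4.1
b_post = b_prior + sum(data)  # b + sum(x) = 5.4

rng = np.random.default_rng(0)
rate_samples = rng.gamma(a_post, 1 / b_post, 20000)  # numpy gamma takes a *scale*
mean_time_samples = 1 / rate_samples  # posterior draws of the mean waiting time
```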
58 | 59 | Returns 60 | ------- 61 | res_pbbs : Dictionary with probabilities of being best for all variants in experiment. 62 | res_loss : Dictionary with expected loss for all variants in experiment. 63 | res_intervals : Dictionary with quantile-based credible intervals for all variants. 64 | """ 65 | pbbs, loss, intervals = eval_exponential_agg( 66 | self.totals, 67 | self.sum_values, 68 | self.a_priors, 69 | self.b_priors, 70 | sim_count, 71 | seed, 72 | min_is_best, 73 | interval_alpha, 74 | ) 75 | res_pbbs = dict(zip(self.variant_names, pbbs)) 76 | res_loss = dict(zip(self.variant_names, loss)) 77 | res_intervals = dict(zip(self.variant_names, intervals)) 78 | 79 | return res_pbbs, res_loss, res_intervals 80 | 81 | def evaluate( 82 | self, 83 | sim_count: int = 20000, 84 | seed: int = None, 85 | min_is_best: bool = False, 86 | interval_alpha: float = 0.95, 87 | ) -> List[dict]: 88 | """ 89 | Evaluation of experiment. 90 | 91 | Parameters 92 | ---------- 93 | sim_count : Number of simulations to be used for probability estimation. 94 | seed : Random seed. 95 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 96 | interval_alpha : Credible interval probability (value between 0 and 1). 97 | 98 | Returns 99 | ------- 100 | res : List of dictionaries with results per variant. 101 | """ 102 | keys = [ 103 | "variant", 104 | "totals", 105 | "sum_values", 106 | "observed_average", 107 | "posterior_mean", 108 | "credible_interval", 109 | "prob_being_best", 110 | "expected_loss", 111 | ] 112 | observed_average = [round(i[0] / i[1], 5) for i in zip(self.sum_values, self.totals)] 113 | posterior_mean = [ 114 | round((i[3] + i[1]) / (i[2] + i[0]), 5) 115 | for i in zip(self.totals, self.sum_values, self.a_priors, self.b_priors) 116 | ] 117 | eval_pbbs, eval_loss, eval_intervals = self.eval_simulation( 118 | sim_count, seed, min_is_best, interval_alpha 119 | ) 120 | pbbs = list(eval_pbbs.values()) 121 | loss = list(eval_loss.values()) 122 | intervals = list(eval_intervals.values()) 123 | data = [ 124 | self.variant_names, 125 | self.totals, 126 | [round(i, 5) for i in self.sum_values], 127 | observed_average, 128 | posterior_mean, 129 | intervals, 130 | pbbs, 131 | loss, 132 | ] 133 | res = [dict(zip(keys, item)) for item in zip(*data)] 134 | 135 | return res 136 | 137 | def add_variant_data_agg( 138 | self, 139 | name: str, 140 | totals: int, 141 | sum_values: Union[float, int], 142 | a_prior: Number = 0.1, 143 | b_prior: Number = 0.1, 144 | replace: bool = True, 145 | ) -> None: 146 | """ 147 | Add variant data to a test class using aggregated Exponential data. 148 | This can be convenient as aggregation can be done on database level. 149 | 150 | Default prior setup is set for Gamma(0.1, 0.1) which is on purpose very vague prior. 151 | 152 | Parameters 153 | ---------- 154 | name : Variant name. 155 | totals : Total number of experiment observations (e.g. number of sessions). 156 | sum_values : Sum of values for a given variant (e.g. total sum of waiting time). 157 | a_prior : Prior alpha parameter of a Gamma distribution (conjugate prior). 158 | Default value 0.1 is on purpose to be vague (lower information). 159 | b_prior : Prior beta parameter (rate) of a Gamma distribution (conjugate prior). 160 | Default value 0.1 is on purpose to be vague (lower information). 161 | replace : Replace data if variant already exists. 162 | If set to False, data of existing variant will be appended to existing data. 
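(Usage aside: a minimal sketch of feeding pre-aggregated data into this method, e.g. totals and sums computed by a database query. `ExponentialDataTest` is the class defined in this file; its package-level export is assumed.)

```python
from bayesian_testing.experiments import ExponentialDataTest  # assumed export

test = ExponentialDataTest()
# totals = number of sessions, sum_values = total waiting time per variant
test.add_variant_data_agg("A", totals=1000, sum_values=1520.3)
test.add_variant_data_agg("B", totals=980, sum_values=1390.7)
pbbs, loss, intervals = test.eval_simulation(sim_count=20000, seed=52)
```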
163 | """ 164 | if not isinstance(name, str): 165 | raise ValueError("Variant name has to be a string.") 166 | if a_prior <= 0 or b_prior <= 0: 167 | raise ValueError("Both [a_prior, b_prior] have to be positive numbers.") 168 | if totals <= 0: 169 | raise ValueError("Input variable 'totals' is expected to be positive integer.") 170 | if sum_values < 0: 171 | raise ValueError("Input variable 'sum_values' is expected to be non-negative number.") 172 | 173 | if name not in self.variant_names: 174 | self.data[name] = { 175 | "totals": totals, 176 | "sum_values": sum_values, 177 | "a_prior": a_prior, 178 | "b_prior": b_prior, 179 | } 180 | elif name in self.variant_names and replace: 181 | msg = ( 182 | f"Variant {name} already exists - new data is replacing it. " 183 | "If you wish to append instead, use replace=False." 184 | ) 185 | logger.info(msg) 186 | self.data[name] = { 187 | "totals": totals, 188 | "sum_values": sum_values, 189 | "a_prior": a_prior, 190 | "b_prior": b_prior, 191 | } 192 | elif name in self.variant_names and not replace: 193 | msg = ( 194 | f"Variant {name} already exists - new data is appended to variant, " 195 | "keeping its original prior setup. " 196 | "If you wish to replace data instead, use replace=True." 197 | ) 198 | logger.info(msg) 199 | self.data[name]["totals"] += totals 200 | self.data[name]["sum_values"] += sum_values 201 | 202 | def add_variant_data( 203 | self, 204 | name: str, 205 | data: List[Union[float, int]], 206 | a_prior: Number = 0.1, 207 | b_prior: Number = 0.1, 208 | replace: bool = True, 209 | ) -> None: 210 | """ 211 | Add variant data to a test class using raw Exponential data. 212 | 213 | Default prior setup is set for Gamma(0.1, 0.1) which is non-information prior. 214 | 215 | Parameters 216 | ---------- 217 | name : Variant name.s 218 | data : List of Exponential data. 219 | a_prior : Prior alpha parameter of a Gamma distribution (conjugate prior). 220 | Default value 0.1 is on purpose to be vague (lower information). 221 | b_prior : Prior beta parameter (rate) of a Gamma distribution (conjugate prior). 222 | Default value 0.1 is on purpose to be vague (lower information). 223 | replace : Replace data if variant already exists. 224 | If set to False, data of existing variant will be appended to existing data. 225 | """ 226 | if len(data) == 0: 227 | raise ValueError("Data of added variant needs to have some observations.") 228 | if not min([i >= 0 for i in data]): 229 | raise ValueError("Input data needs to be a list of non-negative integers.") 230 | 231 | totals = len(data) 232 | sum_values = sum(data) 233 | 234 | self.add_variant_data_agg(name, totals, sum_values, a_prior, b_prior, replace) 235 | -------------------------------------------------------------------------------- /bayesian_testing/experiments/normal.py: -------------------------------------------------------------------------------- 1 | from numbers import Number 2 | from typing import List, Tuple 3 | 4 | import numpy as np 5 | 6 | from bayesian_testing.experiments.base import BaseDataTest 7 | from bayesian_testing.metrics import eval_normal_agg 8 | from bayesian_testing.utilities import get_logger 9 | 10 | logger = get_logger("bayesian_testing") 11 | 12 | 13 | class NormalDataTest(BaseDataTest): 14 | """ 15 | Class for Bayesian A/B test for Normal data. 16 | 17 | After class initialization, use add_variant methods to insert variant data. 18 | Then to get results of the test, use for instance `evaluate` method. 
19 | """ 20 | 21 | def __init__(self) -> None: 22 | """ 23 | Initialize NormalDataTest class. 24 | """ 25 | super().__init__() 26 | 27 | @property 28 | def totals(self): 29 | return [self.data[k]["totals"] for k in self.data] 30 | 31 | @property 32 | def sum_values(self): 33 | return [self.data[k]["sum_values"] for k in self.data] 34 | 35 | @property 36 | def sum_values_2(self): 37 | return [self.data[k]["sum_values_2"] for k in self.data] 38 | 39 | @property 40 | def m_priors(self): 41 | return [self.data[k]["m_prior"] for k in self.data] 42 | 43 | @property 44 | def a_priors_ig(self): 45 | return [self.data[k]["a_prior_ig"] for k in self.data] 46 | 47 | @property 48 | def b_priors_ig(self): 49 | return [self.data[k]["b_prior_ig"] for k in self.data] 50 | 51 | @property 52 | def w_priors(self): 53 | return [self.data[k]["w_prior"] for k in self.data] 54 | 55 | def eval_simulation( 56 | self, 57 | sim_count: int = 20000, 58 | seed: int = None, 59 | min_is_best: bool = False, 60 | interval_alpha: float = 0.95, 61 | ) -> Tuple[dict, dict, dict]: 62 | """ 63 | Calculate probabilities of being best, expected loss and credible intervals for a current 64 | class state. 65 | 66 | Parameters 67 | ---------- 68 | sim_count : Number of simulations to be used for probability estimation. 69 | seed : Random seed. 70 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 71 | interval_alpha : Credible interval probability (value between 0 and 1). 72 | 73 | Returns 74 | ------- 75 | res_pbbs : Dictionary with probabilities of being best for all variants in experiment. 76 | res_loss : Dictionary with expected loss for all variants in experiment. 77 | res_intervals : Dictionary with quantile-based credible intervals for all variants. 78 | """ 79 | pbbs, loss, intervals = eval_normal_agg( 80 | self.totals, 81 | self.sum_values, 82 | self.sum_values_2, 83 | sim_count=sim_count, 84 | m_priors=self.m_priors, 85 | a_priors_ig=self.a_priors_ig, 86 | b_priors_ig=self.b_priors_ig, 87 | w_priors=self.w_priors, 88 | seed=seed, 89 | min_is_best=min_is_best, 90 | interval_alpha=interval_alpha, 91 | ) 92 | res_pbbs = dict(zip(self.variant_names, pbbs)) 93 | res_loss = dict(zip(self.variant_names, loss)) 94 | res_intervals = dict(zip(self.variant_names, intervals)) 95 | 96 | return res_pbbs, res_loss, res_intervals 97 | 98 | def evaluate( 99 | self, 100 | sim_count: int = 20000, 101 | seed: int = None, 102 | min_is_best: bool = False, 103 | interval_alpha: float = 0.95, 104 | ) -> List[dict]: 105 | """ 106 | Evaluation of experiment. 107 | 108 | Parameters 109 | ---------- 110 | sim_count : Number of simulations to be used for probability estimation. 111 | seed : Random seed. 112 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 113 | interval_alpha : Credible interval probability (value between 0 and 1). 114 | 115 | Returns 116 | ------- 117 | res : List of dictionaries with results per variant. 
118 | """ 119 | keys = [ 120 | "variant", 121 | "totals", 122 | "sum_values", 123 | "avg_values", 124 | "posterior_mean", 125 | "credible_interval", 126 | "prob_being_best", 127 | "expected_loss", 128 | ] 129 | avg_values = [round(i[0] / i[1], 5) for i in zip(self.sum_values, self.totals)] 130 | posterior_mean = [ 131 | round((i[0] + i[3] * i[2]) / (i[1] + i[3]), 5) 132 | for i in zip(self.sum_values, self.totals, self.m_priors, self.w_priors) 133 | ] 134 | eval_pbbs, eval_loss, eval_intervals = self.eval_simulation( 135 | sim_count, seed, min_is_best, interval_alpha 136 | ) 137 | pbbs = list(eval_pbbs.values()) 138 | loss = list(eval_loss.values()) 139 | intervals = list(eval_intervals.values()) 140 | data = [ 141 | self.variant_names, 142 | self.totals, 143 | [round(i, 5) for i in self.sum_values], 144 | avg_values, 145 | posterior_mean, 146 | intervals, 147 | pbbs, 148 | loss, 149 | ] 150 | res = [dict(zip(keys, item)) for item in zip(*data)] 151 | 152 | return res 153 | 154 | def add_variant_data_agg( 155 | self, 156 | name: str, 157 | totals: int, 158 | sum_values: float, 159 | sum_values_2: float, 160 | m_prior: Number = 1, 161 | a_prior_ig: Number = 0, 162 | b_prior_ig: Number = 0, 163 | w_prior: Number = 0.01, 164 | replace: bool = True, 165 | ) -> None: 166 | """ 167 | Add variant data to test class using aggregated Normal data. 168 | This can be convenient as aggregation can be done on database level. 169 | 170 | The goal of default prior setup is to be low information. 171 | It should be tuned with caution. 172 | 173 | Parameters 174 | ---------- 175 | name : Variant name. 176 | totals : Total number of experiment observations (e.g. number of sessions). 177 | sum_values : Sum of values for a given variant. 178 | sum_values_2 : Sum of values squared for a given variant. 179 | m_prior : Prior normal mean. 180 | a_prior_ig : Prior alpha from inverse gamma dist. for unknown variance. 181 | In theory a > 0, but as we always have at least one observation, we can start at 0. 182 | b_prior_ig : Prior beta from inverse gamma dist. for unknown variance. 183 | In theory b > 0, but as we always have at least one observation, we can start at 0. 184 | w_prior : Prior effective sample sizes. 185 | replace : Replace data if variant already exists. 186 | If set to False, data of existing variant will be appended to existing data. 187 | """ 188 | if not isinstance(name, str): 189 | raise ValueError("Variant name has to be a string.") 190 | if m_prior < 0 or a_prior_ig < 0 or b_prior_ig < 0 or w_prior < 0: 191 | raise ValueError("All priors of [m, a_ig, b_ig, w] have to be non-negative numbers.") 192 | if totals <= 0: 193 | raise ValueError("Input variable 'totals' is expected to be positive integer.") 194 | 195 | if name not in self.variant_names: 196 | self.data[name] = { 197 | "totals": totals, 198 | "sum_values": sum_values, 199 | "sum_values_2": sum_values_2, 200 | "m_prior": m_prior, 201 | "a_prior_ig": a_prior_ig, 202 | "b_prior_ig": b_prior_ig, 203 | "w_prior": w_prior, 204 | } 205 | elif name in self.variant_names and replace: 206 | msg = ( 207 | f"Variant {name} already exists - new data is replacing it. " 208 | "If you wish to append instead, use replace=False." 
209 |             )
210 |             logger.info(msg)
211 |             self.data[name] = {
212 |                 "totals": totals,
213 |                 "sum_values": sum_values,
214 |                 "sum_values_2": sum_values_2,
215 |                 "m_prior": m_prior,
216 |                 "a_prior_ig": a_prior_ig,
217 |                 "b_prior_ig": b_prior_ig,
218 |                 "w_prior": w_prior,
219 |             }
220 |         elif name in self.variant_names and not replace:
221 |             msg = (
222 |                 f"Variant {name} already exists - new data is appended to variant, "
223 |                 "keeping its original prior setup. "
224 |                 "If you wish to replace data instead, use replace=True."
225 |             )
226 |             logger.info(msg)
227 |             self.data[name]["totals"] += totals
228 |             self.data[name]["sum_values"] += sum_values
229 |             self.data[name]["sum_values_2"] += sum_values_2
230 | 
231 |     def add_variant_data(
232 |         self,
233 |         name: str,
234 |         data: List[Number],
235 |         m_prior: Number = 1,
236 |         a_prior_ig: Number = 0,
237 |         b_prior_ig: Number = 0,
238 |         w_prior: Number = 0.01,
239 |         replace: bool = True,
240 |     ) -> None:
241 |         """
242 |         Add variant data to test class using raw Normal data.
243 | 
244 |         The goal of default prior setup is to be low information. It should be tuned with caution.
245 | 
246 |         Parameters
247 |         ----------
248 |         name : Variant name.
249 |         data : List of normal data.
250 |         m_prior : Prior mean.
251 |         a_prior_ig : Prior alpha from inverse gamma dist. for unknown variance.
252 |             In theory a > 0, but as we always have at least one observation, we can start at 0.
253 |         b_prior_ig : Prior beta from inverse gamma dist. for unknown variance.
254 |             In theory b > 0, but as we always have at least one observation, we can start at 0.
255 |         w_prior : Prior effective sample sizes.
256 |         replace : Replace data if variant already exists.
257 |             If set to False, data of existing variant will be appended to existing data.
258 |         """
259 |         if len(data) == 0:
260 |             raise ValueError("Data of added variant needs to have some observations.")
261 | 
262 |         totals = len(data)
263 |         sum_values = sum(data)
264 |         sum_values_2 = sum(np.square(data))
265 | 
266 |         self.add_variant_data_agg(
267 |             name,
268 |             totals,
269 |             sum_values,
270 |             sum_values_2,
271 |             m_prior,
272 |             a_prior_ig,
273 |             b_prior_ig,
274 |             w_prior,
275 |             replace,
276 |         )
277 | 
--------------------------------------------------------------------------------
/bayesian_testing/experiments/poisson.py:
--------------------------------------------------------------------------------
1 | from numbers import Number
2 | from typing import List, Tuple, Union
3 | 
4 | from bayesian_testing.experiments.base import BaseDataTest
5 | from bayesian_testing.metrics import eval_poisson_agg
6 | from bayesian_testing.utilities import get_logger
7 | 
8 | logger = get_logger("bayesian_testing")
9 | 
10 | 
11 | class PoissonDataTest(BaseDataTest):
12 |     """
13 |     Class for Bayesian A/B test for Poisson data (i.e. numbers of events, e.g. goals scored).
14 | 
15 |     After class initialization, use add_variant methods to insert variant data.
16 |     Then to get results of the test, use for instance `evaluate` method.
17 |     """
18 | 
19 |     def __init__(self) -> None:
20 |         """
21 |         Initialize PoissonDataTest class.
22 | """ 23 | super().__init__() 24 | 25 | @property 26 | def totals(self): 27 | return [self.data[k]["totals"] for k in self.data] 28 | 29 | @property 30 | def sum_values(self): 31 | return [self.data[k]["sum_values"] for k in self.data] 32 | 33 | @property 34 | def a_priors(self): 35 | return [self.data[k]["a_prior"] for k in self.data] 36 | 37 | @property 38 | def b_priors(self): 39 | return [self.data[k]["b_prior"] for k in self.data] 40 | 41 | def eval_simulation( 42 | self, 43 | sim_count: int = 20000, 44 | seed: int = None, 45 | min_is_best: bool = False, 46 | interval_alpha: float = 0.95, 47 | ) -> Tuple[dict, dict, dict]: 48 | """ 49 | Calculate probabilities of being best, expected loss and credible intervals for a current 50 | class state. 51 | 52 | Parameters 53 | ---------- 54 | sim_count : Number of simulations to be used for probability estimation. 55 | seed : Random seed. 56 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 57 | interval_alpha : Credible interval probability (value between 0 and 1). 58 | 59 | Returns 60 | ------- 61 | res_pbbs : Dictionary with probabilities of being best for all variants in experiment. 62 | res_loss : Dictionary with expected loss for all variants in experiment. 63 | res_intervals : Dictionary with quantile-based credible intervals for all variants. 64 | """ 65 | pbbs, loss, intervals = eval_poisson_agg( 66 | self.totals, 67 | self.sum_values, 68 | self.a_priors, 69 | self.b_priors, 70 | sim_count, 71 | seed, 72 | min_is_best, 73 | interval_alpha, 74 | ) 75 | res_pbbs = dict(zip(self.variant_names, pbbs)) 76 | res_loss = dict(zip(self.variant_names, loss)) 77 | res_intervals = dict(zip(self.variant_names, intervals)) 78 | 79 | return res_pbbs, res_loss, res_intervals 80 | 81 | def evaluate( 82 | self, 83 | sim_count: int = 20000, 84 | seed: int = None, 85 | min_is_best: bool = False, 86 | interval_alpha: float = 0.95, 87 | ) -> List[dict]: 88 | """ 89 | Evaluation of experiment. 90 | 91 | Parameters 92 | ---------- 93 | sim_count : Number of simulations to be used for probability estimation. 94 | seed : Random seed. 95 | min_is_best : Option to change "being best" to a minimum. Default is maximum. 96 | interval_alpha : Credible interval probability (value between 0 and 1). 97 | 98 | Returns 99 | ------- 100 | res : List of dictionaries with results per variant. 
101 | """ 102 | keys = [ 103 | "variant", 104 | "totals", 105 | "sum_values", 106 | "observed_average", 107 | "posterior_mean", 108 | "credible_interval", 109 | "prob_being_best", 110 | "expected_loss", 111 | ] 112 | observed_average = [round(i[0] / i[1], 5) for i in zip(self.sum_values, self.totals)] 113 | posterior_mean = [ 114 | round((i[2] + i[0]) / (i[3] + i[1]), 5) 115 | for i in zip(self.sum_values, self.totals, self.a_priors, self.b_priors) 116 | ] 117 | eval_pbbs, eval_loss, eval_intervals = self.eval_simulation( 118 | sim_count, seed, min_is_best, interval_alpha 119 | ) 120 | pbbs = list(eval_pbbs.values()) 121 | loss = list(eval_loss.values()) 122 | intervals = list(eval_intervals.values()) 123 | data = [ 124 | self.variant_names, 125 | self.totals, 126 | self.sum_values, 127 | observed_average, 128 | posterior_mean, 129 | intervals, 130 | pbbs, 131 | loss, 132 | ] 133 | res = [dict(zip(keys, item)) for item in zip(*data)] 134 | 135 | return res 136 | 137 | def add_variant_data_agg( 138 | self, 139 | name: str, 140 | totals: int, 141 | sum_values: Union[float, int], 142 | a_prior: Number = 0.1, 143 | b_prior: Number = 0.1, 144 | replace: bool = True, 145 | ) -> None: 146 | """ 147 | Add variant data to test class using aggregated Poisson data. 148 | This can be convenient as aggregation can be done on database level. 149 | 150 | Default prior setup is set for Gamma(0.1, 0.1) which is on purpose very vague prior. 151 | 152 | Parameters 153 | ---------- 154 | name : Variant name. 155 | totals : Total number of experiment observations (e.g. number of matches). 156 | sum_values : Sum of values for a given variant (e.g. total number of goals). 157 | a_prior : Prior alpha parameter of a Gamma distribution (conjugate prior). 158 | Default value 0.1 is on purpose to be vague (lower information). 159 | b_prior : Prior beta parameter (rate) of a Gamma distribution (conjugate prior). 160 | Default value 0.1 is on purpose to be vague (lower information). 161 | replace : Replace data if variant already exists. 162 | If set to False, data of existing variant will be appended to existing data. 163 | """ 164 | if not isinstance(name, str): 165 | raise ValueError("Variant name has to be a string.") 166 | if a_prior <= 0 or b_prior <= 0: 167 | raise ValueError("Both [a_prior, b_prior] have to be positive numbers.") 168 | if totals <= 0: 169 | raise ValueError("Input variable 'totals' is expected to be positive integer.") 170 | if sum_values < 0: 171 | raise ValueError("Input variable 'sum_values' is expected to be non-negative number.") 172 | 173 | if name not in self.variant_names: 174 | self.data[name] = { 175 | "totals": totals, 176 | "sum_values": sum_values, 177 | "a_prior": a_prior, 178 | "b_prior": b_prior, 179 | } 180 | elif name in self.variant_names and replace: 181 | msg = ( 182 | f"Variant {name} already exists - new data is replacing it. " 183 | "If you wish to append instead, use replace=False." 184 | ) 185 | logger.info(msg) 186 | self.data[name] = { 187 | "totals": totals, 188 | "sum_values": sum_values, 189 | "a_prior": a_prior, 190 | "b_prior": b_prior, 191 | } 192 | elif name in self.variant_names and not replace: 193 | msg = ( 194 | f"Variant {name} already exists - new data is appended to variant, " 195 | "keeping its original prior setup. " 196 | "If you wish to replace data instead, use replace=True." 
197 |             )
198 |             logger.info(msg)
199 |             self.data[name]["totals"] += totals
200 |             self.data[name]["sum_values"] += sum_values
201 | 
202 |     def add_variant_data(
203 |         self,
204 |         name: str,
205 |         data: List[int],
206 |         a_prior: Number = 0.1,
207 |         b_prior: Number = 0.1,
208 |         replace: bool = True,
209 |     ) -> None:
210 |         """
211 |         Add variant data to test class using raw Poisson data.
212 | 
213 |         Default prior setup is set for Gamma(0.1, 0.1) which is a non-informative (vague) prior.
214 | 
215 |         Parameters
216 |         ----------
217 |         name : Variant name.
218 |         data : List of Poisson data.
219 |         a_prior : Prior alpha parameter of a Gamma distribution (conjugate prior).
220 |             Default value 0.1 is on purpose to be vague (lower information).
221 |         b_prior : Prior beta parameter (rate) of a Gamma distribution (conjugate prior).
222 |             Default value 0.1 is on purpose to be vague (lower information).
223 |         replace : Replace data if variant already exists.
224 |             If set to False, data of existing variant will be appended to existing data.
225 |         """
226 |         if len(data) == 0:
227 |             raise ValueError("Data of added variant needs to have some observations.")
228 |         if not all(i >= 0 for i in data):
229 |             raise ValueError("Input data needs to be a list of non-negative integers.")
230 | 
231 |         totals = len(data)
232 |         sum_values = sum(data)
233 | 
234 |         self.add_variant_data_agg(name, totals, sum_values, a_prior, b_prior, replace)
235 | 
--------------------------------------------------------------------------------
/bayesian_testing/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | from .evaluation import (
2 |     eval_bernoulli_agg,
3 |     eval_normal_agg,
4 |     eval_delta_lognormal_agg,
5 |     eval_numerical_dirichlet_agg,
6 |     eval_poisson_agg,
7 |     eval_delta_normal_agg,
8 |     eval_exponential_agg,
9 | )
10 | 
11 | __all__ = [
12 |     "eval_bernoulli_agg",
13 |     "eval_normal_agg",
14 |     "eval_delta_lognormal_agg",
15 |     "eval_delta_normal_agg",
16 |     "eval_numerical_dirichlet_agg",
17 |     "eval_poisson_agg",
18 |     "eval_exponential_agg",
19 | ]
20 | 
--------------------------------------------------------------------------------
/bayesian_testing/metrics/posteriors.py:
--------------------------------------------------------------------------------
1 | from typing import List, Tuple, Union
2 | 
3 | import numpy as np
4 | 
5 | 
6 | def beta_posteriors_all(
7 |     totals: List[int],
8 |     positives: List[int],
9 |     sim_count: int,
10 |     a_priors_beta: List[Union[float, int]],
11 |     b_priors_beta: List[Union[float, int]],
12 |     seed: Union[int, np.random.bit_generator.SeedSequence] = None,
13 | ) -> np.ndarray:
14 |     """
15 |     Draw from Beta posterior distributions for all variants at once.
16 | 
17 |     Parameters
18 |     ----------
19 |     totals : List of total experiment observations (e.g. number of sessions) for each variant.
20 |     positives : List of total number of ones (e.g. number of conversions) for each variant.
21 |     sim_count : Number of simulations to be used for probability estimation.
22 |     a_priors_beta : List of prior alpha parameters of Beta distributions for each variant.
23 |     b_priors_beta : List of prior beta parameters of Beta distributions for each variant.
24 |     seed : Random seed.
25 | 
26 |     Returns
27 |     -------
28 |     beta_samples : List of lists of beta distribution samples for all variants.
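(Aside: `beta_posteriors_all` below can also be used directly to estimate probabilities of being best by hand, in the spirit of the examples/session_data_manual_pbbs.ipynb notebook. A sketch:)

```python
import numpy as np
from bayesian_testing.metrics.posteriors import beta_posteriors_all

samples = beta_posteriors_all(
    totals=[1000, 1000],
    positives=[100, 120],
    sim_count=20000,
    a_priors_beta=[0.5, 0.5],
    b_priors_beta=[0.5, 0.5],
    seed=52,
)
best = samples.argmax(axis=0)  # index of the winning variant in each simulation
pbbs = np.bincount(best, minlength=samples.shape[0]) / samples.shape[1]
```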
29 | """ 30 | rng = np.random.default_rng(seed) 31 | 32 | beta_samples = np.array( 33 | [ 34 | rng.beta( 35 | positives[i] + a_priors_beta[i], 36 | totals[i] - positives[i] + b_priors_beta[i], 37 | sim_count, 38 | ) 39 | for i in range(len(totals)) 40 | ] 41 | ) 42 | return beta_samples 43 | 44 | 45 | def normal_posteriors( 46 | total: int, 47 | sums: float, 48 | sums_2: float, 49 | sim_count: int = 20000, 50 | prior_m: Union[float, int] = 1, 51 | prior_a: Union[float, int] = 0, 52 | prior_b: Union[float, int] = 0, 53 | prior_w: Union[float, int] = 0.01, 54 | seed: Union[int, np.random.bit_generator.SeedSequence] = None, 55 | ) -> Tuple[List[Union[float, int]], List[Union[float, int]]]: 56 | """ 57 | Drawing mus and sigmas from posterior Normal distribution considering given aggregated data. 58 | 59 | Parameters 60 | ---------- 61 | total : Number of data observations from normal data. 62 | sums : Sum of original data. 63 | sums_2 : Sum of squares of original data. 64 | sim_count : Number of simulations. 65 | prior_m : Prior mean. 66 | prior_a : Prior alpha from inverse gamma dist. for unknown variance of original data. 67 | In theory a > 0, but as we always have at least one observation, we can start at 0. 68 | prior_b : Prior beta from inverse gamma dist. for unknown variance of original data. 69 | In theory b > 0, but as we always have at least one observation, we can start at 0. 70 | prior_w : Prior effective sample size. 71 | seed : Random seed. 72 | 73 | Returns 74 | ------- 75 | mu_post : List of size sim_count with mus drawn from normal distribution. 76 | sig_2_post : List of size sim_count with mus drawn from normal distribution. 77 | """ 78 | rng = np.random.default_rng(seed) 79 | 80 | x_bar = sums / total 81 | a_post = prior_a + (total / 2) 82 | b_post = ( 83 | prior_b 84 | + (1 / 2) * (sums_2 - 2 * sums * x_bar + total * (x_bar**2)) 85 | + ((total * prior_w) / (2 * (total + prior_w))) * ((x_bar - prior_m) ** 2) 86 | ) 87 | 88 | # here it has to be 1/b as it is a scale, and not a rate 89 | sig_2_post = 1 / rng.gamma(a_post, 1 / b_post, sim_count) 90 | 91 | m_post = (total * x_bar + prior_w * prior_m) / (total + prior_w) 92 | 93 | mu_post = rng.normal(m_post, np.sqrt(sig_2_post / (total + prior_w))) 94 | 95 | return mu_post, sig_2_post 96 | 97 | 98 | def lognormal_posteriors( 99 | total: int, 100 | sum_logs: float, 101 | sum_logs_2: float, 102 | sim_count: int = 20000, 103 | prior_m: Union[float, int] = 1, 104 | prior_a: Union[float, int] = 0, 105 | prior_b: Union[float, int] = 0, 106 | prior_w: Union[float, int] = 0.01, 107 | seed: Union[int, np.random.bit_generator.SeedSequence] = None, 108 | ) -> List[float]: 109 | """ 110 | Drawing from posterior LogNormal distribution using logarithms of original (lognormal) data 111 | (logarithms of lognormal data are normal). Input data is in aggregated form. 112 | 113 | Parameters 114 | ---------- 115 | total : Number of lognormal data observations. 116 | Could be number of conversions in session data. 117 | sum_logs : Sum of logarithms of original data. 118 | sum_logs_2 : Sum of logarithms squared of original data. 119 | sim_count : Number of simulations. 120 | prior_m : Prior mean of logarithms of original data. 121 | prior_a : Prior alpha from inverse gamma dist. for unknown variance of logarithms 122 | of original data. In theory a > 0, but as we always have at least one observation, 123 | we can start at 0. 124 | prior_b : Prior beta from inverse gamma dist. for unknown variance of logarithms 125 | of original data. 
In theory b > 0, but as we always have at least one observation, 126 | we can start at 0. 127 | prior_w : Prior effective sample size. 128 | seed : Random seed. 129 | 130 | Returns 131 | ------- 132 | res : List of sim_count numbers drawn from lognormal distribution. 133 | """ 134 | if total <= 0: 135 | return list(np.zeros(sim_count)) 136 | 137 | # normal posterior for aggregated data of logarithms of original data 138 | normal_mu_post, normal_sig_2_post = normal_posteriors( 139 | total, sum_logs, sum_logs_2, sim_count, prior_m, prior_a, prior_b, prior_w, seed 140 | ) 141 | 142 | # final simulated lognormal means using simulated normal means and sigmas 143 | res = np.exp(normal_mu_post + (normal_sig_2_post / 2)) 144 | 145 | return res 146 | 147 | 148 | def dirichlet_posteriors( 149 | concentration: List[int], 150 | prior: List[Union[float, int]], 151 | sim_count: int = 20000, 152 | seed: Union[int, np.random.bit_generator.SeedSequence] = None, 153 | ) -> np.ndarray: 154 | """ 155 | Drawing from Dirichlet posterior for a single variant. 156 | 157 | Parameters 158 | ---------- 159 | concentration : List of numbers of observation for each possible category. 160 | In dice example it would be numbers of observations for each possible face. 161 | prior : List of prior values for each category in dirichlet distribution. 162 | sim_count : Number of simulations. 163 | seed : Random seed. 164 | 165 | Returns 166 | ------- 167 | res : List of lists of dirichlet samples. 168 | """ 169 | rng = np.random.default_rng(seed) 170 | 171 | posterior_concentration = [sum(x) for x in zip(prior, concentration)] 172 | res = rng.dirichlet(posterior_concentration, sim_count) 173 | 174 | return res 175 | 176 | 177 | def pois_gamma_posteriors_all( 178 | totals: List[int], 179 | sums: List[Union[float, int]], 180 | sim_count: int, 181 | a_priors_gamma: List[Union[float, int]], 182 | b_priors_gamma: List[Union[float, int]], 183 | seed: Union[int, np.random.bit_generator.SeedSequence] = None, 184 | ) -> np.ndarray: 185 | """ 186 | Draw from Gamma posterior distributions for all variants of Poisson data at once. 187 | 188 | Parameters 189 | ---------- 190 | totals : List of total experiment observations (e.g. number of matches) for each variant. 191 | sums : List of sums of observations (e.g. number of goals) for each variant. 192 | sim_count : Number of simulations to be used for probability estimation. 193 | a_priors_gamma : List of prior alpha parameters of Gamma distributions for each variant. 194 | b_priors_gamma : List of prior beta parameters (rates) of Gamma distributions for each variant. 195 | seed : Random seed. 196 | 197 | Returns 198 | ------- 199 | gamma_samples : List of lists of Gamma distribution samples for all variants. 200 | """ 201 | rng = np.random.default_rng(seed) 202 | 203 | gamma_samples = np.array( 204 | [ 205 | rng.gamma( 206 | sums[i] + a_priors_gamma[i], 207 | # here it has to be 1/(...) as it is a scale, and not a rate 208 | 1 / (totals[i] + b_priors_gamma[i]), 209 | sim_count, 210 | ) 211 | for i in range(len(totals)) 212 | ] 213 | ) 214 | return gamma_samples 215 | 216 | 217 | def exp_gamma_posteriors_all( 218 | totals: List[int], 219 | sums: List[Union[float, int]], 220 | sim_count: int, 221 | a_priors_gamma: List[Union[float, int]], 222 | b_priors_gamma: List[Union[float, int]], 223 | seed: Union[int, np.random.bit_generator.SeedSequence] = None, 224 | ) -> np.ndarray: 225 | """ 226 | Draw from Gamma posterior distributions for all variants of Exponential data at once. 
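(Aside: the `np.exp(mu + sig_2 / 2)` step in `lognormal_posteriors` above uses the lognormal mean identity E[X] = exp(μ + σ²/2) for log X ~ N(μ, σ²). A quick numerical check, a sketch rather than repo code:)

```python
import numpy as np

rng = np.random.default_rng(3)
mu, sigma = 1.0, 0.5
x = rng.lognormal(mu, sigma, 1_000_000)
print(x.mean())                   # ≈ 3.08 from simulation
print(np.exp(mu + sigma**2 / 2))  # 3.0802..., the exact lognormal mean
```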
227 | 228 | Parameters 229 | ---------- 230 | totals : List of total experiment observations (e.g. number of sessions) for each variant. 231 | sums : List of sums of observations (e.g. total time spent) for each variant. 232 | sim_count : Number of simulations to be used for probability estimation. 233 | a_priors_gamma : List of prior alpha parameters of Gamma distributions for each variant. 234 | b_priors_gamma : List of prior beta parameters (rates) of Gamma distributions for each variant. 235 | seed : Random seed. 236 | 237 | Returns 238 | ------- 239 | gamma_samples : List of lists of Gamma distribution samples for all variants. 240 | """ 241 | rng = np.random.default_rng(seed) 242 | 243 | gamma_samples = np.array( 244 | [ 245 | rng.gamma( 246 | totals[i] + a_priors_gamma[i], 247 | # here it has to be 1/(...) as it is a scale, and not a rate 248 | 1 / (sums[i] + b_priors_gamma[i]), 249 | sim_count, 250 | ) 251 | for i in range(len(totals)) 252 | ] 253 | ) 254 | return gamma_samples 255 | -------------------------------------------------------------------------------- /bayesian_testing/utilities/__init__.py: -------------------------------------------------------------------------------- 1 | from .logging import get_logger 2 | 3 | __all__ = ["get_logger"] 4 | -------------------------------------------------------------------------------- /bayesian_testing/utilities/common.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | 4 | def check_list_lengths(lists: List[List]) -> None: 5 | """ 6 | Check if input lists are all of same length. 7 | Parameters 8 | ---------- 9 | lists : List of lists of different possible types. 10 | """ 11 | it = iter(lists) 12 | the_len = len(next(it)) 13 | if not all(len(i) == the_len for i in it): 14 | raise ValueError("Not all lists have same length!") 15 | -------------------------------------------------------------------------------- /bayesian_testing/utilities/logging.conf: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root,bayesian_testing 3 | 4 | [handlers] 5 | keys=consoleHandler 6 | 7 | [formatters] 8 | keys=simpleFormatter 9 | 10 | [logger_root] 11 | level=INFO 12 | handlers=consoleHandler 13 | 14 | [logger_bayesian_testing] 15 | level=INFO 16 | handlers=consoleHandler 17 | qualname=bayesian_testing 18 | propagate=0 19 | 20 | [handler_consoleHandler] 21 | class=StreamHandler 22 | level=INFO 23 | formatter=simpleFormatter 24 | args=(sys.stdout,) 25 | 26 | [formatter_simpleFormatter] 27 | format=%(asctime)s - %(name)s - %(levelname)s - %(message)s 28 | -------------------------------------------------------------------------------- /bayesian_testing/utilities/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import logging.config 3 | from os import path 4 | 5 | log_file_path = path.join(path.dirname(path.abspath(__file__)), "logging.conf") 6 | 7 | logging.config.fileConfig(log_file_path, disable_existing_loggers=False) 8 | 9 | 10 | def get_logger(logger_name): 11 | return logging.getLogger(logger_name) 12 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Matt52/bayesian-testing/cea9afa5d7e3321d159d7b387ff57803467a18d5/codecov.yml 
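(Aside: both Gamma samplers above flag the same gotcha in their comments — NumPy's `Generator.gamma` is parametrized by shape and *scale*, while the conjugate updates are written in terms of a rate, so the rate must be inverted before sampling. A quick check:)

```python
import numpy as np

rng = np.random.default_rng(0)
shape, rate = 5.0, 2.0
samples = rng.gamma(shape, 1 / rate, 1_000_000)
print(samples.mean())  # ≈ 2.5 = shape / rate, as expected for Gamma(shape, rate)
```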
-------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | -------------------------------------------------------------------------------- /examples/dice_rolls_ab_testing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "45ce22be-8ae0-4b0e-bce5-9e9aab6f105f", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "from IPython.core.interactiveshell import InteractiveShell\n", 13 | "InteractiveShell.ast_node_interactivity = \"all\"\n", 14 | "\n", 15 | "import numpy as np\n", 16 | "np.set_printoptions(legacy=\"1.25\")\n", 17 | "import pandas as pd\n", 18 | "from bayesian_testing.experiments import DiscreteDataTest" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "f888f299-69a0-4f3c-bd57-af3a59bedba0", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/plain": [ 32 | "Generator(PCG64) at 0x132BA19E0" 33 | ] 34 | }, 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "output_type": "execute_result" 38 | } 39 | ], 40 | "source": [ 41 | "np.random.default_rng(52)\n", 42 | "\n", 43 | "values = [1,2,3,4,5,6]" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "id": "24a15d66-d928-432c-beb3-e25e3be10cc0", 50 | "metadata": { 51 | "tags": [] 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "die_A_rolls = list(np.random.choice(values, 1000, p=[1/6, 1/6, 1/6, 1/6, 1/6, 1/6]))\n", 56 | "die_B_rolls = list(np.random.choice(values, 1200, p=[0.2, 0.2, 0.1, 0.1, 0.2, 0.2]))\n", 57 | "die_C_rolls = list(np.random.choice(values, 500, p=[0.2, 0.1, 0.1, 0.2, 0.2, 0.2]))" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 4, 63 | "id": "35989040-af25-4129-9678-de04c0397c32", 64 | "metadata": { 65 | "tags": [] 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "discrete_test = DiscreteDataTest(values)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "id": "e902885d-7382-42c8-af7f-1d82fba06bb4", 76 | "metadata": { 77 | "tags": [] 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "discrete_test.add_variant_data('A', die_A_rolls)\n", 82 | "discrete_test.add_variant_data('B', die_B_rolls)\n", 83 | "discrete_test.add_variant_data('C', die_C_rolls)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 6, 89 | "id": "a3ee97a1-d48c-407b-b13c-5cfb11e6591f", 90 | "metadata": { 91 | "tags": [] 92 | }, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "{'A': {'concentration': [168.0, 166.0, 176.0, 172.0, 168.0, 150.0],\n", 98 | " 'prior': [1, 1, 1, 1, 1, 1]},\n", 99 | " 'B': {'concentration': [256.0, 246.0, 111.0, 116.0, 239.0, 232.0],\n", 100 | " 'prior': [1, 1, 1, 1, 1, 1]},\n", 101 | " 'C': {'concentration': [84.0, 57.0, 58.0, 100.0, 100.0, 101.0],\n", 102 | " 'prior': [1, 1, 1, 1, 1, 1]}}" 103 | ] 104 | }, 105 | "execution_count": 6, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "discrete_test.data" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 7, 117 | "id": "23484578-dc84-4325-9aa0-7a1498ee161b", 118 | "metadata": { 119 | "tags": [] 120 | }, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "text/plain": [ 125 | "{'A': 0.00065, 'B': 0.00035, 'C': 
0.999}" 126 | ] 127 | }, 128 | "execution_count": 7, 129 | "metadata": {}, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": [ 134 | "discrete_test.probabs_of_being_best(sim_count = 20000, seed=52)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 8, 140 | "id": "7001814b-7705-420a-813d-b65393e68288", 141 | "metadata": { 142 | "tags": [] 143 | }, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "{'A': 0.2964593, 'B': 0.309296, 'C': 3.45e-05}" 149 | ] 150 | }, 151 | "execution_count": 8, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "discrete_test.expected_loss(sim_count = 20000, seed=52)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 9, 163 | "id": "86cb2b3b-cc93-489f-ae1d-7becac229c33", 164 | "metadata": { 165 | "tags": [] 166 | }, 167 | "outputs": [ 168 | { 169 | "name": "stdout", 170 | "output_type": "stream", 171 | "text": [ 172 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n", 173 | "| variant | concentration | average_value | posterior_mean | credible_interval | prob_being_best | expected_loss |\n", 174 | "+===========+==============================================================+=================+==================+========================+===================+=================+\n", 175 | "| A | {1: 168.0, 2: 166.0, 3: 176.0, 4: 172.0, 5: 168.0, 6: 150.0} | 3.456 | 3.45626 | [3.3530612, 3.559381] | 0.0006 | 0.296753 |\n", 176 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n", 177 | "| B | {1: 256.0, 2: 246.0, 3: 111.0, 4: 116.0, 5: 239.0, 6: 232.0} | 3.44333 | 3.44362 | [3.3386877, 3.5493953] | 0.0006 | 0.309481 |\n", 178 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n", 179 | "| C | {1: 84.0, 2: 57.0, 3: 58.0, 4: 100.0, 5: 100.0, 6: 101.0} | 3.756 | 3.75296 | [3.5993774, 3.904388] | 0.9988 | 4.27e-05 |\n", 180 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n" 181 | ] 182 | } 183 | ], 184 | "source": [ 185 | "results = discrete_test.evaluate()\n", 186 | "print(pd.DataFrame(results).to_markdown(tablefmt=\"grid\", index=False))" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 10, 192 | "id": "616e35ba-26d3-4d10-ad65-4dc37e5771a6", 193 | "metadata": { 194 | "tags": [] 195 | }, 196 | "outputs": [ 197 | { 198 | "data": { 199 | "text/plain": [ 200 | "{'A': 0.4319, 'B': 0.568, 'C': 0.0001}" 201 | ] 202 | }, 203 | "execution_count": 10, 204 | "metadata": {}, 205 | "output_type": "execute_result" 206 | }, 207 | { 208 | "data": { 209 | "text/plain": [ 210 | "{'A': 0.0371495, 'B': 0.0243128, 'C': 0.3335743}" 211 | ] 212 | }, 213 | "execution_count": 10, 214 | "metadata": {}, 215 | "output_type": "execute_result" 216 | }, 217 | { 218 | "name": "stdout", 219 | "output_type": "stream", 220 | "text": [ 221 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n", 222 
| "| variant | concentration | average_value | posterior_mean | credible_interval | prob_being_best | expected_loss |\n", 223 | "+===========+==============================================================+=================+==================+========================+===================+=================+\n", 224 | "| A | {1: 168.0, 2: 166.0, 3: 176.0, 4: 172.0, 5: 168.0, 6: 150.0} | 3.456 | 3.45626 | [3.3515318, 3.5614544] | 0.4304 | 0.0370878 |\n", 225 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n", 226 | "| B | {1: 256.0, 2: 246.0, 3: 111.0, 4: 116.0, 5: 239.0, 6: 232.0} | 3.44333 | 3.44362 | [3.3376023, 3.5515158] | 0.56955 | 0.0246001 |\n", 227 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n", 228 | "| C | {1: 84.0, 2: 57.0, 3: 58.0, 4: 100.0, 5: 100.0, 6: 101.0} | 3.756 | 3.75296 | [3.6002351, 3.9037053] | 5e-05 | 0.33356 |\n", 229 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n" 230 | ] 231 | } 232 | ], 233 | "source": [ 234 | "# reversed test (where minimum is best)\n", 235 | "discrete_test.probabs_of_being_best(sim_count = 20000, seed=52, min_is_best=True)\n", 236 | "discrete_test.expected_loss(sim_count = 20000, seed=52, min_is_best=True)\n", 237 | "results_min = discrete_test.evaluate(min_is_best=True)\n", 238 | "print(pd.DataFrame(results_min).to_markdown(tablefmt=\"grid\", index=False))" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "id": "19b56eb8-143e-47aa-9a22-a2473f91cfa1", 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [] 248 | } 249 | ], 250 | "metadata": { 251 | "kernelspec": { 252 | "display_name": "Python 3 (ipykernel)", 253 | "language": "python", 254 | "name": "python3" 255 | }, 256 | "language_info": { 257 | "codemirror_mode": { 258 | "name": "ipython", 259 | "version": 3 260 | }, 261 | "file_extension": ".py", 262 | "mimetype": "text/x-python", 263 | "name": "python", 264 | "nbconvert_exporter": "python", 265 | "pygments_lexer": "ipython3", 266 | "version": "3.10.12" 267 | } 268 | }, 269 | "nbformat": 4, 270 | "nbformat_minor": 5 271 | } 272 | -------------------------------------------------------------------------------- /examples/goals_scored_ab_testing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "c5f8cedc-94d6-4805-90d4-466d4de6b293", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "from IPython.core.interactiveshell import InteractiveShell\n", 13 | "InteractiveShell.ast_node_interactivity = \"all\"\n", 14 | "\n", 15 | "import numpy as np\n", 16 | "import pandas as pd\n", 17 | "from bayesian_testing.experiments import PoissonDataTest" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "id": "8e57546e-4b90-4c89-8668-aafe4aff6485", 24 | "metadata": { 25 | "tags": [] 26 | }, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "Generator(PCG64) at 0x111F9C660" 32 | ] 33 | }, 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": [ 40 | 
"np.random.default_rng(52)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "id": "1cc3f939-43a0-4d19-af63-9ae632861dee", 47 | "metadata": { 48 | "tags": [] 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "# goals scored - more is better (duh...)\n", 53 | "psg_goals_for = [5, 5, 7, 1, 3, 3, 1, 1, 2, 0, 1, 3, 4, 2, 5]\n", 54 | "city_goals_for = [2, 4, 3, 4, 6, 1, 3, 6, 4, 0, 3, 1, 2, 1]\n", 55 | "bayern_goals_for = [6, 2, 7, 1, 1, 2, 0, 4, 2, 5, 2, 6, 3, 6, 2]\n", 56 | "\n", 57 | "\n", 58 | "# goals received - so less is better\n", 59 | "psg_goals_against = [0, 2, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 3, 1, 0]\n", 60 | "city_goals_against = [0, 0, 3, 2, 0, 1, 0, 3, 0, 1, 1, 0, 1, 2]\n", 61 | "bayern_goals_against = [1, 0, 0, 1, 1, 2, 1, 0, 2, 0, 0, 2, 2, 1, 0]" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 4, 67 | "id": "fe532f01-6c91-4462-9213-e33379be1f9e", 68 | "metadata": { 69 | "tags": [] 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "# Poisson test for \"goals for\"\n", 74 | "poisson_test_gf = PoissonDataTest()\n", 75 | "poisson_test_gf.add_variant_data('psg', psg_goals_for)\n", 76 | "# adding \"city\" with effective sample size 10 and the prior mean 2 (20/10):\n", 77 | "poisson_test_gf.add_variant_data('city', city_goals_for, a_prior=20, b_prior=10)\n", 78 | "# adding \"bayern\" with aggregated data instead of list of all observations\n", 79 | "poisson_test_gf.add_variant_data_agg('bayern', totals=len(bayern_goals_for), sum_values=sum(bayern_goals_for))\n", 80 | "\n", 81 | "\n", 82 | "# Poisson test for \"goals against\"\n", 83 | "poisson_test_ga = PoissonDataTest()\n", 84 | "poisson_test_ga.add_variant_data('psg', psg_goals_against)\n", 85 | "poisson_test_ga.add_variant_data('city', city_goals_against)\n", 86 | "poisson_test_ga.add_variant_data('bayern', bayern_goals_against)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 5, 92 | "id": "93cd6353-01c6-4873-a62e-9816932679fe", 93 | "metadata": { 94 | "tags": [] 95 | }, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n", 102 | "| variant | totals | sum_values | observed_average | posterior_mean | credible_interval | prob_being_best | expected_loss |\n", 103 | "+===========+==========+==============+====================+==================+========================+===================+=================+\n", 104 | "| psg | 15 | 43 | 2.86667 | 2.8543 | [2.0701365, 3.7817813] | 0.24485 | 0.512094 |\n", 105 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n", 106 | "| city | 14 | 40 | 2.85714 | 2.5 | [1.9035733, 3.1737824] | 0.04655 | 0.870001 |\n", 107 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n", 108 | "| bayern | 15 | 49 | 3.26667 | 3.25166 | [2.4038302, 4.2176997] | 0.7086 | 0.109746 |\n", 109 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n" 110 | ] 111 | } 112 | ], 113 | "source": [ 114 | "# poisson_test_gf.probabs_of_being_best(sim_count = 20000, seed=52)\n", 115 | "# poisson_test_gf.expected_loss(sim_count = 20000, seed=52)\n", 116 | "results_gf = 
poisson_test_gf.evaluate()\n", 117 | "print(pd.DataFrame(results_gf).to_markdown(tablefmt=\"grid\", index=False))" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 6, 123 | "id": "39220217-6553-4f88-b537-064ade561996", 124 | "metadata": { 125 | "tags": [] 126 | }, 127 | "outputs": [ 128 | { 129 | "name": "stdout", 130 | "output_type": "stream", 131 | "text": [ 132 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n", 133 | "| variant | totals | sum_values | observed_average | posterior_mean | credible_interval | prob_being_best | expected_loss |\n", 134 | "+===========+==========+==============+====================+==================+========================+===================+=================+\n", 135 | "| psg | 15 | 9 | 0.6 | 0.60265 | [0.2140532, 1.2324781] | 0.756 | 0.0425375 |\n", 136 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n", 137 | "| city | 14 | 14 | 1 | 1 | [0.4487859, 1.8478473] | 0.07585 | 0.439937 |\n", 138 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n", 139 | "| bayern | 15 | 13 | 0.86667 | 0.86755 | [0.3680665, 1.6067354] | 0.16815 | 0.30884 |\n", 140 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n" 141 | ] 142 | } 143 | ], 144 | "source": [ 145 | "# poisson_test_ga.probabs_of_being_best(sim_count = 20000, seed=52, min_is_best=True)\n", 146 | "# poisson_test_ga.expected_loss(sim_count = 20000, seed=52, min_is_best=True)\n", 147 | "results_ga = poisson_test_ga.evaluate(min_is_best=True, interval_alpha=0.99)\n", 148 | "print(pd.DataFrame(results_ga).to_markdown(tablefmt=\"grid\", index=False))" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "id": "d36d68d3-d119-49a4-b757-016da25f6f28", 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [] 158 | } 159 | ], 160 | "metadata": { 161 | "kernelspec": { 162 | "display_name": "Python 3 (ipykernel)", 163 | "language": "python", 164 | "name": "python3" 165 | }, 166 | "language_info": { 167 | "codemirror_mode": { 168 | "name": "ipython", 169 | "version": 3 170 | }, 171 | "file_extension": ".py", 172 | "mimetype": "text/x-python", 173 | "name": "python", 174 | "nbconvert_exporter": "python", 175 | "pygments_lexer": "ipython3", 176 | "version": "3.10.12" 177 | } 178 | }, 179 | "nbformat": 4, 180 | "nbformat_minor": 5 181 | } 182 | -------------------------------------------------------------------------------- /examples/session_data_manual_pbbs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "8b11e1e0-ccc4-4fc9-9cdd-9f906e64b1c7", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "from IPython.core.interactiveshell import InteractiveShell\n", 13 | "InteractiveShell.ast_node_interactivity = \"all\"\n", 14 | "\n", 15 | "import numpy as np\n", 16 | "np.set_printoptions(legacy=\"1.25\")\n", 17 | "import pandas as pd\n", 18 | "from bayesian_testing.metrics import eval_bernoulli_agg, eval_delta_lognormal_agg" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": 
"b31da712-cdb9-4671-b3ed-63e351896915", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "df = pd.read_csv(\"data/session_data.csv\")" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "id": "6032fc6e-3a4c-47f3-830f-1a85d49c253c", 37 | "metadata": { 38 | "tags": [] 39 | }, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/plain": [ 44 | "94500" 45 | ] 46 | }, 47 | "execution_count": 3, 48 | "metadata": {}, 49 | "output_type": "execute_result" 50 | }, 51 | { 52 | "data": { 53 | "text/html": [ 54 | "
\n", 55 | "\n", 68 | "\n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | "
conversiondaterevenuesourcevariant
002021-08-070.000000desktopB
112021-08-057.241015desktopC
202021-08-060.000000desktopA
302021-08-050.000000desktopC
402021-08-030.000000desktopA
\n", 122 | "
" 123 | ], 124 | "text/plain": [ 125 | " conversion date revenue source variant\n", 126 | "0 0 2021-08-07 0.000000 desktop B\n", 127 | "1 1 2021-08-05 7.241015 desktop C\n", 128 | "2 0 2021-08-06 0.000000 desktop A\n", 129 | "3 0 2021-08-05 0.000000 desktop C\n", 130 | "4 0 2021-08-03 0.000000 desktop A" 131 | ] 132 | }, 133 | "execution_count": 3, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "# example session data - each row represent one session\n", 140 | "len(df)\n", 141 | "df.head()" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 4, 147 | "id": "744e5833-cbc3-45d3-963d-11c2a92acff2", 148 | "metadata": { 149 | "tags": [] 150 | }, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/html": [ 155 | "
\n", 156 | "\n", 169 | "\n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | "
sessionsconversionsrevenueconversion_raterevenue_per_sessionrevenue_per_converted_sessions
variant
A31500158030830.0256130.0501590.97873119.512674
B32000170035203.2168880.0531251.10010120.707775
C31000155037259.5633640.0500001.20192124.038428
\n", 220 | "
" 221 | ], 222 | "text/plain": [ 223 | " sessions conversions revenue conversion_rate \\\n", 224 | "variant \n", 225 | "A 31500 1580 30830.025613 0.050159 \n", 226 | "B 32000 1700 35203.216888 0.053125 \n", 227 | "C 31000 1550 37259.563364 0.050000 \n", 228 | "\n", 229 | " revenue_per_session revenue_per_converted_sessions \n", 230 | "variant \n", 231 | "A 0.978731 19.512674 \n", 232 | "B 1.100101 20.707775 \n", 233 | "C 1.201921 24.038428 " 234 | ] 235 | }, 236 | "execution_count": 4, 237 | "metadata": {}, 238 | "output_type": "execute_result" 239 | } 240 | ], 241 | "source": [ 242 | "# summary statistics per variant\n", 243 | "\n", 244 | "summary = df.groupby('variant')[['variant', 'conversion', 'revenue']]\\\n", 245 | " .agg({'variant': 'count', 'conversion': 'sum','revenue': 'sum'})\\\n", 246 | " .rename(columns = {'variant': 'sessions', 'conversion': 'conversions'})\n", 247 | "\n", 248 | "summary['conversion_rate'] = summary['conversions'] / summary['sessions']\n", 249 | "summary['revenue_per_session'] = summary['revenue'] / summary['sessions']\n", 250 | "summary['revenue_per_converted_sessions'] = summary['revenue'] / summary['conversions']\n", 251 | "\n", 252 | "summary" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 5, 258 | "id": "be57dc82-8958-4118-aab0-71122490d17a", 259 | "metadata": { 260 | "tags": [] 261 | }, 262 | "outputs": [ 263 | { 264 | "data": { 265 | "text/plain": [ 266 | "[31500, 32000, 31000]" 267 | ] 268 | }, 269 | "execution_count": 5, 270 | "metadata": {}, 271 | "output_type": "execute_result" 272 | }, 273 | { 274 | "data": { 275 | "text/plain": [ 276 | "[1580, 1700, 1550]" 277 | ] 278 | }, 279 | "execution_count": 5, 280 | "metadata": {}, 281 | "output_type": "execute_result" 282 | }, 283 | { 284 | "data": { 285 | "text/plain": [ 286 | "[3831.806394737816, 4211.72986767986, 4055.965234848171]" 287 | ] 288 | }, 289 | "execution_count": 5, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | }, 293 | { 294 | "data": { 295 | "text/plain": [ 296 | "[11029.923165846496, 12259.51868396913, 12357.911862914]" 297 | ] 298 | }, 299 | "execution_count": 5, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | "source": [ 305 | "variant_A = df['revenue'][(df.variant == 'A')].values\n", 306 | "variant_B = df['revenue'][(df.variant == 'B')].values\n", 307 | "variant_C = df['revenue'][(df.variant == 'C')].values\n", 308 | "\n", 309 | "sessions = [\n", 310 | " variant_A.size,\n", 311 | " variant_B.size,\n", 312 | " variant_C.size\n", 313 | "]\n", 314 | "\n", 315 | "conversions = [\n", 316 | " sum(variant_A > 0),\n", 317 | " sum(variant_B > 0),\n", 318 | " sum(variant_C > 0)\n", 319 | "]\n", 320 | "\n", 321 | "sum_log_revenue = [\n", 322 | " np.log(variant_A[variant_A > 0]).sum(),\n", 323 | " np.log(variant_B[variant_B > 0]).sum(),\n", 324 | " np.log(variant_C[variant_C > 0]).sum()\n", 325 | "]\n", 326 | "\n", 327 | "sum_log_2_revenue = [\n", 328 | " np.square(np.log(variant_A[variant_A > 0])).sum(),\n", 329 | " np.square(np.log(variant_B[variant_B > 0])).sum(),\n", 330 | " np.square(np.log(variant_C[variant_C > 0])).sum()\n", 331 | "]\n", 332 | "\n", 333 | "sessions\n", 334 | "conversions\n", 335 | "sum_log_revenue\n", 336 | "sum_log_2_revenue" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "id": "a68cc3a7-1c6e-40c4-b5af-59a7fb9fb548", 342 | "metadata": {}, 343 | "source": [ 344 | "## Results" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 6, 350 | "id": 
"3ade0625-fb50-434f-93f6-e70c3c543713", 351 | "metadata": { 352 | "tags": [] 353 | }, 354 | "outputs": [ 355 | { 356 | "name": "stdout", 357 | "output_type": "stream", 358 | "text": [ 359 | "Probabilities of being best: [0.043, 0.92335, 0.03365]\n", 360 | "Expected loss: [0.0030022, 5.89e-05, 0.0031487]\n", 361 | "95% credible intervals: [[0.0477987, 0.0525911], [0.0506903, 0.0556017], [0.0476257, 0.0524881]]\n" 362 | ] 363 | } 364 | ], 365 | "source": [ 366 | "# conversion rate probabilities of being best, expected loss and credible intervals for each variant\n", 367 | "pbbs, loss, intervals = eval_bernoulli_agg(sessions, conversions)\n", 368 | "print(f\"Probabilities of being best: {pbbs}\")\n", 369 | "print(f\"Expected loss: {loss}\")\n", 370 | "print(f\"95% credible intervals: {intervals}\")" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": 7, 376 | "id": "21c3ae3c-46b3-4bc7-bd33-5306d3e20506", 377 | "metadata": { 378 | "tags": [] 379 | }, 380 | "outputs": [ 381 | { 382 | "name": "stdout", 383 | "output_type": "stream", 384 | "text": [ 385 | "Probabilities of being best: [0.0002, 0.03395, 0.96585]\n", 386 | "Expected loss: [0.2212336, 0.1210695, 0.0008982]\n", 387 | "95% credible intervals: [[0.9086416, 1.0649507], [1.0043019, 1.170394], [1.1094296, 1.3069562]]\n" 388 | ] 389 | } 390 | ], 391 | "source": [ 392 | "# revenue per session probabilities of being best, expected loss and credible intervals for each variant\n", 393 | "pbbs, loss, intervals = eval_delta_lognormal_agg(sessions, conversions, sum_log_revenue, sum_log_2_revenue)\n", 394 | "print(f\"Probabilities of being best: {pbbs}\")\n", 395 | "print(f\"Expected loss: {loss}\")\n", 396 | "print(f\"95% credible intervals: {intervals}\")" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "id": "bdb6c1f2-f144-4cfa-9808-b429ceed6354", 402 | "metadata": {}, 403 | "source": [ 404 | "### Results for \"being best\" = \"being minimum\"" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 8, 410 | "id": "b651c9d9-6d51-4ad1-aabb-475296963a88", 411 | "metadata": { 412 | "tags": [] 413 | }, 414 | "outputs": [ 415 | { 416 | "name": "stdout", 417 | "output_type": "stream", 418 | "text": [ 419 | "Probabilities of being best: [0.4572, 0.00945, 0.53335]\n", 420 | "Expected loss: [0.0007868, 0.00374, 0.00062]\n", 421 | "95% credible intervals: [[0.0478316, 0.0526332], [0.050685, 0.0556378], [0.0476584, 0.0524571]]\n" 422 | ] 423 | } 424 | ], 425 | "source": [ 426 | "# conversion rate probabilities of being best, expected loss and credible intervals for each variant\n", 427 | "pbbs, loss, intervals = eval_bernoulli_agg(sessions, conversions, min_is_best=True)\n", 428 | "print(f\"Probabilities of being best: {pbbs}\")\n", 429 | "print(f\"Expected loss: {loss}\")\n", 430 | "print(f\"95% credible intervals: {intervals}\")" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 9, 436 | "id": "f6573b6b-314e-49de-ac63-c75201eac707", 437 | "metadata": { 438 | "tags": [] 439 | }, 440 | "outputs": [ 441 | { 442 | "name": "stdout", 443 | "output_type": "stream", 444 | "text": [ 445 | "Probabilities of being best: [0.95695, 0.04285, 0.0002]\n", 446 | "Expected loss: [0.0010886, 0.1012619, 0.2202282]\n", 447 | "95% credible intervals: [[0.9073725, 1.0666041], [1.0044587, 1.1692741], [1.1082288, 1.305592]]\n" 448 | ] 449 | } 450 | ], 451 | "source": [ 452 | "# revenue per session probabilities of being best, expected loss and credible intervals for each variant\n", 453 | 
"pbbs, loss, intervals = eval_delta_lognormal_agg(sessions, conversions, sum_log_revenue, sum_log_2_revenue, min_is_best=True)\n", 454 | "print(f\"Probabilities of being best: {pbbs}\")\n", 455 | "print(f\"Expected loss: {loss}\")\n", 456 | "print(f\"95% credible intervals: {intervals}\")" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "id": "bd9b3af3-d34c-4781-a05a-d94a7bc7ee1c", 463 | "metadata": {}, 464 | "outputs": [], 465 | "source": [] 466 | } 467 | ], 468 | "metadata": { 469 | "kernelspec": { 470 | "display_name": "Python 3 (ipykernel)", 471 | "language": "python", 472 | "name": "python3" 473 | }, 474 | "language_info": { 475 | "codemirror_mode": { 476 | "name": "ipython", 477 | "version": 3 478 | }, 479 | "file_extension": ".py", 480 | "mimetype": "text/x-python", 481 | "name": "python", 482 | "nbconvert_exporter": "python", 483 | "pygments_lexer": "ipython3", 484 | "version": "3.10.12" 485 | } 486 | }, 487 | "nbformat": 4, 488 | "nbformat_minor": 5 489 | } 490 | -------------------------------------------------------------------------------- /examples/waiting_time_ab_testing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "ffef50bb-d334-438c-b170-4d70c2d6d19e", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "from IPython.core.interactiveshell import InteractiveShell\n", 13 | "InteractiveShell.ast_node_interactivity = \"all\"\n", 14 | "\n", 15 | "import numpy as np\n", 16 | "np.set_printoptions(legacy=\"1.25\")\n", 17 | "import pandas as pd\n", 18 | "from bayesian_testing.experiments import ExponentialDataTest" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "133748f0-26cd-4647-9cf3-e0b7646a51af", 25 | "metadata": { 26 | "tags": [] 27 | }, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/plain": [ 32 | "Generator(PCG64) at 0x132F99AC0" 33 | ] 34 | }, 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "output_type": "execute_result" 38 | } 39 | ], 40 | "source": [ 41 | "# optionally stabilize the random seed:\n", 42 | "np.random.default_rng(100)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 3, 48 | "id": "98cbecdc-69a9-48f4-a95e-d5c71644f00c", 49 | "metadata": { 50 | "tags": [] 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "# waiting times for 3 different variants, each with many observations\n", 55 | "# generated using exponential distributions with defined scales (expected values)\n", 56 | "waiting_times_a = np.random.exponential(scale=10, size=200)\n", 57 | "waiting_times_b = np.random.exponential(scale=11, size=210)\n", 58 | "waiting_times_c = np.random.exponential(scale=11, size=220)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "id": "6510ba7f-d854-4a88-b063-eb44fc59cf1b", 65 | "metadata": { 66 | "tags": [] 67 | }, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "9.547258592723825" 73 | ] 74 | }, 75 | "execution_count": 4, 76 | "metadata": {}, 77 | "output_type": "execute_result" 78 | }, 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "11.761611555402082" 83 | ] 84 | }, 85 | "execution_count": 4, 86 | "metadata": {}, 87 | "output_type": "execute_result" 88 | }, 89 | { 90 | "data": { 91 | "text/plain": [ 92 | "12.042807741815093" 93 | ] 94 | }, 95 | "execution_count": 4, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | 
"source": [ 101 | "np.mean(waiting_times_a)\n", 102 | "np.mean(waiting_times_b)\n", 103 | "np.mean(waiting_times_c)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 5, 109 | "id": "fde94e6d-a05b-4863-8c85-002e623ca2fb", 110 | "metadata": { 111 | "tags": [] 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "# Exponential A/B/C test\n", 116 | "exponential_test = ExponentialDataTest()\n", 117 | "exponential_test.add_variant_data('A', waiting_times_a)\n", 118 | "exponential_test.add_variant_data('B', waiting_times_b)\n", 119 | "exponential_test.add_variant_data('C', waiting_times_c)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 6, 125 | "id": "86c03a05-d091-4de5-a223-27efdfbe0615", 126 | "metadata": { 127 | "tags": [] 128 | }, 129 | "outputs": [ 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "+-----------+----------+--------------+--------------------+------------------+--------------------------+-------------------+-----------------+\n", 135 | "| variant | totals | sum_values | observed_average | posterior_mean | credible_interval | prob_being_best | expected_loss |\n", 136 | "+===========+==========+==============+====================+==================+==========================+===================+=================+\n", 137 | "| A | 200 | 1909.45 | 9.54726 | 9.54299 | [8.3546163, 11.024919] | 0.97495 | 0.0094311 |\n", 138 | "+-----------+----------+--------------+--------------------+------------------+--------------------------+-------------------+-----------------+\n", 139 | "| B | 210 | 2469.94 | 11.7616 | 11.7565 | [10.3265266, 13.5271393] | 0.0177 | 2.23267 |\n", 140 | "+-----------+----------+--------------+--------------------+------------------+--------------------------+-------------------+-----------------+\n", 141 | "| C | 220 | 2649.42 | 12.0428 | 12.0378 | [10.5696647, 13.8087663] | 0.00735 | 2.50462 |\n", 142 | "+-----------+----------+--------------+--------------------+------------------+--------------------------+-------------------+-----------------+\n" 143 | ] 144 | } 145 | ], 146 | "source": [ 147 | "# evaluate test (using min_is_best=True as a lower waiting time is better)\n", 148 | "results = exponential_test.evaluate(min_is_best=True)\n", 149 | "print(pd.DataFrame(results).to_markdown(tablefmt=\"grid\", index=False))" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "id": "5cbe6fe3-d6c8-422a-ab62-ffd87b345459", 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [] 159 | } 160 | ], 161 | "metadata": { 162 | "kernelspec": { 163 | "display_name": "Python 3 (ipykernel)", 164 | "language": "python", 165 | "name": "python3" 166 | }, 167 | "language_info": { 168 | "codemirror_mode": { 169 | "name": "ipython", 170 | "version": 3 171 | }, 172 | "file_extension": ".py", 173 | "mimetype": "text/x-python", 174 | "name": "python", 175 | "nbconvert_exporter": "python", 176 | "pygments_lexer": "ipython3", 177 | "version": "3.10.12" 178 | } 179 | }, 180 | "nbformat": 4, 181 | "nbformat_minor": 5 182 | } 183 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "bayesian-testing" 3 | version = "0.9.1" 4 | description = "Bayesian A/B testing with simple probabilities." 
5 | authors = ["Matus Baniar"] 6 | license = "MIT" 7 | readme = "README.md" 8 | homepage = "https://github.com/Matt52/bayesian-testing" 9 | repository = "https://github.com/Matt52/bayesian-testing" 10 | keywords = ["ab testing", "bayes", "bayesian statistics"] 11 | include = [ 12 | "LICENSE", 13 | ] 14 | 15 | packages = [ 16 | {include = "bayesian_testing"} 17 | ] 18 | 19 | [tool.poetry.dependencies] 20 | python = ">=3.8" 21 | numpy = ">=1.19" 22 | 23 | [tool.poetry.group.dev.dependencies] 24 | jupyter = ">=1.1" 25 | jupyterlab = ">=4.3" 26 | black = ">=23.1" 27 | pytest = ">=8.3" 28 | coverage = ">=7.6" 29 | pandas = ">=1.5" 30 | pre-commit = ">=3.1" 31 | isort = ">=5.10" 32 | tabulate = ">=0.9.0" 33 | setuptools = { version = "^78.1.1", markers = "python_version >= '3.9'" } 34 | tornado = { version = "^6.5.0", markers = "python_version >= '3.9'" } 35 | 36 | [build-system] 37 | requires = ["poetry-core>=1.0.0"] 38 | build-backend = "poetry.core.masonry.api" 39 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Tests 2 | 3 | Run in top level directory: 4 | ```bash 5 | python -m pytest 6 | ``` 7 | 8 | or: 9 | ```bash 10 | coverage run -m pytest 11 | coverage report 12 | coverage html 13 | ``` 14 | -------------------------------------------------------------------------------- /tests/test_binary.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from bayesian_testing.experiments import BinaryDataTest 4 | 5 | 6 | @pytest.fixture 7 | def conv_test(): 8 | cv = BinaryDataTest() 9 | cv.add_variant_data("A", [0, 1, 0, 1, 0, 0, 0, 0, 0, 1]) 10 | cv.add_variant_data("B", [0, 0, 0, 1, 0, 0, 0, 0, 0, 1]) 11 | cv.add_variant_data_agg("C", 11, 2, a_prior=1, b_prior=2) 12 | cv.add_variant_data_agg("D", 10, 10) 13 | cv.add_variant_data_agg("D", 20, 20, replace=False) 14 | cv.add_variant_data_agg("D", 20, 20, replace=True) 15 | cv.delete_variant("D") 16 | return cv 17 | 18 | 19 | def test_variants(conv_test): 20 | assert conv_test.variant_names == ["A", "B", "C"] 21 | 22 | 23 | def test_totals(conv_test): 24 | assert conv_test.totals == [10, 10, 11] 25 | 26 | 27 | def test_positives(conv_test): 28 | assert conv_test.positives == [3, 2, 2] 29 | 30 | 31 | def test_a_priors(conv_test): 32 | assert conv_test.a_priors == [0.5, 0.5, 1] 33 | 34 | 35 | def test_b_priors(conv_test): 36 | assert conv_test.b_priors == [0.5, 0.5, 2] 37 | 38 | 39 | def test_probabs_of_being_best(conv_test): 40 | pbbs = conv_test.probabs_of_being_best(sim_count=20000, seed=52) 41 | assert pbbs == {"A": 0.57225, "B": 0.233, "C": 0.19475} 42 | 43 | 44 | def test_expected_loss(conv_test): 45 | loss = conv_test.expected_loss(sim_count=20000, seed=52) 46 | assert loss == {"A": 0.0529281, "B": 0.1452113, "C": 0.1557502} 47 | 48 | 49 | def test_credible_intervals_95(conv_test): 50 | ci = conv_test.credible_intervals(sim_count=20000, seed=52) 51 | assert ci == { 52 | "A": [0.0917579, 0.6028411], 53 | "B": [0.0442435, 0.5032699], 54 | "C": [0.0522996, 0.452392], 55 | } 56 | 57 | 58 | def test_credible_intervals_99(conv_test): 59 | ci = conv_test.credible_intervals(sim_count=20000, seed=52, interval_alpha=0.99) 60 | assert ci == { 61 | "A": [0.0552614, 0.6892976], 62 | "B": [0.0214602, 0.6045644], 63 | "C": [0.0300364, 0.5320378], 64 | } 65 | 66 | 67 | def test_evaluate(conv_test): 68 | eval_report = conv_test.evaluate(sim_count=20000, seed=52) 69 | 
assert eval_report == [ 70 | { 71 | "variant": "A", 72 | "totals": 10, 73 | "positives": 3, 74 | "positive_rate": 0.3, 75 | "posterior_mean": 0.31818, 76 | "credible_interval": [0.0917579, 0.6028411], 77 | "prob_being_best": 0.57225, 78 | "expected_loss": 0.0529281, 79 | }, 80 | { 81 | "variant": "B", 82 | "totals": 10, 83 | "positives": 2, 84 | "positive_rate": 0.2, 85 | "posterior_mean": 0.22727, 86 | "credible_interval": [0.0442435, 0.5032699], 87 | "prob_being_best": 0.233, 88 | "expected_loss": 0.1452113, 89 | }, 90 | { 91 | "variant": "C", 92 | "totals": 11, 93 | "positives": 2, 94 | "positive_rate": 0.18182, 95 | "posterior_mean": 0.21429, 96 | "credible_interval": [0.0522996, 0.452392], 97 | "prob_being_best": 0.19475, 98 | "expected_loss": 0.1557502, 99 | }, 100 | ] 101 | 102 | 103 | def test_wrong_inputs(): 104 | cv = BinaryDataTest() 105 | with pytest.raises(ValueError): 106 | cv.add_variant_data(10, [1, 0, 1]) 107 | with pytest.raises(ValueError): 108 | cv.add_variant_data("A", [1, 0, 1], a_prior=-1) 109 | with pytest.raises(ValueError): 110 | cv.add_variant_data_agg("A", -1, 7) 111 | with pytest.raises(ValueError): 112 | cv.add_variant_data_agg("A", 1, -7) 113 | with pytest.raises(ValueError): 114 | cv.add_variant_data("A", []) 115 | with pytest.raises(ValueError): 116 | cv.add_variant_data("A", [1, 2, 0]) 117 | 118 | 119 | def test_wrong_credible_interval_input(conv_test): 120 | with pytest.raises(ValueError): 121 | conv_test.evaluate(interval_alpha=2) 122 | with pytest.raises(ValueError): 123 | conv_test.evaluate(interval_alpha=-1) 124 | -------------------------------------------------------------------------------- /tests/test_delta_lognormal.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from bayesian_testing.experiments import DeltaLognormalDataTest 4 | 5 | 6 | @pytest.fixture 7 | def rev_test(): 8 | rev = DeltaLognormalDataTest() 9 | rev.add_variant_data_agg( 10 | "A", 31500, 1580, 30830.02561, 3831.806394737816, 11029.923165846496, a_prior_beta=1 11 | ) 12 | rev.add_variant_data_agg( 13 | "B", 32000, 1700, 35203.21689, 4211.72986767986, 12259.51868396913, m_prior=2, w_prior=0.02 14 | ) 15 | rev.add_variant_data_agg( 16 | "C", 17 | 31000, 18 | 1550, 19 | 37259.56336, 20 | 4055.965234848171, 21 | 12357.911862914, 22 | a_prior_ig=1, 23 | b_prior_ig=2, 24 | ) 25 | rev.add_variant_data("D", [0, 10.7, 0, 8, 0, 0, 0, 0, 0, 11.22]) 26 | rev.add_variant_data("D", [0, 10.7, 0, 8, 0, 0, 0, 0, 0, 11.22], replace=False) 27 | rev.add_variant_data("D", [0, 10.7, 0, 8, 0, 0, 0, 0, 0, 11.22], replace=True) 28 | rev.delete_variant("D") 29 | return rev 30 | 31 | 32 | def test_variants(rev_test): 33 | assert rev_test.variant_names == ["A", "B", "C"] 34 | 35 | 36 | def test_totals(rev_test): 37 | assert rev_test.totals == [31500, 32000, 31000] 38 | 39 | 40 | def test_positives(rev_test): 41 | assert rev_test.positives == [1580, 1700, 1550] 42 | 43 | 44 | def test_sum_values(rev_test): 45 | assert rev_test.sum_values == [30830.02561, 35203.21689, 37259.56336] 46 | 47 | 48 | def test_sum_logs(rev_test): 49 | assert [round(i, 5) for i in rev_test.sum_logs] == [3831.80639, 4211.72987, 4055.96523] 50 | 51 | 52 | def test_sum_logs_2(rev_test): 53 | assert [round(i, 5) for i in rev_test.sum_logs_2] == [11029.92317, 12259.51868, 12357.91186] 54 | 55 | 56 | def test_a_priors_beta(rev_test): 57 | assert rev_test.a_priors_beta == [1, 0.5, 0.5] 58 | 59 | 60 | def test_b_priors_beta(rev_test): 61 | assert rev_test.b_priors_beta == [0.5, 
0.5, 0.5] 62 | 63 | 64 | def test_m_priors(rev_test): 65 | assert rev_test.m_priors == [1, 2, 1] 66 | 67 | 68 | def test_a_priors_ig(rev_test): 69 | assert rev_test.a_priors_ig == [0, 0, 1] 70 | 71 | 72 | def test_b_priors_ig(rev_test): 73 | assert rev_test.b_priors_ig == [0, 0, 2] 74 | 75 | 76 | def test_w_priors(rev_test): 77 | assert rev_test.w_priors == [0.01, 0.02, 0.01] 78 | 79 | 80 | def test_probabs_of_being_best(rev_test): 81 | pbbs = rev_test.probabs_of_being_best(sim_count=20000, seed=152) 82 | assert pbbs == {"A": 0.0004, "B": 0.03355, "C": 0.96605} 83 | 84 | 85 | def test_expected_loss(rev_test): 86 | loss = rev_test.expected_loss(sim_count=20000, seed=152) 87 | assert loss == {"A": 0.2214416, "B": 0.1212818, "C": 0.0008639} 88 | 89 | 90 | def test_credible_intervals_95(rev_test): 91 | ci = rev_test.credible_intervals(sim_count=20000, seed=152) 92 | assert ci == { 93 | "A": [0.9084717, 1.0661301], 94 | "B": [1.0038179, 1.1705975], 95 | "C": [1.1097381, 1.3084524], 96 | } 97 | 98 | 99 | def test_credible_intervals_99(rev_test): 100 | ci = rev_test.credible_intervals(sim_count=20000, seed=152, interval_alpha=0.99) 101 | assert ci == { 102 | "A": [0.8847602, 1.0948976], 103 | "B": [0.9789665, 1.1996421], 104 | "C": [1.0813447, 1.3416523], 105 | } 106 | 107 | 108 | def test_evaluate(rev_test): 109 | eval_report = rev_test.evaluate(sim_count=20000, seed=152) 110 | assert eval_report == [ 111 | { 112 | "variant": "A", 113 | "totals": 31500, 114 | "positives": 1580, 115 | "sum_values": 30830.02561, 116 | "avg_values": 0.97873, 117 | "avg_positive_values": 19.51267, 118 | "posterior_mean": 0.98309, 119 | "credible_interval": [0.9084717, 1.0661301], 120 | "prob_being_best": 0.0004, 121 | "expected_loss": 0.2214416, 122 | }, 123 | { 124 | "variant": "B", 125 | "totals": 32000, 126 | "positives": 1700, 127 | "sum_values": 35203.21689, 128 | "avg_values": 1.1001, 129 | "avg_positive_values": 20.70777, 130 | "posterior_mean": 1.08266, 131 | "credible_interval": [1.0038179, 1.1705975], 132 | "prob_being_best": 0.03355, 133 | "expected_loss": 0.1212818, 134 | }, 135 | { 136 | "variant": "C", 137 | "totals": 31000, 138 | "positives": 1550, 139 | "sum_values": 37259.56336, 140 | "avg_values": 1.20192, 141 | "avg_positive_values": 24.03843, 142 | "posterior_mean": 1.20276, 143 | "credible_interval": [1.1097381, 1.3084524], 144 | "prob_being_best": 0.96605, 145 | "expected_loss": 0.0008639, 146 | }, 147 | ] 148 | 149 | 150 | def test_wrong_inputs(): 151 | dl_test = DeltaLognormalDataTest() 152 | with pytest.raises(ValueError): 153 | dl_test.add_variant_data(10, [1, 2, 3]) 154 | with pytest.raises(ValueError): 155 | dl_test.add_variant_data("A", [1, 2, 3], a_prior_beta=-1) 156 | with pytest.raises(ValueError): 157 | dl_test.add_variant_data("A", []) 158 | with pytest.raises(ValueError): 159 | dl_test.add_variant_data("A", [0, 0, 0]) 160 | with pytest.raises(ValueError): 161 | dl_test.add_variant_data("C", [0, 10.7, -1]) 162 | -------------------------------------------------------------------------------- /tests/test_delta_normal.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from bayesian_testing.experiments import DeltaNormalDataTest 3 | 4 | 5 | @pytest.fixture 6 | def delta_norm_test(): 7 | delta_norm = DeltaNormalDataTest() 8 | delta_norm.add_variant_data_agg( 9 | name="A", 10 | totals=31500, 11 | non_zeros=10, 12 | sum_values=102.02561, 13 | sum_values_2=1700.8, 14 | a_prior_beta=1, 15 | ) 16 | delta_norm.add_variant_data_agg( 17 | 
name="B", 18 | totals=32000, 19 | non_zeros=40, 20 | sum_values=273.02, 21 | sum_values_2=3567.5, 22 | a_prior_beta=0.02, 23 | m_prior=2, 24 | w_prior=0.02, 25 | ) 26 | 27 | delta_norm.add_variant_data("C", [0, 10.7, -1, 8, 0, -3, 0, -10, 0, 11.22]) 28 | delta_norm.add_variant_data("C", [0, 10.7, -1, 8, 0, -3, 0, -10, 0, 11.22], replace=False) 29 | delta_norm.add_variant_data("C", [0, 10.7, -1, 8, 0, -3, 0, -10, 0, 11.22], replace=True) 30 | delta_norm.delete_variant("C") 31 | return delta_norm 32 | 33 | 34 | def test_variants(delta_norm_test): 35 | assert delta_norm_test.variant_names == ["A", "B"] 36 | 37 | 38 | def test_totals(delta_norm_test): 39 | assert delta_norm_test.totals == [31500, 32000] 40 | 41 | 42 | def test_non_zeros(delta_norm_test): 43 | assert delta_norm_test.non_zeros == [10, 40] 44 | 45 | 46 | def test_sum_values(delta_norm_test): 47 | assert delta_norm_test.sum_values == [102.02561, 273.02] 48 | 49 | 50 | def test_sum_values_2(delta_norm_test): 51 | assert delta_norm_test.sum_values_2 == [1700.8, 3567.5] 52 | 53 | 54 | def test_a_priors_beta(delta_norm_test): 55 | assert delta_norm_test.a_priors_beta == [1, 0.02] 56 | 57 | 58 | def test_b_priors_beta(delta_norm_test): 59 | assert delta_norm_test.b_priors_beta == [0.5, 0.5] 60 | 61 | 62 | def test_m_priors(delta_norm_test): 63 | assert delta_norm_test.m_priors == [1, 2] 64 | 65 | 66 | def test_a_priors_ig(delta_norm_test): 67 | assert delta_norm_test.a_priors_ig == [0, 0] 68 | 69 | 70 | def test_b_priors_ig(delta_norm_test): 71 | assert delta_norm_test.b_priors_ig == [0, 0] 72 | 73 | 74 | def test_w_priors(delta_norm_test): 75 | assert delta_norm_test.w_priors == [0.01, 0.02] 76 | 77 | 78 | def test_probabs_of_being_best(delta_norm_test): 79 | pbbs = delta_norm_test.probabs_of_being_best(sim_count=20000, seed=152) 80 | assert pbbs == {"A": 0.02235, "B": 0.97765} 81 | 82 | 83 | def test_expected_loss(delta_norm_test): 84 | loss = delta_norm_test.expected_loss(sim_count=20000, seed=152) 85 | assert loss == {"A": 0.005, "B": 2.46e-05} 86 | 87 | 88 | def test_credible_intervals_95(delta_norm_test): 89 | ci = delta_norm_test.credible_intervals(sim_count=20000, seed=152) 90 | assert ci == { 91 | "A": [0.0011935, 0.0070944], 92 | "B": [0.0051651, 0.0125917], 93 | } 94 | 95 | 96 | def test_credible_intervals_99(delta_norm_test): 97 | ci = delta_norm_test.credible_intervals(sim_count=20000, seed=152, interval_alpha=0.99) 98 | assert ci == { 99 | "A": [0.0006048, 0.0087352], 100 | "B": [0.0043509, 0.0142946], 101 | } 102 | 103 | 104 | def test_evaluate(delta_norm_test): 105 | eval_report = delta_norm_test.evaluate(sim_count=20000, seed=152) 106 | assert eval_report == [ 107 | { 108 | "variant": "A", 109 | "totals": 31500, 110 | "non_zeros": 10, 111 | "sum_values": 102.02561, 112 | "avg_values": 0.00324, 113 | "avg_non_zero_values": 10.20256, 114 | "posterior_mean": 0.00356, 115 | "credible_interval": [0.0011935, 0.0070944], 116 | "prob_being_best": 0.02235, 117 | "expected_loss": 0.005, 118 | }, 119 | { 120 | "variant": "B", 121 | "totals": 32000, 122 | "non_zeros": 40, 123 | "sum_values": 273.02, 124 | "avg_values": 0.00853, 125 | "avg_non_zero_values": 6.8255, 126 | "posterior_mean": 0.00853, 127 | "credible_interval": [0.0051651, 0.0125917], 128 | "prob_being_best": 0.97765, 129 | "expected_loss": 2.46e-05, 130 | }, 131 | ] 132 | 133 | 134 | def test_wrong_inputs(): 135 | dn_test = DeltaNormalDataTest() 136 | with pytest.raises(ValueError): 137 | dn_test.add_variant_data(10, [1, 2, 3]) 138 | with pytest.raises(ValueError): 
139 | dn_test.add_variant_data("A", [1, 2, 3], a_prior_beta=-1) 140 | with pytest.raises(ValueError): 141 | dn_test.add_variant_data_agg("A", 2, 3, 6, 21) 142 | with pytest.raises(ValueError): 143 | dn_test.add_variant_data_agg("A", 1, -7, 6, 21) 144 | with pytest.raises(ValueError): 145 | dn_test.add_variant_data("A", []) 146 | with pytest.raises(ValueError): 147 | dn_test.add_variant_data("A", [0, 0, 0]) 148 | with pytest.raises(ValueError): 149 | dn_test.add_variant_data("C", [0, 10.7, -1], a_prior_ig=-1) 150 | -------------------------------------------------------------------------------- /tests/test_discrete.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from bayesian_testing.experiments import DiscreteDataTest 4 | 5 | 6 | @pytest.fixture 7 | def discrete_test(): 8 | disc = DiscreteDataTest(states=[1, 2, 3, 4, 5, 6]) 9 | disc.add_variant_data("A", [6, 5, 4, 4, 4, 2, 5, 4, 2, 1, 2, 5, 4, 6, 2, 3, 6, 2, 3, 6]) 10 | disc.add_variant_data("B", [4, 6, 3, 6, 4, 6, 6, 1, 4, 1]) 11 | disc.add_variant_data_agg("C", [10, 10, 10, 10, 10, 10], prior=[100, 100, 100, 100, 100, 100]) 12 | disc.add_variant_data_agg("D", [1, 2, 3, 8, 10, 7]) 13 | disc.add_variant_data_agg("D", [1, 2, 3, 8, 10, 6], replace=False) 14 | disc.add_variant_data_agg("D", [1, 2, 3, 8, 10, 6], replace=True) 15 | disc.delete_variant("D") 16 | return disc 17 | 18 | 19 | def test_variants(discrete_test): 20 | assert discrete_test.variant_names == ["A", "B", "C"] 21 | 22 | 23 | def test_states(discrete_test): 24 | assert discrete_test.states == [1, 2, 3, 4, 5, 6] 25 | 26 | 27 | def test_concentrations(discrete_test): 28 | assert discrete_test.concentrations == [ 29 | [1, 5, 2, 5, 3, 4], 30 | [2, 0, 1, 3, 0, 4], 31 | [10, 10, 10, 10, 10, 10], 32 | ] 33 | 34 | 35 | def test_probabs_of_being_best(discrete_test): 36 | pbbs = discrete_test.probabs_of_being_best(sim_count=20000, seed=52) 37 | assert pbbs == {"A": 0.35595, "B": 0.59325, "C": 0.0508} 38 | 39 | 40 | def test_expected_loss(discrete_test): 41 | loss = discrete_test.expected_loss(sim_count=20000, seed=52) 42 | assert loss == {"A": 0.3053921, "B": 0.1560257, "C": 0.5328904} 43 | 44 | 45 | def test_credible_intervals_95(discrete_test): 46 | ci = discrete_test.credible_intervals(sim_count=20000, seed=52) 47 | assert ci == { 48 | "A": [3.122705, 4.3265574], 49 | "B": [2.9826238, 4.7094185], 50 | "C": [3.3681015, 3.6302274], 51 | } 52 | 53 | 54 | def test_credible_intervals_99(discrete_test): 55 | ci = discrete_test.credible_intervals(sim_count=20000, seed=52, interval_alpha=0.99) 56 | assert ci == { 57 | "A": [2.9260719, 4.5245231], 58 | "B": [2.7013326, 4.9277036], 59 | "C": [3.3281699, 3.6751105], 60 | } 61 | 62 | 63 | def test_evaluate(discrete_test): 64 | eval_report = discrete_test.evaluate(sim_count=20000, seed=52) 65 | assert eval_report == [ 66 | { 67 | "variant": "A", 68 | "concentration": {1: 1.0, 2: 5.0, 3: 2.0, 4: 5.0, 5: 3.0, 6: 4.0}, 69 | "average_value": 3.8, 70 | "posterior_mean": 3.73077, 71 | "credible_interval": [3.122705, 4.3265574], 72 | "prob_being_best": 0.35595, 73 | "expected_loss": 0.3053921, 74 | }, 75 | { 76 | "variant": "B", 77 | "concentration": {1: 2.0, 2: 0.0, 3: 1.0, 4: 3.0, 5: 0.0, 6: 4.0}, 78 | "average_value": 4.1, 79 | "posterior_mean": 3.875, 80 | "credible_interval": [2.9826238, 4.7094185], 81 | "prob_being_best": 0.59325, 82 | "expected_loss": 0.1560257, 83 | }, 84 | { 85 | "variant": "C", 86 | "concentration": {1: 10, 2: 10, 3: 10, 4: 10, 5: 10, 6: 10}, 87 | "average_value": 
3.5, 88 | "posterior_mean": 3.5, 89 | "credible_interval": [3.3681015, 3.6302274], 90 | "prob_being_best": 0.0508, 91 | "expected_loss": 0.5328904, 92 | }, 93 | ] 94 | 95 | 96 | def test_non_numerical_states_error(): 97 | with pytest.raises(ValueError): 98 | DiscreteDataTest(states=[1, 2.0, "3"]) 99 | 100 | 101 | def test_non_string_variant_error(discrete_test): 102 | with pytest.raises(ValueError): 103 | discrete_test.add_variant_data_agg(1, [1, 2, 3, 8, 10, 7]) 104 | 105 | 106 | def test_length_mismatch_input_error(discrete_test): 107 | with pytest.raises(ValueError): 108 | discrete_test.add_variant_data_agg("D", [1, 2, 3, 8, 10]) 109 | 110 | 111 | def test_empty_data_error(discrete_test): 112 | with pytest.raises(ValueError): 113 | discrete_test.add_variant_data("D", []) 114 | 115 | 116 | def test_non_existing_state_error(discrete_test): 117 | with pytest.raises(ValueError): 118 | discrete_test.add_variant_data("D", [1, 2, 3, 5, 21]) 119 | -------------------------------------------------------------------------------- /tests/test_evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from bayesian_testing.metrics import ( 5 | eval_bernoulli_agg, 6 | eval_normal_agg, 7 | eval_delta_lognormal_agg, 8 | eval_delta_normal_agg, 9 | eval_numerical_dirichlet_agg, 10 | eval_poisson_agg, 11 | eval_exponential_agg, 12 | ) 13 | 14 | PBB_BERNOULLI_AGG_INPUTS = [ 15 | { 16 | "input": { 17 | "totals": [31500, 32000, 31000], 18 | "successes": [1580, 1700, 1550], 19 | "sim_count": 20000, 20 | "seed": 52, 21 | "min_is_best": False, 22 | "interval_alpha": 0.95, 23 | }, 24 | "expected_output": ( 25 | [0.04185, 0.92235, 0.0358], 26 | [0.0030138, 6.06e-05, 0.0031649], 27 | [[0.0477826, 0.0526302], [0.0506933, 0.0555936], [0.0476604, 0.0524757]], 28 | ), 29 | }, 30 | { 31 | "input": { 32 | "totals": [31500, 32000, 31000], 33 | "successes": [1580, 1700, 1550], 34 | "sim_count": 20000, 35 | "seed": 52, 36 | "min_is_best": True, 37 | "interval_alpha": 0.99, 38 | }, 39 | "expected_output": ( 40 | [0.4594, 0.00925, 0.53135], 41 | [0.000781, 0.0037342, 0.0006299], 42 | [[0.0470873, 0.0534391], [0.0499116, 0.056421], [0.0469394, 0.0532695]], 43 | ), 44 | }, 45 | { 46 | "input": { 47 | "totals": [100, 200], 48 | "successes": [80, 160], 49 | "sim_count": 10000, 50 | "seed": 52, 51 | "min_is_best": False, 52 | "interval_alpha": 0.5, 53 | }, 54 | "expected_output": ( 55 | [0.4899, 0.5101], 56 | [0.0204051, 0.0182965], 57 | [[0.7713375, 0.8248972], [0.7810789, 0.8179153]], 58 | ), 59 | }, 60 | { 61 | "input": { 62 | "totals": [100, 100], 63 | "successes": [0, 0], 64 | "sim_count": 20000, 65 | "seed": 52, 66 | "min_is_best": False, 67 | "interval_alpha": 0.95, 68 | }, 69 | "expected_output": ( 70 | [0.5008, 0.4992], 71 | [0.0030829, 0.0031614], 72 | [[4.8e-06, 0.0252857], [4.8e-06, 0.0243717]], 73 | ), 74 | }, 75 | { 76 | "input": { 77 | "totals": [100], 78 | "successes": [77], 79 | "sim_count": 20000, 80 | "seed": 52, 81 | "min_is_best": False, 82 | "interval_alpha": 0.95, 83 | }, 84 | "expected_output": ([1], [0], [[0.6810233, 0.8442006]]), 85 | }, 86 | { 87 | "input": { 88 | "totals": [], 89 | "successes": [], 90 | "sim_count": 20000, 91 | "seed": 52, 92 | "min_is_best": False, 93 | "interval_alpha": 0.95, 94 | }, 95 | "expected_output": ([], [], []), 96 | }, 97 | ] 98 | 99 | PBB_NORMAL_AGG_INPUTS = [ 100 | { 101 | "input": { 102 | "totals": [31000, 30000, 32000], 103 | "sums": [33669.629254438274, 32451.58924937506, 
34745.69678322253], 104 | "sums_2": [659657.6891070933, 95284.82070196551, 260327.13931832163], 105 | "sim_count": 20000, 106 | "seed": 52, 107 | "interval_alpha": 0.95, 108 | }, 109 | "expected_output": ( 110 | [0.43605, 0.19685, 0.3671], 111 | [0.0133512, 0.0179947, 0.0137618], 112 | [[1.0366696, 1.13634], [1.0652914, 1.0977888], [1.0574217, 1.1141581]], 113 | ), 114 | }, 115 | { 116 | "input": { 117 | "totals": [10000, 10000], 118 | "sums": [11446.345516947431, 10708.892428298526], 119 | "sums_2": [214614.35949718487, 31368.55305547222], 120 | "sim_count": 20000, 121 | "seed": 52, 122 | "interval_alpha": 0.99, 123 | }, 124 | "expected_output": ( 125 | [0.94445, 0.05555], 126 | [0.0011338, 0.0753121], 127 | [[1.0278553, 1.2601174], [1.0337017, 1.1071861]], 128 | ), 129 | }, 130 | { 131 | "input": { 132 | "totals": [10, 20, 30, 40], 133 | "sums": [0, 0, 0, 0], 134 | "sums_2": [0, 0, 0, 0], 135 | "sim_count": 20000, 136 | "seed": 52, 137 | "interval_alpha": 0.95, 138 | }, 139 | "expected_output": ( 140 | [0.40785, 0.25105, 0.1928, 0.1483], 141 | [0.0058965, 0.0065083, 0.0066249, 0.0067183], 142 | [ 143 | [-0.021071, 0.0232855], 144 | [-0.0101753, 0.0108701], 145 | [-0.0064358, 0.0070877], 146 | [-0.004795, 0.0052896], 147 | ], 148 | ), 149 | }, 150 | { 151 | "input": { 152 | "totals": [100], 153 | "sums": [0], 154 | "sums_2": [0], 155 | "sim_count": 10000, 156 | "seed": 52, 157 | "interval_alpha": 0.95, 158 | }, 159 | "expected_output": ([1], [0], [[-0.0019355, 0.0020896]]), 160 | }, 161 | { 162 | "input": { 163 | "totals": [10000, 10000], 164 | "sums": [11446.35, 11446.35], 165 | "sums_2": [214614.36, 214614.36], 166 | "sim_count": 20000, 167 | "seed": 52, 168 | "interval_alpha": 0.95, 169 | }, 170 | "expected_output": ( 171 | [0.5024, 0.4976], 172 | [0.0250157, 0.0256253], 173 | [[1.0577297, 1.2331092], [1.0545188, 1.2327107]], 174 | ), 175 | }, 176 | { 177 | "input": { 178 | "totals": [], 179 | "sums": [], 180 | "sums_2": [], 181 | "sim_count": 10000, 182 | "seed": 52, 183 | "interval_alpha": 0.95, 184 | }, 185 | "expected_output": ([], [], []), 186 | }, 187 | ] 188 | 189 | PBB_DELTA_LOGNORMAL_AGG_INPUTS = [ 190 | { 191 | "input": { 192 | "totals": [31500, 32000, 31000], 193 | "successes": [1580, 1700, 1550], 194 | "sum_logs": [3831.806394737816, 4211.72986767986, 4055.965234848171], 195 | "sum_logs_2": [11029.923165846496, 12259.51868396913, 12357.911862914], 196 | "sim_count": 20000, 197 | "seed": 52, 198 | "interval_alpha": 0.95, 199 | }, 200 | "expected_output": ( 201 | [0.00015, 0.03345, 0.9664], 202 | [0.2209593, 0.1205541, 0.0008458], 203 | [[0.9065769, 1.0655643], [1.0046391, 1.1707248], [1.1085257, 1.3061752]], 204 | ), 205 | }, 206 | { 207 | "input": { 208 | "totals": [31000, 31000], 209 | "successes": [1550, 1550], 210 | "sum_logs": [4055.965234848171, 4055.965234848171], 211 | "sum_logs_2": [12357.911862914, 12357.911862914], 212 | "sim_count": 10000, 213 | "seed": 52, 214 | "interval_alpha": 0.9, 215 | }, 216 | "expected_output": ( 217 | [0.5013, 0.4987], 218 | [0.028189, 0.0287233], 219 | [[1.1227657, 1.2882371], [1.1210866, 1.2895949]], 220 | ), 221 | }, 222 | { 223 | "input": { 224 | "totals": [10, 20, 30, 40], 225 | "successes": [0, 0, 0, 0], 226 | "sum_logs": [0, 0, 0, 0], 227 | "sum_logs_2": [0, 0, 0, 0], 228 | "sim_count": 10000, 229 | "seed": 52, 230 | "interval_alpha": 0.5, 231 | }, 232 | "expected_output": ( 233 | [0.25, 0.25, 0.25, 0.25], 234 | [np.nan, np.nan, np.nan, np.nan], 235 | [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan]], 236 | 
), 237 | }, 238 | { 239 | "input": { 240 | "totals": [100], 241 | "successes": [10], 242 | "sum_logs": [0], 243 | "sum_logs_2": [0], 244 | "sim_count": 10000, 245 | "seed": 52, 246 | "interval_alpha": 0.95, 247 | }, 248 | "expected_output": ([1], [0], [[0.051825, 0.1697968]]), 249 | }, 250 | { 251 | "input": { 252 | "totals": [], 253 | "successes": [], 254 | "sum_logs": [], 255 | "sum_logs_2": [], 256 | "sim_count": 10000, 257 | "seed": 52, 258 | "interval_alpha": 0.95, 259 | }, 260 | "expected_output": ([], [], []), 261 | }, 262 | ] 263 | 264 | PBB_NUMERICAL_DIRICHLET_AGG_INPUTS = [ 265 | { 266 | "input": { 267 | "states": [1, 2, 3, 4, 5, 6], 268 | "concentrations": [ 269 | [10, 10, 10, 10, 20, 10], 270 | [10, 10, 10, 10, 10, 20], 271 | [10, 10, 10, 20, 10, 10], 272 | ], 273 | "sim_count": 20000, 274 | "seed": 52, 275 | "interval_alpha": 0.9, 276 | }, 277 | "expected_output": ( 278 | [0.28205, 0.62335, 0.0946], 279 | [0.1999528, 0.0698306, 0.334045], 280 | [[3.3214796, 4.0718396], [3.4218451, 4.2243033], [3.1984494, 3.9184425]], 281 | ), 282 | }, 283 | { 284 | "input": { 285 | "states": [1, 2, 3], 286 | "concentrations": [[100, 100, 100]], 287 | "sim_count": 20000, 288 | "seed": 52, 289 | "interval_alpha": 0.9, 290 | }, 291 | "expected_output": ([1], [0], [[1.9077157, 2.0908699]]), 292 | }, 293 | { 294 | "input": { 295 | "states": [], 296 | "concentrations": [], 297 | "sim_count": 20000, 298 | "seed": 52, 299 | "interval_alpha": 0.9, 300 | }, 301 | "expected_output": ([], [], []), 302 | }, 303 | ] 304 | 305 | PBB_POISSON_AGG_INPUTS = [ 306 | { 307 | "input": { 308 | "totals": [3150, 3200, 3100], 309 | "sums": [10000, 10000, 10000], 310 | "sim_count": 20000, 311 | "seed": 52, 312 | "min_is_best": False, 313 | "interval_alpha": 0.95, 314 | }, 315 | "expected_output": ( 316 | [0.127, 0.00695, 0.86605], 317 | [0.0539495, 0.1042691, 0.0030418], 318 | [[3.1132541, 3.2375641], [3.0635577, 3.1863114], [3.1634511, 3.2890376]], 319 | ), 320 | }, 321 | { 322 | "input": { 323 | "totals": [3150, 3200, 3100], 324 | "sums": [10000, 10000, 10000], 325 | "sim_count": 20000, 326 | "seed": 52, 327 | "min_is_best": True, 328 | "interval_alpha": 0.9, 329 | }, 330 | "expected_output": ( 331 | [0.12775, 0.8656, 0.00665], 332 | [0.0532581, 0.0029385, 0.1041658], 333 | [[3.123135, 3.2276693], [3.0732817, 3.1764313], [3.1729959, 3.2788603]], 334 | ), 335 | }, 336 | { 337 | "input": { 338 | "totals": [100], 339 | "sums": [77], 340 | "sim_count": 20000, 341 | "seed": 52, 342 | "min_is_best": False, 343 | "interval_alpha": 0.75, 344 | }, 345 | "expected_output": ([1], [0], [[0.6723231, 0.8727923]]), 346 | }, 347 | { 348 | "input": { 349 | "totals": [], 350 | "sums": [], 351 | "sim_count": 20000, 352 | "seed": 52, 353 | "min_is_best": False, 354 | "interval_alpha": 0.9, 355 | }, 356 | "expected_output": ([], [], []), 357 | }, 358 | ] 359 | 360 | PBB_EXPONENTIAL_AGG_INPUTS = [ 361 | { 362 | "input": { 363 | "totals": [100, 90, 80], 364 | "sums": [1040.29884, 993.66883, 883.05801], 365 | "sim_count": 20000, 366 | "seed": 52, 367 | "min_is_best": False, 368 | "interval_alpha": 0.9, 369 | }, 370 | "expected_output": ( 371 | [0.1826, 0.4065, 0.4109], 372 | [1.5195025, 0.8380173, 0.8431285], 373 | [[8.8658129, 12.3263561], [9.3561749, 13.2588682], [9.2650625, 13.3809534]], 374 | ), 375 | }, 376 | { 377 | "input": { 378 | "totals": [1000, 1000, 1000], 379 | "sums": [2288.69431, 2471.61961, 2745.7794], 380 | "sim_count": 20000, 381 | "seed": 52, 382 | "min_is_best": True, 383 | "interval_alpha": 0.9, 384 | }, 385 | 
"expected_output": ( 386 | [0.9594, 0.0406, 0.0], 387 | [0.0017238, 0.1865276, 0.4598496], 388 | [[2.1727503, 2.4111014], [2.3482046, 2.6066663], [2.6087576, 2.8941021]], 389 | ), 390 | }, 391 | { 392 | "input": { 393 | "totals": [100], 394 | "sums": [1007.25317], 395 | "sim_count": 20000, 396 | "seed": 52, 397 | "min_is_best": True, 398 | "interval_alpha": 0.912, 399 | }, 400 | "expected_output": ([1], [0], [[8.5325723, 11.9986705]]), 401 | }, 402 | { 403 | "input": { 404 | "totals": [], 405 | "sums": [], 406 | "sim_count": 20000, 407 | "seed": 52, 408 | "min_is_best": False, 409 | "interval_alpha": 0.9, 410 | }, 411 | "expected_output": ([], [], []), 412 | }, 413 | ] 414 | 415 | PBB_DELTA_NORMAL_AGG_INPUTS = [ 416 | { 417 | "input": { 418 | "totals": [10000, 1000], 419 | "non_zeros": [1009, 111], 420 | "sums": [7026.30599, 801.53947], 421 | "sums_2": [49993.4988, 5891.6073], 422 | "sim_count": 20000, 423 | "seed": 52, 424 | "min_is_best": False, 425 | "interval_alpha": 0.9, 426 | }, 427 | "expected_output": ( 428 | [0.08285, 0.91715], 429 | [0.1045921, 0.0026141], 430 | [[0.6683901, 0.7384471], [0.6897179, 0.9275315]], 431 | ), 432 | }, 433 | { 434 | "input": { 435 | "totals": [10, 20, 30, 40], 436 | "non_zeros": [0, 0, 0, 0], 437 | "sums": [0, 0, 0, 0], 438 | "sums_2": [0, 0, 0, 0], 439 | "sim_count": 10000, 440 | "seed": 52, 441 | "min_is_best": False, 442 | "interval_alpha": 0.9, 443 | }, 444 | "expected_output": ( 445 | [0.25, 0.25, 0.25, 0.25], 446 | [np.nan, np.nan, np.nan, np.nan], 447 | [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan]], 448 | ), 449 | }, 450 | { 451 | "input": { 452 | "totals": [100], 453 | "non_zeros": [10], 454 | "sums": [0], 455 | "sums_2": [0], 456 | "sim_count": 10000, 457 | "seed": 52, 458 | "min_is_best": False, 459 | "interval_alpha": 0.9, 460 | }, 461 | "expected_output": ([1], [0], [[-0.0017847, 0.0020072]]), 462 | }, 463 | { 464 | "input": { 465 | "totals": [], 466 | "non_zeros": [], 467 | "sums": [], 468 | "sums_2": [], 469 | "sim_count": 10000, 470 | "seed": 52, 471 | "min_is_best": False, 472 | "interval_alpha": 0.9, 473 | }, 474 | "expected_output": ([], [], []), 475 | }, 476 | ] 477 | 478 | 479 | @pytest.mark.parametrize("inp", PBB_BERNOULLI_AGG_INPUTS) 480 | def test_eval_bernoulli_agg(inp): 481 | i = inp["input"] 482 | res = eval_bernoulli_agg( 483 | i["totals"], 484 | i["successes"], 485 | sim_count=i["sim_count"], 486 | seed=i["seed"], 487 | min_is_best=i["min_is_best"], 488 | interval_alpha=i["interval_alpha"], 489 | ) 490 | assert res == inp["expected_output"] 491 | 492 | 493 | @pytest.mark.parametrize("inp", PBB_NORMAL_AGG_INPUTS) 494 | def test_eval_normal_agg(inp): 495 | i = inp["input"] 496 | res = eval_normal_agg( 497 | i["totals"], 498 | i["sums"], 499 | i["sums_2"], 500 | sim_count=i["sim_count"], 501 | seed=i["seed"], 502 | interval_alpha=i["interval_alpha"], 503 | ) 504 | assert res == inp["expected_output"] 505 | 506 | 507 | def test_eval_normal_agg_different_runs(): 508 | # two different runs of same input without seed should be different 509 | run1 = eval_normal_agg([100, 100], [10, 10], [20, 20]) 510 | run2 = eval_normal_agg([100, 100], [10, 10], [20, 20]) 511 | assert run1 != run2 512 | 513 | 514 | @pytest.mark.parametrize("inp", PBB_DELTA_LOGNORMAL_AGG_INPUTS) 515 | def test_eval_delta_lognormal_agg(inp): 516 | i = inp["input"] 517 | res = eval_delta_lognormal_agg( 518 | i["totals"], 519 | i["successes"], 520 | i["sum_logs"], 521 | i["sum_logs_2"], 522 | sim_count=i["sim_count"], 523 | seed=i["seed"], 
524 | interval_alpha=i["interval_alpha"], 525 | ) 526 | assert res == inp["expected_output"] 527 | 528 | 529 | def test_eval_delta_lognormal_agg_different_runs(): 530 | # two runs of the same input without a seed should produce different results 531 | run1 = eval_delta_lognormal_agg([1000, 1000], [100, 100], [10, 10], [20, 20], sim_count=100000) 532 | run2 = eval_delta_lognormal_agg([1000, 1000], [100, 100], [10, 10], [20, 20], sim_count=100000) 533 | assert run1 != run2 534 | 535 | 536 | @pytest.mark.parametrize("inp", PBB_NUMERICAL_DIRICHLET_AGG_INPUTS) 537 | def test_eval_numerical_dirichlet_agg(inp): 538 | i = inp["input"] 539 | res = eval_numerical_dirichlet_agg( 540 | i["states"], i["concentrations"], sim_count=i["sim_count"], seed=i["seed"], interval_alpha=i["interval_alpha"] 541 | ) 542 | assert res == inp["expected_output"] 543 | 544 | 545 | def test_eval_numerical_dirichlet_agg_different_runs(): 546 | # two runs of the same input without a seed should produce different results 547 | run1 = eval_numerical_dirichlet_agg([1, 20], [[10, 10], [20, 20]]) 548 | run2 = eval_numerical_dirichlet_agg([1, 20], [[10, 10], [20, 20]]) 549 | assert run1 != run2 550 | 551 | 552 | @pytest.mark.parametrize("inp", PBB_POISSON_AGG_INPUTS) 553 | def test_eval_poisson_agg(inp): 554 | i = inp["input"] 555 | res = eval_poisson_agg( 556 | i["totals"], 557 | i["sums"], 558 | sim_count=i["sim_count"], 559 | seed=i["seed"], 560 | min_is_best=i["min_is_best"], 561 | interval_alpha=i["interval_alpha"], 562 | ) 563 | assert res == inp["expected_output"] 564 | 565 | 566 | @pytest.mark.parametrize("inp", PBB_EXPONENTIAL_AGG_INPUTS) 567 | def test_eval_exponential_agg(inp): 568 | i = inp["input"] 569 | res = eval_exponential_agg( 570 | i["totals"], 571 | i["sums"], 572 | sim_count=i["sim_count"], 573 | seed=i["seed"], 574 | min_is_best=i["min_is_best"], 575 | interval_alpha=i["interval_alpha"], 576 | ) 577 | assert res == inp["expected_output"] 578 | 579 | 580 | @pytest.mark.parametrize("inp", PBB_DELTA_NORMAL_AGG_INPUTS) 581 | def test_eval_delta_normal_agg(inp): 582 | i = inp["input"] 583 | res = eval_delta_normal_agg( 584 | i["totals"], 585 | i["non_zeros"], 586 | i["sums"], 587 | i["sums_2"], 588 | sim_count=i["sim_count"], 589 | seed=i["seed"], 590 | min_is_best=i["min_is_best"], 591 | interval_alpha=i["interval_alpha"], 592 | ) 593 | assert res == inp["expected_output"] 594 | -------------------------------------------------------------------------------- /tests/test_exponential.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from bayesian_testing.experiments import ExponentialDataTest 4 | 5 | 6 | @pytest.fixture 7 | def exponential_test(): 8 | expo = ExponentialDataTest() 9 | expo.add_variant_data( 10 | "A", 11 | [ 12 | 3.27, 13 | 5.62, 14 | 0.31, 15 | 3.9, 16 | 2.4, 17 | 10.49, 18 | 0.63, 19 | 2.71, 20 | 1.64, 21 | 0.43, 22 | 0.22, 23 | 0.3, 24 | 1.99, 25 | 0.69, 26 | 5.15, 27 | 1.31, 28 | 1.01, 29 | 1.26, 30 | 0.2, 31 | 1.6, 32 | ], 33 | ) 34 | expo.add_variant_data( 35 | "B", 36 | [ 37 | 0.28, 38 | 0.18, 39 | 0.13, 40 | 4.79, 41 | 1.07, 42 | 0.69, 43 | 5.75, 44 | 2.07, 45 | 9.67, 46 | 2.79, 47 | 0.18, 48 | 5.8, 49 | 12.81, 50 | 2.33, 51 | 2.28, 52 | 1.56, 53 | 4.18, 54 | 1.47, 55 | 1.67, 56 | 0.98, 57 | ], 58 | ) 59 | expo.add_variant_data_agg("C", 20, 72.27, a_prior=1, b_prior=2) 60 | expo.add_variant_data_agg("D", 100, 200) 61 | expo.add_variant_data_agg("D", 100, 220, replace=False) 62 | expo.add_variant_data_agg("D", 10, 20, replace=True) 63 | expo.delete_variant("D") 64 | return expo 65 | 
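# A minimal, self-contained sketch of the ExponentialDataTest workflow that the
# fixture above exercises (values are illustrative; the gamma priors mirror the
# ones used for variant "C"). The leading underscore keeps pytest from
# collecting it as a test.
def _example_exponential_workflow():
    demo = ExponentialDataTest()
    demo.add_variant_data("A", [3.27, 5.62, 0.31, 3.9, 2.4])  # raw observations
    demo.add_variant_data_agg("B", 20, 72.27, a_prior=1, b_prior=2)  # aggregated: 20 observations summing to 72.27
    return demo.evaluate(sim_count=20000, seed=52)  # list of per-variant summary dicts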
66 | 67 | def test_variants(exponential_test): 68 | assert exponential_test.variant_names == ["A", "B", "C"] 69 | 70 | 71 | def test_totals(exponential_test): 72 | assert exponential_test.totals == [20, 20, 20] 73 | 74 | 75 | def test_sum_values(exponential_test): 76 | assert exponential_test.sum_values == [45.13, 60.68, 72.27] 77 | 78 | 79 | def test_a_priors(exponential_test): 80 | assert exponential_test.a_priors == [0.1, 0.1, 1] 81 | 82 | 83 | def test_b_priors(exponential_test): 84 | assert exponential_test.b_priors == [0.1, 0.1, 2] 85 | 86 | 87 | def test_probabs_of_being_best(exponential_test): 88 | pbbs = exponential_test.probabs_of_being_best(sim_count=20000, seed=52) 89 | assert pbbs == {"A": 0.0414, "B": 0.29885, "C": 0.65975} 90 | 91 | 92 | def test_expected_loss(exponential_test): 93 | loss = exponential_test.expected_loss(sim_count=20000, seed=52) 94 | assert loss == {"A": 1.5907038, "B": 0.7596064, "C": 0.2414208} 95 | 96 | 97 | def test_credible_intervals_95(exponential_test): 98 | ci = exponential_test.credible_intervals(sim_count=20000, seed=52) 99 | assert ci == { 100 | "A": [1.5151401, 3.6571069], 101 | "B": [2.0455239, 4.9692854], 102 | "C": [2.4059958, 5.6846722], 103 | } 104 | 105 | 106 | def test_credible_intervals_99(exponential_test): 107 | ci = exponential_test.credible_intervals(sim_count=20000, seed=52, interval_alpha=0.99) 108 | assert ci == { 109 | "A": [1.3525642, 4.3405547], 110 | "B": [1.8175504, 5.9001709], 111 | "C": [2.124576, 6.7291228], 112 | } 113 | 114 | 115 | def test_evaluate(exponential_test): 116 | eval_report = exponential_test.evaluate(sim_count=20000, seed=52) 117 | assert eval_report == [ 118 | { 119 | "variant": "A", 120 | "totals": 20, 121 | "sum_values": 45.13, 122 | "observed_average": 2.2565, 123 | "posterior_mean": 2.25025, 124 | "credible_interval": [1.5151401, 3.6571069], 125 | "prob_being_best": 0.0414, 126 | "expected_loss": 1.5907038, 127 | }, 128 | { 129 | "variant": "B", 130 | "totals": 20, 131 | "sum_values": 60.68, 132 | "observed_average": 3.034, 133 | "posterior_mean": 3.02388, 134 | "credible_interval": [2.0455239, 4.9692854], 135 | "prob_being_best": 0.29885, 136 | "expected_loss": 0.7596064, 137 | }, 138 | { 139 | "variant": "C", 140 | "totals": 20, 141 | "sum_values": 72.27, 142 | "observed_average": 3.6135, 143 | "posterior_mean": 3.53667, 144 | "credible_interval": [2.4059958, 5.6846722], 145 | "prob_being_best": 0.65975, 146 | "expected_loss": 0.2414208, 147 | }, 148 | ] 149 | 150 | 151 | def test_wrong_inputs(): 152 | exp_test = ExponentialDataTest() 153 | with pytest.raises(ValueError): 154 | exp_test.add_variant_data(10, [1, 2, 3]) 155 | with pytest.raises(ValueError): 156 | exp_test.add_variant_data("A", [1, 2, 3], a_prior=-1) 157 | with pytest.raises(ValueError): 158 | exp_test.add_variant_data_agg("A", -1, 7) 159 | with pytest.raises(ValueError): 160 | exp_test.add_variant_data_agg("A", 1, -7) 161 | with pytest.raises(ValueError): 162 | exp_test.add_variant_data("A", []) 163 | with pytest.raises(ValueError): 164 | exp_test.add_variant_data("A", [1, 2, -3]) 165 | -------------------------------------------------------------------------------- /tests/test_normal.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from bayesian_testing.experiments import NormalDataTest 4 | 5 | 6 | @pytest.fixture 7 | def norm_test(): 8 | norm = NormalDataTest() 9 | norm.add_variant_data( 10 | "A", 11 | [ 12 | 11.8, 13 | 12.2, 14 | 12.4, 15 | 9.5, 16 | 2.2, 17 | 3.3, 18 | 
16.2, 19 | 4.9, 20 | 12.4, 21 | 6.8, 22 | 8.7, 23 | 9.8, 24 | 5.4, 25 | 9.0, 26 | 15.0, 27 | 12.3, 28 | 9.6, 29 | 12.5, 30 | 9.1, 31 | 10.2, 32 | ], 33 | m_prior=9, 34 | ) 35 | norm.add_variant_data( 36 | "B", 37 | [ 38 | 10.6, 39 | 5.1, 40 | 9.4, 41 | 11.2, 42 | 2.0, 43 | 13.4, 44 | 14.1, 45 | 15.4, 46 | 16.3, 47 | 11.7, 48 | 7.3, 49 | 6.8, 50 | 8.2, 51 | 16.2, 52 | 10.8, 53 | 7.1, 54 | 12.2, 55 | 11.2, 56 | ], 57 | w_prior=0.03, 58 | ) 59 | norm.add_variant_data( 60 | "C", 61 | [ 62 | 25.3, 63 | 10.3, 64 | 24.7, 65 | -8.1, 66 | 8.4, 67 | 10.3, 68 | 14.8, 69 | 13.4, 70 | 11.5, 71 | -4.7, 72 | 5.3, 73 | 7.4, 74 | 17.2, 75 | 15.4, 76 | 13.0, 77 | 12.9, 78 | 19.2, 79 | 11.6, 80 | 0.4, 81 | 5.7, 82 | 23.5, 83 | 15.2, 84 | ], 85 | b_prior_ig=2, 86 | ) 87 | norm.add_variant_data_agg("A", 20, 193.3, 2127.71, replace=False) 88 | norm.add_variant_data("D", [0, 10.7, 0, 8, 0, 0, 0, 0, 0, 11.22]) 89 | norm.add_variant_data("D", [0, 10.7, 0, 8, 0, 0, 0, 0, 0, 11.22], replace=False) 90 | norm.add_variant_data("D", [0, 10.7, 0, 8, 0, 0, 0, 0, 0, 11.22], replace=True) 91 | norm.delete_variant("D") 92 | return norm 93 | 94 | 95 | def test_variants(norm_test): 96 | assert norm_test.variant_names == ["A", "B", "C"] 97 | 98 | 99 | def test_totals(norm_test): 100 | assert norm_test.totals == [40, 18, 22] 101 | 102 | 103 | def test_sum_values(norm_test): 104 | assert norm_test.sum_values == [386.6, 188.99999999999997, 252.69999999999996] 105 | 106 | 107 | def test_sum_values_2(norm_test): 108 | assert norm_test.sum_values_2 == [4255.42, 2244.8200000000006, 4421.87] 109 | 110 | 111 | def test_m_priors(norm_test): 112 | assert norm_test.m_priors == [9, 1, 1] 113 | 114 | 115 | def test_a_priors_ig(norm_test): 116 | assert norm_test.a_priors_ig == [0, 0, 0] 117 | 118 | 119 | def test_b_priors_ig(norm_test): 120 | assert norm_test.b_priors_ig == [0, 0, 2] 121 | 122 | 123 | def test_w_priors(norm_test): 124 | assert norm_test.w_priors == [0.01, 0.03, 0.01] 125 | 126 | 127 | def test_probabs_of_being_best(norm_test): 128 | pbbs = norm_test.probabs_of_being_best(sim_count=20000, seed=52) 129 | assert pbbs == {"A": 0.05105, "B": 0.27935, "C": 0.6696} 130 | 131 | 132 | def test_expected_loss(norm_test): 133 | loss = norm_test.expected_loss(sim_count=20000, seed=52) 134 | assert loss == {"A": 2.2696341, "B": 1.4580033, "C": 0.4464154} 135 | 136 | 137 | def test_credible_intervals_95(norm_test): 138 | ci = norm_test.credible_intervals(sim_count=20000, seed=52) 139 | assert ci == { 140 | "A": [8.5300072, 10.8231841], 141 | "B": [8.5577171, 12.3448628], 142 | "C": [7.8915125, 15.1179586], 143 | } 144 | 145 | 146 | def test_credible_intervals_99(norm_test): 147 | ci = norm_test.credible_intervals(sim_count=20000, seed=52, interval_alpha=0.99) 148 | assert ci == { 149 | "A": [8.1196181, 11.2023581], 150 | "B": [7.8792145, 13.0964176], 151 | "C": [6.5669908, 16.5226358], 152 | } 153 | 154 | 155 | def test_evaluate(norm_test): 156 | eval_report = norm_test.evaluate(sim_count=20000, seed=52) 157 | assert eval_report == [ 158 | { 159 | "variant": "A", 160 | "totals": 40, 161 | "sum_values": 386.6, 162 | "avg_values": 9.665, 163 | "posterior_mean": 9.66483, 164 | "credible_interval": [8.5300072, 10.8231841], 165 | "prob_being_best": 0.05105, 166 | "expected_loss": 2.2696341, 167 | }, 168 | { 169 | "variant": "B", 170 | "totals": 18, 171 | "sum_values": 189.0, 172 | "avg_values": 10.5, 173 | "posterior_mean": 10.48419, 174 | "credible_interval": [8.5577171, 12.3448628], 175 | "prob_being_best": 0.27935, 176 | "expected_loss": 
1.4580033, 177 | }, 178 | { 179 | "variant": "C", 180 | "totals": 22, 181 | "sum_values": 252.7, 182 | "avg_values": 11.48636, 183 | "posterior_mean": 11.4816, 184 | "credible_interval": [7.8915125, 15.1179586], 185 | "prob_being_best": 0.6696, 186 | "expected_loss": 0.4464154, 187 | }, 188 | ] 189 | -------------------------------------------------------------------------------- /tests/test_poisson.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from bayesian_testing.experiments import PoissonDataTest 4 | 5 | 6 | @pytest.fixture 7 | def poisson_test(): 8 | pois = PoissonDataTest() 9 | pois.add_variant_data("A", [5, 5, 7, 1, 3, 3, 1, 1, 2, 0, 1, 3, 4, 2, 5]) 10 | pois.add_variant_data("B", [2, 4, 3, 4, 6, 1, 3, 6, 4, 0, 3, 1, 2, 1]) 11 | pois.add_variant_data_agg("C", 15, 49, a_prior=1, b_prior=2) 12 | pois.add_variant_data_agg("D", 10, 10) 13 | pois.add_variant_data_agg("D", 20, 20, replace=False) 14 | pois.add_variant_data_agg("D", 20, 20, replace=True) 15 | pois.delete_variant("D") 16 | return pois 17 | 18 | 19 | def test_variants(poisson_test): 20 | assert poisson_test.variant_names == ["A", "B", "C"] 21 | 22 | 23 | def test_totals(poisson_test): 24 | assert poisson_test.totals == [15, 14, 15] 25 | 26 | 27 | def test_sum_values(poisson_test): 28 | assert poisson_test.sum_values == [43, 40, 49] 29 | 30 | 31 | def test_a_priors(poisson_test): 32 | assert poisson_test.a_priors == [0.1, 0.1, 1] 33 | 34 | 35 | def test_b_priors(poisson_test): 36 | assert poisson_test.b_priors == [0.1, 0.1, 2] 37 | 38 | 39 | def test_probabs_of_being_best(poisson_test): 40 | pbbs = poisson_test.probabs_of_being_best(sim_count=20000, seed=52) 41 | assert pbbs == {"A": 0.30945, "B": 0.29665, "C": 0.3939} 42 | 43 | 44 | def test_expected_loss(poisson_test): 45 | loss = poisson_test.expected_loss(sim_count=20000, seed=52) 46 | assert loss == {"A": 0.3936672, "B": 0.4144949, "C": 0.3109256} 47 | 48 | 49 | def test_credible_intervals_95(poisson_test): 50 | ci = poisson_test.credible_intervals(sim_count=20000, seed=52) 51 | assert ci == { 52 | "A": [2.0742056, 3.7731115], 53 | "B": [2.0264899, 3.7822918], 54 | "C": [2.1895805, 3.8084984], 55 | } 56 | 57 | 58 | def test_credible_intervals_99(poisson_test): 59 | ci = poisson_test.credible_intervals(sim_count=20000, seed=52, interval_alpha=0.99) 60 | assert ci == { 61 | "A": [1.8569798, 4.0897961], 62 | "B": [1.8082962, 4.1242607], 63 | "C": [1.9771075, 4.1434489], 64 | } 65 | 66 | 67 | def test_evaluate(poisson_test): 68 | eval_report = poisson_test.evaluate(sim_count=20000, seed=52) 69 | assert eval_report == [ 70 | { 71 | "variant": "A", 72 | "totals": 15, 73 | "sum_values": 43, 74 | "observed_average": 2.86667, 75 | "posterior_mean": 2.8543, 76 | "credible_interval": [2.0742056, 3.7731115], 77 | "prob_being_best": 0.30945, 78 | "expected_loss": 0.3936672, 79 | }, 80 | { 81 | "variant": "B", 82 | "totals": 14, 83 | "sum_values": 40, 84 | "observed_average": 2.85714, 85 | "posterior_mean": 2.84397, 86 | "credible_interval": [2.0264899, 3.7822918], 87 | "prob_being_best": 0.29665, 88 | "expected_loss": 0.4144949, 89 | }, 90 | { 91 | "variant": "C", 92 | "totals": 15, 93 | "sum_values": 49, 94 | "observed_average": 3.26667, 95 | "posterior_mean": 2.94118, 96 | "credible_interval": [2.1895805, 3.8084984], 97 | "prob_being_best": 0.3939, 98 | "expected_loss": 0.3109256, 99 | }, 100 | ] 101 | 102 | 103 | def test_wrong_inputs(): 104 | pois_test = PoissonDataTest() 105 | with pytest.raises(ValueError): 106 | 
pois_test.add_variant_data(10, [1, 2, 3]) 107 | with pytest.raises(ValueError): 108 | pois_test.add_variant_data("A", [1, 2, 3], a_prior=-1) 109 | with pytest.raises(ValueError): 110 | pois_test.add_variant_data_agg("A", -1, 7) 111 | with pytest.raises(ValueError): 112 | pois_test.add_variant_data_agg("A", 1, -7) 113 | with pytest.raises(ValueError): 114 | pois_test.add_variant_data("A", []) 115 | with pytest.raises(ValueError): 116 | pois_test.add_variant_data("A", [1, 2, -3]) 117 | -------------------------------------------------------------------------------- /tests/test_posteriors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from bayesian_testing.metrics.posteriors import ( 5 | beta_posteriors_all, 6 | lognormal_posteriors, 7 | dirichlet_posteriors, 8 | pois_gamma_posteriors_all, 9 | exp_gamma_posteriors_all, 10 | ) 11 | 12 | BETA_POSTERIORS_ALL_INPUTS = [ 13 | { 14 | "totals": [10, 20, 30], 15 | "successes": [8, 16, 24], 16 | "sim_count": 10, 17 | "a_priors_beta": [0.5, 0.5, 0.5], 18 | "b_priors_beta": [0.5, 0.5, 0.5], 19 | }, 20 | { 21 | "totals": [20, 30], 22 | "successes": [16, 24], 23 | "sim_count": 20, 24 | "a_priors_beta": [0.5, 0.5], 25 | "b_priors_beta": [0.5, 0.5], 26 | }, 27 | ] 28 | 29 | LOGNORMAL_POSTERIORS_INPUTS = [ 30 | { 31 | "totals": 1580, 32 | "sum_logs": 3831.806394737816, 33 | "sum_logs_2": 11029.923165846496, 34 | "sim_count": 10000, 35 | }, 36 | { 37 | "totals": 1580, 38 | "sum_logs": 4055.965234848171, 39 | "sum_logs_2": 12357.911862914, 40 | "sim_count": 100, 41 | }, 42 | { 43 | "totals": 0, 44 | "sum_logs": 0, 45 | "sum_logs_2": 0, 46 | "sim_count": 100, 47 | }, 48 | ] 49 | 50 | DIRICHLET_POSTERIORS_INPUTS = [ 51 | { 52 | "concentration": [1, 2, 3], 53 | "prior": [1, 1, 1], 54 | "sim_count": 10000, 55 | }, 56 | { 57 | "concentration": [100, 200], 58 | "prior": [1 / 2, 1 / 2], 59 | "sim_count": 100, 60 | }, 61 | ] 62 | 63 | GAMMA_POSTERIORS_ALL_INPUTS = [ 64 | { 65 | "totals": [10, 20, 30], 66 | "sums": [80, 161, 260], 67 | "sim_count": 10, 68 | "a_priors_gamma": [0.5, 0.5, 0.5], 69 | "b_priors_gamma": [0.5, 0.5, 0.5], 70 | }, 71 | { 72 | "totals": [20, 30], 73 | "sums": [160, 240], 74 | "sim_count": 20, 75 | "a_priors_gamma": [0.5, 0.5], 76 | "b_priors_gamma": [0.5, 0.5], 77 | }, 78 | ] 79 | 80 | 81 | @pytest.mark.parametrize("inp", BETA_POSTERIORS_ALL_INPUTS) 82 | def test_beta_posteriors_all(inp): 83 | all_pos = beta_posteriors_all( 84 | inp["totals"], 85 | inp["successes"], 86 | inp["sim_count"], 87 | inp["a_priors_beta"], 88 | inp["b_priors_beta"], 89 | ) 90 | all_pos_shape = np.array(all_pos).shape 91 | assert all_pos_shape == (len(inp["totals"]), inp["sim_count"]) 92 | 93 | 94 | @pytest.mark.parametrize("inp", LOGNORMAL_POSTERIORS_INPUTS) 95 | def test_lognormal_posteriors(inp): 96 | all_pos = lognormal_posteriors( 97 | inp["totals"], 98 | inp["sum_logs"], 99 | inp["sum_logs_2"], 100 | inp["sim_count"], 101 | ) 102 | assert len(all_pos) == inp["sim_count"] 103 | 104 | 105 | @pytest.mark.parametrize("inp", DIRICHLET_POSTERIORS_INPUTS) 106 | def test_dirichlet_posteriors(inp): 107 | all_pos = dirichlet_posteriors( 108 | inp["concentration"], 109 | inp["prior"], 110 | inp["sim_count"], 111 | ) 112 | assert all_pos.shape == (inp["sim_count"], len(inp["concentration"])) 113 | 114 | 115 | @pytest.mark.parametrize("inp", GAMMA_POSTERIORS_ALL_INPUTS) 116 | def test_pois_gamma_posteriors_all(inp): 117 | all_pos = pois_gamma_posteriors_all( 118 | inp["totals"], 119 | 
inp["sums"], 120 | inp["sim_count"], 121 | inp["a_priors_gamma"], 122 | inp["b_priors_gamma"], 123 | ) 124 | all_pos_shape = np.array(all_pos).shape 125 | assert all_pos_shape == (len(inp["totals"]), inp["sim_count"]) 126 | 127 | 128 | @pytest.mark.parametrize("inp", GAMMA_POSTERIORS_ALL_INPUTS) 129 | def test_exp_gamma_posteriors_all(inp): 130 | all_pos = exp_gamma_posteriors_all( 131 | inp["totals"], 132 | inp["sums"], 133 | inp["sim_count"], 134 | inp["a_priors_gamma"], 135 | inp["b_priors_gamma"], 136 | ) 137 | all_pos_shape = np.array(all_pos).shape 138 | assert all_pos_shape == (len(inp["totals"]), inp["sim_count"]) 139 | -------------------------------------------------------------------------------- /tests/test_validators.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from bayesian_testing.metrics.evaluation import validate_bernoulli_input 4 | from bayesian_testing.utilities.common import check_list_lengths 5 | 6 | 7 | def test_validate_bernoulli_input(): 8 | validate_bernoulli_input([1, 2, 3], [1, 1, 1]) 9 | validate_bernoulli_input([1, 2], [1, 1]) 10 | validate_bernoulli_input([1], [1]) 11 | 12 | 13 | def test_validate_bernoulli_input_error(): 14 | with pytest.raises(ValueError): 15 | validate_bernoulli_input([1, 2], [1]) 16 | 17 | 18 | def test_check_list_lengths(): 19 | check_list_lengths([[1, 2, 3], [1, 1, 1], [2, 2, 2], [7, 7, 7]]) 20 | check_list_lengths([[], [], []]) 21 | 22 | 23 | def test_check_list_lengths_error(): 24 | with pytest.raises(ValueError): 25 | check_list_lengths([[1, 2, 3], [1, 1, 1], [2, 2, 2], [7, 7]]) 26 | --------------------------------------------------------------------------------