├── .github
│   └── workflows
│       ├── coverage.yml
│       ├── release.yml
│       └── tests.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── bayesian_testing
│   ├── __init__.py
│   ├── experiments
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── binary.py
│   │   ├── delta_lognormal.py
│   │   ├── delta_normal.py
│   │   ├── discrete.py
│   │   ├── exponential.py
│   │   ├── normal.py
│   │   └── poisson.py
│   ├── metrics
│   │   ├── __init__.py
│   │   ├── evaluation.py
│   │   └── posteriors.py
│   └── utilities
│       ├── __init__.py
│       ├── common.py
│       ├── logging.conf
│       └── logging.py
├── codecov.yml
├── examples
│   ├── README.md
│   ├── data
│   │   └── session_data.csv
│   ├── dice_rolls_ab_testing.ipynb
│   ├── goals_scored_ab_testing.ipynb
│   ├── session_data_ab_testing.ipynb
│   ├── session_data_manual_pbbs.ipynb
│   └── waiting_time_ab_testing.ipynb
├── poetry.lock
├── pyproject.toml
└── tests
    ├── README.md
    ├── test_binary.py
    ├── test_delta_lognormal.py
    ├── test_delta_normal.py
    ├── test_discrete.py
    ├── test_evaluation.py
    ├── test_exponential.py
    ├── test_normal.py
    ├── test_poisson.py
    ├── test_posteriors.py
    └── test_validators.py
/.github/workflows/coverage.yml:
--------------------------------------------------------------------------------
1 | name: Coverage
2 | on: push
3 |
4 | jobs:
5 | coverage:
6 | runs-on: ubuntu-latest
7 | steps:
8 | - name: Check out repository code
9 | uses: actions/checkout@v2
10 | - name: Set up Python
11 | uses: actions/setup-python@v1
12 | with:
13 | python-version: '3.10'
14 | - name: Install dependencies
15 | run: |
16 | python -m pip install --upgrade pip
17 | pip install poetry==2.*
18 | poetry install
19 | - name: Test with pytest
20 | run: |
21 | poetry run coverage run -m pytest
22 | poetry run coverage report -i
23 | poetry run coverage xml -i
24 | - name: Upload coverage to Codecov
25 | uses: codecov/codecov-action@v2
26 | with:
27 | fail_ci_if_error: true
28 | token: ${{ secrets.CODECOV_TOKEN }}
29 |
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 | on:
3 | release:
4 | types: [published]
5 | jobs:
6 | release:
7 | runs-on: ubuntu-latest
8 | steps:
9 | - uses: actions/checkout@v2
10 | - uses: actions/setup-python@v1
11 | with:
12 | python-version: '3.10'
13 | architecture: x64
14 | - run: python -m pip install --upgrade pip
15 | - run: pip install poetry==2.*
16 | - run: poetry install
17 | - run: poetry run coverage run -m pytest
18 | - run: poetry build
19 | - run: poetry publish --username=__token__ --password=${{ secrets.PYPI_TOKEN }}
20 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 | on:
3 | pull_request:
4 | branches:
5 | - main
6 |
7 | jobs:
8 | tests:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - name: Check out repository code
12 | uses: actions/checkout@v2
13 | - name: Set up Python
14 | uses: actions/setup-python@v1
15 | with:
16 | python-version: '3.10'
17 | - name: Install dependencies
18 | run: |
19 | python -m pip install --upgrade pip
20 | pip install poetry==2.*
21 | poetry install
22 | - name: Test with pytest
23 | run: |
24 | poetry run coverage run -m pytest
25 | poetry run coverage report -i
26 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # other
132 | .DS_Store
133 | .idea
134 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: v4.5.0
4 | hooks:
5 | - id: check-yaml
6 | - id: end-of-file-fixer
7 | - id: trailing-whitespace
8 | - repo: https://github.com/psf/black
9 | rev: 22.3.0
10 | hooks:
11 | - id: black
12 | args: [--line-length=100]
13 | - repo: https://github.com/pycqa/flake8
14 | rev: 6.1.0
15 | hooks:
16 | - id: flake8
17 | args: [--max-line-length=100]
18 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Matus Baniar
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://github.com/Matt52/bayesian-testing/actions?workflow=Tests)
2 | [](https://codecov.io/gh/Matt52/bayesian-testing)
3 | [](https://pypi.org/project/bayesian-testing/)
4 | # Bayesian A/B testing
5 | `bayesian_testing` is a small package for quick evaluation of A/B (or A/B/C/...) tests using a
6 | Bayesian approach.
7 |
8 | **Implemented tests:**
9 | - [BinaryDataTest](bayesian_testing/experiments/binary.py)
10 | - **_Input data_** - binary data (`[0, 1, 0, ...]`)
11 | - Designed for conversion-like data A/B testing.
12 | - [NormalDataTest](bayesian_testing/experiments/normal.py)
13 | - **_Input data_** - normal data with unknown variance
14 | - Designed for normal data A/B testing.
15 | - [DeltaLognormalDataTest](bayesian_testing/experiments/delta_lognormal.py)
16 | - **_Input data_** - lognormal data with zeros
17 | - Designed for revenue-like data A/B testing.
18 | - [DeltaNormalDataTest](bayesian_testing/experiments/delta_normal.py)
19 | - **_Input data_** - normal data with zeros
20 | - Designed for profit-like data A/B testing.
21 | - [DiscreteDataTest](bayesian_testing/experiments/discrete.py)
22 | - **_Input data_** - categorical data with numerical categories
23 | - Designed for discrete data A/B testing (e.g. dice rolls, star ratings, 1-10 ratings, etc.).
24 | - [PoissonDataTest](bayesian_testing/experiments/poisson.py)
25 | - **_Input data_** - non-negative integers (`[1, 0, 3, ...]`)
26 | - Designed for Poisson data A/B testing.
27 | - [ExponentialDataTest](bayesian_testing/experiments/exponential.py)
28 | - **_Input data_** - exponential data (non-negative real numbers)
29 | - Designed for exponential data A/B testing (e.g. session/waiting time, time between events,
30 | etc.).
31 |
32 | **Implemented evaluation metrics:**
33 | - `Posterior Mean`
34 | - Expected value from the posterior distribution for a given variant.
35 | - `Credible Interval`
36 | - Quantile-based credible intervals computed from simulations of the posterior
37 | distributions (i.e. empirical).
38 | - Interval probability (`interval_alpha`) can be set during the evaluation (default value is 95%).
39 | - `Probability of Being Best`
40 | - Probability that a given variant is best among all variants.
41 | - By default, `the best` means `the greatest` (from a data/metric point of view); however,
42 | it is possible to change this by using `min_is_best=True` in the evaluation method
43 | (useful when trying to find the variant with the smallest tested measure).
44 | - `Expected Loss`
45 | - "Risk" of choosing particular variant over other variants in the test.
46 | - Measured in same units as a tested measure (e.g. positive rate or average value).
47 |
48 | `Credible Interval`, `Probability of Being Best` and `Expected Loss` are calculated using
49 | simulations from the posterior distributions (given the observed data).
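
For intuition, here is a minimal sketch (illustrative only, not the package's internal code) of
how these three metrics can be derived from posterior samples; the Beta posteriors below are
hypothetical:

```python
import numpy as np

rng = np.random.default_rng(0)
# hypothetical Beta posteriors of two variants, 20K simulations each:
samples = np.stack([rng.beta(40.5, 960.5, 20000), rng.beta(55.5, 945.5, 20000)])

best = samples.argmax(axis=0)  # index of the winning variant in each simulation
prob_being_best = [np.mean(best == i) for i in range(len(samples))]
expected_loss = [np.mean(samples.max(axis=0) - s) for s in samples]
credible_intervals = [np.quantile(s, [0.025, 0.975]) for s in samples]  # 95% interval
```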
50 |
51 |
52 | ## Installation
53 | `bayesian_testing` can be installed using pip:
54 | ```console
55 | pip install bayesian_testing
56 | ```
57 | Alternatively, you can clone the repository and use `poetry` manually:
58 | ```console
59 | cd bayesian_testing
60 | pip install poetry
61 | poetry install
62 | poetry shell
63 | ```
64 |
65 | ## Basic Usage
66 | The primary features are classes:
67 | - `BinaryDataTest`
68 | - `NormalDataTest`
69 | - `DeltaLognormalDataTest`
70 | - `DeltaNormalDataTest`
71 | - `DiscreteDataTest`
72 | - `PoissonDataTest`
73 | - `ExponentialDataTest`
74 |
75 | All test classes support two methods to insert the data:
76 | - `add_variant_data` - Adding raw data for a variant as a list of observations (or numpy 1-D array).
77 | - `add_variant_data_agg` - Adding aggregated variant data (this can be practical for large
78 | data, as the aggregation can already be done at the database level).
79 |
80 | Both methods for adding data allow specification of prior distributions
81 | (see details in the respective docstrings). The default prior setup should be sufficient for
82 | most cases (e.g. cases with unknown priors or large amounts of data).
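
For example, with `BinaryDataTest` the two insertion styles below are equivalent, and prior
parameters can be passed to either method (the prior values here are purely illustrative):

```python
from bayesian_testing.experiments import BinaryDataTest

raw_data = [0, 1, 0, 0, 1, 0]

test = BinaryDataTest()
# raw observations for variant "A" vs. the same data pre-aggregated for variant "B":
test.add_variant_data("A", raw_data)
test.add_variant_data_agg("B", totals=len(raw_data), positives=sum(raw_data))
# a custom Beta prior (default for this test is a_prior=b_prior=1/2):
test.add_variant_data("C", raw_data, a_prior=1, b_prior=20)
```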
83 |
84 | To get the results of the test, simply call the method `evaluate`.
85 |
86 | Probability of being best, expected loss and credible intervals are approximated using
87 | simulations, hence the `evaluate` method can return slightly different values for different
88 | runs. To stabilize the results, you can set the `sim_count` parameter of `evaluate` to a
89 | higher value (default is 20K), or use the `seed` parameter to fix them completely.
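
Continuing with the `test` instance from the sketch above, fixing both parameters makes repeated
evaluations reproducible:

```python
# same sim_count and seed => identical results across runs:
results_1 = test.evaluate(sim_count=200000, seed=52)
results_2 = test.evaluate(sim_count=200000, seed=52)
assert results_1 == results_2
```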
90 |
91 | ### BinaryDataTest
92 | Class for a Bayesian A/B test for binary-like data (e.g. conversions, successes).
93 |
94 | **Example:**
95 | ```python
96 | import numpy as np
97 | from bayesian_testing.experiments import BinaryDataTest
98 |
99 | # generating some random data
100 | rng = np.random.default_rng(52)
101 | # random 1x1500 array of 0/1 data with 5.2% probability for 1:
102 | data_a = rng.binomial(n=1, p=0.052, size=1500)
103 | # random 1x1200 array of 0/1 data with 6.7% probability for 1:
104 | data_b = rng.binomial(n=1, p=0.067, size=1200)
105 |
106 | # initialize a test:
107 | test = BinaryDataTest()
108 |
109 | # add variant using raw data (arrays of zeros and ones):
110 | test.add_variant_data("A", data_a)
111 | test.add_variant_data("B", data_b)
112 | # priors can be specified like this (default for this test is a=b=1/2):
113 | # test.add_variant_data("B", data_b, a_prior=1, b_prior=20)
114 |
115 | # add variant using aggregated data (same as raw data with 950 zeros and 50 ones):
116 | test.add_variant_data_agg("C", totals=1000, positives=50)
117 |
118 | # evaluate test:
119 | results = test.evaluate()
120 | results
121 | # print(pd.DataFrame(results).set_index('variant').T.to_markdown(tablefmt="grid"))
122 | ```
123 |
124 | +-------------------+-----------+-------------+-------------+
125 | | | A | B | C |
126 | +===================+===========+=============+=============+
127 | | totals | 1500 | 1200 | 1000 |
128 | +-------------------+-----------+-------------+-------------+
129 | | positives | 80 | 80 | 50 |
130 | +-------------------+-----------+-------------+-------------+
131 | | positive_rate | 0.05333 | 0.06667 | 0.05 |
132 | +-------------------+-----------+-------------+-------------+
133 | | posterior_mean | 0.05363 | 0.06703 | 0.05045 |
134 | +-------------------+-----------+-------------+-------------+
135 | | credible_interval | [0.04284, | [0.0535309, | [0.0379814, |
136 | | | 0.065501] | 0.0816476] | 0.0648625] |
137 | +-------------------+-----------+-------------+-------------+
138 | | prob_being_best | 0.06485 | 0.89295 | 0.0422 |
139 | +-------------------+-----------+-------------+-------------+
140 | | expected_loss | 0.0139248 | 0.0004693 | 0.0170767 |
141 | +-------------------+-----------+-------------+-------------+
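
The `posterior_mean` column can be verified by hand from the Beta posterior; e.g. for variant A
(with the default Beta(1/2, 1/2) prior):

```python
round((0.5 + 80) / (0.5 + 0.5 + 1500), 5)  # 0.05363, matching the table above
```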
142 |
143 | ### NormalDataTest
144 | Class for a Bayesian A/B test for normal data.
145 |
146 | **Example:**
147 | ```python
148 | import numpy as np
149 | from bayesian_testing.experiments import NormalDataTest
150 |
151 | # generating some random data
152 | rng = np.random.default_rng(21)
153 | data_a = rng.normal(7.2, 2, 1000)
154 | data_b = rng.normal(7.1, 2, 800)
155 | data_c = rng.normal(7.0, 4, 500)
156 |
157 | # initialize a test:
158 | test = NormalDataTest()
159 |
160 | # add variant using raw data:
161 | test.add_variant_data("A", data_a)
162 | test.add_variant_data("B", data_b)
163 | # test.add_variant_data("C", data_c)
164 |
165 | # add variant using aggregated data:
166 | test.add_variant_data_agg("C", len(data_c), sum(data_c), sum(np.square(data_c)))
167 |
168 | # evaluate test:
169 | results = test.evaluate(sim_count=20000, seed=52, min_is_best=False, interval_alpha=0.99)
170 | results
171 | # print(pd.DataFrame(results).set_index('variant').T.to_markdown(tablefmt="grid"))
172 | ```
173 |
174 | +-------------------+-------------+-------------+-------------+
175 | | | A | B | C |
176 | +===================+=============+=============+=============+
177 | | totals | 1000 | 800 | 500 |
178 | +-------------------+-------------+-------------+-------------+
179 | | sum_values | 7294.67901 | 5685.86168 | 3736.91581 |
180 | +-------------------+-------------+-------------+-------------+
181 | | avg_values | 7.29468 | 7.10733 | 7.47383 |
182 | +-------------------+-------------+-------------+-------------+
183 | | posterior_mean | 7.29462 | 7.10725 | 7.4737 |
184 | +-------------------+-------------+-------------+-------------+
185 | | credible_interval | [7.1359436, | [6.9324733, | [7.0240102, |
186 | | | 7.4528369] | 7.2779293] | 7.9379341] |
187 | +-------------------+-------------+-------------+-------------+
188 | | prob_being_best | 0.1707 | 0.00125 | 0.82805 |
189 | +-------------------+-------------+-------------+-------------+
190 | | expected_loss | 0.1968735 | 0.385112 | 0.0169998 |
191 | +-------------------+-------------+-------------+-------------+
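
As a sanity check, the `posterior_mean` of variant A can be reproduced as
`(sum_values + m_prior * w_prior) / (totals + w_prior)`, assuming the defaults `m_prior=1` and
`w_prior=0.01` (documented for the delta tests below; their use here is an assumption):

```python
round((7294.67901 + 1 * 0.01) / (1000 + 0.01), 5)  # 7.29462, matching the table above
```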
192 |
193 | ### DeltaLognormalDataTest
194 | Class for a Bayesian A/B test for delta-lognormal data (log-normal with zeros).
195 | Delta-lognormal data is a typical case of revenue-per-session data where many sessions have
196 | 0 revenue, while the non-zero values are positive and approximately log-normally distributed.
197 | To handle this data, the calculation combines a binary Bayes model for zero vs non-zero
198 | "conversions" with a log-normal model for the non-zero values.
199 |
200 | **Example:**
201 | ```python
202 | import numpy as np
203 | from bayesian_testing.experiments import DeltaLognormalDataTest
204 |
205 | test = DeltaLognormalDataTest()
206 |
207 | data_a = [7.1, 0.3, 5.9, 0, 1.3, 0.3, 0, 1.2, 0, 3.6, 0, 1.5,
208 | 2.2, 0, 4.9, 0, 0, 1.1, 0, 0, 7.1, 0, 6.9, 0]
209 | data_b = [4.0, 0, 3.3, 19.3, 18.5, 0, 0, 0, 12.9, 0, 0, 0, 10.2,
210 | 0, 0, 23.1, 0, 3.7, 0, 0, 11.3, 10.0, 0, 18.3, 12.1]
211 |
212 | # adding variant using raw data:
213 | test.add_variant_data("A", data_a)
214 | # test.add_variant_data("B", data_b)
215 |
216 | # alternatively, a variant can also be added using aggregated data
217 | # (looks more complicated, but it can be quite handy for large data):
218 | test.add_variant_data_agg(
219 | name="B",
220 | totals=len(data_b),
221 | positives=sum(x > 0 for x in data_b),
222 | sum_values=sum(data_b),
223 | sum_logs=sum([np.log(x) for x in data_b if x > 0]),
224 | sum_logs_2=sum([np.square(np.log(x)) for x in data_b if x > 0])
225 | )
226 |
227 | # evaluate test:
228 | results = test.evaluate(seed=21)
229 | results
230 | # print(pd.DataFrame(results).set_index('variant').T.to_markdown(tablefmt="grid"))
231 | ```
232 |
233 | +---------------------+-------------+-------------+
234 | | | A | B |
235 | +=====================+=============+=============+
236 | | totals | 24 | 25 |
237 | +---------------------+-------------+-------------+
238 | | positives | 13 | 12 |
239 | +---------------------+-------------+-------------+
240 | | sum_values | 43.4 | 146.7 |
241 | +---------------------+-------------+-------------+
242 | | avg_values | 1.80833 | 5.868 |
243 | +---------------------+-------------+-------------+
244 | | avg_positive_values | 3.33846 | 12.225 |
245 | +---------------------+-------------+-------------+
246 | | posterior_mean | 2.09766 | 6.19017 |
247 | +---------------------+-------------+-------------+
248 | | credible_interval | [0.9884509, | [3.3746212, |
249 | | | 6.9054963] | 11.7349253] |
250 | +---------------------+-------------+-------------+
251 | | prob_being_best | 0.04815 | 0.95185 |
252 | +---------------------+-------------+-------------+
253 | | expected_loss | 4.0941101 | 0.1588627 |
254 | +---------------------+-------------+-------------+
255 |
256 | ***Note**: Alternatively, `DeltaNormalDataTest` can be used for cases where conversions are
257 | not necessarily positive values.*
258 |
259 | ### DiscreteDataTest
260 | Class for a Bayesian A/B test for discrete data with a finite number of numerical categories
261 | (states), each representing some value.
262 | This test can be used, for instance, for dice rolls data (when looking for the "best" of
263 | multiple dice) or rating data (e.g. 1-5 stars or a 1-10 scale).
264 |
265 | **Example:**
266 | ```python
267 | from bayesian_testing.experiments import DiscreteDataTest
268 |
269 | # dice rolls data for 3 dice - A, B, C
270 | data_a = [2, 5, 1, 4, 6, 2, 2, 6, 3, 2, 6, 3, 4, 6, 3, 1, 6, 3, 5, 6]
271 | data_b = [1, 2, 2, 2, 2, 3, 2, 3, 4, 2]
272 | data_c = [1, 3, 6, 5, 4]
273 |
274 | # initialize a test with all possible states (i.e. numerical categories):
275 | test = DiscreteDataTest(states=[1, 2, 3, 4, 5, 6])
276 |
277 | # add variant using raw data:
278 | test.add_variant_data("A", data_a)
279 | test.add_variant_data("B", data_b)
280 | test.add_variant_data("C", data_c)
281 |
282 | # add variant using aggregated data:
283 | # test.add_variant_data_agg("C", [1, 0, 1, 1, 1, 1]) # equivalent to rolls in data_c
284 |
285 | # evaluate test:
286 | results = test.evaluate(sim_count=20000, seed=52, min_is_best=False, interval_alpha=0.95)
287 | results
288 | # print(pd.DataFrame(results).set_index('variant').T.to_markdown(tablefmt="grid"))
289 | ```
290 |
291 | +-------------------+------------------+------------------+------------------+
292 | | | A | B | C |
293 | +===================+==================+==================+==================+
294 | | concentration | {1: 2.0, 2: 4.0, | {1: 1.0, 2: 6.0, | {1: 1.0, 2: 0.0, |
295 | | | 3: 4.0, 4: 2.0, | 3: 2.0, 4: 1.0, | 3: 1.0, 4: 1.0, |
296 | | | 5: 2.0, 6: 6.0} | 5: 0.0, 6: 0.0} | 5: 1.0, 6: 1.0} |
297 | +-------------------+------------------+------------------+------------------+
298 | | average_value | 3.8 | 2.3 | 3.8 |
299 | +-------------------+------------------+------------------+------------------+
300 | | posterior_mean | 3.73077 | 2.75 | 3.63636 |
301 | +-------------------+------------------+------------------+------------------+
302 | | credible_interval | [3.0710797, | [2.1791584, | [2.6556465, |
303 | | | 4.3888021] | 3.4589178] | 4.5784839] |
304 | +-------------------+------------------+------------------+------------------+
305 | | prob_being_best | 0.54685 | 0.008 | 0.44515 |
306 | +-------------------+------------------+------------------+------------------+
307 | | expected_loss | 0.199953 | 1.1826766 | 0.2870247 |
308 | +-------------------+------------------+------------------+------------------+
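
The `posterior_mean` values follow from a Dirichlet posterior over the states; a prior
concentration of 1 per state appears consistent with the table (an assumption, not documented
above). For die A:

```python
import numpy as np

states = np.array([1, 2, 3, 4, 5, 6])
concentration = np.array([2, 4, 4, 2, 2, 6])  # observed counts for die A
alpha = concentration + 1                     # assumed Dirichlet(1, ..., 1) prior
round(float((states * alpha).sum() / alpha.sum()), 5)  # 3.73077, matching the table above
```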
309 |
310 | ### PoissonDataTest
311 | Class for a Bayesian A/B test for Poisson data.
312 |
313 | **Example:**
314 | ```python
315 | from bayesian_testing.experiments import PoissonDataTest
316 |
317 | # goals conceded - so fewer is better (duh...)
318 | psg_goals_against = [0, 2, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 3, 1, 0]
319 | city_goals_against = [0, 0, 3, 2, 0, 1, 0, 3, 0, 1, 1, 0, 1, 2]
320 | bayern_goals_against = [1, 0, 0, 1, 1, 2, 1, 0, 2, 0, 0, 2, 2, 1, 0]
321 |
322 | # initialize a test:
323 | test = PoissonDataTest()
324 |
325 | # add variant using raw data:
326 | test.add_variant_data('psg', psg_goals_against)
327 |
328 | # example with specific priors
329 | # ("b_prior" as an effective sample size, and "a_prior/b_prior" as a prior mean):
330 | test.add_variant_data('city', city_goals_against, a_prior=3, b_prior=1)
331 | # test.add_variant_data('bayern', bayern_goals_against)
332 |
333 | # add variant using aggregated data:
334 | test.add_variant_data_agg("bayern", len(bayern_goals_against), sum(bayern_goals_against))
335 |
336 | # evaluate test (since fewer goals is better, we explicitly set the min_is_best to True)
337 | results = test.evaluate(sim_count=20000, seed=52, min_is_best=True)
338 | results
339 | # print(pd.DataFrame(results).set_index('variant').T.to_markdown(tablefmt="grid"))
340 | ```
341 |
342 | +-------------------+-------------+-------------+------------+
343 | | | psg | city | bayern |
344 | +===================+=============+=============+============+
345 | | totals | 15 | 14 | 15 |
346 | +-------------------+-------------+-------------+------------+
347 | | sum_values | 9 | 14 | 13 |
348 | +-------------------+-------------+-------------+------------+
349 | | observed_average | 0.6 | 1.0 | 0.86667 |
350 | +-------------------+-------------+-------------+------------+
351 | | posterior_mean | 0.60265 | 1.13333 | 0.86755 |
352 | +-------------------+-------------+-------------+------------+
353 | | credible_interval | [0.2800848, | [0.6562029, | [0.465913, |
354 | | | 1.0570327] | 1.7265045] | 1.3964389] |
355 | +-------------------+-------------+-------------+------------+
356 | | prob_being_best | 0.78175 | 0.0344 | 0.18385 |
357 | +-------------------+-------------+-------------+------------+
358 | | expected_loss | 0.0369998 | 0.5620553 | 0.3003345 |
359 | +-------------------+-------------+-------------+------------+
360 |
361 | _Note: Since we set `min_is_best=True` (because conceded goals are "bad"), probability and
362 | loss are in favor of variants with lower posterior means._
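
Thanks to Gamma-Poisson conjugacy, the `posterior_mean` is easy to verify by hand; e.g. for
`city`, which was added with the explicit priors above:

```python
# Gamma posterior mean = (a_prior + sum_values) / (b_prior + totals)
round((3 + 14) / (1 + 14), 5)  # 1.13333, matching the table above
```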
363 |
364 | ### ExponentialDataTest
365 | Class for a Bayesian A/B test for exponential data.
366 |
367 | **Example:**
368 | ```python
369 | import numpy as np
370 | from bayesian_testing.experiments import ExponentialDataTest
371 |
372 | # waiting times for 3 different variants, each with many observations,
373 | # generated using exponential distributions with defined scales (expected values)
374 | waiting_times_a = np.random.exponential(scale=10, size=200)
375 | waiting_times_b = np.random.exponential(scale=11, size=210)
376 | waiting_times_c = np.random.exponential(scale=11, size=220)
377 |
378 | # initialize a test:
379 | test = ExponentialDataTest()
380 | # adding variants using the observation data:
381 | test.add_variant_data('A', waiting_times_a)
382 | test.add_variant_data('B', waiting_times_b)
383 | test.add_variant_data('C', waiting_times_c)
384 |
385 | # alternatively, add variants using aggregated data:
386 | # test.add_variant_data_agg('A', len(waiting_times_a), sum(waiting_times_a))
387 |
388 | # evaluate test (since a lower waiting time is better, we set the min_is_best to True)
389 | results = test.evaluate(sim_count=20000, min_is_best=True)
390 | results
391 | # print(pd.DataFrame(results).set_index('variant').T.to_markdown(tablefmt="grid"))
392 | ```
393 |
394 | +-------------------+-------------+-------------+-------------+
395 | | | A | B | C |
396 | +===================+=============+=============+=============+
397 | | totals | 200 | 210 | 220 |
398 | +-------------------+-------------+-------------+-------------+
399 | | sum_values | 1827.81709 | 2217.46016 | 2160.73134 |
400 | +-------------------+-------------+-------------+-------------+
401 | | observed_average | 9.13909 | 10.55933 | 9.82151 |
402 | +-------------------+-------------+-------------+-------------+
403 | | posterior_mean | 9.13502 | 10.55478 | 9.8175 |
404 | +-------------------+-------------+-------------+-------------+
405 | | credible_interval | [7.994178, | [9.2543372, | [8.6184821, |
406 | | | 10.5410967] | 12.1527256] | 11.2566538] |
407 | +-------------------+-------------+-------------+-------------+
408 | | prob_being_best | 0.7456 | 0.0405 | 0.2139 |
409 | +-------------------+-------------+-------------+-------------+
410 | | expected_loss | 0.1428729 | 1.5674747 | 0.8230728 |
411 | +-------------------+-------------+-------------+-------------+
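
As a sanity check, the reported `posterior_mean` matches
`(b_prior + sum_values) / (a_prior + totals)` under what appears to be a low-information
`Gamma(0.1, 0.1)` prior on the rate (inferred from the table, not documented above):

```python
round((0.1 + 1827.81709) / (0.1 + 200), 5)  # A: 9.13502, matching the table above
```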
412 |
413 | ## Development
414 | To set up a development environment, use [Poetry](https://python-poetry.org/)
415 | and [pre-commit](https://pre-commit.com):
416 | ```console
417 | pip install poetry
418 | poetry install
419 | poetry run pre-commit install
420 | ```
421 |
422 | ## To be implemented
423 |
424 | Additional metrics:
425 | - `Potential Value Remaining`
426 |
427 | ## References
428 | - The `bayesian_testing` package itself depends only on the [numpy](https://numpy.org) package.
429 | - Work on this package (including the default prior selection) was inspired mainly by the
430 | Coursera course [Bayesian Statistics: From Concept to Data Analysis](https://www.coursera.org/learn/bayesian-statistics).
431 |
--------------------------------------------------------------------------------
/bayesian_testing/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 | from importlib.metadata import version, PackageNotFoundError # type: ignore
3 | except ImportError: # pragma: no cover
4 | from importlib_metadata import version, PackageNotFoundError # type: ignore
5 |
6 | try:
7 | __version__ = version(__name__)
8 | except PackageNotFoundError: # pragma: no cover
9 | __version__ = "unknown"
10 |
--------------------------------------------------------------------------------
/bayesian_testing/experiments/__init__.py:
--------------------------------------------------------------------------------
1 | from .binary import BinaryDataTest
2 | from .normal import NormalDataTest
3 | from .delta_lognormal import DeltaLognormalDataTest
4 | from .discrete import DiscreteDataTest
5 | from .poisson import PoissonDataTest
6 | from .delta_normal import DeltaNormalDataTest
7 | from .exponential import ExponentialDataTest
8 |
9 | __all__ = [
10 | "BinaryDataTest",
11 | "NormalDataTest",
12 | "DeltaLognormalDataTest",
13 | "DeltaNormalDataTest",
14 | "DiscreteDataTest",
15 | "PoissonDataTest",
16 | "ExponentialDataTest",
17 | ]
18 |
--------------------------------------------------------------------------------
/bayesian_testing/experiments/base.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 | import warnings
3 |
4 |
5 | class BaseDataTest:
6 | """
7 | Base class for Bayesian A/B tests.
8 | """
9 |
10 | def __init__(self) -> None:
11 | """
12 | Initialize BaseDataTest class.
13 | """
14 | self.data = {}
15 |
16 | @property
17 | def variant_names(self):
18 | return [k for k in self.data]
19 |
20 | def eval_simulation(
21 | self,
22 | sim_count: int = 20000,
23 | seed: int = None,
24 | min_is_best: bool = False,
25 | interval_alpha: float = 0.95,
26 | ) -> Tuple[dict, dict, dict]:
27 | """
28 | Should be implemented in each individual experiment.
29 | """
30 | raise NotImplementedError
31 |
32 | def probabs_of_being_best(
33 | self,
34 | sim_count: int = 20000,
35 | seed: int = None,
36 | min_is_best: bool = False,
37 | interval_alpha: float = 0.95,
38 | ) -> dict:
39 | """
40 | Calculate probabilities of being best for a current class state.
41 |
42 | Parameters
43 | ----------
44 | sim_count : Number of simulations to be used for probability estimation.
45 | seed : Random seed.
46 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
47 | interval_alpha : Credible interval probability (value between 0 and 1).
48 |
49 | Returns
50 | -------
51 | pbbs : Dictionary with probabilities of being best for all variants in experiment.
52 | """
53 | pbbs, loss, intervals = self.eval_simulation(sim_count, seed, min_is_best, interval_alpha)
54 |
55 | return pbbs
56 |
57 | def expected_loss(
58 | self,
59 | sim_count: int = 20000,
60 | seed: int = None,
61 | min_is_best: bool = False,
62 | interval_alpha: float = 0.95,
63 | ) -> dict:
64 | """
65 | Calculate expected loss for a current class state.
66 |
67 | Parameters
68 | ----------
69 | sim_count : Number of simulations to be used for probability estimation.
70 | seed : Random seed.
71 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
72 | interval_alpha : Credible interval probability (value between 0 and 1).
73 |
74 | Returns
75 | -------
76 | loss : Dictionary with expected loss for all variants in experiment.
77 | """
78 | pbbs, loss, intervals = self.eval_simulation(sim_count, seed, min_is_best, interval_alpha)
79 |
80 | return loss
81 |
82 | def credible_intervals(
83 | self,
84 | sim_count: int = 20000,
85 | seed: int = None,
86 | min_is_best: bool = False,
87 | interval_alpha: float = 0.95,
88 | ) -> dict:
89 | """
90 | Calculate quantile-based credible intervals for a current class state.
91 |
92 | Parameters
93 | ----------
94 | sim_count : Number of simulations to be used for probability estimation.
95 | seed : Random seed.
96 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
97 | interval_alpha : Credible interval probability (value between 0 and 1).
98 |
99 | Returns
100 | -------
101 | intervals : Dictionary with quantile-based credible intervals for all variants.
102 | """
103 | pbbs, loss, intervals = self.eval_simulation(sim_count, seed, min_is_best, interval_alpha)
104 |
105 | return intervals
106 |
107 | def delete_variant(self, name: str) -> None:
108 | """
109 | Delete variant and all its data from experiment.
110 |
111 | Parameters
112 | ----------
113 | name : Variant name.
114 | """
115 | if not isinstance(name, str):
116 | raise ValueError("Variant name has to be a string.")
117 | if name not in self.variant_names:
118 | warnings.warn(f"Nothing to be deleted. Variant {name} is not in experiment.")
119 | else:
120 | del self.data[name]
121 |
--------------------------------------------------------------------------------
/bayesian_testing/experiments/binary.py:
--------------------------------------------------------------------------------
1 | from numbers import Number
2 | from typing import List, Tuple
3 |
4 | from bayesian_testing.experiments.base import BaseDataTest
5 | from bayesian_testing.metrics import eval_bernoulli_agg
6 | from bayesian_testing.utilities import get_logger
7 |
8 | logger = get_logger("bayesian_testing")
9 |
10 |
11 | class BinaryDataTest(BaseDataTest):
12 | """
13 | Class for Bayesian A/B test for binary-like data (conversions, successes, etc.).
14 |
15 | After class initialization, use the add_variant methods to insert variant data.
16 | Then, to get the results of the test, use for instance the `evaluate` method.
17 | """
18 |
19 | def __init__(self) -> None:
20 | """
21 | Initialize BinaryDataTest class.
22 | """
23 | super().__init__()
24 |
25 | @property
26 | def totals(self):
27 | return [self.data[k]["totals"] for k in self.data]
28 |
29 | @property
30 | def positives(self):
31 | return [self.data[k]["positives"] for k in self.data]
32 |
33 | @property
34 | def a_priors(self):
35 | return [self.data[k]["a_prior"] for k in self.data]
36 |
37 | @property
38 | def b_priors(self):
39 | return [self.data[k]["b_prior"] for k in self.data]
40 |
41 | def eval_simulation(
42 | self,
43 | sim_count: int = 20000,
44 | seed: int = None,
45 | min_is_best: bool = False,
46 | interval_alpha: float = 0.95,
47 | ) -> Tuple[dict, dict, dict]:
48 | """
49 | Calculate probabilities of being best, expected loss and credible intervals for a current
50 | class state.
51 |
52 | Parameters
53 | ----------
54 | sim_count : Number of simulations to be used for probability estimation.
55 | seed : Random seed.
56 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
57 | interval_alpha : Credible interval probability (value between 0 and 1).
58 |
59 | Returns
60 | -------
61 | res_pbbs : Dictionary with probabilities of being best for all variants in experiment.
62 | res_loss : Dictionary with expected loss for all variants in experiment.
63 | res_intervals : Dictionary with quantile-based credible intervals for all variants.
64 | """
65 | pbbs, loss, intervals = eval_bernoulli_agg(
66 | self.totals,
67 | self.positives,
68 | self.a_priors,
69 | self.b_priors,
70 | sim_count,
71 | seed,
72 | min_is_best,
73 | interval_alpha,
74 | )
75 | res_pbbs = dict(zip(self.variant_names, pbbs))
76 | res_loss = dict(zip(self.variant_names, loss))
77 | res_intervals = dict(zip(self.variant_names, intervals))
78 |
79 | return res_pbbs, res_loss, res_intervals
80 |
81 | def evaluate(
82 | self,
83 | sim_count: int = 20000,
84 | seed: int = None,
85 | min_is_best: bool = False,
86 | interval_alpha: float = 0.95,
87 | ) -> List[dict]:
88 | """
89 | Evaluation of experiment.
90 |
91 | Parameters
92 | ----------
93 | sim_count : Number of simulations to be used for probability estimation.
94 | seed : Random seed.
95 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
96 | interval_alpha : Credible interval probability (value between 0 and 1).
97 |
98 | Returns
99 | -------
100 | res : List of dictionaries with results per variant.
101 | """
102 | keys = [
103 | "variant",
104 | "totals",
105 | "positives",
106 | "positive_rate",
107 | "posterior_mean",
108 | "credible_interval",
109 | "prob_being_best",
110 | "expected_loss",
111 | ]
112 | positive_rate = [round(i[0] / i[1], 5) for i in zip(self.positives, self.totals)]
113 | posterior_mean = [  # Beta posterior mean: (a + positives) / (a + b + totals)
114 | round((i[2] + i[0]) / (i[2] + i[3] + i[1]), 5)
115 | for i in zip(self.positives, self.totals, self.a_priors, self.b_priors)
116 | ]
117 | eval_pbbs, eval_loss, eval_intervals = self.eval_simulation(
118 | sim_count, seed, min_is_best, interval_alpha
119 | )
120 | pbbs = list(eval_pbbs.values())
121 | loss = list(eval_loss.values())
122 | intervals = list(eval_intervals.values())
123 | data = [
124 | self.variant_names,
125 | self.totals,
126 | self.positives,
127 | positive_rate,
128 | posterior_mean,
129 | intervals,
130 | pbbs,
131 | loss,
132 | ]
133 | res = [dict(zip(keys, item)) for item in zip(*data)]
134 |
135 | return res
136 |
137 | def add_variant_data_agg(
138 | self,
139 | name: str,
140 | totals: int,
141 | positives: int,
142 | a_prior: Number = 0.5,
143 | b_prior: Number = 0.5,
144 | replace: bool = True,
145 | ) -> None:
146 | """
147 | Add variant data to test class using aggregated binary data.
148 | This can be convenient as the aggregation can be done at the database level.
149 |
150 | The default prior setup is Beta(1/2, 1/2), which is a non-informative prior.
151 |
152 | Parameters
153 | ----------
154 | name : Variant name.
155 | totals : Total number of experiment observations (e.g. number of sessions).
156 | positives : Total number of 1s for a given variant (e.g. number of conversions).
157 | a_prior : Prior alpha parameter of a Beta distribution (conjugate prior).
158 | Default value 0.5 is based on the non-informative prior Beta(0.5, 0.5).
159 | b_prior : Prior beta parameter of a Beta distribution (conjugate prior).
160 | Default value 0.5 is based on the non-informative prior Beta(0.5, 0.5).
161 | replace : Replace data if variant already exists.
162 | If set to False, data of existing variant will be appended to existing data.
163 | """
164 | if not isinstance(name, str):
165 | raise ValueError("Variant name has to be a string.")
166 | if a_prior <= 0 or b_prior <= 0:
167 | raise ValueError("Both [a_prior, b_prior] have to be positive numbers.")
168 | if totals <= 0:
169 | raise ValueError("Input variable 'totals' is expected to be positive integer.")
170 | if positives < 0:
171 | raise ValueError("Input variable 'positives' is expected to be non-negative integer.")
172 | if totals < positives:
173 | raise ValueError("Not possible to have more positives that totals!")
174 |
175 | if name not in self.variant_names:
176 | self.data[name] = {
177 | "totals": totals,
178 | "positives": positives,
179 | "a_prior": a_prior,
180 | "b_prior": b_prior,
181 | }
182 | elif name in self.variant_names and replace:
183 | msg = (
184 | f"Variant {name} already exists - new data is replacing it. "
185 | "If you wish to append instead, use replace=False."
186 | )
187 | logger.info(msg)
188 | self.data[name] = {
189 | "totals": totals,
190 | "positives": positives,
191 | "a_prior": a_prior,
192 | "b_prior": b_prior,
193 | }
194 | elif name in self.variant_names and not replace:
195 | msg = (
196 | f"Variant {name} already exists - new data is appended to variant, "
197 | "keeping its original prior setup. "
198 | "If you wish to replace data instead, use replace=True."
199 | )
200 | logger.info(msg)
201 | self.data[name]["totals"] += totals
202 | self.data[name]["positives"] += positives
203 |
204 | def add_variant_data(
205 | self,
206 | name: str,
207 | data: List[int],
208 | a_prior: Number = 0.5,
209 | b_prior: Number = 0.5,
210 | replace: bool = True,
211 | ) -> None:
212 | """
213 | Add variant data to test class using raw binary data.
214 |
215 | The default prior setup is Beta(1/2, 1/2), which is a non-informative prior.
216 |
217 | Parameters
218 | ----------
219 | name : Variant name.
220 | data : List of binary data containing zeros (non-conversion) and ones (conversions).
221 | a_prior : Prior alpha parameter of a Beta distribution (conjugate prior).
222 | Default value 0.5 is based on the non-informative prior Beta(0.5, 0.5).
223 | b_prior : Prior beta parameter of a Beta distribution (conjugate prior).
224 | Default value 0.5 is based on the non-informative prior Beta(0.5, 0.5).
225 | replace : Replace data if variant already exists.
226 | If set to False, data of existing variant will be appended to existing data.
227 | """
228 | if len(data) == 0:
229 | raise ValueError("Data of added variant needs to have some observations.")
230 | if not all(i in [0, 1] for i in data):
231 | raise ValueError("Input data needs to be a list of zeros and ones.")
232 |
233 | totals = len(data)
234 | positives = sum(data)
235 |
236 | self.add_variant_data_agg(name, totals, positives, a_prior, b_prior, replace)
237 |
--------------------------------------------------------------------------------
/bayesian_testing/experiments/delta_lognormal.py:
--------------------------------------------------------------------------------
1 | from numbers import Number
2 | from typing import List, Tuple
3 |
4 | import numpy as np
5 |
6 | from bayesian_testing.experiments.base import BaseDataTest
7 | from bayesian_testing.metrics import eval_delta_lognormal_agg
8 | from bayesian_testing.utilities import get_logger
9 |
10 | logger = get_logger("bayesian_testing")
11 |
12 |
13 | class DeltaLognormalDataTest(BaseDataTest):
14 | """
15 | Class for Bayesian A/B test for Delta-LogNormal data (Log-Normal with possible zeros).
16 | Delta-lognormal data is a typical case of revenue-per-session data where many
17 | sessions have 0 revenue (meaning non-conversions).
18 | To handle this data, the evaluation methods combine a binary Bayes model for
19 | zero vs non-zero "conversion" with a log-normal model for non-zero values.
20 |
21 | After class initialization, use the add_variant methods to insert variant data.
22 | Then, to get the results of the test, use for instance the `evaluate` method.
23 | """
24 |
25 | def __init__(self) -> None:
26 | """
27 | Initialize DeltaLognormalDataTest class.
28 | """
29 | super().__init__()
30 |
31 | @property
32 | def totals(self):
33 | return [self.data[k]["totals"] for k in self.data]
34 |
35 | @property
36 | def positives(self):
37 | return [self.data[k]["positives"] for k in self.data]
38 |
39 | @property
40 | def sum_values(self):
41 | return [self.data[k]["sum_values"] for k in self.data]
42 |
43 | @property
44 | def sum_logs(self):
45 | return [self.data[k]["sum_logs"] for k in self.data]
46 |
47 | @property
48 | def sum_logs_2(self):
49 | return [self.data[k]["sum_logs_2"] for k in self.data]
50 |
51 | @property
52 | def a_priors_beta(self):
53 | return [self.data[k]["a_prior_beta"] for k in self.data]
54 |
55 | @property
56 | def b_priors_beta(self):
57 | return [self.data[k]["b_prior_beta"] for k in self.data]
58 |
59 | @property
60 | def m_priors(self):
61 | return [self.data[k]["m_prior"] for k in self.data]
62 |
63 | @property
64 | def a_priors_ig(self):
65 | return [self.data[k]["a_prior_ig"] for k in self.data]
66 |
67 | @property
68 | def b_priors_ig(self):
69 | return [self.data[k]["b_prior_ig"] for k in self.data]
70 |
71 | @property
72 | def w_priors(self):
73 | return [self.data[k]["w_prior"] for k in self.data]
74 |
75 | def eval_simulation(
76 | self,
77 | sim_count: int = 20000,
78 | seed: int = None,
79 | min_is_best: bool = False,
80 | interval_alpha: float = 0.95,
81 | ) -> Tuple[dict, dict, dict]:
82 | """
83 | Calculate probabilities of being best, expected loss and credible intervals for a current
84 | class state.
85 |
86 | Parameters
87 | ----------
88 | sim_count : Number of simulations to be used for probability estimation.
89 | seed : Random seed.
90 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
91 | interval_alpha : Credible interval probability (value between 0 and 1).
92 |
93 | Returns
94 | -------
95 | res_pbbs : Dictionary with probabilities of being best for all variants in experiment.
96 | res_loss : Dictionary with expected loss for all variants in experiment.
97 | res_intervals : Dictionary with quantile-based credible intervals for all variants.
98 | """
99 | pbbs, loss, intervals = eval_delta_lognormal_agg(
100 | self.totals,
101 | self.positives,
102 | self.sum_logs,
103 | self.sum_logs_2,
104 | sim_count=sim_count,
105 | a_priors_beta=self.a_priors_beta,
106 | b_priors_beta=self.b_priors_beta,
107 | m_priors=self.m_priors,
108 | a_priors_ig=self.a_priors_ig,
109 | b_priors_ig=self.b_priors_ig,
110 | w_priors=self.w_priors,
111 | seed=seed,
112 | min_is_best=min_is_best,
113 | interval_alpha=interval_alpha,
114 | )
115 | res_pbbs = dict(zip(self.variant_names, pbbs))
116 | res_loss = dict(zip(self.variant_names, loss))
117 | res_intervals = dict(zip(self.variant_names, intervals))
118 |
119 | return res_pbbs, res_loss, res_intervals
120 |
121 | def evaluate(
122 | self,
123 | sim_count: int = 20000,
124 | seed: int = None,
125 | min_is_best: bool = False,
126 | interval_alpha: float = 0.95,
127 | ) -> List[dict]:
128 | """
129 | Evaluation of experiment.
130 |
131 | Parameters
132 | ----------
133 | sim_count : Number of simulations to be used for probability estimation.
134 | seed : Random seed.
135 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
136 | interval_alpha : Credible interval probability (value between 0 and 1).
137 |
138 | Returns
139 | -------
140 | res : List of dictionaries with results per variant.
141 | """
142 | keys = [
143 | "variant",
144 | "totals",
145 | "positives",
146 | "sum_values",
147 | "avg_values",
148 | "avg_positive_values",
149 | "posterior_mean",
150 | "credible_interval",
151 | "prob_being_best",
152 | "expected_loss",
153 | ]
154 | avg_values = [round(i[0] / i[1], 5) for i in zip(self.sum_values, self.totals)]
155 | avg_pos_values = [round(i[0] / i[1], 5) for i in zip(self.sum_values, self.positives)]
156 | a_posterior_ig = [i[0] + (i[1] / 2) for i in zip(self.a_priors_ig, self.positives)]
157 | x_ig = [i[0] / i[1] for i in zip(self.sum_logs, self.positives)]  # mean of logs
158 | b_posterior_ig = [  # inverse-gamma posterior b (normal-inverse-gamma update)
159 | (
160 | i[6]
161 | + (1 / 2) * (i[1] - 2 * i[0] * i[3] + i[2] * (i[3] ** 2))
162 | + ((i[2] * i[5]) / (2 * (i[2] + i[5]))) * ((i[3] - i[4]) ** 2)
163 | )
164 | for i in zip(
165 | self.sum_logs,
166 | self.sum_logs_2,
167 | self.positives,
168 | x_ig,
169 | self.m_priors,
170 | self.w_priors,
171 | self.b_priors_ig,
172 | )
173 | ]
174 | posterior_mean = [  # E[log-normal mean exp(mu + sigma^2/2)] * conversion part
175 | round(
176 | np.exp(((i[0] + i[3] * i[4]) / (i[1] + i[4])) + i[8] / (2 * i[7]))
177 | * ((i[5] + i[1]) / (i[6] + i[2])),
178 | 5,
179 | )
180 | for i in zip(
181 | self.sum_logs,
182 | self.positives,
183 | self.totals,
184 | self.m_priors,
185 | self.w_priors,
186 | self.a_priors_beta,
187 | self.b_priors_beta,
188 | a_posterior_ig,
189 | b_posterior_ig,
190 | )
191 | ]
192 | eval_pbbs, eval_loss, eval_intervals = self.eval_simulation(
193 | sim_count, seed, min_is_best, interval_alpha
194 | )
195 | pbbs = list(eval_pbbs.values())
196 | loss = list(eval_loss.values())
197 | intervals = list(eval_intervals.values())
198 | data = [
199 | self.variant_names,
200 | self.totals,
201 | self.positives,
202 | [round(i, 5) for i in self.sum_values],
203 | avg_values,
204 | avg_pos_values,
205 | posterior_mean,
206 | intervals,
207 | pbbs,
208 | loss,
209 | ]
210 | res = [dict(zip(keys, item)) for item in zip(*data)]
211 |
212 | return res
213 |
214 | def add_variant_data_agg(
215 | self,
216 | name: str,
217 | totals: int,
218 | positives: int,
219 | sum_values: float,
220 | sum_logs: float,
221 | sum_logs_2: float,
222 | a_prior_beta: Number = 0.5,
223 | b_prior_beta: Number = 0.5,
224 | m_prior: Number = 1,
225 | a_prior_ig: Number = 0,
226 | b_prior_ig: Number = 0,
227 | w_prior: Number = 0.01,
228 | replace: bool = True,
229 | ) -> None:
230 | """
231 | Add variant data to test class using aggregated Delta-LogNormal data.
232 | This can be convenient as the aggregation can be done at the database level.
233 |
234 | The goal of the default prior setup is to be low-information.
235 | It should be tuned with caution.
236 |
237 | Parameters
238 | ----------
239 | name : Variant name.
240 | totals : Total number of experiment observations (e.g. number of sessions).
241 | positives : Total number of non-zero values for a given variant.
242 | sum_values : Sum of non-zero values for a given variant.
243 | sum_logs : Sum of logarithms of non-zero data values for a given variant.
244 | sum_logs_2 : Sum of squared logarithms of non-zero data values for a given variant.
245 | a_prior_beta : Prior alpha parameter from Beta distribution for conversion part.
246 | b_prior_beta : Prior beta parameter from Beta distribution for conversion part.
247 | m_prior : Prior normal mean for logarithms of non-zero data.
248 | a_prior_ig : Prior alpha from inverse gamma dist. for unknown variance of logarithms.
249 | In theory a > 0, but as we always have at least one observation, we can start at 0.
250 | b_prior_ig : Prior beta from inverse gamma dist. for unknown variance of logarithms.
251 | In theory b > 0, but as we always have at least one observation, we can start at 0.
252 | w_prior : Prior effective sample sizes for normal distribution of logarithms of data.
253 | replace : Replace data if variant already exists.
254 | If set to False, data of existing variant will be appended to existing data.
255 | """
256 | if not isinstance(name, str):
257 | raise ValueError("Variant name has to be a string.")
258 | if a_prior_beta <= 0 or b_prior_beta <= 0:
259 | raise ValueError("Both [a_prior_beta, b_prior_beta] have to be positive numbers.")
260 | if m_prior < 0 or a_prior_ig < 0 or b_prior_ig < 0 or w_prior < 0:
261 | raise ValueError("All priors of [m, a_ig, b_ig, w] have to be non-negative numbers.")
262 | if positives == 0:
263 | raise ValueError("Variant has to have some non-zero (positive) values.")
264 | if positives < 0:
265 | raise ValueError("Input variable 'positives' is expected to be a positive integer.")
266 | if totals < positives:
267 | raise ValueError("Not possible to have more positives that totals!")
268 |
269 | if name not in self.variant_names:
270 | self.data[name] = {
271 | "totals": totals,
272 | "positives": positives,
273 | "sum_values": sum_values,
274 | "sum_logs": sum_logs,
275 | "sum_logs_2": sum_logs_2,
276 | "a_prior_beta": a_prior_beta,
277 | "b_prior_beta": b_prior_beta,
278 | "m_prior": m_prior,
279 | "a_prior_ig": a_prior_ig,
280 | "b_prior_ig": b_prior_ig,
281 | "w_prior": w_prior,
282 | }
283 | elif name in self.variant_names and replace:
284 | msg = (
285 | f"Variant {name} already exists - new data is replacing it. "
286 | "If you wish to append instead, use replace=False."
287 | )
288 | logger.info(msg)
289 | self.data[name] = {
290 | "totals": totals,
291 | "positives": positives,
292 | "sum_values": sum_values,
293 | "sum_logs": sum_logs,
294 | "sum_logs_2": sum_logs_2,
295 | "a_prior_beta": a_prior_beta,
296 | "b_prior_beta": b_prior_beta,
297 | "m_prior": m_prior,
298 | "a_prior_ig": a_prior_ig,
299 | "b_prior_ig": b_prior_ig,
300 | "w_prior": w_prior,
301 | }
302 | elif name in self.variant_names and not replace:
303 | msg = (
304 | f"Variant {name} already exists - new data is appended to variant, "
305 | "keeping its original prior setup. "
306 | "If you wish to replace data instead, use replace=True."
307 | )
308 | logger.info(msg)
309 | self.data[name]["totals"] += totals
310 | self.data[name]["positives"] += positives
311 | self.data[name]["sum_values"] += sum_values
312 | self.data[name]["sum_logs"] += sum_logs
313 | self.data[name]["sum_logs_2"] += sum_logs_2
314 |
315 | def add_variant_data(
316 | self,
317 | name: str,
318 | data: List[Number],
319 | a_prior_beta: Number = 0.5,
320 | b_prior_beta: Number = 0.5,
321 | m_prior: Number = 1,
322 | a_prior_ig: Number = 0,
323 | b_prior_ig: Number = 0,
324 | w_prior: Number = 0.01,
325 | replace: bool = True,
326 | ) -> None:
327 | """
328 | Add variant data to test class using raw Delta-LogNormal data.
329 |
330 | The goal of the default prior setup is to be low-information. It should be tuned with caution.
331 |
332 | Parameters
333 | ----------
334 | name : Variant name.
335 | data : List of delta-lognormal data (e.g. revenues in sessions).
336 | a_prior_beta : Prior alpha parameter from Beta distribution for conversion part.
337 | b_prior_beta : Prior beta parameter from Beta distribution for conversion part.
338 | m_prior : Prior mean for logarithms of non-zero data.
339 | a_prior_ig : Prior alpha from inverse gamma dist. for unknown variance of logarithms.
340 | In theory a > 0, but as we always have at least one observation, we can start at 0.
341 | b_prior_ig : Prior beta from inverse gamma dist. for unknown variance of logarithms.
342 | In theory b > 0, but as we always have at least one observation, we can start at 0.
343 | w_prior : Prior effective sample sizes for normal distribution of logarithms of data.
344 | replace : Replace data if variant already exists.
345 | If set to False, data of existing variant will be appended to existing data.
346 | """
347 | if len(data) == 0:
348 | raise ValueError("Data of added variant needs to have some observations.")
349 | if min(data) < 0:
350 | raise ValueError("Input data needs to be a list of non-negative numbers.")
351 |
352 | totals = len(data)
353 | positives = sum(x > 0 for x in data)
354 | sum_values = sum(data)
355 | sum_logs = sum([np.log(x) for x in data if x > 0])
356 | sum_logs_2 = sum([np.square(np.log(x)) for x in data if x > 0])
357 |
358 | self.add_variant_data_agg(
359 | name,
360 | totals,
361 | positives,
362 | sum_values,
363 | sum_logs,
364 | sum_logs_2,
365 | a_prior_beta,
366 | b_prior_beta,
367 | m_prior,
368 | a_prior_ig,
369 | b_prior_ig,
370 | w_prior,
371 | replace,
372 | )
373 |
--------------------------------------------------------------------------------
/bayesian_testing/experiments/delta_normal.py:
--------------------------------------------------------------------------------
1 | from numbers import Number
2 | from typing import List, Tuple
3 | import numpy as np
4 | from bayesian_testing.experiments.base import BaseDataTest
5 | from bayesian_testing.metrics import eval_delta_normal_agg
6 | from bayesian_testing.utilities import get_logger
7 |
8 | logger = get_logger("bayesian_testing")
9 |
10 |
11 | class DeltaNormalDataTest(BaseDataTest):
12 | """
13 | Class for Bayesian A/B test for Delta-Normal data (Normal data with possible zeros).
14 | Delta-normal data is a typical case of net profit data where many sessions have 0 values
15 | (meaning non-conversions), and the remaining values are normally distributed.
16 | To handle this data, the evaluation methods combine a binary Bayes model
17 | for zero vs non-zero "conversion" with a normal model for non-zero values.
18 |
19 | After class initialization, use the add_variant methods to insert variant data.
20 | Then, to get the results of the test, use for instance the `evaluate` method.
21 | """
22 |
23 | def __init__(self) -> None:
24 | """
25 | Initialize DeltaNormalDataTest class.
26 | """
27 | super().__init__()
28 |
29 | @property
30 | def totals(self):
31 | return [self.data[k]["totals"] for k in self.data]
32 |
33 | @property
34 | def non_zeros(self):
35 | return [self.data[k]["non_zeros"] for k in self.data]
36 |
37 | @property
38 | def sum_values(self):
39 | return [self.data[k]["sum_values"] for k in self.data]
40 |
41 | @property
42 | def sum_values_2(self):
43 | return [self.data[k]["sum_values_2"] for k in self.data]
44 |
45 | @property
46 | def a_priors_beta(self):
47 | return [self.data[k]["a_prior_beta"] for k in self.data]
48 |
49 | @property
50 | def b_priors_beta(self):
51 | return [self.data[k]["b_prior_beta"] for k in self.data]
52 |
53 | @property
54 | def m_priors(self):
55 | return [self.data[k]["m_prior"] for k in self.data]
56 |
57 | @property
58 | def a_priors_ig(self):
59 | return [self.data[k]["a_prior_ig"] for k in self.data]
60 |
61 | @property
62 | def b_priors_ig(self):
63 | return [self.data[k]["b_prior_ig"] for k in self.data]
64 |
65 | @property
66 | def w_priors(self):
67 | return [self.data[k]["w_prior"] for k in self.data]
68 |
69 | def eval_simulation(
70 | self,
71 | sim_count: int = 20000,
72 | seed: int = None,
73 | min_is_best: bool = False,
74 | interval_alpha: float = 0.95,
75 | ) -> Tuple[dict, dict, dict]:
76 | """
77 | Calculate probabilities of being best, expected loss and credible intervals for a current
78 | class state.
79 |
80 | Parameters
81 | ----------
82 | sim_count : Number of simulations to be used for probability estimation.
83 | seed : Random seed.
84 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
85 | interval_alpha : Credible interval probability (value between 0 and 1).
86 |
87 | Returns
88 | -------
89 | res_pbbs : Dictionary with probabilities of being best for all variants in experiment.
90 | res_loss : Dictionary with expected loss for all variants in experiment.
91 | res_intervals : Dictionary with quantile-based credible intervals for all variants.
92 | """
93 | pbbs, loss, intervals = eval_delta_normal_agg(
94 | self.totals,
95 | self.non_zeros,
96 | self.sum_values,
97 | self.sum_values_2,
98 | sim_count=sim_count,
99 | a_priors_beta=self.a_priors_beta,
100 | b_priors_beta=self.b_priors_beta,
101 | m_priors=self.m_priors,
102 | a_priors_ig=self.a_priors_ig,
103 | b_priors_ig=self.b_priors_ig,
104 | w_priors=self.w_priors,
105 | seed=seed,
106 | min_is_best=min_is_best,
107 | interval_alpha=interval_alpha,
108 | )
109 | res_pbbs = dict(zip(self.variant_names, pbbs))
110 | res_loss = dict(zip(self.variant_names, loss))
111 | res_intervals = dict(zip(self.variant_names, intervals))
112 |
113 | return res_pbbs, res_loss, res_intervals
114 |
115 | def evaluate(
116 | self,
117 | sim_count: int = 20000,
118 | seed: int = None,
119 | min_is_best: bool = False,
120 | interval_alpha: float = 0.95,
121 | ) -> List[dict]:
122 | """
123 | Evaluation of experiment.
124 |
125 | Parameters
126 | ----------
127 | sim_count : Number of simulations to be used for probability estimation.
128 | seed : Random seed.
129 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
130 | interval_alpha : Credible interval probability (value between 0 and 1).
131 |
132 | Returns
133 | -------
134 | res : List of dictionaries with results per variant.
135 | """
136 | keys = [
137 | "variant",
138 | "totals",
139 | "non_zeros",
140 | "sum_values",
141 | "avg_values",
142 | "avg_non_zero_values",
143 | "posterior_mean",
144 | "credible_interval",
145 | "prob_being_best",
146 | "expected_loss",
147 | ]
148 | avg_values = [round(i[0] / i[1], 5) for i in zip(self.sum_values, self.totals)]
149 | avg_pos_values = [round(i[0] / i[1], 5) for i in zip(self.sum_values, self.non_zeros)]
150 | posterior_mean = [
151 |             round(((i[0] + i[3] * i[4]) / (i[1] + i[4])) * ((i[5] + i[1]) / (i[5] + i[6] + i[2])), 5)
152 | for i in zip(
153 | self.sum_values,
154 | self.non_zeros,
155 | self.totals,
156 | self.m_priors,
157 | self.w_priors,
158 | self.a_priors_beta,
159 | self.b_priors_beta,
160 | )
161 | ]
162 | eval_pbbs, eval_loss, eval_intervals = self.eval_simulation(
163 | sim_count, seed, min_is_best, interval_alpha
164 | )
165 | pbbs = list(eval_pbbs.values())
166 | loss = list(eval_loss.values())
167 | intervals = list(eval_intervals.values())
168 | data = [
169 | self.variant_names,
170 | self.totals,
171 | self.non_zeros,
172 | [round(i, 5) for i in self.sum_values],
173 | avg_values,
174 | avg_pos_values,
175 | posterior_mean,
176 | intervals,
177 | pbbs,
178 | loss,
179 | ]
180 | res = [dict(zip(keys, item)) for item in zip(*data)]
181 |
182 | return res
183 |
184 | def add_variant_data_agg(
185 | self,
186 | name: str,
187 | totals: int,
188 | non_zeros: int,
189 | sum_values: float,
190 | sum_values_2: float,
191 | a_prior_beta: Number = 0.5,
192 | b_prior_beta: Number = 0.5,
193 | m_prior: Number = 1,
194 | a_prior_ig: Number = 0,
195 | b_prior_ig: Number = 0,
196 | w_prior: Number = 0.01,
197 | replace: bool = True,
198 | ) -> None:
199 | """
200 |         Add variant data to the test class using aggregated Delta-Normal data.
201 |         This can be convenient, as the aggregation can be done on the database level.
202 | 
203 |         The goal of the default prior setup is to be low-information.
204 |         It should be tuned with caution.
205 |
206 | Parameters
207 | ----------
208 | name : Variant name.
209 | totals : Total number of experiment observations (e.g. number of sessions).
210 | non_zeros : Total number of non-zero values for a given variant.
211 | sum_values : Sum of non-zero values for a given variant.
212 | sum_values_2 : Sum of values squared for a given variant.
213 | a_prior_beta : Prior alpha parameter from Beta distribution for conversion part.
214 | b_prior_beta : Prior beta parameter from Beta distribution for conversion part.
215 | m_prior : Prior normal mean.
216 | a_prior_ig : Prior alpha from inverse gamma dist. for unknown variance.
217 | In theory a > 0, but as we always have at least one observation, we can start at 0.
218 | b_prior_ig : Prior beta from inverse gamma dist. for unknown variance.
219 | In theory b > 0, but as we always have at least one observation, we can start at 0.
220 |         w_prior : Prior effective sample size.
221 | replace : Replace data if variant already exists.
222 | If set to False, data of existing variant will be appended to existing data.
223 | """
224 | if not isinstance(name, str):
225 | raise ValueError("Variant name has to be a string.")
226 | if a_prior_beta <= 0 or b_prior_beta <= 0:
227 | raise ValueError("Both [a_prior_beta, b_prior_beta] have to be positive numbers.")
228 | if m_prior < 0 or a_prior_ig < 0 or b_prior_ig < 0 or w_prior < 0:
229 | raise ValueError("All priors of [m, a_ig, b_ig, w] have to be non-negative numbers.")
230 | if non_zeros == 0:
231 | raise ValueError("Variant has to have some non-zero values.")
232 | if non_zeros < 0:
233 |             raise ValueError("Input variable 'non_zeros' is expected to be a positive integer.")
234 | if totals < non_zeros:
235 |             raise ValueError("Not possible to have more non_zero numbers than totals!")
236 |
237 | if name not in self.variant_names:
238 | self.data[name] = {
239 | "totals": totals,
240 | "non_zeros": non_zeros,
241 | "sum_values": sum_values,
242 | "sum_values_2": sum_values_2,
243 | "a_prior_beta": a_prior_beta,
244 | "b_prior_beta": b_prior_beta,
245 | "m_prior": m_prior,
246 | "a_prior_ig": a_prior_ig,
247 | "b_prior_ig": b_prior_ig,
248 | "w_prior": w_prior,
249 | }
250 | elif name in self.variant_names and replace:
251 | msg = (
252 | f"Variant {name} already exists - new data is replacing it. "
253 | "If you wish to append instead, use replace=False."
254 | )
255 | logger.info(msg)
256 | self.data[name] = {
257 | "totals": totals,
258 | "non_zeros": non_zeros,
259 | "sum_values": sum_values,
260 | "sum_values_2": sum_values_2,
261 | "a_prior_beta": a_prior_beta,
262 | "b_prior_beta": b_prior_beta,
263 | "m_prior": m_prior,
264 | "a_prior_ig": a_prior_ig,
265 | "b_prior_ig": b_prior_ig,
266 | "w_prior": w_prior,
267 | }
268 | elif name in self.variant_names and not replace:
269 | msg = (
270 | f"Variant {name} already exists - new data is appended to variant, "
271 | "keeping its original prior setup. "
272 | "If you wish to replace data instead, use replace=True."
273 | )
274 | logger.info(msg)
275 | self.data[name]["totals"] += totals
276 | self.data[name]["non_zeros"] += non_zeros
277 | self.data[name]["sum_values"] += sum_values
278 | self.data[name]["sum_values_2"] += sum_values_2
279 |
280 | def add_variant_data(
281 | self,
282 | name: str,
283 | data: List[Number],
284 | a_prior_beta: Number = 0.5,
285 | b_prior_beta: Number = 0.5,
286 | m_prior: Number = 1,
287 | a_prior_ig: Number = 0,
288 | b_prior_ig: Number = 0,
289 | w_prior: Number = 0.01,
290 | replace: bool = True,
291 | ) -> None:
292 | """
293 | Add variant data to test class using raw Delta-Normal data.
294 |
295 |         The goal of the default prior setup is to be low-information. It should be tuned with caution.
296 |
297 | Parameters
298 | ----------
299 | name : Variant name.
300 | data : List of delta-normal data (e.g. revenues in sessions).
301 | a_prior_beta : Prior alpha parameter from Beta distribution for conversion part.
302 | b_prior_beta : Prior beta parameter from Beta distribution for conversion part.
303 | m_prior : Prior normal mean.
304 | a_prior_ig : Prior alpha from inverse gamma dist. for unknown variance.
305 | In theory a > 0, but as we always have at least one observation, we can start at 0.
306 | b_prior_ig : Prior beta from inverse gamma dist. for unknown variance.
307 | In theory b > 0, but as we always have at least one observation, we can start at 0.
308 |         w_prior : Prior effective sample size.
309 | replace : Replace data if variant already exists.
310 | If set to False, data of existing variant will be appended to existing data.
311 | """
312 | if len(data) == 0:
313 | raise ValueError("Data of added variant needs to have some observations.")
314 |
315 | totals = len(data)
316 | non_zeros = sum(x != 0 for x in data)
317 | sum_values = sum(data)
318 | sum_values_2 = sum(np.square(data))
319 |
320 | self.add_variant_data_agg(
321 | name,
322 | totals,
323 | non_zeros,
324 | sum_values,
325 | sum_values_2,
326 | a_prior_beta,
327 | b_prior_beta,
328 | m_prior,
329 | a_prior_ig,
330 | b_prior_ig,
331 | w_prior,
332 | replace,
333 | )
334 |
--------------------------------------------------------------------------------
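A minimal usage sketch for `DeltaNormalDataTest` above. The simulated session-revenue data, variant names, and parameter values are illustrative assumptions, not part of the package source:

import numpy as np
from bayesian_testing.experiments import DeltaNormalDataTest

rng = np.random.default_rng(21)
# roughly 90% of sessions have zero revenue; the rest is approximately Normal
rev_a = [0 if rng.random() < 0.90 else rng.normal(100, 20) for _ in range(3000)]
rev_b = [0 if rng.random() < 0.88 else rng.normal(102, 20) for _ in range(3000)]

test = DeltaNormalDataTest()
test.add_variant_data("A", rev_a)
test.add_variant_data("B", rev_b)

for row in test.evaluate(sim_count=20000, seed=52):
    print(row["variant"], row["posterior_mean"], row["prob_being_best"], row["expected_loss"])

--------------------------------------------------------------------------------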
/bayesian_testing/experiments/discrete.py:
--------------------------------------------------------------------------------
1 | from numbers import Number
2 | from typing import List, Tuple, Union
3 | import numpy as np
4 |
5 | from bayesian_testing.experiments.base import BaseDataTest
6 | from bayesian_testing.metrics import eval_numerical_dirichlet_agg
7 | from bayesian_testing.utilities import get_logger
8 |
9 | logger = get_logger("bayesian_testing")
10 |
11 |
12 | class DiscreteDataTest(BaseDataTest):
13 | """
14 | Class for Bayesian A/B test for data with finite discrete states (i.e. categorical data
15 | with numerical categories). As a real world examples we can think of dice rolls,
16 | 1-5 star ratings, 1-10 ratings, etc.
17 |
18 | After class initialization, use add_variant methods to insert variant data.
 19 |     Then, to get the results of the test, use for instance the `evaluate` method.
20 | """
21 |
22 | def __init__(self, states: List[Union[float, int]]) -> None:
23 | """
24 | Initialize DiscreteDataTest class.
25 |
26 | Parameters
27 | ----------
28 | states : List of all possible states for a given discrete variable.
29 | """
30 | super().__init__()
31 | if not self.check_if_numerical(states):
32 | raise ValueError("States in the test have to be numbers (int or float).")
33 | self.states = states
34 |
35 | @property
36 | def concentrations(self):
37 | return [self.data[k]["concentration"] for k in self.data]
38 |
39 | @property
40 | def prior_alphas(self):
41 | return [self.data[k]["prior"] for k in self.data]
42 |
 43 |     @staticmethod
 44 |     def check_if_numerical(values):
 45 |         # all values have to be numbers (int or float)
 46 |         for v in values:
 47 |             if not isinstance(v, Number):
 48 |                 return False
 49 |         return True
50 |
51 | def eval_simulation(
52 | self,
53 | sim_count: int = 20000,
54 | seed: int = None,
55 | min_is_best: bool = False,
56 | interval_alpha: float = 0.95,
57 | ) -> Tuple[dict, dict, dict]:
58 | """
59 | Calculate probabilities of being best, expected loss and credible intervals for a current
60 | class state.
61 |
62 | Parameters
63 | ----------
64 | sim_count : Number of simulations to be used for probability estimation.
65 | seed : Random seed.
66 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
67 | interval_alpha : Credible interval probability (value between 0 and 1).
68 |
69 | Returns
70 | -------
71 | res_pbbs : Dictionary with probabilities of being best for all variants in experiment.
72 | res_loss : Dictionary with expected loss for all variants in experiment.
73 | res_intervals : Dictionary with quantile-based credible intervals for all variants.
74 | """
75 | pbbs, loss, intervals = eval_numerical_dirichlet_agg(
76 | self.states,
77 | self.concentrations,
78 | self.prior_alphas,
79 | sim_count,
80 | seed,
81 | min_is_best,
82 | interval_alpha,
83 | )
84 | res_pbbs = dict(zip(self.variant_names, pbbs))
85 | res_loss = dict(zip(self.variant_names, loss))
86 | res_intervals = dict(zip(self.variant_names, intervals))
87 |
88 | return res_pbbs, res_loss, res_intervals
89 |
90 | def evaluate(
91 | self,
92 | sim_count: int = 20000,
93 | seed: int = None,
94 | min_is_best: bool = False,
95 | interval_alpha: float = 0.95,
96 | ) -> List[dict]:
97 | """
98 | Evaluation of experiment.
99 |
100 | Parameters
101 | ----------
102 | sim_count : Number of simulations to be used for probability estimation.
103 | seed : Random seed.
104 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
105 | interval_alpha : Credible interval probability (value between 0 and 1).
106 |
107 | Returns
108 | -------
109 | res : List of dictionaries with results per variant.
110 | """
111 | keys = [
112 | "variant",
113 | "concentration",
114 | "average_value",
115 | "posterior_mean",
116 | "credible_interval",
117 | "prob_being_best",
118 | "expected_loss",
119 | ]
120 | posterior_alphas = [
121 | list(np.array(i[0]) + np.array(i[1]))
122 | for i in zip(self.concentrations, self.prior_alphas)
123 | ]
124 |         posterior_mean = [
125 |             round(sum(np.multiply(np.array(self.states), np.array(i) / sum(np.array(i)))), 5)
126 |             for i in posterior_alphas
127 |         ]
128 | eval_pbbs, eval_loss, eval_intervals = self.eval_simulation(
129 | sim_count, seed, min_is_best, interval_alpha
130 | )
131 | pbbs = list(eval_pbbs.values())
132 | loss = list(eval_loss.values())
133 | intervals = list(eval_intervals.values())
134 | average_values = [
135 | np.sum(np.multiply(i, self.states)) / np.sum(i) for i in self.concentrations
136 | ]
137 | data = [
138 | self.variant_names,
139 | [dict(zip(self.states, i)) for i in self.concentrations],
140 | average_values,
141 | posterior_mean,
142 | intervals,
143 | pbbs,
144 | loss,
145 | ]
146 | res = [dict(zip(keys, item)) for item in zip(*data)]
147 |
148 | return res
149 |
150 | def add_variant_data_agg(
151 | self,
152 | name: str,
153 | concentration: List[int],
154 | prior: List[Union[float, int]] = None,
155 | replace: bool = True,
156 | ) -> None:
157 | """
158 |         Add variant data to the test class using aggregated discrete data.
159 |         This can be convenient, as the aggregation can be done on the database level.
160 | 
161 |         The default prior setup is Dirichlet(1,...,1), which is a low-information prior
162 |         (we can interpret it as one prior observation of each state).
163 |
164 | Parameters
165 | ----------
166 | name : Variant name.
167 | concentration : Total number of experiment observations for each state
168 | (e.g. number of rolls for each side in a die roll data).
169 | prior : Prior alpha parameters of a Dirichlet distribution (conjugate prior).
170 | replace : Replace data if variant already exists.
171 | If set to False, data of existing variant will be appended to existing data.
172 | """
173 | if not isinstance(name, str):
174 | raise ValueError("Variant name has to be a string.")
175 |         if len(self.states) != len(concentration):
176 |             msg = (
177 |                 "Concentration list has to have the same size as the number of states "
178 |                 f"in the test: {len(concentration)} != {len(self.states)}."
179 |             )
180 |             raise ValueError(msg)
181 |         if not self.check_if_numerical(concentration):
182 |             raise ValueError("Concentration parameter has to be a list of numerical values.")
183 |
184 | if not prior:
185 | prior = [1] * len(self.states)
186 |
187 | if name not in self.variant_names:
188 | self.data[name] = {"concentration": concentration, "prior": prior}
189 | elif name in self.variant_names and replace:
190 | msg = (
191 | f"Variant {name} already exists - new data is replacing it. "
192 | "If you wish to append instead, use replace=False."
193 | )
194 | logger.info(msg)
195 | self.data[name] = {"concentration": concentration, "prior": prior}
196 | elif name in self.variant_names and not replace:
197 | msg = (
198 | f"Variant {name} already exists - new data is appended to variant, "
199 | "keeping its original prior setup. "
200 | "If you wish to replace data instead, use replace=True."
201 | )
202 | logger.info(msg)
203 | self.data[name]["concentration"] = [
204 | sum(x) for x in zip(self.data[name]["concentration"], concentration)
205 | ]
206 |
207 | def add_variant_data(
208 | self,
209 | name: str,
210 | data: List[int],
211 | prior: List[Union[float, int]] = None,
212 | replace: bool = True,
213 | ) -> None:
214 | """
215 | Add variant data to test class using raw discrete data.
216 |
217 |         The default prior setup is Dirichlet(1,...,1), which is a low-information prior
218 |         (we can interpret it as one prior observation of each state).
219 |
220 | Parameters
221 | ----------
222 | name : Variant name.
223 | data : List of numerical data observations from possible states.
224 | prior : Prior alpha parameters of a Dirichlet distribution (conjugate prior).
225 | replace : Replace data if variant already exists.
226 | If set to False, data of existing variant will be appended to existing data.
227 | """
228 | if len(data) == 0:
229 | raise ValueError("Data of added variant needs to have some observations.")
230 |         if not all(i in self.states for i in data):
231 |             msg = (
232 |                 f"Input data needs to be a list of numbers from possible states: {self.states}."
233 |             )
234 |             raise ValueError(msg)
235 |
236 | counter_dict = dict(zip(self.states, np.zeros(len(self.states))))
237 | for i in data:
238 | counter_dict[i] += 1
239 | concentration = [counter_dict[i] for i in self.states]
240 |
241 | self.add_variant_data_agg(name, concentration, prior, replace)
242 |
--------------------------------------------------------------------------------
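A minimal sketch of `DiscreteDataTest` with aggregated counts; the states and counts below are made-up 1-5 star rating data:

from bayesian_testing.experiments import DiscreteDataTest

# counts of ratings 1-5 per variant, e.g. pre-aggregated on the database level
test = DiscreteDataTest(states=[1, 2, 3, 4, 5])
test.add_variant_data_agg("A", concentration=[10, 15, 25, 60, 40])
test.add_variant_data_agg("B", concentration=[8, 14, 20, 65, 51])

results = test.evaluate(seed=52)

--------------------------------------------------------------------------------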
/bayesian_testing/experiments/exponential.py:
--------------------------------------------------------------------------------
1 | from numbers import Number
2 | from typing import List, Tuple, Union
3 |
4 | from bayesian_testing.experiments.base import BaseDataTest
5 | from bayesian_testing.metrics import eval_exponential_agg
6 | from bayesian_testing.utilities import get_logger
7 |
8 | logger = get_logger("bayesian_testing")
9 |
10 |
11 | class ExponentialDataTest(BaseDataTest):
12 | """
13 | Class for Bayesian A/B test for Exponential data (e.g. session time, waiting time, etc.).
14 |
15 | After class initialization, use add_variant methods to insert variant data.
 16 |     Then, to get the results of the test, use for instance the `evaluate` method.
17 | """
18 |
19 | def __init__(self) -> None:
20 | """
 21 |         Initialize ExponentialDataTest class.
22 | """
23 | super().__init__()
24 |
25 | @property
26 | def totals(self):
27 | return [self.data[k]["totals"] for k in self.data]
28 |
29 | @property
30 | def sum_values(self):
31 | return [self.data[k]["sum_values"] for k in self.data]
32 |
33 | @property
34 | def a_priors(self):
35 | return [self.data[k]["a_prior"] for k in self.data]
36 |
37 | @property
38 | def b_priors(self):
39 | return [self.data[k]["b_prior"] for k in self.data]
40 |
41 | def eval_simulation(
42 | self,
43 | sim_count: int = 20000,
44 | seed: int = None,
45 | min_is_best: bool = False,
46 | interval_alpha: float = 0.95,
47 | ) -> Tuple[dict, dict, dict]:
48 | """
49 | Calculate probabilities of being best, expected loss and credible intervals for a current
50 | class state.
51 |
52 | Parameters
53 | ----------
54 | sim_count : Number of simulations to be used for probability estimation.
55 | seed : Random seed.
56 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
57 | interval_alpha : Credible interval probability (value between 0 and 1).
58 |
59 | Returns
60 | -------
61 | res_pbbs : Dictionary with probabilities of being best for all variants in experiment.
62 | res_loss : Dictionary with expected loss for all variants in experiment.
63 | res_intervals : Dictionary with quantile-based credible intervals for all variants.
64 | """
65 | pbbs, loss, intervals = eval_exponential_agg(
66 | self.totals,
67 | self.sum_values,
68 | self.a_priors,
69 | self.b_priors,
70 | sim_count,
71 | seed,
72 | min_is_best,
73 | interval_alpha,
74 | )
75 | res_pbbs = dict(zip(self.variant_names, pbbs))
76 | res_loss = dict(zip(self.variant_names, loss))
77 | res_intervals = dict(zip(self.variant_names, intervals))
78 |
79 | return res_pbbs, res_loss, res_intervals
80 |
81 | def evaluate(
82 | self,
83 | sim_count: int = 20000,
84 | seed: int = None,
85 | min_is_best: bool = False,
86 | interval_alpha: float = 0.95,
87 | ) -> List[dict]:
88 | """
89 | Evaluation of experiment.
90 |
91 | Parameters
92 | ----------
93 | sim_count : Number of simulations to be used for probability estimation.
94 | seed : Random seed.
95 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
96 | interval_alpha : Credible interval probability (value between 0 and 1).
97 |
98 | Returns
99 | -------
100 | res : List of dictionaries with results per variant.
101 | """
102 | keys = [
103 | "variant",
104 | "totals",
105 | "sum_values",
106 | "observed_average",
107 | "posterior_mean",
108 | "credible_interval",
109 | "prob_being_best",
110 | "expected_loss",
111 | ]
112 | observed_average = [round(i[0] / i[1], 5) for i in zip(self.sum_values, self.totals)]
113 | posterior_mean = [
114 | round((i[3] + i[1]) / (i[2] + i[0]), 5)
115 | for i in zip(self.totals, self.sum_values, self.a_priors, self.b_priors)
116 | ]
117 | eval_pbbs, eval_loss, eval_intervals = self.eval_simulation(
118 | sim_count, seed, min_is_best, interval_alpha
119 | )
120 | pbbs = list(eval_pbbs.values())
121 | loss = list(eval_loss.values())
122 | intervals = list(eval_intervals.values())
123 | data = [
124 | self.variant_names,
125 | self.totals,
126 | [round(i, 5) for i in self.sum_values],
127 | observed_average,
128 | posterior_mean,
129 | intervals,
130 | pbbs,
131 | loss,
132 | ]
133 | res = [dict(zip(keys, item)) for item in zip(*data)]
134 |
135 | return res
136 |
137 | def add_variant_data_agg(
138 | self,
139 | name: str,
140 | totals: int,
141 | sum_values: Union[float, int],
142 | a_prior: Number = 0.1,
143 | b_prior: Number = 0.1,
144 | replace: bool = True,
145 | ) -> None:
146 | """
147 |         Add variant data to a test class using aggregated Exponential data.
148 |         This can be convenient, as the aggregation can be done on the database level.
149 | 
150 |         The default prior setup is Gamma(0.1, 0.1), which is deliberately a very vague prior.
151 |
152 | Parameters
153 | ----------
154 | name : Variant name.
155 | totals : Total number of experiment observations (e.g. number of sessions).
156 | sum_values : Sum of values for a given variant (e.g. total sum of waiting time).
157 | a_prior : Prior alpha parameter of a Gamma distribution (conjugate prior).
158 | Default value 0.1 is on purpose to be vague (lower information).
159 | b_prior : Prior beta parameter (rate) of a Gamma distribution (conjugate prior).
160 | Default value 0.1 is on purpose to be vague (lower information).
161 | replace : Replace data if variant already exists.
162 | If set to False, data of existing variant will be appended to existing data.
163 | """
164 | if not isinstance(name, str):
165 | raise ValueError("Variant name has to be a string.")
166 | if a_prior <= 0 or b_prior <= 0:
167 | raise ValueError("Both [a_prior, b_prior] have to be positive numbers.")
168 |         if totals <= 0:
169 |             raise ValueError("Input variable 'totals' is expected to be a positive integer.")
170 |         if sum_values < 0:
171 |             raise ValueError("Input variable 'sum_values' is expected to be a non-negative number.")
172 |
173 | if name not in self.variant_names:
174 | self.data[name] = {
175 | "totals": totals,
176 | "sum_values": sum_values,
177 | "a_prior": a_prior,
178 | "b_prior": b_prior,
179 | }
180 | elif name in self.variant_names and replace:
181 | msg = (
182 | f"Variant {name} already exists - new data is replacing it. "
183 | "If you wish to append instead, use replace=False."
184 | )
185 | logger.info(msg)
186 | self.data[name] = {
187 | "totals": totals,
188 | "sum_values": sum_values,
189 | "a_prior": a_prior,
190 | "b_prior": b_prior,
191 | }
192 | elif name in self.variant_names and not replace:
193 | msg = (
194 | f"Variant {name} already exists - new data is appended to variant, "
195 | "keeping its original prior setup. "
196 | "If you wish to replace data instead, use replace=True."
197 | )
198 | logger.info(msg)
199 | self.data[name]["totals"] += totals
200 | self.data[name]["sum_values"] += sum_values
201 |
202 | def add_variant_data(
203 | self,
204 | name: str,
205 | data: List[Union[float, int]],
206 | a_prior: Number = 0.1,
207 | b_prior: Number = 0.1,
208 | replace: bool = True,
209 | ) -> None:
210 | """
211 | Add variant data to a test class using raw Exponential data.
212 |
213 |         The default prior setup is Gamma(0.1, 0.1), which is a low-information (vague) prior.
214 |
215 | Parameters
216 | ----------
217 |         name : Variant name.
218 | data : List of Exponential data.
219 | a_prior : Prior alpha parameter of a Gamma distribution (conjugate prior).
220 | Default value 0.1 is on purpose to be vague (lower information).
221 | b_prior : Prior beta parameter (rate) of a Gamma distribution (conjugate prior).
222 | Default value 0.1 is on purpose to be vague (lower information).
223 | replace : Replace data if variant already exists.
224 | If set to False, data of existing variant will be appended to existing data.
225 | """
226 | if len(data) == 0:
227 | raise ValueError("Data of added variant needs to have some observations.")
228 |         if any(i < 0 for i in data):
229 |             raise ValueError("Input data needs to be a list of non-negative numbers.")
230 |
231 | totals = len(data)
232 | sum_values = sum(data)
233 |
234 | self.add_variant_data_agg(name, totals, sum_values, a_prior, b_prior, replace)
235 |
--------------------------------------------------------------------------------
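A minimal sketch of `ExponentialDataTest` on simulated waiting times (the scale parameters are illustrative assumptions); since a lower waiting time is better, `min_is_best=True` is used:

import numpy as np
from bayesian_testing.experiments import ExponentialDataTest

rng = np.random.default_rng(21)
waiting_a = list(rng.exponential(10.0, size=1000))  # mean waiting time around 10
waiting_b = list(rng.exponential(9.5, size=1000))   # mean waiting time around 9.5

test = ExponentialDataTest()
test.add_variant_data("A", waiting_a)
test.add_variant_data("B", waiting_b)

# lower waiting time is better, hence min_is_best=True
results = test.evaluate(seed=52, min_is_best=True)

--------------------------------------------------------------------------------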
/bayesian_testing/experiments/normal.py:
--------------------------------------------------------------------------------
1 | from numbers import Number
2 | from typing import List, Tuple
3 |
4 | import numpy as np
5 |
6 | from bayesian_testing.experiments.base import BaseDataTest
7 | from bayesian_testing.metrics import eval_normal_agg
8 | from bayesian_testing.utilities import get_logger
9 |
10 | logger = get_logger("bayesian_testing")
11 |
12 |
13 | class NormalDataTest(BaseDataTest):
14 | """
15 | Class for Bayesian A/B test for Normal data.
16 |
17 | After class initialization, use add_variant methods to insert variant data.
 18 |     Then, to get the results of the test, use for instance the `evaluate` method.
19 | """
20 |
21 | def __init__(self) -> None:
22 | """
23 | Initialize NormalDataTest class.
24 | """
25 | super().__init__()
26 |
27 | @property
28 | def totals(self):
29 | return [self.data[k]["totals"] for k in self.data]
30 |
31 | @property
32 | def sum_values(self):
33 | return [self.data[k]["sum_values"] for k in self.data]
34 |
35 | @property
36 | def sum_values_2(self):
37 | return [self.data[k]["sum_values_2"] for k in self.data]
38 |
39 | @property
40 | def m_priors(self):
41 | return [self.data[k]["m_prior"] for k in self.data]
42 |
43 | @property
44 | def a_priors_ig(self):
45 | return [self.data[k]["a_prior_ig"] for k in self.data]
46 |
47 | @property
48 | def b_priors_ig(self):
49 | return [self.data[k]["b_prior_ig"] for k in self.data]
50 |
51 | @property
52 | def w_priors(self):
53 | return [self.data[k]["w_prior"] for k in self.data]
54 |
55 | def eval_simulation(
56 | self,
57 | sim_count: int = 20000,
58 | seed: int = None,
59 | min_is_best: bool = False,
60 | interval_alpha: float = 0.95,
61 | ) -> Tuple[dict, dict, dict]:
62 | """
63 | Calculate probabilities of being best, expected loss and credible intervals for a current
64 | class state.
65 |
66 | Parameters
67 | ----------
68 | sim_count : Number of simulations to be used for probability estimation.
69 | seed : Random seed.
70 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
71 | interval_alpha : Credible interval probability (value between 0 and 1).
72 |
73 | Returns
74 | -------
75 | res_pbbs : Dictionary with probabilities of being best for all variants in experiment.
76 | res_loss : Dictionary with expected loss for all variants in experiment.
77 | res_intervals : Dictionary with quantile-based credible intervals for all variants.
78 | """
79 | pbbs, loss, intervals = eval_normal_agg(
80 | self.totals,
81 | self.sum_values,
82 | self.sum_values_2,
83 | sim_count=sim_count,
84 | m_priors=self.m_priors,
85 | a_priors_ig=self.a_priors_ig,
86 | b_priors_ig=self.b_priors_ig,
87 | w_priors=self.w_priors,
88 | seed=seed,
89 | min_is_best=min_is_best,
90 | interval_alpha=interval_alpha,
91 | )
92 | res_pbbs = dict(zip(self.variant_names, pbbs))
93 | res_loss = dict(zip(self.variant_names, loss))
94 | res_intervals = dict(zip(self.variant_names, intervals))
95 |
96 | return res_pbbs, res_loss, res_intervals
97 |
98 | def evaluate(
99 | self,
100 | sim_count: int = 20000,
101 | seed: int = None,
102 | min_is_best: bool = False,
103 | interval_alpha: float = 0.95,
104 | ) -> List[dict]:
105 | """
106 | Evaluation of experiment.
107 |
108 | Parameters
109 | ----------
110 | sim_count : Number of simulations to be used for probability estimation.
111 | seed : Random seed.
112 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
113 | interval_alpha : Credible interval probability (value between 0 and 1).
114 |
115 | Returns
116 | -------
117 | res : List of dictionaries with results per variant.
118 | """
119 | keys = [
120 | "variant",
121 | "totals",
122 | "sum_values",
123 | "avg_values",
124 | "posterior_mean",
125 | "credible_interval",
126 | "prob_being_best",
127 | "expected_loss",
128 | ]
129 | avg_values = [round(i[0] / i[1], 5) for i in zip(self.sum_values, self.totals)]
130 | posterior_mean = [
131 | round((i[0] + i[3] * i[2]) / (i[1] + i[3]), 5)
132 | for i in zip(self.sum_values, self.totals, self.m_priors, self.w_priors)
133 | ]
134 | eval_pbbs, eval_loss, eval_intervals = self.eval_simulation(
135 | sim_count, seed, min_is_best, interval_alpha
136 | )
137 | pbbs = list(eval_pbbs.values())
138 | loss = list(eval_loss.values())
139 | intervals = list(eval_intervals.values())
140 | data = [
141 | self.variant_names,
142 | self.totals,
143 | [round(i, 5) for i in self.sum_values],
144 | avg_values,
145 | posterior_mean,
146 | intervals,
147 | pbbs,
148 | loss,
149 | ]
150 | res = [dict(zip(keys, item)) for item in zip(*data)]
151 |
152 | return res
153 |
154 | def add_variant_data_agg(
155 | self,
156 | name: str,
157 | totals: int,
158 | sum_values: float,
159 | sum_values_2: float,
160 | m_prior: Number = 1,
161 | a_prior_ig: Number = 0,
162 | b_prior_ig: Number = 0,
163 | w_prior: Number = 0.01,
164 | replace: bool = True,
165 | ) -> None:
166 | """
167 |         Add variant data to the test class using aggregated Normal data.
168 |         This can be convenient, as the aggregation can be done on the database level.
169 | 
170 |         The goal of the default prior setup is to be low-information.
171 |         It should be tuned with caution.
172 |
173 | Parameters
174 | ----------
175 | name : Variant name.
176 | totals : Total number of experiment observations (e.g. number of sessions).
177 | sum_values : Sum of values for a given variant.
178 | sum_values_2 : Sum of values squared for a given variant.
179 | m_prior : Prior normal mean.
180 | a_prior_ig : Prior alpha from inverse gamma dist. for unknown variance.
181 | In theory a > 0, but as we always have at least one observation, we can start at 0.
182 | b_prior_ig : Prior beta from inverse gamma dist. for unknown variance.
183 | In theory b > 0, but as we always have at least one observation, we can start at 0.
184 |         w_prior : Prior effective sample size.
185 | replace : Replace data if variant already exists.
186 | If set to False, data of existing variant will be appended to existing data.
187 | """
188 | if not isinstance(name, str):
189 | raise ValueError("Variant name has to be a string.")
190 | if m_prior < 0 or a_prior_ig < 0 or b_prior_ig < 0 or w_prior < 0:
191 | raise ValueError("All priors of [m, a_ig, b_ig, w] have to be non-negative numbers.")
192 |         if totals <= 0:
193 |             raise ValueError("Input variable 'totals' is expected to be a positive integer.")
194 |
195 | if name not in self.variant_names:
196 | self.data[name] = {
197 | "totals": totals,
198 | "sum_values": sum_values,
199 | "sum_values_2": sum_values_2,
200 | "m_prior": m_prior,
201 | "a_prior_ig": a_prior_ig,
202 | "b_prior_ig": b_prior_ig,
203 | "w_prior": w_prior,
204 | }
205 | elif name in self.variant_names and replace:
206 | msg = (
207 | f"Variant {name} already exists - new data is replacing it. "
208 | "If you wish to append instead, use replace=False."
209 | )
210 | logger.info(msg)
211 | self.data[name] = {
212 | "totals": totals,
213 | "sum_values": sum_values,
214 | "sum_values_2": sum_values_2,
215 | "m_prior": m_prior,
216 | "a_prior_ig": a_prior_ig,
217 | "b_prior_ig": b_prior_ig,
218 | "w_prior": w_prior,
219 | }
220 | elif name in self.variant_names and not replace:
221 | msg = (
222 | f"Variant {name} already exists - new data is appended to variant, "
223 | "keeping its original prior setup. "
224 | "If you wish to replace data instead, use replace=True."
225 | )
226 | logger.info(msg)
227 | self.data[name]["totals"] += totals
228 | self.data[name]["sum_values"] += sum_values
229 | self.data[name]["sum_values_2"] += sum_values_2
230 |
231 | def add_variant_data(
232 | self,
233 | name: str,
234 | data: List[Number],
235 | m_prior: Number = 1,
236 | a_prior_ig: Number = 0,
237 | b_prior_ig: Number = 0,
238 | w_prior: Number = 0.01,
239 | replace: bool = True,
240 | ) -> None:
241 | """
242 | Add variant data to test class using raw Normal data.
243 |
244 |         The goal of the default prior setup is to be low-information. It should be tuned with caution.
245 |
246 | Parameters
247 | ----------
248 | name : Variant name.
249 | data : List of normal data.
250 | m_prior : Prior mean.
251 | a_prior_ig : Prior alpha from inverse gamma dist. for unknown variance.
252 | In theory a > 0, but as we always have at least one observation, we can start at 0.
253 | b_prior_ig : Prior beta from inverse gamma dist. for unknown variance.
254 | In theory b > 0, but as we always have at least one observation, we can start at 0.
255 |         w_prior : Prior effective sample size.
256 | replace : Replace data if variant already exists.
257 | If set to False, data of existing variant will be appended to existing data.
258 | """
259 | if len(data) == 0:
260 | raise ValueError("Data of added variant needs to have some observations.")
261 |
262 | totals = len(data)
263 | sum_values = sum(data)
264 | sum_values_2 = sum(np.square(data))
265 |
266 | self.add_variant_data_agg(
267 | name,
268 | totals,
269 | sum_values,
270 | sum_values_2,
271 | m_prior,
272 | a_prior_ig,
273 | b_prior_ig,
274 | w_prior,
275 | replace,
276 | )
277 |
--------------------------------------------------------------------------------
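A minimal sketch of `NormalDataTest` on simulated Normal data (the means and sigmas are illustrative assumptions):

import numpy as np
from bayesian_testing.experiments import NormalDataTest

rng = np.random.default_rng(21)
test = NormalDataTest()
test.add_variant_data("A", list(rng.normal(7.0, 2.0, size=1000)))
test.add_variant_data("B", list(rng.normal(7.1, 2.0, size=1000)))

# posterior_mean per variant equals (sum_values + w_prior * m_prior) / (totals + w_prior)
results = test.evaluate(sim_count=20000, seed=52)

--------------------------------------------------------------------------------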
/bayesian_testing/experiments/poisson.py:
--------------------------------------------------------------------------------
1 | from numbers import Number
2 | from typing import List, Tuple, Union
3 |
4 | from bayesian_testing.experiments.base import BaseDataTest
5 | from bayesian_testing.metrics import eval_poisson_agg
6 | from bayesian_testing.utilities import get_logger
7 |
8 | logger = get_logger("bayesian_testing")
9 |
10 |
11 | class PoissonDataTest(BaseDataTest):
12 | """
13 | Class for Bayesian A/B test for Poisson data (i.e. numbers of events, e.g. goals scored).
14 |
15 | After class initialization, use add_variant methods to insert variant data.
 16 |     Then, to get the results of the test, use for instance the `evaluate` method.
17 | """
18 |
19 | def __init__(self) -> None:
20 | """
 21 |         Initialize PoissonDataTest class.
22 | """
23 | super().__init__()
24 |
25 | @property
26 | def totals(self):
27 | return [self.data[k]["totals"] for k in self.data]
28 |
29 | @property
30 | def sum_values(self):
31 | return [self.data[k]["sum_values"] for k in self.data]
32 |
33 | @property
34 | def a_priors(self):
35 | return [self.data[k]["a_prior"] for k in self.data]
36 |
37 | @property
38 | def b_priors(self):
39 | return [self.data[k]["b_prior"] for k in self.data]
40 |
41 | def eval_simulation(
42 | self,
43 | sim_count: int = 20000,
44 | seed: int = None,
45 | min_is_best: bool = False,
46 | interval_alpha: float = 0.95,
47 | ) -> Tuple[dict, dict, dict]:
48 | """
49 | Calculate probabilities of being best, expected loss and credible intervals for a current
50 | class state.
51 |
52 | Parameters
53 | ----------
54 | sim_count : Number of simulations to be used for probability estimation.
55 | seed : Random seed.
56 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
57 | interval_alpha : Credible interval probability (value between 0 and 1).
58 |
59 | Returns
60 | -------
61 | res_pbbs : Dictionary with probabilities of being best for all variants in experiment.
62 | res_loss : Dictionary with expected loss for all variants in experiment.
63 | res_intervals : Dictionary with quantile-based credible intervals for all variants.
64 | """
65 | pbbs, loss, intervals = eval_poisson_agg(
66 | self.totals,
67 | self.sum_values,
68 | self.a_priors,
69 | self.b_priors,
70 | sim_count,
71 | seed,
72 | min_is_best,
73 | interval_alpha,
74 | )
75 | res_pbbs = dict(zip(self.variant_names, pbbs))
76 | res_loss = dict(zip(self.variant_names, loss))
77 | res_intervals = dict(zip(self.variant_names, intervals))
78 |
79 | return res_pbbs, res_loss, res_intervals
80 |
81 | def evaluate(
82 | self,
83 | sim_count: int = 20000,
84 | seed: int = None,
85 | min_is_best: bool = False,
86 | interval_alpha: float = 0.95,
87 | ) -> List[dict]:
88 | """
89 | Evaluation of experiment.
90 |
91 | Parameters
92 | ----------
93 | sim_count : Number of simulations to be used for probability estimation.
94 | seed : Random seed.
95 | min_is_best : Option to change "being best" to a minimum. Default is maximum.
96 | interval_alpha : Credible interval probability (value between 0 and 1).
97 |
98 | Returns
99 | -------
100 | res : List of dictionaries with results per variant.
101 | """
102 | keys = [
103 | "variant",
104 | "totals",
105 | "sum_values",
106 | "observed_average",
107 | "posterior_mean",
108 | "credible_interval",
109 | "prob_being_best",
110 | "expected_loss",
111 | ]
112 | observed_average = [round(i[0] / i[1], 5) for i in zip(self.sum_values, self.totals)]
113 | posterior_mean = [
114 | round((i[2] + i[0]) / (i[3] + i[1]), 5)
115 | for i in zip(self.sum_values, self.totals, self.a_priors, self.b_priors)
116 | ]
117 | eval_pbbs, eval_loss, eval_intervals = self.eval_simulation(
118 | sim_count, seed, min_is_best, interval_alpha
119 | )
120 | pbbs = list(eval_pbbs.values())
121 | loss = list(eval_loss.values())
122 | intervals = list(eval_intervals.values())
123 | data = [
124 | self.variant_names,
125 | self.totals,
126 | self.sum_values,
127 | observed_average,
128 | posterior_mean,
129 | intervals,
130 | pbbs,
131 | loss,
132 | ]
133 | res = [dict(zip(keys, item)) for item in zip(*data)]
134 |
135 | return res
136 |
137 | def add_variant_data_agg(
138 | self,
139 | name: str,
140 | totals: int,
141 | sum_values: Union[float, int],
142 | a_prior: Number = 0.1,
143 | b_prior: Number = 0.1,
144 | replace: bool = True,
145 | ) -> None:
146 | """
147 |         Add variant data to the test class using aggregated Poisson data.
148 |         This can be convenient, as the aggregation can be done on the database level.
149 | 
150 |         The default prior setup is Gamma(0.1, 0.1), which is deliberately a very vague prior.
151 |
152 | Parameters
153 | ----------
154 | name : Variant name.
155 | totals : Total number of experiment observations (e.g. number of matches).
156 | sum_values : Sum of values for a given variant (e.g. total number of goals).
157 | a_prior : Prior alpha parameter of a Gamma distribution (conjugate prior).
158 | Default value 0.1 is on purpose to be vague (lower information).
159 | b_prior : Prior beta parameter (rate) of a Gamma distribution (conjugate prior).
160 | Default value 0.1 is on purpose to be vague (lower information).
161 | replace : Replace data if variant already exists.
162 | If set to False, data of existing variant will be appended to existing data.
163 | """
164 | if not isinstance(name, str):
165 | raise ValueError("Variant name has to be a string.")
166 | if a_prior <= 0 or b_prior <= 0:
167 | raise ValueError("Both [a_prior, b_prior] have to be positive numbers.")
168 |         if totals <= 0:
169 |             raise ValueError("Input variable 'totals' is expected to be a positive integer.")
170 |         if sum_values < 0:
171 |             raise ValueError("Input variable 'sum_values' is expected to be a non-negative number.")
172 |
173 | if name not in self.variant_names:
174 | self.data[name] = {
175 | "totals": totals,
176 | "sum_values": sum_values,
177 | "a_prior": a_prior,
178 | "b_prior": b_prior,
179 | }
180 | elif name in self.variant_names and replace:
181 | msg = (
182 | f"Variant {name} already exists - new data is replacing it. "
183 | "If you wish to append instead, use replace=False."
184 | )
185 | logger.info(msg)
186 | self.data[name] = {
187 | "totals": totals,
188 | "sum_values": sum_values,
189 | "a_prior": a_prior,
190 | "b_prior": b_prior,
191 | }
192 | elif name in self.variant_names and not replace:
193 | msg = (
194 | f"Variant {name} already exists - new data is appended to variant, "
195 | "keeping its original prior setup. "
196 | "If you wish to replace data instead, use replace=True."
197 | )
198 | logger.info(msg)
199 | self.data[name]["totals"] += totals
200 | self.data[name]["sum_values"] += sum_values
201 |
202 | def add_variant_data(
203 | self,
204 | name: str,
205 | data: List[int],
206 | a_prior: Number = 0.1,
207 | b_prior: Number = 0.1,
208 | replace: bool = True,
209 | ) -> None:
210 | """
211 | Add variant data to test class using raw Poisson data.
212 |
213 |         The default prior setup is Gamma(0.1, 0.1), which is a low-information (vague) prior.
214 |
215 | Parameters
216 | ----------
217 | name : Variant name.
218 | data : List of Poisson data.
219 | a_prior : Prior alpha parameter of a Gamma distribution (conjugate prior).
220 | Default value 0.1 is on purpose to be vague (lower information).
221 | b_prior : Prior beta parameter (rate) of a Gamma distribution (conjugate prior).
222 | Default value 0.1 is on purpose to be vague (lower information).
223 | replace : Replace data if variant already exists.
224 | If set to False, data of existing variant will be appended to existing data.
225 | """
226 | if len(data) == 0:
227 | raise ValueError("Data of added variant needs to have some observations.")
228 |         if any(i < 0 for i in data):
229 |             raise ValueError("Input data needs to be a list of non-negative integers.")
230 |
231 | totals = len(data)
232 | sum_values = sum(data)
233 |
234 | self.add_variant_data_agg(name, totals, sum_values, a_prior, b_prior, replace)
235 |
--------------------------------------------------------------------------------
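A minimal sketch of `PoissonDataTest` using aggregated data (the match and goal counts below are made up):

from bayesian_testing.experiments import PoissonDataTest

# e.g. two teams: number of matches played and total goals scored
test = PoissonDataTest()
test.add_variant_data_agg("A", totals=38, sum_values=66)
test.add_variant_data_agg("B", totals=38, sum_values=59)

# posterior rate mean per variant equals (a_prior + sum_values) / (b_prior + totals)
results = test.evaluate(seed=52)

--------------------------------------------------------------------------------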
/bayesian_testing/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | from .evaluation import (
2 | eval_bernoulli_agg,
3 | eval_normal_agg,
4 | eval_delta_lognormal_agg,
5 | eval_numerical_dirichlet_agg,
6 | eval_poisson_agg,
7 | eval_delta_normal_agg,
8 | eval_exponential_agg,
9 | )
10 |
11 | __all__ = [
12 | "eval_bernoulli_agg",
13 | "eval_normal_agg",
14 | "eval_delta_lognormal_agg",
15 | "eval_delta_normal_agg",
16 | "eval_numerical_dirichlet_agg",
17 | "eval_poisson_agg",
18 | "eval_exponential_agg",
19 | ]
20 |
--------------------------------------------------------------------------------
/bayesian_testing/metrics/posteriors.py:
--------------------------------------------------------------------------------
1 | from typing import List, Tuple, Union
2 |
3 | import numpy as np
4 |
5 |
6 | def beta_posteriors_all(
7 | totals: List[int],
8 | positives: List[int],
9 | sim_count: int,
10 | a_priors_beta: List[Union[float, int]],
11 | b_priors_beta: List[Union[float, int]],
12 | seed: Union[int, np.random.bit_generator.SeedSequence] = None,
13 | ) -> np.ndarray:
14 | """
15 | Draw from Beta posterior distributions for all variants at once.
16 |
17 | Parameters
18 | ----------
19 | totals : List of total experiment observations (e.g. number of sessions) for each variant.
20 | positives : List of total number of ones (e.g. number of conversions) for each variant.
21 | sim_count : Number of simulations to be used for probability estimation.
22 | a_priors_beta : List of prior alpha parameters of Beta distributions for each variant.
23 | b_priors_beta : List of prior beta parameters of Beta distributions for each variant.
24 | seed : Random seed.
25 |
26 | Returns
27 | -------
28 | beta_samples : List of lists of beta distribution samples for all variants.
29 | """
30 | rng = np.random.default_rng(seed)
31 |
32 | beta_samples = np.array(
33 | [
34 | rng.beta(
35 | positives[i] + a_priors_beta[i],
36 | totals[i] - positives[i] + b_priors_beta[i],
37 | sim_count,
38 | )
39 | for i in range(len(totals))
40 | ]
41 | )
42 | return beta_samples
43 |
44 |
45 | def normal_posteriors(
46 | total: int,
47 | sums: float,
48 | sums_2: float,
49 | sim_count: int = 20000,
50 | prior_m: Union[float, int] = 1,
51 | prior_a: Union[float, int] = 0,
52 | prior_b: Union[float, int] = 0,
53 | prior_w: Union[float, int] = 0.01,
54 | seed: Union[int, np.random.bit_generator.SeedSequence] = None,
55 | ) -> Tuple[List[Union[float, int]], List[Union[float, int]]]:
56 | """
 57 |     Draw mus and sigmas from the Normal-Inverse-Gamma posterior for given aggregated data.
58 |
59 | Parameters
60 | ----------
61 | total : Number of data observations from normal data.
62 | sums : Sum of original data.
63 | sums_2 : Sum of squares of original data.
64 | sim_count : Number of simulations.
65 | prior_m : Prior mean.
66 | prior_a : Prior alpha from inverse gamma dist. for unknown variance of original data.
67 | In theory a > 0, but as we always have at least one observation, we can start at 0.
68 | prior_b : Prior beta from inverse gamma dist. for unknown variance of original data.
69 | In theory b > 0, but as we always have at least one observation, we can start at 0.
70 | prior_w : Prior effective sample size.
71 | seed : Random seed.
72 |
73 | Returns
74 | -------
 75 |     mu_post : List of size sim_count with mus drawn from the posterior normal distribution.
 76 |     sig_2_post : List of size sim_count with variances drawn from the inverse gamma posterior.
77 | """
78 | rng = np.random.default_rng(seed)
79 |
80 | x_bar = sums / total
81 | a_post = prior_a + (total / 2)
82 | b_post = (
83 | prior_b
84 | + (1 / 2) * (sums_2 - 2 * sums * x_bar + total * (x_bar**2))
85 | + ((total * prior_w) / (2 * (total + prior_w))) * ((x_bar - prior_m) ** 2)
86 | )
87 |
88 | # here it has to be 1/b as it is a scale, and not a rate
89 | sig_2_post = 1 / rng.gamma(a_post, 1 / b_post, sim_count)
90 |
91 | m_post = (total * x_bar + prior_w * prior_m) / (total + prior_w)
92 |
93 | mu_post = rng.normal(m_post, np.sqrt(sig_2_post / (total + prior_w)))
94 |
95 | return mu_post, sig_2_post
96 |
97 |
98 | def lognormal_posteriors(
99 | total: int,
100 | sum_logs: float,
101 | sum_logs_2: float,
102 | sim_count: int = 20000,
103 | prior_m: Union[float, int] = 1,
104 | prior_a: Union[float, int] = 0,
105 | prior_b: Union[float, int] = 0,
106 | prior_w: Union[float, int] = 0.01,
107 | seed: Union[int, np.random.bit_generator.SeedSequence] = None,
108 | ) -> List[float]:
109 | """
110 | Drawing from posterior LogNormal distribution using logarithms of original (lognormal) data
111 | (logarithms of lognormal data are normal). Input data is in aggregated form.
112 |
113 | Parameters
114 | ----------
115 | total : Number of lognormal data observations.
116 | Could be number of conversions in session data.
117 | sum_logs : Sum of logarithms of original data.
118 | sum_logs_2 : Sum of logarithms squared of original data.
119 | sim_count : Number of simulations.
120 | prior_m : Prior mean of logarithms of original data.
121 | prior_a : Prior alpha from inverse gamma dist. for unknown variance of logarithms
122 | of original data. In theory a > 0, but as we always have at least one observation,
123 | we can start at 0.
124 | prior_b : Prior beta from inverse gamma dist. for unknown variance of logarithms
125 | of original data. In theory b > 0, but as we always have at least one observation,
126 | we can start at 0.
127 | prior_w : Prior effective sample size.
128 | seed : Random seed.
129 |
130 | Returns
131 | -------
132 |     res : List of sim_count simulated means of the lognormal distribution.
133 | """
134 | if total <= 0:
135 | return list(np.zeros(sim_count))
136 |
137 | # normal posterior for aggregated data of logarithms of original data
138 | normal_mu_post, normal_sig_2_post = normal_posteriors(
139 | total, sum_logs, sum_logs_2, sim_count, prior_m, prior_a, prior_b, prior_w, seed
140 | )
141 |
142 | # final simulated lognormal means using simulated normal means and sigmas
143 | res = np.exp(normal_mu_post + (normal_sig_2_post / 2))
144 |
145 | return res
146 |
147 |
148 | def dirichlet_posteriors(
149 | concentration: List[int],
150 | prior: List[Union[float, int]],
151 | sim_count: int = 20000,
152 | seed: Union[int, np.random.bit_generator.SeedSequence] = None,
153 | ) -> np.ndarray:
154 | """
155 | Drawing from Dirichlet posterior for a single variant.
156 |
157 | Parameters
158 | ----------
159 |     concentration : List of numbers of observations for each possible category.
160 | In dice example it would be numbers of observations for each possible face.
161 | prior : List of prior values for each category in dirichlet distribution.
162 | sim_count : Number of simulations.
163 | seed : Random seed.
164 |
165 | Returns
166 | -------
167 | res : List of lists of dirichlet samples.
168 | """
169 | rng = np.random.default_rng(seed)
170 |
171 | posterior_concentration = [sum(x) for x in zip(prior, concentration)]
172 | res = rng.dirichlet(posterior_concentration, sim_count)
173 |
174 | return res
175 |
176 |
177 | def pois_gamma_posteriors_all(
178 | totals: List[int],
179 | sums: List[Union[float, int]],
180 | sim_count: int,
181 | a_priors_gamma: List[Union[float, int]],
182 | b_priors_gamma: List[Union[float, int]],
183 | seed: Union[int, np.random.bit_generator.SeedSequence] = None,
184 | ) -> np.ndarray:
185 | """
186 | Draw from Gamma posterior distributions for all variants of Poisson data at once.
187 |
188 | Parameters
189 | ----------
190 | totals : List of total experiment observations (e.g. number of matches) for each variant.
191 | sums : List of sums of observations (e.g. number of goals) for each variant.
192 | sim_count : Number of simulations to be used for probability estimation.
193 | a_priors_gamma : List of prior alpha parameters of Gamma distributions for each variant.
194 | b_priors_gamma : List of prior beta parameters (rates) of Gamma distributions for each variant.
195 | seed : Random seed.
196 |
197 | Returns
198 | -------
199 | gamma_samples : List of lists of Gamma distribution samples for all variants.
200 | """
201 | rng = np.random.default_rng(seed)
202 |
203 | gamma_samples = np.array(
204 | [
205 | rng.gamma(
206 | sums[i] + a_priors_gamma[i],
207 | # here it has to be 1/(...) as it is a scale, and not a rate
208 | 1 / (totals[i] + b_priors_gamma[i]),
209 | sim_count,
210 | )
211 | for i in range(len(totals))
212 | ]
213 | )
214 | return gamma_samples
215 |
216 |
217 | def exp_gamma_posteriors_all(
218 | totals: List[int],
219 | sums: List[Union[float, int]],
220 | sim_count: int,
221 | a_priors_gamma: List[Union[float, int]],
222 | b_priors_gamma: List[Union[float, int]],
223 | seed: Union[int, np.random.bit_generator.SeedSequence] = None,
224 | ) -> np.ndarray:
225 | """
226 | Draw from Gamma posterior distributions for all variants of Exponential data at once.
227 |
228 | Parameters
229 | ----------
230 | totals : List of total experiment observations (e.g. number of sessions) for each variant.
231 | sums : List of sums of observations (e.g. total time spent) for each variant.
232 | sim_count : Number of simulations to be used for probability estimation.
233 | a_priors_gamma : List of prior alpha parameters of Gamma distributions for each variant.
234 | b_priors_gamma : List of prior beta parameters (rates) of Gamma distributions for each variant.
235 | seed : Random seed.
236 |
237 | Returns
238 | -------
239 | gamma_samples : List of lists of Gamma distribution samples for all variants.
240 | """
241 | rng = np.random.default_rng(seed)
242 |
243 | gamma_samples = np.array(
244 | [
245 | rng.gamma(
246 | totals[i] + a_priors_gamma[i],
247 | # here it has to be 1/(...) as it is a scale, and not a rate
248 | 1 / (sums[i] + b_priors_gamma[i]),
249 | sim_count,
250 | )
251 | for i in range(len(totals))
252 | ]
253 | )
254 | return gamma_samples
255 |
--------------------------------------------------------------------------------
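A sketch of how the posterior samplers above can be used directly, here estimating the probability of being best from Beta posterior samples. The totals, positives, and priors are illustrative; the evaluation module performs this computation with additional outputs:

import numpy as np
from bayesian_testing.metrics.posteriors import beta_posteriors_all

# shape (n_variants, sim_count): one row of posterior conversion-rate samples per variant
samples = beta_posteriors_all(
    totals=[1000, 1000],
    positives=[100, 120],
    sim_count=20000,
    a_priors_beta=[0.5, 0.5],
    b_priors_beta=[0.5, 0.5],
    seed=52,
)
best = samples.argmax(axis=0)  # index of the winning variant in each simulation
pbbs = [float((best == i).mean()) for i in range(samples.shape[0])]

--------------------------------------------------------------------------------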
/bayesian_testing/utilities/__init__.py:
--------------------------------------------------------------------------------
1 | from .logging import get_logger
2 |
3 | __all__ = ["get_logger"]
4 |
--------------------------------------------------------------------------------
/bayesian_testing/utilities/common.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 |
4 | def check_list_lengths(lists: List[List]) -> None:
5 | """
  6 |     Check if input lists are all of the same length.
7 | Parameters
8 | ----------
9 | lists : List of lists of different possible types.
10 | """
11 | it = iter(lists)
12 | the_len = len(next(it))
13 | if not all(len(i) == the_len for i in it):
 14 |         raise ValueError("Not all lists have the same length!")
15 |
--------------------------------------------------------------------------------
/bayesian_testing/utilities/logging.conf:
--------------------------------------------------------------------------------
1 | [loggers]
2 | keys=root,bayesian_testing
3 |
4 | [handlers]
5 | keys=consoleHandler
6 |
7 | [formatters]
8 | keys=simpleFormatter
9 |
10 | [logger_root]
11 | level=INFO
12 | handlers=consoleHandler
13 |
14 | [logger_bayesian_testing]
15 | level=INFO
16 | handlers=consoleHandler
17 | qualname=bayesian_testing
18 | propagate=0
19 |
20 | [handler_consoleHandler]
21 | class=StreamHandler
22 | level=INFO
23 | formatter=simpleFormatter
24 | args=(sys.stdout,)
25 |
26 | [formatter_simpleFormatter]
27 | format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
28 |
--------------------------------------------------------------------------------
/bayesian_testing/utilities/logging.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import logging.config
3 | from os import path
4 |
5 | log_file_path = path.join(path.dirname(path.abspath(__file__)), "logging.conf")
6 |
7 | logging.config.fileConfig(log_file_path, disable_existing_loggers=False)
8 |
9 |
10 | def get_logger(logger_name):
11 | return logging.getLogger(logger_name)
12 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Matt52/bayesian-testing/cea9afa5d7e3321d159d7b387ff57803467a18d5/codecov.yml
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # Examples
2 |
--------------------------------------------------------------------------------
/examples/dice_rolls_ab_testing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "45ce22be-8ae0-4b0e-bce5-9e9aab6f105f",
7 | "metadata": {
8 | "tags": []
9 | },
10 | "outputs": [],
11 | "source": [
12 | "from IPython.core.interactiveshell import InteractiveShell\n",
13 | "InteractiveShell.ast_node_interactivity = \"all\"\n",
14 | "\n",
15 | "import numpy as np\n",
16 | "np.set_printoptions(legacy=\"1.25\")\n",
17 | "import pandas as pd\n",
18 | "from bayesian_testing.experiments import DiscreteDataTest"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "id": "f888f299-69a0-4f3c-bd57-af3a59bedba0",
25 | "metadata": {
26 | "tags": []
27 | },
28 | "outputs": [
29 | {
30 | "data": {
31 | "text/plain": [
32 | "Generator(PCG64) at 0x132BA19E0"
33 | ]
34 | },
35 | "execution_count": 2,
36 | "metadata": {},
37 | "output_type": "execute_result"
38 | }
39 | ],
40 | "source": [
41 | "np.random.default_rng(52)\n",
42 | "\n",
43 | "values = [1,2,3,4,5,6]"
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": 3,
49 | "id": "24a15d66-d928-432c-beb3-e25e3be10cc0",
50 | "metadata": {
51 | "tags": []
52 | },
53 | "outputs": [],
54 | "source": [
55 | "die_A_rolls = list(np.random.choice(values, 1000, p=[1/6, 1/6, 1/6, 1/6, 1/6, 1/6]))\n",
56 | "die_B_rolls = list(np.random.choice(values, 1200, p=[0.2, 0.2, 0.1, 0.1, 0.2, 0.2]))\n",
57 | "die_C_rolls = list(np.random.choice(values, 500, p=[0.2, 0.1, 0.1, 0.2, 0.2, 0.2]))"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 4,
63 | "id": "35989040-af25-4129-9678-de04c0397c32",
64 | "metadata": {
65 | "tags": []
66 | },
67 | "outputs": [],
68 | "source": [
69 | "discrete_test = DiscreteDataTest(values)"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 5,
75 | "id": "e902885d-7382-42c8-af7f-1d82fba06bb4",
76 | "metadata": {
77 | "tags": []
78 | },
79 | "outputs": [],
80 | "source": [
81 | "discrete_test.add_variant_data('A', die_A_rolls)\n",
82 | "discrete_test.add_variant_data('B', die_B_rolls)\n",
83 | "discrete_test.add_variant_data('C', die_C_rolls)"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": 6,
89 | "id": "a3ee97a1-d48c-407b-b13c-5cfb11e6591f",
90 | "metadata": {
91 | "tags": []
92 | },
93 | "outputs": [
94 | {
95 | "data": {
96 | "text/plain": [
97 | "{'A': {'concentration': [168.0, 166.0, 176.0, 172.0, 168.0, 150.0],\n",
98 | " 'prior': [1, 1, 1, 1, 1, 1]},\n",
99 | " 'B': {'concentration': [256.0, 246.0, 111.0, 116.0, 239.0, 232.0],\n",
100 | " 'prior': [1, 1, 1, 1, 1, 1]},\n",
101 | " 'C': {'concentration': [84.0, 57.0, 58.0, 100.0, 100.0, 101.0],\n",
102 | " 'prior': [1, 1, 1, 1, 1, 1]}}"
103 | ]
104 | },
105 | "execution_count": 6,
106 | "metadata": {},
107 | "output_type": "execute_result"
108 | }
109 | ],
110 | "source": [
111 | "discrete_test.data"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 7,
117 | "id": "23484578-dc84-4325-9aa0-7a1498ee161b",
118 | "metadata": {
119 | "tags": []
120 | },
121 | "outputs": [
122 | {
123 | "data": {
124 | "text/plain": [
125 | "{'A': 0.00065, 'B': 0.00035, 'C': 0.999}"
126 | ]
127 | },
128 | "execution_count": 7,
129 | "metadata": {},
130 | "output_type": "execute_result"
131 | }
132 | ],
133 | "source": [
134 | "discrete_test.probabs_of_being_best(sim_count=20000, seed=52)"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": 8,
140 | "id": "7001814b-7705-420a-813d-b65393e68288",
141 | "metadata": {
142 | "tags": []
143 | },
144 | "outputs": [
145 | {
146 | "data": {
147 | "text/plain": [
148 | "{'A': 0.2964593, 'B': 0.309296, 'C': 3.45e-05}"
149 | ]
150 | },
151 | "execution_count": 8,
152 | "metadata": {},
153 | "output_type": "execute_result"
154 | }
155 | ],
156 | "source": [
157 | "discrete_test.expected_loss(sim_count=20000, seed=52)"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": 9,
163 | "id": "86cb2b3b-cc93-489f-ae1d-7becac229c33",
164 | "metadata": {
165 | "tags": []
166 | },
167 | "outputs": [
168 | {
169 | "name": "stdout",
170 | "output_type": "stream",
171 | "text": [
172 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n",
173 | "| variant | concentration | average_value | posterior_mean | credible_interval | prob_being_best | expected_loss |\n",
174 | "+===========+==============================================================+=================+==================+========================+===================+=================+\n",
175 | "| A | {1: 168.0, 2: 166.0, 3: 176.0, 4: 172.0, 5: 168.0, 6: 150.0} | 3.456 | 3.45626 | [3.3530612, 3.559381] | 0.0006 | 0.296753 |\n",
176 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n",
177 | "| B | {1: 256.0, 2: 246.0, 3: 111.0, 4: 116.0, 5: 239.0, 6: 232.0} | 3.44333 | 3.44362 | [3.3386877, 3.5493953] | 0.0006 | 0.309481 |\n",
178 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n",
179 | "| C | {1: 84.0, 2: 57.0, 3: 58.0, 4: 100.0, 5: 100.0, 6: 101.0} | 3.756 | 3.75296 | [3.5993774, 3.904388] | 0.9988 | 4.27e-05 |\n",
180 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n"
181 | ]
182 | }
183 | ],
184 | "source": [
185 | "results = discrete_test.evaluate()\n",
186 | "print(pd.DataFrame(results).to_markdown(tablefmt=\"grid\", index=False))"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 10,
192 | "id": "616e35ba-26d3-4d10-ad65-4dc37e5771a6",
193 | "metadata": {
194 | "tags": []
195 | },
196 | "outputs": [
197 | {
198 | "data": {
199 | "text/plain": [
200 | "{'A': 0.4319, 'B': 0.568, 'C': 0.0001}"
201 | ]
202 | },
203 | "execution_count": 10,
204 | "metadata": {},
205 | "output_type": "execute_result"
206 | },
207 | {
208 | "data": {
209 | "text/plain": [
210 | "{'A': 0.0371495, 'B': 0.0243128, 'C': 0.3335743}"
211 | ]
212 | },
213 | "execution_count": 10,
214 | "metadata": {},
215 | "output_type": "execute_result"
216 | },
217 | {
218 | "name": "stdout",
219 | "output_type": "stream",
220 | "text": [
221 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n",
222 | "| variant | concentration | average_value | posterior_mean | credible_interval | prob_being_best | expected_loss |\n",
223 | "+===========+==============================================================+=================+==================+========================+===================+=================+\n",
224 | "| A | {1: 168.0, 2: 166.0, 3: 176.0, 4: 172.0, 5: 168.0, 6: 150.0} | 3.456 | 3.45626 | [3.3515318, 3.5614544] | 0.4304 | 0.0370878 |\n",
225 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n",
226 | "| B | {1: 256.0, 2: 246.0, 3: 111.0, 4: 116.0, 5: 239.0, 6: 232.0} | 3.44333 | 3.44362 | [3.3376023, 3.5515158] | 0.56955 | 0.0246001 |\n",
227 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n",
228 | "| C | {1: 84.0, 2: 57.0, 3: 58.0, 4: 100.0, 5: 100.0, 6: 101.0} | 3.756 | 3.75296 | [3.6002351, 3.9037053] | 5e-05 | 0.33356 |\n",
229 | "+-----------+--------------------------------------------------------------+-----------------+------------------+------------------------+-------------------+-----------------+\n"
230 | ]
231 | }
232 | ],
233 | "source": [
234 | "# reversed test (where minimum is best)\n",
235 | "discrete_test.probabs_of_being_best(sim_count=20000, seed=52, min_is_best=True)\n",
236 | "discrete_test.expected_loss(sim_count=20000, seed=52, min_is_best=True)\n",
237 | "results_min = discrete_test.evaluate(min_is_best=True)\n",
238 | "print(pd.DataFrame(results_min).to_markdown(tablefmt=\"grid\", index=False))"
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": null,
244 | "id": "19b56eb8-143e-47aa-9a22-a2473f91cfa1",
245 | "metadata": {},
246 | "outputs": [],
247 | "source": []
248 | }
249 | ],
250 | "metadata": {
251 | "kernelspec": {
252 | "display_name": "Python 3 (ipykernel)",
253 | "language": "python",
254 | "name": "python3"
255 | },
256 | "language_info": {
257 | "codemirror_mode": {
258 | "name": "ipython",
259 | "version": 3
260 | },
261 | "file_extension": ".py",
262 | "mimetype": "text/x-python",
263 | "name": "python",
264 | "nbconvert_exporter": "python",
265 | "pygments_lexer": "ipython3",
266 | "version": "3.10.12"
267 | }
268 | },
269 | "nbformat": 4,
270 | "nbformat_minor": 5
271 | }
272 |
--------------------------------------------------------------------------------
/examples/goals_scored_ab_testing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "c5f8cedc-94d6-4805-90d4-466d4de6b293",
7 | "metadata": {
8 | "tags": []
9 | },
10 | "outputs": [],
11 | "source": [
12 | "from IPython.core.interactiveshell import InteractiveShell\n",
13 | "InteractiveShell.ast_node_interactivity = \"all\"\n",
14 | "\n",
15 | "import numpy as np\n",
16 | "import pandas as pd\n",
17 | "from bayesian_testing.experiments import PoissonDataTest"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 2,
23 | "id": "8e57546e-4b90-4c89-8668-aafe4aff6485",
24 | "metadata": {
25 | "tags": []
26 | },
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/plain": [
31 | "Generator(PCG64) at 0x111F9C660"
32 | ]
33 | },
34 | "execution_count": 2,
35 | "metadata": {},
36 | "output_type": "execute_result"
37 | }
38 | ],
39 | "source": [
40 | "np.random.default_rng(52)"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 3,
46 | "id": "1cc3f939-43a0-4d19-af63-9ae632861dee",
47 | "metadata": {
48 | "tags": []
49 | },
50 | "outputs": [],
51 | "source": [
52 | "# goals scored - more is better (duh...)\n",
53 | "psg_goals_for = [5, 5, 7, 1, 3, 3, 1, 1, 2, 0, 1, 3, 4, 2, 5]\n",
54 | "city_goals_for = [2, 4, 3, 4, 6, 1, 3, 6, 4, 0, 3, 1, 2, 1]\n",
55 | "bayern_goals_for = [6, 2, 7, 1, 1, 2, 0, 4, 2, 5, 2, 6, 3, 6, 2]\n",
56 | "\n",
57 | "\n",
58 | "# goals against - so fewer is better\n",
59 | "psg_goals_against = [0, 2, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 3, 1, 0]\n",
60 | "city_goals_against = [0, 0, 3, 2, 0, 1, 0, 3, 0, 1, 1, 0, 1, 2]\n",
61 | "bayern_goals_against = [1, 0, 0, 1, 1, 2, 1, 0, 2, 0, 0, 2, 2, 1, 0]"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 4,
67 | "id": "fe532f01-6c91-4462-9213-e33379be1f9e",
68 | "metadata": {
69 | "tags": []
70 | },
71 | "outputs": [],
72 | "source": [
73 | "# Poisson test for \"goals for\"\n",
74 | "poisson_test_gf = PoissonDataTest()\n",
75 | "poisson_test_gf.add_variant_data('psg', psg_goals_for)\n",
76 | "# adding \"city\" with effective sample size 10 and the prior mean 2 (20/10):\n",
77 | "poisson_test_gf.add_variant_data('city', city_goals_for, a_prior=20, b_prior=10)\n",
78 | "# adding \"bayern\" with aggregated data instead of list of all observations\n",
79 | "poisson_test_gf.add_variant_data_agg('bayern', totals=len(bayern_goals_for), sum_values=sum(bayern_goals_for))\n",
80 | "\n",
81 | "\n",
82 | "# Poisson test for \"goals against\"\n",
83 | "poisson_test_ga = PoissonDataTest()\n",
84 | "poisson_test_ga.add_variant_data('psg', psg_goals_against)\n",
85 | "poisson_test_ga.add_variant_data('city', city_goals_against)\n",
86 | "poisson_test_ga.add_variant_data('bayern', bayern_goals_against)"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 5,
92 | "id": "93cd6353-01c6-4873-a62e-9816932679fe",
93 | "metadata": {
94 | "tags": []
95 | },
96 | "outputs": [
97 | {
98 | "name": "stdout",
99 | "output_type": "stream",
100 | "text": [
101 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n",
102 | "| variant | totals | sum_values | observed_average | posterior_mean | credible_interval | prob_being_best | expected_loss |\n",
103 | "+===========+==========+==============+====================+==================+========================+===================+=================+\n",
104 | "| psg | 15 | 43 | 2.86667 | 2.8543 | [2.0701365, 3.7817813] | 0.24485 | 0.512094 |\n",
105 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n",
106 | "| city | 14 | 40 | 2.85714 | 2.5 | [1.9035733, 3.1737824] | 0.04655 | 0.870001 |\n",
107 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n",
108 | "| bayern | 15 | 49 | 3.26667 | 3.25166 | [2.4038302, 4.2176997] | 0.7086 | 0.109746 |\n",
109 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n"
110 | ]
111 | }
112 | ],
113 | "source": [
114 | "# poisson_test_gf.probabs_of_being_best(sim_count=20000, seed=52)\n",
115 | "# poisson_test_gf.expected_loss(sim_count=20000, seed=52)\n",
116 | "results_gf = poisson_test_gf.evaluate()\n",
117 | "print(pd.DataFrame(results_gf).to_markdown(tablefmt=\"grid\", index=False))"
118 | ]
119 | },
120 | {
121 | "cell_type": "code",
122 | "execution_count": 6,
123 | "id": "39220217-6553-4f88-b537-064ade561996",
124 | "metadata": {
125 | "tags": []
126 | },
127 | "outputs": [
128 | {
129 | "name": "stdout",
130 | "output_type": "stream",
131 | "text": [
132 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n",
133 | "| variant | totals | sum_values | observed_average | posterior_mean | credible_interval | prob_being_best | expected_loss |\n",
134 | "+===========+==========+==============+====================+==================+========================+===================+=================+\n",
135 | "| psg | 15 | 9 | 0.6 | 0.60265 | [0.2140532, 1.2324781] | 0.756 | 0.0425375 |\n",
136 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n",
137 | "| city | 14 | 14 | 1 | 1 | [0.4487859, 1.8478473] | 0.07585 | 0.439937 |\n",
138 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n",
139 | "| bayern | 15 | 13 | 0.86667 | 0.86755 | [0.3680665, 1.6067354] | 0.16815 | 0.30884 |\n",
140 | "+-----------+----------+--------------+--------------------+------------------+------------------------+-------------------+-----------------+\n"
141 | ]
142 | }
143 | ],
144 | "source": [
145 | "# poisson_test_ga.probabs_of_being_best(sim_count=20000, seed=52, min_is_best=True)\n",
146 | "# poisson_test_ga.expected_loss(sim_count=20000, seed=52, min_is_best=True)\n",
147 | "results_ga = poisson_test_ga.evaluate(min_is_best=True, interval_alpha=0.99)\n",
148 | "print(pd.DataFrame(results_ga).to_markdown(tablefmt=\"grid\", index=False))"
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": null,
154 | "id": "d36d68d3-d119-49a4-b757-016da25f6f28",
155 | "metadata": {},
156 | "outputs": [],
157 | "source": []
158 | }
159 | ],
160 | "metadata": {
161 | "kernelspec": {
162 | "display_name": "Python 3 (ipykernel)",
163 | "language": "python",
164 | "name": "python3"
165 | },
166 | "language_info": {
167 | "codemirror_mode": {
168 | "name": "ipython",
169 | "version": 3
170 | },
171 | "file_extension": ".py",
172 | "mimetype": "text/x-python",
173 | "name": "python",
174 | "nbconvert_exporter": "python",
175 | "pygments_lexer": "ipython3",
176 | "version": "3.10.12"
177 | }
178 | },
179 | "nbformat": 4,
180 | "nbformat_minor": 5
181 | }
182 |
--------------------------------------------------------------------------------
/examples/session_data_manual_pbbs.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "8b11e1e0-ccc4-4fc9-9cdd-9f906e64b1c7",
7 | "metadata": {
8 | "tags": []
9 | },
10 | "outputs": [],
11 | "source": [
12 | "from IPython.core.interactiveshell import InteractiveShell\n",
13 | "InteractiveShell.ast_node_interactivity = \"all\"\n",
14 | "\n",
15 | "import numpy as np\n",
16 | "np.set_printoptions(legacy=\"1.25\")\n",
17 | "import pandas as pd\n",
18 | "from bayesian_testing.metrics import eval_bernoulli_agg, eval_delta_lognormal_agg"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "id": "b31da712-cdb9-4671-b3ed-63e351896915",
25 | "metadata": {
26 | "tags": []
27 | },
28 | "outputs": [],
29 | "source": [
30 | "df = pd.read_csv(\"data/session_data.csv\")"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 3,
36 | "id": "6032fc6e-3a4c-47f3-830f-1a85d49c253c",
37 | "metadata": {
38 | "tags": []
39 | },
40 | "outputs": [
41 | {
42 | "data": {
43 | "text/plain": [
44 | "94500"
45 | ]
46 | },
47 | "execution_count": 3,
48 | "metadata": {},
49 | "output_type": "execute_result"
50 | },
51 | {
52 | "data": {
124 | "text/plain": [
125 | " conversion date revenue source variant\n",
126 | "0 0 2021-08-07 0.000000 desktop B\n",
127 | "1 1 2021-08-05 7.241015 desktop C\n",
128 | "2 0 2021-08-06 0.000000 desktop A\n",
129 | "3 0 2021-08-05 0.000000 desktop C\n",
130 | "4 0 2021-08-03 0.000000 desktop A"
131 | ]
132 | },
133 | "execution_count": 3,
134 | "metadata": {},
135 | "output_type": "execute_result"
136 | }
137 | ],
138 | "source": [
139 | "# example session data - each row represents one session\n",
140 | "len(df)\n",
141 | "df.head()"
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 4,
147 | "id": "744e5833-cbc3-45d3-963d-11c2a92acff2",
148 | "metadata": {
149 | "tags": []
150 | },
151 | "outputs": [
152 | {
153 | "data": {
222 | "text/plain": [
223 | " sessions conversions revenue conversion_rate \\\n",
224 | "variant \n",
225 | "A 31500 1580 30830.025613 0.050159 \n",
226 | "B 32000 1700 35203.216888 0.053125 \n",
227 | "C 31000 1550 37259.563364 0.050000 \n",
228 | "\n",
229 | " revenue_per_session revenue_per_converted_sessions \n",
230 | "variant \n",
231 | "A 0.978731 19.512674 \n",
232 | "B 1.100101 20.707775 \n",
233 | "C 1.201921 24.038428 "
234 | ]
235 | },
236 | "execution_count": 4,
237 | "metadata": {},
238 | "output_type": "execute_result"
239 | }
240 | ],
241 | "source": [
242 | "# summary statistics per variant\n",
243 | "\n",
244 | "summary = df.groupby('variant')[['variant', 'conversion', 'revenue']]\\\n",
245 | " .agg({'variant': 'count', 'conversion': 'sum','revenue': 'sum'})\\\n",
246 | " .rename(columns = {'variant': 'sessions', 'conversion': 'conversions'})\n",
247 | "\n",
248 | "summary['conversion_rate'] = summary['conversions'] / summary['sessions']\n",
249 | "summary['revenue_per_session'] = summary['revenue'] / summary['sessions']\n",
250 | "summary['revenue_per_converted_sessions'] = summary['revenue'] / summary['conversions']\n",
251 | "\n",
252 | "summary"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 5,
258 | "id": "be57dc82-8958-4118-aab0-71122490d17a",
259 | "metadata": {
260 | "tags": []
261 | },
262 | "outputs": [
263 | {
264 | "data": {
265 | "text/plain": [
266 | "[31500, 32000, 31000]"
267 | ]
268 | },
269 | "execution_count": 5,
270 | "metadata": {},
271 | "output_type": "execute_result"
272 | },
273 | {
274 | "data": {
275 | "text/plain": [
276 | "[1580, 1700, 1550]"
277 | ]
278 | },
279 | "execution_count": 5,
280 | "metadata": {},
281 | "output_type": "execute_result"
282 | },
283 | {
284 | "data": {
285 | "text/plain": [
286 | "[3831.806394737816, 4211.72986767986, 4055.965234848171]"
287 | ]
288 | },
289 | "execution_count": 5,
290 | "metadata": {},
291 | "output_type": "execute_result"
292 | },
293 | {
294 | "data": {
295 | "text/plain": [
296 | "[11029.923165846496, 12259.51868396913, 12357.911862914]"
297 | ]
298 | },
299 | "execution_count": 5,
300 | "metadata": {},
301 | "output_type": "execute_result"
302 | }
303 | ],
304 | "source": [
305 | "variant_A = df['revenue'][(df.variant == 'A')].values\n",
306 | "variant_B = df['revenue'][(df.variant == 'B')].values\n",
307 | "variant_C = df['revenue'][(df.variant == 'C')].values\n",
308 | "\n",
309 | "sessions = [\n",
310 | " variant_A.size,\n",
311 | " variant_B.size,\n",
312 | " variant_C.size\n",
313 | "]\n",
314 | "\n",
315 | "conversions = [\n",
316 | " sum(variant_A > 0),\n",
317 | " sum(variant_B > 0),\n",
318 | " sum(variant_C > 0)\n",
319 | "]\n",
320 | "\n",
321 | "sum_log_revenue = [\n",
322 | " np.log(variant_A[variant_A > 0]).sum(),\n",
323 | " np.log(variant_B[variant_B > 0]).sum(),\n",
324 | " np.log(variant_C[variant_C > 0]).sum()\n",
325 | "]\n",
326 | "\n",
327 | "sum_log_2_revenue = [\n",
328 | " np.square(np.log(variant_A[variant_A > 0])).sum(),\n",
329 | " np.square(np.log(variant_B[variant_B > 0])).sum(),\n",
330 | " np.square(np.log(variant_C[variant_C > 0])).sum()\n",
331 | "]\n",
332 | "\n",
333 | "sessions\n",
334 | "conversions\n",
335 | "sum_log_revenue\n",
336 | "sum_log_2_revenue"
337 | ]
338 | },
339 | {
340 | "cell_type": "markdown",
341 | "id": "a68cc3a7-1c6e-40c4-b5af-59a7fb9fb548",
342 | "metadata": {},
343 | "source": [
344 | "## Results"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": 6,
350 | "id": "3ade0625-fb50-434f-93f6-e70c3c543713",
351 | "metadata": {
352 | "tags": []
353 | },
354 | "outputs": [
355 | {
356 | "name": "stdout",
357 | "output_type": "stream",
358 | "text": [
359 | "Probabilities of being best: [0.043, 0.92335, 0.03365]\n",
360 | "Expected loss: [0.0030022, 5.89e-05, 0.0031487]\n",
361 | "95% credible intervals: [[0.0477987, 0.0525911], [0.0506903, 0.0556017], [0.0476257, 0.0524881]]\n"
362 | ]
363 | }
364 | ],
365 | "source": [
366 | "# conversion rate probabilities of being best, expected loss and credible intervals for each variant\n",
367 | "pbbs, loss, intervals = eval_bernoulli_agg(sessions, conversions)\n",
368 | "print(f\"Probabilities of being best: {pbbs}\")\n",
369 | "print(f\"Expected loss: {loss}\")\n",
370 | "print(f\"95% credible intervals: {intervals}\")"
371 | ]
372 | },
373 | {
374 | "cell_type": "code",
375 | "execution_count": 7,
376 | "id": "21c3ae3c-46b3-4bc7-bd33-5306d3e20506",
377 | "metadata": {
378 | "tags": []
379 | },
380 | "outputs": [
381 | {
382 | "name": "stdout",
383 | "output_type": "stream",
384 | "text": [
385 | "Probabilities of being best: [0.0002, 0.03395, 0.96585]\n",
386 | "Expected loss: [0.2212336, 0.1210695, 0.0008982]\n",
387 | "95% credible intervals: [[0.9086416, 1.0649507], [1.0043019, 1.170394], [1.1094296, 1.3069562]]\n"
388 | ]
389 | }
390 | ],
391 | "source": [
392 | "# revenue per session probabilities of being best, expected loss and credible intervals for each variant\n",
393 | "pbbs, loss, intervals = eval_delta_lognormal_agg(sessions, conversions, sum_log_revenue, sum_log_2_revenue)\n",
394 | "print(f\"Probabilities of being best: {pbbs}\")\n",
395 | "print(f\"Expected loss: {loss}\")\n",
396 | "print(f\"95% credible intervals: {intervals}\")"
397 | ]
398 | },
399 | {
400 | "cell_type": "markdown",
401 | "id": "bdb6c1f2-f144-4cfa-9808-b429ceed6354",
402 | "metadata": {},
403 | "source": [
404 | "### Results for \"being best\" = \"being minimum\""
405 | ]
406 | },
407 | {
408 | "cell_type": "code",
409 | "execution_count": 8,
410 | "id": "b651c9d9-6d51-4ad1-aabb-475296963a88",
411 | "metadata": {
412 | "tags": []
413 | },
414 | "outputs": [
415 | {
416 | "name": "stdout",
417 | "output_type": "stream",
418 | "text": [
419 | "Probabilities of being best: [0.4572, 0.00945, 0.53335]\n",
420 | "Expected loss: [0.0007868, 0.00374, 0.00062]\n",
421 | "95% credible intervals: [[0.0478316, 0.0526332], [0.050685, 0.0556378], [0.0476584, 0.0524571]]\n"
422 | ]
423 | }
424 | ],
425 | "source": [
426 | "# conversion rate probabilities of being best, expected loss and credible intervals for each variant\n",
427 | "pbbs, loss, intervals = eval_bernoulli_agg(sessions, conversions, min_is_best=True)\n",
428 | "print(f\"Probabilities of being best: {pbbs}\")\n",
429 | "print(f\"Expected loss: {loss}\")\n",
430 | "print(f\"95% credible intervals: {intervals}\")"
431 | ]
432 | },
433 | {
434 | "cell_type": "code",
435 | "execution_count": 9,
436 | "id": "f6573b6b-314e-49de-ac63-c75201eac707",
437 | "metadata": {
438 | "tags": []
439 | },
440 | "outputs": [
441 | {
442 | "name": "stdout",
443 | "output_type": "stream",
444 | "text": [
445 | "Probabilities of being best: [0.95695, 0.04285, 0.0002]\n",
446 | "Expected loss: [0.0010886, 0.1012619, 0.2202282]\n",
447 | "95% credible intervals: [[0.9073725, 1.0666041], [1.0044587, 1.1692741], [1.1082288, 1.305592]]\n"
448 | ]
449 | }
450 | ],
451 | "source": [
452 | "# revenue per session probabilities of being best, expected loss and credible intervals for each variant\n",
453 | "pbbs, loss, intervals = eval_delta_lognormal_agg(sessions, conversions, sum_log_revenue, sum_log_2_revenue, min_is_best=True)\n",
454 | "print(f\"Probabilities of being best: {pbbs}\")\n",
455 | "print(f\"Expected loss: {loss}\")\n",
456 | "print(f\"95% credible intervals: {intervals}\")"
457 | ]
458 | },
459 | {
460 | "cell_type": "code",
461 | "execution_count": null,
462 | "id": "bd9b3af3-d34c-4781-a05a-d94a7bc7ee1c",
463 | "metadata": {},
464 | "outputs": [],
465 | "source": []
466 | }
467 | ],
468 | "metadata": {
469 | "kernelspec": {
470 | "display_name": "Python 3 (ipykernel)",
471 | "language": "python",
472 | "name": "python3"
473 | },
474 | "language_info": {
475 | "codemirror_mode": {
476 | "name": "ipython",
477 | "version": 3
478 | },
479 | "file_extension": ".py",
480 | "mimetype": "text/x-python",
481 | "name": "python",
482 | "nbconvert_exporter": "python",
483 | "pygments_lexer": "ipython3",
484 | "version": "3.10.12"
485 | }
486 | },
487 | "nbformat": 4,
488 | "nbformat_minor": 5
489 | }
490 |
--------------------------------------------------------------------------------
/examples/waiting_time_ab_testing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "ffef50bb-d334-438c-b170-4d70c2d6d19e",
7 | "metadata": {
8 | "tags": []
9 | },
10 | "outputs": [],
11 | "source": [
12 | "from IPython.core.interactiveshell import InteractiveShell\n",
13 | "InteractiveShell.ast_node_interactivity = \"all\"\n",
14 | "\n",
15 | "import numpy as np\n",
16 | "np.set_printoptions(legacy=\"1.25\")\n",
17 | "import pandas as pd\n",
18 | "from bayesian_testing.experiments import ExponentialDataTest"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "id": "133748f0-26cd-4647-9cf3-e0b7646a51af",
25 | "metadata": {
26 | "tags": []
27 | },
28 | "outputs": [
29 | {
30 | "data": {
31 | "text/plain": [
32 | "Generator(PCG64) at 0x132F99AC0"
33 | ]
34 | },
35 | "execution_count": 2,
36 | "metadata": {},
37 | "output_type": "execute_result"
38 | }
39 | ],
40 | "source": [
41 | "# create a random generator (note: to make the sampling below reproducible, it would have to be assigned and used for the sampling itself):\n",
42 | "np.random.default_rng(100)"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 3,
48 | "id": "98cbecdc-69a9-48f4-a95e-d5c71644f00c",
49 | "metadata": {
50 | "tags": []
51 | },
52 | "outputs": [],
53 | "source": [
54 | "# waiting times for 3 different variants, each with many observations\n",
55 | "# generated using exponential distributions with defined scales (expected values)\n",
56 | "waiting_times_a = np.random.exponential(scale=10, size=200)\n",
57 | "waiting_times_b = np.random.exponential(scale=11, size=210)\n",
58 | "waiting_times_c = np.random.exponential(scale=11, size=220)"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 4,
64 | "id": "6510ba7f-d854-4a88-b063-eb44fc59cf1b",
65 | "metadata": {
66 | "tags": []
67 | },
68 | "outputs": [
69 | {
70 | "data": {
71 | "text/plain": [
72 | "9.547258592723825"
73 | ]
74 | },
75 | "execution_count": 4,
76 | "metadata": {},
77 | "output_type": "execute_result"
78 | },
79 | {
80 | "data": {
81 | "text/plain": [
82 | "11.761611555402082"
83 | ]
84 | },
85 | "execution_count": 4,
86 | "metadata": {},
87 | "output_type": "execute_result"
88 | },
89 | {
90 | "data": {
91 | "text/plain": [
92 | "12.042807741815093"
93 | ]
94 | },
95 | "execution_count": 4,
96 | "metadata": {},
97 | "output_type": "execute_result"
98 | }
99 | ],
100 | "source": [
101 | "np.mean(waiting_times_a)\n",
102 | "np.mean(waiting_times_b)\n",
103 | "np.mean(waiting_times_c)"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": 5,
109 | "id": "fde94e6d-a05b-4863-8c85-002e623ca2fb",
110 | "metadata": {
111 | "tags": []
112 | },
113 | "outputs": [],
114 | "source": [
115 | "# Exponential A/B/C test\n",
116 | "exponential_test = ExponentialDataTest()\n",
117 | "exponential_test.add_variant_data('A', waiting_times_a)\n",
118 | "exponential_test.add_variant_data('B', waiting_times_b)\n",
119 | "exponential_test.add_variant_data('C', waiting_times_c)"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": 6,
125 | "id": "86c03a05-d091-4de5-a223-27efdfbe0615",
126 | "metadata": {
127 | "tags": []
128 | },
129 | "outputs": [
130 | {
131 | "name": "stdout",
132 | "output_type": "stream",
133 | "text": [
134 | "+-----------+----------+--------------+--------------------+------------------+--------------------------+-------------------+-----------------+\n",
135 | "| variant | totals | sum_values | observed_average | posterior_mean | credible_interval | prob_being_best | expected_loss |\n",
136 | "+===========+==========+==============+====================+==================+==========================+===================+=================+\n",
137 | "| A | 200 | 1909.45 | 9.54726 | 9.54299 | [8.3546163, 11.024919] | 0.97495 | 0.0094311 |\n",
138 | "+-----------+----------+--------------+--------------------+------------------+--------------------------+-------------------+-----------------+\n",
139 | "| B | 210 | 2469.94 | 11.7616 | 11.7565 | [10.3265266, 13.5271393] | 0.0177 | 2.23267 |\n",
140 | "+-----------+----------+--------------+--------------------+------------------+--------------------------+-------------------+-----------------+\n",
141 | "| C | 220 | 2649.42 | 12.0428 | 12.0378 | [10.5696647, 13.8087663] | 0.00735 | 2.50462 |\n",
142 | "+-----------+----------+--------------+--------------------+------------------+--------------------------+-------------------+-----------------+\n"
143 | ]
144 | }
145 | ],
146 | "source": [
147 | "# evaluate test (using min_is_best=True as a lower waiting time is better)\n",
148 | "results = exponential_test.evaluate(min_is_best=True)\n",
149 | "print(pd.DataFrame(results).to_markdown(tablefmt=\"grid\", index=False))"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "id": "5cbe6fe3-d6c8-422a-ab62-ffd87b345459",
156 | "metadata": {},
157 | "outputs": [],
158 | "source": []
159 | }
160 | ],
161 | "metadata": {
162 | "kernelspec": {
163 | "display_name": "Python 3 (ipykernel)",
164 | "language": "python",
165 | "name": "python3"
166 | },
167 | "language_info": {
168 | "codemirror_mode": {
169 | "name": "ipython",
170 | "version": 3
171 | },
172 | "file_extension": ".py",
173 | "mimetype": "text/x-python",
174 | "name": "python",
175 | "nbconvert_exporter": "python",
176 | "pygments_lexer": "ipython3",
177 | "version": "3.10.12"
178 | }
179 | },
180 | "nbformat": 4,
181 | "nbformat_minor": 5
182 | }
183 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "bayesian-testing"
3 | version = "0.9.1"
4 | description = "Bayesian A/B testing with simple probabilities."
5 | authors = ["Matus Baniar"]
6 | license = "MIT"
7 | readme = "README.md"
8 | homepage = "https://github.com/Matt52/bayesian-testing"
9 | repository = "https://github.com/Matt52/bayesian-testing"
10 | keywords = ["ab testing", "bayes", "bayesian statistics"]
11 | include = [
12 | "LICENSE",
13 | ]
14 |
15 | packages = [
16 | {include = "bayesian_testing"}
17 | ]
18 |
19 | [tool.poetry.dependencies]
20 | python = ">=3.8"
21 | numpy = ">=1.19"
22 |
23 | [tool.poetry.group.dev.dependencies]
24 | jupyter = ">=1.1"
25 | jupyterlab = ">=4.3"
26 | black = ">=23.1"
27 | pytest = ">=8.3"
28 | coverage = ">=7.6"
29 | pandas = ">=1.5"
30 | pre-commit = ">=3.1"
31 | isort = ">=5.10"
32 | tabulate = ">=0.9.0"
33 | setuptools = { version = "^78.1.1", markers = "python_version >= '3.9'" }
34 | tornado = { version = "^6.5.0", markers = "python_version >= '3.9'" }
35 |
36 | [build-system]
37 | requires = ["poetry-core>=1.0.0"]
38 | build-backend = "poetry.core.masonry.api"
39 |
--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
1 | # Tests
2 |
3 | Run in the top-level directory:
4 | ```bash
5 | python -m pytest
6 | ```
7 |
8 | or, to also collect coverage:
9 | ```bash
10 | coverage run -m pytest
11 | coverage report
12 | coverage html
13 | ```
14 |
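15 | A single test module can be run the same way, e.g. `python -m pytest tests/test_binary.py`.
16 |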
--------------------------------------------------------------------------------
/tests/test_binary.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from bayesian_testing.experiments import BinaryDataTest
4 |
5 |
6 | @pytest.fixture
7 | def conv_test():
8 | cv = BinaryDataTest()
9 | cv.add_variant_data("A", [0, 1, 0, 1, 0, 0, 0, 0, 0, 1])
10 | cv.add_variant_data("B", [0, 0, 0, 1, 0, 0, 0, 0, 0, 1])
11 | cv.add_variant_data_agg("C", 11, 2, a_prior=1, b_prior=2)
12 | cv.add_variant_data_agg("D", 10, 10)
13 | cv.add_variant_data_agg("D", 20, 20, replace=False)
14 | cv.add_variant_data_agg("D", 20, 20, replace=True)
15 | cv.delete_variant("D")
16 | return cv
17 |
18 |
19 | def test_variants(conv_test):
20 | assert conv_test.variant_names == ["A", "B", "C"]
21 |
22 |
23 | def test_totals(conv_test):
24 | assert conv_test.totals == [10, 10, 11]
25 |
26 |
27 | def test_positives(conv_test):
28 | assert conv_test.positives == [3, 2, 2]
29 |
30 |
31 | def test_a_priors(conv_test):
32 | assert conv_test.a_priors == [0.5, 0.5, 1]
33 |
34 |
35 | def test_b_priors(conv_test):
36 | assert conv_test.b_priors == [0.5, 0.5, 2]
37 |
38 |
39 | def test_probabs_of_being_best(conv_test):
40 | pbbs = conv_test.probabs_of_being_best(sim_count=20000, seed=52)
41 | assert pbbs == {"A": 0.57225, "B": 0.233, "C": 0.19475}
42 |
43 |
44 | def test_expected_loss(conv_test):
45 | loss = conv_test.expected_loss(sim_count=20000, seed=52)
46 | assert loss == {"A": 0.0529281, "B": 0.1452113, "C": 0.1557502}
47 |
48 |
49 | def test_credible_intervals_95(conv_test):
50 | ci = conv_test.credible_intervals(sim_count=20000, seed=52)
51 | assert ci == {
52 | "A": [0.0917579, 0.6028411],
53 | "B": [0.0442435, 0.5032699],
54 | "C": [0.0522996, 0.452392],
55 | }
56 |
57 |
58 | def test_credible_intervals_99(conv_test):
59 | ci = conv_test.credible_intervals(sim_count=20000, seed=52, interval_alpha=0.99)
60 | assert ci == {
61 | "A": [0.0552614, 0.6892976],
62 | "B": [0.0214602, 0.6045644],
63 | "C": [0.0300364, 0.5320378],
64 | }
65 |
66 |
67 | def test_evaluate(conv_test):
68 | eval_report = conv_test.evaluate(sim_count=20000, seed=52)
69 | assert eval_report == [
70 | {
71 | "variant": "A",
72 | "totals": 10,
73 | "positives": 3,
74 | "positive_rate": 0.3,
75 | "posterior_mean": 0.31818,
76 | "credible_interval": [0.0917579, 0.6028411],
77 | "prob_being_best": 0.57225,
78 | "expected_loss": 0.0529281,
79 | },
80 | {
81 | "variant": "B",
82 | "totals": 10,
83 | "positives": 2,
84 | "positive_rate": 0.2,
85 | "posterior_mean": 0.22727,
86 | "credible_interval": [0.0442435, 0.5032699],
87 | "prob_being_best": 0.233,
88 | "expected_loss": 0.1452113,
89 | },
90 | {
91 | "variant": "C",
92 | "totals": 11,
93 | "positives": 2,
94 | "positive_rate": 0.18182,
95 | "posterior_mean": 0.21429,
96 | "credible_interval": [0.0522996, 0.452392],
97 | "prob_being_best": 0.19475,
98 | "expected_loss": 0.1557502,
99 | },
100 | ]
101 |
102 |
103 | def test_wrong_inputs():
104 | cv = BinaryDataTest()
105 | with pytest.raises(ValueError):
106 | cv.add_variant_data(10, [1, 0, 1])
107 | with pytest.raises(ValueError):
108 | cv.add_variant_data("A", [1, 0, 1], a_prior=-1)
109 | with pytest.raises(ValueError):
110 | cv.add_variant_data_agg("A", -1, 7)
111 | with pytest.raises(ValueError):
112 | cv.add_variant_data_agg("A", 1, -7)
113 | with pytest.raises(ValueError):
114 | cv.add_variant_data("A", [])
115 | with pytest.raises(ValueError):
116 | cv.add_variant_data("A", [1, 2, 0])
117 |
118 |
119 | def test_wrong_credible_interval_input(conv_test):
120 | with pytest.raises(ValueError):
121 | conv_test.evaluate(interval_alpha=2)
122 | with pytest.raises(ValueError):
123 | conv_test.evaluate(interval_alpha=-1)
124 |
--------------------------------------------------------------------------------
/tests/test_delta_lognormal.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from bayesian_testing.experiments import DeltaLognormalDataTest
4 |
5 |
6 | @pytest.fixture
7 | def rev_test():
8 | rev = DeltaLognormalDataTest()
9 | rev.add_variant_data_agg(
10 | "A", 31500, 1580, 30830.02561, 3831.806394737816, 11029.923165846496, a_prior_beta=1
11 | )
12 | rev.add_variant_data_agg(
13 | "B", 32000, 1700, 35203.21689, 4211.72986767986, 12259.51868396913, m_prior=2, w_prior=0.02
14 | )
15 | rev.add_variant_data_agg(
16 | "C",
17 | 31000,
18 | 1550,
19 | 37259.56336,
20 | 4055.965234848171,
21 | 12357.911862914,
22 | a_prior_ig=1,
23 | b_prior_ig=2,
24 | )
25 | rev.add_variant_data("D", [0, 10.7, 0, 8, 0, 0, 0, 0, 0, 11.22])
26 | rev.add_variant_data("D", [0, 10.7, 0, 8, 0, 0, 0, 0, 0, 11.22], replace=False)
27 | rev.add_variant_data("D", [0, 10.7, 0, 8, 0, 0, 0, 0, 0, 11.22], replace=True)
28 | rev.delete_variant("D")
29 | return rev
30 |
31 |
32 | def test_variants(rev_test):
33 | assert rev_test.variant_names == ["A", "B", "C"]
34 |
35 |
36 | def test_totals(rev_test):
37 | assert rev_test.totals == [31500, 32000, 31000]
38 |
39 |
40 | def test_positives(rev_test):
41 | assert rev_test.positives == [1580, 1700, 1550]
42 |
43 |
44 | def test_sum_values(rev_test):
45 | assert rev_test.sum_values == [30830.02561, 35203.21689, 37259.56336]
46 |
47 |
48 | def test_sum_logs(rev_test):
49 | assert [round(i, 5) for i in rev_test.sum_logs] == [3831.80639, 4211.72987, 4055.96523]
50 |
51 |
52 | def test_sum_logs_2(rev_test):
53 | assert [round(i, 5) for i in rev_test.sum_logs_2] == [11029.92317, 12259.51868, 12357.91186]
54 |
55 |
56 | def test_a_priors_beta(rev_test):
57 | assert rev_test.a_priors_beta == [1, 0.5, 0.5]
58 |
59 |
60 | def test_b_priors_beta(rev_test):
61 | assert rev_test.b_priors_beta == [0.5, 0.5, 0.5]
62 |
63 |
64 | def test_m_priors(rev_test):
65 | assert rev_test.m_priors == [1, 2, 1]
66 |
67 |
68 | def test_a_priors_ig(rev_test):
69 | assert rev_test.a_priors_ig == [0, 0, 1]
70 |
71 |
72 | def test_b_priors_ig(rev_test):
73 | assert rev_test.b_priors_ig == [0, 0, 2]
74 |
75 |
76 | def test_w_priors(rev_test):
77 | assert rev_test.w_priors == [0.01, 0.02, 0.01]
78 |
79 |
80 | def test_probabs_of_being_best(rev_test):
81 | pbbs = rev_test.probabs_of_being_best(sim_count=20000, seed=152)
82 | assert pbbs == {"A": 0.0004, "B": 0.03355, "C": 0.96605}
83 |
84 |
85 | def test_expected_loss(rev_test):
86 | loss = rev_test.expected_loss(sim_count=20000, seed=152)
87 | assert loss == {"A": 0.2214416, "B": 0.1212818, "C": 0.0008639}
88 |
89 |
90 | def test_credible_intervals_95(rev_test):
91 | ci = rev_test.credible_intervals(sim_count=20000, seed=152)
92 | assert ci == {
93 | "A": [0.9084717, 1.0661301],
94 | "B": [1.0038179, 1.1705975],
95 | "C": [1.1097381, 1.3084524],
96 | }
97 |
98 |
99 | def test_credible_intervals_99(rev_test):
100 | ci = rev_test.credible_intervals(sim_count=20000, seed=152, interval_alpha=0.99)
101 | assert ci == {
102 | "A": [0.8847602, 1.0948976],
103 | "B": [0.9789665, 1.1996421],
104 | "C": [1.0813447, 1.3416523],
105 | }
106 |
107 |
108 | def test_evaluate(rev_test):
109 | eval_report = rev_test.evaluate(sim_count=20000, seed=152)
110 | assert eval_report == [
111 | {
112 | "variant": "A",
113 | "totals": 31500,
114 | "positives": 1580,
115 | "sum_values": 30830.02561,
116 | "avg_values": 0.97873,
117 | "avg_positive_values": 19.51267,
118 | "posterior_mean": 0.98309,
119 | "credible_interval": [0.9084717, 1.0661301],
120 | "prob_being_best": 0.0004,
121 | "expected_loss": 0.2214416,
122 | },
123 | {
124 | "variant": "B",
125 | "totals": 32000,
126 | "positives": 1700,
127 | "sum_values": 35203.21689,
128 | "avg_values": 1.1001,
129 | "avg_positive_values": 20.70777,
130 | "posterior_mean": 1.08266,
131 | "credible_interval": [1.0038179, 1.1705975],
132 | "prob_being_best": 0.03355,
133 | "expected_loss": 0.1212818,
134 | },
135 | {
136 | "variant": "C",
137 | "totals": 31000,
138 | "positives": 1550,
139 | "sum_values": 37259.56336,
140 | "avg_values": 1.20192,
141 | "avg_positive_values": 24.03843,
142 | "posterior_mean": 1.20276,
143 | "credible_interval": [1.1097381, 1.3084524],
144 | "prob_being_best": 0.96605,
145 | "expected_loss": 0.0008639,
146 | },
147 | ]
148 |
149 |
150 | def test_wrong_inputs():
151 | dl_test = DeltaLognormalDataTest()
152 | with pytest.raises(ValueError):
153 | dl_test.add_variant_data(10, [1, 2, 3])
154 | with pytest.raises(ValueError):
155 | dl_test.add_variant_data("A", [1, 2, 3], a_prior_beta=-1)
156 | with pytest.raises(ValueError):
157 | dl_test.add_variant_data("A", [])
158 | with pytest.raises(ValueError):
159 | dl_test.add_variant_data("A", [0, 0, 0])
160 | with pytest.raises(ValueError):
161 | dl_test.add_variant_data("C", [0, 10.7, -1])
162 |
--------------------------------------------------------------------------------
/tests/test_delta_normal.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from bayesian_testing.experiments import DeltaNormalDataTest
3 |
4 |
5 | @pytest.fixture
6 | def delta_norm_test():
7 | delta_norm = DeltaNormalDataTest()
8 | delta_norm.add_variant_data_agg(
9 | name="A",
10 | totals=31500,
11 | non_zeros=10,
12 | sum_values=102.02561,
13 | sum_values_2=1700.8,
14 | a_prior_beta=1,
15 | )
16 | delta_norm.add_variant_data_agg(
17 | name="B",
18 | totals=32000,
19 | non_zeros=40,
20 | sum_values=273.02,
21 | sum_values_2=3567.5,
22 | a_prior_beta=0.02,
23 | m_prior=2,
24 | w_prior=0.02,
25 | )
26 |
27 | delta_norm.add_variant_data("C", [0, 10.7, -1, 8, 0, -3, 0, -10, 0, 11.22])
28 | delta_norm.add_variant_data("C", [0, 10.7, -1, 8, 0, -3, 0, -10, 0, 11.22], replace=False)
29 | delta_norm.add_variant_data("C", [0, 10.7, -1, 8, 0, -3, 0, -10, 0, 11.22], replace=True)
30 | delta_norm.delete_variant("C")
31 | return delta_norm
32 |
33 |
34 | def test_variants(delta_norm_test):
35 | assert delta_norm_test.variant_names == ["A", "B"]
36 |
37 |
38 | def test_totals(delta_norm_test):
39 | assert delta_norm_test.totals == [31500, 32000]
40 |
41 |
42 | def test_non_zeros(delta_norm_test):
43 | assert delta_norm_test.non_zeros == [10, 40]
44 |
45 |
46 | def test_sum_values(delta_norm_test):
47 | assert delta_norm_test.sum_values == [102.02561, 273.02]
48 |
49 |
50 | def test_sum_values_2(delta_norm_test):
51 | assert delta_norm_test.sum_values_2 == [1700.8, 3567.5]
52 |
53 |
54 | def test_a_priors_beta(delta_norm_test):
55 | assert delta_norm_test.a_priors_beta == [1, 0.02]
56 |
57 |
58 | def test_b_priors_beta(delta_norm_test):
59 | assert delta_norm_test.b_priors_beta == [0.5, 0.5]
60 |
61 |
62 | def test_m_priors(delta_norm_test):
63 | assert delta_norm_test.m_priors == [1, 2]
64 |
65 |
66 | def test_a_priors_ig(delta_norm_test):
67 | assert delta_norm_test.a_priors_ig == [0, 0]
68 |
69 |
70 | def test_b_priors_ig(delta_norm_test):
71 | assert delta_norm_test.b_priors_ig == [0, 0]
72 |
73 |
74 | def test_w_priors(delta_norm_test):
75 | assert delta_norm_test.w_priors == [0.01, 0.02]
76 |
77 |
78 | def test_probabs_of_being_best(delta_norm_test):
79 | pbbs = delta_norm_test.probabs_of_being_best(sim_count=20000, seed=152)
80 | assert pbbs == {"A": 0.02235, "B": 0.97765}
81 |
82 |
83 | def test_expected_loss(delta_norm_test):
84 | loss = delta_norm_test.expected_loss(sim_count=20000, seed=152)
85 | assert loss == {"A": 0.005, "B": 2.46e-05}
86 |
87 |
88 | def test_credible_intervals_95(delta_norm_test):
89 | ci = delta_norm_test.credible_intervals(sim_count=20000, seed=152)
90 | assert ci == {
91 | "A": [0.0011935, 0.0070944],
92 | "B": [0.0051651, 0.0125917],
93 | }
94 |
95 |
96 | def test_credible_intervals_99(delta_norm_test):
97 | ci = delta_norm_test.credible_intervals(sim_count=20000, seed=152, interval_alpha=0.99)
98 | assert ci == {
99 | "A": [0.0006048, 0.0087352],
100 | "B": [0.0043509, 0.0142946],
101 | }
102 |
103 |
104 | def test_evaluate(delta_norm_test):
105 | eval_report = delta_norm_test.evaluate(sim_count=20000, seed=152)
106 | assert eval_report == [
107 | {
108 | "variant": "A",
109 | "totals": 31500,
110 | "non_zeros": 10,
111 | "sum_values": 102.02561,
112 | "avg_values": 0.00324,
113 | "avg_non_zero_values": 10.20256,
114 | "posterior_mean": 0.00356,
115 | "credible_interval": [0.0011935, 0.0070944],
116 | "prob_being_best": 0.02235,
117 | "expected_loss": 0.005,
118 | },
119 | {
120 | "variant": "B",
121 | "totals": 32000,
122 | "non_zeros": 40,
123 | "sum_values": 273.02,
124 | "avg_values": 0.00853,
125 | "avg_non_zero_values": 6.8255,
126 | "posterior_mean": 0.00853,
127 | "credible_interval": [0.0051651, 0.0125917],
128 | "prob_being_best": 0.97765,
129 | "expected_loss": 2.46e-05,
130 | },
131 | ]
132 |
133 |
134 | def test_wrong_inputs():
135 | dn_test = DeltaNormalDataTest()
136 | with pytest.raises(ValueError):
137 | dn_test.add_variant_data(10, [1, 2, 3])
138 | with pytest.raises(ValueError):
139 | dn_test.add_variant_data("A", [1, 2, 3], a_prior_beta=-1)
140 | with pytest.raises(ValueError):
141 | dn_test.add_variant_data_agg("A", 2, 3, 6, 21)
142 | with pytest.raises(ValueError):
143 | dn_test.add_variant_data_agg("A", 1, -7, 6, 21)
144 | with pytest.raises(ValueError):
145 | dn_test.add_variant_data("A", [])
146 | with pytest.raises(ValueError):
147 | dn_test.add_variant_data("A", [0, 0, 0])
148 | with pytest.raises(ValueError):
149 | dn_test.add_variant_data("C", [0, 10.7, -1], a_prior_ig=-1)
150 |
--------------------------------------------------------------------------------
/tests/test_discrete.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from bayesian_testing.experiments import DiscreteDataTest
4 |
5 |
6 | @pytest.fixture
7 | def discrete_test():
8 | disc = DiscreteDataTest(states=[1, 2, 3, 4, 5, 6])
9 | disc.add_variant_data("A", [6, 5, 4, 4, 4, 2, 5, 4, 2, 1, 2, 5, 4, 6, 2, 3, 6, 2, 3, 6])
10 | disc.add_variant_data("B", [4, 6, 3, 6, 4, 6, 6, 1, 4, 1])
11 | disc.add_variant_data_agg("C", [10, 10, 10, 10, 10, 10], prior=[100, 100, 100, 100, 100, 100])
12 | disc.add_variant_data_agg("D", [1, 2, 3, 8, 10, 7])
13 | disc.add_variant_data_agg("D", [1, 2, 3, 8, 10, 6], replace=False)
14 | disc.add_variant_data_agg("D", [1, 2, 3, 8, 10, 6], replace=True)
15 | disc.delete_variant("D")
16 | return disc
17 |
18 |
19 | def test_variants(discrete_test):
20 | assert discrete_test.variant_names == ["A", "B", "C"]
21 |
22 |
23 | def test_states(discrete_test):
24 | assert discrete_test.states == [1, 2, 3, 4, 5, 6]
25 |
26 |
27 | def test_concentrations(discrete_test):
28 | assert discrete_test.concentrations == [
29 | [1, 5, 2, 5, 3, 4],
30 | [2, 0, 1, 3, 0, 4],
31 | [10, 10, 10, 10, 10, 10],
32 | ]
33 |
34 |
35 | def test_probabs_of_being_best(discrete_test):
36 | pbbs = discrete_test.probabs_of_being_best(sim_count=20000, seed=52)
37 | assert pbbs == {"A": 0.35595, "B": 0.59325, "C": 0.0508}
38 |
39 |
40 | def test_expected_loss(discrete_test):
41 | loss = discrete_test.expected_loss(sim_count=20000, seed=52)
42 | assert loss == {"A": 0.3053921, "B": 0.1560257, "C": 0.5328904}
43 |
44 |
45 | def test_credible_intervals_95(discrete_test):
46 | ci = discrete_test.credible_intervals(sim_count=20000, seed=52)
47 | assert ci == {
48 | "A": [3.122705, 4.3265574],
49 | "B": [2.9826238, 4.7094185],
50 | "C": [3.3681015, 3.6302274],
51 | }
52 |
53 |
54 | def test_credible_intervals_99(discrete_test):
55 | ci = discrete_test.credible_intervals(sim_count=20000, seed=52, interval_alpha=0.99)
56 | assert ci == {
57 | "A": [2.9260719, 4.5245231],
58 | "B": [2.7013326, 4.9277036],
59 | "C": [3.3281699, 3.6751105],
60 | }
61 |
62 |
63 | def test_evaluate(discrete_test):
64 | eval_report = discrete_test.evaluate(sim_count=20000, seed=52)
65 | assert eval_report == [
66 | {
67 | "variant": "A",
68 | "concentration": {1: 1.0, 2: 5.0, 3: 2.0, 4: 5.0, 5: 3.0, 6: 4.0},
69 | "average_value": 3.8,
70 | "posterior_mean": 3.73077,
71 | "credible_interval": [3.122705, 4.3265574],
72 | "prob_being_best": 0.35595,
73 | "expected_loss": 0.3053921,
74 | },
75 | {
76 | "variant": "B",
77 | "concentration": {1: 2.0, 2: 0.0, 3: 1.0, 4: 3.0, 5: 0.0, 6: 4.0},
78 | "average_value": 4.1,
79 | "posterior_mean": 3.875,
80 | "credible_interval": [2.9826238, 4.7094185],
81 | "prob_being_best": 0.59325,
82 | "expected_loss": 0.1560257,
83 | },
84 | {
85 | "variant": "C",
86 | "concentration": {1: 10, 2: 10, 3: 10, 4: 10, 5: 10, 6: 10},
87 | "average_value": 3.5,
88 | "posterior_mean": 3.5,
89 | "credible_interval": [3.3681015, 3.6302274],
90 | "prob_being_best": 0.0508,
91 | "expected_loss": 0.5328904,
92 | },
93 | ]
94 |
95 |
96 | def test_non_numerical_states_error():
97 | with pytest.raises(ValueError):
98 | DiscreteDataTest(states=[1, 2.0, "3"])
99 |
100 |
101 | def test_non_string_variant_error(discrete_test):
102 | with pytest.raises(ValueError):
103 | discrete_test.add_variant_data_agg(1, [1, 2, 3, 8, 10, 7])
104 |
105 |
106 | def test_length_mismatch_input_error(discrete_test):
107 | with pytest.raises(ValueError):
108 | discrete_test.add_variant_data_agg("D", [1, 2, 3, 8, 10])
109 |
110 |
111 | def test_empty_data_error(discrete_test):
112 | with pytest.raises(ValueError):
113 | discrete_test.add_variant_data("D", [])
114 |
115 |
116 | def test_non_existing_state_error(discrete_test):
117 | with pytest.raises(ValueError):
118 | discrete_test.add_variant_data("D", [1, 2, 3, 5, 21])
119 |
--------------------------------------------------------------------------------
/tests/test_evaluation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 |
4 | from bayesian_testing.metrics import (
5 | eval_bernoulli_agg,
6 | eval_normal_agg,
7 | eval_delta_lognormal_agg,
8 | eval_delta_normal_agg,
9 | eval_numerical_dirichlet_agg,
10 | eval_poisson_agg,
11 | eval_exponential_agg,
12 | )
13 |
14 | PBB_BERNOULLI_AGG_INPUTS = [
15 | {
16 | "input": {
17 | "totals": [31500, 32000, 31000],
18 | "successes": [1580, 1700, 1550],
19 | "sim_count": 20000,
20 | "seed": 52,
21 | "min_is_best": False,
22 | "interval_alpha": 0.95,
23 | },
24 | "expected_output": (
25 | [0.04185, 0.92235, 0.0358],
26 | [0.0030138, 6.06e-05, 0.0031649],
27 | [[0.0477826, 0.0526302], [0.0506933, 0.0555936], [0.0476604, 0.0524757]],
28 | ),
29 | },
30 | {
31 | "input": {
32 | "totals": [31500, 32000, 31000],
33 | "successes": [1580, 1700, 1550],
34 | "sim_count": 20000,
35 | "seed": 52,
36 | "min_is_best": True,
37 | "interval_alpha": 0.99,
38 | },
39 | "expected_output": (
40 | [0.4594, 0.00925, 0.53135],
41 | [0.000781, 0.0037342, 0.0006299],
42 | [[0.0470873, 0.0534391], [0.0499116, 0.056421], [0.0469394, 0.0532695]],
43 | ),
44 | },
45 | {
46 | "input": {
47 | "totals": [100, 200],
48 | "successes": [80, 160],
49 | "sim_count": 10000,
50 | "seed": 52,
51 | "min_is_best": False,
52 | "interval_alpha": 0.5,
53 | },
54 | "expected_output": (
55 | [0.4899, 0.5101],
56 | [0.0204051, 0.0182965],
57 | [[0.7713375, 0.8248972], [0.7810789, 0.8179153]],
58 | ),
59 | },
60 | {
61 | "input": {
62 | "totals": [100, 100],
63 | "successes": [0, 0],
64 | "sim_count": 20000,
65 | "seed": 52,
66 | "min_is_best": False,
67 | "interval_alpha": 0.95,
68 | },
69 | "expected_output": (
70 | [0.5008, 0.4992],
71 | [0.0030829, 0.0031614],
72 | [[4.8e-06, 0.0252857], [4.8e-06, 0.0243717]],
73 | ),
74 | },
75 | {
76 | "input": {
77 | "totals": [100],
78 | "successes": [77],
79 | "sim_count": 20000,
80 | "seed": 52,
81 | "min_is_best": False,
82 | "interval_alpha": 0.95,
83 | },
84 | "expected_output": ([1], [0], [[0.6810233, 0.8442006]]),
85 | },
86 | {
87 | "input": {
88 | "totals": [],
89 | "successes": [],
90 | "sim_count": 20000,
91 | "seed": 52,
92 | "min_is_best": False,
93 | "interval_alpha": 0.95,
94 | },
95 | "expected_output": ([], [], []),
96 | },
97 | ]
98 |
99 | PBB_NORMAL_AGG_INPUTS = [
100 | {
101 | "input": {
102 | "totals": [31000, 30000, 32000],
103 | "sums": [33669.629254438274, 32451.58924937506, 34745.69678322253],
104 | "sums_2": [659657.6891070933, 95284.82070196551, 260327.13931832163],
105 | "sim_count": 20000,
106 | "seed": 52,
107 | "interval_alpha": 0.95,
108 | },
109 | "expected_output": (
110 | [0.43605, 0.19685, 0.3671],
111 | [0.0133512, 0.0179947, 0.0137618],
112 | [[1.0366696, 1.13634], [1.0652914, 1.0977888], [1.0574217, 1.1141581]],
113 | ),
114 | },
115 | {
116 | "input": {
117 | "totals": [10000, 10000],
118 | "sums": [11446.345516947431, 10708.892428298526],
119 | "sums_2": [214614.35949718487, 31368.55305547222],
120 | "sim_count": 20000,
121 | "seed": 52,
122 | "interval_alpha": 0.99,
123 | },
124 | "expected_output": (
125 | [0.94445, 0.05555],
126 | [0.0011338, 0.0753121],
127 | [[1.0278553, 1.2601174], [1.0337017, 1.1071861]],
128 | ),
129 | },
130 | {
131 | "input": {
132 | "totals": [10, 20, 30, 40],
133 | "sums": [0, 0, 0, 0],
134 | "sums_2": [0, 0, 0, 0],
135 | "sim_count": 20000,
136 | "seed": 52,
137 | "interval_alpha": 0.95,
138 | },
139 | "expected_output": (
140 | [0.40785, 0.25105, 0.1928, 0.1483],
141 | [0.0058965, 0.0065083, 0.0066249, 0.0067183],
142 | [
143 | [-0.021071, 0.0232855],
144 | [-0.0101753, 0.0108701],
145 | [-0.0064358, 0.0070877],
146 | [-0.004795, 0.0052896],
147 | ],
148 | ),
149 | },
150 | {
151 | "input": {
152 | "totals": [100],
153 | "sums": [0],
154 | "sums_2": [0],
155 | "sim_count": 10000,
156 | "seed": 52,
157 | "interval_alpha": 0.95,
158 | },
159 | "expected_output": ([1], [0], [[-0.0019355, 0.0020896]]),
160 | },
161 | {
162 | "input": {
163 | "totals": [10000, 10000],
164 | "sums": [11446.35, 11446.35],
165 | "sums_2": [214614.36, 214614.36],
166 | "sim_count": 20000,
167 | "seed": 52,
168 | "interval_alpha": 0.95,
169 | },
170 | "expected_output": (
171 | [0.5024, 0.4976],
172 | [0.0250157, 0.0256253],
173 | [[1.0577297, 1.2331092], [1.0545188, 1.2327107]],
174 | ),
175 | },
176 | {
177 | "input": {
178 | "totals": [],
179 | "sums": [],
180 | "sums_2": [],
181 | "sim_count": 10000,
182 | "seed": 52,
183 | "interval_alpha": 0.95,
184 | },
185 | "expected_output": ([], [], []),
186 | },
187 | ]
188 |
189 | PBB_DELTA_LOGNORMAL_AGG_INPUTS = [
190 | {
191 | "input": {
192 | "totals": [31500, 32000, 31000],
193 | "successes": [1580, 1700, 1550],
194 | "sum_logs": [3831.806394737816, 4211.72986767986, 4055.965234848171],
195 | "sum_logs_2": [11029.923165846496, 12259.51868396913, 12357.911862914],
196 | "sim_count": 20000,
197 | "seed": 52,
198 | "interval_alpha": 0.95,
199 | },
200 | "expected_output": (
201 | [0.00015, 0.03345, 0.9664],
202 | [0.2209593, 0.1205541, 0.0008458],
203 | [[0.9065769, 1.0655643], [1.0046391, 1.1707248], [1.1085257, 1.3061752]],
204 | ),
205 | },
206 | {
207 | "input": {
208 | "totals": [31000, 31000],
209 | "successes": [1550, 1550],
210 | "sum_logs": [4055.965234848171, 4055.965234848171],
211 | "sum_logs_2": [12357.911862914, 12357.911862914],
212 | "sim_count": 10000,
213 | "seed": 52,
214 | "interval_alpha": 0.9,
215 | },
216 | "expected_output": (
217 | [0.5013, 0.4987],
218 | [0.028189, 0.0287233],
219 | [[1.1227657, 1.2882371], [1.1210866, 1.2895949]],
220 | ),
221 | },
222 | {
223 | "input": {
224 | "totals": [10, 20, 30, 40],
225 | "successes": [0, 0, 0, 0],
226 | "sum_logs": [0, 0, 0, 0],
227 | "sum_logs_2": [0, 0, 0, 0],
228 | "sim_count": 10000,
229 | "seed": 52,
230 | "interval_alpha": 0.5,
231 | },
232 | "expected_output": (
233 | [0.25, 0.25, 0.25, 0.25],
234 | [np.nan, np.nan, np.nan, np.nan],
235 | [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan]],
236 | ),
237 | },
238 | {
239 | "input": {
240 | "totals": [100],
241 | "successes": [10],
242 | "sum_logs": [0],
243 | "sum_logs_2": [0],
244 | "sim_count": 10000,
245 | "seed": 52,
246 | "interval_alpha": 0.95,
247 | },
248 | "expected_output": ([1], [0], [[0.051825, 0.1697968]]),
249 | },
250 | {
251 | "input": {
252 | "totals": [],
253 | "successes": [],
254 | "sum_logs": [],
255 | "sum_logs_2": [],
256 | "sim_count": 10000,
257 | "seed": 52,
258 | "interval_alpha": 0.95,
259 | },
260 | "expected_output": ([], [], []),
261 | },
262 | ]
263 |
264 | PBB_NUMERICAL_DIRICHLET_AGG_INPUTS = [
265 | {
266 | "input": {
267 | "states": [1, 2, 3, 4, 5, 6],
268 | "concentrations": [
269 | [10, 10, 10, 10, 20, 10],
270 | [10, 10, 10, 10, 10, 20],
271 | [10, 10, 10, 20, 10, 10],
272 | ],
273 | "sim_count": 20000,
274 | "seed": 52,
275 | "interval_alpha": 0.9,
276 | },
277 | "expected_output": (
278 | [0.28205, 0.62335, 0.0946],
279 | [0.1999528, 0.0698306, 0.334045],
280 | [[3.3214796, 4.0718396], [3.4218451, 4.2243033], [3.1984494, 3.9184425]],
281 | ),
282 | },
283 | {
284 | "input": {
285 | "states": [1, 2, 3],
286 | "concentrations": [[100, 100, 100]],
287 | "sim_count": 20000,
288 | "seed": 52,
289 | "interval_alpha": 0.9,
290 | },
291 | "expected_output": ([1], [0], [[1.9077157, 2.0908699]]),
292 | },
293 | {
294 | "input": {
295 | "states": [],
296 | "concentrations": [],
297 | "sim_count": 20000,
298 | "seed": 52,
299 | "interval_alpha": 0.9,
300 | },
301 | "expected_output": ([], [], []),
302 | },
303 | ]
304 |
305 | PBB_POISSON_AGG_INPUTS = [
306 | {
307 | "input": {
308 | "totals": [3150, 3200, 3100],
309 | "sums": [10000, 10000, 10000],
310 | "sim_count": 20000,
311 | "seed": 52,
312 | "min_is_best": False,
313 | "interval_alpha": 0.95,
314 | },
315 | "expected_output": (
316 | [0.127, 0.00695, 0.86605],
317 | [0.0539495, 0.1042691, 0.0030418],
318 | [[3.1132541, 3.2375641], [3.0635577, 3.1863114], [3.1634511, 3.2890376]],
319 | ),
320 | },
321 | {
322 | "input": {
323 | "totals": [3150, 3200, 3100],
324 | "sums": [10000, 10000, 10000],
325 | "sim_count": 20000,
326 | "seed": 52,
327 | "min_is_best": True,
328 | "interval_alpha": 0.9,
329 | },
330 | "expected_output": (
331 | [0.12775, 0.8656, 0.00665],
332 | [0.0532581, 0.0029385, 0.1041658],
333 | [[3.123135, 3.2276693], [3.0732817, 3.1764313], [3.1729959, 3.2788603]],
334 | ),
335 | },
336 | {
337 | "input": {
338 | "totals": [100],
339 | "sums": [77],
340 | "sim_count": 20000,
341 | "seed": 52,
342 | "min_is_best": False,
343 | "interval_alpha": 0.75,
344 | },
345 | "expected_output": ([1], [0], [[0.6723231, 0.8727923]]),
346 | },
347 | {
348 | "input": {
349 | "totals": [],
350 | "sums": [],
351 | "sim_count": 20000,
352 | "seed": 52,
353 | "min_is_best": False,
354 | "interval_alpha": 0.9,
355 | },
356 | "expected_output": ([], [], []),
357 | },
358 | ]
359 |
360 | PBB_EXPONENTIAL_AGG_INPUTS = [
361 | {
362 | "input": {
363 | "totals": [100, 90, 80],
364 | "sums": [1040.29884, 993.66883, 883.05801],
365 | "sim_count": 20000,
366 | "seed": 52,
367 | "min_is_best": False,
368 | "interval_alpha": 0.9,
369 | },
370 | "expected_output": (
371 | [0.1826, 0.4065, 0.4109],
372 | [1.5195025, 0.8380173, 0.8431285],
373 | [[8.8658129, 12.3263561], [9.3561749, 13.2588682], [9.2650625, 13.3809534]],
374 | ),
375 | },
376 | {
377 | "input": {
378 | "totals": [1000, 1000, 1000],
379 | "sums": [2288.69431, 2471.61961, 2745.7794],
380 | "sim_count": 20000,
381 | "seed": 52,
382 | "min_is_best": True,
383 | "interval_alpha": 0.9,
384 | },
385 | "expected_output": (
386 | [0.9594, 0.0406, 0.0],
387 | [0.0017238, 0.1865276, 0.4598496],
388 | [[2.1727503, 2.4111014], [2.3482046, 2.6066663], [2.6087576, 2.8941021]],
389 | ),
390 | },
391 | {
392 | "input": {
393 | "totals": [100],
394 | "sums": [1007.25317],
395 | "sim_count": 20000,
396 | "seed": 52,
397 | "min_is_best": True,
398 | "interval_alpha": 0.912,
399 | },
400 | "expected_output": ([1], [0], [[8.5325723, 11.9986705]]),
401 | },
402 | {
403 | "input": {
404 | "totals": [],
405 | "sums": [],
406 | "sim_count": 20000,
407 | "seed": 52,
408 | "min_is_best": False,
409 | "interval_alpha": 0.9,
410 | },
411 | "expected_output": ([], [], []),
412 | },
413 | ]
414 |
415 | PBB_DELTA_NORMAL_AGG_INPUTS = [
416 | {
417 | "input": {
418 | "totals": [10000, 1000],
419 | "non_zeros": [1009, 111],
420 | "sums": [7026.30599, 801.53947],
421 | "sums_2": [49993.4988, 5891.6073],
422 | "sim_count": 20000,
423 | "seed": 52,
424 | "min_is_best": False,
425 | "interval_alpha": 0.9,
426 | },
427 | "expected_output": (
428 | [0.08285, 0.91715],
429 | [0.1045921, 0.0026141],
430 | [[0.6683901, 0.7384471], [0.6897179, 0.9275315]],
431 | ),
432 | },
433 | {
434 | "input": {
435 | "totals": [10, 20, 30, 40],
436 | "non_zeros": [0, 0, 0, 0],
437 | "sums": [0, 0, 0, 0],
438 | "sums_2": [0, 0, 0, 0],
439 | "sim_count": 10000,
440 | "seed": 52,
441 | "min_is_best": False,
442 | "interval_alpha": 0.9,
443 | },
444 | "expected_output": (
445 | [0.25, 0.25, 0.25, 0.25],
446 | [np.nan, np.nan, np.nan, np.nan],
447 | [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan]],
448 | ),
449 | },
450 | {
451 | "input": {
452 | "totals": [100],
453 | "non_zeros": [10],
454 | "sums": [0],
455 | "sums_2": [0],
456 | "sim_count": 10000,
457 | "seed": 52,
458 | "min_is_best": False,
459 | "interval_alpha": 0.9,
460 | },
461 | "expected_output": ([1], [0], [[-0.0017847, 0.0020072]]),
462 | },
463 | {
464 | "input": {
465 | "totals": [],
466 | "non_zeros": [],
467 | "sums": [],
468 | "sums_2": [],
469 | "sim_count": 10000,
470 | "seed": 52,
471 | "min_is_best": False,
472 | "interval_alpha": 0.9,
473 | },
474 | "expected_output": ([], [], []),
475 | },
476 | ]
477 |
478 |
479 | @pytest.mark.parametrize("inp", PBB_BERNOULLI_AGG_INPUTS)
480 | def test_eval_bernoulli_agg(inp):
481 | i = inp["input"]
482 | res = eval_bernoulli_agg(
483 | i["totals"],
484 | i["successes"],
485 | sim_count=i["sim_count"],
486 | seed=i["seed"],
487 | min_is_best=i["min_is_best"],
488 | interval_alpha=i["interval_alpha"],
489 | )
490 | assert res == inp["expected_output"]
491 |
492 |
493 | @pytest.mark.parametrize("inp", PBB_NORMAL_AGG_INPUTS)
494 | def test_eval_normal_agg(inp):
495 | i = inp["input"]
496 | res = eval_normal_agg(
497 | i["totals"],
498 | i["sums"],
499 | i["sums_2"],
500 | sim_count=i["sim_count"],
501 | seed=i["seed"],
502 | interval_alpha=i["interval_alpha"],
503 | )
504 | assert res == inp["expected_output"]
505 |
506 |
507 | def test_eval_normal_agg_different_runs():
508 |     # two runs with the same input and no seed should produce different results
509 | run1 = eval_normal_agg([100, 100], [10, 10], [20, 20])
510 | run2 = eval_normal_agg([100, 100], [10, 10], [20, 20])
511 | assert run1 != run2
512 |
513 |
514 | @pytest.mark.parametrize("inp", PBB_DELTA_LOGNORMAL_AGG_INPUTS)
515 | def test_eval_delta_lognormal_agg(inp):
516 | i = inp["input"]
517 | res = eval_delta_lognormal_agg(
518 | i["totals"],
519 | i["successes"],
520 | i["sum_logs"],
521 | i["sum_logs_2"],
522 | sim_count=i["sim_count"],
523 | seed=i["seed"],
524 | interval_alpha=i["interval_alpha"],
525 | )
526 | assert res == inp["expected_output"]
527 |
528 |
529 | def test_eval_delta_lognormal_agg_different_runs():
530 |     # two runs with the same input and no seed should produce different results
531 | run1 = eval_delta_lognormal_agg([1000, 1000], [100, 100], [10, 10], [20, 20], sim_count=100000)
532 | run2 = eval_delta_lognormal_agg([1000, 1000], [100, 100], [10, 10], [20, 20], sim_count=100000)
533 | assert run1 != run2
534 |
535 |
536 | @pytest.mark.parametrize("inp", PBB_NUMERICAL_DIRICHLET_AGG_INPUTS)
537 | def test_eval_numerical_dirichlet_agg(inp):
538 | i = inp["input"]
539 |     res = eval_numerical_dirichlet_agg(
540 |         i["states"], i["concentrations"], sim_count=i["sim_count"],
541 |         seed=i["seed"], interval_alpha=i["interval_alpha"])
542 |     assert res == inp["expected_output"]
543 |
544 |
545 | def test_eval_numerical_dirichlet_agg_different_runs():
546 |     # two runs with the same input and no seed should produce different results
547 | run1 = eval_numerical_dirichlet_agg([1, 20], [[10, 10], [20, 20]])
548 | run2 = eval_numerical_dirichlet_agg([1, 20], [[10, 10], [20, 20]])
549 | assert run1 != run2
550 |
551 |
552 | @pytest.mark.parametrize("inp", PBB_POISSON_AGG_INPUTS)
553 | def test_eval_poisson_agg(inp):
554 | i = inp["input"]
555 | res = eval_poisson_agg(
556 | i["totals"],
557 | i["sums"],
558 | sim_count=i["sim_count"],
559 | seed=i["seed"],
560 | min_is_best=i["min_is_best"],
561 | interval_alpha=i["interval_alpha"],
562 | )
563 | assert res == inp["expected_output"]
564 |
565 |
566 | @pytest.mark.parametrize("inp", PBB_EXPONENTIAL_AGG_INPUTS)
567 | def test_eval_exponential_agg(inp):
568 | i = inp["input"]
569 | res = eval_exponential_agg(
570 | i["totals"],
571 | i["sums"],
572 | sim_count=i["sim_count"],
573 | seed=i["seed"],
574 | min_is_best=i["min_is_best"],
575 | interval_alpha=i["interval_alpha"],
576 | )
577 | assert res == inp["expected_output"]
578 |
579 |
580 | @pytest.mark.parametrize("inp", PBB_DELTA_NORMAL_AGG_INPUTS)
581 | def test_eval_delta_normal_agg(inp):
582 | i = inp["input"]
583 | res = eval_delta_normal_agg(
584 | i["totals"],
585 | i["non_zeros"],
586 | i["sums"],
587 | i["sums_2"],
588 | sim_count=i["sim_count"],
589 | seed=i["seed"],
590 | min_is_best=i["min_is_best"],
591 | interval_alpha=i["interval_alpha"],
592 | )
593 | assert res == inp["expected_output"]
594 |
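595 |
596 | # A hedged, structural sanity check added for illustration (not part of the
597 | # original suite). It relies only on behaviour visible in the cases above:
598 | # each eval_* function returns a (probabilities, expected losses, credible
599 | # intervals) tuple with one entry per variant, and the probabilities of being
600 | # best sum to 1 whenever there is at least one variant.
601 | @pytest.mark.parametrize("inp", PBB_BERNOULLI_AGG_INPUTS)
602 | def test_eval_bernoulli_agg_output_structure(inp):
603 |     i = inp["input"]
604 |     pbbs, losses, intervals = eval_bernoulli_agg(
605 |         i["totals"],
606 |         i["successes"],
607 |         sim_count=i["sim_count"],
608 |         seed=i["seed"],
609 |         min_is_best=i["min_is_best"],
610 |         interval_alpha=i["interval_alpha"],
611 |     )
612 |     assert len(pbbs) == len(losses) == len(intervals) == len(i["totals"])
613 |     assert round(sum(pbbs), 5) in (0, 1)  # sum is 0 only for the empty input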
--------------------------------------------------------------------------------
/tests/test_exponential.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from bayesian_testing.experiments import ExponentialDataTest
4 |
5 |
6 | @pytest.fixture
7 | def exponential_test():
8 | expo = ExponentialDataTest()
9 | expo.add_variant_data(
10 | "A",
11 | [
12 | 3.27,
13 | 5.62,
14 | 0.31,
15 | 3.9,
16 | 2.4,
17 | 10.49,
18 | 0.63,
19 | 2.71,
20 | 1.64,
21 | 0.43,
22 | 0.22,
23 | 0.3,
24 | 1.99,
25 | 0.69,
26 | 5.15,
27 | 1.31,
28 | 1.01,
29 | 1.26,
30 | 0.2,
31 | 1.6,
32 | ],
33 | )
34 | expo.add_variant_data(
35 | "B",
36 | [
37 | 0.28,
38 | 0.18,
39 | 0.13,
40 | 4.79,
41 | 1.07,
42 | 0.69,
43 | 5.75,
44 | 2.07,
45 | 9.67,
46 | 2.79,
47 | 0.18,
48 | 5.8,
49 | 12.81,
50 | 2.33,
51 | 2.28,
52 | 1.56,
53 | 4.18,
54 | 1.47,
55 | 1.67,
56 | 0.98,
57 | ],
58 | )
59 | expo.add_variant_data_agg("C", 20, 72.27, a_prior=1, b_prior=2)
60 | expo.add_variant_data_agg("D", 100, 200)
61 | expo.add_variant_data_agg("D", 100, 220, replace=False)
62 | expo.add_variant_data_agg("D", 10, 20, replace=True)
63 | expo.delete_variant("D")
64 | return expo
65 |
66 |
67 | def test_variants(exponential_test):
68 | assert exponential_test.variant_names == ["A", "B", "C"]
69 |
70 |
71 | def test_totals(exponential_test):
72 | assert exponential_test.totals == [20, 20, 20]
73 |
74 |
75 | def test_sum_values(exponential_test):
76 | assert exponential_test.sum_values == [45.13, 60.68, 72.27]
77 |
78 |
79 | def test_a_priors(exponential_test):
80 | assert exponential_test.a_priors == [0.1, 0.1, 1]
81 |
82 |
83 | def test_b_priors(exponential_test):
84 | assert exponential_test.b_priors == [0.1, 0.1, 2]
85 |
86 |
87 | def test_probabs_of_being_best(exponential_test):
88 | pbbs = exponential_test.probabs_of_being_best(sim_count=20000, seed=52)
89 | assert pbbs == {"A": 0.0414, "B": 0.29885, "C": 0.65975}
90 |
91 |
92 | def test_expected_loss(exponential_test):
93 | loss = exponential_test.expected_loss(sim_count=20000, seed=52)
94 | assert loss == {"A": 1.5907038, "B": 0.7596064, "C": 0.2414208}
95 |
96 |
97 | def test_credible_intervals_95(exponential_test):
98 | ci = exponential_test.credible_intervals(sim_count=20000, seed=52)
99 | assert ci == {
100 | "A": [1.5151401, 3.6571069],
101 | "B": [2.0455239, 4.9692854],
102 | "C": [2.4059958, 5.6846722],
103 | }
104 |
105 |
106 | def test_credible_intervals_99(exponential_test):
107 | ci = exponential_test.credible_intervals(sim_count=20000, seed=52, interval_alpha=0.99)
108 | assert ci == {
109 | "A": [1.3525642, 4.3405547],
110 | "B": [1.8175504, 5.9001709],
111 | "C": [2.124576, 6.7291228],
112 | }
113 |
114 |
115 | def test_evaluate(exponential_test):
116 | eval_report = exponential_test.evaluate(sim_count=20000, seed=52)
117 | assert eval_report == [
118 | {
119 | "variant": "A",
120 | "totals": 20,
121 | "sum_values": 45.13,
122 | "observed_average": 2.2565,
123 | "posterior_mean": 2.25025,
124 | "credible_interval": [1.5151401, 3.6571069],
125 | "prob_being_best": 0.0414,
126 | "expected_loss": 1.5907038,
127 | },
128 | {
129 | "variant": "B",
130 | "totals": 20,
131 | "sum_values": 60.68,
132 | "observed_average": 3.034,
133 | "posterior_mean": 3.02388,
134 | "credible_interval": [2.0455239, 4.9692854],
135 | "prob_being_best": 0.29885,
136 | "expected_loss": 0.7596064,
137 | },
138 | {
139 | "variant": "C",
140 | "totals": 20,
141 | "sum_values": 72.27,
142 | "observed_average": 3.6135,
143 | "posterior_mean": 3.53667,
144 | "credible_interval": [2.4059958, 5.6846722],
145 | "prob_being_best": 0.65975,
146 | "expected_loss": 0.2414208,
147 | },
148 | ]
149 |
150 |
151 | def test_wrong_inputs():
152 | exp_test = ExponentialDataTest()
153 | with pytest.raises(ValueError):
154 | exp_test.add_variant_data(10, [1, 2, 3])
155 | with pytest.raises(ValueError):
156 | exp_test.add_variant_data("A", [1, 2, 3], a_prior=-1)
157 | with pytest.raises(ValueError):
158 | exp_test.add_variant_data_agg("A", -1, 7)
159 | with pytest.raises(ValueError):
160 | exp_test.add_variant_data_agg("A", 1, -7)
161 | with pytest.raises(ValueError):
162 | exp_test.add_variant_data("A", [])
163 | with pytest.raises(ValueError):
164 | exp_test.add_variant_data("A", [1, 2, -3])
165 |
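166 |
167 | # A hedged sanity check added for illustration (not part of the original
168 | # suite): with a fixed seed, repeated evaluations of the same test should be
169 | # reproducible, which is what the hard-coded expected values above rely on.
170 | def test_seeded_runs_are_reproducible(exponential_test):
171 |     run1 = exponential_test.probabs_of_being_best(sim_count=10000, seed=52)
172 |     run2 = exponential_test.probabs_of_being_best(sim_count=10000, seed=52)
173 |     assert run1 == run2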
--------------------------------------------------------------------------------
/tests/test_normal.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from bayesian_testing.experiments import NormalDataTest
4 |
5 |
6 | @pytest.fixture
7 | def norm_test():
8 | norm = NormalDataTest()
9 | norm.add_variant_data(
10 | "A",
11 | [
12 | 11.8,
13 | 12.2,
14 | 12.4,
15 | 9.5,
16 | 2.2,
17 | 3.3,
18 | 16.2,
19 | 4.9,
20 | 12.4,
21 | 6.8,
22 | 8.7,
23 | 9.8,
24 | 5.4,
25 | 9.0,
26 | 15.0,
27 | 12.3,
28 | 9.6,
29 | 12.5,
30 | 9.1,
31 | 10.2,
32 | ],
33 | m_prior=9,
34 | )
35 | norm.add_variant_data(
36 | "B",
37 | [
38 | 10.6,
39 | 5.1,
40 | 9.4,
41 | 11.2,
42 | 2.0,
43 | 13.4,
44 | 14.1,
45 | 15.4,
46 | 16.3,
47 | 11.7,
48 | 7.3,
49 | 6.8,
50 | 8.2,
51 | 16.2,
52 | 10.8,
53 | 7.1,
54 | 12.2,
55 | 11.2,
56 | ],
57 | w_prior=0.03,
58 | )
59 | norm.add_variant_data(
60 | "C",
61 | [
62 | 25.3,
63 | 10.3,
64 | 24.7,
65 | -8.1,
66 | 8.4,
67 | 10.3,
68 | 14.8,
69 | 13.4,
70 | 11.5,
71 | -4.7,
72 | 5.3,
73 | 7.4,
74 | 17.2,
75 | 15.4,
76 | 13.0,
77 | 12.9,
78 | 19.2,
79 | 11.6,
80 | 0.4,
81 | 5.7,
82 | 23.5,
83 | 15.2,
84 | ],
85 | b_prior_ig=2,
86 | )
87 | norm.add_variant_data_agg("A", 20, 193.3, 2127.71, replace=False)
88 | norm.add_variant_data("D", [0, 10.7, 0, 8, 0, 0, 0, 0, 0, 11.22])
89 | norm.add_variant_data("D", [0, 10.7, 0, 8, 0, 0, 0, 0, 0, 11.22], replace=False)
90 | norm.add_variant_data("D", [0, 10.7, 0, 8, 0, 0, 0, 0, 0, 11.22], replace=True)
91 | norm.delete_variant("D")
92 | return norm
93 |
94 |
95 | def test_variants(norm_test):
96 | assert norm_test.variant_names == ["A", "B", "C"]
97 |
98 |
99 | def test_totals(norm_test):
100 | assert norm_test.totals == [40, 18, 22]
101 |
102 |
103 | def test_sum_values(norm_test):
104 | assert norm_test.sum_values == [386.6, 188.99999999999997, 252.69999999999996]
105 |
106 |
107 | def test_sum_values_2(norm_test):
108 | assert norm_test.sum_values_2 == [4255.42, 2244.8200000000006, 4421.87]
109 |
110 |
111 | def test_m_priors(norm_test):
112 | assert norm_test.m_priors == [9, 1, 1]
113 |
114 |
115 | def test_a_priors_ig(norm_test):
116 | assert norm_test.a_priors_ig == [0, 0, 0]
117 |
118 |
119 | def test_b_priors_ig(norm_test):
120 | assert norm_test.b_priors_ig == [0, 0, 2]
121 |
122 |
123 | def test_w_priors(norm_test):
124 | assert norm_test.w_priors == [0.01, 0.03, 0.01]
125 |
126 |
127 | def test_probabs_of_being_best(norm_test):
128 | pbbs = norm_test.probabs_of_being_best(sim_count=20000, seed=52)
129 | assert pbbs == {"A": 0.05105, "B": 0.27935, "C": 0.6696}
130 |
131 |
132 | def test_expected_loss(norm_test):
133 | loss = norm_test.expected_loss(sim_count=20000, seed=52)
134 | assert loss == {"A": 2.2696341, "B": 1.4580033, "C": 0.4464154}
135 |
136 |
137 | def test_credible_intervals_95(norm_test):
138 | ci = norm_test.credible_intervals(sim_count=20000, seed=52)
139 | assert ci == {
140 | "A": [8.5300072, 10.8231841],
141 | "B": [8.5577171, 12.3448628],
142 | "C": [7.8915125, 15.1179586],
143 | }
144 |
145 |
146 | def test_credible_intervals_99(norm_test):
147 | ci = norm_test.credible_intervals(sim_count=20000, seed=52, interval_alpha=0.99)
148 | assert ci == {
149 | "A": [8.1196181, 11.2023581],
150 | "B": [7.8792145, 13.0964176],
151 | "C": [6.5669908, 16.5226358],
152 | }
153 |
154 |
155 | def test_evaluate(norm_test):
156 | eval_report = norm_test.evaluate(sim_count=20000, seed=52)
157 | assert eval_report == [
158 | {
159 | "variant": "A",
160 | "totals": 40,
161 | "sum_values": 386.6,
162 | "avg_values": 9.665,
163 | "posterior_mean": 9.66483,
164 | "credible_interval": [8.5300072, 10.8231841],
165 | "prob_being_best": 0.05105,
166 | "expected_loss": 2.2696341,
167 | },
168 | {
169 | "variant": "B",
170 | "totals": 18,
171 | "sum_values": 189.0,
172 | "avg_values": 10.5,
173 | "posterior_mean": 10.48419,
174 | "credible_interval": [8.5577171, 12.3448628],
175 | "prob_being_best": 0.27935,
176 | "expected_loss": 1.4580033,
177 | },
178 | {
179 | "variant": "C",
180 | "totals": 22,
181 | "sum_values": 252.7,
182 | "avg_values": 11.48636,
183 | "posterior_mean": 11.4816,
184 | "credible_interval": [7.8915125, 15.1179586],
185 | "prob_being_best": 0.6696,
186 | "expected_loss": 0.4464154,
187 | },
188 | ]
189 |
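190 |
191 | # A hedged consistency check added for illustration (not part of the original
192 | # suite): for the same seeded posterior samples, a 99% credible interval
193 | # should contain the corresponding 95% interval (as the values above show).
194 | def test_credible_intervals_are_nested(norm_test):
195 |     ci_95 = norm_test.credible_intervals(sim_count=20000, seed=52, interval_alpha=0.95)
196 |     ci_99 = norm_test.credible_intervals(sim_count=20000, seed=52, interval_alpha=0.99)
197 |     for variant in norm_test.variant_names:
198 |         assert ci_99[variant][0] <= ci_95[variant][0]
199 |         assert ci_95[variant][1] <= ci_99[variant][1]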
--------------------------------------------------------------------------------
/tests/test_poisson.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from bayesian_testing.experiments import PoissonDataTest
4 |
5 |
6 | @pytest.fixture
7 | def poisson_test():
8 | pois = PoissonDataTest()
9 | pois.add_variant_data("A", [5, 5, 7, 1, 3, 3, 1, 1, 2, 0, 1, 3, 4, 2, 5])
10 | pois.add_variant_data("B", [2, 4, 3, 4, 6, 1, 3, 6, 4, 0, 3, 1, 2, 1])
11 | pois.add_variant_data_agg("C", 15, 49, a_prior=1, b_prior=2)
12 | pois.add_variant_data_agg("D", 10, 10)
13 | pois.add_variant_data_agg("D", 20, 20, replace=False)
14 | pois.add_variant_data_agg("D", 20, 20, replace=True)
15 | pois.delete_variant("D")
16 | return pois
17 |
18 |
19 | def test_variants(poisson_test):
20 | assert poisson_test.variant_names == ["A", "B", "C"]
21 |
22 |
23 | def test_totals(poisson_test):
24 | assert poisson_test.totals == [15, 14, 15]
25 |
26 |
27 | def test_sum_values(poisson_test):
28 | assert poisson_test.sum_values == [43, 40, 49]
29 |
30 |
31 | def test_a_priors(poisson_test):
32 | assert poisson_test.a_priors == [0.1, 0.1, 1]
33 |
34 |
35 | def test_b_priors(poisson_test):
36 | assert poisson_test.b_priors == [0.1, 0.1, 2]
37 |
38 |
39 | def test_probabs_of_being_best(poisson_test):
40 | pbbs = poisson_test.probabs_of_being_best(sim_count=20000, seed=52)
41 | assert pbbs == {"A": 0.30945, "B": 0.29665, "C": 0.3939}
42 |
43 |
44 | def test_expected_loss(poisson_test):
45 | loss = poisson_test.expected_loss(sim_count=20000, seed=52)
46 | assert loss == {"A": 0.3936672, "B": 0.4144949, "C": 0.3109256}
47 |
48 |
49 | def test_credible_intervals_95(poisson_test):
50 | ci = poisson_test.credible_intervals(sim_count=20000, seed=52)
51 | assert ci == {
52 | "A": [2.0742056, 3.7731115],
53 | "B": [2.0264899, 3.7822918],
54 | "C": [2.1895805, 3.8084984],
55 | }
56 |
57 |
58 | def test_credible_intervals_99(poisson_test):
59 | ci = poisson_test.credible_intervals(sim_count=20000, seed=52, interval_alpha=0.99)
60 | assert ci == {
61 | "A": [1.8569798, 4.0897961],
62 | "B": [1.8082962, 4.1242607],
63 | "C": [1.9771075, 4.1434489],
64 | }
65 |
66 |
67 | def test_evaluate(poisson_test):
68 | eval_report = poisson_test.evaluate(sim_count=20000, seed=52)
69 | assert eval_report == [
70 | {
71 | "variant": "A",
72 | "totals": 15,
73 | "sum_values": 43,
74 | "observed_average": 2.86667,
75 | "posterior_mean": 2.8543,
76 | "credible_interval": [2.0742056, 3.7731115],
77 | "prob_being_best": 0.30945,
78 | "expected_loss": 0.3936672,
79 | },
80 | {
81 | "variant": "B",
82 | "totals": 14,
83 | "sum_values": 40,
84 | "observed_average": 2.85714,
85 | "posterior_mean": 2.84397,
86 | "credible_interval": [2.0264899, 3.7822918],
87 | "prob_being_best": 0.29665,
88 | "expected_loss": 0.4144949,
89 | },
90 | {
91 | "variant": "C",
92 | "totals": 15,
93 | "sum_values": 49,
94 | "observed_average": 3.26667,
95 | "posterior_mean": 2.94118,
96 | "credible_interval": [2.1895805, 3.8084984],
97 | "prob_being_best": 0.3939,
98 | "expected_loss": 0.3109256,
99 | },
100 | ]
101 |
102 |
103 | def test_wrong_inputs():
104 | pois_test = PoissonDataTest()
105 | with pytest.raises(ValueError):
106 | pois_test.add_variant_data(10, [1, 2, 3])
107 | with pytest.raises(ValueError):
108 | pois_test.add_variant_data("A", [1, 2, 3], a_prior=-1)
109 | with pytest.raises(ValueError):
110 | pois_test.add_variant_data_agg("A", -1, 7)
111 | with pytest.raises(ValueError):
112 | pois_test.add_variant_data_agg("A", 1, -7)
113 | with pytest.raises(ValueError):
114 | pois_test.add_variant_data("A", [])
115 | with pytest.raises(ValueError):
116 | pois_test.add_variant_data("A", [1, 2, -3])
117 |
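118 |
119 | # A hedged consistency check added for illustration (not part of the original
120 | # suite). With a Gamma(a, b) prior and Poisson counts, the posterior mean is
121 | # (a + sum_values) / (b + totals); e.g. variant "C" above gives
122 | # (1 + 49) / (2 + 15) = 2.94118, matching the evaluate() report.
123 | def test_posterior_mean_matches_conjugate_formula(poisson_test):
124 |     report = poisson_test.evaluate(sim_count=100, seed=52)
125 |     for row, a, b in zip(report, poisson_test.a_priors, poisson_test.b_priors):
126 |         expected = (a + row["sum_values"]) / (b + row["totals"])
127 |         assert row["posterior_mean"] == pytest.approx(expected, abs=1e-5)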
--------------------------------------------------------------------------------
/tests/test_posteriors.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 |
4 | from bayesian_testing.metrics.posteriors import (
5 | beta_posteriors_all,
6 | lognormal_posteriors,
7 | dirichlet_posteriors,
8 | pois_gamma_posteriors_all,
9 | exp_gamma_posteriors_all,
10 | )
11 |
12 | BETA_POSTERIORS_ALL_INPUTS = [
13 | {
14 | "totals": [10, 20, 30],
15 | "successes": [8, 16, 24],
16 | "sim_count": 10,
17 | "a_priors_beta": [0.5, 0.5, 0.5],
18 | "b_priors_beta": [0.5, 0.5, 0.5],
19 | },
20 | {
21 | "totals": [20, 30],
22 | "successes": [16, 24],
23 | "sim_count": 20,
24 | "a_priors_beta": [0.5, 0.5],
25 | "b_priors_beta": [0.5, 0.5],
26 | },
27 | ]
28 |
29 | LOGNORMAL_POSTERIORS_INPUTS = [
30 | {
31 | "totals": 1580,
32 | "sum_logs": 3831.806394737816,
33 | "sum_logs_2": 11029.923165846496,
34 | "sim_count": 10000,
35 | },
36 | {
37 | "totals": 1580,
38 | "sum_logs": 4055.965234848171,
39 | "sum_logs_2": 12357.911862914,
40 | "sim_count": 100,
41 | },
42 | {
43 | "totals": 0,
44 | "sum_logs": 0,
45 | "sum_logs_2": 0,
46 | "sim_count": 100,
47 | },
48 | ]
49 |
50 | DIRICHLET_POSTERIORS_INPUTS = [
51 | {
52 | "concentration": [1, 2, 3],
53 | "prior": [1, 1, 1],
54 | "sim_count": 10000,
55 | },
56 | {
57 | "concentration": [100, 200],
58 | "prior": [1 / 2, 1 / 2],
59 | "sim_count": 100,
60 | },
61 | ]
62 |
63 | GAMMA_POSTERIORS_ALL_INPUTS = [
64 | {
65 | "totals": [10, 20, 30],
66 | "sums": [80, 161, 260],
67 | "sim_count": 10,
68 | "a_priors_gamma": [0.5, 0.5, 0.5],
69 | "b_priors_gamma": [0.5, 0.5, 0.5],
70 | },
71 | {
72 | "totals": [20, 30],
73 | "sums": [160, 240],
74 | "sim_count": 20,
75 | "a_priors_gamma": [0.5, 0.5],
76 | "b_priors_gamma": [0.5, 0.5],
77 | },
78 | ]
79 |
80 |
81 | @pytest.mark.parametrize("inp", BETA_POSTERIORS_ALL_INPUTS)
82 | def test_beta_posteriors_all(inp):
83 | all_pos = beta_posteriors_all(
84 | inp["totals"],
85 | inp["successes"],
86 | inp["sim_count"],
87 | inp["a_priors_beta"],
88 | inp["b_priors_beta"],
89 | )
90 | all_pos_shape = np.array(all_pos).shape
91 | assert all_pos_shape == (len(inp["totals"]), inp["sim_count"])
92 |
93 |
94 | @pytest.mark.parametrize("inp", LOGNORMAL_POSTERIORS_INPUTS)
95 | def test_lognormal_posteriors(inp):
96 | all_pos = lognormal_posteriors(
97 | inp["totals"],
98 | inp["sum_logs"],
99 | inp["sum_logs_2"],
100 | inp["sim_count"],
101 | )
102 | assert len(all_pos) == inp["sim_count"]
103 |
104 |
105 | @pytest.mark.parametrize("inp", DIRICHLET_POSTERIORS_INPUTS)
106 | def test_dirichlet_posteriors(inp):
107 | all_pos = dirichlet_posteriors(
108 | inp["concentration"],
109 | inp["prior"],
110 | inp["sim_count"],
111 | )
112 | assert all_pos.shape == (inp["sim_count"], len(inp["concentration"]))
113 |
114 |
115 | @pytest.mark.parametrize("inp", GAMMA_POSTERIORS_ALL_INPUTS)
116 | def test_pois_gamma_posteriors_all(inp):
117 | all_pos = pois_gamma_posteriors_all(
118 | inp["totals"],
119 | inp["sums"],
120 | inp["sim_count"],
121 | inp["a_priors_gamma"],
122 | inp["b_priors_gamma"],
123 | )
124 | all_pos_shape = np.array(all_pos).shape
125 | assert all_pos_shape == (len(inp["totals"]), inp["sim_count"])
126 |
127 |
128 | @pytest.mark.parametrize("inp", GAMMA_POSTERIORS_ALL_INPUTS)
129 | def test_exp_gamma_posteriors_all(inp):
130 | all_pos = exp_gamma_posteriors_all(
131 | inp["totals"],
132 | inp["sums"],
133 | inp["sim_count"],
134 | inp["a_priors_gamma"],
135 | inp["b_priors_gamma"],
136 | )
137 | all_pos_shape = np.array(all_pos).shape
138 | assert all_pos_shape == (len(inp["totals"]), inp["sim_count"])
139 |
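140 |
141 | # A hedged property check added for illustration (not part of the original
142 | # suite): each simulated Dirichlet draw is a probability vector over the
143 | # categories, so every sampled row should sum to 1.
144 | @pytest.mark.parametrize("inp", DIRICHLET_POSTERIORS_INPUTS)
145 | def test_dirichlet_posteriors_rows_sum_to_one(inp):
146 |     all_pos = dirichlet_posteriors(
147 |         inp["concentration"],
148 |         inp["prior"],
149 |         inp["sim_count"],
150 |     )
151 |     assert np.allclose(np.sum(all_pos, axis=1), 1)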
--------------------------------------------------------------------------------
/tests/test_validators.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from bayesian_testing.metrics.evaluation import validate_bernoulli_input
4 | from bayesian_testing.utilities.common import check_list_lengths
5 |
6 |
7 | def test_validate_bernoulli_input():
8 | validate_bernoulli_input([1, 2, 3], [1, 1, 1])
9 | validate_bernoulli_input([1, 2], [1, 1])
10 | validate_bernoulli_input([1], [1])
11 |
12 |
13 | def test_validate_bernoulli_input_error():
14 | with pytest.raises(ValueError):
15 | validate_bernoulli_input([1, 2], [1])
16 |
17 |
18 | def test_check_list_lengths():
19 | check_list_lengths([[1, 2, 3], [1, 1, 1], [2, 2, 2], [7, 7, 7]])
20 | check_list_lengths([[], [], []])
21 |
22 |
23 | def test_check_list_lengths_error():
24 | with pytest.raises(ValueError):
25 | check_list_lengths([[1, 2, 3], [1, 1, 1], [2, 2, 2], [7, 7]])
26 |
--------------------------------------------------------------------------------