├── abyes
│   ├── test
│   │   ├── __init__.py
│   │   └── test_abExp.py
│   ├── examples
│   │   ├── __init__.py
│   │   ├── example.png
│   │   └── examples.py
│   ├── __init__.py
│   ├── utils.py
│   └── ab_exp.py
├── .gitignore
├── MANIFEST.in
├── Makefile
├── requirements.txt
├── setup.py
├── README.rst
└── LICENSE
--------------------------------------------------------------------------------
/abyes/test/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/abyes/examples/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/abyes/__init__.py:
--------------------------------------------------------------------------------
1 | from .ab_exp import AbExp
2 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | abyes.egg-info
3 | .idea
4 | __pycache__/
5 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.rst
2 | include requirements.txt
3 | include Makefile
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | init:
2 | 	pip install -r requirements.txt
3 | 
4 | test:
5 | 	nosetests
6 | 
--------------------------------------------------------------------------------
/abyes/examples/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cbellei/abyes/HEAD/abyes/examples/example.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib==3.4.1
2 | matplotlib-inline==0.1.2
3 | nose==1.3.7
4 | numpy==1.20.2
5 | pymc3==3.11.2
6 | scipy==1.6.3
7 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
3 | setup(name='abyes',
4 |       version='0.1.0',
5 |       description='AB Testing using Bayesian statistics',
6 |       url='https://github.com/cbellei/abyes',
7 |       author='Claudio Bellei',
8 |       author_email='',
9 |       license='OSI Approved Apache Software License',
10 |       packages=['abyes'],
11 |       zip_safe=False,
12 |       test_suite='nose.collector',
13 |       tests_require=['nose']
14 |       )
15 | 
--------------------------------------------------------------------------------
/abyes/utils.py:
--------------------------------------------------------------------------------
1 | def check_size(x, dim):
2 |     if len(x) != dim:
3 |         raise Exception('The data should be a list of %d arrays, one per variant' % dim)
4 |     else:
5 |         return
6 | 
7 | 
8 | def print_result(result):
9 | 
10 |     msg = ''
11 |     if result == 1:
12 |         print()
13 |         msg = 'Result is conclusive: B variant is winner!'
14 |     elif result == -1:
15 |         print()
16 |         msg = 'Result is conclusive: A variant is winner!'
17 |     elif result == 0:
18 |         print()
19 |         msg = 'Result is conclusive: A and B variants are effectively equivalent!'
20 |     else:
21 |         if type(result) == list and len(result) == 2:
22 |             print_result(result[0])
23 |             print_result(result[1])
24 |         else:
25 |             print()
26 |             msg = 'Result is inconclusive.'
27 | 28 | print(msg) 29 | print() 30 | 31 | def print_info(info): 32 | print() 33 | print('*** abyes ***') 34 | print() 35 | print('Method = %s' % info.method) 36 | print('Decision Rule = %s' % info.rule) 37 | if info.rule== 'rope': 38 | print('Alpha = %s' % info.alpha) 39 | print('Rope = %s' % str(info.rope)) 40 | elif info.rule== 'loss': 41 | print('Threshold of Caring = %s' % info.toc) 42 | print('Decision Variable = %s' %info.decision_var) 43 | 44 | -------------------------------------------------------------------------------- /abyes/examples/examples.py: -------------------------------------------------------------------------------- 1 | import abyes as ab 2 | import numpy as np 3 | 4 | # --- ANALYTIC METHOD --- 5 | # warning: only decision_var='lift' currently supported when rule=='loss' 6 | data = [np.random.binomial(1, 0.5, size=10000), np.random.binomial(1, 0.5, size=10000)] 7 | # Example 1: analytic method, rope decision rule, lift decision variable 8 | exp1 = ab.AbExp(alpha=0.95, method='analytic', rule='rope', decision_var='lift', plot=True) 9 | exp1.experiment(data) 10 | # Example 2: analytic method, rope decision rule, effect size decision variable 11 | exp2 = ab.AbExp(alpha=0.95, method='analytic', rule='rope', decision_var='es', plot=True) 12 | exp2.experiment(data) 13 | # Example 3: analytic method, loss decision rule, lift decision variable 14 | exp3 = ab.AbExp(alpha=0.95, method='analytic', rule='loss', decision_var='lift', plot=True) 15 | exp3.experiment(data) 16 | 17 | # --- MCMC METHOD --- 18 | # warning: only decision_var='lift' currently supported when rule=='loss' 19 | data = [np.random.binomial(1, 0.5, size=10000), np.random.binomial(1, 0.7, size=10000)] 20 | # Example 1: mcmc method, rope decision rule, lift decision variable 21 | exp1 = ab.AbExp(alpha=0.95, method='mcmc', rule='rope', decision_var='lift', resolution=1000, plot=True) 22 | exp1.experiment(data) 23 | # Example 2: mcmc method, rope decision rule, effect size decision variable 24 | exp2 = ab.AbExp(alpha=0.95, method='mcmc', rule='rope', decision_var='es', resolution=1000, plot=True) 25 | exp2.experiment(data) 26 | # Example 3: mcmc method, loss decision rule, lift decision variable 27 | exp3 = ab.AbExp(alpha=0.95, method='mcmc', rule='loss', decision_var='lift', plot=True, resolution=1000) 28 | exp3.experiment(data) 29 | 30 | # --- COMPARE ANALYTIC vs. MCMC METHOD --- 31 | # warning: only ROPE method currently supported in "compare" mode 32 | data = [np.random.binomial(1, 0.8, size=2500), np.random.binomial(1, 0.2, size=2500)] 33 | exp1 = ab.AbExp(alpha=0.95, method='compare', rule='rope', decision_var='lift', plot=True, resolution=1000) 34 | exp1.experiment(data) 35 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: rst 2 | 3 | ^^^^^^^^^^^^ 4 | aByes 5 | ^^^^^^^^^^^^ 6 | aByes is a Python package for Bayesian A/B Testing, which supports two main decision rules: 7 | 8 | * Region Of Practical Equivalence (as in the paper `Bayesian estimation supersedes the t-test `__, J. K. Kruschke, Journal of Experimental Psychology, 2012) 9 | * Expected Loss (as discussed in `Bayesian A/B Testing at VWO `__, C. Stucchio) 10 | 11 | A lot of the underlying theory is discussed in `this blog post `__. 
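In short, and mirroring the implementation in ``abyes/ab_exp.py``: the ROPE rule declares a winner only when the high posterior density (HPD) interval of the decision variable lies entirely outside a region of practical equivalence around zero, while the Expected Loss rule declares a winner when the expected loss of choosing a variant falls below a "threshold of caring". The following is a minimal, self-contained sketch of the two checks on a toy posterior of the lift; the toy numbers and the variable names (``delta``, ``pdf``, ``hpd``) are illustrative only, not part of the aByes API::

   import numpy as np

   # toy posterior of the lift mu_B - mu_A: a narrow normal centred at 0.03
   delta = np.linspace(-0.2, 0.2, 2001)
   pdf = np.exp(-0.5 * ((delta - 0.03) / 0.01) ** 2) / (0.01 * np.sqrt(2 * np.pi))

   # ROPE rule: B wins if the HPD interval lies entirely above the ROPE
   rope = (-0.01, 0.01)   # as in the Example section below
   hpd = (0.02, 0.04)     # stand-in for the HPD interval computed by aByes
   b_wins_rope = all(h > max(rope) for h in hpd)

   # Expected Loss rule: B wins if the expected loss of choosing B is below the threshold of caring
   toc = 0.01             # aByes default threshold of caring
   loss_b = np.trapz(np.maximum(-delta, 0) * pdf, delta)
   b_wins_loss = loss_b < toc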
12 | 
13 | Installation
14 | ============
15 | * In your target folder, clone the repository with the command::
16 | 
17 |    git clone https://github.com/cbellei/abyes.git
18 | 
19 | * Then, inside the same folder (as always, it is advisable to use a virtual environment)::
20 | 
21 |    pip install .
22 | 
23 | * To check that the package has been installed, in the Python shell type::
24 | 
25 |    import abyes
26 | 
27 | * If everything works correctly, the package will be imported without errors.
28 | 
29 | Dependencies
30 | ============
31 | * aByes is tested on Python 3 and depends on NumPy, SciPy, Matplotlib and PyMC3 (see ``requirements.txt`` for version
32 |   information).
33 | 
34 | How to use aByes
35 | ================
36 | The main steps to run the analysis of an A/B experiment are:
37 | 
38 | * Aggregate the data for the "A" and "B" variations in a list of two NumPy arrays
39 | * Decide how to do the analysis. Options are: 1. analytic solution; 2. MCMC solution (using PyMC3); 3. compare the analytic and MCMC solutions
40 | * Set the decision rule. Options are: 1. ROPE method; 2. Expected Loss method
41 | * Set the parameter to use for the decision. Options are: 1. Lift (difference in means); 2. Effect size
42 | 
43 | More examples and instructions can be found in the blog post mentioned above.
44 | 
45 | Example
46 | =======
47 | * In IPython, type::
48 | 
49 |    import abyes as ab
50 |    import numpy as np
51 | 
52 |    data = [np.random.binomial(1, 0.4, size=10000), np.random.binomial(1, 0.5, size=10000)]
53 |    exp = ab.AbExp(method='analytic', decision_var='lift', rule='rope', rope=(-0.01, 0.01), plot=True)
54 |    exp.experiment(data)
55 | 
56 | * This will plot the posterior distribution:
57 | 
58 | .. image:: https://raw.githubusercontent.com/cbellei/abyes/master/abyes/examples/example.png
59 | 
60 | * It will then give the following result::
61 | 
62 |    *** abyes ***
63 | 
64 |    Method = analytic
65 |    Decision Rule = rope
66 |    Alpha = 0.95
67 |    Rope = (-0.01, 0.01)
68 |    Decision Variable = lift
69 | 
70 |    Result is conclusive: B variant is winner!
71 | 
72 | * There are many more examples available in the file ``examples.py``, which can be run from the root directory with the command::
73 | 
74 |    python abyes/examples/examples.py
75 | 
76 | Limitations
77 | ===========
78 | Currently, aByes:
79 | 
80 | * only focuses on conversion rate experiments
81 | * allows for only two variants at a time to be tested
82 | 
83 | These shortcomings may be improved in future versions of aByes. (Feel free to fork the project and make these improvements yourself!)
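Further examples
================
The script ``abyes/examples/examples.py`` also exercises the MCMC method and the Expected Loss decision rule. For instance, a loss-rule analysis looks like this (a sketch taken from that script; note that only ``decision_var='lift'`` is currently supported when ``rule='loss'``)::

   import abyes as ab
   import numpy as np

   data = [np.random.binomial(1, 0.5, size=10000), np.random.binomial(1, 0.5, size=10000)]
   exp = ab.AbExp(alpha=0.95, method='analytic', rule='loss', decision_var='lift', plot=True)
   exp.experiment(data)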
84 | 
85 | Licence
86 | =======
87 | `Apache License, Version
88 | 2.0 `__
--------------------------------------------------------------------------------
/abyes/test/test_abExp.py:
--------------------------------------------------------------------------------
1 | from unittest import TestCase
2 | from abyes.ab_exp import AbExp
3 | from scipy.stats import norm
4 | import numpy as np
5 | import logging
6 | import sys
7 | 
8 | 
9 | class TestFunctions(TestCase):
10 | 
11 |     def test_hpd(self):
12 |         """
13 |         Test that the hpd function returns the correct interval
14 |         for a standard normal distribution,
15 |         when alpha=68.3%, alpha=95.4% and alpha=99.7%
16 |         :return:
17 |         """
18 |         x = np.linspace(-4, 4, 10000)
19 |         dx = x[1] - x[0]
20 |         y = norm.pdf(x)
21 |         bins = np.append(x - 0.5*dx, x[-1] + dx)
22 |         pdf = (y, bins)
23 |         posterior = dict({'avar': pdf})
24 | 
25 |         one_sigma = 0.682689492137
26 |         exp1 = AbExp(alpha=one_sigma)
27 |         hpd1 = exp1.hpd(posterior, 'avar')
28 |         min1, max1 = [min(hpd1), max(hpd1)]
29 | 
30 |         two_sigma = 0.954499736104
31 |         exp2 = AbExp(alpha=two_sigma)
32 |         hpd2 = exp2.hpd(posterior, 'avar')
33 |         min2, max2 = [min(hpd2), max(hpd2)]
34 | 
35 |         three_sigma = 0.997300203937
36 |         exp3 = AbExp(alpha=three_sigma)
37 |         hpd3 = exp3.hpd(posterior, 'avar')
38 |         min3, max3 = [min(hpd3), max(hpd3)]
39 | 
40 |         self.assertAlmostEqual(min1, -1.0, places=2)
41 |         self.assertAlmostEqual(max1, 1.0, places=2)
42 | 
43 |         self.assertAlmostEqual(min2, -2.0, places=2)
44 |         self.assertAlmostEqual(max2, 2.0, places=2)
45 | 
46 |         self.assertAlmostEqual(min3, -3.0, places=1)
47 |         self.assertAlmostEqual(max3, 3.0, places=1)
48 | 
49 |     def test_rope_decision(self):
50 |         """
51 |         Test the function rope_decision
52 |         :return:
53 |         """
54 |         exp = AbExp(rope=(-0.1, 0.1))
55 | 
56 |         result1 = exp.rope_decision([-1., -0.11])
57 |         self.assertEqual(result1, -1.0)
58 | 
59 |         result2 = exp.rope_decision([-1., -0.10])
60 |         self.assertTrue(result2 != result2)  # np.nan
61 | 
62 |         result3 = exp.rope_decision([-0.1, 0.1])
63 |         self.assertEqual(result3, 0.0)
64 | 
65 |         result4 = exp.rope_decision([0.1, 0.2])
66 |         self.assertTrue(result4 != result4)  # np.nan
67 | 
68 |         result5 = exp.rope_decision([0.11, 0.2])
69 |         self.assertEqual(result5, 1.0)
70 | 
71 |     def test_expected_loss_decision(self):
72 |         """
73 |         Test the expected_loss_decision function using a standard normal distribution.
74 |         Should get expected loss = 0.5 * sqrt(2/pi), where sqrt(2/pi) is the expected value of a half-Gaussian
75 |         :return:
76 |         """
77 | 
78 |         x = np.linspace(-4, 4, 10000)
79 |         dx = x[1] - x[0]
80 |         y = norm.pdf(x)
81 |         bins = np.append(x - 0.5 * dx, x[-1] + dx)
82 |         pdf = (y, bins)
83 |         posterior = dict({'avar': pdf})
84 | 
85 |         exp = AbExp(toc=0.01)
86 |         result1 = exp.expected_loss_decision(posterior, 'avar')
87 |         self.assertTrue(result1 != result1)  # np.nan
88 | 
89 |         exp = AbExp(toc=0.8)
90 |         result2 = exp.expected_loss_decision(posterior, 'avar')
91 |         self.assertEqual(result2, 0.0)
92 | 
93 |     def test_analytic_method_it(self):
94 |         """
95 |         Integration test. Verify that the analytic method of solution works,
96 |         using scenarios that are virtually certain to give conclusive results.
97 |         """
98 |         ns = 100000
99 | 
100 |         pa = 0.8
101 |         pb = 0.2
102 |         data = [np.random.binomial(1, pa, size=ns), np.random.binomial(1, pb, size=ns)]
103 | 
104 |         exp1 = AbExp(alpha=0.95, method='analytic', rule='rope', decision_var='es')
105 |         posterior = exp1.posterior_analytic(data)
106 |         hpd1 = exp1.hpd(posterior, 'es')
107 |         result1 = exp1.rope_decision(hpd1)
108 | 
109 |         exp2 = AbExp(alpha=0.95, method='analytic', rule='loss', decision_var='lift')
110 |         posterior = exp2.posterior_analytic(data)
111 |         result2 = exp2.expected_loss_decision(posterior, 'lift')
112 | 
113 |         pa = 0.2
114 |         pb = 0.8
115 |         data = [np.random.binomial(1, pa, size=ns), np.random.binomial(1, pb, size=ns)]
116 | 
117 |         exp3 = AbExp(alpha=0.95, method='analytic', rule='rope', decision_var='es')
118 |         posterior = exp3.posterior_analytic(data)
119 |         hpd3 = exp3.hpd(posterior, 'es')
120 |         result3 = exp3.rope_decision(hpd3)
121 | 
122 |         exp4 = AbExp(alpha=0.95, method='analytic', rule='loss', decision_var='lift')
123 |         posterior = exp4.posterior_analytic(data)
124 |         result4 = exp4.expected_loss_decision(posterior, 'lift')
125 | 
126 |         self.assertEqual(result1, -1.0)
127 |         self.assertEqual(result2, -1.0)
128 |         self.assertEqual(result3, 1.0)
129 |         self.assertEqual(result4, 1.0)
130 | 
131 |     def test_mcmc_method_it(self):
132 |         """
133 |         Integration test. Verify that the mcmc method of solution works,
134 |         using scenarios that are virtually certain to give conclusive results.
135 |         """
136 |         ns = 10000
137 | 
138 |         pa = 0.8
139 |         pb = 0.2
140 |         data = [np.random.binomial(1, pa, size=ns), np.random.binomial(1, pb, size=ns)]
141 | 
142 |         exp1 = AbExp(alpha=0.95, method='mcmc', rule='rope', decision_var='es', iterations=2500)
143 |         posterior = exp1.posterior_mcmc(data)
144 |         hpd1 = exp1.hpd(posterior, 'es')
145 |         result1 = exp1.rope_decision(hpd1)
146 | 
147 |         exp2 = AbExp(alpha=0.95, method='mcmc', rule='loss', decision_var='lift', iterations=2500)
148 |         posterior = exp2.posterior_mcmc(data)
149 |         result2 = exp2.expected_loss_decision(posterior, 'lift')
150 | 
151 |         pa = 0.2
152 |         pb = 0.8
153 |         data = [np.random.binomial(1, pa, size=ns), np.random.binomial(1, pb, size=ns)]
154 | 
155 |         exp3 = AbExp(alpha=0.95, method='mcmc', rule='rope', decision_var='es', iterations=2500)
156 |         posterior = exp3.posterior_mcmc(data)
157 |         hpd3 = exp3.hpd(posterior, 'es')
158 |         result3 = exp3.rope_decision(hpd3)
159 | 
160 |         exp4 = AbExp(alpha=0.95, method='mcmc', rule='loss', decision_var='lift', iterations=2500)
161 |         posterior = exp4.posterior_mcmc(data)
162 |         result4 = exp4.expected_loss_decision(posterior, 'lift')
163 | 
164 |         self.assertEqual(result1, -1.0)
165 |         self.assertEqual(result2, -1.0)
166 |         self.assertEqual(result3, 1.0)
167 |         self.assertEqual(result4, 1.0)
168 | 
169 | 
170 | if __name__ == "__main__":
171 |     import unittest
172 | 
173 |     logging.basicConfig(stream=sys.stderr)
174 |     logging.getLogger("TestFunctions.test_hpd").setLevel(logging.DEBUG)
175 |     unittest.main()
176 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 | 1. Definitions.
8 | 
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /abyes/ab_exp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.stats import beta 3 | from .utils import check_size, print_result, print_info 4 | import matplotlib.pyplot as plt 5 | import warnings 6 | import pymc3 as pm 7 | 8 | 9 | class AbExp: 10 | """ 11 | Define a Bayesian A/B Test on conversion rate experimental data. 12 | Parameters 13 | ---------- 14 | method : `str` 15 | choose method for analysis (options: 'analytic', 'mcmc', 'compare') 16 | [default: 'analytic'] 17 | rule : `str` 18 | choose decision rule (options: 'rope', 'loss') 19 | [default: 'rope'] 20 | alpha : `float` 21 | alpha parameter for rope calculation [default: 0.95] 22 | alpha_prior : `float` 23 | alpha parameter for the prior (beta distribution) 24 | [default: 1] 25 | beta_prior : `float` 26 | beta parameter for the prior (beta distribution) 27 | [default: 1] 28 | rope : `tuple(float, float)` 29 | define region of practical equivalence 30 | [default: (-0.1, 0.1)] 31 | toc : `float` 32 | define threshold of caring 33 | [default: 0.01] 34 | verbose : `bool` 35 | whether or not to print the test results 36 | [default: True] 37 | """ 38 | def __init__(self, method='analytic', rule='rope', 39 | alpha=0.95, alpha_prior=1, beta_prior=1, 40 | resolution=500, rope=(-0.1, 0.1), toc=1.e-2, 41 | iterations=5000, plot=False, decision_var='es', 42 | verbose=True): 43 | self.method = method 44 | self.rule = rule 45 | self.alpha = alpha 46 | self.alpha_prior = alpha_prior 47 | self.beta_prior = beta_prior 48 | self.resolution = resolution 49 | self.rope = rope 50 | self.toc = toc 51 | self.iterations = iterations 52 | self.plot = plot 53 | self.decision_var = decision_var 54 | self.verbose = verbose 55 | 56 | if method == 'compare' and not rule == 'rope': 57 | warnings.warn('For "compare" method, only ROPE decision rule is currently supported. Setting rule to ROPE.') 58 | self.rule = 'rope' 59 | 60 | if rule == 'loss' and decision_var == 'es': 61 | warnings.warn('For "loss" decision rule, only "lift" decision variable is currently supported. 
Setting decision_var to "lift".')
62 |             self.decision_var = 'lift'
63 | 
64 |     def experiment(self, data):
65 |         """
66 |         Run experiment with data provided
67 |         Parameters
68 |         ----------
69 |         data : `List(np.array, np.array)`
70 |         """
71 |         check_size(data, dim=2)
72 | 
73 |         posterior = self.find_posterior(data)
74 | 
75 |         decision = self.decision(posterior)
76 | 
77 |         if self.plot:
78 |             plt.show()
79 | 
80 |         return decision
81 | 
82 |     def find_posterior(self, data):
83 |         """
84 |         Find posterior distribution
85 |         """
86 |         if self.method == 'analytic':
87 |             posterior = self.posterior_analytic(data)
88 |         elif self.method == 'mcmc':
89 |             posterior = self.posterior_mcmc(data)
90 |         elif self.method == 'compare':
91 |             posterior = [self.posterior_analytic(data), self.posterior_mcmc(data)]
92 |         else:
93 |             raise Exception('method not recognized')
94 | 
95 |         return posterior
96 | 
97 |     def decision(self, posterior):
98 |         """
99 |         Make decision on the experiment
100 |         """
101 |         if self.plot:
102 |             plt.figure(figsize=(15, 5))
103 | 
104 |         if self.method == 'compare':
105 |             hpd1 = self.hpd(posterior[0], self.decision_var, {'clr': 'r', 'label1': 'analytic', 'label2': '',
106 |                                                               'label3': '', 'label4': '', 'label': 'analytic'})
107 |             result1 = self.rope_decision(hpd1)
108 | 
109 |             hpd2 = self.hpd(posterior[1], self.decision_var, {'clr': 'k', 'ls': '--', 'label1': 'mcmc',
110 |                                                               'label2': '', 'label3': '', 'label4': '', 'label': 'mcmc'})
111 |             result2 = self.rope_decision(hpd2)
112 |             result = [result1, result2]
113 |         else:
114 |             if self.rule == 'rope':
115 |                 hpd = self.hpd(posterior, self.decision_var)
116 |                 result = self.rope_decision(hpd)
117 |             else:
118 |                 result = self.expected_loss_decision(posterior, self.decision_var)
119 | 
120 |         if self.verbose:
121 |             if self.method != 'compare':
122 |                 print_info(self)
123 |             print_result(result)
124 |         return result
125 | 
126 |     def posterior_analytic(self, data):
127 |         """
128 |         Find posterior distribution for the analytic method of solution
129 |         """
130 | 
131 |         ca = np.sum(data[0])
132 |         na = len(data[0])
133 | 
134 |         cb = np.sum(data[1])
135 |         nb = len(data[1])
136 | 
137 |         # find posterior of A and B from analytic solution
138 |         x = np.linspace(0, 1, self.resolution-1)
139 |         dx = x[1] - x[0]
140 |         pa = (np.array([beta.pdf(xx, self.alpha_prior + ca, self.beta_prior + na - ca) for xx in x]),
141 |               np.append(x, x[-1]+dx) - 0.5*dx)
142 |         pb = (np.array([beta.pdf(xx, self.alpha_prior + cb, self.beta_prior + nb - cb) for xx in x]),
143 |               np.append(x, x[-1] + dx) - 0.5 * dx)
144 | 
145 |         # sample from the two posteriors to build the lift and effect-size distributions
146 |         a_rvs = beta.rvs(self.alpha_prior + ca, self.beta_prior + na - ca, size=400*self.resolution)
147 |         b_rvs = beta.rvs(self.alpha_prior + cb, self.beta_prior + nb - cb, size=400*self.resolution)
148 | 
149 |         rvs = b_rvs - a_rvs
150 |         bins = np.linspace(np.min(rvs) - 0.2 * abs(np.min(rvs)), np.max(rvs) + 0.2 * abs(np.max(rvs)), self.resolution)
151 |         lift = np.histogram(rvs, bins=bins, density=True)
152 | 
153 |         bins = np.linspace(0, 1, self.resolution)
154 |         sigma_a_rvs = np.sqrt(a_rvs * (1 - a_rvs))
155 |         sigma_b_rvs = np.sqrt(b_rvs * (1 - b_rvs))
156 |         psigma_a = np.histogram(sigma_a_rvs, bins=bins, density=True)
157 |         psigma_b = np.histogram(sigma_b_rvs, bins=bins, density=True)
158 | 
159 |         rvs = (b_rvs - a_rvs) / np.sqrt(0.5 * (sigma_a_rvs**2 + sigma_b_rvs**2))
160 |         bins = np.linspace(np.min(rvs) - 0.2 * abs(np.min(rvs)), np.max(rvs) + 0.2 * abs(np.max(rvs)), self.resolution)
161 |         pes = np.histogram(rvs, bins=bins, density=True)
162 | 
163 |         posterior = {'muA': pa, 'muB':
pb, 'psigma_a': psigma_a, 'psigma_b': psigma_b, 164 | 'lift': lift, 'es': pes, 'prior': self.prior()} 165 | 166 | return posterior 167 | 168 | def posterior_mcmc(self, data): 169 | """ 170 | Find posterior distribution for the numerical method of solution 171 | """ 172 | 173 | with pm.Model() as ab_model: 174 | # priors 175 | mua = pm.distributions.continuous.Beta('muA', alpha=self.alpha_prior, beta=self.beta_prior) 176 | mub = pm.distributions.continuous.Beta('muB', alpha=self.alpha_prior, beta=self.beta_prior) 177 | # likelihoods 178 | pm.Bernoulli('likelihoodA', mua, observed=data[0]) 179 | pm.Bernoulli('likelihoodB', mub, observed=data[1]) 180 | 181 | # find distribution of difference 182 | pm.Deterministic('lift', mub - mua) 183 | # find distribution of effect size 184 | sigma_a = pm.Deterministic('sigmaA', np.sqrt(mua * (1 - mua))) 185 | sigma_b = pm.Deterministic('sigmaB', np.sqrt(mub * (1 - mub))) 186 | pm.Deterministic('effect_size', (mub - mua) / (np.sqrt(0.5 * (sigma_a ** 2 + sigma_b ** 2)))) 187 | 188 | start = pm.find_MAP() 189 | step = pm.Slice() 190 | trace = pm.sample(self.iterations, step=step, start=start) 191 | 192 | bins = np.linspace(0, 1, self.resolution) 193 | mua = np.histogram(trace['muA'][500:], bins=bins, density=True) 194 | mub = np.histogram(trace['muB'][500:], bins=bins, density=True) 195 | sigma_a = np.histogram(trace['sigmaA'][500:], bins=bins, density=True) 196 | sigma_b = np.histogram(trace['sigmaB'][500:], bins=bins, density=True) 197 | 198 | rvs = trace['lift'][500:] 199 | bins = np.linspace(np.min(rvs) - 0.2 * abs(np.min(rvs)), np.max(rvs) + 0.2 * abs(np.max(rvs)), self.resolution) 200 | lift = np.histogram(rvs, bins=bins, density=True) 201 | 202 | rvs = trace['effect_size'][500:] 203 | bins = np.linspace(np.min(rvs) - 0.2 * abs(np.min(rvs)), np.max(rvs) + 0.2 * abs(np.max(rvs)), self.resolution) 204 | pes = np.histogram(rvs, bins=bins, density=True) 205 | 206 | posterior = {'muA': mua, 'muB': mub, 'sigmaA': sigma_a, 'sigmaB': sigma_b, 207 | 'lift': lift, 'es': pes, 'prior': self.prior()} 208 | 209 | return posterior 210 | 211 | def prior(self): 212 | """ 213 | Find out prior distribution 214 | """ 215 | return [beta.pdf(x, self.alpha_prior, self.beta_prior) for x in np.linspace(0, 1, self.resolution)] 216 | 217 | def hpd(self, posterior, var, *parameters): 218 | """ 219 | Find out High Posterior Density Region 220 | """ 221 | 222 | bins = posterior[var][1] 223 | x = 0.5 * (bins[0:-1] + bins[1:]) 224 | pdf = posterior[var][0] 225 | k = np.linspace(0, max(pdf), 1000) 226 | area_above = np.array([np.trapz(pdf[pdf >= kk], x[pdf >= kk]) for kk in k]) 227 | index = np.argwhere(np.abs(area_above - self.alpha) == np.min(np.abs(area_above - self.alpha)))[0] 228 | 229 | if self.plot: 230 | self.plot_rope_posterior(index, k, x, posterior, var, *parameters) 231 | 232 | return x[pdf >= k[index]] 233 | 234 | def rope_decision(self, hpd): 235 | """ 236 | Apply decision rule for ROPE method 237 | """ 238 | 239 | if all(h < min(self.rope) for h in hpd): 240 | result = -1 241 | elif all(h > max(self.rope) for h in hpd): 242 | result = 1 243 | elif all(min(self.rope) <= h <= max(self.rope) for h in hpd): 244 | result = 0 245 | else: 246 | result = np.nan 247 | 248 | return result 249 | 250 | def expected_loss_decision(self, posterior, var): 251 | """ 252 | Calculate expected loss and apply decision rule 253 | """ 254 | dl = posterior[var][1] 255 | dl = 0.5 * (dl[0:-1] + dl[1:]) 256 | fdl = posterior[var][0] 257 | inta = np.maximum(dl, 0) * fdl 258 | intb = 
np.maximum(-dl, 0) * fdl 259 | 260 | ela = np.trapz(inta, dl) 261 | elb = np.trapz(intb, dl) 262 | 263 | if self.plot: 264 | plt.subplot(1, 2, 1) 265 | b = posterior['muA'][1] 266 | plt.plot(0.5*(b[0:-1]+b[1:]), posterior['muA'][0], lw=2, label=r'$f(\mu_A)$') 267 | b = posterior['muB'][1] 268 | plt.plot(0.5*(b[0:-1]+b[1:]), posterior['muB'][0], lw=2, label=r'$f(\mu_B)$') 269 | plt.xlabel('$\mu_A,\ \mu_B$') 270 | plt.xlim([0, 1]) 271 | plt.title('Conversion Rate') 272 | plt.locator_params(nbins=6) 273 | plt.gca().set_ylim(bottom=0) 274 | plt.legend() 275 | 276 | plt.subplot(1, 2, 2) 277 | plt.plot(dl, fdl, 'b', lw=3, label=r'f$(\mu_B - \mu_A)$') 278 | plt.plot([ela, ela], [0, 0.3*np.max(fdl)], 'r', lw=3, label='A: Expected Loss') 279 | plt.plot([elb, elb], [0, 0.3*np.max(fdl)], 'c', lw=3, label='B: Expected Loss') 280 | plt.plot([self.toc, self.toc], [0, 0.3*np.max(fdl)], 'k--', lw=3, label='Threshold of Caring') 281 | plt.xlabel(r'$\mu_B-\mu_A$') 282 | plt.title('Expected Loss') 283 | plt.gca().set_ylim(bottom=0) 284 | plt.gca().locator_params(axis='x', nbins=6) 285 | plt.legend() 286 | 287 | if ela <= self.toc and elb <= self.toc: 288 | result = 0 289 | elif elb < self.toc: 290 | result = 1 291 | elif ela < self.toc: 292 | result = -1 293 | else: 294 | result = np.nan 295 | 296 | return result 297 | 298 | def plot_rope_posterior(self, index, k, x, posterior, var, *args): 299 | 300 | label1 = r'$f(\mu_A)$' 301 | label2 = r'$f(\mu_B)$' 302 | label3 = 'HPD' 303 | label4 = 'ROPE' 304 | if var == 'es': 305 | label = '$f$(ES)' 306 | elif var == 'lift': 307 | label = r'$f(\mu_B - \mu_A)$' 308 | ls = '-' 309 | 310 | for arg in args: 311 | if 'ls' in arg: 312 | ls = arg['ls'] 313 | if 'clr' in arg: 314 | clr = arg['clr'] 315 | if 'label1' in arg: 316 | label1 = arg['label1'] 317 | if 'label2' in arg: 318 | label2 = arg['label2'] 319 | if 'label3' in arg: 320 | label3 = arg['label3'] 321 | if 'label4' in arg: 322 | label4 = arg['label4'] 323 | if 'label' in arg: 324 | label = arg['label'] 325 | 326 | plt.subplot(1, 2, 1) 327 | b = posterior['muA'][1] 328 | line, = plt.plot(0.5 * (b[0:-1] + b[1:]), posterior['muA'][0], ls=ls, lw=2, label=label1) 329 | if 'clr' in locals(): 330 | line.set_color(clr) 331 | b = posterior['muB'][1] 332 | line, = plt.plot(0.5 * (b[0:-1] + b[1:]), posterior['muB'][0], ls=ls, lw=2, label=label2) 333 | if 'clr' in locals(): 334 | line.set_color(clr) 335 | plt.xlabel('$\mu_A,\ \mu_B$') 336 | plt.xlim([0, 1]) 337 | plt.title('Conversion Rate') 338 | plt.gca().set_ylim(bottom=0) 339 | plt.locator_params(nbins=6) 340 | plt.legend() 341 | 342 | plt.subplot(1, 2, 2) 343 | pdf = posterior[var][0] 344 | line, = plt.plot(x, pdf, lw=3, ls='-', label=label) 345 | if 'clr' in locals(): 346 | line.set_color(clr) 347 | plt.plot(x[pdf >= k[index]], 0 * x[pdf >= k[index]], linewidth=4, label=label3) 348 | plt.xlim([np.minimum(np.min(x), -1), np.maximum(1, np.max(x))]) 349 | plt.plot([self.rope[0], self.rope[0]], [0, 4], 'g--', linewidth=5, label=label4) 350 | plt.plot([self.rope[1], self.rope[1]], [0, 4], 'g--', linewidth=5) 351 | plt.gca().set_ylim(bottom=0) 352 | plt.gca().locator_params(axis='x', nbins=6) 353 | plt.legend() 354 | if var == 'es': 355 | plt.xlabel(r'$(\mu_B-\mu_A)/\sqrt{\sigma_A^2 + \sigma_B^2)}$') 356 | plt.title('Effect Size') 357 | elif var == 'lift': 358 | plt.xlabel(r'$\mu_B-\mu_A$') 359 | plt.title(r'Lift') 360 | --------------------------------------------------------------------------------