├── .gitignore ├── LICENSE ├── README.md ├── customer_bases.png ├── setup.py └── shifted_beta_geometric ├── __init__.py └── sbg.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, and 10 | distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by the copyright 13 | owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all other entities 16 | that control, are controlled by, or are under common control with that entity. 17 | For the purposes of this definition, "control" means (i) the power, direct or 18 | indirect, to cause the direction or management of such entity, whether by 19 | contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the 20 | outstanding shares, or (iii) beneficial ownership of such entity. 21 | 22 | "You" (or "Your") shall mean an individual or Legal Entity exercising 23 | permissions granted by this License. 
24 | 25 | "Source" form shall mean the preferred form for making modifications, including 26 | but not limited to software source code, documentation source, and configuration 27 | files. 28 | 29 | "Object" form shall mean any form resulting from mechanical transformation or 30 | translation of a Source form, including but not limited to compiled object code, 31 | generated documentation, and conversions to other media types. 32 | 33 | "Work" shall mean the work of authorship, whether in Source or Object form, made 34 | available under the License, as indicated by a copyright notice that is included 35 | in or attached to the work (an example is provided in the Appendix below). 36 | 37 | "Derivative Works" shall mean any work, whether in Source or Object form, that 38 | is based on (or derived from) the Work and for which the editorial revisions, 39 | annotations, elaborations, or other modifications represent, as a whole, an 40 | original work of authorship. For the purposes of this License, Derivative Works 41 | shall not include works that remain separable from, or merely link (or bind by 42 | name) to the interfaces of, the Work and Derivative Works thereof. 43 | 44 | "Contribution" shall mean any work of authorship, including the original version 45 | of the Work and any modifications or additions to that Work or Derivative Works 46 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 47 | by the copyright owner or by an individual or Legal Entity authorized to submit 48 | on behalf of the copyright owner. 
For the purposes of this definition, 49 | "submitted" means any form of electronic, verbal, or written communication sent 50 | to the Licensor or its representatives, including but not limited to 51 | communication on electronic mailing lists, source code control systems, and 52 | issue tracking systems that are managed by, or on behalf of, the Licensor for 53 | the purpose of discussing and improving the Work, but excluding communication 54 | that is conspicuously marked or otherwise designated in writing by the copyright 55 | owner as "Not a Contribution." 56 | 57 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf 58 | of whom a Contribution has been received by Licensor and subsequently 59 | incorporated within the Work. 60 | 61 | 2. Grant of Copyright License. 62 | 63 | Subject to the terms and conditions of this License, each Contributor hereby 64 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 65 | irrevocable copyright license to reproduce, prepare Derivative Works of, 66 | publicly display, publicly perform, sublicense, and distribute the Work and such 67 | Derivative Works in Source or Object form. 68 | 69 | 3. Grant of Patent License. 70 | 71 | Subject to the terms and conditions of this License, each Contributor hereby 72 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 73 | irrevocable (except as stated in this section) patent license to make, have 74 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 75 | such license applies only to those patent claims licensable by such Contributor 76 | that are necessarily infringed by their Contribution(s) alone or by combination 77 | of their Contribution(s) with the Work to which such Contribution(s) was 78 | submitted. 
If You institute patent litigation against any entity (including a 79 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 80 | Contribution incorporated within the Work constitutes direct or contributory 81 | patent infringement, then any patent licenses granted to You under this License 82 | for that Work shall terminate as of the date such litigation is filed. 83 | 84 | 4. Redistribution. 85 | 86 | You may reproduce and distribute copies of the Work or Derivative Works thereof 87 | in any medium, with or without modifications, and in Source or Object form, 88 | provided that You meet the following conditions: 89 | 90 | You must give any other recipients of the Work or Derivative Works a copy of 91 | this License; and 92 | You must cause any modified files to carry prominent notices stating that You 93 | changed the files; and 94 | You must retain, in the Source form of any Derivative Works that You distribute, 95 | all copyright, patent, trademark, and attribution notices from the Source form 96 | of the Work, excluding those notices that do not pertain to any part of the 97 | Derivative Works; and 98 | If the Work includes a "NOTICE" text file as part of its distribution, then any 99 | Derivative Works that You distribute must include a readable copy of the 100 | attribution notices contained within such NOTICE file, excluding those notices 101 | that do not pertain to any part of the Derivative Works, in at least one of the 102 | following places: within a NOTICE text file distributed as part of the 103 | Derivative Works; within the Source form or documentation, if provided along 104 | with the Derivative Works; or, within a display generated by the Derivative 105 | Works, if and wherever such third-party notices normally appear. The contents of 106 | the NOTICE file are for informational purposes only and do not modify the 107 | License. 
You may add Your own attribution notices within Derivative Works that 108 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 109 | provided that such additional attribution notices cannot be construed as 110 | modifying the License. 111 | You may add Your own copyright statement to Your modifications and may provide 112 | additional or different license terms and conditions for use, reproduction, or 113 | distribution of Your modifications, or for any such Derivative Works as a whole, 114 | provided Your use, reproduction, and distribution of the Work otherwise complies 115 | with the conditions stated in this License. 116 | 117 | 5. Submission of Contributions. 118 | 119 | Unless You explicitly state otherwise, any Contribution intentionally submitted 120 | for inclusion in the Work by You to the Licensor shall be under the terms and 121 | conditions of this License, without any additional terms or conditions. 122 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 123 | any separate license agreement you may have executed with Licensor regarding 124 | such Contributions. 125 | 126 | 6. Trademarks. 127 | 128 | This License does not grant permission to use the trade names, trademarks, 129 | service marks, or product names of the Licensor, except as required for 130 | reasonable and customary use in describing the origin of the Work and 131 | reproducing the content of the NOTICE file. 132 | 133 | 7. Disclaimer of Warranty. 134 | 135 | Unless required by applicable law or agreed to in writing, Licensor provides the 136 | Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, 137 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 138 | including, without limitation, any warranties or conditions of TITLE, 139 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are 140 | solely responsible for determining the appropriateness of using or 141 | redistributing the Work and assume any risks associated with Your exercise of 142 | permissions under this License. 143 | 144 | 8. Limitation of Liability. 145 | 146 | In no event and under no legal theory, whether in tort (including negligence), 147 | contract, or otherwise, unless required by applicable law (such as deliberate 148 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 149 | liable to You for damages, including any direct, indirect, special, incidental, 150 | or consequential damages of any character arising as a result of this License or 151 | out of the use or inability to use the Work (including but not limited to 152 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 153 | any and all other commercial damages or losses), even if such Contributor has 154 | been advised of the possibility of such damages. 155 | 156 | 9. Accepting Warranty or Additional Liability. 157 | 158 | While redistributing the Work or Derivative Works thereof, You may choose to 159 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 160 | other liability obligations and/or rights consistent with this License. However, 161 | in accepting such obligations, You may act only on Your own behalf and on Your 162 | sole responsibility, not on behalf of any other Contributor, and only if You 163 | agree to indemnify, defend, and hold each Contributor harmless for any liability 164 | incurred by, or claims asserted against, such Contributor by reason of your 165 | accepting any such warranty or additional liability. 166 | 167 | END OF TERMS AND CONDITIONS 168 | 169 | APPENDIX: How to apply the Apache License to your work 170 | 171 | To apply the Apache License to your work, attach the following boilerplate 172 | notice, with the fields enclosed by brackets "[]" replaced with your own 173 | identifying information. 
(Don't include the brackets!) The text should be 174 | enclosed in the appropriate comment syntax for the file format. We also 175 | recommend that a file or class name and description of purpose be included on 176 | the same "printed page" as the copyright notice for easier identification within 177 | third-party archives. 178 | 179 | Copyright [yyyy] [name of copyright owner] 180 | 181 | Licensed under the Apache License, Version 2.0 (the "License"); 182 | you may not use this file except in compliance with the License. 183 | You may obtain a copy of the License at 184 | 185 | http://www.apache.org/licenses/LICENSE-2.0 186 | 187 | Unless required by applicable law or agreed to in writing, software 188 | distributed under the License is distributed on an "AS IS" BASIS, 189 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 190 | See the License for the specific language governing permissions and 191 | limitations under the License. 192 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sBG model of customer retention 2 | 3 | A python implementation of the shifted-beta-geometric (sBG) model from Fader and Hardie's ["How to Project Customer 4 | Retention" (2006)](http://www.brucehardie.com/papers/021/sbg_2006-05-30.pdf). 
5 | 6 | Important note to modelers: among other assumptions (see §3 of the paper), sBG is only applicable to discrete, 7 | contractual customer relationships: 8 | 9 | Customer Bases Diagram 10 | 11 | Figure Source: ["Probability Models for Customer-Base Analysis" (Fader and Hardie 2009)](https://marketing.wharton.upenn.edu/files/?whdmsaction=public:main.file&fileID=341) 12 | 13 | ## Example 14 | 15 | ```python 16 | from shifted_beta_geometric import derl, fit, predicted_survival 17 | 18 | # measured percentage of cohort that survives over time 19 | example_data = [.869, .743, .653, .593, .551, .517, .491] 20 | 21 | # fit our observed data to the sBG model, which returns the parameters alpha and beta 22 | alpha, beta = fit(example_data) 23 | 24 | # predict the next 5 time samples: 25 | future = predicted_survival(alpha, beta, len(example_data) + 5)[-5:] 26 | 27 | # future = [0.460, 0.436, 0.414, 0.395, 0.378] 28 | 29 | # compute the discounted expected residual lifetime (DERL) for the survivors 30 | # of this cohort at point in time t: 31 | discount = 0.10 # rate at which we discount future revenue 32 | # to get value in today's terms, e.g. 10%/year 33 | t = len(example_data) 34 | residual_cohort_lifetime = derl(alpha, beta, discount, t) 35 | 36 | # residual_cohort_lifetime = 7.530 37 | 38 | # if our average revenue per period per customer is a constant v_avg, 39 | # to get the residual customer lifetime value (CLV) of this cohort 40 | # we simply multiply the residual_cohort_lifetime by v_avg: 41 | 42 | v_avg = 10 43 | residual_cohort_clv = residual_cohort_lifetime * v_avg 44 | 45 | # thus residual_cohort_clv = $75.30 per customer in this cohort 46 | ``` 47 | 48 | ## Requirements 49 | sBG requires `numpy` and `scipy` for fitting and the Gauss hypergeometric function. 
50 | -------------------------------------------------------------------------------- /customer_bases.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdmaturen/shifted_beta_geometric_py/80561a2a108ebcbcf2e61afa886d582f746f7612/customer_bases.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from setuptools import setup 4 | 5 | 6 | def read(fname): 7 | return open(os.path.join(os.path.dirname(__file__), fname)).read() 8 | 9 | setup( 10 | name = "shifted_beta_geometric", 11 | version = "0.0.1", 12 | author = "JD Maturen", 13 | author_email = "jdmaturen@gmail.com", 14 | description = """An implementation of the shifted-beta-geometric (sBG) model from Fader and Hardie's "How to Project 15 | Customer Retention" (2006)""", 16 | license = "Apache 2", 17 | keywords = "clv crm customer retention data modeling", 18 | # url = "http://github.com/jdmaturen/shifted_beta_geometric_py", 19 | packages=['shifted_beta_geometric'], 20 | 21 | classifiers = [ 22 | "Development Status :: 3 - Alpha", 23 | "Programming Language :: Python", 24 | "Topic :: Scientific/Engineering", 25 | "Topic :: Software Development :: Libraries", 26 | "License :: OSI Approved :: Apache Software License", 27 | ], 28 | ) -------------------------------------------------------------------------------- /shifted_beta_geometric/__init__.py: -------------------------------------------------------------------------------- 1 | from .sbg import derl 2 | from .sbg import fit 3 | from .sbg import fit_multi_cohort 4 | from .sbg import predicted_retention 5 | from .sbg import predicted_survival 6 | -------------------------------------------------------------------------------- /shifted_beta_geometric/sbg.py: -------------------------------------------------------------------------------- 1 | """ 2 | 
"""
Implementation of the shifted beta geometric (sBG) model from "How to Project Customer Retention" (Fader and Hardie 2006)

http://www.brucehardie.com/papers/021/sbg_2006-05-30.pdf

Apache 2 License
"""

from math import log

import numpy as np

from scipy.optimize import minimize
from scipy.special import hyp2f1

__author__ = 'JD Maturen'

# Log-likelihood reported for parameters (or fits) that the model cannot produce. It has to be smaller than every
# achievable value: a finite penalty such as -1000 is *larger* than real log-likelihoods computed from absolute
# customer counts (which easily reach -10**4), and would pull the optimiser towards the invalid region.
_IMPOSSIBLE = float('-inf')


def _weighted_log(weight, p):
    """Return `weight * log(p)` without raising on a non-positive `p`.

    A zero weight contributes nothing; a non-positive probability (float underflow for extreme parameters) makes the
    observation impossible, so the term is -inf instead of a `math domain error`.
    """
    if weight == 0:
        return 0.0
    if p <= 0:
        return _IMPOSSIBLE
    return weight * log(p)


def _nelder_mead(objective, x0):
    """Minimise `objective` from `x0` with Nelder-Mead and return the scipy result object.

    `xatol` is the current name of the absolute tolerance on the parameters; the retired `xtol` spelling is no longer
    honoured by current SciPy releases.
    """
    return minimize(objective, x0, method='nelder-mead', options={'xatol': 1e-8})


def generate_probabilities(alpha, beta, x):
    """Generate probabilities in one pass for all t in x.

    Returns a list whose entry 0 is `alpha / (alpha + beta)` and whose entry t (t >= 1) is
    `(beta + t - 1) / (alpha + beta + t)` times entry t - 1. The list always holds at least one entry, even for
    `x <= 1`.
    """
    p = [alpha / (alpha + beta)]
    for t in range(1, x):
        p.append((beta + t - 1) / (alpha + beta + t) * p[t - 1])
    return p


def probability(alpha, beta, t):
    """Probability function P: the single entry `generate_probabilities(alpha, beta, t + 1)[t]`.

    Computed iteratively so that a large `t` cannot exhaust the recursion limit.

    Raises:
        ValueError: if `t` is negative.
    """
    if t < 0:
        raise ValueError("t must be non-negative")
    p = alpha / (alpha + beta)
    for k in range(1, t + 1):
        p = (beta + k - 1) / (alpha + beta + k) * p
    return p


def survivor(probabilities, t):
    """Survivor function S: one minus the sum of `probabilities[0]` through `probabilities[t]`.

    Raises:
        IndexError: if `probabilities` has fewer than `t + 1` entries.
    """
    s = 1 - probabilities[0]
    for x in range(1, t + 1):
        s = s - probabilities[x]
    return s


def log_likelihood(alpha, beta, data, survivors=None):
    """Function to maximize to obtain ideal alpha and beta parameters.

    Args:
        alpha, beta: candidate sBG parameters; both must be positive.
        data: fraction of the cohort still active after each observed period.
        survivors: optional precomputed `survivor_rates(data)`, to avoid recomputing it on every optimiser step.

    Returns:
        The log-likelihood, or -inf when the parameters are non-positive or assign zero probability to the data.

    Raises:
        ValueError: if `data` is empty.
    """
    if alpha <= 0 or beta <= 0:
        return _IMPOSSIBLE
    if len(data) == 0:
        raise ValueError("data must contain at least one observation")
    if survivors is None:
        survivors = survivor_rates(data)
    probabilities = generate_probabilities(alpha, beta, len(data))
    final_survivor_likelihood = survivor(probabilities, len(data) - 1)

    lost = sum(_weighted_log(s, probabilities[t]) for t, s in enumerate(survivors))
    return lost + _weighted_log(data[-1], final_survivor_likelihood)


def log_likelihood_multi_cohort(alpha, beta, data):
    """Function to maximize to obtain ideal alpha and beta parameters using data across multiple cohorts.

    `data` must be a list of cohorts each with an absolute number per observed time unit; the first entry of a cohort
    is its initial size. Each cohort is scored on its own length, so cohorts do not have to form a contiguous
    (staggered) triangle: the customers lost between entries j and j + 1 are weighted by `probabilities[j]` and the
    customers left at the end by the survivor function over the periods that cohort was observed.

    Returns:
        The log-likelihood, or -inf when the parameters are non-positive or assign zero probability to the data.

    Raises:
        ValueError: if `data` is empty.
    """
    if alpha <= 0 or beta <= 0:
        return _IMPOSSIBLE
    if len(data) == 0:
        raise ValueError("data must contain at least one cohort")
    # Size the table for the longest cohort rather than assuming the first one is the longest.
    probabilities = generate_probabilities(alpha, beta, max(len(cohort) for cohort in data))

    total = 0
    for cohort in data:
        if len(cohort) < 2:
            # Only the initial size is known: no period was observed, so there is nothing to score.
            continue
        total += sum(_weighted_log(cohort[j] - cohort[j + 1], probabilities[j]) for j in range(len(cohort) - 1))
        # len(cohort) - 1 periods were observed, i.e. probabilities[0 .. len(cohort) - 2] have been "used up".
        total += _weighted_log(cohort[-1], survivor(probabilities, len(cohort) - 2))
    return total


def survivor_rates(data):
    """Return the fraction of the cohort lost in each period.

    Entry 0 is `1 - data[0]`; entry i is `data[i - 1] - data[i]`. An empty `data` gives an empty list.
    """
    rates = []
    previous = 1
    for current in data:
        rates.append(previous - current)
        previous = current
    return rates


def maximize(data):
    """Fit alpha and beta to single-cohort survival fractions; returns the raw scipy optimisation result."""
    survivors = survivor_rates(data)

    def negative_log_likelihood(x):
        return -log_likelihood(x[0], x[1], data, survivors)

    return _nelder_mead(negative_log_likelihood, np.array([100., 100.]))


def maximize_multi_cohort(data):
    """Fit alpha and beta to multi-cohort absolute counts; returns the raw scipy optimisation result."""

    def negative_log_likelihood(x):
        return -log_likelihood_multi_cohort(x[0], x[1], data)

    return _nelder_mead(negative_log_likelihood, np.array([1., 1.]))


def predicted_retention(alpha, beta, t):
    """Predicted retention probability at t. Function 8 in the paper"""
    return (beta + t) / (alpha + beta + t)


def predicted_survival(alpha, beta, x):
    """Predicted survival probability, i.e. percentage of customers retained, for all t in x.
    Function 1 in the paper

    Entry t is the running product of `predicted_retention(alpha, beta, 0..t)`. The list always holds at least one
    entry, even for `x <= 1`.
    """
    s = [predicted_retention(alpha, beta, 0)]
    for t in range(1, x):
        s.append(predicted_retention(alpha, beta, t) * s[t - 1])
    return s


def fit(data):
    """Return the fitted `[alpha, beta]` for single-cohort survival fractions.

    Raises:
        RuntimeError: with the optimiser's message if it did not terminate successfully.
    """
    res = maximize(data)
    if res.status != 0:
        raise RuntimeError(res.message)
    return res.x


def fit_multi_cohort(data):
    """Return the fitted `[alpha, beta]` for multi-cohort absolute counts.

    Raises:
        RuntimeError: with the optimiser's message if it did not terminate successfully.
    """
    res = maximize_multi_cohort(data)
    if res.status != 0:
        raise RuntimeError(res.message)
    return res.x


def derl(alpha, beta, d, n):
    """discounted expected residual lifetime from "Customer-Base Valuation in a Contractual Setting: The Perils of
    Ignoring Heterogeneity" (Fader and Hardie 2009)

    Args:
        alpha, beta: sBG parameters.
        d: discount rate per period (e.g. 0.1); enters the formula as `1 / (1 + d)`.
        n: point in time at which the residual lifetime is evaluated.
    """
    return predicted_retention(alpha, beta, n) * hyp2f1(1, beta + n + 1, alpha + beta + n + 1, 1 / (1 + d))


def test():
    """Test against the High End subscription retention data from the paper"""
    example_data = [.869, .743, .653, .593, .551, .517, .491]
    ll11 = log_likelihood(1., 1., example_data)
    print(np.allclose(ll11, -2.115, 1e-3))

    res = maximize(example_data)
    alpha, beta = res.x
    print(res.status == 0 and np.allclose(alpha, 0.668, 1e-3) and np.allclose(beta, 3.806, 1e-3))
    print()

    print("real\t", ["{0:.1f}%".format(x*100) for x in example_data])
    print("pred\t", ["{0:.1f}%".format(x*100) for x in predicted_survival(alpha, beta, 12)])
    print()

    print(list(map("{0:f}".format, [derl(alpha, beta, 0.1, x) for x in range(12)])))
    print()

    multi_cohort_data = [[10000, 8000, 6480, 5307, 4391], [10000, 8000, 6480, 5307], [10000, 8000, 6480], [10000, 8000]]
    alpha, beta = fit_multi_cohort(multi_cohort_data)
    print(np.allclose(alpha, 3.80, 1e-2) and np.allclose(beta, 15.19, 1e-2))


if __name__ == '__main__':
    test()