├── AB_Testing.ipynb ├── README.md └── ab_data.csv /AB_Testing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#### Clean Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "#import packages\n", 17 | "import pandas as pd\n", 18 | "import math\n", 19 | "import statsmodels.stats.api as sms\n", 20 | "import scipy.stats as st" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "Number of rows: 294478 Number of columns: 5\n" 33 | ] 34 | }, 35 | { 36 | "data": { 37 | "text/html": [ 38 | "
\n", 39 | "\n", 52 | "\n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | "
user_idtimestampgrouplanding_pageconverted
085110411:48.6controlold_page0
180422801:45.2controlold_page0
266159055:06.2treatmentnew_page0
385354128:03.1treatmentnew_page0
486497552:26.2controlold_page1
\n", 106 | "
" 107 | ], 108 | "text/plain": [ 109 | " user_id timestamp group landing_page converted\n", 110 | "0 851104 11:48.6 control old_page 0\n", 111 | "1 804228 01:45.2 control old_page 0\n", 112 | "2 661590 55:06.2 treatment new_page 0\n", 113 | "3 853541 28:03.1 treatment new_page 0\n", 114 | "4 864975 52:26.2 control old_page 1" 115 | ] 116 | }, 117 | "execution_count": 2, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "#import data\n", 124 | "raw_data = pd.read_csv(\"ab_data.csv\")\n", 125 | "df = raw_data.copy()\n", 126 | "\n", 127 | "print(\"Number of rows: \", df.shape[0], \" Number of columns: \", df.shape[1])\n", 128 | "df.head()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 3, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/plain": [ 139 | "treatment 147276\n", 140 | "control 147202\n", 141 | "Name: group, dtype: int64" 142 | ] 143 | }, 144 | "execution_count": 3, 145 | "metadata": {}, 146 | "output_type": "execute_result" 147 | } 148 | ], 149 | "source": [ 150 | "df[\"group\"].value_counts()" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 4, 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "name": "stdout", 160 | "output_type": "stream", 161 | "text": [ 162 | "(290585, 5)\n" 163 | ] 164 | }, 165 | { 166 | "data": { 167 | "text/plain": [ 168 | "treatment 145311\n", 169 | "control 145274\n", 170 | "Name: group, dtype: int64" 171 | ] 172 | }, 173 | "execution_count": 4, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "#some of the control group saw the new_page and some of the treatment group saw the old_page - delete these instances\n", 180 | "mask1 = (df[\"group\"] == \"control\") & (df[\"landing_page\"] == \"new_page\")\n", 181 | "index_to_drop1 = df[mask1].index\n", 182 | "df = df.drop(index_to_drop1)\n", 183 | "\n", 184 | "mask2 = (df[\"group\"] == \"treatment\") & 
(df[\"landing_page\"] == \"old_page\")\n", 185 | "index_to_drop2 = df[mask2].index\n", 186 | "df = df.drop(index_to_drop2)\n", 187 | "\n", 188 | "print(df.shape)\n", 189 | "df[\"group\"].value_counts()" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 5, 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "name": "stdout", 199 | "output_type": "stream", 200 | "text": [ 201 | "290585\n", 202 | "290584\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "#Check how many duplicated users exist\n", 208 | "print(df[\"user_id\"].count())\n", 209 | "print(df[\"user_id\"].nunique())" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 6, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "#drop duplicated users (keep the first row per user_id); reassign instead of mutating in place\n", 219 | "df = df.drop_duplicates(subset='user_id', keep='first')" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 12, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "name": "stdout", 229 | "output_type": "stream", 230 | "text": [ 231 | "Split of control users who saw old page vs treatment users who saw new page: 49.99 % 50.01 %\n", 232 | "Number of control users who converted on old page: 17489\n", 233 | "Percentage of control users who converted: 12.04 %\n", 234 | "Number of treatment users who converted on new page: 17264\n", 235 | "Percentage of treatment users who converted: 11.88 %\n" 236 | ] 237 | } 238 | ], 239 | "source": [ 240 | "#Show the % split between users who saw new vs old page\n", 241 | "#Count conversions and total users in each group\n", 242 | "mask = (df[\"group\"] == \"control\")\n", 243 | "conversions_control = df[\"converted\"][mask].sum()\n", 244 | "total_users_control = df[\"converted\"][mask].count()\n", 245 | "\n", 246 | "mask = (df[\"group\"] == \"treatment\")\n", 247 | "conversions_treatment = df[\"converted\"][mask].sum()\n", 248 | "total_users_treatment = df[\"converted\"][mask].count()\n", 249 | "\n", 250 | "print(\"Split of control 
users who saw old page vs treatment users who saw new page: \", \n", 251 | " round(total_users_control / df[\"converted\"].count() * 100, 2), \"% \",\n", 252 | " round((total_users_treatment / df[\"converted\"].count()) * 100, 2), \"%\")\n", 253 | "\n", 254 | "#count number of users who converted in each group\n", 255 | "print(\"Number of control users who converted on old page: \", conversions_control)\n", 256 | "print(\"Percentage of control users who converted: \", round((conversions_control / total_users_control) * 100, 2), \"%\")\n", 257 | "\n", 258 | "#same figures for the treatment group\n", 259 | "print(\"Number of treatment users who converted on new page: \", conversions_treatment)\n", 260 | "print(\"Percentage of treatment users who converted: \", round((conversions_treatment/ total_users_treatment) * 100, 2), \"%\")" 261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "metadata": {}, 266 | "source": [ 267 | "#### Set Test Parameters" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 13, 273 | "metadata": {}, 274 | "outputs": [ 275 | { 276 | "name": "stdout", 277 | "output_type": "stream", 278 | "text": [ 279 | "Required sample size: 17209 per group\n" 280 | ] 281 | } 282 | ], 283 | "source": [ 284 | "#Check what sample size is required\n", 285 | "baseline_rate = conversions_control / total_users_control\n", 286 | "practical_significance = 0.01 #user defined minimum difference in conversion rate we want to detect\n", 287 | "confidence_level = 0.05 #user defined significance level (alpha); 0.05 gives a 95% confidence interval\n", 288 | "sensitivity = 0.8 #user defined statistical power\n", 289 | "\n", 290 | "effect_size = sms.proportion_effectsize(baseline_rate, baseline_rate + practical_significance)\n", 291 | "sample_size = sms.NormalIndPower().solve_power(effect_size = effect_size, power = sensitivity, \n", 292 | " alpha = confidence_level, ratio=1)\n", 293 | "print(\"Required sample size: \", round(sample_size), \" per group\")" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | 
"source": [ 300 | "#### A/B Test" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 14, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "#Calculate pooled probability\n", 310 | "mask = (df[\"group\"] == \"control\")\n", 311 | "conversions_control = df[\"converted\"][mask].sum()\n", 312 | "total_users_control = df[\"converted\"][mask].count()\n", 313 | "\n", 314 | "mask = (df[\"group\"] == \"treatment\")\n", 315 | "conversions_treatment = df[\"converted\"][mask].sum()\n", 316 | "total_users_treatment = df[\"converted\"][mask].count()\n", 317 | "\n", 318 | "prob_pooled = (conversions_control + conversions_treatment) / (total_users_control + total_users_treatment)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 15, 324 | "metadata": {}, 325 | "outputs": [ 326 | { 327 | "name": "stdout", 328 | "output_type": "stream", 329 | "text": [ 330 | "Do not reject the null hypothesis\n", 331 | "The lower bound of the confidence interval is -0.39 %\n", 332 | "The upper bound of the confidence interval is 0.08 %\n" 333 | ] 334 | } 335 | ], 336 | "source": [ 337 | "#Calculate pooled standard error and margin of error\n", 338 | "se_pooled = math.sqrt(prob_pooled * (1 - prob_pooled) * (1 / total_users_control + 1 / total_users_treatment))\n", 339 | "z_score = st.norm.ppf(1 - confidence_level / 2)\n", 340 | "margin_of_error = se_pooled * z_score\n", 341 | "\n", 342 | "#Calculate dhat, the estimated difference between probability of conversions in the experiment and control groups\n", 343 | "d_hat = (conversions_treatment / total_users_treatment) - (conversions_control / total_users_control)\n", 344 | "\n", 345 | "#Build the confidence interval around d_hat; reject the null hypothesis (no difference) only if it excludes zero\n", 346 | "lower_bound = d_hat - margin_of_error\n", 347 | "upper_bound = d_hat + margin_of_error\n", 348 | "\n", 349 | "if lower_bound > 0 or upper_bound < 0:\n", 350 | " print(\"Reject null hypothesis\")\n", 351 | "else: \n", 352 | " print(\"Do not reject 
the null hypothesis\")\n", 353 | " \n", 354 | "print(\"The lower bound of the confidence interval is \", round(lower_bound * 100, 2), \"%\")\n", 355 | "print(\"The upper bound of the confidence interval is \", round(upper_bound * 100, 2), \"%\")" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [] 364 | } 365 | ], 366 | "metadata": { 367 | "kernelspec": { 368 | "display_name": "Python 3", 369 | "language": "python", 370 | "name": "python3" 371 | }, 372 | "language_info": { 373 | "codemirror_mode": { 374 | "name": "ipython", 375 | "version": 3 376 | }, 377 | "file_extension": ".py", 378 | "mimetype": "text/x-python", 379 | "name": "python", 380 | "nbconvert_exporter": "python", 381 | "pygments_lexer": "ipython3", 382 | "version": "3.7.3" 383 | } 384 | }, 385 | "nbformat": 4, 386 | "nbformat_minor": 2 387 | } 388 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AB_Testing 2 | Implementing A/B Tests in Python 3 | 4 | A/B testing is one of the most important tools for optimizing most things we interact with on our computers, phones and tablets. From website layouts to social media ads and product features, every button, banner and call to action has probably been A/B tested. And these tests can be extremely granular; Google famously tested "40 shades of blue" to decide what shade of blue should be used for links on the Google and Gmail landing pages. 5 | 6 | I recently completed the Udacity course "A/B Testing by Google - Online Experiment Design and Analysis" and wanted to share some of my key takeaways as well as how you can implement A/B testing using Python. 7 | 8 | Data source: https://www.kaggle.com/zhangluyuan/ab-testing 9 | --------------------------------------------------------------------------------