├── .gitignore ├── .ipynb_checkpoints └── Lifetime Value Estimation-checkpoint.ipynb ├── Lifetime Value Estimation.ipynb └── example_config /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | # gcal_etl:: 62 | .app_data/ 63 | logs/ 64 | docs/ 65 | .vscode/ 66 | .ipynb_checkpoints/ 67 | 68 | config 69 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/Lifetime Value Estimation-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 32, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#Import Libraries needed for API, and Pandas\n", 12 | "import requests\n", 13 | "import pandas as pd\n", 14 | "import numpy as np\n", 15 | "import ConfigParser\n", 16 | "\n", 17 | "#Read config file with Looker API and Database connection information\n", 18 | "config = ConfigParser.RawConfigParser(allow_no_value=True)\n", 19 | "config.read('config')\n", 20 | "\n", 21 | "#Very Basic Looker API class allowing us to access the data from a given Look ID\n", 22 | "class lookerAPIClient:\n", 23 | " def __init__(self, api_host=None, api_client_id=None, api_secret=None, api_port='19999'):\n", 24 | " auth_request_payload = {'client_id': api_client_id, 'client_secret': api_secret}\n", 25 | " self.host = api_host\n", 26 | " self.uri_stub = '/api/3.0/'\n", 27 | " self.uri_full = ''.join([api_host, ':', api_port, self.uri_stub])\n", 28 | " response = requests.post(self.uri_full + 'login', params=auth_request_payload)\n", 29 | " authData = response.json()\n", 30 | " self.access_token = authData['access_token']\n", 31 | " self.auth_headers = {\n", 32 | " 'Authorization' : 'token ' + self.access_token,\n", 33 | " }\n", 34 | "\n", 35 | " def post(self, call='', json_payload=None):\n", 36 | " response = requests.post(self.uri_full + call, headers=self.auth_headers, json=json_payload)\n", 37 | " return response.json()\n", 38 | "\n", 39 | " def get(self, call=''):\n", 40 | " response = requests.get(self.uri_full + call, headers=self.auth_headers)\n", 41 | " return response.json()\n", 42 | "\n", 43 | " def runLook(self, look, limit=100):\n", 44 | " optional_arguments = '?' + 'limit=' + str(limit)\n", 45 | " return self.get('/'.join(['looks',look,'run','json'])+optional_arguments)\n", 46 | "\n", 47 | "#Initialize the Looker API Class with the data in our config file (which is stored in a neighboring file 'config')\n", 48 | "x = lookerAPIClient(\n", 49 | " api_host = config.get('api', 'api_host'), \n", 50 | " api_client_id = config.get('api', 'api_client_id'), \n", 51 | " api_secret = config.get('api', 'api_secret'), \n", 52 | " api_port = config.get('api', 'api_port')\n", 53 | " ) \n", 54 | " \n", 55 | "#Use the API to get our training/'test' dataset and our new 'validation' dataset we will predict upon\n", 56 | "historicalCustomers = x.runLook('292',limit=10000)\n", 57 | "newCustomers = x.runLook('293',limit=10000)\n", 58 | "\n", 59 | "\n", 60 | "historicalCustomersDF = pd.DataFrame(historicalCustomers)\n", 61 | "newCustomersDF = pd.DataFrame(newCustomers)\n" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 33, 67 | "metadata": { 68 | "collapsed": true 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "import statsmodels.api as sm\n", 73 | "# import pandas.tseries.statsmodels.api as sm\n", 74 | "trainingSet.head()\n", 75 | "\n", 76 | "Y = historicalCustomersDF['user_facts.total_revenue']\n", 77 | "X = historicalCustomersDF[['users.age','user_facts.orders_in_first_30_days','user_facts.total_revenue_in_first_30_days']]\n", 78 | "\n", 79 | "X = sm.add_constant(X)\n", 80 | "\n", 81 | "est = sm.OLS(Y,X)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 34, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "text/html": [ 92 | "\n", 93 | "\n", 94 | "\n", 95 | " \n", 96 | "\n", 97 | "\n", 98 | " \n", 99 | "\n", 100 | "\n", 101 | " \n", 102 | "\n", 103 | "\n", 104 | " \n", 105 | "\n", 106 | "\n", 107 | " \n", 108 | "\n", 109 | "\n", 110 | " \n", 111 | "\n", 112 | "\n", 113 | " \n", 114 | "\n", 115 | "\n", 116 | " \n", 117 | "\n", 118 | "\n", 119 | " \n", 120 | "\n", 121 | "
OLS Regression Results
Dep. Variable: user_facts.total_revenue R-squared: 0.138
Model: OLS Adj. R-squared: 0.138
Method: Least Squares F-statistic: 203.3
Date: Sat, 09 Sep 2017 Prob (F-statistic): 2.64e-122
Time: 21:07:32 Log-Likelihood: -25629.
No. Observations: 3808 AIC: 5.127e+04
Df Residuals: 3804 BIC: 5.129e+04
Df Model: 3
Covariance Type: nonrobust
\n", 122 | "\n", 123 | "\n", 124 | " \n", 125 | "\n", 126 | "\n", 127 | " \n", 128 | "\n", 129 | "\n", 130 | " \n", 131 | "\n", 132 | "\n", 133 | " \n", 134 | "\n", 135 | "\n", 136 | " \n", 137 | "\n", 138 | "
coef std err t P>|t| [0.025 0.975]
const 200.5521 8.993 22.300 0.000 182.920 218.184
users.age -0.3200 0.175 -1.824 0.068 -0.664 0.024
user_facts.orders_in_first_30_days -82.1189 7.208 -11.393 0.000 -96.250 -67.988
user_facts.total_revenue_in_first_30_days 1.1115 0.047 23.791 0.000 1.020 1.203
\n", 139 | "\n", 140 | "\n", 141 | " \n", 142 | "\n", 143 | "\n", 144 | " \n", 145 | "\n", 146 | "\n", 147 | " \n", 148 | "\n", 149 | "\n", 150 | " \n", 151 | "\n", 152 | "
Omnibus: 2142.726 Durbin-Watson: 0.951
Prob(Omnibus): 0.000 Jarque-Bera (JB): 18301.039
Skew: 2.591 Prob(JB): 0.00
Kurtosis: 12.407 Cond. No. 318.
" 153 | ], 154 | "text/plain": [ 155 | "\n", 156 | "\"\"\"\n", 157 | " OLS Regression Results \n", 158 | "====================================================================================\n", 159 | "Dep. Variable: user_facts.total_revenue R-squared: 0.138\n", 160 | "Model: OLS Adj. R-squared: 0.138\n", 161 | "Method: Least Squares F-statistic: 203.3\n", 162 | "Date: Sat, 09 Sep 2017 Prob (F-statistic): 2.64e-122\n", 163 | "Time: 21:07:32 Log-Likelihood: -25629.\n", 164 | "No. Observations: 3808 AIC: 5.127e+04\n", 165 | "Df Residuals: 3804 BIC: 5.129e+04\n", 166 | "Df Model: 3 \n", 167 | "Covariance Type: nonrobust \n", 168 | "=============================================================================================================\n", 169 | " coef std err t P>|t| [0.025 0.975]\n", 170 | "-------------------------------------------------------------------------------------------------------------\n", 171 | "const 200.5521 8.993 22.300 0.000 182.920 218.184\n", 172 | "users.age -0.3200 0.175 -1.824 0.068 -0.664 0.024\n", 173 | "user_facts.orders_in_first_30_days -82.1189 7.208 -11.393 0.000 -96.250 -67.988\n", 174 | "user_facts.total_revenue_in_first_30_days 1.1115 0.047 23.791 0.000 1.020 1.203\n", 175 | "==============================================================================\n", 176 | "Omnibus: 2142.726 Durbin-Watson: 0.951\n", 177 | "Prob(Omnibus): 0.000 Jarque-Bera (JB): 18301.039\n", 178 | "Skew: 2.591 Prob(JB): 0.00\n", 179 | "Kurtosis: 12.407 Cond. No. 318.\n", 180 | "==============================================================================\n", 181 | "\n", 182 | "Warnings:\n", 183 | "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", 184 | "\"\"\"" 185 | ] 186 | }, 187 | "execution_count": 34, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "est = est.fit()\n", 194 | "est.summary()" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 35, 200 | "metadata": { 201 | "collapsed": true 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "## Validation Set Shaping \n", 206 | "X2 = newCustomersDF[['users.age','user_facts.orders_in_first_30_days', 'user_facts.total_revenue_in_first_30_days']]\n", 207 | "X2 = sm.add_constant(X2)\n", 208 | "## END Validation Set Shaping \n", 209 | "\n", 210 | "output = pd.concat([newCustomersDF[['users.id']],est.predict(X2)],axis=1)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 36, 216 | "metadata": { 217 | "collapsed": true 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "#from __future__ import print_function\n", 222 | "from datetime import date, datetime, timedelta\n", 223 | "import mysql.connector\n", 224 | "\n", 225 | "cnx = mysql.connector.connect(\n", 226 | " user = config.get('database', 'user'), \n", 227 | " password = config.get('database', 'password'),\n", 228 | " host = config.get('database', 'host'),\n", 229 | " database = config.get('database', 'database')\n", 230 | " )\n", 231 | "cursor = cnx.cursor()\n", 232 | "\n", 233 | "cursor.execute('truncate table my_schema.ltv_predictions')\n", 234 | "\n", 235 | "for elem in output.itertuples():\n", 236 | " add_record = (\"INSERT INTO my_schema.ltv_predictions (user_id, ltv_prediction) VALUES (%s, %s)\")\n", 237 | " cursor.execute(add_record,(str(elem[1]),str(elem[2])))\n", 238 | "\n", 239 | "\n", 240 | "cnx.commit()\n", 241 | "cursor.close()\n", 242 | "cnx.close()\n" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": { 249 | "collapsed": true 250 | }, 251 | "outputs": [], 252 | "source": [] 253 | } 254 | ], 255 | "metadata": { 256 | "kernelspec": { 257 | "display_name": "Python 2", 258 | "language": "python", 259 | "name": "python2" 260 | }, 261 | "language_info": { 262 | "codemirror_mode": { 263 | "name": "ipython", 264 | "version": 2 265 | }, 266 | "file_extension": ".py", 267 | "mimetype": "text/x-python", 268 | "name": "python", 269 | "nbconvert_exporter": "python", 270 | "pygments_lexer": "ipython2", 271 | "version": "2.7.13" 272 | } 273 | }, 274 | "nbformat": 4, 275 | "nbformat_minor": 2 276 | } 277 | -------------------------------------------------------------------------------- /Lifetime Value Estimation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 32, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#Import Libraries needed for API, and Pandas\n", 12 | "import requests\n", 13 | "import pandas as pd\n", 14 | "import numpy as np\n", 15 | "import ConfigParser\n", 16 | "\n", 17 | "#Read config file with Looker API and Database connection information\n", 18 | "config = ConfigParser.RawConfigParser(allow_no_value=True)\n", 19 | "config.read('config')\n", 20 | "\n", 21 | "#Very Basic Looker API class allowing us to access the data from a given Look ID\n", 22 | "class lookerAPIClient:\n", 23 | " def __init__(self, api_host=None, api_client_id=None, api_secret=None, api_port='19999'):\n", 24 | " auth_request_payload = {'client_id': api_client_id, 'client_secret': api_secret}\n", 25 | " self.host = api_host\n", 26 | " self.uri_stub = '/api/3.0/'\n", 27 | " self.uri_full = ''.join([api_host, ':', api_port, self.uri_stub])\n", 28 | " response = requests.post(self.uri_full + 'login', params=auth_request_payload)\n", 29 | " authData = response.json()\n", 30 | " self.access_token = authData['access_token']\n", 31 | " self.auth_headers = {\n", 32 | " 'Authorization' : 'token ' + self.access_token,\n", 33 | " }\n", 34 | "\n", 35 | " def post(self, call='', json_payload=None):\n", 36 | " response = requests.post(self.uri_full + call, headers=self.auth_headers, json=json_payload)\n", 37 | " return response.json()\n", 38 | "\n", 39 | " def get(self, call=''):\n", 40 | " response = requests.get(self.uri_full + call, headers=self.auth_headers)\n", 41 | " return response.json()\n", 42 | "\n", 43 | " def runLook(self, look, limit=100):\n", 44 | " optional_arguments = '?' + 'limit=' + str(limit)\n", 45 | " return self.get('/'.join(['looks',look,'run','json'])+optional_arguments)\n", 46 | "\n", 47 | "#Initialize the Looker API Class with the data in our config file (which is stored in a neighboring file 'config')\n", 48 | "x = lookerAPIClient(\n", 49 | " api_host = config.get('api', 'api_host'), \n", 50 | " api_client_id = config.get('api', 'api_client_id'), \n", 51 | " api_secret = config.get('api', 'api_secret'), \n", 52 | " api_port = config.get('api', 'api_port')\n", 53 | " ) \n", 54 | " \n", 55 | "#Use the API to get our training/'test' dataset and our new 'validation' dataset we will predict upon\n", 56 | "historicalCustomers = x.runLook('292',limit=10000)\n", 57 | "newCustomers = x.runLook('293',limit=10000)\n", 58 | "\n", 59 | "\n", 60 | "historicalCustomersDF = pd.DataFrame(historicalCustomers)\n", 61 | "newCustomersDF = pd.DataFrame(newCustomers)\n" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 33, 67 | "metadata": { 68 | "collapsed": true 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "import statsmodels.api as sm\n", 73 | "# import pandas.tseries.statsmodels.api as sm\n", 74 | "trainingSet.head()\n", 75 | "\n", 76 | "Y = historicalCustomersDF['user_facts.total_revenue']\n", 77 | "X = historicalCustomersDF[['users.age','user_facts.orders_in_first_30_days','user_facts.total_revenue_in_first_30_days']]\n", 78 | "\n", 79 | "X = sm.add_constant(X)\n", 80 | "\n", 81 | "est = sm.OLS(Y,X)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 34, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "text/html": [ 92 | "\n", 93 | "\n", 94 | "\n", 95 | " \n", 96 | "\n", 97 | "\n", 98 | " \n", 99 | "\n", 100 | "\n", 101 | " \n", 102 | "\n", 103 | "\n", 104 | " \n", 105 | "\n", 106 | "\n", 107 | " \n", 108 | "\n", 109 | "\n", 110 | " \n", 111 | "\n", 112 | "\n", 113 | " \n", 114 | "\n", 115 | "\n", 116 | " \n", 117 | "\n", 118 | "\n", 119 | " \n", 120 | "\n", 121 | "
OLS Regression Results
Dep. Variable: user_facts.total_revenue R-squared: 0.138
Model: OLS Adj. R-squared: 0.138
Method: Least Squares F-statistic: 203.3
Date: Sat, 09 Sep 2017 Prob (F-statistic): 2.64e-122
Time: 21:07:32 Log-Likelihood: -25629.
No. Observations: 3808 AIC: 5.127e+04
Df Residuals: 3804 BIC: 5.129e+04
Df Model: 3
Covariance Type: nonrobust
\n", 122 | "\n", 123 | "\n", 124 | " \n", 125 | "\n", 126 | "\n", 127 | " \n", 128 | "\n", 129 | "\n", 130 | " \n", 131 | "\n", 132 | "\n", 133 | " \n", 134 | "\n", 135 | "\n", 136 | " \n", 137 | "\n", 138 | "
coef std err t P>|t| [0.025 0.975]
const 200.5521 8.993 22.300 0.000 182.920 218.184
users.age -0.3200 0.175 -1.824 0.068 -0.664 0.024
user_facts.orders_in_first_30_days -82.1189 7.208 -11.393 0.000 -96.250 -67.988
user_facts.total_revenue_in_first_30_days 1.1115 0.047 23.791 0.000 1.020 1.203
\n", 139 | "\n", 140 | "\n", 141 | " \n", 142 | "\n", 143 | "\n", 144 | " \n", 145 | "\n", 146 | "\n", 147 | " \n", 148 | "\n", 149 | "\n", 150 | " \n", 151 | "\n", 152 | "
Omnibus: 2142.726 Durbin-Watson: 0.951
Prob(Omnibus): 0.000 Jarque-Bera (JB): 18301.039
Skew: 2.591 Prob(JB): 0.00
Kurtosis: 12.407 Cond. No. 318.
" 153 | ], 154 | "text/plain": [ 155 | "\n", 156 | "\"\"\"\n", 157 | " OLS Regression Results \n", 158 | "====================================================================================\n", 159 | "Dep. Variable: user_facts.total_revenue R-squared: 0.138\n", 160 | "Model: OLS Adj. R-squared: 0.138\n", 161 | "Method: Least Squares F-statistic: 203.3\n", 162 | "Date: Sat, 09 Sep 2017 Prob (F-statistic): 2.64e-122\n", 163 | "Time: 21:07:32 Log-Likelihood: -25629.\n", 164 | "No. Observations: 3808 AIC: 5.127e+04\n", 165 | "Df Residuals: 3804 BIC: 5.129e+04\n", 166 | "Df Model: 3 \n", 167 | "Covariance Type: nonrobust \n", 168 | "=============================================================================================================\n", 169 | " coef std err t P>|t| [0.025 0.975]\n", 170 | "-------------------------------------------------------------------------------------------------------------\n", 171 | "const 200.5521 8.993 22.300 0.000 182.920 218.184\n", 172 | "users.age -0.3200 0.175 -1.824 0.068 -0.664 0.024\n", 173 | "user_facts.orders_in_first_30_days -82.1189 7.208 -11.393 0.000 -96.250 -67.988\n", 174 | "user_facts.total_revenue_in_first_30_days 1.1115 0.047 23.791 0.000 1.020 1.203\n", 175 | "==============================================================================\n", 176 | "Omnibus: 2142.726 Durbin-Watson: 0.951\n", 177 | "Prob(Omnibus): 0.000 Jarque-Bera (JB): 18301.039\n", 178 | "Skew: 2.591 Prob(JB): 0.00\n", 179 | "Kurtosis: 12.407 Cond. No. 318.\n", 180 | "==============================================================================\n", 181 | "\n", 182 | "Warnings:\n", 183 | "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", 184 | "\"\"\"" 185 | ] 186 | }, 187 | "execution_count": 34, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "est = est.fit()\n", 194 | "est.summary()" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 35, 200 | "metadata": { 201 | "collapsed": true 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "## Validation Set Shaping \n", 206 | "X2 = newCustomersDF[['users.age','user_facts.orders_in_first_30_days', 'user_facts.total_revenue_in_first_30_days']]\n", 207 | "X2 = sm.add_constant(X2)\n", 208 | "## END Validation Set Shaping \n", 209 | "\n", 210 | "output = pd.concat([newCustomersDF[['users.id']],est.predict(X2)],axis=1)" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 36, 216 | "metadata": { 217 | "collapsed": true 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "#from __future__ import print_function\n", 222 | "from datetime import date, datetime, timedelta\n", 223 | "import mysql.connector\n", 224 | "\n", 225 | "cnx = mysql.connector.connect(\n", 226 | " user = config.get('database', 'user'), \n", 227 | " password = config.get('database', 'password'),\n", 228 | " host = config.get('database', 'host'),\n", 229 | " database = config.get('database', 'database')\n", 230 | " )\n", 231 | "cursor = cnx.cursor()\n", 232 | "\n", 233 | "cursor.execute('truncate table my_schema.ltv_predictions')\n", 234 | "\n", 235 | "for elem in output.itertuples():\n", 236 | " add_record = (\"INSERT INTO my_schema.ltv_predictions (user_id, ltv_prediction) VALUES (%s, %s)\")\n", 237 | " cursor.execute(add_record,(str(elem[1]),str(elem[2])))\n", 238 | "\n", 239 | "\n", 240 | "cnx.commit()\n", 241 | "cursor.close()\n", 242 | "cnx.close()\n" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": { 249 | "collapsed": true 250 | }, 251 | "outputs": [], 252 | "source": [] 253 | } 254 | ], 255 | "metadata": { 256 | "kernelspec": { 257 | "display_name": "Python 2", 258 | "language": "python", 259 | "name": "python2" 260 | }, 261 | "language_info": { 262 | "codemirror_mode": { 263 | "name": "ipython", 264 | "version": 2 265 | }, 266 | "file_extension": ".py", 267 | "mimetype": "text/x-python", 268 | "name": "python", 269 | "nbconvert_exporter": "python", 270 | "pygments_lexer": "ipython2", 271 | "version": "2.7.13" 272 | } 273 | }, 274 | "nbformat": 4, 275 | "nbformat_minor": 2 276 | } 277 | -------------------------------------------------------------------------------- /example_config: -------------------------------------------------------------------------------- 1 | [database] 2 | user = <> 3 | password = <> 4 | host = <> 5 | database = <> 6 | 7 | [api] 8 | api_host = https://<>.looker.com 9 | api_client_id = <> 10 | api_secret = <> 11 | api_port= 19999 --------------------------------------------------------------------------------