├── README.md
├── LICENSE
├── requirements.txt
└── LeadsBlogNotebook.ipynb

/README.md:
--------------------------------------------------------------------------------
# End-to-End Lead Scoring Example

Example data and code for [the end-to-end lead scoring tutorial](https://towardsdatascience.com/a-true-end-to-end-ml-example-lead-scoring-f5b52e9a3c80) on Towards Data Science.

Test out this [lead scoring model demo on Booklet.ai](https://app.booklet.ai/model/lead-scoring).

## Setup

This example is built with Python `3.8.2`.

After cloning this repo and entering the directory with `cd lead-scoring-demo`, create a virtual environment, install the dependencies, and register the Jupyter kernel:

```
python3 -m venv lead-scoring
source lead-scoring/bin/activate
pip install -r requirements.txt
python -m ipykernel install --user --name=lead-scoring
```

Then start the MLflow server:

```
mlflow server
```

By default the server listens on `http://localhost:5000`, which is the tracking URI the notebook expects.
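
With the server running, open the notebook and pick the `lead-scoring` kernel. A minimal sketch, assuming the classic Jupyter frontend (`requirements.txt` pins `ipykernel` but no notebook frontend, so install one into the same virtualenv first):

```
pip install notebook
jupyter notebook LeadsBlogNotebook.ipynb
```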

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 BookletAI

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
alembic==1.4.2
appnope==0.1.0
backcall==0.2.0
boto3==1.14.0
botocore==1.17.0
certifi==2020.4.5.2
chardet==3.0.4
click==7.1.2
cloudpickle==1.4.1
databricks-cli==0.11.0
decorator==4.4.2
docker==4.2.1
docutils==0.15.2
entrypoints==0.3
Flask==1.1.2
gitdb==4.0.5
GitPython==3.1.3
gorilla==0.3.0
gunicorn==20.0.4
idna==2.9
ipykernel==5.3.0
ipython==7.15.0
ipython-genutils==0.2.0
itsdangerous==1.1.0
jedi==0.17.0
Jinja2==2.11.2
jmespath==0.10.0
joblib==0.15.1
jupyter-client==6.1.3
jupyter-core==4.6.3
Mako==1.1.3
MarkupSafe==1.1.1
mlflow==1.8.0
numpy==1.18.5
pandas==1.0.4
parso==0.7.0
pexpect==4.8.0
pickleshare==0.7.5
prometheus-client==0.8.0
prometheus-flask-exporter==0.13.0
prompt-toolkit==3.0.5
protobuf==3.12.2
ptyprocess==0.6.0
Pygments==2.6.1
python-dateutil==2.8.1
python-editor==1.0.4
pytz==2020.1
PyYAML==5.3.1
pyzmq==19.0.1
querystring-parser==1.2.4
requests==2.23.0
s3transfer==0.3.3
scikit-learn==0.23.1
scipy==1.4.1
simplejson==3.17.0
six==1.15.0
smmap==3.0.4
SQLAlchemy==1.3.13
sqlparse==0.3.1
tabulate==0.8.7
threadpoolctl==2.1.0
tornado==6.0.4
traitlets==4.3.3
urllib3==1.25.9
wcwidth==0.2.4
websocket-client==0.57.0
Werkzeug==1.0.1

--------------------------------------------------------------------------------
/LeadsBlogNotebook.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import requests\n",
    "\n",
    "import boto3\n",
    "import mlflow\n",
    "from mlflow import pyfunc as ml_pyfunc\n",
    "import pandas as pd\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "########################################################\n",
    "### Import Dataset\n",
    "########################################################"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "## Import dataset\n",
    "leads_dataset = pd.read_csv('data/leads_cleaned.csv')\n",
    "leads_dataset.columns = map(str.lower, leads_dataset.columns)"
   ]
  },
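  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional sanity check (editor's addition, not part of the original tutorial):\n",
    "# confirm the dataset loaded as expected and peek at the base conversion rate.\n",
    "print(leads_dataset.shape)\n",
    "print(leads_dataset['converted'].mean())"
   ]
  },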
"source": [ 70 | "# Create data pre-processing steps before plugging into model\n", 71 | "leads_categorical_columns = ['lead origin',\n", 72 | " 'lead source',\n", 73 | " 'last activity',\n", 74 | " 'specialization',\n", 75 | " 'what is your current occupation',\n", 76 | " 'what matters most to you in choosing a course',\n", 77 | " 'city',\n", 78 | " 'last notable activity']\n", 79 | "\n", 80 | "leads_numeric_columns = ['totalvisits',\n", 81 | " 'total time spent on website',\n", 82 | " 'page views per visit']\n", 83 | "\n", 84 | "leads_response_columns = ['converted']" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "collapsed": true 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "#split data for training, remove extras\n", 96 | "\n", 97 | "leads_x = leads_dataset.drop(leads_response_columns, axis=1)\n", 98 | "leads_y = leads_dataset[leads_response_columns]\n", 99 | "\n", 100 | "leads_x_train, leads_x_test, leads_y_train, leads_y_test = train_test_split(leads_x,\n", 101 | " leads_y,\n", 102 | " train_size=0.7,\n", 103 | " test_size=0.3,\n", 104 | " random_state=5050)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": { 111 | "collapsed": true 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "scaler = StandardScaler()\n", 116 | "scaler = scaler.fit(leads_x_train[leads_numeric_columns])" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "metadata": { 123 | "collapsed": true 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "def pre_process_leads_data(df,\n", 128 | " numeric_columns,\n", 129 | " categorical_columns,\n", 130 | " fitted_scaler,\n", 131 | " train_df_columns = None):\n", 132 | " ## create new df with selected columns\n", 133 | " df.columns = map(str.lower, df.columns)\n", 134 | " _df = df[set(numeric_columns + categorical_columns)].copy()\n", 135 | " \n", 136 | " ## scale the numeric columns with the pre-built scaler\n", 137 | " _df[numeric_columns] = fitted_scaler.transform(_df[numeric_columns])\n", 138 | " \n", 139 | " # First, make categorical text lowercase\n", 140 | " _df[categorical_columns] = _df[categorical_columns].apply(lambda x: x.str.lower())\n", 141 | " # Next, create one-hot-encoded variables, add to dataframe, drop old columns\n", 142 | " _df_dummies = pd.get_dummies(_df[categorical_columns], drop_first=True)\n", 143 | " _df = pd.concat([_df, _df_dummies], axis=1)\n", 144 | " _df.drop(categorical_columns, axis=1, inplace = True)\n", 145 | "\n", 146 | " if train_df_columns:\n", 147 | " _df = _df.reindex(columns=train_df_columns, fill_value=0)\n", 148 | "\n", 149 | " return _df" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": true 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "leads_x_train_clean = pre_process_leads_data(df = leads_x_train,\n", 161 | " numeric_columns = leads_numeric_columns,\n", 162 | " categorical_columns = leads_categorical_columns,\n", 163 | " fitted_scaler = scaler)\n", 164 | "\n", 165 | "leads_x_test_clean = pre_process_leads_data(df = leads_x_test,\n", 166 | " numeric_columns = leads_numeric_columns,\n", 167 | " categorical_columns = leads_categorical_columns,\n", 168 | " fitted_scaler = scaler,\n", 169 | " train_df_columns = leads_x_train_clean.columns.tolist())" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": { 176 | "collapsed": true 177 | }, 178 | 
"outputs": [], 179 | "source": [ 180 | "########################################################\n", 181 | "### Train and Evaluate Model\n", 182 | "########################################################" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "## Train the random forest model\n", 192 | "num_estimators = 100\n", 193 | "min_samples = 4\n", 194 | "\n", 195 | "rf = RandomForestClassifier(n_estimators=num_estimators,\n", 196 | " min_samples_split=min_samples)\n", 197 | "rf.fit(leads_x_train_clean, leads_y_train.values.ravel())" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "leads_y_test_predicted = rf.predict(leads_x_test_clean)\n", 207 | "\n", 208 | "accuracy = metrics.accuracy_score(leads_y_test, leads_y_test_predicted)\n", 209 | "auc_score = metrics.roc_auc_score(leads_y_test, leads_y_test_predicted)\n", 210 | "\n", 211 | "print(accuracy)\n", 212 | "print(auc_score)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": { 219 | "collapsed": true 220 | }, 221 | "outputs": [], 222 | "source": [ 223 | "########################################################\n", 224 | "### MLflow and environment setup\n", 225 | "########################################################" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "# connect to MLflow\n", 235 | "mlflow.set_tracking_uri(\"http://localhost:5000\")\n", 236 | "mlflow.set_experiment(\"LeadScoringProcessed\") # creates an experiment if it doesn't exist" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": { 243 | "collapsed": true 244 | }, 245 | "outputs": [], 246 | "source": [ 247 | "# define specific python and package versions for environment\n", 248 | "mlflow_conda_env = {\n", 249 | " 'name': 'mlflow-env',\n", 250 | " 'channels': ['defaults'],\n", 251 | " 'dependencies': ['python=3.8.2', {'pip': ['mlflow==1.8.0','scikit-learn==0.23.1','cloudpickle==1.4.1']}]\n", 252 | "}" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": null, 258 | "metadata": { 259 | "collapsed": true 260 | }, 261 | "outputs": [], 262 | "source": [ 263 | "########################################################\n", 264 | "### Define Model\n", 265 | "########################################################" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "metadata": { 272 | "collapsed": true 273 | }, 274 | "outputs": [], 275 | "source": [ 276 | "class leadsModel(mlflow.pyfunc.PythonModel):\n", 277 | " \n", 278 | " ## defining objects needed for leadsModel prediction. 
\n", 279 | " def __init__(self,\n", 280 | " train_df_columns,\n", 281 | " model,\n", 282 | " leads_categorical_columns,\n", 283 | " leads_numeric_columns,\n", 284 | " fitted_scaler,\n", 285 | " pre_process_leads_data):\n", 286 | " \n", 287 | " ## Setting up all needed objects\n", 288 | " self.train_df_columns = train_df_columns\n", 289 | " self.model = model\n", 290 | " self.leads_categorical_columns = leads_categorical_columns\n", 291 | " self.leads_numeric_columns = leads_numeric_columns\n", 292 | " self.fitted_scaler = fitted_scaler\n", 293 | " self.pre_process_leads_data = pre_process_leads_data\n", 294 | " \n", 295 | " ## define function with processing and feeding data into prediction at the end\n", 296 | " def predict(self,context,model_input):\n", 297 | " \n", 298 | " # make sure all inputted columns are lowercase\n", 299 | " model_input.columns = map(str.lower, model_input.columns)\n", 300 | " \n", 301 | " # run inputted dataset through our processing function\n", 302 | " # note: we are excluding the response columns here since not needed for deploy\n", 303 | " model_input_processed = self.pre_process_leads_data(\n", 304 | " df = model_input,\n", 305 | " numeric_columns = self.leads_numeric_columns,\n", 306 | " categorical_columns = self.leads_categorical_columns,\n", 307 | " fitted_scaler = self.fitted_scaler,\n", 308 | " train_df_columns = self.train_df_columns) \n", 309 | " \n", 310 | " # finally input the cleaned/adjusted dataset into our model for prediction\n", 311 | " return self.model.predict(model_input_processed)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "# Testing the prediction class before pushing to MLflow\n", 321 | "m = leadsModel(train_df_columns = leads_x_train_clean.columns.tolist(),\n", 322 | " model = rf,\n", 323 | " leads_categorical_columns = leads_categorical_columns,\n", 324 | " leads_numeric_columns = leads_numeric_columns,\n", 325 | " fitted_scaler = scaler,\n", 326 | " pre_process_leads_data = pre_process_leads_data)\n", 327 | "model_input = leads_x.head(1)\n", 328 | "model_output = m.predict(None,model_input)\n", 329 | "print(model_output)" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": { 336 | "collapsed": true 337 | }, 338 | "outputs": [], 339 | "source": [ 340 | "########################################################\n", 341 | "### Log Model to MLflow\n", 342 | "########################################################" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": { 349 | "collapsed": true 350 | }, 351 | "outputs": [], 352 | "source": [ 353 | "# start mlflow run, log parameters, metrics, and the model\n", 354 | "with mlflow.start_run(run_name=\"Leads Model with Processing\") as run:\n", 355 | " # log the parameters that we defined for the model training\n", 356 | " mlflow.log_param(\"num_estimators\", num_estimators)\n", 357 | " mlflow.log_param(\"min_samples\", min_samples)\n", 358 | " \n", 359 | " # log the performance metrics that we calculated earlier\n", 360 | " mlflow.log_metric(\"accuracy\", accuracy)\n", 361 | " mlflow.log_metric(\"auc_score\", auc_score)\n", 362 | " \n", 363 | " # log model with all objects referenced in the leadsModel class\n", 364 | " ml_pyfunc.log_model(\n", 365 | " artifact_path = \"leads_pyfunc\",\n", 366 | " python_model = leadsModel(train_df_columns = leads_x_train_clean.columns.tolist(),\n", 367 | " 
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "########################################################\n",
    "### Log Model to MLflow\n",
    "########################################################"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# start an MLflow run, then log parameters, metrics, and the model\n",
    "# (the with-block ends the run automatically, so no mlflow.end_run() is needed)\n",
    "with mlflow.start_run(run_name=\"Leads Model with Processing\") as run:\n",
    "    # log the parameters that we defined for the model training\n",
    "    mlflow.log_param(\"num_estimators\", num_estimators)\n",
    "    mlflow.log_param(\"min_samples\", min_samples)\n",
    "    \n",
    "    # log the performance metrics that we calculated earlier\n",
    "    mlflow.log_metric(\"accuracy\", accuracy)\n",
    "    mlflow.log_metric(\"auc_score\", auc_score)\n",
    "    \n",
    "    # log the model with all objects referenced in the leadsModel class\n",
    "    ml_pyfunc.log_model(\n",
    "        artifact_path = \"leads_pyfunc\",\n",
    "        python_model = leadsModel(train_df_columns = leads_x_train_clean.columns.tolist(),\n",
    "                                  model = rf,\n",
    "                                  leads_categorical_columns = leads_categorical_columns,\n",
    "                                  leads_numeric_columns = leads_numeric_columns,\n",
    "                                  fitted_scaler = scaler,\n",
    "                                  pre_process_leads_data = pre_process_leads_data\n",
    "                                  ),\n",
    "        conda_env = mlflow_conda_env\n",
    "    )\n",
    "    \n",
    "    # save the run and experiment IDs for deployment\n",
    "    run_id = run.info.run_id\n",
    "    experiment_id = run.info.experiment_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "########################################################\n",
    "### Test Local Deployment\n",
    "########################################################"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the printed command from this directory to kick off a local SageMaker build\n",
    "\n",
    "sagemaker_local_command = 'mlflow sagemaker run-local -m ./mlruns/{experiment_id}/{run_id}/artifacts/leads_pyfunc -p 5001'. \\\n",
    "    format(experiment_id=experiment_id, run_id=run_id)\n",
    "\n",
    "print(sagemaker_local_command)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# A helper function to test the locally-built SageMaker container\n",
    "def query_local_endpoint(input_json):\n",
    "    response = requests.post('http://localhost:5001/invocations',\n",
    "                             headers={'Content-Type': 'application/json'},\n",
    "                             data=input_json)\n",
    "    print(response)\n",
    "    preds = response.json()\n",
    "    return preds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run a query against the local endpoint and examine the output;\n",
    "# the payload is the input row serialized in pandas' \"split\" orientation\n",
    "model_input = leads_x.head(1)\n",
    "output = query_local_endpoint(model_input.to_json(orient=\"split\"))\n",
    "print(output)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "########################################################\n",
    "### Deploy Model to Sagemaker\n",
    "########################################################"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "## Note: this requires the MLflow pyfunc Docker image to already exist in AWS ECR\n",
    "## (see the mlflow sagemaker build-and-push-container CLI command)\n",
    "\n",
    "import mlflow.sagemaker as mfs\n",
    "\n",
    "\n",
    "# we pull the run and experiment IDs from above to build the model location\n",
    "model_uri = \"mlruns/%s/%s/artifacts/leads_pyfunc\" % (experiment_id, run_id)\n",
    "\n",
    "# pick the AWS region closest to you or your systems\n",
    "region = \"us-east-1\"\n",
    "# the AWS account id can be found in the console\n",
    "aws_account_id = \"XXXXXXX\"\n",
    "# we use these inputs to reference the pyfunc container in ECR;\n",
    "# the image tag should match the MLflow version used to log the model\n",
    "image_url = aws_account_id \\\n",
    "            + \".dkr.ecr.\" \\\n",
    "            + region \\\n",
    "            + \".amazonaws.com/mlflow-pyfunc:1.8.0\"\n",
    "\n",
    "# now we specify the role that we set up for SageMaker in the previous step\n",
    "sagemaker_arn = \"arn:aws:iam::\" + aws_account_id + \":role/AmazonSageMakerFullAccess\"\n",
    "\n",
    "\n",
    "# finally, we pick a name for our endpoint within SageMaker\n",
    "endpoint_name = \"lead-rf-1\"\n",
    "\n",
    "\n",
    "# with all of the inputs, we run the following to deploy the model to SageMaker\n",
    "mfs.deploy(app_name=endpoint_name,\n",
    "           model_uri=model_uri,\n",
    "           region_name=region,\n",
    "           mode=\"create\",  # change to \"replace\" if the endpoint already exists\n",
    "           execution_role_arn=sagemaker_arn,\n",
    "           image_url=image_url,\n",
    "           instance_type='ml.t2.medium')  # smallest/cheapest instance type SageMaker allows"
   ]
  },
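  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional sketch (editor's addition): query the deployed SageMaker endpoint\n",
    "# with boto3 (one use for the boto3 import at the top). Assumes the deploy\n",
    "# above succeeded and AWS credentials are configured locally.\n",
    "smr = boto3.client('sagemaker-runtime', region_name=region)\n",
    "response = smr.invoke_endpoint(EndpointName=endpoint_name,\n",
    "                               ContentType='application/json',\n",
    "                               Body=model_input.to_json(orient=\"split\"))\n",
    "print(response['Body'].read().decode('utf-8'))"
   ]
  },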
| " + \".dkr.ecr.\" \\\n", 477 | " + region \\\n", 478 | " + \".amazonaws.com/mlflow-pyfunc:1.5.0\"\n", 479 | "\n", 480 | "# now we specify the role that we setup for sagemaker in the previous step\n", 481 | "sagemaker_arn = \"arn:aws:iam::\"+aws_account_id+\":role/AmazonSageMakerFullAccess\"\n", 482 | "\n", 483 | "\n", 484 | "# finally, we pick a name for our endpoint within sagemaker\n", 485 | "endpoint_name = \"lead-rf-1\" \n", 486 | "\n", 487 | "\n", 488 | "# with all of the inputs, we run the following to deploy the model it sagemaker\n", 489 | "mfs.deploy(app_name=endpoint_name, \n", 490 | " model_uri=model_uri,\n", 491 | " region_name=region,\n", 492 | " mode=\"create\", #this should change to replace if the endpoint already exists\n", 493 | " execution_role_arn=sagemaker_arn,\n", 494 | " image_url=image_url, \n", 495 | " instance_type='ml.t2.medium') # smallest/cheapest sagemaker allowed size" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": null, 501 | "metadata": { 502 | "collapsed": true 503 | }, 504 | "outputs": [], 505 | "source": [] 506 | } 507 | ], 508 | "metadata": { 509 | "kernelspec": { 510 | "display_name": "lead-scoring", 511 | "language": "python", 512 | "name": "lead-scoring" 513 | }, 514 | "language_info": { 515 | "codemirror_mode": { 516 | "name": "ipython", 517 | "version": 3 518 | }, 519 | "file_extension": ".py", 520 | "mimetype": "text/x-python", 521 | "name": "python", 522 | "nbconvert_exporter": "python", 523 | "pygments_lexer": "ipython3", 524 | "version": "3.8.2" 525 | } 526 | }, 527 | "nbformat": 4, 528 | "nbformat_minor": 2 529 | } 530 | --------------------------------------------------------------------------------