├── .DS_Store
├── artifacts
│   ├── predictions_df.pkl
│   ├── xgb_clf_model.pkl
│   ├── xgb_reg_model.pkl
│   ├── imp_spend_prob_df.pkl
│   └── imp_spend_amount_df.pkl
├── .vscode
│   └── settings.json
├── app_commands.txt
├── app_plot.py
├── environment.yml
├── app.py
└── lab_59_customer_ltv.py

/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/business-science/lab_59_cust_lifetime_py/HEAD/.DS_Store
--------------------------------------------------------------------------------
/artifacts/predictions_df.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/business-science/lab_59_cust_lifetime_py/HEAD/artifacts/predictions_df.pkl
--------------------------------------------------------------------------------
/artifacts/xgb_clf_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/business-science/lab_59_cust_lifetime_py/HEAD/artifacts/xgb_clf_model.pkl
--------------------------------------------------------------------------------
/artifacts/xgb_reg_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/business-science/lab_59_cust_lifetime_py/HEAD/artifacts/xgb_reg_model.pkl
--------------------------------------------------------------------------------
/artifacts/imp_spend_prob_df.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/business-science/lab_59_cust_lifetime_py/HEAD/artifacts/imp_spend_prob_df.pkl
--------------------------------------------------------------------------------
/artifacts/imp_spend_amount_df.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/business-science/lab_59_cust_lifetime_py/HEAD/artifacts/imp_spend_amount_df.pkl
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
{
    "python.pythonPath": "/Users/mdancho/opt/anaconda3/envs/lab_59_cust_lifetime_py/bin/python",
    "jupyter.notebookFileRoot": "${workspaceFolder}",
    "jupyter.interactiveWindow.textEditor.executeSelection": true
}
--------------------------------------------------------------------------------
/app_commands.txt:
--------------------------------------------------------------------------------
# TERMINAL COMMANDS
# Make sure to `cd` to your lab_59_cust_lifetime_py directory

conda info --envs
conda activate lab_59_cust_lifetime_py
python app.py

# Run this app with `python app.py` and
# visit http://127.0.0.1:8050/ in your web browser.
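
# Added note (not in the original file): if the conda env doesn't exist yet,
# create it first -- this command comes from the comments in environment.yml:
# conda env create -f environment.yml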
--------------------------------------------------------------------------------
/app_plot.py:
--------------------------------------------------------------------------------
import plotly.express as px

import pandas as pd

# Load the predictions produced by lab_59_customer_ltv.py
predictions_df = pd.read_pickle('artifacts/predictions_df.pkl')

# Actual minus predicted 90-day spend (negative = model over-predicted)
df = predictions_df \
    .assign(
        spend_actual_vs_pred = lambda x: x['spend_90_total'] - x['pred_spend']
    )

# Assign the figure and show it explicitly so the script also works when run
# outside an interactive window (the original built the figure but discarded it)
fig = px.scatter(
    data_frame=df,
    x = 'frequency',
    y = 'pred_prob',
    color = 'spend_actual_vs_pred',
    color_continuous_midpoint=0,
    opacity=0.5,
    color_continuous_scale='IceFire',
    # trendline='lowess',
    # trendline_color_override='black'
) \
    .update_layout(
        {
            'plot_bgcolor': 'white'
        }
    )

fig.show()
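
# Added alternative (sketch, not in the original script): write the
# interactive plot to a standalone HTML file instead of opening a window.
# The output filename is hypothetical.
# fig.write_html("spend_actual_vs_pred.html")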
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
# In terminal, run: conda env create -f environment.yml
# To update, run: conda env update -f environment.yml
name: lab_59_cust_lifetime_py
channels:
  - anaconda
  - conda-forge
  - defaults
dependencies:
  - python=3.7.1
  - pip
  - pip:
    # Core Data
    - numpy==1.20.2
    - pandas==1.2.2
    - plydata==0.4.3

    # Visualization
    - matplotlib==3.3.4
    - plotnine==0.7.1
    - mizani==0.7.2
    - plotly==4.14.3

    # Modeling & Machine Learning
    - statsmodels
    - scikit-learn==0.23.2
    - xgboost==0.90
    - sklearn-pandas==2.0.4
    - scikit-misc==0.1.3

    # API
    - fastapi==0.63.0
    - uvicorn==0.13.4

    # Database
    - sqlalchemy==1.4.7

    # Jupyter
    - jupyterlab==3.0.13
    - jupyterlab-server==2.4.0
    - jupyter-packaging==0.7.12
    - jupyter-server==1.6.1 # Solves ImportError: cannot import name 'get_version_info' from 'jupyter_packaging'
    - ipywidgets==7.6.3
    - ipympl==0.7.0
    - jupytext
    - papermill==2.3.3
    - nbconvert==5.6.1

    # Apps
    - streamlit==0.80.0
    - dash==1.20.0
    - dash_bootstrap_components==0.12.2

--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
# LL PRO BONUS: PYTHON DASH APPLICATION ----
# BUSINESS SCIENCE LEARNING LABS ----
# LAB 59: CUSTOMER LIFETIME VALUE | PYTHON DASH ----
# ----

# LIBRARIES

import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State

import dash_bootstrap_components as dbc

import plotly.express as px

import pandas as pd
import numpy as np

import pathlib

# APP SETUP
external_stylesheets = [dbc.themes.CYBORG]
app = dash.Dash(
    __name__,
    external_stylesheets=external_stylesheets
)

PLOT_BACKGROUND = 'rgba(0,0,0,0)'
PLOT_FONT_COLOR = 'white'
LOGO = "https://www.business-science.io/img/business-science-logo.png"

# PATHS
BASE_PATH = pathlib.Path(__file__).parent.resolve()
ART_PATH = BASE_PATH.joinpath("artifacts").resolve()

# DATA
predictions_df = pd.read_pickle(ART_PATH.joinpath("predictions_df.pkl"))

df = predictions_df \
    .assign(
        spend_actual_vs_pred = lambda x: x['spend_90_total'] - x['pred_spend']
    )
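
# Added note (not in the original app): spend_actual_vs_pred < 0 means the
# model predicted more 90-day spend than the customer actually made. These
# under-spenders are the customers the slider below is designed to surface,
# e.g. df['spend_actual_vs_pred'].lt(0).mean() gives their share.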

# LAYOUT

# Slider Marks
x = np.linspace(df['spend_actual_vs_pred'].min(), df['spend_actual_vs_pred'].max(), 10, dtype=int)
x = x.round(0)

navbar = dbc.Navbar(
    [
        html.A(
            # Use row and col to control vertical alignment of logo / brand
            dbc.Row(
                [
                    dbc.Col(html.Img(src=LOGO, height="30px")),
                    dbc.Col(dbc.NavbarBrand("Customer Spend Prediction", className="ml-2")),
                ],
                align="center",
                no_gutters=True,
            ),
            href="https://www.business-science.io/",
        ),
        dbc.NavbarToggler(id="navbar-toggler", n_clicks=0),
        dbc.Collapse(
            id="navbar-collapse", navbar=True, is_open=False
        ),
    ],
    color="dark",
    dark=True,
)

app.layout = html.Div(
    children = [
        navbar,
        dbc.Row(
            [
                dbc.Col(
                    [

                        html.H3("Welcome to the Customer Analytics Dashboard"),
                        html.Div(
                            id="intro",
                            children="Explore customers by predicted spend versus actual spend during the 90-day evaluation period.",
                        ),
                        html.Br(),
                        html.Hr(),
                        html.H5("Spend Actual vs Predicted"),
                        html.P("Segment customers that were predicted to spend but didn't. Then re-engage them with targeted emails."),
                        dcc.Slider(
                            id = 'spend-slider',
                            value = df['spend_actual_vs_pred'].max(),
                            max = df['spend_actual_vs_pred'].max(),
                            min = df['spend_actual_vs_pred'].min(),
                            marks = {i: '$'+str(i) for i in range(x[0], x[-1]) if i % 300 == 0}
                        ),
                        html.Br(),
                        html.Button("Download Segmentation", id="btn"), dcc.Download(id="download")
                    ],
                    width = 3,
                    style={'margin':'10px'}
                ),
                dbc.Col(
                    dcc.Graph(id='graph-slider'),
                    width = 8
                )
            ]
        )
    ]
)

# CALLBACKS
@app.callback(
    Output('graph-slider', 'figure'),
    Input('spend-slider', 'value'))
def update_figure(spend_delta_max):

    df_filtered = df[df['spend_actual_vs_pred'] <= spend_delta_max]

    fig = px.scatter(
        data_frame=df_filtered,
        x = 'frequency',
        y = 'pred_prob',
        color = 'spend_actual_vs_pred',
        color_continuous_midpoint=0,
        opacity=0.5,
        color_continuous_scale='IceFire',
        hover_name='customer_id',
        hover_data=['spend_90_total', 'pred_spend'],
    ) \
        .update_layout(
            {
                'plot_bgcolor': PLOT_BACKGROUND,
                'paper_bgcolor': PLOT_BACKGROUND,
                'font_color': PLOT_FONT_COLOR,
                'height': 700
            }
        ) \
        .update_traces(
            marker = dict(size = 12)
        )

    return fig

# Download Button
@app.callback(
    Output("download", "data"),
    Input("btn", "n_clicks"),
    State('spend-slider', 'value'),
    prevent_initial_call=True,
)
def func(n_clicks, spend_delta_max):

    df_filtered = df[df['spend_actual_vs_pred'] <= spend_delta_max]

    return dcc.send_data_frame(df_filtered.to_csv, "customer_segmentation.csv")

# Navbar
@app.callback(
    Output("navbar-collapse", "is_open"),
    [Input("navbar-toggler", "n_clicks")],
    [State("navbar-collapse", "is_open")],
)
def toggle_navbar_collapse(n, is_open):
    if n:
        return not is_open
    return is_open

if __name__ == '__main__':
    app.run_server(debug=True)
--------------------------------------------------------------------------------
/lab_59_customer_ltv.py:
--------------------------------------------------------------------------------
# BUSINESS SCIENCE LEARNING LABS ----
# LAB 59: CUSTOMER LIFETIME VALUE ----
# CUSTOMER LIFETIME VALUE WITH MACHINE LEARNING ----
# **** ----

# CONDA ENV USED: lab_59_cust_lifetime_py

# LIBRARIES ----
import pandas as pd
import numpy as np
import joblib

import plydata.cat_tools as cat
import plotnine as pn

from xgboost import XGBClassifier, XGBRegressor
from sklearn.model_selection import GridSearchCV

pn.options.dpi = 300


# 1.0 DATA PREPARATION ----

cdnow_raw_df = pd.read_csv(
    "data/CDNOW_master.txt",
    sep   = r"\s+",
    names = ["customer_id", "date", "quantity", "price"]
)

cdnow_raw_df.info()

cdnow_df = cdnow_raw_df \
    .assign(
        date = lambda x: x['date'].astype(str)
    ) \
    .assign(
        date = lambda x: pd.to_datetime(x['date'])
    ) \
    .dropna()

cdnow_df.info()

# 2.0 COHORT ANALYSIS ----
# - Includes only customers that joined (made their first purchase) during
#   the same initial acquisition window

# Get Range of Initial Purchases ----
cdnow_first_purchase_tbl = cdnow_df \
    .sort_values(['customer_id', 'date']) \
    .groupby('customer_id') \
    .first()

cdnow_first_purchase_tbl

cdnow_first_purchase_tbl['date'].min()

cdnow_first_purchase_tbl['date'].max()

# Visualize: All purchases within cohort

cdnow_df \
    .reset_index() \
    .set_index('date') \
    [['price']] \
    .resample(
        rule = "MS"
    ) \
    .sum() \
    .plot()

# Visualize: Individual Customer Purchases

ids = cdnow_df['customer_id'].unique()
ids_selected = ids[0:10]

cdnow_cust_id_subset_df = cdnow_df \
    [cdnow_df['customer_id'].isin(ids_selected)] \
    .groupby(['customer_id', 'date']) \
    .sum() \
    .reset_index()

pn.ggplot(
    pn.aes('date', 'price', group = 'customer_id'),
    data = cdnow_cust_id_subset_df
) \
    + pn.geom_line() \
    + pn.geom_point() \
    + pn.facet_wrap('customer_id') \
    + pn.scale_x_date(
        date_breaks = "1 year",
        date_labels = "%Y"
    )
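
# Added sanity check (sketch, not in the original lab): confirm this is a
# single acquisition cohort by counting first purchases per month -- they
# should all fall within a narrow initial window.
cdnow_first_purchase_tbl['date'].dt.to_period('M').value_counts().sort_index()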

# 3.0 MACHINE LEARNING ----
# Frame the problem:
# - What will customers spend in the next 90 days? (Regression)
# - What is the probability that a customer makes a purchase in the next
#   90 days? (Classification)


# 3.1 TIME SPLITTING (STAGE 1) ----

n_days = 90
max_date = cdnow_df['date'].max()
cutoff = max_date - pd.to_timedelta(n_days, unit = "d")

temporal_in_df = cdnow_df \
    [cdnow_df['date'] <= cutoff]

temporal_out_df = cdnow_df \
    [cdnow_df['date'] > cutoff]


# 3.2 FEATURE ENGINEERING (RFM) ----
# - Most challenging part
# - 2-Stage Process
# - Need to frame the problem
# - Need to think about what features to include

# Make Targets from the out-of-sample (last 90 days) data ----

targets_df = temporal_out_df \
    .drop('quantity', axis=1) \
    .groupby('customer_id') \
    .sum() \
    .rename({'price': 'spend_90_total'}, axis = 1) \
    .assign(spend_90_flag = 1)

# Make Recency (Date) Features from the in-sample data ----
# - Recency is measured in negative days relative to the cutoff
#   (0 = purchased on the last in-sample day)

max_date = temporal_in_df['date'].max()

recency_features_df = temporal_in_df \
    [['customer_id', 'date']] \
    .groupby('customer_id') \
    .apply(
        lambda x: (x['date'].max() - max_date) / pd.to_timedelta(1, "day")
    ) \
    .to_frame() \
    .set_axis(["recency"], axis=1)

recency_features_df

# Make Frequency (Count) Features from the in-sample data ----

frequency_features_df = temporal_in_df \
    [['customer_id', 'date']] \
    .groupby('customer_id') \
    .count() \
    .set_axis(['frequency'], axis=1)

frequency_features_df

# Make Price (Monetary) Features from the in-sample data ----

price_features_df = temporal_in_df \
    .groupby('customer_id') \
    .aggregate(
        {
            'price': ["sum", "mean"]
        }
    ) \
    .set_axis(['price_sum', 'price_mean'], axis = 1)

price_features_df

# 3.3 COMBINE FEATURES ----

features_df = pd.concat(
    [recency_features_df, frequency_features_df, price_features_df], axis = 1
) \
    .merge(
        targets_df,
        left_index = True,
        right_index = True,
        how = "left"
    ) \
    .fillna(0)

# 4.0 MACHINE LEARNING -----

from xgboost import XGBClassifier, XGBRegressor

from sklearn.model_selection import GridSearchCV

X = features_df[['recency', 'frequency', 'price_sum', 'price_mean']]

# 4.1 NEXT 90-DAY SPEND PREDICTION ----

y_spend = features_df['spend_90_total']

xgb_reg_spec = XGBRegressor(
    objective="reg:squarederror",
    random_state=123
)

xgb_reg_model = GridSearchCV(
    estimator=xgb_reg_spec,
    param_grid=dict(
        learning_rate = [0.01, 0.1, 0.3, 0.5]
    ),
    scoring = 'neg_mean_absolute_error',
    refit = True,
    cv = 5
)

xgb_reg_model.fit(X, y_spend)

xgb_reg_model.best_score_

xgb_reg_model.best_params_

xgb_reg_model.best_estimator_

predictions_reg = xgb_reg_model.predict(X)
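
# Added note (sketch, not in the original lab): with
# scoring='neg_mean_absolute_error', best_score_ is a *negative* MAE, so a
# value of e.g. -10.5 means roughly $10.50 average absolute error under
# 5-fold CV. Note also that predictions_reg is scored on the training
# features X, so it is an in-sample prediction.
print(f"CV MAE (dollars): {-xgb_reg_model.best_score_:.2f}")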

# 4.2 NEXT 90-DAY SPEND PROBABILITY ----

y_prob = features_df['spend_90_flag']

xgb_clf_spec = XGBClassifier(
    objective = "binary:logistic",
    random_state = 123
)

xgb_clf_model = GridSearchCV(
    estimator=xgb_clf_spec,
    param_grid=dict(
        learning_rate = [0.01, 0.1, 0.3, 0.5]
    ),
    scoring = 'roc_auc',
    refit = True,
    cv = 5
)

xgb_clf_model.fit(X, y_prob)

xgb_clf_model.best_score_

xgb_clf_model.best_params_

xgb_clf_model.best_estimator_

predictions_clf = xgb_clf_model.predict_proba(X)

# 4.3 FEATURE IMPORTANCE (GLOBAL) ----

# Importance | Spend Amount Model
imp_spend_amount_dict = xgb_reg_model \
    .best_estimator_ \
    .get_booster() \
    .get_score(importance_type = 'gain')

imp_spend_amount_df = pd.DataFrame(
    data = {
        'feature': list(imp_spend_amount_dict.keys()),
        'value': list(imp_spend_amount_dict.values())
    }
) \
    .assign(
        feature = lambda x: cat.cat_reorder(x['feature'], x['value'])
    )

pn.ggplot(
    pn.aes('feature', 'value'),
    data = imp_spend_amount_df
) \
    + pn.geom_col() \
    + pn.coord_flip()

# Importance | Spend Probability Model
imp_spend_prob_dict = xgb_clf_model \
    .best_estimator_ \
    .get_booster() \
    .get_score(importance_type = 'gain')

imp_spend_prob_df = pd.DataFrame(
    data = {
        'feature': list(imp_spend_prob_dict.keys()),
        'value': list(imp_spend_prob_dict.values())
    }
) \
    .assign(
        feature = lambda x: cat.cat_reorder(x['feature'], x['value'])
    )

pn.ggplot(
    pn.aes('feature', 'value'),
    data = imp_spend_prob_df
) \
    + pn.geom_col() \
    + pn.coord_flip()

# 5.0 SAVE WORK ----

# Save Predictions
predictions_df = pd.concat(
    [
        pd.DataFrame(predictions_reg).set_axis(['pred_spend'], axis=1),
        pd.DataFrame(predictions_clf)[[1]].set_axis(['pred_prob'], axis=1),
        features_df.reset_index()
    ],
    axis=1
)

predictions_df

predictions_df.to_pickle("artifacts/predictions_df.pkl")

pd.read_pickle('artifacts/predictions_df.pkl')

# Save Importance
imp_spend_amount_df.to_pickle("artifacts/imp_spend_amount_df.pkl")
imp_spend_prob_df.to_pickle("artifacts/imp_spend_prob_df.pkl")

pd.read_pickle("artifacts/imp_spend_amount_df.pkl")

# Save Models
joblib.dump(xgb_reg_model, 'artifacts/xgb_reg_model.pkl')
joblib.dump(xgb_clf_model, 'artifacts/xgb_clf_model.pkl')

model = joblib.load('artifacts/xgb_reg_model.pkl')
model.predict(X)


# 6.0 HOW CAN WE USE THIS INFORMATION ----

# 6.1 Which customers have the highest spend probability in the next 90 days?
# - Target for new products similar to what they have purchased in the past

predictions_df \
    .sort_values('pred_prob', ascending=False)

# 6.2 Which customers have recently purchased but are unlikely to buy?
# - Incentivize actions to increase probability
# - Provide discounts, encourage referring a friend, nurture by letting them
#   know what's coming

# Combine both conditions into one boolean mask (chaining two masks built
# from the unfiltered frame raises a pandas reindexing warning)
predictions_df \
    [
        (predictions_df['recency'] > -90) & (predictions_df['pred_prob'] < 0.20)
    ] \
    .sort_values('pred_prob', ascending=False)


# 6.3 Missed opportunities: Big spenders that could be unlocked ----
# - Send bundle offers encouraging volume purchases
# - Focus on missed opportunities

predictions_df \
    [
        predictions_df['spend_90_total'] == 0.0
    ] \
    .sort_values('pred_spend', ascending=False)
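
# Added example (sketch, not in the original lab): combine the two model
# outputs into a simple 90-day expected-value score -- purchase probability
# times predicted spend -- to rank customers by near-term value.
predictions_df \
    .assign(
        expected_spend_90 = lambda x: x['pred_prob'] * x['pred_spend']
    ) \
    .sort_values('expected_spend_90', ascending=False)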

# 7.0 NEXT STEPS ----
# - It's exciting what you can do with machine learning. It's very powerful,
#   but you have to put in the work.

# - Learning Data Wrangling, Modeling, and Visualization (101)
# - Model Improvement (Coming Soon):
#   - Algorithms (201-P)
#   - AutoML (201-P)
#   - Hyperparameter Tuning (201-P)
# - Forecasting: When will customers purchase? (TBD)
# - Web Applications, APIs & Production (202-P)
--------------------------------------------------------------------------------
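
# APPENDIX (added; not part of the original repo). The environment pins
# fastapi and uvicorn, and "APIs & Production" appears under Next Steps, so
# here is a minimal serving sketch for the saved regression model. The file
# name (api.py) and endpoint path are hypothetical.

import joblib
import pandas as pd
from fastapi import FastAPI

app = FastAPI()
model = joblib.load("artifacts/xgb_reg_model.pkl")

@app.post("/predict_spend")
def predict_spend(recency: float, frequency: int, price_sum: float, price_mean: float):
    # One-row frame with the same feature names/order used in training
    X = pd.DataFrame(
        [[recency, frequency, price_sum, price_mean]],
        columns=["recency", "frequency", "price_sum", "price_mean"]
    )
    return {"pred_spend": float(model.predict(X)[0])}

# Run with: uvicorn api:app --reload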