├── PyCaret Apr 27 2021.pptx
├── README.md
├── demo4.py
├── AirPassengers.csv
└── Demo 4 - Scoring from MLFlow backend.ipynb
/PyCaret Apr 27 2021.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pycaret/pycaret-demo-mlflow/HEAD/PyCaret Apr 27 2021.pptx
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 |
--------------------------------------------------------------------------------
/demo4.py:
--------------------------------------------------------------------------------
1 | from pycaret.regression import *
2 | from pycaret.datasets import get_data
3 | import mlflow
4 |
5 | def run():
6 | data = get_data('insurance')
7 | mlflow.set_tracking_uri("sqlite:///mlruns.db")
8 | s = setup(data, target = 'charges', session_id = 123, silent = True,
9 | log_experiment = True, experiment_name = 'insurance_demo4', log_plots = True)
10 | models = ['lr', 'dt', 'lightgbm', 'rf']
11 | all_models = [create_model(i) for i in models]
12 |
13 | if __name__ == "__main__":
14 | run()
15 |
16 | # To browse the logged runs, start the MLflow UI against the same backend store:
17 | # mlflow ui --backend-store-uri sqlite:///mlruns.db
--------------------------------------------------------------------------------
/AirPassengers.csv:
--------------------------------------------------------------------------------
1 | Date,Passengers
2 | 1949-01-01,112
3 | 1949-02-01,118
4 | 1949-03-01,132
5 | 1949-04-01,129
6 | 1949-05-01,121
7 | 1949-06-01,135
8 | 1949-07-01,148
9 | 1949-08-01,148
10 | 1949-09-01,136
11 | 1949-10-01,119
12 | 1949-11-01,104
13 | 1949-12-01,118
14 | 1950-01-01,115
15 | 1950-02-01,126
16 | 1950-03-01,141
17 | 1950-04-01,135
18 | 1950-05-01,125
19 | 1950-06-01,149
20 | 1950-07-01,170
21 | 1950-08-01,170
22 | 1950-09-01,158
23 | 1950-10-01,133
24 | 1950-11-01,114
25 | 1950-12-01,140
26 | 1951-01-01,145
27 | 1951-02-01,150
28 | 1951-03-01,178
29 | 1951-04-01,163
30 | 1951-05-01,172
31 | 1951-06-01,178
32 | 1951-07-01,199
33 | 1951-08-01,199
34 | 1951-09-01,184
35 | 1951-10-01,162
36 | 1951-11-01,146
37 | 1951-12-01,166
38 | 1952-01-01,171
39 | 1952-02-01,180
40 | 1952-03-01,193
41 | 1952-04-01,181
42 | 1952-05-01,183
43 | 1952-06-01,218
44 | 1952-07-01,230
45 | 1952-08-01,242
46 | 1952-09-01,209
47 | 1952-10-01,191
48 | 1952-11-01,172
49 | 1952-12-01,194
50 | 1953-01-01,196
51 | 1953-02-01,196
52 | 1953-03-01,236
53 | 1953-04-01,235
54 | 1953-05-01,229
55 | 1953-06-01,243
56 | 1953-07-01,264
57 | 1953-08-01,272
58 | 1953-09-01,237
59 | 1953-10-01,211
60 | 1953-11-01,180
61 | 1953-12-01,201
62 | 1954-01-01,204
63 | 1954-02-01,188
64 | 1954-03-01,235
65 | 1954-04-01,227
66 | 1954-05-01,234
67 | 1954-06-01,264
68 | 1954-07-01,302
69 | 1954-08-01,293
70 | 1954-09-01,259
71 | 1954-10-01,229
72 | 1954-11-01,203
73 | 1954-12-01,229
74 | 1955-01-01,242
75 | 1955-02-01,233
76 | 1955-03-01,267
77 | 1955-04-01,269
78 | 1955-05-01,270
79 | 1955-06-01,315
80 | 1955-07-01,364
81 | 1955-08-01,347
82 | 1955-09-01,312
83 | 1955-10-01,274
84 | 1955-11-01,237
85 | 1955-12-01,278
86 | 1956-01-01,284
87 | 1956-02-01,277
88 | 1956-03-01,317
89 | 1956-04-01,313
90 | 1956-05-01,318
91 | 1956-06-01,374
92 | 1956-07-01,413
93 | 1956-08-01,405
94 | 1956-09-01,355
95 | 1956-10-01,306
96 | 1956-11-01,271
97 | 1956-12-01,306
98 | 1957-01-01,315
99 | 1957-02-01,301
100 | 1957-03-01,356
101 | 1957-04-01,348
102 | 1957-05-01,355
103 | 1957-06-01,422
104 | 1957-07-01,465
105 | 1957-08-01,467
106 | 1957-09-01,404
107 | 1957-10-01,347
108 | 1957-11-01,305
109 | 1957-12-01,336
110 | 1958-01-01,340
111 | 1958-02-01,318
112 | 1958-03-01,362
113 | 1958-04-01,348
114 | 1958-05-01,363
115 | 1958-06-01,435
116 | 1958-07-01,491
117 | 1958-08-01,505
118 | 1958-09-01,404
119 | 1958-10-01,359
120 | 1958-11-01,310
121 | 1958-12-01,337
122 | 1959-01-01,360
123 | 1959-02-01,342
124 | 1959-03-01,406
125 | 1959-04-01,396
126 | 1959-05-01,420
127 | 1959-06-01,472
128 | 1959-07-01,548
129 | 1959-08-01,559
130 | 1959-09-01,463
131 | 1959-10-01,407
132 | 1959-11-01,362
133 | 1959-12-01,405
134 | 1960-01-01,417
135 | 1960-02-01,391
136 | 1960-03-01,419
137 | 1960-04-01,461
138 | 1960-05-01,472
139 | 1960-06-01,535
140 | 1960-07-01,622
141 | 1960-08-01,606
142 | 1960-09-01,508
143 | 1960-10-01,461
144 | 1960-11-01,390
145 | 1960-12-01,432
146 |
--------------------------------------------------------------------------------
/Demo 4 - Scoring from MLFlow backend.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 👉 Use MLFlow API for Scoring"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Dataset For Scoring"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {},
21 | "outputs": [
22 | {
23 | "data": {
24 | "text/html": [
25 | "
\n",
26 | "\n",
39 | "
\n",
40 | " \n",
41 | " \n",
42 | " | \n",
43 | " age | \n",
44 | " sex | \n",
45 | " bmi | \n",
46 | " children | \n",
47 | " smoker | \n",
48 | " region | \n",
49 | "
\n",
50 | " \n",
51 | " \n",
52 | " \n",
53 | " | 0 | \n",
54 | " 19 | \n",
55 | " female | \n",
56 | " 27.900 | \n",
57 | " 0 | \n",
58 | " yes | \n",
59 | " southwest | \n",
60 | "
\n",
61 | " \n",
62 | " | 1 | \n",
63 | " 18 | \n",
64 | " male | \n",
65 | " 33.770 | \n",
66 | " 1 | \n",
67 | " no | \n",
68 | " southeast | \n",
69 | "
\n",
70 | " \n",
71 | " | 2 | \n",
72 | " 28 | \n",
73 | " male | \n",
74 | " 33.000 | \n",
75 | " 3 | \n",
76 | " no | \n",
77 | " southeast | \n",
78 | "
\n",
79 | " \n",
80 | " | 3 | \n",
81 | " 33 | \n",
82 | " male | \n",
83 | " 22.705 | \n",
84 | " 0 | \n",
85 | " no | \n",
86 | " northwest | \n",
87 | "
\n",
88 | " \n",
89 | " | 4 | \n",
90 | " 32 | \n",
91 | " male | \n",
92 | " 28.880 | \n",
93 | " 0 | \n",
94 | " no | \n",
95 | " northwest | \n",
96 | "
\n",
97 | " \n",
98 | "
\n",
99 | "
"
100 | ],
101 | "text/plain": [
102 | " age sex bmi children smoker region\n",
103 | "0 19 female 27.900 0 yes southwest\n",
104 | "1 18 male 33.770 1 no southeast\n",
105 | "2 28 male 33.000 3 no southeast\n",
106 | "3 33 male 22.705 0 no northwest\n",
107 | "4 32 male 28.880 0 no northwest"
108 | ]
109 | },
110 | "execution_count": 1,
111 | "metadata": {},
112 | "output_type": "execute_result"
113 | }
114 | ],
115 | "source": [
116 | "from pycaret.datasets import get_data\n",
117 | "data = get_data('insurance', verbose=False)\n",
118 | "data.drop('charges', axis=1, inplace=True)\n",
119 | "data.head()"
120 | ]
121 | },
122 | {
123 | "cell_type": "markdown",
124 | "metadata": {},
125 | "source": [
126 | "## Define MLFlow Scoring Function"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 2,
132 | "metadata": {},
133 | "outputs": [],
134 | "source": [
135 | "import mlflow\n",
136 | "\n",
137 | "def score_model(data, model_name, model_version):\n",
138 | " \n",
139 | " mlflow.set_tracking_uri(\"sqlite:///mlruns.db\")\n",
140 | " model_uri = \"models:/{}/{}\".format(model_name, model_version)\n",
141 | " model = mlflow.pyfunc.load_model(model_uri)\n",
142 | " return model.predict(data)"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 3,
148 | "metadata": {},
149 | "outputs": [
150 | {
151 | "data": {
152 | "text/plain": [
153 | "array([18894.26007319, 3698.2875344 , 6029.27157845, ...,\n",
154 | " 2442.28835297, 2613.21866387, 28782.04091164])"
155 | ]
156 | },
157 | "execution_count": 3,
158 | "metadata": {},
159 | "output_type": "execute_result"
160 | }
161 | ],
162 | "source": [
163 | "score_model(data, 'my_first_model', 1)"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 4,
169 | "metadata": {},
170 | "outputs": [
171 | {
172 | "data": {
173 | "text/plain": [
174 | "array([16831.03017578, 2908.97255127, 4915.3686792 , ...,\n",
175 | " 2035.52375305, 2061.27198486, 28688.36953125])"
176 | ]
177 | },
178 | "execution_count": 4,
179 | "metadata": {},
180 | "output_type": "execute_result"
181 | }
182 | ],
183 | "source": [
184 | "score_model(data, 'my_first_model', 2)"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": 5,
190 | "metadata": {},
191 | "outputs": [
192 | {
193 | "data": {
194 | "text/plain": [
195 | "array([25119.76880225, 4104.82323122, 6043.30438595, ...,\n",
196 | " 4195.51717011, 1111.38184836, 36858.02738907])"
197 | ]
198 | },
199 | "execution_count": 5,
200 | "metadata": {},
201 | "output_type": "execute_result"
202 | }
203 | ],
204 | "source": [
205 | "score_model(data, 'my_first_model', 3)"
206 | ]
207 | },
208 | {
209 | "cell_type": "markdown",
210 | "metadata": {},
211 | "source": [
212 | "# 👉 Alternatively ..."
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": 6,
218 | "metadata": {},
219 | "outputs": [
220 | {
221 | "name": "stdout",
222 | "output_type": "stream",
223 | "text": [
224 | "Transformation Pipeline and Model Successfully Loaded\n"
225 | ]
226 | }
227 | ],
228 | "source": [
229 | "from pycaret.regression import load_model, predict_model\n",
230 | "l = load_model('mlruns/1/c391ad05516442eb85c5defd46c27931/artifacts/model/model')"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": 7,
236 | "metadata": {},
237 | "outputs": [
238 | {
239 | "name": "stdout",
240 | "output_type": "stream",
241 | "text": [
242 | "Pipeline(steps=[('dtypes',\n",
243 | " DataTypes_Auto_infer(display_types=False,\n",
244 | " ml_usecase='regression',\n",
245 | " target='charges')),\n",
246 | " ('imputer',\n",
247 | " Simple_Imputer(categorical_strategy='not_available',\n",
248 | " fill_value_categorical=None,\n",
249 | " fill_value_numerical=None,\n",
250 | " numeric_strategy='mean',\n",
251 | " target_variable=None)),\n",
252 | " ('new_levels1',\n",
253 | " New_Catagorical_Levels_in_TestData(replacement_strategy='leas...\n",
254 | " ('binn', 'passthrough'), ('rem_outliers', 'passthrough'),\n",
255 | " ('cluster_all', 'passthrough'),\n",
256 | " ('dummy', Dummify(target='charges')),\n",
257 | " ('fix_perfect', Remove_100(target='charges')),\n",
258 | " ('clean_names', Clean_Colum_Names()),\n",
259 | " ('feature_select', 'passthrough'), ('fix_multi', 'passthrough'),\n",
260 | " ('dfs', 'passthrough'), ('pca', 'passthrough'),\n",
261 | " ['trained_model', GradientBoostingRegressor(random_state=123)]])\n"
262 | ]
263 | }
264 | ],
265 | "source": [
266 | "print(l)"
267 | ]
268 | },
269 | {
270 | "cell_type": "code",
271 | "execution_count": 8,
272 | "metadata": {},
273 | "outputs": [
274 | {
275 | "data": {
276 | "text/html": [
277 | "\n",
278 | "\n",
291 | "
\n",
292 | " \n",
293 | " \n",
294 | " | \n",
295 | " age | \n",
296 | " sex | \n",
297 | " bmi | \n",
298 | " children | \n",
299 | " smoker | \n",
300 | " region | \n",
301 | " Label | \n",
302 | "
\n",
303 | " \n",
304 | " \n",
305 | " \n",
306 | " | 0 | \n",
307 | " 19 | \n",
308 | " female | \n",
309 | " 27.900 | \n",
310 | " 0 | \n",
311 | " yes | \n",
312 | " southwest | \n",
313 | " 18894.260073 | \n",
314 | "
\n",
315 | " \n",
316 | " | 1 | \n",
317 | " 18 | \n",
318 | " male | \n",
319 | " 33.770 | \n",
320 | " 1 | \n",
321 | " no | \n",
322 | " southeast | \n",
323 | " 3698.287534 | \n",
324 | "
\n",
325 | " \n",
326 | " | 2 | \n",
327 | " 28 | \n",
328 | " male | \n",
329 | " 33.000 | \n",
330 | " 3 | \n",
331 | " no | \n",
332 | " southeast | \n",
333 | " 6029.271578 | \n",
334 | "
\n",
335 | " \n",
336 | " | 3 | \n",
337 | " 33 | \n",
338 | " male | \n",
339 | " 22.705 | \n",
340 | " 0 | \n",
341 | " no | \n",
342 | " northwest | \n",
343 | " 8958.189116 | \n",
344 | "
\n",
345 | " \n",
346 | " | 4 | \n",
347 | " 32 | \n",
348 | " male | \n",
349 | " 28.880 | \n",
350 | " 0 | \n",
351 | " no | \n",
352 | " northwest | \n",
353 | " 3900.039002 | \n",
354 | "
\n",
355 | " \n",
356 | "
\n",
357 | "
"
358 | ],
359 | "text/plain": [
360 | " age sex bmi children smoker region Label\n",
361 | "0 19 female 27.900 0 yes southwest 18894.260073\n",
362 | "1 18 male 33.770 1 no southeast 3698.287534\n",
363 | "2 28 male 33.000 3 no southeast 6029.271578\n",
364 | "3 33 male 22.705 0 no northwest 8958.189116\n",
365 | "4 32 male 28.880 0 no northwest 3900.039002"
366 | ]
367 | },
368 | "execution_count": 8,
369 | "metadata": {},
370 | "output_type": "execute_result"
371 | }
372 | ],
373 | "source": [
374 | "predictions = predict_model(l, data=data)\n",
375 | "predictions.head()"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": null,
381 | "metadata": {},
382 | "outputs": [],
383 | "source": []
384 | }
385 | ],
386 | "metadata": {
387 | "kernelspec": {
388 | "display_name": "pycaret-dev",
389 | "language": "python",
390 | "name": "pycaret-dev"
391 | },
392 | "language_info": {
393 | "codemirror_mode": {
394 | "name": "ipython",
395 | "version": 3
396 | },
397 | "file_extension": ".py",
398 | "mimetype": "text/x-python",
399 | "name": "python",
400 | "nbconvert_exporter": "python",
401 | "pygments_lexer": "ipython3",
402 | "version": "3.6.10"
403 | }
404 | },
405 | "nbformat": 4,
406 | "nbformat_minor": 2
407 | }
408 |
--------------------------------------------------------------------------------