├── AWS_SageMaker.ipynb
└── README.md
/AWS_SageMaker.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "view-in-github",
7 | "colab_type": "text"
8 | },
9 | "source": [
10 | "
"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": null,
16 | "id": "8c436963-afa1-454c-9275-a1ca109a4cbe",
17 | "metadata": {
18 | "tags": [],
19 | "id": "8c436963-afa1-454c-9275-a1ca109a4cbe"
20 | },
21 | "outputs": [],
22 | "source": [
23 | "import pandas as pd\n",
24 | "import numpy as np\n",
25 | "import matplotlib.pyplot as plt\n",
26 | "import sagemaker\n",
27 | "import boto3"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "id": "7721d237-76df-47c2-a947-175a55c5f88f",
34 | "metadata": {
35 | "tags": [],
36 | "id": "7721d237-76df-47c2-a947-175a55c5f88f",
37 | "outputId": "0776a493-d9c4-496b-b42b-eff885a8c41b"
38 | },
39 | "outputs": [
40 | {
41 | "name": "stdout",
42 | "output_type": "stream",
43 | "text": [
44 | "us-east-1\n"
45 | ]
46 | }
47 | ],
48 | "source": [
49 | "bucket_name = \"dsmlsagemaker-prasang\"\n",
50 | "# Check the region of the instance\n",
51 | "my_region = boto3.session.Session().region_name\n",
52 | "print(my_region)"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": null,
58 | "id": "e30ee6c1-9f90-4357-91b4-80e2be7a464d",
59 | "metadata": {
60 | "tags": [],
61 | "id": "e30ee6c1-9f90-4357-91b4-80e2be7a464d",
62 | "outputId": "b3790a2b-89e9-483c-962a-a69000a20e3d"
63 | },
64 | "outputs": [
65 | {
66 | "name": "stdout",
67 | "output_type": "stream",
68 | "text": [
69 | "S3 Bucket Created Successfully!\n"
70 | ]
71 | }
72 | ],
73 | "source": [
74 | "# Create the bucket with the notebook's default credential chain (IAM role);\n",
75 | "# access keys must never be hard-coded in a notebook\n",
76 | "s3 = boto3.resource(\"s3\", region_name=my_region)\n",
77 | "\n",
78 | "try:\n",
79 | "    if my_region == \"us-east-1\":\n",
80 | "        # us-east-1 is the default location and must not receive a LocationConstraint\n",
81 | "        s3.create_bucket(Bucket=bucket_name)\n",
82 | "    else:\n",
83 | "        s3.create_bucket(Bucket=bucket_name, CreateBucketConfiguration={\"LocationConstraint\": my_region})\n",
84 | "    print(\"S3 Bucket Created Successfully!\")\n",
85 | "except Exception as e:\n",
86 | "    print(str(e))"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": null,
92 | "id": "3eb7f357-4ae3-4d46-a13c-dad2d1ce5467",
93 | "metadata": {
94 | "tags": [],
95 | "id": "3eb7f357-4ae3-4d46-a13c-dad2d1ce5467",
96 | "outputId": "fdb144d3-89bb-48c8-a484-6abaf1fc9257"
97 | },
98 | "outputs": [
99 | {
100 | "name": "stdout",
101 | "output_type": "stream",
102 | "text": [
103 | "s3://dsmlsagemaker-prasang/xgboost-as-a-built-in-algo/output\n"
104 | ]
105 | }
106 | ],
107 | "source": [
108 | "## set an output path for model artifacts\n",
109 | "path = \"xgboost-as-a-built-in-algo\"\n",
110 | "output_path = f\"s3://{bucket_name}/{path}/output\"\n",
111 | "print(output_path)"
112 | ]
113 | },
114 | {
115 | "cell_type": "markdown",
116 | "id": "3771b3e3-d416-4555-b4e1-6c776b344508",
117 | "metadata": {
118 | "id": "3771b3e3-d416-4555-b4e1-6c776b344508"
119 | },
120 | "source": [
121 | "## Download the data and store in S3"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": null,
127 | "id": "275ef361-0969-4898-b28f-6ff1d0266def",
128 | "metadata": {
129 | "tags": [],
130 | "id": "275ef361-0969-4898-b28f-6ff1d0266def",
131 | "outputId": "eef657c0-a005-4606-b4d9-85f83a9b7ef6"
132 | },
133 | "outputs": [
134 | {
135 | "name": "stdout",
136 | "output_type": "stream",
137 | "text": [
138 | "Success: downloaded bank_clean.csv.\n",
139 | "Success: Data loaded into dataframe.\n"
140 | ]
141 | }
142 | ],
143 | "source": [
144 | "import urllib.request  # importing the submodule explicitly; a bare `import urllib` does not guarantee it is loaded\n",
145 | "try:\n",
146 | "    # the bank data is already one-hot encoded\n",
147 | "    urllib.request.urlretrieve(\"https://d1.awsstatic.com/tmt/build-train-deploy-machine-learning-model-sagemaker/bank_clean.27f01fbbdf43271788427f3682996ae29ceca05d.csv\", \"bank_clean.csv\")\n",
148 | "    print('Success: downloaded bank_clean.csv.')\n",
149 | "except Exception as e:\n",
150 | "    print('Data download error: ',e)\n",
151 | "try:\n",
152 | "    model_data = pd.read_csv('./bank_clean.csv',index_col=0)  # first CSV column is the row index\n",
153 | "    print('Success: Data loaded into dataframe.')\n",
154 | "except Exception as e:\n",
155 | "    print('Data load error: ',e)"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "id": "8039da29-803d-44b4-ad41-23e957b05fd3",
162 | "metadata": {
163 | "tags": [],
164 | "id": "8039da29-803d-44b4-ad41-23e957b05fd3",
165 | "outputId": "666db70c-d53b-4a32-d1f0-a6e685b8ff73"
166 | },
167 | "outputs": [
168 | {
169 | "data": {
170 | "text/html": [
171 | "
\n",
172 | "\n",
185 | "
\n",
186 | " \n",
187 | " \n",
188 | " | \n",
189 | " age | \n",
190 | " campaign | \n",
191 | " pdays | \n",
192 | " previous | \n",
193 | " no_previous_contact | \n",
194 | " not_working | \n",
195 | " job_admin. | \n",
196 | " job_blue-collar | \n",
197 | " job_entrepreneur | \n",
198 | " job_housemaid | \n",
199 | " ... | \n",
200 | " day_of_week_fri | \n",
201 | " day_of_week_mon | \n",
202 | " day_of_week_thu | \n",
203 | " day_of_week_tue | \n",
204 | " day_of_week_wed | \n",
205 | " poutcome_failure | \n",
206 | " poutcome_nonexistent | \n",
207 | " poutcome_success | \n",
208 | " y_no | \n",
209 | " y_yes | \n",
210 | "
\n",
211 | " \n",
212 | " \n",
213 | " \n",
214 | " 0 | \n",
215 | " 56 | \n",
216 | " 1 | \n",
217 | " 999 | \n",
218 | " 0 | \n",
219 | " 1 | \n",
220 | " 0 | \n",
221 | " 0 | \n",
222 | " 0 | \n",
223 | " 0 | \n",
224 | " 1 | \n",
225 | " ... | \n",
226 | " 0 | \n",
227 | " 1 | \n",
228 | " 0 | \n",
229 | " 0 | \n",
230 | " 0 | \n",
231 | " 0 | \n",
232 | " 1 | \n",
233 | " 0 | \n",
234 | " 1 | \n",
235 | " 0 | \n",
236 | "
\n",
237 | " \n",
238 | " 1 | \n",
239 | " 57 | \n",
240 | " 1 | \n",
241 | " 999 | \n",
242 | " 0 | \n",
243 | " 1 | \n",
244 | " 0 | \n",
245 | " 0 | \n",
246 | " 0 | \n",
247 | " 0 | \n",
248 | " 0 | \n",
249 | " ... | \n",
250 | " 0 | \n",
251 | " 1 | \n",
252 | " 0 | \n",
253 | " 0 | \n",
254 | " 0 | \n",
255 | " 0 | \n",
256 | " 1 | \n",
257 | " 0 | \n",
258 | " 1 | \n",
259 | " 0 | \n",
260 | "
\n",
261 | " \n",
262 | " 2 | \n",
263 | " 37 | \n",
264 | " 1 | \n",
265 | " 999 | \n",
266 | " 0 | \n",
267 | " 1 | \n",
268 | " 0 | \n",
269 | " 0 | \n",
270 | " 0 | \n",
271 | " 0 | \n",
272 | " 0 | \n",
273 | " ... | \n",
274 | " 0 | \n",
275 | " 1 | \n",
276 | " 0 | \n",
277 | " 0 | \n",
278 | " 0 | \n",
279 | " 0 | \n",
280 | " 1 | \n",
281 | " 0 | \n",
282 | " 1 | \n",
283 | " 0 | \n",
284 | "
\n",
285 | " \n",
286 | " 3 | \n",
287 | " 40 | \n",
288 | " 1 | \n",
289 | " 999 | \n",
290 | " 0 | \n",
291 | " 1 | \n",
292 | " 0 | \n",
293 | " 1 | \n",
294 | " 0 | \n",
295 | " 0 | \n",
296 | " 0 | \n",
297 | " ... | \n",
298 | " 0 | \n",
299 | " 1 | \n",
300 | " 0 | \n",
301 | " 0 | \n",
302 | " 0 | \n",
303 | " 0 | \n",
304 | " 1 | \n",
305 | " 0 | \n",
306 | " 1 | \n",
307 | " 0 | \n",
308 | "
\n",
309 | " \n",
310 | " 4 | \n",
311 | " 56 | \n",
312 | " 1 | \n",
313 | " 999 | \n",
314 | " 0 | \n",
315 | " 1 | \n",
316 | " 0 | \n",
317 | " 0 | \n",
318 | " 0 | \n",
319 | " 0 | \n",
320 | " 0 | \n",
321 | " ... | \n",
322 | " 0 | \n",
323 | " 1 | \n",
324 | " 0 | \n",
325 | " 0 | \n",
326 | " 0 | \n",
327 | " 0 | \n",
328 | " 1 | \n",
329 | " 0 | \n",
330 | " 1 | \n",
331 | " 0 | \n",
332 | "
\n",
333 | " \n",
334 | "
\n",
335 | "
5 rows × 61 columns
\n",
336 | "
"
337 | ],
338 | "text/plain": [
339 | " age campaign pdays previous no_previous_contact not_working \\\n",
340 | "0 56 1 999 0 1 0 \n",
341 | "1 57 1 999 0 1 0 \n",
342 | "2 37 1 999 0 1 0 \n",
343 | "3 40 1 999 0 1 0 \n",
344 | "4 56 1 999 0 1 0 \n",
345 | "\n",
346 | " job_admin. job_blue-collar job_entrepreneur job_housemaid ... \\\n",
347 | "0 0 0 0 1 ... \n",
348 | "1 0 0 0 0 ... \n",
349 | "2 0 0 0 0 ... \n",
350 | "3 1 0 0 0 ... \n",
351 | "4 0 0 0 0 ... \n",
352 | "\n",
353 | " day_of_week_fri day_of_week_mon day_of_week_thu day_of_week_tue \\\n",
354 | "0 0 1 0 0 \n",
355 | "1 0 1 0 0 \n",
356 | "2 0 1 0 0 \n",
357 | "3 0 1 0 0 \n",
358 | "4 0 1 0 0 \n",
359 | "\n",
360 | " day_of_week_wed poutcome_failure poutcome_nonexistent poutcome_success \\\n",
361 | "0 0 0 1 0 \n",
362 | "1 0 0 1 0 \n",
363 | "2 0 0 1 0 \n",
364 | "3 0 0 1 0 \n",
365 | "4 0 0 1 0 \n",
366 | "\n",
367 | " y_no y_yes \n",
368 | "0 1 0 \n",
369 | "1 1 0 \n",
370 | "2 1 0 \n",
371 | "3 1 0 \n",
372 | "4 1 0 \n",
373 | "\n",
374 | "[5 rows x 61 columns]"
375 | ]
376 | },
377 | "execution_count": 41,
378 | "metadata": {},
379 | "output_type": "execute_result"
380 | }
381 | ],
382 | "source": [
383 | "model_data.head()"
384 | ]
385 | },
386 | {
387 | "cell_type": "code",
388 | "execution_count": null,
389 | "id": "332a9f7c-556d-4ced-ac81-5c1b51162417",
390 | "metadata": {
391 | "tags": [],
392 | "id": "332a9f7c-556d-4ced-ac81-5c1b51162417",
393 | "outputId": "a28f7c82-70e3-4525-b131-c5e8922f9cf6"
394 | },
395 | "outputs": [
396 | {
397 | "name": "stdout",
398 | "output_type": "stream",
399 | "text": [
400 | "\n",
401 | "Int64Index: 41188 entries, 0 to 41187\n",
402 | "Data columns (total 61 columns):\n",
403 | " # Column Non-Null Count Dtype\n",
404 | "--- ------ -------------- -----\n",
405 | " 0 age 41188 non-null int64\n",
406 | " 1 campaign 41188 non-null int64\n",
407 | " 2 pdays 41188 non-null int64\n",
408 | " 3 previous 41188 non-null int64\n",
409 | " 4 no_previous_contact 41188 non-null int64\n",
410 | " 5 not_working 41188 non-null int64\n",
411 | " 6 job_admin. 41188 non-null int64\n",
412 | " 7 job_blue-collar 41188 non-null int64\n",
413 | " 8 job_entrepreneur 41188 non-null int64\n",
414 | " 9 job_housemaid 41188 non-null int64\n",
415 | " 10 job_management 41188 non-null int64\n",
416 | " 11 job_retired 41188 non-null int64\n",
417 | " 12 job_self-employed 41188 non-null int64\n",
418 | " 13 job_services 41188 non-null int64\n",
419 | " 14 job_student 41188 non-null int64\n",
420 | " 15 job_technician 41188 non-null int64\n",
421 | " 16 job_unemployed 41188 non-null int64\n",
422 | " 17 job_unknown 41188 non-null int64\n",
423 | " 18 marital_divorced 41188 non-null int64\n",
424 | " 19 marital_married 41188 non-null int64\n",
425 | " 20 marital_single 41188 non-null int64\n",
426 | " 21 marital_unknown 41188 non-null int64\n",
427 | " 22 education_basic.4y 41188 non-null int64\n",
428 | " 23 education_basic.6y 41188 non-null int64\n",
429 | " 24 education_basic.9y 41188 non-null int64\n",
430 | " 25 education_high.school 41188 non-null int64\n",
431 | " 26 education_illiterate 41188 non-null int64\n",
432 | " 27 education_professional.course 41188 non-null int64\n",
433 | " 28 education_university.degree 41188 non-null int64\n",
434 | " 29 education_unknown 41188 non-null int64\n",
435 | " 30 default_no 41188 non-null int64\n",
436 | " 31 default_unknown 41188 non-null int64\n",
437 | " 32 default_yes 41188 non-null int64\n",
438 | " 33 housing_no 41188 non-null int64\n",
439 | " 34 housing_unknown 41188 non-null int64\n",
440 | " 35 housing_yes 41188 non-null int64\n",
441 | " 36 loan_no 41188 non-null int64\n",
442 | " 37 loan_unknown 41188 non-null int64\n",
443 | " 38 loan_yes 41188 non-null int64\n",
444 | " 39 contact_cellular 41188 non-null int64\n",
445 | " 40 contact_telephone 41188 non-null int64\n",
446 | " 41 month_apr 41188 non-null int64\n",
447 | " 42 month_aug 41188 non-null int64\n",
448 | " 43 month_dec 41188 non-null int64\n",
449 | " 44 month_jul 41188 non-null int64\n",
450 | " 45 month_jun 41188 non-null int64\n",
451 | " 46 month_mar 41188 non-null int64\n",
452 | " 47 month_may 41188 non-null int64\n",
453 | " 48 month_nov 41188 non-null int64\n",
454 | " 49 month_oct 41188 non-null int64\n",
455 | " 50 month_sep 41188 non-null int64\n",
456 | " 51 day_of_week_fri 41188 non-null int64\n",
457 | " 52 day_of_week_mon 41188 non-null int64\n",
458 | " 53 day_of_week_thu 41188 non-null int64\n",
459 | " 54 day_of_week_tue 41188 non-null int64\n",
460 | " 55 day_of_week_wed 41188 non-null int64\n",
461 | " 56 poutcome_failure 41188 non-null int64\n",
462 | " 57 poutcome_nonexistent 41188 non-null int64\n",
463 | " 58 poutcome_success 41188 non-null int64\n",
464 | " 59 y_no 41188 non-null int64\n",
465 | " 60 y_yes 41188 non-null int64\n",
466 | "dtypes: int64(61)\n",
467 | "memory usage: 19.5 MB\n"
468 | ]
469 | }
470 | ],
471 | "source": [
472 | "model_data.info()"
473 | ]
474 | },
475 | {
476 | "cell_type": "markdown",
477 | "id": "b1420e12-0ab4-4e7b-8b8f-ca10411f3514",
478 | "metadata": {
479 | "id": "b1420e12-0ab4-4e7b-8b8f-ca10411f3514"
480 | },
481 | "source": [
482 | "## Train and Test Split"
483 | ]
484 | },
485 | {
486 | "cell_type": "code",
487 | "execution_count": null,
488 | "id": "d1cc3aba-b34f-442b-8822-462b31002fac",
489 | "metadata": {
490 | "tags": [],
491 | "id": "d1cc3aba-b34f-442b-8822-462b31002fac"
492 | },
493 | "outputs": [],
494 | "source": [
495 | "from sklearn.model_selection import train_test_split\n",
496 | "# Fixed seed so the 70/30 split, and every metric computed from it, is reproducible across runs\n",
497 | "train_data, test_data = train_test_split(model_data, test_size=0.3, random_state=42)"
498 | ]
499 | },
500 | {
501 | "cell_type": "code",
502 | "execution_count": null,
503 | "id": "b680644d-3697-4cee-9710-38397f5b89de",
504 | "metadata": {
505 | "tags": [],
506 | "id": "b680644d-3697-4cee-9710-38397f5b89de"
507 | },
508 | "outputs": [],
509 | "source": [
510 | "## write the training split to a local CSV: label column first, then the features, no header or index\n",
511 | "import os\n",
512 | "train_features = train_data.drop(columns=['y_no', 'y_yes'])\n",
513 | "train_csv = pd.concat([train_data['y_yes'], train_features], axis=1)\n",
514 | "train_csv.to_csv(\"train.csv\", index=False, header=False)"
515 | ]
516 | },
517 | {
518 | "cell_type": "code",
519 | "execution_count": null,
520 | "id": "509d33df-9a2c-4847-8a23-bb7889e72c33",
521 | "metadata": {
522 | "tags": [],
523 | "id": "509d33df-9a2c-4847-8a23-bb7889e72c33",
524 | "outputId": "3bdbd3a4-1d54-4965-e5bf-56f1a1060bcb"
525 | },
526 | "outputs": [
527 | {
528 | "name": "stderr",
529 | "output_type": "stream",
530 | "text": [
531 | "INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole\n"
532 | ]
533 | }
534 | ],
535 | "source": [
536 | "boto3.Session().resource(\"s3\").Bucket(bucket_name).Object(f\"{path}/train/train.csv\").upload_file(\"train.csv\")  # S3 keys always use '/', so do not build them with os.path.join"
537 | ]
538 | },
539 | {
540 | "cell_type": "code",
541 | "execution_count": null,
542 | "id": "163bcf90-7aa4-41d7-b790-3f94a505465c",
543 | "metadata": {
544 | "tags": [],
545 | "id": "163bcf90-7aa4-41d7-b790-3f94a505465c"
546 | },
547 | "outputs": [],
548 | "source": [
549 | "## now storing the training csv into a variable\n",
550 | "s3_input_train = sagemaker.TrainingInput(s3_data=f\"s3://{bucket_name}/{path}/train/\", content_type=\"csv\")"
551 | ]
552 | },
553 | {
554 | "cell_type": "code",
555 | "execution_count": null,
556 | "id": "14b988c2-e807-4b82-ab86-064ac66d4b51",
557 | "metadata": {
558 | "tags": [],
559 | "id": "14b988c2-e807-4b82-ab86-064ac66d4b51"
560 | },
561 | "outputs": [],
562 | "source": [
563 | "test_features = test_data.drop(columns=['y_no', 'y_yes'])\n",
564 | "test_csv = pd.concat([test_data['y_yes'], test_features], axis=1)  # label first, same layout as train.csv\n",
565 | "test_csv.to_csv(\"test.csv\", index=False, header=False)"
566 | ]
567 | },
568 | {
569 | "cell_type": "code",
570 | "execution_count": null,
571 | "id": "5508f1fa-07c2-41db-8728-1964d3c1feee",
572 | "metadata": {
573 | "tags": [],
574 | "id": "5508f1fa-07c2-41db-8728-1964d3c1feee",
575 | "outputId": "13c99d77-63fa-44f1-d67c-581192240834"
576 | },
577 | "outputs": [
578 | {
579 | "name": "stderr",
580 | "output_type": "stream",
581 | "text": [
582 | "INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole\n"
583 | ]
584 | }
585 | ],
586 | "source": [
587 | "boto3.Session().resource(\"s3\").Bucket(bucket_name).Object(f\"{path}/test/test.csv\").upload_file(\"test.csv\")  # S3 keys always use '/'\n",
588 | "## describe the uploaded test split (CSV); it is passed to fit() as the validation channel\n",
589 | "s3_input_test = sagemaker.TrainingInput(s3_data=f\"s3://{bucket_name}/{path}/test/\", content_type=\"csv\")"
590 | ]
591 | },
592 | {
593 | "cell_type": "markdown",
594 | "id": "37413d3a-eab0-43a4-a650-c1d6411cde6e",
595 | "metadata": {
596 | "id": "37413d3a-eab0-43a4-a650-c1d6411cde6e"
597 | },
598 | "source": [
599 | "## Building the model with SageMaker's built-in XGBoost algorithm"
600 | ]
601 | },
602 | {
603 | "cell_type": "code",
604 | "execution_count": null,
605 | "id": "945764ca-2025-4d23-9e73-457b0290308c",
606 | "metadata": {
607 | "tags": [],
608 | "id": "945764ca-2025-4d23-9e73-457b0290308c"
609 | },
610 | "outputs": [],
611 | "source": [
612 | "from sagemaker import image_uris  # public import path; amazon_estimator only exposed it as a transitive import\n",
613 | "from sagemaker.session import s3_input, Session"
614 | ]
615 | },
616 | {
617 | "cell_type": "code",
618 | "execution_count": null,
619 | "id": "00ffeb83-0dd8-4cd0-9036-38ca23ea2d40",
620 | "metadata": {
621 | "tags": [],
622 | "id": "00ffeb83-0dd8-4cd0-9036-38ca23ea2d40",
623 | "outputId": "2fec7532-e1a6-4f97-d753-47d24ac94d95"
624 | },
625 | "outputs": [
626 | {
627 | "name": "stderr",
628 | "output_type": "stream",
629 | "text": [
630 | "INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.\n"
631 | ]
632 | }
633 | ],
634 | "source": [
635 | "container = image_uris.retrieve(framework=\"xgboost\", region=boto3.Session().region_name, version=\"1.5-1\")"
636 | ]
637 | },
638 | {
639 | "cell_type": "code",
640 | "execution_count": null,
641 | "id": "8cc12158-aa48-4306-a600-abb7a540f3dd",
642 | "metadata": {
643 | "tags": [],
644 | "id": "8cc12158-aa48-4306-a600-abb7a540f3dd"
645 | },
646 | "outputs": [],
647 | "source": [
648 | "## hyperparameters handed to the XGBoost training container\n",
649 | "hyperparameters = {\n",
650 | "    \"max_depth\": \"5\",\n",
651 | "    \"eta\": \"0.2\",\n",
652 | "    \"gamma\": \"4\",\n",
653 | "    \"min_child_weight\": \"6\",\n",
654 | "    \"subsample\": \"0.7\",\n",
655 | "    \"objective\": \"binary:logistic\",\n",
656 | "    \"num_round\": 50,\n",
657 | "}"
658 | ]
659 | },
660 | {
661 | "cell_type": "code",
662 | "execution_count": null,
663 | "id": "05b18a5b-9148-46bb-a502-7b2455c2ee3d",
664 | "metadata": {
665 | "tags": [],
666 | "id": "05b18a5b-9148-46bb-a502-7b2455c2ee3d"
667 | },
668 | "outputs": [],
669 | "source": [
670 | "# estimator that runs the training job in the XGBoost container image retrieved above\n",
671 | "estimator = sagemaker.estimator.Estimator(\n",
672 | "    image_uri=container,\n",
673 | "    hyperparameters=hyperparameters,\n",
674 | "    role=sagemaker.get_execution_role(),\n",
675 | "    instance_count=1, instance_type=\"ml.m5.2xlarge\",\n",
676 | "    volume_size=5,\n",
677 | "    output_path=output_path)"
678 | ]
679 | },
680 | {
681 | "cell_type": "code",
682 | "execution_count": null,
683 | "id": "6d3e01e8-1039-43e0-a26c-2a48e9e2b155",
684 | "metadata": {
685 | "tags": [],
686 | "id": "6d3e01e8-1039-43e0-a26c-2a48e9e2b155",
687 | "outputId": "66e9f423-4475-44ba-9bb3-dec828445bdd"
688 | },
689 | "outputs": [
690 | {
691 | "name": "stderr",
692 | "output_type": "stream",
693 | "text": [
694 | "INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2023-06-08-16-50-36-867\n"
695 | ]
696 | },
697 | {
698 | "name": "stdout",
699 | "output_type": "stream",
700 | "text": [
701 | "2023-06-08 16:50:40 Starting - Starting the training job...\n",
702 | "2023-06-08 16:50:55 Starting - Preparing the instances for training......\n",
703 | "2023-06-08 16:52:14 Downloading - Downloading input data\n",
704 | "2023-06-08 16:52:14 Training - Downloading the training image...\n",
705 | "2023-06-08 16:52:35 Training - Training image download completed. Training in progress...\u001b[34m[2023-06-08 16:52:54.214 ip-10-2-95-175.ec2.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None\u001b[0m\n",
706 | "\u001b[34m[2023-06-08 16:52:54.283 ip-10-2-95-175.ec2.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.\u001b[0m\n",
707 | "\u001b[34m[2023-06-08:16:52:54:INFO] Imported framework sagemaker_xgboost_container.training\u001b[0m\n",
708 | "\u001b[34m[2023-06-08:16:52:54:INFO] Failed to parse hyperparameter objective value binary:logistic to Json.\u001b[0m\n",
709 | "\u001b[34mReturning the value itself\u001b[0m\n",
710 | "\u001b[34m[2023-06-08:16:52:54:INFO] No GPUs detected (normal if no gpus installed)\u001b[0m\n",
711 | "\u001b[34m[2023-06-08:16:52:54:INFO] Running XGBoost Sagemaker in algorithm mode\u001b[0m\n",
712 | "\u001b[34m[2023-06-08:16:52:54:INFO] Determined 0 GPU(s) available on the instance.\u001b[0m\n",
713 | "\u001b[34m[2023-06-08:16:52:54:INFO] Determined delimiter of CSV input is ','\u001b[0m\n",
714 | "\u001b[34m[2023-06-08:16:52:54:INFO] Determined delimiter of CSV input is ','\u001b[0m\n",
715 | "\u001b[34m[2023-06-08:16:52:54:INFO] files path: /opt/ml/input/data/train\u001b[0m\n",
716 | "\u001b[34m[2023-06-08:16:52:54:INFO] Determined delimiter of CSV input is ','\u001b[0m\n",
717 | "\u001b[34m[2023-06-08:16:52:54:INFO] files path: /opt/ml/input/data/validation\u001b[0m\n",
718 | "\u001b[34m[2023-06-08:16:52:54:INFO] Determined delimiter of CSV input is ','\u001b[0m\n",
719 | "\u001b[34m[2023-06-08:16:52:54:INFO] Single node training.\u001b[0m\n",
720 | "\u001b[34m[2023-06-08:16:52:54:INFO] Train matrix has 28831 rows and 59 columns\u001b[0m\n",
721 | "\u001b[34m[2023-06-08:16:52:54:INFO] Validation matrix has 12357 rows\u001b[0m\n",
722 | "\u001b[34m[2023-06-08 16:52:54.720 ip-10-2-95-175.ec2.internal:7 INFO json_config.py:92] Creating hook from json_config at /opt/ml/input/config/debughookconfig.json.\u001b[0m\n",
723 | "\u001b[34m[2023-06-08 16:52:54.721 ip-10-2-95-175.ec2.internal:7 INFO hook.py:206] tensorboard_dir has not been set for the hook. SMDebug will not be exporting tensorboard summaries.\u001b[0m\n",
724 | "\u001b[34m[2023-06-08 16:52:54.722 ip-10-2-95-175.ec2.internal:7 INFO hook.py:259] Saving to /opt/ml/output/tensors\u001b[0m\n",
725 | "\u001b[34m[2023-06-08 16:52:54.722 ip-10-2-95-175.ec2.internal:7 INFO state_store.py:77] The checkpoint config file /opt/ml/input/config/checkpointconfig.json does not exist.\u001b[0m\n",
726 | "\u001b[34m[2023-06-08:16:52:54:INFO] Debug hook created from config\u001b[0m\n",
727 | "\u001b[34m[16:52:54] WARNING: ../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\u001b[0m\n",
728 | "\u001b[34m[2023-06-08 16:52:54.799 ip-10-2-95-175.ec2.internal:7 INFO hook.py:427] Monitoring the collections: metrics\u001b[0m\n",
729 | "\u001b[34m[2023-06-08 16:52:54.802 ip-10-2-95-175.ec2.internal:7 INFO hook.py:491] Hook is writing from the hook with pid: 7\u001b[0m\n",
730 | "\u001b[34m[0]#011train-logloss:0.57428#011validation-logloss:0.57330\u001b[0m\n",
731 | "\u001b[34m[1]#011train-logloss:0.49542#011validation-logloss:0.49375\u001b[0m\n",
732 | "\u001b[34m[2]#011train-logloss:0.43939#011validation-logloss:0.43713\u001b[0m\n",
733 | "\u001b[34m[3]#011train-logloss:0.40015#011validation-logloss:0.39731\u001b[0m\n",
734 | "\u001b[34m[4]#011train-logloss:0.37156#011validation-logloss:0.36851\u001b[0m\n",
735 | "\u001b[34m[5]#011train-logloss:0.34992#011validation-logloss:0.34684\u001b[0m\n",
736 | "\u001b[34m[6]#011train-logloss:0.33411#011validation-logloss:0.33079\u001b[0m\n",
737 | "\u001b[34m[7]#011train-logloss:0.32230#011validation-logloss:0.31873\u001b[0m\n",
738 | "\u001b[34m[8]#011train-logloss:0.31328#011validation-logloss:0.30944\u001b[0m\n",
739 | "\u001b[34m[9]#011train-logloss:0.30687#011validation-logloss:0.30285\u001b[0m\n",
740 | "\u001b[34m[10]#011train-logloss:0.30208#011validation-logloss:0.29787\u001b[0m\n",
741 | "\u001b[34m[11]#011train-logloss:0.29824#011validation-logloss:0.29425\u001b[0m\n",
742 | "\u001b[34m[12]#011train-logloss:0.29549#011validation-logloss:0.29142\u001b[0m\n",
743 | "\u001b[34m[13]#011train-logloss:0.29334#011validation-logloss:0.28951\u001b[0m\n",
744 | "\u001b[34m[14]#011train-logloss:0.29156#011validation-logloss:0.28759\u001b[0m\n",
745 | "\u001b[34m[15]#011train-logloss:0.29002#011validation-logloss:0.28623\u001b[0m\n",
746 | "\u001b[34m[16]#011train-logloss:0.28896#011validation-logloss:0.28525\u001b[0m\n",
747 | "\u001b[34m[17]#011train-logloss:0.28785#011validation-logloss:0.28464\u001b[0m\n",
748 | "\u001b[34m[18]#011train-logloss:0.28711#011validation-logloss:0.28403\u001b[0m\n",
749 | "\u001b[34m[19]#011train-logloss:0.28606#011validation-logloss:0.28331\u001b[0m\n",
750 | "\u001b[34m[20]#011train-logloss:0.28546#011validation-logloss:0.28283\u001b[0m\n",
751 | "\u001b[34m[21]#011train-logloss:0.28482#011validation-logloss:0.28255\u001b[0m\n",
752 | "\u001b[34m[22]#011train-logloss:0.28445#011validation-logloss:0.28218\u001b[0m\n",
753 | "\u001b[34m[23]#011train-logloss:0.28360#011validation-logloss:0.28154\u001b[0m\n",
754 | "\u001b[34m[24]#011train-logloss:0.28319#011validation-logloss:0.28148\u001b[0m\n",
755 | "\u001b[34m[25]#011train-logloss:0.28266#011validation-logloss:0.28155\u001b[0m\n",
756 | "\u001b[34m[26]#011train-logloss:0.28223#011validation-logloss:0.28107\u001b[0m\n",
757 | "\u001b[34m[27]#011train-logloss:0.28208#011validation-logloss:0.28106\u001b[0m\n",
758 | "\u001b[34m[28]#011train-logloss:0.28179#011validation-logloss:0.28102\u001b[0m\n",
759 | "\u001b[34m[29]#011train-logloss:0.28158#011validation-logloss:0.28087\u001b[0m\n",
760 | "\u001b[34m[30]#011train-logloss:0.28133#011validation-logloss:0.28070\u001b[0m\n",
761 | "\u001b[34m[31]#011train-logloss:0.28119#011validation-logloss:0.28057\u001b[0m\n",
762 | "\u001b[34m[32]#011train-logloss:0.28093#011validation-logloss:0.28066\u001b[0m\n",
763 | "\u001b[34m[33]#011train-logloss:0.28046#011validation-logloss:0.28070\u001b[0m\n",
764 | "\u001b[34m[34]#011train-logloss:0.28036#011validation-logloss:0.28076\u001b[0m\n",
765 | "\u001b[34m[35]#011train-logloss:0.28011#011validation-logloss:0.28081\u001b[0m\n",
766 | "\u001b[34m[36]#011train-logloss:0.27984#011validation-logloss:0.28091\u001b[0m\n",
767 | "\u001b[34m[37]#011train-logloss:0.27935#011validation-logloss:0.28062\u001b[0m\n",
768 | "\u001b[34m[38]#011train-logloss:0.27918#011validation-logloss:0.28051\u001b[0m\n",
769 | "\u001b[34m[39]#011train-logloss:0.27897#011validation-logloss:0.28065\u001b[0m\n",
770 | "\u001b[34m[40]#011train-logloss:0.27889#011validation-logloss:0.28070\u001b[0m\n",
771 | "\u001b[34m[41]#011train-logloss:0.27871#011validation-logloss:0.28066\u001b[0m\n",
772 | "\u001b[34m[42]#011train-logloss:0.27860#011validation-logloss:0.28077\u001b[0m\n",
773 | "\u001b[34m[43]#011train-logloss:0.27860#011validation-logloss:0.28077\u001b[0m\n",
774 | "\u001b[34m[44]#011train-logloss:0.27829#011validation-logloss:0.28093\u001b[0m\n",
775 | "\u001b[34m[45]#011train-logloss:0.27808#011validation-logloss:0.28091\u001b[0m\n",
776 | "\u001b[34m[46]#011train-logloss:0.27801#011validation-logloss:0.28085\u001b[0m\n",
777 | "\u001b[34m[47]#011train-logloss:0.27786#011validation-logloss:0.28088\u001b[0m\n",
778 | "\u001b[34m[48]#011train-logloss:0.27779#011validation-logloss:0.28085\u001b[0m\n",
779 | "\u001b[34m[49]#011train-logloss:0.27756#011validation-logloss:0.28106\u001b[0m\n",
780 | "\n",
781 | "2023-06-08 16:53:16 Uploading - Uploading generated training model\n",
782 | "2023-06-08 16:53:16 Completed - Training job completed\n",
783 | "Training seconds: 82\n",
784 | "Billable seconds: 82\n"
785 | ]
786 | }
787 | ],
788 | "source": [
789 | "estimator.fit(inputs={\"train\": s3_input_train, \"validation\": s3_input_test})"
790 | ]
791 | },
792 | {
793 | "cell_type": "markdown",
794 | "id": "550b55e8-6f11-4643-b0e1-a0465e0f8d68",
795 | "metadata": {
796 | "id": "550b55e8-6f11-4643-b0e1-a0465e0f8d68"
797 | },
798 | "source": [
799 | "## Deploying the ML Model as an endpoint"
800 | ]
801 | },
802 | {
803 | "cell_type": "code",
804 | "execution_count": null,
805 | "id": "d5801e3e-a924-4422-84c9-de93827d25f8",
806 | "metadata": {
807 | "tags": [],
808 | "id": "d5801e3e-a924-4422-84c9-de93827d25f8",
809 | "outputId": "49613511-3aba-4db0-b92c-bbb010540176"
810 | },
811 | "outputs": [
812 | {
813 | "name": "stderr",
814 | "output_type": "stream",
815 | "text": [
816 | "INFO:sagemaker:Creating model with name: sagemaker-xgboost-2023-06-08-16-53-29-875\n",
817 | "INFO:sagemaker:Creating endpoint-config with name sagemaker-xgboost-2023-06-08-16-53-29-875\n",
818 | "INFO:sagemaker:Creating endpoint with name sagemaker-xgboost-2023-06-08-16-53-29-875\n"
819 | ]
820 | },
821 | {
822 | "name": "stdout",
823 | "output_type": "stream",
824 | "text": [
825 | "-------!"
826 | ]
827 | }
828 | ],
829 | "source": [
830 | "from sagemaker.serializers import CSVSerializer\n",
831 | "xgb_predictor = estimator.deploy(serializer=CSVSerializer(), instance_type=\"ml.m4.xlarge\", initial_instance_count=1)"
832 | ]
833 | },
834 | {
835 | "cell_type": "markdown",
836 | "id": "fdbfd854-3489-49fd-a0a7-5016ae0079ae",
837 | "metadata": {
838 | "id": "fdbfd854-3489-49fd-a0a7-5016ae0079ae"
839 | },
840 | "source": [
841 | "## Prediction of test data"
842 | ]
843 | },
844 | {
845 | "cell_type": "code",
846 | "execution_count": null,
847 | "id": "82d7c25f-d9c5-4355-80df-6b83c19bb2f1",
848 | "metadata": {
849 | "tags": [],
850 | "id": "82d7c25f-d9c5-4355-80df-6b83c19bb2f1",
851 | "outputId": "8345be11-9d16-4c21-efaf-7e9c52f8a824"
852 | },
853 | "outputs": [
854 | {
855 | "data": {
856 | "text/plain": [
857 | "array([[ 34, 4, 999, ..., 0, 1, 0],\n",
858 | " [ 31, 6, 999, ..., 0, 1, 0],\n",
859 | " [ 32, 1, 999, ..., 0, 1, 0],\n",
860 | " ...,\n",
861 | " [ 36, 3, 999, ..., 0, 1, 0],\n",
862 | " [ 47, 3, 999, ..., 0, 1, 0],\n",
863 | " [ 39, 8, 999, ..., 0, 1, 0]])"
864 | ]
865 | },
866 | "execution_count": 55,
867 | "metadata": {},
868 | "output_type": "execute_result"
869 | }
870 | ],
871 | "source": [
872 | "test_data_array = test_data.drop(columns=['y_yes', 'y_no']).to_numpy()\n",
873 | "test_data_array"
874 | ]
875 | },
876 | {
877 | "cell_type": "code",
878 | "execution_count": null,
879 | "id": "31843c08-a7b8-438c-8370-46c48626b11f",
880 | "metadata": {
881 | "tags": [],
882 | "id": "31843c08-a7b8-438c-8370-46c48626b11f"
883 | },
884 | "outputs": [],
885 | "source": [
886 | "predictions = xgb_predictor.predict(test_data_array).decode(\"utf-8\")"
887 | ]
888 | },
889 | {
890 | "cell_type": "code",
891 | "execution_count": null,
892 | "id": "46bcd959-2951-4329-b211-279a84b174f1",
893 | "metadata": {
894 | "tags": [],
895 | "id": "46bcd959-2951-4329-b211-279a84b174f1",
896 | "outputId": "fdf0fc9c-48c2-4c8b-d02e-5c159c140643"
897 | },
898 | "outputs": [
899 | {
900 | "data": {
901 | "text/plain": [
902 | "array([0.08596137, 0.08066913, 0.2411833 , ..., 0.10747377, 0.07555814,\n",
903 | " 0.03684008])"
904 | ]
905 | },
906 | "execution_count": 57,
907 | "metadata": {},
908 | "output_type": "execute_result"
909 | }
910 | ],
911 | "source": [
912 | "predictions_array = np.fromstring(predictions, sep=\"\\n\")\n",
913 | "predictions_array"
914 | ]
915 | },
916 | {
917 | "cell_type": "code",
918 | "execution_count": null,
919 | "id": "8f049870-18b8-472b-9ecd-e65be3e08922",
920 | "metadata": {
921 | "id": "8f049870-18b8-472b-9ecd-e65be3e08922",
922 | "outputId": "d8709a5b-c1d6-4dd7-b297-0555f58be0be"
923 | },
924 | "outputs": [
925 | {
926 | "name": "stdout",
927 | "output_type": "stream",
928 | "text": [
929 | "[[10828 163]\n",
930 | " [ 1076 290]]\n",
931 | " precision recall f1-score support\n",
932 | "\n",
933 | " 0 0.91 0.99 0.95 10991\n",
934 | " 1 0.64 0.21 0.32 1366\n",
935 | "\n",
936 | " accuracy 0.90 12357\n",
937 | " macro avg 0.77 0.60 0.63 12357\n",
938 | "weighted avg 0.88 0.90 0.88 12357\n",
939 | "\n"
940 | ]
941 | }
942 | ],
943 | "source": [
944 | "import sklearn.metrics\n",
945 | "cutoff = 0.5  # probability above which a row is labelled positive; at 0.5 this matches the former np.round\n",
946 | "print(sklearn.metrics.confusion_matrix(test_data['y_yes'], (predictions_array > cutoff).astype(int)))\n",
947 | "print(sklearn.metrics.classification_report(test_data['y_yes'], (predictions_array > cutoff).astype(int)))"
948 | ]
949 | },
950 | {
951 | "cell_type": "code",
952 | "execution_count": null,
953 | "id": "f2efc2f5-b86f-4ff2-a429-3524f045f79f",
954 | "metadata": {
955 | "id": "f2efc2f5-b86f-4ff2-a429-3524f045f79f",
956 | "outputId": "1221010c-6e00-485a-c0aa-5a7831cbd74d"
957 | },
958 | "outputs": [
959 | {
960 | "data": {
961 | "text/plain": [
962 | "'sagemaker-xgboost-2023-06-08-16-53-29-875'"
963 | ]
964 | },
965 | "execution_count": 59,
966 | "metadata": {},
967 | "output_type": "execute_result"
968 | }
969 | ],
970 | "source": [
971 | "xgb_predictor.endpoint_name  # display the name of the endpoint behind xgb_predictor"
972 | ]
973 | },
974 | {
975 | "cell_type": "code",
976 | "execution_count": null,
977 | "id": "5963d98c-3fbd-475d-99ee-17886bf2543a",
978 | "metadata": {
979 | "id": "5963d98c-3fbd-475d-99ee-17886bf2543a",
980 | "outputId": "c54c3685-8101-4905-a862-51362df24d23"
981 | },
982 | "outputs": [
983 | {
984 | "name": "stderr",
985 | "output_type": "stream",
986 | "text": [
987 | "WARNING:sagemaker.deprecations:The endpoint attribute has been renamed in sagemaker>=2.\n",
988 | "See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.\n",
989 | "INFO:sagemaker:Deleting endpoint with name: sagemaker-xgboost-2023-06-08-16-53-29-875\n"
990 | ]
991 | }
992 | ],
993 | "source": [
994 | "sagemaker.Session().delete_endpoint(xgb_predictor.endpoint_name)  # endpoint_name replaces the `endpoint` attribute deprecated in sagemaker>=2\n",
995 | "bucket_to_delete = boto3.resource('s3').Bucket(bucket_name)  # handle to the bucket whose objects are removed in the next cell"
996 | ]
997 | },
998 | {
999 | "cell_type": "code",
1000 | "execution_count": null,
1001 | "id": "1cbb5a75-f0d7-4c6f-8a3e-e7737d3c0c4f",
1002 | "metadata": {
1003 | "tags": [],
1004 | "id": "1cbb5a75-f0d7-4c6f-8a3e-e7737d3c0c4f",
1005 | "outputId": "45df1f88-4d5f-4bbd-c9a6-1bb0daeff769"
1006 | },
1007 | "outputs": [
1008 | {
1009 | "data": {
1010 | "text/plain": [
1011 | "[{'ResponseMetadata': {'RequestId': '3ZFZK6BFTT1TP8MY',\n",
1012 | " 'HostId': 'oGJmvH3ZJE/pzZ8b4ipnm20ms75AFSLBPaOYYV9qlTBwihvWdnULVLgtk+xQdk9hJrFeFaEBz5E=',\n",
1013 | " 'HTTPStatusCode': 200,\n",
1014 | " 'HTTPHeaders': {'x-amz-id-2': 'oGJmvH3ZJE/pzZ8b4ipnm20ms75AFSLBPaOYYV9qlTBwihvWdnULVLgtk+xQdk9hJrFeFaEBz5E=',\n",
1015 | " 'x-amz-request-id': '3ZFZK6BFTT1TP8MY',\n",
1016 | " 'date': 'Thu, 08 Jun 2023 16:57:35 GMT',\n",
1017 | " 'content-type': 'application/xml',\n",
1018 | " 'transfer-encoding': 'chunked',\n",
1019 | " 'server': 'AmazonS3',\n",
1020 | " 'connection': 'close'},\n",
1021 | " 'RetryAttempts': 0},\n",
1022 | " 'Deleted': [{'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/index/000000000/000000000030_worker_0.json'},\n",
1023 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/claim.smd'},\n",
1024 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/index/000000000/000000000010_worker_0.json'},\n",
1025 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/events/000000000020/000000000020_worker_0.tfevents'},\n",
1026 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/profiler-output/system/incremental/2023060816/1686243120.algo-1.json'},\n",
1027 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/events/000000000000/000000000000_worker_0.tfevents'},\n",
1028 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/profiler-output/system/training_job_end.ts'},\n",
1029 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/training_job_end.ts'},\n",
1030 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/index/000000000/000000000000_worker_0.json'},\n",
1031 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/index/000000000/000000000020_worker_0.json'},\n",
1032 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/index/000000000/000000000040_worker_0.json'},\n",
1033 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/collections/000000000/worker_0_collections.json'},\n",
1034 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/output/model.tar.gz'},\n",
1035 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/events/000000000040/000000000040_worker_0.tfevents'},\n",
1036 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/profiler-output/system/incremental/2023060816/1686243180.algo-1.json'},\n",
1037 | " {'Key': 'xgboost-as-a-built-in-algo/test/test.csv'},\n",
1038 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/profiler-output/framework/training_job_end.ts'},\n",
1039 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/events/000000000010/000000000010_worker_0.tfevents'},\n",
1040 | " {'Key': 'xgboost-as-a-built-in-algo/train/train.csv'},\n",
1041 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/profiler-output/system/incremental/2023060816/1686243060.algo-1.json'},\n",
1042 | " {'Key': 'xgboost-as-a-built-in-algo/output/sagemaker-xgboost-2023-06-08-16-50-36-867/debug-output/events/000000000030/000000000030_worker_0.tfevents'}]}]"
1043 | ]
1044 | },
1045 | "execution_count": 61,
1046 | "metadata": {},
1047 | "output_type": "execute_result"
1048 | }
1049 | ],
1050 | "source": [
1051 | "bucket_to_delete.objects.all().delete()  # delete every object in the bucket; the bucket itself is not removed by this cell"
1052 | ]
1053 | },
1054 | {
1055 | "cell_type": "code",
1056 | "execution_count": null,
1057 | "id": "43110ec1-844b-413f-b52e-42668eb1f436",
1058 | "metadata": {
1059 | "id": "43110ec1-844b-413f-b52e-42668eb1f436"
1060 | },
1061 | "outputs": [],
1062 | "source": []
1063 | }
1064 | ],
1065 | "metadata": {
1066 | "kernelspec": {
1067 | "display_name": "conda_python3",
1068 | "language": "python",
1069 | "name": "conda_python3"
1070 | },
1071 | "language_info": {
1072 | "codemirror_mode": {
1073 | "name": "ipython",
1074 | "version": 3
1075 | },
1076 | "file_extension": ".py",
1077 | "mimetype": "text/x-python",
1078 | "name": "python",
1079 | "nbconvert_exporter": "python",
1080 | "pygments_lexer": "ipython3",
1081 | "version": "3.10.8"
1082 | },
1083 | "colab": {
1084 | "provenance": [],
1085 | "include_colab_link": true
1086 | }
1087 | },
1088 | "nbformat": 4,
1089 | "nbformat_minor": 5
1090 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # aws-sagemaker
2 | Demonstration of using Amazon Web Services (AWS) SageMaker to train, deploy, and evaluate an XGBoost model through the SageMaker APIs.
3 |
4 | 1. [SageMaker Python API](https://sagemaker.readthedocs.io/en/stable/overview.html)
5 | 2. [Learn more about AWS SageMaker](https://aws.amazon.com/sagemaker/)
6 | 3. [SageMaker Pricing](https://aws.amazon.com/sagemaker/pricing/?nc=sn&loc=3)
7 |
--------------------------------------------------------------------------------