├── .gitignore
├── 01-deploy-text-embedding-model.ipynb
├── 02-deploy-text-generation-model.ipynb
├── 03-create-dynamodb-tables.ipynb
├── 04-create-os-index.ipynb
├── 05-lambda-handler.py
├── LICENSE
├── README.md
├── chatbot-app
├── app.py
├── config
│ └── config.yml
├── ddb.py
├── llm.py
└── retrieve.py
├── config.yml
├── img
└── cognition.png
└── lambda-env.csv
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/01-deploy-text-embedding-model.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "3a4bc882-63fb-4049-8551-d0fa3127bd6f",
6 | "metadata": {},
7 | "source": [
8 | "## Deploy Text Embedding Model (GPT-J 6B FP16)"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "259bf0cc-c971-48c7-a537-6cc5958c9267",
14 | "metadata": {},
15 | "source": [
16 | "#### Imports"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "id": "fa427a29-aa9d-411c-8ffc-7cada2feb57f",
23 | "metadata": {
24 | "tags": []
25 | },
26 | "outputs": [],
27 | "source": [
28 | "from sagemaker.jumpstart.notebook_utils import list_jumpstart_models\n",
29 | "from sagemaker.predictor import Predictor\n",
30 | "from sagemaker import get_execution_role\n",
31 | "from sagemaker.model import Model\n",
32 | "from sagemaker import script_uris\n",
33 | "from sagemaker import image_uris \n",
34 | "from sagemaker import model_uris\n",
35 | "import sagemaker\n",
36 | "import logging\n",
37 | "import boto3\n",
38 | "import time\n",
39 | "import json"
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "id": "09080f41-3b2c-464e-9557-a9fae1313b63",
45 | "metadata": {},
46 | "source": [
47 | "##### Set up logging"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": null,
53 | "id": "bde2a70f-471b-4eaf-9703-10fdcf67863e",
54 | "metadata": {
55 | "tags": []
56 | },
57 | "outputs": [],
58 | "source": [
59 | "logger = logging.getLogger('sagemaker')\n",
60 | "logger.setLevel(logging.DEBUG)\n",
61 | "logger.addHandler(logging.StreamHandler())"
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "id": "5372bb7c-ba1d-46af-ad4c-2e77503f422a",
67 | "metadata": {},
68 | "source": [
69 | "##### Log versions of dependencies"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": null,
75 | "id": "cb81aa05-1479-48e3-97fa-e9fa5a3684a5",
76 | "metadata": {
77 | "tags": []
78 | },
79 | "outputs": [],
80 | "source": [
81 | "logger.info(f'Using sagemaker=={sagemaker.__version__}')\n",
82 | "logger.info(f'Using boto3=={boto3.__version__}')"
83 | ]
84 | },
85 | {
86 | "cell_type": "markdown",
87 | "id": "df8a48e0-a23f-4fbd-9be2-811ae129622c",
88 | "metadata": {},
89 | "source": [
90 | "#### Set up essentials "
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "id": "93dc7315-4803-440d-8dfb-e36f51c0a54f",
96 | "metadata": {},
97 | "source": [
98 | "##### List and filter all text embedding models available in JumpStart"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": null,
104 | "id": "9060ff53-7c41-4998-b258-c847a6ee09c0",
105 | "metadata": {
106 | "tags": []
107 | },
108 | "outputs": [],
109 | "source": [
110 | "models = list_jumpstart_models()\n",
111 | "logger.info(f'Total number of models in SageMaker JumpStart hub = {len(models)}')\n",
112 | "\n",
113 | "FILTER = 'task == textembedding'\n",
114 | "txt2img_models = list_jumpstart_models(filter=FILTER)\n",
115 | "txt2img_models"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "id": "34b6fcc4-a7d0-4f01-b2fa-d01ad3f0e2a5",
121 | "metadata": {},
122 | "source": [
123 | "##### Set up config params"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": null,
129 | "id": "1b77dc13-20d5-4dad-aa8a-45ed2f8e70bf",
130 | "metadata": {
131 | "tags": []
132 | },
133 | "outputs": [],
134 | "source": [
135 | "MODEL_ID = 'huggingface-textembedding-gpt-j-6b-fp16' \n",
136 | "MODEL_VERSION = '*'\n",
137 | "INSTANCE_TYPE = 'ml.g5.2xlarge'\n",
138 | "INSTANCE_COUNT = 1\n",
139 | "IMAGE_SCOPE = 'inference'\n",
140 | "MODEL_DATA_DOWNLOAD_TIMEOUT = 3600 # in seconds\n",
141 | "CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT = 3600\n",
142 | "CONTENT_TYPE = 'application/json'\n",
143 | "\n",
144 | "# Set up roles and clients \n",
145 | "client = boto3.client('sagemaker-runtime')\n",
146 | "ROLE = get_execution_role()\n",
147 | "logger.info(f'Role => {ROLE}')"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": null,
153 | "id": "1526ad5f-9e74-459b-8dbd-1accdd576f12",
154 | "metadata": {
155 | "tags": []
156 | },
157 | "outputs": [],
158 | "source": [
159 | "unix_time = int(time.time())\n",
160 | "endpoint_name = f'{MODEL_ID}-{unix_time}'\n",
161 | "logger.info(f'Endpoint name: {endpoint_name}')"
162 | ]
163 | },
164 | {
165 | "cell_type": "markdown",
166 | "id": "283e1db7-5629-495a-abd1-cf04351d2ad8",
167 | "metadata": {},
168 | "source": [
169 | "#### Retrieve image and model URIs"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": null,
175 | "id": "187d02d5-93c9-4a87-bdd7-dd7443d7f64c",
176 | "metadata": {
177 | "tags": []
178 | },
179 | "outputs": [],
180 | "source": [
181 | "deploy_image_uri = image_uris.retrieve(region=None, \n",
182 | " framework=None, \n",
183 | " image_scope=IMAGE_SCOPE, \n",
184 | " model_id=MODEL_ID, \n",
185 | " model_version=MODEL_VERSION, \n",
186 | " instance_type=INSTANCE_TYPE)\n",
187 | "logger.info(f'Deploy image URI => {deploy_image_uri}')"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": null,
193 | "id": "521ba56c-0b53-485c-9c96-b08c5a332a8f",
194 | "metadata": {
195 | "tags": []
196 | },
197 | "outputs": [],
198 | "source": [
199 | "model_uri = model_uris.retrieve(model_id=MODEL_ID, \n",
200 | " model_version=MODEL_VERSION, \n",
201 | " model_scope=IMAGE_SCOPE)\n",
202 | "logger.info(f'Model URI => {model_uri}')"
203 | ]
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": null,
208 | "id": "a32fbce5-500c-4606-8f19-e667e7c6e6a3",
209 | "metadata": {
210 | "tags": []
211 | },
212 | "outputs": [],
213 | "source": [
214 | "env = {\n",
215 | " 'SAGEMAKER_MODEL_SERVER_TIMEOUT': str(3600),\n",
216 | " 'MODEL_CACHE_ROOT': '/opt/ml/model', \n",
217 | " 'SAGEMAKER_ENV': '1',\n",
218 | " 'SAGEMAKER_SUBMIT_DIRECTORY': '/opt/ml/model/code/',\n",
219 | " 'SAGEMAKER_PROGRAM': 'inference.py',\n",
220 | " 'SAGEMAKER_MODEL_SERVER_WORKERS': '1', \n",
221 | " 'TS_DEFAULT_WORKERS_PER_MODEL': '1', \n",
222 | "}"
223 | ]
224 | },
225 | {
226 | "cell_type": "markdown",
227 | "id": "f2657343-5a65-4c57-b39d-6c93107588c5",
228 | "metadata": {},
229 | "source": [
230 | "#### Create SageMaker Model"
231 | ]
232 | },
233 | {
234 | "cell_type": "code",
235 | "execution_count": null,
236 | "id": "60aae2d1-c78a-4a89-890a-d6111ab95c57",
237 | "metadata": {
238 | "tags": []
239 | },
240 | "outputs": [],
241 | "source": [
242 | "model = Model(image_uri=deploy_image_uri, \n",
243 | " model_data=model_uri, \n",
244 | " role=ROLE, \n",
245 | " predictor_cls=Predictor, \n",
246 | " name=endpoint_name, \n",
247 | " env=env)"
248 | ]
249 | },
250 | {
251 | "cell_type": "markdown",
252 | "id": "5a6b21c7-ff26-4dc1-89e9-aa5fea339771",
253 | "metadata": {},
254 | "source": [
255 | "#### Deploy text embedding model as SageMaker endpoint for real-time synchronous inference"
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": null,
261 | "id": "0ce97c1f-b2e6-42e4-87c4-6fba2a92bed1",
262 | "metadata": {
263 | "tags": []
264 | },
265 | "outputs": [],
266 | "source": [
267 | "%%time\n",
268 | "\n",
269 | "_ = model.deploy(initial_instance_count=INSTANCE_COUNT, \n",
270 | " instance_type=INSTANCE_TYPE, \n",
271 | " endpoint_name=endpoint_name, \n",
272 | " model_data_download_timeout=MODEL_DATA_DOWNLOAD_TIMEOUT, \n",
273 | " container_startup_health_check_timeout=CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT)"
274 | ]
275 | },
276 | {
277 | "cell_type": "markdown",
278 | "id": "46c497c6-2bd6-4e7c-a794-029c51bdc301",
279 | "metadata": {},
280 | "source": [
281 | "### Test SageMaker endpoint for inference"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": null,
287 | "id": "2ec45951-bd9f-4096-812e-8ded522f6303",
288 | "metadata": {
289 | "tags": []
290 | },
291 | "outputs": [],
292 | "source": [
293 | "# ENDPOINT_NAME = 'huggingface-textembedding-gpt-j-6b-fp16-1680825746'"
294 | ]
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": null,
299 | "id": "929dfc15-0b3a-490c-b856-6df11ca10f0d",
300 | "metadata": {
301 | "tags": []
302 | },
303 | "outputs": [],
304 | "source": [
305 | "query = 'what is the meaning of life according to an ant?'"
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": null,
311 | "id": "1c06fb32-9246-4a3d-94e3-3cd57a4ba5e2",
312 | "metadata": {
313 | "tags": []
314 | },
315 | "outputs": [],
316 | "source": [
317 | "payload = {'text_inputs': [query]}\n",
318 | "payload = json.dumps(payload).encode('utf-8')"
319 | ]
320 | },
321 | {
322 | "cell_type": "code",
323 | "execution_count": null,
324 | "id": "6d834078-f340-4ffd-8239-18774069574c",
325 | "metadata": {
326 | "tags": []
327 | },
328 | "outputs": [],
329 | "source": [
330 | "%%time\n",
331 | "\n",
332 | "response = client.invoke_endpoint(EndpointName=endpoint_name,\n",
333 | " ContentType='application/json',\n",
334 | " Body=payload)\n",
335 | " "
336 | ]
337 | },
338 | {
339 | "cell_type": "markdown",
340 | "id": "213b5ef6-bf75-4d98-a096-da26168f696f",
341 | "metadata": {},
342 | "source": [
343 | "##### Parse model response to extract query embedding"
344 | ]
345 | },
346 | {
347 | "cell_type": "code",
348 | "execution_count": null,
349 | "id": "757f986f-7986-452d-9946-2ea411a883a7",
350 | "metadata": {
351 | "tags": []
352 | },
353 | "outputs": [],
354 | "source": [
355 | "body = json.loads(response['Body'].read())\n",
356 | "embedding = body['embedding'][0]\n",
357 | "embedding"
358 | ]
359 | },
360 | {
361 | "cell_type": "code",
362 | "execution_count": null,
363 | "id": "5c609bfd-8c56-4bd3-8184-759ac1b67174",
364 | "metadata": {},
365 | "outputs": [],
366 | "source": []
367 | }
368 | ],
369 | "metadata": {
370 | "availableInstances": [
371 | {
372 | "_defaultOrder": 0,
373 | "_isFastLaunch": true,
374 | "category": "General purpose",
375 | "gpuNum": 0,
376 | "hideHardwareSpecs": false,
377 | "memoryGiB": 4,
378 | "name": "ml.t3.medium",
379 | "vcpuNum": 2
380 | },
381 | {
382 | "_defaultOrder": 1,
383 | "_isFastLaunch": false,
384 | "category": "General purpose",
385 | "gpuNum": 0,
386 | "hideHardwareSpecs": false,
387 | "memoryGiB": 8,
388 | "name": "ml.t3.large",
389 | "vcpuNum": 2
390 | },
391 | {
392 | "_defaultOrder": 2,
393 | "_isFastLaunch": false,
394 | "category": "General purpose",
395 | "gpuNum": 0,
396 | "hideHardwareSpecs": false,
397 | "memoryGiB": 16,
398 | "name": "ml.t3.xlarge",
399 | "vcpuNum": 4
400 | },
401 | {
402 | "_defaultOrder": 3,
403 | "_isFastLaunch": false,
404 | "category": "General purpose",
405 | "gpuNum": 0,
406 | "hideHardwareSpecs": false,
407 | "memoryGiB": 32,
408 | "name": "ml.t3.2xlarge",
409 | "vcpuNum": 8
410 | },
411 | {
412 | "_defaultOrder": 4,
413 | "_isFastLaunch": true,
414 | "category": "General purpose",
415 | "gpuNum": 0,
416 | "hideHardwareSpecs": false,
417 | "memoryGiB": 8,
418 | "name": "ml.m5.large",
419 | "vcpuNum": 2
420 | },
421 | {
422 | "_defaultOrder": 5,
423 | "_isFastLaunch": false,
424 | "category": "General purpose",
425 | "gpuNum": 0,
426 | "hideHardwareSpecs": false,
427 | "memoryGiB": 16,
428 | "name": "ml.m5.xlarge",
429 | "vcpuNum": 4
430 | },
431 | {
432 | "_defaultOrder": 6,
433 | "_isFastLaunch": false,
434 | "category": "General purpose",
435 | "gpuNum": 0,
436 | "hideHardwareSpecs": false,
437 | "memoryGiB": 32,
438 | "name": "ml.m5.2xlarge",
439 | "vcpuNum": 8
440 | },
441 | {
442 | "_defaultOrder": 7,
443 | "_isFastLaunch": false,
444 | "category": "General purpose",
445 | "gpuNum": 0,
446 | "hideHardwareSpecs": false,
447 | "memoryGiB": 64,
448 | "name": "ml.m5.4xlarge",
449 | "vcpuNum": 16
450 | },
451 | {
452 | "_defaultOrder": 8,
453 | "_isFastLaunch": false,
454 | "category": "General purpose",
455 | "gpuNum": 0,
456 | "hideHardwareSpecs": false,
457 | "memoryGiB": 128,
458 | "name": "ml.m5.8xlarge",
459 | "vcpuNum": 32
460 | },
461 | {
462 | "_defaultOrder": 9,
463 | "_isFastLaunch": false,
464 | "category": "General purpose",
465 | "gpuNum": 0,
466 | "hideHardwareSpecs": false,
467 | "memoryGiB": 192,
468 | "name": "ml.m5.12xlarge",
469 | "vcpuNum": 48
470 | },
471 | {
472 | "_defaultOrder": 10,
473 | "_isFastLaunch": false,
474 | "category": "General purpose",
475 | "gpuNum": 0,
476 | "hideHardwareSpecs": false,
477 | "memoryGiB": 256,
478 | "name": "ml.m5.16xlarge",
479 | "vcpuNum": 64
480 | },
481 | {
482 | "_defaultOrder": 11,
483 | "_isFastLaunch": false,
484 | "category": "General purpose",
485 | "gpuNum": 0,
486 | "hideHardwareSpecs": false,
487 | "memoryGiB": 384,
488 | "name": "ml.m5.24xlarge",
489 | "vcpuNum": 96
490 | },
491 | {
492 | "_defaultOrder": 12,
493 | "_isFastLaunch": false,
494 | "category": "General purpose",
495 | "gpuNum": 0,
496 | "hideHardwareSpecs": false,
497 | "memoryGiB": 8,
498 | "name": "ml.m5d.large",
499 | "vcpuNum": 2
500 | },
501 | {
502 | "_defaultOrder": 13,
503 | "_isFastLaunch": false,
504 | "category": "General purpose",
505 | "gpuNum": 0,
506 | "hideHardwareSpecs": false,
507 | "memoryGiB": 16,
508 | "name": "ml.m5d.xlarge",
509 | "vcpuNum": 4
510 | },
511 | {
512 | "_defaultOrder": 14,
513 | "_isFastLaunch": false,
514 | "category": "General purpose",
515 | "gpuNum": 0,
516 | "hideHardwareSpecs": false,
517 | "memoryGiB": 32,
518 | "name": "ml.m5d.2xlarge",
519 | "vcpuNum": 8
520 | },
521 | {
522 | "_defaultOrder": 15,
523 | "_isFastLaunch": false,
524 | "category": "General purpose",
525 | "gpuNum": 0,
526 | "hideHardwareSpecs": false,
527 | "memoryGiB": 64,
528 | "name": "ml.m5d.4xlarge",
529 | "vcpuNum": 16
530 | },
531 | {
532 | "_defaultOrder": 16,
533 | "_isFastLaunch": false,
534 | "category": "General purpose",
535 | "gpuNum": 0,
536 | "hideHardwareSpecs": false,
537 | "memoryGiB": 128,
538 | "name": "ml.m5d.8xlarge",
539 | "vcpuNum": 32
540 | },
541 | {
542 | "_defaultOrder": 17,
543 | "_isFastLaunch": false,
544 | "category": "General purpose",
545 | "gpuNum": 0,
546 | "hideHardwareSpecs": false,
547 | "memoryGiB": 192,
548 | "name": "ml.m5d.12xlarge",
549 | "vcpuNum": 48
550 | },
551 | {
552 | "_defaultOrder": 18,
553 | "_isFastLaunch": false,
554 | "category": "General purpose",
555 | "gpuNum": 0,
556 | "hideHardwareSpecs": false,
557 | "memoryGiB": 256,
558 | "name": "ml.m5d.16xlarge",
559 | "vcpuNum": 64
560 | },
561 | {
562 | "_defaultOrder": 19,
563 | "_isFastLaunch": false,
564 | "category": "General purpose",
565 | "gpuNum": 0,
566 | "hideHardwareSpecs": false,
567 | "memoryGiB": 384,
568 | "name": "ml.m5d.24xlarge",
569 | "vcpuNum": 96
570 | },
571 | {
572 | "_defaultOrder": 20,
573 | "_isFastLaunch": false,
574 | "category": "General purpose",
575 | "gpuNum": 0,
576 | "hideHardwareSpecs": true,
577 | "memoryGiB": 0,
578 | "name": "ml.geospatial.interactive",
579 | "supportedImageNames": [
580 | "sagemaker-geospatial-v1-0"
581 | ],
582 | "vcpuNum": 0
583 | },
584 | {
585 | "_defaultOrder": 21,
586 | "_isFastLaunch": true,
587 | "category": "Compute optimized",
588 | "gpuNum": 0,
589 | "hideHardwareSpecs": false,
590 | "memoryGiB": 4,
591 | "name": "ml.c5.large",
592 | "vcpuNum": 2
593 | },
594 | {
595 | "_defaultOrder": 22,
596 | "_isFastLaunch": false,
597 | "category": "Compute optimized",
598 | "gpuNum": 0,
599 | "hideHardwareSpecs": false,
600 | "memoryGiB": 8,
601 | "name": "ml.c5.xlarge",
602 | "vcpuNum": 4
603 | },
604 | {
605 | "_defaultOrder": 23,
606 | "_isFastLaunch": false,
607 | "category": "Compute optimized",
608 | "gpuNum": 0,
609 | "hideHardwareSpecs": false,
610 | "memoryGiB": 16,
611 | "name": "ml.c5.2xlarge",
612 | "vcpuNum": 8
613 | },
614 | {
615 | "_defaultOrder": 24,
616 | "_isFastLaunch": false,
617 | "category": "Compute optimized",
618 | "gpuNum": 0,
619 | "hideHardwareSpecs": false,
620 | "memoryGiB": 32,
621 | "name": "ml.c5.4xlarge",
622 | "vcpuNum": 16
623 | },
624 | {
625 | "_defaultOrder": 25,
626 | "_isFastLaunch": false,
627 | "category": "Compute optimized",
628 | "gpuNum": 0,
629 | "hideHardwareSpecs": false,
630 | "memoryGiB": 72,
631 | "name": "ml.c5.9xlarge",
632 | "vcpuNum": 36
633 | },
634 | {
635 | "_defaultOrder": 26,
636 | "_isFastLaunch": false,
637 | "category": "Compute optimized",
638 | "gpuNum": 0,
639 | "hideHardwareSpecs": false,
640 | "memoryGiB": 96,
641 | "name": "ml.c5.12xlarge",
642 | "vcpuNum": 48
643 | },
644 | {
645 | "_defaultOrder": 27,
646 | "_isFastLaunch": false,
647 | "category": "Compute optimized",
648 | "gpuNum": 0,
649 | "hideHardwareSpecs": false,
650 | "memoryGiB": 144,
651 | "name": "ml.c5.18xlarge",
652 | "vcpuNum": 72
653 | },
654 | {
655 | "_defaultOrder": 28,
656 | "_isFastLaunch": false,
657 | "category": "Compute optimized",
658 | "gpuNum": 0,
659 | "hideHardwareSpecs": false,
660 | "memoryGiB": 192,
661 | "name": "ml.c5.24xlarge",
662 | "vcpuNum": 96
663 | },
664 | {
665 | "_defaultOrder": 29,
666 | "_isFastLaunch": true,
667 | "category": "Accelerated computing",
668 | "gpuNum": 1,
669 | "hideHardwareSpecs": false,
670 | "memoryGiB": 16,
671 | "name": "ml.g4dn.xlarge",
672 | "vcpuNum": 4
673 | },
674 | {
675 | "_defaultOrder": 30,
676 | "_isFastLaunch": false,
677 | "category": "Accelerated computing",
678 | "gpuNum": 1,
679 | "hideHardwareSpecs": false,
680 | "memoryGiB": 32,
681 | "name": "ml.g4dn.2xlarge",
682 | "vcpuNum": 8
683 | },
684 | {
685 | "_defaultOrder": 31,
686 | "_isFastLaunch": false,
687 | "category": "Accelerated computing",
688 | "gpuNum": 1,
689 | "hideHardwareSpecs": false,
690 | "memoryGiB": 64,
691 | "name": "ml.g4dn.4xlarge",
692 | "vcpuNum": 16
693 | },
694 | {
695 | "_defaultOrder": 32,
696 | "_isFastLaunch": false,
697 | "category": "Accelerated computing",
698 | "gpuNum": 1,
699 | "hideHardwareSpecs": false,
700 | "memoryGiB": 128,
701 | "name": "ml.g4dn.8xlarge",
702 | "vcpuNum": 32
703 | },
704 | {
705 | "_defaultOrder": 33,
706 | "_isFastLaunch": false,
707 | "category": "Accelerated computing",
708 | "gpuNum": 4,
709 | "hideHardwareSpecs": false,
710 | "memoryGiB": 192,
711 | "name": "ml.g4dn.12xlarge",
712 | "vcpuNum": 48
713 | },
714 | {
715 | "_defaultOrder": 34,
716 | "_isFastLaunch": false,
717 | "category": "Accelerated computing",
718 | "gpuNum": 1,
719 | "hideHardwareSpecs": false,
720 | "memoryGiB": 256,
721 | "name": "ml.g4dn.16xlarge",
722 | "vcpuNum": 64
723 | },
724 | {
725 | "_defaultOrder": 35,
726 | "_isFastLaunch": false,
727 | "category": "Accelerated computing",
728 | "gpuNum": 1,
729 | "hideHardwareSpecs": false,
730 | "memoryGiB": 61,
731 | "name": "ml.p3.2xlarge",
732 | "vcpuNum": 8
733 | },
734 | {
735 | "_defaultOrder": 36,
736 | "_isFastLaunch": false,
737 | "category": "Accelerated computing",
738 | "gpuNum": 4,
739 | "hideHardwareSpecs": false,
740 | "memoryGiB": 244,
741 | "name": "ml.p3.8xlarge",
742 | "vcpuNum": 32
743 | },
744 | {
745 | "_defaultOrder": 37,
746 | "_isFastLaunch": false,
747 | "category": "Accelerated computing",
748 | "gpuNum": 8,
749 | "hideHardwareSpecs": false,
750 | "memoryGiB": 488,
751 | "name": "ml.p3.16xlarge",
752 | "vcpuNum": 64
753 | },
754 | {
755 | "_defaultOrder": 38,
756 | "_isFastLaunch": false,
757 | "category": "Accelerated computing",
758 | "gpuNum": 8,
759 | "hideHardwareSpecs": false,
760 | "memoryGiB": 768,
761 | "name": "ml.p3dn.24xlarge",
762 | "vcpuNum": 96
763 | },
764 | {
765 | "_defaultOrder": 39,
766 | "_isFastLaunch": false,
767 | "category": "Memory Optimized",
768 | "gpuNum": 0,
769 | "hideHardwareSpecs": false,
770 | "memoryGiB": 16,
771 | "name": "ml.r5.large",
772 | "vcpuNum": 2
773 | },
774 | {
775 | "_defaultOrder": 40,
776 | "_isFastLaunch": false,
777 | "category": "Memory Optimized",
778 | "gpuNum": 0,
779 | "hideHardwareSpecs": false,
780 | "memoryGiB": 32,
781 | "name": "ml.r5.xlarge",
782 | "vcpuNum": 4
783 | },
784 | {
785 | "_defaultOrder": 41,
786 | "_isFastLaunch": false,
787 | "category": "Memory Optimized",
788 | "gpuNum": 0,
789 | "hideHardwareSpecs": false,
790 | "memoryGiB": 64,
791 | "name": "ml.r5.2xlarge",
792 | "vcpuNum": 8
793 | },
794 | {
795 | "_defaultOrder": 42,
796 | "_isFastLaunch": false,
797 | "category": "Memory Optimized",
798 | "gpuNum": 0,
799 | "hideHardwareSpecs": false,
800 | "memoryGiB": 128,
801 | "name": "ml.r5.4xlarge",
802 | "vcpuNum": 16
803 | },
804 | {
805 | "_defaultOrder": 43,
806 | "_isFastLaunch": false,
807 | "category": "Memory Optimized",
808 | "gpuNum": 0,
809 | "hideHardwareSpecs": false,
810 | "memoryGiB": 256,
811 | "name": "ml.r5.8xlarge",
812 | "vcpuNum": 32
813 | },
814 | {
815 | "_defaultOrder": 44,
816 | "_isFastLaunch": false,
817 | "category": "Memory Optimized",
818 | "gpuNum": 0,
819 | "hideHardwareSpecs": false,
820 | "memoryGiB": 384,
821 | "name": "ml.r5.12xlarge",
822 | "vcpuNum": 48
823 | },
824 | {
825 | "_defaultOrder": 45,
826 | "_isFastLaunch": false,
827 | "category": "Memory Optimized",
828 | "gpuNum": 0,
829 | "hideHardwareSpecs": false,
830 | "memoryGiB": 512,
831 | "name": "ml.r5.16xlarge",
832 | "vcpuNum": 64
833 | },
834 | {
835 | "_defaultOrder": 46,
836 | "_isFastLaunch": false,
837 | "category": "Memory Optimized",
838 | "gpuNum": 0,
839 | "hideHardwareSpecs": false,
840 | "memoryGiB": 768,
841 | "name": "ml.r5.24xlarge",
842 | "vcpuNum": 96
843 | },
844 | {
845 | "_defaultOrder": 47,
846 | "_isFastLaunch": false,
847 | "category": "Accelerated computing",
848 | "gpuNum": 1,
849 | "hideHardwareSpecs": false,
850 | "memoryGiB": 16,
851 | "name": "ml.g5.xlarge",
852 | "vcpuNum": 4
853 | },
854 | {
855 | "_defaultOrder": 48,
856 | "_isFastLaunch": false,
857 | "category": "Accelerated computing",
858 | "gpuNum": 1,
859 | "hideHardwareSpecs": false,
860 | "memoryGiB": 32,
861 | "name": "ml.g5.2xlarge",
862 | "vcpuNum": 8
863 | },
864 | {
865 | "_defaultOrder": 49,
866 | "_isFastLaunch": false,
867 | "category": "Accelerated computing",
868 | "gpuNum": 1,
869 | "hideHardwareSpecs": false,
870 | "memoryGiB": 64,
871 | "name": "ml.g5.4xlarge",
872 | "vcpuNum": 16
873 | },
874 | {
875 | "_defaultOrder": 50,
876 | "_isFastLaunch": false,
877 | "category": "Accelerated computing",
878 | "gpuNum": 1,
879 | "hideHardwareSpecs": false,
880 | "memoryGiB": 128,
881 | "name": "ml.g5.8xlarge",
882 | "vcpuNum": 32
883 | },
884 | {
885 | "_defaultOrder": 51,
886 | "_isFastLaunch": false,
887 | "category": "Accelerated computing",
888 | "gpuNum": 1,
889 | "hideHardwareSpecs": false,
890 | "memoryGiB": 256,
891 | "name": "ml.g5.16xlarge",
892 | "vcpuNum": 64
893 | },
894 | {
895 | "_defaultOrder": 52,
896 | "_isFastLaunch": false,
897 | "category": "Accelerated computing",
898 | "gpuNum": 4,
899 | "hideHardwareSpecs": false,
900 | "memoryGiB": 192,
901 | "name": "ml.g5.12xlarge",
902 | "vcpuNum": 48
903 | },
904 | {
905 | "_defaultOrder": 53,
906 | "_isFastLaunch": false,
907 | "category": "Accelerated computing",
908 | "gpuNum": 4,
909 | "hideHardwareSpecs": false,
910 | "memoryGiB": 384,
911 | "name": "ml.g5.24xlarge",
912 | "vcpuNum": 96
913 | },
914 | {
915 | "_defaultOrder": 54,
916 | "_isFastLaunch": false,
917 | "category": "Accelerated computing",
918 | "gpuNum": 8,
919 | "hideHardwareSpecs": false,
920 | "memoryGiB": 768,
921 | "name": "ml.g5.48xlarge",
922 | "vcpuNum": 192
923 | },
924 | {
925 | "_defaultOrder": 55,
926 | "_isFastLaunch": false,
927 | "category": "Accelerated computing",
928 | "gpuNum": 8,
929 | "hideHardwareSpecs": false,
930 | "memoryGiB": 1152,
931 | "name": "ml.p4d.24xlarge",
932 | "vcpuNum": 96
933 | },
934 | {
935 | "_defaultOrder": 56,
936 | "_isFastLaunch": false,
937 | "category": "Accelerated computing",
938 | "gpuNum": 8,
939 | "hideHardwareSpecs": false,
940 | "memoryGiB": 1152,
941 | "name": "ml.p4de.24xlarge",
942 | "vcpuNum": 96
943 | }
944 | ],
945 | "instance_type": "ml.t3.medium",
946 | "kernelspec": {
947 | "display_name": "Python 3 (Data Science)",
948 | "language": "python",
949 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
950 | },
951 | "language_info": {
952 | "codemirror_mode": {
953 | "name": "ipython",
954 | "version": 3
955 | },
956 | "file_extension": ".py",
957 | "mimetype": "text/x-python",
958 | "name": "python",
959 | "nbconvert_exporter": "python",
960 | "pygments_lexer": "ipython3",
961 | "version": "3.7.10"
962 | }
963 | },
964 | "nbformat": 4,
965 | "nbformat_minor": 5
966 | }
967 |
--------------------------------------------------------------------------------
/02-deploy-text-generation-model.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "ce1c37fc-91a5-49df-804f-92319ed8a678",
6 | "metadata": {},
7 | "source": [
8 | "## Deploy Text Generation Model (FLAN-T5 XXL)"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "cd154824-add4-46d3-a47d-a3dc317c5847",
14 | "metadata": {},
15 | "source": [
16 | "#### Imports "
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "id": "66e8ab21-8a0f-405c-a706-3aa5608a04a8",
23 | "metadata": {
24 | "tags": []
25 | },
26 | "outputs": [],
27 | "source": [
28 | "from sagemaker.jumpstart.notebook_utils import list_jumpstart_models\n",
29 | "from sagemaker.predictor import Predictor\n",
30 | "from sagemaker import get_execution_role\n",
31 | "from sagemaker import ModelPackage\n",
32 | "from sagemaker.model import Model\n",
33 | "from sagemaker import image_uris \n",
34 | "from sagemaker import model_uris\n",
35 | "import numpy as np\n",
36 | "import sagemaker\n",
37 | "import logging\n",
38 | "import boto3\n",
39 | "import time\n",
40 | "import json"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "id": "38c09c3f-4302-40c2-8191-c730c32b5dbc",
46 | "metadata": {},
47 | "source": [
48 | "##### Set up logging "
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 3,
54 | "id": "594a1c5c-a125-4cce-a538-3cfc840edcd7",
55 | "metadata": {
56 | "tags": []
57 | },
58 | "outputs": [],
59 | "source": [
60 | "logger = logging.getLogger('sagemaker')\n",
61 | "logger.setLevel(logging.DEBUG)\n",
62 | "logger.addHandler(logging.StreamHandler())"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "id": "cd0e20e0-9c4b-4487-ae23-995cf33807a2",
68 | "metadata": {},
69 | "source": [
70 | "##### Log versions of dependencies "
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 4,
76 | "id": "48c47fd6-fd9f-4da8-94d1-7c0b3d23490b",
77 | "metadata": {
78 | "tags": []
79 | },
80 | "outputs": [
81 | {
82 | "name": "stderr",
83 | "output_type": "stream",
84 | "text": [
85 | "Using sagemaker==2.145.0\n",
86 | "Using boto3==1.26.111\n"
87 | ]
88 | }
89 | ],
90 | "source": [
91 | "logger.info(f'Using sagemaker=={sagemaker.__version__}')\n",
92 | "logger.info(f'Using boto3=={boto3.__version__}')"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "id": "38c2bb8a-a711-4c2c-aaf0-04cd2a22bc08",
98 | "metadata": {},
99 | "source": [
100 | "#### Set up essentials "
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": 5,
106 | "id": "6718b0cf-42bf-42f8-83a5-89c2189891a5",
107 | "metadata": {
108 | "tags": []
109 | },
110 | "outputs": [
111 | {
112 | "name": "stderr",
113 | "output_type": "stream",
114 | "text": [
115 | "Region = us-east-1\n"
116 | ]
117 | }
118 | ],
119 | "source": [
120 | "region = boto3.Session().region_name\n",
121 | "logger.info(f'Region = {region}')"
122 | ]
123 | },
124 | {
125 | "cell_type": "markdown",
126 | "id": "93779cf2-1d73-48b7-8e9c-7a58a0d1e23a",
127 | "metadata": {},
128 | "source": [
129 | "##### Get list of language models available in the SageMaker JumpStart model hub"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 6,
135 | "id": "ef740ccc-04b9-49cd-a9f2-b00a1638d7a5",
136 | "metadata": {
137 | "tags": []
138 | },
139 | "outputs": [
140 | {
141 | "name": "stderr",
142 | "output_type": "stream",
143 | "text": [
144 | "Total number of models in SageMaker JumpStart hub = 679\n"
145 | ]
146 | }
147 | ],
148 | "source": [
149 | "models = list_jumpstart_models()\n",
150 | "logger.info(f'Total number of models in SageMaker JumpStart hub = {len(models)}')"
151 | ]
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "id": "bf7702b6-31ca-4166-9b80-27f2cd9019fc",
156 | "metadata": {},
157 | "source": [
158 | "##### Setup inference deployment config params"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 7,
164 | "id": "44235c29-162b-468d-8a7f-7024a852100e",
165 | "metadata": {},
166 | "outputs": [
167 | {
168 | "name": "stderr",
169 | "output_type": "stream",
170 | "text": [
171 | "Role => arn:aws:iam::119174016168:role/service-role/AmazonSageMaker-ExecutionRole-20211014T093628\n"
172 | ]
173 | }
174 | ],
175 | "source": [
176 | "MODEL_ID = 'huggingface-text2text-flan-t5-xxl' # this is hard-coded\n",
177 | "MODEL_VERSION = '*'\n",
178 | "INSTANCE_TYPE = 'ml.g4dn.12xlarge'\n",
179 | "INSTANCE_COUNT = 1\n",
180 | "IMAGE_SCOPE = 'inference'\n",
181 | "MODEL_DATA_DOWNLOAD_TIMEOUT = 3600 # in seconds\n",
182 | "CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT = 3600\n",
183 | "CONTENT_TYPE = 'application/json'\n",
184 | "\n",
185 | "# set up roles and clients \n",
186 | "client = boto3.client('sagemaker-runtime')\n",
187 | "ROLE = get_execution_role()\n",
188 | "logger.info(f'Role => {ROLE}')"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": 8,
194 | "id": "d5647e3c-8d3f-4bac-8229-3c5a2de5233b",
195 | "metadata": {},
196 | "outputs": [
197 | {
198 | "name": "stderr",
199 | "output_type": "stream",
200 | "text": [
201 | "Endpoint name: flan-xxl-1686852282\n"
202 | ]
203 | }
204 | ],
205 | "source": [
206 | "unix_time = int(time.time())\n",
207 | "\n",
208 | "endpoint_name = f'flan-xxl-{unix_time}'\n",
209 | "logger.info(f'Endpoint name: {endpoint_name}')"
210 | ]
211 | },
212 | {
213 | "cell_type": "markdown",
214 | "id": "97b8cdaf-6b26-4fa4-8d01-98c14c7f7f97",
215 | "metadata": {},
216 | "source": [
217 | "#### Retrieve Image and Model URIs"
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": 9,
223 | "id": "fe5b3bcc-3ed9-4a8e-bc8e-32fcaab25322",
224 | "metadata": {},
225 | "outputs": [
226 | {
227 | "name": "stderr",
228 | "output_type": "stream",
229 | "text": [
230 | "Deploy image URI => 763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04\n"
231 | ]
232 | }
233 | ],
234 | "source": [
235 | "deploy_image_uri = image_uris.retrieve(region=None, \n",
236 | " framework=None, \n",
237 | " image_scope=IMAGE_SCOPE, \n",
238 | " model_id=MODEL_ID, \n",
239 | " model_version=MODEL_VERSION, \n",
240 | " instance_type=INSTANCE_TYPE)\n",
241 | "logger.info(f'Deploy image URI => {deploy_image_uri}')"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": 10,
247 | "id": "b7abadcf-e5f1-4253-b1e0-fdda825b0099",
248 | "metadata": {},
249 | "outputs": [
250 | {
251 | "name": "stderr",
252 | "output_type": "stream",
253 | "text": [
254 | "Model URI => s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.1.1/infer-prepack-huggingface-text2text-flan-t5-xxl.tar.gz\n"
255 | ]
256 | }
257 | ],
258 | "source": [
259 | "model_uri = model_uris.retrieve(model_id=MODEL_ID, \n",
260 | " model_version=MODEL_VERSION, \n",
261 | " model_scope=IMAGE_SCOPE)\n",
262 | "logger.info(f'Model URI => {model_uri}')"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": 11,
268 | "id": "a24ad3db-6a79-453a-a74f-53d7d9bb5a8f",
269 | "metadata": {
270 | "tags": []
271 | },
272 | "outputs": [],
273 | "source": [
274 | "env = {\n",
275 | " 'SAGEMAKER_MODEL_SERVER_TIMEOUT': str(3600),\n",
276 | " 'MODEL_CACHE_ROOT': '/opt/ml/model', \n",
277 | " 'SAGEMAKER_ENV': '1',\n",
278 | " 'SAGEMAKER_SUBMIT_DIRECTORY': '/opt/ml/model/code/',\n",
279 | " 'SAGEMAKER_PROGRAM': 'inference.py',\n",
280 | " 'SAGEMAKER_MODEL_SERVER_WORKERS': '1', \n",
281 | " 'TS_DEFAULT_WORKERS_PER_MODEL': '1', \n",
282 | "}"
283 | ]
284 | },
285 | {
286 | "cell_type": "markdown",
287 | "id": "70caec84-ece2-4f96-9c71-812eeaa9215d",
288 | "metadata": {},
289 | "source": [
290 | "#### Create SageMaker model"
291 | ]
292 | },
293 | {
294 | "cell_type": "code",
295 | "execution_count": 12,
296 | "id": "9dba6599-3a10-40d4-96d9-5af82823131e",
297 | "metadata": {
298 | "tags": []
299 | },
300 | "outputs": [],
301 | "source": [
302 | "model_name = endpoint_name.replace('huggingface-textgeneration2-gpt-', '')\n",
303 | "model = Model(image_uri=deploy_image_uri, \n",
304 | " model_data=model_uri, \n",
305 | " role=ROLE, \n",
306 | " predictor_cls=Predictor, \n",
307 | " name=model_name, \n",
308 | " env=env)"
309 | ]
310 | },
311 | {
312 | "cell_type": "markdown",
313 | "id": "5d119629-e939-4a3a-896d-73dc33e57187",
314 | "metadata": {},
315 | "source": [
316 | "#### Deploy text generation model as SageMaker endpoint for real-time synchronous inference"
317 | ]
318 | },
319 | {
320 | "cell_type": "code",
321 | "execution_count": null,
322 | "id": "eb07c4cb-cae1-4af7-9a86-38d9e6851c73",
323 | "metadata": {
324 | "tags": []
325 | },
326 | "outputs": [
327 | {
328 | "name": "stderr",
329 | "output_type": "stream",
330 | "text": [
331 | "Creating model with name: flan-xxl-1686852282\n",
332 | "CreateModel request: {\n",
333 | " \"ModelName\": \"flan-xxl-1686852282\",\n",
334 | " \"ExecutionRoleArn\": \"arn:aws:iam::119174016168:role/service-role/AmazonSageMaker-ExecutionRole-20211014T093628\",\n",
335 | " \"PrimaryContainer\": {\n",
336 | " \"Image\": \"763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04\",\n",
337 | " \"Environment\": {\n",
338 | " \"SAGEMAKER_MODEL_SERVER_TIMEOUT\": \"3600\",\n",
339 | " \"MODEL_CACHE_ROOT\": \"/opt/ml/model\",\n",
340 | " \"SAGEMAKER_ENV\": \"1\",\n",
341 | " \"SAGEMAKER_SUBMIT_DIRECTORY\": \"/opt/ml/model/code/\",\n",
342 | " \"SAGEMAKER_PROGRAM\": \"inference.py\",\n",
343 | " \"SAGEMAKER_MODEL_SERVER_WORKERS\": \"1\",\n",
344 | " \"TS_DEFAULT_WORKERS_PER_MODEL\": \"1\"\n",
345 | " },\n",
346 | " \"ModelDataUrl\": \"s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.1.1/infer-prepack-huggingface-text2text-flan-t5-xxl.tar.gz\"\n",
347 | " },\n",
348 | " \"Tags\": [\n",
349 | " {\n",
350 | " \"Key\": \"aws-jumpstart-inference-model-uri\",\n",
351 | " \"Value\": \"s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.1.1/infer-prepack-huggingface-text2text-flan-t5-xxl.tar.gz\"\n",
352 | " }\n",
353 | " ]\n",
354 | "}\n",
355 | "Creating endpoint-config with name flan-xxl-1686852282\n",
356 | "Creating endpoint with name flan-xxl-1686852282\n"
357 | ]
358 | },
359 | {
360 | "name": "stdout",
361 | "output_type": "stream",
362 | "text": [
363 | "--------------------!CPU times: user 158 ms, sys: 17.9 ms, total: 176 ms\n",
364 | "Wall time: 10min 34s\n"
365 | ]
366 | }
367 | ],
368 | "source": [
369 | "%%time\n",
370 | "\n",
371 | "_ = model.deploy(initial_instance_count=INSTANCE_COUNT, \n",
372 | " instance_type=INSTANCE_TYPE, \n",
373 | " endpoint_name=endpoint_name, \n",
374 | " model_data_download_timeout=MODEL_DATA_DOWNLOAD_TIMEOUT, \n",
375 | " container_startup_health_check_timeout=CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT)"
376 | ]
377 | },
378 | {
379 | "cell_type": "markdown",
380 | "id": "3bed6d31-f6ae-41e4-9a56-23c7e99cd019",
381 | "metadata": {},
382 | "source": [
383 | "### II. Invoke SageMaker endpoint to test the deployed model for natural language understanding (NLU) and natural language generation (NLG) tasks"
384 | ]
385 | },
386 | {
387 | "cell_type": "markdown",
388 | "id": "1353ee9e-9beb-4bc7-acff-047c3ebf2038",
389 | "metadata": {},
390 | "source": [
391 | "***\n",
392 | "This model also supports many advanced parameters while performing inference. They include:\n",
393 | "\n",
394 | "* **max_length:** Model generates text until the output length (which includes the input context length) reaches `max_length`. If specified, it must be a positive integer.\n",
395 | "* **num_return_sequences:** Number of output sequences returned. If specified, it must be a positive integer.\n",
396 | "* **num_beams:** Number of beams used in beam search. If specified, it must be an integer greater than or equal to `num_return_sequences`.\n",
397 | "* **no_repeat_ngram_size:** Model ensures that no sequence of `no_repeat_ngram_size` consecutive words is repeated in the output sequence. If specified, it must be a positive integer greater than 1.\n",
398 | "* **temperature:** Controls the randomness in the output. A higher temperature makes low-probability words more likely to appear in the output sequence, while a lower temperature favors high-probability words. If `temperature` -> 0, it results in greedy decoding. If specified, it must be a positive float.\n",
399 | "* **early_stopping:** If True, text generation is finished when all beam hypotheses reach the end of sentence token. If specified, it must be boolean.\n",
400 | "* **do_sample:** If True, sample the next word as per the likelihood. If specified, it must be boolean.\n",
401 | "* **top_k:** In each step of text generation, sample from only the `top_k` most likely words. If specified, it must be a positive integer.\n",
402 | "* **top_p:** In each step of text generation, sample from the smallest possible set of words with cumulative probability `top_p`. If specified, it must be a float between 0 and 1.\n",
403 | "* **seed:** Fix the randomized state for reproducibility. If specified, it must be an integer.\n",
404 | "\n",
405 | "We may specify any subset of the parameters mentioned above while invoking an endpoint. Next, we show an example of how to invoke the endpoint with these arguments.\n",
406 | "\n",
407 | "***"
408 | ]
409 | },
410 | {
411 | "cell_type": "code",
412 | "execution_count": null,
413 | "id": "5e5b8e2e-cd62-413d-b31c-a6004bf39b33",
414 | "metadata": {
415 | "tags": []
416 | },
417 | "outputs": [],
418 | "source": [
419 | "prompt = \"\"\"Me: hi\n",
420 | "AI: Hello. How can I help you?\n",
421 | "Me: How are you doing?\n",
422 | "AI:\n",
423 | "\"\"\""
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": null,
429 | "id": "56919d90-0546-4e68-a294-5b114abbd3aa",
430 | "metadata": {
431 | "tags": []
432 | },
433 | "outputs": [],
434 | "source": [
435 | "payload = {\n",
436 | " 'text_inputs': prompt,\n",
437 | " 'seed': 123,\n",
438 | " 'temperature': 0.1,\n",
439 | " 'no_repeat_ngram_size': 2,\n",
440 | " 'max_length': 128\n",
441 | "}"
442 | ]
443 | },
444 | {
445 | "cell_type": "code",
446 | "execution_count": null,
447 | "id": "461a651f-da5a-40b6-8202-d9097efb2f02",
448 | "metadata": {
449 | "tags": []
450 | },
451 | "outputs": [],
452 | "source": [
453 | "payload = json.dumps(payload).encode('utf-8')"
454 | ]
455 | },
456 | {
457 | "cell_type": "code",
458 | "execution_count": null,
459 | "id": "ddf94fd3-fa77-4a48-bc43-e2536e4b621c",
460 | "metadata": {
461 | "tags": []
462 | },
463 | "outputs": [],
464 | "source": [
465 | "%%time \n",
466 | "response = client.invoke_endpoint(EndpointName='huggingface-text2text-flan-t5-xl-1679769737', \n",
467 | " ContentType=CONTENT_TYPE, \n",
468 | " Body=payload)"
469 | ]
470 | },
471 | {
472 | "cell_type": "markdown",
473 | "id": "354e0f71-d4a7-4779-b269-044d808565d3",
474 | "metadata": {},
475 | "source": [
476 | "#### Parse response to extract completion"
477 | ]
478 | },
479 | {
480 | "cell_type": "code",
481 | "execution_count": null,
482 | "id": "5995a882-7ef1-4e9a-8bba-bcb627dab002",
483 | "metadata": {
484 | "tags": []
485 | },
486 | "outputs": [],
487 | "source": [
488 | "model_predictions = json.loads(response['Body'].read())\n",
489 | "completion = model_predictions['generated_texts'][0].strip()\n",
490 | "completion"
491 | ]
492 | },
493 | {
494 | "cell_type": "code",
495 | "execution_count": null,
496 | "id": "5863f215-b007-4e73-ba12-1e1ffb44a637",
497 | "metadata": {},
498 | "outputs": [],
499 | "source": []
500 | }
501 | ],
502 | "metadata": {
503 | "availableInstances": [
504 | {
505 | "_defaultOrder": 0,
506 | "_isFastLaunch": true,
507 | "category": "General purpose",
508 | "gpuNum": 0,
509 | "hideHardwareSpecs": false,
510 | "memoryGiB": 4,
511 | "name": "ml.t3.medium",
512 | "vcpuNum": 2
513 | },
514 | {
515 | "_defaultOrder": 1,
516 | "_isFastLaunch": false,
517 | "category": "General purpose",
518 | "gpuNum": 0,
519 | "hideHardwareSpecs": false,
520 | "memoryGiB": 8,
521 | "name": "ml.t3.large",
522 | "vcpuNum": 2
523 | },
524 | {
525 | "_defaultOrder": 2,
526 | "_isFastLaunch": false,
527 | "category": "General purpose",
528 | "gpuNum": 0,
529 | "hideHardwareSpecs": false,
530 | "memoryGiB": 16,
531 | "name": "ml.t3.xlarge",
532 | "vcpuNum": 4
533 | },
534 | {
535 | "_defaultOrder": 3,
536 | "_isFastLaunch": false,
537 | "category": "General purpose",
538 | "gpuNum": 0,
539 | "hideHardwareSpecs": false,
540 | "memoryGiB": 32,
541 | "name": "ml.t3.2xlarge",
542 | "vcpuNum": 8
543 | },
544 | {
545 | "_defaultOrder": 4,
546 | "_isFastLaunch": true,
547 | "category": "General purpose",
548 | "gpuNum": 0,
549 | "hideHardwareSpecs": false,
550 | "memoryGiB": 8,
551 | "name": "ml.m5.large",
552 | "vcpuNum": 2
553 | },
554 | {
555 | "_defaultOrder": 5,
556 | "_isFastLaunch": false,
557 | "category": "General purpose",
558 | "gpuNum": 0,
559 | "hideHardwareSpecs": false,
560 | "memoryGiB": 16,
561 | "name": "ml.m5.xlarge",
562 | "vcpuNum": 4
563 | },
564 | {
565 | "_defaultOrder": 6,
566 | "_isFastLaunch": false,
567 | "category": "General purpose",
568 | "gpuNum": 0,
569 | "hideHardwareSpecs": false,
570 | "memoryGiB": 32,
571 | "name": "ml.m5.2xlarge",
572 | "vcpuNum": 8
573 | },
574 | {
575 | "_defaultOrder": 7,
576 | "_isFastLaunch": false,
577 | "category": "General purpose",
578 | "gpuNum": 0,
579 | "hideHardwareSpecs": false,
580 | "memoryGiB": 64,
581 | "name": "ml.m5.4xlarge",
582 | "vcpuNum": 16
583 | },
584 | {
585 | "_defaultOrder": 8,
586 | "_isFastLaunch": false,
587 | "category": "General purpose",
588 | "gpuNum": 0,
589 | "hideHardwareSpecs": false,
590 | "memoryGiB": 128,
591 | "name": "ml.m5.8xlarge",
592 | "vcpuNum": 32
593 | },
594 | {
595 | "_defaultOrder": 9,
596 | "_isFastLaunch": false,
597 | "category": "General purpose",
598 | "gpuNum": 0,
599 | "hideHardwareSpecs": false,
600 | "memoryGiB": 192,
601 | "name": "ml.m5.12xlarge",
602 | "vcpuNum": 48
603 | },
604 | {
605 | "_defaultOrder": 10,
606 | "_isFastLaunch": false,
607 | "category": "General purpose",
608 | "gpuNum": 0,
609 | "hideHardwareSpecs": false,
610 | "memoryGiB": 256,
611 | "name": "ml.m5.16xlarge",
612 | "vcpuNum": 64
613 | },
614 | {
615 | "_defaultOrder": 11,
616 | "_isFastLaunch": false,
617 | "category": "General purpose",
618 | "gpuNum": 0,
619 | "hideHardwareSpecs": false,
620 | "memoryGiB": 384,
621 | "name": "ml.m5.24xlarge",
622 | "vcpuNum": 96
623 | },
624 | {
625 | "_defaultOrder": 12,
626 | "_isFastLaunch": false,
627 | "category": "General purpose",
628 | "gpuNum": 0,
629 | "hideHardwareSpecs": false,
630 | "memoryGiB": 8,
631 | "name": "ml.m5d.large",
632 | "vcpuNum": 2
633 | },
634 | {
635 | "_defaultOrder": 13,
636 | "_isFastLaunch": false,
637 | "category": "General purpose",
638 | "gpuNum": 0,
639 | "hideHardwareSpecs": false,
640 | "memoryGiB": 16,
641 | "name": "ml.m5d.xlarge",
642 | "vcpuNum": 4
643 | },
644 | {
645 | "_defaultOrder": 14,
646 | "_isFastLaunch": false,
647 | "category": "General purpose",
648 | "gpuNum": 0,
649 | "hideHardwareSpecs": false,
650 | "memoryGiB": 32,
651 | "name": "ml.m5d.2xlarge",
652 | "vcpuNum": 8
653 | },
654 | {
655 | "_defaultOrder": 15,
656 | "_isFastLaunch": false,
657 | "category": "General purpose",
658 | "gpuNum": 0,
659 | "hideHardwareSpecs": false,
660 | "memoryGiB": 64,
661 | "name": "ml.m5d.4xlarge",
662 | "vcpuNum": 16
663 | },
664 | {
665 | "_defaultOrder": 16,
666 | "_isFastLaunch": false,
667 | "category": "General purpose",
668 | "gpuNum": 0,
669 | "hideHardwareSpecs": false,
670 | "memoryGiB": 128,
671 | "name": "ml.m5d.8xlarge",
672 | "vcpuNum": 32
673 | },
674 | {
675 | "_defaultOrder": 17,
676 | "_isFastLaunch": false,
677 | "category": "General purpose",
678 | "gpuNum": 0,
679 | "hideHardwareSpecs": false,
680 | "memoryGiB": 192,
681 | "name": "ml.m5d.12xlarge",
682 | "vcpuNum": 48
683 | },
684 | {
685 | "_defaultOrder": 18,
686 | "_isFastLaunch": false,
687 | "category": "General purpose",
688 | "gpuNum": 0,
689 | "hideHardwareSpecs": false,
690 | "memoryGiB": 256,
691 | "name": "ml.m5d.16xlarge",
692 | "vcpuNum": 64
693 | },
694 | {
695 | "_defaultOrder": 19,
696 | "_isFastLaunch": false,
697 | "category": "General purpose",
698 | "gpuNum": 0,
699 | "hideHardwareSpecs": false,
700 | "memoryGiB": 384,
701 | "name": "ml.m5d.24xlarge",
702 | "vcpuNum": 96
703 | },
704 | {
705 | "_defaultOrder": 20,
706 | "_isFastLaunch": false,
707 | "category": "General purpose",
708 | "gpuNum": 0,
709 | "hideHardwareSpecs": true,
710 | "memoryGiB": 0,
711 | "name": "ml.geospatial.interactive",
712 | "supportedImageNames": [
713 | "sagemaker-geospatial-v1-0"
714 | ],
715 | "vcpuNum": 0
716 | },
717 | {
718 | "_defaultOrder": 21,
719 | "_isFastLaunch": true,
720 | "category": "Compute optimized",
721 | "gpuNum": 0,
722 | "hideHardwareSpecs": false,
723 | "memoryGiB": 4,
724 | "name": "ml.c5.large",
725 | "vcpuNum": 2
726 | },
727 | {
728 | "_defaultOrder": 22,
729 | "_isFastLaunch": false,
730 | "category": "Compute optimized",
731 | "gpuNum": 0,
732 | "hideHardwareSpecs": false,
733 | "memoryGiB": 8,
734 | "name": "ml.c5.xlarge",
735 | "vcpuNum": 4
736 | },
737 | {
738 | "_defaultOrder": 23,
739 | "_isFastLaunch": false,
740 | "category": "Compute optimized",
741 | "gpuNum": 0,
742 | "hideHardwareSpecs": false,
743 | "memoryGiB": 16,
744 | "name": "ml.c5.2xlarge",
745 | "vcpuNum": 8
746 | },
747 | {
748 | "_defaultOrder": 24,
749 | "_isFastLaunch": false,
750 | "category": "Compute optimized",
751 | "gpuNum": 0,
752 | "hideHardwareSpecs": false,
753 | "memoryGiB": 32,
754 | "name": "ml.c5.4xlarge",
755 | "vcpuNum": 16
756 | },
757 | {
758 | "_defaultOrder": 25,
759 | "_isFastLaunch": false,
760 | "category": "Compute optimized",
761 | "gpuNum": 0,
762 | "hideHardwareSpecs": false,
763 | "memoryGiB": 72,
764 | "name": "ml.c5.9xlarge",
765 | "vcpuNum": 36
766 | },
767 | {
768 | "_defaultOrder": 26,
769 | "_isFastLaunch": false,
770 | "category": "Compute optimized",
771 | "gpuNum": 0,
772 | "hideHardwareSpecs": false,
773 | "memoryGiB": 96,
774 | "name": "ml.c5.12xlarge",
775 | "vcpuNum": 48
776 | },
777 | {
778 | "_defaultOrder": 27,
779 | "_isFastLaunch": false,
780 | "category": "Compute optimized",
781 | "gpuNum": 0,
782 | "hideHardwareSpecs": false,
783 | "memoryGiB": 144,
784 | "name": "ml.c5.18xlarge",
785 | "vcpuNum": 72
786 | },
787 | {
788 | "_defaultOrder": 28,
789 | "_isFastLaunch": false,
790 | "category": "Compute optimized",
791 | "gpuNum": 0,
792 | "hideHardwareSpecs": false,
793 | "memoryGiB": 192,
794 | "name": "ml.c5.24xlarge",
795 | "vcpuNum": 96
796 | },
797 | {
798 | "_defaultOrder": 29,
799 | "_isFastLaunch": true,
800 | "category": "Accelerated computing",
801 | "gpuNum": 1,
802 | "hideHardwareSpecs": false,
803 | "memoryGiB": 16,
804 | "name": "ml.g4dn.xlarge",
805 | "vcpuNum": 4
806 | },
807 | {
808 | "_defaultOrder": 30,
809 | "_isFastLaunch": false,
810 | "category": "Accelerated computing",
811 | "gpuNum": 1,
812 | "hideHardwareSpecs": false,
813 | "memoryGiB": 32,
814 | "name": "ml.g4dn.2xlarge",
815 | "vcpuNum": 8
816 | },
817 | {
818 | "_defaultOrder": 31,
819 | "_isFastLaunch": false,
820 | "category": "Accelerated computing",
821 | "gpuNum": 1,
822 | "hideHardwareSpecs": false,
823 | "memoryGiB": 64,
824 | "name": "ml.g4dn.4xlarge",
825 | "vcpuNum": 16
826 | },
827 | {
828 | "_defaultOrder": 32,
829 | "_isFastLaunch": false,
830 | "category": "Accelerated computing",
831 | "gpuNum": 1,
832 | "hideHardwareSpecs": false,
833 | "memoryGiB": 128,
834 | "name": "ml.g4dn.8xlarge",
835 | "vcpuNum": 32
836 | },
837 | {
838 | "_defaultOrder": 33,
839 | "_isFastLaunch": false,
840 | "category": "Accelerated computing",
841 | "gpuNum": 4,
842 | "hideHardwareSpecs": false,
843 | "memoryGiB": 192,
844 | "name": "ml.g4dn.12xlarge",
845 | "vcpuNum": 48
846 | },
847 | {
848 | "_defaultOrder": 34,
849 | "_isFastLaunch": false,
850 | "category": "Accelerated computing",
851 | "gpuNum": 1,
852 | "hideHardwareSpecs": false,
853 | "memoryGiB": 256,
854 | "name": "ml.g4dn.16xlarge",
855 | "vcpuNum": 64
856 | },
857 | {
858 | "_defaultOrder": 35,
859 | "_isFastLaunch": false,
860 | "category": "Accelerated computing",
861 | "gpuNum": 1,
862 | "hideHardwareSpecs": false,
863 | "memoryGiB": 61,
864 | "name": "ml.p3.2xlarge",
865 | "vcpuNum": 8
866 | },
867 | {
868 | "_defaultOrder": 36,
869 | "_isFastLaunch": false,
870 | "category": "Accelerated computing",
871 | "gpuNum": 4,
872 | "hideHardwareSpecs": false,
873 | "memoryGiB": 244,
874 | "name": "ml.p3.8xlarge",
875 | "vcpuNum": 32
876 | },
877 | {
878 | "_defaultOrder": 37,
879 | "_isFastLaunch": false,
880 | "category": "Accelerated computing",
881 | "gpuNum": 8,
882 | "hideHardwareSpecs": false,
883 | "memoryGiB": 488,
884 | "name": "ml.p3.16xlarge",
885 | "vcpuNum": 64
886 | },
887 | {
888 | "_defaultOrder": 38,
889 | "_isFastLaunch": false,
890 | "category": "Accelerated computing",
891 | "gpuNum": 8,
892 | "hideHardwareSpecs": false,
893 | "memoryGiB": 768,
894 | "name": "ml.p3dn.24xlarge",
895 | "vcpuNum": 96
896 | },
897 | {
898 | "_defaultOrder": 39,
899 | "_isFastLaunch": false,
900 | "category": "Memory Optimized",
901 | "gpuNum": 0,
902 | "hideHardwareSpecs": false,
903 | "memoryGiB": 16,
904 | "name": "ml.r5.large",
905 | "vcpuNum": 2
906 | },
907 | {
908 | "_defaultOrder": 40,
909 | "_isFastLaunch": false,
910 | "category": "Memory Optimized",
911 | "gpuNum": 0,
912 | "hideHardwareSpecs": false,
913 | "memoryGiB": 32,
914 | "name": "ml.r5.xlarge",
915 | "vcpuNum": 4
916 | },
917 | {
918 | "_defaultOrder": 41,
919 | "_isFastLaunch": false,
920 | "category": "Memory Optimized",
921 | "gpuNum": 0,
922 | "hideHardwareSpecs": false,
923 | "memoryGiB": 64,
924 | "name": "ml.r5.2xlarge",
925 | "vcpuNum": 8
926 | },
927 | {
928 | "_defaultOrder": 42,
929 | "_isFastLaunch": false,
930 | "category": "Memory Optimized",
931 | "gpuNum": 0,
932 | "hideHardwareSpecs": false,
933 | "memoryGiB": 128,
934 | "name": "ml.r5.4xlarge",
935 | "vcpuNum": 16
936 | },
937 | {
938 | "_defaultOrder": 43,
939 | "_isFastLaunch": false,
940 | "category": "Memory Optimized",
941 | "gpuNum": 0,
942 | "hideHardwareSpecs": false,
943 | "memoryGiB": 256,
944 | "name": "ml.r5.8xlarge",
945 | "vcpuNum": 32
946 | },
947 | {
948 | "_defaultOrder": 44,
949 | "_isFastLaunch": false,
950 | "category": "Memory Optimized",
951 | "gpuNum": 0,
952 | "hideHardwareSpecs": false,
953 | "memoryGiB": 384,
954 | "name": "ml.r5.12xlarge",
955 | "vcpuNum": 48
956 | },
957 | {
958 | "_defaultOrder": 45,
959 | "_isFastLaunch": false,
960 | "category": "Memory Optimized",
961 | "gpuNum": 0,
962 | "hideHardwareSpecs": false,
963 | "memoryGiB": 512,
964 | "name": "ml.r5.16xlarge",
965 | "vcpuNum": 64
966 | },
967 | {
968 | "_defaultOrder": 46,
969 | "_isFastLaunch": false,
970 | "category": "Memory Optimized",
971 | "gpuNum": 0,
972 | "hideHardwareSpecs": false,
973 | "memoryGiB": 768,
974 | "name": "ml.r5.24xlarge",
975 | "vcpuNum": 96
976 | },
977 | {
978 | "_defaultOrder": 47,
979 | "_isFastLaunch": false,
980 | "category": "Accelerated computing",
981 | "gpuNum": 1,
982 | "hideHardwareSpecs": false,
983 | "memoryGiB": 16,
984 | "name": "ml.g5.xlarge",
985 | "vcpuNum": 4
986 | },
987 | {
988 | "_defaultOrder": 48,
989 | "_isFastLaunch": false,
990 | "category": "Accelerated computing",
991 | "gpuNum": 1,
992 | "hideHardwareSpecs": false,
993 | "memoryGiB": 32,
994 | "name": "ml.g5.2xlarge",
995 | "vcpuNum": 8
996 | },
997 | {
998 | "_defaultOrder": 49,
999 | "_isFastLaunch": false,
1000 | "category": "Accelerated computing",
1001 | "gpuNum": 1,
1002 | "hideHardwareSpecs": false,
1003 | "memoryGiB": 64,
1004 | "name": "ml.g5.4xlarge",
1005 | "vcpuNum": 16
1006 | },
1007 | {
1008 | "_defaultOrder": 50,
1009 | "_isFastLaunch": false,
1010 | "category": "Accelerated computing",
1011 | "gpuNum": 1,
1012 | "hideHardwareSpecs": false,
1013 | "memoryGiB": 128,
1014 | "name": "ml.g5.8xlarge",
1015 | "vcpuNum": 32
1016 | },
1017 | {
1018 | "_defaultOrder": 51,
1019 | "_isFastLaunch": false,
1020 | "category": "Accelerated computing",
1021 | "gpuNum": 1,
1022 | "hideHardwareSpecs": false,
1023 | "memoryGiB": 256,
1024 | "name": "ml.g5.16xlarge",
1025 | "vcpuNum": 64
1026 | },
1027 | {
1028 | "_defaultOrder": 52,
1029 | "_isFastLaunch": false,
1030 | "category": "Accelerated computing",
1031 | "gpuNum": 4,
1032 | "hideHardwareSpecs": false,
1033 | "memoryGiB": 192,
1034 | "name": "ml.g5.12xlarge",
1035 | "vcpuNum": 48
1036 | },
1037 | {
1038 | "_defaultOrder": 53,
1039 | "_isFastLaunch": false,
1040 | "category": "Accelerated computing",
1041 | "gpuNum": 4,
1042 | "hideHardwareSpecs": false,
1043 | "memoryGiB": 384,
1044 | "name": "ml.g5.24xlarge",
1045 | "vcpuNum": 96
1046 | },
1047 | {
1048 | "_defaultOrder": 54,
1049 | "_isFastLaunch": false,
1050 | "category": "Accelerated computing",
1051 | "gpuNum": 8,
1052 | "hideHardwareSpecs": false,
1053 | "memoryGiB": 768,
1054 | "name": "ml.g5.48xlarge",
1055 | "vcpuNum": 192
1056 | },
1057 | {
1058 | "_defaultOrder": 55,
1059 | "_isFastLaunch": false,
1060 | "category": "Accelerated computing",
1061 | "gpuNum": 8,
1062 | "hideHardwareSpecs": false,
1063 | "memoryGiB": 1152,
1064 | "name": "ml.p4d.24xlarge",
1065 | "vcpuNum": 96
1066 | },
1067 | {
1068 | "_defaultOrder": 56,
1069 | "_isFastLaunch": false,
1070 | "category": "Accelerated computing",
1071 | "gpuNum": 8,
1072 | "hideHardwareSpecs": false,
1073 | "memoryGiB": 1152,
1074 | "name": "ml.p4de.24xlarge",
1075 | "vcpuNum": 96
1076 | }
1077 | ],
1078 | "instance_type": "ml.t3.medium",
1079 | "kernelspec": {
1080 | "display_name": "Python 3 (Data Science)",
1081 | "language": "python",
1082 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
1083 | },
1084 | "language_info": {
1085 | "codemirror_mode": {
1086 | "name": "ipython",
1087 | "version": 3
1088 | },
1089 | "file_extension": ".py",
1090 | "mimetype": "text/x-python",
1091 | "name": "python",
1092 | "nbconvert_exporter": "python",
1093 | "pygments_lexer": "ipython3",
1094 | "version": "3.7.10"
1095 | }
1096 | },
1097 | "nbformat": 4,
1098 | "nbformat_minor": 5
1099 | }
1100 |
--------------------------------------------------------------------------------
/03-create-dynamodb-tables.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "bf90ceb7-4bf5-4516-a28f-a6f028ef301c",
6 | "metadata": {},
7 | "source": [
8 | "## Create DynamoDB Tables "
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "e974bd09-bd15-4c5c-8ab4-60d054b78bc5",
14 | "metadata": {},
15 | "source": [
16 | "#### Imports "
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "id": "222f38ee-7b50-4f88-9381-58c5e128420b",
23 | "metadata": {
24 | "tags": []
25 | },
26 | "outputs": [],
27 | "source": [
28 | "import logging\n",
29 | "import boto3"
30 | ]
31 | },
32 | {
33 | "cell_type": "markdown",
34 | "id": "e2b5c7e8-1be3-4906-b9be-dc5a44f92d09",
35 | "metadata": {},
36 | "source": [
37 | "##### Setup logging"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 3,
43 | "id": "bea6c92d-5fda-4264-9fdc-2e45c55ed7c7",
44 | "metadata": {
45 | "tags": []
46 | },
47 | "outputs": [],
48 | "source": [
49 | "logger = logging.getLogger('sagemaker')\n",
50 | "logger.setLevel(logging.DEBUG)\n",
51 | "logger.addHandler(logging.StreamHandler())"
52 | ]
53 | },
54 | {
55 | "cell_type": "markdown",
56 | "id": "ad53853d-6997-4120-95ac-53a3d555da36",
57 | "metadata": {},
58 | "source": [
59 | "##### Log versions of dependencies "
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 4,
65 | "id": "fbc8a299-aa25-491a-b5de-4c31bbbfea22",
66 | "metadata": {
67 | "tags": []
68 | },
69 | "outputs": [
70 | {
71 | "name": "stderr",
72 | "output_type": "stream",
73 | "text": [
74 | "Using boto3==1.26.111\n"
75 | ]
76 | }
77 | ],
78 | "source": [
79 | "logger.info(f'Using boto3=={boto3.__version__}')"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "id": "744a3c36-6504-4e8c-b5ff-4b9998ecc465",
85 | "metadata": {},
86 | "source": [
87 | "#### Create DynamoDB resource"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": 5,
93 | "id": "dbb83e0c-1dd0-4e6a-8e4c-9859ea63b15a",
94 | "metadata": {
95 | "tags": []
96 | },
97 | "outputs": [],
98 | "source": [
99 | "dynamodb = boto3.resource('dynamodb')"
100 | ]
101 | },
102 | {
103 | "cell_type": "markdown",
104 | "id": "d184dfeb-3ecd-4644-8a0e-54175a8d2e0f",
105 | "metadata": {},
106 | "source": [
107 | "#### Create `conversations` table"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 6,
113 | "id": "69bac2ac-c0fa-488f-9680-81849eb1b676",
114 | "metadata": {
115 | "tags": []
116 | },
117 | "outputs": [],
118 | "source": [
119 | "def create_conversations_table(table_name: str) -> None:\n",
120 | " table = dynamodb.create_table(\n",
121 | " TableName=table_name,\n",
122 | " KeySchema=[\n",
123 | " {'AttributeName': 'session_id', 'KeyType': 'HASH'},\n",
124 | " {'AttributeName': 'timestamp', 'KeyType': 'RANGE'}\n",
125 | " ],\n",
126 | " AttributeDefinitions=[\n",
127 | " {'AttributeName': 'session_id', 'AttributeType': 'S'},\n",
128 | " {'AttributeName': 'timestamp', 'AttributeType': 'N'}\n",
129 | " ],\n",
130 | " ProvisionedThroughput={\n",
131 | " 'ReadCapacityUnits': 5,\n",
132 | " 'WriteCapacityUnits': 5\n",
133 | " }\n",
134 | " )"
135 | ]
136 | },
137 | {
138 | "cell_type": "markdown",
139 | "id": "65486230-dedc-485c-95a5-9da962836f8d",
140 | "metadata": {},
141 | "source": [
142 | "#### Create `sessions` table"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 7,
148 | "id": "5983fcdc-d39b-4f0e-b6a9-d15dbd27928a",
149 | "metadata": {
150 | "tags": []
151 | },
152 | "outputs": [],
153 | "source": [
154 | "def create_sessions_table(table_name: str) -> None:\n",
155 | " table = dynamodb.create_table(\n",
156 | " TableName=table_name,\n",
157 | " KeySchema=[\n",
158 | " {'AttributeName': 'session_id', 'KeyType': 'HASH'}\n",
159 | " ],\n",
160 | " AttributeDefinitions=[\n",
161 | " {'AttributeName': 'session_id', 'AttributeType': 'S'}\n",
162 | " ],\n",
163 | " ProvisionedThroughput={\n",
164 | " 'ReadCapacityUnits': 5,\n",
165 | " 'WriteCapacityUnits': 5\n",
166 | " }\n",
167 | " )"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 8,
173 | "id": "ce937e6b-928d-4622-94e4-944415eb2f11",
174 | "metadata": {},
175 | "outputs": [],
176 | "source": [
177 | "create_conversations_table('conversations')\n",
178 | "create_sessions_table('sessions')"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": null,
184 | "id": "287794f5-b672-4169-b27b-e2bb02343909",
185 | "metadata": {},
186 | "outputs": [],
187 | "source": []
188 | }
189 | ],
190 | "metadata": {
191 | "availableInstances": [
192 | {
193 | "_defaultOrder": 0,
194 | "_isFastLaunch": true,
195 | "category": "General purpose",
196 | "gpuNum": 0,
197 | "hideHardwareSpecs": false,
198 | "memoryGiB": 4,
199 | "name": "ml.t3.medium",
200 | "vcpuNum": 2
201 | },
202 | {
203 | "_defaultOrder": 1,
204 | "_isFastLaunch": false,
205 | "category": "General purpose",
206 | "gpuNum": 0,
207 | "hideHardwareSpecs": false,
208 | "memoryGiB": 8,
209 | "name": "ml.t3.large",
210 | "vcpuNum": 2
211 | },
212 | {
213 | "_defaultOrder": 2,
214 | "_isFastLaunch": false,
215 | "category": "General purpose",
216 | "gpuNum": 0,
217 | "hideHardwareSpecs": false,
218 | "memoryGiB": 16,
219 | "name": "ml.t3.xlarge",
220 | "vcpuNum": 4
221 | },
222 | {
223 | "_defaultOrder": 3,
224 | "_isFastLaunch": false,
225 | "category": "General purpose",
226 | "gpuNum": 0,
227 | "hideHardwareSpecs": false,
228 | "memoryGiB": 32,
229 | "name": "ml.t3.2xlarge",
230 | "vcpuNum": 8
231 | },
232 | {
233 | "_defaultOrder": 4,
234 | "_isFastLaunch": true,
235 | "category": "General purpose",
236 | "gpuNum": 0,
237 | "hideHardwareSpecs": false,
238 | "memoryGiB": 8,
239 | "name": "ml.m5.large",
240 | "vcpuNum": 2
241 | },
242 | {
243 | "_defaultOrder": 5,
244 | "_isFastLaunch": false,
245 | "category": "General purpose",
246 | "gpuNum": 0,
247 | "hideHardwareSpecs": false,
248 | "memoryGiB": 16,
249 | "name": "ml.m5.xlarge",
250 | "vcpuNum": 4
251 | },
252 | {
253 | "_defaultOrder": 6,
254 | "_isFastLaunch": false,
255 | "category": "General purpose",
256 | "gpuNum": 0,
257 | "hideHardwareSpecs": false,
258 | "memoryGiB": 32,
259 | "name": "ml.m5.2xlarge",
260 | "vcpuNum": 8
261 | },
262 | {
263 | "_defaultOrder": 7,
264 | "_isFastLaunch": false,
265 | "category": "General purpose",
266 | "gpuNum": 0,
267 | "hideHardwareSpecs": false,
268 | "memoryGiB": 64,
269 | "name": "ml.m5.4xlarge",
270 | "vcpuNum": 16
271 | },
272 | {
273 | "_defaultOrder": 8,
274 | "_isFastLaunch": false,
275 | "category": "General purpose",
276 | "gpuNum": 0,
277 | "hideHardwareSpecs": false,
278 | "memoryGiB": 128,
279 | "name": "ml.m5.8xlarge",
280 | "vcpuNum": 32
281 | },
282 | {
283 | "_defaultOrder": 9,
284 | "_isFastLaunch": false,
285 | "category": "General purpose",
286 | "gpuNum": 0,
287 | "hideHardwareSpecs": false,
288 | "memoryGiB": 192,
289 | "name": "ml.m5.12xlarge",
290 | "vcpuNum": 48
291 | },
292 | {
293 | "_defaultOrder": 10,
294 | "_isFastLaunch": false,
295 | "category": "General purpose",
296 | "gpuNum": 0,
297 | "hideHardwareSpecs": false,
298 | "memoryGiB": 256,
299 | "name": "ml.m5.16xlarge",
300 | "vcpuNum": 64
301 | },
302 | {
303 | "_defaultOrder": 11,
304 | "_isFastLaunch": false,
305 | "category": "General purpose",
306 | "gpuNum": 0,
307 | "hideHardwareSpecs": false,
308 | "memoryGiB": 384,
309 | "name": "ml.m5.24xlarge",
310 | "vcpuNum": 96
311 | },
312 | {
313 | "_defaultOrder": 12,
314 | "_isFastLaunch": false,
315 | "category": "General purpose",
316 | "gpuNum": 0,
317 | "hideHardwareSpecs": false,
318 | "memoryGiB": 8,
319 | "name": "ml.m5d.large",
320 | "vcpuNum": 2
321 | },
322 | {
323 | "_defaultOrder": 13,
324 | "_isFastLaunch": false,
325 | "category": "General purpose",
326 | "gpuNum": 0,
327 | "hideHardwareSpecs": false,
328 | "memoryGiB": 16,
329 | "name": "ml.m5d.xlarge",
330 | "vcpuNum": 4
331 | },
332 | {
333 | "_defaultOrder": 14,
334 | "_isFastLaunch": false,
335 | "category": "General purpose",
336 | "gpuNum": 0,
337 | "hideHardwareSpecs": false,
338 | "memoryGiB": 32,
339 | "name": "ml.m5d.2xlarge",
340 | "vcpuNum": 8
341 | },
342 | {
343 | "_defaultOrder": 15,
344 | "_isFastLaunch": false,
345 | "category": "General purpose",
346 | "gpuNum": 0,
347 | "hideHardwareSpecs": false,
348 | "memoryGiB": 64,
349 | "name": "ml.m5d.4xlarge",
350 | "vcpuNum": 16
351 | },
352 | {
353 | "_defaultOrder": 16,
354 | "_isFastLaunch": false,
355 | "category": "General purpose",
356 | "gpuNum": 0,
357 | "hideHardwareSpecs": false,
358 | "memoryGiB": 128,
359 | "name": "ml.m5d.8xlarge",
360 | "vcpuNum": 32
361 | },
362 | {
363 | "_defaultOrder": 17,
364 | "_isFastLaunch": false,
365 | "category": "General purpose",
366 | "gpuNum": 0,
367 | "hideHardwareSpecs": false,
368 | "memoryGiB": 192,
369 | "name": "ml.m5d.12xlarge",
370 | "vcpuNum": 48
371 | },
372 | {
373 | "_defaultOrder": 18,
374 | "_isFastLaunch": false,
375 | "category": "General purpose",
376 | "gpuNum": 0,
377 | "hideHardwareSpecs": false,
378 | "memoryGiB": 256,
379 | "name": "ml.m5d.16xlarge",
380 | "vcpuNum": 64
381 | },
382 | {
383 | "_defaultOrder": 19,
384 | "_isFastLaunch": false,
385 | "category": "General purpose",
386 | "gpuNum": 0,
387 | "hideHardwareSpecs": false,
388 | "memoryGiB": 384,
389 | "name": "ml.m5d.24xlarge",
390 | "vcpuNum": 96
391 | },
392 | {
393 | "_defaultOrder": 20,
394 | "_isFastLaunch": false,
395 | "category": "General purpose",
396 | "gpuNum": 0,
397 | "hideHardwareSpecs": true,
398 | "memoryGiB": 0,
399 | "name": "ml.geospatial.interactive",
400 | "supportedImageNames": [
401 | "sagemaker-geospatial-v1-0"
402 | ],
403 | "vcpuNum": 0
404 | },
405 | {
406 | "_defaultOrder": 21,
407 | "_isFastLaunch": true,
408 | "category": "Compute optimized",
409 | "gpuNum": 0,
410 | "hideHardwareSpecs": false,
411 | "memoryGiB": 4,
412 | "name": "ml.c5.large",
413 | "vcpuNum": 2
414 | },
415 | {
416 | "_defaultOrder": 22,
417 | "_isFastLaunch": false,
418 | "category": "Compute optimized",
419 | "gpuNum": 0,
420 | "hideHardwareSpecs": false,
421 | "memoryGiB": 8,
422 | "name": "ml.c5.xlarge",
423 | "vcpuNum": 4
424 | },
425 | {
426 | "_defaultOrder": 23,
427 | "_isFastLaunch": false,
428 | "category": "Compute optimized",
429 | "gpuNum": 0,
430 | "hideHardwareSpecs": false,
431 | "memoryGiB": 16,
432 | "name": "ml.c5.2xlarge",
433 | "vcpuNum": 8
434 | },
435 | {
436 | "_defaultOrder": 24,
437 | "_isFastLaunch": false,
438 | "category": "Compute optimized",
439 | "gpuNum": 0,
440 | "hideHardwareSpecs": false,
441 | "memoryGiB": 32,
442 | "name": "ml.c5.4xlarge",
443 | "vcpuNum": 16
444 | },
445 | {
446 | "_defaultOrder": 25,
447 | "_isFastLaunch": false,
448 | "category": "Compute optimized",
449 | "gpuNum": 0,
450 | "hideHardwareSpecs": false,
451 | "memoryGiB": 72,
452 | "name": "ml.c5.9xlarge",
453 | "vcpuNum": 36
454 | },
455 | {
456 | "_defaultOrder": 26,
457 | "_isFastLaunch": false,
458 | "category": "Compute optimized",
459 | "gpuNum": 0,
460 | "hideHardwareSpecs": false,
461 | "memoryGiB": 96,
462 | "name": "ml.c5.12xlarge",
463 | "vcpuNum": 48
464 | },
465 | {
466 | "_defaultOrder": 27,
467 | "_isFastLaunch": false,
468 | "category": "Compute optimized",
469 | "gpuNum": 0,
470 | "hideHardwareSpecs": false,
471 | "memoryGiB": 144,
472 | "name": "ml.c5.18xlarge",
473 | "vcpuNum": 72
474 | },
475 | {
476 | "_defaultOrder": 28,
477 | "_isFastLaunch": false,
478 | "category": "Compute optimized",
479 | "gpuNum": 0,
480 | "hideHardwareSpecs": false,
481 | "memoryGiB": 192,
482 | "name": "ml.c5.24xlarge",
483 | "vcpuNum": 96
484 | },
485 | {
486 | "_defaultOrder": 29,
487 | "_isFastLaunch": true,
488 | "category": "Accelerated computing",
489 | "gpuNum": 1,
490 | "hideHardwareSpecs": false,
491 | "memoryGiB": 16,
492 | "name": "ml.g4dn.xlarge",
493 | "vcpuNum": 4
494 | },
495 | {
496 | "_defaultOrder": 30,
497 | "_isFastLaunch": false,
498 | "category": "Accelerated computing",
499 | "gpuNum": 1,
500 | "hideHardwareSpecs": false,
501 | "memoryGiB": 32,
502 | "name": "ml.g4dn.2xlarge",
503 | "vcpuNum": 8
504 | },
505 | {
506 | "_defaultOrder": 31,
507 | "_isFastLaunch": false,
508 | "category": "Accelerated computing",
509 | "gpuNum": 1,
510 | "hideHardwareSpecs": false,
511 | "memoryGiB": 64,
512 | "name": "ml.g4dn.4xlarge",
513 | "vcpuNum": 16
514 | },
515 | {
516 | "_defaultOrder": 32,
517 | "_isFastLaunch": false,
518 | "category": "Accelerated computing",
519 | "gpuNum": 1,
520 | "hideHardwareSpecs": false,
521 | "memoryGiB": 128,
522 | "name": "ml.g4dn.8xlarge",
523 | "vcpuNum": 32
524 | },
525 | {
526 | "_defaultOrder": 33,
527 | "_isFastLaunch": false,
528 | "category": "Accelerated computing",
529 | "gpuNum": 4,
530 | "hideHardwareSpecs": false,
531 | "memoryGiB": 192,
532 | "name": "ml.g4dn.12xlarge",
533 | "vcpuNum": 48
534 | },
535 | {
536 | "_defaultOrder": 34,
537 | "_isFastLaunch": false,
538 | "category": "Accelerated computing",
539 | "gpuNum": 1,
540 | "hideHardwareSpecs": false,
541 | "memoryGiB": 256,
542 | "name": "ml.g4dn.16xlarge",
543 | "vcpuNum": 64
544 | },
545 | {
546 | "_defaultOrder": 35,
547 | "_isFastLaunch": false,
548 | "category": "Accelerated computing",
549 | "gpuNum": 1,
550 | "hideHardwareSpecs": false,
551 | "memoryGiB": 61,
552 | "name": "ml.p3.2xlarge",
553 | "vcpuNum": 8
554 | },
555 | {
556 | "_defaultOrder": 36,
557 | "_isFastLaunch": false,
558 | "category": "Accelerated computing",
559 | "gpuNum": 4,
560 | "hideHardwareSpecs": false,
561 | "memoryGiB": 244,
562 | "name": "ml.p3.8xlarge",
563 | "vcpuNum": 32
564 | },
565 | {
566 | "_defaultOrder": 37,
567 | "_isFastLaunch": false,
568 | "category": "Accelerated computing",
569 | "gpuNum": 8,
570 | "hideHardwareSpecs": false,
571 | "memoryGiB": 488,
572 | "name": "ml.p3.16xlarge",
573 | "vcpuNum": 64
574 | },
575 | {
576 | "_defaultOrder": 38,
577 | "_isFastLaunch": false,
578 | "category": "Accelerated computing",
579 | "gpuNum": 8,
580 | "hideHardwareSpecs": false,
581 | "memoryGiB": 768,
582 | "name": "ml.p3dn.24xlarge",
583 | "vcpuNum": 96
584 | },
585 | {
586 | "_defaultOrder": 39,
587 | "_isFastLaunch": false,
588 | "category": "Memory Optimized",
589 | "gpuNum": 0,
590 | "hideHardwareSpecs": false,
591 | "memoryGiB": 16,
592 | "name": "ml.r5.large",
593 | "vcpuNum": 2
594 | },
595 | {
596 | "_defaultOrder": 40,
597 | "_isFastLaunch": false,
598 | "category": "Memory Optimized",
599 | "gpuNum": 0,
600 | "hideHardwareSpecs": false,
601 | "memoryGiB": 32,
602 | "name": "ml.r5.xlarge",
603 | "vcpuNum": 4
604 | },
605 | {
606 | "_defaultOrder": 41,
607 | "_isFastLaunch": false,
608 | "category": "Memory Optimized",
609 | "gpuNum": 0,
610 | "hideHardwareSpecs": false,
611 | "memoryGiB": 64,
612 | "name": "ml.r5.2xlarge",
613 | "vcpuNum": 8
614 | },
615 | {
616 | "_defaultOrder": 42,
617 | "_isFastLaunch": false,
618 | "category": "Memory Optimized",
619 | "gpuNum": 0,
620 | "hideHardwareSpecs": false,
621 | "memoryGiB": 128,
622 | "name": "ml.r5.4xlarge",
623 | "vcpuNum": 16
624 | },
625 | {
626 | "_defaultOrder": 43,
627 | "_isFastLaunch": false,
628 | "category": "Memory Optimized",
629 | "gpuNum": 0,
630 | "hideHardwareSpecs": false,
631 | "memoryGiB": 256,
632 | "name": "ml.r5.8xlarge",
633 | "vcpuNum": 32
634 | },
635 | {
636 | "_defaultOrder": 44,
637 | "_isFastLaunch": false,
638 | "category": "Memory Optimized",
639 | "gpuNum": 0,
640 | "hideHardwareSpecs": false,
641 | "memoryGiB": 384,
642 | "name": "ml.r5.12xlarge",
643 | "vcpuNum": 48
644 | },
645 | {
646 | "_defaultOrder": 45,
647 | "_isFastLaunch": false,
648 | "category": "Memory Optimized",
649 | "gpuNum": 0,
650 | "hideHardwareSpecs": false,
651 | "memoryGiB": 512,
652 | "name": "ml.r5.16xlarge",
653 | "vcpuNum": 64
654 | },
655 | {
656 | "_defaultOrder": 46,
657 | "_isFastLaunch": false,
658 | "category": "Memory Optimized",
659 | "gpuNum": 0,
660 | "hideHardwareSpecs": false,
661 | "memoryGiB": 768,
662 | "name": "ml.r5.24xlarge",
663 | "vcpuNum": 96
664 | },
665 | {
666 | "_defaultOrder": 47,
667 | "_isFastLaunch": false,
668 | "category": "Accelerated computing",
669 | "gpuNum": 1,
670 | "hideHardwareSpecs": false,
671 | "memoryGiB": 16,
672 | "name": "ml.g5.xlarge",
673 | "vcpuNum": 4
674 | },
675 | {
676 | "_defaultOrder": 48,
677 | "_isFastLaunch": false,
678 | "category": "Accelerated computing",
679 | "gpuNum": 1,
680 | "hideHardwareSpecs": false,
681 | "memoryGiB": 32,
682 | "name": "ml.g5.2xlarge",
683 | "vcpuNum": 8
684 | },
685 | {
686 | "_defaultOrder": 49,
687 | "_isFastLaunch": false,
688 | "category": "Accelerated computing",
689 | "gpuNum": 1,
690 | "hideHardwareSpecs": false,
691 | "memoryGiB": 64,
692 | "name": "ml.g5.4xlarge",
693 | "vcpuNum": 16
694 | },
695 | {
696 | "_defaultOrder": 50,
697 | "_isFastLaunch": false,
698 | "category": "Accelerated computing",
699 | "gpuNum": 1,
700 | "hideHardwareSpecs": false,
701 | "memoryGiB": 128,
702 | "name": "ml.g5.8xlarge",
703 | "vcpuNum": 32
704 | },
705 | {
706 | "_defaultOrder": 51,
707 | "_isFastLaunch": false,
708 | "category": "Accelerated computing",
709 | "gpuNum": 1,
710 | "hideHardwareSpecs": false,
711 | "memoryGiB": 256,
712 | "name": "ml.g5.16xlarge",
713 | "vcpuNum": 64
714 | },
715 | {
716 | "_defaultOrder": 52,
717 | "_isFastLaunch": false,
718 | "category": "Accelerated computing",
719 | "gpuNum": 4,
720 | "hideHardwareSpecs": false,
721 | "memoryGiB": 192,
722 | "name": "ml.g5.12xlarge",
723 | "vcpuNum": 48
724 | },
725 | {
726 | "_defaultOrder": 53,
727 | "_isFastLaunch": false,
728 | "category": "Accelerated computing",
729 | "gpuNum": 4,
730 | "hideHardwareSpecs": false,
731 | "memoryGiB": 384,
732 | "name": "ml.g5.24xlarge",
733 | "vcpuNum": 96
734 | },
735 | {
736 | "_defaultOrder": 54,
737 | "_isFastLaunch": false,
738 | "category": "Accelerated computing",
739 | "gpuNum": 8,
740 | "hideHardwareSpecs": false,
741 | "memoryGiB": 768,
742 | "name": "ml.g5.48xlarge",
743 | "vcpuNum": 192
744 | },
745 | {
746 | "_defaultOrder": 55,
747 | "_isFastLaunch": false,
748 | "category": "Accelerated computing",
749 | "gpuNum": 8,
750 | "hideHardwareSpecs": false,
751 | "memoryGiB": 1152,
752 | "name": "ml.p4d.24xlarge",
753 | "vcpuNum": 96
754 | },
755 | {
756 | "_defaultOrder": 56,
757 | "_isFastLaunch": false,
758 | "category": "Accelerated computing",
759 | "gpuNum": 8,
760 | "hideHardwareSpecs": false,
761 | "memoryGiB": 1152,
762 | "name": "ml.p4de.24xlarge",
763 | "vcpuNum": 96
764 | }
765 | ],
766 | "instance_type": "ml.m5.large",
767 | "kernelspec": {
768 | "display_name": "Python 3 (Data Science)",
769 | "language": "python",
770 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
771 | },
772 | "language_info": {
773 | "codemirror_mode": {
774 | "name": "ipython",
775 | "version": 3
776 | },
777 | "file_extension": ".py",
778 | "mimetype": "text/x-python",
779 | "name": "python",
780 | "nbconvert_exporter": "python",
781 | "pygments_lexer": "ipython3",
782 | "version": "3.7.10"
783 | }
784 | },
785 | "nbformat": 4,
786 | "nbformat_minor": 5
787 | }
788 |
--------------------------------------------------------------------------------
/04-create-os-index.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "b985c5b5-1d65-4e4a-82d1-2dfc9768d97d",
6 | "metadata": {},
7 | "source": [
8 | "## Create Index for `Past Conversations`"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "id": "57aa4ada-a686-43de-bc90-0f4107f95ce1",
14 | "metadata": {},
15 | "source": [
16 | "##### Prerequisites"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "id": "faf12933-564a-41a3-9a9e-c02a9437310e",
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "%%capture \n",
27 | "\n",
28 | "!pip install PyYAML"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "id": "52706ec1-fd47-42d4-af0f-33f0a03f654d",
34 | "metadata": {},
35 | "source": [
36 | "#### Imports"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 2,
42 | "id": "81eea5f6-50e3-4398-80eb-11b680d026b4",
43 | "metadata": {
44 | "tags": []
45 | },
46 | "outputs": [],
47 | "source": [
48 | "from requests.auth import HTTPBasicAuth\n",
49 | "from tqdm import tqdm\n",
50 | "import requests\n",
51 | "import logging \n",
52 | "import boto3\n",
53 | "import yaml\n",
54 | "import json\n",
55 | "import os"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "id": "49ec6d42-db7a-4c4c-8322-95963806f987",
61 | "metadata": {},
62 | "source": [
63 | "##### Setup logging"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 3,
69 | "id": "fcddd51c-9251-4428-9931-b2700a71142a",
70 | "metadata": {
71 | "tags": []
72 | },
73 | "outputs": [],
74 | "source": [
75 | "logger = logging.getLogger('sagemaker')\n",
76 | "logger.setLevel(logging.DEBUG)\n",
77 | "logger.addHandler(logging.StreamHandler())"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "id": "4a81f78f-675d-4009-9228-8fa3cfc559b8",
83 | "metadata": {},
84 | "source": [
85 | "##### Log versions of dependencies "
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": 4,
91 | "id": "75474334-8b19-4491-abb9-58468bc33329",
92 | "metadata": {
93 | "tags": []
94 | },
95 | "outputs": [
96 | {
97 | "name": "stderr",
98 | "output_type": "stream",
99 | "text": [
100 | "Using requests==2.28.2\n",
101 | "Using pyyaml==6.0\n"
102 | ]
103 | }
104 | ],
105 | "source": [
106 | "logger.info(f'Using requests=={requests.__version__}')\n",
107 | "logger.info(f'Using pyyaml=={yaml.__version__}')"
108 | ]
109 | },
110 | {
111 | "cell_type": "markdown",
112 | "id": "c52e3b3f-f770-4b90-bc35-12cc0f793604",
113 | "metadata": {},
114 | "source": [
115 | "#### Setup essentials"
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": 5,
121 | "id": "3b2b1f73-2576-412f-ad38-13638db59281",
122 | "metadata": {
123 | "tags": []
124 | },
125 | "outputs": [],
126 | "source": [
127 | "with open('config.yml', 'r') as file:\n",
128 | " config = yaml.safe_load(file)\n",
129 | "\n",
130 | "os_username = config['credentials']['username']\n",
131 | "os_password = config['credentials']['password']\n",
132 | "\n",
133 | "domain_endpoint = config['domain']['endpoint']\n",
134 | "domain_index = config['domain']['index']"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": 6,
140 | "id": "3e558272-b128-47da-8e68-7cdc79aca465",
141 | "metadata": {
142 | "tags": []
143 | },
144 | "outputs": [
145 | {
146 | "name": "stderr",
147 | "output_type": "stream",
148 | "text": [
149 | "URL for OpenSearch index = https://search-semantic-search-hryn56c5jy43yryimohz4ajvyi.us-east-1.es.amazonaws.com/conversations\n"
150 | ]
151 | }
152 | ],
153 | "source": [
154 | "URL = '/'.join((domain_endpoint.rstrip('/'), domain_index))  # avoid '//' when the configured endpoint ends with '/'\n",
155 | "logger.info(f'URL for OpenSearch index = {URL}')"
156 | ]
157 | },
158 | {
159 | "cell_type": "markdown",
160 | "id": "78f72086-5050-40c3-b4e8-32341cbe071c",
161 | "metadata": {},
162 | "source": [
163 | "#### Define the index mapping with a k-NN vector field"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 7,
169 | "id": "45c097ca-4501-42c4-ba1f-d10b52fc025e",
170 | "metadata": {
171 | "tags": []
172 | },
173 | "outputs": [],
174 | "source": [
175 | "mapping = {\n",
176 | " 'settings': {\n",
177 | " 'index': {\n",
178 | " 'knn': True # Enable k-NN search for this index\n",
179 | " }\n",
180 | " },\n",
181 | " 'mappings': {\n",
182 | " 'properties': {\n",
183 | " 'embedding': { # k-NN vector field\n",
184 | " 'type': 'knn_vector',\n",
185 | " 'dimension': 4096 # Dimension of the vector\n",
186 | " },\n",
187 | " 'session_id': {\n",
188 | " 'type': 'keyword'\n",
189 | " },\n",
190 | " 'created_at': {\n",
191 | " 'type': 'long'\n",
192 | " },\n",
193 | " 'conversation_summary': {\n",
194 | " 'type': 'text'\n",
195 | " }\n",
196 | " }\n",
197 | " }\n",
198 | "}"
199 | ]
200 | },
201 | {
202 | "cell_type": "markdown",
203 | "id": "e1a33e70-1e5f-44d7-aec4-4c374d2a8e3d",
204 | "metadata": {},
205 | "source": [
206 | "#### Create the index with the specified mapping"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": 8,
212 | "id": "06601622-e982-42b2-910a-71de86e56475",
213 | "metadata": {
214 | "tags": []
215 | },
216 | "outputs": [
217 | {
218 | "name": "stderr",
219 | "output_type": "stream",
220 | "text": [
221 | "Index created: {\"acknowledged\":true,\"shards_acknowledged\":true,\"index\":\"conversations\"}\n"
222 | ]
223 | }
224 | ],
225 | "source": [
226 | "# HEAD on the index URL: 200 means the index exists, 404 means it does not\n",
227 | "response = requests.head(URL, auth=HTTPBasicAuth(os_username, os_password))\n",
228 | "if response.status_code == 404:\n",
229 | "    response = requests.put(URL, auth=HTTPBasicAuth(os_username, os_password), json=mapping)\n",
230 | "    response.raise_for_status()  # a rejected mapping must not be logged as a created index\n",
231 | "    logger.info(f'Index created: {response.text}')\n",
232 | "else:\n",
233 | "    response.raise_for_status()  # 401/403/5xx mean the check failed, not that the index exists\n",
234 | "    logger.error('Index already exists!')"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": null,
240 | "id": "17ef4711-ec69-4628-b829-64e25d8e290e",
241 | "metadata": {},
242 | "outputs": [],
243 | "source": []
244 | }
245 | ],
246 | "metadata": {
247 | "availableInstances": [
248 | {
249 | "_defaultOrder": 0,
250 | "_isFastLaunch": true,
251 | "category": "General purpose",
252 | "gpuNum": 0,
253 | "hideHardwareSpecs": false,
254 | "memoryGiB": 4,
255 | "name": "ml.t3.medium",
256 | "vcpuNum": 2
257 | },
258 | {
259 | "_defaultOrder": 1,
260 | "_isFastLaunch": false,
261 | "category": "General purpose",
262 | "gpuNum": 0,
263 | "hideHardwareSpecs": false,
264 | "memoryGiB": 8,
265 | "name": "ml.t3.large",
266 | "vcpuNum": 2
267 | },
268 | {
269 | "_defaultOrder": 2,
270 | "_isFastLaunch": false,
271 | "category": "General purpose",
272 | "gpuNum": 0,
273 | "hideHardwareSpecs": false,
274 | "memoryGiB": 16,
275 | "name": "ml.t3.xlarge",
276 | "vcpuNum": 4
277 | },
278 | {
279 | "_defaultOrder": 3,
280 | "_isFastLaunch": false,
281 | "category": "General purpose",
282 | "gpuNum": 0,
283 | "hideHardwareSpecs": false,
284 | "memoryGiB": 32,
285 | "name": "ml.t3.2xlarge",
286 | "vcpuNum": 8
287 | },
288 | {
289 | "_defaultOrder": 4,
290 | "_isFastLaunch": true,
291 | "category": "General purpose",
292 | "gpuNum": 0,
293 | "hideHardwareSpecs": false,
294 | "memoryGiB": 8,
295 | "name": "ml.m5.large",
296 | "vcpuNum": 2
297 | },
298 | {
299 | "_defaultOrder": 5,
300 | "_isFastLaunch": false,
301 | "category": "General purpose",
302 | "gpuNum": 0,
303 | "hideHardwareSpecs": false,
304 | "memoryGiB": 16,
305 | "name": "ml.m5.xlarge",
306 | "vcpuNum": 4
307 | },
308 | {
309 | "_defaultOrder": 6,
310 | "_isFastLaunch": false,
311 | "category": "General purpose",
312 | "gpuNum": 0,
313 | "hideHardwareSpecs": false,
314 | "memoryGiB": 32,
315 | "name": "ml.m5.2xlarge",
316 | "vcpuNum": 8
317 | },
318 | {
319 | "_defaultOrder": 7,
320 | "_isFastLaunch": false,
321 | "category": "General purpose",
322 | "gpuNum": 0,
323 | "hideHardwareSpecs": false,
324 | "memoryGiB": 64,
325 | "name": "ml.m5.4xlarge",
326 | "vcpuNum": 16
327 | },
328 | {
329 | "_defaultOrder": 8,
330 | "_isFastLaunch": false,
331 | "category": "General purpose",
332 | "gpuNum": 0,
333 | "hideHardwareSpecs": false,
334 | "memoryGiB": 128,
335 | "name": "ml.m5.8xlarge",
336 | "vcpuNum": 32
337 | },
338 | {
339 | "_defaultOrder": 9,
340 | "_isFastLaunch": false,
341 | "category": "General purpose",
342 | "gpuNum": 0,
343 | "hideHardwareSpecs": false,
344 | "memoryGiB": 192,
345 | "name": "ml.m5.12xlarge",
346 | "vcpuNum": 48
347 | },
348 | {
349 | "_defaultOrder": 10,
350 | "_isFastLaunch": false,
351 | "category": "General purpose",
352 | "gpuNum": 0,
353 | "hideHardwareSpecs": false,
354 | "memoryGiB": 256,
355 | "name": "ml.m5.16xlarge",
356 | "vcpuNum": 64
357 | },
358 | {
359 | "_defaultOrder": 11,
360 | "_isFastLaunch": false,
361 | "category": "General purpose",
362 | "gpuNum": 0,
363 | "hideHardwareSpecs": false,
364 | "memoryGiB": 384,
365 | "name": "ml.m5.24xlarge",
366 | "vcpuNum": 96
367 | },
368 | {
369 | "_defaultOrder": 12,
370 | "_isFastLaunch": false,
371 | "category": "General purpose",
372 | "gpuNum": 0,
373 | "hideHardwareSpecs": false,
374 | "memoryGiB": 8,
375 | "name": "ml.m5d.large",
376 | "vcpuNum": 2
377 | },
378 | {
379 | "_defaultOrder": 13,
380 | "_isFastLaunch": false,
381 | "category": "General purpose",
382 | "gpuNum": 0,
383 | "hideHardwareSpecs": false,
384 | "memoryGiB": 16,
385 | "name": "ml.m5d.xlarge",
386 | "vcpuNum": 4
387 | },
388 | {
389 | "_defaultOrder": 14,
390 | "_isFastLaunch": false,
391 | "category": "General purpose",
392 | "gpuNum": 0,
393 | "hideHardwareSpecs": false,
394 | "memoryGiB": 32,
395 | "name": "ml.m5d.2xlarge",
396 | "vcpuNum": 8
397 | },
398 | {
399 | "_defaultOrder": 15,
400 | "_isFastLaunch": false,
401 | "category": "General purpose",
402 | "gpuNum": 0,
403 | "hideHardwareSpecs": false,
404 | "memoryGiB": 64,
405 | "name": "ml.m5d.4xlarge",
406 | "vcpuNum": 16
407 | },
408 | {
409 | "_defaultOrder": 16,
410 | "_isFastLaunch": false,
411 | "category": "General purpose",
412 | "gpuNum": 0,
413 | "hideHardwareSpecs": false,
414 | "memoryGiB": 128,
415 | "name": "ml.m5d.8xlarge",
416 | "vcpuNum": 32
417 | },
418 | {
419 | "_defaultOrder": 17,
420 | "_isFastLaunch": false,
421 | "category": "General purpose",
422 | "gpuNum": 0,
423 | "hideHardwareSpecs": false,
424 | "memoryGiB": 192,
425 | "name": "ml.m5d.12xlarge",
426 | "vcpuNum": 48
427 | },
428 | {
429 | "_defaultOrder": 18,
430 | "_isFastLaunch": false,
431 | "category": "General purpose",
432 | "gpuNum": 0,
433 | "hideHardwareSpecs": false,
434 | "memoryGiB": 256,
435 | "name": "ml.m5d.16xlarge",
436 | "vcpuNum": 64
437 | },
438 | {
439 | "_defaultOrder": 19,
440 | "_isFastLaunch": false,
441 | "category": "General purpose",
442 | "gpuNum": 0,
443 | "hideHardwareSpecs": false,
444 | "memoryGiB": 384,
445 | "name": "ml.m5d.24xlarge",
446 | "vcpuNum": 96
447 | },
448 | {
449 | "_defaultOrder": 20,
450 | "_isFastLaunch": false,
451 | "category": "General purpose",
452 | "gpuNum": 0,
453 | "hideHardwareSpecs": true,
454 | "memoryGiB": 0,
455 | "name": "ml.geospatial.interactive",
456 | "supportedImageNames": [
457 | "sagemaker-geospatial-v1-0"
458 | ],
459 | "vcpuNum": 0
460 | },
461 | {
462 | "_defaultOrder": 21,
463 | "_isFastLaunch": true,
464 | "category": "Compute optimized",
465 | "gpuNum": 0,
466 | "hideHardwareSpecs": false,
467 | "memoryGiB": 4,
468 | "name": "ml.c5.large",
469 | "vcpuNum": 2
470 | },
471 | {
472 | "_defaultOrder": 22,
473 | "_isFastLaunch": false,
474 | "category": "Compute optimized",
475 | "gpuNum": 0,
476 | "hideHardwareSpecs": false,
477 | "memoryGiB": 8,
478 | "name": "ml.c5.xlarge",
479 | "vcpuNum": 4
480 | },
481 | {
482 | "_defaultOrder": 23,
483 | "_isFastLaunch": false,
484 | "category": "Compute optimized",
485 | "gpuNum": 0,
486 | "hideHardwareSpecs": false,
487 | "memoryGiB": 16,
488 | "name": "ml.c5.2xlarge",
489 | "vcpuNum": 8
490 | },
491 | {
492 | "_defaultOrder": 24,
493 | "_isFastLaunch": false,
494 | "category": "Compute optimized",
495 | "gpuNum": 0,
496 | "hideHardwareSpecs": false,
497 | "memoryGiB": 32,
498 | "name": "ml.c5.4xlarge",
499 | "vcpuNum": 16
500 | },
501 | {
502 | "_defaultOrder": 25,
503 | "_isFastLaunch": false,
504 | "category": "Compute optimized",
505 | "gpuNum": 0,
506 | "hideHardwareSpecs": false,
507 | "memoryGiB": 72,
508 | "name": "ml.c5.9xlarge",
509 | "vcpuNum": 36
510 | },
511 | {
512 | "_defaultOrder": 26,
513 | "_isFastLaunch": false,
514 | "category": "Compute optimized",
515 | "gpuNum": 0,
516 | "hideHardwareSpecs": false,
517 | "memoryGiB": 96,
518 | "name": "ml.c5.12xlarge",
519 | "vcpuNum": 48
520 | },
521 | {
522 | "_defaultOrder": 27,
523 | "_isFastLaunch": false,
524 | "category": "Compute optimized",
525 | "gpuNum": 0,
526 | "hideHardwareSpecs": false,
527 | "memoryGiB": 144,
528 | "name": "ml.c5.18xlarge",
529 | "vcpuNum": 72
530 | },
531 | {
532 | "_defaultOrder": 28,
533 | "_isFastLaunch": false,
534 | "category": "Compute optimized",
535 | "gpuNum": 0,
536 | "hideHardwareSpecs": false,
537 | "memoryGiB": 192,
538 | "name": "ml.c5.24xlarge",
539 | "vcpuNum": 96
540 | },
541 | {
542 | "_defaultOrder": 29,
543 | "_isFastLaunch": true,
544 | "category": "Accelerated computing",
545 | "gpuNum": 1,
546 | "hideHardwareSpecs": false,
547 | "memoryGiB": 16,
548 | "name": "ml.g4dn.xlarge",
549 | "vcpuNum": 4
550 | },
551 | {
552 | "_defaultOrder": 30,
553 | "_isFastLaunch": false,
554 | "category": "Accelerated computing",
555 | "gpuNum": 1,
556 | "hideHardwareSpecs": false,
557 | "memoryGiB": 32,
558 | "name": "ml.g4dn.2xlarge",
559 | "vcpuNum": 8
560 | },
561 | {
562 | "_defaultOrder": 31,
563 | "_isFastLaunch": false,
564 | "category": "Accelerated computing",
565 | "gpuNum": 1,
566 | "hideHardwareSpecs": false,
567 | "memoryGiB": 64,
568 | "name": "ml.g4dn.4xlarge",
569 | "vcpuNum": 16
570 | },
571 | {
572 | "_defaultOrder": 32,
573 | "_isFastLaunch": false,
574 | "category": "Accelerated computing",
575 | "gpuNum": 1,
576 | "hideHardwareSpecs": false,
577 | "memoryGiB": 128,
578 | "name": "ml.g4dn.8xlarge",
579 | "vcpuNum": 32
580 | },
581 | {
582 | "_defaultOrder": 33,
583 | "_isFastLaunch": false,
584 | "category": "Accelerated computing",
585 | "gpuNum": 4,
586 | "hideHardwareSpecs": false,
587 | "memoryGiB": 192,
588 | "name": "ml.g4dn.12xlarge",
589 | "vcpuNum": 48
590 | },
591 | {
592 | "_defaultOrder": 34,
593 | "_isFastLaunch": false,
594 | "category": "Accelerated computing",
595 | "gpuNum": 1,
596 | "hideHardwareSpecs": false,
597 | "memoryGiB": 256,
598 | "name": "ml.g4dn.16xlarge",
599 | "vcpuNum": 64
600 | },
601 | {
602 | "_defaultOrder": 35,
603 | "_isFastLaunch": false,
604 | "category": "Accelerated computing",
605 | "gpuNum": 1,
606 | "hideHardwareSpecs": false,
607 | "memoryGiB": 61,
608 | "name": "ml.p3.2xlarge",
609 | "vcpuNum": 8
610 | },
611 | {
612 | "_defaultOrder": 36,
613 | "_isFastLaunch": false,
614 | "category": "Accelerated computing",
615 | "gpuNum": 4,
616 | "hideHardwareSpecs": false,
617 | "memoryGiB": 244,
618 | "name": "ml.p3.8xlarge",
619 | "vcpuNum": 32
620 | },
621 | {
622 | "_defaultOrder": 37,
623 | "_isFastLaunch": false,
624 | "category": "Accelerated computing",
625 | "gpuNum": 8,
626 | "hideHardwareSpecs": false,
627 | "memoryGiB": 488,
628 | "name": "ml.p3.16xlarge",
629 | "vcpuNum": 64
630 | },
631 | {
632 | "_defaultOrder": 38,
633 | "_isFastLaunch": false,
634 | "category": "Accelerated computing",
635 | "gpuNum": 8,
636 | "hideHardwareSpecs": false,
637 | "memoryGiB": 768,
638 | "name": "ml.p3dn.24xlarge",
639 | "vcpuNum": 96
640 | },
641 | {
642 | "_defaultOrder": 39,
643 | "_isFastLaunch": false,
644 | "category": "Memory Optimized",
645 | "gpuNum": 0,
646 | "hideHardwareSpecs": false,
647 | "memoryGiB": 16,
648 | "name": "ml.r5.large",
649 | "vcpuNum": 2
650 | },
651 | {
652 | "_defaultOrder": 40,
653 | "_isFastLaunch": false,
654 | "category": "Memory Optimized",
655 | "gpuNum": 0,
656 | "hideHardwareSpecs": false,
657 | "memoryGiB": 32,
658 | "name": "ml.r5.xlarge",
659 | "vcpuNum": 4
660 | },
661 | {
662 | "_defaultOrder": 41,
663 | "_isFastLaunch": false,
664 | "category": "Memory Optimized",
665 | "gpuNum": 0,
666 | "hideHardwareSpecs": false,
667 | "memoryGiB": 64,
668 | "name": "ml.r5.2xlarge",
669 | "vcpuNum": 8
670 | },
671 | {
672 | "_defaultOrder": 42,
673 | "_isFastLaunch": false,
674 | "category": "Memory Optimized",
675 | "gpuNum": 0,
676 | "hideHardwareSpecs": false,
677 | "memoryGiB": 128,
678 | "name": "ml.r5.4xlarge",
679 | "vcpuNum": 16
680 | },
681 | {
682 | "_defaultOrder": 43,
683 | "_isFastLaunch": false,
684 | "category": "Memory Optimized",
685 | "gpuNum": 0,
686 | "hideHardwareSpecs": false,
687 | "memoryGiB": 256,
688 | "name": "ml.r5.8xlarge",
689 | "vcpuNum": 32
690 | },
691 | {
692 | "_defaultOrder": 44,
693 | "_isFastLaunch": false,
694 | "category": "Memory Optimized",
695 | "gpuNum": 0,
696 | "hideHardwareSpecs": false,
697 | "memoryGiB": 384,
698 | "name": "ml.r5.12xlarge",
699 | "vcpuNum": 48
700 | },
701 | {
702 | "_defaultOrder": 45,
703 | "_isFastLaunch": false,
704 | "category": "Memory Optimized",
705 | "gpuNum": 0,
706 | "hideHardwareSpecs": false,
707 | "memoryGiB": 512,
708 | "name": "ml.r5.16xlarge",
709 | "vcpuNum": 64
710 | },
711 | {
712 | "_defaultOrder": 46,
713 | "_isFastLaunch": false,
714 | "category": "Memory Optimized",
715 | "gpuNum": 0,
716 | "hideHardwareSpecs": false,
717 | "memoryGiB": 768,
718 | "name": "ml.r5.24xlarge",
719 | "vcpuNum": 96
720 | },
721 | {
722 | "_defaultOrder": 47,
723 | "_isFastLaunch": false,
724 | "category": "Accelerated computing",
725 | "gpuNum": 1,
726 | "hideHardwareSpecs": false,
727 | "memoryGiB": 16,
728 | "name": "ml.g5.xlarge",
729 | "vcpuNum": 4
730 | },
731 | {
732 | "_defaultOrder": 48,
733 | "_isFastLaunch": false,
734 | "category": "Accelerated computing",
735 | "gpuNum": 1,
736 | "hideHardwareSpecs": false,
737 | "memoryGiB": 32,
738 | "name": "ml.g5.2xlarge",
739 | "vcpuNum": 8
740 | },
741 | {
742 | "_defaultOrder": 49,
743 | "_isFastLaunch": false,
744 | "category": "Accelerated computing",
745 | "gpuNum": 1,
746 | "hideHardwareSpecs": false,
747 | "memoryGiB": 64,
748 | "name": "ml.g5.4xlarge",
749 | "vcpuNum": 16
750 | },
751 | {
752 | "_defaultOrder": 50,
753 | "_isFastLaunch": false,
754 | "category": "Accelerated computing",
755 | "gpuNum": 1,
756 | "hideHardwareSpecs": false,
757 | "memoryGiB": 128,
758 | "name": "ml.g5.8xlarge",
759 | "vcpuNum": 32
760 | },
761 | {
762 | "_defaultOrder": 51,
763 | "_isFastLaunch": false,
764 | "category": "Accelerated computing",
765 | "gpuNum": 1,
766 | "hideHardwareSpecs": false,
767 | "memoryGiB": 256,
768 | "name": "ml.g5.16xlarge",
769 | "vcpuNum": 64
770 | },
771 | {
772 | "_defaultOrder": 52,
773 | "_isFastLaunch": false,
774 | "category": "Accelerated computing",
775 | "gpuNum": 4,
776 | "hideHardwareSpecs": false,
777 | "memoryGiB": 192,
778 | "name": "ml.g5.12xlarge",
779 | "vcpuNum": 48
780 | },
781 | {
782 | "_defaultOrder": 53,
783 | "_isFastLaunch": false,
784 | "category": "Accelerated computing",
785 | "gpuNum": 4,
786 | "hideHardwareSpecs": false,
787 | "memoryGiB": 384,
788 | "name": "ml.g5.24xlarge",
789 | "vcpuNum": 96
790 | },
791 | {
792 | "_defaultOrder": 54,
793 | "_isFastLaunch": false,
794 | "category": "Accelerated computing",
795 | "gpuNum": 8,
796 | "hideHardwareSpecs": false,
797 | "memoryGiB": 768,
798 | "name": "ml.g5.48xlarge",
799 | "vcpuNum": 192
800 | },
801 | {
802 | "_defaultOrder": 55,
803 | "_isFastLaunch": false,
804 | "category": "Accelerated computing",
805 | "gpuNum": 8,
806 | "hideHardwareSpecs": false,
807 | "memoryGiB": 1152,
808 | "name": "ml.p4d.24xlarge",
809 | "vcpuNum": 96
810 | },
811 | {
812 | "_defaultOrder": 56,
813 | "_isFastLaunch": false,
814 | "category": "Accelerated computing",
815 | "gpuNum": 8,
816 | "hideHardwareSpecs": false,
817 | "memoryGiB": 1152,
818 | "name": "ml.p4de.24xlarge",
819 | "vcpuNum": 96
820 | }
821 | ],
822 | "instance_type": "ml.t3.medium",
823 | "kernelspec": {
824 | "display_name": "Python 3 (Data Science)",
825 | "language": "python",
826 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
827 | },
828 | "language_info": {
829 | "codemirror_mode": {
830 | "name": "ipython",
831 | "version": 3
832 | },
833 | "file_extension": ".py",
834 | "mimetype": "text/x-python",
835 | "name": "python",
836 | "nbconvert_exporter": "python",
837 | "pygments_lexer": "ipython3",
838 | "version": "3.7.10"
839 | }
840 | },
841 | "nbformat": 4,
842 | "nbformat_minor": 5
843 | }
844 |
--------------------------------------------------------------------------------
/05-lambda-handler.py:
--------------------------------------------------------------------------------
1 | from boto3.dynamodb.conditions import Key
2 | from requests.auth import HTTPBasicAuth
3 | import requests
4 | import logging
5 | import boto3
6 | import json
7 | import os
8 |
9 |
# Set up logger
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger('log')

# Create service clients
dynamodb = boto3.resource('dynamodb')
sagemaker_runtime = boto3.client('sagemaker-runtime')

# Amazon OpenSearch connection settings, read from the environment.
# A missing variable raises KeyError when the module is imported.
domain_endpoint = os.environ['OS_ENDPOINT']
domain_index = os.environ['OS_INDEX_NAME']
os_username = os.environ['OS_USERNAME']
os_password = os.environ['OS_PASSWORD']

# Base URL of the OpenSearch index that session documents are posted to
URL = f'{domain_endpoint}/{domain_index}'

# Set LLM generation configs (sent in the text-generation request payload)
MAX_LENGTH = 512
NUM_RETURN_SEQUENCES = 1
TOP_K = 0
TOP_P = 0.7
DO_SAMPLE = True
# Content type used when invoking the text-generation endpoint
CONTENT_TYPE = 'application/json'
TEMPERATURE = 0.1
36 |
37 |
38 |
def lambda_handler(event: dict, context: dict) -> None:
    """Persist ended chat sessions to long-term memory.

    For every ``MODIFY`` record in ``event['Records']`` the handler reads the
    session id and end time from the record's ``NewImage``, loads the
    session's conversation turns, summarizes them, embeds the summary and
    writes the result to the search index.

    :param event: Event payload; must contain a ``Records`` list whose items
        carry ``eventName`` and ``dynamodb.NewImage``.
    :param context: Invocation context; only logged.
    """
    logger.info(f'Received event: {event}')
    logger.info(f'Received context: {context}')

    for record in event['Records']:
        if record['eventName'] != 'MODIFY':
            continue

        session_item = record['dynamodb']['NewImage']
        session_id = session_item['session_id']['S']

        # A session that has not ended yet carries no numeric end_time;
        # skip it instead of failing the whole batch with a KeyError.
        end_time_attr = session_item.get('end_time') or {}
        if 'N' not in end_time_attr:
            logger.info(f'Session {session_id} has no end_time yet; skipping')
            continue
        end_time = end_time_attr['N']

        # Query the conversations table
        conversation_turns = query_conversations_table(session_id)

        # Flatten the conversation turns into a dict
        flattened_conversations = flatten_conversations(conversation_turns)

        # Hand the summarizer the conversation text itself: passing the
        # wrapping dict would embed its repr ("{'conversation': ...}") in the
        # prompt instead of the plain text the few-shot examples use.
        summary = summarize_conversations(flattened_conversations['conversation'])

        # Encode the summary into an embedding
        embedding = encode_conversations(summary)

        # Write the summary and its embedding to the search index
        write_to_elasticsearch(session_id, embedding, end_time, summary)

        logger.info(f'Session {session_id} was persisted to long term memory')
64 |
65 |
def query_conversations_table(session_id: str) -> list:
    """Return every conversation turn stored for ``session_id``.

    A single DynamoDB ``Query`` call returns at most one page of results, so
    the query is repeated with ``ExclusiveStartKey`` until the response no
    longer contains a ``LastEvaluatedKey``.

    :param session_id: Partition key of the session to load.
    :return: List of items from the ``conversations`` table.
    """
    table = dynamodb.Table('conversations')
    query_kwargs = {'KeyConditionExpression': Key('session_id').eq(session_id)}
    items = []
    while True:
        response = table.query(**query_kwargs)
        items.extend(response['Items'])
        last_key = response.get('LastEvaluatedKey')
        if not last_key:
            return items
        # More pages remain: resume right after the last key that was read.
        query_kwargs['ExclusiveStartKey'] = last_key
70 |
71 |
def flatten_conversations(conversation_turns: list) -> dict:
    """Collapse conversation turns into a single string.

    Each turn contributes its ``'Me'`` value, a space, its ``'AI'`` value and
    a trailing space, in the order the turns are given.

    :param conversation_turns: List of mappings with ``'Me'`` and ``'AI'`` keys.
    :return: ``{'conversation': <joined text>}``; the text is empty when
        there are no turns.
    """
    pieces = [f"{turn['Me']} {turn['AI']} " for turn in conversation_turns]
    return {'conversation': ''.join(pieces)}
79 |
80 |
def summarize_conversations(conversation: str) -> str:
    """Summarize a conversation with the text-generation endpoint.

    Builds a few-shot prompt ending with ``conversation``, sends it to the
    endpoint named by the ``SAGEMAKER_TEXT_GEN_ENDPOINT`` environment
    variable and returns the first generated text.

    :param conversation: Conversation text to summarize.
    :return: First entry of the response's ``generated_texts``.
    """
    # f-string so the conversation is logged, not the literal placeholder.
    logger.info(f'Conversation: {conversation}')
    prompt = f"""Conversation==hi there! I'm doing well, thank you. what is the meaning of eminent domain? Eminent domain is the power of the government to take private property for public use, with just compensation.
Summary==We discussed about the meaning of eminent domain and that it is the government's power to take private property for public use with just compensation.

Conversation==Hey! I'm feeling great, how about you? Can you tell me what is the concept of due diligence? Due diligence is a comprehensive investigation or appraisal of a business or person before entering into an agreement or transaction.
Summary==We discussed about the meaning of due diligence and that it is a comprehensive investigation or appraisal of a business or person before entering into an agreement or transaction.

Conversation==hello! I'm good, thank you for asking. What is the definition of fiduciary duty? Fiduciary duty is a legal obligation of one party to act in the best interests of another, often in financial or legal matters.
Summary==We talked about the meaning of fiduciary duty and that it is a legal obligation of one party to act in the best interests of another, often in financial or legal matters.

Conversation=={conversation}
Summary==


Summarize the above Conversation as a short paragraph in 3 to 4 sentences."""
    payload = {'text_inputs': prompt,
               'max_length': MAX_LENGTH,
               'temperature': TEMPERATURE,
               'num_return_sequences': NUM_RETURN_SEQUENCES,
               'top_k': TOP_K,
               'top_p': TOP_P,
               'do_sample': DO_SAMPLE}
    payload = json.dumps(payload).encode('utf-8')
    response = sagemaker_runtime.invoke_endpoint(EndpointName=os.environ['SAGEMAKER_TEXT_GEN_ENDPOINT'],
                                                 ContentType=CONTENT_TYPE,
                                                 Body=payload)
    model_predictions = json.loads(response['Body'].read())
    generated_text = model_predictions['generated_texts'][0]
    logger.info(f'Summary: {generated_text}')
    return generated_text
112 |
113 |
def encode_conversations(summary: str) -> list:
    """Embed ``summary`` with the text-embedding endpoint.

    The endpoint name is read from the ``SAGEMAKER_TEXT_EMBED_ENDPOINT``
    environment variable.

    :param summary: Text to embed.
    :return: First entry of the response's ``embedding`` field.
    """
    request_body = json.dumps({'text_inputs': [summary]}).encode('utf-8')
    response = sagemaker_runtime.invoke_endpoint(
        EndpointName=os.environ['SAGEMAKER_TEXT_EMBED_ENDPOINT'],
        ContentType='application/json',
        Body=request_body,
    )
    parsed = json.loads(response['Body'].read())
    return parsed['embedding'][0]
123 |
124 |
def write_to_elasticsearch(session_id: str, embedding: list, end_time: int, summary: str) -> None:
    """Index a session summary and its embedding in the search index.

    The document is posted to ``{URL}/_doc/{session_id}`` with HTTP basic
    authentication. Failures are logged and never raised, so the write stays
    best-effort for the caller.

    :param session_id: Session identifier; also used as the document id.
    :param embedding: Embedding vector of the summary.
    :param end_time: Session end time, stored as ``created_at``.
    :param summary: Conversation summary text.
    """
    document = {
        'session_id': session_id,
        'embedding': embedding,
        'created_at': end_time,
        'conversation_summary': summary
    }

    try:
        # Without a timeout requests waits indefinitely for a stalled
        # endpoint; bound the wait so the handler can fail fast and log.
        response = requests.post(f'{URL}/_doc/{session_id}',
                                 auth=HTTPBasicAuth(os_username, os_password),
                                 json=document,
                                 timeout=30)
    except requests.RequestException:
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f'Failed to write session {session_id} to the search index')
        return

    if response.status_code not in (200, 201):
        logger.error(response.status_code)
        logger.error(response.text)
141 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AWS SageMaker Chatbot
2 | Build a context-aware chatbot with short- and long-term memory using AWS SageMaker and other AWS services for improved user interactions.
3 |
4 |
5 | 
6 |
7 |
--------------------------------------------------------------------------------
/chatbot-app/app.py:
--------------------------------------------------------------------------------
1 | from retrieve import retrieve_top_matching_past_conversations
2 | from llm import summarize_passages_and_collate_answers
3 | from retrieve import retrieve_top_matching_passages
4 | from ddb import get_conversations_by_session_id
5 | from llm import generate_dialogue_response
6 | from ddb import add_conversation_turn
7 | from ddb import create_session
8 | from ddb import end_session
9 | from llm import detect_task
10 | import streamlit as st
11 | import logging
12 | import boto3
13 |
14 |
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
logger = logging.getLogger('log')


# Set Streamlit page configuration
st.set_page_config(page_title='ai-assistant', layout='wide')

dynamodb = boto3.resource('dynamodb')

# Seed every session-state slot the app relies on, leaving existing values
# untouched. Factories are used so each list slot gets its own fresh list.
for _state_key, _make_default in (('generated', list),
                                  ('past', list),
                                  ('input', str),
                                  ('stored_session', list)):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
34 |
35 |
def get_text_input():
    """
    Render the chat text box and read its current content.
    :return: Text entered by the user
    """
    return st.text_input(
        'You: ',
        st.session_state['input'],
        key='input',
        placeholder='Your AI assistant here! Ask me anything ...',
        label_visibility='hidden',
    )
47 |
48 |
def new_session():
    """
    Clears session state and starts a new session.

    If a session is currently open it is ended in the ``sessions`` DynamoDB
    table first. The visible conversation is then archived, newest turn
    first, into ``st.session_state['stored_session']`` and the chat state is
    reset.
    """
    # A session id only exists once the user has sent a message. Guarding
    # here avoids an AttributeError when the button is clicked on a fresh
    # page, and removing the id afterwards keeps repeated clicks from ending
    # the same session more than once.
    if 'session_id' in st.session_state:
        end_session(dynamodb.Table('sessions'), st.session_state.session_id)
        del st.session_state['session_id']

    save = []
    for j in reversed(range(len(st.session_state['generated']))):
        save.append(f"User: {st.session_state['past'][j]}")
        save.append(f"Bot: {st.session_state['generated'][j]}")

    st.session_state['stored_session'].append(save)
    st.session_state['generated'] = []
    st.session_state['past'] = []
    st.session_state['input'] = ''
67 |
68 |
# Set up sidebar with various options
with st.sidebar.expander('🛠️', expanded=True):
    # Upper bound on how many past turns are fed back into the prompt
    max_turns = st.number_input('Number of turns to remember',
                                min_value=1,
                                max_value=100)

# Set up the Streamlit app layout
st.title('🤖 AI Assistant 🧠')
st.subheader('Powered by ☁️ AWS')

# Add a button to start a new chat
st.sidebar.button('New Session', on_click=new_session, type='primary')

# Get the user input
user_input = get_text_input()

# DynamoDB tables holding session metadata and the individual turns
sessions_table = dynamodb.Table('sessions')
conversations_table = dynamodb.Table('conversations')
87 |
88 |
def respond_by_task(query, history):
    """
    Answer ``query`` using the strategy selected by ``detect_task``.

    :param query: Text entered by the user
    :param history: Prior turns of the session as one string; may be empty
    :return: The generated or retrieved answer, or None when the detected
        task type is not one of the handled ones
    """
    logger.info(f'HISTORY: {history}')
    task_type = detect_task(query)
    logger.info(f'TASK TYPE = {task_type}')

    # The function works on its own ``query`` argument rather than the
    # module-level ``user_input`` so that it answers what the caller passed.
    if task_type == 'STM CHAT':
        current_turn = f'Me: {query}\nAI:'
        # Prepend the remembered turns only when there are any.
        prompt = f'{history}\n{current_turn}' if len(history) > 0 else current_turn
        logger.info(f'Prompt: {prompt}')
        return generate_dialogue_response(prompt)

    if task_type == 'LTM PAST CONVERSATIONS':
        matches = retrieve_top_matching_past_conversations(query, 'conversations')
        return '\n\n'.join(matches)

    if task_type == 'LTM VERIFIED SOURCES':
        passages = retrieve_top_matching_passages(query, 'passages')
        return summarize_passages_and_collate_answers(passages, query)

    return None
113 |
114 |
def transform_ddb_past_history(history: list, num_turns=10) -> str:
    """
    Render the most recent conversation turns as prompt text.

    Each kept turn becomes two lines, ``Me: ...`` followed by ``AI: ...``.

    :param history: Turns (mappings with 'Me' and 'AI' keys), oldest first
    :param num_turns: Maximum number of most recent turns to keep
    :return: The kept turns joined by newlines; empty when there is nothing
        to keep
    """
    # ``seq[-0:]`` is the whole sequence, so a zero limit has to be handled
    # explicitly or it would keep everything instead of nothing.
    if num_turns <= 0:
        return ''

    past_hist = []
    for turn in history[-num_turns:]:
        past_hist.append(f"Me: {turn['Me']}")
        past_hist.append(f"AI: {turn['AI']}")
    return '\n'.join(past_hist)
125 |
126 |
# Handle a submitted message: answer it and persist the turn
if user_input:
    # 'input' is the session-state key of the chat text box
    user_utterance = st.session_state['input']
    ai_utterance = st.session_state['generated']
    if len(ai_utterance) == 0:
        # Start a new session
        st.session_state.session_id = create_session(sessions_table)

    # Load the session's stored turns and trim them to the configured limit
    past_history = get_conversations_by_session_id(conversations_table, st.session_state.session_id)
    past_history = transform_ddb_past_history(past_history, max_turns)
    output = respond_by_task(user_input, past_history)

    st.session_state.past.append(user_input)
    st.session_state.generated.append(output)

    # Persist the turn that was just appended
    ai_utterance = st.session_state['generated'][-1]
    add_conversation_turn(conversations_table, st.session_state.session_id, user_utterance, ai_utterance)

# Display the conversation history using an expander, and allow the user to download it
download_str = []
with st.expander('Conversation', expanded=True):
    # Walk the turns from newest to oldest
    for i in range(len(st.session_state['generated']) - 1, -1, -1):
        st.info(st.session_state['past'][i], icon='🧐')
        st.success(st.session_state['generated'][i], icon='🤖')
        download_str.append(f"Human: {st.session_state['past'][i]}")
        download_str.append(f"AI: {st.session_state['generated'][i]}")

    # NOTE(review): download_str is rebound from a list to the joined string here
    download_str = '\n'.join(download_str)
    if download_str:
        st.download_button('Download', download_str)

# Display stored conversation sessions in the sidebar
for i, sublist in enumerate(st.session_state.stored_session):
    with st.sidebar.expander(label=f'Conversation Session:{i}'):
        st.write(sublist)
161 |
162 |
def del_sessions():
    """Remove all stored conversation sessions from the session state."""
    del st.session_state['stored_session']


# Offer the "Clear All" button only while there are stored sessions
if st.session_state['stored_session']:
    st.sidebar.button('Clear All', on_click=del_sessions, type='primary')
170 |
--------------------------------------------------------------------------------
/chatbot-app/config/config.yml:
--------------------------------------------------------------------------------
1 | opensearch:
2 | credentials:
3 | username: xxxxxxxx
4 | password: xxxxxxxx
5 | domain:
6 | endpoint: https://xxxxxxxx.us-east-1.es.amazonaws.com
7 | jumpstart:
8 | text_gen_endpoint_name: xxxxxxxx
9 | text_embed_endpoint_name: xxxxxxxx
10 |
--------------------------------------------------------------------------------
/chatbot-app/ddb.py:
--------------------------------------------------------------------------------
1 | from boto3.dynamodb.conditions import Key
2 | import logging
3 | import boto3
4 | import time
5 | import uuid
6 |
7 |
# Configure logging and create the module logger
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger('log')

# DynamoDB service resource (a resource object, despite the name "client")
client = boto3.resource('dynamodb')
13 |
14 |
def add_conversation_turn(table, session_id, user, bot):
    """Store one user/bot exchange in ``table``.

    :param table: Table object exposing ``put_item``.
    :param session_id: Session the turn belongs to.
    :param user: The user's message, stored under ``'Me'``.
    :param bot: The bot's reply, stored under ``'AI'``.
    """
    # Current time in milliseconds
    now_ms = int(time.time() * 1000)
    turn = {
        'session_id': session_id,
        'timestamp': now_ms,
        'Me': user,
        'AI': bot,
    }
    table.put_item(Item=turn)
25 |
26 |
def get_conversations_by_session_id(table, session_id, descending=True):
    """Return every item stored in ``table`` for ``session_id``.

    The query is repeated with ``ExclusiveStartKey`` for as long as the
    response carries a ``LastEvaluatedKey``, so results spanning several
    pages are returned in full.

    :param table: Table object exposing ``query``.
    :param session_id: Partition key value to look up.
    :param descending: Forwarded unchanged as DynamoDB's
        ``ScanIndexForward``. NOTE: despite the name, the default ``True``
        therefore yields ascending sort-key order. The mapping is kept as-is
        because callers rely on the current ordering.
    :return: List of matching items.
    """
    query_kwargs = {
        'KeyConditionExpression': Key('session_id').eq(session_id),
        'ScanIndexForward': descending,
    }
    items = []
    while True:
        response = table.query(**query_kwargs)
        items.extend(response['Items'])
        last_key = response.get('LastEvaluatedKey')
        if not last_key:
            return items
        # More pages remain: resume right after the last key that was read.
        query_kwargs['ExclusiveStartKey'] = last_key
33 |
34 |
def delete_conversation(table, session_id, timestamp):
    """Delete the turn identified by ``session_id`` and ``timestamp``.

    :param table: Table object exposing ``delete_item``.
    :param session_id: Session the turn belongs to.
    :param timestamp: Timestamp of the turn to delete.
    """
    turn_key = {'session_id': session_id, 'timestamp': timestamp}
    table.delete_item(Key=turn_key)
42 |
43 |
def create_session(table):
    """Create a new session record and return its id.

    The record starts with the current time in milliseconds as
    ``start_time``, no ``end_time`` and zero turns.

    :param table: Table object exposing ``put_item``.
    :return: The generated session id (a UUID4 string).
    """
    new_id = str(uuid.uuid4())
    started_ms = int(time.time() * 1000)
    record = {
        'session_id': new_id,
        'start_time': started_ms,
        'end_time': None,
        'num_turns': 0,
    }
    table.put_item(Item=record)
    return new_id
56 |
57 |
def end_session(table, session_id, conversations_table=None):
    """Mark a session as ended and record its summary statistics.

    Sets ``end_time``, ``num_turns`` and ``conversation_duration`` (in
    milliseconds) on the session record.

    :param table: Sessions table object (``get_item`` / ``update_item``).
    :param session_id: Id of the session to end.
    :param conversations_table: Table holding the conversation turns. When
        omitted, the ``conversations`` table of the module-level DynamoDB
        resource is used.
    :raises KeyError: If no session record exists for ``session_id``.
    """
    end_time = int(time.time() * 1000)
    start_time_response = table.get_item(
        Key={'session_id': session_id}
    )
    start_time = start_time_response['Item']['start_time']

    # Turns live in the conversations table. Querying the sessions table
    # here would only ever find the session record itself.
    if conversations_table is None:
        conversations_table = client.Table('conversations')
    num_turns = len(get_conversations_by_session_id(conversations_table, session_id))
    conversation_duration = end_time - start_time  # Compute duration in milliseconds

    table.update_item(
        Key={'session_id': session_id},
        UpdateExpression="SET end_time = :end_time, num_turns = :num_turns, conversation_duration = "
                         ":conversation_duration",
        ExpressionAttributeValues={
            ':end_time': end_time,
            ':num_turns': num_turns,
            ':conversation_duration': conversation_duration
        }
    )
77 |
78 |
if __name__ == '__main__':
    # Manual smoke run: create a session, add three turns, end the session.
    sessions_table_ = client.Table('sessions')
    conversations_table_ = client.Table('conversations')

    # Start a new session
    session_id_ = create_session(sessions_table_)

    # Add conversation turns
    sample_turns_ = (
        ('hi', 'hello'),
        ('how are you?', 'i am fine'),
        ('what is the definition of court defamation?',
         'Court defamation is a type of civil wrong.'),
    )
    for user_message_, bot_message_ in sample_turns_:
        add_conversation_turn(conversations_table_, session_id_, user_message_, bot_message_)

    # End the session
    end_session(sessions_table_, session_id_)
100 |
--------------------------------------------------------------------------------
/chatbot-app/llm.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import boto3
3 | import json
4 | import yaml
5 |
6 |
# Configure logging and create the module logger
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger('log')


# Load settings; the path is relative to the current working directory
with open('./config/config.yml', 'r') as f:
    config = yaml.safe_load(f)

# Name of the text-generation endpoint, taken from the config file
endpoint_name = config['jumpstart']['text_gen_endpoint_name']
CONTENT_TYPE = 'application/json'

client = boto3.client('sagemaker-runtime')

# Generation parameters sent with every request
NUM_RETURN_SEQUENCES = 1
TOP_K = 0
TOP_P = 0.7
DO_SAMPLE = True
TEMPERATURE = 0.1
25 |
26 |
def detect_task(query: str) -> str:
    """Classify a query by its leading command.

    :param query: Text entered by the user.
    :return: ``'LTM VERIFIED SOURCES'`` for a ``\\verified`` or ``/verified``
        prefix, ``'LTM PAST CONVERSATIONS'`` for a ``\\past`` or ``/past``
        prefix, otherwise ``'STM CHAT'``.
    """
    if query.startswith(('\\verified', '/verified')):
        return 'LTM VERIFIED SOURCES'
    if query.startswith(('\\past', '/past')):
        return 'LTM PAST CONVERSATIONS'
    return 'STM CHAT'
34 |
35 |
def generate(prompt: str, max_length=256) -> str:
    """Generate a completion for ``prompt`` with the text-generation endpoint.

    :param prompt: Prompt text sent as ``text_inputs``.
    :param max_length: Value sent as ``max_length`` in the request.
    :return: First entry of the response's ``generated_texts``, stripped of
        surrounding whitespace.
    """
    payload = {'text_inputs': prompt,
               'max_length': max_length,
               'num_return_sequences': NUM_RETURN_SEQUENCES,
               'top_k': TOP_K,
               'top_p': TOP_P,
               'temperature': TEMPERATURE,
               'do_sample': DO_SAMPLE}
    payload = json.dumps(payload).encode('utf-8')
    # The module defines ``endpoint_name`` (lower case); the upper-case
    # spelling does not exist and raised NameError on every call.
    response = client.invoke_endpoint(EndpointName=endpoint_name,
                                      ContentType=CONTENT_TYPE,
                                      Body=payload)
    model_predictions = json.loads(response['Body'].read())
    generated_text = model_predictions['generated_texts'][0]
    completion = generated_text.strip()
    return completion
52 |
53 |
def summarize_passages_and_collate_answers(passages: list, query: str) -> str:
    """Answer the query once per passage and join the cited answers.

    Args:
        passages: Entries that each unpack into ``(passage, doc_id,
            passage_id)``.
        query: Question inserted into every prompt.

    Returns:
        One string holding each generated answer followed by its
        ``[doc = ... | passage = ...]`` citation, separated by blank lines.
    """
    def answer_with_citation(entry) -> str:
        # One model call per passage; the citation records where it came from.
        text, doc_id, passage_id = entry
        prompt = (
            f'Passage=={text}\n\nQuestion=={query}\n\nAnswer==\n\n'
            'Given a passage and a question, generate a clean answer in 2 to 3 short complete sentences. '
        )
        answer = generate(prompt, 256)
        return f'{answer}\n\n[doc = {doc_id} | passage = {passage_id}]'

    collated = '\n\n'.join([answer_with_citation(entry) for entry in passages])
    logger.info(f'ANSWERS: {collated}')
    return collated
64 |
65 |
def generate_dialogue_response(prompt: str) -> str:
    """Generate a reply for the dialogue prompt, log it and return it."""
    reply = generate(prompt, 256)
    logger.info(f'DIALOGUE RESPONSE: {reply}')
    return reply
70 |
71 |
if __name__ == '__main__':
    # Manual smoke test: classify a sample query and log the detected task.
    # Uses the module's own logger, like the rest of this file, rather than
    # the root `logging` module.
    task_ = detect_task('definition of bribery by indian law ')
    logger.info(task_)
75 |
--------------------------------------------------------------------------------
/chatbot-app/retrieve.py:
--------------------------------------------------------------------------------
1 | from requests.auth import HTTPBasicAuth
2 | import datetime
3 | import requests
4 | import logging
5 | import boto3
6 | import yaml
7 | import json
8 |
9 |
# Configure logging once at import time; each record carries a timestamp,
# the logger name and the level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger('log')


# The config path is relative, so it is resolved against the current working
# directory of the process that imports this module.
with open('./config/config.yml', 'r') as file:
    config = yaml.safe_load(file)

# OpenSearch basic-auth credentials and domain URL used for search requests.
# NOTE(review): the password is read in plaintext from the config file —
# confirm that file is kept out of version control.
os_username = config['opensearch']['credentials']['username']
os_password = config['opensearch']['credentials']['password']
domain_endpoint = config['opensearch']['domain']['endpoint']
# Endpoint name for text embedding, read from the `jumpstart` config section.
text_embedding_model_endpoint_name = config['jumpstart']['text_embed_endpoint_name']
CONTENT_TYPE = 'application/json'

# SageMaker runtime client shared by all calls made from this module.
sagemaker_client = boto3.client('runtime.sagemaker')
25 |
26 |
def encode_query(query: str) -> list:
    """Embed a query string with the text-embedding endpoint.

    Args:
        query: Text to embed; it is sent as a one-element ``text_inputs`` list.

    Returns:
        The first entry of the ``embedding`` field in the endpoint response.
    """
    payload = json.dumps({'text_inputs': [query]}).encode('utf-8')
    # Use the module-level CONTENT_TYPE constant instead of repeating the
    # literal, so the request header is defined in one place.
    response = sagemaker_client.invoke_endpoint(EndpointName=text_embedding_model_endpoint_name,
                                                ContentType=CONTENT_TYPE,
                                                Body=payload)
    body = json.loads(response['Body'].read())
    return body['embedding'][0]
36 |
37 |
def get_es_query(embedding: list, k) -> dict:
    """Build a k-NN search body against the ``embedding`` field.

    Args:
        embedding: Query vector placed under ``vector``.
        k: Used both as the ``size`` of the result set and as the k-NN ``k``.

    Returns:
        The search request body as a dict.
    """
    knn_clause = {'vector': embedding, 'k': k}
    return {'size': k, 'query': {'knn': {'embedding': knn_clause}}}
51 |
52 |
def retrieve_top_matching_passages(query: str, index: str, k: int = 3, timeout: float = 30.0) -> list:
    """Return the passages in an index that best match the query.

    Args:
        query: Text to embed and search for.
        index: Name of the index to search.
        k: Number of nearest neighbours to request (defaults to the
            previously hard-coded 3).
        timeout: Seconds to wait for the search request before giving up.

    Returns:
        A list of ``[passage, doc_id, passage_id]`` lists, in the order the
        hits were returned.

    Raises:
        requests.HTTPError: If the search endpoint answers with an error status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    search_body = get_es_query(encode_query(query), k)
    url = f'{domain_endpoint}/{index}/_search'
    response = requests.post(url, auth=HTTPBasicAuth(os_username, os_password),
                             json=search_body, timeout=timeout)
    # Fail with the real HTTP error instead of a confusing KeyError on 'hits'.
    response.raise_for_status()
    hits = response.json()['hits']['hits']
    return [
        [hit['_source']['passage'], hit['_source']['doc_id'], hit['_source']['passage_id']]
        for hit in hits
    ]
68 |
69 |
def retrieve_top_matching_past_conversations(query: str, index: str, k: int = 3, timeout: float = 30.0) -> list:
    """Return summaries of past conversations matching the query, newest first.

    Args:
        query: Text to embed and search for.
        index: Name of the index to search.
        k: Number of nearest neighbours to request (defaults to the
            previously hard-coded 3).
        timeout: Seconds to wait for the search request before giving up.

    Returns:
        A list of strings of the form ``[YYYY-MM-DD][HH:MM:SS] summary``,
        ordered by descending ``created_at``. The timestamp is rendered in the
        local timezone of the running process.

    Raises:
        requests.HTTPError: If the search endpoint answers with an error status.
        requests.Timeout: If the request exceeds ``timeout``.
    """
    search_body = get_es_query(encode_query(query), k)
    url = f'{domain_endpoint}/{index}/_search'
    response = requests.post(url, auth=HTTPBasicAuth(os_username, os_password),
                             json=search_body, timeout=timeout)
    # Fail with the real HTTP error instead of a confusing KeyError on 'hits'.
    response.raise_for_status()
    hits = response.json()['hits']['hits']

    # Collect (timestamp, summary) pairs in a list rather than a dict keyed by
    # timestamp, so conversations sharing a `created_at` are not overwritten.
    entries = []
    for hit in hits:
        source = hit['_source']
        created_at_ms = int(source['created_at'])
        # `created_at` is divided by 1000 because it is stored in milliseconds.
        created_at = datetime.datetime.fromtimestamp(created_at_ms / 1000.0)
        stamp = created_at.strftime('[%Y-%m-%d][%H:%M:%S]')
        entries.append((created_at_ms, f"{stamp} {source['conversation_summary']}"))

    entries.sort(key=lambda entry: entry[0], reverse=True)
    return [summary for _, summary in entries]
97 |
98 |
if __name__ == '__main__':
    # Manual smoke test: run both retrieval paths with a sample query and
    # log what each one returns.
    logger.info(retrieve_top_matching_past_conversations('court defamation', 'conversations'))
    logger.info(retrieve_top_matching_passages('court defamation', 'legal-passages'))
104 |
--------------------------------------------------------------------------------
/config.yml:
--------------------------------------------------------------------------------
1 | credentials:
2 | username: js-es
3 | password: JumpStart123!
4 | domain:
5 | endpoint: https://search-semantic-search-hryn56c5jy43yryimohz4ajvyi.us-east-1.es.amazonaws.com
6 | index: conversations
7 |
--------------------------------------------------------------------------------
/img/cognition.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arunprsh/aws-sagemaker-chatbot-memory/cb38c5e9fec385df2e99a25df8f2c83ee0f401a7/img/cognition.png
--------------------------------------------------------------------------------
/lambda-env.csv:
--------------------------------------------------------------------------------
1 | Environment Variable,Value
2 | OS_ENDPOINT,https://xxxxxxxxx.us-east-1.es.amazonaws.com
3 | OS_INDEX_NAME,conversations
4 | OS_PASSWORD,xxxxxxxxx
5 | OS_USERNAME,xxxxxxxxx
6 | REGION,us-east-1
7 | SAGEMAKER_TEXT_EMBED_ENDPOINT,huggingface-textembedding-gpt-j-6b-fp16-xxxxxxxxx
8 | SAGEMAKER_TEXT_GEN_ENDPOINT,flan-xxl-xxxxxxxxx
--------------------------------------------------------------------------------