├── .gitignore
├── 01-deploy-text-embedding-model.ipynb
├── 02-deploy-text-generation-model.ipynb
├── 03-create-dynamodb-tables.ipynb
├── 04-create-os-index.ipynb
├── 05-lambda-handler.py
├── LICENSE
├── README.md
├── chatbot-app
│   ├── app.py
│   ├── config
│   │   └── config.yml
│   ├── ddb.py
│   ├── llm.py
│   └── retrieve.py
├── config.yml
├── img
│   └── cognition.png
└── lambda-env.csv


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 


--------------------------------------------------------------------------------
/01-deploy-text-embedding-model.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "3a4bc882-63fb-4049-8551-d0fa3127bd6f",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "## Deploy Text Embedding Model (GPT-J 6B FP-16)"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "id": "259bf0cc-c971-48c7-a537-6cc5958c9267",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "#### Imports"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": null,
 22 |    "id": "fa427a29-aa9d-411c-8ffc-7cada2feb57f",
 23 |    "metadata": {
 24 |     "tags": []
 25 |    },
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "from sagemaker.jumpstart.notebook_utils import list_jumpstart_models\n",
 29 |     "from sagemaker.predictor import Predictor\n",
 30 |     "from sagemaker import get_execution_role\n",
 31 |     "from sagemaker.model import Model\n",
 32 |     "from sagemaker import script_uris\n",
 33 |     "from sagemaker import image_uris \n",
 34 |     "from sagemaker import model_uris\n",
 35 |     "import sagemaker\n",
 36 |     "import logging\n",
 37 |     "import boto3\n",
 38 |     "import time\n",
 39 |     "import json"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "id": "09080f41-3b2c-464e-9557-a9fae1313b63",
 45 |    "metadata": {},
 46 |    "source": [
 47 |     "##### Set up logging"
 48 |    ]
 49 |   },
 50 |   {
 51 |    "cell_type": "code",
 52 |    "execution_count": null,
 53 |    "id": "bde2a70f-471b-4eaf-9703-10fdcf67863e",
 54 |    "metadata": {
 55 |     "tags": []
 56 |    },
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "logger = logging.getLogger('sagemaker')\n",
 60 |     "logger.setLevel(logging.DEBUG)\n",
 61 |     "logger.addHandler(logging.StreamHandler())"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "markdown",
 66 |    "id": "5372bb7c-ba1d-46af-ad4c-2e77503f422a",
 67 |    "metadata": {},
 68 |    "source": [
 69 |     "##### Log versions of dependencies"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": null,
 75 |    "id": "cb81aa05-1479-48e3-97fa-e9fa5a3684a5",
 76 |    "metadata": {
 77 |     "tags": []
 78 |    },
 79 |    "outputs": [],
 80 |    "source": [
 81 |     "logger.info(f'Using sagemaker=={sagemaker.__version__}')\n",
 82 |     "logger.info(f'Using boto3=={boto3.__version__}')"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "markdown",
 87 |    "id": "df8a48e0-a23f-4fbd-9be2-811ae129622c",
 88 |    "metadata": {},
 89 |    "source": [
 90 |     "#### Set up essentials"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "markdown",
 95 |    "id": "93dc7315-4803-440d-8dfb-e36f51c0a54f",
 96 |    "metadata": {},
 97 |    "source": [
 98 |     "##### List and filter all text embedding models available in JumpStart"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "code",
103 |    "execution_count": null,
104 |    "id": "9060ff53-7c41-4998-b258-c847a6ee09c0",
105 |    "metadata": {
106 |     "tags": []
107 |    },
108 |    "outputs": [],
109 |    "source": [
110 |     "models = list_jumpstart_models()\n",
111 |     "logger.info(f'Total number of models in SageMaker JumpStart hub = {len(models)}')\n",
112 |     "\n",
113 |     "FILTER = 'task == textembedding'\n",
114 |     "text_embedding_models = list_jumpstart_models(filter=FILTER)\n",
115 |     "text_embedding_models"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "markdown",
120 |    "id": "34b6fcc4-a7d0-4f01-b2fa-d01ad3f0e2a5",
121 |    "metadata": {},
122 |    "source": [
123 |     "##### Set up config params"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": null,
129 |    "id": "1b77dc13-20d5-4dad-aa8a-45ed2f8e70bf",
130 |    "metadata": {
131 |     "tags": []
132 |    },
133 |    "outputs": [],
134 |    "source": [
135 |     "MODEL_ID = 'huggingface-textembedding-gpt-j-6b-fp16'  \n",
136 |     "MODEL_VERSION = '*'\n",
137 |     "INSTANCE_TYPE = 'ml.g5.2xlarge'\n",
138 |     "INSTANCE_COUNT = 1\n",
139 |     "IMAGE_SCOPE = 'inference'\n",
140 |     "MODEL_DATA_DOWNLOAD_TIMEOUT = 3600  # in seconds\n",
141 |     "CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT = 3600\n",
142 |     "CONTENT_TYPE = 'application/json'\n",
143 |     "\n",
144 |     "# Set up roles and clients \n",
145 |     "client = boto3.client('sagemaker-runtime')\n",
146 |     "ROLE = get_execution_role()\n",
147 |     "logger.info(f'Role => {ROLE}')"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": null,
153 |    "id": "1526ad5f-9e74-459b-8dbd-1accdd576f12",
154 |    "metadata": {
155 |     "tags": []
156 |    },
157 |    "outputs": [],
158 |    "source": [
159 |     "unix_time = int(time.time())\n",
160 |     "endpoint_name = f'{MODEL_ID}-{unix_time}'\n",
161 |     "logger.info(f'Endpoint name: {endpoint_name}')"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "markdown",
166 |    "id": "283e1db7-5629-495a-abd1-cf04351d2ad8",
167 |    "metadata": {},
168 |    "source": [
169 |     "#### Retrieve image and model URIs"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": null,
175 |    "id": "187d02d5-93c9-4a87-bdd7-dd7443d7f64c",
176 |    "metadata": {
177 |     "tags": []
178 |    },
179 |    "outputs": [],
180 |    "source": [
181 |     "deploy_image_uri = image_uris.retrieve(region=None, \n",
182 |     "                                       framework=None, \n",
183 |     "                                       image_scope=IMAGE_SCOPE, \n",
184 |     "                                       model_id=MODEL_ID, \n",
185 |     "                                       model_version=MODEL_VERSION, \n",
186 |     "                                       instance_type=INSTANCE_TYPE)\n",
187 |     "logger.info(f'Deploy image URI => {deploy_image_uri}')"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "code",
192 |    "execution_count": null,
193 |    "id": "521ba56c-0b53-485c-9c96-b08c5a332a8f",
194 |    "metadata": {
195 |     "tags": []
196 |    },
197 |    "outputs": [],
198 |    "source": [
199 |     "model_uri = model_uris.retrieve(model_id=MODEL_ID, \n",
200 |     "                                model_version=MODEL_VERSION, \n",
201 |     "                                model_scope=IMAGE_SCOPE)\n",
202 |     "logger.info(f'Model URI => {model_uri}')"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "code",
207 |    "execution_count": null,
208 |    "id": "a32fbce5-500c-4606-8f19-e667e7c6e6a3",
209 |    "metadata": {
210 |     "tags": []
211 |    },
212 |    "outputs": [],
213 |    "source": [
214 |     "env = {\n",
215 |     "    'SAGEMAKER_MODEL_SERVER_TIMEOUT': str(3600),\n",
216 |     "    'MODEL_CACHE_ROOT': '/opt/ml/model', \n",
217 |     "    'SAGEMAKER_ENV': '1',\n",
218 |     "    'SAGEMAKER_SUBMIT_DIRECTORY': '/opt/ml/model/code/',\n",
219 |     "    'SAGEMAKER_PROGRAM': 'inference.py',\n",
220 |     "    'SAGEMAKER_MODEL_SERVER_WORKERS': '1', \n",
221 |     "    'TS_DEFAULT_WORKERS_PER_MODEL': '1', \n",
222 |     "}"
223 |    ]
224 |   },
225 |   {
226 |    "cell_type": "markdown",
227 |    "id": "f2657343-5a65-4c57-b39d-6c93107588c5",
228 |    "metadata": {},
229 |    "source": [
230 |     "#### Create SageMaker Model"
231 |    ]
232 |   },
233 |   {
234 |    "cell_type": "code",
235 |    "execution_count": null,
236 |    "id": "60aae2d1-c78a-4a89-890a-d6111ab95c57",
237 |    "metadata": {
238 |     "tags": []
239 |    },
240 |    "outputs": [],
241 |    "source": [
242 |     "model = Model(image_uri=deploy_image_uri, \n",
243 |     "              model_data=model_uri, \n",
244 |     "              role=ROLE, \n",
245 |     "              predictor_cls=Predictor, \n",
246 |     "              name=endpoint_name, \n",
247 |     "              env=env)"
248 |    ]
249 |   },
250 |   {
251 |    "cell_type": "markdown",
252 |    "id": "5a6b21c7-ff26-4dc1-89e9-aa5fea339771",
253 |    "metadata": {},
254 |    "source": [
255 |     "#### Deploy text embedding model as SageMaker endpoint for real-time synchronous inference"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "code",
260 |    "execution_count": null,
261 |    "id": "0ce97c1f-b2e6-42e4-87c4-6fba2a92bed1",
262 |    "metadata": {
263 |     "tags": []
264 |    },
265 |    "outputs": [],
266 |    "source": [
267 |     "%%time\n",
268 |     "\n",
269 |     "_ = model.deploy(initial_instance_count=INSTANCE_COUNT, \n",
270 |     "                 instance_type=INSTANCE_TYPE, \n",
271 |     "                 endpoint_name=endpoint_name, \n",
272 |     "                 model_data_download_timeout=MODEL_DATA_DOWNLOAD_TIMEOUT, \n",
273 |     "                 container_startup_health_check_timeout=CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT)"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "markdown",
278 |    "id": "46c497c6-2bd6-4e7c-a794-029c51bdc301",
279 |    "metadata": {},
280 |    "source": [
281 |     "### Test SageMaker endpoint for inference"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "code",
286 |    "execution_count": null,
287 |    "id": "2ec45951-bd9f-4096-812e-8ded522f6303",
288 |    "metadata": {
289 |     "tags": []
290 |    },
291 |    "outputs": [],
292 |    "source": [
293 |     "# endpoint_name = 'huggingface-textembedding-gpt-j-6b-fp16-1680825746'  # uncomment to test an already-deployed endpoint"
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "code",
298 |    "execution_count": null,
299 |    "id": "929dfc15-0b3a-490c-b856-6df11ca10f0d",
300 |    "metadata": {
301 |     "tags": []
302 |    },
303 |    "outputs": [],
304 |    "source": [
305 |     "query = 'what is the meaning of life according to an ant?'"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "code",
310 |    "execution_count": null,
311 |    "id": "1c06fb32-9246-4a3d-94e3-3cd57a4ba5e2",
312 |    "metadata": {
313 |     "tags": []
314 |    },
315 |    "outputs": [],
316 |    "source": [
317 |     "payload = {'text_inputs': [query]}\n",
318 |     "payload = json.dumps(payload).encode('utf-8')"
319 |    ]
320 |   },
321 |   {
322 |    "cell_type": "code",
323 |    "execution_count": null,
324 |    "id": "6d834078-f340-4ffd-8239-18774069574c",
325 |    "metadata": {
326 |     "tags": []
327 |    },
328 |    "outputs": [],
329 |    "source": [
330 |     "%%time\n",
331 |     "\n",
332 |     "response = client.invoke_endpoint(EndpointName=endpoint_name,\n",
333 |     "                                  ContentType='application/json',\n",
334 |     "                                  Body=payload)\n",
335 |     "    "
336 |    ]
337 |   },
338 |   {
339 |    "cell_type": "markdown",
340 |    "id": "213b5ef6-bf75-4d98-a096-da26168f696f",
341 |    "metadata": {},
342 |    "source": [
343 |     "##### Parse model response to extract query embedding"
344 |    ]
345 |   },
346 |   {
347 |    "cell_type": "code",
348 |    "execution_count": null,
349 |    "id": "757f986f-7986-452d-9946-2ea411a883a7",
350 |    "metadata": {
351 |     "tags": []
352 |    },
353 |    "outputs": [],
354 |    "source": [
355 |     "body = json.loads(response['Body'].read())\n",
356 |     "embedding = body['embedding'][0]\n",
357 |     "embedding"
358 |    ]
359 |   },
360 |   {
361 |    "cell_type": "code",
362 |    "execution_count": null,
363 |    "id": "5c609bfd-8c56-4bd3-8184-759ac1b67174",
364 |    "metadata": {},
365 |    "outputs": [],
366 |    "source": []
367 |   }
368 |  ],
369 |  "metadata": {
370 |   "availableInstances": [
371 |    {
372 |     "_defaultOrder": 0,
373 |     "_isFastLaunch": true,
374 |     "category": "General purpose",
375 |     "gpuNum": 0,
376 |     "hideHardwareSpecs": false,
377 |     "memoryGiB": 4,
378 |     "name": "ml.t3.medium",
379 |     "vcpuNum": 2
380 |    },
381 |    {
382 |     "_defaultOrder": 1,
383 |     "_isFastLaunch": false,
384 |     "category": "General purpose",
385 |     "gpuNum": 0,
386 |     "hideHardwareSpecs": false,
387 |     "memoryGiB": 8,
388 |     "name": "ml.t3.large",
389 |     "vcpuNum": 2
390 |    },
391 |    {
392 |     "_defaultOrder": 2,
393 |     "_isFastLaunch": false,
394 |     "category": "General purpose",
395 |     "gpuNum": 0,
396 |     "hideHardwareSpecs": false,
397 |     "memoryGiB": 16,
398 |     "name": "ml.t3.xlarge",
399 |     "vcpuNum": 4
400 |    },
401 |    {
402 |     "_defaultOrder": 3,
403 |     "_isFastLaunch": false,
404 |     "category": "General purpose",
405 |     "gpuNum": 0,
406 |     "hideHardwareSpecs": false,
407 |     "memoryGiB": 32,
408 |     "name": "ml.t3.2xlarge",
409 |     "vcpuNum": 8
410 |    },
411 |    {
412 |     "_defaultOrder": 4,
413 |     "_isFastLaunch": true,
414 |     "category": "General purpose",
415 |     "gpuNum": 0,
416 |     "hideHardwareSpecs": false,
417 |     "memoryGiB": 8,
418 |     "name": "ml.m5.large",
419 |     "vcpuNum": 2
420 |    },
421 |    {
422 |     "_defaultOrder": 5,
423 |     "_isFastLaunch": false,
424 |     "category": "General purpose",
425 |     "gpuNum": 0,
426 |     "hideHardwareSpecs": false,
427 |     "memoryGiB": 16,
428 |     "name": "ml.m5.xlarge",
429 |     "vcpuNum": 4
430 |    },
431 |    {
432 |     "_defaultOrder": 6,
433 |     "_isFastLaunch": false,
434 |     "category": "General purpose",
435 |     "gpuNum": 0,
436 |     "hideHardwareSpecs": false,
437 |     "memoryGiB": 32,
438 |     "name": "ml.m5.2xlarge",
439 |     "vcpuNum": 8
440 |    },
441 |    {
442 |     "_defaultOrder": 7,
443 |     "_isFastLaunch": false,
444 |     "category": "General purpose",
445 |     "gpuNum": 0,
446 |     "hideHardwareSpecs": false,
447 |     "memoryGiB": 64,
448 |     "name": "ml.m5.4xlarge",
449 |     "vcpuNum": 16
450 |    },
451 |    {
452 |     "_defaultOrder": 8,
453 |     "_isFastLaunch": false,
454 |     "category": "General purpose",
455 |     "gpuNum": 0,
456 |     "hideHardwareSpecs": false,
457 |     "memoryGiB": 128,
458 |     "name": "ml.m5.8xlarge",
459 |     "vcpuNum": 32
460 |    },
461 |    {
462 |     "_defaultOrder": 9,
463 |     "_isFastLaunch": false,
464 |     "category": "General purpose",
465 |     "gpuNum": 0,
466 |     "hideHardwareSpecs": false,
467 |     "memoryGiB": 192,
468 |     "name": "ml.m5.12xlarge",
469 |     "vcpuNum": 48
470 |    },
471 |    {
472 |     "_defaultOrder": 10,
473 |     "_isFastLaunch": false,
474 |     "category": "General purpose",
475 |     "gpuNum": 0,
476 |     "hideHardwareSpecs": false,
477 |     "memoryGiB": 256,
478 |     "name": "ml.m5.16xlarge",
479 |     "vcpuNum": 64
480 |    },
481 |    {
482 |     "_defaultOrder": 11,
483 |     "_isFastLaunch": false,
484 |     "category": "General purpose",
485 |     "gpuNum": 0,
486 |     "hideHardwareSpecs": false,
487 |     "memoryGiB": 384,
488 |     "name": "ml.m5.24xlarge",
489 |     "vcpuNum": 96
490 |    },
491 |    {
492 |     "_defaultOrder": 12,
493 |     "_isFastLaunch": false,
494 |     "category": "General purpose",
495 |     "gpuNum": 0,
496 |     "hideHardwareSpecs": false,
497 |     "memoryGiB": 8,
498 |     "name": "ml.m5d.large",
499 |     "vcpuNum": 2
500 |    },
501 |    {
502 |     "_defaultOrder": 13,
503 |     "_isFastLaunch": false,
504 |     "category": "General purpose",
505 |     "gpuNum": 0,
506 |     "hideHardwareSpecs": false,
507 |     "memoryGiB": 16,
508 |     "name": "ml.m5d.xlarge",
509 |     "vcpuNum": 4
510 |    },
511 |    {
512 |     "_defaultOrder": 14,
513 |     "_isFastLaunch": false,
514 |     "category": "General purpose",
515 |     "gpuNum": 0,
516 |     "hideHardwareSpecs": false,
517 |     "memoryGiB": 32,
518 |     "name": "ml.m5d.2xlarge",
519 |     "vcpuNum": 8
520 |    },
521 |    {
522 |     "_defaultOrder": 15,
523 |     "_isFastLaunch": false,
524 |     "category": "General purpose",
525 |     "gpuNum": 0,
526 |     "hideHardwareSpecs": false,
527 |     "memoryGiB": 64,
528 |     "name": "ml.m5d.4xlarge",
529 |     "vcpuNum": 16
530 |    },
531 |    {
532 |     "_defaultOrder": 16,
533 |     "_isFastLaunch": false,
534 |     "category": "General purpose",
535 |     "gpuNum": 0,
536 |     "hideHardwareSpecs": false,
537 |     "memoryGiB": 128,
538 |     "name": "ml.m5d.8xlarge",
539 |     "vcpuNum": 32
540 |    },
541 |    {
542 |     "_defaultOrder": 17,
543 |     "_isFastLaunch": false,
544 |     "category": "General purpose",
545 |     "gpuNum": 0,
546 |     "hideHardwareSpecs": false,
547 |     "memoryGiB": 192,
548 |     "name": "ml.m5d.12xlarge",
549 |     "vcpuNum": 48
550 |    },
551 |    {
552 |     "_defaultOrder": 18,
553 |     "_isFastLaunch": false,
554 |     "category": "General purpose",
555 |     "gpuNum": 0,
556 |     "hideHardwareSpecs": false,
557 |     "memoryGiB": 256,
558 |     "name": "ml.m5d.16xlarge",
559 |     "vcpuNum": 64
560 |    },
561 |    {
562 |     "_defaultOrder": 19,
563 |     "_isFastLaunch": false,
564 |     "category": "General purpose",
565 |     "gpuNum": 0,
566 |     "hideHardwareSpecs": false,
567 |     "memoryGiB": 384,
568 |     "name": "ml.m5d.24xlarge",
569 |     "vcpuNum": 96
570 |    },
571 |    {
572 |     "_defaultOrder": 20,
573 |     "_isFastLaunch": false,
574 |     "category": "General purpose",
575 |     "gpuNum": 0,
576 |     "hideHardwareSpecs": true,
577 |     "memoryGiB": 0,
578 |     "name": "ml.geospatial.interactive",
579 |     "supportedImageNames": [
580 |      "sagemaker-geospatial-v1-0"
581 |     ],
582 |     "vcpuNum": 0
583 |    },
584 |    {
585 |     "_defaultOrder": 21,
586 |     "_isFastLaunch": true,
587 |     "category": "Compute optimized",
588 |     "gpuNum": 0,
589 |     "hideHardwareSpecs": false,
590 |     "memoryGiB": 4,
591 |     "name": "ml.c5.large",
592 |     "vcpuNum": 2
593 |    },
594 |    {
595 |     "_defaultOrder": 22,
596 |     "_isFastLaunch": false,
597 |     "category": "Compute optimized",
598 |     "gpuNum": 0,
599 |     "hideHardwareSpecs": false,
600 |     "memoryGiB": 8,
601 |     "name": "ml.c5.xlarge",
602 |     "vcpuNum": 4
603 |    },
604 |    {
605 |     "_defaultOrder": 23,
606 |     "_isFastLaunch": false,
607 |     "category": "Compute optimized",
608 |     "gpuNum": 0,
609 |     "hideHardwareSpecs": false,
610 |     "memoryGiB": 16,
611 |     "name": "ml.c5.2xlarge",
612 |     "vcpuNum": 8
613 |    },
614 |    {
615 |     "_defaultOrder": 24,
616 |     "_isFastLaunch": false,
617 |     "category": "Compute optimized",
618 |     "gpuNum": 0,
619 |     "hideHardwareSpecs": false,
620 |     "memoryGiB": 32,
621 |     "name": "ml.c5.4xlarge",
622 |     "vcpuNum": 16
623 |    },
624 |    {
625 |     "_defaultOrder": 25,
626 |     "_isFastLaunch": false,
627 |     "category": "Compute optimized",
628 |     "gpuNum": 0,
629 |     "hideHardwareSpecs": false,
630 |     "memoryGiB": 72,
631 |     "name": "ml.c5.9xlarge",
632 |     "vcpuNum": 36
633 |    },
634 |    {
635 |     "_defaultOrder": 26,
636 |     "_isFastLaunch": false,
637 |     "category": "Compute optimized",
638 |     "gpuNum": 0,
639 |     "hideHardwareSpecs": false,
640 |     "memoryGiB": 96,
641 |     "name": "ml.c5.12xlarge",
642 |     "vcpuNum": 48
643 |    },
644 |    {
645 |     "_defaultOrder": 27,
646 |     "_isFastLaunch": false,
647 |     "category": "Compute optimized",
648 |     "gpuNum": 0,
649 |     "hideHardwareSpecs": false,
650 |     "memoryGiB": 144,
651 |     "name": "ml.c5.18xlarge",
652 |     "vcpuNum": 72
653 |    },
654 |    {
655 |     "_defaultOrder": 28,
656 |     "_isFastLaunch": false,
657 |     "category": "Compute optimized",
658 |     "gpuNum": 0,
659 |     "hideHardwareSpecs": false,
660 |     "memoryGiB": 192,
661 |     "name": "ml.c5.24xlarge",
662 |     "vcpuNum": 96
663 |    },
664 |    {
665 |     "_defaultOrder": 29,
666 |     "_isFastLaunch": true,
667 |     "category": "Accelerated computing",
668 |     "gpuNum": 1,
669 |     "hideHardwareSpecs": false,
670 |     "memoryGiB": 16,
671 |     "name": "ml.g4dn.xlarge",
672 |     "vcpuNum": 4
673 |    },
674 |    {
675 |     "_defaultOrder": 30,
676 |     "_isFastLaunch": false,
677 |     "category": "Accelerated computing",
678 |     "gpuNum": 1,
679 |     "hideHardwareSpecs": false,
680 |     "memoryGiB": 32,
681 |     "name": "ml.g4dn.2xlarge",
682 |     "vcpuNum": 8
683 |    },
684 |    {
685 |     "_defaultOrder": 31,
686 |     "_isFastLaunch": false,
687 |     "category": "Accelerated computing",
688 |     "gpuNum": 1,
689 |     "hideHardwareSpecs": false,
690 |     "memoryGiB": 64,
691 |     "name": "ml.g4dn.4xlarge",
692 |     "vcpuNum": 16
693 |    },
694 |    {
695 |     "_defaultOrder": 32,
696 |     "_isFastLaunch": false,
697 |     "category": "Accelerated computing",
698 |     "gpuNum": 1,
699 |     "hideHardwareSpecs": false,
700 |     "memoryGiB": 128,
701 |     "name": "ml.g4dn.8xlarge",
702 |     "vcpuNum": 32
703 |    },
704 |    {
705 |     "_defaultOrder": 33,
706 |     "_isFastLaunch": false,
707 |     "category": "Accelerated computing",
708 |     "gpuNum": 4,
709 |     "hideHardwareSpecs": false,
710 |     "memoryGiB": 192,
711 |     "name": "ml.g4dn.12xlarge",
712 |     "vcpuNum": 48
713 |    },
714 |    {
715 |     "_defaultOrder": 34,
716 |     "_isFastLaunch": false,
717 |     "category": "Accelerated computing",
718 |     "gpuNum": 1,
719 |     "hideHardwareSpecs": false,
720 |     "memoryGiB": 256,
721 |     "name": "ml.g4dn.16xlarge",
722 |     "vcpuNum": 64
723 |    },
724 |    {
725 |     "_defaultOrder": 35,
726 |     "_isFastLaunch": false,
727 |     "category": "Accelerated computing",
728 |     "gpuNum": 1,
729 |     "hideHardwareSpecs": false,
730 |     "memoryGiB": 61,
731 |     "name": "ml.p3.2xlarge",
732 |     "vcpuNum": 8
733 |    },
734 |    {
735 |     "_defaultOrder": 36,
736 |     "_isFastLaunch": false,
737 |     "category": "Accelerated computing",
738 |     "gpuNum": 4,
739 |     "hideHardwareSpecs": false,
740 |     "memoryGiB": 244,
741 |     "name": "ml.p3.8xlarge",
742 |     "vcpuNum": 32
743 |    },
744 |    {
745 |     "_defaultOrder": 37,
746 |     "_isFastLaunch": false,
747 |     "category": "Accelerated computing",
748 |     "gpuNum": 8,
749 |     "hideHardwareSpecs": false,
750 |     "memoryGiB": 488,
751 |     "name": "ml.p3.16xlarge",
752 |     "vcpuNum": 64
753 |    },
754 |    {
755 |     "_defaultOrder": 38,
756 |     "_isFastLaunch": false,
757 |     "category": "Accelerated computing",
758 |     "gpuNum": 8,
759 |     "hideHardwareSpecs": false,
760 |     "memoryGiB": 768,
761 |     "name": "ml.p3dn.24xlarge",
762 |     "vcpuNum": 96
763 |    },
764 |    {
765 |     "_defaultOrder": 39,
766 |     "_isFastLaunch": false,
767 |     "category": "Memory Optimized",
768 |     "gpuNum": 0,
769 |     "hideHardwareSpecs": false,
770 |     "memoryGiB": 16,
771 |     "name": "ml.r5.large",
772 |     "vcpuNum": 2
773 |    },
774 |    {
775 |     "_defaultOrder": 40,
776 |     "_isFastLaunch": false,
777 |     "category": "Memory Optimized",
778 |     "gpuNum": 0,
779 |     "hideHardwareSpecs": false,
780 |     "memoryGiB": 32,
781 |     "name": "ml.r5.xlarge",
782 |     "vcpuNum": 4
783 |    },
784 |    {
785 |     "_defaultOrder": 41,
786 |     "_isFastLaunch": false,
787 |     "category": "Memory Optimized",
788 |     "gpuNum": 0,
789 |     "hideHardwareSpecs": false,
790 |     "memoryGiB": 64,
791 |     "name": "ml.r5.2xlarge",
792 |     "vcpuNum": 8
793 |    },
794 |    {
795 |     "_defaultOrder": 42,
796 |     "_isFastLaunch": false,
797 |     "category": "Memory Optimized",
798 |     "gpuNum": 0,
799 |     "hideHardwareSpecs": false,
800 |     "memoryGiB": 128,
801 |     "name": "ml.r5.4xlarge",
802 |     "vcpuNum": 16
803 |    },
804 |    {
805 |     "_defaultOrder": 43,
806 |     "_isFastLaunch": false,
807 |     "category": "Memory Optimized",
808 |     "gpuNum": 0,
809 |     "hideHardwareSpecs": false,
810 |     "memoryGiB": 256,
811 |     "name": "ml.r5.8xlarge",
812 |     "vcpuNum": 32
813 |    },
814 |    {
815 |     "_defaultOrder": 44,
816 |     "_isFastLaunch": false,
817 |     "category": "Memory Optimized",
818 |     "gpuNum": 0,
819 |     "hideHardwareSpecs": false,
820 |     "memoryGiB": 384,
821 |     "name": "ml.r5.12xlarge",
822 |     "vcpuNum": 48
823 |    },
824 |    {
825 |     "_defaultOrder": 45,
826 |     "_isFastLaunch": false,
827 |     "category": "Memory Optimized",
828 |     "gpuNum": 0,
829 |     "hideHardwareSpecs": false,
830 |     "memoryGiB": 512,
831 |     "name": "ml.r5.16xlarge",
832 |     "vcpuNum": 64
833 |    },
834 |    {
835 |     "_defaultOrder": 46,
836 |     "_isFastLaunch": false,
837 |     "category": "Memory Optimized",
838 |     "gpuNum": 0,
839 |     "hideHardwareSpecs": false,
840 |     "memoryGiB": 768,
841 |     "name": "ml.r5.24xlarge",
842 |     "vcpuNum": 96
843 |    },
844 |    {
845 |     "_defaultOrder": 47,
846 |     "_isFastLaunch": false,
847 |     "category": "Accelerated computing",
848 |     "gpuNum": 1,
849 |     "hideHardwareSpecs": false,
850 |     "memoryGiB": 16,
851 |     "name": "ml.g5.xlarge",
852 |     "vcpuNum": 4
853 |    },
854 |    {
855 |     "_defaultOrder": 48,
856 |     "_isFastLaunch": false,
857 |     "category": "Accelerated computing",
858 |     "gpuNum": 1,
859 |     "hideHardwareSpecs": false,
860 |     "memoryGiB": 32,
861 |     "name": "ml.g5.2xlarge",
862 |     "vcpuNum": 8
863 |    },
864 |    {
865 |     "_defaultOrder": 49,
866 |     "_isFastLaunch": false,
867 |     "category": "Accelerated computing",
868 |     "gpuNum": 1,
869 |     "hideHardwareSpecs": false,
870 |     "memoryGiB": 64,
871 |     "name": "ml.g5.4xlarge",
872 |     "vcpuNum": 16
873 |    },
874 |    {
875 |     "_defaultOrder": 50,
876 |     "_isFastLaunch": false,
877 |     "category": "Accelerated computing",
878 |     "gpuNum": 1,
879 |     "hideHardwareSpecs": false,
880 |     "memoryGiB": 128,
881 |     "name": "ml.g5.8xlarge",
882 |     "vcpuNum": 32
883 |    },
884 |    {
885 |     "_defaultOrder": 51,
886 |     "_isFastLaunch": false,
887 |     "category": "Accelerated computing",
888 |     "gpuNum": 1,
889 |     "hideHardwareSpecs": false,
890 |     "memoryGiB": 256,
891 |     "name": "ml.g5.16xlarge",
892 |     "vcpuNum": 64
893 |    },
894 |    {
895 |     "_defaultOrder": 52,
896 |     "_isFastLaunch": false,
897 |     "category": "Accelerated computing",
898 |     "gpuNum": 4,
899 |     "hideHardwareSpecs": false,
900 |     "memoryGiB": 192,
901 |     "name": "ml.g5.12xlarge",
902 |     "vcpuNum": 48
903 |    },
904 |    {
905 |     "_defaultOrder": 53,
906 |     "_isFastLaunch": false,
907 |     "category": "Accelerated computing",
908 |     "gpuNum": 4,
909 |     "hideHardwareSpecs": false,
910 |     "memoryGiB": 384,
911 |     "name": "ml.g5.24xlarge",
912 |     "vcpuNum": 96
913 |    },
914 |    {
915 |     "_defaultOrder": 54,
916 |     "_isFastLaunch": false,
917 |     "category": "Accelerated computing",
918 |     "gpuNum": 8,
919 |     "hideHardwareSpecs": false,
920 |     "memoryGiB": 768,
921 |     "name": "ml.g5.48xlarge",
922 |     "vcpuNum": 192
923 |    },
924 |    {
925 |     "_defaultOrder": 55,
926 |     "_isFastLaunch": false,
927 |     "category": "Accelerated computing",
928 |     "gpuNum": 8,
929 |     "hideHardwareSpecs": false,
930 |     "memoryGiB": 1152,
931 |     "name": "ml.p4d.24xlarge",
932 |     "vcpuNum": 96
933 |    },
934 |    {
935 |     "_defaultOrder": 56,
936 |     "_isFastLaunch": false,
937 |     "category": "Accelerated computing",
938 |     "gpuNum": 8,
939 |     "hideHardwareSpecs": false,
940 |     "memoryGiB": 1152,
941 |     "name": "ml.p4de.24xlarge",
942 |     "vcpuNum": 96
943 |    }
944 |   ],
945 |   "instance_type": "ml.t3.medium",
946 |   "kernelspec": {
947 |    "display_name": "Python 3 (Data Science)",
948 |    "language": "python",
949 |    "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
950 |   },
951 |   "language_info": {
952 |    "codemirror_mode": {
953 |     "name": "ipython",
954 |     "version": 3
955 |    },
956 |    "file_extension": ".py",
957 |    "mimetype": "text/x-python",
958 |    "name": "python",
959 |    "nbconvert_exporter": "python",
960 |    "pygments_lexer": "ipython3",
961 |    "version": "3.7.10"
962 |   }
963 |  },
964 |  "nbformat": 4,
965 |  "nbformat_minor": 5
966 | }
967 | 


--------------------------------------------------------------------------------
/02-deploy-text-generation-model.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "id": "ce1c37fc-91a5-49df-804f-92319ed8a678",
   6 |    "metadata": {},
   7 |    "source": [
   8 |     "## Deploy Text Generation Model (FLAN-T5 XXL)"
   9 |    ]
  10 |   },
  11 |   {
  12 |    "cell_type": "markdown",
  13 |    "id": "cd154824-add4-46d3-a47d-a3dc317c5847",
  14 |    "metadata": {},
  15 |    "source": [
  16 |     "#### Imports "
  17 |    ]
  18 |   },
  19 |   {
  20 |    "cell_type": "code",
  21 |    "execution_count": 2,
  22 |    "id": "66e8ab21-8a0f-405c-a706-3aa5608a04a8",
  23 |    "metadata": {
  24 |     "tags": []
  25 |    },
  26 |    "outputs": [],
  27 |    "source": [
  28 |     "from sagemaker.jumpstart.notebook_utils import list_jumpstart_models\n",
  29 |     "from sagemaker.predictor import Predictor\n",
  30 |     "from sagemaker import get_execution_role\n",
  31 |     "from sagemaker import ModelPackage\n",
  32 |     "from sagemaker.model import Model\n",
  33 |     "from sagemaker import image_uris \n",
  34 |     "from sagemaker import model_uris\n",
  35 |     "import numpy as np\n",
  36 |     "import sagemaker\n",
  37 |     "import logging\n",
  38 |     "import boto3\n",
  39 |     "import time\n",
  40 |     "import json"
  41 |    ]
  42 |   },
  43 |   {
  44 |    "cell_type": "markdown",
  45 |    "id": "38c09c3f-4302-40c2-8191-c730c32b5dbc",
  46 |    "metadata": {},
  47 |    "source": [
  48 |     "##### Setup logging "
  49 |    ]
  50 |   },
  51 |   {
  52 |    "cell_type": "code",
  53 |    "execution_count": 3,
  54 |    "id": "594a1c5c-a125-4cce-a538-3cfc840edcd7",
  55 |    "metadata": {
  56 |     "tags": []
  57 |    },
  58 |    "outputs": [],
  59 |    "source": [
  60 |     "logger = logging.getLogger('sagemaker')\n",
  61 |     "logger.setLevel(logging.DEBUG)\n",
  62 |     "logger.addHandler(logging.StreamHandler())"
  63 |    ]
  64 |   },
  65 |   {
  66 |    "cell_type": "markdown",
  67 |    "id": "cd0e20e0-9c4b-4487-ae23-995cf33807a2",
  68 |    "metadata": {},
  69 |    "source": [
  70 |     "##### Log versions of dependencies "
  71 |    ]
  72 |   },
  73 |   {
  74 |    "cell_type": "code",
  75 |    "execution_count": 4,
  76 |    "id": "48c47fd6-fd9f-4da8-94d1-7c0b3d23490b",
  77 |    "metadata": {
  78 |     "tags": []
  79 |    },
  80 |    "outputs": [
  81 |     {
  82 |      "name": "stderr",
  83 |      "output_type": "stream",
  84 |      "text": [
  85 |       "Using sagemaker==2.145.0\n",
  86 |       "Using boto3==1.26.111\n"
  87 |      ]
  88 |     }
  89 |    ],
  90 |    "source": [
  91 |     "logger.info(f'Using sagemaker=={sagemaker.__version__}')\n",
  92 |     "logger.info(f'Using boto3=={boto3.__version__}')"
  93 |    ]
  94 |   },
  95 |   {
  96 |    "cell_type": "markdown",
  97 |    "id": "38c2bb8a-a711-4c2c-aaf0-04cd2a22bc08",
  98 |    "metadata": {},
  99 |    "source": [
 100 |     "#### Setup essentials "
 101 |    ]
 102 |   },
 103 |   {
 104 |    "cell_type": "code",
 105 |    "execution_count": 5,
 106 |    "id": "6718b0cf-42bf-42f8-83a5-89c2189891a5",
 107 |    "metadata": {
 108 |     "tags": []
 109 |    },
 110 |    "outputs": [
 111 |     {
 112 |      "name": "stderr",
 113 |      "output_type": "stream",
 114 |      "text": [
 115 |       "Region = us-east-1\n"
 116 |      ]
 117 |     }
 118 |    ],
 119 |    "source": [
 120 |     "region = boto3.Session().region_name\n",
 121 |     "logger.info(f'Region = {region}')"
 122 |    ]
 123 |   },
 124 |   {
 125 |    "cell_type": "markdown",
 126 |    "id": "93779cf2-1d73-48b7-8e9c-7a58a0d1e23a",
 127 |    "metadata": {},
 128 |    "source": [
 129 |     "##### Get list of models available in the SageMaker JumpStart model hub"
 130 |    ]
 131 |   },
 132 |   {
 133 |    "cell_type": "code",
 134 |    "execution_count": 6,
 135 |    "id": "ef740ccc-04b9-49cd-a9f2-b00a1638d7a5",
 136 |    "metadata": {
 137 |     "tags": []
 138 |    },
 139 |    "outputs": [
 140 |     {
 141 |      "name": "stderr",
 142 |      "output_type": "stream",
 143 |      "text": [
 144 |       "Total number of models in SageMaker JumpStart hub = 679\n"
 145 |      ]
 146 |     }
 147 |    ],
 148 |    "source": [
 149 |     "models = list_jumpstart_models()\n",
 150 |     "logger.info(f'Total number of models in SageMaker JumpStart hub = {len(models)}')"
 151 |    ]
 152 |   },
 153 |   {
 154 |    "cell_type": "markdown",
 155 |    "id": "bf7702b6-31ca-4166-9b80-27f2cd9019fc",
 156 |    "metadata": {},
 157 |    "source": [
 158 |     "##### Setup inference deployment config params"
 159 |    ]
 160 |   },
 161 |   {
 162 |    "cell_type": "code",
 163 |    "execution_count": 7,
 164 |    "id": "44235c29-162b-468d-8a7f-7024a852100e",
 165 |    "metadata": {},
 166 |    "outputs": [
 167 |     {
 168 |      "name": "stderr",
 169 |      "output_type": "stream",
 170 |      "text": [
 171 |       "Role => arn:aws:iam::119174016168:role/service-role/AmazonSageMaker-ExecutionRole-20211014T093628\n"
 172 |      ]
 173 |     }
 174 |    ],
 175 |    "source": [
 176 |     "MODEL_ID = 'huggingface-text2text-flan-t5-xxl'  # this is hard-coded\n",
 177 |     "MODEL_VERSION = '*'\n",
 178 |     "INSTANCE_TYPE = 'ml.g4dn.12xlarge'\n",
 179 |     "INSTANCE_COUNT = 1\n",
 180 |     "IMAGE_SCOPE = 'inference'\n",
 181 |     "MODEL_DATA_DOWNLOAD_TIMEOUT = 3600  # in seconds\n",
 182 |     "CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT = 3600\n",
 183 |     "CONTENT_TYPE = 'application/json'\n",
 184 |     "\n",
 185 |     "# set up roles and clients \n",
 186 |     "client = boto3.client('sagemaker-runtime')\n",
 187 |     "ROLE = get_execution_role()\n",
 188 |     "logger.info(f'Role => {ROLE}')"
 189 |    ]
 190 |   },
 191 |   {
 192 |    "cell_type": "code",
 193 |    "execution_count": 8,
 194 |    "id": "d5647e3c-8d3f-4bac-8229-3c5a2de5233b",
 195 |    "metadata": {},
 196 |    "outputs": [
 197 |     {
 198 |      "name": "stderr",
 199 |      "output_type": "stream",
 200 |      "text": [
 201 |       "Endpoint name: flan-xxl-1686852282\n"
 202 |      ]
 203 |     }
 204 |    ],
 205 |    "source": [
 206 |     "unix_time = int(time.time())\n",
 207 |     "\n",
 208 |     "endpoint_name = f'flan-xxl-{unix_time}'\n",
 209 |     "logger.info(f'Endpoint name: {endpoint_name}')"
 210 |    ]
 211 |   },
 212 |   {
 213 |    "cell_type": "markdown",
 214 |    "id": "97b8cdaf-6b26-4fa4-8d01-98c14c7f7f97",
 215 |    "metadata": {},
 216 |    "source": [
 217 |     "#### Retrieve Image and Model URIs"
 218 |    ]
 219 |   },
 220 |   {
 221 |    "cell_type": "code",
 222 |    "execution_count": 9,
 223 |    "id": "fe5b3bcc-3ed9-4a8e-bc8e-32fcaab25322",
 224 |    "metadata": {},
 225 |    "outputs": [
 226 |     {
 227 |      "name": "stderr",
 228 |      "output_type": "stream",
 229 |      "text": [
 230 |       "Deploy image URI => 763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04\n"
 231 |      ]
 232 |     }
 233 |    ],
 234 |    "source": [
 235 |     "deploy_image_uri = image_uris.retrieve(region=None, \n",
 236 |     "                                       framework=None, \n",
 237 |     "                                       image_scope=IMAGE_SCOPE, \n",
 238 |     "                                       model_id=MODEL_ID, \n",
 239 |     "                                       model_version=MODEL_VERSION, \n",
 240 |     "                                       instance_type=INSTANCE_TYPE)\n",
 241 |     "logger.info(f'Deploy image URI => {deploy_image_uri}')"
 242 |    ]
 243 |   },
 244 |   {
 245 |    "cell_type": "code",
 246 |    "execution_count": 10,
 247 |    "id": "b7abadcf-e5f1-4253-b1e0-fdda825b0099",
 248 |    "metadata": {},
 249 |    "outputs": [
 250 |     {
 251 |      "name": "stderr",
 252 |      "output_type": "stream",
 253 |      "text": [
 254 |       "Model URI => s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.1.1/infer-prepack-huggingface-text2text-flan-t5-xxl.tar.gz\n"
 255 |      ]
 256 |     }
 257 |    ],
 258 |    "source": [
 259 |     "model_uri = model_uris.retrieve(model_id=MODEL_ID, \n",
 260 |     "                                model_version=MODEL_VERSION, \n",
 261 |     "                                model_scope=IMAGE_SCOPE)\n",
 262 |     "logger.info(f'Model URI => {model_uri}')"
 263 |    ]
 264 |   },
 265 |   {
 266 |    "cell_type": "code",
 267 |    "execution_count": 11,
 268 |    "id": "a24ad3db-6a79-453a-a74f-53d7d9bb5a8f",
 269 |    "metadata": {
 270 |     "tags": []
 271 |    },
 272 |    "outputs": [],
 273 |    "source": [
 274 |     "env = {\n",
 275 |     "    'SAGEMAKER_MODEL_SERVER_TIMEOUT': str(3600),\n",
 276 |     "    'MODEL_CACHE_ROOT': '/opt/ml/model', \n",
 277 |     "    'SAGEMAKER_ENV': '1',\n",
 278 |     "    'SAGEMAKER_SUBMIT_DIRECTORY': '/opt/ml/model/code/',\n",
 279 |     "    'SAGEMAKER_PROGRAM': 'inference.py',\n",
 280 |     "    'SAGEMAKER_MODEL_SERVER_WORKERS': '1', \n",
 281 |     "    'TS_DEFAULT_WORKERS_PER_MODEL': '1', \n",
 282 |     "}"
 283 |    ]
 284 |   },
 285 |   {
 286 |    "cell_type": "markdown",
 287 |    "id": "70caec84-ece2-4f96-9c71-812eeaa9215d",
 288 |    "metadata": {},
 289 |    "source": [
 290 |     "#### Create SageMaker model"
 291 |    ]
 292 |   },
 293 |   {
 294 |    "cell_type": "code",
 295 |    "execution_count": 12,
 296 |    "id": "9dba6599-3a10-40d4-96d9-5af82823131e",
 297 |    "metadata": {
 298 |     "tags": []
 299 |    },
 300 |    "outputs": [],
 301 |    "source": [
 302 |     "model_name = endpoint_name  # model reuses the endpoint name; the old .replace() prefix never matched\n",
 303 |     "model = Model(image_uri=deploy_image_uri, \n",
 304 |     "              model_data=model_uri, \n",
 305 |     "              role=ROLE, \n",
 306 |     "              predictor_cls=Predictor, \n",
 307 |     "              name=model_name, \n",
 308 |     "              env=env)"
 309 |    ]
 310 |   },
 311 |   {
 312 |    "cell_type": "markdown",
 313 |    "id": "5d119629-e939-4a3a-896d-73dc33e57187",
 314 |    "metadata": {},
 315 |    "source": [
 316 |     "#### Deploy text generation model as SageMaker endpoint for real-time synchronous inference"
 317 |    ]
 318 |   },
 319 |   {
 320 |    "cell_type": "code",
 321 |    "execution_count": null,
 322 |    "id": "eb07c4cb-cae1-4af7-9a86-38d9e6851c73",
 323 |    "metadata": {
 324 |     "tags": []
 325 |    },
 326 |    "outputs": [
 327 |     {
 328 |      "name": "stderr",
 329 |      "output_type": "stream",
 330 |      "text": [
 331 |       "Creating model with name: flan-xxl-1686852282\n",
 332 |       "CreateModel request: {\n",
 333 |       "    \"ModelName\": \"flan-xxl-1686852282\",\n",
 334 |       "    \"ExecutionRoleArn\": \"arn:aws:iam::119174016168:role/service-role/AmazonSageMaker-ExecutionRole-20211014T093628\",\n",
 335 |       "    \"PrimaryContainer\": {\n",
 336 |       "        \"Image\": \"763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04\",\n",
 337 |       "        \"Environment\": {\n",
 338 |       "            \"SAGEMAKER_MODEL_SERVER_TIMEOUT\": \"3600\",\n",
 339 |       "            \"MODEL_CACHE_ROOT\": \"/opt/ml/model\",\n",
 340 |       "            \"SAGEMAKER_ENV\": \"1\",\n",
 341 |       "            \"SAGEMAKER_SUBMIT_DIRECTORY\": \"/opt/ml/model/code/\",\n",
 342 |       "            \"SAGEMAKER_PROGRAM\": \"inference.py\",\n",
 343 |       "            \"SAGEMAKER_MODEL_SERVER_WORKERS\": \"1\",\n",
 344 |       "            \"TS_DEFAULT_WORKERS_PER_MODEL\": \"1\"\n",
 345 |       "        },\n",
 346 |       "        \"ModelDataUrl\": \"s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.1.1/infer-prepack-huggingface-text2text-flan-t5-xxl.tar.gz\"\n",
 347 |       "    },\n",
 348 |       "    \"Tags\": [\n",
 349 |       "        {\n",
 350 |       "            \"Key\": \"aws-jumpstart-inference-model-uri\",\n",
 351 |       "            \"Value\": \"s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.1.1/infer-prepack-huggingface-text2text-flan-t5-xxl.tar.gz\"\n",
 352 |       "        }\n",
 353 |       "    ]\n",
 354 |       "}\n",
 355 |       "Creating endpoint-config with name flan-xxl-1686852282\n",
 356 |       "Creating endpoint with name flan-xxl-1686852282\n"
 357 |      ]
 358 |     },
 359 |     {
 360 |      "name": "stdout",
 361 |      "output_type": "stream",
 362 |      "text": [
 363 |       "--------------------!CPU times: user 158 ms, sys: 17.9 ms, total: 176 ms\n",
 364 |       "Wall time: 10min 34s\n"
 365 |      ]
 366 |     }
 367 |    ],
 368 |    "source": [
 369 |     "%%time\n",
 370 |     "\n",
 371 |     "_ = model.deploy(initial_instance_count=INSTANCE_COUNT, \n",
 372 |     "                 instance_type=INSTANCE_TYPE, \n",
 373 |     "                 endpoint_name=endpoint_name, \n",
 374 |     "                 model_data_download_timeout=MODEL_DATA_DOWNLOAD_TIMEOUT, \n",
 375 |     "                 container_startup_health_check_timeout=CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT)"
 376 |    ]
 377 |   },
 378 |   {
 379 |    "cell_type": "markdown",
 380 |    "id": "3bed6d31-f6ae-41e4-9a56-23c7e99cd019",
 381 |    "metadata": {},
 382 |    "source": [
 383 |     "### Invoke the SageMaker endpoint to test the deployed model on natural language understanding (NLU) and natural language generation (NLG) tasks"
 384 |    ]
 385 |   },
 386 |   {
 387 |    "cell_type": "markdown",
 388 |    "id": "1353ee9e-9beb-4bc7-acff-047c3ebf2038",
 389 |    "metadata": {},
 390 |    "source": [
 391 |     "***\n",
 392 |     "This model also supports many advanced parameters while performing inference. They include:\n",
 393 |     "\n",
 394 |     "* **max_length:** Model generates text until the output length reaches `max_length`. FLAN-T5 is an encoder-decoder model, so this limit applies to the generated tokens only and does not include the input length. If specified, it must be a positive integer.\n",
 395 |     "* **num_return_sequences:** Number of output sequences returned. If specified, it must be a positive integer.\n",
 396 |     "* **num_beams:** Number of beams used in the beam search. If specified, it must be an integer greater than or equal to `num_return_sequences`.\n",
 397 |     "* **no_repeat_ngram_size:** Model ensures that a sequence of words of `no_repeat_ngram_size` is not repeated in the output sequence. If specified, it must be a positive integer greater than 1.\n",
 398 |     "* **temperature:** Controls the randomness in the output. Higher temperature results in output sequence with low-probability words and lower temperature results in output sequence with high-probability words. If `temperature` -> 0, it results in greedy decoding. If specified, it must be a positive float.\n",
 399 |     "* **early_stopping:** If True, text generation is finished when all beam hypotheses reach the end of sentence token. If specified, it must be boolean.\n",
 400 |     "* **do_sample:** If True, sample the next word as per the likelihood. If specified, it must be boolean.\n",
 401 |     "* **top_k:** In each step of text generation, sample from only the `top_k` most likely words. If specified, it must be a positive integer.\n",
 402 |     "* **top_p:** In each step of text generation, sample from the smallest possible set of words with cumulative probability `top_p`. If specified, it must be a float between 0 and 1.\n",
 403 |     "* **seed:** Fix the randomized state for reproducibility. If specified, it must be an integer.\n",
 404 |     "\n",
 405 |     "We may specify any subset of the parameters mentioned above while invoking an endpoint. Next, we show an example of how to invoke the endpoint with these arguments.\n",
 406 |     "\n",
 407 |     "***"
 408 |    ]
 409 |   },
 410 |   {
 411 |    "cell_type": "code",
 412 |    "execution_count": null,
 413 |    "id": "5e5b8e2e-cd62-413d-b31c-a6004bf39b33",
 414 |    "metadata": {
 415 |     "tags": []
 416 |    },
 417 |    "outputs": [],
 418 |    "source": [
 419 |     "prompt = \"\"\"Me: hi\n",
 420 |     "AI: Hello. How can I help you?\n",
 421 |     "Me: How are you doing?\n",
 422 |     "AI:\n",
 423 |     "\"\"\""
 424 |    ]
 425 |   },
 426 |   {
 427 |    "cell_type": "code",
 428 |    "execution_count": null,
 429 |    "id": "56919d90-0546-4e68-a294-5b114abbd3aa",
 430 |    "metadata": {
 431 |     "tags": []
 432 |    },
 433 |    "outputs": [],
 434 |    "source": [
 435 |     "payload = {\n",
 436 |     "    'text_inputs': prompt,\n",
 437 |     "    'seed': 123,\n",
 438 |     "    'temperature': 0.1,\n",
 439 |     "    'no_repeat_ngram_size': 2,\n",
 440 |     "    'max_length': 128\n",
 441 |     "}"
 442 |    ]
 443 |   },
 444 |   {
 445 |    "cell_type": "code",
 446 |    "execution_count": null,
 447 |    "id": "461a651f-da5a-40b6-8202-d9097efb2f02",
 448 |    "metadata": {
 449 |     "tags": []
 450 |    },
 451 |    "outputs": [],
 452 |    "source": [
 453 |     "payload = json.dumps(payload).encode('utf-8')"
 454 |    ]
 455 |   },
 456 |   {
 457 |    "cell_type": "code",
 458 |    "execution_count": null,
 459 |    "id": "ddf94fd3-fa77-4a48-bc43-e2536e4b621c",
 460 |    "metadata": {
 461 |     "tags": []
 462 |    },
 463 |    "outputs": [],
 464 |    "source": [
 465 |     "%%time \n",
 466 |     "response = client.invoke_endpoint(EndpointName=endpoint_name,  # target the endpoint deployed above\n",
 467 |     "                                  ContentType=CONTENT_TYPE, \n",
 468 |     "                                  Body=payload)"
 469 |    ]
 470 |   },
 471 |   {
 472 |    "cell_type": "markdown",
 473 |    "id": "354e0f71-d4a7-4779-b269-044d808565d3",
 474 |    "metadata": {},
 475 |    "source": [
 476 |     "#### Parse response to extract completion"
 477 |    ]
 478 |   },
 479 |   {
 480 |    "cell_type": "code",
 481 |    "execution_count": null,
 482 |    "id": "5995a882-7ef1-4e9a-8bba-bcb627dab002",
 483 |    "metadata": {
 484 |     "tags": []
 485 |    },
 486 |    "outputs": [],
 487 |    "source": [
 488 |     "model_predictions = json.loads(response['Body'].read())\n",
 489 |     "completion = model_predictions['generated_texts'][0].strip()\n",
 490 |     "completion"
 491 |    ]
 492 |   },
 493 |   {
 494 |    "cell_type": "code",
 495 |    "execution_count": null,
 496 |    "id": "5863f215-b007-4e73-ba12-1e1ffb44a637",
 497 |    "metadata": {},
 498 |    "outputs": [],
 499 |    "source": []
 500 |   }
 501 |  ],
 502 |  "metadata": {
 503 |   "availableInstances": [
 504 |    {
 505 |     "_defaultOrder": 0,
 506 |     "_isFastLaunch": true,
 507 |     "category": "General purpose",
 508 |     "gpuNum": 0,
 509 |     "hideHardwareSpecs": false,
 510 |     "memoryGiB": 4,
 511 |     "name": "ml.t3.medium",
 512 |     "vcpuNum": 2
 513 |    },
 514 |    {
 515 |     "_defaultOrder": 1,
 516 |     "_isFastLaunch": false,
 517 |     "category": "General purpose",
 518 |     "gpuNum": 0,
 519 |     "hideHardwareSpecs": false,
 520 |     "memoryGiB": 8,
 521 |     "name": "ml.t3.large",
 522 |     "vcpuNum": 2
 523 |    },
 524 |    {
 525 |     "_defaultOrder": 2,
 526 |     "_isFastLaunch": false,
 527 |     "category": "General purpose",
 528 |     "gpuNum": 0,
 529 |     "hideHardwareSpecs": false,
 530 |     "memoryGiB": 16,
 531 |     "name": "ml.t3.xlarge",
 532 |     "vcpuNum": 4
 533 |    },
 534 |    {
 535 |     "_defaultOrder": 3,
 536 |     "_isFastLaunch": false,
 537 |     "category": "General purpose",
 538 |     "gpuNum": 0,
 539 |     "hideHardwareSpecs": false,
 540 |     "memoryGiB": 32,
 541 |     "name": "ml.t3.2xlarge",
 542 |     "vcpuNum": 8
 543 |    },
 544 |    {
 545 |     "_defaultOrder": 4,
 546 |     "_isFastLaunch": true,
 547 |     "category": "General purpose",
 548 |     "gpuNum": 0,
 549 |     "hideHardwareSpecs": false,
 550 |     "memoryGiB": 8,
 551 |     "name": "ml.m5.large",
 552 |     "vcpuNum": 2
 553 |    },
 554 |    {
 555 |     "_defaultOrder": 5,
 556 |     "_isFastLaunch": false,
 557 |     "category": "General purpose",
 558 |     "gpuNum": 0,
 559 |     "hideHardwareSpecs": false,
 560 |     "memoryGiB": 16,
 561 |     "name": "ml.m5.xlarge",
 562 |     "vcpuNum": 4
 563 |    },
 564 |    {
 565 |     "_defaultOrder": 6,
 566 |     "_isFastLaunch": false,
 567 |     "category": "General purpose",
 568 |     "gpuNum": 0,
 569 |     "hideHardwareSpecs": false,
 570 |     "memoryGiB": 32,
 571 |     "name": "ml.m5.2xlarge",
 572 |     "vcpuNum": 8
 573 |    },
 574 |    {
 575 |     "_defaultOrder": 7,
 576 |     "_isFastLaunch": false,
 577 |     "category": "General purpose",
 578 |     "gpuNum": 0,
 579 |     "hideHardwareSpecs": false,
 580 |     "memoryGiB": 64,
 581 |     "name": "ml.m5.4xlarge",
 582 |     "vcpuNum": 16
 583 |    },
 584 |    {
 585 |     "_defaultOrder": 8,
 586 |     "_isFastLaunch": false,
 587 |     "category": "General purpose",
 588 |     "gpuNum": 0,
 589 |     "hideHardwareSpecs": false,
 590 |     "memoryGiB": 128,
 591 |     "name": "ml.m5.8xlarge",
 592 |     "vcpuNum": 32
 593 |    },
 594 |    {
 595 |     "_defaultOrder": 9,
 596 |     "_isFastLaunch": false,
 597 |     "category": "General purpose",
 598 |     "gpuNum": 0,
 599 |     "hideHardwareSpecs": false,
 600 |     "memoryGiB": 192,
 601 |     "name": "ml.m5.12xlarge",
 602 |     "vcpuNum": 48
 603 |    },
 604 |    {
 605 |     "_defaultOrder": 10,
 606 |     "_isFastLaunch": false,
 607 |     "category": "General purpose",
 608 |     "gpuNum": 0,
 609 |     "hideHardwareSpecs": false,
 610 |     "memoryGiB": 256,
 611 |     "name": "ml.m5.16xlarge",
 612 |     "vcpuNum": 64
 613 |    },
 614 |    {
 615 |     "_defaultOrder": 11,
 616 |     "_isFastLaunch": false,
 617 |     "category": "General purpose",
 618 |     "gpuNum": 0,
 619 |     "hideHardwareSpecs": false,
 620 |     "memoryGiB": 384,
 621 |     "name": "ml.m5.24xlarge",
 622 |     "vcpuNum": 96
 623 |    },
 624 |    {
 625 |     "_defaultOrder": 12,
 626 |     "_isFastLaunch": false,
 627 |     "category": "General purpose",
 628 |     "gpuNum": 0,
 629 |     "hideHardwareSpecs": false,
 630 |     "memoryGiB": 8,
 631 |     "name": "ml.m5d.large",
 632 |     "vcpuNum": 2
 633 |    },
 634 |    {
 635 |     "_defaultOrder": 13,
 636 |     "_isFastLaunch": false,
 637 |     "category": "General purpose",
 638 |     "gpuNum": 0,
 639 |     "hideHardwareSpecs": false,
 640 |     "memoryGiB": 16,
 641 |     "name": "ml.m5d.xlarge",
 642 |     "vcpuNum": 4
 643 |    },
 644 |    {
 645 |     "_defaultOrder": 14,
 646 |     "_isFastLaunch": false,
 647 |     "category": "General purpose",
 648 |     "gpuNum": 0,
 649 |     "hideHardwareSpecs": false,
 650 |     "memoryGiB": 32,
 651 |     "name": "ml.m5d.2xlarge",
 652 |     "vcpuNum": 8
 653 |    },
 654 |    {
 655 |     "_defaultOrder": 15,
 656 |     "_isFastLaunch": false,
 657 |     "category": "General purpose",
 658 |     "gpuNum": 0,
 659 |     "hideHardwareSpecs": false,
 660 |     "memoryGiB": 64,
 661 |     "name": "ml.m5d.4xlarge",
 662 |     "vcpuNum": 16
 663 |    },
 664 |    {
 665 |     "_defaultOrder": 16,
 666 |     "_isFastLaunch": false,
 667 |     "category": "General purpose",
 668 |     "gpuNum": 0,
 669 |     "hideHardwareSpecs": false,
 670 |     "memoryGiB": 128,
 671 |     "name": "ml.m5d.8xlarge",
 672 |     "vcpuNum": 32
 673 |    },
 674 |    {
 675 |     "_defaultOrder": 17,
 676 |     "_isFastLaunch": false,
 677 |     "category": "General purpose",
 678 |     "gpuNum": 0,
 679 |     "hideHardwareSpecs": false,
 680 |     "memoryGiB": 192,
 681 |     "name": "ml.m5d.12xlarge",
 682 |     "vcpuNum": 48
 683 |    },
 684 |    {
 685 |     "_defaultOrder": 18,
 686 |     "_isFastLaunch": false,
 687 |     "category": "General purpose",
 688 |     "gpuNum": 0,
 689 |     "hideHardwareSpecs": false,
 690 |     "memoryGiB": 256,
 691 |     "name": "ml.m5d.16xlarge",
 692 |     "vcpuNum": 64
 693 |    },
 694 |    {
 695 |     "_defaultOrder": 19,
 696 |     "_isFastLaunch": false,
 697 |     "category": "General purpose",
 698 |     "gpuNum": 0,
 699 |     "hideHardwareSpecs": false,
 700 |     "memoryGiB": 384,
 701 |     "name": "ml.m5d.24xlarge",
 702 |     "vcpuNum": 96
 703 |    },
 704 |    {
 705 |     "_defaultOrder": 20,
 706 |     "_isFastLaunch": false,
 707 |     "category": "General purpose",
 708 |     "gpuNum": 0,
 709 |     "hideHardwareSpecs": true,
 710 |     "memoryGiB": 0,
 711 |     "name": "ml.geospatial.interactive",
 712 |     "supportedImageNames": [
 713 |      "sagemaker-geospatial-v1-0"
 714 |     ],
 715 |     "vcpuNum": 0
 716 |    },
 717 |    {
 718 |     "_defaultOrder": 21,
 719 |     "_isFastLaunch": true,
 720 |     "category": "Compute optimized",
 721 |     "gpuNum": 0,
 722 |     "hideHardwareSpecs": false,
 723 |     "memoryGiB": 4,
 724 |     "name": "ml.c5.large",
 725 |     "vcpuNum": 2
 726 |    },
 727 |    {
 728 |     "_defaultOrder": 22,
 729 |     "_isFastLaunch": false,
 730 |     "category": "Compute optimized",
 731 |     "gpuNum": 0,
 732 |     "hideHardwareSpecs": false,
 733 |     "memoryGiB": 8,
 734 |     "name": "ml.c5.xlarge",
 735 |     "vcpuNum": 4
 736 |    },
 737 |    {
 738 |     "_defaultOrder": 23,
 739 |     "_isFastLaunch": false,
 740 |     "category": "Compute optimized",
 741 |     "gpuNum": 0,
 742 |     "hideHardwareSpecs": false,
 743 |     "memoryGiB": 16,
 744 |     "name": "ml.c5.2xlarge",
 745 |     "vcpuNum": 8
 746 |    },
 747 |    {
 748 |     "_defaultOrder": 24,
 749 |     "_isFastLaunch": false,
 750 |     "category": "Compute optimized",
 751 |     "gpuNum": 0,
 752 |     "hideHardwareSpecs": false,
 753 |     "memoryGiB": 32,
 754 |     "name": "ml.c5.4xlarge",
 755 |     "vcpuNum": 16
 756 |    },
 757 |    {
 758 |     "_defaultOrder": 25,
 759 |     "_isFastLaunch": false,
 760 |     "category": "Compute optimized",
 761 |     "gpuNum": 0,
 762 |     "hideHardwareSpecs": false,
 763 |     "memoryGiB": 72,
 764 |     "name": "ml.c5.9xlarge",
 765 |     "vcpuNum": 36
 766 |    },
 767 |    {
 768 |     "_defaultOrder": 26,
 769 |     "_isFastLaunch": false,
 770 |     "category": "Compute optimized",
 771 |     "gpuNum": 0,
 772 |     "hideHardwareSpecs": false,
 773 |     "memoryGiB": 96,
 774 |     "name": "ml.c5.12xlarge",
 775 |     "vcpuNum": 48
 776 |    },
 777 |    {
 778 |     "_defaultOrder": 27,
 779 |     "_isFastLaunch": false,
 780 |     "category": "Compute optimized",
 781 |     "gpuNum": 0,
 782 |     "hideHardwareSpecs": false,
 783 |     "memoryGiB": 144,
 784 |     "name": "ml.c5.18xlarge",
 785 |     "vcpuNum": 72
 786 |    },
 787 |    {
 788 |     "_defaultOrder": 28,
 789 |     "_isFastLaunch": false,
 790 |     "category": "Compute optimized",
 791 |     "gpuNum": 0,
 792 |     "hideHardwareSpecs": false,
 793 |     "memoryGiB": 192,
 794 |     "name": "ml.c5.24xlarge",
 795 |     "vcpuNum": 96
 796 |    },
 797 |    {
 798 |     "_defaultOrder": 29,
 799 |     "_isFastLaunch": true,
 800 |     "category": "Accelerated computing",
 801 |     "gpuNum": 1,
 802 |     "hideHardwareSpecs": false,
 803 |     "memoryGiB": 16,
 804 |     "name": "ml.g4dn.xlarge",
 805 |     "vcpuNum": 4
 806 |    },
 807 |    {
 808 |     "_defaultOrder": 30,
 809 |     "_isFastLaunch": false,
 810 |     "category": "Accelerated computing",
 811 |     "gpuNum": 1,
 812 |     "hideHardwareSpecs": false,
 813 |     "memoryGiB": 32,
 814 |     "name": "ml.g4dn.2xlarge",
 815 |     "vcpuNum": 8
 816 |    },
 817 |    {
 818 |     "_defaultOrder": 31,
 819 |     "_isFastLaunch": false,
 820 |     "category": "Accelerated computing",
 821 |     "gpuNum": 1,
 822 |     "hideHardwareSpecs": false,
 823 |     "memoryGiB": 64,
 824 |     "name": "ml.g4dn.4xlarge",
 825 |     "vcpuNum": 16
 826 |    },
 827 |    {
 828 |     "_defaultOrder": 32,
 829 |     "_isFastLaunch": false,
 830 |     "category": "Accelerated computing",
 831 |     "gpuNum": 1,
 832 |     "hideHardwareSpecs": false,
 833 |     "memoryGiB": 128,
 834 |     "name": "ml.g4dn.8xlarge",
 835 |     "vcpuNum": 32
 836 |    },
 837 |    {
 838 |     "_defaultOrder": 33,
 839 |     "_isFastLaunch": false,
 840 |     "category": "Accelerated computing",
 841 |     "gpuNum": 4,
 842 |     "hideHardwareSpecs": false,
 843 |     "memoryGiB": 192,
 844 |     "name": "ml.g4dn.12xlarge",
 845 |     "vcpuNum": 48
 846 |    },
 847 |    {
 848 |     "_defaultOrder": 34,
 849 |     "_isFastLaunch": false,
 850 |     "category": "Accelerated computing",
 851 |     "gpuNum": 1,
 852 |     "hideHardwareSpecs": false,
 853 |     "memoryGiB": 256,
 854 |     "name": "ml.g4dn.16xlarge",
 855 |     "vcpuNum": 64
 856 |    },
 857 |    {
 858 |     "_defaultOrder": 35,
 859 |     "_isFastLaunch": false,
 860 |     "category": "Accelerated computing",
 861 |     "gpuNum": 1,
 862 |     "hideHardwareSpecs": false,
 863 |     "memoryGiB": 61,
 864 |     "name": "ml.p3.2xlarge",
 865 |     "vcpuNum": 8
 866 |    },
 867 |    {
 868 |     "_defaultOrder": 36,
 869 |     "_isFastLaunch": false,
 870 |     "category": "Accelerated computing",
 871 |     "gpuNum": 4,
 872 |     "hideHardwareSpecs": false,
 873 |     "memoryGiB": 244,
 874 |     "name": "ml.p3.8xlarge",
 875 |     "vcpuNum": 32
 876 |    },
 877 |    {
 878 |     "_defaultOrder": 37,
 879 |     "_isFastLaunch": false,
 880 |     "category": "Accelerated computing",
 881 |     "gpuNum": 8,
 882 |     "hideHardwareSpecs": false,
 883 |     "memoryGiB": 488,
 884 |     "name": "ml.p3.16xlarge",
 885 |     "vcpuNum": 64
 886 |    },
 887 |    {
 888 |     "_defaultOrder": 38,
 889 |     "_isFastLaunch": false,
 890 |     "category": "Accelerated computing",
 891 |     "gpuNum": 8,
 892 |     "hideHardwareSpecs": false,
 893 |     "memoryGiB": 768,
 894 |     "name": "ml.p3dn.24xlarge",
 895 |     "vcpuNum": 96
 896 |    },
 897 |    {
 898 |     "_defaultOrder": 39,
 899 |     "_isFastLaunch": false,
 900 |     "category": "Memory Optimized",
 901 |     "gpuNum": 0,
 902 |     "hideHardwareSpecs": false,
 903 |     "memoryGiB": 16,
 904 |     "name": "ml.r5.large",
 905 |     "vcpuNum": 2
 906 |    },
 907 |    {
 908 |     "_defaultOrder": 40,
 909 |     "_isFastLaunch": false,
 910 |     "category": "Memory Optimized",
 911 |     "gpuNum": 0,
 912 |     "hideHardwareSpecs": false,
 913 |     "memoryGiB": 32,
 914 |     "name": "ml.r5.xlarge",
 915 |     "vcpuNum": 4
 916 |    },
 917 |    {
 918 |     "_defaultOrder": 41,
 919 |     "_isFastLaunch": false,
 920 |     "category": "Memory Optimized",
 921 |     "gpuNum": 0,
 922 |     "hideHardwareSpecs": false,
 923 |     "memoryGiB": 64,
 924 |     "name": "ml.r5.2xlarge",
 925 |     "vcpuNum": 8
 926 |    },
 927 |    {
 928 |     "_defaultOrder": 42,
 929 |     "_isFastLaunch": false,
 930 |     "category": "Memory Optimized",
 931 |     "gpuNum": 0,
 932 |     "hideHardwareSpecs": false,
 933 |     "memoryGiB": 128,
 934 |     "name": "ml.r5.4xlarge",
 935 |     "vcpuNum": 16
 936 |    },
 937 |    {
 938 |     "_defaultOrder": 43,
 939 |     "_isFastLaunch": false,
 940 |     "category": "Memory Optimized",
 941 |     "gpuNum": 0,
 942 |     "hideHardwareSpecs": false,
 943 |     "memoryGiB": 256,
 944 |     "name": "ml.r5.8xlarge",
 945 |     "vcpuNum": 32
 946 |    },
 947 |    {
 948 |     "_defaultOrder": 44,
 949 |     "_isFastLaunch": false,
 950 |     "category": "Memory Optimized",
 951 |     "gpuNum": 0,
 952 |     "hideHardwareSpecs": false,
 953 |     "memoryGiB": 384,
 954 |     "name": "ml.r5.12xlarge",
 955 |     "vcpuNum": 48
 956 |    },
 957 |    {
 958 |     "_defaultOrder": 45,
 959 |     "_isFastLaunch": false,
 960 |     "category": "Memory Optimized",
 961 |     "gpuNum": 0,
 962 |     "hideHardwareSpecs": false,
 963 |     "memoryGiB": 512,
 964 |     "name": "ml.r5.16xlarge",
 965 |     "vcpuNum": 64
 966 |    },
 967 |    {
 968 |     "_defaultOrder": 46,
 969 |     "_isFastLaunch": false,
 970 |     "category": "Memory Optimized",
 971 |     "gpuNum": 0,
 972 |     "hideHardwareSpecs": false,
 973 |     "memoryGiB": 768,
 974 |     "name": "ml.r5.24xlarge",
 975 |     "vcpuNum": 96
 976 |    },
 977 |    {
 978 |     "_defaultOrder": 47,
 979 |     "_isFastLaunch": false,
 980 |     "category": "Accelerated computing",
 981 |     "gpuNum": 1,
 982 |     "hideHardwareSpecs": false,
 983 |     "memoryGiB": 16,
 984 |     "name": "ml.g5.xlarge",
 985 |     "vcpuNum": 4
 986 |    },
 987 |    {
 988 |     "_defaultOrder": 48,
 989 |     "_isFastLaunch": false,
 990 |     "category": "Accelerated computing",
 991 |     "gpuNum": 1,
 992 |     "hideHardwareSpecs": false,
 993 |     "memoryGiB": 32,
 994 |     "name": "ml.g5.2xlarge",
 995 |     "vcpuNum": 8
 996 |    },
 997 |    {
 998 |     "_defaultOrder": 49,
 999 |     "_isFastLaunch": false,
1000 |     "category": "Accelerated computing",
1001 |     "gpuNum": 1,
1002 |     "hideHardwareSpecs": false,
1003 |     "memoryGiB": 64,
1004 |     "name": "ml.g5.4xlarge",
1005 |     "vcpuNum": 16
1006 |    },
1007 |    {
1008 |     "_defaultOrder": 50,
1009 |     "_isFastLaunch": false,
1010 |     "category": "Accelerated computing",
1011 |     "gpuNum": 1,
1012 |     "hideHardwareSpecs": false,
1013 |     "memoryGiB": 128,
1014 |     "name": "ml.g5.8xlarge",
1015 |     "vcpuNum": 32
1016 |    },
1017 |    {
1018 |     "_defaultOrder": 51,
1019 |     "_isFastLaunch": false,
1020 |     "category": "Accelerated computing",
1021 |     "gpuNum": 1,
1022 |     "hideHardwareSpecs": false,
1023 |     "memoryGiB": 256,
1024 |     "name": "ml.g5.16xlarge",
1025 |     "vcpuNum": 64
1026 |    },
1027 |    {
1028 |     "_defaultOrder": 52,
1029 |     "_isFastLaunch": false,
1030 |     "category": "Accelerated computing",
1031 |     "gpuNum": 4,
1032 |     "hideHardwareSpecs": false,
1033 |     "memoryGiB": 192,
1034 |     "name": "ml.g5.12xlarge",
1035 |     "vcpuNum": 48
1036 |    },
1037 |    {
1038 |     "_defaultOrder": 53,
1039 |     "_isFastLaunch": false,
1040 |     "category": "Accelerated computing",
1041 |     "gpuNum": 4,
1042 |     "hideHardwareSpecs": false,
1043 |     "memoryGiB": 384,
1044 |     "name": "ml.g5.24xlarge",
1045 |     "vcpuNum": 96
1046 |    },
1047 |    {
1048 |     "_defaultOrder": 54,
1049 |     "_isFastLaunch": false,
1050 |     "category": "Accelerated computing",
1051 |     "gpuNum": 8,
1052 |     "hideHardwareSpecs": false,
1053 |     "memoryGiB": 768,
1054 |     "name": "ml.g5.48xlarge",
1055 |     "vcpuNum": 192
1056 |    },
1057 |    {
1058 |     "_defaultOrder": 55,
1059 |     "_isFastLaunch": false,
1060 |     "category": "Accelerated computing",
1061 |     "gpuNum": 8,
1062 |     "hideHardwareSpecs": false,
1063 |     "memoryGiB": 1152,
1064 |     "name": "ml.p4d.24xlarge",
1065 |     "vcpuNum": 96
1066 |    },
1067 |    {
1068 |     "_defaultOrder": 56,
1069 |     "_isFastLaunch": false,
1070 |     "category": "Accelerated computing",
1071 |     "gpuNum": 8,
1072 |     "hideHardwareSpecs": false,
1073 |     "memoryGiB": 1152,
1074 |     "name": "ml.p4de.24xlarge",
1075 |     "vcpuNum": 96
1076 |    }
1077 |   ],
1078 |   "instance_type": "ml.t3.medium",
1079 |   "kernelspec": {
1080 |    "display_name": "Python 3 (Data Science)",
1081 |    "language": "python",
1082 |    "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
1083 |   },
1084 |   "language_info": {
1085 |    "codemirror_mode": {
1086 |     "name": "ipython",
1087 |     "version": 3
1088 |    },
1089 |    "file_extension": ".py",
1090 |    "mimetype": "text/x-python",
1091 |    "name": "python",
1092 |    "nbconvert_exporter": "python",
1093 |    "pygments_lexer": "ipython3",
1094 |    "version": "3.7.10"
1095 |   }
1096 |  },
1097 |  "nbformat": 4,
1098 |  "nbformat_minor": 5
1099 | }
1100 | 


--------------------------------------------------------------------------------
/03-create-dynamodb-tables.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "bf90ceb7-4bf5-4516-a28f-a6f028ef301c",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "## Create DynamoDB Tables "
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "id": "e974bd09-bd15-4c5c-8ab4-60d054b78bc5",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "#### Imports "
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "id": "222f38ee-7b50-4f88-9381-58c5e128420b",
 23 |    "metadata": {
 24 |     "tags": []
 25 |    },
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "import logging\n",
 29 |     "import boto3"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "markdown",
 34 |    "id": "e2b5c7e8-1be3-4906-b9be-dc5a44f92d09",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "##### Setup logging"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 3,
 43 |    "id": "bea6c92d-5fda-4264-9fdc-2e45c55ed7c7",
 44 |    "metadata": {
 45 |     "tags": []
 46 |    },
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "logger = logging.getLogger('sagemaker')\n",
 50 |     "logger.setLevel(logging.DEBUG)\n",
 51 |     "logger.addHandler(logging.StreamHandler())"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "markdown",
 56 |    "id": "ad53853d-6997-4120-95ac-53a3d555da36",
 57 |    "metadata": {},
 58 |    "source": [
 59 |     "##### Log versions of dependencies "
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 4,
 65 |    "id": "fbc8a299-aa25-491a-b5de-4c31bbbfea22",
 66 |    "metadata": {
 67 |     "tags": []
 68 |    },
 69 |    "outputs": [
 70 |     {
 71 |      "name": "stderr",
 72 |      "output_type": "stream",
 73 |      "text": [
 74 |       "Using boto3==1.26.111\n"
 75 |      ]
 76 |     }
 77 |    ],
 78 |    "source": [
 79 |     "logger.info(f'Using boto3=={boto3.__version__}')"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "markdown",
 84 |    "id": "744a3c36-6504-4e8c-b5ff-4b9998ecc465",
 85 |    "metadata": {},
 86 |    "source": [
 87 |     "#### Create DynamoDB resource"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": 5,
 93 |    "id": "dbb83e0c-1dd0-4e6a-8e4c-9859ea63b15a",
 94 |    "metadata": {
 95 |     "tags": []
 96 |    },
 97 |    "outputs": [],
 98 |    "source": [
 99 |     "dynamodb = boto3.resource('dynamodb')"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "id": "d184dfeb-3ecd-4644-8a0e-54175a8d2e0f",
105 |    "metadata": {},
106 |    "source": [
107 |     "#### Create `conversations` table"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 6,
113 |    "id": "69bac2ac-c0fa-488f-9680-81849eb1b676",
114 |    "metadata": {
115 |     "tags": []
116 |    },
117 |    "outputs": [],
118 |    "source": [
119 |     "def create_conversations_table(table_name: str) -> None:\n",
120 |     "    \"\"\"Create the conversations table (session_id HASH key, numeric timestamp RANGE key) and block until it exists.\"\"\"\n",
121 |     "    table = dynamodb.create_table(\n",
122 |     "        TableName=table_name,\n",
123 |     "        KeySchema=[\n",
124 |     "            {'AttributeName': 'session_id', 'KeyType': 'HASH'},\n",
125 |     "            {'AttributeName': 'timestamp', 'KeyType': 'RANGE'}\n",
126 |     "        ],\n",
127 |     "        AttributeDefinitions=[\n",
128 |     "            {'AttributeName': 'session_id', 'AttributeType': 'S'},\n",
129 |     "            {'AttributeName': 'timestamp', 'AttributeType': 'N'}\n",
130 |     "        ],\n",
131 |     "        ProvisionedThroughput={'ReadCapacityUnits': 5, 'WriteCapacityUnits': 5}\n",
132 |     "    )\n",
133 |     "    # create_table returns while the table is still being created; wait so later steps can use it right away\n",
134 |     "    table.wait_until_exists()"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "markdown",
139 |    "id": "65486230-dedc-485c-95a5-9da962836f8d",
140 |    "metadata": {},
141 |    "source": [
142 |     "#### Create `sessions` table"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": 7,
148 |    "id": "5983fcdc-d39b-4f0e-b6a9-d15dbd27928a",
149 |    "metadata": {
150 |     "tags": []
151 |    },
152 |    "outputs": [],
153 |    "source": [
154 |     "def create_sessions_table(table_name: str) -> None:\n",
155 |     "    \"\"\"Create the sessions table (string session_id HASH key only) and block until it exists.\"\"\"\n",
156 |     "    table = dynamodb.create_table(\n",
157 |     "        TableName=table_name,\n",
158 |     "        KeySchema=[\n",
159 |     "            {'AttributeName': 'session_id', 'KeyType': 'HASH'}\n",
160 |     "        ],\n",
161 |     "        AttributeDefinitions=[\n",
162 |     "            {'AttributeName': 'session_id', 'AttributeType': 'S'}\n",
163 |     "        ],\n",
164 |     "        ProvisionedThroughput={'ReadCapacityUnits': 5, 'WriteCapacityUnits': 5}\n",
165 |     "    )\n",
166 |     "    # create_table returns while the table is still being created; wait so later steps can use it right away\n",
167 |     "    table.wait_until_exists()"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 8,
173 |    "id": "ce937e6b-928d-4622-94e4-944415eb2f11",
174 |    "metadata": {},
175 |    "outputs": [],
176 |    "source": [
177 |     "create_conversations_table('conversations')\n",
178 |     "create_sessions_table('sessions')"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "code",
183 |    "execution_count": null,
184 |    "id": "287794f5-b672-4169-b27b-e2bb02343909",
185 |    "metadata": {},
186 |    "outputs": [],
187 |    "source": []
188 |   }
189 |  ],
190 |  "metadata": {
191 |   "availableInstances": [
192 |    {
193 |     "_defaultOrder": 0,
194 |     "_isFastLaunch": true,
195 |     "category": "General purpose",
196 |     "gpuNum": 0,
197 |     "hideHardwareSpecs": false,
198 |     "memoryGiB": 4,
199 |     "name": "ml.t3.medium",
200 |     "vcpuNum": 2
201 |    },
202 |    {
203 |     "_defaultOrder": 1,
204 |     "_isFastLaunch": false,
205 |     "category": "General purpose",
206 |     "gpuNum": 0,
207 |     "hideHardwareSpecs": false,
208 |     "memoryGiB": 8,
209 |     "name": "ml.t3.large",
210 |     "vcpuNum": 2
211 |    },
212 |    {
213 |     "_defaultOrder": 2,
214 |     "_isFastLaunch": false,
215 |     "category": "General purpose",
216 |     "gpuNum": 0,
217 |     "hideHardwareSpecs": false,
218 |     "memoryGiB": 16,
219 |     "name": "ml.t3.xlarge",
220 |     "vcpuNum": 4
221 |    },
222 |    {
223 |     "_defaultOrder": 3,
224 |     "_isFastLaunch": false,
225 |     "category": "General purpose",
226 |     "gpuNum": 0,
227 |     "hideHardwareSpecs": false,
228 |     "memoryGiB": 32,
229 |     "name": "ml.t3.2xlarge",
230 |     "vcpuNum": 8
231 |    },
232 |    {
233 |     "_defaultOrder": 4,
234 |     "_isFastLaunch": true,
235 |     "category": "General purpose",
236 |     "gpuNum": 0,
237 |     "hideHardwareSpecs": false,
238 |     "memoryGiB": 8,
239 |     "name": "ml.m5.large",
240 |     "vcpuNum": 2
241 |    },
242 |    {
243 |     "_defaultOrder": 5,
244 |     "_isFastLaunch": false,
245 |     "category": "General purpose",
246 |     "gpuNum": 0,
247 |     "hideHardwareSpecs": false,
248 |     "memoryGiB": 16,
249 |     "name": "ml.m5.xlarge",
250 |     "vcpuNum": 4
251 |    },
252 |    {
253 |     "_defaultOrder": 6,
254 |     "_isFastLaunch": false,
255 |     "category": "General purpose",
256 |     "gpuNum": 0,
257 |     "hideHardwareSpecs": false,
258 |     "memoryGiB": 32,
259 |     "name": "ml.m5.2xlarge",
260 |     "vcpuNum": 8
261 |    },
262 |    {
263 |     "_defaultOrder": 7,
264 |     "_isFastLaunch": false,
265 |     "category": "General purpose",
266 |     "gpuNum": 0,
267 |     "hideHardwareSpecs": false,
268 |     "memoryGiB": 64,
269 |     "name": "ml.m5.4xlarge",
270 |     "vcpuNum": 16
271 |    },
272 |    {
273 |     "_defaultOrder": 8,
274 |     "_isFastLaunch": false,
275 |     "category": "General purpose",
276 |     "gpuNum": 0,
277 |     "hideHardwareSpecs": false,
278 |     "memoryGiB": 128,
279 |     "name": "ml.m5.8xlarge",
280 |     "vcpuNum": 32
281 |    },
282 |    {
283 |     "_defaultOrder": 9,
284 |     "_isFastLaunch": false,
285 |     "category": "General purpose",
286 |     "gpuNum": 0,
287 |     "hideHardwareSpecs": false,
288 |     "memoryGiB": 192,
289 |     "name": "ml.m5.12xlarge",
290 |     "vcpuNum": 48
291 |    },
292 |    {
293 |     "_defaultOrder": 10,
294 |     "_isFastLaunch": false,
295 |     "category": "General purpose",
296 |     "gpuNum": 0,
297 |     "hideHardwareSpecs": false,
298 |     "memoryGiB": 256,
299 |     "name": "ml.m5.16xlarge",
300 |     "vcpuNum": 64
301 |    },
302 |    {
303 |     "_defaultOrder": 11,
304 |     "_isFastLaunch": false,
305 |     "category": "General purpose",
306 |     "gpuNum": 0,
307 |     "hideHardwareSpecs": false,
308 |     "memoryGiB": 384,
309 |     "name": "ml.m5.24xlarge",
310 |     "vcpuNum": 96
311 |    },
312 |    {
313 |     "_defaultOrder": 12,
314 |     "_isFastLaunch": false,
315 |     "category": "General purpose",
316 |     "gpuNum": 0,
317 |     "hideHardwareSpecs": false,
318 |     "memoryGiB": 8,
319 |     "name": "ml.m5d.large",
320 |     "vcpuNum": 2
321 |    },
322 |    {
323 |     "_defaultOrder": 13,
324 |     "_isFastLaunch": false,
325 |     "category": "General purpose",
326 |     "gpuNum": 0,
327 |     "hideHardwareSpecs": false,
328 |     "memoryGiB": 16,
329 |     "name": "ml.m5d.xlarge",
330 |     "vcpuNum": 4
331 |    },
332 |    {
333 |     "_defaultOrder": 14,
334 |     "_isFastLaunch": false,
335 |     "category": "General purpose",
336 |     "gpuNum": 0,
337 |     "hideHardwareSpecs": false,
338 |     "memoryGiB": 32,
339 |     "name": "ml.m5d.2xlarge",
340 |     "vcpuNum": 8
341 |    },
342 |    {
343 |     "_defaultOrder": 15,
344 |     "_isFastLaunch": false,
345 |     "category": "General purpose",
346 |     "gpuNum": 0,
347 |     "hideHardwareSpecs": false,
348 |     "memoryGiB": 64,
349 |     "name": "ml.m5d.4xlarge",
350 |     "vcpuNum": 16
351 |    },
352 |    {
353 |     "_defaultOrder": 16,
354 |     "_isFastLaunch": false,
355 |     "category": "General purpose",
356 |     "gpuNum": 0,
357 |     "hideHardwareSpecs": false,
358 |     "memoryGiB": 128,
359 |     "name": "ml.m5d.8xlarge",
360 |     "vcpuNum": 32
361 |    },
362 |    {
363 |     "_defaultOrder": 17,
364 |     "_isFastLaunch": false,
365 |     "category": "General purpose",
366 |     "gpuNum": 0,
367 |     "hideHardwareSpecs": false,
368 |     "memoryGiB": 192,
369 |     "name": "ml.m5d.12xlarge",
370 |     "vcpuNum": 48
371 |    },
372 |    {
373 |     "_defaultOrder": 18,
374 |     "_isFastLaunch": false,
375 |     "category": "General purpose",
376 |     "gpuNum": 0,
377 |     "hideHardwareSpecs": false,
378 |     "memoryGiB": 256,
379 |     "name": "ml.m5d.16xlarge",
380 |     "vcpuNum": 64
381 |    },
382 |    {
383 |     "_defaultOrder": 19,
384 |     "_isFastLaunch": false,
385 |     "category": "General purpose",
386 |     "gpuNum": 0,
387 |     "hideHardwareSpecs": false,
388 |     "memoryGiB": 384,
389 |     "name": "ml.m5d.24xlarge",
390 |     "vcpuNum": 96
391 |    },
392 |    {
393 |     "_defaultOrder": 20,
394 |     "_isFastLaunch": false,
395 |     "category": "General purpose",
396 |     "gpuNum": 0,
397 |     "hideHardwareSpecs": true,
398 |     "memoryGiB": 0,
399 |     "name": "ml.geospatial.interactive",
400 |     "supportedImageNames": [
401 |      "sagemaker-geospatial-v1-0"
402 |     ],
403 |     "vcpuNum": 0
404 |    },
405 |    {
406 |     "_defaultOrder": 21,
407 |     "_isFastLaunch": true,
408 |     "category": "Compute optimized",
409 |     "gpuNum": 0,
410 |     "hideHardwareSpecs": false,
411 |     "memoryGiB": 4,
412 |     "name": "ml.c5.large",
413 |     "vcpuNum": 2
414 |    },
415 |    {
416 |     "_defaultOrder": 22,
417 |     "_isFastLaunch": false,
418 |     "category": "Compute optimized",
419 |     "gpuNum": 0,
420 |     "hideHardwareSpecs": false,
421 |     "memoryGiB": 8,
422 |     "name": "ml.c5.xlarge",
423 |     "vcpuNum": 4
424 |    },
425 |    {
426 |     "_defaultOrder": 23,
427 |     "_isFastLaunch": false,
428 |     "category": "Compute optimized",
429 |     "gpuNum": 0,
430 |     "hideHardwareSpecs": false,
431 |     "memoryGiB": 16,
432 |     "name": "ml.c5.2xlarge",
433 |     "vcpuNum": 8
434 |    },
435 |    {
436 |     "_defaultOrder": 24,
437 |     "_isFastLaunch": false,
438 |     "category": "Compute optimized",
439 |     "gpuNum": 0,
440 |     "hideHardwareSpecs": false,
441 |     "memoryGiB": 32,
442 |     "name": "ml.c5.4xlarge",
443 |     "vcpuNum": 16
444 |    },
445 |    {
446 |     "_defaultOrder": 25,
447 |     "_isFastLaunch": false,
448 |     "category": "Compute optimized",
449 |     "gpuNum": 0,
450 |     "hideHardwareSpecs": false,
451 |     "memoryGiB": 72,
452 |     "name": "ml.c5.9xlarge",
453 |     "vcpuNum": 36
454 |    },
455 |    {
456 |     "_defaultOrder": 26,
457 |     "_isFastLaunch": false,
458 |     "category": "Compute optimized",
459 |     "gpuNum": 0,
460 |     "hideHardwareSpecs": false,
461 |     "memoryGiB": 96,
462 |     "name": "ml.c5.12xlarge",
463 |     "vcpuNum": 48
464 |    },
465 |    {
466 |     "_defaultOrder": 27,
467 |     "_isFastLaunch": false,
468 |     "category": "Compute optimized",
469 |     "gpuNum": 0,
470 |     "hideHardwareSpecs": false,
471 |     "memoryGiB": 144,
472 |     "name": "ml.c5.18xlarge",
473 |     "vcpuNum": 72
474 |    },
475 |    {
476 |     "_defaultOrder": 28,
477 |     "_isFastLaunch": false,
478 |     "category": "Compute optimized",
479 |     "gpuNum": 0,
480 |     "hideHardwareSpecs": false,
481 |     "memoryGiB": 192,
482 |     "name": "ml.c5.24xlarge",
483 |     "vcpuNum": 96
484 |    },
485 |    {
486 |     "_defaultOrder": 29,
487 |     "_isFastLaunch": true,
488 |     "category": "Accelerated computing",
489 |     "gpuNum": 1,
490 |     "hideHardwareSpecs": false,
491 |     "memoryGiB": 16,
492 |     "name": "ml.g4dn.xlarge",
493 |     "vcpuNum": 4
494 |    },
495 |    {
496 |     "_defaultOrder": 30,
497 |     "_isFastLaunch": false,
498 |     "category": "Accelerated computing",
499 |     "gpuNum": 1,
500 |     "hideHardwareSpecs": false,
501 |     "memoryGiB": 32,
502 |     "name": "ml.g4dn.2xlarge",
503 |     "vcpuNum": 8
504 |    },
505 |    {
506 |     "_defaultOrder": 31,
507 |     "_isFastLaunch": false,
508 |     "category": "Accelerated computing",
509 |     "gpuNum": 1,
510 |     "hideHardwareSpecs": false,
511 |     "memoryGiB": 64,
512 |     "name": "ml.g4dn.4xlarge",
513 |     "vcpuNum": 16
514 |    },
515 |    {
516 |     "_defaultOrder": 32,
517 |     "_isFastLaunch": false,
518 |     "category": "Accelerated computing",
519 |     "gpuNum": 1,
520 |     "hideHardwareSpecs": false,
521 |     "memoryGiB": 128,
522 |     "name": "ml.g4dn.8xlarge",
523 |     "vcpuNum": 32
524 |    },
525 |    {
526 |     "_defaultOrder": 33,
527 |     "_isFastLaunch": false,
528 |     "category": "Accelerated computing",
529 |     "gpuNum": 4,
530 |     "hideHardwareSpecs": false,
531 |     "memoryGiB": 192,
532 |     "name": "ml.g4dn.12xlarge",
533 |     "vcpuNum": 48
534 |    },
535 |    {
536 |     "_defaultOrder": 34,
537 |     "_isFastLaunch": false,
538 |     "category": "Accelerated computing",
539 |     "gpuNum": 1,
540 |     "hideHardwareSpecs": false,
541 |     "memoryGiB": 256,
542 |     "name": "ml.g4dn.16xlarge",
543 |     "vcpuNum": 64
544 |    },
545 |    {
546 |     "_defaultOrder": 35,
547 |     "_isFastLaunch": false,
548 |     "category": "Accelerated computing",
549 |     "gpuNum": 1,
550 |     "hideHardwareSpecs": false,
551 |     "memoryGiB": 61,
552 |     "name": "ml.p3.2xlarge",
553 |     "vcpuNum": 8
554 |    },
555 |    {
556 |     "_defaultOrder": 36,
557 |     "_isFastLaunch": false,
558 |     "category": "Accelerated computing",
559 |     "gpuNum": 4,
560 |     "hideHardwareSpecs": false,
561 |     "memoryGiB": 244,
562 |     "name": "ml.p3.8xlarge",
563 |     "vcpuNum": 32
564 |    },
565 |    {
566 |     "_defaultOrder": 37,
567 |     "_isFastLaunch": false,
568 |     "category": "Accelerated computing",
569 |     "gpuNum": 8,
570 |     "hideHardwareSpecs": false,
571 |     "memoryGiB": 488,
572 |     "name": "ml.p3.16xlarge",
573 |     "vcpuNum": 64
574 |    },
575 |    {
576 |     "_defaultOrder": 38,
577 |     "_isFastLaunch": false,
578 |     "category": "Accelerated computing",
579 |     "gpuNum": 8,
580 |     "hideHardwareSpecs": false,
581 |     "memoryGiB": 768,
582 |     "name": "ml.p3dn.24xlarge",
583 |     "vcpuNum": 96
584 |    },
585 |    {
586 |     "_defaultOrder": 39,
587 |     "_isFastLaunch": false,
588 |     "category": "Memory Optimized",
589 |     "gpuNum": 0,
590 |     "hideHardwareSpecs": false,
591 |     "memoryGiB": 16,
592 |     "name": "ml.r5.large",
593 |     "vcpuNum": 2
594 |    },
595 |    {
596 |     "_defaultOrder": 40,
597 |     "_isFastLaunch": false,
598 |     "category": "Memory Optimized",
599 |     "gpuNum": 0,
600 |     "hideHardwareSpecs": false,
601 |     "memoryGiB": 32,
602 |     "name": "ml.r5.xlarge",
603 |     "vcpuNum": 4
604 |    },
605 |    {
606 |     "_defaultOrder": 41,
607 |     "_isFastLaunch": false,
608 |     "category": "Memory Optimized",
609 |     "gpuNum": 0,
610 |     "hideHardwareSpecs": false,
611 |     "memoryGiB": 64,
612 |     "name": "ml.r5.2xlarge",
613 |     "vcpuNum": 8
614 |    },
615 |    {
616 |     "_defaultOrder": 42,
617 |     "_isFastLaunch": false,
618 |     "category": "Memory Optimized",
619 |     "gpuNum": 0,
620 |     "hideHardwareSpecs": false,
621 |     "memoryGiB": 128,
622 |     "name": "ml.r5.4xlarge",
623 |     "vcpuNum": 16
624 |    },
625 |    {
626 |     "_defaultOrder": 43,
627 |     "_isFastLaunch": false,
628 |     "category": "Memory Optimized",
629 |     "gpuNum": 0,
630 |     "hideHardwareSpecs": false,
631 |     "memoryGiB": 256,
632 |     "name": "ml.r5.8xlarge",
633 |     "vcpuNum": 32
634 |    },
635 |    {
636 |     "_defaultOrder": 44,
637 |     "_isFastLaunch": false,
638 |     "category": "Memory Optimized",
639 |     "gpuNum": 0,
640 |     "hideHardwareSpecs": false,
641 |     "memoryGiB": 384,
642 |     "name": "ml.r5.12xlarge",
643 |     "vcpuNum": 48
644 |    },
645 |    {
646 |     "_defaultOrder": 45,
647 |     "_isFastLaunch": false,
648 |     "category": "Memory Optimized",
649 |     "gpuNum": 0,
650 |     "hideHardwareSpecs": false,
651 |     "memoryGiB": 512,
652 |     "name": "ml.r5.16xlarge",
653 |     "vcpuNum": 64
654 |    },
655 |    {
656 |     "_defaultOrder": 46,
657 |     "_isFastLaunch": false,
658 |     "category": "Memory Optimized",
659 |     "gpuNum": 0,
660 |     "hideHardwareSpecs": false,
661 |     "memoryGiB": 768,
662 |     "name": "ml.r5.24xlarge",
663 |     "vcpuNum": 96
664 |    },
665 |    {
666 |     "_defaultOrder": 47,
667 |     "_isFastLaunch": false,
668 |     "category": "Accelerated computing",
669 |     "gpuNum": 1,
670 |     "hideHardwareSpecs": false,
671 |     "memoryGiB": 16,
672 |     "name": "ml.g5.xlarge",
673 |     "vcpuNum": 4
674 |    },
675 |    {
676 |     "_defaultOrder": 48,
677 |     "_isFastLaunch": false,
678 |     "category": "Accelerated computing",
679 |     "gpuNum": 1,
680 |     "hideHardwareSpecs": false,
681 |     "memoryGiB": 32,
682 |     "name": "ml.g5.2xlarge",
683 |     "vcpuNum": 8
684 |    },
685 |    {
686 |     "_defaultOrder": 49,
687 |     "_isFastLaunch": false,
688 |     "category": "Accelerated computing",
689 |     "gpuNum": 1,
690 |     "hideHardwareSpecs": false,
691 |     "memoryGiB": 64,
692 |     "name": "ml.g5.4xlarge",
693 |     "vcpuNum": 16
694 |    },
695 |    {
696 |     "_defaultOrder": 50,
697 |     "_isFastLaunch": false,
698 |     "category": "Accelerated computing",
699 |     "gpuNum": 1,
700 |     "hideHardwareSpecs": false,
701 |     "memoryGiB": 128,
702 |     "name": "ml.g5.8xlarge",
703 |     "vcpuNum": 32
704 |    },
705 |    {
706 |     "_defaultOrder": 51,
707 |     "_isFastLaunch": false,
708 |     "category": "Accelerated computing",
709 |     "gpuNum": 1,
710 |     "hideHardwareSpecs": false,
711 |     "memoryGiB": 256,
712 |     "name": "ml.g5.16xlarge",
713 |     "vcpuNum": 64
714 |    },
715 |    {
716 |     "_defaultOrder": 52,
717 |     "_isFastLaunch": false,
718 |     "category": "Accelerated computing",
719 |     "gpuNum": 4,
720 |     "hideHardwareSpecs": false,
721 |     "memoryGiB": 192,
722 |     "name": "ml.g5.12xlarge",
723 |     "vcpuNum": 48
724 |    },
725 |    {
726 |     "_defaultOrder": 53,
727 |     "_isFastLaunch": false,
728 |     "category": "Accelerated computing",
729 |     "gpuNum": 4,
730 |     "hideHardwareSpecs": false,
731 |     "memoryGiB": 384,
732 |     "name": "ml.g5.24xlarge",
733 |     "vcpuNum": 96
734 |    },
735 |    {
736 |     "_defaultOrder": 54,
737 |     "_isFastLaunch": false,
738 |     "category": "Accelerated computing",
739 |     "gpuNum": 8,
740 |     "hideHardwareSpecs": false,
741 |     "memoryGiB": 768,
742 |     "name": "ml.g5.48xlarge",
743 |     "vcpuNum": 192
744 |    },
745 |    {
746 |     "_defaultOrder": 55,
747 |     "_isFastLaunch": false,
748 |     "category": "Accelerated computing",
749 |     "gpuNum": 8,
750 |     "hideHardwareSpecs": false,
751 |     "memoryGiB": 1152,
752 |     "name": "ml.p4d.24xlarge",
753 |     "vcpuNum": 96
754 |    },
755 |    {
756 |     "_defaultOrder": 56,
757 |     "_isFastLaunch": false,
758 |     "category": "Accelerated computing",
759 |     "gpuNum": 8,
760 |     "hideHardwareSpecs": false,
761 |     "memoryGiB": 1152,
762 |     "name": "ml.p4de.24xlarge",
763 |     "vcpuNum": 96
764 |    }
765 |   ],
766 |   "instance_type": "ml.m5.large",
767 |   "kernelspec": {
768 |    "display_name": "Python 3 (Data Science)",
769 |    "language": "python",
770 |    "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
771 |   },
772 |   "language_info": {
773 |    "codemirror_mode": {
774 |     "name": "ipython",
775 |     "version": 3
776 |    },
777 |    "file_extension": ".py",
778 |    "mimetype": "text/x-python",
779 |    "name": "python",
780 |    "nbconvert_exporter": "python",
781 |    "pygments_lexer": "ipython3",
782 |    "version": "3.7.10"
783 |   }
784 |  },
785 |  "nbformat": 4,
786 |  "nbformat_minor": 5
787 | }
788 | 


--------------------------------------------------------------------------------
/04-create-os-index.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "id": "b985c5b5-1d65-4e4a-82d1-2dfc9768d97d",
  6 |    "metadata": {},
  7 |    "source": [
  8 |     "## Create Index for `Past Conversations`"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "markdown",
 13 |    "id": "57aa4ada-a686-43de-bc90-0f4107f95ce1",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "##### Prerequisites"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": null,
 22 |    "id": "faf12933-564a-41a3-9a9e-c02a9437310e",
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "%%capture \n",
 27 |     "\n",
 28 |     "!pip install PyYAML"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "id": "52706ec1-fd47-42d4-af0f-33f0a03f654d",
 34 |    "metadata": {},
 35 |    "source": [
 36 |     "#### Imports"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 2,
 42 |    "id": "81eea5f6-50e3-4398-80eb-11b680d026b4",
 43 |    "metadata": {
 44 |     "tags": []
 45 |    },
 46 |    "outputs": [],
 47 |    "source": [
 48 |     "from requests.auth import HTTPBasicAuth\n",
 49 |     "from tqdm import tqdm\n",
 50 |     "import requests\n",
 51 |     "import logging \n",
 52 |     "import boto3\n",
 53 |     "import yaml\n",
 54 |     "import json\n",
 55 |     "import os"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "markdown",
 60 |    "id": "49ec6d42-db7a-4c4c-8322-95963806f987",
 61 |    "metadata": {},
 62 |    "source": [
 63 |     "##### Setup logging"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 3,
 69 |    "id": "fcddd51c-9251-4428-9931-b2700a71142a",
 70 |    "metadata": {
 71 |     "tags": []
 72 |    },
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "logger = logging.getLogger('sagemaker')\n",
 76 |     "logger.setLevel(logging.DEBUG)\n",
 77 |     "logger.addHandler(logging.StreamHandler())"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "id": "4a81f78f-675d-4009-9228-8fa3cfc559b8",
 83 |    "metadata": {},
 84 |    "source": [
 85 |     "##### Log versions of dependencies "
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": 4,
 91 |    "id": "75474334-8b19-4491-abb9-58468bc33329",
 92 |    "metadata": {
 93 |     "tags": []
 94 |    },
 95 |    "outputs": [
 96 |     {
 97 |      "name": "stderr",
 98 |      "output_type": "stream",
 99 |      "text": [
100 |       "Using requests==2.28.2\n",
101 |       "Using pyyaml==6.0\n"
102 |      ]
103 |     }
104 |    ],
105 |    "source": [
106 |     "logger.info(f'Using requests=={requests.__version__}')\n",
107 |     "logger.info(f'Using pyyaml=={yaml.__version__}')"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "id": "c52e3b3f-f770-4b90-bc35-12cc0f793604",
113 |    "metadata": {},
114 |    "source": [
115 |     "#### Setup essentials"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 5,
121 |    "id": "3b2b1f73-2576-412f-ad38-13638db59281",
122 |    "metadata": {
123 |     "tags": []
124 |    },
125 |    "outputs": [],
126 |    "source": [
127 |     "with open('config.yml', 'r') as file:\n",
128 |     "    config = yaml.safe_load(file)\n",
129 |     "\n",
130 |     "os_username = config['credentials']['username']\n",
131 |     "os_password = config['credentials']['password']\n",
132 |     "\n",
133 |     "domain_endpoint = config['domain']['endpoint']\n",
134 |     "domain_index = config['domain']['index']"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": 6,
140 |    "id": "3e558272-b128-47da-8e68-7cdc79aca465",
141 |    "metadata": {
142 |     "tags": []
143 |    },
144 |    "outputs": [
145 |     {
146 |      "name": "stderr",
147 |      "output_type": "stream",
148 |      "text": [
149 |       "URL for OpenSearch index = https://search-semantic-search-hryn56c5jy43yryimohz4ajvyi.us-east-1.es.amazonaws.com/conversations\n"
150 |      ]
151 |     }
152 |    ],
153 |    "source": [
154 |     "URL = f'{domain_endpoint}/{domain_index}'\n",
155 |     "logger.info(f'URL for OpenSearch index = {URL}')"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "markdown",
160 |    "id": "78f72086-5050-40c3-b4e8-32341cbe071c",
161 |    "metadata": {},
162 |    "source": [
163 |     "#### Define the index mapping with a k-NN vector field"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "execution_count": 7,
169 |    "id": "45c097ca-4501-42c4-ba1f-d10b52fc025e",
170 |    "metadata": {
171 |     "tags": []
172 |    },
173 |    "outputs": [],
174 |    "source": [
175 |     "mapping = {\n",
176 |     "    'settings': {\n",
177 |     "        'index': {\n",
178 |     "            'knn': True  # Enable k-NN search for this index\n",
179 |     "        }\n",
180 |     "    },\n",
181 |     "    'mappings': {\n",
182 |     "        'properties': {\n",
183 |     "            'embedding': {  # k-NN vector field\n",
184 |     "                'type': 'knn_vector',\n",
185 |     "                'dimension': 4096  # Dimension of the vector\n",
186 |     "            },\n",
187 |     "            'session_id': {\n",
188 |     "                'type': 'keyword'\n",
189 |     "            },\n",
190 |     "            'created_at': {\n",
191 |     "                'type': 'long'\n",
192 |     "            },\n",
193 |     "            'conversation_summary': {\n",
194 |     "                'type': 'text'\n",
195 |     "            }\n",
196 |     "        }\n",
197 |     "    }\n",
198 |     "}"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "markdown",
203 |    "id": "e1a33e70-1e5f-44d7-aec4-4c374d2a8e3d",
204 |    "metadata": {},
205 |    "source": [
206 |     "#### Create the index with the specified mapping"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "code",
211 |    "execution_count": 8,
212 |    "id": "06601622-e982-42b2-910a-71de86e56475",
213 |    "metadata": {
214 |     "tags": []
215 |    },
216 |    "outputs": [
217 |     {
218 |      "name": "stderr",
219 |      "output_type": "stream",
220 |      "text": [
221 |       "Index created: {\"acknowledged\":true,\"shards_acknowledged\":true,\"index\":\"conversations\"}\n"
222 |      ]
223 |     }
224 |    ],
225 |    "source": [
226 |     "# Check if the index exists using an HTTP HEAD request\n",
227 |     "response = requests.head(URL, auth=HTTPBasicAuth(os_username, os_password))\n",
228 |     "\n",
229 |     "# If the index does not exist (status code 404), create the index\n",
230 |     "if response.status_code == 404:\n",
231 |     "    response = requests.put(URL, auth=HTTPBasicAuth(os_username, os_password), json=mapping)\n",
232 |     "    logger.info(f'Index created: {response.text}')\n",
233 |     "else:\n",
234 |     "    logger.error('Index already exists!')"
235 |    ]
236 |   },
237 |   {
238 |    "cell_type": "code",
239 |    "execution_count": null,
240 |    "id": "17ef4711-ec69-4628-b829-64e25d8e290e",
241 |    "metadata": {},
242 |    "outputs": [],
243 |    "source": []
244 |   }
245 |  ],
246 |  "metadata": {
247 |   "availableInstances": [
248 |    {
249 |     "_defaultOrder": 0,
250 |     "_isFastLaunch": true,
251 |     "category": "General purpose",
252 |     "gpuNum": 0,
253 |     "hideHardwareSpecs": false,
254 |     "memoryGiB": 4,
255 |     "name": "ml.t3.medium",
256 |     "vcpuNum": 2
257 |    },
258 |    {
259 |     "_defaultOrder": 1,
260 |     "_isFastLaunch": false,
261 |     "category": "General purpose",
262 |     "gpuNum": 0,
263 |     "hideHardwareSpecs": false,
264 |     "memoryGiB": 8,
265 |     "name": "ml.t3.large",
266 |     "vcpuNum": 2
267 |    },
268 |    {
269 |     "_defaultOrder": 2,
270 |     "_isFastLaunch": false,
271 |     "category": "General purpose",
272 |     "gpuNum": 0,
273 |     "hideHardwareSpecs": false,
274 |     "memoryGiB": 16,
275 |     "name": "ml.t3.xlarge",
276 |     "vcpuNum": 4
277 |    },
278 |    {
279 |     "_defaultOrder": 3,
280 |     "_isFastLaunch": false,
281 |     "category": "General purpose",
282 |     "gpuNum": 0,
283 |     "hideHardwareSpecs": false,
284 |     "memoryGiB": 32,
285 |     "name": "ml.t3.2xlarge",
286 |     "vcpuNum": 8
287 |    },
288 |    {
289 |     "_defaultOrder": 4,
290 |     "_isFastLaunch": true,
291 |     "category": "General purpose",
292 |     "gpuNum": 0,
293 |     "hideHardwareSpecs": false,
294 |     "memoryGiB": 8,
295 |     "name": "ml.m5.large",
296 |     "vcpuNum": 2
297 |    },
298 |    {
299 |     "_defaultOrder": 5,
300 |     "_isFastLaunch": false,
301 |     "category": "General purpose",
302 |     "gpuNum": 0,
303 |     "hideHardwareSpecs": false,
304 |     "memoryGiB": 16,
305 |     "name": "ml.m5.xlarge",
306 |     "vcpuNum": 4
307 |    },
308 |    {
309 |     "_defaultOrder": 6,
310 |     "_isFastLaunch": false,
311 |     "category": "General purpose",
312 |     "gpuNum": 0,
313 |     "hideHardwareSpecs": false,
314 |     "memoryGiB": 32,
315 |     "name": "ml.m5.2xlarge",
316 |     "vcpuNum": 8
317 |    },
318 |    {
319 |     "_defaultOrder": 7,
320 |     "_isFastLaunch": false,
321 |     "category": "General purpose",
322 |     "gpuNum": 0,
323 |     "hideHardwareSpecs": false,
324 |     "memoryGiB": 64,
325 |     "name": "ml.m5.4xlarge",
326 |     "vcpuNum": 16
327 |    },
328 |    {
329 |     "_defaultOrder": 8,
330 |     "_isFastLaunch": false,
331 |     "category": "General purpose",
332 |     "gpuNum": 0,
333 |     "hideHardwareSpecs": false,
334 |     "memoryGiB": 128,
335 |     "name": "ml.m5.8xlarge",
336 |     "vcpuNum": 32
337 |    },
338 |    {
339 |     "_defaultOrder": 9,
340 |     "_isFastLaunch": false,
341 |     "category": "General purpose",
342 |     "gpuNum": 0,
343 |     "hideHardwareSpecs": false,
344 |     "memoryGiB": 192,
345 |     "name": "ml.m5.12xlarge",
346 |     "vcpuNum": 48
347 |    },
348 |    {
349 |     "_defaultOrder": 10,
350 |     "_isFastLaunch": false,
351 |     "category": "General purpose",
352 |     "gpuNum": 0,
353 |     "hideHardwareSpecs": false,
354 |     "memoryGiB": 256,
355 |     "name": "ml.m5.16xlarge",
356 |     "vcpuNum": 64
357 |    },
358 |    {
359 |     "_defaultOrder": 11,
360 |     "_isFastLaunch": false,
361 |     "category": "General purpose",
362 |     "gpuNum": 0,
363 |     "hideHardwareSpecs": false,
364 |     "memoryGiB": 384,
365 |     "name": "ml.m5.24xlarge",
366 |     "vcpuNum": 96
367 |    },
368 |    {
369 |     "_defaultOrder": 12,
370 |     "_isFastLaunch": false,
371 |     "category": "General purpose",
372 |     "gpuNum": 0,
373 |     "hideHardwareSpecs": false,
374 |     "memoryGiB": 8,
375 |     "name": "ml.m5d.large",
376 |     "vcpuNum": 2
377 |    },
378 |    {
379 |     "_defaultOrder": 13,
380 |     "_isFastLaunch": false,
381 |     "category": "General purpose",
382 |     "gpuNum": 0,
383 |     "hideHardwareSpecs": false,
384 |     "memoryGiB": 16,
385 |     "name": "ml.m5d.xlarge",
386 |     "vcpuNum": 4
387 |    },
388 |    {
389 |     "_defaultOrder": 14,
390 |     "_isFastLaunch": false,
391 |     "category": "General purpose",
392 |     "gpuNum": 0,
393 |     "hideHardwareSpecs": false,
394 |     "memoryGiB": 32,
395 |     "name": "ml.m5d.2xlarge",
396 |     "vcpuNum": 8
397 |    },
398 |    {
399 |     "_defaultOrder": 15,
400 |     "_isFastLaunch": false,
401 |     "category": "General purpose",
402 |     "gpuNum": 0,
403 |     "hideHardwareSpecs": false,
404 |     "memoryGiB": 64,
405 |     "name": "ml.m5d.4xlarge",
406 |     "vcpuNum": 16
407 |    },
408 |    {
409 |     "_defaultOrder": 16,
410 |     "_isFastLaunch": false,
411 |     "category": "General purpose",
412 |     "gpuNum": 0,
413 |     "hideHardwareSpecs": false,
414 |     "memoryGiB": 128,
415 |     "name": "ml.m5d.8xlarge",
416 |     "vcpuNum": 32
417 |    },
418 |    {
419 |     "_defaultOrder": 17,
420 |     "_isFastLaunch": false,
421 |     "category": "General purpose",
422 |     "gpuNum": 0,
423 |     "hideHardwareSpecs": false,
424 |     "memoryGiB": 192,
425 |     "name": "ml.m5d.12xlarge",
426 |     "vcpuNum": 48
427 |    },
428 |    {
429 |     "_defaultOrder": 18,
430 |     "_isFastLaunch": false,
431 |     "category": "General purpose",
432 |     "gpuNum": 0,
433 |     "hideHardwareSpecs": false,
434 |     "memoryGiB": 256,
435 |     "name": "ml.m5d.16xlarge",
436 |     "vcpuNum": 64
437 |    },
438 |    {
439 |     "_defaultOrder": 19,
440 |     "_isFastLaunch": false,
441 |     "category": "General purpose",
442 |     "gpuNum": 0,
443 |     "hideHardwareSpecs": false,
444 |     "memoryGiB": 384,
445 |     "name": "ml.m5d.24xlarge",
446 |     "vcpuNum": 96
447 |    },
448 |    {
449 |     "_defaultOrder": 20,
450 |     "_isFastLaunch": false,
451 |     "category": "General purpose",
452 |     "gpuNum": 0,
453 |     "hideHardwareSpecs": true,
454 |     "memoryGiB": 0,
455 |     "name": "ml.geospatial.interactive",
456 |     "supportedImageNames": [
457 |      "sagemaker-geospatial-v1-0"
458 |     ],
459 |     "vcpuNum": 0
460 |    },
461 |    {
462 |     "_defaultOrder": 21,
463 |     "_isFastLaunch": true,
464 |     "category": "Compute optimized",
465 |     "gpuNum": 0,
466 |     "hideHardwareSpecs": false,
467 |     "memoryGiB": 4,
468 |     "name": "ml.c5.large",
469 |     "vcpuNum": 2
470 |    },
471 |    {
472 |     "_defaultOrder": 22,
473 |     "_isFastLaunch": false,
474 |     "category": "Compute optimized",
475 |     "gpuNum": 0,
476 |     "hideHardwareSpecs": false,
477 |     "memoryGiB": 8,
478 |     "name": "ml.c5.xlarge",
479 |     "vcpuNum": 4
480 |    },
481 |    {
482 |     "_defaultOrder": 23,
483 |     "_isFastLaunch": false,
484 |     "category": "Compute optimized",
485 |     "gpuNum": 0,
486 |     "hideHardwareSpecs": false,
487 |     "memoryGiB": 16,
488 |     "name": "ml.c5.2xlarge",
489 |     "vcpuNum": 8
490 |    },
491 |    {
492 |     "_defaultOrder": 24,
493 |     "_isFastLaunch": false,
494 |     "category": "Compute optimized",
495 |     "gpuNum": 0,
496 |     "hideHardwareSpecs": false,
497 |     "memoryGiB": 32,
498 |     "name": "ml.c5.4xlarge",
499 |     "vcpuNum": 16
500 |    },
501 |    {
502 |     "_defaultOrder": 25,
503 |     "_isFastLaunch": false,
504 |     "category": "Compute optimized",
505 |     "gpuNum": 0,
506 |     "hideHardwareSpecs": false,
507 |     "memoryGiB": 72,
508 |     "name": "ml.c5.9xlarge",
509 |     "vcpuNum": 36
510 |    },
511 |    {
512 |     "_defaultOrder": 26,
513 |     "_isFastLaunch": false,
514 |     "category": "Compute optimized",
515 |     "gpuNum": 0,
516 |     "hideHardwareSpecs": false,
517 |     "memoryGiB": 96,
518 |     "name": "ml.c5.12xlarge",
519 |     "vcpuNum": 48
520 |    },
521 |    {
522 |     "_defaultOrder": 27,
523 |     "_isFastLaunch": false,
524 |     "category": "Compute optimized",
525 |     "gpuNum": 0,
526 |     "hideHardwareSpecs": false,
527 |     "memoryGiB": 144,
528 |     "name": "ml.c5.18xlarge",
529 |     "vcpuNum": 72
530 |    },
531 |    {
532 |     "_defaultOrder": 28,
533 |     "_isFastLaunch": false,
534 |     "category": "Compute optimized",
535 |     "gpuNum": 0,
536 |     "hideHardwareSpecs": false,
537 |     "memoryGiB": 192,
538 |     "name": "ml.c5.24xlarge",
539 |     "vcpuNum": 96
540 |    },
541 |    {
542 |     "_defaultOrder": 29,
543 |     "_isFastLaunch": true,
544 |     "category": "Accelerated computing",
545 |     "gpuNum": 1,
546 |     "hideHardwareSpecs": false,
547 |     "memoryGiB": 16,
548 |     "name": "ml.g4dn.xlarge",
549 |     "vcpuNum": 4
550 |    },
551 |    {
552 |     "_defaultOrder": 30,
553 |     "_isFastLaunch": false,
554 |     "category": "Accelerated computing",
555 |     "gpuNum": 1,
556 |     "hideHardwareSpecs": false,
557 |     "memoryGiB": 32,
558 |     "name": "ml.g4dn.2xlarge",
559 |     "vcpuNum": 8
560 |    },
561 |    {
562 |     "_defaultOrder": 31,
563 |     "_isFastLaunch": false,
564 |     "category": "Accelerated computing",
565 |     "gpuNum": 1,
566 |     "hideHardwareSpecs": false,
567 |     "memoryGiB": 64,
568 |     "name": "ml.g4dn.4xlarge",
569 |     "vcpuNum": 16
570 |    },
571 |    {
572 |     "_defaultOrder": 32,
573 |     "_isFastLaunch": false,
574 |     "category": "Accelerated computing",
575 |     "gpuNum": 1,
576 |     "hideHardwareSpecs": false,
577 |     "memoryGiB": 128,
578 |     "name": "ml.g4dn.8xlarge",
579 |     "vcpuNum": 32
580 |    },
581 |    {
582 |     "_defaultOrder": 33,
583 |     "_isFastLaunch": false,
584 |     "category": "Accelerated computing",
585 |     "gpuNum": 4,
586 |     "hideHardwareSpecs": false,
587 |     "memoryGiB": 192,
588 |     "name": "ml.g4dn.12xlarge",
589 |     "vcpuNum": 48
590 |    },
591 |    {
592 |     "_defaultOrder": 34,
593 |     "_isFastLaunch": false,
594 |     "category": "Accelerated computing",
595 |     "gpuNum": 1,
596 |     "hideHardwareSpecs": false,
597 |     "memoryGiB": 256,
598 |     "name": "ml.g4dn.16xlarge",
599 |     "vcpuNum": 64
600 |    },
601 |    {
602 |     "_defaultOrder": 35,
603 |     "_isFastLaunch": false,
604 |     "category": "Accelerated computing",
605 |     "gpuNum": 1,
606 |     "hideHardwareSpecs": false,
607 |     "memoryGiB": 61,
608 |     "name": "ml.p3.2xlarge",
609 |     "vcpuNum": 8
610 |    },
611 |    {
612 |     "_defaultOrder": 36,
613 |     "_isFastLaunch": false,
614 |     "category": "Accelerated computing",
615 |     "gpuNum": 4,
616 |     "hideHardwareSpecs": false,
617 |     "memoryGiB": 244,
618 |     "name": "ml.p3.8xlarge",
619 |     "vcpuNum": 32
620 |    },
621 |    {
622 |     "_defaultOrder": 37,
623 |     "_isFastLaunch": false,
624 |     "category": "Accelerated computing",
625 |     "gpuNum": 8,
626 |     "hideHardwareSpecs": false,
627 |     "memoryGiB": 488,
628 |     "name": "ml.p3.16xlarge",
629 |     "vcpuNum": 64
630 |    },
631 |    {
632 |     "_defaultOrder": 38,
633 |     "_isFastLaunch": false,
634 |     "category": "Accelerated computing",
635 |     "gpuNum": 8,
636 |     "hideHardwareSpecs": false,
637 |     "memoryGiB": 768,
638 |     "name": "ml.p3dn.24xlarge",
639 |     "vcpuNum": 96
640 |    },
641 |    {
642 |     "_defaultOrder": 39,
643 |     "_isFastLaunch": false,
644 |     "category": "Memory Optimized",
645 |     "gpuNum": 0,
646 |     "hideHardwareSpecs": false,
647 |     "memoryGiB": 16,
648 |     "name": "ml.r5.large",
649 |     "vcpuNum": 2
650 |    },
651 |    {
652 |     "_defaultOrder": 40,
653 |     "_isFastLaunch": false,
654 |     "category": "Memory Optimized",
655 |     "gpuNum": 0,
656 |     "hideHardwareSpecs": false,
657 |     "memoryGiB": 32,
658 |     "name": "ml.r5.xlarge",
659 |     "vcpuNum": 4
660 |    },
661 |    {
662 |     "_defaultOrder": 41,
663 |     "_isFastLaunch": false,
664 |     "category": "Memory Optimized",
665 |     "gpuNum": 0,
666 |     "hideHardwareSpecs": false,
667 |     "memoryGiB": 64,
668 |     "name": "ml.r5.2xlarge",
669 |     "vcpuNum": 8
670 |    },
671 |    {
672 |     "_defaultOrder": 42,
673 |     "_isFastLaunch": false,
674 |     "category": "Memory Optimized",
675 |     "gpuNum": 0,
676 |     "hideHardwareSpecs": false,
677 |     "memoryGiB": 128,
678 |     "name": "ml.r5.4xlarge",
679 |     "vcpuNum": 16
680 |    },
681 |    {
682 |     "_defaultOrder": 43,
683 |     "_isFastLaunch": false,
684 |     "category": "Memory Optimized",
685 |     "gpuNum": 0,
686 |     "hideHardwareSpecs": false,
687 |     "memoryGiB": 256,
688 |     "name": "ml.r5.8xlarge",
689 |     "vcpuNum": 32
690 |    },
691 |    {
692 |     "_defaultOrder": 44,
693 |     "_isFastLaunch": false,
694 |     "category": "Memory Optimized",
695 |     "gpuNum": 0,
696 |     "hideHardwareSpecs": false,
697 |     "memoryGiB": 384,
698 |     "name": "ml.r5.12xlarge",
699 |     "vcpuNum": 48
700 |    },
701 |    {
702 |     "_defaultOrder": 45,
703 |     "_isFastLaunch": false,
704 |     "category": "Memory Optimized",
705 |     "gpuNum": 0,
706 |     "hideHardwareSpecs": false,
707 |     "memoryGiB": 512,
708 |     "name": "ml.r5.16xlarge",
709 |     "vcpuNum": 64
710 |    },
711 |    {
712 |     "_defaultOrder": 46,
713 |     "_isFastLaunch": false,
714 |     "category": "Memory Optimized",
715 |     "gpuNum": 0,
716 |     "hideHardwareSpecs": false,
717 |     "memoryGiB": 768,
718 |     "name": "ml.r5.24xlarge",
719 |     "vcpuNum": 96
720 |    },
721 |    {
722 |     "_defaultOrder": 47,
723 |     "_isFastLaunch": false,
724 |     "category": "Accelerated computing",
725 |     "gpuNum": 1,
726 |     "hideHardwareSpecs": false,
727 |     "memoryGiB": 16,
728 |     "name": "ml.g5.xlarge",
729 |     "vcpuNum": 4
730 |    },
731 |    {
732 |     "_defaultOrder": 48,
733 |     "_isFastLaunch": false,
734 |     "category": "Accelerated computing",
735 |     "gpuNum": 1,
736 |     "hideHardwareSpecs": false,
737 |     "memoryGiB": 32,
738 |     "name": "ml.g5.2xlarge",
739 |     "vcpuNum": 8
740 |    },
741 |    {
742 |     "_defaultOrder": 49,
743 |     "_isFastLaunch": false,
744 |     "category": "Accelerated computing",
745 |     "gpuNum": 1,
746 |     "hideHardwareSpecs": false,
747 |     "memoryGiB": 64,
748 |     "name": "ml.g5.4xlarge",
749 |     "vcpuNum": 16
750 |    },
751 |    {
752 |     "_defaultOrder": 50,
753 |     "_isFastLaunch": false,
754 |     "category": "Accelerated computing",
755 |     "gpuNum": 1,
756 |     "hideHardwareSpecs": false,
757 |     "memoryGiB": 128,
758 |     "name": "ml.g5.8xlarge",
759 |     "vcpuNum": 32
760 |    },
761 |    {
762 |     "_defaultOrder": 51,
763 |     "_isFastLaunch": false,
764 |     "category": "Accelerated computing",
765 |     "gpuNum": 1,
766 |     "hideHardwareSpecs": false,
767 |     "memoryGiB": 256,
768 |     "name": "ml.g5.16xlarge",
769 |     "vcpuNum": 64
770 |    },
771 |    {
772 |     "_defaultOrder": 52,
773 |     "_isFastLaunch": false,
774 |     "category": "Accelerated computing",
775 |     "gpuNum": 4,
776 |     "hideHardwareSpecs": false,
777 |     "memoryGiB": 192,
778 |     "name": "ml.g5.12xlarge",
779 |     "vcpuNum": 48
780 |    },
781 |    {
782 |     "_defaultOrder": 53,
783 |     "_isFastLaunch": false,
784 |     "category": "Accelerated computing",
785 |     "gpuNum": 4,
786 |     "hideHardwareSpecs": false,
787 |     "memoryGiB": 384,
788 |     "name": "ml.g5.24xlarge",
789 |     "vcpuNum": 96
790 |    },
791 |    {
792 |     "_defaultOrder": 54,
793 |     "_isFastLaunch": false,
794 |     "category": "Accelerated computing",
795 |     "gpuNum": 8,
796 |     "hideHardwareSpecs": false,
797 |     "memoryGiB": 768,
798 |     "name": "ml.g5.48xlarge",
799 |     "vcpuNum": 192
800 |    },
801 |    {
802 |     "_defaultOrder": 55,
803 |     "_isFastLaunch": false,
804 |     "category": "Accelerated computing",
805 |     "gpuNum": 8,
806 |     "hideHardwareSpecs": false,
807 |     "memoryGiB": 1152,
808 |     "name": "ml.p4d.24xlarge",
809 |     "vcpuNum": 96
810 |    },
811 |    {
812 |     "_defaultOrder": 56,
813 |     "_isFastLaunch": false,
814 |     "category": "Accelerated computing",
815 |     "gpuNum": 8,
816 |     "hideHardwareSpecs": false,
817 |     "memoryGiB": 1152,
818 |     "name": "ml.p4de.24xlarge",
819 |     "vcpuNum": 96
820 |    }
821 |   ],
822 |   "instance_type": "ml.t3.medium",
823 |   "kernelspec": {
824 |    "display_name": "Python 3 (Data Science)",
825 |    "language": "python",
826 |    "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
827 |   },
828 |   "language_info": {
829 |    "codemirror_mode": {
830 |     "name": "ipython",
831 |     "version": 3
832 |    },
833 |    "file_extension": ".py",
834 |    "mimetype": "text/x-python",
835 |    "name": "python",
836 |    "nbconvert_exporter": "python",
837 |    "pygments_lexer": "ipython3",
838 |    "version": "3.7.10"
839 |   }
840 |  },
841 |  "nbformat": 4,
842 |  "nbformat_minor": 5
843 | }
844 | 


--------------------------------------------------------------------------------
/05-lambda-handler.py:
--------------------------------------------------------------------------------
  1 | from boto3.dynamodb.conditions import Key
  2 | from requests.auth import HTTPBasicAuth
  3 | import requests
  4 | import logging
  5 | import boto3
  6 | import json
  7 | import os
  8 | 
  9 | 
# Set up logger
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger('log')

# Create service clients (built once at module load and shared by the functions below)
dynamodb = boto3.resource('dynamodb')
sagemaker_runtime = boto3.client('sagemaker-runtime')

# Read the OpenSearch connection settings from environment variables.
# A missing variable raises KeyError as soon as the module is loaded.
domain_endpoint = os.environ['OS_ENDPOINT']
domain_index = os.environ['OS_INDEX_NAME']
os_username = os.environ['OS_USERNAME']
os_password = os.environ['OS_PASSWORD']

# Base URL of the OpenSearch index; write_to_elasticsearch appends '/_doc/<session_id>'
URL = f'{domain_endpoint}/{domain_index}'

# Generation parameters placed in the request payload by summarize_conversations.
# NOTE(review): how the endpoint interprets each value (e.g. TOP_K = 0) depends on
# the deployed model container -- confirm against the model's documentation.
MAX_LENGTH = 512
NUM_RETURN_SEQUENCES = 1
TOP_K = 0
TOP_P = 0.7
DO_SAMPLE = True
CONTENT_TYPE = 'application/json'  # Content type of the text-generation request
TEMPERATURE = 0.1
 36 | 
 37 | 
 38 | 
 39 | def lambda_handler(event: dict, context: dict) -> None:
 40 |     logger.info(f'Received event: {event}')
 41 |     logger.info(f'Received context: {context}')
 42 | 
 43 |     for record in event['Records']:
 44 |         if record['eventName'] == 'MODIFY':
 45 |             session_item = record['dynamodb']['NewImage']
 46 |             session_id = session_item['session_id']['S']
 47 |             end_time = session_item['end_time']['N']
 48 | 
 49 |             # Query the conversations table
 50 |             conversation_turns = query_conversations_table(session_id)
 51 | 
 52 |             # Flatten the conversation turns into a dict
 53 |             flattened_conversations = flatten_conversations(conversation_turns)
 54 | 
 55 |             summary = summarize_conversations(flattened_conversations)
 56 | 
 57 |             # Encode the dict into an embedding
 58 |             embedding = encode_conversations(summary)
 59 | 
 60 |             # Write the embedding to Elasticsearch
 61 |             write_to_elasticsearch(session_id, embedding, end_time, summary)
 62 | 
 63 |             print(f"Session {session_id} was persisted to long term memory")
 64 | 
 65 | 
 66 | def query_conversations_table(session_id: str) -> list:
 67 |     table = dynamodb.Table('conversations')
 68 |     response = table.query(KeyConditionExpression=Key('session_id').eq(session_id))
 69 |     return response['Items']
 70 | 
 71 | 
 72 | def flatten_conversations(conversation_turns: list) -> dict:
 73 |     flattened_conversations = {'conversation': ''}
 74 |     for turn in conversation_turns:
 75 |         user_message = turn['Me']
 76 |         bot_message = turn['AI']
 77 |         flattened_conversations['conversation'] += f"{user_message} {bot_message} "
 78 |     return flattened_conversations
 79 | 
 80 | 
 81 | def summarize_conversations(conversation: str) -> str:
 82 |     logger.info('Conversation: {conversation}')
 83 |     prompt = f"""Conversation==hi there! I'm doing well, thank you. what is the meaning of eminent domain? Eminent domain is the power of the government to take private property for public use, with just compensation. 
 84 | Summary==We discussed about the meaning of eminent domain and that it is the government's power to take private property for public use with just compensation. 
 85 | 
 86 | Conversation==Hey! I'm feeling great, how about you? Can you tell me what is the concept of due diligence? Due diligence is a comprehensive investigation or appraisal of a business or person before entering into an agreement or transaction. 
 87 | Summary==We discussed about the meaning of due diligence and that it is a comprehensive investigation or appraisal of a business or person before entering into an agreement or transaction. 
 88 | 
 89 | Conversation==hello! I'm good, thank you for asking. What is the definition of fiduciary duty? Fiduciary duty is a legal obligation of one party to act in the best interests of another, often in financial or legal matters. 
 90 | Summary==We talked about the meaning of fiduciary duty and that it is a legal obligation of one party to act in the best interests of another, often in financial or legal matters. 
 91 | 
 92 | Conversation=={conversation}
 93 | Summary==
 94 | 
 95 | 
 96 | Summarize the above Conversation as a short paragraph in 3 to 4 sentences."""
 97 |     payload = {'text_inputs': prompt,
 98 |                'max_length': MAX_LENGTH,
 99 |                'temperature': TEMPERATURE,
100 |                'num_return_sequences': NUM_RETURN_SEQUENCES,
101 |                'top_k': TOP_K,
102 |                'top_p': TOP_P,
103 |                'do_sample': DO_SAMPLE}
104 |     payload = json.dumps(payload).encode('utf-8')
105 |     response = sagemaker_runtime.invoke_endpoint(EndpointName=os.environ['SAGEMAKER_TEXT_GEN_ENDPOINT'],
106 |                                                  ContentType=CONTENT_TYPE,
107 |                                                  Body=payload)
108 |     model_predictions = json.loads(response['Body'].read())
109 |     generated_text = model_predictions['generated_texts'][0]
110 |     logger.info(f'Summary: {generated_text}')
111 |     return generated_text
112 | 
113 | 
def encode_conversations(summary: str) -> list:
    """Turn a summary into an embedding via the text-embedding endpoint.

    The summary is sent as a one-element ``text_inputs`` list to the endpoint
    named by the ``SAGEMAKER_TEXT_EMBED_ENDPOINT`` environment variable.

    Args:
        summary: Text to embed.

    Returns:
        The first entry of ``embedding`` in the endpoint's JSON response.
    """
    request_body = json.dumps({'text_inputs': [summary]}).encode('utf-8')
    result = sagemaker_runtime.invoke_endpoint(
        EndpointName=os.environ['SAGEMAKER_TEXT_EMBED_ENDPOINT'],
        ContentType='application/json',
        Body=request_body,
    )
    parsed = json.loads(result['Body'].read())
    return parsed['embedding'][0]
123 | 
124 | 
def write_to_elasticsearch(session_id: str, embedding: list, end_time: int, summary: str,
                           timeout: float = 30.0) -> None:
    """Index one session summary and its embedding under ``<URL>/_doc/<session_id>``.

    The write is best effort: a non-200/201 response or any exception is
    logged and the function returns normally, so callers never see an error.

    Args:
        session_id: Used as the document id and stored in the document.
        embedding: Vector stored in the ``embedding`` field.
        end_time: Stored in the ``created_at`` field.
        summary: Stored in the ``conversation_summary`` field.
        timeout: Seconds to wait for the HTTP request. Without a timeout the
            request could block until the Lambda invocation itself times out.

    Returns:
        None.
    """
    document = {
        'session_id': session_id,
        'embedding': embedding,
        'created_at': end_time,
        'conversation_summary': summary
    }

    try:
        response = requests.post(f'{URL}/_doc/{session_id}',
                                 auth=HTTPBasicAuth(os_username, os_password),
                                 json=document,
                                 timeout=timeout)
        if response.status_code not in [200, 201]:
            logger.error(f'Failed to index session {session_id}: HTTP {response.status_code}')
            logger.error(response.text)
    except Exception:
        # Deliberately broad: keep the original never-raise behaviour, but record
        # the traceback so failures can be diagnosed from the logs.
        logger.exception(f'Error while indexing session {session_id}')
141 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AWS SageMaker Chatbot 
2 | Build a context-aware chatbot with short- and long-term memory using AWS SageMaker and other AWS services for improved user interactions.
3 | <br>
4 | <br>
5 | ![Cognitive Architecture AWS](./img/cognition.png)
6 | <br>
7 | <br>


--------------------------------------------------------------------------------
/chatbot-app/app.py:
--------------------------------------------------------------------------------
  1 | from retrieve import retrieve_top_matching_past_conversations
  2 | from llm import summarize_passages_and_collate_answers
  3 | from retrieve import retrieve_top_matching_passages
  4 | from ddb import get_conversations_by_session_id
  5 | from llm import generate_dialogue_response
  6 | from ddb import add_conversation_turn
  7 | from ddb import create_session
  8 | from ddb import end_session
  9 | from llm import detect_task
 10 | import streamlit as st
 11 | import logging
 12 | import boto3
 13 | 
 14 | 
 15 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 16 |                     datefmt='%Y-%m-%d %H:%M:%S')
 17 | logger = logging.getLogger('log')
 18 | 
 19 | 
 20 | # Set Streamlit page configuration
 21 | st.set_page_config(page_title='ai-assistant', layout='wide')
 22 | 
 23 | dynamodb = boto3.resource('dynamodb')
 24 | 
 25 | # Initialize session states
 26 | if 'generated' not in st.session_state:
 27 |     st.session_state['generated'] = []
 28 | if 'past' not in st.session_state:
 29 |     st.session_state['past'] = []
 30 | if 'input' not in st.session_state:
 31 |     st.session_state['input'] = ''
 32 | if 'stored_session' not in st.session_state:
 33 |     st.session_state['stored_session'] = []
 34 | 
 35 | 
 36 | def get_text_input():
 37 |     """
 38 |     Get the user inputted text.
 39 |     :return: Text entered by the user
 40 |     """
 41 |     text_input = st.text_input('You: ',
 42 |                                st.session_state['input'],
 43 |                                key='input',
 44 |                                placeholder='Your AI assistant here! Ask me anything ...',
 45 |                                label_visibility='hidden')
 46 |     return text_input
 47 | 
 48 | 
 49 | def new_session():
 50 |     """
 51 |     Clears session state and starts a new session.
 52 |     """
 53 |     # End current session and update sessions table in DynamoDB
 54 |     table_name = 'sessions'
 55 |     table = dynamodb.Table(table_name)
 56 |     end_session(table, st.session_state.session_id)
 57 | 
 58 |     save = []
 59 |     for j in range(len(st.session_state['generated']) - 1, -1, -1):
 60 |         save.append(f"User: {st.session_state['past'][j]}")
 61 |         save.append(f"Bot: {st.session_state['generated'][j]}")
 62 | 
 63 |     st.session_state['stored_session'].append(save)
 64 |     st.session_state['generated'] = []
 65 |     st.session_state['past'] = []
 66 |     st.session_state['input'] = ''
 67 | 
 68 | 
 69 | # Set up sidebar with various options
 70 | with st.sidebar.expander('🛠️', expanded=True):
 71 |     max_turns = st.number_input('Number of turns to remember',
 72 |                                 min_value=1,
 73 |                                 max_value=100)
 74 | 
 75 | # Set up the Streamlit app layout
 76 | st.title('🤖 AI Assistant 🧠')
 77 | st.subheader('Powered by ☁️ AWS')
 78 | 
 79 | # Add a button to start a new chat
 80 | st.sidebar.button('New Session', on_click=new_session, type='primary')
 81 | 
 82 | # Get the user input
 83 | user_input = get_text_input()
 84 | 
 85 | sessions_table = dynamodb.Table('sessions')
 86 | conversations_table = dynamodb.Table('conversations')
 87 | 
 88 | 
 89 | def respond_by_task(query, history):
 90 |     logger.info(f'HISTORY: {history}')
 91 |     task_type = detect_task(query)
 92 |     logger.info(f'TASK TYPE = {task_type}')
 93 |     completion = None
 94 |     if task_type == 'STM CHAT':
 95 |         if len(history) > 0:
 96 |             prompt = f"""{history}
 97 | Me: {user_input}
 98 | AI:"""
 99 |             logger.info(f'Prompt: {prompt}')
100 |             completion = generate_dialogue_response(prompt)
101 |         else:
102 |             prompt = f"""Me: {user_input}
103 | AI:"""
104 |             logger.info(f'Prompt: {prompt}')
105 |             completion = generate_dialogue_response(prompt)
106 |     elif task_type == 'LTM PAST CONVERSATIONS':
107 |         completion = retrieve_top_matching_past_conversations(user_input, 'conversations')
108 |         completion = '\n\n'.join(completion)
109 |     elif task_type == 'LTM VERIFIED SOURCES':
110 |         completion = retrieve_top_matching_passages(user_input, 'passages')
111 |         completion = summarize_passages_and_collate_answers(completion, user_input)
112 |     return completion
113 | 
114 | 
def transform_ddb_past_history(history: list, num_turns=10) -> str:
    """
    Render the most recent conversation turns as a prompt-ready transcript.

    :param history: Turn records, each a mapping with 'Me' and 'AI' entries,
                    ordered oldest first
    :param num_turns: Maximum number of trailing turns to keep; zero or a
                      negative value keeps none
    :return: Newline-joined 'Me: ...' / 'AI: ...' lines, or '' when nothing is kept
    """
    # A slice such as lines[-0:] would return everything, so handle the
    # "remember nothing" case explicitly.
    if num_turns <= 0:
        return ''
    lines = []
    for turn in history[-num_turns:]:
        lines.append(f"Me: {turn['Me']}")
        lines.append(f"AI: {turn['AI']}")
    return '\n'.join(lines)
125 | 
126 | 
if user_input:
    # Snapshot the widget text before anything else runs.
    user_utterance = st.session_state['input']

    # No bot replies yet means this is the first message: open a new session.
    if not st.session_state['generated']:
        st.session_state.session_id = create_session(sessions_table)
    active_session_id = st.session_state.session_id

    # Build the short-term memory from the turns stored for this session.
    stored_turns = get_conversations_by_session_id(conversations_table, active_session_id)
    past_history = transform_ddb_past_history(stored_turns, max_turns)
    output = respond_by_task(user_input, past_history)

    st.session_state.past.append(user_input)
    st.session_state.generated.append(output)

    # Persist the exchange so later turns (and sessions) can recall it.
    ai_utterance = st.session_state['generated'][-1]
    add_conversation_turn(conversations_table, active_session_id, user_utterance, ai_utterance)
143 | 
# Show the conversation newest-first inside an expander and offer it as a download
download_str = []
with st.expander('Conversation', expanded=True):
    for i in reversed(range(len(st.session_state['generated']))):
        asked = st.session_state['past'][i]
        answered = st.session_state['generated'][i]
        st.info(asked, icon='🧐')
        st.success(answered, icon='🤖')
        download_str.extend((f"Human: {asked}", f"AI: {answered}"))

    # The list of lines becomes the single string handed to the download button.
    download_str = '\n'.join(download_str)
    if download_str:
        st.download_button('Download', download_str)
156 | 
157 | # Display stored conversation sessions in the sidebar
158 | for i, sublist in enumerate(st.session_state.stored_session):
159 |     with st.sidebar.expander(label=f'Conversation Session:{i}'):
160 |         st.write(sublist)
161 | 
162 | 
def del_sessions():
    """Forget all archived conversations by removing the 'stored_session' state entry."""
    del st.session_state['stored_session']
165 | 
166 | 
167 | # Allow the user to clear all stored conversation sessions
168 | if st.session_state.stored_session:
169 |     st.sidebar.button('Clear All', on_click=del_sessions, type='primary')
170 | 


--------------------------------------------------------------------------------
/chatbot-app/config/config.yml:
--------------------------------------------------------------------------------
 1 | opensearch:
 2 |     credentials:
 3 |         username: xxxxxxxx
 4 |         password: xxxxxxxx
 5 |     domain:
 6 |         endpoint: https://xxxxxxxx.us-east-1.es.amazonaws.com
 7 | jumpstart:
 8 |     text_gen_endpoint_name: xxxxxxxx
 9 |     text_embed_endpoint_name: xxxxxxxx
10 |     


--------------------------------------------------------------------------------
/chatbot-app/ddb.py:
--------------------------------------------------------------------------------
  1 | from boto3.dynamodb.conditions import Key
  2 | import logging
  3 | import boto3
  4 | import time
  5 | import uuid
  6 | 
  7 | 
  8 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
  9 |                     datefmt='%Y-%m-%d %H:%M:%S')
 10 | logger = logging.getLogger('log')
 11 | 
 12 | client = boto3.resource('dynamodb')
 13 | 
 14 | 
 15 | def add_conversation_turn(table, session_id, user, bot):
 16 |     timestamp = int(time.time() * 1000)
 17 |     table.put_item(
 18 |         Item={
 19 |             'session_id': session_id,
 20 |             'timestamp': timestamp,
 21 |             'Me': user,
 22 |             'AI': bot
 23 |         }
 24 |     )
 25 | 
 26 | 
 27 | def get_conversations_by_session_id(table, session_id, descending=True):
 28 |     response = table.query(
 29 |         KeyConditionExpression=Key('session_id').eq(session_id),
 30 |         ScanIndexForward=descending
 31 |     )
 32 |     return response['Items']
 33 | 
 34 | 
 35 | def delete_conversation(table, session_id, timestamp):
 36 |     table.delete_item(
 37 |         Key={
 38 |             'session_id': session_id,
 39 |             'timestamp': timestamp
 40 |         }
 41 |     )
 42 | 
 43 | 
 44 | def create_session(table):
 45 |     session_id = str(uuid.uuid4())
 46 |     start_time = int(time.time() * 1000)
 47 |     table.put_item(
 48 |         Item={
 49 |             'session_id': session_id,
 50 |             'start_time': start_time,
 51 |             'end_time': None,
 52 |             'num_turns': 0
 53 |         }
 54 |     )
 55 |     return session_id
 56 | 
 57 | 
 58 | def end_session(table, session_id):
 59 |     end_time = int(time.time() * 1000)
 60 |     start_time_response = table.get_item(
 61 |         Key={'session_id': session_id}
 62 |     )
 63 |     start_time = start_time_response['Item']['start_time']
 64 |     num_turns = len(get_conversations_by_session_id(table, session_id))
 65 |     conversation_duration = end_time - start_time  # Compute duration in milliseconds
 66 | 
 67 |     table.update_item(
 68 |         Key={'session_id': session_id},
 69 |         UpdateExpression="SET end_time = :end_time, num_turns = :num_turns, conversation_duration = "
 70 |                          ":conversation_duration",
 71 |         ExpressionAttributeValues={
 72 |             ':end_time': end_time,
 73 |             ':num_turns': num_turns,
 74 |             ':conversation_duration': conversation_duration
 75 |         }
 76 |     )
 77 | 
 78 | 
 79 | if __name__ == '__main__':
 80 |     # Start a new session
 81 |     table_name = 'sessions'
 82 | 
 83 |     # Get the table instance
 84 |     table_ = client.Table(table_name)
 85 |     session_id_ = create_session(table_)
 86 | 
 87 |     # Add conversation turns
 88 |     table_name = 'conversations'
 89 |     table_ = client.Table(table_name)
 90 |     add_conversation_turn(table_, session_id_, 'hi', 'hello')
 91 |     add_conversation_turn(table_, session_id_, 'how are you?', 'i am fine')
 92 |     add_conversation_turn(table_, session_id_, 'what is the definition of court defamation?',
 93 |                           'Court defamation is a type of '
 94 |                           'civil wrong.')
 95 | 
 96 |     # End the session
 97 |     table_name = 'sessions'
 98 |     table_ = client.Table(table_name)
 99 |     end_session(table_, session_id_)
100 | 


--------------------------------------------------------------------------------
/chatbot-app/llm.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import boto3
 3 | import json
 4 | import yaml
 5 | 
 6 | 
 7 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 8 |                     datefmt='%Y-%m-%d %H:%M:%S')
 9 | logger = logging.getLogger('log')
10 | 
11 | 
12 | with open('./config/config.yml', 'r') as f:
13 |     config = yaml.safe_load(f)
14 | 
15 | endpoint_name = config['jumpstart']['text_gen_endpoint_name']
16 | CONTENT_TYPE = 'application/json'
17 | 
18 | client = boto3.client('sagemaker-runtime')
19 | 
20 | NUM_RETURN_SEQUENCES = 1
21 | TOP_K = 0
22 | TOP_P = 0.7
23 | DO_SAMPLE = True
24 | TEMPERATURE = 0.1
25 | 
26 | 
def detect_task(query: str) -> str:
    """
    Classify a query by its leading command.

    A query starting with '\\verified' or '/verified' asks for verified
    sources, one starting with '\\past' or '/past' asks for past
    conversations, and anything else is ordinary chat.

    :param query: Raw user text
    :return: 'LTM VERIFIED SOURCES', 'LTM PAST CONVERSATIONS' or 'STM CHAT'
    """
    if query.startswith(('\\verified', '/verified')):
        return 'LTM VERIFIED SOURCES'
    if query.startswith(('\\past', '/past')):
        return 'LTM PAST CONVERSATIONS'
    return 'STM CHAT'
34 | 
35 | 
def generate(prompt: str, max_length=256) -> str:
    """
    Send a prompt to the text-generation SageMaker endpoint and return the completion.

    Sampling settings come from the module-level constants
    (NUM_RETURN_SEQUENCES, TOP_K, TOP_P, TEMPERATURE, DO_SAMPLE).

    :param prompt: Text to complete
    :param max_length: Value sent as the request's 'max_length'
    :return: First generated text, stripped of surrounding whitespace
    """
    payload = {'text_inputs': prompt,
               'max_length': max_length,
               'num_return_sequences': NUM_RETURN_SEQUENCES,
               'top_k': TOP_K,
               'top_p': TOP_P,
               'temperature': TEMPERATURE,
               'do_sample': DO_SAMPLE}
    payload = json.dumps(payload).encode('utf-8')
    # The endpoint name is loaded from the config file into the module-level
    # `endpoint_name`; there is no upper-case variant of that variable.
    response = client.invoke_endpoint(EndpointName=endpoint_name,
                                      ContentType=CONTENT_TYPE,
                                      Body=payload)
    model_predictions = json.loads(response['Body'].read())
    generated_text = model_predictions['generated_texts'][0]
    return generated_text.strip()
52 | 
53 | 
def summarize_passages_and_collate_answers(passages: list, query: str) -> str:
    """
    Answer the query once per passage and merge the answers into one text.

    :param passages: Iterable of (passage, doc_id, passage_id) triples
    :param query: The user's question
    :return: The per-passage answers, each followed by its source tag, joined
             by blank lines
    """
    instruction = 'Given a passage and a question, generate a clean answer in 2 to 3 short complete sentences. '
    tagged_answers = []
    for passage, doc_id, passage_id in passages:
        prompt = f'Passage=={passage}\n\nQuestion=={query}\n\nAnswer==\n\n{instruction}'
        answer = generate(prompt, 256)
        source_tag = f'[doc = {doc_id} | passage = {passage_id}]'
        tagged_answers.append(f'{answer}\n\n{source_tag}')
    collated = '\n\n'.join(tagged_answers)
    logger.info(f'ANSWERS: {collated}')
    return collated
64 | 
65 | 
def generate_dialogue_response(prompt: str) -> str:
    """
    Produce the assistant's next dialogue line for a prompt and log it.

    :param prompt: Dialogue prompt passed to ``generate`` with a max length of 256
    :return: The generated reply
    """
    reply = generate(prompt, 256)
    logger.info(f'DIALOGUE RESPONSE: {reply}')
    return reply
70 | 
71 | 
72 | if __name__ == '__main__':
73 |     completion_ = detect_task('definition of bribery by indian law ')
74 |     logging.info(completion_)
75 | 


--------------------------------------------------------------------------------
/chatbot-app/retrieve.py:
--------------------------------------------------------------------------------
  1 | from requests.auth import HTTPBasicAuth
  2 | import datetime
  3 | import requests
  4 | import logging
  5 | import boto3
  6 | import yaml
  7 | import json
  8 | 
  9 | 
 10 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
 11 |                     datefmt='%Y-%m-%d %H:%M:%S')
 12 | logger = logging.getLogger('log')
 13 | 
 14 | 
 15 | with open('./config/config.yml', 'r') as file:
 16 |     config = yaml.safe_load(file)
 17 | 
 18 | os_username = config['opensearch']['credentials']['username']
 19 | os_password = config['opensearch']['credentials']['password']
 20 | domain_endpoint = config['opensearch']['domain']['endpoint']
 21 | text_embedding_model_endpoint_name = config['jumpstart']['text_embed_endpoint_name']
 22 | CONTENT_TYPE = 'application/json'
 23 | 
 24 | sagemaker_client = boto3.client('runtime.sagemaker')
 25 | 
 26 | 
 27 | def encode_query(query: str) -> list:
 28 |     payload = {'text_inputs': [query]}
 29 |     payload = json.dumps(payload).encode('utf-8')
 30 |     response = sagemaker_client.invoke_endpoint(EndpointName=text_embedding_model_endpoint_name,
 31 |                                                 ContentType='application/json',
 32 |                                                 Body=payload)
 33 |     body = json.loads(response['Body'].read())
 34 |     embedding = body['embedding'][0]
 35 |     return embedding
 36 | 
 37 | 
 38 | def get_es_query(embedding: list, k) -> dict:
 39 |     query = {
 40 |         'size': k,
 41 |         'query': {
 42 |             'knn': {
 43 |                 'embedding': {
 44 |                     'vector': embedding,
 45 |                     'k': k
 46 |                 }
 47 |             }
 48 |         }
 49 |     }
 50 |     return query
 51 | 
 52 | 
 53 | def retrieve_top_matching_passages(query: str, index: str) -> list:
 54 |     passages = []
 55 |     embedding = encode_query(query)
 56 |     query = get_es_query(embedding, 3)
 57 |     url = f'{domain_endpoint}/{index}/_search'
 58 |     response = requests.post(url, auth=HTTPBasicAuth(os_username, os_password), json=query)
 59 |     response_json = response.json()
 60 |     hits = response_json['hits']['hits']
 61 |     for hit in hits:
 62 |         # score = hit['_score']
 63 |         passage = hit['_source']['passage']
 64 |         doc_id = hit['_source']['doc_id']
 65 |         passage_id = hit['_source']['passage_id']
 66 |         passages.append([passage, doc_id, passage_id])
 67 |     return passages
 68 | 
 69 | 
 70 | def retrieve_top_matching_past_conversations(query: str, index: str) -> list:
 71 |     past_conversations = {}
 72 |     embedding = encode_query(query)
 73 |     query = get_es_query(embedding, 3)
 74 |     url = f'{domain_endpoint}/{index}/_search'
 75 |     response = requests.post(url, auth=HTTPBasicAuth(os_username, os_password), json=query)
 76 |     response_json = response.json()
 77 |     hits = response_json['hits']['hits']
 78 | 
 79 |     for hit in hits:
 80 |         # score = hit['_score']
 81 |         conversation_summary = hit['_source']['conversation_summary']
 82 |         created_at_ms = hit['_source']['created_at']
 83 |         created_at = datetime.datetime.fromtimestamp(int(created_at_ms) / 1000.0)
 84 |         created_at = created_at.strftime('%Y-%m-%d %H:%M:%S')
 85 |         date, time = created_at.split(' ')
 86 |         # session_id = hit['_source']['session_id']
 87 |         summary = f'[{date}][{time}] {conversation_summary}'
 88 |         past_conversations[int(created_at_ms)] = summary
 89 | 
 90 |     sorted_past_conversations = {}
 91 |     for key in sorted(past_conversations.keys()):
 92 |         sorted_past_conversations[key] = past_conversations[key]
 93 | 
 94 |     sorted_conversations = list(sorted_past_conversations.values())
 95 |     sorted_conversations.reverse()
 96 |     return sorted_conversations
 97 | 
 98 | 
 99 | if __name__ == '__main__':
100 |     matches = retrieve_top_matching_past_conversations('court defamation', 'conversations')
101 |     logger.info(matches)
102 |     matches = retrieve_top_matching_passages('court defamation', 'legal-passages')
103 |     logger.info(matches)
104 | 


--------------------------------------------------------------------------------
/config.yml:
--------------------------------------------------------------------------------
1 | credentials:
2 |     username: js-es
3 |     password: JumpStart123!
4 | domain:
5 |     endpoint: https://search-semantic-search-hryn56c5jy43yryimohz4ajvyi.us-east-1.es.amazonaws.com
6 |     index: conversations
7 | 


--------------------------------------------------------------------------------
/img/cognition.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arunprsh/aws-sagemaker-chatbot-memory/cb38c5e9fec385df2e99a25df8f2c83ee0f401a7/img/cognition.png


--------------------------------------------------------------------------------
/lambda-env.csv:
--------------------------------------------------------------------------------
1 | Environment Variable,Value
2 | OS_ENDPOINT,https://xxxxxxxxx.us-east-1.es.amazonaws.com
3 | OS_INDEX_NAME,conversations
4 | OS_PASSWORD,xxxxxxxxx
5 | OS_USERNAME,xxxxxxxxx
6 | REGION,us-east-1
7 | SAGEMAKER_TEXT_EMBED_ENDPOINT,huggingface-textembedding-gpt-j-6b-fp16-xxxxxxxxx
8 | SAGEMAKER_TEXT_GEN_ENDPOINT,flan-xxl-xxxxxxxxx


--------------------------------------------------------------------------------