├── .gitignore ├── 01-deploy-text-embedding-model.ipynb ├── 02-deploy-text-generation-model.ipynb ├── 03-create-dynamodb-tables.ipynb ├── 04-create-os-index.ipynb ├── 05-lambda-handler.py ├── LICENSE ├── README.md ├── chatbot-app ├── app.py ├── config │ └── config.yml ├── ddb.py ├── llm.py └── retrieve.py ├── config.yml ├── img └── cognition.png └── lambda-env.csv /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | -------------------------------------------------------------------------------- /01-deploy-text-embedding-model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "3a4bc882-63fb-4049-8551-d0fa3127bd6f", 6 | "metadata": {}, 7 | "source": [ 8 | "## Deploy Text Embedding Model (GPT-J 6B FP-16)" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "259bf0cc-c971-48c7-a537-6cc5958c9267", 14 | "metadata": {}, 15 | "source": [ 16 | "#### Imports" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "fa427a29-aa9d-411c-8ffc-7cada2feb57f", 23 | "metadata": { 24 | "tags": [] 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "from sagemaker.jumpstart.notebook_utils import list_jumpstart_models\n", 29 | "from sagemaker.predictor import Predictor\n", 30 | "from sagemaker import get_execution_role\n", 31 | "from sagemaker.model import Model\n", 32 | "from sagemaker import script_uris\n", 33 | "from sagemaker import image_uris \n", 34 | "from sagemaker import model_uris\n", 35 | "import sagemaker\n", 36 | "import logging\n", 37 | "import boto3\n", 38 | "import time\n", 39 | "import json" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "09080f41-3b2c-464e-9557-a9fae1313b63", 45 | "metadata": {}, 46 | "source": [ 47 | "##### Setup logging" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "id": "bde2a70f-471b-4eaf-9703-10fdcf67863e", 54 | "metadata": { 55 | "tags": [] 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "logger = logging.getLogger('sagemaker')\n", 60 | "logger.setLevel(logging.DEBUG)\n", 61 | "logger.addHandler(logging.StreamHandler())" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "id": "5372bb7c-ba1d-46af-ad4c-2e77503f422a", 67 | "metadata": {}, 68 | "source": [ 69 | "##### Log versions of dependencies" 70 | ] 71 | }, 72 | { 73 | "cell_type": 
"code", 74 | "execution_count": null, 75 | "id": "cb81aa05-1479-48e3-97fa-e9fa5a3684a5", 76 | "metadata": { 77 | "tags": [] 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "logger.info(f'Using sagemaker=={sagemaker.__version__}')\n", 82 | "logger.info(f'Using boto3=={boto3.__version__}')" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "id": "df8a48e0-a23f-4fbd-9be2-811ae129622c", 88 | "metadata": {}, 89 | "source": [ 90 | "#### Setup essentials " 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "id": "93dc7315-4803-440d-8dfb-e36f51c0a54f", 96 | "metadata": {}, 97 | "source": [ 98 | "##### List and filter all text embedding models available in JumpStart" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "id": "9060ff53-7c41-4998-b258-c847a6ee09c0", 105 | "metadata": { 106 | "tags": [] 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "models = list_jumpstart_models()\n", 111 | "logger.info(f'Total number of models in SageMaker JumpStart hub = {len(models)}')\n", 112 | "\n", 113 | "FILTER = 'task == textembedding'\n", 114 | "txt2img_models = list_jumpstart_models(filter=FILTER)\n", 115 | "txt2img_models" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "id": "34b6fcc4-a7d0-4f01-b2fa-d01ad3f0e2a5", 121 | "metadata": {}, 122 | "source": [ 123 | "##### Setup config params" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "id": "1b77dc13-20d5-4dad-aa8a-45ed2f8e70bf", 130 | "metadata": { 131 | "tags": [] 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "MODEL_ID = 'huggingface-textembedding-gpt-j-6b-fp16' \n", 136 | "MODEL_VERSION = '*'\n", 137 | "INSTANCE_TYPE = 'ml.g5.2xlarge'\n", 138 | "INSTANCE_COUNT = 1\n", 139 | "IMAGE_SCOPE = 'inference'\n", 140 | "MODEL_DATA_DOWNLOAD_TIMEOUT = 3600 # in seconds\n", 141 | "CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT = 3600\n", 142 | "CONTENT_TYPE = 'application/json'\n", 143 | "\n", 144 | "# Set up roles and clients 
\n", 145 | "client = boto3.client('sagemaker-runtime')\n", 146 | "ROLE = get_execution_role()\n", 147 | "logger.info(f'Role => {ROLE}')" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "id": "1526ad5f-9e74-459b-8dbd-1accdd576f12", 154 | "metadata": { 155 | "tags": [] 156 | }, 157 | "outputs": [], 158 | "source": [ 159 | "unix_time = int(time.time())\n", 160 | "endpoint_name = f'{MODEL_ID}-{unix_time}'\n", 161 | "logger.info(f'Endpoint name: {endpoint_name}')" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "id": "283e1db7-5629-495a-abd1-cf04351d2ad8", 167 | "metadata": {}, 168 | "source": [ 169 | "#### Retrieve image and model URIs" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "id": "187d02d5-93c9-4a87-bdd7-dd7443d7f64c", 176 | "metadata": { 177 | "tags": [] 178 | }, 179 | "outputs": [], 180 | "source": [ 181 | "deploy_image_uri = image_uris.retrieve(region=None, \n", 182 | " framework=None, \n", 183 | " image_scope=IMAGE_SCOPE, \n", 184 | " model_id=MODEL_ID, \n", 185 | " model_version=MODEL_VERSION, \n", 186 | " instance_type=INSTANCE_TYPE)\n", 187 | "logger.info(f'Deploy image URI => {deploy_image_uri}')" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "id": "521ba56c-0b53-485c-9c96-b08c5a332a8f", 194 | "metadata": { 195 | "tags": [] 196 | }, 197 | "outputs": [], 198 | "source": [ 199 | "model_uri = model_uris.retrieve(model_id=MODEL_ID, \n", 200 | " model_version=MODEL_VERSION, \n", 201 | " model_scope=IMAGE_SCOPE)\n", 202 | "logger.info(f'Model URI => {model_uri}')" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "id": "a32fbce5-500c-4606-8f19-e667e7c6e6a3", 209 | "metadata": { 210 | "tags": [] 211 | }, 212 | "outputs": [], 213 | "source": [ 214 | "env = {\n", 215 | " 'SAGEMAKER_MODEL_SERVER_TIMEOUT': str(3600),\n", 216 | " 'MODEL_CACHE_ROOT': '/opt/ml/model', \n", 217 | " 
'SAGEMAKER_ENV': '1',\n", 218 | " 'SAGEMAKER_SUBMIT_DIRECTORY': '/opt/ml/model/code/',\n", 219 | " 'SAGEMAKER_PROGRAM': 'inference.py',\n", 220 | " 'SAGEMAKER_MODEL_SERVER_WORKERS': '1', \n", 221 | " 'TS_DEFAULT_WORKERS_PER_MODEL': '1', \n", 222 | "}" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "id": "f2657343-5a65-4c57-b39d-6c93107588c5", 228 | "metadata": {}, 229 | "source": [ 230 | "#### Create SageMaker Model" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "id": "60aae2d1-c78a-4a89-890a-d6111ab95c57", 237 | "metadata": { 238 | "tags": [] 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "model = Model(image_uri=deploy_image_uri, \n", 243 | " model_data=model_uri, \n", 244 | " role=ROLE, \n", 245 | " predictor_cls=Predictor, \n", 246 | " name=endpoint_name, \n", 247 | " env=env)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "id": "5a6b21c7-ff26-4dc1-89e9-aa5fea339771", 253 | "metadata": {}, 254 | "source": [ 255 | "#### Deploy text embedding model as SageMaker endpoint for real-time synchronous inference" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "id": "0ce97c1f-b2e6-42e4-87c4-6fba2a92bed1", 262 | "metadata": { 263 | "tags": [] 264 | }, 265 | "outputs": [], 266 | "source": [ 267 | "%%time\n", 268 | "\n", 269 | "_ = model.deploy(initial_instance_count=INSTANCE_COUNT, \n", 270 | " instance_type=INSTANCE_TYPE, \n", 271 | " endpoint_name=endpoint_name, \n", 272 | " model_data_download_timeout=MODEL_DATA_DOWNLOAD_TIMEOUT, \n", 273 | " container_startup_health_check_timeout=CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT)" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "id": "46c497c6-2bd6-4e7c-a794-029c51bdc301", 279 | "metadata": {}, 280 | "source": [ 281 | "### Test SageMaker endpoint for inference" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "id": "2ec45951-bd9f-4096-812e-8ded522f6303", 
288 | "metadata": { 289 | "tags": [] 290 | }, 291 | "outputs": [], 292 | "source": [ 293 | "# ENDPOINT_NAME = 'huggingface-textembedding-gpt-j-6b-fp16-1680825746'" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "id": "929dfc15-0b3a-490c-b856-6df11ca10f0d", 300 | "metadata": { 301 | "tags": [] 302 | }, 303 | "outputs": [], 304 | "source": [ 305 | "query = 'what is the meaning of life according to an ant?'" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": null, 311 | "id": "1c06fb32-9246-4a3d-94e3-3cd57a4ba5e2", 312 | "metadata": { 313 | "tags": [] 314 | }, 315 | "outputs": [], 316 | "source": [ 317 | "payload = {'text_inputs': [query]}\n", 318 | "payload = json.dumps(payload).encode('utf-8')" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "id": "6d834078-f340-4ffd-8239-18774069574c", 325 | "metadata": { 326 | "tags": [] 327 | }, 328 | "outputs": [], 329 | "source": [ 330 | "%%time\n", 331 | "\n", 332 | "response = client.invoke_endpoint(EndpointName=endpoint_name,\n", 333 | " ContentType='application/json',\n", 334 | " Body=payload)\n", 335 | " " 336 | ] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "id": "213b5ef6-bf75-4d98-a096-da26168f696f", 341 | "metadata": {}, 342 | "source": [ 343 | "##### Parse model response to extract query embedding" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "id": "757f986f-7986-452d-9946-2ea411a883a7", 350 | "metadata": { 351 | "tags": [] 352 | }, 353 | "outputs": [], 354 | "source": [ 355 | "body = json.loads(response['Body'].read())\n", 356 | "embedding = body['embedding'][0]\n", 357 | "embedding" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "id": "5c609bfd-8c56-4bd3-8184-759ac1b67174", 364 | "metadata": {}, 365 | "outputs": [], 366 | "source": [] 367 | } 368 | ], 369 | "metadata": { 370 | "availableInstances": [ 371 | { 372 | 
"_defaultOrder": 0, 373 | "_isFastLaunch": true, 374 | "category": "General purpose", 375 | "gpuNum": 0, 376 | "hideHardwareSpecs": false, 377 | "memoryGiB": 4, 378 | "name": "ml.t3.medium", 379 | "vcpuNum": 2 380 | }, 381 | { 382 | "_defaultOrder": 1, 383 | "_isFastLaunch": false, 384 | "category": "General purpose", 385 | "gpuNum": 0, 386 | "hideHardwareSpecs": false, 387 | "memoryGiB": 8, 388 | "name": "ml.t3.large", 389 | "vcpuNum": 2 390 | }, 391 | { 392 | "_defaultOrder": 2, 393 | "_isFastLaunch": false, 394 | "category": "General purpose", 395 | "gpuNum": 0, 396 | "hideHardwareSpecs": false, 397 | "memoryGiB": 16, 398 | "name": "ml.t3.xlarge", 399 | "vcpuNum": 4 400 | }, 401 | { 402 | "_defaultOrder": 3, 403 | "_isFastLaunch": false, 404 | "category": "General purpose", 405 | "gpuNum": 0, 406 | "hideHardwareSpecs": false, 407 | "memoryGiB": 32, 408 | "name": "ml.t3.2xlarge", 409 | "vcpuNum": 8 410 | }, 411 | { 412 | "_defaultOrder": 4, 413 | "_isFastLaunch": true, 414 | "category": "General purpose", 415 | "gpuNum": 0, 416 | "hideHardwareSpecs": false, 417 | "memoryGiB": 8, 418 | "name": "ml.m5.large", 419 | "vcpuNum": 2 420 | }, 421 | { 422 | "_defaultOrder": 5, 423 | "_isFastLaunch": false, 424 | "category": "General purpose", 425 | "gpuNum": 0, 426 | "hideHardwareSpecs": false, 427 | "memoryGiB": 16, 428 | "name": "ml.m5.xlarge", 429 | "vcpuNum": 4 430 | }, 431 | { 432 | "_defaultOrder": 6, 433 | "_isFastLaunch": false, 434 | "category": "General purpose", 435 | "gpuNum": 0, 436 | "hideHardwareSpecs": false, 437 | "memoryGiB": 32, 438 | "name": "ml.m5.2xlarge", 439 | "vcpuNum": 8 440 | }, 441 | { 442 | "_defaultOrder": 7, 443 | "_isFastLaunch": false, 444 | "category": "General purpose", 445 | "gpuNum": 0, 446 | "hideHardwareSpecs": false, 447 | "memoryGiB": 64, 448 | "name": "ml.m5.4xlarge", 449 | "vcpuNum": 16 450 | }, 451 | { 452 | "_defaultOrder": 8, 453 | "_isFastLaunch": false, 454 | "category": "General purpose", 455 | "gpuNum": 0, 456 | 
"hideHardwareSpecs": false, 457 | "memoryGiB": 128, 458 | "name": "ml.m5.8xlarge", 459 | "vcpuNum": 32 460 | }, 461 | { 462 | "_defaultOrder": 9, 463 | "_isFastLaunch": false, 464 | "category": "General purpose", 465 | "gpuNum": 0, 466 | "hideHardwareSpecs": false, 467 | "memoryGiB": 192, 468 | "name": "ml.m5.12xlarge", 469 | "vcpuNum": 48 470 | }, 471 | { 472 | "_defaultOrder": 10, 473 | "_isFastLaunch": false, 474 | "category": "General purpose", 475 | "gpuNum": 0, 476 | "hideHardwareSpecs": false, 477 | "memoryGiB": 256, 478 | "name": "ml.m5.16xlarge", 479 | "vcpuNum": 64 480 | }, 481 | { 482 | "_defaultOrder": 11, 483 | "_isFastLaunch": false, 484 | "category": "General purpose", 485 | "gpuNum": 0, 486 | "hideHardwareSpecs": false, 487 | "memoryGiB": 384, 488 | "name": "ml.m5.24xlarge", 489 | "vcpuNum": 96 490 | }, 491 | { 492 | "_defaultOrder": 12, 493 | "_isFastLaunch": false, 494 | "category": "General purpose", 495 | "gpuNum": 0, 496 | "hideHardwareSpecs": false, 497 | "memoryGiB": 8, 498 | "name": "ml.m5d.large", 499 | "vcpuNum": 2 500 | }, 501 | { 502 | "_defaultOrder": 13, 503 | "_isFastLaunch": false, 504 | "category": "General purpose", 505 | "gpuNum": 0, 506 | "hideHardwareSpecs": false, 507 | "memoryGiB": 16, 508 | "name": "ml.m5d.xlarge", 509 | "vcpuNum": 4 510 | }, 511 | { 512 | "_defaultOrder": 14, 513 | "_isFastLaunch": false, 514 | "category": "General purpose", 515 | "gpuNum": 0, 516 | "hideHardwareSpecs": false, 517 | "memoryGiB": 32, 518 | "name": "ml.m5d.2xlarge", 519 | "vcpuNum": 8 520 | }, 521 | { 522 | "_defaultOrder": 15, 523 | "_isFastLaunch": false, 524 | "category": "General purpose", 525 | "gpuNum": 0, 526 | "hideHardwareSpecs": false, 527 | "memoryGiB": 64, 528 | "name": "ml.m5d.4xlarge", 529 | "vcpuNum": 16 530 | }, 531 | { 532 | "_defaultOrder": 16, 533 | "_isFastLaunch": false, 534 | "category": "General purpose", 535 | "gpuNum": 0, 536 | "hideHardwareSpecs": false, 537 | "memoryGiB": 128, 538 | "name": "ml.m5d.8xlarge", 539 | 
"vcpuNum": 32 540 | }, 541 | { 542 | "_defaultOrder": 17, 543 | "_isFastLaunch": false, 544 | "category": "General purpose", 545 | "gpuNum": 0, 546 | "hideHardwareSpecs": false, 547 | "memoryGiB": 192, 548 | "name": "ml.m5d.12xlarge", 549 | "vcpuNum": 48 550 | }, 551 | { 552 | "_defaultOrder": 18, 553 | "_isFastLaunch": false, 554 | "category": "General purpose", 555 | "gpuNum": 0, 556 | "hideHardwareSpecs": false, 557 | "memoryGiB": 256, 558 | "name": "ml.m5d.16xlarge", 559 | "vcpuNum": 64 560 | }, 561 | { 562 | "_defaultOrder": 19, 563 | "_isFastLaunch": false, 564 | "category": "General purpose", 565 | "gpuNum": 0, 566 | "hideHardwareSpecs": false, 567 | "memoryGiB": 384, 568 | "name": "ml.m5d.24xlarge", 569 | "vcpuNum": 96 570 | }, 571 | { 572 | "_defaultOrder": 20, 573 | "_isFastLaunch": false, 574 | "category": "General purpose", 575 | "gpuNum": 0, 576 | "hideHardwareSpecs": true, 577 | "memoryGiB": 0, 578 | "name": "ml.geospatial.interactive", 579 | "supportedImageNames": [ 580 | "sagemaker-geospatial-v1-0" 581 | ], 582 | "vcpuNum": 0 583 | }, 584 | { 585 | "_defaultOrder": 21, 586 | "_isFastLaunch": true, 587 | "category": "Compute optimized", 588 | "gpuNum": 0, 589 | "hideHardwareSpecs": false, 590 | "memoryGiB": 4, 591 | "name": "ml.c5.large", 592 | "vcpuNum": 2 593 | }, 594 | { 595 | "_defaultOrder": 22, 596 | "_isFastLaunch": false, 597 | "category": "Compute optimized", 598 | "gpuNum": 0, 599 | "hideHardwareSpecs": false, 600 | "memoryGiB": 8, 601 | "name": "ml.c5.xlarge", 602 | "vcpuNum": 4 603 | }, 604 | { 605 | "_defaultOrder": 23, 606 | "_isFastLaunch": false, 607 | "category": "Compute optimized", 608 | "gpuNum": 0, 609 | "hideHardwareSpecs": false, 610 | "memoryGiB": 16, 611 | "name": "ml.c5.2xlarge", 612 | "vcpuNum": 8 613 | }, 614 | { 615 | "_defaultOrder": 24, 616 | "_isFastLaunch": false, 617 | "category": "Compute optimized", 618 | "gpuNum": 0, 619 | "hideHardwareSpecs": false, 620 | "memoryGiB": 32, 621 | "name": "ml.c5.4xlarge", 622 | 
"vcpuNum": 16 623 | }, 624 | { 625 | "_defaultOrder": 25, 626 | "_isFastLaunch": false, 627 | "category": "Compute optimized", 628 | "gpuNum": 0, 629 | "hideHardwareSpecs": false, 630 | "memoryGiB": 72, 631 | "name": "ml.c5.9xlarge", 632 | "vcpuNum": 36 633 | }, 634 | { 635 | "_defaultOrder": 26, 636 | "_isFastLaunch": false, 637 | "category": "Compute optimized", 638 | "gpuNum": 0, 639 | "hideHardwareSpecs": false, 640 | "memoryGiB": 96, 641 | "name": "ml.c5.12xlarge", 642 | "vcpuNum": 48 643 | }, 644 | { 645 | "_defaultOrder": 27, 646 | "_isFastLaunch": false, 647 | "category": "Compute optimized", 648 | "gpuNum": 0, 649 | "hideHardwareSpecs": false, 650 | "memoryGiB": 144, 651 | "name": "ml.c5.18xlarge", 652 | "vcpuNum": 72 653 | }, 654 | { 655 | "_defaultOrder": 28, 656 | "_isFastLaunch": false, 657 | "category": "Compute optimized", 658 | "gpuNum": 0, 659 | "hideHardwareSpecs": false, 660 | "memoryGiB": 192, 661 | "name": "ml.c5.24xlarge", 662 | "vcpuNum": 96 663 | }, 664 | { 665 | "_defaultOrder": 29, 666 | "_isFastLaunch": true, 667 | "category": "Accelerated computing", 668 | "gpuNum": 1, 669 | "hideHardwareSpecs": false, 670 | "memoryGiB": 16, 671 | "name": "ml.g4dn.xlarge", 672 | "vcpuNum": 4 673 | }, 674 | { 675 | "_defaultOrder": 30, 676 | "_isFastLaunch": false, 677 | "category": "Accelerated computing", 678 | "gpuNum": 1, 679 | "hideHardwareSpecs": false, 680 | "memoryGiB": 32, 681 | "name": "ml.g4dn.2xlarge", 682 | "vcpuNum": 8 683 | }, 684 | { 685 | "_defaultOrder": 31, 686 | "_isFastLaunch": false, 687 | "category": "Accelerated computing", 688 | "gpuNum": 1, 689 | "hideHardwareSpecs": false, 690 | "memoryGiB": 64, 691 | "name": "ml.g4dn.4xlarge", 692 | "vcpuNum": 16 693 | }, 694 | { 695 | "_defaultOrder": 32, 696 | "_isFastLaunch": false, 697 | "category": "Accelerated computing", 698 | "gpuNum": 1, 699 | "hideHardwareSpecs": false, 700 | "memoryGiB": 128, 701 | "name": "ml.g4dn.8xlarge", 702 | "vcpuNum": 32 703 | }, 704 | { 705 | "_defaultOrder": 
33, 706 | "_isFastLaunch": false, 707 | "category": "Accelerated computing", 708 | "gpuNum": 4, 709 | "hideHardwareSpecs": false, 710 | "memoryGiB": 192, 711 | "name": "ml.g4dn.12xlarge", 712 | "vcpuNum": 48 713 | }, 714 | { 715 | "_defaultOrder": 34, 716 | "_isFastLaunch": false, 717 | "category": "Accelerated computing", 718 | "gpuNum": 1, 719 | "hideHardwareSpecs": false, 720 | "memoryGiB": 256, 721 | "name": "ml.g4dn.16xlarge", 722 | "vcpuNum": 64 723 | }, 724 | { 725 | "_defaultOrder": 35, 726 | "_isFastLaunch": false, 727 | "category": "Accelerated computing", 728 | "gpuNum": 1, 729 | "hideHardwareSpecs": false, 730 | "memoryGiB": 61, 731 | "name": "ml.p3.2xlarge", 732 | "vcpuNum": 8 733 | }, 734 | { 735 | "_defaultOrder": 36, 736 | "_isFastLaunch": false, 737 | "category": "Accelerated computing", 738 | "gpuNum": 4, 739 | "hideHardwareSpecs": false, 740 | "memoryGiB": 244, 741 | "name": "ml.p3.8xlarge", 742 | "vcpuNum": 32 743 | }, 744 | { 745 | "_defaultOrder": 37, 746 | "_isFastLaunch": false, 747 | "category": "Accelerated computing", 748 | "gpuNum": 8, 749 | "hideHardwareSpecs": false, 750 | "memoryGiB": 488, 751 | "name": "ml.p3.16xlarge", 752 | "vcpuNum": 64 753 | }, 754 | { 755 | "_defaultOrder": 38, 756 | "_isFastLaunch": false, 757 | "category": "Accelerated computing", 758 | "gpuNum": 8, 759 | "hideHardwareSpecs": false, 760 | "memoryGiB": 768, 761 | "name": "ml.p3dn.24xlarge", 762 | "vcpuNum": 96 763 | }, 764 | { 765 | "_defaultOrder": 39, 766 | "_isFastLaunch": false, 767 | "category": "Memory Optimized", 768 | "gpuNum": 0, 769 | "hideHardwareSpecs": false, 770 | "memoryGiB": 16, 771 | "name": "ml.r5.large", 772 | "vcpuNum": 2 773 | }, 774 | { 775 | "_defaultOrder": 40, 776 | "_isFastLaunch": false, 777 | "category": "Memory Optimized", 778 | "gpuNum": 0, 779 | "hideHardwareSpecs": false, 780 | "memoryGiB": 32, 781 | "name": "ml.r5.xlarge", 782 | "vcpuNum": 4 783 | }, 784 | { 785 | "_defaultOrder": 41, 786 | "_isFastLaunch": false, 787 | 
"category": "Memory Optimized", 788 | "gpuNum": 0, 789 | "hideHardwareSpecs": false, 790 | "memoryGiB": 64, 791 | "name": "ml.r5.2xlarge", 792 | "vcpuNum": 8 793 | }, 794 | { 795 | "_defaultOrder": 42, 796 | "_isFastLaunch": false, 797 | "category": "Memory Optimized", 798 | "gpuNum": 0, 799 | "hideHardwareSpecs": false, 800 | "memoryGiB": 128, 801 | "name": "ml.r5.4xlarge", 802 | "vcpuNum": 16 803 | }, 804 | { 805 | "_defaultOrder": 43, 806 | "_isFastLaunch": false, 807 | "category": "Memory Optimized", 808 | "gpuNum": 0, 809 | "hideHardwareSpecs": false, 810 | "memoryGiB": 256, 811 | "name": "ml.r5.8xlarge", 812 | "vcpuNum": 32 813 | }, 814 | { 815 | "_defaultOrder": 44, 816 | "_isFastLaunch": false, 817 | "category": "Memory Optimized", 818 | "gpuNum": 0, 819 | "hideHardwareSpecs": false, 820 | "memoryGiB": 384, 821 | "name": "ml.r5.12xlarge", 822 | "vcpuNum": 48 823 | }, 824 | { 825 | "_defaultOrder": 45, 826 | "_isFastLaunch": false, 827 | "category": "Memory Optimized", 828 | "gpuNum": 0, 829 | "hideHardwareSpecs": false, 830 | "memoryGiB": 512, 831 | "name": "ml.r5.16xlarge", 832 | "vcpuNum": 64 833 | }, 834 | { 835 | "_defaultOrder": 46, 836 | "_isFastLaunch": false, 837 | "category": "Memory Optimized", 838 | "gpuNum": 0, 839 | "hideHardwareSpecs": false, 840 | "memoryGiB": 768, 841 | "name": "ml.r5.24xlarge", 842 | "vcpuNum": 96 843 | }, 844 | { 845 | "_defaultOrder": 47, 846 | "_isFastLaunch": false, 847 | "category": "Accelerated computing", 848 | "gpuNum": 1, 849 | "hideHardwareSpecs": false, 850 | "memoryGiB": 16, 851 | "name": "ml.g5.xlarge", 852 | "vcpuNum": 4 853 | }, 854 | { 855 | "_defaultOrder": 48, 856 | "_isFastLaunch": false, 857 | "category": "Accelerated computing", 858 | "gpuNum": 1, 859 | "hideHardwareSpecs": false, 860 | "memoryGiB": 32, 861 | "name": "ml.g5.2xlarge", 862 | "vcpuNum": 8 863 | }, 864 | { 865 | "_defaultOrder": 49, 866 | "_isFastLaunch": false, 867 | "category": "Accelerated computing", 868 | "gpuNum": 1, 869 | 
"hideHardwareSpecs": false, 870 | "memoryGiB": 64, 871 | "name": "ml.g5.4xlarge", 872 | "vcpuNum": 16 873 | }, 874 | { 875 | "_defaultOrder": 50, 876 | "_isFastLaunch": false, 877 | "category": "Accelerated computing", 878 | "gpuNum": 1, 879 | "hideHardwareSpecs": false, 880 | "memoryGiB": 128, 881 | "name": "ml.g5.8xlarge", 882 | "vcpuNum": 32 883 | }, 884 | { 885 | "_defaultOrder": 51, 886 | "_isFastLaunch": false, 887 | "category": "Accelerated computing", 888 | "gpuNum": 1, 889 | "hideHardwareSpecs": false, 890 | "memoryGiB": 256, 891 | "name": "ml.g5.16xlarge", 892 | "vcpuNum": 64 893 | }, 894 | { 895 | "_defaultOrder": 52, 896 | "_isFastLaunch": false, 897 | "category": "Accelerated computing", 898 | "gpuNum": 4, 899 | "hideHardwareSpecs": false, 900 | "memoryGiB": 192, 901 | "name": "ml.g5.12xlarge", 902 | "vcpuNum": 48 903 | }, 904 | { 905 | "_defaultOrder": 53, 906 | "_isFastLaunch": false, 907 | "category": "Accelerated computing", 908 | "gpuNum": 4, 909 | "hideHardwareSpecs": false, 910 | "memoryGiB": 384, 911 | "name": "ml.g5.24xlarge", 912 | "vcpuNum": 96 913 | }, 914 | { 915 | "_defaultOrder": 54, 916 | "_isFastLaunch": false, 917 | "category": "Accelerated computing", 918 | "gpuNum": 8, 919 | "hideHardwareSpecs": false, 920 | "memoryGiB": 768, 921 | "name": "ml.g5.48xlarge", 922 | "vcpuNum": 192 923 | }, 924 | { 925 | "_defaultOrder": 55, 926 | "_isFastLaunch": false, 927 | "category": "Accelerated computing", 928 | "gpuNum": 8, 929 | "hideHardwareSpecs": false, 930 | "memoryGiB": 1152, 931 | "name": "ml.p4d.24xlarge", 932 | "vcpuNum": 96 933 | }, 934 | { 935 | "_defaultOrder": 56, 936 | "_isFastLaunch": false, 937 | "category": "Accelerated computing", 938 | "gpuNum": 8, 939 | "hideHardwareSpecs": false, 940 | "memoryGiB": 1152, 941 | "name": "ml.p4de.24xlarge", 942 | "vcpuNum": 96 943 | } 944 | ], 945 | "instance_type": "ml.t3.medium", 946 | "kernelspec": { 947 | "display_name": "Python 3 (Data Science)", 948 | "language": "python", 949 | "name": 
"python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" 950 | }, 951 | "language_info": { 952 | "codemirror_mode": { 953 | "name": "ipython", 954 | "version": 3 955 | }, 956 | "file_extension": ".py", 957 | "mimetype": "text/x-python", 958 | "name": "python", 959 | "nbconvert_exporter": "python", 960 | "pygments_lexer": "ipython3", 961 | "version": "3.7.10" 962 | } 963 | }, 964 | "nbformat": 4, 965 | "nbformat_minor": 5 966 | } 967 | -------------------------------------------------------------------------------- /02-deploy-text-generation-model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ce1c37fc-91a5-49df-804f-92319ed8a678", 6 | "metadata": {}, 7 | "source": [ 8 | "## Deploy Text Generation Model (FLAN-T5 XXL)" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "cd154824-add4-46d3-a47d-a3dc317c5847", 14 | "metadata": {}, 15 | "source": [ 16 | "#### Imports " 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "id": "66e8ab21-8a0f-405c-a706-3aa5608a04a8", 23 | "metadata": { 24 | "tags": [] 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "from sagemaker.jumpstart.notebook_utils import list_jumpstart_models\n", 29 | "from sagemaker.predictor import Predictor\n", 30 | "from sagemaker import get_execution_role\n", 31 | "from sagemaker import ModelPackage\n", 32 | "from sagemaker.model import Model\n", 33 | "from sagemaker import image_uris \n", 34 | "from sagemaker import model_uris\n", 35 | "import numpy as np\n", 36 | "import sagemaker\n", 37 | "import logging\n", 38 | "import boto3\n", 39 | "import time\n", 40 | "import json" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "id": "38c09c3f-4302-40c2-8191-c730c32b5dbc", 46 | "metadata": {}, 47 | "source": [ 48 | "##### Setup logging " 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "id": 
"594a1c5c-a125-4cce-a538-3cfc840edcd7", 55 | "metadata": { 56 | "tags": [] 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "logger = logging.getLogger('sagemaker')\n", 61 | "logger.setLevel(logging.DEBUG)\n", 62 | "logger.addHandler(logging.StreamHandler())" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "id": "cd0e20e0-9c4b-4487-ae23-995cf33807a2", 68 | "metadata": {}, 69 | "source": [ 70 | "##### Log versions of dependencies " 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 4, 76 | "id": "48c47fd6-fd9f-4da8-94d1-7c0b3d23490b", 77 | "metadata": { 78 | "tags": [] 79 | }, 80 | "outputs": [ 81 | { 82 | "name": "stderr", 83 | "output_type": "stream", 84 | "text": [ 85 | "Using sagemaker==2.145.0\n", 86 | "Using boto3==1.26.111\n" 87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "logger.info(f'Using sagemaker=={sagemaker.__version__}')\n", 92 | "logger.info(f'Using boto3=={boto3.__version__}')" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "id": "38c2bb8a-a711-4c2c-aaf0-04cd2a22bc08", 98 | "metadata": {}, 99 | "source": [ 100 | "#### Setup essentials " 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 5, 106 | "id": "6718b0cf-42bf-42f8-83a5-89c2189891a5", 107 | "metadata": { 108 | "tags": [] 109 | }, 110 | "outputs": [ 111 | { 112 | "name": "stderr", 113 | "output_type": "stream", 114 | "text": [ 115 | "Region = us-east-1\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "region = boto3.Session().region_name\n", 121 | "logger.info(f'Region = {region}')" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "id": "93779cf2-1d73-48b7-8e9c-7a58a0d1e23a", 127 | "metadata": {}, 128 | "source": [ 129 | "##### Get list of language models available in the SageMaker JumpStart model hub" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 6, 135 | "id": "ef740ccc-04b9-49cd-a9f2-b00a1638d7a5", 136 | "metadata": { 137 | "tags": [] 138 | }, 139 | "outputs": [ 140 | { 141 | "name": "stderr", 142 | 
"output_type": "stream", 143 | "text": [ 144 | "Total number of models in SageMaker JumpStart hub = 679\n" 145 | ] 146 | } 147 | ], 148 | "source": [ 149 | "models = list_jumpstart_models()\n", 150 | "logger.info(f'Total number of models in SageMaker JumpStart hub = {len(models)}')" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "id": "bf7702b6-31ca-4166-9b80-27f2cd9019fc", 156 | "metadata": {}, 157 | "source": [ 158 | "##### Setup inference deployment config params" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 7, 164 | "id": "44235c29-162b-468d-8a7f-7024a852100e", 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "name": "stderr", 169 | "output_type": "stream", 170 | "text": [ 171 | "Role => arn:aws:iam::119174016168:role/service-role/AmazonSageMaker-ExecutionRole-20211014T093628\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "MODEL_ID = 'huggingface-text2text-flan-t5-xxl' # this is hard-coded\n", 177 | "MODEL_VERSION = '*'\n", 178 | "INSTANCE_TYPE = 'ml.g4dn.12xlarge'\n", 179 | "INSTANCE_COUNT = 1\n", 180 | "IMAGE_SCOPE = 'inference'\n", 181 | "MODEL_DATA_DOWNLOAD_TIMEOUT = 3600 # in seconds\n", 182 | "CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT = 3600\n", 183 | "CONTENT_TYPE = 'application/json'\n", 184 | "\n", 185 | "# set up roles and clients \n", 186 | "client = boto3.client('sagemaker-runtime')\n", 187 | "ROLE = get_execution_role()\n", 188 | "logger.info(f'Role => {ROLE}')" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 8, 194 | "id": "d5647e3c-8d3f-4bac-8229-3c5a2de5233b", 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "name": "stderr", 199 | "output_type": "stream", 200 | "text": [ 201 | "Endpoint name: flan-xxl-1686852282\n" 202 | ] 203 | } 204 | ], 205 | "source": [ 206 | "unix_time = int(time.time())\n", 207 | "\n", 208 | "endpoint_name = f'flan-xxl-{unix_time}'\n", 209 | "logger.info(f'Endpoint name: {endpoint_name}')" 210 | ] 211 | }, 212 | { 213 | "cell_type": 
"markdown", 214 | "id": "97b8cdaf-6b26-4fa4-8d01-98c14c7f7f97", 215 | "metadata": {}, 216 | "source": [ 217 | "#### Retrieve Image and Model URIs" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 9, 223 | "id": "fe5b3bcc-3ed9-4a8e-bc8e-32fcaab25322", 224 | "metadata": {}, 225 | "outputs": [ 226 | { 227 | "name": "stderr", 228 | "output_type": "stream", 229 | "text": [ 230 | "Deploy image URI => 763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04\n" 231 | ] 232 | } 233 | ], 234 | "source": [ 235 | "deploy_image_uri = image_uris.retrieve(region=None, \n", 236 | " framework=None, \n", 237 | " image_scope=IMAGE_SCOPE, \n", 238 | " model_id=MODEL_ID, \n", 239 | " model_version=MODEL_VERSION, \n", 240 | " instance_type=INSTANCE_TYPE)\n", 241 | "logger.info(f'Deploy image URI => {deploy_image_uri}')" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 10, 247 | "id": "b7abadcf-e5f1-4253-b1e0-fdda825b0099", 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "name": "stderr", 252 | "output_type": "stream", 253 | "text": [ 254 | "Model URI => s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.1.1/infer-prepack-huggingface-text2text-flan-t5-xxl.tar.gz\n" 255 | ] 256 | } 257 | ], 258 | "source": [ 259 | "model_uri = model_uris.retrieve(model_id=MODEL_ID, \n", 260 | " model_version=MODEL_VERSION, \n", 261 | " model_scope=IMAGE_SCOPE)\n", 262 | "logger.info(f'Model URI => {model_uri}')" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 11, 268 | "id": "a24ad3db-6a79-453a-a74f-53d7d9bb5a8f", 269 | "metadata": { 270 | "tags": [] 271 | }, 272 | "outputs": [], 273 | "source": [ 274 | "env = {\n", 275 | " 'SAGEMAKER_MODEL_SERVER_TIMEOUT': str(3600),\n", 276 | " 'MODEL_CACHE_ROOT': '/opt/ml/model', \n", 277 | " 'SAGEMAKER_ENV': '1',\n", 278 | " 'SAGEMAKER_SUBMIT_DIRECTORY': '/opt/ml/model/code/',\n", 279 | " 
'SAGEMAKER_PROGRAM': 'inference.py',\n", 280 | " 'SAGEMAKER_MODEL_SERVER_WORKERS': '1', \n", 281 | " 'TS_DEFAULT_WORKERS_PER_MODEL': '1', \n", 282 | "}" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "id": "70caec84-ece2-4f96-9c71-812eeaa9215d", 288 | "metadata": {}, 289 | "source": [ 290 | "#### Create SageMaker model" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 12, 296 | "id": "9dba6599-3a10-40d4-96d9-5af82823131e", 297 | "metadata": { 298 | "tags": [] 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "model_name = endpoint_name.replace('huggingface-textgeneration2-gpt-', '')\n", 303 | "model = Model(image_uri=deploy_image_uri, \n", 304 | " model_data=model_uri, \n", 305 | " role=ROLE, \n", 306 | " predictor_cls=Predictor, \n", 307 | " name=model_name, \n", 308 | " env=env)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "id": "5d119629-e939-4a3a-896d-73dc33e57187", 314 | "metadata": {}, 315 | "source": [ 316 | "#### Deploy text generation model as SageMaker endpoint for real-time synchronous inference" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "id": "eb07c4cb-cae1-4af7-9a86-38d9e6851c73", 323 | "metadata": { 324 | "tags": [] 325 | }, 326 | "outputs": [ 327 | { 328 | "name": "stderr", 329 | "output_type": "stream", 330 | "text": [ 331 | "Creating model with name: flan-xxl-1686852282\n", 332 | "CreateModel request: {\n", 333 | " \"ModelName\": \"flan-xxl-1686852282\",\n", 334 | " \"ExecutionRoleArn\": \"arn:aws:iam::119174016168:role/service-role/AmazonSageMaker-ExecutionRole-20211014T093628\",\n", 335 | " \"PrimaryContainer\": {\n", 336 | " \"Image\": \"763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04\",\n", 337 | " \"Environment\": {\n", 338 | " \"SAGEMAKER_MODEL_SERVER_TIMEOUT\": \"3600\",\n", 339 | " \"MODEL_CACHE_ROOT\": \"/opt/ml/model\",\n", 340 | " \"SAGEMAKER_ENV\": 
\"1\",\n", 341 | " \"SAGEMAKER_SUBMIT_DIRECTORY\": \"/opt/ml/model/code/\",\n", 342 | " \"SAGEMAKER_PROGRAM\": \"inference.py\",\n", 343 | " \"SAGEMAKER_MODEL_SERVER_WORKERS\": \"1\",\n", 344 | " \"TS_DEFAULT_WORKERS_PER_MODEL\": \"1\"\n", 345 | " },\n", 346 | " \"ModelDataUrl\": \"s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.1.1/infer-prepack-huggingface-text2text-flan-t5-xxl.tar.gz\"\n", 347 | " },\n", 348 | " \"Tags\": [\n", 349 | " {\n", 350 | " \"Key\": \"aws-jumpstart-inference-model-uri\",\n", 351 | " \"Value\": \"s3://jumpstart-cache-prod-us-east-1/huggingface-infer/prepack/v1.1.1/infer-prepack-huggingface-text2text-flan-t5-xxl.tar.gz\"\n", 352 | " }\n", 353 | " ]\n", 354 | "}\n", 355 | "Creating endpoint-config with name flan-xxl-1686852282\n", 356 | "Creating endpoint with name flan-xxl-1686852282\n" 357 | ] 358 | }, 359 | { 360 | "name": "stdout", 361 | "output_type": "stream", 362 | "text": [ 363 | "--------------------!CPU times: user 158 ms, sys: 17.9 ms, total: 176 ms\n", 364 | "Wall time: 10min 34s\n" 365 | ] 366 | } 367 | ], 368 | "source": [ 369 | "%%time\n", 370 | "\n", 371 | "_ = model.deploy(initial_instance_count=INSTANCE_COUNT, \n", 372 | " instance_type=INSTANCE_TYPE, \n", 373 | " endpoint_name=endpoint_name, \n", 374 | " model_data_download_timeout=MODEL_DATA_DOWNLOAD_TIMEOUT, \n", 375 | " container_startup_health_check_timeout=CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT)" 376 | ] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "id": "3bed6d31-f6ae-41e4-9a56-23c7e99cd019", 381 | "metadata": {}, 382 | "source": [ 383 | "### II. Invoke SageMaker endpoint to test the deployed model for natural language understanding (NLU) and natural language generation (NLG) tasks" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "id": "1353ee9e-9beb-4bc7-acff-047c3ebf2038", 389 | "metadata": {}, 390 | "source": [ 391 | "***\n", 392 | "This model also supports many advanced parameters while performing inference. 
They include:\n", 393 | "\n", 394 | "* **max_length:** Model generates text until the output length (which includes the input context length) reaches `max_length`. If specified, it must be a positive integer.\n", 395 | "* **num_return_sequences:** Number of output sequences returned. If specified, it must be a positive integer.\n", 396 | "* **num_beams:** Number of beams used in beam search. If specified, it must be an integer greater than or equal to `num_return_sequences`.\n", 397 | "* **no_repeat_ngram_size:** Model ensures that a sequence of words of `no_repeat_ngram_size` is not repeated in the output sequence. If specified, it must be a positive integer greater than 1.\n", 398 | "* **temperature:** Controls the randomness in the output. Higher temperature results in output sequences with more low-probability words, and lower temperature results in output sequences with more high-probability words. If `temperature` -> 0, it results in greedy decoding. If specified, it must be a positive float.\n", 399 | "* **early_stopping:** If True, text generation is finished when all beam hypotheses reach the end-of-sentence token. If specified, it must be a boolean.\n", 400 | "* **do_sample:** If True, sample the next word as per the likelihood. If specified, it must be a boolean.\n", 401 | "* **top_k:** In each step of text generation, sample from only the `top_k` most likely words. If specified, it must be a positive integer.\n", 402 | "* **top_p:** In each step of text generation, sample from the smallest possible set of words with cumulative probability `top_p`. If specified, it must be a float between 0 and 1.\n", 403 | "* **seed:** Fix the randomized state for reproducibility. If specified, it must be an integer.\n", 404 | "\n", 405 | "We may specify any subset of the parameters mentioned above while invoking an endpoint. 
Next, we show an example of how to invoke endpoint with these arguments\n", 406 | "\n", 407 | "***" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": null, 413 | "id": "5e5b8e2e-cd62-413d-b31c-a6004bf39b33", 414 | "metadata": { 415 | "tags": [] 416 | }, 417 | "outputs": [], 418 | "source": [ 419 | "prompt = \"\"\"Me: hi\n", 420 | "AI: Hello. How can I help you?\n", 421 | "Me: How are you doing?\n", 422 | "AI:\n", 423 | "\"\"\"" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "id": "56919d90-0546-4e68-a294-5b114abbd3aa", 430 | "metadata": { 431 | "tags": [] 432 | }, 433 | "outputs": [], 434 | "source": [ 435 | "payload = {\n", 436 | " 'text_inputs': prompt,\n", 437 | " 'seed': 123,\n", 438 | " 'temperature': 0.1,\n", 439 | " 'no_repeat_ngram_size': 2,\n", 440 | " 'max_length': 128\n", 441 | "}" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": null, 447 | "id": "461a651f-da5a-40b6-8202-d9097efb2f02", 448 | "metadata": { 449 | "tags": [] 450 | }, 451 | "outputs": [], 452 | "source": [ 453 | "payload = json.dumps(payload).encode('utf-8')" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "id": "ddf94fd3-fa77-4a48-bc43-e2536e4b621c", 460 | "metadata": { 461 | "tags": [] 462 | }, 463 | "outputs": [], 464 | "source": [ 465 | "%%time \n", 466 | "response = client.invoke_endpoint(EndpointName='huggingface-text2text-flan-t5-xl-1679769737', \n", 467 | " ContentType=CONTENT_TYPE, \n", 468 | " Body=payload)" 469 | ] 470 | }, 471 | { 472 | "cell_type": "markdown", 473 | "id": "354e0f71-d4a7-4779-b269-044d808565d3", 474 | "metadata": {}, 475 | "source": [ 476 | "#### Parse response to extract completion" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "id": "5995a882-7ef1-4e9a-8bba-bcb627dab002", 483 | "metadata": { 484 | "tags": [] 485 | }, 486 | "outputs": [], 487 | "source": [ 488 | "model_predictions = 
json.loads(response['Body'].read())\n", 489 | "completion = model_predictions['generated_texts'][0].strip()\n", 490 | "completion" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "id": "5863f215-b007-4e73-ba12-1e1ffb44a637", 497 | "metadata": {}, 498 | "outputs": [], 499 | "source": [] 500 | } 501 | ], 502 | "metadata": { 503 | "availableInstances": [ 504 | { 505 | "_defaultOrder": 0, 506 | "_isFastLaunch": true, 507 | "category": "General purpose", 508 | "gpuNum": 0, 509 | "hideHardwareSpecs": false, 510 | "memoryGiB": 4, 511 | "name": "ml.t3.medium", 512 | "vcpuNum": 2 513 | }, 514 | { 515 | "_defaultOrder": 1, 516 | "_isFastLaunch": false, 517 | "category": "General purpose", 518 | "gpuNum": 0, 519 | "hideHardwareSpecs": false, 520 | "memoryGiB": 8, 521 | "name": "ml.t3.large", 522 | "vcpuNum": 2 523 | }, 524 | { 525 | "_defaultOrder": 2, 526 | "_isFastLaunch": false, 527 | "category": "General purpose", 528 | "gpuNum": 0, 529 | "hideHardwareSpecs": false, 530 | "memoryGiB": 16, 531 | "name": "ml.t3.xlarge", 532 | "vcpuNum": 4 533 | }, 534 | { 535 | "_defaultOrder": 3, 536 | "_isFastLaunch": false, 537 | "category": "General purpose", 538 | "gpuNum": 0, 539 | "hideHardwareSpecs": false, 540 | "memoryGiB": 32, 541 | "name": "ml.t3.2xlarge", 542 | "vcpuNum": 8 543 | }, 544 | { 545 | "_defaultOrder": 4, 546 | "_isFastLaunch": true, 547 | "category": "General purpose", 548 | "gpuNum": 0, 549 | "hideHardwareSpecs": false, 550 | "memoryGiB": 8, 551 | "name": "ml.m5.large", 552 | "vcpuNum": 2 553 | }, 554 | { 555 | "_defaultOrder": 5, 556 | "_isFastLaunch": false, 557 | "category": "General purpose", 558 | "gpuNum": 0, 559 | "hideHardwareSpecs": false, 560 | "memoryGiB": 16, 561 | "name": "ml.m5.xlarge", 562 | "vcpuNum": 4 563 | }, 564 | { 565 | "_defaultOrder": 6, 566 | "_isFastLaunch": false, 567 | "category": "General purpose", 568 | "gpuNum": 0, 569 | "hideHardwareSpecs": false, 570 | "memoryGiB": 32, 571 | "name": 
"ml.m5.2xlarge", 572 | "vcpuNum": 8 573 | }, 574 | { 575 | "_defaultOrder": 7, 576 | "_isFastLaunch": false, 577 | "category": "General purpose", 578 | "gpuNum": 0, 579 | "hideHardwareSpecs": false, 580 | "memoryGiB": 64, 581 | "name": "ml.m5.4xlarge", 582 | "vcpuNum": 16 583 | }, 584 | { 585 | "_defaultOrder": 8, 586 | "_isFastLaunch": false, 587 | "category": "General purpose", 588 | "gpuNum": 0, 589 | "hideHardwareSpecs": false, 590 | "memoryGiB": 128, 591 | "name": "ml.m5.8xlarge", 592 | "vcpuNum": 32 593 | }, 594 | { 595 | "_defaultOrder": 9, 596 | "_isFastLaunch": false, 597 | "category": "General purpose", 598 | "gpuNum": 0, 599 | "hideHardwareSpecs": false, 600 | "memoryGiB": 192, 601 | "name": "ml.m5.12xlarge", 602 | "vcpuNum": 48 603 | }, 604 | { 605 | "_defaultOrder": 10, 606 | "_isFastLaunch": false, 607 | "category": "General purpose", 608 | "gpuNum": 0, 609 | "hideHardwareSpecs": false, 610 | "memoryGiB": 256, 611 | "name": "ml.m5.16xlarge", 612 | "vcpuNum": 64 613 | }, 614 | { 615 | "_defaultOrder": 11, 616 | "_isFastLaunch": false, 617 | "category": "General purpose", 618 | "gpuNum": 0, 619 | "hideHardwareSpecs": false, 620 | "memoryGiB": 384, 621 | "name": "ml.m5.24xlarge", 622 | "vcpuNum": 96 623 | }, 624 | { 625 | "_defaultOrder": 12, 626 | "_isFastLaunch": false, 627 | "category": "General purpose", 628 | "gpuNum": 0, 629 | "hideHardwareSpecs": false, 630 | "memoryGiB": 8, 631 | "name": "ml.m5d.large", 632 | "vcpuNum": 2 633 | }, 634 | { 635 | "_defaultOrder": 13, 636 | "_isFastLaunch": false, 637 | "category": "General purpose", 638 | "gpuNum": 0, 639 | "hideHardwareSpecs": false, 640 | "memoryGiB": 16, 641 | "name": "ml.m5d.xlarge", 642 | "vcpuNum": 4 643 | }, 644 | { 645 | "_defaultOrder": 14, 646 | "_isFastLaunch": false, 647 | "category": "General purpose", 648 | "gpuNum": 0, 649 | "hideHardwareSpecs": false, 650 | "memoryGiB": 32, 651 | "name": "ml.m5d.2xlarge", 652 | "vcpuNum": 8 653 | }, 654 | { 655 | "_defaultOrder": 15, 656 | 
"_isFastLaunch": false, 657 | "category": "General purpose", 658 | "gpuNum": 0, 659 | "hideHardwareSpecs": false, 660 | "memoryGiB": 64, 661 | "name": "ml.m5d.4xlarge", 662 | "vcpuNum": 16 663 | }, 664 | { 665 | "_defaultOrder": 16, 666 | "_isFastLaunch": false, 667 | "category": "General purpose", 668 | "gpuNum": 0, 669 | "hideHardwareSpecs": false, 670 | "memoryGiB": 128, 671 | "name": "ml.m5d.8xlarge", 672 | "vcpuNum": 32 673 | }, 674 | { 675 | "_defaultOrder": 17, 676 | "_isFastLaunch": false, 677 | "category": "General purpose", 678 | "gpuNum": 0, 679 | "hideHardwareSpecs": false, 680 | "memoryGiB": 192, 681 | "name": "ml.m5d.12xlarge", 682 | "vcpuNum": 48 683 | }, 684 | { 685 | "_defaultOrder": 18, 686 | "_isFastLaunch": false, 687 | "category": "General purpose", 688 | "gpuNum": 0, 689 | "hideHardwareSpecs": false, 690 | "memoryGiB": 256, 691 | "name": "ml.m5d.16xlarge", 692 | "vcpuNum": 64 693 | }, 694 | { 695 | "_defaultOrder": 19, 696 | "_isFastLaunch": false, 697 | "category": "General purpose", 698 | "gpuNum": 0, 699 | "hideHardwareSpecs": false, 700 | "memoryGiB": 384, 701 | "name": "ml.m5d.24xlarge", 702 | "vcpuNum": 96 703 | }, 704 | { 705 | "_defaultOrder": 20, 706 | "_isFastLaunch": false, 707 | "category": "General purpose", 708 | "gpuNum": 0, 709 | "hideHardwareSpecs": true, 710 | "memoryGiB": 0, 711 | "name": "ml.geospatial.interactive", 712 | "supportedImageNames": [ 713 | "sagemaker-geospatial-v1-0" 714 | ], 715 | "vcpuNum": 0 716 | }, 717 | { 718 | "_defaultOrder": 21, 719 | "_isFastLaunch": true, 720 | "category": "Compute optimized", 721 | "gpuNum": 0, 722 | "hideHardwareSpecs": false, 723 | "memoryGiB": 4, 724 | "name": "ml.c5.large", 725 | "vcpuNum": 2 726 | }, 727 | { 728 | "_defaultOrder": 22, 729 | "_isFastLaunch": false, 730 | "category": "Compute optimized", 731 | "gpuNum": 0, 732 | "hideHardwareSpecs": false, 733 | "memoryGiB": 8, 734 | "name": "ml.c5.xlarge", 735 | "vcpuNum": 4 736 | }, 737 | { 738 | "_defaultOrder": 23, 739 | 
"_isFastLaunch": false, 740 | "category": "Compute optimized", 741 | "gpuNum": 0, 742 | "hideHardwareSpecs": false, 743 | "memoryGiB": 16, 744 | "name": "ml.c5.2xlarge", 745 | "vcpuNum": 8 746 | }, 747 | { 748 | "_defaultOrder": 24, 749 | "_isFastLaunch": false, 750 | "category": "Compute optimized", 751 | "gpuNum": 0, 752 | "hideHardwareSpecs": false, 753 | "memoryGiB": 32, 754 | "name": "ml.c5.4xlarge", 755 | "vcpuNum": 16 756 | }, 757 | { 758 | "_defaultOrder": 25, 759 | "_isFastLaunch": false, 760 | "category": "Compute optimized", 761 | "gpuNum": 0, 762 | "hideHardwareSpecs": false, 763 | "memoryGiB": 72, 764 | "name": "ml.c5.9xlarge", 765 | "vcpuNum": 36 766 | }, 767 | { 768 | "_defaultOrder": 26, 769 | "_isFastLaunch": false, 770 | "category": "Compute optimized", 771 | "gpuNum": 0, 772 | "hideHardwareSpecs": false, 773 | "memoryGiB": 96, 774 | "name": "ml.c5.12xlarge", 775 | "vcpuNum": 48 776 | }, 777 | { 778 | "_defaultOrder": 27, 779 | "_isFastLaunch": false, 780 | "category": "Compute optimized", 781 | "gpuNum": 0, 782 | "hideHardwareSpecs": false, 783 | "memoryGiB": 144, 784 | "name": "ml.c5.18xlarge", 785 | "vcpuNum": 72 786 | }, 787 | { 788 | "_defaultOrder": 28, 789 | "_isFastLaunch": false, 790 | "category": "Compute optimized", 791 | "gpuNum": 0, 792 | "hideHardwareSpecs": false, 793 | "memoryGiB": 192, 794 | "name": "ml.c5.24xlarge", 795 | "vcpuNum": 96 796 | }, 797 | { 798 | "_defaultOrder": 29, 799 | "_isFastLaunch": true, 800 | "category": "Accelerated computing", 801 | "gpuNum": 1, 802 | "hideHardwareSpecs": false, 803 | "memoryGiB": 16, 804 | "name": "ml.g4dn.xlarge", 805 | "vcpuNum": 4 806 | }, 807 | { 808 | "_defaultOrder": 30, 809 | "_isFastLaunch": false, 810 | "category": "Accelerated computing", 811 | "gpuNum": 1, 812 | "hideHardwareSpecs": false, 813 | "memoryGiB": 32, 814 | "name": "ml.g4dn.2xlarge", 815 | "vcpuNum": 8 816 | }, 817 | { 818 | "_defaultOrder": 31, 819 | "_isFastLaunch": false, 820 | "category": "Accelerated computing", 
821 | "gpuNum": 1, 822 | "hideHardwareSpecs": false, 823 | "memoryGiB": 64, 824 | "name": "ml.g4dn.4xlarge", 825 | "vcpuNum": 16 826 | }, 827 | { 828 | "_defaultOrder": 32, 829 | "_isFastLaunch": false, 830 | "category": "Accelerated computing", 831 | "gpuNum": 1, 832 | "hideHardwareSpecs": false, 833 | "memoryGiB": 128, 834 | "name": "ml.g4dn.8xlarge", 835 | "vcpuNum": 32 836 | }, 837 | { 838 | "_defaultOrder": 33, 839 | "_isFastLaunch": false, 840 | "category": "Accelerated computing", 841 | "gpuNum": 4, 842 | "hideHardwareSpecs": false, 843 | "memoryGiB": 192, 844 | "name": "ml.g4dn.12xlarge", 845 | "vcpuNum": 48 846 | }, 847 | { 848 | "_defaultOrder": 34, 849 | "_isFastLaunch": false, 850 | "category": "Accelerated computing", 851 | "gpuNum": 1, 852 | "hideHardwareSpecs": false, 853 | "memoryGiB": 256, 854 | "name": "ml.g4dn.16xlarge", 855 | "vcpuNum": 64 856 | }, 857 | { 858 | "_defaultOrder": 35, 859 | "_isFastLaunch": false, 860 | "category": "Accelerated computing", 861 | "gpuNum": 1, 862 | "hideHardwareSpecs": false, 863 | "memoryGiB": 61, 864 | "name": "ml.p3.2xlarge", 865 | "vcpuNum": 8 866 | }, 867 | { 868 | "_defaultOrder": 36, 869 | "_isFastLaunch": false, 870 | "category": "Accelerated computing", 871 | "gpuNum": 4, 872 | "hideHardwareSpecs": false, 873 | "memoryGiB": 244, 874 | "name": "ml.p3.8xlarge", 875 | "vcpuNum": 32 876 | }, 877 | { 878 | "_defaultOrder": 37, 879 | "_isFastLaunch": false, 880 | "category": "Accelerated computing", 881 | "gpuNum": 8, 882 | "hideHardwareSpecs": false, 883 | "memoryGiB": 488, 884 | "name": "ml.p3.16xlarge", 885 | "vcpuNum": 64 886 | }, 887 | { 888 | "_defaultOrder": 38, 889 | "_isFastLaunch": false, 890 | "category": "Accelerated computing", 891 | "gpuNum": 8, 892 | "hideHardwareSpecs": false, 893 | "memoryGiB": 768, 894 | "name": "ml.p3dn.24xlarge", 895 | "vcpuNum": 96 896 | }, 897 | { 898 | "_defaultOrder": 39, 899 | "_isFastLaunch": false, 900 | "category": "Memory Optimized", 901 | "gpuNum": 0, 902 | 
"hideHardwareSpecs": false, 903 | "memoryGiB": 16, 904 | "name": "ml.r5.large", 905 | "vcpuNum": 2 906 | }, 907 | { 908 | "_defaultOrder": 40, 909 | "_isFastLaunch": false, 910 | "category": "Memory Optimized", 911 | "gpuNum": 0, 912 | "hideHardwareSpecs": false, 913 | "memoryGiB": 32, 914 | "name": "ml.r5.xlarge", 915 | "vcpuNum": 4 916 | }, 917 | { 918 | "_defaultOrder": 41, 919 | "_isFastLaunch": false, 920 | "category": "Memory Optimized", 921 | "gpuNum": 0, 922 | "hideHardwareSpecs": false, 923 | "memoryGiB": 64, 924 | "name": "ml.r5.2xlarge", 925 | "vcpuNum": 8 926 | }, 927 | { 928 | "_defaultOrder": 42, 929 | "_isFastLaunch": false, 930 | "category": "Memory Optimized", 931 | "gpuNum": 0, 932 | "hideHardwareSpecs": false, 933 | "memoryGiB": 128, 934 | "name": "ml.r5.4xlarge", 935 | "vcpuNum": 16 936 | }, 937 | { 938 | "_defaultOrder": 43, 939 | "_isFastLaunch": false, 940 | "category": "Memory Optimized", 941 | "gpuNum": 0, 942 | "hideHardwareSpecs": false, 943 | "memoryGiB": 256, 944 | "name": "ml.r5.8xlarge", 945 | "vcpuNum": 32 946 | }, 947 | { 948 | "_defaultOrder": 44, 949 | "_isFastLaunch": false, 950 | "category": "Memory Optimized", 951 | "gpuNum": 0, 952 | "hideHardwareSpecs": false, 953 | "memoryGiB": 384, 954 | "name": "ml.r5.12xlarge", 955 | "vcpuNum": 48 956 | }, 957 | { 958 | "_defaultOrder": 45, 959 | "_isFastLaunch": false, 960 | "category": "Memory Optimized", 961 | "gpuNum": 0, 962 | "hideHardwareSpecs": false, 963 | "memoryGiB": 512, 964 | "name": "ml.r5.16xlarge", 965 | "vcpuNum": 64 966 | }, 967 | { 968 | "_defaultOrder": 46, 969 | "_isFastLaunch": false, 970 | "category": "Memory Optimized", 971 | "gpuNum": 0, 972 | "hideHardwareSpecs": false, 973 | "memoryGiB": 768, 974 | "name": "ml.r5.24xlarge", 975 | "vcpuNum": 96 976 | }, 977 | { 978 | "_defaultOrder": 47, 979 | "_isFastLaunch": false, 980 | "category": "Accelerated computing", 981 | "gpuNum": 1, 982 | "hideHardwareSpecs": false, 983 | "memoryGiB": 16, 984 | "name": "ml.g5.xlarge", 
985 | "vcpuNum": 4 986 | }, 987 | { 988 | "_defaultOrder": 48, 989 | "_isFastLaunch": false, 990 | "category": "Accelerated computing", 991 | "gpuNum": 1, 992 | "hideHardwareSpecs": false, 993 | "memoryGiB": 32, 994 | "name": "ml.g5.2xlarge", 995 | "vcpuNum": 8 996 | }, 997 | { 998 | "_defaultOrder": 49, 999 | "_isFastLaunch": false, 1000 | "category": "Accelerated computing", 1001 | "gpuNum": 1, 1002 | "hideHardwareSpecs": false, 1003 | "memoryGiB": 64, 1004 | "name": "ml.g5.4xlarge", 1005 | "vcpuNum": 16 1006 | }, 1007 | { 1008 | "_defaultOrder": 50, 1009 | "_isFastLaunch": false, 1010 | "category": "Accelerated computing", 1011 | "gpuNum": 1, 1012 | "hideHardwareSpecs": false, 1013 | "memoryGiB": 128, 1014 | "name": "ml.g5.8xlarge", 1015 | "vcpuNum": 32 1016 | }, 1017 | { 1018 | "_defaultOrder": 51, 1019 | "_isFastLaunch": false, 1020 | "category": "Accelerated computing", 1021 | "gpuNum": 1, 1022 | "hideHardwareSpecs": false, 1023 | "memoryGiB": 256, 1024 | "name": "ml.g5.16xlarge", 1025 | "vcpuNum": 64 1026 | }, 1027 | { 1028 | "_defaultOrder": 52, 1029 | "_isFastLaunch": false, 1030 | "category": "Accelerated computing", 1031 | "gpuNum": 4, 1032 | "hideHardwareSpecs": false, 1033 | "memoryGiB": 192, 1034 | "name": "ml.g5.12xlarge", 1035 | "vcpuNum": 48 1036 | }, 1037 | { 1038 | "_defaultOrder": 53, 1039 | "_isFastLaunch": false, 1040 | "category": "Accelerated computing", 1041 | "gpuNum": 4, 1042 | "hideHardwareSpecs": false, 1043 | "memoryGiB": 384, 1044 | "name": "ml.g5.24xlarge", 1045 | "vcpuNum": 96 1046 | }, 1047 | { 1048 | "_defaultOrder": 54, 1049 | "_isFastLaunch": false, 1050 | "category": "Accelerated computing", 1051 | "gpuNum": 8, 1052 | "hideHardwareSpecs": false, 1053 | "memoryGiB": 768, 1054 | "name": "ml.g5.48xlarge", 1055 | "vcpuNum": 192 1056 | }, 1057 | { 1058 | "_defaultOrder": 55, 1059 | "_isFastLaunch": false, 1060 | "category": "Accelerated computing", 1061 | "gpuNum": 8, 1062 | "hideHardwareSpecs": false, 1063 | "memoryGiB": 1152, 1064 
| "name": "ml.p4d.24xlarge", 1065 | "vcpuNum": 96 1066 | }, 1067 | { 1068 | "_defaultOrder": 56, 1069 | "_isFastLaunch": false, 1070 | "category": "Accelerated computing", 1071 | "gpuNum": 8, 1072 | "hideHardwareSpecs": false, 1073 | "memoryGiB": 1152, 1074 | "name": "ml.p4de.24xlarge", 1075 | "vcpuNum": 96 1076 | } 1077 | ], 1078 | "instance_type": "ml.t3.medium", 1079 | "kernelspec": { 1080 | "display_name": "Python 3 (Data Science)", 1081 | "language": "python", 1082 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" 1083 | }, 1084 | "language_info": { 1085 | "codemirror_mode": { 1086 | "name": "ipython", 1087 | "version": 3 1088 | }, 1089 | "file_extension": ".py", 1090 | "mimetype": "text/x-python", 1091 | "name": "python", 1092 | "nbconvert_exporter": "python", 1093 | "pygments_lexer": "ipython3", 1094 | "version": "3.7.10" 1095 | } 1096 | }, 1097 | "nbformat": 4, 1098 | "nbformat_minor": 5 1099 | } 1100 | -------------------------------------------------------------------------------- /03-create-dynamodb-tables.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "bf90ceb7-4bf5-4516-a28f-a6f028ef301c", 6 | "metadata": {}, 7 | "source": [ 8 | "## Create DynamoDB Tables " 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "e974bd09-bd15-4c5c-8ab4-60d054b78bc5", 14 | "metadata": {}, 15 | "source": [ 16 | "#### Imports " 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "id": "222f38ee-7b50-4f88-9381-58c5e128420b", 23 | "metadata": { 24 | "tags": [] 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "import logging\n", 29 | "import boto3" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "id": "e2b5c7e8-1be3-4906-b9be-dc5a44f92d09", 35 | "metadata": {}, 36 | "source": [ 37 | "##### Setup logging" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | 
"execution_count": 3, 43 | "id": "bea6c92d-5fda-4264-9fdc-2e45c55ed7c7", 44 | "metadata": { 45 | "tags": [] 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "logger = logging.getLogger('sagemaker')\n", 50 | "logger.setLevel(logging.DEBUG)\n", 51 | "logger.addHandler(logging.StreamHandler())" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "id": "ad53853d-6997-4120-95ac-53a3d555da36", 57 | "metadata": {}, 58 | "source": [ 59 | "##### Log versions of dependencies " 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 4, 65 | "id": "fbc8a299-aa25-491a-b5de-4c31bbbfea22", 66 | "metadata": { 67 | "tags": [] 68 | }, 69 | "outputs": [ 70 | { 71 | "name": "stderr", 72 | "output_type": "stream", 73 | "text": [ 74 | "Using boto3==1.26.111\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "logger.info(f'Using boto3=={boto3.__version__}')" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "id": "744a3c36-6504-4e8c-b5ff-4b9998ecc465", 85 | "metadata": {}, 86 | "source": [ 87 | "#### Create DynamoDB client " 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 5, 93 | "id": "dbb83e0c-1dd0-4e6a-8e4c-9859ea63b15a", 94 | "metadata": { 95 | "tags": [] 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "dynamodb = boto3.resource('dynamodb')" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "id": "d184dfeb-3ecd-4644-8a0e-54175a8d2e0f", 105 | "metadata": {}, 106 | "source": [ 107 | "#### Create `conversations` table" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 6, 113 | "id": "69bac2ac-c0fa-488f-9680-81849eb1b676", 114 | "metadata": { 115 | "tags": [] 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "def create_conversations_table(table_name: str) -> None:\n", 120 | " table = dynamodb.create_table(\n", 121 | " TableName=table_name,\n", 122 | " KeySchema=[\n", 123 | " {'AttributeName': 'session_id', 'KeyType': 'HASH'},\n", 124 | " {'AttributeName': 'timestamp', 'KeyType': 'RANGE'}\n", 
125 | " ],\n", 126 | " AttributeDefinitions=[\n", 127 | " {'AttributeName': 'session_id', 'AttributeType': 'S'},\n", 128 | " {'AttributeName': 'timestamp', 'AttributeType': 'N'}\n", 129 | " ],\n", 130 | " ProvisionedThroughput={\n", 131 | " 'ReadCapacityUnits': 5,\n", 132 | " 'WriteCapacityUnits': 5\n", 133 | " }\n", 134 | " )" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "id": "65486230-dedc-485c-95a5-9da962836f8d", 140 | "metadata": {}, 141 | "source": [ 142 | "#### Create `sessions` table" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 7, 148 | "id": "5983fcdc-d39b-4f0e-b6a9-d15dbd27928a", 149 | "metadata": { 150 | "tags": [] 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "def create_sessions_table(table_name: str) -> None:\n", 155 | " table = dynamodb.create_table(\n", 156 | " TableName=table_name,\n", 157 | " KeySchema=[\n", 158 | " {'AttributeName': 'session_id', 'KeyType': 'HASH'}\n", 159 | " ],\n", 160 | " AttributeDefinitions=[\n", 161 | " {'AttributeName': 'session_id', 'AttributeType': 'S'}\n", 162 | " ],\n", 163 | " ProvisionedThroughput={\n", 164 | " 'ReadCapacityUnits': 5,\n", 165 | " 'WriteCapacityUnits': 5\n", 166 | " }\n", 167 | " )" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 8, 173 | "id": "ce937e6b-928d-4622-94e4-944415eb2f11", 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "create_conversations_table('conversations')\n", 178 | "create_sessions_table('sessions')" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "id": "287794f5-b672-4169-b27b-e2bb02343909", 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [] 188 | } 189 | ], 190 | "metadata": { 191 | "availableInstances": [ 192 | { 193 | "_defaultOrder": 0, 194 | "_isFastLaunch": true, 195 | "category": "General purpose", 196 | "gpuNum": 0, 197 | "hideHardwareSpecs": false, 198 | "memoryGiB": 4, 199 | "name": "ml.t3.medium", 200 | "vcpuNum": 
2 201 | }, 202 | { 203 | "_defaultOrder": 1, 204 | "_isFastLaunch": false, 205 | "category": "General purpose", 206 | "gpuNum": 0, 207 | "hideHardwareSpecs": false, 208 | "memoryGiB": 8, 209 | "name": "ml.t3.large", 210 | "vcpuNum": 2 211 | }, 212 | { 213 | "_defaultOrder": 2, 214 | "_isFastLaunch": false, 215 | "category": "General purpose", 216 | "gpuNum": 0, 217 | "hideHardwareSpecs": false, 218 | "memoryGiB": 16, 219 | "name": "ml.t3.xlarge", 220 | "vcpuNum": 4 221 | }, 222 | { 223 | "_defaultOrder": 3, 224 | "_isFastLaunch": false, 225 | "category": "General purpose", 226 | "gpuNum": 0, 227 | "hideHardwareSpecs": false, 228 | "memoryGiB": 32, 229 | "name": "ml.t3.2xlarge", 230 | "vcpuNum": 8 231 | }, 232 | { 233 | "_defaultOrder": 4, 234 | "_isFastLaunch": true, 235 | "category": "General purpose", 236 | "gpuNum": 0, 237 | "hideHardwareSpecs": false, 238 | "memoryGiB": 8, 239 | "name": "ml.m5.large", 240 | "vcpuNum": 2 241 | }, 242 | { 243 | "_defaultOrder": 5, 244 | "_isFastLaunch": false, 245 | "category": "General purpose", 246 | "gpuNum": 0, 247 | "hideHardwareSpecs": false, 248 | "memoryGiB": 16, 249 | "name": "ml.m5.xlarge", 250 | "vcpuNum": 4 251 | }, 252 | { 253 | "_defaultOrder": 6, 254 | "_isFastLaunch": false, 255 | "category": "General purpose", 256 | "gpuNum": 0, 257 | "hideHardwareSpecs": false, 258 | "memoryGiB": 32, 259 | "name": "ml.m5.2xlarge", 260 | "vcpuNum": 8 261 | }, 262 | { 263 | "_defaultOrder": 7, 264 | "_isFastLaunch": false, 265 | "category": "General purpose", 266 | "gpuNum": 0, 267 | "hideHardwareSpecs": false, 268 | "memoryGiB": 64, 269 | "name": "ml.m5.4xlarge", 270 | "vcpuNum": 16 271 | }, 272 | { 273 | "_defaultOrder": 8, 274 | "_isFastLaunch": false, 275 | "category": "General purpose", 276 | "gpuNum": 0, 277 | "hideHardwareSpecs": false, 278 | "memoryGiB": 128, 279 | "name": "ml.m5.8xlarge", 280 | "vcpuNum": 32 281 | }, 282 | { 283 | "_defaultOrder": 9, 284 | "_isFastLaunch": false, 285 | "category": "General purpose", 286 | 
"gpuNum": 0, 287 | "hideHardwareSpecs": false, 288 | "memoryGiB": 192, 289 | "name": "ml.m5.12xlarge", 290 | "vcpuNum": 48 291 | }, 292 | { 293 | "_defaultOrder": 10, 294 | "_isFastLaunch": false, 295 | "category": "General purpose", 296 | "gpuNum": 0, 297 | "hideHardwareSpecs": false, 298 | "memoryGiB": 256, 299 | "name": "ml.m5.16xlarge", 300 | "vcpuNum": 64 301 | }, 302 | { 303 | "_defaultOrder": 11, 304 | "_isFastLaunch": false, 305 | "category": "General purpose", 306 | "gpuNum": 0, 307 | "hideHardwareSpecs": false, 308 | "memoryGiB": 384, 309 | "name": "ml.m5.24xlarge", 310 | "vcpuNum": 96 311 | }, 312 | { 313 | "_defaultOrder": 12, 314 | "_isFastLaunch": false, 315 | "category": "General purpose", 316 | "gpuNum": 0, 317 | "hideHardwareSpecs": false, 318 | "memoryGiB": 8, 319 | "name": "ml.m5d.large", 320 | "vcpuNum": 2 321 | }, 322 | { 323 | "_defaultOrder": 13, 324 | "_isFastLaunch": false, 325 | "category": "General purpose", 326 | "gpuNum": 0, 327 | "hideHardwareSpecs": false, 328 | "memoryGiB": 16, 329 | "name": "ml.m5d.xlarge", 330 | "vcpuNum": 4 331 | }, 332 | { 333 | "_defaultOrder": 14, 334 | "_isFastLaunch": false, 335 | "category": "General purpose", 336 | "gpuNum": 0, 337 | "hideHardwareSpecs": false, 338 | "memoryGiB": 32, 339 | "name": "ml.m5d.2xlarge", 340 | "vcpuNum": 8 341 | }, 342 | { 343 | "_defaultOrder": 15, 344 | "_isFastLaunch": false, 345 | "category": "General purpose", 346 | "gpuNum": 0, 347 | "hideHardwareSpecs": false, 348 | "memoryGiB": 64, 349 | "name": "ml.m5d.4xlarge", 350 | "vcpuNum": 16 351 | }, 352 | { 353 | "_defaultOrder": 16, 354 | "_isFastLaunch": false, 355 | "category": "General purpose", 356 | "gpuNum": 0, 357 | "hideHardwareSpecs": false, 358 | "memoryGiB": 128, 359 | "name": "ml.m5d.8xlarge", 360 | "vcpuNum": 32 361 | }, 362 | { 363 | "_defaultOrder": 17, 364 | "_isFastLaunch": false, 365 | "category": "General purpose", 366 | "gpuNum": 0, 367 | "hideHardwareSpecs": false, 368 | "memoryGiB": 192, 369 | "name": 
"ml.m5d.12xlarge", 370 | "vcpuNum": 48 371 | }, 372 | { 373 | "_defaultOrder": 18, 374 | "_isFastLaunch": false, 375 | "category": "General purpose", 376 | "gpuNum": 0, 377 | "hideHardwareSpecs": false, 378 | "memoryGiB": 256, 379 | "name": "ml.m5d.16xlarge", 380 | "vcpuNum": 64 381 | }, 382 | { 383 | "_defaultOrder": 19, 384 | "_isFastLaunch": false, 385 | "category": "General purpose", 386 | "gpuNum": 0, 387 | "hideHardwareSpecs": false, 388 | "memoryGiB": 384, 389 | "name": "ml.m5d.24xlarge", 390 | "vcpuNum": 96 391 | }, 392 | { 393 | "_defaultOrder": 20, 394 | "_isFastLaunch": false, 395 | "category": "General purpose", 396 | "gpuNum": 0, 397 | "hideHardwareSpecs": true, 398 | "memoryGiB": 0, 399 | "name": "ml.geospatial.interactive", 400 | "supportedImageNames": [ 401 | "sagemaker-geospatial-v1-0" 402 | ], 403 | "vcpuNum": 0 404 | }, 405 | { 406 | "_defaultOrder": 21, 407 | "_isFastLaunch": true, 408 | "category": "Compute optimized", 409 | "gpuNum": 0, 410 | "hideHardwareSpecs": false, 411 | "memoryGiB": 4, 412 | "name": "ml.c5.large", 413 | "vcpuNum": 2 414 | }, 415 | { 416 | "_defaultOrder": 22, 417 | "_isFastLaunch": false, 418 | "category": "Compute optimized", 419 | "gpuNum": 0, 420 | "hideHardwareSpecs": false, 421 | "memoryGiB": 8, 422 | "name": "ml.c5.xlarge", 423 | "vcpuNum": 4 424 | }, 425 | { 426 | "_defaultOrder": 23, 427 | "_isFastLaunch": false, 428 | "category": "Compute optimized", 429 | "gpuNum": 0, 430 | "hideHardwareSpecs": false, 431 | "memoryGiB": 16, 432 | "name": "ml.c5.2xlarge", 433 | "vcpuNum": 8 434 | }, 435 | { 436 | "_defaultOrder": 24, 437 | "_isFastLaunch": false, 438 | "category": "Compute optimized", 439 | "gpuNum": 0, 440 | "hideHardwareSpecs": false, 441 | "memoryGiB": 32, 442 | "name": "ml.c5.4xlarge", 443 | "vcpuNum": 16 444 | }, 445 | { 446 | "_defaultOrder": 25, 447 | "_isFastLaunch": false, 448 | "category": "Compute optimized", 449 | "gpuNum": 0, 450 | "hideHardwareSpecs": false, 451 | "memoryGiB": 72, 452 | "name": 
"ml.c5.9xlarge", 453 | "vcpuNum": 36 454 | }, 455 | { 456 | "_defaultOrder": 26, 457 | "_isFastLaunch": false, 458 | "category": "Compute optimized", 459 | "gpuNum": 0, 460 | "hideHardwareSpecs": false, 461 | "memoryGiB": 96, 462 | "name": "ml.c5.12xlarge", 463 | "vcpuNum": 48 464 | }, 465 | { 466 | "_defaultOrder": 27, 467 | "_isFastLaunch": false, 468 | "category": "Compute optimized", 469 | "gpuNum": 0, 470 | "hideHardwareSpecs": false, 471 | "memoryGiB": 144, 472 | "name": "ml.c5.18xlarge", 473 | "vcpuNum": 72 474 | }, 475 | { 476 | "_defaultOrder": 28, 477 | "_isFastLaunch": false, 478 | "category": "Compute optimized", 479 | "gpuNum": 0, 480 | "hideHardwareSpecs": false, 481 | "memoryGiB": 192, 482 | "name": "ml.c5.24xlarge", 483 | "vcpuNum": 96 484 | }, 485 | { 486 | "_defaultOrder": 29, 487 | "_isFastLaunch": true, 488 | "category": "Accelerated computing", 489 | "gpuNum": 1, 490 | "hideHardwareSpecs": false, 491 | "memoryGiB": 16, 492 | "name": "ml.g4dn.xlarge", 493 | "vcpuNum": 4 494 | }, 495 | { 496 | "_defaultOrder": 30, 497 | "_isFastLaunch": false, 498 | "category": "Accelerated computing", 499 | "gpuNum": 1, 500 | "hideHardwareSpecs": false, 501 | "memoryGiB": 32, 502 | "name": "ml.g4dn.2xlarge", 503 | "vcpuNum": 8 504 | }, 505 | { 506 | "_defaultOrder": 31, 507 | "_isFastLaunch": false, 508 | "category": "Accelerated computing", 509 | "gpuNum": 1, 510 | "hideHardwareSpecs": false, 511 | "memoryGiB": 64, 512 | "name": "ml.g4dn.4xlarge", 513 | "vcpuNum": 16 514 | }, 515 | { 516 | "_defaultOrder": 32, 517 | "_isFastLaunch": false, 518 | "category": "Accelerated computing", 519 | "gpuNum": 1, 520 | "hideHardwareSpecs": false, 521 | "memoryGiB": 128, 522 | "name": "ml.g4dn.8xlarge", 523 | "vcpuNum": 32 524 | }, 525 | { 526 | "_defaultOrder": 33, 527 | "_isFastLaunch": false, 528 | "category": "Accelerated computing", 529 | "gpuNum": 4, 530 | "hideHardwareSpecs": false, 531 | "memoryGiB": 192, 532 | "name": "ml.g4dn.12xlarge", 533 | "vcpuNum": 48 534 | }, 
535 | { 536 | "_defaultOrder": 34, 537 | "_isFastLaunch": false, 538 | "category": "Accelerated computing", 539 | "gpuNum": 1, 540 | "hideHardwareSpecs": false, 541 | "memoryGiB": 256, 542 | "name": "ml.g4dn.16xlarge", 543 | "vcpuNum": 64 544 | }, 545 | { 546 | "_defaultOrder": 35, 547 | "_isFastLaunch": false, 548 | "category": "Accelerated computing", 549 | "gpuNum": 1, 550 | "hideHardwareSpecs": false, 551 | "memoryGiB": 61, 552 | "name": "ml.p3.2xlarge", 553 | "vcpuNum": 8 554 | }, 555 | { 556 | "_defaultOrder": 36, 557 | "_isFastLaunch": false, 558 | "category": "Accelerated computing", 559 | "gpuNum": 4, 560 | "hideHardwareSpecs": false, 561 | "memoryGiB": 244, 562 | "name": "ml.p3.8xlarge", 563 | "vcpuNum": 32 564 | }, 565 | { 566 | "_defaultOrder": 37, 567 | "_isFastLaunch": false, 568 | "category": "Accelerated computing", 569 | "gpuNum": 8, 570 | "hideHardwareSpecs": false, 571 | "memoryGiB": 488, 572 | "name": "ml.p3.16xlarge", 573 | "vcpuNum": 64 574 | }, 575 | { 576 | "_defaultOrder": 38, 577 | "_isFastLaunch": false, 578 | "category": "Accelerated computing", 579 | "gpuNum": 8, 580 | "hideHardwareSpecs": false, 581 | "memoryGiB": 768, 582 | "name": "ml.p3dn.24xlarge", 583 | "vcpuNum": 96 584 | }, 585 | { 586 | "_defaultOrder": 39, 587 | "_isFastLaunch": false, 588 | "category": "Memory Optimized", 589 | "gpuNum": 0, 590 | "hideHardwareSpecs": false, 591 | "memoryGiB": 16, 592 | "name": "ml.r5.large", 593 | "vcpuNum": 2 594 | }, 595 | { 596 | "_defaultOrder": 40, 597 | "_isFastLaunch": false, 598 | "category": "Memory Optimized", 599 | "gpuNum": 0, 600 | "hideHardwareSpecs": false, 601 | "memoryGiB": 32, 602 | "name": "ml.r5.xlarge", 603 | "vcpuNum": 4 604 | }, 605 | { 606 | "_defaultOrder": 41, 607 | "_isFastLaunch": false, 608 | "category": "Memory Optimized", 609 | "gpuNum": 0, 610 | "hideHardwareSpecs": false, 611 | "memoryGiB": 64, 612 | "name": "ml.r5.2xlarge", 613 | "vcpuNum": 8 614 | }, 615 | { 616 | "_defaultOrder": 42, 617 | "_isFastLaunch": 
false, 618 | "category": "Memory Optimized", 619 | "gpuNum": 0, 620 | "hideHardwareSpecs": false, 621 | "memoryGiB": 128, 622 | "name": "ml.r5.4xlarge", 623 | "vcpuNum": 16 624 | }, 625 | { 626 | "_defaultOrder": 43, 627 | "_isFastLaunch": false, 628 | "category": "Memory Optimized", 629 | "gpuNum": 0, 630 | "hideHardwareSpecs": false, 631 | "memoryGiB": 256, 632 | "name": "ml.r5.8xlarge", 633 | "vcpuNum": 32 634 | }, 635 | { 636 | "_defaultOrder": 44, 637 | "_isFastLaunch": false, 638 | "category": "Memory Optimized", 639 | "gpuNum": 0, 640 | "hideHardwareSpecs": false, 641 | "memoryGiB": 384, 642 | "name": "ml.r5.12xlarge", 643 | "vcpuNum": 48 644 | }, 645 | { 646 | "_defaultOrder": 45, 647 | "_isFastLaunch": false, 648 | "category": "Memory Optimized", 649 | "gpuNum": 0, 650 | "hideHardwareSpecs": false, 651 | "memoryGiB": 512, 652 | "name": "ml.r5.16xlarge", 653 | "vcpuNum": 64 654 | }, 655 | { 656 | "_defaultOrder": 46, 657 | "_isFastLaunch": false, 658 | "category": "Memory Optimized", 659 | "gpuNum": 0, 660 | "hideHardwareSpecs": false, 661 | "memoryGiB": 768, 662 | "name": "ml.r5.24xlarge", 663 | "vcpuNum": 96 664 | }, 665 | { 666 | "_defaultOrder": 47, 667 | "_isFastLaunch": false, 668 | "category": "Accelerated computing", 669 | "gpuNum": 1, 670 | "hideHardwareSpecs": false, 671 | "memoryGiB": 16, 672 | "name": "ml.g5.xlarge", 673 | "vcpuNum": 4 674 | }, 675 | { 676 | "_defaultOrder": 48, 677 | "_isFastLaunch": false, 678 | "category": "Accelerated computing", 679 | "gpuNum": 1, 680 | "hideHardwareSpecs": false, 681 | "memoryGiB": 32, 682 | "name": "ml.g5.2xlarge", 683 | "vcpuNum": 8 684 | }, 685 | { 686 | "_defaultOrder": 49, 687 | "_isFastLaunch": false, 688 | "category": "Accelerated computing", 689 | "gpuNum": 1, 690 | "hideHardwareSpecs": false, 691 | "memoryGiB": 64, 692 | "name": "ml.g5.4xlarge", 693 | "vcpuNum": 16 694 | }, 695 | { 696 | "_defaultOrder": 50, 697 | "_isFastLaunch": false, 698 | "category": "Accelerated computing", 699 | "gpuNum": 
1, 700 | "hideHardwareSpecs": false, 701 | "memoryGiB": 128, 702 | "name": "ml.g5.8xlarge", 703 | "vcpuNum": 32 704 | }, 705 | { 706 | "_defaultOrder": 51, 707 | "_isFastLaunch": false, 708 | "category": "Accelerated computing", 709 | "gpuNum": 1, 710 | "hideHardwareSpecs": false, 711 | "memoryGiB": 256, 712 | "name": "ml.g5.16xlarge", 713 | "vcpuNum": 64 714 | }, 715 | { 716 | "_defaultOrder": 52, 717 | "_isFastLaunch": false, 718 | "category": "Accelerated computing", 719 | "gpuNum": 4, 720 | "hideHardwareSpecs": false, 721 | "memoryGiB": 192, 722 | "name": "ml.g5.12xlarge", 723 | "vcpuNum": 48 724 | }, 725 | { 726 | "_defaultOrder": 53, 727 | "_isFastLaunch": false, 728 | "category": "Accelerated computing", 729 | "gpuNum": 4, 730 | "hideHardwareSpecs": false, 731 | "memoryGiB": 384, 732 | "name": "ml.g5.24xlarge", 733 | "vcpuNum": 96 734 | }, 735 | { 736 | "_defaultOrder": 54, 737 | "_isFastLaunch": false, 738 | "category": "Accelerated computing", 739 | "gpuNum": 8, 740 | "hideHardwareSpecs": false, 741 | "memoryGiB": 768, 742 | "name": "ml.g5.48xlarge", 743 | "vcpuNum": 192 744 | }, 745 | { 746 | "_defaultOrder": 55, 747 | "_isFastLaunch": false, 748 | "category": "Accelerated computing", 749 | "gpuNum": 8, 750 | "hideHardwareSpecs": false, 751 | "memoryGiB": 1152, 752 | "name": "ml.p4d.24xlarge", 753 | "vcpuNum": 96 754 | }, 755 | { 756 | "_defaultOrder": 56, 757 | "_isFastLaunch": false, 758 | "category": "Accelerated computing", 759 | "gpuNum": 8, 760 | "hideHardwareSpecs": false, 761 | "memoryGiB": 1152, 762 | "name": "ml.p4de.24xlarge", 763 | "vcpuNum": 96 764 | } 765 | ], 766 | "instance_type": "ml.m5.large", 767 | "kernelspec": { 768 | "display_name": "Python 3 (Data Science)", 769 | "language": "python", 770 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" 771 | }, 772 | "language_info": { 773 | "codemirror_mode": { 774 | "name": "ipython", 775 | "version": 3 776 | }, 777 | "file_extension": 
".py", 778 | "mimetype": "text/x-python", 779 | "name": "python", 780 | "nbconvert_exporter": "python", 781 | "pygments_lexer": "ipython3", 782 | "version": "3.7.10" 783 | } 784 | }, 785 | "nbformat": 4, 786 | "nbformat_minor": 5 787 | } 788 | -------------------------------------------------------------------------------- /04-create-os-index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "b985c5b5-1d65-4e4a-82d1-2dfc9768d97d", 6 | "metadata": {}, 7 | "source": [ 8 | "## Create Index for `Past Conversations`" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "57aa4ada-a686-43de-bc90-0f4107f95ce1", 14 | "metadata": {}, 15 | "source": [ 16 | "##### Prerequisites" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "faf12933-564a-41a3-9a9e-c02a9437310e", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "%%capture \n", 27 | "\n", 28 | "!pip install PyYAML" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "id": "52706ec1-fd47-42d4-af0f-33f0a03f654d", 34 | "metadata": {}, 35 | "source": [ 36 | "#### Imports" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "id": "81eea5f6-50e3-4398-80eb-11b680d026b4", 43 | "metadata": { 44 | "tags": [] 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "from requests.auth import HTTPBasicAuth\n", 49 | "from tqdm import tqdm\n", 50 | "import requests\n", 51 | "import logging \n", 52 | "import boto3\n", 53 | "import yaml\n", 54 | "import json\n", 55 | "import os" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "id": "49ec6d42-db7a-4c4c-8322-95963806f987", 61 | "metadata": {}, 62 | "source": [ 63 | "##### Setup logging" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 3, 69 | "id": "fcddd51c-9251-4428-9931-b2700a71142a", 70 | "metadata": { 71 | "tags": [] 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | 
"logger = logging.getLogger('sagemaker')\n", 76 | "logger.setLevel(logging.DEBUG)\n", 77 | "logger.addHandler(logging.StreamHandler())" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "id": "4a81f78f-675d-4009-9228-8fa3cfc559b8", 83 | "metadata": {}, 84 | "source": [ 85 | "##### Log versions of dependencies " 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 4, 91 | "id": "75474334-8b19-4491-abb9-58468bc33329", 92 | "metadata": { 93 | "tags": [] 94 | }, 95 | "outputs": [ 96 | { 97 | "name": "stderr", 98 | "output_type": "stream", 99 | "text": [ 100 | "Using requests==2.28.2\n", 101 | "Using pyyaml==6.0\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "logger.info(f'Using requests=={requests.__version__}')\n", 107 | "logger.info(f'Using pyyaml=={yaml.__version__}')" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "id": "c52e3b3f-f770-4b90-bc35-12cc0f793604", 113 | "metadata": {}, 114 | "source": [ 115 | "#### Setup essentials" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 5, 121 | "id": "3b2b1f73-2576-412f-ad38-13638db59281", 122 | "metadata": { 123 | "tags": [] 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "with open('config.yml', 'r') as file:\n", 128 | " config = yaml.safe_load(file)\n", 129 | "\n", 130 | "os_username = config['credentials']['username']\n", 131 | "os_password = config['credentials']['password']\n", 132 | "\n", 133 | "domain_endpoint = config['domain']['endpoint']\n", 134 | "domain_index = config['domain']['index']" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 6, 140 | "id": "3e558272-b128-47da-8e68-7cdc79aca465", 141 | "metadata": { 142 | "tags": [] 143 | }, 144 | "outputs": [ 145 | { 146 | "name": "stderr", 147 | "output_type": "stream", 148 | "text": [ 149 | "URL for OpenSearch index = https://search-semantic-search-hryn56c5jy43yryimohz4ajvyi.us-east-1.es.amazonaws.com/conversations\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 
154 | "URL = f'{domain_endpoint}/{domain_index}'\n", 155 | "logger.info(f'URL for OpenSearch index = {URL}')" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "id": "78f72086-5050-40c3-b4e8-32341cbe071c", 161 | "metadata": {}, 162 | "source": [ 163 | "#### Define the index mapping with a k-NN vector field" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 7, 169 | "id": "45c097ca-4501-42c4-ba1f-d10b52fc025e", 170 | "metadata": { 171 | "tags": [] 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "mapping = {\n", 176 | " 'settings': {\n", 177 | " 'index': {\n", 178 | " 'knn': True # Enable k-NN search for this index\n", 179 | " }\n", 180 | " },\n", 181 | " 'mappings': {\n", 182 | " 'properties': {\n", 183 | " 'embedding': { # k-NN vector field\n", 184 | " 'type': 'knn_vector',\n", 185 | " 'dimension': 4096 # Dimension of the vector\n", 186 | " },\n", 187 | " 'session_id': {\n", 188 | " 'type': 'keyword'\n", 189 | " },\n", 190 | " 'created_at': {\n", 191 | " 'type': 'long'\n", 192 | " },\n", 193 | " 'conversation_summary': {\n", 194 | " 'type': 'text'\n", 195 | " }\n", 196 | " }\n", 197 | " }\n", 198 | "}" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "id": "e1a33e70-1e5f-44d7-aec4-4c374d2a8e3d", 204 | "metadata": {}, 205 | "source": [ 206 | "#### Create the index with the specified mapping" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 8, 212 | "id": "06601622-e982-42b2-910a-71de86e56475", 213 | "metadata": { 214 | "tags": [] 215 | }, 216 | "outputs": [ 217 | { 218 | "name": "stderr", 219 | "output_type": "stream", 220 | "text": [ 221 | "Index created: {\"acknowledged\":true,\"shards_acknowledged\":true,\"index\":\"conversations\"}\n" 222 | ] 223 | } 224 | ], 225 | "source": [ 226 | "# Check if the index exists using an HTTP HEAD request\n", 227 | "response = requests.head(URL, auth=HTTPBasicAuth(os_username, os_password))\n", 228 | "\n", 229 | "# If the index does not exist 
(status code 404), create the index\n", 230 | "if response.status_code == 404:\n", 231 | " response = requests.put(URL, auth=HTTPBasicAuth(os_username, os_password), json=mapping)\n", 232 | " logger.info(f'Index created: {response.text}')\n", 233 | "else:\n", 234 | " logger.error('Index already exists!')" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "id": "17ef4711-ec69-4628-b829-64e25d8e290e", 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [] 244 | } 245 | ], 246 | "metadata": { 247 | "availableInstances": [ 248 | { 249 | "_defaultOrder": 0, 250 | "_isFastLaunch": true, 251 | "category": "General purpose", 252 | "gpuNum": 0, 253 | "hideHardwareSpecs": false, 254 | "memoryGiB": 4, 255 | "name": "ml.t3.medium", 256 | "vcpuNum": 2 257 | }, 258 | { 259 | "_defaultOrder": 1, 260 | "_isFastLaunch": false, 261 | "category": "General purpose", 262 | "gpuNum": 0, 263 | "hideHardwareSpecs": false, 264 | "memoryGiB": 8, 265 | "name": "ml.t3.large", 266 | "vcpuNum": 2 267 | }, 268 | { 269 | "_defaultOrder": 2, 270 | "_isFastLaunch": false, 271 | "category": "General purpose", 272 | "gpuNum": 0, 273 | "hideHardwareSpecs": false, 274 | "memoryGiB": 16, 275 | "name": "ml.t3.xlarge", 276 | "vcpuNum": 4 277 | }, 278 | { 279 | "_defaultOrder": 3, 280 | "_isFastLaunch": false, 281 | "category": "General purpose", 282 | "gpuNum": 0, 283 | "hideHardwareSpecs": false, 284 | "memoryGiB": 32, 285 | "name": "ml.t3.2xlarge", 286 | "vcpuNum": 8 287 | }, 288 | { 289 | "_defaultOrder": 4, 290 | "_isFastLaunch": true, 291 | "category": "General purpose", 292 | "gpuNum": 0, 293 | "hideHardwareSpecs": false, 294 | "memoryGiB": 8, 295 | "name": "ml.m5.large", 296 | "vcpuNum": 2 297 | }, 298 | { 299 | "_defaultOrder": 5, 300 | "_isFastLaunch": false, 301 | "category": "General purpose", 302 | "gpuNum": 0, 303 | "hideHardwareSpecs": false, 304 | "memoryGiB": 16, 305 | "name": "ml.m5.xlarge", 306 | "vcpuNum": 4 307 | }, 308 | { 309 | 
"_defaultOrder": 6, 310 | "_isFastLaunch": false, 311 | "category": "General purpose", 312 | "gpuNum": 0, 313 | "hideHardwareSpecs": false, 314 | "memoryGiB": 32, 315 | "name": "ml.m5.2xlarge", 316 | "vcpuNum": 8 317 | }, 318 | { 319 | "_defaultOrder": 7, 320 | "_isFastLaunch": false, 321 | "category": "General purpose", 322 | "gpuNum": 0, 323 | "hideHardwareSpecs": false, 324 | "memoryGiB": 64, 325 | "name": "ml.m5.4xlarge", 326 | "vcpuNum": 16 327 | }, 328 | { 329 | "_defaultOrder": 8, 330 | "_isFastLaunch": false, 331 | "category": "General purpose", 332 | "gpuNum": 0, 333 | "hideHardwareSpecs": false, 334 | "memoryGiB": 128, 335 | "name": "ml.m5.8xlarge", 336 | "vcpuNum": 32 337 | }, 338 | { 339 | "_defaultOrder": 9, 340 | "_isFastLaunch": false, 341 | "category": "General purpose", 342 | "gpuNum": 0, 343 | "hideHardwareSpecs": false, 344 | "memoryGiB": 192, 345 | "name": "ml.m5.12xlarge", 346 | "vcpuNum": 48 347 | }, 348 | { 349 | "_defaultOrder": 10, 350 | "_isFastLaunch": false, 351 | "category": "General purpose", 352 | "gpuNum": 0, 353 | "hideHardwareSpecs": false, 354 | "memoryGiB": 256, 355 | "name": "ml.m5.16xlarge", 356 | "vcpuNum": 64 357 | }, 358 | { 359 | "_defaultOrder": 11, 360 | "_isFastLaunch": false, 361 | "category": "General purpose", 362 | "gpuNum": 0, 363 | "hideHardwareSpecs": false, 364 | "memoryGiB": 384, 365 | "name": "ml.m5.24xlarge", 366 | "vcpuNum": 96 367 | }, 368 | { 369 | "_defaultOrder": 12, 370 | "_isFastLaunch": false, 371 | "category": "General purpose", 372 | "gpuNum": 0, 373 | "hideHardwareSpecs": false, 374 | "memoryGiB": 8, 375 | "name": "ml.m5d.large", 376 | "vcpuNum": 2 377 | }, 378 | { 379 | "_defaultOrder": 13, 380 | "_isFastLaunch": false, 381 | "category": "General purpose", 382 | "gpuNum": 0, 383 | "hideHardwareSpecs": false, 384 | "memoryGiB": 16, 385 | "name": "ml.m5d.xlarge", 386 | "vcpuNum": 4 387 | }, 388 | { 389 | "_defaultOrder": 14, 390 | "_isFastLaunch": false, 391 | "category": "General purpose", 392 | 
"gpuNum": 0, 393 | "hideHardwareSpecs": false, 394 | "memoryGiB": 32, 395 | "name": "ml.m5d.2xlarge", 396 | "vcpuNum": 8 397 | }, 398 | { 399 | "_defaultOrder": 15, 400 | "_isFastLaunch": false, 401 | "category": "General purpose", 402 | "gpuNum": 0, 403 | "hideHardwareSpecs": false, 404 | "memoryGiB": 64, 405 | "name": "ml.m5d.4xlarge", 406 | "vcpuNum": 16 407 | }, 408 | { 409 | "_defaultOrder": 16, 410 | "_isFastLaunch": false, 411 | "category": "General purpose", 412 | "gpuNum": 0, 413 | "hideHardwareSpecs": false, 414 | "memoryGiB": 128, 415 | "name": "ml.m5d.8xlarge", 416 | "vcpuNum": 32 417 | }, 418 | { 419 | "_defaultOrder": 17, 420 | "_isFastLaunch": false, 421 | "category": "General purpose", 422 | "gpuNum": 0, 423 | "hideHardwareSpecs": false, 424 | "memoryGiB": 192, 425 | "name": "ml.m5d.12xlarge", 426 | "vcpuNum": 48 427 | }, 428 | { 429 | "_defaultOrder": 18, 430 | "_isFastLaunch": false, 431 | "category": "General purpose", 432 | "gpuNum": 0, 433 | "hideHardwareSpecs": false, 434 | "memoryGiB": 256, 435 | "name": "ml.m5d.16xlarge", 436 | "vcpuNum": 64 437 | }, 438 | { 439 | "_defaultOrder": 19, 440 | "_isFastLaunch": false, 441 | "category": "General purpose", 442 | "gpuNum": 0, 443 | "hideHardwareSpecs": false, 444 | "memoryGiB": 384, 445 | "name": "ml.m5d.24xlarge", 446 | "vcpuNum": 96 447 | }, 448 | { 449 | "_defaultOrder": 20, 450 | "_isFastLaunch": false, 451 | "category": "General purpose", 452 | "gpuNum": 0, 453 | "hideHardwareSpecs": true, 454 | "memoryGiB": 0, 455 | "name": "ml.geospatial.interactive", 456 | "supportedImageNames": [ 457 | "sagemaker-geospatial-v1-0" 458 | ], 459 | "vcpuNum": 0 460 | }, 461 | { 462 | "_defaultOrder": 21, 463 | "_isFastLaunch": true, 464 | "category": "Compute optimized", 465 | "gpuNum": 0, 466 | "hideHardwareSpecs": false, 467 | "memoryGiB": 4, 468 | "name": "ml.c5.large", 469 | "vcpuNum": 2 470 | }, 471 | { 472 | "_defaultOrder": 22, 473 | "_isFastLaunch": false, 474 | "category": "Compute optimized", 475 | 
"gpuNum": 0, 476 | "hideHardwareSpecs": false, 477 | "memoryGiB": 8, 478 | "name": "ml.c5.xlarge", 479 | "vcpuNum": 4 480 | }, 481 | { 482 | "_defaultOrder": 23, 483 | "_isFastLaunch": false, 484 | "category": "Compute optimized", 485 | "gpuNum": 0, 486 | "hideHardwareSpecs": false, 487 | "memoryGiB": 16, 488 | "name": "ml.c5.2xlarge", 489 | "vcpuNum": 8 490 | }, 491 | { 492 | "_defaultOrder": 24, 493 | "_isFastLaunch": false, 494 | "category": "Compute optimized", 495 | "gpuNum": 0, 496 | "hideHardwareSpecs": false, 497 | "memoryGiB": 32, 498 | "name": "ml.c5.4xlarge", 499 | "vcpuNum": 16 500 | }, 501 | { 502 | "_defaultOrder": 25, 503 | "_isFastLaunch": false, 504 | "category": "Compute optimized", 505 | "gpuNum": 0, 506 | "hideHardwareSpecs": false, 507 | "memoryGiB": 72, 508 | "name": "ml.c5.9xlarge", 509 | "vcpuNum": 36 510 | }, 511 | { 512 | "_defaultOrder": 26, 513 | "_isFastLaunch": false, 514 | "category": "Compute optimized", 515 | "gpuNum": 0, 516 | "hideHardwareSpecs": false, 517 | "memoryGiB": 96, 518 | "name": "ml.c5.12xlarge", 519 | "vcpuNum": 48 520 | }, 521 | { 522 | "_defaultOrder": 27, 523 | "_isFastLaunch": false, 524 | "category": "Compute optimized", 525 | "gpuNum": 0, 526 | "hideHardwareSpecs": false, 527 | "memoryGiB": 144, 528 | "name": "ml.c5.18xlarge", 529 | "vcpuNum": 72 530 | }, 531 | { 532 | "_defaultOrder": 28, 533 | "_isFastLaunch": false, 534 | "category": "Compute optimized", 535 | "gpuNum": 0, 536 | "hideHardwareSpecs": false, 537 | "memoryGiB": 192, 538 | "name": "ml.c5.24xlarge", 539 | "vcpuNum": 96 540 | }, 541 | { 542 | "_defaultOrder": 29, 543 | "_isFastLaunch": true, 544 | "category": "Accelerated computing", 545 | "gpuNum": 1, 546 | "hideHardwareSpecs": false, 547 | "memoryGiB": 16, 548 | "name": "ml.g4dn.xlarge", 549 | "vcpuNum": 4 550 | }, 551 | { 552 | "_defaultOrder": 30, 553 | "_isFastLaunch": false, 554 | "category": "Accelerated computing", 555 | "gpuNum": 1, 556 | "hideHardwareSpecs": false, 557 | "memoryGiB": 32, 
558 | "name": "ml.g4dn.2xlarge", 559 | "vcpuNum": 8 560 | }, 561 | { 562 | "_defaultOrder": 31, 563 | "_isFastLaunch": false, 564 | "category": "Accelerated computing", 565 | "gpuNum": 1, 566 | "hideHardwareSpecs": false, 567 | "memoryGiB": 64, 568 | "name": "ml.g4dn.4xlarge", 569 | "vcpuNum": 16 570 | }, 571 | { 572 | "_defaultOrder": 32, 573 | "_isFastLaunch": false, 574 | "category": "Accelerated computing", 575 | "gpuNum": 1, 576 | "hideHardwareSpecs": false, 577 | "memoryGiB": 128, 578 | "name": "ml.g4dn.8xlarge", 579 | "vcpuNum": 32 580 | }, 581 | { 582 | "_defaultOrder": 33, 583 | "_isFastLaunch": false, 584 | "category": "Accelerated computing", 585 | "gpuNum": 4, 586 | "hideHardwareSpecs": false, 587 | "memoryGiB": 192, 588 | "name": "ml.g4dn.12xlarge", 589 | "vcpuNum": 48 590 | }, 591 | { 592 | "_defaultOrder": 34, 593 | "_isFastLaunch": false, 594 | "category": "Accelerated computing", 595 | "gpuNum": 1, 596 | "hideHardwareSpecs": false, 597 | "memoryGiB": 256, 598 | "name": "ml.g4dn.16xlarge", 599 | "vcpuNum": 64 600 | }, 601 | { 602 | "_defaultOrder": 35, 603 | "_isFastLaunch": false, 604 | "category": "Accelerated computing", 605 | "gpuNum": 1, 606 | "hideHardwareSpecs": false, 607 | "memoryGiB": 61, 608 | "name": "ml.p3.2xlarge", 609 | "vcpuNum": 8 610 | }, 611 | { 612 | "_defaultOrder": 36, 613 | "_isFastLaunch": false, 614 | "category": "Accelerated computing", 615 | "gpuNum": 4, 616 | "hideHardwareSpecs": false, 617 | "memoryGiB": 244, 618 | "name": "ml.p3.8xlarge", 619 | "vcpuNum": 32 620 | }, 621 | { 622 | "_defaultOrder": 37, 623 | "_isFastLaunch": false, 624 | "category": "Accelerated computing", 625 | "gpuNum": 8, 626 | "hideHardwareSpecs": false, 627 | "memoryGiB": 488, 628 | "name": "ml.p3.16xlarge", 629 | "vcpuNum": 64 630 | }, 631 | { 632 | "_defaultOrder": 38, 633 | "_isFastLaunch": false, 634 | "category": "Accelerated computing", 635 | "gpuNum": 8, 636 | "hideHardwareSpecs": false, 637 | "memoryGiB": 768, 638 | "name": 
"ml.p3dn.24xlarge", 639 | "vcpuNum": 96 640 | }, 641 | { 642 | "_defaultOrder": 39, 643 | "_isFastLaunch": false, 644 | "category": "Memory Optimized", 645 | "gpuNum": 0, 646 | "hideHardwareSpecs": false, 647 | "memoryGiB": 16, 648 | "name": "ml.r5.large", 649 | "vcpuNum": 2 650 | }, 651 | { 652 | "_defaultOrder": 40, 653 | "_isFastLaunch": false, 654 | "category": "Memory Optimized", 655 | "gpuNum": 0, 656 | "hideHardwareSpecs": false, 657 | "memoryGiB": 32, 658 | "name": "ml.r5.xlarge", 659 | "vcpuNum": 4 660 | }, 661 | { 662 | "_defaultOrder": 41, 663 | "_isFastLaunch": false, 664 | "category": "Memory Optimized", 665 | "gpuNum": 0, 666 | "hideHardwareSpecs": false, 667 | "memoryGiB": 64, 668 | "name": "ml.r5.2xlarge", 669 | "vcpuNum": 8 670 | }, 671 | { 672 | "_defaultOrder": 42, 673 | "_isFastLaunch": false, 674 | "category": "Memory Optimized", 675 | "gpuNum": 0, 676 | "hideHardwareSpecs": false, 677 | "memoryGiB": 128, 678 | "name": "ml.r5.4xlarge", 679 | "vcpuNum": 16 680 | }, 681 | { 682 | "_defaultOrder": 43, 683 | "_isFastLaunch": false, 684 | "category": "Memory Optimized", 685 | "gpuNum": 0, 686 | "hideHardwareSpecs": false, 687 | "memoryGiB": 256, 688 | "name": "ml.r5.8xlarge", 689 | "vcpuNum": 32 690 | }, 691 | { 692 | "_defaultOrder": 44, 693 | "_isFastLaunch": false, 694 | "category": "Memory Optimized", 695 | "gpuNum": 0, 696 | "hideHardwareSpecs": false, 697 | "memoryGiB": 384, 698 | "name": "ml.r5.12xlarge", 699 | "vcpuNum": 48 700 | }, 701 | { 702 | "_defaultOrder": 45, 703 | "_isFastLaunch": false, 704 | "category": "Memory Optimized", 705 | "gpuNum": 0, 706 | "hideHardwareSpecs": false, 707 | "memoryGiB": 512, 708 | "name": "ml.r5.16xlarge", 709 | "vcpuNum": 64 710 | }, 711 | { 712 | "_defaultOrder": 46, 713 | "_isFastLaunch": false, 714 | "category": "Memory Optimized", 715 | "gpuNum": 0, 716 | "hideHardwareSpecs": false, 717 | "memoryGiB": 768, 718 | "name": "ml.r5.24xlarge", 719 | "vcpuNum": 96 720 | }, 721 | { 722 | "_defaultOrder": 47, 
723 | "_isFastLaunch": false, 724 | "category": "Accelerated computing", 725 | "gpuNum": 1, 726 | "hideHardwareSpecs": false, 727 | "memoryGiB": 16, 728 | "name": "ml.g5.xlarge", 729 | "vcpuNum": 4 730 | }, 731 | { 732 | "_defaultOrder": 48, 733 | "_isFastLaunch": false, 734 | "category": "Accelerated computing", 735 | "gpuNum": 1, 736 | "hideHardwareSpecs": false, 737 | "memoryGiB": 32, 738 | "name": "ml.g5.2xlarge", 739 | "vcpuNum": 8 740 | }, 741 | { 742 | "_defaultOrder": 49, 743 | "_isFastLaunch": false, 744 | "category": "Accelerated computing", 745 | "gpuNum": 1, 746 | "hideHardwareSpecs": false, 747 | "memoryGiB": 64, 748 | "name": "ml.g5.4xlarge", 749 | "vcpuNum": 16 750 | }, 751 | { 752 | "_defaultOrder": 50, 753 | "_isFastLaunch": false, 754 | "category": "Accelerated computing", 755 | "gpuNum": 1, 756 | "hideHardwareSpecs": false, 757 | "memoryGiB": 128, 758 | "name": "ml.g5.8xlarge", 759 | "vcpuNum": 32 760 | }, 761 | { 762 | "_defaultOrder": 51, 763 | "_isFastLaunch": false, 764 | "category": "Accelerated computing", 765 | "gpuNum": 1, 766 | "hideHardwareSpecs": false, 767 | "memoryGiB": 256, 768 | "name": "ml.g5.16xlarge", 769 | "vcpuNum": 64 770 | }, 771 | { 772 | "_defaultOrder": 52, 773 | "_isFastLaunch": false, 774 | "category": "Accelerated computing", 775 | "gpuNum": 4, 776 | "hideHardwareSpecs": false, 777 | "memoryGiB": 192, 778 | "name": "ml.g5.12xlarge", 779 | "vcpuNum": 48 780 | }, 781 | { 782 | "_defaultOrder": 53, 783 | "_isFastLaunch": false, 784 | "category": "Accelerated computing", 785 | "gpuNum": 4, 786 | "hideHardwareSpecs": false, 787 | "memoryGiB": 384, 788 | "name": "ml.g5.24xlarge", 789 | "vcpuNum": 96 790 | }, 791 | { 792 | "_defaultOrder": 54, 793 | "_isFastLaunch": false, 794 | "category": "Accelerated computing", 795 | "gpuNum": 8, 796 | "hideHardwareSpecs": false, 797 | "memoryGiB": 768, 798 | "name": "ml.g5.48xlarge", 799 | "vcpuNum": 192 800 | }, 801 | { 802 | "_defaultOrder": 55, 803 | "_isFastLaunch": false, 804 | 
"category": "Accelerated computing", 805 | "gpuNum": 8, 806 | "hideHardwareSpecs": false, 807 | "memoryGiB": 1152, 808 | "name": "ml.p4d.24xlarge", 809 | "vcpuNum": 96 810 | }, 811 | { 812 | "_defaultOrder": 56, 813 | "_isFastLaunch": false, 814 | "category": "Accelerated computing", 815 | "gpuNum": 8, 816 | "hideHardwareSpecs": false, 817 | "memoryGiB": 1152, 818 | "name": "ml.p4de.24xlarge", 819 | "vcpuNum": 96 820 | } 821 | ], 822 | "instance_type": "ml.t3.medium", 823 | "kernelspec": { 824 | "display_name": "Python 3 (Data Science)", 825 | "language": "python", 826 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" 827 | }, 828 | "language_info": { 829 | "codemirror_mode": { 830 | "name": "ipython", 831 | "version": 3 832 | }, 833 | "file_extension": ".py", 834 | "mimetype": "text/x-python", 835 | "name": "python", 836 | "nbconvert_exporter": "python", 837 | "pygments_lexer": "ipython3", 838 | "version": "3.7.10" 839 | } 840 | }, 841 | "nbformat": 4, 842 | "nbformat_minor": 5 843 | } 844 | -------------------------------------------------------------------------------- /05-lambda-handler.py: -------------------------------------------------------------------------------- 1 | from boto3.dynamodb.conditions import Key 2 | from requests.auth import HTTPBasicAuth 3 | import requests 4 | import logging 5 | import boto3 6 | import json 7 | import os 8 | 9 | 10 | # Set up logger 11 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', 12 | datefmt='%Y-%m-%d %H:%M:%S') 13 | logger = logging.getLogger('log') 14 | 15 | # Create service clients 16 | dynamodb = boto3.resource('dynamodb') 17 | sagemaker_runtime = boto3.client('sagemaker-runtime') 18 | 19 | # Reference SageMaker JumpStart endpoints 20 | domain_endpoint = os.environ['OS_ENDPOINT'] 21 | domain_index = os.environ['OS_INDEX_NAME'] 22 | os_username = os.environ['OS_USERNAME'] 23 | os_password = 
# Upper bound, in seconds, for the OpenSearch write; without it a stalled
# connection would block until the Lambda invocation itself times out.
_OS_REQUEST_TIMEOUT_SECONDS = 30


def lambda_handler(event: dict, context: dict) -> None:
    """Persist every modified session in *event* to long-term memory.

    For each record whose ``eventName`` is ``'MODIFY'`` the handler loads the
    session's conversation turns, flattens them, summarizes the text, embeds
    the summary and writes the result to the search index.

    :param event: Mapping with a ``'Records'`` list; each record carries
        ``eventName`` and ``dynamodb.NewImage`` in DynamoDB attribute-value
        form (``{'S': ...}`` / ``{'N': ...}``).
    :param context: Invocation context; only logged.
    """
    logger.info(f'Received event: {event}')
    logger.info(f'Received context: {context}')

    for record in event['Records']:
        if record['eventName'] != 'MODIFY':
            continue

        session_item = record['dynamodb']['NewImage']
        session_id = session_item['session_id']['S']

        # A session may be modified before it has a numeric end_time (the
        # attribute can be absent or stored as NULL). Skip it instead of
        # failing the whole batch with a KeyError.
        end_time = session_item.get('end_time', {}).get('N')
        if end_time is None:
            logger.warning(f'Skipping session {session_id}: no numeric end_time in the new image')
            continue

        conversation_turns = query_conversations_table(session_id)
        flattened_conversations = flatten_conversations(conversation_turns)

        # Hand the summarizer the plain text, not the wrapping dict, so the
        # prompt does not contain a Python dict repr.
        summary = summarize_conversations(flattened_conversations['conversation'])
        embedding = encode_conversations(summary)
        write_to_elasticsearch(session_id, embedding, end_time, summary)

        logger.info(f'Session {session_id} was persisted to long term memory')


def query_conversations_table(session_id: str) -> list:
    """Return all items of the ``conversations`` table for *session_id*.

    Follows ``LastEvaluatedKey`` so that sessions spanning more than one
    query page are returned completely.

    :param session_id: Partition-key value to query for.
    :return: List of the matching items, in the order the table returns them.
    """
    table = dynamodb.Table('conversations')
    query_kwargs = {'KeyConditionExpression': Key('session_id').eq(session_id)}
    items = []
    while True:
        response = table.query(**query_kwargs)
        items.extend(response['Items'])
        last_key = response.get('LastEvaluatedKey')
        if last_key is None:
            return items
        query_kwargs['ExclusiveStartKey'] = last_key


def flatten_conversations(conversation_turns: list) -> dict:
    """Concatenate all turns into one string.

    Each turn contributes ``"<Me> <AI> "`` (note the trailing space).

    :param conversation_turns: Items with ``'Me'`` and ``'AI'`` entries.
    :return: ``{'conversation': <concatenated text>}``; the text is empty
        when there are no turns.
    """
    text = ''.join(f"{turn['Me']} {turn['AI']} " for turn in conversation_turns)
    return {'conversation': text}


def summarize_conversations(conversation: str) -> str:
    """Summarize a conversation with the text-generation endpoint.

    :param conversation: Conversation text. For backward compatibility the
        dict produced by :func:`flatten_conversations` is accepted as well;
        its ``'conversation'`` entry is used.
    :return: The first generated text returned by the endpoint.
    """
    if isinstance(conversation, dict):
        conversation = conversation.get('conversation', '')

    logger.info(f'Conversation: {conversation}')
    prompt = f"""Conversation==hi there! I'm doing well, thank you. what is the meaning of eminent domain? Eminent domain is the power of the government to take private property for public use, with just compensation.
Summary==We discussed about the meaning of eminent domain and that it is the government's power to take private property for public use with just compensation.

Conversation==Hey! I'm feeling great, how about you? Can you tell me what is the concept of due diligence? Due diligence is a comprehensive investigation or appraisal of a business or person before entering into an agreement or transaction.
Summary==We discussed about the meaning of due diligence and that it is a comprehensive investigation or appraisal of a business or person before entering into an agreement or transaction.

Conversation==hello! I'm good, thank you for asking. What is the definition of fiduciary duty? Fiduciary duty is a legal obligation of one party to act in the best interests of another, often in financial or legal matters.
Summary==We talked about the meaning of fiduciary duty and that it is a legal obligation of one party to act in the best interests of another, often in financial or legal matters.

Conversation=={conversation}
Summary==


Summarize the above Conversation as a short paragraph in 3 to 4 sentences."""
    payload = {'text_inputs': prompt,
               'max_length': MAX_LENGTH,
               'temperature': TEMPERATURE,
               'num_return_sequences': NUM_RETURN_SEQUENCES,
               'top_k': TOP_K,
               'top_p': TOP_P,
               'do_sample': DO_SAMPLE}
    payload = json.dumps(payload).encode('utf-8')
    response = sagemaker_runtime.invoke_endpoint(EndpointName=os.environ['SAGEMAKER_TEXT_GEN_ENDPOINT'],
                                                 ContentType=CONTENT_TYPE,
                                                 Body=payload)
    model_predictions = json.loads(response['Body'].read())
    generated_text = model_predictions['generated_texts'][0]
    logger.info(f'Summary: {generated_text}')
    return generated_text


def encode_conversations(summary: str) -> list:
    """Embed *summary* with the text-embedding endpoint.

    :param summary: Text to embed; sent as a single-element batch.
    :return: The first entry of the ``'embedding'`` field in the response.
    """
    payload = json.dumps({'text_inputs': [summary]}).encode('utf-8')
    response = sagemaker_runtime.invoke_endpoint(EndpointName=os.environ['SAGEMAKER_TEXT_EMBED_ENDPOINT'],
                                                 ContentType='application/json',
                                                 Body=payload)
    body = json.loads(response['Body'].read())
    return body['embedding'][0]


def write_to_elasticsearch(session_id: str, embedding: list, end_time: int, summary: str) -> None:
    """Index the session summary and its embedding under ``_doc/<session_id>``.

    The write is best effort: failures are logged, never raised.

    :param session_id: Session identifier; also used as the document id.
    :param embedding: Embedding of *summary*.
    :param end_time: Session end time, stored as ``created_at``.
        NOTE(review): the handler passes the raw ``N`` attribute value,
        presumably a numeric string - confirm the index mapping accepts it.
    :param summary: Generated conversation summary.
    """
    document = {
        'session_id': session_id,
        'embedding': embedding,
        'created_at': end_time,
        'conversation_summary': summary
    }

    try:
        response = requests.post(f'{URL}/_doc/{session_id}',
                                 auth=HTTPBasicAuth(os_username, os_password),
                                 json=document,
                                 timeout=_OS_REQUEST_TIMEOUT_SECONDS)
        if response.status_code not in [200, 201]:
            logger.error(response.status_code)
            logger.error(response.text)
    except Exception:
        # Deliberately broad: indexing must not fail the whole invocation.
        logger.exception(f'Failed to write session {session_id} to the search index')
| Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AWS SageMaker Chatbot 2 | Build a context-aware chatbot with short and long-term memory using AWS SageMaker and other AWS services for improved user interactions. 3 |
4 |
5 | ![Cognitive Architecture AWS](./img/cognition.png) 6 |
7 |
def get_text_input():
    """
    Render the chat input box and return its current content.
    :return: Text entered by the user
    """
    return st.text_input('You: ',
                         st.session_state['input'],
                         key='input',
                         placeholder='Your AI assistant here! Ask me anything ...',
                         label_visibility='hidden')


def new_session():
    """
    End the current session (if one exists), archive its transcript in
    ``stored_session`` and reset the chat state.
    """
    # ``session_id`` is only set once the first message of a session has been
    # sent; clicking the button before that must not raise AttributeError.
    if 'session_id' in st.session_state:
        table = dynamodb.Table('sessions')
        end_session(table, st.session_state.session_id)

    # Archive newest turn first, matching the order used on screen.
    save = []
    for j in reversed(range(len(st.session_state['generated']))):
        save.append(f"User: {st.session_state['past'][j]}")
        save.append(f"Bot: {st.session_state['generated'][j]}")

    st.session_state['stored_session'].append(save)
    st.session_state['generated'] = []
    st.session_state['past'] = []
    st.session_state['input'] = ''


def respond_by_task(query, history):
    """
    Answer *query* according to the task type reported by ``detect_task``.
    :param query: The user's message; used for task detection, prompting and
        retrieval (previously the module-level ``user_input`` was read
        instead of this parameter).
    :param history: Past conversation as text; may be empty.
    :return: The completion string, or None for an unrecognised task type
    """
    logger.info(f'HISTORY: {history}')
    task_type = detect_task(query)
    logger.info(f'TASK TYPE = {task_type}')
    completion = None
    if task_type == 'STM CHAT':
        # NOTE(review): assumes the original triple-quoted prompt lines
        # started at column 0 (no indentation inside the literal) - confirm.
        current_turn = f'Me: {query}\nAI:'
        prompt = f'{history}\n{current_turn}' if len(history) > 0 else current_turn
        logger.info(f'Prompt: {prompt}')
        completion = generate_dialogue_response(prompt)
    elif task_type == 'LTM PAST CONVERSATIONS':
        completion = retrieve_top_matching_past_conversations(query, 'conversations')
        completion = '\n\n'.join(completion)
    elif task_type == 'LTM VERIFIED SOURCES':
        completion = retrieve_top_matching_passages(query, 'passages')
        completion = summarize_passages_and_collate_answers(completion, query)
    return completion


def transform_ddb_past_history(history: list, num_turns=10) -> str:
    """
    Render the last *num_turns* turns of *history* as prompt text.
    :param history: Items with ``'Me'`` and ``'AI'`` entries, oldest first
    :param num_turns: Number of most recent turns to keep; values below 1
        yield an empty string (a plain ``[-0:]`` slice would keep everything)
    :return: ``'Me: ...'`` / ``'AI: ...'`` lines joined by newlines
    """
    if num_turns < 1:
        return ''
    past_hist = []
    for turn in history:
        past_hist.append(f"Me: {turn['Me']}")
        past_hist.append(f"AI: {turn['AI']}")
    # Two lines per turn.
    return '\n'.join(past_hist[-num_turns * 2:])
def add_conversation_turn(table, session_id, user, bot):
    """Store one user/bot exchange in *table*.

    The item is stamped with the current time in epoch milliseconds.

    :param table: Table object exposing ``put_item``.
    :param session_id: Session the turn belongs to.
    :param user: The user's utterance (stored under ``'Me'``).
    :param bot: The bot's reply (stored under ``'AI'``).
    """
    timestamp = int(time.time() * 1000)
    table.put_item(
        Item={
            'session_id': session_id,
            'timestamp': timestamp,
            'Me': user,
            'AI': bot
        }
    )


def get_conversations_by_session_id(table, session_id, descending=True):
    """Return every item of *table* whose ``session_id`` matches.

    Follows ``LastEvaluatedKey`` so results larger than one query page are
    returned completely.

    :param table: Table object exposing ``query``.
    :param session_id: Partition-key value to query for.
    :param descending: Forwarded unchanged as ``ScanIndexForward``.
        NOTE(review): despite its name, the default ``True`` therefore asks
        for ascending sort-key order; the name is kept so existing callers
        keep their current ordering.
    :return: List of matching items.
    """
    query_kwargs = {
        'KeyConditionExpression': Key('session_id').eq(session_id),
        'ScanIndexForward': descending
    }
    items = []
    while True:
        response = table.query(**query_kwargs)
        items.extend(response['Items'])
        last_key = response.get('LastEvaluatedKey')
        if last_key is None:
            return items
        query_kwargs['ExclusiveStartKey'] = last_key


def delete_conversation(table, session_id, timestamp):
    """Delete the turn identified by (*session_id*, *timestamp*) from *table*."""
    table.delete_item(
        Key={
            'session_id': session_id,
            'timestamp': timestamp
        }
    )


def create_session(table):
    """Create a new session item and return its id.

    The item starts with ``end_time`` set to None and ``num_turns`` set to 0;
    ``start_time`` is the current time in epoch milliseconds.

    :param table: Table object exposing ``put_item``.
    :return: The generated session id (a UUID4 string).
    """
    session_id = str(uuid.uuid4())
    start_time = int(time.time() * 1000)
    table.put_item(
        Item={
            'session_id': session_id,
            'start_time': start_time,
            'end_time': None,
            'num_turns': 0
        }
    )
    return session_id


def end_session(table, session_id, conversations_table=None):
    """Mark a session as ended and record its statistics.

    Sets ``end_time``, ``num_turns`` and ``conversation_duration``
    (milliseconds) on the session item.

    :param table: Sessions table (``get_item`` / ``update_item``).
    :param session_id: Session to end.
    :param conversations_table: Table holding the turns. Defaults to the
        ``conversations`` table of the module-level resource. Previously the
        turns were counted by querying *table* itself, i.e. the sessions
        table, which does not hold the turns.
    :raises KeyError: If the lookup response has no ``'Item'`` entry.
    """
    end_time = int(time.time() * 1000)
    session = table.get_item(Key={'session_id': session_id})
    start_time = session['Item']['start_time']

    if conversations_table is None:
        conversations_table = client.Table('conversations')
    num_turns = len(get_conversations_by_session_id(conversations_table, session_id))
    conversation_duration = end_time - start_time  # Duration in milliseconds

    table.update_item(
        Key={'session_id': session_id},
        UpdateExpression="SET end_time = :end_time, num_turns = :num_turns, conversation_duration = "
                         ":conversation_duration",
        ExpressionAttributeValues={
            ':end_time': end_time,
            ':num_turns': num_turns,
            ':conversation_duration': conversation_duration
        }
    )


if __name__ == '__main__':
    # Start a new session
    sessions_table_ = client.Table('sessions')
    session_id_ = create_session(sessions_table_)

    # Add conversation turns
    conversations_table_ = client.Table('conversations')
    add_conversation_turn(conversations_table_, session_id_, 'hi', 'hello')
    add_conversation_turn(conversations_table_, session_id_, 'how are you?', 'i am fine')
    add_conversation_turn(conversations_table_, session_id_, 'what is the definition of court defamation?',
                          'Court defamation is a type of '
                          'civil wrong.')

    # End the session
    end_session(sessions_table_, session_id_, conversations_table_)
def detect_task(query: str) -> str:
    """Classify *query* by its leading command prefix.

    :param query: Raw user message.
    :return: ``'LTM VERIFIED SOURCES'`` for a ``\\verified`` or ``/verified``
        prefix, ``'LTM PAST CONVERSATIONS'`` for ``\\past`` or ``/past``,
        otherwise ``'STM CHAT'``.
    """
    if query.startswith(('\\verified', '/verified')):
        return 'LTM VERIFIED SOURCES'
    if query.startswith(('\\past', '/past')):
        return 'LTM PAST CONVERSATIONS'
    return 'STM CHAT'


def generate(prompt: str, max_length=256) -> str:
    """Send *prompt* to the text-generation endpoint and return its completion.

    :param prompt: Full prompt text.
    :param max_length: Forwarded as ``max_length`` in the request payload.
    :return: The first generated text, stripped of surrounding whitespace.
    """
    payload = {'text_inputs': prompt,
               'max_length': max_length,
               'num_return_sequences': NUM_RETURN_SEQUENCES,
               'top_k': TOP_K,
               'top_p': TOP_P,
               'temperature': TEMPERATURE,
               'do_sample': DO_SAMPLE}
    payload = json.dumps(payload).encode('utf-8')
    # The module defines ``endpoint_name`` (lower case); the former
    # ``ENDPOINT_NAME`` reference raised NameError on every call.
    response = client.invoke_endpoint(EndpointName=endpoint_name,
                                      ContentType=CONTENT_TYPE,
                                      Body=payload)
    model_predictions = json.loads(response['Body'].read())
    generated_text = model_predictions['generated_texts'][0]
    return generated_text.strip()


def summarize_passages_and_collate_answers(passages: list, query: str) -> str:
    """Answer *query* once per passage and join the answers.

    :param passages: Iterable of ``(passage, doc_id, passage_id)`` triples.
    :param query: The user's question.
    :return: One answer per passage, each followed by its
        ``[doc = ... | passage = ...]`` reference, separated by blank lines.
    """
    collated_answers = []
    for passage, doc_id, passage_id in passages:
        prompt = f'Passage=={passage}\n\nQuestion=={query}\n\nAnswer==\n\nGiven a passage and a question, generate ' \
                 f'a clean answer in 2 to 3 short complete sentences. '
        answer = generate(prompt, 256)
        collated_answers.append(f'{answer}\n\n[doc = {doc_id} | passage = {passage_id}]')
    collated_answers = '\n\n'.join(collated_answers)
    logger.info(f'ANSWERS: {collated_answers}')
    return collated_answers


def generate_dialogue_response(prompt: str) -> str:
    """Generate and log the reply for a dialogue *prompt*.

    :param prompt: Dialogue prompt.
    :return: The completion produced by :func:`generate`.
    """
    completion = generate(prompt, 256)
    logger.info(f'DIALOGUE RESPONSE: {completion}')
    return completion


if __name__ == '__main__':
    completion_ = detect_task('definition of bribery by indian law ')
    logging.info(completion_)
# Upper bound, in seconds, for a search request so a stalled connection
# cannot hang the app indefinitely.
_OS_REQUEST_TIMEOUT_SECONDS = 30


def get_es_query(embedding: list, k) -> dict:
    """Build a k-NN search body for the ``embedding`` field.

    :param embedding: Query vector.
    :param k: Used both as the neighbour count and as the result size.
    :return: The search request body as a dict.
    """
    return {
        'size': k,
        'query': {
            'knn': {
                'embedding': {
                    'vector': embedding,
                    'k': k
                }
            }
        }
    }


def _search_index(query: str, index: str, k: int) -> list:
    """Embed *query*, run a k-NN search on *index* and return the raw hits."""
    es_query = get_es_query(encode_query(query), k)
    url = f'{domain_endpoint}/{index}/_search'
    response = requests.post(url,
                             auth=HTTPBasicAuth(os_username, os_password),
                             json=es_query,
                             timeout=_OS_REQUEST_TIMEOUT_SECONDS)
    # Surface HTTP errors as such instead of a KeyError on the missing
    # 'hits' entry of an error body.
    response.raise_for_status()
    return response.json()['hits']['hits']


def retrieve_top_matching_passages(query: str, index: str, k: int = 3) -> list:
    """Return the *k* passages of *index* that best match *query*.

    :param query: Search text.
    :param index: Name of the index to search.
    :param k: Number of matches to request (default 3, the former constant).
    :return: List of ``[passage, doc_id, passage_id]`` lists in hit order.
    :raises requests.HTTPError: If the search responds with an error status.
    """
    passages = []
    for hit in _search_index(query, index, k):
        source = hit['_source']
        passages.append([source['passage'], source['doc_id'], source['passage_id']])
    return passages


def retrieve_top_matching_past_conversations(query: str, index: str, k: int = 3) -> list:
    """Return summaries of the *k* best matching past conversations.

    Each summary is prefixed with ``[date][time]`` derived from the hit's
    ``created_at`` value (epoch milliseconds, rendered in local time), and
    the list is ordered newest first. Hits sharing a timestamp are all kept;
    the former dict keyed by timestamp silently dropped all but one.

    :param query: Search text.
    :param index: Name of the index to search.
    :param k: Number of matches to request (default 3, the former constant).
    :return: List of formatted summary strings, newest first.
    :raises requests.HTTPError: If the search responds with an error status.
    """
    dated_summaries = []
    for hit in _search_index(query, index, k):
        source = hit['_source']
        created_at_ms = int(source['created_at'])
        created_at = datetime.datetime.fromtimestamp(created_at_ms / 1000.0)
        stamp = created_at.strftime('[%Y-%m-%d][%H:%M:%S]')
        dated_summaries.append((created_at_ms, f"{stamp} {source['conversation_summary']}"))

    # Sort on the timestamp only; ties keep their hit order.
    dated_summaries.sort(key=lambda pair: pair[0], reverse=True)
    return [summary for _, summary in dated_summaries]


if __name__ == '__main__':
    matches = retrieve_top_matching_past_conversations('court defamation', 'conversations')
    logger.info(matches)
    matches = retrieve_top_matching_passages('court defamation', 'legal-passages')
    logger.info(matches)