├── lab2 └── studio-playground-ui │ ├── start_playground.txt │ ├── .DS_Store │ ├── ml_image_prompt.png │ ├── images │ ├── flanmodel.png │ ├── flant5xxl.png │ ├── parameters.png │ ├── flan-deploy.png │ ├── flan-deployed.png │ ├── invoke_update.png │ ├── streamlitapp.png │ ├── jumpstart-panel.png │ └── .ipynb_checkpoints │ │ └── flan-deploy-checkpoint.png │ ├── templates │ ├── AI21-SUMMARY.template.json │ ├── AI21-CONTEXT-QA.template.json │ ├── Falcon-7B-Instruct-BF16-JS.template.json │ └── AI21-J2-GRANDE-INSTRUCT.template.json │ ├── invoke_endpoint.py │ ├── README.md │ ├── dict.py │ ├── main.py │ └── Run_My_PlayGround.ipynb ├── lab3 └── rag_including_chatbot │ ├── .DS_Store │ ├── ml_image.jpg │ ├── images │ ├── chains.png │ ├── loading.gif │ ├── models.png │ ├── prompt.png │ ├── langchain.png │ ├── ml_image.jpg │ ├── Chatbot_lang.png │ ├── vectorstore.png │ ├── Agent_langchain.png │ ├── Embeddings_lang.png │ ├── chatbot_internet.jpg │ ├── Langchain_sagemaker.png │ └── pinecone_langchain.png │ ├── sagemaker-faqs.pdf │ ├── .ipynb_checkpoints │ └── ml_image-checkpoint.jpg │ └── requirements.txt ├── CODE_OF_CONDUCT.md ├── README.md ├── LICENSE ├── CONTRIBUTING.md ├── lab1 ├── deploy-llama2-changes.ipynb └── deploy-llama2.ipynb ├── lab4 └── image-generation.ipynb └── lab5 ├── code-llama-changes.ipynb └── code-llama.ipynb /lab2/studio-playground-ui/start_playground.txt: -------------------------------------------------------------------------------- 1 | pip install boto3 streamlit streamlit-ace -Uq 2 | 3 | streamlit run main.py -------------------------------------------------------------------------------- /lab2/studio-playground-ui/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab2/studio-playground-ui/.DS_Store -------------------------------------------------------------------------------- 
/lab3/rag_including_chatbot/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/.DS_Store -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/ml_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/ml_image.jpg -------------------------------------------------------------------------------- /lab2/studio-playground-ui/ml_image_prompt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab2/studio-playground-ui/ml_image_prompt.png -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/images/chains.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/images/chains.png -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/images/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/images/loading.gif -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/images/models.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/images/models.png -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/images/prompt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/images/prompt.png -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/sagemaker-faqs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/sagemaker-faqs.pdf -------------------------------------------------------------------------------- /lab2/studio-playground-ui/images/flanmodel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab2/studio-playground-ui/images/flanmodel.png -------------------------------------------------------------------------------- /lab2/studio-playground-ui/images/flant5xxl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab2/studio-playground-ui/images/flant5xxl.png -------------------------------------------------------------------------------- /lab2/studio-playground-ui/images/parameters.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab2/studio-playground-ui/images/parameters.png -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/images/langchain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/images/langchain.png -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/images/ml_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/images/ml_image.jpg -------------------------------------------------------------------------------- /lab2/studio-playground-ui/images/flan-deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab2/studio-playground-ui/images/flan-deploy.png -------------------------------------------------------------------------------- /lab2/studio-playground-ui/images/flan-deployed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab2/studio-playground-ui/images/flan-deployed.png -------------------------------------------------------------------------------- /lab2/studio-playground-ui/images/invoke_update.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab2/studio-playground-ui/images/invoke_update.png -------------------------------------------------------------------------------- /lab2/studio-playground-ui/images/streamlitapp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab2/studio-playground-ui/images/streamlitapp.png -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/images/Chatbot_lang.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/images/Chatbot_lang.png -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/images/vectorstore.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/images/vectorstore.png -------------------------------------------------------------------------------- /lab2/studio-playground-ui/images/jumpstart-panel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab2/studio-playground-ui/images/jumpstart-panel.png -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/images/Agent_langchain.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/images/Agent_langchain.png -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/images/Embeddings_lang.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/images/Embeddings_lang.png -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/images/chatbot_internet.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/images/chatbot_internet.jpg -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/images/Langchain_sagemaker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/images/Langchain_sagemaker.png -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/images/pinecone_langchain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/images/pinecone_langchain.png -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/.ipynb_checkpoints/ml_image-checkpoint.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab3/rag_including_chatbot/.ipynb_checkpoints/ml_image-checkpoint.jpg -------------------------------------------------------------------------------- /lab2/studio-playground-ui/images/.ipynb_checkpoints/flan-deploy-checkpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aim402-using-publicly-available-fms-on-amazon-sagemaker-jumpstart/main/lab2/studio-playground-ui/images/.ipynb_checkpoints/flan-deploy-checkpoint.png -------------------------------------------------------------------------------- /lab3/rag_including_chatbot/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.161 2 | chromadb==0.3.21 3 | boto3==1.26.16 4 | botocore==1.29.27 5 | html2text 6 | jinja2 7 | faiss-cpu==1.7.4 8 | pypdf==3.8.1 9 | transformers==4.24.0 10 | anthropic 11 | sentence_transformers==2.2.2 -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /lab2/studio-playground-ui/templates/AI21-SUMMARY.template.json: -------------------------------------------------------------------------------- 1 | {"model_name": "AI21-J2-GRANDE", "model_type":"AI21-SUMMARY","description":"Summaries are returned as a single string, containing bullet points representing the key points of your text. 
The input text should contain at least 40 words and no more than 50,000 characters. This translates to roughly 10,000 words, or an impressive 40 pages!", "endpoint_name": "summarize", "payload": {"parameters": "None"}} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## AIM402 - Using publicly available FMs on Amazon SageMaker JumpStart 2 | 3 | Repository for training and deploying Generative AI models, including text-text, text-to-image generation, prompt engineering playground and chain of thought examples using SageMaker Studio. 4 | 5 | ## Security 6 | 7 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 8 | 9 | ## License 10 | 11 | This library is licensed under the MIT-0 License. See the LICENSE file. 12 | 13 | -------------------------------------------------------------------------------- /lab2/studio-playground-ui/templates/AI21-CONTEXT-QA.template.json: -------------------------------------------------------------------------------- 1 | {"model_name": "AI21-CONTEXT-QA", "model_type":"AI21-CONTEXT-QA", "endpoint_name": "contextual-answers","description":"The AI21 Context Answer model allows you to access a high-quality question answering technology. It was designed to answer questions based on a specific document context provided by the customer. 
This avoids any factual issues that language models may have and makes sure the answers it provides are grounded in that context document.", "payload": {"parameters": "None"}} -------------------------------------------------------------------------------- /lab2/studio-playground-ui/templates/Falcon-7B-Instruct-BF16-JS.template.json: -------------------------------------------------------------------------------- 1 | {"model_name": "Falcon-7B-Instruct-BF16-model", "description": "Falcon-7B-Instruct is a 7B parameters causal decoder-only model built by TII based on Falcon-7B and finetuned on a mixture of chat/instruct datasets. It is ready-to-use chat/instruct model based on Falcon 7B.", "model_type":"OSS", "endpoint_name": "hf-llm-falcon-7b-instruct-bf16-2023-09-26-18-52-09-870", "payload": {"parameters": {"max_new_tokens": {"default": 500, "range": [10, 1024]}, "top_p": {"default": 0.9, "range": [0.0, 1.0]}, "top_k": {"default": 50, "range": [0, 100]}, "temperature": {"default": 0.8, "range": [0.0, 1.0]}}}} -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | 18 | -------------------------------------------------------------------------------- /lab2/studio-playground-ui/templates/AI21-J2-GRANDE-INSTRUCT.template.json: -------------------------------------------------------------------------------- 1 | {"model_name": "AI21-J2-GRANDE", "model_type":"AI21", "endpoint_name": "j2-grande-instruct", "description":"Jurassic-2 Grande Instruct is an LLM by AI21 Labs that can be applied to any language comprehension or generation task. It is optimized to follow natural language instructions and context, so there is no need to provide it with any examples. The endpoint comes pre-loaded with the model and ready to serve queries via an easy-to-use API and Python SDK, so you can hit the ground running. Jurassic-2 Grande Instruct is a mid-sized language model carefully designed to strike the perfect balance between exceptional quality and affordability. You can use it to compose human-like text and solve complex language tasks such as question answering, text classification and many others. 
Popular use-cases include generating marketing copy, powering chatbots, assisting with creative writing, text summarization and information extraction.", "payload": {"parameters": {"maxTokens": {"default": 5, "range": [1, 500]}, "numResults": {"default": 1, "range": [0, 5]}, "temperature": {"default": 0.5, "range": [0.0, 1.0]}}}} -------------------------------------------------------------------------------- /lab2/studio-playground-ui/invoke_endpoint.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import json 3 | 4 | sagemaker_runtime = boto3.client("runtime.sagemaker") 5 | 6 | 7 | def generate_text(payload, endpoint_name): 8 | encoded_input = json.dumps(payload).encode("utf-8") 9 | 10 | response = sagemaker_runtime.invoke_endpoint( 11 | EndpointName=endpoint_name, ContentType="application/json", Body=encoded_input 12 | ) 13 | print("Model input: \n", encoded_input) 14 | result = json.loads(response["Body"].read()) 15 | # - this works for faster transformr and DJL containers 16 | for item in result: 17 | # print(f" Item={item}, type={type(item)}") 18 | if isinstance(item, list): 19 | for element in item: 20 | if isinstance(element, dict): 21 | # print(f"List:element::is: {element['generated_text']} ") 22 | return element["generated_text"] 23 | elif isinstance(item, str): 24 | # print(item["generated_text"]) 25 | # return item["generated_text"] 26 | print(f"probably:Item:from:dict::result[item]={result[item]}") 27 | return result[item] 28 | else: 29 | return result[0]["generated_text"] 30 | 31 | 32 | def generate_text_ai21(payload, endpoint_name): 33 | print("payload type: ", type(payload)) 34 | print("payload: ", payload) 35 | encoded_input = json.dumps({ 36 | "prompt": payload["inputs"], 37 | "maxTokens": payload["maxTokens"], 38 | "temperature": payload["temperature"], 39 | "numResults": payload["numResults"]}).encode("utf-8") 40 | response = sagemaker_runtime.invoke_endpoint( 41 | 
EndpointName=endpoint_name, ContentType="application/json", Body=encoded_input 42 | ) 43 | print("Model input: \n", encoded_input) 44 | result = json.loads(response["Body"].read()) 45 | print(result['completions'][0]['data']['text']) 46 | return result['completions'][0]['data']['text'] 47 | 48 | 49 | def generate_text_ai21_summarize(payload, endpoint_name): 50 | encoded_input = json.dumps({ 51 | "source": payload["inputs"], 52 | "sourceType": "TEXT"}).encode("utf-8") 53 | response = sagemaker_runtime.invoke_endpoint( 54 | EndpointName=endpoint_name, ContentType="application/json", Body=encoded_input 55 | ) 56 | print("Model input: \n", encoded_input) 57 | result = json.loads(response["Body"].read()) 58 | 59 | return result['summary'] 60 | 61 | 62 | def generate_text_ai21_context_qa(payload, question, endpoint_name): 63 | print('----- Context -------', payload["inputs"]) 64 | print('----- Question ------', question) 65 | encoded_input = json.dumps({ 66 | "context": payload["inputs"], 67 | "question": question}).encode("utf-8") 68 | response = sagemaker_runtime.invoke_endpoint( 69 | EndpointName=endpoint_name, ContentType="application/json", Body=encoded_input 70 | ) 71 | print("Model input: \n", encoded_input) 72 | result = json.loads(response["Body"].read()) 73 | return result['answer'] 74 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 
8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. 
As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /lab2/studio-playground-ui/README.md: -------------------------------------------------------------------------------- 1 | ## Prompt Engineering 2 | 3 | "Prompt engineering” refers to efforts to extract accurate, consistent, and fair outputs from large generative models, such text-to-image synthesizers or large language models (LLMs). LLMs are trained on large-scale bodies of text, so they encode a great deal of factual information about the world. But they’re trained to produce sequences of words that are probable in the general case — not accurate in the particular case. 4 | 5 | What is a prompt? 6 | - Text given to a pre-trained model for a prediction task 7 | 8 | Components of Prompt 9 | - Context 10 | - Task (Instruction, question etc.) 11 | - Image/Text 12 | - Training samples 13 | - Generative Model 14 | 15 | ### Types of Prompt Engineering 16 | 17 | 1. 
Several types of prompt engineering include: 18 | 1. Zero shot 19 | 2. One shot and Few shot 20 | 3. Chain of thought 21 | 4. Advanced concepts on prompt engineering 22 | 5. Adversarial Prompt engineering 23 | 24 | #### 1. Zero Shot: 25 | Zero-shot prompting enables a model to make predictions about previously unseen data without the need for any additional training. 26 | 27 | This is in contrast to traditional machine learning techniques, which require a large amount of labeled training data to make accurate predictions. 28 | 29 | In the context of prompt engineering, zero-shot learning can be used to generate natural language text without the need for explicit programming or pre-defined templates. 30 | 31 | 32 | #### 2. One Shot and Few Shot Prompting: 33 | 34 | One-shot prompting is used to generate natural language text with a limited amount of input data such as a single example or template. 35 | 36 | One-shot prompting can be combined with other natural language processing techniques like dialogue management and context modeling to create more sophisticated and effective text generation systems. 37 | 38 | Few-shot prompting is a technique where the model is given a small number of examples, typically between two and five, in order to quickly adapt to new examples of previously seen objects. 39 | 40 | Few-shot learning can be used in the context of prompt engineering, to create natural language text with a limited amount of input data. Although it requires less data, this technique can allow for the creation of more versatile and adaptive text generation models. 41 | 42 | By using advanced techniques such as few-shot prompting, it is possible to create natural language generation models that are more flexible, adaptable, and engaging for human users. 
43 | 44 | ### Chain of thought prompting 45 | 46 | Chain-of-thought (CoT) prompting improves the reasoning ability of LLMs by prompting them to generate a series of intermediate steps that lead to the final answer of a multi-step problem. 47 | 48 | 49 | In this lab, we are going to uncover how to setup SageMaker Studio for prompt engineering utility and use cases with different types of prompt engineering techniques. 50 | 51 | ## Setup - UI 52 | - Navigate to `SageMaker Studio`. 53 | - Click on `Home` on the left panel. 54 | - Click on `Deployments` and navigate to `Models,notebooks,solutions` link within `SageMaker Jumpstart` in the drop-down menu. 55 | 56 | ![image](/images/jumpstart-panel.png) 57 | 58 | - Under `Foundation Models` search for `FLAN-T5 XL` model and click on `view model`. 59 | 60 | ![image](/images/flant5xxl.png) 61 | 62 | - As executed in lab1, in a similar way, deploy the Flan-T5 XL model. You can change the deployment configuration or security settings based on the desired settings but you can go ahead and click on `deploy` for this lab. For deployment of the LLM models -- you can also refer to : https://github.com/aws/amazon-sagemaker-examples/tree/main/inference/generativeai/llm-workshop. 63 | 64 | ![image](/images/flan-deploy.png) 65 | 66 | - Take note of the model Endpoint details: Endpoint arn, Endpoint name, and the model location. IT may take 5-10 minutes to deploy the model endpoint. Endpoint status should switch to `In Service` in some-time. You can also scroll down on the page to open a demo notebook in order to query your endpoint from Studio. 67 | 68 | - Now that our endpoint is created (which can also be seen in SageMaker-Studio dashboard), navigate to `studio-playground-ui` folder that you cloned. 69 | - Within the folder, you will see `main.py` file and a `templates` folder. Inside templates folder open `FLANT5-JS.template.json` file with editor option. 
Replace the `endpoint_name` with your endpoint name created in the previous steps and save the file. 70 | - Check the instructions for creating GUI within the environment using Streamlit present in `start_playground.txt` file. In order to launch the GUI, click on `File` within SageMaker Studio tab, then click on `New` and navigate to `Terminal`. 71 | - Within the Terminal, run the following command: `pip install boto3 streamlit streamlit-ace -Uq` 72 | - Please ignore any warnings or errors related to `pip's dependency..`. 73 | - Next, run the next command found in start_playground.txt -- `streamlit run main.py`. Make sure you are in the right directory to run this command in the terminal (`amazon-sagemaker-generativeai/studio-playground-ui/`). Once the command is executed, you can view your Streamlit app in your browser. 74 | - Copy the URL for SageMaker Studio and update the same by appending `/proxy//`. You would have received the port number after running the Streamlit app. Domain URL should look similar to `https://d-lcxav5wg2gdg.studio.us-east-1.sagemaker.aws/jupyter/default/proxy//` 75 | 76 | - Now we can get started with the model playground and run prompt engineering on the deployed model endpoint within our environment in a secured fashion. 77 | 78 | 79 | ## Security 80 | 81 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. 82 | 83 | ## License 84 | 85 | This library is licensed under the MIT-0 License. See the LICENSE file. 
86 | 87 | -------------------------------------------------------------------------------- /lab2/studio-playground-ui/dict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from collections import defaultdict 4 | 5 | 6 | code_example = """{ 7 | "model_name": "example", 8 | "model_type": "AI21-SUMMARY", 9 | "endpoint_name": "summarize", 10 | "payload": { 11 | "parameters": { 12 | "max_length": { 13 | "default": 200, 14 | "range": [ 15 | 10, 16 | 500 17 | ] 18 | }, 19 | "num_return_sequences": { 20 | "default": 10, 21 | "range": [ 22 | 0, 23 | 10 24 | ] 25 | }, 26 | "num_beams": { 27 | "default": 3, 28 | "range": [ 29 | 0, 30 | 10 31 | ] 32 | }, 33 | "temperature": { 34 | "default": 0.5, 35 | "range": [ 36 | 0, 37 | 1 38 | ] 39 | }, 40 | "early_stopping": { 41 | "default": true, 42 | "range": [ 43 | true, 44 | false 45 | ] 46 | }, 47 | "stopwords_list": { 48 | "default": [ 49 | "stop", 50 | "dot" 51 | ], 52 | "range": [ 53 | "a", 54 | "an", 55 | "the", 56 | "and", 57 | "it", 58 | "for", 59 | "or", 60 | "but", 61 | "in", 62 | "my", 63 | "your", 64 | "our", 65 | "stop", 66 | "dot" 67 | ] 68 | } 69 | } 70 | } 71 | } 72 | """ 73 | 74 | parameters_help_map = { 75 | "max_length": "Model generates text until the output length (which includes the input context length) reaches max_length. If specified, it must be a positive integer.", 76 | "num_return_sequences": "Number of output sequences returned. If specified, it must be a positive integer.", 77 | "num_beams": "Number of beams used in the greedy search. If specified, it must be integer greater than or equal to num_return_sequences.", 78 | "no_repeat_ngram_size": "Model ensures that a sequence of words of no_repeat_ngram_size is not repeated in the output sequence. If specified, it must be a positive integer greater than 1.", 79 | "temperature": "Controls the randomness in the output. 
Higher temperature results in output sequence with low-probability words and lower temperature results in output sequence with high-probability words. If temperature -> 0, it results in greedy decoding. If specified, it must be a positive float.", 80 | "early_stopping": "If True, text generation is finished when all beam hypotheses reach the end of stence token. If specified, it must be boolean.", 81 | "do_sample": "If True, sample the next word as per the likelyhood. If specified, it must be boolean.", 82 | "top_k": "In each step of text generation, sample from only the top_k most likely words. If specified, it must be a positive integer.", 83 | "top_p": "In each step of text generation, sample from the smallest possible set of words with cumulative probability top_p. If specified, it must be a float between 0 and 1.", 84 | "seed": "Fix the randomized state for reproducibility. If specified, it must be an integer.", 85 | } 86 | 87 | example_list = [" ", "Table Q&A", "Product description", "Summarize reviews", "Generate SQL"] 88 | example_context_ai21_qa = ["Sample Context ", "Financial", "Healthcare"] 89 | example_prompts_ai21 = { 90 | "Table Q&A": "| Ship Name | Color | Total Passengers | Status | Captain | \n \ 91 | | Symphony | White | 7700 | Active | Mike | \n \ 92 | | Wonder | Grey | 7900 | Under Construction | Anna | \n \ 93 | | Odyssey | White | 5800 | Active | Mohammed | \n \ 94 | | Quantum | White | 5700 | Active | Ricardo | \n \ 95 | | Mariner | Grey | 4300 | Active | Saanvi | \n \ 96 | Q: Which active ship carries the most passengers? \n \ 97 | A: Symphony \n \ 98 | Q: What is the color of the ship whose captain is Saanvi? \n \ 99 | A: Grey \n \ 100 | Q: How many passengers does Ricardo's ship carry? \n \ 101 | A:", 102 | "Product description": "Write an engaging product description for clothing eCommerce site. Make sure to include the following features in the description. 
\n \ 103 | Product: Humor Men's Graphic T-Shirt with a print of Einstein's quote: \"artificial intelligence is no match for natural stupidity” \n \ 104 | Features: \n \ 105 | - Soft cotton \n \ 106 | - Short sleeve \n \ 107 | Description:", 108 | "Summarize reviews": "Summarize the following restaurant review \n \ 109 | Restaurant: Luigi's \n \ 110 | Review: We were passing through SF on a Thursday afternoon and wanted some Italian food. We passed by a couple places which were packed until finally stopping at Luigi's, mainly because it was a little less crowded and the people seemed to be mostly locals. We ordered the tagliatelle and mozzarella caprese. The tagliatelle were a work of art - the pasta was just right and the tomato sauce with fresh basil was perfect. The caprese was OK but nothing out of the ordinary. Service was slow at first but overall it was fine. Other than that - Luigi's great experience! \n \ 111 | Summary: Local spot. Not crowded. Excellent tagliatelle with tomato sauce. Service slow at first. \n \ 112 | ## \n \ 113 | Summarize the following restaurant review \n \ 114 | Restaurant: La Taqueria \n \ 115 | Review: La Taqueria is a tiny place with 3 long tables inside and 2 small tables outside. The inside is cramped, but the outside is pleasant. Unfortunately, we had to sit inside as all the outside tables were taken. The tacos are delicious and reasonably priced and the salsa is spicy and flavorful. Service was friendly. Aside from the seating, the only thing I didn't like was the lack of parking - we had to walk six blocks to find a spot. \n \ 116 | Summary:", 117 | 118 | "Generate SQL": "Create SQL statement from instruction. 
\n \ 119 | Database: Customers(CustomerID, CustomerName, ContactName, Address, City, PostalCode, Country)\n \ 120 | Request: all the countries we have customers in without repetitions.\n \ 121 | SQL statement:\n \ 122 | SELECT DISTINCT Country FROM Customers;\n \ 123 | ##\n \ 124 | Create SQL statement from instruction.\n \ 125 | Database: Orders(OrderID, CustomerID, EmployeeID, OrderDate, ShipperID)\n \ 126 | Request: select all the orders from customer id 1.\n \ 127 | SQL statement:\n \ 128 | SELECT * FROM Orders\n \ 129 | WHERE CustomerID = 1;\n \ 130 | ##\n \ 131 | Create SQL statement from instruction.\n \ 132 | Database: Products(ProductID, ProductName, SupplierID, CategoryID, Unit, Price)\n \ 133 | Request: selects all products from categories 1 and 7\n \ 134 | SQL statement:\n \ 135 | SELECT * FROM Products\n \ 136 | WHERE CategoryID = 1 OR CategoryID = 7;\n \ 137 | ##\n \ 138 | Create SQL statement from instruction.\n \ 139 | Database: Customers(CustomerID, CustomerName, ContactName, Address, City, PostalCode, Country)\n \ 140 | Request: change the first customer's name to Alfred Schmidt who lives in Frankfurt city.\n \ 141 | SQL statement:", 142 | } 143 | 144 | example_context_ai21_qa = { 145 | "Financial": "n 2020 and 2021, enormous QE — approximately $4.4 trillion, or 18%, of 2021 gross domestic product (GDP) — and enormous fiscal stimulus (which has been and always will be inflationary) — approximately $5 trillion, or 21%, of 2021 GDP — stabilized markets and allowed companies to raise enormous amounts of capital. In addition, this infusion of capital saved many small businesses and put more than $2.5 trillion in the hands of consumers and almost $1 trillion into state and local coffers. These actions led to a rapid decline in unemployment, dropping from 15% to under 4% in 20 months — the magnitude and speed of which were both unprecedented. 
Additionally, the economy grew 7% in 2021 despite the arrival of the Delta and Omicron variants and the global supply chain shortages, which were largely fueled by the dramatic upswing in consumer spending and the shift in that spend from services to goods. Fortunately, during these two years, vaccines for COVID-19 were also rapidly developed and distributed. \n \ 146 | In today's economy, the consumer is in excellent financial shape (on average), with leverage among the lowest on record, excellent mortgage underwriting (even though we've had home price appreciation), plentiful jobs with wage increases and more than $2 trillion in excess savings, mostly due to government stimulus. Most consumers and companies (and states) are still flush with the money generated in 2020 and 2021, with consumer spending over the last several months 12% above pre-COVID-19 levels. (But we must recognize that the account balances in lower-income households, smaller to begin with, are going down faster and that income for those households is not keeping pace with rising inflation.) \n \ 147 | Today's economic landscape is completely different from the 2008 financial crisis when the consumer was extraordinarily overleveraged, as was the financial system as a whole — from banks and investment banks to shadow banks, hedge funds, private equity, Fannie Mae and many other entities. In addition, home price appreciation, fed by bad underwriting and leverage in the mortgage system, led to excessive speculation, which was missed by virtually everyone — eventually leading to nearly $1 trillion in actual losses.", 148 | "Healthcare": "A heart attack occurs when blood flow that brings \ 149 | oxygen-rich blood to the heart muscle is severely \ 150 | reduced or cut off. This is due to a buildup of fat,\ 151 | cholesterol and other substances (plaque) that narrows\ 152 | coronary arteries. 
This process is called atherosclerosis.\ 153 | When plaque in a heart artery breaks open, a blood clot\ 154 | forms. The clot can block blood flow. When it completely\ 155 | stops blood flow to part of the heart muscle, that\ 156 | portion of muscle begins to die. Damage increases the\ 157 | longer an artery stays blocked. Once some of the heart\ 158 | muscle dies, permanent heart damage results.\ 159 | The amount of damage to the heart muscle depends on\ 160 | the size of the area supplied by the blocked artery and\ 161 | the time between injury and treatment. The blocked\ 162 | artery should be opened as soon as possible to reduce\ 163 | heart damage. \n \ 164 | Atherosclerosis develops over time. It often has no symptoms\ 165 | until enough damage lessens blood flow to your heart\ 166 | muscle. That means you usually can’t feel it happening until\ 167 | blood flow to heart muscle is blocked. \n \ 168 | You should know the warning signs of heart attack so you\ 169 | can get help right away for yourself or someone else.\ 170 | Some heart attacks are sudden and intense. But most start\ 171 | slowly, with mild pain or discomfort. Signs of a heart attack\ 172 | include:\n\ 173 | • Uncomfortable pressure, squeezing, fullness or pain in the\ 174 | center of your chest. It lasts more than a few minutes, or\ 175 | goes away and comes back.\n\ 176 | • Pain or discomfort in one or both arms, your back, neck,\ 177 | jaw or stomach.\n\ 178 | • Shortness of breath with or without chest discomfort.\n\ 179 | • Other signs such as breaking out in a cold sweat, nausea\ 180 | or lightheadedness." 
181 | } 182 | parameters_help_map = defaultdict(str, parameters_help_map) 183 | example_prompts_ai21 = defaultdict(str, example_prompts_ai21) 184 | example_context_ai21_qa = defaultdict(str, example_context_ai21_qa) 185 | -------------------------------------------------------------------------------- /lab2/studio-playground-ui/main.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | from collections import defaultdict 3 | import io 4 | import jinja2 5 | import json 6 | import os 7 | from pathlib import Path 8 | import random 9 | from streamlit_ace import st_ace 10 | import streamlit as st 11 | import string 12 | from io import StringIO 13 | import re 14 | from invoke_endpoint import * 15 | from dict import * 16 | 17 | N = 7 18 | template_loader = jinja2.FileSystemLoader(searchpath="./") 19 | template_env = jinja2.Environment(loader=template_loader) 20 | 21 | 22 | def list_templates(dir_path): 23 | # folder path 24 | templates = [] 25 | # Iterate directory 26 | for path in os.listdir(dir_path): 27 | # check if current path is a file 28 | if os.path.isfile(os.path.join(dir_path, path)): 29 | templates.append(path.split(".")[0]) 30 | return templates 31 | 32 | 33 | def read_template(template_path): 34 | template = template_env.get_template(template_path) 35 | output_text = template.render() 36 | return output_text 37 | 38 | 39 | def is_valid_default(parameter, minimum, maximum): 40 | # parameter is a list 41 | if type(parameter) == list: 42 | return True 43 | 44 | # parameter is an int or float and is in valid range 45 | if parameter <= maximum and parameter >= minimum: 46 | return True 47 | 48 | # parameter is a bool 49 | if type(parameter) == bool and type(minimum) == bool and type(maximum) == bool: 50 | return True 51 | return False 52 | 53 | 54 | def get_user_input(): 55 | uploaded_file = st.file_uploader( 56 | label="Upload JSON Template", type=["json"]) 57 | uploaded_file_location = st.empty() 58 | 59 | # 
user uploads an image 60 | if uploaded_file is not None: 61 | input_str = json.load(uploaded_file) 62 | if validate_json_template(input_str): 63 | user_file_path = os.path.join( 64 | "templates", input_str["model_name"] + ".template.json" 65 | ) 66 | with open(user_file_path, "wb") as user_file: 67 | user_file.write(uploaded_file.getbuffer()) 68 | uploaded_file_location.write( 69 | "Template Uploaded: " + str(user_file_path)) 70 | st.session_state["new_template_added"] = True 71 | else: 72 | uploaded_file_location.warning( 73 | "Invalid Input: please upload a valid template.json" 74 | ) 75 | else: 76 | user_file_path = None 77 | 78 | return user_file_path 79 | 80 | 81 | @st.cache_data 82 | def validate_json_template(json_dictionary): 83 | expected_keys = {"model_name", "endpoint_name", "payload"} 84 | actual_keys = set(json_dictionary.keys()) 85 | 86 | if not expected_keys.issubset(actual_keys): 87 | st.warning( 88 | "Invalid Input: template.json must contain a modelName, endpoint_name, and payload keys" 89 | ) 90 | return False 91 | 92 | if not "parameters" in json_dictionary["payload"].keys() and not type( 93 | json_dictionary["payload"]["parameters"] == list 94 | ): 95 | st.warning( 96 | "Invalid Input: template.json must contain a payload key with parameters listed" 97 | ) 98 | return False 99 | return True 100 | 101 | 102 | @st.cache_data 103 | def handle_editor_content(input_str): 104 | if validate_json_template(input_str): 105 | try: 106 | model_name = input_str["model_name"] 107 | filename = model_name + ".template.json" 108 | user_file_path = os.path.join("templates", filename) 109 | with open(user_file_path, "w+") as f: 110 | json.dump(input_str, f) 111 | 112 | st.write("json saved at " + str(user_file_path)) 113 | st.session_state["new_template_added"] = True 114 | 115 | except Exception as e: 116 | st.write(e) 117 | 118 | 119 | def handle_parameters(parameters): 120 | for p in parameters: 121 | minimum = parameters[p]["range"][0] 122 | maximum = 
parameters[p]["range"][-1] 123 | default = parameters[p]["default"] 124 | parameter_range = parameters[p]["range"] 125 | parameter_help = parameters_help_map[p] 126 | if not is_valid_default(default, minimum, maximum): 127 | st.warning( 128 | "Invalid Default: " 129 | + p 130 | + " default value does not follow the convention default >= min and default <= max." 131 | ) 132 | elif len(parameter_range) > 2: 133 | if not set(default).issubset(set(parameter_range)): 134 | st.warning( 135 | "Invalid Default: " 136 | + p 137 | + " Every Multiselect default value must exist in options" 138 | ) 139 | else: 140 | parameters[p] = st.sidebar.multiselect( 141 | p, options=parameter_range, default=default 142 | ) 143 | 144 | elif type(minimum) == int and type(maximum) == int and type(default) == int: 145 | parameters[p] = st.sidebar.slider( 146 | p, 147 | min_value=minimum, 148 | max_value=maximum, 149 | value=default, 150 | step=1, 151 | help=parameter_help, 152 | ) 153 | elif type(minimum) == bool and type(maximum) == bool and type(default) == bool: 154 | parameters[p] = st.sidebar.selectbox( 155 | p, ["True", "False"], help=parameter_help 156 | ) 157 | elif ( 158 | type(minimum) == float and type( 159 | maximum) == float and type(default) == float 160 | ): 161 | parameters[p] = st.sidebar.slider( 162 | p, 163 | min_value=float(minimum), 164 | max_value=float(maximum), 165 | value=float(default), 166 | step=0.01, 167 | help=parameter_help, 168 | ) 169 | else: 170 | st.warning( 171 | "Invalid Parameter: " 172 | + p 173 | + " is not a valid parameter for this model or the parameter type is not supported in this demo." 
174 | ) 175 | return parameters 176 | 177 | 178 | def on_clicked(): 179 | st.session_state.text = example_prompts_ai21[st.session_state.task] 180 | 181 | 182 | def on_clicked_qa(): 183 | st.session_state.text = example_context_ai21_qa[st.session_state.taskqa] 184 | 185 | 186 | def main(): 187 | default_endpoint_option = "Select" 188 | st.session_state["new_template_added"] = False 189 | sidebar_selectbox = st.sidebar.empty() 190 | selected_endpoint = sidebar_selectbox.selectbox( 191 | label="Select the endpoint to run in SageMaker", 192 | options=[default_endpoint_option] + list_templates("templates"), 193 | ) 194 | 195 | st.sidebar.title("Model Parameters") 196 | st.image("./ml_image_prompt.png") 197 | 198 | # Adding your own model 199 | with st.expander("Add a New Model"): 200 | st.header("Add a New Model") 201 | st.write( 202 | """Add a new model by uploading a .template.json file or by pasting the dictionary 203 | in the editor. A model template is a json dictionary containing a model_name, 204 | endpoint_name, and payload with parameters. 
\n \n Below is an example of a 205 | template.json""" 206 | ) 207 | res = "".join(random.choices( 208 | string.ascii_uppercase + string.digits, k=N)) 209 | get_user_input() 210 | 211 | # Spawn a new Ace editor and display editor's content as you type 212 | content = st_ace( 213 | theme="tomorrow_night", 214 | wrap=True, 215 | show_gutter=True, 216 | language="json", 217 | value=code_example, 218 | keybinding="vscode", 219 | min_lines=15, 220 | ) 221 | 222 | if content != code_example: 223 | input_str = json.loads(content) 224 | handle_editor_content(input_str) 225 | templates = list_templates("templates") 226 | 227 | if st.session_state["new_template_added"]: 228 | res = "".join(random.choices( 229 | string.ascii_uppercase + string.digits, k=N)) 230 | selected_endpoint = sidebar_selectbox.selectbox( 231 | label="Select the endpoint to run in SageMaker", 232 | options=list_templates("templates"), 233 | key=res, 234 | ) 235 | 236 | # Prompt Engineering Playground 237 | st.header("Prompt Engineering Playground") 238 | if selected_endpoint != default_endpoint_option: 239 | output_text = read_template( 240 | f"templates/{selected_endpoint}.template.json") 241 | output = json.loads(output_text) 242 | parameters = output["payload"]["parameters"] 243 | print("parameters ------------------ ", parameters) 244 | if parameters != "None": 245 | parameters = handle_parameters(parameters) 246 | 247 | st.markdown( 248 | output["description"] 249 | ) 250 | if selected_endpoint == "AI21-J2-GRANDE-INSTRUCT": 251 | selected_task = st.selectbox( 252 | label="Example prompts", 253 | options=example_list, 254 | on_change=on_clicked, 255 | key="task" 256 | ) 257 | if selected_endpoint == "AI21-CONTEXT-QA": 258 | selected_task = st.selectbox( 259 | label="Example context", 260 | options=example_context_ai21_qa, 261 | on_change=on_clicked_qa, 262 | key="taskqa" 263 | ) 264 | if selected_endpoint == "AI21-SUMMARY" or selected_endpoint == "AI21-CONTEXT-QA": 265 | uploaded_file = 
st.file_uploader("Choose a file") 266 | if uploaded_file is not None: 267 | stringio = StringIO(uploaded_file.getvalue().decode("utf-8")) 268 | # To read file as string: 269 | string_data = stringio.read() 270 | st.session_state.text = string_data 271 | prompt = st.session_state.text 272 | 273 | prompt = st.text_area("Enter your prompt here:", height=350, key="text") 274 | if selected_endpoint == "AI21-CONTEXT-QA": 275 | question = st.text_area( 276 | "Enter your question here", height=80, key="question") 277 | placeholder = st.empty() 278 | 279 | if st.button("Run"): 280 | final_text = "" 281 | if selected_endpoint != default_endpoint_option: 282 | placeholder = st.empty() 283 | endpoint_name = output["endpoint_name"] 284 | print(parameters) 285 | if parameters != "None": 286 | payload = {"inputs": prompt, "parameters": {**parameters}} 287 | else: 288 | payload = {"inputs": prompt} 289 | if output["model_type"] == "AI21": 290 | print('-------- Payload ----------', payload) 291 | generated_text = generate_text_ai21(payload, endpoint_name) 292 | final_text = f''' {generated_text} ''' # to take care of multi line prompt 293 | st.write(final_text) 294 | elif output["model_type"] == "AI21-SUMMARY": 295 | generated_text = generate_text_ai21_summarize( 296 | payload, endpoint_name) 297 | summaries = generated_text.split("\n") 298 | for summary in summaries: 299 | st.markdown("- " + summary) 300 | final_text += summary 301 | elif output["model_type"] == "AI21-CONTEXT-QA": 302 | generated_text = generate_text_ai21_context_qa( 303 | payload, question, endpoint_name) 304 | final_text = f''' {generated_text} ''' # to take care of multi line prompt 305 | st.write(final_text) 306 | else: 307 | generated_text = generate_text(payload, endpoint_name) 308 | final_text = f''' {generated_text} ''' # to take care of multi line prompt 309 | st.write(final_text) 310 | else: 311 | st.warning("Invalid Endpoint: Please select a valid endpoint") 312 | st.download_button("Download", 
final_text, file_name="output.txt") 313 | 314 | 315 | if __name__ == "__main__": 316 | main() 317 | -------------------------------------------------------------------------------- /lab2/studio-playground-ui/Run_My_PlayGround.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "3711c999-fb77-48da-a372-684e9a721758", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "from IPython.display import IFrame\n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "id": "6b95f3fa-1444-45a0-8e9d-ccfad7c3df8d", 19 | "metadata": { 20 | "tags": [] 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "import sagemaker\n", 25 | "import jinja2\n", 26 | "from sagemaker import image_uris\n", 27 | "import boto3\n", 28 | "import os\n", 29 | "import time\n", 30 | "import json\n", 31 | "from pathlib import Path\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "id": "54511796-5e5d-401b-beb3-e37abb31eda3", 38 | "metadata": { 39 | "tags": [] 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "import boto3\n", 44 | "\n", 45 | "region = boto3.Session().region_name\n", 46 | "client = boto3.Session().client(\"sagemaker\")\n", 47 | "domainId=client.list_domains()[\"Domains\"][0].get(\"DomainId\")\n", 48 | "sess= sagemaker.session.Session()\n", 49 | "region = sess._region_name\n", 50 | "account_id = sess.account_id()\n", 51 | "\n", 52 | "vpc_id = client.describe_domain(\n", 53 | " DomainId=client.list_domains()[\"Domains\"][0].get(\"DomainId\")\n", 54 | ").get(\"VpcId\")\n", 55 | "\n", 56 | "url = f\"https://{domainId}.studio.{region}.sagemaker.aws/jupyter/default/proxy/8501/\"\n", 57 | "print(url )" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "id": "2e4ea496-d568-4f82-a90f-0818e4c65d98", 64 | "metadata": { 65 | "tags": [] 66 | }, 67 | 
"outputs": [], 68 | "source": [ 69 | "IFrame(url, width='90%', height='1100')" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "id": "7e177fe3-f391-4ee2-90a1-c98f05e571d9", 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "id": "2bf3905a-7344-463b-98eb-569677c97034", 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [] 87 | } 88 | ], 89 | "metadata": { 90 | "availableInstances": [ 91 | { 92 | "_defaultOrder": 0, 93 | "_isFastLaunch": true, 94 | "category": "General purpose", 95 | "gpuNum": 0, 96 | "hideHardwareSpecs": false, 97 | "memoryGiB": 4, 98 | "name": "ml.t3.medium", 99 | "vcpuNum": 2 100 | }, 101 | { 102 | "_defaultOrder": 1, 103 | "_isFastLaunch": false, 104 | "category": "General purpose", 105 | "gpuNum": 0, 106 | "hideHardwareSpecs": false, 107 | "memoryGiB": 8, 108 | "name": "ml.t3.large", 109 | "vcpuNum": 2 110 | }, 111 | { 112 | "_defaultOrder": 2, 113 | "_isFastLaunch": false, 114 | "category": "General purpose", 115 | "gpuNum": 0, 116 | "hideHardwareSpecs": false, 117 | "memoryGiB": 16, 118 | "name": "ml.t3.xlarge", 119 | "vcpuNum": 4 120 | }, 121 | { 122 | "_defaultOrder": 3, 123 | "_isFastLaunch": false, 124 | "category": "General purpose", 125 | "gpuNum": 0, 126 | "hideHardwareSpecs": false, 127 | "memoryGiB": 32, 128 | "name": "ml.t3.2xlarge", 129 | "vcpuNum": 8 130 | }, 131 | { 132 | "_defaultOrder": 4, 133 | "_isFastLaunch": true, 134 | "category": "General purpose", 135 | "gpuNum": 0, 136 | "hideHardwareSpecs": false, 137 | "memoryGiB": 8, 138 | "name": "ml.m5.large", 139 | "vcpuNum": 2 140 | }, 141 | { 142 | "_defaultOrder": 5, 143 | "_isFastLaunch": false, 144 | "category": "General purpose", 145 | "gpuNum": 0, 146 | "hideHardwareSpecs": false, 147 | "memoryGiB": 16, 148 | "name": "ml.m5.xlarge", 149 | "vcpuNum": 4 150 | }, 151 | { 152 | "_defaultOrder": 6, 153 | "_isFastLaunch": false, 154 | "category": "General 
purpose", 155 | "gpuNum": 0, 156 | "hideHardwareSpecs": false, 157 | "memoryGiB": 32, 158 | "name": "ml.m5.2xlarge", 159 | "vcpuNum": 8 160 | }, 161 | { 162 | "_defaultOrder": 7, 163 | "_isFastLaunch": false, 164 | "category": "General purpose", 165 | "gpuNum": 0, 166 | "hideHardwareSpecs": false, 167 | "memoryGiB": 64, 168 | "name": "ml.m5.4xlarge", 169 | "vcpuNum": 16 170 | }, 171 | { 172 | "_defaultOrder": 8, 173 | "_isFastLaunch": false, 174 | "category": "General purpose", 175 | "gpuNum": 0, 176 | "hideHardwareSpecs": false, 177 | "memoryGiB": 128, 178 | "name": "ml.m5.8xlarge", 179 | "vcpuNum": 32 180 | }, 181 | { 182 | "_defaultOrder": 9, 183 | "_isFastLaunch": false, 184 | "category": "General purpose", 185 | "gpuNum": 0, 186 | "hideHardwareSpecs": false, 187 | "memoryGiB": 192, 188 | "name": "ml.m5.12xlarge", 189 | "vcpuNum": 48 190 | }, 191 | { 192 | "_defaultOrder": 10, 193 | "_isFastLaunch": false, 194 | "category": "General purpose", 195 | "gpuNum": 0, 196 | "hideHardwareSpecs": false, 197 | "memoryGiB": 256, 198 | "name": "ml.m5.16xlarge", 199 | "vcpuNum": 64 200 | }, 201 | { 202 | "_defaultOrder": 11, 203 | "_isFastLaunch": false, 204 | "category": "General purpose", 205 | "gpuNum": 0, 206 | "hideHardwareSpecs": false, 207 | "memoryGiB": 384, 208 | "name": "ml.m5.24xlarge", 209 | "vcpuNum": 96 210 | }, 211 | { 212 | "_defaultOrder": 12, 213 | "_isFastLaunch": false, 214 | "category": "General purpose", 215 | "gpuNum": 0, 216 | "hideHardwareSpecs": false, 217 | "memoryGiB": 8, 218 | "name": "ml.m5d.large", 219 | "vcpuNum": 2 220 | }, 221 | { 222 | "_defaultOrder": 13, 223 | "_isFastLaunch": false, 224 | "category": "General purpose", 225 | "gpuNum": 0, 226 | "hideHardwareSpecs": false, 227 | "memoryGiB": 16, 228 | "name": "ml.m5d.xlarge", 229 | "vcpuNum": 4 230 | }, 231 | { 232 | "_defaultOrder": 14, 233 | "_isFastLaunch": false, 234 | "category": "General purpose", 235 | "gpuNum": 0, 236 | "hideHardwareSpecs": false, 237 | "memoryGiB": 32, 238 | 
"name": "ml.m5d.2xlarge", 239 | "vcpuNum": 8 240 | }, 241 | { 242 | "_defaultOrder": 15, 243 | "_isFastLaunch": false, 244 | "category": "General purpose", 245 | "gpuNum": 0, 246 | "hideHardwareSpecs": false, 247 | "memoryGiB": 64, 248 | "name": "ml.m5d.4xlarge", 249 | "vcpuNum": 16 250 | }, 251 | { 252 | "_defaultOrder": 16, 253 | "_isFastLaunch": false, 254 | "category": "General purpose", 255 | "gpuNum": 0, 256 | "hideHardwareSpecs": false, 257 | "memoryGiB": 128, 258 | "name": "ml.m5d.8xlarge", 259 | "vcpuNum": 32 260 | }, 261 | { 262 | "_defaultOrder": 17, 263 | "_isFastLaunch": false, 264 | "category": "General purpose", 265 | "gpuNum": 0, 266 | "hideHardwareSpecs": false, 267 | "memoryGiB": 192, 268 | "name": "ml.m5d.12xlarge", 269 | "vcpuNum": 48 270 | }, 271 | { 272 | "_defaultOrder": 18, 273 | "_isFastLaunch": false, 274 | "category": "General purpose", 275 | "gpuNum": 0, 276 | "hideHardwareSpecs": false, 277 | "memoryGiB": 256, 278 | "name": "ml.m5d.16xlarge", 279 | "vcpuNum": 64 280 | }, 281 | { 282 | "_defaultOrder": 19, 283 | "_isFastLaunch": false, 284 | "category": "General purpose", 285 | "gpuNum": 0, 286 | "hideHardwareSpecs": false, 287 | "memoryGiB": 384, 288 | "name": "ml.m5d.24xlarge", 289 | "vcpuNum": 96 290 | }, 291 | { 292 | "_defaultOrder": 20, 293 | "_isFastLaunch": false, 294 | "category": "General purpose", 295 | "gpuNum": 0, 296 | "hideHardwareSpecs": true, 297 | "memoryGiB": 0, 298 | "name": "ml.geospatial.interactive", 299 | "supportedImageNames": [ 300 | "sagemaker-geospatial-v1-0" 301 | ], 302 | "vcpuNum": 0 303 | }, 304 | { 305 | "_defaultOrder": 21, 306 | "_isFastLaunch": true, 307 | "category": "Compute optimized", 308 | "gpuNum": 0, 309 | "hideHardwareSpecs": false, 310 | "memoryGiB": 4, 311 | "name": "ml.c5.large", 312 | "vcpuNum": 2 313 | }, 314 | { 315 | "_defaultOrder": 22, 316 | "_isFastLaunch": false, 317 | "category": "Compute optimized", 318 | "gpuNum": 0, 319 | "hideHardwareSpecs": false, 320 | "memoryGiB": 8, 321 | 
"name": "ml.c5.xlarge", 322 | "vcpuNum": 4 323 | }, 324 | { 325 | "_defaultOrder": 23, 326 | "_isFastLaunch": false, 327 | "category": "Compute optimized", 328 | "gpuNum": 0, 329 | "hideHardwareSpecs": false, 330 | "memoryGiB": 16, 331 | "name": "ml.c5.2xlarge", 332 | "vcpuNum": 8 333 | }, 334 | { 335 | "_defaultOrder": 24, 336 | "_isFastLaunch": false, 337 | "category": "Compute optimized", 338 | "gpuNum": 0, 339 | "hideHardwareSpecs": false, 340 | "memoryGiB": 32, 341 | "name": "ml.c5.4xlarge", 342 | "vcpuNum": 16 343 | }, 344 | { 345 | "_defaultOrder": 25, 346 | "_isFastLaunch": false, 347 | "category": "Compute optimized", 348 | "gpuNum": 0, 349 | "hideHardwareSpecs": false, 350 | "memoryGiB": 72, 351 | "name": "ml.c5.9xlarge", 352 | "vcpuNum": 36 353 | }, 354 | { 355 | "_defaultOrder": 26, 356 | "_isFastLaunch": false, 357 | "category": "Compute optimized", 358 | "gpuNum": 0, 359 | "hideHardwareSpecs": false, 360 | "memoryGiB": 96, 361 | "name": "ml.c5.12xlarge", 362 | "vcpuNum": 48 363 | }, 364 | { 365 | "_defaultOrder": 27, 366 | "_isFastLaunch": false, 367 | "category": "Compute optimized", 368 | "gpuNum": 0, 369 | "hideHardwareSpecs": false, 370 | "memoryGiB": 144, 371 | "name": "ml.c5.18xlarge", 372 | "vcpuNum": 72 373 | }, 374 | { 375 | "_defaultOrder": 28, 376 | "_isFastLaunch": false, 377 | "category": "Compute optimized", 378 | "gpuNum": 0, 379 | "hideHardwareSpecs": false, 380 | "memoryGiB": 192, 381 | "name": "ml.c5.24xlarge", 382 | "vcpuNum": 96 383 | }, 384 | { 385 | "_defaultOrder": 29, 386 | "_isFastLaunch": true, 387 | "category": "Accelerated computing", 388 | "gpuNum": 1, 389 | "hideHardwareSpecs": false, 390 | "memoryGiB": 16, 391 | "name": "ml.g4dn.xlarge", 392 | "vcpuNum": 4 393 | }, 394 | { 395 | "_defaultOrder": 30, 396 | "_isFastLaunch": false, 397 | "category": "Accelerated computing", 398 | "gpuNum": 1, 399 | "hideHardwareSpecs": false, 400 | "memoryGiB": 32, 401 | "name": "ml.g4dn.2xlarge", 402 | "vcpuNum": 8 403 | }, 404 | { 405 | 
"_defaultOrder": 31, 406 | "_isFastLaunch": false, 407 | "category": "Accelerated computing", 408 | "gpuNum": 1, 409 | "hideHardwareSpecs": false, 410 | "memoryGiB": 64, 411 | "name": "ml.g4dn.4xlarge", 412 | "vcpuNum": 16 413 | }, 414 | { 415 | "_defaultOrder": 32, 416 | "_isFastLaunch": false, 417 | "category": "Accelerated computing", 418 | "gpuNum": 1, 419 | "hideHardwareSpecs": false, 420 | "memoryGiB": 128, 421 | "name": "ml.g4dn.8xlarge", 422 | "vcpuNum": 32 423 | }, 424 | { 425 | "_defaultOrder": 33, 426 | "_isFastLaunch": false, 427 | "category": "Accelerated computing", 428 | "gpuNum": 4, 429 | "hideHardwareSpecs": false, 430 | "memoryGiB": 192, 431 | "name": "ml.g4dn.12xlarge", 432 | "vcpuNum": 48 433 | }, 434 | { 435 | "_defaultOrder": 34, 436 | "_isFastLaunch": false, 437 | "category": "Accelerated computing", 438 | "gpuNum": 1, 439 | "hideHardwareSpecs": false, 440 | "memoryGiB": 256, 441 | "name": "ml.g4dn.16xlarge", 442 | "vcpuNum": 64 443 | }, 444 | { 445 | "_defaultOrder": 35, 446 | "_isFastLaunch": false, 447 | "category": "Accelerated computing", 448 | "gpuNum": 1, 449 | "hideHardwareSpecs": false, 450 | "memoryGiB": 61, 451 | "name": "ml.p3.2xlarge", 452 | "vcpuNum": 8 453 | }, 454 | { 455 | "_defaultOrder": 36, 456 | "_isFastLaunch": false, 457 | "category": "Accelerated computing", 458 | "gpuNum": 4, 459 | "hideHardwareSpecs": false, 460 | "memoryGiB": 244, 461 | "name": "ml.p3.8xlarge", 462 | "vcpuNum": 32 463 | }, 464 | { 465 | "_defaultOrder": 37, 466 | "_isFastLaunch": false, 467 | "category": "Accelerated computing", 468 | "gpuNum": 8, 469 | "hideHardwareSpecs": false, 470 | "memoryGiB": 488, 471 | "name": "ml.p3.16xlarge", 472 | "vcpuNum": 64 473 | }, 474 | { 475 | "_defaultOrder": 38, 476 | "_isFastLaunch": false, 477 | "category": "Accelerated computing", 478 | "gpuNum": 8, 479 | "hideHardwareSpecs": false, 480 | "memoryGiB": 768, 481 | "name": "ml.p3dn.24xlarge", 482 | "vcpuNum": 96 483 | }, 484 | { 485 | "_defaultOrder": 39, 486 | 
"_isFastLaunch": false, 487 | "category": "Memory Optimized", 488 | "gpuNum": 0, 489 | "hideHardwareSpecs": false, 490 | "memoryGiB": 16, 491 | "name": "ml.r5.large", 492 | "vcpuNum": 2 493 | }, 494 | { 495 | "_defaultOrder": 40, 496 | "_isFastLaunch": false, 497 | "category": "Memory Optimized", 498 | "gpuNum": 0, 499 | "hideHardwareSpecs": false, 500 | "memoryGiB": 32, 501 | "name": "ml.r5.xlarge", 502 | "vcpuNum": 4 503 | }, 504 | { 505 | "_defaultOrder": 41, 506 | "_isFastLaunch": false, 507 | "category": "Memory Optimized", 508 | "gpuNum": 0, 509 | "hideHardwareSpecs": false, 510 | "memoryGiB": 64, 511 | "name": "ml.r5.2xlarge", 512 | "vcpuNum": 8 513 | }, 514 | { 515 | "_defaultOrder": 42, 516 | "_isFastLaunch": false, 517 | "category": "Memory Optimized", 518 | "gpuNum": 0, 519 | "hideHardwareSpecs": false, 520 | "memoryGiB": 128, 521 | "name": "ml.r5.4xlarge", 522 | "vcpuNum": 16 523 | }, 524 | { 525 | "_defaultOrder": 43, 526 | "_isFastLaunch": false, 527 | "category": "Memory Optimized", 528 | "gpuNum": 0, 529 | "hideHardwareSpecs": false, 530 | "memoryGiB": 256, 531 | "name": "ml.r5.8xlarge", 532 | "vcpuNum": 32 533 | }, 534 | { 535 | "_defaultOrder": 44, 536 | "_isFastLaunch": false, 537 | "category": "Memory Optimized", 538 | "gpuNum": 0, 539 | "hideHardwareSpecs": false, 540 | "memoryGiB": 384, 541 | "name": "ml.r5.12xlarge", 542 | "vcpuNum": 48 543 | }, 544 | { 545 | "_defaultOrder": 45, 546 | "_isFastLaunch": false, 547 | "category": "Memory Optimized", 548 | "gpuNum": 0, 549 | "hideHardwareSpecs": false, 550 | "memoryGiB": 512, 551 | "name": "ml.r5.16xlarge", 552 | "vcpuNum": 64 553 | }, 554 | { 555 | "_defaultOrder": 46, 556 | "_isFastLaunch": false, 557 | "category": "Memory Optimized", 558 | "gpuNum": 0, 559 | "hideHardwareSpecs": false, 560 | "memoryGiB": 768, 561 | "name": "ml.r5.24xlarge", 562 | "vcpuNum": 96 563 | }, 564 | { 565 | "_defaultOrder": 47, 566 | "_isFastLaunch": false, 567 | "category": "Accelerated computing", 568 | "gpuNum": 1, 
569 | "hideHardwareSpecs": false, 570 | "memoryGiB": 16, 571 | "name": "ml.g5.xlarge", 572 | "vcpuNum": 4 573 | }, 574 | { 575 | "_defaultOrder": 48, 576 | "_isFastLaunch": false, 577 | "category": "Accelerated computing", 578 | "gpuNum": 1, 579 | "hideHardwareSpecs": false, 580 | "memoryGiB": 32, 581 | "name": "ml.g5.2xlarge", 582 | "vcpuNum": 8 583 | }, 584 | { 585 | "_defaultOrder": 49, 586 | "_isFastLaunch": false, 587 | "category": "Accelerated computing", 588 | "gpuNum": 1, 589 | "hideHardwareSpecs": false, 590 | "memoryGiB": 64, 591 | "name": "ml.g5.4xlarge", 592 | "vcpuNum": 16 593 | }, 594 | { 595 | "_defaultOrder": 50, 596 | "_isFastLaunch": false, 597 | "category": "Accelerated computing", 598 | "gpuNum": 1, 599 | "hideHardwareSpecs": false, 600 | "memoryGiB": 128, 601 | "name": "ml.g5.8xlarge", 602 | "vcpuNum": 32 603 | }, 604 | { 605 | "_defaultOrder": 51, 606 | "_isFastLaunch": false, 607 | "category": "Accelerated computing", 608 | "gpuNum": 1, 609 | "hideHardwareSpecs": false, 610 | "memoryGiB": 256, 611 | "name": "ml.g5.16xlarge", 612 | "vcpuNum": 64 613 | }, 614 | { 615 | "_defaultOrder": 52, 616 | "_isFastLaunch": false, 617 | "category": "Accelerated computing", 618 | "gpuNum": 4, 619 | "hideHardwareSpecs": false, 620 | "memoryGiB": 192, 621 | "name": "ml.g5.12xlarge", 622 | "vcpuNum": 48 623 | }, 624 | { 625 | "_defaultOrder": 53, 626 | "_isFastLaunch": false, 627 | "category": "Accelerated computing", 628 | "gpuNum": 4, 629 | "hideHardwareSpecs": false, 630 | "memoryGiB": 384, 631 | "name": "ml.g5.24xlarge", 632 | "vcpuNum": 96 633 | }, 634 | { 635 | "_defaultOrder": 54, 636 | "_isFastLaunch": false, 637 | "category": "Accelerated computing", 638 | "gpuNum": 8, 639 | "hideHardwareSpecs": false, 640 | "memoryGiB": 768, 641 | "name": "ml.g5.48xlarge", 642 | "vcpuNum": 192 643 | }, 644 | { 645 | "_defaultOrder": 55, 646 | "_isFastLaunch": false, 647 | "category": "Accelerated computing", 648 | "gpuNum": 8, 649 | "hideHardwareSpecs": false, 650 | 
"memoryGiB": 1152, 651 | "name": "ml.p4d.24xlarge", 652 | "vcpuNum": 96 653 | }, 654 | { 655 | "_defaultOrder": 56, 656 | "_isFastLaunch": false, 657 | "category": "Accelerated computing", 658 | "gpuNum": 8, 659 | "hideHardwareSpecs": false, 660 | "memoryGiB": 1152, 661 | "name": "ml.p4de.24xlarge", 662 | "vcpuNum": 96 663 | } 664 | ], 665 | "instance_type": "ml.t3.medium", 666 | "kernelspec": { 667 | "display_name": "Python 3 (Data Science 3.0)", 668 | "language": "python", 669 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/sagemaker-data-science-310-v1" 670 | }, 671 | "language_info": { 672 | "codemirror_mode": { 673 | "name": "ipython", 674 | "version": 3 675 | }, 676 | "file_extension": ".py", 677 | "mimetype": "text/x-python", 678 | "name": "python", 679 | "nbconvert_exporter": "python", 680 | "pygments_lexer": "ipython3", 681 | "version": "3.10.6" 682 | } 683 | }, 684 | "nbformat": 4, 685 | "nbformat_minor": 5 686 | } 687 | -------------------------------------------------------------------------------- /lab1/deploy-llama2-changes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "af2ee7fb-e888-4e38-a349-c7c40dfd2963", 6 | "metadata": { 7 | "jupyter": { 8 | "outputs_hidden": true 9 | }, 10 | "tags": [] 11 | }, 12 | "source": [ 13 | "# Llama2 on Amazon SageMaker JumpStart" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "id": "251624f9-1eb6-4051-a774-0a4ba83cabf5", 19 | "metadata": { 20 | "jupyter": { 21 | "outputs_hidden": true 22 | }, 23 | "tags": [] 24 | }, 25 | "source": [ 26 | "---\n", 27 | "In this demo notebook, we demonstrate how to use the SageMaker Python SDK to deploy pre-trained Llama 2 model as well carry out inference using an example prompt.\n", 28 | "\n", 29 | "---" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "id": "e3d9b99d-639b-40f3-91e3-1fe00ee032a4", 35 | "metadata": { 36 | 
"jupyter": { 37 | "outputs_hidden": true 38 | }, 39 | "tags": [] 40 | }, 41 | "source": [ 42 | "### Model License information\n", 43 | "---\n", 44 | "To perform inference on Llama2 models, you need to pass custom_attributes='accept_eula=true' as part of header. This means you have read and accept the end-user-license-agreement (EULA) of the model. EULA can be found in model card description or from https://ai.meta.com/resources/models-and-libraries/llama-downloads/. By default, this notebook sets custom_attributes='accept_eula=false', so all inference requests will fail until you explicitly change this custom attribute.\n", 45 | "\n", 46 | "Note: Custom_attributes used to pass EULA are key/value pairs. The key and value are separated by '=' and pairs are separated by ';'. If the user passes the same key more than once, the last value is kept and passed to the script handler (i.e., in this case, used for conditional logic). For example, if 'accept_eula=false; accept_eula=true' is passed to the server, then 'accept_eula=true' is kept and passed to the script handler.\n", 47 | "\n", 48 | "---" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "id": "019c4fcd-d6c5-4381-8425-1d224c0ac197", 54 | "metadata": { 55 | "jupyter": { 56 | "outputs_hidden": true 57 | }, 58 | "tags": [] 59 | }, 60 | "source": [ 61 | "### Set up\n", 62 | "\n", 63 | "---\n", 64 | "We begin by installing and upgrading necessary packages. 
Restart the kernel after executing the cell below for the first time.\n", 65 | "\n", 66 | "---" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "id": "85addd9d-ec89-44a7-9fb5-9bc24fe9993b", 73 | "metadata": { 74 | "tags": [] 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "!pip install --upgrade sagemaker datasets" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "id": "13274b9b-87bd-4090-a6aa-294570c31e0e", 84 | "metadata": {}, 85 | "source": [ 86 | "## Deploy Pre-trained Model\n", 87 | "\n", 88 | "---\n", 89 | "\n", 90 | "First we will deploy the Llama-2 7B model as a SageMaker endpoint. \n", 91 | "\n", 92 | "---" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "id": "e756bd54", 99 | "metadata": { 100 | "jumpStartAlterations": [ 101 | "modelIdVersion" 102 | ], 103 | "tags": [] 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "model_id, model_version = \"meta-textgeneration-llama-2-7b\", \"*\"" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "id": "319f5108-9cc5-4bb1-a2f9-1b39fd89fa92", 113 | "metadata": {}, 114 | "source": [ 115 | "If you are deploying the model for the first time, make sure to follow the code below to deploy the model endpoint and then use it to make predictions." 
116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "id": "1722b230-b7bc-487f-b4ee-98ca42848423", 122 | "metadata": { 123 | "tags": [] 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "from sagemaker.jumpstart.model import JumpStartModel\n", 128 | "\n", 129 | "pretrained_model = JumpStartModel(model_id=model_id)\n", 130 | "pretrained_predictor = pretrained_model.deploy(accept_eula = True)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "id": "5034e450-254d-431e-8e32-4def47e20fef", 136 | "metadata": {}, 137 | "source": [ 138 | "If you have already deployed the model and do not wish to deploy again, uncomment the code below to utilize the existing endpoint to make predictions using your query." 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "id": "7e01bcc7-0ae3-4e67-9b06-aaf7ab0a081e", 145 | "metadata": { 146 | "tags": [] 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "# from sagemaker.predictor import Predictor\n", 151 | "# from sagemaker.serializers import JSONSerializer\n", 152 | "# from sagemaker.deserializers import JSONDeserializer\n", 153 | "\n", 154 | "# # Use the existing endpoint name\n", 155 | "# endpoint_name = \"\" # Replace with your endpoint name\n", 156 | "\n", 157 | "# # Create a SageMaker predictor object\n", 158 | "# pretrained_predictor = Predictor(\n", 159 | "# endpoint_name=endpoint_name,\n", 160 | "# serializer=JSONSerializer(),\n", 161 | "# deserializer=JSONDeserializer(),\n", 162 | "# )\n", 163 | "\n", 164 | "# name = pretrained_predictor.endpoint " 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "id": "8017c4ef-eb89-4da6-8e28-c800adbfc4b8", 170 | "metadata": { 171 | "tags": [] 172 | }, 173 | "source": [ 174 | "## Invoke the endpoint\n", 175 | "\n", 176 | "---\n", 177 | "Next, we invoke the endpoint with a sample query. 
\n", 178 | "\n", 179 | "---" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "id": "b795a085-048f-42b2-945f-0cd339c1cf91", 186 | "metadata": { 187 | "tags": [] 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "def print_response(payload, response):\n", 192 | " print(payload[\"inputs\"])\n", 193 | " print(f\"> {response[0]['generated_text']}\")\n", 194 | " print(\"\\n==================================\\n\")" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "id": "5dd833f8-1ddc-4805-80b2-19e7db629880", 201 | "metadata": { 202 | "tags": [] 203 | }, 204 | "outputs": [], 205 | "source": [ 206 | "payload = {\n", 207 | " \"inputs\": \"I believe the meaning of life is\",\n", 208 | " \"parameters\": {\n", 209 | " \"max_new_tokens\": 200,\n", 210 | " \"top_p\": 0.9,\n", 211 | " \"temperature\": 0.9,\n", 212 | " \"return_full_text\": True,\n", 213 | " },\n", 214 | "}\n", 215 | "try:\n", 216 | " response = pretrained_predictor.predict(payload)\n", 217 | " print_response(payload, response)\n", 218 | "except Exception as e:\n", 219 | " print(e)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "id": "c5204d71-95e7-4ec3-abaa-6a1a4f2e787d", 225 | "metadata": {}, 226 | "source": [ 227 | "## Clean-up\n", 228 | "Delete the endpoint by running the cell below. 
" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "id": "d7691ce4-b5d7-4b1f-a741-2d7333c14a45", 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "pretrained_predictor.delete_model()\n", 239 | "pretrained_predictor.delete_endpoint()" 240 | ] 241 | } 242 | ], 243 | "metadata": { 244 | "availableInstances": [ 245 | { 246 | "_defaultOrder": 0, 247 | "_isFastLaunch": true, 248 | "category": "General purpose", 249 | "gpuNum": 0, 250 | "hideHardwareSpecs": false, 251 | "memoryGiB": 4, 252 | "name": "ml.t3.medium", 253 | "vcpuNum": 2 254 | }, 255 | { 256 | "_defaultOrder": 1, 257 | "_isFastLaunch": false, 258 | "category": "General purpose", 259 | "gpuNum": 0, 260 | "hideHardwareSpecs": false, 261 | "memoryGiB": 8, 262 | "name": "ml.t3.large", 263 | "vcpuNum": 2 264 | }, 265 | { 266 | "_defaultOrder": 2, 267 | "_isFastLaunch": false, 268 | "category": "General purpose", 269 | "gpuNum": 0, 270 | "hideHardwareSpecs": false, 271 | "memoryGiB": 16, 272 | "name": "ml.t3.xlarge", 273 | "vcpuNum": 4 274 | }, 275 | { 276 | "_defaultOrder": 3, 277 | "_isFastLaunch": false, 278 | "category": "General purpose", 279 | "gpuNum": 0, 280 | "hideHardwareSpecs": false, 281 | "memoryGiB": 32, 282 | "name": "ml.t3.2xlarge", 283 | "vcpuNum": 8 284 | }, 285 | { 286 | "_defaultOrder": 4, 287 | "_isFastLaunch": true, 288 | "category": "General purpose", 289 | "gpuNum": 0, 290 | "hideHardwareSpecs": false, 291 | "memoryGiB": 8, 292 | "name": "ml.m5.large", 293 | "vcpuNum": 2 294 | }, 295 | { 296 | "_defaultOrder": 5, 297 | "_isFastLaunch": false, 298 | "category": "General purpose", 299 | "gpuNum": 0, 300 | "hideHardwareSpecs": false, 301 | "memoryGiB": 16, 302 | "name": "ml.m5.xlarge", 303 | "vcpuNum": 4 304 | }, 305 | { 306 | "_defaultOrder": 6, 307 | "_isFastLaunch": false, 308 | "category": "General purpose", 309 | "gpuNum": 0, 310 | "hideHardwareSpecs": false, 311 | "memoryGiB": 32, 312 | "name": "ml.m5.2xlarge", 313 | "vcpuNum": 8 
314 | }, 315 | { 316 | "_defaultOrder": 7, 317 | "_isFastLaunch": false, 318 | "category": "General purpose", 319 | "gpuNum": 0, 320 | "hideHardwareSpecs": false, 321 | "memoryGiB": 64, 322 | "name": "ml.m5.4xlarge", 323 | "vcpuNum": 16 324 | }, 325 | { 326 | "_defaultOrder": 8, 327 | "_isFastLaunch": false, 328 | "category": "General purpose", 329 | "gpuNum": 0, 330 | "hideHardwareSpecs": false, 331 | "memoryGiB": 128, 332 | "name": "ml.m5.8xlarge", 333 | "vcpuNum": 32 334 | }, 335 | { 336 | "_defaultOrder": 9, 337 | "_isFastLaunch": false, 338 | "category": "General purpose", 339 | "gpuNum": 0, 340 | "hideHardwareSpecs": false, 341 | "memoryGiB": 192, 342 | "name": "ml.m5.12xlarge", 343 | "vcpuNum": 48 344 | }, 345 | { 346 | "_defaultOrder": 10, 347 | "_isFastLaunch": false, 348 | "category": "General purpose", 349 | "gpuNum": 0, 350 | "hideHardwareSpecs": false, 351 | "memoryGiB": 256, 352 | "name": "ml.m5.16xlarge", 353 | "vcpuNum": 64 354 | }, 355 | { 356 | "_defaultOrder": 11, 357 | "_isFastLaunch": false, 358 | "category": "General purpose", 359 | "gpuNum": 0, 360 | "hideHardwareSpecs": false, 361 | "memoryGiB": 384, 362 | "name": "ml.m5.24xlarge", 363 | "vcpuNum": 96 364 | }, 365 | { 366 | "_defaultOrder": 12, 367 | "_isFastLaunch": false, 368 | "category": "General purpose", 369 | "gpuNum": 0, 370 | "hideHardwareSpecs": false, 371 | "memoryGiB": 8, 372 | "name": "ml.m5d.large", 373 | "vcpuNum": 2 374 | }, 375 | { 376 | "_defaultOrder": 13, 377 | "_isFastLaunch": false, 378 | "category": "General purpose", 379 | "gpuNum": 0, 380 | "hideHardwareSpecs": false, 381 | "memoryGiB": 16, 382 | "name": "ml.m5d.xlarge", 383 | "vcpuNum": 4 384 | }, 385 | { 386 | "_defaultOrder": 14, 387 | "_isFastLaunch": false, 388 | "category": "General purpose", 389 | "gpuNum": 0, 390 | "hideHardwareSpecs": false, 391 | "memoryGiB": 32, 392 | "name": "ml.m5d.2xlarge", 393 | "vcpuNum": 8 394 | }, 395 | { 396 | "_defaultOrder": 15, 397 | "_isFastLaunch": false, 398 | "category": 
"General purpose", 399 | "gpuNum": 0, 400 | "hideHardwareSpecs": false, 401 | "memoryGiB": 64, 402 | "name": "ml.m5d.4xlarge", 403 | "vcpuNum": 16 404 | }, 405 | { 406 | "_defaultOrder": 16, 407 | "_isFastLaunch": false, 408 | "category": "General purpose", 409 | "gpuNum": 0, 410 | "hideHardwareSpecs": false, 411 | "memoryGiB": 128, 412 | "name": "ml.m5d.8xlarge", 413 | "vcpuNum": 32 414 | }, 415 | { 416 | "_defaultOrder": 17, 417 | "_isFastLaunch": false, 418 | "category": "General purpose", 419 | "gpuNum": 0, 420 | "hideHardwareSpecs": false, 421 | "memoryGiB": 192, 422 | "name": "ml.m5d.12xlarge", 423 | "vcpuNum": 48 424 | }, 425 | { 426 | "_defaultOrder": 18, 427 | "_isFastLaunch": false, 428 | "category": "General purpose", 429 | "gpuNum": 0, 430 | "hideHardwareSpecs": false, 431 | "memoryGiB": 256, 432 | "name": "ml.m5d.16xlarge", 433 | "vcpuNum": 64 434 | }, 435 | { 436 | "_defaultOrder": 19, 437 | "_isFastLaunch": false, 438 | "category": "General purpose", 439 | "gpuNum": 0, 440 | "hideHardwareSpecs": false, 441 | "memoryGiB": 384, 442 | "name": "ml.m5d.24xlarge", 443 | "vcpuNum": 96 444 | }, 445 | { 446 | "_defaultOrder": 20, 447 | "_isFastLaunch": false, 448 | "category": "General purpose", 449 | "gpuNum": 0, 450 | "hideHardwareSpecs": true, 451 | "memoryGiB": 0, 452 | "name": "ml.geospatial.interactive", 453 | "supportedImageNames": [ 454 | "sagemaker-geospatial-v1-0" 455 | ], 456 | "vcpuNum": 0 457 | }, 458 | { 459 | "_defaultOrder": 21, 460 | "_isFastLaunch": true, 461 | "category": "Compute optimized", 462 | "gpuNum": 0, 463 | "hideHardwareSpecs": false, 464 | "memoryGiB": 4, 465 | "name": "ml.c5.large", 466 | "vcpuNum": 2 467 | }, 468 | { 469 | "_defaultOrder": 22, 470 | "_isFastLaunch": false, 471 | "category": "Compute optimized", 472 | "gpuNum": 0, 473 | "hideHardwareSpecs": false, 474 | "memoryGiB": 8, 475 | "name": "ml.c5.xlarge", 476 | "vcpuNum": 4 477 | }, 478 | { 479 | "_defaultOrder": 23, 480 | "_isFastLaunch": false, 481 | "category": 
"Compute optimized", 482 | "gpuNum": 0, 483 | "hideHardwareSpecs": false, 484 | "memoryGiB": 16, 485 | "name": "ml.c5.2xlarge", 486 | "vcpuNum": 8 487 | }, 488 | { 489 | "_defaultOrder": 24, 490 | "_isFastLaunch": false, 491 | "category": "Compute optimized", 492 | "gpuNum": 0, 493 | "hideHardwareSpecs": false, 494 | "memoryGiB": 32, 495 | "name": "ml.c5.4xlarge", 496 | "vcpuNum": 16 497 | }, 498 | { 499 | "_defaultOrder": 25, 500 | "_isFastLaunch": false, 501 | "category": "Compute optimized", 502 | "gpuNum": 0, 503 | "hideHardwareSpecs": false, 504 | "memoryGiB": 72, 505 | "name": "ml.c5.9xlarge", 506 | "vcpuNum": 36 507 | }, 508 | { 509 | "_defaultOrder": 26, 510 | "_isFastLaunch": false, 511 | "category": "Compute optimized", 512 | "gpuNum": 0, 513 | "hideHardwareSpecs": false, 514 | "memoryGiB": 96, 515 | "name": "ml.c5.12xlarge", 516 | "vcpuNum": 48 517 | }, 518 | { 519 | "_defaultOrder": 27, 520 | "_isFastLaunch": false, 521 | "category": "Compute optimized", 522 | "gpuNum": 0, 523 | "hideHardwareSpecs": false, 524 | "memoryGiB": 144, 525 | "name": "ml.c5.18xlarge", 526 | "vcpuNum": 72 527 | }, 528 | { 529 | "_defaultOrder": 28, 530 | "_isFastLaunch": false, 531 | "category": "Compute optimized", 532 | "gpuNum": 0, 533 | "hideHardwareSpecs": false, 534 | "memoryGiB": 192, 535 | "name": "ml.c5.24xlarge", 536 | "vcpuNum": 96 537 | }, 538 | { 539 | "_defaultOrder": 29, 540 | "_isFastLaunch": true, 541 | "category": "Accelerated computing", 542 | "gpuNum": 1, 543 | "hideHardwareSpecs": false, 544 | "memoryGiB": 16, 545 | "name": "ml.g4dn.xlarge", 546 | "vcpuNum": 4 547 | }, 548 | { 549 | "_defaultOrder": 30, 550 | "_isFastLaunch": false, 551 | "category": "Accelerated computing", 552 | "gpuNum": 1, 553 | "hideHardwareSpecs": false, 554 | "memoryGiB": 32, 555 | "name": "ml.g4dn.2xlarge", 556 | "vcpuNum": 8 557 | }, 558 | { 559 | "_defaultOrder": 31, 560 | "_isFastLaunch": false, 561 | "category": "Accelerated computing", 562 | "gpuNum": 1, 563 | 
"hideHardwareSpecs": false, 564 | "memoryGiB": 64, 565 | "name": "ml.g4dn.4xlarge", 566 | "vcpuNum": 16 567 | }, 568 | { 569 | "_defaultOrder": 32, 570 | "_isFastLaunch": false, 571 | "category": "Accelerated computing", 572 | "gpuNum": 1, 573 | "hideHardwareSpecs": false, 574 | "memoryGiB": 128, 575 | "name": "ml.g4dn.8xlarge", 576 | "vcpuNum": 32 577 | }, 578 | { 579 | "_defaultOrder": 33, 580 | "_isFastLaunch": false, 581 | "category": "Accelerated computing", 582 | "gpuNum": 4, 583 | "hideHardwareSpecs": false, 584 | "memoryGiB": 192, 585 | "name": "ml.g4dn.12xlarge", 586 | "vcpuNum": 48 587 | }, 588 | { 589 | "_defaultOrder": 34, 590 | "_isFastLaunch": false, 591 | "category": "Accelerated computing", 592 | "gpuNum": 1, 593 | "hideHardwareSpecs": false, 594 | "memoryGiB": 256, 595 | "name": "ml.g4dn.16xlarge", 596 | "vcpuNum": 64 597 | }, 598 | { 599 | "_defaultOrder": 35, 600 | "_isFastLaunch": false, 601 | "category": "Accelerated computing", 602 | "gpuNum": 1, 603 | "hideHardwareSpecs": false, 604 | "memoryGiB": 61, 605 | "name": "ml.p3.2xlarge", 606 | "vcpuNum": 8 607 | }, 608 | { 609 | "_defaultOrder": 36, 610 | "_isFastLaunch": false, 611 | "category": "Accelerated computing", 612 | "gpuNum": 4, 613 | "hideHardwareSpecs": false, 614 | "memoryGiB": 244, 615 | "name": "ml.p3.8xlarge", 616 | "vcpuNum": 32 617 | }, 618 | { 619 | "_defaultOrder": 37, 620 | "_isFastLaunch": false, 621 | "category": "Accelerated computing", 622 | "gpuNum": 8, 623 | "hideHardwareSpecs": false, 624 | "memoryGiB": 488, 625 | "name": "ml.p3.16xlarge", 626 | "vcpuNum": 64 627 | }, 628 | { 629 | "_defaultOrder": 38, 630 | "_isFastLaunch": false, 631 | "category": "Accelerated computing", 632 | "gpuNum": 8, 633 | "hideHardwareSpecs": false, 634 | "memoryGiB": 768, 635 | "name": "ml.p3dn.24xlarge", 636 | "vcpuNum": 96 637 | }, 638 | { 639 | "_defaultOrder": 39, 640 | "_isFastLaunch": false, 641 | "category": "Memory Optimized", 642 | "gpuNum": 0, 643 | "hideHardwareSpecs": false, 644 | 
"memoryGiB": 16, 645 | "name": "ml.r5.large", 646 | "vcpuNum": 2 647 | }, 648 | { 649 | "_defaultOrder": 40, 650 | "_isFastLaunch": false, 651 | "category": "Memory Optimized", 652 | "gpuNum": 0, 653 | "hideHardwareSpecs": false, 654 | "memoryGiB": 32, 655 | "name": "ml.r5.xlarge", 656 | "vcpuNum": 4 657 | }, 658 | { 659 | "_defaultOrder": 41, 660 | "_isFastLaunch": false, 661 | "category": "Memory Optimized", 662 | "gpuNum": 0, 663 | "hideHardwareSpecs": false, 664 | "memoryGiB": 64, 665 | "name": "ml.r5.2xlarge", 666 | "vcpuNum": 8 667 | }, 668 | { 669 | "_defaultOrder": 42, 670 | "_isFastLaunch": false, 671 | "category": "Memory Optimized", 672 | "gpuNum": 0, 673 | "hideHardwareSpecs": false, 674 | "memoryGiB": 128, 675 | "name": "ml.r5.4xlarge", 676 | "vcpuNum": 16 677 | }, 678 | { 679 | "_defaultOrder": 43, 680 | "_isFastLaunch": false, 681 | "category": "Memory Optimized", 682 | "gpuNum": 0, 683 | "hideHardwareSpecs": false, 684 | "memoryGiB": 256, 685 | "name": "ml.r5.8xlarge", 686 | "vcpuNum": 32 687 | }, 688 | { 689 | "_defaultOrder": 44, 690 | "_isFastLaunch": false, 691 | "category": "Memory Optimized", 692 | "gpuNum": 0, 693 | "hideHardwareSpecs": false, 694 | "memoryGiB": 384, 695 | "name": "ml.r5.12xlarge", 696 | "vcpuNum": 48 697 | }, 698 | { 699 | "_defaultOrder": 45, 700 | "_isFastLaunch": false, 701 | "category": "Memory Optimized", 702 | "gpuNum": 0, 703 | "hideHardwareSpecs": false, 704 | "memoryGiB": 512, 705 | "name": "ml.r5.16xlarge", 706 | "vcpuNum": 64 707 | }, 708 | { 709 | "_defaultOrder": 46, 710 | "_isFastLaunch": false, 711 | "category": "Memory Optimized", 712 | "gpuNum": 0, 713 | "hideHardwareSpecs": false, 714 | "memoryGiB": 768, 715 | "name": "ml.r5.24xlarge", 716 | "vcpuNum": 96 717 | }, 718 | { 719 | "_defaultOrder": 47, 720 | "_isFastLaunch": false, 721 | "category": "Accelerated computing", 722 | "gpuNum": 1, 723 | "hideHardwareSpecs": false, 724 | "memoryGiB": 16, 725 | "name": "ml.g5.xlarge", 726 | "vcpuNum": 4 727 | }, 728 | 
{ 729 | "_defaultOrder": 48, 730 | "_isFastLaunch": false, 731 | "category": "Accelerated computing", 732 | "gpuNum": 1, 733 | "hideHardwareSpecs": false, 734 | "memoryGiB": 32, 735 | "name": "ml.g5.2xlarge", 736 | "vcpuNum": 8 737 | }, 738 | { 739 | "_defaultOrder": 49, 740 | "_isFastLaunch": false, 741 | "category": "Accelerated computing", 742 | "gpuNum": 1, 743 | "hideHardwareSpecs": false, 744 | "memoryGiB": 64, 745 | "name": "ml.g5.4xlarge", 746 | "vcpuNum": 16 747 | }, 748 | { 749 | "_defaultOrder": 50, 750 | "_isFastLaunch": false, 751 | "category": "Accelerated computing", 752 | "gpuNum": 1, 753 | "hideHardwareSpecs": false, 754 | "memoryGiB": 128, 755 | "name": "ml.g5.8xlarge", 756 | "vcpuNum": 32 757 | }, 758 | { 759 | "_defaultOrder": 51, 760 | "_isFastLaunch": false, 761 | "category": "Accelerated computing", 762 | "gpuNum": 1, 763 | "hideHardwareSpecs": false, 764 | "memoryGiB": 256, 765 | "name": "ml.g5.16xlarge", 766 | "vcpuNum": 64 767 | }, 768 | { 769 | "_defaultOrder": 52, 770 | "_isFastLaunch": false, 771 | "category": "Accelerated computing", 772 | "gpuNum": 4, 773 | "hideHardwareSpecs": false, 774 | "memoryGiB": 192, 775 | "name": "ml.g5.12xlarge", 776 | "vcpuNum": 48 777 | }, 778 | { 779 | "_defaultOrder": 53, 780 | "_isFastLaunch": false, 781 | "category": "Accelerated computing", 782 | "gpuNum": 4, 783 | "hideHardwareSpecs": false, 784 | "memoryGiB": 384, 785 | "name": "ml.g5.24xlarge", 786 | "vcpuNum": 96 787 | }, 788 | { 789 | "_defaultOrder": 54, 790 | "_isFastLaunch": false, 791 | "category": "Accelerated computing", 792 | "gpuNum": 8, 793 | "hideHardwareSpecs": false, 794 | "memoryGiB": 768, 795 | "name": "ml.g5.48xlarge", 796 | "vcpuNum": 192 797 | }, 798 | { 799 | "_defaultOrder": 55, 800 | "_isFastLaunch": false, 801 | "category": "Accelerated computing", 802 | "gpuNum": 8, 803 | "hideHardwareSpecs": false, 804 | "memoryGiB": 1152, 805 | "name": "ml.p4d.24xlarge", 806 | "vcpuNum": 96 807 | }, 808 | { 809 | "_defaultOrder": 56, 810 | 
"_isFastLaunch": false, 811 | "category": "Accelerated computing", 812 | "gpuNum": 8, 813 | "hideHardwareSpecs": false, 814 | "memoryGiB": 1152, 815 | "name": "ml.p4de.24xlarge", 816 | "vcpuNum": 96 817 | } 818 | ], 819 | "instance_type": "ml.t3.medium", 820 | "kernelspec": { 821 | "display_name": "Python 3 (Data Science 3.0)", 822 | "language": "python", 823 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/sagemaker-data-science-310-v1" 824 | }, 825 | "language_info": { 826 | "codemirror_mode": { 827 | "name": "ipython", 828 | "version": 3 829 | }, 830 | "file_extension": ".py", 831 | "mimetype": "text/x-python", 832 | "name": "python", 833 | "nbconvert_exporter": "python", 834 | "pygments_lexer": "ipython3", 835 | "version": "3.10.6" 836 | } 837 | }, 838 | "nbformat": 4, 839 | "nbformat_minor": 5 840 | } 841 | -------------------------------------------------------------------------------- /lab1/deploy-llama2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "af2ee7fb-e888-4e38-a349-c7c40dfd2963", 6 | "metadata": { 7 | "jupyter": { 8 | "outputs_hidden": true 9 | }, 10 | "tags": [] 11 | }, 12 | "source": [ 13 | "# Llama2 on Amazon SageMaker JumpStart" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "id": "251624f9-1eb6-4051-a774-0a4ba83cabf5", 19 | "metadata": { 20 | "jupyter": { 21 | "outputs_hidden": true 22 | }, 23 | "tags": [] 24 | }, 25 | "source": [ 26 | "---\n", 27 | "In this demo notebook, we demonstrate how to use the SageMaker Python SDK to deploy pre-trained Llama 2 model as well carry out inference using an example prompt.\n", 28 | "\n", 29 | "---" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "id": "e3d9b99d-639b-40f3-91e3-1fe00ee032a4", 35 | "metadata": { 36 | "jupyter": { 37 | "outputs_hidden": true 38 | }, 39 | "tags": [] 40 | }, 41 | "source": [ 42 | "### Model License information\n", 43 
| "---\n", 44 | "To perform inference on Llama2 models, you need to pass custom_attributes='accept_eula=true' as part of header. This means you have read and accept the end-user-license-agreement (EULA) of the model. EULA can be found in model card description or from https://ai.meta.com/resources/models-and-libraries/llama-downloads/. By default, this notebook sets custom_attributes='accept_eula=false', so all inference requests will fail until you explicitly change this custom attribute.\n", 45 | "\n", 46 | "Note: Custom_attributes used to pass EULA are key/value pairs. The key and value are separated by '=' and pairs are separated by ';'. If the user passes the same key more than once, the last value is kept and passed to the script handler (i.e., in this case, used for conditional logic). For example, if 'accept_eula=false; accept_eula=true' is passed to the server, then 'accept_eula=true' is kept and passed to the script handler.\n", 47 | "\n", 48 | "---" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "id": "019c4fcd-d6c5-4381-8425-1d224c0ac197", 54 | "metadata": { 55 | "jupyter": { 56 | "outputs_hidden": true 57 | }, 58 | "tags": [] 59 | }, 60 | "source": [ 61 | "### Set up\n", 62 | "\n", 63 | "---\n", 64 | "We begin by installing and upgrading necessary packages. Restart the kernel after executing the cell below for the first time.\n", 65 | "\n", 66 | "---" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "id": "85addd9d-ec89-44a7-9fb5-9bc24fe9993b", 73 | "metadata": { 74 | "tags": [] 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "!pip install --upgrade sagemaker datasets" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "id": "13274b9b-87bd-4090-a6aa-294570c31e0e", 84 | "metadata": {}, 85 | "source": [ 86 | "## Deploy Pre-trained Model\n", 87 | "\n", 88 | "---\n", 89 | "\n", 90 | "First we will deploy the Llama-2 7B model as a SageMaker endpoint. 
\n", 91 | "\n", 92 | "---" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "id": "e756bd54", 99 | "metadata": { 100 | "jumpStartAlterations": [ 101 | "modelIdVersion" 102 | ], 103 | "tags": [] 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "model_id, model_version = \"meta-textgeneration-llama-2-7b\", \"*\"" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "id": "319f5108-9cc5-4bb1-a2f9-1b39fd89fa92", 113 | "metadata": {}, 114 | "source": [ 115 | "If you are deploying the model for the first time, make sure to follow the code below to deploy the model endpoint and then use it to make predictions." 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "id": "1722b230-b7bc-487f-b4ee-98ca42848423", 122 | "metadata": { 123 | "tags": [] 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "from sagemaker.jumpstart.model import JumpStartModel\n", 128 | "\n", 129 | "pretrained_model = JumpStartModel(model_id=model_id)\n", 130 | "pretrained_predictor = pretrained_model.deploy()" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "id": "5034e450-254d-431e-8e32-4def47e20fef", 136 | "metadata": {}, 137 | "source": [ 138 | "If you have already deployed the model and do not wish to deploy again, uncomment the code below to utilize the existing endpoint to make predictions using your query." 
139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "id": "7e01bcc7-0ae3-4e67-9b06-aaf7ab0a081e", 145 | "metadata": { 146 | "tags": [] 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "# from sagemaker.predictor import Predictor\n", 151 | "# from sagemaker.serializers import JSONSerializer\n", 152 | "# from sagemaker.deserializers import JSONDeserializer\n", 153 | "\n", 154 | "# # Use the existing endpoint name\n", 155 | "# endpoint_name = \"\" # Replace with your endpoint name\n", 156 | "\n", 157 | "# # Create a SageMaker predictor object\n", 158 | "# pretrained_predictor = Predictor(\n", 159 | "# endpoint_name=endpoint_name,\n", 160 | "# serializer=JSONSerializer(),\n", 161 | "# deserializer=JSONDeserializer(),\n", 162 | "# )\n", 163 | "\n", 164 | "# name = pretrained_predictor.endpoint " 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "id": "8017c4ef-eb89-4da6-8e28-c800adbfc4b8", 170 | "metadata": { 171 | "tags": [] 172 | }, 173 | "source": [ 174 | "## Invoke the endpoint\n", 175 | "\n", 176 | "---\n", 177 | "Next, we invoke the endpoint with a sample query. 
\n", 178 | "\n", 179 | "---" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "id": "b795a085-048f-42b2-945f-0cd339c1cf91", 186 | "metadata": { 187 | "tags": [] 188 | }, 189 | "outputs": [], 190 | "source": [ 191 | "def print_response(payload, response):\n", 192 | " print(payload[\"inputs\"])\n", 193 | " print(f\"> {response[0]['generation']}\")\n", 194 | " print(\"\\n==================================\\n\")" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "id": "5dd833f8-1ddc-4805-80b2-19e7db629880", 201 | "metadata": { 202 | "tags": [] 203 | }, 204 | "outputs": [], 205 | "source": [ 206 | "payload = {\n", 207 | " \"inputs\": \"I believe the meaning of life is\",\n", 208 | " \"parameters\": {\n", 209 | " \"max_new_tokens\": 200,\n", 210 | " \"top_p\": 0.9,\n", 211 | " \"temperature\": 0.9,\n", 212 | " \"return_full_text\": True,\n", 213 | " },\n", 214 | "}\n", 215 | "try:\n", 216 | " response = pretrained_predictor.predict(payload, custom_attributes=\"accept_eula=true\")\n", 217 | " print_response(payload, response)\n", 218 | "except Exception as e:\n", 219 | " print(e)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "id": "c5204d71-95e7-4ec3-abaa-6a1a4f2e787d", 225 | "metadata": {}, 226 | "source": [ 227 | "## Clean-up\n", 228 | "Delete the endpoint by running the cell below. 
" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "id": "d7691ce4-b5d7-4b1f-a741-2d7333c14a45", 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "pretrained_predictor.delete_model()\n", 239 | "pretrained_predictor.delete_endpoint()" 240 | ] 241 | } 242 | ], 243 | "metadata": { 244 | "availableInstances": [ 245 | { 246 | "_defaultOrder": 0, 247 | "_isFastLaunch": true, 248 | "category": "General purpose", 249 | "gpuNum": 0, 250 | "hideHardwareSpecs": false, 251 | "memoryGiB": 4, 252 | "name": "ml.t3.medium", 253 | "vcpuNum": 2 254 | }, 255 | { 256 | "_defaultOrder": 1, 257 | "_isFastLaunch": false, 258 | "category": "General purpose", 259 | "gpuNum": 0, 260 | "hideHardwareSpecs": false, 261 | "memoryGiB": 8, 262 | "name": "ml.t3.large", 263 | "vcpuNum": 2 264 | }, 265 | { 266 | "_defaultOrder": 2, 267 | "_isFastLaunch": false, 268 | "category": "General purpose", 269 | "gpuNum": 0, 270 | "hideHardwareSpecs": false, 271 | "memoryGiB": 16, 272 | "name": "ml.t3.xlarge", 273 | "vcpuNum": 4 274 | }, 275 | { 276 | "_defaultOrder": 3, 277 | "_isFastLaunch": false, 278 | "category": "General purpose", 279 | "gpuNum": 0, 280 | "hideHardwareSpecs": false, 281 | "memoryGiB": 32, 282 | "name": "ml.t3.2xlarge", 283 | "vcpuNum": 8 284 | }, 285 | { 286 | "_defaultOrder": 4, 287 | "_isFastLaunch": true, 288 | "category": "General purpose", 289 | "gpuNum": 0, 290 | "hideHardwareSpecs": false, 291 | "memoryGiB": 8, 292 | "name": "ml.m5.large", 293 | "vcpuNum": 2 294 | }, 295 | { 296 | "_defaultOrder": 5, 297 | "_isFastLaunch": false, 298 | "category": "General purpose", 299 | "gpuNum": 0, 300 | "hideHardwareSpecs": false, 301 | "memoryGiB": 16, 302 | "name": "ml.m5.xlarge", 303 | "vcpuNum": 4 304 | }, 305 | { 306 | "_defaultOrder": 6, 307 | "_isFastLaunch": false, 308 | "category": "General purpose", 309 | "gpuNum": 0, 310 | "hideHardwareSpecs": false, 311 | "memoryGiB": 32, 312 | "name": "ml.m5.2xlarge", 313 | "vcpuNum": 8 
314 | }, 315 | { 316 | "_defaultOrder": 7, 317 | "_isFastLaunch": false, 318 | "category": "General purpose", 319 | "gpuNum": 0, 320 | "hideHardwareSpecs": false, 321 | "memoryGiB": 64, 322 | "name": "ml.m5.4xlarge", 323 | "vcpuNum": 16 324 | }, 325 | { 326 | "_defaultOrder": 8, 327 | "_isFastLaunch": false, 328 | "category": "General purpose", 329 | "gpuNum": 0, 330 | "hideHardwareSpecs": false, 331 | "memoryGiB": 128, 332 | "name": "ml.m5.8xlarge", 333 | "vcpuNum": 32 334 | }, 335 | { 336 | "_defaultOrder": 9, 337 | "_isFastLaunch": false, 338 | "category": "General purpose", 339 | "gpuNum": 0, 340 | "hideHardwareSpecs": false, 341 | "memoryGiB": 192, 342 | "name": "ml.m5.12xlarge", 343 | "vcpuNum": 48 344 | }, 345 | { 346 | "_defaultOrder": 10, 347 | "_isFastLaunch": false, 348 | "category": "General purpose", 349 | "gpuNum": 0, 350 | "hideHardwareSpecs": false, 351 | "memoryGiB": 256, 352 | "name": "ml.m5.16xlarge", 353 | "vcpuNum": 64 354 | }, 355 | { 356 | "_defaultOrder": 11, 357 | "_isFastLaunch": false, 358 | "category": "General purpose", 359 | "gpuNum": 0, 360 | "hideHardwareSpecs": false, 361 | "memoryGiB": 384, 362 | "name": "ml.m5.24xlarge", 363 | "vcpuNum": 96 364 | }, 365 | { 366 | "_defaultOrder": 12, 367 | "_isFastLaunch": false, 368 | "category": "General purpose", 369 | "gpuNum": 0, 370 | "hideHardwareSpecs": false, 371 | "memoryGiB": 8, 372 | "name": "ml.m5d.large", 373 | "vcpuNum": 2 374 | }, 375 | { 376 | "_defaultOrder": 13, 377 | "_isFastLaunch": false, 378 | "category": "General purpose", 379 | "gpuNum": 0, 380 | "hideHardwareSpecs": false, 381 | "memoryGiB": 16, 382 | "name": "ml.m5d.xlarge", 383 | "vcpuNum": 4 384 | }, 385 | { 386 | "_defaultOrder": 14, 387 | "_isFastLaunch": false, 388 | "category": "General purpose", 389 | "gpuNum": 0, 390 | "hideHardwareSpecs": false, 391 | "memoryGiB": 32, 392 | "name": "ml.m5d.2xlarge", 393 | "vcpuNum": 8 394 | }, 395 | { 396 | "_defaultOrder": 15, 397 | "_isFastLaunch": false, 398 | "category": 
"General purpose", 399 | "gpuNum": 0, 400 | "hideHardwareSpecs": false, 401 | "memoryGiB": 64, 402 | "name": "ml.m5d.4xlarge", 403 | "vcpuNum": 16 404 | }, 405 | { 406 | "_defaultOrder": 16, 407 | "_isFastLaunch": false, 408 | "category": "General purpose", 409 | "gpuNum": 0, 410 | "hideHardwareSpecs": false, 411 | "memoryGiB": 128, 412 | "name": "ml.m5d.8xlarge", 413 | "vcpuNum": 32 414 | }, 415 | { 416 | "_defaultOrder": 17, 417 | "_isFastLaunch": false, 418 | "category": "General purpose", 419 | "gpuNum": 0, 420 | "hideHardwareSpecs": false, 421 | "memoryGiB": 192, 422 | "name": "ml.m5d.12xlarge", 423 | "vcpuNum": 48 424 | }, 425 | { 426 | "_defaultOrder": 18, 427 | "_isFastLaunch": false, 428 | "category": "General purpose", 429 | "gpuNum": 0, 430 | "hideHardwareSpecs": false, 431 | "memoryGiB": 256, 432 | "name": "ml.m5d.16xlarge", 433 | "vcpuNum": 64 434 | }, 435 | { 436 | "_defaultOrder": 19, 437 | "_isFastLaunch": false, 438 | "category": "General purpose", 439 | "gpuNum": 0, 440 | "hideHardwareSpecs": false, 441 | "memoryGiB": 384, 442 | "name": "ml.m5d.24xlarge", 443 | "vcpuNum": 96 444 | }, 445 | { 446 | "_defaultOrder": 20, 447 | "_isFastLaunch": false, 448 | "category": "General purpose", 449 | "gpuNum": 0, 450 | "hideHardwareSpecs": true, 451 | "memoryGiB": 0, 452 | "name": "ml.geospatial.interactive", 453 | "supportedImageNames": [ 454 | "sagemaker-geospatial-v1-0" 455 | ], 456 | "vcpuNum": 0 457 | }, 458 | { 459 | "_defaultOrder": 21, 460 | "_isFastLaunch": true, 461 | "category": "Compute optimized", 462 | "gpuNum": 0, 463 | "hideHardwareSpecs": false, 464 | "memoryGiB": 4, 465 | "name": "ml.c5.large", 466 | "vcpuNum": 2 467 | }, 468 | { 469 | "_defaultOrder": 22, 470 | "_isFastLaunch": false, 471 | "category": "Compute optimized", 472 | "gpuNum": 0, 473 | "hideHardwareSpecs": false, 474 | "memoryGiB": 8, 475 | "name": "ml.c5.xlarge", 476 | "vcpuNum": 4 477 | }, 478 | { 479 | "_defaultOrder": 23, 480 | "_isFastLaunch": false, 481 | "category": 
"Compute optimized", 482 | "gpuNum": 0, 483 | "hideHardwareSpecs": false, 484 | "memoryGiB": 16, 485 | "name": "ml.c5.2xlarge", 486 | "vcpuNum": 8 487 | }, 488 | { 489 | "_defaultOrder": 24, 490 | "_isFastLaunch": false, 491 | "category": "Compute optimized", 492 | "gpuNum": 0, 493 | "hideHardwareSpecs": false, 494 | "memoryGiB": 32, 495 | "name": "ml.c5.4xlarge", 496 | "vcpuNum": 16 497 | }, 498 | { 499 | "_defaultOrder": 25, 500 | "_isFastLaunch": false, 501 | "category": "Compute optimized", 502 | "gpuNum": 0, 503 | "hideHardwareSpecs": false, 504 | "memoryGiB": 72, 505 | "name": "ml.c5.9xlarge", 506 | "vcpuNum": 36 507 | }, 508 | { 509 | "_defaultOrder": 26, 510 | "_isFastLaunch": false, 511 | "category": "Compute optimized", 512 | "gpuNum": 0, 513 | "hideHardwareSpecs": false, 514 | "memoryGiB": 96, 515 | "name": "ml.c5.12xlarge", 516 | "vcpuNum": 48 517 | }, 518 | { 519 | "_defaultOrder": 27, 520 | "_isFastLaunch": false, 521 | "category": "Compute optimized", 522 | "gpuNum": 0, 523 | "hideHardwareSpecs": false, 524 | "memoryGiB": 144, 525 | "name": "ml.c5.18xlarge", 526 | "vcpuNum": 72 527 | }, 528 | { 529 | "_defaultOrder": 28, 530 | "_isFastLaunch": false, 531 | "category": "Compute optimized", 532 | "gpuNum": 0, 533 | "hideHardwareSpecs": false, 534 | "memoryGiB": 192, 535 | "name": "ml.c5.24xlarge", 536 | "vcpuNum": 96 537 | }, 538 | { 539 | "_defaultOrder": 29, 540 | "_isFastLaunch": true, 541 | "category": "Accelerated computing", 542 | "gpuNum": 1, 543 | "hideHardwareSpecs": false, 544 | "memoryGiB": 16, 545 | "name": "ml.g4dn.xlarge", 546 | "vcpuNum": 4 547 | }, 548 | { 549 | "_defaultOrder": 30, 550 | "_isFastLaunch": false, 551 | "category": "Accelerated computing", 552 | "gpuNum": 1, 553 | "hideHardwareSpecs": false, 554 | "memoryGiB": 32, 555 | "name": "ml.g4dn.2xlarge", 556 | "vcpuNum": 8 557 | }, 558 | { 559 | "_defaultOrder": 31, 560 | "_isFastLaunch": false, 561 | "category": "Accelerated computing", 562 | "gpuNum": 1, 563 | 
"hideHardwareSpecs": false, 564 | "memoryGiB": 64, 565 | "name": "ml.g4dn.4xlarge", 566 | "vcpuNum": 16 567 | }, 568 | { 569 | "_defaultOrder": 32, 570 | "_isFastLaunch": false, 571 | "category": "Accelerated computing", 572 | "gpuNum": 1, 573 | "hideHardwareSpecs": false, 574 | "memoryGiB": 128, 575 | "name": "ml.g4dn.8xlarge", 576 | "vcpuNum": 32 577 | }, 578 | { 579 | "_defaultOrder": 33, 580 | "_isFastLaunch": false, 581 | "category": "Accelerated computing", 582 | "gpuNum": 4, 583 | "hideHardwareSpecs": false, 584 | "memoryGiB": 192, 585 | "name": "ml.g4dn.12xlarge", 586 | "vcpuNum": 48 587 | }, 588 | { 589 | "_defaultOrder": 34, 590 | "_isFastLaunch": false, 591 | "category": "Accelerated computing", 592 | "gpuNum": 1, 593 | "hideHardwareSpecs": false, 594 | "memoryGiB": 256, 595 | "name": "ml.g4dn.16xlarge", 596 | "vcpuNum": 64 597 | }, 598 | { 599 | "_defaultOrder": 35, 600 | "_isFastLaunch": false, 601 | "category": "Accelerated computing", 602 | "gpuNum": 1, 603 | "hideHardwareSpecs": false, 604 | "memoryGiB": 61, 605 | "name": "ml.p3.2xlarge", 606 | "vcpuNum": 8 607 | }, 608 | { 609 | "_defaultOrder": 36, 610 | "_isFastLaunch": false, 611 | "category": "Accelerated computing", 612 | "gpuNum": 4, 613 | "hideHardwareSpecs": false, 614 | "memoryGiB": 244, 615 | "name": "ml.p3.8xlarge", 616 | "vcpuNum": 32 617 | }, 618 | { 619 | "_defaultOrder": 37, 620 | "_isFastLaunch": false, 621 | "category": "Accelerated computing", 622 | "gpuNum": 8, 623 | "hideHardwareSpecs": false, 624 | "memoryGiB": 488, 625 | "name": "ml.p3.16xlarge", 626 | "vcpuNum": 64 627 | }, 628 | { 629 | "_defaultOrder": 38, 630 | "_isFastLaunch": false, 631 | "category": "Accelerated computing", 632 | "gpuNum": 8, 633 | "hideHardwareSpecs": false, 634 | "memoryGiB": 768, 635 | "name": "ml.p3dn.24xlarge", 636 | "vcpuNum": 96 637 | }, 638 | { 639 | "_defaultOrder": 39, 640 | "_isFastLaunch": false, 641 | "category": "Memory Optimized", 642 | "gpuNum": 0, 643 | "hideHardwareSpecs": false, 644 | 
"memoryGiB": 16, 645 | "name": "ml.r5.large", 646 | "vcpuNum": 2 647 | }, 648 | { 649 | "_defaultOrder": 40, 650 | "_isFastLaunch": false, 651 | "category": "Memory Optimized", 652 | "gpuNum": 0, 653 | "hideHardwareSpecs": false, 654 | "memoryGiB": 32, 655 | "name": "ml.r5.xlarge", 656 | "vcpuNum": 4 657 | }, 658 | { 659 | "_defaultOrder": 41, 660 | "_isFastLaunch": false, 661 | "category": "Memory Optimized", 662 | "gpuNum": 0, 663 | "hideHardwareSpecs": false, 664 | "memoryGiB": 64, 665 | "name": "ml.r5.2xlarge", 666 | "vcpuNum": 8 667 | }, 668 | { 669 | "_defaultOrder": 42, 670 | "_isFastLaunch": false, 671 | "category": "Memory Optimized", 672 | "gpuNum": 0, 673 | "hideHardwareSpecs": false, 674 | "memoryGiB": 128, 675 | "name": "ml.r5.4xlarge", 676 | "vcpuNum": 16 677 | }, 678 | { 679 | "_defaultOrder": 43, 680 | "_isFastLaunch": false, 681 | "category": "Memory Optimized", 682 | "gpuNum": 0, 683 | "hideHardwareSpecs": false, 684 | "memoryGiB": 256, 685 | "name": "ml.r5.8xlarge", 686 | "vcpuNum": 32 687 | }, 688 | { 689 | "_defaultOrder": 44, 690 | "_isFastLaunch": false, 691 | "category": "Memory Optimized", 692 | "gpuNum": 0, 693 | "hideHardwareSpecs": false, 694 | "memoryGiB": 384, 695 | "name": "ml.r5.12xlarge", 696 | "vcpuNum": 48 697 | }, 698 | { 699 | "_defaultOrder": 45, 700 | "_isFastLaunch": false, 701 | "category": "Memory Optimized", 702 | "gpuNum": 0, 703 | "hideHardwareSpecs": false, 704 | "memoryGiB": 512, 705 | "name": "ml.r5.16xlarge", 706 | "vcpuNum": 64 707 | }, 708 | { 709 | "_defaultOrder": 46, 710 | "_isFastLaunch": false, 711 | "category": "Memory Optimized", 712 | "gpuNum": 0, 713 | "hideHardwareSpecs": false, 714 | "memoryGiB": 768, 715 | "name": "ml.r5.24xlarge", 716 | "vcpuNum": 96 717 | }, 718 | { 719 | "_defaultOrder": 47, 720 | "_isFastLaunch": false, 721 | "category": "Accelerated computing", 722 | "gpuNum": 1, 723 | "hideHardwareSpecs": false, 724 | "memoryGiB": 16, 725 | "name": "ml.g5.xlarge", 726 | "vcpuNum": 4 727 | }, 728 | 
{ 729 | "_defaultOrder": 48, 730 | "_isFastLaunch": false, 731 | "category": "Accelerated computing", 732 | "gpuNum": 1, 733 | "hideHardwareSpecs": false, 734 | "memoryGiB": 32, 735 | "name": "ml.g5.2xlarge", 736 | "vcpuNum": 8 737 | }, 738 | { 739 | "_defaultOrder": 49, 740 | "_isFastLaunch": false, 741 | "category": "Accelerated computing", 742 | "gpuNum": 1, 743 | "hideHardwareSpecs": false, 744 | "memoryGiB": 64, 745 | "name": "ml.g5.4xlarge", 746 | "vcpuNum": 16 747 | }, 748 | { 749 | "_defaultOrder": 50, 750 | "_isFastLaunch": false, 751 | "category": "Accelerated computing", 752 | "gpuNum": 1, 753 | "hideHardwareSpecs": false, 754 | "memoryGiB": 128, 755 | "name": "ml.g5.8xlarge", 756 | "vcpuNum": 32 757 | }, 758 | { 759 | "_defaultOrder": 51, 760 | "_isFastLaunch": false, 761 | "category": "Accelerated computing", 762 | "gpuNum": 1, 763 | "hideHardwareSpecs": false, 764 | "memoryGiB": 256, 765 | "name": "ml.g5.16xlarge", 766 | "vcpuNum": 64 767 | }, 768 | { 769 | "_defaultOrder": 52, 770 | "_isFastLaunch": false, 771 | "category": "Accelerated computing", 772 | "gpuNum": 4, 773 | "hideHardwareSpecs": false, 774 | "memoryGiB": 192, 775 | "name": "ml.g5.12xlarge", 776 | "vcpuNum": 48 777 | }, 778 | { 779 | "_defaultOrder": 53, 780 | "_isFastLaunch": false, 781 | "category": "Accelerated computing", 782 | "gpuNum": 4, 783 | "hideHardwareSpecs": false, 784 | "memoryGiB": 384, 785 | "name": "ml.g5.24xlarge", 786 | "vcpuNum": 96 787 | }, 788 | { 789 | "_defaultOrder": 54, 790 | "_isFastLaunch": false, 791 | "category": "Accelerated computing", 792 | "gpuNum": 8, 793 | "hideHardwareSpecs": false, 794 | "memoryGiB": 768, 795 | "name": "ml.g5.48xlarge", 796 | "vcpuNum": 192 797 | }, 798 | { 799 | "_defaultOrder": 55, 800 | "_isFastLaunch": false, 801 | "category": "Accelerated computing", 802 | "gpuNum": 8, 803 | "hideHardwareSpecs": false, 804 | "memoryGiB": 1152, 805 | "name": "ml.p4d.24xlarge", 806 | "vcpuNum": 96 807 | }, 808 | { 809 | "_defaultOrder": 56, 810 | 
"_isFastLaunch": false, 811 | "category": "Accelerated computing", 812 | "gpuNum": 8, 813 | "hideHardwareSpecs": false, 814 | "memoryGiB": 1152, 815 | "name": "ml.p4de.24xlarge", 816 | "vcpuNum": 96 817 | } 818 | ], 819 | "instance_type": "ml.t3.medium", 820 | "kernelspec": { 821 | "display_name": "Python 3 (Data Science 3.0)", 822 | "language": "python", 823 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/sagemaker-data-science-310-v1" 824 | }, 825 | "language_info": { 826 | "codemirror_mode": { 827 | "name": "ipython", 828 | "version": 3 829 | }, 830 | "file_extension": ".py", 831 | "mimetype": "text/x-python", 832 | "name": "python", 833 | "nbconvert_exporter": "python", 834 | "pygments_lexer": "ipython3", 835 | "version": "3.10.6" 836 | } 837 | }, 838 | "nbformat": 4, 839 | "nbformat_minor": 5 840 | } 841 | -------------------------------------------------------------------------------- /lab4/image-generation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "380b51db-e36e-4c72-9a10-d5a2f603c30f", 6 | "metadata": {}, 7 | "source": [ 8 | "# SageMaker JumpStart - Image Generation" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "e81ab07a", 14 | "metadata": {}, 15 | "source": [ 16 | "---\n", 17 | "Welcome to [Amazon SageMaker Jumpstart](https://docs.aws.amazon.com/sagemaker/latest/dg/studio-jumpstart.html)! You can use Amazon SageMaker Jumpstart to solve many Machine Learning tasks through one-click in SageMaker Studio, or through [SageMaker Python SDK](https://sagemaker.readthedocs.io/en/stable/overview.html#use-prebuilt-models-with-sagemaker-jumpstart).\n", 18 | "\n", 19 | "In this demo notebook, we demonstrate how to use the SageMaker Python SDK to deploy Stable Diffusion models for image generation task. 
The container being utilized here is a customized stabilityai container that has been optimized to excel in terms of both speed and quality. We demonstrate how to use SageMaker Python SDK for Text-to-Image and Image-to-Image generation. Text-to-Image is the task of generating realistic image given any text input. Here, we show how to use state-of-the-art pre-trained Stable Diffusion models for generating image from text and image.\n", 20 | "\n", 21 | "---" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "053651bf", 27 | "metadata": {}, 28 | "source": [ 29 | "1. [Set Up](#1.-Set-Up)\n", 30 | "2. [Select a pre-trained model](#2.-Select-a-pre-trained-model)\n", 31 | "3. [Deploy an Endpoint & Query Endpoint](#3.-Deploy-an-Endpoint-&-Query-Endpoint)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "id": "eaf18e76", 37 | "metadata": {}, 38 | "source": [ 39 | "### 1. Set Up" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "1e8c198a", 45 | "metadata": {}, 46 | "source": [ 47 | "---\n", 48 | "Before executing the notebook, there are some initial steps required for set up\n", 49 | "\n", 50 | "---" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "id": "69c82791", 57 | "metadata": { 58 | "tags": [] 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "%pip install --upgrade sagemaker --quiet" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "id": "8b7e79ee", 68 | "metadata": {}, 69 | "source": [ 70 | "## 2. 
Select a pre-trained model" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "id": "db300c7c", 77 | "metadata": { 78 | "jumpStartAlterations": [ 79 | "modelIdOnly" 80 | ], 81 | "tags": [] 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "model_id = \"model-imagegeneration-stabilityai-stable-diffusion-xl-base-1-0\"" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "id": "d16bb9b5", 92 | "metadata": { 93 | "tags": [] 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "import IPython\n", 98 | "from ipywidgets import Dropdown\n", 99 | "from sagemaker.jumpstart.notebook_utils import list_jumpstart_models\n", 100 | "from sagemaker.jumpstart.filters import And\n", 101 | "\n", 102 | "\n", 103 | "filter_value = And(\"task == imagegeneration\")\n", 104 | "ss_models = list_jumpstart_models(filter=filter_value)\n", 105 | "\n", 106 | "dropdown = Dropdown(\n", 107 | " value=model_id,\n", 108 | " options=ss_models,\n", 109 | " description=\"Sagemaker Pre-Trained Image Generation Models:\",\n", 110 | " style={\"description_width\": \"initial\"},\n", 111 | " layout={\"width\": \"max-content\"},\n", 112 | ")\n", 113 | "display(IPython.display.Markdown(\"## Select a pre-trained model from the dropdown below\"))\n", 114 | "display(dropdown)" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "id": "6d8f84c3", 120 | "metadata": {}, 121 | "source": [ 122 | "### 3. 
Deploy an Endpoint & Query Endpoint\n", 123 | "\n", 124 | "***\n", 125 | "\n", 126 | "Using SageMaker, we can perform inference on the pre-trained model.\n", 127 | "\n", 128 | "***" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "id": "413822f2", 135 | "metadata": { 136 | "tags": [] 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "#Deploying the model\n", 141 | "from sagemaker.jumpstart.model import JumpStartModel\n", 142 | "from sagemaker.serializers import JSONSerializer\n", 143 | "import time\n", 144 | "\n", 145 | "# The model is deployed on the ml.g5.4xlarge instance. To see all the supported parameters by the JumpStartModel\n", 146 | "# class use this link - https://sagemaker.readthedocs.io/en/stable/api/inference/model.html#sagemaker.jumpstart.model.JumpStartModel\n", 147 | "my_model = JumpStartModel(model_id=dropdown.value)\n", 148 | "predictor = my_model.deploy()\n", 149 | "# We will wait for a few seconds so model is properly loaded.\n", 150 | "time.sleep(60)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "id": "36dd1358", 156 | "metadata": {}, 157 | "source": [ 158 | "Importing relevant packages for querying endpoint and image visualization" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "id": "4c2c1462-f9a9-4fc7-8a6f-b7ff3b24c63b", 165 | "metadata": { 166 | "tags": [] 167 | }, 168 | "outputs": [], 169 | "source": [ 170 | "from PIL import Image\n", 171 | "import io\n", 172 | "import base64\n", 173 | "import json\n", 174 | "import boto3\n", 175 | "from typing import Union, Tuple\n", 176 | "import os" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "id": "ab6fcceb-0ccc-43af-a204-da1612ca2b59", 182 | "metadata": {}, 183 | "source": [ 184 | "## Query Stable Diffusion endpoint\n", 185 | "\n", 186 | "### Supported features\n", 187 | "\n", 188 | "***\n", 189 | "This model supports many advanced parameters while performing inference. 
They include:\n", 190 | "\n", 191 | "* **text**: prompt to guide the image generation. Must be specified and should be string.\n", 192 | "* **width**: width of the hallucinated image. If specified, it must be a positive integer divisible by 8.\n", 193 | "* **height**: height of the hallucinated image. If specified, it must be a positive integer divisible by 8. Image size should be larger than 256x256.\n", 194 | "* **sampler**: Available samplers are EulerEDMSampler, HeunEDMSampler, EulerAncestralSampler, DPMPP2SAncestralSampler, DPMPP2MSampler, LinearMultistepSampler\n", 195 | "* **cfg_scale**: A higher cfg_scale results in image closely related to the prompt, at the expense of image quality. If specified, it must be a float. cfg_scale<=1 is ignored.\n", 196 | "* **steps**: number of denoising steps during image generation. More steps lead to higher quality image. If specified, it must be a positive integer.\n", 197 | "* **seed**: fix the randomized state for reproducibility. If specified, it must be an integer.\n", 198 | "* **use_refiner**: Refiner is used by default with the SDXL model. You can disable it by using this parameter.\n", 199 | "* **init_image**: Image to be used as the starting point.\n", 200 | "* **image_strength**: Indicates extent to transform the reference image. Must be between 0 and 1.\n", 201 | "* **refiner_steps**: Number of denoising steps during image generation for the refiner. More steps lead to higher quality image. If specified, it must be a positive integer.\n", 202 | "* **refiner_strength**: Indicates extent to transform the input image to the refiner.\n", 203 | "* **negative_prompt**: guide image generation against this prompt. If specified, it must be a string. 
It is specified in the text_prompts with a negative weight.\n", 204 | "\n", 205 | "\n", 206 | "***\n", 207 | "### Text to Image\n", 208 | "\n", 209 | "***" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "id": "00851369-9e37-4e14-b9ee-f440d538df5b", 216 | "metadata": { 217 | "tags": [] 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "payload = {\n", 222 | " \"text_prompts\":[{\"text\": \"jaguar in the Amazon rainforest\"}],\n", 223 | " \"width\": 1024,\n", 224 | " \"height\": 1024,\n", 225 | " \"sampler\": \"DPMPP2MSampler\",\n", 226 | " \"cfg_scale\": 7.0,\n", 227 | " \"steps\": 50,\n", 228 | " \"seed\": 133,\n", 229 | " \"use_refiner\": True,\n", 230 | " \"refiner_steps\": 40,\n", 231 | " \"refiner_strength\": 0.2\n", 232 | " }" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "id": "b82da60f", 239 | "metadata": { 240 | "collapsed": false, 241 | "jupyter": { 242 | "outputs_hidden": false 243 | } 244 | }, 245 | "outputs": [], 246 | "source": [ 247 | "def decode_and_show(model_response) -> None:\n", 248 | " \"\"\"\n", 249 | " Decodes and displays an image from SDXL output\n", 250 | "\n", 251 | " Args:\n", 252 | " model_response (GenerationResponse): The response object from the deployed SDXL model.\n", 253 | "\n", 254 | " Returns:\n", 255 | " None\n", 256 | " \"\"\"\n", 257 | " image = Image.open(io.BytesIO(base64.b64decode(model_response)))\n", 258 | " display(image)\n", 259 | " image.close()\n", 260 | "\n", 261 | "\n", 262 | "response = predictor.predict(payload)\n", 263 | "decode_and_show(response[\"generated_image\"])" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "id": "12ab6f8d", 269 | "metadata": {}, 270 | "source": [ 271 | "### Image to Image\n", 272 | "\n", 273 | "***\n", 274 | "To perform inference that takes an image as input, you must pass the image into init_image as a base64-encoded string.\n", 275 | "\n", 276 | "Below is a helper function for 
converting images to base64-encoded strings:\n", 277 | "\n", 278 | "***" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "id": "14da9dce", 285 | "metadata": { 286 | "collapsed": false, 287 | "jupyter": { 288 | "outputs_hidden": false 289 | } 290 | }, 291 | "outputs": [], 292 | "source": [ 293 | "def encode_image(image_path: str, resize: bool = True, size: Tuple[int, int] = (1024, 1024)) -> Union[str, None]:\n", 294 | " \"\"\"\n", 295 | " Encode an image as a base64 string, optionally resizing it to a supported resolution.\n", 296 | "\n", 297 | " Args:\n", 298 | " image_path (str): The path to the image file.\n", 299 | " resize (bool, optional): Whether to resize the image. Defaults to True.\n", 300 | "\n", 301 | " Returns:\n", 302 | " Union[str, None]: The encoded image as a string, or None if encoding failed.\n", 303 | " \"\"\"\n", 304 | " assert os.path.exists(image_path)\n", 305 | "\n", 306 | " if resize:\n", 307 | " image = Image.open(image_path)\n", 308 | " image = image.resize(size)\n", 309 | " image.save(\"image_path_resized.png\")\n", 310 | " image_path = \"image_path_resized.png\"\n", 311 | " image = Image.open(image_path)\n", 312 | " assert image.size == size\n", 313 | " with open(image_path, \"rb\") as image_file:\n", 314 | " img_byte_array = image_file.read()\n", 315 | " # Encode the byte array as a Base64 string\n", 316 | " try:\n", 317 | " base64_str = base64.b64encode(img_byte_array).decode(\"utf-8\")\n", 318 | " return base64_str\n", 319 | " except Exception as e:\n", 320 | " print(f\"Failed to encode image {image_path} as base64 string.\")\n", 321 | " print(e)\n", 322 | " return None\n", 323 | " image.close()" 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "id": "290832a4", 329 | "metadata": {}, 330 | "source": [ 331 | "Let's feed an image into the model as well as the prompt this time. We can set image_strength to weight the relative importance of the image and the prompt. 
For the demo, we'll use a picture of a dog, taken from Wikimedia Commons, provided along with this notebook." 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "id": "f56a8878", 338 | "metadata": { 339 | "collapsed": false, 340 | "jupyter": { 341 | "outputs_hidden": false 342 | } 343 | }, 344 | "outputs": [], 345 | "source": [ 346 | "# Here is the original image:\n", 347 | "region = boto3.Session().region_name\n", 348 | "s3_bucket = f\"jumpstart-cache-prod-{region}\"\n", 349 | "key_prefix = \"model-metadata/assets\"\n", 350 | "input_img_file_name = \"dog_suit.jpg\"\n", 351 | "\n", 352 | "s3 = boto3.client(\"s3\")\n", 353 | "\n", 354 | "s3.download_file(s3_bucket, f\"{key_prefix}/{input_img_file_name}\", input_img_file_name)\n", 355 | "image = Image.open(input_img_file_name)\n", 356 | "display(image)\n", 357 | "image.close()" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "id": "f08b8148", 364 | "metadata": { 365 | "collapsed": false, 366 | "jupyter": { 367 | "outputs_hidden": false 368 | } 369 | }, 370 | "outputs": [], 371 | "source": [ 372 | "size = (512, 512)\n", 373 | "dog_data = encode_image(input_img_file_name, size=size)\n", 374 | "\n", 375 | "payload = {\n", 376 | "    \"text_prompts\":[{\"text\": \"dog in embroidery\"}],\n", 377 | "    \"init_image\": dog_data,\n", 378 | "    \"cfg_scale\": 9,\n", 379 | "    \"image_strength\": 0.8,\n", 380 | "    \"seed\": 42,\n", 381 | "    }\n", 382 | "\n", 383 | "response = predictor.predict(payload)\n", 384 | "decode_and_show(response[\"generated_image\"])" 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "id": "fcfa677f", 390 | "metadata": {}, 391 | "source": [ 392 | "### 4. 
Clean Endpoint" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": null, 398 | "id": "766bda80", 399 | "metadata": { 400 | "collapsed": false, 401 | "jupyter": { 402 | "outputs_hidden": false 403 | } 404 | }, 405 | "outputs": [], 406 | "source": [ 407 | "# Delete the SageMaker endpoint\n", 408 | "predictor.delete_model()\n", 409 | "predictor.delete_endpoint()" 410 | ] 411 | } 412 | ], 413 | "metadata": { 414 | "availableInstances": [ 415 | { 416 | "_defaultOrder": 0, 417 | "_isFastLaunch": true, 418 | "category": "General purpose", 419 | "gpuNum": 0, 420 | "hideHardwareSpecs": false, 421 | "memoryGiB": 4, 422 | "name": "ml.t3.medium", 423 | "vcpuNum": 2 424 | }, 425 | { 426 | "_defaultOrder": 1, 427 | "_isFastLaunch": false, 428 | "category": "General purpose", 429 | "gpuNum": 0, 430 | "hideHardwareSpecs": false, 431 | "memoryGiB": 8, 432 | "name": "ml.t3.large", 433 | "vcpuNum": 2 434 | }, 435 | { 436 | "_defaultOrder": 2, 437 | "_isFastLaunch": false, 438 | "category": "General purpose", 439 | "gpuNum": 0, 440 | "hideHardwareSpecs": false, 441 | "memoryGiB": 16, 442 | "name": "ml.t3.xlarge", 443 | "vcpuNum": 4 444 | }, 445 | { 446 | "_defaultOrder": 3, 447 | "_isFastLaunch": false, 448 | "category": "General purpose", 449 | "gpuNum": 0, 450 | "hideHardwareSpecs": false, 451 | "memoryGiB": 32, 452 | "name": "ml.t3.2xlarge", 453 | "vcpuNum": 8 454 | }, 455 | { 456 | "_defaultOrder": 4, 457 | "_isFastLaunch": true, 458 | "category": "General purpose", 459 | "gpuNum": 0, 460 | "hideHardwareSpecs": false, 461 | "memoryGiB": 8, 462 | "name": "ml.m5.large", 463 | "vcpuNum": 2 464 | }, 465 | { 466 | "_defaultOrder": 5, 467 | "_isFastLaunch": false, 468 | "category": "General purpose", 469 | "gpuNum": 0, 470 | "hideHardwareSpecs": false, 471 | "memoryGiB": 16, 472 | "name": "ml.m5.xlarge", 473 | "vcpuNum": 4 474 | }, 475 | { 476 | "_defaultOrder": 6, 477 | "_isFastLaunch": false, 478 | "category": "General purpose", 479 | "gpuNum": 0, 480 | 
"hideHardwareSpecs": false, 481 | "memoryGiB": 32, 482 | "name": "ml.m5.2xlarge", 483 | "vcpuNum": 8 484 | }, 485 | { 486 | "_defaultOrder": 7, 487 | "_isFastLaunch": false, 488 | "category": "General purpose", 489 | "gpuNum": 0, 490 | "hideHardwareSpecs": false, 491 | "memoryGiB": 64, 492 | "name": "ml.m5.4xlarge", 493 | "vcpuNum": 16 494 | }, 495 | { 496 | "_defaultOrder": 8, 497 | "_isFastLaunch": false, 498 | "category": "General purpose", 499 | "gpuNum": 0, 500 | "hideHardwareSpecs": false, 501 | "memoryGiB": 128, 502 | "name": "ml.m5.8xlarge", 503 | "vcpuNum": 32 504 | }, 505 | { 506 | "_defaultOrder": 9, 507 | "_isFastLaunch": false, 508 | "category": "General purpose", 509 | "gpuNum": 0, 510 | "hideHardwareSpecs": false, 511 | "memoryGiB": 192, 512 | "name": "ml.m5.12xlarge", 513 | "vcpuNum": 48 514 | }, 515 | { 516 | "_defaultOrder": 10, 517 | "_isFastLaunch": false, 518 | "category": "General purpose", 519 | "gpuNum": 0, 520 | "hideHardwareSpecs": false, 521 | "memoryGiB": 256, 522 | "name": "ml.m5.16xlarge", 523 | "vcpuNum": 64 524 | }, 525 | { 526 | "_defaultOrder": 11, 527 | "_isFastLaunch": false, 528 | "category": "General purpose", 529 | "gpuNum": 0, 530 | "hideHardwareSpecs": false, 531 | "memoryGiB": 384, 532 | "name": "ml.m5.24xlarge", 533 | "vcpuNum": 96 534 | }, 535 | { 536 | "_defaultOrder": 12, 537 | "_isFastLaunch": false, 538 | "category": "General purpose", 539 | "gpuNum": 0, 540 | "hideHardwareSpecs": false, 541 | "memoryGiB": 8, 542 | "name": "ml.m5d.large", 543 | "vcpuNum": 2 544 | }, 545 | { 546 | "_defaultOrder": 13, 547 | "_isFastLaunch": false, 548 | "category": "General purpose", 549 | "gpuNum": 0, 550 | "hideHardwareSpecs": false, 551 | "memoryGiB": 16, 552 | "name": "ml.m5d.xlarge", 553 | "vcpuNum": 4 554 | }, 555 | { 556 | "_defaultOrder": 14, 557 | "_isFastLaunch": false, 558 | "category": "General purpose", 559 | "gpuNum": 0, 560 | "hideHardwareSpecs": false, 561 | "memoryGiB": 32, 562 | "name": "ml.m5d.2xlarge", 563 | 
"vcpuNum": 8 564 | }, 565 | { 566 | "_defaultOrder": 15, 567 | "_isFastLaunch": false, 568 | "category": "General purpose", 569 | "gpuNum": 0, 570 | "hideHardwareSpecs": false, 571 | "memoryGiB": 64, 572 | "name": "ml.m5d.4xlarge", 573 | "vcpuNum": 16 574 | }, 575 | { 576 | "_defaultOrder": 16, 577 | "_isFastLaunch": false, 578 | "category": "General purpose", 579 | "gpuNum": 0, 580 | "hideHardwareSpecs": false, 581 | "memoryGiB": 128, 582 | "name": "ml.m5d.8xlarge", 583 | "vcpuNum": 32 584 | }, 585 | { 586 | "_defaultOrder": 17, 587 | "_isFastLaunch": false, 588 | "category": "General purpose", 589 | "gpuNum": 0, 590 | "hideHardwareSpecs": false, 591 | "memoryGiB": 192, 592 | "name": "ml.m5d.12xlarge", 593 | "vcpuNum": 48 594 | }, 595 | { 596 | "_defaultOrder": 18, 597 | "_isFastLaunch": false, 598 | "category": "General purpose", 599 | "gpuNum": 0, 600 | "hideHardwareSpecs": false, 601 | "memoryGiB": 256, 602 | "name": "ml.m5d.16xlarge", 603 | "vcpuNum": 64 604 | }, 605 | { 606 | "_defaultOrder": 19, 607 | "_isFastLaunch": false, 608 | "category": "General purpose", 609 | "gpuNum": 0, 610 | "hideHardwareSpecs": false, 611 | "memoryGiB": 384, 612 | "name": "ml.m5d.24xlarge", 613 | "vcpuNum": 96 614 | }, 615 | { 616 | "_defaultOrder": 20, 617 | "_isFastLaunch": false, 618 | "category": "General purpose", 619 | "gpuNum": 0, 620 | "hideHardwareSpecs": true, 621 | "memoryGiB": 0, 622 | "name": "ml.geospatial.interactive", 623 | "supportedImageNames": [ 624 | "sagemaker-geospatial-v1-0" 625 | ], 626 | "vcpuNum": 0 627 | }, 628 | { 629 | "_defaultOrder": 21, 630 | "_isFastLaunch": true, 631 | "category": "Compute optimized", 632 | "gpuNum": 0, 633 | "hideHardwareSpecs": false, 634 | "memoryGiB": 4, 635 | "name": "ml.c5.large", 636 | "vcpuNum": 2 637 | }, 638 | { 639 | "_defaultOrder": 22, 640 | "_isFastLaunch": false, 641 | "category": "Compute optimized", 642 | "gpuNum": 0, 643 | "hideHardwareSpecs": false, 644 | "memoryGiB": 8, 645 | "name": "ml.c5.xlarge", 646 | 
"vcpuNum": 4 647 | }, 648 | { 649 | "_defaultOrder": 23, 650 | "_isFastLaunch": false, 651 | "category": "Compute optimized", 652 | "gpuNum": 0, 653 | "hideHardwareSpecs": false, 654 | "memoryGiB": 16, 655 | "name": "ml.c5.2xlarge", 656 | "vcpuNum": 8 657 | }, 658 | { 659 | "_defaultOrder": 24, 660 | "_isFastLaunch": false, 661 | "category": "Compute optimized", 662 | "gpuNum": 0, 663 | "hideHardwareSpecs": false, 664 | "memoryGiB": 32, 665 | "name": "ml.c5.4xlarge", 666 | "vcpuNum": 16 667 | }, 668 | { 669 | "_defaultOrder": 25, 670 | "_isFastLaunch": false, 671 | "category": "Compute optimized", 672 | "gpuNum": 0, 673 | "hideHardwareSpecs": false, 674 | "memoryGiB": 72, 675 | "name": "ml.c5.9xlarge", 676 | "vcpuNum": 36 677 | }, 678 | { 679 | "_defaultOrder": 26, 680 | "_isFastLaunch": false, 681 | "category": "Compute optimized", 682 | "gpuNum": 0, 683 | "hideHardwareSpecs": false, 684 | "memoryGiB": 96, 685 | "name": "ml.c5.12xlarge", 686 | "vcpuNum": 48 687 | }, 688 | { 689 | "_defaultOrder": 27, 690 | "_isFastLaunch": false, 691 | "category": "Compute optimized", 692 | "gpuNum": 0, 693 | "hideHardwareSpecs": false, 694 | "memoryGiB": 144, 695 | "name": "ml.c5.18xlarge", 696 | "vcpuNum": 72 697 | }, 698 | { 699 | "_defaultOrder": 28, 700 | "_isFastLaunch": false, 701 | "category": "Compute optimized", 702 | "gpuNum": 0, 703 | "hideHardwareSpecs": false, 704 | "memoryGiB": 192, 705 | "name": "ml.c5.24xlarge", 706 | "vcpuNum": 96 707 | }, 708 | { 709 | "_defaultOrder": 29, 710 | "_isFastLaunch": true, 711 | "category": "Accelerated computing", 712 | "gpuNum": 1, 713 | "hideHardwareSpecs": false, 714 | "memoryGiB": 16, 715 | "name": "ml.g4dn.xlarge", 716 | "vcpuNum": 4 717 | }, 718 | { 719 | "_defaultOrder": 30, 720 | "_isFastLaunch": false, 721 | "category": "Accelerated computing", 722 | "gpuNum": 1, 723 | "hideHardwareSpecs": false, 724 | "memoryGiB": 32, 725 | "name": "ml.g4dn.2xlarge", 726 | "vcpuNum": 8 727 | }, 728 | { 729 | "_defaultOrder": 31, 730 | 
"_isFastLaunch": false, 731 | "category": "Accelerated computing", 732 | "gpuNum": 1, 733 | "hideHardwareSpecs": false, 734 | "memoryGiB": 64, 735 | "name": "ml.g4dn.4xlarge", 736 | "vcpuNum": 16 737 | }, 738 | { 739 | "_defaultOrder": 32, 740 | "_isFastLaunch": false, 741 | "category": "Accelerated computing", 742 | "gpuNum": 1, 743 | "hideHardwareSpecs": false, 744 | "memoryGiB": 128, 745 | "name": "ml.g4dn.8xlarge", 746 | "vcpuNum": 32 747 | }, 748 | { 749 | "_defaultOrder": 33, 750 | "_isFastLaunch": false, 751 | "category": "Accelerated computing", 752 | "gpuNum": 4, 753 | "hideHardwareSpecs": false, 754 | "memoryGiB": 192, 755 | "name": "ml.g4dn.12xlarge", 756 | "vcpuNum": 48 757 | }, 758 | { 759 | "_defaultOrder": 34, 760 | "_isFastLaunch": false, 761 | "category": "Accelerated computing", 762 | "gpuNum": 1, 763 | "hideHardwareSpecs": false, 764 | "memoryGiB": 256, 765 | "name": "ml.g4dn.16xlarge", 766 | "vcpuNum": 64 767 | }, 768 | { 769 | "_defaultOrder": 35, 770 | "_isFastLaunch": false, 771 | "category": "Accelerated computing", 772 | "gpuNum": 1, 773 | "hideHardwareSpecs": false, 774 | "memoryGiB": 61, 775 | "name": "ml.p3.2xlarge", 776 | "vcpuNum": 8 777 | }, 778 | { 779 | "_defaultOrder": 36, 780 | "_isFastLaunch": false, 781 | "category": "Accelerated computing", 782 | "gpuNum": 4, 783 | "hideHardwareSpecs": false, 784 | "memoryGiB": 244, 785 | "name": "ml.p3.8xlarge", 786 | "vcpuNum": 32 787 | }, 788 | { 789 | "_defaultOrder": 37, 790 | "_isFastLaunch": false, 791 | "category": "Accelerated computing", 792 | "gpuNum": 8, 793 | "hideHardwareSpecs": false, 794 | "memoryGiB": 488, 795 | "name": "ml.p3.16xlarge", 796 | "vcpuNum": 64 797 | }, 798 | { 799 | "_defaultOrder": 38, 800 | "_isFastLaunch": false, 801 | "category": "Accelerated computing", 802 | "gpuNum": 8, 803 | "hideHardwareSpecs": false, 804 | "memoryGiB": 768, 805 | "name": "ml.p3dn.24xlarge", 806 | "vcpuNum": 96 807 | }, 808 | { 809 | "_defaultOrder": 39, 810 | "_isFastLaunch": false, 811 
| "category": "Memory Optimized", 812 | "gpuNum": 0, 813 | "hideHardwareSpecs": false, 814 | "memoryGiB": 16, 815 | "name": "ml.r5.large", 816 | "vcpuNum": 2 817 | }, 818 | { 819 | "_defaultOrder": 40, 820 | "_isFastLaunch": false, 821 | "category": "Memory Optimized", 822 | "gpuNum": 0, 823 | "hideHardwareSpecs": false, 824 | "memoryGiB": 32, 825 | "name": "ml.r5.xlarge", 826 | "vcpuNum": 4 827 | }, 828 | { 829 | "_defaultOrder": 41, 830 | "_isFastLaunch": false, 831 | "category": "Memory Optimized", 832 | "gpuNum": 0, 833 | "hideHardwareSpecs": false, 834 | "memoryGiB": 64, 835 | "name": "ml.r5.2xlarge", 836 | "vcpuNum": 8 837 | }, 838 | { 839 | "_defaultOrder": 42, 840 | "_isFastLaunch": false, 841 | "category": "Memory Optimized", 842 | "gpuNum": 0, 843 | "hideHardwareSpecs": false, 844 | "memoryGiB": 128, 845 | "name": "ml.r5.4xlarge", 846 | "vcpuNum": 16 847 | }, 848 | { 849 | "_defaultOrder": 43, 850 | "_isFastLaunch": false, 851 | "category": "Memory Optimized", 852 | "gpuNum": 0, 853 | "hideHardwareSpecs": false, 854 | "memoryGiB": 256, 855 | "name": "ml.r5.8xlarge", 856 | "vcpuNum": 32 857 | }, 858 | { 859 | "_defaultOrder": 44, 860 | "_isFastLaunch": false, 861 | "category": "Memory Optimized", 862 | "gpuNum": 0, 863 | "hideHardwareSpecs": false, 864 | "memoryGiB": 384, 865 | "name": "ml.r5.12xlarge", 866 | "vcpuNum": 48 867 | }, 868 | { 869 | "_defaultOrder": 45, 870 | "_isFastLaunch": false, 871 | "category": "Memory Optimized", 872 | "gpuNum": 0, 873 | "hideHardwareSpecs": false, 874 | "memoryGiB": 512, 875 | "name": "ml.r5.16xlarge", 876 | "vcpuNum": 64 877 | }, 878 | { 879 | "_defaultOrder": 46, 880 | "_isFastLaunch": false, 881 | "category": "Memory Optimized", 882 | "gpuNum": 0, 883 | "hideHardwareSpecs": false, 884 | "memoryGiB": 768, 885 | "name": "ml.r5.24xlarge", 886 | "vcpuNum": 96 887 | }, 888 | { 889 | "_defaultOrder": 47, 890 | "_isFastLaunch": false, 891 | "category": "Accelerated computing", 892 | "gpuNum": 1, 893 | "hideHardwareSpecs": 
false, 894 | "memoryGiB": 16, 895 | "name": "ml.g5.xlarge", 896 | "vcpuNum": 4 897 | }, 898 | { 899 | "_defaultOrder": 48, 900 | "_isFastLaunch": false, 901 | "category": "Accelerated computing", 902 | "gpuNum": 1, 903 | "hideHardwareSpecs": false, 904 | "memoryGiB": 32, 905 | "name": "ml.g5.2xlarge", 906 | "vcpuNum": 8 907 | }, 908 | { 909 | "_defaultOrder": 49, 910 | "_isFastLaunch": false, 911 | "category": "Accelerated computing", 912 | "gpuNum": 1, 913 | "hideHardwareSpecs": false, 914 | "memoryGiB": 64, 915 | "name": "ml.g5.4xlarge", 916 | "vcpuNum": 16 917 | }, 918 | { 919 | "_defaultOrder": 50, 920 | "_isFastLaunch": false, 921 | "category": "Accelerated computing", 922 | "gpuNum": 1, 923 | "hideHardwareSpecs": false, 924 | "memoryGiB": 128, 925 | "name": "ml.g5.8xlarge", 926 | "vcpuNum": 32 927 | }, 928 | { 929 | "_defaultOrder": 51, 930 | "_isFastLaunch": false, 931 | "category": "Accelerated computing", 932 | "gpuNum": 1, 933 | "hideHardwareSpecs": false, 934 | "memoryGiB": 256, 935 | "name": "ml.g5.16xlarge", 936 | "vcpuNum": 64 937 | }, 938 | { 939 | "_defaultOrder": 52, 940 | "_isFastLaunch": false, 941 | "category": "Accelerated computing", 942 | "gpuNum": 4, 943 | "hideHardwareSpecs": false, 944 | "memoryGiB": 192, 945 | "name": "ml.g5.12xlarge", 946 | "vcpuNum": 48 947 | }, 948 | { 949 | "_defaultOrder": 53, 950 | "_isFastLaunch": false, 951 | "category": "Accelerated computing", 952 | "gpuNum": 4, 953 | "hideHardwareSpecs": false, 954 | "memoryGiB": 384, 955 | "name": "ml.g5.24xlarge", 956 | "vcpuNum": 96 957 | }, 958 | { 959 | "_defaultOrder": 54, 960 | "_isFastLaunch": false, 961 | "category": "Accelerated computing", 962 | "gpuNum": 8, 963 | "hideHardwareSpecs": false, 964 | "memoryGiB": 768, 965 | "name": "ml.g5.48xlarge", 966 | "vcpuNum": 192 967 | }, 968 | { 969 | "_defaultOrder": 55, 970 | "_isFastLaunch": false, 971 | "category": "Accelerated computing", 972 | "gpuNum": 8, 973 | "hideHardwareSpecs": false, 974 | "memoryGiB": 1152, 975 | 
"name": "ml.p4d.24xlarge", 976 | "vcpuNum": 96 977 | }, 978 | { 979 | "_defaultOrder": 56, 980 | "_isFastLaunch": false, 981 | "category": "Accelerated computing", 982 | "gpuNum": 8, 983 | "hideHardwareSpecs": false, 984 | "memoryGiB": 1152, 985 | "name": "ml.p4de.24xlarge", 986 | "vcpuNum": 96 987 | } 988 | ], 989 | "instance_type": "ml.t3.medium", 990 | "kernelspec": { 991 | "display_name": "Python 3 (Data Science 3.0)", 992 | "language": "python", 993 | "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/sagemaker-data-science-310-v1" 994 | }, 995 | "language_info": { 996 | "codemirror_mode": { 997 | "name": "ipython", 998 | "version": 3 999 | }, 1000 | "file_extension": ".py", 1001 | "mimetype": "text/x-python", 1002 | "name": "python", 1003 | "nbconvert_exporter": "python", 1004 | "pygments_lexer": "ipython3", 1005 | "version": "3.10.6" 1006 | } 1007 | }, 1008 | "nbformat": 4, 1009 | "nbformat_minor": 5 1010 | } 1011 | -------------------------------------------------------------------------------- /lab5/code-llama-changes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "tags": [] 7 | }, 8 | "source": [ 9 | "# SageMaker Code Generation with Code Llama: Deploying Pre trained Code Llama" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "#### Importing sys and other important libraries: Lanchain, Chromadb as our vectordb to store indexes and boto3 for our environment" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": { 23 | "tags": [] 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "import sys\n", 28 | "!{sys.executable} -m pip install langchain\n", 29 | "!{sys.executable} -m pip install chromadb\n", 30 | "!{sys.executable} -m pip install --upgrade boto3" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": 
{}, 36 | "source": [ 37 | "#### Import other libraries and document loaders as well as libraries like the recursive character splitting to be able to efficiently generate code through our model" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": false, 45 | "jupyter": { 46 | "outputs_hidden": false 47 | }, 48 | "tags": [] 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "import argparse\n", 53 | "import os\n", 54 | "from langchain.document_loaders import DirectoryLoader\n", 55 | "import chromadb\n", 56 | "import json\n", 57 | "import boto3\n", 58 | "import time\n", 59 | "import glob\n", 60 | "from langchain.text_splitter import (\n", 61 | " RecursiveCharacterTextSplitter,\n", 62 | " Language,\n", 63 | ")\n", 64 | "import ast\n", 65 | "import sys" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": { 71 | "tags": [] 72 | }, 73 | "source": [ 74 | "### Deploy the code Llama 7b model\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": false, 82 | "jupyter": { 83 | "outputs_hidden": false 84 | }, 85 | "tags": [] 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "model_id = \"meta-textgeneration-llama-codellama-7b\"\n", 90 | "\n", 91 | "from sagemaker.jumpstart.model import JumpStartModel\n", 92 | "\n", 93 | "model = JumpStartModel(model_id=model_id)\n", 94 | "predictor = model.deploy(accept_eula = True)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": { 101 | "tags": [] 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "# Get the name of the endpoint\n", 106 | "endpoint_name = str(predictor.endpoint)\n", 107 | "\n", 108 | "print(endpoint_name)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "tags": [] 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "def query_endpoint(payload):\n", 120 | " client = 
boto3.client('runtime.sagemaker')\n", 121 | " response = client.invoke_endpoint(\n", 122 | " EndpointName=endpoint_name,\n", 123 | " ContentType='application/json',\n", 124 | " Body=json.dumps(payload).encode('utf-8'),\n", 125 | " CustomAttributes=\"accept_eula=true\",\n", 126 | " )\n", 127 | " response = response[\"Body\"].read().decode(\"utf8\")\n", 128 | " response = json.loads(response)\n", 129 | " return response" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "### Supported parameters\n", 137 | "\n", 138 | "***\n", 139 | "This model supports many parameters while performing inference. They include:\n", 140 | "\n", 141 | "* **max_length:** Model generates text until the output length (which includes the input context length) reaches `max_length`. If specified, it must be a positive integer.\n", 142 | "* **max_new_tokens:** Model generates text until the output length (excluding the input context length) reaches `max_new_tokens`. If specified, it must be a positive integer.\n", 143 | "* **num_beams:** Number of beams used in the greedy search. If specified, it must be integer greater than or equal to `num_return_sequences`.\n", 144 | "* **no_repeat_ngram_size:** Model ensures that a sequence of words of `no_repeat_ngram_size` is not repeated in the output sequence. If specified, it must be a positive integer greater than 1.\n", 145 | "* **temperature:** Controls the randomness in the output. Higher temperature results in output sequence with low-probability words and lower temperature results in output sequence with high-probability words. If `temperature` -> 0, it results in greedy decoding. If specified, it must be a positive float.\n", 146 | "* **early_stopping:** If True, text generation is finished when all beam hypotheses reach the end of sentence token. If specified, it must be boolean.\n", 147 | "* **do_sample:** If True, sample the next word as per the likelihood. 
If specified, it must be boolean.\n", 148 | "* **top_k:** In each step of text generation, sample from only the `top_k` most likely words. If specified, it must be a positive integer.\n", 149 | "* **top_p:** In each step of text generation, sample from the smallest possible set of words with cumulative probability `top_p`. If specified, it must be a float between 0 and 1.\n", 150 | "* **return_full_text:** If True, input text will be part of the output generated text. If specified, it must be boolean. The default value for it is False.\n", 151 | "* **stop**: If specified, it must a list of strings. Text generation stops if any one of the specified strings is generated.\n", 152 | "\n", 153 | "We may specify any subset of the parameters mentioned above while invoking an endpoint. Next, we show an example of how to invoke endpoint with these arguments.\n", 154 | "***" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": { 160 | "tags": [] 161 | }, 162 | "source": [ 163 | "## Code completion without context\n", 164 | "***\n", 165 | "This section demonstrate how to perform code generation where the expected endpoint response is the natural continuation of the prompt. No context is provided to. 
As seen below the LLM hallucinates when providing the continuation of the code because it has not been trained on the library used to test\n", 166 | "***" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": { 173 | "tags": [] 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "def print_completion(prompt: str, response: str) -> None:\n", 178 | " bold, unbold = '\\033[1m', '\\033[0m'\n", 179 | " print(f\"{bold}> Input{unbold}\\n{prompt}{bold}\\n> Output{unbold}\\n{response[0]['generated_text']}\\n\")" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": false, 187 | "jupyter": { 188 | "outputs_hidden": false 189 | }, 190 | "tags": [] 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "%%time\n", 195 | "\n", 196 | "prompt = \"\"\"\\\n", 197 | "import sagemaker\n", 198 | "\n", 199 | "# Create an HTML page about Amazon SageMaker\n", 200 | "html_content = f'''\n", 201 | "\n", 202 | "\n", 203 | "\n", 204 | " Amazon SageMaker\n", 205 | "\n", 206 | "\n", 207 | "

Welcome to Amazon SageMaker

\n", 208 | "

Amazon SageMaker is a fully managed service for building, training, and deploying machine learning models.

\n", 209 | "

Key Features

\n", 210 | "
    \n", 211 | "
  • Easy to use
  • \n", 212 | "
  • Scalable
  • \n", 213 | "
  • End-to-end machine learning workflow
  • \n", 214 | "
\n", 215 | "

Get started with SageMaker today and unlock the power of machine learning!

\n", 216 | "\n", 217 | "\n", 218 | "'''\n", 219 | "\n", 220 | "html_content\n", 221 | "\"\"\"\n", 222 | "\n", 223 | "payload = {\"inputs\": prompt, \"parameters\": {\"max_new_tokens\": 256, \"temperature\": 0.2, \"top_p\": 0.9}}\n", 224 | "response = query_endpoint(payload)\n", 225 | "print_completion(prompt, response)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "# Code completion\n", 233 | "***\n", 234 | "The examples in this section demonstrate how to perform code generation where the expected endpoint response is the natural continuation of the prompt.\n", 235 | "***" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "tags": [] 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "def print_completion(prompt: str, response: str) -> None:\n", 247 | " bold, unbold = '\\033[1m', '\\033[0m'\n", 248 | " print(f\"{bold}> Input{unbold}\\n{prompt}{bold}\\n> Output{unbold}\\n{response[0]['generated_text']}\\n\")" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": { 255 | "tags": [] 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "%%time\n", 260 | "\n", 261 | "prompt = \"\"\"\\\n", 262 | "import socket\n", 263 | "\n", 264 | "def ping_exponential_backoff(host: str):\\\n", 265 | "\"\"\"\n", 266 | "\n", 267 | "payload = {\"inputs\": prompt, \"parameters\": {\"max_new_tokens\": 256, \"temperature\": 0.2, \"top_p\": 0.9}}\n", 268 | "response = predictor.predict(payload, custom_attributes='accept_eula=true')\n", 269 | "print_completion(prompt, response)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": { 276 | "tags": [] 277 | }, 278 | "outputs": [], 279 | "source": [ 280 | "%%time\n", 281 | "\n", 282 | "prompt = \"\"\"\\\n", 283 | "import argparse\n", 284 | "\n", 285 | "def main(string: str):\n", 286 | " print(string)\n", 287 | " print(string[::-1])\n", 
288 | "\n", 289 | "if __name__ == \"__main__\":\\\n", 290 | "\"\"\"\n", 291 | "\n", 292 | "payload = {\"inputs\": prompt, \"parameters\": {\"max_new_tokens\": 256, \"temperature\": 0.2, \"top_p\": 0.9}}\n", 293 | "response = predictor.predict(payload, custom_attributes='accept_eula=true')\n", 294 | "print_completion(prompt, response)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "## Code infilling\n", 302 | "***\n", 303 | "The examples in this section demonstrate how to perform code generation where the expected endpoint response infills text between a prefix and a suffix. Only 7B, 7B-Instruct, 13B, and 13B-Instruct models have this capability, while the non-instruct models have been observed to obtain the best anecdotal performance.\n", 304 | "***" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": { 311 | "tags": [] 312 | }, 313 | "outputs": [], 314 | "source": [ 315 | "def format_infilling(prompt: str) -> str:\n", 316 | " prefix, suffix = prompt.split(\"\")\n", 317 | " return f\"
 {prefix} {suffix} \"\n",
 318 |     "\n",
 319 |     "\n",
 320 |     "def print_infilling(prompt: str, response: str) -> str:\n",
 321 |     "    green, font_reset = \"\\x1b[38;5;2m\", \"\\x1b[0m\"\n",
 322 |     "    prefix, suffix = prompt.split(\"\")\n",
 323 |     "    print(f\"{prefix}{green}{response[0]['generated_text']}{font_reset}{suffix}\")"
 324 |    ]
 325 |   },
 326 |   {
 327 |    "cell_type": "code",
 328 |    "execution_count": null,
 329 |    "metadata": {
 330 |     "tags": []
 331 |    },
 332 |    "outputs": [],
 333 |    "source": [
 334 |     "%%time\n",
 335 |     "\n",
 336 |     "prompt = '''\\\n",
 337 |     "def remove_non_ascii(s: str) -> str:\n",
 338 |     "    \"\"\"\n",
 339 |     "    return result\n",
 340 |     "'''\n",
 341 |     "prompt_formatted = format_infilling(prompt)\n",
 342 |     "payload = {\n",
 343 |     "    \"inputs\": prompt_formatted,\n",
 344 |     "    \"parameters\": {\"max_new_tokens\": 256, \"temperature\": 0.05, \"top_p\": 0.9}\n",
 345 |     "}\n",
 346 |     "response = predictor.predict(payload, custom_attributes='accept_eula=true')\n",
 347 |     "print_infilling(prompt, response)"
 348 |    ]
 349 |   },
 350 |   {
 351 |    "cell_type": "code",
 352 |    "execution_count": null,
 353 |    "metadata": {
 354 |     "tags": []
 355 |    },
 356 |    "outputs": [],
 357 |    "source": [
 358 |     "%%time\n",
 359 |     "\n",
 360 |     "prompt = \"\"\"\\\n",
 361 |     "# Installation instructions:\n",
 362 |     "    ```bash\n",
 363 |     "\n",
 364 |     "    ```\n",
 365 |     "This downloads the LLaMA inference code and installs the repository as a local pip package.\n",
 366 |     "\"\"\"\n",
 367 |     "prompt_formatted = format_infilling(prompt)\n",
 368 |     "payload = {\n",
 369 |     "    \"inputs\": prompt_formatted,\n",
 370 |     "    \"parameters\": {\"max_new_tokens\": 256, \"temperature\": 0.05, \"top_p\": 0.9}\n",
 371 |     "}\n",
 372 |     "response = predictor.predict(payload, custom_attributes='accept_eula=true')\n",
 373 |     "print_infilling(prompt, response)"
 374 |    ]
 375 |   },
 376 |   {
 377 |    "cell_type": "code",
 378 |    "execution_count": null,
 379 |    "metadata": {
 380 |     "tags": []
 381 |    },
 382 |    "outputs": [],
 383 |    "source": [
 384 |     "%%time\n",
 385 |     "\n",
 386 |     "prompt = \"\"\"\\\n",
 387 |     "class InterfaceManagerFactory(AbstractManagerFactory):\n",
 388 |     "    def __init__(\n",
 389 |     "def main():\n",
 390 |     "    factory = InterfaceManagerFactory(start=datetime.now())\n",
 391 |     "    managers = []\n",
 392 |     "    for i in range(10):\n",
 393 |     "        managers.append(factory.build(id=i))\n",
 394 |     "\"\"\"\n",
 395 |     "prompt_formatted = format_infilling(prompt)\n",
 396 |     "payload = {\n",
 397 |     "    \"inputs\": prompt_formatted,\n",
 398 |     "    \"parameters\": {\"max_new_tokens\": 256, \"temperature\": 0.05, \"top_p\": 0.9}\n",
 399 |     "}\n",
 400 |     "response = predictor.predict(payload, custom_attributes='accept_eula=true')\n",
 401 |     "print_infilling(prompt, response)"
 402 |    ]
 403 |   },
 404 |   {
 405 |    "cell_type": "code",
 406 |    "execution_count": null,
 407 |    "metadata": {
 408 |     "tags": []
 409 |    },
 410 |    "outputs": [],
 411 |    "source": [
 412 |     "%%time\n",
 413 |     "\n",
 414 |     "prompt = \"\"\"\\\n",
 415 |     "/-- A quasi-prefunctoid is 1-connected iff all its etalisations are 1-connected. -/\n",
 416 |     "theorem connected_iff_etalisation [C D : precategoroid] (P : quasi_prefunctoid C D) :\n",
 417 |     "  π₁ P = 0 ↔  = 0 :=\n",
 418 |     "begin\n",
 419 |     "  split,\n",
 420 |     "  { intros h f,\n",
 421 |     "    rw pi_1_etalisation at h,\n",
 422 |     "    simp [h],\n",
 423 |     "    refl\n",
 424 |     "  },\n",
 425 |     "  { intro h,\n",
 426 |     "    have := @quasi_adjoint C D P,\n",
 427 |     "    simp [←pi_1_etalisation, this, h],\n",
 428 |     "    refl\n",
 429 |     "  }\n",
 430 |     "end\n",
 431 |     "\"\"\"\n",
 432 |     "prompt_formatted = format_infilling(prompt)\n",
 433 |     "payload = {\n",
 434 |     "    \"inputs\": prompt_formatted,\n",
 435 |     "    \"parameters\": {\"max_new_tokens\": 256, \"temperature\": 0.05, \"top_p\": 0.9}\n",
 436 |     "}\n",
 437 |     "response = predictor.predict(payload, custom_attributes='accept_eula=true')\n",
 438 |     "print_infilling(prompt, response)"
 439 |    ]
 440 |   },
 441 |   {
 442 |    "cell_type": "markdown",
 443 |    "metadata": {},
 444 |    "source": [
 445 |     "## Clean up the endpoint\n",
 446 |     "If you are running the next lab on customizing the Code Llama model, do not delete the endpoint. Otherwise, go ahead and delete the endpoint by running the next cell."
 447 |    ]
 448 |   },
 449 |   {
 450 |    "cell_type": "code",
 451 |    "execution_count": null,
 452 |    "metadata": {
 453 |     "tags": []
 454 |    },
 455 |    "outputs": [],
 456 |    "source": [
 457 |     "# predictor.delete_endpoint()"
 458 |    ]
 459 |   }
 460 |  ],
 461 |  "metadata": {
 462 |   "availableInstances": [
 463 |    {
 464 |     "_defaultOrder": 0,
 465 |     "_isFastLaunch": true,
 466 |     "category": "General purpose",
 467 |     "gpuNum": 0,
 468 |     "hideHardwareSpecs": false,
 469 |     "memoryGiB": 4,
 470 |     "name": "ml.t3.medium",
 471 |     "vcpuNum": 2
 472 |    },
 473 |    {
 474 |     "_defaultOrder": 1,
 475 |     "_isFastLaunch": false,
 476 |     "category": "General purpose",
 477 |     "gpuNum": 0,
 478 |     "hideHardwareSpecs": false,
 479 |     "memoryGiB": 8,
 480 |     "name": "ml.t3.large",
 481 |     "vcpuNum": 2
 482 |    },
 483 |    {
 484 |     "_defaultOrder": 2,
 485 |     "_isFastLaunch": false,
 486 |     "category": "General purpose",
 487 |     "gpuNum": 0,
 488 |     "hideHardwareSpecs": false,
 489 |     "memoryGiB": 16,
 490 |     "name": "ml.t3.xlarge",
 491 |     "vcpuNum": 4
 492 |    },
 493 |    {
 494 |     "_defaultOrder": 3,
 495 |     "_isFastLaunch": false,
 496 |     "category": "General purpose",
 497 |     "gpuNum": 0,
 498 |     "hideHardwareSpecs": false,
 499 |     "memoryGiB": 32,
 500 |     "name": "ml.t3.2xlarge",
 501 |     "vcpuNum": 8
 502 |    },
 503 |    {
 504 |     "_defaultOrder": 4,
 505 |     "_isFastLaunch": true,
 506 |     "category": "General purpose",
 507 |     "gpuNum": 0,
 508 |     "hideHardwareSpecs": false,
 509 |     "memoryGiB": 8,
 510 |     "name": "ml.m5.large",
 511 |     "vcpuNum": 2
 512 |    },
 513 |    {
 514 |     "_defaultOrder": 5,
 515 |     "_isFastLaunch": false,
 516 |     "category": "General purpose",
 517 |     "gpuNum": 0,
 518 |     "hideHardwareSpecs": false,
 519 |     "memoryGiB": 16,
 520 |     "name": "ml.m5.xlarge",
 521 |     "vcpuNum": 4
 522 |    },
 523 |    {
 524 |     "_defaultOrder": 6,
 525 |     "_isFastLaunch": false,
 526 |     "category": "General purpose",
 527 |     "gpuNum": 0,
 528 |     "hideHardwareSpecs": false,
 529 |     "memoryGiB": 32,
 530 |     "name": "ml.m5.2xlarge",
 531 |     "vcpuNum": 8
 532 |    },
 533 |    {
 534 |     "_defaultOrder": 7,
 535 |     "_isFastLaunch": false,
 536 |     "category": "General purpose",
 537 |     "gpuNum": 0,
 538 |     "hideHardwareSpecs": false,
 539 |     "memoryGiB": 64,
 540 |     "name": "ml.m5.4xlarge",
 541 |     "vcpuNum": 16
 542 |    },
 543 |    {
 544 |     "_defaultOrder": 8,
 545 |     "_isFastLaunch": false,
 546 |     "category": "General purpose",
 547 |     "gpuNum": 0,
 548 |     "hideHardwareSpecs": false,
 549 |     "memoryGiB": 128,
 550 |     "name": "ml.m5.8xlarge",
 551 |     "vcpuNum": 32
 552 |    },
 553 |    {
 554 |     "_defaultOrder": 9,
 555 |     "_isFastLaunch": false,
 556 |     "category": "General purpose",
 557 |     "gpuNum": 0,
 558 |     "hideHardwareSpecs": false,
 559 |     "memoryGiB": 192,
 560 |     "name": "ml.m5.12xlarge",
 561 |     "vcpuNum": 48
 562 |    },
 563 |    {
 564 |     "_defaultOrder": 10,
 565 |     "_isFastLaunch": false,
 566 |     "category": "General purpose",
 567 |     "gpuNum": 0,
 568 |     "hideHardwareSpecs": false,
 569 |     "memoryGiB": 256,
 570 |     "name": "ml.m5.16xlarge",
 571 |     "vcpuNum": 64
 572 |    },
 573 |    {
 574 |     "_defaultOrder": 11,
 575 |     "_isFastLaunch": false,
 576 |     "category": "General purpose",
 577 |     "gpuNum": 0,
 578 |     "hideHardwareSpecs": false,
 579 |     "memoryGiB": 384,
 580 |     "name": "ml.m5.24xlarge",
 581 |     "vcpuNum": 96
 582 |    },
 583 |    {
 584 |     "_defaultOrder": 12,
 585 |     "_isFastLaunch": false,
 586 |     "category": "General purpose",
 587 |     "gpuNum": 0,
 588 |     "hideHardwareSpecs": false,
 589 |     "memoryGiB": 8,
 590 |     "name": "ml.m5d.large",
 591 |     "vcpuNum": 2
 592 |    },
 593 |    {
 594 |     "_defaultOrder": 13,
 595 |     "_isFastLaunch": false,
 596 |     "category": "General purpose",
 597 |     "gpuNum": 0,
 598 |     "hideHardwareSpecs": false,
 599 |     "memoryGiB": 16,
 600 |     "name": "ml.m5d.xlarge",
 601 |     "vcpuNum": 4
 602 |    },
 603 |    {
 604 |     "_defaultOrder": 14,
 605 |     "_isFastLaunch": false,
 606 |     "category": "General purpose",
 607 |     "gpuNum": 0,
 608 |     "hideHardwareSpecs": false,
 609 |     "memoryGiB": 32,
 610 |     "name": "ml.m5d.2xlarge",
 611 |     "vcpuNum": 8
 612 |    },
 613 |    {
 614 |     "_defaultOrder": 15,
 615 |     "_isFastLaunch": false,
 616 |     "category": "General purpose",
 617 |     "gpuNum": 0,
 618 |     "hideHardwareSpecs": false,
 619 |     "memoryGiB": 64,
 620 |     "name": "ml.m5d.4xlarge",
 621 |     "vcpuNum": 16
 622 |    },
 623 |    {
 624 |     "_defaultOrder": 16,
 625 |     "_isFastLaunch": false,
 626 |     "category": "General purpose",
 627 |     "gpuNum": 0,
 628 |     "hideHardwareSpecs": false,
 629 |     "memoryGiB": 128,
 630 |     "name": "ml.m5d.8xlarge",
 631 |     "vcpuNum": 32
 632 |    },
 633 |    {
 634 |     "_defaultOrder": 17,
 635 |     "_isFastLaunch": false,
 636 |     "category": "General purpose",
 637 |     "gpuNum": 0,
 638 |     "hideHardwareSpecs": false,
 639 |     "memoryGiB": 192,
 640 |     "name": "ml.m5d.12xlarge",
 641 |     "vcpuNum": 48
 642 |    },
 643 |    {
 644 |     "_defaultOrder": 18,
 645 |     "_isFastLaunch": false,
 646 |     "category": "General purpose",
 647 |     "gpuNum": 0,
 648 |     "hideHardwareSpecs": false,
 649 |     "memoryGiB": 256,
 650 |     "name": "ml.m5d.16xlarge",
 651 |     "vcpuNum": 64
 652 |    },
 653 |    {
 654 |     "_defaultOrder": 19,
 655 |     "_isFastLaunch": false,
 656 |     "category": "General purpose",
 657 |     "gpuNum": 0,
 658 |     "hideHardwareSpecs": false,
 659 |     "memoryGiB": 384,
 660 |     "name": "ml.m5d.24xlarge",
 661 |     "vcpuNum": 96
 662 |    },
 663 |    {
 664 |     "_defaultOrder": 20,
 665 |     "_isFastLaunch": false,
 666 |     "category": "General purpose",
 667 |     "gpuNum": 0,
 668 |     "hideHardwareSpecs": true,
 669 |     "memoryGiB": 0,
 670 |     "name": "ml.geospatial.interactive",
 671 |     "supportedImageNames": [
 672 |      "sagemaker-geospatial-v1-0"
 673 |     ],
 674 |     "vcpuNum": 0
 675 |    },
 676 |    {
 677 |     "_defaultOrder": 21,
 678 |     "_isFastLaunch": true,
 679 |     "category": "Compute optimized",
 680 |     "gpuNum": 0,
 681 |     "hideHardwareSpecs": false,
 682 |     "memoryGiB": 4,
 683 |     "name": "ml.c5.large",
 684 |     "vcpuNum": 2
 685 |    },
 686 |    {
 687 |     "_defaultOrder": 22,
 688 |     "_isFastLaunch": false,
 689 |     "category": "Compute optimized",
 690 |     "gpuNum": 0,
 691 |     "hideHardwareSpecs": false,
 692 |     "memoryGiB": 8,
 693 |     "name": "ml.c5.xlarge",
 694 |     "vcpuNum": 4
 695 |    },
 696 |    {
 697 |     "_defaultOrder": 23,
 698 |     "_isFastLaunch": false,
 699 |     "category": "Compute optimized",
 700 |     "gpuNum": 0,
 701 |     "hideHardwareSpecs": false,
 702 |     "memoryGiB": 16,
 703 |     "name": "ml.c5.2xlarge",
 704 |     "vcpuNum": 8
 705 |    },
 706 |    {
 707 |     "_defaultOrder": 24,
 708 |     "_isFastLaunch": false,
 709 |     "category": "Compute optimized",
 710 |     "gpuNum": 0,
 711 |     "hideHardwareSpecs": false,
 712 |     "memoryGiB": 32,
 713 |     "name": "ml.c5.4xlarge",
 714 |     "vcpuNum": 16
 715 |    },
 716 |    {
 717 |     "_defaultOrder": 25,
 718 |     "_isFastLaunch": false,
 719 |     "category": "Compute optimized",
 720 |     "gpuNum": 0,
 721 |     "hideHardwareSpecs": false,
 722 |     "memoryGiB": 72,
 723 |     "name": "ml.c5.9xlarge",
 724 |     "vcpuNum": 36
 725 |    },
 726 |    {
 727 |     "_defaultOrder": 26,
 728 |     "_isFastLaunch": false,
 729 |     "category": "Compute optimized",
 730 |     "gpuNum": 0,
 731 |     "hideHardwareSpecs": false,
 732 |     "memoryGiB": 96,
 733 |     "name": "ml.c5.12xlarge",
 734 |     "vcpuNum": 48
 735 |    },
 736 |    {
 737 |     "_defaultOrder": 27,
 738 |     "_isFastLaunch": false,
 739 |     "category": "Compute optimized",
 740 |     "gpuNum": 0,
 741 |     "hideHardwareSpecs": false,
 742 |     "memoryGiB": 144,
 743 |     "name": "ml.c5.18xlarge",
 744 |     "vcpuNum": 72
 745 |    },
 746 |    {
 747 |     "_defaultOrder": 28,
 748 |     "_isFastLaunch": false,
 749 |     "category": "Compute optimized",
 750 |     "gpuNum": 0,
 751 |     "hideHardwareSpecs": false,
 752 |     "memoryGiB": 192,
 753 |     "name": "ml.c5.24xlarge",
 754 |     "vcpuNum": 96
 755 |    },
 756 |    {
 757 |     "_defaultOrder": 29,
 758 |     "_isFastLaunch": true,
 759 |     "category": "Accelerated computing",
 760 |     "gpuNum": 1,
 761 |     "hideHardwareSpecs": false,
 762 |     "memoryGiB": 16,
 763 |     "name": "ml.g4dn.xlarge",
 764 |     "vcpuNum": 4
 765 |    },
 766 |    {
 767 |     "_defaultOrder": 30,
 768 |     "_isFastLaunch": false,
 769 |     "category": "Accelerated computing",
 770 |     "gpuNum": 1,
 771 |     "hideHardwareSpecs": false,
 772 |     "memoryGiB": 32,
 773 |     "name": "ml.g4dn.2xlarge",
 774 |     "vcpuNum": 8
 775 |    },
 776 |    {
 777 |     "_defaultOrder": 31,
 778 |     "_isFastLaunch": false,
 779 |     "category": "Accelerated computing",
 780 |     "gpuNum": 1,
 781 |     "hideHardwareSpecs": false,
 782 |     "memoryGiB": 64,
 783 |     "name": "ml.g4dn.4xlarge",
 784 |     "vcpuNum": 16
 785 |    },
 786 |    {
 787 |     "_defaultOrder": 32,
 788 |     "_isFastLaunch": false,
 789 |     "category": "Accelerated computing",
 790 |     "gpuNum": 1,
 791 |     "hideHardwareSpecs": false,
 792 |     "memoryGiB": 128,
 793 |     "name": "ml.g4dn.8xlarge",
 794 |     "vcpuNum": 32
 795 |    },
 796 |    {
 797 |     "_defaultOrder": 33,
 798 |     "_isFastLaunch": false,
 799 |     "category": "Accelerated computing",
 800 |     "gpuNum": 4,
 801 |     "hideHardwareSpecs": false,
 802 |     "memoryGiB": 192,
 803 |     "name": "ml.g4dn.12xlarge",
 804 |     "vcpuNum": 48
 805 |    },
 806 |    {
 807 |     "_defaultOrder": 34,
 808 |     "_isFastLaunch": false,
 809 |     "category": "Accelerated computing",
 810 |     "gpuNum": 1,
 811 |     "hideHardwareSpecs": false,
 812 |     "memoryGiB": 256,
 813 |     "name": "ml.g4dn.16xlarge",
 814 |     "vcpuNum": 64
 815 |    },
 816 |    {
 817 |     "_defaultOrder": 35,
 818 |     "_isFastLaunch": false,
 819 |     "category": "Accelerated computing",
 820 |     "gpuNum": 1,
 821 |     "hideHardwareSpecs": false,
 822 |     "memoryGiB": 61,
 823 |     "name": "ml.p3.2xlarge",
 824 |     "vcpuNum": 8
 825 |    },
 826 |    {
 827 |     "_defaultOrder": 36,
 828 |     "_isFastLaunch": false,
 829 |     "category": "Accelerated computing",
 830 |     "gpuNum": 4,
 831 |     "hideHardwareSpecs": false,
 832 |     "memoryGiB": 244,
 833 |     "name": "ml.p3.8xlarge",
 834 |     "vcpuNum": 32
 835 |    },
 836 |    {
 837 |     "_defaultOrder": 37,
 838 |     "_isFastLaunch": false,
 839 |     "category": "Accelerated computing",
 840 |     "gpuNum": 8,
 841 |     "hideHardwareSpecs": false,
 842 |     "memoryGiB": 488,
 843 |     "name": "ml.p3.16xlarge",
 844 |     "vcpuNum": 64
 845 |    },
 846 |    {
 847 |     "_defaultOrder": 38,
 848 |     "_isFastLaunch": false,
 849 |     "category": "Accelerated computing",
 850 |     "gpuNum": 8,
 851 |     "hideHardwareSpecs": false,
 852 |     "memoryGiB": 768,
 853 |     "name": "ml.p3dn.24xlarge",
 854 |     "vcpuNum": 96
 855 |    },
 856 |    {
 857 |     "_defaultOrder": 39,
 858 |     "_isFastLaunch": false,
 859 |     "category": "Memory Optimized",
 860 |     "gpuNum": 0,
 861 |     "hideHardwareSpecs": false,
 862 |     "memoryGiB": 16,
 863 |     "name": "ml.r5.large",
 864 |     "vcpuNum": 2
 865 |    },
 866 |    {
 867 |     "_defaultOrder": 40,
 868 |     "_isFastLaunch": false,
 869 |     "category": "Memory Optimized",
 870 |     "gpuNum": 0,
 871 |     "hideHardwareSpecs": false,
 872 |     "memoryGiB": 32,
 873 |     "name": "ml.r5.xlarge",
 874 |     "vcpuNum": 4
 875 |    },
 876 |    {
 877 |     "_defaultOrder": 41,
 878 |     "_isFastLaunch": false,
 879 |     "category": "Memory Optimized",
 880 |     "gpuNum": 0,
 881 |     "hideHardwareSpecs": false,
 882 |     "memoryGiB": 64,
 883 |     "name": "ml.r5.2xlarge",
 884 |     "vcpuNum": 8
 885 |    },
 886 |    {
 887 |     "_defaultOrder": 42,
 888 |     "_isFastLaunch": false,
 889 |     "category": "Memory Optimized",
 890 |     "gpuNum": 0,
 891 |     "hideHardwareSpecs": false,
 892 |     "memoryGiB": 128,
 893 |     "name": "ml.r5.4xlarge",
 894 |     "vcpuNum": 16
 895 |    },
 896 |    {
 897 |     "_defaultOrder": 43,
 898 |     "_isFastLaunch": false,
 899 |     "category": "Memory Optimized",
 900 |     "gpuNum": 0,
 901 |     "hideHardwareSpecs": false,
 902 |     "memoryGiB": 256,
 903 |     "name": "ml.r5.8xlarge",
 904 |     "vcpuNum": 32
 905 |    },
 906 |    {
 907 |     "_defaultOrder": 44,
 908 |     "_isFastLaunch": false,
 909 |     "category": "Memory Optimized",
 910 |     "gpuNum": 0,
 911 |     "hideHardwareSpecs": false,
 912 |     "memoryGiB": 384,
 913 |     "name": "ml.r5.12xlarge",
 914 |     "vcpuNum": 48
 915 |    },
 916 |    {
 917 |     "_defaultOrder": 45,
 918 |     "_isFastLaunch": false,
 919 |     "category": "Memory Optimized",
 920 |     "gpuNum": 0,
 921 |     "hideHardwareSpecs": false,
 922 |     "memoryGiB": 512,
 923 |     "name": "ml.r5.16xlarge",
 924 |     "vcpuNum": 64
 925 |    },
 926 |    {
 927 |     "_defaultOrder": 46,
 928 |     "_isFastLaunch": false,
 929 |     "category": "Memory Optimized",
 930 |     "gpuNum": 0,
 931 |     "hideHardwareSpecs": false,
 932 |     "memoryGiB": 768,
 933 |     "name": "ml.r5.24xlarge",
 934 |     "vcpuNum": 96
 935 |    },
 936 |    {
 937 |     "_defaultOrder": 47,
 938 |     "_isFastLaunch": false,
 939 |     "category": "Accelerated computing",
 940 |     "gpuNum": 1,
 941 |     "hideHardwareSpecs": false,
 942 |     "memoryGiB": 16,
 943 |     "name": "ml.g5.xlarge",
 944 |     "vcpuNum": 4
 945 |    },
 946 |    {
 947 |     "_defaultOrder": 48,
 948 |     "_isFastLaunch": false,
 949 |     "category": "Accelerated computing",
 950 |     "gpuNum": 1,
 951 |     "hideHardwareSpecs": false,
 952 |     "memoryGiB": 32,
 953 |     "name": "ml.g5.2xlarge",
 954 |     "vcpuNum": 8
 955 |    },
 956 |    {
 957 |     "_defaultOrder": 49,
 958 |     "_isFastLaunch": false,
 959 |     "category": "Accelerated computing",
 960 |     "gpuNum": 1,
 961 |     "hideHardwareSpecs": false,
 962 |     "memoryGiB": 64,
 963 |     "name": "ml.g5.4xlarge",
 964 |     "vcpuNum": 16
 965 |    },
 966 |    {
 967 |     "_defaultOrder": 50,
 968 |     "_isFastLaunch": false,
 969 |     "category": "Accelerated computing",
 970 |     "gpuNum": 1,
 971 |     "hideHardwareSpecs": false,
 972 |     "memoryGiB": 128,
 973 |     "name": "ml.g5.8xlarge",
 974 |     "vcpuNum": 32
 975 |    },
 976 |    {
 977 |     "_defaultOrder": 51,
 978 |     "_isFastLaunch": false,
 979 |     "category": "Accelerated computing",
 980 |     "gpuNum": 1,
 981 |     "hideHardwareSpecs": false,
 982 |     "memoryGiB": 256,
 983 |     "name": "ml.g5.16xlarge",
 984 |     "vcpuNum": 64
 985 |    },
 986 |    {
 987 |     "_defaultOrder": 52,
 988 |     "_isFastLaunch": false,
 989 |     "category": "Accelerated computing",
 990 |     "gpuNum": 4,
 991 |     "hideHardwareSpecs": false,
 992 |     "memoryGiB": 192,
 993 |     "name": "ml.g5.12xlarge",
 994 |     "vcpuNum": 48
 995 |    },
 996 |    {
 997 |     "_defaultOrder": 53,
 998 |     "_isFastLaunch": false,
 999 |     "category": "Accelerated computing",
1000 |     "gpuNum": 4,
1001 |     "hideHardwareSpecs": false,
1002 |     "memoryGiB": 384,
1003 |     "name": "ml.g5.24xlarge",
1004 |     "vcpuNum": 96
1005 |    },
1006 |    {
1007 |     "_defaultOrder": 54,
1008 |     "_isFastLaunch": false,
1009 |     "category": "Accelerated computing",
1010 |     "gpuNum": 8,
1011 |     "hideHardwareSpecs": false,
1012 |     "memoryGiB": 768,
1013 |     "name": "ml.g5.48xlarge",
1014 |     "vcpuNum": 192
1015 |    },
1016 |    {
1017 |     "_defaultOrder": 55,
1018 |     "_isFastLaunch": false,
1019 |     "category": "Accelerated computing",
1020 |     "gpuNum": 8,
1021 |     "hideHardwareSpecs": false,
1022 |     "memoryGiB": 1152,
1023 |     "name": "ml.p4d.24xlarge",
1024 |     "vcpuNum": 96
1025 |    },
1026 |    {
1027 |     "_defaultOrder": 56,
1028 |     "_isFastLaunch": false,
1029 |     "category": "Accelerated computing",
1030 |     "gpuNum": 8,
1031 |     "hideHardwareSpecs": false,
1032 |     "memoryGiB": 1152,
1033 |     "name": "ml.p4de.24xlarge",
1034 |     "vcpuNum": 96
1035 |    }
1036 |   ],
1037 |   "instance_type": "ml.t3.medium",
1038 |   "kernelspec": {
1039 |    "display_name": "Python 3 (Data Science 3.0)",
1040 |    "language": "python",
1041 |    "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/sagemaker-data-science-310-v1"
1042 |   },
1043 |   "language_info": {
1044 |    "codemirror_mode": {
1045 |     "name": "ipython",
1046 |     "version": 3
1047 |    },
1048 |    "file_extension": ".py",
1049 |    "mimetype": "text/x-python",
1050 |    "name": "python",
1051 |    "nbconvert_exporter": "python",
1052 |    "pygments_lexer": "ipython3",
1053 |    "version": "3.10.6"
1054 |   },
1055 |   "pycharm": {
1056 |    "stem_cell": {
1057 |     "cell_type": "raw",
1058 |     "metadata": {
1059 |      "collapsed": false
1060 |     },
1061 |     "source": []
1062 |    }
1063 |   }
1064 |  },
1065 |  "nbformat": 4,
1066 |  "nbformat_minor": 4
1067 | }
1068 | 


--------------------------------------------------------------------------------
/lab5/code-llama.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {
   6 |     "tags": []
   7 |    },
   8 |    "source": [
   9 |     "# SageMaker Code Generation with Code Llama: Deploying Pre-trained Code Llama"
  10 |    ]
  11 |   },
  12 |   {
  13 |    "cell_type": "markdown",
  14 |    "metadata": {},
  15 |    "source": [
  16 |     "#### Importing sys and other important libraries: LangChain, ChromaDB as our vector DB to store indexes, and boto3 for our environment"
  17 |    ]
  18 |   },
  19 |   {
  20 |    "cell_type": "code",
  21 |    "execution_count": null,
  22 |    "metadata": {
  23 |     "tags": []
  24 |    },
  25 |    "outputs": [],
  26 |    "source": [
  27 |     "import sys\n",
  28 |     "!{sys.executable} -m pip install langchain\n",
  29 |     "!{sys.executable} -m pip install chromadb\n",
  30 |     "!{sys.executable} -m pip install --upgrade boto3"
  31 |    ]
  32 |   },
  33 |   {
  34 |    "cell_type": "markdown",
  35 |    "metadata": {},
  36 |    "source": [
  37 |     "#### Import other libraries and document loaders, as well as utilities like the recursive character text splitter, to be able to efficiently generate code through our model"
  38 |    ]
  39 |   },
  40 |   {
  41 |    "cell_type": "code",
  42 |    "execution_count": null,
  43 |    "metadata": {
  44 |     "collapsed": false,
  45 |     "jupyter": {
  46 |      "outputs_hidden": false
  47 |     },
  48 |     "tags": []
  49 |    },
  50 |    "outputs": [],
  51 |    "source": [
  52 |     "import argparse\n",
  53 |     "import os\n",
  54 |     "from langchain.document_loaders import DirectoryLoader\n",
  55 |     "import chromadb\n",
  56 |     "import json\n",
  57 |     "import boto3\n",
  58 |     "import time\n",
  59 |     "import glob\n",
  60 |     "from langchain.text_splitter import (\n",
  61 |     "    RecursiveCharacterTextSplitter,\n",
  62 |     "    Language,\n",
  63 |     ")\n",
  64 |     "import ast\n",
  65 |     "import sys"
  66 |    ]
  67 |   },
  68 |   {
  69 |    "cell_type": "markdown",
  70 |    "metadata": {
  71 |     "tags": []
  72 |    },
  73 |    "source": [
  74 |     "### Deploy the code Llama 7b model\n"
  75 |    ]
  76 |   },
  77 |   {
  78 |    "cell_type": "code",
  79 |    "execution_count": null,
  80 |    "metadata": {
  81 |     "collapsed": false,
  82 |     "jupyter": {
  83 |      "outputs_hidden": false
  84 |     },
  85 |     "tags": []
  86 |    },
  87 |    "outputs": [],
  88 |    "source": [
  89 |     "model_id = \"meta-textgeneration-llama-codellama-7b\"\n",
  90 |     "\n",
  91 |     "from sagemaker.jumpstart.model import JumpStartModel\n",
  92 |     "\n",
  93 |     "model = JumpStartModel(model_id=model_id)\n",
  94 |     "predictor = model.deploy()"
  95 |    ]
  96 |   },
  97 |   {
  98 |    "cell_type": "code",
  99 |    "execution_count": null,
 100 |    "metadata": {
 101 |     "tags": []
 102 |    },
 103 |    "outputs": [],
 104 |    "source": [
 105 |     "# Get the name of the endpoint\n",
 106 |     "endpoint_name = str(predictor.endpoint)\n",
 107 |     "\n",
 108 |     "print(endpoint_name)"
 109 |    ]
 110 |   },
 111 |   {
 112 |    "cell_type": "code",
 113 |    "execution_count": null,
 114 |    "metadata": {
 115 |     "tags": []
 116 |    },
 117 |    "outputs": [],
 118 |    "source": [
 119 |     "def query_endpoint(payload):\n",
 120 |     "    client = boto3.client('runtime.sagemaker')\n",
 121 |     "    response = client.invoke_endpoint(\n",
 122 |     "        EndpointName=endpoint_name,\n",
 123 |     "        ContentType='application/json',\n",
 124 |     "        Body=json.dumps(payload).encode('utf-8'),\n",
 125 |     "        CustomAttributes=\"accept_eula=true\",\n",
 126 |     "    )\n",
 127 |     "    response = response[\"Body\"].read().decode(\"utf8\")\n",
 128 |     "    response = json.loads(response)\n",
 129 |     "    return response"
 130 |    ]
 131 |   },
 132 |   {
 133 |    "cell_type": "markdown",
 134 |    "metadata": {},
 135 |    "source": [
 136 |     "### Supported parameters\n",
 137 |     "\n",
 138 |     "***\n",
 139 |     "This model supports many parameters while performing inference. They include:\n",
 140 |     "\n",
 141 |     "* **max_length:** Model generates text until the output length (which includes the input context length) reaches `max_length`. If specified, it must be a positive integer.\n",
 142 |     "* **max_new_tokens:** Model generates text until the output length (excluding the input context length) reaches `max_new_tokens`. If specified, it must be a positive integer.\n",
 143 |     "* **num_beams:** Number of beams used in the greedy search. If specified, it must be an integer greater than or equal to `num_return_sequences`.\n",
 144 |     "* **no_repeat_ngram_size:** Model ensures that a sequence of words of `no_repeat_ngram_size` is not repeated in the output sequence. If specified, it must be a positive integer greater than 1.\n",
 145 |     "* **temperature:** Controls the randomness in the output. Higher temperature results in output sequence with low-probability words and lower temperature results in output sequence with high-probability words. If `temperature` -> 0, it results in greedy decoding. If specified, it must be a positive float.\n",
 146 |     "* **early_stopping:** If True, text generation is finished when all beam hypotheses reach the end of sentence token. If specified, it must be boolean.\n",
 147 |     "* **do_sample:** If True, sample the next word as per the likelihood. If specified, it must be boolean.\n",
 148 |     "* **top_k:** In each step of text generation, sample from only the `top_k` most likely words. If specified, it must be a positive integer.\n",
 149 |     "* **top_p:** In each step of text generation, sample from the smallest possible set of words with cumulative probability `top_p`. If specified, it must be a float between 0 and 1.\n",
 150 |     "* **return_full_text:** If True, input text will be part of the output generated text. If specified, it must be boolean. The default value for it is False.\n",
 151 |     "* **stop**: If specified, it must be a list of strings. Text generation stops if any one of the specified strings is generated.\n",
 152 |     "\n",
 153 |     "We may specify any subset of the parameters mentioned above while invoking an endpoint. Next, we show an example of how to invoke endpoint with these arguments.\n",
 154 |     "***"
 155 |    ]
 156 |   },
 157 |   {
 158 |    "cell_type": "markdown",
 159 |    "metadata": {
 160 |     "tags": []
 161 |    },
 162 |    "source": [
 163 |     "## Code completion without context\n",
 164 |     "***\n",
 165 |     "This section demonstrates how to perform code generation where the expected endpoint response is the natural continuation of the prompt. No context is provided. As seen below, the LLM hallucinates when providing the continuation of the code because it has not been trained on the library used in the test.\n",
 166 |     "***"
 167 |    ]
 168 |   },
 169 |   {
 170 |    "cell_type": "code",
 171 |    "execution_count": null,
 172 |    "metadata": {
 173 |     "tags": []
 174 |    },
 175 |    "outputs": [],
 176 |    "source": [
 177 |     "def print_completion(prompt: str, response: str) -> None:\n",
 178 |     "    bold, unbold = '\\033[1m', '\\033[0m'\n",
 179 |     "    print(f\"{bold}> Input{unbold}\\n{prompt}{bold}\\n> Output{unbold}\\n{response['generated_text']}\\n\")"
 180 |    ]
 181 |   },
 182 |   {
 183 |    "cell_type": "code",
 184 |    "execution_count": null,
 185 |    "metadata": {
 186 |     "collapsed": false,
 187 |     "jupyter": {
 188 |      "outputs_hidden": false
 189 |     },
 190 |     "tags": []
 191 |    },
 192 |    "outputs": [],
 193 |    "source": [
 194 |     "%%time\n",
 195 |     "\n",
 196 |     "prompt = \"\"\"\\\n",
 197 |     "import sagemaker\n",
 198 |     "\n",
 199 |     "# Create an HTML page about Amazon SageMaker\n",
 200 |     "html_content = f'''\n",
 201 |     "\n",
 202 |     "\n",
 203 |     "\n",
 204 |     "    Amazon SageMaker\n",
 205 |     "\n",
 206 |     "\n",
 207 |     "    

Welcome to Amazon SageMaker

\n", 208 | "

Amazon SageMaker is a fully managed service for building, training, and deploying machine learning models.

\n", 209 | "

Key Features

\n", 210 | "
    \n", 211 | "
  • Easy to use
  • \n", 212 | "
  • Scalable
  • \n", 213 | "
  • End-to-end machine learning workflow
  • \n", 214 | "
\n", 215 | "

Get started with SageMaker today and unlock the power of machine learning!

\n", 216 | "\n", 217 | "\n", 218 | "'''\n", 219 | "\n", 220 | "html_content\n", 221 | "\"\"\"\n", 222 | "\n", 223 | "payload = {\"inputs\": prompt, \"parameters\": {\"max_new_tokens\": 256, \"temperature\": 0.2, \"top_p\": 0.9}}\n", 224 | "response = query_endpoint(payload)\n", 225 | "print_completion(prompt, response)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "# Code completion\n", 233 | "***\n", 234 | "The examples in this section demonstrate how to perform code generation where the expected endpoint response is the natural continuation of the prompt.\n", 235 | "***" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": { 242 | "tags": [] 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "def print_completion(prompt: str, response: str) -> None:\n", 247 | " bold, unbold = '\\033[1m', '\\033[0m'\n", 248 | " print(f\"{bold}> Input{unbold}\\n{prompt}{bold}\\n> Output{unbold}\\n{response['generated_text']}\\n\")" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": { 255 | "tags": [] 256 | }, 257 | "outputs": [], 258 | "source": [ 259 | "%%time\n", 260 | "\n", 261 | "prompt = \"\"\"\\\n", 262 | "import socket\n", 263 | "\n", 264 | "def ping_exponential_backoff(host: str):\\\n", 265 | "\"\"\"\n", 266 | "\n", 267 | "payload = {\"inputs\": prompt, \"parameters\": {\"max_new_tokens\": 256, \"temperature\": 0.2, \"top_p\": 0.9}}\n", 268 | "response = predictor.predict(payload, custom_attributes='accept_eula=true')\n", 269 | "print_completion(prompt, response)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": { 276 | "tags": [] 277 | }, 278 | "outputs": [], 279 | "source": [ 280 | "%%time\n", 281 | "\n", 282 | "prompt = \"\"\"\\\n", 283 | "import argparse\n", 284 | "\n", 285 | "def main(string: str):\n", 286 | " print(string)\n", 287 | " print(string[::-1])\n", 288 
| "\n", 289 | "if __name__ == \"__main__\":\\\n", 290 | "\"\"\"\n", 291 | "\n", 292 | "payload = {\"inputs\": prompt, \"parameters\": {\"max_new_tokens\": 256, \"temperature\": 0.2, \"top_p\": 0.9}}\n", 293 | "response = predictor.predict(payload, custom_attributes='accept_eula=true')\n", 294 | "print_completion(prompt, response)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": {}, 300 | "source": [ 301 | "## Code infilling\n", 302 | "***\n", 303 | "The examples in this section demonstrate how to perform code generation where the expected endpoint response infills text between a prefix and a suffix. Only 7B, 7B-Instruct, 13B, and 13B-Instruct models have this capability, while the non-instruct models have been observed to obtain the best anecdotal performance.\n", 304 | "***" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": { 311 | "tags": [] 312 | }, 313 | "outputs": [], 314 | "source": [ 315 | "def format_infilling(prompt: str) -> str:\n", 316 | " prefix, suffix = prompt.split(\"\")\n", 317 | " return f\"
 {prefix} {suffix} \"\n",
 318 |     "\n",
 319 |     "\n",
 320 |     "def print_infilling(prompt: str, response: str) -> str:\n",
 321 |     "    green, font_reset = \"\\x1b[38;5;2m\", \"\\x1b[0m\"\n",
 322 |     "    prefix, suffix = prompt.split(\"\")\n",
 323 |     "    print(f\"{prefix}{green}{response['generated_text']}{font_reset}{suffix}\")"
 324 |    ]
 325 |   },
 326 |   {
 327 |    "cell_type": "code",
 328 |    "execution_count": null,
 329 |    "metadata": {
 330 |     "tags": []
 331 |    },
 332 |    "outputs": [],
 333 |    "source": [
 334 |     "%%time\n",
 335 |     "\n",
 336 |     "prompt = '''\\\n",
 337 |     "def remove_non_ascii(s: str) -> str:\n",
 338 |     "    \"\"\"\n",
 339 |     "    return result\n",
 340 |     "'''\n",
 341 |     "prompt_formatted = format_infilling(prompt)\n",
 342 |     "payload = {\n",
 343 |     "    \"inputs\": prompt_formatted,\n",
 344 |     "    \"parameters\": {\"max_new_tokens\": 256, \"temperature\": 0.05, \"top_p\": 0.9}\n",
 345 |     "}\n",
 346 |     "response = predictor.predict(payload, custom_attributes='accept_eula=true')\n",
 347 |     "print_infilling(prompt, response)"
 348 |    ]
 349 |   },
 350 |   {
 351 |    "cell_type": "code",
 352 |    "execution_count": null,
 353 |    "metadata": {
 354 |     "tags": []
 355 |    },
 356 |    "outputs": [],
 357 |    "source": [
 358 |     "%%time\n",
 359 |     "\n",
 360 |     "prompt = \"\"\"\\\n",
 361 |     "# Installation instructions:\n",
 362 |     "    ```bash\n",
 363 |     "\n",
 364 |     "    ```\n",
 365 |     "This downloads the LLaMA inference code and installs the repository as a local pip package.\n",
 366 |     "\"\"\"\n",
 367 |     "prompt_formatted = format_infilling(prompt)\n",
 368 |     "payload = {\n",
 369 |     "    \"inputs\": prompt_formatted,\n",
 370 |     "    \"parameters\": {\"max_new_tokens\": 256, \"temperature\": 0.05, \"top_p\": 0.9}\n",
 371 |     "}\n",
 372 |     "response = predictor.predict(payload, custom_attributes='accept_eula=true')\n",
 373 |     "print_infilling(prompt, response)"
 374 |    ]
 375 |   },
 376 |   {
 377 |    "cell_type": "code",
 378 |    "execution_count": null,
 379 |    "metadata": {
 380 |     "tags": []
 381 |    },
 382 |    "outputs": [],
 383 |    "source": [
 384 |     "%%time\n",
 385 |     "\n",
 386 |     "prompt = \"\"\"\\\n",
 387 |     "class InterfaceManagerFactory(AbstractManagerFactory):\n",
 388 |     "    def __init__(\n",
 389 |     "def main():\n",
 390 |     "    factory = InterfaceManagerFactory(start=datetime.now())\n",
 391 |     "    managers = []\n",
 392 |     "    for i in range(10):\n",
 393 |     "        managers.append(factory.build(id=i))\n",
 394 |     "\"\"\"\n",
 395 |     "prompt_formatted = format_infilling(prompt)\n",
 396 |     "payload = {\n",
 397 |     "    \"inputs\": prompt_formatted,\n",
 398 |     "    \"parameters\": {\"max_new_tokens\": 256, \"temperature\": 0.05, \"top_p\": 0.9}\n",
 399 |     "}\n",
 400 |     "response = predictor.predict(payload, custom_attributes='accept_eula=true')\n",
 401 |     "print_infilling(prompt, response)"
 402 |    ]
 403 |   },
 404 |   {
 405 |    "cell_type": "code",
 406 |    "execution_count": null,
 407 |    "metadata": {
 408 |     "tags": []
 409 |    },
 410 |    "outputs": [],
 411 |    "source": [
 412 |     "%%time\n",
 413 |     "\n",
 414 |     "prompt = \"\"\"\\\n",
 415 |     "/-- A quasi-prefunctoid is 1-connected iff all its etalisations are 1-connected. -/\n",
 416 |     "theorem connected_iff_etalisation [C D : precategoroid] (P : quasi_prefunctoid C D) :\n",
 417 |     "  π₁ P = 0 ↔  = 0 :=\n",
 418 |     "begin\n",
 419 |     "  split,\n",
 420 |     "  { intros h f,\n",
 421 |     "    rw pi_1_etalisation at h,\n",
 422 |     "    simp [h],\n",
 423 |     "    refl\n",
 424 |     "  },\n",
 425 |     "  { intro h,\n",
 426 |     "    have := @quasi_adjoint C D P,\n",
 427 |     "    simp [←pi_1_etalisation, this, h],\n",
 428 |     "    refl\n",
 429 |     "  }\n",
 430 |     "end\n",
 431 |     "\"\"\"\n",
 432 |     "prompt_formatted = format_infilling(prompt)\n",
 433 |     "payload = {\n",
 434 |     "    \"inputs\": prompt_formatted,\n",
 435 |     "    \"parameters\": {\"max_new_tokens\": 256, \"temperature\": 0.05, \"top_p\": 0.9}\n",
 436 |     "}\n",
 437 |     "response = predictor.predict(payload, custom_attributes='accept_eula=true')\n",
 438 |     "print_infilling(prompt, response)"
 439 |    ]
 440 |   },
 441 |   {
 442 |    "cell_type": "markdown",
 443 |    "metadata": {},
 444 |    "source": [
 445 |     "## Clean up the endpoint\n",
 446 |     "If you are running the next lab on customizing Code Llama model then do not delete the endpoint. Otherwise go ahead and delete the endpoint by running the next cell."
 447 |    ]
 448 |   },
 449 |   {
 450 |    "cell_type": "code",
 451 |    "execution_count": null,
 452 |    "metadata": {
 453 |     "tags": []
 454 |    },
 455 |    "outputs": [],
 456 |    "source": [
 457 |     "predictor.delete_endpoint()"
 458 |    ]
 459 |   },
 460 |   {
 461 |    "cell_type": "code",
 462 |    "execution_count": null,
 463 |    "metadata": {},
 464 |    "outputs": [],
 465 |    "source": []
 466 |   }
 467 |  ],
 468 |  "metadata": {
 469 |   "availableInstances": [
 470 |    {
 471 |     "_defaultOrder": 0,
 472 |     "_isFastLaunch": true,
 473 |     "category": "General purpose",
 474 |     "gpuNum": 0,
 475 |     "hideHardwareSpecs": false,
 476 |     "memoryGiB": 4,
 477 |     "name": "ml.t3.medium",
 478 |     "vcpuNum": 2
 479 |    },
 480 |    {
 481 |     "_defaultOrder": 1,
 482 |     "_isFastLaunch": false,
 483 |     "category": "General purpose",
 484 |     "gpuNum": 0,
 485 |     "hideHardwareSpecs": false,
 486 |     "memoryGiB": 8,
 487 |     "name": "ml.t3.large",
 488 |     "vcpuNum": 2
 489 |    },
 490 |    {
 491 |     "_defaultOrder": 2,
 492 |     "_isFastLaunch": false,
 493 |     "category": "General purpose",
 494 |     "gpuNum": 0,
 495 |     "hideHardwareSpecs": false,
 496 |     "memoryGiB": 16,
 497 |     "name": "ml.t3.xlarge",
 498 |     "vcpuNum": 4
 499 |    },
 500 |    {
 501 |     "_defaultOrder": 3,
 502 |     "_isFastLaunch": false,
 503 |     "category": "General purpose",
 504 |     "gpuNum": 0,
 505 |     "hideHardwareSpecs": false,
 506 |     "memoryGiB": 32,
 507 |     "name": "ml.t3.2xlarge",
 508 |     "vcpuNum": 8
 509 |    },
 510 |    {
 511 |     "_defaultOrder": 4,
 512 |     "_isFastLaunch": true,
 513 |     "category": "General purpose",
 514 |     "gpuNum": 0,
 515 |     "hideHardwareSpecs": false,
 516 |     "memoryGiB": 8,
 517 |     "name": "ml.m5.large",
 518 |     "vcpuNum": 2
 519 |    },
 520 |    {
 521 |     "_defaultOrder": 5,
 522 |     "_isFastLaunch": false,
 523 |     "category": "General purpose",
 524 |     "gpuNum": 0,
 525 |     "hideHardwareSpecs": false,
 526 |     "memoryGiB": 16,
 527 |     "name": "ml.m5.xlarge",
 528 |     "vcpuNum": 4
 529 |    },
 530 |    {
 531 |     "_defaultOrder": 6,
 532 |     "_isFastLaunch": false,
 533 |     "category": "General purpose",
 534 |     "gpuNum": 0,
 535 |     "hideHardwareSpecs": false,
 536 |     "memoryGiB": 32,
 537 |     "name": "ml.m5.2xlarge",
 538 |     "vcpuNum": 8
 539 |    },
 540 |    {
 541 |     "_defaultOrder": 7,
 542 |     "_isFastLaunch": false,
 543 |     "category": "General purpose",
 544 |     "gpuNum": 0,
 545 |     "hideHardwareSpecs": false,
 546 |     "memoryGiB": 64,
 547 |     "name": "ml.m5.4xlarge",
 548 |     "vcpuNum": 16
 549 |    },
 550 |    {
 551 |     "_defaultOrder": 8,
 552 |     "_isFastLaunch": false,
 553 |     "category": "General purpose",
 554 |     "gpuNum": 0,
 555 |     "hideHardwareSpecs": false,
 556 |     "memoryGiB": 128,
 557 |     "name": "ml.m5.8xlarge",
 558 |     "vcpuNum": 32
 559 |    },
 560 |    {
 561 |     "_defaultOrder": 9,
 562 |     "_isFastLaunch": false,
 563 |     "category": "General purpose",
 564 |     "gpuNum": 0,
 565 |     "hideHardwareSpecs": false,
 566 |     "memoryGiB": 192,
 567 |     "name": "ml.m5.12xlarge",
 568 |     "vcpuNum": 48
 569 |    },
 570 |    {
 571 |     "_defaultOrder": 10,
 572 |     "_isFastLaunch": false,
 573 |     "category": "General purpose",
 574 |     "gpuNum": 0,
 575 |     "hideHardwareSpecs": false,
 576 |     "memoryGiB": 256,
 577 |     "name": "ml.m5.16xlarge",
 578 |     "vcpuNum": 64
 579 |    },
 580 |    {
 581 |     "_defaultOrder": 11,
 582 |     "_isFastLaunch": false,
 583 |     "category": "General purpose",
 584 |     "gpuNum": 0,
 585 |     "hideHardwareSpecs": false,
 586 |     "memoryGiB": 384,
 587 |     "name": "ml.m5.24xlarge",
 588 |     "vcpuNum": 96
 589 |    },
 590 |    {
 591 |     "_defaultOrder": 12,
 592 |     "_isFastLaunch": false,
 593 |     "category": "General purpose",
 594 |     "gpuNum": 0,
 595 |     "hideHardwareSpecs": false,
 596 |     "memoryGiB": 8,
 597 |     "name": "ml.m5d.large",
 598 |     "vcpuNum": 2
 599 |    },
 600 |    {
 601 |     "_defaultOrder": 13,
 602 |     "_isFastLaunch": false,
 603 |     "category": "General purpose",
 604 |     "gpuNum": 0,
 605 |     "hideHardwareSpecs": false,
 606 |     "memoryGiB": 16,
 607 |     "name": "ml.m5d.xlarge",
 608 |     "vcpuNum": 4
 609 |    },
 610 |    {
 611 |     "_defaultOrder": 14,
 612 |     "_isFastLaunch": false,
 613 |     "category": "General purpose",
 614 |     "gpuNum": 0,
 615 |     "hideHardwareSpecs": false,
 616 |     "memoryGiB": 32,
 617 |     "name": "ml.m5d.2xlarge",
 618 |     "vcpuNum": 8
 619 |    },
 620 |    {
 621 |     "_defaultOrder": 15,
 622 |     "_isFastLaunch": false,
 623 |     "category": "General purpose",
 624 |     "gpuNum": 0,
 625 |     "hideHardwareSpecs": false,
 626 |     "memoryGiB": 64,
 627 |     "name": "ml.m5d.4xlarge",
 628 |     "vcpuNum": 16
 629 |    },
 630 |    {
 631 |     "_defaultOrder": 16,
 632 |     "_isFastLaunch": false,
 633 |     "category": "General purpose",
 634 |     "gpuNum": 0,
 635 |     "hideHardwareSpecs": false,
 636 |     "memoryGiB": 128,
 637 |     "name": "ml.m5d.8xlarge",
 638 |     "vcpuNum": 32
 639 |    },
 640 |    {
 641 |     "_defaultOrder": 17,
 642 |     "_isFastLaunch": false,
 643 |     "category": "General purpose",
 644 |     "gpuNum": 0,
 645 |     "hideHardwareSpecs": false,
 646 |     "memoryGiB": 192,
 647 |     "name": "ml.m5d.12xlarge",
 648 |     "vcpuNum": 48
 649 |    },
 650 |    {
 651 |     "_defaultOrder": 18,
 652 |     "_isFastLaunch": false,
 653 |     "category": "General purpose",
 654 |     "gpuNum": 0,
 655 |     "hideHardwareSpecs": false,
 656 |     "memoryGiB": 256,
 657 |     "name": "ml.m5d.16xlarge",
 658 |     "vcpuNum": 64
 659 |    },
 660 |    {
 661 |     "_defaultOrder": 19,
 662 |     "_isFastLaunch": false,
 663 |     "category": "General purpose",
 664 |     "gpuNum": 0,
 665 |     "hideHardwareSpecs": false,
 666 |     "memoryGiB": 384,
 667 |     "name": "ml.m5d.24xlarge",
 668 |     "vcpuNum": 96
 669 |    },
 670 |    {
 671 |     "_defaultOrder": 20,
 672 |     "_isFastLaunch": false,
 673 |     "category": "General purpose",
 674 |     "gpuNum": 0,
 675 |     "hideHardwareSpecs": true,
 676 |     "memoryGiB": 0,
 677 |     "name": "ml.geospatial.interactive",
 678 |     "supportedImageNames": [
 679 |      "sagemaker-geospatial-v1-0"
 680 |     ],
 681 |     "vcpuNum": 0
 682 |    },
 683 |    {
 684 |     "_defaultOrder": 21,
 685 |     "_isFastLaunch": true,
 686 |     "category": "Compute optimized",
 687 |     "gpuNum": 0,
 688 |     "hideHardwareSpecs": false,
 689 |     "memoryGiB": 4,
 690 |     "name": "ml.c5.large",
 691 |     "vcpuNum": 2
 692 |    },
 693 |    {
 694 |     "_defaultOrder": 22,
 695 |     "_isFastLaunch": false,
 696 |     "category": "Compute optimized",
 697 |     "gpuNum": 0,
 698 |     "hideHardwareSpecs": false,
 699 |     "memoryGiB": 8,
 700 |     "name": "ml.c5.xlarge",
 701 |     "vcpuNum": 4
 702 |    },
 703 |    {
 704 |     "_defaultOrder": 23,
 705 |     "_isFastLaunch": false,
 706 |     "category": "Compute optimized",
 707 |     "gpuNum": 0,
 708 |     "hideHardwareSpecs": false,
 709 |     "memoryGiB": 16,
 710 |     "name": "ml.c5.2xlarge",
 711 |     "vcpuNum": 8
 712 |    },
 713 |    {
 714 |     "_defaultOrder": 24,
 715 |     "_isFastLaunch": false,
 716 |     "category": "Compute optimized",
 717 |     "gpuNum": 0,
 718 |     "hideHardwareSpecs": false,
 719 |     "memoryGiB": 32,
 720 |     "name": "ml.c5.4xlarge",
 721 |     "vcpuNum": 16
 722 |    },
 723 |    {
 724 |     "_defaultOrder": 25,
 725 |     "_isFastLaunch": false,
 726 |     "category": "Compute optimized",
 727 |     "gpuNum": 0,
 728 |     "hideHardwareSpecs": false,
 729 |     "memoryGiB": 72,
 730 |     "name": "ml.c5.9xlarge",
 731 |     "vcpuNum": 36
 732 |    },
 733 |    {
 734 |     "_defaultOrder": 26,
 735 |     "_isFastLaunch": false,
 736 |     "category": "Compute optimized",
 737 |     "gpuNum": 0,
 738 |     "hideHardwareSpecs": false,
 739 |     "memoryGiB": 96,
 740 |     "name": "ml.c5.12xlarge",
 741 |     "vcpuNum": 48
 742 |    },
 743 |    {
 744 |     "_defaultOrder": 27,
 745 |     "_isFastLaunch": false,
 746 |     "category": "Compute optimized",
 747 |     "gpuNum": 0,
 748 |     "hideHardwareSpecs": false,
 749 |     "memoryGiB": 144,
 750 |     "name": "ml.c5.18xlarge",
 751 |     "vcpuNum": 72
 752 |    },
 753 |    {
 754 |     "_defaultOrder": 28,
 755 |     "_isFastLaunch": false,
 756 |     "category": "Compute optimized",
 757 |     "gpuNum": 0,
 758 |     "hideHardwareSpecs": false,
 759 |     "memoryGiB": 192,
 760 |     "name": "ml.c5.24xlarge",
 761 |     "vcpuNum": 96
 762 |    },
 763 |    {
 764 |     "_defaultOrder": 29,
 765 |     "_isFastLaunch": true,
 766 |     "category": "Accelerated computing",
 767 |     "gpuNum": 1,
 768 |     "hideHardwareSpecs": false,
 769 |     "memoryGiB": 16,
 770 |     "name": "ml.g4dn.xlarge",
 771 |     "vcpuNum": 4
 772 |    },
 773 |    {
 774 |     "_defaultOrder": 30,
 775 |     "_isFastLaunch": false,
 776 |     "category": "Accelerated computing",
 777 |     "gpuNum": 1,
 778 |     "hideHardwareSpecs": false,
 779 |     "memoryGiB": 32,
 780 |     "name": "ml.g4dn.2xlarge",
 781 |     "vcpuNum": 8
 782 |    },
 783 |    {
 784 |     "_defaultOrder": 31,
 785 |     "_isFastLaunch": false,
 786 |     "category": "Accelerated computing",
 787 |     "gpuNum": 1,
 788 |     "hideHardwareSpecs": false,
 789 |     "memoryGiB": 64,
 790 |     "name": "ml.g4dn.4xlarge",
 791 |     "vcpuNum": 16
 792 |    },
 793 |    {
 794 |     "_defaultOrder": 32,
 795 |     "_isFastLaunch": false,
 796 |     "category": "Accelerated computing",
 797 |     "gpuNum": 1,
 798 |     "hideHardwareSpecs": false,
 799 |     "memoryGiB": 128,
 800 |     "name": "ml.g4dn.8xlarge",
 801 |     "vcpuNum": 32
 802 |    },
 803 |    {
 804 |     "_defaultOrder": 33,
 805 |     "_isFastLaunch": false,
 806 |     "category": "Accelerated computing",
 807 |     "gpuNum": 4,
 808 |     "hideHardwareSpecs": false,
 809 |     "memoryGiB": 192,
 810 |     "name": "ml.g4dn.12xlarge",
 811 |     "vcpuNum": 48
 812 |    },
 813 |    {
 814 |     "_defaultOrder": 34,
 815 |     "_isFastLaunch": false,
 816 |     "category": "Accelerated computing",
 817 |     "gpuNum": 1,
 818 |     "hideHardwareSpecs": false,
 819 |     "memoryGiB": 256,
 820 |     "name": "ml.g4dn.16xlarge",
 821 |     "vcpuNum": 64
 822 |    },
 823 |    {
 824 |     "_defaultOrder": 35,
 825 |     "_isFastLaunch": false,
 826 |     "category": "Accelerated computing",
 827 |     "gpuNum": 1,
 828 |     "hideHardwareSpecs": false,
 829 |     "memoryGiB": 61,
 830 |     "name": "ml.p3.2xlarge",
 831 |     "vcpuNum": 8
 832 |    },
 833 |    {
 834 |     "_defaultOrder": 36,
 835 |     "_isFastLaunch": false,
 836 |     "category": "Accelerated computing",
 837 |     "gpuNum": 4,
 838 |     "hideHardwareSpecs": false,
 839 |     "memoryGiB": 244,
 840 |     "name": "ml.p3.8xlarge",
 841 |     "vcpuNum": 32
 842 |    },
 843 |    {
 844 |     "_defaultOrder": 37,
 845 |     "_isFastLaunch": false,
 846 |     "category": "Accelerated computing",
 847 |     "gpuNum": 8,
 848 |     "hideHardwareSpecs": false,
 849 |     "memoryGiB": 488,
 850 |     "name": "ml.p3.16xlarge",
 851 |     "vcpuNum": 64
 852 |    },
 853 |    {
 854 |     "_defaultOrder": 38,
 855 |     "_isFastLaunch": false,
 856 |     "category": "Accelerated computing",
 857 |     "gpuNum": 8,
 858 |     "hideHardwareSpecs": false,
 859 |     "memoryGiB": 768,
 860 |     "name": "ml.p3dn.24xlarge",
 861 |     "vcpuNum": 96
 862 |    },
 863 |    {
 864 |     "_defaultOrder": 39,
 865 |     "_isFastLaunch": false,
 866 |     "category": "Memory Optimized",
 867 |     "gpuNum": 0,
 868 |     "hideHardwareSpecs": false,
 869 |     "memoryGiB": 16,
 870 |     "name": "ml.r5.large",
 871 |     "vcpuNum": 2
 872 |    },
 873 |    {
 874 |     "_defaultOrder": 40,
 875 |     "_isFastLaunch": false,
 876 |     "category": "Memory Optimized",
 877 |     "gpuNum": 0,
 878 |     "hideHardwareSpecs": false,
 879 |     "memoryGiB": 32,
 880 |     "name": "ml.r5.xlarge",
 881 |     "vcpuNum": 4
 882 |    },
 883 |    {
 884 |     "_defaultOrder": 41,
 885 |     "_isFastLaunch": false,
 886 |     "category": "Memory Optimized",
 887 |     "gpuNum": 0,
 888 |     "hideHardwareSpecs": false,
 889 |     "memoryGiB": 64,
 890 |     "name": "ml.r5.2xlarge",
 891 |     "vcpuNum": 8
 892 |    },
 893 |    {
 894 |     "_defaultOrder": 42,
 895 |     "_isFastLaunch": false,
 896 |     "category": "Memory Optimized",
 897 |     "gpuNum": 0,
 898 |     "hideHardwareSpecs": false,
 899 |     "memoryGiB": 128,
 900 |     "name": "ml.r5.4xlarge",
 901 |     "vcpuNum": 16
 902 |    },
 903 |    {
 904 |     "_defaultOrder": 43,
 905 |     "_isFastLaunch": false,
 906 |     "category": "Memory Optimized",
 907 |     "gpuNum": 0,
 908 |     "hideHardwareSpecs": false,
 909 |     "memoryGiB": 256,
 910 |     "name": "ml.r5.8xlarge",
 911 |     "vcpuNum": 32
 912 |    },
 913 |    {
 914 |     "_defaultOrder": 44,
 915 |     "_isFastLaunch": false,
 916 |     "category": "Memory Optimized",
 917 |     "gpuNum": 0,
 918 |     "hideHardwareSpecs": false,
 919 |     "memoryGiB": 384,
 920 |     "name": "ml.r5.12xlarge",
 921 |     "vcpuNum": 48
 922 |    },
 923 |    {
 924 |     "_defaultOrder": 45,
 925 |     "_isFastLaunch": false,
 926 |     "category": "Memory Optimized",
 927 |     "gpuNum": 0,
 928 |     "hideHardwareSpecs": false,
 929 |     "memoryGiB": 512,
 930 |     "name": "ml.r5.16xlarge",
 931 |     "vcpuNum": 64
 932 |    },
 933 |    {
 934 |     "_defaultOrder": 46,
 935 |     "_isFastLaunch": false,
 936 |     "category": "Memory Optimized",
 937 |     "gpuNum": 0,
 938 |     "hideHardwareSpecs": false,
 939 |     "memoryGiB": 768,
 940 |     "name": "ml.r5.24xlarge",
 941 |     "vcpuNum": 96
 942 |    },
 943 |    {
 944 |     "_defaultOrder": 47,
 945 |     "_isFastLaunch": false,
 946 |     "category": "Accelerated computing",
 947 |     "gpuNum": 1,
 948 |     "hideHardwareSpecs": false,
 949 |     "memoryGiB": 16,
 950 |     "name": "ml.g5.xlarge",
 951 |     "vcpuNum": 4
 952 |    },
 953 |    {
 954 |     "_defaultOrder": 48,
 955 |     "_isFastLaunch": false,
 956 |     "category": "Accelerated computing",
 957 |     "gpuNum": 1,
 958 |     "hideHardwareSpecs": false,
 959 |     "memoryGiB": 32,
 960 |     "name": "ml.g5.2xlarge",
 961 |     "vcpuNum": 8
 962 |    },
 963 |    {
 964 |     "_defaultOrder": 49,
 965 |     "_isFastLaunch": false,
 966 |     "category": "Accelerated computing",
 967 |     "gpuNum": 1,
 968 |     "hideHardwareSpecs": false,
 969 |     "memoryGiB": 64,
 970 |     "name": "ml.g5.4xlarge",
 971 |     "vcpuNum": 16
 972 |    },
 973 |    {
 974 |     "_defaultOrder": 50,
 975 |     "_isFastLaunch": false,
 976 |     "category": "Accelerated computing",
 977 |     "gpuNum": 1,
 978 |     "hideHardwareSpecs": false,
 979 |     "memoryGiB": 128,
 980 |     "name": "ml.g5.8xlarge",
 981 |     "vcpuNum": 32
 982 |    },
 983 |    {
 984 |     "_defaultOrder": 51,
 985 |     "_isFastLaunch": false,
 986 |     "category": "Accelerated computing",
 987 |     "gpuNum": 1,
 988 |     "hideHardwareSpecs": false,
 989 |     "memoryGiB": 256,
 990 |     "name": "ml.g5.16xlarge",
 991 |     "vcpuNum": 64
 992 |    },
 993 |    {
 994 |     "_defaultOrder": 52,
 995 |     "_isFastLaunch": false,
 996 |     "category": "Accelerated computing",
 997 |     "gpuNum": 4,
 998 |     "hideHardwareSpecs": false,
 999 |     "memoryGiB": 192,
1000 |     "name": "ml.g5.12xlarge",
1001 |     "vcpuNum": 48
1002 |    },
1003 |    {
1004 |     "_defaultOrder": 53,
1005 |     "_isFastLaunch": false,
1006 |     "category": "Accelerated computing",
1007 |     "gpuNum": 4,
1008 |     "hideHardwareSpecs": false,
1009 |     "memoryGiB": 384,
1010 |     "name": "ml.g5.24xlarge",
1011 |     "vcpuNum": 96
1012 |    },
1013 |    {
1014 |     "_defaultOrder": 54,
1015 |     "_isFastLaunch": false,
1016 |     "category": "Accelerated computing",
1017 |     "gpuNum": 8,
1018 |     "hideHardwareSpecs": false,
1019 |     "memoryGiB": 768,
1020 |     "name": "ml.g5.48xlarge",
1021 |     "vcpuNum": 192
1022 |    },
1023 |    {
1024 |     "_defaultOrder": 55,
1025 |     "_isFastLaunch": false,
1026 |     "category": "Accelerated computing",
1027 |     "gpuNum": 8,
1028 |     "hideHardwareSpecs": false,
1029 |     "memoryGiB": 1152,
1030 |     "name": "ml.p4d.24xlarge",
1031 |     "vcpuNum": 96
1032 |    },
1033 |    {
1034 |     "_defaultOrder": 56,
1035 |     "_isFastLaunch": false,
1036 |     "category": "Accelerated computing",
1037 |     "gpuNum": 8,
1038 |     "hideHardwareSpecs": false,
1039 |     "memoryGiB": 1152,
1040 |     "name": "ml.p4de.24xlarge",
1041 |     "vcpuNum": 96
1042 |    }
1043 |   ],
1044 |   "instance_type": "ml.t3.medium",
1045 |   "kernelspec": {
1046 |    "display_name": "Python 3 (Data Science 3.0)",
1047 |    "language": "python",
1048 |    "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/sagemaker-data-science-310-v1"
1049 |   },
1050 |   "language_info": {
1051 |    "codemirror_mode": {
1052 |     "name": "ipython",
1053 |     "version": 3
1054 |    },
1055 |    "file_extension": ".py",
1056 |    "mimetype": "text/x-python",
1057 |    "name": "python",
1058 |    "nbconvert_exporter": "python",
1059 |    "pygments_lexer": "ipython3",
1060 |    "version": "3.10.6"
1061 |   },
1062 |   "pycharm": {
1063 |    "stem_cell": {
1064 |     "cell_type": "raw",
1065 |     "metadata": {
1066 |      "collapsed": false
1067 |     },
1068 |     "source": []
1069 |    }
1070 |   }
1071 |  },
1072 |  "nbformat": 4,
1073 |  "nbformat_minor": 4
1074 | }
1075 | 


--------------------------------------------------------------------------------