├── .DS_Store
├── 1. Generative AI Use Case - Summarize Dialogue
│   ├── .DS_Store
│   ├── images
│   │   ├── kernel_set_up.png
│   │   └── w1_kernel_and_instance_type.png
│   ├── Lab_1_summarize_dialogue.ipynb
│   └── .ipynb_checkpoints
│       └── Lab_1_summarize_dialogue-checkpoint.ipynb
├── 2. Fine-tune a generative AI model for dialogue summarization
│   ├── .DS_Store
│   └── images
│       ├── kernel_set_up.png
│       └── w2_kernel_and_instance_type.png
├── 3. Fine-tune FLAN-T5 with reinforcement learning to generate more-positive summaries
│   ├── .DS_Store
│   └── images
│       ├── kernel_set_up.png
│       └── w3_kernel_and_instance_type.png
└── README.md

/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rochitasundar/Generative-AI-with-Large-Language-Models/HEAD/.DS_Store
--------------------------------------------------------------------------------

/1. Generative AI Use Case - Summarize Dialogue/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rochitasundar/Generative-AI-with-Large-Language-Models/HEAD/1. Generative AI Use Case - Summarize Dialogue/.DS_Store
--------------------------------------------------------------------------------

/1. Generative AI Use Case - Summarize Dialogue/images/kernel_set_up.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rochitasundar/Generative-AI-with-Large-Language-Models/HEAD/1. Generative AI Use Case - Summarize Dialogue/images/kernel_set_up.png
--------------------------------------------------------------------------------

/2. Fine-tune a generative AI model for dialogue summarization/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rochitasundar/Generative-AI-with-Large-Language-Models/HEAD/2. Fine-tune a generative AI model for dialogue summarization/.DS_Store
--------------------------------------------------------------------------------

/1. Generative AI Use Case - Summarize Dialogue/images/w1_kernel_and_instance_type.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rochitasundar/Generative-AI-with-Large-Language-Models/HEAD/1. Generative AI Use Case - Summarize Dialogue/images/w1_kernel_and_instance_type.png
--------------------------------------------------------------------------------

/2. Fine-tune a generative AI model for dialogue summarization/images/kernel_set_up.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rochitasundar/Generative-AI-with-Large-Language-Models/HEAD/2. Fine-tune a generative AI model for dialogue summarization/images/kernel_set_up.png
--------------------------------------------------------------------------------

/3. Fine-tune FLAN-T5 with reinforcement learning to generate more-positive summaries/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rochitasundar/Generative-AI-with-Large-Language-Models/HEAD/3. Fine-tune FLAN-T5 with reinforcement learning to generate more-positive summaries/.DS_Store
--------------------------------------------------------------------------------

/2. Fine-tune a generative AI model for dialogue summarization/images/w2_kernel_and_instance_type.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rochitasundar/Generative-AI-with-Large-Language-Models/HEAD/2. Fine-tune a generative AI model for dialogue summarization/images/w2_kernel_and_instance_type.png
--------------------------------------------------------------------------------

/3. Fine-tune FLAN-T5 with reinforcement learning to generate more-positive summaries/images/kernel_set_up.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rochitasundar/Generative-AI-with-Large-Language-Models/HEAD/3. Fine-tune FLAN-T5 with reinforcement learning to generate more-positive summaries/images/kernel_set_up.png
--------------------------------------------------------------------------------

/3. Fine-tune FLAN-T5 with reinforcement learning to generate more-positive summaries/images/w3_kernel_and_instance_type.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rochitasundar/Generative-AI-with-Large-Language-Models/HEAD/3. Fine-tune FLAN-T5 with reinforcement learning to generate more-positive summaries/images/w3_kernel_and_instance_type.png
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
1 | ### Generative-AI-with-Large-Language-Models
2 | 
3 | This repository contains the lab work for the Coursera course "[Generative AI with Large Language Models](https://www.coursera.org/learn/generative-ai-with-llms)".
4 | 
5 | #### 1. Lab 1 - Generative AI Use Case: Summarize Dialogue
6 | Perform dialogue summarization using generative AI. Experiment with in-context learning, such as zero-shot, one-shot, and few-shot inference, and tune the associated configuration parameters at inference time to influence the results.
7 | 
8 | #### 2. Lab 2 - Fine-tune a generative AI model for dialogue summarization
9 | Perform instruction fine-tuning on an existing LLM from Hugging Face, the FLAN-T5 model. Explore both full fine-tuning and PEFT (Parameter-Efficient Fine-Tuning) methods such as LoRA (Low-Rank Adaptation), and evaluate the results using ROUGE metrics.
10 | 
11 | #### 3. Lab 3 - Fine-tune FLAN-T5 with reinforcement learning to generate more-positive summaries
12 | Further fine-tune a FLAN-T5 model using reinforcement learning with a reward model, such as Meta AI's hate speech reward model, to generate less toxic summaries. Use Proximal Policy Optimization (PPO) to fine-tune and detoxify the model.
13 | 
--------------------------------------------------------------------------------

/1. Generative AI Use Case - Summarize Dialogue/Lab_1_summarize_dialogue.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Generative AI Use Case: Summarize Dialogue\n",
8 |     "\n",
9 |     "Welcome to the practical side of this course. In this lab you will perform the dialogue summarization task using generative AI. You will explore how the input text affects the output of the model, and perform prompt engineering to direct it towards the task you need. 
By comparing zero shot, one shot, and few shot inferences, you will take the first step towards prompt engineering and see how it can enhance the generative output of Large Language Models." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Table of Contents" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "- [ 1 - Set up Kernel and Required Dependencies](#1)\n", 24 | "- [ 2 - Summarize Dialogue without Prompt Engineering](#2)\n", 25 | "- [ 3 - Summarize Dialogue with an Instruction Prompt](#3)\n", 26 | " - [ 3.1 - Zero Shot Inference with an Instruction Prompt](#3.1)\n", 27 | " - [ 3.2 - Zero Shot Inference with the Prompt Template from FLAN-T5](#3.2)\n", 28 | "- [ 4 - Summarize Dialogue with One Shot and Few Shot Inference](#4)\n", 29 | " - [ 4.1 - One Shot Inference](#4.1)\n", 30 | " - [ 4.2 - Few Shot Inference](#4.2)\n", 31 | "- [ 5 - Generative Configuration Parameters for Inference](#5)\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "\n", 39 | "## 1 - Set up Kernel and Required Dependencies" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": { 45 | "tags": [] 46 | }, 47 | "source": [ 48 | "First, check that the correct kernel is chosen.\n", 49 | "\n", 50 | "\n", 51 | "\n", 52 | "You can click on that (top right of the screen) to see and check the details of the image, kernel, and instance type.\n", 53 | "\n", 54 | "\n", 55 | "\n", 56 | "\"Time\n" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": { 62 | "tags": [] 63 | }, 64 | "source": [ 65 | "Now install the required packages to use PyTorch and Hugging Face transformers and datasets.\n", 66 | "\n", 67 | "\"Time" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 2, 73 | "metadata": { 74 | "tags": [] 75 | }, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "Requirement already satisfied: pip in /opt/conda/lib/python3.7/site-packages (23.2.1)\n", 82 | "Collecting pip\n", 83 | " Obtaining dependency information for pip from https://files.pythonhosted.org/packages/47/6a/453160888fab7c6a432a6e25f8afe6256d0d9f2cbd25971021da6491d899/pip-23.3.1-py3-none-any.whl.metadata\n", 84 | " Downloading pip-23.3.1-py3-none-any.whl.metadata (3.5 kB)\n", 85 | "Downloading pip-23.3.1-py3-none-any.whl (2.1 MB)\n", 86 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m17.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n", 87 | "\u001b[?25h\u001b[33mDEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", 88 | "\u001b[0mInstalling collected packages: pip\n", 89 | " Attempting uninstall: pip\n", 90 | " Found existing installation: pip 23.2.1\n", 91 | " Uninstalling pip-23.2.1:\n", 92 | " Successfully uninstalled pip-23.2.1\n", 93 | "Successfully installed pip-23.3.1\n", 94 | "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", 95 | "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n", 96 | "\u001b[33mDEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", 97 | "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", 98 | "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n", 99 | "\u001b[33mDEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", 100 | "\u001b[0m\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", 101 | "pytest-astropy 0.8.0 requires pytest-cov>=2.0, which is not installed.\n", 102 | "pytest-astropy 0.8.0 requires pytest-filter-subpackage>=0.1, which is not installed.\n", 103 | "spyder 4.0.1 requires pyqt5<5.13; python_version >= \"3\", which is not installed.\n", 104 | "spyder 4.0.1 requires pyqtwebengine<5.13; python_version >= \"3\", which is not installed.\n", 105 | "notebook 6.5.6 requires pyzmq<25,>=17, but you have pyzmq 25.1.1 which is incompatible.\n", 106 | "pathos 0.3.1 requires dill>=0.3.7, but you have dill 0.3.6 which is incompatible.\n", 107 | "pathos 0.3.1 requires multiprocess>=0.70.15, but you have multiprocess 0.70.14 which is incompatible.\n", 108 | "sparkmagic 0.20.4 requires nest-asyncio==1.5.5, but you have nest-asyncio 1.5.8 which is incompatible.\n", 109 | "spyder 4.0.1 requires jedi==0.14.1, but you have jedi 0.19.1 which is incompatible.\u001b[0m\u001b[31m\n", 110 | "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", 111 | "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" 112 | ] 113 | } 114 | ], 115 | "source": [ 116 | "%pip install --upgrade pip\n", 117 | "%pip install --disable-pip-version-check \\\n", 118 | " torch==1.13.1 \\\n", 119 | " torchdata==0.5.1 --quiet\n", 120 | "\n", 121 | "%pip install \\\n", 122 | " transformers==4.27.2 \\\n", 123 | " datasets==2.11.0 --quiet" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": { 129 | "tags": [] 130 | }, 131 | "source": [ 132 | "\"Time" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": { 138 | "tags": [] 139 | }, 140 | "source": [ 141 | "Load the datasets, Large Language Model (LLM), tokenizer, and configurator. 
Do not worry if you do not yet understand all of those components - they will be described and discussed later in the notebook."
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": 3,
147 | "metadata": {
148 | "tags": []
149 | },
150 | "outputs": [],
151 | "source": [
152 | "from datasets import load_dataset\n",
153 | "from transformers import AutoModelForSeq2SeqLM\n",
154 | "from transformers import AutoTokenizer\n",
155 | "from transformers import GenerationConfig"
156 | ]
157 | },
158 | {
159 | "cell_type": "markdown",
160 | "metadata": {},
161 | "source": [
162 | "\n",
163 | "## 2 - Summarize Dialogue without Prompt Engineering\n",
164 | "\n",
165 | "In this use case, you will be generating a summary of a dialogue with the pre-trained Large Language Model (LLM) FLAN-T5 from Hugging Face. The list of available models in the Hugging Face `transformers` package can be found [here](https://huggingface.co/docs/transformers/index). \n",
166 | "\n",
167 | "Let's load some simple dialogues from the [DialogSum](https://huggingface.co/datasets/knkarthick/dialogsum) Hugging Face dataset. This dataset contains 10,000+ dialogues with the corresponding manually labeled summaries and topics. "
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 4,
173 | "metadata": {
174 | "tags": []
175 | },
176 | "outputs": [
177 | {
178 | "data": {
179 | "application/vnd.jupyter.widget-view+json": {
180 | "model_id": "c5e9b04ddd23473289ed47c50dcf81e7",
181 | "version_major": 2,
182 | "version_minor": 0
183 | },
184 | "text/plain": [
185 | "Downloading readme: 0%| | 0.00/4.65k [00:00\n",
697 | "## 3 - Summarize Dialogue with an Instruction Prompt\n",
698 | "\n",
699 | "Prompt engineering is an important concept in using foundation models for text generation. You can check out [this blog](https://www.amazon.science/blog/emnlp-prompt-engineering-is-the-new-feature-engineering) from Amazon Science for a quick introduction to prompt engineering."
700 | ]
701 | },
702 | {
703 | "cell_type": "markdown",
704 | "metadata": {},
705 | "source": [
706 | "\n",
707 | "### 3.1 - Zero Shot Inference with an Instruction Prompt\n",
708 | "\n",
709 | "In order to instruct the model to perform a task - summarize a dialogue - you can take the dialogue and convert it into an instruction prompt. This is often called **zero shot inference**. You can check out [this blog from AWS](https://aws.amazon.com/blogs/machine-learning/zero-shot-prompting-for-the-flan-t5-foundation-model-in-amazon-sagemaker-jumpstart/) for a quick description of what zero shot learning is and why it is an important concept for LLMs.\n",
710 | "\n",
711 | "Wrap the dialogue in a descriptive instruction and see how the generated text will change:"
712 | ]
713 | },
714 | {
715 | "cell_type": "code",
716 | "execution_count": 10,
717 | "metadata": {
718 | "tags": []
719 | },
720 | "outputs": [
721 | {
722 | "name": "stdout",
723 | "output_type": "stream",
724 | "text": [
725 | "---------------------------------------------------------------------------------------------------\n",
726 | "Example 1\n",
727 | "---------------------------------------------------------------------------------------------------\n",
728 | "INPUT PROMPT:\n",
729 | "\n",
730 | "Summarize the following conversation.\n",
731 | "\n",
732 | "#Person1#: What time is it, Tom?\n",
733 | "#Person2#: Just a minute. It's ten to nine by my watch.\n",
734 | "#Person1#: Is it? I had no idea it was so late. 
I must be off now.\n", 735 | "#Person2#: What's the hurry?\n", 736 | "#Person1#: I must catch the nine-thirty train.\n", 737 | "#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.\n", 738 | "\n", 739 | "Summary:\n", 740 | " \n", 741 | "---------------------------------------------------------------------------------------------------\n", 742 | "BASELINE HUMAN SUMMARY:\n", 743 | "#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.\n", 744 | "---------------------------------------------------------------------------------------------------\n", 745 | "MODEL GENERATION - ZERO SHOT:\n", 746 | "The train is about to leave.\n", 747 | "\n", 748 | "---------------------------------------------------------------------------------------------------\n", 749 | "Example 2\n", 750 | "---------------------------------------------------------------------------------------------------\n", 751 | "INPUT PROMPT:\n", 752 | "\n", 753 | "Summarize the following conversation.\n", 754 | "\n", 755 | "#Person1#: Have you considered upgrading your system?\n", 756 | "#Person2#: Yes, but I'm not sure what exactly I would need.\n", 757 | "#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.\n", 758 | "#Person2#: That would be a definite bonus.\n", 759 | "#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.\n", 760 | "#Person2#: How can we do that?\n", 761 | "#Person1#: You'd probably need a faster processor, to begin with. And you also need a more powerful hard disc, more memory and a faster modem. Do you have a CD-ROM drive?\n", 762 | "#Person2#: No.\n", 763 | "#Person1#: Then you might want to add a CD-ROM drive too, because most new software programs are coming out on Cds.\n", 764 | "#Person2#: That sounds great. 
Thanks.\n",
765 | "\n",
766 | "Summary:\n",
767 | " \n",
768 | "---------------------------------------------------------------------------------------------------\n",
769 | "BASELINE HUMAN SUMMARY:\n",
770 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n",
771 | "---------------------------------------------------------------------------------------------------\n",
772 | "MODEL GENERATION - ZERO SHOT:\n",
773 | "#Person1#: I'm thinking of upgrading my computer.\n",
774 | "\n"
775 | ]
776 | }
777 | ],
778 | "source": [
779 | "for i, index in enumerate(example_indices):\n",
780 | "    dialogue = dataset['test'][index]['dialogue']\n",
781 | "    summary = dataset['test'][index]['summary']\n",
782 | "\n",
783 | "    prompt = f\"\"\"\n",
784 | "Summarize the following conversation.\n",
785 | "\n",
786 | "{dialogue}\n",
787 | "\n",
788 | "Summary:\n",
789 | "    \"\"\"\n",
790 | "\n",
791 | "    # Input constructed prompt instead of the dialogue.\n",
792 | "    inputs = tokenizer(prompt, return_tensors='pt')\n",
793 | "    output = tokenizer.decode(\n",
794 | "        model.generate(\n",
795 | "            inputs[\"input_ids\"], \n",
796 | "            max_new_tokens=50,\n",
797 | "        )[0], \n",
798 | "        skip_special_tokens=True\n",
799 | "    )\n",
800 | "    \n",
801 | "    print(dash_line)\n",
802 | "    print('Example ', i + 1)\n",
803 | "    print(dash_line)\n",
804 | "    print(f'INPUT PROMPT:\n{prompt}')\n",
805 | "    print(dash_line)\n",
806 | "    print(f'BASELINE HUMAN SUMMARY:\n{summary}')\n",
807 | "    print(dash_line) \n",
808 | "    print(f'MODEL GENERATION - ZERO SHOT:\n{output}\n')"
809 | ]
810 | },
811 | {
812 | "cell_type": "markdown",
813 | "metadata": {},
814 | "source": [
815 | "This is much better! However, the model still does not pick up on the nuance of the conversations."
816 | ]
817 | },
818 | {
819 | "cell_type": "markdown",
820 | "metadata": {},
821 | "source": [
822 | "**Exercise:**\n",
823 | "\n",
824 | "- Experiment with the `prompt` text and see how the inferences change. Will the inferences change if you end the prompt with just an empty string vs. `Summary: `?\n",
825 | "- Try to rephrase the beginning of the `prompt` text from `Summarize the following conversation.` to something different - and see how it influences the generated output."
826 | ]
827 | },
828 | {
829 | "cell_type": "markdown",
830 | "metadata": {},
831 | "source": [
832 | "\n",
833 | "### 3.2 - Zero Shot Inference with the Prompt Template from FLAN-T5\n",
834 | "\n",
835 | "Let's use a slightly different prompt. FLAN-T5 has many prompt templates that are published for certain tasks [here](https://github.com/google-research/FLAN/tree/main/flan/v2). In the following code, you will use one of the [pre-built FLAN-T5 prompts](https://github.com/google-research/FLAN/blob/main/flan/v2/templates.py):"
836 | ]
837 | },
838 | {
839 | "cell_type": "code",
840 | "execution_count": 11,
841 | "metadata": {
842 | "tags": []
843 | },
844 | "outputs": [
845 | {
846 | "name": "stdout",
847 | "output_type": "stream",
848 | "text": [
849 | "---------------------------------------------------------------------------------------------------\n",
850 | "Example 1\n",
851 | "---------------------------------------------------------------------------------------------------\n",
852 | "INPUT PROMPT:\n",
853 | "\n",
854 | "Dialogue:\n",
855 | "\n",
856 | "#Person1#: What time is it, Tom?\n",
857 | "#Person2#: Just a minute. It's ten to nine by my watch.\n",
858 | "#Person1#: Is it? I had no idea it was so late. 
I must be off now.\n", 859 | "#Person2#: What's the hurry?\n", 860 | "#Person1#: I must catch the nine-thirty train.\n", 861 | "#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.\n", 862 | "\n", 863 | "What was going on?\n", 864 | "\n", 865 | "---------------------------------------------------------------------------------------------------\n", 866 | "BASELINE HUMAN SUMMARY:\n", 867 | "#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.\n", 868 | "\n", 869 | "---------------------------------------------------------------------------------------------------\n", 870 | "MODEL GENERATION - ZERO SHOT:\n", 871 | "Tom is late for the train.\n", 872 | "\n", 873 | "---------------------------------------------------------------------------------------------------\n", 874 | "Example 2\n", 875 | "---------------------------------------------------------------------------------------------------\n", 876 | "INPUT PROMPT:\n", 877 | "\n", 878 | "Dialogue:\n", 879 | "\n", 880 | "#Person1#: Have you considered upgrading your system?\n", 881 | "#Person2#: Yes, but I'm not sure what exactly I would need.\n", 882 | "#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.\n", 883 | "#Person2#: That would be a definite bonus.\n", 884 | "#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.\n", 885 | "#Person2#: How can we do that?\n", 886 | "#Person1#: You'd probably need a faster processor, to begin with. And you also need a more powerful hard disc, more memory and a faster modem. Do you have a CD-ROM drive?\n", 887 | "#Person2#: No.\n", 888 | "#Person1#: Then you might want to add a CD-ROM drive too, because most new software programs are coming out on Cds.\n", 889 | "#Person2#: That sounds great. Thanks.\n", 890 | "\n", 891 | "What was going on?\n", 892 | "\n", 893 | "---------------------------------------------------------------------------------------------------\n", 894 | "BASELINE HUMAN SUMMARY:\n", 895 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 896 | "\n", 897 | "---------------------------------------------------------------------------------------------------\n", 898 | "MODEL GENERATION - ZERO SHOT:\n", 899 | "#Person1#: You could add a painting program to your software. #Person2#: That would be a bonus. #Person1#: You might also want to upgrade your hardware. 
#Person1#\n",
900 | "\n"
901 | ]
902 | }
903 | ],
904 | "source": [
905 | "for i, index in enumerate(example_indices):\n",
906 | "    dialogue = dataset['test'][index]['dialogue']\n",
907 | "    summary = dataset['test'][index]['summary']\n",
908 | "    \n",
909 | "    prompt = f\"\"\"\n",
910 | "Dialogue:\n",
911 | "\n",
912 | "{dialogue}\n",
913 | "\n",
914 | "What was going on?\n",
915 | "\"\"\"\n",
916 | "\n",
917 | "    inputs = tokenizer(prompt, return_tensors='pt')\n",
918 | "    output = tokenizer.decode(\n",
919 | "        model.generate(\n",
920 | "            inputs[\"input_ids\"], \n",
921 | "            max_new_tokens=50,\n",
922 | "        )[0], \n",
923 | "        skip_special_tokens=True\n",
924 | "    )\n",
925 | "\n",
926 | "    print(dash_line)\n",
927 | "    print('Example ', i + 1)\n",
928 | "    print(dash_line)\n",
929 | "    print(f'INPUT PROMPT:\n{prompt}')\n",
930 | "    print(dash_line)\n",
931 | "    print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')\n",
932 | "    print(dash_line)\n",
933 | "    print(f'MODEL GENERATION - ZERO SHOT:\n{output}\n')"
934 | ]
935 | },
936 | {
937 | "cell_type": "markdown",
938 | "metadata": {},
939 | "source": [
940 | "Notice that this prompt from FLAN-T5 did help a bit, but the model still struggles to pick up on the nuance of the conversation. This is what you will try to solve with few shot inferencing."
941 | ]
942 | },
943 | {
944 | "cell_type": "markdown",
945 | "metadata": {},
946 | "source": [
947 | "\n",
948 | "## 4 - Summarize Dialogue with One Shot and Few Shot Inference\n",
949 | "\n",
950 | "**One shot and few shot inference** are the practices of providing an LLM with one (one shot) or more (few shot) full examples of prompt-response pairs that match your task, before the actual prompt that you want completed. This is called \"in-context learning\" and puts your model into a state where it understands your specific task. You can read more about it in [this blog from HuggingFace](https://huggingface.co/blog/few-shot-learning-gpt-neo-and-inference-api)."
951 | ]
952 | },
953 | {
954 | "cell_type": "markdown",
955 | "metadata": {
956 | "tags": []
957 | },
958 | "source": [
959 | "\n",
960 | "### 4.1 - One Shot Inference\n",
961 | "\n",
962 | "Let's build a function that takes a list of `example_indices_full`, generates a prompt with full examples, then at the end appends the prompt which you want the model to complete (`example_index_to_summarize`). You will use the same FLAN-T5 prompt template from section [3.2](#3.2). "
963 | ]
964 | },
965 | {
966 | "cell_type": "code",
967 | "execution_count": 12,
968 | "metadata": {
969 | "tags": []
970 | },
971 | "outputs": [],
972 | "source": [
973 | "def make_prompt(example_indices_full, example_index_to_summarize):\n",
974 | "    prompt = ''\n",
975 | "    for index in example_indices_full:\n",
976 | "        dialogue = dataset['test'][index]['dialogue']\n",
977 | "        summary = dataset['test'][index]['summary']\n",
978 | "        \n",
979 | "        # The stop sequence '{summary}\n\n\n' is important for FLAN-T5. 
Other models may have their own preferred stop sequence.\n", 980 | " prompt += f\"\"\"\n", 981 | "Dialogue:\n", 982 | "\n", 983 | "{dialogue}\n", 984 | "\n", 985 | "What was going on?\n", 986 | "{summary}\n", 987 | "\n", 988 | "\n", 989 | "\"\"\"\n", 990 | " \n", 991 | " dialogue = dataset['test'][example_index_to_summarize]['dialogue']\n", 992 | " \n", 993 | " prompt += f\"\"\"\n", 994 | "Dialogue:\n", 995 | "\n", 996 | "{dialogue}\n", 997 | "\n", 998 | "What was going on?\n", 999 | "\"\"\"\n", 1000 | " \n", 1001 | " return prompt" 1002 | ] 1003 | }, 1004 | { 1005 | "cell_type": "markdown", 1006 | "metadata": { 1007 | "tags": [] 1008 | }, 1009 | "source": [ 1010 | "Construct the prompt to perform one shot inference:" 1011 | ] 1012 | }, 1013 | { 1014 | "cell_type": "code", 1015 | "execution_count": 13, 1016 | "metadata": { 1017 | "tags": [] 1018 | }, 1019 | "outputs": [ 1020 | { 1021 | "name": "stdout", 1022 | "output_type": "stream", 1023 | "text": [ 1024 | "\n", 1025 | "Dialogue:\n", 1026 | "\n", 1027 | "#Person1#: What time is it, Tom?\n", 1028 | "#Person2#: Just a minute. It's ten to nine by my watch.\n", 1029 | "#Person1#: Is it? I had no idea it was so late. I must be off now.\n", 1030 | "#Person2#: What's the hurry?\n", 1031 | "#Person1#: I must catch the nine-thirty train.\n", 1032 | "#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.\n", 1033 | "\n", 1034 | "What was going on?\n", 1035 | "#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.\n", 1036 | "\n", 1037 | "\n", 1038 | "\n", 1039 | "Dialogue:\n", 1040 | "\n", 1041 | "#Person1#: Have you considered upgrading your system?\n", 1042 | "#Person2#: Yes, but I'm not sure what exactly I would need.\n", 1043 | "#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.\n", 1044 | "#Person2#: That would be a definite bonus.\n", 1045 | "#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.\n", 1046 | "#Person2#: How can we do that?\n", 1047 | "#Person1#: You'd probably need a faster processor, to begin with. And you also need a more powerful hard disc, more memory and a faster modem. Do you have a CD-ROM drive?\n", 1048 | "#Person2#: No.\n", 1049 | "#Person1#: Then you might want to add a CD-ROM drive too, because most new software programs are coming out on Cds.\n", 1050 | "#Person2#: That sounds great. 
Thanks.\n", 1051 | "\n", 1052 | "What was going on?\n", 1053 | "\n" 1054 | ] 1055 | } 1056 | ], 1057 | "source": [ 1058 | "example_indices_full = [40]\n", 1059 | "example_index_to_summarize = 200\n", 1060 | "\n", 1061 | "one_shot_prompt = make_prompt(example_indices_full, example_index_to_summarize)\n", 1062 | "\n", 1063 | "print(one_shot_prompt)" 1064 | ] 1065 | }, 1066 | { 1067 | "cell_type": "markdown", 1068 | "metadata": { 1069 | "tags": [] 1070 | }, 1071 | "source": [ 1072 | "Now pass this prompt to perform the one shot inference:" 1073 | ] 1074 | }, 1075 | { 1076 | "cell_type": "code", 1077 | "execution_count": 14, 1078 | "metadata": { 1079 | "tags": [] 1080 | }, 1081 | "outputs": [ 1082 | { 1083 | "name": "stdout", 1084 | "output_type": "stream", 1085 | "text": [ 1086 | "---------------------------------------------------------------------------------------------------\n", 1087 | "BASELINE HUMAN SUMMARY:\n", 1088 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 1089 | "\n", 1090 | "---------------------------------------------------------------------------------------------------\n", 1091 | "MODEL GENERATION - ONE SHOT:\n", 1092 | "#Person1 wants to upgrade his system. #Person2 wants to add a painting program to his software. #Person1 wants to add a CD-ROM drive.\n" 1093 | ] 1094 | } 1095 | ], 1096 | "source": [ 1097 | "summary = dataset['test'][example_index_to_summarize]['summary']\n", 1098 | "\n", 1099 | "inputs = tokenizer(one_shot_prompt, return_tensors='pt')\n", 1100 | "output = tokenizer.decode(\n", 1101 | " model.generate(\n", 1102 | " inputs[\"input_ids\"],\n", 1103 | " max_new_tokens=50,\n", 1104 | " )[0], \n", 1105 | " skip_special_tokens=True\n", 1106 | ")\n", 1107 | "\n", 1108 | "print(dash_line)\n", 1109 | "print(f'BASELINE HUMAN SUMMARY:\\n{summary}\\n')\n", 1110 | "print(dash_line)\n", 1111 | "print(f'MODEL GENERATION - ONE SHOT:\\n{output}')" 1112 | ] 1113 | }, 1114 | { 1115 | "cell_type": "markdown", 1116 | "metadata": { 1117 | "tags": [] 1118 | }, 1119 | "source": [ 1120 | "\n", 1121 | "### 4.2 - Few Shot Inference\n", 1122 | "\n", 1123 | "Let's explore few shot inference by adding two more full dialogue-summary pairs to your prompt." 1124 | ] 1125 | }, 1126 | { 1127 | "cell_type": "code", 1128 | "execution_count": 15, 1129 | "metadata": { 1130 | "tags": [] 1131 | }, 1132 | "outputs": [ 1133 | { 1134 | "name": "stdout", 1135 | "output_type": "stream", 1136 | "text": [ 1137 | "\n", 1138 | "Dialogue:\n", 1139 | "\n", 1140 | "#Person1#: What time is it, Tom?\n", 1141 | "#Person2#: Just a minute. It's ten to nine by my watch.\n", 1142 | "#Person1#: Is it? I had no idea it was so late. I must be off now.\n", 1143 | "#Person2#: What's the hurry?\n", 1144 | "#Person1#: I must catch the nine-thirty train.\n", 1145 | "#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.\n", 1146 | "\n", 1147 | "What was going on?\n", 1148 | "#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.\n", 1149 | "\n", 1150 | "\n", 1151 | "\n", 1152 | "Dialogue:\n", 1153 | "\n", 1154 | "#Person1#: May, do you mind helping me prepare for the picnic?\n", 1155 | "#Person2#: Sure. Have you checked the weather report?\n", 1156 | "#Person1#: Yes. It says it will be sunny all day. No sign of rain at all. This is your father's favorite sausage. Sandwiches for you and Daniel.\n", 1157 | "#Person2#: No, thanks Mom. 
I'd like some toast and chicken wings.\n", 1158 | "#Person1#: Okay. Please take some fruit salad and crackers for me.\n", 1159 | "#Person2#: Done. Oh, don't forget to take napkins disposable plates, cups and picnic blanket.\n", 1160 | "#Person1#: All set. May, can you help me take all these things to the living room?\n", 1161 | "#Person2#: Yes, madam.\n", 1162 | "#Person1#: Ask Daniel to give you a hand?\n", 1163 | "#Person2#: No, mom, I can manage it by myself. His help just causes more trouble.\n", 1164 | "\n", 1165 | "What was going on?\n", 1166 | "Mom asks May to help to prepare for the picnic and May agrees.\n", 1167 | "\n", 1168 | "\n", 1169 | "\n", 1170 | "Dialogue:\n", 1171 | "\n", 1172 | "#Person1#: Hello, I bought the pendant in your shop, just before. \n", 1173 | "#Person2#: Yes. Thank you very much. \n", 1174 | "#Person1#: Now I come back to the hotel and try to show it to my friend, the pendant is broken, I'm afraid. \n", 1175 | "#Person2#: Oh, is it? \n", 1176 | "#Person1#: Would you change it to a new one? \n", 1177 | "#Person2#: Yes, certainly. You have the receipt? \n", 1178 | "#Person1#: Yes, I do. \n", 1179 | "#Person2#: Then would you kindly come to our shop with the receipt by 10 o'clock? We will replace it. \n", 1180 | "#Person1#: Thank you so much. \n", 1181 | "\n", 1182 | "What was going on?\n", 1183 | "#Person1# wants to change the broken pendant in #Person2#'s shop.\n", 1184 | "\n", 1185 | "\n", 1186 | "\n", 1187 | "Dialogue:\n", 1188 | "\n", 1189 | "#Person1#: Have you considered upgrading your system?\n", 1190 | "#Person2#: Yes, but I'm not sure what exactly I would need.\n", 1191 | "#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.\n", 1192 | "#Person2#: That would be a definite bonus.\n", 1193 | "#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.\n", 1194 | "#Person2#: How can we do that?\n", 1195 | "#Person1#: You'd probably need a faster processor, to begin with. And you also need a more powerful hard disc, more memory and a faster modem. Do you have a CD-ROM drive?\n", 1196 | "#Person2#: No.\n", 1197 | "#Person1#: Then you might want to add a CD-ROM drive too, because most new software programs are coming out on Cds.\n", 1198 | "#Person2#: That sounds great. Thanks.\n", 1199 | "\n", 1200 | "What was going on?\n", 1201 | "\n" 1202 | ] 1203 | } 1204 | ], 1205 | "source": [ 1206 | "example_indices_full = [40, 80, 120]\n", 1207 | "example_index_to_summarize = 200\n", 1208 | "\n", 1209 | "few_shot_prompt = make_prompt(example_indices_full, example_index_to_summarize)\n", 1210 | "\n", 1211 | "print(few_shot_prompt)" 1212 | ] 1213 | }, 1214 | { 1215 | "cell_type": "markdown", 1216 | "metadata": { 1217 | "tags": [] 1218 | }, 1219 | "source": [ 1220 | "Now pass this prompt to perform a few shot inference:" 1221 | ] 1222 | }, 1223 | { 1224 | "cell_type": "code", 1225 | "execution_count": 16, 1226 | "metadata": { 1227 | "tags": [] 1228 | }, 1229 | "outputs": [ 1230 | { 1231 | "name": "stderr", 1232 | "output_type": "stream", 1233 | "text": [ 1234 | "Token indices sequence length is longer than the specified maximum sequence length for this model (819 > 512). 
Running this sequence through the model will result in indexing errors\n"
1235 | ]
1236 | },
1237 | {
1238 | "name": "stdout",
1239 | "output_type": "stream",
1240 | "text": [
1241 | "---------------------------------------------------------------------------------------------------\n",
1242 | "BASELINE HUMAN SUMMARY:\n",
1243 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n",
1244 | "\n",
1245 | "---------------------------------------------------------------------------------------------------\n",
1246 | "MODEL GENERATION - FEW SHOT:\n",
1247 | "#Person1 wants to upgrade his system. #Person2 wants to add a painting program to his software. #Person1 wants to upgrade his hardware.\n"
1248 | ]
1249 | }
1250 | ],
1251 | "source": [
1252 | "summary = dataset['test'][example_index_to_summarize]['summary']\n",
1253 | "\n",
1254 | "inputs = tokenizer(few_shot_prompt, return_tensors='pt')\n",
1255 | "output = tokenizer.decode(\n",
1256 | "    model.generate(\n",
1257 | "        inputs[\"input_ids\"],\n",
1258 | "        max_new_tokens=50,\n",
1259 | "    )[0], \n",
1260 | "    skip_special_tokens=True\n",
1261 | ")\n",
1262 | "\n",
1263 | "print(dash_line)\n",
1264 | "print(f'BASELINE HUMAN SUMMARY:\n{summary}\n')\n",
1265 | "print(dash_line)\n",
1266 | "print(f'MODEL GENERATION - FEW SHOT:\n{output}')"
1267 | ]
1268 | },
1269 | {
1270 | "cell_type": "markdown",
1271 | "metadata": {
1272 | "tags": []
1273 | },
1274 | "source": [
1275 | "In this case, few shot did not provide much of an improvement over one shot inference. Anything above 5 or 6 shots will typically not help much, either. Also, you need to make sure that you do not exceed the model's input-context length, which, in our case, is 512 tokens. Anything above the context length will be ignored (a short sketch of such a check appears at the start of section 5).\n",
1276 | "\n",
1277 | "However, you can see that feeding in at least one full example (one shot) provides the model with more information and qualitatively improves the summary overall."
1278 | ]
1279 | },
1280 | {
1281 | "cell_type": "markdown",
1282 | "metadata": {
1283 | "tags": []
1284 | },
1285 | "source": [
1286 | "**Exercise:**\n",
1287 | "\n",
1288 | "Experiment with the few shot inferencing.\n",
1289 | "- Choose different dialogues - change the indices in the `example_indices_full` list and `example_index_to_summarize` value.\n",
1290 | "- Change the number of shots. Be sure to stay within the model's 512-token context length, however.\n",
1291 | "\n",
1292 | "How well does few shot inferencing work with other examples?"
1293 | ]
1294 | },
1295 | {
1296 | "cell_type": "markdown",
1297 | "metadata": {
1298 | "tags": []
1299 | },
1300 | "source": [
1301 | "\n",
1302 | "## 5 - Generative Configuration Parameters for Inference"
1303 | ]
1304 | },
1305 | {
1306 | "cell_type": "markdown",
1307 | "metadata": {
1308 | "tags": []
1309 | },
1310 | "source": [
1311 | "You can change the configuration parameters of the `generate()` method to see a different output from the LLM. So far the only parameter that you have been setting was `max_new_tokens=50`, which defines the maximum number of tokens to generate. A full list of available parameters can be found in the [Hugging Face Generation documentation](https://huggingface.co/docs/transformers/v4.29.1/en/main_classes/text_generation#transformers.GenerationConfig). \n",
1312 | "\n",
1313 | "A convenient way of organizing the configuration parameters is to use the `GenerationConfig` class. "
1314 | ]
1315 | },
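Before turning to the generation parameters, here is a minimal sketch of the context-length check mentioned at the end of section 4. It assumes the `tokenizer` and `few_shot_prompt` objects defined earlier in this notebook:

```python
# Count the tokens a prompt will occupy before sending it to the model.
prompt_token_count = len(tokenizer(few_shot_prompt)["input_ids"])

# For most Hugging Face tokenizers, model_max_length holds the model's
# context length (512 for the FLAN-T5 checkpoint used in this lab).
if prompt_token_count > tokenizer.model_max_length:
    print(f"Prompt is {prompt_token_count} tokens; anything beyond "
          f"{tokenizer.model_max_length} will be ignored by the model.")
```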
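To make the sampling parameters in the exercise below concrete: `temperature` divides the logits before the softmax, and `do_sample=True` makes `generate()` sample from the resulting distribution instead of taking the argmax. A standalone sketch, with logits made up purely for illustration over a tiny five-token vocabulary:

```python
import torch
import torch.nn.functional as F

# Made-up raw model scores (logits) for a five-token vocabulary.
logits = torch.tensor([4.0, 3.0, 2.0, 1.0, 0.0])

for temperature in [0.1, 0.5, 1.0]:
    # Lower temperature sharpens the distribution (near-greedy);
    # temperature 1.0 leaves the original softmax unchanged.
    probs = F.softmax(logits / temperature, dim=-1)
    print(f"temperature={temperature}: {[round(p, 3) for p in probs.tolist()]}")

# With do_sample=True, generate() draws the next token from this distribution
# (after any top_k / top_p filtering) rather than always picking the argmax.
next_token = torch.multinomial(F.softmax(logits / 0.5, dim=-1), num_samples=1)
print(f"sampled token index: {next_token.item()}")
```

Low temperature therefore makes the sampled summaries nearly deterministic, while values closer to 1.0 diversify them - exactly the behavior the exercise below asks you to observe.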
" 1314 | ] 1315 | }, 1316 | { 1317 | "cell_type": "markdown", 1318 | "metadata": { 1319 | "tags": [] 1320 | }, 1321 | "source": [ 1322 | "**Exercise:**\n", 1323 | "\n", 1324 | "Change the configuration parameters to investigate their influence on the output. \n", 1325 | "\n", 1326 | "Putting the parameter `do_sample = True`, you activate various decoding strategies which influence the next token from the probability distribution over the entire vocabulary. You can then adjust the outputs changing `temperature` and other parameters (such as `top_k` and `top_p`). \n", 1327 | "\n", 1328 | "Uncomment the lines in the cell below and rerun the code. Try to analyze the results. You can read some comments below." 1329 | ] 1330 | }, 1331 | { 1332 | "cell_type": "code", 1333 | "execution_count": 17, 1334 | "metadata": { 1335 | "tags": [] 1336 | }, 1337 | "outputs": [ 1338 | { 1339 | "name": "stdout", 1340 | "output_type": "stream", 1341 | "text": [ 1342 | "---------------------------------------------------------------------------------------------------\n", 1343 | "MODEL GENERATION - FEW SHOT:\n", 1344 | "#Person1 wants to upgrade his system. #Person2 wants to add a painting program to his software. #Person1 wants to upgrade his hardware.\n", 1345 | "---------------------------------------------------------------------------------------------------\n", 1346 | "BASELINE HUMAN SUMMARY:\n", 1347 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 1348 | "\n" 1349 | ] 1350 | } 1351 | ], 1352 | "source": [ 1353 | "generation_config = GenerationConfig(max_new_tokens=50)\n", 1354 | "# generation_config = GenerationConfig(max_new_tokens=10)\n", 1355 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.1)\n", 1356 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.5)\n", 1357 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=1.0)\n", 1358 | "\n", 1359 | "inputs = tokenizer(few_shot_prompt, return_tensors='pt')\n", 1360 | "output = tokenizer.decode(\n", 1361 | " model.generate(\n", 1362 | " inputs[\"input_ids\"],\n", 1363 | " generation_config=generation_config,\n", 1364 | " )[0], \n", 1365 | " skip_special_tokens=True\n", 1366 | ")\n", 1367 | "\n", 1368 | "print(dash_line)\n", 1369 | "print(f'MODEL GENERATION - FEW SHOT:\\n{output}')\n", 1370 | "print(dash_line)\n", 1371 | "print(f'BASELINE HUMAN SUMMARY:\\n{summary}\\n')" 1372 | ] 1373 | }, 1374 | { 1375 | "cell_type": "markdown", 1376 | "metadata": {}, 1377 | "source": [ 1378 | "Comments related to the choice of the parameters in the code cell above:\n", 1379 | "- Choosing `max_new_tokens=10` will make the output text too short, so the dialogue summary will be cut.\n", 1380 | "- Putting `do_sample = True` and changing the temperature value you get more flexibility in the output." 1381 | ] 1382 | }, 1383 | { 1384 | "cell_type": "markdown", 1385 | "metadata": {}, 1386 | "source": [ 1387 | "As you can see, prompt engineering can take you a long way for this use case, but there are some limitations. Next, you will start to explore how you can use fine-tuning to help your LLM to understand a particular use case in better depth!" 
1388 | ] 1389 | }, 1390 | { 1391 | "cell_type": "code", 1392 | "execution_count": 18, 1393 | "metadata": { 1394 | "tags": [] 1395 | }, 1396 | "outputs": [ 1397 | { 1398 | "name": "stdout", 1399 | "output_type": "stream", 1400 | "text": [ 1401 | "---------------------------------------------------------------------------------------------------\n", 1402 | "MODEL GENERATION - FEW SHOT:\n", 1403 | "#Person1 wants to upgrade his system.\n", 1404 | "---------------------------------------------------------------------------------------------------\n", 1405 | "BASELINE HUMAN SUMMARY:\n", 1406 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 1407 | "\n" 1408 | ] 1409 | } 1410 | ], 1411 | "source": [ 1412 | "# generation_config = GenerationConfig(max_new_tokens=50)\n", 1413 | "generation_config = GenerationConfig(max_new_tokens=10)\n", 1414 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.1)\n", 1415 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.5)\n", 1416 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=1.0)\n", 1417 | "\n", 1418 | "inputs = tokenizer(few_shot_prompt, return_tensors='pt')\n", 1419 | "output = tokenizer.decode(\n", 1420 | " model.generate(\n", 1421 | " inputs[\"input_ids\"],\n", 1422 | " generation_config=generation_config,\n", 1423 | " )[0], \n", 1424 | " skip_special_tokens=True\n", 1425 | ")\n", 1426 | "\n", 1427 | "print(dash_line)\n", 1428 | "print(f'MODEL GENERATION - FEW SHOT:\\n{output}')\n", 1429 | "print(dash_line)\n", 1430 | "print(f'BASELINE HUMAN SUMMARY:\\n{summary}\\n')" 1431 | ] 1432 | }, 1433 | { 1434 | "cell_type": "code", 1435 | "execution_count": 19, 1436 | "metadata": { 1437 | "tags": [] 1438 | }, 1439 | "outputs": [ 1440 | { 1441 | "name": "stdout", 1442 | "output_type": "stream", 1443 | "text": [ 1444 | "---------------------------------------------------------------------------------------------------\n", 1445 | "MODEL GENERATION - FEW SHOT:\n", 1446 | "#Person1 recommends upgrading the system, adding a painting program, adding a computer, adding a CD-ROM drive and adding a CD-ROM drive.\n", 1447 | "---------------------------------------------------------------------------------------------------\n", 1448 | "BASELINE HUMAN SUMMARY:\n", 1449 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 1450 | "\n" 1451 | ] 1452 | } 1453 | ], 1454 | "source": [ 1455 | "# generation_config = GenerationConfig(max_new_tokens=50)\n", 1456 | "# generation_config = GenerationConfig(max_new_tokens=10)\n", 1457 | "generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.1)\n", 1458 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.5)\n", 1459 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=1.0)\n", 1460 | "\n", 1461 | "inputs = tokenizer(few_shot_prompt, return_tensors='pt')\n", 1462 | "output = tokenizer.decode(\n", 1463 | " model.generate(\n", 1464 | " inputs[\"input_ids\"],\n", 1465 | " generation_config=generation_config,\n", 1466 | " )[0], \n", 1467 | " skip_special_tokens=True\n", 1468 | ")\n", 1469 | "\n", 1470 | "print(dash_line)\n", 1471 | "print(f'MODEL GENERATION - FEW SHOT:\\n{output}')\n", 1472 | "print(dash_line)\n", 1473 | "print(f'BASELINE HUMAN SUMMARY:\\n{summary}\\n')" 1474 | ] 1475 | }, 1476 | { 1477 | "cell_type": "code", 1478 | 
"execution_count": 20, 1479 | "metadata": { 1480 | "tags": [] 1481 | }, 1482 | "outputs": [ 1483 | { 1484 | "name": "stdout", 1485 | "output_type": "stream", 1486 | "text": [ 1487 | "---------------------------------------------------------------------------------------------------\n", 1488 | "MODEL GENERATION - FEW SHOT:\n", 1489 | "#Person1 offers several suggestions on upgrading his system.\n", 1490 | "---------------------------------------------------------------------------------------------------\n", 1491 | "BASELINE HUMAN SUMMARY:\n", 1492 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 1493 | "\n" 1494 | ] 1495 | } 1496 | ], 1497 | "source": [ 1498 | "# generation_config = GenerationConfig(max_new_tokens=50)\n", 1499 | "# generation_config = GenerationConfig(max_new_tokens=10)\n", 1500 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.1)\n", 1501 | "generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.5)\n", 1502 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=1.0)\n", 1503 | "\n", 1504 | "inputs = tokenizer(few_shot_prompt, return_tensors='pt')\n", 1505 | "output = tokenizer.decode(\n", 1506 | " model.generate(\n", 1507 | " inputs[\"input_ids\"],\n", 1508 | " generation_config=generation_config,\n", 1509 | " )[0], \n", 1510 | " skip_special_tokens=True\n", 1511 | ")\n", 1512 | "\n", 1513 | "print(dash_line)\n", 1514 | "print(f'MODEL GENERATION - FEW SHOT:\\n{output}')\n", 1515 | "print(dash_line)\n", 1516 | "print(f'BASELINE HUMAN SUMMARY:\\n{summary}\\n')" 1517 | ] 1518 | }, 1519 | { 1520 | "cell_type": "code", 1521 | "execution_count": 21, 1522 | "metadata": { 1523 | "tags": [] 1524 | }, 1525 | "outputs": [ 1526 | { 1527 | "name": "stdout", 1528 | "output_type": "stream", 1529 | "text": [ 1530 | "---------------------------------------------------------------------------------------------------\n", 1531 | "MODEL GENERATION - FEW SHOT:\n", 1532 | "There are several things you could change.\n", 1533 | "---------------------------------------------------------------------------------------------------\n", 1534 | "BASELINE HUMAN SUMMARY:\n", 1535 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 1536 | "\n" 1537 | ] 1538 | } 1539 | ], 1540 | "source": [ 1541 | "# generation_config = GenerationConfig(max_new_tokens=50)\n", 1542 | "# generation_config = GenerationConfig(max_new_tokens=10)\n", 1543 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.1)\n", 1544 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.5)\n", 1545 | "generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=1.0)\n", 1546 | "\n", 1547 | "inputs = tokenizer(few_shot_prompt, return_tensors='pt')\n", 1548 | "output = tokenizer.decode(\n", 1549 | " model.generate(\n", 1550 | " inputs[\"input_ids\"],\n", 1551 | " generation_config=generation_config,\n", 1552 | " )[0], \n", 1553 | " skip_special_tokens=True\n", 1554 | ")\n", 1555 | "\n", 1556 | "print(dash_line)\n", 1557 | "print(f'MODEL GENERATION - FEW SHOT:\\n{output}')\n", 1558 | "print(dash_line)\n", 1559 | "print(f'BASELINE HUMAN SUMMARY:\\n{summary}\\n')" 1560 | ] 1561 | }, 1562 | { 1563 | "cell_type": "code", 1564 | "execution_count": null, 1565 | "metadata": {}, 1566 | "outputs": [], 1567 | "source": [] 1568 | } 1569 | ], 1570 | "metadata": { 1571 | 
"availableInstances": [ 1572 | { 1573 | "_defaultOrder": 0, 1574 | "_isFastLaunch": true, 1575 | "category": "General purpose", 1576 | "gpuNum": 0, 1577 | "hideHardwareSpecs": false, 1578 | "memoryGiB": 4, 1579 | "name": "ml.t3.medium", 1580 | "vcpuNum": 2 1581 | }, 1582 | { 1583 | "_defaultOrder": 1, 1584 | "_isFastLaunch": false, 1585 | "category": "General purpose", 1586 | "gpuNum": 0, 1587 | "hideHardwareSpecs": false, 1588 | "memoryGiB": 8, 1589 | "name": "ml.t3.large", 1590 | "vcpuNum": 2 1591 | }, 1592 | { 1593 | "_defaultOrder": 2, 1594 | "_isFastLaunch": false, 1595 | "category": "General purpose", 1596 | "gpuNum": 0, 1597 | "hideHardwareSpecs": false, 1598 | "memoryGiB": 16, 1599 | "name": "ml.t3.xlarge", 1600 | "vcpuNum": 4 1601 | }, 1602 | { 1603 | "_defaultOrder": 3, 1604 | "_isFastLaunch": false, 1605 | "category": "General purpose", 1606 | "gpuNum": 0, 1607 | "hideHardwareSpecs": false, 1608 | "memoryGiB": 32, 1609 | "name": "ml.t3.2xlarge", 1610 | "vcpuNum": 8 1611 | }, 1612 | { 1613 | "_defaultOrder": 4, 1614 | "_isFastLaunch": true, 1615 | "category": "General purpose", 1616 | "gpuNum": 0, 1617 | "hideHardwareSpecs": false, 1618 | "memoryGiB": 8, 1619 | "name": "ml.m5.large", 1620 | "vcpuNum": 2 1621 | }, 1622 | { 1623 | "_defaultOrder": 5, 1624 | "_isFastLaunch": false, 1625 | "category": "General purpose", 1626 | "gpuNum": 0, 1627 | "hideHardwareSpecs": false, 1628 | "memoryGiB": 16, 1629 | "name": "ml.m5.xlarge", 1630 | "vcpuNum": 4 1631 | }, 1632 | { 1633 | "_defaultOrder": 6, 1634 | "_isFastLaunch": false, 1635 | "category": "General purpose", 1636 | "gpuNum": 0, 1637 | "hideHardwareSpecs": false, 1638 | "memoryGiB": 32, 1639 | "name": "ml.m5.2xlarge", 1640 | "vcpuNum": 8 1641 | }, 1642 | { 1643 | "_defaultOrder": 7, 1644 | "_isFastLaunch": false, 1645 | "category": "General purpose", 1646 | "gpuNum": 0, 1647 | "hideHardwareSpecs": false, 1648 | "memoryGiB": 64, 1649 | "name": "ml.m5.4xlarge", 1650 | "vcpuNum": 16 1651 | }, 1652 | { 1653 | "_defaultOrder": 8, 1654 | "_isFastLaunch": false, 1655 | "category": "General purpose", 1656 | "gpuNum": 0, 1657 | "hideHardwareSpecs": false, 1658 | "memoryGiB": 128, 1659 | "name": "ml.m5.8xlarge", 1660 | "vcpuNum": 32 1661 | }, 1662 | { 1663 | "_defaultOrder": 9, 1664 | "_isFastLaunch": false, 1665 | "category": "General purpose", 1666 | "gpuNum": 0, 1667 | "hideHardwareSpecs": false, 1668 | "memoryGiB": 192, 1669 | "name": "ml.m5.12xlarge", 1670 | "vcpuNum": 48 1671 | }, 1672 | { 1673 | "_defaultOrder": 10, 1674 | "_isFastLaunch": false, 1675 | "category": "General purpose", 1676 | "gpuNum": 0, 1677 | "hideHardwareSpecs": false, 1678 | "memoryGiB": 256, 1679 | "name": "ml.m5.16xlarge", 1680 | "vcpuNum": 64 1681 | }, 1682 | { 1683 | "_defaultOrder": 11, 1684 | "_isFastLaunch": false, 1685 | "category": "General purpose", 1686 | "gpuNum": 0, 1687 | "hideHardwareSpecs": false, 1688 | "memoryGiB": 384, 1689 | "name": "ml.m5.24xlarge", 1690 | "vcpuNum": 96 1691 | }, 1692 | { 1693 | "_defaultOrder": 12, 1694 | "_isFastLaunch": false, 1695 | "category": "General purpose", 1696 | "gpuNum": 0, 1697 | "hideHardwareSpecs": false, 1698 | "memoryGiB": 8, 1699 | "name": "ml.m5d.large", 1700 | "vcpuNum": 2 1701 | }, 1702 | { 1703 | "_defaultOrder": 13, 1704 | "_isFastLaunch": false, 1705 | "category": "General purpose", 1706 | "gpuNum": 0, 1707 | "hideHardwareSpecs": false, 1708 | "memoryGiB": 16, 1709 | "name": "ml.m5d.xlarge", 1710 | "vcpuNum": 4 1711 | }, 1712 | { 1713 | "_defaultOrder": 14, 1714 | "_isFastLaunch": false, 1715 | 
"category": "General purpose", 1716 | "gpuNum": 0, 1717 | "hideHardwareSpecs": false, 1718 | "memoryGiB": 32, 1719 | "name": "ml.m5d.2xlarge", 1720 | "vcpuNum": 8 1721 | }, 1722 | { 1723 | "_defaultOrder": 15, 1724 | "_isFastLaunch": false, 1725 | "category": "General purpose", 1726 | "gpuNum": 0, 1727 | "hideHardwareSpecs": false, 1728 | "memoryGiB": 64, 1729 | "name": "ml.m5d.4xlarge", 1730 | "vcpuNum": 16 1731 | }, 1732 | { 1733 | "_defaultOrder": 16, 1734 | "_isFastLaunch": false, 1735 | "category": "General purpose", 1736 | "gpuNum": 0, 1737 | "hideHardwareSpecs": false, 1738 | "memoryGiB": 128, 1739 | "name": "ml.m5d.8xlarge", 1740 | "vcpuNum": 32 1741 | }, 1742 | { 1743 | "_defaultOrder": 17, 1744 | "_isFastLaunch": false, 1745 | "category": "General purpose", 1746 | "gpuNum": 0, 1747 | "hideHardwareSpecs": false, 1748 | "memoryGiB": 192, 1749 | "name": "ml.m5d.12xlarge", 1750 | "vcpuNum": 48 1751 | }, 1752 | { 1753 | "_defaultOrder": 18, 1754 | "_isFastLaunch": false, 1755 | "category": "General purpose", 1756 | "gpuNum": 0, 1757 | "hideHardwareSpecs": false, 1758 | "memoryGiB": 256, 1759 | "name": "ml.m5d.16xlarge", 1760 | "vcpuNum": 64 1761 | }, 1762 | { 1763 | "_defaultOrder": 19, 1764 | "_isFastLaunch": false, 1765 | "category": "General purpose", 1766 | "gpuNum": 0, 1767 | "hideHardwareSpecs": false, 1768 | "memoryGiB": 384, 1769 | "name": "ml.m5d.24xlarge", 1770 | "vcpuNum": 96 1771 | }, 1772 | { 1773 | "_defaultOrder": 20, 1774 | "_isFastLaunch": false, 1775 | "category": "General purpose", 1776 | "gpuNum": 0, 1777 | "hideHardwareSpecs": true, 1778 | "memoryGiB": 0, 1779 | "name": "ml.geospatial.interactive", 1780 | "supportedImageNames": [ 1781 | "sagemaker-geospatial-v1-0" 1782 | ], 1783 | "vcpuNum": 0 1784 | }, 1785 | { 1786 | "_defaultOrder": 21, 1787 | "_isFastLaunch": true, 1788 | "category": "Compute optimized", 1789 | "gpuNum": 0, 1790 | "hideHardwareSpecs": false, 1791 | "memoryGiB": 4, 1792 | "name": "ml.c5.large", 1793 | "vcpuNum": 2 1794 | }, 1795 | { 1796 | "_defaultOrder": 22, 1797 | "_isFastLaunch": false, 1798 | "category": "Compute optimized", 1799 | "gpuNum": 0, 1800 | "hideHardwareSpecs": false, 1801 | "memoryGiB": 8, 1802 | "name": "ml.c5.xlarge", 1803 | "vcpuNum": 4 1804 | }, 1805 | { 1806 | "_defaultOrder": 23, 1807 | "_isFastLaunch": false, 1808 | "category": "Compute optimized", 1809 | "gpuNum": 0, 1810 | "hideHardwareSpecs": false, 1811 | "memoryGiB": 16, 1812 | "name": "ml.c5.2xlarge", 1813 | "vcpuNum": 8 1814 | }, 1815 | { 1816 | "_defaultOrder": 24, 1817 | "_isFastLaunch": false, 1818 | "category": "Compute optimized", 1819 | "gpuNum": 0, 1820 | "hideHardwareSpecs": false, 1821 | "memoryGiB": 32, 1822 | "name": "ml.c5.4xlarge", 1823 | "vcpuNum": 16 1824 | }, 1825 | { 1826 | "_defaultOrder": 25, 1827 | "_isFastLaunch": false, 1828 | "category": "Compute optimized", 1829 | "gpuNum": 0, 1830 | "hideHardwareSpecs": false, 1831 | "memoryGiB": 72, 1832 | "name": "ml.c5.9xlarge", 1833 | "vcpuNum": 36 1834 | }, 1835 | { 1836 | "_defaultOrder": 26, 1837 | "_isFastLaunch": false, 1838 | "category": "Compute optimized", 1839 | "gpuNum": 0, 1840 | "hideHardwareSpecs": false, 1841 | "memoryGiB": 96, 1842 | "name": "ml.c5.12xlarge", 1843 | "vcpuNum": 48 1844 | }, 1845 | { 1846 | "_defaultOrder": 27, 1847 | "_isFastLaunch": false, 1848 | "category": "Compute optimized", 1849 | "gpuNum": 0, 1850 | "hideHardwareSpecs": false, 1851 | "memoryGiB": 144, 1852 | "name": "ml.c5.18xlarge", 1853 | "vcpuNum": 72 1854 | }, 1855 | { 1856 | "_defaultOrder": 28, 1857 | 
"_isFastLaunch": false, 1858 | "category": "Compute optimized", 1859 | "gpuNum": 0, 1860 | "hideHardwareSpecs": false, 1861 | "memoryGiB": 192, 1862 | "name": "ml.c5.24xlarge", 1863 | "vcpuNum": 96 1864 | }, 1865 | { 1866 | "_defaultOrder": 29, 1867 | "_isFastLaunch": true, 1868 | "category": "Accelerated computing", 1869 | "gpuNum": 1, 1870 | "hideHardwareSpecs": false, 1871 | "memoryGiB": 16, 1872 | "name": "ml.g4dn.xlarge", 1873 | "vcpuNum": 4 1874 | }, 1875 | { 1876 | "_defaultOrder": 30, 1877 | "_isFastLaunch": false, 1878 | "category": "Accelerated computing", 1879 | "gpuNum": 1, 1880 | "hideHardwareSpecs": false, 1881 | "memoryGiB": 32, 1882 | "name": "ml.g4dn.2xlarge", 1883 | "vcpuNum": 8 1884 | }, 1885 | { 1886 | "_defaultOrder": 31, 1887 | "_isFastLaunch": false, 1888 | "category": "Accelerated computing", 1889 | "gpuNum": 1, 1890 | "hideHardwareSpecs": false, 1891 | "memoryGiB": 64, 1892 | "name": "ml.g4dn.4xlarge", 1893 | "vcpuNum": 16 1894 | }, 1895 | { 1896 | "_defaultOrder": 32, 1897 | "_isFastLaunch": false, 1898 | "category": "Accelerated computing", 1899 | "gpuNum": 1, 1900 | "hideHardwareSpecs": false, 1901 | "memoryGiB": 128, 1902 | "name": "ml.g4dn.8xlarge", 1903 | "vcpuNum": 32 1904 | }, 1905 | { 1906 | "_defaultOrder": 33, 1907 | "_isFastLaunch": false, 1908 | "category": "Accelerated computing", 1909 | "gpuNum": 4, 1910 | "hideHardwareSpecs": false, 1911 | "memoryGiB": 192, 1912 | "name": "ml.g4dn.12xlarge", 1913 | "vcpuNum": 48 1914 | }, 1915 | { 1916 | "_defaultOrder": 34, 1917 | "_isFastLaunch": false, 1918 | "category": "Accelerated computing", 1919 | "gpuNum": 1, 1920 | "hideHardwareSpecs": false, 1921 | "memoryGiB": 256, 1922 | "name": "ml.g4dn.16xlarge", 1923 | "vcpuNum": 64 1924 | }, 1925 | { 1926 | "_defaultOrder": 35, 1927 | "_isFastLaunch": false, 1928 | "category": "Accelerated computing", 1929 | "gpuNum": 1, 1930 | "hideHardwareSpecs": false, 1931 | "memoryGiB": 61, 1932 | "name": "ml.p3.2xlarge", 1933 | "vcpuNum": 8 1934 | }, 1935 | { 1936 | "_defaultOrder": 36, 1937 | "_isFastLaunch": false, 1938 | "category": "Accelerated computing", 1939 | "gpuNum": 4, 1940 | "hideHardwareSpecs": false, 1941 | "memoryGiB": 244, 1942 | "name": "ml.p3.8xlarge", 1943 | "vcpuNum": 32 1944 | }, 1945 | { 1946 | "_defaultOrder": 37, 1947 | "_isFastLaunch": false, 1948 | "category": "Accelerated computing", 1949 | "gpuNum": 8, 1950 | "hideHardwareSpecs": false, 1951 | "memoryGiB": 488, 1952 | "name": "ml.p3.16xlarge", 1953 | "vcpuNum": 64 1954 | }, 1955 | { 1956 | "_defaultOrder": 38, 1957 | "_isFastLaunch": false, 1958 | "category": "Accelerated computing", 1959 | "gpuNum": 8, 1960 | "hideHardwareSpecs": false, 1961 | "memoryGiB": 768, 1962 | "name": "ml.p3dn.24xlarge", 1963 | "vcpuNum": 96 1964 | }, 1965 | { 1966 | "_defaultOrder": 39, 1967 | "_isFastLaunch": false, 1968 | "category": "Memory Optimized", 1969 | "gpuNum": 0, 1970 | "hideHardwareSpecs": false, 1971 | "memoryGiB": 16, 1972 | "name": "ml.r5.large", 1973 | "vcpuNum": 2 1974 | }, 1975 | { 1976 | "_defaultOrder": 40, 1977 | "_isFastLaunch": false, 1978 | "category": "Memory Optimized", 1979 | "gpuNum": 0, 1980 | "hideHardwareSpecs": false, 1981 | "memoryGiB": 32, 1982 | "name": "ml.r5.xlarge", 1983 | "vcpuNum": 4 1984 | }, 1985 | { 1986 | "_defaultOrder": 41, 1987 | "_isFastLaunch": false, 1988 | "category": "Memory Optimized", 1989 | "gpuNum": 0, 1990 | "hideHardwareSpecs": false, 1991 | "memoryGiB": 64, 1992 | "name": "ml.r5.2xlarge", 1993 | "vcpuNum": 8 1994 | }, 1995 | { 1996 | "_defaultOrder": 42, 1997 | 
"_isFastLaunch": false, 1998 | "category": "Memory Optimized", 1999 | "gpuNum": 0, 2000 | "hideHardwareSpecs": false, 2001 | "memoryGiB": 128, 2002 | "name": "ml.r5.4xlarge", 2003 | "vcpuNum": 16 2004 | }, 2005 | { 2006 | "_defaultOrder": 43, 2007 | "_isFastLaunch": false, 2008 | "category": "Memory Optimized", 2009 | "gpuNum": 0, 2010 | "hideHardwareSpecs": false, 2011 | "memoryGiB": 256, 2012 | "name": "ml.r5.8xlarge", 2013 | "vcpuNum": 32 2014 | }, 2015 | { 2016 | "_defaultOrder": 44, 2017 | "_isFastLaunch": false, 2018 | "category": "Memory Optimized", 2019 | "gpuNum": 0, 2020 | "hideHardwareSpecs": false, 2021 | "memoryGiB": 384, 2022 | "name": "ml.r5.12xlarge", 2023 | "vcpuNum": 48 2024 | }, 2025 | { 2026 | "_defaultOrder": 45, 2027 | "_isFastLaunch": false, 2028 | "category": "Memory Optimized", 2029 | "gpuNum": 0, 2030 | "hideHardwareSpecs": false, 2031 | "memoryGiB": 512, 2032 | "name": "ml.r5.16xlarge", 2033 | "vcpuNum": 64 2034 | }, 2035 | { 2036 | "_defaultOrder": 46, 2037 | "_isFastLaunch": false, 2038 | "category": "Memory Optimized", 2039 | "gpuNum": 0, 2040 | "hideHardwareSpecs": false, 2041 | "memoryGiB": 768, 2042 | "name": "ml.r5.24xlarge", 2043 | "vcpuNum": 96 2044 | }, 2045 | { 2046 | "_defaultOrder": 47, 2047 | "_isFastLaunch": false, 2048 | "category": "Accelerated computing", 2049 | "gpuNum": 1, 2050 | "hideHardwareSpecs": false, 2051 | "memoryGiB": 16, 2052 | "name": "ml.g5.xlarge", 2053 | "vcpuNum": 4 2054 | }, 2055 | { 2056 | "_defaultOrder": 48, 2057 | "_isFastLaunch": false, 2058 | "category": "Accelerated computing", 2059 | "gpuNum": 1, 2060 | "hideHardwareSpecs": false, 2061 | "memoryGiB": 32, 2062 | "name": "ml.g5.2xlarge", 2063 | "vcpuNum": 8 2064 | }, 2065 | { 2066 | "_defaultOrder": 49, 2067 | "_isFastLaunch": false, 2068 | "category": "Accelerated computing", 2069 | "gpuNum": 1, 2070 | "hideHardwareSpecs": false, 2071 | "memoryGiB": 64, 2072 | "name": "ml.g5.4xlarge", 2073 | "vcpuNum": 16 2074 | }, 2075 | { 2076 | "_defaultOrder": 50, 2077 | "_isFastLaunch": false, 2078 | "category": "Accelerated computing", 2079 | "gpuNum": 1, 2080 | "hideHardwareSpecs": false, 2081 | "memoryGiB": 128, 2082 | "name": "ml.g5.8xlarge", 2083 | "vcpuNum": 32 2084 | }, 2085 | { 2086 | "_defaultOrder": 51, 2087 | "_isFastLaunch": false, 2088 | "category": "Accelerated computing", 2089 | "gpuNum": 1, 2090 | "hideHardwareSpecs": false, 2091 | "memoryGiB": 256, 2092 | "name": "ml.g5.16xlarge", 2093 | "vcpuNum": 64 2094 | }, 2095 | { 2096 | "_defaultOrder": 52, 2097 | "_isFastLaunch": false, 2098 | "category": "Accelerated computing", 2099 | "gpuNum": 4, 2100 | "hideHardwareSpecs": false, 2101 | "memoryGiB": 192, 2102 | "name": "ml.g5.12xlarge", 2103 | "vcpuNum": 48 2104 | }, 2105 | { 2106 | "_defaultOrder": 53, 2107 | "_isFastLaunch": false, 2108 | "category": "Accelerated computing", 2109 | "gpuNum": 4, 2110 | "hideHardwareSpecs": false, 2111 | "memoryGiB": 384, 2112 | "name": "ml.g5.24xlarge", 2113 | "vcpuNum": 96 2114 | }, 2115 | { 2116 | "_defaultOrder": 54, 2117 | "_isFastLaunch": false, 2118 | "category": "Accelerated computing", 2119 | "gpuNum": 8, 2120 | "hideHardwareSpecs": false, 2121 | "memoryGiB": 768, 2122 | "name": "ml.g5.48xlarge", 2123 | "vcpuNum": 192 2124 | }, 2125 | { 2126 | "_defaultOrder": 55, 2127 | "_isFastLaunch": false, 2128 | "category": "Accelerated computing", 2129 | "gpuNum": 8, 2130 | "hideHardwareSpecs": false, 2131 | "memoryGiB": 1152, 2132 | "name": "ml.p4d.24xlarge", 2133 | "vcpuNum": 96 2134 | }, 2135 | { 2136 | "_defaultOrder": 56, 2137 | 
"_isFastLaunch": false, 2138 | "category": "Accelerated computing", 2139 | "gpuNum": 8, 2140 | "hideHardwareSpecs": false, 2141 | "memoryGiB": 1152, 2142 | "name": "ml.p4de.24xlarge", 2143 | "vcpuNum": 96 2144 | }, 2145 | { 2146 | "_defaultOrder": 57, 2147 | "_isFastLaunch": false, 2148 | "category": "Accelerated computing", 2149 | "gpuNum": 0, 2150 | "hideHardwareSpecs": false, 2151 | "memoryGiB": 32, 2152 | "name": "ml.trn1.2xlarge", 2153 | "vcpuNum": 8 2154 | }, 2155 | { 2156 | "_defaultOrder": 58, 2157 | "_isFastLaunch": false, 2158 | "category": "Accelerated computing", 2159 | "gpuNum": 0, 2160 | "hideHardwareSpecs": false, 2161 | "memoryGiB": 512, 2162 | "name": "ml.trn1.32xlarge", 2163 | "vcpuNum": 128 2164 | }, 2165 | { 2166 | "_defaultOrder": 59, 2167 | "_isFastLaunch": false, 2168 | "category": "Accelerated computing", 2169 | "gpuNum": 0, 2170 | "hideHardwareSpecs": false, 2171 | "memoryGiB": 512, 2172 | "name": "ml.trn1n.32xlarge", 2173 | "vcpuNum": 128 2174 | } 2175 | ], 2176 | "instance_type": "ml.m5.2xlarge", 2177 | "kernelspec": { 2178 | "display_name": "Python 3 (ipykernel)", 2179 | "language": "python", 2180 | "name": "python3" 2181 | }, 2182 | "language_info": { 2183 | "codemirror_mode": { 2184 | "name": "ipython", 2185 | "version": 3 2186 | }, 2187 | "file_extension": ".py", 2188 | "mimetype": "text/x-python", 2189 | "name": "python", 2190 | "nbconvert_exporter": "python", 2191 | "pygments_lexer": "ipython3", 2192 | "version": "3.8.17" 2193 | } 2194 | }, 2195 | "nbformat": 4, 2196 | "nbformat_minor": 4 2197 | } 2198 | -------------------------------------------------------------------------------- /1. Generative AI Use Case - Summarize Dialogue/.ipynb_checkpoints/Lab_1_summarize_dialogue-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Generative AI Use Case: Summarize Dialogue\n", 8 | "\n", 9 | "Welcome to the practical side of this course. In this lab you will do the dialogue summarization task using generative AI. You will explore how the input text affects the output of the model, and perform prompt engineering to direct it towards the task you need. By comparing zero shot, one shot, and few shot inferences, you will take the first step towards prompt engineering and see how it can enhance the generative output of Large Language Models." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Table of Contents" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "- [ 1 - Set up Kernel and Required Dependencies](#1)\n", 24 | "- [ 2 - Summarize Dialogue without Prompt Engineering](#2)\n", 25 | "- [ 3 - Summarize Dialogue with an Instruction Prompt](#3)\n", 26 | " - [ 3.1 - Zero Shot Inference with an Instruction Prompt](#3.1)\n", 27 | " - [ 3.2 - Zero Shot Inference with the Prompt Template from FLAN-T5](#3.2)\n", 28 | "- [ 4 - Summarize Dialogue with One Shot and Few Shot Inference](#4)\n", 29 | " - [ 4.1 - One Shot Inference](#4.1)\n", 30 | " - [ 4.2 - Few Shot Inference](#4.2)\n", 31 | "- [ 5 - Generative Configuration Parameters for Inference](#5)\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "\n", 39 | "## 1 - Set up Kernel and Required Dependencies" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": { 45 | "tags": [] 46 | }, 47 | "source": [ 48 | "First, check that the correct kernel is chosen.\n", 49 | "\n", 50 | "[Image: images/kernel_set_up.png]\n", 51 | "\n", 52 | "You can click on that (top right of the screen) to see and check the details of the image, kernel, and instance type.\n", 53 | "\n", 54 | "[Image: images/w1_kernel_and_instance_type.png]\n" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": { 62 | "tags": [] 63 | }, 64 | "source": [ 65 | "Now install the required packages to use PyTorch and Hugging Face transformers and datasets.\n" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 2, 73 | "metadata": { 74 | "tags": [] 75 | }, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "Requirement already satisfied: pip in /opt/conda/lib/python3.7/site-packages (23.2.1)\n", 82 | "Collecting pip\n", 83 | " Obtaining dependency information for pip from https://files.pythonhosted.org/packages/47/6a/453160888fab7c6a432a6e25f8afe6256d0d9f2cbd25971021da6491d899/pip-23.3.1-py3-none-any.whl.metadata\n", 84 | " Downloading pip-23.3.1-py3-none-any.whl.metadata (3.5 kB)\n", 85 | "Downloading pip-23.3.1-py3-none-any.whl (2.1 MB)\n", 86 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m17.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n", 87 | "\u001b[?25h\u001b[33mDEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 23.3 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", 88 | "\u001b[0mInstalling collected packages: pip\n", 89 | " Attempting uninstall: pip\n", 90 | " Found existing installation: pip 23.2.1\n", 91 | " Uninstalling pip-23.2.1:\n", 92 | " Successfully uninstalled pip-23.2.1\n", 93 | "Successfully installed pip-23.3.1\n", 94 | "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", 95 | "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n", 96 | "\u001b[33mDEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. 
pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", 97 | "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", 98 | "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n", 99 | "\u001b[33mDEPRECATION: pyodbc 4.0.0-unsupported has a non-standard version number. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pyodbc or contact the author to suggest that they release a version with a conforming version number. Discussion can be found at https://github.com/pypa/pip/issues/12063\u001b[0m\u001b[33m\n", 100 | "\u001b[0m\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", 101 | "pytest-astropy 0.8.0 requires pytest-cov>=2.0, which is not installed.\n", 102 | "pytest-astropy 0.8.0 requires pytest-filter-subpackage>=0.1, which is not installed.\n", 103 | "spyder 4.0.1 requires pyqt5<5.13; python_version >= \"3\", which is not installed.\n", 104 | "spyder 4.0.1 requires pyqtwebengine<5.13; python_version >= \"3\", which is not installed.\n", 105 | "notebook 6.5.6 requires pyzmq<25,>=17, but you have pyzmq 25.1.1 which is incompatible.\n", 106 | "pathos 0.3.1 requires dill>=0.3.7, but you have dill 0.3.6 which is incompatible.\n", 107 | "pathos 0.3.1 requires multiprocess>=0.70.15, but you have multiprocess 0.70.14 which is incompatible.\n", 108 | "sparkmagic 0.20.4 requires nest-asyncio==1.5.5, but you have nest-asyncio 1.5.8 which is incompatible.\n", 109 | "spyder 4.0.1 requires jedi==0.14.1, but you have jedi 0.19.1 which is incompatible.\u001b[0m\u001b[31m\n", 110 | "\u001b[0m\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", 111 | "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n" 112 | ] 113 | } 114 | ], 115 | "source": [ 116 | "%pip install --upgrade pip\n", 117 | "%pip install --disable-pip-version-check \\\n", 118 | " torch==1.13.1 \\\n", 119 | " torchdata==0.5.1 --quiet\n", 120 | "\n", 121 | "%pip install \\\n", 122 | " transformers==4.27.2 \\\n", 123 | " datasets==2.11.0 --quiet" 124 | ] 125 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": { 138 | "tags": [] 139 | }, 140 | "source": [ 141 | "Load the datasets, the Large Language Model (LLM), the tokenizer, and the generation configuration class. Do not worry if you do not yet understand all of these components - they will be described and discussed later in the notebook." 142 | ] 143 | },
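The import cell just below survives in this dump, but the section 2 cells that actually instantiate these components fall inside the stretch that was lost during extraction. As a reference, here is a minimal sketch of that setup; the `google/flan-t5-base` checkpoint name is an assumption, since the elided cells may pin a different FLAN-T5 variant:

```python
# Sketch of the elided setup cells (the exact checkpoint name is an assumption).
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

dataset = load_dataset("knkarthick/dialogsum")   # DialogSum: dialogues + human-written summaries

model_name = "google/flan-t5-base"               # assumed FLAN-T5 size
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
```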
144 | { 145 | "cell_type": "code", 146 | "execution_count": 3, 147 | "metadata": { 148 | "tags": [] 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "from datasets import load_dataset\n", 153 | "from transformers import AutoModelForSeq2SeqLM\n", 154 | "from transformers import AutoTokenizer\n", 155 | "from transformers import GenerationConfig" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "\n", 163 | "## 2 - Summarize Dialogue without Prompt Engineering\n", 164 | "\n", 165 | "In this use case, you will be generating a summary of a dialogue with the pre-trained Large Language Model (LLM) FLAN-T5 from Hugging Face. The list of available models in the Hugging Face `transformers` package can be found [here](https://huggingface.co/docs/transformers/index). \n", 166 | "\n", 167 | "Let's load some simple dialogues from the [DialogSum](https://huggingface.co/datasets/knkarthick/dialogsum) Hugging Face dataset. This dataset contains 10,000+ dialogues with the corresponding manually labeled summaries and topics. " 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 4, 173 | "metadata": { 174 | "tags": [] 175 | }, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "application/vnd.jupyter.widget-view+json": { 180 | "model_id": "c5e9b04ddd23473289ed47c50dcf81e7", 181 | "version_major": 2, 182 | "version_minor": 0 183 | }, 184 | "text/plain": [ 185 | "Downloading readme: 0%| | 0.00/4.65k [00:00\n", 697 | "## 3 - Summarize Dialogue with an Instruction Prompt\n", 698 | "\n", 699 | "Prompt engineering is an important concept in using foundation models for text generation. You can check out [this blog](https://www.amazon.science/blog/emnlp-prompt-engineering-is-the-new-feature-engineering) from Amazon Science for a quick introduction to prompt engineering." 700 | ] 701 | }, 702 | { 703 | "cell_type": "markdown", 704 | "metadata": {}, 705 | "source": [ 706 | "\n", 707 | "### 3.1 - Zero Shot Inference with an Instruction Prompt\n", 708 | "\n", 709 | "In order to instruct the model to perform a task - summarize a dialogue - you can take the dialogue and convert it into an instruction prompt. This is often called **zero shot inference**. You can check out [this blog from AWS](https://aws.amazon.com/blogs/machine-learning/zero-shot-prompting-for-the-flan-t5-foundation-model-in-amazon-sagemaker-jumpstart/) for a quick description of what zero shot learning is and why it is an important concept for LLMs.\n", 710 | "\n", 711 | "Wrap the dialogue in a descriptive instruction and see how the generated text will change:" 712 | ] 713 | }, 714 | { 715 | "cell_type": "code", 716 | "execution_count": 10, 717 | "metadata": { 718 | "tags": [] 719 | }, 720 | "outputs": [ 721 | { 722 | "name": "stdout", 723 | "output_type": "stream", 724 | "text": [ 725 | "---------------------------------------------------------------------------------------------------\n", 726 | "Example 1\n", 727 | "---------------------------------------------------------------------------------------------------\n", 728 | "INPUT PROMPT:\n", 729 | "\n", 730 | "Summarize the following conversation.\n", 731 | "\n", 732 | "#Person1#: What time is it, Tom?\n", 733 | "#Person2#: Just a minute. It's ten to nine by my watch.\n", 734 | "#Person1#: Is it? I had no idea it was so late. I must be off now.\n", 735 | "#Person2#: What's the hurry?\n", 736 | "#Person1#: I must catch the nine-thirty train.\n", 737 | "#Person2#: You've plenty of time yet. 
The railway station is very close. It won't take more than twenty minutes to get there.\n", 738 | "\n", 739 | "Summary:\n", 740 | " \n", 741 | "---------------------------------------------------------------------------------------------------\n", 742 | "BASELINE HUMAN SUMMARY:\n", 743 | "#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.\n", 744 | "---------------------------------------------------------------------------------------------------\n", 745 | "MODEL GENERATION - ZERO SHOT:\n", 746 | "The train is about to leave.\n", 747 | "\n", 748 | "---------------------------------------------------------------------------------------------------\n", 749 | "Example 2\n", 750 | "---------------------------------------------------------------------------------------------------\n", 751 | "INPUT PROMPT:\n", 752 | "\n", 753 | "Summarize the following conversation.\n", 754 | "\n", 755 | "#Person1#: Have you considered upgrading your system?\n", 756 | "#Person2#: Yes, but I'm not sure what exactly I would need.\n", 757 | "#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.\n", 758 | "#Person2#: That would be a definite bonus.\n", 759 | "#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.\n", 760 | "#Person2#: How can we do that?\n", 761 | "#Person1#: You'd probably need a faster processor, to begin with. And you also need a more powerful hard disc, more memory and a faster modem. Do you have a CD-ROM drive?\n", 762 | "#Person2#: No.\n", 763 | "#Person1#: Then you might want to add a CD-ROM drive too, because most new software programs are coming out on Cds.\n", 764 | "#Person2#: That sounds great. Thanks.\n", 765 | "\n", 766 | "Summary:\n", 767 | " \n", 768 | "---------------------------------------------------------------------------------------------------\n", 769 | "BASELINE HUMAN SUMMARY:\n", 770 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 771 | "---------------------------------------------------------------------------------------------------\n", 772 | "MODEL GENERATION - ZERO SHOT:\n", 773 | "#Person1#: I'm thinking of upgrading my computer.\n", 774 | "\n" 775 | ] 776 | } 777 | ], 778 | "source": [ 779 | "for i, index in enumerate(example_indices):\n", 780 | " dialogue = dataset['test'][index]['dialogue']\n", 781 | " summary = dataset['test'][index]['summary']\n", 782 | "\n", 783 | " prompt = f\"\"\"\n", 784 | "Summarize the following conversation.\n", 785 | "\n", 786 | "{dialogue}\n", 787 | "\n", 788 | "Summary:\n", 789 | " \"\"\"\n", 790 | "\n", 791 | " # Input constructed prompt instead of the dialogue.\n", 792 | " inputs = tokenizer(prompt, return_tensors='pt')\n", 793 | " output = tokenizer.decode(\n", 794 | " model.generate(\n", 795 | " inputs[\"input_ids\"], \n", 796 | " max_new_tokens=50,\n", 797 | " )[0], \n", 798 | " skip_special_tokens=True\n", 799 | " )\n", 800 | " \n", 801 | " print(dash_line)\n", 802 | " print('Example ', i + 1)\n", 803 | " print(dash_line)\n", 804 | " print(f'INPUT PROMPT:\\n{prompt}')\n", 805 | " print(dash_line)\n", 806 | " print(f'BASELINE HUMAN SUMMARY:\\n{summary}')\n", 807 | " print(dash_line) \n", 808 | " print(f'MODEL GENERATION - ZERO SHOT:\\n{output}\\n')" 809 | ] 810 | }, 811 | { 812 | "cell_type": "markdown", 813 | "metadata": {}, 814 | "source": [ 815 | "This is much better! 
But the model still does not pick up on the nuance of the conversations." 816 | ] 817 | }, 818 | { 819 | "cell_type": "markdown", 820 | "metadata": {}, 821 | "source": [ 822 | "**Exercise:**\n", 823 | "\n", 824 | "- Experiment with the `prompt` text and see how the inferences change. Will the inferences change if you end the prompt with just an empty string vs. `Summary: `?\n", 825 | "- Try to rephrase the beginning of the `prompt` text from `Summarize the following conversation.` to something different - and see how it influences the generated output." 826 | ] 827 | }, 828 | { 829 | "cell_type": "markdown", 830 | "metadata": {}, 831 | "source": [ 832 | "\n", 833 | "### 3.2 - Zero Shot Inference with the Prompt Template from FLAN-T5\n", 834 | "\n", 835 | "Let's use a slightly different prompt. FLAN-T5 has many prompt templates that are published for certain tasks [here](https://github.com/google-research/FLAN/tree/main/flan/v2). In the following code, you will use one of the [pre-built FLAN-T5 prompts](https://github.com/google-research/FLAN/blob/main/flan/v2/templates.py):" 836 | ] 837 | }, 838 | { 839 | "cell_type": "code", 840 | "execution_count": 11, 841 | "metadata": { 842 | "tags": [] 843 | }, 844 | "outputs": [ 845 | { 846 | "name": "stdout", 847 | "output_type": "stream", 848 | "text": [ 849 | "---------------------------------------------------------------------------------------------------\n", 850 | "Example 1\n", 851 | "---------------------------------------------------------------------------------------------------\n", 852 | "INPUT PROMPT:\n", 853 | "\n", 854 | "Dialogue:\n", 855 | "\n", 856 | "#Person1#: What time is it, Tom?\n", 857 | "#Person2#: Just a minute. It's ten to nine by my watch.\n", 858 | "#Person1#: Is it? I had no idea it was so late. I must be off now.\n", 859 | "#Person2#: What's the hurry?\n", 860 | "#Person1#: I must catch the nine-thirty train.\n", 861 | "#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.\n", 862 | "\n", 863 | "What was going on?\n", 864 | "\n", 865 | "---------------------------------------------------------------------------------------------------\n", 866 | "BASELINE HUMAN SUMMARY:\n", 867 | "#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.\n", 868 | "\n", 869 | "---------------------------------------------------------------------------------------------------\n", 870 | "MODEL GENERATION - ZERO SHOT:\n", 871 | "Tom is late for the train.\n", 872 | "\n", 873 | "---------------------------------------------------------------------------------------------------\n", 874 | "Example 2\n", 875 | "---------------------------------------------------------------------------------------------------\n", 876 | "INPUT PROMPT:\n", 877 | "\n", 878 | "Dialogue:\n", 879 | "\n", 880 | "#Person1#: Have you considered upgrading your system?\n", 881 | "#Person2#: Yes, but I'm not sure what exactly I would need.\n", 882 | "#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.\n", 883 | "#Person2#: That would be a definite bonus.\n", 884 | "#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.\n", 885 | "#Person2#: How can we do that?\n", 886 | "#Person1#: You'd probably need a faster processor, to begin with. And you also need a more powerful hard disc, more memory and a faster modem. 
Do you have a CD-ROM drive?\n", 887 | "#Person2#: No.\n", 888 | "#Person1#: Then you might want to add a CD-ROM drive too, because most new software programs are coming out on Cds.\n", 889 | "#Person2#: That sounds great. Thanks.\n", 890 | "\n", 891 | "What was going on?\n", 892 | "\n", 893 | "---------------------------------------------------------------------------------------------------\n", 894 | "BASELINE HUMAN SUMMARY:\n", 895 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 896 | "\n", 897 | "---------------------------------------------------------------------------------------------------\n", 898 | "MODEL GENERATION - ZERO SHOT:\n", 899 | "#Person1#: You could add a painting program to your software. #Person2#: That would be a bonus. #Person1#: You might also want to upgrade your hardware. #Person1#\n", 900 | "\n" 901 | ] 902 | } 903 | ], 904 | "source": [ 905 | "for i, index in enumerate(example_indices):\n", 906 | "    dialogue = dataset['test'][index]['dialogue']\n", 907 | "    summary = dataset['test'][index]['summary']\n", 908 | "        \n", 909 | "    prompt = f\"\"\"\n", 910 | "Dialogue:\n", 911 | "\n", 912 | "{dialogue}\n", 913 | "\n", 914 | "What was going on?\n", 915 | "\"\"\"\n", 916 | "\n", 917 | "    inputs = tokenizer(prompt, return_tensors='pt')\n", 918 | "    output = tokenizer.decode(\n", 919 | "        model.generate(\n", 920 | "            inputs[\"input_ids\"], \n", 921 | "            max_new_tokens=50,\n", 922 | "        )[0], \n", 923 | "        skip_special_tokens=True\n", 924 | "    )\n", 925 | "\n", 926 | "    print(dash_line)\n", 927 | "    print('Example ', i + 1)\n", 928 | "    print(dash_line)\n", 929 | "    print(f'INPUT PROMPT:\\n{prompt}')\n", 930 | "    print(dash_line)\n", 931 | "    print(f'BASELINE HUMAN SUMMARY:\\n{summary}\\n')\n", 932 | "    print(dash_line)\n", 933 | "    print(f'MODEL GENERATION - ZERO SHOT:\\n{output}\\n')" 934 | ] 935 | }, 936 | { 937 | "cell_type": "markdown", 938 | "metadata": {}, 939 | "source": [ 940 | "Notice that this prompt from FLAN-T5 did help a bit, but the model still struggles to pick up on the nuance of the conversation. This is what you will try to address with few shot inferencing." 941 | ] 942 | }, 943 | { 944 | "cell_type": "markdown", 945 | "metadata": {}, 946 | "source": [ 947 | "\n", 948 | "## 4 - Summarize Dialogue with One Shot and Few Shot Inference\n", 949 | "\n", 950 | "**One shot and few shot inference** are the practices of providing an LLM with one (one shot) or more (few shot) full examples of prompt-response pairs that match your task, before the actual prompt that you want completed. This is called \"in-context learning\" and it conditions your model on your specific task. You can read more about it in [this blog from HuggingFace](https://huggingface.co/blog/few-shot-learning-gpt-neo-and-inference-api)." 951 | ] 952 | }, 953 | { 954 | "cell_type": "markdown", 955 | "metadata": { 956 | "tags": [] 957 | }, 958 | "source": [ 959 | "\n", 960 | "### 4.1 - One Shot Inference\n", 961 | "\n", 962 | "Let's build a function that takes a list of `example_indices_full`, generates a prompt with full examples, then at the end appends the prompt which you want the model to complete (`example_index_to_summarize`). You will use the same FLAN-T5 prompt template from section [3.2](#3.2)." 963 | ] 964 | },
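Every generation cell from here on repeats the same tokenize, `model.generate`, decode sequence. A small helper keeps those experiments readable - a sketch, assuming the `model` and `tokenizer` objects used throughout this notebook:

```python
def generate_summary(prompt, model, tokenizer, generation_config=None, max_new_tokens=50):
    """Tokenize a prompt, run generation, and decode the first output sequence."""
    inputs = tokenizer(prompt, return_tensors='pt')
    if generation_config is not None:
        output_ids = model.generate(inputs["input_ids"], generation_config=generation_config)
    else:
        output_ids = model.generate(inputs["input_ids"], max_new_tokens=max_new_tokens)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Usage sketch: print(generate_summary(one_shot_prompt, model, tokenizer))
```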
" 963 | ] 964 | }, 965 | { 966 | "cell_type": "code", 967 | "execution_count": 12, 968 | "metadata": { 969 | "tags": [] 970 | }, 971 | "outputs": [], 972 | "source": [ 973 | "def make_prompt(example_indices_full, example_index_to_summarize):\n", 974 | " prompt = ''\n", 975 | " for index in example_indices_full:\n", 976 | " dialogue = dataset['test'][index]['dialogue']\n", 977 | " summary = dataset['test'][index]['summary']\n", 978 | " \n", 979 | " # The stop sequence '{summary}\\n\\n\\n' is important for FLAN-T5. Other models may have their own preferred stop sequence.\n", 980 | " prompt += f\"\"\"\n", 981 | "Dialogue:\n", 982 | "\n", 983 | "{dialogue}\n", 984 | "\n", 985 | "What was going on?\n", 986 | "{summary}\n", 987 | "\n", 988 | "\n", 989 | "\"\"\"\n", 990 | " \n", 991 | " dialogue = dataset['test'][example_index_to_summarize]['dialogue']\n", 992 | " \n", 993 | " prompt += f\"\"\"\n", 994 | "Dialogue:\n", 995 | "\n", 996 | "{dialogue}\n", 997 | "\n", 998 | "What was going on?\n", 999 | "\"\"\"\n", 1000 | " \n", 1001 | " return prompt" 1002 | ] 1003 | }, 1004 | { 1005 | "cell_type": "markdown", 1006 | "metadata": { 1007 | "tags": [] 1008 | }, 1009 | "source": [ 1010 | "Construct the prompt to perform one shot inference:" 1011 | ] 1012 | }, 1013 | { 1014 | "cell_type": "code", 1015 | "execution_count": 13, 1016 | "metadata": { 1017 | "tags": [] 1018 | }, 1019 | "outputs": [ 1020 | { 1021 | "name": "stdout", 1022 | "output_type": "stream", 1023 | "text": [ 1024 | "\n", 1025 | "Dialogue:\n", 1026 | "\n", 1027 | "#Person1#: What time is it, Tom?\n", 1028 | "#Person2#: Just a minute. It's ten to nine by my watch.\n", 1029 | "#Person1#: Is it? I had no idea it was so late. I must be off now.\n", 1030 | "#Person2#: What's the hurry?\n", 1031 | "#Person1#: I must catch the nine-thirty train.\n", 1032 | "#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.\n", 1033 | "\n", 1034 | "What was going on?\n", 1035 | "#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.\n", 1036 | "\n", 1037 | "\n", 1038 | "\n", 1039 | "Dialogue:\n", 1040 | "\n", 1041 | "#Person1#: Have you considered upgrading your system?\n", 1042 | "#Person2#: Yes, but I'm not sure what exactly I would need.\n", 1043 | "#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.\n", 1044 | "#Person2#: That would be a definite bonus.\n", 1045 | "#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.\n", 1046 | "#Person2#: How can we do that?\n", 1047 | "#Person1#: You'd probably need a faster processor, to begin with. And you also need a more powerful hard disc, more memory and a faster modem. Do you have a CD-ROM drive?\n", 1048 | "#Person2#: No.\n", 1049 | "#Person1#: Then you might want to add a CD-ROM drive too, because most new software programs are coming out on Cds.\n", 1050 | "#Person2#: That sounds great. 
Thanks.\n", 1051 | "\n", 1052 | "What was going on?\n", 1053 | "\n" 1054 | ] 1055 | } 1056 | ], 1057 | "source": [ 1058 | "example_indices_full = [40]\n", 1059 | "example_index_to_summarize = 200\n", 1060 | "\n", 1061 | "one_shot_prompt = make_prompt(example_indices_full, example_index_to_summarize)\n", 1062 | "\n", 1063 | "print(one_shot_prompt)" 1064 | ] 1065 | }, 1066 | { 1067 | "cell_type": "markdown", 1068 | "metadata": { 1069 | "tags": [] 1070 | }, 1071 | "source": [ 1072 | "Now pass this prompt to perform the one shot inference:" 1073 | ] 1074 | }, 1075 | { 1076 | "cell_type": "code", 1077 | "execution_count": 14, 1078 | "metadata": { 1079 | "tags": [] 1080 | }, 1081 | "outputs": [ 1082 | { 1083 | "name": "stdout", 1084 | "output_type": "stream", 1085 | "text": [ 1086 | "---------------------------------------------------------------------------------------------------\n", 1087 | "BASELINE HUMAN SUMMARY:\n", 1088 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 1089 | "\n", 1090 | "---------------------------------------------------------------------------------------------------\n", 1091 | "MODEL GENERATION - ONE SHOT:\n", 1092 | "#Person1 wants to upgrade his system. #Person2 wants to add a painting program to his software. #Person1 wants to add a CD-ROM drive.\n" 1093 | ] 1094 | } 1095 | ], 1096 | "source": [ 1097 | "summary = dataset['test'][example_index_to_summarize]['summary']\n", 1098 | "\n", 1099 | "inputs = tokenizer(one_shot_prompt, return_tensors='pt')\n", 1100 | "output = tokenizer.decode(\n", 1101 | " model.generate(\n", 1102 | " inputs[\"input_ids\"],\n", 1103 | " max_new_tokens=50,\n", 1104 | " )[0], \n", 1105 | " skip_special_tokens=True\n", 1106 | ")\n", 1107 | "\n", 1108 | "print(dash_line)\n", 1109 | "print(f'BASELINE HUMAN SUMMARY:\\n{summary}\\n')\n", 1110 | "print(dash_line)\n", 1111 | "print(f'MODEL GENERATION - ONE SHOT:\\n{output}')" 1112 | ] 1113 | }, 1114 | { 1115 | "cell_type": "markdown", 1116 | "metadata": { 1117 | "tags": [] 1118 | }, 1119 | "source": [ 1120 | "\n", 1121 | "### 4.2 - Few Shot Inference\n", 1122 | "\n", 1123 | "Let's explore few shot inference by adding two more full dialogue-summary pairs to your prompt." 1124 | ] 1125 | }, 1126 | { 1127 | "cell_type": "code", 1128 | "execution_count": 15, 1129 | "metadata": { 1130 | "tags": [] 1131 | }, 1132 | "outputs": [ 1133 | { 1134 | "name": "stdout", 1135 | "output_type": "stream", 1136 | "text": [ 1137 | "\n", 1138 | "Dialogue:\n", 1139 | "\n", 1140 | "#Person1#: What time is it, Tom?\n", 1141 | "#Person2#: Just a minute. It's ten to nine by my watch.\n", 1142 | "#Person1#: Is it? I had no idea it was so late. I must be off now.\n", 1143 | "#Person2#: What's the hurry?\n", 1144 | "#Person1#: I must catch the nine-thirty train.\n", 1145 | "#Person2#: You've plenty of time yet. The railway station is very close. It won't take more than twenty minutes to get there.\n", 1146 | "\n", 1147 | "What was going on?\n", 1148 | "#Person1# is in a hurry to catch a train. Tom tells #Person1# there is plenty of time.\n", 1149 | "\n", 1150 | "\n", 1151 | "\n", 1152 | "Dialogue:\n", 1153 | "\n", 1154 | "#Person1#: May, do you mind helping me prepare for the picnic?\n", 1155 | "#Person2#: Sure. Have you checked the weather report?\n", 1156 | "#Person1#: Yes. It says it will be sunny all day. No sign of rain at all. This is your father's favorite sausage. Sandwiches for you and Daniel.\n", 1157 | "#Person2#: No, thanks Mom. 
I'd like some toast and chicken wings.\n", 1158 | "#Person1#: Okay. Please take some fruit salad and crackers for me.\n", 1159 | "#Person2#: Done. Oh, don't forget to take napkins disposable plates, cups and picnic blanket.\n", 1160 | "#Person1#: All set. May, can you help me take all these things to the living room?\n", 1161 | "#Person2#: Yes, madam.\n", 1162 | "#Person1#: Ask Daniel to give you a hand?\n", 1163 | "#Person2#: No, mom, I can manage it by myself. His help just causes more trouble.\n", 1164 | "\n", 1165 | "What was going on?\n", 1166 | "Mom asks May to help to prepare for the picnic and May agrees.\n", 1167 | "\n", 1168 | "\n", 1169 | "\n", 1170 | "Dialogue:\n", 1171 | "\n", 1172 | "#Person1#: Hello, I bought the pendant in your shop, just before. \n", 1173 | "#Person2#: Yes. Thank you very much. \n", 1174 | "#Person1#: Now I come back to the hotel and try to show it to my friend, the pendant is broken, I'm afraid. \n", 1175 | "#Person2#: Oh, is it? \n", 1176 | "#Person1#: Would you change it to a new one? \n", 1177 | "#Person2#: Yes, certainly. You have the receipt? \n", 1178 | "#Person1#: Yes, I do. \n", 1179 | "#Person2#: Then would you kindly come to our shop with the receipt by 10 o'clock? We will replace it. \n", 1180 | "#Person1#: Thank you so much. \n", 1181 | "\n", 1182 | "What was going on?\n", 1183 | "#Person1# wants to change the broken pendant in #Person2#'s shop.\n", 1184 | "\n", 1185 | "\n", 1186 | "\n", 1187 | "Dialogue:\n", 1188 | "\n", 1189 | "#Person1#: Have you considered upgrading your system?\n", 1190 | "#Person2#: Yes, but I'm not sure what exactly I would need.\n", 1191 | "#Person1#: You could consider adding a painting program to your software. It would allow you to make up your own flyers and banners for advertising.\n", 1192 | "#Person2#: That would be a definite bonus.\n", 1193 | "#Person1#: You might also want to upgrade your hardware because it is pretty outdated now.\n", 1194 | "#Person2#: How can we do that?\n", 1195 | "#Person1#: You'd probably need a faster processor, to begin with. And you also need a more powerful hard disc, more memory and a faster modem. Do you have a CD-ROM drive?\n", 1196 | "#Person2#: No.\n", 1197 | "#Person1#: Then you might want to add a CD-ROM drive too, because most new software programs are coming out on Cds.\n", 1198 | "#Person2#: That sounds great. Thanks.\n", 1199 | "\n", 1200 | "What was going on?\n", 1201 | "\n" 1202 | ] 1203 | } 1204 | ], 1205 | "source": [ 1206 | "example_indices_full = [40, 80, 120]\n", 1207 | "example_index_to_summarize = 200\n", 1208 | "\n", 1209 | "few_shot_prompt = make_prompt(example_indices_full, example_index_to_summarize)\n", 1210 | "\n", 1211 | "print(few_shot_prompt)" 1212 | ] 1213 | }, 1214 | { 1215 | "cell_type": "markdown", 1216 | "metadata": { 1217 | "tags": [] 1218 | }, 1219 | "source": [ 1220 | "Now pass this prompt to perform a few shot inference:" 1221 | ] 1222 | }, 1223 | { 1224 | "cell_type": "code", 1225 | "execution_count": 16, 1226 | "metadata": { 1227 | "tags": [] 1228 | }, 1229 | "outputs": [ 1230 | { 1231 | "name": "stderr", 1232 | "output_type": "stream", 1233 | "text": [ 1234 | "Token indices sequence length is longer than the specified maximum sequence length for this model (819 > 512). 
Running this sequence through the model will result in indexing errors\n" 1235 | ] 1236 | }, 1237 | { 1238 | "name": "stdout", 1239 | "output_type": "stream", 1240 | "text": [ 1241 | "---------------------------------------------------------------------------------------------------\n", 1242 | "BASELINE HUMAN SUMMARY:\n", 1243 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 1244 | "\n", 1245 | "---------------------------------------------------------------------------------------------------\n", 1246 | "MODEL GENERATION - FEW SHOT:\n", 1247 | "#Person1 wants to upgrade his system. #Person2 wants to add a painting program to his software. #Person1 wants to upgrade his hardware.\n" 1248 | ] 1249 | } 1250 | ], 1251 | "source": [ 1252 | "summary = dataset['test'][example_index_to_summarize]['summary']\n", 1253 | "\n", 1254 | "inputs = tokenizer(few_shot_prompt, return_tensors='pt')\n", 1255 | "output = tokenizer.decode(\n", 1256 | "    model.generate(\n", 1257 | "        inputs[\"input_ids\"],\n", 1258 | "        max_new_tokens=50,\n", 1259 | "    )[0], \n", 1260 | "    skip_special_tokens=True\n", 1261 | ")\n", 1262 | "\n", 1263 | "print(dash_line)\n", 1264 | "print(f'BASELINE HUMAN SUMMARY:\\n{summary}\\n')\n", 1265 | "print(dash_line)\n", 1266 | "print(f'MODEL GENERATION - FEW SHOT:\\n{output}')" 1267 | ] 1268 | }, 1269 | { 1270 | "cell_type": "markdown", 1271 | "metadata": { 1272 | "tags": [] 1273 | }, 1274 | "source": [ 1275 | "In this case, few shot did not provide much of an improvement over one shot inference. Anything above 5 or 6 shots will typically not help much, either. Also, you need to make sure that you do not exceed the model's input-context length which, in our case, is 512 tokens. Anything above the context length will be ignored.\n", 1276 | "\n", 1277 | "However, you can see that feeding in at least one full example (one shot) provides the model with more information and qualitatively improves the summary overall." 1278 | ] 1279 | },
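Rather than waiting for the tokenizer warning above, you can check a prompt's length against the 512-token limit before generating - a minimal sketch, assuming the `tokenizer` and the `few_shot_prompt` built earlier in this notebook:

```python
# Check a prompt against the 512-token input limit the lab works with.
MAX_INPUT_TOKENS = 512

input_ids = tokenizer(few_shot_prompt, return_tensors='pt')["input_ids"]
num_tokens = input_ids.shape[1]
print(f"Prompt length: {num_tokens} tokens")
if num_tokens > MAX_INPUT_TOKENS:
    # Over the limit: drop an example or pick shorter dialogues.
    print(f"Over the limit by {num_tokens - MAX_INPUT_TOKENS} tokens.")
```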
" 1314 | ] 1315 | }, 1316 | { 1317 | "cell_type": "markdown", 1318 | "metadata": { 1319 | "tags": [] 1320 | }, 1321 | "source": [ 1322 | "**Exercise:**\n", 1323 | "\n", 1324 | "Change the configuration parameters to investigate their influence on the output. \n", 1325 | "\n", 1326 | "Putting the parameter `do_sample = True`, you activate various decoding strategies which influence the next token from the probability distribution over the entire vocabulary. You can then adjust the outputs changing `temperature` and other parameters (such as `top_k` and `top_p`). \n", 1327 | "\n", 1328 | "Uncomment the lines in the cell below and rerun the code. Try to analyze the results. You can read some comments below." 1329 | ] 1330 | }, 1331 | { 1332 | "cell_type": "code", 1333 | "execution_count": 17, 1334 | "metadata": { 1335 | "tags": [] 1336 | }, 1337 | "outputs": [ 1338 | { 1339 | "name": "stdout", 1340 | "output_type": "stream", 1341 | "text": [ 1342 | "---------------------------------------------------------------------------------------------------\n", 1343 | "MODEL GENERATION - FEW SHOT:\n", 1344 | "#Person1 wants to upgrade his system. #Person2 wants to add a painting program to his software. #Person1 wants to upgrade his hardware.\n", 1345 | "---------------------------------------------------------------------------------------------------\n", 1346 | "BASELINE HUMAN SUMMARY:\n", 1347 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 1348 | "\n" 1349 | ] 1350 | } 1351 | ], 1352 | "source": [ 1353 | "generation_config = GenerationConfig(max_new_tokens=50)\n", 1354 | "# generation_config = GenerationConfig(max_new_tokens=10)\n", 1355 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.1)\n", 1356 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.5)\n", 1357 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=1.0)\n", 1358 | "\n", 1359 | "inputs = tokenizer(few_shot_prompt, return_tensors='pt')\n", 1360 | "output = tokenizer.decode(\n", 1361 | " model.generate(\n", 1362 | " inputs[\"input_ids\"],\n", 1363 | " generation_config=generation_config,\n", 1364 | " )[0], \n", 1365 | " skip_special_tokens=True\n", 1366 | ")\n", 1367 | "\n", 1368 | "print(dash_line)\n", 1369 | "print(f'MODEL GENERATION - FEW SHOT:\\n{output}')\n", 1370 | "print(dash_line)\n", 1371 | "print(f'BASELINE HUMAN SUMMARY:\\n{summary}\\n')" 1372 | ] 1373 | }, 1374 | { 1375 | "cell_type": "markdown", 1376 | "metadata": {}, 1377 | "source": [ 1378 | "Comments related to the choice of the parameters in the code cell above:\n", 1379 | "- Choosing `max_new_tokens=10` will make the output text too short, so the dialogue summary will be cut.\n", 1380 | "- Putting `do_sample = True` and changing the temperature value you get more flexibility in the output." 1381 | ] 1382 | }, 1383 | { 1384 | "cell_type": "markdown", 1385 | "metadata": {}, 1386 | "source": [ 1387 | "As you can see, prompt engineering can take you a long way for this use case, but there are some limitations. Next, you will start to explore how you can use fine-tuning to help your LLM to understand a particular use case in better depth!" 
1388 | ] 1389 | }, 1390 | { 1391 | "cell_type": "code", 1392 | "execution_count": 18, 1393 | "metadata": { 1394 | "tags": [] 1395 | }, 1396 | "outputs": [ 1397 | { 1398 | "name": "stdout", 1399 | "output_type": "stream", 1400 | "text": [ 1401 | "---------------------------------------------------------------------------------------------------\n", 1402 | "MODEL GENERATION - FEW SHOT:\n", 1403 | "#Person1 wants to upgrade his system.\n", 1404 | "---------------------------------------------------------------------------------------------------\n", 1405 | "BASELINE HUMAN SUMMARY:\n", 1406 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 1407 | "\n" 1408 | ] 1409 | } 1410 | ], 1411 | "source": [ 1412 | "# generation_config = GenerationConfig(max_new_tokens=50)\n", 1413 | "generation_config = GenerationConfig(max_new_tokens=10)\n", 1414 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.1)\n", 1415 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.5)\n", 1416 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=1.0)\n", 1417 | "\n", 1418 | "inputs = tokenizer(few_shot_prompt, return_tensors='pt')\n", 1419 | "output = tokenizer.decode(\n", 1420 | " model.generate(\n", 1421 | " inputs[\"input_ids\"],\n", 1422 | " generation_config=generation_config,\n", 1423 | " )[0], \n", 1424 | " skip_special_tokens=True\n", 1425 | ")\n", 1426 | "\n", 1427 | "print(dash_line)\n", 1428 | "print(f'MODEL GENERATION - FEW SHOT:\\n{output}')\n", 1429 | "print(dash_line)\n", 1430 | "print(f'BASELINE HUMAN SUMMARY:\\n{summary}\\n')" 1431 | ] 1432 | }, 1433 | { 1434 | "cell_type": "code", 1435 | "execution_count": 19, 1436 | "metadata": { 1437 | "tags": [] 1438 | }, 1439 | "outputs": [ 1440 | { 1441 | "name": "stdout", 1442 | "output_type": "stream", 1443 | "text": [ 1444 | "---------------------------------------------------------------------------------------------------\n", 1445 | "MODEL GENERATION - FEW SHOT:\n", 1446 | "#Person1 recommends upgrading the system, adding a painting program, adding a computer, adding a CD-ROM drive and adding a CD-ROM drive.\n", 1447 | "---------------------------------------------------------------------------------------------------\n", 1448 | "BASELINE HUMAN SUMMARY:\n", 1449 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 1450 | "\n" 1451 | ] 1452 | } 1453 | ], 1454 | "source": [ 1455 | "# generation_config = GenerationConfig(max_new_tokens=50)\n", 1456 | "# generation_config = GenerationConfig(max_new_tokens=10)\n", 1457 | "generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.1)\n", 1458 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.5)\n", 1459 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=1.0)\n", 1460 | "\n", 1461 | "inputs = tokenizer(few_shot_prompt, return_tensors='pt')\n", 1462 | "output = tokenizer.decode(\n", 1463 | " model.generate(\n", 1464 | " inputs[\"input_ids\"],\n", 1465 | " generation_config=generation_config,\n", 1466 | " )[0], \n", 1467 | " skip_special_tokens=True\n", 1468 | ")\n", 1469 | "\n", 1470 | "print(dash_line)\n", 1471 | "print(f'MODEL GENERATION - FEW SHOT:\\n{output}')\n", 1472 | "print(dash_line)\n", 1473 | "print(f'BASELINE HUMAN SUMMARY:\\n{summary}\\n')" 1474 | ] 1475 | }, 1476 | { 1477 | "cell_type": "code", 1478 | 
"execution_count": 20, 1479 | "metadata": { 1480 | "tags": [] 1481 | }, 1482 | "outputs": [ 1483 | { 1484 | "name": "stdout", 1485 | "output_type": "stream", 1486 | "text": [ 1487 | "---------------------------------------------------------------------------------------------------\n", 1488 | "MODEL GENERATION - FEW SHOT:\n", 1489 | "#Person1 offers several suggestions on upgrading his system.\n", 1490 | "---------------------------------------------------------------------------------------------------\n", 1491 | "BASELINE HUMAN SUMMARY:\n", 1492 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 1493 | "\n" 1494 | ] 1495 | } 1496 | ], 1497 | "source": [ 1498 | "# generation_config = GenerationConfig(max_new_tokens=50)\n", 1499 | "# generation_config = GenerationConfig(max_new_tokens=10)\n", 1500 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.1)\n", 1501 | "generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.5)\n", 1502 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=1.0)\n", 1503 | "\n", 1504 | "inputs = tokenizer(few_shot_prompt, return_tensors='pt')\n", 1505 | "output = tokenizer.decode(\n", 1506 | " model.generate(\n", 1507 | " inputs[\"input_ids\"],\n", 1508 | " generation_config=generation_config,\n", 1509 | " )[0], \n", 1510 | " skip_special_tokens=True\n", 1511 | ")\n", 1512 | "\n", 1513 | "print(dash_line)\n", 1514 | "print(f'MODEL GENERATION - FEW SHOT:\\n{output}')\n", 1515 | "print(dash_line)\n", 1516 | "print(f'BASELINE HUMAN SUMMARY:\\n{summary}\\n')" 1517 | ] 1518 | }, 1519 | { 1520 | "cell_type": "code", 1521 | "execution_count": 21, 1522 | "metadata": { 1523 | "tags": [] 1524 | }, 1525 | "outputs": [ 1526 | { 1527 | "name": "stdout", 1528 | "output_type": "stream", 1529 | "text": [ 1530 | "---------------------------------------------------------------------------------------------------\n", 1531 | "MODEL GENERATION - FEW SHOT:\n", 1532 | "There are several things you could change.\n", 1533 | "---------------------------------------------------------------------------------------------------\n", 1534 | "BASELINE HUMAN SUMMARY:\n", 1535 | "#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.\n", 1536 | "\n" 1537 | ] 1538 | } 1539 | ], 1540 | "source": [ 1541 | "# generation_config = GenerationConfig(max_new_tokens=50)\n", 1542 | "# generation_config = GenerationConfig(max_new_tokens=10)\n", 1543 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.1)\n", 1544 | "# generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=0.5)\n", 1545 | "generation_config = GenerationConfig(max_new_tokens=50, do_sample=True, temperature=1.0)\n", 1546 | "\n", 1547 | "inputs = tokenizer(few_shot_prompt, return_tensors='pt')\n", 1548 | "output = tokenizer.decode(\n", 1549 | " model.generate(\n", 1550 | " inputs[\"input_ids\"],\n", 1551 | " generation_config=generation_config,\n", 1552 | " )[0], \n", 1553 | " skip_special_tokens=True\n", 1554 | ")\n", 1555 | "\n", 1556 | "print(dash_line)\n", 1557 | "print(f'MODEL GENERATION - FEW SHOT:\\n{output}')\n", 1558 | "print(dash_line)\n", 1559 | "print(f'BASELINE HUMAN SUMMARY:\\n{summary}\\n')" 1560 | ] 1561 | }, 1562 | { 1563 | "cell_type": "code", 1564 | "execution_count": null, 1565 | "metadata": {}, 1566 | "outputs": [], 1567 | "source": [] 1568 | } 1569 | ], 1570 | "metadata": { 1571 | 
"availableInstances": [ 1572 | { 1573 | "_defaultOrder": 0, 1574 | "_isFastLaunch": true, 1575 | "category": "General purpose", 1576 | "gpuNum": 0, 1577 | "hideHardwareSpecs": false, 1578 | "memoryGiB": 4, 1579 | "name": "ml.t3.medium", 1580 | "vcpuNum": 2 1581 | }, 1582 | { 1583 | "_defaultOrder": 1, 1584 | "_isFastLaunch": false, 1585 | "category": "General purpose", 1586 | "gpuNum": 0, 1587 | "hideHardwareSpecs": false, 1588 | "memoryGiB": 8, 1589 | "name": "ml.t3.large", 1590 | "vcpuNum": 2 1591 | }, 1592 | { 1593 | "_defaultOrder": 2, 1594 | "_isFastLaunch": false, 1595 | "category": "General purpose", 1596 | "gpuNum": 0, 1597 | "hideHardwareSpecs": false, 1598 | "memoryGiB": 16, 1599 | "name": "ml.t3.xlarge", 1600 | "vcpuNum": 4 1601 | }, 1602 | { 1603 | "_defaultOrder": 3, 1604 | "_isFastLaunch": false, 1605 | "category": "General purpose", 1606 | "gpuNum": 0, 1607 | "hideHardwareSpecs": false, 1608 | "memoryGiB": 32, 1609 | "name": "ml.t3.2xlarge", 1610 | "vcpuNum": 8 1611 | }, 1612 | { 1613 | "_defaultOrder": 4, 1614 | "_isFastLaunch": true, 1615 | "category": "General purpose", 1616 | "gpuNum": 0, 1617 | "hideHardwareSpecs": false, 1618 | "memoryGiB": 8, 1619 | "name": "ml.m5.large", 1620 | "vcpuNum": 2 1621 | }, 1622 | { 1623 | "_defaultOrder": 5, 1624 | "_isFastLaunch": false, 1625 | "category": "General purpose", 1626 | "gpuNum": 0, 1627 | "hideHardwareSpecs": false, 1628 | "memoryGiB": 16, 1629 | "name": "ml.m5.xlarge", 1630 | "vcpuNum": 4 1631 | }, 1632 | { 1633 | "_defaultOrder": 6, 1634 | "_isFastLaunch": false, 1635 | "category": "General purpose", 1636 | "gpuNum": 0, 1637 | "hideHardwareSpecs": false, 1638 | "memoryGiB": 32, 1639 | "name": "ml.m5.2xlarge", 1640 | "vcpuNum": 8 1641 | }, 1642 | { 1643 | "_defaultOrder": 7, 1644 | "_isFastLaunch": false, 1645 | "category": "General purpose", 1646 | "gpuNum": 0, 1647 | "hideHardwareSpecs": false, 1648 | "memoryGiB": 64, 1649 | "name": "ml.m5.4xlarge", 1650 | "vcpuNum": 16 1651 | }, 1652 | { 1653 | "_defaultOrder": 8, 1654 | "_isFastLaunch": false, 1655 | "category": "General purpose", 1656 | "gpuNum": 0, 1657 | "hideHardwareSpecs": false, 1658 | "memoryGiB": 128, 1659 | "name": "ml.m5.8xlarge", 1660 | "vcpuNum": 32 1661 | }, 1662 | { 1663 | "_defaultOrder": 9, 1664 | "_isFastLaunch": false, 1665 | "category": "General purpose", 1666 | "gpuNum": 0, 1667 | "hideHardwareSpecs": false, 1668 | "memoryGiB": 192, 1669 | "name": "ml.m5.12xlarge", 1670 | "vcpuNum": 48 1671 | }, 1672 | { 1673 | "_defaultOrder": 10, 1674 | "_isFastLaunch": false, 1675 | "category": "General purpose", 1676 | "gpuNum": 0, 1677 | "hideHardwareSpecs": false, 1678 | "memoryGiB": 256, 1679 | "name": "ml.m5.16xlarge", 1680 | "vcpuNum": 64 1681 | }, 1682 | { 1683 | "_defaultOrder": 11, 1684 | "_isFastLaunch": false, 1685 | "category": "General purpose", 1686 | "gpuNum": 0, 1687 | "hideHardwareSpecs": false, 1688 | "memoryGiB": 384, 1689 | "name": "ml.m5.24xlarge", 1690 | "vcpuNum": 96 1691 | }, 1692 | { 1693 | "_defaultOrder": 12, 1694 | "_isFastLaunch": false, 1695 | "category": "General purpose", 1696 | "gpuNum": 0, 1697 | "hideHardwareSpecs": false, 1698 | "memoryGiB": 8, 1699 | "name": "ml.m5d.large", 1700 | "vcpuNum": 2 1701 | }, 1702 | { 1703 | "_defaultOrder": 13, 1704 | "_isFastLaunch": false, 1705 | "category": "General purpose", 1706 | "gpuNum": 0, 1707 | "hideHardwareSpecs": false, 1708 | "memoryGiB": 16, 1709 | "name": "ml.m5d.xlarge", 1710 | "vcpuNum": 4 1711 | }, 1712 | { 1713 | "_defaultOrder": 14, 1714 | "_isFastLaunch": false, 1715 | 
"category": "General purpose", 1716 | "gpuNum": 0, 1717 | "hideHardwareSpecs": false, 1718 | "memoryGiB": 32, 1719 | "name": "ml.m5d.2xlarge", 1720 | "vcpuNum": 8 1721 | }, 1722 | { 1723 | "_defaultOrder": 15, 1724 | "_isFastLaunch": false, 1725 | "category": "General purpose", 1726 | "gpuNum": 0, 1727 | "hideHardwareSpecs": false, 1728 | "memoryGiB": 64, 1729 | "name": "ml.m5d.4xlarge", 1730 | "vcpuNum": 16 1731 | }, 1732 | { 1733 | "_defaultOrder": 16, 1734 | "_isFastLaunch": false, 1735 | "category": "General purpose", 1736 | "gpuNum": 0, 1737 | "hideHardwareSpecs": false, 1738 | "memoryGiB": 128, 1739 | "name": "ml.m5d.8xlarge", 1740 | "vcpuNum": 32 1741 | }, 1742 | { 1743 | "_defaultOrder": 17, 1744 | "_isFastLaunch": false, 1745 | "category": "General purpose", 1746 | "gpuNum": 0, 1747 | "hideHardwareSpecs": false, 1748 | "memoryGiB": 192, 1749 | "name": "ml.m5d.12xlarge", 1750 | "vcpuNum": 48 1751 | }, 1752 | { 1753 | "_defaultOrder": 18, 1754 | "_isFastLaunch": false, 1755 | "category": "General purpose", 1756 | "gpuNum": 0, 1757 | "hideHardwareSpecs": false, 1758 | "memoryGiB": 256, 1759 | "name": "ml.m5d.16xlarge", 1760 | "vcpuNum": 64 1761 | }, 1762 | { 1763 | "_defaultOrder": 19, 1764 | "_isFastLaunch": false, 1765 | "category": "General purpose", 1766 | "gpuNum": 0, 1767 | "hideHardwareSpecs": false, 1768 | "memoryGiB": 384, 1769 | "name": "ml.m5d.24xlarge", 1770 | "vcpuNum": 96 1771 | }, 1772 | { 1773 | "_defaultOrder": 20, 1774 | "_isFastLaunch": false, 1775 | "category": "General purpose", 1776 | "gpuNum": 0, 1777 | "hideHardwareSpecs": true, 1778 | "memoryGiB": 0, 1779 | "name": "ml.geospatial.interactive", 1780 | "supportedImageNames": [ 1781 | "sagemaker-geospatial-v1-0" 1782 | ], 1783 | "vcpuNum": 0 1784 | }, 1785 | { 1786 | "_defaultOrder": 21, 1787 | "_isFastLaunch": true, 1788 | "category": "Compute optimized", 1789 | "gpuNum": 0, 1790 | "hideHardwareSpecs": false, 1791 | "memoryGiB": 4, 1792 | "name": "ml.c5.large", 1793 | "vcpuNum": 2 1794 | }, 1795 | { 1796 | "_defaultOrder": 22, 1797 | "_isFastLaunch": false, 1798 | "category": "Compute optimized", 1799 | "gpuNum": 0, 1800 | "hideHardwareSpecs": false, 1801 | "memoryGiB": 8, 1802 | "name": "ml.c5.xlarge", 1803 | "vcpuNum": 4 1804 | }, 1805 | { 1806 | "_defaultOrder": 23, 1807 | "_isFastLaunch": false, 1808 | "category": "Compute optimized", 1809 | "gpuNum": 0, 1810 | "hideHardwareSpecs": false, 1811 | "memoryGiB": 16, 1812 | "name": "ml.c5.2xlarge", 1813 | "vcpuNum": 8 1814 | }, 1815 | { 1816 | "_defaultOrder": 24, 1817 | "_isFastLaunch": false, 1818 | "category": "Compute optimized", 1819 | "gpuNum": 0, 1820 | "hideHardwareSpecs": false, 1821 | "memoryGiB": 32, 1822 | "name": "ml.c5.4xlarge", 1823 | "vcpuNum": 16 1824 | }, 1825 | { 1826 | "_defaultOrder": 25, 1827 | "_isFastLaunch": false, 1828 | "category": "Compute optimized", 1829 | "gpuNum": 0, 1830 | "hideHardwareSpecs": false, 1831 | "memoryGiB": 72, 1832 | "name": "ml.c5.9xlarge", 1833 | "vcpuNum": 36 1834 | }, 1835 | { 1836 | "_defaultOrder": 26, 1837 | "_isFastLaunch": false, 1838 | "category": "Compute optimized", 1839 | "gpuNum": 0, 1840 | "hideHardwareSpecs": false, 1841 | "memoryGiB": 96, 1842 | "name": "ml.c5.12xlarge", 1843 | "vcpuNum": 48 1844 | }, 1845 | { 1846 | "_defaultOrder": 27, 1847 | "_isFastLaunch": false, 1848 | "category": "Compute optimized", 1849 | "gpuNum": 0, 1850 | "hideHardwareSpecs": false, 1851 | "memoryGiB": 144, 1852 | "name": "ml.c5.18xlarge", 1853 | "vcpuNum": 72 1854 | }, 1855 | { 1856 | "_defaultOrder": 28, 1857 | 
"_isFastLaunch": false, 1858 | "category": "Compute optimized", 1859 | "gpuNum": 0, 1860 | "hideHardwareSpecs": false, 1861 | "memoryGiB": 192, 1862 | "name": "ml.c5.24xlarge", 1863 | "vcpuNum": 96 1864 | }, 1865 | { 1866 | "_defaultOrder": 29, 1867 | "_isFastLaunch": true, 1868 | "category": "Accelerated computing", 1869 | "gpuNum": 1, 1870 | "hideHardwareSpecs": false, 1871 | "memoryGiB": 16, 1872 | "name": "ml.g4dn.xlarge", 1873 | "vcpuNum": 4 1874 | }, 1875 | { 1876 | "_defaultOrder": 30, 1877 | "_isFastLaunch": false, 1878 | "category": "Accelerated computing", 1879 | "gpuNum": 1, 1880 | "hideHardwareSpecs": false, 1881 | "memoryGiB": 32, 1882 | "name": "ml.g4dn.2xlarge", 1883 | "vcpuNum": 8 1884 | }, 1885 | { 1886 | "_defaultOrder": 31, 1887 | "_isFastLaunch": false, 1888 | "category": "Accelerated computing", 1889 | "gpuNum": 1, 1890 | "hideHardwareSpecs": false, 1891 | "memoryGiB": 64, 1892 | "name": "ml.g4dn.4xlarge", 1893 | "vcpuNum": 16 1894 | }, 1895 | { 1896 | "_defaultOrder": 32, 1897 | "_isFastLaunch": false, 1898 | "category": "Accelerated computing", 1899 | "gpuNum": 1, 1900 | "hideHardwareSpecs": false, 1901 | "memoryGiB": 128, 1902 | "name": "ml.g4dn.8xlarge", 1903 | "vcpuNum": 32 1904 | }, 1905 | { 1906 | "_defaultOrder": 33, 1907 | "_isFastLaunch": false, 1908 | "category": "Accelerated computing", 1909 | "gpuNum": 4, 1910 | "hideHardwareSpecs": false, 1911 | "memoryGiB": 192, 1912 | "name": "ml.g4dn.12xlarge", 1913 | "vcpuNum": 48 1914 | }, 1915 | { 1916 | "_defaultOrder": 34, 1917 | "_isFastLaunch": false, 1918 | "category": "Accelerated computing", 1919 | "gpuNum": 1, 1920 | "hideHardwareSpecs": false, 1921 | "memoryGiB": 256, 1922 | "name": "ml.g4dn.16xlarge", 1923 | "vcpuNum": 64 1924 | }, 1925 | { 1926 | "_defaultOrder": 35, 1927 | "_isFastLaunch": false, 1928 | "category": "Accelerated computing", 1929 | "gpuNum": 1, 1930 | "hideHardwareSpecs": false, 1931 | "memoryGiB": 61, 1932 | "name": "ml.p3.2xlarge", 1933 | "vcpuNum": 8 1934 | }, 1935 | { 1936 | "_defaultOrder": 36, 1937 | "_isFastLaunch": false, 1938 | "category": "Accelerated computing", 1939 | "gpuNum": 4, 1940 | "hideHardwareSpecs": false, 1941 | "memoryGiB": 244, 1942 | "name": "ml.p3.8xlarge", 1943 | "vcpuNum": 32 1944 | }, 1945 | { 1946 | "_defaultOrder": 37, 1947 | "_isFastLaunch": false, 1948 | "category": "Accelerated computing", 1949 | "gpuNum": 8, 1950 | "hideHardwareSpecs": false, 1951 | "memoryGiB": 488, 1952 | "name": "ml.p3.16xlarge", 1953 | "vcpuNum": 64 1954 | }, 1955 | { 1956 | "_defaultOrder": 38, 1957 | "_isFastLaunch": false, 1958 | "category": "Accelerated computing", 1959 | "gpuNum": 8, 1960 | "hideHardwareSpecs": false, 1961 | "memoryGiB": 768, 1962 | "name": "ml.p3dn.24xlarge", 1963 | "vcpuNum": 96 1964 | }, 1965 | { 1966 | "_defaultOrder": 39, 1967 | "_isFastLaunch": false, 1968 | "category": "Memory Optimized", 1969 | "gpuNum": 0, 1970 | "hideHardwareSpecs": false, 1971 | "memoryGiB": 16, 1972 | "name": "ml.r5.large", 1973 | "vcpuNum": 2 1974 | }, 1975 | { 1976 | "_defaultOrder": 40, 1977 | "_isFastLaunch": false, 1978 | "category": "Memory Optimized", 1979 | "gpuNum": 0, 1980 | "hideHardwareSpecs": false, 1981 | "memoryGiB": 32, 1982 | "name": "ml.r5.xlarge", 1983 | "vcpuNum": 4 1984 | }, 1985 | { 1986 | "_defaultOrder": 41, 1987 | "_isFastLaunch": false, 1988 | "category": "Memory Optimized", 1989 | "gpuNum": 0, 1990 | "hideHardwareSpecs": false, 1991 | "memoryGiB": 64, 1992 | "name": "ml.r5.2xlarge", 1993 | "vcpuNum": 8 1994 | }, 1995 | { 1996 | "_defaultOrder": 42, 1997 | 
"_isFastLaunch": false, 1998 | "category": "Memory Optimized", 1999 | "gpuNum": 0, 2000 | "hideHardwareSpecs": false, 2001 | "memoryGiB": 128, 2002 | "name": "ml.r5.4xlarge", 2003 | "vcpuNum": 16 2004 | }, 2005 | { 2006 | "_defaultOrder": 43, 2007 | "_isFastLaunch": false, 2008 | "category": "Memory Optimized", 2009 | "gpuNum": 0, 2010 | "hideHardwareSpecs": false, 2011 | "memoryGiB": 256, 2012 | "name": "ml.r5.8xlarge", 2013 | "vcpuNum": 32 2014 | }, 2015 | { 2016 | "_defaultOrder": 44, 2017 | "_isFastLaunch": false, 2018 | "category": "Memory Optimized", 2019 | "gpuNum": 0, 2020 | "hideHardwareSpecs": false, 2021 | "memoryGiB": 384, 2022 | "name": "ml.r5.12xlarge", 2023 | "vcpuNum": 48 2024 | }, 2025 | { 2026 | "_defaultOrder": 45, 2027 | "_isFastLaunch": false, 2028 | "category": "Memory Optimized", 2029 | "gpuNum": 0, 2030 | "hideHardwareSpecs": false, 2031 | "memoryGiB": 512, 2032 | "name": "ml.r5.16xlarge", 2033 | "vcpuNum": 64 2034 | }, 2035 | { 2036 | "_defaultOrder": 46, 2037 | "_isFastLaunch": false, 2038 | "category": "Memory Optimized", 2039 | "gpuNum": 0, 2040 | "hideHardwareSpecs": false, 2041 | "memoryGiB": 768, 2042 | "name": "ml.r5.24xlarge", 2043 | "vcpuNum": 96 2044 | }, 2045 | { 2046 | "_defaultOrder": 47, 2047 | "_isFastLaunch": false, 2048 | "category": "Accelerated computing", 2049 | "gpuNum": 1, 2050 | "hideHardwareSpecs": false, 2051 | "memoryGiB": 16, 2052 | "name": "ml.g5.xlarge", 2053 | "vcpuNum": 4 2054 | }, 2055 | { 2056 | "_defaultOrder": 48, 2057 | "_isFastLaunch": false, 2058 | "category": "Accelerated computing", 2059 | "gpuNum": 1, 2060 | "hideHardwareSpecs": false, 2061 | "memoryGiB": 32, 2062 | "name": "ml.g5.2xlarge", 2063 | "vcpuNum": 8 2064 | }, 2065 | { 2066 | "_defaultOrder": 49, 2067 | "_isFastLaunch": false, 2068 | "category": "Accelerated computing", 2069 | "gpuNum": 1, 2070 | "hideHardwareSpecs": false, 2071 | "memoryGiB": 64, 2072 | "name": "ml.g5.4xlarge", 2073 | "vcpuNum": 16 2074 | }, 2075 | { 2076 | "_defaultOrder": 50, 2077 | "_isFastLaunch": false, 2078 | "category": "Accelerated computing", 2079 | "gpuNum": 1, 2080 | "hideHardwareSpecs": false, 2081 | "memoryGiB": 128, 2082 | "name": "ml.g5.8xlarge", 2083 | "vcpuNum": 32 2084 | }, 2085 | { 2086 | "_defaultOrder": 51, 2087 | "_isFastLaunch": false, 2088 | "category": "Accelerated computing", 2089 | "gpuNum": 1, 2090 | "hideHardwareSpecs": false, 2091 | "memoryGiB": 256, 2092 | "name": "ml.g5.16xlarge", 2093 | "vcpuNum": 64 2094 | }, 2095 | { 2096 | "_defaultOrder": 52, 2097 | "_isFastLaunch": false, 2098 | "category": "Accelerated computing", 2099 | "gpuNum": 4, 2100 | "hideHardwareSpecs": false, 2101 | "memoryGiB": 192, 2102 | "name": "ml.g5.12xlarge", 2103 | "vcpuNum": 48 2104 | }, 2105 | { 2106 | "_defaultOrder": 53, 2107 | "_isFastLaunch": false, 2108 | "category": "Accelerated computing", 2109 | "gpuNum": 4, 2110 | "hideHardwareSpecs": false, 2111 | "memoryGiB": 384, 2112 | "name": "ml.g5.24xlarge", 2113 | "vcpuNum": 96 2114 | }, 2115 | { 2116 | "_defaultOrder": 54, 2117 | "_isFastLaunch": false, 2118 | "category": "Accelerated computing", 2119 | "gpuNum": 8, 2120 | "hideHardwareSpecs": false, 2121 | "memoryGiB": 768, 2122 | "name": "ml.g5.48xlarge", 2123 | "vcpuNum": 192 2124 | }, 2125 | { 2126 | "_defaultOrder": 55, 2127 | "_isFastLaunch": false, 2128 | "category": "Accelerated computing", 2129 | "gpuNum": 8, 2130 | "hideHardwareSpecs": false, 2131 | "memoryGiB": 1152, 2132 | "name": "ml.p4d.24xlarge", 2133 | "vcpuNum": 96 2134 | }, 2135 | { 2136 | "_defaultOrder": 56, 2137 | 
"_isFastLaunch": false, 2138 | "category": "Accelerated computing", 2139 | "gpuNum": 8, 2140 | "hideHardwareSpecs": false, 2141 | "memoryGiB": 1152, 2142 | "name": "ml.p4de.24xlarge", 2143 | "vcpuNum": 96 2144 | }, 2145 | { 2146 | "_defaultOrder": 57, 2147 | "_isFastLaunch": false, 2148 | "category": "Accelerated computing", 2149 | "gpuNum": 0, 2150 | "hideHardwareSpecs": false, 2151 | "memoryGiB": 32, 2152 | "name": "ml.trn1.2xlarge", 2153 | "vcpuNum": 8 2154 | }, 2155 | { 2156 | "_defaultOrder": 58, 2157 | "_isFastLaunch": false, 2158 | "category": "Accelerated computing", 2159 | "gpuNum": 0, 2160 | "hideHardwareSpecs": false, 2161 | "memoryGiB": 512, 2162 | "name": "ml.trn1.32xlarge", 2163 | "vcpuNum": 128 2164 | }, 2165 | { 2166 | "_defaultOrder": 59, 2167 | "_isFastLaunch": false, 2168 | "category": "Accelerated computing", 2169 | "gpuNum": 0, 2170 | "hideHardwareSpecs": false, 2171 | "memoryGiB": 512, 2172 | "name": "ml.trn1n.32xlarge", 2173 | "vcpuNum": 128 2174 | } 2175 | ], 2176 | "instance_type": "ml.m5.2xlarge", 2177 | "kernelspec": { 2178 | "display_name": "Python 3 (ipykernel)", 2179 | "language": "python", 2180 | "name": "python3" 2181 | }, 2182 | "language_info": { 2183 | "codemirror_mode": { 2184 | "name": "ipython", 2185 | "version": 3 2186 | }, 2187 | "file_extension": ".py", 2188 | "mimetype": "text/x-python", 2189 | "name": "python", 2190 | "nbconvert_exporter": "python", 2191 | "pygments_lexer": "ipython3", 2192 | "version": "3.8.17" 2193 | } 2194 | }, 2195 | "nbformat": 4, 2196 | "nbformat_minor": 4 2197 | } 2198 | --------------------------------------------------------------------------------