├── LICENSE
├── README.md
├── ai-eng-access-llms.ipynb
├── ai-eng-agents.ipynb
├── ai-eng-basic-prompting.ipynb
├── ai-eng-chaining-augmentation.ipynb
├── ai-eng-fine-tuning-flan.ipynb
├── ai-eng-fine-tuning-llama2.ipynb
├── ai-eng-fine-tuning-sft.ipynb
├── ai-eng-prompt-engineering.ipynb
└── app
    └── app.py


/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AI Engineering Workshop: Building Generative AI Apps That Overcome Enterprise Barriers and Create Real Value
 2 | 
 3 | (These are the materials are meant to accompany Daniel Whitenack's generative AI/ LLM workshops. These workshops are meant to be given in-person. As such, certain relevant context and discussions might be missing from the notebooks themselves.)
 4 | 
 5 | Product teams are scrambling to figure out what they will be doing with this latest wave of AI technologies, and engineering organizations are struggling to bring generative AI into enterprise environments. Beyond anecdotal ChatGPT interactions and intriguing demos, teams need to know how to validate inconsistent model outputs, mitigate the risk of hallucinations, structure text completions, maintain data privacy, integrate private data, establish competitive advantages, plan development activities, and understand the landscape of tooling.
 6 | 
 7 | Leaving this workshop, you will be equipped with processes and knowledge to overcome each of these barriers, and you will have gained the practical, hands-on expertise to start integrating generative AI in your domain.
 8 | 
 9 | Key Takeaways:
10 | 1. Learn the essential AI engineering skills of prompting, data augmentation, chaining, developing agents, validating/filtering model inputs and outputs, and fine-tuning.
11 | 2. Get hands-on with the latest generative AI models (i.e., Llama 2, WizardCoder, etc.)
12 | 3. Gain a better understanding of how transformative AI applications are being architected via a new generative AI stack of tools/infra.
13 | 
14 | ## Setup and pre-requisites:
15 | 
16 | - Obtain a [Prediction Guard](https://predictionguard.com) token from the instructor
17 | - Create a free SerpAPI account and get an API key as described [here](https://serpapi.com/)
18 | - Make sure you either:
19 |     - have access to a Google account, such that you can use the below notebooks via Google Colab
20 |     - have a local Python environment with Jupyter available (to download notebooks from here and use them locally
21 | 
22 | ## Workshop materials
23 | 
24 | Slides from the workshop are available [here](https://docs.google.com/presentation/d/1xoW56dulB933N3Lapt9B0tXDVDcjmqQmYHEGLkOgOHs/edit?usp=sharing). All of the code examples are in the form of Python notebooks. The notebooks can be run in Google Colab by:
25 | 
26 | 1. Opening [Google Colab](https://colab.research.google.com/?utm_source=scs-index)
27 | 2. Navigating to File -> Open Notebook -> GitHub and pasting in the link to this repository
28 | 3. Selecting the notebook you want to run
29 | 


--------------------------------------------------------------------------------
/ai-eng-access-llms.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyMNrg1I2PYN0rsnXtweI9oG"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["To run any prompt through a model, we need to set a foundation for how we will access generative AI models and perform inference. There is a huge variety in the landscape of generative AI models in terms of size, access patterns, licensing, etc. However, a common theme is the usage of LLMs through a REST API, which is either:\n","- Provided by a third party service (OpenAI, Anthropic, Cohere, etc.)\n","- Self-hosted in your own infrastructure or in an account you control with a model hosting provider (Replicate, Baseten, etc.)\n","- Self-hosted using a DIY model serving API (Flask, FastAPI, etc.)\n","\n","We will use a tool called [Prediction Guard](https://www.predictionguard.com/) to call both proprietary models (like OpenAI) and open access LLMs (like Llama 2, WizardCoder, MPT, etc.) via a standardized OpenAI-like API. This will allow us to explore the full range of LLMs available. Further, it will illustrate how companies can access a wide range of models (outside of the GPT family).\n","\n","If you are interested, Prediction Guard does provide some significant functionality on top of this standardized API (see the [docs](https://docs.predictionguard.com/)). Specifically, it lets you:\n","\n","- **Control** the structure of and easily constrain LLM output to the types, formats, and information relevant to your business;\n","- **Validate** and check LLM output to guard against hallucination and toxicity; and\n","- **Implement compliant LLM systems** (HIPAA, and self-hosted) that give your legal counsel warm fuzzy feeling while still delighting your customers with AI features.\n","\n","To run your first LLM prompt with *Prediction Guard*, you will need a Prediction Guard access token that will be provided to you by the instructor."],"metadata":{"id":"tx0f1rKRWqS_"}},{"cell_type":"markdown","source":["# Install dependences, imports"],"metadata":{"id":"ZGe8RF_LzKjK"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"pI0jTm47xNj5"},"outputs":[],"source":["! pip install predictionguard"]},{"cell_type":"code","source":["import os\n","import json\n","\n","import predictionguard as pg\n","from getpass import getpass"],"metadata":{"id":"Wg7xvnBhxb38"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["pg_access_token = getpass('Enter your Prediction Guard access token: ')\n","os.environ['PREDICTIONGUARD_TOKEN'] = pg_access_token"],"metadata":{"id":"K_cUA6tClxcM"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# List available models"],"metadata":{"id":"eutZBE8vtLC5"}},{"cell_type":"markdown","source":["You can find out more about the models available via the Prediction Guard API [in the docs](https://docs.predictionguard.com/models)."],"metadata":{"id":"obLO0rEGtPTE"}},{"cell_type":"code","source":["pg.Completion.list_models()"],"metadata":{"id":"wM5pESLxtXic"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Generate some text from the latest open access LLMs"],"metadata":{"id":"fr7dHK-VyW2s"}},{"cell_type":"code","source":["response = pg.Completion.create(model=\"Nous-Hermes-Llama2-13B\",\n","                          prompt=\"The best joke I know is: \")\n","\n","print(json.dumps(\n","    response,\n","    sort_keys=True,\n","    indent=4,\n","    separators=(',', ': ')\n","))"],"metadata":{"id":"9xw7U9qDzPKZ"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Generate text from a proprietary LLM (OpenAI)"],"metadata":{"id":"54ybrbDxnSi6"}},{"cell_type":"code","source":["openai_api_key = getpass('Enter your OpenAI API key: ')\n","os.environ['OPENAI_API_KEY'] = openai_api_key"],"metadata":{"id":"JadVmYcYnxj-"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["response = pg.Completion.create(model=\"OpenAI-text-davinci-003\",\n","                          prompt=\"The best joke I know is: \")\n","\n","print(response['choices'][0]['text'])"],"metadata":{"id":"cwAXEVuHnVUx"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":[],"metadata":{"id":"-jeL4vF3xSui"},"execution_count":null,"outputs":[]}]}


--------------------------------------------------------------------------------
/ai-eng-agents.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyN5jBp78936ztiklVHGjDLM"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["Prompts, chaining, and prompt engineering is important. However, you might not always know what chain or prompts you need to execute prior to receiving user input or new data. This is where automation and **agents** can help. This is an active area of development, but some very useful tooling is available.\n","\n","In the following we will explore using [LangChain agents](https://python.langchain.com/en/latest/use_cases/personal_assistants.html) and [Prediction Guard's Task Agents]() to detect and automate LLM actions."],"metadata":{"id":"u6S30xEsi299"}},{"cell_type":"markdown","source":["# Dependencies and imports"],"metadata":{"id":"zLoZ8qQyiyqQ"}},{"cell_type":"code","source":["! pip install predictionguard langchain google-search-results"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"-vk_eN4oMMka","executionInfo":{"status":"ok","timestamp":1696466679295,"user_tz":240,"elapsed":14259,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"b20af756-1d02-4ce2-fd7e-8b7f8b36645c"},"execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting predictionguard\n","  Downloading predictionguard-1.4.2-py2.py3-none-any.whl (7.2 kB)\n","Collecting langchain\n","  Downloading langchain-0.0.308-py3-none-any.whl (1.8 MB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting google-search-results\n","  Downloading google_search_results-2.4.2.tar.gz (18 kB)\n","  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Requirement already satisfied: tabulate>=0.8.10 in /usr/local/lib/python3.10/dist-packages (from predictionguard) (0.9.0)\n","Requirement already satisfied: requests>=2.27.1 in /usr/local/lib/python3.10/dist-packages (from predictionguard) (2.31.0)\n","Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (6.0.1)\n","Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.0.21)\n","Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (3.8.5)\n","Requirement already satisfied: anyio<4.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (3.7.1)\n","Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (4.0.3)\n","Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)\n","  Downloading dataclasses_json-0.6.1-py3-none-any.whl (27 kB)\n","Collecting jsonpatch<2.0,>=1.33 (from langchain)\n","  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n","Collecting langsmith<0.1.0,>=0.0.40 (from langchain)\n","  Downloading langsmith-0.0.41-py3-none-any.whl (39 kB)\n","Requirement already satisfied: numpy<2,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.23.5)\n","Requirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (1.10.13)\n","Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (8.2.3)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.1.0)\n","Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (3.2.0)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.4)\n","Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.2)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.0)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n","Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<4.0->langchain) (3.4)\n","Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4.0->langchain) (1.3.0)\n","Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<4.0->langchain) (1.1.3)\n","Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n","  Downloading marshmallow-3.20.1-py3-none-any.whl (49 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\n","  Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n","Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain)\n","  Downloading jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)\n","Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (4.5.0)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.27.1->predictionguard) (2.0.5)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.27.1->predictionguard) (2023.7.22)\n","Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (2.0.2)\n","Requirement already satisfied: packaging>=17.0 in /usr/local/lib/python3.10/dist-packages (from marshmallow<4.0.0,>=3.18.0->dataclasses-json<0.7,>=0.5.7->langchain) (23.1)\n","Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain)\n","  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n","Building wheels for collected packages: google-search-results\n","  Building wheel for google-search-results (setup.py) ... \u001b[?25l\u001b[?25hdone\n","  Created wheel for google-search-results: filename=google_search_results-2.4.2-py3-none-any.whl size=32003 sha256=b07fe04023fa0e69c5e7ed14046f5a7399deb1c814f3bd76129e4c6042218339\n","  Stored in directory: /root/.cache/pip/wheels/d3/b2/c3/03302d12bb44a2cdff3c9371f31b72c0c4e84b8d2285eeac53\n","Successfully built google-search-results\n","Installing collected packages: mypy-extensions, marshmallow, jsonpointer, typing-inspect, predictionguard, langsmith, jsonpatch, google-search-results, dataclasses-json, langchain\n","Successfully installed dataclasses-json-0.6.1 google-search-results-2.4.2 jsonpatch-1.33 jsonpointer-2.4 langchain-0.0.308 langsmith-0.0.41 marshmallow-3.20.1 mypy-extensions-1.0.0 predictionguard-1.4.2 typing-inspect-0.9.0\n"]}]},{"cell_type":"code","execution_count":2,"metadata":{"id":"v5Q9azmsMK_S","executionInfo":{"status":"ok","timestamp":1696466681734,"user_tz":240,"elapsed":2458,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}}},"outputs":[],"source":["from langchain.agents import load_tools\n","from langchain.agents import initialize_agent\n","from langchain.agents import AgentType\n","from langchain.llms import PredictionGuard\n","import os\n","from getpass import getpass"]},{"cell_type":"code","source":["pg_access_token = getpass('Enter your Prediction Guard access token: ')\n","os.environ['PREDICTIONGUARD_TOKEN'] = pg_access_token"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0iw0xj56wyec","executionInfo":{"status":"ok","timestamp":1696466694798,"user_tz":240,"elapsed":13109,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"49075310-fda7-4126-89f7-58087ad9cd2d"},"execution_count":3,"outputs":[{"name":"stdout","output_type":"stream","text":["Enter your Prediction Guard access token: ··········\n"]}]},{"cell_type":"code","source":["serpapi_key = getpass('Enter your serpapi api key: ')\n","os.environ['SERPAPI_API_KEY'] = serpapi_key"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"_z10PvSKwy9L","executionInfo":{"status":"ok","timestamp":1696466709619,"user_tz":240,"elapsed":11272,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"0f9547e0-266d-4a8d-9a16-755bb97a64b3"},"execution_count":4,"outputs":[{"name":"stdout","output_type":"stream","text":["Enter your serpapi api key: ··········\n"]}]},{"cell_type":"code","source":["openai_api_key = getpass('Enter your OpenAI api key: ')\n","os.environ['OPENAI_API_KEY'] = openai_api_key"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"FPX2mCcGxqyH","executionInfo":{"status":"ok","timestamp":1690894292059,"user_tz":240,"elapsed":1747,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"cdccc0cf-5e30-4de7-a460-e6e3599affa8"},"execution_count":null,"outputs":[{"name":"stdout","output_type":"stream","text":["Enter your OpenAI api key: ··········\n"]}]},{"cell_type":"markdown","source":["# Searching the Internet on-the-fly"],"metadata":{"id":"0cd-f52Zjle_"}},{"cell_type":"markdown","source":["In this example, we will use the SerpAPI to perform Google searches on-the-fly based on an LLM parsing of user input. You will need a SerpAPI key to run this example. You can setup a free account and get a key [here](https://serpapi.com/)."],"metadata":{"id":"asM-oDSdjwWI"}},{"cell_type":"code","source":["# In LangChain, \"tools\" are like resources that are available to your agent to\n","# execute certain actions (like a Google Search) while trying to complete a\n","# set of tasks. An \"agent\" is the object that you \"run\" to try and get a \"Final Answer.\"\n","tools = load_tools([\"serpapi\"], llm=PredictionGuard(model=\"Nous-Hermes-Llama2-13B\"))\n","agent = initialize_agent(tools, PredictionGuard(model=\"Nous-Hermes-Llama2-13B\"),\n","                         agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)"],"metadata":{"id":"366oGvFWMR1g","executionInfo":{"status":"ok","timestamp":1696466739377,"user_tz":240,"elapsed":1583,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}}},"execution_count":5,"outputs":[]},{"cell_type":"code","source":["agent.run(\"How are Domino's gift cards delivered?\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":435},"id":"U26yVZ5NNj2o","executionInfo":{"status":"ok","timestamp":1696467307266,"user_tz":240,"elapsed":66630,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"f7996deb-ddb1-469e-ac8a-859ca8a7b3f4"},"execution_count":13,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","\n","\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n","\u001b[32;1m\u001b[1;3m I need to find out if there are different delivery options for Domino's gift cards.\n","Action: Search\n","Action Input: \"Domino's gift card delivery options\"\u001b[0m\n","Observation: \u001b[36;1m\u001b[1;3m[\"Domino's gift cards are great for any person and any occasion. There are a number of different options to choose from. Each comes with a personalized card ...\"]\u001b[0m\n","Thought:\u001b[32;1m\u001b[1;3m The options for delivering Domino's gift cards seem to vary based on the location.\n","Action: Search\n","Action Input: \"Domino's gift card delivery options by location\"\u001b[0m\n","Observation: \u001b[36;1m\u001b[1;3m['Each comes with a personalized card carrier and is delivered via US Mail. Buy ... Prices, delivery area, and charges may vary by store. Delivery orders are ...']\u001b[0m\n","Thought:\u001b[32;1m\u001b[1;3m It seems that the delivery options for Domino's gift cards depend on the location of the store.\n","Action: Search\n","Action Input: \"Domino's gift card delivery options near me\"\u001b[0m\n","Observation: \u001b[36;1m\u001b[1;3m['There are a number of different options to choose from. Each comes with a personalized card carrier and is delivered via US Mail. Buy Gift Cards(Plastic).']\u001b[0m\n","Thought:\u001b[32;1m\u001b[1;3m It looks like the delivery options for Domino's gift cards are not available online.\n","Action: Search\n","Action Input: \"Domino's gift card delivery options near me\"\u001b[0m\n","Observation: \u001b[36;1m\u001b[1;3m['There are a number of different options to choose from. Each comes with a personalized card carrier and is delivered via US Mail. Buy Gift Cards(Plastic).']\u001b[0m\n","Thought:\u001b[32;1m\u001b[1;3m It appears that the delivery options for Domino's gift cards are only available in-store.\n","Final Answer: Domino's gift cards can be delivered via US Mail, but the delivery options vary by location and are only available in-store.\u001b[0m\n","\n","\u001b[1m> Finished chain.\u001b[0m\n"]},{"output_type":"execute_result","data":{"text/plain":["\"Domino's gift cards can be delivered via US Mail, but the delivery options vary by location and are only available in-store.\""],"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"}},"metadata":{},"execution_count":13}]},{"cell_type":"code","source":["agent.run(\"What is a German translation of the Domino's pizza guarantee?\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":281},"id":"eoKY04TzNtIm","executionInfo":{"status":"ok","timestamp":1696467058082,"user_tz":240,"elapsed":31425,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"98c1c7c7-f464-4a54-b2a6-9ccd02dcf7d4"},"execution_count":11,"outputs":[{"output_type":"stream","name":"stdout","text":["\n","\n","\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n","\u001b[32;1m\u001b[1;3m I need to find a translation of the Domino's pizza guarantee from English to German.\n","Action: Search\n","Action Input: \"Domino's pizza guarantee German translation\"\u001b[0m\n","Observation: \u001b[36;1m\u001b[1;3m[\"Our Guarantee. Our Guarantee: If you are not completely satisfied with your Domino's Pizza experience, we will make it right or refund your money. Carryout ...\"]\u001b[0m\n","Thought:\u001b[32;1m\u001b[1;3m I found the translation, it says \"Unsere Garantie: Wenn Sie mit Ihrer Domino's Pizza Erfahrung nicht vollständig zufrieden sind, werden wir es richtig machen oder Ihr Geld zurückerstaten.\"\n","Thought: Now I know the German translation of the Domino's pizza guarantee.\n","Final Answer: The German translation of the Domino's pizza guarantee is \"Unsere Garantie: Wenn Sie mit Ihrer Domino's Pizza Erfahrung nicht vollständig zufrieden sind, werden wir es richtig machen oder Ihr Geld zurückerstaten.\"\u001b[0m\n","\n","\u001b[1m> Finished chain.\u001b[0m\n"]},{"output_type":"execute_result","data":{"text/plain":["'The German translation of the Domino\\'s pizza guarantee is \"Unsere Garantie: Wenn Sie mit Ihrer Domino\\'s Pizza Erfahrung nicht vollständig zufrieden sind, werden wir es richtig machen oder Ihr Geld zurückerstaten.\"'"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"string"}},"metadata":{},"execution_count":11}]},{"cell_type":"code","source":[],"metadata":{"id":"lbVCjZlhOnTu"},"execution_count":null,"outputs":[]}]}


--------------------------------------------------------------------------------
/ai-eng-basic-prompting.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyMS0KHVtZ914C8eW2cKd7m3"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["**Prompting** is the process of providing a partial, usually text, input to a model. As we discussed in the last chapter, models will then use their parameterized data transformations to find a probable completion or output that matches the prompt.\n","\n","**Prompt and AI Engineering** is the emerging developer task of designing and optimizing prompts (and associated workflows/ infra) for AI models to achieve specific goals or outcomes. It involves creating high-quality inputs that can elicit accurate and relevant responses from AI models. The next several examples will help get you up to speed on common prompt engineering strategies.\n","\n","```\n","               +-------------------+\n","               |                   |\n","               |                   |  Completion\n","Prompt         |       Large       |  Generated text\n","--------------->     Language      +------------->\n","               |       Model       |\n","               |       (LLM)       |\n","               |                   |\n","               +-------------------+\n","```"],"metadata":{"id":"o14TvGScYAqZ"}},{"cell_type":"markdown","source":["# Dependencies and imports"],"metadata":{"id":"Jc-nVEbsX8bU"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"u9pot_Yc2FMw"},"outputs":[],"source":["! pip install predictionguard"]},{"cell_type":"code","source":["import os\n","\n","import predictionguard as pg\n","from getpass import getpass"],"metadata":{"id":"rOVhsPn42JEl"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["pg_access_token = getpass('Enter your Prediction Guard access token: ')\n","os.environ['PREDICTIONGUARD_TOKEN'] = pg_access_token"],"metadata":{"id":"l8sDezef2Me8"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Autocomplete"],"metadata":{"id":"haoOqKSw2azm"}},{"cell_type":"markdown","source":["Because LLMs are configured/ trained to perform the task of text completion, the most basic kind of prompt that you might provide is an **autocomplete** prompt. Regardless of prompt structure, the model function will compute the probabilities of words, tokens, or characters that might follow in the sequence of words, tokens, or characters that you provided in the prompt.\n","\n","Depending on the desired outcome, the prompt may be a single sentence, a paragraph, or even an partial story. Additionally, the prompt may be open-ended, providing only a general topic or theme, or it may be more specific, outlining a particular scenario or plot."],"metadata":{"id":"BInWcXfcYd0M"}},{"cell_type":"code","source":["result = pg.Completion.create(model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=\"Daniel Whitenack, a long forgotten wizard from the Lord of the Rings, entered into Mordor to\"\n",")\n","\n","print(result['choices'][0]['text'])"],"metadata":{"id":"9EBTZ6-V2dpo"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["result = pg.Completion.create(model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=\"Today I inspected the engine mounting equipment. I found a problem in one of the brackets so\"\n",")\n","\n","print(result['choices'][0]['text'])"],"metadata":{"id":"13GECBS2M-Aa"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["result = pg.Completion.create(model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=\"\"\"CREATE TABLE llm_queries(id SERIAL PRIMARY KEY, name TEXT NOT NULL, value REAL);\n","INSERT INTO llm_queries('Daniel Whitenack', 'autocomplete')\n","SELECT\"\"\"\n",")\n","\n","print(result['choices'][0]['text'])"],"metadata":{"id":"RSoTsZtGNHwR"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Zero shot, Instruction prompts"],"metadata":{"id":"oZvw-mv52TL6"}},{"cell_type":"markdown","source":["Autocomplete is a great place to start, but it is only that: a place to start. Throughout this workshop we will be putting on our prompt engineering hats to do some impressive things with generative AI. As we continue along that path, there is a general prompt structure that will pop up over and over again:\n","\n","```\n"," Prompt:\n","+------------------------------------------------------------+\n","|                                                            |\n","|  +-------------------------------------------------------+ |\n","|  | ----------------------------------------------------- | | Task Descrip./\n","|  | ---------------------------------------               | | Instructions\n","|  +-------------------------------------------------------+ |\n","|                                                            |\n","|  +-------------------------------------------------------+ | Current Input/\n","|  | -------------                                         | | Context\n","|  +-------------------------------------------------------+ |\n","|                                                            |\n","|  +----------------------------------------+                | Output\n","|  | --------------------------             |                | Indicator\n","|  +----------------------------------------+                |\n","|                                                            |\n","+------------------------------------------------------------+\n","```\n","\n","One of the easiest ways to leverage the above prompt structure is to describe a task (e.g., sentiment analysis), provide a single piece of data as context, and then provide a single output indicator. This is called a **zero shot prompt**."],"metadata":{"id":"gmXVIVBoYsGJ"}},{"cell_type":"markdown","source":["## Sentiment analysis"],"metadata":{"id":"7yekZsMFNoxF"}},{"cell_type":"code","source":["pg.Completion.create(model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=\"\"\"### Instruction:\n","Respond with a sentiment label for the text included in the below input. Use the label NEU for neutral sentiment, NEG for negative sentiment, and POS for positive sentiment.\n","\n","### Input:\n","This workshop is spectacular. I love it! So wonderful.\n","\n","### Response:\n","\"\"\"\n",")['choices'][0]['text']"],"metadata":{"id":"lpBfvmQp2Ryu"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Question Answering"],"metadata":{"id":"5ctMfmojNz_S"}},{"cell_type":"code","source":["prompt = \"\"\"### Instruction:\n","Read the context in the below input and respond with an answer to the question. If the question cannot be answered based on the context alone or the context does not explicitly say the answer to the question, write \"Sorry I had trouble answering this question, based on the information I found.\"\n","\n","### Input:\n","Context: Domino's gift cards are great for any person and any occasion. There are a number of different options to choose from. Each comes with a personalized card carrier and is delivered via US Mail.\n","\n","Question: How are gift cards delivered?\n","\n","### Response:\n","\"\"\""],"metadata":{"id":"kh4jlFH1NvUP"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["pg.Completion.create(model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=prompt\n",")['choices'][0]['text']"],"metadata":{"id":"5e9ab2RiN7Js"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Fraud Detection"],"metadata":{"id":"u5uz0wnfME6p"}},{"cell_type":"code","source":["prompt = \"### Instruction:\\nRespond with a numerical fraud score for a customer of a life insurance company based on their transaction history in the below input. \"\n","prompt = prompt + \"The score ranges from 0 to 1. Scores close to zero represent that the customer is very unlikely to be committing fraud. Scores close to 1 represent that a customer is very likely to be committing fraud. \"\n","prompt = prompt + \"Do not respond with any text other than the numerical fraud score. \"\n","prompt = prompt + \"Transaction history data includes transaction dates, category, type of insurance product, reason code, reward amount, coverage limit, and income at the time of the transaction.\\n\\n\"\n","prompt = prompt + \"\"\"### Input:\n","1. Date: 9/10/1977; Category: IN; Type: T; Reason Code: NaN; Reward Amount: NaN; Coverage Limit: 50000.0; Income: 61000.0\n","2. Date: 12/31/2005; Category: CL; Type: T; Reason Code: NaN; Reward Amount: NaN; Coverage Limit: 50000.0; Income: 61000.0\n","3. Date: 1/17/2006; Category: RE; Type: T; Reason Code: 265.0; Reward Amount: 50000.0; Coverage Limit: NaN; Income: NaN\n","4. Date: 12/15/1998; Category: IN; Type: T; Reason Code: NaN; Reward Amount: NaN; Coverage Limit: 100000.0; Income: 29000.0\n","5. Date: 6/7/1961; Category: IN; Type: V; Reason Code: NaN; Reward Amount: NaN; Coverage Limit: 100000.0; Income: 48000.0\\n\\n\"\"\"\n","prompt = prompt + \"### Response:\\n\"\n","\n","print(prompt)"],"metadata":{"id":"ic9qo4pOMHYF"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["pg.Completion.create(model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=prompt\n",")['choices'][0]['text']"],"metadata":{"id":"wkzcBIZ2MIWl"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["prompt = \"### Instruction:\\nRespond with a numerical fraud score for a customer of a life insurance company based on their transaction history and statistics of all customer transactions in the below input. \"\n","prompt = prompt + \"The score ranges from 0 to 1. Scores close to zero represent that the customer is very unlikely to be committing fraud. Scores close to 1 represent that a customer is very likely to be committing fraud. \"\n","prompt = prompt + \"Do not respond with any text other than the numerical fraud score. \"\n","prompt = prompt + \"Transaction history data includes transaction dates, category, type of insurance product, reason code, reward amount, coverage limit, and income at the time of the transaction.\\n\\n\"\n","prompt = prompt + \"\"\"### Input:\n","Statistics of all customer transactions:\n","Time between claim and reward: average is 23 days, standard deviation is 77 days\n","Coverage to income ratio: average is 7.2, standard deviation is 4.2\n","Recency according to an exponential distribution: average is 0.42, standard deviation is 0.24\n","Reward amount: average is $359,556, standard deviation $260,000\\n\\n\"\"\"\n","prompt = prompt + \"\"\"Customer transaction history:\n","1. Date: 9/10/1977; Category: IN; Type: T; Reason Code: NaN; Reward Amount: NaN; Coverage Limit: 50000.0; Income: 61000.0\n","2. Date: 12/31/2005; Category: CL; Type: T; Reason Code: NaN; Reward Amount: NaN; Coverage Limit: 50000.0; Income: 61000.0\n","3. Date: 1/17/2006; Category: RE; Type: T; Reason Code: 265.0; Reward Amount: 50000.0; Coverage Limit: NaN; Income: NaN\n","4. Date: 12/15/1998; Category: IN; Type: T; Reason Code: NaN; Reward Amount: NaN; Coverage Limit: 100000.0; Income: 29000.0\n","5. Date: 6/7/1961; Category: IN; Type: V; Reason Code: NaN; Reward Amount: NaN; Coverage Limit: 100000.0; Income: 48000.0\\n\\n\"\"\"\n","prompt = prompt + \"### Response:\\n\"\n","\n","print(prompt)"],"metadata":{"id":"9hSLy6gBMIh2"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["pg.Completion.create(model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=prompt\n",")['choices'][0]['text']"],"metadata":{"id":"243ywn_xMIoo"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Few Shot prompts"],"metadata":{"id":"PlGYdHDy2XD1"}},{"cell_type":"markdown","source":["When your task is slightly more complicated or requires a few more leaps in reasoning to generate an appropriate response, you can turn to **few shot** prompting (aka **in context learning**). In few shot prompting, a small number of gold standard demonstrations are integrated into the prompt. These demonstrations serve as example (context, output) pairs for the model, which serve to tune the probable output on-the-fly to what we ideally want in the output.\n","\n","Although not always necessary (as seen above), few shot prompting generally produces better results than single shot prompting in terms of consistency and similarity to your ideal outputs. This does come at a cost for some models that might charge based on the number of characters or words that are input to the model API."],"metadata":{"id":"Z163pVfDZJIW"}},{"cell_type":"markdown","source":["## Sentiment"],"metadata":{"id":"_DjiAH9XOBDu"}},{"cell_type":"code","source":["prompt = \"\"\"Classify the sentiment of the text. Use the label NEU for neutral sentiment, NEG for negative sentiment, and POS for positive sentiment.\n","\n","Text: That pilot is adorable.\n","Sentiment: POS\n","\n","Text: This was an awful seat.\n","Sentiment: NEG\n","\n","Text: This pilot was brilliant.\n","Sentiment: POS\n","\n","Text: I saw the aircraft.\n","Sentiment: NEU\n","\n","Text: That food was exceptional.\n","Sentiment: POS\n","\n","Text: That was a private aircraft.\n","Sentiment: NEU\n","\n","Text: This is an unhappy pilot.\n","Sentiment: NEG\n","\n","Text: The staff is rough.\n","Sentiment: NEG\n","\n","Text: This staff is Australian.\n","Sentiment: NEU\n","\n","Text: The flight is boring.\n","Sentiment: \"\"\""],"metadata":{"id":"S7mwe3BP2egC"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["result = pg.Completion.create(model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=prompt\n",")\n","\n","print(result['choices'][0]['text'])"],"metadata":{"id":"WcY7yVhQOJO1"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Logic representations"],"metadata":{"id":"JqM6BzkHOOAr"}},{"cell_type":"code","source":["prompt = \"\"\"Transform text statements into first order logic.\n","\n","Statement: Pluto is a dog\n","Logic: (dog(Pluto))\n","\n","Statement: Deb is not tall\n","Logic: (¬tall(Deb))\n","\n","Statement: Not every person who plays football knows cricket\n","Logic: (¬∀(x)(((person(x))∧(plays(x,football)))→(knows(x,cricket))))\n","\n","Statement: Some people can sell any product\n","Logic: ∃(x)((people(x))∧∀(y)((product(y))→(sell(x,y))))\n","\n","Statement: If you enroll in the course and prepare hard then you will pass the course\n","Logic: (((enroll(you,course))∧(prepare-hard(you)))→(pass(you,course)))\n","\n","Statement: Do not forget to provide all of your demonstrations\n","Logic: \"\"\""],"metadata":{"id":"TTrfCdOaOK-H"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["result = pg.Completion.create(model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=prompt\n",")\n","\n","print(result['choices'][0]['text'])"],"metadata":{"id":"l3n6MKRvOVOW"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Chat, Hinglish conversation"],"metadata":{"id":"EN2mxlUGOabe"}},{"cell_type":"code","source":["prompt = \"\"\"The following is a conversation with an AI assistant in Hinglish. The assistant is helpful, creative, clever, and very friendly. The AI assistant responds in Hinglish.\n","\n","Human: Hello\n","AI: Hello!\n","\n","Human: Movie ke baare mein tumhara initial opinion kya hai? Zyada depth mein gaye bina? Tumne like kiya ki nahin?\n","AI: Overall, mujhe movie achchi lagi.  Usme kuh high-profile actors hain,  Tum kya kahte ho?\n","\n","Human: Tumhe lagta hai ki ek achche movie mein high-profile actors hone chahiye? Ya phir, ek movie ko achcha kahne ke liye high-profile actors hone chahiye?\n","AI: Zaroori nahin, par help zaroor milta hai.  Ek actor jab tak apna kaam theek se nahin karta, tab tak high-profile nahin ban sakta. Tumhe kya lagta hai?\n","\n","Human: muje lgta hai ki yeh kewal personal pasand ke bare mai hai meri pasandida american comedy films Will Ferrol or Seth Rogan ke sath kuch bhi samil slapstick films hai muje yakin hai ki ek film thi jisme Lindsay Lohan ko bhot bada bna diya tha bhale hi usne apne surati saalo mai movies mai acting ki thi\n","AI: \"\"\""],"metadata":{"id":"fZLp1dhBOXOt"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["output = pg.Completion.create(model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=prompt\n",")['choices'][0]['text'].split('\\n')[0]\n","\n","print(output)"],"metadata":{"id":"tCzKXV8zOl0q"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["prompt = \"\"\"### Instruction:\n","Respond with a English translation of the following input Hinglish text.\n","\n","### Input:\n","{hinglish}\n","\n","### Respond:\n","\"\"\".format(hinglish=output)\n","\n","pg.Completion.create(model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=prompt\n",")['choices'][0]['text']"],"metadata":{"id":"gntWOt7YOnBH"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":[],"metadata":{"id":"gP6yJn1IvTrb"},"execution_count":null,"outputs":[]}]}


--------------------------------------------------------------------------------
/ai-eng-chaining-augmentation.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyN/B+I1W0+hKGADcxs5YL6M"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["We've actually already seen how it can be useful to \"chain\" various LLM operations together. In the Hinglish chat example we chained a response generation and then a machine translation using LLMs.\n","\n","**As you solve problems with LLMs, do NOT always think about your task as a single prompt.** Decompose your problem into multiple steps. Just like programming which uses multiple functions, classes, etc. LLM integration is a new kind of reasoning engine that you can \"program\" in a multi-step, conditional, control flow sort of fashion.\n","\n","Further, enterprise LLM appllications need reliability, trust, and consistency. **Because LLMs only predict probable text, they have no understanding or connection to reality.** This produces **hallucinations** that can be part of a coherent text block but factually (or otherwise) wrong. To deal with this we need to **ground** on LLM operations with external data."],"metadata":{"id":"xm6EIVbldZer"}},{"cell_type":"markdown","source":["# Dependencies and imports"],"metadata":{"id":"wK6zNdTOdVcE"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"I2yX1z4WyJP2"},"outputs":[],"source":["! pip install langchain predictionguard lancedb html2text sentence-transformers"]},{"cell_type":"code","source":["import os\n","import urllib.request\n","\n","import html2text\n","import predictionguard as pg\n","from langchain import PromptTemplate, FewShotPromptTemplate\n","from langchain.text_splitter import CharacterTextSplitter\n","from sentence_transformers import SentenceTransformer\n","import numpy as np\n","from getpass import getpass\n","import lancedb\n","from lancedb.embeddings import with_embeddings\n","import pandas as pd"],"metadata":{"id":"Go5vRQcTycUl"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["pg_access_token = getpass('Enter your Prediction Guard access token: ')\n","os.environ['PREDICTIONGUARD_TOKEN'] = pg_access_token"],"metadata":{"id":"HdSloPn7JTu0"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Chaining"],"metadata":{"id":"eTH1n7VuNm5g"}},{"cell_type":"code","source":["template = \"\"\"### Instruction:\n","Decide if the following input message is an informational question, a general chat message, or a request for code generation.\n","If the message is an informational question, answer it based on the informational context provided below.\n","If the message is a general chat message, respond in a kind and friendly manner based on the coversation context provided below.\n","If the message is a request for code generation, respond with a code snippet.\n","\n","### Input:\n","Message: {query}\n","\n","Informational Context: The Greater Los Angeles and San Francisco Bay areas in California are the nation's second and fifth-most populous urban regions, respectively. Greater Los Angeles has over 18.7 million residents and the San Francisco Bay Area has over 9.6 million residents. Los Angeles is state's most populous city and the nation's second-most populous city. San Francisco is the second-most densely populated major city in the country. Los Angeles County is the country's most populous county, and San Bernardino County is the nation's largest county by area. Sacramento is the state's capital.\n","\n","Conversational Context:\n","Human - \"Hello, how are you?\"\n","AI - \"I'm good, what can I help you with?\"\n","Human - \"What is the captital of California?\"\n","AI - \"Sacramento\"\n","Human - \"Thanks!\"\n","AI - \"You are welcome!\"\n","\n","### Response:\n","\"\"\"\n","\n","prompt = PromptTemplate(\n","    input_variables=[\"query\"],\n","    template=template,\n",")"],"metadata":{"id":"wiWYZO_xNr7q"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["result = pg.Completion.create(\n","    model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=prompt.format(query=\"What is the population of LA?\")\n",")\n","\n","print(result['choices'][0]['text'])"],"metadata":{"id":"bpXQfN0uk59t"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["category_template = \"\"\"### Instruction:\n","Read the below input and determine if it is a request to generate computer code? Respond \"yes\" or \"no\".\n","\n","### Input:\n","{query}\n","\n","### Response:\n","\"\"\"\n","\n","category_prompt = PromptTemplate(\n","    input_variables=[\"query\"],\n","    template=category_template\n",")\n","\n","qa_template = \"\"\"### Instruction:\n","Read the context below and respond with an answer to the question. If the question cannot be answered based on the context alone or the context does not explicitly say the answer to the question, write \"Sorry I had trouble answering this question, based on the information I found.\"\n","\n","### Input:\n","Context: {context}\n","\n","Question: {query}\n","\n","### Response:\n","\"\"\"\n","\n","qa_prompt = PromptTemplate(\n","    input_variables=[\"context\", \"query\"],\n","    template=qa_template\n",")\n","\n","chat_template = \"\"\"### Instruction:\n","You are a friendly and clever AI assistant. Respond to the latest human message in the input conversation below.\n","\n","### Input:\n","{context}\n","Human: {query}\n","AI:\n","\n","### Response:\n","\"\"\"\n","\n","chat_prompt = PromptTemplate(\n","    input_variables=[\"context\", \"query\"],\n","    template=chat_template\n",")\n","\n","code_template = \"\"\"### Instruction:\n","You are a code generation assistant. Respond with a code snippet and any explanation requested in the below input.\n","\n","### Input:\n","{query}\n","\n","### Response:\n","\"\"\"\n","\n","code_prompt = PromptTemplate(\n","    input_variables=[\"query\"],\n","    template=code_template\n",")\n","\n","\n","# QuestionID provides some help in determining if a sentence is a question.\n","class QuestionID:\n","    \"\"\"\n","        QuestionID has the actual logic used to determine if sentence is a question\n","    \"\"\"\n","    def padCharacter(self, character: str, sentence: str):\n","        if character in sentence:\n","            position = sentence.index(character)\n","            if position > 0 and position < len(sentence):\n","\n","                # Check for existing white space before the special character.\n","                if (sentence[position - 1]) != \" \":\n","                    sentence = sentence.replace(character, (\" \" + character))\n","\n","        return sentence\n","\n","    def predict(self, sentence: str):\n","        questionStarters = [\n","            \"which\", \"wont\", \"cant\", \"isnt\", \"arent\", \"is\", \"do\", \"does\",\n","            \"will\", \"can\"\n","        ]\n","        questionElements = [\n","            \"who\", \"what\", \"when\", \"where\", \"why\", \"how\", \"sup\", \"?\"\n","        ]\n","\n","        sentence = sentence.lower()\n","        sentence = sentence.replace(\"\\'\", \"\")\n","        sentence = self.padCharacter('?', sentence)\n","        splitWords = sentence.split()\n","\n","        if any(word == splitWords[0] for word in questionStarters) or any(\n","                word in splitWords for word in questionElements):\n","            return True\n","        else:\n","            return False\n","\n","def response_chain(message, convo_context, info_context):\n","\n","  # Determine what kind of message this is.\n","  result = pg.Completion.create(\n","      model=\"WizardCoder\",\n","      prompt=category_prompt.format(query=message),\n","      output={\n","          \"type\": \"categorical\",\n","          \"categories\": [\"yes\", \"no\"]\n","      }\n","  )\n","\n","  # configure our chain\n","  code = result['choices'][0]['output']\n","  qIDModel = QuestionID()\n","  question = qIDModel.predict(message)\n","\n","  if code == \"no\" and question:\n","\n","    # Handle the informational request.\n","    result = pg.Completion.create(\n","        model=\"Nous-Hermes-Llama2-13B\",\n","        prompt=qa_prompt.format(context=info_context, query=message)\n","    )\n","    completion = result['choices'][0]['text'].split('#')[0].strip()\n","\n","  elif code == \"yes\":\n","\n","    # Handle the code generation request.\n","    result = pg.Completion.create(\n","        model=\"WizardCoder\",\n","        prompt=code_prompt.format(query=message),\n","        max_tokens=500\n","    )\n","    completion = result['choices'][0]['text']\n","\n","  else:\n","\n","    # Handle the chat message.\n","    result = pg.Completion.create(\n","        model=\"Nous-Hermes-Llama2-13B\",\n","        prompt=chat_prompt.format(context=convo_context, query=message),\n","        output={\n","            \"toxicity\": True\n","        }\n","    )\n","    completion = result['choices'][0]['text'].split('Human:')[0].strip()\n","\n","  return code, question, completion\n"],"metadata":{"id":"82oTOlF6l0-S"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["info_context = \"The Greater Los Angeles and San Francisco Bay areas in California are the nation's second and fifth-most populous urban regions, respectively. Greater Los Angeles has over 18.7 million residents and the San Francisco Bay Area has over 9.6 million residents. Los Angeles is state's most populous city and the nation's second-most populous city. San Francisco is the second-most densely populated major city in the country. Los Angeles County is the country's most populous county, and San Bernardino County is the nation's largest county by area. Sacramento is the state's capital.\"\n","\n","convo_context = \"\"\"Human: Hello, how are you?\n","AI: I'm good, what can I help you with?\n","Human: What is the captital of California?\n","AI: Sacramento\n","Human: Thanks!\n","AI: You are welcome!\"\"\"\n","\n","message = \"Which city in California has the highest population?\"\n","#message = \"I'm really enjoying this conversation.\"\n","#message = \"Generate some python code that gets the current weather in the bay area.\"\n","\n","code, question, completion = response_chain(message, convo_context, info_context)\n","print(\"CODE GEN REQUESTED:\", code)\n","print(\"QUESTION:\", question)\n","print(\"\")\n","print(\"RESPONSE:\", completion)"],"metadata":{"id":"yWM2HeVNpFNz"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# External knowledge in prompts, Grounding"],"metadata":{"id":"EMW-amKXI-nk"}},{"cell_type":"markdown","source":["We've actually already seen external knowledge within our prompts. In the question and answer example, the `context` that we pasted in was a copy of phrasing on the Domino's website."],"metadata":{"id":"UBxGhPzLebrA"}},{"cell_type":"code","source":["template = \"\"\"### Instruction:\n","Read the context below and respond with an answer to the question. If the question cannot be answered based on the context alone or the context does not explicitly say the answer to the question, write \"Sorry I had trouble answering this question, based on the information I found.\"\n","\n","### Input:\n","Context: {context}\n","\n","Question: {question}\n","\n","### Response:\n","\"\"\"\n","\n","prompt = PromptTemplate(\n","    input_variables=[\"context\", \"question\"],\n","    template=template,\n",")"],"metadata":{"id":"vkmyGTHuJEc2"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["context = \"Domino's gift cards are great for any person and any occasion. There are a number of different options to choose from. Each comes with a personalized card carrier and is delivered via US Mail.\"\n","\n","question = \"How are gift cards delivered?\"\n","\n","myprompt = prompt.format(context=context, question=question)\n","print(myprompt)"],"metadata":{"id":"XRkwJGTOJIAC"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["result = pg.Completion.create(\n","    model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=myprompt\n",")\n","result['choices'][0]['text'].split('#')[0].strip()"],"metadata":{"id":"sbyyGvyjJYCN"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Retrieval augmentation"],"metadata":{"id":"rEYisXxsynLc"}},{"cell_type":"markdown","source":["Retrieval-augmented generation (RAG) is an innovative approach that merges the capabilities of large-scale retrieval systems with sequence-to-sequence models to enhance their performance in generating detailed and contextually relevant responses. Instead of relying solely on the knowledge contained within the model's parameters, RAG allows the model to dynamically retrieve and integrate information from an external database or a set of documents during the generation process. By doing so, it provides a bridge between the vast knowledge stored in external sources and the powerful generation abilities of neural models, enabling more informed, diverse, and context-aware outputs in tasks like question answering, dialogue systems, and more."],"metadata":{"id":"3ysqzdilA62F"}},{"cell_type":"code","source":["# Let's get the html off of a website.\n","fp = urllib.request.urlopen(\"https://docs.kernel.org/process/submitting-patches.html\")\n","mybytes = fp.read()\n","html = mybytes.decode(\"utf8\")\n","fp.close()\n","\n","# And convert it to text.\n","h = html2text.HTML2Text()\n","h.ignore_links = True\n","text = h.handle(html)\n","\n","print(text)"],"metadata":{"id":"c0AfYyA7vcrB"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Clean things up just a bit.\n","text = text.split(\"### This Page\")[1]\n","text = text.split(\"## References\")[0]\n","print(text)"],"metadata":{"id":"AnH3LL-owZOW"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Chunk the text into smaller pieces for injection into LLM prompts.\n","text_splitter = CharacterTextSplitter(chunk_size=700, chunk_overlap=50)\n","docs = text_splitter.split_text(text)\n","len(docs)"],"metadata":{"id":"KjNBPI_Fwv5K"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Let's checkout some of the chunks!\n","for i in range(0, 3):\n","  print(\"Chunk\", str(i+1))\n","  print(\"----------------------------\")\n","  print(docs[i])\n","  print(\"\")"],"metadata":{"id":"XPgwoO2w4-Lr"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Let's take care of some of the formatting so it doesn't conflict with our\n","# typical prompt template structure\n","docs = [x.replace('#', '-') for x in docs]\n","print(docs[2])"],"metadata":{"id":"Sl5ib7UU5dKd"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Now we need to embed these documents and put them into a \"vector store\" or\n","# \"vector db\" that we will use for semantic search and retrieval.\n","\n","# Embeddings setup\n","name=\"all-MiniLM-L12-v2\"\n","model = SentenceTransformer(name)\n","\n","def embed_batch(batch):\n","    return [model.encode(sentence) for sentence in batch]\n","\n","def embed(sentence):\n","    return model.encode(sentence)\n","\n","# LanceDB setup\n","os.mkdir(\".lancedb\")\n","uri = \".lancedb\"\n","db = lancedb.connect(uri)\n","\n","# Create a dataframe with the chunk ids and chunks\n","metadata = []\n","for i in range(len(docs)):\n","    metadata.append([\n","        i,\n","        docs[i]\n","    ])\n","doc_df = pd.DataFrame(metadata, columns=[\"chunk\", \"text\"])\n","doc_df.head()"],"metadata":{"id":"-rsGYSGR5w95"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Embed the documents\n","data = with_embeddings(embed_batch, doc_df)\n","data.to_pandas().head()"],"metadata":{"id":"JgsIA1un7Pyq"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Create the DB table and add the records.\n","db.create_table(\"linux\", data=data)\n","table = db.open_table(\"linux\")\n","table.add(data=data)"],"metadata":{"id":"F022vhos7hth"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Let's try to match a query to one of our documents.\n","message = \"How many problems should be solved per patch?\"\n","results = table.search(embed(message)).limit(5).to_df()\n","results.head()"],"metadata":{"id":"daHLa4oH7q10"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Now let's augment our Q&A prompt with this external knowledge on-the-fly!!!\n","template = \"\"\"### Instruction:\n","Read the below input context and respond with a short answer to the given question. Use only the information in the below input to answer the question. If you cannot answer the question, respond with \"Sorry, I can't find an answer, but you might try looking in the following resource.\"\n","\n","### Input:\n","Context: {context}\n","\n","Question: {question}\n","\n","### Response:\n","\"\"\"\n","qa_prompt = PromptTemplate(\n","    input_variables=[\"context\", \"question\"],\n","    template=template,\n",")\n","\n","def rag_answer(message):\n","\n","  # Search the for relevant context\n","  results = table.search(embed(message)).limit(5).to_df()\n","  results.sort_values(by=['_distance'], inplace=True, ascending=True)\n","  doc_use = results['text'].values[0]\n","\n","  # Augment the prompt with the context\n","  prompt = qa_prompt.format(context=doc_use, question=message)\n","\n","  # Get a response\n","  result = pg.Completion.create(\n","      model=\"Nous-Hermes-Llama2-13B\",\n","      prompt=prompt\n","  )\n","\n","  return result['choices'][0]['text']\n"],"metadata":{"id":"IDscT_858T4A"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["response = rag_answer(\"How many problems should be solved in a single patch?\")\n","\n","print('')\n","print(\"RESPONSE:\", response)"],"metadata":{"id":"pRo91E2w9adt"},"execution_count":null,"outputs":[]}]}


--------------------------------------------------------------------------------
/ai-eng-fine-tuning-flan.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"machine_shape":"hm","gpuType":"V100"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","gpuClass":"standard","widgets":{"application/vnd.jupyter.widget-state+json":{"954aa04eb17d4ca093432b771e11cab3":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_c53dc25ace2a4468980f2a4e72a8e577","IPY_MODEL_8dbf2e12ea35413994dd72d102fe39b9","IPY_MODEL_f3dfd67aad794b9caaa3bb62cdce7264"],"layout":"IPY_MODEL_29b8c7818e274453a20fba4dcd4c137c"}},"c53dc25ace2a4468980f2a4e72a8e577":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_76a679d261164e59b4b32da02e280587","placeholder":"​","style":"IPY_MODEL_d64b94054ae246f99abb4ec42d1dd4f5","value":"Map: 100%"}},"8dbf2e12ea35413994dd72d102fe39b9":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_b85ad254cdcb4fd9a97ebb6a3d9bb4a8","max":15551,"min":0,"orientation":"horizontal","style":"IPY_MODEL_9f7087d0e4574c4c880ada8544881792","value":15551}},"f3dfd67aad794b9caaa3bb62cdce7264":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_8157890c151e41bea610fb7a5b686ff8","placeholder":"​","style":"IPY_MODEL_bca9a769ee814656a39654a2a5f4e115","value":" 15551/15551 [00:00&lt;00:00, 24874.02 examples/s]"}},"29b8c7818e274453a20fba4dcd4c137c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"76a679d261164e59b4b32da02e280587":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d64b94054ae246f99abb4ec42d1dd4f5":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"b85ad254cdcb4fd9a97ebb6a3d9bb4a8":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9f7087d0e4574c4c880ada8544881792":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"8157890c151e41bea610fb7a5b686ff8":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"bca9a769ee814656a39654a2a5f4e115":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"055a0825c64840b6b6573d9940f5a204":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_6207306e69524d819a45ae7adaf70768","IPY_MODEL_22f325f31854455a8e817758225be7ba","IPY_MODEL_12d089771dbd4d3f9c2e8b917b94f61e"],"layout":"IPY_MODEL_9a7667fc8e8b41098729e257d6888f40"}},"6207306e69524d819a45ae7adaf70768":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_935e3576752e4417af90e47ee2d902ee","placeholder":"​","style":"IPY_MODEL_47bbc81d96fb418ead4b7fc6ea66e09b","value":"Map: 100%"}},"22f325f31854455a8e817758225be7ba":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_e9d1b0517a9d444c827000484decc477","max":819,"min":0,"orientation":"horizontal","style":"IPY_MODEL_99c2a0f91de04deba1b222b96fc46888","value":819}},"12d089771dbd4d3f9c2e8b917b94f61e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1d3bcd46c35244b2bb1c16f1f6a61e22","placeholder":"​","style":"IPY_MODEL_2675db31a6fb417f90418c4e6cb95a9c","value":" 819/819 [00:00&lt;00:00, 1821.50 examples/s]"}},"9a7667fc8e8b41098729e257d6888f40":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"935e3576752e4417af90e47ee2d902ee":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"47bbc81d96fb418ead4b7fc6ea66e09b":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"e9d1b0517a9d444c827000484decc477":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"99c2a0f91de04deba1b222b96fc46888":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"1d3bcd46c35244b2bb1c16f1f6a61e22":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2675db31a6fb417f90418c4e6cb95a9c":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"cells":[{"cell_type":"code","execution_count":15,"metadata":{"id":"hlzal3OPBj8T","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696512656339,"user_tz":240,"elapsed":5958,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"6c45869a-c5bc-4d4e-b017-ccae03b95708"},"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.34.0)\n","Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (2.14.5)\n","Requirement already satisfied: py7zr in /usr/local/lib/python3.10/dist-packages (0.20.6)\n","Requirement already satisfied: evaluate in /usr/local/lib/python3.10/dist-packages (0.4.0)\n","Requirement already satisfied: rouge_score in /usr/local/lib/python3.10/dist-packages (0.1.2)\n","Collecting accelerate\n","  Downloading accelerate-0.23.0-py3-none-any.whl (258 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m258.1/258.1 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.4)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.16.4)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.23.5)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n","Requirement already satisfied: tokenizers<0.15,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.14.0)\n","Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.3.3)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.1)\n","Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n","Requirement already satisfied: dill<0.3.8,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.7)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n","Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.4.1)\n","Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.15)\n","Requirement already satisfied: fsspec[http]<2023.9.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n","Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.8.5)\n","Requirement already satisfied: texttable in /usr/local/lib/python3.10/dist-packages (from py7zr) (1.7.0)\n","Requirement already satisfied: pycryptodomex>=3.6.6 in /usr/local/lib/python3.10/dist-packages (from py7zr) (3.19.0)\n","Requirement already satisfied: pyzstd>=0.14.4 in /usr/local/lib/python3.10/dist-packages (from py7zr) (0.15.9)\n","Requirement already satisfied: pyppmd<1.1.0,>=0.18.1 in /usr/local/lib/python3.10/dist-packages (from py7zr) (1.0.0)\n","Requirement already satisfied: pybcj>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from py7zr) (1.0.1)\n","Requirement already satisfied: multivolumefile>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from py7zr) (0.2.3)\n","Requirement already satisfied: brotli>=1.0.9 in /usr/local/lib/python3.10/dist-packages (from py7zr) (1.1.0)\n","Requirement already satisfied: inflate64>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from py7zr) (0.3.1)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from py7zr) (5.9.5)\n","Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.10/dist-packages (from evaluate) (0.18.0)\n","Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from rouge_score) (1.4.0)\n","Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from rouge_score) (3.8.1)\n","Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from rouge_score) (1.16.0)\n","Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (2.0.1+cu118)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n","Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (3.2.0)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.4)\n","Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n","Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.2)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.0)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.16.4->transformers) (4.5.0)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.5)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (1.12)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1.2)\n","Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2.0.0)\n","Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.10.0->accelerate) (3.27.5)\n","Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.10.0->accelerate) (17.0.1)\n","Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->rouge_score) (8.1.7)\n","Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->rouge_score) (1.3.2)\n","Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.3.post1)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.3)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n","Installing collected packages: accelerate\n","Successfully installed accelerate-0.23.0\n"]}],"source":["! pip install transformers datasets py7zr evaluate rouge_score accelerate>=0.20.1"]},{"cell_type":"code","source":["from random import randrange\n","\n","from datasets import load_dataset, concatenate_datasets\n","from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq\n","from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments\n","from transformers import pipeline\n","import evaluate\n","import nltk\n","import numpy as np\n","from nltk.tokenize import sent_tokenize\n","nltk.download(\"punkt\")"],"metadata":{"id":"DcB59-tJDQQr","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696512712092,"user_tz":240,"elapsed":5995,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"e4200bf1-a037-4c0c-ffb9-246ee08648be"},"execution_count":1,"outputs":[{"output_type":"stream","name":"stderr","text":["[nltk_data] Downloading package punkt to /root/nltk_data...\n","[nltk_data]   Package punkt is already up-to-date!\n"]},{"output_type":"execute_result","data":{"text/plain":["True"]},"metadata":{},"execution_count":1}]},{"cell_type":"markdown","source":["# Dataset"],"metadata":{"id":"1xxGzymLDR5N"}},{"cell_type":"markdown","source":["We are going to fine-tune the [Flan-T5](https://huggingface.co/google/flan-t5-small) model to summarize dialogue/ chat threads similar to what is done in the ChatGPT interface. We will use the [SAMSum](https://huggingface.co/datasets/samsum) dataset. The SAMSum dataset contains about 16k messenger-like conversations with summaries. Conversations were created and written down by linguists fluent in English.\n","\n","(Thanks to [Philipp Schmid](https://www.philschmid.de/) for great examples of this task that were adapted for this notebook)"],"metadata":{"id":"_Dans9qkH9IF"}},{"cell_type":"code","source":["dataset = load_dataset(\"samsum\")\n","\n","print(f\"Train dataset size: {len(dataset['train'])}\")\n","print(f\"Test dataset size: {len(dataset['test'])}\")"],"metadata":{"id":"Rb8Uzi1ODSgl","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696512713626,"user_tz":240,"elapsed":1536,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"f6f11b4b-9545-4b00-d9f6-5c17a1dd655b"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["Train dataset size: 14732\n","Test dataset size: 819\n"]}]},{"cell_type":"code","source":["dataset['train'][0]"],"metadata":{"id":"MuhhWXEbDfxs","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696512713626,"user_tz":240,"elapsed":29,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"ca05de38-15b3-48a2-cb32-bded76201c4d"},"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/plain":["{'id': '13818513',\n"," 'dialogue': \"Amanda: I baked  cookies. Do you want some?\\r\\nJerry: Sure!\\r\\nAmanda: I'll bring you tomorrow :-)\",\n"," 'summary': 'Amanda baked cookies and will bring Jerry some tomorrow.'}"]},"metadata":{},"execution_count":3}]},{"cell_type":"code","source":["#pick a random sample to evaluate the model later\n","sample = dataset['test'][randrange(len(dataset[\"test\"]))]\n","sample"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"aI-_if_sqRCa","executionInfo":{"status":"ok","timestamp":1696512713627,"user_tz":240,"elapsed":15,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"5d5815db-b46f-4a17-8c7f-6e1c4086e5a9"},"execution_count":4,"outputs":[{"output_type":"execute_result","data":{"text/plain":["{'id': '13731240',\n"," 'dialogue': \"Ken: Hey, how are you doing today?\\r\\nEllen: Fine. How are you?\\r\\nKen: Not doing well at all today. Really depressed. Frustrated. Stressed out.\\r\\nEllen: Oh no!\\r\\nKen: Really bad day yesterday carrying over to today.\\r\\nEllen: Can I do anything?\\r\\nKen: I'll be fine. Just overwhelmed and burned out.\\r\\nEllen: Sorry!\\r\\nKen: Sorry, don't mean to bring you down.\\r\\nEllen: You didn't, just worried about you!\",\n"," 'summary': 'Ken is having some bad days.'}"]},"metadata":{},"execution_count":4}]},{"cell_type":"markdown","source":["# Pre-process data"],"metadata":{"id":"t_nKBP9yD3i4"}},{"cell_type":"code","source":["# We will use the Flan T5 tokenizer to help us pre-process the data.\n","model_id=\"google/flan-t5-small\"\n","\n","# Load tokenizer of Flan-T5\n","tokenizer = AutoTokenizer.from_pretrained(model_id)"],"metadata":{"id":"GMWk36xeDuaZ","executionInfo":{"status":"ok","timestamp":1696512714252,"user_tz":240,"elapsed":633,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}}},"execution_count":5,"outputs":[]},{"cell_type":"code","source":["# The maximum total input sequence length after tokenization.\n","# Sequences longer than this will be truncated, sequences shorter will be padded.\n","tokenized_inputs = concatenate_datasets([\n","    dataset[\"train\"],\n","    dataset[\"test\"]]).map(lambda x: tokenizer(x[\"dialogue\"], truncation=True),\n","    batched=True,\n","    remove_columns=[\"dialogue\", \"summary\"])\n","max_source_length = max([len(x) for x in tokenized_inputs[\"input_ids\"]])\n","print(f\"Max source length: {max_source_length}\")\n","\n","# The maximum total sequence length for target text after tokenization.\n","# Sequences longer than this will be truncated, sequences shorter will be padded.\"\n","tokenized_targets = concatenate_datasets([\n","    dataset[\"train\"],\n","    dataset[\"test\"]]).map(lambda x: tokenizer(x[\"summary\"], truncation=True),\n","    batched=True,\n","    remove_columns=[\"dialogue\", \"summary\"])\n","max_target_length = max([len(x) for x in tokenized_targets[\"input_ids\"]])\n","print(f\"Max target length: {max_target_length}\")"],"metadata":{"id":"Sf0CeMm-EJKD","colab":{"base_uri":"https://localhost:8080/","height":84,"referenced_widgets":["954aa04eb17d4ca093432b771e11cab3","c53dc25ace2a4468980f2a4e72a8e577","8dbf2e12ea35413994dd72d102fe39b9","f3dfd67aad794b9caaa3bb62cdce7264","29b8c7818e274453a20fba4dcd4c137c","76a679d261164e59b4b32da02e280587","d64b94054ae246f99abb4ec42d1dd4f5","b85ad254cdcb4fd9a97ebb6a3d9bb4a8","9f7087d0e4574c4c880ada8544881792","8157890c151e41bea610fb7a5b686ff8","bca9a769ee814656a39654a2a5f4e115"]},"executionInfo":{"status":"ok","timestamp":1696512717076,"user_tz":240,"elapsed":2148,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"a791a528-d6ca-4acb-ff4d-5add1163c1a1"},"execution_count":6,"outputs":[{"output_type":"stream","name":"stdout","text":["Max source length: 512\n"]},{"output_type":"display_data","data":{"text/plain":["Map:   0%|          | 0/15551 [00:00<?, ? examples/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"954aa04eb17d4ca093432b771e11cab3"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Max target length: 95\n"]}]},{"cell_type":"code","source":["def preprocess_function(sample,padding=\"max_length\"):\n","\n","    # Add prefix to the input prompt for t5\n","    inputs = [\"summarize: \" + item for item in sample[\"dialogue\"]]\n","\n","    # tokenize inputs\n","    model_inputs = tokenizer(inputs, max_length=max_source_length,\n","                             padding=padding, truncation=True)\n","\n","    # Tokenize targets with the `text_target` keyword argument\n","    labels = tokenizer(text_target=sample[\"summary\"],\n","                       max_length=max_target_length, padding=padding,\n","                       truncation=True)\n","\n","    # If we are padding here, replace all tokenizer.pad_token_id in the labels\n","    # by -100 when we want to ignore padding in the loss.\n","    if padding == \"max_length\":\n","        labels[\"input_ids\"] = [\n","            [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels[\"input_ids\"]\n","        ]\n","\n","    model_inputs[\"labels\"] = labels[\"input_ids\"]\n","    return model_inputs"],"metadata":{"id":"gQfgzFk_EeLv","executionInfo":{"status":"ok","timestamp":1696512717077,"user_tz":240,"elapsed":8,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}}},"execution_count":7,"outputs":[]},{"cell_type":"code","source":["tokenized_dataset = dataset.map(preprocess_function, batched=True,\n","                                remove_columns=[\"dialogue\", \"summary\", \"id\"])\n","print(f\"Keys of tokenized dataset: {list(tokenized_dataset['train'].features)}\")"],"metadata":{"id":"O97GyjqkEkh-","colab":{"base_uri":"https://localhost:8080/","height":66,"referenced_widgets":["055a0825c64840b6b6573d9940f5a204","6207306e69524d819a45ae7adaf70768","22f325f31854455a8e817758225be7ba","12d089771dbd4d3f9c2e8b917b94f61e","9a7667fc8e8b41098729e257d6888f40","935e3576752e4417af90e47ee2d902ee","47bbc81d96fb418ead4b7fc6ea66e09b","e9d1b0517a9d444c827000484decc477","99c2a0f91de04deba1b222b96fc46888","1d3bcd46c35244b2bb1c16f1f6a61e22","2675db31a6fb417f90418c4e6cb95a9c"]},"executionInfo":{"status":"ok","timestamp":1696512722371,"user_tz":240,"elapsed":1046,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"aefebdab-0765-4e81-8b81-4a45a4f03dde"},"execution_count":8,"outputs":[{"output_type":"display_data","data":{"text/plain":["Map:   0%|          | 0/819 [00:00<?, ? examples/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"055a0825c64840b6b6573d9940f5a204"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Keys of tokenized dataset: ['input_ids', 'attention_mask', 'labels']\n"]}]},{"cell_type":"markdown","source":["# Base/ Foundation Model"],"metadata":{"id":"WSRhhX3UEulI"}},{"cell_type":"code","source":["# load model from the hub\n","model = AutoModelForSeq2SeqLM.from_pretrained(model_id)"],"metadata":{"id":"NuXzlS31Eqzx","executionInfo":{"status":"ok","timestamp":1696512727109,"user_tz":240,"elapsed":1586,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}}},"execution_count":9,"outputs":[]},{"cell_type":"markdown","source":["Before we fine-tune, let's see how the model performs on our sample!"],"metadata":{"id":"aQfD9OEGqnyn"}},{"cell_type":"code","source":["base_summarizer = pipeline(\"summarization\", model=model, tokenizer=tokenizer, device=0)\n","\n","# print dialogue and reference summary\n","print(f\"dialogue: \\n{sample['dialogue']}\\n---------------\")\n","print(f\"reference summary: \\n{sample['summary']}\\n---------------\")\n","\n","# print model summary\n","base_result = base_summarizer(sample[\"dialogue\"])\n","print(f\"base flan-t5-base summary:\\n{base_result[0]['summary_text']}\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"DxKg0AhSqwL9","executionInfo":{"status":"ok","timestamp":1696512728005,"user_tz":240,"elapsed":911,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"30cc5488-05e7-48c7-fc49-b746ef7217ad"},"execution_count":10,"outputs":[{"output_type":"stream","name":"stderr","text":["Your max_length is set to 200, but your input_length is only 109. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=54)\n"]},{"output_type":"stream","name":"stdout","text":["dialogue: \n","Ken: Hey, how are you doing today?\r\n","Ellen: Fine. How are you?\r\n","Ken: Not doing well at all today. Really depressed. Frustrated. Stressed out.\r\n","Ellen: Oh no!\r\n","Ken: Really bad day yesterday carrying over to today.\r\n","Ellen: Can I do anything?\r\n","Ken: I'll be fine. Just overwhelmed and burned out.\r\n","Ellen: Sorry!\r\n","Ken: Sorry, don't mean to bring you down.\r\n","Ellen: You didn't, just worried about you!\n","---------------\n","reference summary: \n","Ken is having some bad days.\n","---------------\n","base flan-t5-base summary:\n","Ken is not doing well today. He is depressed and stressed out. Ellen will be fine. Ken will take care of her. Ellen is worried about her.\n"]}]},{"cell_type":"markdown","source":["# Fine-tune"],"metadata":{"id":"OPhPXIW-E9A-"}},{"cell_type":"code","source":["# We will use the Rouge metric for evaluations. ROUGE, or Recall-Oriented\n","# Understudy for Gisting Evaluation, is a set of metrics and a software package\n","# used for evaluating automatic summarization and machine translation software\n","# in natural language processing. The metrics compare an automatically produced\n","# summary or translation against a reference or a set of references\n","# (human-produced) summary or translation.\n","metric = evaluate.load(\"rouge\")\n","\n","# helper function to postprocess text\n","def postprocess_text(preds, labels):\n","\n","    preds = [pred.strip() for pred in preds]\n","    labels = [label.strip() for label in labels]\n","\n","    # rougeLSum expects newline after each sentence\n","    preds = [\"\\n\".join(sent_tokenize(pred)) for pred in preds]\n","    labels = [\"\\n\".join(sent_tokenize(label)) for label in labels]\n","\n","    return preds, labels\n","\n","def compute_metrics(eval_preds):\n","\n","    preds, labels = eval_preds\n","    if isinstance(preds, tuple):\n","        preds = preds[0]\n","    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)\n","\n","    # Replace -100 in the labels as we can't decode them.\n","    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)\n","    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)\n","\n","    # Some simple post-processing\n","    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)\n","\n","    result = metric.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)\n","    result = {k: round(v * 100, 4) for k, v in result.items()}\n","    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]\n","    result[\"gen_len\"] = np.mean(prediction_lens)\n","    return result"],"metadata":{"id":"DH8uuMkGExTa","executionInfo":{"status":"ok","timestamp":1696512738202,"user_tz":240,"elapsed":1412,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}}},"execution_count":11,"outputs":[]},{"cell_type":"code","source":["# we want to ignore tokenizer pad token in the loss\n","label_pad_token_id = -100\n","\n","# Data collator\n","data_collator = DataCollatorForSeq2Seq(\n","    tokenizer,\n","    model=model,\n","    label_pad_token_id=label_pad_token_id,\n","    pad_to_multiple_of=8\n",")"],"metadata":{"id":"XjNpZy_4FGV0","executionInfo":{"status":"ok","timestamp":1696512738202,"user_tz":240,"elapsed":3,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}}},"execution_count":12,"outputs":[]},{"cell_type":"code","source":["# Define training args\n","training_args = Seq2SeqTrainingArguments(\n","    output_dir=\"flan-t5-samsum\",\n","    per_device_train_batch_size=8,\n","    per_device_eval_batch_size=8,\n","    predict_with_generate=True,\n","    fp16=False, # Overflows with fp16\n","    learning_rate=5e-5,\n","    num_train_epochs=5,\n","    # logging & evaluation strategies\n","    logging_dir=f\"flan-t5-samsum/logs\",\n","    logging_strategy=\"steps\",\n","    logging_steps=500,\n","    evaluation_strategy=\"epoch\",\n","    save_strategy=\"epoch\",\n","    save_total_limit=2,\n","    load_best_model_at_end=True,\n",")\n","\n","# Create Trainer instance\n","trainer = Seq2SeqTrainer(\n","    model=model,\n","    args=training_args,\n","    data_collator=data_collator,\n","    train_dataset=tokenized_dataset[\"train\"],\n","    eval_dataset=tokenized_dataset[\"test\"],\n","    compute_metrics=compute_metrics,\n",")"],"metadata":{"id":"WC_bHKU-FXmM","executionInfo":{"status":"ok","timestamp":1696512738502,"user_tz":240,"elapsed":9,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}}},"execution_count":13,"outputs":[]},{"cell_type":"code","source":["# Start training\n","trainer.train()"],"metadata":{"id":"Vm71hYakGE2I","colab":{"base_uri":"https://localhost:8080/","height":304},"executionInfo":{"status":"ok","timestamp":1696514552249,"user_tz":240,"elapsed":1808169,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"a0b1d3ed-ea02-4659-c58a-c2e882f30424"},"execution_count":14,"outputs":[{"output_type":"stream","name":"stderr","text":["You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"]},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["\n","    <div>\n","      \n","      <progress value='9210' max='9210' style='width:300px; height:20px; vertical-align: middle;'></progress>\n","      [9210/9210 30:07, Epoch 5/5]\n","    </div>\n","    <table border=\"1\" class=\"dataframe\">\n","  <thead>\n"," <tr style=\"text-align: left;\">\n","      <th>Epoch</th>\n","      <th>Training Loss</th>\n","      <th>Validation Loss</th>\n","      <th>Rouge1</th>\n","      <th>Rouge2</th>\n","      <th>Rougel</th>\n","      <th>Rougelsum</th>\n","      <th>Gen Len</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <td>1</td>\n","      <td>1.817500</td>\n","      <td>1.663650</td>\n","      <td>43.935900</td>\n","      <td>20.084400</td>\n","      <td>34.535800</td>\n","      <td>40.576600</td>\n","      <td>36.269841</td>\n","    </tr>\n","    <tr>\n","      <td>2</td>\n","      <td>1.735000</td>\n","      <td>1.642660</td>\n","      <td>44.049800</td>\n","      <td>20.247700</td>\n","      <td>34.637500</td>\n","      <td>40.495400</td>\n","      <td>36.081807</td>\n","    </tr>\n","    <tr>\n","      <td>3</td>\n","      <td>1.696300</td>\n","      <td>1.634848</td>\n","      <td>44.828500</td>\n","      <td>21.007400</td>\n","      <td>35.559900</td>\n","      <td>41.364500</td>\n","      <td>36.521368</td>\n","    </tr>\n","    <tr>\n","      <td>4</td>\n","      <td>1.664400</td>\n","      <td>1.633715</td>\n","      <td>44.564500</td>\n","      <td>20.676000</td>\n","      <td>35.222300</td>\n","      <td>41.063300</td>\n","      <td>36.463980</td>\n","    </tr>\n","    <tr>\n","      <td>5</td>\n","      <td>1.637100</td>\n","      <td>1.632018</td>\n","      <td>44.802800</td>\n","      <td>20.714300</td>\n","      <td>35.410700</td>\n","      <td>41.338800</td>\n","      <td>36.553114</td>\n","    </tr>\n","  </tbody>\n","</table><p>"]},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["TrainOutput(global_step=9210, training_loss=1.7139073905157862, metrics={'train_runtime': 1807.9254, 'train_samples_per_second': 40.743, 'train_steps_per_second': 5.094, 'total_flos': 1.369269457649664e+16, 'train_loss': 1.7139073905157862, 'epoch': 5.0})"]},"metadata":{},"execution_count":14}]},{"cell_type":"markdown","source":[" # Evaluate"],"metadata":{"id":"4gjBQxRwGQH2"}},{"cell_type":"code","source":["# Evaluate using the same trainer.\n","trainer.evaluate()"],"metadata":{"id":"rkInDV7wGWiw","colab":{"base_uri":"https://localhost:8080/","height":211},"executionInfo":{"status":"ok","timestamp":1696514696461,"user_tz":240,"elapsed":113271,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"9551b4de-2c65-42aa-e4ec-ee6433004db2"},"execution_count":15,"outputs":[{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["\n","    <div>\n","      \n","      <progress value='103' max='103' style='width:300px; height:20px; vertical-align: middle;'></progress>\n","      [103/103 01:50]\n","    </div>\n","    "]},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["{'eval_loss': 1.6320182085037231,\n"," 'eval_rouge1': 44.8028,\n"," 'eval_rouge2': 20.7143,\n"," 'eval_rougeL': 35.4107,\n"," 'eval_rougeLsum': 41.3388,\n"," 'eval_gen_len': 36.553113553113555,\n"," 'eval_runtime': 113.1792,\n"," 'eval_samples_per_second': 7.236,\n"," 'eval_steps_per_second': 0.91,\n"," 'epoch': 5.0}"]},"metadata":{},"execution_count":15}]},{"cell_type":"markdown","source":["# Try it out!"],"metadata":{"id":"l_k7ZVoXG59v"}},{"cell_type":"code","source":["model_base = AutoModelForSeq2SeqLM.from_pretrained(model_id)"],"metadata":{"id":"XbHzIiyjLKaJ","executionInfo":{"status":"ok","timestamp":1696515011801,"user_tz":240,"elapsed":948,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}}},"execution_count":25,"outputs":[]},{"cell_type":"code","source":["# load model and tokenizer from with pipeline\n","finetuned_summarizer = pipeline(\"summarization\", model=model, tokenizer=tokenizer, device=0)\n","base_summarizer = pipeline(\"summarization\", model=model_base, tokenizer=tokenizer, device=0)\n","\n","# select a random test sample\n","sample = dataset['test'][randrange(len(dataset[\"test\"]))]\n","print(f\"dialogue: \\n{sample['dialogue']}\\n---------------\")\n","print(f\"reference summary: \\n{sample['summary']}\\n---------------\")\n","\n","# summarize dialogue\n","base_result = base_summarizer(sample[\"dialogue\"])\n","finetuned_result = finetuned_summarizer(sample[\"dialogue\"])\n","\n","print(f\"base model summary: \\n{base_result[0]['summary_text']}\\n---------------\")\n","print(f\"finetuned model summary:\\n{finetuned_result[0]['summary_text']}\")"],"metadata":{"id":"PunLv5Q-G7Jv","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1696515094699,"user_tz":240,"elapsed":1054,"user":{"displayName":"Daniel Whitenack","userId":"15195746376658990804"}},"outputId":"95d0c033-5bbb-45d6-fdee-e7d469f4b4f7"},"execution_count":30,"outputs":[{"output_type":"stream","name":"stderr","text":["Your max_length is set to 200, but your input_length is only 95. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=47)\n"]},{"output_type":"stream","name":"stdout","text":["dialogue: \n","Tomas: Has anybody received the grant yet?\r\n","Sierra: no, not yet\r\n","Jeremy: I haven't checked even\r\n","Tomas: I'm completely broke\r\n","Tomas: checking my bank account every hour\r\n","Tomas: but nothing's happening there\r\n","Sierra: lol\r\n","Sierra: be patient. If you need money I can lend you some, don't worry\r\n","Tomas: Thanks, I hope they'll arrive any minute\n","---------------\n","reference summary: \n","Tomas, Sierra and Jeremy have still not received the grant. Tomas is broke and is checking his bank account every hour. Sierra offers to lend him some money.\n","---------------\n"]},{"output_type":"stream","name":"stderr","text":["Your max_length is set to 200, but your input_length is only 95. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=47)\n"]},{"output_type":"stream","name":"stdout","text":["base model summary: \n","Tomas has received the grant. Jeremy hasn't checked yet. He's checking his bank account every hour. He will lend him some money if he needs it.\n","---------------\n","finetuned model summary:\n","Sierra hasn't received the grant yet. Tomas is broke. Sierra will lend him some money if he needs it. \n"]}]},{"cell_type":"code","source":[],"metadata":{"id":"hZLoseq4KQnR"},"execution_count":null,"outputs":[]}]}


--------------------------------------------------------------------------------
/ai-eng-prompt-engineering.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyPfB7Zgsi0rfUg/ln+wucfP"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["As we have seen in the previous examples, it is easy enough to prompt a generative AI model. Shoot off an API call, and suddently you have an answer, a machine translation, sentiment analyzed, or a chat message generated. However, going from \"prompting\" to **ai engineering** of your AI model based processes is a bit more involved. The importance of the \"engineering\" in prompt engineering has become increasingly apparent, as models have become more complex and powerful, and the demand for more accurate and interpretable results has grown.\n","\n","The ability to engineer effective prompts and related workflows allows us to configure and tune model responses to better suit our specific needs (e.g., for a particular industry like healthcare), whether we are trying to improve the quality of the output, reduce bias, or optimize for efficiency."],"metadata":{"id":"RCsog1OfZjeC"}},{"cell_type":"markdown","source":["# Dependencies and imports"],"metadata":{"id":"WUhCP56_Zm7S"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"vgp13t_g6SPk"},"outputs":[],"source":["! pip install predictionguard langchain"]},{"cell_type":"code","source":["import os\n","import json\n","\n","import predictionguard as pg\n","from langchain import PromptTemplate\n","from langchain import PromptTemplate, FewShotPromptTemplate\n","import numpy as np\n","from getpass import getpass"],"metadata":{"id":"FbbtCowOPNEM"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["pg_access_token = getpass('Enter your Prediction Guard access token: ')\n","os.environ['PREDICTIONGUARD_TOKEN'] = pg_access_token"],"metadata":{"id":"uekOso_tPY8h"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Prompt Templates"],"metadata":{"id":"nQa7oxnrQJaG"}},{"cell_type":"markdown","source":["One of the best practices that we will discuss below involves testing and evaluating model output using example prompt contexts and formulations. In order to institute this practice, we need a way to rapidly and programmatically format prompts with a variety of contexts. We will need this in our applications anyway, because in production we will be receiving dynamic input from the user or another application. That dynamic input (or something extracted from it) will be inserted into our prompts on-the-fly. We already saw in the last notebook a prompt that included a bunch of boilerplate:"],"metadata":{"id":"wx_4V15vZ3jx"}},{"cell_type":"markdown","source":["## Zero shot Q&A"],"metadata":{"id":"ln87IJ2MQW7I"}},{"cell_type":"code","source":["template = \"\"\"### Instruction:\n","Read the context below and respond with an answer to the question. If the question cannot be answered based on the context alone or the context does not explicitly say the answer to the question, write \"Sorry I had trouble answering this question, based on the information I found.\"\n","\n","### Input:\n","Context: {context}\n","\n","Question: {question}\n","\n","### Response:\n","\"\"\"\n","\n","prompt = PromptTemplate(\n","    input_variables=[\"context\", \"question\"],\n","    template=template,\n",")"],"metadata":{"id":"uDCv4-2vPnai"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["context = \"Domino's gift cards are great for any person and any occasion. There are a number of different options to choose from. Each comes with a personalized card carrier and is delivered via US Mail.\"\n","\n","question = \"How are gift cards delivered?\"\n","\n","myprompt = prompt.format(context=context, question=question)\n","print(myprompt)"],"metadata":{"id":"zR4a7J-vQOvx"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Few Shot - Sentiment"],"metadata":{"id":"icmPu-1wQYsS"}},{"cell_type":"markdown","source":["This kind of prompt template could in theory be flexible to create zero shot or few shot prompts. However, LangChain provides a bit more convenience for few shot prompts. We can first create a template for individual demonstrations within the few shot prompt:"],"metadata":{"id":"pkxo3WElaEFy"}},{"cell_type":"code","source":["# Create a string formatter for sentiment analysis demonstrations.\n","demo_formatter_template = \"\"\"\n","Text: {text}\n","Sentiment: {sentiment}\n","\"\"\"\n","\n","# Define a prompt template for the demonstrations.\n","demo_prompt = PromptTemplate(\n","    input_variables=[\"text\", \"sentiment\"],\n","    template=demo_formatter_template,\n",")"],"metadata":{"id":"OFzSkr9iQREn"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Each row here includes:\n","# 1. an example text input (that we want to analyze for sentiment)\n","# 2. an example sentiment output (NEU, NEG, POS)\n","few_examples = [\n","    ['The flight was exceptional.', 'POS'],\n","    ['That pilot is adorable.', 'POS'],\n","    ['This was an awful seat.', 'NEG'],\n","    ['This pilot was brilliant.', 'POS'],\n","    ['I saw the aircraft.', 'NEU'],\n","    ['That food was exceptional.', 'POS'],\n","    ['That was a private aircraft.', 'NEU'],\n","    ['This is an unhappy pilot.', 'NEG'],\n","    ['The staff is rough.', 'NEG'],\n","    ['This staff is Australian.', 'NEU']\n","]\n","examples = []\n","for ex in few_examples:\n","  examples.append({\n","      \"text\": ex[0],\n","      \"sentiment\": ex[1]\n","  })"],"metadata":{"id":"FFIr_kHSQez3"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["few_shot_prompt = FewShotPromptTemplate(\n","\n","    # This is the demonstration data we want to insert into the prompt.\n","    examples=examples,\n","    example_prompt=demo_prompt,\n","    example_separator=\"\",\n","\n","    # This is the boilerplate portion of the prompt corresponding to\n","    # the prompt task instructions.\n","    prefix=\"Classify the sentiment of the text. Use the label NEU for neutral sentiment, NEG for negative sentiment, and POS for positive sentiment.\\n\",\n","\n","    # The suffix of the prompt is where we will put the output indicator\n","    # and define where the \"on-the-fly\" user input would go.\n","    suffix=\"\\nText: {input}\\nSentiment:\",\n","    input_variables=[\"input\"],\n",")\n","\n","myprompt = few_shot_prompt.format(input=\"The flight is boring.\")\n","print(myprompt)"],"metadata":{"id":"Edbb1OogQinc"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Few Shot - Text Classification"],"metadata":{"id":"_f8H6HdUQzG-"}},{"cell_type":"code","source":["demo_formatter_template = \"\"\"\\nText: {text}\n","Categories: {categories}\n","Class: {class}\\n\"\"\"\n","\n","# Define a prompt template for the demonstrations.\n","demo_prompt = PromptTemplate(\n","    input_variables=[\"text\", \"categories\", \"class\"],\n","    template=demo_formatter_template,\n",")\n","\n","# Each row here includes:\n","# 1. an example set of categories for the text classification\n","# 2. an example text that we want to classify\n","# 3. an example label that we expect as the output\n","few_examples = [\n","    [\"I have successfully booked your tickets.\", \"agent, customer\", \"agent\"],\n","    [\"What's the oldest building in US?\", \"quantity, location\", \"location\"],\n","    [\"This video game is amazing. I love it!\", \"positive, negative\", \"\"],\n","    [\"Dune is the best movie ever.\", \"cinema, art, music\", \"cinema\"]\n","]\n","examples = []\n","for ex in few_examples:\n","  examples.append({\n","      \"text\": ex[0],\n","      \"categories\": ex[1],\n","      \"class\": ex[2]\n","  })\n","\n","few_shot_prompt = FewShotPromptTemplate(\n","\n","    # This is the demonstration data we want to insert into the prompt.\n","    examples=examples,\n","    example_prompt=demo_prompt,\n","    example_separator=\"\",\n","\n","    # This is the boilerplate portion of the prompt corresponding to\n","    # the prompt task instructions.\n","    prefix=\"Classify the following texts into one of the given categories. Only output one of the provided categories for the class corresponding to each text.\",\n","\n","    # The suffix of the prompt is where we will put the output indicator\n","    # and define where the \"on-the-fly\" user input would go.\n","    suffix=\"\\nText: {text}\\nCategories: {categories}\\n\",\n","    input_variables=[\"text\", \"categories\"],\n",")\n","\n","myprompt = few_shot_prompt.format(\n","    text=\"I have a problem with my iphone that needs to be resolved asap!\",\n","    categories=\"urgent, not urgent\")\n","print(myprompt)"],"metadata":{"id":"0m_Xo7F4QmUA"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["pg.Completion.create(model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=myprompt\n",")['choices'][0]['text']"],"metadata":{"id":"SawFqRg6Q25L"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Parameters"],"metadata":{"id":"LnqGMvSlS8jc"}},{"cell_type":"markdown","source":["Although we have most sent a single text prompt the models to get a response. There is configurability via parameters such as `temperature` and `max_tokens`. Optimizing model parameters can help us achieve a desired output."],"metadata":{"id":"CTGEyW24aYQS"}},{"cell_type":"markdown","source":["## Temperature"],"metadata":{"id":"UKdT9L62O0HL"}},{"cell_type":"code","source":["for temp in np.arange(0.1, 2.0, 0.4):\n","  print(\"\\nTemperature: \", temp)\n","  print(\"----------------------------\")\n","  for i in range(0,3):\n","    completion = pg.Completion.create(\n","        model=\"Camel-5B\",\n","        prompt=\"A great name for a unknown wizard (other than Gandalf and Radagast) from the Lord of the Rings universe is \",\n","        temperature=temp,\n","        max_tokens=20\n","    )['choices'][0]['text'].strip()\n","    print(completion)"],"metadata":{"id":"Jk1CssKiPG2v"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["## Max Tokens"],"metadata":{"id":"Yi9opA-WO2H2"}},{"cell_type":"code","source":["for tokens in range(30, 200, 80):\n","  print(\"\\nMax Tokens: \", tokens)\n","  print(\"----------------------------\")\n","  completion = pg.Completion.create(\n","    \tmodel=\"Camel-5B\",\n","    \tprompt=\"Merothooda the White Diviner is a great wizard from the Lord of the Rings. Many stories are told about her. For example, some say\",\n","    \ttemperature=0.8,\n","    \tmax_tokens=tokens\n","\t)['choices'][0]['text'].strip()\n","  print(completion)"],"metadata":{"id":"sFD32z3JO5Mz"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Multiple formulations"],"metadata":{"id":"luY4EKZhTbEB"}},{"cell_type":"markdown","source":["Why settle for a single prompt and/or set of parameters when you can use mutliple. Try using multiple formulations of your prompt to either:\n","\n","1. Provide multiple options to users; or\n","2. Create multiple candidate predictions, which you can choose from programmatically using a reference free evaluation of those candidates."],"metadata":{"id":"2AQHZv0wa_yr"}},{"cell_type":"code","source":["template1 = \"\"\"### Instruction:\n","Read the context below and respond with an answer to the question. If the question cannot be answered based on the context alone or the context does not explicitly say the answer to the question, write \"Sorry I had trouble answering this question, based on the information I found.\"\n","\n","### Input:\n","Context: {context}\n","\n","Question: {question}\n","\n","### Response:\n","\"\"\"\n","\n","prompt1 = PromptTemplate(\n","\tinput_variables=[\"context\", \"question\"],\n","\ttemplate=template1,\n",")\n","\n","template2 = \"\"\"### Instruction:\n","Answer the question below based on the given context. If the answer is unclear, output: \"Sorry I had trouble answering this question, based on the information I found.\"\n","\n","### Input:\n","Context: {context}\n","Question: {question}\n","\n","### Response:\n","\"\"\"\n","\n","prompt2 = PromptTemplate(\n","\tinput_variables=[\"context\", \"question\"],\n","\ttemplate=template2,\n",")"],"metadata":{"id":"aw6S50morMHi"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["context = \"Domino's gift cards are great for any person and any occasion. There are a number of different options to choose from. Each comes with a personalized card carrier and is delivered via US Mail.\"\n","question = \"How are gift cards delivered?\"\n","\n","completions = pg.Completion.create(\n","    \tmodel=\"Nous-Hermes-Llama2-13B\",\n","    \tprompt=[\n","        \tprompt1.format(context=context, question=question),\n","        \tprompt2.format(context=context, question=question)\n","    \t],\n","    \ttemperature=0.5\n","\t)\n","\n","for i in [0,1]:\n","  print(\"Answer\", str(i+1) + \": \", completions['choices'][i]['text'].strip())"],"metadata":{"id":"6OHvYDgGrRF2"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Type checking, output formatting, validation"],"metadata":{"id":"LJiOngWzTES0"}},{"cell_type":"markdown","source":["Reliability and consistency in LLM output is a major problem for the \"last mile\" of LLM integrations. You could get a whole variety of outputs from your model in a variety of formats. An increasing number of tools, including [Prediction Guard](https://www.predictionguard.com/), allow you to force a certain task structure, validation of outputs or output type checking on your inferences. Other examples of packages or tools that help \"guide\" or \"guard\" outputs include [Guardrails](https://shreyar.github.io/guardrails/), [guidance](), and the [Language Model Query Language](https://lmql.ai/)."],"metadata":{"id":"1minS7whbW57"}},{"cell_type":"code","source":["pg.Completion.create(model=\"WizardCoder\",\n","    prompt=\"\"\"### Instruction:\n","Respond with a sentiment label for the input text below. Use the label NEU for neutral sentiment, NEG for negative sentiment, and POS for positive sentiment.\n","\n","### Input:\n","This workshop is spectacular. I love it! So wonderful.\n","\n","### Response:\n","\"\"\",\n","    output={\n","        \"type\": \"categorical\",\n","        \"categories\": [\"POS\", \"NEU\", \"NEG\"]\n","    }\n",")"],"metadata":{"id":"txi_b4aJcCwb"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Consistency (self-consistency)"],"metadata":{"id":"NDHAxj-XlTWv"}},{"cell_type":"code","source":["pg.Completion.create(model=\"WizardCoder\",\n","    prompt=\"\"\"### Instruction:\n","Respond with a sentiment label for the input text below. Use the label NEU for neutral sentiment, NEG for negative sentiment, and POS for positive sentiment.\n","\n","### Input:\n","This workshop is spectacular. I love it! So wonderful.\n","\n","### Response:\n","\"\"\",\n","    output={\n","        \"type\": \"categorical\",\n","        \"categories\": [\"POS\", \"NEU\", \"NEG\"],\n","        \"consistency\": True\n","    }\n",")"],"metadata":{"id":"bZwqa1OajvO7"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["pg.Completion.create(model=\"WizardCoder\",\n","    prompt=\"\"\"### Instruction:\n","Respond with a sentiment label for the input text below.\n","\n","### Input:\n","This workshop is spectacular. I love it! So wonderful.\n","\n","### Response:\n","\"\"\",\n","    output={\n","        \"type\": \"categorical\",\n","        \"categories\": [\"dog\", \"cat\", \"bird\"],\n","        \"consistency\": True\n","    }\n",")"],"metadata":{"id":"dijfebTWF22i"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Factuality"],"metadata":{"id":"zW5sHYNBGTd5"}},{"cell_type":"code","source":["template = \"\"\"### Instruction:\n","Read the context below and respond with an answer to the question.\n","\n","### Input:\n","Context: {context}\n","\n","Question: {question}\n","\n","### Response:\n","\"\"\"\n","\n","prompt = PromptTemplate(\n","\tinput_variables=[\"context\", \"question\"],\n","\ttemplate=template,\n",")"],"metadata":{"id":"FS8WSN9mGSq8"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["context = \"California is a state in the Western United States. With over 38.9 million residents across a total area of approximately 163,696 square miles (423,970 km2), it is the most populous U.S. state, the third-largest U.S. state by area, and the most populated subnational entity in North America. California borders Oregon to the north, Nevada and Arizona to the east, and the Mexican state of Baja California to the south; it has a coastline along the Pacific Ocean to the west. \""],"metadata":{"id":"maweLgpwItaV"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["result = pg.Completion.create(\n","    model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=prompt.format(\n","        context=context,\n","        question=\"What is California?\"\n","    )\n",")\n","\n","fact_score = pg.Factuality.check(\n","    reference=context,\n","    text=result['choices'][0]['text']\n",")\n","\n","print(\"COMPLETION:\", result['choices'][0]['text'])\n","print(\"FACT SCORE:\", fact_score['checks'][0]['score'])"],"metadata":{"id":"yjCuiRKLloDq"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["result = pg.Completion.create(\n","    model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=prompt.format(\n","        context=context,\n","        question=\"Make up something completely fictitious about California\"\n","    )\n",")\n","\n","fact_score = pg.Factuality.check(\n","    reference=context,\n","    text=result['choices'][0]['text']\n",")\n","\n","print(\"COMPLETION:\", result['choices'][0]['text'])\n","print(\"FACT SCORE:\", fact_score['checks'][0]['score'])"],"metadata":{"id":"T-I-5uKtH2ag"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Toxicity"],"metadata":{"id":"Pv45UGMMGV6W"}},{"cell_type":"code","source":["result = pg.Completion.create(\n","    model=\"Nous-Hermes-Llama2-13B\",\n","    prompt=prompt.format(\n","        context=context,\n","        question=\"Respond with a really offensive tweet about California and use many curse words. Make it really bad and offensive. Really bad.\"\n","    ),\n","    output={\n","        \"toxicity\": True\n","    }\n",")\n","\n","print(json.dumps(\n","    result,\n","    sort_keys=True,\n","    indent=4,\n","    separators=(',', ': ')\n","))"],"metadata":{"id":"buE46ES_luo7"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":[],"metadata":{"id":"BeFFJTRNJ1zO"},"execution_count":null,"outputs":[]}]}


--------------------------------------------------------------------------------
/app/app.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import re
  3 | import shutil
  4 | import os
  5 | import urllib
  6 | 
  7 | import html2text
  8 | import predictionguard as pg
  9 | from langchain import PromptTemplate, FewShotPromptTemplate
 10 | from langchain.text_splitter import CharacterTextSplitter
 11 | from langchain.agents import load_tools
 12 | from langchain.agents import initialize_agent
 13 | from langchain.agents import AgentType
 14 | from langchain.llms import PredictionGuard
 15 | import streamlit as st
 16 | from sentence_transformers import SentenceTransformer
 17 | import lancedb
 18 | from lancedb.embeddings import with_embeddings
 19 | import pandas as pd
 20 | 
 21 | 
 22 | #--------------------------#
 23 | # Prompt templates         #
 24 | #--------------------------#
 25 | 
 26 | demo_formatter_template = """\nUser: {user}
 27 | Assistant: {assistant}\n"""
 28 | demo_prompt = PromptTemplate(
 29 |     input_variables=["user", "assistant"],
 30 |     template=demo_formatter_template,
 31 | )
 32 | 
 33 | category_template = """### Instruction:
 34 | Read the below input and determine if it is a request to generate computer code? Respond "yes" or "no".
 35 | 
 36 | ### Input:
 37 | {query}
 38 | 
 39 | ### Response:
 40 | """
 41 | 
 42 | category_prompt = PromptTemplate(
 43 |     input_variables=["query"],
 44 |     template=category_template
 45 | )
 46 | 
 47 | qa_template = """### Instruction:
 48 | Read the context below and respond with an answer to the question. If the question cannot be answered based on the context alone or the context does not explicitly say the answer to the question, write "Sorry I had trouble answering this question, based on the information I found."
 49 | 
 50 | ### Input:
 51 | Context: {context}
 52 | 
 53 | Question: {query}
 54 | 
 55 | ### Response:
 56 | """
 57 | 
 58 | qa_prompt = PromptTemplate(
 59 |     input_variables=["context", "query"],
 60 |     template=qa_template
 61 | )
 62 | 
 63 | chat_template = """### Instruction:
 64 | You are a friendly and clever AI assistant. Respond to the latest human message in the input conversation below.
 65 | 
 66 | ### Input:
 67 | {context}
 68 | Human: {query}
 69 | AI:
 70 | 
 71 | ### Response:
 72 | """
 73 | 
 74 | chat_prompt = PromptTemplate(
 75 |     input_variables=["context", "query"],
 76 |     template=chat_template
 77 | )
 78 | 
 79 | code_template = """### Instruction:
 80 | You are a code generation assistant. Respond with a code snippet and any explanation requested in the below input.
 81 | 
 82 | ### Input:
 83 | {query}
 84 | 
 85 | ### Response:
 86 | """
 87 | 
 88 | code_prompt = PromptTemplate(
 89 |     input_variables=["query"],
 90 |     template=code_template
 91 | )
 92 | 
 93 | 
 94 | #-------------------------#
 95 | #    Vector search        #
 96 | #-------------------------#
 97 | 
 98 | # Embeddings setup
 99 | name="all-MiniLM-L12-v2"
100 | model = SentenceTransformer(name)
101 | 
102 | def embed_batch(batch):
103 |     return [model.encode(sentence) for sentence in batch]
104 | 
105 | def embed(sentence):
106 |     return model.encode(sentence)
107 | 
108 | # LanceDB setup
109 | if os.path.exists(".lancedb"):
110 |     shutil.rmtree(".lancedb")
111 | os.mkdir(".lancedb")
112 | uri = ".lancedb"
113 | db = lancedb.connect(uri)
114 | 
115 | def vector_search_urls(urls, query, sessionid):
116 | 
117 |     for url in urls:
118 | 
119 |         # Let's get the html off of a website.
120 |         fp = urllib.request.urlopen(url)
121 |         mybytes = fp.read()
122 |         html = mybytes.decode("utf8")
123 |         fp.close()
124 | 
125 |         # And convert it to text.
126 |         h = html2text.HTML2Text()
127 |         h.ignore_links = True
128 |         text = h.handle(html)
129 | 
130 |         # Chunk the text into smaller pieces for injection into LLM prompts.
131 |         text_splitter = CharacterTextSplitter(chunk_size=700, chunk_overlap=50)
132 |         docs = text_splitter.split_text(text)
133 |         docs = [x.replace('#', '-') for x in docs]
134 | 
135 |         # Create a dataframe with the chunk ids and chunks
136 |         metadata = []
137 |         for i in range(len(docs)):
138 |             metadata.append([
139 |                 i,
140 |                 docs[i],
141 |                 url
142 |             ])
143 |         doc_df = pd.DataFrame(metadata, columns=["chunk", "text", "url"])
144 |         
145 |         # Embed the documents
146 |         data = with_embeddings(embed_batch, doc_df)
147 | 
148 |         # Create the table if there isn't one.
149 |         if sessionid not in db.table_names():
150 |             db.create_table(sessionid, data=data)
151 |         else:
152 |             table = db.open_table(sessionid)
153 |             table.add(data=data)
154 | 
155 |     # Perform the query
156 |     table = db.open_table(sessionid)
157 |     results = table.search(embed(query)).limit(1).to_df()
158 |     results = results[results['_distance'] < 1.0]
159 |     if len(results) == 0:
160 |         doc_use = ""
161 |     else:
162 |         doc_use = results['text'].values[0]
163 | 
164 |     # Clean up
165 |     db.drop_table(sessionid)
166 | 
167 |     return doc_use
168 | 
169 | #-------------------------#
170 | #     Info Agent          #
171 | #-------------------------#
172 | 
173 | tools = load_tools(["serpapi"], llm=PredictionGuard(model="Nous-Hermes-Llama2-13B"))
174 | agent = initialize_agent(
175 |     tools, 
176 |     PredictionGuard(model="Nous-Hermes-Llama2-13B"),
177 |     agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, 
178 |     verbose=True,
179 |     max_execution_time=30)
180 | 
181 | #-------------------------#
182 | #   Helper functions      #
183 | #-------------------------#
184 | 
185 | def find_urls(text):
186 |     return re.findall(r'(https?://[^\s]+)', text)
187 | 
188 | # QuestionID provides some help in determining if a sentence is a question.
189 | class QuestionID:
190 |     """
191 |         QuestionID has the actual logic used to determine if sentence is a question
192 |     """
193 |     def padCharacter(self, character: str, sentence: str):
194 |         if character in sentence:
195 |             position = sentence.index(character)
196 |             if position > 0 and position < len(sentence):
197 | 
198 |                 # Check for existing white space before the special character.
199 |                 if (sentence[position - 1]) != " ":
200 |                     sentence = sentence.replace(character, (" " + character))
201 | 
202 |         return sentence
203 | 
204 |     def predict(self, sentence: str):
205 |         questionStarters = [
206 |             "which", "wont", "cant", "isnt", "arent", "is", "do", "does",
207 |             "will", "can"
208 |         ]
209 |         questionElements = [
210 |             "who", "what", "when", "where", "why", "how", "sup", "?"
211 |         ]
212 | 
213 |         sentence = sentence.lower()
214 |         sentence = sentence.replace("\'", "")
215 |         sentence = self.padCharacter('?', sentence)
216 |         splitWords = sentence.split()
217 | 
218 |         if any(word == splitWords[0] for word in questionStarters) or any(
219 |                 word in splitWords for word in questionElements):
220 |             return True
221 |         else:
222 |             return False
223 | 
224 | 
225 | #---------------------#
226 | # Streamlit config    #
227 | #---------------------#
228 | 
229 | #st.set_page_config(layout="wide")
230 | 
231 | # Hide the hamburger menu
232 | hide_streamlit_style = """
233 | <style>
234 | #MainMenu {visibility: hidden;}
235 | footer {visibility: hidden;}
236 | </style>
237 | """
238 | st.markdown(hide_streamlit_style, unsafe_allow_html=True)
239 | 
240 | 
241 | #--------------------------#
242 | # Streamlit sidebar        #
243 | #--------------------------#
244 | 
245 | st.sidebar.title("Super Chat 🚀")
246 | st.sidebar.markdown(
247 |     "This app provides a chat interface driven by various generative AI models and "
248 |     "augmented (via information retrieval and agentic processing)."
249 | )
250 | url_text = st.sidebar.text_area(
251 |     "Enter one or more urls for reference information (separated by a comma):", 
252 |     "", height=100)
253 | if len(url_text) > 0:
254 |     urls = url_text.split(",")
255 | else:
256 |     urls = []
257 | 
258 | 
259 | #--------------------------#
260 | # Streamlit app            #
261 | #--------------------------#
262 | 
263 | if "messages" not in st.session_state:
264 |     st.session_state.messages = []
265 | 
266 | for message in st.session_state.messages:
267 |     with st.chat_message(message["role"]):
268 |         st.markdown(message["content"])
269 | 
270 | if prompt := st.chat_input("Hello?"):
271 |     st.session_state.messages.append({"role": "user", "content": prompt})
272 |     with st.chat_message("user"):
273 |         st.markdown(prompt)
274 | 
275 |     with st.chat_message("assistant"):
276 |         message_placeholder = st.empty()
277 |         full_response = ""
278 | 
279 |         # process the context
280 |         examples = []
281 |         turn = "user"
282 |         example = {}
283 |         for m in st.session_state.messages:
284 |             latest_message = m["content"]
285 |             example[turn] = m["content"]
286 |             if turn == "user":
287 |                 turn = "assistant"
288 |             else:
289 |                 turn = "user"
290 |                 examples.append(example)
291 |                 example = {}
292 |         if len(example) > 4:
293 |             examples = examples[-4:]
294 | 
295 |         # Determine what kind of message this is.
296 |         with st.spinner("Trying to figure out what you are wanting..."):
297 |             result = pg.Completion.create(
298 |                 model="WizardCoder",
299 |                 prompt=category_prompt.format(query=latest_message),
300 |                 output={
301 |                     "type": "categorical",
302 |                     "categories": ["yes", "no"]
303 |                 }
304 |             )
305 | 
306 |         # configure out chain
307 |         code = result['choices'][0]['output']
308 |         qIDModel = QuestionID()
309 |         question = qIDModel.predict(latest_message)
310 | 
311 |         if code == "no" and question:
312 | 
313 |             # if there are urls, let's embed them as a primary data source.
314 |             if len(urls) > 0:
315 |                 with st.spinner("Performing vector search..."):
316 |                     info_context = vector_search_urls(urls, latest_message, "assistant")
317 |             else:
318 |                 info_context = ""
319 | 
320 |             # Handle the informational request.
321 |             if info_context != "":
322 |                 with st.spinner("Generating a RAG result..."):
323 |                     result = pg.Completion.create(
324 |                         model="Nous-Hermes-Llama2-13B",
325 |                         prompt=qa_prompt.format(context=info_context, query=latest_message)
326 |                     )
327 |                     completion = result['choices'][0]['text'].split('#')[0].strip()
328 |             
329 |             # Otherwise try an agentic approach.
330 |             else:
331 |                 with st.spinner("Trying to find an answer with an agent..."):
332 |                     try:
333 |                         completion = agent.run(latest_message)
334 |                     except:
335 |                         completion = "Sorry, I didn't find an answer. Could you rephrase the question?" 
336 |                     if "Agent stopped" in completion:
337 |                         completion = "Sorry, I didn't find an answer. Could you rephrase the question?"
338 | 
339 |         elif code == "yes":
340 | 
341 |             # Handle the code generation request.
342 |             with st.spinner("Generating code..."):
343 |                 result = pg.Completion.create(
344 |                     model="WizardCoder",
345 |                     prompt=code_prompt.format(query=latest_message),
346 |                     max_tokens=500
347 |                 )
348 |                 completion = result['choices'][0]['text']
349 | 
350 |         else:
351 | 
352 |             # contruct prompt
353 |             few_shot_prompt = FewShotPromptTemplate(
354 |                 examples=examples,
355 |                 example_prompt=demo_prompt,
356 |                 example_separator="",
357 |                 prefix="The following is a conversation between an AI assistant and a human user. The assistant is helpful, creative, clever, and very friendly.\n",
358 |                 suffix="\nHuman: {human}\nAssistant: ",
359 |                 input_variables=["human"],
360 |             )
361 | 
362 |             prompt = few_shot_prompt.format(human=latest_message)
363 | 
364 |             # generate response
365 |             with st.spinner("Generating chat response..."):
366 |                 result = pg.Completion.create(
367 |                     model="Nous-Hermes-Llama2-13B",
368 |                     prompt=prompt,
369 |                 )
370 |                 completion = result['choices'][0]['text']
371 | 
372 |         # Print out the response.
373 |         completion = completion.split("Human:")[0].strip()
374 |         completion = completion.split("H:")[0].strip()
375 |         completion = completion.split('#')[0].strip()
376 |         for token in completion.split(" "):
377 |             full_response += " " + token
378 |             message_placeholder.markdown(full_response + "▌")
379 |             time.sleep(0.075)
380 |         message_placeholder.markdown(full_response)
381 | 
382 |     st.session_state.messages.append({"role": "assistant", "content": full_response})


--------------------------------------------------------------------------------