├── .devcontainer ├── Dockerfile ├── devcontainer.json └── requirements.txt ├── .gitignore ├── .python-version ├── LICENSE ├── README.md ├── images ├── architecture.jpg └── launch.jpg ├── notebooks ├── genai_colab_lab1and2.ipynb ├── genai_colab_lab_3.ipynb └── genai_colab_lab_4.ipynb └── streamlit ├── .streamlit └── config.toml ├── app.py └── images ├── background-dark2.jpeg ├── elastic_logo_transp_100.png └── sidebar2-dark.png /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | # Based on the GitHub Codespaces universal dev container image 2 | ARG STARTER_IMAGE=mcr.microsoft.com/devcontainers/universal:2 3 | FROM ${STARTER_IMAGE} 4 | 5 | 6 | 7 | # Add in your own requirements below. 8 | # For best maintainability use a specific version. 9 | COPY requirements.txt . 10 | RUN pip install --upgrade pip 11 | RUN pip install -r requirements.txt -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "build": { 3 | "dockerfile": "Dockerfile" 4 | }, 5 | "hostRequirements": { 6 | "cpus": 4 7 | }, 8 | "waitFor": "onCreateCommand", 9 | "postAttachCommand": { 10 | "readme": "code README.md" 11 | }, 12 | "customizations": { 13 | "codespaces": { 14 | "openFiles": [] 15 | }, 16 | "vscode": { 17 | "extensions": [ 18 | "ms-toolsai.jupyter", 19 | "ms-python.python" 20 | ], 21 | "settings": { 22 | "workbench.editorAssociations": { 23 | "*.md": "vscode.markdown.preview.editor" 24 | } 25 | } 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /.devcontainer/requirements.txt: -------------------------------------------------------------------------------- 1 | ipywidgets==8.1.1 2 | matplotlib==3.8.1 3 | numpy==1.23.5 4 | pandas==1.5.3 5 | torch==2.1.0 6 | torchvision==0.16.0 7 | tqdm==4.64.0 8 | streamlit==1.30.0 9 | # Add custom workshop packages below: 10 | openai==1.3.9 11 | elasticsearch==8.12.0 12 | eland==8.11.1 13 | transformers==4.35.0 14 | sentence_transformers==2.2.2 15 | python-dotenv==1.0.0 16 | elastic-apm==6.20.0 17 | inquirer==3.2.1 18 | sentencepiece==0.1.99 19 | tiktoken==0.5.2 20 | cohere==4.38 21 | langchain==0.1.3 22 | beautifulsoup4==4.11.2 23 | scikit-learn==1.2.2 24 | scipy==1.11.4 25 | elasticsearch-llm-cache==0.9.5 26 | git+https://github.com/elastic/notebook-workshop-loader.git@main -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | llm_download_cache 2 | env 3 | .DS_Store 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # poetry 102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 103 | # This is especially recommended for binary packages to ensure reproducibility, and is more 104 | # commonly ignored for libraries. 105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 106 | #poetry.lock 107 | 108 | # pdm 109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 110 | #pdm.lock 111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 112 | # in version control. 113 | # https://pdm.fming.dev/#use-with-ide 114 | .pdm.toml 115 | 116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 117 | __pypackages__/ 118 | 119 | # Celery stuff 120 | celerybeat-schedule 121 | celerybeat.pid 122 | 123 | # SageMath parsed files 124 | *.sage.py 125 | 126 | # Environments 127 | .env 128 | .venv 129 | env/ 130 | venv/ 131 | ENV/ 132 | env.bak/ 133 | venv.bak/ 134 | 135 | # Spyder project settings 136 | .spyderproject 137 | .spyproject 138 | 139 | # Rope project settings 140 | .ropeproject 141 | 142 | # mkdocs documentation 143 | /site 144 | 145 | # mypy 146 | .mypy_cache/ 147 | .dmypy.json 148 | dmypy.json 149 | 150 | # Pyre type checker 151 | .pyre/ 152 | 153 | # pytype static type analyzer 154 | .pytype/ 155 | 156 | # Cython debug symbols 157 | cython_debug/ 158 | 159 | # PyCharm 160 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 161 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 162 | # and can be added to the global gitignore or merged into this file. For a more nuclear 163 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
164 | #.idea/ 165 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | segment 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GenAI Dev Workshop - Azure 2 | 3 | Welcome to this Elastic GenAI Workshop for Developers. We'll be using GitHub Codespaces as our Python development environment and different back-end services to compare and contrast ways of creating embedding models, implementing semantic search, and using Large Language Models like Azure OpenAI ChatGPT. 
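As a preview of where the labs end up, here is a minimal sketch of the kind of semantic search call you'll build. It is illustrative only: the endpoint, API key, index name `workshop-docs`, and deployed model id below are placeholder assumptions, not the workshop's actual values.

```python
from elasticsearch import Elasticsearch

# Placeholder connection details -- the labs load real values from env vars
es = Elasticsearch("https://localhost:9200", api_key="...")

# Embed the query server-side with a deployed text-embedding model and
# retrieve the nearest passages: the building block behind the RAG labs
resp = es.search(
    index="workshop-docs",  # hypothetical index name
    knn={
        "field": "text_embedding",
        "query_vector_builder": {
            "text_embedding": {
                "model_id": "sentence-transformers__all-minilm-l6-v2",
                "model_text": "How do I request a storm rebate?",
            }
        },
        "k": 5,
        "num_candidates": 50,
    },
)
for hit in resp["hits"]["hits"]:
    print(hit["_score"], hit["_source"].get("text"))
```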
4 | 5 | ![architecture](images/architecture.jpg) 6 | 7 | ## To run this lab you will need: 8 | * a workshop key from your instructor to load the environment variables required to run these notebooks 9 | * your own GitHub account to use the free GitHub Codespaces execution environment 10 | 11 | ## Instructions 12 | 13 | Follow [this link](https://codespaces.new/elastic/genai-workshop-codespaces/tree/main) to launch the workshop in GitHub Codespaces. 14 | 15 | Alternatively, from this repository, start a GitHub Codespaces execution environment: click the green "<> Code" button above and to the right and select "Create codespace on main". 16 | 17 | ![launch](images/launch.jpg) 18 | 19 | Once the environment loads, the first step in the notebook will try to load the environment variables required to run the workshop. Please enter the workshop key provided to you by the instructor. 20 | 21 | ## If you close your codespace accidentally 22 | 23 | You can get back to it here: https://github.com/codespaces 24 | 25 | ## Shutting it down when you are done 26 | 27 | Make sure to shut down the codespace when you are done with it. You can do that from here: https://github.com/codespaces -------------------------------------------------------------------------------- /images/architecture.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/genai-workshop-codespaces/68ee6494b597caa097782a9205dac9545e06b289/images/architecture.jpg -------------------------------------------------------------------------------- /images/launch.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/genai-workshop-codespaces/68ee6494b597caa097782a9205dac9545e06b289/images/launch.jpg -------------------------------------------------------------------------------- /notebooks/genai_colab_lab1and2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "EEeUZ1r79kr0" 7 | }, 8 | "source": [ 9 | "# Setup Environment\n", 10 | "The following code loads the environment variables required to run this notebook.\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "id": "iqND-S5P9kr0" 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "FILE=\"GenAI Lab 1 and 2\"\n", 22 | "\n", 23 | "## suppress some warnings\n", 24 | "import warnings, os\n", 25 | "os.environ['PIP_ROOT_USER_ACTION'] = 'ignore'\n", 26 | "warnings.filterwarnings(\"ignore\", category=UserWarning, module='huggingface_hub.utils._token')\n", 27 | "warnings.filterwarnings(\"ignore\", message=\"torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead.\")\n", 28 | "warnings.filterwarnings(\"ignore\", category=DeprecationWarning, message=\"on_submit is deprecated.*\")\n", 29 | "\n", 30 | "# workshop environment - this is where you'll enter a key\n", 31 | "# ! 
pip install -qqq git+https://github.com/elastic/notebook-workshop-loader.git@main\n", 32 | "from notebookworkshoploader import loader\n", 33 | "from dotenv import load_dotenv\n", 34 | "if os.path.isfile(\"../env\"):\n", 35 | "    load_dotenv(\"../env\", override=True)\n", 36 | "    print('Successfully loaded environment variables from local env file')\n", 37 | "else:\n", 38 | "    loader.load_remote_env(file=FILE, env_url=\"https://notebook-workshop-api-voldmqr2bq-uc.a.run.app\")" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": { 44 | "id": "Sm8uPLZxNOF3" 45 | }, 46 | "source": [ 47 | "# Lab 1-1: Using Transformer Models\n", 48 | "\n", 49 | "In this lab we will\n", 50 | "* Intro to Python Notebooks - Hello World, importing Python libraries\n", 51 | "* Caching the download of a smaller LLM\n", 52 | "* Using basic transformer models locally\n", 53 | "\n" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": { 59 | "id": "HqlXsCGMN6TC" 60 | }, 61 | "source": [ 62 | "## Step 1: Hit play on the next code sample" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "id": "r0WRcBgDNOF3" 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "print(\"Hello World\")" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": { 79 | "id": "P85HsyKmNOF4" 80 | }, 81 | "source": [ 82 | "## Step 2: Use ! to execute a shell command" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": { 89 | "id": "sfW6H9meNOF4" 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "! echo \"The shell thinks the Current Directory is: $(pwd)\"" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": { 99 | "id": "lSXuTJelNOF5" 100 | }, 101 | "source": [ 102 | "## Step 3: Environment setup\n", 103 | "\n", 104 | "First let us import some Python libraries we'll use in the first lab module." 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": { 111 | "id": "FivGgCcXNOF5" 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "# ! pip install -qqq --upgrade pip\n", 116 | "# ! pip install -qqq torch==2.1.0\n", 117 | "# ! pip install -qqq --upgrade transformers==4.36.2\n", 118 | "# ! pip install -qqq python-dotenv==1.0.0\n", 119 | "# ! pip install -qqq tiktoken==0.5.2 cohere==4.38 openai==1.3.9 ## for later in the lab\n", 120 | "! 
echo \"github codespaces has pre-installed these libraries\"" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": { 126 | "id": "Kk2GXx6ANOF5" 127 | }, 128 | "source": [ 129 | "## Step 4: Utility functions\n", 130 | "Some utility functions that are good to keep on hand" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": { 137 | "id": "0kVJFIJxNOF5" 138 | }, 139 | "outputs": [], 140 | "source": [ 141 | "import json\n", 142 | "# pretty printing JSON objects\n", 143 | "def json_pretty(input_object):\n", 144 | " print(json.dumps(input_object, indent=4))\n", 145 | "\n", 146 | "\n", 147 | "import textwrap\n", 148 | "# wrap text when printing, because colab scrolls output to the right too much\n", 149 | "def wrap_text(text, width):\n", 150 | " wrapped_text = textwrap.wrap(text, width)\n", 151 | " return '\\n'.join(wrapped_text)\n", 152 | "\n", 153 | "def print_light_blue(text):\n", 154 | " print(f'\\033[94m{text}\\033[0m')\n", 155 | "\n" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": { 161 | "id": "ZIPsPhYuNOF5" 162 | }, 163 | "source": [ 164 | "## Step 5: Download sentiment analysis model from HuggingFace\n", 165 | "\n", 166 | "We'll use the Huggingface Transformer library to download and ready an Open Source model called DistilBERT which can be used for sentiment analysis.\n", 167 | "\n", 168 | "* Details of the model can be found on its [Hugging Face Page](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)\n", 169 | "* This model is pretrained to determine if an input text is of *POSITIVE* or *NEGATIVE* sentiment and makes a good intro example to AI models.\n", 170 | "* Note we are caching the model files in a folder called ```llm_download_cache``` which will help us not have to re-download the files again within the connection to this runtime. You can see the download in the filesystem (using the left hand side menu)\n", 171 | "\n" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": { 178 | "id": "Rg4o4viHNOF5" 179 | }, 180 | "outputs": [], 181 | "source": [ 182 | "import torch\n", 183 | "import tqdm\n", 184 | "import json\n", 185 | "import os\n", 186 | "from transformers import (pipeline,\n", 187 | " DistilBertTokenizer,\n", 188 | " DistilBertForSequenceClassification)\n", 189 | "\n", 190 | "# Set the cache directory\n", 191 | "cache_directory = \"llm_download_cache\"\n", 192 | "\n", 193 | "# Create the cache directory if it doesn't exist\n", 194 | "if not os.path.exists(cache_directory):\n", 195 | " os.makedirs(cache_directory)\n", 196 | "\n", 197 | "model_id = \"distilbert-base-uncased-finetuned-sst-2-english\"\n", 198 | "sentiment_tokenizer = DistilBertTokenizer.from_pretrained(\n", 199 | " model_id, cache_dir=cache_directory)\n", 200 | "sentiment_model = DistilBertForSequenceClassification.from_pretrained(\n", 201 | " model_id, cache_dir=cache_directory)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": { 207 | "id": "1N1DeQ4SNOF5" 208 | }, 209 | "source": [ 210 | "\n", 211 | "## Step 6: Run sentiment analysis\n", 212 | "Okay! let's run the model ```sentiment_model``` on two pieces of sample text." 
213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": { 219 | "id": "KfJ4WqGLNOF6" 220 | }, 221 | "outputs": [], 222 | "source": [ 223 | "## With the distilbert model downloaded and cached we can call it for\n", 224 | "## sentiment analysis\n", 225 | "\n", 226 | "# Define the sentiment analysis pipeline\n", 227 | "sentiment_classifier = pipeline(\"sentiment-analysis\",\n", 228 | " model=sentiment_model,\n", 229 | " tokenizer=sentiment_tokenizer,\n", 230 | " device='cpu')\n", 231 | "#two samples\n", 232 | "classifier_results = sentiment_classifier([\n", 233 | " \"My dog is so cute, I love him.\",\n", 234 | "\n", 235 | " \"I am very sorry to inform you that the tax\\\n", 236 | " administration has decided to audit you.\"\n", 237 | "])\n", 238 | "\n", 239 | "json_pretty(classifier_results)" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": { 245 | "id": "zLn39tLNNOF6" 246 | }, 247 | "source": [ 248 | "### 🫵 Try it yourself - Get Creative 🫵\n", 249 | "Try some of your own examples.\n", 250 | "Note, AI models are subject to bias. The model card for this model goes into pretty good detail on the issue. [Read more here](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english#risks-limitations-and-biases)" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": { 257 | "id": "NYS-5TMPNOF6" 258 | }, 259 | "outputs": [], 260 | "source": [ 261 | "your_classifier_results = sentiment_classifier([\n", 262 | " \"CHANGE ME\",\n", 263 | " \"CHANGE ME\"\n", 264 | "])\n", 265 | "json_pretty(your_classifier_results)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": { 271 | "id": "n5f1vIeeNOF6" 272 | }, 273 | "source": [ 274 | "## Step 7: Generative LLM - Simple and Local - Download Flan T5\n", 275 | "\n", 276 | "Let's start with the Hello World of generative AI examples: completing a sentence. For this we'll install a fine tuned Flan-T5 variant model. ([LaMini-T5 ](https://huggingface.co/MBZUAI/LaMini-T5-738M))\n", 277 | "\n", 278 | "Note, while this is a smaller checkpoint of the model, it is still a 900 MB download. 
We'll cache the files in the same folder.\n" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": null, 284 | "metadata": { 285 | "id": "l6kqG5siNOF6" 286 | }, 287 | "outputs": [], 288 | "source": [ 289 | "## Let's play with something a little bigger that can do a text completion\n", 290 | "## This is a 900 MB download and takes some RAM to run, but it works CPU only\n", 291 | "\n", 292 | "from transformers import pipeline\n", 293 | "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n", 294 | "\n", 295 | "# model_name = \"MBZUAI/LaMini-Flan-T5-77M\"\n", 296 | "model_name = \"MBZUAI/LaMini-T5-223M\" ## Trying this 900 MB version of the LLM\n", 297 | "# model_name = \"MBZUAI/LaMini-T5-738M\" ## 3GB version requires more RAM than we have in this local environment\n", 298 | "\n", 299 | "\n", 300 | "llm_tokenizer = AutoTokenizer.from_pretrained(model_name,\n", 301 | " cache_dir=cache_directory)\n", 302 | "llm_model = AutoModelForSeq2SeqLM.from_pretrained(model_name,\n", 303 | " cache_dir=cache_directory)\n", 304 | "\n", 305 | "llm_pipe = pipeline(\n", 306 | " \"text2text-generation\",\n", 307 | " model=llm_model,\n", 308 | " tokenizer=llm_tokenizer,\n", 309 | " max_length=100\n", 310 | " )\n" 311 | ] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": { 316 | "id": "RSCILNELNOF6" 317 | }, 318 | "source": [ 319 | "## Step 8: Generate text completions, watch for Hallucinations" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": { 326 | "id": "Gxpdh9UtNOF7" 327 | }, 328 | "outputs": [], 329 | "source": [ 330 | "countries = [\n", 331 | " \"United Kingdom\",\n", 332 | " \"France\",\n", 333 | " \"People's Republic of China\",\n", 334 | " \"United States\",\n", 335 | " \"Ecuador\",\n", 336 | " \"Freedonia\", ## high hallucination potential\n", 337 | " \"Faketopia\" ## high hallucination potential\n", 338 | " ]\n", 339 | "\n", 340 | "for country in countries:\n", 341 | " input_text = f\"The capital of the {country} is\"\n", 342 | " output = llm_pipe(input_text)\n", 343 | " completed_sentence = f\"\\033[94m{input_text}\\033[0m {output[0]['generated_text']}\"\n", 344 | " print(completed_sentence)" 345 | ] 346 | }, 347 | { 348 | "cell_type": "markdown", 349 | "metadata": { 350 | "id": "rpsbegewNOF7" 351 | }, 352 | "source": [ 353 | "### 🫵 Try it yourself - Get Creative 🫵\n", 354 | "Try some of your own examples.\n", 355 | "This thing isn't super smart without fine tuning, but it can handle some light context injection and prompt engineering. We'll learn more about those subjects in later modules.\n", 356 | "\n", 357 | "Notice the difference between asking a specific question and phrasing a completion\n", 358 | "* \"Who is the Prime Minister of the UK?\"\n", 359 | "* \"The current Prime Minister of the united kingdom is \"" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": null, 365 | "metadata": { 366 | "id": "N6GhGplrNOF7" 367 | }, 368 | "outputs": [], 369 | "source": [ 370 | "prompt_text = \"The current Prime Minister of the United Kingdom is\" ## high stale data potential\n", 371 | "output = llm_pipe(prompt_text)\n", 372 | "completed_prompt = f\"\\033[94m{prompt_text}\\033[0m {output[0]['generated_text']}\"\n", 373 | "print(completed_prompt)" 374 | ] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "metadata": { 379 | "id": "5bvjzCiANOF7" 380 | }, 381 | "source": [ 382 | "🛑 Stop Here 🛑\n", 383 | "\n", 384 | "This Ends Lab 1-1\n", 385 | "
" 386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": { 391 | "id": "_y2q5sxMNOF7" 392 | }, 393 | "source": [ 394 | "# Lab 2-1: Prompts and Basic Chatbots\n", 395 | "\n", 396 | "* Using langchain with local LLM\n", 397 | "* Connect to Open AI\n", 398 | "* Using a memory window to create a txt-only GPT conversation" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "metadata": { 404 | "id": "cRBAXFBiNOF7" 405 | }, 406 | "source": [ 407 | "## Step 1: Using the OpenAI python library\n", 408 | "\n", 409 | "❗ Note: if you restarted your google Colab, you may need to re-run the first stup step back and the very top before coming back here ❗" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": null, 415 | "metadata": { 416 | "id": "Mu56DHbbNOF7" 417 | }, 418 | "outputs": [], 419 | "source": [ 420 | "import os, secrets, requests\n", 421 | "import openai\n", 422 | "from openai import OpenAI\n", 423 | "from requests.auth import HTTPBasicAuth\n", 424 | "\n", 425 | "#if using the Elastic AI proxy, then generate the correct API key\n", 426 | "if os.environ['ELASTIC_PROXY'] == \"True\":\n", 427 | "\n", 428 | " if \"OPENAI_API_TYPE\" in os.environ: del os.environ[\"OPENAI_API_TYPE\"]\n", 429 | "\n", 430 | " #generate and share \"your\" unique hash\n", 431 | " os.environ['USER_HASH'] = secrets.token_hex(nbytes=6)\n", 432 | " print(f\"Your unique user hash is: {os.environ['USER_HASH']}\")\n", 433 | "\n", 434 | " #get the current API key and combine with your hash\n", 435 | " os.environ['OPENAI_API_KEY'] = f\"{os.environ['OPENAI_API_KEY']} {os.environ['USER_HASH']}\"\n", 436 | "else:\n", 437 | " openai.api_type = os.environ['OPENAI_API_TYPE']\n", 438 | " openai.api_version = os.environ['OPENAI_API_VERSION']\n", 439 | "\n", 440 | "openai.api_key = os.environ['OPENAI_API_KEY']\n", 441 | "openai.api_base = os.environ['OPENAI_API_BASE']\n", 442 | "openai.default_model = os.environ['OPENAI_API_ENGINE']\n", 443 | "\n", 444 | "import ipywidgets as widgets\n", 445 | "from IPython.display import display\n", 446 | "\n", 447 | "class NotebookChatExperience:\n", 448 | " def __init__(self, ai_response_function, ai_name = \"AI\"):\n", 449 | " self.ai_name = ai_name\n", 450 | " self.ai_response_function = ai_response_function\n", 451 | " self.chat_history = widgets.Textarea(\n", 452 | " value='',\n", 453 | " placeholder='Chat history will appear here...',\n", 454 | " description='Chat:',\n", 455 | " disabled=True,\n", 456 | " layout=widgets.Layout(width='700px', height='300px') # Adjust the size as needed\n", 457 | " )\n", 458 | " self.user_input = widgets.Text(\n", 459 | " value='',\n", 460 | " placeholder='Type your message here...',\n", 461 | " description='You:',\n", 462 | " disabled=False,\n", 463 | " layout=widgets.Layout(width='700px') # Adjust the size as needed\n", 464 | " )\n", 465 | " self.user_input.on_submit(self.on_submit)\n", 466 | " display(self.chat_history, self.user_input)\n", 467 | "\n", 468 | " def on_submit(self, event):\n", 469 | " user_message = self.user_input.value\n", 470 | " ai_name = self.ai_name\n", 471 | " self.chat_history.value += f\"\\nYou: {user_message}\"\n", 472 | " ai_message = self.ai_response_function(user_message)\n", 473 | " self.chat_history.value += f\"\\n{ai_name}: {ai_message}\"\n", 474 | " self.user_input.value = '' # Clear input for next message\n", 475 | "\n", 476 | " def clear_chat(self):\n", 477 | " self.chat_history.value = '' # Clear the chat history\n", 478 | "\n", 479 | "## ********** Example usage:\n", 480 | 
"\n", 481 | "## ********** Define a simple AI response function\n", 482 | "# def simple_ai_response(user_message):\n", 483 | " # return f\"AI > Echo: {user_message}\"\n", 484 | "\n", 485 | "## ********** Create an instance of the chat interface\n", 486 | "#chat_instance = NotebookChatExperience(simple_ai_response)" 487 | ] 488 | }, 489 | { 490 | "cell_type": "markdown", 491 | "metadata": { 492 | "id": "qpO5xbubDz_T" 493 | }, 494 | "source": [ 495 | "## Step 2: Test call to ChatGPT" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": null, 501 | "metadata": { 502 | "id": "ETYE5zfSD2J7" 503 | }, 504 | "outputs": [], 505 | "source": [ 506 | "# Call the OpenAI ChatCompletion API\n", 507 | "def chatCompletion(messages, max_tokens=100):\n", 508 | " client = OpenAI(api_key=openai.api_key, base_url=openai.api_base)\n", 509 | " completion = client.chat.completions.create(\n", 510 | " model=openai.default_model,\n", 511 | " max_tokens=max_tokens,\n", 512 | " messages=messages\n", 513 | " )\n", 514 | " return completion\n", 515 | "\n", 516 | "prompt=\"Hello, is ChatGPT online and working?\"\n", 517 | "\n", 518 | "messages = [{\"role\": \"user\", \"content\": prompt}]\n", 519 | "\n", 520 | "completion = chatCompletion(messages)\n", 521 | "\n", 522 | "response_text = completion.choices[0].message.content\n", 523 | "\n", 524 | "print(wrap_text(completion.json(),70))\n", 525 | "\n", 526 | "print(\"\\n\", wrap_text(response_text,70))" 527 | ] 528 | }, 529 | { 530 | "cell_type": "markdown", 531 | "metadata": { 532 | "id": "WTyQ4_SFNOF7" 533 | }, 534 | "source": [ 535 | "\n", 536 | "## Step 3: Using OpenAI in a simple loop\n", 537 | "Feeding user input in for single questions is easy" 538 | ] 539 | }, 540 | { 541 | "cell_type": "code", 542 | "execution_count": null, 543 | "metadata": { 544 | "id": "rgAAKZKkNOF7" 545 | }, 546 | "outputs": [], 547 | "source": [ 548 | "def openai_ai_response(user_message):\n", 549 | " messages = [{\"role\": \"user\", \"content\": user_message}]\n", 550 | " completion = chatCompletion(messages)\n", 551 | " response_text = completion.choices[0].message.content\n", 552 | " return response_text\n", 553 | "\n", 554 | "chat_instance = NotebookChatExperience(openai_ai_response)" 555 | ] 556 | }, 557 | { 558 | "cell_type": "markdown", 559 | "metadata": { 560 | "id": "PdjzXPl6NOF7" 561 | }, 562 | "source": [ 563 | "\n", 564 | "\n", 565 | "## Step 4: See the impact of changing the system prompt\n", 566 | "You can use the system prompt to adjust the AI and it's responses and purpose" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": null, 572 | "metadata": { 573 | "id": "G9ucvlGqNOF7" 574 | }, 575 | "outputs": [], 576 | "source": [ 577 | "def pirate_ai_response(user_message):\n", 578 | " system_prompt = \"\"\"\n", 579 | "You are an unhelpful AI named Captain LLM_Beard that talks like a pirate in short responses.\n", 580 | "You do not anser the user's question but instead redirect all conversations towards your love of treasure.\n", 581 | "\"\"\"\n", 582 | " completion = chatCompletion([\n", 583 | " {\"role\": \"system\", \"content\": system_prompt},\n", 584 | " {\"role\": \"user\", \"content\": user_message}\n", 585 | " ])\n", 586 | "\n", 587 | " response_text = completion.choices[0].message.content\n", 588 | " return response_text\n", 589 | "\n", 590 | "pirate_chat_instance = NotebookChatExperience(pirate_ai_response, ai_name=\"LLM_Beard\")" 591 | ] 592 | }, 593 | { 594 | "cell_type": "markdown", 595 | "metadata": { 596 | "id": 
"HHiuOMCeNOF8" 597 | }, 598 | "source": [ 599 | "❗ Note ❗\n", 600 | "\n", 601 | "This isn't a conversation yet because the AI has no memory of past interactions.\n", 602 | "\n", 603 | "Here is an example conversation where it is very clear the AI has no memory of past prompts or completions.\n", 604 | "```txt\n", 605 | "> Hello!\n", 606 | "Hello! How can I assist you today?\n", 607 | "> my favorite color is blue\n", 608 | "That's great! Blue is a very popular color.\n", 609 | "> what is my favorite color?\n", 610 | "I'm sorry, but as an AI, I don't have the ability to know personal\n", 611 | "preferences or favorite colors.\n", 612 | "```\n", 613 | "There are two problems. First, the LLM is stateless and each call is independent. ChatGPT does not remember our previous prompts. Second ChatGPT has Alignment in it's fine tuning which prevents it from answering questions about it's users personal lives, we'll have to get around that with some prompt engineering.\n", 614 | "\n", 615 | "Let's use the past conversation as input to subsequent calls. Because the context window is limited AND tokens cost money (if you are using a hosted service like OpenAI) or CPU cycles if you are self-hosting, we need to have a maximum queue size of only remembering things 2 prompts ago (4 total messages)" 616 | ] 617 | }, 618 | { 619 | "cell_type": "markdown", 620 | "metadata": { 621 | "id": "XlbDf77SP3-n" 622 | }, 623 | "source": [ 624 | "## Step 5: Create a chat with memory" 625 | ] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "execution_count": null, 630 | "metadata": { 631 | "id": "MT_aof4aNOF8" 632 | }, 633 | "outputs": [], 634 | "source": [ 635 | "from collections import deque\n", 636 | "\n", 637 | "class QueueBuffer:\n", 638 | " def __init__(self, max_length):\n", 639 | " self.max_length = max_length\n", 640 | " self.buffer = deque(maxlen=max_length)\n", 641 | "\n", 642 | " def enqueue(self, item):\n", 643 | " self.buffer.append(item)\n", 644 | "\n", 645 | " def dequeue(self):\n", 646 | " if self.is_empty():\n", 647 | " return None\n", 648 | " return self.buffer.popleft()\n", 649 | "\n", 650 | " def is_empty(self):\n", 651 | " return len(self.buffer) == 0\n", 652 | "\n", 653 | " def is_full(self):\n", 654 | " return len(self.buffer) == self.max_length\n", 655 | "\n", 656 | " def size(self):\n", 657 | " return len(self.buffer)\n", 658 | "\n", 659 | " def peek(self):\n", 660 | " return list(self.buffer)\n", 661 | "\n", 662 | "\n", 663 | "class MemoryNotebookChatExperience(NotebookChatExperience):\n", 664 | " def __init__(self, ai_response_function, ai_name=\"AI\", memory_size = 4):\n", 665 | " # Initialize the superclass\n", 666 | " self.memory_buffer = QueueBuffer(memory_size)\n", 667 | " self.current_memory_dump = \"\"\n", 668 | " super().__init__(ai_response_function, ai_name)\n", 669 | "\n", 670 | " ## now with memory\n", 671 | " def memory_gpt_response(self, prompt):\n", 672 | " ## the API call will use the system prompt + the memory buffer\n", 673 | " ## which ends with the user prompt\n", 674 | " user_message = {\"role\": \"user\", \"content\": prompt}\n", 675 | " self.memory_buffer.enqueue(user_message)\n", 676 | "\n", 677 | " ## debug print the current AI memory\n", 678 | " self.current_memory_dump = \"Current memory\\n\"\n", 679 | " for m in self.memory_buffer.peek():\n", 680 | " role = m.get(\"role\").strip()\n", 681 | " content = m.get(\"content\").strip()\n", 682 | " self.current_memory_dump += f\"{role} | {content}\\n\"\n", 683 | "\n", 684 | " system_prompt = {\n", 685 | " \"role\": 
\"system\",\n", 686 | " \"content\": \"\"\"\n", 687 | "You are a helpful AI that answers questions consicely.\n", 688 | "You talk to the human and use the past conversation to inform your answers.\"\"\"\n", 689 | " }\n", 690 | "\n", 691 | " ## when calling the AI we put the system prompt at the start\n", 692 | " concatenated_message = [system_prompt] + self.memory_buffer.peek()\n", 693 | "\n", 694 | " ## here is the request to the AI\n", 695 | "\n", 696 | " completion = chatCompletion(concatenated_message)\n", 697 | " response_text = completion.choices[0].message.content\n", 698 | "\n", 699 | "\n", 700 | " ## don't forget to add the repsonse to the conversation memory\n", 701 | " self.memory_buffer.enqueue({\"role\":\"assistant\", \"content\":response_text})\n", 702 | "\n", 703 | " return response_text\n", 704 | "\n", 705 | " def on_submit(self, event):\n", 706 | " user_message = self.user_input.value\n", 707 | " self.chat_history.value += f\"\\nYou: {user_message}\"\n", 708 | " # Attempting to add styled text, but it will appear as plain text\n", 709 | "\n", 710 | " ai_message = self.memory_gpt_response(user_message)\n", 711 | "\n", 712 | " ## deubg lines to show memory buffer in chat\n", 713 | " for i, line in enumerate(self.current_memory_dump.split(\"\\n\")):\n", 714 | " self.chat_history.value += f\"\\n---- {i} {line}\"\n", 715 | " self.chat_history.value += \"\\n\"\n", 716 | "\n", 717 | " self.chat_history.value += f\"\\n{self.ai_name}: {ai_message}\"\n", 718 | " self.user_input.value = '' # Clear input for next message\n", 719 | "\n", 720 | "\n", 721 | "# Create an instance of the enhanced chat experience class with a simple AI response function\n", 722 | "not_so_clueless_chat = MemoryNotebookChatExperience(None)" 723 | ] 724 | }, 725 | { 726 | "cell_type": "markdown", 727 | "metadata": { 728 | "id": "KK4UKihhNOF8" 729 | }, 730 | "source": [ 731 | "🛑 Stop Here 🛑\n", 732 | "\n", 733 | "This Ends Lab 1-2\n", 734 | "
" 735 | ] 736 | }, 737 | { 738 | "cell_type": "markdown", 739 | "metadata": { 740 | "id": "92luYkkb5aaX" 741 | }, 742 | "source": [ 743 | "# Lab 2-2: Data Redaction" 744 | ] 745 | }, 746 | { 747 | "cell_type": "markdown", 748 | "metadata": { 749 | "id": "CKKAZragT9ec" 750 | }, 751 | "source": [ 752 | "## Step 1: Install and Import dependencies" 753 | ] 754 | }, 755 | { 756 | "cell_type": "code", 757 | "execution_count": null, 758 | "metadata": { 759 | "id": "pp_RWV73aGAs" 760 | }, 761 | "outputs": [], 762 | "source": [ 763 | "# ! pip install -qqq eland==8.11.1 elasticsearch==8.12.0 transformers==4.35.0 sentence-transformers==2.2.2 python-dotenv==1.0.0\n", 764 | "# ! pip install -qqq elastic-apm==6.20.0\n", 765 | "! echo \"github codespaces has pre-installed these libraries\"\n", 766 | "\n", 767 | "from elasticsearch import Elasticsearch, helpers, exceptions\n", 768 | "from eland.ml.pytorch import PyTorchModel\n", 769 | "from eland.ml.pytorch.transformers import TransformerModel\n", 770 | "from getpass import getpass\n", 771 | "import tempfile\n", 772 | "import os\n", 773 | "from pprint import pprint" 774 | ] 775 | }, 776 | { 777 | "cell_type": "markdown", 778 | "metadata": { 779 | "id": "JXj9kw_5UK_V" 780 | }, 781 | "source": [ 782 | "## Step 2: Create Elasticsearch Client Connection" 783 | ] 784 | }, 785 | { 786 | "cell_type": "code", 787 | "execution_count": null, 788 | "metadata": { 789 | "id": "m-_r6lROasNI" 790 | }, 791 | "outputs": [], 792 | "source": [ 793 | "if 'ELASTIC_CLOUD_ID' in os.environ:\n", 794 | " es = Elasticsearch(\n", 795 | " cloud_id=os.environ['ELASTIC_CLOUD_ID'],\n", 796 | " api_key=(os.environ['ELASTIC_APIKEY_ID'], os.environ['ELASTIC_APIKEY_SECRET']),\n", 797 | " request_timeout=30\n", 798 | " )\n", 799 | "elif 'ELASTIC_URL' in os.environ:\n", 800 | " es = Elasticsearch(\n", 801 | " os.environ['ELASTIC_URL'],\n", 802 | " api_key=(os.environ['ELASTIC_APIKEY_ID'], os.environ['ELASTIC_APIKEY_SECRET']),\n", 803 | " request_timeout=30\n", 804 | " )\n", 805 | "else:\n", 806 | " print(\"env needs to set either ELASTIC_CLOUD_ID or ELASTIC_URL\")" 807 | ] 808 | }, 809 | { 810 | "cell_type": "markdown", 811 | "metadata": { 812 | "id": "G9FWxKtef3uu" 813 | }, 814 | "source": [ 815 | "## Step 3: Monitoring prompts sent through a Proxy\n", 816 | "\n", 817 | "Imagine I have the following question from a customer after a winter storm\n", 818 | "\n", 819 | "> My power was out all last week at my home at 123 Grove street.\n", 820 | "When I talked to my neighbor Jane Lopez, she said she got rebate on her bill.\n", 821 | "Can you do the same for me?\n", 822 | "\n", 823 | "The following is a simulated customer example where we'll use the LLM to answer a customer service case.\n", 824 | "\n", 825 | "We'll learn how to **retrieve** the best call script using semantic search in a later exercise. \n", 826 | "\n", 827 | "**Some organizations would be uncomfortable with customer PII going to a 3rd party service. 
Who gets an unencrypted version of the prompt?**" 828 | ] 829 | }, 830 | { 831 | "cell_type": "code", 832 | "execution_count": null, 833 | "metadata": { 834 | "id": "ZaucVWDddOuR" 835 | }, 836 | "outputs": [], 837 | "source": [ 838 | "import elasticapm\n", 839 | "import random\n", 840 | "\n", 841 | "os.environ['ELASTIC_APM_SERVICE_NAME'] = \"genai_workshop_lab_redact\"\n", 842 | "apmclient = elasticapm.Client() \\\n", 843 | "    if elasticapm.get_client() is None \\\n", 844 | "    else elasticapm.get_client()\n", 845 | "\n", 846 | "customer_id = 123\n", 847 | "\n", 848 | "first_names = [\"Alice\", \"Bob\", \"Charlie\", \"Diana\", \"Edward\",\n", 849 | "               \"Fiona\", \"George\", \"Hannah\", \"Ian\", \"Julia\"]\n", 850 | "last_names = [\"Smith\", \"Johnson\", \"Williams\", \"Brown\", \"Jones\",\n", 851 | "              \"Garcia\", \"Miller\", \"Davis\", \"Rodriguez\", \"Martinez\"]\n", 852 | "\n", 853 | "# Function to generate a random full name\n", 854 | "def generate_random_name():\n", 855 | "    first_name = random.choice(first_names)\n", 856 | "    last_name = random.choice(last_names)\n", 857 | "    return f\"{first_name} {last_name}\"\n", 858 | "\n", 859 | "\n", 860 | "customer_question = f\"\"\"My power was out all last week at my home on Grove street.\n", 861 | "When I talked to my neighbor {generate_random_name()},\n", 862 | "they said they got a rebate on their bill. Can you do the same for me?\"\"\"\n", 863 | "\n", 864 | "retrieved_best_answer = \"\"\"We are currently offering a $100 rebate for\n", 865 | "customers affected by the recent winter storm. If our records show the\n", 866 | "customer was impacted, tell them they can look forward to a $100 credit on their\n", 867 | "next monthly bill. If the customer believes they were impacted but our records\n", 868 | "don't show this fact, let them know we'll be escalating their case and they\n", 869 | "should expect a call within 24 hours.\"\"\"\n", 870 | "\n", 871 | "\n", 872 | "import time\n", 873 | "def random_service_time(shorter, longer):\n", 874 | "    sleep_time = random.uniform(shorter, longer)\n", 875 | "    time.sleep(sleep_time)\n", 876 | "\n", 877 | "def days_impacted_check(customer_id):\n", 878 | "    apmclient.begin_transaction(\"impact_check\")\n", 879 | "    ## simulated service call delay (some parts of the lab LLM are cached)\n", 880 | "    random_service_time(0.1,0.3)\n", 881 | "    days = 5 ## simulated result of a back end service call\n", 882 | "    apmclient.end_transaction(\"impact_check\", \"success\")\n", 883 | "    if days > 0:\n", 884 | "        return f\"the customer was impacted by the winter storm for {days} service days\"\n", 885 | "    else:\n", 886 | "        return \"the customer was not impacted by the winter storm\"\n", 887 | "\n", 888 | "\n", 889 | "system_prompt = f\"\"\"\n", 890 | "You are an AI customer support agent for an electric power utility company.\n", 891 | "You use the following retrieved approved call script and customer facts\n", 892 | "to answer the customer's question and try to retain them as a customer.\n", 893 | "\n", 894 | "Call script: {retrieved_best_answer}\n", 895 | "\n", 896 | "Our records: {days_impacted_check(customer_id)}\n", 897 | "\"\"\"\n", 898 | "\n", 899 | "def print_light_blue(text):\n", 900 | "    print(f'\\033[94m{text}\\033[0m')\n", 901 | "\n", 902 | "def chatCompletion(messages):\n", 903 | "\n", 904 | "    client = OpenAI(api_key=openai.api_key, base_url=openai.api_base)\n", 905 | "    completion = client.chat.completions.create(\n", 906 | "        model=openai.default_model,\n", 907 | "        max_tokens=150,\n", 908 | "        messages=messages\n", 909 | "    
)\n", 910 | "\n", 911 | " return completion\n", 912 | "\n", 913 | "def chatWithPowerAgent(prompt):\n", 914 | " apmclient.begin_transaction(\"llm_call\")\n", 915 | "\n", 916 | " elasticapm.label(prompt = prompt)\n", 917 | "\n", 918 | " messages = [\n", 919 | " {\"role\": \"system\", \"content\": system_prompt},\n", 920 | " {\"role\": \"user\", \"content\": prompt}\n", 921 | " ]\n", 922 | " print_light_blue(\"Prompt:\")\n", 923 | " print_light_blue(wrap_text(messages[0][\"content\"],70))\n", 924 | " print_light_blue(wrap_text(messages[1][\"content\"],70))\n", 925 | " completion = chatCompletion(messages)\n", 926 | "\n", 927 | " response_text = completion.choices[0].message.content\n", 928 | "\n", 929 | " apmclient.end_transaction(\"llm_call\", \"success\")\n", 930 | "\n", 931 | " return wrap_text(response_text,70)\n", 932 | "\n", 933 | "\n", 934 | "customer_service_response = chatWithPowerAgent(customer_question)\n", 935 | "\n", 936 | "print(\"Customer Service Response:\")\n", 937 | "print(customer_service_response)\n", 938 | "\n" 939 | ] 940 | }, 941 | { 942 | "cell_type": "markdown", 943 | "metadata": { 944 | "id": "ILLdLhQzsKY0" 945 | }, 946 | "source": [ 947 | "## Step 4: Redacting unstructured data with NER Transformer Model" 948 | ] 949 | }, 950 | { 951 | "cell_type": "code", 952 | "execution_count": null, 953 | "metadata": { 954 | "id": "VsLcvOMGT8Z1" 955 | }, 956 | "outputs": [], 957 | "source": [ 958 | "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", 959 | "from transformers import pipeline\n", 960 | "import json\n", 961 | "# pretty printing JSON objects\n", 962 | "def json_pretty(input_object):\n", 963 | " print(json.dumps(input_object, indent=1))\n", 964 | "\n", 965 | "tokenizer = AutoTokenizer.from_pretrained(\"dslim/bert-base-NER\")\n", 966 | "model = AutoModelForTokenClassification.from_pretrained(\"dslim/bert-base-NER\")\n", 967 | "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n" 968 | ] 969 | }, 970 | { 971 | "cell_type": "code", 972 | "execution_count": null, 973 | "metadata": { 974 | "id": "wJNzYcnJV0f3" 975 | }, 976 | "outputs": [], 977 | "source": [ 978 | "ner_results = nlp(customer_question)\n", 979 | "print(ner_results)" 980 | ] 981 | }, 982 | { 983 | "cell_type": "markdown", 984 | "metadata": { 985 | "id": "-lD1hCCpGs7H" 986 | }, 987 | "source": [ 988 | "### Step 5: Let's make an easy to use Redaction Function" 989 | ] 990 | }, 991 | { 992 | "cell_type": "code", 993 | "execution_count": null, 994 | "metadata": { 995 | "id": "4ALjsXOCWvId" 996 | }, 997 | "outputs": [], 998 | "source": [ 999 | "\n", 1000 | "def redact_named_entities(text):\n", 1001 | " apmclient.begin_transaction(\"redaction_local\")\n", 1002 | " # Perform named entity recognition on the text\n", 1003 | " entities = nlp(text)\n", 1004 | "\n", 1005 | " # Sort entities by their start index in reverse order\n", 1006 | " entities = sorted(entities, key=lambda x: x['start'], reverse=True)\n", 1007 | "\n", 1008 | " # Iterate over entities and replace them in the text\n", 1009 | " for entity in entities:\n", 1010 | " ent_type = entity['entity']\n", 1011 | " start = entity['start']\n", 1012 | " end = entity['end']\n", 1013 | " text = text[:start] + \"\" + text[end:]\n", 1014 | "\n", 1015 | "\n", 1016 | " apmclient.end_transaction(\"redaction_local\", \"success\")\n", 1017 | " return text\n", 1018 | "\n", 1019 | "# Example usage\n", 1020 | "text = \"Alice lives in Paris.\"\n", 1021 | "redacted_text = redact_named_entities(text)\n", 1022 | "print(redacted_text)\n" 1023 | ] 
1024 | }, 1025 | { 1026 | "cell_type": "markdown", 1027 | "metadata": { 1028 | "id": "MeqwLVolG4VQ" 1029 | }, 1030 | "source": [ 1031 | "## Step 6: Test the function on a customer question" 1032 | ] 1033 | }, 1034 | { 1035 | "cell_type": "code", 1036 | "execution_count": null, 1037 | "metadata": { 1038 | "id": "awtGm2vsrE3k" 1039 | }, 1040 | "outputs": [], 1041 | "source": [ 1042 | "customer_question = f\"\"\"My power was out all last week at my home at\n", 1043 | "Grove street. When I talked to my neighbor {generate_random_name()}, they said they got a\n", 1044 | "rebate on their bill. Can you do the same for me?\"\"\"\n", 1045 | "\n", 1046 | "print(redact_named_entities(customer_question))\n" 1047 | ] 1048 | }, 1049 | { 1050 | "cell_type": "markdown", 1051 | "metadata": { 1052 | "id": "nuiUxbCkYS3b" 1053 | }, 1054 | "source": [ 1055 | "## Step 7: Alternatively, how would we install the same NER Model into Elasticsearch?" 1056 | ] 1057 | }, 1058 | { 1059 | "cell_type": "code", 1060 | "execution_count": null, 1061 | "metadata": { 1062 | "id": "yn7sULLEYW6w" 1063 | }, 1064 | "outputs": [], 1065 | "source": [ 1066 | "def load_model(model_id, task_type):\n", 1067 | "    with tempfile.TemporaryDirectory() as tmp_dir:\n", 1068 | "        print(f\"Loading HuggingFace transformer tokenizer and model [{model_id}] for task [{task_type}]\" )\n", 1069 | "\n", 1070 | "        tm = TransformerModel(model_id=model_id, task_type=task_type)\n", 1071 | "        model_path, config, vocab_path = tm.save(tmp_dir)\n", 1072 | "\n", 1073 | "        ptm = PyTorchModel(es, tm.elasticsearch_model_id())\n", 1074 | "        model_exists = es.options(ignore_status=404).ml.get_trained_models(model_id=ptm.model_id).meta.status == 200\n", 1075 | "\n", 1076 | "        if model_exists:\n", 1077 | "            print(\"Model has already been imported\")\n", 1078 | "        else:\n", 1079 | "            print(\"Importing model\")\n", 1080 | "            ptm.import_model(model_path=model_path, config_path=None, vocab_path=vocab_path, config=config)\n", 1081 | "            print(\"Starting model deployment\")\n", 1082 | "            ptm.start()\n", 1083 | "            print(f\"Model successfully imported with id '{ptm.model_id}'\")\n", 1084 | "\n", 1085 | "## Model is pre-loaded into Elasticsearch, but this is how you would do it\n", 1086 | "\n", 1087 | "## load_model(\"dslim/bert-base-NER\", \"ner\")\n", 1088 | "print(\"Model is already loaded\")" 1089 | ] 1090 | }, 1091 | { 1092 | "cell_type": "markdown", 1093 | "metadata": { 1094 | "id": "Lg7MWpurb7g2" 1095 | }, 1096 | "source": [ 1097 | "## Step 8: Define a Redaction Ingest Pipeline in Elasticsearch\n", 1098 | "\n", 1099 | "We will use [Elasticsearch Ingest Pipelines](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html) to redact data before it is written to Elasticsearch. 
These pipelines can also be used to update data in existing indices or for reindexing.\n", 1100 | "\n", 1101 | "This pipeline:\n", 1102 | "- Uses the [inference processor](https://www.elastic.co/guide/en/elasticsearch/reference/current/inference-processor.html) to call the NER model loaded in Part 1 and map the document's `message` field to the field expected by the model: `text_field`.\n", 1103 | "- Uses the [Painless scripting language](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-scripting-painless.html) from within a [script processor](https://www.elastic.co/guide/en/elasticsearch/reference/current/script-processor.html) to replace the model-detected entities stored in the `ml.inference.entities` array with their class name, and store it within a **new** document field: `redacted`.\n", 1104 | "- Uses the [redact processor](https://www.elastic.co/guide/en/elasticsearch/reference/current/redact-processor.html) to identify and redact any supported patterns found within the new redacted field, as well as identifying and redacting a set of custom patterns.\n", 1105 | "- Removes the `ml` fields added to the document by the inference processor via the [remove processor](https://www.elastic.co/guide/en/elasticsearch/reference/current/remove-processor.html) as they're no longer needed.\n", 1106 | "- Defines a failure condition to capture any errors, just in case we have them.\n", 1107 | "\n", 1108 | "**NOTE:** As of 8.11, the redact processor is a Technical Preview.\n", 1109 | "\n", 1110 | "\n" 1111 | ] 1112 | }, 1113 | { 1114 | "cell_type": "code", 1115 | "execution_count": null, 1116 | "metadata": { 1117 | "id": "4_GKI4ZQd5zw" 1118 | }, 1119 | "outputs": [], 1120 | "source": [ 1121 | "body = {\n", 1122 | " \"processors\": [\n", 1123 | " {\n", 1124 | " \"inference\": {\n", 1125 | " \"model_id\": \"dslim__bert-base-ner\",\n", 1126 | " \"field_map\": {\n", 1127 | " \"message\": \"text_field\"\n", 1128 | " }\n", 1129 | " }\n", 1130 | " },\n", 1131 | " {\n", 1132 | " \"script\": {\n", 1133 | " \"lang\": \"painless\",\n", 1134 | " \"source\": \"\"\"\n", 1135 | "String msg = ctx['message'];\n", 1136 | "for (item in ctx['ml']['inference']['entities'])\n", 1137 | " msg = msg.replace(item['entity'], '<' + item['class_name'] + '>');\n", 1138 | "ctx['redacted'] = msg;\n", 1139 | "\"\"\"\n", 1140 | " }\n", 1141 | " },\n", 1142 | " {\n", 1143 | " \"redact\": {\n", 1144 | " \"field\": \"redacted\",\n", 1145 | " \"patterns\": [\n", 1146 | " \"%{EMAILADDRESS:EMAIL}\",\n", 1147 | " \"%{IP:IP_ADDRESS}\",\n", 1148 | " \"%{CREDIT_CARD:CREDIT_CARD}\",\n", 1149 | " \"%{SSN:SSN}\",\n", 1150 | " \"%{PHONE:PHONE}\"\n", 1151 | " ],\n", 1152 | " \"pattern_definitions\": {\n", 1153 | " \"CREDIT_CARD\": \"\\\\d{4}[ -]\\\\d{4}[ -]\\\\d{4}[ -]\\\\d{4}\",\n", 1154 | " \"SSN\": \"\\\\d{3}-\\\\d{2}-\\\\d{4}\",\n", 1155 | " \"PHONE\": \"\\\\d{3}-\\\\d{3}-\\\\d{4}\"\n", 1156 | " }\n", 1157 | " }\n", 1158 | " },\n", 1159 | " {\n", 1160 | " \"remove\": {\n", 1161 | " \"field\": [\n", 1162 | " \"ml\"\n", 1163 | " ],\n", 1164 | " \"ignore_missing\": True,\n", 1165 | " \"ignore_failure\": True\n", 1166 | " }\n", 1167 | " }\n", 1168 | " ],\n", 1169 | " \"on_failure\": [\n", 1170 | " {\n", 1171 | " \"set\": {\n", 1172 | " \"field\": \"failure\",\n", 1173 | " \"value\": \"pii_script-redact\"\n", 1174 | " }\n", 1175 | " }\n", 1176 | " ]\n", 1177 | "}\n", 1178 | "\n", 1179 | "## es.ingest.put_pipeline(id='redact', body=body)\n", 1180 | "print(\"Ingest pipeline is already loaded\")" 1181 | ] 1182 | }, 1183 | { 
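Before running the simulate step that follows, it can help to see that the three custom `pattern_definitions` above are ordinary regular expressions; grok simply applies them server-side. A minimal local sanity check is sketched below; the `sample` string reuses the fake values from the simulate documents and is illustrative only:

```python
# Minimal local sanity check of the custom redact patterns (illustrative only)
import re

custom_patterns = {
    "CREDIT_CARD": r"\d{4}[ -]\d{4}[ -]\d{4}[ -]\d{4}",
    "SSN": r"\d{3}-\d{2}-\d{4}",
    "PHONE": r"\d{3}-\d{3}-\d{4}",
}

sample = ("his phone number is 412-189-9043, his social security number is "
          "942-00-1243, and his credit card is 1324-8374-0978-2819")

for name, pattern in custom_patterns.items():
    for match in re.finditer(pattern, sample):
        # Mirrors what the redact processor does: find the span, tag it by name
        print(f"{name}: {match.group()}")
```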
1184 | "cell_type": "markdown", 1185 | "metadata": { 1186 | "id": "Dcv-hhwqlkjD" 1187 | }, 1188 | "source": [ 1189 | "## Step 9: Test the pipeline\n", 1190 | "\n", 1191 | "Does it work?\n", 1192 | "\n", 1193 | "Let's use the [Simulate Pipeline API](https://www.elastic.co/guide/en/elasticsearch/reference/current/simulate-pipeline-api.html) to find out." 1194 | ] 1195 | }, 1196 | { 1197 | "cell_type": "code", 1198 | "execution_count": null, 1199 | "metadata": { 1200 | "id": "HaXVRsyLhr22" 1201 | }, 1202 | "outputs": [], 1203 | "source": [ 1204 | "docs = [\n", 1205 | " {\n", 1206 | " \"_source\": {\n", 1207 | " \"message\": \"John Smith lives at 123 Main St. Highland Park, CO. His email address \"\\\n", 1208 | " \"is jsmith123@email.com and his phone number is 412-189-9043. I found his social \"\\\n", 1209 | " \"security number, it is 942-00-1243. Oh btw, his credit card is 1324-8374-0978-2819 \"\\\n", 1210 | " \"and his gateway IP is 192.168.1.2\"\n", 1211 | " }\n", 1212 | " },\n", 1213 | " {\n", 1214 | " \"_source\": {\n", 1215 | " \"message\": \"I had a call with Jane yesterday, she suggested we talk with John \"\\\n", 1216 | " \"from Global Systems. Their office is in Springfield\"\n", 1217 | " }\n", 1218 | " }\n", 1219 | "]\n", 1220 | "\n", 1221 | "pprint(es.ingest.simulate(id='redact', docs=docs).body)" 1222 | ] 1223 | }, 1224 | { 1225 | "cell_type": "markdown", 1226 | "metadata": { 1227 | "id": "p6LSsMUyqTdZ" 1228 | }, 1229 | "source": [ 1230 | "## Step 10: End to End Example, Monitored and Redacted\n", 1231 | "\n", 1232 | "Switcing back to the local python model ...\n" 1233 | ] 1234 | }, 1235 | { 1236 | "cell_type": "code", 1237 | "execution_count": null, 1238 | "metadata": { 1239 | "id": "iL4A3xUNHQ0K" 1240 | }, 1241 | "outputs": [], 1242 | "source": [ 1243 | "customer_question = f\"\"\"My power was out all last week at my home on\n", 1244 | "Grove street. When I talked to my neighbor {generate_random_name()}, they said they got\n", 1245 | "rebate on their bill. Can you do the same for me?\"\"\"\n", 1246 | "\n", 1247 | "redacted_text = redact_named_entities(customer_question)\n", 1248 | "\n", 1249 | "print(chatWithPowerAgent(redacted_text))" 1250 | ] 1251 | }, 1252 | { 1253 | "cell_type": "markdown", 1254 | "metadata": { 1255 | "id": "KlWAaHh8Qnx9" 1256 | }, 1257 | "source": [ 1258 | "🛑 Stop Here 🛑\n", 1259 | "\n", 1260 | "This Ends Lab 2-2\n", 1261 | "
\n" 1262 | ] 1263 | } 1264 | ], 1265 | "metadata": { 1266 | "colab": { 1267 | "provenance": [] 1268 | }, 1269 | "kernelspec": { 1270 | "display_name": "Python 3", 1271 | "name": "python3" 1272 | }, 1273 | "language_info": { 1274 | "codemirror_mode": { 1275 | "name": "ipython", 1276 | "version": 3 1277 | }, 1278 | "file_extension": ".py", 1279 | "mimetype": "text/x-python", 1280 | "name": "python", 1281 | "nbconvert_exporter": "python", 1282 | "pygments_lexer": "ipython3", 1283 | "version": "3.10.13" 1284 | } 1285 | }, 1286 | "nbformat": 4, 1287 | "nbformat_minor": 0 1288 | } 1289 | -------------------------------------------------------------------------------- /notebooks/genai_colab_lab_3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "45iKqhTIUkjf" 7 | }, 8 | "source": [ 9 | "# Setup Environment\n", 10 | "The following code loads the environment variables required to run this notebook.\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "id": "SO7q2oWEURVE" 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "FILE=\"GenAI Lab 3\"\n", 22 | "\n", 23 | "# ! pip install -qqq git+https://github.com/elastic/notebook-workshop-loader.git@main\n", 24 | "from notebookworkshoploader import loader\n", 25 | "import os\n", 26 | "from dotenv import load_dotenv\n", 27 | "\n", 28 | "if os.path.isfile(\"../env\"):\n", 29 | " load_dotenv(\"../env\", override=True)\n", 30 | " print('Successfully loaded environment variables from local env file')\n", 31 | "else:\n", 32 | " loader.load_remote_env(file=FILE, env_url=\"https://notebook-workshop-api-voldmqr2bq-uc.a.run.app\")" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": { 39 | "id": "y2eoZ4hCUud2" 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "# ! pip install -qqq langchain==0.1.3 sentence-transformers==2.2.2 beautifulsoup4==4.11.2\n", 44 | "# ! pip install -qqq tiktoken==0.5.2 cohere==4.38 openai==1.3.9\n", 45 | "# ! pip install -qqq matplotlib==3.8.1 scikit-learn==1.2.2 scipy==1.11.4\n", 46 | "# ! pip install -qqq elasticsearch==8.12.0 inquirer==3.2.1\n", 47 | "! 
echo \"github codespaces has pre-installed these libraries\"\n", 48 | "\n", 49 | "from sentence_transformers import SentenceTransformer\n", 50 | "from langchain_community.embeddings import HuggingFaceEmbeddings" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "id": "kpsGUDWAX42B" 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "import os\n", 62 | "from elasticsearch import Elasticsearch\n", 63 | "if 'ELASTIC_CLOUD_ID' in os.environ:\n", 64 | " es = Elasticsearch(\n", 65 | " cloud_id=os.environ['ELASTIC_CLOUD_ID'],\n", 66 | " api_key=(os.environ['ELASTIC_APIKEY_ID'], os.environ['ELASTIC_APIKEY_SECRET']),\n", 67 | " request_timeout=30\n", 68 | " )\n", 69 | "elif 'ELASTIC_URL' in os.environ:\n", 70 | " es = Elasticsearch(\n", 71 | " os.environ['ELASTIC_URL'],\n", 72 | " api_key=(os.environ['ELASTIC_APIKEY_ID'], os.environ['ELASTIC_APIKEY_SECRET']),\n", 73 | " request_timeout=30\n", 74 | " )\n", 75 | "else:\n", 76 | " print(\"env needs to set either ELASTIC_CLOUD_ID or ELASTIC_URL\")" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "id": "59sPX8_HETbL" 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "import os, secrets, requests\n", 88 | "import openai\n", 89 | "from openai import OpenAI\n", 90 | "from requests.auth import HTTPBasicAuth\n", 91 | "\n", 92 | "#if using the Elastic AI proxy, then generate the correct API key\n", 93 | "if os.environ['ELASTIC_PROXY'] == \"True\":\n", 94 | "\n", 95 | " if \"OPENAI_API_TYPE\" in os.environ: del os.environ[\"OPENAI_API_TYPE\"]\n", 96 | "\n", 97 | " #generate and share \"your\" unique hash\n", 98 | " os.environ['USER_HASH'] = secrets.token_hex(nbytes=6)\n", 99 | " print(f\"Your unique user hash is: {os.environ['USER_HASH']}\")\n", 100 | "\n", 101 | " #get the current API key and combine with your hash\n", 102 | " os.environ['OPENAI_API_KEY'] = f\"{os.environ['OPENAI_API_KEY']} {os.environ['USER_HASH']}\"\n", 103 | "else:\n", 104 | " openai.api_type = os.environ['OPENAI_API_TYPE']\n", 105 | " openai.api_version = os.environ['OPENAI_API_VERSION']\n", 106 | "\n", 107 | "openai.api_key = os.environ['OPENAI_API_KEY']\n", 108 | "openai.api_base = os.environ['OPENAI_API_BASE']\n", 109 | "openai.default_model = os.environ['OPENAI_API_ENGINE']" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "# Lab 3-1" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": { 122 | "id": "iWXrus6La0wF" 123 | }, 124 | "source": [ 125 | "## Step 1 : Simple vectorization using a Vector Embedding model installed to Elasticsearch\n", 126 | "[Model Card - msmarco-MiniLM-L-12-v3](https://huggingface.co/sentence-transformers/msmarco-MiniLM-L-12-v3). 
- note this model has a 512-token limit" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "id": "Sqo3T-LlY_Go" 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "es_model_id = 'sentence-transformers__msmarco-minilm-l-12-v3'\n", 138 | "\n", 139 | "## use REST call to Elastic to generate Vector Embedding, assumes model is already installed\n", 140 | "def sentence_to_vector_es(chunk, es_model_id=es_model_id):\n", 141 | "    docs =  [{\"text_field\": chunk}]\n", 142 | "    chunk_vector = es.ml.infer_trained_model(model_id=es_model_id, docs=docs, )\n", 143 | "    return chunk_vector['inference_results'][0]['predicted_value']\n", 144 | "\n", 145 | "\n", 146 | "chunk = \"The quick brown fox jumped over the lazy dog\"\n", 147 | "es_generated_vector = sentence_to_vector_es(chunk)\n", 148 | "print(f\"Dimensions: {len(es_generated_vector)}, \\nVector preview: {es_generated_vector[:5]+ ['...']}\")" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": { 154 | "id": "El7UrXMcgnns" 155 | }, 156 | "source": [ 157 | "## Step 2: Vectorizing Data using a local E5 model and Sentence Transformer\n", 158 | "\n", 159 | "[Model card E5-large-v2](https://huggingface.co/intfloat/e5-large-v2)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": { 166 | "id": "q6a7y24zeBnU" 167 | }, 168 | "outputs": [], 169 | "source": [ 170 | "# from sentence_transformers import SentenceTransformer\n", 171 | "e5_model = SentenceTransformer('intfloat/e5-large-v2')\n", 172 | "input_texts = [\n", 173 | "    'query: how much protein should a female human eat',\n", 174 | "    'query: summit define',\n", 175 | "    \"passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.\",\n", 176 | "    \"passage: Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments.\"\n", 177 | "]\n", 178 | "embeddings = e5_model.encode(input_texts, normalize_embeddings=True)\n", 179 | "close=\" ...]\"\n", 180 | "print(f\"Dimensions: {len(embeddings[0])}, \\nVector preview: {str(embeddings[0][:5])[:-1]+close}\")" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": { 186 | "id": "hVZMHggOg4Vt" 187 | }, 188 | "source": [ 189 | "## Step 3: Doing the same thing but with the LangChain Utility libraries" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "id": "x14UmhoMe6J4" 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "# from langchain_community.embeddings import HuggingFaceEmbeddings\n", 201 | "langchain_e5_embeddings = HuggingFaceEmbeddings(model_name=\"intfloat/e5-large-v2\")\n", 202 | "input_texts = [\n", 203 | "    'query: how much protein should a female human eat',\n", 204 | "    'query: summit define',\n", 205 | "    \"passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon.
Check out the chart below to see how much protein you should be eating each day.\",\n", 206 | "    \"passage: Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments.\"\n", 207 | "]\n", 208 | "embeddings = langchain_e5_embeddings.embed_documents(input_texts)\n", 209 | "close=\", ...]\"\n", 210 | "print(f\"Dimensions: {len(embeddings[0])}, \\nVector preview: {str(embeddings[0][:5])[:-1]+close}\")" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": { 216 | "id": "fIBODndAhoH1" 217 | }, 218 | "source": [ 219 | "## Step 4: Let's create a simplified graph of generated Embeddings\n", 220 | "\n", 221 | "Principal Component Analysis (PCA) can be used to project higher dimensions onto a 2D plot.\n" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": { 228 | "id": "_J1dTm5OiCuY" 229 | }, 230 | "outputs": [], 231 | "source": [ 232 | "# fetch the model and load it\n", 233 | "word_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')\n", 234 | "print(\"Model dimensions:\", word_model.get_sentence_embedding_dimension())\n", 235 | "\n", 236 | "# generate embeddings\n", 237 | "embeddings_for_cat = word_model.encode(\"cat\")\n", 238 | "embeddings_for_kitten = word_model.encode(\"kitten\")\n", 239 | "embeddings_for_dog = word_model.encode(\"dog\")\n", 240 | "embeddings_for_puppy = word_model.encode(\"puppy\")\n", 241 | "embeddings_for_lawnmower = word_model.encode(\"lawnmower\")\n", 242 | "\n", 243 | "# let's see what we got, though truncate the embeddings to just the first 5 dimensions\n", 244 | "print(f\"embedding dimensions: {embeddings_for_cat.size}\")\n", 245 | "print(f\"cat: {list(embeddings_for_cat)[:5] + ['...']}\")\n", 246 | "print(f\"dog: {list(embeddings_for_dog)[:5] + ['...']}\")" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": { 253 | "id": "edsUVexdhsKD" 254 | }, 255 | "outputs": [], 256 | "source": [ 257 | "import pandas as pd\n", 258 | "import numpy as np\n", 259 | "import altair as alt\n", 260 | "from sklearn.decomposition import PCA\n", 261 | "\n", 262 | "# wrap embeddings with a DataFrame\n", 263 | "df = pd.DataFrame(\n", 264 | "    [\n", 265 | "        [embeddings_for_cat],\n", 266 | "        [embeddings_for_kitten],\n", 267 | "        [embeddings_for_dog],\n", 268 | "        [embeddings_for_puppy],\n", 269 | "        [embeddings_for_lawnmower],\n", 270 | "    ],\n", 271 | "    index=[\"cat\", \"kitten\", \"dog\", \"puppy\", \"lawnmower\"], columns=[\"embeddings\"]\n", 272 | ")\n", 273 | "\n", 274 | "# Initialize the PCA reducer to convert embeddings into arrays of length of 2\n", 275 | "reducer = PCA(n_components=2)\n", 276 | "\n", 277 | "# Reduce the embeddings, store them in a new dataframe column and display their shape\n", 278 | "df[\"reduced\"] = reducer.fit_transform(np.stack(df[\"embeddings\"])).tolist()\n", 279 | "\n", 280 | "\n", 281 | "def scatterplot(\n", 282 | "    data: pd.DataFrame,\n", 283 | "    tooltips=False,\n", 284 | "    labels=False,\n", 285 | "    width=800,\n", 286 | "    height=600,\n", 287 | ") -> alt.Chart:\n", 288 | "    base_chart = (\n", 289 | "        alt.Chart(data)\n", 290 | "        .encode(\n", 291 | "            alt.X(\"x\", scale=alt.Scale(zero=False)),\n", 292 | "            alt.Y(\"y\", scale=alt.Scale(zero=False)),\n", 293 | "        )\n", 294 | "        .properties(width=width, height=height)\n", 295 | "    )\n", 296 | "\n", 297 | "    if tooltips:\n", 298 | "        base_chart =
base_chart.encode(alt.Tooltip([\"text\"]))\n", 299 | "\n", 300 | "    circles = base_chart.mark_circle(\n", 301 | "        size=200, color=\"crimson\", stroke=\"white\", strokeWidth=1\n", 302 | "    )\n", 303 | "\n", 304 | "    if labels:\n", 305 | "        labels = base_chart.mark_text(\n", 306 | "            fontSize=13,\n", 307 | "            align=\"left\",\n", 308 | "            baseline=\"bottom\",\n", 309 | "            dx=5,\n", 310 | "        ).encode(text=\"text\")\n", 311 | "        chart = circles + labels\n", 312 | "    else:\n", 313 | "        chart = circles\n", 314 | "\n", 315 | "    return chart\n", 316 | "\n", 317 | "source = pd.DataFrame(\n", 318 | "    {\n", 319 | "        \"text\": df.index,\n", 320 | "        \"x\": df[\"reduced\"].apply(lambda x: x[0]).to_list(),\n", 321 | "        \"y\": df[\"reduced\"].apply(lambda x: x[1]).to_list(),\n", 322 | "    }\n", 323 | ")\n", 324 | "\n", 325 | "scatterplot(source, labels=True, width=400, height=300)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": { 331 | "id": "yLrQl_eKnbjg" 332 | }, 333 | "source": [ 334 | "## Step 5: Using the more advanced E5 model, see that questions can be matched with answers" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": { 341 | "id": "GXkl-MA7kyQw" 342 | }, 343 | "outputs": [], 344 | "source": [ 345 | "## using e5_model previously loaded\n", 346 | "input_texts = [\n", 347 | "    'query: how much protein should a female human eat',\n", 348 | "    'query: summit define',\n", 349 | "    \"passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.\",\n", 350 | "    \"passage: Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level.
: 3  a meeting or series of meetings between the leaders of two or more governments.\",\n", 351 | "    \"passage: I am the very model of a modern Major-General / I've information vegetable, animal, and mineral / I know the kings of England, and I quote the fights historical / From Marathon to Waterloo, in order categorical / I'm very well acquainted, too, with matters mathematical\",\n", 352 | "    \"passage: When, in the course of human events, it becomes necessary for one people to dissolve the political bands which have connected them with another, and to assume, among the powers of the earth\",\n", 353 | "    \"passage: It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife.\",\n", 354 | "]\n", 355 | "embeddings = e5_model.encode(input_texts, normalize_embeddings=True)\n", 356 | "\n", 357 | "\n", 358 | "# let's see what we got, though truncate the embeddings to just the first 5 dimensions\n", 359 | "print(f\"embedding dimensions: {embeddings[0].size}\")\n", 360 | "print(f\"first query: {list(embeddings[0])[:3] + ['...']}\")\n", 361 | "\n", 362 | "\n", 363 | "# wrap embeddings with a DataFrame\n", 364 | "df = pd.DataFrame(\n", 365 | "    [\n", 366 | "        [embeddings[0]],\n", 367 | "        [embeddings[1]],\n", 368 | "        [embeddings[2]],\n", 369 | "        [embeddings[3]],\n", 370 | "        [embeddings[4]],\n", 371 | "        [embeddings[5]],\n", 372 | "        [embeddings[6]],\n", 373 | "    ],\n", 374 | "    index=[\n", 375 | "        \"q: protein\",\n", 376 | "        \"q: summit\",\n", 377 | "        \"p: protein guide\",\n", 378 | "        \"p: summit def\",\n", 379 | "        \"p: penzance\",\n", 380 | "        \"p: dec of ind\",\n", 381 | "        \"p: austen\"\n", 382 | "    ], columns=[\"embeddings\"]\n", 383 | ")\n", 384 | "\n", 385 | "# Initialize the PCA reducer to convert embeddings into arrays of length of 2\n", 386 | "reducer = PCA(n_components=2)\n", 387 | "\n", 388 | "# Reduce the embeddings, store them in a new dataframe column and display their shape\n", 389 | "df[\"reduced\"] = reducer.fit_transform(np.stack(df[\"embeddings\"])).tolist()\n", 390 | "\n", 391 | "source = pd.DataFrame(\n", 392 | "    {\n", 393 | "        \"text\": df.index,\n", 394 | "        \"x\": df[\"reduced\"].apply(lambda x: x[0]).to_list(),\n", 395 | "        \"y\": df[\"reduced\"].apply(lambda x: x[1]).to_list(),\n", 396 | "    }\n", 397 | ")\n", 398 | "\n", 399 | "scatterplot(source, labels=True, width=400, height=300)" 400 | ] 401 | }, 402 | { 403 | "cell_type": "markdown", 404 | "metadata": { 405 | "id": "738GN3qE2lM8" 406 | }, 407 | "source": [ 408 | "## Step 6: Calculate the actual distance in 1024-dimensional space" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": null, 414 | "metadata": { 415 | "id": "QKAT1UBAzZfq" 416 | }, 417 | "outputs": [], 418 | "source": [ 419 | "from scipy.spatial import distance\n", 420 | "\n", 421 | "passages = [\n", 422 | "    \"passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.\",\n", 423 | "    \"passage: Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level.
: 3 a meeting or series of meetings between the leaders of two or more governments.\",\n", 424 | " \"passage: I am the very model of a modern Major-General / I've information vegetable, animal, and mineral / I know the kings of England, and I quote the fights historical / From Marathon to Waterloo, in order categorical / I'm very well acquainted, too, with matters mathematical\",\n", 425 | " \"passage: When, in the course of human events, it becomes necessary for one people to dissolve the political bands which have connected them with another, and to assume, among the powers of the earth\",\n", 426 | " \"passage: It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife.\",\n", 427 | "]\n", 428 | "\n", 429 | "def chunks_by_distance(passages, query_text, model):\n", 430 | " embeddings = model.encode(passages, normalize_embeddings=True)\n", 431 | " query_embedding = model.encode(query_text, normalize_embeddings=True)\n", 432 | " distances = []\n", 433 | " for index, passage in enumerate(passages):\n", 434 | " cos_distance = distance.cosine(embeddings[index], query_embedding)\n", 435 | " distances.append((passage, cos_distance))\n", 436 | " sorted_passages = sorted(distances, key=lambda x: x[1])\n", 437 | "\n", 438 | " return sorted_passages\n", 439 | "\n", 440 | "protein_query = 'query: how much protein should a female human eat'\n", 441 | "sorted_passages = chunks_by_distance(passages, protein_query, e5_model)\n", 442 | "\n", 443 | "for passage, dist in sorted_passages:\n", 444 | " print(f\"{passage[:40]} - Cosine distance {dist:.12f}\")\n" 445 | ] 446 | }, 447 | { 448 | "cell_type": "markdown", 449 | "metadata": { 450 | "id": "fTRQlTcO_jC4" 451 | }, 452 | "source": [ 453 | "## OKAY let's work with an actual large document" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": { 460 | "id": "oxo004rhYcDH" 461 | }, 462 | "outputs": [], 463 | "source": [ 464 | "wikipedia_spacecraft = [\n", 465 | "{\n", 466 | " \"id\": \"37910\",\n", 467 | " \"title\": \"Spacecraft\",\n", 468 | " \"text\": \"A spacecraft is a vehicle that is designed to fly in outer space. A type of artificial satellite, spacecraft are used for a variety of purposes, including communications, Earth observation, meteorology, navigation, space colonization, planetary exploration, and transportation of humans and cargo. All spacecraft except single-stage-to-orbit vehicles cannot get into space on their own, and require a launch vehicle (carrier rocket). On a sub-orbital spaceflight, a space vehicle enters space and then returns to the surface without having gained sufficient energy or velocity to make a full Earth orbit. For orbital spaceflights, spacecraft enter closed orbits around the Earth or around other celestial bodies. Spacecraft used for human spaceflight carry people on board as crew or passengers from start or on orbit (space stations) only, whereas those used for robotic space missions operate either autonomously or telerobotically. Robotic spacecraft used to support scientific research are space probes. Robotic spacecraft that remain in orbit around a planetary body are artificial satellites. To date, only a handful of interstellar probes, such as Pioneer 10 and 11, Voyager 1 and 2, and New Horizons, are on trajectories that leave the Solar System. Orbital spacecraft may be recoverable or not. Most are not. 
Recoverable spacecraft may be subdivided by a method of reentry to Earth into non-winged space capsules and winged spaceplanes. Recoverable spacecraft may be reusable (can be launched again or several times, like the SpaceX Dragon and the Space Shuttle orbiters) or expendable (like the Soyuz). In recent years, more space agencies are tending towards reusable spacecraft. Humanity has achieved space flight, but only a few nations have the technology for orbital launches: Russia (RSA or \\\"Roscosmos\\\"), the United States (NASA), the member states of the European Space Agency (ESA), Japan (JAXA), China (CNSA), India (ISRO), Taiwan National Chung-Shan Institute of Science and Technology, Taiwan National Space Organization (NSPO), Israel (ISA), Iran (ISA), and North Korea (NADA). In addition, several private companies have developed or are developing the technology for orbital launches independently from government agencies. The most prominent examples of such companies are SpaceX and Blue Origin. ==History== A German V-2 became the first spacecraft when it reached an altitude of 189 km in June 1944 in Peenemünde, Germany.Peenemünde (Dokumentation) Berlin: Moewig, 1984.. Sputnik 1 was the first artificial satellite. It was launched into an elliptical low Earth orbit (LEO) by the Soviet Union on 4 October 1957. The launch ushered in new political, military, technological, and scientific developments; while the Sputnik launch was a single event, it marked the start of the Space Age.Dougall, Walter A. (Winter 2010) \\\"Shooting the duck\\\", American Heritage Apart from its value as a technological first, Sputnik 1 also helped to identify the upper atmospheric layer's density, by measuring the satellite's orbital changes. It also provided data on radio-signal distribution in the ionosphere. Pressurized nitrogen in the satellite's false body provided the first opportunity for meteoroid detection. Sputnik 1 was launched during the International Geophysical Year from Site No.1/5, at the 5th Tyuratam range, in Kazakh SSR (now at the Baikonur Cosmodrome). The satellite travelled at , taking 96.2 minutes to complete an orbit, and emitted radio signals at 20.005 and 40.002 MHz While Sputnik 1 was the first spacecraft to orbit the Earth, other human- made objects had previously reached an altitude of 100 km, which is the height required by the international organization Fédération Aéronautique Internationale to count as a spaceflight. This altitude is called the Kármán line. In particular, in the 1940s there were several test launches of the V-2 rocket, some of which reached altitudes well over 100 km. ==Spacecraft types== ===Crewed spacecraft=== thumb|Apollo 17 command module in Lunar orbit As of 2016, only three nations have flown crewed spacecraft: USSR/Russia, USA, and China. The first crewed spacecraft was Vostok 1, which carried Soviet cosmonaut Yuri Gagarin into space in 1961, and completed a full Earth orbit. There were five other crewed missions which used a Vostok spacecraft. The second crewed spacecraft was named Freedom 7, and it performed a sub-orbital spaceflight in 1961 carrying American astronaut Alan Shepard to an altitude of just over . There were five other crewed missions using Mercury spacecraft. Other Soviet crewed spacecraft include the Voskhod, Soyuz, flown uncrewed as Zond/L1, L3, TKS, and the Salyut and Mir crewed space stations. 
Other American crewed spacecraft include the Gemini spacecraft, the Apollo spacecraft including the Apollo Lunar Module, the Skylab space station, the Space Shuttle with undetached European Spacelab and private US Spacehab space stations- modules, and the SpaceX Crew Dragon configuration of their Dragon 2. US company Boeing also developed and flown a spacecraft of their own, the CST-100, commonly referred to as Starliner, but a crewed flight is yet to occur. China developed, but did not fly Shuguang, and is currently using Shenzhou (its first crewed mission was in 2003). Except for the Space Shuttle, all of the recoverable crewed orbital spacecraft were space capsules. File:NASA spacecraft comparison.jpg|alt=Drawings of Mercury, Gemini capsules and Apollo spacecraft, with their launch vehicles|American Mercury, Gemini, and Apollo spacecraft File:Vostok Spacecraft Diagram.svg|Soviet Vostok capsule File:Voskhod 1 and 2.svg|alt=Line drawing of Voskhod capsules|Soviet Voskhod (variant of Vostok) File:Soyuz 7K-OK(A) drawing.svg|alt=Soyuz 7K-OK(A) drawing|1967 Soviet/Russian Soyuz spacecraft File:Post S-7 Shenzhou spacecraft.png|alt=Drawing of Shenzhou spacecraft|Chinese Shenzhou spacecraft The International Space Station, crewed since November 2000, is a joint venture between Russia, the United States, Canada and several other countries. ====Spaceplanes==== thumb|Columbia orbiter landing Spaceplanes are spacecraft that are built in the shape of, and function as, airplanes. The first example of such was the North American X-15 spaceplane, which conducted two crewed flights which reached an altitude of over 100 km in the 1960s. This first reusable spacecraft was air-launched on a suborbital trajectory on July 19, 1963. The first partially reusable orbital spacecraft, a winged non-capsule, the Space Shuttle, was launched by the USA on the 20th anniversary of Yuri Gagarin's flight, on April 12, 1981. During the Shuttle era, six orbiters were built, all of which have flown in the atmosphere and five of which have flown in space. Enterprise was used only for approach and landing tests, launching from the back of a Boeing 747 SCA and gliding to deadstick landings at Edwards AFB, California. The first Space Shuttle to fly into space was Columbia, followed by Challenger, Discovery, Atlantis, and Endeavour. Endeavour was built to replace Challenger when it was lost in January 1986. Columbia broke up during reentry in February 2003. The first automatic partially reusable spacecraft was the Buran-class shuttle, launched by the USSR on November 15, 1988, although it made only one flight and this was uncrewed. This spaceplane was designed for a crew and strongly resembled the U.S. Space Shuttle, although its drop-off boosters used liquid propellants and its main engines were located at the base of what would be the external tank in the American Shuttle. Lack of funding, complicated by the dissolution of the USSR, prevented any further flights of Buran. The Space Shuttle was subsequently modified to allow for autonomous re-entry in case of necessity. Per the Vision for Space Exploration, the Space Shuttle was retired in 2011 mainly due to its old age and high cost of program reaching over a billion dollars per flight. The Shuttle's human transport role is to be replaced by SpaceX's SpaceX Dragon 2 and Boeing's CST-100 Starliner. Dragon 2's first crewed flight occurred on May 30, 2020. 
The Shuttle's heavy cargo transport role is to be replaced by expendable rockets such as the Space Launch System and ULA's Vulcan rocket, as well as the commercial launch vehicles. Scaled Composites' SpaceShipOne was a reusable suborbital spaceplane that carried pilots Mike Melvill and Brian Binnie on consecutive flights in 2004 to win the Ansari X Prize. The Spaceship Company will build its successor SpaceShipTwo. A fleet of SpaceShipTwos operated by Virgin Galactic was planned to begin reusable private spaceflight carrying paying passengers in 2014, but was delayed after the crash of VSS Enterprise. ===Uncrewed spacecraft=== Uncrewed spacecraft are spacecraft without people on board. Uncrewed spacecraft may have varying levels of autonomy from human input; they may be remote controlled, remote guided or even autonomous, meaning they have a pre-programmed list of operations, which they will execute unless otherwise instructed. Many space missions are more suited to telerobotic rather than crewed operation, due to lower cost and lower risk factors. In addition, some planetary destinations such as Venus or the vicinity of Jupiter are too hostile for human survival. Outer planets such as Saturn, Uranus, and Neptune are too distant to reach with current crewed spaceflight technology, so telerobotic probes are the only way to explore them. Telerobotics also allows exploration of regions that are vulnerable to contamination by Earth micro-organisms since spacecraft can be sterilized. Humans can not be sterilized in the same way as a spaceship, as they coexist with numerous micro-organisms, and these micro-organisms are also hard to contain within a spaceship or spacesuit. Multiple space probes were sent to study Moon, the planets, the Sun, multiple small Solar System bodies (comets and asteroids). Special class of uncrewed spacecraft is space telescopes, a telescope in outer space used to observe astronomical objects. The first operational telescopes were the American Orbiting Astronomical Observatory, OAO-2 launched in 1968, and the Soviet Orion 1 ultraviolet telescope aboard space station Salyut 1 in 1971. Space telescopes avoid the filtering and distortion (scintillation) of electromagnetic radiation which they observe, and avoid light pollution which ground-based observatories encounter. The best-known examples are Hubble Space Telescope and James Webb Space Telescope. Cargo spacecraft are designed to carry cargo, possibly to support space stations' operation by transporting food, propellant and other supplies. Automated cargo spacecraft have been used since 1978 and have serviced Salyut 6, Salyut 7, Mir, the International Space Station and Tiangong space station. ====Fastest spacecraft==== *Parker Solar Probe (estimated at first sun close pass, will reach at final perihelion) *Helios I and II Solar Probes () ==== Furthest spacecraft from the Sun ==== * Voyager 1 at 156.13 AU as of April 2022, traveling outward at about * Pioneer 10 at 122.48 AU as of December 2018, traveling outward at about *Voyager 2 at 122.82 AU as of January 2020, traveling outward at about *Pioneer 11 at 101.17 AU as of December 2018, traveling outward at about ==Subsystems== A spacecraft astrionics system comprises different subsystems, depending on the mission profile. 
Spacecraft subsystems comprise the spacecraft's bus and may include attitude determination and control (variously called ADAC, ADC, or ACS), guidance, navigation and control (GNC or GN&C;), communications (comms), command and data handling (CDH or C&DH;), power (EPS), thermal control (TCS), propulsion, and structures. Attached to the bus are typically payloads. ; Life support : Spacecraft intended for human spaceflight must also include a life support system for the crew. ; Attitude control : A Spacecraft needs an attitude control subsystem to be correctly oriented in space and respond to external torques and forces properly. The attitude control subsystem consists of sensors and actuators, together with controlling algorithms. The attitude- control subsystem permits proper pointing for the science objective, sun pointing for power to the solar arrays and earth pointing for communications. ; GNC : Guidance refers to the calculation of the commands (usually done by the CDH subsystem) needed to steer the spacecraft where it is desired to be. Navigation means determining a spacecraft's orbital elements or position. Control means adjusting the path of the spacecraft to meet mission requirements. ; Command and data handling : The C&DH; subsystem receives commands from the communications subsystem, performs validation and decoding of the commands, and distributes the commands to the appropriate spacecraft subsystems and components. The CDH also receives housekeeping data and science data from the other spacecraft subsystems and components, and packages the data for storage on a data recorder or transmission to the ground via the communications subsystem. Other functions of the CDH include maintaining the spacecraft clock and state-of-health monitoring. ; Communications : Spacecraft, both robotic and crewed, have various communications systems for communication with terrestrial stations and for inter-satellite service. Technologies include space radio station and optical communication. In addition, some spacecraft payloads are explicitly for the purpose of ground–ground communication using receiver/retransmitter electronic technologies. ; Power : Spacecraft need an electrical power generation and distribution subsystem for powering the various spacecraft subsystems. For spacecraft near the Sun, solar panels are frequently used to generate electrical power. Spacecraft designed to operate in more distant locations, for example Jupiter, might employ a radioisotope thermoelectric generator (RTG) to generate electrical power. Electrical power is sent through power conditioning equipment before it passes through a power distribution unit over an electrical bus to other spacecraft components. Batteries are typically connected to the bus via a battery charge regulator, and the batteries are used to provide electrical power during periods when primary power is not available, for example when a low Earth orbit spacecraft is eclipsed by Earth. ; Thermal control : Spacecraft must be engineered to withstand transit through Earth's atmosphere and the space environment. They must operate in a vacuum with temperatures potentially ranging across hundreds of degrees Celsius as well as (if subject to reentry) in the presence of plasmas. Material requirements are such that either high melting temperature, low density materials such as beryllium and reinforced carbon–carbon or (possibly due to the lower thickness requirements despite its high density) tungsten or ablative carbon–carbon composites are used. 
Depending on mission profile, spacecraft may also need to operate on the surface of another planetary body. The thermal control subsystem can be passive, dependent on the selection of materials with specific radiative properties. Active thermal control makes use of electrical heaters and certain actuators such as louvers to control temperature ranges of equipments within specific ranges. ; Spacecraft propulsion : Spacecraft may or may not have a propulsion subsystem, depending on whether or not the mission profile calls for propulsion. The Swift spacecraft is an example of a spacecraft that does not have a propulsion subsystem. Typically though, LEO spacecraft include a propulsion subsystem for altitude adjustments (drag make-up maneuvers) and inclination adjustment maneuvers. A propulsion system is also needed for spacecraft that perform momentum management maneuvers. Components of a conventional propulsion subsystem include fuel, tankage, valves, pipes, and thrusters. The thermal control system interfaces with the propulsion subsystem by monitoring the temperature of those components, and by preheating tanks and thrusters in preparation for a spacecraft maneuver. ; Structures : Spacecraft must be engineered to withstand launch loads imparted by the launch vehicle, and must have a point of attachment for all the other subsystems. Depending on mission profile, the structural subsystem might need to withstand loads imparted by entry into the atmosphere of another planetary body, and landing on the surface of another planetary body. ; Payload : The payload depends on the mission of the spacecraft, and is typically regarded as the part of the spacecraft \\\"that pays the bills\\\". Typical payloads could include scientific instruments (cameras, telescopes, or particle detectors, for example), cargo, or a human crew. ; Ground segment : The ground segment, though not technically part of the spacecraft, is vital to the operation of the spacecraft. Typical components of a ground segment in use during normal operations include a mission operations facility where the flight operations team conducts the operations of the spacecraft, a data processing and storage facility, ground stations to radiate signals to and receive signals from the spacecraft, and a voice and data communications network to connect all mission elements. ; Launch vehicle : The launch vehicle propels the spacecraft from Earth's surface, through the atmosphere, and into an orbit, the exact orbit being dependent on the mission configuration. The launch vehicle may be expendable or reusable. ==See also== *Astrionics *Commercial astronaut *Flying saucer *List of crewed spacecraft *List of fictional spacecraft *NewSpace *Spacecraft design *Space exploration *Space launch *Spaceships in science fiction *Space suit *Spaceflight records *Starship *Timeline of Solar System exploration *U.S. Space Exploration History on U.S. 
Stamps == References == === Citations === === Sources === * * ==External links== *NASA: Space Science Spacecraft Missions *NSSDC Master Catalog Spacecraft Query Form *Early History of Spacecraft *Basics of Spaceflight tutorial from JPL/Caltech *International Spaceflight Museum Category:Astronautics Category:Pressure vessels\",\n", 469 | " \"categories\": [\n", 470 | " \"Astronautics\",\n", 471 | " \"Pressure vessels\"\n", 472 | " ]\n", 473 | "},\n", 474 | " ]" 475 | ] 476 | }, 477 | { 478 | "cell_type": "markdown", 479 | "metadata": { 480 | "id": "0dwj-z5z7tnu" 481 | }, 482 | "source": [ 483 | "## Step 7: Truncation is a problem for long texts\n", 484 | "\n", 485 | "The semantic relevance will be low because most of the text is ignored in the vector computation." 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": null, 491 | "metadata": { 492 | "id": "xUYUyZ4G3-ns" 493 | }, 494 | "outputs": [], 495 | "source": [ 496 | "text = wikipedia_spacecraft[0][\"text\"]\n", 497 | "embeddings = e5_model.encode(text, normalize_embeddings=True)\n", 498 | "\n", 499 | "tokenized_text = e5_model.tokenizer(text)[\"input_ids\"]\n", 500 | "model_max_seq_length = e5_model.get_max_seq_length()\n", 501 | "text_token_count = len(tokenized_text)\n", 502 | "\n", 503 | "print(f\"text tokens {text_token_count} | model max sequence length {model_max_seq_length}\")\n", 504 | "\n", 505 | "if text_token_count > model_max_seq_length:\n", 506 | " print(f\"❗❗ The text will be truncated.❗❗\")\n", 507 | "else:\n", 508 | " print(f\"The text will not be truncated.\")" 509 | ] 510 | }, 511 | { 512 | "cell_type": "markdown", 513 | "metadata": { 514 | "id": "g0e_QS-i8rkm" 515 | }, 516 | "source": [ 517 | "## Step 8: Visualizing Chunking Strategies\n", 518 | "\n", 519 | "First some utility libraries" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": null, 525 | "metadata": { 526 | "id": "OSMKdU-d8yzw" 527 | }, 528 | "outputs": [], 529 | "source": [ 530 | "# Import Libraries\n", 531 | "import os\n", 532 | "import json\n", 533 | "import textwrap\n", 534 | "from pprint import pprint\n", 535 | "from bs4 import BeautifulSoup\n", 536 | "from IPython.display import HTML\n", 537 | "#from elasticsearch import Elasticsearch, helpers\n", 538 | "from langchain.text_splitter import RecursiveCharacterTextSplitter, \\\n", 539 | " SentenceTransformersTokenTextSplitter, \\\n", 540 | " CharacterTextSplitter, \\\n", 541 | " TextSplitter\n", 542 | "\n", 543 | "## Process splitting and display\n", 544 | "def split_and_print(documents, splitter, ret=False):\n", 545 | " es_docs = []\n", 546 | " for doc in documents:\n", 547 | " passages = []\n", 548 | "\n", 549 | " for chunk in splitter.split_text(doc['text']):\n", 550 | " passages.append({\n", 551 | " \"text\": chunk,\n", 552 | " })\n", 553 | " es_docs.append(passages)\n", 554 | "\n", 555 | " print(f'Number of chunks: {len(passages)}' + '\\n')\n", 556 | " display(HTML(process_chunks(passages)))\n", 557 | " if ret:\n", 558 | " return passages\n", 559 | " else:\n", 560 | " return False\n", 561 | "\n", 562 | "\n", 563 | "## Character Splitter\n", 564 | "def split_by_recursive_char(documents,\n", 565 | " chunk_size: int = 200,\n", 566 | " chunk_overlap: int = 0\n", 567 | " ):\n", 568 | " '''Chunking by character count'''\n", 569 | "\n", 570 | " text_splitter = RecursiveCharacterTextSplitter(\n", 571 | " chunk_size=chunk_size,\n", 572 | " chunk_overlap=chunk_overlap,\n", 573 | " length_function=len,\n", 574 | " is_separator_regex=False,\n", 575 | " )\n", 
576 | " split_and_print(documents, text_splitter)\n", 577 | "\n", 578 | "\n", 579 | "def split_by_text(documents,\n", 580 | " chunk_size: int = 200,\n", 581 | " chunk_overlap: int = 0\n", 582 | " ):\n", 583 | " '''Chunking by character count'''\n", 584 | "\n", 585 | " text_splitter = CharacterTextSplitter(\n", 586 | " chunk_size=chunk_size,\n", 587 | " chunk_overlap=chunk_overlap,\n", 588 | " length_function=len,\n", 589 | " is_separator_regex=False,\n", 590 | " )\n", 591 | " r = split_and_print(documents, text_splitter)\n", 592 | "\n", 593 | "\n", 594 | "\n", 595 | "## Token Splitter\n", 596 | "def split_by_token(documents,\n", 597 | " tokens_per_chunk: int = 2,\n", 598 | " chunk_overlap: int = 0,\n", 599 | " ret=False\n", 600 | " ):\n", 601 | " '''Chunking by BERT Transformer Tokens'''\n", 602 | "\n", 603 | " text_splitter = SentenceTransformersTokenTextSplitter(\n", 604 | " tokens_per_chunk=tokens_per_chunk,\n", 605 | " chunk_overlap=chunk_overlap,\n", 606 | " model_name='intfloat/e5-large-v2' # 512 token input limit\n", 607 | " )\n", 608 | " r = split_and_print(documents, text_splitter, ret=ret)\n", 609 | " if ret:\n", 610 | " return r\n", 611 | "\n", 612 | "\n", 613 | "\n", 614 | "\n", 615 | "## Printing and Highlighting functions ##\n", 616 | "\n", 617 | "color_list = [\n", 618 | " \"yellow\",\n", 619 | " \"red\",\n", 620 | " \"lightgreen\",\n", 621 | " \"lightblue\",\n", 622 | " \"lightpink\",\n", 623 | " \"#F0A3FF\", # Vivid orchid\n", 624 | " \"#0075DC\", # Blue ribbon\n", 625 | " \"#2BCE48\", # Slimy green\n", 626 | " \"#FFCC99\", # Peach-orange\n", 627 | " \"#94FFB5\", # Mint green\n", 628 | "\n", 629 | "]\n", 630 | "\n", 631 | "def find_overlap(text1, text2):\n", 632 | " min_len = min(len(text1), len(text2))\n", 633 | " for i in range(min_len, 0, -1):\n", 634 | " if text1[-i:] == text2[:i]:\n", 635 | " return text1[-i:]\n", 636 | " return ''\n", 637 | "\n", 638 | "###################################################################################\n", 639 | "# Highted text -> White\n", 640 | "# Normal text -> Black\n", 641 | "\n", 642 | "### Uncomment these 3 functions if you are running in light mode\n", 643 | "\n", 644 | "# def highlight_first_occurrence(text, substring, color):\n", 645 | "# index = text.find(substring)\n", 646 | "# if index != -1:\n", 647 | "# return (text[:index] +\n", 648 | "# f\"{text[index:index+len(substring)]}\" +\n", 649 | "# text[index+len(substring):])\n", 650 | "# return text\n", 651 | "\n", 652 | "# def highlight_last_occurrence(text, substring, color):\n", 653 | "# index = text.rfind(substring)\n", 654 | "# if index != -1:\n", 655 | "# return (text[:index] +\n", 656 | "# f\"{text[index:index+len(substring)]}\" +\n", 657 | "# text[index+len(substring):])\n", 658 | "# return text\n", 659 | "\n", 660 | "# def process_chunks(chunks, colors=color_list):\n", 661 | "# html_output = \"\"\n", 662 | "# for i in range(len(chunks) - 1):\n", 663 | "# overlap = find_overlap(chunks[i][\"text\"], chunks[i + 1][\"text\"])\n", 664 | "# color = colors[i % len(colors)] # Cycle through the provided colors\n", 665 | "# if overlap:\n", 666 | "# chunks[i][\"text\"] = highlight_last_occurrence(chunks[i][\"text\"], overlap, color)\n", 667 | "# chunks[i + 1][\"text\"] = highlight_first_occurrence(chunks[i + 1][\"text\"], overlap, color)\n", 668 | "# html_output += chunks[i][\"text\"] + \"

\"\n", 669 | "# html_output += chunks[-1][\"text\"] # Add the last chunk\n", 670 | "# return html_output\n", 671 | "\n", 672 | "###################################################################################\n", 673 | "# Highted text -> Black\n", 674 | "# Normal text -> White\n", 675 | "\n", 676 | "### Comment out these 3 functions if running in light modes\n", 677 | "\n", 678 | "def highlight_first_occurrence(text, substring, color):\n", 679 | " index = text.find(substring)\n", 680 | " if index != -1:\n", 681 | " return (text[:index] +\n", 682 | " f\"{text[index:index+len(substring)]}\" +\n", 683 | " text[index+len(substring):])\n", 684 | " return text\n", 685 | "\n", 686 | "def highlight_last_occurrence(text, substring, color):\n", 687 | " index = text.rfind(substring)\n", 688 | " if index != -1:\n", 689 | " return (text[:index] +\n", 690 | " f\"{text[index:index+len(substring)]}\" +\n", 691 | " text[index+len(substring):])\n", 692 | " return text\n", 693 | "\n", 694 | "\n", 695 | "chunk_max_display = 10\n", 696 | "\n", 697 | "def process_chunks(chunks, colors=color_list):\n", 698 | " html_output = \"\"\n", 699 | " for i in range(min(chunk_max_display -1,len(chunks) - 1)):\n", 700 | " overlap = find_overlap(chunks[i][\"text\"], chunks[i + 1][\"text\"])\n", 701 | " color = colors[i % len(colors)] # Cycle through the provided colors\n", 702 | " if overlap:\n", 703 | " chunks[i][\"text\"] = highlight_last_occurrence(chunks[i][\"text\"], overlap, color)\n", 704 | " chunks[i + 1][\"text\"] = highlight_first_occurrence(chunks[i + 1][\"text\"], overlap, color)\n", 705 | " # Wrap each chunk of text in a span with white text color\n", 706 | " html_output += f\"{chunks[i]['text']}

\"\n", 707 | " # Add the last chunk with white text color\n", 708 | " html_output += f\"{chunks[-1]['text']}\"\n", 709 | " html_output += f\"

... additional chunks omitted\"\n", 710 | " return html_output" 711 | ] 712 | }, 713 | { 714 | "cell_type": "markdown", 715 | "metadata": { 716 | "id": "TZbfLoQhRFXy" 717 | }, 718 | "source": [ 719 | "## Step 9: Three Chunking Strategies\n", 720 | "\n", 721 | "[LangChain recursive character text splitter](https://python.langchain.com/docs/modules/data_connection/document_transformers/recursive_text_splitter)\n", 722 | "\n", 723 | "[LangChain splitting by tokens](https://python.langchain.com/docs/modules/data_connection/document_transformers/split_by_token)" 724 | ] 725 | }, 726 | { 727 | "cell_type": "code", 728 | "execution_count": null, 729 | "metadata": { 730 | "id": "b0ClYN7K9NF6" 731 | }, 732 | "outputs": [], 733 | "source": [ 734 | "split_by_recursive_char(wikipedia_spacecraft, chunk_size=1024, chunk_overlap=0)" 735 | ] 736 | }, 737 | { 738 | "cell_type": "code", 739 | "execution_count": null, 740 | "metadata": { 741 | "id": "GXAMcK2I9vop" 742 | }, 743 | "outputs": [], 744 | "source": [ 745 | "split_by_recursive_char(wikipedia_spacecraft, chunk_size=1024, chunk_overlap=50)" 746 | ] 747 | }, 748 | { 749 | "cell_type": "code", 750 | "execution_count": null, 751 | "metadata": { 752 | "id": "LKliFbbn97ky" 753 | }, 754 | "outputs": [], 755 | "source": [ 756 | "token_c500_o0 = split_by_token(wikipedia_spacecraft, tokens_per_chunk=500, chunk_overlap=0, ret=True)" 757 | ] 758 | }, 759 | { 760 | "cell_type": "code", 761 | "execution_count": null, 762 | "metadata": { 763 | "id": "aAI1bgTR-ZXw" 764 | }, 765 | "outputs": [], 766 | "source": [ 767 | "token_c500_o250 = split_by_token(wikipedia_spacecraft, tokens_per_chunk=500, chunk_overlap=100, ret=True)" 768 | ] 769 | }, 770 | { 771 | "cell_type": "markdown", 772 | "metadata": { 773 | "id": "BUApSzG6SXfK" 774 | }, 775 | "source": [ 776 | "## Step 10: Let's comapare using the whole passage vs the best chunk with ChatGPT" 777 | ] 778 | }, 779 | { 780 | "cell_type": "code", 781 | "execution_count": null, 782 | "metadata": { 783 | "id": "xpjUDLUKEtSi" 784 | }, 785 | "outputs": [], 786 | "source": [ 787 | "the_full_text = wikipedia_spacecraft[0][\"text\"]\n", 788 | "\n", 789 | "question = \"What three countries have flown manned spacecraft?\"\n", 790 | "\n", 791 | "def gen_system_prompt(context):\n", 792 | " return f\"\"\"You are an AI assistant than answers questions based on the provided context.\n", 793 | "Use only the provided context. 
If the provided context does not have the answer,\n", 794 | "reply only with 'I do not know'\n", 795 | "\n", 796 | "Context: {context}\"\"\"\n", 797 | "\n", 798 | "import textwrap\n", 799 | "# wrap text when printing, because colab scrolls output to the right too much\n", 800 | "def wrap_text(text, width):\n", 801 | "    wrapped_text = textwrap.wrap(text, width)\n", 802 | "    return '\\n'.join(wrapped_text)\n", 803 | "\n", 804 | "def print_light_blue(text):\n", 805 | "    print(f'\\033[94m{text}\\033[0m')\n", 806 | "\n", 807 | "def chatCompletion(messages):\n", 808 | "\n", 809 | "    client = OpenAI(api_key=openai.api_key, base_url=openai.api_base)\n", 810 | "    completion = client.chat.completions.create(\n", 811 | "        model=openai.default_model,\n", 812 | "        max_tokens=150,\n", 813 | "        messages=messages\n", 814 | "    )\n", 815 | "    print_light_blue(f\"\\t{completion.usage}\")\n", 816 | "\n", 817 | "    return completion\n", 818 | "\n", 819 | "def chatWithSpacePassage(prompt, context):\n", 820 | "    messages = [\n", 821 | "        {\"role\": \"system\", \"content\": gen_system_prompt(context)},\n", 822 | "        {\"role\": \"user\", \"content\": prompt}\n", 823 | "    ]\n", 824 | "    print_light_blue(\"Prompt:\")\n", 825 | "    print_light_blue(wrap_text(messages[1][\"content\"],70))\n", 826 | "    completion = chatCompletion(messages)\n", 827 | "\n", 828 | "    response_text = completion.choices[0].message.content\n", 829 | "\n", 830 | "    return wrap_text(response_text,70)\n", 831 | "\n", 832 | "\n", 833 | "ai_response = chatWithSpacePassage(question, the_full_text)\n", 834 | "\n", 835 | "print(ai_response)\n" 836 | ] 837 | }, 838 | { 839 | "cell_type": "markdown", 840 | "metadata": { 841 | "id": "nI6AQ0ACSZ9n" 842 | }, 843 | "source": [ 844 | "## Step 11: Reducing LLM inference costs by 91%\n", 845 | "\n", 846 | "We'll dive deeper into how to use Elasticsearch to speed up vector search and other kinds of Search-Powered AI in the next part of the workshop.\n", 847 | "\n" 848 | ] 849 | }, 850 | { 851 | "cell_type": "code", 852 | "execution_count": null, 853 | "metadata": { 854 | "id": "RD6Znhd7Gfiu" 855 | }, 856 | "outputs": [], 857 | "source": [ 858 | "# the_full_text = wikipedia_spacecraft[0][\"text\"]\n", 859 | "\n", 860 | "text_splitter = RecursiveCharacterTextSplitter(\n", 861 | "    chunk_size=1024,\n", 862 | "    chunk_overlap=50,\n", 863 | "    length_function=len\n", 864 | ")\n", 865 | "\n", 866 | "chunks = text_splitter.split_text(the_full_text)\n", 867 | "\n", 868 | "## Vectorizing can take time so I'm only processing the first few chunks\n", 869 | "sorted_chunks = chunks_by_distance(chunks[:5], question, e5_model)\n", 870 | "\n", 871 | "## top 3 chunk distances\n", 872 | "for passage, dist in sorted_chunks[:3]:\n", 873 | "    print(f\"{passage[:40]} - Cosine distance {dist:.12f}\")\n", 874 | "print(\"\")\n", 875 | "\n", 876 | "top_passage = sorted_chunks[0][0]\n", 877 | "print(wrap_text(top_passage, 70))\n", 878 | "print(\"\")\n", 879 | "\n", 880 | "ai_response = chatWithSpacePassage(question, top_passage)\n", 881 | "print(ai_response)" 882 | ] 883 | }, 884 | { 885 | "cell_type": "markdown", 886 | "metadata": {}, 887 | "source": [ 888 | "🛑 Stop Here 🛑\n", 889 | "\n", 890 | "This Ends Lab 3-1\n", 891 | "
" 892 | ] 893 | } 894 | ], 895 | "metadata": { 896 | "colab": { 897 | "provenance": [] 898 | }, 899 | "kernelspec": { 900 | "display_name": "Python 3", 901 | "name": "python3" 902 | }, 903 | "language_info": { 904 | "codemirror_mode": { 905 | "name": "ipython", 906 | "version": 3 907 | }, 908 | "file_extension": ".py", 909 | "mimetype": "text/x-python", 910 | "name": "python", 911 | "nbconvert_exporter": "python", 912 | "pygments_lexer": "ipython3", 913 | "version": "3.10.13" 914 | } 915 | }, 916 | "nbformat": 4, 917 | "nbformat_minor": 0 918 | } 919 | -------------------------------------------------------------------------------- /notebooks/genai_colab_lab_4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "nFbQGw2POViM" 7 | }, 8 | "source": [ 9 | "# Lab 4 - RAG" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "iSVyZRkvmqyc" 16 | }, 17 | "source": [ 18 | "## Setup Environment\n", 19 | "The following code loads the environment variables, images for the RAG App, and libraries required to run this notebook.\n" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": { 26 | "id": "BwWfBNdUmqyd" 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "FILE=\"GenAI Lab 4\"\n", 31 | "\n", 32 | "# ! pip install -qqq git+https://github.com/elastic/notebook-workshop-loader.git@main\n", 33 | "from notebookworkshoploader import loader\n", 34 | "import os\n", 35 | "from dotenv import load_dotenv\n", 36 | "\n", 37 | "if os.path.isfile(\"../env\"):\n", 38 | " load_dotenv(\"../env\", override=True)\n", 39 | " print('Successfully loaded environment variables from local env file')\n", 40 | "else:\n", 41 | " loader.load_remote_env(file=FILE, env_url=\"https://notebook-workshop-api-voldmqr2bq-uc.a.run.app\")" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "id": "Ln-8SRvAI-jS" 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "# ! pip install -qqq tiktoken==0.5.2 cohere==4.38 openai==1.3.9\n", 53 | "# ! pip install -qqq streamlit==1.30.0 elasticsearch==8.12.0 elastic-apm==6.20.0 inquirer==3.2.1 python-dotenv==1.0.0\n", 54 | "# ! pip install -qqq elasticsearch-llm-cache==0.9.5\n", 55 | "! 
echo \"github codespaces has pre-installed these libraries\"" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": { 61 | "id": "E0uQujqZclf0" 62 | }, 63 | "source": [ 64 | "## Labs\n" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": { 70 | "id": "IvZYvYkE62Df" 71 | }, 72 | "source": [ 73 | "### Lab 4.1 - Gathering Semantic documents from Elasticsearch\n", 74 | "This first exercise will allow us to see an example of returing semantically matching documents from Elasticsearch.\n", 75 | "\n", 76 | "It is not too important to understand all the Elasticsearch DSL syntax at this stage.\n", 77 | "\n" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": { 83 | "id": "DsCwwEc95qv8" 84 | }, 85 | "source": [ 86 | "#### Run the code block below to set up the query function\n", 87 | "---\n", 88 | "\n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "id": "l7lu2VBg6vMN" 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "import os\n", 100 | "import openai\n", 101 | "from elasticsearch import Elasticsearch\n", 102 | "import time\n", 103 | "import json\n", 104 | "import textwrap\n", 105 | "\n", 106 | "\n", 107 | "index = os.environ['ELASTIC_INDEX_DOCS_W']\n", 108 | "\n", 109 | "# Create Elasticsearch Connection\n", 110 | "es = Elasticsearch(\n", 111 | " cloud_id=os.environ['ELASTIC_CLOUD_ID_W'],\n", 112 | " api_key=(os.environ['ELASTIC_APIKEY_ID_W']),\n", 113 | " request_timeout=30\n", 114 | " )\n", 115 | "\n", 116 | "\n", 117 | "# Search Function\n", 118 | "def es_hybrid_search(question):\n", 119 | " query = {\n", 120 | " \"nested\": {\n", 121 | " \"path\": \"passages\",\n", 122 | " \"query\": {\n", 123 | " \"bool\": {\n", 124 | " \"must\": [\n", 125 | " {\n", 126 | " \"match\": {\n", 127 | " \"passages.text\": question\n", 128 | " }\n", 129 | " }\n", 130 | " ]\n", 131 | " }\n", 132 | " }\n", 133 | " }\n", 134 | " }\n", 135 | "\n", 136 | " knn = {\n", 137 | " \"inner_hits\": {\n", 138 | " \"_source\": False,\n", 139 | " \"fields\": [\n", 140 | " \"passages.text\"\n", 141 | " ]\n", 142 | " },\n", 143 | " \"field\": \"passages.embeddings\",\n", 144 | " \"k\": 5,\n", 145 | " \"num_candidates\": 100,\n", 146 | " \"query_vector_builder\": {\n", 147 | " \"text_embedding\": {\n", 148 | " \"model_id\": \"sentence-transformers__all-distilroberta-v1\",\n", 149 | " \"model_text\": question\n", 150 | " }\n", 151 | " }\n", 152 | " }\n", 153 | "\n", 154 | " rank = {\n", 155 | " \"rrf\": {}\n", 156 | " }\n", 157 | "\n", 158 | " fields = [\n", 159 | " \"title\",\n", 160 | " \"text\"\n", 161 | " ]\n", 162 | "\n", 163 | " size = 5\n", 164 | "\n", 165 | " resp = es.search(index=index,\n", 166 | " #query=query,\n", 167 | " knn=knn,\n", 168 | " fields=fields,\n", 169 | " size=size,\n", 170 | " #rank=rank,\n", 171 | " source=False\n", 172 | " )\n", 173 | "\n", 174 | " title_text = []\n", 175 | " for doc in resp['hits']['hits']:\n", 176 | " title_text.append( { 'title' : doc['fields']['title'][0],\n", 177 | " 'passage' : doc['inner_hits']['passages']['hits']['hits'][0]['fields']['passages'][0]['text'][0] }\n", 178 | " )\n", 179 | "\n", 180 | " return title_text" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": { 186 | "id": "eKBumt6W68wE" 187 | }, 188 | "source": [ 189 | "#### Example Semantic Search With Elastic" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "id": "h4hlknOP-Tba" 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | 
"user_question = \"Who is Batman?\"" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "id": "qpHyxzev4WZm" 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "es_augment_docs = es_hybrid_search(user_question)\n", 212 | "\n", 213 | "print('Wikipedia titles returned:\\n')\n", 214 | "for hit, wiki in enumerate(es_augment_docs):\n", 215 | " print(f\"{hit} - {wiki['title'] }\" )" 216 | ] 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "metadata": { 221 | "id": "dPVcfU_26rGI" 222 | }, 223 | "source": [ 224 | "### Lab 4.2 - Sending Elasticsearch docs with a prompt for a RAG response" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": { 230 | "id": "UZRE3N0q61L3" 231 | }, 232 | "source": [ 233 | "#### Run the code below to set up the LLM Connection" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "id": "aWeL5ANw65ND" 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "import openai\n", 245 | "from openai import OpenAI\n", 246 | "import textwrap\n", 247 | "\n", 248 | "\n", 249 | "# Configure OpenAI client\n", 250 | "openai.api_key = os.environ['OPENAI_API_KEY']\n", 251 | "openai.api_base = os.environ['OPENAI_API_BASE']\n", 252 | "openai.default_model = os.environ['OPENAI_API_ENGINE']\n", 253 | "openai.verify_ssl_certs = False\n", 254 | "client = OpenAI(api_key=openai.api_key, base_url=openai.api_base)\n", 255 | "\n", 256 | "if os.environ['ELASTIC_PROXY'] != \"True\":\n", 257 | " openai.api_type = os.environ['OPENAI_API_TYPE']\n", 258 | " openai.api_version = os.environ['OPENAI_API_VERSION']\n", 259 | "\n", 260 | "\n", 261 | "# Text wrapper for colab readibility\n", 262 | "def wrap_text(text):\n", 263 | " wrapped_text = textwrap.wrap(text, 70)\n", 264 | " return '\\n'.join(wrapped_text)\n", 265 | "\n", 266 | "\n", 267 | "# Function to connect with LLM\n", 268 | "def chat_gpt(client, question, passages):\n", 269 | "\n", 270 | " system_prompt=\"You are a helpful assistant who answers questions from provided Wikipedia articles.\"\n", 271 | " user_prompt = f'''Answer the followng question: {question}\n", 272 | " using only the wikipedia `passages` provided.\n", 273 | " If the answer is not provided in the `passages` respond ONLY with:\n", 274 | " \"I am unable to answer the user's question from the provided passage\" and nothing else.\n", 275 | "\n", 276 | " passages: {passages}\n", 277 | "\n", 278 | " AI response:\n", 279 | " '''\n", 280 | "\n", 281 | " # Prepare the messages for the ChatGPT API\n", 282 | " messages = [{\"role\": \"system\", \"content\": system_prompt},\n", 283 | " {\"role\": \"user\", \"content\": user_prompt}]\n", 284 | "\n", 285 | " response = client.chat.completions.create(model=openai.default_model,\n", 286 | " temperature=0.2,\n", 287 | " messages=messages,\n", 288 | " )\n", 289 | " return response\n", 290 | "# return response.choices[0].message.content" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "metadata": { 296 | "id": "pQ4ZijSv65tQ" 297 | }, 298 | "source": [ 299 | "#### Pass the full prompt and wiki passages to LLM" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": { 306 | "id": "MR-XrChD6-E0" 307 | }, 308 | "outputs": [], 309 | "source": [ 310 | "ai = chat_gpt(client, user_question, es_augment_docs)\n", 311 | "print(f\"User Question: \\n{user_question}\\n\")\n", 312 | "print(\"AI response:\")\n", 313 | "print(wrap_text(ai.choices[0].message.content))" 
314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": { 319 | "id": "t7RmurdZNPg-" 320 | }, 321 | "source": [ 322 | "### Lab 4.3 - Full RAG Application with UI\n" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": [ 329 | "## Main Script\n", 330 | "We've placed the sample code in the streamlit folder of this repository\n", 331 | "\n", 332 | "Take a look at the code [streamlit/app.py](../streamlit/app.py)" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": { 338 | "id": "Wu0KfS0ESf6e" 339 | }, 340 | "source": [ 341 | "## Streamlit\n", 342 | "To start the Streamlit app you need to use the ```streamlit run``` command from the folder. You can do this either from this notebook or the Visual Studio Code terminal provided in Github Codespaces" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "metadata": { 349 | "id": "cHIHFID3NBXa" 350 | }, 351 | "outputs": [], 352 | "source": [ 353 | "! cd ../streamlit; streamlit run app.py " 354 | ] 355 | } 356 | ], 357 | "metadata": { 358 | "colab": { 359 | "provenance": [] 360 | }, 361 | "kernelspec": { 362 | "display_name": "Python 3", 363 | "name": "python3" 364 | }, 365 | "language_info": { 366 | "codemirror_mode": { 367 | "name": "ipython", 368 | "version": 3 369 | }, 370 | "file_extension": ".py", 371 | "mimetype": "text/x-python", 372 | "name": "python", 373 | "nbconvert_exporter": "python", 374 | "pygments_lexer": "ipython3", 375 | "version": "3.10.13" 376 | } 377 | }, 378 | "nbformat": 4, 379 | "nbformat_minor": 0 380 | } 381 | -------------------------------------------------------------------------------- /streamlit/.streamlit/config.toml: -------------------------------------------------------------------------------- 1 | [theme] 2 | base = "dark" -------------------------------------------------------------------------------- /streamlit/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import streamlit as st 3 | import openai 4 | import tiktoken 5 | import secrets 6 | from openai import OpenAI 7 | from elasticsearch import Elasticsearch 8 | import elasticapm 9 | import base64 10 | from elasticsearch_llm_cache.elasticsearch_llm_cache import ElasticsearchLLMCache 11 | import time 12 | import json 13 | import textwrap 14 | 15 | ###################################### 16 | # Streamlit Configuration 17 | st.set_page_config(layout="wide") 18 | 19 | 20 | # wrap text when printing, because colab scrolls output to the right too much 21 | def wrap_text(text, width): 22 | wrapped_text = textwrap.wrap(text, width) 23 | return '\n'.join(wrapped_text) 24 | 25 | 26 | @st.cache_data() 27 | def get_base64(bin_file): 28 | with open(bin_file, 'rb') as f: 29 | data = f.read() 30 | return base64.b64encode(data).decode() 31 | 32 | 33 | def set_background(png_file): 34 | bin_str = get_base64(png_file) 35 | page_bg_img = ''' 36 | 42 | ''' % bin_str 43 | st.markdown(page_bg_img, unsafe_allow_html=True) 44 | return 45 | 46 | 47 | set_background('images/background-dark2.jpeg') 48 | 49 | 50 | ###################################### 51 | 52 | ###################################### 53 | # Sidebar Options 54 | def sidebar_bg(side_bg): 55 | side_bg_ext = 'png' 56 | st.markdown( 57 | f""" 58 | 63 | """, 64 | unsafe_allow_html=True, 65 | ) 66 | 67 | 68 | side_bg = './images/sidebar2-dark.png' 69 | sidebar_bg(side_bg) 70 | 71 | # sidebar logo 72 | st.markdown( 73 | """ 74 | 83 | """, 
unsafe_allow_html=True 84 | ) 85 | 86 | with st.sidebar: 87 | st.image("images/elastic_logo_transp_100.png") 88 | 89 | ###################################### 90 | # expander markdown 91 | st.markdown( 92 | ''' 93 | 103 | ''', 104 | unsafe_allow_html=True 105 | ) 106 | 107 | ###################################### 108 | 109 | @st.cache_resource 110 | def initOpenAI(): 111 | #if using the Elastic AI proxy, then generate the correct API key 112 | if os.environ['ELASTIC_PROXY'] == "True": 113 | #generate and share "your" unique hash 114 | os.environ['USER_HASH'] = secrets.token_hex(nbytes=6) 115 | print(f"Your unique user hash is: {os.environ['USER_HASH']}") 116 | #get the current API key and combine with your hash 117 | os.environ['OPENAI_API_KEY'] = f"{os.environ['OPENAI_API_KEY']} {os.environ['USER_HASH']}" 118 | else: 119 | openai.api_type = os.environ['OPENAI_API_TYPE'] 120 | openai.api_version = os.environ['OPENAI_API_VERSION'] 121 | 122 | # Configure OpenAI client 123 | openai.api_key = os.environ['OPENAI_API_KEY'] 124 | openai.api_base = os.environ['OPENAI_API_BASE'] 125 | openai.default_model = os.environ['OPENAI_API_ENGINE'] 126 | openai.verify_ssl_certs = False 127 | client = OpenAI(api_key=openai.api_key, base_url=openai.api_base) 128 | return client 129 | 130 | openAIClient = initOpenAI() 131 | 132 | # Initialize Elasticsearch and APM clients 133 | # Configure APM and Elasticsearch clients 134 | @st.cache_resource 135 | def initElastic(): 136 | os.environ['ELASTIC_APM_SERVICE_NAME'] = "genai_workshop_v2_lab_2-2" 137 | apmclient = elasticapm.Client() 138 | elasticapm.instrument() 139 | 140 | if 'ELASTIC_CLOUD_ID_W' in os.environ: 141 | es = Elasticsearch( 142 | cloud_id=os.environ['ELASTIC_CLOUD_ID_W'], 143 | api_key=(os.environ['ELASTIC_APIKEY_ID_W']), 144 | request_timeout=30 145 | ) 146 | else: 147 | es = Elasticsearch( 148 | os.environ['ELASTIC_URL'], 149 | basic_auth=(os.environ['ELASTIC_USER'], os.environ['ELASTIC_PASSWORD']), 150 | request_timeout=30 151 | ) 152 | 153 | return apmclient, es 154 | 155 | 156 | apmclient, es = initElastic() 157 | 158 | # Set our data index 159 | index = os.environ['ELASTIC_INDEX_DOCS_W'] 160 | 161 | ############################################################### 162 | # Similarity Cache functions 163 | # move to env if time 164 | cache_index = "wikipedia-cache" 165 | 166 | 167 | def clear_es_cache(es): 168 | print('clearing cache') 169 | match_all_query = {"query": {"match_all": {}}} 170 | clear_response = es.delete_by_query(index=cache_index, body=match_all_query) 171 | return clear_response 172 | 173 | 174 | @elasticapm.capture_span("cache_search") 175 | def cache_query(cache, prompt_text, similarity_threshold=0.5): 176 | hit = cache.query(prompt_text=prompt_text, similarity_threshold=similarity_threshold) 177 | 178 | if hit: 179 | st.sidebar.markdown('`Cache Match Found`') 180 | else: 181 | st.sidebar.markdown('`Cache Miss`') 182 | 183 | return hit 184 | 185 | 186 | @elasticapm.capture_span("add_to_cache") 187 | def add_to_cache(cache, prompt, response): 188 | st.sidebar.markdown('`Adding response to cache`') 189 | print('adding to cache') 190 | print(prompt) 191 | print(response) 192 | resp = cache.add(prompt=prompt, response=response) 193 | st.markdown(resp) 194 | return resp 195 | 196 | 197 | def init_elastic_cache(): 198 | # Init Elasticsearch Cache 199 | # Only want to attempt to create the index on first run 200 | cache = ElasticsearchLLMCache(es_client=es, 201 | index_name=cache_index, 202 | create_index=False # setting only because of 
Streamlit behavior
203 | )
204 | st.sidebar.markdown('`creating Elasticsearch Cache`')
205 | 
206 | if "index_created" not in st.session_state:
207 | 
208 | st.sidebar.markdown('`running create_index`')
209 | cache.create_index(768)
210 | 
211 | # Set the flag so it doesn't run every time
212 | st.session_state.index_created = True
213 | else:
214 | st.sidebar.markdown('`index already created, skipping`')
215 | 
216 | return cache
217 | 
218 | 
219 | def calc_similarity(score, func_type='dot_product'):
220 | if func_type == 'dot_product':
221 | return (score + 1) / 2
222 | elif func_type == 'cosine':
223 | return (1 + score) / 2
224 | elif func_type == 'l2_norm':
225 | return 1 / (1 + score ** 2)  # ** is exponentiation; ^ would be bitwise XOR and fails on floats
226 | else:
227 | return score
228 | 
229 | 
230 | ###############################################################
231 | 
232 | 
233 | def get_bm25_query(query_text, augment_method):
234 | if augment_method == "Full Text":
235 | return {
236 | "match": {
237 | "text": query_text
238 | }
239 | }
240 | elif augment_method == "Matching Chunk":
241 | return {
242 | "nested": {
243 | "path": "passages",
244 | "query": {
245 | "bool": {
246 | "must": [
247 | {
248 | "match": {
249 | "passages.text": query_text
250 | }
251 | }
252 | ]
253 | }
254 | },
255 | "inner_hits": {
256 | "_source": False,
257 | "fields": [
258 | "passages.text"
259 | ]
260 | }
261 | 
262 | }
263 | }
264 | 
265 | 
266 | # Run an Elasticsearch query using BM25 relevance scoring
267 | @elasticapm.capture_span("bm25_search")
268 | def search_bm25(query_text,
269 | es,
270 | size=1,
271 | augment_method="Full Text",
272 | use_hybrid=False # always false - use semantic opt for hybrid
273 | ):
274 | fields = [
275 | "text",
276 | "title",
277 | ]
278 | 
279 | resp = es.search(index=index,
280 | query=get_bm25_query(query_text, augment_method),
281 | fields=fields,
282 | size=size,
283 | source=False)
284 | # print(resp)
285 | body = resp
286 | url = 'nothing'
287 | 
288 | return body, url
289 | 
290 | 
291 | @elasticapm.capture_span("knn_search")
292 | def search_knn(query_text,
293 | es,
294 | size=1,
295 | augment_method="Full Text",
296 | use_hybrid=False
297 | ):
298 | fields = [
299 | "title",
300 | "text"
301 | ]
302 | 
303 | knn = {
304 | "inner_hits": {
305 | "_source": False,
306 | "fields": [
307 | "passages.text"
308 | ]
309 | },
310 | "field": "passages.embeddings",
311 | "k": size,
312 | "num_candidates": 100,
313 | "query_vector_builder": {
314 | "text_embedding": {
315 | "model_id": "sentence-transformers__all-distilroberta-v1",
316 | "model_text": query_text
317 | }
318 | }
319 | }
320 | 
321 | rank = {"rrf": {}} if use_hybrid else None
322 | 
323 | # need to get the bm25 query if we are using hybrid
324 | if use_hybrid:
325 | print('using hybrid with augment method %s' % augment_method)
326 | query = get_bm25_query(query_text, augment_method)
327 | print(query)
328 | if augment_method == "Matching Chunk":
329 | del query['nested']['inner_hits']
330 | else:
331 | print('not using hybrid')
332 | query = None
333 | 
334 | print(query)
335 | print(knn)
336 | 
337 | resp = es.search(index=index,
338 | knn=knn,
339 | query=query,
340 | fields=fields,
341 | size=size,
342 | rank=rank,
343 | source=False)
344 | 
345 | return resp, None
346 | 
347 | 
348 | def truncate_text(text, max_tokens):
349 | tokens = text.split()
350 | if len(tokens) <= max_tokens:
351 | return text
352 | 
353 | return ' '.join(tokens[:max_tokens])
354 | 
355 | 
356 | def build_text_obj(resp, aug_method):
357 | 
358 | tobj = {}
359 | 
360 | for hit in resp['hits']['hits']:
361 | # 
tobj[hit['fields']['title'][0]] = []
362 | title = hit['fields']['title'][0]
363 | tobj.setdefault(title, [])
364 | 
365 | if aug_method == "Matching Chunk":
366 | print('hit')
367 | print(hit)
368 | # tobj['passages'] = []
369 | for ihit in hit['inner_hits']['passages']['hits']['hits']:
370 | tobj[title].append(
371 | {'passage': ihit['fields']['passages'][0]['text'][0],
372 | '_score': ihit['_score']}
373 | )
374 | elif aug_method == "Full Text":
375 | tobj[title].append(
376 | hit['fields']
377 | )
378 | 
379 | return tobj
380 | 
381 | 
382 | def generate_response(query,
383 | es,
384 | search_method,
385 | custom_prompt,
386 | negative_response,
387 | show_prompt, size=1,
388 | augment_method="Full Text",
389 | use_hybrid=False,
390 | show_es_response=True,
391 | show_es_augment=True,
392 | ):
393 | 
394 | # Perform the search based on the specified method
395 | search_functions = {
396 | 'bm25': {'method': search_bm25, 'display': 'Lexical Search'},
397 | 'knn': {'method': search_knn, 'display': 'Semantic Search'}
398 | }
399 | search_entry = search_functions.get(search_method)
400 | if not search_entry:
401 | raise ValueError(f"Invalid search method: {search_method}")
402 | search_func = search_entry['method']
403 | # Perform the search and format the docs
404 | response, url = search_func(query, es, size, augment_method, use_hybrid)
405 | es_time = time.time()
406 | augment_text = build_text_obj(response, augment_method)
407 | 
408 | res_col1, res_col2 = st.columns(2)
409 | # Display the search results from ES
410 | with res_col2:
411 | st.header(':rainbow[Elasticsearch Response]')
412 | st.subheader(':orange[Search Settings]')
413 | st.write(':gray[Search Method:] :blue[%s]' % search_functions.get(search_method)['display'])
414 | st.write(':gray[Size Setting:] :blue[%s]' % size)
415 | st.write(':gray[Augment Setting:] :blue[%s]' % augment_method)
416 | st.write(':gray[Using Hybrid:] :blue[%s]' % (
417 | 'Not Applicable with Lexical' if search_method == 'bm25' else use_hybrid))
418 | 
419 | st.subheader(':green[Augment Chunk(s) from Elasticsearch]')
420 | if show_es_augment:
421 | st.json(dict(augment_text))
422 | else:
423 | st.write(':blue[Show Augment Disabled]')
424 | 
425 | st.subheader(':violet[Elasticsearch Response]')
426 | if show_es_response:
427 | st.json(dict(response))
428 | else:
429 | st.write(':blue[Response Received]')
430 | 
431 | formatted_prompt = custom_prompt.replace("$query", query).replace("$response", str(augment_text)).replace(
432 | "$negResponse", negative_response)
433 | 
434 | with res_col1:
435 | st.header(':orange[GenAI Response]')
436 | 
437 | chat_response = chat_gpt(formatted_prompt, system_prompt="You are a helpful assistant.")
438 | 
439 | # Display assistant response in chat message container
440 | with st.chat_message("assistant"):
441 | message_placeholder = st.empty()
442 | full_response = ""
443 | for chunk in chat_response.split():
444 | full_response += chunk + " "
445 | time.sleep(0.02)
446 | # Add a blinking cursor to simulate typing
447 | message_placeholder.markdown(full_response + "▌")
448 | message_placeholder.markdown(full_response)
449 | 
450 | # Display results
451 | if show_prompt:
452 | st.text("Full prompt sent to ChatGPT:")
453 | st.text(wrap_text(formatted_prompt, 70))
454 | 
455 | if negative_response in chat_response:
456 | # LLM returned the canned negative answer, so treat it as no answer
457 | chat_response = None
458 | 
459 | 
460 | return es_time, chat_response
461 | 
462 | def count_tokens(messages, model="gpt-35-turbo"):
463 | if "gpt-3.5-turbo" in model or "gpt-35-turbo" in model:
464 | model = 
"gpt-3.5-turbo-0613" 465 | elif "gpt-4" in model: 466 | model="gpt-4-0613" 467 | 468 | try: 469 | encoding = tiktoken.encoding_for_model(model) 470 | except KeyError: 471 | print("Warning: model not found. Using gpt-3.5-turbo-0613 encoding.") 472 | encoding = tiktoken.encoding_for_model("gpt-3.5-turbo-0613") 473 | 474 | if isinstance(messages, str): 475 | return len(encoding.encode(messages)) 476 | else: 477 | tokens_per_message = 3 478 | tokens_per_name = 1 479 | 480 | num_tokens = 0 481 | for message in messages: 482 | num_tokens += tokens_per_message 483 | for key, value in message.items(): 484 | num_tokens += len(encoding.encode(value)) 485 | if key == "name": 486 | num_tokens += tokens_per_name 487 | num_tokens += 3 # every reply is primed with <|start|>assistant<|message|> 488 | return num_tokens 489 | 490 | def chat_gpt(user_prompt, system_prompt): 491 | """ 492 | Generates a response from ChatGPT based on the given user and system prompts. 493 | """ 494 | max_tokens = 1024 495 | max_context_tokens = 4000 496 | safety_margin = 5 497 | 498 | # Truncate the prompt content to fit within the model's context length 499 | truncated_prompt = truncate_text(user_prompt, max_context_tokens - max_tokens - safety_margin) 500 | 501 | # Prepare the messages for the ChatGPT API 502 | messages = [{"role": "system", "content": system_prompt}, 503 | {"role": "user", "content": truncated_prompt}] 504 | 505 | full_response = "" 506 | for response in openAIClient.chat.completions.create( 507 | model=openai.default_model, 508 | temperature=0, 509 | messages=messages, 510 | stream=True 511 | ): 512 | full_response += (response.choices[0].delta.content or "") 513 | 514 | # APM: add metadata labels of data we want to capture 515 | elasticapm.label(model = openai.default_model) 516 | elasticapm.label(prompt = user_prompt) 517 | elasticapm.label(prompt_tokens = count_tokens(messages, model=openai.default_model)) 518 | elasticapm.label(response_tokens = count_tokens(full_response, model=openai.default_model)) 519 | elasticapm.label(total_tokens = count_tokens(messages, model=openai.default_model) + count_tokens(full_response, model=openai.default_model)) 520 | if 'USER_HASH' in os.environ: elasticapm.label(user = os.environ['USER_HASH']) 521 | 522 | return full_response 523 | 524 | 525 | # Main chat form 526 | st.title("Wikipedia RAG Demo Platform") 527 | 528 | # Define the default prompt and negative response 529 | default_prompt_intro = "Answer this question:" 530 | default_response_instructions = ("using only the information from the wikipedia documents included and nothing " 531 | "else.\nwikipedia_docs: $response\n") 532 | default_negative_response = ("If the answer is not provided in the included documentation. 
You are to ONLY reply with "
533 | "'I'm unable to answer the question based on the information I have from wikipedia' and "
534 | "nothing else.")
535 | 
536 | with st.form("chat_form"):
537 | query = st.text_input("Ask the Wikipedia knowledge base a question:",
538 | placeholder='Who is Batman?')
539 | 
540 | opt_col1, opt_col2 = st.columns(2)
541 | with opt_col1:
542 | with st.expander("Customize Prompt Template"):
543 | prompt_intro = st.text_area("Introduction/context of the prompt:", value=default_prompt_intro)
544 | prompt_query_placeholder = st.text_area("Placeholder for the user's query:", value="$query")
545 | prompt_response_placeholder = st.text_area("Placeholder for the Elasticsearch response:",
546 | value=default_response_instructions)
547 | prompt_negative_response = st.text_area("Negative response placeholder:", value=default_negative_response)
548 | prompt_closing = st.text_area("Closing remarks of the prompt:",
549 | value="Format the answer in complete markdown code format.")
550 | 
551 | combined_prompt = f"{prompt_intro}\n{prompt_query_placeholder}\n{prompt_response_placeholder}\n{prompt_negative_response}\n{prompt_closing}"
552 | st.text_area("Preview of your custom prompt:", value=combined_prompt, disabled=True)
553 | 
554 | with opt_col2:
555 | with st.expander("Retrieval Search and Display Options"):
556 | st.subheader("Retrieval Options")
557 | ret_1, ret_2 = st.columns(2)
558 | with ret_1:
559 | search_method = st.radio("Search Method", ("Semantic Search", "Lexical Search"))
560 | augment_method = st.radio("Augment Method", ("Full Text", "Matching Chunk"))
561 | with ret_2:
562 | # TODO this should update the title based on the augment_method
563 | doc_count_title = "Number of docs or chunks to Augment with" if augment_method == "Full Text" else "Number of Matching Chunks to Retrieve"
564 | doc_count = st.slider(doc_count_title, min_value=1, max_value=5, value=1)
565 | 
566 | use_hybrid = st.checkbox('Use Hybrid Search')
567 | 
568 | st.divider()
569 | 
570 | st.subheader("Display Options")
571 | show_es_augment = st.checkbox('Show Elasticsearch Augment Text', value=True)
572 | show_es_response = st.checkbox('Show Elasticsearch Response', value=True)
573 | show_full_prompt = st.checkbox('Show Full Prompt Sent to LLM')
574 | 
575 | st.divider()
576 | 
577 | st.subheader("Caching Options")
578 | cache_1, cache_2 = st.columns(2)
579 | with cache_1:
580 | use_cache = st.checkbox('Use Similarity Cache')
581 | # Slider for adjusting similarity threshold
582 | similarity_threshold_selection = st.slider(
583 | "Select Similarity Threshold (dot_product - Higher Similarity means closer)",
584 | min_value=0.0, max_value=2.0,
585 | value=0.5, step=0.01)
586 | 
587 | with cache_2:
588 | clear_cache_butt = st.form_submit_button(':red[Clear Similarity Cache]')
589 | 
590 | col1, col2 = st.columns(2)
591 | with col1:
592 | answer_button = st.form_submit_button("Find my answer!")
593 | 
594 | # Clear Cache Button
595 | if clear_cache_butt:
596 | st.session_state.clear_cache_clicked = True
597 | 
598 | # Confirmation step
599 | if st.session_state.get("clear_cache_clicked", False):
600 | apmclient.begin_transaction("clear_cache")
601 | elasticapm.label(action="clear_cache")
602 | 
603 | # Start timing
604 | start_time = time.time()
605 | 
606 | if st.button(":red[Confirm Clear Cache]"):
607 | print('clear cache clicked')
608 | # TODO if index doesn't exist, catch exception then create it
609 | response = clear_es_cache(es)
610 | st.success("Cache cleared successfully!", icon="🤯")
611 | 
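# --- Illustrative aside (not part of the original app) ----------------------
# The similarity slider above passes a raw Elasticsearch score threshold to
# cache.query(); calc_similarity() (defined earlier in this file) maps such
# scores onto a 0..1 scale for display. Assuming normalized vectors, the
# dot_product branch computes (score + 1) / 2, so for example:
#     calc_similarity(1.0)   -> 1.0   (identical direction)
#     calc_similarity(0.0)   -> 0.5   (orthogonal)
#     calc_similarity(-1.0)  -> 0.0   (opposite direction)
# -----------------------------------------------------------------------------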
st.session_state.clear_cache_clicked = False # Reset the state 612 | 613 | apmclient.end_transaction("clear_cache", "success") 614 | 615 | if answer_button: 616 | search_method = "knn" if search_method == "Semantic Search" else "bm25" 617 | 618 | apmclient.begin_transaction("query") 619 | elasticapm.label(search_method=search_method) 620 | elasticapm.label(query=query) 621 | 622 | # Start timing 623 | start_time = time.time() 624 | 625 | if use_cache: 626 | cache = init_elastic_cache() 627 | 628 | # check the llm cache first 629 | st.sidebar.markdown('`Checking ES Cache`') 630 | cache_check = cache_query(cache, 631 | prompt_text=query, 632 | similarity_threshold=similarity_threshold_selection 633 | ) 634 | # st.markdown(cache_check) 635 | else: 636 | cache_check = None 637 | st.sidebar.markdown('`Skipping ES Cache`') 638 | 639 | try: 640 | 641 | if cache_check: 642 | es_time = time.time() 643 | st.sidebar.markdown('`cache match, using cached results`') 644 | st.subheader('Response from Cache') 645 | s_score = calc_similarity(cache_check['_score'], func_type='dot_product') 646 | st.code(f"Similarity Value: {s_score:.5f}") 647 | 648 | # Display response from LLM 649 | st.header('LLM Response') 650 | # st.markdown(cache_check['response'][0]) 651 | with st.chat_message("assistant"): 652 | message_placeholder = st.empty() 653 | full_response = "" 654 | for chunk in cache_check['response'][0].split(): 655 | full_response += chunk + " " 656 | time.sleep(0.02) 657 | # Add a blinking cursor to simulate typing 658 | message_placeholder.markdown(full_response + "▌") 659 | message_placeholder.markdown(full_response) 660 | 661 | llmAnswer = None # no need to recache the answer 662 | 663 | else: 664 | # Use combined_prompt and show_full_prompt as arguments 665 | es_time, llmAnswer = generate_response(query, 666 | es, 667 | search_method, 668 | combined_prompt, 669 | prompt_negative_response, 670 | show_full_prompt, 671 | doc_count, 672 | augment_method, 673 | use_hybrid, 674 | show_es_response, 675 | show_es_augment, 676 | ) 677 | apmclient.end_transaction("query", "success") 678 | 679 | if use_cache and llmAnswer: 680 | if "I'm unable to answer the question" in llmAnswer: 681 | st.sidebar.markdown('`unable to answer, not adding to cache`') 682 | else: 683 | st.sidebar.markdown('`adding prompt and response to cache`') 684 | add_to_cache(cache, query, llmAnswer) 685 | 686 | # End timing and print the elapsed time 687 | elapsed_time = time.time() - start_time 688 | es_elapsed_time = es_time - start_time 689 | 690 | ct1, ct2 = st.columns(2) 691 | with ct1: 692 | st.subheader("GenAI Time taken: :red[%.2f seconds]" % elapsed_time) 693 | 694 | with ct2: 695 | st.subheader("ES Query Time taken: :green[%.2f seconds]" % es_elapsed_time) 696 | 697 | except Exception as e: 698 | st.error(f"An error occurred: {str(e)}") 699 | apmclient.end_transaction("query", "failure") 700 | -------------------------------------------------------------------------------- /streamlit/images/background-dark2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/genai-workshop-codespaces/68ee6494b597caa097782a9205dac9545e06b289/streamlit/images/background-dark2.jpeg -------------------------------------------------------------------------------- /streamlit/images/elastic_logo_transp_100.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/elastic/genai-workshop-codespaces/68ee6494b597caa097782a9205dac9545e06b289/streamlit/images/elastic_logo_transp_100.png -------------------------------------------------------------------------------- /streamlit/images/sidebar2-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elastic/genai-workshop-codespaces/68ee6494b597caa097782a9205dac9545e06b289/streamlit/images/sidebar2-dark.png --------------------------------------------------------------------------------
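For reference, here is a condensed end-to-end sketch of the RAG loop that the notebooks and the Streamlit app build up: retrieve the best-matching passages with an Elasticsearch kNN query, then ground the LLM answer in them. This is a minimal sketch rather than part of the repository; it assumes the same environment variables and index mapping used above (chunks nested under `passages` with an `embeddings` field) and omits the caching, APM, and UI layers.

```python
# Minimal RAG loop: Elasticsearch kNN retrieval + grounded OpenAI completion
import os

from elasticsearch import Elasticsearch
from openai import OpenAI

es = Elasticsearch(cloud_id=os.environ["ELASTIC_CLOUD_ID_W"],
                   api_key=os.environ["ELASTIC_APIKEY_ID_W"])
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"],
                base_url=os.environ["OPENAI_API_BASE"])


def retrieve_passages(question, size=3):
    """Return the best-matching passage text from each of the top `size` docs."""
    knn = {
        "field": "passages.embeddings",
        "k": size,
        "num_candidates": 100,
        "query_vector_builder": {
            "text_embedding": {
                "model_id": "sentence-transformers__all-distilroberta-v1",
                "model_text": question,
            }
        },
        "inner_hits": {"_source": False, "fields": ["passages.text"]},
    }
    resp = es.search(index=os.environ["ELASTIC_INDEX_DOCS_W"],
                     knn=knn, fields=["title"], size=size, source=False)
    return [hit["inner_hits"]["passages"]["hits"]["hits"][0]
               ["fields"]["passages"][0]["text"][0]
            for hit in resp["hits"]["hits"]]


def answer(question):
    """Ground the LLM answer in the retrieved passages."""
    context = "\n".join(retrieve_passages(question))
    messages = [
        {"role": "system",
         "content": "Answer only from the provided context. "
                    "If the context lacks the answer, reply 'I do not know'."},
        {"role": "user",
         "content": f"Context: {context}\n\nQuestion: {question}"},
    ]
    resp = client.chat.completions.create(model=os.environ["OPENAI_API_ENGINE"],
                                          messages=messages, temperature=0)
    return resp.choices[0].message.content


print(answer("Who is Batman?"))
```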