├── .github └── workflows │ ├── build-documentation.yaml │ ├── check.yaml │ ├── deploy-documentation.yaml │ ├── publish.yaml │ └── test.yaml ├── .gitignore ├── LICENSE ├── README.md ├── docs ├── clients │ ├── bedrock.md │ ├── huggingface.md │ ├── index.md │ └── sagemaker.md ├── examples │ ├── .gitkeep │ └── index.md ├── index.md ├── installation.md └── prompt_utils.md ├── easyllm ├── __init__.py ├── cli.py ├── clients │ ├── __init__.py │ ├── bedrock.py │ ├── huggingface.py │ └── sagemaker.py ├── data │ ├── __init__.py │ ├── extractor │ │ ├── __init__.py │ │ └── html_extractor.py │ └── filters │ │ ├── __init__.py │ │ ├── bulletpoint_ratio.py │ │ ├── common_word.py │ │ ├── cookie_banner.py │ │ ├── digit_to_character.py │ │ ├── kenlm_ppl.py │ │ ├── length.py │ │ ├── longword.py │ │ ├── n_gram.py │ │ ├── non_alpha_numeric.py │ │ ├── parantheses_ration.py │ │ ├── punctuation.py │ │ ├── repeating.py │ │ ├── url_ratio.py │ │ ├── whitespace_ration.py │ │ └── words_to_symbol.py ├── evol_instruct │ └── __init__.py ├── prompt_utils │ ├── __init__.py │ ├── anthropic.py │ ├── base.py │ ├── chatml_hf.py │ ├── falcon.py │ ├── llama2.py │ ├── open_assistant.py │ ├── stablebeluga.py │ ├── vicuna.py │ └── wizardlm.py ├── schema │ ├── base.py │ └── openai.py └── utils │ ├── __init__.py │ ├── aws.py │ └── logging.py ├── makefile ├── mkdocs.yml ├── notebooks ├── bedrock-chat-completion-api.ipynb ├── bedrock-stream-chat-completions.ipynb ├── chat-completion-api.ipynb ├── data-filter.ipynb ├── datasets │ └── filter-dataset.ipynb ├── falcon-180b-chat.ipynb ├── get-embeddings.ipynb ├── inference-endpoints-example.ipynb ├── llama2-agent-example.ipynb ├── llama2-rag-example.ipynb ├── sagemaker-chat-completion-api.ipynb ├── sagemaker-get-embeddings.ipynb ├── sagemaker-text-completion-api.ipynb ├── stream-chat-completions.ipynb ├── stream-text-completions.ipynb └── text-completion-api.ipynb ├── pyproject.toml ├── scripts └── .gitkeep └── tests ├── __init__.py ├── prompt_utils ├── test_chatml_hf.py ├── test_llama2.py ├── test_open_assistant.py ├── test_stablebeluga.py ├── test_vicuna.py └── test_wizardlm.py ├── schema └── test_base.py └── test_main.py /.github/workflows/build-documentation.yaml: -------------------------------------------------------------------------------- 1 | name: build documentation 2 | 3 | on: 4 | push: 5 | pull_request: 6 | - main 7 | paths: 8 | - 'docs/**' 9 | - 'notebooks/**' 10 | workflow_dispatch: 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 14 | cancel-in-progress: true 15 | 16 | permissions: 17 | contents: write 18 | 19 | jobs: 20 | documentation: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Set up Python 25 | uses: actions/setup-python@v4 26 | with: 27 | python-version: "3.9" 28 | cache: "pip" 29 | cache-dependency-path: pyproject.toml 30 | - uses: actions/cache@v3 31 | id: cache 32 | with: 33 | path: ${{ env.pythonLocation }} 34 | key: ${{ runner.os }}-python-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml') }}-docs 35 | - name: Install dependencies 36 | if: steps.cache.outputs.cache-hit != 'true' 37 | run: pip install ".[docs]" 38 | - name: build documentation 39 | run: make docs-build 40 | -------------------------------------------------------------------------------- /.github/workflows/check.yaml: -------------------------------------------------------------------------------- 1 | name: Quality Check 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | paths: 9 | - 
'easyllm/**' 10 | - 'tests/**' 11 | workflow_dispatch: 12 | 13 | concurrency: 14 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | quality: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Set up Python 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: "3.9" 26 | cache: "pip" 27 | cache-dependency-path: pyproject.toml 28 | - uses: actions/cache@v3 29 | id: cache 30 | with: 31 | path: ${{ env.pythonLocation }} 32 | key: ${{ runner.os }}-python-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml') }}-check 33 | - name: Install build dependencies 34 | if: steps.cache.outputs.cache-hit != 'true' 35 | run: pip install ".[dev]" 36 | - name: Run library checks 37 | run: make check 38 | -------------------------------------------------------------------------------- /.github/workflows/deploy-documentation.yaml: -------------------------------------------------------------------------------- 1 | name: Publish documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - 'docs/**' 9 | - 'notebooks/**' 10 | release: 11 | types: 12 | - created 13 | workflow_dispatch: 14 | 15 | concurrency: 16 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 17 | cancel-in-progress: true 18 | 19 | permissions: 20 | contents: write 21 | 22 | jobs: 23 | documentation: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v3 27 | - name: Set up Python 28 | uses: actions/setup-python@v4 29 | with: 30 | python-version: "3.9" 31 | cache: "pip" 32 | cache-dependency-path: pyproject.toml 33 | - uses: actions/cache@v3 34 | id: cache 35 | with: 36 | path: ${{ env.pythonLocation }} 37 | key: ${{ runner.os }}-python-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml') }}-docs 38 | - name: Install dependencies 39 | if: steps.cache.outputs.cache-hit != 'true' 40 | run: pip install ".[docs]" 41 | - name: publish documentation 42 | run: make docs-deploy -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | workflow_dispatch: 5 | release: 6 | types: 7 | - created 8 | 9 | jobs: 10 | publish: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - name: Set up Python 15 | uses: actions/setup-python@v4 16 | with: 17 | python-version: "3.9" 18 | cache: "pip" 19 | cache-dependency-path: pyproject.toml 20 | - uses: actions/cache@v3 21 | id: cache 22 | with: 23 | path: ${{ env.pythonLocation }} 24 | key: ${{ runner.os }}-python-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml') }}-publish 25 | - name: Install build dependencies 26 | if: steps.cache.outputs.cache-hit != 'true' 27 | run: pip install build 28 | - name: Build distribution 29 | run: python -m build 30 | - name: Publish 31 | uses: pypa/gh-action-pypi-publish@v1.6.4 32 | with: 33 | password: ${{ secrets.PYPI_API_TOKEN }} 34 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: Unit Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | paths: 9 | - 'easyllm/**' 10 | - 'tests/**' 11 | workflow_dispatch: 12 | 13 | concurrency: 14 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 15 | cancel-in-progress: 
true 16 | 17 | jobs: 18 | quality: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Set up Python 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: "3.9" 26 | cache: "pip" 27 | cache-dependency-path: pyproject.toml 28 | - uses: actions/cache@v3 29 | id: cache 30 | with: 31 | path: ${{ env.pythonLocation }} 32 | key: ${{ runner.os }}-python-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml') }}-test 33 | - name: Install build dependencies 34 | if: steps.cache.outputs.cache-hit != 'true' 35 | run: pip install ".[test]" 36 | - name: Run tests 37 | run: pytest 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | .ruff_cache 131 | docs/examples/*.ipynb 132 | .vscode -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Philipp Schmid 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# EasyLLM
20 | 21 | 22 | **EasyLLM** is an open source project that provides **helpful tools and methods for working with large language models** (LLMs), both open source and closed source. Get immediataly started or check out the [documentation](https://philschmid.github.io/easyllm/). 23 | 24 | EasyLLM implements clients that are **compatible with OpenAI's Completion API**. This means you can easily replace `openai.ChatCompletion`, `openai.Completion`, `openai.Embedding` with, for example, `huggingface.ChatCompletion`, `huggingface.Completion` or `huggingface.Embedding` by changing one line of code. 25 | 26 | ### Supported Clients 27 | 28 | * `huggingface` - [HuggingFace](https://huggingface.co/) models 29 | * `huggingface.ChatCompletion` - Chat with LLMs 30 | * `huggingface.Completion` - Text completion with LLMs 31 | * `huggingface.Embedding` - Create embeddings with LLMs 32 | * `sagemaker` - Open LLMs deployed on Amazon SageMaker 33 | * `sagemaker.ChatCompletion` - Chat with LLMs 34 | * `sagemaker.Completion` - Text completion with LLMs 35 | * `sagemaker.Embedding` - Create embeddings with LLMs 36 | * `bedrock` - Amazon Bedrock LLMs 37 | 38 | 39 | Check out the [Examples](./examples) to get started. 40 | 41 | ## 🚀 Getting Started 42 | 43 | Install EasyLLM via pip: 44 | 45 | ```bash 46 | pip install easyllm 47 | ``` 48 | 49 | Then import and start using the clients: 50 | 51 | ```python 52 | 53 | from easyllm.clients import huggingface 54 | 55 | # helper to build llama2 prompt 56 | huggingface.prompt_builder = "llama2" 57 | 58 | response = huggingface.ChatCompletion.create( 59 | model="meta-llama/Llama-2-70b-chat-hf", 60 | messages=[ 61 | {"role": "system", "content": "\nYou are a helpful assistant speaking like a pirate. argh!"}, 62 | {"role": "user", "content": "What is the sun?"}, 63 | ], 64 | temperature=0.9, 65 | top_p=0.6, 66 | max_tokens=256, 67 | ) 68 | 69 | print(response) 70 | ``` 71 | the result will look like 72 | 73 | ```bash 74 | { 75 | "id": "hf-lVC2iTMkFJ", 76 | "object": "chat.completion", 77 | "created": 1690661144, 78 | "model": "meta-llama/Llama-2-70b-chat-hf", 79 | "choices": [ 80 | { 81 | "index": 0, 82 | "message": { 83 | "role": "assistant", 84 | "content": " Arrrr, the sun be a big ol' ball o' fire in the sky, me hearty! It be the source o' light and warmth for our fair planet, and it be a mighty powerful force, savvy? Without the sun, we'd be sailin' through the darkness, lost and cold, so let's give a hearty \"Yarrr!\" for the sun, me hearties! Arrrr!" 85 | }, 86 | "finish_reason": null 87 | } 88 | ], 89 | "usage": { 90 | "prompt_tokens": 111, 91 | "completion_tokens": 299, 92 | "total_tokens": 410 93 | } 94 | } 95 | ``` 96 | 97 | Check out other examples: 98 | * [Detailed ChatCompletion Example](notebooks/chat-completion-api.ipynb) 99 | * [Example how to stream chat requests](notebooks/stream-chat-completions.ipynb) 100 | * [Example how to stream text requests](notebooks/stream-text-completions.ipynb) 101 | * [Detailed Completion Example](notebooks/text-completion-api.ipynb) 102 | * [Create Embeddings](notebooks/get-embeddings) 103 | 104 | See the [documentation](https://philschmid.github.io/easyllm/) for more detailed usage and examples. 105 | 106 | ## 💪🏻 Migration from OpenAI to HuggingFace 107 | 108 | Migrating from OpenAI to HuggingFace is easy. Just change the import statement and the client you want to use and optionally the prompt builder. 
109 | 110 | ```diff 111 | - import openai 112 | + from easyllm.clients import huggingface 113 | + huggingface.prompt_builder = "llama2" 114 | 115 | 116 | - response = openai.ChatCompletion.create( 117 | + response = huggingface.ChatCompletion.create( 118 | - model="gpt-3.5-turbo", 119 | + model="meta-llama/Llama-2-70b-chat-hf", 120 | messages=[ 121 | {"role": "system", "content": "You are a helpful assistant."}, 122 | {"role": "user", "content": "Knock knock."}, 123 | ], 124 | ) 125 | ``` 126 | 127 | Make sure when you switch your client that your hyperparameters are still valid. For example, `temperature` of GPT-3 might be different than `temperature` of `Llama-2`. 128 | 129 | ## ☑️ Key Features 130 | 131 | ### 🤝 Compatible Clients 132 | 133 | - Implementation of clients compatible with OpenAI API format of `openai.ChatCompletion`, `openai.Completion`, `openai.Embedding`. 134 | - Easily switch between different LLMs like `openai.ChatCompletion` and `huggingface.ChatCompletion` by changing one line of code. 135 | - Support for streaming of completions, checkout example [How to stream completions](./notebooks/stream-chat-completions.ipynb). 136 | 137 | ### ⚙️ Helper Modules ⚙️ 138 | 139 | - `evol_instruct` (work in progress) - Use evolutionary algorithms create instructions for LLMs. 140 | 141 | - `prompt_utils` - Helper methods to easily convert between prompt formats like OpenAI Messages to prompts for open source models like Llama 2. 142 | 143 | ## 🙏 Contributing 144 | 145 | EasyLLM is an open source project and welcomes contributions of all kinds. 146 | 147 | The project uses [hatch](https://hatch.pypa.io/latest/) for development. To get started, fork the repository and clone 148 | it to your local machine. 149 | 150 | 0. Confirm [hatch](https://hatch.pypa.io/latest/install/) is installed (pipx is great to make it available globally on your machine) 151 | 1. Once in the project directory, run `hatch env create` to create a default virtual environment for development. 152 | 2. Activate the virtual environment with `hatch shell` 153 | 3. Start developing! 🤩 154 | 155 | ## 📔 Citation & Acknowledgements 156 | 157 | If you use EasyLLM, please share it with me on social media or email. I would love to hear about it! 158 | You can also cite the project using the following BibTeX: 159 | 160 | ```bash 161 | @software{Philipp_Schmid_EasyLLM_2023, 162 | author = {Philipp Schmid}, 163 | license = {Apache-2.0}, 164 | month = juj, 165 | title = {EasyLLM: Streamlined Tools for LLMs}, 166 | url = {https://github.com/philschmid/easyllm}, 167 | year = {2023} 168 | } 169 | ``` 170 | -------------------------------------------------------------------------------- /docs/clients/bedrock.md: -------------------------------------------------------------------------------- 1 | # Amazon Bedrock 2 | 3 | EasyLLM provides a client for interfacing with Amazon Bedrock models. 4 | 5 | - `bedrock.ChatCompletion` - a client for interfacing with Bedrock models that are compatible with the OpenAI ChatCompletion API. 6 | - `bedrock.Completion` - a client for interfacing with Bedrock models that are compatible with the OpenAI Completion API. 7 | - `bedrock.Embedding` - a client for interfacing with Bedrock models that are compatible with the OpenAI Embedding API. 8 | 9 | ## `bedrock.ChatCompletion` 10 | 11 | The `bedrock.ChatCompletion` client is used to interface with Bedrock models running on Text Generation inference that are compatible with the OpenAI ChatCompletion API. 
Checkout the [Examples](../examples/bedrock-chat-completion-api) 12 | 13 | 14 | ```python 15 | import os 16 | # set env for prompt builder 17 | os.environ["BEDROCK_PROMPT"] = "anthropic" # vicuna, wizardlm, stablebeluga, open_assistant 18 | os.environ["AWS_REGION"] = "us-east-1" # change to your region 19 | # os.environ["AWS_ACCESS_KEY_ID"] = "XXX" # needed if not using boto3 session 20 | # os.environ["AWS_SECRET_ACCESS_KEY"] = "XXX" # needed if not using boto3 session 21 | 22 | from easyllm.clients import bedrock 23 | 24 | response = bedrock.ChatCompletion.create( 25 | model="anthropic.claude-v2", 26 | messages=[ 27 | {"role": "user", "content": "What is 2 + 2?"}, 28 | ], 29 | temperature=0.9, 30 | top_p=0.6, 31 | max_tokens=1024, 32 | debug=False, 33 | ) 34 | ``` 35 | 36 | 37 | Supported parameters are: 38 | 39 | * `model` - The model to use for the completion. If not provided, defaults to the base url. 40 | * `messages` - `List[ChatMessage]` to use for the completion. 41 | * `temperature` - The temperature to use for the completion. Defaults to 0.9. 42 | * `top_p` - The top_p to use for the completion. Defaults to 0.6. 43 | * `top_k` - The top_k to use for the completion. Defaults to 10. 44 | * `n` - The number of completions to generate. Defaults to 1. 45 | * `max_tokens` - The maximum number of tokens to generate. Defaults to 1024. 46 | * `stop` - The stop sequence(s) to use for the completion. Defaults to None. 47 | * `stream` - Whether to stream the completion. Defaults to False. 48 | * `debug` - Whether to enable debug logging. Defaults to False. 49 | 50 | 51 | ### Build Prompt 52 | 53 | By default the `bedrock` client will try to read the `BEDROCK_PROMPT` environment variable and tries to map the value to the `PROMPT_MAPPING` dictionary. If this is not set, it will use the default prompt builder. 54 | You can also set it manually. 55 | 56 | Checkout the [Prompt Utils](../prompt_utils) for more details. 57 | 58 | 59 | manually setting the prompt builder: 60 | 61 | ```python 62 | from easyllm.clients import bedrock 63 | 64 | bedrock.prompt_builder = "anthropic" 65 | 66 | res = bedrock.ChatCompletion.create(...) 67 | ``` 68 | 69 | Using environment variable: 70 | 71 | ```python 72 | # can happen elsehwere 73 | import os 74 | os.environ["BEDROCK_PROMPT"] = "anthropic" 75 | 76 | from easyllm.clients import bedrock 77 | ``` -------------------------------------------------------------------------------- /docs/clients/huggingface.md: -------------------------------------------------------------------------------- 1 | # Hugging Face 2 | 3 | EasyLLM provides a client for interfacing with HuggingFace models. The client is compatible with the [HuggingFace Inference API](https://huggingface.co/docs/api-inference/index), [Hugging Face Inference Endpoints](https://huggingface.co/docs/inference-endpoints/index) or any Web Service running [Text Generation Inference](https://github.com/huggingface/text-generation-inference) or compatible API endpoints. 4 | 5 | - `huggingface.ChatCompletion` - a client for interfacing with HuggingFace models that are compatible with the OpenAI ChatCompletion API. 6 | - `huggingface.Completion` - a client for interfacing with HuggingFace models that are compatible with the OpenAI Completion API. 7 | - `huggingface.Embedding` - a client for interfacing with HuggingFace models that are compatible with the OpenAI Embedding API. 
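Because the client also works with Hugging Face Inference Endpoints or any Text Generation Inference server, you can point it at your own deployment instead of the free Inference API. The snippet below is a minimal sketch: the endpoint URL is a placeholder, the token is only needed for protected endpoints, and `model` is left out because a custom `api_base` is set (see the Environment Configuration section below).

```python
from easyllm.clients import huggingface

# placeholder URL - replace with your Inference Endpoint or local TGI server
huggingface.api_base = "https://YOUR-ENDPOINT.endpoints.huggingface.cloud"
huggingface.api_key = "hf_xxx"         # only needed for protected endpoints
huggingface.prompt_builder = "llama2"  # choose the builder matching the deployed model

response = huggingface.ChatCompletion.create(
    messages=[
        {"role": "user", "content": "What is the sun?"},
    ],
    max_tokens=256,
)
print(response["choices"][0]["message"]["content"])
```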
8 | 9 | ## `huggingface.ChatCompletion` 10 | 11 | The `huggingface.ChatCompletion` client is used to interface with HuggingFace models running on Text Generation inference that are compatible with the OpenAI ChatCompletion API. Checkout the [Examples](../examples/chat-completion-api) for more details and [How to stream completions](../examples/stream-chat-completion-api) for an example how to stream requests. 12 | 13 | 14 | ```python 15 | from easyllm.clients import huggingface 16 | 17 | # The module automatically loads the HuggingFace API key from the environment variable HUGGINGFACE_TOKEN or from the HuggingFace CLI configuration file. 18 | # huggingface.api_key="hf_xxx" 19 | huggingface.prompt_builder = "llama2" 20 | 21 | response = huggingface.ChatCompletion.create( 22 | model="meta-llama/Llama-2-70b-chat-hf", 23 | messages=[ 24 | {"role": "system", "content": "\nYou are a helpful, respectful and honest assistant."}, 25 | {"role": "user", "content": "Knock knock."}, 26 | ], 27 | temperature=0.9, 28 | top_p=0.6, 29 | max_tokens=1024, 30 | ) 31 | ``` 32 | 33 | 34 | Supported parameters are: 35 | 36 | * `model` - The model to use for the completion. If not provided, defaults to the base url. 37 | * `messages` - `List[ChatMessage]` to use for the completion. 38 | * `temperature` - The temperature to use for the completion. Defaults to 0.9. 39 | * `top_p` - The top_p to use for the completion. Defaults to 0.6. 40 | * `top_k` - The top_k to use for the completion. Defaults to 10. 41 | * `n` - The number of completions to generate. Defaults to 1. 42 | * `max_tokens` - The maximum number of tokens to generate. Defaults to 1024. 43 | * `stop` - The stop sequence(s) to use for the completion. Defaults to None. 44 | * `stream` - Whether to stream the completion. Defaults to False. 45 | * `frequency_penalty` - The frequency penalty to use for the completion. Defaults to 1.0. 46 | * `debug` - Whether to enable debug logging. Defaults to False. 47 | 48 | ## `huggingface.Completion` 49 | 50 | The `huggingface.Completion` client is used to interface with HuggingFace models running on Text Generation inference that are compatible with the OpenAI Completion API. Checkout the [Examples](../examples/text-completion-api) for more details and [How to stream completions](../examples/stream-text-completion-api) for an example how to stream requests. 51 | 52 | 53 | ```python 54 | from easyllm.clients import huggingface 55 | 56 | # The module automatically loads the HuggingFace API key from the environment variable HUGGINGFACE_TOKEN or from the HuggingFace CLI configuration file. 57 | # huggingface.api_key="hf_xxx" 58 | hubbingface.prompt_builder = "llama2" 59 | 60 | response = huggingface.Completion.create( 61 | model="meta-llama/Llama-2-70b-chat-hf", 62 | prompt="What is the meaning of life?", 63 | temperature=0.9, 64 | top_p=0.6, 65 | max_tokens=1024, 66 | ) 67 | ``` 68 | 69 | 70 | Supported parameters are: 71 | 72 | * `model` - The model to use for the completion. If not provided, defaults to the base url. 73 | * `prompt` - Text to use for the completion, if prompt_builder is set, prompt will be formatted with the prompt_builder. 74 | * `temperature` - The temperature to use for the completion. Defaults to 0.9. 75 | * `top_p` - The top_p to use for the completion. Defaults to 0.6. 76 | * `top_k` - The top_k to use for the completion. Defaults to 10. 77 | * `n` - The number of completions to generate. Defaults to 1. 78 | * `max_tokens` - The maximum number of tokens to generate. Defaults to 1024. 
79 | * `stop` - The stop sequence(s) to use for the completion. Defaults to None. 80 | * `stream` - Whether to stream the completion. Defaults to False. 81 | * `frequency_penalty` - The frequency penalty to use for the completion. Defaults to 1.0. 82 | * `debug` - Whether to enable debug logging. Defaults to False. 83 | * `echo` - Whether to echo the prompt. Defaults to False. 84 | * `logprobs` - Weather to return logprobs. Defaults to None. 85 | 86 | 87 | ## `huggingface.Embedding` 88 | 89 | The `huggingface.Embedding` client is used to interface with HuggingFace models running as an API that are compatible with the OpenAI Embedding API. Checkout the [Examples](../examples/get-embeddings) for more details. 90 | 91 | ```python 92 | from easyllm.clients import huggingface 93 | 94 | # The module automatically loads the HuggingFace API key from the environment variable HUGGINGFACE_TOKEN or from the HuggingFace CLI configuration file. 95 | # huggingface.api_key="hf_xxx" 96 | 97 | embedding = huggingface.Embedding.create( 98 | model="sentence-transformers/all-MiniLM-L6-v2", 99 | text="What is the meaning of life?", 100 | ) 101 | 102 | len(embedding["data"][0]["embedding"]) 103 | ``` 104 | 105 | Supported parameters are: 106 | 107 | * `model` - The model to use to create the embedding. If not provided, defaults to the base url. 108 | * `input` - `Union[str, List[str]]` document(s) to embed. 109 | 110 | 111 | ## Environment Configuration 112 | 113 | You can configure the `huggingface` client by setting environment variables or overwriting the default values. See below on how to adjust the HF token, url and prompt builder. 114 | 115 | ### Setting HF token 116 | 117 | By default the `huggingface` client will try to read the `HUGGINGFACE_TOKEN` environment variable. If this is not set, it will try to read the token from the `~/.huggingface` folder. If this is not set, it will not use a token. 118 | 119 | Alternatively you can set the token manually by setting `huggingface.api_key`. 120 | 121 | 122 | manually setting the api key: 123 | 124 | ```python 125 | from easyllm.clients import huggingface 126 | 127 | huggingface.api_key="hf_xxx" 128 | 129 | res = huggingface.ChatCompletion.create(...) 130 | ``` 131 | 132 | Using environment variable: 133 | 134 | ```python 135 | # can happen elsehwere 136 | import os 137 | os.environ["HUGGINGFACE_TOKEN"] = "hf_xxx" 138 | 139 | from easyllm.clients import huggingface 140 | ``` 141 | 142 | 143 | ### Changing url 144 | 145 | By default the `huggingface` client will try to read the `HUGGINGFACE_API_BASE` environment variable. If this is not set, it will use the default url `https://api-inference.huggingface.co/models`. This is helpful if you want to use a different url like `https://zj5lt7pmzqzbp0d1.us-east-1.aws.endpoints.huggingface.cloud` or a local url like `http://localhost:8000` or an Hugging Face Inference Endpoint. 146 | 147 | Alternatively you can set the url manually by setting `huggingface.api_base`. If you set a custom you have to leave the `model` parameter empty. 148 | 149 | manually setting the api base: 150 | 151 | ```python 152 | from easyllm.clients import huggingface 153 | 154 | huggingface.api_base="https://my-url" 155 | 156 | 157 | res = huggingface.ChatCompletion.create(...) 
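# note: with a custom api_base set, leave the `model` argument out of the create(...) call,
# since requests go directly to the configured endpoint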
158 | ``` 159 | 160 | Using environment variable: 161 | 162 | ```python 163 | # can happen elsehwere 164 | import os 165 | os.environ["HUGGINGFACE_API_BASE"] = "https://my-url" 166 | 167 | from easyllm.clients import huggingface 168 | ``` 169 | 170 | 171 | 172 | 173 | ### Build Prompt 174 | 175 | By default the `huggingface` client will try to read the `HUGGINGFACE_PROMPT` environment variable and tries to map the value to the `PROMPT_MAPPING` dictionary. If this is not set, it will use the default prompt builder. 176 | You can also set it manually. 177 | 178 | Checkout the [Prompt Utils](../prompt_utils) for more details. 179 | 180 | 181 | manually setting the prompt builder: 182 | 183 | ```python 184 | from easyllm.clients import huggingface 185 | 186 | huggingface.prompt_builder = "llama2" 187 | 188 | res = huggingface.ChatCompletion.create(...) 189 | ``` 190 | 191 | Using environment variable: 192 | 193 | ```python 194 | # can happen elsehwere 195 | import os 196 | os.environ["HUGGINGFACE_PROMPT"] = "llama2" 197 | 198 | from easyllm.clients import huggingface 199 | ``` -------------------------------------------------------------------------------- /docs/clients/index.md: -------------------------------------------------------------------------------- 1 | # Clients 2 | 3 | In the context of EasyLLM, a "client" refers to the code that interfaces with a particular LLM API, e.g. OpenAI. 4 | 5 | Currently supported clients are: 6 | 7 | - `ChatCompletion` - ChatCompletion clients are used to interface with LLMs that are compatible with the OpenAI ChatCompletion API. 8 | - `Completion` - Completion clients are used to interface with LLMs that are compatible with the OpenAI Completion API. 9 | - `Embedding` - Embedding clients are used to interface with LLMs that are compatible with the OpenAI Embedding API. 10 | 11 | Currently supported clients are: 12 | 13 | ## Hugging Face 14 | 15 | - [huggingface.ChatCompletion](huggingface/#huggingfacechatcompletion) - a client for interfacing with HuggingFace models that are compatible with the OpenAI ChatCompletion API. 16 | - [huggingface.Completion](huggingface/#huggingfacechatcompletion) - a client for interfacing with HuggingFace models that are compatible with the OpenAI Completion API. 17 | - [huggingface.Embedding](huggingface/#huggingfacechatcompletion) - a client for interfacing with HuggingFace models that are compatible with the OpenAI Embedding API. 18 | 19 | ## Amazon SageMaker 20 | 21 | - [sagemaker.ChatCompletion](sagemaker/#sagemakerchatcompletion) - a client for interfacing with Amazon SageMaker models that are compatible with the OpenAI ChatCompletion API. 22 | - [sagemaker.Completion](sagemaker/#sagemakercompletion) - a client for interfacing with Amazon SageMaker models that are compatible with the OpenAI Completion API. 23 | - [sagemaker.Embedding](sagemaker/#sagemakerembedding) - a client for interfacing with Amazon SageMaker models that are compatible with the OpenAI Embedding API. 24 | 25 | ## Amazon Bedrock 26 | 27 | - [bedrock.ChatCompletion](bedrock/#bedrockchatcompletion) - a client for interfacing with Amazon Bedrock models that are compatible with the OpenAI ChatCompletion API. 28 | -------------------------------------------------------------------------------- /docs/clients/sagemaker.md: -------------------------------------------------------------------------------- 1 | # Amazon SageMaker 2 | 3 | EasyLLM provides a client for interfacing with Amazon SageMaker models. 
4 | 5 | - `sagemaker.ChatCompletion` - a client for interfacing with sagemaker models that are compatible with the OpenAI ChatCompletion API. 6 | - `sagemaker.Completion` - a client for interfacing with sagemaker models that are compatible with the OpenAI Completion API. 7 | - `sagemaker.Embedding` - a client for interfacing with sagemaker models that are compatible with the OpenAI Embedding API. 8 | 9 | ## `sagemaker.ChatCompletion` 10 | 11 | The `sagemaker.ChatCompletion` client is used to interface with sagemaker models running on Text Generation inference that are compatible with the OpenAI ChatCompletion API. Checkout the [Examples](../examples/sagemaker-chat-completion-api) 12 | 13 | 14 | ```python 15 | import os 16 | from easyllm.clients import sagemaker 17 | 18 | # set env for prompt builder 19 | os.environ["HUGGINGFACE_PROMPT"] = "llama2" # vicuna, wizardlm, stablebeluga, open_assistant 20 | os.environ["AWS_REGION"] = "us-east-1" # change to your region 21 | # os.environ["AWS_ACCESS_KEY_ID"] = "XXX" # needed if not using boto3 session 22 | # os.environ["AWS_SECRET_ACCESS_KEY"] = "XXX" # needed if not using boto3 session 23 | 24 | 25 | response = sagemaker.ChatCompletion.create( 26 | model="huggingface-pytorch-tgi-inference-2023-08-08-14-15-52-703", 27 | messages=[ 28 | {"role": "system", "content": "\nYou are a helpful, respectful and honest assistant."}, 29 | {"role": "user", "content": "Knock knock."}, 30 | ], 31 | temperature=0.9, 32 | top_p=0.6, 33 | max_tokens=1024, 34 | ) 35 | ``` 36 | 37 | 38 | Supported parameters are: 39 | 40 | * `model` - The model to use for the completion. If not provided, defaults to the base url. 41 | * `messages` - `List[ChatMessage]` to use for the completion. 42 | * `temperature` - The temperature to use for the completion. Defaults to 0.9. 43 | * `top_p` - The top_p to use for the completion. Defaults to 0.6. 44 | * `top_k` - The top_k to use for the completion. Defaults to 10. 45 | * `n` - The number of completions to generate. Defaults to 1. 46 | * `max_tokens` - The maximum number of tokens to generate. Defaults to 1024. 47 | * `stop` - The stop sequence(s) to use for the completion. Defaults to None. 48 | * `stream` - Whether to stream the completion. Defaults to False. 49 | * `frequency_penalty` - The frequency penalty to use for the completion. Defaults to 1.0. 50 | * `debug` - Whether to enable debug logging. Defaults to False. 51 | 52 | ## `sagemaker.Completion` 53 | 54 | The `sagemaker.Completion` client is used to interface with sagemaker models running on Text Generation inference that are compatible with the OpenAI Completion API. Checkout the [Examples](../examples/sagemaker-text-completion-api). 55 | 56 | 57 | ```python 58 | import os 59 | from easyllm.clients import sagemaker 60 | 61 | # set env for prompt builder 62 | os.environ["HUGGINGFACE_PROMPT"] = "llama2" # vicuna, wizardlm, stablebeluga, open_assistant 63 | os.environ["AWS_REGION"] = "us-east-1" # change to your region 64 | # os.environ["AWS_ACCESS_KEY_ID"] = "XXX" # needed if not using boto3 session 65 | # os.environ["AWS_SECRET_ACCESS_KEY"] = "XXX" # needed if not using boto3 session 66 | 67 | response = sagemaker.Completion.create( 68 | model="meta-llama/Llama-2-70b-chat-hf", 69 | prompt="What is the meaning of life?", 70 | temperature=0.9, 71 | top_p=0.6, 72 | max_tokens=1024, 73 | ) 74 | ``` 75 | 76 | 77 | Supported parameters are: 78 | 79 | * `model` - The model to use for the completion. If not provided, defaults to the base url. 
80 | * `prompt` - Text to use for the completion, if prompt_builder is set, prompt will be formatted with the prompt_builder. 81 | * `temperature` - The temperature to use for the completion. Defaults to 0.9. 82 | * `top_p` - The top_p to use for the completion. Defaults to 0.6. 83 | * `top_k` - The top_k to use for the completion. Defaults to 10. 84 | * `n` - The number of completions to generate. Defaults to 1. 85 | * `max_tokens` - The maximum number of tokens to generate. Defaults to 1024. 86 | * `stop` - The stop sequence(s) to use for the completion. Defaults to None. 87 | * `stream` - Whether to stream the completion. Defaults to False. 88 | * `frequency_penalty` - The frequency penalty to use for the completion. Defaults to 1.0. 89 | * `debug` - Whether to enable debug logging. Defaults to False. 90 | * `echo` - Whether to echo the prompt. Defaults to False. 91 | * `logprobs` - Weather to return logprobs. Defaults to None. 92 | 93 | 94 | ## `sagemaker.Embedding` 95 | 96 | The `sagemaker.Embedding` client is used to interface with sagemaker models running as an API that are compatible with the OpenAI Embedding API. Checkout the [Examples](../examples/sagemaker-get-embeddings) for more details. 97 | 98 | ```python 99 | import os 100 | # set env for prompt builder 101 | os.environ["HUGGINGFACE_PROMPT"] = "llama2" # vicuna, wizardlm, stablebeluga, open_assistant 102 | os.environ["AWS_REGION"] = "us-east-1" # change to your region 103 | # os.environ["AWS_ACCESS_KEY_ID"] = "XXX" # needed if not using boto3 session 104 | # os.environ["AWS_SECRET_ACCESS_KEY"] = "XXX" # needed if not using boto3 session 105 | 106 | from easyllm.clients import sagemaker 107 | 108 | embedding = sagemaker.Embedding.create( 109 | model="SageMakerModelEmbeddingEndpoint24E49D09-64prhjuiWUtE", 110 | input="That's a nice car.", 111 | ) 112 | 113 | len(embedding["data"][0]["embedding"]) 114 | ``` 115 | 116 | Supported parameters are: 117 | 118 | * `model` - The model to use to create the embedding. If not provided, defaults to the base url. 119 | * `input` - `Union[str, List[str]]` document(s) to embed. 120 | 121 | 122 | ## Environment Configuration 123 | 124 | You can configure the `sagemaker` client by setting environment variables or overwriting the default values. See below on how to adjust the HF token, url and prompt builder. 125 | 126 | ### Setting Credentials 127 | 128 | By default the `sagemaker` client will try to read the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variable. If this is not set, it will try to use `boto3`. 129 | 130 | Alternatively you can set the token manually by setting `sagemaker.*`. 131 | 132 | manually setting the api key: 133 | 134 | ```python 135 | from easyllm.clients import sagemaker 136 | 137 | sagemaker.api_aws_access_key="xxx" 138 | sagemaker.api_aws_secret_key="xxx" 139 | 140 | res = sagemaker.ChatCompletion.create(...) 141 | ``` 142 | 143 | Using environment variable: 144 | 145 | ```python 146 | # can happen elsehwere 147 | import os 148 | os.environ["AWS_ACCESS_KEY_ID"] = "xxx" 149 | os.environ["AWS_SECRET_ACCESS_KEY"] = "xxx" 150 | 151 | from easyllm.clients import sagemaker 152 | ``` 153 | 154 | 155 | ### Build Prompt 156 | 157 | By default the `sagemaker` client will try to read the `sagemaker_PROMPT` environment variable and tries to map the value to the `PROMPT_MAPPING` dictionary. If this is not set, it will use the default prompt builder. 158 | You can also set it manually. 159 | 160 | Checkout the [Prompt Utils](../prompt_utils) for more details. 
161 | 162 | 163 | manually setting the prompt builder: 164 | 165 | ```python 166 | from easyllm.clients import sagemaker 167 | 168 | sagemaker.prompt_builder = "llama2" 169 | 170 | res = sagemaker.ChatCompletion.create(...) 171 | ``` 172 | 173 | Using environment variable: 174 | 175 | ```python 176 | # can happen elsehwere 177 | import os 178 | os.environ["HUGGINGFACE_PROMPT"] = "llama2" 179 | 180 | from easyllm.clients import sagemaker 181 | ``` -------------------------------------------------------------------------------- /docs/examples/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philschmid/easyllm/1f37a9307d68545f41639f392a301baadda7188a/docs/examples/.gitkeep -------------------------------------------------------------------------------- /docs/examples/index.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | Here are some examples to help you get started with the easyllm library: 4 | 5 | ## Hugging Face 6 | 7 | | Example | Description | 8 | | ----------------------------------------------------------------------- | -------------------------------------------------------------------------------------- | 9 | | [Detailed ChatCompletion Example](chat-completion-api) | Shows how to use the ChatCompletion API to have a conversational chat with the model. | 10 | | [Detailed Completion Example](text-completion-api) | Uses the TextCompletion API to generate text with the model. | 11 | | [Create Embeddings](get-embeddings) | Embeds text into vector representations using the model. | 12 | | [Example how to stream chat requests](stream-chat-completions) | Demonstrates streaming multiple chat requests to efficiently chat with the model. | 13 | | [Example how to stream text requests](stream-text-completions) | Shows how to stream multiple text completion requests. | 14 | | [Hugging Face Inference Endpoints Example](inference-endpoints-example) | Example on how to use custom endpoints, e.g. Inference Endpoints or localhost. | 15 | | [Retrieval Augmented Generation using Llama 2](llama2-rag-example) | Example on how to use Llama 2 70B for in-context retrival augmentation | 16 | | [Llama 2 70B Agent/Tool use example ](llama2-agent-example) | Example on how to use Llama 2 70B to interace with tools and could be used as an agent | 17 | 18 | The examples cover the main functionality of the library - chat, text completion, and embeddings. Let me know if you would like me to modify or expand the index page in any way. 19 | 20 | ## Amazon SageMaker 21 | 22 | | Example | Description | 23 | | ---------------------------------------------------------------- | ------------------------------------------------------------------------------------- | 24 | | [Detailed ChatCompletion Example](sagemaker-chat-completion-api) | Shows how to use the ChatCompletion API to have a conversational chat with the model. | 25 | | [Detailed Completion Example](sagemaker-text-completion-api) | Uses the TextCompletion API to generate text with the model. | 26 | | [Create Embeddings](sagemaker-get-embeddings) | Embeds text into vector representations using the model. 
| 27 | 28 | ## Amazon Bedrock 29 | 30 | | Example | Description | 31 | | ---------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | 32 | | [Detailed ChatCompletion Example](bedrock-chat-completion-api) | Shows how to use the ChatCompletion API to have a conversational chat with the model. | 33 | | [Example how to stream chat requests](bedrock-stream-chat-completions) | Demonstrates streaming multiple chat requests to efficiently chat with the model. | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # EasyLLM 2 | 3 | EasyLLM is an open source project that provides helpful tools and methods for working with large language models (LLMs), both open source and closed source. 4 | 5 | EasyLLM implements clients that are compatible with OpenAI's Completion API. This means you can easily replace `openai.ChatCompletion` with, for example, `huggingface.ChatCompletion`. 6 | 7 | * [ChatCompletion Clients](./clients) 8 | * [Prompt Utils](./prompt_utils) 9 | * [Examples](./examples) 10 | 11 | ## 🚀 Getting Started 12 | 13 | Install EasyLLM via pip: 14 | 15 | ```bash 16 | pip install easyllm 17 | ``` 18 | 19 | Then import and start using the clients: 20 | 21 | ```python 22 | 23 | from easyllm.clients import huggingface 24 | 25 | # helper to build llama2 prompt 26 | huggingface.prompt_builder = "llama2" 27 | 28 | response = huggingface.ChatCompletion.create( 29 | model="meta-llama/Llama-2-70b-chat-hf", 30 | messages=[ 31 | {"role": "system", "content": "\nYou are a helpful assistant speaking like a pirate. argh!"}, 32 | {"role": "user", "content": "What is the sun?"}, 33 | ], 34 | temperature=0.9, 35 | top_p=0.6, 36 | max_tokens=256, 37 | ) 38 | 39 | print(response) 40 | ``` 41 | the result will look like 42 | 43 | ```bash 44 | { 45 | "id": "hf-lVC2iTMkFJ", 46 | "object": "chat.completion", 47 | "created": 1690661144, 48 | "model": "meta-llama/Llama-2-70b-chat-hf", 49 | "choices": [ 50 | { 51 | "index": 0, 52 | "message": { 53 | "role": "assistant", 54 | "content": " Arrrr, the sun be a big ol' ball o' fire in the sky, me hearty! It be the source o' light and warmth for our fair planet, and it be a mighty powerful force, savvy? Without the sun, we'd be sailin' through the darkness, lost and cold, so let's give a hearty \"Yarrr!\" for the sun, me hearties! Arrrr!" 55 | }, 56 | "finish_reason": null 57 | } 58 | ], 59 | "usage": { 60 | "prompt_tokens": 111, 61 | "completion_tokens": 299, 62 | "total_tokens": 410 63 | } 64 | } 65 | ``` 66 | 67 | Check out other examples: 68 | 69 | * [Detailed ChatCompletion Example](examples/chat-completion-api) 70 | * [Example how to stream chat requests](examples/stream-chat-completion) 71 | * [Example how to stream text requests](examples/stream-text-completion) 72 | * [Detailed Completion Example](examples/text-completion-api) 73 | * [Create Embeddings](examples/get-embeddings) 74 | 75 | 76 | ## 💪🏻 Migration from OpenAI to HuggingFace 77 | 78 | Migrating from OpenAI to HuggingFace is easy. Just change the import statement and the client you want to use and optionally the prompt builder. 
79 | 80 | ```diff 81 | - import openai 82 | + from easyllm.clients import huggingface 83 | + huggingface.prompt_builder = "llama2" 84 | 85 | 86 | - response = openai.ChatCompletion.create( 87 | + response = huggingface.ChatCompletion.create( 88 | - model="gpt-3.5-turbo", 89 | + model="meta-llama/Llama-2-70b-chat-hf", 90 | messages=[ 91 | {"role": "system", "content": "You are a helpful assistant."}, 92 | {"role": "user", "content": "Knock knock."}, 93 | ], 94 | ) 95 | ``` 96 | 97 | Make sure when you switch your client that your hyperparameters are still valid. For example, `temperature` of GPT-3 might be different than `temperature` of `Llama-2`. 98 | 99 | ## ☑️ Key Features 100 | 101 | ### 🤝 Compatible Clients 102 | 103 | - Implementation of clients compatible with OpenAI API format of `openai.ChatCompletion`. 104 | - Easily switch between different LLMs like `openai.ChatCompletion` and `huggingface.ChatCompletion` by changing one line of code. 105 | - Support for streaming of completions, checkout example [How to stream completions](examples/stream-chat-completions). 106 | 107 | ### ⚙️ Helper Modules ⚙️ 108 | 109 | - `evol_instruct` (work in progress) - Use evolutionary algorithms create instructions for LLMs. 110 | 111 | - `prompt_utils` - Helper methods to easily convert between prompt formats like OpenAI Messages to prompts for open source models like Llama 2. 112 | 113 | ## 📔 Citation & Acknowledgements 114 | 115 | If you use EasyLLM, please share it with me on social media or email. I would love to hear about it! 116 | You can also cite the project using the following BibTeX: 117 | 118 | ```bash 119 | @software{Philipp_Schmid_EasyLLM_2023, 120 | author = {Philipp Schmid}, 121 | license = {Apache-2.0}, 122 | month = juj, 123 | title = {EasyLLM: Streamlined Tools for LLMs}, 124 | url = {https://github.com/philschmid/easyllm}, 125 | year = {2023} 126 | } 127 | ``` 128 | 129 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | 2 | # Installation 3 | 4 | ## with pip recommended 5 | 6 | EasyLLM is published as a [Python package] and can be installed with 7 | `pip` from pypi or from the Github repository, Open up a terminal and install. 8 | === "Latest" 9 | 10 | ``` sh 11 | pip install easyllm 12 | ``` 13 | === "Github" 14 | 15 | ``` sh 16 | pip install git+https://github.com/philschmid/easyllm 17 | ``` -------------------------------------------------------------------------------- /docs/prompt_utils.md: -------------------------------------------------------------------------------- 1 | # Prompt utilities 2 | 3 | The `prompt_utils` module contains functions to assist with converting Message's Dictionaries into prompts that can be used with `ChatCompletion` clients. 
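Each builder follows the same pattern: it takes a list of message dictionaries and returns a single prompt string, which the `ChatCompletion` clients apply automatically via `prompt_builder`, or which you can pass yourself to a `Completion`-style call. The sketch below is illustrative: the model name is just an example, and `huggingface.prompt_builder` is intentionally left unset so the already-built prompt is not formatted a second time.

```python
from easyllm.clients import huggingface
from easyllm.prompt_utils import build_llama2_prompt

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the sun?"},
]

# convert OpenAI-style messages into a single Llama 2 prompt string
prompt = build_llama2_prompt(messages)

# pass the built prompt directly as a text-completion prompt
# (prompt_builder stays unset here, otherwise the client would format the prompt again)
response = huggingface.Completion.create(
    model="meta-llama/Llama-2-70b-chat-hf",
    prompt=prompt,
    max_tokens=256,
)
print(response)
```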
4 | 5 | Supported prompt formats: 6 | 7 | - [Prompt utilities](#prompt-utilities) 8 | - [Set prompt builder for client](#set-prompt-builder-for-client) 9 | - [Llama 2 Chat builder](#llama-2-chat-builder) 10 | - [Vicuna Chat builder](#vicuna-chat-builder) 11 | - [Hugging Face ChatML builder](#hugging-face-chatml-builder) 12 | - [StarChat](#starchat) 13 | - [Falcon](#falcon) 14 | - [WizardLM Chat builder](#wizardlm-chat-builder) 15 | - [StableBeluga2 Chat builder](#stablebeluga2-chat-builder) 16 | - [Open Assistant Chat builder](#open-assistant-chat-builder) 17 | - [Anthropic Claude Chat builder](#anthropic-claude-chat-builder) 18 | 19 | Prompt utils are also exporting a mapping dictionary `PROMPT_MAPPING` that maps a model name to a prompt builder function. This can be used to select the correct prompt builder function via an environment variable. 20 | 21 | ```python 22 | PROMPT_MAPPING = { 23 | "chatml_falcon": build_chatml_falcon_prompt, 24 | "chatml_starchat": build_chatml_starchat_prompt, 25 | "llama2": build_llama2_prompt, 26 | "open_assistant": build_open_assistant_prompt, 27 | "stablebeluga": build_stablebeluga_prompt, 28 | "vicuna": build_vicuna_prompt, 29 | "wizardlm": build_wizardlm_prompt, 30 | } 31 | ``` 32 | 33 | ## Set prompt builder for client 34 | 35 | ```python 36 | from easyllm.clients import huggingface 37 | 38 | huggingface.prompt_builder = "llama2" # vicuna, chatml_falcon, chatml_starchat, wizardlm, stablebeluga, open_assistant 39 | ``` 40 | 41 | ## Llama 2 Chat builder 42 | 43 | Creates LLama 2 chat prompt for chat conversations. Learn more in the [Hugging Face Blog on how to prompt Llama 2](https://huggingface.co/blog/llama2#how-to-prompt-llama-2). If a `Message` with an unsupported `role` is passed, an error will be thrown. 44 | 45 | Example Models: 46 | 47 | * [meta-llama/Llama-2-70b-chat-hf](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) 48 | 49 | ```python 50 | from easyllm.prompt_utils import build_llama2_prompt 51 | 52 | messages=[ 53 | {"role": "system", "content": "You are a helpful assistant."}, 54 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 55 | ] 56 | prompt = build_llama2_prompt(messages) 57 | ``` 58 | 59 | 60 | ## Vicuna Chat builder 61 | 62 | Creats a Vicuna prompt for a chat conversation. If a `Message` with an unsupported `role` is passed, an error will be thrown. [Reference](https://github.com/lm-sys/FastChat/blob/main/docs/vicuna_weights_version.md#prompt-template) 63 | 64 | Example Models: 65 | 66 | * [ehartford/WizardLM-13B-V1.0-Uncensored](https://huggingface.co/ehartford/WizardLM-13B-V1.0-Uncensored) 67 | 68 | 69 | ```python 70 | from easyllm.prompt_utils import build_vicuna_prompt 71 | 72 | messages=[ 73 | {"role": "system", "content": "You are a helpful assistant."}, 74 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 75 | ] 76 | prompt = build_vicuna_prompt(messages) 77 | ``` 78 | 79 | ## Hugging Face ChatML builder 80 | 81 | Creates a Hugging Face ChatML prompt for a chat conversation. The Hugging Face ChatML has different prompts for different Example Models, e.g. StarChat or Falcon. If a `Message` with an unsupported `role` is passed, an error will be thrown. 
[Reference](https://huggingface.co/HuggingFaceH4/starchat-beta) 82 | 83 | Example Models: 84 | * [HuggingFaceH4/starchat-beta](https://huggingface.co/HuggingFaceH4/starchat-beta) 85 | 86 | ### StarChat 87 | 88 | ```python 89 | from easyllm.prompt_utils import build_chatml_starchat_prompt 90 | 91 | messages=[ 92 | {"role": "system", "content": "You are a helpful assistant."}, 93 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 94 | ] 95 | prompt = build_chatml_starchat_prompt(messages) 96 | ``` 97 | 98 | ### Falcon 99 | 100 | ```python 101 | from easyllm.prompt_utils import build_chatml_falcon_prompt 102 | 103 | messages=[ 104 | {"role": "system", "content": "You are a helpful assistant."}, 105 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 106 | ] 107 | prompt = build_chatml_falcon_prompt(messages) 108 | ``` 109 | 110 | ## WizardLM Chat builder 111 | 112 | Creates a WizardLM prompt for a chat conversation. If a `Message` with an unsupported `role` is passed, an error will be thrown. [Reference](https://github.com/nlpxucan/WizardLM/blob/main/WizardLM/src/infer_wizardlm13b.py#L79) 113 | 114 | Example Models: 115 | 116 | * [WizardLM/WizardLM-13B-V1.2](https://huggingface.co/WizardLM/WizardLM-13B-V1.2) 117 | 118 | ```python 119 | from easyllm.prompt_utils import build_wizardlm_prompt 120 | 121 | messages=[ 122 | {"role": "system", "content": "You are a helpful assistant."}, 123 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 124 | ] 125 | prompt = build_wizardlm_prompt(messages) 126 | ``` 127 | 128 | ## StableBeluga2 Chat builder 129 | 130 | Creates StableBeluga2 prompt for a chat conversation. If a `Message` with an unsupported `role` is passed, an error will be thrown. [Reference](https://huggingface.co/stabilityai/StableBeluga2) 131 | 132 | ```python 133 | from easyllm.prompt_utils import build_stablebeluga_prompt 134 | 135 | messages=[ 136 | {"role": "system", "content": "You are a helpful assistant."}, 137 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 138 | ] 139 | prompt = build_stablebeluga_prompt(messages) 140 | ``` 141 | 142 | ## Open Assistant Chat builder 143 | 144 | Creates Open Assistant ChatML template. Uses `<|prompter|>`, ``, `<|system|>`, and `<|assistant>` tokens. If a . If a `Message` with an unsupported `role` is passed, an error will be thrown. [Reference](https://huggingface.co/OpenAssistant/llama2-13b-orca-8k-33192) 145 | 146 | Example Models: 147 | 148 | * [OpenAssistant/llama2-13b-orca-8k-3319](https://huggingface.co/OpenAssistant/llama2-13b-orca-8k-33192) 149 | 150 | ```python 151 | from easyllm.prompt_utils import build_open_assistant_prompt 152 | 153 | messages=[ 154 | {"role": "system", "content": "You are a helpful assistant."}, 155 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 156 | ] 157 | prompt = build_open_assistant_prompt(messages) 158 | ``` 159 | 160 | ## Anthropic Claude Chat builder 161 | 162 | Creates Anthropic Claude template. Uses `\n\nHuman:`, `\n\nAssistant:`. If a . If a `Message` with an unsupported `role` is passed, an error will be thrown. 
[Reference](https://docs.anthropic.com/claude/docs/introduction-to-prompt-design) 163 | 164 | Example Models: 165 | 166 | * [Bedrock](https://aws.amazon.com/bedrock/claude/) 167 | 168 | ```python 169 | from easyllm.prompt_utils import build_anthropic_prompt 170 | 171 | messages=[ 172 | {"role": "system", "content": "You are a helpful assistant."}, 173 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 174 | ] 175 | prompt = build_anthropic_prompt(messages) 176 | ``` 177 | 178 | -------------------------------------------------------------------------------- /easyllm/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023-present philschmid 2 | # 3 | # SPDX-License-Identifier: MIT 4 | __version__ = "0.7.0.dev0" 5 | -------------------------------------------------------------------------------- /easyllm/cli.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | 4 | def parse_args(): 5 | parser = ArgumentParser(description="Description of your program") 6 | parser.add_argument("-f", "--foo", help="Description for foo argument", required=True) 7 | 8 | return parser.parse_args() 9 | 10 | 11 | def main(): 12 | args = parse_args() 13 | print(args) 14 | -------------------------------------------------------------------------------- /easyllm/clients/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philschmid/easyllm/1f37a9307d68545f41639f392a301baadda7188a/easyllm/clients/__init__.py -------------------------------------------------------------------------------- /easyllm/clients/bedrock.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | from typing import Any, Dict, List, Optional 5 | 6 | from nanoid import generate 7 | 8 | from easyllm.prompt_utils.base import build_prompt, buildBasePrompt 9 | from easyllm.schema.base import ChatMessage, Usage, dump_object 10 | from easyllm.schema.openai import ( 11 | ChatCompletionRequest, 12 | ChatCompletionResponse, 13 | ChatCompletionResponseChoice, 14 | ChatCompletionResponseStreamChoice, 15 | ChatCompletionStreamResponse, 16 | DeltaMessage, 17 | ) 18 | from easyllm.utils import setup_logger 19 | from easyllm.utils.aws import get_bedrock_client 20 | 21 | logger = setup_logger() 22 | 23 | # default parameters 24 | api_type = "bedrock" 25 | api_aws_access_key = os.environ.get("AWS_ACCESS_KEY_ID", None) 26 | api_aws_secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) 27 | api_aws_session_token = os.environ.get("AWS_SESSION_TOKEN", None) 28 | 29 | client = get_bedrock_client( 30 | aws_access_key_id=api_aws_access_key, 31 | aws_secret_access_key=api_aws_secret_key, 32 | aws_session_token=api_aws_session_token, 33 | ) 34 | 35 | 36 | SUPPORTED_MODELS = [ 37 | "anthropic.claude-v2", 38 | ] 39 | model_version_mapping = {"anthropic.claude-v2": "bedrock-2023-05-31"} 40 | 41 | api_version = os.environ.get("BEDROCK_API_VERSION", None) or "bedrock-2023-05-31" 42 | prompt_builder = os.environ.get("BEDROCK_PROMPT", None) 43 | stop_sequences = [] 44 | 45 | 46 | def stream_chat_request(client, body, model): 47 | """Utility function for streaming chat requests.""" 48 | id = f"hf-{generate(size=10)}" 49 | response = client.invoke_model_with_response_stream( 50 | body=json.dumps(body), modelId=model, 
accept="application/json", contentType="application/json" 51 | ) 52 | stream = response.get("body") 53 | 54 | yield dump_object( 55 | ChatCompletionStreamResponse( 56 | id=id, 57 | model=model, 58 | choices=[ChatCompletionResponseStreamChoice(index=0, delta=DeltaMessage(role="assistant"))], 59 | ) 60 | ) 61 | # yield each generated token 62 | reason = None 63 | for _idx, event in enumerate(stream): 64 | chunk = event.get("chunk") 65 | if chunk: 66 | chunk_obj = json.loads(chunk.get("bytes").decode()) 67 | text = chunk_obj["completion"] 68 | yield dump_object( 69 | ChatCompletionStreamResponse( 70 | id=id, 71 | model=model, 72 | choices=[ChatCompletionResponseStreamChoice(index=0, delta=DeltaMessage(content=text))], 73 | ) 74 | ) 75 | yield dump_object( 76 | ChatCompletionStreamResponse( 77 | id=id, 78 | model=model, 79 | choices=[ChatCompletionResponseStreamChoice(index=0, finish_reason=reason, delta={})], 80 | ) 81 | ) 82 | 83 | 84 | class ChatCompletion: 85 | @staticmethod 86 | def create( 87 | messages: List[ChatMessage], 88 | model: Optional[str] = None, 89 | temperature: float = 0.9, 90 | top_p: float = 0.6, 91 | top_k: Optional[int] = 10, 92 | n: int = 1, 93 | max_tokens: int = 1024, 94 | stop: Optional[List[str]] = None, 95 | stream: bool = False, 96 | frequency_penalty: Optional[float] = 1.0, 97 | debug: bool = False, 98 | ) -> Dict[str, Any]: 99 | """ 100 | Creates a new chat completion for the provided messages and parameters. 101 | 102 | Args: 103 | messages (`List[ChatMessage]`): The messages to use for the completion. 104 | model (`str`, *optional*, defaults to None): The model to use for the completion. Must be one of 105 | the models listed in `SUPPORTED_MODELS`. 106 | temperature (`float`, defaults to 0.9): The temperature to use for the completion. 107 | top_p (`float`, defaults to 0.6): The top_p to use for the completion. 108 | top_k (`int`, *optional*, defaults to 10): The top_k to use for the completion. 109 | n (`int`, defaults to 1): The number of completions to generate. 110 | max_tokens (`int`, defaults to 1024): The maximum number of tokens to generate. 111 | stop (`List[str]`, *optional*, defaults to None): The stop sequence(s) to use for the completion. 112 | stream (`bool`, defaults to False): Whether to stream the completion. 113 | frequency_penalty (`float`, *optional*, defaults to 1.0): The frequency penalty to use for the completion. 114 | debug (`bool`, defaults to False): Whether to enable debug logging. 115 | 116 | Tip: Prompt builder 117 | Make sure to always use a prompt builder for your model. 118 | """ 119 | if debug: 120 | logger.setLevel(logging.DEBUG) 121 | 122 | # validate that the model is supported 123 | if model not in SUPPORTED_MODELS: 124 | raise ValueError(f"Model {model} is not supported. Supported models are: {SUPPORTED_MODELS}") 125 | 126 | request = ChatCompletionRequest( 127 | messages=messages, 128 | model=model, 129 | temperature=temperature, 130 | top_p=top_p, 131 | top_k=top_k, 132 | n=n, 133 | max_tokens=max_tokens, 134 | stop=stop, 135 | stream=stream, 136 | frequency_penalty=frequency_penalty, 137 | ) 138 | 139 | if prompt_builder is None: 140 | logger.warn( 141 | f"""bedrock.prompt_builder is not set. 142 | Using the default prompt builder. The prompt sent to the model will be: 143 | ---------------------------------------- 144 | {buildBasePrompt(request.messages)}. 145 | ---------------------------------------- 146 | If you want to use a custom prompt builder, set bedrock.prompt_builder to a function that takes a list of messages and returns a string.
147 | You can also use existing prompt builders by importing them from easyllm.prompt_utils""" 148 | ) 149 | prompt = buildBasePrompt(request.messages) 150 | else: 151 | prompt = build_prompt(request.messages, prompt_builder) 152 | 153 | # create stop sequences 154 | if isinstance(request.stop, list): 155 | stop = stop_sequences + request.stop 156 | elif isinstance(request.stop, str): 157 | stop = stop_sequences + [request.stop] 158 | else: 159 | stop = stop_sequences 160 | logger.debug(f"Stop sequences:\n{stop}") 161 | 162 | # check if we can stream 163 | if request.stream is True and request.n > 1: 164 | raise ValueError("Cannot stream more than one completion") 165 | 166 | # construct body 167 | body = { 168 | "prompt": prompt, 169 | "max_tokens_to_sample": request.max_tokens, 170 | "temperature": request.temperature, 171 | "top_k": request.top_k, 172 | "top_p": request.top_p, 173 | "stop_sequences": stop, 174 | "anthropic_version": model_version_mapping[model], 175 | } 176 | logger.debug(f"Generation body:\n{body}") 177 | 178 | if request.stream: 179 | return stream_chat_request(client, body, model) 180 | else: 181 | choices = [] 182 | generated_tokens = 0 183 | for _i in range(request.n): 184 | response = client.invoke_model( 185 | body=json.dumps(body), modelId=model, accept="application/json", contentType="application/json" 186 | ) 187 | # parse response 188 | res = json.loads(response.get("body").read()) 189 | 190 | # convert to schema 191 | parsed = ChatCompletionResponseChoice( 192 | index=_i, 193 | message=ChatMessage(role="assistant", content=res["completion"].strip()), 194 | finish_reason=res["stop_reason"], 195 | ) 196 | generated_tokens += len(res["completion"].strip()) // 4 197 | choices.append(parsed) 198 | logger.debug(f"Response at index {_i}:\n{parsed}") 199 | # calculate usage details 200 | # TODO: fix when details is fixed 201 | prompt_tokens = int(len(prompt) / 4) 202 | total_tokens = prompt_tokens + generated_tokens 203 | 204 | return dump_object( 205 | ChatCompletionResponse( 206 | model=request.model, 207 | choices=choices, 208 | usage=Usage( 209 | prompt_tokens=prompt_tokens, completion_tokens=generated_tokens, total_tokens=total_tokens 210 | ), 211 | ) 212 | ) 213 | 214 | @classmethod 215 | async def acreate(cls, *args, **kwargs): 216 | """ 217 | Creates a new chat completion for the provided messages and parameters. 
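Note: the async variant is not implemented yet; use the synchronous `ChatCompletion.create` instead.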
218 | """ 219 | raise NotImplementedError("ChatCompletion.acreate is not implemented") 220 | -------------------------------------------------------------------------------- /easyllm/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philschmid/easyllm/1f37a9307d68545f41639f392a301baadda7188a/easyllm/data/__init__.py -------------------------------------------------------------------------------- /easyllm/data/extractor/__init__.py: -------------------------------------------------------------------------------- 1 | from easyllm.data.extractor.html_extractor import HtmlExtractor 2 | -------------------------------------------------------------------------------- /easyllm/data/extractor/html_extractor.py: -------------------------------------------------------------------------------- 1 | # 2 | from inscriptis import get_text 3 | from inscriptis.css_profiles import CSS_PROFILES 4 | from inscriptis.model.config import ParserConfig 5 | from pydantic import BaseModel 6 | from readability import Document 7 | 8 | INSCRIPTIS_CONFIG = ParserConfig(css=CSS_PROFILES["strict"]) 9 | 10 | 11 | class HtmlExtractor(BaseModel): 12 | """ 13 | Desc: Extracts text from the HTML document using mozzilas readability and inscriptis. 14 | """ 15 | 16 | name: str = "html_extractor" 17 | min_doc_length: int = 25 18 | 19 | def __call__(self, document: str) -> str: 20 | parsed_doc = Document(document, min_text_length=self.min_doc_length) 21 | clean_html = parsed_doc.summary(html_partial=True) 22 | content = get_text(clean_html, INSCRIPTIS_CONFIG).strip() 23 | return content 24 | -------------------------------------------------------------------------------- /easyllm/data/filters/__init__.py: -------------------------------------------------------------------------------- 1 | from easyllm.data.filters.bulletpoint_ratio import BulletpointRatioFilter 2 | from easyllm.data.filters.common_word import CommonWordFilter 3 | from easyllm.data.filters.digit_to_character import DigitToCharacter 4 | from easyllm.data.filters.kenlm_ppl import PerplexityFilter 5 | from easyllm.data.filters.length import LengthFilter 6 | from easyllm.data.filters.longword import LongWordFilter 7 | from easyllm.data.filters.n_gram import TopNGramsFilter 8 | from easyllm.data.filters.non_alpha_numeric import NonAlphaNumericFilter 9 | from easyllm.data.filters.parantheses_ration import ParenthesesRationFilter 10 | from easyllm.data.filters.punctuation import EllipsisFilter, PunctuationFilter 11 | from easyllm.data.filters.repeating import RepeatedLinesFilter, RepeatedParagraphFilter 12 | from easyllm.data.filters.url_ratio import UrlRatioFilter 13 | from easyllm.data.filters.whitespace_ration import WhitespaceRatioFilter 14 | from easyllm.data.filters.words_to_symbol import SymbolToWordFilter 15 | -------------------------------------------------------------------------------- /easyllm/data/filters/bulletpoint_ratio.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class BulletpointRatioFilter(BaseModel): 7 | """ 8 | Ref: Gopher (Rae et al., 2021) 9 | Desc: If more than 90% of the document are bulletpoints then remove 10 | """ 11 | 12 | name: str = "bulletpoint_ratio" 13 | potential_bullet_points: List[str] = [ 14 | "•", 15 | "‣", 16 | "⁃", 17 | "⁌", 18 | "⁍", 19 | "∙", 20 | "○", 21 | "●", 22 | "◘", 23 | "◦", 24 | "⦾", 25 | "⦿", 26 | "-", 27 | ] 28 | 
remove_percentage: float = 0.9 29 | 30 | def __call__(self, text): 31 | # split text into lines 32 | lines = text.split("\n") 33 | num_bullet_points = 0 34 | for line in lines: 35 | # check if the line is a bullet point 36 | if line.startswith(tuple(self.potential_bullet_points)): 37 | num_bullet_points += 1 38 | # check if the ratio of bullet points to lines is greater than the remove percentage 39 | if num_bullet_points / len(lines) > self.remove_percentage: 40 | return True 41 | # otherwise keep 42 | return False 43 | -------------------------------------------------------------------------------- /easyllm/data/filters/common_word.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import BaseModel 4 | 5 | COMMON_WORDS_EN = ["the", "be", "to", "of", "and", "that", "have", "with", "this"] 6 | COMMON_WORDS_DE = ["der", "die", "das", "er" "sein", "zu", "ist", "war", "von", "und", "haben", "mit"] 7 | 8 | 9 | class CommonWordFilter(BaseModel): 10 | """ 11 | Ref: Gopher (Rae et al., 2021) 12 | Desc: Makes sure that the document contains at least 2 common words if not remove 13 | """ 14 | 15 | name: str = "common_word" 16 | common_words: List[str] = COMMON_WORDS_EN 17 | n: int = 2 18 | 19 | def __call__(self, text): 20 | words = text.split() 21 | common_word_counter = 0 22 | # count the number of common words 23 | for word in words: 24 | if word.lower() in self.common_words: 25 | common_word_counter += 1 26 | if common_word_counter >= self.n: 27 | return False 28 | # otherwise remove 29 | return True 30 | -------------------------------------------------------------------------------- /easyllm/data/filters/cookie_banner.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pydantic import BaseModel 4 | 5 | policy_substrings = [ 6 | "terms of use", 7 | "privacy policy", 8 | "cookie policy", 9 | "uses cookies", 10 | "privacy overview", 11 | "use of cookies", 12 | "use cookies", 13 | "privacy & cookies policy", 14 | "privacy and cookies policy", 15 | "This website uses cookies to improve your experience while you " 16 | "navigate through the website. Out of these cookies, the cookies " 17 | "that are categorized as necessary are stored on your browser as they " 18 | "are essential for the working of basic functionalities of the website. " 19 | "We also use third-party cookies that help us analyze and understand how " 20 | "you use this website. These cookies will be stored in your browser only " 21 | "with your consent. You also have the option to opt-out of these " 22 | "cookies. But opting out of some of these cookies may have an effect " 23 | "on your browsing experience.".lower(), 24 | "Necessary cookies are absolutely essential for the website to " 25 | "function properly. This category only includes cookies that " 26 | "ensures basic functionalities and security features of the website. " 27 | "These cookies do not store any personal information.".lower(), 28 | "Any cookies that may not be particularly necessary for the website " 29 | "to function and is used specifically to collect user personal data " 30 | "via analytics, ads, other embedded contents are termed as non-necessary " 31 | "cookies. It is mandatory to procure user consent prior to running these " 32 | "cookies on your website.".lower(), 33 | "This site uses cookies, including for analytics, personalization, and " 34 | "advertising purposes. 
For more information or to change your " 35 | "cookie settings, click here.".lower(), 36 | "If you continue to browse this site without changing your cookie " 37 | "settings, you agree to this use. AcceptRead More".lower(), 38 | ] 39 | 40 | 41 | class CookieBannerFilter(BaseModel): 42 | """ 43 | Ref: C4 Raffel et al. 44 | Desc: Removes documents if more than 40% of the documents include terms for cookies, tos, privacy policy, etc. Requires external list. 45 | """ 46 | 47 | name: str = "cookie_banner" 48 | regex: re.Pattern = re.compile(r"(terms of use|privacy policy|copyright|all rights reserved)", re.IGNORECASE) 49 | remove_percentage: float = 0.4 50 | 51 | def __call__(self, text): 52 | # check if the regex matches 53 | raise NotImplementedError("CookieBannerFilter not implemented yet") 54 | -------------------------------------------------------------------------------- /easyllm/data/filters/digit_to_character.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class DigitToCharacter(BaseModel): 7 | """ 8 | Desc: If more than 20% of the document are digits then remove 9 | """ 10 | 11 | name: str = "digit_to_character" 12 | remove_percentage: float = 0.2 13 | 14 | def __call__(self, text): 15 | digits = re.findall(r"\d", text) 16 | num_digits = len(digits) 17 | total_chars = len(text) 18 | # check if there are any characters in the text 19 | if num_digits / total_chars > self.remove_percentage: 20 | return True 21 | # otherwise keep 22 | return False 23 | -------------------------------------------------------------------------------- /easyllm/data/filters/kenlm_ppl.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | import re 3 | import unicodedata 4 | from typing import Dict 5 | 6 | from huggingface_hub import hf_hub_download 7 | from pydantic import BaseModel, ConfigDict 8 | 9 | _kenlm = importlib.util.find_spec("kenlm") is not None 10 | _sentencepiece = importlib.util.find_spec("sentencepiece") is not None 11 | 12 | if _kenlm or not _sentencepiece: 13 | import kenlm 14 | import sentencepiece 15 | 16 | 17 | class SentencePiece: 18 | def __init__( 19 | self, 20 | model: str, 21 | ): 22 | super().__init__() 23 | self.sp = sentencepiece.SentencePieceProcessor() 24 | self.sp.load(str(model)) 25 | 26 | def do(self, text: dict) -> dict: 27 | tokenized = self.sp.encode_as_pieces(text) 28 | return " ".join(tokenized) 29 | 30 | 31 | class KenlmModel: 32 | digit_re: re.Pattern[str] = re.compile(r"\d") 33 | unicode_punct: Dict[str, str] = { 34 | ",": ",", 35 | "。": ".", 36 | "、": ",", 37 | "„": '"', 38 | "”": '"', 39 | "“": '"', 40 | "«": '"', 41 | "»": '"', 42 | "1": '"', 43 | "」": '"', 44 | "「": '"', 45 | "《": '"', 46 | "》": '"', 47 | "´": "'", 48 | "∶": ":", 49 | ":": ":", 50 | "?": "?", 51 | "!": "!", 52 | "(": "(", 53 | ")": ")", 54 | ";": ";", 55 | "–": "-", 56 | "—": " - ", 57 | ".": ". 
", 58 | "~": "~", 59 | "’": "'", 60 | "…": "...", 61 | "━": "-", 62 | "〈": "<", 63 | "〉": ">", 64 | "【": "[", 65 | "】": "]", 66 | "%": "%", 67 | "►": "-", 68 | } 69 | unicode_punct_re: re.Pattern = re.compile(f"[{''.join(unicode_punct.keys())}]") 70 | non_printing_chars_re: re.Pattern = re.compile(f"[{''.join(map(chr, list(range(0,32)) + list(range(127,160))))}]") 71 | model: kenlm.Model = None 72 | tokenizer: SentencePiece = None 73 | accent: bool = False 74 | case: bool = False 75 | numbers: bool = True 76 | punct: int = 1 77 | 78 | def __init__( 79 | self, 80 | model_path: str, 81 | tokenizer_path: str, 82 | lower_case: bool = False, 83 | remove_accents: bool = False, 84 | normalize_numbers: bool = True, 85 | punctuation: int = 1, 86 | ): 87 | self.model = kenlm.Model(model_path) 88 | self.tokenizer = SentencePiece(tokenizer_path) 89 | self.accent = remove_accents 90 | self.case = lower_case 91 | self.numbers = normalize_numbers 92 | self.punct = punctuation 93 | 94 | @classmethod 95 | def from_pretrained( 96 | cls, 97 | language_or_path: str, 98 | ): 99 | try: 100 | model = hf_hub_download("philschmid/kenlm", filename=f"wikipedia/{language_or_path}.arpa.bin") 101 | tokenizer = hf_hub_download("philschmid/kenlm", filename=f"wikipedia/{language_or_path}.sp.model") 102 | except Exception: 103 | raise ValueError( 104 | f"KenLM model for {language_or_path} not found at https://huggingface.co/philschmid/kenlm. Please train your own model and upload it to the hub." 105 | ) from None 106 | 107 | return cls( 108 | model, 109 | tokenizer, 110 | False, 111 | False, 112 | True, 113 | 1, 114 | ) 115 | 116 | def pp(self, log_score, length): 117 | return 10.0 ** (-log_score / length) 118 | 119 | def get_perplexity(self, doc: str, normalize_cc_net: bool = True): 120 | if normalize_cc_net: 121 | doc = self.normalize( 122 | doc, 123 | accent=self.accent, 124 | case=self.case, 125 | numbers=self.numbers, 126 | punct=self.punct, 127 | ) 128 | # Tokenize (after normalizing): See https://github.com/facebookresearch/cc_net/blob/bda555bd1cf1ee2e0b925363e62a61cd46c8b60d/cc_net/mine.py#L352 for full pipeline 129 | doc = self.tokenizer.do(doc) 130 | doc_log_score, doc_length = 0, 0 131 | for line in doc.split("\n"): 132 | log_score = self.model.score(line) 133 | length = len(line.split()) + 1 134 | doc_log_score += log_score 135 | doc_length += length 136 | return round(self.pp(doc_log_score, doc_length), 1) 137 | 138 | def normalize( 139 | self, 140 | line: str, 141 | accent: bool = True, 142 | case: bool = True, 143 | numbers: bool = True, 144 | punct: int = 1, 145 | ) -> str: 146 | line = line.strip() 147 | if not line: 148 | return line 149 | if case: 150 | line = line.lower() 151 | if accent: 152 | line = self.strip_accents(line) 153 | if numbers: 154 | line = self.digit_re.sub("0", line) 155 | if punct == 1: 156 | line = self.replace_unicode_punct(line) 157 | elif punct == 2: 158 | line = self.remove_unicode_punct(line) 159 | line = self.remove_non_printing_char(line) 160 | return line 161 | 162 | def strip_accents(self, line: str) -> str: 163 | """Strips accents from a piece of text.""" 164 | nfd = unicodedata.normalize("NFD", line) 165 | output = [c for c in nfd if unicodedata.category(c) != "Mn"] 166 | if len(output) == line: 167 | return line 168 | return "".join(output) 169 | 170 | def replace_unicode_punct(self, text: str) -> str: 171 | return "".join(self.unicode_punct.get(c, c) for c in text) 172 | 173 | def remove_unicode_punct(self, text: str) -> str: 174 | """More aggressive version of 
replace_unicode_punct but also faster.""" 175 | return self.unicode_punct_re.sub("", text) 176 | 177 | def remove_non_printing_char(self, text: str) -> str: 178 | return self.non_printing_chars_re.sub("", text) 179 | 180 | 181 | class PerplexityFilter(BaseModel): 182 | model: KenlmModel = None 183 | min_threshold: int = 0 184 | max_threshold: int = 1000 185 | model_config = ConfigDict(arbitrary_types_allowed=True) 186 | 187 | def __init__(self, language: str, min_threshold: int = 0, max_threshold: int = 1000): 188 | super().__init__() 189 | self.min_threshold = min_threshold 190 | self.max_threshold = max_threshold 191 | self.model = KenlmModel.from_pretrained(language) 192 | 193 | def __call__(self, doc: str) -> bool: 194 | # returns True if the perplexity of the document outside of the threshold, 195 | # meaning smaller than min_threshold or larger than max_threshold 196 | perplexity = self.model.get_perplexity(doc) 197 | if perplexity < self.min_threshold or perplexity > self.max_threshold: 198 | return True 199 | # otherwise keep 200 | return False 201 | -------------------------------------------------------------------------------- /easyllm/data/filters/length.py: -------------------------------------------------------------------------------- 1 | 2 | from pydantic import BaseModel 3 | 4 | 5 | class LengthFilter(BaseModel): 6 | """ 7 | Desc: Removes documents below or above a certain length of words 8 | """ 9 | 10 | name: str = "length" 11 | min_length: int = 10 12 | max_length: int = 1_000_000 13 | 14 | def __call__(self, text): 15 | num_words = len(text.split()) 16 | 17 | if num_words < self.min_length or num_words > self.max_length: 18 | return True 19 | # otherwise keep 20 | return False 21 | -------------------------------------------------------------------------------- /easyllm/data/filters/longword.py: -------------------------------------------------------------------------------- 1 | 2 | from pydantic import BaseModel 3 | 4 | 5 | class LongWordFilter(BaseModel): 6 | """ 7 | Ref: C4 Raffel et al. 8 | Desc: If document includes words with > 1000 character are removed, e.g. js or minified files. 
9 | """ 10 | 11 | name: str = "long_word" 12 | max_length: int = 1000 13 | 14 | def __call__(self, text): 15 | words = text.split() 16 | max_len = max(len(word) for word in words) 17 | if max_len > self.max_length: 18 | return True 19 | # otherwise keep 20 | return False 21 | -------------------------------------------------------------------------------- /easyllm/data/filters/n_gram.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | from itertools import chain 3 | 4 | from pydantic import BaseModel 5 | 6 | 7 | def get_ngrams(input_list, n): 8 | return list(zip(*[input_list[i:] for i in range(n)])) 9 | 10 | 11 | class TopNGramsFilter(BaseModel): 12 | """ 13 | Ref: Gopher (Rae et al., 2021) 14 | Desc: If the document shrinks by > 20% after removing top n-grams then remove 15 | """ 16 | 17 | name: str = "top_n_grams" 18 | remove_percentage: float = 0.2 19 | n: int = 2 20 | 21 | def __call__(self, text): 22 | words = text.split() 23 | if len(words) <= self.n: 24 | return True 25 | ngrams = get_ngrams(words, self.n) 26 | n_grams = Counter(chain(ngrams)) 27 | most_common = n_grams.most_common(1)[0][0] 28 | 29 | if n_grams[most_common] / len(n_grams) > self.remove_percentage: 30 | return True 31 | # otherwise keep 32 | return False 33 | -------------------------------------------------------------------------------- /easyllm/data/filters/non_alpha_numeric.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class NonAlphaNumericFilter(BaseModel): 7 | """ 8 | Ref: Gopher (Rae et al., 2021) 9 | Desc: If more than 20% of the document is non-alphanumeric then remove 10 | """ 11 | 12 | name: str = "non_alpha_numeric" 13 | regex: re.Pattern = re.compile(r"[^a-zA-Z0-9\s]") 14 | remove_percentage: float = 0.2 15 | 16 | def __call__(self, text): 17 | num_characters = len(text) 18 | # check if there are any characters in the text 19 | if num_characters == 0: 20 | return True 21 | # calculate the percentage of non-alphanumeric characters 22 | percentage = 1 - ((num_characters - len(self.regex.findall(text))) / num_characters) 23 | # if the percentage is greater than the remove_percentage then remove 24 | if percentage > self.remove_percentage: 25 | return True 26 | # otherwise keep 27 | return False 28 | -------------------------------------------------------------------------------- /easyllm/data/filters/parantheses_ration.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class ParenthesesRationFilter(BaseModel): 7 | """ 8 | Desc: If more than 10% of the document are Parentheses then remove 9 | """ 10 | 11 | name: str = "parentheses_ratio" 12 | regex: re.Pattern = re.compile(r"\[|\]|\(|\)|{|}|⟨|⟩") 13 | remove_percentage: float = 0.1 14 | 15 | def __call__(self, text): 16 | # parentheses characters 17 | parentheses_count = len(self.regex.findall(text)) 18 | sentence_length = len(text) 19 | # check if the ratio of parentheses to text is greater than the remove percentage 20 | if parentheses_count / sentence_length > self.remove_percentage: 21 | return True 22 | # otherwise keep 23 | return False 24 | -------------------------------------------------------------------------------- /easyllm/data/filters/punctuation.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from 
pydantic import BaseModel 4 | 5 | 6 | class PunctuationFilter(BaseModel): 7 | """ 8 | Ref: C4 Raffel et al. 9 | Desc: If less than 15% of the sentences end with a punctuation mark then remove 10 | """ 11 | 12 | name: str = "punctuation" 13 | punctuations: List[str] = [".", "!", "?"] 14 | remove_percentage: float = 0.15 15 | 16 | def __call__(self, text): 17 | sentences = text.split("\n") 18 | # count the number of sentences ending with a punctuation mark 19 | punc_counter = 0 20 | for sentence in sentences: 21 | for punc in self.punctuations: 22 | if sentence.endswith(punc): 23 | punc_counter += 1 24 | break 25 | # check if the ratio of sentences not ending with a punctuation mark is greater than the remove percentage 26 | if punc_counter / len(sentences) < self.remove_percentage: 27 | return True 28 | # otherwise keep 29 | return False 30 | 31 | 32 | class EllipsisFilter(BaseModel): 33 | """ 34 | Ref: C4 Raffel et al. 35 | Desc: If more than 30% of the sentences endwith an elipsis then remove 36 | """ 37 | 38 | name: str = "ellipsis" 39 | ellipsis: List[str] = ["...", "[...]", "…", "(...)", "[…]", "-»", "read more..", "read more"] 40 | remove_percentage: float = 0.3 41 | 42 | def __call__(self, text): 43 | sentences = text.split("\n") 44 | # count the number of sentences ending with an ellipsis 45 | ellipsis_counter = 0 46 | for sentence in sentences: 47 | for ellipsis in self.ellipsis: 48 | if sentence.endswith(ellipsis): 49 | ellipsis_counter += 1 50 | break 51 | # check if the ratio of sentences ending with an ellipsis is greater than the remove percentage 52 | if ellipsis_counter / len(sentences) > self.remove_percentage: 53 | return True 54 | # otherwise keep 55 | return False 56 | -------------------------------------------------------------------------------- /easyllm/data/filters/repeating.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class RepeatedLinesFilter(BaseModel): 5 | """ 6 | Ref: Gopher (Rae et al., 2021) 7 | Desc: If the document shrinks by > 30% after removing repeated lines then remove 8 | """ 9 | 10 | name: str = "repeated_lines" 11 | remove_percentage: float = 0.3 12 | 13 | def __call__(self, text): 14 | # split the text into lines 15 | lines = text.split("\n") 16 | # remove empty lines 17 | lines = [line for line in lines if line.strip()] 18 | if len(lines) == 0: 19 | return True 20 | # remove repeated lines 21 | unique_lines = list(set(lines)) 22 | # calculate the percentage of lines removed 23 | if len(unique_lines) / len(lines) < self.remove_percentage: 24 | return True 25 | # otherwise keep 26 | return False 27 | 28 | 29 | class RepeatedParagraphFilter(BaseModel): 30 | """ 31 | Ref: Gopher (Rae et al., 2021) 32 | Desc: If the document shrinks by > 30% after removing repeated paragraphs then remove 33 | """ 34 | 35 | name: str = "repeated_paragraph" 36 | remove_percentage: float = 0.3 37 | 38 | def __call__(self, text): 39 | # split the text into lines 40 | paragraphes = text.split("\n\n") 41 | # remove empty paragraph 42 | paragraphes = [p for p in paragraphes if p.strip()] 43 | if len(paragraphes) == 0: 44 | return True 45 | # remove repeated paragraphes 46 | unique_paragraphes = list(set(paragraphes)) 47 | # calculate the percentage of lines removed 48 | if len(unique_paragraphes) / len(paragraphes) < self.remove_percentage: 49 | return True 50 | # otherwise keep 51 | return False 52 | -------------------------------------------------------------------------------- 
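All filters in `easyllm/data/filters` follow the same callable convention: calling a filter on a document returns `True` when the document should be dropped and `False` when it should be kept. A minimal, illustrative sketch of chaining several of the exported filters over raw documents (the `keep_document` helper and the sample documents are not part of the repository; behavior depends on the filter defaults shown above):

```python
from easyllm.data.filters import (
    CommonWordFilter,
    LengthFilter,
    LongWordFilter,
    PunctuationFilter,
    UrlRatioFilter,
)

# Each filter returns True when a document should be removed.
filters = [LengthFilter(), CommonWordFilter(), LongWordFilter(), PunctuationFilter(), UrlRatioFilter()]


def keep_document(text: str) -> bool:
    # Keep the document only if no filter flags it for removal.
    return not any(f(text) for f in filters)


documents = [
    "This is a long enough example document that contains common words and ends with punctuation.",
    "x" * 2000,  # a single 2000-character token, flagged by LengthFilter and LongWordFilter
]
cleaned = [doc for doc in documents if keep_document(doc)]
```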
/easyllm/data/filters/url_ratio.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class UrlRatioFilter(BaseModel): 7 | """ 8 | Desc: If more than 20% of the document are urls then remove 9 | """ 10 | 11 | name: str = "url_ratio" 12 | regex: re.Pattern[ 13 | str 14 | ] = r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)" 15 | remove_percentage: float = 0.2 16 | 17 | def __call__(self, text): 18 | # find all urls 19 | urls = re.findall(self.regex, text) 20 | # check if the ratio of urls to words is greater than the remove percentage 21 | if len(urls) / len(text.split()) > self.remove_percentage: 22 | return True 23 | # otherwise keep 24 | return False 25 | -------------------------------------------------------------------------------- /easyllm/data/filters/whitespace_ration.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class WhitespaceRatioFilter(BaseModel): 7 | """ 8 | Desc: If more than 25% of the document are bulletpoints then remove 9 | """ 10 | 11 | name: str = "whitespace_ratio" 12 | regex: re.Pattern = re.compile(r"\s") 13 | remove_percentage: float = 0.25 14 | 15 | def __call__(self, text): 16 | # whitespace characters 17 | whitespace_count = len(self.regex.findall(text)) 18 | text_length = len(text) 19 | # check if the ratio of whitespace to text is greater than the remove percentage 20 | if whitespace_count / text_length > self.remove_percentage: 21 | return True 22 | # otherwise keep 23 | return False 24 | -------------------------------------------------------------------------------- /easyllm/data/filters/words_to_symbol.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class SymbolToWordFilter(BaseModel): 7 | """ 8 | Ref: Gopher (Rae et al., 2021) 9 | Desc: If more than 10% of the document are symbols (hashes [#] or ellipsis (...)) then remove 10 | """ 11 | 12 | name: str = "symbol_to_word" 13 | regex: re.Pattern = r"(\#+|(\.{3,}))(?!\w)" 14 | remove_percentage: float = 0.1 15 | 16 | def __call__(self, text: str): 17 | num_hashes = len(re.findall(r"\#+", text)) 18 | num_ellipses = len(re.findall(r"\.{3,}", text)) 19 | num_words = len(re.findall(r"\w+", text)) 20 | 21 | # check if there are any words in the text 22 | if num_words == 0: 23 | return True 24 | 25 | hash_ratio = num_hashes / num_words 26 | ellipses_ratio = num_ellipses / num_words 27 | 28 | # if the percentage is greater than the remove_percentage then remove 29 | if hash_ratio > self.remove_percentage or ellipses_ratio > self.remove_percentage: 30 | return True 31 | 32 | # otherwise keep 33 | return False 34 | -------------------------------------------------------------------------------- /easyllm/evol_instruct/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philschmid/easyllm/1f37a9307d68545f41639f392a301baadda7188a/easyllm/evol_instruct/__init__.py -------------------------------------------------------------------------------- /easyllm/prompt_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from easyllm.prompt_utils.anthropic import anthropic_stop_sequences, build_anthropic_prompt 2 | 3 | from .chatml_hf import ( 4 | 
build_chatml_falcon_prompt, 5 | build_chatml_starchat_prompt, 6 | chatml_falcon_stop_sequences, 7 | chatml_starchat_stop_sequences, 8 | ) 9 | from .falcon import build_falcon_prompt 10 | from .llama2 import build_llama2_prompt, llama2_stop_sequences 11 | from .open_assistant import build_open_assistant_prompt, open_assistant_stop_sequences 12 | from .stablebeluga import build_stablebeluga_prompt, stablebeluga_stop_sequences 13 | from .vicuna import build_vicuna_prompt, vicuna_stop_sequences 14 | from .wizardlm import build_wizardlm_prompt, wizardlm_stop_sequences 15 | 16 | PROMPT_MAPPING = { 17 | "chatml_falcon": build_chatml_falcon_prompt, 18 | "chatml_starchat": build_chatml_starchat_prompt, 19 | "llama2": build_llama2_prompt, 20 | "open_assistant": build_open_assistant_prompt, 21 | "stablebeluga": build_stablebeluga_prompt, 22 | "vicuna": build_vicuna_prompt, 23 | "wizardlm": build_wizardlm_prompt, 24 | "falcon": build_falcon_prompt, 25 | "anthropic": build_anthropic_prompt, 26 | } 27 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/anthropic.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | # Define stop sequences for anthropic 6 | anthropic_stop_sequences = ["\n\nUser:", "User:"] 7 | 8 | 9 | def build_anthropic_prompt(messages: Union[List[Dict[str, str]], str, List[ChatMessage]]) -> str: 10 | """ 11 | Builds an Anthropic prompt for a chat conversation. Reference: https://docs.anthropic.com/claude/docs/introduction-to-prompt-design 12 | 13 | Args: 14 | messages (Union[List[ChatMessage], str]): The messages to use for the completion. 15 | Returns: 16 | str: The anthropic prompt string.
17 | """ 18 | ANTHROPIC_USER_TOKEN = "\n\nHuman:" 19 | ANTHROPIC_ASSISTANT_TOKEN = "\n\nAssistant:" 20 | 21 | conversation = [] 22 | 23 | if isinstance(messages, str): 24 | messages = [ChatMessage(content="", role="system"), ChatMessage(content=messages, role="user")] 25 | else: 26 | if isinstance(messages[0], dict): 27 | messages = [ChatMessage(**message) for message in messages] 28 | 29 | for index, message in enumerate(messages): 30 | if message.role == "user": 31 | conversation.append(f"{ANTHROPIC_USER_TOKEN} {message.content.strip()}") 32 | elif message.role == "assistant": 33 | conversation.append(f"{ANTHROPIC_ASSISTANT_TOKEN} {message.content.strip()}") 34 | elif message.role == "function": 35 | raise ValueError("anthropic does not support function calls.") 36 | elif message.role == "system" and index == 0: 37 | conversation.append(message.content) 38 | else: 39 | raise ValueError(f"Invalid message role: {message.role}") 40 | 41 | return "".join(conversation) + ANTHROPIC_ASSISTANT_TOKEN + " " 42 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/base.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | 3 | from easyllm.prompt_utils import PROMPT_MAPPING 4 | from easyllm.schema.base import ChatMessage 5 | from easyllm.utils import setup_logger 6 | 7 | logger = setup_logger() 8 | 9 | 10 | def buildBasePrompt(messages: List[ChatMessage]) -> str: 11 | conversation = [] 12 | 13 | for index, message in enumerate(messages): 14 | if message.role == "user": 15 | conversation.append(f"USER: {message.content.strip()}") 16 | elif message.role == "assistant": 17 | conversation.append(f"ASSISTANT: {message.content}") 18 | elif message.role == "function": 19 | raise ValueError("Llama 2 does not support function calls.") 20 | elif message.role == "system" and index == 0: 21 | conversation.append(message.content) 22 | else: 23 | raise ValueError(f"Invalid message role: {message.role}") 24 | 25 | return "".join(conversation) 26 | 27 | 28 | def build_prompt(messages: List[ChatMessage], builder: Union[str, callable]) -> str: 29 | """ 30 | Tries to find the prompt builder in the PROMPT_MAPPING and returns a formatted prompt. 31 | """ 32 | if isinstance(builder, str): 33 | prompt_builder = PROMPT_MAPPING.get(builder, None) 34 | if prompt_builder is None: 35 | raise ValueError( 36 | f"Prompt builder {builder} not found. Are you sure you spelled it correctly? \ 37 | Available prompt builders are: {PROMPT_MAPPING.keys()}. 
\ 38 | You can open an issue or PR to add more prompt builders at https://github.com/philschmid/easyllm" 39 | ) 40 | prompt = prompt_builder(messages) 41 | else: 42 | prompt = builder(messages) 43 | 44 | logger.debug(f"Prompt sent to model will be:\n{prompt}") 45 | return prompt 46 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/chatml_hf.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | chatml_falcon_stop_sequences = ["<|endoftext|>"] 6 | 7 | 8 | def build_chatml_falcon_prompt(messages: Union[List[Dict[str, str]], str]) -> str: 9 | EOS_TOKEN = "<|endoftext|>" 10 | return build_chatml_hf_prompt(messages, EOS_TOKEN) 11 | 12 | 13 | chatml_starchat_stop_sequences = ["<|end|>"] 14 | 15 | 16 | def build_chatml_starchat_prompt(messages: Union[List[Dict[str, str]], str]) -> str: 17 | EOS_TOKEN = "<|end|>" 18 | return build_chatml_hf_prompt(messages, EOS_TOKEN) 19 | 20 | 21 | def build_chatml_hf_prompt(messages: Union[List[Dict[str, str]], str], EOS_TOKEN="<|end|>") -> str: 22 | """ 23 | Uses the HuggingFaceH4 ChatML template used in models like StarChat or Falcon. Uses <|user|>, <|end|>, <|system|>, and <|assistant|> tokens. If a Message with an unsupported role is passed, an error will be thrown. 24 | <|system|>\nYou are a chat bot.<|end|>\n<|user|>\nHello!<|end|>\n<|assistant|>\nHi there!<|end|>\n<|assistant|> 25 | Args: 26 | messages (:obj:`List[ChatMessage]`): The messages to use for the completion. 27 | """ 28 | 29 | SYSTEM_TOKEN = "<|system|>" 30 | USER_TOKEN = "<|user|>" 31 | ASSISTANT_TOKEN = "<|assistant|>" 32 | conversation = [] 33 | 34 | if isinstance(messages, str): 35 | messages = [ChatMessage(content="", role="system"), ChatMessage(content=messages, role="user")] 36 | else: 37 | if isinstance(messages[0], dict): 38 | messages = [ChatMessage(**message) for message in messages] 39 | 40 | for index, message in enumerate(messages): 41 | if message.role == "user": 42 | conversation.append(f"{USER_TOKEN}\n{message.content.strip()}{EOS_TOKEN}\n") 43 | elif message.role == "assistant": 44 | conversation.append(f"{ASSISTANT_TOKEN}\n{message.content.strip()}{EOS_TOKEN}\n") 45 | elif message.role == "function": 46 | raise ValueError("HF ChatML does not support function calls.") 47 | elif message.role == "system" and index == 0: 48 | conversation.append(f"{SYSTEM_TOKEN}\n{message.content.strip()}{EOS_TOKEN}\n") 49 | else: 50 | raise ValueError(f"Invalid message role: {message.role}") 51 | 52 | return "".join(conversation) + ASSISTANT_TOKEN 53 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/falcon.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | # Define stop sequences for falcon 6 | falcon_stop_sequences = ["\nUser:", "<|endoftext|>", " User:", "###"] 7 | 8 | 9 | def build_falcon_prompt(messages: Union[List[Dict[str, str]], str, List[ChatMessage]]) -> str: 10 | """ 11 | Builds a falcon prompt for a chat conversation. Reference: https://huggingface.co/blog/falcon-180b#prompt-format 12 | 13 | Args: 14 | messages (Union[List[ChatMessage], str]): The messages to use for the completion. 15 | Returns: 16 | str: The falcon prompt string.
17 | """ 18 | FALCON_SYSTEM_TOKEN = "System: " 19 | FALCON_USER_TOKEN = "User: " 20 | FALCON_ASSISTANT_TOKEN = "Falcon: " 21 | 22 | conversation = [] 23 | 24 | if isinstance(messages, str): 25 | messages = [ChatMessage(content="", role="system"), ChatMessage(content=messages, role="user")] 26 | else: 27 | if isinstance(messages[0], dict): 28 | messages = [ChatMessage(**message) for message in messages] 29 | 30 | for index, message in enumerate(messages): 31 | if message.role == "user": 32 | conversation.append(f"{FALCON_USER_TOKEN}{message.content.strip()}\n") 33 | elif message.role == "assistant": 34 | conversation.append(f"{FALCON_ASSISTANT_TOKEN}{message.content.strip()}\n") 35 | elif message.role == "function": 36 | raise ValueError("falcon does not support function calls.") 37 | elif message.role == "system" and index == 0: 38 | conversation.append(f"{FALCON_SYSTEM_TOKEN}{message.content}\n") 39 | else: 40 | raise ValueError(f"Invalid message role: {message.role}") 41 | 42 | return "".join(conversation) + FALCON_ASSISTANT_TOKEN 43 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/llama2.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | llama2_stop_sequences = ["</s>"] 6 | 7 | 8 | def build_llama2_prompt(messages: Union[List[Dict[str, str]], str]) -> str: 9 | """ 10 | Uses LLama 2 chat tokens (`[INST]`) to create a prompt, learn more in the [Hugging Face Blog on how to prompt Llama 2](https://huggingface.co/blog/llama2#how-to-prompt-llama-2). If a `Message` with an unsupported `role` is passed, an error will be thrown. 11 | Args: 12 | messages (:obj:`List[ChatMessage]`): The messages to use for the completion. 13 | """ 14 | 15 | startPrompt = "<s>[INST] " 16 | endPrompt = " [/INST]" 17 | conversation = [] 18 | 19 | if isinstance(messages, str): 20 | messages = [ChatMessage(content=messages, role="user")] 21 | else: 22 | if isinstance(messages[0], dict): 23 | messages = [ChatMessage(**message) for message in messages] 24 | 25 | for index, message in enumerate(messages): 26 | if message.role == "user": 27 | conversation.append(message.content.strip()) 28 | elif message.role == "assistant": 29 | conversation.append(f" [/INST] {message.content}</s><s>[INST] ") 30 | elif message.role == "function": 31 | raise ValueError("Llama 2 does not support function calls.") 32 | elif message.role == "system" and index == 0: 33 | conversation.append(f"<<SYS>>\n{message.content}\n<</SYS>>\n\n") 34 | else: 35 | raise ValueError(f"Invalid message role: {message.role}") 36 | 37 | return startPrompt + "".join(conversation) + endPrompt 38 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/open_assistant.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | open_assistant_stop_sequences = ["</s>"] 6 | 7 | 8 | def build_open_assistant_prompt(messages: Union[List[Dict[str, str]], str], EOS_TOKEN="<|end|>") -> str: 9 | """ 10 | Uses the Open Assistant ChatML template used in its models. Uses <|prompter|>, </s>, <|system|>, and <|assistant|> tokens. If a Message with an unsupported role is passed, an error will be thrown. 11 | <|system|>system message<|prompter|>user prompt<|assistant|> 12 | Args: 13 | messages (:obj:`List[ChatMessage]`): The messages to use for the completion.
14 | """ 15 | 16 | SYSTEM_TOKEN = "<|system|>" 17 | USER_TOKEN = "<|prompter|>" 18 | ASSISTANT_TOKEN = "<|assistant|>" 19 | EOS_TOKEN = "</s>" 20 | conversation = [] 21 | 22 | if isinstance(messages, str): 23 | messages = [ChatMessage(content="", role="system"), ChatMessage(content=messages, role="user")] 24 | else: 25 | if isinstance(messages[0], dict): 26 | messages = [ChatMessage(**message) for message in messages] 27 | 28 | for index, message in enumerate(messages): 29 | if message.role == "user": 30 | conversation.append(f"{USER_TOKEN}{message.content.strip()}{EOS_TOKEN}") 31 | elif message.role == "assistant": 32 | conversation.append(f"{ASSISTANT_TOKEN}{message.content.strip()}{EOS_TOKEN}") 33 | elif message.role == "function": 34 | raise ValueError("Open Assistant does not support function calls.") 35 | elif message.role == "system" and index == 0: 36 | conversation.append(f"{SYSTEM_TOKEN}{message.content.strip()}{EOS_TOKEN}") 37 | else: 38 | raise ValueError(f"Invalid message role: {message.role}") 39 | 40 | return "".join(conversation) + ASSISTANT_TOKEN 41 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/stablebeluga.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | # Define stop sequences for stablebeluga 6 | stablebeluga_stop_sequences = ["</s>"] 7 | 8 | 9 | def build_stablebeluga_prompt(messages: Union[List[Dict[str, str]], str]) -> str: 10 | """ 11 | Builds a stablebeluga prompt for a chat conversation. Reference: https://huggingface.co/stabilityai/StableBeluga2 12 | 13 | Args: 14 | messages (Union[List[ChatMessage], str]): The messages to use for the completion. 15 | Returns: 16 | str: The stablebeluga prompt string. 17 | """ 18 | SYSTEM_TOKEN = "### System:" 19 | USER_TOKEN = "### User:" 20 | ASSISTANT_TOKEN = "### Assistant:" 21 | 22 | conversation = [] 23 | 24 | if isinstance(messages, str): 25 | messages = [ChatMessage(content="", role="system"), ChatMessage(content=messages, role="user")] 26 | else: 27 | if isinstance(messages[0], dict): 28 | messages = [ChatMessage(**message) for message in messages] 29 | 30 | for index, message in enumerate(messages): 31 | if message.role == "user": 32 | conversation.append(f"{USER_TOKEN}\n{message.content.strip()}\n\n") 33 | elif message.role == "assistant": 34 | conversation.append(f"{ASSISTANT_TOKEN}\n{message.content.strip()}\n\n") 35 | elif message.role == "function": 36 | raise ValueError("stablebeluga does not support function calls.") 37 | elif message.role == "system" and index == 0: 38 | conversation.append(f"{SYSTEM_TOKEN}\n{message.content.strip()}\n\n") 39 | else: 40 | raise ValueError(f"Invalid message role: {message.role}") 41 | 42 | return "".join(conversation) + ASSISTANT_TOKEN 43 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/vicuna.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | # Define stop sequences for Vicuna 6 | vicuna_stop_sequences = ["</s>"] 7 | 8 | 9 | def build_vicuna_prompt(messages: Union[List[Dict[str, str]], str]) -> str: 10 | """ 11 | Builds a Vicuna prompt for a chat conversation.
Reference: https://github.com/lm-sys/FastChat/blob/main/docs/vicuna_weights_version.md#prompt-template 12 | 13 | Args: 14 | messages (Union[List[ChatMessage], str]): The messages to use for the completion. 15 | Returns: 16 | str: The Vicuna prompt string. 17 | """ 18 | VICUNA_EOS_TOKEN = "</s>" 19 | VICUNA_USER_TOKEN = "USER: " 20 | VICUNA_ASSISTANT_TOKEN = "ASSISTANT: " 21 | 22 | conversation = [] 23 | 24 | if isinstance(messages, str): 25 | messages = [ChatMessage(content="", role="system"), ChatMessage(content=messages, role="user")] 26 | else: 27 | if isinstance(messages[0], dict): 28 | messages = [ChatMessage(**message) for message in messages] 29 | 30 | for index, message in enumerate(messages): 31 | if message.role == "user": 32 | conversation.append(f"{VICUNA_USER_TOKEN}{message.content.strip()}\n") 33 | elif message.role == "assistant": 34 | conversation.append(f"{VICUNA_ASSISTANT_TOKEN}{message.content.strip()}{VICUNA_EOS_TOKEN}\n") 35 | elif message.role == "function": 36 | raise ValueError("Vicuna does not support function calls.") 37 | elif message.role == "system" and index == 0: 38 | conversation.append(f"{message.content.strip()}\n\n") 39 | else: 40 | raise ValueError(f"Invalid message role: {message.role}") 41 | 42 | return "".join(conversation) + VICUNA_ASSISTANT_TOKEN 43 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/wizardlm.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | # Define stop sequences for wizardlm 6 | wizardlm_stop_sequences = ["</s>"] 7 | 8 | 9 | def build_wizardlm_prompt(messages: Union[List[Dict[str, str]], str]) -> str: 10 | """ 11 | Builds a WizardLM prompt for a chat conversation. Reference: https://github.com/nlpxucan/WizardLM/blob/4af9edc59e412a49bba51cd1e8cfac2664e909e5/WizardLM/src/infer_wizardlm13b.py#L79 12 | 13 | Args: 14 | messages (Union[List[ChatMessage], str]): The messages to use for the completion. 15 | Returns: 16 | str: The WizardLM prompt string.
17 | """ 18 | WIZARDLM_USER_TOKEN = "USER: " 19 | WIZARDLM_ASSISTANT_TOKEN = "ASSISTANT: " 20 | 21 | conversation = [] 22 | 23 | if isinstance(messages, str): 24 | messages = [ChatMessage(content="", role="system"), ChatMessage(content=messages, role="user")] 25 | else: 26 | if isinstance(messages[0], dict): 27 | messages = [ChatMessage(**message) for message in messages] 28 | 29 | for index, message in enumerate(messages): 30 | if message.role == "user": 31 | conversation.append(f"{WIZARDLM_USER_TOKEN}{message.content.strip()}") 32 | elif message.role == "assistant": 33 | conversation.append(f"{WIZARDLM_ASSISTANT_TOKEN}{message.content.strip()}") 34 | elif message.role == "function": 35 | raise ValueError("WizardLM does not support function calls.") 36 | elif message.role == "system" and index == 0: 37 | conversation.append(f"{message.content.strip()}") 38 | else: 39 | raise ValueError(f"Invalid message role: {message.role}") 40 | 41 | return " ".join(conversation).lstrip() + " " + WIZARDLM_ASSISTANT_TOKEN 42 | -------------------------------------------------------------------------------- /easyllm/schema/base.py: -------------------------------------------------------------------------------- 1 | import importlib.metadata 2 | from typing import Literal, Optional 3 | 4 | from packaging.version import parse 5 | from pydantic import BaseModel 6 | 7 | 8 | def dump_object(object): 9 | if parse(importlib.metadata.version("pydantic")) < parse("2.0.0"): 10 | return object.dict() 11 | else: 12 | return object.model_dump(exclude_none=True) 13 | 14 | 15 | class ChatMessage(BaseModel): 16 | role: Literal["user", "assistant", "function", "system"] 17 | content: str 18 | 19 | 20 | class Usage(BaseModel): 21 | prompt_tokens: int 22 | completion_tokens: Optional[int] = None 23 | total_tokens: int 24 | -------------------------------------------------------------------------------- /easyllm/schema/openai.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import Any, Dict, List, Literal, Optional, Union 3 | 4 | from nanoid import generate 5 | from pydantic import BaseModel, Field 6 | 7 | from easyllm.schema.base import ChatMessage, Usage 8 | 9 | 10 | # More documentation https://platform.openai.com/docs/api-reference/chat/create 11 | # adapted from https://github.com/lm-sys/FastChat/blob/main/fastchat/protocol/openai_api_protocol.py 12 | class ChatCompletionRequest(BaseModel): 13 | messages: List[ChatMessage] 14 | model: Optional[str] = None 15 | temperature: float = 0.9 16 | top_p: float = 0.6 17 | top_k: Optional[int] = 10 18 | n: int = 1 19 | max_tokens: int = 1024 20 | stop: Optional[List[str]] = None 21 | stream: bool = False 22 | frequency_penalty: Optional[float] = 1.0 23 | user: Optional[str] = None 24 | 25 | 26 | class ChatCompletionResponseChoice(BaseModel): 27 | index: int 28 | message: ChatMessage 29 | finish_reason: Optional[Literal["stop_sequence", "length", "eos_token", "max_tokens"]] = None 30 | 31 | 32 | class ChatCompletionResponse(BaseModel): 33 | id: str = Field(default_factory=lambda: f"hf-{generate(size=10)}") 34 | object: str = "chat.completion" 35 | created: int = Field(default_factory=lambda: int(time.time())) 36 | model: Optional[str] = "custom" 37 | choices: List[ChatCompletionResponseChoice] 38 | usage: Usage 39 | 40 | 41 | class DeltaMessage(BaseModel): 42 | role: Optional[str] = None 43 | content: Optional[str] = None 44 | 45 | 46 | class ChatCompletionResponseStreamChoice(BaseModel): 47 | index: int 48 | 
delta: Union[DeltaMessage, Dict[str, str]] 49 | finish_reason: Optional[Literal["stop", "length"]] = None 50 | 51 | 52 | class ChatCompletionStreamResponse(BaseModel): 53 | id: str = Field(default_factory=lambda: f"hf-{generate(size=10)}") 54 | object: str = "chat.completion.chunk" 55 | created: int = Field(default_factory=lambda: int(time.time())) 56 | model: Optional[str] = None 57 | choices: List[ChatCompletionResponseStreamChoice] 58 | 59 | 60 | class CompletionRequest(BaseModel): 61 | model: Optional[str] = None 62 | prompt: Union[str, List[Any]] 63 | suffix: Optional[str] = None 64 | temperature: float = 0.9 65 | top_p: float = 0.6 66 | top_k: Optional[int] = 10 67 | n: int = 1 68 | max_tokens: int = 1024 69 | stop: Optional[List[str]] = None 70 | stream: bool = False 71 | frequency_penalty: Optional[float] = 1.0 72 | user: Optional[str] = None 73 | logprobs: bool = False 74 | echo: bool = False 75 | 76 | 77 | class CompletionResponseChoice(BaseModel): 78 | index: int 79 | text: str 80 | logprobs: Union[Optional[List[Dict[str, Any]]], float] = None 81 | finish_reason: Optional[Literal["stop_sequence", "length", "eos_token"]] = None 82 | 83 | 84 | class CompletionResponse(BaseModel): 85 | id: str = Field(default_factory=lambda: f"hf-{generate(size=10)}") 86 | object: str = "text.completion" 87 | created: int = Field(default_factory=lambda: int(time.time())) 88 | model: Optional[str] = "custom" 89 | choices: List[CompletionResponseChoice] 90 | usage: Usage 91 | 92 | 93 | class CompletionResponseStreamChoice(BaseModel): 94 | index: int 95 | text: str 96 | logprobs: Optional[float] = None 97 | finish_reason: Optional[Literal["stop_sequence", "length", "eos_token"]] = None 98 | 99 | 100 | class CompletionStreamResponse(BaseModel): 101 | id: str = Field(default_factory=lambda: f"hf-{generate(size=10)}") 102 | object: str = "text.completion" 103 | created: int = Field(default_factory=lambda: int(time.time())) 104 | model: Optional[str] = "custom" 105 | choices: List[CompletionResponseStreamChoice] 106 | 107 | 108 | class EmbeddingsRequest(BaseModel): 109 | model: Optional[str] = None 110 | input: Union[str, List[Any]] 111 | user: Optional[str] = None 112 | 113 | 114 | class EmbeddingsObjectResponse(BaseModel): 115 | index: int 116 | object: str = "embedding" 117 | embedding: List[float] 118 | 119 | 120 | class EmbeddingsResponse(BaseModel): 121 | object: str = "list" 122 | data: List[EmbeddingsObjectResponse] 123 | model: Optional[str] = "custom" 124 | usage: Usage 125 | -------------------------------------------------------------------------------- /easyllm/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from easyllm.utils.aws import AWSSigV4, get_bedrock_client 2 | from easyllm.utils.logging import setup_logger 3 | -------------------------------------------------------------------------------- /easyllm/utils/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | 6 | def setup_logger() -> logging.Logger: 7 | # get local rank 8 | local_rank = int(os.environ.get("LOCAL_RANK", 0)) 9 | 10 | # create logger 11 | logger = logging.getLogger(__name__) 12 | 13 | # Setup logging 14 | logging.basicConfig( 15 | format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", 16 | datefmt="%m/%d/%Y %H:%M:%S", 17 | handlers=[logging.StreamHandler(sys.stdout)], 18 | ) 19 | 20 | if local_rank != 0: 21 | # disable logging for non-master processes 22 | 
print(f"Disabling logging for non-master process with local rank {local_rank}.") 23 | logging.disable(logging.CRITICAL) 24 | return logger 25 | else: 26 | log_level = logging.INFO 27 | # set the main code and the modules it uses to the same log-level according to the node 28 | logger.setLevel(log_level) 29 | # datasets_logging.set_verbosity(log_level) 30 | # trfs_logging.set_verbosity(log_level) 31 | return logger 32 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | .PHONY: style check test docs copy-docs docs-deploy 2 | 3 | check_dirs := . 4 | 5 | style: 6 | ruff $(check_dirs) --fix 7 | check: 8 | ruff $(check_dirs) 9 | test: 10 | pytest 11 | 12 | 13 | copy-docs: 14 | cp -r notebooks/* docs/examples/ 15 | 16 | docs: 17 | $(MAKE) copy-docs 18 | mkdocs serve 19 | 20 | docs-build: 21 | $(MAKE) copy-docs 22 | mkdocs build 23 | 24 | docs-deploy: 25 | $(MAKE) copy-docs 26 | mkdocs gh-deploy --force -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | # Project information 2 | site_name: EasyLLM 3 | site_author: Philipp Schmid 4 | site_url: https://philschmid.github.io/easyllm/ 5 | site_description: >- 6 | EasyLLM is an open source project that provides helpful tools and methods for working with large language models (LLMs), both open source and closed source. 7 | 8 | # Repository 9 | repo_name: philschmid/easyllm 10 | repo_url: https://github.com/philschmid/easyllm 11 | 12 | # Copyright 13 | copyright: Copyright © 2023 Philipp Schmid 14 | 15 | # mkdocs.yml 16 | theme: 17 | name: 'material' 18 | features: 19 | - announce.dismiss 20 | - content.action.edit 21 | - content.action.view 22 | - content.code.annotate 23 | - content.code.copy 24 | # - content.code.select 25 | # - content.tabs.link 26 | - content.tooltips 27 | # - header.autohide 28 | - navigation.expand 29 | - navigation.footer 30 | - navigation.indexes 31 | # - navigation.instant 32 | # - navigation.prune 33 | - navigation.sections 34 | - navigation.tabs 35 | - navigation.tabs.sticky 36 | - navigation.top 37 | - navigation.tracking 38 | - navigation.path 39 | - search.highlight 40 | - search.share 41 | - search.suggest 42 | - toc.follow 43 | # - toc.integrate 44 | palette: 45 | - scheme: default 46 | primary: teal 47 | accent: teal 48 | toggle: 49 | icon: material/brightness-7 50 | name: Switch to dark mode 51 | - scheme: slate 52 | primary: teal 53 | accent: teal 54 | toggle: 55 | icon: material/brightness-4 56 | name: Switch to light mode 57 | font: 58 | text: Roboto 59 | code: Roboto Mono 60 | 61 | plugins: 62 | - search 63 | - tags 64 | # - social 65 | - mkdocs-jupyter: 66 | include: ["examples/*.ipynb"] # Default: ["*.py", "*.ipynb"] 67 | - mkdocstrings 68 | # Extensions 69 | markdown_extensions: 70 | - abbr 71 | - admonition 72 | - attr_list 73 | - def_list 74 | - footnotes 75 | - md_in_html 76 | - toc: 77 | permalink: true 78 | - pymdownx.arithmatex: 79 | generic: true 80 | - pymdownx.betterem: 81 | smart_enable: all 82 | - pymdownx.caret 83 | - pymdownx.details 84 | - pymdownx.emoji: 85 | emoji_generator: !!python/name:materialx.emoji.to_svg 86 | emoji_index: !!python/name:materialx.emoji.twemoji 87 | - pymdownx.highlight: 88 | anchor_linenums: true 89 | line_spans: __span 90 | pygments_lang_class: true 91 | - pymdownx.inlinehilite 92 | - pymdownx.keys 93 | - 
pymdownx.magiclink: 94 | repo_url_shorthand: true 95 | user: squidfunk 96 | repo: mkdocs-material 97 | - pymdownx.mark 98 | - pymdownx.smartsymbols 99 | - pymdownx.superfences: 100 | custom_fences: 101 | - name: mermaid 102 | class: mermaid 103 | format: !!python/name:pymdownx.superfences.fence_code_format 104 | - pymdownx.tabbed: 105 | alternate_style: true 106 | - pymdownx.tasklist: 107 | custom_checkbox: true 108 | - pymdownx.tilde 109 | 110 | nav: 111 | - Documentation: 112 | - EasyLLM: index.md 113 | - Installation: installation.md 114 | - "API Reference": 115 | - "Clients": 116 | - clients/index.md 117 | - clients/huggingface.md 118 | - clients/sagemaker.md 119 | - clients/bedrock.md 120 | - prompt_utils.md 121 | - Examples: 122 | - examples/index.md 123 | - "Hugging Face": 124 | - examples/chat-completion-api.ipynb 125 | - examples/stream-chat-completions.ipynb 126 | - examples/text-completion-api.ipynb 127 | - examples/stream-text-completions.ipynb 128 | - examples/get-embeddings.ipynb 129 | - examples/inference-endpoints-example.ipynb 130 | - examples/llama2-rag-example.ipynb 131 | - examples/llama2-agent-example.ipynb 132 | - examples/falcon-180b-chat.ipynb 133 | - "Amazon SageMaker": 134 | - examples/sagemaker-chat-completion-api.ipynb 135 | - examples/sagemaker-text-completion-api.ipynb 136 | - examples/sagemaker-get-embeddings.ipynb 137 | - "Amazon Bedrock": 138 | - examples/bedrock-chat-completion-api.ipynb 139 | - examples/bedrock-stream-chat-completions.ipynb -------------------------------------------------------------------------------- /notebooks/bedrock-stream-chat-completions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# How to stream Chat Completion requests with Amazon Bedrock\n", 9 | "\n", 10 | "By default, when you request a completion, the entire completion is generated before being sent back in a single response.\n", 11 | "\n", 12 | "If you're generating long completions, waiting for the response can take many seconds.\n", 13 | "\n", 14 | "To get responses sooner, you can 'stream' the completion as it's being generated. This allows you to start printing or processing the beginning of the completion before the full completion is finished.\n", 15 | "\n", 16 | "To stream completions, set `stream=True` when calling the chat completions or completions endpoints. This will return an object that streams back the response as [data-only server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format). Extract chunks from the `delta` field rather than the `message` field.\n", 17 | "\n", 18 | "## Downsides\n", 19 | "\n", 20 | "Note that using `stream=True` in a production application makes it more difficult to moderate the content of the completions, as partial completions may be more difficult to evaluate. \n", 21 | "\n", 22 | "## Setup\n", 23 | "\n", 24 | "Before you can use `easyllm` with Amazon Bedrock you need setup permission and access to the models. 
You can do this by following of the instructions below:\n", 25 | "* https://docs.aws.amazon.com/IAM/latest/UserGuide/getting-set-up.html\n", 26 | "* https://docs.aws.amazon.com/IAM/latest/UserGuide/troubleshoot_access-denied.html\n", 27 | "* https://docs.aws.amazon.com/bedrock/latest/userguide/security-iam.html\n", 28 | "\n", 29 | "## Example code\n", 30 | "\n", 31 | "Below, this notebook shows:\n", 32 | "1. What a typical chat completion response looks like\n", 33 | "2. What a streaming chat completion response looks like\n", 34 | "3. How much time is saved by streaming a chat completion" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# if needed, install and/or upgrade to the latest version of the EasyLLM Python library\n", 44 | "%pip install --upgrade easyllm[bedrock] " 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 1, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# imports\n", 54 | "import easyllm # for API calls" 55 | ] 56 | }, 57 | { 58 | "attachments": {}, 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "### 1. What a typical chat completion response looks like\n", 63 | "\n", 64 | "With a typical ChatCompletions API call, the response is first computed and then returned all at once." 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 1, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | "10/26/2023 17:34:57 - INFO - easyllm.utils.logging - boto3 Bedrock client successfully created!\n", 77 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334497, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'role': 'assistant'}}]}\n", 78 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334498, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' Here'}}]}\n", 79 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334498, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' is counting to 100 with a comma'}}]}\n", 80 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334498, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' between each number and no newlines:\\n\\n1, 2, 3,'}}]}\n", 81 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334499, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' 4, 5, 6, 7, 8, 9, 10, 11'}}]}\n", 82 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334499, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ', 12, 13, 14, 15, 16, 17, 18,'}}]}\n", 83 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334499, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,'}}]}\n", 84 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334500, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,'}}]}\n", 85 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334500, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,'}}]}\n", 86 | "{'id': 
'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334501, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' 49, 50, 51'}}]}\n", 87 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334501, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ', 52, 53,'}}]}\n", 88 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334502, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' 54, 55, 56'}}]}\n", 89 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334503, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ', 57, 58, 59, 60, 61'}}]}\n", 90 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334504, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ', 62, 63, 64, 65, 66'}}]}\n", 91 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334504, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ', 67, 68, 69, 70, 71, 72, 73,'}}]}\n", 92 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334504, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' 74, 75, 76, 77, 78, 79, 80, 81'}}]}\n", 93 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334505, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ', 82, 83, 84, 85, 86, 87, 88, 89, 90, 91'}}]}\n", 94 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334505, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ', 92, 93, 94, 95, 96, 97, 98, 99, 100'}}]}\n", 95 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334505, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {}}]}\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "import os \n", 101 | "# set env for prompt builder\n", 102 | "os.environ[\"BEDROCK_PROMPT\"] = \"anthropic\" # vicuna, wizardlm, stablebeluga, open_assistant\n", 103 | "os.environ[\"AWS_REGION\"] = \"us-east-1\" # change to your region\n", 104 | "# os.environ[\"AWS_ACCESS_KEY_ID\"] = \"XXX\" # needed if not using boto3 session\n", 105 | "# os.environ[\"AWS_SECRET_ACCESS_KEY\"] = \"XXX\" # needed if not using boto3 session\n", 106 | "\n", 107 | "from easyllm.clients import bedrock\n", 108 | "\n", 109 | "response = bedrock.ChatCompletion.create(\n", 110 | " model='anthropic.claude-v2',\n", 111 | " messages=[\n", 112 | " {'role': 'user', 'content': 'Count to 100, with a comma between each number and no newlines. E.g., 1, 2, 3, ...'}\n", 113 | " ],\n", 114 | " stream=True\n", 115 | ")\n", 116 | "\n", 117 | "for chunk in response:\n", 118 | " print(chunk)\n" 119 | ] 120 | }, 121 | { 122 | "attachments": {}, 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "As you can see above, streaming responses have a `delta` field rather than a `message` field. `delta` can hold things like:\n", 127 | "- a role token (e.g., `{\"role\": \"assistant\"}`)\n", 128 | "- a content token (e.g., `{\"content\": \"\\n\\n\"}`)\n", 129 | "- nothing (e.g., `{}`), when the stream is over" 130 | ] 131 | }, 132 | { 133 | "attachments": {}, 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "### 3. 
How much time is saved by streaming a chat completion\n", 138 | "\n", 139 | "Now let's ask `meta-llama/Llama-2-70b-chat-hf` to count to 100 again, and see how long it takes." 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 7, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | " Here is counting to 100 with commas and no newlines:\n", 152 | "\n", 153 | "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100Full conversation received: Here is counting to 100 with commas and no newlines:\n", 154 | "\n", 155 | "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "import os \n", 161 | "# set env for prompt builder\n", 162 | "os.environ[\"BEDROCK_PROMPT\"] = \"anthropic\" # vicuna, wizardlm, stablebeluga, open_assistant\n", 163 | "os.environ[\"AWS_REGION\"] = \"us-east-1\" # change to your region\n", 164 | "os.environ[\"AWS_PROFILE\"] = \"hf-sm\" # change to your region\n", 165 | "# os.environ[\"AWS_ACCESS_KEY_ID\"] = \"XXX\" # needed if not using boto3 session\n", 166 | "# os.environ[\"AWS_SECRET_ACCESS_KEY\"] = \"XXX\" # needed if not using boto3 session\n", 167 | "from easyllm.clients import bedrock\n", 168 | "\n", 169 | "# send a ChatCompletion request to count to 100\n", 170 | "response = bedrock.ChatCompletion.create(\n", 171 | " model='anthropic.claude-v2',\n", 172 | " messages=[\n", 173 | " {'role': 'user', 'content': 'Count to 100, with a comma between each number and no newlines. 
E.g., 1, 2, 3, ...'}\n", 174 | " ],\n", 175 | " stream=True\n", 176 | ")\n", 177 | "\n", 178 | "# create variables to collect the stream of chunks\n", 179 | "collected_chunks = []\n", 180 | "collected_messages = []\n", 181 | "# iterate through the stream of events\n", 182 | "for chunk in response:\n", 183 | " collected_chunks.append(chunk) # save the event response\n", 184 | " chunk_message = chunk['choices'][0]['delta'] # extract the message\n", 185 | " print(chunk_message.get('content', ''), end='') # print the message\n", 186 | " collected_messages.append(chunk_message) # save the message\n", 187 | " \n", 188 | "\n", 189 | "# print the time delay and text received\n", 190 | "full_reply_content = ''.join([m.get('content', '') for m in collected_messages])\n", 191 | "print(f\"Full conversation received: {full_reply_content}\")\n" 192 | ] 193 | } 194 | ], 195 | "metadata": { 196 | "kernelspec": { 197 | "display_name": "Python 3.9.9 ('openai')", 198 | "language": "python", 199 | "name": "python3" 200 | }, 201 | "language_info": { 202 | "codemirror_mode": { 203 | "name": "ipython", 204 | "version": 3 205 | }, 206 | "file_extension": ".py", 207 | "mimetype": "text/x-python", 208 | "name": "python", 209 | "nbconvert_exporter": "python", 210 | "pygments_lexer": "ipython3", 211 | "version": "3.8.12" 212 | }, 213 | "orig_nbformat": 4, 214 | "vscode": { 215 | "interpreter": { 216 | "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" 217 | } 218 | } 219 | }, 220 | "nbformat": 4, 221 | "nbformat_minor": 2 222 | } 223 | -------------------------------------------------------------------------------- /notebooks/falcon-180b-chat.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# How to use Chat Completion clients\n", 9 | "\n", 10 | "EasyLLM can be used as an abstract layer to replace `gpt-3.5-turbo` and `gpt-4` with open source models.\n", 11 | "\n", 12 | "You can change your own applications from the OpenAI API, by simply changing the client. \n", 13 | "\n", 14 | "Chat models take a series of messages as input, and return an AI-written message as output.\n", 15 | "\n", 16 | "This guide illustrates the chat format with a few example API calls." 17 | ] 18 | }, 19 | { 20 | "attachments": {}, 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "### 1. Import the easyllm library" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# if needed, install and/or upgrade to the latest version of the EasyLLM Python library\n", 34 | "%pip install --upgrade easyllm " 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 4, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# import the EasyLLM Python library for calling the EasyLLM API\n", 44 | "import easyllm" 45 | ] 46 | }, 47 | { 48 | "attachments": {}, 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "### 2. 
An example chat API call\n", 53 | "\n", 54 | "A chat API call has two required inputs:\n", 55 | "- `model`: the name of the model you want to use (e.g., `meta-llama/Llama-2-70b-chat-hf`) or leave it empty to just call the api\n", 56 | "- `messages`: a list of message objects, where each object has two required fields:\n", 57 | " - `role`: the role of the messenger (either `system`, `user`, or `assistant`)\n", 58 | " - `content`: the content of the message (e.g., `Write me a beautiful poem`)\n", 59 | "\n", 60 | "Compared to OpenAI api is the `huggingface` module also exposing a `prompt_builder` and `stop_sequences` parameter you can use to customize the prompt and stop sequences. The EasyLLM package comes with prompt builder utilities.\n", 61 | "\n", 62 | "Let's look at an example chat API calls to see how the chat format works in practice." 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 1, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "{'id': 'hf-ceVG8KGm04',\n", 74 | " 'object': 'chat.completion',\n", 75 | " 'created': 1695106309,\n", 76 | " 'model': 'tiiuae/falcon-180B-chat',\n", 77 | " 'choices': [{'index': 0,\n", 78 | " 'message': {'role': 'assistant',\n", 79 | " 'content': \"*Knock knock* Who's there? Cat. Cat who? Cat got your tongue?\\nUser:\"},\n", 80 | " 'finish_reason': 'stop_sequence'}],\n", 81 | " 'usage': {'prompt_tokens': 144, 'completion_tokens': 23, 'total_tokens': 167}}" 82 | ] 83 | }, 84 | "execution_count": 1, 85 | "metadata": {}, 86 | "output_type": "execute_result" 87 | } 88 | ], 89 | "source": [ 90 | "import os \n", 91 | "# set env for prompt builder\n", 92 | "os.environ[\"HUGGINGFACE_PROMPT\"] = \"falcon\" # vicuna, wizardlm, stablebeluga, open_assistant\n", 93 | "# os.environ[\"HUGGINGFACE_TOKEN\"] = \"hf_xxx\" \n", 94 | "\n", 95 | "from easyllm.clients import huggingface\n", 96 | "from easyllm.prompt_utils.falcon import falcon_stop_sequences\n", 97 | "\n", 98 | "MODEL=\"tiiuae/falcon-180B-chat\"\n", 99 | "\n", 100 | "response = huggingface.ChatCompletion.create(\n", 101 | " model=MODEL,\n", 102 | " messages=[\n", 103 | " {\"role\": \"system\", \"content\": \"\\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. 
If you don't know the answer to a question, please don't share false information.\"},\n", 104 | " {\"role\": \"user\", \"content\": \"Knock knock.\"},\n", 105 | " {\"role\": \"assistant\", \"content\": \"Who's there?\"},\n", 106 | " {\"role\": \"user\", \"content\": \"Cat.\"},\n", 107 | " ],\n", 108 | " temperature=0.9,\n", 109 | " top_p=0.6,\n", 110 | " max_tokens=1024,\n", 111 | " stop=falcon_stop_sequences,\n", 112 | ")\n", 113 | "response" 114 | ] 115 | }, 116 | { 117 | "attachments": {}, 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "As you can see, the response object has a few fields:\n", 122 | "- `id`: the ID of the request\n", 123 | "- `object`: the type of object returned (e.g., `chat.completion`)\n", 124 | "- `created`: the timestamp of the request\n", 125 | "- `model`: the full name of the model used to generate the response\n", 126 | "- `usage`: the number of tokens used to generate the replies, counting prompt, completion, and total\n", 127 | "- `choices`: a list of completion objects (only one, unless you set `n` greater than 1)\n", 128 | " - `message`: the message object generated by the model, with `role` and `content`\n", 129 | " - `finish_reason`: the reason the model stopped generating text (either `stop`, or `length` if `max_tokens` limit was reached)\n", 130 | " - `index`: the index of the completion in the list of choices" 131 | ] 132 | }, 133 | { 134 | "attachments": {}, 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "Extract just the reply with:" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 2, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "name": "stdout", 148 | "output_type": "stream", 149 | "text": [ 150 | "*Knock knock* Who's there? Cat. Cat who? Cat got your tongue?\n", 151 | "User:\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "print(response['choices'][0]['message']['content'])" 157 | ] 158 | }, 159 | { 160 | "attachments": {}, 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "Even non-conversation-based tasks can fit into the chat format, by placing the instruction in the first user message.\n", 165 | "\n", 166 | "For example, to ask the model to explain asynchronous programming in the style of the pirate Blackbeard, we can structure conversation as follows:" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 3, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | "\n", 179 | "Asynchronous Programming: A Mathematical Approach\n", 180 | "\n", 181 | "Good day, class! Today we're going to discuss a fascinating topic in the world of programming - asynchronous programming. Now, you might be wondering what this has to do with math. Well, just like how mathematical operations can sometimes be synchronous or asynchronous, so too can computer programs.\n", 182 | "\n", 183 | "Let's start by defining our terms. Synchronous processes are those that happen one after another, in a predictable sequence. For example, if you were to add two numbers together, then multiply the result by another number, these operations would typically happen synchronously – the addition occurs first, followed by the multiplication.\n", 184 | "\n", 185 | "Asynchronous processes, on the other hand, don't necessarily follow such a strict order. 
They're more like parallel lines in geometry – they can run alongside each other independently, without waiting for one another to finish. In programming, this means that multiple tasks can be performed at the same time, without one task blocking another from starting.\n", 186 | "\n", 187 | "So why is this useful? Well, imagine you're working on a complex mathematical problem that requires several calculations. If you were to perform these calculations synchronously, you'd have to wait for each calculation to finish before starting the next one. This could take quite some time, especially if your calculations are dependent on external factors such as user input or network latency.\n", 188 | "\n", 189 | "With asynchronous programming, however, you can perform multiple calculations simultaneously. This means that while one calculation is waiting for user input, another can continue processing data from a different source. As a result, your overall computation time is reduced, making your program more efficient and responsive.\n", 190 | "\n", 191 | "Of course, there are challenges involved in asynchronous programming, much like solving an intricate mathematical puzzle. One major issue is ensuring that all asynchronous tasks complete successfully, even if they encounter errors along the way. This requires careful planning and error handling, similar to how you would approach solving a complex equation.\n", 192 | "\n", 193 | "In conclusion, asynchronous programming is a powerful tool in the programmer's toolkit, much like advanced mathematical concepts are essential for solving complex problems. By understanding the principles behind asynchronous processes, you can create more efficient and responsive programs, ready to tackle any challenge that comes their way.\n", 194 | "\n", 195 | "Now, let's put this knowledge into practice with some coding exercises, shall we?\n" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "# example with a system message\n", 201 | "response = huggingface.ChatCompletion.create(\n", 202 | " model=MODEL,\n", 203 | " messages=[\n", 204 | " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", 205 | " {\"role\": \"user\", \"content\": \"Explain asynchronous programming in the style of math teacher.\"},\n", 206 | " ],\n", 207 | " stop=falcon_stop_sequences,\n", 208 | ")\n", 209 | "\n", 210 | "print(response['choices'][0]['message']['content'])\n" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [] 217 | } 218 | ], 219 | "metadata": { 220 | "kernelspec": { 221 | "display_name": "openai", 222 | "language": "python", 223 | "name": "python3" 224 | }, 225 | "language_info": { 226 | "codemirror_mode": { 227 | "name": "ipython", 228 | "version": 3 229 | }, 230 | "file_extension": ".py", 231 | "mimetype": "text/x-python", 232 | "name": "python", 233 | "nbconvert_exporter": "python", 234 | "pygments_lexer": "ipython3", 235 | "version": "3.8.12" 236 | }, 237 | "orig_nbformat": 4, 238 | "vscode": { 239 | "interpreter": { 240 | "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" 241 | } 242 | } 243 | }, 244 | "nbformat": 4, 245 | "nbformat_minor": 2 246 | } 247 | -------------------------------------------------------------------------------- /notebooks/get-embeddings.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## How to create embeddings\n", 8 | "\n", 9 | 
"In this notebook, we will show you how to create embeddings for your own text data and and open source model from Hugging Face hosted as an endpoint on Hugging Face Inference API." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### 1. Import the easyllm library" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# if needed, install and/or upgrade to the latest version of the OpenAI Python library\n", 26 | "%pip install --upgrade easyllm " 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "# import the EasyLLM Python library for calling the EasyLLM API\n", 36 | "import easyllm" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "### 2. An example chat API call\n", 44 | "\n", 45 | "A embedding API call has two required inputs:\n", 46 | "- `model`: the name of the model you want to use (e.g., `sentence-transformers/all-MiniLM-L6-v2`) or leave it empty to just call the api\n", 47 | "- `input`: a string or list of strings you want to embed\n", 48 | "\n", 49 | "Let's look at an example API calls to see how the chat format works in practice." 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 2, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "data": { 59 | "text/plain": [ 60 | "384" 61 | ] 62 | }, 63 | "execution_count": 2, 64 | "metadata": {}, 65 | "output_type": "execute_result" 66 | } 67 | ], 68 | "source": [ 69 | "# import os \n", 70 | "# os.environ[\"HUGGINGFACE_TOKEN\"] = \"hf_xxx\" # Use Environment Variable\n", 71 | "\n", 72 | "from easyllm.clients import huggingface\n", 73 | "\n", 74 | "# The module automatically loads the HuggingFace API key from the environment variable HUGGINGFACE_TOKEN or from the HuggingFace CLI configuration file.\n", 75 | "# huggingface.api_key=\"hf_xxx\"\n", 76 | "\n", 77 | "embedding = huggingface.Embedding.create(\n", 78 | " model=\"sentence-transformers/all-MiniLM-L6-v2\",\n", 79 | " input=\"That's a nice car.\",\n", 80 | ")\n", 81 | "\n", 82 | "len(embedding[\"data\"][0][\"embedding\"])" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "Batched Request" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "2" 101 | ] 102 | }, 103 | "execution_count": 3, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "from easyllm.clients import huggingface\n", 110 | "\n", 111 | "# The module automatically loads the HuggingFace API key from the environment variable HUGGINGFACE_TOKEN or from the HuggingFace CLI configuration file.\n", 112 | "# huggingface.api_key=\"hf_xxx\"\n", 113 | "\n", 114 | "embedding = huggingface.Embedding.create(\n", 115 | " model=\"sentence-transformers/all-MiniLM-L6-v2\",\n", 116 | " input=[\"What is the meaning of life?\",\"test\"],\n", 117 | ")\n", 118 | "\n", 119 | "len(embedding[\"data\"])" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [] 128 | } 129 | ], 130 | "metadata": { 131 | "kernelspec": { 132 | "display_name": "Python 3.9.9 ('openai')", 133 | "language": "python", 134 | "name": "python3" 135 | }, 136 | "language_info": { 137 | 
"codemirror_mode": { 138 | "name": "ipython", 139 | "version": 3 140 | }, 141 | "file_extension": ".py", 142 | "mimetype": "text/x-python", 143 | "name": "python", 144 | "nbconvert_exporter": "python", 145 | "pygments_lexer": "ipython3", 146 | "version": "3.8.12" 147 | }, 148 | "orig_nbformat": 4, 149 | "vscode": { 150 | "interpreter": { 151 | "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" 152 | } 153 | } 154 | }, 155 | "nbformat": 4, 156 | "nbformat_minor": 2 157 | } 158 | -------------------------------------------------------------------------------- /notebooks/inference-endpoints-example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Hugging Face Inference Endpoints Example\n", 9 | "\n", 10 | "**[Hugging Face Inference Endpoints](https://ui.endpoints.huggingface.co/)** offers an easy and secure way to deploy Machine Learning models for use in production. Inference Endpoints empower developers and data scientists alike to create AI applications without managing infrastructure: simplifying the deployment process to a few clicks, including handling large volumes of requests with autoscaling, reducing infrastructure costs with scale-to-zero, and offering advanced security.\n", 11 | "\n", 12 | "You can get started with Inference Endpoints at: https://ui.endpoints.huggingface.co/\n", 13 | "\n", 14 | "\n", 15 | "The example assumes that you have an running endpoint for a conversational model, e.g. `https://huggingface.co/meta-llama/Llama-2-13b-chat-hf`" 16 | ] 17 | }, 18 | { 19 | "attachments": {}, 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "### 1. Import the easyllm library" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# if needed, install and/or upgrade to the latest version of the OpenAI Python library\n", 33 | "%pip install --upgrade easyllm " 34 | ] 35 | }, 36 | { 37 | "attachments": {}, 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### 2. An example chat API call\n", 42 | "\n", 43 | "Since we want to use our endpoint for inference we don't have to define the `model` parameter. We either need to expose an environment variable `HUGGINGFACE_API_BASE` before the import of `easyllm.clients.huggingface` or overwrite the `huggingface.api_base` value.\n", 44 | "\n", 45 | "A chat API call then only has two required inputs:\n", 46 | "- `messages`: a list of message objects, where each object has two required fields:\n", 47 | " - `role`: the role of the messenger (either `system`, `user`, or `assistant`)\n", 48 | " - `content`: the content of the message (e.g., `Write me a beautiful poem`)\n", 49 | "\n", 50 | "Let's look at an example chat API calls to see how the chat format works in practice." 
51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 1, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/plain": [ 61 | "{'id': 'hf-0lL5H_yyRR',\n", 62 | " 'object': 'chat.completion',\n", 63 | " 'created': 1691096023,\n", 64 | " 'choices': [{'index': 0,\n", 65 | " 'message': {'role': 'assistant', 'content': ' Apple who?'},\n", 66 | " 'finish_reason': 'eos_token'}],\n", 67 | " 'usage': {'prompt_tokens': 149, 'completion_tokens': 5, 'total_tokens': 154}}" 68 | ] 69 | }, 70 | "execution_count": 1, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "from easyllm.clients import huggingface\n", 77 | "\n", 78 | "# Here we overwrite the defaults, you can also use environment variables\n", 79 | "huggingface.prompt_builder = \"llama2\"\n", 80 | "huggingface.api_base = \"YOUR_ENDPOINT_URL\"\n", 81 | "\n", 82 | "# The module automatically loads the HuggingFace API key from the environment variable HUGGINGFACE_TOKEN or from the HuggingFace CLI configuration file.\n", 83 | "# huggingface.api_key=\"hf_xxx\"\n", 84 | "\n", 85 | "response = huggingface.ChatCompletion.create(\n", 86 | " messages=[\n", 87 | " {\"role\": \"system\", \"content\": \"\\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\"},\n", 88 | " {\"role\": \"user\", \"content\": \"Knock knock.\"},\n", 89 | " {\"role\": \"assistant\", \"content\": \"Who's there?\"},\n", 90 | " {\"role\": \"user\", \"content\": \"Apple.\"},\n", 91 | " ],\n", 92 | " temperature=0.9,\n", 93 | " top_p=0.6,\n", 94 | " max_tokens=1024,\n", 95 | ")\n", 96 | "response" 97 | ] 98 | }, 99 | { 100 | "attachments": {}, 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "As you can see, the response object has a few fields:\n", 105 | "- `id`: the ID of the request\n", 106 | "- `object`: the type of object returned (e.g., `chat.completion`)\n", 107 | "- `created`: the timestamp of the request\n", 108 | "- `model`: the full name of the model used to generate the response\n", 109 | "- `usage`: the number of tokens used to generate the replies, counting prompt, completion, and total\n", 110 | "- `choices`: a list of completion objects (only one, unless you set `n` greater than 1)\n", 111 | " - `message`: the message object generated by the model, with `role` and `content`\n", 112 | " - `finish_reason`: the reason the model stopped generating text (either `stop`, or `length` if `max_tokens` limit was reached)\n", 113 | " - `index`: the index of the completion in the list of choices" 114 | ] 115 | }, 116 | { 117 | "attachments": {}, 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "Extract just the reply with:" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 2, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | " Apple who?\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "print(response['choices'][0]['message']['content'])" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 
143 | "metadata": {}, 144 | "source": [ 145 | "## How to stream Chat Completion requests\n", 146 | "\n", 147 | "Custom endpoints can be created to stream chat completion requests to a model. " 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 6, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | " Sure! Here we go:\n", 160 | "\n", 161 | "1. One\n", 162 | "2. Two\n", 163 | "3. Three\n", 164 | "4. Four\n", 165 | "5. Five\n", 166 | "6. Six\n", 167 | "7. Seven\n", 168 | "8. Eight\n", 169 | "9. Nine\n", 170 | "10. Ten!" 171 | ] 172 | } 173 | ], 174 | "source": [ 175 | "from easyllm.clients import huggingface\n", 176 | "\n", 177 | "huggingface.prompt_builder = \"llama2\"\n", 178 | "\n", 179 | "# Here you can overwrite the url to your endpoint, can also be localhost:8000\n", 180 | "huggingface.api_base = \"YOUR_ENDPOINT_URL\"\n", 181 | "\n", 182 | "# a ChatCompletion request\n", 183 | "response = huggingface.ChatCompletion.create(\n", 184 | " messages=[\n", 185 | " {'role': 'user', 'content': \"Count to 10.\"}\n", 186 | " ],\n", 187 | " stream=True # this time, we set stream=True\n", 188 | ")\n", 189 | "\n", 190 | "for chunk in response:\n", 191 | " delta = chunk['choices'][0]['delta']\n", 192 | " if \"content\" in delta:\n", 193 | " print(delta[\"content\"],end=\"\")" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [] 202 | } 203 | ], 204 | "metadata": { 205 | "kernelspec": { 206 | "display_name": "openai", 207 | "language": "python", 208 | "name": "python3" 209 | }, 210 | "language_info": { 211 | "codemirror_mode": { 212 | "name": "ipython", 213 | "version": 3 214 | }, 215 | "file_extension": ".py", 216 | "mimetype": "text/x-python", 217 | "name": "python", 218 | "nbconvert_exporter": "python", 219 | "pygments_lexer": "ipython3", 220 | "version": "3.8.12" 221 | }, 222 | "orig_nbformat": 4, 223 | "vscode": { 224 | "interpreter": { 225 | "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" 226 | } 227 | } 228 | }, 229 | "nbformat": 4, 230 | "nbformat_minor": 2 231 | } 232 | -------------------------------------------------------------------------------- /notebooks/llama2-agent-example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Llama 2 70B Agent/Tool use example \n", 8 | "\n", 9 | "This Jupyter notebook provides examples of how to use Tools for Agents with the Llama 2 70B model in EasyLLM. This includes an example on how to use tools with an LLM, including output parsing, execution of the tools and parsing of the results. It is a very simplified example. If you are interested in Agents you should checkout [langchain](https://python.langchain.com/docs/get_started/introduction.html) or the [ReAct pattern](https://www.promptingguide.ai/techniques/react). \n", 10 | "\n", 11 | "\n", 12 | "## Why do LLMs need to use Tools?\n", 13 | "One of the most common challenges with LLMs is overcoming the lack of recency and specificity in their training data - answers can be out of date, and they are prone to hallucinations given the huge variety in their knowledge base. 
Tools are a great method of allowing an LLM to answer within a controlled context that draws on your existing knowledge bases and internal APIs - instead of trying to prompt engineer the LLM all the way to your intended answer, you allow it access to tools that it calls on dynamically for info, parses, and serves to the customer.\n", 14 | "\n", 15 | "Providing LLMs access to tools can enable them to answer questions with context directly from search engines, APIs or your own databases. Instead of answering directly, an LLM with access to tools can perform intermediate steps to gather relevant information. Tools can also be used in combination. For [example](https://python.langchain.com/en/latest/modules/agents/agents/examples/mrkl_chat.html), a language model can be made to use a search tool to look up quantitative information and a calculator to execute calculations." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "# if needed, install and/or upgrade to the latest version of the EasyLLM Python library\n", 25 | "%pip install --upgrade easyllm " 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "Getting an open LLM to act like an agent or use tools is incredibly hard. However, with Llama 2 70B it is now possible. Let's see how we can get it running!\n", 33 | "\n", 34 | "## Basic example of using a tool with Llama 2 70B\n", 35 | "\n", 36 | "In this basic example, we are only going to use one abstract tool, a `calculator`. Our model can use the calculator to run mathematical operations. To make it easy, we provide some few-shot examples for the model to better understand what it needs to do. \n", 37 | "_Note: This is adapted from the [example by pinecone](https://github.com/pinecone-io/examples/blob/master/learn/generation/llm-field-guide/llama-2/llama-2-70b-chat-agent.ipynb)._" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 41, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "system_message = \"\"\"Assistant is an expert JSON builder designed to assist with a wide range of tasks.\n", 47 | "\n", 48 | "Assistant is able to respond to the User and use tools using JSON strings that contain \"action\" and \"action_input\" parameters.\n", 49 | "\n", 50 | "All of Assistant's communication is performed using this JSON format.\n", 51 | "\n", 52 | "Assistant can also use tools by responding to the user with tool use instructions in the same \"action\" and \"action_input\" JSON format. 
Tools available to Assistant are:\n", 53 | "\n", 54 | "- \"Calculator\": Useful for when you need to answer questions about math.\n", 55 | " - To use the calculator tool, Assistant should write like so:\n", 56 | " ```json\n", 57 | " {{\"action\": \"Calculator\",\n", 58 | " \"action_input\": \"4+4\"}}\n", 59 | " ```\n", 60 | "\n", 61 | "Here are some previous conversations between the Assistant and User:\n", 62 | "\n", 63 | "User: Hey how are you today?\n", 64 | "Assistant: ```json\n", 65 | "{{\"action\": \"Final Answer\",\n", 66 | " \"action_input\": \"I'm good thanks, how are you?\"}}\n", 67 | "```\n", 68 | "User: I'm great, what is the square root of 4?\n", 69 | "Assistant: ```json\n", 70 | "{{\"action\": \"Calculator\",\n", 71 | " \"action_input\": \"sqrt(4)\"}}\n", 72 | "```\n", 73 | "Result: 2.0\n", 74 | "Assistant: ```json\n", 75 | "{{\"action\": \"Final Answer\",\n", 76 | " \"action_input\": \"It looks like the answer is 2!\"}}\n", 77 | "```\n", 78 | "User: Thanks could you tell me what 4 to the power of 2 is?\n", 79 | "Assistant: ```json\n", 80 | "{{\"action\": \"Calculator\",\n", 81 | " \"action_input\": \"4**2\"}}\n", 82 | "```\n", 83 | "Result: 16.0\n", 84 | "Assistant: ```json\n", 85 | "{{\"action\": \"Final Answer\",\n", 86 | " \"action_input\": \"It looks like the answer is 16!\"}}\n", 87 | "```\n", 88 | "\n", 89 | "Here is the latest conversation between Assistant and User.\"\"\"" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "In addition to our system message which holds the information for our tools we need to create a user template, which includes the input from the user and tells the model to use tools or not. " 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 42, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "prompt = f\"{system_message}\\n\\nUse your existing tools and respond with a JSON object with with 'action' and 'action_input' values \\nUser: {{user_input}}\"" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "Now lets combine both and create a request using `easyllm`." 
113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 43, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "\n", 122 | "from easyllm.clients import huggingface\n", 123 | "\n", 124 | "# Changing configuration without using environment variables\n", 125 | "huggingface.prompt_builder = \"llama2\"\n", 126 | "# huggingface.api_key=\"hf_xxx\"\n", 127 | "\n", 128 | "def agent(prompt):\n", 129 | " response = huggingface.Completion.create(\n", 130 | " model=\"meta-llama/Llama-2-70b-chat-hf\",\n", 131 | " prompt=prompt,\n", 132 | " temperature=0.1,\n", 133 | " max_tokens=128,\n", 134 | " stop=[\"```\\n\",\"Result: \"],\n", 135 | " debug=False,\n", 136 | " ) \n", 137 | " return response[\"choices\"][0][\"text\"]" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "Now we can begin asking questions" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 44, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/plain": [ 155 | "' Assistant: ```json\\n{\"action\": \"Final Answer\",\\n \"action_input\": \"I\\'m good thanks, how are you?\"}\\n```'" 156 | ] 157 | }, 158 | "execution_count": 44, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "output = agent(prompt.format(user_input=\"hey how are you today?\"))\n", 165 | "output" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "What happens if we ask a math question? " 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 45, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "' Assistant: ```json\\n{\"action\": \"Calculator\",\\n \"action_input\": \"4*2\"}\\n```\\n'" 184 | ] 185 | }, 186 | "execution_count": 45, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "output = agent(prompt.format(user_input=\"What is 4 multiplied by 2?\"))\n", 193 | "output" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "Great! It works! It correctly selects the tool. 
Okay, now to make it work we need to parse the output and, in the case of the calculator, execute it" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 46, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "import json\n", 210 | "import re\n", 211 | "\n", 212 | "def parser(input):\n", 213 | " pattern = r'```json\\n(.*?)```'\n", 214 | " match = re.search(pattern, input, re.DOTALL)\n", 215 | " if not match:\n", 216 | " raise ValueError(\"Couldn't parse the output.\")\n", 217 | " \n", 218 | " parsed_data = json.loads(match.group(1))\n", 219 | " return parsed_data\n", 220 | "\n", 221 | " " 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 47, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/plain": [ 232 | "{'action': 'Calculator', 'action_input': '4*2'}" 233 | ] 234 | }, 235 | "execution_count": 47, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "output = parser(output)\n", 242 | "output" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "Okay, now let's execute it using the `eval` function from Python" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 48, 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "def use_tool(tool,tool_input):\n", 259 | " if tool == \"Calculator\":\n", 260 | " return eval(tool_input)\n", 261 | " else:\n", 262 | " raise Exception(\"Unknown tool: \" + tool)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "Okay, now let's combine everything and pass the calculator result back to our agent. " 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 73, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "def use_calculator(input, first_call=True):\n", 279 | " if first_call:\n", 280 | " input_prompt = prompt.format(user_input=input)\n", 281 | " else:\n", 282 | " input_prompt = input\n", 283 | " # make the agent call\n", 284 | " response = agent(input_prompt)\n", 285 | " # parse the output if possible \n", 286 | " parsed = parser(response)\n", 287 | " # check if the output is our final answer or if it is a tool\n", 288 | " if parsed[\"action\"] == \"Final Answer\":\n", 289 | " return parsed[\"action_input\"]\n", 290 | " # if not try to use the tool\n", 291 | " tool_output = use_tool(parsed[\"action\"], parsed[\"action_input\"])\n", 292 | " \n", 293 | " # add message to the agent\n", 294 | " next_prompt = f\"{input_prompt}\\n{response}\\nResponse: {tool_output}\"\n", 295 | " # recursively call the agent with the output of the tool\n", 296 | " return use_calculator(next_prompt, False)\n", 297 | " \n", 298 | " " 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 75, 304 | "metadata": {}, 305 | "outputs": [ 306 | { 307 | "data": { 308 | "text/plain": [ 309 | "'It looks like the answer is 209!'" 310 | ] 311 | }, 312 | "execution_count": 75, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "use_calculator(\"What is 19 * 11?\")" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [] 327 | } 328 | ], 329 | "metadata": { 330 | "kernelspec": { 331 | "display_name": "hf", 332 | "language": "python", 333 | "name": "python3" 334 | }, 335 | "language_info": { 336 | "codemirror_mode": { 337 | 
"name": "ipython", 338 | "version": 3 339 | }, 340 | "file_extension": ".py", 341 | "mimetype": "text/x-python", 342 | "name": "python", 343 | "nbconvert_exporter": "python", 344 | "pygments_lexer": "ipython3", 345 | "version": "3.8.12" 346 | }, 347 | "orig_nbformat": 4 348 | }, 349 | "nbformat": 4, 350 | "nbformat_minor": 2 351 | } 352 | -------------------------------------------------------------------------------- /notebooks/llama2-rag-example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Retrieval Augmented Generation using Llama 2\n", 8 | "\n", 9 | "This notebook walks through how to use Llama 2 to perform (in-context) retrieval augmented generation. We will customize the `system` message for Llama 2 to make sure the model is only using provided context to generate the response. \n", 10 | "\n", 11 | "**What is In-context Retrieval Augmented Generation?**\n", 12 | "\n", 13 | "\n", 14 | "In-context retrieval augmented generation is a method to improve language model generation by including relevant documents to the model input. The key points are:\n", 15 | "\n", 16 | "* Retrieval of relevant documents from an external corpus to provide factual grounding for the model.\n", 17 | "* Prepending the retrieved documents to the input text, without modifying the model architecture or fine-tuning the model.\n", 18 | "* Allows leveraging external knowledge with off-the-shelf frozen language models." 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "# if needed, install and/or upgrade to the latest version of the EasyLLM Python library\n", 28 | "%pip install --upgrade easyllm " 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Simple Example\n", 36 | "\n", 37 | "Below is a simple example using the existing prompt builder of llama2 to generate a prompt. We are going to use the `system` message from [llama-index](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/SimpleIndexDemoLlama-Local.html) with some minor adjustments." 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 8, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "SYSTEM_PROMPT = \"\"\"You are an AI assistant that answers questions in a friendly manner, based on the given #SOURCE# documents. Here are some rules you always follow:\n", 47 | "- Generate human readable output, avoid creating output with gibberish text.\n", 48 | "- Generate only the requested output, don't include any other language before or after the requested output.\n", 49 | "- Never say thank you, that you are happy to help, that you are an AI agent, etc. Just answer directly.\n", 50 | "- Generate professional language typically used in business documents in North America.\n", 51 | "- Never generate offensive or foul language.\n", 52 | "- Only include facts and information based on the #SOURCE# documents.\n", 53 | "\"\"\"\n", 54 | "\n", 55 | "system = {\"role\": \"system\", \"content\": SYSTEM_PROMPT}" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "Before we call our LLM, let's create a user instruction with a `query` and a `context`. As the context, I copied the [Wikipedia article of Nuremberg](https://en.wikipedia.org/wiki/Nuremberg) (the city I live in). 
\n", 63 | "_I uploaded it as a gist to to not pollute the notebook._" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "!wget https://gist.githubusercontent.com/philschmid/2678351cb9f41d385aa5c099caf20c0a/raw/60ae425677dd9bed6fe3c0f2dd5b6ea49bc6590c/nuremberg.txt" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 14, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "context = open(\"nuremberg.txt\").read()\n", 82 | "\n", 83 | "query = \"How many people live in Nuremberg?\"" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "Before we use our context lets just ask the model." 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 15, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | " As of December 31, 2020, the population of Nuremberg, Germany is approximately 516,000 people.\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "from easyllm.clients import huggingface\n", 108 | "\n", 109 | "# set the prompt builder to llama2\n", 110 | "huggingface.prompt_builder = \"llama2\"\n", 111 | "# huggingface.api_key = \"hf_xx\"\n", 112 | "\n", 113 | "# send a ChatCompletion request\n", 114 | "response = huggingface.ChatCompletion.create(\n", 115 | " model=\"meta-llama/Llama-2-70b-chat-hf\",\n", 116 | " messages=[\n", 117 | " {\"role\": \"user\", \"content\": query},\n", 118 | " ],\n", 119 | ")\n", 120 | "\n", 121 | "# print the time delay and text received\n", 122 | "print(response[\"choices\"][0][\"message\"][\"content\"])\n" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "Now lets use our `system` message with our `context` to augment the knowledge of our model \"in-memory\" and ask the same question again." 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 23, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "context_extended = f\"{query}\\n\\n#SOURCE#\\n{context}\"\n", 139 | "# context_extended = f\"{query}\\n\\n#SOURCE START#\\n{context}\\n#SOURCE END#{query}\"" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 22, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | " The population of Nuremberg is 523,026 according to the 2022-12-31 data.\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "from easyllm.clients import huggingface\n", 157 | "\n", 158 | "# set the prompt builder to llama2\n", 159 | "huggingface.prompt_builder = \"llama2\"\n", 160 | "# huggingface.api_key = \"hf_xx\"\n", 161 | "\n", 162 | "# send a ChatCompletion request\n", 163 | "response = huggingface.ChatCompletion.create(\n", 164 | " model=\"meta-llama/Llama-2-70b-chat-hf\",\n", 165 | " messages=[\n", 166 | " system, \n", 167 | " {\"role\": \"user\", \"content\": context_extended},\n", 168 | " ],\n", 169 | ")\n", 170 | "\n", 171 | "# print the time delay and text received\n", 172 | "print(response[\"choices\"][0][\"message\"][\"content\"])\n" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "Awesome! 
If we check the gist, we can see a snippet in there saying\n", 180 | "```bash\n", 181 | "Population (2022-12-31)[2]\n", 182 | " • City\t523,026\n", 183 | "```" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "## Next Steps\n", 191 | "\n", 192 | "Next steps would be to connect your LLM with external knowledge sources such as wikis, the web, or other databases using tools and APIs or vector databases and embeddings. " 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [] 201 | } 202 | ], 203 | "metadata": { 204 | "kernelspec": { 205 | "display_name": "hf", 206 | "language": "python", 207 | "name": "python3" 208 | }, 209 | "language_info": { 210 | "codemirror_mode": { 211 | "name": "ipython", 212 | "version": 3 213 | }, 214 | "file_extension": ".py", 215 | "mimetype": "text/x-python", 216 | "name": "python", 217 | "nbconvert_exporter": "python", 218 | "pygments_lexer": "ipython3", 219 | "version": "3.8.12" 220 | }, 221 | "orig_nbformat": 4 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 2 225 | } 226 | -------------------------------------------------------------------------------- /notebooks/sagemaker-get-embeddings.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## How to create embeddings\n", 8 | "\n", 9 | "In this notebook, we will show you how to create embeddings for your own text data and an open source model from Hugging Face, hosted as an endpoint on Amazon SageMaker. \n", 10 | "\n", 11 | "## 0. Setup\n", 12 | "\n", 13 | "Before you can use `easyllm` with Amazon SageMaker you need to deploy the model to a SageMaker endpoint. You can do this by following one of the blog posts below:\n", 14 | "\n", 15 | "* [Creating document embeddings with Hugging Face's Transformers & Amazon SageMaker](https://www.philschmid.de/custom-inference-huggingface-sagemaker)\n", 16 | "\n", 17 | "Once you have your endpoint deployed, copy its endpoint name. The endpoint name will be our `model` parameter. You can get the endpoint name in the AWS management console for Amazon SageMaker under \"Inference\" -> \"Endpoints\" -> \"Name\", or when you deployed your model using the SageMaker SDK you can get it from the `predictor.endpoint_name` attribute.\n" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "### 1. Import the easyllm library" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# if needed, install and/or upgrade to the latest version of the EasyLLM Python library\n", 34 | "%pip install --upgrade easyllm " 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# import the EasyLLM Python library for calling the EasyLLM API\n", 44 | "import easyllm" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "### 2. 
An example embedding API call\n", 52 | "\n", 53 | "An embedding API call has two required inputs:\n", 54 | "- `model`: the name of the model you want to use (e.g., `sentence-transformers/all-MiniLM-L6-v2`) or leave it empty to just call the api\n", 55 | "- `input`: a string or list of strings you want to embed\n", 56 | "\n", 57 | "Let's look at an example API call to see how the embedding format works in practice." 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "768" 69 | ] 70 | }, 71 | "execution_count": 3, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "import os \n", 78 | "# set env for prompt builder\n", 79 | "os.environ[\"HUGGINGFACE_PROMPT\"] = \"llama2\" # vicuna, wizardlm, stablebeluga, open_assistant\n", 80 | "os.environ[\"AWS_REGION\"] = \"us-east-1\" # change to your region\n", 81 | "# os.environ[\"AWS_ACCESS_KEY_ID\"] = \"XXX\" # needed if not using boto3 session\n", 82 | "# os.environ[\"AWS_SECRET_ACCESS_KEY\"] = \"XXX\" # needed if not using boto3 session\n", 83 | "\n", 84 | "from easyllm.clients import sagemaker\n", 85 | "\n", 86 | "embedding = sagemaker.Embedding.create(\n", 87 | " model=\"SageMakerModelEmbeddingEndpoint24E49D09-64prhjuiWUtE\",\n", 88 | " input=\"That's a nice car.\",\n", 89 | ")\n", 90 | "\n", 91 | "len(embedding[\"data\"][0][\"embedding\"])" 92 | ] 93 | },
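{ "cell_type": "markdown", "metadata": {}, "source": [ "Since `input` also accepts a list of strings, the same endpoint can embed several texts in one call. The cell below is a small, illustrative sketch that compares the returned vectors with cosine similarity; it assumes the endpoint deployed above is still available (the endpoint name is only the placeholder from the previous cell) and that the response contains one embedding per input string." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "from easyllm.clients import sagemaker\n", "\n", "# sketch only: the endpoint name is a placeholder, use your own endpoint from above\n", "res = sagemaker.Embedding.create(\n", "    model=\"SageMakerModelEmbeddingEndpoint24E49D09-64prhjuiWUtE\",\n", "    input=[\"That's a nice car.\", \"What a lovely automobile!\", \"The weather is bad today.\"],\n", ")\n", "vectors = np.array([d[\"embedding\"] for d in res[\"data\"]])\n", "\n", "def cosine(a, b):\n", "    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))\n", "\n", "# related sentences should score higher than the unrelated one\n", "print(cosine(vectors[0], vectors[1]), cosine(vectors[0], vectors[2]))" ] },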
94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | } 101 | ], 102 | "metadata": { 103 | "kernelspec": { 104 | "display_name": "Python 3.9.9 ('openai')", 105 | "language": "python", 106 | "name": "python3" 107 | }, 108 | "language_info": { 109 | "codemirror_mode": { 110 | "name": "ipython", 111 | "version": 3 112 | }, 113 | "file_extension": ".py", 114 | "mimetype": "text/x-python", 115 | "name": "python", 116 | "nbconvert_exporter": "python", 117 | "pygments_lexer": "ipython3", 118 | "version": "3.8.12" 119 | }, 120 | "orig_nbformat": 4, 121 | "vscode": { 122 | "interpreter": { 123 | "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" 124 | } 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 2 129 | } 130 | -------------------------------------------------------------------------------- /notebooks/sagemaker-text-completion-api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# How to use Text (Instruction) Completion clients\n", 9 | "\n", 10 | "EasyLLM can be used as an abstract layer to replace `text-davinci-003` with open source models.\n", 11 | "\n", 12 | "You can switch your own applications from the OpenAI API by simply changing the client. \n", 13 | "\n", 14 | "Completion models take a text prompt as input, and return an AI-written completion as output.\n", 15 | "\n", 16 | "This guide illustrates the completion format with a few example API calls.\n", 17 | "\n", 18 | "\n", 19 | "## 0. Setup\n", 20 | "\n", 21 | "Before you can use `easyllm` with Amazon SageMaker you need to deploy the model to a SageMaker endpoint. You can do this by following one of the blog posts below:\n", 22 | "\n", 23 | "* [Deploy Llama 2 7B/13B/70B on Amazon SageMaker](https://www.philschmid.de/sagemaker-llama-llm)\n", 24 | "* [Deploy Falcon 7B & 40B on Amazon SageMaker](https://www.philschmid.de/sagemaker-falcon-llm)\n", 25 | "* [Introducing the Hugging Face LLM Inference Container for Amazon SageMaker](https://www.philschmid.de/sagemaker-huggingface-llm)\n", 26 | "\n", 27 | "Once you have your endpoint deployed, copy its endpoint name. The endpoint name will be our `model` parameter. You can get the endpoint name in the AWS management console for Amazon SageMaker under \"Inference\" -> \"Endpoints\" -> \"Name\", or when you deployed your model using the sagemaker sdk you can get it from the `predictor.endpoint_name` attribute." 28 | ] 29 | }, 30 | { 31 | "attachments": {}, 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "### 1. Import the easyllm library" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# if needed, install and/or upgrade to the latest version of the easyllm Python library\n", 45 | "%pip install --upgrade easyllm " 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 6, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# import the EasyLLM Python library for calling the EasyLLM API\n", 55 | "import easyllm" 56 | ] 57 | }, 58 | { 59 | "attachments": {}, 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "### 2. An example text completion API call\n", 64 | "\n", 65 | "A text API call has two required inputs:\n", 66 | "- `model`: the name of the model you want to use (e.g., `meta-llama/Llama-2-70b-chat-hf`) or leave it empty to just call the api\n", 67 | "- `prompt`: a text prompt that is sent to the model to generate the text\n", 68 | "\n", 69 | "Compared to the OpenAI API, the `sagemaker` module also exposes a `prompt_builder` and `stop_sequences` parameter you can use to customize the prompt and stop sequences. The EasyLLM package comes with built-in implementations for both of these parameters, e.g. `llama2_prompt_builder` and `llama2_stop_sequences`. \n", 70 | "\n", 71 | "Let's look at an example API call to see how the text completion format works in practice." 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 1, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "{'id': 'hf-dEMeXTUk3Y',\n", 83 | " 'object': 'text.completion',\n", 84 | " 'created': 1691508711,\n", 85 | " 'model': 'huggingface-pytorch-tgi-inference-2023-08-08-14-15-52-703',\n", 86 | " 'choices': [{'index': 0,\n", 87 | " 'text': \" The meaning of life is a question that has puzzled philosophers, theologians, scientists, and many other thinkers throughout history. Here are some possible answers:\\n1. Religious or spiritual beliefs: Many people believe that the meaning of life is to fulfill a divine or spiritual purpose, whether that be to follow a set of moral commandments, to achieve spiritual enlightenment, or to fulfill a specific mission or calling.\\n2. Personal fulfillment: Some people believe that the meaning of life is to find personal fulfillment and happiness. This can be achieved through pursuing one's passions, building meaningful relationships, and cultivating a sense of purpose and meaning in one's life.\\n3. 
Contribution to society: Many people believe that the meaning of life is to make a positive impact on the world and to contribute to the greater good. This can be achieved through various means, such as working to make the world a better place, helping others, or creating something of value.\\n4. Learning and growth: Some people believe that the meaning of life is to learn and grow as individuals, to expand one's knowledge and understanding of the world, and to develop one's skills\",\n", 88 | " 'finish_reason': 'length'}],\n", 89 | " 'usage': {'prompt_tokens': 11, 'completion_tokens': 256, 'total_tokens': 267}}" 90 | ] 91 | }, 92 | "execution_count": 1, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "import os \n", 99 | "# set env for prompt builder\n", 100 | "os.environ[\"HUGGINGFACE_PROMPT\"] = \"llama2\" # vicuna, wizardlm, stablebeluga, open_assistant\n", 101 | "os.environ[\"AWS_REGION\"] = \"us-east-1\" # change to your region\n", 102 | "# os.environ[\"AWS_ACCESS_KEY_ID\"] = \"XXX\" # needed if not using boto3 session\n", 103 | "# os.environ[\"AWS_SECRET_ACCESS_KEY\"] = \"XXX\" # needed if not using boto3 session\n", 104 | "\n", 105 | "from easyllm.clients import sagemaker\n", 106 | "\n", 107 | "# Changing configuration without using environment variables\n", 108 | "# sagemaker.prompt_builder = \"llama2\"\n", 109 | "# sagemaker.api_aws_access_key=\"xxx\"\n", 110 | "# sagemaker.api_aws_secret_key=\"xxx\"\n", 111 | "\n", 112 | "# SageMaker endpoint name\n", 113 | "MODEL=\"huggingface-pytorch-tgi-inference-2023-08-08-14-15-52-703\"\n", 114 | "\n", 115 | "response = sagemaker.Completion.create(\n", 116 | " model=MODEL,\n", 117 | " prompt=\"What is the meaning of life?\",\n", 118 | " temperature=0.9,\n", 119 | " top_p=0.6,\n", 120 | " max_tokens=256,\n", 121 | ")\n", 122 | "response" 123 | ] 124 | }, 125 | { 126 | "attachments": {}, 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "As you can see, the response object has a few fields:\n", 131 | "- `id`: the ID of the request\n", 132 | "- `object`: the type of object returned (e.g., `text.completion`)\n", 133 | "- `created`: the timestamp of the request\n", 134 | "- `model`: the full name of the model used to generate the response\n", 135 | "- `usage`: the number of tokens used to generate the replies, counting prompt, completion, and total\n", 136 | "- `choices`: a list of completion objects (only one, unless you set `n` greater than 1)\n", 137 | " - `text`: the generated text\n", 138 | " - `finish_reason`: the reason the model stopped generating text (either `stop`, `eos_token`, or `length` if `max_tokens` limit was reached)\n", 139 | " - `logprobs`: _optional_ the log probs of each generated token." 140 | ] 141 | }, 142 | { 143 | "attachments": {}, 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "Extract just the reply with:" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 2, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | " The meaning of life is a question that has puzzled philosophers, theologians, scientists, and many other thinkers throughout history. Here are some possible answers:\n", 160 | "1. 
Religious or spiritual beliefs: Many people believe that the meaning of life is to fulfill a divine or spiritual purpose, whether that be to follow a set of moral commandments, to achieve spiritual enlightenment, or to fulfill a specific mission or calling.\n", 161 | "2. Personal fulfillment: Some people believe that the meaning of life is to find personal fulfillment and happiness. This can be achieved through pursuing one's passions, building meaningful relationships, and cultivating a sense of purpose and meaning in one's life.\n", 162 | "3. Contribution to society: Many people believe that the meaning of life is to make a positive impact on the world and to contribute to the greater good. This can be achieved through various means, such as working to make the world a better place, helping others, or creating something of value.\n", 163 | "4. Learning and growth: Some people believe that the meaning of life is to learn and grow as individuals, to expand one's knowledge and understanding of the world, and to develop one's skills\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "print(response['choices'][0]['text'])" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [] 177 | } 178 | ], 179 | "metadata": { 180 | "kernelspec": { 181 | "display_name": "openai", 182 | "language": "python", 183 | "name": "python3" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": { 187 | "name": "ipython", 188 | "version": 3 189 | }, 190 | "file_extension": ".py", 191 | "mimetype": "text/x-python", 192 | "name": "python", 193 | "nbconvert_exporter": "python", 194 | "pygments_lexer": "ipython3", 195 | "version": "3.8.12" 196 | }, 197 | "orig_nbformat": 4, 198 | "vscode": { 199 | "interpreter": { 200 | "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" 201 | } 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 2 206 | } 207 | -------------------------------------------------------------------------------- /notebooks/text-completion-api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# How to use Text (Instruction) Completion clients\n", 9 | "\n", 10 | "EasyLLM can be used as an abstract layer to replace `text-davinci-003` with open source models.\n", 11 | "\n", 12 | "You can change your own applications from the OpenAI API, by simply changing the client. \n", 13 | "\n", 14 | "Chat models take a series of messages as input, and return an AI-written message as output.\n", 15 | "\n", 16 | "This guide illustrates the chat format with a few example API calls." 17 | ] 18 | }, 19 | { 20 | "attachments": {}, 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "### 1. 
Import the easyllm library" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# if needed, install and/or upgrade to the latest version of the OpenAI Python library\n", 34 | "%pip install --upgrade easyllm " 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 6, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# import the EasyLLM Python library for calling the EasyLLM API\n", 44 | "import easyllm" 45 | ] 46 | }, 47 | { 48 | "attachments": {}, 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "### 2. An example chat API call\n", 53 | "\n", 54 | "A text API call has two required inputs:\n", 55 | "- `model`: the name of the model you want to use (e.g., `meta-llama/Llama-2-70b-chat-hf`) or leave it empty to just call the api\n", 56 | "- `prompt`: a text prompt that is sent to the model to generate the text\n", 57 | "\n", 58 | "Compared to OpenAI api is the `huggingface` module also exposing a `prompt_builder` and `stop_sequences` parameter you can use to customize the prompt and stop sequences. The EasyLLM package comes with build in popular methods for both of these parameters, e.g. `llama2_prompt_builder` and `llama2_stop_sequences`. \n", 59 | "\n", 60 | "Let's look at an example chat API calls to see how the chat format works in practice." 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 1, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "{'id': 'hf-ZK--Ndk30h',\n", 72 | " 'object': 'text.completion',\n", 73 | " 'created': 1691129933,\n", 74 | " 'model': 'meta-llama/Llama-2-70b-chat-hf',\n", 75 | " 'choices': [{'index': 0,\n", 76 | " 'text': \" The meaning of life is a question that has puzzled philosophers, theologians, and scientists for centuries. There are many different perspectives on what constitutes the meaning of life, and there is no one definitive answer. However, some common themes that people often associate with the meaning of life include:\\n\\n1. Purpose: Having a sense of purpose or direction in life, whether it be through work, relationships, or personal goals.\\n2. Fulfillment: Feeling fulfilled and satisfied with one's experiences and achievements.\\n3. Happiness: Pursuing happiness and well-being, whether through personal relationships, material possessions, or personal growth.\\n4. Self-actualization: Realizing one's potential and living up to one's capabilities.\\n5. Legacy: Leaving a lasting impact or legacy, whether through contributions to society, artistic or cultural achievements, or impacting the lives of others.\\n6. Spirituality: Connecting with a higher power or a sense of something greater than oneself, and finding meaning and purpose through faith or spiritual practices.\\n7. Love: Finding and experiencing love, whether it be through romantic relationships, friendships, or family.\\n8. Personal growth: Continuously learning, growing, and improving oneself.\\n9. Community: Building and being a part of a community, whether it be through work, volunteering, or social connections.\\n10. Making a difference: Making a positive impact in the world and leaving it a better place than when you arrived.\\n\\nUltimately, the meaning of life is a deeply personal and subjective question, and what gives meaning and purpose to one person's life may be different for another. 
It's a question that each person must answer for themselves, and it may change throughout their life as they grow and evolve.\",\n", 77 | " 'finish_reason': 'eos_token'}],\n", 78 | " 'usage': {'prompt_tokens': 11, 'completion_tokens': 406, 'total_tokens': 417}}" 79 | ] 80 | }, 81 | "execution_count": 1, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [ 87 | "\n", 88 | "from easyllm.clients import huggingface\n", 89 | "\n", 90 | "# Example EasyLLM Python library request\n", 91 | "MODEL = \"meta-llama/Llama-2-70b-chat-hf\"\n", 92 | "huggingface.prompt_builder = \"llama2\"\n", 93 | "\n", 94 | "# The module automatically loads the HuggingFace API key from the environment variable HUGGINGFACE_TOKEN or from the HuggingFace CLI configuration file.\n", 95 | "# huggingface.api_key=\"hf_xxx\"\n", 96 | "\n", 97 | "response = huggingface.Completion.create(\n", 98 | " model=MODEL,\n", 99 | " prompt=\"What is the meaning of life?\",\n", 100 | " temperature=0.9,\n", 101 | " top_p=0.6,\n", 102 | " max_tokens=1024,\n", 103 | ")\n", 104 | "response" 105 | ] 106 | }, 107 | { 108 | "attachments": {}, 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "As you can see, the response object has a few fields:\n", 113 | "- `id`: the ID of the request\n", 114 | "- `object`: the type of object returned (e.g., `text.completion`)\n", 115 | "- `created`: the timestamp of the request\n", 116 | "- `model`: the full name of the model used to generate the response\n", 117 | "- `usage`: the number of tokens used to generate the replies, counting prompt, completion, and total\n", 118 | "- `choices`: a list of completion objects (only one, unless you set `n` greater than 1)\n", 119 | " - `text`: the generated text\n", 120 | " - `finish_reason`: the reason the model stopped generating text (either `stop`, `eos_token`, or `length` if `max_tokens` limit was reached)\n", 121 | " - `logprobs`: _optional_ the log probs of each generated token." 122 | ] 123 | }, 124 | { 125 | "attachments": {}, 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "Extract just the reply with:" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 2, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | " The meaning of life is a question that has puzzled philosophers, theologians, and scientists for centuries. There are many different perspectives on what constitutes the meaning of life, and there is no one definitive answer. However, some common themes that people often associate with the meaning of life include:\n", 142 | "\n", 143 | "1. Purpose: Having a sense of purpose or direction in life, whether it be through work, relationships, or personal goals.\n", 144 | "2. Fulfillment: Feeling fulfilled and satisfied with one's experiences and achievements.\n", 145 | "3. Happiness: Pursuing happiness and well-being, whether through personal relationships, material possessions, or personal growth.\n", 146 | "4. Self-actualization: Realizing one's potential and living up to one's capabilities.\n", 147 | "5. Legacy: Leaving a lasting impact or legacy, whether through contributions to society, artistic or cultural achievements, or impacting the lives of others.\n", 148 | "6. Spirituality: Connecting with a higher power or a sense of something greater than oneself, and finding meaning and purpose through faith or spiritual practices.\n", 149 | "7. 
Love: Finding and experiencing love, whether it be through romantic relationships, friendships, or family.\n", 150 | "8. Personal growth: Continuously learning, growing, and improving oneself.\n", 151 | "9. Community: Building and being a part of a community, whether it be through work, volunteering, or social connections.\n", 152 | "10. Making a difference: Making a positive impact in the world and leaving it a better place than when you arrived.\n", 153 | "\n", 154 | "Ultimately, the meaning of life is a deeply personal and subjective question, and what gives meaning and purpose to one person's life may be different for another. It's a question that each person must answer for themselves, and it may change throughout their life as they grow and evolve.\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "print(response['choices'][0]['text'])" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [] 168 | } 169 | ], 170 | "metadata": { 171 | "kernelspec": { 172 | "display_name": "openai", 173 | "language": "python", 174 | "name": "python3" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 3 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython3", 186 | "version": "3.8.12" 187 | }, 188 | "orig_nbformat": 4, 189 | "vscode": { 190 | "interpreter": { 191 | "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" 192 | } 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 2 197 | } 198 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project.urls] 6 | Documentation = "https://github.com/unknown/hatch-demo#readme" 7 | Issues = "https://github.com/unknown/hatch-demo/issues" 8 | Source = "https://github.com/unknown/hatch-demo" 9 | 10 | [tool.hatch.version] 11 | path = "easyllm/__init__.py" 12 | 13 | [tool.hatch.metadata] 14 | allow-direct-references = true 15 | 16 | [project] 17 | name = "easyllm" 18 | description = "Description" 19 | readme = "README.md" 20 | requires-python = ">=3.8" 21 | license = "MIT" 22 | authors = [{ name = "Philipp Schmid", email = "schmidphilipp1995@gmail.com" }] 23 | classifiers = [ 24 | "Topic :: Internet", 25 | "Topic :: Software Development :: Libraries :: Application Frameworks", 26 | "Topic :: Software Development :: Libraries :: Python Modules", 27 | "Topic :: Software Development :: Libraries", 28 | "Topic :: Software Development", 29 | "Intended Audience :: Developers", 30 | "License :: OSI Approved :: MIT License", 31 | "Programming Language :: Python :: 3 :: Only", 32 | "Programming Language :: Python :: 3.8", 33 | "Programming Language :: Python :: 3.9", 34 | "Programming Language :: Python :: 3.9", 35 | "Programming Language :: Python :: 3.10", 36 | ] 37 | dynamic = ["version"] 38 | scripts = { easyllm = "easyllm.cli:main" } 39 | dependencies = ["pydantic==2.1.1", "nanoid==2.0.0", "huggingface-hub==0.16.4"] 40 | 41 | [project.optional-dependencies] 42 | data = ["datasets","kenlm @ https://github.com/kpu/kenlm/archive/master.zip","sentencepiece","readability-lxml","inscriptis"] 43 | test = ["pytest", "ruff", "black", "isort", "mypy", "hatch"] 44 | bedrock = 
["boto3"] 45 | dev = ["ruff", "black", "isort", "mypy", "hatch"] 46 | docs = [ 47 | "mkdocs", 48 | "mkdocs-material", 49 | "mkdocstrings", 50 | "mkdocstrings-python", 51 | "mkdocs-autorefs", 52 | "mkdocs-jupyter", 53 | ] 54 | 55 | [tool.isort] 56 | profile = "black" 57 | known_third_party = ["fastapi", "pydantic", "starlette"] 58 | 59 | # [tool.coverage.run] 60 | # parallel = true 61 | # source = [ 62 | # "docs_src", 63 | # "tests", 64 | # "fastapi" 65 | # ] 66 | # context = '${CONTEXT}' 67 | # omit = [ 68 | # "docs_src/response_model/tutorial003_04.py", 69 | # "docs_src/response_model/tutorial003_04_py310.py", 70 | # ] 71 | 72 | [tool.ruff] 73 | select = [ 74 | "E", # pycodestyle errors 75 | "W", # pycodestyle warnings 76 | "F", # pyflakes 77 | "I", # isort 78 | "C", # flake8-comprehensions 79 | "B", # flake8-bugbear 80 | ] 81 | ignore = [ 82 | "E501", # line too long, handled by black 83 | "B008", # do not perform function calls in argument defaults 84 | "C901", # too complex 85 | ] 86 | # Same as Black. 87 | line-length = 119 88 | 89 | # Allow unused variables when underscore-prefixed. 90 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 91 | 92 | # Assume Python 3.9. 93 | target-version = "py38" 94 | 95 | [tool.ruff.per-file-ignores] 96 | "__init__.py" = ["F401"] 97 | 98 | # [tool.ruff.isort] 99 | # known-third-party = ["fastapi", "pydantic", "starlette"] 100 | 101 | [tool.pytest.ini_options] 102 | addopts = "-ra" 103 | testpaths = ["tests"] 104 | pythonpath = ['.'] 105 | -------------------------------------------------------------------------------- /scripts/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philschmid/easyllm/1f37a9307d68545f41639f392a301baadda7188a/scripts/.gitkeep -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023-present philschmid 2 | # 3 | # SPDX-License-Identifier: MIT 4 | -------------------------------------------------------------------------------- /tests/prompt_utils/test_chatml_hf.py: -------------------------------------------------------------------------------- 1 | # test_build_chatml_hf_prompt.py 2 | 3 | import pytest 4 | 5 | from easyllm.prompt_utils.chatml_hf import build_chatml_hf_prompt 6 | 7 | 8 | def test_build_chatml_hf_prompt_single_message(): 9 | message = "Hello!" 
10 | expected_output = f"<|system|>\n<|end|>\n<|user|>\n{message}<|end|>\n<|assistant|>" 11 | result = build_chatml_hf_prompt(message) 12 | assert result == expected_output 13 | 14 | 15 | def test_build_chatml_hf_prompt_multiple_messages(): 16 | messages = [ 17 | {"content":"You are a chat bot.", "role":"system"}, 18 | {"content":"Hello!", "role": "user"}, 19 | ] 20 | expected_output = "<|system|>\nYou are a chat bot.<|end|>\n<|user|>\nHello!<|end|>\n<|assistant|>" 21 | result = build_chatml_hf_prompt(messages) 22 | assert result == expected_output 23 | 24 | 25 | def test_build_chatml_hf_prompt_function_call(): 26 | messages = [ 27 | {"content":"You are a chat bot.", "role":"system"}, 28 | {"content":"some_function()", "role": "function"}, 29 | ] 30 | with pytest.raises(ValueError, match="HF ChatML does not support function calls."): 31 | build_chatml_hf_prompt(messages) 32 | -------------------------------------------------------------------------------- /tests/prompt_utils/test_llama2.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from easyllm.prompt_utils.llama2 import build_llama2_prompt 4 | 5 | 6 | def test_build_llama2_prompt_single_message(): 7 | message = "Hello!" 8 | expected_output = f"[INST] {message} [/INST]" 9 | result = build_llama2_prompt(message) 10 | assert result == expected_output 11 | 12 | 13 | def test_build_llama2_prompt_multiple_messages(): 14 | messages = [ 15 | {"content":"You are a chat bot.", "role":"system"}, 16 | {"content":"Hello!", "role": "user"}, 17 | ] 18 | expected_output = "[INST] <>\nYou are a chat bot.\n<>\n\nHello! [/INST]" 19 | result = build_llama2_prompt(messages) 20 | print(f"RESULT: {result}") 21 | assert result == expected_output 22 | 23 | 24 | def test_build_llama2_prompt_function_call(): 25 | messages = [ 26 | {"content":"You are a chat bot.", "role":"system"}, 27 | {"content":"some_function()", "role": "function"}, 28 | ] 29 | with pytest.raises(ValueError, match="Llama 2 does not support function calls."): 30 | build_llama2_prompt(messages) 31 | -------------------------------------------------------------------------------- /tests/prompt_utils/test_open_assistant.py: -------------------------------------------------------------------------------- 1 | # test_build_open_assistant_prompt.py 2 | 3 | import pytest 4 | 5 | from easyllm.prompt_utils.open_assistant import build_open_assistant_prompt 6 | 7 | 8 | def test_build_open_assistant_prompt_single_message(): 9 | message = "Hello!" 
10 | expected_output = f"<|system|><|prompter|>{message}<|assistant|>" 11 | result = build_open_assistant_prompt(message) 12 | assert result == expected_output 13 | 14 | 15 | def test_build_open_assistant_prompt_multiple_messages(): 16 | messages = [ 17 | {"content":"You are a chat bot.", "role":"system"}, 18 | {"content":"Hello!", "role": "user"}, 19 | ] 20 | expected_output = "<|system|>You are a chat bot.<|prompter|>Hello!<|assistant|>" 21 | result = build_open_assistant_prompt(messages) 22 | assert result == expected_output 23 | 24 | 25 | def test_build_open_assistant_prompt_function_call(): 26 | messages = [ 27 | {"content":"You are a chat bot.", "role":"system"}, 28 | {"content":"some_function()", "role": "function"}, 29 | ] 30 | with pytest.raises(ValueError, match="Open Assistant does not support function calls."): 31 | build_open_assistant_prompt(messages) 32 | -------------------------------------------------------------------------------- /tests/prompt_utils/test_stablebeluga.py: -------------------------------------------------------------------------------- 1 | # test_build_stablebeluga_prompt.py 2 | 3 | import pytest 4 | 5 | from easyllm.prompt_utils.stablebeluga import build_stablebeluga_prompt 6 | 7 | 8 | def test_build_stablebeluga_prompt_single_message(): 9 | message = "Hello!" 10 | expected_output = f"### System:\n\n\n### User:\n{message}\n\n### Assistant:" 11 | result = build_stablebeluga_prompt(message) 12 | assert result == expected_output 13 | 14 | 15 | def test_build_stablebeluga_prompt_multiple_messages(): 16 | messages = [ 17 | {"content":"You are a chat bot.", "role":"system"}, 18 | {"content":"Hello!", "role": "user"}, 19 | ] 20 | expected_output = "### System:\nYou are a chat bot.\n\n### User:\nHello!\n\n### Assistant:" 21 | result = build_stablebeluga_prompt(messages) 22 | assert result == expected_output 23 | 24 | 25 | def test_build_stablebeluga_prompt_function_call(): 26 | messages = [ 27 | {"content":"You are a chat bot.", "role":"system"}, 28 | {"content":"some_function()", "role": "function"}, 29 | ] 30 | with pytest.raises(ValueError, match="stablebeluga does not support function calls."): 31 | build_stablebeluga_prompt(messages) 32 | -------------------------------------------------------------------------------- /tests/prompt_utils/test_vicuna.py: -------------------------------------------------------------------------------- 1 | # test_build_vicuna_prompt.py 2 | 3 | import pytest 4 | 5 | from easyllm.prompt_utils.vicuna import build_vicuna_prompt 6 | 7 | 8 | def test_build_vicuna_prompt_single_message(): 9 | message = "Hello!" 
10 | expected_output = f"\n\nUSER: {message}\nASSISTANT: " 11 | result = build_vicuna_prompt(message) 12 | assert result == expected_output 13 | 14 | 15 | def test_build_vicuna_prompt_multiple_messages(): 16 | messages = [ 17 | {"content":"You are a chat bot.", "role":"system"}, 18 | {"content":"Hello!", "role": "user"}, 19 | ] 20 | expected_output = "You are a chat bot.\n\nUSER: Hello!\nASSISTANT: " 21 | result = build_vicuna_prompt(messages) 22 | assert result == expected_output 23 | 24 | 25 | def test_build_vicuna_prompt_function_call(): 26 | messages = [ 27 | {"content":"You are a chat bot.", "role":"system"}, 28 | {"content":"some_function()", "role": "function"}, 29 | ] 30 | with pytest.raises(ValueError, match="Vicuna does not support function calls."): 31 | build_vicuna_prompt(messages) 32 | -------------------------------------------------------------------------------- /tests/prompt_utils/test_wizardlm.py: -------------------------------------------------------------------------------- 1 | # test_build_wizardlm_prompt.py 2 | 3 | import pytest 4 | 5 | from easyllm.prompt_utils.wizardlm import build_wizardlm_prompt 6 | 7 | 8 | def test_build_wizardlm_prompt_single_message(): 9 | message = "Hello!" 10 | expected_output = f"USER: {message} ASSISTANT: " 11 | result = build_wizardlm_prompt(message) 12 | assert result == expected_output 13 | 14 | 15 | def test_build_wizardlm_prompt_multiple_messages(): 16 | messages = [ 17 | {"content":"You are a chat bot.", "role":"system"}, 18 | {"content":"Hello!", "role": "user"}, 19 | ] 20 | expected_output = "You are a chat bot. USER: Hello! ASSISTANT: " 21 | result = build_wizardlm_prompt(messages) 22 | assert result == expected_output 23 | 24 | 25 | def test_build_wizardlm_prompt_function_call(): 26 | messages = [ 27 | {"content":"You are a chat bot.", "role":"system"}, 28 | {"content":"some_function()", "role": "function"}, 29 | ] 30 | with pytest.raises(ValueError, match="WizardLM does not support function calls."): 31 | build_wizardlm_prompt(messages) 32 | -------------------------------------------------------------------------------- /tests/schema/test_base.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import pytest 4 | 5 | from easyllm.schema.base import ChatMessage, Usage 6 | 7 | 8 | def test_chat_message() -> None: 9 | """Test that the ChatMessage schema works as expected.""" 10 | text = "Never gonna give you up. Never gonna let you down." 
11 | role = "user" 12 | 13 | message = ChatMessage(content=text, role=role) 14 | 15 | assert message.content == text 16 | assert isinstance(message.content, type(text)) 17 | assert message.role == role 18 | assert isinstance(message.role, type(role)) 19 | 20 | 21 | @pytest.mark.parametrize( 22 | "role", ["user", "assistant", "function", "system"] 23 | ) 24 | def test_all_valid_roles(role: str) -> None: 25 | """Test that all valid roles are accepted.""" 26 | message = ChatMessage(content="Hello!", role=role) 27 | assert message.role == role 28 | assert isinstance(message.role, type(role)) 29 | 30 | 31 | @pytest.mark.parametrize( 32 | "role", 33 | ["fellow", "bro", "", 1, 1.0, ["user", "assistant"], {"user": "John Doe"}], 34 | ) 35 | def test_all_invalid_roles(role: Any) -> None: 36 | """Test that all invalid roles are rejected.""" 37 | with pytest.raises(ValueError): 38 | ChatMessage(content="Hello!", role=role) 39 | 40 | 41 | @pytest.mark.parametrize( 42 | "message", 43 | [1234, ["Hello!"], {"content": "Hello!", "role": "user"}], 44 | ) 45 | def test_all_invalid_messages(message: Any) -> None: 46 | """Test that all invalid messages are rejected.""" 47 | with pytest.raises(ValueError): 48 | ChatMessage(content=message, role="user") 49 | 50 | 51 | def test_usage() -> None: 52 | """Test that the Usage schema works as expected.""" 53 | prompt_tokens = 10 54 | completion_tokens = 20 55 | total_tokens = 30 56 | 57 | usage = Usage( 58 | prompt_tokens=prompt_tokens, 59 | completion_tokens=completion_tokens, 60 | total_tokens=total_tokens, 61 | ) 62 | 63 | assert usage.prompt_tokens == prompt_tokens 64 | assert isinstance(usage.prompt_tokens, type(prompt_tokens)) 65 | assert usage.completion_tokens == completion_tokens 66 | assert isinstance(usage.completion_tokens, type(completion_tokens)) 67 | assert usage.total_tokens == total_tokens 68 | assert isinstance(usage.total_tokens, type(total_tokens)) 69 | 70 | 71 | @pytest.mark.parametrize( 72 | ["prompt_tokens", "completion_tokens", "total_tokens"], 73 | [ 74 | ("abc", 10.0, 10), 75 | (15, "def", 15.0), 76 | (20, 20.0, "ghi"), 77 | ], 78 | ) 79 | def test_invalid_usage(prompt_tokens, completion_tokens, total_tokens) -> None: 80 | """Test that invalid Usage inputs are rejected.""" 81 | with pytest.raises(ValueError): 82 | Usage( 83 | prompt_tokens=prompt_tokens, 84 | completion_tokens=completion_tokens, 85 | total_tokens=total_tokens, 86 | ) 87 | 88 | 89 | @pytest.mark.parametrize( 90 | ["prompt_tokens", "completion_tokens", "total_tokens"], 91 | [ 92 | (10, 10, 10), 93 | ("10", 10, 10), 94 | (10, "10", 10), 95 | (10, 10, "10"), 96 | ], 97 | ) 98 | def test_str_to_int_for_usage( 99 | prompt_tokens, completion_tokens, total_tokens 100 | ) -> None: 101 | """Test that str inputs are converted to int.""" 102 | usage = Usage( 103 | prompt_tokens=prompt_tokens, 104 | completion_tokens=completion_tokens, 105 | total_tokens=total_tokens, 106 | ) 107 | 108 | assert usage.prompt_tokens == 10 109 | assert isinstance(usage.prompt_tokens, type(10)) 110 | assert usage.completion_tokens == 10 111 | assert isinstance(usage.completion_tokens, type(10)) 112 | assert usage.total_tokens == 10 113 | assert isinstance(usage.total_tokens, type(10)) 114 | -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | 2 | # dummy pytest for ci 3 | def test_dummy(): 4 | assert True 5 | 
--------------------------------------------------------------------------------