├── .github └── workflows │ ├── build-documentation.yaml │ ├── check.yaml │ ├── deploy-documentation.yaml │ ├── publish.yaml │ └── test.yaml ├── .gitignore ├── LICENSE ├── README.md ├── docs ├── clients │ ├── bedrock.md │ ├── huggingface.md │ ├── index.md │ └── sagemaker.md ├── examples │ ├── .gitkeep │ └── index.md ├── index.md ├── installation.md └── prompt_utils.md ├── easyllm ├── __init__.py ├── cli.py ├── clients │ ├── __init__.py │ ├── bedrock.py │ ├── huggingface.py │ └── sagemaker.py ├── data │ ├── __init__.py │ ├── extractor │ │ ├── __init__.py │ │ └── html_extractor.py │ └── filters │ │ ├── __init__.py │ │ ├── bulletpoint_ratio.py │ │ ├── common_word.py │ │ ├── cookie_banner.py │ │ ├── digit_to_character.py │ │ ├── kenlm_ppl.py │ │ ├── length.py │ │ ├── longword.py │ │ ├── n_gram.py │ │ ├── non_alpha_numeric.py │ │ ├── parantheses_ration.py │ │ ├── punctuation.py │ │ ├── repeating.py │ │ ├── url_ratio.py │ │ ├── whitespace_ration.py │ │ └── words_to_symbol.py ├── evol_instruct │ └── __init__.py ├── prompt_utils │ ├── __init__.py │ ├── anthropic.py │ ├── base.py │ ├── chatml_hf.py │ ├── falcon.py │ ├── llama2.py │ ├── open_assistant.py │ ├── stablebeluga.py │ ├── vicuna.py │ └── wizardlm.py ├── schema │ ├── base.py │ └── openai.py └── utils │ ├── __init__.py │ ├── aws.py │ └── logging.py ├── makefile ├── mkdocs.yml ├── notebooks ├── bedrock-chat-completion-api.ipynb ├── bedrock-stream-chat-completions.ipynb ├── chat-completion-api.ipynb ├── data-filter.ipynb ├── datasets │ └── filter-dataset.ipynb ├── falcon-180b-chat.ipynb ├── get-embeddings.ipynb ├── inference-endpoints-example.ipynb ├── llama2-agent-example.ipynb ├── llama2-rag-example.ipynb ├── sagemaker-chat-completion-api.ipynb ├── sagemaker-get-embeddings.ipynb ├── sagemaker-text-completion-api.ipynb ├── stream-chat-completions.ipynb ├── stream-text-completions.ipynb └── text-completion-api.ipynb ├── pyproject.toml ├── scripts └── .gitkeep └── tests ├── __init__.py ├── prompt_utils ├── test_chatml_hf.py ├── test_llama2.py ├── test_open_assistant.py ├── test_stablebeluga.py ├── test_vicuna.py └── test_wizardlm.py ├── schema └── test_base.py └── test_main.py /.github/workflows/build-documentation.yaml: -------------------------------------------------------------------------------- 1 | name: build documentation 2 | 3 | on: 4 | push: 5 | pull_request: 6 | - main 7 | paths: 8 | - 'docs/**' 9 | - 'notebooks/**' 10 | workflow_dispatch: 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 14 | cancel-in-progress: true 15 | 16 | permissions: 17 | contents: write 18 | 19 | jobs: 20 | documentation: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Set up Python 25 | uses: actions/setup-python@v4 26 | with: 27 | python-version: "3.9" 28 | cache: "pip" 29 | cache-dependency-path: pyproject.toml 30 | - uses: actions/cache@v3 31 | id: cache 32 | with: 33 | path: ${{ env.pythonLocation }} 34 | key: ${{ runner.os }}-python-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml') }}-docs 35 | - name: Install dependencies 36 | if: steps.cache.outputs.cache-hit != 'true' 37 | run: pip install ".[docs]" 38 | - name: build documentation 39 | run: make docs-build 40 | -------------------------------------------------------------------------------- /.github/workflows/check.yaml: -------------------------------------------------------------------------------- 1 | name: Quality Check 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | paths: 9 | - 
'easyllm/**' 10 | - 'tests/**' 11 | workflow_dispatch: 12 | 13 | concurrency: 14 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | quality: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Set up Python 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: "3.9" 26 | cache: "pip" 27 | cache-dependency-path: pyproject.toml 28 | - uses: actions/cache@v3 29 | id: cache 30 | with: 31 | path: ${{ env.pythonLocation }} 32 | key: ${{ runner.os }}-python-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml') }}-check 33 | - name: Install build dependencies 34 | if: steps.cache.outputs.cache-hit != 'true' 35 | run: pip install ".[dev]" 36 | - name: Run library checks 37 | run: make check 38 | -------------------------------------------------------------------------------- /.github/workflows/deploy-documentation.yaml: -------------------------------------------------------------------------------- 1 | name: Publish documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - 'docs/**' 9 | - 'notebooks/**' 10 | release: 11 | types: 12 | - created 13 | workflow_dispatch: 14 | 15 | concurrency: 16 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 17 | cancel-in-progress: true 18 | 19 | permissions: 20 | contents: write 21 | 22 | jobs: 23 | documentation: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v3 27 | - name: Set up Python 28 | uses: actions/setup-python@v4 29 | with: 30 | python-version: "3.9" 31 | cache: "pip" 32 | cache-dependency-path: pyproject.toml 33 | - uses: actions/cache@v3 34 | id: cache 35 | with: 36 | path: ${{ env.pythonLocation }} 37 | key: ${{ runner.os }}-python-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml') }}-docs 38 | - name: Install dependencies 39 | if: steps.cache.outputs.cache-hit != 'true' 40 | run: pip install ".[docs]" 41 | - name: publish documentation 42 | run: make docs-deploy -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | workflow_dispatch: 5 | release: 6 | types: 7 | - created 8 | 9 | jobs: 10 | publish: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - name: Set up Python 15 | uses: actions/setup-python@v4 16 | with: 17 | python-version: "3.9" 18 | cache: "pip" 19 | cache-dependency-path: pyproject.toml 20 | - uses: actions/cache@v3 21 | id: cache 22 | with: 23 | path: ${{ env.pythonLocation }} 24 | key: ${{ runner.os }}-python-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml') }}-publish 25 | - name: Install build dependencies 26 | if: steps.cache.outputs.cache-hit != 'true' 27 | run: pip install build 28 | - name: Build distribution 29 | run: python -m build 30 | - name: Publish 31 | uses: pypa/gh-action-pypi-publish@v1.6.4 32 | with: 33 | password: ${{ secrets.PYPI_API_TOKEN }} 34 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | name: Unit Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | paths: 9 | - 'easyllm/**' 10 | - 'tests/**' 11 | workflow_dispatch: 12 | 13 | concurrency: 14 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 15 | cancel-in-progress: 
true 16 | 17 | jobs: 18 | quality: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Set up Python 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: "3.9" 26 | cache: "pip" 27 | cache-dependency-path: pyproject.toml 28 | - uses: actions/cache@v3 29 | id: cache 30 | with: 31 | path: ${{ env.pythonLocation }} 32 | key: ${{ runner.os }}-python-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml') }}-test 33 | - name: Install build dependencies 34 | if: steps.cache.outputs.cache-hit != 'true' 35 | run: pip install ".[test]" 36 | - name: Run tests 37 | run: pytest 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | .ruff_cache 131 | docs/examples/*.ipynb 132 | .vscode -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Philipp Schmid 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# EasyLLM
20 | 21 | 22 | **EasyLLM** is an open source project that provides **helpful tools and methods for working with large language models** (LLMs), both open source and closed source. Get immediataly started or check out the [documentation](https://philschmid.github.io/easyllm/). 23 | 24 | EasyLLM implements clients that are **compatible with OpenAI's Completion API**. This means you can easily replace `openai.ChatCompletion`, `openai.Completion`, `openai.Embedding` with, for example, `huggingface.ChatCompletion`, `huggingface.Completion` or `huggingface.Embedding` by changing one line of code. 25 | 26 | ### Supported Clients 27 | 28 | * `huggingface` - [HuggingFace](https://huggingface.co/) models 29 | * `huggingface.ChatCompletion` - Chat with LLMs 30 | * `huggingface.Completion` - Text completion with LLMs 31 | * `huggingface.Embedding` - Create embeddings with LLMs 32 | * `sagemaker` - Open LLMs deployed on Amazon SageMaker 33 | * `sagemaker.ChatCompletion` - Chat with LLMs 34 | * `sagemaker.Completion` - Text completion with LLMs 35 | * `sagemaker.Embedding` - Create embeddings with LLMs 36 | * `bedrock` - Amazon Bedrock LLMs 37 | 38 | 39 | Check out the [Examples](./examples) to get started. 40 | 41 | ## 🚀 Getting Started 42 | 43 | Install EasyLLM via pip: 44 | 45 | ```bash 46 | pip install easyllm 47 | ``` 48 | 49 | Then import and start using the clients: 50 | 51 | ```python 52 | 53 | from easyllm.clients import huggingface 54 | 55 | # helper to build llama2 prompt 56 | huggingface.prompt_builder = "llama2" 57 | 58 | response = huggingface.ChatCompletion.create( 59 | model="meta-llama/Llama-2-70b-chat-hf", 60 | messages=[ 61 | {"role": "system", "content": "\nYou are a helpful assistant speaking like a pirate. argh!"}, 62 | {"role": "user", "content": "What is the sun?"}, 63 | ], 64 | temperature=0.9, 65 | top_p=0.6, 66 | max_tokens=256, 67 | ) 68 | 69 | print(response) 70 | ``` 71 | the result will look like 72 | 73 | ```bash 74 | { 75 | "id": "hf-lVC2iTMkFJ", 76 | "object": "chat.completion", 77 | "created": 1690661144, 78 | "model": "meta-llama/Llama-2-70b-chat-hf", 79 | "choices": [ 80 | { 81 | "index": 0, 82 | "message": { 83 | "role": "assistant", 84 | "content": " Arrrr, the sun be a big ol' ball o' fire in the sky, me hearty! It be the source o' light and warmth for our fair planet, and it be a mighty powerful force, savvy? Without the sun, we'd be sailin' through the darkness, lost and cold, so let's give a hearty \"Yarrr!\" for the sun, me hearties! Arrrr!" 85 | }, 86 | "finish_reason": null 87 | } 88 | ], 89 | "usage": { 90 | "prompt_tokens": 111, 91 | "completion_tokens": 299, 92 | "total_tokens": 410 93 | } 94 | } 95 | ``` 96 | 97 | Check out other examples: 98 | * [Detailed ChatCompletion Example](notebooks/chat-completion-api.ipynb) 99 | * [Example how to stream chat requests](notebooks/stream-chat-completions.ipynb) 100 | * [Example how to stream text requests](notebooks/stream-text-completions.ipynb) 101 | * [Detailed Completion Example](notebooks/text-completion-api.ipynb) 102 | * [Create Embeddings](notebooks/get-embeddings) 103 | 104 | See the [documentation](https://philschmid.github.io/easyllm/) for more detailed usage and examples. 105 | 106 | ## 💪🏻 Migration from OpenAI to HuggingFace 107 | 108 | Migrating from OpenAI to HuggingFace is easy. Just change the import statement and the client you want to use and optionally the prompt builder. 
109 | 110 | ```diff 111 | - import openai 112 | + from easyllm.clients import huggingface 113 | + huggingface.prompt_builder = "llama2" 114 | 115 | 116 | - response = openai.ChatCompletion.create( 117 | + response = huggingface.ChatCompletion.create( 118 | - model="gpt-3.5-turbo", 119 | + model="meta-llama/Llama-2-70b-chat-hf", 120 | messages=[ 121 | {"role": "system", "content": "You are a helpful assistant."}, 122 | {"role": "user", "content": "Knock knock."}, 123 | ], 124 | ) 125 | ``` 126 | 127 | Make sure when you switch your client that your hyperparameters are still valid. For example, `temperature` of GPT-3 might be different than `temperature` of `Llama-2`. 128 | 129 | ## ☑️ Key Features 130 | 131 | ### 🤝 Compatible Clients 132 | 133 | - Implementation of clients compatible with OpenAI API format of `openai.ChatCompletion`, `openai.Completion`, `openai.Embedding`. 134 | - Easily switch between different LLMs like `openai.ChatCompletion` and `huggingface.ChatCompletion` by changing one line of code. 135 | - Support for streaming of completions, checkout example [How to stream completions](./notebooks/stream-chat-completions.ipynb). 136 | 137 | ### ⚙️ Helper Modules ⚙️ 138 | 139 | - `evol_instruct` (work in progress) - Use evolutionary algorithms create instructions for LLMs. 140 | 141 | - `prompt_utils` - Helper methods to easily convert between prompt formats like OpenAI Messages to prompts for open source models like Llama 2. 142 | 143 | ## 🙏 Contributing 144 | 145 | EasyLLM is an open source project and welcomes contributions of all kinds. 146 | 147 | The project uses [hatch](https://hatch.pypa.io/latest/) for development. To get started, fork the repository and clone 148 | it to your local machine. 149 | 150 | 0. Confirm [hatch](https://hatch.pypa.io/latest/install/) is installed (pipx is great to make it available globally on your machine) 151 | 1. Once in the project directory, run `hatch env create` to create a default virtual environment for development. 152 | 2. Activate the virtual environment with `hatch shell` 153 | 3. Start developing! 🤩 154 | 155 | ## 📔 Citation & Acknowledgements 156 | 157 | If you use EasyLLM, please share it with me on social media or email. I would love to hear about it! 158 | You can also cite the project using the following BibTeX: 159 | 160 | ```bash 161 | @software{Philipp_Schmid_EasyLLM_2023, 162 | author = {Philipp Schmid}, 163 | license = {Apache-2.0}, 164 | month = juj, 165 | title = {EasyLLM: Streamlined Tools for LLMs}, 166 | url = {https://github.com/philschmid/easyllm}, 167 | year = {2023} 168 | } 169 | ``` 170 | -------------------------------------------------------------------------------- /docs/clients/bedrock.md: -------------------------------------------------------------------------------- 1 | # Amazon Bedrock 2 | 3 | EasyLLM provides a client for interfacing with Amazon Bedrock models. 4 | 5 | - `bedrock.ChatCompletion` - a client for interfacing with Bedrock models that are compatible with the OpenAI ChatCompletion API. 6 | - `bedrock.Completion` - a client for interfacing with Bedrock models that are compatible with the OpenAI Completion API. 7 | - `bedrock.Embedding` - a client for interfacing with Bedrock models that are compatible with the OpenAI Embedding API. 8 | 9 | ## `bedrock.ChatCompletion` 10 | 11 | The `bedrock.ChatCompletion` client is used to interface with Bedrock models running on Text Generation inference that are compatible with the OpenAI ChatCompletion API. 
Checkout the [Examples](../examples/bedrock-chat-completion-api) 12 | 13 | 14 | ```python 15 | import os 16 | # set env for prompt builder 17 | os.environ["BEDROCK_PROMPT"] = "anthropic" # vicuna, wizardlm, stablebeluga, open_assistant 18 | os.environ["AWS_REGION"] = "us-east-1" # change to your region 19 | # os.environ["AWS_ACCESS_KEY_ID"] = "XXX" # needed if not using boto3 session 20 | # os.environ["AWS_SECRET_ACCESS_KEY"] = "XXX" # needed if not using boto3 session 21 | 22 | from easyllm.clients import bedrock 23 | 24 | response = bedrock.ChatCompletion.create( 25 | model="anthropic.claude-v2", 26 | messages=[ 27 | {"role": "user", "content": "What is 2 + 2?"}, 28 | ], 29 | temperature=0.9, 30 | top_p=0.6, 31 | max_tokens=1024, 32 | debug=False, 33 | ) 34 | ``` 35 | 36 | 37 | Supported parameters are: 38 | 39 | * `model` - The model to use for the completion. If not provided, defaults to the base url. 40 | * `messages` - `List[ChatMessage]` to use for the completion. 41 | * `temperature` - The temperature to use for the completion. Defaults to 0.9. 42 | * `top_p` - The top_p to use for the completion. Defaults to 0.6. 43 | * `top_k` - The top_k to use for the completion. Defaults to 10. 44 | * `n` - The number of completions to generate. Defaults to 1. 45 | * `max_tokens` - The maximum number of tokens to generate. Defaults to 1024. 46 | * `stop` - The stop sequence(s) to use for the completion. Defaults to None. 47 | * `stream` - Whether to stream the completion. Defaults to False. 48 | * `debug` - Whether to enable debug logging. Defaults to False. 49 | 50 | 51 | ### Build Prompt 52 | 53 | By default the `bedrock` client will try to read the `BEDROCK_PROMPT` environment variable and tries to map the value to the `PROMPT_MAPPING` dictionary. If this is not set, it will use the default prompt builder. 54 | You can also set it manually. 55 | 56 | Checkout the [Prompt Utils](../prompt_utils) for more details. 57 | 58 | 59 | manually setting the prompt builder: 60 | 61 | ```python 62 | from easyllm.clients import bedrock 63 | 64 | bedrock.prompt_builder = "anthropic" 65 | 66 | res = bedrock.ChatCompletion.create(...) 67 | ``` 68 | 69 | Using environment variable: 70 | 71 | ```python 72 | # can happen elsehwere 73 | import os 74 | os.environ["BEDROCK_PROMPT"] = "anthropic" 75 | 76 | from easyllm.clients import bedrock 77 | ``` -------------------------------------------------------------------------------- /docs/clients/huggingface.md: -------------------------------------------------------------------------------- 1 | # Hugging Face 2 | 3 | EasyLLM provides a client for interfacing with HuggingFace models. The client is compatible with the [HuggingFace Inference API](https://huggingface.co/docs/api-inference/index), [Hugging Face Inference Endpoints](https://huggingface.co/docs/inference-endpoints/index) or any Web Service running [Text Generation Inference](https://github.com/huggingface/text-generation-inference) or compatible API endpoints. 4 | 5 | - `huggingface.ChatCompletion` - a client for interfacing with HuggingFace models that are compatible with the OpenAI ChatCompletion API. 6 | - `huggingface.Completion` - a client for interfacing with HuggingFace models that are compatible with the OpenAI Completion API. 7 | - `huggingface.Embedding` - a client for interfacing with HuggingFace models that are compatible with the OpenAI Embedding API. 
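Because the client also works with Hugging Face Inference Endpoints or any Text Generation Inference server, you can point it at your own deployment instead of the free Inference API. The snippet below is a minimal sketch: the endpoint URL is a placeholder, the token is only needed for protected endpoints, and `model` is left out because a custom `api_base` is set (see the Environment Configuration section below).

```python
from easyllm.clients import huggingface

# placeholder URL - replace with your Inference Endpoint or local TGI server
huggingface.api_base = "https://YOUR-ENDPOINT.endpoints.huggingface.cloud"
huggingface.api_key = "hf_xxx"         # only needed for protected endpoints
huggingface.prompt_builder = "llama2"  # choose the builder matching the deployed model

response = huggingface.ChatCompletion.create(
    messages=[
        {"role": "user", "content": "What is the sun?"},
    ],
    max_tokens=256,
)
print(response["choices"][0]["message"]["content"])
```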
8 | 9 | ## `huggingface.ChatCompletion` 10 | 11 | The `huggingface.ChatCompletion` client is used to interface with HuggingFace models running on Text Generation inference that are compatible with the OpenAI ChatCompletion API. Checkout the [Examples](../examples/chat-completion-api) for more details and [How to stream completions](../examples/stream-chat-completion-api) for an example how to stream requests. 12 | 13 | 14 | ```python 15 | from easyllm.clients import huggingface 16 | 17 | # The module automatically loads the HuggingFace API key from the environment variable HUGGINGFACE_TOKEN or from the HuggingFace CLI configuration file. 18 | # huggingface.api_key="hf_xxx" 19 | huggingface.prompt_builder = "llama2" 20 | 21 | response = huggingface.ChatCompletion.create( 22 | model="meta-llama/Llama-2-70b-chat-hf", 23 | messages=[ 24 | {"role": "system", "content": "\nYou are a helpful, respectful and honest assistant."}, 25 | {"role": "user", "content": "Knock knock."}, 26 | ], 27 | temperature=0.9, 28 | top_p=0.6, 29 | max_tokens=1024, 30 | ) 31 | ``` 32 | 33 | 34 | Supported parameters are: 35 | 36 | * `model` - The model to use for the completion. If not provided, defaults to the base url. 37 | * `messages` - `List[ChatMessage]` to use for the completion. 38 | * `temperature` - The temperature to use for the completion. Defaults to 0.9. 39 | * `top_p` - The top_p to use for the completion. Defaults to 0.6. 40 | * `top_k` - The top_k to use for the completion. Defaults to 10. 41 | * `n` - The number of completions to generate. Defaults to 1. 42 | * `max_tokens` - The maximum number of tokens to generate. Defaults to 1024. 43 | * `stop` - The stop sequence(s) to use for the completion. Defaults to None. 44 | * `stream` - Whether to stream the completion. Defaults to False. 45 | * `frequency_penalty` - The frequency penalty to use for the completion. Defaults to 1.0. 46 | * `debug` - Whether to enable debug logging. Defaults to False. 47 | 48 | ## `huggingface.Completion` 49 | 50 | The `huggingface.Completion` client is used to interface with HuggingFace models running on Text Generation inference that are compatible with the OpenAI Completion API. Checkout the [Examples](../examples/text-completion-api) for more details and [How to stream completions](../examples/stream-text-completion-api) for an example how to stream requests. 51 | 52 | 53 | ```python 54 | from easyllm.clients import huggingface 55 | 56 | # The module automatically loads the HuggingFace API key from the environment variable HUGGINGFACE_TOKEN or from the HuggingFace CLI configuration file. 57 | # huggingface.api_key="hf_xxx" 58 | hubbingface.prompt_builder = "llama2" 59 | 60 | response = huggingface.Completion.create( 61 | model="meta-llama/Llama-2-70b-chat-hf", 62 | prompt="What is the meaning of life?", 63 | temperature=0.9, 64 | top_p=0.6, 65 | max_tokens=1024, 66 | ) 67 | ``` 68 | 69 | 70 | Supported parameters are: 71 | 72 | * `model` - The model to use for the completion. If not provided, defaults to the base url. 73 | * `prompt` - Text to use for the completion, if prompt_builder is set, prompt will be formatted with the prompt_builder. 74 | * `temperature` - The temperature to use for the completion. Defaults to 0.9. 75 | * `top_p` - The top_p to use for the completion. Defaults to 0.6. 76 | * `top_k` - The top_k to use for the completion. Defaults to 10. 77 | * `n` - The number of completions to generate. Defaults to 1. 78 | * `max_tokens` - The maximum number of tokens to generate. Defaults to 1024. 
79 | * `stop` - The stop sequence(s) to use for the completion. Defaults to None. 80 | * `stream` - Whether to stream the completion. Defaults to False. 81 | * `frequency_penalty` - The frequency penalty to use for the completion. Defaults to 1.0. 82 | * `debug` - Whether to enable debug logging. Defaults to False. 83 | * `echo` - Whether to echo the prompt. Defaults to False. 84 | * `logprobs` - Weather to return logprobs. Defaults to None. 85 | 86 | 87 | ## `huggingface.Embedding` 88 | 89 | The `huggingface.Embedding` client is used to interface with HuggingFace models running as an API that are compatible with the OpenAI Embedding API. Checkout the [Examples](../examples/get-embeddings) for more details. 90 | 91 | ```python 92 | from easyllm.clients import huggingface 93 | 94 | # The module automatically loads the HuggingFace API key from the environment variable HUGGINGFACE_TOKEN or from the HuggingFace CLI configuration file. 95 | # huggingface.api_key="hf_xxx" 96 | 97 | embedding = huggingface.Embedding.create( 98 | model="sentence-transformers/all-MiniLM-L6-v2", 99 | text="What is the meaning of life?", 100 | ) 101 | 102 | len(embedding["data"][0]["embedding"]) 103 | ``` 104 | 105 | Supported parameters are: 106 | 107 | * `model` - The model to use to create the embedding. If not provided, defaults to the base url. 108 | * `input` - `Union[str, List[str]]` document(s) to embed. 109 | 110 | 111 | ## Environment Configuration 112 | 113 | You can configure the `huggingface` client by setting environment variables or overwriting the default values. See below on how to adjust the HF token, url and prompt builder. 114 | 115 | ### Setting HF token 116 | 117 | By default the `huggingface` client will try to read the `HUGGINGFACE_TOKEN` environment variable. If this is not set, it will try to read the token from the `~/.huggingface` folder. If this is not set, it will not use a token. 118 | 119 | Alternatively you can set the token manually by setting `huggingface.api_key`. 120 | 121 | 122 | manually setting the api key: 123 | 124 | ```python 125 | from easyllm.clients import huggingface 126 | 127 | huggingface.api_key="hf_xxx" 128 | 129 | res = huggingface.ChatCompletion.create(...) 130 | ``` 131 | 132 | Using environment variable: 133 | 134 | ```python 135 | # can happen elsehwere 136 | import os 137 | os.environ["HUGGINGFACE_TOKEN"] = "hf_xxx" 138 | 139 | from easyllm.clients import huggingface 140 | ``` 141 | 142 | 143 | ### Changing url 144 | 145 | By default the `huggingface` client will try to read the `HUGGINGFACE_API_BASE` environment variable. If this is not set, it will use the default url `https://api-inference.huggingface.co/models`. This is helpful if you want to use a different url like `https://zj5lt7pmzqzbp0d1.us-east-1.aws.endpoints.huggingface.cloud` or a local url like `http://localhost:8000` or an Hugging Face Inference Endpoint. 146 | 147 | Alternatively you can set the url manually by setting `huggingface.api_base`. If you set a custom you have to leave the `model` parameter empty. 148 | 149 | manually setting the api base: 150 | 151 | ```python 152 | from easyllm.clients import huggingface 153 | 154 | huggingface.api_base="https://my-url" 155 | 156 | 157 | res = huggingface.ChatCompletion.create(...) 
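# note: with a custom api_base set, leave the `model` argument out of the create(...) call,
# since requests go directly to the configured endpoint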
158 | ``` 159 | 160 | Using environment variable: 161 | 162 | ```python 163 | # can happen elsehwere 164 | import os 165 | os.environ["HUGGINGFACE_API_BASE"] = "https://my-url" 166 | 167 | from easyllm.clients import huggingface 168 | ``` 169 | 170 | 171 | 172 | 173 | ### Build Prompt 174 | 175 | By default the `huggingface` client will try to read the `HUGGINGFACE_PROMPT` environment variable and tries to map the value to the `PROMPT_MAPPING` dictionary. If this is not set, it will use the default prompt builder. 176 | You can also set it manually. 177 | 178 | Checkout the [Prompt Utils](../prompt_utils) for more details. 179 | 180 | 181 | manually setting the prompt builder: 182 | 183 | ```python 184 | from easyllm.clients import huggingface 185 | 186 | huggingface.prompt_builder = "llama2" 187 | 188 | res = huggingface.ChatCompletion.create(...) 189 | ``` 190 | 191 | Using environment variable: 192 | 193 | ```python 194 | # can happen elsehwere 195 | import os 196 | os.environ["HUGGINGFACE_PROMPT"] = "llama2" 197 | 198 | from easyllm.clients import huggingface 199 | ``` -------------------------------------------------------------------------------- /docs/clients/index.md: -------------------------------------------------------------------------------- 1 | # Clients 2 | 3 | In the context of EasyLLM, a "client" refers to the code that interfaces with a particular LLM API, e.g. OpenAI. 4 | 5 | Currently supported clients are: 6 | 7 | - `ChatCompletion` - ChatCompletion clients are used to interface with LLMs that are compatible with the OpenAI ChatCompletion API. 8 | - `Completion` - Completion clients are used to interface with LLMs that are compatible with the OpenAI Completion API. 9 | - `Embedding` - Embedding clients are used to interface with LLMs that are compatible with the OpenAI Embedding API. 10 | 11 | Currently supported clients are: 12 | 13 | ## Hugging Face 14 | 15 | - [huggingface.ChatCompletion](huggingface/#huggingfacechatcompletion) - a client for interfacing with HuggingFace models that are compatible with the OpenAI ChatCompletion API. 16 | - [huggingface.Completion](huggingface/#huggingfacechatcompletion) - a client for interfacing with HuggingFace models that are compatible with the OpenAI Completion API. 17 | - [huggingface.Embedding](huggingface/#huggingfacechatcompletion) - a client for interfacing with HuggingFace models that are compatible with the OpenAI Embedding API. 18 | 19 | ## Amazon SageMaker 20 | 21 | - [sagemaker.ChatCompletion](sagemaker/#sagemakerchatcompletion) - a client for interfacing with Amazon SageMaker models that are compatible with the OpenAI ChatCompletion API. 22 | - [sagemaker.Completion](sagemaker/#sagemakercompletion) - a client for interfacing with Amazon SageMaker models that are compatible with the OpenAI Completion API. 23 | - [sagemaker.Embedding](sagemaker/#sagemakerembedding) - a client for interfacing with Amazon SageMaker models that are compatible with the OpenAI Embedding API. 24 | 25 | ## Amazon Bedrock 26 | 27 | - [bedrock.ChatCompletion](bedrock/#bedrockchatcompletion) - a client for interfacing with Amazon Bedrock models that are compatible with the OpenAI ChatCompletion API. 28 | -------------------------------------------------------------------------------- /docs/clients/sagemaker.md: -------------------------------------------------------------------------------- 1 | # Amazon SageMaker 2 | 3 | EasyLLM provides a client for interfacing with Amazon SageMaker models. 
4 | 5 | - `sagemaker.ChatCompletion` - a client for interfacing with sagemaker models that are compatible with the OpenAI ChatCompletion API. 6 | - `sagemaker.Completion` - a client for interfacing with sagemaker models that are compatible with the OpenAI Completion API. 7 | - `sagemaker.Embedding` - a client for interfacing with sagemaker models that are compatible with the OpenAI Embedding API. 8 | 9 | ## `sagemaker.ChatCompletion` 10 | 11 | The `sagemaker.ChatCompletion` client is used to interface with sagemaker models running on Text Generation inference that are compatible with the OpenAI ChatCompletion API. Checkout the [Examples](../examples/sagemaker-chat-completion-api) 12 | 13 | 14 | ```python 15 | import os 16 | from easyllm.clients import sagemaker 17 | 18 | # set env for prompt builder 19 | os.environ["HUGGINGFACE_PROMPT"] = "llama2" # vicuna, wizardlm, stablebeluga, open_assistant 20 | os.environ["AWS_REGION"] = "us-east-1" # change to your region 21 | # os.environ["AWS_ACCESS_KEY_ID"] = "XXX" # needed if not using boto3 session 22 | # os.environ["AWS_SECRET_ACCESS_KEY"] = "XXX" # needed if not using boto3 session 23 | 24 | 25 | response = sagemaker.ChatCompletion.create( 26 | model="huggingface-pytorch-tgi-inference-2023-08-08-14-15-52-703", 27 | messages=[ 28 | {"role": "system", "content": "\nYou are a helpful, respectful and honest assistant."}, 29 | {"role": "user", "content": "Knock knock."}, 30 | ], 31 | temperature=0.9, 32 | top_p=0.6, 33 | max_tokens=1024, 34 | ) 35 | ``` 36 | 37 | 38 | Supported parameters are: 39 | 40 | * `model` - The model to use for the completion. If not provided, defaults to the base url. 41 | * `messages` - `List[ChatMessage]` to use for the completion. 42 | * `temperature` - The temperature to use for the completion. Defaults to 0.9. 43 | * `top_p` - The top_p to use for the completion. Defaults to 0.6. 44 | * `top_k` - The top_k to use for the completion. Defaults to 10. 45 | * `n` - The number of completions to generate. Defaults to 1. 46 | * `max_tokens` - The maximum number of tokens to generate. Defaults to 1024. 47 | * `stop` - The stop sequence(s) to use for the completion. Defaults to None. 48 | * `stream` - Whether to stream the completion. Defaults to False. 49 | * `frequency_penalty` - The frequency penalty to use for the completion. Defaults to 1.0. 50 | * `debug` - Whether to enable debug logging. Defaults to False. 51 | 52 | ## `sagemaker.Completion` 53 | 54 | The `sagemaker.Completion` client is used to interface with sagemaker models running on Text Generation inference that are compatible with the OpenAI Completion API. Checkout the [Examples](../examples/sagemaker-text-completion-api). 55 | 56 | 57 | ```python 58 | import os 59 | from easyllm.clients import sagemaker 60 | 61 | # set env for prompt builder 62 | os.environ["HUGGINGFACE_PROMPT"] = "llama2" # vicuna, wizardlm, stablebeluga, open_assistant 63 | os.environ["AWS_REGION"] = "us-east-1" # change to your region 64 | # os.environ["AWS_ACCESS_KEY_ID"] = "XXX" # needed if not using boto3 session 65 | # os.environ["AWS_SECRET_ACCESS_KEY"] = "XXX" # needed if not using boto3 session 66 | 67 | response = sagemaker.Completion.create( 68 | model="meta-llama/Llama-2-70b-chat-hf", 69 | prompt="What is the meaning of life?", 70 | temperature=0.9, 71 | top_p=0.6, 72 | max_tokens=1024, 73 | ) 74 | ``` 75 | 76 | 77 | Supported parameters are: 78 | 79 | * `model` - The model to use for the completion. If not provided, defaults to the base url. 
80 | * `prompt` - Text to use for the completion, if prompt_builder is set, prompt will be formatted with the prompt_builder. 81 | * `temperature` - The temperature to use for the completion. Defaults to 0.9. 82 | * `top_p` - The top_p to use for the completion. Defaults to 0.6. 83 | * `top_k` - The top_k to use for the completion. Defaults to 10. 84 | * `n` - The number of completions to generate. Defaults to 1. 85 | * `max_tokens` - The maximum number of tokens to generate. Defaults to 1024. 86 | * `stop` - The stop sequence(s) to use for the completion. Defaults to None. 87 | * `stream` - Whether to stream the completion. Defaults to False. 88 | * `frequency_penalty` - The frequency penalty to use for the completion. Defaults to 1.0. 89 | * `debug` - Whether to enable debug logging. Defaults to False. 90 | * `echo` - Whether to echo the prompt. Defaults to False. 91 | * `logprobs` - Weather to return logprobs. Defaults to None. 92 | 93 | 94 | ## `sagemaker.Embedding` 95 | 96 | The `sagemaker.Embedding` client is used to interface with sagemaker models running as an API that are compatible with the OpenAI Embedding API. Checkout the [Examples](../examples/sagemaker-get-embeddings) for more details. 97 | 98 | ```python 99 | import os 100 | # set env for prompt builder 101 | os.environ["HUGGINGFACE_PROMPT"] = "llama2" # vicuna, wizardlm, stablebeluga, open_assistant 102 | os.environ["AWS_REGION"] = "us-east-1" # change to your region 103 | # os.environ["AWS_ACCESS_KEY_ID"] = "XXX" # needed if not using boto3 session 104 | # os.environ["AWS_SECRET_ACCESS_KEY"] = "XXX" # needed if not using boto3 session 105 | 106 | from easyllm.clients import sagemaker 107 | 108 | embedding = sagemaker.Embedding.create( 109 | model="SageMakerModelEmbeddingEndpoint24E49D09-64prhjuiWUtE", 110 | input="That's a nice car.", 111 | ) 112 | 113 | len(embedding["data"][0]["embedding"]) 114 | ``` 115 | 116 | Supported parameters are: 117 | 118 | * `model` - The model to use to create the embedding. If not provided, defaults to the base url. 119 | * `input` - `Union[str, List[str]]` document(s) to embed. 120 | 121 | 122 | ## Environment Configuration 123 | 124 | You can configure the `sagemaker` client by setting environment variables or overwriting the default values. See below on how to adjust the HF token, url and prompt builder. 125 | 126 | ### Setting Credentials 127 | 128 | By default the `sagemaker` client will try to read the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variable. If this is not set, it will try to use `boto3`. 129 | 130 | Alternatively you can set the token manually by setting `sagemaker.*`. 131 | 132 | manually setting the api key: 133 | 134 | ```python 135 | from easyllm.clients import sagemaker 136 | 137 | sagemaker.api_aws_access_key="xxx" 138 | sagemaker.api_aws_secret_key="xxx" 139 | 140 | res = sagemaker.ChatCompletion.create(...) 141 | ``` 142 | 143 | Using environment variable: 144 | 145 | ```python 146 | # can happen elsehwere 147 | import os 148 | os.environ["AWS_ACCESS_KEY_ID"] = "xxx" 149 | os.environ["AWS_SECRET_ACCESS_KEY"] = "xxx" 150 | 151 | from easyllm.clients import sagemaker 152 | ``` 153 | 154 | 155 | ### Build Prompt 156 | 157 | By default the `sagemaker` client will try to read the `sagemaker_PROMPT` environment variable and tries to map the value to the `PROMPT_MAPPING` dictionary. If this is not set, it will use the default prompt builder. 158 | You can also set it manually. 159 | 160 | Checkout the [Prompt Utils](../prompt_utils) for more details. 
161 | 162 | 163 | manually setting the prompt builder: 164 | 165 | ```python 166 | from easyllm.clients import sagemaker 167 | 168 | sagemaker.prompt_builder = "llama2" 169 | 170 | res = sagemaker.ChatCompletion.create(...) 171 | ``` 172 | 173 | Using environment variable: 174 | 175 | ```python 176 | # can happen elsehwere 177 | import os 178 | os.environ["HUGGINGFACE_PROMPT"] = "llama2" 179 | 180 | from easyllm.clients import sagemaker 181 | ``` -------------------------------------------------------------------------------- /docs/examples/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philschmid/easyllm/1f37a9307d68545f41639f392a301baadda7188a/docs/examples/.gitkeep -------------------------------------------------------------------------------- /docs/examples/index.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | Here are some examples to help you get started with the easyllm library: 4 | 5 | ## Hugging Face 6 | 7 | | Example | Description | 8 | | ----------------------------------------------------------------------- | -------------------------------------------------------------------------------------- | 9 | | [Detailed ChatCompletion Example](chat-completion-api) | Shows how to use the ChatCompletion API to have a conversational chat with the model. | 10 | | [Detailed Completion Example](text-completion-api) | Uses the TextCompletion API to generate text with the model. | 11 | | [Create Embeddings](get-embeddings) | Embeds text into vector representations using the model. | 12 | | [Example how to stream chat requests](stream-chat-completions) | Demonstrates streaming multiple chat requests to efficiently chat with the model. | 13 | | [Example how to stream text requests](stream-text-completions) | Shows how to stream multiple text completion requests. | 14 | | [Hugging Face Inference Endpoints Example](inference-endpoints-example) | Example on how to use custom endpoints, e.g. Inference Endpoints or localhost. | 15 | | [Retrieval Augmented Generation using Llama 2](llama2-rag-example) | Example on how to use Llama 2 70B for in-context retrival augmentation | 16 | | [Llama 2 70B Agent/Tool use example ](llama2-agent-example) | Example on how to use Llama 2 70B to interace with tools and could be used as an agent | 17 | 18 | The examples cover the main functionality of the library - chat, text completion, and embeddings. Let me know if you would like me to modify or expand the index page in any way. 19 | 20 | ## Amazon SageMaker 21 | 22 | | Example | Description | 23 | | ---------------------------------------------------------------- | ------------------------------------------------------------------------------------- | 24 | | [Detailed ChatCompletion Example](sagemaker-chat-completion-api) | Shows how to use the ChatCompletion API to have a conversational chat with the model. | 25 | | [Detailed Completion Example](sagemaker-text-completion-api) | Uses the TextCompletion API to generate text with the model. | 26 | | [Create Embeddings](sagemaker-get-embeddings) | Embeds text into vector representations using the model. 
| 27 | 28 | ## Amazon Bedrock 29 | 30 | | Example | Description | 31 | | ---------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | 32 | | [Detailed ChatCompletion Example](bedrock-chat-completion-api) | Shows how to use the ChatCompletion API to have a conversational chat with the model. | 33 | | [Example how to stream chat requests](bedrock-stream-chat-completions) | Demonstrates streaming multiple chat requests to efficiently chat with the model. | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # EasyLLM 2 | 3 | EasyLLM is an open source project that provides helpful tools and methods for working with large language models (LLMs), both open source and closed source. 4 | 5 | EasyLLM implements clients that are compatible with OpenAI's Completion API. This means you can easily replace `openai.ChatCompletion` with, for example, `huggingface.ChatCompletion`. 6 | 7 | * [ChatCompletion Clients](./clients) 8 | * [Prompt Utils](./prompt_utils) 9 | * [Examples](./examples) 10 | 11 | ## 🚀 Getting Started 12 | 13 | Install EasyLLM via pip: 14 | 15 | ```bash 16 | pip install easyllm 17 | ``` 18 | 19 | Then import and start using the clients: 20 | 21 | ```python 22 | 23 | from easyllm.clients import huggingface 24 | 25 | # helper to build llama2 prompt 26 | huggingface.prompt_builder = "llama2" 27 | 28 | response = huggingface.ChatCompletion.create( 29 | model="meta-llama/Llama-2-70b-chat-hf", 30 | messages=[ 31 | {"role": "system", "content": "\nYou are a helpful assistant speaking like a pirate. argh!"}, 32 | {"role": "user", "content": "What is the sun?"}, 33 | ], 34 | temperature=0.9, 35 | top_p=0.6, 36 | max_tokens=256, 37 | ) 38 | 39 | print(response) 40 | ``` 41 | the result will look like 42 | 43 | ```bash 44 | { 45 | "id": "hf-lVC2iTMkFJ", 46 | "object": "chat.completion", 47 | "created": 1690661144, 48 | "model": "meta-llama/Llama-2-70b-chat-hf", 49 | "choices": [ 50 | { 51 | "index": 0, 52 | "message": { 53 | "role": "assistant", 54 | "content": " Arrrr, the sun be a big ol' ball o' fire in the sky, me hearty! It be the source o' light and warmth for our fair planet, and it be a mighty powerful force, savvy? Without the sun, we'd be sailin' through the darkness, lost and cold, so let's give a hearty \"Yarrr!\" for the sun, me hearties! Arrrr!" 55 | }, 56 | "finish_reason": null 57 | } 58 | ], 59 | "usage": { 60 | "prompt_tokens": 111, 61 | "completion_tokens": 299, 62 | "total_tokens": 410 63 | } 64 | } 65 | ``` 66 | 67 | Check out other examples: 68 | 69 | * [Detailed ChatCompletion Example](examples/chat-completion-api) 70 | * [Example how to stream chat requests](examples/stream-chat-completion) 71 | * [Example how to stream text requests](examples/stream-text-completion) 72 | * [Detailed Completion Example](examples/text-completion-api) 73 | * [Create Embeddings](examples/get-embeddings) 74 | 75 | 76 | ## 💪🏻 Migration from OpenAI to HuggingFace 77 | 78 | Migrating from OpenAI to HuggingFace is easy. Just change the import statement and the client you want to use and optionally the prompt builder. 
79 | 80 | ```diff 81 | - import openai 82 | + from easyllm.clients import huggingface 83 | + huggingface.prompt_builder = "llama2" 84 | 85 | 86 | - response = openai.ChatCompletion.create( 87 | + response = huggingface.ChatCompletion.create( 88 | - model="gpt-3.5-turbo", 89 | + model="meta-llama/Llama-2-70b-chat-hf", 90 | messages=[ 91 | {"role": "system", "content": "You are a helpful assistant."}, 92 | {"role": "user", "content": "Knock knock."}, 93 | ], 94 | ) 95 | ``` 96 | 97 | Make sure when you switch your client that your hyperparameters are still valid. For example, `temperature` of GPT-3 might be different than `temperature` of `Llama-2`. 98 | 99 | ## ☑️ Key Features 100 | 101 | ### 🤝 Compatible Clients 102 | 103 | - Implementation of clients compatible with OpenAI API format of `openai.ChatCompletion`. 104 | - Easily switch between different LLMs like `openai.ChatCompletion` and `huggingface.ChatCompletion` by changing one line of code. 105 | - Support for streaming of completions, checkout example [How to stream completions](examples/stream-chat-completions). 106 | 107 | ### ⚙️ Helper Modules ⚙️ 108 | 109 | - `evol_instruct` (work in progress) - Use evolutionary algorithms create instructions for LLMs. 110 | 111 | - `prompt_utils` - Helper methods to easily convert between prompt formats like OpenAI Messages to prompts for open source models like Llama 2. 112 | 113 | ## 📔 Citation & Acknowledgements 114 | 115 | If you use EasyLLM, please share it with me on social media or email. I would love to hear about it! 116 | You can also cite the project using the following BibTeX: 117 | 118 | ```bash 119 | @software{Philipp_Schmid_EasyLLM_2023, 120 | author = {Philipp Schmid}, 121 | license = {Apache-2.0}, 122 | month = juj, 123 | title = {EasyLLM: Streamlined Tools for LLMs}, 124 | url = {https://github.com/philschmid/easyllm}, 125 | year = {2023} 126 | } 127 | ``` 128 | 129 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | 2 | # Installation 3 | 4 | ## with pip recommended 5 | 6 | EasyLLM is published as a [Python package] and can be installed with 7 | `pip` from pypi or from the Github repository, Open up a terminal and install. 8 | === "Latest" 9 | 10 | ``` sh 11 | pip install easyllm 12 | ``` 13 | === "Github" 14 | 15 | ``` sh 16 | pip install git+https://github.com/philschmid/easyllm 17 | ``` -------------------------------------------------------------------------------- /docs/prompt_utils.md: -------------------------------------------------------------------------------- 1 | # Prompt utilities 2 | 3 | The `prompt_utils` module contains functions to assist with converting Message's Dictionaries into prompts that can be used with `ChatCompletion` clients. 
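Each builder follows the same pattern: it takes a list of message dictionaries and returns a single prompt string, which the `ChatCompletion` clients apply automatically via `prompt_builder`, or which you can pass yourself to a `Completion`-style call. The sketch below is illustrative: the model name is just an example, and `huggingface.prompt_builder` is intentionally left unset so the already-built prompt is not formatted a second time.

```python
from easyllm.clients import huggingface
from easyllm.prompt_utils import build_llama2_prompt

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the sun?"},
]

# convert OpenAI-style messages into a single Llama 2 prompt string
prompt = build_llama2_prompt(messages)

# pass the built prompt directly as a text-completion prompt
# (prompt_builder stays unset here, otherwise the client would format the prompt again)
response = huggingface.Completion.create(
    model="meta-llama/Llama-2-70b-chat-hf",
    prompt=prompt,
    max_tokens=256,
)
print(response)
```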
4 | 5 | Supported prompt formats: 6 | 7 | - [Prompt utilities](#prompt-utilities) 8 | - [Set prompt builder for client](#set-prompt-builder-for-client) 9 | - [Llama 2 Chat builder](#llama-2-chat-builder) 10 | - [Vicuna Chat builder](#vicuna-chat-builder) 11 | - [Hugging Face ChatML builder](#hugging-face-chatml-builder) 12 | - [StarChat](#starchat) 13 | - [Falcon](#falcon) 14 | - [WizardLM Chat builder](#wizardlm-chat-builder) 15 | - [StableBeluga2 Chat builder](#stablebeluga2-chat-builder) 16 | - [Open Assistant Chat builder](#open-assistant-chat-builder) 17 | - [Anthropic Claude Chat builder](#anthropic-claude-chat-builder) 18 | 19 | Prompt utils are also exporting a mapping dictionary `PROMPT_MAPPING` that maps a model name to a prompt builder function. This can be used to select the correct prompt builder function via an environment variable. 20 | 21 | ```python 22 | PROMPT_MAPPING = { 23 | "chatml_falcon": build_chatml_falcon_prompt, 24 | "chatml_starchat": build_chatml_starchat_prompt, 25 | "llama2": build_llama2_prompt, 26 | "open_assistant": build_open_assistant_prompt, 27 | "stablebeluga": build_stablebeluga_prompt, 28 | "vicuna": build_vicuna_prompt, 29 | "wizardlm": build_wizardlm_prompt, 30 | } 31 | ``` 32 | 33 | ## Set prompt builder for client 34 | 35 | ```python 36 | from easyllm.clients import huggingface 37 | 38 | huggingface.prompt_builder = "llama2" # vicuna, chatml_falcon, chatml_starchat, wizardlm, stablebeluga, open_assistant 39 | ``` 40 | 41 | ## Llama 2 Chat builder 42 | 43 | Creates LLama 2 chat prompt for chat conversations. Learn more in the [Hugging Face Blog on how to prompt Llama 2](https://huggingface.co/blog/llama2#how-to-prompt-llama-2). If a `Message` with an unsupported `role` is passed, an error will be thrown. 44 | 45 | Example Models: 46 | 47 | * [meta-llama/Llama-2-70b-chat-hf](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) 48 | 49 | ```python 50 | from easyllm.prompt_utils import build_llama2_prompt 51 | 52 | messages=[ 53 | {"role": "system", "content": "You are a helpful assistant."}, 54 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 55 | ] 56 | prompt = build_llama2_prompt(messages) 57 | ``` 58 | 59 | 60 | ## Vicuna Chat builder 61 | 62 | Creats a Vicuna prompt for a chat conversation. If a `Message` with an unsupported `role` is passed, an error will be thrown. [Reference](https://github.com/lm-sys/FastChat/blob/main/docs/vicuna_weights_version.md#prompt-template) 63 | 64 | Example Models: 65 | 66 | * [ehartford/WizardLM-13B-V1.0-Uncensored](https://huggingface.co/ehartford/WizardLM-13B-V1.0-Uncensored) 67 | 68 | 69 | ```python 70 | from easyllm.prompt_utils import build_vicuna_prompt 71 | 72 | messages=[ 73 | {"role": "system", "content": "You are a helpful assistant."}, 74 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 75 | ] 76 | prompt = build_vicuna_prompt(messages) 77 | ``` 78 | 79 | ## Hugging Face ChatML builder 80 | 81 | Creates a Hugging Face ChatML prompt for a chat conversation. The Hugging Face ChatML has different prompts for different Example Models, e.g. StarChat or Falcon. If a `Message` with an unsupported `role` is passed, an error will be thrown. 
[Reference](https://huggingface.co/HuggingFaceH4/starchat-beta) 82 | 83 | Example Models: 84 | * [HuggingFaceH4/starchat-beta](https://huggingface.co/HuggingFaceH4/starchat-beta) 85 | 86 | ### StarChat 87 | 88 | ```python 89 | from easyllm.prompt_utils import build_chatml_starchat_prompt 90 | 91 | messages=[ 92 | {"role": "system", "content": "You are a helpful assistant."}, 93 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 94 | ] 95 | prompt = build_chatml_starchat_prompt(messages) 96 | ``` 97 | 98 | ### Falcon 99 | 100 | ```python 101 | from easyllm.prompt_utils import build_chatml_falcon_prompt 102 | 103 | messages=[ 104 | {"role": "system", "content": "You are a helpful assistant."}, 105 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 106 | ] 107 | prompt = build_chatml_falcon_prompt(messages) 108 | ``` 109 | 110 | ## WizardLM Chat builder 111 | 112 | Creates a WizardLM prompt for a chat conversation. If a `Message` with an unsupported `role` is passed, an error will be thrown. [Reference](https://github.com/nlpxucan/WizardLM/blob/main/WizardLM/src/infer_wizardlm13b.py#L79) 113 | 114 | Example Models: 115 | 116 | * [WizardLM/WizardLM-13B-V1.2](https://huggingface.co/WizardLM/WizardLM-13B-V1.2) 117 | 118 | ```python 119 | from easyllm.prompt_utils import build_wizardlm_prompt 120 | 121 | messages=[ 122 | {"role": "system", "content": "You are a helpful assistant."}, 123 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 124 | ] 125 | prompt = build_wizardlm_prompt(messages) 126 | ``` 127 | 128 | ## StableBeluga2 Chat builder 129 | 130 | Creates StableBeluga2 prompt for a chat conversation. If a `Message` with an unsupported `role` is passed, an error will be thrown. [Reference](https://huggingface.co/stabilityai/StableBeluga2) 131 | 132 | ```python 133 | from easyllm.prompt_utils import build_stablebeluga_prompt 134 | 135 | messages=[ 136 | {"role": "system", "content": "You are a helpful assistant."}, 137 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 138 | ] 139 | prompt = build_stablebeluga_prompt(messages) 140 | ``` 141 | 142 | ## Open Assistant Chat builder 143 | 144 | Creates Open Assistant ChatML template. Uses `<|prompter|>`, ``, `<|system|>`, and `<|assistant>` tokens. If a . If a `Message` with an unsupported `role` is passed, an error will be thrown. [Reference](https://huggingface.co/OpenAssistant/llama2-13b-orca-8k-33192) 145 | 146 | Example Models: 147 | 148 | * [OpenAssistant/llama2-13b-orca-8k-3319](https://huggingface.co/OpenAssistant/llama2-13b-orca-8k-33192) 149 | 150 | ```python 151 | from easyllm.prompt_utils import build_open_assistant_prompt 152 | 153 | messages=[ 154 | {"role": "system", "content": "You are a helpful assistant."}, 155 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 156 | ] 157 | prompt = build_open_assistant_prompt(messages) 158 | ``` 159 | 160 | ## Anthropic Claude Chat builder 161 | 162 | Creates Anthropic Claude template. Uses `\n\nHuman:`, `\n\nAssistant:`. If a . If a `Message` with an unsupported `role` is passed, an error will be thrown. 
[Reference](https://docs.anthropic.com/claude/docs/introduction-to-prompt-design) 163 | 164 | Example Models: 165 | 166 | * [Bedrock](https://aws.amazon.com/bedrock/claude/) 167 | 168 | ```python 169 | from easyllm.prompt_utils import build_anthropic_prompt 170 | 171 | messages=[ 172 | {"role": "system", "content": "You are a helpful assistant."}, 173 | {"role": "user", "content": "Explain asynchronous programming in the style of the pirate Blackbeard."}, 174 | ] 175 | prompt = build_anthropic_prompt(messages) 176 | ``` 177 | 178 | -------------------------------------------------------------------------------- /easyllm/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023-present philschmid 2 | # 3 | # SPDX-License-Identifier: MIT 4 | __version__ = "0.7.0.dev0" 5 | -------------------------------------------------------------------------------- /easyllm/cli.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | 4 | def parse_args(): 5 | parser = ArgumentParser(description="Description of your program") 6 | parser.add_argument("-f", "--foo", help="Description for foo argument", required=True) 7 | 8 | return parser.parse_args() 9 | 10 | 11 | def main(): 12 | args = parse_args() 13 | print(args) 14 | -------------------------------------------------------------------------------- /easyllm/clients/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philschmid/easyllm/1f37a9307d68545f41639f392a301baadda7188a/easyllm/clients/__init__.py -------------------------------------------------------------------------------- /easyllm/clients/bedrock.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | from typing import Any, Dict, List, Optional 5 | 6 | from nanoid import generate 7 | 8 | from easyllm.prompt_utils.base import build_prompt, buildBasePrompt 9 | from easyllm.schema.base import ChatMessage, Usage, dump_object 10 | from easyllm.schema.openai import ( 11 | ChatCompletionRequest, 12 | ChatCompletionResponse, 13 | ChatCompletionResponseChoice, 14 | ChatCompletionResponseStreamChoice, 15 | ChatCompletionStreamResponse, 16 | DeltaMessage, 17 | ) 18 | from easyllm.utils import setup_logger 19 | from easyllm.utils.aws import get_bedrock_client 20 | 21 | logger = setup_logger() 22 | 23 | # default parameters 24 | api_type = "bedrock" 25 | api_aws_access_key = os.environ.get("AWS_ACCESS_KEY_ID", None) 26 | api_aws_secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY", None) 27 | api_aws_session_token = os.environ.get("AWS_SESSION_TOKEN", None) 28 | 29 | client = get_bedrock_client( 30 | aws_access_key_id=api_aws_access_key, 31 | aws_secret_access_key=api_aws_secret_key, 32 | aws_session_token=api_aws_session_token, 33 | ) 34 | 35 | 36 | SUPPORTED_MODELS = [ 37 | "anthropic.claude-v2", 38 | ] 39 | model_version_mapping = {"anthropic.claude-v2": "bedrock-2023-05-31"} 40 | 41 | api_version = os.environ.get("BEDROCK_API_VERSION", None) or "bedrock-2023-05-31" 42 | prompt_builder = os.environ.get("BEDROCK_PROMPT", None) 43 | stop_sequences = [] 44 | 45 | 46 | def stream_chat_request(client, body, model): 47 | """Utility function for streaming chat requests.""" 48 | id = f"hf-{generate(size=10)}" 49 | response = client.invoke_model_with_response_stream( 50 | body=json.dumps(body), modelId=model, 
accept="application/json", contentType="application/json" 51 | ) 52 | stream = response.get("body") 53 | 54 | yield dump_object( 55 | ChatCompletionStreamResponse( 56 | id=id, 57 | model=model, 58 | choices=[ChatCompletionResponseStreamChoice(index=0, delta=DeltaMessage(role="assistant"))], 59 | ) 60 | ) 61 | # yield each generated token 62 | reason = None 63 | for _idx, event in enumerate(stream): 64 | chunk = event.get("chunk") 65 | if chunk: 66 | chunk_obj = json.loads(chunk.get("bytes").decode()) 67 | text = chunk_obj["completion"] 68 | yield dump_object( 69 | ChatCompletionStreamResponse( 70 | id=id, 71 | model=model, 72 | choices=[ChatCompletionResponseStreamChoice(index=0, delta=DeltaMessage(content=text))], 73 | ) 74 | ) 75 | yield dump_object( 76 | ChatCompletionStreamResponse( 77 | id=id, 78 | model=model, 79 | choices=[ChatCompletionResponseStreamChoice(index=0, finish_reason=reason, delta={})], 80 | ) 81 | ) 82 | 83 | 84 | class ChatCompletion: 85 | @staticmethod 86 | def create( 87 | messages: List[ChatMessage], 88 | model: Optional[str] = None, 89 | temperature: float = 0.9, 90 | top_p: float = 0.6, 91 | top_k: Optional[int] = 10, 92 | n: int = 1, 93 | max_tokens: int = 1024, 94 | stop: Optional[List[str]] = None, 95 | stream: bool = False, 96 | frequency_penalty: Optional[float] = 1.0, 97 | debug: bool = False, 98 | ) -> Dict[str, Any]: 99 | """ 100 | Creates a new chat completion for the provided messages and parameters. 101 | 102 | Args: 103 | messages (`List[ChatMessage]`): The messages to use for the completion. 104 | model (`str`, *optional*, defaults to None): The model to use for the completion. Must be one of 105 | the models listed in `SUPPORTED_MODELS`. 106 | temperature (`float`, defaults to 0.9): The temperature to use for the completion. 107 | top_p (`float`, defaults to 0.6): The top_p to use for the completion. 108 | top_k (`int`, *optional*, defaults to 10): The top_k to use for the completion. 109 | n (`int`, defaults to 1): The number of completions to generate. 110 | max_tokens (`int`, defaults to 1024): The maximum number of tokens to generate. 111 | stop (`List[str]`, *optional*, defaults to None): The stop sequence(s) to use for the completion. 112 | stream (`bool`, defaults to False): Whether to stream the completion. 113 | frequency_penalty (`float`, *optional*, defaults to 1.0): The frequency penalty to use for the completion. 114 | debug (`bool`, defaults to False): Whether to enable debug logging. 115 | 116 | Tip: Prompt builder 117 | Make sure to always use a prompt builder for your model. 118 | """ 119 | if debug: 120 | logger.setLevel(logging.DEBUG) 121 | 122 | # validate that the model is supported 123 | if model not in SUPPORTED_MODELS: 124 | raise ValueError(f"Model {model} is not supported. Supported models are: {SUPPORTED_MODELS}") 125 | 126 | request = ChatCompletionRequest( 127 | messages=messages, 128 | model=model, 129 | temperature=temperature, 130 | top_p=top_p, 131 | top_k=top_k, 132 | n=n, 133 | max_tokens=max_tokens, 134 | stop=stop, 135 | stream=stream, 136 | frequency_penalty=frequency_penalty, 137 | ) 138 | 139 | if prompt_builder is None: 140 | logger.warn( 141 | f"""bedrock.prompt_builder is not set. 142 | Using the default prompt builder. The prompt sent to the model will be: 143 | ---------------------------------------- 144 | {buildBasePrompt(request.messages)}. 145 | ---------------------------------------- 146 | If you want to use a custom prompt builder, set bedrock.prompt_builder to a function that takes a list of messages and returns a string.
147 | You can also use existing prompt builders by importing them from easyllm.prompt_utils""" 148 | ) 149 | prompt = buildBasePrompt(request.messages) 150 | else: 151 | prompt = build_prompt(request.messages, prompt_builder) 152 | 153 | # create stop sequences 154 | if isinstance(request.stop, list): 155 | stop = stop_sequences + request.stop 156 | elif isinstance(request.stop, str): 157 | stop = stop_sequences + [request.stop] 158 | else: 159 | stop = stop_sequences 160 | logger.debug(f"Stop sequences:\n{stop}") 161 | 162 | # check if we can stream 163 | if request.stream is True and request.n > 1: 164 | raise ValueError("Cannot stream more than one completion") 165 | 166 | # construct body 167 | body = { 168 | "prompt": prompt, 169 | "max_tokens_to_sample": request.max_tokens, 170 | "temperature": request.temperature, 171 | "top_k": request.top_k, 172 | "top_p": request.top_p, 173 | "stop_sequences": stop, 174 | "anthropic_version": model_version_mapping[model], 175 | } 176 | logger.debug(f"Generation body:\n{body}") 177 | 178 | if request.stream: 179 | return stream_chat_request(client, body, model) 180 | else: 181 | choices = [] 182 | generated_tokens = 0 183 | for _i in range(request.n): 184 | response = client.invoke_model( 185 | body=json.dumps(body), modelId=model, accept="application/json", contentType="application/json" 186 | ) 187 | # parse response 188 | res = json.loads(response.get("body").read()) 189 | 190 | # convert to schema 191 | parsed = ChatCompletionResponseChoice( 192 | index=_i, 193 | message=ChatMessage(role="assistant", content=res["completion"].strip()), 194 | finish_reason=res["stop_reason"], 195 | ) 196 | generated_tokens += len(res["completion"].strip()) // 4 197 | choices.append(parsed) 198 | logger.debug(f"Response at index {_i}:\n{parsed}") 199 | # calculate usage details 200 | # TODO: fix when details is fixed 201 | prompt_tokens = int(len(prompt) / 4) 202 | total_tokens = prompt_tokens + generated_tokens 203 | 204 | return dump_object( 205 | ChatCompletionResponse( 206 | model=request.model, 207 | choices=choices, 208 | usage=Usage( 209 | prompt_tokens=prompt_tokens, completion_tokens=generated_tokens, total_tokens=total_tokens 210 | ), 211 | ) 212 | ) 213 | 214 | @classmethod 215 | async def acreate(cls, *args, **kwargs): 216 | """ 217 | Creates a new chat completion for the provided messages and parameters. 
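Note: the async variant is not implemented yet; use the synchronous `ChatCompletion.create` instead.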
218 | """ 219 | raise NotImplementedError("ChatCompletion.acreate is not implemented") 220 | -------------------------------------------------------------------------------- /easyllm/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philschmid/easyllm/1f37a9307d68545f41639f392a301baadda7188a/easyllm/data/__init__.py -------------------------------------------------------------------------------- /easyllm/data/extractor/__init__.py: -------------------------------------------------------------------------------- 1 | from easyllm.data.extractor.html_extractor import HtmlExtractor 2 | -------------------------------------------------------------------------------- /easyllm/data/extractor/html_extractor.py: -------------------------------------------------------------------------------- 1 | # 2 | from inscriptis import get_text 3 | from inscriptis.css_profiles import CSS_PROFILES 4 | from inscriptis.model.config import ParserConfig 5 | from pydantic import BaseModel 6 | from readability import Document 7 | 8 | INSCRIPTIS_CONFIG = ParserConfig(css=CSS_PROFILES["strict"]) 9 | 10 | 11 | class HtmlExtractor(BaseModel): 12 | """ 13 | Desc: Extracts text from the HTML document using mozzilas readability and inscriptis. 14 | """ 15 | 16 | name: str = "html_extractor" 17 | min_doc_length: int = 25 18 | 19 | def __call__(self, document: str) -> str: 20 | parsed_doc = Document(document, min_text_length=self.min_doc_length) 21 | clean_html = parsed_doc.summary(html_partial=True) 22 | content = get_text(clean_html, INSCRIPTIS_CONFIG).strip() 23 | return content 24 | -------------------------------------------------------------------------------- /easyllm/data/filters/__init__.py: -------------------------------------------------------------------------------- 1 | from easyllm.data.filters.bulletpoint_ratio import BulletpointRatioFilter 2 | from easyllm.data.filters.common_word import CommonWordFilter 3 | from easyllm.data.filters.digit_to_character import DigitToCharacter 4 | from easyllm.data.filters.kenlm_ppl import PerplexityFilter 5 | from easyllm.data.filters.length import LengthFilter 6 | from easyllm.data.filters.longword import LongWordFilter 7 | from easyllm.data.filters.n_gram import TopNGramsFilter 8 | from easyllm.data.filters.non_alpha_numeric import NonAlphaNumericFilter 9 | from easyllm.data.filters.parantheses_ration import ParenthesesRationFilter 10 | from easyllm.data.filters.punctuation import EllipsisFilter, PunctuationFilter 11 | from easyllm.data.filters.repeating import RepeatedLinesFilter, RepeatedParagraphFilter 12 | from easyllm.data.filters.url_ratio import UrlRatioFilter 13 | from easyllm.data.filters.whitespace_ration import WhitespaceRatioFilter 14 | from easyllm.data.filters.words_to_symbol import SymbolToWordFilter 15 | -------------------------------------------------------------------------------- /easyllm/data/filters/bulletpoint_ratio.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class BulletpointRatioFilter(BaseModel): 7 | """ 8 | Ref: Gopher (Rae et al., 2021) 9 | Desc: If more than 90% of the document are bulletpoints then remove 10 | """ 11 | 12 | name: str = "bulletpoint_ratio" 13 | potential_bullet_points: List[str] = [ 14 | "•", 15 | "‣", 16 | "⁃", 17 | "⁌", 18 | "⁍", 19 | "∙", 20 | "○", 21 | "●", 22 | "◘", 23 | "◦", 24 | "⦾", 25 | "⦿", 26 | "-", 27 | ] 28 | 
remove_percentage: float = 0.9 29 | 30 | def __call__(self, text): 31 | # split text into lines 32 | lines = text.split("\n") 33 | num_bullet_points = 0 34 | for line in lines: 35 | # check if the line is a bullet point 36 | if line.startswith(tuple(self.potential_bullet_points)): 37 | num_bullet_points += 1 38 | # check if the ratio of bullet points to lines is greater than the remove percentage 39 | if num_bullet_points / len(lines) > self.remove_percentage: 40 | return True 41 | # otherwise keep 42 | return False 43 | -------------------------------------------------------------------------------- /easyllm/data/filters/common_word.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from pydantic import BaseModel 4 | 5 | COMMON_WORDS_EN = ["the", "be", "to", "of", "and", "that", "have", "with", "this"] 6 | COMMON_WORDS_DE = ["der", "die", "das", "er" "sein", "zu", "ist", "war", "von", "und", "haben", "mit"] 7 | 8 | 9 | class CommonWordFilter(BaseModel): 10 | """ 11 | Ref: Gopher (Rae et al., 2021) 12 | Desc: Makes sure that the document contains at least 2 common words if not remove 13 | """ 14 | 15 | name: str = "common_word" 16 | common_words: List[str] = COMMON_WORDS_EN 17 | n: int = 2 18 | 19 | def __call__(self, text): 20 | words = text.split() 21 | common_word_counter = 0 22 | # count the number of common words 23 | for word in words: 24 | if word.lower() in self.common_words: 25 | common_word_counter += 1 26 | if common_word_counter >= self.n: 27 | return False 28 | # otherwise remove 29 | return True 30 | -------------------------------------------------------------------------------- /easyllm/data/filters/cookie_banner.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pydantic import BaseModel 4 | 5 | policy_substrings = [ 6 | "terms of use", 7 | "privacy policy", 8 | "cookie policy", 9 | "uses cookies", 10 | "privacy overview", 11 | "use of cookies", 12 | "use cookies", 13 | "privacy & cookies policy", 14 | "privacy and cookies policy", 15 | "This website uses cookies to improve your experience while you " 16 | "navigate through the website. Out of these cookies, the cookies " 17 | "that are categorized as necessary are stored on your browser as they " 18 | "are essential for the working of basic functionalities of the website. " 19 | "We also use third-party cookies that help us analyze and understand how " 20 | "you use this website. These cookies will be stored in your browser only " 21 | "with your consent. You also have the option to opt-out of these " 22 | "cookies. But opting out of some of these cookies may have an effect " 23 | "on your browsing experience.".lower(), 24 | "Necessary cookies are absolutely essential for the website to " 25 | "function properly. This category only includes cookies that " 26 | "ensures basic functionalities and security features of the website. " 27 | "These cookies do not store any personal information.".lower(), 28 | "Any cookies that may not be particularly necessary for the website " 29 | "to function and is used specifically to collect user personal data " 30 | "via analytics, ads, other embedded contents are termed as non-necessary " 31 | "cookies. It is mandatory to procure user consent prior to running these " 32 | "cookies on your website.".lower(), 33 | "This site uses cookies, including for analytics, personalization, and " 34 | "advertising purposes. 
For more information or to change your " 35 | "cookie settings, click here.".lower(), 36 | "If you continue to browse this site without changing your cookie " 37 | "settings, you agree to this use. AcceptRead More".lower(), 38 | ] 39 | 40 | 41 | class CookieBannerFilter(BaseModel): 42 | """ 43 | Ref: C4 Raffel et al. 44 | Desc: Removes documents if more than 40% of the documents include terms for cookies, tos, privacy policy, etc. Requires external list. 45 | """ 46 | 47 | name: str = "cookie_banner" 48 | regex: re.Pattern = re.compile(r"(terms of use|privacy policy|copyright|all rights reserved)", re.IGNORECASE) 49 | remove_percentage: float = 0.4 50 | 51 | def __call__(self, text): 52 | # check if the regex matches 53 | raise NotImplementedError("CookieBannerFilter not implemented yet") 54 | -------------------------------------------------------------------------------- /easyllm/data/filters/digit_to_character.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class DigitToCharacter(BaseModel): 7 | """ 8 | Desc: If more than 20% of the document are digits then remove 9 | """ 10 | 11 | name: str = "digit_to_character" 12 | remove_percentage: float = 0.2 13 | 14 | def __call__(self, text): 15 | digits = re.findall(r"\d", text) 16 | num_digits = len(digits) 17 | total_chars = len(text) 18 | # check if there are any characters in the text 19 | if num_digits / total_chars > self.remove_percentage: 20 | return True 21 | # otherwise keep 22 | return False 23 | -------------------------------------------------------------------------------- /easyllm/data/filters/kenlm_ppl.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | import re 3 | import unicodedata 4 | from typing import Dict 5 | 6 | from huggingface_hub import hf_hub_download 7 | from pydantic import BaseModel, ConfigDict 8 | 9 | _kenlm = importlib.util.find_spec("kenlm") is not None 10 | _sentencepiece = importlib.util.find_spec("sentencepiece") is not None 11 | 12 | if _kenlm or not _sentencepiece: 13 | import kenlm 14 | import sentencepiece 15 | 16 | 17 | class SentencePiece: 18 | def __init__( 19 | self, 20 | model: str, 21 | ): 22 | super().__init__() 23 | self.sp = sentencepiece.SentencePieceProcessor() 24 | self.sp.load(str(model)) 25 | 26 | def do(self, text: dict) -> dict: 27 | tokenized = self.sp.encode_as_pieces(text) 28 | return " ".join(tokenized) 29 | 30 | 31 | class KenlmModel: 32 | digit_re: re.Pattern[str] = re.compile(r"\d") 33 | unicode_punct: Dict[str, str] = { 34 | ",": ",", 35 | "。": ".", 36 | "、": ",", 37 | "„": '"', 38 | "”": '"', 39 | "“": '"', 40 | "«": '"', 41 | "»": '"', 42 | "1": '"', 43 | "」": '"', 44 | "「": '"', 45 | "《": '"', 46 | "》": '"', 47 | "´": "'", 48 | "∶": ":", 49 | ":": ":", 50 | "?": "?", 51 | "!": "!", 52 | "(": "(", 53 | ")": ")", 54 | ";": ";", 55 | "–": "-", 56 | "—": " - ", 57 | ".": ". 
", 58 | "~": "~", 59 | "’": "'", 60 | "…": "...", 61 | "━": "-", 62 | "〈": "<", 63 | "〉": ">", 64 | "【": "[", 65 | "】": "]", 66 | "%": "%", 67 | "►": "-", 68 | } 69 | unicode_punct_re: re.Pattern = re.compile(f"[{''.join(unicode_punct.keys())}]") 70 | non_printing_chars_re: re.Pattern = re.compile(f"[{''.join(map(chr, list(range(0,32)) + list(range(127,160))))}]") 71 | model: kenlm.Model = None 72 | tokenizer: SentencePiece = None 73 | accent: bool = False 74 | case: bool = False 75 | numbers: bool = True 76 | punct: int = 1 77 | 78 | def __init__( 79 | self, 80 | model_path: str, 81 | tokenizer_path: str, 82 | lower_case: bool = False, 83 | remove_accents: bool = False, 84 | normalize_numbers: bool = True, 85 | punctuation: int = 1, 86 | ): 87 | self.model = kenlm.Model(model_path) 88 | self.tokenizer = SentencePiece(tokenizer_path) 89 | self.accent = remove_accents 90 | self.case = lower_case 91 | self.numbers = normalize_numbers 92 | self.punct = punctuation 93 | 94 | @classmethod 95 | def from_pretrained( 96 | cls, 97 | language_or_path: str, 98 | ): 99 | try: 100 | model = hf_hub_download("philschmid/kenlm", filename=f"wikipedia/{language_or_path}.arpa.bin") 101 | tokenizer = hf_hub_download("philschmid/kenlm", filename=f"wikipedia/{language_or_path}.sp.model") 102 | except Exception: 103 | raise ValueError( 104 | f"KenLM model for {language_or_path} not found at https://huggingface.co/philschmid/kenlm. Please train your own model and upload it to the hub." 105 | ) from None 106 | 107 | return cls( 108 | model, 109 | tokenizer, 110 | False, 111 | False, 112 | True, 113 | 1, 114 | ) 115 | 116 | def pp(self, log_score, length): 117 | return 10.0 ** (-log_score / length) 118 | 119 | def get_perplexity(self, doc: str, normalize_cc_net: bool = True): 120 | if normalize_cc_net: 121 | doc = self.normalize( 122 | doc, 123 | accent=self.accent, 124 | case=self.case, 125 | numbers=self.numbers, 126 | punct=self.punct, 127 | ) 128 | # Tokenize (after normalizing): See https://github.com/facebookresearch/cc_net/blob/bda555bd1cf1ee2e0b925363e62a61cd46c8b60d/cc_net/mine.py#L352 for full pipeline 129 | doc = self.tokenizer.do(doc) 130 | doc_log_score, doc_length = 0, 0 131 | for line in doc.split("\n"): 132 | log_score = self.model.score(line) 133 | length = len(line.split()) + 1 134 | doc_log_score += log_score 135 | doc_length += length 136 | return round(self.pp(doc_log_score, doc_length), 1) 137 | 138 | def normalize( 139 | self, 140 | line: str, 141 | accent: bool = True, 142 | case: bool = True, 143 | numbers: bool = True, 144 | punct: int = 1, 145 | ) -> str: 146 | line = line.strip() 147 | if not line: 148 | return line 149 | if case: 150 | line = line.lower() 151 | if accent: 152 | line = self.strip_accents(line) 153 | if numbers: 154 | line = self.digit_re.sub("0", line) 155 | if punct == 1: 156 | line = self.replace_unicode_punct(line) 157 | elif punct == 2: 158 | line = self.remove_unicode_punct(line) 159 | line = self.remove_non_printing_char(line) 160 | return line 161 | 162 | def strip_accents(self, line: str) -> str: 163 | """Strips accents from a piece of text.""" 164 | nfd = unicodedata.normalize("NFD", line) 165 | output = [c for c in nfd if unicodedata.category(c) != "Mn"] 166 | if len(output) == line: 167 | return line 168 | return "".join(output) 169 | 170 | def replace_unicode_punct(self, text: str) -> str: 171 | return "".join(self.unicode_punct.get(c, c) for c in text) 172 | 173 | def remove_unicode_punct(self, text: str) -> str: 174 | """More aggressive version of 
replace_unicode_punct but also faster.""" 175 | return self.unicode_punct_re.sub("", text) 176 | 177 | def remove_non_printing_char(self, text: str) -> str: 178 | return self.non_printing_chars_re.sub("", text) 179 | 180 | 181 | class PerplexityFilter(BaseModel): 182 | model: KenlmModel = None 183 | min_threshold: int = 0 184 | max_threshold: int = 1000 185 | model_config = ConfigDict(arbitrary_types_allowed=True) 186 | 187 | def __init__(self, language: str, min_threshold: int = 0, max_threshold: int = 1000): 188 | super().__init__() 189 | self.min_threshold = min_threshold 190 | self.max_threshold = max_threshold 191 | self.model = KenlmModel.from_pretrained(language) 192 | 193 | def __call__(self, doc: str) -> bool: 194 | # returns True if the perplexity of the document outside of the threshold, 195 | # meaning smaller than min_threshold or larger than max_threshold 196 | perplexity = self.model.get_perplexity(doc) 197 | if perplexity < self.min_threshold or perplexity > self.max_threshold: 198 | return True 199 | # otherwise keep 200 | return False 201 | -------------------------------------------------------------------------------- /easyllm/data/filters/length.py: -------------------------------------------------------------------------------- 1 | 2 | from pydantic import BaseModel 3 | 4 | 5 | class LengthFilter(BaseModel): 6 | """ 7 | Desc: Removes documents below or above a certain length of words 8 | """ 9 | 10 | name: str = "length" 11 | min_length: int = 10 12 | max_length: int = 1_000_000 13 | 14 | def __call__(self, text): 15 | num_words = len(text.split()) 16 | 17 | if num_words < self.min_length or num_words > self.max_length: 18 | return True 19 | # otherwise keep 20 | return False 21 | -------------------------------------------------------------------------------- /easyllm/data/filters/longword.py: -------------------------------------------------------------------------------- 1 | 2 | from pydantic import BaseModel 3 | 4 | 5 | class LongWordFilter(BaseModel): 6 | """ 7 | Ref: C4 Raffel et al. 8 | Desc: If document includes words with > 1000 character are removed, e.g. js or minified files. 
9 | """ 10 | 11 | name: str = "long_word" 12 | max_length: int = 1000 13 | 14 | def __call__(self, text): 15 | words = text.split() 16 | max_len = max(len(word) for word in words) 17 | if max_len > self.max_length: 18 | return True 19 | # otherwise keep 20 | return False 21 | -------------------------------------------------------------------------------- /easyllm/data/filters/n_gram.py: -------------------------------------------------------------------------------- 1 | from collections import Counter 2 | from itertools import chain 3 | 4 | from pydantic import BaseModel 5 | 6 | 7 | def get_ngrams(input_list, n): 8 | return list(zip(*[input_list[i:] for i in range(n)])) 9 | 10 | 11 | class TopNGramsFilter(BaseModel): 12 | """ 13 | Ref: Gopher (Rae et al., 2021) 14 | Desc: If the document shrinks by > 20% after removing top n-grams then remove 15 | """ 16 | 17 | name: str = "top_n_grams" 18 | remove_percentage: float = 0.2 19 | n: int = 2 20 | 21 | def __call__(self, text): 22 | words = text.split() 23 | if len(words) <= self.n: 24 | return True 25 | ngrams = get_ngrams(words, self.n) 26 | n_grams = Counter(chain(ngrams)) 27 | most_common = n_grams.most_common(1)[0][0] 28 | 29 | if n_grams[most_common] / len(n_grams) > self.remove_percentage: 30 | return True 31 | # otherwise keep 32 | return False 33 | -------------------------------------------------------------------------------- /easyllm/data/filters/non_alpha_numeric.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class NonAlphaNumericFilter(BaseModel): 7 | """ 8 | Ref: Gopher (Rae et al., 2021) 9 | Desc: If more than 20% of the document is non-alphanumeric then remove 10 | """ 11 | 12 | name: str = "non_alpha_numeric" 13 | regex: re.Pattern = re.compile(r"[^a-zA-Z0-9\s]") 14 | remove_percentage: float = 0.2 15 | 16 | def __call__(self, text): 17 | num_characters = len(text) 18 | # check if there are any characters in the text 19 | if num_characters == 0: 20 | return True 21 | # calculate the percentage of non-alphanumeric characters 22 | percentage = 1 - ((num_characters - len(self.regex.findall(text))) / num_characters) 23 | # if the percentage is greater than the remove_percentage then remove 24 | if percentage > self.remove_percentage: 25 | return True 26 | # otherwise keep 27 | return False 28 | -------------------------------------------------------------------------------- /easyllm/data/filters/parantheses_ration.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class ParenthesesRationFilter(BaseModel): 7 | """ 8 | Desc: If more than 10% of the document are Parentheses then remove 9 | """ 10 | 11 | name: str = "parentheses_ratio" 12 | regex: re.Pattern = re.compile(r"\[|\]|\(|\)|{|}|⟨|⟩") 13 | remove_percentage: float = 0.1 14 | 15 | def __call__(self, text): 16 | # parentheses characters 17 | parentheses_count = len(self.regex.findall(text)) 18 | sentence_length = len(text) 19 | # check if the ratio of parentheses to text is greater than the remove percentage 20 | if parentheses_count / sentence_length > self.remove_percentage: 21 | return True 22 | # otherwise keep 23 | return False 24 | -------------------------------------------------------------------------------- /easyllm/data/filters/punctuation.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from 
pydantic import BaseModel 4 | 5 | 6 | class PunctuationFilter(BaseModel): 7 | """ 8 | Ref: C4 Raffel et al. 9 | Desc: If less than 15% of the sentences end with a punctuation mark then remove 10 | """ 11 | 12 | name: str = "punctuation" 13 | punctuations: List[str] = [".", "!", "?"] 14 | remove_percentage: float = 0.15 15 | 16 | def __call__(self, text): 17 | sentences = text.split("\n") 18 | # count the number of sentences ending with a punctuation mark 19 | punc_counter = 0 20 | for sentence in sentences: 21 | for punc in self.punctuations: 22 | if sentence.endswith(punc): 23 | punc_counter += 1 24 | break 25 | # check if the ratio of sentences not ending with a punctuation mark is greater than the remove percentage 26 | if punc_counter / len(sentences) < self.remove_percentage: 27 | return True 28 | # otherwise keep 29 | return False 30 | 31 | 32 | class EllipsisFilter(BaseModel): 33 | """ 34 | Ref: C4 Raffel et al. 35 | Desc: If more than 30% of the sentences endwith an elipsis then remove 36 | """ 37 | 38 | name: str = "ellipsis" 39 | ellipsis: List[str] = ["...", "[...]", "…", "(...)", "[…]", "-»", "read more..", "read more"] 40 | remove_percentage: float = 0.3 41 | 42 | def __call__(self, text): 43 | sentences = text.split("\n") 44 | # count the number of sentences ending with an ellipsis 45 | ellipsis_counter = 0 46 | for sentence in sentences: 47 | for ellipsis in self.ellipsis: 48 | if sentence.endswith(ellipsis): 49 | ellipsis_counter += 1 50 | break 51 | # check if the ratio of sentences ending with an ellipsis is greater than the remove percentage 52 | if ellipsis_counter / len(sentences) > self.remove_percentage: 53 | return True 54 | # otherwise keep 55 | return False 56 | -------------------------------------------------------------------------------- /easyllm/data/filters/repeating.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class RepeatedLinesFilter(BaseModel): 5 | """ 6 | Ref: Gopher (Rae et al., 2021) 7 | Desc: If the document shrinks by > 30% after removing repeated lines then remove 8 | """ 9 | 10 | name: str = "repeated_lines" 11 | remove_percentage: float = 0.3 12 | 13 | def __call__(self, text): 14 | # split the text into lines 15 | lines = text.split("\n") 16 | # remove empty lines 17 | lines = [line for line in lines if line.strip()] 18 | if len(lines) == 0: 19 | return True 20 | # remove repeated lines 21 | unique_lines = list(set(lines)) 22 | # calculate the percentage of lines removed 23 | if len(unique_lines) / len(lines) < self.remove_percentage: 24 | return True 25 | # otherwise keep 26 | return False 27 | 28 | 29 | class RepeatedParagraphFilter(BaseModel): 30 | """ 31 | Ref: Gopher (Rae et al., 2021) 32 | Desc: If the document shrinks by > 30% after removing repeated paragraphs then remove 33 | """ 34 | 35 | name: str = "repeated_paragraph" 36 | remove_percentage: float = 0.3 37 | 38 | def __call__(self, text): 39 | # split the text into lines 40 | paragraphes = text.split("\n\n") 41 | # remove empty paragraph 42 | paragraphes = [p for p in paragraphes if p.strip()] 43 | if len(paragraphes) == 0: 44 | return True 45 | # remove repeated paragraphes 46 | unique_paragraphes = list(set(paragraphes)) 47 | # calculate the percentage of lines removed 48 | if len(unique_paragraphes) / len(paragraphes) < self.remove_percentage: 49 | return True 50 | # otherwise keep 51 | return False 52 | -------------------------------------------------------------------------------- 
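All filters in `easyllm/data/filters` follow the same callable convention: calling a filter on a document returns `True` when the document should be dropped and `False` when it should be kept. A minimal, illustrative sketch of chaining several of the exported filters over raw documents (the `keep_document` helper and the sample documents are not part of the repository; behavior depends on the filter defaults shown above):

```python
from easyllm.data.filters import (
    CommonWordFilter,
    LengthFilter,
    LongWordFilter,
    PunctuationFilter,
    UrlRatioFilter,
)

# Each filter returns True when a document should be removed.
filters = [LengthFilter(), CommonWordFilter(), LongWordFilter(), PunctuationFilter(), UrlRatioFilter()]


def keep_document(text: str) -> bool:
    # Keep the document only if no filter flags it for removal.
    return not any(f(text) for f in filters)


documents = [
    "This is a long enough example document that contains common words and ends with punctuation.",
    "x" * 2000,  # a single 2000-character token, flagged by LengthFilter and LongWordFilter
]
cleaned = [doc for doc in documents if keep_document(doc)]
```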
/easyllm/data/filters/url_ratio.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class UrlRatioFilter(BaseModel): 7 | """ 8 | Desc: If more than 20% of the document are urls then remove 9 | """ 10 | 11 | name: str = "url_ratio" 12 | regex: re.Pattern[ 13 | str 14 | ] = r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)" 15 | remove_percentage: float = 0.2 16 | 17 | def __call__(self, text): 18 | # find all urls 19 | urls = re.findall(self.regex, text) 20 | # check if the ratio of urls to words is greater than the remove percentage 21 | if len(urls) / len(text.split()) > self.remove_percentage: 22 | return True 23 | # otherwise keep 24 | return False 25 | -------------------------------------------------------------------------------- /easyllm/data/filters/whitespace_ration.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class WhitespaceRatioFilter(BaseModel): 7 | """ 8 | Desc: If more than 25% of the document are bulletpoints then remove 9 | """ 10 | 11 | name: str = "whitespace_ratio" 12 | regex: re.Pattern = re.compile(r"\s") 13 | remove_percentage: float = 0.25 14 | 15 | def __call__(self, text): 16 | # whitespace characters 17 | whitespace_count = len(self.regex.findall(text)) 18 | text_length = len(text) 19 | # check if the ratio of whitespace to text is greater than the remove percentage 20 | if whitespace_count / text_length > self.remove_percentage: 21 | return True 22 | # otherwise keep 23 | return False 24 | -------------------------------------------------------------------------------- /easyllm/data/filters/words_to_symbol.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class SymbolToWordFilter(BaseModel): 7 | """ 8 | Ref: Gopher (Rae et al., 2021) 9 | Desc: If more than 10% of the document are symbols (hashes [#] or ellipsis (...)) then remove 10 | """ 11 | 12 | name: str = "symbol_to_word" 13 | regex: re.Pattern = r"(\#+|(\.{3,}))(?!\w)" 14 | remove_percentage: float = 0.1 15 | 16 | def __call__(self, text: str): 17 | num_hashes = len(re.findall(r"\#+", text)) 18 | num_ellipses = len(re.findall(r"\.{3,}", text)) 19 | num_words = len(re.findall(r"\w+", text)) 20 | 21 | # check if there are any words in the text 22 | if num_words == 0: 23 | return True 24 | 25 | hash_ratio = num_hashes / num_words 26 | ellipses_ratio = num_ellipses / num_words 27 | 28 | # if the percentage is greater than the remove_percentage then remove 29 | if hash_ratio > self.remove_percentage or ellipses_ratio > self.remove_percentage: 30 | return True 31 | 32 | # otherwise keep 33 | return False 34 | -------------------------------------------------------------------------------- /easyllm/evol_instruct/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philschmid/easyllm/1f37a9307d68545f41639f392a301baadda7188a/easyllm/evol_instruct/__init__.py -------------------------------------------------------------------------------- /easyllm/prompt_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from easyllm.prompt_utils.anthropic import anthropic_stop_sequences, build_anthropic_prompt 2 | 3 | from .chatml_hf import ( 4 | 
build_chatml_falcon_prompt, 5 | build_chatml_starchat_prompt, 6 | chatml_falcon_stop_sequences, 7 | chatml_starchat_stop_sequences, 8 | ) 9 | from .falcon import build_falcon_prompt 10 | from .llama2 import build_llama2_prompt, llama2_stop_sequences 11 | from .open_assistant import build_open_assistant_prompt, open_assistant_stop_sequences 12 | from .stablebeluga import build_stablebeluga_prompt, stablebeluga_stop_sequences 13 | from .vicuna import build_vicuna_prompt, vicuna_stop_sequences 14 | from .wizardlm import build_wizardlm_prompt, wizardlm_stop_sequences 15 | 16 | PROMPT_MAPPING = { 17 | "chatml_falcon": build_chatml_falcon_prompt, 18 | "chatml_starchat": build_chatml_starchat_prompt, 19 | "llama2": build_llama2_prompt, 20 | "open_assistant": build_open_assistant_prompt, 21 | "stablebeluga": build_stablebeluga_prompt, 22 | "vicuna": build_vicuna_prompt, 23 | "wizardlm": build_wizardlm_prompt, 24 | "falcon": build_falcon_prompt, 25 | "anthropic": build_anthropic_prompt, 26 | } 27 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/anthropic.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | # Define stop sequences for anthropic 6 | anthropic_stop_sequences = ["\n\nUser:", "User:"] 7 | 8 | 9 | def build_anthropic_prompt(messages: Union[List[Dict[str, str]], str, List[ChatMessage]]) -> str: 10 | """ 11 | Builds an Anthropic prompt for a chat conversation. Reference: https://docs.anthropic.com/claude/docs/introduction-to-prompt-design 12 | 13 | Args: 14 | messages (Union[List[ChatMessage], str]): The messages to use for the completion. 15 | Returns: 16 | str: The anthropic prompt string.
17 | """ 18 | ANTHROPIC_USER_TOKEN = "\n\nHuman:" 19 | ANTHROPIC_ASSISTANT_TOKEN = "\n\nAssistant:" 20 | 21 | conversation = [] 22 | 23 | if isinstance(messages, str): 24 | messages = [ChatMessage(content="", role="system"), ChatMessage(content=messages, role="user")] 25 | else: 26 | if isinstance(messages[0], dict): 27 | messages = [ChatMessage(**message) for message in messages] 28 | 29 | for index, message in enumerate(messages): 30 | if message.role == "user": 31 | conversation.append(f"{ANTHROPIC_USER_TOKEN} {message.content.strip()}") 32 | elif message.role == "assistant": 33 | conversation.append(f"{ANTHROPIC_ASSISTANT_TOKEN} {message.content.strip()}") 34 | elif message.role == "function": 35 | raise ValueError("anthropic does not support function calls.") 36 | elif message.role == "system" and index == 0: 37 | conversation.append(message.content) 38 | else: 39 | raise ValueError(f"Invalid message role: {message.role}") 40 | 41 | return "".join(conversation) + ANTHROPIC_ASSISTANT_TOKEN + " " 42 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/base.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | 3 | from easyllm.prompt_utils import PROMPT_MAPPING 4 | from easyllm.schema.base import ChatMessage 5 | from easyllm.utils import setup_logger 6 | 7 | logger = setup_logger() 8 | 9 | 10 | def buildBasePrompt(messages: List[ChatMessage]) -> str: 11 | conversation = [] 12 | 13 | for index, message in enumerate(messages): 14 | if message.role == "user": 15 | conversation.append(f"USER: {message.content.strip()}") 16 | elif message.role == "assistant": 17 | conversation.append(f"ASSISTANT: {message.content}") 18 | elif message.role == "function": 19 | raise ValueError("Llama 2 does not support function calls.") 20 | elif message.role == "system" and index == 0: 21 | conversation.append(message.content) 22 | else: 23 | raise ValueError(f"Invalid message role: {message.role}") 24 | 25 | return "".join(conversation) 26 | 27 | 28 | def build_prompt(messages: List[ChatMessage], builder: Union[str, callable]) -> str: 29 | """ 30 | Tries to find the prompt builder in the PROMPT_MAPPING and returns a formatted prompt. 31 | """ 32 | if isinstance(builder, str): 33 | prompt_builder = PROMPT_MAPPING.get(builder, None) 34 | if prompt_builder is None: 35 | raise ValueError( 36 | f"Prompt builder {builder} not found. Are you sure you spelled it correctly? \ 37 | Available prompt builders are: {PROMPT_MAPPING.keys()}. 
\ 38 | You can open an issue or PR to add more prompt builders at https://github.com/philschmid/easyllm" 39 | ) 40 | prompt = prompt_builder(messages) 41 | else: 42 | prompt = builder(messages) 43 | 44 | logger.debug(f"Prompt sent to model will be:\n{prompt}") 45 | return prompt 46 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/chatml_hf.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | chatml_falcon_stop_sequences = ["<|endoftext|>"] 6 | 7 | 8 | def build_chatml_falcon_prompt(messages: Union[List[Dict[str, str]], str]) -> str: 9 | EOS_TOKEN = "<|endoftext|>" 10 | return build_chatml_hf_prompt(messages, EOS_TOKEN) 11 | 12 | 13 | chatml_starchat_stop_sequences = ["<|end|>"] 14 | 15 | 16 | def build_chatml_starchat_prompt(messages: Union[List[Dict[str, str]], str]) -> str: 17 | EOS_TOKEN = "<|end|>" 18 | return build_chatml_hf_prompt(messages, EOS_TOKEN) 19 | 20 | 21 | def build_chatml_hf_prompt(messages: Union[List[Dict[str, str]], str], EOS_TOKEN="<|end|>") -> str: 22 | """ 23 | Uses the HuggingFaceH4 ChatML template used in models like StarChat or Falcon. Uses <|user|>, <|end|>, <|system|>, and <|assistant|> tokens. If a Message with an unsupported role is passed, an error will be thrown. 24 | <|system|>\nYou are a chat bot.<|end|>\n<|user|>\nHello!<|end|>\n<|assistant|>\nHi there!<|end|>\n<|assistant|> 25 | Args: 26 | messages (:obj:`List[ChatMessage]`): The messages to use for the completion. 27 | """ 28 | 29 | SYSTEM_TOKEN = "<|system|>" 30 | USER_TOKEN = "<|user|>" 31 | ASSISTANT_TOKEN = "<|assistant|>" 32 | conversation = [] 33 | 34 | if isinstance(messages, str): 35 | messages = [ChatMessage(content="", role="system"), ChatMessage(content=messages, role="user")] 36 | else: 37 | if isinstance(messages[0], dict): 38 | messages = [ChatMessage(**message) for message in messages] 39 | 40 | for index, message in enumerate(messages): 41 | if message.role == "user": 42 | conversation.append(f"{USER_TOKEN}\n{message.content.strip()}{EOS_TOKEN}\n") 43 | elif message.role == "assistant": 44 | conversation.append(f"{ASSISTANT_TOKEN}\n{message.content.strip()}{EOS_TOKEN}\n") 45 | elif message.role == "function": 46 | raise ValueError("HF ChatML does not support function calls.") 47 | elif message.role == "system" and index == 0: 48 | conversation.append(f"{SYSTEM_TOKEN}\n{message.content.strip()}{EOS_TOKEN}\n") 49 | else: 50 | raise ValueError(f"Invalid message role: {message.role}") 51 | 52 | return "".join(conversation) + ASSISTANT_TOKEN 53 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/falcon.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | # Define stop sequences for falcon 6 | falcon_stop_sequences = ["\nUser:", "<|endoftext|>", " User:", "###"] 7 | 8 | 9 | def build_falcon_prompt(messages: Union[List[Dict[str, str]], str, List[ChatMessage]]) -> str: 10 | """ 11 | Builds a falcon prompt for a chat conversation. Reference: https://huggingface.co/blog/falcon-180b#prompt-format 12 | 13 | Args: 14 | messages (Union[List[ChatMessage], str]): The messages to use for the completion. 15 | Returns: 16 | str: The falcon prompt string.
17 | """ 18 | FALCON_SYSTEM_TOKEN = "System: " 19 | FALCON_USER_TOKEN = "User: " 20 | FALCON_ASSISTANT_TOKEN = "Falcon: " 21 | 22 | conversation = [] 23 | 24 | if isinstance(messages, str): 25 | messages = [ChatMessage(content="", role="system"), ChatMessage(content=messages, role="user")] 26 | else: 27 | if isinstance(messages[0], dict): 28 | messages = [ChatMessage(**message) for message in messages] 29 | 30 | for index, message in enumerate(messages): 31 | if message.role == "user": 32 | conversation.append(f"{FALCON_USER_TOKEN}{message.content.strip()}\n") 33 | elif message.role == "assistant": 34 | conversation.append(f"{FALCON_ASSISTANT_TOKEN}{message.content.strip()}\n") 35 | elif message.role == "function": 36 | raise ValueError("falcon does not support function calls.") 37 | elif message.role == "system" and index == 0: 38 | conversation.append(f"{FALCON_SYSTEM_TOKEN}{message.content}\n") 39 | else: 40 | raise ValueError(f"Invalid message role: {message.role}") 41 | 42 | return "".join(conversation) + FALCON_ASSISTANT_TOKEN 43 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/llama2.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | llama2_stop_sequences = ["</s>"] 6 | 7 | 8 | def build_llama2_prompt(messages: Union[List[Dict[str, str]], str]) -> str: 9 | """ 10 | Uses LLama 2 chat tokens (`[INST]`) to create a prompt, learn more in the [Hugging Face Blog on how to prompt Llama 2](https://huggingface.co/blog/llama2#how-to-prompt-llama-2). If a `Message` with an unsupported `role` is passed, an error will be thrown. 11 | Args: 12 | messages (:obj:`List[ChatMessage]`): The messages to use for the completion. 13 | """ 14 | 15 | startPrompt = "<s>[INST] " 16 | endPrompt = " [/INST]" 17 | conversation = [] 18 | 19 | if isinstance(messages, str): 20 | messages = [ChatMessage(content=messages, role="user")] 21 | else: 22 | if isinstance(messages[0], dict): 23 | messages = [ChatMessage(**message) for message in messages] 24 | 25 | for index, message in enumerate(messages): 26 | if message.role == "user": 27 | conversation.append(message.content.strip()) 28 | elif message.role == "assistant": 29 | conversation.append(f" [/INST] {message.content}</s><s>[INST] ") 30 | elif message.role == "function": 31 | raise ValueError("Llama 2 does not support function calls.") 32 | elif message.role == "system" and index == 0: 33 | conversation.append(f"<<SYS>>\n{message.content}\n<</SYS>>\n\n") 34 | else: 35 | raise ValueError(f"Invalid message role: {message.role}") 36 | 37 | return startPrompt + "".join(conversation) + endPrompt 38 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/open_assistant.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | open_assistant_stop_sequences = ["</s>"] 6 | 7 | 8 | def build_open_assistant_prompt(messages: Union[List[Dict[str, str]], str], EOS_TOKEN="<|end|>") -> str: 9 | """ 10 | Uses the Open Assistant ChatML template used in its models. Uses <|prompter|>, </s>, <|system|>, and <|assistant|> tokens. If a Message with an unsupported role is passed, an error will be thrown. 11 | <|system|>system message<|prompter|>user prompt<|assistant|> 12 | Args: 13 | messages (:obj:`List[ChatMessage]`): The messages to use for the completion.
14 | """ 15 | 16 | SYSTEM_TOKEN = "<|system|>" 17 | USER_TOKEN = "<|prompter|>" 18 | ASSISTANT_TOKEN = "<|assistant|>" 19 | EOS_TOKEN = "</s>" 20 | conversation = [] 21 | 22 | if isinstance(messages, str): 23 | messages = [ChatMessage(content="", role="system"), ChatMessage(content=messages, role="user")] 24 | else: 25 | if isinstance(messages[0], dict): 26 | messages = [ChatMessage(**message) for message in messages] 27 | 28 | for index, message in enumerate(messages): 29 | if message.role == "user": 30 | conversation.append(f"{USER_TOKEN}{message.content.strip()}{EOS_TOKEN}") 31 | elif message.role == "assistant": 32 | conversation.append(f"{ASSISTANT_TOKEN}{message.content.strip()}{EOS_TOKEN}") 33 | elif message.role == "function": 34 | raise ValueError("Open Assistant does not support function calls.") 35 | elif message.role == "system" and index == 0: 36 | conversation.append(f"{SYSTEM_TOKEN}{message.content.strip()}{EOS_TOKEN}") 37 | else: 38 | raise ValueError(f"Invalid message role: {message.role}") 39 | 40 | return "".join(conversation) + ASSISTANT_TOKEN 41 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/stablebeluga.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | # Define stop sequences for stablebeluga 6 | stablebeluga_stop_sequences = ["</s>"] 7 | 8 | 9 | def build_stablebeluga_prompt(messages: Union[List[Dict[str, str]], str]) -> str: 10 | """ 11 | Builds a stablebeluga prompt for a chat conversation. Reference: https://huggingface.co/stabilityai/StableBeluga2 12 | 13 | Args: 14 | messages (Union[List[ChatMessage], str]): The messages to use for the completion. 15 | Returns: 16 | str: The stablebeluga prompt string. 17 | """ 18 | SYSTEM_TOKEN = "### System:" 19 | USER_TOKEN = "### User:" 20 | ASSISTANT_TOKEN = "### Assistant:" 21 | 22 | conversation = [] 23 | 24 | if isinstance(messages, str): 25 | messages = [ChatMessage(content="", role="system"), ChatMessage(content=messages, role="user")] 26 | else: 27 | if isinstance(messages[0], dict): 28 | messages = [ChatMessage(**message) for message in messages] 29 | 30 | for index, message in enumerate(messages): 31 | if message.role == "user": 32 | conversation.append(f"{USER_TOKEN}\n{message.content.strip()}\n\n") 33 | elif message.role == "assistant": 34 | conversation.append(f"{ASSISTANT_TOKEN}\n{message.content.strip()}\n\n") 35 | elif message.role == "function": 36 | raise ValueError("stablebeluga does not support function calls.") 37 | elif message.role == "system" and index == 0: 38 | conversation.append(f"{SYSTEM_TOKEN}\n{message.content.strip()}\n\n") 39 | else: 40 | raise ValueError(f"Invalid message role: {message.role}") 41 | 42 | return "".join(conversation) + ASSISTANT_TOKEN 43 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/vicuna.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | # Define stop sequences for Vicuna 6 | vicuna_stop_sequences = ["</s>"] 7 | 8 | 9 | def build_vicuna_prompt(messages: Union[List[Dict[str, str]], str]) -> str: 10 | """ 11 | Builds a Vicuna prompt for a chat conversation.
Reference: https://github.com/lm-sys/FastChat/blob/main/docs/vicuna_weights_version.md#prompt-template 12 | 13 | Args: 14 | messages (Union[List[ChatMessage], str]): The messages to use for the completion. 15 | Returns: 16 | str: The Vicuna prompt string. 17 | """ 18 | VICUNA_EOS_TOKEN = "</s>" 19 | VICUNA_USER_TOKEN = "USER: " 20 | VICUNA_ASSISTANT_TOKEN = "ASSISTANT: " 21 | 22 | conversation = [] 23 | 24 | if isinstance(messages, str): 25 | messages = [ChatMessage(content="", role="system"), ChatMessage(content=messages, role="user")] 26 | else: 27 | if isinstance(messages[0], dict): 28 | messages = [ChatMessage(**message) for message in messages] 29 | 30 | for index, message in enumerate(messages): 31 | if message.role == "user": 32 | conversation.append(f"{VICUNA_USER_TOKEN}{message.content.strip()}\n") 33 | elif message.role == "assistant": 34 | conversation.append(f"{VICUNA_ASSISTANT_TOKEN}{message.content.strip()}{VICUNA_EOS_TOKEN}\n") 35 | elif message.role == "function": 36 | raise ValueError("Vicuna does not support function calls.") 37 | elif message.role == "system" and index == 0: 38 | conversation.append(f"{message.content.strip()}\n\n") 39 | else: 40 | raise ValueError(f"Invalid message role: {message.role}") 41 | 42 | return "".join(conversation) + VICUNA_ASSISTANT_TOKEN 43 | -------------------------------------------------------------------------------- /easyllm/prompt_utils/wizardlm.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from easyllm.schema.base import ChatMessage 4 | 5 | # Define stop sequences for wizardlm 6 | wizardlm_stop_sequences = ["</s>"] 7 | 8 | 9 | def build_wizardlm_prompt(messages: Union[List[Dict[str, str]], str]) -> str: 10 | """ 11 | Builds a WizardLM prompt for a chat conversation. Reference: https://github.com/nlpxucan/WizardLM/blob/4af9edc59e412a49bba51cd1e8cfac2664e909e5/WizardLM/src/infer_wizardlm13b.py#L79 12 | 13 | Args: 14 | messages (Union[List[ChatMessage], str]): The messages to use for the completion. 15 | Returns: 16 | str: The WizardLM prompt string.
17 | """ 18 | WIZARDLM_USER_TOKEN = "USER: " 19 | WIZARDLM_ASSISTANT_TOKEN = "ASSISTANT: " 20 | 21 | conversation = [] 22 | 23 | if isinstance(messages, str): 24 | messages = [ChatMessage(content="", role="system"), ChatMessage(content=messages, role="user")] 25 | else: 26 | if isinstance(messages[0], dict): 27 | messages = [ChatMessage(**message) for message in messages] 28 | 29 | for index, message in enumerate(messages): 30 | if message.role == "user": 31 | conversation.append(f"{WIZARDLM_USER_TOKEN}{message.content.strip()}") 32 | elif message.role == "assistant": 33 | conversation.append(f"{WIZARDLM_ASSISTANT_TOKEN}{message.content.strip()}") 34 | elif message.role == "function": 35 | raise ValueError("WizardLM does not support function calls.") 36 | elif message.role == "system" and index == 0: 37 | conversation.append(f"{message.content.strip()}") 38 | else: 39 | raise ValueError(f"Invalid message role: {message.role}") 40 | 41 | return " ".join(conversation).lstrip() + " " + WIZARDLM_ASSISTANT_TOKEN 42 | -------------------------------------------------------------------------------- /easyllm/schema/base.py: -------------------------------------------------------------------------------- 1 | import importlib.metadata 2 | from typing import Literal, Optional 3 | 4 | from packaging.version import parse 5 | from pydantic import BaseModel 6 | 7 | 8 | def dump_object(object): 9 | if parse(importlib.metadata.version("pydantic")) < parse("2.0.0"): 10 | return object.dict() 11 | else: 12 | return object.model_dump(exclude_none=True) 13 | 14 | 15 | class ChatMessage(BaseModel): 16 | role: Literal["user", "assistant", "function", "system"] 17 | content: str 18 | 19 | 20 | class Usage(BaseModel): 21 | prompt_tokens: int 22 | completion_tokens: Optional[int] = None 23 | total_tokens: int 24 | -------------------------------------------------------------------------------- /easyllm/schema/openai.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import Any, Dict, List, Literal, Optional, Union 3 | 4 | from nanoid import generate 5 | from pydantic import BaseModel, Field 6 | 7 | from easyllm.schema.base import ChatMessage, Usage 8 | 9 | 10 | # More documentation https://platform.openai.com/docs/api-reference/chat/create 11 | # adapted from https://github.com/lm-sys/FastChat/blob/main/fastchat/protocol/openai_api_protocol.py 12 | class ChatCompletionRequest(BaseModel): 13 | messages: List[ChatMessage] 14 | model: Optional[str] = None 15 | temperature: float = 0.9 16 | top_p: float = 0.6 17 | top_k: Optional[int] = 10 18 | n: int = 1 19 | max_tokens: int = 1024 20 | stop: Optional[List[str]] = None 21 | stream: bool = False 22 | frequency_penalty: Optional[float] = 1.0 23 | user: Optional[str] = None 24 | 25 | 26 | class ChatCompletionResponseChoice(BaseModel): 27 | index: int 28 | message: ChatMessage 29 | finish_reason: Optional[Literal["stop_sequence", "length", "eos_token", "max_tokens"]] = None 30 | 31 | 32 | class ChatCompletionResponse(BaseModel): 33 | id: str = Field(default_factory=lambda: f"hf-{generate(size=10)}") 34 | object: str = "chat.completion" 35 | created: int = Field(default_factory=lambda: int(time.time())) 36 | model: Optional[str] = "custom" 37 | choices: List[ChatCompletionResponseChoice] 38 | usage: Usage 39 | 40 | 41 | class DeltaMessage(BaseModel): 42 | role: Optional[str] = None 43 | content: Optional[str] = None 44 | 45 | 46 | class ChatCompletionResponseStreamChoice(BaseModel): 47 | index: int 48 | 
delta: Union[DeltaMessage, Dict[str, str]] 49 | finish_reason: Optional[Literal["stop", "length"]] = None 50 | 51 | 52 | class ChatCompletionStreamResponse(BaseModel): 53 | id: str = Field(default_factory=lambda: f"hf-{generate(size=10)}") 54 | object: str = "chat.completion.chunk" 55 | created: int = Field(default_factory=lambda: int(time.time())) 56 | model: Optional[str] = None 57 | choices: List[ChatCompletionResponseStreamChoice] 58 | 59 | 60 | class CompletionRequest(BaseModel): 61 | model: Optional[str] = None 62 | prompt: Union[str, List[Any]] 63 | suffix: Optional[str] = None 64 | temperature: float = 0.9 65 | top_p: float = 0.6 66 | top_k: Optional[int] = 10 67 | n: int = 1 68 | max_tokens: int = 1024 69 | stop: Optional[List[str]] = None 70 | stream: bool = False 71 | frequency_penalty: Optional[float] = 1.0 72 | user: Optional[str] = None 73 | logprobs: bool = False 74 | echo: bool = False 75 | 76 | 77 | class CompletionResponseChoice(BaseModel): 78 | index: int 79 | text: str 80 | logprobs: Union[Optional[List[Dict[str, Any]]], float] = None 81 | finish_reason: Optional[Literal["stop_sequence", "length", "eos_token"]] = None 82 | 83 | 84 | class CompletionResponse(BaseModel): 85 | id: str = Field(default_factory=lambda: f"hf-{generate(size=10)}") 86 | object: str = "text.completion" 87 | created: int = Field(default_factory=lambda: int(time.time())) 88 | model: Optional[str] = "custom" 89 | choices: List[CompletionResponseChoice] 90 | usage: Usage 91 | 92 | 93 | class CompletionResponseStreamChoice(BaseModel): 94 | index: int 95 | text: str 96 | logprobs: Optional[float] = None 97 | finish_reason: Optional[Literal["stop_sequence", "length", "eos_token"]] = None 98 | 99 | 100 | class CompletionStreamResponse(BaseModel): 101 | id: str = Field(default_factory=lambda: f"hf-{generate(size=10)}") 102 | object: str = "text.completion" 103 | created: int = Field(default_factory=lambda: int(time.time())) 104 | model: Optional[str] = "custom" 105 | choices: List[CompletionResponseStreamChoice] 106 | 107 | 108 | class EmbeddingsRequest(BaseModel): 109 | model: Optional[str] = None 110 | input: Union[str, List[Any]] 111 | user: Optional[str] = None 112 | 113 | 114 | class EmbeddingsObjectResponse(BaseModel): 115 | index: int 116 | object: str = "embedding" 117 | embedding: List[float] 118 | 119 | 120 | class EmbeddingsResponse(BaseModel): 121 | object: str = "list" 122 | data: List[EmbeddingsObjectResponse] 123 | model: Optional[str] = "custom" 124 | usage: Usage 125 | -------------------------------------------------------------------------------- /easyllm/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from easyllm.utils.aws import AWSSigV4, get_bedrock_client 2 | from easyllm.utils.logging import setup_logger 3 | -------------------------------------------------------------------------------- /easyllm/utils/logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | 6 | def setup_logger() -> logging.Logger: 7 | # get local rank 8 | local_rank = int(os.environ.get("LOCAL_RANK", 0)) 9 | 10 | # create logger 11 | logger = logging.getLogger(__name__) 12 | 13 | # Setup logging 14 | logging.basicConfig( 15 | format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", 16 | datefmt="%m/%d/%Y %H:%M:%S", 17 | handlers=[logging.StreamHandler(sys.stdout)], 18 | ) 19 | 20 | if local_rank != 0: 21 | # disable logging for non-master processes 22 | 
print(f"Disabling logging for non-master process with local rank {local_rank}.") 23 | logging.disable(logging.CRITICAL) 24 | return logger 25 | else: 26 | log_level = logging.INFO 27 | # set the main code and the modules it uses to the same log-level according to the node 28 | logger.setLevel(log_level) 29 | # datasets_logging.set_verbosity(log_level) 30 | # trfs_logging.set_verbosity(log_level) 31 | return logger 32 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | .PHONY: style check test docs copy-docs docs-deploy 2 | 3 | check_dirs := . 4 | 5 | style: 6 | ruff $(check_dirs) --fix 7 | check: 8 | ruff $(check_dirs) 9 | test: 10 | pytest 11 | 12 | 13 | copy-docs: 14 | cp -r notebooks/* docs/examples/ 15 | 16 | docs: 17 | $(MAKE) copy-docs 18 | mkdocs serve 19 | 20 | docs-build: 21 | $(MAKE) copy-docs 22 | mkdocs build 23 | 24 | docs-deploy: 25 | $(MAKE) copy-docs 26 | mkdocs gh-deploy --force -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | # Project information 2 | site_name: EasyLLM 3 | site_author: Philipp Schmid 4 | site_url: https://philschmid.github.io/easyllm/ 5 | site_description: >- 6 | EasyLLM is an open source project that provides helpful tools and methods for working with large language models (LLMs), both open source and closed source. 7 | 8 | # Repository 9 | repo_name: philschmid/easyllm 10 | repo_url: https://github.com/philschmid/easyllm 11 | 12 | # Copyright 13 | copyright: Copyright © 2023 Philipp Schmid 14 | 15 | # mkdocs.yml 16 | theme: 17 | name: 'material' 18 | features: 19 | - announce.dismiss 20 | - content.action.edit 21 | - content.action.view 22 | - content.code.annotate 23 | - content.code.copy 24 | # - content.code.select 25 | # - content.tabs.link 26 | - content.tooltips 27 | # - header.autohide 28 | - navigation.expand 29 | - navigation.footer 30 | - navigation.indexes 31 | # - navigation.instant 32 | # - navigation.prune 33 | - navigation.sections 34 | - navigation.tabs 35 | - navigation.tabs.sticky 36 | - navigation.top 37 | - navigation.tracking 38 | - navigation.path 39 | - search.highlight 40 | - search.share 41 | - search.suggest 42 | - toc.follow 43 | # - toc.integrate 44 | palette: 45 | - scheme: default 46 | primary: teal 47 | accent: teal 48 | toggle: 49 | icon: material/brightness-7 50 | name: Switch to dark mode 51 | - scheme: slate 52 | primary: teal 53 | accent: teal 54 | toggle: 55 | icon: material/brightness-4 56 | name: Switch to light mode 57 | font: 58 | text: Roboto 59 | code: Roboto Mono 60 | 61 | plugins: 62 | - search 63 | - tags 64 | # - social 65 | - mkdocs-jupyter: 66 | include: ["examples/*.ipynb"] # Default: ["*.py", "*.ipynb"] 67 | - mkdocstrings 68 | # Extensions 69 | markdown_extensions: 70 | - abbr 71 | - admonition 72 | - attr_list 73 | - def_list 74 | - footnotes 75 | - md_in_html 76 | - toc: 77 | permalink: true 78 | - pymdownx.arithmatex: 79 | generic: true 80 | - pymdownx.betterem: 81 | smart_enable: all 82 | - pymdownx.caret 83 | - pymdownx.details 84 | - pymdownx.emoji: 85 | emoji_generator: !!python/name:materialx.emoji.to_svg 86 | emoji_index: !!python/name:materialx.emoji.twemoji 87 | - pymdownx.highlight: 88 | anchor_linenums: true 89 | line_spans: __span 90 | pygments_lang_class: true 91 | - pymdownx.inlinehilite 92 | - pymdownx.keys 93 | - 
pymdownx.magiclink: 94 | repo_url_shorthand: true 95 | user: squidfunk 96 | repo: mkdocs-material 97 | - pymdownx.mark 98 | - pymdownx.smartsymbols 99 | - pymdownx.superfences: 100 | custom_fences: 101 | - name: mermaid 102 | class: mermaid 103 | format: !!python/name:pymdownx.superfences.fence_code_format 104 | - pymdownx.tabbed: 105 | alternate_style: true 106 | - pymdownx.tasklist: 107 | custom_checkbox: true 108 | - pymdownx.tilde 109 | 110 | nav: 111 | - Documentation: 112 | - EasyLLM: index.md 113 | - Installation: installation.md 114 | - "API Reference": 115 | - "Clients": 116 | - clients/index.md 117 | - clients/huggingface.md 118 | - clients/sagemaker.md 119 | - clients/bedrock.md 120 | - prompt_utils.md 121 | - Examples: 122 | - examples/index.md 123 | - "Hugging Face": 124 | - examples/chat-completion-api.ipynb 125 | - examples/stream-chat-completions.ipynb 126 | - examples/text-completion-api.ipynb 127 | - examples/stream-text-completions.ipynb 128 | - examples/get-embeddings.ipynb 129 | - examples/inference-endpoints-example.ipynb 130 | - examples/llama2-rag-example.ipynb 131 | - examples/llama2-agent-example.ipynb 132 | - examples/falcon-180b-chat.ipynb 133 | - "Amazon SageMaker": 134 | - examples/sagemaker-chat-completion-api.ipynb 135 | - examples/sagemaker-text-completion-api.ipynb 136 | - examples/sagemaker-get-embeddings.ipynb 137 | - "Amazon Bedrock": 138 | - examples/bedrock-chat-completion-api.ipynb 139 | - examples/bedrock-stream-chat-completions.ipynb -------------------------------------------------------------------------------- /notebooks/bedrock-stream-chat-completions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# How to stream Chat Completion requests with Amazon Bedrock\n", 9 | "\n", 10 | "By default, when you request a completion, the entire completion is generated before being sent back in a single response.\n", 11 | "\n", 12 | "If you're generating long completions, waiting for the response can take many seconds.\n", 13 | "\n", 14 | "To get responses sooner, you can 'stream' the completion as it's being generated. This allows you to start printing or processing the beginning of the completion before the full completion is finished.\n", 15 | "\n", 16 | "To stream completions, set `stream=True` when calling the chat completions or completions endpoints. This will return an object that streams back the response as [data-only server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format). Extract chunks from the `delta` field rather than the `message` field.\n", 17 | "\n", 18 | "## Downsides\n", 19 | "\n", 20 | "Note that using `stream=True` in a production application makes it more difficult to moderate the content of the completions, as partial completions may be more difficult to evaluate. \n", 21 | "\n", 22 | "## Setup\n", 23 | "\n", 24 | "Before you can use `easyllm` with Amazon Bedrock you need setup permission and access to the models. 
You can do this by following of the instructions below:\n", 25 | "* https://docs.aws.amazon.com/IAM/latest/UserGuide/getting-set-up.html\n", 26 | "* https://docs.aws.amazon.com/IAM/latest/UserGuide/troubleshoot_access-denied.html\n", 27 | "* https://docs.aws.amazon.com/bedrock/latest/userguide/security-iam.html\n", 28 | "\n", 29 | "## Example code\n", 30 | "\n", 31 | "Below, this notebook shows:\n", 32 | "1. What a typical chat completion response looks like\n", 33 | "2. What a streaming chat completion response looks like\n", 34 | "3. How much time is saved by streaming a chat completion" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# if needed, install and/or upgrade to the latest version of the EasyLLM Python library\n", 44 | "%pip install --upgrade easyllm[bedrock] " 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 1, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# imports\n", 54 | "import easyllm # for API calls" 55 | ] 56 | }, 57 | { 58 | "attachments": {}, 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "### 1. What a typical chat completion response looks like\n", 63 | "\n", 64 | "With a typical ChatCompletions API call, the response is first computed and then returned all at once." 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 1, 70 | "metadata": {}, 71 | "outputs": [ 72 | { 73 | "name": "stdout", 74 | "output_type": "stream", 75 | "text": [ 76 | "10/26/2023 17:34:57 - INFO - easyllm.utils.logging - boto3 Bedrock client successfully created!\n", 77 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334497, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'role': 'assistant'}}]}\n", 78 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334498, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' Here'}}]}\n", 79 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334498, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' is counting to 100 with a comma'}}]}\n", 80 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334498, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' between each number and no newlines:\\n\\n1, 2, 3,'}}]}\n", 81 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334499, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' 4, 5, 6, 7, 8, 9, 10, 11'}}]}\n", 82 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334499, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ', 12, 13, 14, 15, 16, 17, 18,'}}]}\n", 83 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334499, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,'}}]}\n", 84 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334500, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,'}}]}\n", 85 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334500, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,'}}]}\n", 86 | "{'id': 
'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334501, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' 49, 50, 51'}}]}\n", 87 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334501, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ', 52, 53,'}}]}\n", 88 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334502, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' 54, 55, 56'}}]}\n", 89 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334503, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ', 57, 58, 59, 60, 61'}}]}\n", 90 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334504, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ', 62, 63, 64, 65, 66'}}]}\n", 91 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334504, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ', 67, 68, 69, 70, 71, 72, 73,'}}]}\n", 92 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334504, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ' 74, 75, 76, 77, 78, 79, 80, 81'}}]}\n", 93 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334505, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ', 82, 83, 84, 85, 86, 87, 88, 89, 90, 91'}}]}\n", 94 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334505, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {'content': ', 92, 93, 94, 95, 96, 97, 98, 99, 100'}}]}\n", 95 | "{'id': 'hf-Je8BGADPWN', 'object': 'chat.completion.chunk', 'created': 1698334505, 'model': 'anthropic.claude-v2', 'choices': [{'index': 0, 'delta': {}}]}\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "import os \n", 101 | "# set env for prompt builder\n", 102 | "os.environ[\"BEDROCK_PROMPT\"] = \"anthropic\" # vicuna, wizardlm, stablebeluga, open_assistant\n", 103 | "os.environ[\"AWS_REGION\"] = \"us-east-1\" # change to your region\n", 104 | "# os.environ[\"AWS_ACCESS_KEY_ID\"] = \"XXX\" # needed if not using boto3 session\n", 105 | "# os.environ[\"AWS_SECRET_ACCESS_KEY\"] = \"XXX\" # needed if not using boto3 session\n", 106 | "\n", 107 | "from easyllm.clients import bedrock\n", 108 | "\n", 109 | "response = bedrock.ChatCompletion.create(\n", 110 | " model='anthropic.claude-v2',\n", 111 | " messages=[\n", 112 | " {'role': 'user', 'content': 'Count to 100, with a comma between each number and no newlines. E.g., 1, 2, 3, ...'}\n", 113 | " ],\n", 114 | " stream=True\n", 115 | ")\n", 116 | "\n", 117 | "for chunk in response:\n", 118 | " print(chunk)\n" 119 | ] 120 | }, 121 | { 122 | "attachments": {}, 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "As you can see above, streaming responses have a `delta` field rather than a `message` field. `delta` can hold things like:\n", 127 | "- a role token (e.g., `{\"role\": \"assistant\"}`)\n", 128 | "- a content token (e.g., `{\"content\": \"\\n\\n\"}`)\n", 129 | "- nothing (e.g., `{}`), when the stream is over" 130 | ] 131 | }, 132 | { 133 | "attachments": {}, 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "### 3. 
How much time is saved by streaming a chat completion\n", 138 | "\n", 139 | "Now let's ask `meta-llama/Llama-2-70b-chat-hf` to count to 100 again, and see how long it takes." 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 7, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | " Here is counting to 100 with commas and no newlines:\n", 152 | "\n", 153 | "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100Full conversation received: Here is counting to 100 with commas and no newlines:\n", 154 | "\n", 155 | "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "import os \n", 161 | "# set env for prompt builder\n", 162 | "os.environ[\"BEDROCK_PROMPT\"] = \"anthropic\" # vicuna, wizardlm, stablebeluga, open_assistant\n", 163 | "os.environ[\"AWS_REGION\"] = \"us-east-1\" # change to your region\n", 164 | "os.environ[\"AWS_PROFILE\"] = \"hf-sm\" # change to your region\n", 165 | "# os.environ[\"AWS_ACCESS_KEY_ID\"] = \"XXX\" # needed if not using boto3 session\n", 166 | "# os.environ[\"AWS_SECRET_ACCESS_KEY\"] = \"XXX\" # needed if not using boto3 session\n", 167 | "from easyllm.clients import bedrock\n", 168 | "\n", 169 | "# send a ChatCompletion request to count to 100\n", 170 | "response = bedrock.ChatCompletion.create(\n", 171 | " model='anthropic.claude-v2',\n", 172 | " messages=[\n", 173 | " {'role': 'user', 'content': 'Count to 100, with a comma between each number and no newlines. 
E.g., 1, 2, 3, ...'}\n", 174 | " ],\n", 175 | " stream=True\n", 176 | ")\n", 177 | "\n", 178 | "# create variables to collect the stream of chunks\n", 179 | "collected_chunks = []\n", 180 | "collected_messages = []\n", 181 | "# iterate through the stream of events\n", 182 | "for chunk in response:\n", 183 | " collected_chunks.append(chunk) # save the event response\n", 184 | " chunk_message = chunk['choices'][0]['delta'] # extract the message\n", 185 | " print(chunk_message.get('content', ''), end='') # print the message\n", 186 | " collected_messages.append(chunk_message) # save the message\n", 187 | " \n", 188 | "\n", 189 | "# print the time delay and text received\n", 190 | "full_reply_content = ''.join([m.get('content', '') for m in collected_messages])\n", 191 | "print(f\"Full conversation received: {full_reply_content}\")\n" 192 | ] 193 | } 194 | ], 195 | "metadata": { 196 | "kernelspec": { 197 | "display_name": "Python 3.9.9 ('openai')", 198 | "language": "python", 199 | "name": "python3" 200 | }, 201 | "language_info": { 202 | "codemirror_mode": { 203 | "name": "ipython", 204 | "version": 3 205 | }, 206 | "file_extension": ".py", 207 | "mimetype": "text/x-python", 208 | "name": "python", 209 | "nbconvert_exporter": "python", 210 | "pygments_lexer": "ipython3", 211 | "version": "3.8.12" 212 | }, 213 | "orig_nbformat": 4, 214 | "vscode": { 215 | "interpreter": { 216 | "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" 217 | } 218 | } 219 | }, 220 | "nbformat": 4, 221 | "nbformat_minor": 2 222 | } 223 | -------------------------------------------------------------------------------- /notebooks/falcon-180b-chat.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# How to use Chat Completion clients\n", 9 | "\n", 10 | "EasyLLM can be used as an abstract layer to replace `gpt-3.5-turbo` and `gpt-4` with open source models.\n", 11 | "\n", 12 | "You can change your own applications from the OpenAI API, by simply changing the client. \n", 13 | "\n", 14 | "Chat models take a series of messages as input, and return an AI-written message as output.\n", 15 | "\n", 16 | "This guide illustrates the chat format with a few example API calls." 17 | ] 18 | }, 19 | { 20 | "attachments": {}, 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "### 1. Import the easyllm library" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# if needed, install and/or upgrade to the latest version of the EasyLLM Python library\n", 34 | "%pip install --upgrade easyllm " 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 4, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# import the EasyLLM Python library for calling the EasyLLM API\n", 44 | "import easyllm" 45 | ] 46 | }, 47 | { 48 | "attachments": {}, 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "### 2. 
An example chat API call\n", 53 | "\n", 54 | "A chat API call has two required inputs:\n", 55 | "- `model`: the name of the model you want to use (e.g., `meta-llama/Llama-2-70b-chat-hf`) or leave it empty to just call the api\n", 56 | "- `messages`: a list of message objects, where each object has two required fields:\n", 57 | " - `role`: the role of the messenger (either `system`, `user`, or `assistant`)\n", 58 | " - `content`: the content of the message (e.g., `Write me a beautiful poem`)\n", 59 | "\n", 60 | "Compared to OpenAI api is the `huggingface` module also exposing a `prompt_builder` and `stop_sequences` parameter you can use to customize the prompt and stop sequences. The EasyLLM package comes with prompt builder utilities.\n", 61 | "\n", 62 | "Let's look at an example chat API calls to see how the chat format works in practice." 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 1, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "{'id': 'hf-ceVG8KGm04',\n", 74 | " 'object': 'chat.completion',\n", 75 | " 'created': 1695106309,\n", 76 | " 'model': 'tiiuae/falcon-180B-chat',\n", 77 | " 'choices': [{'index': 0,\n", 78 | " 'message': {'role': 'assistant',\n", 79 | " 'content': \"*Knock knock* Who's there? Cat. Cat who? Cat got your tongue?\\nUser:\"},\n", 80 | " 'finish_reason': 'stop_sequence'}],\n", 81 | " 'usage': {'prompt_tokens': 144, 'completion_tokens': 23, 'total_tokens': 167}}" 82 | ] 83 | }, 84 | "execution_count": 1, 85 | "metadata": {}, 86 | "output_type": "execute_result" 87 | } 88 | ], 89 | "source": [ 90 | "import os \n", 91 | "# set env for prompt builder\n", 92 | "os.environ[\"HUGGINGFACE_PROMPT\"] = \"falcon\" # vicuna, wizardlm, stablebeluga, open_assistant\n", 93 | "# os.environ[\"HUGGINGFACE_TOKEN\"] = \"hf_xxx\" \n", 94 | "\n", 95 | "from easyllm.clients import huggingface\n", 96 | "from easyllm.prompt_utils.falcon import falcon_stop_sequences\n", 97 | "\n", 98 | "MODEL=\"tiiuae/falcon-180B-chat\"\n", 99 | "\n", 100 | "response = huggingface.ChatCompletion.create(\n", 101 | " model=MODEL,\n", 102 | " messages=[\n", 103 | " {\"role\": \"system\", \"content\": \"\\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. 
If you don't know the answer to a question, please don't share false information.\"},\n", 104 | " {\"role\": \"user\", \"content\": \"Knock knock.\"},\n", 105 | " {\"role\": \"assistant\", \"content\": \"Who's there?\"},\n", 106 | " {\"role\": \"user\", \"content\": \"Cat.\"},\n", 107 | " ],\n", 108 | " temperature=0.9,\n", 109 | " top_p=0.6,\n", 110 | " max_tokens=1024,\n", 111 | " stop=falcon_stop_sequences,\n", 112 | ")\n", 113 | "response" 114 | ] 115 | }, 116 | { 117 | "attachments": {}, 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "As you can see, the response object has a few fields:\n", 122 | "- `id`: the ID of the request\n", 123 | "- `object`: the type of object returned (e.g., `chat.completion`)\n", 124 | "- `created`: the timestamp of the request\n", 125 | "- `model`: the full name of the model used to generate the response\n", 126 | "- `usage`: the number of tokens used to generate the replies, counting prompt, completion, and total\n", 127 | "- `choices`: a list of completion objects (only one, unless you set `n` greater than 1)\n", 128 | " - `message`: the message object generated by the model, with `role` and `content`\n", 129 | " - `finish_reason`: the reason the model stopped generating text (either `stop`, or `length` if `max_tokens` limit was reached)\n", 130 | " - `index`: the index of the completion in the list of choices" 131 | ] 132 | }, 133 | { 134 | "attachments": {}, 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "Extract just the reply with:" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 2, 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "name": "stdout", 148 | "output_type": "stream", 149 | "text": [ 150 | "*Knock knock* Who's there? Cat. Cat who? Cat got your tongue?\n", 151 | "User:\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "print(response['choices'][0]['message']['content'])" 157 | ] 158 | }, 159 | { 160 | "attachments": {}, 161 | "cell_type": "markdown", 162 | "metadata": {}, 163 | "source": [ 164 | "Even non-conversation-based tasks can fit into the chat format, by placing the instruction in the first user message.\n", 165 | "\n", 166 | "For example, to ask the model to explain asynchronous programming in the style of the pirate Blackbeard, we can structure conversation as follows:" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 3, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | "\n", 179 | "Asynchronous Programming: A Mathematical Approach\n", 180 | "\n", 181 | "Good day, class! Today we're going to discuss a fascinating topic in the world of programming - asynchronous programming. Now, you might be wondering what this has to do with math. Well, just like how mathematical operations can sometimes be synchronous or asynchronous, so too can computer programs.\n", 182 | "\n", 183 | "Let's start by defining our terms. Synchronous processes are those that happen one after another, in a predictable sequence. For example, if you were to add two numbers together, then multiply the result by another number, these operations would typically happen synchronously – the addition occurs first, followed by the multiplication.\n", 184 | "\n", 185 | "Asynchronous processes, on the other hand, don't necessarily follow such a strict order. 
They're more like parallel lines in geometry – they can run alongside each other independently, without waiting for one another to finish. In programming, this means that multiple tasks can be performed at the same time, without one task blocking another from starting.\n", 186 | "\n", 187 | "So why is this useful? Well, imagine you're working on a complex mathematical problem that requires several calculations. If you were to perform these calculations synchronously, you'd have to wait for each calculation to finish before starting the next one. This could take quite some time, especially if your calculations are dependent on external factors such as user input or network latency.\n", 188 | "\n", 189 | "With asynchronous programming, however, you can perform multiple calculations simultaneously. This means that while one calculation is waiting for user input, another can continue processing data from a different source. As a result, your overall computation time is reduced, making your program more efficient and responsive.\n", 190 | "\n", 191 | "Of course, there are challenges involved in asynchronous programming, much like solving an intricate mathematical puzzle. One major issue is ensuring that all asynchronous tasks complete successfully, even if they encounter errors along the way. This requires careful planning and error handling, similar to how you would approach solving a complex equation.\n", 192 | "\n", 193 | "In conclusion, asynchronous programming is a powerful tool in the programmer's toolkit, much like advanced mathematical concepts are essential for solving complex problems. By understanding the principles behind asynchronous processes, you can create more efficient and responsive programs, ready to tackle any challenge that comes their way.\n", 194 | "\n", 195 | "Now, let's put this knowledge into practice with some coding exercises, shall we?\n" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "# example with a system message\n", 201 | "response = huggingface.ChatCompletion.create(\n", 202 | " model=MODEL,\n", 203 | " messages=[\n", 204 | " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", 205 | " {\"role\": \"user\", \"content\": \"Explain asynchronous programming in the style of math teacher.\"},\n", 206 | " ],\n", 207 | " stop=falcon_stop_sequences,\n", 208 | ")\n", 209 | "\n", 210 | "print(response['choices'][0]['message']['content'])\n" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [] 217 | } 218 | ], 219 | "metadata": { 220 | "kernelspec": { 221 | "display_name": "openai", 222 | "language": "python", 223 | "name": "python3" 224 | }, 225 | "language_info": { 226 | "codemirror_mode": { 227 | "name": "ipython", 228 | "version": 3 229 | }, 230 | "file_extension": ".py", 231 | "mimetype": "text/x-python", 232 | "name": "python", 233 | "nbconvert_exporter": "python", 234 | "pygments_lexer": "ipython3", 235 | "version": "3.8.12" 236 | }, 237 | "orig_nbformat": 4, 238 | "vscode": { 239 | "interpreter": { 240 | "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" 241 | } 242 | } 243 | }, 244 | "nbformat": 4, 245 | "nbformat_minor": 2 246 | } 247 | -------------------------------------------------------------------------------- /notebooks/get-embeddings.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## How to create embeddings\n", 8 | "\n", 9 | 
"In this notebook, we will show you how to create embeddings for your own text data and and open source model from Hugging Face hosted as an endpoint on Hugging Face Inference API." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "### 1. Import the easyllm library" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "# if needed, install and/or upgrade to the latest version of the OpenAI Python library\n", 26 | "%pip install --upgrade easyllm " 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "# import the EasyLLM Python library for calling the EasyLLM API\n", 36 | "import easyllm" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "### 2. An example chat API call\n", 44 | "\n", 45 | "A embedding API call has two required inputs:\n", 46 | "- `model`: the name of the model you want to use (e.g., `sentence-transformers/all-MiniLM-L6-v2`) or leave it empty to just call the api\n", 47 | "- `input`: a string or list of strings you want to embed\n", 48 | "\n", 49 | "Let's look at an example API calls to see how the chat format works in practice." 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 2, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "data": { 59 | "text/plain": [ 60 | "384" 61 | ] 62 | }, 63 | "execution_count": 2, 64 | "metadata": {}, 65 | "output_type": "execute_result" 66 | } 67 | ], 68 | "source": [ 69 | "# import os \n", 70 | "# os.environ[\"HUGGINGFACE_TOKEN\"] = \"hf_xxx\" # Use Environment Variable\n", 71 | "\n", 72 | "from easyllm.clients import huggingface\n", 73 | "\n", 74 | "# The module automatically loads the HuggingFace API key from the environment variable HUGGINGFACE_TOKEN or from the HuggingFace CLI configuration file.\n", 75 | "# huggingface.api_key=\"hf_xxx\"\n", 76 | "\n", 77 | "embedding = huggingface.Embedding.create(\n", 78 | " model=\"sentence-transformers/all-MiniLM-L6-v2\",\n", 79 | " input=\"That's a nice car.\",\n", 80 | ")\n", 81 | "\n", 82 | "len(embedding[\"data\"][0][\"embedding\"])" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "Batched Request" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "text/plain": [ 100 | "2" 101 | ] 102 | }, 103 | "execution_count": 3, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "from easyllm.clients import huggingface\n", 110 | "\n", 111 | "# The module automatically loads the HuggingFace API key from the environment variable HUGGINGFACE_TOKEN or from the HuggingFace CLI configuration file.\n", 112 | "# huggingface.api_key=\"hf_xxx\"\n", 113 | "\n", 114 | "embedding = huggingface.Embedding.create(\n", 115 | " model=\"sentence-transformers/all-MiniLM-L6-v2\",\n", 116 | " input=[\"What is the meaning of life?\",\"test\"],\n", 117 | ")\n", 118 | "\n", 119 | "len(embedding[\"data\"])" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [] 128 | } 129 | ], 130 | "metadata": { 131 | "kernelspec": { 132 | "display_name": "Python 3.9.9 ('openai')", 133 | "language": "python", 134 | "name": "python3" 135 | }, 136 | "language_info": { 137 | 
"codemirror_mode": { 138 | "name": "ipython", 139 | "version": 3 140 | }, 141 | "file_extension": ".py", 142 | "mimetype": "text/x-python", 143 | "name": "python", 144 | "nbconvert_exporter": "python", 145 | "pygments_lexer": "ipython3", 146 | "version": "3.8.12" 147 | }, 148 | "orig_nbformat": 4, 149 | "vscode": { 150 | "interpreter": { 151 | "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" 152 | } 153 | } 154 | }, 155 | "nbformat": 4, 156 | "nbformat_minor": 2 157 | } 158 | -------------------------------------------------------------------------------- /notebooks/inference-endpoints-example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Hugging Face Inference Endpoints Example\n", 9 | "\n", 10 | "**[Hugging Face Inference Endpoints](https://ui.endpoints.huggingface.co/)** offers an easy and secure way to deploy Machine Learning models for use in production. Inference Endpoints empower developers and data scientists alike to create AI applications without managing infrastructure: simplifying the deployment process to a few clicks, including handling large volumes of requests with autoscaling, reducing infrastructure costs with scale-to-zero, and offering advanced security.\n", 11 | "\n", 12 | "You can get started with Inference Endpoints at: https://ui.endpoints.huggingface.co/\n", 13 | "\n", 14 | "\n", 15 | "The example assumes that you have an running endpoint for a conversational model, e.g. `https://huggingface.co/meta-llama/Llama-2-13b-chat-hf`" 16 | ] 17 | }, 18 | { 19 | "attachments": {}, 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "### 1. Import the easyllm library" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# if needed, install and/or upgrade to the latest version of the OpenAI Python library\n", 33 | "%pip install --upgrade easyllm " 34 | ] 35 | }, 36 | { 37 | "attachments": {}, 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "### 2. An example chat API call\n", 42 | "\n", 43 | "Since we want to use our endpoint for inference we don't have to define the `model` parameter. We either need to expose an environment variable `HUGGINGFACE_API_BASE` before the import of `easyllm.clients.huggingface` or overwrite the `huggingface.api_base` value.\n", 44 | "\n", 45 | "A chat API call then only has two required inputs:\n", 46 | "- `messages`: a list of message objects, where each object has two required fields:\n", 47 | " - `role`: the role of the messenger (either `system`, `user`, or `assistant`)\n", 48 | " - `content`: the content of the message (e.g., `Write me a beautiful poem`)\n", 49 | "\n", 50 | "Let's look at an example chat API calls to see how the chat format works in practice." 
51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 1, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "data": { 60 | "text/plain": [ 61 | "{'id': 'hf-0lL5H_yyRR',\n", 62 | " 'object': 'chat.completion',\n", 63 | " 'created': 1691096023,\n", 64 | " 'choices': [{'index': 0,\n", 65 | " 'message': {'role': 'assistant', 'content': ' Apple who?'},\n", 66 | " 'finish_reason': 'eos_token'}],\n", 67 | " 'usage': {'prompt_tokens': 149, 'completion_tokens': 5, 'total_tokens': 154}}" 68 | ] 69 | }, 70 | "execution_count": 1, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "from easyllm.clients import huggingface\n", 77 | "\n", 78 | "# Here we overwrite the defaults, you can also use environment variables\n", 79 | "huggingface.prompt_builder = \"llama2\"\n", 80 | "huggingface.api_base = \"YOUR_ENDPOINT_URL\"\n", 81 | "\n", 82 | "# The module automatically loads the HuggingFace API key from the environment variable HUGGINGFACE_TOKEN or from the HuggingFace CLI configuration file.\n", 83 | "# huggingface.api_key=\"hf_xxx\"\n", 84 | "\n", 85 | "response = huggingface.ChatCompletion.create(\n", 86 | " messages=[\n", 87 | " {\"role\": \"system\", \"content\": \"\\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\"},\n", 88 | " {\"role\": \"user\", \"content\": \"Knock knock.\"},\n", 89 | " {\"role\": \"assistant\", \"content\": \"Who's there?\"},\n", 90 | " {\"role\": \"user\", \"content\": \"Apple.\"},\n", 91 | " ],\n", 92 | " temperature=0.9,\n", 93 | " top_p=0.6,\n", 94 | " max_tokens=1024,\n", 95 | ")\n", 96 | "response" 97 | ] 98 | }, 99 | { 100 | "attachments": {}, 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "As you can see, the response object has a few fields:\n", 105 | "- `id`: the ID of the request\n", 106 | "- `object`: the type of object returned (e.g., `chat.completion`)\n", 107 | "- `created`: the timestamp of the request\n", 108 | "- `model`: the full name of the model used to generate the response\n", 109 | "- `usage`: the number of tokens used to generate the replies, counting prompt, completion, and total\n", 110 | "- `choices`: a list of completion objects (only one, unless you set `n` greater than 1)\n", 111 | " - `message`: the message object generated by the model, with `role` and `content`\n", 112 | " - `finish_reason`: the reason the model stopped generating text (either `stop`, or `length` if `max_tokens` limit was reached)\n", 113 | " - `index`: the index of the completion in the list of choices" 114 | ] 115 | }, 116 | { 117 | "attachments": {}, 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "Extract just the reply with:" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 2, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | " Apple who?\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "print(response['choices'][0]['message']['content'])" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 
143 | "metadata": {}, 144 | "source": [ 145 | "## How to stream Chat Completion requests\n", 146 | "\n", 147 | "Custom endpoints can be created to stream chat completion requests to a model. " 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 6, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | " Sure! Here we go:\n", 160 | "\n", 161 | "1. One\n", 162 | "2. Two\n", 163 | "3. Three\n", 164 | "4. Four\n", 165 | "5. Five\n", 166 | "6. Six\n", 167 | "7. Seven\n", 168 | "8. Eight\n", 169 | "9. Nine\n", 170 | "10. Ten!" 171 | ] 172 | } 173 | ], 174 | "source": [ 175 | "from easyllm.clients import huggingface\n", 176 | "\n", 177 | "huggingface.prompt_builder = \"llama2\"\n", 178 | "\n", 179 | "# Here you can overwrite the url to your endpoint, can also be localhost:8000\n", 180 | "huggingface.api_base = \"YOUR_ENDPOINT_URL\"\n", 181 | "\n", 182 | "# a ChatCompletion request\n", 183 | "response = huggingface.ChatCompletion.create(\n", 184 | " messages=[\n", 185 | " {'role': 'user', 'content': \"Count to 10.\"}\n", 186 | " ],\n", 187 | " stream=True # this time, we set stream=True\n", 188 | ")\n", 189 | "\n", 190 | "for chunk in response:\n", 191 | " delta = chunk['choices'][0]['delta']\n", 192 | " if \"content\" in delta:\n", 193 | " print(delta[\"content\"],end=\"\")" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [] 202 | } 203 | ], 204 | "metadata": { 205 | "kernelspec": { 206 | "display_name": "openai", 207 | "language": "python", 208 | "name": "python3" 209 | }, 210 | "language_info": { 211 | "codemirror_mode": { 212 | "name": "ipython", 213 | "version": 3 214 | }, 215 | "file_extension": ".py", 216 | "mimetype": "text/x-python", 217 | "name": "python", 218 | "nbconvert_exporter": "python", 219 | "pygments_lexer": "ipython3", 220 | "version": "3.8.12" 221 | }, 222 | "orig_nbformat": 4, 223 | "vscode": { 224 | "interpreter": { 225 | "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" 226 | } 227 | } 228 | }, 229 | "nbformat": 4, 230 | "nbformat_minor": 2 231 | } 232 | -------------------------------------------------------------------------------- /notebooks/llama2-agent-example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Llama 2 70B Agent/Tool use example \n", 8 | "\n", 9 | "This Jupyter notebook provides examples of how to use Tools for Agents with the Llama 2 70B model in EasyLLM. This includes an example on how to use tools with an LLM, including output parsing, execution of the tools and parsing of the results. It is a very simplified example. If you are interested in Agents you should checkout [langchain](https://python.langchain.com/docs/get_started/introduction.html) or the [ReAct pattern](https://www.promptingguide.ai/techniques/react). \n", 10 | "\n", 11 | "\n", 12 | "## Why do LLMs need to use Tools?\n", 13 | "One of the most common challenges with LLMs is overcoming the lack of recency and specificity in their training data - answers can be out of date, and they are prone to hallucinations given the huge variety in their knowledge base. 
Tools are a great method of allowing an LLM to answer within a controlled context that draws on your existing knowledge bases and internal APIs - instead of trying to prompt engineer the LLM all the way to your intended answer, you allow it access to tools that it calls on dynamically for info, parses, and serves to the customer.\n", 14 | "\n", 15 | "Providing LLMs access to tools can enable them to answer questions with context directly from search engines, APIs or your own databases. Instead of answering directly, an LLM with access to tools can perform intermediate steps to gather relevant information. Tools can also be used in combination. For [example](https://python.langchain.com/en/latest/modules/agents/agents/examples/mrkl_chat.html), a language model can be made to use a search tool to look up quantitative information and a calculator to execute calculations." 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "# if needed, install and/or upgrade to the latest version of the EasyLLM Python library\n", 25 | "%pip install --upgrade easyllm " 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "Getting an open LLM to act like an agent or use tools is incredibly hard. However, with Llama 2 70B it is now possible. Let's see how we can get it running!\n", 33 | "\n", 34 | "## Basic example of using a tool with Llama 2 70B\n", 35 | "\n", 36 | "In this basic example, we are only going to use one abstract tool, a `calculator`. Our model can use the calculator to run mathematical operations. To make it easy, we provide some few-shot examples for the model to better understand what it needs to do. \n", 37 | "_Note: This is adapted from the [example by pinecone](https://github.com/pinecone-io/examples/blob/master/learn/generation/llm-field-guide/llama-2/llama-2-70b-chat-agent.ipynb)._" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 41, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "system_message = \"\"\"Assistant is an expert JSON builder designed to assist with a wide range of tasks.\n", 47 | "\n", 48 | "Assistant is able to respond to the User and use tools using JSON strings that contain \"action\" and \"action_input\" parameters.\n", 49 | "\n", 50 | "All of Assistant's communication is performed using this JSON format.\n", 51 | "\n", 52 | "Assistant can also use tools by responding to the user with tool use instructions in the same \"action\" and \"action_input\" JSON format. 
Tools available to Assistant are:\n", 53 | "\n", 54 | "- \"Calculator\": Useful for when you need to answer questions about math.\n", 55 | " - To use the calculator tool, Assistant should write like so:\n", 56 | " ```json\n", 57 | " {{\"action\": \"Calculator\",\n", 58 | " \"action_input\": \"4+4\"}}\n", 59 | " ```\n", 60 | "\n", 61 | "Here are some previous conversations between the Assistant and User:\n", 62 | "\n", 63 | "User: Hey how are you today?\n", 64 | "Assistant: ```json\n", 65 | "{{\"action\": \"Final Answer\",\n", 66 | " \"action_input\": \"I'm good thanks, how are you?\"}}\n", 67 | "```\n", 68 | "User: I'm great, what is the square root of 4?\n", 69 | "Assistant: ```json\n", 70 | "{{\"action\": \"Calculator\",\n", 71 | " \"action_input\": \"sqrt(4)\"}}\n", 72 | "```\n", 73 | "Result: 2.0\n", 74 | "Assistant: ```json\n", 75 | "{{\"action\": \"Final Answer\",\n", 76 | " \"action_input\": \"It looks like the answer is 2!\"}}\n", 77 | "```\n", 78 | "User: Thanks could you tell me what 4 to the power of 2 is?\n", 79 | "Assistant: ```json\n", 80 | "{{\"action\": \"Calculator\",\n", 81 | " \"action_input\": \"4**2\"}}\n", 82 | "```\n", 83 | "Result: 16.0\n", 84 | "Assistant: ```json\n", 85 | "{{\"action\": \"Final Answer\",\n", 86 | " \"action_input\": \"It looks like the answer is 16!\"}}\n", 87 | "```\n", 88 | "\n", 89 | "Here is the latest conversation between Assistant and User.\"\"\"" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "In addition to our system message which holds the information for our tools we need to create a user template, which includes the input from the user and tells the model to use tools or not. " 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 42, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "prompt = f\"{system_message}\\n\\nUse your existing tools and respond with a JSON object with with 'action' and 'action_input' values \\nUser: {{user_input}}\"" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "Now lets combine both and create a request using `easyllm`." 
113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 43, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "\n", 122 | "from easyllm.clients import huggingface\n", 123 | "\n", 124 | "# Changing configuration without using environment variables\n", 125 | "huggingface.prompt_builder = \"llama2\"\n", 126 | "# huggingface.api_key=\"hf_xxx\"\n", 127 | "\n", 128 | "def agent(prompt):\n", 129 | " response = huggingface.Completion.create(\n", 130 | " model=\"meta-llama/Llama-2-70b-chat-hf\",\n", 131 | " prompt=prompt,\n", 132 | " temperature=0.1,\n", 133 | " max_tokens=128,\n", 134 | " stop=[\"```\\n\",\"Result: \"],\n", 135 | " debug=False,\n", 136 | " ) \n", 137 | " return response[\"choices\"][0][\"text\"]" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "Now we can begin asking questions" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 44, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/plain": [ 155 | "' Assistant: ```json\\n{\"action\": \"Final Answer\",\\n \"action_input\": \"I\\'m good thanks, how are you?\"}\\n```'" 156 | ] 157 | }, 158 | "execution_count": 44, 159 | "metadata": {}, 160 | "output_type": "execute_result" 161 | } 162 | ], 163 | "source": [ 164 | "output = agent(prompt.format(user_input=\"hey how are you today?\"))\n", 165 | "output" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "What happens if we ask a math question? " 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 45, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "' Assistant: ```json\\n{\"action\": \"Calculator\",\\n \"action_input\": \"4*2\"}\\n```\\n'" 184 | ] 185 | }, 186 | "execution_count": 45, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "output = agent(prompt.format(user_input=\"What is 4 multiplied by 2?\"))\n", 193 | "output" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "Great! It works! It correctly selects the tool. 
Okay, now to make it work we need to parse the output and, in the case of the calculator, execute it" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 46, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "import json\n", 210 | "import re\n", 211 | "\n", 212 | "def parser(input):\n", 213 | " pattern = r'```json\\n(.*?)```'\n", 214 | " match = re.search(pattern, input, re.DOTALL)\n", 215 | " if not match:\n", 216 | " raise ValueError(\"Couldn't parse the output.\")\n", 217 | " \n", 218 | " parsed_data = json.loads(match.group(1))\n", 219 | " return parsed_data\n", 220 | "\n", 221 | " " 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 47, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/plain": [ 232 | "{'action': 'Calculator', 'action_input': '4*2'}" 233 | ] 234 | }, 235 | "execution_count": 47, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "output = parser(output)\n", 242 | "output" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "Okay, now let's execute it using the `eval` function from Python" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 48, 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "def use_tool(tool,tool_input):\n", 259 | " if tool == \"Calculator\":\n", 260 | " return eval(tool_input)\n", 261 | " else:\n", 262 | " raise Exception(\"Unknown tool: \" + tool)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": {}, 268 | "source": [ 269 | "Okay, now let's combine everything and pass the calculator result back to our agent. " 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 73, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [ 278 | "def use_calculator(input, first_call=True):\n", 279 | " if first_call:\n", 280 | " input_prompt = prompt.format(user_input=input)\n", 281 | " else:\n", 282 | " input_prompt = input\n", 283 | " # make the agent call\n", 284 | " response = agent(input_prompt)\n", 285 | " # parse the output if possible \n", 286 | " parsed = parser(response)\n", 287 | " # check if the output is our final answer or if it is a tool\n", 288 | " if parsed[\"action\"] == \"Final Answer\":\n", 289 | " return parsed[\"action_input\"]\n", 290 | " # if not try to use the tool\n", 291 | " tool_output = use_tool(parsed[\"action\"], parsed[\"action_input\"])\n", 292 | " \n", 293 | " # add message to the agent\n", 294 | " next_prompt = f\"{input_prompt}\\n{response}\\nResponse: {tool_output}\"\n", 295 | " # recursively call the agent with the output of the tool\n", 296 | " return use_calculator(next_prompt, False)\n", 297 | " \n", 298 | " " 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 75, 304 | "metadata": {}, 305 | "outputs": [ 306 | { 307 | "data": { 308 | "text/plain": [ 309 | "'It looks like the answer is 209!'" 310 | ] 311 | }, 312 | "execution_count": 75, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "use_calculator(\"What is 19 * 11?\")" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [] 327 | } 328 | ], 329 | "metadata": { 330 | "kernelspec": { 331 | "display_name": "hf", 332 | "language": "python", 333 | "name": "python3" 334 | }, 335 | "language_info": { 336 | "codemirror_mode": { 337 | 
"name": "ipython", 338 | "version": 3 339 | }, 340 | "file_extension": ".py", 341 | "mimetype": "text/x-python", 342 | "name": "python", 343 | "nbconvert_exporter": "python", 344 | "pygments_lexer": "ipython3", 345 | "version": "3.8.12" 346 | }, 347 | "orig_nbformat": 4 348 | }, 349 | "nbformat": 4, 350 | "nbformat_minor": 2 351 | } 352 | -------------------------------------------------------------------------------- /notebooks/llama2-rag-example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Retrieval Augmented Generation using Llama 2\n", 8 | "\n", 9 | "This notebook walks through how to use Llama 2 to perform (in-context) retrieval augmented generation. We will customize the `system` message for Llama 2 to make sure the model is only using provided context to generate the response. \n", 10 | "\n", 11 | "**What is In-context Retrieval Augmented Generation?**\n", 12 | "\n", 13 | "\n", 14 | "In-context retrieval augmented generation is a method to improve language model generation by including relevant documents to the model input. The key points are:\n", 15 | "\n", 16 | "* Retrieval of relevant documents from an external corpus to provide factual grounding for the model.\n", 17 | "* Prepending the retrieved documents to the input text, without modifying the model architecture or fine-tuning the model.\n", 18 | "* Allows leveraging external knowledge with off-the-shelf frozen language models." 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "# if needed, install and/or upgrade to the latest version of the EasyLLM Python library\n", 28 | "%pip install --upgrade easyllm " 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "## Simple Example\n", 36 | "\n", 37 | "Below is a simple example using the existing prompt builder of llama2 to generate a prompt. We are going to use the `system` message from [llama-index](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/SimpleIndexDemoLlama-Local.html) with some minor adjustments." 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 8, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "SYSTEM_PROMPT = \"\"\"You are an AI assistant that answers questions in a friendly manner, based on the given #SOURCE# documents. Here are some rules you always follow:\n", 47 | "- Generate human readable output, avoid creating output with gibberish text.\n", 48 | "- Generate only the requested output, don't include any other language before or after the requested output.\n", 49 | "- Never say thank you, that you are happy to help, that you are an AI agent, etc. Just answer directly.\n", 50 | "- Generate professional language typically used in business documents in North America.\n", 51 | "- Never generate offensive or foul language.\n", 52 | "- Only include facts and information based on the #SOURCE# documents.\n", 53 | "\"\"\"\n", 54 | "\n", 55 | "system = {\"role\": \"system\", \"content\": SYSTEM_PROMPT}" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "Before we call our LLM, let's create a user instruction with a `query` and a `context`. As the context, I copied the [Wikipedia article of Nuremberg](https://en.wikipedia.org/wiki/Nuremberg) (the city I live in). 
\n", 63 | "_I uploaded it as a gist to to not pollute the notebook._" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "!wget https://gist.githubusercontent.com/philschmid/2678351cb9f41d385aa5c099caf20c0a/raw/60ae425677dd9bed6fe3c0f2dd5b6ea49bc6590c/nuremberg.txt" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 14, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "context = open(\"nuremberg.txt\").read()\n", 82 | "\n", 83 | "query = \"How many people live in Nuremberg?\"" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "Before we use our context lets just ask the model." 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 15, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | " As of December 31, 2020, the population of Nuremberg, Germany is approximately 516,000 people.\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "from easyllm.clients import huggingface\n", 108 | "\n", 109 | "# set the prompt builder to llama2\n", 110 | "huggingface.prompt_builder = \"llama2\"\n", 111 | "# huggingface.api_key = \"hf_xx\"\n", 112 | "\n", 113 | "# send a ChatCompletion request\n", 114 | "response = huggingface.ChatCompletion.create(\n", 115 | " model=\"meta-llama/Llama-2-70b-chat-hf\",\n", 116 | " messages=[\n", 117 | " {\"role\": \"user\", \"content\": query},\n", 118 | " ],\n", 119 | ")\n", 120 | "\n", 121 | "# print the time delay and text received\n", 122 | "print(response[\"choices\"][0][\"message\"][\"content\"])\n" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "Now lets use our `system` message with our `context` to augment the knowledge of our model \"in-memory\" and ask the same question again." 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 23, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "context_extended = f\"{query}\\n\\n#SOURCE#\\n{context}\"\n", 139 | "# context_extended = f\"{query}\\n\\n#SOURCE START#\\n{context}\\n#SOURCE END#{query}\"" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 22, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | " The population of Nuremberg is 523,026 according to the 2022-12-31 data.\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "from easyllm.clients import huggingface\n", 157 | "\n", 158 | "# set the prompt builder to llama2\n", 159 | "huggingface.prompt_builder = \"llama2\"\n", 160 | "# huggingface.api_key = \"hf_xx\"\n", 161 | "\n", 162 | "# send a ChatCompletion request\n", 163 | "response = huggingface.ChatCompletion.create(\n", 164 | " model=\"meta-llama/Llama-2-70b-chat-hf\",\n", 165 | " messages=[\n", 166 | " system, \n", 167 | " {\"role\": \"user\", \"content\": context_extended},\n", 168 | " ],\n", 169 | ")\n", 170 | "\n", 171 | "# print the time delay and text received\n", 172 | "print(response[\"choices\"][0][\"message\"][\"content\"])\n" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "Awesome! 
If we check the gist, we can see a snippet in there saying\n", 180 | "```bash\n", 181 | "Population (2022-12-31)[2]\n", 182 | " • City\t523,026\n", 183 | "```" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "## Next Steps\n", 191 | "\n", 192 | "Next steps would be to connect your LLM with external knowledge sources such as wikis, the web, or other databases using tools and APIs or vector databases and embeddings. " 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [] 201 | } 202 | ], 203 | "metadata": { 204 | "kernelspec": { 205 | "display_name": "hf", 206 | "language": "python", 207 | "name": "python3" 208 | }, 209 | "language_info": { 210 | "codemirror_mode": { 211 | "name": "ipython", 212 | "version": 3 213 | }, 214 | "file_extension": ".py", 215 | "mimetype": "text/x-python", 216 | "name": "python", 217 | "nbconvert_exporter": "python", 218 | "pygments_lexer": "ipython3", 219 | "version": "3.8.12" 220 | }, 221 | "orig_nbformat": 4 222 | }, 223 | "nbformat": 4, 224 | "nbformat_minor": 2 225 | } 226 | -------------------------------------------------------------------------------- /notebooks/sagemaker-get-embeddings.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## How to create embeddings\n", 8 | "\n", 9 | "In this notebook, we will show you how to create embeddings for your own text data and an open source model from Hugging Face, hosted as an endpoint on Amazon SageMaker. \n", 10 | "\n", 11 | "## 0. Setup\n", 12 | "\n", 13 | "Before you can use `easyllm` with Amazon SageMaker you need to deploy the model to a SageMaker endpoint. You can do this by following one of the blog posts below:\n", 14 | "\n", 15 | "* [Creating document embeddings with Hugging Face's Transformers & Amazon SageMaker](https://www.philschmid.de/custom-inference-huggingface-sagemaker)\n", 16 | "\n", 17 | "Once you have your endpoint deployed, copy its endpoint name. The endpoint name will be our `model` parameter. You can get the endpoint name in the AWS management console for Amazon SageMaker under \"Inference\" -> \"Endpoints\" -> \"Name\", or when you deployed your model using the SageMaker SDK you can get it from the `predictor.endpoint_name` attribute.\n" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "### 1. Import the easyllm library" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# if needed, install and/or upgrade to the latest version of the EasyLLM Python library\n", 34 | "%pip install --upgrade easyllm " 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# import the EasyLLM Python library for calling the EasyLLM API\n", 44 | "import easyllm" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "### 2. 
An example embedding API call\n", 52 | "\n", 53 | "An embedding API call has two required inputs:\n", 54 | "- `model`: the name of the model you want to use (e.g., `sentence-transformers/all-MiniLM-L6-v2`) or leave it empty to just call the api\n", 55 | "- `input`: a string or list of strings you want to embed\n", 56 | "\n", 57 | "Let's look at an example API call to see how the embedding format works in practice." 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "768" 69 | ] 70 | }, 71 | "execution_count": 3, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "import os \n", 78 | "# set env for prompt builder\n", 79 | "os.environ[\"HUGGINGFACE_PROMPT\"] = \"llama2\" # vicuna, wizardlm, stablebeluga, open_assistant\n", 80 | "os.environ[\"AWS_REGION\"] = \"us-east-1\" # change to your region\n", 81 | "# os.environ[\"AWS_ACCESS_KEY_ID\"] = \"XXX\" # needed if not using boto3 session\n", 82 | "# os.environ[\"AWS_SECRET_ACCESS_KEY\"] = \"XXX\" # needed if not using boto3 session\n", 83 | "\n", 84 | "from easyllm.clients import sagemaker\n", 85 | "\n", 86 | "embedding = sagemaker.Embedding.create(\n", 87 | " model=\"SageMakerModelEmbeddingEndpoint24E49D09-64prhjuiWUtE\",\n", 88 | " input=\"That's a nice car.\",\n", 89 | ")\n", 90 | "\n", 91 | "len(embedding[\"data\"][0][\"embedding\"])" 92 | ] 93 | },
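{ "cell_type": "markdown", "metadata": {}, "source": [ "Since `input` also accepts a list of strings, the same endpoint can embed several texts in one call. The cell below is a small, illustrative sketch that compares the returned vectors with cosine similarity; it assumes the endpoint deployed above is still available (the endpoint name is only the placeholder from the previous cell) and that the response contains one embedding per input string." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "\n", "from easyllm.clients import sagemaker\n", "\n", "# sketch only: the endpoint name is a placeholder, use your own endpoint from above\n", "res = sagemaker.Embedding.create(\n", "    model=\"SageMakerModelEmbeddingEndpoint24E49D09-64prhjuiWUtE\",\n", "    input=[\"That's a nice car.\", \"What a lovely automobile!\", \"The weather is bad today.\"],\n", ")\n", "vectors = np.array([d[\"embedding\"] for d in res[\"data\"]])\n", "\n", "def cosine(a, b):\n", "    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))\n", "\n", "# related sentences should score higher than the unrelated one\n", "print(cosine(vectors[0], vectors[1]), cosine(vectors[0], vectors[2]))" ] },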
94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [] 100 | } 101 | ], 102 | "metadata": { 103 | "kernelspec": { 104 | "display_name": "Python 3.9.9 ('openai')", 105 | "language": "python", 106 | "name": "python3" 107 | }, 108 | "language_info": { 109 | "codemirror_mode": { 110 | "name": "ipython", 111 | "version": 3 112 | }, 113 | "file_extension": ".py", 114 | "mimetype": "text/x-python", 115 | "name": "python", 116 | "nbconvert_exporter": "python", 117 | "pygments_lexer": "ipython3", 118 | "version": "3.8.12" 119 | }, 120 | "orig_nbformat": 4, 121 | "vscode": { 122 | "interpreter": { 123 | "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" 124 | } 125 | } 126 | }, 127 | "nbformat": 4, 128 | "nbformat_minor": 2 129 | } 130 | -------------------------------------------------------------------------------- /notebooks/sagemaker-text-completion-api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# How to use Text (Instruction) Completion clients\n", 9 | "\n", 10 | "EasyLLM can be used as an abstract layer to replace `text-davinci-003` with open source models.\n", 11 | "\n", 12 | "You can switch your own applications from the OpenAI API by simply changing the client. \n", 13 | "\n", 14 | "Completion models take a text prompt as input, and return an AI-written completion as output.\n", 15 | "\n", 16 | "This guide illustrates the completion format with a few example API calls.\n", 17 | "\n", 18 | "\n", 19 | "## 0. Setup\n", 20 | "\n", 21 | "Before you can use `easyllm` with Amazon SageMaker you need to deploy the model to a SageMaker endpoint. You can do this by following one of the blog posts below:\n", 22 | "\n", 23 | "* [Deploy Llama 2 7B/13B/70B on Amazon SageMaker](https://www.philschmid.de/sagemaker-llama-llm)\n", 24 | "* [Deploy Falcon 7B & 40B on Amazon SageMaker](https://www.philschmid.de/sagemaker-falcon-llm)\n", 25 | "* [Introducing the Hugging Face LLM Inference Container for Amazon SageMaker](https://www.philschmid.de/sagemaker-huggingface-llm)\n", 26 | "\n", 27 | "Once you have your endpoint deployed, copy its endpoint name. The endpoint name will be our `model` parameter. You can get the endpoint name in the AWS management console for Amazon SageMaker under \"Inference\" -> \"Endpoints\" -> \"Name\", or when you deployed your model using the sagemaker sdk you can get it from the `predictor.endpoint_name` attribute." 28 | ] 29 | }, 30 | { 31 | "attachments": {}, 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "### 1. Import the easyllm library" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "# if needed, install and/or upgrade to the latest version of the easyllm Python library\n", 45 | "%pip install --upgrade easyllm " 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 6, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# import the EasyLLM Python library for calling the EasyLLM API\n", 55 | "import easyllm" 56 | ] 57 | }, 58 | { 59 | "attachments": {}, 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "### 2. An example text completion API call\n", 64 | "\n", 65 | "A text API call has two required inputs:\n", 66 | "- `model`: the name of the model you want to use (e.g., `meta-llama/Llama-2-70b-chat-hf`) or leave it empty to just call the api\n", 67 | "- `prompt`: a text prompt that is sent to the model to generate the text\n", 68 | "\n", 69 | "Compared to the OpenAI API, the `sagemaker` module also exposes a `prompt_builder` and `stop_sequences` parameter you can use to customize the prompt and stop sequences. The EasyLLM package comes with built-in implementations for both of these parameters, e.g. `llama2_prompt_builder` and `llama2_stop_sequences`. \n", 70 | "\n", 71 | "Let's look at an example API call to see how the text completion format works in practice." 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 1, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "{'id': 'hf-dEMeXTUk3Y',\n", 83 | " 'object': 'text.completion',\n", 84 | " 'created': 1691508711,\n", 85 | " 'model': 'huggingface-pytorch-tgi-inference-2023-08-08-14-15-52-703',\n", 86 | " 'choices': [{'index': 0,\n", 87 | " 'text': \" The meaning of life is a question that has puzzled philosophers, theologians, scientists, and many other thinkers throughout history. Here are some possible answers:\\n1. Religious or spiritual beliefs: Many people believe that the meaning of life is to fulfill a divine or spiritual purpose, whether that be to follow a set of moral commandments, to achieve spiritual enlightenment, or to fulfill a specific mission or calling.\\n2. Personal fulfillment: Some people believe that the meaning of life is to find personal fulfillment and happiness. This can be achieved through pursuing one's passions, building meaningful relationships, and cultivating a sense of purpose and meaning in one's life.\\n3. 
Contribution to society: Many people believe that the meaning of life is to make a positive impact on the world and to contribute to the greater good. This can be achieved through various means, such as working to make the world a better place, helping others, or creating something of value.\\n4. Learning and growth: Some people believe that the meaning of life is to learn and grow as individuals, to expand one's knowledge and understanding of the world, and to develop one's skills\",\n", 88 | " 'finish_reason': 'length'}],\n", 89 | " 'usage': {'prompt_tokens': 11, 'completion_tokens': 256, 'total_tokens': 267}}" 90 | ] 91 | }, 92 | "execution_count": 1, 93 | "metadata": {}, 94 | "output_type": "execute_result" 95 | } 96 | ], 97 | "source": [ 98 | "import os \n", 99 | "# set env for prompt builder\n", 100 | "os.environ[\"HUGGINGFACE_PROMPT\"] = \"llama2\" # vicuna, wizardlm, stablebeluga, open_assistant\n", 101 | "os.environ[\"AWS_REGION\"] = \"us-east-1\" # change to your region\n", 102 | "# os.environ[\"AWS_ACCESS_KEY_ID\"] = \"XXX\" # needed if not using boto3 session\n", 103 | "# os.environ[\"AWS_SECRET_ACCESS_KEY\"] = \"XXX\" # needed if not using boto3 session\n", 104 | "\n", 105 | "from easyllm.clients import sagemaker\n", 106 | "\n", 107 | "# Changing configuration without using environment variables\n", 108 | "# sagemaker.prompt_builder = \"llama2\"\n", 109 | "# sagemaker.api_aws_access_key=\"xxx\"\n", 110 | "# sagemaker.api_aws_secret_key=\"xxx\"\n", 111 | "\n", 112 | "# SageMaker endpoint name\n", 113 | "MODEL=\"huggingface-pytorch-tgi-inference-2023-08-08-14-15-52-703\"\n", 114 | "\n", 115 | "response = sagemaker.Completion.create(\n", 116 | " model=MODEL,\n", 117 | " prompt=\"What is the meaning of life?\",\n", 118 | " temperature=0.9,\n", 119 | " top_p=0.6,\n", 120 | " max_tokens=256,\n", 121 | ")\n", 122 | "response" 123 | ] 124 | }, 125 | { 126 | "attachments": {}, 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "As you can see, the response object has a few fields:\n", 131 | "- `id`: the ID of the request\n", 132 | "- `object`: the type of object returned (e.g., `text.completion`)\n", 133 | "- `created`: the timestamp of the request\n", 134 | "- `model`: the full name of the model used to generate the response\n", 135 | "- `usage`: the number of tokens used to generate the replies, counting prompt, completion, and total\n", 136 | "- `choices`: a list of completion objects (only one, unless you set `n` greater than 1)\n", 137 | " - `text`: the generated text\n", 138 | " - `finish_reason`: the reason the model stopped generating text (either `stop`, `eos_token`, or `length` if `max_tokens` limit was reached)\n", 139 | " - `logprobs`: _optional_ the log probs of each generated token." 140 | ] 141 | }, 142 | { 143 | "attachments": {}, 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "Extract just the reply with:" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 2, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | " The meaning of life is a question that has puzzled philosophers, theologians, scientists, and many other thinkers throughout history. Here are some possible answers:\n", 160 | "1. 
Religious or spiritual beliefs: Many people believe that the meaning of life is to fulfill a divine or spiritual purpose, whether that be to follow a set of moral commandments, to achieve spiritual enlightenment, or to fulfill a specific mission or calling.\n", 161 | "2. Personal fulfillment: Some people believe that the meaning of life is to find personal fulfillment and happiness. This can be achieved through pursuing one's passions, building meaningful relationships, and cultivating a sense of purpose and meaning in one's life.\n", 162 | "3. Contribution to society: Many people believe that the meaning of life is to make a positive impact on the world and to contribute to the greater good. This can be achieved through various means, such as working to make the world a better place, helping others, or creating something of value.\n", 163 | "4. Learning and growth: Some people believe that the meaning of life is to learn and grow as individuals, to expand one's knowledge and understanding of the world, and to develop one's skills\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "print(response['choices'][0]['text'])" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [] 177 | } 178 | ], 179 | "metadata": { 180 | "kernelspec": { 181 | "display_name": "openai", 182 | "language": "python", 183 | "name": "python3" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": { 187 | "name": "ipython", 188 | "version": 3 189 | }, 190 | "file_extension": ".py", 191 | "mimetype": "text/x-python", 192 | "name": "python", 193 | "nbconvert_exporter": "python", 194 | "pygments_lexer": "ipython3", 195 | "version": "3.8.12" 196 | }, 197 | "orig_nbformat": 4, 198 | "vscode": { 199 | "interpreter": { 200 | "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" 201 | } 202 | } 203 | }, 204 | "nbformat": 4, 205 | "nbformat_minor": 2 206 | } 207 | -------------------------------------------------------------------------------- /notebooks/text-completion-api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# How to use Text (Instruction) Completion clients\n", 9 | "\n", 10 | "EasyLLM can be used as an abstract layer to replace `text-davinci-003` with open source models.\n", 11 | "\n", 12 | "You can change your own applications from the OpenAI API, by simply changing the client. \n", 13 | "\n", 14 | "Chat models take a series of messages as input, and return an AI-written message as output.\n", 15 | "\n", 16 | "This guide illustrates the chat format with a few example API calls." 17 | ] 18 | }, 19 | { 20 | "attachments": {}, 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "### 1. 
Import the easyllm library" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "# if needed, install and/or upgrade to the latest version of the OpenAI Python library\n", 34 | "%pip install --upgrade easyllm " 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 6, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "# import the EasyLLM Python library for calling the EasyLLM API\n", 44 | "import easyllm" 45 | ] 46 | }, 47 | { 48 | "attachments": {}, 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "### 2. An example chat API call\n", 53 | "\n", 54 | "A text API call has two required inputs:\n", 55 | "- `model`: the name of the model you want to use (e.g., `meta-llama/Llama-2-70b-chat-hf`) or leave it empty to just call the api\n", 56 | "- `prompt`: a text prompt that is sent to the model to generate the text\n", 57 | "\n", 58 | "Compared to OpenAI api is the `huggingface` module also exposing a `prompt_builder` and `stop_sequences` parameter you can use to customize the prompt and stop sequences. The EasyLLM package comes with build in popular methods for both of these parameters, e.g. `llama2_prompt_builder` and `llama2_stop_sequences`. \n", 59 | "\n", 60 | "Let's look at an example chat API calls to see how the chat format works in practice." 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 1, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "{'id': 'hf-ZK--Ndk30h',\n", 72 | " 'object': 'text.completion',\n", 73 | " 'created': 1691129933,\n", 74 | " 'model': 'meta-llama/Llama-2-70b-chat-hf',\n", 75 | " 'choices': [{'index': 0,\n", 76 | " 'text': \" The meaning of life is a question that has puzzled philosophers, theologians, and scientists for centuries. There are many different perspectives on what constitutes the meaning of life, and there is no one definitive answer. However, some common themes that people often associate with the meaning of life include:\\n\\n1. Purpose: Having a sense of purpose or direction in life, whether it be through work, relationships, or personal goals.\\n2. Fulfillment: Feeling fulfilled and satisfied with one's experiences and achievements.\\n3. Happiness: Pursuing happiness and well-being, whether through personal relationships, material possessions, or personal growth.\\n4. Self-actualization: Realizing one's potential and living up to one's capabilities.\\n5. Legacy: Leaving a lasting impact or legacy, whether through contributions to society, artistic or cultural achievements, or impacting the lives of others.\\n6. Spirituality: Connecting with a higher power or a sense of something greater than oneself, and finding meaning and purpose through faith or spiritual practices.\\n7. Love: Finding and experiencing love, whether it be through romantic relationships, friendships, or family.\\n8. Personal growth: Continuously learning, growing, and improving oneself.\\n9. Community: Building and being a part of a community, whether it be through work, volunteering, or social connections.\\n10. Making a difference: Making a positive impact in the world and leaving it a better place than when you arrived.\\n\\nUltimately, the meaning of life is a deeply personal and subjective question, and what gives meaning and purpose to one person's life may be different for another. 
It's a question that each person must answer for themselves, and it may change throughout their life as they grow and evolve.\",\n", 77 | " 'finish_reason': 'eos_token'}],\n", 78 | " 'usage': {'prompt_tokens': 11, 'completion_tokens': 406, 'total_tokens': 417}}" 79 | ] 80 | }, 81 | "execution_count": 1, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [ 87 | "\n", 88 | "from easyllm.clients import huggingface\n", 89 | "\n", 90 | "# Example EasyLLM Python library request\n", 91 | "MODEL = \"meta-llama/Llama-2-70b-chat-hf\"\n", 92 | "huggingface.prompt_builder = \"llama2\"\n", 93 | "\n", 94 | "# The module automatically loads the HuggingFace API key from the environment variable HUGGINGFACE_TOKEN or from the HuggingFace CLI configuration file.\n", 95 | "# huggingface.api_key=\"hf_xxx\"\n", 96 | "\n", 97 | "response = huggingface.Completion.create(\n", 98 | " model=MODEL,\n", 99 | " prompt=\"What is the meaning of life?\",\n", 100 | " temperature=0.9,\n", 101 | " top_p=0.6,\n", 102 | " max_tokens=1024,\n", 103 | ")\n", 104 | "response" 105 | ] 106 | }, 107 | { 108 | "attachments": {}, 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "As you can see, the response object has a few fields:\n", 113 | "- `id`: the ID of the request\n", 114 | "- `object`: the type of object returned (e.g., `text.completion`)\n", 115 | "- `created`: the timestamp of the request\n", 116 | "- `model`: the full name of the model used to generate the response\n", 117 | "- `usage`: the number of tokens used to generate the replies, counting prompt, completion, and total\n", 118 | "- `choices`: a list of completion objects (only one, unless you set `n` greater than 1)\n", 119 | " - `text`: the generated text\n", 120 | " - `finish_reason`: the reason the model stopped generating text (either `stop`, `eos_token`, or `length` if `max_tokens` limit was reached)\n", 121 | " - `logprobs`: _optional_ the log probs of each generated token." 122 | ] 123 | }, 124 | { 125 | "attachments": {}, 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "Extract just the reply with:" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 2, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | " The meaning of life is a question that has puzzled philosophers, theologians, and scientists for centuries. There are many different perspectives on what constitutes the meaning of life, and there is no one definitive answer. However, some common themes that people often associate with the meaning of life include:\n", 142 | "\n", 143 | "1. Purpose: Having a sense of purpose or direction in life, whether it be through work, relationships, or personal goals.\n", 144 | "2. Fulfillment: Feeling fulfilled and satisfied with one's experiences and achievements.\n", 145 | "3. Happiness: Pursuing happiness and well-being, whether through personal relationships, material possessions, or personal growth.\n", 146 | "4. Self-actualization: Realizing one's potential and living up to one's capabilities.\n", 147 | "5. Legacy: Leaving a lasting impact or legacy, whether through contributions to society, artistic or cultural achievements, or impacting the lives of others.\n", 148 | "6. Spirituality: Connecting with a higher power or a sense of something greater than oneself, and finding meaning and purpose through faith or spiritual practices.\n", 149 | "7. 
Love: Finding and experiencing love, whether it be through romantic relationships, friendships, or family.\n", 150 | "8. Personal growth: Continuously learning, growing, and improving oneself.\n", 151 | "9. Community: Building and being a part of a community, whether it be through work, volunteering, or social connections.\n", 152 | "10. Making a difference: Making a positive impact in the world and leaving it a better place than when you arrived.\n", 153 | "\n", 154 | "Ultimately, the meaning of life is a deeply personal and subjective question, and what gives meaning and purpose to one person's life may be different for another. It's a question that each person must answer for themselves, and it may change throughout their life as they grow and evolve.\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "print(response['choices'][0]['text'])" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [] 168 | } 169 | ], 170 | "metadata": { 171 | "kernelspec": { 172 | "display_name": "openai", 173 | "language": "python", 174 | "name": "python3" 175 | }, 176 | "language_info": { 177 | "codemirror_mode": { 178 | "name": "ipython", 179 | "version": 3 180 | }, 181 | "file_extension": ".py", 182 | "mimetype": "text/x-python", 183 | "name": "python", 184 | "nbconvert_exporter": "python", 185 | "pygments_lexer": "ipython3", 186 | "version": "3.8.12" 187 | }, 188 | "orig_nbformat": 4, 189 | "vscode": { 190 | "interpreter": { 191 | "hash": "365536dcbde60510dc9073d6b991cd35db2d9bac356a11f5b64279a5e6708b97" 192 | } 193 | } 194 | }, 195 | "nbformat": 4, 196 | "nbformat_minor": 2 197 | } 198 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project.urls] 6 | Documentation = "https://github.com/unknown/hatch-demo#readme" 7 | Issues = "https://github.com/unknown/hatch-demo/issues" 8 | Source = "https://github.com/unknown/hatch-demo" 9 | 10 | [tool.hatch.version] 11 | path = "easyllm/__init__.py" 12 | 13 | [tool.hatch.metadata] 14 | allow-direct-references = true 15 | 16 | [project] 17 | name = "easyllm" 18 | description = "Description" 19 | readme = "README.md" 20 | requires-python = ">=3.8" 21 | license = "MIT" 22 | authors = [{ name = "Philipp Schmid", email = "schmidphilipp1995@gmail.com" }] 23 | classifiers = [ 24 | "Topic :: Internet", 25 | "Topic :: Software Development :: Libraries :: Application Frameworks", 26 | "Topic :: Software Development :: Libraries :: Python Modules", 27 | "Topic :: Software Development :: Libraries", 28 | "Topic :: Software Development", 29 | "Intended Audience :: Developers", 30 | "License :: OSI Approved :: MIT License", 31 | "Programming Language :: Python :: 3 :: Only", 32 | "Programming Language :: Python :: 3.8", 33 | "Programming Language :: Python :: 3.9", 34 | "Programming Language :: Python :: 3.9", 35 | "Programming Language :: Python :: 3.10", 36 | ] 37 | dynamic = ["version"] 38 | scripts = { easyllm = "easyllm.cli:main" } 39 | dependencies = ["pydantic==2.1.1", "nanoid==2.0.0", "huggingface-hub==0.16.4"] 40 | 41 | [project.optional-dependencies] 42 | data = ["datasets","kenlm @ https://github.com/kpu/kenlm/archive/master.zip","sentencepiece","readability-lxml","inscriptis"] 43 | test = ["pytest", "ruff", "black", "isort", "mypy", "hatch"] 44 | bedrock = 
["boto3"] 45 | dev = ["ruff", "black", "isort", "mypy", "hatch"] 46 | docs = [ 47 | "mkdocs", 48 | "mkdocs-material", 49 | "mkdocstrings", 50 | "mkdocstrings-python", 51 | "mkdocs-autorefs", 52 | "mkdocs-jupyter", 53 | ] 54 | 55 | [tool.isort] 56 | profile = "black" 57 | known_third_party = ["fastapi", "pydantic", "starlette"] 58 | 59 | # [tool.coverage.run] 60 | # parallel = true 61 | # source = [ 62 | # "docs_src", 63 | # "tests", 64 | # "fastapi" 65 | # ] 66 | # context = '${CONTEXT}' 67 | # omit = [ 68 | # "docs_src/response_model/tutorial003_04.py", 69 | # "docs_src/response_model/tutorial003_04_py310.py", 70 | # ] 71 | 72 | [tool.ruff] 73 | select = [ 74 | "E", # pycodestyle errors 75 | "W", # pycodestyle warnings 76 | "F", # pyflakes 77 | "I", # isort 78 | "C", # flake8-comprehensions 79 | "B", # flake8-bugbear 80 | ] 81 | ignore = [ 82 | "E501", # line too long, handled by black 83 | "B008", # do not perform function calls in argument defaults 84 | "C901", # too complex 85 | ] 86 | # Same as Black. 87 | line-length = 119 88 | 89 | # Allow unused variables when underscore-prefixed. 90 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 91 | 92 | # Assume Python 3.9. 93 | target-version = "py38" 94 | 95 | [tool.ruff.per-file-ignores] 96 | "__init__.py" = ["F401"] 97 | 98 | # [tool.ruff.isort] 99 | # known-third-party = ["fastapi", "pydantic", "starlette"] 100 | 101 | [tool.pytest.ini_options] 102 | addopts = "-ra" 103 | testpaths = ["tests"] 104 | pythonpath = ['.'] 105 | -------------------------------------------------------------------------------- /scripts/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/philschmid/easyllm/1f37a9307d68545f41639f392a301baadda7188a/scripts/.gitkeep -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2023-present philschmid 2 | # 3 | # SPDX-License-Identifier: MIT 4 | -------------------------------------------------------------------------------- /tests/prompt_utils/test_chatml_hf.py: -------------------------------------------------------------------------------- 1 | # test_build_chatml_hf_prompt.py 2 | 3 | import pytest 4 | 5 | from easyllm.prompt_utils.chatml_hf import build_chatml_hf_prompt 6 | 7 | 8 | def test_build_chatml_hf_prompt_single_message(): 9 | message = "Hello!" 
10 | expected_output = f"<|system|>\n<|end|>\n<|user|>\n{message}<|end|>\n<|assistant|>" 11 | result = build_chatml_hf_prompt(message) 12 | assert result == expected_output 13 | 14 | 15 | def test_build_chatml_hf_prompt_multiple_messages(): 16 | messages = [ 17 | {"content":"You are a chat bot.", "role":"system"}, 18 | {"content":"Hello!", "role": "user"}, 19 | ] 20 | expected_output = "<|system|>\nYou are a chat bot.<|end|>\n<|user|>\nHello!<|end|>\n<|assistant|>" 21 | result = build_chatml_hf_prompt(messages) 22 | assert result == expected_output 23 | 24 | 25 | def test_build_chatml_hf_prompt_function_call(): 26 | messages = [ 27 | {"content":"You are a chat bot.", "role":"system"}, 28 | {"content":"some_function()", "role": "function"}, 29 | ] 30 | with pytest.raises(ValueError, match="HF ChatML does not support function calls."): 31 | build_chatml_hf_prompt(messages) 32 | -------------------------------------------------------------------------------- /tests/prompt_utils/test_llama2.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from easyllm.prompt_utils.llama2 import build_llama2_prompt 4 | 5 | 6 | def test_build_llama2_prompt_single_message(): 7 | message = "Hello!" 8 | expected_output = f"[INST] {message} [/INST]" 9 | result = build_llama2_prompt(message) 10 | assert result == expected_output 11 | 12 | 13 | def test_build_llama2_prompt_multiple_messages(): 14 | messages = [ 15 | {"content":"You are a chat bot.", "role":"system"}, 16 | {"content":"Hello!", "role": "user"}, 17 | ] 18 | expected_output = "[INST] <>\nYou are a chat bot.\n<>\n\nHello! [/INST]" 19 | result = build_llama2_prompt(messages) 20 | print(f"RESULT: {result}") 21 | assert result == expected_output 22 | 23 | 24 | def test_build_llama2_prompt_function_call(): 25 | messages = [ 26 | {"content":"You are a chat bot.", "role":"system"}, 27 | {"content":"some_function()", "role": "function"}, 28 | ] 29 | with pytest.raises(ValueError, match="Llama 2 does not support function calls."): 30 | build_llama2_prompt(messages) 31 | -------------------------------------------------------------------------------- /tests/prompt_utils/test_open_assistant.py: -------------------------------------------------------------------------------- 1 | # test_build_open_assistant_prompt.py 2 | 3 | import pytest 4 | 5 | from easyllm.prompt_utils.open_assistant import build_open_assistant_prompt 6 | 7 | 8 | def test_build_open_assistant_prompt_single_message(): 9 | message = "Hello!" 
10 | expected_output = f"<|system|><|prompter|>{message}<|assistant|>" 11 | result = build_open_assistant_prompt(message) 12 | assert result == expected_output 13 | 14 | 15 | def test_build_open_assistant_prompt_multiple_messages(): 16 | messages = [ 17 | {"content":"You are a chat bot.", "role":"system"}, 18 | {"content":"Hello!", "role": "user"}, 19 | ] 20 | expected_output = "<|system|>You are a chat bot.<|prompter|>Hello!<|assistant|>" 21 | result = build_open_assistant_prompt(messages) 22 | assert result == expected_output 23 | 24 | 25 | def test_build_open_assistant_prompt_function_call(): 26 | messages = [ 27 | {"content":"You are a chat bot.", "role":"system"}, 28 | {"content":"some_function()", "role": "function"}, 29 | ] 30 | with pytest.raises(ValueError, match="Open Assistant does not support function calls."): 31 | build_open_assistant_prompt(messages) 32 | -------------------------------------------------------------------------------- /tests/prompt_utils/test_stablebeluga.py: -------------------------------------------------------------------------------- 1 | # test_build_stablebeluga_prompt.py 2 | 3 | import pytest 4 | 5 | from easyllm.prompt_utils.stablebeluga import build_stablebeluga_prompt 6 | 7 | 8 | def test_build_stablebeluga_prompt_single_message(): 9 | message = "Hello!" 10 | expected_output = f"### System:\n\n\n### User:\n{message}\n\n### Assistant:" 11 | result = build_stablebeluga_prompt(message) 12 | assert result == expected_output 13 | 14 | 15 | def test_build_stablebeluga_prompt_multiple_messages(): 16 | messages = [ 17 | {"content":"You are a chat bot.", "role":"system"}, 18 | {"content":"Hello!", "role": "user"}, 19 | ] 20 | expected_output = "### System:\nYou are a chat bot.\n\n### User:\nHello!\n\n### Assistant:" 21 | result = build_stablebeluga_prompt(messages) 22 | assert result == expected_output 23 | 24 | 25 | def test_build_stablebeluga_prompt_function_call(): 26 | messages = [ 27 | {"content":"You are a chat bot.", "role":"system"}, 28 | {"content":"some_function()", "role": "function"}, 29 | ] 30 | with pytest.raises(ValueError, match="stablebeluga does not support function calls."): 31 | build_stablebeluga_prompt(messages) 32 | -------------------------------------------------------------------------------- /tests/prompt_utils/test_vicuna.py: -------------------------------------------------------------------------------- 1 | # test_build_vicuna_prompt.py 2 | 3 | import pytest 4 | 5 | from easyllm.prompt_utils.vicuna import build_vicuna_prompt 6 | 7 | 8 | def test_build_vicuna_prompt_single_message(): 9 | message = "Hello!" 
10 | expected_output = f"\n\nUSER: {message}\nASSISTANT: " 11 | result = build_vicuna_prompt(message) 12 | assert result == expected_output 13 | 14 | 15 | def test_build_vicuna_prompt_multiple_messages(): 16 | messages = [ 17 | {"content":"You are a chat bot.", "role":"system"}, 18 | {"content":"Hello!", "role": "user"}, 19 | ] 20 | expected_output = "You are a chat bot.\n\nUSER: Hello!\nASSISTANT: " 21 | result = build_vicuna_prompt(messages) 22 | assert result == expected_output 23 | 24 | 25 | def test_build_vicuna_prompt_function_call(): 26 | messages = [ 27 | {"content":"You are a chat bot.", "role":"system"}, 28 | {"content":"some_function()", "role": "function"}, 29 | ] 30 | with pytest.raises(ValueError, match="Vicuna does not support function calls."): 31 | build_vicuna_prompt(messages) 32 | -------------------------------------------------------------------------------- /tests/prompt_utils/test_wizardlm.py: -------------------------------------------------------------------------------- 1 | # test_build_wizardlm_prompt.py 2 | 3 | import pytest 4 | 5 | from easyllm.prompt_utils.wizardlm import build_wizardlm_prompt 6 | 7 | 8 | def test_build_wizardlm_prompt_single_message(): 9 | message = "Hello!" 10 | expected_output = f"USER: {message} ASSISTANT: " 11 | result = build_wizardlm_prompt(message) 12 | assert result == expected_output 13 | 14 | 15 | def test_build_wizardlm_prompt_multiple_messages(): 16 | messages = [ 17 | {"content":"You are a chat bot.", "role":"system"}, 18 | {"content":"Hello!", "role": "user"}, 19 | ] 20 | expected_output = "You are a chat bot. USER: Hello! ASSISTANT: " 21 | result = build_wizardlm_prompt(messages) 22 | assert result == expected_output 23 | 24 | 25 | def test_build_wizardlm_prompt_function_call(): 26 | messages = [ 27 | {"content":"You are a chat bot.", "role":"system"}, 28 | {"content":"some_function()", "role": "function"}, 29 | ] 30 | with pytest.raises(ValueError, match="WizardLM does not support function calls."): 31 | build_wizardlm_prompt(messages) 32 | -------------------------------------------------------------------------------- /tests/schema/test_base.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import pytest 4 | 5 | from easyllm.schema.base import ChatMessage, Usage 6 | 7 | 8 | def test_chat_message() -> None: 9 | """Test that the ChatMessage schema works as expected.""" 10 | text = "Never gonna give you up. Never gonna let you down." 
11 | role = "user" 12 | 13 | message = ChatMessage(content=text, role=role) 14 | 15 | assert message.content == text 16 | assert isinstance(message.content, type(text)) 17 | assert message.role == role 18 | assert isinstance(message.role, type(role)) 19 | 20 | 21 | @pytest.mark.parametrize( 22 | "role", ["user", "assistant", "function", "system"] 23 | ) 24 | def test_all_valid_roles(role: str) -> None: 25 | """Test that all valid roles are accepted.""" 26 | message = ChatMessage(content="Hello!", role=role) 27 | assert message.role == role 28 | assert isinstance(message.role, type(role)) 29 | 30 | 31 | @pytest.mark.parametrize( 32 | "role", 33 | ["fellow", "bro", "", 1, 1.0, ["user", "assistant"], {"user": "John Doe"}], 34 | ) 35 | def test_all_invalid_roles(role: Any) -> None: 36 | """Test that all invalid roles are rejected.""" 37 | with pytest.raises(ValueError): 38 | ChatMessage(content="Hello!", role=role) 39 | 40 | 41 | @pytest.mark.parametrize( 42 | "message", 43 | [1234, ["Hello!"], {"content": "Hello!", "role": "user"}], 44 | ) 45 | def test_all_invalid_messages(message: Any) -> None: 46 | """Test that all invalid messages are rejected.""" 47 | with pytest.raises(ValueError): 48 | ChatMessage(content=message, role="user") 49 | 50 | 51 | def test_usage() -> None: 52 | """Test that the Usage schema works as expected.""" 53 | prompt_tokens = 10 54 | completion_tokens = 20 55 | total_tokens = 30 56 | 57 | usage = Usage( 58 | prompt_tokens=prompt_tokens, 59 | completion_tokens=completion_tokens, 60 | total_tokens=total_tokens, 61 | ) 62 | 63 | assert usage.prompt_tokens == prompt_tokens 64 | assert isinstance(usage.prompt_tokens, type(prompt_tokens)) 65 | assert usage.completion_tokens == completion_tokens 66 | assert isinstance(usage.completion_tokens, type(completion_tokens)) 67 | assert usage.total_tokens == total_tokens 68 | assert isinstance(usage.total_tokens, type(total_tokens)) 69 | 70 | 71 | @pytest.mark.parametrize( 72 | ["prompt_tokens", "completion_tokens", "total_tokens"], 73 | [ 74 | ("abc", 10.0, 10), 75 | (15, "def", 15.0), 76 | (20, 20.0, "ghi"), 77 | ], 78 | ) 79 | def test_invalid_usage(prompt_tokens, completion_tokens, total_tokens) -> None: 80 | """Test that invalid Usage inputs are rejected.""" 81 | with pytest.raises(ValueError): 82 | Usage( 83 | prompt_tokens=prompt_tokens, 84 | completion_tokens=completion_tokens, 85 | total_tokens=total_tokens, 86 | ) 87 | 88 | 89 | @pytest.mark.parametrize( 90 | ["prompt_tokens", "completion_tokens", "total_tokens"], 91 | [ 92 | (10, 10, 10), 93 | ("10", 10, 10), 94 | (10, "10", 10), 95 | (10, 10, "10"), 96 | ], 97 | ) 98 | def test_str_to_int_for_usage( 99 | prompt_tokens, completion_tokens, total_tokens 100 | ) -> None: 101 | """Test that str inputs are converted to int.""" 102 | usage = Usage( 103 | prompt_tokens=prompt_tokens, 104 | completion_tokens=completion_tokens, 105 | total_tokens=total_tokens, 106 | ) 107 | 108 | assert usage.prompt_tokens == 10 109 | assert isinstance(usage.prompt_tokens, type(10)) 110 | assert usage.completion_tokens == 10 111 | assert isinstance(usage.completion_tokens, type(10)) 112 | assert usage.total_tokens == 10 113 | assert isinstance(usage.total_tokens, type(10)) 114 | -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | 2 | # dummy pytest for ci 3 | def test_dummy(): 4 | assert True 5 | 
--------------------------------------------------------------------------------